Example #1
0
def advanceWorkflow(event):
    """
    Advance to next step in workflow. Send a notification on error.
    """
    jobId = event.info['jobId']
    stepName = event.info['stepName']
    userId = event.info['userId']

    user = User().load(userId, force=True, exc=True)

    try:
        DanesfieldWorkflowManager.instance().advance(jobId=jobId)
    except DanesfieldWorkflowException as e:
        logprint.warning(
            'advanceWorkflow: Error advancing workflow '
            'Job={} Step={} PreviousStep={} Message=\'{}\''.format(
                jobId, e.step, stepName, str(e)))

        # Create notification for workflow error
        Notification().createNotification(type='danesfield_workflow_error',
                                          data={
                                              'step': e.step or '',
                                              'previousStep': stepName,
                                              'message': str(e)
                                          },
                                          user=user,
                                          expires=datetime.datetime.utcnow() +
                                          datetime.timedelta(seconds=30))
Example #2
0
def mountServer(path, database=None, fuseOptions=None, quiet=False, plugins=None):
    """
    Perform the mount.

    :param path: the mount location.
    :param database: a database connection URI, if it contains '://'.
        Otherwise, the default database is used.
    :param fuseOptions: a comma-separated string of options to pass to the FUSE
        mount.  A key without a value is taken as True.  Boolean values are
        case insensitive.  For instance, 'foreground' or 'foreground=True' will
        keep this program running until the SIGTERM or unmounted.
    :param quiet: if True, suppress Girder logs.
    :param plugins: an optional list of plugins to enable.  If None, use the
        plugins that are configured.
    """
    if quiet:
        curConfig = config.getConfig()
        curConfig.setdefault('logging', {})['log_quiet'] = True
        curConfig.setdefault('logging', {})['log_level'] = 'FATAL'
        girder._attachFileLogHandlers()
    if database and '://' in database:
        cherrypy.config['database']['uri'] = database
    if plugins is not None:
        plugins = plugins.split(',')
    webroot, appconf = configureServer(plugins=plugins)
    girder._setupCache()

    opClass = ServerFuse(stat=os.stat(path))
    options = {
        # By default, we run in the background so the mount command returns
        # immediately.  If we run in the foreground, a SIGTERM will shut it
        # down
        'foreground': False,
        # Cache files if their size and timestamp haven't changed.
        # This lets the OS buffer files efficiently.
        'auto_cache': True,
        # We aren't specifying our own inos
        'use_ino': False,
        # read-only file system
        'ro': True,
    }
    if sys.platform != 'darwin':
        # Automatically unmount when we try to mount again
        options['auto_unmount'] = True
    if fuseOptions:
        for opt in fuseOptions.split(','):
            if '=' in opt:
                key, value = opt.split('=', 1)
                value = (False if value.lower() == 'false' else
                         True if value.lower() == 'true' else value)
            else:
                key, value = opt, True
            if key in ('use_ino', 'ro', 'rw') and options.get(key) != value:
                logprint.warning('Ignoring the %s=%r option' % (key, value))
                continue
            options[key] = value
    Setting().set(SettingKey.GIRDER_MOUNT_INFORMATION,
                  {'path': path, 'mounttime': time.time()})
    FUSELogError(opClass, path, **options)
Example #3
0
def mountServer(path, database=None, fuseOptions=None, quiet=False, plugins=None):
    """
    Perform the mount.

    :param path: the mount location.
    :param database: a database connection URI, if it contains '://'.
        Otherwise, the default database is used.
    :param fuseOptions: a comma-separated string of options to pass to the FUSE
        mount.  A key without a value is taken as True.  Boolean values are
        case insensitive.  For instance, 'foreground' or 'foreground=True' will
        keep this program running until the SIGTERM or unmounted.
    :param quiet: if True, suppress Girder logs.
    :param plugins: an optional list of plugins to enable.  If None, use the
        plugins that are configured.
    """
    if quiet:
        curConfig = config.getConfig()
        curConfig.setdefault('logging', {})['log_quiet'] = True
        curConfig.setdefault('logging', {})['log_level'] = 'FATAL'
        girder._setupLogger()
    if database and '://' in database:
        cherrypy.config['database']['uri'] = database
    if plugins is not None:
        plugins = plugins.split(',')
    webroot, appconf = configureServer(plugins=plugins)
    girder._setupCache()

    opClass = ServerFuse(stat=os.stat(path))
    options = {
        # By default, we run in the background so the mount command returns
        # immediately.  If we run in the foreground, a SIGTERM will shut it
        # down
        'foreground': False,
        # Cache files if their size and timestamp haven't changed.
        # This lets the OS buffer files efficiently.
        'auto_cache': True,
        # We aren't specifying our own inos
        'use_ino': False,
        # read-only file system
        'ro': True,
    }
    if sys.platform != 'darwin':
        # Automatically unmount when we try to mount again
        options['auto_unmount'] = True
    if fuseOptions:
        for opt in fuseOptions.split(','):
            if '=' in opt:
                key, value = opt.split('=', 1)
                value = (False if value.lower() == 'false' else
                         True if value.lower() == 'true' else value)
            else:
                key, value = opt, True
            if key in ('use_ino', 'ro', 'rw') and options.get(key) != value:
                logprint.warning('Ignoring the %s=%r option' % (key, value))
                continue
            options[key] = value
    Setting().set(SettingKey.GIRDER_MOUNT_INFORMATION,
                  {'path': path, 'mounttime': time.time()})
    FUSELogError(opClass, path, **options)
Example #4
0
    def route(self, method, route, handler, nodoc=False, resource=None):
        """
        Define a route for your REST resource.

        :param method: The HTTP method, e.g. 'GET', 'POST', 'PUT', 'PATCH'
        :type method: str
        :param route: The route, as a list of path params relative to the
            resource root. Elements of this list starting with ':' are assumed
            to be wildcards.
        :type route: tuple[str]
        :param handler: The method to be called if the route and method are
            matched by a request. Wildcards in the route will be expanded and
            passed as kwargs with the same name as the wildcard identifier.
        :type handler: function
        :param nodoc: If your route intentionally provides no documentation,
            set this to True to disable the warning on startup.

        :type nodoc: bool
        :param resource: The name of the resource at the root of this route.
            The resource instance (self) can also be passed. This allows the
            mount path to be looked up. This allows a resource to be mounted at a
            prefix.
        """
        self._ensureInit()
        # Insertion sort to maintain routes in required order.
        nLengthRoutes = self._routes[method.lower()][len(route)]
        for i in range(len(nLengthRoutes)):
            if self._shouldInsertRoute(route, nLengthRoutes[i][0]):
                nLengthRoutes.insert(i, (route, handler))
                break
        else:
            nLengthRoutes.append((route, handler))

        # Now handle the api doc if the handler has any attached
        if resource is None and hasattr(self, 'resourceName'):
            resource = self.resourceName
        elif resource is None:
            resource = self

        if hasattr(handler, 'description'):
            if handler.description is not None:
                docs.addRouteDocs(resource=resource,
                                  route=route,
                                  method=method,
                                  info=handler.description.asDict(),
                                  handler=handler)
        elif not nodoc:
            routePath = '/'.join([resource] + list(route))
            logprint.warning(
                'WARNING: No description docs present for route %s %s' %
                (method, routePath))

        # Warn if there is no access decorator on the handler function
        if not hasattr(handler, 'accessLevel'):
            routePath = '/'.join([resource] + list(route))
            logprint.warning(
                'WARNING: No access level specified for route %s %s' %
                (method, routePath))
Example #5
0
    def route(self, method, route, handler, nodoc=False, resource=None):
        """
        Define a route for your REST resource.

        :param method: The HTTP method, e.g. 'GET', 'POST', 'PUT', 'PATCH'
        :type method: str
        :param route: The route, as a list of path params relative to the
            resource root. Elements of this list starting with ':' are assumed
            to be wildcards.
        :type route: tuple[str]
        :param handler: The method to be called if the route and method are
            matched by a request. Wildcards in the route will be expanded and
            passed as kwargs with the same name as the wildcard identifier.
        :type handler: function
        :param nodoc: If your route intentionally provides no documentation,
            set this to True to disable the warning on startup.

        :type nodoc: bool
        :param resource: The name of the resource at the root of this route.
            The resource instance (self) can also be passed. This allows the
            mount path to be looked up. This allows a resource to be mounted at a
            prefix.
        """
        self._ensureInit()
        # Insertion sort to maintain routes in required order.
        nLengthRoutes = self._routes[method.lower()][len(route)]
        for i in range(len(nLengthRoutes)):
            if self._shouldInsertRoute(route, nLengthRoutes[i][0]):
                nLengthRoutes.insert(i, (route, handler))
                break
        else:
            nLengthRoutes.append((route, handler))

        # Now handle the api doc if the handler has any attached
        if resource is None and hasattr(self, 'resourceName'):
            resource = self.resourceName
        elif resource is None:
            resource = self

        if hasattr(handler, 'description'):
            if handler.description is not None:
                docs.addRouteDocs(
                    resource=resource, route=route, method=method,
                    info=handler.description.asDict(), handler=handler)
        elif not nodoc:
            routePath = '/'.join([resource] + list(route))
            logprint.warning(
                'WARNING: No description docs present for route %s %s' % (
                    method, routePath))

        # Warn if there is no access decorator on the handler function
        if not hasattr(handler, 'accessLevel'):
            routePath = '/'.join([resource] + list(route))
            logprint.warning(
                'WARNING: No access level specified for route %s %s' % (
                    method, routePath))
Example #6
0
def loadPlugins(plugins, root, appconf, apiRoot=None, curConfig=None,
                buildDag=True):
    """
    Loads a set of plugins into the application.

    :param plugins: The set of plugins to load, by directory name.
    :type plugins: list
    :param root: The root node of the server tree.
    :type root: object
    :param appconf: The server's cherrypy configuration object.
    :type appconf: dict
    :param apiRoot: The cherrypy api root object.
    :type apiRoot: object or None
    :param curConfig: A girder config object to use.
    :type curConfig: dict or None
    :param buildDag: If the ``plugins`` parameter is already a topo-sorted list
        with all dependencies resolved, set this to False and it will skip
        rebuilding the DAG. Otherwise the dependency resolution and sorting
        will occur within this method.
    :type buildDag: bool
    :returns: A 3-tuple containing the modified root, config, and apiRoot
        objects.
    :rtype tuple:
    """
    # Register a pseudo-package for the root of all plugins. This must be
    # present in the system module list in order to avoid import warnings.
    if curConfig is None:
        curConfig = _config.getConfig()

    if 'plugins' in curConfig and 'plugin_directory' in curConfig['plugins']:
        logprint.warning(
            'Warning: the plugin_directory setting is deprecated. Please use '
            'the `girder-install plugin` command and remove this setting from '
            'your config file.')

    if ROOT_PLUGINS_PACKAGE not in sys.modules:
        module = imp.new_module(ROOT_PLUGINS_PACKAGE)
        girder.plugins = module
        sys.modules[ROOT_PLUGINS_PACKAGE] = module

    logprint.info('Resolving plugin dependencies...')

    if buildDag:
        plugins = getToposortedPlugins(plugins, curConfig, ignoreMissing=True)

    for plugin in plugins:
        try:
            root, appconf, apiRoot = loadPlugin(
                plugin, root, appconf, apiRoot, curConfig=curConfig)
            logprint.success('Loaded plugin "%s"' % plugin)
        except Exception:
            logprint.exception(
                'ERROR: Failed to load plugin "%s":' % plugin)

    return root, appconf, apiRoot
Example #7
0
    def _ensureInit(self):
        """
        Calls ``Resource.__init__`` if the subclass constructor did not already
        do so.

        In the past, Resource subclasses were not expected to call their
        superclass constructor.
        """
        if not hasattr(self, '_routes'):
            Resource.__init__(self)
            logprint.warning('WARNING: Resource subclass "%s" did not call '
                             '"Resource__init__()" from its constructor.' %
                             self.__class__.__name__)
Example #8
0
 def reconnect(self):
     """
     Reconnect to the database and rebuild indices if necessary.  If a
     unique index on key does not exist, make one, first discarding any
     extant index on key and removing duplicate keys if necessary.
     """
     super(Setting, self).reconnect()
     try:
         indices = self.collection.index_information()
     except pymongo.errors.OperationFailure:
         indices = []
     hasUniqueKeyIndex = False
     presentKeyIndices = []
     for index in indices:
         if indices[index]['key'][0][0] == 'key':
             if indices[index].get('unique'):
                 hasUniqueKeyIndex = True
                 break
             presentKeyIndices.append(index)
     if not hasUniqueKeyIndex:
         for index in presentKeyIndices:
             self.collection.drop_index(index)
         duplicates = self.collection.aggregate([{
             '$group': {
                 '_id': '$key',
                 'key': {
                     '$first': '$key'
                 },
                 'ids': {
                     '$addToSet': '$_id'
                 },
                 'count': {
                     '$sum': 1
                 }
             }
         }, {
             '$match': {
                 'count': {
                     '$gt': 1
                 }
             }
         }])
         for duplicate in duplicates:
             logprint.warning('Removing duplicate setting with key %s.' %
                              (duplicate['key']))
             # Remove all of the duplicates.  Keep the item with the lowest
             # id in Mongo.
             for duplicateId in sorted(duplicate['ids'])[1:]:
                 self.collection.delete_one({'_id': duplicateId})
         self.collection.create_index('key', unique=True)
Example #9
0
    def _ensureInit(self):
        """
        Calls ``Resource.__init__`` if the subclass constructor did not already
        do so.

        In the past, Resource subclasses were not expected to call their
        superclass constructor.
        """
        if not hasattr(self, '_routes'):
            Resource.__init__(self)
            logprint.warning(
                'WARNING: Resource subclass "%s" did not call '
                '"Resource__init__()" from its constructor.' %
                self.__class__.__name__)
Example #10
0
def findAllPlugins(curConfig=None):
    """
    Walks the plugins directories to find all of the plugins. If the plugin has
    a plugin.json file, this reads that file to determine dependencies.
    """
    allPlugins = {}

    findEntryPointPlugins(allPlugins)
    pluginDirs = getPluginDirs(curConfig)
    if not pluginDirs:
        logprint.warning('Plugin directory not found.')
        return allPlugins

    for pluginDir in pluginDirs:
        dirs = [dir for dir in os.listdir(pluginDir) if os.path.isdir(
            os.path.join(pluginDir, dir))]

        for plugin in dirs:
            data = {}
            configJson = os.path.join(pluginDir, plugin, 'plugin.json')
            configYml = os.path.join(pluginDir, plugin, 'plugin.yml')
            if os.path.isfile(configJson):
                with open(configJson) as conf:
                    try:
                        data = json.load(conf)
                    except ValueError:
                        logprint.exception(
                            'ERROR: Plugin "%s": plugin.json is not valid '
                            'JSON.' % plugin)
            elif os.path.isfile(configYml):
                with open(configYml) as conf:
                    try:
                        data = yaml.safe_load(conf)
                    except yaml.YAMLError:
                        logprint.exception(
                            'ERROR: Plugin "%s": plugin.yml is not valid '
                            'YAML.' % plugin)

            allPlugins[plugin] = {
                'name': data.get('name', plugin),
                'description': data.get('description', ''),
                'version': data.get('version', ''),
                'dependencies': set(data.get('dependencies', []))
            }
    return allPlugins
Example #11
0
    def _validateParamInfo(self, dataType, paramType, name):
        """
        Helper to convert and validate the dataType and paramType.
        Prints warnings if invalid values were passed.
        """
        # Legacy data type conversions
        if dataType == 'int':
            dataType = 'integer'

        # Parameter Object spec:
        # If type is "file", then the swagger "consumes" field MUST be either
        # "multipart/form-data", "application/x-www-form-urlencoded" or both
        # and the parameter MUST be in "formData".
        if dataType == 'file':
            paramType = 'formData'

        # Get type and format from common name
        dataTypeFormat = None
        if dataType in self._dataTypeMap:
            dataType, dataTypeFormat = self._dataTypeMap[dataType]
        # If we are dealing with the body then the dataType might be defined
        # by a schema added using addModel(...), we don't know for sure as we
        # don't know the resource name here to look it up.
        elif paramType != 'body':
            logprint.warning(
                'WARNING: Invalid dataType "%s" specified for parameter names "%s"'
                % (dataType, name))

        # Parameter Object spec:
        # Since the parameter is not located at the request body, it is limited
        # to simple types (that is, not an object).
        if paramType != 'body' and dataType not in (
                'string', 'number', 'integer', 'long', 'boolean', 'array',
                'file', 'float', 'double', 'date', 'dateTime'):
            logprint.warning(
                'WARNING: Invalid dataType "%s" specified for parameter "%s"' %
                (dataType, name))

        if paramType == 'form':
            paramType = 'formData'

        return dataType, dataTypeFormat, paramType
Example #12
0
    def _validateParamInfo(self, dataType, paramType, name):
        """
        Helper to convert and validate the dataType and paramType.
        Prints warnings if invalid values were passed.
        """
        # Legacy data type conversions
        if dataType == 'int':
            dataType = 'integer'

        # Parameter Object spec:
        # If type is "file", then the swagger "consumes" field MUST be either
        # "multipart/form-data", "application/x-www-form-urlencoded" or both
        # and the parameter MUST be in "formData".
        if dataType == 'file':
            paramType = 'formData'

        # Get type and format from common name
        dataTypeFormat = None
        if dataType in self._dataTypeMap:
            dataType, dataTypeFormat = self._dataTypeMap[dataType]
        # If we are dealing with the body then the dataType might be defined
        # by a schema added using addModel(...), we don't know for sure as we
        # don't know the resource name here to look it up.
        elif paramType != 'body':
            logprint.warning(
                'WARNING: Invalid dataType "%s" specified for parameter names "%s"' %
                (dataType, name))

        # Parameter Object spec:
        # Since the parameter is not located at the request body, it is limited
        # to simple types (that is, not an object).
        if paramType != 'body' and dataType not in (
                'string', 'number', 'integer', 'long', 'boolean', 'array', 'file', 'float',
                'double', 'date', 'dateTime'):
            logprint.warning(
                'WARNING: Invalid dataType "%s" specified for parameter "%s"' % (dataType, name))

        if paramType == 'form':
            paramType = 'formData'

        return dataType, dataTypeFormat, paramType
Example #13
0
def addModel(name, model, resources=None, silent=False):
    """
    Add a model to the Swagger documentation.

    :param resources: The type(s) of resource(s) to add the model to. New
        resource types may be implicitly defined, with the expectation that
        routes will be added for them at some point. If no resources are
        passed, the model will be exposed for every resource type
    :param resources: str or tuple/list[str]
    :param name: The name of the model.
    :type name: str
    :param model: The model to add.
    :type model: dict
    :param silent: Set this to True to suppress warnings.
    :type silent: bool

    .. warning:: This is a low-level API which does not validate the format of
       ``model``. See the `Swagger Model documentation`_ for a complete
       specification of the correct format for ``model``.

    .. versionchanged:: The syntax and behavior of this function was modified
        after v1.3.2. The previous implementation did not include a resources
        parameter.

    .. _Swagger Model documentation: https://github.com/OAI/
        OpenAPI-Specification/blob/0122c22e7fb93b571740dd3c6e141c65563a18be/
        versions/2.0.md#definitionsObject
    """
    if resources:
        if isinstance(resources, six.string_types):
            resources = (resources,)
        for resource in resources:
            models[resource][name] = model
    else:
        if not silent:
            logprint.warning(
                'WARNING: adding swagger models without specifying resources '
                'to bind to is discouraged (%s).' % name)
        models[None][name] = model
Example #14
0
def getPluginDirs(curConfig=None):
    """Return an ordered list of directories that plugins can live in."""
    failedPluginDirs = set()

    if curConfig is None:
        curConfig = _config.getConfig()

    if 'plugins' in curConfig and 'plugin_directory' in curConfig['plugins']:
        pluginDirs = curConfig['plugins']['plugin_directory'].split(':')
    else:
        pluginDirs = [defaultPluginDir()]

    for pluginDir in pluginDirs:
        try:
            mkdir(pluginDir)
        except OSError:
            logprint.warning(
                'Could not create plugin directory %s.' % pluginDir)

            failedPluginDirs.add(pluginDir)

    return [dir for dir in pluginDirs if dir not in failedPluginDirs]
Example #15
0
def addModel(name, model, resources=None, silent=False):
    """
    Add a model to the Swagger documentation.

    :param resources: The type(s) of resource(s) to add the model to. New
        resource types may be implicitly defined, with the expectation that
        routes will be added for them at some point. If no resources are
        passed, the model will be exposed for every resource type
    :param resources: str or tuple/list[str]
    :param name: The name of the model.
    :type name: str
    :param model: The model to add.
    :type model: dict
    :param silent: Set this to True to suppress warnings.
    :type silent: bool

    .. warning:: This is a low-level API which does not validate the format of
       ``model``. See the `Swagger Model documentation`_ for a complete
       specification of the correct format for ``model``.

    .. versionchanged:: The syntax and behavior of this function was modified
        after v1.3.2. The previous implementation did not include a resources
        parameter.

    .. _Swagger Model documentation: https://github.com/OAI/
        OpenAPI-Specification/blob/0122c22e7fb93b571740dd3c6e141c65563a18be/
        versions/2.0.md#definitionsObject
    """
    if resources:
        if isinstance(resources, str):
            resources = (resources, )
        for resource in resources:
            models[resource][name] = model
    else:
        if not silent:
            logprint.warning(
                'WARNING: adding swagger models without specifying resources '
                'to bind to is discouraged (%s).' % name)
        models[None][name] = model
Example #16
0
    def reconnect(self):
        """
        Reconnect to the database and rebuild indices if necessary. Users should
        typically not have to call this method.
        """
        db_connection = getDbConnection()
        self.database = db_connection.get_default_database()
        self.collection = MongoProxy(self.database[self.name])

        for index in self._indices:
            if isinstance(index, (list, tuple)):
                self.collection.create_index(index[0], **index[1])
            else:
                self.collection.create_index(index)

        if isinstance(self._textIndex, dict):
            textIdx = [(k, 'text') for k in six.viewkeys(self._textIndex)]
            try:
                self.collection.create_index(
                    textIdx, weights=self._textIndex,
                    default_language=self._textLanguage)
            except pymongo.errors.OperationFailure:
                logprint.warning('WARNING: Text search not enabled.')
Example #17
0
    def reconnect(self):
        """
        Reconnect to the database and rebuild indices if necessary. Users should
        typically not have to call this method.
        """
        db_connection = getDbConnection()
        self.database = db_connection.get_default_database()
        self.collection = MongoProxy(self.database[self.name])

        for index in self._indices:
            if isinstance(index, (list, tuple)):
                self.collection.create_index(index[0], **index[1])
            else:
                self.collection.create_index(index)

        if isinstance(self._textIndex, dict):
            textIdx = [(k, 'text') for k in six.viewkeys(self._textIndex)]
            try:
                self.collection.create_index(
                    textIdx, weights=self._textIndex,
                    default_language=self._textLanguage)
            except pymongo.errors.OperationFailure:
                logprint.warning('WARNING: Text search not enabled.')
Example #18
0
 def reconnect(self):
     """
     Reconnect to the database and rebuild indices if necessary.  If a
     unique index on key does not exist, make one, first discarding any
     extant index on key and removing duplicate keys if necessary.
     """
     super(Setting, self).reconnect()
     try:
         indices = self.collection.index_information()
     except pymongo.errors.OperationFailure:
         indices = []
     hasUniqueKeyIndex = False
     presentKeyIndices = []
     for index in indices:
         if indices[index]['key'][0][0] == 'key':
             if indices[index].get('unique'):
                 hasUniqueKeyIndex = True
                 break
             presentKeyIndices.append(index)
     if not hasUniqueKeyIndex:
         for index in presentKeyIndices:
             self.collection.drop_index(index)
         duplicates = self.collection.aggregate([{
             '$group': {'_id': '$key',
                        'key': {'$first': '$key'},
                        'ids': {'$addToSet': '$_id'},
                        'count': {'$sum': 1}}}, {
             '$match': {'count': {'$gt': 1}}}])
         for duplicate in duplicates:
             logprint.warning(
                 'Removing duplicate setting with key %s.' % (
                     duplicate['key']))
             # Remove all of the duplicates.  Keep the item with the lowest
             # id in Mongo.
             for duplicateId in sorted(duplicate['ids'])[1:]:
                 self.collection.delete_one({'_id': duplicateId})
         self.collection.create_index('key', unique=True)
Example #19
0
def getDbConnection(uri=None, replicaSet=None, autoRetry=True, **kwargs):
    """
    Get a MongoClient object that is connected to the configured database.
    We lazy-instantiate a module-level singleton, the MongoClient objects
    manage their own connection pools internally. Any extra kwargs you pass to
    this method will be passed through to the MongoClient.

    :param uri: if specified, connect to this mongo db rather than the one in
                the config.
    :param replicaSet: if uri is specified, use this replica set.
    :param autoRetry: if this connection should automatically retry operations
        in the event of an AutoReconnect exception. If you're testing the
        connection, set this to False. If disabled, this also will not cache
        the mongo client, so make sure to only disable if you're testing a
        connection.
    :type autoRetry: bool
    """
    global _dbClients

    origKey = (uri, replicaSet)
    if origKey in _dbClients:
        return _dbClients[origKey]

    if uri is None or uri == '':
        dbConf = getDbConfig()
        uri = dbConf.get('uri')
        replicaSet = dbConf.get('replica_set')
    clientOptions = {
        # This is the maximum time between when we fetch data from a cursor.
        # If it times out, the cursor is lost and we can't reconnect.  If it
        # isn't set, we have issues with replica sets when the primary goes
        # down.  This value can be overridden in the mongodb uri connection
        # string with the socketTimeoutMS.
        'socketTimeoutMS': 60000,
        'connectTimeoutMS': 20000,
        'serverSelectionTimeoutMS': 20000,
        'read_preference': ReadPreference.SECONDARY_PREFERRED,
        'replicaSet': replicaSet
    }
    clientOptions.update(kwargs)

    if uri is None:
        dbUriRedacted = 'mongodb://*****:*****@')
        if len(parts) == 2:
            dbUriRedacted = 'mongodb://' + parts[1]
        else:
            dbUriRedacted = uri

        client = pymongo.MongoClient(uri, **clientOptions)

    # Make sure we can connect to the mongo server at startup
    client.server_info()

    if autoRetry:
        client = MongoProxy(client, logger=logger)
        _dbClients[origKey] = _dbClients[(uri, replicaSet)] = client

    desc = ''
    if replicaSet:
        desc += ', replica set: %s' % replicaSet
    logprint.info('Connected to MongoDB: %s%s' % (dbUriRedacted, desc))
    return client
Example #20
0
def getDbConnection(uri=None, replicaSet=None, autoRetry=True, quiet=False, **kwargs):
    """
    Get a MongoClient object that is connected to the configured database.
    We lazy-instantiate a module-level singleton, the MongoClient objects
    manage their own connection pools internally. Any extra kwargs you pass to
    this method will be passed through to the MongoClient.

    :param uri: if specified, connect to this mongo db rather than the one in
                the config.
    :param replicaSet: if uri is specified, use this replica set.
    :param autoRetry: if this connection should automatically retry operations
        in the event of an AutoReconnect exception. If you're testing the
        connection, set this to False. If disabled, this also will not cache
        the mongo client, so make sure to only disable if you're testing a
        connection.
    :type autoRetry: bool
    :param quiet: if true, don't logprint warnings and success.
    :type quiet: bool
    """
    global _dbClients

    origKey = (uri, replicaSet)
    if origKey in _dbClients:
        return _dbClients[origKey]

    dbConf = getDbConfig()

    if uri is None or uri == '':
        uri = dbConf.get('uri')
        replicaSet = dbConf.get('replica_set')

    clientOptions = {
        # This is the maximum time between when we fetch data from a cursor.
        # If it times out, the cursor is lost and we can't reconnect.  If it
        # isn't set, we have issues with replica sets when the primary goes
        # down.  This value can be overridden in the mongodb uri connection
        # string with the socketTimeoutMS.
        'socketTimeoutMS': 60000,
        'connectTimeoutMS': 20000,
        'serverSelectionTimeoutMS': 20000,
        'readPreference': 'secondaryPreferred',
        'replicaSet': replicaSet,
        'w': 'majority'
    }

    # All other options in the [database] section will be passed directly as
    # options to the mongo client
    for opt, val in six.viewitems(dict(dbConf)):
        if opt not in {'uri', 'replica_set'}:
            clientOptions[opt] = val

    # Finally, kwargs take precedence
    clientOptions.update(kwargs)
    # if the connection URI overrides any option, honor it above our own
    # settings.
    uriParams = urllib.parse.parse_qs(urllib.parse.urlparse(uri).query)
    for key in uriParams:
        if key in clientOptions:
            del clientOptions[key]

    if uri is None:
        dbUriRedacted = 'mongodb://*****:*****@')
        if len(parts) == 2:
            dbUriRedacted = 'mongodb://' + parts[1]
        else:
            dbUriRedacted = uri

        client = pymongo.MongoClient(uri, **clientOptions)

    if not quiet:
        desc = ''
        if replicaSet:
            desc += ', replica set: %s' % replicaSet
        logprint.info('Connecting to MongoDB: %s%s' % (dbUriRedacted, desc))

    # Make sure we can connect to the mongo server at startup
    client.server_info()

    if autoRetry:
        client = MongoProxy(client, logger=logger)
        _dbClients[origKey] = _dbClients[(uri, replicaSet)] = client

    return client
Example #21
0
def getDbConnection(uri=None, replicaSet=None, autoRetry=True, **kwargs):
    """
    Get a MongoClient object that is connected to the configured database.
    We lazy-instantiate a module-level singleton, the MongoClient objects
    manage their own connection pools internally. Any extra kwargs you pass to
    this method will be passed through to the MongoClient.

    :param uri: if specified, connect to this mongo db rather than the one in
                the config.
    :param replicaSet: if uri is specified, use this replica set.
    :param autoRetry: if this connection should automatically retry operations
        in the event of an AutoReconnect exception. If you're testing the
        connection, set this to False. If disabled, this also will not cache
        the mongo client, so make sure to only disable if you're testing a
        connection.
    :type autoRetry: bool
    """
    global _dbClients

    origKey = (uri, replicaSet)
    if origKey in _dbClients:
        return _dbClients[origKey]

    if uri is None or uri == '':
        dbConf = getDbConfig()
        uri = dbConf.get('uri')
        replicaSet = dbConf.get('replica_set')
    clientOptions = {
        # This is the maximum time between when we fetch data from a cursor.
        # If it times out, the cursor is lost and we can't reconnect.  If it
        # isn't set, we have issues with replica sets when the primary goes
        # down.  This value can be overridden in the mongodb uri connection
        # string with the socketTimeoutMS.
        'socketTimeoutMS': 60000,
        'connectTimeoutMS': 20000,
        'serverSelectionTimeoutMS': 20000,
        'read_preference': ReadPreference.SECONDARY_PREFERRED,
        'replicaSet': replicaSet
    }
    clientOptions.update(kwargs)

    if uri is None:
        dbUriRedacted = 'mongodb://*****:*****@')
        if len(parts) == 2:
            dbUriRedacted = 'mongodb://' + parts[1]
        else:
            dbUriRedacted = uri

        client = pymongo.MongoClient(uri, **clientOptions)

    # Make sure we can connect to the mongo server at startup
    client.server_info()

    if autoRetry:
        client = MongoProxy(client, logger=logger)
        _dbClients[origKey] = _dbClients[(uri, replicaSet)] = client

    desc = ''
    if replicaSet:
        desc += ', replica set: %s' % replicaSet
    logprint.info('Connected to MongoDB: %s%s' % (dbUriRedacted, desc))
    return client
Example #22
0
def getDbConnection(uri=None,
                    replicaSet=None,
                    autoRetry=True,
                    quiet=False,
                    **kwargs):
    """
    Get a MongoClient object that is connected to the configured database.
    We lazy-instantiate a module-level singleton, the MongoClient objects
    manage their own connection pools internally. Any extra kwargs you pass to
    this method will be passed through to the MongoClient.

    :param uri: if specified, connect to this mongo db rather than the one in
                the config.
    :param replicaSet: if uri is specified, use this replica set.
    :param autoRetry: if this connection should automatically retry operations
        in the event of an AutoReconnect exception. If you're testing the
        connection, set this to False. If disabled, this also will not cache
        the mongo client, so make sure to only disable if you're testing a
        connection.
    :type autoRetry: bool
    :param quiet: if true, don't logprint warnings and success.
    :type quiet: bool
    """
    global _dbClients

    origKey = (uri, replicaSet)
    if origKey in _dbClients:
        return _dbClients[origKey]

    dbConf = getDbConfig()

    if uri is None or uri == '':
        uri = dbConf.get('uri')
        replicaSet = dbConf.get('replica_set')

    clientOptions = {
        # This is the maximum time between when we fetch data from a cursor.
        # If it times out, the cursor is lost and we can't reconnect.  If it
        # isn't set, we have issues with replica sets when the primary goes
        # down.  This value can be overridden in the mongodb uri connection
        # string with the socketTimeoutMS.
        'socketTimeoutMS': 60000,
        'connectTimeoutMS': 20000,
        'serverSelectionTimeoutMS': 20000,
        'readPreference': 'secondaryPreferred',
        'replicaSet': replicaSet,
        'w': 'majority'
    }

    # All other options in the [database] section will be passed directly as
    # options to the mongo client
    for opt, val in six.viewitems(dict(dbConf)):
        if opt not in {'uri', 'replica_set'}:
            clientOptions[opt] = val

    # Finally, kwargs take precedence
    clientOptions.update(kwargs)
    # if the connection URI overrides any option, honor it above our own
    # settings.
    uriParams = urllib.parse.parse_qs(urllib.parse.urlparse(uri).query)
    for key in uriParams:
        if key in clientOptions:
            del clientOptions[key]

    if uri is None:
        dbUriRedacted = 'mongodb://*****:*****@')
        if len(parts) == 2:
            dbUriRedacted = 'mongodb://' + parts[1]
        else:
            dbUriRedacted = uri

        client = pymongo.MongoClient(uri, **clientOptions)

    if not quiet:
        desc = ''
        if replicaSet:
            desc += ', replica set: %s' % replicaSet
        logprint.info('Connecting to MongoDB: %s%s' % (dbUriRedacted, desc))

    # Make sure we can connect to the mongo server at startup
    client.server_info()

    if autoRetry:
        client = MongoProxy(client, logger=logger)
        _dbClients[origKey] = _dbClients[(uri, replicaSet)] = client

    return client
Example #23
0
    def initJob(self,
                requestInfo,
                workingSet,
                outputFolder,
                options,
                previousWorkingSet=None):
        """
        Initialize a new job to run the workflow.

        :param requestInfo: HTTP request and authorization info.
        :type requestInfo: RequestInfo
        :param workingSet: Source image working set.
        :type workingSet: dict
        :param outputFolder: Output folder document.
        :type outputFolder: dict
        :returns: Job identifier.
        :param options: Processing options.
        :type options: dict
        """
        with self._lock:
            if not self.workflow:
                raise DanesfieldWorkflowException('Workflow not configured')

            jobId = self._createJobId()

            # TODO: Improve job data storage
            jobData = {
                # Running steps
                'runningSteps': set(),
                # Completed steps
                'completedSteps': set(),
                # Failed steps:
                'failedSteps': set(),
                # Request info
                'requestInfo': requestInfo,
                # Working sets indexed by step name
                'workingSets': {
                    DanesfieldStep.INIT: workingSet
                },
                # Files indexed by step name
                'files': {},
                # Standard output indexed by step name
                'standardOutput': {},
                # Output folder
                'outputFolder': outputFolder,
                # Options
                'options': options if options is not None else {},
                # For composite steps, list of [Celery GroupResult,
                # number of jobs remaining], indexed by step name
                'groupResult': {}
            }

            logprint.info(
                'DanesfieldWorkflowManager.initJob Job={} WorkingSet={}'.
                format(jobId, workingSet['_id']))

            # If a workingSet exists for a given step, we include that
            # working set in the current jobData and flag it as being
            # complete (the step will not be re-run)
            step_name_re = re.compile(".*:\\s(.*)")
            for ws in WorkingSet().find(
                {'parentWorkingSetId': workingSet['_id']}):
                match = re.match(step_name_re, ws['name'])
                if match:
                    stepName = match.group(1)
                    jobData['workingSets'][stepName] = ws

                    # Set the skipped job as completed
                    jobData['completedSteps'].add(stepName)
                    logprint.info(
                        'DanesfieldWorkflowManager.skippingStep Job={} '
                        'StepName={}'.format(jobId, stepName))
                else:
                    logprint.warning(
                        'DanesfieldWorkflowManager.unableToParseStepName '
                        'Job={} WorkingSetName={}'.format(jobId, ws['name']))

            self._jobData[jobId] = jobData

            return jobId