Example #1
    def reconnect(self):
        """
        Reconnect to the database and rebuild indices if necessary. Users should
        typically not have to call this method.
        """
        db_connection = getDbConnection()
        self.database = db_connection.get_default_database()
        self.collection = MongoProxy(self.database[self.name])

        for index in self._indices:
            if isinstance(index, (list, tuple)):
                self.collection.ensure_index(index[0], **index[1])
            else:
                self.collection.ensure_index(index)

        if type(self._textIndex) is dict:
            textIdx = [(k, 'text') for k in self._textIndex.keys()]
            try:
                self.collection.ensure_index(
                    textIdx,
                    weights=self._textIndex,
                    default_language=self._textLanguage)
            except pymongo.errors.OperationFailure:
                print(
                    TerminalColor.warning('WARNING: Text search not enabled.'))
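
Note: `ensure_index` is deprecated as of PyMongo 3.0 (and removed in 4.0);
`create_index` is the replacement, which newer Girder code uses (see Example
#10 below). A minimal sketch -- not from the Girder source -- of the index
forms the loop above accepts, written against `create_index` and assuming a
reachable local mongod:

    import pymongo

    client = pymongo.MongoClient()  # defaults to localhost:27017
    coll = client.testdb.testcoll

    coll.create_index('lowerName')                        # plain field index
    coll.create_index([('created', pymongo.DESCENDING)])  # keys with direction
    coll.create_index('email', unique=True)               # kwargs, as index[1]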
Example #2
    def __init__(self):
        self.name = None
        self._indices = []
        self._textIndex = None
        self._textLanguage = None

        self._filterKeys = {
            AccessType.READ: set(),
            AccessType.WRITE: set(),
            AccessType.ADMIN: set(),
            AccessType.SITE_ADMIN: set()
        }

        self.initialize()

        db_connection = getDbConnection()
        self.database = db_connection.get_default_database()
        self.collection = MongoProxy(self.database[self.name])

        for index in self._indices:
            if isinstance(index, (list, tuple)):
                self.collection.ensure_index(index[0], **index[1])
            else:
                self.collection.ensure_index(index)

        if type(self._textIndex) is dict:
            textIdx = [(k, 'text') for k in self._textIndex.keys()]
            try:
                self.collection.ensure_index(
                    textIdx,
                    weights=self._textIndex,
                    default_language=self._textLanguage)
            except pymongo.errors.OperationFailure:
                print(
                    TerminalColor.warning('WARNING: Text search not enabled.'))
Example #3
File: __init__.py Project: cryos/girder
def getDbConnection(uri=None, replicaSet=None):
    """
    Get a MongoClient object that is connected to the configured database.
    We lazily instantiate a module-level singleton; the MongoClient objects
    manage their own connection pools internally.

    :param uri: if specified, connect to this mongo db rather than the one in
                the config.
    :param replicaSet: if uri is specified, use this replica set.
    """
    global _dbClients

    origKey = (uri, replicaSet)
    if origKey in _dbClients:
        return _dbClients[origKey]

    if uri is None or uri == '':
        dbConf = getDbConfig()
        uri = dbConf.get('uri')
        replicaSet = dbConf.get('replica_set')
    clientOptions = {
        'connectTimeoutMS': 15000,
        # This is the maximum time between fetches of data from a cursor.
        # If it times out, the cursor is lost and we can't reconnect.  If it
        # isn't set, we have issues with replica sets when the primary goes
        # down.  This value can be overridden in the MongoDB URI connection
        # string with socketTimeoutMS.
        'socketTimeoutMS': 60000,
    }
    if uri is None:
        dbUriRedacted = 'mongodb://unknown'
        client = pymongo.MongoClient(**clientOptions)
    else:
        # Redact any user:password credentials before the URI is logged.
        parts = uri.split('@')
        if len(parts) == 2:
            dbUriRedacted = 'mongodb://' + parts[1]
        else:
            dbUriRedacted = uri

        if replicaSet:
            client = pymongo.MongoReplicaSetClient(
                uri,
                replicaSet=replicaSet,
                read_preference=ReadPreference.SECONDARY_PREFERRED,
                **clientOptions)
        else:
            client = pymongo.MongoClient(uri, **clientOptions)
    client = MongoProxy(client, logger=logger)
    _dbClients[origKey] = _dbClients[(uri, replicaSet)] = client
    desc = ''
    if replicaSet:
        desc += ', replica set: %s' % replicaSet
    print(
        TerminalColor.info('Connected to MongoDB: %s%s' %
                           (dbUriRedacted, desc)))
    return client
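
Usage sketch (assumptions: a reachable mongod and a Girder install on the
import path): repeated calls with the same (uri, replicaSet) pair return the
cached client from _dbClients rather than opening a new connection.

    from girder.models import getDbConnection

    a = getDbConnection('mongodb://localhost:27017/girder')
    b = getDbConnection('mongodb://localhost:27017/girder')
    assert a is b  # the module-level singleton is reused

    db = a.get_default_database()  # the database named in the URI ('girder')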
Example #4
File: model_base.py Project: cryos/girder
    def __init__(self):
        self.name = None
        self._indices = []
        self._textIndex = None
        self._textLanguage = None

        self._filterKeys = {
            AccessType.READ: set(),
            AccessType.WRITE: set(),
            AccessType.ADMIN: set(),
            AccessType.SITE_ADMIN: set()
        }

        self.initialize()

        db_connection = getDbConnection()
        self.database = db_connection.get_default_database()
        self.collection = MongoProxy(self.database[self.name])

        for index in self._indices:
            if isinstance(index, (list, tuple)):
                self.collection.ensure_index(index[0], **index[1])
            else:
                self.collection.ensure_index(index)

        if type(self._textIndex) is dict:
            textIdx = [(k, 'text') for k in self._textIndex.keys()]
            try:
                self.collection.ensure_index(
                    textIdx, weights=self._textIndex,
                    default_language=self._textLanguage)
            except pymongo.errors.OperationFailure:
                print(
                    TerminalColor.warning('WARNING: Text search not enabled.'))
Example #5
    def setUp(self):
        """
        Set up the mongo db for the external dataset, with 3 collections:
        a) tweetsgeo, which has tweet data that is geolocated (lat/long fields).
        b) polyGeoIndexed, w/2 polygons in a 2dsphere-indexed 'geometry' field
        c) polyGeoNonIndexed, same as above but without the 2dsphere index
        """
        super(MongoDatasetTestCase, self).setUp()

        self._user = self.model('user').createUser(
            'minervauser', 'password', 'minerva', 'user',
            '*****@*****.**')

        from girder.utility import config
        dbUri = config.getConfig()['database']['uri']
        self.dbName = 'minerva_test_external_mongo_dataset'
        dbUriParts = dbUri.split('/')[0:-1]
        self.dbUri = '/'.join(dbUriParts + [self.dbName])
        from girder.models import getDbConnection
        self.externalMongoDbConnection = getDbConnection(self.dbUri)
        self.externalMongoDb = self.externalMongoDbConnection.get_default_database()
        from girder.external.mongodb_proxy import MongoProxy
        self.geojsonIndexedName = 'polyGeoIndexed'
        self.geojsonNonIndexedName = 'polyGeoNonIndexed'
        self.polyIndexedCollection = MongoProxy(self.externalMongoDb[self.geojsonIndexedName])
        self.polyNonIndexedCollection = MongoProxy(self.externalMongoDb[self.geojsonNonIndexedName])
        self.pluginTestDir = os.path.dirname(os.path.realpath(__file__))
        geojsonPath = os.path.join(self.pluginTestDir, 'data', 'polygons.json')
        with open(geojsonPath) as geojsonFile:
            polys = json.load(geojsonFile)
            for poly in polys:
                self.polyIndexedCollection.save(poly)
                self.polyNonIndexedCollection.save(poly)
            self.polyIndexedCollection.create_index([('geometry', '2dsphere')])
        self.collectionName = 'tweetsgeo'
        self.tweetsgeoCollection = MongoProxy(self.externalMongoDb[self.collectionName])
        # add test data to external dataset
        self.pluginTestDir = os.path.dirname(os.path.realpath(__file__))
        tweets100Path = os.path.join(self.pluginTestDir, 'data', 'tweets100.json')
        z = zipfile.ZipFile('%s.zip' % tweets100Path)
        tweets = json.load(z.open('tweets100.json'))
        from datetime import datetime
        dateformat = '%Y-%m-%dT%H:%M:%S'
        for tweet in tweets:
            d = datetime.strptime((tweet['created_at']), dateformat)
            tweet['created_at'] = int((d - datetime(1970, 1, 1)).total_seconds())
            self.tweetsgeoCollection.save(tweet)
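
The created_at conversion above is plain epoch-seconds arithmetic on naive
UTC datetimes; a small self-contained illustration:

    from datetime import datetime

    dateformat = '%Y-%m-%dT%H:%M:%S'
    d = datetime.strptime('2015-03-01T12:00:00', dateformat)
    epoch = int((d - datetime(1970, 1, 1)).total_seconds())
    print(epoch)  # 1425211200 -- seconds since the Unix epoch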
Example #6
File: dataset.py Project: Kitware/minerva
    def mongoCollection(self, connectionUri, collectionName):
        # TODO not sure if this is a good idea to do this db stuff here
        # maybe this suggests a new model?
        from girder.models import getDbConnection
        dbConn = getDbConnection(connectionUri)
        db = dbConn.get_default_database()
        from girder.external.mongodb_proxy import MongoProxy
        collection = MongoProxy(db[collectionName])
        return collection

    def __init__(self, assetstore):
        """
        :param assetstore: The assetstore to act on.
        """
        super(GridFsAssetstoreAdapter, self).__init__(assetstore)
        recent = False
        try:
            # Guard in case the connectionArgs is unhashable
            key = (self.assetstore.get('mongohost'),
                   self.assetstore.get('replicaset'),
                   self.assetstore.get('shard'))
            if key in _recentConnections:
                recent = (time.time() - _recentConnections[key]['created'] <
                          RECENT_CONNECTION_CACHE_TIME)
        except TypeError:
            key = None
        try:
            # MongoClient automatically reuses connections from a pool, but we
            # want to avoid redoing ensureChunkIndices each time we get such a
            # connection.
            client = getDbConnection(self.assetstore.get('mongohost'),
                                     self.assetstore.get('replicaset'),
                                     quiet=recent)
            self.chunkColl = MongoProxy(client[self.assetstore['db']].chunk)
            if not recent:
                _ensureChunkIndices(self.chunkColl)
                if self.assetstore.get('shard') == 'auto':
                    _setupSharding(self.chunkColl)
                if key is not None:
                    if len(_recentConnections) >= RECENT_CONNECTION_CACHE_MAX_SIZE:
                        _recentConnections.clear()
                    _recentConnections[key] = {
                        'created': time.time()
                    }
        except pymongo.errors.ConnectionFailure:
            logger.error('Failed to connect to GridFS assetstore %s',
                         self.assetstore['db'])
            self.chunkColl = 'Failed to connect'
            self.unavailable = True
        except pymongo.errors.ConfigurationError:
            logger.exception('Failed to configure GridFS assetstore %s',
                             self.assetstore['db'])
            self.chunkColl = 'Failed to configure'
            self.unavailable = True
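
The _recentConnections bookkeeping above is a small time-based cache; a
stripped-down sketch of the pattern (constant values invented here):

    import time

    RECENT_CONNECTION_CACHE_TIME = 600      # seconds; hypothetical value
    RECENT_CONNECTION_CACHE_MAX_SIZE = 100  # hypothetical value
    _recentConnections = {}

    def isRecent(key):
        entry = _recentConnections.get(key)
        return (entry is not None and
                time.time() - entry['created'] < RECENT_CONNECTION_CACHE_TIME)

    def markConnection(key):
        # Evict everything rather than tracking LRU order -- cheap, and good
        # enough for a cache whose only job is to skip redundant index checks.
        if len(_recentConnections) >= RECENT_CONNECTION_CACHE_MAX_SIZE:
            _recentConnections.clear()
        _recentConnections[key] = {'created': time.time()}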
Example #9
    def setUp(self):
        """
        Set up the mongo db for the external dataset, with a collection
        named tweetsgeo, which has tweet data that is geolocated.
        """
        super(ExternalMongoDatasetTestCase, self).setUp()

        self._user = self.model('user').createUser(
            'minervauser', 'password', 'minerva', 'user',
            '*****@*****.**')

        from girder.utility import config
        dbUri = config.getConfig()['database']['uri']
        self.dbName = 'minerva_test_external_mongo_dataset'
        dbUriParts = dbUri.split('/')[0:-1]
        self.dbUri = '/'.join(dbUriParts + [self.dbName])
        from girder.models import getDbConnection
        self.externalMongoDbConnection = getDbConnection(self.dbUri)
        self.externalMongoDb = self.externalMongoDbConnection.get_default_database()
        from girder.external.mongodb_proxy import MongoProxy
        self.collectionName = 'tweetsgeo'
        self.tweetsgeoCollection = MongoProxy(self.externalMongoDb[self.collectionName])
        # add test data to external dataset
        self.pluginTestDir = os.path.dirname(os.path.realpath(__file__))
        tweets100Path = os.path.join(self.pluginTestDir, 'data', 'tweets100.json')
        z = zipfile.ZipFile('%s.zip' % tweets100Path)
        tweets = json.load(z.open('tweets100.json'))
        from datetime import datetime
        dateformat = '%Y-%m-%dT%H:%M:%S'
        for tweet in tweets:
            d = datetime.strptime((tweet['created_at']), dateformat)
            tweet['created_at'] = int((d - datetime(1970, 1, 1)).total_seconds())
            self.tweetsgeoCollection.save(tweet)

        path = '/minerva_dataset/folder'
        params = {
            'userId': self._user['_id'],
        }
        # create a dataset folder
        self.request(path=path, method='POST', params=params, user=self._user)
Example #10
File: model_base.py Project: kotfic/girder
    def reconnect(self):
        """
        Reconnect to the database and rebuild indices if necessary. Users should
        typically not have to call this method.
        """
        db_connection = getDbConnection()
        self.database = db_connection.get_default_database()
        self.collection = MongoProxy(self.database[self.name])

        for index in self._indices:
            if isinstance(index, (list, tuple)):
                self.collection.create_index(index[0], **index[1])
            else:
                self.collection.create_index(index)

        if isinstance(self._textIndex, dict):
            textIdx = [(k, 'text') for k in six.viewkeys(self._textIndex)]
            try:
                self.collection.create_index(
                    textIdx, weights=self._textIndex,
                    default_language=self._textLanguage)
            except pymongo.errors.OperationFailure:
                logprint.warning('WARNING: Text search not enabled.')
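
A sketch of what the text-index branch builds, assuming a weights dict such
as {'name': 10, 'description': 1} and a local mongod (text search is enabled
by default in modern MongoDB):

    import pymongo

    coll = pymongo.MongoClient().testdb.testcoll

    weights = {'name': 10, 'description': 1}
    textIdx = [(k, 'text') for k in weights]  # [('name', 'text'), ...]
    coll.create_index(textIdx, weights=weights, default_language='english')

    coll.insert_one({'name': 'girder', 'description': 'data management platform'})
    print(coll.find_one({'$text': {'$search': 'platform'}}))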
Example #11
class Model(ModelImporter):
    """
    Model base class. Models are responsible for abstracting away the
    persistence layer. Each collection in the database should have its own
    model. Methods that deal with database interaction belong in the
    model layer.
    """
    def __init__(self):
        self.name = None
        self._indices = []
        self._textIndex = None
        self._textLanguage = None

        self._filterKeys = {
            AccessType.READ: set(),
            AccessType.WRITE: set(),
            AccessType.ADMIN: set(),
            AccessType.SITE_ADMIN: set()
        }

        self.initialize()
        self.reconnect()

    def reconnect(self):
        """
        Reconnect to the database and rebuild indices if necessary. Users should
        typically not have to call this method.
        """
        db_connection = getDbConnection()
        self.database = db_connection.get_default_database()
        self.collection = MongoProxy(self.database[self.name])

        for index in self._indices:
            if isinstance(index, (list, tuple)):
                self.collection.ensure_index(index[0], **index[1])
            else:
                self.collection.ensure_index(index)

        if type(self._textIndex) is dict:
            textIdx = [(k, 'text') for k in self._textIndex.keys()]
            try:
                self.collection.ensure_index(
                    textIdx,
                    weights=self._textIndex,
                    default_language=self._textLanguage)
            except pymongo.errors.OperationFailure:
                print(
                    TerminalColor.warning('WARNING: Text search not enabled.'))

    def exposeFields(self, level, fields):
        """
        Expose model fields to users with the given access level. Subclasses
        should call this in their initialize method to declare what fields
        should be exposed to what access levels if they are using the default
        filter implementation in this class. Since filtered fields are sets,
        this method is idempotent.

        :param level: The required access level for the field.
        :type level: AccessType
        :param fields: A field or list of fields to expose for that level.
        :type fields: str, list, or tuple
        """
        if isinstance(fields, six.string_types):
            fields = (fields, )

        self._filterKeys[level] = self._filterKeys[level].union(fields)

    def hideFields(self, level, fields):
        """
        Hide a field, i.e. make sure it is not exposed via the default
        filtering method. Since the filter uses a white list, it is only ever
        necessary to call this for fields that were added previously with
        exposeFields().

        :param level: The access level to remove the fields from.
        :type level: AccessType
        :param fields: The field or fields to remove from the white list.
        :type fields: str, list, or tuple
        """
        if isinstance(fields, six.string_types):
            fields = (fields, )

        self._filterKeys[level] = self._filterKeys[level].difference(fields)

    def filter(self, doc, user=None, additionalKeys=None):
        """
        Filter this model for the given user. This is a default implementation
        that assumes this model has no notion of access control, and simply
        allows all keys under READ access level, and conditionally allows any
        keys assigned to SITE_ADMIN level.

        :param doc: The document of this model type to be filtered.
        :type doc: dict or None
        :param user: The current user for whom we are filtering.
        :type user: dict or None
        :param additionalKeys: Any additional keys that should be included in
            the document for this call only.
        :type additionalKeys: list, tuple, or None
        :returns: The filtered document (dict).
        """
        if doc is None:
            return None

        keys = self._filterKeys[AccessType.READ]

        if user and user.get('admin') is True:
            keys = keys.union(self._filterKeys[AccessType.SITE_ADMIN])

        if additionalKeys:
            keys = keys.union(additionalKeys)

        return self.filterDocument(doc, allow=tuple(keys))

    def ensureTextIndex(self, index, language='english'):
        """
        Call this during initialize() of the subclass if you want your
        model to have a full-text searchable index. Each collection may
        have zero or one full-text index.

        :param language: The default_language value for the text index,
            which is used for stemming and stop words. If the text index
            should not use stemming and stop words, set this param to 'none'.
        :type language: str
        """
        self._textIndex = index
        self._textLanguage = language

    def ensureIndices(self, indices):
        """
        Subclasses should call this with a list of strings representing
        fields that should be indexed in the database if there are any.
        Otherwise, it is not necessary to call this method. Elements of the list
        may also be a list or tuple, where the second element is a dictionary
        that will be passed as kwargs to the pymongo ensure_index call.
        """
        self._indices.extend(indices)

    def ensureIndex(self, index):
        """
        Like ensureIndices, but declares just a single index rather than a list
        of them.
        """
        self._indices.append(index)

    def validate(self, doc):
        """
        Models should implement this to validate the document before it enters
        the database. It must return the document with any necessary filters
        applied, or throw a ValidationException if validation of the document
        fails.

        :param doc: The document to validate before saving to the collection.
        :type doc: dict
        """
        raise Exception('Must override validate() in %s model.' %
                        self.__class__.__name__)  # pragma: no cover

    def initialize(self):
        """
        Subclasses should override this and set the name of the collection as
        self.name. Also, they should set any indexed fields that they require.
        """
        raise Exception('Must override initialize() in %s model' %
                        self.__class__.__name__)  # pragma: no cover

    def find(self, query=None, offset=0, limit=0, **kwargs):
        """
        Search the collection by a set of parameters. Passes any kwargs
        through to the underlying pymongo.collection.find function.

        :param query: The search query (see general MongoDB docs for "find()")
        :type query: dict
        :param offset: The offset into the results
        :type offset: int
        :param limit: Maximum number of documents to return
        :type limit: int
        :param sort: The sort order.
        :type sort: List of (key, order) tuples.
        :param fields: A mask for filtering result documents by key.
        :type fields: List of strings
        :returns: A pymongo database cursor.
        """
        if not query:
            query = {}

        if 'timeout' not in kwargs:
            kwargs['timeout'] = False

        return self.collection.find(spec=query,
                                    skip=offset,
                                    limit=limit,
                                    **kwargs)

    def findOne(self, query=None, **kwargs):
        """
        Search the collection by a set of parameters. Passes any kwargs
        through to the underlying pymongo.collection.find function.

        :param query: The search query (see general MongoDB docs for "find()")
        :type query: dict
        :param sort: The sort order.
        :type sort: List of (key, order) tuples.
        :param fields: A mask for filtering result documents by key.
        :type fields: List of strings
        :returns: the first object that was found, or None if none found.
        """
        if not query:
            query = {}
        return self.collection.find_one(query, **kwargs)

    def textSearch(self,
                   query,
                   offset=0,
                   limit=0,
                   sort=None,
                   fields=None,
                   filters=None):
        """
        Perform a full-text search against the text index for this collection.

        :param query: The text query. Will be stemmed internally.
        :type query: str
        :param filters: Any additional query operators to apply.
        :type filters: dict
        :returns: A pymongo cursor. It is left to the caller to build the
            results from the cursor.
        """
        if not filters:
            filters = {}
        if not fields:
            fields = {}

        fields['_textScore'] = {'$meta': 'textScore'}
        filters['$text'] = {'$search': query}

        cursor = self.find(filters,
                           offset=offset,
                           limit=limit,
                           sort=sort,
                           fields=fields)

        # Sort by meta text score, but only if result count is below a certain
        # threshold. The text score is not a real index, so we cannot always
        # sort by it if there is a high number of matching documents.
        if cursor.count() < TEXT_SCORE_SORT_MAX and sort is None:
            cursor.sort([('_textScore', {'$meta': 'textScore'})])

        return cursor

    def save(self, document, validate=True, triggerEvents=True):
        """
        Create or update a document in the collection. This triggers two
        events; one prior to validation, and one prior to saving. Either of
        these events may have their default action prevented.

        :param document: The document to save.
        :type document: dict
        :param validate: Whether to call the model's validate() before saving.
        :type validate: bool
        :param triggerEvents: Whether to trigger events for validate and
            pre- and post-save hooks.
        """
        if validate and triggerEvents:
            event = events.trigger('.'.join(('model', self.name, 'validate')),
                                   document)
            if event.defaultPrevented:
                validate = False

        if validate:
            document = self.validate(document)

        if triggerEvents:
            event = events.trigger('model.{}.save'.format(self.name), document)
            if event.defaultPrevented:
                return document

        sendCreateEvent = ('_id' not in document)
        document['_id'] = self.collection.save(document)

        if triggerEvents:
            if sendCreateEvent:
                events.trigger('model.{}.save.created'.format(self.name),
                               document)
            events.trigger('model.{}.save.after'.format(self.name), document)

        return document

    def update(self, query, update, multi=True):
        """
        This method should be used for updating multiple documents in the
        collection. This is useful for things like removing all references in
        this collection to a document that is being deleted from another
        collection.

        This is a thin wrapper around pymongo db.collection.update().

        For updating a single document, use the save() model method instead.

        :param query: The query for finding documents to update. It's
                      the same format as would be passed to find().
        :type query: dict
        :param update: The update specifier.
        :type update: dict
        """
        self.collection.update(query, update, multi=multi)

    def increment(self, query, field, amount, **kwargs):
        """
        This is a specialization of the update method that atomically increments
        a field by a given amount. Additional kwargs are passed directly through
        to update.

        :param query: The query selector for documents to update.
        :type query: dict
        :param field: The name of the field in the document to increment.
        :type field: str
        :param amount: The amount to increment the field by.
        :type amount: int or float
        """
        self.update(query=query, update={'$inc': {field: amount}}, **kwargs)

    def remove(self, document, **kwargs):
        """
        Delete an object from the collection; must have its _id set.

        :param document: The document to remove.
        """
        assert '_id' in document

        event = events.trigger('.'.join(('model', self.name, 'remove')),
                               document)
        kwargsEvent = events.trigger(
            '.'.join(('model', self.name, 'remove_with_kwargs')), {
                'document': document,
                'kwargs': kwargs
            })
        if not event.defaultPrevented and not kwargsEvent.defaultPrevented:
            return self.collection.remove({'_id': document['_id']})

    def removeWithQuery(self, query):
        """
        Remove all documents matching a given query from the collection.
        For safety reasons, you may not pass an empty query.
        """
        assert query

        return self.collection.remove(query)

    def load(self, id, objectId=True, fields=None, exc=False):
        """
        Fetch a single object from the database using its _id field.

        :param id: The value for searching the _id field.
        :type id: string or ObjectId
        :param objectId: Whether the id should be coerced to ObjectId type.
        :type objectId: bool
        :param fields: Fields list to include. Also can be a dict for
                       exclusion. See pymongo docs for how to use this arg.
        :param exc: Whether to raise a ValidationException if there is no
                    document with the given id.
        :type exc: bool
        :returns: The matching document, or None.
        """
        if not id:
            raise Exception('Attempt to load null ObjectId: %s' % id)

        if objectId and type(id) is not ObjectId:
            try:
                id = ObjectId(id)
            except Exception:
                raise ValidationException('Invalid ObjectId: {}'.format(id),
                                          field='id')
        doc = self.findOne({'_id': id}, fields=fields)

        if doc is None and exc is True:
            raise ValidationException('No such {}: {}'.format(self.name, id),
                                      field='id')

        return doc

    def filterDocument(self, doc, allow=None):
        """
        This method will filter the given document to make it suitable to
        output to the user.

        :param doc: The document to filter.
        :type doc: dict
        :param allow: The whitelist of fields to allow in the output document.
        :type allow: List of strings
        """
        if not allow:
            allow = []

        if doc is None:
            return None

        out = {}
        for field in allow:
            if field in doc:
                out[field] = doc[field]

        if '_textScore' in doc:
            out['_textScore'] = doc['_textScore']

        out['_modelType'] = self.name

        return out

    def subtreeCount(self, doc):
        """
        Return the size of the subtree rooted at the given document.  In
        general, if this contains items or folders, it will be the count of the
        items and folders in all containers.  If it does not, it will be 1.
        This returns the absolute size of the subtree, it does not filter by
        permissions.

        :param doc: The root of the subtree.
        :type doc: dict
        """
        return 1
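
To make the Model contract concrete, a hypothetical subclass (the Note model
and its fields are invented for illustration; imports follow Girder's layout
of this era) wiring up initialize(), indices, and exposed fields as the
docstrings above describe:

    from girder.constants import AccessType
    from girder.models.model_base import Model, ValidationException

    class Note(Model):
        def initialize(self):
            self.name = 'note'  # required: the backing collection's name
            self.ensureIndices([
                'ownerId',                                         # plain field
                ([('created', 1)], {'expireAfterSeconds': 3600}),  # (keys, kwargs)
            ])
            self.ensureTextIndex({'title': 10, 'body': 1})  # weights dict
            self.exposeFields(level=AccessType.READ,
                              fields=('_id', 'title', 'body'))

        def validate(self, doc):
            if not doc.get('title'):
                raise ValidationException('Title must not be empty.', 'title')
            return doc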
Example #12
class GridFsAssetstoreAdapter(AbstractAssetstoreAdapter):
    """
    This assetstore type stores files within MongoDB using the GridFS data
    model.
    """
    @staticmethod
    def validateInfo(doc):
        """
        Validate the assetstore -- make sure we can connect to it and that the
        necessary indexes are set up.
        """
        if not doc.get('db', ''):
            raise ValidationException('Database name must not be empty.', 'db')
        if '.' in doc['db'] or ' ' in doc['db']:
            raise ValidationException(
                'Database name cannot contain spaces'
                ' or periods.', 'db')

        try:
            chunkColl = getDbConnection(
                doc.get('mongohost'),
                doc.get('replicaset'),
                autoRetry=False,
                serverSelectionTimeoutMS=10000)[doc['db']].chunk
            _ensureChunkIndices(chunkColl)
        except pymongo.errors.ServerSelectionTimeoutError as e:
            raise ValidationException('Could not connect to the database: %s' %
                                      str(e))

        return doc

    @staticmethod
    def fileIndexFields():
        return ['sha512', 'chunkUuid']

    def __init__(self, assetstore):
        """
        :param assetstore: The assetstore to act on.
        """
        super(GridFsAssetstoreAdapter, self).__init__(assetstore)
        recent = False
        try:
            # Guard in case the connectionArgs is unhashable
            key = (self.assetstore.get('mongohost'),
                   self.assetstore.get('replicaset'),
                   self.assetstore.get('shard'))
            if key in _recentConnections:
                recent = (time.time() - _recentConnections[key]['created'] <
                          RECENT_CONNECTION_CACHE_TIME)
        except TypeError:
            key = None
        try:
            # MongoClient automatically reuses connections from a pool, but we
            # want to avoid redoing ensureChunkIndices each time we get such a
            # connection.
            client = getDbConnection(self.assetstore.get('mongohost'),
                                     self.assetstore.get('replicaset'),
                                     quiet=recent)
            self.chunkColl = MongoProxy(client[self.assetstore['db']].chunk)
            if not recent:
                _ensureChunkIndices(self.chunkColl)
                if self.assetstore.get('shard') == 'auto':
                    _setupSharding(self.chunkColl)
                if key is not None:
                    if len(_recentConnections
                           ) >= RECENT_CONNECTION_CACHE_MAX_SIZE:
                        _recentConnections.clear()
                    _recentConnections[key] = {'created': time.time()}
        except pymongo.errors.ConnectionFailure:
            logger.error('Failed to connect to GridFS assetstore %s',
                         self.assetstore['db'])
            self.chunkColl = 'Failed to connect'
            self.unavailable = True
        except pymongo.errors.ConfigurationError:
            logger.exception('Failed to configure GridFS assetstore %s',
                             self.assetstore['db'])
            self.chunkColl = 'Failed to configure'
            self.unavailable = True

    def initUpload(self, upload):
        """
        Creates a UUID that will be used to uniquely link each chunk to the
        upload.
        """
        upload['chunkUuid'] = uuid.uuid4().hex
        upload['sha512state'] = _hash_state.serializeHex(sha512())
        return upload

    def uploadChunk(self, upload, chunk):
        """
        Stores the uploaded chunk in fixed-sized pieces in the chunks
        collection of this assetstore's database.
        """
        # If we know the chunk size is too large or small, fail early.
        self.checkUploadSize(upload, self.getChunkSize(chunk))

        if isinstance(chunk, six.text_type):
            chunk = chunk.encode('utf8')

        if isinstance(chunk, six.binary_type):
            chunk = BytesIO(chunk)

        # Restore the internal state of the streaming SHA-512 checksum
        checksum = _hash_state.restoreHex(upload['sha512state'], 'sha512')

        # TODO: when saving uploads is optional, we can conditionally try to
        # fetch the last chunk.  Add these lines before `lastChunk = ...`:
        #   lastChunk = None
        #   if '_id' in upload or upload['received'] != 0:
        lastChunk = self.chunkColl.find_one({'uuid': upload['chunkUuid']},
                                            projection=['n'],
                                            sort=[('n', pymongo.DESCENDING)])
        if lastChunk:
            # This bit of code will only do anything if there is a discrepancy
            # between the received count of the upload record and the length of
            # the file stored as chunks in the database. This code updates the
            # sha512 state with the difference before reading the bytes sent
            # from the user.
            if self.requestOffset(upload) > upload['received']:
                # This isn't right -- the last received amount may not be a
                # complete chunk.
                cursor = self.chunkColl.find(
                    {
                        'uuid': upload['chunkUuid'],
                        'n': {
                            '$gte': upload['received'] // CHUNK_SIZE
                        }
                    },
                    projection=['data']).sort('n', pymongo.ASCENDING)
                for result in cursor:
                    checksum.update(result['data'])
        n = lastChunk['n'] + 1 if lastChunk else 0

        size = 0
        startingN = n

        while upload['received'] + size < upload['size']:
            data = chunk.read(CHUNK_SIZE)
            if not data:
                break
            # If a timeout occurs while we are trying to load data, we might
            # have succeeded, in which case we will get a DuplicateKeyError
            # when it automatically retries.  Therefore, log this error but
            # don't stop.
            try:
                self.chunkColl.insert_one({
                    'n': n,
                    'uuid': upload['chunkUuid'],
                    'data': bson.binary.Binary(data)
                })
            except pymongo.errors.DuplicateKeyError:
                logger.info(
                    'Received a DuplicateKeyError while uploading, '
                    'probably because we reconnected to the database '
                    '(chunk uuid %s part %d)', upload['chunkUuid'], n)
            n += 1
            size += len(data)
            checksum.update(data)
        chunk.close()

        try:
            self.checkUploadSize(upload, size)
        except ValidationException:
            # The user tried to upload too much or too little.  Delete
            # everything we added
            self.chunkColl.delete_many({
                'uuid': upload['chunkUuid'],
                'n': {
                    '$gte': startingN
                }
            })
            raise

        # Persist the internal state of the checksum
        upload['sha512state'] = _hash_state.serializeHex(checksum)
        upload['received'] += size
        return upload

    def requestOffset(self, upload):
        """
        The offset will be the CHUNK_SIZE * total number of chunks in the
        database for this file. We return the max of that and the received
        count because in testing mode we are uploading chunks that are smaller
        than the CHUNK_SIZE, which in practice will not work.
        """
        lastChunk = self.chunkColl.find_one({'uuid': upload['chunkUuid']},
                                            projection=['n'],
                                            sort=[('n', pymongo.DESCENDING)])

        if lastChunk is None:
            offset = 0
        else:
            offset = lastChunk['n'] * CHUNK_SIZE
        return max(offset, upload['received'])

    def finalizeUpload(self, upload, file):
        """
        Grab the final state of the checksum and set it on the file object,
        and write the generated UUID into the file itself.
        """
        hash = _hash_state.restoreHex(upload['sha512state'],
                                      'sha512').hexdigest()

        file['sha512'] = hash
        file['chunkUuid'] = upload['chunkUuid']
        file['chunkSize'] = CHUNK_SIZE

        return file

    def downloadFile(self,
                     file,
                     offset=0,
                     headers=True,
                     endByte=None,
                     contentDisposition=None,
                     extraParameters=None,
                     **kwargs):
        """
        Returns a generator function that will be used to stream the file from
        the database to the response.
        """
        if endByte is None or endByte > file['size']:
            endByte = file['size']

        if headers:
            setResponseHeader('Accept-Ranges', 'bytes')
            self.setContentHeaders(file, offset, endByte, contentDisposition)

        # If the file is empty, we stop here
        if endByte - offset <= 0:
            return lambda: ''

        n = 0
        chunkOffset = 0

        # We must "seek" to the correct chunk index and local offset
        if offset > 0:
            n = offset // file['chunkSize']
            chunkOffset = offset % file['chunkSize']

        cursor = self.chunkColl.find(
            {
                'uuid': file['chunkUuid'],
                'n': {
                    '$gte': n
                }
            }, projection=['data']).sort('n', pymongo.ASCENDING)

        def stream():
            co = chunkOffset  # Can't assign to outer scope without "nonlocal"
            position = offset
            shouldBreak = False

            for chunk in cursor:
                chunkLen = len(chunk['data'])

                if position + chunkLen - co > endByte:
                    chunkLen = endByte - position + co
                    shouldBreak = True

                yield chunk['data'][co:chunkLen]

                if shouldBreak:
                    break

                position += chunkLen - co

                if co > 0:
                    co = 0

        return stream

    def deleteFile(self, file):
        """
        Delete all of the chunks in the collection that correspond to the
        given file.
        """
        q = {
            'chunkUuid': file['chunkUuid'],
            'assetstoreId': self.assetstore['_id']
        }
        matching = File().find(q, limit=2, projection=[])
        if matching.count(True) == 1:
            # If we can't reach the database, we return anyway.  A system check
            # will be necessary to remove the abandoned file.  Since we already
            # can handle that case, tell Mongo to use a 0 write concern -- we
            # don't need to know that the chunks have been deleted, and this
            # can be faster.
            try:
                self.chunkColl.with_options(write_concern=pymongo.WriteConcern(
                    w=0)).delete_many({'uuid': file['chunkUuid']})
            except pymongo.errors.AutoReconnect:
                pass

    def cancelUpload(self, upload):
        """
        Delete all of the chunks associated with a given upload.
        """
        self.chunkColl.delete_many({'uuid': upload['chunkUuid']})
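
The sha512state round-tripping above is what lets an interrupted upload
resume hashing where it left off. Girder's _hash_state serializer is
internal, but the underlying property is plain incremental hashing, sketched
here with hashlib alone:

    from hashlib import sha512

    data = b'0123456789' * 1000

    whole = sha512(data).hexdigest()  # hashing all at once...

    checksum = sha512()               # ...equals hashing chunk by chunk,
    for start in range(0, len(data), 256):  # 256-byte chunks for illustration
        checksum.update(data[start:start + 256])
    assert checksum.hexdigest() == whole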
Example #13
class GridFsAssetstoreAdapter(AbstractAssetstoreAdapter):
    """
    This assetstore type stores files within MongoDB using the GridFS data
    model.
    """

    @staticmethod
    def validateInfo(doc):
        """
        Validate the assetstore -- make sure we can connect to it and that the
        necessary indexes are set up.
        """
        if not doc.get('db', ''):
            raise ValidationException('Database name must not be empty.', 'db')
        if '.' in doc['db'] or ' ' in doc['db']:
            raise ValidationException('Database name cannot contain spaces'
                                      ' or periods.', 'db')

        try:
            chunkColl = getDbConnection(
                doc.get('mongohost'), doc.get('replicaset'), autoRetry=False,
                serverSelectionTimeoutMS=10000)[doc['db']].chunk
            _ensureChunkIndices(chunkColl)
        except pymongo.errors.ServerSelectionTimeoutError as e:
            raise ValidationException(
                'Could not connect to the database: %s' % str(e))

        return doc

    @staticmethod
    def fileIndexFields():
        return ['sha512', 'chunkUuid']

    def __init__(self, assetstore):
        """
        :param assetstore: The assetstore to act on.
        """
        super(GridFsAssetstoreAdapter, self).__init__(assetstore)
        recent = False
        try:
            # Guard in case the connectionArgs is unhashable
            key = (self.assetstore.get('mongohost'),
                   self.assetstore.get('replicaset'),
                   self.assetstore.get('shard'))
            if key in _recentConnections:
                recent = (time.time() - _recentConnections[key]['created'] <
                          RECENT_CONNECTION_CACHE_TIME)
        except TypeError:
            key = None
        try:
            # MongoClient automatically reuses connections from a pool, but we
            # want to avoid redoing ensureChunkIndices each time we get such a
            # connection.
            client = getDbConnection(self.assetstore.get('mongohost'),
                                     self.assetstore.get('replicaset'),
                                     quiet=recent)
            self.chunkColl = MongoProxy(client[self.assetstore['db']].chunk)
            if not recent:
                _ensureChunkIndices(self.chunkColl)
                if self.assetstore.get('shard') == 'auto':
                    _setupSharding(self.chunkColl)
                if key is not None:
                    if len(_recentConnections) >= RECENT_CONNECTION_CACHE_MAX_SIZE:
                        _recentConnections.clear()
                    _recentConnections[key] = {
                        'created': time.time()
                    }
        except pymongo.errors.ConnectionFailure:
            logger.error('Failed to connect to GridFS assetstore %s',
                         self.assetstore['db'])
            self.chunkColl = 'Failed to connect'
            self.unavailable = True
        except pymongo.errors.ConfigurationError:
            logger.exception('Failed to configure GridFS assetstore %s',
                             self.assetstore['db'])
            self.chunkColl = 'Failed to configure'
            self.unavailable = True

    def initUpload(self, upload):
        """
        Creates a UUID that will be used to uniquely link each chunk to the
        upload.
        """
        upload['chunkUuid'] = uuid.uuid4().hex
        upload['sha512state'] = hash_state.serializeHex(sha512())
        return upload

    def uploadChunk(self, upload, chunk):
        """
        Stores the uploaded chunk in fixed-sized pieces in the chunks
        collection of this assetstore's database.
        """
        # If we know the chunk size is too large or small, fail early.
        self.checkUploadSize(upload, self.getChunkSize(chunk))

        if isinstance(chunk, six.text_type):
            chunk = chunk.encode('utf8')

        if isinstance(chunk, six.binary_type):
            chunk = BytesIO(chunk)

        # Restore the internal state of the streaming SHA-512 checksum
        checksum = hash_state.restoreHex(upload['sha512state'], 'sha512')

        # TODO: when saving uploads is optional, we can conditionally try to
        # fetch the last chunk.  Add these lines before `lastChunk = ...`:
        #   lastChunk = None
        #   if '_id' in upload or upload['received'] != 0:
        lastChunk = self.chunkColl.find_one({
            'uuid': upload['chunkUuid']
        }, projection=['n'], sort=[('n', pymongo.DESCENDING)])
        if lastChunk:
            # This bit of code will only do anything if there is a discrepancy
            # between the received count of the upload record and the length of
            # the file stored as chunks in the database. This code updates the
            # sha512 state with the difference before reading the bytes sent
            # from the user.
            if self.requestOffset(upload) > upload['received']:
                # This isn't right -- the last received amount may not be a
                # complete chunk.
                cursor = self.chunkColl.find({
                    'uuid': upload['chunkUuid'],
                    'n': {'$gte': upload['received'] // CHUNK_SIZE}
                }, projection=['data']).sort('n', pymongo.ASCENDING)
                for result in cursor:
                    checksum.update(result['data'])
        n = lastChunk['n'] + 1 if lastChunk else 0

        size = 0
        startingN = n

        while upload['received']+size < upload['size']:
            data = chunk.read(CHUNK_SIZE)
            if not data:
                break
            # If a timeout occurs while we are trying to load data, we might
            # have succeeded, in which case we will get a DuplicateKeyError
            # when it automatically retries.  Therefore, log this error but
            # don't stop.
            try:
                self.chunkColl.insert_one({
                    'n': n,
                    'uuid': upload['chunkUuid'],
                    'data': bson.binary.Binary(data)
                })
            except pymongo.errors.DuplicateKeyError:
                logger.info('Received a DuplicateKeyError while uploading, '
                            'probably because we reconnected to the database '
                            '(chunk uuid %s part %d)', upload['chunkUuid'], n)
            n += 1
            size += len(data)
            checksum.update(data)
        chunk.close()

        try:
            self.checkUploadSize(upload, size)
        except ValidationException:
            # The user tried to upload too much or too little.  Delete
            # everything we added
            self.chunkColl.delete_many({
                'uuid': upload['chunkUuid'],
                'n': {'$gte': startingN}
            })
            raise

        # Persist the internal state of the checksum
        upload['sha512state'] = hash_state.serializeHex(checksum)
        upload['received'] += size
        return upload

    def requestOffset(self, upload):
        """
        The offset will be the CHUNK_SIZE * total number of chunks in the
        database for this file. We return the max of that and the received
        count because in testing mode we are uploading chunks that are smaller
        than the CHUNK_SIZE, which in practice will not work.
        """
        lastChunk = self.chunkColl.find_one({
            'uuid': upload['chunkUuid']
        }, projection=['n'], sort=[('n', pymongo.DESCENDING)])

        if lastChunk is None:
            offset = 0
        else:
            offset = lastChunk['n'] * CHUNK_SIZE
        return max(offset, upload['received'])

    def finalizeUpload(self, upload, file):
        """
        Grab the final state of the checksum and set it on the file object,
        and write the generated UUID into the file itself.
        """
        hash = hash_state.restoreHex(upload['sha512state'],
                                     'sha512').hexdigest()

        file['sha512'] = hash
        file['chunkUuid'] = upload['chunkUuid']
        file['chunkSize'] = CHUNK_SIZE

        return file

    def downloadFile(self, file, offset=0, headers=True, endByte=None,
                     contentDisposition=None, extraParameters=None, **kwargs):
        """
        Returns a generator function that will be used to stream the file from
        the database to the response.
        """
        if endByte is None or endByte > file['size']:
            endByte = file['size']

        if headers:
            setResponseHeader('Accept-Ranges', 'bytes')
            self.setContentHeaders(file, offset, endByte, contentDisposition)

        # If the file is empty, we stop here
        if endByte - offset <= 0:
            return lambda: ''

        n = 0
        chunkOffset = 0

        # We must "seek" to the correct chunk index and local offset
        if offset > 0:
            n = offset // file['chunkSize']
            chunkOffset = offset % file['chunkSize']

        cursor = self.chunkColl.find({
            'uuid': file['chunkUuid'],
            'n': {'$gte': n}
        }, projection=['data']).sort('n', pymongo.ASCENDING)

        def stream():
            co = chunkOffset  # Can't assign to outer scope without "nonlocal"
            position = offset
            shouldBreak = False

            for chunk in cursor:
                chunkLen = len(chunk['data'])

                if position + chunkLen - co > endByte:
                    chunkLen = endByte - position + co
                    shouldBreak = True

                yield chunk['data'][co:chunkLen]

                if shouldBreak:
                    break

                position += chunkLen - co

                if co > 0:
                    co = 0

        return stream

    def deleteFile(self, file):
        """
        Delete all of the chunks in the collection that correspond to the
        given file.
        """
        q = {
            'chunkUuid': file['chunkUuid'],
            'assetstoreId': self.assetstore['_id']
        }
        matching = File().find(q, limit=2, projection=[])
        if matching.count(True) == 1:
            # If we can't reach the database, we return anyway.  A system check
            # will be necessary to remove the abandoned file.  Since we already
            # can handle that case, tell Mongo to use a 0 write concern -- we
            # don't need to know that the chunks have been deleted, and this
            # can be faster.
            try:
                self.chunkColl.with_options(
                    write_concern=pymongo.WriteConcern(w=0)).delete_many(
                        {'uuid': file['chunkUuid']})
            except pymongo.errors.AutoReconnect:
                pass

    def cancelUpload(self, upload):
        """
        Delete all of the chunks associated with a given upload.
        """
        self.chunkColl.delete_many({'uuid': upload['chunkUuid']})
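
The seek math in downloadFile deserves a worked example. With a hypothetical
chunkSize of 1000 bytes, a request starting at byte offset 2500 begins at
chunk n=2 with 500 bytes skipped inside that chunk:

    chunkSize = 1000  # hypothetical; real files record CHUNK_SIZE here
    offset = 2500

    n = offset // chunkSize           # 2   -> first chunk document to fetch
    chunkOffset = offset % chunkSize  # 500 -> bytes to skip within it

    assert (n, chunkOffset) == (2, 500)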
Example #14
class Model(ModelImporter):
    """
    Model base class. Models are responsible for abstracting away the
    persistence layer. Each collection in the database should have its own
    model. Methods that deal with database interaction belong in the
    model layer.
    """

    def __init__(self):
        self.name = None
        self._indices = []
        self._textIndex = None
        self._textLanguage = None
        self.prefixSearchFields = ('lowerName', 'name')

        self._filterKeys = {
            AccessType.READ: set(),
            AccessType.WRITE: set(),
            AccessType.ADMIN: set(),
            AccessType.SITE_ADMIN: set()
        }

        self.initialize()
        self.reconnect()

    def reconnect(self):
        """
        Reconnect to the database and rebuild indices if necessary. Users should
        typically not have to call this method.
        """
        db_connection = getDbConnection()
        self.database = db_connection.get_default_database()
        self.collection = MongoProxy(self.database[self.name])

        for index in self._indices:
            if isinstance(index, (list, tuple)):
                self.collection.create_index(index[0], **index[1])
            else:
                self.collection.create_index(index)

        if type(self._textIndex) is dict:
            textIdx = [(k, 'text') for k in six.viewkeys(self._textIndex)]
            try:
                self.collection.create_index(
                    textIdx, weights=self._textIndex,
                    default_language=self._textLanguage)
            except pymongo.errors.OperationFailure:
                print(
                    TerminalColor.warning('WARNING: Text search not enabled.'))

    def exposeFields(self, level, fields):
        """
        Expose model fields to users with the given access level. Subclasses
        should call this in their initialize method to declare what fields
        should be exposed to what access levels if they are using the default
        filter implementation in this class. Since filtered fields are sets,
        this method is idempotent.

        :param level: The required access level for the field.
        :type level: AccessType
        :param fields: A field or list of fields to expose for that level.
        :type fields: str, list, or tuple
        """
        if isinstance(fields, six.string_types):
            fields = (fields, )

        self._filterKeys[level].update(fields)

    def hideFields(self, level, fields):
        """
        Hide a field, i.e. make sure it is not exposed via the default
        filtering method. Since the filter uses a white list, it is only ever
        necessary to call this for fields that were added previously with
        exposeFields().

        :param level: The access level to remove the fields from.
        :type level: AccessType
        :param fields: The field or fields to remove from the white list.
        :type fields: str, list, or tuple
        """
        if isinstance(fields, six.string_types):
            fields = (fields, )

        self._filterKeys[level].difference_update(fields)

    def filter(self, doc, user=None, additionalKeys=None):
        """
        Filter this model for the given user. This is a default implementation
        that assumes this model has no notion of access control, and simply
        allows all keys under READ access level, and conditionally allows any
        keys assigned to SITE_ADMIN level.

        :param doc: The document of this model type to be filtered.
        :type doc: dict or None
        :param user: The current user for whom we are filtering.
        :type user: dict or None
        :param additionalKeys: Any additional keys that should be included in
            the document for this call only.
        :type additionalKeys: list, tuple, set, or None
        :returns: The filtered document (dict).
        """
        if doc is None:
            return None

        keys = set(self._filterKeys[AccessType.READ])

        if user and user.get('admin') is True:
            keys.update(self._filterKeys[AccessType.SITE_ADMIN])

        if additionalKeys:
            keys.update(additionalKeys)

        return self.filterDocument(doc, allow=keys)

    def ensureTextIndex(self, index, language='english'):
        """
        Call this during initialize() of the subclass if you want your
        model to have a full-text searchable index. Each collection may
        have zero or one full-text index.

        :param language: The default_language value for the text index,
            which is used for stemming and stop words. If the text index
            should not use stemming and stop words, set this param to 'none'.
        :type language: str
        """
        self._textIndex = index
        self._textLanguage = language

    def ensureIndices(self, indices):
        """
        Subclasses should call this with a list of strings representing
        fields that should be indexed in the database if there are any.
        Otherwise, it is not necessary to call this method. Elements of the list
        may also be a list or tuple, where the second element is a dictionary
        that will be passed as kwargs to the pymongo create_index call.
        """
        self._indices.extend(indices)

    def ensureIndex(self, index):
        """
        Like ensureIndices, but declares just a single index rather than a list
        of them.
        """
        self._indices.append(index)

    def validate(self, doc):
        """
        Models should implement this to validate the document before it enters
        the database. It must return the document with any necessary filters
        applied, or throw a ValidationException if validation of the document
        fails.

        :param doc: The document to validate before saving to the collection.
        :type doc: dict
        """
        raise Exception('Must override validate() in %s model.'
                        % self.__class__.__name__)  # pragma: no cover

    def initialize(self):
        """
        Subclasses should override this and set the name of the collection as
        self.name. Also, they should set any indexed fields that they require.
        """
        raise Exception('Must override initialize() in %s model.'
                        % self.__class__.__name__)  # pragma: no cover

    def find(self, query=None, offset=0, limit=0, timeout=None,
             fields=None, sort=None, **kwargs):
        """
        Search the collection by a set of parameters. Passes any extra kwargs
        through to the underlying pymongo.collection.find function.

        :param query: The search query (see general MongoDB docs for "find()")
        :type query: dict
        :param offset: The offset into the results
        :type offset: int
        :param limit: Maximum number of documents to return
        :type limit: int
        :param sort: The sort order.
        :type sort: List of (key, order) tuples.
        :param fields: A mask for filtering result documents by key.
        :type fields: list[str]
        :param timeout: Cursor timeout in ms. Default is no timeout.
        :type timeout: int
        :returns: A pymongo database cursor.
        """
        query = query or {}
        kwargs = {k: kwargs[k] for k in kwargs if k in _allowedFindArgs}

        cursor = self.collection.find(
            filter=query, skip=offset, limit=limit, projection=fields,
            no_cursor_timeout=timeout is None, sort=sort, **kwargs)

        if timeout:
            cursor.max_time_ms(timeout)

        return cursor

    def findOne(self, query=None, fields=None, **kwargs):
        """
        Search the collection by a set of parameters. Passes any kwargs
        through to the underlying pymongo.collection.find_one function.

        :param query: The search query (see general MongoDB docs for "find()")
        :type query: dict
        :param sort: The sort order.
        :type sort: List of (key, order) tuples.
        :param fields: A mask for filtering result documents by key.
        :type fields: List of strings
        :returns: the first object that was found, or None if none found.
        """
        query = query or {}
        kwargs = {k: kwargs[k] for k in kwargs if k in _allowedFindArgs}
        return self.collection.find_one(query, projection=fields, **kwargs)

    def textSearch(self, query, offset=0, limit=0, sort=None, fields=None,
                   filters=None):
        """
        Perform a full-text search against the text index for this collection.

        :param query: The text query. Will be stemmed internally.
        :type query: str
        :param filters: Any additional query operators to apply.
        :type filters: dict
        :returns: A pymongo cursor. It is left to the caller to build the
            results from the cursor.
        """
        filters = filters or {}
        fields = fields or {}

        fields['_textScore'] = {'$meta': 'textScore'}
        filters['$text'] = {'$search': query}

        cursor = self.find(filters, offset=offset, limit=limit,
                           sort=sort, fields=fields)

        # Sort by meta text score, but only if result count is below a certain
        # threshold. The text score is not a real index, so we cannot always
        # sort by it if there is a high number of matching documents.
        if cursor.count() < TEXT_SCORE_SORT_MAX and sort is None:
            cursor.sort([('_textScore', {'$meta': 'textScore'})])

        return cursor

    def prefixSearch(self, query, offset=0, limit=0, sort=None, fields=None,
                     filters=None, prefixSearchFields=None):
        """
        Search for documents in this model's collection by a prefix string.
        The fields that will be searched based on this prefix must be set as
        the ``prefixSearchFields`` attribute of this model, which must be an
        iterable. Elements of this iterable must be either a string representing
        the field name, or a 2-tuple in which the first element is the field
        name, and the second element is a string representing the regex search
        options.

        :param query: The prefix string to look for.
        :type query: str
        :param filters: Any additional query operators to apply.
        :type filters: dict
        :param prefixSearchFields: To override the model's prefixSearchFields
            attribute for this invocation, pass an alternate iterable.
        :returns: A pymongo cursor. It is left to the caller to build the
            results from the cursor.
        """
        filters = filters or {}
        filters['$or'] = filters.get('$or', [])

        for field in (prefixSearchFields or self.prefixSearchFields):
            if isinstance(field, (list, tuple)):
                filters['$or'].append({
                    field[0]: {
                        '$regex': '^%s' % re.escape(query),
                        '$options': field[1]
                    }
                })
            else:
                filters['$or'].append({
                    field: {'$regex': '^%s' % re.escape(query)}
                })

        return self.find(
            filters, offset=offset, limit=limit, sort=sort, fields=fields)

    def save(self, document, validate=True, triggerEvents=True):
        """
        Create or update a document in the collection. This triggers two
        events; one prior to validation, and one prior to saving. Either of
        these events may have their default action prevented.

        :param document: The document to save.
        :type document: dict
        :param validate: Whether to call the model's validate() before saving.
        :type validate: bool
        :param triggerEvents: Whether to trigger events for validate and
            pre- and post-save hooks.
        """
        if validate and triggerEvents:
            event = events.trigger('.'.join(('model', self.name, 'validate')),
                                   document)
            if event.defaultPrevented:
                validate = False

        if validate:
            document = self.validate(document)

        if triggerEvents:
            event = events.trigger('model.%s.save' % self.name, document)
            if event.defaultPrevented:
                return document

        isNew = '_id' not in document
        try:
            if isNew:
                document['_id'] = \
                    self.collection.insert_one(document).inserted_id
            else:
                self.collection.replace_one(
                    {'_id': document['_id']}, document, True)
        except WriteError as e:
            raise ValidationException('Database save failed: %s' % e.details)

        if triggerEvents:
            if isNew:
                events.trigger('model.%s.save.created' % self.name, document)
            events.trigger('model.%s.save.after' % self.name, document)

        return document

    def update(self, query, update, multi=True):
        """
        This method should be used for updating multiple documents in the
        collection. This is useful for things like removing all references in
        this collection to a document that is being deleted from another
        collection.

        For updating a single document, use the save() model method instead.

        :param query: The query for finding documents to update. It's
                      the same format as would be passed to find().
        :type query: dict
        :param update: The update specifier.
        :type update: dict
        :param multi: Whether to update a single document, or all matching
            documents.
        :type multi: bool
        :returns: A pymongo UpdateResult object.
        """
        if multi:
            return self.collection.update_many(query, update)
        else:
            return self.collection.update_one(query, update)

    def increment(self, query, field, amount, **kwargs):
        """
        This is a specialization of the update method that atomically increments
        a field by a given amount. Additional kwargs are passed directly through
        to update.

        :param query: The query selector for documents to update.
        :type query: dict
        :param field: The name of the field in the document to increment.
        :type field: str
        :param amount: The amount to increment the field by.
        :type amount: int or float
        """
        self.update(query=query, update={
            '$inc': {field: amount}
        }, **kwargs)

    def remove(self, document, **kwargs):
        """
        Delete an object from the collection; must have its _id set.

        :param doc: the item to remove.
        """
        assert '_id' in document

        event = events.trigger('.'.join(('model', self.name, 'remove')),
                               document)
        kwargsEvent = events.trigger(
            '.'.join(('model', self.name, 'remove_with_kwargs')), {
                'document': document,
                'kwargs': kwargs
            })

        if not event.defaultPrevented and not kwargsEvent.defaultPrevented:
            return self.collection.delete_one({'_id': document['_id']})

    def removeWithQuery(self, query):
        """
        Remove all documents matching a given query from the collection.
        For safety reasons, you may not pass an empty query.
        """
        assert query

        return self.collection.delete_many(query)

    def load(self, id, objectId=True, fields=None, exc=False):
        """
        Fetch a single object from the database using its _id field.

        :param id: The value for searching the _id field.
        :type id: string or ObjectId
        :param objectId: Whether the id should be coerced to ObjectId type.
        :type objectId: bool
        :param fields: Fields list to include. Also can be a dict for
                       exclusion. See pymongo docs for how to use this arg.
        :param exc: Whether to raise a ValidationException if there is no
                    document with the given id.
        :type exc: bool
        :returns: The matching document, or None.
        """
        if not id:
            raise ValidationException('Attempt to load null ObjectId: %s' % id)

        if objectId and type(id) is not ObjectId:
            try:
                id = ObjectId(id)
            except InvalidId:
                raise ValidationException('Invalid ObjectId: %s' % id,
                                          field='id')
        doc = self.findOne({'_id': id}, fields=fields)

        if doc is None and exc is True:
            raise ValidationException('No such %s: %s' % (self.name, id),
                                      field='id')

        return doc

    def filterDocument(self, doc, allow=None):
        """
        This method will filter the given document to make it suitable to
        output to the user.

        :param doc: The document to filter.
        :type doc: dict
        :param allow: The whitelist of fields to allow in the output document.
        :type allow: List of strings
        """
        if not allow:
            allow = []

        if doc is None:
            return None

        out = {}
        for field in allow:
            if field in doc:
                out[field] = doc[field]

        if '_textScore' in doc:
            out['_textScore'] = doc['_textScore']

        out['_modelType'] = self.name

        return out

    def subtreeCount(self, doc):
        """
        Return the size of the subtree rooted at the given document.  In
        general, if this contains items or folders, it will be the count of the
        items and folders in all containers.  If it does not, it will be 1.
        This returns the absolute size of the subtree, it does not filter by
        permissions.

        :param doc: The root of the subtree.
        :type doc: dict
        """
        return 1
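
To make the subclassing contract above concrete, here is a hypothetical model sketch; the 'recipe' collection, its fields, and the index weights are invented for illustration and are not part of the original source:

# A hypothetical Model subclass (all names below are illustrative).
class Recipe(Model):
    def initialize(self):
        self.name = 'recipe'  # backing MongoDB collection name
        # Plain string indices, plus a (field, kwargs) tuple that is
        # passed through to pymongo's create_index.
        self.ensureIndices(['lowerName', ('creatorId', {'sparse': True})])
        # Zero or one full-text index per collection, with field weights.
        self.ensureTextIndex({'name': 10, 'description': 1})
        # Whitelist the fields that filter() may return at READ level.
        self.exposeFields(level=AccessType.READ,
                          fields=('_id', 'name', 'description'))

    def validate(self, doc):
        if not doc.get('name'):
            raise ValidationException('Name must not be empty.',
                                      field='name')
        doc['lowerName'] = doc['name'].lower()
        return doc

# save() runs the validate/save event chain and inserts or replaces the
# document; filter() then applies the READ-level whitelist.
model = Recipe()
doc = model.save({'name': 'Stew', 'description': 'Hearty.'})
print(model.filter(doc))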
Example #15
class ExternalMongoDatasetTestCase(base.TestCase):
    """
    Tests of the minerva external mongo dataset.
    """

    def setUp(self):
        """
        Set up the mongo db for the external dataset, with a collection
        named tweetsgeo, which has tweet data that is geolocated.
        """
        super(ExternalMongoDatasetTestCase, self).setUp()

        self._user = self.model('user').createUser(
            'minervauser', 'password', 'minerva', 'user',
            '*****@*****.**')

        from girder.utility import config
        dbUri = config.getConfig()['database']['uri']
        self.dbName = 'minerva_test_external_mongo_dataset'
        dbUriParts = dbUri.split('/')[0:-1]
        self.dbUri = '/'.join(dbUriParts + [self.dbName])
        from girder.models import getDbConnection
        self.externalMongoDbConnection = getDbConnection(self.dbUri)
        self.externalMongoDb = self.externalMongoDbConnection.get_default_database()
        from girder.external.mongodb_proxy import MongoProxy
        self.collectionName = 'tweetsgeo'
        self.tweetsgeoCollection = MongoProxy(self.externalMongoDb[self.collectionName])
        # add test data to external dataset
        self.pluginTestDir = os.path.dirname(os.path.realpath(__file__))
        tweets100Path = os.path.join(self.pluginTestDir, 'data', 'tweets100.json')
        z = zipfile.ZipFile('%s.zip' % tweets100Path)
        tweets = json.load(z.open('tweets100.json'))
        from datetime import datetime
        dateformat = '%Y-%m-%dT%H:%M:%S'
        for tweet in tweets:
            d = datetime.strptime((tweet['created_at']), dateformat)
            tweet['created_at'] = int((d - datetime(1970, 1, 1)).total_seconds())
            self.tweetsgeoCollection.save(tweet)

        path = '/minerva_dataset/folder'
        params = {
            'userId': self._user['_id'],
        }
        # create a dataset folder
        self.request(path=path, method='POST', params=params, user=self._user)

    def tearDown(self):
        self.externalMongoDbConnection.drop_database(self.dbName)

    def testExternalDataset(self):
        # create an external dataset from the mongo collection
        path = '/minerva_dataset/external_mongo_dataset'
        response = self.request(
            path=path,
            method='POST',
            user=self._user,
            params={
                'name': 'tweetsgeodataset',
                'dbConnectionUri': self.dbUri,
                'collectionName': self.collectionName
            }
        )
        self.assertStatusOk(response)
        self.assertHasKeys(response.json, ['mongo_connection', 'json_row', 'original_type'])
        self.assertEquals(response.json['original_type'], 'mongo', 'expected mongo for original_type')
        self.assertEquals(response.json['mongo_connection']['collection_name'], self.collectionName, 'unexpected collection name')
        self.assertEquals(response.json['mongo_connection']['db_uri'], self.dbUri, 'unexpected db uri')
        minervaMetadata = response.json
        datasetId = minervaMetadata['dataset_id']

        # update the minerva metadata with coordinate mapping
        minervaMetadata["mapper"] = {
            "latitudeKeypath": "coordinates.coordinates[1]",
            "longitudeKeypath": "coordinates.coordinates[0]",
        }

        path = '/item/{}/metadata'.format(datasetId)
        response = self.request(
            path=path,
            method='GET',
            user=self._user,
        )
        metadata = response.json

        metadata['minerva'] = minervaMetadata
        response = self.request(
            path=path,
            method='PUT',
            user=self._user,
            body=json.dumps(metadata),
            type='application/json'
        )
        metadata = response.json

        # create geojson in the dataset
        path = '/minerva_dataset/{}/geojson'.format(datasetId)
        response = self.request(
            path=path,
            method='POST',
            user=self._user,
        )
        self.assertHasKeys(response.json, ['geojson'])
        # expect 100 points back as that is the size of the mongo dataset
        geojsonData = geojson.loads(response.json['geojson']['data'])
        # coordinate limits empirically figured
        # coords = [feature['geometry']['coordinates'] for feature in geojsonData['features']]
        # print min([c[0] for c in coords])
        # print max([c[0] for c in coords])
        # print min([c[1] for c in coords])
        # print max([c[1] for c in coords])
        xMin = -122.64
        xMax = -57.93991735
        yMin = -34.93523486
        yMax = 47.696623
        self.assertEquals(len(geojsonData['features']), 100, 'geojson should have 100 features')
        # to ensure correct mapping, check coords
        features = geojsonData['features']
        for feature in features:
            coordinates = feature['geometry']['coordinates']
            self.assertTrue(xMin <= coordinates[0], 'x coordinate out of range')
            self.assertTrue(xMax >= coordinates[0], 'x coordinate out of range')
            self.assertTrue(yMin <= coordinates[1], 'y coordinate out of range')
            self.assertTrue(yMax >= coordinates[1], 'y coordinate out of range')

        # test external_mongo_limits endpoint

        path = '/minerva_dataset/{}/external_mongo_limits'.format(datasetId)
        params = {'field': 'created_at'}
        response = self.request(
            path=path,
            method='GET',
            user=self._user,
            params=params
        )
        limits = response.json['mongo_fields']['created_at']
        self.assertEquals(limits['max'], 1380587461, 'incorrect max date')
        self.assertEquals(limits['min'], 1380587436, 'incorrect min date')

        # test limiting geojson to date range

        params = {
            'dateField': 'created_at',
            'startTime': 1380587440,
            'endTime':   1380587455,
        }
        path = '/minerva_dataset/{}/geojson'.format(datasetId)
        response = self.request(
            path=path,
            method='POST',
            user=self._user,
            params=params
        )
        self.assertEquals(response.json['geojson']['query_count'], 52, 'invalid query count')
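
The setUp above stores created_at as integer epoch seconds so that the date-range filtering at the end of the test can compare numbers. A self-contained sketch of that conversion (the timestamp literal is illustrative):

from datetime import datetime

dateformat = '%Y-%m-%dT%H:%M:%S'
d = datetime.strptime('2013-10-01T00:30:36', dateformat)
# Naive datetimes are treated as UTC here; subtracting the epoch
# datetime gives seconds since 1970-01-01.
epoch = int((d - datetime(1970, 1, 1)).total_seconds())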
Example #16
class MongoDatasetTestCase(base.TestCase):
    """
    Tests of the minerva mongo dataset.
    """

    def setUp(self):
        """
        Set up the mongo db for the external dataset, with 3 collections:
        a) tweetsgeo, which has tweet data that is geolocated (lat/long fields).
        b) polyGeoIndexed, with 2 polygons in a 2dsphere-indexed 'geometry' field
        c) polyGeoNonIndexed, the same polygons but without the 2dsphere index
        """
        super(MongoDatasetTestCase, self).setUp()

        self._user = self.model('user').createUser(
            'minervauser', 'password', 'minerva', 'user',
            '*****@*****.**')

        from girder.utility import config
        dbUri = config.getConfig()['database']['uri']
        self.dbName = 'minerva_test_external_mongo_dataset'
        dbUriParts = dbUri.split('/')[0:-1]
        self.dbUri = '/'.join(dbUriParts + [self.dbName])
        from girder.models import getDbConnection
        self.externalMongoDbConnection = getDbConnection(self.dbUri)
        self.externalMongoDb = self.externalMongoDbConnection.get_default_database()
        from girder.external.mongodb_proxy import MongoProxy
        self.geojsonIndexedName = 'polyGeoIndexed'
        self.geojsonNonIndexedName = 'polyGeoNonIndexed'
        self.polyIndexedCollection = MongoProxy(self.externalMongoDb[self.geojsonIndexedName])
        self.polyNonIndexedCollection = MongoProxy(self.externalMongoDb[self.geojsonNonIndexedName])
        self.pluginTestDir = os.path.dirname(os.path.realpath(__file__))
        geojsonPath = os.path.join(self.pluginTestDir, 'data', 'polygons.json')
        with open(geojsonPath) as geojsonFile:
            polys = json.load(geojsonFile)
            for poly in polys:
                self.polyIndexedCollection.save(poly)
                self.polyNonIndexedCollection.save(poly)
            self.polyIndexedCollection.create_index([('geometry', '2dsphere')])
        self.collectionName = 'tweetsgeo'
        self.tweetsgeoCollection = MongoProxy(self.externalMongoDb[self.collectionName])
        # add test data to external dataset
        self.pluginTestDir = os.path.dirname(os.path.realpath(__file__))
        tweets100Path = os.path.join(self.pluginTestDir, 'data', 'tweets100.json')
        z = zipfile.ZipFile('%s.zip' % tweets100Path)
        tweets = json.load(z.open('tweets100.json'))
        from datetime import datetime
        dateformat = '%Y-%m-%dT%H:%M:%S'
        for tweet in tweets:
            d = datetime.strptime((tweet['created_at']), dateformat)
            tweet['created_at'] = int((d - datetime(1970, 1, 1)).total_seconds())
            self.tweetsgeoCollection.save(tweet)

    def tearDown(self):
        self.externalMongoDbConnection.drop_database(self.dbName)

    def testMongoDataSourceAndDataset(self):
        """
        Test Mongo source and dataset creation.
        Test automatic geojson configuration when there is a 2dsphere index or
        'geometry' field in the collection.
        Test that geojson is not automatically configured for a collection that
        has no 'geometry' field or 2dsphere-indexed field.
        """
        # create a mongo source
        path = '/minerva_source_mongo'
        response = self.request(
            path=path,
            method='POST',
            user=self._user,
            params={
                'name': 'mongogeodatasource',
                'dbConnectionUri': self.dbUri
            }
        )
        self.assertStatusOk(response)
        minerva_metadata = response.json['meta']['minerva']
        self.assertHasKeys(minerva_metadata, ['mongo_connection', 'source_type'])
        self.assertEquals(minerva_metadata['source_type'], 'mongo', 'expected mongo for source_type')
        self.assertEquals(minerva_metadata['mongo_connection']['db_uri'], self.dbUri, 'unexpected db uri')
        # create a mongo dataset from a spatially indexed collection
        sourceId = response.json['_id']
        path = '/minerva_dataset_mongo'
        response = self.request(
            path=path,
            method='POST',
            user=self._user,
            params={
                'name': self.geojsonIndexedName,
                'mongoSourceId': sourceId,
                'mongo_collection': self.geojsonIndexedName
            }
        )
        self.assertStatusOk(response)
        minerva_metadata_indexed = response.json['meta']['minerva']
        self.assertHasKeys(minerva_metadata_indexed, ['source_id', 'json_row', 'mongo_connection', 'original_type', 'geojson'])
        self.assertEquals(minerva_metadata_indexed['original_type'], 'mongo', 'expected mongo for original_type')
        self.assertEquals(minerva_metadata_indexed['mongo_connection']['db_uri'], self.dbUri, 'unexpected db uri')
        self.assertEquals(minerva_metadata_indexed['mongo_connection']['collection_name'],
                          self.geojsonIndexedName, 'unexpected collection')
        self.assertHasKeys(minerva_metadata_indexed['geojson'], ['query_count', 'data'])
        self.assertEquals(minerva_metadata_indexed['geojson']['query_count'], 2)
        geojson = json.loads(minerva_metadata_indexed['geojson']['data'])
        self.assertHasKeys(geojson, ['features', 'type'])
        self.assertEquals(geojson['type'], 'FeatureCollection')
        # create a mongo dataset from a spatial but non-indexed collection
        sourceId = response.json['_id']
        path = '/minerva_dataset_mongo'
        response = self.request(
            path=path,
            method='POST',
            user=self._user,
            params={
                'name': self.geojsonNonIndexedName,
                'mongoSourceId': sourceId,
                'mongo_collection': self.geojsonNonIndexedName
            }
        )
        self.assertStatusOk(response)
        minerva_metadata_nonindexed = response.json['meta']['minerva']
        self.assertHasKeys(minerva_metadata_nonindexed, ['source_id', 'json_row', 'mongo_connection', 'original_type', 'geojson'])
        self.assertEquals(minerva_metadata_nonindexed['original_type'], 'mongo', 'expected mongo for original_type')
        self.assertEquals(minerva_metadata_nonindexed['mongo_connection']['db_uri'], self.dbUri, 'unexpected db uri')
        self.assertEquals(minerva_metadata_nonindexed['mongo_connection']['collection_name'],
                          self.geojsonNonIndexedName, 'unexpected collection')
        self.assertHasKeys(minerva_metadata_nonindexed['geojson'], ['query_count', 'data'])
        self.assertEquals(minerva_metadata_nonindexed['geojson']['query_count'], 2)
        geojson = json.loads(minerva_metadata_nonindexed['geojson']['data'])
        self.assertHasKeys(geojson, ['features', 'type'])
        self.assertEquals(geojson['type'], 'FeatureCollection')
        # create a mongo dataset from a collection without a geometry field
        sourceId = response.json['_id']
        path = '/minerva_dataset_mongo'
        response = self.request(
            path=path,
            method='POST',
            user=self._user,
            params={
                'name': self.collectionName,
                'mongoSourceId': sourceId,
                'mongo_collection': self.collectionName
            }
        )
        self.assertStatusOk(response)
        minerva_metadata_nogeometry = response.json['meta']['minerva']
        self.assertHasKeys(minerva_metadata_nogeometry, ['source_id', 'json_row', 'mongo_connection', 'original_type'])
        self.assertEquals(minerva_metadata_nogeometry['original_type'], 'mongo', 'expected mongo for original_type')
        self.assertEquals(minerva_metadata_nogeometry['mongo_connection']['db_uri'], self.dbUri, 'unexpected db uri')
        self.assertEquals(minerva_metadata_nogeometry['mongo_connection']['collection_name'],
                          self.collectionName, 'unexpected collection')
        self.assertNotHasKeys(minerva_metadata_nogeometry, ['geojson'])
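
The automatic geojson detection above hinges on whether the collection has a 2dsphere index on its 'geometry' field. A minimal pymongo sketch of that setup, under assumed connection details (the database name and query point are illustrative):

import pymongo

coll = pymongo.MongoClient(
    'mongodb://localhost:27017')['minerva_test']['polyGeoIndexed']
coll.create_index([('geometry', '2dsphere')])

# With the index in place, GeoJSON operators such as $geoIntersects can
# query the 'geometry' field efficiently:
query = {'geometry': {'$geoIntersects': {'$geometry': {
    'type': 'Point', 'coordinates': [-73.97, 40.77]}}}}
matches = list(coll.find(query))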
Example #17
def getDbConnection(uri=None,
                    replicaSet=None,
                    autoRetry=True,
                    quiet=False,
                    **kwargs):
    """
    Get a MongoClient object that is connected to the configured database.
    We lazy-instantiate a module-level singleton, the MongoClient objects
    manage their own connection pools internally. Any extra kwargs you pass to
    this method will be passed through to the MongoClient.

    :param uri: if specified, connect to this mongo db rather than the one in
                the config.
    :param replicaSet: if uri is specified, use this replica set.
    :param autoRetry: if this connection should automatically retry operations
        in the event of an AutoReconnect exception. If you're testing the
        connection, set this to False. If disabled, this also will not cache
        the mongo client, so make sure to only disable if you're testing a
        connection.
    :type autoRetry: bool
    :param quiet: if true, don't logprint warnings and success.
    :type quiet: bool
    """
    global _dbClients

    origKey = (uri, replicaSet)
    if origKey in _dbClients:
        return _dbClients[origKey]

    dbConf = getDbConfig()

    if uri is None or uri == '':
        uri = dbConf.get('uri')
        replicaSet = dbConf.get('replica_set')

    clientOptions = {
        # This is the maximum time between when we fetch data from a cursor.
        # If it times out, the cursor is lost and we can't reconnect.  If it
        # isn't set, we have issues with replica sets when the primary goes
        # down.  This value can be overridden in the mongodb uri connection
        # string with the socketTimeoutMS.
        'socketTimeoutMS': 60000,
        'connectTimeoutMS': 20000,
        'serverSelectionTimeoutMS': 20000,
        'readPreference': 'secondaryPreferred',
        'replicaSet': replicaSet,
        'w': 'majority'
    }

    # All other options in the [database] section will be passed directly as
    # options to the mongo client
    for opt, val in six.viewitems(dict(dbConf)):
        if opt not in {'uri', 'replica_set'}:
            clientOptions[opt] = val

    # Finally, kwargs take precedence
    clientOptions.update(kwargs)
    # if the connection URI overrides any option, honor it above our own
    # settings.
    uriParams = urllib.parse.parse_qs(urllib.parse.urlparse(uri).query)
    for key in uriParams:
        if key in clientOptions:
            del clientOptions[key]

    if uri is None:
        dbUriRedacted = 'mongodb://unknown'
        raise Exception('Could not connect to mongo: no URI specified')
    else:
        # Hide the credentials portion of the URI before it gets logged.
        parts = uri.split('@')
        if len(parts) == 2:
            dbUriRedacted = 'mongodb://' + parts[1]
        else:
            dbUriRedacted = uri

        client = pymongo.MongoClient(uri, **clientOptions)

    if not quiet:
        desc = ''
        if replicaSet:
            desc += ', replica set: %s' % replicaSet
        logprint.info('Connecting to MongoDB: %s%s' % (dbUriRedacted, desc))

    # Make sure we can connect to the mongo server at startup
    client.server_info()

    if autoRetry:
        client = MongoProxy(client, logger=logger)
        _dbClients[origKey] = _dbClients[(uri, replicaSet)] = client

    return client
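
A hedged usage sketch of this function's parameters (the test URI and timeout value are illustrative, not from the original source):

# Default: connect using the [database] section of the config; the
# client is cached, so repeated calls return the same object.
conn = getDbConnection()

# Probing a specific server: autoRetry=False skips the MongoProxy
# wrapper and the cache, and extra kwargs pass straight to MongoClient.
probe = getDbConnection(
    uri='mongodb://localhost:27017/girder_test',
    autoRetry=False, quiet=True,
    serverSelectionTimeoutMS=2000)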
Example #18
File: __init__.py Project: salamb/girder
def getDbConnection(uri=None, replicaSet=None, autoRetry=True, **kwargs):
    """
    Get a MongoClient object that is connected to the configured database.
    We lazy-instantiate a module-level singleton, the MongoClient objects
    manage their own connection pools internally. Any extra kwargs you pass to
    this method will be passed through to the MongoClient.

    :param uri: if specified, connect to this mongo db rather than the one in
                the config.
    :param replicaSet: if uri is specified, use this replica set.
    :param autoRetry: if this connection should automatically retry operations
        in the event of an AutoReconnect exception. If you're testing the
        connection, set this to False. If disabled, this also will not cache
        the mongo client, so make sure to only disable if you're testing a
        connection.
    :type autoRetry: bool
    """
    global _dbClients

    origKey = (uri, replicaSet)
    if origKey in _dbClients:
        return _dbClients[origKey]

    if uri is None or uri == '':
        dbConf = getDbConfig()
        uri = dbConf.get('uri')
        replicaSet = dbConf.get('replica_set')
    clientOptions = {
        # This is the maximum time between when we fetch data from a cursor.
        # If it times out, the cursor is lost and we can't reconnect.  If it
        # isn't set, we have issues with replica sets when the primary goes
        # down.  This value can be overridden in the mongodb uri connection
        # string with the socketTimeoutMS.
        'socketTimeoutMS': 60000,
        'connectTimeoutMS': 20000,
        'serverSelectionTimeoutMS': 20000,
        'read_preference': ReadPreference.SECONDARY_PREFERRED,
        'replicaSet': replicaSet
    }
    clientOptions.update(kwargs)

    if uri is None:
        dbUriRedacted = 'mongodb://unknown'
        raise Exception('Could not connect to mongo: no URI specified')
    else:
        # Hide the credentials portion of the URI before it gets logged.
        parts = uri.split('@')
        if len(parts) == 2:
            dbUriRedacted = 'mongodb://' + parts[1]
        else:
            dbUriRedacted = uri

        client = pymongo.MongoClient(uri, **clientOptions)

    # Make sure we can connect to the mongo server at startup
    client.server_info()

    if autoRetry:
        client = MongoProxy(client, logger=logger)
        _dbClients[origKey] = _dbClients[(uri, replicaSet)] = client

    desc = ''
    if replicaSet:
        desc += ', replica set: %s' % replicaSet
    print(
        TerminalColor.info('Connected to MongoDB: %s%s' %
                           (dbUriRedacted, desc)))
    return client
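
Both versions of getDbConnection call client.server_info() so that an unreachable server fails at startup rather than on the first query. A standalone sketch of that check, with an assumed localhost URI:

import pymongo

client = pymongo.MongoClient('mongodb://localhost:27017',
                             serverSelectionTimeoutMS=2000)
try:
    client.server_info()  # forces a round trip to the server
except pymongo.errors.ServerSelectionTimeoutError:
    print('MongoDB is not reachable')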