Example #1
0
 def __init__(self, cache_dirname=None, MongoDB=None, src_db='datanator',
              verbose=False, max_entries=float('inf'), username=None,
              password=None, authSource='admin', readPreference='nearest',
              des_collection='rna_halflife_new', src_collection='rna_halflife',
              des_db='test'):
     """Open a source and a destination MongoDB collection.

     Two ``mongo_util.MongoUtil`` managers are built with the same
     connection settings; they differ only in the database they target.
     Each manager's ``con_db`` result is unpacked into the matching
     ``src_*`` / ``des_*`` attributes.

     Args:
         cache_dirname (optional): Forwarded to ``MongoUtil``. Defaults to None.
         MongoDB (optional): Forwarded to ``MongoUtil`` as ``MongoDB``. Defaults to None.
         src_db (str, optional): Database used for the source connection.
             Defaults to 'datanator'.
         verbose (bool, optional): Stored on the instance and forwarded to
             ``MongoUtil``. Defaults to False.
         max_entries (optional): Stored on the instance and forwarded to
             ``MongoUtil``. Defaults to float('inf').
         username (optional): Forwarded to ``MongoUtil``. Defaults to None.
         password (optional): Forwarded to ``MongoUtil``. Defaults to None.
         authSource (str, optional): Forwarded to ``MongoUtil``. Defaults to 'admin'.
         readPreference (str, optional): Forwarded to ``MongoUtil``.
             Defaults to 'nearest'.
         des_collection (str, optional): Collection opened on the destination
             database. Defaults to 'rna_halflife_new'.
         src_collection (str, optional): Collection opened on the source
             database. Defaults to 'rna_halflife'.
         des_db (str, optional): Database used for the destination connection.
             Defaults to 'test'.
     """
     self.max_entries = max_entries
     self.verbose = verbose

     # Settings common to both connections; only `db` differs between them.
     shared_settings = dict(
         cache_dirname=cache_dirname,
         MongoDB=MongoDB,
         verbose=verbose,
         max_entries=max_entries,
         username=username,
         password=password,
         authSource=authSource,
         readPreference=readPreference,
     )

     source_manager = mongo_util.MongoUtil(db=src_db, **shared_settings)
     self.src_client, self.src_db, self.src_collection = source_manager.con_db(
         collection_str=src_collection)

     destination_manager = mongo_util.MongoUtil(db=des_db, **shared_settings)
     self.des_client, self.des_db, self.des_collection = destination_manager.con_db(
         collection_str=des_collection)

     self.collation = Collation('en', strength=CollationStrength.SECONDARY)
 def __init__(self,
              username=None,
              password=None,
              server=None,
              authSource='admin',
              database='datanator',
              max_entries=float('inf'),
              verbose=True,
              collection_str='intact_complex',
              readPreference='nearest',
              replicaSet=None):
     """Create the Mongo manager and open the requested collection.

     Args:
         username (optional): Forwarded to ``MongoUtil``. Defaults to None.
         password (optional): Forwarded to ``MongoUtil``. Defaults to None.
         server (optional): Forwarded to ``MongoUtil`` as ``MongoDB``.
             Defaults to None.
         authSource (str, optional): Forwarded to ``MongoUtil``. Defaults to 'admin'.
         database (str, optional): Forwarded to ``MongoUtil`` as ``db``.
             Defaults to 'datanator'.
         max_entries (optional): Stored on the instance. Defaults to float('inf').
         verbose (bool, optional): Stored on the instance. Defaults to True.
         collection_str (str, optional): Collection passed to ``con_db``.
             Defaults to 'intact_complex'.
         readPreference (str, optional): Forwarded to ``MongoUtil``.
             Defaults to 'nearest'.
         replicaSet (optional): Forwarded to ``MongoUtil``. Defaults to None.
     """
     manager_settings = {
         'MongoDB': server,
         'username': username,
         'password': password,
         'authSource': authSource,
         'db': database,
         'readPreference': readPreference,
         'replicaSet': replicaSet,
     }
     self.mongo_manager = mongo_util.MongoUtil(**manager_settings)
     self.collation = Collation(
         locale='en', strength=CollationStrength.SECONDARY)
     self.max_entries = max_entries
     self.verbose = verbose

     # con_db yields a (client, db, collection) triple.
     connection = self.mongo_manager.con_db(collection_str)
     self.client, self.db, self.collection = connection
    def __init__(self, username=None, password=None, server=None, authSource='admin',
                 database='datanator', collection_str=None, readPreference='nearest',
                 replicaSet=None):
        """Create the Mongo manager, a KEGG organism code query helper, and
        open the requested collection.

        Args:
            username (optional): Forwarded to ``MongoUtil`` and ``QueryKOC``.
            password (optional): Forwarded to ``MongoUtil`` and ``QueryKOC``.
            server (optional): Passed to ``MongoUtil`` as ``MongoDB`` and to
                ``QueryKOC`` as ``server``.
            authSource (str, optional): Forwarded to both helpers.
                Defaults to 'admin'.
            database (str, optional): Passed to ``MongoUtil`` as ``db``.
                Defaults to 'datanator'.
            collection_str (optional): Collection passed to ``con_db``.
                Defaults to None.
            readPreference (str, optional): Forwarded to both helpers.
                Defaults to 'nearest'.
            replicaSet (optional): Forwarded to both helpers. Defaults to None.
        """
        # Keyword arguments that both helpers accept under the same name.
        common = dict(
            username=username,
            password=password,
            authSource=authSource,
            readPreference=readPreference,
            replicaSet=replicaSet,
        )

        self.mongo_manager = mongo_util.MongoUtil(
            MongoDB=server, db=database, **common)
        self.koc_manager = query_kegg_organism_code.QueryKOC(
            server=server, collection_str='kegg_organism_code', **common)

        opened = self.mongo_manager.con_db(collection_str)
        self.client, self.db, self.collection = opened
        self.collation = Collation(locale='en', strength=CollationStrength.SECONDARY)
 def setUpClass(cls):
     """Prepare shared fixtures: two MongoUtil managers and seed documents.

     One manager targets the 'datanator' database and one targets 'test'.
     Four small documents (with a repeated "name") are inserted into the
     'duplicate_test' collection of the 'test' database.
     """
     cls.cache_dirname = tempfile.mkdtemp()
     cls.db = 'datanator'
     cls.duplicate = 'duplicate_test'

     conf = config.TestConfig()
     # Settings shared by both managers; only the database differs.
     common = dict(
         cache_dirname=cls.cache_dirname,
         MongoDB=conf.SERVER,
         verbose=True,
         max_entries=20,
         username=conf.USERNAME,
         password=conf.PASSWORD,
     )

     cls.src = mongo_util.MongoUtil(db=cls.db, **common)
     cls.collection_str = 'ecmdb'
     cls.src_test = mongo_util.MongoUtil(db='test', **common)

     # "mike" appears twice on purpose; "num" is the position in the tuple.
     names = ("mike", "jon", "john", "mike")
     seed_docs = [{"name": name, "num": num} for num, name in enumerate(names)]
     cls.src_test.db_obj[cls.duplicate].insert_many(seed_docs)

     cls.schema_test = "schema_test"
     time.sleep(1)
    def _default(self):
        ''' Call ``define_schema`` on a MongoDB database for one collection

            Reads the following parsed command-line arguments from
            ``self.app.pargs``:
                config_name (:obj:`str`): attribute of ``config`` that supplies
                    SERVER, USERNAME and PASSWORD
                db (:obj:`str`): database passed to ``MongoUtil``
                collection (:obj:`str`): first argument to ``define_schema``
                jsonschema (:obj:`str`): second argument to ``define_schema``

            Prints "done" once ``define_schema`` returns.
        '''
        pargs = self.app.pargs
        settings = getattr(config, pargs.config_name)
        manager = mongo_util.MongoUtil(
            MongoDB=settings.SERVER,
            db=pargs.db,
            username=settings.USERNAME,
            password=settings.PASSWORD)
        manager.define_schema(pargs.collection, pargs.jsonschema)
        print("done")
Example #6
0
def main():
    """Store the BRENDA pickle file as a binary blob in MongoDB.

    Reads ``~/karr_lab/datanator/docs/brenda/brenda.pkl`` and inserts its raw
    bytes as ``{'bin-data': Binary(...)}`` into the 'brenda_constants'
    collection of the 'test' database. Connection settings come from
    ``datanator.config.core.get_config()['datanator']['mongodb']``.

    Raises:
        pickle.UnpicklingError: if the file is not a valid pickle (other
            exception types are possible for corrupt data).
    """
    db = 'test'
    collection_str = 'brenda_constants'

    # Read the configuration once instead of once per setting.
    mongo_conf = datanator.config.core.get_config()['datanator']['mongodb']
    manager = mongo_util.MongoUtil(MongoDB=mongo_conf['server'],
                                   db=db,
                                   username=mongo_conf['user'],
                                   password=mongo_conf['password'],
                                   collection_str=collection_str)
    # The original referenced an undefined `coll`; obtain the collection from
    # the manager (con_db yields a (client, db, collection) triple).
    _, _, coll = manager.con_db(collection_str)

    pkl_path = Path('~/karr_lab/datanator/docs/brenda/brenda.pkl').expanduser()
    with open(str(pkl_path), 'rb') as f:
        # The original referenced an undefined `thebytes`; the raw file
        # content is what gets stored.
        thebytes = f.read()

    # Keep the original's load step as a sanity check that the file is a
    # readable pickle. SECURITY: unpickling executes arbitrary code -- only
    # acceptable because this is a trusted, locally produced file.
    pickle.loads(thebytes)

    # NOTE(review): a single MongoDB document is limited to 16 MB; if the
    # pickle is larger this insert will fail and GridFS is needed -- confirm.
    coll.insert_one({'bin-data': Binary(thebytes)})
    def test_define_schema(self):
        """Run the 'mongo-def-schema' CLI command and check its parsed
        arguments and captured output, then drop the collection it touched.
        """
        with capturer.CaptureOutput(merged=False, relay=False) as captured:
            with __main__.App(argv=[
                    'mongo-def-schema', 'test', 'cli_test',
                    '../datanator_pattern_design/compiled/taxon_compiled.json'
            ]) as app:
                # run app
                app.run()

                # test that the arguments to the CLI were correctly parsed
                self.assertEqual(app.pargs.db, 'test')
                # assertEqual, not assertTrue: assertTrue(x, 'cli_test') treats
                # the second argument as the failure message and passes for
                # any truthy collection name.
                self.assertEqual(app.pargs.collection, 'cli_test')

                # test that the CLI produced the correct output
                self.assertEqual(captured.stdout.get_text(), 'done')
                self.assertEqual(captured.stderr.get_text(), '')

        # clean up: drop the collection the command operated on
        conf = getattr(config, app.pargs.config_name)
        mongo_util.MongoUtil(MongoDB=conf.SERVER,
                             db=app.pargs.db,
                             username=conf.USERNAME,
                             password=conf.PASSWORD).db_obj.drop_collection(
                                 app.pargs.collection)
Example #8
0
    def many_to_many(self, collection_str1='metabolites_meta',
                     collection_str2='metabolites_meta', field1='inchi',
                     field2='inchi', lookup1='InChI_Key',
                     lookup2='InChI_Key', num=100):
        ''' Go through the source collection and assign each
                compound the top 'num' most similar compounds,
                stored under 'similar_compounds_corrected'
                Args:
                        collection_str1: collection (in self.db) whose documents are updated
                        collection_str2: collection in which comparison is made
                        field1: field of interest in collection_str1
                        field2: field of interest in collection_str2
                        lookup1: field used to sort/page through the source
                                 documents and to match the document to update
                        lookup2: lookup field passed to one_to_many
                        num: number of most similar compounds
        '''
        src = mongo_util.MongoUtil(
            MongoDB=self.MongoDB,
            username=self.username, password=self.password,
            authSource=self.authSource)
        final = src.client[self.db][collection_str1]

        projection = {'m2m_id': 0, 'ymdb_id': 0, 'kinlaw_id': 0,
                      'reaction_participants': 0, 'synonyms': 0}
        # NOTE(review): documents are always read from
        # datanator.metabolites_meta, regardless of self.db / collection_str1,
        # while updates go to self.db[collection_str1] -- confirm intended.
        col = src.client["datanator"]["metabolites_meta"]
        total = min(col.count_documents({}), self.max_entries)

        def process_doc(doc, i):
            # Compute the similar compounds for one document and store them.
            if self.verbose:
                print('Going through document {} out of {} in collection {}'.format(
                    i, total, collection_str1))
                print(doc[field1])
            coeff, inchi_hashed = self.one_to_many(doc[field1], lookup=lookup2,
                                                   collection_str=collection_str2,
                                                   field=field2, num=num)
            # One single-entry dict per similar compound: {hashed inchi: coefficient}
            result = [{b: a} for a, b in zip(coeff, inchi_hashed)]
            final.update_one({lookup1: doc[lookup1]},
                             {'$set': {'similar_compounds_corrected': result}},
                             upsert=False)

        # The collection is paged through manually -- a fresh query every
        # 'limit' documents, resuming after the last sorted value seen --
        # because the no_cursor_timeout option in pymongo's find() was found
        # not to work as intended.
        limit = 100             # number of documents fetched per query
        sorted_field = lookup1  # indexed field used to sort and resume
        processed = 0
        query = {}
        # Stop as soon as max_entries documents were handled: the original
        # processed one document too many (i > max_entries with i starting
        # at 0) and kept paging through the rest of the collection.
        while processed < self.max_entries:
            documents = list(col.find(query, projection=projection)
                             .sort(sorted_field, pymongo.ASCENDING)
                             .limit(limit))
            for doc in documents:
                if processed >= self.max_entries:
                    break
                process_doc(doc, processed)
                processed += 1

            # A short (possibly empty) batch is the last one. Checking this
            # before touching documents[-1] avoids the IndexError the original
            # raised when the collection was empty.
            if len(documents) < limit:
                break
            query = {sorted_field: {'$gt': documents[-1][sorted_field]}}