def __init__(self, cache_dirname=None, MongoDB=None, src_db='datanator', verbose=False, max_entries=float('inf'), username=None, password=None, authSource='admin', readPreference='nearest', des_collection='rna_halflife_new', src_collection='rna_halflife', des_db='test'):
    """Open connections to the source and destination MongoDB collections.

    Args:
        cache_dirname (:obj:`str`, optional): cache directory. Defaults to None.
        MongoDB (:obj:`str`, optional): MongoDB server address. Defaults to None.
        src_db (:obj:`str`, optional): source database name. Defaults to 'datanator'.
        verbose (:obj:`bool`, optional): print progress messages. Defaults to False.
        max_entries (:obj:`int`, optional): max number of entries to process. Defaults to float('inf').
        username (:obj:`str`, optional): MongoDB username. Defaults to None.
        password (:obj:`str`, optional): MongoDB password. Defaults to None.
        authSource (:obj:`str`, optional): authentication database. Defaults to 'admin'.
        readPreference (:obj:`str`, optional): read preference. Defaults to 'nearest'.
        des_collection (:obj:`str`, optional): destination collection name. Defaults to 'rna_halflife_new'.
        src_collection (:obj:`str`, optional): source collection name. Defaults to 'rna_halflife'.
        des_db (:obj:`str`, optional): destination database name. Defaults to 'test'.
    """
    self.max_entries = max_entries
    self.verbose = verbose
    # Two independent MongoUtil handles: one for reading the source
    # collection, one for writing the destination collection.
    src_util = mongo_util.MongoUtil(
        cache_dirname=cache_dirname, MongoDB=MongoDB, db=src_db,
        verbose=verbose, max_entries=max_entries, username=username,
        password=password, authSource=authSource, readPreference=readPreference)
    self.src_client, self.src_db, self.src_collection = src_util.con_db(
        collection_str=src_collection)
    des_util = mongo_util.MongoUtil(
        cache_dirname=cache_dirname, MongoDB=MongoDB, db=des_db,
        verbose=verbose, max_entries=max_entries, username=username,
        password=password, authSource=authSource, readPreference=readPreference)
    self.des_client, self.des_db, self.des_collection = des_util.con_db(
        collection_str=des_collection)
    # Case-insensitive string comparison for queries on these collections.
    self.collation = Collation('en', strength=CollationStrength.SECONDARY)
def __init__(self, username=None, password=None, server=None, authSource='admin', database='datanator', max_entries=float('inf'), verbose=True, collection_str='intact_complex', readPreference='nearest', replicaSet=None):
    """Connect to the intact_complex collection.

    Args:
        username (:obj:`str`, optional): MongoDB username. Defaults to None.
        password (:obj:`str`, optional): MongoDB password. Defaults to None.
        server (:obj:`str`, optional): MongoDB server address. Defaults to None.
        authSource (:obj:`str`, optional): authentication database. Defaults to 'admin'.
        database (:obj:`str`, optional): database name. Defaults to 'datanator'.
        max_entries (:obj:`int`, optional): max number of entries to process. Defaults to float('inf').
        verbose (:obj:`bool`, optional): print progress messages. Defaults to True.
        collection_str (:obj:`str`, optional): collection name. Defaults to 'intact_complex'.
        readPreference (:obj:`str`, optional): read preference. Defaults to 'nearest'.
        replicaSet (:obj:`str`, optional): replica set name. Defaults to None.
    """
    self.max_entries = max_entries
    self.verbose = verbose
    self.mongo_manager = mongo_util.MongoUtil(
        MongoDB=server, username=username, password=password,
        authSource=authSource, db=database,
        readPreference=readPreference, replicaSet=replicaSet)
    self.client, self.db, self.collection = self.mongo_manager.con_db(collection_str)
    # Case-insensitive string comparison for queries on this collection.
    self.collation = Collation(locale='en', strength=CollationStrength.SECONDARY)
def __init__(self, username=None, password=None, server=None, authSource='admin', database='datanator', collection_str=None, readPreference='nearest', replicaSet=None):
    """Connect to the target collection and to the kegg_organism_code helper.

    Args:
        username (:obj:`str`, optional): MongoDB username. Defaults to None.
        password (:obj:`str`, optional): MongoDB password. Defaults to None.
        server (:obj:`str`, optional): MongoDB server address. Defaults to None.
        authSource (:obj:`str`, optional): authentication database. Defaults to 'admin'.
        database (:obj:`str`, optional): database name. Defaults to 'datanator'.
        collection_str (:obj:`str`, optional): name of the collection to query. Defaults to None.
        readPreference (:obj:`str`, optional): read preference. Defaults to 'nearest'.
        replicaSet (:obj:`str`, optional): replica set name. Defaults to None.
    """
    self.mongo_manager = mongo_util.MongoUtil(
        MongoDB=server, username=username, password=password,
        authSource=authSource, db=database,
        readPreference=readPreference, replicaSet=replicaSet)
    # Helper for translating KEGG organism codes; always bound to the
    # 'kegg_organism_code' collection regardless of collection_str.
    self.koc_manager = query_kegg_organism_code.QueryKOC(
        username=username, password=password, server=server,
        authSource=authSource, collection_str='kegg_organism_code',
        readPreference=readPreference, replicaSet=replicaSet)
    self.client, self.db, self.collection = self.mongo_manager.con_db(collection_str)
    # Case-insensitive string comparison for queries on this collection.
    self.collation = Collation(locale='en', strength=CollationStrength.SECONDARY)
def setUpClass(cls):
    """Create a temp cache dir, connect to the source and test databases,
    and seed a test collection with documents containing duplicate names.
    """
    cls.cache_dirname = tempfile.mkdtemp()
    cls.db = 'datanator'
    cls.duplicate = 'duplicate_test'
    conf = config.TestConfig()
    user = conf.USERNAME
    pwd = conf.PASSWORD
    server = conf.SERVER
    cls.src = mongo_util.MongoUtil(
        cache_dirname=cls.cache_dirname, MongoDB=server, db=cls.db,
        verbose=True, max_entries=20, username=user, password=pwd)
    cls.collection_str = 'ecmdb'
    cls.src_test = mongo_util.MongoUtil(
        cache_dirname=cls.cache_dirname, MongoDB=server, db='test',
        verbose=True, max_entries=20, username=user, password=pwd)
    # Two docs share the name "mike" so duplicate-detection can be exercised.
    docs = [
        {"name": "mike", "num": 0},
        {"name": "jon", "num": 1},
        {"name": "john", "num": 2},
        {"name": "mike", "num": 3},
    ]
    cls.src_test.db_obj[cls.duplicate].insert_many(docs)
    cls.schema_test = "schema_test"
    # Give the server a moment to make the inserts visible to later queries.
    time.sleep(1)
def _default(self):
    """Define a JSON schema on a MongoDB collection.

    Reads server credentials from the config class named by the CLI
    arguments and delegates to MongoUtil.define_schema().
    (NOTE: the previous docstring said "Delete elasticsearch index",
    which did not match what this command does.)
    """
    args = self.app.pargs
    conf = getattr(config, args.config_name)
    util = mongo_util.MongoUtil(
        MongoDB=conf.SERVER, db=args.db,
        username=conf.USERNAME, password=conf.PASSWORD)
    util.define_schema(args.collection, args.jsonschema)
    print("done")
def main():
    """Load the pickled BRENDA constants file and store it as binary data
    in the 'brenda_constants' collection of the 'test' database.

    Fixes over the previous version:
    - the final insert referenced undefined names ``coll`` and ``thebytes``
      (guaranteed NameError) and never used the loaded ``data``;
    - ``Collection.insert()`` is deprecated in pymongo; use ``insert_one()``.
    """
    db = 'test'
    collection_str = 'brenda_constants'
    mongo_conf = datanator.config.core.get_config()['datanator']['mongodb']
    username = mongo_conf['user']
    password = mongo_conf['password']
    MongoDB = mongo_conf['server']
    manager = mongo_util.MongoUtil(MongoDB=MongoDB, db=db,
                                   username=username, password=password,
                                   collection_str=collection_str)
    pkl_path = Path('~/karr_lab/datanator/docs/brenda/brenda.pkl').expanduser()
    with open(str(pkl_path), 'rb') as f:
        data = pickle.load(f)
    # Serialize the loaded object back to bytes and store it via the
    # collection handle returned by con_db().
    _, _, collection = manager.con_db(collection_str)
    collection.insert_one({'bin-data': Binary(pickle.dumps(data))})
def test_define_schema(self):
    """End-to-end test of the ``mongo-def-schema`` CLI command: run it,
    verify argument parsing and stdout/stderr, then drop the test collection.
    """
    with capturer.CaptureOutput(merged=False, relay=False) as captured:
        with __main__.App(argv=[
            'mongo-def-schema', 'test', 'cli_test',
            '../datanator_pattern_design/compiled/taxon_compiled.json'
        ]) as app:
            # run app
            app.run()
            # test that the arguments to the CLI were correctly parsed
            self.assertEqual(app.pargs.db, 'test')
            # Fix: this previously used assertTrue(value, 'cli_test'), where
            # the second argument is only a failure message -- the assertion
            # passed for ANY truthy value. assertEqual checks the value.
            self.assertEqual(app.pargs.collection, 'cli_test')
            # test that the CLI produced the correct output
            self.assertEqual(captured.stdout.get_text(), 'done')
            self.assertEqual(captured.stderr.get_text(), '')
            # clean up: drop the collection the command created
            conf = getattr(config, app.pargs.config_name)
            mongo_util.MongoUtil(
                MongoDB=conf.SERVER, db=app.pargs.db,
                username=conf.USERNAME,
                password=conf.PASSWORD).db_obj.drop_collection(app.pargs.collection)
def many_to_many(self, collection_str1='metabolites_meta', collection_str2='metabolites_meta', field1='inchi', field2='inchi', lookup1='InChI_Key', lookup2='InChI_Key', num=100):
    """Assign each compound in ``collection_str1`` its top ``num`` most
    similar compounds drawn from ``collection_str2``, storing the result in
    the document's ``similar_compounds_corrected`` field.

    Args:
        collection_str1 (:obj:`str`): collection from which compounds are drawn.
        collection_str2 (:obj:`str`): collection in which the comparison is made.
        field1 (:obj:`str`): field of interest in collection_str1.
        field2 (:obj:`str`): field of interest in collection_str2.
        lookup1 (:obj:`str`): indexed key field in collection_str1.
        lookup2 (:obj:`str`): key field used for lookups in collection_str2.
        num (:obj:`int`): number of most similar compounds to keep.

    Fixes over the previous version:
    - an empty collection (or empty first batch) crashed with IndexError on
      ``documents[-1]``; the pagination loop now only continues while a full
      batch was returned;
    - removed the always-true ``i % 1 == 0`` verbosity condition and a block
      of dead, commented-out code.
    """
    src = mongo_util.MongoUtil(
        MongoDB=self.MongoDB, username=self.username,
        password=self.password, authSource=self.authSource)
    db_obj = src.client[self.db]
    final = db_obj[collection_str1]
    projection = {'m2m_id': 0, 'ymdb_id': 0, 'kinlaw_id': 0,
                  'reaction_participants': 0, 'synonyms': 0}
    col = src.client["datanator"]["metabolites_meta"]
    count = col.count_documents({})
    total = min(count, self.max_entries)

    def process_doc(doc, i):
        # Compute and store the similarity list for a single document.
        if i > self.max_entries:
            return
        if self.verbose:
            print('Going through document {} out of {} in collection {}'.format(
                i, total, collection_str1))
            print(doc[field1])
        compound = doc[field1]
        coeff, inchi_hashed = self.one_to_many(
            compound, lookup=lookup2, collection_str=collection_str2,
            field=field2, num=num)
        result = [{b: a} for a, b in zip(coeff, inchi_hashed)]
        final.update_one({lookup1: doc[lookup1]},
                         {'$set': {'similar_compounds_corrected': result}},
                         upsert=False)

    # Paginate manually, forcing a cursor refresh every `limit` documents,
    # because pymongo's no_cursor_timeout option in find() does not work as
    # intended on long-running scans.
    limit = 100  # number of documents fetched per server round trip
    sorted_field = lookup1  # indexed field used to sort the cursor
    i = 0
    documents = list(col.find({}, projection=projection)
                        .sort(sorted_field, pymongo.ASCENDING).limit(limit))
    for doc in documents:
        process_doc(doc, i)
        i += 1
    # A short batch means we have seen the last page; this also guards the
    # documents[-1] access below against an empty result set.
    while len(documents) == limit:
        cursor = col.find({sorted_field: {'$gt': documents[-1][sorted_field]}},
                          projection=projection)
        documents = list(cursor.sort(sorted_field, pymongo.ASCENDING).limit(limit))
        for doc in documents:
            process_doc(doc, i)
            i += 1