def setUpClass(cls):
    # Install a dedicated listener and a clean global listener registry so
    # this test class observes only events from its own client.
    cls.listener = EventListener()
    # Save the process-wide listeners; presumably restored in tearDownClass
    # — TODO confirm the matching teardown exists.
    cls.saved_listeners = monitoring._LISTENERS
    monitoring._LISTENERS = monitoring._Listeners([], [], [], [])
    cls.client = rs_or_single_client(event_listeners=[cls.listener])
    cls.db = cls.client.pymongo_test
    # Default collation reused by the tests in this class.
    cls.collation = Collation('en_US')
    # Enter a warnings context for the whole class and silence
    # DeprecationWarning; the context must be exited by a matching teardown.
    cls.warn_context = warnings.catch_warnings()
    cls.warn_context.__enter__()
    warnings.simplefilter("ignore", DeprecationWarning)
async def test_to_list_with_chained_collation(self):
    """to_list() returns every sorted document when collation() is chained."""
    await self.make_test_data()
    query = self.collection.find({}, {"_id": 1})
    query = query.sort([("_id", pymongo.ASCENDING)])
    query = query.collation(Collation("en"))
    docs = await query.to_list(length=1000)
    self.assertEqual([{"_id": n} for n in range(200)], docs)
def _find_jobs_grouped(self, pipeline, group_categories):
    # type: (MongodbSearchPipeline, List[str]) -> Tuple[JobGroupCategory, int]
    """
    Retrieves jobs regrouped by specified field categories and predefined search pipeline filters.

    :param pipeline: base search pipeline the grouping stages are appended to.
    :param group_categories: field name (or list of names) to group matched jobs by.
    :returns: tuple of (grouped job items, total number of matched jobs).
    """
    # Work on a copy so the caller's list is never mutated by the
    # provider -> service substitution below (the original modified it in place).
    groups = [group_categories] if isinstance(group_categories, str) else list(group_categories)
    has_provider = "provider" in groups
    if has_provider:
        # Jobs store this field as "service"; translate the public
        # "provider" key here and map it back in the results afterwards.
        groups.remove("provider")
        groups.append("service")
    group_categories = {field: "$" + field for field in groups}  # fields that can generate groups
    group_pipeline = [
        {
            "$group": {
                "_id": group_categories,      # grouping categories to aggregate corresponding jobs
                "jobs": {"$push": "$$ROOT"},  # matched jobs for corresponding grouping categories
                "count": {"$sum": 1},         # count of matches for corresponding grouping categories
            },
        },
        {   # noqa: E123  # ignore indentation checks
            "$project": {
                "_id": False,        # removes "_id" field from results
                "category": "$_id",  # renames "_id" grouping categories key
                "jobs": "$jobs",     # preserve field
                "count": "$count",   # preserve field
            }
        }
    ]
    pipeline = self._apply_total_result(pipeline, group_pipeline)
    LOGGER.debug("Job search pipeline:\n%s", repr_json(pipeline, indent=2))
    found = list(
        self.collection.aggregate(pipeline, collation=Collation(locale="en")))
    items = found[0]["items"]
    # convert to Job object where applicable, since pipeline result contains (category, jobs, count)
    items = [{
        k: (v if k != "jobs" else [Job(j) for j in v])
        for k, v in i.items()
    } for i in items]
    if has_provider:
        # Undo the provider -> service renaming for callers.
        for group_result in items:
            group_service = group_result["category"].pop("service", None)
            group_result["category"]["provider"] = group_service
    total = found[0]["total"] if items else 0
    return items, total
def test_to_list_with_chained_collation(self):
    """Chained collation() on a find cursor still yields sorted results."""
    if not (yield at_least(self.cx, (3, 4))):
        raise SkipTest("collation requires MongoDB >= 3.4")
    yield self.make_test_data()
    cursor = (
        self.collection.find({}, {'_id': 1})
        .sort([('_id', pymongo.ASCENDING)])
        .collation(Collation("en"))
    )
    docs = yield cursor.to_list(length=1000)
    self.assertEqual([{'_id': n} for n in range(200)], docs)
def get(self):
    """Return all patients as JSON, sorted by last then first name
    (case-insensitive 'en' collation)."""
    cursor = (
        mongo.db.patients
        .find({}, {'_id': 1, 'patient_first_name': 1, 'patient_last_name': 1})
        .sort([('patient_last_name', 1), ('patient_first_name', 1)])
        .collation(Collation(locale='en'))
    )
    return jsonify(list(cursor))
def update(self, jsonKey, myjson):
    """Apply a ``$set`` update to every document matching ``jsonKey``.

    Matching is accent/case-insensitive (Spanish collation, secondary strength).

    :param jsonKey: filter document selecting the records to update.
    :param myjson: fields to set on the matched documents.
    :returns: ``True`` once the update command completed.
    """
    # update_many() raises on failure and never returns None, so this
    # effectively always returns True. The fallback was originally an
    # empty list, which made the return type inconsistent; it is now a
    # boolean in both branches.
    result = False
    docs = self.collection.update_many(
        jsonKey,
        {"$set": myjson},
        upsert=False,
        collation=Collation(locale='es', strength=CollationStrength.SECONDARY))
    if docs is not None:
        result = True
    return result
def select_from_list(self, key, mylist):
    """Return all documents whose ``key`` field matches any value in ``mylist``.

    Matching is accent/case-insensitive (Spanish collation, secondary
    strength); the ``_id`` field is excluded from the results.

    :param key: document field to match against.
    :param mylist: values accepted for that field (used with ``$in``).
    :returns: list of matching documents (possibly empty).
    """
    # find() always returns a cursor and never None, so the original
    # ``if docs is not None`` guard was dead code; behavior is unchanged.
    docs = self.collection.find(
        {key: {"$in": mylist}},
        {'_id': False},
        collation=Collation(locale='es', strength=CollationStrength.SECONDARY))
    return list(docs)
def get_objects_by_key_sorted_filter_yes(self, val, key, namespace):
    """Return objects of type ``val`` in ``namespace``, sorted by the
    language-specific name field with a case-insensitive collation.

    NOTE(review): the ``key`` argument is unconditionally overwritten
    below; it is kept only for interface compatibility — confirm callers.
    """
    key = "I_S_name" if self.lang == 'ru' else "I_S_name_" + self.lang
    query = {"I_S_type_this": val, "namespace": [namespace]}
    collation = Collation(locale=self.lang, strength=CollationStrength.SECONDARY)
    return app.config['OBJECTS'].find(query).sort(key, 1).collation(collation)
def get(self):
    """Return all users as JSON, sorted by last then first name
    (case-insensitive 'en' collation)."""
    cursor = (
        mongo.db.users
        .find({}, {'_id': 1, 'user_first_name': 1, 'user_last_name': 1})
        .sort([('user_last_name', 1), ('user_first_name', 1)])
        .collation(Collation(locale='en'))
    )
    return jsonify(list(cursor))
def _cursor_hook(self, cursor, req):
    """Apply additional methods for cursor"""
    # Nothing to do without request arguments.
    if not req or not req.args:
        return
    # Only Mongo cursors support these tweaks.
    if not isinstance(cursor, MongoCursor):
        return
    # http://api.mongodb.com/python/current/examples/collations.html
    # https://docs.mongodb.com/manual/reference/collation/
    if "collation" in req.args:
        options = std_json.loads(req.args["collation"])
        cursor.collation(Collation(**options))
def test_indexes_same_keys_different_collations(self):
    """Indexes on the same key may coexist when their collations differ."""
    self.db.test.drop()
    en_us = Collation('en_US')
    japanese = Collation('ja')
    self.db.test.create_indexes([
        IndexModel('fieldname', collation=en_us),
        IndexModel('fieldname', name='japanese_version', collation=japanese),
        IndexModel('fieldname', name='simple'),
    ])
    info = self.db.test.index_information()
    self.assertEqual(en_us.document['locale'],
                     info['fieldname_1']['collation']['locale'])
    self.assertEqual(japanese.document['locale'],
                     info['japanese_version']['collation']['locale'])
    self.assertNotIn('collation', info['simple'])
    # Dropping one collated index must leave the others intact.
    self.db.test.drop_index('fieldname_1')
    info = self.db.test.index_information()
    self.assertIn('japanese_version', info)
    self.assertIn('simple', info)
    self.assertNotIn('fieldname', info)
def __init__(self, username=None, password=None, server=None, authSource='admin',
             database='datanator', collection_str=None, readPreference='nearest',
             replicaSet=None):
    """Connect to MongoDB and prepare query helpers.

    Args:
        username (:obj:`str`, optional): MongoDB username.
        password (:obj:`str`, optional): MongoDB password.
        server (:obj:`str`, optional): MongoDB server address.
        authSource (:obj:`str`, optional): authentication database.
        database (:obj:`str`, optional): name of the database to use.
        collection_str (:obj:`str`, optional): name of the collection to open.
        readPreference (:obj:`str`, optional): MongoDB read preference.
        replicaSet (:obj:`str`, optional): replica-set name.
    """
    # General-purpose MongoDB connection helper.
    self.mongo_manager = mongo_util.MongoUtil(MongoDB=server, username=username,
                                              password=password, authSource=authSource,
                                              db=database, readPreference=readPreference,
                                              replicaSet=replicaSet)
    # Dedicated helper for the 'kegg_organism_code' collection.
    self.koc_manager = query_kegg_organism_code.QueryKOC(username=username,
                                                         password=password,
                                                         server=server,
                                                         authSource=authSource,
                                                         collection_str='kegg_organism_code',
                                                         readPreference=readPreference,
                                                         replicaSet=replicaSet)
    self.client, self.db, self.collection = self.mongo_manager.con_db(collection_str)
    # Case-insensitive comparisons for string queries.
    self.collation = Collation(locale='en', strength=CollationStrength.SECONDARY)
def __init__(self, MongoDB=None, db=None, collection_str=None, username=None,
             password=None, authSource='admin', readPreference='nearest',
             verbose=True, max_entries=float('inf')):
    """Init

    Args:
        MongoDB (:obj:`str`, optional): MongoDB server address.
        db (:obj:`str`, optional): name of the database.
        collection_str (:obj:`str`, optional): name of the collection to open.
        username (:obj:`str`, optional): MongoDB username.
        password (:obj:`str`, optional): MongoDB password.
        authSource (:obj:`str`, optional): authentication database.
        readPreference (:obj:`str`, optional): MongoDB read preference.
        verbose (:obj:`bool`, optional): whether to display verbose messages.
        max_entries (:obj:`int`, optional): maximum number of records to process.
    """
    super().__init__(MongoDB=MongoDB, db=db, username=username, password=password,
                     authSource=authSource, readPreference=readPreference)
    # Case-insensitive comparisons for string queries.
    self.collation = Collation('en', strength=CollationStrength.SECONDARY)
    self.collection = self.db_obj[collection_str]
    # NOTE(review): the 'datanator' database name is hard-coded here even
    # though ``db`` is configurable — confirm this is intended.
    self.taxon_collection = self.client['datanator']['taxon_tree']
    self.max_entries = max_entries
    self.verbose = verbose
def filesystem_load(self, data_dir):
    """Bulk-load documents produced by ``file_generator`` into the collection.

    A unique index on ``title`` with a strength-1 'en' collation prevents
    near-duplicate titles; duplicate-key failures are deliberately ignored
    so re-running the load is idempotent.

    :param data_dir: directory scanned by ``file_generator``.
    :returns: ``self``, to allow chaining.
    """
    collection = self.mongodb_collection
    file_data = file_generator(data_dir=data_dir)
    collation = Collation(locale="en", strength=1)
    collection.create_index("title", name="avoid_dups", unique=True,
                            collation=collation)
    try:
        # ordered=False keeps inserting past duplicate-key errors; the
        # BulkWriteError raised afterwards only reports skipped duplicates.
        # (The original bound the unused return value to ``result``.)
        collection.insert_many(file_data, ordered=False)
    except errors.BulkWriteError:
        # Best-effort load: duplicates are expected on re-runs.
        pass
    return self
def __init__(self, cache_dir=None, server=None, src_db=None, protein_col=None,
             authDB=None, readPreference=None, username=None, password=None,
             verbose=None, max_entries=None, des_db=None, rna_col=None):
    """Init

    Args:
        cache_dir (:obj:`str`, optional): Cache directory for logs. Defaults to None.
        server (:obj:`str`, optional): MongoDB server address. Defaults to None.
        src_db (:obj:`str`, optional): Database where the source uniprot collection resides. Defaults to None.
        protein_col (:obj:`str`, optional): Name of the protein collection. Defaults to None.
        authDB (:obj:`str`, optional): MongoDB authentication database. Defaults to None.
        readPreference (:obj:`str`, optional): MongoDB read preference. Defaults to None.
        username (:obj:`str`, optional): MongoDB username. Defaults to None.
        password (:obj:`str`, optional): MongoDB password. Defaults to None.
        verbose (:obj:`bool`, optional): Whether to display verbose messages. Defaults to None.
        max_entries (:obj:`int`, optional): Number of records to be processed. Defaults to None.
        des_db (:obj:`str`, optional): Database to which new records will be inserted. Defaults to None.
        rna_col (:obj:`str`, optional): Name of the RNA collection. Defaults to None.
    """
    super().__init__(server=server, username=username, password=password,
                     src_db=src_db, des_db=des_db, protein_col=protein_col,
                     rna_col=rna_col, authDB=authDB, readPreference=readPreference,
                     max_entries=max_entries, verbose=verbose, cache_dir=cache_dir)
    # Helper for querying the source 'uniprot' collection.
    self.uniprot_query = query_uniprot.QueryUniprot(
        username=username, password=password, server=server,
        authSource=authDB, collection_str='uniprot',
        readPreference=readPreference)
    # Case-insensitive comparisons for string queries.
    self.collation = Collation('en', strength=CollationStrength.SECONDARY)
    self.max_entries = max_entries
    self.verbose = verbose
def copy_uniprot(self):
    """Copy relevant information from the ``uniprot`` collection into ``self.col``.

    The ``status`` and ``_id`` fields are excluded, and a case-insensitive
    ascending index on ``uniprot_id`` is built on the destination collection.
    """
    _, _, col_uniprot = self.mongo_manager.con_db('uniprot')
    projection = {'status': 0, '_id': 0}
    # The original also ran count_documents({}) into an unused local,
    # which forced a needless full collection count; removed.
    docs = col_uniprot.find(filter={}, projection=projection)
    self.col.insert_many(docs)
    collation = Collation(locale='en', strength=CollationStrength.SECONDARY)
    self.col.create_index([("uniprot_id", pymongo.ASCENDING)],
                          background=True, collation=collation)
def init_db(db):
    """Create the indexes the application relies on."""
    # Usernames are unique and compared case-insensitively.
    db.user.create_index(
        'username',
        unique=True,
        collation=Collation(locale='en', strength=CollationStrength.SECONDARY))
    db.user_gomoku.create_index('userid', unique=True)
    # Plain lookup indexes for the gomoku collection.
    for field in ('status', 'game_host', 'game_guest'):
        db.gomoku.create_index(field)
def __init__(self, cache_dirname=None, MongoDB=None, replicaSet=None, db=None,
             verbose=False, max_entries=float('inf'), username=None,
             password=None, authSource='admin', meta_loc=None):
    """Connect to MongoDB and set up the helpers used to build metabolite metadata.

    Args:
        cache_dirname (:obj:`str`, optional): temporary cache directory.
        MongoDB (:obj:`str`, optional): MongoDB server address.
        replicaSet (:obj:`str`, optional): replica-set name.
        db (:obj:`str`, optional): database name.
        verbose (:obj:`bool`, optional): display verbose messages.
        max_entries (:obj:`int`, optional): maximum number of records to process.
        username (:obj:`str`, optional): MongoDB username.
        password (:obj:`str`, optional): MongoDB password.
        authSource (:obj:`str`, optional): authentication database.
        meta_loc (:obj:`str`, optional): location of the metadata — TODO confirm semantics.
    """
    self.cache_dirname = cache_dirname
    self.verbose = verbose
    self.MongoDB = MongoDB
    self.replicaSet = replicaSet
    self.max_entries = max_entries
    self.username = username
    self.password = password
    self.authSource = authSource
    self.meta_loc = meta_loc
    super(MetabolitesMeta, self).__init__(cache_dirname=cache_dirname, MongoDB=MongoDB,
                                          replicaSet=replicaSet, db=db,
                                          verbose=verbose, max_entries=max_entries,
                                          username=username, password=password,
                                          authSource=authSource)
    # Batch/reporting frequency used elsewhere in this class, presumably.
    self.frequency = 50
    self.chem_manager = chem_util.ChemUtil()
    self.file_manager = file_util.FileUtil()
    # Query helpers for the YMDB and ECMDB source collections.
    self.ymdb_query = query_xmdb.QueryXmdb(username=username, password=password,
                                           server=MongoDB, authSource=authSource,
                                           database=db, collection_str='ymdb',
                                           readPreference='nearest')
    self.ecmdb_query = query_xmdb.QueryXmdb(username=username, password=password,
                                            server=MongoDB, authSource=authSource,
                                            database=db, collection_str='ecmdb',
                                            readPreference='nearest')
    # Case-insensitive comparisons for string queries.
    self.collation = Collation('en', strength=CollationStrength.SECONDARY)
    self.client, self.db, self.collection = self.con_db('metabolites_meta')
def __init__(self, username=None, password=None, server=None, authSource='admin',
             database='datanator', max_entries=float('inf'), verbose=True,
             collection_str='sabio_compound', readPreference='nearest',
             replicaSet=None):
    """Connect to MongoDB and open the SABIO compound collection.

    Args:
        username (:obj:`str`, optional): MongoDB username.
        password (:obj:`str`, optional): MongoDB password.
        server (:obj:`str`, optional): MongoDB server address.
        authSource (:obj:`str`, optional): authentication database.
        database (:obj:`str`, optional): database name.
        max_entries (:obj:`int`, optional): maximum number of records to process.
        verbose (:obj:`bool`, optional): display verbose messages.
        collection_str (:obj:`str`, optional): collection name.
        readPreference (:obj:`str`, optional): MongoDB read preference.
        replicaSet (:obj:`str`, optional): replica-set name.
    """
    super().__init__(MongoDB=server, db=database, verbose=verbose,
                     max_entries=max_entries, username=username,
                     password=password, authSource=authSource,
                     readPreference=readPreference, replicaSet=replicaSet)
    self.file_manager = file_util.FileUtil()
    self.max_entries = max_entries
    self.verbose = verbose
    # Rebind self.db from the parent's db_obj handle.
    self.db = self.db_obj
    self.collection = self.db[collection_str]
    # Case-insensitive comparisons for string queries.
    self.collation = Collation(locale='en', strength=CollationStrength.SECONDARY)
    self.collection_str = collection_str
def __init__(self, server=config.AtlasConfig.SERVER,
             username=config.AtlasConfig.USERNAME,
             password=config.AtlasConfig.PASSWORD,
             authSource=config.AtlasConfig.AUTHDB,
             replicaSet=config.AtlasConfig.REPLSET,
             readPreference=config.AtlasConfig.READ_PREFERENCE):
    """Connect to MongoDB using the AtlasConfig defaults.

    All parameters default to the corresponding ``config.AtlasConfig``
    values, so a no-argument construction targets the configured Atlas
    cluster.
    """
    super().__init__(MongoDB=server, replicaSet=replicaSet, username=username,
                     password=password, authSource=authSource,
                     readPreference=readPreference)
    # Driver-level read-preference object converted from the string form.
    self.read_preference = self._convert_read_p(readPreference)
    # Case-insensitive comparisons for string queries.
    self.collation = Collation(locale='en', strength=CollationStrength.SECONDARY)
def __init__(self, username=None, password=None, server=None, authSource='admin',
             src_database='datanator', max_entries=float('inf'), verbose=True,
             collection='sabio_reaction_entries',
             destination_database='datanator', cache_dir=None):
    '''
    Args:
        src_database (:obj: `str`): name of database in which source collections reside
        destination_database (:obj: `str`): name of database to put the aggregated collection
        username (:obj: `str`, optional): MongoDB username.
        password (:obj: `str`, optional): MongoDB password.
        server (:obj: `str`, optional): MongoDB server address.
        authSource (:obj: `str`, optional): authentication database.
        max_entries (:obj: `int`, optional): maximum number of records to process.
        verbose (:obj: `bool`, optional): display verbose messages.
        collection (:obj: `str`, optional): name of the destination collection.
        cache_dir (:obj: `str`, optional): cache directory.
    '''
    super().__init__(MongoDB=server, db=destination_database,
                     username=username, password=password,
                     authSource=authSource)
    self.col = self.db_obj[collection]
    # Helper for the legacy SABIO-RK collection.
    self.query_manager = query_sabiork_old.QuerySabioOld(MongoDB=server,
                                                         password=password,
                                                         authSource=authSource,
                                                         username=username)
    # Direct handle into the source database's metabolites_meta collection.
    self.metabolites_meta_manager = self.client[src_database]['metabolites_meta']
    self.file_manager = file_util.FileUtil()
    # Case-insensitive comparisons for string queries.
    self.collation = Collation(locale='en', strength=CollationStrength.SECONDARY)
    self.verbose = verbose
    self.max_entries = max_entries
def query_insensitive_timeseries(self, word=None, start_time=None):
    """Query database for n-gram timeseries (case-insensitive)

    Args:
        word (string): target ngram
        start_time (datetime): starting date for the query

    Returns (pd.DataFrame):
        dataframe of count, rank, and frequency over time for an n-gram
    """
    out_fields = ['count', 'count_no_rt', 'rank', 'rank_no_rt', 'freq', 'freq_no_rt']
    db_fields = ['counts', 'count_noRT', 'rank', 'rank_noRT', 'freq', 'freq_noRT']
    if start_time:
        mongo_query = {'word': word, 'time': {'$gte': start_time}}
        first_day = start_time
    else:
        mongo_query = {'word': word}
        first_day = datetime.datetime(2019, 9, 1)
    # One row of NaNs per calendar day in the requested range.
    days = pd.date_range(start=first_day.date(),
                         end=datetime.datetime.today().date(),
                         freq='D').date
    data = {day: {field: np.nan for field in out_fields} for day in days}
    matches = self.tweets.find(mongo_query).collation(
        Collation(locale=self.lang, strength=CollationStrength.SECONDARY))
    for doc in matches:
        day = doc['time'].date()
        for out_field, db_field in zip(out_fields, db_fields):
            # First case-variant seen for a day replaces the NaN; later
            # variants are summed into it.
            if np.isnan(data[day][out_field]):
                data[day][out_field] = doc[db_field]
            else:
                data[day][out_field] += doc[db_field]
    frame = pd.DataFrame.from_dict(data=data, orient='index')
    frame.index = pd.to_datetime(frame.index)
    frame.index.name = word
    return frame
def search_corpus(query, limit, paginated, page, show_properties, sorting):
    """Uses the query generated in /search2 and returns the search results.

    :param query: MongoDB filter document.
    :param limit: maximum number of documents to fetch.
    :param paginated: when truthy, return one page of results.
    :param page: page number to return when paginated.
    :param show_properties: projection document for the result fields.
    :param sorting: sort specification; falls back to name ascending.
    :returns: tuple ``(results, num_pages, errors)``.
    """
    page_size = 10
    errors = []
    # Emptiness check via count_documents: the original materialized the
    # entire collection (len(list(corpus_db.find()))) just to test this.
    if corpus_db.count_documents({}) > 0:
        cursor = corpus_db.find(query, limit=limit,
                                projection=show_properties).collation(
            Collation(locale='en_US', numericOrdering=True))
        if sorting:
            cursor = cursor.sort(sorting)
        else:
            cursor = cursor.sort('name', pymongo.ASCENDING)
        result = list(cursor)
        if result:
            if paginated:
                pages = list(paginate(result, page_size=page_size))
                num_pages = len(pages)
                return get_page(pages, page), num_pages, errors
            return result, 1, errors
        return [], 1, errors
    errors.append('The Corpus database is empty.')
    return [], 1, errors
def __init__(self, cache_dirname=None, MongoDB=None, replicaSet=None,
             db='datanator', collection_str='sabio_rk_old', verbose=False,
             max_entries=float('inf'), username=None, password=None,
             authSource='admin', readPreference='nearest'):
    """Connect to MongoDB and set up the helpers used to query legacy SABIO-RK data.

    Args:
        cache_dirname (:obj:`str`, optional): temporary cache directory.
        MongoDB (:obj:`str`, optional): MongoDB server address.
        replicaSet (:obj:`str`, optional): replica-set name.
        db (:obj:`str`, optional): database name.
        collection_str (:obj:`str`, optional): collection name.
        verbose (:obj:`bool`, optional): display verbose messages.
        max_entries (:obj:`int`, optional): maximum number of records to process.
        username (:obj:`str`, optional): MongoDB username.
        password (:obj:`str`, optional): MongoDB password.
        authSource (:obj:`str`, optional): authentication database.
        readPreference (:obj:`str`, optional): MongoDB read preference.
    """
    self.max_entries = max_entries
    super().__init__(cache_dirname=cache_dirname, MongoDB=MongoDB,
                     replicaSet=replicaSet, db=db, verbose=verbose,
                     max_entries=max_entries, username=username,
                     password=password, authSource=authSource,
                     readPreference=readPreference)
    # NOTE(review): 'datanator-test' is hard-coded for the uniprot handle,
    # ignoring the ``db`` argument — confirm intended.
    self.u = self.client["datanator-test"]["uniprot"]
    self.chem_manager = chem_util.ChemUtil()
    self.file_manager = file_util.FileUtil()
    self.collection = self.db_obj[collection_str]
    self.collection_str = collection_str
    # Taxonomy-tree query helper.
    self.taxon_manager = query_taxon_tree.QueryTaxonTree(
        username=username, password=password, authSource=authSource,
        readPreference=readPreference, MongoDB=MongoDB, replicaSet=replicaSet)
    # SABIO compound query helper.
    self.compound_manager = query_sabio_compound.QuerySabioCompound(
        server=MongoDB, database=db, username=username, password=password,
        readPreference=readPreference, authSource=authSource,
        replicaSet=replicaSet)
    # Case-insensitive comparisons for string queries.
    self.collation = Collation(locale='en', strength=CollationStrength.SECONDARY)
def test_update(self):
    """update() honors the model's default collation; a keyword argument
    overrides it."""
    matched = ModelForCollations.objects.raw({'name': 'Aargren'}).update(
        {'$set': {'touched': 1}})
    self.assertEqual(2, matched)
    # Override with keyword argument.
    tertiary = Collation('en_US', strength=CollationStrength.TERTIARY)
    matched = ModelForCollations.objects.raw({'name': 'Aargren'}).update(
        {'$set': {'touched': 2}}, collation=tertiary)
    self.assertEqual(1, matched)
def get_abundance_from_uniprot(self, uniprot_id):
    ''' Get all abundance data for uniprot_id
        Args:
            uniprot_id (:obj:`str`): protein uniprot_id.
        Returns:
            result (:obj:`list` of :obj:`dict`): result containing
            [{'ncbi_taxonomy_id': , 'species_name': , 'ordered_locus_name': },
            {'organ': , 'abundance'}, {'organ': , 'abundance'}].
            Empty list when nothing matches.
    '''
    query = {'observation.protein_id.uniprot_id': uniprot_id}
    # 'observation.$' projects only the first array element matching the
    # query from each document.
    projection = {
        'ncbi_id': 1,
        'species_name': 1,
        'observation.$': 1,
        'organ': 1
    }
    # Case-insensitive uniprot_id matching.
    collation = Collation(locale='en', strength=CollationStrength.SECONDARY)
    docs = self.collection.find(filter=query,
                                projection=projection,
                                collation=collation)
    # Total match count, used only for the progress message below.
    count = self.collection.count_documents(query)
    try:
        # docs[0] fetches the first document via cursor indexing without
        # consuming the iteration below; raises IndexError when no match.
        result = [{
            'ncbi_taxonomy_id': docs[0]['ncbi_id'],
            'species_name': docs[0]['species_name']
        }]
    except IndexError:
        return []
    for i, doc in enumerate(docs):
        # NOTE(review): this breaks when i > max_entries, so up to
        # max_entries + 1 documents are processed — confirm intended.
        if i > self.max_entries:
            break
        if self.verbose and i % 50 == 0:
            print('Processing pax document {} out of {}'.format(i, count))
        organ = doc['organ']
        abundance = doc['observation'][0]['abundance']
        ordered_locus_name = doc['observation'][0]['string_id']
        # The locus name from the last processed document wins —
        # presumably they are all the same for one uniprot_id; confirm.
        result[0]['ordered_locus_name'] = ordered_locus_name
        dic = {'organ': organ, 'abundance': abundance}
        result.append(dic)
    return result
def __init__(self, cache_dirname=None, MongoDB=None, replicaSet=None,
             db='datanator', collection_str='pax', verbose=False,
             max_entries=float('inf'), username=None, password=None,
             authSource='admin', readPreference='nearest'):
    """Instantiating query_pax

    Args:
        cache_dirname (str, optional): temporary cache directory. Defaults to None.
        MongoDB (str, optional): mongodb server address. Defaults to None.
        replicaSet (str, optional): name of mongodb replicaset. Defaults to None.
        db (str, optional): name of database in which pax collection resides. Defaults to 'datanator'.
        collection_str (str, optional): name of collection. Defaults to 'pax'.
        verbose (bool, optional): display verbose messages. Defaults to False.
        max_entries (float, optional): max number of operations, mainly used for tests. Defaults to float('inf').
        username (str, optional): db authentication username. Defaults to None.
        password (str, optional): db authentication password. Defaults to None.
        authSource (str, optional): authentication database. Defaults to 'admin'.
        readPreference (str, optional): mongodb readpreference. Defaults to 'nearest'.
    """
    super().__init__(cache_dirname=cache_dirname, MongoDB=MongoDB,
                     replicaSet=replicaSet, db=db, verbose=verbose,
                     max_entries=max_entries, username=username,
                     password=password, authSource=authSource,
                     readPreference=readPreference)
    # Case-insensitive comparisons for string queries.
    self.collation = Collation(locale='en', strength=CollationStrength.SECONDARY)
    self.chem_manager = chem_util.ChemUtil()
    self.file_manager = file_util.FileUtil()
    self.max_entries = max_entries
    self.verbose = verbose
    self.collection = self.db_obj[collection_str]
def view_transformation(transformation_id):
    """Render the detail page for one transformation together with every
    version sharing its transformationId, newest first with numeric
    version ordering.

    :param transformation_id: hex string for the document's ObjectId.
    """
    try:
        db = get_db()
        database = db[current_app.config['TRANSFORMATIONS_DATABASE_NAME']]
        transformation = database.transformations.find_one(
            {"_id": ObjectId(transformation_id)})
        alltransformations = database.transformations.find(
            {"transformationId": transformation["transformationId"]}
        ).sort("version", -1).collation(
            Collation(locale='en_US', numericOrdering=True))
    except Exception as e:
        # Report the actual error (the original printed only the bare word
        # "Exception" and discarded ``e``), then re-raise so Flask's error
        # handling still applies.
        print("Exception:", e)
        raise
    return render_template('pages/view_transformation.html',
                           originalTransformation=transformation,
                           transformations=alltransformations,
                           getIcon=getIcon,
                           replaceEmptyString=replaceEmptyString)
def chapterdetail(self, event):
    """Populate the chapter combobox for the currently selected book.

    Resolves the selected book title to its id, queries its chapters
    sorted by ``sid`` with numeric zh collation, and fills the chapter
    widget (first entry is the placeholder "請選擇").
    """
    bookname = self.bookcom.get()
    self.currentbook = bookname
    # Dict lookup replaces the original linear scan. It returns None (not
    # the original "" sentinel) for an unknown title, so the guard below
    # actually skips the query — previously ``bookid`` was initialized to
    # "" and the ``is not None`` test could never be False.
    bookid = self.db_dict.get(bookname)
    if bookid is not None:
        self.currentbookid = bookid
        binfo = self.collectioninfo.find(
            {"book_id": "{}".format(bookid)}).collation(
            Collation(locale='zh', numericOrdering=True)).sort("sid")
        # Start from the placeholder tuple and append each chapter title.
        titles = ("請選擇",)
        for chapter in binfo:
            titles = titles + (chapter["title"],)
            self.currentbookchapter.append(chapter["title"])
        self.chapter["values"] = titles
        self.chapter.current(0)
def __init__(self, MongoDB=None, db=None, max_entries=float('inf'),
             verbose=False, username=None, password=None, authSource='admin',
             replicaSet=None, collection_str='uniprot'):
    """Connect to MongoDB and set up the query helpers used while loading UniProt data.

    Args:
        MongoDB (:obj:`str`, optional): MongoDB server address.
        db (:obj:`str`, optional): database name.
        max_entries (:obj:`int`, optional): maximum number of records to process.
        verbose (:obj:`bool`, optional): display verbose messages.
        username (:obj:`str`, optional): MongoDB username.
        password (:obj:`str`, optional): MongoDB password.
        authSource (:obj:`str`, optional): authentication database.
        replicaSet (:obj:`str`, optional): replica-set name.
        collection_str (:obj:`str`, optional): collection name.
    """
    # UniProt REST endpoints: bulk reviewed entries and per-query search.
    self.url = 'http://www.uniprot.org/uniprot/?fil=reviewed:yes'
    self.query_url = 'https://www.uniprot.org/uniprot/?query='
    self.MongoDB = MongoDB
    self.db = db
    self.max_entries = max_entries
    self.collection_str = collection_str
    super(UniprotNoSQL, self).__init__(MongoDB=MongoDB, db=db,
                                       username=username, password=password,
                                       authSource=authSource,
                                       replicaSet=replicaSet, verbose=verbose,
                                       max_entries=max_entries)
    # Helpers for taxonomy, KEGG orthology, and SABIO-RK lookups.
    self.taxon_manager = query_taxon_tree.QueryTaxonTree(username=username,
                                                         MongoDB=MongoDB,
                                                         password=password,
                                                         authSource=authSource)
    self.ko_manager = query_kegg_orthology.QueryKO(username=username,
                                                   password=password,
                                                   server=MongoDB,
                                                   authSource=authSource,
                                                   verbose=verbose,
                                                   max_entries=max_entries)
    self.sabio_manager = query_sabiork.QuerySabio(MongoDB=MongoDB,
                                                  username=username,
                                                  password=password,
                                                  authSource=authSource)
    # Case-insensitive comparisons for string queries.
    self.collation = Collation(locale='en', strength=CollationStrength.SECONDARY)
    self.collection = self.db_obj[collection_str]
    self.verbose = verbose