class Meta:
    indexes = [
        pymongo.IndexModel([('agentId', pymongo.ASCENDING)], name="OffersagentIdIndexField"),
        pymongo.IndexModel([('valid_from', pymongo.ASCENDING)], name="OffersvalidFromIndexField"),
        pymongo.IndexModel([('valid_to', pymongo.ASCENDING)], name="OffersvalidToIndexField"),
    ]
def ensure_indexes(self):
    """Create the queue indexes.

    next_index covers the dequeue query on ("locked_by", "locked_at",
    "channel", "attempts"), update_index covers lock updates by
    ("_id", "locked_by"), and unique_index enforces one entry per
    ("job_id", "channel").
    """
    next_index = pymongo.IndexModel(
        [("locked_by", pymongo.ASCENDING),
         ("locked_at", pymongo.ASCENDING),
         ("channel", pymongo.ASCENDING),
         ("attempts", pymongo.ASCENDING)],
        name="next_index")
    update_index = pymongo.IndexModel(
        [("_id", pymongo.ASCENDING),
         ("locked_by", pymongo.ASCENDING)],
        name="update_index")
    unique_index = pymongo.IndexModel(
        [("job_id", pymongo.ASCENDING),
         ("channel", pymongo.ASCENDING)],
        name="unique_index", unique=True)
    self.collection.create_indexes([next_index, update_index, unique_index])
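# A minimal sketch of the dequeue query that next_index is shaped for; the
# find_one_and_update locking pattern, the channel argument, and the
# max_attempts cutoff are assumptions for illustration, not from the source.
import datetime
import pymongo

def sketch_dequeue(collection, channel, worker_id, max_attempts=5):
    """Hypothetical: atomically claim the next unlocked job on a channel."""
    return collection.find_one_and_update(
        # Filter touches locked_by, locked_at, channel and attempts in the
        # same order as next_index, so the query can be served by the index.
        {"locked_by": None, "locked_at": None,
         "channel": channel, "attempts": {"$lt": max_attempts}},
        {"$set": {"locked_by": worker_id,
                  "locked_at": datetime.datetime.utcnow()}},
        return_document=pymongo.ReturnDocument.AFTER,
    )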
def init_index(self, collection_name):
    """Initialize the indexes for a collection.

    :return:
    """
    if collection_name in ["http", "https"]:
        index1 = pymongo.IndexModel([("host", 1)])
    else:
        index1 = pymongo.IndexModel([("host", 1)], unique=True)
    index2 = pymongo.IndexModel([("host_status", 1)])
    index3 = pymongo.IndexModel([("check_status", 1)])
    index4 = pymongo.IndexModel([("scan_status", 1)])
    self.db[collection_name].create_indexes(
        [index1, index2, index3, index4]
    )
def __init__(self):
    core.plugins.CSTask.__init__(self, mod_name, build)
    catstuff.core.dbs.CSCollection.__init__(self, mod_name)
    self.coll.create_indexes([
        pymongo.IndexModel([(index, pymongo.ASCENDING)], name=index)
        for index in self.indexes
    ])
def load_interpro_list(file_batch_number):
    url = 'http://localhost:8080/SearchPrototype/interpro-id-desc_all.txt'
    #url = 'http://ec2-54-148-99-18.us-west-2.compute.amazonaws.com:9200/_plugin/head/mirna.txt'
    r = requests.get(url)
    lines = r.iter_lines(decode_unicode=True)

    def parse(lines):
        for line in lines:
            try:
                interproId, interproDesc = line.split('~')
                yield {
                    'interpro_id': interproId,
                    'interpro_desc': interproDesc
                }
            except Exception as e:
                warningLabel = str(e)

    db = pymongo.MongoClient().dataset
    collection = db.interpro
    collection.drop()
    count = 0
    iterator = parse(lines)
    while True:
        records = list(islice(iterator, 1000))
        if len(records) > 0:
            count += len(collection.insert_many(records).inserted_ids)
        else:
            break
    collection.create_indexes([
        pymongo.IndexModel([('interpro_id', pymongo.ASCENDING)]),
    ])
def __init__(self, dburi):
    """Constructor

    Args:
        dburi(str): MongoDB URI
    """
    client = pm.MongoClient(dburi)
    db = client['sequann']
    self.collection = db['amr']

    # Create unique index
    index1 = pm.IndexModel([("type", pm.DESCENDING),
                            ("genome", pm.DESCENDING),
                            ("contig", pm.DESCENDING),
                            ("subject", pm.DESCENDING),
                            ("qstart", pm.DESCENDING),
                            ("qend", pm.DESCENDING)],
                           name="sa_unique_i1", unique=True)
    if "sa_unique_i1" not in self.collection.index_information():
        self.collection.create_indexes([index1])

    if logger:
        self.logger = logger
    else:
        self.logger = logging.getLogger('sequann.src.annot.AnnotDB')
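# With sa_unique_i1 in place, a second insert repeating all six key fields
# is rejected. A minimal sketch, assuming `collection` is the 'amr'
# collection created above; the field values are illustrative.
import pymongo

key_fields = {"type": "blast", "genome": "g1", "contig": "c1",
              "subject": "s1", "qstart": 10, "qend": 200}
collection.insert_one({**key_fields})
try:
    collection.insert_one({**key_fields})  # same key fields, fresh _id
except pymongo.errors.DuplicateKeyError:
    print("rejected by sa_unique_i1")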
def update_all_auctions():
    print('Update started...')
    #uuid_index = pymongo.IndexModel([('uuid', pymongo.ASCENDING)], name='uuid')
    item_name_price_index = pymongo.IndexModel(
        [('item_name', pymongo.TEXT), ('starting_bid', 1)],
        name='item_name_text_starting_bid')
    update_times = []
    start_time = time.time()
    auctionsDB_new = db['auctions_new']
    auctionsDB_new.create_indexes([item_name_price_index])
    page = 0
    while True:
        start_time_loop = time.time()
        print('Fetching page ' + str(page) + '...')
        res = requests.get(API_BASE + str(page) + KEY).json()
        print("Fetch took: " + str(time.time() - start_time_loop))
        update_time_start = time.time()
        if res['success']:
            auctions = res['auctions']
            for i in range(0, len(auctions)):
                auctions[i] = modify_auction_data(auctions[i])
            auctionsDB_new.insert_many(auctions, ordered=False)
            update_time = time.time() - update_time_start
            update_times.append(update_time)
            print('Updated page ' + str(page))
            page += 1
        else:
            break
    auctionDB.drop()
    auctionsDB_new.rename('auctions')
    print('Update took ' + str(time.time() - start_time))
    print('Average DB query took: ' + str(statistics.mean(update_times)))
    update_lowest_prices()
def load_terms_from_file():
    client = pymongo.MongoClient()
    db = client.identifiers
    allterms = db.allterms
    allterms.drop()
    url = 'http://ec2-52-26-19-122.us-west-2.compute.amazonaws.com:8080/all-terms3.tsv'
    r = requests.get(url)
    lines = list(r.iter_lines(decode_unicode=True))
    count = 0
    for idx, line in enumerate(lines):
        term, term_type = line.split('\t')
        term_to_add = {'term': term.upper(), 'type': term_type}
        allterms.insert_one(term_to_add)  # save() is deprecated in pymongo 3+
        count = count + 1
        if count % 200 == 0:
            print(count)
    allterms.create_indexes(
        [pymongo.IndexModel([('term', pymongo.ASCENDING)])])
    print('Done')
class Meta:
    indexes = [
        pymongo.IndexModel([('email', pymongo.ASCENDING)], name="AgentEmailUniqueIndex", unique=True),
        pymongo.IndexModel([("location", pymongo.GEOSPHERE)]),
        pymongo.IndexModel([('name', pymongo.ASCENDING), ('consumer_id', pymongo.ASCENDING)], name="NameIndex")
    ]

# Example $near queries against the GEOSPHERE index:
# asd = rest.objects.raw({'location': {'$near': SON([('$geometry', SON([('type', 'Point'), ('coordinates', [-122.406417, 37.785834])])), ('$maxDistance', 500)])}})
# asd = rest.objects.raw({"location": {"$near": {"$geometry": {"type": "Point", "coordinates": [-122.406417, 37.785834]}, "$maxDistance": 500}}})
# for i in asd:
#     print(i._data)
def load_pubmed_counts_list():
    url = 'http://ec2-54-148-99-18.us-west-2.compute.amazonaws.com:9200/_plugin/head/pubmed_with_counts_complete.txt'
    r = requests.get(url)
    lines = r.iter_lines(decode_unicode=True)

    def parse(lines):
        for line in lines:
            try:
                gene, gene_count = line.split('\t')
                yield {'gene': gene.upper(), 'abstract_count': gene_count}
            except Exception as e:
                warningLabel = str(e)

    client = pymongo.MongoClient()
    db = client.dataset
    collection = db.pubmed_counts
    collection.drop()
    count = 0
    iterator = parse(lines)
    for line in iterator:
        insert_this_gene_record = {
            'gene_name': line.get('gene'),
            'abstract_count': line.get('abstract_count')
        }
        collection.insert_one(insert_this_gene_record)
        count += 1
        print('%s' % count)
    collection.create_indexes(
        # documents store the gene symbol under 'gene_name'
        [pymongo.IndexModel([('gene_name', pymongo.ASCENDING)])])
    client.close()  # Database objects have no close(); close the client
def create_archived_collection(db):
    coll = db.get_collection("archived")
    field_names = ["ossim_id", "batch_name", "batch_timestamp",
                   "archive_timestamp"]
    index_models = []
    for name in field_names:
        index_models.append(pymongo.IndexModel([(name, pymongo.ASCENDING)]))
    coll.create_indexes(index_models)
def aggregate_and_index(self, write_table_name):
    if not isinstance(self.filter_rule, list):
        data = "Filter Rule Error: {}".format(str(self.filter_rule))
        self.result.append(data)
        return False
    read_cursor = self.read_table.aggregate(self.filter_rule, allowDiskUse=True)
    data = [x for x in read_cursor]
    read_cursor.close()
    try:
        self.do_write(write_table_name, data)  # write to the target DB
    except Exception as e:
        self.result.append("Error: {}".format(e))
        return False
    data = 'write table {}/{} completed.'.format(self.args['target_db'], write_table_name)
    self.result.append(data)

    # Add indexes
    count_idx = pymongo.IndexModel([('count', pymongo.ASCENDING)], name='count_idx')
    num_idx = pymongo.IndexModel([('num', pymongo.ASCENDING)], name='num_idx')
    storeId_idx = pymongo.IndexModel([('storeId', pymongo.ASCENDING)], name='storeId_idx')
    avg_sum_idx = pymongo.IndexModel([('avg_sum', pymongo.DESCENDING)], name='avg_sum_idx')
    province_idx = pymongo.IndexModel([('province', pymongo.ASCENDING)], name='province_idx')
    province_city_idx = pymongo.IndexModel(
        [('province', pymongo.ASCENDING), ('city', pymongo.ASCENDING)],
        name='province_city_idx')
    sum_idx = pymongo.IndexModel([('sum', pymongo.ASCENDING)], name='sum_idx')
    province_city_district_idx = pymongo.IndexModel(
        [('province', pymongo.ASCENDING), ('city', pymongo.ASCENDING),
         ('district', pymongo.ASCENDING)],
        name='province_city_district_idx')
    province_city_district_zone_idx = pymongo.IndexModel(
        [('province', pymongo.ASCENDING), ('city', pymongo.ASCENDING),
         ('district', pymongo.ASCENDING), ('zone', pymongo.ASCENDING)],
        name='province_city_district_zone_idx')
    # self.target_db[write_table_name].create_indexes([count_idx, num_idx, storeId_idx, avg_sum_idx, province_idx, province_city_idx, province_city_district_idx, province_city_district_zone_idx, sum_idx])
    self.conn['view'][write_table_name].create_indexes([
        count_idx, num_idx, storeId_idx, avg_sum_idx, province_idx,
        province_city_idx, province_city_district_idx,
        province_city_district_zone_idx, sum_idx
    ])
    idx_data = 'create index {}/{} completed. create index {}/{} completed'.format(
        self.args['target_db'], write_table_name, 'view', write_table_name)
    self.result.append(idx_data)
    return True
def open_spider(self, spider):
    self.client = pymongo.MongoClient(self.mongo_uri,
                                      username=self.mongo_user,
                                      password=self.mongo_pass)
    self.db = self.client[self.mongo_db]
    idx = pymongo.IndexModel([('org_link', pymongo.ASCENDING)], unique=True)
    self.db[self.collection_name].create_indexes([idx])
def index_corum(self, collection_str):
    '''Index fields in corum collection
    '''
    if self.verbose:
        print('Indexing corum ...')
    collection = self.con_db(collection_str)
    index1 = pymongo.IndexModel([("$**", pymongo.TEXT)],
                                background=False, sparse=True)  # index all text fields
    index2 = pymongo.IndexModel([("PubMed ID", pymongo.ASCENDING)],
                                background=False, sparse=True)
    index3 = pymongo.IndexModel(
        [("SWISSPROT organism (NCBI IDs)", pymongo.ASCENDING)],
        background=False, sparse=True)
    collection.create_indexes([index1, index2, index3])
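# The $** wildcard text index above makes every string field searchable
# with $text. A minimal sketch of such a query; the search phrase and the
# textScore sorting are illustrative.
results = collection.find(
    {"$text": {"$search": "proteasome"}},
    {"score": {"$meta": "textScore"}},
).sort([("score", {"$meta": "textScore"})])
for doc in results.limit(5):
    print(doc)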
async def create_indexes(cls, db_store):
    coll = db_store.get_collection(cls._collection_name)
    await coll.create_indexes([
        pymongo.IndexModel([
            ('repo_slug', pymongo.ASCENDING),
            ('datetime', pymongo.ASCENDING),
        ], unique=True),
    ])
def load_gene_info():
    client = pymongo.MongoClient()
    db = client.datasets
    # collection stores metadata about source networks
    nih = db.nih
    url = 'http://ec2-54-148-99-18.us-west-2.compute.amazonaws.com:9200/_plugin/head/gene_info_small3b.txt'
    #url = 'http://ec2-54-148-99-18.us-west-2.compute.amazonaws.com:9200/_plugin/head/gene_info_small3c.txt'
    #url = 'http://ec2-54-148-99-18.us-west-2.compute.amazonaws.com:9200/_plugin/head/gene_info_smallx.txt'
    r = requests.get(url)
    lines = r.iter_lines(decode_unicode=True)
    next(lines)  # ignore header row

    def parse(lines):
        for line in lines:
            try:
                (field1, field2, field3, field4, field5, field6, field7,
                 field8, field9, field10, field11, field12, field13,
                 field14, field15) = line.split('\t')
                yield {
                    'Symbol': field3.upper(),
                    'GeneID': field2.upper(),
                    'Synonyms': field5.upper(),
                    'description': field9,
                    'type_of_gene': field10
                }
            except Exception as e:
                print(str(e))

    count = 0
    iterator = parse(lines)
    while True:
        records = list(islice(iterator, 1000))
        if len(records) > 0:
            count += len(nih.insert_many(records).inserted_ids)
            print('inserted %d identifiers (%d total)' % (len(records), count))
        else:
            break
    nih.create_indexes([
        pymongo.IndexModel([('Symbol', pymongo.ASCENDING)]),
        pymongo.IndexModel([('GeneID', pymongo.ASCENDING)])
    ])
class Settings:  # pylint: disable=too-few-public-methods
    """Sets the index used. Also defines the database 'table' name."""
    name = "GenericSensor"
    indexes = [
        pymongo.IndexModel(
            [("host", pymongo.ASCENDING)],
            unique=True,
        )
    ]
def _createGridColIndex(self):
    '''
    Create geospatial indexes.

    Warning: geospatial indexes require longitudes in the -180..+180 range.
    '''
    con = self._createMongoConn(cfg=self.cfg)
    col_grid = con['col_grid']
    idx = col_grid.index_information()
    should_idx = ['locgeo', 'id_grid_1']
    passed = all(item in list(idx.keys()) for item in should_idx)
    if not passed:
        index1 = pymongo.IndexModel([("loc", pymongo.GEOSPHERE)],
                                    name="locgeo")
        index2 = pymongo.IndexModel([("id_grid", pymongo.ASCENDING)],
                                    name="id_grid_1")
        col_grid.create_indexes([index1, index2])
        logging.info('Indexes added for the grid collection')
    else:
        logging.info('Grid indexes already exist.')
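# The locgeo 2dsphere index above is what $near queries run against.
# A minimal sketch; the point and distance are placeholders, and the
# coordinates use GeoJSON order: longitude first, then latitude.
nearby = col_grid.find({
    "loc": {
        "$near": {
            "$geometry": {"type": "Point",
                          "coordinates": [-122.4064, 37.7858]},
            "$maxDistance": 500,  # metres
        }
    }
})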
def index_metabolites_meta(self, collection_str='metabolites_meta'):
    '''Index metabolites_meta collection
    '''
    if self.verbose:
        print('Indexing metabolites_meta')
    _, _, collection = self.con_db(collection_str)
    index1 = pymongo.IndexModel([('inchi_hashed', pymongo.ASCENDING)],
                                background=False, sparse=False)
    collection.create_indexes([index1])
def load_identifiers():
    db = pymongo.MongoClient().human
    db.identifiers.drop()
    collection = db.identifiers
    humanUrl = 'http://genemania.org/data/current/Homo_sapiens/identifier_mappings.txt'
    status = Status('loading genemania identifiers from ' + humanUrl,
                    logger=log).start()
    r = requests.get(humanUrl)
    lines = r.iter_lines(decode_unicode=True)
    next(lines)  # ignore header row

    def parse(lines):
        for line in lines:
            try:
                preferred, name, source = line.split('\t')
                yield {
                    'preferred': preferred,
                    'name': name,
                    'NAME': name.upper(),  # indexed to support case-insensitive queries
                    'source': source
                }
            except Exception as e:
                log.warning(str(e))

    count = 0
    iterator = parse(lines)
    while True:
        records = list(islice(iterator, 1000))
        if len(records) > 0:
            count += len(collection.insert_many(records).inserted_ids)
            log.debug('inserted %d identifiers (%d total)', len(records), count)
        else:
            break
    log.info('creating NAME and preferred indexes')
    collection.create_indexes([
        pymongo.IndexModel([('NAME', pymongo.ASCENDING)]),
        pymongo.IndexModel([('preferred', pymongo.ASCENDING)])
    ])
    status.stop()
def index_strdb(self, collection_str='ecmdb'):
    '''Index relevant fields in string-only collections: ecmdb, ymdb,
    and intact_interaction
    '''
    if self.verbose:
        print('Indexing {} ...'.format(collection_str))
    collection = self.con_db(collection_str)
    index1 = pymongo.IndexModel([("$**", pymongo.TEXT)],
                                background=False, sparse=True)
    collection.create_indexes([index1])
class Settings:
    """The index that ensures a (hostname, port) tuple is unique."""
    indexes = [
        pymongo.IndexModel(
            [("hostname", pymongo.ASCENDING), ("port", pymongo.ASCENDING)],
            unique=True,
        )
    ]
def open_spider(self, spider):
    self.client = pymongo.MongoClient(self.mongo_uri)
    self.db = self.client[self.mongo_db]
    # set up indexes
    if self.indexes:
        def create_index_model(idx):
            return pymongo.IndexModel(
                [(idx['field'], 1)],
                name=idx['idx_name'],
                unique=idx.get('unique', False))
        index_generator = map(create_index_model, self.indexes)
        # indexes need to be passed in a list
        self.db[self.collection_name].create_indexes(list(index_generator))
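# Hypothetical shape of the `indexes` setting that open_spider consumes:
# each entry needs 'field' and 'idx_name' keys, and 'unique' is optional.
# The field and index names here are illustrative.
indexes = [
    {'field': 'org_link', 'idx_name': 'org_link_idx', 'unique': True},
    {'field': 'crawled_at', 'idx_name': 'crawled_at_idx'},
]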
def index_pax(self, collection_str='pax'):
    '''Index Pax collection
    '''
    if self.verbose:
        print('Indexing pax ...')
    collection = self.con_db(collection_str)
    index1 = pymongo.IndexModel([("$**", pymongo.TEXT)],
                                background=False, sparse=True)
    index2 = pymongo.IndexModel([('ncbi_id', pymongo.ASCENDING)],
                                background=False, sparse=True)
    index3 = pymongo.IndexModel([('weight', pymongo.ASCENDING)],
                                background=False, sparse=True)
    index4 = pymongo.IndexModel([('score', pymongo.ASCENDING)],
                                background=False, sparse=True)
    index5 = pymongo.IndexModel([('coverage', pymongo.ASCENDING)],
                                background=False, sparse=True)
    collection.create_indexes([index1, index2, index3, index4, index5])
def load_content(self):
    '''Load contents of several .dmp files into MongoDB
    '''
    self.download_dump()
    self.parse_fullname_taxid()  # taxidlineage.dmp fullnamelineage.dmp
    if self.verbose:
        print('Indexing tax_id ... \n')
    self.collection.create_index([("tax_id", pymongo.ASCENDING)],
                                 background=False, sparse=True)
    self.parse_nodes()  # nodes.dmp
    if self.verbose:
        print('Indexing division_id and gene_code ... \n')
    index1 = pymongo.IndexModel([("division_id", pymongo.ASCENDING)],
                                background=False, sparse=True)
    index2 = pymongo.IndexModel([("gene_code", pymongo.ASCENDING)],
                                background=False, sparse=True)
    self.collection.create_indexes([index1, index2])
    self.parse_division()  # division.dmp
    self.parse_names()  # names.dmp
    self.parse_gencode()  # gencode.dmp
def _createDataColIndex(self):
    '''
    If necessary, creates indexes for the data collection.
    '''
    col_dat = self._createMongoConn(cfg=self.cfg)['col_dat']
    idx = col_dat.index_information()
    should_idx = [
        '_id_', 'year_-1', 'id_grid_1_year_-1', 'year_-1_id_grid_1'
    ]
    passed = all(item in list(idx.keys()) for item in should_idx)
    if not passed:
        index1 = pymongo.IndexModel([("year", pymongo.DESCENDING)],
                                    name="year_-1")
        index2 = pymongo.IndexModel([("id_grid", pymongo.ASCENDING),
                                     ("year", pymongo.DESCENDING)],
                                    name="id_grid_1_year_-1")
        index3 = pymongo.IndexModel([("year", pymongo.DESCENDING),
                                     ("id_grid", pymongo.ASCENDING)],
                                    name="year_-1_id_grid_1")
        col_dat.create_indexes([index1, index2, index3])
        logging.info('Indexes added for the data collection')
    else:
        logging.info('Data indexes already exist.')
def load_mirbase_list(file_batch_number):
    url = 'http://ec2-54-148-99-18.us-west-2.compute.amazonaws.com:9200/_plugin/head/mirna.txt'
    r = requests.get(url)
    lines = r.iter_lines(decode_unicode=True)

    def parse(lines):
        for line in lines:
            try:
                c1, mirna_id, mId, c2, c3, c4, mirna_information, c5 = line.split('\t')
                yield {
                    'mirna_id': mirna_id,
                    'mId': mId,
                    'mirna_information': mirna_information
                }
            except Exception as e:
                warningLabel = str(e)

    db = pymongo.MongoClient().dataset
    collection = db.mirbase
    collection.drop()
    count = 0
    iterator = parse(lines)
    while True:
        records = list(islice(iterator, 1000))
        if len(records) > 0:
            count += len(collection.insert_many(records).inserted_ids)
        else:
            break
    collection.create_indexes([
        pymongo.IndexModel([('mirna_id', pymongo.ASCENDING)]),
        pymongo.IndexModel([('mId', pymongo.ASCENDING)])
    ])
def _create_indexes(cls, check_if_fields_exist=True):
    indexes = cls.__meta__.get('indexes', [])
    all_background = cls.__meta__.get('index_background', False)
    mongo_indexes = []
    for index in indexes:
        kwargs = {}
        background = all_background
        if isinstance(index, dict):
            fields = index['fields']
            background = (all_background or index.get('index_background')
                          or index.get('background'))
            kwargs = {
                k: v for k, v in index.items()
                if k not in ['fields', 'index_background', 'background']
            }
        elif isinstance(index, (list, tuple)):
            fields = index
        else:
            raise Exception('invalid index')
        mongo_fields = []
        for field in fields:
            if not isinstance(field, str):
                raise Exception('invalid index field')
            if field[0] == '-':
                field = field[1:]
                mongo_fields.append((field, pymongo.DESCENDING))
            else:
                mongo_fields.append((field, pymongo.ASCENDING))
            if check_if_fields_exist and field not in cls._fields:
                raise Exception(
                    'Field "{}" used in index is not declared in the model'
                    .format(field))
        mongo_indexes.append(
            pymongo.IndexModel(
                mongo_fields,
                background=background,
                **kwargs,
            ))
    return cls._get_collection(
        create_indexes=False).create_indexes(mongo_indexes)
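# A hypothetical __meta__ declaration that _create_indexes accepts, mixing
# the tuple and dict forms it parses; the model and field names are
# illustrative, and a '-' prefix marks a descending field.
class Article:
    _fields = {'author', 'published_at', 'slug'}
    __meta__ = {
        'index_background': True,
        'indexes': [
            ('author', '-published_at'),           # compound, mixed order
            {'fields': ['slug'], 'unique': True},  # dict form, extra kwargs
        ],
    }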
def test_insert_duplicate(volttron_instance1, database_client):
    clean_db(database_client)
    data_collection = database_client.get_default_database()['data']
    index_model = pymongo.IndexModel([("ts", pymongo.ASCENDING),
                                      ("topic_id", pymongo.ASCENDING)],
                                     unique=True)
    # make sure the data collection has the unique constraint.
    data_collection.create_indexes([index_model])

    # Install the historian agent (after this call the agent should be
    # running on the platform).
    agent_uuid = install_historian_agent(volttron_instance1,
                                         mongo_agent_config())
    assert agent_uuid is not None
    assert volttron_instance1.is_agent_running(agent_uuid)

    oat_reading = random.uniform(30, 100)
    all_message = [{
        'OutsideAirTemperature': oat_reading
    }, {
        'OutsideAirTemperature': {'units': 'F', 'tz': 'UTC', 'type': 'float'}
    }]

    publisher = volttron_instance1.build_agent()
    # Create timestamp (no parameter to isoformat so the result is a T
    # separator) The now value is a string after this function is called.
    now = get_aware_utc_now()
    # now = now.replace(microsecond=random.randint(0,100))
    # now = datetime(now.year, now.month, now.day, now.hour,
    #                now.minute, now.second)
    # now = now.isoformat()
    print('NOW IS: ', now)
    # now = '2015-12-02T00:00:00'
    headers = {headers_mod.DATE: now.isoformat()}

    # Publish messages
    publisher.vip.pubsub.publish('pubsub', ALL_TOPIC, headers,
                                 all_message).get(timeout=10)
    gevent.sleep(0.5)
    publisher.vip.pubsub.publish('pubsub', ALL_TOPIC, headers,
                                 all_message).get(timeout=10)
async def setup_app():
    await db_handler.create_collection(name=users_settings.USERS_COL)
    indexes = [
        pymongo.IndexModel(
            keys=[("email", pymongo.ASCENDING)],
            name="email",
            unique=True,
            background=True,
        )
    ]
    result = await db_handler.create_indexes(col_name=users_settings.USERS_COL,
                                             indexes=indexes)
    for index_name in result:
        typer.secho(
            message=f"Index: '{index_name}' for collection '{users_settings.USERS_COL}' created successfully.",
            fg=typer.colors.GREEN,
        )