def migrate(self):
    coll = self.mongo_db["noc.sla_probes"]
    coll.bulk_write(
        [
            # "Planned"
            UpdateMany({}, {"$set": {"state": ObjectId("607a7e1d3d18d4fb3c12032a")}}),
        ]
    )
    # Service Profile Workflow
    self.mongo_db["noc.sla_profiles"].bulk_write(
        [UpdateMany({}, {"$set": {"workflow": ObjectId("607a7dddff3a857a47600b9b")}})]
    )
def get_update_operations(matches_to_disable: list, matches_to_insert: list,
                          matches_to_mark_available: list) -> list:
    ops = list()
    updated_time = datetime.datetime.now()
    disable_hashes = [trial_match['hash'] for trial_match in matches_to_disable]
    ops.append(UpdateMany(filter={'hash': {'$in': disable_hashes}},
                          update={'$set': {'is_disabled': True,
                                           '_updated': updated_time}}))
    for to_insert in matches_to_insert:
        ops.append(InsertOne(document=to_insert))
    available_hashes = [trial_match['hash'] for trial_match in matches_to_mark_available]
    ops.append(UpdateMany(filter={'hash': {'$in': available_hashes}},
                          update={'$set': {'is_disabled': False,
                                           '_updated': updated_time}}))
    return ops
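# A minimal usage sketch (an assumption, not from the source): the operations
# returned above are intended for a single bulk_write against the trial-match
# collection; ordered=False lets independent updates continue past failures.
from pymongo import MongoClient

client = MongoClient("mongodb://localhost:27017")
matches_collection = client["matchengine"]["trial_match"]  # hypothetical names

ops = get_update_operations(
    matches_to_disable=[{'hash': 'a1'}],
    matches_to_insert=[{'hash': 'b2', 'is_disabled': False}],
    matches_to_mark_available=[{'hash': 'c3'}],
)
if ops:
    result = matches_collection.bulk_write(ops, ordered=False)
    print(result.bulk_api_result)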
def apply_changes(self, current, last):
    """
    Apply database changes
    :param current: List of CPE statuses, received from equipment
    :param last: Previously stored CPE statuses, keyed by global_id
    :return:
    """
    bulk = []
    left = set(global_id for global_id in last
               if last[global_id].get("status") != self.UNKNOWN_STATUS)
    for global_id in current:
        s = current[global_id]
        s["managed_object"] = self.object.id
        if global_id in last:
            # Already seen
            diff, changes = self.get_difference(last[global_id], s)
            if diff:
                # Changed
                self.logger.info("[%s] CPE status changed: %s",
                                 global_id, ", ".join(changes))
                bulk += [UpdateOne({"global_id": global_id}, {"$set": diff})]
            if global_id in left:
                left.remove(global_id)
        else:
            # New
            diff, changes = self.get_difference({}, s)
            self.logger.info("[%s] New CPE: %s", global_id, ", ".join(changes))
            bulk += [InsertOne(diff)]
    # Update missing statuses
    if left:
        # Log before building the operations: pop() below empties the set in
        # the single-element case, which previously skipped this logging.
        for global_id in sorted(left):
            self.logger.info("[%s] CPE status missing. Changing status to %s",
                             global_id, self.UNKNOWN_STATUS)
        if len(left) == 1:
            bulk += [UpdateOne({"global_id": left.pop()},
                               {"$set": {"status": self.UNKNOWN_STATUS}})]
        else:
            bulk += [UpdateMany({"global_id": {"$in": list(left)}},
                                {"$set": {"status": self.UNKNOWN_STATUS}})]
    # Apply changes
    if bulk:
        self.logger.info("Saving %d changes", len(bulk))
        CPEStatus._get_collection().bulk_write(bulk)
    else:
        self.logger.info("Nothing changed")
def bulk_update_collection(mongodb_collection, list_tuple_key_query) -> None:
    # Each request uses UpdateMany with upsert enabled:
    # https://pymongo.readthedocs.io/en/stable/api/pymongo/collection.html?highlight=update#pymongo.collection.Collection.update_many
    request: List[UpdateMany] = []
    for query, data in list_tuple_key_query:
        request.append(UpdateMany(query, {"$set": data}, upsert=True))
    mongodb_collection.bulk_write(request)
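# Hypothetical usage; the Mongo client, database, and the (query, data) pairs
# below are assumptions, not from the source.
from pymongo import MongoClient

client = MongoClient("mongodb://localhost:27017")
coll = client["app"]["devices"]
bulk_update_collection(coll, [
    ({"site": "lab"}, {"maintenance": True}),
    ({"site": "prod"}, {"maintenance": False}),
])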
def submitWords(self, words):
    if not words:
        return
    # These are upserting updates, not plain inserts: each word document
    # gets the current page URL pushed onto its "urls" array.
    requests = [
        UpdateMany({"word": word}, {"$push": {"urls": self.url}}, upsert=True)
        for word in words
    ]
    self.collection.bulk_write(requests)
def migrate(self):
    labels = defaultdict(set)  # label: settings
    # Mongo models
    for collection, setting in self.TAG_COLLETIONS:
        coll = self.mongo_db[collection]
        coll.bulk_write(
            [UpdateMany({"tags": {"$exists": True}}, {"$rename": {"tags": "labels"}})]
        )
        r = next(
            coll.aggregate(
                [
                    {"$match": {"labels": {"$exists": True, "$ne": []}}},
                    {"$unwind": "$labels"},
                    {"$group": {"_id": 1, "all_labels": {"$addToSet": "$labels"}}},
                ]
            ),
            None,
        )
        if r:
            for ll in r["all_labels"]:
                labels[ll].add(f"enable_{setting}")
    # Unset tags
    for collection, setting in self.TAG_COLLETIONS:
        coll = self.mongo_db[collection]  # rebind; the loop above leaves coll on the last collection
        coll.bulk_write([UpdateMany({}, {"$unset": {"tags": 1}})])
    # Add labels
    self.sync_labels(labels)
def make_upsert_opers(uids):
    # Each entry in `uids` is a (uid, devices) pair.
    # Note: `arrow.now().timestamp` is a property only on arrow < 1.0; newer
    # releases make it a method and offer the `.int_timestamp` property instead.
    return [
        UpdateMany({"uid": uid}, {
            '$set': {
                'devices': devices,
                'createTime': arrow.now().timestamp * 1000
            }
        }, upsert=True)
        for uid, devices in uids
    ]
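# Hypothetical usage; the `db["users"]` collection and the (uid, devices)
# pairs are assumptions, not from the source.
opers = make_upsert_opers([("u1", ["phone", "tablet"]), ("u2", ["laptop"])])
if opers:
    db["users"].bulk_write(opers, ordered=False)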
def transform_state_update(_state_update: StateUpdate):
    update_doc = _state_update.update_doc if _state_update.update_doc else {}
    new_update_doc = copy.deepcopy(update_doc)
    # Manually $set the StateKey so we disturb the original update_doc as
    # little as possible; replacing the whole $set clause would otherwise
    # override other fields already present in it.
    if "$set" not in new_update_doc:
        new_update_doc["$set"] = {}
    new_update_doc["$set"][StateKey] = _state_update.to_state
    return UpdateMany(filter=_state_update.filter_q, update=new_update_doc)
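# Hypothetical usage; the StateUpdate constructor shown here is an assumption
# (only the .filter_q/.update_doc/.to_state attributes appear in the source),
# and `collection` is an assumed pymongo collection.
op = transform_state_update(StateUpdate(
    filter_q={"job_id": "j-42"},
    update_doc={"$inc": {"attempts": 1}},
    to_state="RUNNING",
))
collection.bulk_write([op])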
def _create_variable_reference(self, sheet_title: str, meta_data: Dict[str, str]):
    institution_names = [
        name.strip() for name in meta_data.get(InstitutionField.name).split(";")
    ]
    institution = {
        InstitutionField.name: {"$in": institution_names},
        InstitutionField.country: meta_data.get(InstitutionField.country),
        InstitutionField.category: meta_data.get(InstitutionField.category),
    }
    institution_docs = self.find(db_collection.institutions, institution)
    institution_docs_id = [
        doc.get(InstitutionField._id) for doc in institution_docs
    ]
    variable = {
        VariableField.institution: {"$in": institution_docs_id},
        VariableField.heading: meta_data.get(MetaDataField.variable_heading),
        VariableField.name: meta_data.get(MetaDataField.variable_name),
    }
    variable_docs = self.find(db_collection.variables, variable)
    variable_docs_id = [doc.get(VariableField._id) for doc in variable_docs]
    if len(variable_docs_id) != len(institution_names):
        raise UnableToFindDocument(
            ErrorInfo(
                {
                    GoogleSheetsInfoField.sheet_title: sheet_title,
                    DatabaseField.collection: db_collection.variables,
                    DatabaseField.primary_keys: str(variable),
                }
            )
        )
    # update the variables to have type composite and the right hyperlink
    update_variables_request = UpdateMany(
        {VariableField._id: {"$in": variable_docs_id}},
        {
            "$set": {
                VariableField.type: VariableType.composite,
                VariableField.hyperlink: meta_data.get(MetaDataField.data_type),
            }
        },
    )
    update_variables_request_result = self._db.get_collection(
        db_collection.variables
    ).bulk_write([update_variables_request])
    log.info(
        f"Updated {update_variables_request_result.modified_count}/{len(variable_docs_id)} variables"
    )
    if meta_data.get(MetaDataField.data_type) == db_collection.body_of_law:
        return {CompositeVariableField.variables: variable_docs_id}
    else:
        return {CompositeVariableField.variable: variable_docs_id[0]}
def update_frame_error_image_information(only_missing: bool = False, batch_size: int = 5000):
    """
    Update the image_properties field on the image
    :return:
    """
    # Find the ids
    start_time = time.time()
    logging.getLogger(__name__).info(
        "Updating 'image_properties' for all FrameError objects ...")
    logging.getLogger(__name__).info("Loading set of referenced image IDs...")
    if only_missing:
        frame_errors = FrameError.objects.raw({
            'image_properties': {'$exists': False}
        }).only('image').values()
    else:
        frame_errors = FrameError.objects.all().only('image').values()
    # Do the distinct, rather than hold a cursor through all the loops
    # We expect this set to be about 1GB
    image_ids = set(error_obj['image'] for error_obj in frame_errors)
    # Work out how many batches to do. Images will be loaded in a batch to create a single bulk_write
    logging.getLogger(__name__).info(
        f"Found {len(image_ids)} image ids, "
        f"updating information {batch_size} images at a time "
        f"({time.time() - start_time}s) ...")
    n_updated = 0
    updates_sent = 0
    for batch_ids in grouper(image_ids, batch_size):
        # For each image, update all frame error objects that link to it
        images = Image.objects.raw({'_id': {'$in': batch_ids}})
        write_operations = [
            UpdateMany({'image': image.pk}, {
                '$set': {
                    'image_properties': {
                        str(k): json_value(v)
                        for k, v in image.get_properties().items()
                    }
                }
            })
            for image in images
        ]
        result = FrameError._mongometa.collection.bulk_write(write_operations, ordered=False)
        n_updated += result.modified_count
        updates_sent += len(write_operations)
        logging.getLogger(__name__).info(
            f"Updates sent for {updates_sent} images, updating {n_updated} FrameErrors"
            f" in {time.time() - start_time}s ...")
    logging.getLogger(__name__).info(
        f"Updated {n_updated} FrameError objects in {time.time() - start_time}s "
    )
def temp_add_trace(line):
    msg_id = line['id']
    current_status = dict(
        comments_count=line['comments_count'],
        attitudes_count=line['attitudes_count'],
        reposts_count=line['reposts_count'],
    )
    t = int(time.time())
    # Store the snapshot under a timestamped subkey of status_trace
    line['status_trace.{date}'.format(date=str(t))] = current_status
    update_item = UpdateMany({'id': msg_id}, {'$set': line}, upsert=True)
    return update_item
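# Hypothetical usage; `lines` (decoded status records) and the `statuses`
# collection are assumptions, not from the source.
requests = [temp_add_trace(line) for line in lines]
if requests:
    db["statuses"].bulk_write(requests, ordered=False)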
def main():
    comm = MPI.COMM_WORLD
    ID = comm.Get_rank()
    logging.basicConfig(filename='./logs/log_enrichKaggleWithScraper' + str(ID) + '.log',
                        level=logging.INFO)
    logging.info("\n")
    logging.info("Log file created. Program started.")
    logging.info("Reading config files.")
    with open("configMongo.yml", "r") as file:
        cfgMongo = yaml.safe_load(file)
    logging.info("Config files successfully read.")
    logging.info("Loading Mongo collections.")
    MONGO_HOST = cfgMongo["host"]
    MONGO_DATABASE = cfgMongo["database"]
    clientMongo = pymongo.MongoClient(MONGO_HOST)
    databaseMongo = clientMongo[MONGO_DATABASE]
    kaggleCollection = databaseMongo["kaggleNation"]
    scraperCollection = databaseMongo["scrape"]
    logging.info("Mongo collections loaded.")
    # Split the scraper collection evenly across MPI ranks
    BATCH_SIZE = round(
        scraperCollection.count_documents({}) / comm.Get_size() + 0.5)
    cursorscraper = scraperCollection.find().skip(BATCH_SIZE * ID).limit(BATCH_SIZE)
    logging.info("Preparing to update.")
    # This update enriches kaggle documents with data from the scraper
    updates = [
        UpdateMany({'id': scraperDoc["id"]},
                   {'$set': {"category": scraperDoc["category"]}})
        for scraperDoc in cursorscraper
    ]
    logging.info("Updating documents.")
    if updates:  # bulk_write raises InvalidOperation on an empty request list
        kaggleCollection.bulk_write(updates)
    logging.info("Data saved successfully to Mongo.")
def mergeCastMovies(db):
    movies = db.movies
    people = db.people
    updateList = []
    update_count = 0
    count = 106257  # hard-coded expected total number of updates, used only for the ETA estimate
    start = time.perf_counter()
    movies.bulk_write([UpdateMany({}, {'$set': {'hasCastData': False}})])
    for member in people.find():
        for role in member['roles']:
            movie = movies.find_one({'_id': role['movieId']})
            if movie is not None:
                update_count += 1
                updateList.append(UpdateOne(
                    {'_id': movie['_id']},
                    {'$push': {
                        'cast': {
                            'personId': member['_id'],
                            'name': member['name'],
                            'order': role['order'],
                            'character': role['character'],
                        }
                    },
                     '$set': {'hasCastData': True}}
                ))
                if len(updateList) >= BULK_UPDATE_COUNT:
                    movies.bulk_write(updateList, ordered=False)
                    cur_time = time.perf_counter()
                    time_remaining = ((cur_time - start) / update_count
                                      * (count - update_count))
                    print(('Updated {0} ({1} / {2} in {3:.2f} secs) movies...'
                           ' {4:.0f} secs remaining')
                          .format(len(updateList), update_count, count,
                                  cur_time - start, time_remaining))
                    updateList = []
    if len(updateList) > 0:
        movies.bulk_write(updateList, ordered=False)
        cur_time = time.perf_counter()
        print('Updated {0} ({1} / {2} in {3:.2f} secs) movies...'
              .format(len(updateList), update_count, count, cur_time - start))
        updateList = []
async def get_delete_ops(matches_to_disable: list, matchengine: MatchEngine) -> list:
    updated_time = datetime.datetime.now()
    hashes = [result['hash'] for result in matches_to_disable]
    ops = list()
    for chunk in chunk_list(hashes, matchengine.chunk_size):
        ops.append(
            UpdateMany(filter={'hash': {'$in': chunk}},
                       update={'$set': {"is_disabled": True,
                                        '_updated': updated_time}}))
    return ops
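# Hypothetical async caller; `matches_collection` (an async collection, e.g.
# motor) is an assumption. Chunking the hashes keeps each $in filter document
# small even for very large disable sets.
async def disable_matches(matches_to_disable, matchengine, matches_collection):
    ops = await get_delete_ops(matches_to_disable, matchengine)
    if ops:
        await matches_collection.bulk_write(ops, ordered=False)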
def _common_get_task_from_db(self, dbName, collName, count):
    t0 = time.time()
    self.init_task_queue(dbName, collName)
    info = '%s, %s, qsize:%s' % (
        dbName, collName, self.task_queues[dbName][collName].qsize())
    self.logger.debug(info)
    if self.task_queues[dbName][collName].qsize() <= 0:
        t1 = time.time()
        # Over-fetch so the in-memory queue can serve several calls
        tasks = yield self.mc[dbName][collName].find(
            {'status': common.NOT_CRAWL}, limit=count * 10)
        requests, ts = [], []
        for task in tasks:
            requests.append(
                UpdateMany({'url': task["url"]}, {
                    "$set": {
                        "status": common.CRAWLING,
                        "last_crawl_time": 0
                    }
                }))
            task.pop('_id')
            ts.append(task)
        if len(requests) > 0:
            yield self.mc[dbName][collName].bulk_write(requests)
        for t in ts:
            self.task_queues[dbName][collName].put(t)
        t_diff = time.time() - t1
        info = "query mongo, %s, %s, get:%s, use time:%s" % (
            dbName, collName, len(ts), t_diff)
        self.logger.debug(info)
    ts = []
    for x in range(count):
        try:
            t = self.task_queues[dbName][collName].get_nowait()
            self.task_queues[dbName][collName].task_done()
            ts.append(t)
        except Exception:
            continue
    t_diff = time.time() - t0
    info = "total, %s, %s, return : %s , use time : %s" % (
        dbName, collName, len(ts), t_diff)
    self.logger.debug(info)
    defer.returnValue(ts)
def update(self, objects, key='', method='$set', upsert=True, date=False):
    # TODO link from the StoreWorker documentation
    # TODO add pymongo documentation link.
    if objects:
        queries = self._create_queries(key, objects)
        if date:
            now = datetime.datetime.fromtimestamp(time.time())
            # Wrap each serialized object under a timestamp key. Serialization
            # happens here, so it must not be repeated below; the original code
            # called .to_dict() again on these plain dicts and crashed.
            docs = ({now.isoformat(): obj.to_dict()} for obj in objects)
        else:
            docs = (obj.to_dict() for obj in objects)
        # insert all the objects in the database
        db_requests = [
            UpdateMany(query, {method: doc}, upsert=upsert)
            for doc, query in zip(docs, queries)
        ]
        return self.coll.bulk_write(db_requests)
    return False
def getConsensusVar(group):
    if group['variation'].count() == 0:
        var = None
    else:
        var = '{0:.4f}'.format(group['variation'].mean())
    return UpdateMany({"gvkey": group.name}, {
        "$set": {
            type_consensus: {
                'mean_recom': '{0:.4f}'.format(group['ireccd'].mean()),
                "num_recom": int(group['ireccd'].count()),
                "mean_var": str(var),
                "num_var": int(group['variation'].count()),
            }
        }
    })
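# Hypothetical usage as a pandas groupby callback; `df` (with gvkey, ireccd,
# and variation columns) and the `consensus` collection are assumptions.
# GroupBy.apply sets group.name to each group's gvkey, which the function
# relies on above.
requests = df.groupby('gvkey').apply(getConsensusVar).tolist()
if requests:
    db['consensus'].bulk_write(requests, ordered=False)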
def BulkPriceTarget(isin, gvkey, pt_mean, pt_count, ptvarmean, ptvarcount,
                    var_mean, var_count):
    return UpdateMany({
        "isin_or_cusip": isin,
        "gvkey": gvkey
    }, {
        "$set": {
            type_price_target: {
                'price': '{0:.4f}'.format(pt_mean),
                "num_price": int(pt_count),
                "pmean_var": str(ptvarmean),
                "pnum_var": int(ptvarcount),
                "mean_var": str(var_mean),
                "num_var": int(var_count),
            }
        }
    })
def bulk_update(self, collection, filters, posts, unset=False, upsert=True):
    operator = "$unset" if unset else "$set"
    requests = [
        UpdateMany(f, {operator: p}, upsert=upsert)
        for f, p in zip(filters, posts)
    ]
    try:
        self.db[collection].bulk_write(requests)
    except BulkWriteError as e:
        logger.exception(e.details)
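# Hypothetical usage; `store` (an instance of the enclosing class) and the
# filter/post payloads are assumptions, not from the source. Filters and
# posts are paired positionally.
store.bulk_update(
    "articles",
    filters=[{"source": "rss"}, {"source": "api"}],
    posts=[{"checked": True}, {"checked": False}],
    upsert=False,
)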
def update(self, model, objects, urls, key='', method='$set', upsert=True, date=False):
    coll = self.get_collection(model.table)
    queries = self._create_queries(key, objects)
    if date:
        date = datetime.datetime.fromtimestamp(time.time())
        objects = ({date.isoformat(): obj} for obj in objects)
    # insert all the objects in the database
    db_requests = [
        UpdateMany(query, {method: obj}, upsert=upsert)  # noqa
        for obj, query in zip(objects, queries)
    ]
    return coll.bulk_write(db_requests)
def get_task():
    coll_name = request.args.get('coll_name', None)
    if coll_name not in task_queues:
        task_queues[coll_name] = Queue()
    if task_queues[coll_name].qsize() <= 0:
        # Refill the in-memory queue and mark the claimed tasks as crawling
        tasks = db[coll_name].find({'status': 0}).limit(100)
        requests = []
        for task in tasks:
            requests.append(
                UpdateMany({'url': task["url"]},
                           {"$set": {"status": 1, "last_crawl_time": 0}}))
            task_queues[coll_name].put(task)
        if len(requests) > 0:
            db[coll_name].bulk_write(requests)
    try:
        task = task_queues[coll_name].get_nowait()
    except Exception:
        task = {}
    task.pop('_id', None)
    return jsonify(task)
def migrate(self):
    # Get migrated termination groups, created by 0184 migration
    db = self.mongo_db
    rg_map = dict(
        (x["_legacy_id"], x["_id"])
        for x in db.resourcegroups.find(
            {"_legacy_id": {"$exists": True}}, {"_id": 1, "_legacy_id": 1}
        )
    )
    # Apply Resource Groups
    for cname in ["noc.phoneranges", "noc.phonenumbers"]:
        coll = db[cname]
        bulk = []
        for d in coll.aggregate([{"$group": {"_id": "$termination_group"}}]):
            if not d.get("_id"):
                continue
            rg_id = rg_map[d["_id"]]
            bulk += [
                UpdateMany(
                    {"termination_group": d["_id"]},
                    {
                        "$set": {
                            "static_client_groups": [rg_id],
                            "effective_client_groups": [rg_id],
                        },
                        "$unset": {"termination_group": ""},
                    },
                )
            ]
        if bulk:
            coll.bulk_write(bulk)
def add_tags_field_to_items(db):
    uploads = upload.find(db, with_default_projection=False)
    # build update requests
    bulks = []
    for upload_doc in uploads:
        upload_id = upload_doc['_id']
        upload_tags = upload_doc['tags']
        bulk = UpdateMany({'upload_id': ObjectId(upload_id)},
                          {'$set': {'tags': upload_tags}})
        bulks.append(bulk)
    # set new collection validators
    override_collection_validator(db, 'items', ITEMS_DB_SCHEMA)
    if bulks:
        # update DB
        result = db.items.bulk_write(bulks)
        print(result.bulk_api_result)
def fix_model(model):
    coll = model._get_collection()
    ins = defaultdict(list)
    bulk = []
    for doc in coll.find(
        {
            "managed_object_profile": {"$exists": False},
            "managed_object": {"$exists": True},
        },
        {"_id": 1, "managed_object": 1},
    ):
        mo = ManagedObject.get_by_id(doc["managed_object"])
        if not mo:
            continue
        mop = mo.object_profile.id
        ins[mop] += [doc["_id"]]
        if len(ins[mop]) >= IN_SIZE:
            bulk += [UpdateMany({"_id": {"$in": ins[mop]}},
                                {"$set": {"managed_object_profile": mop}})]
            ins[mop] = []
        if len(bulk) >= BULK_SIZE:
            coll.bulk_write(bulk)
            bulk = []
    # Flush the partially filled id buckets; without this, any bucket that
    # never reached IN_SIZE would be silently dropped.
    for mop, ids in ins.items():
        if ids:
            bulk += [UpdateMany({"_id": {"$in": ids}},
                                {"$set": {"managed_object_profile": mop}})]
    if bulk:
        coll.bulk_write(bulk)
def _common_change_task_status(self, dbName, collName, data):
    t0 = time.time()
    self.init_write_queue(dbName, collName)
    # Tally successes
    success = [
        t['url'] for t in data if t['status'] == common.CRAWL_SUCCESS
    ]
    self.saveCountData(dbName, collName, common.ONE_TASK, len(success))
    # Update the records
    for t in data:
        # self.logger.debug('url:%s,status:%s' % (t['url'], t['status']))
        self.write_queues[dbName][collName].put(
            (t['url'], UpdateMany({'url': t['url']}, {
                "$set": {
                    'status': t['status'],
                    'last_crawl_time': time.time()
                }
            })))
    t_diff = time.time() - t0
    info = "%s, %s, %s" % (dbName, collName, t_diff)
    self.logger.debug(info)
def fill_issueprice_and_timeToMarket():
    """
    ipo_info.xlsx is extracted from Eastmoney Choice;
    columns:
        code         -- stock code
        name         -- current stock name
        issueprice   -- issue price
        timeToMarket -- listing date
    """
    df = pd.read_excel('data/ipo_info.xlsx', header=0, dtype={'code': str})
    df = df.set_index('code')
    codes = df.index.tolist()
    total = len(codes)
    for i, code in enumerate(codes):
        update_requests = []
        try:
            update_requests.append(
                UpdateMany({'code': code}, {
                    '$set': {
                        'issueprice': df.issueprice[code],
                        'timeToMarket': df.timeToMarket[code]
                    }
                }, upsert=True))
        except Exception:
            print('code: %s, has problem' % code)
        if len(update_requests) > 0:
            update_result = DB_CONN['basic'].bulk_write(update_requests,
                                                        ordered=False)
            print('progress: %s/%s, field: issueprice, dataset: %s, upserted: %4d, modified: %4d'
                  % (i + 1, total, 'basic', update_result.upserted_count,
                     update_result.modified_count),
                  flush=True)
def update_many(self, criteria, query, upsert=False):
    self._batch.append(UpdateMany(criteria, query, upsert=upsert))
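# A minimal flush sketch for such a batching wrapper; the `flush` method and
# the `self._collection` attribute are assumptions, not from the source.
def flush(self):
    if self._batch:
        result = self._collection.bulk_write(self._batch, ordered=False)
        self._batch = []
        return result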
def update(self, collection: str, col_filter: dict, update: dict):
    if collection not in self.transactions:
        self.transactions[collection] = []
    self.transactions[collection].append(UpdateMany(col_filter, update))
    return Update(self.transactions)
    # - Total number of hours we need to credit the empire's bank
    hours = min(MAX_HOURS_OFFLINE, (now - last_income).total_seconds() / 3600)

    income = int(utils.net_income(empire) * hours)

    if bank_row.get("usd", 0) >= 250_000 and income > 0:
        income = int(income * 0.4)

    requests.append(
        UpdateOne({"_id": empire["_id"]}, {"$inc": {"usd": income}}, upsert=True))

    # Clamp any negative balances back to zero
    requests.append(UpdateMany({"usd": {"$lt": 0}}, {"$set": {"usd": 0}}))

    await self.bot.db["bank"].bulk_write(requests)


async def assassinated_event(ctx):
    """ Single unit is killed. """

    empire = await ctx.bot.db["empires"].find_one({"_id": ctx.author.id})

    units = empire.get("units", dict())

    # - List of all military units which have an owned amount greater than 0
    units_owned = [
        u for u in Military.units
        if units.get(u.key, dict()).get("owned", 0) > 0
    ]
def migrate(self):
    labels = defaultdict(set)  # label: settings
    # Create labels fields
    for table, setting in self.TAG_MODELS:
        self.db.add_column(
            table,
            "labels",
            ArrayField(CharField(max_length=250), null=True, blank=True, default=lambda: "{}"),
        )
        self.db.add_column(
            table,
            "effective_labels",
            ArrayField(CharField(max_length=250), null=True, blank=True, default=lambda: "{}"),
        )
    # Migrate data
    for table, setting in self.TAG_MODELS:
        self.db.execute(
            """
            UPDATE %s
            SET labels = tags
            WHERE tags is not NULL and tags <> '{}'
            """
            % table
        )
        # Fill labels
        for (ll,) in self.db.execute(
            """
            SELECT DISTINCT labels
            FROM %s
            WHERE labels <> '{}'
            """
            % table
        ):
            for name in ll:
                labels[name].add(f"enable_{setting}")
    # Delete tags
    for table, setting in self.TAG_MODELS:
        self.db.delete_column(table, "tags")
    # Create indexes
    for table, setting in self.TAG_MODELS:
        self.db.execute(f'CREATE INDEX x_{table}_labels ON "{table}" USING GIN("labels")')
        self.db.execute(
            f'CREATE INDEX x_{table}_effective_labels ON "{table}" USING GIN("effective_labels")'
        )
    # Mongo models
    for collection, setting in self.TAG_COLLETIONS:
        coll = self.mongo_db[collection]
        coll.bulk_write(
            [UpdateMany({"tags": {"$exists": True}}, {"$rename": {"tags": "labels"}})]
        )
        r = next(
            coll.aggregate(
                [
                    {"$match": {"labels": {"$exists": True, "$ne": []}}},
                    {"$unwind": "$labels"},
                    {"$group": {"_id": 1, "all_labels": {"$addToSet": "$labels"}}},
                ]
            ),
            None,
        )
        if r:
            for ll in r["all_labels"]:
                labels[ll].add(f"enable_{setting}")
    # Unset tags
    for collection, setting in self.TAG_COLLETIONS:
        coll = self.mongo_db[collection]  # rebind; the loop above leaves coll on the last collection
        coll.bulk_write([UpdateMany({}, {"$unset": {"tags": 1}})])
    # Add labels
    self.sync_labels(labels)