def test_bulk_write(self):
    # Mixed bulk: every write type issued once without and once with a
    # collation, so we can verify the collation option is attached only
    # to the requests that asked for it.
    self.db.test.collection.bulk_write([
        DeleteOne({'noCollation': 42}),
        DeleteMany({'noCollation': 42}),
        DeleteOne({'foo': 42}, collation=self.collation),
        DeleteMany({'foo': 42}, collation=self.collation),
        ReplaceOne({'noCollation': 24}, {'bar': 42}),
        UpdateOne({'noCollation': 84}, {'$set': {'bar': 10}}, upsert=True),
        UpdateMany({'noCollation': 45}, {'$set': {'bar': 42}}),
        ReplaceOne({'foo': 24}, {'foo': 42}, collation=self.collation),
        UpdateOne({'foo': 84}, {'$set': {'foo': 10}}, upsert=True,
                  collation=self.collation),
        UpdateMany({'foo': 45}, {'$set': {'foo': 42}},
                   collation=self.collation)
    ])
    # The listener captured the wire-level commands: index 0 is the
    # 'delete' command, index 1 the 'update' command (replaces are sent
    # as updates).
    delete_cmd = self.listener.results['started'][0].command
    update_cmd = self.listener.results['started'][1].command

    def check_ops(ops):
        # Each sub-operation must carry 'collation' iff its filter was
        # one of the collation-bearing requests above ('foo' filters).
        for op in ops:
            if 'noCollation' in op['q']:
                self.assertNotIn('collation', op)
            else:
                self.assertEqual(self.collation.document, op['collation'])

    check_ops(delete_cmd['deletes'])
    check_ops(update_cmd['updates'])
def test_batch_splitting(self):
    """Test retry succeeds after failures during batch splitting."""
    # ~15 MB per document forces the driver to split this bulk into
    # several wire-level batches.
    large = 's' * 1024 * 1024 * 15
    coll = self.db.retryable_write_test
    coll.delete_many({})
    self.listener.results.clear()
    bulk_result = coll.bulk_write([
        InsertOne({'_id': 1, 'l': large}),
        InsertOne({'_id': 2, 'l': large}),
        InsertOne({'_id': 3, 'l': large}),
        UpdateOne({'_id': 1, 'l': large},
                  {'$unset': {'l': 1}, '$inc': {'count': 1}}),
        UpdateOne({'_id': 2, 'l': large}, {'$set': {'foo': 'bar'}}),
        DeleteOne({'l': large}),
        DeleteOne({'l': large})])
    # Each command should fail and be retried.
    # With OP_MSG 3 inserts are one batch. 2 updates another.
    # 2 deletes a third.
    self.assertEqual(len(self.listener.results['started']), 6)
    # Only {_id: 1} survives: 2 was deleted-by-large-filter... the
    # remaining doc had 'l' unset and count incremented.
    self.assertEqual(coll.find_one(), {'_id': 1, 'count': 1})
    # Assert the final result
    expected_result = {
        "writeErrors": [],
        "writeConcernErrors": [],
        "nInserted": 3,
        "nUpserted": 0,
        "nMatched": 2,
        "nModified": 2,
        "nRemoved": 2,
        "upserted": [],
    }
    self.assertEqual(bulk_result.bulk_api_result, expected_result)
def main():
    """Smoke-test the Mongo connection, then bulk-update the test collection.

    Prints the server/database layout, appends '--' to every document's
    tagname, and upserts a fixed document with _id 3.
    """
    print(_client)
    print(_client.list_database_names())
    for _db in _client.list_databases():
        print(_db, _client[_db.get("name")].list_collection_names())

    coll = _client["test"]["test"]
    # Materialize the cursor once: a pymongo cursor is exhausted after
    # the first full iteration, so the original code's second pass over
    # `_cursor` would have yielded nothing.  (The original also had a
    # stray debug `return` here that made everything below unreachable.)
    docs = list(coll.find())
    print(docs)

    _list = []
    for p in docs:
        _list.append(
            UpdateOne({"_id": p.get("_id")},
                      {"$set": {"tagname": p.get("tagname") + "--"}}))
    _list.append(
        UpdateOne(
            {"_id": 3},
            {"$set": {
                "created_at": datetime.datetime.now(),
                "tagname": "小",
            }},
            upsert=True,
        ))
    _r: BulkWriteResult = coll.bulk_write(_list)
    print(_r.acknowledged, _r.matched_count)
    print(_r.inserted_count, _r.modified_count, _r.deleted_count)
def test_can_make_batches_of_upsert_queries_from_id(self):
    """A batch keyed on '_id' filters by id and $sets the other fields."""
    docs = [{'_id': 1, 'n': 100}, {'_id': 2, 'n': 200}]
    expected = [
        UpdateOne({'_id': 1}, {'$set': {'n': 100}}, upsert=True),
        UpdateOne({'_id': 2}, {'$set': {'n': 200}}, upsert=True),
    ]
    self.assertEqual(batch_of_upsert_ops(docs, ('_id',)), expected)
def test_Mixed_Ordered_Unack(self):
    # Ordered bulk through an unacknowledged (w=0) collection clone.
    w0 = self.coll.with_options(write_concern=WriteConcern(w=0))
    yield w0.bulk_write([InsertOne({"_id": 2}),
                         UpdateOne({"_id": 2}, {"$set": {'x': 1}}),
                         InsertOne({"_id": 1}),
                         UpdateOne({"_id": 1}, {"$set": {'x': 2}})])
    docs = yield self.coll.find()
    self.assertEqual(len(docs), 2)
    # NOTE(review): {'_id': 1} is asserted WITHOUT x=2 — presumably the
    # fixture pre-inserts {'_id': 1}, so the third op fails as a
    # duplicate key and, the bulk being ordered, the final update is
    # skipped while the first two ops applied; confirm against setUp.
    self.assertIn({"_id": 1}, docs)
    self.assertIn({"_id": 2, 'x': 1}, docs)
def test_Mixed_Ordered_Ack(self):
    # Same ordered op sequence as the unacknowledged variant, but with
    # an acknowledged write concern the failure must surface as a
    # BulkWriteError instead of being silently dropped.
    result = self.coll.bulk_write([InsertOne({"_id": 2}),
                                   UpdateOne({"_id": 2}, {"$set": {'x': 1}}),
                                   InsertOne({"_id": 1}),
                                   UpdateOne({"_id": 1}, {"$set": {'x': 2}})])
    yield self.assertFailure(result, BulkWriteError)
    docs = yield self.coll.find()
    self.assertEqual(len(docs), 2)
    # NOTE(review): {'_id': 1} appears without x=2 — presumably it is
    # pre-inserted by the fixture, so the third op is a duplicate key
    # and the ordered batch stops before the final update; confirm
    # against setUp.
    self.assertIn({"_id": 1}, docs)
    self.assertIn({"_id": 2, 'x': 1}, docs)
def test_OperationFailure(self):
    # Seed one document through the real protocol before patching it.
    yield self.coll.bulk_write([UpdateOne({}, {'$set': {'x': 42}},
                                          upsert=True)])

    def fake_send_query(*args):
        # Simulate the server replying with a replication-state-change
        # error for every query sent.
        return defer.succeed(Reply(documents=[
            {'ok': 0.0, 'errmsg': 'operation was interrupted',
             'code': 11602,
             'codeName': 'InterruptedDueToReplStateChange'}]))

    with patch('txmongo.protocol.MongoProtocol.send_QUERY',
               side_effect=fake_send_query):
        # The bulk write must surface the fake server error as an
        # OperationFailure / NotMasterError.
        yield self.assertFailure(
            self.coll.bulk_write([UpdateOne({}, {'$set': {'x': 42}},
                                            upsert=True)],
                                 ordered=True),
            OperationFailure, NotMasterError)
def test_will_create_upserts_with_full_shard_key(self):
    """Every shard-key field appears in both the filter and the $set."""
    docs = [{'_id': 1, 'd': 10, 'n': 100},
            {'_id': 2, 'd': 20, 'n': 200}]
    expected = [
        UpdateOne({'_id': 1, 'd': 10}, {'$set': {'n': 100, 'd': 10}},
                  upsert=True),
        UpdateOne({'_id': 2, 'd': 20}, {'$set': {'n': 200, 'd': 20}},
                  upsert=True),
    ]
    self.assertEqual(batch_of_upsert_ops(docs, ('d', '_id',)), expected)
def non_retryable_single_statement_ops(coll):
    """Return (method, args, kwargs) triples that must NOT be retried:
    multi-document writes, unacknowledged writes, and their deprecated
    equivalents."""
    ops = []
    # Multi-document bulk writes.
    ops.append((coll.bulk_write,
                [[UpdateOne({}, {'$set': {'a': 1}}),
                  UpdateMany({}, {'$set': {'a': 1}})]], {}))
    ops.append((coll.bulk_write, [[DeleteOne({}), DeleteMany({})]], {}))
    ops.append((coll.update_many, [{}, {'$set': {'a': 1}}], {}))
    ops.append((coll.delete_many, [{}], {}))
    # Deprecated methods: multi remove, multi update.
    ops.append((coll.remove, [{}], {}))
    ops.append((coll.update, [{}, {'$set': {'a': 1}}], {'multi': True}))
    # Unacknowledged deprecated methods.
    ops.append((coll.insert, [{}], {'w': 0}))
    # Unacknowledged non-multi update / remove.
    ops.append((coll.update, [{}, {'$set': {'a': 1}}], {'w': 0}))
    ops.append((coll.remove, [{}], {'multi': False, 'w': 0}))
    # Unacknowledged replace / update / delete via find_and_modify.
    ops.append((coll.find_and_modify, [{}, {'a': 3}],
                {'writeConcern': {'w': 0}}))
    ops.append((coll.find_and_modify, [{}, {'$set': {'a': 1}}],
                {'writeConcern': {'w': 0}}))
    ops.append((coll.find_and_modify, [{}, {}],
                {'remove': True, 'writeConcern': {'w': 0}}))
    return ops
def assemble_segment_docs(self, c_name):
    """Segment every document body in collection *c_name*.

    Returns a pair: UpdateOne upserts writing the space-joined word
    segmentation, and raw term-frequency row dicts.
    """
    projection = {'_id': 0, 'document_id': 1, 'body': 1}
    segment_requests, tf_requests = [], []
    total = db[c_name].count_documents({})
    cursor = db[c_name].find({}, projection)
    with tqdm(cursor, desc='assemble_segment_docs', total=total,
              ascii=True, mininterval=1.0) as bar:
        for index, doc in enumerate(bar):
            # Keep only tokens that pass the word filter.
            word_list = [s for s in jieba.cut(doc['body']) if check_word(s)]
            segment_requests.append(
                UpdateOne({'document_id': doc['document_id']},
                          {'$set': {'words': ' '.join(word_list)}},
                          upsert=True))
            for word, tf in self.calculate_words_tf(word_list).items():
                tf_requests.append({
                    'document_id': doc['document_id'],
                    'word': word,
                    'tf': tf,
                })
    return segment_requests, tf_requests
def segment(self, args):
    """Worker-process entry: segment a batch of docs and flush to Mongo.

    *args* is (progress-bar position index, collection name, documents).
    """
    index, c_name, docs = args
    desc = 'Process {}: segment'.format(os.getpid())
    segment_requests, tf_requests = [], []
    with tqdm(docs, desc=desc, ascii=True, position=index,
              mininterval=1.0) as bar:
        for doc in bar:
            # Keep only tokens that pass the word filter.
            word_list = [s for s in jieba.cut(doc['body']) if check_word(s)]
            segment_requests.append(
                UpdateOne({'document_id': doc['document_id']},
                          {'$set': {'words': ' '.join(word_list)}},
                          upsert=True))
            for word, tf in self.calculate_words_tf(word_list).items():
                tf_requests.append({
                    'document_id': doc['document_id'],
                    'word': word,
                    'tf': tf,
                })
    self.execute_mongo_crud(c_name, segment_requests, 'update', index)
    self.execute_mongo_crud(self.tf_name, tf_requests, 'insert', index)
    # Release the (potentially large) batch before the worker returns.
    del docs
def delete(self):
    """Queue a $pull removing this embedded document from its parent."""
    pull_spec = {self.FIELD: {"_id": self.id}}
    self._add_operation(
        UpdateOne({"_id": self._parent_id}, {"$pull": pull_spec}))
def _new_empty(cls, database, parent_id, embedded_id=None):
    """
    This class method is lazy, obj.write_operations() must be called
    afterwards!

    Adds new embedded document represented by the class to an array of
    specified parent mdoc in database. Every mdoc of this type must have
    an id, if none is provided, a new id is generated (text version of
    ObjectID).

    :param database: MongoDB database object
    :param parent_id: _id of parent mdoc
    :param embedded_id: _id of the new embedded document
    :return: object representing new embedded document
    """
    # Generate a textual ObjectId when the caller supplied no id.
    mdoc = {"_id": embedded_id if embedded_id else str(ObjectId())}
    obj = cls(database, parent_id, mdoc)
    obj._add_operation(
        UpdateOne({"_id": parent_id}, {"$push": {cls.FIELD: mdoc}}))
    return obj
def get(self):
    # Clamp the requested query count into [1, 500]; any missing or
    # unparseable value falls back to 1.
    try:
        queries = int(self.get_argument(self.QUERIES))
    except Exception:
        queries = 1
    else:
        queries = min(max(queries, 1), 500)

    # Fetch the random worlds concurrently.
    worlds = yield [db.world.find_one(randint(1, 10000))
                    for _ in xrange(queries)]

    updates, out = [], []
    for world in worlds:
        new_value = randint(1, 10000)
        updates.append(
            UpdateOne({"_id": world["_id"]},
                      {"$set": {self.RANDOM_NUMBER: new_value}}))
        out.append({self.ID: world["_id"],
                    self.RANDOM_NUMBER: new_value})

    yield db.world.bulk_write(updates, ordered=False)
    self.finish(json.dumps(out))
def _process_updates(self, updates):
    """Queue a $set mapping each update key to a dotted path under FIELD."""
    super()._process_updates(updates)
    if not updates:
        return
    dotted = {f"{self.FIELD}.{key}": value
              for key, value in updates.items()}
    self._add_operation(
        UpdateOne({"_id": self._parent_id}, {"$set": dotted}))
def bulk_operation(self, set_name, data_list, operation_type, ordered=False):
    """Build and execute a bulk write against collection *set_name*.

    :param set_name: target collection name
    :param data_list: payload, shape depends on *operation_type*:
        'insert'     - list of documents
        'update'     - list of (filter, update) pairs
        'delete'     - list of filters
        'multi_type' - mapping of key -> {'op_type': ..., 'info': ...}
    :param operation_type: one of 'insert', 'update', 'delete', 'multi_type'
    :param ordered: passed through to bulk_write
    :return: the BulkWriteResult from the driver

    Idiom fix: the original built the request lists with
    ``list(map(lambda ...))`` and iterated ``data_list.keys()`` only to
    re-index the dict; comprehensions and ``.values()`` do the same work
    directly.  Behavior is unchanged.
    """
    requests = []
    if operation_type == "insert":
        requests = [InsertOne(doc) for doc in data_list]
    elif operation_type == "update":
        requests = [UpdateOne(filter=info[0], update=info[1])
                    for info in data_list]
    elif operation_type == "delete":
        requests = [DeleteOne(filter=info) for info in data_list]
    elif operation_type == "multi_type":
        for op in data_list.values():
            if op["op_type"] == "insert":
                requests.append(InsertOne(op["info"]))
            elif op["op_type"] == "update":
                requests.append(UpdateOne(filter=op["info"][0],
                                          update=op["info"][1]))
            elif op["op_type"] == "delete":
                requests.append(DeleteOne(op["info"]))
    return self.db[set_name].bulk_write(requests, ordered=ordered)
def non_retryable_single_statement_ops(coll):
    """Return (method, args, kwargs) triples that are not retryable —
    all of them multi-document writes."""
    multi_update_batch = [UpdateOne({}, {'$set': {'a': 1}}),
                          UpdateMany({}, {'$set': {'a': 1}})]
    multi_delete_batch = [DeleteOne({}), DeleteMany({})]
    return [
        (coll.bulk_write, [multi_update_batch], {}),
        (coll.bulk_write, [multi_delete_batch], {}),
        (coll.update_many, [{}, {'$set': {'a': 1}}], {}),
        (coll.delete_many, [{}], {}),
    ]
def flush(self) -> None: """ Will raise Exception if DB issue occurs """ # No log entries if not self.log_dicts: return try: # Empty referenced logs entries dicts = self.log_dicts self.log_dicts = [] self.prev_record = None self.col.insert_many(dicts, ordered=False) except BulkWriteError as bwe: upserts = [] # Recovery procedure for 'already existing logs' # In production, we should process alerts only once (per channel(s)) # but during testing, reprocessing may occur. # In this case, we overwrite previous rejected logs for err_dict in bwe.details.get('writeErrors', []): # 'code': 11000, 'errmsg': 'E11000 duplicate key error collection: ... if err_dict.get("code") == 11000: lid = {'_id': err_dict['op'].pop('_id')} del err_dict['op']['stock'] upserts.append(UpdateOne(lid, {'$set': err_dict['op']})) if len(upserts) != len(bwe.details.get('writeErrors', [])): LoggingErrorReporter.report(self, bwe, bwe.details) raise AmpelLoggingError from None self.logger.warn("Overwriting rejected alerts logs") try: # Try again, with updates this time self.col.bulk_write(upserts, ordered=False) return except BulkWriteError as bwee: LoggingErrorReporter.report(self, bwe, bwe.details) LoggingErrorReporter.report(self, bwee, bwee.details) raise AmpelLoggingError from None except Exception as e: LoggingErrorReporter.report(self, e) # If we can no longer keep track of what Ampel is doing, # better raise Exception to stop processing raise AmpelLoggingError from None
def test_batch_splitting(self):
    """Test retry succeeds after failures during batch splitting."""
    # ~15 MB per document forces the driver to split this bulk into
    # several wire-level batches.
    large = 's' * 1024 * 1024 * 15
    coll = self.db.retryable_write_test
    coll.delete_many({})
    self.listener.results.clear()
    coll.bulk_write([
        InsertOne({
            '_id': 1,
            'l': large
        }),
        InsertOne({
            '_id': 2,
            'l': large
        }),
        InsertOne({
            '_id': 3,
            'l': large
        }),
        UpdateOne({
            '_id': 1,
            'l': large
        }, {
            '$unset': {
                'l': 1
            },
            '$inc': {
                'count': 1
            }
        }),
        UpdateOne({
            '_id': 2,
            'l': large
        }, {'$set': {
            'foo': 'bar'
        }}),
        DeleteOne({'l': large}),
        DeleteOne({'l': large})
    ])
    # Each command should fail and be retried.
    # NOTE(review): 14 started events implies 7 commands, each retried
    # once — presumably legacy (non-OP_MSG) batching; confirm against
    # the suite's server/wire-version setup.
    self.assertEqual(len(self.listener.results['started']), 14)
    self.assertEqual(coll.find_one(), {'_id': 1, 'count': 1})
def retryable_single_statement_ops(coll):
    """Return (method, args, kwargs) triples for retryable single-statement
    writes."""
    ops = []
    # bulk_write batches that contain a single write type.
    ops.append((coll.bulk_write, [[InsertOne({}), InsertOne({})]], {}))
    ops.append((coll.bulk_write, [[InsertOne({}), InsertOne({})]],
                {'ordered': False}))
    ops.append((coll.bulk_write, [[ReplaceOne({}, {})]], {}))
    ops.append((coll.bulk_write, [[ReplaceOne({}, {}), ReplaceOne({}, {})]],
                {}))
    ops.append((coll.bulk_write, [[UpdateOne({}, {'$set': {'a': 1}}),
                                   UpdateOne({}, {'$set': {'a': 1}})]], {}))
    ops.append((coll.bulk_write, [[DeleteOne({})]], {}))
    ops.append((coll.bulk_write, [[DeleteOne({}), DeleteOne({})]], {}))
    # Single-document CRUD helpers.
    ops.append((coll.insert_one, [{}], {}))
    ops.append((coll.insert_many, [[{}, {}]], {}))
    ops.append((coll.replace_one, [{}, {}], {}))
    ops.append((coll.update_one, [{}, {'$set': {'a': 1}}], {}))
    ops.append((coll.delete_one, [{}], {}))
    ops.append((coll.find_one_and_replace, [{}, {'a': 3}], {}))
    ops.append((coll.find_one_and_update, [{}, {'$set': {'a': 1}}], {}))
    ops.append((coll.find_one_and_delete, [{}, {}], {}))
    return ops
def write_data(self, entry: BatchEntry, tweet_stream: TweetStream) -> None:
    """Store each tweet at most once, keyed by tweet id.

    $setOnInsert upserts leave already-stored tweets untouched.
    """
    requests = []
    for tweet in tweet_stream:
        doc = dict(tweet.to_json())
        doc["_id"] = tweet.id
        doc["entry_id"] = entry.id
        requests.append(
            UpdateOne({"_id": tweet.id}, {"$setOnInsert": doc}, upsert=True))
    # bulk_write rejects an empty request list, so skip when idle.
    if requests:
        self._data.bulk_write(requests)
def test_UpdateOneAndMany(self):
    # NOTE(review): the expected values below imply the fixture
    # pre-populates docs with x=42, y=123 and z=321 — confirm in setUp.
    result = yield self.coll.bulk_write([
        UpdateOne({'x': {"$exists": True}}, {"$inc": {'x': 1}}),  # simple
        UpdateOne({'y': 123}, {"$inc": {'y': -1}, "$set": {'a': "hi"}}),  # set
        UpdateOne({'z': 322}, {"$inc": {'z': 1}}),  # missed
        UpdateOne({'w': 5}, {"$set": {"u": 7}}, upsert=True),  # upsert
        UpdateMany({}, {"$set": {"m": 1}}),  # many
    ])
    docs = yield self.coll.find(fields={"_id": 0})
    self.assertEqual(len(docs), 4)
    self.assertIn({'x': 43, 'm': 1}, docs)
    self.assertIn({'y': 122, 'a': "hi", 'm': 1}, docs)
    # z untouched: the {'z': 322} filter matched nothing.
    self.assertIn({'z': 321, 'm': 1}, docs)
    self.assertIn({'w': 5, 'u': 7, 'm': 1}, docs)
    self.assertIsInstance(result, BulkWriteResult)
    self.assertEqual(result.inserted_count, 0)
    # matched = 2 single updates + 4 docs hit by UpdateMany.
    self.assertEqual(result.matched_count, 6)
    self.assertEqual(result.modified_count, 6)
    self.assertEqual(result.upserted_count, 1)
    # The upsert was request index 3 in the batch.
    self.assertEqual(set(result.upserted_ids), {3})
def save(self, searchspaces):
    """Save a hyperparameter search state.

    Parameters
    ----------
    searchspaces : list of pyrameter.domains.SearchSpace
        Experiment state to save.
    """
    # Upsert each search space keyed by its current id.
    ssupdates = [
        UpdateOne({'_id': ss.id}, ss.to_json(simplify=True), upsert=True)
        for ss in searchspaces
    ]
    result = self.connection['searchspaces'].bulk_write(ssupdates)
    # upserted_ids maps request index -> server-assigned _id; indexes
    # align with `searchspaces` because there is one request per space.
    for key, val in result.upserted_ids.items():
        searchspaces[key].id = val
    # Collect the distinct domains and the dirty (unsaved) trials across
    # all spaces.
    domainset = set()
    trials = []
    for ss in searchspaces:
        domainset = domainset.union(set([d for d in ss.domains]))
        trials.extend([r for r in ss.results if r.dirty])
    # Freeze an ordering so upserted_ids indexes match the update list.
    domainset = list(domainset)
    domupdates = [
        UpdateOne({'_id': domain.id}, domain.to_json(), upsert=True)
        for domain in domainset
    ]
    result = self.connection['domains'].bulk_write(domupdates)
    for key, val in result.upserted_ids.items():
        domainset[key].id = val
    trialupdates = [
        UpdateOne({'_id': trial.id}, trial.to_json(), upsert=True)
        for trial in trials
    ]
    result = self.connection['trials'].bulk_write(trialupdates)
    for key, val in result.upserted_ids.items():
        trials[key].id = val
def retryable_single_statement_ops(coll):
    """Return (method, args, kwargs) triples for retryable single-statement
    writes, including the deprecated helper methods."""
    ops = []
    # bulk_write batches that contain a single write type.
    ops.append((coll.bulk_write, [[InsertOne({}), InsertOne({})]], {}))
    ops.append((coll.bulk_write, [[InsertOne({}), InsertOne({})]],
                {'ordered': False}))
    ops.append((coll.bulk_write, [[ReplaceOne({}, {})]], {}))
    ops.append((coll.bulk_write, [[ReplaceOne({}, {}), ReplaceOne({}, {})]],
                {}))
    ops.append((coll.bulk_write, [[UpdateOne({}, {'$set': {'a': 1}}),
                                   UpdateOne({}, {'$set': {'a': 1}})]], {}))
    ops.append((coll.bulk_write, [[DeleteOne({})]], {}))
    ops.append((coll.bulk_write, [[DeleteOne({}), DeleteOne({})]], {}))
    # Single-document CRUD helpers.
    ops.append((coll.insert_one, [{}], {}))
    ops.append((coll.insert_many, [[{}, {}]], {}))
    ops.append((coll.replace_one, [{}, {}], {}))
    ops.append((coll.update_one, [{}, {'$set': {'a': 1}}], {}))
    ops.append((coll.delete_one, [{}], {}))
    ops.append((coll.find_one_and_replace, [{}, {'a': 3}], {}))
    ops.append((coll.find_one_and_update, [{}, {'$set': {'a': 1}}], {}))
    ops.append((coll.find_one_and_delete, [{}, {}], {}))
    # Deprecated methods.
    # Insert with single or multiple documents.
    ops.append((coll.insert, [{}], {}))
    ops.append((coll.insert, [[{}]], {}))
    ops.append((coll.insert, [[{}, {}]], {}))
    # Save with and without an _id.
    ops.append((coll.save, [{}], {}))
    ops.append((coll.save, [{'_id': ObjectId()}], {}))
    # Non-multi update.
    ops.append((coll.update, [{}, {'$set': {'a': 1}}], {}))
    # Non-multi remove.
    ops.append((coll.remove, [{}], {'multi': False}))
    # Replace / update / delete via find_and_modify.
    ops.append((coll.find_and_modify, [{}, {'a': 3}], {}))
    ops.append((coll.find_and_modify, [{}, {'$set': {'a': 1}}], {}))
    ops.append((coll.find_and_modify, [{}, {}], {'remove': True}))
    return ops
def bulk_update_field(self, collection, updates):
    """Apply single-field updates in bulk.

    :param collection: kept for interface compatibility — the original
        shadowed this parameter immediately; each update tuple names its
        own collection.
    :param updates: iterable of (collection, document_id, field, value)
        tuples; documents are matched on their 'code' field.

    Bug fix: the original rebound ``collection`` inside the loop and then
    issued *every* accumulated update against ``self.mongodb[collection]``
    — i.e. the collection of the LAST tuple.  Updates are now grouped and
    written to the collection each tuple names.  Empty groups are skipped
    (bulk_write raises on an empty request list).
    """
    grouped = {}
    for coll_name, document_id, field, value in updates:
        grouped.setdefault(coll_name, []).append(
            UpdateOne({'code': document_id},
                      {'$set': {field: value}},
                      upsert=False))
    for coll_name, requests in grouped.items():
        try:
            self.mongodb[coll_name].bulk_write(requests, ordered=False)
        except errors.BulkWriteError:
            # Ignore bulk errors, the errors are mainly related to legacy
            # issues like duplicated keys in the legacy databases.
            pass
def test_unacknowledged_write(self):
    """Collation combined with w=0 must raise ConfigurationError."""
    collection = self.db.get_collection(
        'test', write_concern=WriteConcern(w=0))
    # Direct update_one with a collation.
    with self.assertRaises(ConfigurationError):
        collection.update_one({'hello': 'world'},
                              {'$set': {'hello': 'moon'}},
                              collation=self.collation)
    # Same request routed through bulk_write.
    op = UpdateOne({'hello': 'world'}, {'$set': {'hello': 'moon'}},
                   collation=self.collation)
    with self.assertRaises(ConfigurationError):
        collection.bulk_write([op])
def bulk_inc_ngram_count(self, documents):
    """Upsert each ngram document and bump its count.

    *documents* yields (document, count) pairs; the upsert key is the
    (rule_num, match, ngram) triple.
    """
    operations = []
    for document, count in documents:
        key = {
            'rule_num': document['rule_num'],
            'match': document['match'],
            'ngram': document['ngram'],
        }
        operations.append(
            UpdateOne(key,
                      {'$set': document, '$inc': {'count': count}},
                      upsert=True))
    res = self.ngram_collection.bulk_write(operations, ordered=False)
    print(res)
def add_ngrams(self, ngram_set):
    """Upsert one document per (key, ngram), recording sentence and score."""
    operations = []
    for key, ngram, sent, score in ngram_set:
        fields = {
            'key': key,
            'ngram': ngram,
            'sent': sent,
            'sent_score': score,
        }
        operations.append(
            UpdateOne({'key': key, 'ngram': ngram},
                      {'$set': fields},
                      upsert=True))
    res = self.ngram_collection.bulk_write(operations, ordered=False)
    logging.info(res)
def test_unacknowledged_write(self):
    """Both bulk APIs must reject a collation under w=0."""
    collection = self.db.get_collection(
        'test', write_concern=WriteConcern(w=0))
    # Direct update_one with a collation.
    with self.assertRaises(ConfigurationError):
        collection.update_one({'hello': 'world'},
                              {'$set': {'hello': 'moon'}},
                              collation=self.collation)
    # Deprecated fluent bulk API.
    bulk = collection.initialize_ordered_bulk_op()
    bulk.find({'hello': 'world'}, collation=self.collation).update_one(
        {'$set': {'hello': 'moon'}})
    with self.assertRaises(ConfigurationError):
        bulk.execute()
    # Modern bulk_write API.
    op = UpdateOne({'hello': 'world'}, {'$set': {'hello': 'moon'}},
                   collation=self.collation)
    with self.assertRaises(ConfigurationError):
        collection.bulk_write([op])
def add_patterns(self, pattern_counter):
    """Upsert one document per (key, norm_pattern) and add its count.

    *pattern_counter* maps (key, norm_pattern) tuples to counts.
    """
    operations = []
    for (key, norm_pattern), count in pattern_counter.items():
        operations.append(
            UpdateOne({'key': key, 'norm_pattern': norm_pattern},
                      {'$set': {'key': key, 'norm_pattern': norm_pattern},
                       '$inc': {'count': count}},
                      upsert=True))
    res = self.pattern_collection.bulk_write(operations, ordered=False)
    logging.info(res)