def test_batch_splitting(self):
    """Test retry succeeds after failures during batch splitting."""
    large = 's' * 1024 * 1024 * 15
    coll = self.db.retryable_write_test
    coll.delete_many({})
    self.listener.results.clear()
    bulk_result = coll.bulk_write([
        InsertOne({'_id': 1, 'l': large}),
        InsertOne({'_id': 2, 'l': large}),
        InsertOne({'_id': 3, 'l': large}),
        UpdateOne({'_id': 1, 'l': large},
                  {'$unset': {'l': 1}, '$inc': {'count': 1}}),
        UpdateOne({'_id': 2, 'l': large}, {'$set': {'foo': 'bar'}}),
        DeleteOne({'l': large}),
        DeleteOne({'l': large})])
    # Each command should fail and be retried.
    # With OP_MSG the 3 inserts are one batch, the 2 updates another,
    # and the 2 deletes a third.
    self.assertEqual(len(self.listener.results['started']), 6)
    self.assertEqual(coll.find_one(), {'_id': 1, 'count': 1})
    # Assert the final result.
    expected_result = {
        "writeErrors": [],
        "writeConcernErrors": [],
        "nInserted": 3,
        "nUpserted": 0,
        "nMatched": 2,
        "nModified": 2,
        "nRemoved": 2,
        "upserted": [],
    }
    self.assertEqual(bulk_result.bulk_api_result, expected_result)
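# A hedged context sketch (not from the original test): the ~15 MB documents
# above force the driver to split the bulk into several insert/update/delete
# commands because each message is capped by server-advertised limits. Those
# limits can be inspected with the hello command ('isMaster' on servers older
# than 4.4.2); the localhost connection below is an assumption.
from pymongo import MongoClient

client = MongoClient()
hello = client.admin.command('hello')
print(hello['maxBsonObjectSize'])    # 16777216 (16 MiB) on stock servers
print(hello['maxMessageSizeBytes'])  # typically 48000000
print(hello['maxWriteBatchSize'])    # typically 100000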
def test_03_bulk_batch_split(self):
    doc1 = {'_id': 'over_2mib_1', 'unencrypted': 'a' * _2_MiB}
    doc2 = {'_id': 'over_2mib_2', 'unencrypted': 'a' * _2_MiB}
    self.listener.reset()
    self.coll_encrypted.bulk_write([InsertOne(doc1), InsertOne(doc2)])
    self.assertEqual(self.listener.started_command_names(),
                     ['insert', 'insert'])
def test_batch_splitting_retry_fails(self):
    """Test retry fails during batch splitting."""
    large = 's' * 1024 * 1024 * 15
    coll = self.db.retryable_write_test
    coll.delete_many({})
    self.client.admin.command(SON([
        ('configureFailPoint', 'onPrimaryTransactionalWrite'),
        ('mode', {'skip': 3}),  # The number of _documents_ to skip.
        ('data', {'failBeforeCommitExceptionCode': 1})]))
    self.listener.results.clear()
    with self.client.start_session() as session:
        initial_txn = session._server_session._transaction_id
        try:
            coll.bulk_write([InsertOne({'_id': 1, 'l': large}),
                             InsertOne({'_id': 2, 'l': large}),
                             InsertOne({'_id': 3, 'l': large}),
                             InsertOne({'_id': 4, 'l': large})],
                            session=session)
        except ConnectionFailure:
            pass
        else:
            self.fail("bulk_write should have failed")

        started = self.listener.results['started']
        self.assertEqual(len(started), 3)
        self.assertEqual(len(self.listener.results['succeeded']), 1)
        expected_txn = Int64(initial_txn + 1)
        self.assertEqual(started[0].command['txnNumber'], expected_txn)
        self.assertEqual(started[0].command['lsid'], session.session_id)
        expected_txn = Int64(initial_txn + 2)
        self.assertEqual(started[1].command['txnNumber'], expected_txn)
        self.assertEqual(started[1].command['lsid'], session.session_id)
        started[1].command.pop('$clusterTime')
        started[2].command.pop('$clusterTime')
        self.assertEqual(started[1].command, started[2].command)
        final_txn = session._server_session._transaction_id
        self.assertEqual(final_txn, expected_txn)
    self.assertEqual(coll.find_one(projection={'_id': True}), {'_id': 1})
def test_04_bulk_batch_split(self):
    limits_doc = json_data('limits', 'limits-doc.json')
    doc1 = {'_id': 'encryption_exceeds_2mib_1',
            'unencrypted': 'a' * (_2_MiB - 2000)}
    doc1.update(limits_doc)
    doc2 = {'_id': 'encryption_exceeds_2mib_2',
            'unencrypted': 'a' * (_2_MiB - 2000)}
    doc2.update(limits_doc)
    self.listener.reset()
    self.coll_encrypted.bulk_write([InsertOne(doc1), InsertOne(doc2)])
    self.assertEqual(self.listener.started_command_names(),
                     ['insert', 'insert'])
def test_04_bulk_batch_split(self):
    doc1 = {'_id': 'no_encryption_under_2mib_1',
            'unencrypted': 'a' * ((2**21) - 1000)}
    doc2 = {'_id': 'no_encryption_under_2mib_2',
            'unencrypted': 'a' * ((2**21) - 1000)}
    self.listener.reset()
    self.coll_encrypted.bulk_write([InsertOne(doc1), InsertOne(doc2)])
    self.assertEqual(self.listener.started_command_names(),
                     ['insert', 'insert'])
def main():
    ac = AnalyzerContext(verbose=True)
    max_action_id, upload_ids = ac.action_set.direct()
    # Analyze one upload per run.
    upload_ids = [upload_ids[0]] if len(upload_ids) > 0 else []
    print("running it with: ", max_action_id, upload_ids)
    ac.set_result_info_direct(max_action_id, upload_ids)
    # Complicated way to get data file by file out of HDFS.
    files = ac.spark_uploads_direct()
    filenames = files.map(lambda x: x[0]).collect()
    for filename in filenames:
        metadata, data = files.lookup(filename)[0]
        upload_action_id = metadata['action_id'][ac.environment]
        merged = dataprep.prepare_data(filename, metadata, data)
        for group in grouper(merged.iterrows(), 1000):
            bulk = []
            for _, row in group:
                obsns = analysis.create_observations(upload_action_id, row)
                for obs in obsns:
                    bulk.append(InsertOne(obs))
            ac.temporary_coll.bulk_write(bulk)
def test_transaction_starts_with_batched_write(self):
    if 'PyPy' in sys.version and client_context.tls:
        self.skipTest('PYTHON-2937 PyPy is so slow sending large '
                      'messages over TLS that this test fails')
    # Start a transaction with a batch of operations that needs to be
    # split.
    listener = OvertCommandListener()
    client = rs_client(event_listeners=[listener])
    coll = client[self.db.name].test
    coll.delete_many({})
    listener.reset()
    self.addCleanup(client.close)
    self.addCleanup(coll.drop)
    large_str = '\0' * (10 * 1024 * 1024)
    ops = [InsertOne({'a': large_str}) for _ in range(10)]
    with client.start_session() as session:
        with session.start_transaction():
            coll.bulk_write(ops, session=session)
    # Assert commands were constructed properly.
    self.assertEqual(['insert', 'insert', 'insert', 'commitTransaction'],
                     listener.started_command_names())
    first_cmd = listener.results['started'][0].command
    self.assertTrue(first_cmd['startTransaction'])
    lsid = first_cmd['lsid']
    txn_number = first_cmd['txnNumber']
    for event in listener.results['started'][1:]:
        self.assertNotIn('startTransaction', event.command)
        self.assertEqual(lsid, event.command['lsid'])
        self.assertEqual(txn_number, event.command['txnNumber'])
    self.assertEqual(10, coll.count_documents({}))
def test_transaction_write_concern_override(self):
    """Test txn overrides Client/Database/Collection write_concern."""
    client = rs_client(w=0)
    self.addCleanup(client.close)
    db = client.test
    coll = db.test
    coll.insert_one({})
    with client.start_session() as s:
        with s.start_transaction(write_concern=WriteConcern(w=1)):
            self.assertTrue(coll.insert_one({}, session=s).acknowledged)
            self.assertTrue(
                coll.insert_many([{}, {}], session=s).acknowledged)
            self.assertTrue(
                coll.bulk_write([InsertOne({})], session=s).acknowledged)
            self.assertTrue(coll.replace_one({}, {}, session=s).acknowledged)
            self.assertTrue(
                coll.update_one(
                    {}, {"$set": {"a": 1}}, session=s).acknowledged)
            self.assertTrue(
                coll.update_many(
                    {}, {"$set": {"a": 1}}, session=s).acknowledged)
            self.assertTrue(coll.delete_one({}, session=s).acknowledged)
            self.assertTrue(coll.delete_many({}, session=s).acknowledged)
            coll.find_one_and_delete({}, session=s)
            coll.find_one_and_replace({}, {}, session=s)
            coll.find_one_and_update({}, {"$set": {"a": 1}}, session=s)

    unsupported_txn_writes = [
        (client.drop_database, [db.name], {}),
        (db.create_collection, ['collection'], {}),
        (db.drop_collection, ['collection'], {}),
        (coll.drop, [], {}),
        (coll.map_reduce,
         ['function() {}', 'function() {}', 'output'], {}),
        (coll.rename, ['collection2'], {}),
        # Drop collection2 between tests of "rename", above.
        (coll.database.drop_collection, ['collection2'], {}),
        (coll.create_indexes, [[IndexModel('a')]], {}),
        (coll.create_index, ['a'], {}),
        (coll.drop_index, ['a_1'], {}),
        (coll.drop_indexes, [], {}),
        (coll.aggregate, [[{"$out": "aggout"}]], {}),
    ]
    for op in unsupported_txn_writes:
        op, args, kwargs = op
        with client.start_session() as s:
            kwargs['session'] = s
            s.start_transaction(write_concern=WriteConcern(w=1))
            with self.assertRaises(OperationFailure):
                op(*args, **kwargs)
            s.abort_transaction()
def test_05_insert_succeeds_just_under_16MiB(self):
    doc = {'_id': 'under_16mib', 'unencrypted': 'a' * (_16_MiB - 2000)}
    self.coll_encrypted.insert_one(doc)

    # Same with bulk_write.
    doc['_id'] = 'under_16mib_bulk'
    self.coll_encrypted.bulk_write([InsertOne(doc)])
def test_01_insert_succeeds_under_2MiB(self):
    doc = {'_id': 'over_2mib_under_16mib', 'unencrypted': 'a' * _2_MiB}
    self.coll_encrypted.insert_one(doc)

    # Same with bulk_write.
    doc['_id'] = 'over_2mib_under_16mib_bulk'
    self.coll_encrypted.bulk_write([InsertOne(doc)])
def bulk_operation(self, set_name, data_list, operation_type, ordered=False):
    requests = []
    if operation_type == "insert":
        requests = [InsertOne(doc) for doc in data_list]
    elif operation_type == "update":
        requests = [UpdateOne(filter=info[0], update=info[1])
                    for info in data_list]
    elif operation_type == "delete":
        requests = [DeleteOne(filter=info) for info in data_list]
    elif operation_type == "multi_type":
        for key in data_list:
            if data_list[key]["op_type"] == "insert":
                requests.append(InsertOne(data_list[key]["info"]))
            elif data_list[key]["op_type"] == "update":
                requests.append(UpdateOne(filter=data_list[key]["info"][0],
                                          update=data_list[key]["info"][1]))
            elif data_list[key]["op_type"] == "delete":
                requests.append(DeleteOne(data_list[key]["info"]))
    return self.db[set_name].bulk_write(requests, ordered=ordered)
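# A hedged usage sketch for bulk_operation above; `dao` is an assumed
# instance of the class defining it, with `self.db` a pymongo Database.
dao.bulk_operation('events', [{'type': 'click'}, {'type': 'view'}], 'insert')
dao.bulk_operation('events',
                   [({'type': 'click'}, {'$set': {'seen': True}})],
                   'update')  # update entries are (filter, update) pairs
dao.bulk_operation('events', [{'type': 'view'}], 'delete')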
def assertWriteOpsRaise(self, write_concern, expected_exception):
    wc = write_concern.document
    # Set socket timeout to avoid indefinite stalls.
    client = rs_or_single_client(
        w=wc['w'], wTimeoutMS=wc['wtimeout'], socketTimeoutMS=30000)
    db = client.get_database('pymongo_test')
    coll = db.test

    def insert_command():
        coll.database.command(
            'insert', 'new_collection', documents=[{}],
            writeConcern=write_concern.document,
            parse_write_concern_error=True)

    ops = [
        ('insert_one', lambda: coll.insert_one({})),
        ('insert_many', lambda: coll.insert_many([{}, {}])),
        ('update_one', lambda: coll.update_one({}, {'$set': {'x': 1}})),
        ('update_many', lambda: coll.update_many({}, {'$set': {'x': 1}})),
        ('delete_one', lambda: coll.delete_one({})),
        ('delete_many', lambda: coll.delete_many({})),
        ('bulk_write', lambda: coll.bulk_write([InsertOne({})])),
        ('command', insert_command),
    ]
    ops_require_34 = [
        ('aggregate', lambda: coll.aggregate([{'$out': 'out'}])),
        ('create', lambda: db.create_collection('new')),
        ('rename', lambda: coll.rename('new')),
        ('drop', lambda: db.new.drop()),
    ]
    if client_context.version > (3, 4):
        ops.extend(ops_require_34)
    # SERVER-34776: dropDatabase does not respect wtimeout in 3.6.
    if client_context.version[:2] != (3, 6):
        ops.append(('drop_database', lambda: client.drop_database(db)))
    # SERVER-46668: createIndexes does not respect wtimeout in 4.4+.
    if client_context.version <= (4, 3):
        ops.extend([
            ('create_index',
             lambda: coll.create_index([('a', DESCENDING)])),
            ('create_indexes',
             lambda: coll.create_indexes([IndexModel('b')])),
            ('drop_index',
             lambda: coll.drop_index([('a', DESCENDING)])),
        ])

    for name, f in ops:
        # Ensure insert_many and bulk_write still raise BulkWriteError.
        if name in ('insert_many', 'bulk_write'):
            expected = BulkWriteError
        else:
            expected = expected_exception
        with self.assertRaises(expected, msg=name) as cm:
            f()
        if expected == BulkWriteError:
            bulk_result = cm.exception.details
            wc_errors = bulk_result['writeConcernErrors']
            self.assertTrue(wc_errors)
def test_02_insert_succeeds_over_2MiB_post_encryption(self):
    doc = {'_id': 'encryption_exceeds_2mib',
           'unencrypted': 'a' * ((2**21) - 2000)}
    doc.update(json_data('limits', 'limits-doc.json'))
    self.coll_encrypted.insert_one(doc)

    # Same with bulk_write.
    doc['_id'] = 'encryption_exceeds_2mib_bulk'
    self.coll_encrypted.bulk_write([InsertOne(doc)])
def _new_empty(cls, database, _id=None):
    if not _id:
        _id = ObjectId()
    mdoc = {"_id": _id}
    obj = cls(database, mdoc)
    obj._add_operation(InsertOne(mdoc))
    return obj
def test_batch_splitting(self):
    """Test retry succeeds after failures during batch splitting."""
    large = 's' * 1024 * 1024 * 15
    coll = self.db.retryable_write_test
    coll.delete_many({})
    self.listener.results.clear()
    coll.bulk_write([
        InsertOne({'_id': 1, 'l': large}),
        InsertOne({'_id': 2, 'l': large}),
        InsertOne({'_id': 3, 'l': large}),
        UpdateOne({'_id': 1, 'l': large},
                  {'$unset': {'l': 1}, '$inc': {'count': 1}}),
        UpdateOne({'_id': 2, 'l': large}, {'$set': {'foo': 'bar'}}),
        DeleteOne({'l': large}),
        DeleteOne({'l': large}),
    ])
    # Each command should fail and be retried.
    self.assertEqual(len(self.listener.results['started']), 14)
    self.assertEqual(coll.find_one(), {'_id': 1, 'count': 1})
def retryable_single_statement_ops(coll):
    return [
        (coll.bulk_write, [[InsertOne({}), InsertOne({})]], {}),
        (coll.bulk_write, [[InsertOne({}), InsertOne({})]],
         {'ordered': False}),
        (coll.bulk_write, [[ReplaceOne({}, {})]], {}),
        (coll.bulk_write, [[ReplaceOne({}, {}), ReplaceOne({}, {})]], {}),
        (coll.bulk_write, [[UpdateOne({}, {'$set': {'a': 1}}),
                            UpdateOne({}, {'$set': {'a': 1}})]], {}),
        (coll.bulk_write, [[DeleteOne({})]], {}),
        (coll.bulk_write, [[DeleteOne({}), DeleteOne({})]], {}),
        (coll.insert_one, [{}], {}),
        (coll.insert_many, [[{}, {}]], {}),
        (coll.replace_one, [{}, {}], {}),
        (coll.update_one, [{}, {'$set': {'a': 1}}], {}),
        (coll.delete_one, [{}], {}),
        (coll.find_one_and_replace, [{}, {'a': 3}], {}),
        (coll.find_one_and_update, [{}, {'$set': {'a': 1}}], {}),
        (coll.find_one_and_delete, [{}, {}], {}),
    ]
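# A minimal driver sketch (assumed, not from the original source) showing how
# an op table like retryable_single_statement_ops() is typically consumed:
# each entry is a (bound method, args, kwargs) triple executed in turn.
from pymongo import MongoClient

client = MongoClient(retryWrites=True)  # assumes a local replica set
coll = client.test.retryable_ops
for method, args, kwargs in retryable_single_statement_ops(coll):
    method(*args, **kwargs)  # each call is a single retryable write statement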
def test_omit_default_read_write_concern(self):
    listener = EventListener()
    # Client with default readConcern and writeConcern.
    client = rs_or_single_client(event_listeners=[listener])
    collection = client.pymongo_test.collection
    # Prepare for tests of find() and aggregate().
    collection.insert_many([{} for _ in range(10)])
    self.addCleanup(collection.drop)
    self.addCleanup(client.pymongo_test.collection2.drop)

    # Commands MUST NOT send the default read/write concern to the server.
    def rename_and_drop():
        # Ensure the collection exists.
        collection.insert_one({})
        collection.rename('collection2')
        client.pymongo_test.collection2.drop()

    def insert_command_default_write_concern():
        collection.database.command(
            'insert', 'collection', documents=[{}],
            write_concern=WriteConcern())

    ops = [
        ('aggregate', lambda: list(collection.aggregate([]))),
        ('find', lambda: list(collection.find())),
        ('insert_one', lambda: collection.insert_one({})),
        ('update_one',
         lambda: collection.update_one({}, {'$set': {'x': 1}})),
        ('update_many',
         lambda: collection.update_many({}, {'$set': {'x': 1}})),
        ('delete_one', lambda: collection.delete_one({})),
        ('delete_many', lambda: collection.delete_many({})),
        ('bulk_write', lambda: collection.bulk_write([InsertOne({})])),
        ('rename_and_drop', rename_and_drop),
        ('command', insert_command_default_write_concern),
    ]

    for name, f in ops:
        listener.results.clear()
        f()
        self.assertGreaterEqual(len(listener.results['started']), 1)
        for event in listener.results['started']:
            self.assertNotIn(
                'readConcern', event.command,
                "%s sent default readConcern with %s"
                % (name, event.command_name))
            self.assertNotIn(
                'writeConcern', event.command,
                "%s sent default writeConcern with %s"
                % (name, event.command_name))
def test_02_insert_fails_over_2MiB(self):
    doc = {'_id': 'no_encryption_over_2mib',
           'unencrypted': 'a' * (2**21 + _COMMAND_OVERHEAD)}
    with self.assertRaises(DocumentTooLarge):
        self.coll_encrypted.insert_one(doc)
    with self.assertRaises(DocumentTooLarge):
        self.coll_encrypted.insert_many([doc])
    with self.assertRaises(DocumentTooLarge):
        self.coll_encrypted.bulk_write([InsertOne(doc)])
def test_raise_max_wire_version_error(self):
    opts = AutoEncryptionOpts(KMS_PROVIDERS, 'keyvault.datakeys')
    client = rs_or_single_client(auto_encryption_opts=opts)
    self.addCleanup(client.close)
    msg = 'Auto-encryption requires a minimum MongoDB version of 4.2'
    with self.assertRaisesRegex(ConfigurationError, msg):
        client.test.test.insert_one({})
    with self.assertRaisesRegex(ConfigurationError, msg):
        client.admin.command('isMaster')
    with self.assertRaisesRegex(ConfigurationError, msg):
        client.test.test.find_one({})
    with self.assertRaisesRegex(ConfigurationError, msg):
        client.test.test.bulk_write([InsertOne({})])
def test_01_insert_succeeds_under_2MiB(self):
    doc = {'_id': 'no_encryption_under_2mib',
           'unencrypted': 'a' * ((2**21) - 1000)}
    self.coll_encrypted.insert_one(doc)

    # Same with bulk_write.
    doc = {'_id': 'no_encryption_under_2mib_bulk',
           'unencrypted': 'a' * ((2**21) - 1000)}
    self.coll_encrypted.bulk_write([InsertOne(doc)])
def bulkInsert(collection, data: list):
    if len(data) > 0:
        requests = []
        for item in data:
            requests.append(InsertOne(item))
        try:
            Configuration.getMongoConnection().get_collection(
                collection).bulk_write(requests, ordered=False)
        except BulkWriteError as e:
            print(e.details)
            print(e.details["writeErrors"])
def retryable_single_statement_ops(coll):
    return [
        (coll.bulk_write, [[InsertOne({}), InsertOne({})]], {}),
        (coll.bulk_write, [[InsertOne({}), InsertOne({})]],
         {'ordered': False}),
        (coll.bulk_write, [[ReplaceOne({}, {})]], {}),
        (coll.bulk_write, [[ReplaceOne({}, {}), ReplaceOne({}, {})]], {}),
        (coll.bulk_write, [[UpdateOne({}, {'$set': {'a': 1}}),
                            UpdateOne({}, {'$set': {'a': 1}})]], {}),
        (coll.bulk_write, [[DeleteOne({})]], {}),
        (coll.bulk_write, [[DeleteOne({}), DeleteOne({})]], {}),
        (coll.insert_one, [{}], {}),
        (coll.insert_many, [[{}, {}]], {}),
        (coll.replace_one, [{}, {}], {}),
        (coll.update_one, [{}, {'$set': {'a': 1}}], {}),
        (coll.delete_one, [{}], {}),
        (coll.find_one_and_replace, [{}, {'a': 3}], {}),
        (coll.find_one_and_update, [{}, {'$set': {'a': 1}}], {}),
        (coll.find_one_and_delete, [{}, {}], {}),
        # Deprecated methods.
        # Insert with single or multiple documents.
        (coll.insert, [{}], {}),
        (coll.insert, [[{}]], {}),
        (coll.insert, [[{}, {}]], {}),
        # Save with and without an _id.
        (coll.save, [{}], {}),
        (coll.save, [{'_id': ObjectId()}], {}),
        # Non-multi update.
        (coll.update, [{}, {'$set': {'a': 1}}], {}),
        # Non-multi remove.
        (coll.remove, [{}], {'multi': False}),
        # Replace.
        (coll.find_and_modify, [{}, {'a': 3}], {}),
        # Update.
        (coll.find_and_modify, [{}, {'$set': {'a': 1}}], {}),
        # Delete.
        (coll.find_and_modify, [{}, {}], {'remove': True}),
    ]
def test_06_insert_fails_over_16MiB(self):
    limits_doc = json_data('limits', 'limits-doc.json')
    doc = {'_id': 'encryption_exceeds_16mib',
           'unencrypted': 'a' * (_16_MiB - 2000)}
    doc.update(limits_doc)
    with self.assertRaisesRegex(WriteError, 'object to insert too large'):
        self.coll_encrypted.insert_one(doc)

    # Same with bulk_write.
    doc['_id'] = 'encryption_exceeds_16mib_bulk'
    with self.assertRaises(BulkWriteError) as ctx:
        self.coll_encrypted.bulk_write([InsertOne(doc)])
    err = ctx.exception.details['writeErrors'][0]
    self.assertEqual(2, err['code'])
    self.assertIn('object to insert too large', err['errmsg'])
def bulkLoadAnimalTestData(insert_count: int = 100000):
    print('Loading sample animal data')
    client = MongoClient(connectionString)
    db = client.get_database("rescueshelter")
    col = db.get_collection("animals")
    batches = bulkWriteListSizes(insert_count)
    for batch_size in batches:
        starttime = datetime.datetime.now()
        col.bulk_write([
            InsertOne({
                'name': ''.join(word_generator(wordTemplate, wordSize)),
                'description': description,
                'image': {
                    'content': animalImageIconType_choice(animalImageIconTypes),
                    'contenttype': 'icon'
                },
                'category': animalCategoryType_choice(animalCategoryTypes),
                'endangered': endangeredTypes_choice(endangeredTypes),
                'data': populationData(population_generator(size_min, size_max)),
                'dates': {
                    'created': datetime.datetime.utcnow(),
                    'modified': datetime.datetime.utcnow()
                },
                'sponsors': []
            }) for _ in range(batch_size)
        ], ordered=False)
        endtime = datetime.datetime.now()
        print(f'Duration: {endtime - starttime}')
    client.close()
def run(self):
    # Process comments 100 documents at a time.
    r_conn = get_comments()
    w_conn = get_single_comments()
    count = 0
    while True:
        requests = []
        # Materialize the batch so the emptiness check below does not
        # silently consume a document from the cursor.
        batch = list(r_conn.find({}).skip(count).limit(100))
        if not batch:
            break
        for item in batch:
            if item.get('comments'):
                for i in item['comments']:
                    requests.append(InsertOne(i))
        count += 100
        print('writing!!')
        if requests:  # bulk_write raises InvalidOperation on an empty list
            w_conn.bulk_write(requests)
        print('..done')
def test_error_includes_errInfo(self):
    expected_wce = {
        "code": 100,
        "codeName": "UnsatisfiableWriteConcern",
        "errmsg": "Not enough data-bearing nodes",
        "errInfo": {
            "writeConcern": {
                "w": 2,
                "wtimeout": 0,
                "provenance": "clientSupplied"
            }
        }
    }
    cause_wce = {
        "configureFailPoint": "failCommand",
        "mode": {"times": 2},
        "data": {
            "failCommands": ["insert"],
            "writeConcernError": expected_wce
        },
    }
    with self.fail_point(cause_wce):
        # Write concern error on insert includes errInfo.
        with self.assertRaises(WriteConcernError) as ctx:
            self.db.test.insert_one({})
        self.assertEqual(ctx.exception.details, expected_wce)

        # Test bulk_write as well.
        with self.assertRaises(BulkWriteError) as ctx:
            self.db.test.bulk_write([InsertOne({})])
        expected_details = {
            'writeErrors': [],
            'writeConcernErrors': [expected_wce],
            'nInserted': 1,
            'nUpserted': 0,
            'nMatched': 0,
            'nModified': 0,
            'nRemoved': 0,
            'upserted': []
        }
        self.assertEqual(ctx.exception.details, expected_details)
                   'ok': 1, 'n': 1
               }),
WriteOperation('remove-w0',
               lambda coll: coll.with_options(
                   write_concern=WriteConcern(w=0)).remove({"_id": 1}),
               request=OpMsg({"delete": "coll"},
                             flags=OP_MSG_FLAGS['moreToCome']),
               reply=None),
WriteOperation('remove-w0-argument',
               lambda coll: coll.remove({"_id": 1}, w=0),
               request=OpMsg({"delete": "coll"},
                             flags=OP_MSG_FLAGS['moreToCome']),
               reply=None),
WriteOperation('bulk_write_insert',
               lambda coll: coll.bulk_write([InsertOne({}), InsertOne({})]),
               request=OpMsg({"insert": "coll"}, flags=0),
               reply={'ok': 1, 'n': 2}),
WriteOperation('bulk_write_insert-w0',
               lambda coll: coll.with_options(
                   write_concern=WriteConcern(w=0)).bulk_write(
                       [InsertOne({}), InsertOne({})]),
               request=OpMsg({"insert": "coll"}, flags=0),
               reply={'ok': 1, 'n': 2}),
def test_InsertOneNotEquals(self):
    self.assertNotEqual(InsertOne({'foo': 42}), InsertOne({'foo': 23}))
def bulk_save(self, doc):
    self._requests.append(InsertOne(doc))
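# A hedged companion sketch for bulk_save above: the method only queues
# InsertOne requests, so something must eventually flush them. The class,
# the `flush` name, and the ordered=False choice are assumptions for
# illustration, not from the original source.
from pymongo import InsertOne

class BulkSaver:
    def __init__(self, collection):
        self._collection = collection  # a pymongo Collection
        self._requests = []

    def bulk_save(self, doc):
        self._requests.append(InsertOne(doc))

    def flush(self):
        # Send all queued inserts in one bulk_write round trip; guard the
        # empty case because bulk_write raises InvalidOperation on [].
        if self._requests:
            self._collection.bulk_write(self._requests, ordered=False)
            self._requests = []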
def insert_single_comments(hot_comments, nor_comments):
    conn = get_single_comments()
    requests = [InsertOne(item) for item in hot_comments + nor_comments]
    if requests:
        conn.bulk_write(requests)