def insert_many(self, entries):
    """
    Insert an iterable of documents.

    In the event that a BulkWriteError occurs (using an azure server where the
    throughput (RU/s) is exceeded), this method will attempt to continue in a
    slower batched mode.

    :param entries: iterable of documents (must support len() and slicing,
        i.e. a sequence such as a list)
    :return: pymongo.results.InsertManyResult
    :raises BulkWriteError: re-raised when the error is not the Azure
        throughput case, or when the batched retry fails again
    :raises ValueError: when the batched retry finishes but the document
        count in the collection does not match len(entries)
    """
    result = InsertManyResult((), False)
    collection = self.get_collection()
    if collection is not None:
        # Baseline count so we can verify the batched fallback inserted everything.
        initial_num_docs = collection.count_documents({})
        try:
            result = collection.insert_many(entries)
        except BulkWriteError as bwe:
            logging.warning(f'BulkWriteError: {bwe.details}')
            write_err = bwe.details['writeErrors'][0]
            err_index = write_err['index']  # index of the first doc that failed
            err_code = write_err['code']
            # there is a RetryAfterMs value in errmsg but skip it for now and use a big value
            # err_msg = write_err['errmsg']
            sleep(0.5)  # wait 500ms
            if err_code == 16500 and 'mongo.cosmos.azure' in self['server']:
                # looks like an azure server is being used and the number of requests exceeded capacity
                # lets try it in batches
                # FIX: guard against batch_size == 0 (err_index < 4 would make
                # range() raise ValueError on a zero step)
                batch_size = max(1, int(err_index * 0.25))
                logging.info(f'Attempting to continue in batches of {batch_size}')
                try:
                    inserted_ids = [None] * err_index  # can't get ObjectIds of uploaded before BulkWriteError
                    count = err_index
                    pause = 0.25  # wait 250ms between batches
                    for idx in range(err_index, len(entries), batch_size):
                        result = collection.insert_many(entries[idx:idx + batch_size])
                        # FIX: extend, not append — append would nest the per-batch
                        # id lists inside inserted_ids
                        inserted_ids.extend(result.inserted_ids)
                        count += len(result.inserted_ids)
                        estimate = int(((len(entries)-idx)/batch_size) * pause)
                        logging.info(f'Uploaded {count} of {len(entries)}, ETC {str(timedelta(seconds=estimate))}')
                        sleep(pause)
                    # Sanity check: every document must have landed.
                    num_docs = collection.count_documents({}) - initial_num_docs
                    if num_docs != len(entries):
                        raise ValueError(f'Batch inserted document count {num_docs} '
                                         f'does not match document size of document collection {len(entries)}')
                    else:
                        result = InsertManyResult(inserted_ids, result.acknowledged)
                except BulkWriteError as bweb:
                    logging.warning(f'BulkWriteError: {bweb.details}')
                    raise
            else:
                raise
    return result
def insert_many(self, documents, ordered=True, **kwargs):
    """Insert an iterable of documents into collection

    :param documents: An iterable of documents to insert
        (``list``, ``tuple``, ...)
    :param ordered:
        If ``True`` (the default) documents will be inserted on the server
        serially, in the order provided. If an error occurs, all remaining
        inserts are aborted. If ``False``, documents will be inserted on
        the server in arbitrary order, possibly in parallel, and all
        document inserts will be attempted.
    :returns: :class:`Deferred` that called back with
        :class:`pymongo.results.InsertManyResult`
    """
    inserted_ids = []
    for doc in documents:
        # FIX: collections.Mapping was removed in Python 3.10;
        # collections.abc.Mapping is the supported spelling (3.3+).
        if isinstance(doc, collections.abc.Mapping):
            # Assign an _id client-side so we can report it without a round-trip.
            inserted_ids.append(doc.setdefault("_id", ObjectId()))
        else:
            raise TypeError(
                "TxMongo: insert_many takes list of documents.")
    bulk = _Bulk(self, ordered, bypass_document_validation=False)
    bulk.ops = [(_INSERT, doc) for doc in documents]
    yield self._execute_bulk(bulk)
    defer.returnValue(
        InsertManyResult(inserted_ids, self.write_concern.acknowledged))
def test_format_calledWithValidOperationResult_returnCorrectResult(self):
    """format() renders an acknowledged InsertManyResult as pretty-printed JSON-ish text."""
    ids = [
        ObjectId("4d128b6ea794fc13a8000002"),
        ObjectId("4d128b6ea794fc13a8000003"),
    ]
    expected = (
        '{\n\t"acknowledged" : true,\n\t"insertedIds" : [\n\t\t'
        'ObjectId("4d128b6ea794fc13a8000002"),\n\t\t'
        'ObjectId("4d128b6ea794fc13a8000003")\n\t]\n}'
    )
    actual = self.sut.format(InsertManyResult(ids, True))
    self.assertEqual(actual, expected)
def cart_remove(item_name: str) -> InsertManyResult:
    """ Remove an item from a shopping cart

    The removed item (if found) is moved back into the items collection via
    ``items_add``; the result of that insert is returned.

    :param item_name: name of the cart item to remove
    :return: InsertManyResult from re-adding the item, or an empty
        unacknowledged result when the item was not found
    :raises Exception: when any DB operation fails (wrapped, with the
        original exception chained as the cause)
    """
    # Lazy %-style args: the message is only formatted if DEBUG is enabled.
    LOGGER.debug('removing item from shopping cart: %s', item_name)
    db = connect_to_db()  # pylint: disable=invalid-name
    result = InsertManyResult(None, False)
    try:
        item = db.cart.find_one_and_delete({'name': item_name})
        if item:
            result = items_add([item])
    except Exception as exception:
        # TODO: use specifc exceptions. Create a custom error exception
        LOGGER.error(exception)
        # FIX: chain the original exception so the root cause is not lost.
        raise Exception('There was a DB problem') from exception
    return result
def insert_many(self, documents, ordered=True):
    # Validate documents and build an InsertManyResult of their _ids.
    # NOTE(review): as written, gen() is defined but never iterated, so no
    # document is validated, no _id is assigned, and inserted_ids stays
    # empty — presumably a bulk-write call that consumes gen() is missing
    # here; confirm against the original backend this was adapted from.
    # NOTE(review): InsertManyResult is constructed with a single argument;
    # pymongo's InsertManyResult also takes an `acknowledged` flag — verify
    # this matches the local InsertManyResult definition.
    if not isinstance(documents, abc.Iterable) or not documents:
        raise TypeError("documents must be a non-empty list")
    inserted_ids = []
    def gen():
        """A generator that validates documents and handles _ids."""
        for document in documents:
            common.validate_is_document_type("document", document)
            if not isinstance(document, RawBSONDocument):
                # Assign a client-side _id when absent (skipped for raw BSON,
                # whose bytes must not be mutated).
                if "_id" not in document:
                    document["_id"] = ObjectId()
                inserted_ids.append(document["_id"])
            yield (message._INSERT, document)
    return InsertManyResult(inserted_ids)
def insert_many(self, documents, ordered=True, **kwargs):
    """Insert *documents* and fire back an
    :class:`pymongo.results.InsertManyResult` carrying their ids and the
    collection's acknowledgement status."""
    ids = yield self._insert_one_or_many(documents, ordered, **kwargs)
    acknowledged = self.write_concern.acknowledged
    defer.returnValue(InsertManyResult(ids, acknowledged))
"nUpserted": 0, "nMatched": 0, "nModified": 0, "nRemoved": 0, "upserted": [], }, f"Duplicated documents: 1", f"Non-duplicated documents count: 0", ) ] SAVE_PARAMS = ("data", "result", "expected_result") SAVE_TEST_CASES = [ ( [ { "id": "opsmatters_uk.ThelatestupdateforBroadcomincludesDXNetOps202Netwo.opentracing" } ], InsertManyResult( [ "opsmatters_uk.ThelatestupdateforBroadcomincludesDXNetOps202Netwo.opentracing" ], True, ), 1, ), ([], InsertManyResult([], True), 0), ]
def insert_many(self, documents, ordered=True, session=None):
    """Insert all *documents* and report their ids as an acknowledged
    InsertManyResult (``ordered`` and ``session`` are accepted for API
    compatibility but unused here)."""
    return InsertManyResult(self.__insert(documents), True)
def insert_many(self, documents, ordered=True, **kwargs):
    # Insert a list of documents over the wire protocol, batching as needed
    # and honoring ordered/acknowledged semantics; fires back with an
    # InsertManyResult of client-assigned _ids.
    # NOTE(review): collections.Mapping was removed in Python 3.10 — this
    # line fails with AttributeError there; collections.abc.Mapping is the
    # modern spelling. Confirm the supported Python range before changing.
    inserted_ids = []
    for doc in documents:
        if isinstance(doc, collections.Mapping):
            # Assign _ids client-side so they can be returned without a round-trip.
            inserted_ids.append(doc.setdefault("_id", ObjectId()))
        else:
            raise TypeError("TxMongo: insert_many takes list of documents.")
    cmd_collname = str(self._database["$cmd"])
    proto = yield self._database.connection.getprotocol()
    # Aggregated server responses across all batches.
    error = {
        "nInserted": 0,
        "writeErrors": [],
        "writeConcernErrors": []
    }

    def accumulate_response(reply):
        # Fold one batch reply into the shared `error` accumulator.
        response = reply.documents[0].decode()
        error["nInserted"] += response.get('n', 0)
        error["writeErrors"].extend(response.get("writeErrors", []))
        if "writeConcernError" in response:
            error["writeConcernErrors"].append(response["writeConcernError"])

    def has_errors():
        return error["writeErrors"] or error["writeConcernErrors"]

    def raise_error():
        # Attach the offending document to each write error, then raise the
        # aggregate as a BulkWriteError.
        error["writeErrors"].sort(key=lambda error: error["index"])
        for write_error in error["writeErrors"]:
            write_error[u"op"] = documents[write_error["index"]]
        raise BulkWriteError(error)

    # There are four major cases with different behavior of insert_many:
    #   * Unack, Unordered:  sending all batches and not handling responses at all
    #                        so ignoring any errors
    #
    #   * Ack, Unordered:    sending all batches, accumulating all responses and
    #                        returning aggregated response
    #
    #   * Unack, Ordered:    handling DB responses despite unacknowledged write_concern
    #                        because we must stop on first error (not raising it though)
    #
    #   * Ack, Ordered:      stopping on first error and raising BulkWriteError

    actual_write_concern = self.write_concern
    if ordered and self.write_concern.acknowledged is False:
        # Force w=1 so the server reports errors we need for early stop,
        # even though the caller asked for unacknowledged writes.
        actual_write_concern = WriteConcern(w=1)

    batches = self._generate_insert_many_batches(self._collection_name, documents, ordered,
                                                 actual_write_concern, proto.max_bson_size,
                                                 proto.max_write_batch_size)

    all_responses = []
    for batch in batches:
        batch_result = proto.send_QUERY(Query(collection=cmd_collname, query=batch))
        if self.write_concern.acknowledged or ordered:
            batch_result.addCallback(accumulate_response)
            if ordered:
                # Ordered: wait for each batch before sending the next.
                yield batch_result
                if has_errors():
                    if self.write_concern.acknowledged:
                        raise_error()
                    else:
                        # Unack + ordered: stop silently on first error.
                        break
            else:
                # Ack + unordered: collect deferreds, await them all below.
                all_responses.append(batch_result)

    if self.write_concern.acknowledged and not ordered:
        yield defer.DeferredList(all_responses)
        if has_errors():
            raise_error()

    defer.returnValue(InsertManyResult(inserted_ids, self.write_concern.acknowledged))