def _sync_collection(self, dbname, collname):
    """ Sync a collection until success. """
    src_dbname, src_collname = dbname, collname
    dst_dbname, dst_collname = self._conf.db_coll_mapping(dbname, collname)
    while True:
        try:
            log.info("sync collection '%s.%s' => '%s.%s'" % (src_dbname, src_collname, dst_dbname, dst_collname))
            cursor = self._src.client()[src_dbname][src_collname].find(
                filter=None,
                cursor_type=pymongo.cursor.CursorType.EXHAUST,
                no_cursor_timeout=True,
                modifiers={'$snapshot': True})
            count = cursor.count()
            if count == 0:
                log.info('    skip empty collection')
                return
            n = 0
            reqs = []
            reqs_max = 100
            groups = []
            groups_max = 10
            for doc in cursor:
                if self._conf.asyncio:
                    reqs.append(pymongo.ReplaceOne({'_id': doc['_id']}, doc, upsert=True))
                    if len(reqs) == reqs_max:
                        groups.append(reqs)
                        reqs = []
                    if len(groups) == groups_max:
                        threads = [gevent.spawn(self._dst.bulk_write, dst_dbname, dst_collname, groups[i])
                                   for i in xrange(groups_max)]
                        gevent.joinall(threads)
                        groups = []
                else:
                    reqs.append(pymongo.ReplaceOne({'_id': doc['_id']}, doc, upsert=True))
                    if len(reqs) == reqs_max:
                        self._dst.bulk_write(dst_dbname, dst_collname, reqs)
                        reqs = []
                n += 1
                if n % 10000 == 0:
                    log.info('    %s.%s %d/%d (%.2f%%)' % (src_dbname, src_collname, n, count, float(n) / count * 100))
            if self._conf.asyncio:
                if len(groups) > 0:
                    threads = [gevent.spawn(self._dst.bulk_write, dst_dbname, dst_collname, groups[i])
                               for i in xrange(len(groups))]
                    gevent.joinall(threads)
                if len(reqs) > 0:
                    self._dst.bulk_write(dst_dbname, dst_collname, reqs)
            else:
                if len(reqs) > 0:
                    self._dst.bulk_write(dst_dbname, dst_collname, reqs)
            log.info('    %s.%s %d/%d (%.2f%%)' % (src_dbname, src_collname, n, count, float(n) / count * 100))
            return
        except pymongo.errors.AutoReconnect:
            self._src.reconnect()
def parse_jsons():
    cmd = {
        "planarity": sh.Command("make_restrictions_planarity", [BUILD_DIR]),
        "tree": sh.Command("make_restrictions_tree", [BUILD_DIR]),
        "matrix": sh.Command("make_restrictions_matrix", [BUILD_DIR])
    }
    for line in cmd[method](args, MATRIX_DIR, _iter=True):
        if not line.startswith(("GRAPH:", "TREE:", "MATRIX:", "RESTRICTION:")):
            if len(line) > 500:
                print("Can't parse line:", line[:250], "...", line[250:])
            else:
                print("Can't parse line:", line)
            continue
        type, data = line.split(":", 1)
        data = json.loads(data)
        if type in ("GRAPH", "TREE"):
            print(type, db.data_sources.replace_one({"id": data["id"]}, data, upsert=True).raw_result)
        elif type == "MATRIX":
            print(type, db.matrices.replace_one({"id": data["id"]}, data, upsert=True).raw_result)
        else:
            yield pymongo.ReplaceOne({"id": data["id"]}, data, upsert=True)
def parse_jsons():
    if ttype in ("Zanetti", "JGraphEd", "GraphTea"):
        cmd = test_commands[ttype]("-n", ttype, all_restrictions, infiles)
        for l in cmd:
            j = json.loads(l)
            yield pymongo.ReplaceOne({"id": j["id"]}, j, upsert=True)
    else:
        for file in infiles:
            cmd = test_commands[ttype]("-n", ttype, all_restrictions, file)
            for l in cmd:
                j = json.loads(l)
                yield pymongo.ReplaceOne({"id": j["id"]}, j, upsert=True)
def generateSmsWhitelistingData(tmpValue, mobile='918660430751'):
    if constant.config['cluster'] in ['nightly', 'staging']:
        last_error = None
        for i in range(0, 3):
            try:
                testCol = constant.config['mongoConn']
                value = {
                    "mobile": mobile,
                    "delivered": 0,
                    "not_delivered": 0
                }
                value.update(tmpValue)
                value['total'] = value['delivered'] + value['not_delivered']
                batchReq = []
                batchReq.append(pymongo.ReplaceOne({'mobile': mobile}, value, upsert=True))
                testCol.bulk_write(batchReq)
                Logger.log(testCol.find({'mobile': mobile})[0])
                return
            except pymongo.errors.ConnectionFailure as e:
                Logger.log(e)
                # Capture the error so it can be re-raised after the retry
                # loop; the except-clause name goes out of scope in Python 3.
                last_error = e
                port = constant.config['INTOUCH_DB_MONGO_MASTER']
                if Utils.restartTunnel(port):
                    DarknightHelper.getMongoConnection('whitelisting', 'mobile_status')
                else:
                    break
            except Exception as e:
                last_error = e
                break
        raise Exception(last_error)
def on_update(
    self,
    prefix: List[str],
    build_identifiers: BuildIdentifierSet,
    page_id: FileId,
    page: Page,
) -> None:
    super().on_update(prefix, build_identifiers, page_id, page)
    uploadable_assets = [
        asset for asset in page.static_assets if asset.can_upload()
    ]
    fully_qualified_pageid = "/".join(prefix + [page_id.without_known_suffix])

    # Construct filter for retrieving build documents
    document_filter: Dict[str, Union[str, Dict[str, Any]]] = {
        "page_id": fully_qualified_pageid,
        **construct_build_identifiers_filter(build_identifiers),
    }
    document = {
        "page_id": fully_qualified_pageid,
        **{
            key: value
            for (key, value) in build_identifiers.items()
            if value is not None
        },
        "prefix": prefix,
        "filename": page_id.as_posix(),
        "ast": page.ast.serialize(),
        "source": page.source,
        "created_at": datetime.utcnow(),
        "static_assets": [
            {"checksum": asset.get_checksum(), "key": asset.key}
            for asset in uploadable_assets
        ],
    }
    if page.query_fields:
        document.update({"query_fields": page.query_fields})

    self.pending_writes[COLL_DOCUMENTS].append(
        pymongo.ReplaceOne(document_filter, document, upsert=True)
    )
    for static_asset in uploadable_assets:
        self.pending_writes[COLL_ASSETS].append(
            pymongo.UpdateOne(
                {"_id": static_asset.get_checksum()},
                {
                    "$setOnInsert": {
                        "_id": static_asset.get_checksum(),
                        "data": static_asset.data,
                    }
                },
                upsert=True,
            )
        )
def _sync_collection_with_query(self, namespace_tuple, query, prog_q, res_q):
    """ Sync collection with query. """
    self._src.reconnect()
    self._dst.reconnect()
    src_dbname, src_collname = namespace_tuple
    dst_dbname, dst_collname = self._conf.db_coll_mapping(src_dbname, src_collname)
    while True:
        try:
            cursor = self._src.client()[src_dbname][src_collname].find(
                filter=query,
                cursor_type=pymongo.cursor.CursorType.EXHAUST,
                no_cursor_timeout=True,
                # snapshot causes blocking, maybe a bug
                # modifiers={'$snapshot': True}
            )
            total = 0
            n = 0
            reqs = []
            reqs_max = 100
            groups = []
            groups_max = 10
            for doc in cursor:
                reqs.append(pymongo.ReplaceOne({'_id': doc['_id']}, doc, upsert=True))
                if len(reqs) == reqs_max:
                    groups.append(reqs)
                    reqs = []
                if len(groups) == groups_max:
                    threads = [gevent.spawn(self.bulk_write, self._dst.client(), dst_dbname, dst_collname, groups[i])
                               for i in range(groups_max)]
                    gevent.joinall(threads)
                    groups = []
                n += 1
                total += 1
                if n % 10000 == 0:
                    prog_q.put(n)
                    n = 0
            if len(groups) > 0:
                threads = [gevent.spawn(self.bulk_write, self._dst.client(), dst_dbname, dst_collname, groups[i])
                           for i in range(len(groups))]
                gevent.joinall(threads)
            if len(reqs) > 0:
                self.bulk_write(self._dst.client(), dst_dbname, dst_collname, reqs)
            if n > 0:
                prog_q.put(n)
            res_q.put(total)
            prog_q.close()
            prog_q.join_thread()
            res_q.close()
            res_q.join_thread()
            return
        except pymongo.errors.AutoReconnect:
            self._src.reconnect()
def upsert_articles(articles: List[Article]):
    """ Upserts a list of article documents """
    articles_upserts = [
        pymongo.ReplaceOne({'article_url': article.article_url}, article.__dict__, upsert=True)
        for article in articles
    ]
    _DB_ARTICLES.bulk_write(articles_upserts)
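# A minimal usage sketch for upsert_articles above. It assumes _DB_ARTICLES is
# an already-configured pymongo collection; SimpleNamespace stands in for the
# Article type here, since the function only needs article_url and __dict__.
from types import SimpleNamespace

upsert_articles([
    SimpleNamespace(article_url='https://example.com/a', title='First'),
    SimpleNamespace(article_url='https://example.com/b', title='Second'),
])  # re-running this replaces the same two documents rather than duplicating them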
def bulk_replace_upsert(collection, key, docs):
    ops = []
    for doc in docs:
        ops.append(pymongo.ReplaceOne({key: doc[key]}, doc, upsert=True))
        if len(ops) == 1000:
            # replace 1000 entries
            collection.bulk_write(ops, ordered=False)
            ops = []
    # replace remaining
    if len(ops) > 0:
        collection.bulk_write(ops, ordered=False)
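# Example invocation of bulk_replace_upsert, assuming a MongoDB instance on
# localhost; the connection string, database, and field names are illustrative.
import pymongo

client = pymongo.MongoClient('mongodb://localhost:27017')
docs = [{'sku': 'a1', 'qty': 5}, {'sku': 'b2', 'qty': 3}]
bulk_replace_upsert(client.shop.inventory, 'sku', docs)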
def insert_mine_compound(self, compound_dict=None, requests=None):
    """This method saves a RDKit Molecule as a compound entry in the MINE.
    Calculates necessary fields for API and includes additional
    information passed in the compound dict. Overwrites preexisting
    compounds in MINE on _id collision.

    :param compound_dict: Information of compound to insert to database
    :type compound_dict: dict
    :param requests: A list of requests for pymongo bulk write
    :type requests: list
    """
    if compound_dict is None:
        return None

    # Store all different representations of the molecule (SMILES, Formula,
    # InChI key, etc.) as well as its properties in a dictionary
    if '_atom_count' in compound_dict:
        del compound_dict['_atom_count']
    if 'Inchikey' in compound_dict:
        del compound_dict['Inchikey']
    if 'ID' in compound_dict:
        del compound_dict['ID']

    # If the compound is a reactant, make sure 'Reactant_in' is parsed
    # from its string form into a Python list.
    if 'Reactant_in' in compound_dict \
            and isinstance(compound_dict['Reactant_in'], str) \
            and compound_dict['Reactant_in']:
        compound_dict['Reactant_in'] = ast.literal_eval(compound_dict['Reactant_in'])

    # Likewise, if the compound is a product, make sure 'Product_of' is
    # parsed from its string form into a Python list.
    if 'Product_of' in compound_dict \
            and isinstance(compound_dict['Product_of'], str) \
            and compound_dict['Product_of']:
        compound_dict['Product_of'] = ast.literal_eval(compound_dict['Product_of'])

    # If bulk insertion, upsert (insert and update) the database
    if requests is not None:
        requests.append(
            pymongo.ReplaceOne({'_id': compound_dict['_id']},
                               compound_dict, upsert=True))
    else:
        self.compounds.replace_one({'_id': compound_dict['_id']},
                                   compound_dict, upsert=True)
    return None
def _sync_collection(self, namespace_tuple):
    """ Sync a collection until success. """
    # create indexes first
    self._create_index(namespace_tuple)
    src_dbname, src_collname = namespace_tuple
    dst_dbname, dst_collname = self._conf.db_coll_mapping(src_dbname, src_collname)
    src_ns = '%s.%s' % (src_dbname, src_collname)
    total = self._src.client()[src_dbname][src_collname].count()
    self._progress_logger.register(src_ns, total)
    while True:
        try:
            cursor = self._src.client()[src_dbname][src_collname].find(
                filter=None,
                cursor_type=pymongo.cursor.CursorType.EXHAUST,
                no_cursor_timeout=True,
                modifiers={'$snapshot': True})
            reqs = []
            reqs_max = 100
            groups = []
            groups_max = 10
            n = 0
            for doc in cursor:
                reqs.append(pymongo.ReplaceOne({'_id': doc['_id']}, doc, upsert=True))
                if len(reqs) == reqs_max:
                    groups.append(reqs)
                    reqs = []
                if len(groups) == groups_max:
                    threads = [gevent.spawn(self.bulk_write, self._dst.client(), dst_dbname, dst_collname, groups[i])
                               for i in range(groups_max)]
                    gevent.joinall(threads)
                    groups = []
                n += 1
                if n % 10000 == 0:
                    self._progress_logger.add(src_ns, n)
                    n = 0
            if len(groups) > 0:
                threads = [gevent.spawn(self.bulk_write, self._dst.client(), dst_dbname, dst_collname, groups[i])
                           for i in range(len(groups))]
                gevent.joinall(threads)
            if len(reqs) > 0:
                self.bulk_write(self._dst.client(), dst_dbname, dst_collname, reqs)
            self._progress_logger.add(src_ns, n, done=True)
            return
        except pymongo.errors.AutoReconnect:
            self._src.reconnect()
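# A condensed, standalone sketch of the batching pattern the sync functions
# above share: buffer ReplaceOne requests and flush them with bulk_write in
# fixed-size batches. The collection handles and batch size are assumptions
# for illustration, not part of the original code.
import pymongo

def batched_replace(src_coll, dst_coll, batch_size=100):
    reqs = []
    for doc in src_coll.find():
        reqs.append(pymongo.ReplaceOne({'_id': doc['_id']}, doc, upsert=True))
        if len(reqs) == batch_size:
            dst_coll.bulk_write(reqs, ordered=False)  # flush a full batch
            reqs = []
    if reqs:
        dst_coll.bulk_write(reqs, ordered=False)  # flush the final partial batch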
def test_collection_bulk_write(elasticapm_client, mongo_database):
    elasticapm_client.begin_transaction('transaction.test')
    requests = [pymongo.InsertOne({'x': 1}),
                pymongo.DeleteOne({'x': 1}),
                pymongo.ReplaceOne({'w': 1}, {'z': 1}, upsert=True)]
    result = mongo_database.blogposts.bulk_write(requests)
    assert result.inserted_count == 1
    assert result.deleted_count == 1
    assert result.upserted_count == 1
    elasticapm_client.end_transaction('transaction.test')

    transactions = elasticapm_client.instrumentation_store.get_all()
    span = _get_pymongo_trace(transactions[0]['spans'])
    assert span['type'] == 'db.mongodb.query'
    assert span['name'] == 'elasticapm_test.blogposts.bulk_write'
def update_sramongo_sra_records(docs, collection):
    db_operations = []
    for doc in docs:
        db_operations.append(pymongo.ReplaceOne({"srx": doc.srx}, doc.to_mongo(), upsert=True))
        # Write intermediate results
        if len(db_operations) > 500:
            res = collection.bulk_write(db_operations)
            logger.debug(res.bulk_api_result)
            db_operations = []
    if db_operations:
        res = collection.bulk_write(db_operations)
        logger.debug(res.bulk_api_result)
def update_station_information(stations, collection, batch_size=100):
    '''
    Bulk replace citi bike stations in MongoDB. New stations are inserted
    automatically; existing stations are replaced with their updated data.
    '''
    batched_operations = []
    for station in stations:
        batched_operations.append(pymongo.ReplaceOne(
            {'_id': station['_id']},
            station,
            upsert=True))
        write_batch(batch=batched_operations,
                    collection=collection,
                    batch_size=batch_size,
                    full_batch_required=True)

    # Don't forget the last batch that might not fill up the whole batch_size ;)
    write_batch(batch=batched_operations,
                collection=collection,
                batch_size=batch_size,
                full_batch_required=False)
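# write_batch is not defined in this snippet. Given how it is called above, a
# plausible (hypothetical) implementation would flush the shared list in place:
def write_batch(batch, collection, batch_size, full_batch_required):
    # With full_batch_required=True, flush only complete batches; with
    # False, also flush the final partial batch.
    while len(batch) >= batch_size or (batch and not full_batch_required):
        collection.bulk_write(batch[:batch_size], ordered=False)
        del batch[:batch_size]  # mutate in place so the caller's list shrinks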
def test_collection_bulk_write(self):
    self.client.begin_transaction('transaction.test')
    requests = [
        pymongo.InsertOne({'x': 1}),
        pymongo.DeleteOne({'x': 1}),
        pymongo.ReplaceOne({'w': 1}, {'z': 1}, upsert=True)
    ]
    result = self.db.blogposts.bulk_write(requests)
    self.assertEqual(result.inserted_count, 1)
    self.assertEqual(result.deleted_count, 1)
    self.assertEqual(result.upserted_count, 1)
    self.client.end_transaction('transaction.test')

    transactions = self.client.instrumentation_store.get_all()
    trace = _get_pymongo_trace(transactions[0]['traces'])
    self.assertEqual(trace['type'], 'db.mongodb.query')
    self.assertEqual(trace['name'], 'elasticapm_test.blogposts.bulk_write')
def test_collection_bulk_write(instrument, elasticapm_client, mongo_database):
    elasticapm_client.begin_transaction("transaction.test")
    requests = [
        pymongo.InsertOne({"x": 1}),
        pymongo.DeleteOne({"x": 1}),
        pymongo.ReplaceOne({"w": 1}, {"z": 1}, upsert=True),
    ]
    result = mongo_database.blogposts.bulk_write(requests)
    assert result.inserted_count == 1
    assert result.deleted_count == 1
    assert result.upserted_count == 1
    elasticapm_client.end_transaction("transaction.test")

    transactions = elasticapm_client.transaction_store.get_all()
    span = _get_pymongo_trace(transactions[0]["spans"])
    assert span["type"] == "db.mongodb.query"
    assert span["name"] == "elasticapm_test.blogposts.bulk_write"
def update_operation_entities(mongo_source, collection_name, id_creation_func,
                              filter_criteria, correction_map, batch_size):
    collection = mongo_source.mongo_handle[mongo_source.db_name][collection_name]
    total_updated = 0
    try:
        for batch_of_variant in find_documents_in_batch(
                mongo_source, collection_name, filter_criteria, batch_size):
            update_statements = []
            for variant in batch_of_variant:
                filter_dict = {'_id': variant['_id']}
                for key in correction_map:
                    if callable(correction_map[key]):
                        variant[key] = correction_map[key](variant[key])
                    elif 'inactiveObjects' in key:
                        inactive_objects = variant['inactiveObjects']
                        prop = key.split('.')[-1]
                        for inactive in inactive_objects:
                            inactive[prop] = correction_map[key]
                    else:
                        variant[key] = correction_map[key]
                for inactive in variant['inactiveObjects']:
                    inactive['hashedMessage'] = id_creation_func(inactive)
                variant.pop('_id')
                update_statements.append(pymongo.ReplaceOne(filter_dict, variant))
            if update_statements:
                result_update = collection.with_options(write_concern=WriteConcern(w="majority", wtimeout=1200000)) \
                    .bulk_write(requests=update_statements, ordered=False)
                total_updated += result_update.modified_count
                logger.debug(
                    f'{result_update.modified_count} documents updated in {collection_name}'
                )
            else:
                logger.warning(
                    f'{len(update_statements)} update statements created. Skipping.'
                )
        logger.info(f'{total_updated} documents updated in {collection_name}')
    except Exception as e:
        print(traceback.format_exc())
        raise e
    return total_updated
def save_all(self, items, extend=False):
    assert isinstance(items, list)
    ops = []
    for item in items:
        assert isinstance(item, Identifier)
        assert item.ean is not None or item.asin is not None
        fil = Identifier.filter_intersection(ean=item.ean, asin=item.asin)
        if extend:
            a = self.col.find_one(fil)
            if a is not None:
                item = Identifier.extend(Identifier.from_obj(a), item)
        obj = item.to_obj()
        print('Save obj={o}.'.format(o=obj))
        ops.append(pymongo.ReplaceOne(fil, obj, upsert=True))
    if len(ops) == 0:
        return None
    else:
        res = self.col.bulk_write(ops)
        # print(res.bulk_api_result)
        return res
def load(db_host, db_port, db_name, filename):
    """Load a full database dump from JSON"""
    backup_log("Connecting database")
    client = pymongo.MongoClient(db_host, db_port)
    db = client[db_name]

    backup_log("Loading data")
    with open(filename, "r") as file:
        data = json.load(file)

    backup_log("Storing data to database")
    for import_item in data:
        collection_name = import_item["collection"]
        collection = db[collection_name]
        requests = []
        for document in import_item["data"]:
            document["_id"] = bson.ObjectId(document["_id"])
            requests.append(
                pymongo.ReplaceOne({"uuid": document["uuid"]}, document, upsert=True))
        size = len(requests)
        if size > 0:
            collection.bulk_write(requests)
        backup_log("Imported %i object%s into collection '%s'"
                   % (size, "s" if size != 1 else "", collection_name))

    backup_log("Done")
    return True
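# For reference, load() above implies a dump file shaped like the following
# (inferred from the loop; collection names and field values are illustrative):
#
# [
#   {"collection": "users",
#    "data": [{"_id": "5f1d0c9e...", "uuid": "ab12...", "name": "example"}]}
# ]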
def insert_reaction(self, reaction_dict, requests=None):
    """Inserts a reaction into the MINE database and returns _id of the
    reaction in the mine database.

    :param reaction_dict: A dictionary containing 'Reactants' and
        'Products' lists of StoichTuples
    :type reaction_dict: dict
    :param requests: A list of requests for pymongo bulk write
    :type requests: list
    :return: The hashed _id of the reaction
    :rtype: str
    """
    reaction_dict = utils.convert_sets_to_lists(reaction_dict)
    # If bulk insertion, upsert (insert and update) the database
    if requests is not None:
        requests.append(
            pymongo.ReplaceOne({'_id': {'$eq': reaction_dict['_id']}},
                               reaction_dict, upsert=True))
    else:
        save_document(self.reactions, reaction_dict)
    return reaction_dict['_id']
def write(self, symbol, item, metadata=None, chunker=DateChunker(), audit=None, **kwargs):
    """
    Writes data from item to symbol in the database

    Parameters
    ----------
    symbol: str
        the symbol that will be used to reference the written data
    item: Dataframe or Series
        the data to write to the database
    metadata: ?
        optional per symbol metadata
    chunker: Object of type Chunker
        A chunker that chunks the data in item
    audit: dict
        audit information
    kwargs:
        optional keyword args that are passed to the chunker. Includes:
        chunk_size: used by chunker to break data into discrete chunks.
            see specific chunkers for more information about this param.
        func: function
            function to apply to each chunk before writing. Function
            can not modify the date column.
    """
    if not isinstance(item, (DataFrame, Series)):
        raise Exception("Can only chunk DataFrames and Series")

    self._arctic_lib.check_quota()

    previous_shas = []
    doc = {}
    meta = {}

    doc[SYMBOL] = symbol
    doc[LEN] = len(item)
    doc[SERIALIZER] = self.serializer.TYPE
    doc[CHUNKER] = chunker.TYPE
    doc[USERMETA] = metadata

    sym = self._get_symbol_info(symbol)
    if sym:
        previous_shas = set([Binary(x[SHA]) for x in
                             self._collection.find({SYMBOL: symbol},
                                                   projection={SHA: True, '_id': False},
                                                   )])
    ops = []
    meta_ops = []
    chunk_count = 0

    for start, end, chunk_size, record in chunker.to_chunks(item, **kwargs):
        chunk_count += 1
        data = self.serializer.serialize(record)
        doc[CHUNK_SIZE] = chunk_size
        doc[METADATA] = {'columns': data[METADATA][COLUMNS] if COLUMNS in data[METADATA] else ''}
        meta = data[METADATA]

        for i in xrange(int(len(data[DATA]) / MAX_CHUNK_SIZE + 1)):
            chunk = {DATA: Binary(data[DATA][i * MAX_CHUNK_SIZE: (i + 1) * MAX_CHUNK_SIZE])}
            chunk[SEGMENT] = i
            chunk[START] = meta[START] = start
            chunk[END] = meta[END] = end
            chunk[SYMBOL] = meta[SYMBOL] = symbol
            dates = [chunker.chunk_to_str(start), chunker.chunk_to_str(end), str(chunk[SEGMENT]).encode('ascii')]
            chunk[SHA] = self._checksum(dates, chunk[DATA])
            meta_ops.append(pymongo.ReplaceOne({SYMBOL: symbol, START: start, END: end},
                                               meta, upsert=True))
            if chunk[SHA] not in previous_shas:
                ops.append(pymongo.UpdateOne({SYMBOL: symbol, START: start, END: end, SEGMENT: chunk[SEGMENT]},
                                             {'$set': chunk}, upsert=True))
            else:
                # already exists, dont need to update in mongo
                previous_shas.remove(chunk[SHA])

    if ops:
        self._collection.bulk_write(ops, ordered=False)
    if meta_ops:
        self._mdata.bulk_write(meta_ops, ordered=False)

    doc[CHUNK_COUNT] = chunk_count
    doc[APPEND_COUNT] = 0

    if previous_shas:
        mongo_retry(self._collection.delete_many)({SYMBOL: symbol, SHA: {'$in': list(previous_shas)}})

    mongo_retry(self._symbols.update_one)({SYMBOL: symbol}, {'$set': doc}, upsert=True)

    if audit is not None:
        audit['symbol'] = symbol
        audit['action'] = 'write'
        audit['chunks'] = chunk_count
        self._audit.insert_one(audit)
def _sync_collection(self, src_dbname, src_collname, dst_dbname, dst_collname):
    """ Sync a collection through batch write. """
    while True:
        try:
            self._logger.info("sync collection '%s.%s'" % (src_dbname, src_collname))
            cursor = self._src_mc[src_dbname][src_collname].find(
                filter=None,
                cursor_type=pymongo.cursor.CursorType.EXHAUST,
                no_cursor_timeout=True,
                modifiers={'$snapshot': True})
            if self._src_engine == 'tokumx':
                # TokuMX 'count' command may be very slow, use 'collStats' command instead
                count = self._src_mc[src_dbname].command({'collStats': src_collname})['count']
            else:
                count = cursor.count()
            if count == 0:
                self._logger.info('\t skip empty collection')
                return
            n = 0
            reqs = []
            reqs_max = 100
            groups = []
            groups_max = 10
            for doc in cursor:
                if self._asyncio:
                    reqs.append(pymongo.ReplaceOne({'_id': doc['_id']}, doc, upsert=True))
                    if len(reqs) == reqs_max:
                        groups.append(reqs)
                        reqs = []
                    if len(groups) == groups_max:
                        threads = [gevent.spawn(self._bulk_write, dst_dbname, dst_collname, groups[i])
                                   for i in xrange(groups_max)]
                        gevent.joinall(threads)
                        groups = []
                else:
                    reqs.append(pymongo.ReplaceOne({'_id': doc['_id']}, doc, upsert=True))
                    if len(reqs) == reqs_max:
                        self._bulk_write(dst_dbname, dst_collname, reqs)
                        reqs = []
                n += 1
                if n % 10000 == 0:
                    self._logger.info('\t %s.%s %d/%d (%.2f%%)'
                                      % (src_dbname, src_collname, n, count, float(n) / count * 100))
            if self._asyncio:
                if len(groups) > 0:
                    threads = [gevent.spawn(self._bulk_write, dst_dbname, dst_collname, groups[i])
                               for i in xrange(len(groups))]
                    gevent.joinall(threads)
                if len(reqs) > 0:
                    self._bulk_write(dst_dbname, dst_collname, reqs)
            else:
                if len(reqs) > 0:
                    self._bulk_write(dst_dbname, dst_collname, reqs)
            self._logger.info('\t %s.%s %d/%d (%.2f%%)'
                              % (src_dbname, src_collname, n, count, float(n) / count * 100))
            return
        except pymongo.errors.AutoReconnect:
            self._src_mc.close()
            self._src_mc = self.reconnect(self._src_host,
                                          self._src_port,
                                          username=self._src_username,
                                          password=self._src_password)
def export_pymongo(docs, index, mirrors=None, update=False,
                   num_tries=3, timeout=60, chunksize=100):
    """Optimized :py:func:`~.export` function for pymongo index collections.

    The behavior of this function is roughly equivalent to:

    .. code-block:: python

        for doc in docs:
            export_one(doc, index, mirrors, num_tries)

    .. note::

        All index documents must be JSON-serializable to
        be able to be exported to a MongoDB collection.

    :param docs: The index documents to export.
    :param index: The database collection to export the index to.
    :type index: :class:`pymongo.collection.Collection`
    :param num_tries: The number of automatic retry attempts in case of
        mirror connection errors.
    :type num_tries: int
    :param timeout: The time in seconds to wait before an
        automatic retry attempt.
    :type timeout: int
    :param chunksize: The buffer size for export operations.
    :type chunksize: int"""
    import pymongo
    logger.info("Exporting to pymongo database collection index '{}'.".format(index))
    chunk = []
    operations = []
    ids = defaultdict(list)
    for doc in docs:
        f = {'_id': doc['_id']}
        if update:
            root = doc.get('root')
            if root is not None:
                ids[root].append(doc['_id'])
        chunk.append(doc)
        operations.append(pymongo.ReplaceOne(f, doc, upsert=True))
        if len(chunk) >= chunksize:
            logger.debug("Pushing chunk.")
            _export_pymongo(chunk, operations, index, mirrors, num_tries, timeout)
            chunk[:] = []
            operations[:] = []
    if len(operations):
        logger.debug("Pushing final chunk.")
        _export_pymongo(chunk, operations, index, mirrors, num_tries, timeout)
    if update:
        if ids:
            stale = set()
            for root in ids:
                docs_ = index.find({'root': root})
                all_ = {doc['_id'] for doc in docs_}
                stale.update(all_.difference(ids[root]))
            logger.info("Removing {} stale documents.".format(len(stale)))
            for _id in set(stale):
                index.delete_one(dict(_id=_id))
        else:
            raise errors.ExportError(
                "The exported docs sequence is empty! Unable to update!")
dels = delete[i * 100:min((i + 1) * 100, len(delete))]
helpers.bulk(es, dels, chunk_size=100)
print(min((i + 1) * 100, len(delete)))

if is_okay:
    client = MongoClient(
        'mongodb://*****:*****@ds119030-a0.mlab.com:19030,ds119030-a1.mlab.com:19030/glarket?replicaSet=rs-ds119030'
    )
    db = client.glarket
    products = db.products
    batches = int(math.ceil(len(data) / 50.0))
    for i in range(0, batches):
        upload = data[i * 50:min((i + 1) * 50, len(data))]
        reqs = [
            pymongo.ReplaceOne({'product_link': x['product_link']}, x, upsert=True)
            for x in upload
        ]
        try:
            result = products.bulk_write(reqs, ordered=False)
            print(result.bulk_api_result)
            print("Batch: " + str(i) + "/" + str(batches) + "; " +
                  str(i / (batches * 1.0) * 100) + "%")
        except BulkWriteError as bwe:
            print(bwe.details)

    ## Update ES
    #### If prod_id exists, update variables
    #### Else, upload
    es = Elasticsearch(
        'https://*****:*****@fbc3032a2a91be69517a70b3d75f4eaa.us-east-1.aws.found.io:9243'
    )
def parse_jsons():
    for l in cmd:
        j = json.loads(l)
        j["id"] = "/".join((j["file"], j["type"], str(j["index"])))
        yield pymongo.ReplaceOne({"id": j["id"]}, j, upsert=True)