def update_user_email_is_validated_status(coll: pymongo.collection.Collection,
                                          user_id: ObjectId,
                                          yes=False,
                                          no=False,
                                          token=""):
    assert isinstance(user_id, ObjectId)

    if token:
        coll.update_one({"_id": user_id}, {
            "$set": {
                "email_validated": "pending",
                "email_validation_token": token,
            }
        })
    elif yes:
        coll.update_one({"_id": user_id}, {
            "$set": {
                "email_validated": "yes",
            },
            "$unset": {
                "email_validation_token": "",
            },
        })
    elif no:
        coll.update_one({"_id": user_id}, {
            "$set": {
                "email_validated": "no",
            },
            "$unset": {
                "email_validation_token": "",
            },
        })
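# A minimal usage sketch for the validation flow above. The `users` collection
# handle, the `uid` ObjectId, and the secrets-based token generation are
# illustrative assumptions, not part of the original code.
import secrets

validation_token = secrets.token_urlsafe(32)
# 1. Start validation: mark the user "pending" and store the token.
update_user_email_is_validated_status(users, uid, token=validation_token)
# 2. When the user confirms, mark "yes" and drop the token.
update_user_email_is_validated_status(users, uid, yes=True)
# 3. (Or, on failure/expiry, reset to "no".)
# update_user_email_is_validated_status(users, uid, no=True)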
async def insert_dataset(path_to_dataset: str, out_dir: str, gmt_dir: str,
                         collection: pymongo.collection.Collection):
    convert_folder = None
    token = None
    try:
        h5_dataset = H5Dataset(path_to_dataset)
        dataset = h5_dataset.dataset
        token = dataset["token"]
        convert_folder = os.path.join(out_dir, token)
        path_find_res = collection.find_one({"selfPath": path_to_dataset})
        token_find_res = collection.find_one({"token": token})
        if path_find_res is None and token_find_res is None:
            put_dataset(h5_dataset, convert_folder, collection)
        elif path_find_res is None and token_find_res is not None:
            logging.error(f"Duplicated token in dataset {h5_dataset.token}")
        elif path_find_res is not None:
            update_dataset(path_find_res, h5_dataset, convert_folder,
                           collection)
        await modules_merger(out_dir, gmt_dir)
    except H5DatasetInvalidException as e:
        logging.error(f"Invalid dataset {path_to_dataset}: {e}")
    except Exception as e:
        if convert_folder is not None:
            rm_rf(convert_folder)
        logging.error(f"Failed to update dataset {token} due to: {e}")
def set_inserted_column(c: pymongo.collection.Collection, ids: List[int],
                        time: datetime.datetime):
    c.update_many({'id': {'$in': ids}}, {'$set': {'inserted': time}})
def write_contract_to_mongo(reqId, contract: Contract,
                            symbols: pymongo.collection.Collection):
    """Write a contract to the symbols collection with a validFrom attribute."""
    contract = vars(contract)
    contract["reqId"] = reqId
    contract["validFrom"] = datetime.datetime.now()
    symbols.insert_one(contract)
def construct_schema_pymongo(
    collection: pymongo.collection.Collection,
    delimiter: str,
    sample_size: Optional[int] = None,
) -> Dict[Tuple[str, ...], SchemaDescription]:
    """
    Calls construct_schema on a PyMongo collection.

    Returned schema is keyed by tuples of nested field names, with each value
    containing 'types', 'count', 'nullable', 'delimited_name', and 'type'
    attributes.

    Parameters
    ----------
    collection:
        the PyMongo collection
    delimiter:
        string to concatenate field names by
    sample_size:
        number of items in the collection to sample
        (reads entire collection if not provided)
    """

    if sample_size:
        # Get a random sample of documents from the collection.
        documents = collection.aggregate(
            [{"$sample": {"size": sample_size}}],
            allowDiskUse=True,
        )
    else:
        # If sample_size is not provided, just take all items in the collection.
        documents = collection.find({})

    return construct_schema(list(documents), delimiter)
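# A hedged usage sketch for construct_schema_pymongo. The connection string,
# database/collection names, and the "." delimiter are illustrative
# assumptions.
import pymongo

client = pymongo.MongoClient("mongodb://localhost:27017")
coll = client["mydb"]["events"]

# Infer the schema from a random sample of 1000 documents.
schema = construct_schema_pymongo(coll, delimiter=".", sample_size=1000)
for field_path, description in schema.items():
    print(".".join(field_path), description)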
def build_replay_info(
        rp_gen: Generator = REPLAY_GEN,
        db_collection: pymongo.collection.Collection = replays_info) -> bool:
    '''
    Triggers the search for new replays at CONFIG.replay_path. Adds the
    information description of the replays to a data collection within a
    MongoDB database, if they are not in the database already.

    *Args:*
        - rp_gen (Generator = REPLAY_GEN):
            sc2reader.resources.Replay generator that yields the replays
            found in the CONFIG.replay_path.
        - db_collection (pymongo.collection.Collection = replays_info):
            the database collection where the function adds the new documents.

    *Returns:*
        - bool: True if new replays were found and added to the replay_info
          collection, False otherwise.
    '''
    replays_data_set = [
        asdict(replay_data)
        for replay_data in get_replays_data_set(rp_gen, db_collection)
        if replay_data is not None
    ]
    if replays_data_set:
        db_collection.insert_many(replays_data_set)
        return True
    else:
        print(f'No new replays at {CONFIG.replay_path}')
        return False
def update_single_offer(self,
                        collection: pymongo.collection.Collection,
                        driver: webdriver.Chrome,
                        offer: dict,
                        old_db_corresponding_record=None):
    """Parses the info for a single offer.

    Args:
        collection (pymongo.collection.Collection): MongoDB collection to run
            the update against
        driver (webdriver.Chrome): Selenium webdriver to get offer details
        offer (dict): A single offer from MongoDB
        old_db_corresponding_record (Dict, optional): Corresponding record in
            MongoDB, if it exists. Defaults to None.
    """
    doc = {
        "id": offer['id'],
        "date": offer['published'],
        "title": offer['title'],
        "position": offer['title'].split('@')[0],
        "author": offer['author'],
        "link": offer['link'],
    }
    offer_soup = BeautifulSoup(offer['summary'], 'html.parser')
    text = offer_soup.text.split('\n')
    address = text[5].split('Location: ', 1)[1]
    city = address.split(',')[-1].strip()
    address = ','.join(address.split(',')[:-1])
    salaries_raw = text[4].split('Salary: ')[1]
    # Treat the raw string as a single salary if it contains a parenthesised
    # range; otherwise split it into individual salaries on commas.
    has_range = re.findall(r"\(.{1,17},.{1,17}\)", salaries_raw)
    salaries = [salaries_raw] if has_range else salaries_raw.split(',')
    salary_ranges = []
    for salary in salaries:
        salary_ranges.append(
            self.get_salary_details(salary.replace(" ", ""), salaries_raw))
    if city in self.cities_translations:
        city = self.cities_translations[city]
    driver.get(offer['id'])
    page_text = driver.find_element_by_id('root')
    doc["address"] = address
    doc["city"] = city
    doc["salary"] = salary_ranges
    doc["raw_salary"] = salaries_raw
    doc["full_description"] = page_text.text
    offer["full_description"] = page_text.text
    try:
        description_details = self.parse_single_description(offer)
        doc.update(description_details)
    except IndexError as e:
        print(f'Error parsing description for offer {offer["id"]}: {e}')
    if city not in self.cities:
        self.cities.append(city)
    if old_db_corresponding_record:
        collection.update_one({"id": offer['id']}, {"$set": doc})
    else:
        collection.insert_one(doc)
def import_new_records(base_id: str, table: str,
                       mongo_table: pymongo.collection.Collection,
                       view: Optional[str] = None) -> None:
    """Import new records from Airtable to MongoDB."""
    if not _AIRTABLE_API_KEY:
        raise ValueError(
            'No API key found. Create an airtable API key at '
            'https://airtable.com/account and set it in the AIRTABLE_API_KEY '
            'env var.')
    client = airtable.Airtable(base_id, _AIRTABLE_API_KEY)
    records = client.iterate(table, view=view)
    converter = airtable_to_protos.ProtoAirtableConverter(
        proto_type=review_pb2.DocumentToReview,
        id_field=None,
        required_fields=('anonymized_url', ))
    num_inserted = 0
    num_updated = 0
    for record in records:
        mongo_id = record.get('fields', {}).get('mongo_id')
        proto_data = converter.convert_record(record)
        airtable_id = proto_data.pop('_id')
        if record['fields'].get('anonymized_url'):
            proto_data['anonymizedUrl'] = (
                record['fields']['anonymized_url'][0]['url'])
        if mongo_id:
            # Already added, let's update it.
            document_json = mongo_table.find_one_and_update(
                {'_id': objectid.ObjectId(mongo_id)},
                {'$set': proto_data},
            )
            any_pending_or_done_review = (
                document_json.get('numPendingReviews', 0) or
                document_json.get('numDoneReviews', 0))
            timeout_review_count = sum(
                1 for review in document_json.get('reviews', [])
                if review.get('status') == 'REVIEW_TIME_OUT')
            client.update(
                table, airtable_id, {
                    'Bayes help needed': not any_pending_or_done_review,
                    'review_timeouts': timeout_review_count,
                })
            num_updated += 1
            continue
        result = mongo_table.insert_one(proto_data)
        mongo_id = str(result.inserted_id)
        client.update(table, airtable_id, {
            'mongo_id': mongo_id,
            'Bayes help needed': True,
        })
        num_inserted += 1
    print(f'{num_updated:d} documents updated.')
    print(f'{num_inserted:d} documents added.')
def insertion(collection: pymongo.collection.Collection, candles: List[Dict]):
    try:
        collection.insert_many(candles, ordered=False)
    except pymongo.errors.BulkWriteError as e:
        # Ignore duplicate-key errors (code 11000); re-raise anything else.
        panic_list = list(
            filter(lambda x: x['code'] != 11000, e.details['writeErrors']))
        if len(panic_list) > 0:
            raise e
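# A hedged usage sketch for insertion(), reusing the MongoClient `client` from
# the sketch above. It assumes a unique index on the candle timestamp, so
# re-inserting overlapping batches is idempotent; the collection and field
# names are illustrative assumptions.
candles_coll = client["market"]["candles"]
candles_coll.create_index("timestamp", unique=True)

batch = [
    {"timestamp": 1700000000, "open": 1.0, "high": 1.2, "low": 0.9, "close": 1.1},
    {"timestamp": 1700000060, "open": 1.1, "high": 1.3, "low": 1.0, "close": 1.2},
]
insertion(candles_coll, batch)
# Calling it again with the same batch is safe: duplicates are silently skipped.
insertion(candles_coll, batch)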
def update_api(
    apis_collection: pymongo.collection.Collection,
    api_provider: str,
    number_of_calls: int,
):
    apis_collection.update_one(
        {"provider": api_provider},
        {"$set": {"number_of_calls": number_of_calls}})
def put_dataset(h5_dataset: H5Dataset, folder: str,
                collection: pymongo.collection.Collection):
    try:
        h5_dataset.convert(folder)
        collection.insert_one(h5_dataset.dataset)
        logging.info(f"Successfully inserted dataset {h5_dataset.token}")
    except pymongo.errors.DuplicateKeyError:
        rm_rf(folder)
        logging.error(f"Duplicated token in dataset {h5_dataset.token}")
def delete_user_address(coll: pymongo.collection.Collection,
                        user_id: ObjectId, address_type: str):
    assert isinstance(user_id, ObjectId)

    coll.update_one({"_id": user_id},
                    {"$pull": {"addresses": {"type": address_type}}})
async def remove_dataset(path_to_dataset: str, out_dir: str, gmt_dir: str,
                         collection: pymongo.collection.Collection):
    dataset = collection.find_one({'selfPath': path_to_dataset})
    if dataset is not None:
        convert_folder = os.path.join(out_dir, dataset["token"])
        collection.delete_one({'selfPath': path_to_dataset})
        rm_rf(convert_folder)
        await modules_merger(out_dir, gmt_dir)
        logging.info(f"Successfully removed dataset {dataset['token']}")
def add_user_contract_scan(coll: pymongo.collection.Collection,
                           user_id: ObjectId, contract_id: ObjectId, file_id):
    assert isinstance(user_id, ObjectId)
    assert isinstance(contract_id, ObjectId)

    coll.update_one({
        "_id": user_id,
        "contracts._id": contract_id
    }, {"$set": {
        "contracts.$.scan_file": file_id,
    }})
def write_to_fomo(id: int, id_cursor: pymongo.collection.Collection,
                  source: str):
    # Check if it exists first.
    record = exists_on_fomo(id, id_cursor)
    if record:
        if source == 'file':
            deleted_ids.append(id)
            id_cursor.delete_one({'id': id})
        if source == 'api':
            return id_cursor.update_one(
                {'id': id},
                {'$set': {'updated': datetime.datetime.utcnow()}})
    else:
        id_cursor.insert_one({
            "id": id,
            "updated": datetime.datetime.utcnow(),
        })
def delete_user_document(coll: pymongo.collection.Collection,
                         user_id: ObjectId, document_id: ObjectId):
    assert isinstance(user_id, ObjectId)
    assert isinstance(document_id, ObjectId)

    coll.update_one({"_id": user_id},
                    {"$pull": {"documents": {"_id": document_id}}})
def update_tree(collection: pymongo.collection.Collection, tree: MondrianTree,
                previous_leaves: Set[str], partitions: Dict[UUID, Partition]):
    new_nodes = []
    is_incremental = True
    if not previous_leaves:
        new_nodes = [tree]
        is_incremental = False
    else:
        diff_tree(tree, previous_leaves, new_nodes)
    collection.bulk_write(produce_updates(new_nodes, is_incremental,
                                          partitions),
                          ordered=False,
                          bypass_document_validation=True)
def invalidate_user_contract(coll: pymongo.collection.Collection,
                             user_id: ObjectId, contract_id: ObjectId,
                             invalidation_date: datetime):
    assert isinstance(user_id, ObjectId)
    assert isinstance(contract_id, ObjectId)

    coll.update_one({
        "_id": user_id,
        "contracts._id": contract_id
    }, {"$set": {
        "contracts.$.invalidation_date": invalidation_date,
    }})
def invalidate_user_document(coll: pymongo.collection.Collection,
                             user_id: ObjectId, document_id: ObjectId,
                             invalidation_date: datetime):
    assert isinstance(user_id, ObjectId)
    assert isinstance(document_id, ObjectId)

    coll.update_one({
        "_id": user_id,
        "documents._id": document_id
    }, {"$set": {
        "documents.$.invalidation_date": invalidation_date
    }})
def insert_meme(
    memes_collection: pymongo.collection.Collection,
    meme_name: str,
    meme_url: str,
    meme_description: str = "*new meme*",
):
    meme = {
        "name": meme_name,
        "description": meme_description,
        "times_used": 0,
        "url": meme_url,
    }
    memes_collection.insert_one(meme)
def add_users(coll: pymongo.collection.Collection, ids: list):
    users = [{
        "_id": _id,
        "user": _id,
        "role": [],
        "workpans": [],
        "created": datetime.now().replace(microsecond=0),
        "type": "user",
        "email_validated": "no",
    } for _id in ids if isinstance(_id, ObjectId)]
    coll.insert_many(users)
    return ids
def fix_all_descriptions(self, collection: pymongo.collection.Collection):
    """One-time function to fix all descriptions.

    Args:
        collection (pymongo.collection.Collection): MongoDB collection to be
            updated
    """
    offers = collection.find()
    for offer in tqdm.tqdm(offers, desc="Fixing the descriptions"):
        try:
            description_details = self.parse_single_description(offer)
            collection.update_one({"id": offer['id']},
                                  {"$set": description_details})
        except IndexError:
            print("ERROR")
def store_quotes(quotes: list, collection: pymongo.collection.Collection):
    """Creates entries in the database for the provided quotes."""

    def map_quote(quote: dict) -> dict:
        instrument_id = parse_instrument_url(quote["instrument"])
        plucked = {
            "instrument_id": instrument_id,
            **pluck(DESIRED_QUOTE_KEYS, quote)
        }
        plucked["updated_at"] = parse_updated_at(plucked["updated_at"])
        return plucked

    quotes = list(filter(lambda quote: quote is not None, quotes))

    def format_quote(quote: dict) -> dict:
        return {
            "symbol": quote["symbol"],
            "bid": quote["bid_price"],
            "ask": quote["ask_price"],
        }

    pprint(list(map(format_quote, quotes)))

    # Update the index collection with up-to-date tradability info.
    timestamp = datetime.datetime.utcnow()

    def update_index_symbol(datum: dict) -> pymongo.operations.UpdateOne:
        data = {
            "timestamp": timestamp,
            "has_traded": datum.get("has_traded"),
            "updated_at": parse_updated_at(datum.get("updated_at")),
            "trading_halted": datum.get("trading_halted"),
        }
        instrument_id = parse_instrument_url(datum["instrument"])
        return pymongo.operations.UpdateOne({"instrument_id": instrument_id},
                                            {"$set": data})

    ops = list(map(update_index_symbol, quotes))
    INDEX_COL.bulk_write(ops, ordered=False)

    quotes = list(map(map_quote, quotes))
    try:
        collection.insert_many(quotes, ordered=False)
    except BulkWriteError as bwe:
        for err in bwe.details["writeErrors"]:
            if "duplicate key" not in err["errmsg"]:
                print("ERROR: Unhandled exception occurred during batch write:")
                pprint(err)
def insert_doc(doc: dict, col: pymongo.collection.Collection):
    if_insert = True
    if doc["danmaku"]:
        # Skip the insert if a document with the same danmaku already exists.
        for _ in col.find({"danmaku": doc["danmaku"]}):
            if_insert = False
            break
    elif doc["uid"]:
        for _ in col.find({"uid": doc["uid"]}):
            if_insert = False
            break
    if if_insert:
        col.insert_one(doc)
        print("insert: " + str(doc))
def get_all(collection: pymongo.collection.Collection) -> List[str]:
    instruments: List[str] = list()
    cursor = collection.find({})
    for document in cursor:
        instruments.append(document['symbole'])
    return instruments
def get_next_id(col: pymongo.collection.Collection, id_name: str):
    # logger.warning(f'id_name: {id_name}')
    # Atomically increment the counter and return the post-increment value.
    # Note: find_one_and_update takes return_document, not the legacy
    # find_and_modify keyword new=True.
    ret = col.find_one_and_update(
        {"_id": id_name},
        {"$inc": {"sequence_value": 1}},
        return_document=pymongo.ReturnDocument.AFTER)
    new_id = ret["sequence_value"]
    return new_id
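# A hedged usage sketch for get_next_id(), reusing the MongoClient `client`
# assumed above. The counter document must be seeded once per sequence name;
# the "counters" collection and "user_id" sequence name are illustrative
# assumptions.
counters = client["mydb"]["counters"]
counters.insert_one({"_id": "user_id", "sequence_value": 0})

next_user_id = get_next_id(counters, "user_id")  # -> 1
next_user_id = get_next_id(counters, "user_id")  # -> 2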
def get_all_items(collection: pymongo.collection.Collection,
                  model: Type[BaseModel],
                  *,
                  page: int = 1,
                  num_per_page: int = 20,
                  query: dict = None,
                  projection: dict = None):
    """
    Retrieve all items from a collection

    :param collection: Collection to query
    :param model: Class which the JSON in the collection represents
    :param page: Page number to retrieve.
        #ToDo: implement correct server-side pagination
    :param num_per_page: Number of items per page to retrieve. Defaults to 20.
    :param query: Return only objects that contain the query
    :param projection: Filter to exclude keys from each result
    :return: List of objects in the collection
    """
    projection = {} if projection is None else projection
    projection.update(ignore_mongo_id)
    collection_json = list(
        collection.find(filter=query, projection=projection)
        .skip((page - 1) * num_per_page)
        .limit(num_per_page))
    return [model(**item_json) for item_json in collection_json]
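# A hedged usage sketch for get_all_items() with a Pydantic model. The Item
# model, the collection handle, and the price query are illustrative
# assumptions.
from pydantic import BaseModel

class Item(BaseModel):
    id_: str
    name: str
    price: float

items_coll = client["mydb"]["items"]
# Second page of items priced under 10, twenty per page.
cheap_items = get_all_items(items_coll, Item, page=2,
                            query={"price": {"$lt": 10}})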
def update_dataset(old_dataset: Dict, h5_dataset: H5Dataset, folder: str,
                   collection: pymongo.collection.Collection):
    try:
        rm_rf(folder)
        h5_dataset.convert(folder)
        collection.update_one({'_id': old_dataset['_id']},
                              {'$set': h5_dataset.dataset})
        logging.info(f"Successfully updated dataset {h5_dataset.token}")
    except pymongo.errors.DuplicateKeyError:
        rm_rf(folder)
        logging.error(f"Duplicated token in dataset {h5_dataset.token}")
    except Exception as e:
        rm_rf(folder)
        logging.error(
            f"Failed to update dataset {h5_dataset.token} due to: {e}")
def get_item(collection: pymongo.collection.Collection,
             model: Union[Type[BaseModel], Type[dict]],
             item_id: Union[UUID, str],
             *,
             id_key: str = "id_",
             query: dict = None,
             projection: dict = None,
             raise_exc: bool = True):
    """
    Retrieve a single item from a collection

    :param collection: Collection to query
    :param model: Class which the JSON in the collection represents
    :param item_id: UUID or name of desired item
    :param id_key: If the UUID is stored outside of id_, specify here
    :param query: Return only objects that contain the query
    :param projection: Filter to exclude from mongo query result
    :param raise_exc: Whether to raise an exception if the item is not found.
    :return: Requested object from collection
    """
    projection = {} if projection is None else projection
    projection.update(ignore_mongo_id)
    query = {} if query is None else query
    query.update(mongo_filter(model, item_id, id_key=id_key))
    item_json = collection.find_one(query, projection=projection)
    if item_json is None and raise_exc:
        raise problems.DoesNotExistException(
            "read", model.__name__,
            mongo_filter(model, item_id, id_key=id_key))
    elif model is dict or item_json is None:
        return item_json
    else:
        return model(**item_json)
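# A hedged usage sketch for get_item(), continuing the Item model and
# items_coll handle assumed above; the item ids are placeholders. Passing
# raise_exc=False returns None instead of raising when the item is missing,
# and passing dict as the model returns the raw document.
item = get_item(items_coll, Item, "some-item-uuid", id_key="id_")
maybe_item = get_item(items_coll, Item, "unknown-id", raise_exc=False)
raw_json = get_item(items_coll, dict, "some-item-uuid", raise_exc=False)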
def add_mdoc(coll: pymongo.collection.Collection, mdoc: dict):
    mdoc_filter = {"_id": mdoc["_id"]}
    current_mdoc_in_db = coll.find_one(mdoc_filter)
    if current_mdoc_in_db is not None:
        print(f"Existing mdoc '{mdoc['_id']}': {current_mdoc_in_db}")
        rewrite = input(f"mdoc '{mdoc['_id']}' already exists in the "
                        "collection, overwrite it (y/N)? ")
        # "a"/"ano" are kept as accepted answers from the original Czech prompt.
        if rewrite.lower() not in ["a", "ano", "y", "yes"]:
            print("Keeping the original mdoc.")
            return
        coll.delete_one(mdoc_filter)
    coll.insert_one(mdoc)
    print(f"Added mdoc {mdoc['_id']}")