Exemple #1
0
def write_contract_to_mongo(reqId, contract: Contract,
                            symbols: pymongo.collection.Collection):
    """Write a contract to the symbols collection with a validFrom attribute.

    The contract's instance attributes are copied into a new document that
    additionally carries the request id and the current local time.

    Args:
        reqId: Request identifier stored under the ``reqId`` key.
        contract: Object whose instance attributes (``vars(contract)``) become
            the fields of the stored document.
        symbols: Collection that receives the document via ``insert_one``.
    """
    # vars() returns the object's live __dict__; copy it so that the extra
    # keys written below (and the "_id" pymongo adds on insert) do not end up
    # as attributes on the caller's contract object.
    document = dict(vars(contract))
    document["reqId"] = reqId
    document["validFrom"] = datetime.datetime.now()
    symbols.insert_one(document)
Exemple #2
0
    def update_single_offer(self,
                            collection: pymongo.collection.Collection,
                            driver: webdriver.Chrome,
                            offer: dict,
                            old_db_corresponding_record=None):
        """Parse a single offer and insert or update it in MongoDB.

        The base fields are copied from ``offer``; address, city and salary
        are parsed out of the HTML summary; the full description is read from
        the offer page loaded through ``driver``.

        Args:
            collection (pymongo.collection.Collection): MongoDB collection to run the update against
            driver (webdriver.Chrome): Selenium webdriver to get offer details
            offer (dict): A single offer; must provide the ``id``, ``published``,
                ``title``, ``author``, ``link`` and ``summary`` keys. A
                ``full_description`` key is written into it as a side effect.
            old_db_corresponding_record (Dict, optional): Corresponding record in MongoDB, if exists. Defaults to None.

        Raises:
            IndexError: If the summary text does not contain the expected
                ``Salary: `` / ``Location: `` lines.
        """
        # Imported locally so the block does not depend on a module-level
        # import that may not exist; selenium itself is already a dependency.
        from selenium.webdriver.common.by import By

        doc = {
            "id": offer['id'],
            "date": offer['published'],
            "title": offer['title'],
            "position": offer['title'].split('@')[0],
            "author": offer['author'],
            "link": offer['link'],
        }

        # Key access (instead of ``offer.summary``) works for plain dicts as
        # well as for dict subclasses that also expose attributes.
        summary_lines = BeautifulSoup(offer['summary'],
                                      'html.parser').text.split('\n')

        # NOTE(review): the location is expected on line index 5 and the
        # salary on line index 4 of the summary text - confirm against the feed.
        location = summary_lines[5].split('Location: ', 1)[1]
        # Everything before the last comma is the address, the rest the city.
        address, _, city = location.rpartition(',')
        city = city.strip()

        salaries_raw = summary_lines[4].split('Salary: ')[1]
        # A "(x, y)" group contains a comma itself, so such a value must not
        # be split on commas.
        if re.search(r"\(.{1,17},.{1,17}\)", salaries_raw):
            salaries = [salaries_raw]
        else:
            salaries = salaries_raw.split(',')
        salary_ranges = [
            self.get_salary_details(salary.replace(" ", ""), salaries_raw)
            for salary in salaries
        ]

        if city in self.cities_translations:
            city = self.cities_translations[city]

        # The offer id is used as the URL of the offer page.
        driver.get(offer['id'])
        # find_element(By.ID, ...) replaces find_element_by_id, which newer
        # Selenium releases no longer provide.
        full_description = driver.find_element(By.ID, 'root').text

        doc["address"] = address
        doc["city"] = city
        doc["salary"] = salary_ranges
        doc["raw_salary"] = salaries_raw
        doc["full_description"] = full_description
        # parse_single_description receives the offer, so the description is
        # stored on it as well.
        offer["full_description"] = full_description
        try:
            description_details = self.parse_single_description(offer)
            doc.update(description_details)
        except IndexError as e:
            print(f'Error parsing description for offer {offer["id"]} : {e}')
        if city not in self.cities:
            self.cities.append(city)
        if old_db_corresponding_record:
            collection.update_one({"id": offer['id']}, {"$set": doc})
        else:
            collection.insert_one(doc)
Exemple #3
0
def put_dataset(h5_dataset: H5Dataset, folder: str,
                collection: pymongo.collection.Collection):
    """Convert a dataset into ``folder`` and insert its record into ``collection``.

    On a duplicate-key error ``rm_rf(folder)`` is called and an error is
    logged; the exception is not propagated.

    Args:
        h5_dataset: Dataset providing ``convert``, ``dataset`` and ``token``.
        folder: Target folder handed to ``h5_dataset.convert``.
        collection: Collection receiving ``h5_dataset.dataset``.
    """
    try:
        h5_dataset.convert(folder)
        collection.insert_one(h5_dataset.dataset)
    except pymongo.errors.DuplicateKeyError:
        # Undo the conversion output for a dataset that could not be stored.
        rm_rf(folder)
        logging.error(f"Duplicated token in dataset {h5_dataset.token}")
    else:
        logging.info(f"Successfully inserted dataset {h5_dataset.token}")
Exemple #4
0
def write_to_fomo(id: int, id_cursor: pymongo.collection.Collection, source: str):
    """Record ``id`` in the collection, or refresh/remove it when already known.

    * Unknown id: a new ``{"id", "updated"}`` document is inserted.
    * Known id and ``source == 'api'``: the ``updated`` timestamp is refreshed
      and the ``update_one`` result is returned.
    * Known id and ``source == 'file'``: the id is appended to ``deleted_ids``
      and its document is deleted.

    Returns:
        The ``update_one`` result for the ``'api'`` case, otherwise ``None``.
    """
    # Check whether the id is already stored before deciding what to do.
    if not exists_on_fomo(id, id_cursor):
        id_cursor.insert_one({"id": id, "updated": datetime.datetime.utcnow()})
        return None

    if source == 'api':
        refreshed = {'$set': {'updated': datetime.datetime.utcnow()}}
        return id_cursor.update_one({'id': id}, refreshed)

    if source == 'file':
        deleted_ids.append(id)
        id_cursor.delete_one({'id': id})
    return None
Exemple #5
0
def insert_meme(
    memes_collection: pymongo.collection.Collection,
    meme_name: str,
    meme_url: str,
    meme_description: str = "*new meme*",
):
    """Insert a meme document whose usage counter starts at zero.

    Args:
        memes_collection: Collection receiving the document.
        meme_name: Stored under ``name``.
        meme_url: Stored under ``url``.
        meme_description: Stored under ``description``.
    """
    memes_collection.insert_one(
        dict(
            name=meme_name,
            description=meme_description,
            times_used=0,
            url=meme_url,
        )
    )
def insert_doc(doc: dict, col: pymongo.collection.Collection):
    """Insert ``doc`` unless an equivalent document is already stored.

    The duplicate check uses the ``danmaku`` value when it is truthy,
    otherwise the ``uid`` value when that is truthy. When neither is set the
    document is inserted without any check. A missing key is treated like an
    empty value.

    Args:
        doc: Document to insert.
        col: Collection to check and insert into.
    """
    if doc.get("danmaku"):
        duplicate_filter = {"danmaku": doc["danmaku"]}
    elif doc.get("uid"):
        duplicate_filter = {"uid": doc["uid"]}
    else:
        duplicate_filter = None

    # find_one fetches at most one document, which is all an existence
    # check needs.
    if duplicate_filter is not None and col.find_one(duplicate_filter) is not None:
        return

    col.insert_one(doc)
    print("insert: " + str(doc))
def add_mdoc(coll: pymongo.collection.Collection, mdoc: dict):
    """Add ``mdoc`` to the collection, asking before replacing an existing one.

    When a document with the same ``_id`` exists, it is printed and the user
    is prompted; only an affirmative answer ("a", "ano", "y", "yes", any case)
    deletes the old document and inserts the new one.

    Args:
        coll: Collection to insert into.
        mdoc: Document to add; must contain ``_id``.
    """
    selector = {"_id": mdoc["_id"]}
    existing = coll.find_one(selector)

    if existing is not None:
        print(f"Stávanící mdokument '{mdoc['_id']}': {existing}")
        answer = input(f"'{mdoc['_id']}' mdokument už v kolekci existuje, mám ho přepsat (a/N)? ")
        confirmed = answer.lower() in ("a", "ano", "y", "yes")
        if not confirmed:
            print("Zachovávám původní mdokument.")
            return

        # Remove the old document so the insert below cannot collide with it.
        coll.delete_one(selector)

    coll.insert_one(mdoc)
    print(f"Přidán mdokument {mdoc['_id']}")
Exemple #8
0
def create_item(collection: pymongo.collection.Collection,
                model: Union[Type[BaseModel], Type[dict]],
                new_item_obj: BaseModel,
                *,
                id_key: str = "id_",
                projection: dict = None,
                raise_exc: bool = True) -> BaseModelClass:
    """
    Insert a new item into the collection and return it as read back.

    :param collection: Collection to insert into
    :param model: Class which the JSON in the collection represents
    :param new_item_obj: Object to place in collection; receives a fresh UUID under ``id_key`` when it has no truthy id yet
    :param id_key: If the UUID is stored outside of id_, specify here
    :param projection: Passed through to ``get_item`` when reading the item back
    :param raise_exc: Whether to raise exception if item cannot be created.
    :return: The item as returned by ``get_item``; ``None`` when the insert is
        not acknowledged, or on a duplicate key when ``raise_exc`` is false
    """
    try:
        existing_id = getattr(new_item_obj, id_key, None)
        if not existing_id:
            setattr(new_item_obj, id_key, uuid4())

        insert_result = collection.insert_one(dict(new_item_obj))
        if not insert_result.acknowledged:
            return None

        return get_item(collection, model, getattr(new_item_obj, id_key),
                        id_key=id_key, projection=projection, raise_exc=False)

    except pymongo.errors.DuplicateKeyError:
        if not raise_exc:
            return None
        raise problems.UniquenessException("create", model.__name__, id_and_name(model, new_item_obj))
Exemple #9
0
def insert_tables_mongo(coll_tables: pymongo.collection.Collection,
                        detected_table: dict) -> list:
    """
    Insert one detected table into MongoDB.

    Returns a single-element list holding a message that embeds the
    ``insert_one`` result.
    """
    outcome = coll_tables.insert_one(detected_table)
    message = f'Inserted table: {outcome}'
    return [message]
Exemple #10
0
def import_new_records(base_id: str,
                       table: str,
                       mongo_table: pymongo.collection.Collection,
                       view: Optional[str] = None) -> None:
    """Import new records from Airtable to MongoDB.

    Records that already carry a ``mongo_id`` field are updated in MongoDB and
    their review status is written back to Airtable. All other records are
    inserted, and the new MongoDB id is written back to Airtable.

    Args:
        base_id: Airtable base to read from.
        table: Airtable table to iterate over and update.
        mongo_table: MongoDB collection receiving the documents.
        view: Optional Airtable view passed to the record iteration.

    Raises:
        ValueError: If no Airtable API key is configured.
    """

    if not _AIRTABLE_API_KEY:
        raise ValueError(
            'No API key found. Create an airtable API key at '
            'https://airtable.com/account and set it in the AIRTABLE_API_KEY '
            'env var.')
    client = airtable.Airtable(base_id, _AIRTABLE_API_KEY)
    records = client.iterate(table, view=view)

    converter = airtable_to_protos.ProtoAirtableConverter(
        proto_type=review_pb2.DocumentToReview,
        id_field=None,
        required_fields=('anonymized_url', ))

    num_inserted = 0
    num_updated = 0
    for record in records:
        # Read "fields" once and tolerate its absence everywhere below.
        fields = record.get('fields', {})
        mongo_id = fields.get('mongo_id')

        proto_data = converter.convert_record(record)
        airtable_id = proto_data.pop('_id')
        if fields.get('anonymized_url'):
            # Only the URL of the first attachment is stored.
            proto_data['anonymizedUrl'] = fields['anonymized_url'][0]['url']

        if mongo_id:
            # Already added, let's update it.
            document_json = mongo_table.find_one_and_update(
                {'_id': objectid.ObjectId(mongo_id)},
                {'$set': proto_data},
            )
            if document_json is None:
                # find_one_and_update returns None when no document matches,
                # e.g. when the document was removed from MongoDB. Skip the
                # record instead of aborting the whole import.
                print(f'No MongoDB document found for mongo_id {mongo_id} '
                      f'(Airtable record {airtable_id}), skipping.')
                continue
            any_pending_or_done_review = document_json.get('numPendingReviews', 0) or \
                document_json.get('numDoneReviews', 0)
            timeout_review_count = sum(
                1 for review in document_json.get('reviews', [])
                if review.get('status') == 'REVIEW_TIME_OUT')
            client.update(
                table, airtable_id, {
                    'Bayes help needed': not any_pending_or_done_review,
                    'review_timeouts': timeout_review_count,
                })
            num_updated += 1
            continue

        result = mongo_table.insert_one(proto_data)
        mongo_id = str(result.inserted_id)
        client.update(table, airtable_id, {
            'mongo_id': mongo_id,
            'Bayes help needed': True
        })
        num_inserted += 1

    print(f'{num_updated:d} documents updated.')
    print(f'{num_inserted:d} documents added.')
Exemple #11
0
def addPunishItem(cmp: pymongo.collection.Collection, apDataList: list):
    """Fetch the detail record for every entry and store it in ``cmp``.

    For each entry the detail endpoint is queried by ``dcdCode``; the selected
    fields of the response's ``data`` object are inserted as one document.

    Args:
        cmp: Collection receiving one document per successfully loaded entry.
        apDataList: Entries to load; each must contain a ``dcdCode`` key.

    Returns:
        list: The entries whose HTTP request failed, in input order.

    Raises:
        KeyError, IndexError, ValueError: If a response body is not JSON or
            lacks the expected fields.
    """
    # Module-wide setting, so assign it once rather than on every iteration.
    requests.adapters.DEFAULT_RETRIES = 5

    failed = []
    for apData in apDataList:
        dcdCode = apData['dcdCode']

        try:
            # Fetch the detailed information for this code.
            response = requests.get(
                'http://210.76.74.232/appr-law-datacenter-service/law/datacenter/publicity/2/'
                + dcdCode,
                timeout=(5, 5))
        except requests.RequestException:
            failed.append(apData)
            print("Request failed :" + dcdCode)
            continue

        detail = json.loads(response.text)['data']
        # Only the first punishment model of the record is stored.
        first_punish = detail['punishTypeModels'][0]
        item = {
            'dcdCode': detail['dcdCode'],
            # 1: legal person and other organizations | 2: natural person
            # | 3: individual business
            'admCounterCategory': detail['admCounterCategory'],
            'admCounterName': detail['admCounterName'],
            'caseName': detail['caseName'],
            'decisionNum': detail['decisionNum'],
            'illegalBasis': detail['illegalBasis'],
            'illegalFact': detail['illegalFact'],
            'punishBasis': detail['punishBasis'],
            'punishDate': detail['punishDate'],
            'punishType': first_punish['punishType'],  # 2: fine
            'punishAmount': first_punish['punishAmount'],  # unit: ten thousand
            'remark': detail['remark'],
        }
        print('Loaded successfully: ' + dcdCode)
        cmp.insert_one(item)
        # Short pause between requests.
        time.sleep(0.1)
    return failed
Exemple #12
0
def auto_time_insert(col: pymongo.collection.Collection,
                     insert_dict: dict):
    """Stamp ``insert_dict`` with a UTC ``created_at`` time and insert it.

    The timestamp is written into the passed dict itself.

    Returns:
        The result of ``col.insert_one``.
    """
    insert_dict['created_at'] = datetime.datetime.utcnow()
    insert_result = col.insert_one(insert_dict)
    return insert_result
Exemple #13
0
def insert_id_if_not_exist(col: pymongo.collection.Collection, key_name: str, value):
    """Create the ``key_name`` sequence document unless it already exists.

    Args:
        col: Collection holding the sequence documents.
        key_name: Used as the document ``_id``.
        value: Stored under ``sequence_value`` when the document is created.
    """
    if col.find_one({"_id": key_name}) is not None:
        # An existing document keeps its current sequence value.
        return
    col.insert_one({"_id": key_name, "sequence_value": value})
Exemple #14
0
def insert_todo(todos_collection: pymongo.collection.Collection,
                timestamp: str, todo_content: str):
    """Insert a todo with its timestamp string parsed by ``parser.parse``.

    Args:
        todos_collection: Collection receiving the todo document.
        timestamp: Date string handed to ``parser.parse``.
        todo_content: Stored under ``content``.
    """
    # Store the parsed value rather than the raw string.
    parsed_timestamp = parser.parse(timestamp)
    todos_collection.insert_one(
        {"timestamp": parsed_timestamp, "content": todo_content})
Exemple #15
0
def insert_id_if_not_exist(col: pymongo.collection.Collection, key_name: str,
                           value):
    """Seed a sequence document under ``key_name`` when none is stored yet.

    Args:
        col: Collection holding the sequence documents.
        key_name: Used as the document ``_id``.
        value: Initial ``sequence_value`` for a newly created document.
    """
    already_present = col.find_one({"_id": key_name}) is not None
    if not already_present:
        seed = {"_id": key_name, "sequence_value": value}
        col.insert_one(seed)