Esempio n. 1
0
def write_to_mongodb(names, entry_id):
    """Store the important-person data on the record identified by entry_id.

    Sets the ``important_person_company`` field of the document whose
    ``_id`` is ``ObjectId(entry_id)`` in the collection returned by
    ``refer_cleaned_collection()``. When ``names`` equals the empty list,
    the placeholder string 'No important persons found' is stored instead.
    """
    # Anything that does not compare equal to [] is written through as-is.
    payload = names if names != [] else 'No important persons found'

    collection = refer_cleaned_collection()
    collection.update_one(
        {'_id': ObjectId(entry_id)},
        {'$set': {'important_person_company': payload}},
    )
Esempio n. 2
0
def extract_data(def_entry_id):
    """Return the combined header and paragraph text stored for one record.

    Looks up the documents whose ``_id`` is ``ObjectId(def_entry_id)`` in the
    collection returned by ``refer_cleaned_collection()`` and uses the first
    match.

    Returns:
        ``header_text + paragraph_text`` of the first matching document, or
        ``None`` when no document matches or either field is missing.
    """
    mycol = refer_cleaned_collection()
    data = list(mycol.find({'_id': ObjectId(def_entry_id)}))
    print('data:', data)

    # No matching document: report "nothing extracted" the same way a
    # missing field does, instead of failing with IndexError on data[0].
    if not data:
        return None

    record = data[0]
    try:
        extracted_data = record['header_text'] + record['paragraph_text']
    except KeyError:
        extracted_data = None

    return extracted_data
Esempio n. 3
0
def get_persons_dnb(def_entry_id):
    """Return the last element of a record's ``dnb_cp_info`` field.

    Looks up the documents whose ``_id`` is ``ObjectId(def_entry_id)`` in the
    collection returned by ``refer_cleaned_collection()`` and uses the first
    match.

    Returns:
        ``dnb_cp_info[-1]`` when the field holds more than one element;
        ``None`` when no document matches, the field is missing, or the
        field holds at most one element.
    """
    mycol = refer_cleaned_collection()
    data = list(mycol.find({'_id': ObjectId(def_entry_id)}))

    # No matching document: return None, consistent with the missing-field
    # case, instead of failing with IndexError on data[0].
    if not data:
        return None

    try:
        person_names = data[0]['dnb_cp_info']
        # NOTE(review): only the last element is used, and only when there is
        # more than one element — confirm against the producer of dnb_cp_info.
        if len(person_names) > 1:
            return person_names[-1]
        return None
    except KeyError:
        return None
Esempio n. 4
0
def get_company(def_entry_id, key):
    """Fetch one field of a record and classify it as a link or plain text.

    Reads ``key`` from the first document whose ``_id`` is
    ``ObjectId(def_entry_id)`` in the collection returned by
    ``refer_cleaned_collection()``.

    Returns:
        A ``(value, kind)`` tuple where ``kind`` is ``'link'`` if the value
        contains any of 'www.', '.com', 'https' or 'http', else ``'text'``.

    Raises:
        IndexError: if no document matches.
        KeyError: if the matched document has no ``key`` field.
    """
    link_markers = ('www.', '.com', 'https', 'http')

    collection = refer_cleaned_collection()
    matches = list(collection.find({'_id': ObjectId(def_entry_id)}))
    comp_name = matches[0][key]

    # A single marker anywhere in the value is enough to call it a link.
    looks_like_link = any(marker in comp_name for marker in link_markers)
    value_kind = 'link' if looks_like_link else 'text'

    return comp_name, value_kind
Esempio n. 5
0
def predict_emails(entry_id):
    """Fill in missing e-mail values for the persons stored on a record.

    Reads ``important_person_company`` from the first document whose ``_id``
    is ``ObjectId(entry_id)``, resolves an organisation via ``get_company``
    and ``get_org``, and for every person whose ``email`` is ``None`` stores
    the result of ``generate_validate_email(name, org)`` in its place. The
    updated list is then written back with ``write_to_mongodb``.

    Nothing is written when the field holds the placeholder string
    'No important persons found'.

    Raises:
        IndexError: if no document matches ``entry_id``.
        KeyError: if a required field is missing from the document or from
            a person entry.
    """
    # Get the data for important_person_company for the given entry id
    mycol = refer_cleaned_collection()
    data = list(mycol.find({"_id": ObjectId(entry_id)}))
    record = data[0]
    important_person_company = record['important_person_company']

    comp_name, comp_type = get_company(entry_id, 'search_text')
    # A plain-text search string longer than 15 characters is replaced by the
    # record's 'comp_name' field; otherwise the first lookup is reused as-is
    # (the original repeated the identical query here).
    # NOTE(review): the rationale for the 15-character threshold is not
    # visible in this file — confirm with the author.
    if comp_type == 'text' and len(record['search_text']) > 15:
        comp_name, comp_type = get_company(entry_id, 'comp_name')
    org = get_org(comp_name, comp_type)

    if important_person_company == 'No important persons found':
        return

    print(important_person_company)
    for person in important_person_company:
        if person['email'] is None:
            # Generate all possible combinations of emails and verify
            person['email'] = generate_validate_email(person['name'], org)

    # Write back to the MongoDB record, the updated emails
    write_to_mongodb(important_person_company, entry_id)