Exemple #1
0
def main(request, context):
    """Step 5: read contacts from the data warehouse and from Orpi, then
    trigger step 6 once per Orpi contact via pub/sub.

    Args:
        request: incoming request, forwarded to the pubsub/logger helpers.
        context: trigger metadata (event_id, timestamp).
    """
    print("STEP 5 -- reading data")
    print("This Function was triggered by messageId {} published at {}".format(
        context.event_id, context.timestamp))
    aws_db, hubspot = pubsub.read_pubsub(request)

    try:
        dw_contacts = readDataWarehouse.get_contacts()
        logger.add_log(request, "Read dw data", "step 5", context.event_id,
                       "NOTICE")
        orpi_contacts = readOrpi.get_contacts()
        logger.add_log(request, "Read orpi data", "step 5", context.event_id,
                       "NOTICE")
    except Exception as error:
        logger.add_log(request, "Read table " + str(error), "step 5",
                       context.event_id, "ERROR")
        # Bug fix: without this return, dw_contacts/orpi_contacts are
        # undefined below and the loop would crash with an unlogged NameError.
        return

    # Fan out: one step-6 message per Orpi contact; a single publish failure
    # is logged and does not stop the remaining contacts.
    for i in orpi_contacts.index:
        try:
            pubsub.publish_pubsub(request, aws_db, hubspot, dw_contacts,
                                  orpi_contacts.loc[i])
        except Exception as error:
            logger.add_log(
                request,
                f"Trigger step 6 for {orpi_contacts.email[i]} with error {str(error)}",
                "step 5", context.event_id, "ERROR")
Exemple #2
0
def main(request, context):
    """Step 4: fetch the agents XML from the FTP server, parse it, load the
    resulting dataframe into BigQuery, then trigger the next step.

    Errors anywhere in the download/parse/load chain are logged as a single
    ERROR entry; the follow-up pub/sub message is published regardless.
    """
    print("STEP 4 -- reading data from ftp server")
    print("This Function was triggered by messageId {} published at {}".format(
        context.event_id, context.timestamp))

    aws_db, hubspot = pubsub.read_pubsub(request)

    def _notice(message):
        # Shorthand for a step-4 NOTICE log entry.
        logger.add_log(request, message, "step 4", context.event_id, "NOTICE")

    try:
        _notice("download FTP file")
        download_xml_from_ftp()
        _notice("read file")
        document = parse_xml()
        _notice("parse xml")
        agents = parse_dict(document)
        _notice("parse dict")
        frame = dict_to_df(agents)
        _notice("load to BQ")

        load_to_bigQuery(frame)
    except Exception as error:
        logger.add_log(request, "READ FTP " + str(error), "step 4",
                       context.event_id, "ERROR")

    pubsub.publish_pubsub(request, aws_db, hubspot)
Exemple #3
0
def main(request, context):
    """Step 0: download the national agencies file over FTP, load it into
    BigQuery, then trigger the next pipeline step via pub/sub.

    Returns:
        'Success' unconditionally — load errors are logged, not re-raised,
        so the pipeline keeps going.
    """
    logger.add_log(request, "Start Network Import", "step 0", context.event_id,
                   "NOTICE")

    agency, aws_db, hubspot = pubsub.read_pubsub(request)

    try:
        logger.add_log(request, "download FTP file", "step 0",
                       context.event_id, "NOTICE")
        download_zip_from_ftp()
        logger.add_log(request, "unzip file", "step 0", context.event_id,
                       "NOTICE")
        unzip_file()
        logger.add_log(request, "read dataframe", "step 0", context.event_id,
                       "NOTICE")
        df = pd.read_csv('/tmp/agences_orpi_national_201703.txt',
                         delimiter=";",
                         encoding='latin-1',
                         index_col=False)
        # Normalize every column to str before the BigQuery load.
        # (Was `df[df.columns].astype(str)` — the column selection was a no-op.)
        df = df.astype(str)
        logger.add_log(request, "load data into big query", "step 0",
                       context.event_id, "NOTICE")
        load_to_bigQuery(df)
    except Exception as error:
        # Bug fix: pass the message as a string like every other step does;
        # previously the raw exception object was passed to add_log.
        logger.add_log(request, str(error), "step 0", context.event_id,
                       "ERROR")

    pubsub.publish_pubsub(request,
                          agency=agency,
                          aws_db=aws_db,
                          hubspot=hubspot)

    return 'Success'
Exemple #4
0
def main(request, context):
    """Step 2: compare one Orpi agency row against data-warehouse companies.

    Publishes a "similar companies" message on the first match (landing-page
    similarity alone, or combined name/address/landing-page similarity),
    otherwise publishes the row for insertion as a new company.
    """
    print("STEP 2 -- compare companies data")
    print("This Function was triggered by messageId {} published at {}".format(context.event_id, context.timestamp))
    dw_companies, lp, orpi_row, aws_db, hubspot = pubsub.read_pubsub(request)
    logger.add_log(request, "read pub sub", "step 2", context.event_id, "NOTICE")

    # Bug fix: ratio_lp needs a default — if compare_lp raises, the
    # is_similar_companies call below would hit a NameError.
    ratio_lp = 0
    simulate_lp = ''
    try:
        ratio_lp, simulate_lp = compare_lp(orpi_row, lp)
    except Exception as error:
        logger.add_log(request, "Compare landing page : " + str(error), "step 2", context.event_id, "ERROR", orpi_row.CodeAgence[0])

    flag_found = False

    if is_similar_companies(0, 0, ratio_lp):
        # Landing-page similarity alone is enough; name/address scores are 0.
        logger.add_log(request, "We found similar companies", "step 2", context.event_id, "NOTICE", orpi_row.CodeAgence[0])
        pubsub.publish_similar_companies(request=request,
                                         orgid=None,
                                         codeagence=orpi_row.CodeAgence[0],
                                         dw_name=None,
                                         orpi_name=orpi_row.Name_cleaned[0],
                                         score_name=0,
                                         dw_address=None,
                                         orpi_address=orpi_row.Adress_cleaned[0],
                                         score_address=0,
                                         landing_page=simulate_lp,
                                         score_landing_page=ratio_lp)
    else:
        for index, dw_row in dw_companies.iterrows():

            try:
                ratio_address, ratio_name = compute_ratios(dw_row, orpi_row)
            except Exception as error:
                logger.add_log(request, "Compute ratios : " + str(error), "step 2", context.event_id, "ERROR", orpi_row.CodeAgence[0])
                # Bug fix: skip this candidate — without continue, the
                # comparison below would use undefined (first iteration,
                # NameError) or stale ratios from a previous row.
                continue

            if is_similar_companies(ratio_address, ratio_name, ratio_lp):
                logger.add_log(request, "We found similar companies", "step 2", context.event_id, "NOTICE", orpi_row.CodeAgence[0])

                pubsub.publish_similar_companies(request=request,
                                                 orgid=dw_row.orgid,
                                                 codeagence=orpi_row.CodeAgence[0],
                                                 dw_name=dw_row.Name,
                                                 orpi_name=orpi_row.Name_cleaned[0],
                                                 score_name=ratio_name,
                                                 dw_address=dw_row.Adress,
                                                 orpi_address=orpi_row.Adress_cleaned[0],
                                                 score_address=ratio_address,
                                                 landing_page=dw_row.landingpage,
                                                 score_landing_page=ratio_lp)
                logger.add_log(request, "Sending pubsub for similar companies", "step 2", context.event_id, "NOTICE", orpi_row.CodeAgence[0])

                flag_found = True
                break

        if not flag_found:
            # No similar company anywhere: hand the row to the insert step.
            pubsub.publish_companie_to_insert_pubsub(request, orpi_row, aws_db, hubspot)
            logger.add_log(request, "New company, Trying to insert {}".format(orpi_row.Name_cleaned[0]), "step 2",
                           context.event_id, "NOTICE", orpi_row.CodeAgence[0])
Exemple #5
0
def main(request, context):
    """Step 3 bis: persist a "new company" link record into BigQuery,
    skipping rows whose orgid/code_agence link already exists.
    """
    print("STEP 3 bis -- log new companies")
    print("This Function was triggered by messageId {} published at {}".format(
        context.event_id, context.timestamp))
    dict_result = pubsub.read_pubsub(request)

    try:
        df = pd.DataFrame(dict_result, index=[0])
        # code_agence is zero-padded to 6 digits to match the stored format.
        if not existingLink.test_existing_link(
                df.orgid[0],
                str(df.code_agence[0]).zfill(6)):
            load_to_bigQuery(df)
        logger.add_log(request, "writing in table ", "step 3 bis",
                       context.event_id, "NOTICE", dict_result["code_agence"])
    except Exception as error:
        # Bug fix: the error entry was tagged "step 3" while the success
        # entry above says "step 3 bis" — use the same step tag.
        logger.add_log(request, "writing in table  " + str(error), "step 3 bis",
                       context.event_id, "ERROR", dict_result["code_agence"])
Exemple #6
0
def main(request, context):
    """Step 2 bis: persist a "similar companies" match record into BigQuery."""
    print("STEP 2 bis -- log similar companies")
    print("This Function was triggered by messageId {} published at {}".format(
        context.event_id, context.timestamp))
    dict_result = pubsub.read_pubsub(request)
    logger.add_log(request, "read pub sub", "step 2 bis", context.event_id,
                   "NOTICE", dict_result["code_agence"])

    try:
        df = pd.DataFrame(dict_result, index=[0])
        logger.add_log(request, "convert json to df", "step 2 bis",
                       context.event_id, "NOTICE", dict_result["code_agence"])
        load_to_bigQuery(df)
        logger.add_log(request, "load to BQ", "step 2 bis", context.event_id,
                       "NOTICE", dict_result["code_agence"])

    except Exception as error:
        # Bug fix: pass the message as a string like the other steps do;
        # previously the raw exception object was passed to add_log.
        logger.add_log(request, str(error), "step 2 bis", context.event_id,
                       "ERROR", dict_result["code_agence"])
Exemple #7
0
def main(request, context):
    """Step 6: check each data-warehouse contact email against the Orpi row;
    if no matching email exists, publish the contact for insertion (step 7).
    """
    print("STEP 6 -- compare companies data")
    print("This Function was triggered by messageId {} published at {}".format(
        context.event_id, context.timestamp))
    aws_db, hubspot, dw_companies, orpi_row = pubsub.read_pubsub(request)

    # Throttle: deliberate pause before processing — TODO confirm why
    # (rate limit on a downstream service, presumably).
    time.sleep(10)

    flag_found = False
    logger.add_log(request, "reading pubsub", "step 6", context.event_id,
                   "NOTICE")

    for index, dw_row in dw_companies.iterrows():
        existing_email = False

        if dw_row.email is not None:
            try:
                existing_email = compareData.compare_email(
                    dw_row.cleaned_email, orpi_row.cleaned_email[0])
            except Exception as error:
                # Bug fix: include the exception detail — the original
                # logged a fixed message and discarded `error`.
                logger.add_log(request,
                               f"Failed to compare email: {error}", "step 6",
                               context.event_id, "ERROR")

        if existing_email:
            logger.add_log(request, "similar email found", "step 6",
                           context.event_id, "NOTICE")
            flag_found = True
            break

    if not flag_found:
        logger.add_log(request, f"Trying to insert {orpi_row.email[0]}",
                       "step 6", context.event_id, "NOTICE")
        try:
            orgid = compareData.get_orgid(orpi_row.code_agence[0])
            if orgid is not None:
                pubsub.publish_contact_to_insert_pubsub(
                    request, aws_db, hubspot, orpi_row, orgid)
        except Exception as error:
            # Bug fix: include the exception detail — `error` was unused.
            logger.add_log(request, f"Failed to get orgid: {error}", "step 6",
                           context.event_id, "ERROR")
Exemple #8
0
def main(request, context):
    """Step 7: create the contact in HubSpot, link it to its company, and
    insert the membership row into the database.
    """
    print("STEP 7 -- Insert contacts")
    print("This Function was triggered by messageId {} published at {}".format(
        context.event_id, context.timestamp))
    aws_db, hubspot, orpi_row, orgid = pubsub.read_pubsub(request)

    # (Dead `crmid = None` / `orgcrmid = None` pre-assignments removed —
    # both were immediately overwritten.)

    # Create contact
    crmid = create_contact(request, context, orpi_row, hubspot)

    # Get company ID
    orgcrmid = get_company(request, context, orpi_row, aws_db, orgid)

    # Create link between contact and company
    create_link(request, context, orgcrmid, crmid, hubspot, orpi_row)

    # Create the contact into the database
    insert_org_member(request, context, orpi_row, aws_db, orgcrmid, crmid,
                      orgid)
Exemple #9
0
def main(request, context):
    """Read a single log record from pub/sub and append it to BigQuery."""
    record = pubsub.read_pubsub(request)
    frame = pd.DataFrame.from_records([record])
    print(frame)
    load_to_bigQuery(frame)