def main(request, context): print("STEP 5 -- reading data") print("This Function was triggered by messageId {} published at {}".format( context.event_id, context.timestamp)) aws_db, hubspot = pubsub.read_pubsub(request) try: dw_contacts = readDataWarehouse.get_contacts() logger.add_log(request, "Read dw data", "step 5", context.event_id, "NOTICE") orpi_contacts = readOrpi.get_contacts() logger.add_log(request, "Read orpi data", "step 5", context.event_id, "NOTICE") except Exception as error: logger.add_log(request, "Read table " + str(error), "step 5", context.event_id, "ERROR") for i in orpi_contacts.index: try: pubsub.publish_pubsub(request, aws_db, hubspot, dw_contacts, orpi_contacts.loc[i]) except Exception as error: logger.add_log( request, f"Trigger step 6 for {orpi_contacts.email[i]} with error {str(error)}", "step 5", context.event_id, "ERROR")
def main(request, context): print("STEP 4 -- reading data from ftp server") print("This Function was triggered by messageId {} published at {}".format( context.event_id, context.timestamp)) aws_db, hubspot = pubsub.read_pubsub(request) try: logger.add_log(request, "download FTP file", "step 4", context.event_id, "NOTICE") download_xml_from_ftp() logger.add_log(request, "read file", "step 4", context.event_id, "NOTICE") doc = parse_xml() logger.add_log(request, "parse xml", "step 4", context.event_id, "NOTICE") agent_dict = parse_dict(doc) logger.add_log(request, "parse dict", "step 4", context.event_id, "NOTICE") df = dict_to_df(agent_dict) logger.add_log(request, "load to BQ", "step 4", context.event_id, "NOTICE") load_to_bigQuery(df) except Exception as error: logger.add_log(request, "READ FTP " + str(error), "step 4", context.event_id, "ERROR") pubsub.publish_pubsub(request, aws_db, hubspot)
def main(request, context): logger.add_log(request, "Start Network Import", "step 0", context.event_id, "NOTICE") agency, aws_db, hubspot = pubsub.read_pubsub(request) try: #print("download FTP file") logger.add_log(request, "download FTP file", "step 0", context.event_id, "NOTICE") download_zip_from_ftp() logger.add_log(request, "unzip file", "step 0", context.event_id, "NOTICE") unzip_file() logger.add_log(request, "read dataframe", "step 0", context.event_id, "NOTICE") df = pd.read_csv('/tmp/agences_orpi_national_201703.txt', delimiter=";", encoding='latin-1', index_col=False) df = df[df.columns].astype(str) logger.add_log(request, "load data into big query", "step 0", context.event_id, "NOTICE") load_to_bigQuery(df) except Exception as error: logger.add_log(request, error, "step 0", context.event_id, "ERROR") #add_test_row() pubsub.publish_pubsub(request, agency=agency, aws_db=aws_db, hubspot=hubspot) return 'Success'
def main(request, context): print("STEP 2 -- compare companies data") print("This Function was triggered by messageId {} published at {}".format(context.event_id, context.timestamp)) dw_companies, lp, orpi_row, aws_db, hubspot = pubsub.read_pubsub(request) logger.add_log(request, "read pub sub", "step 2", context.event_id, "NOTICE") simulate_lp='' try: ratio_lp, simulate_lp = compare_lp(orpi_row, lp) except Exception as error: logger.add_log(request, "Compare landing page : " + str(error), "step 2", context.event_id, "ERROR", orpi_row.CodeAgence[0]) flag_found = False if is_similar_companies(0, 0, ratio_lp): logger.add_log(request, "We found similar companies", "step 2", context.event_id, "NOTICE", orpi_row.CodeAgence[0]) pubsub.publish_similar_companies(request=request, orgid=None, codeagence=orpi_row.CodeAgence[0], dw_name=None, orpi_name=orpi_row.Name_cleaned[0], score_name=0, dw_address=None, orpi_address=orpi_row.Adress_cleaned[0], score_address=0, landing_page=simulate_lp, score_landing_page=ratio_lp) else: for index, dw_row in dw_companies.iterrows(): try: ratio_address, ratio_name = compute_ratios(dw_row, orpi_row) #logger.add_log(request, "We compute ratios", "step 2", context.event_id, "NOTICE") except Exception as error: logger.add_log(request, "Compute ratios : " + str(error), "step 2", context.event_id, "ERROR", orpi_row.CodeAgence[0]) if is_similar_companies(ratio_address, ratio_name, ratio_lp): logger.add_log(request, "We found similar companies", "step 2", context.event_id, "NOTICE", orpi_row.CodeAgence[0]) pubsub.publish_similar_companies(request=request, orgid=dw_row.orgid, codeagence=orpi_row.CodeAgence[0], dw_name=dw_row.Name, orpi_name=orpi_row.Name_cleaned[0], score_name=ratio_name, dw_address=dw_row.Adress, orpi_address=orpi_row.Adress_cleaned[0], score_address=ratio_address, landing_page=dw_row.landingpage, score_landing_page=ratio_lp) logger.add_log(request, "Sending pubsub for similar companies", "step 2", context.event_id, "NOTICE", orpi_row.CodeAgence[0]) flag_found = True break if not flag_found: pubsub.publish_companie_to_insert_pubsub(request, orpi_row, aws_db, hubspot) logger.add_log(request, "New company, Trying to insert {}".format(orpi_row.Name_cleaned[0]), "step 2", context.event_id, "NOTICE", orpi_row.CodeAgence[0])
def main(request, context): print("STEP 3 bis -- log new companies") print("This Function was triggered by messageId {} published at {}".format( context.event_id, context.timestamp)) dict_result = pubsub.read_pubsub(request) try: df = pd.DataFrame(dict_result, index=[0]) if not existingLink.test_existing_link( df.orgid[0], str(df.code_agence[0]).zfill(6)): load_to_bigQuery(df) logger.add_log(request, "writing in table ", "step 3 bis", context.event_id, "NOTICE", dict_result["code_agence"]) except Exception as error: logger.add_log(request, "writing in table " + str(error), "step 3", context.event_id, "ERROR", dict_result["code_agence"])
def main(request, context): print("STEP 2 bis -- log similar companies") print("This Function was triggered by messageId {} published at {}".format( context.event_id, context.timestamp)) dict_result = pubsub.read_pubsub(request) logger.add_log(request, "read pub sub", "step 2 bis", context.event_id, "NOTICE", dict_result["code_agence"]) try: df = pd.DataFrame(dict_result, index=[0]) logger.add_log(request, "convert json to df", "step 2 bis", context.event_id, "NOTICE", dict_result["code_agence"]) load_to_bigQuery(df) logger.add_log(request, "load to BQ", "step 2 bis", context.event_id, "NOTICE", dict_result["code_agence"]) except Exception as error: logger.add_log(request, error, "step 2 bis", context.event_id, "ERROR", dict_result["code_agence"])
def main(request, context): print("STEP 6 -- compare companies data") print("This Function was triggered by messageId {} published at {}".format( context.event_id, context.timestamp)) aws_db, hubspot, dw_companies, orpi_row = pubsub.read_pubsub(request) time.sleep(10) flag_found = False logger.add_log(request, "reading pubsub", "step 6", context.event_id, "NOTICE") for index, dw_row in dw_companies.iterrows(): existing_email = False if dw_row.email is not None: try: existing_email = compareData.compare_email( dw_row.cleaned_email, orpi_row.cleaned_email[0]) except Exception as error: logger.add_log(request, f"Failed to compare email", "step 6", context.event_id, "ERROR") #existing_email = False if existing_email: logger.add_log(request, "similar email found", "step 6", context.event_id, "NOTICE") flag_found = True break if not flag_found: logger.add_log(request, f"Trying to insert {orpi_row.email[0]}", "step 6", context.event_id, "NOTICE") try: orgid = compareData.get_orgid(orpi_row.code_agence[0]) if orgid is not None: pubsub.publish_contact_to_insert_pubsub( request, aws_db, hubspot, orpi_row, orgid) except Exception as error: logger.add_log(request, "Failed to get orgid", "step 6", context.event_id, "ERROR")
def main(request, context): print("STEP 7 -- Insert contacts") print("This Function was triggered by messageId {} published at {}".format( context.event_id, context.timestamp)) aws_db, hubspot, orpi_row, orgid = pubsub.read_pubsub(request) crmid = None orgcrmid = None # Create contact crmid = create_contact(request, context, orpi_row, hubspot) # Get company ID orgcrmid = get_company(request, context, orpi_row, aws_db, orgid) # Create link betwenn contact and company create_link(request, context, orgcrmid, crmid, hubspot, orpi_row) # create the contact into the database insert_org_member(request, context, orpi_row, aws_db, orgcrmid, crmid, orgid)
def main(request, context):
    # Final step: persist a single pipeline log record into BigQuery.
    log = pubsub.read_pubsub(request)
    log_df = pd.DataFrame.from_records([log])
    print(log_df)
    load_to_bigQuery(log_df)
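# Hedged sketch (assumption): read_pubsub is assumed to decode the base64
# "data" field of the Pub/Sub event that triggered the function and return the
# JSON payload published by the upstream step (here, one log record). The
# repo's helper evidently also unpacks step-specific fields for the other steps.
import base64
import json


def read_pubsub(event):
    payload = base64.b64decode(event["data"]).decode("utf-8")
    return json.loads(payload)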