def main(client):
    """Extract sponsored-ad details from every saved mobile page source.

    For each region in ``S3_REGION_LIST`` the ``page_source`` directory under
    the client's mobile batch output directory is scanned; every file in it
    is parsed with BeautifulSoup and passed to ``get_sponsored_ad_details``,
    which receives the region's ``sponsored_result_computed`` directory as
    its output location.

    Args:
        client: Value substituted into ``BATCH_OUTPUT_DIR_MOBILE``.
    """
    for region in S3_REGION_LIST:
        print(region)
        client_root = BATCH_OUTPUT_DIR_MOBILE.format(client)
        source_dir = os.path.join(client_root, region, 'page_source')
        computed_dir = os.path.join(
            client_root, region, 'sponsored_result_computed')

        if not os.path.exists(source_dir):
            print('{} -- No sponsored results for Mobile step 4.a'.format(
                region))
        else:
            create_dir(computed_dir)
            for page_name in os.listdir(source_dir):
                with open(os.path.join(source_dir, page_name)) as handle:
                    markup = handle.read()
                parsed_page = BeautifulSoup(markup, 'html.parser')
                # The query name is everything before the first dot.
                query_clean_name = page_name.partition('.')[0]
                get_sponsored_ad_details(
                    computed_dir, parsed_page, query_clean_name)

        # Separator between regions in the console output.
        print('--------------------------------------------')
def main(client):
    """Extract showcase-ad details from every saved mobile page source.

    For each region in ``S3_REGION_LIST`` the ``page_source`` directory under
    the client's mobile batch output directory is scanned; every file in it
    is parsed with BeautifulSoup and passed to ``get_showcase_ad_details``,
    which receives the region's ``showcase_result_computed`` directory as
    its output location.

    Args:
        client: Value substituted into ``BATCH_OUTPUT_DIR_MOBILE``.
    """
    for region in S3_REGION_LIST:
        print(region)
        client_root = BATCH_OUTPUT_DIR_MOBILE.format(client)
        pages_dir = os.path.join(client_root, region, 'page_source')
        results_dir = os.path.join(
            client_root, region, 'showcase_result_computed')

        if not os.path.exists(pages_dir):
            print('{} -- No showcase results for Mobile step 5.a'.format(
                region))
        else:
            create_dir(results_dir)
            for html_name in os.listdir(pages_dir):
                html_path = os.path.join(pages_dir, html_name)
                with open(html_path) as source:
                    html_text = source.read()
                document = BeautifulSoup(html_text, 'html.parser')
                # The query name is everything before the first dot.
                query_clean_name = html_name.partition('.')[0]
                get_showcase_ad_details(
                    results_dir, document, query_clean_name)

        # Separator between regions in the console output.
        print('--------------------------------------------')
def main(client):
    """Combine each region's computed showcase results into one TSV file.

    For every region in ``S3_REGION_LIST`` the files found in the region's
    ``showcase_result_computed`` directory are converted with
    ``get_computed_df`` and stacked into a single frame, which is written to
    ``ShowcaseAds_Combined<region>.tsv`` in the region directory. A region
    with no input directory, or with no resulting rows, only prints a notice.

    Frames are gathered in a list and concatenated once: ``DataFrame.append``
    was removed in pandas 2.0, and appending inside the loop re-copied the
    accumulated data for every file.

    Args:
        client: Value substituted into ``BATCH_OUTPUT_DIR_MOBILE``.
    """
    for S3_REGION in S3_REGION_LIST:
        output_dir = BATCH_OUTPUT_DIR_MOBILE.format(client)
        INPUT_DIR = os.path.join(
            output_dir, S3_REGION, 'showcase_result_computed')
        OUTPUT_DIR = os.path.join(output_dir, S3_REGION)
        print(S3_REGION)

        # NOTE(review): this step handles showcase results, yet the message
        # says "sponsored" -- looks like a copy-paste slip; text left as-is.
        empty_message = '{} -- No sponsored results for Mobile step 5.b'.format(
            S3_REGION)

        if not os.path.exists(INPUT_DIR):
            print(empty_message)
            continue

        frames = []
        for file in os.listdir(INPUT_DIR):
            # Names containing '~' are skipped (presumably editor
            # temp/lock files -- confirm).
            if '~' in file:
                continue
            file_path = os.path.join(INPUT_DIR, file)
            try:
                df = get_computed_df(file_path, file, S3_REGION)
            except Exception as e:
                # Best effort: one unreadable file must not abort the region.
                print(str(e))
                print('Exception for file: {}'.format(file_path))
            else:
                if df is not None:
                    frames.append(df)

        # pd.concat raises ValueError on an empty list, so guard it.
        output_df = pd.concat(frames) if frames else pd.DataFrame()

        if len(output_df) > 0:
            output_df.to_csv(
                os.path.join(
                    OUTPUT_DIR,
                    'ShowcaseAds_Combined{}.tsv'.format(S3_REGION)),
                index=False, sep='\t')
        else:
            print(empty_message)
def main(client):
    """Merge the per-region showcase TSV files into one all-region TSV.

    Reads ``<region>/ShowcaseAds_Combined<region>.tsv`` for every region in
    ``S3_REGION_LIST`` that has such a file, stacks the rows in region order
    and writes them to ``ShowcaseAds_Combined_allregion.tsv`` in the client's
    mobile batch output directory. Prints a notice when there are no rows.

    Frames are gathered in a list and concatenated once, because
    ``DataFrame.append`` was removed in pandas 2.0.

    Args:
        client: Value substituted into ``BATCH_OUTPUT_DIR_MOBILE``.
    """
    output_dir = BATCH_OUTPUT_DIR_MOBILE.format(client)
    frames = []
    for region in S3_REGION_LIST:
        print(region)
        region_related_path = os.path.join(
            output_dir, region, 'ShowcaseAds_Combined{}.tsv'.format(region))
        if os.path.exists(region_related_path):
            frames.append(pd.read_csv(region_related_path, sep='\t'))

    # pd.concat raises ValueError on an empty list, so guard it.
    output_df = pd.concat(frames) if frames else pd.DataFrame()

    if len(output_df) > 0:
        output_df.to_csv(
            os.path.join(output_dir, 'ShowcaseAds_Combined_allregion.tsv'),
            index=False, sep='\t')
    else:
        print('No showcase for all regions.')
def main(client, client_file_name):
    """Merge the PC and mobile all-region ad files into one prospect TSV.

    Up to three inputs are stacked, in this order, skipping any that do not
    exist: the PC ``SponsoredResult_Combined_allregion.tsv``, the mobile
    ``SponsoredAds_Combined_allregion.tsv`` and the mobile
    ``ShowcaseAds_Combined_allregion.tsv``. When there is at least one row
    the frame is passed through ``process_prospect_df``, ``PLA Rank`` is cast
    to int, ``PLA Has Sale Tag`` is upper-cased, a leading ``Prospect Name``
    column is added and the result is written to ``MERGED_OUTPUT_PATH``.

    Frames are gathered in a list and concatenated once, because
    ``DataFrame.append`` was removed in pandas 2.0.

    Args:
        client: Value substituted into the output path templates and written
            into the ``Prospect Name`` column.
        client_file_name: Forwarded unchanged to ``process_prospect_df``.
    """
    output_dir_pc = BATCH_OUTPUT_DIR_PC.format(client)
    output_dir_mobile = BATCH_OUTPUT_DIR_MOBILE.format(client)

    # Order matters: it determines the row order of the merged output.
    candidate_paths = [
        os.path.join(output_dir_pc, 'SponsoredResult_Combined_allregion.tsv'),
        os.path.join(output_dir_mobile, 'SponsoredAds_Combined_allregion.tsv'),
        os.path.join(output_dir_mobile, 'ShowcaseAds_Combined_allregion.tsv'),
    ]
    frames = [
        pd.read_csv(path, sep='\t')
        for path in candidate_paths
        if os.path.exists(path)
    ]

    # pd.concat raises ValueError on an empty list, so guard it.
    output_df = pd.concat(frames) if frames else pd.DataFrame()

    output_df_count = len(output_df)
    print(output_df_count)
    if output_df_count > 0:
        # Getting only top-ranked items (presumably what process_prospect_df
        # does -- confirm against its definition).
        output_df = process_prospect_df(output_df, client, client_file_name)
        output_df['PLA Rank'] = output_df['PLA Rank'].astype(int)

        # Normalizing field value: missing tags become '' and the rest are
        # upper-cased.
        output_df['PLA Has Sale Tag'] = output_df['PLA Has Sale Tag'].fillna(
            value='').str.upper()

        # Capture the column order first so 'Prospect Name' can lead it.
        column_list = output_df.columns.tolist()
        output_df['Prospect Name'] = client
        output_df = output_df[['Prospect Name'] + column_list]
        output_df.to_csv(MERGED_OUTPUT_PATH.format(client),
                         index=False, sep='\t')
    else:
        print('No output of step6')
def __init__(self, thread_name, client_name):
    """Initialise the ``Thread`` base and record the client settings.

    Args:
        thread_name: Stored on the instance as ``thread_name``.
        client_name: Stored on the instance as ``client_name`` and
            substituted into ``BATCH_OUTPUT_DIR_MOBILE`` to build
            ``batch_output_dir``.
    """
    Thread.__init__(self)
    self.thread_name, self.client_name = thread_name, client_name
    # Resolve the client's mobile batch output directory once, up front.
    mobile_dir_template = BATCH_OUTPUT_DIR_MOBILE
    self.batch_output_dir = mobile_dir_template.format(self.client_name)