def get_computed_df(file_path, file, region):
    """Load a showcase-ads TSV export and reshape it into the output layout.

    Args:
        file_path: Path of the tab-separated file to read.
        file: File name shaped like
            ``<query>--<dd_mm_YYYY__HH_MM_SS>_showcase_ads<...>.<ext>``; the
            query and the search timestamp are parsed out of it.
        region: Region identifier handed to ``get_region_name``.

    Returns:
        The loaded rows, with derived columns added, restricted to and
        ordered by ``OUTPUT_COLUMN_LIST``.

    Raises:
        IndexError: If ``file`` contains no ``--`` separator.
        ValueError: If the timestamp part does not match the expected format.
    """
    stem = file.split('.')[0]
    df = pd.read_csv(file_path, sep='\t')

    name_parts = stem.split('--')
    df['Query'] = name_parts[0]

    # Convert GMT to EST (fixed five-hour offset, as in the original code).
    date_time_stamp = name_parts[1].split('_showcase_ads')[0]
    date_time_obj = datetime.strptime(date_time_stamp, "%d_%m_%Y__%H_%M_%S")
    est_date_time_obj = date_time_obj - timedelta(hours=5)
    df['Search Datetime'] = est_date_time_obj.strftime('%m-%d-%Y:%H:%M:%S')

    # Make sure every expected input column exists so that the renaming and
    # the final column selection below cannot fail on a sparse export.
    existing_columns = set(df.columns)
    for col in TITLE_CONVERSION_DICT:
        if col not in existing_columns:
            df[col] = ''
    for col in EXTRA_COLUMNS_FOR_SHOWCASE:
        if col not in existing_columns:
            df[col] = ''

    # Work on a text view of the price so that missing (NaN) or numeric cells
    # do not raise AttributeError on ``.split``; the stored 'price' column
    # itself is left untouched.
    price_text = df['price'].apply(lambda x: '' if pd.isna(x) else str(x))
    df['Min Price'] = price_text.apply(lambda x: x.split('-')[0])
    df['Max Price'] = price_text.apply(
        lambda x: x.split('-')[-1] if '-' in x else ''
    )
    df['Store_Name'] = df['url'].apply(get_domain_url)

    df.rename(columns=TITLE_CONVERSION_DICT, inplace=True)

    # Rank follows the row order of the file, starting at 1.
    df['PLA Rank'] = range(1, len(df) + 1)
    df['Search Region'] = get_region_name(region)
    df['PLA/Showcase'] = 'Showcase'
    df['Search From'] = 'Mobile'

    return df[OUTPUT_COLUMN_LIST]
def get_computed_df(self, file_path, file, region):
    """Load a brands-related TSV export and reshape it into the output layout.

    Args:
        file_path: Path of the tab-separated file to read.
        file: File name shaped like
            ``<query>--<dd_mm_YYYY__HH_MM_SS>_brands_related<...>.<ext>``; the
            query and the search timestamp are parsed out of it.
        region: Region identifier handed to ``get_region_name``.

    Returns:
        The loaded rows, with derived columns added, restricted to and
        ordered by ``OUTPUT_COLUMN_LIST``.

    Raises:
        IndexError: If ``file`` contains no ``--`` separator.
        ValueError: If the timestamp part does not match the expected format.
    """

    def extract_asin(url):
        """Return the value following ``asins=`` in *url* (up to the next ``&``)."""
        if not isinstance(url, str):
            # Missing URLs arrive as NaN; keep them as they are.
            return url
        pieces = url.split('asins=')
        if len(pieces) < 2:
            # No ``asins=`` parameter: report an empty ASIN instead of
            # aborting the whole file with an IndexError.
            return ''
        return pieces[1].split('&')[0]

    stem = file.split('.')[0]
    df = pd.read_csv(file_path, sep='\t')

    name_parts = stem.split('--')
    df['Query'] = name_parts[0]

    # Convert GMT to EST (fixed five-hour offset, as in the original code).
    date_time_stamp = name_parts[1].split('_brands_related')[0]
    date_time_obj = datetime.strptime(date_time_stamp, "%d_%m_%Y__%H_%M_%S")
    est_date_time_obj = date_time_obj - timedelta(hours=5)
    df['Search Datetime'] = est_date_time_obj.strftime('%m-%d-%Y:%H:%M:%S')

    df['ASIN'] = df['url'].apply(extract_asin)

    df.rename(
        columns={
            'Query': 'Search Keyword',
            'text': 'Brand Tagline',
            'name': 'Brand',
        },
        inplace=True,
    )

    # Rank follows the row order of the file, starting at 1.
    df['Result Rank'] = range(1, len(df) + 1)
    df['Search Region'] = get_region_name(region)
    df['Search From'] = 'Desktop'

    return df[OUTPUT_COLUMN_LIST]
def get_computed_df(self, file_path, file, region):
    """Load a today's-deals TSV export and reshape it into the output layout.

    Every row is marked as sponsored (``'Product Sponsored' = 'Yes'``).

    Args:
        file_path: Path of the tab-separated file to read.
        file: File name shaped like
            ``<query>--<dd_mm_YYYY__HH_MM_SS>_today_deals<...>.<ext>``; the
            query and the search timestamp are parsed out of it.
        region: Region identifier handed to ``get_region_name``.

    Returns:
        The loaded rows, with derived columns added, restricted to and
        ordered by ``OUTPUT_COLUMN_LIST``.

    Raises:
        IndexError: If ``file`` contains no ``--`` separator.
        ValueError: If the timestamp part does not match the expected format.
    """

    def extract_asin(url):
        """Return the path segment following ``dp/`` in *url*."""
        if not isinstance(url, str):
            # Missing URLs arrive as NaN; keep them as they are.
            return url
        pieces = url.split('dp/')
        if len(pieces) < 2:
            # No ``dp/`` segment: report an empty ASIN instead of aborting
            # the whole file with an IndexError.
            return ''
        return pieces[1].split('/')[0]

    stem = file.split('.')[0]
    df = pd.read_csv(file_path, sep='\t')

    name_parts = stem.split('--')
    df['Query'] = name_parts[0]
    df['Product Sponsored'] = 'Yes'

    # Convert GMT to EST (fixed five-hour offset, as in the original code).
    date_time_stamp = name_parts[1].split('_today_deals')[0]
    date_time_obj = datetime.strptime(date_time_stamp, "%d_%m_%Y__%H_%M_%S")
    est_date_time_obj = date_time_obj - timedelta(hours=5)
    df['Search Datetime'] = est_date_time_obj.strftime('%m-%d-%Y:%H:%M:%S')

    # Keep only the first space-separated token of the rating text.
    df['rating'] = df['rating'].apply(
        lambda x: x.split(' ')[0] if isinstance(x, str) else x
    )

    # Fill missing values BEFORE casting to str: casting first turns NaN into
    # the literal text 'nan', which fillna() can no longer detect.
    df['total_ratings'] = (
        df['total_ratings']
        .fillna('')
        .astype(str)
        .str.replace(',', '', regex=False)
    )

    df['ASIN'] = df['url'].apply(extract_asin)

    df.rename(
        columns={
            'Query': 'Search Keyword',
            'name': 'Product Title',
            'rating': 'Product Ratings',
            'total_ratings': 'Product Reviews',
            'sale_price': 'Product Sale Price',
            'marked_price': 'Product Marked Price',
            'prime_info': 'Product Is Prime',
        },
        inplace=True,
    )

    # Rank follows the row order of the file, starting at 1.
    df['Result Rank'] = range(1, len(df) + 1)
    df['Search Region'] = get_region_name(region)
    df['Search From'] = 'Desktop'

    return df[OUTPUT_COLUMN_LIST]
def get_computed_df(self, file_path, file, region):
    """Load a SERP-result TSV export and reshape it into the output layout.

    Args:
        file_path: Path of the tab-separated file to read.
        file: File name shaped like
            ``<query>--<dd_mm_YYYY__HH_MM_SS>_serp_result<...>.<ext>``; the
            query and the search timestamp are parsed out of it.
        region: Region identifier handed to ``get_region_name``.

    Returns:
        The loaded rows, with derived columns added, restricted to and
        ordered by ``OUTPUT_COLUMN_LIST``.

    Raises:
        IndexError: If ``file`` contains no ``--`` separator.
        ValueError: If the timestamp part does not match the expected format.
    """
    stem = file.split('.')[0]
    df = pd.read_csv(file_path, sep='\t')

    name_parts = stem.split('--')
    df['Query'] = name_parts[0]

    # Convert GMT to EST (fixed five-hour offset, as in the original code).
    date_time_stamp = name_parts[1].split('_serp_result')[0]
    date_time_obj = datetime.strptime(date_time_stamp, "%d_%m_%Y__%H_%M_%S")
    est_date_time_obj = date_time_obj - timedelta(hours=5)
    df['Search Datetime'] = est_date_time_obj.strftime('%m-%d-%Y:%H:%M:%S')

    # Keep only the first space-separated token of the rating text.
    df['rating'] = df['rating'].apply(
        lambda x: x.split(' ')[0] if isinstance(x, str) else x
    )

    # Fill missing values BEFORE casting to str: casting first turns NaN into
    # the literal text 'nan', which fillna() can no longer detect.
    df['total_ratings'] = (
        df['total_ratings']
        .fillna('')
        .astype(str)
        .str.replace(',', '', regex=False)
    )

    # NOTE (carried over from the original): "Redirection needed" -- the
    # intent is not evident from this block; confirm with the helper's owner.
    df['ASIN'] = df['url'].apply(self.get_asin_from_url)

    df.rename(
        columns={
            'Query': 'Search Keyword',
            'name': 'Product Title',
            'rating': 'Product Ratings',
            'total_ratings': 'Product Reviews',
            'sale_price': 'Product Sale Price',
            'marked_price': 'Product Marked Price',
            'prime_info': 'Product Is Prime',
            'sponsored': 'Product Sponsored',
            'stock_info': 'Product Stock Info',
            'coupon_info': 'Product Coupon Info',
            'shipping_info': 'Product Shipping Info',
            'more_buy_info': 'Product More Buy Info',
        },
        inplace=True,
    )

    # Rank follows the row order of the file, starting at 1.
    df['Result Rank'] = range(1, len(df) + 1)
    df['Search Region'] = get_region_name(region)
    df['Search From'] = 'Desktop'

    return df[OUTPUT_COLUMN_LIST]
def get_computed_df(self, file_path, file, region):
    """Load a sponsored-PLA TSV export and reshape it into the output layout.

    Rows whose URL is empty are dropped from the result.

    Args:
        file_path: Path of the tab-separated file to read.
        file: File name shaped like
            ``<query>--<dd_mm_YYYY__HH_MM_SS>_sponsored_<...>.<ext>``; the
            query and the search timestamp are parsed out of it.
        region: Region identifier handed to ``get_region_name``.

    Returns:
        The remaining rows, with derived columns added, restricted to and
        ordered by ``self.output_column_list``.

    Raises:
        KeyError: If the file lacks the ``image`` or ``return_policy`` column.
        IndexError: If ``file`` contains no ``--`` separator.
        ValueError: If the timestamp part does not match the expected format.
    """
    stem = file.split('.')[0]
    df = pd.read_csv(file_path, sep='\t')

    del df['image']
    del df['return_policy']

    # Normalise missing text cells to '' so the string handling below is safe.
    for text_column in ('url', 'price', 'review'):
        df[text_column] = df[text_column].fillna(value='')

    name_parts = stem.split('--')
    df['Query'] = name_parts[0]

    # Convert GMT to EST (fixed five-hour offset, as in the original code).
    date_time_stamp = name_parts[1].split('_sponsored_')[0]
    date_time_obj = datetime.strptime(date_time_stamp, "%d_%m_%Y__%H_%M_%S")
    est_date_time_obj = date_time_obj - timedelta(hours=5)
    df['Search Datetime'] = est_date_time_obj.strftime('%m-%d-%Y:%H:%M:%S')

    # The domain is derived from the full URL, before the query string is cut.
    df['Store_Name'] = df['url'].apply(self.get_domain_url)

    # Strip the query string from google.com URLs, except ad-click redirects.
    df['url'] = df['url'].apply(
        lambda x: x.split('?')[0]
        if 'google.com' in x
        and 'www.googleadservices.com/pagead/aclk' not in x
        else x
    )

    # Keep only the text before the percent sign.
    df['price_drop'] = df['price_drop'].apply(
        lambda x: x.split('%')[0] if isinstance(x, str) else x
    )

    df['Min Price'] = df['price'].apply(lambda x: x.split('-')[0])
    df['Max Price'] = df['price'].apply(
        lambda x: x.split('-')[-1] if '-' in x else ''
    )

    # Takes the SECOND space-separated token of the rating text.
    # NOTE(review): assumes every rating string contains a space -- confirm.
    df['rating'] = df['rating'].apply(
        lambda x: x.split(' ')[1] if isinstance(x, str) else x
    )

    # First token of the review text, with surrounding parentheses removed.
    df['review'] = df['review'].apply(
        lambda x: x.split(' ')[0].replace('(', '').replace(')', '')
        if isinstance(x, str)
        else x
    )
    df['review'] = df['review'].fillna(value='')

    # regex=False pins literal matching. Without it the result depends on the
    # installed pandas version, because the default of ``regex`` changed in
    # pandas 2.0 and 'k+' is a valid (but different) regular expression.
    df['review'] = (
        df['review']
        .str.replace('k+', '000', regex=False)
        .str.replace('+', '', regex=False)
    )

    df.rename(
        columns={
            'brand': 'PLA Store',
            'name': 'PLA Title',
            'url': 'PLA URL',
            'price': 'PLA Price Text',
            'Min Price': 'PLA Price MIN',
            'Max Price': 'PLA Price MAX',
            'review': 'PLA Reviews',
            'rating': 'PLA Ratings',
            'price_drop': 'PLA PriceDrop Percentage',
            'Hour': 'Search Hour',
            'Date': 'Search Date',
            'Query': 'Search Keyword',
            'PRICEC DROP TAG': 'PLA Has Price Drop Tag',
            'SALE TAG': 'PLA Has Sale Tag',
            'Store_Name': 'PLA Domain',
            'In Store/Pick up Today': 'PLA Has Special Tags',
        },
        inplace=True,
    )

    # Ranks are assigned before empty-URL rows are removed, so they keep the
    # row's position in the file and may have gaps after filtering.
    df['PLA Rank'] = range(1, len(df) + 1)
    df['Search Region'] = get_region_name(region)
    df['PLA/Showcase'] = 'PLA'
    df['Search From'] = 'Desktop'

    df = df[df['PLA URL'] != '']
    return df[self.output_column_list]