def getWorstAndBestPerformers(df, feed, target_cpa):
    if target_cpa is None:
        return
    worst_performer = getWorstPerformer(df, target_cpa)
    best_performer = getBestPerformer(df, target_cpa)
    if worst_performer is None and best_performer is None:
        return
    if worst_performer is None:
        this_feed = best_performer
    elif best_performer is None:
        this_feed = worst_performer
    else:
        this_feed = worst_performer.append(best_performer)
    if functions.dfIsEmpty(this_feed):
        return
    this_feed["created_at"] = datetime.now()
    this_feed["updated_at"] = datetime.now()
    this_feed["display_from_date"] = datetime.now()
    cols = ["id", "created_at", "updated_at", "account_id", "priority",
            "headline", "message", "suggestion", "display_from_date",
            "advert_id", "date_range"]
    this_feed = this_feed[cols]
    # first pass: nothing accumulated yet, so start the feed with this data
    if functions.dfIsEmpty(feed):
        return this_feed
    return feed.append(this_feed)

def getWorstAndBestPerformers(df, feed, target_cpa):
    worst_performer = getWorstPerformer(df, target_cpa)
    best_performer = getBestPerformer(df, target_cpa)
    this_feed = worst_performer.append(best_performer)
    if functions.dfIsEmpty(this_feed):
        return
    this_feed["created_at"] = datetime.now()
    this_feed["updated_at"] = datetime.now()
    this_feed["display_from_date"] = datetime.now()
    this_feed["search_query_n_gram_id"] = this_feed["id"]
    cols = [
        "id", "created_at", "updated_at", "account_id", "priority",
        "headline", "message", "suggestion", "display_from_date",
        "n_gram", "date_range", "search_query_n_gram_id"
    ]
    this_feed = this_feed[cols]
    # first pass: nothing accumulated yet, so start the feed with this data
    if functions.dfIsEmpty(feed):
        return this_feed
    return feed.append(this_feed)

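# getWorstPerformer and getBestPerformer are not shown in this section. A
# minimal sketch of the selection logic they might contain, assuming the
# performance df carries a "cpa" column; the filter rules and the single-row
# head(1) convention are illustrative assumptions, not the real
# implementation:
def getWorstPerformer(df, target_cpa):
    # Hypothetical sketch: pick the row furthest above the target CPA.
    # Returns None when nothing qualifies, which the callers check for.
    candidates = df[df["cpa"] > target_cpa]
    if candidates.empty:
        return None
    return candidates.sort_values("cpa", ascending=False).head(1)


def getBestPerformer(df, target_cpa):
    # Hypothetical sketch: pick the cheapest converter below the target CPA.
    candidates = df[(df["cpa"] > 0) & (df["cpa"] < target_cpa)]
    if candidates.empty:
        return None
    return candidates.sort_values("cpa").head(1)
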
def main(df, account_id, settings):
    Log("info", "processing campaign winning element", "", account_id)
    if functions.dfIsEmpty(df):
        return
    # add the campaign id, add path_1_path_2
    df = processDf(df, account_id, settings)
    lines = ["headline_1", "headline_2", "description", "path_1_path_2"]
    ldf = None
    # start lines loop
    for line in lines:
        this_df = df[["clicks", "impressions", "campaign_id",
                      line]].groupby(["campaign_id", line]).sum()
        this_df["ctr"] = (this_df.clicks / this_df.impressions) * 100
        if functions.dfIsEmpty(this_df):
            break
        campaignIds = list(this_df.index.levels[0])
        this_df = this_df.reset_index()
        # start campaign ids loop
        for i, campaign_id in enumerate(campaignIds):
            tdf = this_df.copy()
            quantile = tdf[(tdf.campaign_id == campaign_id)].impressions.quantile()
            # keep the top 3 lines by CTR, above the median impression count
            tdf = tdf[(tdf.campaign_id == campaign_id)
                      & (tdf.impressions > quantile)
                      & (tdf[line] != "/")].sort_values(
                          "ctr", ascending=False).head(3).reset_index(drop=True)
            if tdf.shape[0] == 0:
                continue
            tdf["order"] = tdf.index + 1
            tdf["type"] = line
            tdf.rename(columns={line: "value"}, inplace=True)
            tdf = tdf[["campaign_id", "value", "order", "type"]]
            if ldf is None:
                ldf = tdf.copy()
            else:
                ldf = ldf.append(tdf)
    if functions.dfIsEmpty(ldf):
        return
    ldf = ldf.reset_index(drop=True)
    ldf.value = ldf.value.str.replace(" --", "")
    ldf["id"] = pd.Series([uuid.uuid1() for i in range(len(ldf))]).astype(str)
    # now let's add created_at and updated_at as today
    ldf["created_at"] = datetime.now()
    ldf["updated_at"] = datetime.now()
    # Note: just add the date range manually for now; the other date ranges
    # in settings.date_ranges can be looped through at a later date
    date_range = "last_30_days"
    ldf["date_range"] = date_range
    ldf.to_sql("campaign_winning_elements", settings.createEngine(),
               if_exists='append', index=False)
    return df

def createFullDataFrame(account_id, settings):
    df = None
    for date_range in settings.date_ranges:
        this_df = processDateRange(date_range, account_id, settings)
        if functions.dfIsEmpty(this_df):
            continue
        if functions.dfIsEmpty(df):
            df = this_df.copy()
        else:
            df = df.append(this_df.copy())
    return df

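# functions.dfIsEmpty is called throughout this section but defined
# elsewhere. A minimal sketch of what it presumably checks, inferred from
# the call sites (it is passed both None and DataFrames, so the None check
# must come first):
def dfIsEmpty(df):
    # Treat both None and a zero-row DataFrame as empty. Callers pass None
    # on the first loop iteration before anything has accumulated.
    return df is None or df.shape[0] == 0
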
def main(account_id): Log("info", "getting keyword performance from the api", "", account_id) report = Report(account_id, "", options) df = report.createDfWithAllDateRanges(account_id) if functions.dfIsEmpty(df): return # print df[df.google_id=="309491001346"].cpc_bid # remember column headers are as per the download here df["keyword_id"] = df.apply(lambda row: functions.addIdToDf( account_id, row["Keyword ID"], row["Ad group ID"]), axis=1) df["id"] = df["keyword_id"] df = addParentId(df, account_id) # our UUID from the keywords table df = report.basicProcessing(df) df = reportSpecificProcessing(df) report.writeToEntitiesTable(df, report, account_id) report.writeToPerformanceTable(df, report, account_id)
def main(self):
    """Populate the adverts and advert_performance tables"""
    Log("info", "populating the adverts and advert_performance tables", "",
        self.account_id)
    settings = Settings()
    for date_range in settings.date_ranges:
        df_chunks = self.dataFrameFromAdPerformanceReports(date_range)
        self.deleteExisting(date_range)
        while True:
            try:
                df = next(df_chunks)
            except StopIteration:
                break
            if functions.dfIsEmpty(df):
                continue
            try:
                df = self.processDf(df, date_range)
            except Exception as exception:
                Log("error", str(exception), traceback.format_exc())
                raise
            df = self.addInfo(df)
            self.writeToAdvertPerformanceTable(df, date_range)

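# dataFrameFromAdPerformanceReports is consumed with next() above, so it is
# presumably a generator of DataFrame chunks. A sketch of the likely shape,
# assuming pandas' read_sql chunksize support; the query, table and
# parameter names are illustrative assumptions:
def dataFrameFromAdPerformanceReports(self, date_range, chunksize=10000):
    # Yield DataFrame chunks so large accounts never hold the whole report
    # in memory at once; read_sql with chunksize returns an iterator.
    query = ("SELECT * FROM ad_performance_reports "
             "WHERE account_id = %(account_id)s "
             "AND date_range = %(date_range)s")
    return pd.read_sql(query, Settings().createEngine(),
                       params={"account_id": self.account_id,
                               "date_range": date_range},
                       chunksize=chunksize)
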
def addUserAccounts(self, user_id, first_run):
    if not Helpers().isActiveUser(user_id):
        Log("info", "this user isn't active. Exiting",
            'user_id: %s' % (user_id))
        return
    try:
        Log("info", "adding accounts for user id '%s'" % user_id)
        self.user_id = user_id
        accounts_df = self.getAccountsDf()
        if functions.dfIsEmpty(accounts_df):
            return
        accounts_df = accounts_df.drop_duplicates('google_id')
        accounts_df = self.dropDuplicates(accounts_df, first_run)
        if (accounts_df.shape[0] == 0 and first_run):
            Log('warning', "no unique google accounts were found for this user",
                "user_id (%s)" % (user_id), "")
        accounts_df.to_sql("accounts", Database().createEngine(),
                           index=False, if_exists="append")
    except Exception as exception:
        Log("error", str(exception) + " (User id: %s)" % (user_id),
            traceback.format_exc())
    Log("info", "finished adding account meta data")

def filterAndAddColumns(df, date_range, account_id):
    # multiply percentages by 100
    df.ctr = df.ctr * 100
    df.conversion_rate = df.conversion_rate * 100
    df.roas = df.roas * 100
    # filter down: must have more than 10 clicks
    df = df[df.clicks > 10]
    df = df.reset_index().fillna("")
    if functions.dfIsEmpty(df):
        return
    # the n-gram tuple index comes back as level_0..level_2 after reset_index
    df["n_gram"] = df.level_0 + " " + df.level_1 + " " + df.level_2
    df["n_gram"] = df["n_gram"].str.strip()
    df = df.reset_index(drop=True)
    df["account_id"] = account_id
    df["date_range"] = date_range
    df["id"] = pd.Series([uuid.uuid1() for i in range(len(df))]).astype(str)
    # now let's add created_at and updated_at as today
    df["created_at"] = datetime.now()
    df["updated_at"] = datetime.now()
    return df

def main(account_id): Log("info", "processing keyword feed", "", account_id) target_cpa = functions.getTargetCpa(account_id) table_name = "keyword_feed" deleteAccountDataFromTable(account_id, table_name, settings) feed = None for date_range in settings.date_ranges: df = getKeywords(account_id, date_range) df = df.loc[:, ~df.columns.duplicated()] # print date_range # print df.shape # to check for duplicates df["keyword_and_type"] = df["keyword_text"] + df["keyword_match_type"] if functions.dfIsEmpty(df): continue if not functions.dfIsEmpty(feed): feed["keyword_and_type"] = feed["keyword_text"] + feed[ "keyword_match_type"] df = df[~df["keyword_and_type"].isin(feed["keyword_and_type"]. values)] feed = getWorstAndBestPerformers(df, feed, target_cpa) if functions.dfIsEmpty(feed): continue # drop duplicate columns # from here: https://stackoverflow.com/questions/14984119/python-pandas-remove-duplicate-columns feed = feed.loc[:, ~feed.columns.duplicated()] if functions.dfIsEmpty(feed): return if "keyword_and_type" in feed.columns: del feed["keyword_and_type"] feed['display_from_date'] = datetime.now() functions.append_df_to_sql_table(feed, table_name)
def main(account_id): Log("info", "processing search query n-grams feed", "", account_id) target_cpa = functions.getTargetCpa(account_id) engine = settings.createEngine() deleteAllFromTable("search_query_n_gram_feed", account_id, engine) feed = None for date_range in settings.date_ranges: print(date_range) df = getQueryPerformance(account_id, date_range) df = df.loc[:, ~df.columns.duplicated()] print(date_range) print(df.shape) if functions.dfIsEmpty(df): continue if not functions.dfIsEmpty(feed): df = df[~df["n_gram"].isin(feed["n_gram"].values)] feed = getWorstAndBestPerformers(df, feed, target_cpa) if feed is None: continue # drop duplicate columns # from here: https://stackoverflow.com/questions/14984119/python-pandas-remove-duplicate-columns feed = feed.loc[:, ~feed.columns.duplicated()] if functions.dfIsEmpty(feed): return table_name = "search_query_n_gram_feed" feed = functions.createUuid(feed) feed = functions.trimDfToTableColumns(feed, table_name) feed['display_from_date'] = datetime.now() functions.append_df_to_sql_table(feed, table_name)
def main(account_id): Log("info", "processing budget commander data", "", account_id) account_performance_by_day = getAccountPerformanceByDay( "account_performance_reports", account_id, date_range) if functions.dfIsEmpty(account_performance_by_day): return addBudgetActualGraphDataToDb(account_performance_by_day, account_id)
def processDf(self, df, date_range):
    if functions.dfIsEmpty(df):
        return
    df = functions.addCalculatedMetricsToDataFrame(df)
    df = self.addSignificance(df)
    return df

def getFeedInfoAllDateRanges(account_id):
    all_df = None
    for date_range in settings.date_ranges:
        df = getAdGroups(account_id, date_range)
        if functions.dfIsEmpty(df):
            print("%s is empty" % (date_range))
            continue
        df = addFeedInfo(df)
        if functions.dfIsEmpty(all_df):
            all_df = df.copy()
        else:
            all_df = all_df.append(df.copy())
    return all_df

def main(account_id): Log("info", "processing ad n grams feed", "", account_id) target_cpa = functions.getTargetCpa(account_id) engine = settings.createEngine() table_name = "ad_n_gram_feed" functions.deleteAllFromTable(table_name, account_id, engine) feed = None for date_range in settings.date_ranges: df = getNGrams(account_id, date_range) df = df.loc[:, ~df.columns.duplicated()] if functions.dfIsEmpty(df): print("%s is empty" % (date_range)) continue if not functions.dfIsEmpty(feed): df = df[~df["n_gram_id"].isin(feed["n_gram_id"].values)] feed = getWorstAndBestPerformers(df, feed, target_cpa) if functions.dfIsEmpty(feed): continue # drop duplicate columns # from here: https://stackoverflow.com/questions/14984119/python-pandas-remove-duplicate-columns feed = feed.loc[:, ~feed.columns.duplicated()] if functions.dfIsEmpty(feed): print("ad_n_feed is empty") return print("ad_n_feed length: %s" % (feed.shape[0])) feed = functions.createUuid(feed) feed = functions.trimDfToTableColumns(feed, table_name) feed['display_from_date'] = datetime.now() writeToAdvertFeed(feed, table_name, engine)
def main(account_id): Log("info", "getting ad performance from the api", "", account_id) settings = Settings() for date_range in settings.date_ranges: report = Report(account_id, date_range, options) report.createAccountDirectory() report.createReportDirectory() report.downloadReport(account_id, options["where_string"]) df = report.convertCsvToDataframe() if functions.dfIsEmpty(df): continue df = report.basicProcessing(df) df = reportSpecificProcessing(df, date_range, account_id) deleteExitingData(account_id, date_range, 'ad_performance_reports') report.writeDataframeToTable(df, 'ad_performance_reports') deleteExitingData(account_id, date_range, 'advert_performance') report.writeDataframeToTable(df, 'advert_performance') if functions.dfIsEmpty(df): return if 'advert_id' not in df.columns: Log('error', 'advert_id not in df columns', df.columns, account_id) return df["id"] = df["advert_id"] df = addParentId(df, account_id) # our UUID from the adverts table report.writeToEntitiesTable( df, report, account_id) # add the final date range data to adverts
def reduceBids(self):
    """New bid decision making

    Returns a df with the new bids"""
    df = self.getKeywordsDataframe()
    if functions.dfIsEmpty(df):
        Log('info', "no keywords found. Can't change bids", '',
            self.account_id)
        return
    remaining_spend_forecast = self.total_spend_forecast - float(
        self.this_month_spend)
    spend_vs_remaining_budget_percentage = (
        self.remaining_budget / remaining_spend_forecast)

    def updateBid(cpc_bid, reduction_percentage, min_bid=0.1):
        """Df lambda function

        Reduce bid by percentage. Accepts reduction_percentage as a whole
        number e.g. 98. Checks a min bid limit (optional)
        """
        try:
            cpc_bid = float(cpc_bid)
            cpc_bid = cpc_bid * ((100 - reduction_percentage) / 100)
            if cpc_bid < min_bid:
                cpc_bid = min_bid
            return cpc_bid
        except ValueError:
            return cpc_bid

    def updateForecast(row):
        """Df lambda function"""
        try:
            reduction = row["new_bid"] / row["cpc_bid"]
        except TypeError:
            return float(row["cpc_bid"])
        return float(((row["cost"] * reduction) / 7) *
                     self.local_dates.days_remaining_in_this_month)

    # step the reduction up in 5% increments until the forecast fits the
    # remaining budget
    start_reduction = 10 - int(spend_vs_remaining_budget_percentage * 10)
    for i in range(start_reduction, 10):
        reduction_percentage = i * 5
        df["new_bid"] = df["cpc_bid"].apply(
            lambda cpc_bid: updateBid(cpc_bid, reduction_percentage))
        df["forecast"] = df[["cpc_bid", "new_bid", "cost"]].apply(
            lambda row: updateForecast(row), axis=1)
        if df.forecast.sum() <= self.remaining_budget:
            break
    return df

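# A worked example of the reduction schedule above, with illustrative
# numbers. If spend is on track to use twice the remaining budget,
# spend_vs_remaining_budget_percentage is 0.5, so
# start_reduction = 10 - int(0.5 * 10) = 5, and the loop tries 25%, 30%,
# 35%, 40%, 45% cuts, stopping at the first cut whose forecast fits the
# remaining budget. For a single bid:
#   updateBid(1.20, 25) == 1.20 * (75 / 100) == 0.90
#   updateBid(0.12, 25) == 0.10   (clamped to the 0.10 min_bid floor)
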
def getWorstAndBestPerformers(df, feed, target_cpa):
    worst_performer = getWorstPerformer(df, target_cpa)
    best_performer = getBestPerformer(df, target_cpa)
    this_feed = worst_performer.append(best_performer)
    # stamp the rows before the first-pass return so they always carry dates
    this_feed["created_at"] = datetime.now()
    this_feed["updated_at"] = datetime.now()
    this_feed["display_from_date"] = datetime.now()
    if functions.dfIsEmpty(feed):
        return this_feed
    return feed.append(this_feed)

def createDfWithAllDateRanges(self, account_id):
    all_df = None
    settings = Settings()
    for date_range in settings.date_ranges:
        if date_range == "THIS_MONTH" and LocalDates(
                account_id).is_first_of_month:
            continue
        report = Report(account_id, date_range, self.options)
        report.createAccountDirectory()
        report.createReportDirectory()
        report.downloadReport(account_id, report.where_string)
        df = report.convertCsvToDataframe()
        df["date_range"] = date_range
        if df.shape[0] == 0:
            print("%s df is empty" % (date_range))
            continue
        if functions.dfIsEmpty(all_df):
            all_df = df.copy()
        else:
            all_df = all_df.append(df.copy())
    if functions.dfIsEmpty(all_df):
        Log("info", "%s report is empty" % (self.report_name), "",
            self.account_id)
        return
    all_df = report.stringifyIds(all_df)
    return all_df.reset_index()

def main(account_id): settings = Settings() Log("info", "processing ad n-grams", "", account_id) df = createFullDataFrame(account_id, settings) if functions.dfIsEmpty(df): return df = df.drop_duplicates() table_name = "ad_n_gram_performance" deleteFromTable(table_name, account_id, settings.createEngine()) functions.append_df_to_sql_table(df, table_name)
def populateAdGroupsAndAdGroupPerformance(account_id):
    column_map = {
        "campaign_google_id": "campaign_id",
        "adgroup_name": "name",
        "adgroup_google_id": "google_id",
        "adgroup_status": "status"
    }
    # all data = both performance and adgroups
    all_data_df = getAllData(account_id, column_map)
    if functions.dfIsEmpty(all_data_df):
        print("df is empty")
        return
    # Split into adgroups and adgroup_performance and add to the db
    # adgroups
    adgroups_cols = [column_map[i] for i in column_map] + [
        "created_at", "updated_at", "id"
    ]
    adgroups = all_data_df[adgroups_cols]
    adgroups["account_id"] = account_id
    deleteAndWriteToAdGroupsTable(adgroups, account_id, "adgroups",
                                  settings.createEngine())
    # adgroup_performance
    adgroup_performance_cols = list(all_data_df.columns)
    for k in column_map:
        adgroup_performance_cols.remove(column_map[k])
    adgroup_performance = all_data_df[adgroup_performance_cols]
    # Note: just add the date range manually for now, but the others in
    # settings.date_ranges can be looped through at a later date
    date_range = "last_30_days"
    adgroup_performance["date_range"] = date_range
    adgroup_performance["adgroup_id"] = adgroup_performance["id"]
    del adgroup_performance["id"]
    adgroup_performance["account_id"] = account_id
    adgroup_performance["id"] = pd.Series(
        [uuid.uuid1() for i in range(len(adgroup_performance))]).astype(str)
    deleteAndWriteToAdGroupsTable(adgroup_performance, account_id,
                                  "adgroup_performance",
                                  settings.createEngine())

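# deleteAndWriteToAdGroupsTable isn't shown in this section. A sketch of the
# delete-then-append pattern it presumably wraps, mirroring the other
# delete*/write* helpers in this codebase (an assumption, not the confirmed
# implementation):
def deleteAndWriteToAdGroupsTable(df, account_id, table_name, engine):
    # Replace the account's rows: delete the old ones, append the new frame.
    engine.execute("DELETE FROM %s WHERE account_id = '%s'" %
                   (table_name, account_id))
    df.to_sql(table_name, engine, if_exists="append", index=False)
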
def main(account_id): Log("info", "processing adgroup feed", "", account_id) table_name = "adgroup_feed" deleteFromTable(table_name, account_id) df = getFeedInfoAllDateRanges(account_id) if functions.dfIsEmpty(df): print("df is empty") deleteFromTable(table_name, account_id) return df = functions.createUuid(df) df = functions.trimDfToTableColumns(df, table_name) df['display_from_date'] = datetime.now() functions.append_df_to_sql_table(df, table_name)
def main(account_id): Log("info", "getting campaign performance from the api", "", account_id) report = Report(account_id, "", options) df = report.createDfWithAllDateRanges(account_id) if functions.dfIsEmpty(df): return # remember column headers are as per the download here df["campaign_id"] = df.apply( lambda row: functions.addIdToDf(account_id, row["Campaign ID"]), axis=1) df["id"] = df["campaign_id"] df = report.basicProcessing(df) report.writeToEntitiesTable(df, report, account_id) report.writeToPerformanceTable(df, report, account_id)
def main(account_id): Log("info", "getting account performance from the api", "", account_id) report = Report(account_id, "last_30_days", options) report.createAccountDirectory() report.createReportDirectory() report.downloadReport(account_id, options["where_string"]) df = report.convertCsvToDataframe() if functions.dfIsEmpty(df): return df = report.basicProcessing(df) df = reportSpecificProcessing(df, account_id) deleteExitingData(account_id, options["performance_table_name"]) report.writeDataframeToTable(df, options["performance_table_name"])
def updateBids(self, df):
    if functions.dfIsEmpty(df):
        return
    mutations = df.copy()
    mutations["entity_google_id"] = mutations[
        "adgroup_google_id"] + "," + mutations["google_id"]
    mutations["account_id"] = self.account_id
    mutations["type"] = "keyword"
    mutations["action"] = "set"
    mutations["attribute"] = "bid"
    mutations["value"] = mutations["new_bid"]
    mutations["created_at"] = datetime.now()
    mutations["updated_at"] = datetime.now()
    mutations = mutations[[
        "entity_google_id", "entity_id", "account_id", "type", "action",
        "attribute", "value", "created_at", "updated_at"
    ]]
    mutations = mutations.reset_index(drop=True)
    mutations["id"] = pd.Series(
        [uuid.uuid1() for i in range(len(mutations))]).astype(str)
    print("updating %s bids" % mutations.shape[0])
    Database().appendDataframe("mutations", mutations)

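# How reduceBids and updateBids plausibly fit together (a hedged usage
# sketch; the BudgetCommander class name is an assumption, not something
# this section confirms):
#   commander = BudgetCommander(account_id)
#   new_bids = commander.reduceBids()   # df with a "new_bid" column
#   commander.updateBids(new_bids)      # queues "set bid" mutation rows
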
def main(account_id): Log("info", "getting search query performance from the api", "", account_id) report = Report(account_id, "", options) df = report.createDfWithAllDateRanges(account_id) if functions.dfIsEmpty(df): return df = df.groupby(['Search term', 'date_range'], as_index=False).sum() df["search_query_id"] = df.apply( lambda row: functions.addIdToDf(account_id, row["Search term"]), axis=1) df["id"] = df["search_query_id"] df = report.basicProcessing(df) report.writeToEntitiesTable(df, report, account_id) report.writeToPerformanceTable(df, report, account_id) return df
def addNGrams(search_query_performance_df, columns):
    if functions.dfIsEmpty(search_query_performance_df):
        return
    n_gram_dict = {}
    for i, row in search_query_performance_df.fillna(0).iterrows():
        impressions = float(row['impressions'])
        clicks = float(row['clicks'])
        conversions = float(row['conversions'])
        conversion_value = float(row['conversion_value'])
        cost = float(row['cost'])
        text = row['query']
        # tidy up
        puncts = [",", ".", "!", "?", ":"]
        for punct in puncts:
            text = text.replace(punct, "")
        text = word_tokenize(text)
        # collect every bigram and trigram in the query
        total_gram_vec = list(bigrams(text)) + list(trigrams(text))
        for gram in total_gram_vec:
            if gram not in n_gram_dict:
                n_gram_dict[gram] = {
                    'impressions': impressions,
                    'gram_count': 1,
                    'clicks': clicks,
                    'cost': cost,
                    'conversions': conversions,
                    'conversion_value': conversion_value
                }
            else:
                n_gram_dict[gram]['impressions'] += impressions
                n_gram_dict[gram]['gram_count'] += 1
                n_gram_dict[gram]['clicks'] += clicks
                n_gram_dict[gram]['cost'] += cost
                n_gram_dict[gram]['conversions'] += conversions
                n_gram_dict[gram]['conversion_value'] += conversion_value
    # compute the calculated metrics and significance data per n-gram
    n_gram_df_data = {}
    for gram in n_gram_dict:
        impressions = n_gram_dict[gram]['impressions']
        count = n_gram_dict[gram]['gram_count']
        clicks = n_gram_dict[gram]['clicks']
        conversions = n_gram_dict[gram]['conversions']
        cost = n_gram_dict[gram]['cost']
        conversion_value = n_gram_dict[gram]['conversion_value']
        try:
            cpa = cost / conversions
        except ZeroDivisionError:
            cpa = 0
        try:
            roas = conversion_value / cost
        except ZeroDivisionError:
            roas = 0
        try:
            ctr = clicks / impressions
        except ZeroDivisionError:
            ctr = 0
        try:
            conversion_rate = conversions / clicks
        except ZeroDivisionError:
            conversion_rate = 0
        try:
            average_cpc = cost / clicks
        except ZeroDivisionError:
            average_cpc = 0
        if clicks != 0 and clicks != 1:
            # standard error of the CTR, treating each impression as a
            # Bernoulli trial
            std = np.sqrt(clicks * (1 - ctr) ** 2 +
                          (impressions - clicks) * ctr ** 2) / (impressions - 1)
            standard_error = std / np.sqrt(impressions)
        else:
            standard_error = 0
        n_gram_df_data[gram] = {
            'n_gram_count': count,
            'impressions': impressions,
            'ctr': ctr,
            'conversion_rate': conversion_rate,
            'average_cpc': average_cpc,
            'ctr_significance': standard_error,
            'conversions': conversions,
            'cost': cost,
            'conversion_value': conversion_value,
            'cpa': cpa,
            'roas': roas,
            'clicks': clicks
        }
    df = pd.DataFrame(n_gram_df_data)
    df = df.T
    df["ctr_significance"] = df["ctr_significance"].replace(
        r'^\s*$', 0, regex=True).astype("float")
    return df

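# A quick illustration of the nltk n-gram extraction used above (runnable on
# its own; requires the nltk 'punkt' tokenizer data to be downloaded):
#   from nltk import word_tokenize, bigrams, trigrams
#   text = word_tokenize("cheap running shoes online")
#   list(bigrams(text))
#     -> [('cheap', 'running'), ('running', 'shoes'), ('shoes', 'online')]
#   list(trigrams(text))
#     -> [('cheap', 'running', 'shoes'), ('running', 'shoes', 'online')]
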
def main(account_id):
    settings = Settings()
    deleteFromSearchQueryNGrams(settings, account_id)
    columns = [
        "id", "created_at", "updated_at", "account_id", "date_range",
        "n_gram", "n_gram_count", "show_on_graph", "ctr", "ctr_significance",
        "impressions", "clicks", "conversions", "cost", "average_cpc",
        "conversion_value", "cpa", "roas", "conversion_rate", "graph_order"
    ]
    all_df = None
    for date_range in settings.date_ranges:
        df = dataFrameFromSearchQueryPerformanceReports(
            settings, account_id, date_range)
        df = addNGrams(df, columns)
        if functions.dfIsEmpty(df):
            continue
        df = filterAndAddColumns(df, date_range, account_id)
        if functions.dfIsEmpty(df):
            continue
        # drop duplicate columns
        # from here: https://stackoverflow.com/questions/14984119/python-pandas-remove-duplicate-columns
        df = df.loc[:, ~df.columns.duplicated()]
        if functions.dfIsEmpty(df):
            continue
        if functions.dfIsEmpty(all_df):
            all_df = df.copy()
        else:
            all_df = all_df.append(df.copy())
    if functions.dfIsEmpty(all_df):
        return
    # serving too many rows will cause php memory issues (and issues with
    # the client), so just serve the first X rows, ordered by cost
    rowsToServe = 2000
    all_df = all_df.sort_values("cost", ascending=False).head(rowsToServe)
    # then sort by ctr and add the graph elements (means the n-grams must be
    # among the top spenders to be in the graph)
    all_df = all_df.sort_values(["ctr", "conversions", "clicks", "cost"],
                                ascending=False).reset_index(drop=True)
    # flag the top three rows for the graph
    for i in range(3):
        all_df.loc[all_df.index == i, 'show_on_graph'] = True
        all_df.loc[all_df.index == i, 'graph_order'] = i + 1
    # finally fill NAs and append to the table
    all_df = all_df[columns].fillna(0)
    functions.append_df_to_sql_table(all_df, "search_query_n_gram_performance")

def main(account_id): Log("info", "processing account performance changes", "", account_id) ad_performance_df = dataFrameFromAdPerformanceReports(settings, account_id) if functions.dfIsEmpty(ad_performance_df): return # returns a df with the past x (60 at present) days data df = createXDaysDataFrame(ad_performance_df, settings, account_id) # add the graph data to a dict # first add the the calculated metrics (ctr, etc) - these will make up the graph data df = addCalculatedMetrics(df, settings) df = df.replace([np.inf, -np.inf], 0) previousDf = df.iloc[:30].sort_index().fillna(0).round(2) currentDf = df.iloc[30:].sort_index().fillna(0).round(2) # we'll store everything in this dict mets = getGraphDataFromDf(previousDf, currentDf, settings) # add the comparisons - the baseline amount and the -/+ amount # compare previousDf to currentDf to get the +/- numbers # was, now, diff # sum the dfs, combining the dates previousDf = previousDf.sum() currentDf = currentDf.sum() previousDf = addCalculatedMetrics(previousDf, settings).fillna(0) currentDf = addCalculatedMetrics(currentDf, settings).fillna(0) del previousDf["account_id"] del currentDf["account_id"] # add the comparisons (i.e. previous Vs current performance)... comp = pd.concat([currentDf, previousDf], axis=1) comp["vs"] = comp.apply(lambda row: returnComparison(row), axis=1) comp = comp.T.reset_index() comp = comp[comp["index"] == "vs"].reset_index(drop=True) # bring it all together! metrics = ["cpa", "ctr", "roas", "conversion_rate"] valueNames = list(settings.metrics) account_performance_changes = df.groupby("account_id").sum().reset_index()[["account_id"] + metrics + valueNames] # add the vs metrics for metric in metrics: account_performance_changes[metric] = comp[metric].values[0] # add the graph data for graph in mets: account_performance_changes[graph] = str(mets[graph]) # add the baseline (overall performance) for metric in settings.calculatedMetrics: if metric == "average_cpc": continue baseline_metric = metric + "_baseline" if settings.calculatedMetrics[metric][1] == "/": account_performance_changes[baseline_metric] = account_performance_changes[ settings.calculatedMetrics[metric][0]] / \ account_performance_changes[ settings.calculatedMetrics[metric][2]] if settings.calculatedMetrics[metric][3]: account_performance_changes[baseline_metric] = account_performance_changes[baseline_metric] * 100 account_performance_changes = account_performance_changes.fillna(0).round(2) basline_metrics = [i + "_baseline" for i in metrics] graph_metrics = [i + "_graph_data" for i in metrics] account_performance_changes = account_performance_changes[ ["account_id"] + metrics + basline_metrics + graph_metrics] account_performance_changes["date_range"] = "last_30_days" account_performance_changes["id"] = pd.Series( [uuid.uuid1() for i in range(len(account_performance_changes))]).astype(str) account_performance_changes["created_at"] = datetime.now() account_performance_changes["updated_at"] = datetime.now() for metric in graph_metrics: account_performance_changes[metric] = account_performance_changes[metric].str.replace("'", '"') account_performance_changes = account_performance_changes.replace([np.inf, -np.inf], 0) account_performance_changes.to_sql( "account_performance_changes", settings.createEngine(), if_exists='append', index=False ) return account_performance_changes
def process(self):
    """A place to group the core actions.

    Delete existing data, process and replace"""
    self.deleteExisting()
    df_chunks = self.getAdvertsDataframe()
    ldf = None
    while True:
        try:
            df = next(df_chunks)
        except StopIteration:
            break
        for line in self.lines:
            this_df = df[[
                "clicks", "impressions", self.group_by_column, line
            ]].groupby([self.group_by_column, line]).sum()
            this_df["ctr"] = (this_df.clicks / this_df.impressions) * 100
            adgroup_ids = list(this_df.index.levels[0])
            this_df = this_df.reset_index()
            if this_df.shape[0] == 0:
                continue
            # start adgroup ids loop
            for i, adgroup_id in enumerate(adgroup_ids):
                tdf = this_df.copy()
                quantile = tdf[(tdf[self.group_by_column] ==
                                adgroup_id)].impressions.quantile()
                # keep the top 3 lines by CTR, above the median impression
                # count
                tdf = tdf[(tdf[self.group_by_column] == adgroup_id)
                          & (tdf.impressions > quantile)
                          & (tdf[line] != "/")].sort_values(
                              "ctr", ascending=False).head(3).reset_index(
                                  drop=True)
                if tdf.shape[0] == 0:
                    continue
                tdf["order"] = tdf.index + 1
                tdf["type"] = line
                tdf.rename(columns={line: "value"}, inplace=True)
                tdf = tdf[[self.group_by_column, "value", "order", "type"]]
                if ldf is None:
                    ldf = tdf.copy()
                else:
                    ldf = ldf.append(tdf)
    if functions.dfIsEmpty(ldf):
        return
    ldf["account_id"] = self.account_id
    ldf = ldf.reset_index(drop=True)
    ldf.value = ldf.value.str.replace(" --", "")
    ldf["id"] = pd.Series([uuid.uuid1() for i in range(len(ldf))]).astype(str)
    ldf["created_at"] = datetime.now()
    ldf["updated_at"] = datetime.now()
    ldf["date_range"] = self.date_range
    ldf.to_sql(self.table_name, Database().createEngine(),
               if_exists='append', index=False)
