def get_data_reg(date_from=None, date_to=None):
    """
    function to build analysisConfig and make api request for registrations
    on entry service level
    :param date_from: start date; defaults to yesterday
    :param date_to: end date; defaults to yesterday
    :return: dataframe with relevant information
    """
    # resolve date defaults at call time (call defaults in the signature
    # would be frozen at import time)
    date_from = date_from or api.get_datetime_yesterday()
    date_to = date_to or api.get_datetime_yesterday()

    # build analysisConfig
    analysisConfig = {
        "hideFooters": [1],
        "startTime": date_from,
        "stopTime": date_to,
        "rowLimit": 10000,
        "analysisObjects": [{"title": "Registrierung SSO – entry service"}],
        "metrics": [
            {"title": "Anzahl Registrierung SSO"},
            {"title": "Anzahl Registrierung SSO – entry service"},
        ],
    }

    # request data
    data = api.wt_get_data(analysisConfig)

    # parse data
    data = data["result"]["analysisData"]
    df = pd.DataFrame(data)
    col_names = ["entry_service", "reg_sso", "reg_sso_entry_service"]
    df.columns = col_names

    # create date
    df["date"] = pd.to_datetime(date_from)
    convert_cols = df.columns.drop(['date', 'entry_service'])
    df[convert_cols] = df[convert_cols].apply(pd.to_numeric, errors='coerce')

    # rearrange order of columns
    cols = df.columns.tolist()
    cols = cols[-1:] + cols[:-1]
    df = df[cols]

    logging.info('entry service registration imported from webtrekk for ' + date_from)

    return df

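# Usage sketch (hedged): pulling entry-service registrations for one day.
# The date below is a placeholder; after the column reshuffle above, the
# returned frame is [date, entry_service, reg_sso, reg_sso_entry_service].
def _example_entry_service_registrations():
    df = get_data_reg(date_from='2021-01-31', date_to='2021-01-31')
    print(df.head())
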
def get_data(date_from=None, date_to=None):
    """
    function to build analysisConfig and make api request
    :param date_from: start date; defaults to yesterday
    :param date_to: end date; defaults to yesterday
    :return: dataframe with relevant information
             [date, ai_stationaer, ai_mobile, ai_hp_stationaer, ai_hp_mobile]
    """
    # resolve date defaults at call time
    date_from = date_from or api.get_datetime_yesterday()
    date_to = date_to or api.get_datetime_yesterday()

    # build analysisConfig
    analysisConfig = {
        "hideFooters": [1],
        "startTime": date_from,
        "stopTime": date_to,
        "analysisObjects": [{"title": "Tage"}],
        "metrics": [
            {"title": "AI stationaer gesamt"},
            {"title": "AI mobile gesamt"},
            {"title": "AI HP stationaer"},
            {"title": "AI HP mobile"},
        ],
    }

    # request data
    data = api.wt_get_data(analysisConfig)

    # parse data
    data = data["result"]["analysisData"]
    df = pd.DataFrame(data)
    col_names = [
        "date", "ai_stationaer", "ai_mobile", "ai_hp_stationaer", "ai_hp_mobile"
    ]
    df.columns = col_names
    df.date = pd.to_datetime(df.date, format="%d.%m.%Y")
    convert_cols = df.columns.drop('date')
    df[convert_cols] = df[convert_cols].apply(pd.to_numeric, errors='coerce')

    logging.info('ad impressions imported from webtrekk for ' + date_from)

    return df

def get_data_admanager(date_from=None, date_to=None):
    """
    function establishes connection to ad manager api and gets adimpressions
    :param date_from: date_from as string; defaults to yesterday
    :param date_to: date_to as string; defaults to yesterday
    :return: dataframe [date, ai_stationaer, ai_mobile, ai_hp_stationaer, ai_hp_mobile]
    """
    # resolve date defaults at call time
    date_from = date_from or api.get_datetime_yesterday()
    date_to = date_to or api.get_datetime_yesterday()

    # set key file
    key_file = 'admanager-auth.json'
    application_name = 'AdManager API Export'
    network_code = 183

    # initialize the GoogleServiceAccountClient
    oauth2_client = googleads.oauth2.GoogleServiceAccountClient(
        key_file, googleads.oauth2.GetAPIScope('ad_manager'))

    # initialize the Ad Manager client
    ad_manager_client = googleads.ad_manager.AdManagerClient(
        oauth2_client, application_name, network_code,
        cache=googleads.common.ZeepServiceProxy.NO_CACHE)

    # create dictionary with all report information
    report_dict = create_admanager_dict()

    # initialize dict
    value_dict = {'date': date_from}

    # run report job and extract data for each adimpressions report
    for cur_report in report_dict:
        cur_adimp = run_admanager_job(date_from=date_from, date_to=date_to,
                                      report_dict=report_dict[cur_report],
                                      client=ad_manager_client)
        value_dict[cur_report] = cur_adimp

    df = pd.DataFrame([value_dict])
    df.date = pd.to_datetime(df.date, format="%Y-%m-%d")

    logging.info('ad impressions imported from AdManager for ' + date_from)

    return df

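# Structure sketch (hedged): create_admanager_dict is defined elsewhere in
# this repo; judging from how run_admanager_job consumes its entries, it
# presumably maps a report name to a filter/flag pair like below. The report
# name and ids here are placeholders, not real Ad Manager ids.
def _example_report_dict():
    return {
        'ai_example': {
            'filter': [111111, 222222],  # [CUSTOM_TARGETING_VALUE_ID, AD_UNIT_ID]
            'parent_flag': True,         # match PARENT_AD_UNIT_ID instead of AD_UNIT_ID
        },
    }
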
def get_missing_dates(table, min_date):
    """
    get missing dates of table (check distinct dates because of multiple
    entries in topartikel)
    :param table: dataset_id.table_id as string
    :param min_date: minimum date from which on distinct dates should be
                     checked (until yesterday), format is 'YYYY-MM-DD'
    :return: list of missing dates for specific table (None if the table
             doesn't exist)
    """
    # initialize client
    client = gcbq.Client()

    # check if table exists
    try:
        client.get_table(table)
        table_exists = True
    except NotFound:
        table_exists = False

    # if table exists, get distinct dates and check which dates are missing
    # from min_date until yesterday
    if table_exists:
        # get distinct dates
        sql = "SELECT DISTINCT(date) FROM " + table + " ORDER BY date ASC"
        df = client.query(sql).to_dataframe()
        df.date = df.date.dt.strftime("%Y-%m-%d")
        dates = pd.date_range(start=min_date, end=api.get_datetime_yesterday())
        dates = dates.strftime("%Y-%m-%d").tolist()

        # make sure to check only entries which are greater than min_date
        df_check = df[df["date"] >= min_date]

        # remove existing dates (guard against entries outside the generated
        # range, e.g. rows newer than yesterday)
        for date in df_check.date:
            if date in dates:
                dates.remove(date)

        # log if there are missing dates
        if len(dates) == 0:
            logging.info('########## no missing dates in ' + table + ' ###########')
        else:
            logging.info('########## missing dates in ' + table + ' ###########')

        # return missing dates
        return dates
    else:
        logging.info(table + " doesn't exist")

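# Usage sketch (hedged): backfilling a table day by day with the webtrekk
# get_data above. The table name is a placeholder, the `or []` guard covers
# the None returned when the table doesn't exist, and the upload step is left
# to the calling script.
def _example_backfill(table='dataset_id.table_id', min_date='2020-01-01'):
    for missing_date in (get_missing_dates(table, min_date) or []):
        df = get_data(date_from=missing_date, date_to=missing_date)
        # ... append df to the BigQuery table here
        logging.info('backfilled ' + missing_date + ' with ' + str(len(df)) + ' rows')
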
def get_data(date_from=None, date_to=None):
    """
    function to build analysisConfig and make api request
    :param date_from: start date; defaults to yesterday
    :param date_to: end date; defaults to yesterday
    :return: dataframe with relevant information
    """
    # resolve date defaults at call time
    date_from = date_from or api.get_datetime_yesterday()
    date_to = date_to or api.get_datetime_yesterday()

    # build two analysisConfigs, since the webtrekk api can only process
    # 30 metrics at once
    analysisConfig = {
        "hideFooters": [1],
        "startTime": date_from,
        "stopTime": date_to,
        "analysisObjects": [{"title": "Tage"}],
        "metrics": [
            {"title": "Visitors *"},
            {"title": "Visitors - angemeldet"},
            {"title": "Abonnenten"},
            {"title": "Abonnenten - angemeldet"},
            {"title": "Browsers, Unique *"},
            {"title": "Browsers, Unique - angemeldet"},
            {"title": "Browsers, Unique - zeit.de"},
            {"title": "Browsers, Unique - zeit.de - ang."},
            {"title": "Browsers, Unique - ZON App"},
            {"title": "Browsers, Unique - ZON App - ang."},
            {"title": "Browsers, Unique - Abonnenten"},
            {"title": "Browsers, Unique - Abonnenten - ang."},
            {"title": "Einstiege *"},
            {"title": "Einstiege - angemeldet"},
            {"title": "Visits *"},
            {"title": "Visits - angemeldet"},
            {"title": "Qualified Visits"},
            {"title": "Visits Stationaer"},
            {"title": "Visits mobile"},
            {"title": "Visits mit Paywall"},
            {"title": "Visits auf Bestellstrecke"},
            {"title": "Page Impressions"},
            {"title": "PIs Schranke Register"},
            {"title": "PIs Schranke Paid"},
            {"title": "PIs Pur"},
            {"title": "Anzahl Bestellungen"},
            {"title": "Anzahl Best. Z Abo-Schranke nur Red. Marketing"},
            {"title": "Anzahl Bestellungen Z nur Footerbar"},
            {"title": "Anzahl Bestellungen Z+ gesamt"},
        ],
    }
    analysisConfig2 = {
        "hideFooters": [1],
        "startTime": date_from,
        "stopTime": date_to,
        "analysisObjects": [{"title": "Tage"}],
        "metrics": [
            {"title": "Anzahl Bestellungen Pur Only"},
            {"title": "Anzahl Bestellungen Pur Upgrade"},
            {"title": "Anzahl Bestellungen Pur Kombi"},
            {"title": "Anzahl Registrierung SSO"},
            {"title": "Anzahl Registrierungen Schranke"},
            {"title": "Anzahl Login SSO"},
            {"title": "Anzahl Digitalabonnenten"},
            {"title": "Abonnenten - Paid Services - ang."},
            {"title": "Browsers, Unique - Comments"},
            {"title": "Anzahl Best. Z Abo-Schranke nur Red. Marketing 2"},
        ],
    }

    # request data
    data = api.wt_get_data(analysisConfig)
    data2 = api.wt_get_data(analysisConfig2)

    # parse data and combine both result rows (drop the duplicated date
    # column of the second result)
    data = data["result"]["analysisData"]
    data2 = data2["result"]["analysisData"]
    data_comb = [data[0] + data2[0][1:]]
    df = pd.DataFrame(data_comb)
    col_names = ["date", "visitors", "visitors_ang", "abonnenten", "abonnenten_ang",
                 "b_unique", "b_unique_ang", "b_unique_zeitde", "b_unique_zeitde_ang",
                 "b_unique_zonapp", "b_unique_zonapp_ang", "b_unique_abonnenten",
                 "b_unique_abonnenten_ang", "einstiege", "einstiege_ang", "visits",
                 "visits_ang", "qualified_visits", "visits_stationaer", "visits_mobile",
                 "visits_mit_paywall", "visits_bestellstrecke", "pis",
                 "pis_schranke_register", "pis_schranke_paid", "pis_pur", "best",
                 "best_zplus_red_marketing", "best_zplus_footer", "best_zplus_gesamt",
                 "best_pur_only", "best_pur_upgrade", "best_pur_kombi", "reg_sso",
                 "reg_schranke", "login_sso", "sum_abonnenten",
                 "abonnenten_paid_serv_ang", "b_unique_comments",
                 "best_zplus_red_marketing_2"]
    df.columns = col_names
    df.date = pd.to_datetime(df.date, format="%d.%m.%Y")
    convert_cols = df.columns.drop('date')
    df[convert_cols] = df[convert_cols].apply(pd.to_numeric, errors='coerce')

    logging.info('usercentric imported from webtrekk for ' + date_from)

    return df

def get_pis_of_url(url, date_from=None, date_to=None):
    """
    this function retrieves the PIs of a given url on a specific day
    :param url: list of exactly five urls, in order to only make one api call
    :param date_from: start date; defaults to yesterday
    :param date_to: end date; defaults to yesterday
    :return: the PIs for all five given urls as a dataframe
    """
    # resolve date defaults at call time
    date_from = date_from or api.get_datetime_yesterday()
    date_to = date_to or api.get_datetime_yesterday()

    # build analysisConfig
    analysisConfig = {
        "hideFooters": [1],
        "startTime": date_from,
        "stopTime": date_to,
        "analysisObjects": [{"title": "Seiten", "rowLimit": 5}],
        "analysisFilter": {
            "filterRules": [
                {"objectTitle": "Wall - Status", "comparator": "=",
                 "filter": "register"},
                {"link": "and", "objectTitle": "Seiten", "comparator": "=",
                 "filter": "*" + url[0] + "*"},
                {"link": "or", "objectTitle": "Seiten", "comparator": "=",
                 "filter": "*" + url[1] + "*"},
                {"link": "or", "objectTitle": "Seiten", "comparator": "=",
                 "filter": "*" + url[2] + "*"},
                {"link": "or", "objectTitle": "Seiten", "comparator": "=",
                 "filter": "*" + url[3] + "*"},
                {"link": "or", "objectTitle": "Seiten", "comparator": "=",
                 "filter": "*" + url[4] + "*"},
            ]
        },
        "metrics": [{"title": "Page Impressions", "sortOrder": "desc"}],
    }

    # request data
    data = api.wt_get_data(analysisConfig)

    # parse data
    data = data["result"]["analysisData"]
    df_pis = pd.DataFrame(data)
    col_names = ["url", "pis_schranke"]
    df_pis.columns = col_names

    # display only url instead of content id
    df_pis.url = df_pis.url.str.partition('|')[2]

    return df_pis

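# Usage note (hedged): get_pis_of_url indexes url[0]..url[4], so it needs
# exactly five entries. A sketch of padding a shorter list before the call;
# the padding token is an assumption (any string matching no page works).
def _example_pis_lookup(urls, date_from, date_to):
    urls = list(urls) + ['no-match-placeholder'] * max(0, 5 - len(urls))
    return get_pis_of_url(urls[:5], date_from, date_to)
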
def get_data_top_reg(date_from=None, date_to=None):
    """
    function to build analysisConfig and make api request; function retrieves
    the top five articles which generate the most registrations
    :param date_from: start date; defaults to yesterday
    :param date_to: end date; defaults to yesterday
    :return: dataframe with the top five register articles with the most
             registrations and their PIs
    """
    # resolve date defaults at call time
    date_from = date_from or api.get_datetime_yesterday()
    date_to = date_to or api.get_datetime_yesterday()

    # build analysisConfig
    analysisConfig = {
        "hideFooters": [1],
        "startTime": date_from,
        "stopTime": date_to,
        "analysisObjects": [{"title": "Registrierung SSO", "rowLimit": 5}],
        "metrics": [{"title": "Anzahl Registrierungen Schranke",
                     "sortOrder": "desc"}],
    }

    # request data
    data = api.wt_get_data(analysisConfig)

    # parse data
    data = data["result"]["analysisData"]
    df = pd.DataFrame(data)
    col_names = ["url", "registrierungen"]
    df.columns = col_names

    # get rid of https in url
    df.url = df.url.str.partition('://')[2]

    # get PIs of the top five register articles (all at once, passing the
    # same date range)
    df_pis = get_pis_of_url(df.url, date_from, date_to)

    # join registrierungen and their PIs
    df = df.join(df_pis.set_index('url'), on="url", how="left")

    # create date and rank
    df["date"] = pd.to_datetime(date_from)
    df["rank"] = range(1, 1 + len(df))

    # get title
    df["title"] = df.url.apply(lambda x: get_title_from_tms(x))

    # rearrange order of columns
    cols = df.columns.tolist()
    cols = cols[-3:] + cols[:-3]
    df = df[cols]

    # convert to numeric columns
    convert_cols = df.columns.drop(['date', 'rank', 'title', 'url'])
    df[convert_cols] = df[convert_cols].apply(pd.to_numeric, errors='coerce')

    logging.info('topartikel registrierungen imported from webtrekk for ' + date_from)

    return df

def get_data_top(date_from=None, date_to=None):
    """
    function to build analysisConfig and make api request; function retrieves
    the top five most read articles from yesterday
    :param date_from: start date; defaults to yesterday
    :param date_to: end date; defaults to yesterday
    :return: dataframe with the top five most read articles, their visits and
             their referrers
    """
    # resolve date defaults at call time
    date_from = date_from or api.get_datetime_yesterday()
    date_to = date_to or api.get_datetime_yesterday()

    # build analysisConfig
    analysisConfig = {
        "hideFooters": [1],
        "startTime": date_from,
        "stopTime": date_to,
        "analysisFilter": {
            "filterRules": [{"objectTitle": "Seiten", "comparator": "=",
                             "filter": "*.article.*"}]
        },
        "analysisObjects": [{"title": "Seiten", "rowLimit": 5}],
        "metrics": [
            {"title": "Visits *", "sortOrder": "desc"},
            {"title": "Visits Direct"},
            {"title": "Visits Stationaer"},
            {"title": "Visits mobile"},
            {"title": "Visits Chrome Content Suggestions"},
            {"title": "Visits Apple News (geschätzt)"},  # previously "Visits Direct iOS"
            {"title": "Visits Facebook (inkl. IAs)"},
            {"title": "Visits Firefox Recommendations"},
            {"title": "Visits Flipboard"},
            {"title": "Visits Google News"},
            {"title": "Visits Google Organisch"},
            {"title": "Visits Push"},
            {"title": "Visits Socialife"},
            {"title": "Visits Upday"},
            {"title": "Visits Twitter"},
        ],
    }

    # request data
    data = api.wt_get_data(analysisConfig)

    # parse data
    data = data["result"]["analysisData"]
    df = pd.DataFrame(data)
    col_names = ["url", "visits", "visits_direct", "visits_stationaer",
                 "visits_mobile", "visits_chrome_sugg", "visits_direct_ios",
                 "visits_facebook", "visits_firefox", "visits_flipboard",
                 "visits_google_news", "visits_google_organisch", "visits_push",
                 "visits_socialife", "visits_upday", "visits_twitter"]
    df.columns = col_names

    # create date and rank
    df["date"] = pd.to_datetime(date_from)
    df["rank"] = range(1, 1 + len(df))

    # use only url of article and get title
    df.url = df.url.str.partition('|')[2]
    df["title"] = df.url.apply(lambda x: get_title_from_tms(x))

    # rearrange order of columns
    cols = df.columns.tolist()
    cols = cols[-3:] + cols[:-3]
    df = df[cols]

    # convert to numeric columns
    convert_cols = df.columns.drop(['date', 'rank', 'title', 'url'])
    df[convert_cols] = df[convert_cols].apply(pd.to_numeric, errors='coerce')

    logging.info('topartikel imported from webtrekk for ' + date_from)

    return df

def get_data_top_best(date_from=None, date_to=None):
    """
    function to build analysisConfig and make api request; function retrieves
    the top five abo articles with the most orders
    :param date_from: start date; defaults to yesterday
    :param date_to: end date; defaults to yesterday
    :return: dataframe with the top five abo articles with the most orders
             and their PIs
    """
    # resolve date defaults at call time
    date_from = date_from or api.get_datetime_yesterday()
    date_to = date_to or api.get_datetime_yesterday()

    # build analysisConfig
    analysisConfig = {
        "hideFooters": [1],
        "startTime": date_from,
        "stopTime": date_to,
        "analysisFilter": {
            "filterRules": [{"objectTitle": "Wall - Status", "comparator": "=",
                             "filter": "paid"}]
        },
        "analysisObjects": [{"title": "Seiten", "rowLimit": 5}],
        "metrics": [
            {"title": "Anzahl Bestellungen mit Seitenbezug", "sortOrder": "desc"},
            {"title": "Page Impressions"},
        ],
    }

    # request data
    data = api.wt_get_data(analysisConfig)

    # parse data
    data = data["result"]["analysisData"]
    df = pd.DataFrame(data)
    col_names = ["url", "bestellungen", "pis_schranke"]
    df.columns = col_names

    # create date and rank
    df["date"] = pd.to_datetime(date_from)
    df["rank"] = range(1, 1 + len(df))

    # use only url of article and get title
    df.url = df.url.str.partition('|')[2]
    df["title"] = df.url.apply(lambda x: get_title_from_tms(x))

    # rearrange order of columns
    cols = df.columns.tolist()
    cols = cols[-3:] + cols[:-3]
    df = df[cols]

    # convert to numeric columns
    convert_cols = df.columns.drop(['date', 'rank', 'title', 'url'])
    df[convert_cols] = df[convert_cols].apply(pd.to_numeric, errors='coerce')

    logging.info('topartikel bestellungen imported from webtrekk for ' + date_from)

    return df

def run_admanager_job(date_from=None, date_to=None, report_dict=None, client=None):
    """
    creates the statement, runs the report job and retrieves the job
    information; also extracts only the relevant data
    :param date_from: date_from as 'YYYY-MM-DD' string; defaults to yesterday
    :param date_to: date_to as 'YYYY-MM-DD' string; defaults to yesterday
    :param report_dict: dictionary with specific information for getting
                        adimpression data
    :param client: google ad manager client
    :return: adimpressions for the specific date, depending on the input dict
    """
    # resolve date defaults at call time
    date_from = date_from or api.get_datetime_yesterday()
    date_to = date_to or api.get_datetime_yesterday()

    # set variables from input dict
    custom_targeting_value_id = report_dict['filter'][0]
    ad_unit_id = report_dict['filter'][1]
    parent_flag = report_dict['parent_flag']

    # set where condition; if parent_flag=True use the PARENT_AD_UNIT_ID
    # variable name
    if parent_flag:
        where_condition = 'CUSTOM_TARGETING_VALUE_ID = :customTargetingValueId AND ' \
                          'PARENT_AD_UNIT_ID = :adUnitId'
    else:
        where_condition = 'CUSTOM_TARGETING_VALUE_ID = :customTargetingValueId AND ' \
                          'AD_UNIT_ID = :adUnitId'

    # convert string to datetime object; required for the API
    date_from = datetime.strptime(date_from, '%Y-%m-%d').date()
    date_to = datetime.strptime(date_to, '%Y-%m-%d').date()

    # initialize a DataDownloader
    report_downloader = client.GetDataDownloader(version='v202011')

    # create statement object to filter
    statement = (googleads.ad_manager.StatementBuilder(version='v202011')
                 .Where(where_condition)
                 .WithBindVariable('customTargetingValueId', custom_targeting_value_id)
                 .WithBindVariable('adUnitId', ad_unit_id)
                 .Limit(None)
                 .Offset(None))

    # create report job
    report_job = {
        'reportQuery': {
            'dimensions': ['DATE', 'AD_UNIT_ID', 'CUSTOM_CRITERIA'],
            'columns': ['TOTAL_LINE_ITEM_LEVEL_IMPRESSIONS'],
            'dateRangeType': 'CUSTOM_DATE',
            'startDate': date_from,
            'endDate': date_to,
            'statement': statement.ToStatement()
        }
    }

    # run the report and wait for it to finish
    report_job_id = report_downloader.WaitForReport(report_job)

    # download report job and save as CSV
    report_file = tempfile.NamedTemporaryFile(suffix='.csv', delete=False)
    report_downloader.DownloadReportToFile(report_job_id,
                                           export_format='CSV_DUMP',
                                           outfile=report_file,
                                           use_gzip_compression=False)
    report_file.close()

    # extract report data
    with open(report_file.name, 'rt') as report:
        df = pd.read_csv(report)

    return df['Column.TOTAL_LINE_ITEM_LEVEL_IMPRESSIONS'][0]

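# Usage sketch (hedged): running a single report outside get_data_admanager.
# The ids are placeholders, and `client` must be an initialized AdManagerClient
# as built in get_data_admanager above.
def _example_single_report(client):
    report = {'filter': [111111, 222222], 'parent_flag': False}
    return run_admanager_job(date_from='2021-01-31', date_to='2021-01-31',
                             report_dict=report, client=client)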