def get_data_reg(date_from=None, date_to=None):
    """
    Build an analysisConfig and make an API request for registrations
    on entry-service level.

    :param date_from: start date for the report; defaults to yesterday
        (resolved at call time, not import time)
    :param date_to: end date for the report; defaults to yesterday
    :return: dataframe with columns
        [date, entry_service, reg_sso, reg_sso_entry_service]
    """
    # BUGFIX: the previous defaults (api.get_datetime_yesterday() in the
    # signature) were evaluated once at import time, so a long-running
    # process would keep querying the same stale date. Resolve per call.
    if date_from is None:
        date_from = api.get_datetime_yesterday()
    if date_to is None:
        date_to = api.get_datetime_yesterday()

    # build analysisConfig
    # NOTE(review): "rowLimit" sits at the top level here, unlike the other
    # configs in this file where it is inside analysisObjects — confirm
    # against the Webtrekk API whether this placement is intended.
    analysisConfig = {
        "hideFooters": [1],
        "startTime": date_from,
        "stopTime": date_to,
        "rowLimit": 10000,
        "analysisObjects": [{
            "title": "Registrierung SSO – entry service"
        }],
        "metrics": [{
            "title": "Anzahl Registrierung SSO"
        }, {
            "title": "Anzahl Registrierung SSO – entry service"
        }]
    }

    # request data
    data = api.wt_get_data(analysisConfig)

    # parse data
    data = data["result"]["analysisData"]
    df = pd.DataFrame(data)
    col_names = ["entry_service", "reg_sso", "reg_sso_entry_service"]
    df.columns = col_names

    # create date
    df["date"] = pd.to_datetime(date_from)

    # convert all metric columns to numeric
    convert_cols = df.columns.drop(['date', 'entry_service'])
    df[convert_cols] = df[convert_cols].apply(pd.to_numeric, errors='coerce')

    # rearrange order of columns: move date to the front
    cols = df.columns.tolist()
    cols = cols[-1:] + cols[:-1]
    df = df[cols]

    # lazy %-formatting: message is only built if the level is enabled
    logging.info('entry service registration imported from webtrekk for %s',
                 date_from)
    return df
def get_data(date_from=None, date_to=None):
    """
    Build an analysisConfig and make an API request for ad impressions.

    :param date_from: start date for the report; defaults to yesterday
        (resolved at call time, not import time)
    :param date_to: end date for the report; defaults to yesterday
    :return: dataframe with columns
        [date, ai_stationaer, ai_mobile, ai_hp_stationaer, ai_hp_mobile]
    """
    # BUGFIX: defaults were previously evaluated once at import time,
    # freezing "yesterday" for the lifetime of the process. Resolve per call.
    if date_from is None:
        date_from = api.get_datetime_yesterday()
    if date_to is None:
        date_to = api.get_datetime_yesterday()

    # build analysisConfig
    analysisConfig = {
        "hideFooters": [1],
        "startTime": date_from,
        "stopTime": date_to,
        "analysisObjects": [{
            "title": "Tage"
        }],
        "metrics": [{
            "title": "AI stationaer gesamt"
        }, {
            "title": "AI mobile gesamt"
        }, {
            "title": "AI HP stationaer"
        }, {
            "title": "AI HP mobile"
        }]
    }

    # request data
    data = api.wt_get_data(analysisConfig)

    # parse data
    data = data["result"]["analysisData"]
    df = pd.DataFrame(data)
    col_names = [
        "date", "ai_stationaer", "ai_mobile", "ai_hp_stationaer",
        "ai_hp_mobile"
    ]
    df.columns = col_names

    # Webtrekk returns dates as DD.MM.YYYY strings
    df.date = pd.to_datetime(df.date, format="%d.%m.%Y")
    convert_cols = df.columns.drop('date')
    df[convert_cols] = df[convert_cols].apply(pd.to_numeric, errors='coerce')

    logging.info('ad impressions imported from webtrekk for %s', date_from)
    return df
def get_data(date_from=None, date_to=None):
    """
    Build analysisConfigs and make API requests for the user-centric KPIs.

    Two configs are needed because the Webtrekk API can only process
    30 metrics per request; the two result rows are concatenated.

    :param date_from: start date for the report; defaults to yesterday
        (resolved at call time, not import time)
    :param date_to: end date for the report; defaults to yesterday
    :return: dataframe with one row of 40 user-centric KPI columns
    """
    # BUGFIX: defaults were previously evaluated once at import time,
    # freezing "yesterday" for the lifetime of the process. Resolve per call.
    if date_from is None:
        date_from = api.get_datetime_yesterday()
    if date_to is None:
        date_to = api.get_datetime_yesterday()

    # build two analysisConfigs, since webtrekk api can only process
    # 30 metrics at once
    analysisConfig = {
        "hideFooters": [1],
        "startTime": date_from,
        "stopTime": date_to,
        "analysisObjects": [{
            "title": "Tage"
        }],
        "metrics": [
            {"title": "Visitors *"},
            {"title": "Visitors - angemeldet"},
            {"title": "Abonnenten"},
            {"title": "Abonnenten - angemeldet"},
            {"title": "Browsers, Unique *"},
            {"title": "Browsers, Unique - angemeldet"},
            {"title": "Browsers, Unique - zeit.de"},
            {"title": "Browsers, Unique - zeit.de - ang."},
            {"title": "Browsers, Unique - ZON App"},
            {"title": "Browsers, Unique - ZON App - ang."},
            {"title": "Browsers, Unique - Abonnenten"},
            {"title": "Browsers, Unique - Abonnenten - ang."},
            {"title": "Einstiege *"},
            {"title": "Einstiege - angemeldet"},
            {"title": "Visits *"},
            {"title": "Visits - angemeldet"},
            {"title": "Qualified Visits"},
            {"title": "Visits Stationaer"},
            {"title": "Visits mobile"},
            {"title": "Visits mit Paywall"},
            {"title": "Visits auf Bestellstrecke"},
            {"title": "Page Impressions"},
            {"title": "PIs Schranke Register"},
            {"title": "PIs Schranke Paid"},
            {"title": "PIs Pur"},
            {"title": "Anzahl Bestellungen"},
            {"title": "Anzahl Best. Z Abo-Schranke nur Red. Marketing"},
            {"title": "Anzahl Bestellungen Z nur Footerbar"},
            {"title": "Anzahl Bestellungen Z+ gesamt"}
        ]}
    analysisConfig2 = {
        "hideFooters": [1],
        "startTime": date_from,
        "stopTime": date_to,
        "analysisObjects": [{
            "title": "Tage"
        }],
        "metrics": [
            {"title": "Anzahl Bestellungen Pur Only"},
            {"title": "Anzahl Bestellungen Pur Upgrade"},
            {"title": "Anzahl Bestellungen Pur Kombi"},
            {"title": "Anzahl Registrierung SSO"},
            {"title": "Anzahl Registrierungen Schranke"},
            {"title": "Anzahl Login SSO"},
            {"title": "Anzahl Digitalabonnenten"},
            {"title": "Abonnenten - Paid Services - ang."},
            {"title": "Browsers, Unique - Comments"},
            {"title": "Anzahl Best. Z Abo-Schranke nur Red. Marketing 2"}
        ]}

    # request data
    data = api.wt_get_data(analysisConfig)
    data2 = api.wt_get_data(analysisConfig2)

    # parse data
    data = data["result"]["analysisData"]
    data2 = data2["result"]["analysisData"]
    # merge the two result rows; drop the duplicated date cell from row 2
    data_comb = [data[0] + data2[0][1:]]
    df = pd.DataFrame(data_comb)
    col_names = ["date", "visitors", "visitors_ang", "abonnenten",
                 "abonnenten_ang", "b_unique", "b_unique_ang",
                 "b_unique_zeitde", "b_unique_zeitde_ang", "b_unique_zonapp",
                 "b_unique_zonapp_ang", "b_unique_abonnenten",
                 "b_unique_abonnenten_ang", "einstiege", "einstiege_ang",
                 "visits", "visits_ang", "qualified_visits",
                 "visits_stationaer", "visits_mobile", "visits_mit_paywall",
                 "visits_bestellstrecke", "pis", "pis_schranke_register",
                 "pis_schranke_paid", "pis_pur", "best",
                 "best_zplus_red_marketing", "best_zplus_footer",
                 "best_zplus_gesamt", "best_pur_only", "best_pur_upgrade",
                 "best_pur_kombi", "reg_sso", "reg_schranke", "login_sso",
                 "sum_abonnenten", "abonnenten_paid_serv_ang",
                 "b_unique_comments", "best_zplus_red_marketing_2"]
    df.columns = col_names

    # Webtrekk returns dates as DD.MM.YYYY strings
    df.date = pd.to_datetime(df.date, format="%d.%m.%Y")
    convert_cols = df.columns.drop('date')
    df[convert_cols] = df[convert_cols].apply(pd.to_numeric, errors='coerce')

    logging.info('usercentric imported from webtrekk for %s', date_from)
    return df
def get_pis_of_url(url, date_from=None, date_to=None):
    """
    Retrieve the PIs of the given urls on a specific day.

    :param url: vector of five urls, in order to only make one api call
    :param date_from: start date for the report; defaults to yesterday
        (resolved at call time, not import time)
    :param date_to: end date for the report; defaults to yesterday
    :return: dataframe with columns [url, pis_schranke] for the five urls
    """
    # BUGFIX: defaults were previously evaluated once at import time,
    # freezing "yesterday" for the lifetime of the process. Resolve per call.
    if date_from is None:
        date_from = api.get_datetime_yesterday()
    if date_to is None:
        date_to = api.get_datetime_yesterday()

    # build analysisConfig: filter to register-wall pages matching any of
    # the five urls (wildcard match on both sides)
    analysisConfig = {
        "hideFooters": [1],
        "startTime": date_from,
        "stopTime": date_to,
        "analysisObjects": [{
            "title": "Seiten",
            "rowLimit": 5
        }],
        "analysisFilter": {
            "filterRules": [{
                "objectTitle": "Wall - Status",
                "comparator": "=",
                "filter": "register"
            }, {
                "link": "and",
                "objectTitle": "Seiten",
                "comparator": "=",
                "filter": "*" + url[0] + "*"
            }, {
                "link": "or",
                "objectTitle": "Seiten",
                "comparator": "=",
                "filter": "*" + url[1] + "*"
            }, {
                "link": "or",
                "objectTitle": "Seiten",
                "comparator": "=",
                "filter": "*" + url[2] + "*"
            }, {
                "link": "or",
                "objectTitle": "Seiten",
                "comparator": "=",
                "filter": "*" + url[3] + "*"
            }, {
                "link": "or",
                "objectTitle": "Seiten",
                "comparator": "=",
                "filter": "*" + url[4] + "*"
            }]
        },
        "metrics": [{
            "title": "Page Impressions",
            "sortOrder": "desc"
        }]
    }

    # request data
    data = api.wt_get_data(analysisConfig)

    # parse data
    data = data["result"]["analysisData"]
    df_pis = pd.DataFrame(data)
    col_names = ["url", "pis_schranke"]
    df_pis.columns = col_names

    # display only url instead of content id (page names look like "id|url")
    df_pis.url = df_pis.url.str.partition('|')[2]
    return df_pis
def get_data_top_reg(date_from=None, date_to=None):
    """
    Build an analysisConfig and make an API request for the top five
    articles that drive the most registrations.

    :param date_from: start date for the report; defaults to yesterday
        (resolved at call time, not import time)
    :param date_to: end date for the report; defaults to yesterday
    :return: dataframe with the top five register articles, their
        registration counts and their PIs
    """
    # BUGFIX: defaults were previously evaluated once at import time,
    # freezing "yesterday" for the lifetime of the process. Resolve per call.
    if date_from is None:
        date_from = api.get_datetime_yesterday()
    if date_to is None:
        date_to = api.get_datetime_yesterday()

    # build analysisConfig
    analysisConfig = {
        "hideFooters": [1],
        "startTime": date_from,
        "stopTime": date_to,
        "analysisObjects": [{
            "title": "Registrierung SSO",
            "rowLimit": 5
        }],
        "metrics": [{
            "title": "Anzahl Registrierungen Schranke",
            "sortOrder": "desc"
        }]
    }

    # request data
    data = api.wt_get_data(analysisConfig)

    # parse data
    data = data["result"]["analysisData"]
    df = pd.DataFrame(data)
    col_names = ["url", "registrierungen"]
    df.columns = col_names

    # get rid of the scheme (https://) in the url
    df.url = df.url.str.partition('://')[2]

    # get PIs of the top five register articles (all in one api call)
    df_pis = get_pis_of_url(df.url)

    # join registrations with their PIs
    df = df.join(df_pis.set_index('url'), on="url", how="left")

    # create date and rank (1 = most registrations)
    df["date"] = pd.to_datetime(date_from)
    df["rank"] = range(1, 1 + len(df))

    # get title
    df["title"] = df.url.apply(lambda x: get_title_from_tms(x))

    # rearrange order of columns: move date, rank, title to the front
    cols = df.columns.tolist()
    cols = cols[-3:] + cols[:-3]
    df = df[cols]

    # convert metric columns to numeric
    convert_cols = df.columns.drop(['date', 'rank', 'title', 'url'])
    df[convert_cols] = df[convert_cols].apply(pd.to_numeric, errors='coerce')

    logging.info('topartikel registrierungen imported from webtrekk for %s',
                 date_from)
    return df
def get_data_top(date_from=None, date_to=None):
    """
    Build an analysisConfig and make an API request for the top five
    most-read articles of the reporting day.

    :param date_from: start date for the report; defaults to yesterday
        (resolved at call time, not import time)
    :param date_to: end date for the report; defaults to yesterday
    :return: dataframe with the top five most-read articles, their visits
        and the visits split by referrer
    """
    # BUGFIX: defaults were previously evaluated once at import time,
    # freezing "yesterday" for the lifetime of the process. Resolve per call.
    if date_from is None:
        date_from = api.get_datetime_yesterday()
    if date_to is None:
        date_to = api.get_datetime_yesterday()

    # build analysisConfig: restrict to article pages only
    analysisConfig = {
        "hideFooters": [1],
        "startTime": date_from,
        "stopTime": date_to,
        "analysisFilter": {
            "filterRules": [{
                "objectTitle": "Seiten",
                "comparator": "=",
                "filter": "*.article.*"
            }]
        },
        "analysisObjects": [{
            "title": "Seiten",
            "rowLimit": 5
        }],
        "metrics": [{
            "title": "Visits *",
            "sortOrder": "desc"
        }, {
            "title": "Visits Direct"
        }, {
            "title": "Visits Stationaer"
        }, {
            "title": "Visits mobile"
        }, {
            "title": "Visits Chrome Content Suggestions"
        }, {
            "title": "Visits Apple News (geschätzt)"  # previously "Visits Direct iOS"
        }, {
            "title": "Visits Facebook (inkl. IAs)"
        }, {
            "title": "Visits Firefox Recommendations"
        }, {
            "title": "Visits Flipboard"
        }, {
            "title": "Visits Google News"
        }, {
            "title": "Visits Google Organisch"
        }, {
            "title": "Visits Push"
        }, {
            "title": "Visits Socialife"
        }, {
            "title": "Visits Upday"
        }, {
            "title": "Visits Twitter"
        }]
    }

    # request data
    data = api.wt_get_data(analysisConfig)

    # parse data
    data = data["result"]["analysisData"]
    df = pd.DataFrame(data)
    col_names = ["url", "visits", "visits_direct", "visits_stationaer",
                 "visits_mobile", "visits_chrome_sugg", "visits_direct_ios",
                 "visits_facebook", "visits_firefox", "visits_flipboard",
                 "visits_google_news", "visits_google_organisch",
                 "visits_push", "visits_socialife", "visits_upday",
                 "visits_twitter"]
    df.columns = col_names

    # create date and rank (1 = most visits)
    df["date"] = pd.to_datetime(date_from)
    df["rank"] = range(1, 1 + len(df))

    # use only url of article (page names look like "id|url") and get title
    df.url = df.url.str.partition('|')[2]
    df["title"] = df.url.apply(lambda x: get_title_from_tms(x))

    # rearrange order of columns: move date, rank, title to the front
    cols = df.columns.tolist()
    cols = cols[-3:] + cols[:-3]
    df = df[cols]

    # convert metric columns to numeric
    convert_cols = df.columns.drop(['date', 'rank', 'title', 'url'])
    df[convert_cols] = df[convert_cols].apply(pd.to_numeric, errors='coerce')

    logging.info('topartikel imported from webtrekk for %s', date_from)
    return df
def get_data_top_best(date_from=None, date_to=None):
    """
    Build an analysisConfig and make an API request for the top five
    paywalled (abo) articles with the most orders.

    :param date_from: start date for the report; defaults to yesterday
        (resolved at call time, not import time)
    :param date_to: end date for the report; defaults to yesterday
    :return: dataframe with the top five abo articles, their order counts
        and their PIs
    """
    # BUGFIX: defaults were previously evaluated once at import time,
    # freezing "yesterday" for the lifetime of the process. Resolve per call.
    if date_from is None:
        date_from = api.get_datetime_yesterday()
    if date_to is None:
        date_to = api.get_datetime_yesterday()

    # build analysisConfig: restrict to pages behind the paid wall
    analysisConfig = {
        "hideFooters": [1],
        "startTime": date_from,
        "stopTime": date_to,
        "analysisFilter": {
            "filterRules": [{
                "objectTitle": "Wall - Status",
                "comparator": "=",
                "filter": "paid"
            }]
        },
        "analysisObjects": [{
            "title": "Seiten",
            "rowLimit": 5
        }],
        "metrics": [{
            "title": "Anzahl Bestellungen mit Seitenbezug",
            "sortOrder": "desc"
        }, {
            "title": "Page Impressions"
        }]
    }

    # request data
    data = api.wt_get_data(analysisConfig)

    # parse data
    data = data["result"]["analysisData"]
    df = pd.DataFrame(data)
    col_names = ["url", "bestellungen", "pis_schranke"]
    df.columns = col_names

    # create date and rank (1 = most orders)
    df["date"] = pd.to_datetime(date_from)
    df["rank"] = range(1, 1 + len(df))

    # use only url of article (page names look like "id|url") and get title
    df.url = df.url.str.partition('|')[2]
    df["title"] = df.url.apply(lambda x: get_title_from_tms(x))

    # rearrange order of columns: move date, rank, title to the front
    cols = df.columns.tolist()
    cols = cols[-3:] + cols[:-3]
    df = df[cols]

    # convert metric columns to numeric
    convert_cols = df.columns.drop(['date', 'rank', 'title', 'url'])
    df[convert_cols] = df[convert_cols].apply(pd.to_numeric, errors='coerce')

    logging.info('topartikel bestellungen imported from webtrekk for %s',
                 date_from)
    return df