Code example #1
0
def get_data_reg(date_from=None, date_to=None):
    """
    Build an analysisConfig and request SSO registrations on entry-service
    level from the Webtrekk API.

    :param date_from: start date; defaults to yesterday, resolved at call time
    :param date_to: end date; defaults to yesterday, resolved at call time
    :return: dataframe with columns [date, entry_service, reg_sso,
             reg_sso_entry_service]
    """
    # Resolve defaults at call time. Putting api.get_datetime_yesterday() in
    # the signature would evaluate it once at import time, so "yesterday"
    # would go stale in any long-running process.
    if date_from is None:
        date_from = api.get_datetime_yesterday()
    if date_to is None:
        date_to = api.get_datetime_yesterday()

    # build analysisConfig
    analysisConfig = {
        "hideFooters": [1],
        "startTime": date_from,
        "stopTime": date_to,
        "rowLimit": 10000,
        "analysisObjects": [{
            "title": "Registrierung SSO – entry service"
        }],
        "metrics": [{
            "title": "Anzahl Registrierung SSO"
        }, {
            "title": "Anzahl Registrierung SSO – entry service"
        }]
    }

    # request data
    data = api.wt_get_data(analysisConfig)

    # parse data
    data = data["result"]["analysisData"]
    df = pd.DataFrame(data)
    df.columns = ["entry_service", "reg_sso", "reg_sso_entry_service"]

    # stamp every row with the requested start date
    df["date"] = pd.to_datetime(date_from)

    # convert metric columns to numeric; non-parsable values become NaN
    convert_cols = df.columns.drop(['date', 'entry_service'])
    df[convert_cols] = df[convert_cols].apply(pd.to_numeric, errors='coerce')

    # rearrange columns so the date column comes first
    cols = df.columns.tolist()
    df = df[cols[-1:] + cols[:-1]]

    # lazy %-formatting: message only built if INFO logging is enabled
    logging.info('entry service registration imported from webtrekk for %s',
                 date_from)

    return df
Code example #2
0
def get_data(date_from=None, date_to=None):
    """
    Build an analysisConfig and request daily ad-impression metrics from the
    Webtrekk API.

    :param date_from: start date; defaults to yesterday, resolved at call time
    :param date_to: end date; defaults to yesterday, resolved at call time
    :return: dataframe with columns [date, ai_stationaer, ai_mobile,
             ai_hp_stationaer, ai_hp_mobile]
    """
    # Resolve defaults at call time. Evaluating api.get_datetime_yesterday()
    # in the signature would freeze "yesterday" at import time.
    if date_from is None:
        date_from = api.get_datetime_yesterday()
    if date_to is None:
        date_to = api.get_datetime_yesterday()

    # build analysisConfig
    analysisConfig = {
        "hideFooters": [1],
        "startTime": date_from,
        "stopTime": date_to,
        "analysisObjects": [{
            "title": "Tage"
        }],
        "metrics": [{
            "title": "AI stationaer gesamt"
        }, {
            "title": "AI mobile gesamt"
        }, {
            "title": "AI HP stationaer"
        }, {
            "title": "AI HP mobile"
        }]
    }

    # request data
    data = api.wt_get_data(analysisConfig)

    # parse data
    data = data["result"]["analysisData"]
    df = pd.DataFrame(data)
    df.columns = [
        "date", "ai_stationaer", "ai_mobile", "ai_hp_stationaer",
        "ai_hp_mobile"
    ]
    # Webtrekk returns dates as DD.MM.YYYY strings
    df.date = pd.to_datetime(df.date, format="%d.%m.%Y")

    # convert metric columns to numeric; non-parsable values become NaN
    convert_cols = df.columns.drop('date')
    df[convert_cols] = df[convert_cols].apply(pd.to_numeric, errors='coerce')

    # lazy %-formatting: message only built if INFO logging is enabled
    logging.info('ad impressions imported from webtrekk for %s', date_from)

    return df
Code example #3
0
def get_data(date_from=None, date_to=None):
    """
    Build analysisConfigs and request the daily user-centric KPI set from the
    Webtrekk API.

    Two requests are made because the Webtrekk API can only process 30
    metrics at once; the two result rows are concatenated into one record.

    :param date_from: start date; defaults to yesterday, resolved at call time
    :param date_to: end date; defaults to yesterday, resolved at call time
    :return: single-row dataframe with one column per metric (see col_names)
    """
    # Resolve defaults at call time. Evaluating api.get_datetime_yesterday()
    # in the signature would freeze "yesterday" at import time.
    if date_from is None:
        date_from = api.get_datetime_yesterday()
    if date_to is None:
        date_to = api.get_datetime_yesterday()

    # metric titles for the first request (Webtrekk limit: 30 metrics)
    metric_titles_1 = [
        "Visitors *",
        "Visitors - angemeldet",
        "Abonnenten",
        "Abonnenten - angemeldet",
        "Browsers, Unique *",
        "Browsers, Unique - angemeldet",
        "Browsers, Unique - zeit.de",
        "Browsers, Unique - zeit.de - ang.",
        "Browsers, Unique - ZON App",
        "Browsers, Unique - ZON App - ang.",
        "Browsers, Unique - Abonnenten",
        "Browsers, Unique - Abonnenten - ang.",
        "Einstiege *",
        "Einstiege - angemeldet",
        "Visits *",
        "Visits - angemeldet",
        "Qualified Visits",
        "Visits Stationaer",
        "Visits mobile",
        "Visits mit Paywall",
        "Visits auf Bestellstrecke",
        "Page Impressions",
        "PIs Schranke Register",
        "PIs Schranke Paid",
        "PIs Pur",
        "Anzahl Bestellungen",
        "Anzahl Best. Z  Abo-Schranke nur Red. Marketing",
        "Anzahl Bestellungen Z  nur Footerbar",
        "Anzahl Bestellungen Z+ gesamt",
    ]

    # metric titles for the second request (overflow beyond the 30 limit)
    metric_titles_2 = [
        "Anzahl Bestellungen Pur Only",
        "Anzahl Bestellungen Pur Upgrade",
        "Anzahl Bestellungen Pur Kombi",
        "Anzahl Registrierung SSO",
        "Anzahl Registrierungen Schranke",
        "Anzahl Login SSO",
        "Anzahl Digitalabonnenten",
        "Abonnenten - Paid Services - ang.",
        "Browsers, Unique - Comments",
        "Anzahl Best. Z  Abo-Schranke nur Red. Marketing 2",
    ]

    analysisConfig = {
        "hideFooters": [1],
        "startTime": date_from,
        "stopTime": date_to,
        "analysisObjects": [{
            "title": "Tage"
        }],
        "metrics": [{"title": t} for t in metric_titles_1],
    }

    analysisConfig2 = {
        "hideFooters": [1],
        "startTime": date_from,
        "stopTime": date_to,
        "analysisObjects": [{
            "title": "Tage"
        }],
        "metrics": [{"title": t} for t in metric_titles_2],
    }

    # request data
    data = api.wt_get_data(analysisConfig)
    data2 = api.wt_get_data(analysisConfig2)

    # parse data: drop the duplicated date cell of the second result row and
    # join both rows into one combined record
    data = data["result"]["analysisData"]
    data2 = data2["result"]["analysisData"]
    data_comb = [data[0] + data2[0][1:]]
    df = pd.DataFrame(data_comb)
    col_names = ["date", "visitors", "visitors_ang", "abonnenten", "abonnenten_ang",
                 "b_unique", "b_unique_ang", "b_unique_zeitde", "b_unique_zeitde_ang",
                 "b_unique_zonapp", "b_unique_zonapp_ang", "b_unique_abonnenten",
                 "b_unique_abonnenten_ang", "einstiege", "einstiege_ang", "visits", "visits_ang",
                 "qualified_visits", "visits_stationaer", "visits_mobile", "visits_mit_paywall",
                 "visits_bestellstrecke", "pis", "pis_schranke_register", "pis_schranke_paid",
                 "pis_pur", "best", "best_zplus_red_marketing", "best_zplus_footer",
                 "best_zplus_gesamt", "best_pur_only", "best_pur_upgrade", "best_pur_kombi",
                 "reg_sso", "reg_schranke", "login_sso", "sum_abonnenten",
                 "abonnenten_paid_serv_ang", "b_unique_comments", "best_zplus_red_marketing_2"]
    df.columns = col_names
    # Webtrekk returns dates as DD.MM.YYYY strings
    df.date = pd.to_datetime(df.date, format="%d.%m.%Y")

    # convert metric columns to numeric; non-parsable values become NaN
    convert_cols = df.columns.drop('date')
    df[convert_cols] = df[convert_cols].apply(pd.to_numeric, errors='coerce')

    # lazy %-formatting: message only built if INFO logging is enabled
    logging.info('usercentric imported from webtrekk for %s', date_from)

    return df
Code example #4
0
def get_pis_of_url(url, date_from=None, date_to=None):
    """
    Retrieve the page impressions (register wall) of the given urls on a
    specific day with a single API call.

    :param url: sequence of urls (originally five; any length works — the
                filter and row limit adapt to len(url))
    :param date_from: start date; defaults to yesterday, resolved at call time
    :param date_to: end date; defaults to yesterday, resolved at call time
    :return: dataframe with columns [url, pis_schranke]
    """
    # Resolve defaults at call time. Evaluating api.get_datetime_yesterday()
    # in the signature would freeze "yesterday" at import time.
    if date_from is None:
        date_from = api.get_datetime_yesterday()
    if date_to is None:
        date_to = api.get_datetime_yesterday()

    # Build the filter rules from the url sequence instead of hard-coding
    # url[0]..url[4]: first the wall-status rule, then one "Seiten" rule per
    # url ("and" links the first url rule to the status rule, "or" the rest).
    filter_rules = [{
        "objectTitle": "Wall - Status",
        "comparator": "=",
        "filter": "register"
    }]
    for i, u in enumerate(url):
        filter_rules.append({
            "link": "and" if i == 0 else "or",
            "objectTitle": "Seiten",
            "comparator": "=",
            "filter": "*" + u + "*"
        })

    # build analysisConfig
    analysisConfig = {
        "hideFooters": [1],
        "startTime": date_from,
        "stopTime": date_to,
        "analysisObjects": [{
            "title": "Seiten",
            "rowLimit": len(filter_rules) - 1  # one row per requested url
        }],
        "analysisFilter": {
            "filterRules": filter_rules
        },
        "metrics": [{
            "title": "Page Impressions",
            "sortOrder": "desc"
        }
        ]}

    # request data
    data = api.wt_get_data(analysisConfig)

    # parse data
    data = data["result"]["analysisData"]
    df_pis = pd.DataFrame(data)
    df_pis.columns = ["url", "pis_schranke"]

    # display only url instead of content id (format "content_id|url")
    df_pis.url = df_pis.url.str.partition('|')[2]

    return df_pis
Code example #5
0
def get_data_top_reg(date_from=None, date_to=None):
    """
    Build an analysisConfig and request the top five articles that generated
    the most (register wall) registrations, together with their PIs.

    :param date_from: start date; defaults to yesterday, resolved at call time
    :param date_to: end date; defaults to yesterday, resolved at call time
    :return: dataframe with columns [date, rank, title, url, registrierungen,
             pis_schranke]
    """
    # Resolve defaults at call time. Evaluating api.get_datetime_yesterday()
    # in the signature would freeze "yesterday" at import time.
    if date_from is None:
        date_from = api.get_datetime_yesterday()
    if date_to is None:
        date_to = api.get_datetime_yesterday()

    # build analysisConfig
    analysisConfig = {
        "hideFooters": [1],
        "startTime": date_from,
        "stopTime": date_to,
        "analysisObjects": [{
            "title": "Registrierung SSO",
            "rowLimit": 5
        }],
        "metrics": [{
            "title": "Anzahl Registrierungen Schranke",
            "sortOrder": "desc"
        }
        ]}

    # request data
    data = api.wt_get_data(analysisConfig)

    # parse data
    data = data["result"]["analysisData"]
    df = pd.DataFrame(data)
    df.columns = ["url", "registrierungen"]

    # get rid of https in url
    df.url = df.url.str.partition('://')[2]

    # get PIs of the top five register articles (all at once). Pass the date
    # range through — previously omitted, so PIs were always for yesterday
    # regardless of the requested range.
    df_pis = get_pis_of_url(df.url, date_from, date_to)

    # join registrierungen and their PIs
    df = df.join(df_pis.set_index('url'), on="url", how="left")

    # create date and rank
    df["date"] = pd.to_datetime(date_from)
    df["rank"] = range(1, 1 + len(df))

    # get title
    df["title"] = df.url.apply(lambda x: get_title_from_tms(x))

    # rearrange columns so [date, rank, title] come first
    cols = df.columns.tolist()
    df = df[cols[-3:] + cols[:-3]]

    # convert metric columns to numeric; non-parsable values become NaN
    convert_cols = df.columns.drop(['date', 'rank', 'title', 'url'])
    df[convert_cols] = df[convert_cols].apply(pd.to_numeric, errors='coerce')

    # lazy %-formatting: message only built if INFO logging is enabled
    logging.info('topartikel registrierungen imported from webtrekk for %s',
                 date_from)

    return df
Code example #6
0
def get_data_top(date_from=None, date_to=None):
    """
    Build an analysisConfig and request the top five most-read articles,
    their visits, and a visit breakdown per referrer.

    :param date_from: start date; defaults to yesterday, resolved at call time
    :param date_to: end date; defaults to yesterday, resolved at call time
    :return: dataframe with columns [date, rank, title, url, visits,
             visits_<referrer>...]
    """
    # Resolve defaults at call time. Evaluating api.get_datetime_yesterday()
    # in the signature would freeze "yesterday" at import time.
    if date_from is None:
        date_from = api.get_datetime_yesterday()
    if date_to is None:
        date_to = api.get_datetime_yesterday()

    # referrer metrics; "Visits *" is added first with descending sort
    referrer_titles = [
        "Visits Direct",
        "Visits Stationaer",
        "Visits mobile",
        "Visits Chrome Content Suggestions",
        "Visits Apple News (geschätzt)",  # formerly "Visits Direct iOS"
        "Visits Facebook (inkl. IAs)",
        "Visits Firefox Recommendations",
        "Visits Flipboard",
        "Visits Google News",
        "Visits Google Organisch",
        "Visits Push",
        "Visits Socialife",
        "Visits Upday",
        "Visits Twitter",
    ]

    # build analysisConfig
    analysisConfig = {
        "hideFooters": [1],
        "startTime": date_from,
        "stopTime": date_to,
        "analysisFilter": {
            "filterRules": [{
                "objectTitle": "Seiten",
                "comparator": "=",
                "filter": "*.article.*"
            }]
        },
        "analysisObjects": [{
            "title": "Seiten",
            "rowLimit": 5
        }],
        "metrics": ([{"title": "Visits *", "sortOrder": "desc"}]
                    + [{"title": t} for t in referrer_titles]),
    }

    # request data
    data = api.wt_get_data(analysisConfig)

    # parse data
    data = data["result"]["analysisData"]
    df = pd.DataFrame(data)
    df.columns = ["url", "visits", "visits_direct", "visits_stationaer", "visits_mobile",
                  "visits_chrome_sugg", "visits_direct_ios", "visits_facebook", "visits_firefox",
                  "visits_flipboard", "visits_google_news", "visits_google_organisch", "visits_push",
                  "visits_socialife", "visits_upday", "visits_twitter"]

    # create date and rank
    df["date"] = pd.to_datetime(date_from)
    df["rank"] = range(1, 1 + len(df))

    # use only the url part of "content_id|url" and look up the title
    df.url = df.url.str.partition('|')[2]
    df["title"] = df.url.apply(lambda x: get_title_from_tms(x))

    # rearrange columns so [date, rank, title] come first
    cols = df.columns.tolist()
    df = df[cols[-3:] + cols[:-3]]

    # convert metric columns to numeric; non-parsable values become NaN
    convert_cols = df.columns.drop(['date', 'rank', 'title', 'url'])
    df[convert_cols] = df[convert_cols].apply(pd.to_numeric, errors='coerce')

    # lazy %-formatting: message only built if INFO logging is enabled
    logging.info('topartikel imported from webtrekk for %s', date_from)

    return df
Code example #7
0
def get_data_top_best(date_from=None, date_to=None):
    """
    Build an analysisConfig and request the top five paid-wall articles with
    the most orders, together with their PIs.

    :param date_from: start date; defaults to yesterday, resolved at call time
    :param date_to: end date; defaults to yesterday, resolved at call time
    :return: dataframe with columns [date, rank, title, url, bestellungen,
             pis_schranke]
    """
    # Resolve defaults at call time. Evaluating api.get_datetime_yesterday()
    # in the signature would freeze "yesterday" at import time.
    if date_from is None:
        date_from = api.get_datetime_yesterday()
    if date_to is None:
        date_to = api.get_datetime_yesterday()

    # build analysisConfig
    analysisConfig = {
        "hideFooters": [1],
        "startTime": date_from,
        "stopTime": date_to,
        "analysisFilter": {
            "filterRules": [{
                "objectTitle": "Wall - Status",
                "comparator": "=",
                "filter": "paid"
            }]
        },
        "analysisObjects": [{
            "title": "Seiten",
            "rowLimit": 5
        }],
        "metrics": [{
            "title": "Anzahl Bestellungen mit Seitenbezug",
            "sortOrder": "desc"
        }, {
            "title": "Page Impressions"
        }
        ]}

    # request data
    data = api.wt_get_data(analysisConfig)

    # parse data
    data = data["result"]["analysisData"]
    df = pd.DataFrame(data)
    df.columns = ["url", "bestellungen", "pis_schranke"]

    # create date and rank
    df["date"] = pd.to_datetime(date_from)
    df["rank"] = range(1, 1 + len(df))

    # use only the url part of "content_id|url" and look up the title
    df.url = df.url.str.partition('|')[2]
    df["title"] = df.url.apply(lambda x: get_title_from_tms(x))

    # rearrange columns so [date, rank, title] come first
    cols = df.columns.tolist()
    df = df[cols[-3:] + cols[:-3]]

    # convert metric columns to numeric; non-parsable values become NaN
    convert_cols = df.columns.drop(['date', 'rank', 'title', 'url'])
    df[convert_cols] = df[convert_cols].apply(pd.to_numeric, errors='coerce')

    # lazy %-formatting: message only built if INFO logging is enabled
    logging.info('topartikel bestellungen imported from webtrekk for %s',
                 date_from)

    return df