コード例 #1
0
def get_data_reg(date_from=None, date_to=None):
    """
    Build the analysisConfig and make an api request for registrations on
    entry service level.
    :param date_from: start date as string; defaults to yesterday
    :param date_to: end date as string; defaults to yesterday
    :return: dataframe [date, entry_service, reg_sso, reg_sso_entry_service]
    """
    # Resolve defaults at call time: using api.get_datetime_yesterday() as a
    # default argument would evaluate it once at import and freeze the date.
    if date_from is None:
        date_from = api.get_datetime_yesterday()
    if date_to is None:
        date_to = api.get_datetime_yesterday()

    # build analysisConfig
    analysisConfig = {
        "hideFooters": [1],
        "startTime": date_from,
        "stopTime": date_to,
        "rowLimit": 10000,
        "analysisObjects": [{
            "title": "Registrierung SSO – entry service"
        }],
        "metrics": [{
            "title": "Anzahl Registrierung SSO"
        }, {
            "title": "Anzahl Registrierung SSO – entry service"
        }]
    }

    # request data
    data = api.wt_get_data(analysisConfig)

    # parse data
    data = data["result"]["analysisData"]
    df = pd.DataFrame(data)
    df.columns = ["entry_service", "reg_sso", "reg_sso_entry_service"]

    # create date column from the requested start date
    df["date"] = pd.to_datetime(date_from)

    # convert metric columns to numeric (unparsable values become NaN)
    convert_cols = df.columns.drop(['date', 'entry_service'])
    df[convert_cols] = df[convert_cols].apply(pd.to_numeric, errors='coerce')

    # move the date column to the front
    cols = df.columns.tolist()
    df = df[cols[-1:] + cols[:-1]]

    # lazy %-args: the message is only built if INFO logging is enabled
    logging.info('entry service registration imported from webtrekk for %s',
                 date_from)

    return df
コード例 #2
0
def get_data(date_from=None, date_to=None):
    """
    Build the analysisConfig and make an api request for ad impressions.
    :param date_from: start date as string; defaults to yesterday
    :param date_to: end date as string; defaults to yesterday
    :return: dataframe with relevant information [date, ai_stationaer,
             ai_mobile, ai_hp_stationaer, ai_hp_mobile]
    """
    # Resolve defaults at call time: using api.get_datetime_yesterday() as a
    # default argument would evaluate it once at import and freeze the date.
    if date_from is None:
        date_from = api.get_datetime_yesterday()
    if date_to is None:
        date_to = api.get_datetime_yesterday()

    # build analysisConfig: one row per day ("Tage") with four AI metrics
    analysisConfig = {
        "hideFooters": [1],
        "startTime": date_from,
        "stopTime": date_to,
        "analysisObjects": [{
            "title": "Tage"
        }],
        "metrics": [{"title": t} for t in (
            "AI stationaer gesamt",
            "AI mobile gesamt",
            "AI HP stationaer",
            "AI HP mobile",
        )]
    }

    # request data
    data = api.wt_get_data(analysisConfig)

    # parse data
    data = data["result"]["analysisData"]
    df = pd.DataFrame(data)
    df.columns = [
        "date", "ai_stationaer", "ai_mobile", "ai_hp_stationaer",
        "ai_hp_mobile"
    ]
    df.date = pd.to_datetime(df.date, format="%d.%m.%Y")

    # convert metric columns to numeric (unparsable values become NaN)
    convert_cols = df.columns.drop('date')
    df[convert_cols] = df[convert_cols].apply(pd.to_numeric, errors='coerce')

    # lazy %-args: the message is only built if INFO logging is enabled
    logging.info('ad impressions imported from webtrekk for %s', date_from)

    return df
コード例 #3
0
def get_data_admanager(date_from=None, date_to=None):
    """
    Establish a connection to the ad manager api and get adimpressions.
    :param date_from: date_from as string; defaults to yesterday
    :param date_to: date_to as string; defaults to yesterday
    :return: dataframe [date, <one adimpressions column per report defined
             in create_admanager_dict()>]
    """
    # Resolve defaults at call time: using api.get_datetime_yesterday() as a
    # default argument would evaluate it once at import and freeze the date.
    if date_from is None:
        date_from = api.get_datetime_yesterday()
    if date_to is None:
        date_to = api.get_datetime_yesterday()

    # service-account credentials and network identification
    key_file = 'admanager-auth.json'
    application_name = 'AdManager API Export'
    network_code = 183

    # Initialize the service-account OAuth2 client
    oauth2_client = \
        googleads.oauth2.GoogleServiceAccountClient(key_file,
                                                    googleads.oauth2.GetAPIScope('ad_manager'))

    # Initialize the Ad Manager client.
    ad_manager_client = \
        googleads.ad_manager.AdManagerClient(oauth2_client, application_name, network_code,
                                             cache=googleads.common.ZeepServiceProxy.NO_CACHE)

    # create dictionary with all report informations
    report_dict = create_admanager_dict()

    # one row: the date plus one value per report
    value_dict = {'date': date_from}

    # run report job and extract data for each adimpressions report
    for cur_report in report_dict:
        value_dict[cur_report] = run_admanager_job(
            date_from=date_from,
            date_to=date_to,
            report_dict=report_dict[cur_report],
            client=ad_manager_client)

    df = pd.DataFrame([value_dict])
    df.date = pd.to_datetime(df.date, format="%Y-%m-%d")

    # lazy %-args: the message is only built if INFO logging is enabled
    logging.info('ad impressions imported from AdManager for %s', date_from)

    return df
コード例 #4
0
def get_missing_dates(table, min_date):
    """
    Get missing dates of a table (distinct dates are checked because tables
    such as topartikel contain multiple rows per date).
    :param table: dataset_id.table_id as string
    :param min_date: minimum date from which on distinct dates should be
                     checked (until yesterday); format is 'YYYY-MM-DD'
    :return: list of missing date strings for the table, or None if the
             table does not exist
    """
    # initialize client
    client = gcbq.Client()

    # if the table doesn't exist there is nothing to compare against
    try:
        client.get_table(table)
    except NotFound:
        logging.info("%s doesn't exist", table)
        return None

    # get distinct dates
    # NOTE(review): table is interpolated into the SQL string; BigQuery can't
    # parameterize identifiers, so callers must not pass untrusted values.
    sql = "SELECT DISTINCT(date) FROM " + table + " ORDER BY date asc"
    df = client.query(sql).to_dataframe()
    df.date = df.date.dt.strftime("%Y-%m-%d")

    # expected date range from min_date until yesterday
    dates = pd.date_range(start=min_date, end=api.get_datetime_yesterday())
    dates = dates.strftime("%Y-%m-%d").tolist()

    # only consider stored dates >= min_date; a set gives O(1) lookups and,
    # unlike list.remove, tolerates stored dates outside the expected range
    existing = set(df[df["date"] >= min_date].date)
    missing = [d for d in dates if d not in existing]

    # log if there are missing dates
    if missing:
        logging.info('########## missing dates in %s ###########', table)
    else:
        logging.info('########## no missing dates in %s ###########', table)

    return missing
コード例 #5
0
def get_data(date_from=None, date_to=None):
    """
    Build the analysisConfigs and make api requests for the usercentric
    metrics.
    :param date_from: start date as string; defaults to yesterday
    :param date_to: end date as string; defaults to yesterday
    :return: dataframe with one row per day and one column per metric
    """
    # Resolve defaults at call time: using api.get_datetime_yesterday() as a
    # default argument would evaluate it once at import and freeze the date.
    if date_from is None:
        date_from = api.get_datetime_yesterday()
    if date_to is None:
        date_to = api.get_datetime_yesterday()

    def build_config(metric_titles):
        # daily ("Tage") breakdown of the given metrics
        return {
            "hideFooters": [1],
            "startTime": date_from,
            "stopTime": date_to,
            "analysisObjects": [{
                "title": "Tage"
            }],
            "metrics": [{"title": t} for t in metric_titles],
        }

    # two analysisConfigs, since webtrekk api can only process 30 metrics at once
    analysisConfig = build_config([
        "Visitors *",
        "Visitors - angemeldet",
        "Abonnenten",
        "Abonnenten - angemeldet",
        "Browsers, Unique *",
        "Browsers, Unique - angemeldet",
        "Browsers, Unique - zeit.de",
        "Browsers, Unique - zeit.de - ang.",
        "Browsers, Unique - ZON App",
        "Browsers, Unique - ZON App - ang.",
        "Browsers, Unique - Abonnenten",
        "Browsers, Unique - Abonnenten - ang.",
        "Einstiege *",
        "Einstiege - angemeldet",
        "Visits *",
        "Visits - angemeldet",
        "Qualified Visits",
        "Visits Stationaer",
        "Visits mobile",
        "Visits mit Paywall",
        "Visits auf Bestellstrecke",
        "Page Impressions",
        "PIs Schranke Register",
        "PIs Schranke Paid",
        "PIs Pur",
        "Anzahl Bestellungen",
        "Anzahl Best. Z  Abo-Schranke nur Red. Marketing",
        "Anzahl Bestellungen Z  nur Footerbar",
        "Anzahl Bestellungen Z+ gesamt",
    ])

    analysisConfig2 = build_config([
        "Anzahl Bestellungen Pur Only",
        "Anzahl Bestellungen Pur Upgrade",
        "Anzahl Bestellungen Pur Kombi",
        "Anzahl Registrierung SSO",
        "Anzahl Registrierungen Schranke",
        "Anzahl Login SSO",
        "Anzahl Digitalabonnenten",
        "Abonnenten - Paid Services - ang.",
        "Browsers, Unique - Comments",
        "Anzahl Best. Z  Abo-Schranke nur Red. Marketing 2",
    ])

    # request data
    data = api.wt_get_data(analysisConfig)
    data2 = api.wt_get_data(analysisConfig2)

    # parse data: combine both result rows, dropping the duplicated date
    # column (index 0) of the second request
    data = data["result"]["analysisData"]
    data2 = data2["result"]["analysisData"]
    data_comb = [data[0] + data2[0][1:]]
    df = pd.DataFrame(data_comb)
    df.columns = ["date", "visitors", "visitors_ang", "abonnenten", "abonnenten_ang",
                  "b_unique", "b_unique_ang", "b_unique_zeitde", "b_unique_zeitde_ang",
                  "b_unique_zonapp", "b_unique_zonapp_ang", "b_unique_abonnenten",
                  "b_unique_abonnenten_ang", "einstiege", "einstiege_ang", "visits", "visits_ang",
                  "qualified_visits", "visits_stationaer", "visits_mobile", "visits_mit_paywall",
                  "visits_bestellstrecke", "pis", "pis_schranke_register", "pis_schranke_paid",
                  "pis_pur", "best", "best_zplus_red_marketing", "best_zplus_footer",
                  "best_zplus_gesamt", "best_pur_only", "best_pur_upgrade", "best_pur_kombi",
                  "reg_sso", "reg_schranke", "login_sso", "sum_abonnenten",
                  "abonnenten_paid_serv_ang", "b_unique_comments", "best_zplus_red_marketing_2"]
    df.date = pd.to_datetime(df.date, format="%d.%m.%Y")

    # convert metric columns to numeric (unparsable values become NaN)
    convert_cols = df.columns.drop('date')
    df[convert_cols] = df[convert_cols].apply(pd.to_numeric, errors='coerce')

    # lazy %-args: the message is only built if INFO logging is enabled
    logging.info('usercentric imported from webtrekk for %s', date_from)

    return df
コード例 #6
0
def get_pis_of_url(url, date_from=None, date_to=None):
    """
    Retrieve the PIs of the given urls on a specific day.
    :param url: sequence of urls (historically exactly five) queried in a
                single api call
    :param date_from: start date as string; defaults to yesterday
    :param date_to: end date as string; defaults to yesterday
    :return: the PIs for all given urls as a dataframe [url, pis_schranke]
    """
    # Resolve defaults at call time: using api.get_datetime_yesterday() as a
    # default argument would evaluate it once at import and freeze the date.
    if date_from is None:
        date_from = api.get_datetime_yesterday()
    if date_to is None:
        date_to = api.get_datetime_yesterday()

    # first rule restricts to the register wall; then one substring rule per
    # url, linked with "and" for the first and "or" for the rest (generalizes
    # the former hard-coded five url[0]..url[4] rules to any number of urls)
    filter_rules = [{
        "objectTitle": "Wall - Status",
        "comparator": "=",
        "filter": "register"
    }]
    for i, cur_url in enumerate(url):
        filter_rules.append({
            "link": "and" if i == 0 else "or",
            "objectTitle": "Seiten",
            "comparator": "=",
            "filter": "*" + cur_url + "*"
        })

    # build analysisConfig; rowLimit matches the number of queried urls
    # (identical to the former fixed 5 when five urls are passed)
    analysisConfig = {
        "hideFooters": [1],
        "startTime": date_from,
        "stopTime": date_to,
        "analysisObjects": [{
            "title": "Seiten",
            "rowLimit": len(url)
        }],
        "analysisFilter": {
            "filterRules": filter_rules
        },
        "metrics": [{
            "title": "Page Impressions",
            "sortOrder": "desc"
        }
        ]}

    # request data
    data = api.wt_get_data(analysisConfig)

    # parse data
    data = data["result"]["analysisData"]
    df_pis = pd.DataFrame(data)
    df_pis.columns = ["url", "pis_schranke"]

    # display only url instead of content id (strip everything up to '|')
    df_pis.url = df_pis.url.str.partition('|')[2]

    return df_pis
コード例 #7
0
def get_data_top_reg(date_from=None, date_to=None):
    """
    Build the analysisConfig and make an api request; retrieves the top five
    articles which generate the most registrations.
    :param date_from: start date as string; defaults to yesterday
    :param date_to: end date as string; defaults to yesterday
    :return: dataframe with the top five register articles, their
             registrations and their PIs
    """
    # Resolve defaults at call time: using api.get_datetime_yesterday() as a
    # default argument would evaluate it once at import and freeze the date.
    if date_from is None:
        date_from = api.get_datetime_yesterday()
    if date_to is None:
        date_to = api.get_datetime_yesterday()

    # build analysisConfig
    analysisConfig = {
        "hideFooters": [1],
        "startTime": date_from,
        "stopTime": date_to,
        "analysisObjects": [{
            "title": "Registrierung SSO",
            "rowLimit": 5
        }],
        "metrics": [{
            "title": "Anzahl Registrierungen Schranke",
            "sortOrder": "desc"
        }
        ]}

    # request data
    data = api.wt_get_data(analysisConfig)

    # parse data
    data = data["result"]["analysisData"]
    df = pd.DataFrame(data)
    df.columns = ["url", "registrierungen"]

    # get rid of the scheme (https://) in the url
    df.url = df.url.str.partition('://')[2]

    # get PIs of the top five register articles (all in one api call)
    df_pis = get_pis_of_url(df.url)

    # join registrierungen and their PIs
    df = df.join(df_pis.set_index('url'), on="url", how="left")

    # create date and rank
    df["date"] = pd.to_datetime(date_from)
    df["rank"] = range(1, 1 + len(df))

    # get title
    df["title"] = df.url.apply(get_title_from_tms)

    # move the three newest columns (date, rank, title) to the front
    cols = df.columns.tolist()
    df = df[cols[-3:] + cols[:-3]]

    # convert metric columns to numeric (unparsable values become NaN)
    convert_cols = df.columns.drop(['date', 'rank', 'title', 'url'])
    df[convert_cols] = df[convert_cols].apply(pd.to_numeric, errors='coerce')

    # lazy %-args: the message is only built if INFO logging is enabled
    logging.info('topartikel registrierungen imported from webtrekk for %s',
                 date_from)

    return df
コード例 #8
0
def get_data_top(date_from=None, date_to=None):
    """
    Build the analysisConfig and make an api request; retrieves the top five
    most read articles of the requested day.
    :param date_from: start date as string; defaults to yesterday
    :param date_to: end date as string; defaults to yesterday
    :return: dataframe with the top five most read articles, their visits
             and their referrer breakdown
    """
    # Resolve defaults at call time: using api.get_datetime_yesterday() as a
    # default argument would evaluate it once at import and freeze the date.
    if date_from is None:
        date_from = api.get_datetime_yesterday()
    if date_to is None:
        date_to = api.get_datetime_yesterday()

    # referrer metrics; "Visits Apple News (geschätzt)" was formerly
    # "Visits Direct iOS" (column name below still reflects the old title)
    referrer_titles = [
        "Visits Direct",
        "Visits Stationaer",
        "Visits mobile",
        "Visits Chrome Content Suggestions",
        "Visits Apple News (geschätzt)",
        "Visits Facebook (inkl. IAs)",
        "Visits Firefox Recommendations",
        "Visits Flipboard",
        "Visits Google News",
        "Visits Google Organisch",
        "Visits Push",
        "Visits Socialife",
        "Visits Upday",
        "Visits Twitter",
    ]

    # build analysisConfig: article pages only, sorted by total visits
    analysisConfig = {
        "hideFooters": [1],
        "startTime": date_from,
        "stopTime": date_to,
        "analysisFilter": {
            "filterRules": [{
                "objectTitle": "Seiten",
                "comparator": "=",
                "filter": "*.article.*"
            }]
        },
        "analysisObjects": [{
            "title": "Seiten",
            "rowLimit": 5
        }],
        "metrics": [{
            "title": "Visits *",
            "sortOrder": "desc"
        }] + [{"title": t} for t in referrer_titles]
    }

    # request data
    data = api.wt_get_data(analysisConfig)

    # parse data
    data = data["result"]["analysisData"]
    df = pd.DataFrame(data)
    df.columns = ["url", "visits", "visits_direct", "visits_stationaer", "visits_mobile",
                  "visits_chrome_sugg", "visits_direct_ios", "visits_facebook", "visits_firefox",
                  "visits_flipboard", "visits_google_news", "visits_google_organisch",
                  "visits_push", "visits_socialife", "visits_upday", "visits_twitter"]

    # create date and rank
    df["date"] = pd.to_datetime(date_from)
    df["rank"] = range(1, 1 + len(df))

    # use only url of article (strip the content id before '|') and get title
    df.url = df.url.str.partition('|')[2]
    df["title"] = df.url.apply(get_title_from_tms)

    # move the three newest columns (date, rank, title) to the front
    cols = df.columns.tolist()
    df = df[cols[-3:] + cols[:-3]]

    # convert metric columns to numeric (unparsable values become NaN)
    convert_cols = df.columns.drop(['date', 'rank', 'title', 'url'])
    df[convert_cols] = df[convert_cols].apply(pd.to_numeric, errors='coerce')

    # lazy %-args: the message is only built if INFO logging is enabled
    logging.info('topartikel imported from webtrekk for %s', date_from)

    return df
コード例 #9
0
def get_data_top_best(date_from=None, date_to=None):
    """
    Build the analysisConfig and make an api request; retrieves the top five
    abo articles with the most orders.
    :param date_from: start date as string; defaults to yesterday
    :param date_to: end date as string; defaults to yesterday
    :return: dataframe with the top five abo articles, their orders and
             their PIs
    """
    # Resolve defaults at call time: using api.get_datetime_yesterday() as a
    # default argument would evaluate it once at import and freeze the date.
    if date_from is None:
        date_from = api.get_datetime_yesterday()
    if date_to is None:
        date_to = api.get_datetime_yesterday()

    # build analysisConfig: paid-wall pages only, sorted by orders
    analysisConfig = {
        "hideFooters": [1],
        "startTime": date_from,
        "stopTime": date_to,
        "analysisFilter": {
            "filterRules": [{
                "objectTitle": "Wall - Status",
                "comparator": "=",
                "filter": "paid"
            }]
        },
        "analysisObjects": [{
            "title": "Seiten",
            "rowLimit": 5
        }],
        "metrics": [{
            "title": "Anzahl Bestellungen mit Seitenbezug",
            "sortOrder": "desc"
        }, {
            "title": "Page Impressions"
        }
        ]}

    # request data
    data = api.wt_get_data(analysisConfig)

    # parse data
    data = data["result"]["analysisData"]
    df = pd.DataFrame(data)
    df.columns = ["url", "bestellungen", "pis_schranke"]

    # create date and rank
    df["date"] = pd.to_datetime(date_from)
    df["rank"] = range(1, 1 + len(df))

    # use only url of article (strip the content id before '|') and get title
    df.url = df.url.str.partition('|')[2]
    df["title"] = df.url.apply(get_title_from_tms)

    # move the three newest columns (date, rank, title) to the front
    cols = df.columns.tolist()
    df = df[cols[-3:] + cols[:-3]]

    # convert metric columns to numeric (unparsable values become NaN)
    convert_cols = df.columns.drop(['date', 'rank', 'title', 'url'])
    df[convert_cols] = df[convert_cols].apply(pd.to_numeric, errors='coerce')

    # lazy %-args: the message is only built if INFO logging is enabled
    logging.info('topartikel bestellungen imported from webtrekk for %s',
                 date_from)

    return df
コード例 #10
0
def run_admanager_job(date_from=None,
                      date_to=None,
                      report_dict=None,
                      client=None):
    """
    Create the statement, run the report job and retrieve the report; then
    extract only the relevant impressions value.
    :param date_from: date_from as string 'YYYY-MM-DD'; defaults to yesterday
    :param date_to: date_to as string 'YYYY-MM-DD'; defaults to yesterday
    :param report_dict: dict with 'filter' (custom targeting value id,
                        ad unit id) and 'parent_flag'; required despite the
                        None default
    :param client: google ad manager client; required despite the None default
    :return: adimpressions for the given report configuration and date range
    """
    import os  # local import: only needed here for temp-file cleanup

    # Resolve defaults at call time: using api.get_datetime_yesterday() as a
    # default argument would evaluate it once at import and freeze the date.
    if date_from is None:
        date_from = api.get_datetime_yesterday()
    if date_to is None:
        date_to = api.get_datetime_yesterday()

    # set variables from input dict
    custom_targeting_value_id = report_dict['filter'][0]
    ad_unit_id = report_dict['filter'][1]
    parent_flag = report_dict['parent_flag']

    # set where condition; if parent_flag=True use PARENT_AD_UNIT_ID variable name
    if parent_flag:
        where_condition = 'CUSTOM_TARGETING_VALUE_ID = :customTargetingValueId AND ' \
                          'PARENT_AD_UNIT_ID = :adUnitId'
    else:
        where_condition = 'CUSTOM_TARGETING_VALUE_ID = :customTargetingValueId AND ' \
                          'AD_UNIT_ID = :adUnitId'

    # convert string to datetime object; required for API
    date_from = datetime.strptime(date_from, '%Y-%m-%d').date()
    date_to = datetime.strptime(date_to, '%Y-%m-%d').date()

    # Initialize a DataDownloader.
    report_downloader = client.GetDataDownloader(version='v202011')

    # Create statement object to filter
    statement = (googleads.ad_manager.StatementBuilder(
        version='v202011').Where(where_condition).WithBindVariable(
            'customTargetingValueId',
            custom_targeting_value_id).WithBindVariable(
                'adUnitId', ad_unit_id).Limit(None).Offset(None))

    # Create report job.
    report_job = {
        'reportQuery': {
            'dimensions': ['DATE', 'AD_UNIT_ID', 'CUSTOM_CRITERIA'],
            'columns': ['TOTAL_LINE_ITEM_LEVEL_IMPRESSIONS'],
            'dateRangeType': 'CUSTOM_DATE',
            'startDate': date_from,
            'endDate': date_to,
            'statement': statement.ToStatement()
        }
    }

    # Run the report and wait for it to finish.
    report_job_id = report_downloader.WaitForReport(report_job)

    # download report job to a temp CSV; delete=False so the file can be
    # reopened by name below — it is therefore removed explicitly afterwards
    # (the original version leaked one temp file per call)
    report_file = tempfile.NamedTemporaryFile(suffix='.csv', delete=False)
    try:
        report_downloader.DownloadReportToFile(report_job_id,
                                               export_format='CSV_DUMP',
                                               outfile=report_file,
                                               use_gzip_compression=False)
        report_file.close()

        # extract report data
        with open(report_file.name, 'rt') as report:
            df = pd.read_csv(report)
    finally:
        report_file.close()  # no-op if already closed
        os.remove(report_file.name)

    return df['Column.TOTAL_LINE_ITEM_LEVEL_IMPRESSIONS'][0]