Example no. 1
    To understand this better, you can visit https://ga-dev-tools.appspot.com/account-explorer/ and log in with a GA account

What we do to get the dim_ua_profile table:
    Firstly, we loop through all the Properties within the main account (Bestseller (Universal)) to get information related to all the Views under each Property.
    Secondly, we filter for the View profiles we want -- we only need brand-country-level Views to pull data from.
    Thirdly, we add the extra columns we need -- sitebrand, sitecountry, table_updated_time, etc.
"""

import pandas as pd
from datetime import datetime
from src.configure_logging import configure_logging
from src.ga_connector import GoogleAnalytics
from src.s3_toolkit import s3Toolkit

# to avoid printing logs from GA's module we set up our own logger
logger = configure_logging(logger_name=__name__)
ACCOUNT = 66188758
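

# A minimal sketch of the three steps described in the module docstring.
# The helpers `list_properties`/`list_views` and the `level` column are
# assumptions for illustration only -- the real implementation (truncated
# in this snippet) may differ.
def build_dim_ua_profile_sketch(ga, account=ACCOUNT):
    rows = []
    for prop in ga.list_properties(account):    # step 1: loop over Properties
        rows.extend(ga.list_views(prop))        # ...collecting every View
    df = pd.DataFrame(rows)
    df = df[df['level'] == 'brand-country']     # step 2: keep brand-country Views
    df['table_updated_time'] = datetime.utcnow()  # step 3: add metadata columns
    return df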


def get_bucket_name(env):
    """"
    Gets Bucket Name according to the chosen environment

    Parameters:
    ----------
        env : string
            dev or prod
    Returns:
    ----------
        bucket_name: string
    """
Example no. 2
from src.utils import *
from src.configure_logging import configure_logging

util_logger = configure_logging(logger_name='src.utils', level='INFO')
toolkit_logger = configure_logging(logger_name='src.aws_toolkit',
                                   level='WARNING')
logger = configure_logging(logger_name=__name__)

CHANNEL = 'gsc'


def process_df(df):
    """
    Transforms the input dataframe:
    - strip column names
    - extract site_country from page url
    - get brand_code from brand dictionary
    - group by relevant dimensions: one row per date|site_brand|site_country|geo_country|device|query|page

    Parameters:
    ----------
    df: dataframe
        input dataframe from funnel exports
    """
    gsc_brand_code_dict = get_brand_code_dict(CHANNEL)
    df.columns = [
        x.lower().replace('__google_search_console', '') for x in df.columns
    ]
    df['geo_country'] = df['country']
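    # For a page URL like https://host/xx/..., split('/') gives
    # ['https:', '', host, 'xx', ...], so index 3 is the country segment.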
    df['countrycode'] = df['page'].apply(lambda x: x.split('/')[3])
    df['countrycode'] = df['countrycode'].str.upper()
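    # Assumed continuation -- the snippet is truncated here. Per the
    # docstring: derive site_country, map the brand dictionary (assumed to
    # key on the page's domain), and aggregate to one row per
    # date|site_brand|site_country|geo_country|device|query|page.
    df['site_country'] = df['countrycode']
    df['site_brand'] = df['page'].apply(lambda x: x.split('/')[2]).map(gsc_brand_code_dict)
    dims = ['date', 'site_brand', 'site_country', 'geo_country',
            'device', 'query', 'page']
    return df.groupby(dims, as_index=False).sum(numeric_only=True)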
Example no. 3
from src.ga_connector import GoogleAnalytics
from src.configure_logging import configure_logging
from src.s3_toolkit import s3Toolkit
from src.utils import get_timestamp, get_bucket_name, get_ua_profile_list
import time
import pandas as pd
import logging

logger = configure_logging(logger_name=__name__, level=logging.INFO)
TIMESTAMP = get_timestamp()


def main(env, **date_ranges):

    t0 = time.time()

    bucket = get_bucket_name(env)
    s3 = s3Toolkit(bucket=bucket)

    profile_list = get_ua_profile_list('App', bucket)

    dimensions = ['ga:date', 'ga:deviceCategory']
    metrics = ['ga:sessions']

    # initialize connection and pull data from individual GA views
    ga = GoogleAnalytics()
    df_all = pd.DataFrame()
    for dim_ua_id, ua_id, name in profile_list:
        logger.info(f'Fetching {name} ({ua_id})')
        df = ga.fetch(view_id=ua_id,
                      dimensions=dimensions,
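                      metrics=metrics,
                      **date_ranges)  # assumed remaining args -- snippet truncated here
        df_all = pd.concat([df_all, df])  # assumed accumulation into df_all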
Example no. 4
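# Imports assumed from the truncated top of this file (not shown in the
# snippet): standard library, structlog, boto3, and the local
# configure_logging module used below. `get_databases` and
# `update_log_group` are assumed to be defined elsewhere in this file.
import datetime
import json
import os

import boto3
import structlog

import configure_logging

dynamodb = boto3.resource("dynamodb")

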
def handle_logs(configurations, pointer_table):
    now = datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S")
    get_databases(pointer_table, configurations)

    for log_group, config in configurations.items():
        update_log_group(log_group, config, now)

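    # Persist each log group's pointer in a single DynamoDB batch write.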
    with dynamodb.Table(pointer_table).batch_writer() as batch:
        for log_group, config in configurations.items():
            batch.put_item(
                Item={
                    "log_group": log_group,
                    "db_key": config["db_key"],
                    "update_time": now
                })


if __name__ == "__main__":
    configure_logging.configure_logging()
    log = structlog.get_logger("app")

    configurations = {
        config["log_group"]: config
        for config in json.loads(os.environ.get("CONFIGURATIONS", "[]"))
    }
    pointer_table = os.environ["POINTER_TABLE"]

    log.info("Run time!", configurations=configurations)

    handle_logs(configurations, pointer_table)
Example no. 5
import pandas as pd
import boto3
from boto3.s3.transfer import TransferConfig
import os
from src.configure_logging import configure_logging
from io import StringIO
import time
import datetime

logger = configure_logging(logger_name=__name__, handler=False)


class s3Toolkit:
    def __init__(
        self,
        bucket,
        aws_access_key_id=None,
        aws_secret_access_key=None,
        region_name=None,
    ):
        self.bucket = bucket
        self.aws_access_key_id = aws_access_key_id
        self.aws_secret_access_key = aws_secret_access_key
        self.region_name = region_name
        self.client = self.create_client()

    def create_client(self):
        """
        Creates an S3 client object.

        If `aws_access_key_id`, `aws_secret_access_key` and `region_name` are all None, Boto3's
        default credential chain (environment variables, shared config, IAM role) is used.
        """
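        # Assumed body -- the original snippet is truncated at this point.
        # Passing None for any of these lets boto3 fall back to its defaults.
        return boto3.client(
            's3',
            aws_access_key_id=self.aws_access_key_id,
            aws_secret_access_key=self.aws_secret_access_key,
            region_name=self.region_name,
        )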