"""
To understand this better you can visit
https://ga-dev-tools.appspot.com/account-explorer/ and log in with a GA
account.

What we do to get the dim_ua_profile table:

Firstly, we loop through all the Properties within the main account
(Bestseller (Universal)) to get information on all the Views under each
Property.

Secondly, we filter for the Views' profiles we want -- we only need
brand-country level Views to pull data from.

Thirdly, we add more columns with information we need -- sitebrand,
sitecountry, table_updated_time, etc.
"""
import pandas as pd
from datetime import datetime

from src.configure_logging import configure_logging
from src.ga_connector import GoogleAnalytics
from src.s3_toolkit import s3Toolkit

# To avoid printing out logs from GA's module we need to set up our own logger
logger = configure_logging(logger_name=__name__)

ACCOUNT = 66188758


def get_bucket_name(env):
    """
    Gets the bucket name for the chosen environment.

    Parameters:
    ----------
    env : string
        dev or prod

    Returns:
    ----------
    bucket_name : string
    """
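    # A minimal sketch of the body, assuming one bucket per environment.
    # The function body is missing in the source; the bucket names below
    # are placeholders, not the real ones.
    bucket_names = {
        'dev': 'example-bucket-dev',    # placeholder
        'prod': 'example-bucket-prod',  # placeholder
    }
    return bucket_names[env]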
from src.utils import *
from src.configure_logging import configure_logging

util_logger = configure_logging(logger_name='src.utils', level='INFO')
toolkit_logger = configure_logging(logger_name='src.aws_toolkit', level='WARNING')
logger = configure_logging(logger_name=__name__)

CHANNEL = 'gsc'


def process_df(df):
    """
    Transforms the input dataframe:
    - strips column names
    - extracts site_country from the page URL
    - gets brand_code from the brand dictionary
    - groups by the relevant dimensions:
      one row per date|site_brand|site_country|geo_country|device|query|page

    Parameters:
    ----------
    df : dataframe
        input dataframe from Funnel exports
    """
    gsc_brand_code_dict = get_brand_code_dict(CHANNEL)
    df.columns = [
        x.lower().replace('__google_search_console', '') for x in df.columns
    ]
    df['geo_country'] = df['country']
    df['countrycode'] = df['page'].apply(lambda x: x.split('/')[3])
    df['countrycode'] = df['countrycode'].str.upper()
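    # Why index 3? Splitting a full page URL on '/' puts the first path
    # segment at position 3 (hypothetical URL, shown for illustration):
    #   'https://www.example.com/de/product/123'.split('/')
    #   -> ['https:', '', 'www.example.com', 'de', 'product', '123']
    # so element 3 is the country-code segment ('de' -> 'DE' after upper()).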
from src.ga_connector import GoogleAnalytics
from src.configure_logging import configure_logging
from src.s3_toolkit import s3Toolkit
from src.utils import get_timestamp, get_bucket_name, get_ua_profile_list
import time
import pandas as pd
import logging

logger = configure_logging(logger_name=__name__, level=logging.INFO)

TIMESTAMP = get_timestamp()


def main(env, **date_ranges):
    t0 = time.time()
    bucket = get_bucket_name(env)
    s3 = s3Toolkit(bucket=bucket)
    profile_list = get_ua_profile_list('App', bucket)

    dimensions = ['ga:date', 'ga:deviceCategory']
    metrics = ['ga:sessions']

    # Initialize the connection and pull data from individual GA views
    ga = GoogleAnalytics()
    df_all = pd.DataFrame()
    for dim_ua_id, ua_id, name in profile_list:
        logger.info(f'Fetching {name} ({ua_id})')
        df = ga.fetch(view_id=ua_id, dimensions=dimensions,
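                      # The source is truncated here. A hedged sketch of the
                      # continuation, assuming the metrics list and date-range
                      # kwargs defined above are what close the call:
                      metrics=metrics, **date_ranges)
        # Assumed accumulation step: collect per-view results into df_all
        df_all = pd.concat([df_all, df], ignore_index=True)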
def handle_logs(configurations, pointer_table):
    now = datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S")
    get_databases(pointer_table, configurations)
    for log_group, config in configurations.items():
        update_log_group(log_group, config, now)
    with dynamodb.Table(pointer_table).batch_writer() as batch:
        for log_group, config in configurations.items():
            batch.put_item(
                Item={
                    "log_group": log_group,
                    "db_key": config["db_key"],
                    "update_time": now
                })


if __name__ == "__main__":
    configure_logging.configure_logging()
    log = structlog.get_logger("app")
    configurations = {
        config["log_group"]: config
        for config in json.loads(os.environ.get("CONFIGURATIONS", "[]"))
    }
    pointer_table = os.environ["POINTER_TABLE"]
    log.info("Run time!", configurations=configurations)
    handle_logs(configurations, pointer_table)
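# A hypothetical environment setup, shaped to match the keys this module
# actually reads ("log_group" and "db_key"); any further fields a config
# may carry are not shown in the source:
#
#   CONFIGURATIONS='[{"log_group": "/aws/lambda/my-fn", "db_key": "my_db"}]'
#   POINTER_TABLE='log-pointer-table'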
import pandas as pd
import boto3
from boto3.s3.transfer import TransferConfig
import os
from src.configure_logging import configure_logging
from io import StringIO
import time
import datetime

logger = configure_logging(logger_name=__name__, handler=False)


class s3Toolkit(object):
    def __init__(
        self,
        bucket,
        aws_access_key_id=None,
        aws_secret_access_key=None,
        region_name=None,
    ):
        self.bucket = bucket
        self.aws_access_key_id = aws_access_key_id
        self.aws_secret_access_key = aws_secret_access_key
        self.region_name = region_name
        self.client = self.create_client()

    def create_client(self):
        """
        Creates an S3 client object. If `aws_access_key_id`,
        `aws_secret_access_key` and `region_name` are None, the default
        behaviour of Boto3 applies (credentials and region are resolved
        through its standard lookup chain).
        """
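        # A minimal sketch of the body, assuming a plain boto3 client is
        # returned; passing None for these keyword arguments lets boto3
        # fall back to its default credential/region resolution.
        return boto3.client(
            's3',
            aws_access_key_id=self.aws_access_key_id,
            aws_secret_access_key=self.aws_secret_access_key,
            region_name=self.region_name,
        )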