Exemple #1
0
import pandas as pd
import matplotlib.pyplot as plt
from functions import return_ga_data, save_df_to_excel, clean_columns, VIEW_ID

# Sessions-per-user by channel grouping over the last 30 days:
# fetch the report, store it, and draw a bar chart.
spu_metrics = [{'expression': 'ga:sessionsPerUser'}]
spu_dimensions = [{'name': 'ga:channelGrouping'}]

df = return_ga_data(
    start_date='30daysAgo',
    end_date='yesterday',
    view_id=VIEW_ID,
    metrics=spu_metrics,
    dimensions=spu_dimensions,
)

# Keep just the dimension and metric columns, in display order.
df = df[['ga:channelGrouping', 'ga:sessionsPerUser']]

# Called for its effect on df (return value unused); presumably strips the
# 'ga:' prefix, since the plot below uses un-prefixed names -- confirm.
clean_columns(df)

# NOTE: the helper is named save_df_to_excel but the target name ends in .csv.
save_df_to_excel(df, 'spu.csv')

# NOTE(review): head(10) is applied BEFORE the sort, so this charts the first
# ten rows (sorted), not the ten highest values -- confirm that is intended.
first_rows = df.head(10)
ranked = first_rows.sort_values('sessionsPerUser', ascending=False)
ranked.plot(kind='bar', x='channelGrouping', y='sessionsPerUser')
plt.show()
# Page-level report (pageviews, unique pageviews, time on page).
# NOTE(review): start_date and end_date are not assigned in this snippet;
# the only assignment visible is commented out below -- confirm where they
# are expected to come from.
# start_date = sys.argv[1]
# end_date = start_date

# Dimension names are listed once and reused for group_by.
page_dimension_names = (
    'ga:dimension1',  # session ID
    'ga:dimension3',  # timestamp
    # page dimensions
    'ga:hostname',
    'ga:pagePath',
)
page_metric_names = ('ga:pageviews', 'ga:uniquePageviews', 'ga:timeOnPage')

# base dims and all metrics
pages = return_ga_data(
    start_date=start_date,
    end_date=end_date,
    view_id=VIEW_ID,
    metrics=[{'expression': metric} for metric in page_metric_names],
    dimensions=[{'name': dimension} for dimension in page_dimension_names],
    group_by=list(page_dimension_names),
    dimensionFilterClauses=[],
    segments=[],
)

# for console logs
print('start preprocessing pageviews data')

# Replace missing values with the literal string 'na', modifying pages itself.
pages.fillna('na', inplace=True)
Exemple #3
0
    https://developers.google.com/analytics/devguides/reporting/core/dimsmets#cats=user,session,traffic_sources,adwords,goal_conversions,platform_or_device,geo_network,system,social_activities,page_tracking,content_grouping,internal_search,site_speed,app_tracking,event_tracking,ecommerce,social_interactions,user_timings,exceptions,content_experiments,custom_variables_or_columns,time,doubleclick_campaign_manager,audience,adsense,ad_exchange,doubleclick_for_publishers,doubleclick_for_publishers_backfill,lifetime_value_and_cohorts,channel_grouping,related_products,doubleclick_bid_manager,doubleclick_search

"""

from functions import return_ga_data, clean_df_columns, format_all_dates
import constants

# ---------------------------------------------------------------------------
# Do Stuff
# ---------------------------------------------------------------------------

# Every query parameter comes from the constants module; they are gathered
# into one mapping and unpacked into the call.
session_query = {
    'start_date': constants.START_DATE,
    'end_date': constants.END_DATE,
    'view_id': constants.VIEW_ID,
    'metrics': constants.SESSIONS_METRICS,
    'dimensions': constants.SESSIONS_DIMENSIONS,
    'segments': constants.SEGMENTS,
    'split_dates': True,
    'group_by': [],
}
sessions_df = return_ga_data(**session_query)

# Rename columns
sessions_df = clean_df_columns(sessions_df)

# Reformat the date column via format_all_dates, but only when one is present.
if 'date' in sessions_df:
    sessions_df['date'] = format_all_dates(sessions_df.date)
Exemple #4
0
from functions import return_ga_data, save_df_to_excel
### Returns sessions and transactions by channel for a specific Analytics View Id

# be sure to add correct view ID
df = return_ga_data(
    start_date='7daysAgo',
    end_date='yesterday',
    view_id='ADD VIEW ID',
    metrics=[
        {'expression': 'ga:sessions'},
        {'expression': 'ga:transactions'},
    ],
    dimensions=[{'name': 'ga:channelGrouping'}],
)

# Select only the columns that were requested above. The previous selection
# also listed 'ga:pageValue', which is not among the requested metrics; a
# list-based column selection raises KeyError for a missing label.
# NOTE(review): assumes return_ga_data yields one column per requested
# dimension/metric -- confirm. If page value is wanted, add it to `metrics`.
df = df[['ga:channelGrouping', 'ga:sessions', 'ga:transactions']]

# Drop the 3-character 'ga:' prefix from every column name in one pass.
# rename() returns a new frame, so the selection above is not mutated in place.
df = df.rename(columns=lambda name: name[3:])

# print out dataframe, highest session count first
print(df.sort_values('sessions', ascending=False))
# Goal 1 completions per ISO year / ISO week, queried with a filter clause on
# ga:userType that matches 'new visitor' (case-insensitive, not negated).
new_visitor_clause = {
    'operator': 'OR',
    'filters': [{
        'dimensionName': 'ga:userType',
        'not': False,
        'expressions': ['new visitor'],
        'caseSensitive': False,
    }],
}

df_new_users = return_ga_data(
    start_date=query_start_date,
    end_date=query_end_date,
    view_id=ga_view_id,
    metrics=[{'expression': 'ga:goal1Completions'}],
    dimensions=[{'name': name} for name in ('ga:isoYear', 'ga:isoWeek')],
    split_dates=False,
    dimensionFilterClauses=[new_visitor_clause],
)
Exemple #6
0
# Setup for the windowed export loop that follows: the loop compares
# temp_start_date against end_date minus `step` days.
step = 20  # This is a relatively arbitrary value to try to reduce the incidence of API errors

# The upper bound is yesterday, round-tripped through a 'YYYY-MM-DD' string so
# the resulting timestamp carries no time-of-day component.
yesterday = (datetime.now() - timedelta(days=1)).strftime('%Y-%m-%d')
end_date = pd.to_datetime(yesterday)

# Placeholder: replace 'YYYY-MM-DD' with the first day to export before running.
start_date = pd.to_datetime('YYYY-MM-DD')
temp_start_date = start_date

# Empty frame that the loop below appends each window's rows to.
df = pd.DataFrame()

while temp_start_date <= end_date - timedelta(step):
    for n in range(0, 5):
        try:
            df = df.append(return_ga_data(
                start_date= datetime.strftime(temp_start_date, '%Y-%m-%d'),
                end_date = datetime.strftime(temp_start_date + timedelta(step), '%Y-%m-%d'),
                view_id=<YOUR_GA_VIEW_ID>,
                metrics=[
                        {'expression': 'ga:transactionRevenue'},
                        {'expression': 'ga:transactions'},
                        {'expression': 'ga:transactionTax'}
                        ],
                dimensions=[
                        {'name': 'ga:transactionId'},
                        {'name': 'ga:dateHourMinute'},
                        {'name': 'ga:channelGrouping'},
                        {'name': 'ga:source'},
                        {'name': 'ga:medium'},
                        {'name': 'ga:campaign'},
                        {'name': 'ga:keyword'},
                        {'name': 'ga:deviceCategory'},
Exemple #7
0
from functions import return_ga_data, save_df_to_excel

# Sessions by traffic source for 2017-09-13 through 2017-09-21, queried with a
# filter clause on ga:userType that matches 'new visitor'.
source_dimension = 'ga:source'
new_visitor_only = {
    'operator': 'OR',
    'filters': [{
        'dimensionName': 'ga:userType',
        'not': False,
        'expressions': ['new visitor'],
        'caseSensitive': False,
    }],
}

df = return_ga_data(
    start_date='2017-09-13',
    end_date='2017-09-21',
    view_id='100555616',
    metrics=[{'expression': 'ga:sessions'}],
    dimensions=[{'name': source_dimension}],
    split_dates=True,
    group_by=[source_dimension],
    dimensionFilterClauses=[new_visitor_only],
    segments=[],
)

print(df)
Exemple #8
0
# Session-level report: engagement, goal and e-commerce metrics.
# NOTE(review): GA's documented spellings are 'ga:pageviews' and
# 'ga:goalXXCompletions'; the casing below is kept exactly as found --
# confirm the API (and any downstream column lookups) accept it.
session_metric_names = (
    'ga:sessions',
    'ga:bounces',
    'ga:sessionDuration',
    'ga:pageViews',
    'ga:goal1completions',
    'ga:goal2completions',
    'ga:transactions',
    'ga:transactionRevenue',
)

# Dimension names are listed once and reused for group_by.
session_dimension_names = (
    'ga:dimension1',  # session ID
    'ga:dimension2',  # client ID
    'ga:date',
    'ga:hour',
    'ga:minute',
    'ga:landingPagePath',
    'ga:deviceCategory',
)

sessions1 = return_ga_data(
    start_date=start_date,
    end_date=end_date,
    view_id=VIEW_ID,
    metrics=[{'expression': metric} for metric in session_metric_names],
    dimensions=[{'name': dimension} for dimension in session_dimension_names],
    group_by=list(session_dimension_names),
    dimensionFilterClauses=[],
    segments=[],
)
Exemple #9
0
# Product-level e-commerce report: revenue, quantity and funnel counts.
ecom_metric_names = (
    'ga:itemRevenue',
    'ga:itemQuantity',
    'ga:productListViews',
    'ga:productDetailViews',
    'ga:productAddsToCart',
    'ga:productRemovesFromCart',
    'ga:productCheckouts',
    'ga:uniquePurchases',
)

# Dimension names are listed once and reused for group_by.
ecom_dimension_names = (
    'ga:dimension1',  # session ID
    'ga:dimension3',  # timestamp
    'ga:productName',
    # custom dimensions
    'ga:dimension11',  # bundle
    'ga:dimension15',  # shipping method
    'ga:dimension17',  # fulfillment type
    'ga:dimension18',  # add to cart context
)

ecom = return_ga_data(
    start_date=start_date,
    end_date=end_date,
    view_id=VIEW_ID,
    metrics=[{'expression': metric} for metric in ecom_metric_names],
    dimensions=[{'name': dimension} for dimension in ecom_dimension_names],
    group_by=list(ecom_dimension_names),
    dimensionFilterClauses=[],
    segments=[],
)
Exemple #10
0
# Event-level report: event counts/value plus goal and transaction metrics.
# NOTE(review): GA's documented spelling is 'ga:goalXXCompletions'; the
# lowercase 'completions' below is kept exactly as found -- confirm the API
# (and any downstream column lookups) accept it.
event_metric_names = (
    'ga:totalEvents',
    'ga:uniqueEvents',
    'ga:eventValue',
    'ga:goal1completions',
    'ga:goal2completions',
    'ga:transactions',
    'ga:transactionRevenue',
)

# Dimension names are listed once and reused for group_by.
event_dimension_names = (
    'ga:dimension1',  # session ID
    'ga:dimension3',  # timestamp
    # event parameters
    'ga:eventCategory',
    'ga:eventAction',
    'ga:eventLabel',
    'ga:dimension4',  # account id
    'ga:dimension5',  # plan
)

events1 = return_ga_data(
    start_date=start_date,
    end_date=end_date,
    view_id=VIEW_ID,
    metrics=[{'expression': metric} for metric in event_metric_names],
    dimensions=[{'name': dimension} for dimension in event_dimension_names],
    group_by=list(event_dimension_names),
    dimensionFilterClauses=[],
    segments=[],
)
Exemple #11
0
# Transaction-level report: revenue, tax, shipping, quantity and refunds.
transaction_metric_names = (
    'ga:transactionRevenue',
    'ga:transactionTax',
    'ga:transactionShipping',
    'ga:itemQuantity',
    'ga:totalRefunds',
    'ga:refundAmount',
)

# Dimension names are listed once and reused for group_by.
transaction_dimension_names = (
    'ga:dimension1',  # session ID
    'ga:dimension3',  # timestamp
    'ga:dimension4',  # account id
    'ga:transactionId',
)

transactions = return_ga_data(
    start_date=start_date,
    end_date=end_date,
    view_id=VIEW_ID,
    metrics=[{'expression': metric} for metric in transaction_metric_names],
    dimensions=[
        {'name': dimension} for dimension in transaction_dimension_names
    ],
    group_by=list(transaction_dimension_names),
    dimensionFilterClauses=[],
    segments=[],
)