def test_fake_query(self):
    """Exercise the fake Google service end-to-end: profile lookup plus
    table fetches with dimensions, casts, and thresholds."""
    account = model.Account.create(service='google')
    Session.commit()
    q = api.account.query_service(self.request, account=account)
    r = q.get_profile()
    self.assertEqual(r[u'websiteUrl'], u'example.com')
    # Repeat the identical query; presumably this exercises the cached
    # service path and must return the same data — TODO confirm intent.
    q = api.account.query_service(self.request, account=account)
    r = q.get_profile()
    self.assertEqual(r[u'websiteUrl'], u'example.com')
    t = q.get_table({'max-results': 5},
                    dimensions=[
                        Column('ga:pagePath'),
                    ],
                    metrics=[
                        Column('ga:pageviews', type_cast=int, threshold=0),
                        Column('ga:nthWeek'),
                    ])
    # Expected values come from the fake service's canned fixture data.
    self.assertEqual(len(t.rows), 5)
    self.assertEqual(t.rows[1].get('ga:pagePath'), '/account/create')
    self.assertEqual(t.rows[1].get('ga:pageviews'), 15001)
    self.assertEqual(t.get('ga:pageviews').max_row[0], 16399)
    t = q.get_table({'max-results': 7}, dimensions=[Column('ga:month')])
    self.assertEqual([list(m) for m in t.iter_rows()],
                     [[u'01']] * 6 + [[u'02']])
def test_rows(self):
    """Verify Table row bookkeeping: per-column min/max tracking, visible
    iteration order, and row tagging."""
    def positive_int(n):
        # Falsy or negative values are treated as missing (None).
        return None if (not n or n < 0) else int(n)

    table = Table([
        Column('foo', visible=1),
        Column('bar', visible=0),
        Column('baz', type_cast=positive_int, average=100),
    ])
    sample_rows = [
        (9999, '1', 123),
        (123, '2', 1234),
        (0, '3', 23),
        (123, '4', 123),
        (123, '5', 0),
    ]
    for values in sample_rows:
        table.add(values)

    self.assertEqual(len(table.rows), len(sample_rows))
    # Columns without a type_cast never measure, so min/max stay unset.
    self.assertEqual(table.get('foo').min_row, (None, None))
    self.assertEqual(table.get('bar').max_row, (None, None))
    self.assertEqual(table.get('baz').min_row[0], 23)
    self.assertEqual(table.get('baz').max_row[0], 1234)

    visible = table.iter_visible()
    # First yield is the header: columns in visibility order.
    self.assertEqual(list(next(visible)), [table.columns[1], table.columns[0]])
    self.assertEqual(list(next(visible)), ['1', 9999])

    table.tag_rows()
    self.assertFalse(table.rows[-1].tags)
def _get_search_keywords(self, google_query, interval_field):
    """Top organic search keywords per interval, excluding '(not provided)'.

    Fetches an extra trailing period (previous_date_start) for comparisons.
    """
    query_params = {
        'ids': 'ga:%s' % self.remote_id,
        'start-date': self.previous_date_start,  # Extra week
        'end-date': self.date_end,
        'sort': '-{},-ga:sessions'.format(interval_field),
        'filters': 'ga:keyword!=(not provided);ga:medium==organic',
        'max-results': '5',
    }
    query_dimensions = [
        Column(interval_field),
        Column('ga:keyword', label='Search Keywords',
               type_cast=_prune_abstract, visible=1),
    ]
    query_metrics = [
        Column('ga:sessions', label='Visits', type_cast=int,
               type_format=h.human_int, visible=0, threshold=0),
        Column('ga:avgSessionDuration', label='Time On Site',
               type_cast=_cast_time, type_format=h.human_time, threshold=0),
        Column('ga:bounceRate', label='Bounce Rate', type_cast=_cast_percent,
               type_format=_format_percent, reverse=True, threshold=0),
    ]
    keywords_table = self._get_interval_table(
        google_query, interval_field,
        params=query_params,
        dimensions=query_dimensions,
        metrics=query_metrics,
    )
    keywords_table.set_visible('ga:sessions', 'ga:keyword')
    return keywords_table
def _get_ecommerce(self, google_query, interval_field, limit=10):
    """Build the product revenue/sales table, truncated to ``limit`` rows
    with a synthetic "(N other products)" row aggregating the remainder.
    Each row is tagged with a human-readable sales count."""
    t = google_query.get_table(
        params={
            'ids': 'ga:%s' % self.remote_id,
            #'start-date': self.previous_date_start, # Extra week
            'start-date': self.date_start,
            'end-date': self.date_end,
            'sort': '-{}'.format(interval_field),
        },
        dimensions=[
            Column(interval_field),
            Column('ga:productName', label="Product", visible=1),
        ],
        metrics=[
            Column('ga:itemRevenue', label="Revenue", type_cast=float, type_format=_format_dollars, visible=0, threshold=0),
            Column('ga:itemQuantity', label="Sales", type_cast=int, type_format=h.human_int),
        ],
    )
    t.sort(reverse=True)

    # Add a limit row
    sum_columns = t.column_to_index['ga:itemRevenue'], t.column_to_index['ga:itemQuantity']
    if len(t.rows) > limit:
        # Fold every row past the cutoff into one synthetic row whose
        # revenue and quantity are the sums of the folded rows.
        extra, t.rows = t.rows[limit - 1:], t.rows[:limit - 1]
        values = extra[0].values[:]
        for row in extra[1:]:
            for col_idx in sum_columns:
                values[col_idx] += row.values[col_idx]
        values[t.column_to_index['ga:productName']] = "(%s)" % h.format_int(len(extra), u"{:,} other product")
        t.add(values)

    # Tag each row with its sales count.
    idx_sales = t.column_to_index['ga:itemQuantity']
    for row in t.rows:
        v = row.values[idx_sales]
        row.tag(h.format_int(v, u"{:,} Sale"))

    # Add total row
    #row = t.rows[-1].values[:]
    #row[t.column_to_index['ga:itemRevenue']] = t.get('ga:itemRevenue').sum
    #row[t.column_to_index['ga:productName']] = '(total)'
    #t.add(row)

    # Old work from extra week mode
    #split_table_delta(t, interval_field, 'ga:productName', 'ga:itemRevenue')

    return t
def _get_interval_table(self, google_query, interval_field, params, dimensions=None, metrics=None, _cache_keys=None):
    """Fetch a table keyed by ``interval_field``.

    For the synthetic 'bm:quarter' interval (not supported natively by GA)
    this issues one query per quarter in the requested date range and
    stitches the rows into a single table, injecting a "YYYYQn" value at
    the quarter column.  For any other interval it is a plain pass-through
    to ``google_query.get_table``.
    """
    if interval_field != 'bm:quarter':
        return google_query.get_table(params=params, dimensions=dimensions, metrics=metrics, _cache_keys=_cache_keys)

    # Work on a copy: the previous `dimensions += [...]` mutated the
    # caller's list in place, and raised TypeError when dimensions was None.
    all_dimensions = list(dimensions or []) + [Column('ga:year')]
    columns = google_query._columns_to_params(params.copy(), dimensions=all_dimensions, metrics=metrics)
    result = Table(columns).new()  # Decouple column refs
    quarter_idx = result.column_to_index['bm:quarter']

    # The per-quarter queries must not include the synthetic quarter column.
    # We assume that there are no date dimensions... Maybe not a safe assumption?
    query_dimensions = [d for d in all_dimensions if not d.id.startswith('bm:quarter')]

    for (yr, q) in reversed(list(iter_quarters(params['start-date'], params['end-date']))):
        start_date, end_date = quarter_to_dates(q, yr)
        quarter_params = params.copy()
        quarter_params.update({
            'start-date': start_date,
            'end-date': end_date,
        })
        # Sorting by the (absent) quarter field would be rejected upstream.
        quarter_params.pop('sort', None)
        t = google_query.get_table(params=quarter_params, dimensions=query_dimensions, metrics=metrics, renew=True, _cache_keys=_cache_keys)
        quarter_str = "{}Q{}".format(yr, q)
        for row in t.rows:
            vals = row.values
            vals.insert(quarter_idx, quarter_str)
            result.add(vals)
    return result
def inject_table_delta(a, b, join_column, compare_column='ga:pageviews', num_normal=10, num_missing=5):
    """
    Annotate rows in table `a` with deltas from table `b`. Also add
    `num_missing` top rows from `b` that are not present in `a`.

    It's basically a left-join with extra sauce.
    """
    col_compare = a.get(compare_column)
    a_lookup = set(source for source, in a.iter_rows(join_column))
    b_lookup = dict(
        (source, views)
        for source, views in b.iter_rows(join_column, compare_column))

    # Annotate delta tags
    col_compare_delta = Column('%s:delta' % col_compare.id,
                               label=col_compare.label,
                               type_cast=float,
                               type_format=h.human_delta,
                               threshold=0)
    idx_join, idx_compare = a.column_to_index[join_column], a.column_to_index[compare_column]
    for row in a.rows:
        j, views = row.values[idx_join], row.values[idx_compare]
        last_views = b_lookup.get(j) or 0
        # Guard against ZeroDivisionError: previously this divided by
        # `views` unconditionally and crashed when a row had 0 views.
        views_delta = (views - last_views) / float(views) if views else 0.0
        if not last_views:
            row.tag(type='new')
        elif abs(views_delta) > 0.20:
            row.tag(type='delta', value=views_delta, column=col_compare_delta)

    a.limit(num_normal)

    # Add missing entries: top rows of `b` absent from `a`, shown as losses.
    for source, views in b.iter_rows(join_column, compare_column):
        if source in a_lookup:
            continue
        if col_compare.is_boring(views):
            break  # Done early
        # NOTE(review): assumes `a` has exactly six columns in this layout —
        # confirm against callers before generalizing.
        row = a.add([source, 0, 0, None, None, -views], is_measured=False)
        row.tag(type='views', value=h.human_int(-views), is_positive=False, is_prefixed=True)
        num_missing -= 1
        if num_missing <= 0:
            break
def _get_social_search(self, google_query, date_start, date_end, summary_metrics, max_results=10):
    """Merge organic/search traffic and social-network traffic into one
    table sorted by pageviews (descending)."""
    def fetch(dimension, extra_params):
        # Shared query shell; only the dimension and filters differ.
        params = {
            'ids': 'ga:%s' % self.remote_id,
            'start-date': date_start,
            'end-date': date_end,
            'sort': '-ga:pageviews',
            'max-results': str(max_results),
        }
        params.update(extra_params)
        return google_query.get_table(
            params=params,
            dimensions=[dimension],
            metrics=[col.new() for col in summary_metrics],
        )

    # Non-referral, non-social traffic keyed by source.
    organic_table = fetch(
        Column('ga:source', type_cast=_prune_abstract),
        {'filters': 'ga:medium!=referral;ga:medium!=(not set);ga:socialNetwork==(not set)'},
    )
    # Social traffic keyed by network.
    social_table = fetch(
        Column('ga:socialNetwork', type_cast=_prune_abstract),
        {},
    )

    combined = Table(columns=[
        Column('source', label='Social & Search & Campaigns', visible=1,
               type_cast=_cast_title),
    ] + [col.new() for col in summary_metrics])
    for cells in social_table.iter_rows():
        combined.add(cells)
    for cells in organic_table.iter_rows():
        combined.add(cells)
    combined.sort(reverse=True)
    return combined
def explore_api(request):
    """Admin endpoint: run an ad-hoc GA query against a report's profile.

    Reads ``report_id`` (required) plus optional ``dimensions``,
    ``metrics``, ``extra``, ``date_start`` and ``date_end`` from the
    request params, and returns the resulting table.

    :raises APIControllerError: on unknown report id, or on an invalid
        metric/dimension name.
    """
    u = api.account.get_admin(request)
    a = u.get_account(service='google')
    report_id, dimensions, metrics, extra, date_start, date_end = get_many(
        request.params, ['report_id'],
        optional=['dimensions', 'metrics', 'extra', 'date_start', 'date_end'])
    report = model.Report.get_by(account_id=a.id, id=report_id)
    if not report:
        raise APIControllerError("Invalid report id: %s" % report_id)

    cache_keys = ('admin/explore_api', )
    google_query = api.account.query_service(request, report.account, cache_keys=cache_keys)

    # Default to the trailing week when no explicit range is given.
    date_end = date_end or date.today()
    date_start = date_start or date_end - timedelta(days=7)

    params = {
        'ids': 'ga:%s' % report.remote_id,
        'start-date': date_start,
        'end-date': date_end,
    }
    if metrics:
        metrics = [Column(m) for m in metrics.split(',')]
    if dimensions:
        dimensions = [Column(m) for m in dimensions.split(',')]
    if extra:
        # `extra` is a raw query-string fragment, e.g. "filters=...&sort=...".
        params.update(part.split('=', 1) for part in extra.split('&'))
    try:
        r = google_query.get_table(params, metrics=metrics, dimensions=dimensions)
    except KeyError as e:
        raise APIControllerError("Invalid metric or dimension: %s" % e.args[0])
    return {'table': r}
def test_join(self):
    """split_table_delta keeps only the first split group and tags each
    joined row with the value delta against the second group."""
    t = Table([
        Column('value'),
        Column('joincol'),
        Column('splitcol'),
    ])
    expected = t.new()

    for row in [
        (1, 'foo', 'a'),
        (2, 'bar', 'a'),
        (3, 'baz', 'a'),
        (8, 'bar', 'b'),
        (7, 'baz', 'b'),
        (6, 'quux', 'b'),
    ]:
        t.add(row)

    # Group 'a' survives; 'bar' and 'baz' pick up deltas vs. group 'b'.
    expected.add((1, 'foo', 'a'))
    expected.add((2, 'bar', 'a')).tag('Value', -6)
    expected.add((3, 'baz', 'a')).tag('Value', -4)

    split_table_delta(t, 'splitcol', 'joincol', 'value')
    self.assertEqual(dump(t), dump(expected))
def _get_geo(self, google_query, summary_metrics):
    """Top-5 countries by users, with each country's share of the total
    users (across the top-50 fetched) appended as a percentage column."""
    source_table = google_query.get_table(
        params={
            'ids': 'ga:%s' % self.remote_id,
            'start-date': self.date_start,
            'end-date': self.date_end,
            'sort': '-ga:users',
            'max-results': '50',
        },
        dimensions=[
            Column('ga:country', label='Country', visible=1,
                   type_cast=_prune_abstract),
        ],
        metrics=[col.new() for col in summary_metrics] + [
            Column('ga:sessionsPerUser', label='Sessions Per User',
                   threshold=0.0, type_cast=float,
                   type_format=_format_float),
        ],
    )
    total_users = float(source_table.get('ga:users').sum)

    result = Table(columns=[col.new() for col in source_table.columns] + [
        Column('users', label='Users', visible=0,
               type_format=_format_percent),
    ])
    # The absolute-users column should not threshold away rows here.
    result.get('ga:users')._threshold = None
    result.set_visible('users', 'ga:country')

    users_idx = source_table.column_to_index['ga:users']
    for row in source_table.rows[:5]:
        share = row.values[users_idx] * 100.0 / total_users
        result.add(row.values + [share])
    result.tag_rows()
    return result
def _get_search_keywords(self, google_query, interval_field):
    """Top organic keywords by users, per interval.

    TODO: Not used anymore, should be safe to remove.
    """
    keyword_table = google_query.get_table(
        params={
            'ids': 'ga:%s' % self.remote_id,
            'start-date': self.date_start,  # Extra week
            'end-date': self.date_end,
            'sort': '-{},-ga:users'.format(interval_field),
            'filters': 'ga:keyword!=(not provided);ga:medium==organic',
            'max-results': '5',
        },
        dimensions=[
            Column(interval_field),
            Column('ga:keyword', label='Search Keywords',
                   type_cast=_prune_abstract, visible=1),
        ],
        metrics=[
            Column('ga:users', label='Users', type_cast=int,
                   type_format=h.human_int, visible=0, threshold=0),
            Column('ga:avgSessionDuration', label='Session',
                   type_cast=_cast_time, type_format=h.human_time,
                   threshold=0),
        ],
    )
    keyword_table.set_visible('ga:users', 'ga:keyword')
    return keyword_table
def _get_summary(self, google_query, interval_field, metrics):
    """Fetch the interval-keyed summary table, including an extra trailing
    period for period-over-period comparison."""
    return self._get_interval_table(
        google_query,
        interval_field,
        params={
            'ids': 'ga:%s' % self.remote_id,
            'start-date': self.previous_date_start,  # Extra week
            'end-date': self.date_end,
            'sort': '-{}'.format(interval_field),
        },
        dimensions=[
            Column(interval_field),
        ],
        metrics=metrics,
    )
def fetch(self, google_query): last_month_date_start = (self.date_start - datetime.timedelta(days=self.date_start.day + 1)).replace(day=1) # Summary summary_metrics = [ Column('ga:pageviews', label='Views', type_cast=int, type_format=h.human_int, threshold=0, visible=0), Column('ga:users', label='Uniques', type_cast=int, type_format=h.human_int), Column('ga:avgSessionDuration', label='Time On Site', type_cast=_cast_time, type_format=h.human_time, threshold=0), Column('ga:bounceRate', label='Bounce Rate', type_cast=_cast_percent, type_format=_format_percent, reverse=True, threshold=0), ] self.tables['summary'] = google_query.get_table( params={ 'ids': 'ga:%s' % self.remote_id, 'start-date': last_month_date_start, # Extra month 'end-date': self.date_end, 'sort': '-ga:yearMonth', }, dimensions=[ Column('ga:yearMonth'), ], metrics=summary_metrics + [Column('ga:sessions', type_cast=int)], ) self.tables['geo'] = google_query.get_table( params={ 'ids': 'ga:%s' % self.remote_id, 'start-date': self.date_start, 'end-date': self.date_end, 'sort': '-ga:users', 'max-results': '10', }, dimensions=[ Column('ga:country', label='Country', visible=1, type_cast=_prune_abstract), ], metrics=[col.new() for col in summary_metrics], ) self.tables['device'] = google_query.get_table( params={ 'ids': 'ga:%s' % self.remote_id, 'start-date': self.date_start, 'end-date': self.date_end, 'sort': '-ga:users', }, dimensions=[ Column('ga:deviceCategory', label='Device', visible=1, type_cast=_cast_title), ], metrics=[col.new() for col in summary_metrics], ) self.tables['browser'] = google_query.get_table( params={ 'ids': 'ga:%s' % self.remote_id, 'start-date': self.date_start, 'end-date': self.date_end, 'sort': '-ga:users', }, dimensions=[ Column('ga:browser', label='Browser', visible=1), ], metrics=[col.new() for col in summary_metrics] + [ Column('ga:avgPageLoadTime', label='Load Time', type_cast=float), ], ) self.tables['geo'].tag_rows() self.tables['device'].tag_rows() 
self.tables['browser'].tag_rows() # TODO: Add last year's month historic_table = google_query.get_table( params={ 'ids': 'ga:%s' % self.remote_id, 'start-date': last_month_date_start, 'end-date': self.date_end, }, dimensions=[ Column('ga:date'), Column('ga:yearMonth', visible=0), ], metrics=[ Column('ga:pageviews', label='Views', type_cast=int, visible=1), Column('ga:users', label='Uniques', type_cast=int), ], ) intro_config = self.config.get('intro') if intro_config: # For John Sheehan historic_table.set_visible('ga:yearMonth', intro_config) iter_historic = historic_table.iter_visible() _, views_column = next(iter_historic) monthly_data, max_value = cumulative_by_month(iter_historic) last_month, current_month = monthly_data self.data['historic_data'] = encode_rows(monthly_data, max_value) self.data['total_units'] = '{:,} %s' % views_column.label.lower().rstrip('s') self.data['total_current'] = current_month[-1] self.data['total_last'] = last_month[-1] self.data['current_month'] = self.date_start.strftime('%B') self.data['last_month'] = last_month_date_start.strftime('%B') self.data['current_month_days'] = self.date_end.day self.data['last_month_days'] = (self.date_start - datetime.timedelta(days=1)).day
def fetch(self, google_query):
    """Populate self.tables / self.data for the app (screens) report:
    summary, optional ads, screens, goals, ecommerce, historic series,
    geo and app-version tables."""
    days_delta = (self.date_end - self.date_start).days
    is_year_delta = days_delta > 360
    # Pick interval granularity from the report span: week / month / year.
    interval_field = 'ga:nthWeek'
    if is_year_delta:
        interval_field = 'ga:year'
    elif days_delta > 6:
        interval_field = 'ga:nthMonth'

    # Summary
    summary_params = {
        'ids': 'ga:%s' % self.remote_id,
        'start-date': self.previous_date_start,  # Extra week
        'end-date': self.date_end,
        'sort': '-{}'.format(interval_field),
    }
    summary_dimensions = [
        Column(interval_field),
    ]
    basic_metrics = [
        Column('ga:screenviews', label='Views', type_cast=int, type_format=h.human_int),
        Column('ga:sessions', label='Sessions', type_cast=int, type_format=h.human_int),
        Column('ga:users', label='Users', type_cast=int, type_format=h.human_int, threshold=0, visible=0),
        Column('ga:avgSessionDuration', label='Session', type_cast=_cast_time, type_format=h.human_time, threshold=0),
    ]
    summary_metrics = basic_metrics + [
        Column('ga:goalConversionRateAll', label='Conversion', type_cast=float, type_format=_format_percent, threshold=0.1),
        Column('ga:itemRevenue', label="Revenue", type_cast=float, type_format=_format_dollars),
        Column('ga:itemQuantity', label="Sales", type_cast=int, type_format=h.human_int),
    ]
    self.tables['summary'] = summary_table = google_query.get_table(
        params=summary_params,
        dimensions=summary_dimensions,
        metrics=summary_metrics,
    )
    # No user data at all means there is nothing worth reporting.
    if not summary_table.has_value('ga:users'):
        raise EmptyReportError()

    include_ads = self.config.get('ads') or summary_table.has_value('ga:itemRevenue')

    # Ads
    # FIXME: Merge this with summary_metrics once https://code.google.com/p/analytics-issues/issues/detail?id=693 is fixed.
    if include_ads:
        self.tables['ads'] = google_query.get_table(
            params={
                'ids': 'ga:%s' % self.remote_id,
                'start-date': self.date_start,
                'end-date': self.date_end,
            },
            dimensions=[Column('ga:adGroup')],
            metrics=[
                Column('ga:adCost', label="Ad Spend", type_cast=float, type_format=_format_dollars, threshold=0),
                Column('ga:impressions', label="Ad Impressions", type_cast=int, type_format=h.human_int, threshold=0),
                Column('ga:adClicks', label="Ad Clicks", type_cast=int, type_format=h.human_int, threshold=0),
                Column('ga:itemRevenue', label="Revenue", type_cast=float, type_format=_format_dollars, threshold=0),
                Column('ga:itemQuantity', label="Sales", type_cast=int, type_format=h.human_int, threshold=0),
            ],
        )

    # Screens
    screens_metrics = [col.new() for col in basic_metrics] + [
        Column('ga:exitRate', label='Exit', type_cast=float, type_format=_format_percent, reverse=True, threshold=0),
    ]
    if self.config.get('pageloadtime', True):
        screens_metrics += [
            Column('ga:avgPageLoadTime', label='Load', type_cast=float, type_format=h.human_time, reverse=True, threshold=0),
        ]
    self.tables['screens'] = google_query.get_table(
        params={
            'ids': 'ga:%s' % self.remote_id,
            'start-date': self.date_start,
            'end-date': self.date_end,
            'sort': '-ga:screenviews',
            'max-results': '10',
        },
        dimensions=[
            Column('ga:screenName', label='Screens', visible=1, type_cast=_prune_abstract),
        ],
        metrics=screens_metrics,
    )
    self.tables['screens'].set_visible('ga:screenviews', 'ga:screenName')

    if summary_table.has_value('ga:goalConversionRateAll'):
        # Goals
        self.tables['goals'] = self._get_goals(google_query, interval_field)

    if summary_table.has_value('ga:itemRevenue'):
        # Ecommerce
        self.tables['ecommerce'] = self._get_ecommerce(google_query, interval_field)

    # Historic
    historic_start_date = self.previous_date_start
    if not is_year_delta:
        # Override to just previous month
        historic_start_date = self.date_end - datetime.timedelta(days=self.date_end.day)
        historic_start_date -= datetime.timedelta(days=historic_start_date.day - 1)

    # Note: Pace is different from interval, as year pace is still month over month whereas year interval is year over year.
    compare_interval = self.config.get('pace', 'month')
    if compare_interval == 'year' and not is_year_delta:
        historic_start_date = self.date_end - datetime.timedelta(days=self.date_end.day - 1)
        historic_start_date = historic_start_date.replace(year=historic_start_date.year - 1)

    dimensions = [
        Column('ga:yearMonth', visible=0),
    ]
    if not is_year_delta:
        dimensions += [
            Column('ga:date'),
        ]
    historic_table = google_query.get_table(
        params={
            'ids': 'ga:%s' % self.remote_id,
            'start-date': historic_start_date,
            'end-date': self.date_end,
        },
        dimensions=dimensions,
        metrics=[
            # Note: This is unique users over the dimension which is daily,
            # so the combined counts we use later are actually more daily
            # sessions than users.
            Column('ga:users', label='Daily Sessions', type_cast=int, visible=1),
            Column('ga:sessions', label='Sessions', type_cast=int),
        ],
    )

    intro_config = self.config.get('intro')
    if intro_config:  # For John Sheehan
        historic_table.set_visible('ga:yearMonth', intro_config)

    iter_historic = historic_table.iter_visible()
    # First yield is the header row; keep the visible metric column.
    _, views_column = next(iter_historic)
    if is_year_delta:
        # Collapse ga:yearMonth keys to just the year.
        iter_historic = ((mo[:4], v) for mo, v in iter_historic)
    elif compare_interval == 'year':
        # Keep only the same calendar month across years.
        mo_filter = u'{d.month:02d}'.format(d=historic_start_date)
        iter_historic = ((mo, v) for mo, v in iter_historic if mo.endswith(mo_filter))

    monthly_data, max_value = cumulative_by_month(iter_historic)
    last_month, current_month = monthly_data[-2:]

    self.data['historic_data'] = encode_rows(monthly_data, max_value)
    self.data['total_units'] = '{:,} %s' % views_column.label.lower().rstrip('s')
    self.data['total_current'] = current_month[-1]
    self.data['total_last'] = last_month[-1]
    # Compare against the same relative position in the previous period.
    self.data['total_last_relative'] = last_month[min(len(current_month), len(last_month)) - 1]
    self.data['total_last_date_start'] = historic_start_date

    self.tables['geo'] = self._get_geo(google_query, summary_metrics)
    self.tables['versions'] = self._get_versions(google_query, summary_metrics)
    self.tables['screens'].tag_rows()
def _get_versions(self, google_query, summary_metrics):
    """App-version adoption table: percentage of users per (version, OS)
    pair, tagged 'latest' for the newest version and 'crashes' when the
    crash count exceeds 10% of sessions.  Also records the newest version
    on the instance."""
    t = google_query.get_table(
        params={
            'ids': 'ga:%s' % self.remote_id,
            'start-date': self.date_start,
            'end-date': self.date_end,
            'sort': '-ga:appVersion',
            'max-results': '50',
        },
        dimensions=[
            Column('ga:appVersion', label='Version', visible=1),
            Column('ga:operatingSystem', label='Operating System', visible=2),
        ],
        metrics=[col.new() for col in summary_metrics],
    )
    t.set_visible('ga:users', 'ga:appVersion')
    t.tag_rows()

    # Crash counts are fetched separately and joined on (version, os).
    col_exception = Column('ga:fatalExceptions', label='Crashes', type_cast=int, type_format=h.human_int)
    crashes_table = google_query.get_table(
        params={
            'ids': 'ga:%s' % self.remote_id,
            'start-date': self.date_start,
            'end-date': self.date_end,
            'sort': '-ga:appVersion',
            'max-results': '50',
        },
        dimensions=[
            Column('ga:appVersion', label='Version', visible=1),
            Column('ga:operatingSystem', label='Operating System', visible=2),
        ],
        metrics=[col_exception],
    )
    crashes_lookup = dict(
        ((version, os), crashes)
        for crashes, version, os in crashes_table.iter_rows(
            'ga:fatalExceptions', 'ga:appVersion', 'ga:operatingSystem'))

    total = float(t.get('ga:users').sum)
    out = Table(columns=[
        Column('users', label='Users', visible=0, type_format=_format_percent),
        Column('version', label='Version', visible=1),
    ])
    latest_version = None
    for i, (users, sessions, version, os) in enumerate(
            t.iter_rows('ga:users', 'ga:sessions', 'ga:appVersion', 'ga:operatingSystem')):
        row = out.add([users * 100.0 / total, u"%s on %s" % (version, os)])
        row.tags = t.rows[i].tags
        # Rows are sorted by -ga:appVersion, so the first version seen is
        # the newest.
        if not latest_version:
            latest_version = version
        crashes = crashes_lookup.get((version, os), 0)
        if latest_version == version:
            row.tag('latest')
        # Flag versions whose crash count exceeds 10% of sessions.
        if crashes and crashes > sessions * 0.1:
            row.tag('crashes', value=h.human_int(crashes), is_positive=False, is_prefixed=True)

    # Bug fix: this was previously written to the misspelled attribute
    # `latet_version`; keep the old name as an alias in case anything
    # external still reads it.
    self.latest_version = latest_version
    self.latet_version = latest_version  # Deprecated misspelling.

    out.limit(5)
    out.sort(reverse=True)
    return out
def _get_goals(self, google_query, interval_field):
    """Build the goal-completion table comparing the current interval
    against the previous one.

    Returns None when the profile has no active goals, when less than two
    intervals of data are available, or when no goal had any completions.
    """
    goals_api = 'https://www.googleapis.com/analytics/v3/management/accounts/{accountId}/webproperties/{webPropertyId}/profiles/{profileId}/goals'
    r = google_query.get(goals_api.format(profileId=self.remote_data['id'], **self.remote_data))
    has_goals = r.get('items') or []
    goal_metrics = [
        Column('ga:goal{id}Completions'.format(id=g['id']), label=g['name'], type_cast=float)
        for g in has_goals if g.get('active')
    ]
    if not goal_metrics:
        return

    # Note: max 10 metrics allowed
    metrics = goal_metrics[-9:] + [Column('ga:sessions', type_cast=int)]
    raw_table = google_query.get_table(
        params={
            'ids': 'ga:%s' % self.remote_id,
            'start-date': self.previous_date_start,  # Extra week
            'end-date': self.date_end,
            'sort': '-{}'.format(interval_field),
        },
        metrics=metrics,
        dimensions=[
            Column(interval_field),
        ],
    )
    if len(raw_table.rows) != 2:
        # Less than 2 weeks of data available
        return

    t = Table(columns=[
        Column('goal', label='Goals', visible=1, type_cast=_cast_title),
        Column('completions', label='Events', visible=0, type_cast=int, type_format=h.human_int, threshold=0),
    ])
    num_sessions, num_sessions_last = [
        next(v) for v in raw_table.iter_rows('ga:sessions')
    ]
    this_week, last_week = raw_table.rows
    col_compare = t.get('completions')
    col_compare_delta = Column('%s:delta' % col_compare.id, label='Events', type_cast=float, type_format=h.human_delta, threshold=0)
    has_completions = False
    for col_id, pos in raw_table.column_to_index.items():
        col = raw_table.columns[pos]
        # Only the per-goal completion metrics participate; skip sessions
        # and the interval dimension.
        if not col.id.startswith('ga:goal'):
            continue
        completions, completions_last = this_week.values[pos], last_week.values[pos]
        percent_completions = completions * 100.0 / num_sessions if num_sessions else 0.0
        percent_completions_last = completions_last * 100.0 / num_sessions_last if num_sessions_last else 0.0
        row = t.add([col.label, completions])
        if not row:
            # Boring
            continue
        if completions > 0:
            row.tag(type="Conversion", value=_format_percent(percent_completions))
        if completions + completions_last > 0:
            has_completions = True
            # Old method:
            # delta = (percent_completions - percent_completions_last) / 100.0
            # New method (same as GA shows):
            delta = completions / completions_last - 1 if completions_last > 0.0 else 1.0
            if abs(delta) > 0.001:
                row.tag(type='delta', value=delta, column=col_compare_delta, is_positive=delta > 0)
    if not has_completions:
        return
    t.sort(reverse=True)
    return t
def fetch(self, api_query):
    """Populate self.tables / self.data from Stripe: new customers and
    events for the period, plus a cumulative charge-revenue series
    covering the previous and current month."""
    # First day of the previous calendar month.
    last_month_date_start = self.date_end - datetime.timedelta(days=self.date_end.day)
    last_month_date_start -= datetime.timedelta(days=last_month_date_start.day - 1)

    # TODO: Check 'has_more'
    week_params = {
        'created[gte]': to_epoch(self.date_start),
        # Stripe's created[lt] is exclusive, so include the full end day.
        'created[lt]': to_epoch(self.date_end + datetime.timedelta(days=1)),
        'limit': 100,
    }

    self.tables['customers'] = customers_table = Timeline()
    items = api_query.get_paged('https://api.stripe.com/v1/customers', params=week_params)
    for item in items:
        plan = (item.get('subscription') or {}).get('plan')
        if plan:
            plan = describe_plan(plan)
        customers_table.add([
            to_datetime(item['created']),
            ' '.join([
                item.get('email') or '(no email)',
                plan or '(no plan yet)',
            ])
        ])

    ##
    self.tables['events'] = events_table = Timeline()
    items = api_query.get_paged('https://api.stripe.com/v1/events', params=week_params)
    for item in items:
        events_table.add([
            to_datetime(item['created']),
            describe_event(item),
        ])

    ##
    historic_table = Table([
        Column('created', visible=0),
        Column('amount', label='Amount', visible=1),
    ])
    items = api_query.get_paged('https://api.stripe.com/v1/charges', params={
        'created[gte]': to_epoch(last_month_date_start),
        'created[lt]': to_epoch(self.date_end + datetime.timedelta(days=1)),
        'limit': 100,
    })
    for item in items:
        # Only count successful, non-refunded charges.
        if not item['paid'] or item['refunded']:
            continue
        historic_table.add([
            to_datetime(item['created']),
            item['amount'],
        ])

    iter_historic = historic_table.iter_visible(reverse=True)
    # First yield is the header row; keep the visible metric column.
    _, views_column = next(iter_historic)
    monthly_data, max_value = sparse_cumulative(iter_historic, final_date=self.date_end)
    # Default to zeroed months when there were no charges at all.
    last_month, current_month = [[0], [0]]
    if monthly_data:
        last_month, current_month = monthly_data[-2:]

    self.data['historic_data'] = encode_rows(monthly_data, max_value)
    self.data['total_current'] = current_month[-1]
    self.data['total_last'] = last_month[-1]
    # Compare against the same relative position in the previous month.
    self.data['total_last_relative'] = last_month[min(len(current_month), len(last_month)) - 1]
    self.data['total_last_date_start'] = last_month_date_start
    self.data['canary'] = None  # XXX: Fix this before launching
def fetch(self, google_query):
    """Populate self.tables / self.data for the website report: summary,
    optional ads, pages, referrers (with deltas), goals, ecommerce,
    historic series, social/search, and optional keyword/geo tables."""
    interval_field = self._get_interval_field()
    is_year_delta = interval_field == 'ga:year'
    is_quarter_delta = interval_field == 'bm:quarter'

    # Summary
    basic_metrics = [
        Column('ga:pageviews', label='Views', type_cast=int, type_format=h.human_int, threshold=0, visible=0),
        Column('ga:users', label='Uniques', type_cast=int, type_format=h.human_int),
        Column('ga:avgSessionDuration', label='Time On Site', type_cast=_cast_time, type_format=h.human_time, threshold=0),
        Column('ga:bounceRate', label='Bounce Rate', type_cast=_cast_percent, type_format=_format_percent, reverse=True, threshold=0),
    ]
    summary_metrics = basic_metrics + [
        Column('ga:goalConversionRateAll', label='Conversion', type_cast=float, type_format=_format_percent, threshold=0.1),
        Column('ga:itemRevenue', label="Revenue", type_cast=float, type_format=_format_dollars),
        Column('ga:itemQuantity', label="Sales", type_cast=int, type_format=h.human_int),
    ]
    self.tables['summary'] = summary_table = self._get_summary(
        google_query, interval_field,
        metrics=summary_metrics + [
            Column('ga:sessions', type_cast=int),
            Column('ga:adClicks', type_cast=int),
        ],
    )
    # No pageviews at all means there is nothing worth reporting.
    if not summary_table.has_value('ga:pageviews'):
        raise EmptyReportError()

    include_ads = self.config.get('ads') or summary_table.has_value('ga:adClicks') or summary_table.has_value('ga:itemRevenue')

    # Ads
    # FIXME: Merge this with summary_metrics once https://code.google.com/p/analytics-issues/issues/detail?id=693 is fixed.
    if include_ads:
        self.tables['ads'] = google_query.get_table(
            params={
                'ids': 'ga:%s' % self.remote_id,
                'start-date': self.date_start,
                'end-date': self.date_end,
            },
            dimensions=[Column('ga:adGroup')],
            metrics=[
                Column('ga:adCost', label="Ad Spend", type_cast=float, type_format=_format_dollars, threshold=0),
                Column('ga:impressions', label="Ad Impressions", type_cast=int, type_format=h.human_int, threshold=0),
                Column('ga:adClicks', label="Ad Clicks", type_cast=int, type_format=h.human_int, threshold=0),
                Column('ga:itemRevenue', label="Revenue", type_cast=float, type_format=_format_dollars, threshold=0),
                Column('ga:itemQuantity', label="Sales", type_cast=int, type_format=h.human_int, threshold=0),
            ],
        )

    # Pages
    pages_metrics = [col.new() for col in basic_metrics]
    if self.config.get('pageloadtime', True):
        pages_metrics += [
            Column('ga:avgPageLoadTime', label='Page Load', type_cast=float, type_format=h.human_time, reverse=True, threshold=0),
        ]
    self.tables['pages'] = google_query.get_table(
        params={
            'ids': 'ga:%s' % self.remote_id,
            'start-date': self.date_start,
            'end-date': self.date_end,
            'sort': '-ga:pageviews',
            'max-results': '10',
        },
        dimensions=[
            Column('ga:pagePath', label='Pages', visible=1, type_cast=_prune_abstract),
        ],
        metrics=pages_metrics,
    )

    # Referrers: current period vs. a wider slice of the previous period.
    current_referrers = google_query.get_table(
        params={
            'ids': 'ga:%s' % self.remote_id,
            'start-date': self.date_start,
            'end-date': self.date_end,
            'filters': 'ga:medium==referral;ga:socialNetwork==(not set)',
            'sort': '-ga:pageviews',
            'max-results': '25',
        },
        dimensions=[
            Column('ga:fullReferrer', label='Referrer', visible=1, type_cast=_prune_referrer)
        ],
        metrics=[col.new() for col in summary_metrics],
    )
    last_referrers = google_query.get_table(
        params={
            'ids': 'ga:%s' % self.remote_id,
            'start-date': self.previous_date_start,
            'end-date': self.previous_date_end,
            'filters': 'ga:medium==referral;ga:socialNetwork==(not set)',
            'sort': '-ga:pageviews',
            'max-results': '250',
        },
        dimensions=[
            Column('ga:fullReferrer', label='Referrer', visible=1, type_cast=_prune_referrer)
        ],
        metrics=[
            summary_table.get('ga:pageviews').new(),
        ],
    )
    inject_table_delta(current_referrers, last_referrers, join_column='ga:fullReferrer')
    self.tables['referrers'] = current_referrers

    if summary_table.has_value('ga:goalConversionRateAll'):
        # Goals
        try:
            self.tables['goals'] = self._get_goals(google_query, interval_field)
        except APIError:
            # Missing permissions for goals
            pass

    if summary_table.has_value('ga:itemRevenue'):
        # Ecommerce
        self.tables['ecommerce'] = self._get_ecommerce(google_query, interval_field)

    # Historic
    historic_start_date = self.previous_date_start
    if not is_year_delta and not is_quarter_delta:
        # Override to just previous month
        historic_start_date = self.date_end - datetime.timedelta(days=self.date_end.day)
        historic_start_date -= datetime.timedelta(days=historic_start_date.day - 1)

    # Note: Pace is different from interval, as year pace is still month over month whereas year interval is year over year.
    compare_interval = self.config.get('pace', 'month')
    if compare_interval == 'year' and not is_year_delta:
        historic_start_date = self.date_end - datetime.timedelta(days=self.date_end.day - 1)
        historic_start_date = historic_start_date.replace(year=historic_start_date.year - 1)

    dimensions = [
        Column('ga:yearMonth', visible=0),
    ]
    if not is_year_delta:
        dimensions += [
            Column('ga:date'),
        ]
    historic_table = google_query.get_table(
        params={
            'ids': 'ga:%s' % self.remote_id,
            'start-date': historic_start_date,
            'end-date': self.date_end,
        },
        dimensions=dimensions,
        metrics=[
            Column('ga:pageviews', label='Views', type_cast=int, visible=1),
            Column('ga:users', label='Uniques', type_cast=int),
        ],
    )

    intro_config = self.config.get('intro')
    if intro_config:  # For John Sheehan
        historic_table.set_visible('ga:yearMonth', intro_config)

    iter_historic = historic_table.iter_visible()
    # First yield is the header row; keep the visible metric column.
    _, views_column = next(iter_historic)
    if is_year_delta:
        # Collapse ga:yearMonth keys to just the year.
        iter_historic = ((mo[:4], v) for mo, v in iter_historic)
    elif compare_interval == 'year':
        # Keep only the same calendar month across years.
        mo_filter = u'{d.month:02d}'.format(d=historic_start_date)
        iter_historic = ((mo, v) for mo, v in iter_historic if mo.endswith(mo_filter))

    monthly_data, max_value = cumulative_by_month(iter_historic)
    if len(monthly_data) < 2:
        raise ValueError("invalid number of historic months", self.remote_id, historic_table.rows)

    if is_quarter_delta:
        # TODO: This needs to be generalized beyond months at this point, sigh
        monthly_data, max_value = cumulative_splitter(monthly_data, split_on=3)

    last_month, current_month = monthly_data[-2:]

    self.data['historic_data'] = encode_rows(monthly_data, max_value)
    self.data['total_units'] = '{:,} %s' % views_column.label.lower().rstrip('s')
    self.data['total_current'] = current_month[-1]
    self.data['total_last'] = last_month[-1]
    # Compare against the same relative position in the previous period.
    self.data['total_last_relative'] = last_month[min(len(current_month), len(last_month)) - 1]
    self.data['total_last_date_start'] = historic_start_date

    social_search_table = self._get_social_search(google_query, self.date_start, self.date_end, summary_metrics, max_results=25)
    last_social_search = self._get_social_search(google_query, self.previous_date_start, self.previous_date_end, summary_metrics, max_results=100)
    inject_table_delta(social_search_table, last_social_search, join_column='source')
    self.tables['social_search'] = social_search_table

    if self.config.get('search_keywords'):
        self.tables['search_keywords'] = self._get_search_keywords(google_query, interval_field=interval_field)
        self.tables['search_keywords'].tag_rows()

    if self.config.get('geo'):
        self.tables['geo'] = google_query.get_table(
            params={
                'ids': 'ga:%s' % self.remote_id,
                'start-date': self.date_start,
                'end-date': self.date_end,
                'sort': '-ga:pageviews',
                'max-results': '5',
            },
            dimensions=[
                Column('ga:country', label='Country', visible=1, type_cast=_prune_abstract),
            ],
            metrics=[col.new() for col in summary_metrics],
        )
        self.tables['geo'].tag_rows()

    self.tables['social_search'].tag_rows()
    self.tables['referrers'].sort(reverse=True)
    self.tables['referrers'].tag_rows()
    self.tables['pages'].sort(reverse=True)
    self.tables['pages'].tag_rows()
def test_column(self):
    """Column.measure tracks running min/max rows; is_interesting respects
    the threshold relative to the configured average."""
    col = Column('foo', type_cast=int)
    self.assertEqual(col.cast('123'), 123)
    self.assertEqual(col.min_row, (None, None))
    self.assertEqual(col.max_row, (None, None))

    # With an average, min/max start seeded at the average value.
    col = Column('foo', average=100, threshold=0.25)
    self.assertEqual(col.min_row, (100.0, None))
    self.assertEqual(col.max_row, (100.0, None))

    # 90 is within 25% of the average, so it is not interesting — but it
    # still updates the running minimum when measured.
    self.assertFalse(col.is_interesting(90))
    col.measure(90, 'a')
    self.assertEqual(col.min_row, (90.0, 'a'))
    self.assertEqual(col.max_row, (100.0, None))

    # Each step: (value, label, expected min_row, expected max_row).
    steps = [
        (50, 'b', (50.0, 'b'), (100.0, None)),
        (150, 'c', (50.0, 'b'), (150.0, 'c')),
        (200, 'd', (50.0, 'b'), (200.0, 'd')),
    ]
    for value, label, want_min, want_max in steps:
        self.assertTrue(col.is_interesting(value))
        col.measure(value, label)
        self.assertEqual(col.min_row, want_min)
        self.assertEqual(col.max_row, want_max)