def download_and_store(self, periods):
    for period_name, period_complete_day, start_date, end_date in periods:
        log.info('Period "%s" (%s - %s)',
                 self.get_full_period_name(period_name, period_complete_day),
                 start_date.strftime('%Y-%m-%d'),
                 end_date.strftime('%Y-%m-%d'))
        if self.delete_first:
            log.info('Deleting existing Analytics for this period "%s"',
                     period_name)
            ga_model.delete(period_name)

        if not self.skip_url_stats:
            # Clean out old url data before storing the new
            ga_model.pre_update_url_stats(period_name)

            # accountName = config.get('ds_stats.ga.account')

            log.info('Downloading analytics for dataset views')
            # TODO: [extract SA]
            data = self.download(start_date, end_date,
                                 '~^/dataset/[a-z0-9-_]+')
            log.info('Storing dataset views (%i rows)', len(data.get('url')))
            self.store(period_name, period_complete_day, data)

            log.info('Downloading analytics for publisher views')
            data = self.download(start_date, end_date,
                                 '~^/organization/[a-z0-9-_]+')
            log.info('Storing publisher views (%i rows)', len(data.get('url')))
            self.store(period_name, period_complete_day, data)

            # Make sure the All records are correct.
            ga_model.post_update_url_stats()

            log.info('Associating datasets with their publisher')
            ga_model.update_publisher_stats(period_name)  # about 30 seconds.

        log.info('Downloading and storing analytics for site-wide stats')
        self.sitewide_stats(period_name, period_complete_day)

        log.info('Downloading and storing analytics for social networks')
        self.update_social_info(period_name, start_date, end_date)
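# Minimal usage sketch (an assumption, not code from the extension): the loop
# above unpacks each period as (period_name, period_complete_day, start_date,
# end_date), so `periods` could be built along these lines. The period name
# format and the meaning of period_complete_day (taken here as the last day
# of the month for which data is complete) are illustrative.
import datetime

example_periods = [
    ('2023-01', 31, datetime.date(2023, 1, 1), datetime.date(2023, 1, 31)),
    ('2023-02', 28, datetime.date(2023, 2, 1), datetime.date(2023, 2, 28)),
]
# downloader.download_and_store(example_periods)  # `downloader` is a
#                                                 # hypothetical instance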
def download_and_store(self, periods):
    for period_name, period_complete_day, start_date, end_date in periods:
        log.info('Period "%s" (%s - %s)',
                 self.get_full_period_name(period_name, period_complete_day),
                 start_date.strftime('%Y-%m-%d'),
                 end_date.strftime('%Y-%m-%d'))
        if self.delete_first:
            log.info('Deleting existing Analytics for this period "%s"',
                     period_name)
            ga_model.delete(period_name)

        if not self.skip_url_stats:
            # Clean out old url data before storing the new
            ga_model.pre_update_url_stats(period_name)

            accountName = config.get('googleanalytics.account')

            log.info('Downloading analytics for dataset views')
            #data = self.download(start_date, end_date, '~/%s/dataset/[a-z0-9-_]+' % accountName)
            data = self.download(start_date, end_date,
                                 '~/dataset/[a-z0-9-_]+')
            log.info('Storing dataset views (%i rows)', len(data.get('url')))
            self.store(period_name, period_complete_day, data)

            log.info('Downloading analytics for organization views')
            #data = self.download(start_date, end_date, '~/%s/publisher/[a-z0-9-_]+' % accountName)
            data = self.download(start_date, end_date,
                                 '~/organization/[a-z0-9-_]+')
            #log.info('Storing publisher views (%i rows)', len(data.get('url')))
            self.store(period_name, period_complete_day, data)

            # Make sure the All records are correct.
            ga_model.post_update_url_stats()

            log.info('Associating datasets with their organization')
            ga_model.update_publisher_stats(period_name)  # about 30 seconds.

        log.info('Downloading and storing analytics for site-wide stats')
        self.sitewide_stats(period_name, period_complete_day)

        log.info('Downloading and storing analytics for social networks')
        self.update_social_info(period_name, start_date, end_date)
def download_and_store(self, periods):
    for period_name, period_complete_day, start_date, end_date in periods:
        log.info('Period "%s" (%s - %s)',
                 self.get_full_period_name(period_name, period_complete_day),
                 start_date.strftime('%Y-%m-%d'),
                 end_date.strftime('%Y-%m-%d'))
        if self.delete_first:
            log.info('Deleting existing Analytics for this period "%s"',
                     period_name)
            ga_model.delete(period_name)

        if self.stat in (None, 'url'):
            # Clean out old url data before storing the new
            ga_model.pre_update_url_stats(period_name)

            accountName = config.get('googleanalytics.account')

            path_prefix = '~'  # i.e. it is a regex
            # Possibly there is a domain in the path.
            # I'm not sure why, but on the data.gov.uk property we see
            # the domain gets added to the GA path. e.g.
            #   '/data.gov.uk/data/search'
            #   '/co-prod2.dh.bytemark.co.uk/apps/test-app'
            # but on other properties we don't. e.g.
            #   '/data/search'
            path_prefix += '(/%s)?' % accountName

            log.info('Downloading analytics for dataset views')
            data = self.download(start_date, end_date,
                                 path_prefix + '/dataset/[a-z0-9-_]+')
            log.info('Storing dataset views (%i rows)', len(data.get('url')))
            self.store(period_name, period_complete_day, data)

            log.info('Downloading analytics for publisher views')
            data = self.download(start_date, end_date,
                                 path_prefix + '/publisher/[a-z0-9-_]+')
            log.info('Storing publisher views (%i rows)', len(data.get('url')))
            self.store(period_name, period_complete_day, data)

            # Create the All records
            ga_model.post_update_url_stats()

            log.info('Associating datasets with their publisher')
            ga_model.update_publisher_stats(period_name)  # about 30 seconds.

        if self.stat == 'url-all':
            # This stat is split off just for test purposes
            ga_model.post_update_url_stats()

        if self.stat in (None, 'sitewide'):
            # Clean out old ga_stats data before storing the new
            ga_model.pre_update_sitewide_stats(period_name)

            log.info('Downloading and storing analytics for site-wide stats')
            self.sitewide_stats(period_name, period_complete_day)

        if self.stat in (None, 'social'):
            # Clean out old ga_stats data before storing the new
            ga_model.pre_update_social_stats(period_name)

            log.info('Downloading and storing analytics for social networks')
            self.update_social_info(period_name, start_date, end_date)
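# Illustrative worked example (an assumption, not part of the extension): the
# leading '~' in path_prefix is treated here as just a marker telling
# self.download that the path is a regex, so it is dropped before matching.
# 'data.gov.uk' stands in for the 'googleanalytics.account' config value.
import re

account_name = 'data.gov.uk'  # hypothetical value of googleanalytics.account
dataset_pattern = ('(/%s)?' % account_name) + '/dataset/[a-z0-9-_]+'
for path in ('/dataset/road-accidents',
             '/data.gov.uk/dataset/road-accidents'):
    print bool(re.match(dataset_pattern, path))  # True for both path forms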
def download_and_store(self, periods):
    for period_name, period_complete_day, start_date, end_date in periods:
        log.info('Period "%s" (%s - %s)',
                 self.get_full_period_name(period_name, period_complete_day),
                 start_date.strftime('%Y-%m-%d'),
                 end_date.strftime('%Y-%m-%d'))
        print 'period_name=%s' % period_name
        if self.save_stats and self.delete_first:
            log.info('Deleting existing Analytics for this period "%s"',
                     period_name)
            ga_model.delete(period_name)

        # accountName = config.get('googleanalytics.account', '')
        # path_prefix = '~'  # i.e. it is a regex
        # # Possibly there is a domain in the path.
        # # I'm not sure why, but on the data.gov.uk property we see
        # # the domain gets added to the GA path. e.g.
        # #   '/data.gov.uk/data/search'
        # #   '/co-prod2.dh.bytemark.co.uk/apps/test-app'
        # # but on other properties we don't. e.g.
        # #   '/data/search'
        # path_prefix += '(/%s)?' % accountName

        if self.stat in (None, DownloadAnalytics.PACKAGE_STAT) and \
                self.kind_stats == DownloadAnalytics.KIND_STAT_PACKAGE_RESOURCES:
            # Clean out old dge_ga_package data before storing the new
            stat = DownloadAnalytics.PACKAGE_STAT
            if self.save_stats:
                ga_model.pre_update_dge_ga_package_stats(period_name)
            log.info('Downloading analytics for package views')
            data = self.download(start_date, end_date,
                                 DownloadAnalytics.PACKAGE_URL_REGEX,
                                 DownloadAnalytics.PACKAGE_URL_EXCLUDED_REGEXS,
                                 stat)
            if data:
                if self.save_stats:
                    log.info('Storing package views (%i rows)',
                             len(data.get(stat, [])))
                    print 'Storing package views (%i rows)' % (
                        len(data.get(stat, [])))
                    self.store(period_name, period_complete_day, data, stat)
                    # Create the All records
                    ga_model.post_update_dge_ga_package_stats()
                else:
                    print 'The result contains %i rows:' % (
                        len(data.get(stat, [])))
                    for row in data.get(stat):
                        print row

        if self.stat in (None, DownloadAnalytics.RESOURCE_STAT) and \
                self.kind_stats == DownloadAnalytics.KIND_STAT_PACKAGE_RESOURCES:
            # Clean out old dge_ga_resource data before storing the new
            stat = DownloadAnalytics.RESOURCE_STAT
            if self.save_stats:
                ga_model.pre_update_dge_ga_resource_stats(period_name)
            log.info('Downloading analytics for resource views')
            data = self.download(start_date, end_date,
                                 DownloadAnalytics.PACKAGE_URL_REGEX,
                                 DownloadAnalytics.PACKAGE_URL_EXCLUDED_REGEXS,
                                 stat)
            if data:
                if self.save_stats:
                    log.info('Storing resource views (%i rows)',
                             len(data.get(stat, [])))
                    print 'Storing resource views (%i rows)' % (
                        len(data.get(stat, [])))
                    self.store(period_name, period_complete_day, data, stat)
                    # Create the All records
                    ga_model.post_update_dge_ga_resource_stats()
                else:
                    print 'The result contains %i rows:' % (
                        len(data.get(stat, [])))
                    for row in data.get(stat):
                        print row

        if self.stat in (None, DownloadAnalytics.VISIT_STAT) and \
                self.kind_stats == DownloadAnalytics.KIND_STAT_VISITS:
            # Clean out old dge_ga_visit data before storing the new
            stat = DownloadAnalytics.VISIT_STAT
            if self.save_stats:
                ga_model.pre_update_dge_ga_visit_stats(period_name)
            visits = []
            for section in DownloadAnalytics.SECTIONS:
                key = section.get('key', None)
                name = section.get('name', None)
                path = section.get('url_regex', '')
                excluded_paths = section.get('exluded_url_regex', [])
                if name or key:
                    log.info('Downloading analytics for %s %s sessions',
                             name, key)
                    print 'Downloading analytics for %s %s sessions' % (
                        name, key)
                    data = self.download(start_date, end_date, path,
                                         excluded_paths, stat)
                    if data:
                        visits.append((key, name, data.get(stat, 0)))
            if visits and len(visits) >= 1:
                if self.save_stats:
                    log.info('Storing session visits (%i rows)', len(visits))
                    print 'Storing session visits (%i rows)' % (len(visits))
                    self.store(period_name, period_complete_day,
                               {stat: visits}, stat)
                else:
                    print 'The result contains %i rows:' % (len(visits))
                    for row in visits:
                        print row
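# Hypothetical illustration (not from the extension) of the section dicts the
# visit-stats branch above reads from DownloadAnalytics.SECTIONS: each entry
# supplies 'key', 'name', 'url_regex' and 'exluded_url_regex' (spelling as in
# the code), and contributes one (key, name, visit_count) tuple to `visits`.
# All values below are made up for illustration.
EXAMPLE_SECTIONS = [
    {
        'key': 'dataset',
        'name': 'Datasets',
        'url_regex': '~/dataset/.*',
        'exluded_url_regex': [],
    },
    {
        'key': 'home',
        'name': 'Home page',
        'url_regex': '~/$',
        'exluded_url_regex': [],
    },
]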