def download_and_store(self, periods):
    """Download Google Analytics stats for each period and store them.

    ``periods`` is an iterable of ``(period_name, period_complete_day,
    start_date, end_date)`` tuples.  For each period this refreshes the
    per-URL stats (unless ``self.skip_url_stats``), then the site-wide
    stats and the social-network stats.
    """
    for period_name, period_complete_day, start_date, end_date in periods:
        log.info('Period "%s" (%s - %s)',
                 self.get_full_period_name(period_name, period_complete_day),
                 start_date.strftime('%Y-%m-%d'),
                 end_date.strftime('%Y-%m-%d'))

        if self.delete_first:
            log.info('Deleting existing Analytics for this period "%s"',
                     period_name)
            ga_model.delete(period_name)

        if not self.skip_url_stats:
            # Clean out old url data before storing the new
            ga_model.pre_update_url_stats(period_name)

            log.info('Downloading analytics for dataset views')
            data = self.download(start_date, end_date,
                                 '~^/dataset/[a-z0-9-_]+')
            # data.get('url') may be None/absent if GA returned no rows --
            # guard the len() so logging cannot crash the run.
            log.info('Storing dataset views (%i rows)',
                     len(data.get('url') or []))
            self.store(period_name, period_complete_day, data)

            log.info('Downloading analytics for publisher views')
            data = self.download(start_date, end_date,
                                 '~^/organization/[a-z0-9-_]+')
            log.info('Storing publisher views (%i rows)',
                     len(data.get('url') or []))
            self.store(period_name, period_complete_day, data)

            # Make sure the All records are correct.
            ga_model.post_update_url_stats()

            log.info('Associating datasets with their publisher')
            # NOTE: this step takes about 30 seconds.
            ga_model.update_publisher_stats(period_name)

        log.info('Downloading and storing analytics for site-wide stats')
        self.sitewide_stats(period_name, period_complete_day)

        log.info('Downloading and storing analytics for social networks')
        self.update_social_info(period_name, start_date, end_date)
def download_and_store(self, periods):
    """Download Google Analytics stats for each period and store them.

    ``periods`` is an iterable of ``(period_name, period_complete_day,
    start_date, end_date)`` tuples.  For each period this refreshes the
    per-URL stats (unless ``self.skip_url_stats``), then the site-wide
    stats and the social-network stats.
    """
    for period_name, period_complete_day, start_date, end_date in periods:
        log.info('Period "%s" (%s - %s)',
                 self.get_full_period_name(period_name, period_complete_day),
                 start_date.strftime('%Y-%m-%d'),
                 end_date.strftime('%Y-%m-%d'))

        if self.delete_first:
            log.info('Deleting existing Analytics for this period "%s"',
                     period_name)
            ga_model.delete(period_name)

        if not self.skip_url_stats:
            # Clean out old url data before storing the new
            ga_model.pre_update_url_stats(period_name)

            log.info('Downloading analytics for dataset views')
            data = self.download(start_date, end_date,
                                 '~/dataset/[a-z0-9-_]+')
            # data.get('url') may be None/absent if GA returned no rows --
            # guard the len() so logging cannot crash the run.
            log.info('Storing dataset views (%i rows)',
                     len(data.get('url') or []))
            self.store(period_name, period_complete_day, data)

            log.info('Downloading analytics for organization views')
            data = self.download(start_date, end_date,
                                 '~/organization/[a-z0-9-_]+')
            # Log the row count here too, for parity with the dataset branch.
            log.info('Storing organization views (%i rows)',
                     len(data.get('url') or []))
            self.store(period_name, period_complete_day, data)

            # Make sure the All records are correct.
            ga_model.post_update_url_stats()

            log.info('Associating datasets with their organization')
            # NOTE: this step takes about 30 seconds.
            ga_model.update_publisher_stats(period_name)

        log.info('Downloading and storing analytics for site-wide stats')
        self.sitewide_stats(period_name, period_complete_day)

        log.info('Downloading and storing analytics for social networks')
        self.update_social_info(period_name, start_date, end_date)
def download_and_store(self, periods):
    """Download Google Analytics stats for each period and store them.

    ``periods`` is an iterable of ``(period_name, period_complete_day,
    start_date, end_date)`` tuples.  ``self.stat`` selects which stat
    group to refresh: ``None`` runs everything; ``'url'``,
    ``'sitewide'`` and ``'social'`` run just that group; ``'url-all'``
    exists only so tests can run ``post_update_url_stats`` on its own.
    """
    for period_name, period_complete_day, start_date, end_date in periods:
        log.info('Period "%s" (%s - %s)',
                 self.get_full_period_name(period_name, period_complete_day),
                 start_date.strftime('%Y-%m-%d'),
                 end_date.strftime('%Y-%m-%d'))

        if self.delete_first:
            log.info('Deleting existing Analytics for this period "%s"',
                     period_name)
            ga_model.delete(period_name)

        if self.stat in (None, 'url'):
            # Clean out old url data before storing the new
            ga_model.pre_update_url_stats(period_name)
            accountName = config.get('googleanalytics.account')
            path_prefix = '~'  # i.e. it is a regex
            # Possibly there is a domain in the path.
            # I'm not sure why, but on the data.gov.uk property we see
            # the domain gets added to the GA path. e.g.
            #   '/data.gov.uk/data/search'
            #   '/co-prod2.dh.bytemark.co.uk/apps/test-app'
            # but on other properties we don't. e.g.
            #   '/data/search'
            path_prefix += '(/%s)?' % accountName

            log.info('Downloading analytics for dataset views')
            data = self.download(start_date, end_date,
                                 path_prefix + '/dataset/[a-z0-9-_]+')
            # data.get('url') may be None/absent if GA returned no rows --
            # guard the len() so logging cannot crash the run.
            log.info('Storing dataset views (%i rows)',
                     len(data.get('url') or []))
            self.store(period_name, period_complete_day, data)

            log.info('Downloading analytics for publisher views')
            data = self.download(start_date, end_date,
                                 path_prefix + '/publisher/[a-z0-9-_]+')
            log.info('Storing publisher views (%i rows)',
                     len(data.get('url') or []))
            self.store(period_name, period_complete_day, data)

            # Create the All records
            ga_model.post_update_url_stats()

            log.info('Associating datasets with their publisher')
            # NOTE: this step takes about 30 seconds.
            ga_model.update_publisher_stats(period_name)

        if self.stat == 'url-all':
            # This stat is split off just for test purposes
            ga_model.post_update_url_stats()

        if self.stat in (None, 'sitewide'):
            # Clean out old ga_stats data before storing the new
            ga_model.pre_update_sitewide_stats(period_name)
            log.info('Downloading and storing analytics for site-wide stats')
            self.sitewide_stats(period_name, period_complete_day)

        if self.stat in (None, 'social'):
            # Clean out old ga_stats data before storing the new
            ga_model.pre_update_social_stats(period_name)
            log.info('Downloading and storing analytics for social networks')
            self.update_social_info(period_name, start_date, end_date)
def download_and_store(self, periods):
    """Download Google Analytics stats for each period and store them.

    ``periods`` is an iterable of ``(period_name, period_complete_day,
    start_date, end_date)`` tuples.  ``self.stat`` selects which stat
    group to refresh: ``None`` runs everything; ``'url'``,
    ``'sitewide'`` and ``'social'`` run just that group; ``'url-all'``
    exists only so tests can run ``post_update_url_stats`` on its own.
    """
    for period_name, period_complete_day, start_date, end_date in periods:
        log.info('Period "%s" (%s - %s)',
                 self.get_full_period_name(period_name, period_complete_day),
                 start_date.strftime('%Y-%m-%d'),
                 end_date.strftime('%Y-%m-%d'))

        if self.delete_first:
            log.info('Deleting existing Analytics for this period "%s"',
                     period_name)
            ga_model.delete(period_name)

        if self.stat in (None, 'url'):
            # Clean out old url data before storing the new
            ga_model.pre_update_url_stats(period_name)
            accountName = config.get('googleanalytics.account')
            path_prefix = '~'  # i.e. it is a regex
            # Possibly there is a domain in the path.
            # I'm not sure why, but on the data.gov.uk property we see
            # the domain gets added to the GA path. e.g.
            #   '/data.gov.uk/data/search'
            #   '/co-prod2.dh.bytemark.co.uk/apps/test-app'
            # but on other properties we don't. e.g.
            #   '/data/search'
            path_prefix += '(/%s)?' % accountName

            log.info('Downloading analytics for dataset views')
            data = self.download(start_date, end_date,
                                 path_prefix + '/dataset/[a-z0-9-_]+')
            # data.get('url') may be None/absent if GA returned no rows --
            # guard the len() so logging cannot crash the run.
            log.info('Storing dataset views (%i rows)',
                     len(data.get('url') or []))
            self.store(period_name, period_complete_day, data)

            log.info('Downloading analytics for publisher views')
            data = self.download(start_date, end_date,
                                 path_prefix + '/publisher/[a-z0-9-_]+')
            log.info('Storing publisher views (%i rows)',
                     len(data.get('url') or []))
            self.store(period_name, period_complete_day, data)

            # Create the All records
            ga_model.post_update_url_stats()

            log.info('Associating datasets with their publisher')
            # NOTE: this step takes about 30 seconds.
            ga_model.update_publisher_stats(period_name)

        if self.stat == 'url-all':
            # This stat is split off just for test purposes
            ga_model.post_update_url_stats()

        if self.stat in (None, 'sitewide'):
            # Clean out old ga_stats data before storing the new
            ga_model.pre_update_sitewide_stats(period_name)
            log.info('Downloading and storing analytics for site-wide stats')
            self.sitewide_stats(period_name, period_complete_day)

        if self.stat in (None, 'social'):
            # Clean out old ga_stats data before storing the new
            ga_model.pre_update_social_stats(period_name)
            log.info('Downloading and storing analytics for social networks')
            self.update_social_info(period_name, start_date, end_date)