Example #1
    def download_and_store(self, periods):
        for period_name, period_complete_day, start_date, end_date in periods:
            log.info(
                'Period "%s" (%s - %s)',
                self.get_full_period_name(period_name, period_complete_day),
                start_date.strftime('%Y-%m-%d'), end_date.strftime('%Y-%m-%d'))

            if self.delete_first:
                log.info('Deleting existing Analytics for this period "%s"',
                         period_name)
                ga_model.delete(period_name)

            if not self.skip_url_stats:
                # Clean out old url data before storing the new
                ga_model.pre_update_url_stats(period_name)

                # accountName = config.get('ds_stats.ga.account')

                log.info('Downloading analytics for dataset views')
                # TODO: [extract SA]
                data = self.download(start_date, end_date,
                                     '~^/dataset/[a-z0-9-_]+')

                log.info('Storing dataset views (%i rows)',
                         len(data.get('url')))
                self.store(
                    period_name,
                    period_complete_day,
                    data,
                )

                log.info('Downloading analytics for publisher views')
                data = self.download(start_date, end_date,
                                     '~^/organization/[a-z0-9-_]+')

                log.info('Storing publisher views (%i rows)',
                         len(data.get('url')))
                self.store(
                    period_name,
                    period_complete_day,
                    data,
                )

                # Make sure the All records are correct.
                ga_model.post_update_url_stats()

                log.info('Associating datasets with their publisher')
                # about 30 seconds.
                ga_model.update_publisher_stats(period_name)

            log.info('Downloading and storing analytics for site-wide stats')
            self.sitewide_stats(period_name, period_complete_day)

            log.info('Downloading and storing analytics for social networks')
            self.update_social_info(period_name, start_date, end_date)
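Each tuple in `periods` unpacks into a period name, the last complete day of that period, and the start/end dates. A minimal usage sketch for a single monthly period follows; the `AnalyticsCollector` class name and the concrete period values are assumptions for illustration, not taken from the example above.

    import datetime

    # One monthly period: (period_name, period_complete_day, start_date, end_date).
    # period_complete_day is taken here to mean the last day for which GA data
    # is complete (an assumption about its semantics).
    periods = [
        ('2014-01', 31, datetime.date(2014, 1, 1), datetime.date(2014, 1, 31)),
    ]

    collector = AnalyticsCollector()  # assumed class providing download_and_store()
    collector.delete_first = False
    collector.skip_url_stats = False
    collector.download_and_store(periods)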
Example #2
    def download_and_store(self, periods):
        for period_name, period_complete_day, start_date, end_date in periods:
            log.info('Period "%s" (%s - %s)',
                     self.get_full_period_name(period_name, period_complete_day),
                     start_date.strftime('%Y-%m-%d'),
                     end_date.strftime('%Y-%m-%d'))

            if self.delete_first:
                log.info('Deleting existing Analytics for this period "%s"',
                         period_name)
                ga_model.delete(period_name)

            if not self.skip_url_stats:
                # Clean out old url data before storing the new
                ga_model.pre_update_url_stats(period_name)

                accountName = config.get('googleanalytics.account')

                log.info('Downloading analytics for dataset views')
                #data = self.download(start_date, end_date, '~/%s/dataset/[a-z0-9-_]+' % accountName)
                data = self.download(start_date, end_date, '~/dataset/[a-z0-9-_]+')
                
                log.info('Storing dataset views (%i rows)', len(data.get('url')))
                self.store(period_name, period_complete_day, data)

                log.info('Downloading analytics for organization views')
                #data = self.download(start_date, end_date, '~/%s/publisher/[a-z0-9-_]+' % accountName)
                data = self.download(start_date, end_date, '~/organization/[a-z0-9-_]+')

                #log.info('Storing publisher views (%i rows)', len(data.get('url')))
                self.store(period_name, period_complete_day, data)

                # Make sure the All records are correct.
                ga_model.post_update_url_stats()

                log.info('Associating datasets with their organization')
                ga_model.update_publisher_stats(period_name)  # about 30 seconds.


            log.info('Downloading and storing analytics for site-wide stats')
            self.sitewide_stats(period_name, period_complete_day)

            log.info('Downloading and storing analytics for social networks')
            self.update_social_info(period_name, start_date, end_date)
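Both variants pass `self.download()` a path beginning with '~', which the later examples' comments identify as a marker meaning "treat the rest as a regular expression". If the download step is backed by the legacy Google Analytics Core Reporting API (v3), that path would typically become a `ga:pagePath` regex filter. A rough sketch of such a query follows; the authorised `service` object, profile id and helper name are assumptions rather than code from this page.

    # Sketch only: assumes an authorised google-api-python-client `service`
    # for the GA Core Reporting API v3 and a numeric profile (view) id.
    def download_pageviews(service, profile_id, start_date, end_date, path_regex):
        results = service.data().ga().get(
            ids='ga:%s' % profile_id,
            start_date=start_date.strftime('%Y-%m-%d'),
            end_date=end_date.strftime('%Y-%m-%d'),
            metrics='ga:pageviews,ga:visits',
            dimensions='ga:pagePath',
            # '=~' is the v3 filter operator for "matches regular expression"
            filters='ga:pagePath=~%s' % path_regex,  # e.g. '^/dataset/[a-z0-9-_]+'
            max_results=10000,
        ).execute()
        return results.get('rows', [])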
Example #3
    def download_and_store(self, periods):
        for period_name, period_complete_day, start_date, end_date in periods:
            log.info(
                'Period "%s" (%s - %s)',
                self.get_full_period_name(period_name, period_complete_day),
                start_date.strftime('%Y-%m-%d'), end_date.strftime('%Y-%m-%d'))

            if self.delete_first:
                log.info('Deleting existing Analytics for this period "%s"',
                         period_name)
                ga_model.delete(period_name)

            if self.stat in (None, 'url'):
                # Clean out old url data before storing the new
                ga_model.pre_update_url_stats(period_name)

                accountName = config.get('googleanalytics.account')

                path_prefix = '~'  # i.e. it is a regex
                # Possibly there is a domain in the path.
                # I'm not sure why, but on the data.gov.uk property we see
                # the domain gets added to the GA path. e.g.
                #   '/data.gov.uk/data/search'
                #   '/co-prod2.dh.bytemark.co.uk/apps/test-app'
                # but on other properties we don't. e.g.
                #   '/data/search'
                path_prefix += '(/%s)?' % accountName

                log.info('Downloading analytics for dataset views')
                data = self.download(start_date, end_date,
                                     path_prefix + '/dataset/[a-z0-9-_]+')

                log.info('Storing dataset views (%i rows)',
                         len(data.get('url')))
                self.store(
                    period_name,
                    period_complete_day,
                    data,
                )

                log.info('Downloading analytics for publisher views')
                data = self.download(start_date, end_date,
                                     path_prefix + '/publisher/[a-z0-9-_]+')

                log.info('Storing publisher views (%i rows)',
                         len(data.get('url')))
                self.store(
                    period_name,
                    period_complete_day,
                    data,
                )

                # Create the All records
                ga_model.post_update_url_stats()

                log.info('Associating datasets with their publisher')
                ga_model.update_publisher_stats(
                    period_name)  # about 30 seconds.

            if self.stat == 'url-all':
                # This stat is split off just for test purposes
                ga_model.post_update_url_stats()

            if self.stat in (None, 'sitewide'):
                # Clean out old ga_stats data before storing the new
                ga_model.pre_update_sitewide_stats(period_name)

                log.info(
                    'Downloading and storing analytics for site-wide stats')
                self.sitewide_stats(period_name, period_complete_day)

            if self.stat in (None, 'social'):
                # Clean out old ga_stats data before storing the new
                ga_model.pre_update_social_stats(period_name)

                log.info(
                    'Downloading and storing analytics for social networks')
                self.update_social_info(period_name, start_date, end_date)
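The comment block above notes that on some GA properties the hostname is prepended to the page path, which is why `path_prefix` is built as '~(/<account>)?'. Dropping the leading '~' marker (it only tells `self.download()` that the path is a regex), the resulting pattern can be checked locally with Python's `re` module; 'data.gov.uk' is simply the account mentioned in the comment, used here for illustration.

    import re

    account_name = 'data.gov.uk'
    # Equivalent of path_prefix + '/dataset/[a-z0-9-_]+' with the '~' stripped.
    pattern = re.compile('(/%s)?/dataset/[a-z0-9-_]+' % re.escape(account_name))

    assert pattern.match('/dataset/spend-over-25k')               # plain path
    assert pattern.match('/data.gov.uk/dataset/spend-over-25k')   # domain-prefixed path
    assert not pattern.match('/about')                            # unrelated page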
Example #4
    def download_and_store(self, periods):
        for period_name, period_complete_day, start_date, end_date in periods:
            log.info('Period "%s" (%s - %s)',
                     self.get_full_period_name(period_name, period_complete_day),
                     start_date.strftime('%Y-%m-%d'),
                     end_date.strftime('%Y-%m-%d'))

            if self.delete_first:
                log.info('Deleting existing Analytics for this period "%s"',
                         period_name)
                ga_model.delete(period_name)

            if self.stat in (None, 'url'):
                # Clean out old url data before storing the new
                ga_model.pre_update_url_stats(period_name)

                accountName = config.get('googleanalytics.account')

                path_prefix = '~'  # i.e. it is a regex
                # Possibly there is a domain in the path.
                # I'm not sure why, but on the data.gov.uk property we see
                # the domain gets added to the GA path. e.g.
                #   '/data.gov.uk/data/search'
                #   '/co-prod2.dh.bytemark.co.uk/apps/test-app'
                # but on other properties we don't. e.g.
                #   '/data/search'
                path_prefix += '(/%s)?' % accountName

                log.info('Downloading analytics for dataset views')
                data = self.download(start_date, end_date,
                                     path_prefix + '/dataset/[a-z0-9-_]+')

                log.info('Storing dataset views (%i rows)', len(data.get('url')))
                self.store(period_name, period_complete_day, data)

                log.info('Downloading analytics for publisher views')
                data = self.download(start_date, end_date,
                                     path_prefix + '/publisher/[a-z0-9-_]+')

                log.info('Storing publisher views (%i rows)', len(data.get('url')))
                self.store(period_name, period_complete_day, data)

                # Create the All records
                ga_model.post_update_url_stats()

                log.info('Associating datasets with their publisher')
                ga_model.update_publisher_stats(period_name)  # about 30 seconds.

            if self.stat == 'url-all':
                # This stat is split off just for test purposes
                ga_model.post_update_url_stats()

            if self.stat in (None, 'sitewide'):
                # Clean out old ga_stats data before storing the new
                ga_model.pre_update_sitewide_stats(period_name)

                log.info('Downloading and storing analytics for site-wide stats')
                self.sitewide_stats(period_name, period_complete_day)

            if self.stat in (None, 'social'):
                # Clean out old ga_stats data before storing the new
                ga_model.pre_update_social_stats(period_name)

                log.info('Downloading and storing analytics for social networks')
                self.update_social_info(period_name, start_date, end_date)
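The last two examples gate each stage on `self.stat`: `None` runs everything, while 'url', 'url-all', 'sitewide' or 'social' runs a single stage (with 'url-all' split off for testing, per the comment). A small sketch of how a command-line entry point might set that attribute; the option name, `AnalyticsLoader` class and `build_periods()` helper are hypothetical.

    import argparse

    def main():
        parser = argparse.ArgumentParser(description='Load Google Analytics stats')
        parser.add_argument('--stat', default=None,
                            choices=['url', 'url-all', 'sitewide', 'social'],
                            help='run a single stage; omit to run all stages')
        parser.add_argument('--delete-first', action='store_true')
        args = parser.parse_args()

        loader = AnalyticsLoader()            # assumed class providing download_and_store()
        loader.stat = args.stat               # None means "run every stage"
        loader.delete_first = args.delete_first
        loader.download_and_store(build_periods())  # build_periods() is hypothetical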