Beispiel #1
0
    def download_and_store(self, periods):
        """Fetch analytics for every period in ``periods`` and store them.

        Each item of ``periods`` is unpacked as
        ``(period_name, period_complete_day, start_date, end_date)``; both
        dates are formatted with ``strftime`` for the log line.

        Per period: existing analytics are deleted first when
        ``self.delete_first`` is set; unless ``self.skip_url_stats`` is set
        the dataset and publisher URL stats are downloaded and stored; the
        site-wide and social network stats are always updated afterwards.
        """
        # (download log message, filter handed to self.download,
        #  store log message) for each kind of URL stat, in processing order.
        url_jobs = (
            ('Downloading analytics for dataset views',
             '~^/dataset/[a-z0-9-_]+',
             'Storing dataset views (%i rows)'),
            ('Downloading analytics for publisher views',
             '~^/organization/[a-z0-9-_]+',
             'Storing publisher views (%i rows)'),
        )

        for period in periods:
            period_name, period_complete_day, start_date, end_date = period

            full_name = self.get_full_period_name(period_name,
                                                  period_complete_day)
            first_day = start_date.strftime('%Y-%m-%d')
            last_day = end_date.strftime('%Y-%m-%d')
            log.info('Period "%s" (%s - %s)', full_name, first_day, last_day)

            if self.delete_first:
                log.info('Deleting existing Analytics for this period "%s"',
                         period_name)
                ga_model.delete(period_name)

            if not self.skip_url_stats:
                # Old URL data for the period is cleared before the new rows
                # are stored.
                ga_model.pre_update_url_stats(period_name)

                # TODO: [extract SA]
                for download_msg, url_filter, store_msg in url_jobs:
                    log.info(download_msg)
                    data = self.download(start_date, end_date, url_filter)

                    log.info(store_msg, len(data.get('url')))
                    self.store(period_name, period_complete_day, data)

                # Make sure the "All" records are correct.
                ga_model.post_update_url_stats()

                log.info('Associating datasets with their publisher')
                # The original author noted this call takes about 30 seconds.
                ga_model.update_publisher_stats(period_name)

            log.info('Downloading and storing analytics for site-wide stats')
            self.sitewide_stats(period_name, period_complete_day)

            log.info('Downloading and storing analytics for social networks')
            self.update_social_info(period_name, start_date, end_date)
    def download_and_store(self, periods):
        """Download and store analytics for each entry of ``periods``.

        ``periods`` yields ``(period_name, period_complete_day, start_date,
        end_date)`` tuples.  For each one this optionally deletes existing
        analytics (``self.delete_first``), refreshes the dataset and
        organization URL stats unless ``self.skip_url_stats`` is set, and
        then updates the site-wide and social network stats.
        """
        for period in periods:
            period_name, period_complete_day, start_date, end_date = period

            log.info(
                'Period "%s" (%s - %s)',
                self.get_full_period_name(period_name, period_complete_day),
                start_date.strftime('%Y-%m-%d'),
                end_date.strftime('%Y-%m-%d'),
            )

            if self.delete_first:
                log.info(
                    'Deleting existing Analytics for this period "%s"',
                    period_name,
                )
                ga_model.delete(period_name)

            if not self.skip_url_stats:
                # Old URL data is cleared before the new rows are stored.
                ga_model.pre_update_url_stats(period_name)

                # NOTE(review): the account name is looked up but the filters
                # used below do not include it - confirm whether it is still
                # needed.
                account_name = config.get('googleanalytics.account')

                log.info('Downloading analytics for dataset views')
                dataset_views = self.download(
                    start_date, end_date, '~/dataset/[a-z0-9-_]+')

                dataset_row_count = len(dataset_views.get('url'))
                log.info('Storing dataset views (%i rows)', dataset_row_count)
                self.store(period_name, period_complete_day, dataset_views)

                log.info('Downloading analytics for organization views')
                organization_views = self.download(
                    start_date, end_date, '~/organization/[a-z0-9-_]+')

                # Unlike the dataset views, no row count is logged here.
                self.store(period_name, period_complete_day,
                           organization_views)

                # Make sure the "All" records are correct.
                ga_model.post_update_url_stats()

                log.info('Associating datasets with their organization')
                # The original author noted this call takes about 30 seconds.
                ga_model.update_publisher_stats(period_name)

            log.info('Downloading and storing analytics for site-wide stats')
            self.sitewide_stats(period_name, period_complete_day)

            log.info('Downloading and storing analytics for social networks')
            self.update_social_info(period_name, start_date, end_date)
Beispiel #3
0
    def download_and_store(self, periods):
        """Download and store the selected analytics for each period.

        ``periods`` yields ``(period_name, period_complete_day, start_date,
        end_date)`` tuples.  ``self.stat`` selects what is processed:
        ``None`` runs the URL, site-wide and social stats; ``'url'``,
        ``'sitewide'`` and ``'social'`` run only that one; ``'url-all'``
        only calls ``ga_model.post_update_url_stats()``.  When
        ``self.delete_first`` is set, existing analytics for the period are
        deleted before anything else.
        """
        for period in periods:
            period_name, period_complete_day, start_date, end_date = period

            full_name = self.get_full_period_name(period_name,
                                                  period_complete_day)
            first_day = start_date.strftime('%Y-%m-%d')
            last_day = end_date.strftime('%Y-%m-%d')
            log.info('Period "%s" (%s - %s)', full_name, first_day, last_day)

            if self.delete_first:
                log.info('Deleting existing Analytics for this period "%s"',
                         period_name)
                ga_model.delete(period_name)

            if self.stat in (None, 'url'):
                # Old URL data is cleared before the new rows are stored.
                ga_model.pre_update_url_stats(period_name)

                account_name = config.get('googleanalytics.account')

                # The leading '~' marks the filter as a regex.  The optional
                # group allows for a domain at the start of the path: the
                # original author saw the data.gov.uk property report paths
                # such as
                #   '/data.gov.uk/data/search'
                #   '/co-prod2.dh.bytemark.co.uk/apps/test-app'
                # while other properties report plain paths such as
                #   '/data/search'
                path_prefix = '~' + '(/%s)?' % account_name

                # (download log message, path regex suffix, store log
                #  message), in processing order.
                url_jobs = (
                    ('Downloading analytics for dataset views',
                     '/dataset/[a-z0-9-_]+',
                     'Storing dataset views (%i rows)'),
                    ('Downloading analytics for publisher views',
                     '/publisher/[a-z0-9-_]+',
                     'Storing publisher views (%i rows)'),
                )
                for download_msg, path_suffix, store_msg in url_jobs:
                    log.info(download_msg)
                    data = self.download(start_date, end_date,
                                         path_prefix + path_suffix)

                    log.info(store_msg, len(data.get('url')))
                    self.store(period_name, period_complete_day, data)

                # Create the "All" records.
                ga_model.post_update_url_stats()

                log.info('Associating datasets with their publisher')
                # The original author noted this call takes about 30 seconds.
                ga_model.update_publisher_stats(period_name)

            if self.stat == 'url-all':
                # Split off as its own stat purely for test purposes.
                ga_model.post_update_url_stats()

            if self.stat in (None, 'sitewide'):
                # Old ga_stats data is cleared before the new is stored.
                ga_model.pre_update_sitewide_stats(period_name)

                log.info(
                    'Downloading and storing analytics for site-wide stats')
                self.sitewide_stats(period_name, period_complete_day)

            if self.stat in (None, 'social'):
                # Old ga_stats data is cleared before the new is stored.
                ga_model.pre_update_social_stats(period_name)

                log.info(
                    'Downloading and storing analytics for social networks')
                self.update_social_info(period_name, start_date, end_date)
    def download_and_store(self, periods):
        """Run the analytics download/store cycle for every given period.

        Items of ``periods`` are unpacked as ``(period_name,
        period_complete_day, start_date, end_date)``.  Which stats are
        handled depends on ``self.stat``: ``None`` means URL, site-wide and
        social stats; ``'url'``, ``'sitewide'`` or ``'social'`` restricts
        the run to that group; ``'url-all'`` only calls
        ``ga_model.post_update_url_stats()``.  Existing analytics for a
        period are deleted up front when ``self.delete_first`` is set.
        """
        for period in periods:
            period_name, period_complete_day, start_date, end_date = period

            log.info(
                'Period "%s" (%s - %s)',
                self.get_full_period_name(period_name, period_complete_day),
                start_date.strftime('%Y-%m-%d'),
                end_date.strftime('%Y-%m-%d'),
            )

            if self.delete_first:
                log.info(
                    'Deleting existing Analytics for this period "%s"',
                    period_name,
                )
                ga_model.delete(period_name)

            if self.stat in (None, 'url'):
                # Old URL data is cleared before the new rows are stored.
                ga_model.pre_update_url_stats(period_name)

                account_name = config.get('googleanalytics.account')

                # '~' marks the filter as a regex.  A domain may or may not
                # lead the path: the original author saw it added on the
                # data.gov.uk property, e.g.
                #   '/data.gov.uk/data/search'
                #   '/co-prod2.dh.bytemark.co.uk/apps/test-app'
                # but not on other properties, e.g.
                #   '/data/search'
                # so the account name is matched as an optional group.
                path_prefix = '~' + '(/%s)?' % account_name

                log.info('Downloading analytics for dataset views')
                dataset_filter = path_prefix + '/dataset/[a-z0-9-_]+'
                dataset_views = self.download(start_date, end_date,
                                              dataset_filter)

                dataset_row_count = len(dataset_views.get('url'))
                log.info('Storing dataset views (%i rows)', dataset_row_count)
                self.store(period_name, period_complete_day, dataset_views)

                log.info('Downloading analytics for publisher views')
                publisher_filter = path_prefix + '/publisher/[a-z0-9-_]+'
                publisher_views = self.download(start_date, end_date,
                                                publisher_filter)

                publisher_row_count = len(publisher_views.get('url'))
                log.info('Storing publisher views (%i rows)',
                         publisher_row_count)
                self.store(period_name, period_complete_day, publisher_views)

                # Create the "All" records.
                ga_model.post_update_url_stats()

                log.info('Associating datasets with their publisher')
                # The original author noted this call takes about 30 seconds.
                ga_model.update_publisher_stats(period_name)

            if self.stat == 'url-all':
                # Split off as its own stat purely for test purposes.
                ga_model.post_update_url_stats()

            if self.stat in (None, 'sitewide'):
                # Old ga_stats data is cleared before the new is stored.
                ga_model.pre_update_sitewide_stats(period_name)

                log.info(
                    'Downloading and storing analytics for site-wide stats')
                self.sitewide_stats(period_name, period_complete_day)

            if self.stat in (None, 'social'):
                # Old ga_stats data is cleared before the new is stored.
                ga_model.pre_update_social_stats(period_name)

                log.info(
                    'Downloading and storing analytics for social networks')
                self.update_social_info(period_name, start_date, end_date)
Beispiel #5
0
    def download_and_store(self, periods):
        """Download analytics for each period and store or print them.

        ``periods`` yields ``(period_name, period_complete_day, start_date,
        end_date)`` tuples.  What is processed depends on ``self.stat``
        (``None`` or one of the ``DownloadAnalytics.*_STAT`` constants) and
        on ``self.kind_stats``:

        * ``KIND_STAT_PACKAGE_RESOURCES``: package and/or resource views.
        * ``KIND_STAT_VISITS``: session visits, one download per entry of
          ``DownloadAnalytics.SECTIONS``.

        When ``self.save_stats`` is true the downloaded rows are stored
        (old rows for the period are cleared first, and existing analytics
        are deleted when ``self.delete_first`` is also set).  Otherwise
        nothing is written and the rows are printed instead.
        """
        for period_name, period_complete_day, start_date, end_date in periods:
            log.info(
                'Period "%s" (%s - %s)',
                self.get_full_period_name(period_name, period_complete_day),
                start_date.strftime('%Y-%m-%d'), end_date.strftime('%Y-%m-%d'))
            # Single-argument print(...) behaves the same as the print
            # statement on Python 2 and also parses on Python 3.
            print('period_name=%s' % period_name)
            if self.save_stats and self.delete_first:
                log.info('Deleting existing Analytics for this period "%s"',
                         period_name)
                ga_model.delete(period_name)

            if (self.stat in (None, DownloadAnalytics.PACKAGE_STAT) and
                    self.kind_stats ==
                    DownloadAnalytics.KIND_STAT_PACKAGE_RESOURCES):
                stat = DownloadAnalytics.PACKAGE_STAT
                if self.save_stats:
                    # Clean out old package data before storing the new.
                    ga_model.pre_update_dge_ga_package_stats(period_name)
                log.info('Downloading analytics for package views')
                data = self.download(
                    start_date, end_date, DownloadAnalytics.PACKAGE_URL_REGEX,
                    DownloadAnalytics.PACKAGE_URL_EXCLUDED_REGEXS, stat)
                if data:
                    # Default to an empty list so a result without this
                    # stat's key is reported as zero rows instead of
                    # raising when iterated.
                    rows = data.get(stat, [])
                    if self.save_stats:
                        log.info('Storing package views (%i rows)', len(rows))
                        print('Storing package views (%i rows)' % len(rows))
                        self.store(period_name, period_complete_day, data,
                                   stat)
                        # Create the "All" records.
                        ga_model.post_update_dge_ga_package_stats()
                    else:
                        print('The result contains %i rows:' % len(rows))
                        for row in rows:
                            print(row)

            if (self.stat in (None, DownloadAnalytics.RESOURCE_STAT) and
                    self.kind_stats ==
                    DownloadAnalytics.KIND_STAT_PACKAGE_RESOURCES):
                stat = DownloadAnalytics.RESOURCE_STAT
                if self.save_stats:
                    # Clean out old resource data before storing the new.
                    ga_model.pre_update_dge_ga_resource_stats(period_name)

                log.info('Downloading analytics for resource views')
                # NOTE(review): resource views are requested with the
                # PACKAGE_* URL regexes, same as package views - confirm
                # this is intended.
                data = self.download(
                    start_date, end_date, DownloadAnalytics.PACKAGE_URL_REGEX,
                    DownloadAnalytics.PACKAGE_URL_EXCLUDED_REGEXS, stat)
                if data:
                    rows = data.get(stat, [])
                    if self.save_stats:
                        log.info('Storing resource views (%i rows)',
                                 len(rows))
                        print('Storing resource views (%i rows)' % len(rows))
                        self.store(period_name, period_complete_day, data,
                                   stat)
                        # Create the "All" records.
                        ga_model.post_update_dge_ga_resource_stats()
                    else:
                        print('The result contains %i rows:' % len(rows))
                        for row in rows:
                            print(row)

            if (self.stat in (None, DownloadAnalytics.VISIT_STAT) and
                    self.kind_stats == DownloadAnalytics.KIND_STAT_VISITS):
                stat = DownloadAnalytics.VISIT_STAT
                if self.save_stats:
                    # Clean out old visit data before storing the new.
                    ga_model.pre_update_dge_ga_visit_stats(period_name)

                # One (key, name, value) tuple per section that returned data.
                visits = []
                for section in DownloadAnalytics.SECTIONS:
                    key = section.get('key', None)
                    name = section.get('name', None)
                    path = section.get('url_regex', '')
                    # NOTE(review): 'exluded_url_regex' looks like a typo for
                    # 'excluded_url_regex'; left unchanged because it must
                    # match the keys used in DownloadAnalytics.SECTIONS.
                    excluded_paths = section.get('exluded_url_regex', [])
                    if name or key:
                        # Two placeholders for the two arguments; with only
                        # one, logging fails to format the message.
                        log.info('Downloading analytics for %s %s sessions',
                                 name, key)
                        print('Downloading analytics for %s %s sessions' % (
                            name, key))
                        data = self.download(start_date, end_date, path,
                                             excluded_paths, stat)
                        if data:
                            visits.append((key, name, data.get(stat, 0)))
                if visits:
                    if self.save_stats:
                        log.info('Storing session visits (%i rows)',
                                 len(visits))
                        print('Storing session visits (%i rows)' % len(visits))
                        self.store(period_name, period_complete_day,
                                   {stat: visits}, stat)
                    else:
                        print('The result contains %i rows:' % len(visits))
                        for row in visits:
                            print(row)