Exemple #1
0
 def test_latest(self):
     # Smoke test: building a downloader from the stored credentials and
     # fetching the latest analytics must not raise.
     service = init_service("token.dat", "credentials.json")
     try:
         profile = get_profile_id(service)
         analytics = DownloadAnalytics(service, profile_id=profile)
         analytics.latest()
     except Exception as err:
         # Report any failure as a test failure carrying the exception.
         assert False, err
 def test_filter_out_long_tail(self):
     # Entries below the threshold (10) are expected to be removed from the
     # dict in place; the larger entries must be left untouched.
     browser_hits = {'Firefox': 100, 'Obscure Browser': 5, 'Chrome': 150}
     expected = {'Firefox': 100, 'Chrome': 150}
     DownloadAnalytics._filter_out_long_tail(browser_hits, 10)
     assert_equal(browser_hits, expected)
 def test_since(self):
     # Smoke test: downloading analytics for a date 30 days in the past must
     # not raise.
     svc = init_service("token.dat", "credentials.json")
     downloader = DownloadAnalytics(svc, profile_id=get_profile_id(svc))
     # Subtract a *positive* delta: the previous `days=-30` combined with
     # the subtraction produced a date 30 days in the future.
     thirty_days_ago = datetime.datetime.now() - datetime.timedelta(days=30)
     try:
         downloader.for_date(thirty_days_ago)
     except Exception as e:
         assert False, e
 def test_latest(self):
     # Smoke test: fetching the latest analytics with the stored token and
     # credentials must complete without raising.
     ga_service = init_service("token.dat", "credentials.json")
     try:
         ga_profile_id = get_profile_id(ga_service)
         DownloadAnalytics(ga_service, profile_id=ga_profile_id).latest()
     except Exception as error:
         # Any exception is turned into a test failure showing the error.
         assert False, error
Exemple #5
0
 def test_since(self):
     # Smoke test: downloading analytics for a date 30 days ago must not
     # raise.
     svc = init_service("token.dat", "credentials.json")
     downloader = DownloadAnalytics(svc, profile_id=get_profile_id(svc))
     # `now - timedelta(days=-30)` is a double negative that yields a date
     # 30 days in the future; use a positive delta to look back instead.
     since = datetime.datetime.now() - datetime.timedelta(days=30)
     try:
         downloader.for_date(since)
     except Exception as e:
         assert False, e
Exemple #6
0
 def test_filter_out_long_tail(self):
     # The filter mutates the dict in place: the entry below the threshold
     # is expected to disappear, the others to remain unchanged.
     threshold = 10
     counts = {
         'Firefox': 100,
         'Obscure Browser': 5,
         'Chrome': 150,
     }
     DownloadAnalytics._filter_out_long_tail(counts, threshold)
     assert_equal(counts, {
         'Firefox': 100,
         'Chrome': 150,
     })
Exemple #7
0
def command(config_file):
    """Run the daily script: load settings, sanity-check the database and
    download the latest Google Analytics data.

    :param config_file: path of the CKAN .ini file. Its base name, with
        '.ini' removed, is used as the CKAN instance name when building the
        default directory and file names.

    The process exits with status 1 when fewer than 2 active packages are
    found or when Google Analytics authorization fails. Any other exception
    raised while processing analytics is logged rather than propagated.
    """
    # Import ckan as it changes the dependent packages imported
    from dump_analysis import (get_run_info, TxtAnalysisFile, CsvAnalysisFile,
                               DumpAnalysisOptions, DumpAnalysis)

    from pylons import config

    # settings
    ckan_instance_name = os.path.basename(config_file).replace('.ini', '')
    if ckan_instance_name not in ['development', 'dgutest']:
        default_dump_dir = '/var/lib/ckan/%s/static/dump' % ckan_instance_name
        default_analysis_dir = '/var/lib/ckan/%s/static/dump_analysis' % ckan_instance_name
        default_backup_dir = '/var/backups/ckan/%s' % ckan_instance_name
        default_openspending_reports_dir = '/var/lib/ckan/%s/openspending_reports' % ckan_instance_name
    else:
        # test purposes
        default_dump_dir = '~/dump'
        default_analysis_dir = '~/dump_analysis'
        default_backup_dir = '~/backups'
        default_openspending_reports_dir = '~/openspending_reports'
    dump_dir = os.path.expanduser(config.get('ckan.dump_dir',
                                             default_dump_dir))
    analysis_dir = os.path.expanduser(
        config.get('ckan.dump_analysis_dir', default_analysis_dir))
    backup_dir = os.path.expanduser(
        config.get('ckan.backup_dir', default_backup_dir))
    openspending_reports_dir = os.path.expanduser(
        config.get('dgu.openspending_reports_dir',
                   default_openspending_reports_dir))
    # An empty value means "no token configured": analytics are skipped.
    ga_token_filepath = os.path.expanduser(
        config.get('googleanalytics.token.filepath', ''))
    dump_filebase = config.get('ckan.dump_filename_base',
                               'data.gov.uk-ckan-meta-data-%Y-%m-%d')
    dump_analysis_filebase = config.get('ckan.dump_analysis_base',
                                        'data.gov.uk-analysis')
    backup_filebase = config.get('ckan.backup_filename_base',
                                 ckan_instance_name + '.%Y-%m-%d.pg_dump')
    tmp_filepath = config.get('ckan.temp_filepath', '/tmp/dump.tmp')
    openspending_reports_url = config.get(
        'ckan.openspending_reports_url',
        'http://data.etl.openspending.org/uk25k/report/')

    log = logging.getLogger('ckanext.dgu.bin.gov_daily')
    log.info('----------------------------')
    log.info('Starting daily script')
    start_time = datetime.datetime.today()

    import ckan.model as model
    import ckan.lib.dumper as dumper
    from ckanext.dgu.lib.inventory import inventory_dumper

    # Check database looks right
    num_packages_before = model.Session.query(
        model.Package).filter_by(state='active').count()
    log.info('Number of existing active packages: %i', num_packages_before)
    if num_packages_before < 2:
        # Almost no packages: treat the database as broken and abort.
        log.error('Expected more packages.')
        sys.exit(1)
    elif num_packages_before < 2500:
        log.warning('Expected more packages.')

    # Analytics
    try:
        if ga_token_filepath:
            if run_task('analytics'):
                log.info('Getting analytics for this month')
                from ckanext.ga_report.download_analytics import DownloadAnalytics
                from ckanext.ga_report.ga_auth import (init_service,
                                                       get_profile_id)
                try:
                    token, svc = init_service(ga_token_filepath, None)
                except TypeError:
                    # A TypeError here is treated as an authorization
                    # failure. Exit non-zero so the failure is visible to
                    # whatever invoked the script (SystemExit is not caught
                    # by the `except Exception` below).
                    log.error(
                        'Could not complete authorization for Google Analytics. '
                        'Have you correctly run the getauthtoken task and '
                        'specified the correct token file?')
                    sys.exit(1)
                downloader = DownloadAnalytics(svc,
                                               token=token,
                                               profile_id=get_profile_id(svc),
                                               delete_first=False,
                                               skip_url_stats=False)
                downloader.latest()
        else:
            log.info(
                'No token specified, so not downloading Google Analytics data')
    except Exception as exc_analytics:
        # Analytics problems are logged with a traceback but not re-raised.
        log.error("Failed to process Google Analytics data")
        log.exception(exc_analytics)
Exemple #8
0
def command(config_file):
    # Daily script entry point: reads settings from the pylons config,
    # sanity-checks the number of active packages, then downloads the latest
    # Google Analytics data when a token file is configured.
    # `config_file` is the path of the CKAN .ini file; its base name (with
    # '.ini' removed) is used as the CKAN instance name.

    # Import ckan as it changes the dependent packages imported
    from dump_analysis import (get_run_info, TxtAnalysisFile,
                               CsvAnalysisFile, DumpAnalysisOptions,
                               DumpAnalysis)

    from pylons import config

    # settings
    ckan_instance_name = os.path.basename(config_file).replace('.ini', '')
    if ckan_instance_name not in ['development', 'dgutest']:
        default_dump_dir = '/var/lib/ckan/%s/static/dump' % ckan_instance_name
        default_analysis_dir = '/var/lib/ckan/%s/static/dump_analysis' % ckan_instance_name
        default_backup_dir = '/var/backups/ckan/%s' % ckan_instance_name
        default_openspending_reports_dir = '/var/lib/ckan/%s/openspending_reports' % ckan_instance_name
    else:
        # test purposes
        default_dump_dir = '~/dump'
        default_analysis_dir = '~/dump_analysis'
        default_backup_dir = '~/backups'
        default_openspending_reports_dir = '~/openspending_reports'
    # Each setting below comes from the config when present, otherwise from
    # the default computed above; '~' is expanded in directory paths.
    dump_dir = os.path.expanduser(config.get('ckan.dump_dir',
                                             default_dump_dir))
    analysis_dir = os.path.expanduser(config.get('ckan.dump_analysis_dir',
                                             default_analysis_dir))
    backup_dir = os.path.expanduser(config.get('ckan.backup_dir',
                                               default_backup_dir))
    openspending_reports_dir = os.path.expanduser(config.get('dgu.openspending_reports_dir',
                                                             default_openspending_reports_dir))
    # Defaults to '' which disables the analytics step further down.
    ga_token_filepath = os.path.expanduser(config.get('googleanalytics.token.filepath', ''))
    dump_filebase = config.get('ckan.dump_filename_base',
                               'data.gov.uk-ckan-meta-data-%Y-%m-%d')
    dump_analysis_filebase = config.get('ckan.dump_analysis_base',
                               'data.gov.uk-analysis')
    backup_filebase = config.get('ckan.backup_filename_base',
                                 ckan_instance_name + '.%Y-%m-%d.pg_dump')
    tmp_filepath = config.get('ckan.temp_filepath', '/tmp/dump.tmp')
    openspending_reports_url = config.get('ckan.openspending_reports_url',
                                          'http://data.etl.openspending.org/uk25k/report/')


    log = logging.getLogger('ckanext.dgu.bin.gov_daily')
    log.info('----------------------------')
    log.info('Starting daily script')
    start_time = datetime.datetime.today()

    import ckan.model as model
    import ckan.lib.dumper as dumper
    from ckanext.dgu.lib.inventory import inventory_dumper

    # Check database looks right
    num_packages_before = model.Session.query(model.Package).filter_by(state='active').count()
    log.info('Number of existing active packages: %i' % num_packages_before)
    if num_packages_before < 2:
        # Fewer than 2 active packages: abort with a non-zero exit status.
        log.error('Expected more packages.')
        sys.exit(1)
    elif num_packages_before < 2500:
        # Low but non-trivial count: warn and carry on.
        log.warn('Expected more packages.')

    # Analytics
    try:
        if ga_token_filepath:
            if run_task('analytics'):
                log.info('Getting analytics for this month')
                from ckanext.ga_report.download_analytics import DownloadAnalytics
                from ckanext.ga_report.ga_auth import (init_service, get_profile_id)
                # A configured but missing token file is logged and the
                # download is skipped; the script does not exit here.
                if not os.path.exists(ga_token_filepath):
                    log.error('GA Token does not exist: %s - not downloading '
                              'analytics' % ga_token_filepath)
                else:
                    try:
                        token, svc = init_service(ga_token_filepath, None)
                    except TypeError, e:
                        # A TypeError here is reported as an authorization
                        # failure and ends the process with status 1.
                        log.error('Could not complete authorization for Google '
                                'Analytics. Have you correctly run the '
                                'getauthtoken task and specified the correct '
                                'token file?\nError: %s', e)
                        sys.exit(1)
                    downloader = DownloadAnalytics(svc, token=token, profile_id=get_profile_id(svc),
                                                delete_first=False)
                    downloader.latest()
        else:
Exemple #9
0
def command(config_file):
    # Daily script entry point: reads settings from the pylons config,
    # sanity-checks the package count, downloads the latest Google Analytics
    # data when a token file is configured, then starts copying the
    # OpenSpending reports.
    # `config_file` is the path of the CKAN .ini file; its base name (with
    # '.ini' removed) is used as the CKAN instance name.

    # Import ckan as it changes the dependent packages imported
    from dump_analysis import (get_run_info, TxtAnalysisFile,
                               CsvAnalysisFile, DumpAnalysisOptions,
                               DumpAnalysis)

    from pylons import config

    # settings
    ckan_instance_name = os.path.basename(config_file).replace('.ini', '')
    if ckan_instance_name not in ['development', 'dgutest']:
        default_dump_dir = '/var/lib/ckan/%s/static/dump' % ckan_instance_name
        default_analysis_dir = '/var/lib/ckan/%s/static/dump_analysis' % ckan_instance_name
        default_backup_dir = '/var/backups/ckan/%s' % ckan_instance_name
        default_openspending_reports_dir = '/var/lib/ckan/%s/openspending_reports' % ckan_instance_name
    else:
        # test purposes
        default_dump_dir = '~/dump'
        default_analysis_dir = '~/dump_analysis'
        default_backup_dir = '~/backups'
        default_openspending_reports_dir = '~/openspending_reports'
    # Each setting below comes from the config when present, otherwise from
    # the default computed above; '~' is expanded in directory paths.
    dump_dir = os.path.expanduser(config.get('ckan.dump_dir',
                                             default_dump_dir))
    analysis_dir = os.path.expanduser(config.get('ckan.dump_analysis_dir',
                                             default_analysis_dir))
    backup_dir = os.path.expanduser(config.get('ckan.backup_dir',
                                               default_backup_dir))
    openspending_reports_dir = os.path.expanduser(config.get('dgu.openspending_reports_dir',
                                                             default_openspending_reports_dir))
    # Defaults to '' which disables the analytics step further down.
    ga_token_filepath = os.path.expanduser(config.get('googleanalytics.token.filepath', ''))
    dump_filebase = config.get('ckan.dump_filename_base',
                               'data.gov.uk-ckan-meta-data-%Y-%m-%d')
    dump_analysis_filebase = config.get('ckan.dump_analysis_base',
                               'data.gov.uk-analysis')
    backup_filebase = config.get('ckan.backup_filename_base',
                                 ckan_instance_name + '.%Y-%m-%d.pg_dump')
    tmp_filepath = config.get('ckan.temp_filepath', '/tmp/dump.tmp')
    openspending_reports_url = config.get('ckan.openspending_reports_url',
                                          'http://data.etl.openspending.org/uk25k/report/')


    log = logging.getLogger('ckanext.dgu.bin.gov_daily')
    log.info('----------------------------')
    log.info('Starting daily script')
    start_time = datetime.datetime.today()

    import ckan.model as model
    import ckan.lib.dumper as dumper

    # Check database looks right
    num_packages_before = model.Session.query(model.Package).count()
    log.info('Number of existing packages: %i' % num_packages_before)
    if num_packages_before < 2:
        # Fewer than 2 packages: abort with a non-zero exit status.
        log.error('Expected more packages.')
        sys.exit(1)
    elif num_packages_before < 2500:
        # Low but non-trivial count: warn and carry on.
        log.warn('Expected more packages.')

    # Analytics
    if ga_token_filepath:
        if run_task('analytics'):
            log.info('Getting analytics for this month')
            from ckanext.ga_report.download_analytics import DownloadAnalytics
            from ckanext.ga_report.ga_auth import (init_service, get_profile_id)
            try:
                svc = init_service(ga_token_filepath, None)
            except TypeError:
                # NOTE(review): the adjacent string literals below join as
                # "Analytics.Have" (no space), and the process exits with
                # status 0 although this is an error path - confirm intent.
                log.error('Could not complete authorization for Google Analytics.'
                          'Have you correctly run the getauthtoken task and '
                          'specified the correct token file?')
                sys.exit(0)
            downloader = DownloadAnalytics(svc, profile_id=get_profile_id(svc),
                                           delete_first=False,
                                           skip_url_stats=False)
            downloader.latest()
    else:
        log.info('No token specified, so not downloading Google Analytics data')

    # Copy openspending reports
    if run_task('openspending'):
        log.info('Copying in OpenSpending reports')
        if not os.path.exists(openspending_reports_dir):
            log.info('Creating dump dir: %s' % openspending_reports_dir)
            os.makedirs(openspending_reports_dir)
        # Fetch the publisher list from the live data.gov.uk REST API.
        # NOTE(review): only urllib2.HTTPError is handled here; other
        # urllib2 errors would propagate - confirm this is intended.
        try:
            publisher_response = urllib2.urlopen('http://data.gov.uk/api/rest/group').read()
        except urllib2.HTTPError, e:
            log.error('Could not get list of publishers for OpenSpending reports: %s',
                      e)
        else:
            try:
                # The response must decode to a list of more than 500
                # entries; a failed assert is caught by the handler below
                # and logged as a decode error.
                publishers = json.loads(publisher_response)
                assert isinstance(publishers, list), publishers
                assert len(publishers) > 500, len(publishers)
                log.info('Got list of %i publishers starting: %r',
                         len(publishers), publishers[:3])
            except Exception, e:
                log.error('Could not decode list of publishers for OpenSpending reports: %s',
                          e)
            else:
Exemple #10
0
def command(config_file):
    """Run the daily script: load settings, sanity-check the database and
    download the latest Google Analytics data.

    :param config_file: path of the CKAN .ini file. Its base name, with
        ".ini" removed, is used as the CKAN instance name when building the
        default directory and file names.

    The process exits with status 1 when fewer than 2 packages are found or
    when Google Analytics authorization fails. Any other exception raised
    while processing analytics is logged rather than propagated.
    """
    # Import ckan as it changes the dependent packages imported
    from dump_analysis import get_run_info, TxtAnalysisFile, CsvAnalysisFile, DumpAnalysisOptions, DumpAnalysis

    from pylons import config

    # settings
    ckan_instance_name = os.path.basename(config_file).replace(".ini", "")
    if ckan_instance_name not in ["development", "dgutest"]:
        default_dump_dir = "/var/lib/ckan/%s/static/dump" % ckan_instance_name
        default_analysis_dir = "/var/lib/ckan/%s/static/dump_analysis" % ckan_instance_name
        default_backup_dir = "/var/backups/ckan/%s" % ckan_instance_name
        default_openspending_reports_dir = "/var/lib/ckan/%s/openspending_reports" % ckan_instance_name
    else:
        # test purposes
        default_dump_dir = "~/dump"
        default_analysis_dir = "~/dump_analysis"
        default_backup_dir = "~/backups"
        default_openspending_reports_dir = "~/openspending_reports"
    dump_dir = os.path.expanduser(config.get("ckan.dump_dir", default_dump_dir))
    analysis_dir = os.path.expanduser(config.get("ckan.dump_analysis_dir", default_analysis_dir))
    backup_dir = os.path.expanduser(config.get("ckan.backup_dir", default_backup_dir))
    openspending_reports_dir = os.path.expanduser(
        config.get("dgu.openspending_reports_dir", default_openspending_reports_dir)
    )
    # An empty value means "no token configured": analytics are skipped.
    ga_token_filepath = os.path.expanduser(config.get("googleanalytics.token.filepath", ""))
    dump_filebase = config.get("ckan.dump_filename_base", "data.gov.uk-ckan-meta-data-%Y-%m-%d")
    dump_analysis_filebase = config.get("ckan.dump_analysis_base", "data.gov.uk-analysis")
    backup_filebase = config.get("ckan.backup_filename_base", ckan_instance_name + ".%Y-%m-%d.pg_dump")
    tmp_filepath = config.get("ckan.temp_filepath", "/tmp/dump.tmp")
    openspending_reports_url = config.get(
        "ckan.openspending_reports_url", "http://data.etl.openspending.org/uk25k/report/"
    )

    log = logging.getLogger("ckanext.dgu.bin.gov_daily")
    log.info("----------------------------")
    log.info("Starting daily script")
    start_time = datetime.datetime.today()

    import ckan.model as model
    import ckan.lib.dumper as dumper
    from ckanext.dgu.lib.inventory import inventory_dumper

    # Check database looks right
    num_packages_before = model.Session.query(model.Package).count()
    log.info("Number of existing packages: %i", num_packages_before)
    if num_packages_before < 2:
        # Almost no packages: treat the database as broken and abort.
        log.error("Expected more packages.")
        sys.exit(1)
    elif num_packages_before < 2500:
        log.warning("Expected more packages.")

    # Analytics
    try:
        if ga_token_filepath:
            if run_task("analytics"):
                log.info("Getting analytics for this month")
                from ckanext.ga_report.download_analytics import DownloadAnalytics
                from ckanext.ga_report.ga_auth import init_service, get_profile_id

                try:
                    token, svc = init_service(ga_token_filepath, None)
                except TypeError:
                    # A TypeError here is treated as an authorization
                    # failure. Exit non-zero so the failure is visible to
                    # whatever invoked the script (SystemExit is not caught
                    # by the `except Exception` below).
                    log.error(
                        "Could not complete authorization for Google Analytics. "
                        "Have you correctly run the getauthtoken task and "
                        "specified the correct token file?"
                    )
                    sys.exit(1)
                downloader = DownloadAnalytics(
                    svc, token=token, profile_id=get_profile_id(svc), delete_first=False, skip_url_stats=False
                )
                downloader.latest()
        else:
            log.info("No token specified, so not downloading Google Analytics data")
    except Exception as exc_analytics:
        # Analytics problems are logged with a traceback but not re-raised.
        log.error("Failed to process Google Analytics data")
        log.exception(exc_analytics)