def report():
    """Write every broken, non-UKLP resource to TMP_FILE as CSV rows.

    Queries the TaskStatus rows with task_type 'qa' and key 'status'
    (distinct on entity_id), parses each row's ``error`` column as JSON and,
    when it flags ``is_broken``, looks the resource up by the task's
    entity_id.  For each such resource a ``[id, url]`` row (URL encoded as
    UTF-8) is written to the CSV file and the 'Broken resource' counter of a
    local StatsCount is incremented.

    Resources whose package has a 'UKLP' extra equal to True are skipped.
    Any exception raised while looking a resource up is logged and that
    task is skipped.
    """
    import ckan.model as model

    log = logging.getLogger(__name__)

    stats = StatsCount()

    # The context manager guarantees the CSV file is flushed and closed,
    # even when an unexpected error aborts the loop part-way through.
    with open(TMP_FILE, 'w') as f:
        broken_resources = csv.writer(f)

        # Prep
        tasks = model.Session.query(model.TaskStatus)\
            .filter(model.TaskStatus.task_type == 'qa')\
            .filter(model.TaskStatus.key == 'status')\
            .distinct('entity_id')\
            .all()

        for task in tasks:
            error_info = json.loads(task.error)
            if 'is_broken' not in error_info or not error_info['is_broken']:
                continue

            try:
                resource = model.Resource.get(task.entity_id)
                # NOTE(review): the comparison with True is kept as-is; the
                # stored type of the 'UKLP' extra is not visible from here.
                if resource.resource_group.package.extras.get('UKLP', '') == True:
                    # Skipping UKLP datasets
                    continue
            except Exception as e:
                # Also reached when the lookup returns None, because the
                # attribute access above then raises.
                log.error("Resource.get(%s) failed: %s", task.entity_id, e)
                continue

            if resource:
                stats.increment('Broken resource')
                broken_resources.writerow(
                    [resource.id, resource.url.encode('utf8')])
Example #2
0
def report():
    """Dump the id and URL of each broken, non-UKLP resource to TMP_FILE.

    The TaskStatus table is queried for rows with task_type 'qa' and key
    'status' (distinct on entity_id).  Each row's ``error`` column is parsed
    as JSON; when it has a truthy ``is_broken`` entry the resource named by
    the row's entity_id is fetched.  Every such resource is written to the
    CSV file as ``[id, url]`` (URL encoded as UTF-8) and counted under
    'Broken resource' in a local StatsCount.

    Resources belonging to a package whose 'UKLP' extra equals True are
    skipped.  If fetching or inspecting a resource raises, the error is
    logged and processing moves on to the next task.
    """
    import ckan.model as model

    log = logging.getLogger(__name__)

    stats = StatsCount()

    # Use a context manager so the output file is always closed; the
    # previous code opened it and never released the handle.
    with open(TMP_FILE, 'w') as f:
        broken_resources = csv.writer(f)

        # Prep
        tasks = model.Session.query(model.TaskStatus)\
            .filter(model.TaskStatus.task_type == 'qa')\
            .filter(model.TaskStatus.key == 'status')\
            .distinct('entity_id')\
            .all()

        for task in tasks:
            qa_status = json.loads(task.error)
            if 'is_broken' not in qa_status or not qa_status['is_broken']:
                continue

            try:
                resource = model.Resource.get(task.entity_id)
                # NOTE(review): equality with True is preserved on purpose;
                # the stored type of the 'UKLP' extra cannot be seen here.
                if resource.resource_group.package.extras.get('UKLP',
                                                              '') == True:
                    # Skipping UKLP datasets
                    continue
            except Exception as e:
                # A missing resource (None) ends up here too, via the
                # attribute access in the check above.
                log.error("Resource.get(%s) failed: %s", task.entity_id, e)
                continue

            if resource:
                stats.increment('Broken resource')
                broken_resources.writerow(
                    [resource.id, resource.url.encode('utf8')])
def canada_extras():
    keys_changed = StatsCount()
    unmapped_keys = StatsList()
    licenses_changed = StatsCount()
    unmapped_licenses = StatsList()
    licenses = StatsList()
    key_mapping = {
        'Level of Government': 'level_of_government',
    }
    license_mapping = {
        # CS: bad_spelling ignore
        'http://geogratis.ca/geogratis/en/licence.jsp': 'geogratis',
        'Crown Copyright': 'canada-crown',
    }
    from ckan import model
    rev = RevisionManager('Standardize extra keys', 10)
    for pkg in model.Session.query(model.Package):
        for old_key, new_key in key_mapping.items():
            if pkg.extras.has_key(old_key):
                rev.before_change()
                pkg.extras[new_key] = pkg.extras[old_key]
                del pkg.extras[old_key]
                keys_changed.increment(old_key)
                rev.after_change()
        for license_key in ('License', 'License URL'):
            if pkg.extras.has_key(license_key):
                old_license = pkg.extras[license_key]
                if old_license in license_mapping:
                    rev.before_change()
                    pkg.license_id = unicode(license_mapping[old_license])
                    del pkg.extras[license_key]
                    licenses_changed.increment(old_license)
                    rev.after_change()
                else:
                    unmapped_licenses.add(old_license, pkg.name)
        licenses.add(pkg.license_id, pkg.name)
        for key in pkg.extras.keys():
            if key not in key_mapping.keys() and \
               key not in key_mapping.values():
                unmapped_keys.add(key, pkg.name)
    rev.finished()
    print 'Packages: %i' % model.Session.query(model.Package).count()
    print 'Changed keys:\n', keys_changed.report()
    print 'Unmapped keys:\n', unmapped_keys.report()
    print 'Changed licenses:\n', licenses_changed.report()
    print 'Unmapped licenses:\n', unmapped_licenses.report()
    print 'Licenses:\n', licenses.report()
Example #4
0
def canada_extras():
    keys_changed = StatsCount()
    unmapped_keys = StatsList()
    licenses_changed = StatsCount()
    unmapped_licenses = StatsList()
    licenses = StatsList()
    key_mapping = {
        'Level of Government':'level_of_government',
        }
    license_mapping = {
        # CS: bad_spelling ignore
        'http://geogratis.ca/geogratis/en/licence.jsp':'geogratis',
        'Crown Copyright':'canada-crown',
        }
    from ckan import model
    rev = RevisionManager('Standardize extra keys', 10)
    for pkg in model.Session.query(model.Package):
        for old_key, new_key in key_mapping.items():
            if pkg.extras.has_key(old_key):
                rev.before_change()
                pkg.extras[new_key] = pkg.extras[old_key]
                del pkg.extras[old_key]
                keys_changed.increment(old_key)
                rev.after_change()
        for license_key in ('License', 'License URL'):
            if pkg.extras.has_key(license_key):
                old_license = pkg.extras[license_key]
                if old_license in license_mapping:
                    rev.before_change()
                    pkg.license_id = unicode(license_mapping[old_license])
                    del pkg.extras[license_key]
                    licenses_changed.increment(old_license)
                    rev.after_change()
                else:
                    unmapped_licenses.add(old_license, pkg.name)
        licenses.add(pkg.license_id, pkg.name)
        for key in pkg.extras.keys():
            if key not in key_mapping.keys() and \
               key not in key_mapping.values():
                unmapped_keys.add(key, pkg.name)
    rev.finished()
    print 'Packages: %i' % model.Session.query(model.Package).count()
    print 'Changed keys:\n', keys_changed.report()
    print 'Unmapped keys:\n', unmapped_keys.report()
    print 'Changed licenses:\n', licenses_changed.report()
    print 'Unmapped licenses:\n', unmapped_licenses.report()
    print 'Licenses:\n', licenses.report()
Example #5
0
import os
import json
import logging
import sys

from paste.registry import Registry

from sqlalchemy import engine_from_config, or_
from pylons import config, translator
import time

from running_stats import StatsCount

# Module-level logger, named after this module.
global_log = logging.getLogger(__name__)

# Module-level StatsCount instance (from running_stats), created at import.
# NOTE(review): presumably used by the code below to tally outcomes - confirm.
stats = StatsCount()


def load_config(path):
    """Load the CKAN environment from the paste config file at *path*.

    :param path: location of the config file; it is prefixed with
        ``'config:'`` and passed to ``paste.deploy.appconfig``.  The caller
        in this file passes an absolute path.
    """
    import paste.deploy
    conf = paste.deploy.appconfig('config:' + path)
    # Import the submodule explicitly: a bare ``import ckan`` does not load
    # ``ckan.config.environment``, so the attribute access below would only
    # work if something else had already imported it.
    import ckan.config.environment
    ckan.config.environment.load_environment(conf.global_conf, conf.local_conf)


def command(input_csv, config_ini, commit=False):

    config_ini_filepath = os.path.abspath(config_ini)
    load_config(config_ini_filepath)
    engine = engine_from_config(config, 'sqlalchemy.')