def report():
    """Dump broken, non-UKLP resources to a CSV file at ``TMP_FILE``.

    Queries the ``TaskStatus`` rows with ``task_type == 'qa'`` and
    ``key == 'status'`` (distinct on ``entity_id``), decodes each row's
    ``error`` field as JSON and, when its ``is_broken`` entry is truthy,
    loads the resource with that ``entity_id``.  Resources whose package
    has a ``UKLP`` extra equal to ``True`` are skipped; the rest are
    counted under 'Broken resource' and written as ``[id, url]`` rows,
    with the URL encoded as UTF-8.

    The output file is opened in a ``with`` block so that it is flushed
    and closed even if the query or one of the rows raises.
    """
    import ckan.model as model

    log = logging.getLogger(__name__)

    stats = StatsCount()

    with open(TMP_FILE, 'w') as f:
        broken_resources = csv.writer(f)

        # Prep
        tasks = model.Session.query(model.TaskStatus)\
            .filter(model.TaskStatus.task_type == 'qa')\
            .filter(model.TaskStatus.key == 'status')\
            .distinct('entity_id')\
            .all()
        for task in tasks:
            d = json.loads(task.error)
            if not ('is_broken' in d and d['is_broken']):
                continue

            try:
                resource = model.Resource.get(task.entity_id)
                # Deliberate equality test (not truthiness): only an extra
                # that compares equal to True marks the dataset as UKLP.
                if resource.resource_group.package.extras.get('UKLP', '') == True:
                    # Skipping UKLP datasets
                    continue
            except Exception as e:
                # Any failure while resolving the resource or its package
                # is logged and the task is skipped, so one bad row does
                # not abort the whole report.
                log.error("Resource.get(%s) failed: %s", task.entity_id, e)
                continue

            if resource:
                stats.increment('Broken resource')
                broken_resources.writerow(
                    [resource.id, resource.url.encode('utf8')])
# Example #2
# 0
def report():
    """Dump broken, non-UKLP resources to a CSV file at ``TMP_FILE``.

    Queries the ``TaskStatus`` rows with ``task_type == 'qa'`` and
    ``key == 'status'`` (distinct on ``entity_id``), decodes each row's
    ``error`` field as JSON and, when its ``is_broken`` entry is truthy,
    loads the resource with that ``entity_id``.  Resources whose package
    has a ``UKLP`` extra equal to ``True`` are skipped; the rest are
    counted under 'Broken resource' and written as ``[id, url]`` rows,
    with the URL encoded as UTF-8.

    The output file is opened in a ``with`` block so that it is flushed
    and closed even if the query or one of the rows raises.
    """
    import ckan.model as model

    log = logging.getLogger(__name__)

    stats = StatsCount()

    with open(TMP_FILE, 'w') as f:
        broken_resources = csv.writer(f)

        # Prep
        tasks = model.Session.query(model.TaskStatus)\
            .filter(model.TaskStatus.task_type == 'qa')\
            .filter(model.TaskStatus.key == 'status')\
            .distinct('entity_id')\
            .all()
        for task in tasks:
            d = json.loads(task.error)
            if not ('is_broken' in d and d['is_broken']):
                continue

            try:
                resource = model.Resource.get(task.entity_id)
                # Deliberate equality test (not truthiness): only an extra
                # that compares equal to True marks the dataset as UKLP.
                if resource.resource_group.package.extras.get('UKLP',
                                                              '') == True:
                    # Skipping UKLP datasets
                    continue
            except Exception as e:
                # Any failure while resolving the resource or its package
                # is logged and the task is skipped, so one bad row does
                # not abort the whole report.
                log.error("Resource.get(%s) failed: %s", task.entity_id, e)
                continue

            if resource:
                stats.increment('Broken resource')
                broken_resources.writerow(
                    [resource.id, resource.url.encode('utf8')])
# Example #3
# 0
def canada_extras():
    """Standardize extra keys and licenses on every package, then print a summary.

    For each package returned by ``model.Session.query(model.Package)``:

    * an extra whose key appears in ``key_mapping`` is moved to the mapped
      key and the old key is deleted;
    * a 'License' or 'License URL' extra whose value appears in
      ``license_mapping`` sets ``pkg.license_id`` to the mapped id and is
      deleted; any other value is recorded as an unmapped license;
    * every remaining extra key that is neither a source nor a target of
      ``key_mapping`` is recorded as an unmapped key.

    Each modification is bracketed by ``rev.before_change()`` and
    ``rev.after_change()``; ``rev.finished()`` is called once at the end.
    The collected statistics are printed to stdout.
    """
    keys_changed = StatsCount()
    unmapped_keys = StatsList()
    licenses_changed = StatsCount()
    unmapped_licenses = StatsList()
    licenses = StatsList()
    key_mapping = {
        'Level of Government': 'level_of_government',
    }
    license_mapping = {
        # CS: bad_spelling ignore
        'http://geogratis.ca/geogratis/en/licence.jsp': 'geogratis',
        'Crown Copyright': 'canada-crown',
    }
    # Keys on either side of key_mapping count as "known".  Built once here
    # instead of rebuilding keys()/values() for every extra of every package.
    known_keys = set(key_mapping) | set(key_mapping.values())
    from ckan import model
    rev = RevisionManager('Standardize extra keys', 10)
    for pkg in model.Session.query(model.Package):
        for old_key, new_key in key_mapping.items():
            # `in` replaces the deprecated has_key() membership test.
            if old_key in pkg.extras:
                rev.before_change()
                pkg.extras[new_key] = pkg.extras[old_key]
                del pkg.extras[old_key]
                keys_changed.increment(old_key)
                rev.after_change()
        for license_key in ('License', 'License URL'):
            if license_key in pkg.extras:
                old_license = pkg.extras[license_key]
                if old_license in license_mapping:
                    rev.before_change()
                    # `unicode` is the Python 2 builtin; kept because this
                    # script targets Python 2.
                    pkg.license_id = unicode(license_mapping[old_license])
                    del pkg.extras[license_key]
                    licenses_changed.increment(old_license)
                    rev.after_change()
                else:
                    unmapped_licenses.add(old_license, pkg.name)
        licenses.add(pkg.license_id, pkg.name)
        for key in pkg.extras.keys():
            if key not in known_keys:
                unmapped_keys.add(key, pkg.name)
    rev.finished()
    # Single-argument, parenthesised print produces the same output under
    # the Python 2 print statement and also parses as a Python 3 call.
    # The 1-tuple keeps %-formatting safe whatever type report() returns.
    print('Packages: %i' % model.Session.query(model.Package).count())
    print('Changed keys:\n%s' % (keys_changed.report(),))
    print('Unmapped keys:\n%s' % (unmapped_keys.report(),))
    print('Changed licenses:\n%s' % (licenses_changed.report(),))
    print('Unmapped licenses:\n%s' % (unmapped_licenses.report(),))
    print('Licenses:\n%s' % (licenses.report(),))
def canada_extras():
    """Standardize extra keys and licenses on every package, then print a summary.

    For each package returned by ``model.Session.query(model.Package)``:

    * an extra whose key appears in ``key_mapping`` is moved to the mapped
      key and the old key is deleted;
    * a 'License' or 'License URL' extra whose value appears in
      ``license_mapping`` sets ``pkg.license_id`` to the mapped id and is
      deleted; any other value is recorded as an unmapped license;
    * every remaining extra key that is neither a source nor a target of
      ``key_mapping`` is recorded as an unmapped key.

    Each modification is bracketed by ``rev.before_change()`` and
    ``rev.after_change()``; ``rev.finished()`` is called once at the end.
    The collected statistics are printed to stdout.
    """
    keys_changed = StatsCount()
    unmapped_keys = StatsList()
    licenses_changed = StatsCount()
    unmapped_licenses = StatsList()
    licenses = StatsList()
    key_mapping = {
        'Level of Government': 'level_of_government',
    }
    license_mapping = {
        # CS: bad_spelling ignore
        'http://geogratis.ca/geogratis/en/licence.jsp': 'geogratis',
        'Crown Copyright': 'canada-crown',
    }
    # Keys on either side of key_mapping count as "known".  Built once here
    # instead of rebuilding keys()/values() for every extra of every package.
    known_keys = set(key_mapping) | set(key_mapping.values())
    from ckan import model
    rev = RevisionManager('Standardize extra keys', 10)
    for pkg in model.Session.query(model.Package):
        for old_key, new_key in key_mapping.items():
            # `in` replaces the deprecated has_key() membership test.
            if old_key in pkg.extras:
                rev.before_change()
                pkg.extras[new_key] = pkg.extras[old_key]
                del pkg.extras[old_key]
                keys_changed.increment(old_key)
                rev.after_change()
        for license_key in ('License', 'License URL'):
            if license_key in pkg.extras:
                old_license = pkg.extras[license_key]
                if old_license in license_mapping:
                    rev.before_change()
                    # `unicode` is the Python 2 builtin; kept because this
                    # script targets Python 2.
                    pkg.license_id = unicode(license_mapping[old_license])
                    del pkg.extras[license_key]
                    licenses_changed.increment(old_license)
                    rev.after_change()
                else:
                    unmapped_licenses.add(old_license, pkg.name)
        licenses.add(pkg.license_id, pkg.name)
        for key in pkg.extras.keys():
            if key not in known_keys:
                unmapped_keys.add(key, pkg.name)
    rev.finished()
    # Single-argument, parenthesised print produces the same output under
    # the Python 2 print statement and also parses as a Python 3 call.
    # The 1-tuple keeps %-formatting safe whatever type report() returns.
    print('Packages: %i' % model.Session.query(model.Package).count())
    print('Changed keys:\n%s' % (keys_changed.report(),))
    print('Unmapped keys:\n%s' % (unmapped_keys.report(),))
    print('Changed licenses:\n%s' % (licenses_changed.report(),))
    print('Unmapped licenses:\n%s' % (unmapped_licenses.report(),))
    print('Licenses:\n%s' % (licenses.report(),))