def report():
    """Write the id and URL of every broken resource to ``TMP_FILE`` as CSV.

    Every ``TaskStatus`` row with ``task_type == 'qa'`` and
    ``key == 'status'`` (distinct on ``entity_id``) is inspected.  Its
    ``error`` field is decoded as JSON and, when the decoded payload has a
    truthy ``is_broken`` entry, one ``[resource.id, resource.url]`` row is
    written for the resource identified by ``entity_id``.

    Resources whose package has a ``UKLP`` extra equal to ``True`` are
    skipped.  Any exception raised while fetching or inspecting a resource
    is logged and that task is skipped.

    Returns nothing.  The ``stats`` tally of broken resources is only kept
    locally; this function does not report or return it.
    """
    import ckan.model as model

    log = logging.getLogger(__name__)
    stats = StatsCount()

    # The context manager guarantees the CSV is flushed and closed, even if
    # the query or a row raises; the handle used to be left open.
    with open(TMP_FILE, 'w') as f:
        broken_resources = csv.writer(f)

        tasks = model.Session.query(model.TaskStatus)\
            .filter(model.TaskStatus.task_type == 'qa')\
            .filter(model.TaskStatus.key == 'status')\
            .distinct('entity_id')\
            .all()

        for task in tasks:
            d = json.loads(task.error)
            if not ('is_broken' in d and d['is_broken']):
                continue

            try:
                resource = model.Resource.get(task.entity_id)
                extras = resource.resource_group.package.extras
                # Kept as an equality test on purpose: a plain truthiness
                # check would also match any non-empty string value.
                if extras.get('UKLP', '') == True:
                    # Skipping UKLP datasets
                    continue
            except Exception as e:
                # Best effort: one bad resource must not abort the report.
                log.error("Resource.get(%s) failed: %s", task.entity_id, e)
                continue

            if resource:
                stats.increment('Broken resource')
                broken_resources.writerow(
                    [resource.id, resource.url.encode('utf8')])
def report():
    """Write the id and URL of every broken resource to ``TMP_FILE`` as CSV.

    Every ``TaskStatus`` row with ``task_type == 'qa'`` and
    ``key == 'status'`` (distinct on ``entity_id``) is inspected.  Its
    ``error`` field is decoded as JSON and, when the decoded payload has a
    truthy ``is_broken`` entry, one ``[resource.id, resource.url]`` row is
    written for the resource identified by ``entity_id``.

    Resources whose package has a ``UKLP`` extra equal to ``True`` are
    skipped.  Any exception raised while fetching or inspecting a resource
    is logged and that task is skipped.

    Returns nothing.  The ``stats`` tally of broken resources is only kept
    locally; this function does not report or return it.
    """
    import ckan.model as model

    log = logging.getLogger(__name__)
    stats = StatsCount()

    # The context manager guarantees the CSV is flushed and closed, even if
    # the query or a row raises; the handle used to be left open.
    with open(TMP_FILE, 'w') as f:
        broken_resources = csv.writer(f)

        tasks = model.Session.query(model.TaskStatus)\
            .filter(model.TaskStatus.task_type == 'qa')\
            .filter(model.TaskStatus.key == 'status')\
            .distinct('entity_id')\
            .all()

        for task in tasks:
            d = json.loads(task.error)
            if not ('is_broken' in d and d['is_broken']):
                continue

            try:
                resource = model.Resource.get(task.entity_id)
                extras = resource.resource_group.package.extras
                # Kept as an equality test on purpose: a plain truthiness
                # check would also match any non-empty string value.
                if extras.get('UKLP', '') == True:
                    # Skipping UKLP datasets
                    continue
            except Exception as e:
                # Best effort: one bad resource must not abort the report.
                log.error("Resource.get(%s) failed: %s", task.entity_id, e)
                continue

            if resource:
                stats.increment('Broken resource')
                broken_resources.writerow(
                    [resource.id, resource.url.encode('utf8')])
def canada_extras():
    """Standardize extras keys and licenses on every package; print a summary.

    For each ``model.Package``:

    * an extra whose key appears in ``key_mapping`` is moved to the mapped
      key;
    * a ``License`` or ``License URL`` extra whose value appears in
      ``license_mapping`` sets ``pkg.license_id`` and is then removed;
      any other value is recorded as an unmapped license;
    * the package's resulting ``license_id`` is recorded, as is every
      extras key that is neither a source nor a target of ``key_mapping``.

    Every modification is bracketed by ``rev.before_change()`` and
    ``rev.after_change()`` on a ``RevisionManager``, and
    ``rev.finished()`` is called once all packages are processed.  The
    collected statistics are printed to stdout.  Returns nothing.
    """
    keys_changed = StatsCount()
    unmapped_keys = StatsList()
    licenses_changed = StatsCount()
    unmapped_licenses = StatsList()
    licenses = StatsList()
    key_mapping = {
        'Level of Government': 'level_of_government',
    }
    license_mapping = {
        # CS: bad_spelling ignore
        'http://geogratis.ca/geogratis/en/licence.jsp': 'geogratis',
        'Crown Copyright': 'canada-crown',
    }
    # Old and new key names alike count as "mapped"; built once instead of
    # re-listing the mapping's keys and values for every extra.
    known_keys = set(key_mapping) | set(key_mapping.values())

    from ckan import model
    rev = RevisionManager('Standardize extra keys', 10)
    for pkg in model.Session.query(model.Package):
        for old_key, new_key in key_mapping.items():
            if old_key in pkg.extras:
                rev.before_change()
                pkg.extras[new_key] = pkg.extras[old_key]
                del pkg.extras[old_key]
                keys_changed.increment(old_key)
                rev.after_change()

        for license_key in ('License', 'License URL'):
            if license_key not in pkg.extras:
                continue
            old_license = pkg.extras[license_key]
            if old_license in license_mapping:
                rev.before_change()
                pkg.license_id = unicode(license_mapping[old_license])
                del pkg.extras[license_key]
                licenses_changed.increment(old_license)
                rev.after_change()
            else:
                unmapped_licenses.add(old_license, pkg.name)

        licenses.add(pkg.license_id, pkg.name)

        for key in pkg.extras.keys():
            if key not in known_keys:
                unmapped_keys.add(key, pkg.name)
    rev.finished()

    print('Packages: %i' % model.Session.query(model.Package).count())
    summaries = (
        ('Changed keys:\n', keys_changed),
        ('Unmapped keys:\n', unmapped_keys),
        ('Changed licenses:\n', licenses_changed),
        ('Unmapped licenses:\n', unmapped_licenses),
        ('Licenses:\n', licenses),
    )
    for heading, stat in summaries:
        # Single-argument print is valid on Python 2 and 3.  The headings
        # end in a newline, so the old ``print heading, report`` statement
        # emitted no separating space; joining them gives the same output.
        print('%s%s' % (heading, stat.report()))
def canada_extras():
    """Standardize extras keys and licenses on every package; print a summary.

    For each ``model.Package``:

    * an extra whose key appears in ``key_mapping`` is moved to the mapped
      key;
    * a ``License`` or ``License URL`` extra whose value appears in
      ``license_mapping`` sets ``pkg.license_id`` and is then removed;
      any other value is recorded as an unmapped license;
    * the package's resulting ``license_id`` is recorded, as is every
      extras key that is neither a source nor a target of ``key_mapping``.

    Every modification is bracketed by ``rev.before_change()`` and
    ``rev.after_change()`` on a ``RevisionManager``, and
    ``rev.finished()`` is called once all packages are processed.  The
    collected statistics are printed to stdout.  Returns nothing.
    """
    keys_changed = StatsCount()
    unmapped_keys = StatsList()
    licenses_changed = StatsCount()
    unmapped_licenses = StatsList()
    licenses = StatsList()
    key_mapping = {
        'Level of Government': 'level_of_government',
    }
    license_mapping = {
        # CS: bad_spelling ignore
        'http://geogratis.ca/geogratis/en/licence.jsp': 'geogratis',
        'Crown Copyright': 'canada-crown',
    }
    # Old and new key names alike count as "mapped"; built once instead of
    # re-listing the mapping's keys and values for every extra.
    known_keys = set(key_mapping) | set(key_mapping.values())

    from ckan import model
    rev = RevisionManager('Standardize extra keys', 10)
    for pkg in model.Session.query(model.Package):
        for old_key, new_key in key_mapping.items():
            if old_key in pkg.extras:
                rev.before_change()
                pkg.extras[new_key] = pkg.extras[old_key]
                del pkg.extras[old_key]
                keys_changed.increment(old_key)
                rev.after_change()

        for license_key in ('License', 'License URL'):
            if license_key not in pkg.extras:
                continue
            old_license = pkg.extras[license_key]
            if old_license in license_mapping:
                rev.before_change()
                pkg.license_id = unicode(license_mapping[old_license])
                del pkg.extras[license_key]
                licenses_changed.increment(old_license)
                rev.after_change()
            else:
                unmapped_licenses.add(old_license, pkg.name)

        licenses.add(pkg.license_id, pkg.name)

        for key in pkg.extras.keys():
            if key not in known_keys:
                unmapped_keys.add(key, pkg.name)
    rev.finished()

    print('Packages: %i' % model.Session.query(model.Package).count())
    summaries = (
        ('Changed keys:\n', keys_changed),
        ('Unmapped keys:\n', unmapped_keys),
        ('Changed licenses:\n', licenses_changed),
        ('Unmapped licenses:\n', unmapped_licenses),
        ('Licenses:\n', licenses),
    )
    for heading, stat in summaries:
        # Single-argument print is valid on Python 2 and 3.  The headings
        # end in a newline, so the old ``print heading, report`` statement
        # emitted no separating space; joining them gives the same output.
        print('%s%s' % (heading, stat.report()))