Esempio n. 1
0
def openness_for_organization(organization=None,
                              include_sub_organizations=False):
    '''
    Returns a dictionary detailing the openness scores of the active,
    non-private datasets owned by an organization and, optionally, by the
    organizations returned by lib.go_down_tree() for it.

    params:
      organization - reference to the organization, as accepted by
                     model.Group.get
      include_sub_organizations - if True, report on every organization
                     yielded by lib.go_down_tree(org) rather than just the
                     given one

    Returns:
    {'table': [
       {'dataset_name', 'dataset_title', 'dataset_notes',
        'organization_name', 'organization_title',
        'openness_score', 'openness_score_reason'}
       ...],
     'score_counts': the per-score dataset counts, via jsonify_counter,
     'total_stars': sum of all the scores,
     'average_stars': total_stars / datasets with a non-None score (1 d.p.),
     'num_packages_scored': number of datasets that had QA info,
     'num_packages': number of datasets examined}

    Raises p.toolkit.ObjectNotFound if the organization is not found.
    '''
    top_org = model.Group.get(organization)
    if not top_org:
        raise p.toolkit.ObjectNotFound

    if include_sub_organizations:
        orgs = lib.go_down_tree(top_org)
    else:
        orgs = [top_org]

    context = {'model': model, 'session': model.Session, 'ignore_auth': True}
    openness_show = p.toolkit.get_action('qa_package_openness_show')
    score_counts = Counter()
    rows = []
    num_packages = 0
    for current_org in orgs:
        # NB org.packages() misses out many - see:
        # http://redmine.dguteam.org.uk/issues/1844
        pkgs = model.Session.query(model.Package) \
                    .filter_by(owner_org=current_org.id) \
                    .filter_by(state='active') \
                    .filter_by(private=False) \
                    .all()
        num_packages += len(pkgs)
        for pkg in pkgs:
            try:
                qa = openness_show(context, {'id': pkg.id})
            except p.toolkit.ObjectNotFound:
                # A dataset without QA info must not abort the whole report:
                # skip it. It still counts towards num_packages but not
                # towards num_packages_scored.
                log.warning('No QA info for package %s', pkg.name)
                continue
            rows.append(
                OrderedDict((
                    ('dataset_name', pkg.name),
                    ('dataset_title', pkg.title),
                    ('dataset_notes', lib.dataset_notes(pkg)),
                    ('organization_name', current_org.name),
                    ('organization_title', current_org.title),
                    ('openness_score', qa['openness_score']),
                    ('openness_score_reason', qa['openness_score_reason']),
                )))
            score_counts[qa['openness_score']] += 1

    # Scores of None (and 0) contribute nothing to the total
    total_stars = sum(k * v for k, v in score_counts.items() if k)
    # ...but a score of 0 still counts as a scored dataset for the average
    num_pkgs_with_stars = sum(
        v for k, v in score_counts.items() if k is not None)
    average_stars = round(float(total_stars) / num_pkgs_with_stars, 1) \
        if num_pkgs_with_stars else 0.0

    return {
        'table': rows,
        'score_counts': jsonify_counter(score_counts),
        'total_stars': total_stars,
        'average_stars': average_stars,
        'num_packages_scored': len(rows),
        'num_packages': num_packages,
    }
Esempio n. 2
0
def openness_for_organization(organization=None, include_sub_organizations=False):
    '''
    Returns a dictionary detailing the openness scores of the active datasets
    owned by an organization and, optionally, by the organizations returned
    by lib.go_down_tree() for it.

    params:
      organization - reference to the organization, as accepted by
                     model.Group.get
      include_sub_organizations - if True, report on every organization
                     yielded by lib.go_down_tree(org) rather than just the
                     given one

    Returns:
    {'table': [
       {'dataset_name', 'dataset_title', 'dataset_notes',
        'organization_name', 'organization_title',
        'openness_score', 'openness_score_reason'}
       ...],
     'score_counts': the per-score dataset counts, via jsonify_counter,
     'total_stars': sum of all the scores,
     'average_stars': total_stars / datasets with a non-None score (1 d.p.),
     'num_packages_scored': number of datasets that had QA info,
     'num_packages': number of datasets examined}

    Raises p.toolkit.ObjectNotFound if the organization is not found.
    '''
    top_org = model.Group.get(organization)
    if not top_org:
        raise p.toolkit.ObjectNotFound

    if include_sub_organizations:
        orgs = lib.go_down_tree(top_org)
    else:
        orgs = [top_org]

    context = {'model': model, 'session': model.Session, 'ignore_auth': True}
    openness_show = p.toolkit.get_action('qa_package_openness_show')
    score_counts = Counter()
    rows = []
    num_packages = 0
    for current_org in orgs:
        # NB org.packages() misses out many - see:
        # http://redmine.dguteam.org.uk/issues/1844
        pkgs = model.Session.query(model.Package) \
                    .filter_by(owner_org=current_org.id) \
                    .filter_by(state='active') \
                    .all()
        num_packages += len(pkgs)
        for pkg in pkgs:
            try:
                qa = openness_show(context, {'id': pkg.id})
            except p.toolkit.ObjectNotFound:
                # A dataset without QA info must not abort the whole report:
                # skip it. It still counts towards num_packages but not
                # towards num_packages_scored.
                log.warning('No QA info for package %s', pkg.name)
                continue
            rows.append(OrderedDict((
                ('dataset_name', pkg.name),
                ('dataset_title', pkg.title),
                ('dataset_notes', lib.dataset_notes(pkg)),
                ('organization_name', current_org.name),
                ('organization_title', current_org.title),
                ('openness_score', qa['openness_score']),
                ('openness_score_reason', qa['openness_score_reason']),
                )))
            score_counts[qa['openness_score']] += 1

    # Scores of None (and 0) contribute nothing to the total
    total_stars = sum(k * v for k, v in score_counts.items() if k)
    # ...but a score of 0 still counts as a scored dataset for the average
    num_pkgs_with_stars = sum(v for k, v in score_counts.items()
                              if k is not None)
    average_stars = round(float(total_stars) / num_pkgs_with_stars, 1) \
        if num_pkgs_with_stars else 0.0

    return {'table': rows,
            'score_counts': jsonify_counter(score_counts),
            'total_stars': total_stars,
            'average_stars': average_stars,
            'num_packages_scored': len(rows),
            'num_packages': num_packages,
            }
Esempio n. 3
0
def broken_links_for_organization(organization, include_sub_organizations=False):
    '''
    Returns a dictionary detailing broken resource links for the
    organization, optionally including the organizations returned by
    lib.go_down_tree() for it.

    params:
      organization - reference to an organization (e.g. its name), as
                     accepted by model.Group.get
      include_sub_organizations - if True, also report on every organization
                     yielded by lib.go_down_tree(org)

    Returns:
    {'organization_name': 'cabinet-office',
     'organization_title': 'Cabinet Office',
     'num_broken_packages': ..., 'num_broken_resources': ...,
     'num_packages': ..., 'num_resources': ...,
     'broken_package_percent': ..., 'broken_resource_percent': ...,
     'table': [
       {'dataset_title', 'dataset_name', 'dataset_notes',
        'organization_title', 'organization_name', 'resource_position',
        'resource_id', 'resource_url', 'url_up_to_date', 'via',
        'first_failure', 'last_updated', 'last_success',
        'url_redirected_to', 'reason', 'status', 'failure_count'}
      ...]}

    Raises p.toolkit.ObjectNotFound if the organization is not found.
    '''
    from ckanext.archiver.model import Archival

    org = model.Group.get(organization)
    if not org:
        raise p.toolkit.ObjectNotFound()

    name = org.name
    title = org.title

    # Broken archivals whose dataset and resource are both still active.
    # NB "== True" is deliberate: it builds an SQL expression.
    archivals = model.Session.query(Archival, model.Package, model.Group).\
        filter(Archival.is_broken == True).\
        join(model.Package, Archival.package_id == model.Package.id).\
        filter(model.Package.state == 'active').\
        join(model.Resource, Archival.resource_id == model.Resource.id).\
        filter(model.Resource.state == 'active')

    if not include_sub_organizations:
        org_ids = [org.id]
        archivals = archivals.filter(model.Package.owner_org == org.id)
    else:
        # We want any organization_id that is part of this organization's tree
        org_ids = ['%s' % sub_org.id for sub_org in lib.go_down_tree(org)]
        archivals = archivals.filter(model.Package.owner_org.in_(org_ids))

    archivals = archivals.join(model.Group, model.Package.owner_org == model.Group.id)

    results = []

    # pkg and pkg_org come straight from the joined query, so there is no
    # need to look the package up again for each row.
    for archival, pkg, pkg_org in archivals.all():
        resource = model.Resource.get(archival.resource_id)

        via = ''
        # Guard against the extra being present but stored as None
        er = pkg.extras.get('external_reference') or ''
        if er == 'ONSHUB':
            via = "Stats Hub"
        elif er.startswith("DATA4NR"):
            via = "Data4nr"

        # Prefer the resource revision that was current when it was archived,
        # so the reported URL is the one that was actually checked; fall back
        # to the current resource if no such revision is found.
        archived_resource = model.Session.query(model.ResourceRevision)\
                            .filter_by(id=resource.id)\
                            .filter_by(revision_timestamp=archival.resource_timestamp)\
                            .first() or resource
        row_data = OrderedDict((
            ('dataset_title', pkg.title),
            ('dataset_name', pkg.name),
            ('dataset_notes', lib.dataset_notes(pkg)),
            ('organization_title', pkg_org.title),
            ('organization_name', pkg_org.name),
            ('resource_position', resource.position),
            ('resource_id', resource.id),
            ('resource_url', archived_resource.url),
            ('url_up_to_date', resource.url == archived_resource.url),
            ('via', via),
            ('first_failure', archival.first_failure.isoformat() if archival.first_failure else None),
            ('last_updated', archival.updated.isoformat() if archival.updated else None),
            ('last_success', archival.last_success.isoformat() if archival.last_success else None),
            ('url_redirected_to', archival.url_redirected_to),
            ('reason', archival.reason),
            ('status', archival.status),
            ('failure_count', archival.failure_count),
            ))

        results.append(row_data)

    # One dataset can have several broken resources, so count distinct names
    num_broken_packages = archivals.distinct(model.Package.name).count()
    num_broken_resources = len(results)

    # Get total number of packages & resources
    num_packages = model.Session.query(model.Package)\
                        .filter(model.Package.owner_org.in_(org_ids))\
                        .filter_by(state='active')\
                        .count()
    num_resources = model.Session.query(model.Resource)\
                         .filter_by(state='active')
    # On CKAN <= 2.2 the join from Resource to Package goes via ResourceGroup
    if p.toolkit.check_ckan_version(max_version='2.2.99'):
        num_resources = num_resources.join(model.ResourceGroup)
    num_resources = num_resources \
        .join(model.Package)\
        .filter(model.Package.owner_org.in_(org_ids))\
        .filter_by(state='active').count()

    return {'organization_name': name,
            'organization_title': title,
            'num_broken_packages': num_broken_packages,
            'num_broken_resources': num_broken_resources,
            'num_packages': num_packages,
            'num_resources': num_resources,
            'broken_package_percent': lib.percent(num_broken_packages, num_packages),
            'broken_resource_percent': lib.percent(num_broken_resources, num_resources),
            'table': results}
Esempio n. 4
0
def broken_links_for_organization(organization, include_sub_organizations=False):
    '''
    Returns a dictionary detailing broken resource links for the
    organization, optionally including the organizations returned by
    lib.go_down_tree() for it.

    params:
      organization - reference to an organization (e.g. its name), as
                     accepted by model.Group.get
      include_sub_organizations - if True, also report on every organization
                     yielded by lib.go_down_tree(org)

    Returns:
    {'organization_name': 'cabinet-office',
     'organization_title': 'Cabinet Office',
     'num_broken_packages': ..., 'num_broken_resources': ...,
     'num_packages': ..., 'num_resources': ...,
     'broken_package_percent': ..., 'broken_resource_percent': ...,
     'table': [
       {'dataset_title', 'dataset_name', 'dataset_notes',
        'organization_title', 'organization_name', 'resource_position',
        'resource_id', 'resource_url', 'url_up_to_date', 'via',
        'first_failure', 'last_updated', 'last_success',
        'url_redirected_to', 'reason', 'status', 'failure_count'}
      ...]}

    Raises p.toolkit.ObjectNotFound if the organization is not found.
    '''
    from ckanext.archiver.model import Archival

    org = model.Group.get(organization)
    if not org:
        raise p.toolkit.ObjectNotFound()

    name = org.name
    title = org.title

    # Broken archivals whose dataset and resource are both still active.
    # NB "== True" is deliberate: it builds an SQL expression.
    archivals = model.Session.query(Archival, model.Package, model.Group).\
        filter(Archival.is_broken == True).\
        join(model.Package, Archival.package_id == model.Package.id).\
        filter(model.Package.state == 'active').\
        join(model.Resource, Archival.resource_id == model.Resource.id).\
        filter(model.Resource.state == 'active')

    if not include_sub_organizations:
        org_ids = [org.id]
        archivals = archivals.filter(model.Package.owner_org == org.id)
    else:
        # We want any organization_id that is part of this organization's tree
        org_ids = ['%s' % sub_org.id for sub_org in lib.go_down_tree(org)]
        archivals = archivals.filter(model.Package.owner_org.in_(org_ids))

    archivals = archivals.join(model.Group, model.Package.owner_org == model.Group.id)

    results = []

    # pkg and pkg_org come straight from the joined query, so there is no
    # need to look the package up again for each row.
    for archival, pkg, pkg_org in archivals.all():
        resource = model.Resource.get(archival.resource_id)

        via = ''
        # Guard against the extra being present but stored as None
        er = pkg.extras.get('external_reference') or ''
        if er == 'ONSHUB':
            via = "Stats Hub"
        elif er.startswith("DATA4NR"):
            via = "Data4nr"

        # Prefer the resource revision that was current when it was archived,
        # so the reported URL is the one that was actually checked; fall back
        # to the current resource if no such revision is found.
        archived_resource = model.Session.query(model.ResourceRevision)\
                            .filter_by(id=resource.id)\
                            .filter_by(revision_timestamp=archival.resource_timestamp)\
                            .first() or resource
        row_data = OrderedDict((
            ('dataset_title', pkg.title),
            ('dataset_name', pkg.name),
            ('dataset_notes', lib.dataset_notes(pkg)),
            ('organization_title', pkg_org.title),
            ('organization_name', pkg_org.name),
            ('resource_position', resource.position),
            ('resource_id', resource.id),
            ('resource_url', archived_resource.url),
            ('url_up_to_date', resource.url == archived_resource.url),
            ('via', via),
            ('first_failure', archival.first_failure.isoformat() if archival.first_failure else None),
            ('last_updated', archival.updated.isoformat() if archival.updated else None),
            ('last_success', archival.last_success.isoformat() if archival.last_success else None),
            ('url_redirected_to', archival.url_redirected_to),
            ('reason', archival.reason),
            ('status', archival.status),
            ('failure_count', archival.failure_count),
            ))

        results.append(row_data)

    # One dataset can have several broken resources, so count distinct names
    num_broken_packages = archivals.distinct(model.Package.name).count()
    num_broken_resources = len(results)

    # Get total number of packages & resources
    num_packages = model.Session.query(model.Package)\
                        .filter(model.Package.owner_org.in_(org_ids))\
                        .filter_by(state='active')\
                        .count()
    num_resources = model.Session.query(model.Resource)\
                         .filter_by(state='active')
    # On CKAN <= 2.2 the join from Resource to Package goes via ResourceGroup
    if p.toolkit.check_ckan_version(max_version='2.2.99'):
        num_resources = num_resources.join(model.ResourceGroup)
    num_resources = num_resources \
        .join(model.Package)\
        .filter(model.Package.owner_org.in_(org_ids))\
        .filter_by(state='active').count()

    return {'organization_name': name,
            'organization_title': title,
            'num_broken_packages': num_broken_packages,
            'num_broken_resources': num_broken_resources,
            'num_packages': num_packages,
            'num_resources': num_resources,
            'broken_package_percent': lib.percent(num_broken_packages, num_packages),
            'broken_resource_percent': lib.percent(num_broken_resources, num_resources),
            'table': results}
Esempio n. 5
0
def licence_report(organization=None, include_sub_organizations=False):
    '''
    Returns a dictionary detailing licences for datasets in the
    organisation specified, and optionally sub organizations.

    params:
      organization - name of an organization (looked up with
                     model.Group.by_name); if not given, all active
                     datasets are reported on
      include_sub_organizations - if True, also include the datasets of the
                     organizations yielded by lib.go_down_tree(top_org)

    Returns:
    {'num_datasets': number of datasets counted (unpublished ones excluded),
     'num_licences': number of distinct licence combinations,
     'table': [
       {'license_id', 'license_title', 'licence',
        'dataset_titles', 'dataset_names'}
       ...]}  # ordered by number of datasets, largest first

    Raises p.toolkit.ObjectNotFound if the organization is not found.
    '''
    # Get packages
    if organization:
        top_org = model.Group.by_name(organization)
        if not top_org:
            raise p.toolkit.ObjectNotFound('Publisher not found')

        if include_sub_organizations:
            orgs = lib.go_down_tree(top_org)
        else:
            orgs = [top_org]
        pkgs = set()
        for org in orgs:
            org_pkgs = model.Session.query(model.Package)\
                            .filter_by(state='active')
            # Filter on the organization being visited (by name, the same
            # kind of reference this function receives), not on the
            # top-level one - otherwise sub-organizations' datasets are
            # never picked up.
            org_pkgs = lib.filter_by_organizations(
                org_pkgs, org.name,
                include_sub_organizations=False)\
                .all()
            pkgs.update(org_pkgs)
    else:
        pkgs = model.Session.query(model.Package)\
                    .filter_by(state='active')\
                    .all()

    # Get their licences
    packages_by_licence = collections.defaultdict(list)
    rows = []
    num_pkgs = 0
    for pkg in pkgs:
        if asbool(pkg.extras.get('unpublished')) is True:
            # Ignore unpublished datasets
            continue
        # Datasets are grouped by the combination of these three values
        licence_tuple = (pkg.license_id or '',
                         pkg.license.title if pkg.license else '',
                         pkg.extras.get('licence', ''))
        packages_by_licence[licence_tuple].append((pkg.name, pkg.title))
        num_pkgs += 1

    # Most widely used licence first
    for licence_tuple, dataset_tuples in sorted(packages_by_licence.items(),
                                                key=lambda x: -len(x[1])):
        license_id, license_title, licence = licence_tuple
        # Order the datasets by name so the output is stable
        dataset_tuples.sort(key=lambda x: x[0])
        dataset_names, dataset_titles = zip(*dataset_tuples)
        licence_dict = OrderedDict((
            ('license_id', license_id),
            ('license_title', license_title),
            ('licence', licence),
            ('dataset_titles', '|'.join(dataset_titles)),
            ('dataset_names', ' '.join(dataset_names)),
            ))
        rows.append(licence_dict)

    return {
        'num_datasets': num_pkgs,
        'num_licences': len(rows),
        'table': rows,
        }