Esempio n. 1
0
def tagless_report(organization, include_sub_organizations=False):
    '''
    Produces a report on packages without tags.

    :param organization: organization to report on; when falsy the tagless
        listing is not narrowed by organization.
    :param include_sub_organizations: forwarded, together with the
        organization, to lib.filter_by_organizations.

    Returns something like this:
        {
         'table': [
            {'name': 'river-levels', 'title': 'River levels', 'notes': 'Harvested', 'user': '******',
             'created': '2008-06-13T10:24:59.435631'},
            {'name': 'co2-monthly', 'title': 'CO2 monthly', 'notes': '', 'user': '******',
             'created': '2009-12-14T08:42:45.473827'},
            ],
         'num_packages': 56,
         'packages_without_tags_percent': 4,
         'average_tags_per_package': 3.5,
        }

    'table' holds at most the first 100 tagless packages, whereas
    'packages_without_tags_percent' is based on the count of all of them.
    'average_tags_per_package' is None when there are no packages.
    '''
    # Packages with no matching PackageTag row come out of the outer join
    # with a NULL tag id.
    tagless_q = model.Session.query(model.Package) \
        .outerjoin(model.PackageTag) \
        .filter(
        model.PackageTag.id == None  # noqa: E711
    )
    if organization:
        tagless_q = lib.filter_by_organizations(tagless_q, organization,
                                                include_sub_organizations)

    # Count before slicing: the table below is capped at 100 rows, so its
    # length would understate the total once more than 100 packages are
    # tagless.
    num_tagless = tagless_q.count()
    tagless_pkgs = [
        OrderedDict((
            ('name', pkg.name),
            ('title', pkg.title),
            ('notes', lib.dataset_notes(pkg)),
            ('user', pkg.creator_user_id),
            ('created', pkg.metadata_created.isoformat()),
        )) for pkg in tagless_q.slice(0, 100)
    ]  # First 100 only for this demo

    # Average number of tags per package
    q = model.Session.query(model.Package)
    q = lib.filter_by_organizations(q, organization, include_sub_organizations)
    num_packages = q.count()
    # Joining to PackageTag gives one row per (package, tag) link
    num_taggings = q.join(model.PackageTag).count()
    if num_packages:
        average_tags_per_package = round(float(num_taggings) / num_packages, 1)
    else:
        # Avoid dividing by zero when nothing matched
        average_tags_per_package = None
    packages_without_tags_percent = lib.percent(num_tagless, num_packages)

    return {
        'table': tagless_pkgs,
        'num_packages': num_packages,
        'packages_without_tags_percent': packages_without_tags_percent,
        'average_tags_per_package': average_tags_per_package,
    }
Esempio n. 2
0
def tagless_report(organization, include_sub_organizations=False):
    '''
    Produces a report on packages without tags.

    :param organization: organization to report on; when falsy the tagless
        listing is not narrowed by organization.
    :param include_sub_organizations: forwarded, together with the
        organization, to lib.filter_by_organizations.

    Returns something like this:
        {
         'table': [
            {'name': 'river-levels', 'title': 'River levels', 'notes': 'Harvested', 'user': '******', 'created': '2008-06-13T10:24:59.435631'},  # noqa
            {'name': 'co2-monthly', 'title': 'CO2 monthly', 'notes': '', 'user': '******', 'created': '2009-12-14T08:42:45.473827'},  # noqa
            ],
         'num_packages': 56,
         'packages_without_tags_percent': 4,
         'average_tags_per_package': 3.5,
        }

    Only the first 100 tagless packages are listed in 'table', but
    'packages_without_tags_percent' is worked out from the count of all
    tagless packages. 'average_tags_per_package' is None when no packages
    matched.
    '''
    # Find the packages without tags: after the outer join they are the
    # rows whose PackageTag id is NULL.
    tagless = model.Session.query(model.Package) \
                   .outerjoin(model.PackageTag) \
                   .filter(model.PackageTag.id == None)  # noqa: E711
    if organization:
        tagless = lib.filter_by_organizations(tagless, organization,
                                              include_sub_organizations)

    # The listing is truncated to 100 rows, so the total must be counted
    # separately or the percentage is wrong for larger result sets.
    num_tagless = tagless.count()
    tagless_pkgs = [OrderedDict((
            ('name', pkg.name),
            ('title', pkg.title),
            ('notes', lib.dataset_notes(pkg)),
            ('user', pkg.creator_user_id),
            ('created', pkg.metadata_created.isoformat()),
            )) for pkg in tagless.slice(0, 100)]  # First 100 only for this demo

    # Average number of tags per package
    q = model.Session.query(model.Package)
    q = lib.filter_by_organizations(q, organization, include_sub_organizations)
    num_packages = q.count()
    # One joined row per (package, tag) link
    num_taggings = q.join(model.PackageTag).count()
    if num_packages:
        average_tags_per_package = round(float(num_taggings) / num_packages, 1)
    else:
        # Nothing to average over
        average_tags_per_package = None
    packages_without_tags_percent = lib.percent(num_tagless, num_packages)

    return {
        'table': tagless_pkgs,
        'num_packages': num_packages,
        'packages_without_tags_percent': packages_without_tags_percent,
        'average_tags_per_package': average_tags_per_package,
    }
def dataset_creation(organization=OD['organization'],
                     include_sub_organizations=OD['include_sub_organizations'],
                     include_private=OD['include_private'],
                     include_draft=OD['include_draft'],
                     page=1):
    """Build a report listing basic details of each matching dataset.

    Only packages of type 'dataset' are considered. Active ones are always
    included; drafts are added when ``include_draft`` is true. Private
    packages are left out unless ``include_private`` is true. When
    ``organization`` is truthy the query is narrowed through
    ``lib.filter_by_organizations``.

    ``page`` is accepted but not used by this function.

    Returns a dict with 'table' (one OrderedDict per dataset holding
    'title', 'url', 'owner' and 'created_at') and 'a' (the query's row
    count).
    """
    wanted_states = {'active'}
    if include_draft:
        wanted_states.add('draft')

    datasets = model.Session.query(model.Package).filter(
        model.Package.type == 'dataset',
        model.Package.state.in_(wanted_states))
    if not include_private:
        datasets = datasets.filter(model.Package.private.is_(False))
    if organization:
        datasets = lib.filter_by_organizations(
            datasets, organization, include_sub_organizations)

    rows = []
    for dataset in datasets.all():
        rows.append(OrderedDict((
            ('title', dataset.title),
            ('url', url_for(controller='package', action='read',
                            id=dataset.id, qualified=True)),
            ('owner', get_org_title(dataset)),
            ('created_at', dataset.metadata_created.isoformat()),
        )))

    # The count is issued after the rows have been fetched
    return {'table': rows, 'a': datasets.count()}
Esempio n. 4
0
def dataset_creation(organization=OD['organization'],
                     include_sub_organizations=OD['include_sub_organizations'],
                     include_private=OD['include_private'],
                     include_draft=OD['include_draft'],
                     page=1):
    """Report title, URL, owner and creation time for each dataset.

    Packages of type 'dataset' in state 'active' are selected, plus those
    in state 'draft' when ``include_draft`` is true. Private packages are
    excluded unless ``include_private`` is true, and a truthy
    ``organization`` restricts the result via
    ``lib.filter_by_organizations``. ``page`` is not used in the body.

    The result is a dict: 'table' is a list of OrderedDicts (keys 'title',
    'url', 'owner', 'created_at') and 'a' is the query's count.
    """
    def as_row(pkg):
        # One table row; the URL is fully qualified
        return OrderedDict((
            ('title', pkg.title),
            ('url', url_for(controller='package', action='read',
                            id=pkg.id, qualified=True)),
            ('owner', get_org_title(pkg)),
            ('created_at', pkg.metadata_created.isoformat()),
        ))

    states = {'active'}
    if include_draft:
        states.add('draft')

    base = model.Session.query(model.Package)
    selection = base.filter(model.Package.type == 'dataset',
                            model.Package.state.in_(states))
    if not include_private:
        selection = selection.filter(model.Package.private.is_(False))
    if organization:
        selection = lib.filter_by_organizations(
            selection, organization, include_sub_organizations)

    table = [as_row(pkg) for pkg in selection.all()]
    total = selection.count()
    return {'table': table, 'a': total}
Esempio n. 5
0
def publisher_resources(organization=None, include_sub_organizations=False):
    '''
    Lists the resources of every active dataset of the given organization.

    :param organization: name looked up with model.Group.by_name.
    :param include_sub_organizations: forwarded to
        lib.filter_by_organizations.
    :raises ObjectNotFound: when no group with that name is found.

    Returns a dict with the organization's name and title, the number of
    datasets and resources, and 'table': one row per resource, plus one
    row (with empty resource columns) for each dataset without resources.
    '''
    publisher = model.Group.by_name(organization)
    if not publisher:
        raise p.toolkit.ObjectNotFound('Publisher not found')

    active_q = model.Session.query(model.Package).filter_by(state='active')
    datasets = lib.filter_by_organizations(
        active_q, organization, include_sub_organizations).all()

    def build_row(dataset, res_info):
        # Missing keys in res_info simply become None columns
        owner = dataset.get_organization()
        return OrderedDict((
            ('publisher_title', owner.title),
            ('publisher_name', owner.name),
            ('package_title', dataset.title),
            ('package_name', dataset.name),
            ('package_notes', lib.dataset_notes(dataset)),
            ('resource_position', res_info.get('position')),
            ('resource_id', res_info.get('id')),
            ('resource_description', res_info.get('description')),
            ('resource_url', res_info.get('url')),
            ('resource_format', res_info.get('format')),
            ('resource_created', res_info.get('created')),
        ))

    table = []
    resource_total = 0
    for dataset in datasets:
        dataset_resources = dataset.resources
        if not dataset_resources:
            # A dataset without resources still gets a row of its own
            table.append(build_row(dataset, {}))
            continue
        for resource in dataset_resources:
            table.append(build_row(dataset, {
                'id': resource.id,
                'position': resource.position,
                'description': resource.description,
                'url': resource.url,
                'format': resource.format,
                'created': (resource.created.isoformat()
                            if resource.created else None),
            }))
        resource_total += len(dataset_resources)

    return {
        'organization_name': publisher.name,
        'organization_title': publisher.title,
        'num_datasets': len(datasets),
        'num_resources': resource_total,
        'table': table,
    }
Esempio n. 6
0
def datasets_without_resources(organization, include_sub_organizations=False):
    '''
    Lists active datasets that have no resources, ordered by title.

    Datasets whose 'unpublished' extra equals 'true' (case-insensitively)
    are left out. A truthy ``organization`` narrows the query through
    lib.filter_by_organizations.

    Returns {'table': rows}; each row is an OrderedDict with the dataset's
    name, title, metadata timestamps, the values obtained from
    last_resource_deleted(pkg) and the dataset notes.
    '''
    active_pkgs = model.Session.query(model.Package) \
        .filter_by(state='active') \
        .order_by(model.Package.title)
    if organization:
        active_pkgs = lib.filter_by_organizations(
            active_pkgs, organization, include_sub_organizations)

    table = []
    for pkg in active_pkgs.all():
        if len(pkg.resources) > 0:
            continue
        if pkg.extras.get('unpublished', '').lower() == 'true':
            continue
        # last_resource_deleted yields a (timestamp-or-falsy, url) pair
        deleted_at, last_url = last_resource_deleted(pkg)
        if deleted_at:
            deleted_text = deleted_at.isoformat()
        else:
            deleted_text = None
        table.append(OrderedDict((
            ('name', pkg.name),
            ('title', pkg.title),
            ('metadata created', pkg.metadata_created.isoformat()),
            ('metadata modified', pkg.metadata_modified.isoformat()),
            ('last resource deleted', deleted_text),
            ('last resource url', last_url),
            ('dataset_notes', lib.dataset_notes(pkg)),
        )))
    return {'table': table}
Esempio n. 7
0
def publisher_resources(organization=None, include_sub_organizations=False):
    """
    Describe the resources of each active dataset of an organization.

    The organization is looked up by name with model.Group.by_name and
    ObjectNotFound is raised when nothing is found. The datasets come from
    the active packages narrowed by lib.filter_by_organizations.

    Returns a dict holding the organization's name and title, the dataset
    and resource counts, and "table": one row per resource, with a single
    row of empty resource columns for a dataset that has no resources.
    The "resource_created" column carries the resource's ``created`` value
    unchanged.
    """
    publisher = model.Group.by_name(organization)
    if not publisher:
        raise p.toolkit.ObjectNotFound(u"Izdava\u010D nije pronaden")

    active = model.Session.query(model.Package).filter_by(state="active")
    datasets = lib.filter_by_organizations(
        active, organization, include_sub_organizations
    ).all()

    def make_row(dataset, info):
        # Absent keys in `info` turn into None columns
        owner = dataset.get_organization()
        columns = (
            ("publisher_title", owner.title),
            ("publisher_name", owner.name),
            ("package_title", dataset.title),
            ("package_name", dataset.name),
            ("package_notes", lib.dataset_notes(dataset)),
            ("resource_position", info.get("position")),
            ("resource_id", info.get("id")),
            ("resource_description", info.get("description")),
            ("resource_url", info.get("url")),
            ("resource_format", info.get("format")),
            ("resource_created", info.get("created")),
        )
        return OrderedDict(columns)

    table = []
    resource_count = 0
    for dataset in datasets:
        found = dataset.resources
        if not found:
            # Keep datasets without resources visible in the report
            table.append(make_row(dataset, {}))
            continue
        for item in found:
            info = {
                "id": item.id,
                "position": item.position,
                "description": item.description,
                "url": item.url,
                "format": item.format,
                "created": item.created,
            }
            table.append(make_row(dataset, info))
        resource_count += len(found)

    return {
        "organization_name": publisher.name,
        "organization_title": publisher.title,
        "num_datasets": len(datasets),
        "num_resources": resource_count,
        "table": table,
    }
Esempio n. 8
0
def publisher_resources(organization=None,
                        include_sub_organizations=False):
    '''
    Reports on the resources belonging to an organization's active
    datasets.

    :param organization: group name, resolved with model.Group.by_name.
    :param include_sub_organizations: handed on to
        lib.filter_by_organizations.
    :raises ObjectNotFound: if the name does not resolve to a group.

    The returned dict has 'organization_name', 'organization_title',
    'num_datasets', 'num_resources' and 'table'. The table has a row for
    each resource; a dataset with no resources contributes a single row
    whose resource columns are None.
    '''
    org = model.Group.by_name(organization)
    if not org:
        raise p.toolkit.ObjectNotFound('Publisher not found')

    package_q = model.Session.query(model.Package).filter_by(state='active')
    package_q = lib.filter_by_organizations(package_q, organization,
                                            include_sub_organizations)
    packages = package_q.all()

    def to_row(package, fields):
        # `fields` may be empty, in which case every .get() gives None
        package_org = package.get_organization()
        row = OrderedDict()
        row['publisher_title'] = package_org.title
        row['publisher_name'] = package_org.name
        row['package_title'] = package.title
        row['package_name'] = package.name
        row['package_notes'] = lib.dataset_notes(package)
        row['resource_position'] = fields.get('position')
        row['resource_id'] = fields.get('id')
        row['resource_description'] = fields.get('description')
        row['resource_url'] = fields.get('url')
        row['resource_format'] = fields.get('format')
        row['resource_created'] = fields.get('created')
        return row

    def describe(resource):
        # Plain-dict view of a resource; the timestamp becomes ISO text
        return {'id': resource.id, 'position': resource.position,
                'description': resource.description, 'url': resource.url,
                'format': resource.format,
                'created': (resource.created.isoformat()
                            if resource.created else None)}

    rows = []
    num_resources = 0
    for package in packages:
        package_resources = package.resources
        if package_resources:
            for resource in package_resources:
                rows.append(to_row(package, describe(resource)))
            num_resources += len(package_resources)
        else:
            # Still list packages that have no resources at all
            rows.append(to_row(package, {}))

    return {
        'organization_name': org.name,
        'organization_title': org.title,
        'num_datasets': len(packages),
        'num_resources': num_resources,
        'table': rows,
    }
Esempio n. 9
0
def html_datasets_report(organization, include_sub_organizations=False):
    '''
    Returns datasets that only have an HTML link, by organization.

    A dataset counts as HTML-only when, after ignoring resources whose
    resource_type is 'documentation' and the formats 'asp' and '' (a
    missing format is treated as ''), the only format left is 'html'.
    Formats are compared in lower case.

    :param organization: when truthy, the query is narrowed with
        lib.filter_by_organizations.
    :param include_sub_organizations: forwarded to
        lib.filter_by_organizations.
    :returns: dict with 'table' (one row per HTML-only dataset),
        'num_datasets_published' (active datasets not flagged by the
        'unpublished' extra) and 'num_datasets_only_html'.
    '''
    # Get packages
    query = model.Session.query(model.Package)\
                .filter_by(state='active')
    if organization:
        query = lib.filter_by_organizations(query, organization,
                                            include_sub_organizations)

    # Formats that say nothing about the data itself
    non_data_formats = set(('asp', ''))
    html_only = set(('html', ))

    num_datasets_published = 0
    datasets_only_html = []
    for pkg in query.all():
        if p.toolkit.asbool(pkg.extras.get('unpublished')):
            continue
        num_datasets_published += 1

        # `or ''` guards against a resource whose format is None, which
        # would otherwise raise AttributeError on .lower()
        formats = set(
            (res.format or '').lower() for res in pkg.resources
            if res.resource_type != 'documentation')
        if formats - non_data_formats == html_only:
            datasets_only_html.append(pkg)

    rows = [
        OrderedDict((
            ('name', pkg.name),
            ('title', pkg.title),
            ('metadata created', pkg.metadata_created.isoformat()),
            ('metadata modified', pkg.metadata_modified.isoformat()),
            ('dataset_notes', lib.dataset_notes(pkg)),
        )) for pkg in datasets_only_html
    ]

    return {
        'table': rows,
        'num_datasets_published': num_datasets_published,
        'num_datasets_only_html': len(datasets_only_html),
    }
Esempio n. 10
0
def publisher_activity(organization, include_sub_organizations=False):
    """
    Contains information about the datasets a specific organization has
    released in this and last quarter (calendar year). This is needed by
    departments for their quarterly transparency reports.

    :param organization: organization name; when truthy it is resolved
        with model.Group.by_name and the packages are narrowed with
        lib.filter_by_organizations. When falsy all packages are examined.
    :param include_sub_organizations: forwarded to
        lib.filter_by_organizations.
    :raises ObjectNotFound: when a name is given but no group is found.
    :returns: dict with 'table' (a list of tuples matching 'columns'),
        'columns' (the column headings) and 'quarters' (the value returned
        by get_quarter_dates).
    """
    import datetime
    import ckan.model as model
    from paste.deploy.converters import asbool

    # These are the authors whose revisions we ignore, as they are trivial
    # changes. NB we do want to know about revisions by:
    # * harvest (harvested metadata)
    # * dgu (NS Stat Hub imports)
    # * Fix national indicators
    system_authors = (
        'autotheme',
        'co-prod3.dh.bytemark.co.uk',
        'Date format tidier',
        'current_revision_fixer',
        'current_revision_fixer2',
        'fix_contact_details.py',
        'Repoint 410 Gone to webarchive url',
        'Fix duplicate resources',
    )

    # Result rows collected per quarter name. The quarter names produced by
    # get_quarter_dates are used as keys below, so they are expected to be
    # 'this' and 'last'.
    created = {'this': [], 'last': []}
    modified = {'this': [], 'last': []}

    now = datetime.datetime.now()
    quarters = get_quarter_dates(now)

    if organization:
        # From here on `organization` is the group object, not its name
        organization = model.Group.by_name(organization)
        if not organization:
            raise p.toolkit.ObjectNotFound()

    if not organization:
        pkgs = model.Session.query(model.Package)\
                .all()
    else:
        pkgs = model.Session.query(model.Package)
        pkgs = lib.filter_by_organizations(pkgs, organization,
                                           include_sub_organizations).all()

    for pkg in pkgs:
        # The earliest package revision stands for the package's creation
        created_ = model.Session.query(model.PackageRevision)\
            .filter(model.PackageRevision.id == pkg.id) \
            .order_by("revision_timestamp asc").first()

        # Base queries for revisions of the package itself, its resources
        # and its extras, each excluding the system authors. They are only
        # run when iterated in the 'modified' branch below.
        pr_q = model.Session.query(model.PackageRevision, model.Revision)\
            .filter(model.PackageRevision.id == pkg.id)\
            .filter_by(state='active')\
            .join(model.Revision)\
            .filter(~model.Revision.author.in_(system_authors))
        rr_q = model.Session.query(model.Package, model.ResourceRevision, model.Revision)\
            .filter(model.Package.id == pkg.id)\
            .filter_by(state='active')\
            .join(model.ResourceGroup)\
            .join(model.ResourceRevision,
                  model.ResourceGroup.id == model.ResourceRevision.resource_group_id)\
            .join(model.Revision)\
            .filter(~model.Revision.author.in_(system_authors))
        pe_q = model.Session.query(model.Package, model.PackageExtraRevision, model.Revision)\
            .filter(model.Package.id == pkg.id)\
            .filter_by(state='active')\
            .join(model.PackageExtraRevision,
                  model.Package.id == model.PackageExtraRevision.package_id)\
            .join(model.Revision)\
            .filter(~model.Revision.author.in_(system_authors))

        for quarter_name in quarters:
            # quarter[0] and quarter[1] are used as exclusive start and end
            quarter = quarters[quarter_name]
            if quarter[0] < created_.revision_timestamp < quarter[1]:
                # Created within this quarter: report it as 'created' only
                published = not asbool(pkg.extras.get('unpublished'))
                created[quarter_name].append(
                    (created_.name, created_.title, lib.dataset_notes(pkg),
                     'created', quarter_name,
                     created_.revision_timestamp.isoformat(),
                     created_.revision.author, published))
            else:
                # Otherwise look for revisions made within the quarter
                prs = pr_q.filter(model.PackageRevision.revision_timestamp > quarter[0])\
                          .filter(model.PackageRevision.revision_timestamp < quarter[1])
                rrs = rr_q.filter(model.ResourceRevision.revision_timestamp > quarter[0])\
                          .filter(model.ResourceRevision.revision_timestamp < quarter[1])
                pes = pe_q.filter(model.PackageExtraRevision.revision_timestamp > quarter[0])\
                          .filter(model.PackageExtraRevision.revision_timestamp < quarter[1])
                # The Revision object is the last element of each result
                # row: index 1 for prs, index 2 for rrs and pes. Each query
                # is iterated once for authors and once more for dates.
                authors = ' '.join(
                    set([r[1].author
                         for r in prs] + [r[2].author for r in rrs] +
                        [r[2].author for r in pes]))
                dates = set([r[1].timestamp.date() for r in prs] +
                            [r[2].timestamp.date() for r in rrs] +
                            [r[2].timestamp.date() for r in pes])
                dates_formatted = ' '.join(
                    [date.isoformat() for date in sorted(dates)])
                # An empty author string means nothing relevant was found
                if authors:
                    published = not asbool(pkg.extras.get('unpublished'))
                    modified[quarter_name].append(
                        (pkg.name, pkg.title, lib.dataset_notes(pkg),
                         'modified', quarter_name, dates_formatted, authors,
                         published))

    # Per quarter: created rows first, then modified rows, each sorted by
    # dataset title (tuple index 1)
    datasets = []
    for quarter_name in quarters:
        datasets += sorted(created[quarter_name], key=lambda x: x[1])
        datasets += sorted(modified[quarter_name], key=lambda x: x[1])
    columns = ('Dataset name', 'Dataset title', 'Dataset notes',
               'Modified or created', 'Quarter', 'Timestamp', 'Author',
               'Published')

    return {'table': datasets, 'columns': columns, 'quarters': quarters}
Esempio n. 11
0
def feedback_report(organization=None,
                    include_sub_organizations=False,
                    include_published=False):
    """
    Summarise feedback comments per dataset for a publisher (and
    optionally its sub-publishers).

    :param organization: publisher name; when truthy it is resolved with
        model.Group.by_name, otherwise None is passed on to
        lib.filter_by_organizations.
    :param include_sub_organizations: forwarded to
        lib.filter_by_organizations.
    :param include_published: when false, datasets without a truthy
        'unpublished' extra are left out of the report.
    :raises ObjectNotFound: when a name is given but no group is found.
    :returns: dict with 'table' (per-dataset dicts, most comments first),
        'dataset_count' and 'dataset_count_with_feedback'.
    """
    import ckan.lib.helpers as helpers
    from ckanext.dgu.model.feedback import Feedback

    if not organization:
        organization = None
    else:
        organization = model.Group.by_name(organization)
        if not organization:
            raise p.toolkit.ObjectNotFound()

    # Active memberships that link an active package to its organization(s)
    member_q = model.Session.query(model.Member)\
        .join(model.Package, model.Package.id == model.Member.table_id)\
        .filter(model.Member.state == 'active')
    member_q = lib.filter_by_organizations(member_q, organization,
                                           include_sub_organizations)
    member_q = member_q.filter(model.Member.table_name == 'package')\
        .filter(model.Package.state == 'active')

    # Feedback flags that are tallied for each dataset
    categories = ('economic', 'social', 'linked', 'other', 'effective')

    results = []
    num_pkgs_with_feedback = 0
    for member in member_q.all():
        pkg = model.Package.get(member.table_id)

        # Unless published datasets were asked for, keep only datasets
        # carrying a truthy 'unpublished' extra.
        # NOTE(review): if extras hold strings, 'false' is truthy here -
        # confirm this is intended.
        if not (include_published or pkg.extras.get('unpublished', False)):
            continue

        group = member.group
        pkg_data = collections.defaultdict(int)
        pkg_data['organization-name'] = group.name
        pkg_data['generated-at'] = helpers.render_datetime(
            datetime.datetime.now(), "%d/%m/%Y %H:%M")
        pkg_data['organization-title'] = group.title
        pkg_data['package-name'] = pkg.name
        pkg_data['package-title'] = pkg.title
        pkg_data['publish-date'] = pkg.extras.get('publish-date', '')

        visible_feedback = model.Session.query(Feedback)\
            .filter(Feedback.visible == True)\
            .filter(Feedback.package_id == member.table_id)\
            .filter(Feedback.active == True)  # noqa: E712
        for feedback in visible_feedback:
            for category in categories:
                if getattr(feedback, category):
                    pkg_data[category] += 1

        # Reading a missing category from the defaultdict yields 0
        pkg_data['total-comments'] = sum(
            [pkg_data[category] for category in categories])
        results.append(pkg_data)
        if pkg_data['total-comments'] > 0:
            num_pkgs_with_feedback += 1

    ranked = sorted(results, key=lambda row: -row.get('total-comments'))
    return {
        'table': ranked,
        'dataset_count': len(results),
        'dataset_count_with_feedback': num_pkgs_with_feedback,
    }
Esempio n. 12
0
def publisher_activity(organization, include_sub_organizations=False):
    """
    Contains information about the datasets a specific organization has
    released in this and last quarter (calendar year). This is needed by
    departments for their quarterly transparency reports.

    :param organization: organization name; when truthy it is resolved
        with model.Group.by_name and the packages are narrowed with
        lib.filter_by_organizations. When falsy all packages are examined.
    :param include_sub_organizations: forwarded to
        lib.filter_by_organizations.
    :raises ObjectNotFound: when a name is given but no group is found.
    :returns: dict with 'table' (a list of tuples matching 'columns'),
        'columns' (the column headings) and 'quarters' (the value returned
        by get_quarter_dates).
    """
    import datetime
    import ckan.model as model
    from paste.deploy.converters import asbool

    # These are the authors whose revisions we ignore, as they are trivial
    # changes. NB we do want to know about revisions by:
    # * harvest (harvested metadata)
    # * dgu (NS Stat Hub imports)
    # * Fix national indicators
    system_authors = ('autotheme', 'co-prod3.dh.bytemark.co.uk',
                      'Date format tidier', 'current_revision_fixer',
                      'current_revision_fixer2', 'fix_contact_details.py',
                      'Repoint 410 Gone to webarchive url',
                      'Fix duplicate resources',
                      'fix_secondary_theme.py',
                      'script-fix-links-tna',
                      )

    # Result rows collected per quarter name. The quarter names produced by
    # get_quarter_dates are used as keys below, so they are expected to be
    # 'this' and 'last'.
    created = {'this': [], 'last': []}
    modified = {'this': [], 'last': []}

    now = datetime.datetime.now()
    quarters = get_quarter_dates(now)

    if organization:
        # From here on `organization` is the group object, not its name
        organization = model.Group.by_name(organization)
        if not organization:
            raise p.toolkit.ObjectNotFound()

    if not organization:
        pkgs = model.Session.query(model.Package)\
                .all()
    else:
        pkgs = model.Session.query(model.Package)
        pkgs = lib.filter_by_organizations(pkgs, organization,
                                           include_sub_organizations).all()

    for pkg in pkgs:
        # The earliest package revision stands for the package's creation
        created_ = model.Session.query(model.PackageRevision)\
            .filter(model.PackageRevision.id == pkg.id) \
            .order_by("revision_timestamp asc").first()

        # Base queries for revisions of the package itself, its resources
        # and its extras, each excluding the system authors. They are only
        # run when iterated in the 'modified' branch below.
        pr_q = model.Session.query(model.PackageRevision, model.Revision)\
            .filter(model.PackageRevision.id == pkg.id)\
            .filter_by(state='active')\
            .join(model.Revision)\
            .filter(~model.Revision.author.in_(system_authors))
        rr_q = model.Session.query(model.Package, model.ResourceRevision, model.Revision)\
            .filter(model.Package.id == pkg.id)\
            .filter_by(state='active')\
            .join(model.ResourceGroup)\
            .join(model.ResourceRevision,
                  model.ResourceGroup.id == model.ResourceRevision.resource_group_id)\
            .join(model.Revision)\
            .filter(~model.Revision.author.in_(system_authors))
        pe_q = model.Session.query(model.Package, model.PackageExtraRevision, model.Revision)\
            .filter(model.Package.id == pkg.id)\
            .filter_by(state='active')\
            .join(model.PackageExtraRevision,
                  model.Package.id == model.PackageExtraRevision.package_id)\
            .join(model.Revision)\
            .filter(~model.Revision.author.in_(system_authors))

        for quarter_name in quarters:
            # quarter[0] and quarter[1] are used as exclusive start and end
            quarter = quarters[quarter_name]
            if quarter[0] < created_.revision_timestamp < quarter[1]:
                # Created within this quarter: report it as 'created' only
                published = not asbool(pkg.extras.get('unpublished'))
                created[quarter_name].append(
                    (created_.name, created_.title, lib.dataset_notes(pkg),
                     'created', quarter_name,
                     created_.revision_timestamp.isoformat(),
                     created_.revision.author, published))
            else:
                # Otherwise look for revisions made within the quarter
                prs = pr_q.filter(model.PackageRevision.revision_timestamp > quarter[0])\
                          .filter(model.PackageRevision.revision_timestamp < quarter[1])
                rrs = rr_q.filter(model.ResourceRevision.revision_timestamp > quarter[0])\
                          .filter(model.ResourceRevision.revision_timestamp < quarter[1])
                pes = pe_q.filter(model.PackageExtraRevision.revision_timestamp > quarter[0])\
                          .filter(model.PackageExtraRevision.revision_timestamp < quarter[1])
                # The Revision object is the last element of each result
                # row: index 1 for prs, index 2 for rrs and pes. Each query
                # is iterated once for authors and once more for dates.
                authors = ' '.join(set([r[1].author for r in prs] +
                                      [r[2].author for r in rrs] +
                                      [r[2].author for r in pes]))
                dates = set([r[1].timestamp.date() for r in prs] +
                            [r[2].timestamp.date() for r in rrs] +
                            [r[2].timestamp.date() for r in pes])
                dates_formatted = ' '.join([date.isoformat()
                                            for date in sorted(dates)])
                # An empty author string means nothing relevant was found
                if authors:
                    published = not asbool(pkg.extras.get('unpublished'))
                    modified[quarter_name].append(
                        (pkg.name, pkg.title, lib.dataset_notes(pkg),
                         'modified', quarter_name,
                         dates_formatted, authors, published))

    # Per quarter: created rows first, then modified rows, each sorted by
    # dataset title (tuple index 1)
    datasets = []
    for quarter_name in quarters:
        datasets += sorted(created[quarter_name], key=lambda x: x[1])
        datasets += sorted(modified[quarter_name], key=lambda x: x[1])
    columns = ('Dataset name', 'Dataset title', 'Dataset notes', 'Modified or created', 'Quarter', 'Timestamp', 'Author', 'Published')

    return {'table': datasets, 'columns': columns,
            'quarters': quarters}
Esempio n. 13
0
def feedback_report(organization=None, include_sub_organizations=False, include_published=False):
    """
    Count the feedback comments on each dataset of a publisher, optionally
    including its sub-publishers.

    ``organization`` is a publisher name; a truthy value is resolved with
    model.Group.by_name (ObjectNotFound is raised if that finds nothing),
    a falsy one becomes None. Unless ``include_published`` is true, only
    datasets with a truthy 'unpublished' extra are reported.

    Returns a dict with 'table' (one dict per dataset, sorted by
    descending 'total-comments'), 'dataset_count' and
    'dataset_count_with_feedback'.
    """
    import ckan.lib.helpers as helpers
    from ckanext.dgu.model.feedback import Feedback

    if organization:
        organization = model.Group.by_name(organization)
        if not organization:
            raise p.toolkit.ObjectNotFound()
    else:
        organization = None

    # Active package memberships of the organization(s)
    active_members = model.Session.query(model.Member)\
        .join(model.Package, model.Package.id == model.Member.table_id)\
        .filter(model.Member.state == 'active')
    scoped_members = lib.filter_by_organizations(
        active_members, organization, include_sub_organizations)
    scoped_members = scoped_members\
        .filter(model.Member.table_name == 'package')\
        .filter(model.Package.state == 'active')

    def tally(package_id, counts):
        # Add one to each flag that is set on a visible, active feedback
        # item of the given package
        feedback_q = model.Session.query(Feedback)\
            .filter(Feedback.visible == True)\
            .filter(Feedback.package_id == package_id)\
            .filter(Feedback.active == True)  # noqa: E712
        for item in feedback_q:
            if item.economic:
                counts['economic'] += 1
            if item.social:
                counts['social'] += 1
            if item.linked:
                counts['linked'] += 1
            if item.other:
                counts['other'] += 1
            if item.effective:
                counts['effective'] += 1

    table = []
    with_feedback = 0
    for member in scoped_members.all():
        pkg = model.Package.get(member.table_id)

        # Published datasets are skipped unless they were asked for.
        # NOTE(review): a string extra such as 'false' is truthy here -
        # confirm this is intended.
        is_unpublished = include_published or pkg.extras.get('unpublished', False)
        if not is_unpublished:
            continue

        entry = collections.defaultdict(int)
        entry['organization-name'] = member.group.name
        entry['generated-at'] = helpers.render_datetime(datetime.datetime.now(), "%d/%m/%Y %H:%M")
        entry['organization-title'] = member.group.title
        entry['package-name'] = pkg.name
        entry['package-title'] = pkg.title
        entry['publish-date'] = pkg.extras.get('publish-date', '')

        tally(member.table_id, entry)

        # Flags never seen read as 0 from the defaultdict
        total = (entry['economic'] + entry['social'] + entry['linked'] +
                 entry['other'] + entry['effective'])
        entry['total-comments'] = total
        table.append(entry)
        if total > 0:
            with_feedback += 1

    return {'table': sorted(table, key=lambda row: -row.get('total-comments')),
            'dataset_count': len(table),
            'dataset_count_with_feedback': with_feedback,
            }
Esempio n. 14
0
def licence_report(organization=None, include_sub_organizations=False):
    '''
    Returns a dictionary detailing licences for datasets in the
    organisation specified, and optionally sub organizations.

    :param organization: name of the organization to report on. If falsy,
        all active packages are reported on.
    :param include_sub_organizations: if True, packages of every
        organization returned by lib.go_down_tree() for the named
        organization are included as well.
    :returns: dict with keys:
        'num_datasets' - number of (published) packages counted,
        'num_licences' - number of distinct licence combinations,
        'table' - list of OrderedDicts, one per licence combination, ordered
                  by descending number of datasets.
    :raises ObjectNotFound: if an organization name is given but no group of
        that name exists.
    '''
    # Get packages
    if organization:
        top_org = model.Group.by_name(organization)
        if not top_org:
            raise p.toolkit.ObjectNotFound('Publisher not found')

        if include_sub_organizations:
            orgs = lib.go_down_tree(top_org)
        else:
            orgs = [top_org]
        pkgs = set()
        for org in orgs:
            org_pkgs = model.Session.query(model.Package)\
                            .filter_by(state='active')
            # Filter on the organization of *this* iteration. Passing the
            # top-level `organization` here would query the same
            # organization every time and never pick up the packages of the
            # sub-organizations.
            org_pkgs = lib.filter_by_organizations(
                org_pkgs, org,
                include_sub_organizations=False)\
                .all()
            pkgs |= set(org_pkgs)
    else:
        pkgs = model.Session.query(model.Package)\
                    .filter_by(state='active')\
                    .all()

    # Get their licences
    # Key is the tuple (license_id, license title, 'licence' extra); value is
    # the list of (name, title) tuples of the packages carrying that licence.
    packages_by_licence = collections.defaultdict(list)
    rows = []
    num_pkgs = 0
    for pkg in pkgs:
        if asbool(pkg.extras.get('unpublished')):
            # Ignore unpublished datasets
            continue
        licence_tuple = (pkg.license_id or '',
                         pkg.license.title if pkg.license else '',
                         pkg.extras.get('licence', ''))
        packages_by_licence[licence_tuple].append((pkg.name, pkg.title))
        num_pkgs += 1

    # Most used licence first
    for licence_tuple, dataset_tuples in sorted(packages_by_licence.items(),
                                                key=lambda x: -len(x[1])):
        license_id, license_title, licence = licence_tuple
        # Order the datasets by name; every list has at least one entry, so
        # the zip() unpacking below always yields two tuples.
        dataset_tuples.sort(key=lambda x: x[0])
        dataset_names, dataset_titles = zip(*dataset_tuples)
        licence_dict = OrderedDict((
            ('license_id', license_id),
            ('license_title', license_title),
            ('licence', licence),
            ('dataset_titles', '|'.join(dataset_titles)),
            ('dataset_names', ' '.join(dataset_names)),
            ))
        rows.append(licence_dict)

    return {
        'num_datasets': num_pkgs,
        'num_licences': len(rows),
        'table': rows,
        }
Esempio n. 15
0
def _get_activity(organization_name, include_sub_organizations, periods):
    '''
    Collects dataset creation and modification activity for each period.

    :param organization_name: name of the organization whose packages are
        examined. If falsy, all packages are examined.
    :param include_sub_organizations: passed on to
        lib.filter_by_organizations() when an organization is given.
    :param periods: dict mapping a period name to an indexable whose items
        [0] and [1] are the (exclusive) start and end of the period,
        comparable with revision timestamps.
    :returns: tuple (created, modified) of dicts keyed by period name. Each
        value is a list of tuples:
        (name, title, notes, 'created' or 'modified', period_name,
         date(s), author(s), published)
    :raises ObjectNotFound: if an organization name is given but no group of
        that name exists.
    '''
    import ckan.model as model
    from paste.deploy.converters import asbool

    created = dict((period_name, []) for period_name in periods)
    modified = dict((period_name, []) for period_name in periods)

    # These are the authors whose revisions we ignore, as they are trivial
    # changes. NB we do want to know about revisions by:
    # * harvest (harvested metadata)
    # * dgu (NS Stat Hub imports)
    # * Fix national indicators
    system_authors = ('autotheme', 'co-prod3.dh.bytemark.co.uk',
                      'Date format tidier', 'current_revision_fixer',
                      'current_revision_fixer2', 'fix_contact_details.py',
                      'Repoint 410 Gone to webarchive url',
                      'Fix duplicate resources',
                      'fix_secondary_theme.py',
                      )
    system_author_template = 'script%'  # "%" is a wildcard

    pkg_q = model.Session.query(model.Package)
    if organization_name:
        organization = model.Group.by_name(organization_name)
        if not organization:
            raise p.toolkit.ObjectNotFound()
        pkg_q = lib.filter_by_organizations(pkg_q, organization,
                                            include_sub_organizations)
    pkgs = pkg_q.all()

    for pkg in pkgs:
        # The earliest package revision is treated as the creation
        created_ = model.Session.query(model.PackageRevision)\
            .filter(model.PackageRevision.id == pkg.id) \
            .order_by(model.PackageRevision.revision_timestamp.asc())\
            .first()
        if created_ is None:
            # No revisions recorded, so there is no activity to report
            continue

        # Base queries for revisions of the package itself, its resources
        # and its extras, ignoring the system authors. The period bounds are
        # added per period below.
        pr_q = model.Session.query(model.PackageRevision, model.Revision)\
            .filter(model.PackageRevision.id == pkg.id)\
            .filter_by(state='active')\
            .join(model.Revision)\
            .filter(~model.Revision.author.in_(system_authors)) \
            .filter(~model.Revision.author.like(system_author_template))
        rr_q = model.Session.query(model.Package, model.ResourceRevision, model.Revision)\
            .filter(model.Package.id == pkg.id)\
            .filter_by(state='active')\
            .join(model.ResourceGroup)\
            .join(model.ResourceRevision,
                  model.ResourceGroup.id == model.ResourceRevision.resource_group_id)\
            .join(model.Revision)\
            .filter(~model.Revision.author.in_(system_authors))\
            .filter(~model.Revision.author.like(system_author_template))
        pe_q = model.Session.query(model.Package, model.PackageExtraRevision, model.Revision)\
            .filter(model.Package.id == pkg.id)\
            .filter_by(state='active')\
            .join(model.PackageExtraRevision,
                  model.Package.id == model.PackageExtraRevision.package_id)\
            .join(model.Revision)\
            .filter(~model.Revision.author.in_(system_authors))\
            .filter(~model.Revision.author.like(system_author_template))

        for period_name in periods:
            period = periods[period_name]
            # created
            if period[0] < created_.revision_timestamp < period[1]:
                published = not asbool(pkg.extras.get('unpublished'))
                created[period_name].append(
                    (created_.name, created_.title, lib.dataset_notes(pkg),
                     'created', period_name,
                     created_.revision_timestamp.isoformat(),
                     created_.revision.author, published))

            # modified
            # exclude the creation revision
            period_start = max(period[0], created_.revision_timestamp)
            # Fetch each result set once with .all(); the rows are read
            # twice below (authors and dates) and iterating the Query
            # objects directly would run every query twice.
            prs = pr_q.filter(model.PackageRevision.revision_timestamp > period_start)\
                        .filter(model.PackageRevision.revision_timestamp < period[1])\
                        .all()
            rrs = rr_q.filter(model.ResourceRevision.revision_timestamp > period_start)\
                        .filter(model.ResourceRevision.revision_timestamp < period[1])\
                        .all()
            pes = pe_q.filter(model.PackageExtraRevision.revision_timestamp > period_start)\
                        .filter(model.PackageExtraRevision.revision_timestamp < period[1])\
                        .all()
            # The Revision object is the last item of every row
            revisions = [r[1] for r in prs] + \
                        [r[2] for r in rrs] + \
                        [r[2] for r in pes]
            # Sorted so that the output is deterministic
            authors = ' '.join(sorted(set(rev.author for rev in revisions)))
            dates = set(rev.timestamp.date() for rev in revisions)
            dates_formatted = ' '.join([date.isoformat()
                                        for date in sorted(dates)])
            if authors:
                published = not asbool(pkg.extras.get('unpublished'))
                modified[period_name].append(
                    (pkg.name, pkg.title, lib.dataset_notes(pkg),
                        'modified', period_name,
                        dates_formatted, authors, published))
    return created, modified
Esempio n. 16
0
def _get_activity(organization_name, include_sub_organizations, periods):
    '''
    Collects dataset creation and modification activity for each period.

    :param organization_name: name of the organization whose packages are
        examined. If falsy, all packages are examined.
    :param include_sub_organizations: passed on to
        lib.filter_by_organizations() when an organization is given.
    :param periods: dict mapping a period name to an indexable whose items
        [0] and [1] are the (exclusive) start and end of the period,
        comparable with revision timestamps.
    :returns: tuple (created, modified) of dicts keyed by period name. Each
        value is a list of tuples:
        (id, name, title, notes, 'created' or 'modified', period_name,
         date(s), author(s), published)
    :raises ObjectNotFound: if an organization name is given but no group of
        that name exists.
    '''
    import ckan.model as model
    from paste.deploy.converters import asbool

    created = dict((period_name, []) for period_name in periods)
    modified = dict((period_name, []) for period_name in periods)

    # These are the authors whose revisions we ignore, as they are trivial
    # changes. NB we do want to know about revisions by:
    # * harvest (harvested metadata)
    # * dgu (NS Stat Hub imports)
    # * Fix national indicators
    system_authors = (
        'autotheme',
        'co-prod3.dh.bytemark.co.uk',
        'Date format tidier',
        'current_revision_fixer',
        'current_revision_fixer2',
        'fix_contact_details.py',
        'Repoint 410 Gone to webarchive url',
        'Fix duplicate resources',
        'fix_secondary_theme.py',
    )
    system_author_template = 'script%'  # "%" is a wildcard

    pkg_q = model.Session.query(model.Package)
    if organization_name:
        organization = model.Group.by_name(organization_name)
        if not organization:
            raise p.toolkit.ObjectNotFound()
        pkg_q = lib.filter_by_organizations(pkg_q, organization,
                                            include_sub_organizations)
    pkgs = pkg_q.all()

    for pkg in pkgs:
        # The earliest package revision is treated as the creation
        created_ = model.Session.query(model.PackageRevision) \
            .filter(model.PackageRevision.id == pkg.id) \
            .order_by(model.PackageRevision.revision_timestamp.asc()) \
            .first()
        if created_ is None:
            # No revisions recorded, so there is no activity to report
            continue

        # Base queries for revisions of the package itself, its resources
        # and its extras, ignoring the system authors. The period bounds are
        # added per period below.
        pr_q = model.Session.query(model.PackageRevision, model.Revision) \
            .filter(model.PackageRevision.id == pkg.id) \
            .filter_by(state='active') \
            .join(model.Revision) \
            .filter(~model.Revision.author.in_(system_authors)) \
            .filter(~model.Revision.author.like(system_author_template))
        rr_q = model.Session.query(model.Package, model.ResourceRevision, model.Revision) \
            .filter(model.Package.id == pkg.id) \
            .filter_by(state='active') \
            .join(model.ResourceRevision,
                  model.Package.id == model.ResourceRevision.package_id) \
            .join(model.Revision) \
            .filter(~model.Revision.author.in_(system_authors)) \
            .filter(~model.Revision.author.like(system_author_template))
        pe_q = model.Session.query(model.Package, model.PackageExtraRevision, model.Revision) \
            .filter(model.Package.id == pkg.id) \
            .filter_by(state='active') \
            .join(model.PackageExtraRevision,
                  model.Package.id == model.PackageExtraRevision.package_id) \
            .join(model.Revision) \
            .filter(~model.Revision.author.in_(system_authors)) \
            .filter(~model.Revision.author.like(system_author_template))

        for period_name in periods:
            period = periods[period_name]
            # created
            if period[0] < created_.revision_timestamp < period[1]:
                published = not asbool(pkg.extras.get('unpublished'))
                created[period_name].append(
                    (created_.id, created_.name, created_.title,
                     lib.dataset_notes(pkg), 'created', period_name,
                     created_.revision_timestamp.isoformat(),
                     created_.revision.author, published))

            # modified
            # exclude the creation revision
            period_start = max(period[0], created_.revision_timestamp)
            # Fetch each result set once with .all(); the rows are read
            # twice below (authors and dates) and iterating the Query
            # objects directly would run every query twice.
            prs = pr_q.filter(model.PackageRevision.revision_timestamp > period_start) \
                .filter(model.PackageRevision.revision_timestamp < period[1]) \
                .all()
            rrs = rr_q.filter(model.ResourceRevision.revision_timestamp > period_start) \
                .filter(model.ResourceRevision.revision_timestamp < period[1]) \
                .all()
            pes = pe_q.filter(model.PackageExtraRevision.revision_timestamp > period_start) \
                .filter(model.PackageExtraRevision.revision_timestamp < period[1]) \
                .all()
            # The Revision object is the last item of every row
            revisions = [r[1] for r in prs] + \
                        [r[2] for r in rrs] + \
                        [r[2] for r in pes]
            # Sorted so that the output is deterministic
            authors = ' '.join(sorted(set(rev.author for rev in revisions)))
            dates = set(rev.timestamp.date() for rev in revisions)
            dates_formatted = ' '.join(
                [date.isoformat() for date in sorted(dates)])
            if authors:
                published = not asbool(pkg.extras.get('unpublished'))
                modified[period_name].append(
                    (pkg.id, pkg.name, pkg.title, lib.dataset_notes(pkg),
                     'modified', period_name, dates_formatted, authors,
                     published))
    return created, modified