def refresh_packages(ids):
    '''
    Recompute and store the author string for a set of packages.

    When ``ids`` is falsy, every package id found on a package contribution activity record is
    refreshed instead. A failure on one package is recorded and reported after the run; it does
    not stop the remaining packages from being processed.

    :param ids: package ids to refresh; a falsy value means "all packages with contributions"
    '''
    if not ids:
        # several records may share a package id, so collapse them to unique ids
        ids = list({
            record.package_id
            for record in PackageContributionActivityQuery.all()
        })
    package_count = len(ids)
    click.echo(
        'Attempting to update the author field for {0} packages.'.format(
            package_count))

    failures = []
    with click.progressbar(ids) as progress:
        for package_id in progress:
            try:
                author_string = get_author_string(package_id=package_id)
                PackageQuery.update(package_id, author=author_string)
            except Exception as err:
                # keep going; the error is echoed once the progress bar has finished
                failures.append('Error ({0}): {1}'.format(package_id, err))

    failure_count = len(failures)
    click.echo('Updated {0}/{1} ({2} failed)'.format(
        package_count - failure_count, package_count, failure_count))
    for message in failures:
        click.echo(message, err=True)
def get_cited_contributors(pkg_id):
    '''
    Group a package's contributions by agent and split the agents into cited and uncited.

    Each agent is represented by a dict with the keys:

    - ``agent``: the agent object taken from the agent's first contribution
    - ``contributions``: the agent's activities, excluding ``'[citation]'`` activities
    - ``order``: the highest ``order`` among the agent's ``'[citation]'`` activities, or -1 if
      the agent has none

    :param pkg_id: ID of the package
    :returns: a dict with the keys ``'cited'`` (entries whose order is not -1) and ``'uncited'``
              (entries whose order is -1); both lists are always present and sorted by order
    :rtype: dict
    '''
    contributions = PackageQuery.get_contributions(pkg_id)
    # groupby only merges adjacent items, hence the sort on the same key first
    by_agent = {
        agent_id: list(group)
        for agent_id, group in itertools.groupby(
            sorted(contributions, key=lambda x: x.agent.id),
            key=lambda x: x.agent.id)
    }

    def _citation_order(activities):
        citation = [a.order for a in activities if a.activity == '[citation]']
        if citation:
            return max(citation)
        return -1

    entries = [{
        'agent': activities[0].agent,
        'contributions': [a for a in activities if a.activity != '[citation]'],
        'order': _citation_order(activities)
    } for activities in by_agent.values()]

    # Partition explicitly rather than building the dict from a groupby over the sorted entries:
    # groupby starts a new group every time the key flips, so an order below -1 would produce two
    # separate "cited" runs and the later one would silently replace the earlier one.
    cited_agents = {'cited': [], 'uncited': []}
    for entry in sorted(entries, key=lambda x: x['order']):
        bucket = 'cited' if entry['order'] != -1 else 'uncited'
        cited_agents[bucket].append(entry)
    return cited_agents
def get_contributions(pkg_id):
    '''
    Expose the
    :func:`~ckanext.attribution.model.crud.PackageQuery.get_contributions` query method to
    templates.

    :param pkg_id: ID of the package, handed to the query method unchanged
    :return: the query method's result for that package, unmodified
    '''
    contributions = PackageQuery.get_contributions(pkg_id)
    return contributions
def get_author_string(package_id=None, citation_ids=None):
    '''
    Build the author string from citation activities.

    The citations come from the package's ``'[citation]'`` contributions if ``package_id`` is
    given, otherwise from reading each ID in ``citation_ids``. ``package_id`` takes precedence
    when both are supplied.

    :param package_id: ID of the package whose citations should be used
    :param citation_ids: IDs of citation activity records to use instead of a package lookup
    :returns: the agents' citation names joined with ``'; '`` in citation order; if there are
              no citations, the ``ckanext.doi.publisher`` config value, falling back to
              ``ckan.site_title`` and then ``'Anonymous'``
    :rtype: str
    '''

    def _by_order(citation):
        return citation.order

    if package_id is not None:
        citations = [
            contribution
            for contribution in PackageQuery.get_contributions(package_id)
            if contribution.activity == '[citation]'
        ]
        citations.sort(key=_by_order)
    elif citation_ids is not None:
        citations = [
            ContributionActivityQuery.read(citation_id)
            for citation_id in citation_ids
        ]
        citations.sort(key=_by_order)
    else:
        citations = []

    if citations:
        return '; '.join(
            citation.agent.citation_name for citation in citations)

    # no citations: fall back to the configured publisher, then the site title
    site_title = toolkit.config.get('ckan.site_title', 'Anonymous')
    return toolkit.config.get('ckanext.doi.publisher', site_title)
def package_contributions_show(context, data_dict):
    '''
    Show associated agents and their contributions for a given package. Agents are returned in
    package (citation) order, then by the agent's sort name. An agent whose package order is
    negative is sorted as if its order were the total number of agents.

    :param id: ID of the package record
    :type id: str
    :param limit: limit the number of records returned
    :type limit: int
    :param offset: skip n agents (a missing or None value means 0)
    :type offset: int
    :returns: A dict with the keys ``contributions`` (the requested page; one dict per agent
              with ``agent``, ``activities`` and ``affiliations``), ``all_agents`` (the IDs of
              every agent, in sorted order), ``total``, ``cited_total`` (the number of agents
              with a non-negative package order), ``offset`` and ``page_size``.
    :rtype: dict
    '''
    toolkit.check_access('package_contributions_show', context, data_dict)
    item_id = toolkit.get_or_bust(data_dict, 'id')
    limit = data_dict.get('limit')
    limit = int(limit) if limit is not None else None
    # handle an explicit None the same way limit does instead of failing in int(None)
    offset = data_dict.get('offset')
    offset = int(offset) if offset is not None else 0
    contributions = PackageQuery.get_contributions(item_id)
    # groupby only merges adjacent items, hence the sort on the same key first
    by_agent = {
        k: list(v)
        for k, v in itertools.groupby(sorted(contributions,
                                             key=lambda x: x.agent.id),
                                      key=lambda x: x.agent.id)
    }
    total = len(by_agent)
    agent_affiliations = toolkit.get_action('agent_affiliations')
    # pairs of (entry, package order); the order is kept alongside the entry for sorting
    agent_order = [({
        'agent': v[0].agent,
        'activities': [a.as_dict() for a in v],
        'affiliations': agent_affiliations(context, {
            'agent_id': k,
            'package_id': item_id
        })
    }, v[0].agent.package_order(item_id)) for k, v in by_agent.items()]

    def _sort_key(pair):
        entry, order = pair
        return (order if order >= 0 else total, entry['agent'].sort_name)

    sorted_contributions = [
        entry for entry, _ in sorted(agent_order, key=_sort_key)
    ]

    # without a limit the page runs to the end of the list
    page_end = offset + limit if limit is not None else None
    contributions_dict = {
        'contributions': [{
            'agent': c['agent'].as_dict(),
            'activities': c['activities'],
            'affiliations': c['affiliations']
        } for c in sorted_contributions[offset:page_end]],
        'all_agents': [c['agent'].id for c in sorted_contributions],
        'total': total,
        'cited_total': sum(1 for _, order in agent_order if order >= 0),
        'offset': offset,
        'page_size': limit or total
    }
    return contributions_dict
def migratedb(limit, dry_run, search_api):
    '''
    Semi-manual migration script that attempts to extract individual contributors from 'author' and
    'contributor' fields (if present) in order to create Agent and ContributionActivity records for
    them.

    Only packages that do not yet appear on any package contribution activity record are
    processed. Errors while saving an agent, an affiliation or an author string are echoed to
    stderr and that item is skipped; the migration carries on with the next one.

    :param limit: maximum number of unconverted packages to process; a falsy value means all
    :param dry_run: if truthy, parse and combine the contributors but exit before anything is
                    saved to the database (also skips the confirmation prompt)
    :param search_api: if truthy, pass every combined contributor through
                       ``migration.APISearch().update`` before saving
    '''
    if not dry_run:
        click.secho(
            'Attempting to migrate contributors. It is HIGHLY recommended that you back up your '
            'database before running this.',
            fg='red')
        click.confirm('Continue?', default=False, abort=True)
    converted_packages = [
        r.package_id for r in PackageContributionActivityQuery.all()
    ]
    unconverted_packages = PackageQuery.search(
        ~PackageQuery.m.id.in_(converted_packages))
    contribution_extras = {
        p.id: Session.query(PackageExtra).filter(
            PackageExtra.package_id == p.id,
            PackageExtra.key == 'contributors').first()
        for p in unconverted_packages
    }
    total = len(unconverted_packages)
    limit = int(limit or total)
    parser = migration.Parser()

    for i, pkg in enumerate(unconverted_packages[:limit]):
        click.echo('Processing package {0} of {1}.\n'.format(i + 1, total))
        parser.run(pkg.author, pkg.id, 'author')
        extra = contribution_extras.get(pkg.id)
        if extra is not None:
            extras = extra.value
            # NOTE(review): only non-str values are handed to the parser here - confirm that
            # skipping str values is intended
            if not isinstance(extras, str):
                parser.run(extras, pkg.id, 'contributor')

    combiner = migration.Combiner(parser)
    combined = combiner.run()
    if search_api:
        api_updater = migration.APISearch()
        for agnt in combined:
            api_updater.update(agnt)
    click.echo(f'\n\n{len(combined)} contributors found.')
    if dry_run:
        click.echo('Exiting before saving to the database.')
        return
    # maps each contributor's key to the ID of the agent it was matched to or created as
    agent_lookup = {}
    agent_create = toolkit.get_action('agent_create')
    contribution_activity_create = toolkit.get_action(
        'contribution_activity_create')
    agent_affiliation_create = toolkit.get_action('agent_affiliation_create')
    remove_keys = ['packages', 'affiliations', 'key', 'all_names']
    for a in combined:
        try:
            # create the agent (check it doesn't exist first)
            agent_dict = {k: v for k, v in a.items() if k not in remove_keys}
            if a['agent_type'] == 'person':
                filters = [
                    and_(AgentQuery.m.family_name == a['family_name'],
                         AgentQuery.m.given_names == a['given_names'])
                ]
            else:
                filters = [AgentQuery.m.name == a['name']]
            if a.get('external_id'):
                filters.append(
                    AgentQuery.m.external_id == a.get('external_id'))
            matches = AgentQuery.search(or_(*filters))
            if len(matches) == 1:
                new_agent = matches[0].as_dict()
                click.echo(f'MATCHED "{a["key"]}"')
            elif len(matches) > 1:
                choice_ix = migration.multi_choice(
                    f'Does "{a["key"]}" match any of these existing agents?',
                    [m.display_name for m in matches] + ['None of these'])
                if choice_ix == len(matches):
                    # Create the new agent without the external ID. agent_dict is a copy made
                    # above, so the ID has to be removed from it as well as from `a` to have
                    # any effect on the create call. pop() with a default is used because a
                    # missing key would otherwise raise and the broad except below would skip
                    # this contributor entirely.
                    for id_key in ('external_id', 'external_id_scheme'):
                        a.pop(id_key, None)
                        agent_dict.pop(id_key, None)
                    new_agent = agent_create({'ignore_auth': True}, agent_dict)
                    click.echo(f'CREATED "{a["key"]}"')
                else:
                    new_agent = matches[choice_ix].as_dict()
                    click.echo(f'MATCHED "{a["key"]}"')
            else:
                new_agent = agent_create({'ignore_auth': True}, agent_dict)
                click.echo(f'CREATED "{a["key"]}"')
            agent_lookup[a['key']] = new_agent['id']
            # then activities
            for pkg, order in a['packages'].get('author', []):
                # create citation
                contribution_activity_create({'ignore_auth': True}, {
                    'activity': '[citation]',
                    'scheme': 'internal',
                    'order': order,
                    'package_id': pkg,
                    'agent_id': new_agent['id']
                })
                # then the actual activity
                contribution_activity_create({'ignore_auth': True}, {
                    'activity': 'Unspecified',
                    'scheme': 'internal',
                    'package_id': pkg,
                    'agent_id': new_agent['id']
                })
            for pkg, _ in a['packages'].get('contributor', []):
                # just the activity for this one
                contribution_activity_create({'ignore_auth': True}, {
                    'activity': 'Unspecified',
                    'scheme': 'internal',
                    'package_id': pkg,
                    'agent_id': new_agent['id']
                })
        except Exception as e:
            # very broad catch just so it doesn't ruin everything if one thing breaks
            click.echo(f'Skipping {a["key"]} due to error: {e}', err=True)
    # finally, the affiliations
    for pkg, pairs in combiner.affiliations.items():
        for agent_a, agent_b in pairs:
            try:
                # a contributor skipped above has no agent_lookup entry; the resulting KeyError
                # is reported by the except below like any other failure
                agent_affiliation_create({'ignore_auth': True}, {
                    'agent_a_id': agent_lookup[agent_a],
                    'agent_b_id': agent_lookup[agent_b],
                    'package_id': pkg
                })
            except Exception as e:
                # very broad catch just so it doesn't ruin everything if one thing breaks
                click.echo(
                    f'Skipping {agent_a} + {agent_b} affiliation due to error: {e}',
                    err=True)

    # finally finally, update the package author strings
    for pkg in unconverted_packages[:limit]:
        try:
            authors = get_author_string(package_id=pkg.id)
            PackageQuery.update(pkg.id, author=authors)
        except Exception as e:
            # very broad catch just so it doesn't ruin everything if one thing breaks
            click.echo(f'Skipping {pkg.id} due to error: {e}', err=True)
def parse_contributors(context, data_dict):
    '''
    Apply the attribution changes submitted for a package: agents first, then activities,
    citations and affiliations, each through the corresponding create/update/delete action.

    ``data_dict['attribution']`` is a JSON object that may contain the lists ``agents``,
    ``activities``, ``citations`` and ``affiliations``. Each list is divided by
    ``split_list_by_action`` into ``'new'``, ``'updated'`` and ``'deleted'`` items. Agents in
    the ``'deleted'`` group are not deleted themselves; only their activities and affiliations
    on this package are removed.

    :param context: the context passed on to every action that is called
    :param data_dict: must identify the package under ``'id'`` (tried as an ID first, then as a
                      package name); ``'attribution'`` holds the JSON string described above
                      and is treated as an empty object when missing, None or empty
    :raises toolkit.ObjectNotFound: if no package matches ``data_dict['id']``
    '''
    # `or` rather than a .get() default so an explicit None or empty string is also tolerated
    contributors = json.loads(data_dict.get('attribution') or '{}')

    try:
        pkg = PackageQuery.read(data_dict.get('id'))
    except Exception:
        # reading by ID failed, so try the value as a package name instead
        pkgs = PackageQuery.search(PackageQuery.m.name == data_dict.get('id'))
        pkg = pkgs[0] if len(pkgs) > 0 else None

    if pkg is None:
        raise toolkit.ObjectNotFound('This package does not exist.')
    pkg_id = pkg.id

    # agents
    agents = split_list_by_action(contributors.get('agents', []), AgentQuery)
    agent_cre = toolkit.get_action('agent_create')
    agent_upd = toolkit.get_action('agent_update')
    # maps the ID a new agent was submitted with to the ID it was given on creation, so that
    # activities/citations/affiliations referring to the submitted ID can be redirected
    new_agents = {}
    for agent in agents['new']:
        gen_id = agent['id']
        del agent['id']
        new_id = agent_cre(context, agent)['id']
        new_agents[gen_id] = new_id
    for agent in agents['updated']:
        agent_upd(context, agent)
    # agents marked 'to_delete' almost certainly should not be deleted - only their activities
    # should be removed
    deleted_agents = [a['id'] for a in agents['deleted']]

    # activities
    activities = split_list_by_action(contributors.get('activities', []), ContributionActivityQuery)
    for agent in deleted_agents:
        activities['deleted'] += [r.as_dict() for r in
                                  AgentContributionActivityQuery.read_agent_package(agent, pkg_id)]
    # from here on activities['deleted'] holds unique IDs rather than dicts
    activities['deleted'] = list({a['id'] for a in activities['deleted']})
    activity_cre = toolkit.get_action('contribution_activity_create')
    activity_upd = toolkit.get_action('contribution_activity_update')
    activity_del = toolkit.get_action('contribution_activity_delete')
    for activity in activities['new']:
        if activity['agent_id'] in deleted_agents:
            continue
        del activity['id']
        new_agent_id = new_agents.get(activity['agent_id'])
        if new_agent_id:
            activity['agent_id'] = new_agent_id
        activity['package_id'] = activity.get('package_id', pkg_id)
        activity_cre(context, activity)
    for activity in activities['updated']:
        if activity['agent_id'] in deleted_agents:
            continue
        activity_upd(context, activity)
    for activity_id in activities['deleted']:
        activity_del(context, {'id': activity_id})

    # citations (specialised activities)
    citations = split_list_by_action(contributors.get('citations', []), ContributionActivityQuery)
    for citation in citations['new']:
        if citation['agent_id'] in deleted_agents:
            continue
        del citation['id']
        citation['activity'] = '[citation]'
        citation['scheme'] = 'internal'
        new_agent_id = new_agents.get(citation['agent_id'])
        if new_agent_id:
            citation['agent_id'] = new_agent_id
        citation['package_id'] = citation.get('package_id', pkg_id)
        activity_cre(context, citation)
    for citation in citations['updated']:
        if citation['agent_id'] in deleted_agents:
            continue
        activity_upd(context, citation)
    for citation in citations['deleted']:
        activity_del(context, {'id': citation['id']})
    # make sure the order is right: renumber the package's citations 1..n in their current order
    all_citations = sorted([c for c in PackageQuery.get_contributions(pkg_id) if
                            c.activity == '[citation]'], key=lambda x: x.order)
    for position, c in enumerate(all_citations, start=1):
        if c.order != position:
            activity_upd(context, {'id': c.id, 'order': position})

    # affiliations
    affiliations = split_list_by_action(contributors.get('affiliations', []), AgentAffiliationQuery,
                                        'db_id')

    def affiliation_key(x):
        # sorted so that the same pair of agents gives the same key in either direction
        return sorted((x['agent_id'], x['other_agent_id']))

    # keep only the first affiliation for each pair of agents within each group
    affiliations = {
        gk: [next(group) for _, group in
             itertools.groupby(sorted(gv, key=affiliation_key), key=affiliation_key)]
        for gk, gv in affiliations.items()
    }
    affiliation_cre = toolkit.get_action('agent_affiliation_create')
    affiliation_upd = toolkit.get_action('agent_affiliation_update')
    affiliation_del = toolkit.get_action('agent_affiliation_delete')

    for aff in affiliations['new']:
        del aff['id']
        new_agent_id = new_agents.get(aff['agent_id'])
        aff['agent_a_id'] = new_agent_id or aff['agent_id']
        new_other_agent_id = new_agents.get(aff['other_agent_id'])
        aff['agent_b_id'] = new_other_agent_id or aff['other_agent_id']
        aff['package_id'] = pkg_id
        if aff['agent_a_id'] in deleted_agents or aff['agent_b_id'] in deleted_agents:
            continue
        affiliation_cre(context, aff)

    for aff in affiliations['updated']:
        aff['id'] = aff['db_id']
        new_agent_id = new_agents.get(aff['agent_id'])
        aff['agent_a_id'] = new_agent_id or aff['agent_id']
        new_other_agent_id = new_agents.get(aff['other_agent_id'])
        aff['agent_b_id'] = new_other_agent_id or aff['other_agent_id']
        if aff['agent_a_id'] in deleted_agents or aff['agent_b_id'] in deleted_agents:
            continue
        affiliation_upd(context, aff)

    for aff in affiliations['deleted']:
        affiliation_del(context, {'id': aff['db_id']})

    # finally, remove whatever affiliations the removed agents still have on this package
    for agent in deleted_agents:
        for aff in AgentAffiliationQuery.read_agent(agent, pkg_id):
            affiliation_del(context, {'id': aff.id})