예제 #1
0
    def serialize(self, pid, record, links_factory=None):
        """Serialize the citation summary of papers affiliated with ``record``.

        Two searches are chained: the first collects the control numbers of
        the literature records matching ``authors.affiliations.recid`` against
        ``get_id(record)``; the second fetches, for exactly those records,
        the fields consumed by ``build_citesummary``.

        :param pid: not used by this method.
        :param record: record whose id is matched against the affiliations.
        :param links_factory: not used by this method.
        :return: JSON string of the ``build_citesummary`` result.
        """
        # First pass: only the control number of each match is needed.
        affiliated = LiteratureSearch().query(
            'match', authors__affiliations__recid=get_id(record))
        affiliated = affiliated.params(_source=['control_number'])

        literature_recids = []
        for hit in affiliated.scan():
            literature_recids.append(get_id(hit.to_dict()))

        # Second pass: restrict to those records and fetch summary fields.
        summary_fields = [
            'authors.recid',
            'collaborations.value',
            'control_number',
            'earliest_date',
            'facet_inspire_doc_type',
            'inspire_categories',
            'titles.title',
        ]
        by_recids = LiteratureSearch().filter(
            'terms', control_number=literature_recids)
        by_recids = by_recids.params(_source=summary_fields)

        summary = build_citesummary(by_recids)
        return json.dumps(summary)
def test_creating_deleted_record_and_undeleting_created_record_in_es(app):
    """A record created as deleted stays out of ES until it is undeleted."""
    search = LiteratureSearch()
    record_data = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [{'title': 'foo'}],
        'deleted': True,
        '_collections': ['Literature'],
    }

    # Created with the deleted flag set: nothing must show up in ES.
    record = InspireRecord.create(record_data)
    record.commit()
    db.session.commit()
    with pytest.raises(NotFoundError):
        search.get_source(record.id)

    # Clearing the flag must make the record appear in ES
    # (get_source raises if it is missing).
    record['deleted'] = False
    record.commit()
    db.session.commit()
    search.get_source(record.id)

    # Clean up the record created by this test.
    record._delete(force=True)
def test_that_db_changes_are_mirrored_in_es(app):
    """Create, update and delete of a DB record are each reflected in ES."""
    search = LiteratureSearch()
    record_data = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [{'title': 'foo'}],
    }

    # Creation in the DB also creates the document in ES.
    record = InspireRecord.create(record_data)
    indexed = search.get_source(record.id)
    assert get_title(indexed) == 'foo'

    # An update in the DB is also applied in ES.
    record['titles'][0]['title'] = 'bar'
    record.commit()
    indexed = search.get_source(record.id)
    assert get_title(indexed) == 'bar'

    # Deletion in the DB also removes the document from ES.
    record._delete(force=True)
    with pytest.raises(NotFoundError):
        search.get_source(record.id)
def test_deleting_record_triggers_delete_in_es(app):
    """Setting the ``deleted`` flag on a record removes it from ES."""
    search = LiteratureSearch()
    record_data = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [{'title': 'foo'}],
        '_collections': ['Literature'],
    }

    # Creation in the DB also creates the document in ES
    # (get_source raises if it is missing).
    record = InspireRecord.create(record_data)
    record.commit()
    db.session.commit()
    search.get_source(record.id)

    # Flagging the record as deleted must drop it from ES.
    record['deleted'] = True
    record.commit()
    db.session.commit()
    with pytest.raises(NotFoundError):
        search.get_source(record.id)
예제 #5
0
def get_experiment_publications(experiment_name):
    """
    Get paper count for a given experiment.

    :param experiment_name: canonical name of the experiment.
    :type experiment_name: string
    :return: number of literature records whose
        ``accelerator_experiments.experiment`` term equals ``experiment_name``.
    """
    query = {
        "term": {"accelerator_experiments.experiment": experiment_name}
    }
    # ``search_type=count`` is deprecated in Elasticsearch; the count API
    # yields the same total without fetching any hit.
    return LiteratureSearch().query(query).count()
예제 #6
0
def get_experiment_publications(experiment_name):
    """
    Get paper count for a given experiment.

    :param experiment_name: canonical name of the experiment.
    :type experiment_name: string
    :return: number of literature records whose
        ``accelerator_experiments.experiment`` term equals ``experiment_name``.
    """
    query = {
        "term": {"accelerator_experiments.experiment": experiment_name}
    }
    # The deprecated ``search_type=count`` parameter is replaced by the count
    # API, which yields the same total without fetching any hit.
    # FIXME(review): an earlier note on this function said it "doesn't work
    # anymore" without recording why -- confirm against a live index.
    return LiteratureSearch().query(query).count()
예제 #7
0
    def serialize(self, pid, record, links_factory=None):
        """Return a list of co-authors for a given author recid.

        :param pid:
            Persistent identifier instance.

        :param record:
            Record instance.

        :param links_factory:
            Factory function for the link generation, which are added to
            the response.

        :return:
            JSON string holding a list of ``{count, full_name, id, record}``
            objects, one per co-author.
        """
        author_pid = pid.pid_value
        coauthors = {}

        search = LiteratureSearch().query({
            "match": {
                "authors.recid": author_pid
            }
        }).params(
            _source=[
                "authors.full_name",
                "authors.recid",
                "authors.record",
            ]
        )

        for result in search.scan():
            for author in result.to_dict()['authors']:
                try:
                    recid = author['recid']

                    # Don't add the reference author.
                    # NOTE(review): this relies on ``pid.pid_value`` and the
                    # indexed recid having the same type -- confirm.
                    if recid == author_pid:
                        continue

                    if recid in coauthors:
                        coauthors[recid]['count'] += 1
                    else:
                        coauthors[recid] = dict(
                            count=1,
                            full_name=author['full_name'],
                            id=recid,
                            record=author['record'],
                        )
                except KeyError:
                    # Signatures lacking one of the needed keys are skipped.
                    continue

        # A dict view is not JSON serializable on Python 3; materialize it.
        return json.dumps(list(coauthors.values()))
예제 #8
0
def build_citesummary(search):
    """Build the citation summary for every record returned by ``search``.

    For each scanned record an entry is produced with its own metadata and
    a ``citations`` list; that list is filled by a second search for the
    literature records whose ``references.recid`` matches the record's id.

    :param search: search object exposing ``scan()``; each hit must
        provide ``to_dict()``.
    :return: list of summary dicts, in scan order.
    """
    citing_fields = [
        'authors.recid',
        'collaborations.value',
        'control_number',
        'earliest_date',
        'facet_inspire_doc_type',
        'inspire_categories',
        'titles.title',
    ]
    summary = []

    for hit in search.scan():
        paper = hit.to_dict()

        entry = {
            'citations': [],
            'collaboration': is_collaboration(paper),
            'core': is_core(paper),
            'date': get_date(paper),
            'document_type': get_document_type(paper),
            'id': get_id(paper),
            'subject': get_subject(paper),
            'title': get_title(paper),
        }
        summary.append(entry)

        # One extra query per record: everything that cites it.
        citing_search = LiteratureSearch().query(
            'match', references__recid=get_id(paper))
        citing_search = citing_search.params(_source=citing_fields)

        for citing_hit in citing_search.scan():
            citer = citing_hit.to_dict()
            entry['citations'].append({
                'collaboration': is_collaboration(citer),
                'core': is_core(citer),
                'date': get_date(citer),
                'document_type': get_document_type(citer),
                'id': get_id(citer),
                'selfcite': is_selfcite(paper, citer),
                'subject': get_subject(citer),
                'title': get_title(citer),
            })

    return summary
예제 #9
0
def suggest():
    """Power typeahead.js search bar suggestions.

    Reads ``field`` and ``query`` from the request, runs a completion
    suggester on ``field`` and returns the options as JSON. For the
    ``authors.name_suggest`` field the options are grouped by the ``bai``
    found in their payload and the longest name of each group is returned.
    """
    field = request.values.get('field')
    query = request.values.get('query')

    suggester = LiteratureSearch().suggest(
        'suggestions', query, completion={"field": field}
    )
    options = suggester.execute_suggest()['suggestions'][0]['options']

    if field != "authors.name_suggest":
        return jsonify({
            'results': [{'value': option['text']} for option in options]
        })

    # Group every suggested name variant under its author identifier.
    names_by_bai = {}
    for option in options:
        names_by_bai.setdefault(
            option['payload']['bai'], []
        ).append(option['text'])

    results = [
        {
            'name': max(names, key=len),
            'value': bai,
            'template': 'author'
        }
        for bai, names in six.iteritems(names_by_bai)
    ]

    return jsonify({'results': results})
def test_find_author_or_author():
    """``find a X or a Y`` yields a ``should`` of two author clauses."""

    def author_clause(name):
        # Expected clause for a single ``find a <name>`` term.
        return {
            "bool": {
                "must": [{
                    "bool": {
                        "should": [
                            {"match": {"authors.name_variations": name}},
                            {"term": {"authors.ids.value": name}},
                        ]
                    }
                }],
                "should": [
                    {"match": {"authors.full_name": name}},
                ],
            }
        }

    parsed = IQ('find a gersdorff, g or a von gersdorff, g', LiteratureSearch())

    expected = {
        "bool": {
            "should": [
                author_clause("gersdorff, g"),
                author_clause("von gersdorff, g"),
            ]
        }
    }

    assert expected == parsed.to_dict()
예제 #11
0
def ajax_references():
    """Handler for datatables references view.

    Resolves the persistent identifier from the ``recid`` and ``endpoint``
    request arguments, loads the record source from the literature index
    and returns its formatted references as JSON under ``data``.
    """
    args = request.args
    recid = args.get('recid', '')
    endpoint = args.get('endpoint', '')

    pid = PersistentIdentifier.get(
        get_pid_type_from_endpoint(endpoint), recid)
    record = LiteratureSearch().get_source(pid.object_uuid)

    references = get_and_format_references(record)
    return jsonify({'data': references})
예제 #12
0
파일: views.py 프로젝트: Kjili/inspire-next
def index():
    """View for literature collection landing page.

    Renders the labs cover page unless ``INSPIRE_FULL_THEME`` is enabled,
    in which case the literature collection page is rendered together
    with the total number of literature records.
    """
    if not current_app.config['INSPIRE_FULL_THEME']:
        return render_template('inspirehep_theme/inspire_labs_cover.html')

    total = LiteratureSearch().count()
    return render_template(
        'inspirehep_theme/search/collection_literature.html',
        collection='hep',
        number_of_records=total,
    )
def test_refersto_colon_recid_colon():
    """``refersto:recid:<n>`` becomes a multi_match on ``references.recid``."""
    parsed = IQ('refersto:recid:1286113', LiteratureSearch())

    expected = dict(
        multi_match=dict(
            query='1286113',
            fields=['references.recid'],
        )
    )

    assert expected == parsed.to_dict()
예제 #14
0
def ajax_citations():
    """Handler for datatables citations view.

    Resolves the persistent identifier from the ``recid`` and ``endpoint``
    request arguments, loads the record source from the literature index
    and returns ``Citation(record).citations()`` as JSON under ``data``.
    """
    args = request.args
    recid = args.get('recid', '')
    endpoint = args.get('endpoint', '')

    pid = PersistentIdentifier.get(
        get_pid_type_from_endpoint(endpoint), recid)
    record = LiteratureSearch().get_source(pid.object_uuid)

    citations = Citation(record).citations()
    return jsonify({"data": citations})
def test_that_db_changes_are_mirrored_in_es(app):
    """Create, update and delete of a DB record are each reflected in ES."""
    search = LiteratureSearch()
    record_data = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [{'title': 'foo'}],
        '_collections': ['Literature'],
    }

    # Creation in the DB also creates the document in ES.
    record = InspireRecord.create(record_data)
    record.commit()
    db.session.commit()
    indexed = search.get_source(record.id)
    assert get_title(indexed) == 'foo'

    # An update in the DB is also applied in ES.
    record['titles'][0]['title'] = 'bar'
    record.commit()
    db.session.commit()
    indexed = search.get_source(record.id)
    assert get_title(indexed) == 'bar'

    # Deletion in the DB also removes the document from ES.
    record._delete(force=True)
    db.session.commit()
    with pytest.raises(NotFoundError):
        search.get_source(record.id)
def test_deleting_record_triggers_delete_in_es(app):
    """Setting the ``deleted`` flag on a record removes it from ES."""
    search = LiteratureSearch()
    record_data = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [{'title': 'foo'}],
        '_collections': ['Literature'],
    }

    # Creation in the DB also creates the document in ES
    # (get_source raises if it is missing).
    record = InspireRecord.create(record_data)
    record.commit()
    db.session.commit()
    search.get_source(record.id)

    # Flagging the record as deleted must drop it from ES.
    record['deleted'] = True
    record.commit()
    db.session.commit()
    with pytest.raises(NotFoundError):
        search.get_source(record.id)
def test_creating_deleted_record_and_undeleting_created_record_in_es(app):
    """A record created as deleted stays out of ES until it is undeleted."""
    search = LiteratureSearch()
    record_data = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [{'title': 'foo'}],
        'deleted': True,
        '_collections': ['Literature'],
    }

    # Created with the deleted flag set: nothing must show up in ES.
    record = InspireRecord.create(record_data)
    record.commit()
    db.session.commit()
    with pytest.raises(NotFoundError):
        search.get_source(record.id)

    # Clearing the flag must make the record appear in ES
    # (get_source raises if it is missing).
    record['deleted'] = False
    record.commit()
    db.session.commit()
    search.get_source(record.id)

    # Clean up the record created by this test.
    record._delete(force=True)
예제 #18
0
    def serialize(self, pid, record, links_factory=None):
        """Serialize the citation summary of the papers of an author.

        Matches literature records on ``authors.recid`` against
        ``get_id(record)`` and feeds the search to ``build_citesummary``.

        :param pid: not used by this method.
        :param record: author record whose id is searched for.
        :param links_factory: not used by this method.
        :return: JSON string of the ``build_citesummary`` result.
        """
        summary_fields = [
            'authors.recid',
            'collaborations.value',
            'control_number',
            'earliest_date',
            'facet_inspire_doc_type',
            'inspire_categories',
            'titles.title',
        ]
        by_author = LiteratureSearch().query(
            'match', authors__recid=get_id(record))
        by_author = by_author.params(_source=summary_fields)

        summary = build_citesummary(by_author)
        return json.dumps(summary)
def test_abstract_colon_with_star_wildcard():
    """A ``*`` wildcard on ``abstract:`` yields a wildcard query_string."""
    parsed = IQ('abstract: part*', LiteratureSearch())

    expected = dict(
        query_string=dict(
            query='part*',
            default_field='abstracts.value',
            analyze_wildcard=True,
        )
    )

    assert expected == parsed.to_dict()
def test_find_journal():
    """``find j`` with a wildcard queries ``publication_info.pubnote``."""
    parsed = IQ('find j "Phys.Rev.Lett.,105*"', LiteratureSearch())

    expected = dict(
        query_string=dict(
            query='"Phys.Rev.Lett.,105*"',
            default_field='publication_info.pubnote',
            analyze_wildcard=True,
        )
    )

    assert expected == parsed.to_dict()
예제 #21
0
    def serialize(self, pid, record, links_factory=None):
        """Serialize the citation summary of papers tied to an experiment.

        Two searches are chained: the first collects the control numbers of
        the literature records matching ``accelerator_experiments.recid``
        against ``get_id(record)``; the second fetches, for exactly those
        records, the fields consumed by ``build_citesummary``.

        :param pid: not used by this method.
        :param record: experiment record whose id is searched for.
        :param links_factory: not used by this method.
        :return: JSON string of the ``build_citesummary`` result.
        """
        # First pass: only the control number of each match is needed.
        by_experiment = LiteratureSearch().query(
            'match', accelerator_experiments__recid=get_id(record))
        by_experiment = by_experiment.params(_source=['control_number'])

        literature_recids = []
        for hit in by_experiment.scan():
            literature_recids.append(get_id(hit.to_dict()))

        # Second pass: restrict to those records and fetch summary fields.
        summary_fields = [
            'authors.recid',
            'collaborations.value',
            'control_number',
            'earliest_date',
            'facet_inspire_doc_type',
            'inspire_categories',
            'titles.title',
        ]
        by_recids = LiteratureSearch().filter(
            'terms', control_number=literature_recids)
        by_recids = by_recids.params(_source=summary_fields)

        summary = build_citesummary(by_recids)
        return json.dumps(summary)
예제 #22
0
def ajax_citations():
    """Handler for datatables citations view.

    Resolves the persistent identifier from the ``recid`` and ``endpoint``
    request arguments, loads the record source from the literature index
    and returns its formatted citations as JSON under ``data``.

    .. deprecated:: 2018-08-23
    """
    args = request.args
    recid = args.get('recid', '')
    endpoint = args.get('endpoint', '')

    pid = PersistentIdentifier.get(
        get_pid_type_from_endpoint(endpoint), recid)
    record = LiteratureSearch().get_source(pid.object_uuid)

    citations = get_and_format_citations(record)
    return jsonify({'data': citations})
def test_exactauthor_colon_bai():
    """``exactauthor:<bai>`` is a multi_match over the exact-author fields."""
    parsed = IQ('exactauthor:J.Serra.3', LiteratureSearch())

    exact_author_fields = [
        "exactauthor.raw",
        "authors.full_name",
        "authors.alternative_names",
        "authors.ids.value",
    ]
    expected = dict(
        multi_match=dict(query="J.Serra.3", fields=exact_author_fields)
    )

    assert expected == parsed.to_dict()
def test_find_exactauthor():
    """``find ea <name>`` is a multi_match over the exact-author fields."""
    parsed = IQ('find ea witten, edward', LiteratureSearch())

    exact_author_fields = [
        "exactauthor.raw",
        "authors.full_name",
        "authors.alternative_names",
        "authors.ids.value",
    ]
    expected = dict(
        multi_match=dict(query="witten, edward", fields=exact_author_fields)
    )

    assert expected == parsed.to_dict()
예제 #25
0
def get_institution_papers_from_es(recid):
    """
    Get papers where some author is affiliated with institution.

    At most 100 hits are requested, sorted by descending ``earliest_date``.

    :param recid: id of the institution.
    :type recid: string
    :return: the ``hits`` of the executed search.
    """
    source_fields = [
        'control_number', 'earliest_date',
        'titles', 'authors',
        'publication_info', 'citation_count',
        'collaboration'
    ]

    search = LiteratureSearch().query_from_iq(
        'authors.affiliations.recid:{}'.format(recid))
    search = search.sort('-earliest_date')
    search = search.params(size=100, _source=source_fields)

    return search.execute().hits
예제 #26
0
def get_citations_from_es(record, page=1, size=10):
    """Fetch one page of the literature records citing ``record``.

    :param record: mapping expected to hold a ``control_number``.
    :param page: 1-based page number.
    :param size: number of hits per page.
    :return: ``None`` when ``record`` has no ``control_number``, otherwise
        the ``hits`` of the search, sorted by descending ``earliest_date``.
    """
    if 'control_number' not in record:
        return None

    source_fields = [
        'authors',
        'control_number',
        'earliest_date',
        'titles',
        'publication_info'
    ]
    offset = (page - 1) * size

    search = LiteratureSearch().query(
        'match', references__recid=record['control_number'])
    search = search.params(_source=source_fields, from_=offset, size=size)
    search = search.sort('-earliest_date')

    return search.execute().hits
예제 #27
0
def test_or_of_exactauthor_colon_queries():
    """An ``or`` of two ``exactauthor:`` terms yields one clause per term.

    The previous expectation was a single multi_match for the unrelated
    value "J.Serra.3", which cannot be the translation of this query.
    """
    query = IQ('exactauthor:X.Yin.1 or exactauthor:"Yin, Xi"',
               LiteratureSearch())

    fields = [
        "exactauthor.raw", "authors.full_name",
        "authors.alternative_names", "authors.inspire_bai"
    ]
    # NOTE(review): shape assumed from how the parser handles ``or`` (a
    # ``bool``/``should``) and quoted values (quotes stripped) in other
    # queries -- confirm against the parser output.
    expected = {
        "bool": {
            "should": [{
                "multi_match": {
                    "query": "X.Yin.1",
                    "fields": fields
                }
            }, {
                "multi_match": {
                    "query": "Yin, Xi",
                    "fields": fields
                }
            }]
        }
    }
    result = query.to_dict()

    assert expected == result
예제 #28
0
def conferences_contributions_from_es(cnum):
    """Query ES for the literature records matching a conference ``cnum``.

    At most 100 hits are requested, sorted by descending ``citation_count``.

    :param cnum: conference number placed, quoted, in the ``cnum:`` query.
    :return: the ``hits`` of the executed search.
    """
    source_fields = [
        'control_number',
        'earliest_date',
        'titles',
        'authors',
        'publication_info',
        'citation_count',
        'collaboration'
    ]

    search = LiteratureSearch().query_from_iq('cnum:"{}"'.format(cnum))
    search = search.params(size=100, _source=source_fields)
    search = search.sort('-citation_count')

    return search.execute().hits
def test_google_style():
    """A bare keyword is searched over the boosted default fields."""
    parsed = IQ('kudenko', LiteratureSearch())

    boosted_fields = [
        'title^3', 'title.raw^10', 'abstract^2', 'abstract.raw^4',
        'author^10', 'author.raw^15', 'reportnumber^10', 'eprint^10',
        'doi^10'
    ]
    expected = dict(
        multi_match=dict(
            zero_terms_query='all',
            query='kudenko',
            fields=boosted_fields,
        )
    )

    assert expected == parsed.to_dict()
def test_author_colon_bai_with_double_quotes_and_collection_colon_and_cited_colon(
):
    """Author, collection and citation-range terms are ANDed under ``must``."""
    parsed = IQ(
        'author:"E.Witten.1" AND collection:citeable AND cited:500->1000000',
        LiteratureSearch())

    # One expected clause per term of the query.
    author_clause = {
        'bool': {
            'should': [
                {'match': {u'authors.name_variations': 'E.Witten.1'}},
                {'term': {u'authors.ids.value': 'E.Witten.1'}},
            ]
        }
    }
    collection_clause = {
        'multi_match': {
            'fields': ['collections.primary'],
            'query': 'citeable'
        }
    }
    cited_clause = {
        'range': {
            'citation_count': {'gte': '500', 'lte': '1000000'}
        }
    }

    expected = {
        'bool': {
            'must': [author_clause, collection_clause, cited_clause],
            'should': [
                {'match': {u'authors.full_name': 'E.Witten.1'}},
            ]
        }
    }

    assert expected == parsed.to_dict()
예제 #31
0
def test_author_colon_bai_with_double_quotes_and_collection_colon_and_cited_colon(
):
    """Author, collection and citation-range terms are ANDed under ``must``."""
    parsed = IQ(
        'author:"E.Witten.1" AND collection:citeable AND cited:500->1000000',
        LiteratureSearch())

    # One expected clause per term of the query.
    author_clause = {
        "bool": {
            "should": [
                {"match": {"authors.name_variations": "E.Witten.1"}},
                {"term": {"authors.inspire_bai": "E.Witten.1"}},
            ]
        }
    }
    collection_clause = {
        "multi_match": {
            "query": "citeable",
            "fields": ["collections.primary"]
        }
    }
    cited_clause = {
        "range": {
            "citation_count": {"gte": "500", "lte": "1000000"}
        }
    }

    expected = {
        "bool": {
            "must": [author_clause, collection_clause, cited_clause],
            "should": [
                {"match": {"authors.full_name": "E.Witten.1"}},
            ]
        }
    }

    assert expected == parsed.to_dict()
예제 #32
0
    def get(self, pid, record):
        """Return one page of the records citing ``record``.

        ``page`` (default 1) and ``size`` (default 10) are read from the
        request; the request is aborted with 400 when either is below 1.

        :param pid: persistent identifier, passed on to ``make_response``.
        :param record: record whose citations are looked up.
        :return: response whose data holds ``citations`` (list of dicts)
            and ``citation_count`` (the ``total`` of the results).
        """
        values = request.values
        page = values.get('page', 1, type=int)
        size = values.get('size', 10, type=int)

        if not (page >= 1 and size >= 1):
            abort(400)

        results = LiteratureSearch.citations(record, page, size)
        total = results.total
        citations = []
        for citation in results:
            citations.append(citation.to_dict())

        return self.make_response(pid, {
            'citations': citations,
            'citation_count': total
        })
예제 #33
0
def test_author_colon_bai():
    """``author:<bai>`` is matched against the three author fields."""
    parsed = IQ('author:Y.Nomura.1', LiteratureSearch())

    author_fields = (
        "authors.name_variations",
        "authors.full_name",
        "authors.inspire_bai",
    )
    expected = {
        "bool": {
            "should": [
                {"match": {field: "Y.Nomura.1"}} for field in author_fields
            ]
        }
    }

    assert expected == parsed.to_dict()
def test_find_author_with_hash_wildcard():
    """A trailing ``#`` in ``find a`` is expected to become a ``*`` wildcard."""
    parsed = IQ('find a chkv#', LiteratureSearch())

    name_fields = ('authors.full_name', 'authors.alternative_names')
    expected = {
        'bool': {
            'should': [
                {
                    'query_string': {
                        'analyze_wildcard': True,
                        'default_field': field,
                        'query': 'chkv*'
                    }
                }
                for field in name_fields
            ]
        }
    }

    assert expected == parsed.to_dict()
예제 #35
0
def generate_booktitle(record):
    """Build a book title from the ``publication_info`` of ``record``.

    For every publication info entry with a non-empty ``reportnumber``:
    if the entry has an ``acronym`` the title is ``"<reportnumber>:
    <acronym>"``; otherwise the literature index is searched for that
    report number and the title (plus subtitle, if any) of the first hit
    is used. When no title results, the ``pubinfo_freetext`` values of the
    entries are joined with ``", "``.

    :param record: mapping that may hold a ``publication_info`` list.
    :return: the book title, or ``''`` when none could be derived.
    """
    if 'publication_info' not in record:
        return ''

    pubinfo = record['publication_info']
    booktitle = ''

    for field in pubinfo:
        rn = field.get('reportnumber')
        if not rn:
            continue

        # ``acronym`` is optional: a missing key must fall through to the
        # report-number lookup instead of raising ``KeyError``.
        acronym = field.get('acronym')
        if acronym:
            booktitle = "%s: %s" % (rn, acronym)
            continue

        records = LiteratureSearch().query_from_iq(
            "reportnumber:%s" % (rn, )).execute()
        if records:
            rec = records.hits[0]
            # Each title overwrites the previous one; the last one wins.
            for title in rec['titles']:
                booktitle = title.get('title', "")
                if title.get('subtitle'):
                    booktitle += ': ' + title.get('subtitle')

    if not booktitle:
        freetexts = [
            field['pubinfo_freetext']
            for field in pubinfo
            if 'pubinfo_freetext' in field
        ]
        if freetexts:
            # Flatten first when any free-text value is itself a list.
            if any(isinstance(i, list) for i in freetexts):
                freetexts = list(traverse(freetexts))
            booktitle = ', '.join(str(title) for title in freetexts)

    return booktitle
예제 #36
0
def proceedings_link(record):
    """Build the HTML link(s) to the proceedings of a conference record.

    The literature index is searched for proceedings sharing the ``cnum``
    of ``record``. A single hit yields one "Proceedings" link; several
    hits yield a numbered list, each item followed by a DOI link when the
    hit has one.

    :param record: mapping that may hold a ``cnum``.
    :return: HTML string, or ``''`` when there is no ``cnum`` or no hit.
    """
    cnum = record.get('cnum', '')
    if not cnum:
        return ''

    records = LiteratureSearch().query_from_iq(
        'cnum:%s and 980__a:proceedings' % cnum).execute()

    if not len(records):
        return ''

    if len(records) == 1:
        return '<a href="/record/{recid}">Proceedings</a>'.format(
            recid=records[0]['control_number'])

    proceedings = []
    # ``hit`` rather than ``record``: do not shadow the function argument.
    for i, hit in enumerate(records.hits, start=1):
        link = '<a href="/record/{recid}">#{i}</a>'.format(
            recid=hit['control_number'], i=i)
        try:
            doi = hit['dois'][0]['value']
        except (KeyError, IndexError):
            # Guards against hits without a "dois" field, with an empty
            # "dois" list, and against doi entries without a "value".
            pass
        else:
            # The closing parenthesis was missing from the original markup.
            link += (
                ' (DOI: <a href="http://dx.doi.org/{doi}">{doi}</a>)'.format(
                    doi=doi))
        proceedings.append(link)

    return 'Proceedings: ' + ', '.join(proceedings)
예제 #37
0
    def serialize(self, pid, record, links_factory=None):
        """Return a list of citations for a given author recid.

        For every publication of the author one entry is produced holding
        the cited record (``citee``) and the list of records citing it
        (``citers``), each flagged as self-citation or not.

        :param pid:
            Persistent identifier instance.

        :param record:
            Record instance.

        :param links_factory:
            Factory function for the link generation, which are added to
            the response.

        :return:
            JSON string holding a list of ``{citee, citers}`` objects.
        """
        author_pid = pid.pid_value
        citations = {}

        search = LiteratureSearch().query({
            "match": {
                "authors.recid": author_pid
            }
        }).params(
            _source=[
                "authors.recid",
                "control_number",
                "self",
            ]
        )

        # For each publication co-authored by a given author...
        for result in search.scan():
            result_source = result.to_dict()

            recid = result_source['control_number']
            # Not every signature has a recid: collect the ones that do
            # instead of failing on the first signature without one.
            authors = {
                i['recid'] for i in result_source.get('authors', [])
                if 'recid' in i
            }

            nested_search = LiteratureSearch().query({
                "match": {
                    "references.recid": recid
                }
            }).params(
                _source=[
                    "authors.recid",
                    "collections",
                    "control_number",
                    "earliest_date",
                    "self",
                ]
            )

            citers = []
            citations[recid] = {
                # The source record that is being cited.
                'citee': dict(
                    id=recid,
                    record=result_source['self'],
                ),
                'citers': citers,
            }

            # Check all publications, which cite the parent record.
            for nested_result in nested_search.scan():
                nested_result_source = nested_result.to_dict()

                # Signatures without a recid are ignored, so that a single
                # one does not hide the shared authors that do have one.
                nested_authors = {
                    i['recid']
                    for i in nested_result_source.get('authors', [])
                    if 'recid' in i
                }

                citation = dict(
                    citer=dict(
                        id=int(nested_result_source['control_number']),
                        record=nested_result_source['self']
                    ),
                    # If at least one author is shared, it's a self-citation.
                    self_citation=bool(authors & nested_authors),
                )

                # Get the earliest date of a citer.
                if 'earliest_date' in nested_result_source:
                    citation['date'] = nested_result_source['earliest_date']

                # Get status if a citer is published.
                # FIXME: As discussed with Sam, we should have a boolean flag
                #        for this type of information.
                try:
                    citation['published_paper'] = "Published" in [
                        i['primary'] for i in nested_result_source[
                            'collections']]
                except KeyError:
                    citation['published_paper'] = False

                citers.append(citation)

        # A dict view is not JSON serializable on Python 3; materialize it.
        return json.dumps(list(citations.values()))
예제 #38
0
def get_publications():
    """Return the publications, keywords and collaborations of an author.

    The author is identified by the ``recid`` request value (parsed as an
    integer, ``0`` when missing). Every literature record whose
    ``authors.recid`` matches is scanned; a record is skipped when looking up
    its title or control number raises ``IndexError`` or ``KeyError``.

    :return: JSON response with the keys ``publications`` (list of dicts),
        ``keywords`` and ``collaborations`` (lists of unique values gathered
        over all scanned records).
    """
    recid = request.values.get('recid', 0, type=int)

    publications = []
    collaborations = set()
    keywords = set()

    search = LiteratureSearch().query(
        {"match": {"authors.recid": recid}}
    ).params(
        _source=[
            'accelerator_experiments',
            'control_number',
            'earliest_date',
            'facet_inspire_doc_type',
            'publication_info',
            'titles',
            'keywords'
        ]
    )
    for result in search.scan():
        try:
            result_source = result.to_dict()
            publication = {}

            # Get publication title (required).
            publication['title'] = get_title(result_source)

            # Get publication recid (required).
            publication['recid'] = result_source['control_number']
        except (IndexError, KeyError):
            continue

        # Get publication type.
        try:
            publication['type'] = result_source.get(
                'facet_inspire_doc_type', [])[0]
        except IndexError:
            publication['type'] = "Not defined"

        # Journal data and year are read from the first publication_info
        # entry only; look it up once instead of once per field.
        try:
            first_info = result_source.get('publication_info', [])[0]
        except (IndexError, KeyError):
            first_info = {}

        # The journal recid is only reported together with a journal title.
        if 'journal_title' in first_info:
            publication['journal_title'] = first_info['journal_title']
            if 'journal_recid' in first_info:
                publication['journal_recid'] = first_info['journal_recid']

        # Get publication year.
        if 'year' in first_info:
            publication['year'] = first_info['year']

        # Get keywords, leaving out empty values and the marker entry.
        # The comparison must be by value: an identity check (`is not`)
        # against a string literal never filters the marker out reliably.
        for keyword in result_source.get('keywords', []):
            value = keyword.get('keyword')
            if value and value != "* Automatic Keywords *":
                keywords.add(value)

        # Get collaborations.
        for experiment in result_source.get(
                'accelerator_experiments', []):
            collaborations.add(experiment.get('experiment'))

        # Append to the list.
        publications.append(publication)

    response = {}
    response['publications'] = publications
    response['keywords'] = list(keywords)
    response['collaborations'] = list(collaborations)

    return jsonify(response)
예제 #39
0
    def serialize(self, pid, record, links_factory=None):
        """Serialize the publications of an author as a JSON list.

        :param pid:
            Persistent identifier instance; its ``pid_value`` is matched
            against ``authors.recid`` of the literature records.

        :param record:
            Record instance.

        :param links_factory:
            Factory function for the link generation, which are added to
            the response.

        :return:
            JSON string holding one dictionary per publication found.
        """
        source_fields = [
            "accelerator_experiments",
            "earliest_date",
            "citation_count",
            "control_number",
            "facet_inspire_doc_type",
            "publication_info",
            "self",
            "keywords",
            "titles",
        ]
        author_query = {"match": {"authors.recid": pid.pid_value}}
        search = LiteratureSearch().query(author_query).params(
            _source=source_fields
        )

        publications = []
        for hit in search.scan():
            source = hit.to_dict()

            # Mandatory fields: a missing one aborts the serialization.
            entry = {
                'id': int(source['control_number']),
                'record': source['self'],
                'title': get_title(source),
            }

            # Earliest date, when the record has one.
            if 'earliest_date' in source:
                entry['date'] = source['earliest_date']

            # Publication type: first facet value, if any.
            doc_types = source.get('facet_inspire_doc_type', [])
            try:
                entry['type'] = doc_types[0]
            except IndexError:
                pass

            # Citation count, when the record has one.
            if 'citation_count' in source:
                entry['citations'] = source['citation_count']

            # Journal: taken from the first publication_info entry and only
            # reported when that entry carries a journal title.
            try:
                first_info = source.get('publication_info', [])[0]
                journal = {'title': first_info['journal_title']}
            except (IndexError, KeyError):
                journal = None
            else:
                # Journal id and $self are optional; the id is kept even if
                # the record reference is missing.
                try:
                    journal['id'] = first_info['journal_recid']
                    journal['record'] = first_info['journal_record']
                except KeyError:
                    pass

            if journal is not None:
                entry['journal'] = journal

            # Collaborations: unique experiment values of the record.
            collaborations = {
                experiment.get('experiment')
                for experiment in source.get('accelerator_experiments', [])
            }
            if collaborations:
                entry['collaborations'] = list(collaborations)

            publications.append(entry)

        return json.dumps(publications)
예제 #40
0
def test_edit_article_workflow_deleting(workflow_app, mocked_external_services):
    """Deleting a record through the editor also removes it from the index.

    Starts an ``edit_article`` workflow for a literature record, sends the
    editor callback with ``deleted`` set to ``True``, runs the robotupload
    callback and then checks the DB record, the search index and the final
    workflow state.
    """
    client = workflow_app.test_client()
    curator = User.query.filter_by(email='*****@*****.**').one()
    login_user_via_session(client, user=curator)

    record_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'arxiv_eprints': [
            {'categories': ['nucl-th'], 'value': '1802.03287'},
        ],
        'control_number': 123,
        'document_type': ['article'],
        'titles': [
            {'title': 'Resource Pooling in Large-Scale Content Delivery Systems'},
        ],
        'self': {'$ref': 'http://localhost:5000/schemas/records/hep.json'},
        '_collections': ['Literature'],
    }
    metadata_factory = TestRecordMetadata.create_from_kwargs(json=record_json)
    workflow_uuid = start(
        'edit_article',
        data=metadata_factory.record_metadata.json,
    )
    wf_object = WorkflowEngine.from_uuid(workflow_uuid).objects[0]

    wf_object.id_user = curator.get_id()

    # The workflow halts until the editor calls back.
    assert wf_object.status == ObjectStatus.WAITING
    assert wf_object.extra_data['callback_url']

    # Before the edit the record can be fetched from the index.
    db_record = get_db_record('lit', 123)
    literature_search = LiteratureSearch()
    literature_search.get_source(db_record.id)

    # Simulate the curator flagging the record as deleted and saving.
    wf_object.data['deleted'] = True

    editor_payload = {
        'id': wf_object.id,
        'metadata': wf_object.data,
        '_extra_data': wf_object.extra_data,
    }

    app_client_response_url = wf_object.extra_data['callback_url']
    client.put(
        app_client_response_url,
        data=json.dumps(editor_payload),
        content_type='application/json',
    )

    # After the callback the workflow waits again, now for robot_upload.
    wf_object = WorkflowEngine.from_uuid(workflow_uuid).objects[0]
    assert wf_object.status == ObjectStatus.WAITING
    assert wf_object.data['deleted'] is True

    do_robotupload_callback(
        app=workflow_app,
        workflow_id=wf_object.id,
        recids=[wf_object.data['control_number']],
    )

    # The deletion reached the DB ...
    db_record = get_db_record('lit', 123)
    assert db_record['deleted'] is True

    # ... and the record is gone from the index.
    with pytest.raises(NotFoundError):
        literature_search.get_source(db_record.id)

    # The workflow finished and left no pending record behind.
    wf_object = WorkflowEngine.from_uuid(workflow_uuid).objects[0]
    assert wf_object.status == ObjectStatus.COMPLETED
    leftovers = WorkflowsPendingRecord.query.filter_by(
        workflow_id=wf_object.id
    ).all()
    assert not leftovers
def test_index_after_commit_indexes_also_cites_record_when_new_citation_is_added(
    mocked_indexing_task,
    mocked_permission_check,
    app,
):
    """The cited record's citation count follows a newly added citation.

    A citing record is first created with an unlinked reference (count stays
    at 0) and then updated so that its reference points at the cited record
    (count becomes 1).
    """
    # Uses ``app`` instead of the isolated app: the records have to be
    # committed to the DB so that record versions get created.
    cited_data = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [{'title': 'This is the record being cited'}],
        'control_number': 9999,
        '_collections': ['Literature'],
    }
    cited_record = InspireRecord.create(data=cited_data, skip_files=True)
    db.session.commit()
    es.indices.refresh('records-hep')

    task_args = ('lit', cited_record['control_number'], 1)
    mocked_indexing_task.assert_called_with(*task_args)
    # Run by hand the task that was mocked out.
    index_modified_citations_from_record(*task_args)

    indexed_cited = get_es_record('lit', 9999)
    assert indexed_cited['citation_count'] == 0
    assert LiteratureSearch.citations(indexed_cited).total == 0

    # First version of the citing record: its only reference is not linked
    # to any record.
    citing_data = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [{'title': 'Record citing the first one'}],
        '_collections': ['Literature'],
        'control_number': 8888,
        'references': [
            {'reference': {'authors': [{'full_name': 'Smith, J.'}]}},
        ],
    }

    citing_record = InspireRecord.create(data=citing_data, skip_files=True)
    db.session.commit()
    es.indices.refresh('records-hep')

    task_args = ('lit', citing_record['control_number'], 1)
    mocked_indexing_task.assert_called_with(*task_args)
    # Run by hand the task that was mocked out.
    index_modified_citations_from_record(*task_args)

    indexed_cited = get_es_record('lit', 9999)
    assert indexed_cited['citation_count'] == 0
    assert LiteratureSearch.citations(indexed_cited).total == 0

    # Second version of the citing record: the reference now points at the
    # cited record.
    citing_data['references'] = [
        {
            'curated_relation': False,
            'record': {
                '$ref': 'http://localhost:5000/api/literature/9999',
            },
            'reference': {
                'authors': [{'full_name': 'Smith, J.'}],
            },
        },
    ]
    citing_record.clear()
    citing_record.update(citing_data)
    citing_record.commit()
    db.session.commit()
    es.indices.refresh('records-hep')

    task_args = ('lit', citing_record['control_number'], 2)
    mocked_indexing_task.assert_called_with(*task_args)
    # Run by hand the task that was mocked out.
    index_modified_citations_from_record(*task_args)

    indexed_cited = get_es_record('lit', 9999)
    assert indexed_cited['citation_count'] == 1
    assert LiteratureSearch.citations(indexed_cited).total == 1

    # Clean up the committed records.
    _delete_record('lit', 8888)
    _delete_record('lit', 9999)
예제 #42
0
파일: views.py 프로젝트: Kjili/inspire-next
def get_institution_people_datatables_rows(recid):
    """
    Datatable rows to render people working in an institution.

    The literature index is aggregated by ``authors.recid`` over the authors
    affiliated with the institution; the author names are then looked up in
    the authors index.

    :param recid: id of the institution.
    :type recid: string
    :return: one ``[html_link, doc_count]`` row per author bucket.
    :rtype: list
    """
    literature_query = LiteratureSearch().query(
        "term",
        authors__affiliations__recid=recid
    )
    literature_query = literature_query.params(search_type="count")

    literature_query.aggs.bucket("authors", "nested", path="authors")\
        .bucket("affiliated", "filter", term={
            "authors.affiliations.recid": recid
        })\
        .bucket('byrecid', 'terms', field='authors.recid')

    records_from_es = literature_query.execute().to_dict()

    # Extract all the author buckets from the aggregation
    papers_per_author = records_from_es[
        'aggregations'
    ]['authors']['affiliated']['byrecid']['buckets']

    # Nothing to render: skip the authors lookup, which would otherwise be
    # issued with an empty query string.
    if not papers_per_author:
        return []

    # Generate query to retrieve records from author index. The institution
    # ``recid`` parameter is deliberately not reused as a loop variable.
    authors_query = " OR ".join(
        "recid:{}".format(int(paper['key'])) for paper in papers_per_author
    )

    results = AuthorsSearch().query_from_iq(
        authors_query
    ).params(
        size=9999,
        _source=['control_number', 'name']
    ).execute()

    recid_map = {result.control_number: result.name for result in results}

    rows = []
    # NOTE(review): the name is interpolated into HTML without escaping;
    # confirm that the consumer of these rows escapes or trusts it.
    author_html_link = u"<a href='/authors/{recid}'>{name}</a>"
    for author in papers_per_author:
        # An author bucket without a matching authors record raises
        # KeyError here, as it did before.
        author_name = recid_map[author['key']]
        try:
            display_name = author_name.preferred_name
        except AttributeError:
            # No preferred name, use value
            display_name = author_name.value

        rows.append([
            author_html_link.format(recid=author['key'], name=display_name),
            author['doc_count'],
        ])

    return rows
예제 #43
0
def test_regression_index_after_commit_retries_for_new_record_not_yet_in_db(
    mocked_indexing_task,
    mocked_permission_check,
    app,
):
    """The citation indexing task raises when the record is not in the DB.

    The citing record is deleted before the task is run by hand, to imitate
    a task that starts before the record has been committed.
    """
    # Uses ``app`` instead of the isolated app: the records have to be
    # committed to the DB so that record versions get created.
    cited_data = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [{'title': 'This is the record being cited'}],
        'control_number': 9999,
        '_collections': ['Literature'],
    }

    cited_record = InspireRecord.create(data=cited_data, skip_files=True)
    cited_record.commit()
    db.session.commit()
    current_search.flush_and_refresh('records-hep')

    task_args = ('lit', 9999, 2)
    mocked_indexing_task.assert_called_with(*task_args)
    # Run by hand the task that was mocked out.
    index_modified_citations_from_record(*task_args)

    indexed_cited = get_es_record('lit', 9999)
    assert indexed_cited['citation_count'] == 0
    assert LiteratureSearch.citations(indexed_cited).total == 0

    # The citing record references the cited one from the start.
    citing_data = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [{'title': 'Record citing the first one'}],
        '_collections': ['Literature'],
        'control_number': 8888,
        'references': [
            {
                'record': {
                    '$ref': 'http://localhost:5000/api/literature/9999',
                },
                'reference': {
                    'authors': [{'full_name': 'Smith, J.'}],
                },
            },
        ],
    }

    citing_record = InspireRecord.create(data=citing_data, skip_files=True)
    citing_record.commit()
    db.session.commit()
    current_search.flush_and_refresh('records-hep')

    task_args = ('lit', citing_record['control_number'], 2)
    mocked_indexing_task.assert_called_with(*task_args)

    # Pretend the record is not committed to the DB yet when the task runs.
    _delete_record('lit', citing_record['control_number'])
    with pytest.raises(RecordGetterError):
        # XXX: celery in eager mode does not retry, so it raises the first time
        index_modified_citations_from_record(*task_args)

    # Clean up the remaining committed record.
    _delete_record('lit', cited_record['control_number'])
예제 #44
0
def test_index_after_commit_indexes_also_cites_two_records(
    mocked_indexing_task,
    mocked_permission_check,
    app,
):
    """Both cited records get their citation count updated.

    A citing record is created with an unlinked reference (both counts stay
    at 0) and then updated to reference the two cited records (both counts
    become 1).
    """
    # Uses ``app`` instead of the isolated app: the records have to be
    # committed to the DB so that record versions get created.
    first_cited_data = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [{'title': 'This is the record being cited'}],
        'control_number': 9999,
        '_collections': ['Literature'],
    }

    first_cited = InspireRecord.create(data=first_cited_data, skip_files=True)
    first_cited.commit()
    db.session.commit()
    current_search.flush_and_refresh('records-hep')

    task_args = ('lit', first_cited['control_number'], 2)
    mocked_indexing_task.assert_called_with(*task_args)
    # Run by hand the task that was mocked out.
    index_modified_citations_from_record(*task_args)

    second_cited_data = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [{'title': 'This also is the record being cited'}],
        'control_number': 9998,
        '_collections': ['Literature'],
    }

    second_cited = InspireRecord.create(
        data=second_cited_data, skip_files=True
    )
    second_cited.commit()
    db.session.commit()
    current_search.flush_and_refresh('records-hep')

    task_args = ('lit', second_cited['control_number'], 2)
    mocked_indexing_task.assert_called_with(*task_args)
    # Run by hand the task that was mocked out.
    index_modified_citations_from_record(*task_args)

    # Nothing cites the two records yet.
    indexed_first = get_es_record('lit', 9999)
    indexed_second = get_es_record('lit', 9998)
    assert indexed_first['citation_count'] == 0
    assert indexed_second['citation_count'] == 0
    assert LiteratureSearch.citations(indexed_first).total == 0
    assert LiteratureSearch.citations(indexed_second).total == 0

    # First version of the citing record: its only reference is not linked
    # to any record.
    citing_data = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [{'title': 'Record citing the first one'}],
        '_collections': ['Literature'],
        'control_number': 8888,
        'references': [
            {'reference': {'authors': [{'full_name': 'Smith, J.'}]}},
        ],
    }

    citing_record = InspireRecord.create(data=citing_data, skip_files=True)
    citing_record.commit()
    db.session.commit()
    current_search.flush_and_refresh('records-hep')

    task_args = ('lit', citing_record['control_number'], 2)
    mocked_indexing_task.assert_called_with(*task_args)
    # Run by hand the task that was mocked out.
    index_modified_citations_from_record(*task_args)

    indexed_first = get_es_record('lit', 9999)
    indexed_second = get_es_record('lit', 9998)
    assert indexed_first['citation_count'] == 0
    assert indexed_second['citation_count'] == 0
    assert LiteratureSearch.citations(indexed_first).total == 0
    assert LiteratureSearch.citations(indexed_second).total == 0

    # Second version of the citing record: it references both records.
    citing_data['references'] = [
        {'record': {'$ref': 'http://localhost:5000/api/literature/9998'}},
        {'record': {'$ref': 'http://localhost:5000/api/literature/9999'}},
    ]
    citing_record.clear()
    citing_record.update(citing_data)
    citing_record.commit()
    db.session.commit()
    current_search.flush_and_refresh('records-hep')

    task_args = ('lit', citing_record['control_number'], 3)
    mocked_indexing_task.assert_called_with(*task_args)
    # Run by hand the task that was mocked out.
    index_modified_citations_from_record(*task_args)

    indexed_first = get_es_record('lit', 9999)
    indexed_second = get_es_record('lit', 9998)
    assert indexed_first['citation_count'] == 1
    assert indexed_second['citation_count'] == 1
    assert LiteratureSearch.citations(indexed_first).total == 1
    assert LiteratureSearch.citations(indexed_second).total == 1

    # Clean up the committed records.
    _delete_record('lit', citing_record['control_number'])
    _delete_record('lit', first_cited['control_number'])
    _delete_record('lit', second_cited['control_number'])
def test_index_after_commit_indexes_also_cites_two_records(
    mocked_indexing_task,
    mocked_permission_check,
    app,
):
    """Both cited records get their citation count updated.

    A citing record is created with an unlinked reference (both counts stay
    at 0) and then updated to reference the two cited records (both counts
    become 1).
    """
    # Uses ``app`` instead of the isolated app: the records have to be
    # committed to the DB so that record versions get created.
    first_cited_data = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [
            {'title': 'This is the record being cited'},
        ],
        'control_number': 9999,
        '_collections': ['Literature'],
    }

    first_cited = InspireRecord.create(data=first_cited_data, skip_files=True)
    first_cited.commit()
    db.session.commit()
    es.indices.refresh('records-hep')

    task_args = ('lit', first_cited['control_number'], 2)
    mocked_indexing_task.assert_called_with(*task_args)
    # Run by hand the task that was mocked out.
    index_modified_citations_from_record(*task_args)

    second_cited_data = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [
            {'title': 'This also is the record being cited'},
        ],
        'control_number': 9998,
        '_collections': ['Literature'],
    }

    second_cited = InspireRecord.create(
        data=second_cited_data, skip_files=True
    )
    second_cited.commit()
    db.session.commit()
    es.indices.refresh('records-hep')

    task_args = ('lit', second_cited['control_number'], 2)
    mocked_indexing_task.assert_called_with(*task_args)
    # Run by hand the task that was mocked out.
    index_modified_citations_from_record(*task_args)

    # Nothing cites the two records yet.
    indexed_first = get_es_record('lit', 9999)
    indexed_second = get_es_record('lit', 9998)
    assert indexed_first['citation_count'] == 0
    assert indexed_second['citation_count'] == 0
    assert LiteratureSearch.citations(indexed_first).total == 0
    assert LiteratureSearch.citations(indexed_second).total == 0

    # First version of the citing record: its only reference is not linked
    # to any record.
    citing_data = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [
            {'title': 'Record citing the first one'},
        ],
        '_collections': ['Literature'],
        'control_number': 8888,
        'references': [
            {'reference': {'authors': [{'full_name': 'Smith, J.'}]}},
        ],
    }

    citing_record = InspireRecord.create(data=citing_data, skip_files=True)
    citing_record.commit()
    db.session.commit()
    es.indices.refresh('records-hep')

    task_args = ('lit', citing_record['control_number'], 2)
    mocked_indexing_task.assert_called_with(*task_args)
    # Run by hand the task that was mocked out.
    index_modified_citations_from_record(*task_args)

    indexed_first = get_es_record('lit', 9999)
    indexed_second = get_es_record('lit', 9998)
    assert indexed_first['citation_count'] == 0
    assert indexed_second['citation_count'] == 0
    assert LiteratureSearch.citations(indexed_first).total == 0
    assert LiteratureSearch.citations(indexed_second).total == 0

    # Second version of the citing record: it references both records.
    citing_data['references'] = [
        {'record': {'$ref': 'http://localhost:5000/api/literature/9998'}},
        {'record': {'$ref': 'http://localhost:5000/api/literature/9999'}},
    ]
    citing_record.clear()
    citing_record.update(citing_data)
    citing_record.commit()
    db.session.commit()
    es.indices.refresh('records-hep')

    task_args = ('lit', citing_record['control_number'], 3)
    mocked_indexing_task.assert_called_with(*task_args)
    # Run by hand the task that was mocked out.
    index_modified_citations_from_record(*task_args)

    indexed_first = get_es_record('lit', 9999)
    indexed_second = get_es_record('lit', 9998)
    assert indexed_first['citation_count'] == 1
    assert indexed_second['citation_count'] == 1
    assert LiteratureSearch.citations(indexed_first).total == 1
    assert LiteratureSearch.citations(indexed_second).total == 1

    # Clean up the committed records.
    _delete_record('lit', citing_record['control_number'])
    _delete_record('lit', first_cited['control_number'])
    _delete_record('lit', second_cited['control_number'])
def test_regression_index_after_commit_retries_for_new_record_not_yet_in_db(
    mocked_indexing_task,
    mocked_permission_check,
    app,
):
    """The citation indexing task raises when the record is not in the DB.

    The citing record is deleted before the task is run by hand, to imitate
    a task that starts before the record has been committed.
    """
    # Uses ``app`` instead of the isolated app: the records have to be
    # committed to the DB so that record versions get created.
    cited_data = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [
            {'title': 'This is the record being cited'},
        ],
        'control_number': 9999,
        '_collections': ['Literature'],
    }

    cited_record = InspireRecord.create(data=cited_data, skip_files=True)
    cited_record.commit()
    db.session.commit()
    es.indices.refresh('records-hep')

    task_args = ('lit', 9999, 2)
    mocked_indexing_task.assert_called_with(*task_args)
    # Run by hand the task that was mocked out.
    index_modified_citations_from_record(*task_args)

    indexed_cited = get_es_record('lit', 9999)
    assert indexed_cited['citation_count'] == 0
    assert LiteratureSearch.citations(indexed_cited).total == 0

    # The citing record references the cited one from the start.
    linked_reference = {
        'record': {
            '$ref': 'http://localhost:5000/api/literature/9999',
        },
        'reference': {
            'authors': [{'full_name': 'Smith, J.'}],
        },
    }
    citing_data = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [
            {'title': 'Record citing the first one'},
        ],
        '_collections': ['Literature'],
        'control_number': 8888,
        'references': [linked_reference],
    }

    citing_record = InspireRecord.create(data=citing_data, skip_files=True)
    citing_record.commit()
    db.session.commit()
    es.indices.refresh('records-hep')

    task_args = ('lit', citing_record['control_number'], 2)
    mocked_indexing_task.assert_called_with(*task_args)

    # Pretend the record is not committed to the DB yet when the task runs.
    _delete_record('lit', citing_record['control_number'])
    with pytest.raises(RecordGetterError):
        # XXX: celery in eager mode does not retry, so it raises the first time
        index_modified_citations_from_record(*task_args)

    # Clean up the remaining committed record.
    _delete_record('lit', cited_record['control_number'])
예제 #47
0
    def serialize(self, pid, record, links_factory=None):
        """Return different metrics for a given author recid.

        :param pid:
            Persistent identifier instance.

        :param record:
            Record instance.

        :param links_factory:
            Factory function for the link generation, which are added to
            the response.

        :return:
            JSON string with the keys ``citations``, ``publications``,
            ``types``, ``hindex`` and ``i10index``, plus ``fields`` and
            ``keywords`` when any were found.
        """
        author_pid = pid.pid_value

        fields = set()
        keywords = []

        statistics = {}
        statistics['citations'] = 0
        statistics['publications'] = 0
        statistics['types'] = {}

        # Citation count per publication recid, input of the index helpers.
        statistics_citations = {}

        search = LiteratureSearch().query({
            "match": {
                "authors.recid": author_pid
            }
        }).params(
            _source=[
                "citation_count",
                "control_number",
                "facet_inspire_doc_type",
                "facet_inspire_subjects",
                "thesaurus_terms",
            ]
        )

        for result in search.scan():
            result_source = result.to_dict()

            # Increment the count of the total number of publications.
            statistics['publications'] += 1

            # Increment the count of citations.
            citation_count = result_source.get('citation_count', 0)

            statistics['citations'] += citation_count
            statistics_citations[int(result_source['control_number'])] = \
                citation_count

            # Count how many times certain type of publication was published.
            # The type has to be reset for every record: otherwise a record
            # without a type raises on the first iteration or is counted
            # under the type of the previous record.
            try:
                publication_type = result_source.get(
                    'facet_inspire_doc_type', [])[0]
            except IndexError:
                publication_type = None

            if publication_type:
                types = statistics['types']
                types[publication_type] = types.get(publication_type, 0) + 1

            # Get fields.
            fields.update(result_source.get('facet_inspire_subjects', []))

            # Get keywords.
            keywords.extend([
                k for k in force_force_list(
                    get_value(result_source, 'thesaurus_terms.keyword'))
                if k != '* Automatic Keywords *'])

        # Calculate h-index together with i10-index.
        statistics['hindex'] = calculate_h_index(statistics_citations)
        statistics['i10index'] = calculate_i10_index(statistics_citations)

        if fields:
            statistics['fields'] = list(fields)

        # Return the top 25 keywords.
        if keywords:
            counter = Counter(keywords)
            statistics['keywords'] = [{
                'count': count,
                'keyword': keyword
            } for keyword, count in counter.most_common(25)]

        return json.dumps(statistics)
예제 #48
0
def get_institution_people_datatables_rows(recid):
    """
    Datatable rows to render people working in an institution.

    The literature index is aggregated by ``authors.recid`` over the authors
    affiliated with the institution; the author names are then looked up in
    the authors index.

    :param recid: id of the institution.
    :type recid: string
    :return: one ``[html_link, doc_count]`` row per author bucket.
    :rtype: list
    """
    literature_query = LiteratureSearch().query(
        "term",
        authors__affiliations__recid=recid
    )
    # FIXME: search_type=count is deprecated, but the whole function doesn't work anymore
    literature_query = literature_query.params(search_type="count")

    literature_query.aggs.bucket("authors", "nested", path="authors")\
        .bucket("affiliated", "filter", term={
            "authors.affiliations.recid": recid
        })\
        .bucket('byrecid', 'terms', field='authors.recid')

    records_from_es = literature_query.execute().to_dict()

    # Extract all the author buckets from the aggregation
    papers_per_author = records_from_es[
        'aggregations'
    ]['authors']['affiliated']['byrecid']['buckets']

    # Nothing to render: skip the authors lookup, which would otherwise be
    # issued with an empty query string.
    if not papers_per_author:
        return []

    # Generate query to retrieve records from author index. The institution
    # ``recid`` parameter is deliberately not reused as a loop variable.
    authors_query = " OR ".join(
        "recid:{}".format(int(paper['key'])) for paper in papers_per_author
    )

    results = AuthorsSearch().query_from_iq(
        authors_query
    ).params(
        size=9999,
        _source=['control_number', 'name']
    ).execute()

    recid_map = {result.control_number: result.name for result in results}

    rows = []
    # NOTE(review): the name is interpolated into HTML without escaping;
    # confirm that the consumer of these rows escapes or trusts it.
    author_html_link = u"<a href='/authors/{recid}'>{name}</a>"
    for author in papers_per_author:
        # An author bucket without a matching authors record raises
        # KeyError here, as it did before.
        author_name = recid_map[author['key']]
        try:
            display_name = author_name.preferred_name
        except AttributeError:
            # No preferred name, use value
            display_name = author_name.value

        rows.append([
            author_html_link.format(recid=author['key'], name=display_name),
            author['doc_count'],
        ])

    return rows