def test_that_db_changes_are_mirrored_in_es(app):
    """Creating, updating and deleting a record in the DB is mirrored in ES."""
    literature = LiteratureSearch()
    record_data = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [{'title': 'foo'}],
    }

    # Creation: the freshly created record can be fetched from ES.
    record = InspireRecord.create(record_data)
    indexed = literature.get_source(record.id)
    assert get_title(indexed) == 'foo'

    # Update: after a commit, ES returns the new title.
    record['titles'][0]['title'] = 'bar'
    record.commit()
    indexed = literature.get_source(record.id)
    assert get_title(indexed) == 'bar'

    # Deletion: the record is no longer found in ES.
    record._delete(force=True)
    with pytest.raises(NotFoundError):
        literature.get_source(record.id)
def test_get_title(double_title, single_title, empty_title):
    """Check get_title on the title fixtures and on a record without titles."""
    cases = (
        (double_title, "Parton distributions with LHC data"),
        (single_title, "The Large Hadron Collider"),
        (empty_title, ""),
        # A record that has no "titles" key at all.
        ({"not_titles": []}, ""),
    )

    for record, expected in cases:
        assert get_title(record) == expected
def test_get_title_returns_empty_string_when_no_titles():
    """get_title yields '' for a Record built from an empty dict."""
    result = get_title(Record({}))

    assert '' == result
def test_get_title_returns_empty_string_when_no_titles():
    """get_title yields '' for a plain dict without a 'titles' key."""
    empty_record = {}

    assert '' == get_title(empty_record)
def test_get_title_returns_empty_string_when_titles_is_empty():
    """get_title yields '' when the 'titles' list is present but empty."""
    result = get_title({'titles': []})

    assert '' == result
Beispiel #6
0
def title_rule(self, key, value):
    """Build a ``{"title", "subtitle"}`` mapping from a list of titles.

    ``value`` is wrapped as the ``titles`` field of a minimal record and
    passed to ``get_title`` / ``get_subtitle``. ``key`` is not used.

    :raises KeyError: when ``get_title`` returns an empty string.
    """
    title = get_title({"titles": value})
    if title != '':
        return {
            "title": title,
            "subtitle": get_subtitle({"titles": value}),
        }
    raise KeyError
def test_get_title_returns_empty_string_when_titles_is_empty():
    """get_title yields '' for a Record whose 'titles' list is empty."""
    record = Record({'titles': []})

    assert '' == get_title(record)
Beispiel #8
0
def build_citesummary(search):
    """Build a cite summary for every hit of ``search``.

    For each hit yielded by ``search.scan()`` an entry is produced with the
    record's own metadata plus a ``citations`` list. That list holds one
    entry per literature record whose ``references.recid`` matches the
    record's id, obtained with one extra query per hit.

    :param search: object exposing ``scan()``; each yielded item must
        provide ``to_dict()``.
    :returns: list of summary dicts, in scan order.
    """
    summary = []

    for hit in search.scan():
        record = hit.to_dict()

        # The same list object is stored in the entry and filled below.
        citations = []
        summary.append({
            'citations': citations,
            'collaboration': is_collaboration(record),
            'core': is_core(record),
            'date': get_date(record),
            'document_type': get_document_type(record),
            'id': get_id(record),
            'subject': get_subject(record),
            'title': get_title(record),
        })

        # Restrict the returned source to the fields listed here.
        citing_search = LiteratureSearch().query(
            'match', references__recid=get_id(record)
        ).params(
            _source=[
                'authors.recid',
                'collaborations.value',
                'control_number',
                'earliest_date',
                'facet_inspire_doc_type',
                'inspire_categories',
                'titles.title',
            ]
        )

        for citing_hit in citing_search.scan():
            citing = citing_hit.to_dict()
            citations.append({
                'collaboration': is_collaboration(citing),
                'core': is_core(citing),
                'date': get_date(citing),
                'document_type': get_document_type(citing),
                'id': get_id(citing),
                'selfcite': is_selfcite(record, citing),
                'subject': get_subject(citing),
                'title': get_title(citing),
            })

    return summary
Beispiel #9
0
def reply_ticket_context(user, obj):
    """Assemble the template context for a reply on a literature ticket.

    ``reason`` and ``record_url`` come from ``obj.extra_data`` (keys
    ``reason`` and ``url``) and default to an empty string.
    """
    return {
        'object': obj,
        'user': user,
        'title': get_title(obj.data),
        'reason': obj.extra_data.get("reason", ""),
        'record_url': obj.extra_data.get("url", ""),
    }
Beispiel #10
0
    def conference_information(self):
        """Collect conference details for each ``publication_info`` entry.

        Every entry yields one dict with the recid and title of the
        resolved conference and parent records, plus the entry's page range
        and article id. A reference that does not resolve to a record with a
        ``control_number`` yields ``None`` for the recid and is passed to
        ``get_title`` as an empty dict.
        """
        entries = []
        for pub_info in self['publication_info']:
            conference_rec, conference_recid = {}, None
            parent_rec, parent_recid = {}, None

            if 'conference_record' in pub_info:
                resolved = replace_refs(pub_info['conference_record'], 'es')
                if resolved and resolved.get('control_number'):
                    conference_rec = resolved
                    conference_recid = resolved['control_number']

            if 'parent_record' in pub_info:
                resolved = replace_refs(pub_info['parent_record'], 'es')
                if resolved and resolved.get('control_number'):
                    parent_rec = resolved
                    parent_recid = resolved['control_number']

            conference_title = get_title(conference_rec)
            # Drop only the first "Proceedings, " occurrence.
            parent_title = get_title(parent_rec).replace(
                "Proceedings, ", "", 1)

            entries.append({
                "conference_recid": conference_recid,
                "conference_title": conference_title,
                "parent_recid": parent_recid,
                "parent_title": parent_title,
                "page_start": pub_info.get('page_start'),
                "page_end": pub_info.get('page_end'),
                "artid": pub_info.get('artid'),
            })

        return entries
def test_get_title_returns_first_title():
    """get_title picks the first entry when two titles are present."""
    titles = [{'title': 'first title'}, {'title': 'second title'}]

    result = get_title({'titles': titles})

    assert 'first title' == result
def test_get_title():
    """Test get title utility on two, one, zero and missing titles."""
    cases = [
        # Two titles, the first one carrying an arXiv source.
        (
            {
                "titles": [
                    {
                        "source": "arXiv",
                        "title": "Parton distributions with LHC data",
                    },
                    {"title": "Parton distributions with LHC data"},
                ],
            },
            "Parton distributions with LHC data",
        ),
        # A single title that also has a subtitle.
        (
            {
                "titles": [
                    {
                        "subtitle": "Harvest of Run 1",
                        "title": "The Large Hadron Collider",
                    },
                ],
            },
            "The Large Hadron Collider",
        ),
        # An empty list of titles.
        ({"titles": []}, ""),
        # No "titles" key at all.
        ({"not_titles": []}, ""),
    ]

    for record, expected in cases:
        assert get_title(record) == expected
def test_get_title_returns_the_only_title():
    """A lone title is returned even when its source is arXiv."""
    only_title = {'source': "arXiv", 'title': 'The Large Hadron Collider'}
    record = Record({'titles': [only_title]})

    assert 'The Large Hadron Collider' == get_title(record)
Beispiel #14
0
def new_ticket_context(user, obj):
    """Assemble the template context for a new literature ticket.

    The subject embeds the record title. ``identifier`` is the value found
    under ``external_system_numbers.value`` or ``""`` when that is empty.
    ``user_comment`` and ``references`` are read from the ``formdata``
    entry of ``obj.extra_data``.
    """
    title = get_title(obj.data)
    formdata = obj.extra_data.get('formdata', {})
    identifiers = get_value(obj.data, "external_system_numbers.value")

    return {
        'email': user.email,
        'title': title,
        'identifier': identifiers or "",
        'user_comment': formdata.get('extra_comments', ''),
        'references': formdata.get('references'),
        'object': obj,
        'subject': u"Your suggestion to INSPIRE: {0}".format(title),
    }
Beispiel #15
0
def test_get_title_returns_first_title():
    """With two source-less titles, get_title returns the first one."""
    first = {'title': 'first title'}
    second = {'title': 'second title'}
    record = {'titles': [first, second]}

    expected = 'first title'

    assert expected == get_title(record)
def test_get_title_returns_the_non_arxiv_title():
    """The title without a source is returned rather than the arXiv one."""
    plain_title = {
        "title": "Importance of a consistent choice of alpha(s) in the matching of AlpGen and Pythia",
    }
    arxiv_title = {
        "source": "arXiv",
        "title": "Monte Carlo tuning in the presence of Matching",
    }
    record = Record({"titles": [plain_title, arxiv_title]})

    result = get_title(record)

    assert 'Importance of a consistent choice of alpha(s) in the matching of AlpGen and Pythia' == result
Beispiel #17
0
def test_get_title_returns_the_non_arxiv_title_with_source():
    """The title sourced from "other" is returned rather than the arXiv one."""
    other_title = {
        "source": "other",
        "title": "Importance of a consistent choice of alpha(s) in the matching of AlpGen and Pythia",
    }
    arxiv_title = {
        "source": "arXiv",
        "title": "Monte Carlo tuning in the presence of Matching",
    }
    record = InspireRecord({"titles": [other_title, arxiv_title]})

    result = get_title(record)

    assert 'Importance of a consistent choice of alpha(s) in the matching of AlpGen and Pythia' == result
def test_get_title():
    """get_title returns the title of a schema-valid ``titles`` entry."""
    titles_schema = load_schema('hep')['properties']['titles']

    titles = [
        {
            'subtitle': 'A mathematical exposition',
            'title': 'The General Theory of Relativity',
        },
    ]
    record = {'titles': titles}

    # Guard: the input must be valid against the hep titles sub-schema.
    assert validate(record['titles'], titles_schema) is None

    assert 'The General Theory of Relativity' == get_title(record)
Beispiel #19
0
def get_institution_papers_datatables_rows(hits):
    """Build the datatables rows for an institution's papers.

    Each row is ``[title link, rendered authors, journal title,
    citation count, year]``. Journal title falls back to ``''`` and the
    citation count to ``0`` when the attribute lookup fails; the year is the
    part of ``earliest_date`` before the first ``-``.
    """
    link_template = "<a href='/literature/{id}'>{name}</a>"
    rows = []

    for hit in hits:
        # NOTE(review): the title is UTF-8 encoded before formatting, which
        # looks like a Python 2 idiom - confirm before running on Python 3.
        title_cell = link_template.format(
            id=hit.control_number,
            name=get_title(hit.to_dict()).encode('utf8')
        )

        authors_cell = render_macro_from_template(
            name="render_record_authors",
            template="inspirehep_theme/format/record/Inspire_Default_HTML_general_macros.tpl",
            ctx={
                'record': hit.to_dict(),
                'is_brief': 'true',
                'number_of_displayed_authors': 1,
                'show_affiliations': 'false',
                'collaboration_only': 'true'
            }
        )

        try:
            journal_cell = hit.publication_info[0].journal_title
        except AttributeError:
            journal_cell = ''

        try:
            citations_cell = hit.citation_count
        except AttributeError:
            citations_cell = 0

        year_cell = hit.earliest_date.split('-')[0]

        rows.append([
            title_cell,
            authors_cell,
            journal_cell,
            citations_cell,
            year_cell,
        ])

    return rows
Beispiel #20
0
def get_institution_papers_datatables_rows(hits):
    """Build the datatables rows for an institution's papers.

    Each row holds, in order: an HTML link with the record title, the
    rendered authors macro, the first journal title (``''`` if the lookup
    raises AttributeError), the citation count (``0`` if absent) and the
    year taken from ``earliest_date``.
    """
    macros_template = "inspirehep_theme/format/record/Inspire_Default_HTML_general_macros.tpl"
    rows = []

    for hit in hits:
        link = "<a href='/literature/{id}'>{name}</a>".format(
            id=hit.control_number,
            name=get_title(hit.to_dict())
        )

        macro_context = {
            'record': hit.to_dict(),
            'is_brief': 'true',
            'number_of_displayed_authors': 1,
            'show_affiliations': 'false',
            'collaboration_only': 'true'
        }
        authors = render_macro_from_template(
            name="render_record_authors",
            template=macros_template,
            ctx=macro_context
        )

        try:
            journal = hit.publication_info[0].journal_title
        except AttributeError:
            journal = ''

        citation_count = getattr(hit, 'citation_count', 0)
        year = hit.earliest_date.split('-')[0]

        rows.append([link, authors, journal, citation_count, year])

    return rows
Beispiel #21
0
def render_contributions(hits):
    """Render the given hits as table rows.

    Each row is ``[title link, rendered authors, journal title,
    citation count]``; journal title falls back to ``''`` and citation
    count to ``0`` when the attribute lookup fails.

    :returns: tuple ``(rows, hits.total)``.
    """
    link_template = u"<a href='/literature/{id}'>{name}</a>"
    rows = []

    for hit in hits:
        title_cell = link_template.format(
            id=hit.control_number,
            name=get_title(hit.to_dict())
        )

        authors_cell = render_macro_from_template(
            name="render_record_authors",
            template="inspirehep_theme/format/record/Inspire_Default_HTML_general_macros.tpl",
            ctx={
                'record': hit.to_dict(),
                'is_brief': 'true',
                'number_of_displayed_authors': 1,
                'show_affiliations': 'false',
                'collaboration_only': 'true'
            }
        )

        try:
            journal_cell = hit.publication_info[0].journal_title
        except AttributeError:
            journal_cell = ''

        try:
            citations_cell = hit.citation_count
        except AttributeError:
            citations_cell = 0

        rows.append([title_cell, authors_cell, journal_cell, citations_cell])

    return rows, hits.total
Beispiel #22
0
def render_contributions(hits):
    """Render the given hits as table rows.

    Rows contain, in order: an HTML link with the record title, the
    rendered authors macro, the first journal title (``''`` if the lookup
    raises AttributeError) and the citation count (``0`` if absent).

    :returns: tuple ``(rows, hits.total)``.
    """
    macros_template = "inspirehep_theme/format/record/Inspire_Default_HTML_general_macros.tpl"
    rows = []

    for hit in hits:
        # NOTE(review): the title is UTF-8 encoded before formatting, which
        # looks like a Python 2 idiom - confirm before running on Python 3.
        link = "<a href='/literature/{id}'>{name}</a>".format(
            id=hit.control_number,
            name=get_title(hit.to_dict()).encode('utf8')
        )

        macro_context = {
            'record': hit.to_dict(),
            'is_brief': 'true',
            'number_of_displayed_authors': 1,
            'show_affiliations': 'false',
            'collaboration_only': 'true'
        }
        authors = render_macro_from_template(
            name="render_record_authors",
            template=macros_template,
            ctx=macro_context
        )

        try:
            journal = hit.publication_info[0].journal_title
        except AttributeError:
            journal = ''

        citation_count = getattr(hit, 'citation_count', 0)

        rows.append([link, authors, journal, citation_count])

    return rows, hits.total
Beispiel #23
0
def _get_preprint_context(record):
    """Collect the values used to render ``record`` as a preprint.

    The abstract language is detected from the abstract text; when
    detection raises ``LangDetectException`` it is set to ``''``.
    """
    text = get_abstract(record)

    try:
        language_of_abstract = detect(text)
    except LangDetectException:
        language_of_abstract = ''

    context = {
        'abstract': text,
        'abstract_language': language_of_abstract,
        'arxiv_id': get_arxiv_id(record),
        'authors': get_authors(record),
        'collaborations': get_collaborations(record),
        'divulgation': get_divulgation(record),
        'domains': get_domains(record),
        'inspire_id': get_inspire_id(record),
        'keywords': get_keywords(record),
        'language': get_language(record),
        'subtitle': get_subtitle(record),
        'title': get_title(record),
    }
    return context
Beispiel #24
0
def _get_comm_context(record):
    """Collect the values used to render ``record`` as a communication.

    Besides the record's own fields, the context carries the city, country,
    dates and title of the record returned by ``get_conference_record``.
    The abstract language is ``''`` when detection raises
    ``LangDetectException``.
    """
    text = get_abstract(record)

    try:
        language_of_abstract = detect(text)
    except LangDetectException:
        language_of_abstract = ''

    conference = get_conference_record(record)
    city = get_conference_city(conference)
    country = get_conference_country(conference)
    end_date = get_conference_end_date(conference)
    start_date = get_conference_start_date(conference)
    conference_name = get_conference_title(conference)

    context = {
        'abstract': text,
        'abstract_language': language_of_abstract,
        'arxiv_id': get_arxiv_id(record),
        'authors': get_authors(record),
        'collaborations': get_collaborations(record),
        'conference_city': city,
        'conference_country': country,
        'conference_end_date': end_date,
        'conference_start_date': start_date,
        'conference_title': conference_name,
        'divulgation': get_divulgation(record),
        'doi': get_doi(record),
        'domains': get_domains(record),
        'inspire_id': get_inspire_id(record),
        'journal_issue': get_journal_issue(record),
        'journal_title': get_journal_title(record),
        'journal_volume': get_journal_volume(record),
        'keywords': get_keywords(record),
        'language': get_language(record),
        'page_artid': get_page_artid(record),
        'peer_reviewed': get_peer_reviewed(record),
        'publication_date': get_publication_date(record),
        'subtitle': get_subtitle(record),
        'title': get_title(record),
    }
    return context
Beispiel #25
0
def _get_art_context(record):
    """Collect the values used to render ``record`` as an article.

    The abstract language is detected from the abstract text. When
    detection fails, ``abstract_language`` is set to ``''`` instead of
    letting the exception propagate.

    :param record: record passed to each ``get_*`` helper.
    :returns: dict with the article context.
    """
    abstract = get_abstract(record)
    try:
        abstract_language = langdetect.detect(abstract)
    except langdetect.lang_detect_exception.LangDetectException:
        # langdetect raises when the text has no usable features
        # (for example an empty abstract); fall back to an empty string
        # like the other context builders do.
        abstract_language = ''

    return {
        'abstract': abstract,
        'abstract_language': abstract_language,
        'arxiv_id': get_arxiv_id(record),
        'authors': get_authors(record),
        'collaborations': get_collaborations(record),
        'divulgation': get_divulgation(record),
        'doi': get_doi(record),
        'domain': get_domain(record),
        'inspire_id': get_inspire_id(record),
        'journal_issue': get_journal_issue(record),
        'journal_title': get_journal_title(record),
        'journal_volume': get_journal_volume(record),
        'language': get_language(record),
        'page_artid': get_page_artid(record),
        'peer_reviewed': get_peer_reviewed(record),
        'publication_date': get_publication_date(record),
        'title': get_title(record),
    }
Beispiel #26
0
 def title(self):
     """Return the title that ``get_title`` selects for this record."""
     preferred_title = get_title(self)
     return preferred_title
Beispiel #27
0
 def title(self):
     """Preferred title of this record, as chosen by ``get_title``."""
     chosen = get_title(self)
     return chosen
Beispiel #28
0
    def serialize(self, pid, record, links_factory=None):
        """
        Serialize a single impact graph from a record.

        The output describes the record itself (``inspire_id``, ``title``,
        ``year``), the literature records citing it (``citations``) and the
        records it references (``references``).

        :param pid: Persistent identifier instance.
        :param record: Record instance.
        :param links_factory: Factory function for the link generation,
                              which are added to the response.
        :returns: JSON string produced by ``json.dumps``.
        """
        out = {}

        # Add information about current record
        out['inspire_id'] = record['control_number']
        out['title'] = get_title(record)
        # The year is the part of earliest_date before the first '-'.
        out['year'] = record['earliest_date'].split('-')[0]

        # Get citations
        citations = []

        # Literature records whose references point at this record.
        # NOTE(review): size=9999 is a hard cap on the requested hits; records
        # with more citing papers would presumably be truncated - confirm.
        record_citations = LiteratureSearch().query(
            'match', references__recid=record['control_number'],
        ).params(
            size=9999,
            _source=[
                'control_number',
                'citation_count',
                'titles',
                'earliest_date'
            ]
        ).execute().hits

        for citation in record_citations:
            # A hit without citation_count counts as zero citations.
            try:
                citation_count = citation.citation_count
            except AttributeError:
                citation_count = 0
            citations.append({
                "inspire_id": citation['control_number'],
                "citation_count": citation_count,
                "title": get_title(citation.to_dict()),
                "year": citation['earliest_date'].split('-')[0]
            })

        out['citations'] = citations

        # Get references
        record_references = record.get('references', [])
        references = []

        # Only references that carry a truthy recid can be looked up.
        reference_recids = [
            ref['recid'] for ref in record_references if ref.get('recid')
        ]

        if reference_recids:
            record_references = get_es_records(
                'lit',
                reference_recids,
                _source=[
                    'control_number',
                    'citation_count',
                    'titles',
                    'earliest_date'
                ]
            )

            for reference in record_references:
                # NOTE(review): attribute access here but item access below;
                # if get_es_records returns plain dicts this always falls
                # back to 0 - confirm the returned type.
                try:
                    citation_count = reference.citation_count
                except AttributeError:
                    citation_count = 0
                references.append({
                    "inspire_id": reference['control_number'],
                    "citation_count": citation_count,
                    "title": get_title(reference),
                    "year": reference['earliest_date'].split('-')[0]
                })

        out['references'] = references

        return json.dumps(out)
Beispiel #29
0
def _format_journal_line(pub_info):
    """Format one publication entry as ``<i>title</i> volume (year) issue, pages``."""
    line = '<i>' + pub_info['journal_title'] + '</i>'
    if 'journal_volume' in pub_info:
        line += ' ' + pub_info['journal_volume']
    if 'year' in pub_info:
        line += ' (' + str(pub_info['year']) + ')'
    if 'journal_issue' in pub_info:
        line += ' ' + pub_info['journal_issue'] + ', '
    # Pages: a full range wins over a start page, which wins over an artid.
    if 'page_start' in pub_info and 'page_end' in pub_info:
        line += ' ' + '{page_start}-{page_end}'.format(**pub_info)
    elif 'page_start' in pub_info:
        line += ' ' + '{page_start}'.format(**pub_info)
    elif 'artid' in pub_info:
        line += ' ' + '{artid}'.format(**pub_info)
    return line


def publication_info(record):
    """Displays inline publication and conference information.

    :param record: mapping that may contain a ``publication_info`` list.
    :returns: dict with up to two keys. ``pub_info`` is the list of
        formatted journal lines, one per entry that has a ``journal_title``;
        if there is none, it holds the first ``pubinfo_freetext`` found.
        ``conf_info`` is the rendered conference macro, present when an
        entry resolves to a conference and/or parent record.

    Each journal line is built only from its own entry, so fields of one
    entry never carry over into the next.
    """
    result = {}
    if 'publication_info' not in record:
        return result

    entries = record['publication_info']

    journal_lines = [
        _format_journal_line(pub_info)
        for pub_info in entries
        if 'journal_title' in pub_info
    ]
    if journal_lines:
        result['pub_info'] = journal_lines
    else:
        # No journal information at all: fall back to the first free text.
        for field in entries:
            if 'pubinfo_freetext' in field:
                result['pub_info'] = [field['pubinfo_freetext']]
                break

    # Conference info line
    conf_template = "inspirehep_theme/format/record/Conference_info_macros.tpl"
    for pub_info in entries:
        conference_recid = None
        parent_recid = None
        if 'conference_record' in pub_info:
            conference_rec = replace_refs(pub_info['conference_record'],
                                          'es')
            if conference_rec and conference_rec.get('control_number'):
                conference_recid = conference_rec['control_number']
        if 'parent_record' in pub_info:
            parent_rec = replace_refs(pub_info['parent_record'], 'es')
            if parent_rec and parent_rec.get('control_number'):
                parent_recid = parent_rec['control_number']

        if conference_recid and parent_recid:
            try:
                ctx = {
                    "parent_recid": parent_recid,
                    "conference_recid": conference_recid,
                    "conference_title": get_title(conference_rec)
                }
                if result:
                    macro_name = "conf_with_pub_info"
                else:
                    # Nothing collected so far: the macro gets the pages.
                    ctx.update(dict(
                        page_start=pub_info.get('page_start'),
                        page_end=pub_info.get('page_end'),
                        artid=pub_info.get('artid')
                    ))
                    macro_name = "conf_without_pub_info"
                result['conf_info'] = render_macro_from_template(
                    name=macro_name,
                    template=conf_template,
                    ctx=ctx)
                # The first entry with both records settles the line.
                break
            except TypeError:
                pass
        elif conference_recid and not parent_recid:
            try:
                ctx = {
                    "conference_recid": conference_recid,
                    "conference_title": get_title(conference_rec),
                    "pub_info": bool(result.get('pub_info', ''))
                }
                result['conf_info'] = render_macro_from_template(
                    name="conference_only",
                    template=conf_template,
                    ctx=ctx)
            except TypeError:
                pass
        elif parent_recid and not conference_recid:
            try:
                ctx = {
                    "parent_recid": parent_recid,
                    "parent_title":
                        parent_rec['titles'][0]['title'].replace(
                            "Proceedings, ", "", 1),
                    "pub_info": bool(result.get('pub_info', ''))
                }
                result['conf_info'] = render_macro_from_template(
                    name="proceedings_only",
                    template=conf_template,
                    ctx=ctx)
            except TypeError:
                pass
    return result
Beispiel #30
0
def get_publications():
    """Return the publications of an author as a JSON response.

    The author is taken from the ``recid`` request value (``0`` when
    absent). Literature records whose ``authors.recid`` matches are scanned
    and reduced to a dict each; keywords and collaborations are collected
    across all of them.

    :returns: ``jsonify`` response with the keys ``publications``,
        ``keywords`` and ``collaborations``.
    """
    recid = request.values.get('recid', 0, type=int)

    publications = []
    collaborations = set()
    keywords = set()

    search = LiteratureSearch().query({
        "match": {
            "authors.recid": recid
        }
    }).params(_source=[
        'accelerator_experiments', 'control_number', 'earliest_date',
        'facet_inspire_doc_type', 'publication_info', 'titles',
        'thesaurus_terms'
    ])
    for result in search.scan():
        try:
            result_source = result.to_dict()
            publication = {}

            # Get publication title (required).
            publication['title'] = get_title(result_source)

            # Get publication recid (required).
            publication['recid'] = result_source['control_number']
        except (IndexError, KeyError):
            # Records missing a required field are skipped entirely.
            continue

        # Get publication type.
        try:
            publication['type'] = result_source.get('facet_inspire_doc_type',
                                                    [])[0]
        except IndexError:
            publication['type'] = "Not defined"

        # Get journal title.
        try:
            publication['journal_title'] = result_source.get(
                'publication_info', [])[0]['journal_title']

            # Get journal recid.
            try:
                publication['journal_recid'] = result_source.get(
                    'publication_info', [])[0]['journal_recid']
            except KeyError:
                pass
        except (IndexError, KeyError):
            pass

        # Get publication year.
        try:
            publication['year'] = result_source.get('publication_info',
                                                    [])[0]['year']
        except (IndexError, KeyError):
            pass

        # Get keywords.
        for keyword in result_source.get('thesaurus_terms', []):
            term = keyword.get('keyword')
            # Compare by value: an identity test against a string literal
            # would never filter out the marker keyword.
            if term and term != "* Automatic Keywords *":
                keywords.add(term)

        # Get collaborations.
        for experiment in result_source.get('accelerator_experiments', []):
            collaborations.add(experiment.get('experiment'))

        # Append to the list.
        publications.append(publication)

    response = {}
    response['publications'] = publications
    response['keywords'] = list(keywords)
    response['collaborations'] = list(collaborations)

    return jsonify(response)
Beispiel #31
0
def get_publications():
    """Return the publications of an author as a JSON response.

    The author is taken from the ``recid`` request value (``0`` when
    absent). Documents of the ``records-hep`` index whose ``authors.recid``
    matches are scanned and reduced to a dict each; keywords and
    collaborations are collected across all of them.

    :returns: ``jsonify`` response with the keys ``publications``,
        ``keywords`` and ``collaborations``.
    """
    recid = request.values.get('recid', 0, type=int)

    publications = []
    collaborations = set()
    keywords = set()

    for result in scan(
            current_search_client,
            query={
                '_source': ['accelerator_experiments',
                            'control_number',
                            'earliest_date',
                            'facet_inspire_doc_type',
                            'publication_info',
                            'titles',
                            'thesaurus_terms'
                            ],
                'query': {"match": {"authors.recid": recid}}
            },
            index='records-hep',
            doc_type='hep'):

        try:
            result_source = result['_source']
            publication = {}

            # Get publication title (required).
            publication['title'] = get_title(result_source)

            # Get publication recid (required).
            publication['recid'] = result_source['control_number']
        except (IndexError, KeyError):
            # Records missing a required field are skipped entirely.
            continue

        # Get publication type.
        try:
            publication['type'] = result_source.get(
                'facet_inspire_doc_type', [])[0]
        except IndexError:
            publication['type'] = "Not defined"

        # Get journal title.
        try:
            publication['journal_title'] = result_source.get(
                'publication_info', [])[0]['journal_title']

            # Get journal recid.
            try:
                publication['journal_recid'] = result_source.get(
                    'publication_info', [])[0]['journal_recid']
            except KeyError:
                pass
        except (IndexError, KeyError):
            pass

        # Get publication year.
        try:
            publication['year'] = result_source.get(
                'publication_info', [])[0]['year']
        except (IndexError, KeyError):
            pass

        # Get keywords.
        for keyword in result_source.get('thesaurus_terms', []):
            term = keyword.get('keyword')
            # Compare by value: an identity test against a string literal
            # would never filter out the marker keyword.
            if term and term != "* Automatic Keywords *":
                keywords.add(term)

        # Get collaborations.
        for experiment in result_source.get(
                'accelerator_experiments', []):
            collaborations.add(experiment.get('experiment'))

        # Append to the list.
        publications.append(publication)

    response = {}
    response['publications'] = publications
    response['keywords'] = list(keywords)
    response['collaborations'] = list(collaborations)

    return jsonify(response)
Beispiel #32
0
    def serialize(self, pid, record, links_factory=None):
        """Return a list of publications for a given author recid.

        The author recid is read from ``pid.pid_value``; ``record`` and
        ``links_factory`` are not used in this method.

        :param pid:
            Persistent identifier instance.

        :param record:
            Record instance.

        :param links_factory:
            Factory function for the link generation, which are added to
            the response.

        :returns:
            JSON string (``json.dumps``) with one object per publication.
        """
        author_pid = pid.pid_value
        publications = []

        # Literature records listing this author, limited to the fields
        # named in _source.
        # NOTE(review): "thesaurus_terms" is requested but never read below.
        search = LiteratureSearch().query({
            "match": {
                "authors.recid": author_pid
            }
        }).params(
            _source=[
                "accelerator_experiments",
                "earliest_date",
                "citation_count",
                "control_number",
                "facet_inspire_doc_type",
                "publication_info",
                "self",
                "thesaurus_terms",
                "titles",
            ]
        )

        for result in search.scan():
            result_source = result.to_dict()

            # id, record and title are always set; a missing control_number
            # or self key raises KeyError here.
            publication = {}
            publication['id'] = int(result_source['control_number'])
            publication['record'] = result_source['self']
            publication['title'] = get_title(result_source)

            # Get the earliest date.
            try:
                publication['date'] = result_source['earliest_date']
            except KeyError:
                pass

            # Get publication type.
            try:
                publication['type'] = result_source.get(
                    'facet_inspire_doc_type', [])[0]
            except IndexError:
                pass

            # Get citation count.
            try:
                publication['citations'] = result_source['citation_count']
            except KeyError:
                pass

            # Get journal.
            # Only the first publication_info entry is considered.
            try:
                publication['journal'] = {}
                publication['journal']['title'] = result_source.get(
                    'publication_info', [])[0]['journal_title']

                # Get journal id and $self.
                try:
                    publication['journal']['id'] = result_source.get(
                        'publication_info', [])[0]['journal_recid']
                    publication['journal']['record'] = result_source.get(
                        'publication_info', [])[0]['journal_record']
                except KeyError:
                    pass
            except (IndexError, KeyError):
                # No journal title: drop the placeholder created above.
                del publication['journal']

            # Get collaborations.
            collaborations = set()

            for experiment in result_source.get('accelerator_experiments', []):
                collaborations.add(experiment.get('experiment'))

            # The key is omitted when no experiment was found.
            if collaborations:
                publication['collaborations'] = list(collaborations)

            publications.append(publication)

        return json.dumps(publications)
Beispiel #33
0
def title_rule(self, key, value):
    """Build the title/subtitle mapping for a list of titles.

    :param key: Not read by this rule.
    :param value: Object stored under ``"titles"`` in the mapping handed
        to ``get_title`` and ``get_subtitle``.
    :returns: dict with the keys ``"title"`` and ``"subtitle"``.
    :raises KeyError: when ``get_title`` yields an empty string.
    """
    main_title = get_title({"titles": value})
    if main_title != '':
        return {
            "title": main_title,
            "subtitle": get_subtitle({"titles": value}),
        }
    # No usable title was found.
    raise KeyError
    def serialize(self, pid, record, links_factory=None):
        """Return a list of publications for a given author recid.

        :param pid:
            Persistent identifier instance; its ``pid_value`` is matched
            against ``authors.recid``.

        :param record:
            Record instance (not read by this serializer).

        :param links_factory:
            Factory function for the link generation (not read by this
            serializer).

        :returns:
            JSON string encoding a list with one object per matching
            literature record.

        :raises KeyError:
            If a hit lacks ``control_number`` or ``self``.
        """
        author_pid = pid.pid_value
        publications = []

        search = LiteratureSearch().query({
            "match": {
                "authors.recid": author_pid
            }
        }).params(
            _source=[
                "accelerator_experiments",
                "earliest_date",
                "citation_count",
                "control_number",
                "facet_inspire_doc_type",
                "publication_info",
                "self",
                "keywords",
                "titles",
            ]
        )

        for result in search.scan():
            result_source = result.to_dict()

            # Mandatory fields: a missing key propagates as KeyError.
            publication = {}
            publication['id'] = int(result_source['control_number'])
            publication['record'] = result_source['self']
            publication['title'] = get_title(result_source)

            # Optional scalar fields are copied only when present.
            if 'earliest_date' in result_source:
                publication['date'] = result_source['earliest_date']

            doc_types = result_source.get('facet_inspire_doc_type', [])
            if doc_types:
                publication['type'] = doc_types[0]

            if 'citation_count' in result_source:
                publication['citations'] = result_source['citation_count']

            # Journal data comes from the first publication_info entry and
            # is only reported when that entry carries a journal title.
            publication_info = result_source.get('publication_info', [])
            first_info = publication_info[0] if publication_info else {}
            if 'journal_title' in first_info:
                journal = {'title': first_info['journal_title']}
                # The record link is only attached together with the id.
                if 'journal_recid' in first_info:
                    journal['id'] = first_info['journal_recid']
                    if 'journal_record' in first_info:
                        journal['record'] = first_info['journal_record']
                publication['journal'] = journal

            # Experiments without an 'experiment' value are skipped so the
            # output never contains null collaboration names.
            collaborations = set(
                experiment.get('experiment')
                for experiment in result_source.get(
                    'accelerator_experiments', [])
                if experiment.get('experiment')
            )
            if collaborations:
                publication['collaborations'] = list(collaborations)

            publications.append(publication)

        return json.dumps(publications)
Beispiel #35
0
def get_publications():
    """Return publications, keywords and collaborations of an author.

    The author is selected through the ``recid`` request value (0 when it
    is absent), matched against ``authors.recid`` in the ``records-hep``
    index.

    :returns: ``jsonify`` response with the keys ``publications``,
        ``keywords`` and ``collaborations``.
    """
    recid = request.values.get("recid", 0, type=int)

    publications = []
    collaborations = set()
    keywords = set()

    for result in scan(
        current_search_client,
        query={
            "_source": [
                "accelerator_experiments",
                "control_number",
                "earliest_date",
                "facet_inspire_doc_type",
                "publication_info",
                "titles",
                "thesaurus_terms",
            ],
            "query": {"match": {"authors.recid": recid}},
        },
        index="records-hep",
        doc_type="hep",
    ):

        # Title and recid are required; hits lacking them are skipped
        # entirely (their keywords and collaborations are not collected).
        try:
            result_source = result["_source"]
            publication = {}
            publication["title"] = get_title(result_source)
            publication["recid"] = result_source["control_number"]
        except (IndexError, KeyError):
            continue

        # Publication type, with an explicit placeholder when unknown.
        doc_types = result_source.get("facet_inspire_doc_type", [])
        publication["type"] = doc_types[0] if doc_types else "Not defined"

        # Journal data and year come from the first publication_info entry.
        publication_info = result_source.get("publication_info", [])
        first_info = publication_info[0] if publication_info else {}

        if "journal_title" in first_info:
            publication["journal_title"] = first_info["journal_title"]
            # The journal recid is only reported alongside a journal title.
            if "journal_recid" in first_info:
                publication["journal_recid"] = first_info["journal_recid"]

        if "year" in first_info:
            publication["year"] = first_info["year"]

        # Collect keywords, dropping empty values and the automatic-keywords
        # marker. The marker is compared by value: an identity test against
        # a string literal does not reliably match an equal string.
        for keyword in result_source.get("thesaurus_terms", []):
            term = keyword.get("keyword")
            if term and term != "* Automatic Keywords *":
                keywords.add(term)

        # Collect collaborations, skipping experiments without a name so
        # the response never contains null entries.
        for experiment in result_source.get("accelerator_experiments", []):
            name = experiment.get("experiment")
            if name:
                collaborations.add(name)

        publications.append(publication)

    response = {}
    response["publications"] = publications
    response["keywords"] = list(keywords)
    response["collaborations"] = list(collaborations)

    return jsonify(response)
    def serialize(self, pid, record, links_factory=None):
        """Serialize a single impact graph from a record.

        :param pid: Persistent identifier instance (not read here).
        :param record: Record instance; must provide ``control_number``
                       and ``earliest_date``.
        :param links_factory: Factory function for the link generation
                              (not read here).
        :returns: JSON string with the keys ``inspire_id``, ``title``,
                  ``year``, ``citations`` and ``references``.
        """
        def _citation_count(hit):
            """Read ``citation_count`` from an attribute- or key-style hit.

            Falls back to 0 when neither access style yields a value, so
            plain mappings report their real count instead of always 0.
            """
            try:
                return hit.citation_count
            except AttributeError:
                pass
            try:
                return hit['citation_count']
            except (KeyError, TypeError):
                return 0

        source_fields = (
            'control_number',
            'citation_count',
            'titles',
            'earliest_date',
        )

        out = {}

        # Add information about current record
        out['inspire_id'] = record['control_number']
        out['title'] = get_title(record)
        out['year'] = record['earliest_date'].split('-')[0]

        # Get citations: records that refer to the current one.
        record_citations = LiteratureSearch().query_from_iq(
            'refersto:' + str(record['control_number'])
        ).params(
            size=9999,
            _source=list(source_fields)
        ).execute().hits

        citations = []
        for citation in record_citations:
            citations.append({
                "inspire_id": citation['control_number'],
                "citation_count": _citation_count(citation),
                "title": get_title(citation.to_dict()),
                "year": citation['earliest_date'].split('-')[0]
            })

        out['citations'] = citations

        # Get references: only those that point at a known recid.
        references = []
        reference_recids = [
            ref['recid']
            for ref in record.get('references', [])
            if ref.get('recid')
        ]

        if reference_recids:
            record_references = get_es_records(
                'lit',
                reference_recids,
                _source=list(source_fields)
            )

            for reference in record_references:
                references.append({
                    "inspire_id": reference['control_number'],
                    "citation_count": _citation_count(reference),
                    "title": get_title(reference),
                    "year": reference['earliest_date'].split('-')[0]
                })

        out['references'] = references

        return json.dumps(out)
    def serialize(self, pid, record, links_factory=None):
        """Serialize a single impact graph from a record.

        :param pid: Persistent identifier instance (not read here).
        :param record: Record instance; must provide ``control_number``
                       and ``earliest_date``.
        :param links_factory: Factory function for the link generation
                              (not read here).
        :returns: JSON string with the keys ``inspire_id``, ``title``,
                  ``year``, ``citations`` and ``references``.
        """
        source_fields = (
            'control_number',
            'citation_count',
            'titles',
            'earliest_date',
        )
        # Upper bound on the number of hits requested from a single search.
        max_hits = 9999

        out = {}

        # Add information about current record
        out['inspire_id'] = record['control_number']
        out['title'] = get_title(record)
        out['year'] = record['earliest_date'].split('-')[0]

        # Get citations. str() keeps the concatenation valid when
        # control_number is stored as an integer.
        es_query = IQ('refersto:' + str(record['control_number']))
        citation_hits = current_search_client.search(
            index='records-hep',
            doc_type='hep',
            body={"query": es_query.to_dict()},
            size=max_hits,
            _source=list(source_fields)
        )['hits']['hits']

        citations = []
        for hit in citation_hits:
            citation = hit['_source']
            citations.append({
                "inspire_id": citation['control_number'],
                "citation_count": citation.get('citation_count', 0),
                "title": get_title(citation),
                "year": citation['earliest_date'].split('-')[0]
            })

        out['citations'] = citations

        # Get references: only those that point at a known recid.
        references = []
        reference_recids = [
            ref['recid']
            for ref in record.get('references', [])
            if ref.get('recid')
        ]

        if reference_recids:
            query = IQ(' OR '.join('recid:' + str(ref)
                                   for ref in reference_recids))

            # Request as many hits as there are recids; without an explicit
            # size the search returns only its default first page.
            reference_hits = current_search_client.search(
                index='records-hep',
                doc_type='hep',
                body={"query": query.to_dict()},
                size=min(len(reference_recids), max_hits),
                _source=list(source_fields)
            )['hits']['hits']

            for hit in reference_hits:
                ref_info = hit["_source"]

                references.append({
                    "inspire_id": ref_info['control_number'],
                    "citation_count": ref_info.get('citation_count', 0),
                    "title": get_title(ref_info),
                    "year": ref_info['earliest_date'].split('-')[0]
                })

        out['references'] = references

        return json.dumps(out)
    def serialize(self, pid, record, links_factory=None):
        """Serialize a single impact graph from a record.

        :param pid: Persistent identifier instance (not read here).
        :param record: Record instance; must provide ``control_number``
                       and ``earliest_date``.
        :param links_factory: Factory function for the link generation
                              (not read here).
        :returns: JSON string with the keys ``inspire_id``, ``title``,
                  ``year``, ``citations`` and ``references``.
        """
        source_fields = (
            'control_number',
            'citation_count',
            'titles',
            'earliest_date',
        )

        out = {}

        # Add information about current record
        out['inspire_id'] = record['control_number']
        out['title'] = get_title(record)
        out['year'] = record['earliest_date'].split('-')[0]

        # Get citations. str() keeps the concatenation valid when
        # control_number is stored as an integer.
        es_query = Query('refersto:' + str(record['control_number']))
        es_query.body.update({'size': 9999})
        citation_hits = current_search_client.search(
            index='records-hep',
            doc_type='hep',
            body=es_query.body,
            _source=list(source_fields)
        )['hits']['hits']

        citations = []
        for hit in citation_hits:
            citation = hit['_source']
            citations.append({
                "inspire_id": citation['control_number'],
                "citation_count": citation.get('citation_count', 0),
                "title": get_title(citation),
                "year": citation['earliest_date'].split('-')[0]
            })

        out['citations'] = citations

        # Get references: only those that point at a known recid.
        references = []
        reference_recids = [
            ref['recid']
            for ref in record.get('references', [])
            if ref.get('recid')
        ]

        if reference_recids:
            record_references = current_search_client.mget(
                index='records-hep',
                doc_type='hep',
                body={"ids": reference_recids},
                _source=list(source_fields)
            )

            for reference in record_references["docs"]:
                # Ids that mget could not resolve come back without a
                # "_source"; skip them instead of failing with KeyError.
                ref_info = reference.get("_source")
                if ref_info is None:
                    continue

                references.append({
                    "inspire_id": ref_info['control_number'],
                    "citation_count": ref_info.get('citation_count', 0),
                    "title": get_title(ref_info),
                    "year": ref_info['earliest_date'].split('-')[0]
                })

        out['references'] = references

        return json.dumps(out)