def test_that_db_changes_are_mirrored_in_es(app):
    """Check that DB create/update/delete operations propagate to Elasticsearch."""
    search = LiteratureSearch()
    json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': [
            'article',
        ],
        'titles': [
            {'title': 'foo'},
        ],
    }

    # When a record is created in the DB, it is also created in ES.
    record = InspireRecord.create(json)
    es_record = search.get_source(record.id)

    assert get_title(es_record) == 'foo'

    # When a record is updated in the DB, it is also updated in ES.
    record['titles'][0]['title'] = 'bar'
    record.commit()
    es_record = search.get_source(record.id)

    assert get_title(es_record) == 'bar'

    # When a record is deleted in the DB, it is also deleted in ES,
    # so fetching it again must raise NotFoundError.
    record._delete(force=True)

    with pytest.raises(NotFoundError):
        es_record = search.get_source(record.id)
def test_get_title(double_title, single_title, empty_title):
    """Check get_title() against the fixtures and a record lacking 'titles'."""
    assert get_title(double_title) == "Parton distributions with LHC data"
    assert get_title(single_title) == "The Large Hadron Collider"
    assert get_title(empty_title) == ""

    record_without_titles_key = {"not_titles": []}
    assert get_title(record_without_titles_key) == ""
def test_get_title_returns_empty_string_when_no_titles():
    """An empty Record has no 'titles' key, so the title is ''."""
    record_without_titles = Record({})

    assert get_title(record_without_titles) == ''
def test_get_title_returns_empty_string_when_no_titles():
    """A plain empty dict yields an empty title."""
    empty_record = {}

    title = get_title(empty_record)

    assert title == ''
def test_get_title_returns_empty_string_when_titles_is_empty():
    """An empty 'titles' list yields an empty title."""
    record_with_empty_titles = {'titles': []}

    assert get_title(record_with_empty_titles) == ''
def title_rule(self, key, value):
    """Build a title/subtitle mapping from a raw ``titles`` value.

    Raises KeyError when no title can be extracted, so the caller can
    drop the field.
    """
    wrapped = {"titles": value}

    title = get_title(wrapped)
    if title == '':
        raise KeyError

    return {"title": title, "subtitle": get_subtitle(wrapped)}
def test_get_title_returns_empty_string_when_titles_is_empty():
    """A Record with an empty 'titles' list yields an empty title."""
    record = Record({'titles': []})

    assert get_title(record) == ''
def build_citesummary(search):
    """Build a citation summary for the records yielded by *search*.

    Returns a list with one entry per record, each carrying the record's
    metadata plus a ``citations`` list describing the records citing it.
    """
    citesummary = []

    for i, el in enumerate(search.scan()):
        result = el.to_dict()

        citesummary.append({
            'citations': [],
            'collaboration': is_collaboration(result),
            'core': is_core(result),
            'date': get_date(result),
            'document_type': get_document_type(result),
            'id': get_id(result),
            'subject': get_subject(result),
            'title': get_title(result),
        })

        # Look up the records that cite the current one; only the fields
        # needed for the summary are fetched from ES.
        search_by_literature = LiteratureSearch().query(
            'match', references__recid=get_id(result)
        ).params(
            _source=[
                'authors.recid',
                'collaborations.value',
                'control_number',
                'earliest_date',
                'facet_inspire_doc_type',
                'inspire_categories',
                'titles.title',
            ]
        )

        for el in search_by_literature.scan():
            literature_result = el.to_dict()

            citesummary[i]['citations'].append({
                'collaboration': is_collaboration(literature_result),
                'core': is_core(literature_result),
                'date': get_date(literature_result),
                'document_type': get_document_type(literature_result),
                'id': get_id(literature_result),
                # selfcite compares the cited and citing records' authors.
                'selfcite': is_selfcite(result, literature_result),
                'subject': get_subject(literature_result),
                'title': get_title(literature_result),
            })

    return citesummary
def reply_ticket_context(user, obj):
    """Context for literature replies."""
    extra_data = obj.extra_data
    return {
        'object': obj,
        'user': user,
        'title': get_title(obj.data),
        'reason': extra_data.get("reason", ""),
        'record_url': extra_data.get("url", ""),
    }
def conference_information(self):
    """Conference information.

    Returns a list with information about conferences related to the
    record.
    """
    conf_info = []
    for pub_info in self['publication_info']:
        conference_recid = None
        parent_recid = None
        parent_rec = {}
        conference_rec = {}
        if 'conference_record' in pub_info:
            # Resolve the conference $ref against ES.
            conference_rec = replace_refs(pub_info['conference_record'], 'es')
            if conference_rec and conference_rec.get('control_number'):
                conference_recid = conference_rec['control_number']
            else:
                # Unresolvable or recid-less ref: treat as absent.
                conference_rec = {}
        if 'parent_record' in pub_info:
            # Resolve the parent (proceedings) $ref against ES.
            parent_rec = replace_refs(pub_info['parent_record'], 'es')
            if parent_rec and parent_rec.get('control_number'):
                parent_recid = parent_rec['control_number']
            else:
                parent_rec = {}
        conf_info.append(
            {
                "conference_recid": conference_recid,
                "conference_title": get_title(conference_rec),
                "parent_recid": parent_recid,
                # Strip a leading "Proceedings, " prefix, if present.
                "parent_title": get_title(parent_rec).replace(
                    "Proceedings, ", "", 1
                ),
                "page_start": pub_info.get('page_start'),
                "page_end": pub_info.get('page_end'),
                "artid": pub_info.get('artid'),
            }
        )
    return conf_info
def test_get_title_returns_first_title():
    """With several titles, the first one wins."""
    record = {'titles': [{'title': 'first title'}, {'title': 'second title'}]}

    assert get_title(record) == 'first title'
def test_get_title():
    """Test get title utility."""
    double_title = {
        "titles": [
            {"source": "arXiv", "title": "Parton distributions with LHC data"},
            {"title": "Parton distributions with LHC data"},
        ]
    }
    assert get_title(double_title) == "Parton distributions with LHC data"

    single_title = {
        "titles": [
            {"subtitle": "Harvest of Run 1",
             "title": "The Large Hadron Collider"},
        ]
    }
    assert get_title(single_title) == "The Large Hadron Collider"

    # No usable titles at all: empty string.
    assert get_title({"titles": []}) == ""
    assert get_title({"not_titles": []}) == ""
def test_get_title_returns_the_only_title():
    """A single title is returned as-is, regardless of its source."""
    record = Record({
        'titles': [{'source': 'arXiv', 'title': 'The Large Hadron Collider'}],
    })

    assert get_title(record) == 'The Large Hadron Collider'
def new_ticket_context(user, obj):
    """Context for literature new tickets."""
    formdata = obj.extra_data.get('formdata', {})
    title = get_title(obj.data)
    identifiers = get_value(obj.data, "external_system_numbers.value") or []

    return dict(
        email=user.email,
        title=title,
        identifier=identifiers or "",
        user_comment=formdata.get('extra_comments', ''),
        references=formdata.get('references'),
        object=obj,
        subject=u"Your suggestion to INSPIRE: {0}".format(title),
    )
def test_get_title_returns_first_title():
    """With several titles, the first one wins."""
    titles = [
        {'title': 'first title'},
        {'title': 'second title'},
    ]

    assert get_title({'titles': titles}) == 'first title'
def test_get_title_returns_the_non_arxiv_title():
    """The title without an arXiv source is preferred."""
    record = Record({
        "titles": [
            {
                "title": "Importance of a consistent choice of alpha(s) in the matching of AlpGen and Pythia"
            },
            {
                "source": "arXiv",
                "title": "Monte Carlo tuning in the presence of Matching"
            },
        ],
    })

    assert get_title(record) == (
        'Importance of a consistent choice of alpha(s) in the matching of '
        'AlpGen and Pythia'
    )
def test_get_title_returns_the_non_arxiv_title_with_source():
    """A non-arXiv source still beats an arXiv-sourced title."""
    record = InspireRecord({
        "titles": [
            {
                "source": "other",
                "title": "Importance of a consistent choice of alpha(s) in the matching of AlpGen and Pythia"
            },
            {
                "source": "arXiv",
                "title": "Monte Carlo tuning in the presence of Matching"
            },
        ],
    })

    assert get_title(record) == (
        'Importance of a consistent choice of alpha(s) in the matching of '
        'AlpGen and Pythia'
    )
def test_get_title():
    """get_title() on a schema-valid record returns its title."""
    schema = load_schema('hep')
    subschema = schema['properties']['titles']

    record = {
        'titles': [
            {
                'subtitle': 'A mathematical exposition',
                'title': 'The General Theory of Relativity',
            },
        ],
    }
    # Sanity-check the fixture against the hep schema first.
    assert validate(record['titles'], subschema) is None

    assert get_title(record) == 'The General Theory of Relativity'
def get_institution_papers_datatables_rows(hits):
    """Row used by datatables to render institution papers."""
    result = []
    title_html = "<a href='/literature/{id}'>{name}</a>"
    for hit in hits:
        row = []
        # Title cell: link to the literature record (utf8-encoded, py2-era).
        row.append(
            title_html.format(
                id=hit.control_number,
                name=get_title(hit.to_dict()).encode('utf8')
            )
        )
        ctx = {
            'record': hit.to_dict(),
            'is_brief': 'true',
            'number_of_displayed_authors': 1,
            'show_affiliations': 'false',
            'collaboration_only': 'true'
        }
        # Authors cell rendered via the shared Jinja macro.
        row.append(render_macro_from_template(
            name="render_record_authors",
            template="inspirehep_theme/format/record/Inspire_Default_HTML_general_macros.tpl",
            ctx=ctx
        )
        )
        # Journal title is optional on the hit.
        try:
            row.append(hit.publication_info[0].journal_title)
        except AttributeError:
            row.append('')
        # Citation count defaults to 0 when missing.
        try:
            row.append(hit.citation_count)
        except AttributeError:
            row.append(0)
        # Year cell: first component of the 'YYYY-…' earliest date.
        row.append(hit.earliest_date.split('-')[0])
        result.append(row)
    return result
def get_institution_papers_datatables_rows(hits):
    """Row used by datatables to render institution papers."""
    link_template = "<a href='/literature/{id}'>{name}</a>"
    rows = []

    for hit in hits:
        record = hit.to_dict()

        title_cell = link_template.format(
            id=hit.control_number,
            name=get_title(record),
        )

        authors_cell = render_macro_from_template(
            name="render_record_authors",
            template="inspirehep_theme/format/record/Inspire_Default_HTML_general_macros.tpl",
            ctx={
                'record': record,
                'is_brief': 'true',
                'number_of_displayed_authors': 1,
                'show_affiliations': 'false',
                'collaboration_only': 'true',
            },
        )

        # Journal title is optional on the hit.
        try:
            journal_cell = hit.publication_info[0].journal_title
        except AttributeError:
            journal_cell = ''

        # Citation count defaults to 0 when missing.
        try:
            citations_cell = hit.citation_count
        except AttributeError:
            citations_cell = 0

        year_cell = hit.earliest_date.split('-')[0]

        rows.append([title_cell, authors_cell, journal_cell,
                     citations_cell, year_cell])

    return rows
def render_contributions(hits):
    """Render a list of conferences to HTML."""
    link_template = u"<a href='/literature/{id}'>{name}</a>"
    rows = []

    for hit in hits:
        record = hit.to_dict()

        title_cell = link_template.format(
            id=hit.control_number,
            name=get_title(record),
        )

        authors_cell = render_macro_from_template(
            name="render_record_authors",
            template="inspirehep_theme/format/record/Inspire_Default_HTML_general_macros.tpl",
            ctx={
                'record': record,
                'is_brief': 'true',
                'number_of_displayed_authors': 1,
                'show_affiliations': 'false',
                'collaboration_only': 'true',
            },
        )

        # Journal title is optional on the hit.
        try:
            journal_cell = hit.publication_info[0].journal_title
        except AttributeError:
            journal_cell = ''

        # Citation count defaults to 0 when missing.
        try:
            citations_cell = hit.citation_count
        except AttributeError:
            citations_cell = 0

        rows.append([title_cell, authors_cell, journal_cell, citations_cell])

    return rows, hits.total
def render_contributions(hits):
    """Render a list of conferences to HTML."""
    result = []
    title_html = "<a href='/literature/{id}'>{name}</a>"
    for hit in hits:
        row = []
        # Title cell: link to the literature record (utf8-encoded, py2-era).
        row.append(
            title_html.format(
                id=hit.control_number,
                name=get_title(hit.to_dict()).encode('utf8')
            )
        )
        ctx = {
            'record': hit.to_dict(),
            'is_brief': 'true',
            'number_of_displayed_authors': 1,
            'show_affiliations': 'false',
            'collaboration_only': 'true'
        }
        # Authors cell rendered via the shared Jinja macro.
        row.append(render_macro_from_template(
            name="render_record_authors",
            template="inspirehep_theme/format/record/Inspire_Default_HTML_general_macros.tpl",
            ctx=ctx
        )
        )
        # Journal title is optional on the hit.
        try:
            row.append(hit.publication_info[0].journal_title)
        except AttributeError:
            row.append('')
        # Citation count defaults to 0 when missing.
        try:
            row.append(hit.citation_count)
        except AttributeError:
            row.append(0)
        result.append(row)
    return result, hits.total
def _get_preprint_context(record):
    """Build the template context for a preprint record."""
    abstract = get_abstract(record)
    try:
        abstract_language = detect(abstract)
    except LangDetectException:
        # Detection fails on empty or undetectable text.
        abstract_language = ''

    context = {
        'abstract': abstract,
        'abstract_language': abstract_language,
        'arxiv_id': get_arxiv_id(record),
        'authors': get_authors(record),
        'collaborations': get_collaborations(record),
        'divulgation': get_divulgation(record),
        'domains': get_domains(record),
        'inspire_id': get_inspire_id(record),
        'keywords': get_keywords(record),
        'language': get_language(record),
        'subtitle': get_subtitle(record),
        'title': get_title(record),
    }
    return context
def _get_comm_context(record):
    """Build the template context for a 'comm' record."""
    abstract = get_abstract(record)
    try:
        abstract_language = detect(abstract)
    except LangDetectException:
        # Detection fails on empty or undetectable text.
        abstract_language = ''

    # Conference details come from the linked conference record.
    conference_record = get_conference_record(record)

    context = {
        'abstract': abstract,
        'abstract_language': abstract_language,
        'arxiv_id': get_arxiv_id(record),
        'authors': get_authors(record),
        'collaborations': get_collaborations(record),
        'conference_city': get_conference_city(conference_record),
        'conference_country': get_conference_country(conference_record),
        'conference_end_date': get_conference_end_date(conference_record),
        'conference_start_date': get_conference_start_date(conference_record),
        'conference_title': get_conference_title(conference_record),
        'divulgation': get_divulgation(record),
        'doi': get_doi(record),
        'domains': get_domains(record),
        'inspire_id': get_inspire_id(record),
        'journal_issue': get_journal_issue(record),
        'journal_title': get_journal_title(record),
        'journal_volume': get_journal_volume(record),
        'keywords': get_keywords(record),
        'language': get_language(record),
        'page_artid': get_page_artid(record),
        'peer_reviewed': get_peer_reviewed(record),
        'publication_date': get_publication_date(record),
        'subtitle': get_subtitle(record),
        'title': get_title(record),
    }
    return context
def _get_art_context(record):
    """Build the template context for an 'art' record."""
    abstract = get_abstract(record)
    # NOTE(review): langdetect.detect() raises LangDetectException on
    # empty/undetectable text; the sibling context builders guard this
    # call with try/except and fall back to '' -- confirm whether the
    # same guard is needed here.
    abstract_language = langdetect.detect(abstract)

    return {
        'abstract': abstract,
        'abstract_language': abstract_language,
        'arxiv_id': get_arxiv_id(record),
        'authors': get_authors(record),
        'collaborations': get_collaborations(record),
        'divulgation': get_divulgation(record),
        'doi': get_doi(record),
        'domain': get_domain(record),
        'inspire_id': get_inspire_id(record),
        'journal_issue': get_journal_issue(record),
        'journal_title': get_journal_title(record),
        'journal_volume': get_journal_volume(record),
        'language': get_language(record),
        'page_artid': get_page_artid(record),
        'peer_reviewed': get_peer_reviewed(record),
        'publication_date': get_publication_date(record),
        'title': get_title(record),
    }
def title(self):
    """Return the record's preferred title."""
    preferred = get_title(self)
    return preferred
def serialize(self, pid, record, links_factory=None):
    """
    Serialize a single impact graph from a record.

    :param pid: Persistent identifier instance.
    :param record: Record instance.
    :param links_factory: Factory function for the link generation,
        which are added to the response.
    """
    out = {}

    # Add information about current record
    out['inspire_id'] = record['control_number']
    out['title'] = get_title(record)
    out['year'] = record['earliest_date'].split('-')[0]

    # Get citations: records whose references point at this recid.
    citations = []
    record_citations = LiteratureSearch().query(
        'match', references__recid=record['control_number'],
    ).params(
        size=9999,
        _source=[
            'control_number',
            'citation_count',
            'titles',
            'earliest_date'
        ]
    ).execute().hits
    for citation in record_citations:
        # Hits without the field raise AttributeError; default to 0.
        try:
            citation_count = citation.citation_count
        except AttributeError:
            citation_count = 0
        citations.append({
            "inspire_id": citation['control_number'],
            "citation_count": citation_count,
            "title": get_title(citation.to_dict()),
            "year": citation['earliest_date'].split('-')[0]
        })
    out['citations'] = citations

    # Get references: resolve the recids listed on the record itself.
    record_references = record.get('references', [])
    references = []
    reference_recids = [
        ref['recid'] for ref in record_references if ref.get('recid')
    ]
    if reference_recids:
        record_references = get_es_records(
            'lit',
            reference_recids,
            _source=[
                'control_number',
                'citation_count',
                'titles',
                'earliest_date'
            ]
        )
        for reference in record_references:
            try:
                citation_count = reference.citation_count
            except AttributeError:
                citation_count = 0
            references.append({
                "inspire_id": reference['control_number'],
                "citation_count": citation_count,
                "title": get_title(reference),
                "year": reference['earliest_date'].split('-')[0]
            })
    out['references'] = references

    return json.dumps(out)
def publication_info(record):
    """Displays inline publication and conference information"""
    result = {}
    out = []
    if 'publication_info' in record:
        journal_title, journal_volume, year, journal_issue, pages = \
            ('', '', '', '', '')
        for pub_info in record['publication_info']:
            if 'journal_title' in pub_info:
                journal_title = '<i>' + pub_info['journal_title'] + '</i>'
            if 'journal_volume' in pub_info:
                journal_volume = ' ' + pub_info['journal_volume']
            if 'year' in pub_info:
                year = ' (' + str(pub_info['year']) + ')'
            if 'journal_issue' in pub_info:
                journal_issue = ' ' + pub_info['journal_issue'] + ', '
            # Pages: prefer a start-end range, then start only, then artid.
            if 'page_start' in pub_info and 'page_end' in pub_info:
                pages = ' ' + '{page_start}-{page_end}'.format(**pub_info)
            elif 'page_start' in pub_info:
                pages = ' ' + '{page_start}'.format(**pub_info)
            elif 'artid' in pub_info:
                pages = ' ' + '{artid}'.format(**pub_info)
            out.append(journal_title + journal_volume +
                       year + journal_issue + pages)
        if out:
            result['pub_info'] = out
        # Fallback: use the free-text publication info when nothing
        # structured was found.
        if not result:
            for field in record['publication_info']:
                if 'pubinfo_freetext' in field:
                    out.append(field['pubinfo_freetext'])
                    result['pub_info'] = out
                    break

        # Conference info line
        for pub_info in record['publication_info']:
            conference_recid = None
            parent_recid = None
            if 'conference_record' in pub_info:
                # Resolve the conference $ref against ES.
                conference_rec = replace_refs(pub_info['conference_record'],
                                              'es')
                if conference_rec and conference_rec.get('control_number'):
                    conference_recid = conference_rec['control_number']
            if 'parent_record' in pub_info:
                # Resolve the parent (proceedings) $ref against ES.
                parent_rec = replace_refs(pub_info['parent_record'], 'es')
                if parent_rec and parent_rec.get('control_number'):
                    parent_recid = parent_rec['control_number']
            if conference_recid and parent_recid:
                try:
                    ctx = {
                        "parent_recid": parent_recid,
                        "conference_recid": conference_recid,
                        "conference_title": get_title(conference_rec)
                    }
                    if result:
                        result['conf_info'] = render_macro_from_template(
                            name="conf_with_pub_info",
                            template="inspirehep_theme/format/record/Conference_info_macros.tpl",
                            ctx=ctx)
                        break
                    else:
                        # No structured pub info: include the page/artid
                        # details in the conference line instead.
                        ctx.update(dict(
                            page_start=pub_info.get('page_start'),
                            page_end=pub_info.get('page_end'),
                            artid=pub_info.get('artid')
                        ))
                        result['conf_info'] = render_macro_from_template(
                            name="conf_without_pub_info",
                            template="inspirehep_theme/format/record/Conference_info_macros.tpl",
                            ctx=ctx)
                        break
                except TypeError:
                    pass
            elif conference_recid and not parent_recid:
                try:
                    ctx = {
                        "conference_recid": conference_recid,
                        "conference_title": get_title(conference_rec),
                        "pub_info": bool(result.get('pub_info', ''))
                    }
                    result['conf_info'] = render_macro_from_template(
                        name="conference_only",
                        template="inspirehep_theme/format/record/Conference_info_macros.tpl",
                        ctx=ctx)
                except TypeError:
                    pass
            elif parent_recid and not conference_recid:
                try:
                    ctx = {
                        "parent_recid": parent_recid,
                        # Strip a leading "Proceedings, " prefix.
                        "parent_title": parent_rec['titles'][0]['title'].replace(
                            "Proceedings, ", "", 1),
                        "pub_info": bool(result.get('pub_info', ''))
                    }
                    result['conf_info'] = render_macro_from_template(
                        name="proceedings_only",
                        template="inspirehep_theme/format/record/Conference_info_macros.tpl",
                        ctx=ctx)
                except TypeError:
                    pass
    return result
def get_publications():
    """Return publications, keywords and collaborations for an author.

    The author is selected via the ``recid`` request parameter; matching
    literature records are fetched from Elasticsearch and summarized
    into a JSON response.
    """
    recid = request.values.get('recid', 0, type=int)

    publications = []
    collaborations = set()
    keywords = set()

    search = LiteratureSearch().query({
        "match": {
            "authors.recid": recid
        }
    }).params(_source=[
        'accelerator_experiments',
        'control_number',
        'earliest_date',
        'facet_inspire_doc_type',
        'publication_info',
        'titles',
        'thesaurus_terms'
    ])

    for result in search.scan():
        try:
            result_source = result.to_dict()

            publication = {}
            # Get publication title (required).
            publication['title'] = get_title(result_source)
            # Get publication recid (required).
            publication['recid'] = result_source['control_number']
        except (IndexError, KeyError):
            # Skip records missing the required fields.
            continue

        # Get publication type.
        try:
            publication['type'] = result_source.get('facet_inspire_doc_type',
                                                    [])[0]
        except IndexError:
            publication['type'] = "Not defined"

        # Get journal title.
        try:
            publication['journal_title'] = result_source.get(
                'publication_info', [])[0]['journal_title']

            # Get journal recid.
            try:
                publication['journal_recid'] = result_source.get(
                    'publication_info', [])[0]['journal_recid']
            except KeyError:
                pass
        except (IndexError, KeyError):
            pass

        # Get publication year.
        try:
            publication['year'] = result_source.get('publication_info',
                                                    [])[0]['year']
        except (IndexError, KeyError):
            pass

        # Get keywords.
        for keyword in result_source.get('thesaurus_terms', []):
            # BUGFIX: was `is not "* Automatic Keywords *"`, an identity
            # check that is effectively always true, so the automatic
            # keywords marker was never filtered out. Compare by value.
            if keyword.get('keyword') != "* Automatic Keywords *" \
                    and keyword.get('keyword'):
                keywords.add(keyword.get('keyword'))

        # Get collaborations.
        for experiment in result_source.get('accelerator_experiments', []):
            collaborations.add(experiment.get('experiment'))

        # Append to the list.
        publications.append(publication)

    response = {}
    response['publications'] = publications
    response['keywords'] = list(keywords)
    response['collaborations'] = list(collaborations)

    return jsonify(response)
def get_publications():
    """Return publications, keywords and collaborations for an author.

    The author is selected via the ``recid`` request parameter; matching
    literature records are scanned from the ``records-hep`` index and
    summarized into a JSON response.
    """
    recid = request.values.get('recid', 0, type=int)
    publications = []
    collaborations = set()
    keywords = set()
    for result in scan(
            current_search_client,
            query={
                '_source': ['accelerator_experiments',
                            'control_number',
                            'earliest_date',
                            'facet_inspire_doc_type',
                            'publication_info',
                            'titles',
                            'thesaurus_terms'
                            ],
                'query': {"match": {"authors.recid": recid}}
            },
            index='records-hep',
            doc_type='hep'):
        try:
            result_source = result['_source']

            publication = {}
            # Get publication title (required).
            publication['title'] = get_title(result_source)
            # Get publication recid (required).
            publication['recid'] = result_source['control_number']
        except (IndexError, KeyError):
            # Skip records missing the required fields.
            continue

        # Get publication type.
        try:
            publication['type'] = result_source.get(
                'facet_inspire_doc_type', [])[0]
        except IndexError:
            publication['type'] = "Not defined"

        # Get journal title.
        try:
            publication['journal_title'] = result_source.get(
                'publication_info', [])[0]['journal_title']

            # Get journal recid.
            try:
                publication['journal_recid'] = result_source.get(
                    'publication_info', [])[0]['journal_recid']
            except KeyError:
                pass
        except (IndexError, KeyError):
            pass

        # Get publication year.
        try:
            publication['year'] = result_source.get(
                'publication_info', [])[0]['year']
        except (IndexError, KeyError):
            pass

        # Get keywords.
        for keyword in result_source.get('thesaurus_terms', []):
            # BUGFIX: was `is not "* Automatic Keywords *"`, an identity
            # check that is effectively always true, so the automatic
            # keywords marker was never filtered out. Compare by value.
            if keyword.get('keyword') != "* Automatic Keywords *" \
                    and keyword.get('keyword'):
                keywords.add(keyword.get('keyword'))

        # Get collaborations.
        for experiment in result_source.get(
                'accelerator_experiments', []):
            collaborations.add(experiment.get('experiment'))

        # Append to the list.
        publications.append(publication)

    response = {}
    response['publications'] = publications
    response['keywords'] = list(keywords)
    response['collaborations'] = list(collaborations)
    return jsonify(response)
def serialize(self, pid, record, links_factory=None):
    """Return a list of publications for a given author recid.

    :param pid: Persistent identifier instance.
    :param record: Record instance.
    :param links_factory: Factory function for the link generation,
        which are added to the response.
    """
    author_pid = pid.pid_value
    publications = []
    search = LiteratureSearch().query({
        "match": {
            "authors.recid": author_pid
        }
    }).params(
        _source=[
            "accelerator_experiments",
            "earliest_date",
            "citation_count",
            "control_number",
            "facet_inspire_doc_type",
            "publication_info",
            "self",
            "thesaurus_terms",
            "titles",
        ]
    )
    for result in search.scan():
        result_source = result.to_dict()

        publication = {}
        publication['id'] = int(result_source['control_number'])
        publication['record'] = result_source['self']
        publication['title'] = get_title(result_source)

        # Get the earliest date.
        try:
            publication['date'] = result_source['earliest_date']
        except KeyError:
            pass

        # Get publication type.
        try:
            publication['type'] = result_source.get(
                'facet_inspire_doc_type', [])[0]
        except IndexError:
            pass

        # Get citation count.
        try:
            publication['citations'] = result_source['citation_count']
        except KeyError:
            pass

        # Get journal.
        try:
            publication['journal'] = {}
            publication['journal']['title'] = result_source.get(
                'publication_info', [])[0]['journal_title']

            # Get journal id and $self.
            try:
                publication['journal']['id'] = result_source.get(
                    'publication_info', [])[0]['journal_recid']
                publication['journal']['record'] = result_source.get(
                    'publication_info', [])[0]['journal_record']
            except KeyError:
                pass
        except (IndexError, KeyError):
            # No journal title at all: drop the placeholder dict.
            del publication['journal']

        # Get collaborations.
        collaborations = set()
        for experiment in result_source.get('accelerator_experiments', []):
            collaborations.add(experiment.get('experiment'))

        if collaborations:
            publication['collaborations'] = list(collaborations)

        publications.append(publication)

    return json.dumps(publications)
def serialize(self, pid, record, links_factory=None):
    """Return a list of publications for a given author recid.

    :param pid: Persistent identifier instance.
    :param record: Record instance.
    :param links_factory: Factory function for the link generation,
        which are added to the response.
    """
    author_pid = pid.pid_value
    publications = []
    search = LiteratureSearch().query({
        "match": {
            "authors.recid": author_pid
        }
    }).params(
        _source=[
            "accelerator_experiments",
            "earliest_date",
            "citation_count",
            "control_number",
            "facet_inspire_doc_type",
            "publication_info",
            "self",
            "keywords",
            "titles",
        ]
    )
    for result in search.scan():
        result_source = result.to_dict()

        publication = {}
        publication['id'] = int(result_source['control_number'])
        publication['record'] = result_source['self']
        publication['title'] = get_title(result_source)

        # Get the earliest date.
        try:
            publication['date'] = result_source['earliest_date']
        except KeyError:
            pass

        # Get publication type.
        try:
            publication['type'] = result_source.get(
                'facet_inspire_doc_type', [])[0]
        except IndexError:
            pass

        # Get citation count.
        try:
            publication['citations'] = result_source['citation_count']
        except KeyError:
            pass

        # Get journal.
        try:
            publication['journal'] = {}
            publication['journal']['title'] = result_source.get(
                'publication_info', [])[0]['journal_title']

            # Get journal id and $self.
            try:
                publication['journal']['id'] = result_source.get(
                    'publication_info', [])[0]['journal_recid']
                publication['journal']['record'] = result_source.get(
                    'publication_info', [])[0]['journal_record']
            except KeyError:
                pass
        except (IndexError, KeyError):
            # No journal title at all: drop the placeholder dict.
            del publication['journal']

        # Get collaborations.
        collaborations = set()
        for experiment in result_source.get('accelerator_experiments', []):
            collaborations.add(experiment.get('experiment'))

        if collaborations:
            publication['collaborations'] = list(collaborations)

        publications.append(publication)

    return json.dumps(publications)
def get_publications():
    """Return publications, keywords and collaborations for an author.

    The author is selected via the ``recid`` request parameter; matching
    literature records are scanned from the ``records-hep`` index and
    summarized into a JSON response.
    """
    recid = request.values.get("recid", 0, type=int)
    publications = []
    collaborations = set()
    keywords = set()
    for result in scan(
        current_search_client,
        query={
            "_source": [
                "accelerator_experiments",
                "control_number",
                "earliest_date",
                "facet_inspire_doc_type",
                "publication_info",
                "titles",
                "thesaurus_terms",
            ],
            "query": {"match": {"authors.recid": recid}},
        },
        index="records-hep",
        doc_type="hep",
    ):
        try:
            result_source = result["_source"]

            publication = {}
            # Get publication title (required).
            publication["title"] = get_title(result_source)
            # Get publication recid (required).
            publication["recid"] = result_source["control_number"]
        except (IndexError, KeyError):
            # Skip records missing the required fields.
            continue

        # Get publication type.
        try:
            publication["type"] = result_source.get("facet_inspire_doc_type", [])[0]
        except IndexError:
            publication["type"] = "Not defined"

        # Get journal title.
        try:
            publication["journal_title"] = result_source.get("publication_info", [])[0]["journal_title"]

            # Get journal recid.
            try:
                publication["journal_recid"] = result_source.get("publication_info", [])[0]["journal_recid"]
            except KeyError:
                pass
        except (IndexError, KeyError):
            pass

        # Get publication year.
        try:
            publication["year"] = result_source.get("publication_info", [])[0]["year"]
        except (IndexError, KeyError):
            pass

        # Get keywords.
        for keyword in result_source.get("thesaurus_terms", []):
            # BUGFIX: was `is not "* Automatic Keywords *"`, an identity
            # check that is effectively always true, so the automatic
            # keywords marker was never filtered out. Compare by value.
            if keyword.get("keyword") != "* Automatic Keywords *" and keyword.get("keyword"):
                keywords.add(keyword.get("keyword"))

        # Get collaborations.
        for experiment in result_source.get("accelerator_experiments", []):
            collaborations.add(experiment.get("experiment"))

        # Append to the list.
        publications.append(publication)

    response = {}
    response["publications"] = publications
    response["keywords"] = list(keywords)
    response["collaborations"] = list(collaborations)
    return jsonify(response)
def serialize(self, pid, record, links_factory=None):
    """
    Serialize a single impact graph from a record.

    :param pid: Persistent identifier instance.
    :param record: Record instance.
    :param links_factory: Factory function for the link generation,
        which are added to the response.
    """
    out = {}

    # Add information about current record
    out['inspire_id'] = record['control_number']
    out['title'] = get_title(record)
    out['year'] = record['earliest_date'].split('-')[0]

    # Get citations via an inverted 'refersto' query.
    citations = []
    record_citations = LiteratureSearch().query_from_iq(
        'refersto:' + str(record['control_number'])
    ).params(
        size=9999,
        _source=[
            'control_number',
            'citation_count',
            'titles',
            'earliest_date'
        ]
    ).execute().hits
    for citation in record_citations:
        # Hits without the field raise AttributeError; default to 0.
        try:
            citation_count = citation.citation_count
        except AttributeError:
            citation_count = 0
        citations.append({
            "inspire_id": citation['control_number'],
            "citation_count": citation_count,
            "title": get_title(citation.to_dict()),
            "year": citation['earliest_date'].split('-')[0]
        })
    out['citations'] = citations

    # Get references: resolve the recids listed on the record itself.
    record_references = record.get('references', [])
    references = []
    reference_recids = [
        ref['recid'] for ref in record_references if ref.get('recid')
    ]
    if reference_recids:
        record_references = get_es_records(
            'lit',
            reference_recids,
            _source=[
                'control_number',
                'citation_count',
                'titles',
                'earliest_date'
            ]
        )
        for reference in record_references:
            try:
                citation_count = reference.citation_count
            except AttributeError:
                citation_count = 0
            references.append({
                "inspire_id": reference['control_number'],
                "citation_count": citation_count,
                "title": get_title(reference),
                "year": reference['earliest_date'].split('-')[0]
            })
    out['references'] = references

    return json.dumps(out)
def serialize(self, pid, record, links_factory=None):
    """
    Serialize a single impact graph from a record.

    :param pid: Persistent identifier instance.
    :param record: Record instance.
    :param links_factory: Factory function for the link generation,
        which are added to the response.
    """
    out = {}

    # Add information about current record
    out['inspire_id'] = record['control_number']
    out['title'] = get_title(record)
    out['year'] = record['earliest_date'].split('-')[0]

    # Get citations via an inverted 'refersto' query.
    citations = []
    es_query = IQ('refersto:' + record['control_number'])
    record_citations = current_search_client.search(
        index='records-hep',
        doc_type='hep',
        body={"query": es_query.to_dict()},
        size=9999,
        _source=[
            'control_number',
            'citation_count',
            'titles',
            'earliest_date'
        ]
    )['hits']['hits']
    for citation in record_citations:
        citation = citation['_source']
        citations.append({
            "inspire_id": citation['control_number'],
            "citation_count": citation.get('citation_count', 0),
            "title": get_title(citation),
            "year": citation['earliest_date'].split('-')[0]
        })
    out['citations'] = citations

    # Get references: resolve the recids listed on the record itself.
    record_references = record.get('references', [])
    references = []
    reference_recids = [
        ref['recid'] for ref in record_references if ref.get('recid')
    ]
    if reference_recids:
        # One OR-joined recid query fetches all references at once.
        query = IQ(' OR '.join('recid:' + str(ref)
                               for ref in reference_recids))
        record_references = current_search_client.search(
            index='records-hep',
            doc_type='hep',
            body={"query": query.to_dict()},
            _source=[
                'control_number',
                'citation_count',
                'titles',
                'earliest_date'
            ]
        )
        for reference in record_references['hits']['hits']:
            ref_info = reference["_source"]
            references.append({
                "inspire_id": ref_info['control_number'],
                "citation_count": ref_info.get('citation_count', 0),
                "title": get_title(ref_info),
                "year": ref_info['earliest_date'].split('-')[0]
            })
    out['references'] = references

    return json.dumps(out)
def serialize(self, pid, record, links_factory=None):
    """
    Serialize a single impact graph from a record.

    :param pid: Persistent identifier instance.
    :param record: Record instance.
    :param links_factory: Factory function for the link generation,
        which are added to the response.
    """
    out = {}

    # Add information about current record
    out['inspire_id'] = record['control_number']
    out['title'] = get_title(record)
    out['year'] = record['earliest_date'].split('-')[0]

    # Get citations via an inverted 'refersto' query.
    citations = []
    es_query = Query('refersto:' + record['control_number'])
    es_query.body.update({'size': 9999})
    record_citations = current_search_client.search(
        index='records-hep',
        doc_type='hep',
        body=es_query.body,
        _source=[
            'control_number',
            'citation_count',
            'titles',
            'earliest_date'
        ])['hits']['hits']
    for citation in record_citations:
        citation = citation['_source']
        citations.append({
            "inspire_id": citation['control_number'],
            "citation_count": citation.get('citation_count', 0),
            "title": get_title(citation),
            "year": citation['earliest_date'].split('-')[0]
        })
    out['citations'] = citations

    # Get references: fetch the listed recids in one multi-get.
    record_references = record.get('references', [])
    references = []
    reference_recids = [
        ref['recid'] for ref in record_references if ref.get('recid')
    ]
    if reference_recids:
        mget_body = {"ids": reference_recids}
        record_references = current_search_client.mget(
            index='records-hep',
            doc_type='hep',
            body=mget_body,
            _source=[
                'control_number',
                'citation_count',
                'titles',
                'earliest_date'
            ])
        for reference in record_references["docs"]:
            ref_info = reference["_source"]
            references.append({
                "inspire_id": ref_info['control_number'],
                "citation_count": ref_info.get('citation_count', 0),
                "title": get_title(ref_info),
                "year": ref_info['earliest_date'].split('-')[0]
            })
    out['references'] = references

    return json.dumps(out)