def serialize(self, pid, record, links_factory=None):
    """Serialize a citesummary for every paper affiliated with an institution.

    :param pid: Persistent identifier instance.
    :param record: Record instance (the institution record).
    :param links_factory: Factory function for the link generation,
        which are added to the response.
    """
    institution_recid = get_id(record)

    # First pass: collect the recids of every paper whose authors carry an
    # affiliation pointing at this institution.
    affiliated_search = LiteratureSearch().query(
        'match', authors__affiliations__recid=institution_recid
    ).params(
        _source=[
            'control_number',
        ],
    )

    literature_recids = []
    for hit in affiliated_search.scan():
        literature_recids.append(get_id(hit.to_dict()))

    # Second pass: fetch only the fields the citesummary builder needs for
    # exactly those papers.
    citesummary_search = LiteratureSearch().filter(
        'terms', control_number=literature_recids
    ).params(
        _source=[
            'authors.recid',
            'collaborations.value',
            'control_number',
            'earliest_date',
            'facet_inspire_doc_type',
            'inspire_categories',
            'titles.title',
        ],
    )

    return json.dumps(build_citesummary(citesummary_search))
def build_citesummary(search):
    """Build a citesummary from the papers returned by ``search``.

    For every paper in the search results one summary dict is produced,
    holding the paper's own metadata plus one ``citations`` entry per
    paper that references it.
    """
    citesummary = []

    for hit in search.scan():
        paper = hit.to_dict()

        entry = {
            'citations': [],
            'collaboration': is_collaboration(paper),
            'core': is_core(paper),
            'date': get_date(paper),
            'document_type': get_document_type(paper),
            'id': get_id(paper),
            'subject': get_subject(paper),
            'title': get_title(paper),
        }

        # Look up every paper whose references point at this one.
        citing_search = LiteratureSearch().query(
            'match', references__recid=get_id(paper)).params(_source=[
                'authors.recid',
                'collaboration.value',
                'collections.primary',
                'control_number',
                'earliest_date',
                'facet_inspire_doc_type',
                'inspire_categories',
                'titles.title',
            ])

        for citing_hit in citing_search.scan():
            citing_paper = citing_hit.to_dict()
            entry['citations'].append({
                'collaboration': is_collaboration(citing_paper),
                'core': is_core(citing_paper),
                'date': get_date(citing_paper),
                'document_type': get_document_type(citing_paper),
                'id': get_id(citing_paper),
                'selfcite': is_selfcite(paper, citing_paper),
                'subject': get_subject(citing_paper),
                'title': get_title(citing_paper),
            })

        citesummary.append(entry)

    return citesummary
def serialize(self, pid, record, links_factory=None):
    """Serialize the citesummary of a single literature record.

    :param pid: Persistent identifier instance.
    :param record: Record instance.
    :param links_factory: Factory function for the link generation,
        which are added to the response.
    """
    summary = {
        'citations': [],
        'collaboration': is_collaboration(record),
        'core': is_core(record),
        'date': get_date(record),
        'document_type': get_document_type(record),
        'id': get_id(record),
        'subject': get_subject(record),
        'title': get_title(record),
    }

    # Every paper whose references point at this record is a citation.
    citing_search = LiteratureSearch().query(
        'match', references__recid=get_id(record)).params(_source=[
            'authors.recid',
            'collaboration.value',
            'collections.primary',
            'control_number',
            'earliest_date',
            'facet_inspire_doc_type',
            'inspire_categories',
            'titles.title',
        ],
    )

    for hit in citing_search.scan():
        citing_paper = hit.to_dict()
        summary['citations'].append({
            'collaboration': is_collaboration(citing_paper),
            'core': is_core(citing_paper),
            'date': get_date(citing_paper),
            'document_type': get_document_type(citing_paper),
            'id': get_id(citing_paper),
            'subject': get_subject(citing_paper),
            'selfcite': is_selfcite(record, citing_paper),
            'title': get_title(citing_paper),
        })

    return json.dumps([summary])
def serialize(self, pid, record, links_factory=None):
    """Return a list of co-authors for a given author recid.

    :param pid: Persistent identifier instance.
    :param record: Record instance.
    :param links_factory: Factory function for the link generation,
        which are added to the response.
    """
    author_pid = pid.pid_value
    coauthors = {}

    search = LiteratureSearch().query({
        "match": {
            "authors.recid": author_pid
        }
    }).params(
        _source=[
            "authors.full_name",
            "authors.recid",
            "authors.record",
        ]
    )

    for result in search.scan():
        result_source = result.to_dict()['authors']

        for author in result_source:
            try:
                # Don't add the reference author.
                if author['recid'] != author_pid:
                    if author['recid'] in coauthors:
                        coauthors[author['recid']]['count'] += 1
                    else:
                        coauthors[author['recid']] = dict(
                            count=1,
                            full_name=author['full_name'],
                            id=author['recid'],
                            record=author['record'],
                        )
            except KeyError:
                # Not every signature carries all of recid/full_name/record;
                # skip incomplete ones.
                pass

    # Wrap in list(): on Python 3 dict.values() is a view object that
    # json.dumps cannot serialize; list() is a no-op change on Python 2.
    return json.dumps(list(coauthors.values()))
def build_citesummary(search):
    """Build a citesummary from the papers returned by ``search``.

    Each paper in the results yields one summary dict, augmented with one
    ``citations`` entry per paper that references it.
    """
    citesummary = []

    for hit in search.scan():
        paper = hit.to_dict()

        entry = {
            'citations': [],
            'collaboration': is_collaboration(paper),
            'core': is_core(paper),
            'date': get_date(paper),
            'document_type': get_document_type(paper),
            'id': get_id(paper),
            'subject': get_subject(paper),
            'title': get_title(paper),
        }

        # Find all papers that reference this one.
        citing_search = LiteratureSearch().query(
            'match', references__recid=get_id(paper)
        ).params(
            _source=[
                'authors.recid',
                'collaborations.value',
                'control_number',
                'earliest_date',
                'facet_inspire_doc_type',
                'inspire_categories',
                'titles.title',
            ]
        )

        for citing_hit in citing_search.scan():
            citing_paper = citing_hit.to_dict()
            entry['citations'].append({
                'collaboration': is_collaboration(citing_paper),
                'core': is_core(citing_paper),
                'date': get_date(citing_paper),
                'document_type': get_document_type(citing_paper),
                'id': get_id(citing_paper),
                'selfcite': is_selfcite(paper, citing_paper),
                'subject': get_subject(citing_paper),
                'title': get_title(citing_paper),
            })

        citesummary.append(entry)

    return citesummary
def serialize(self, pid, record, links_factory=None):
    """Return a list of publications for a given author recid.

    :param pid: Persistent identifier instance.
    :param record: Record instance.
    :param links_factory: Factory function for the link generation,
        which are added to the response.
    """
    author_pid = pid.pid_value
    publications = []

    search = LiteratureSearch().query({
        "match": {
            "authors.recid": author_pid
        }
    }).params(
        _source=[
            "accelerator_experiments",
            "earliest_date",
            "citation_count",
            "control_number",
            "facet_inspire_doc_type",
            "publication_info",
            "self",
            "thesaurus_terms",
            "titles",
        ]
    )

    for result in search.scan():
        result_source = result.to_dict()

        publication = {}
        publication['id'] = int(result_source['control_number'])
        publication['record'] = result_source['self']
        publication['title'] = get_title(result_source)

        # Get the earliest date.
        try:
            publication['date'] = result_source['earliest_date']
        except KeyError:
            pass

        # Get publication type.
        try:
            publication['type'] = result_source.get(
                'facet_inspire_doc_type', [])[0]
        except IndexError:
            pass

        # Get citation count.
        try:
            publication['citations'] = result_source['citation_count']
        except KeyError:
            pass

        # Get journal. A placeholder dict is created optimistically and
        # deleted again if there is no journal_title (or no
        # publication_info at all).
        try:
            publication['journal'] = {}
            publication['journal']['title'] = result_source.get(
                'publication_info', [])[0]['journal_title']

            # Get journal id and $self. Note: if journal_recid exists but
            # journal_record is missing, 'id' is kept and 'record' is not.
            try:
                publication['journal']['id'] = result_source.get(
                    'publication_info', [])[0]['journal_recid']
                publication['journal']['record'] = result_source.get(
                    'publication_info', [])[0]['journal_record']
            except KeyError:
                pass
        except (IndexError, KeyError):
            del publication['journal']

        # Get collaborations.
        collaborations = set()
        for experiment in result_source.get('accelerator_experiments', []):
            collaborations.add(experiment.get('experiment'))

        if collaborations:
            publication['collaborations'] = list(collaborations)

        publications.append(publication)

    return json.dumps(publications)
def serialize(self, pid, record, links_factory=None):
    """Return a list of citations for a given author recid.

    :param pid: Persistent identifier instance.
    :param record: Record instance.
    :param links_factory: Factory function for the link generation,
        which are added to the response.
    """
    author_pid = pid.pid_value
    citations = {}

    search = LiteratureSearch().query({
        "match": {
            "authors.recid": author_pid
        }
    }).params(_source=[
        "authors.recid",
        "control_number",
        "self",
    ])

    # For each publication co-authored by a given author...
    for result in search.scan():
        result_source = result.to_dict()

        recid = result_source['control_number']
        authors = set([i['recid'] for i in result_source['authors']])
        citations[recid] = {}

        nested_search = LiteratureSearch().query({
            "match": {
                "references.recid": recid
            }
        }).params(_source=[
            "authors.recid",
            "collections",
            "control_number",
            "earliest_date",
            "self",
        ])

        # The source record that is being cited.
        citations[recid]['citee'] = dict(
            id=recid,
            record=result_source['self'],
        )
        citations[recid]['citers'] = []

        # Check all publications, which cite the parent record.
        for nested_result in nested_search.scan():
            nested_result_source = nested_result.to_dict()

            # Not every signature has a recid (at least for demo records).
            try:
                nested_authors = set(
                    [i['recid'] for i in nested_result_source['authors']])
            except KeyError:
                nested_authors = set()

            citation = dict(
                citer=dict(id=int(nested_result_source['control_number']),
                           record=nested_result_source['self']),
                # If at least one author is shared, it's a self-citation.
                self_citation=len(authors & nested_authors) > 0,
            )

            # Get the earliest date of a citer.
            try:
                citation['date'] = nested_result_source['earliest_date']
            except KeyError:
                pass

            # Get status if a citer is published.
            # FIXME: As discussed with Sam, we should have a boolean flag
            # for this type of information.
            try:
                citation['published_paper'] = "Published" in [
                    i['primary'] for i in nested_result_source['collections']
                ]
            except KeyError:
                citation['published_paper'] = False

            citations[recid]['citers'].append(citation)

    # Wrap in list(): on Python 3 dict.values() is a view object that
    # json.dumps cannot serialize; list() is a no-op change on Python 2.
    return json.dumps(list(citations.values()))
def get_publications():
    """Return publications, keywords and collaborations for a given recid.

    Reads ``recid`` from the request parameters and responds with a JSON
    object holding three lists.
    """
    recid = request.values.get('recid', 0, type=int)

    publications = []
    collaborations = set()
    keywords = set()

    search = LiteratureSearch().query({
        "match": {
            "authors.recid": recid
        }
    }).params(_source=[
        'accelerator_experiments',
        'control_number',
        'earliest_date',
        'facet_inspire_doc_type',
        'publication_info',
        'titles',
        'thesaurus_terms'
    ])

    for result in search.scan():
        try:
            result_source = result.to_dict()

            publication = {}
            # Get publication title (required).
            publication['title'] = get_title(result_source)
            # Get publication recid (required).
            publication['recid'] = result_source['control_number']
        except (IndexError, KeyError):
            # Skip records missing the required fields entirely.
            continue

        # Get publication type.
        try:
            publication['type'] = result_source.get('facet_inspire_doc_type',
                                                    [])[0]
        except IndexError:
            publication['type'] = "Not defined"

        # Get journal title.
        try:
            publication['journal_title'] = result_source.get(
                'publication_info', [])[0]['journal_title']

            # Get journal recid.
            try:
                publication['journal_recid'] = result_source.get(
                    'publication_info', [])[0]['journal_recid']
            except KeyError:
                pass
        except (IndexError, KeyError):
            pass

        # Get publication year.
        try:
            publication['year'] = result_source.get('publication_info',
                                                    [])[0]['year']
        except (IndexError, KeyError):
            pass

        # Get keywords. Use != instead of `is not`: identity comparison
        # against a string literal depends on interning and effectively
        # never excluded the automatic-keywords marker.
        for keyword in result_source.get('thesaurus_terms', []):
            if keyword.get('keyword') != "* Automatic Keywords *" \
                    and keyword.get('keyword'):
                keywords.add(keyword.get('keyword'))

        # Get collaborations.
        for experiment in result_source.get('accelerator_experiments', []):
            collaborations.add(experiment.get('experiment'))

        # Append to the list.
        publications.append(publication)

    response = {}
    response['publications'] = publications
    response['keywords'] = list(keywords)
    response['collaborations'] = list(collaborations)

    return jsonify(response)
def get_publications():
    """Return publications, keywords and collaborations for a given recid.

    Reads ``recid`` from the request parameters and responds with a JSON
    object holding three lists.
    """
    recid = request.values.get('recid', 0, type=int)

    publications = []
    collaborations = set()
    keywords = set()

    search = LiteratureSearch().query(
        {"match": {"authors.recid": recid}}
    ).params(
        _source=[
            'accelerator_experiments',
            'control_number',
            'earliest_date',
            'facet_inspire_doc_type',
            'publication_info',
            'titles',
            'keywords'
        ]
    )

    for result in search.scan():
        try:
            result_source = result.to_dict()

            publication = {}
            # Get publication title (required).
            publication['title'] = get_title(result_source)
            # Get publication recid (required).
            publication['recid'] = result_source['control_number']
        except (IndexError, KeyError):
            # Skip records missing the required fields entirely.
            continue

        # Get publication type.
        try:
            publication['type'] = result_source.get(
                'facet_inspire_doc_type', [])[0]
        except IndexError:
            publication['type'] = "Not defined"

        # Get journal title.
        try:
            publication['journal_title'] = result_source.get(
                'publication_info', [])[0]['journal_title']

            # Get journal recid.
            try:
                publication['journal_recid'] = result_source.get(
                    'publication_info', [])[0]['journal_recid']
            except KeyError:
                pass
        except (IndexError, KeyError):
            pass

        # Get publication year.
        try:
            publication['year'] = result_source.get(
                'publication_info', [])[0]['year']
        except (IndexError, KeyError):
            pass

        # Get keywords. Use != instead of `is not`: identity comparison
        # against a string literal depends on interning and effectively
        # never excluded the automatic-keywords marker.
        for keyword in result_source.get('keywords', []):
            if keyword.get('keyword') != "* Automatic Keywords *" \
                    and keyword.get('keyword'):
                keywords.add(keyword.get('keyword'))

        # Get collaborations.
        for experiment in result_source.get(
                'accelerator_experiments', []):
            collaborations.add(experiment.get('experiment'))

        # Append to the list.
        publications.append(publication)

    response = {}
    response['publications'] = publications
    response['keywords'] = list(keywords)
    response['collaborations'] = list(collaborations)

    return jsonify(response)
def serialize(self, pid, record, links_factory=None):
    """Return a different metrics for a given author recid.

    :param pid: Persistent identifier instance.
    :param record: Record instance.
    :param links_factory: Factory function for the link generation,
        which are added to the response.
    """
    author_pid = pid.pid_value

    fields = set()
    keywords = []

    statistics = {}
    statistics['citations'] = 0
    statistics['publications'] = 0
    statistics['types'] = {}
    statistics_citations = {}

    search = LiteratureSearch().query({
        "match": {
            "authors.recid": author_pid
        }
    }).params(
        _source=[
            "citation_count",
            "control_number",
            "facet_inspire_doc_type",
            "facet_inspire_subjects",
            "thesaurus_terms",
        ]
    )

    for result in search.scan():
        result_source = result.to_dict()

        # Increment the count of the total number of publications.
        statistics['publications'] += 1

        # Increment the count of citations.
        citation_count = result_source.get('citation_count', 0)
        statistics['citations'] += citation_count
        statistics_citations[int(result_source['control_number'])] = \
            citation_count

        # Count how many times certain type of publication was published.
        # Reset per record: previously a record without the field raised
        # NameError on the first iteration or silently reused (and
        # double-counted) the previous record's type.
        publication_type = None
        try:
            publication_type = result_source.get(
                'facet_inspire_doc_type', [])[0]
        except IndexError:
            pass

        if publication_type:
            if publication_type in statistics['types']:
                statistics['types'][publication_type] += 1
            else:
                statistics['types'][publication_type] = 1

        # Get fields.
        for field in result_source.get('facet_inspire_subjects', []):
            fields.add(field)

        # Get keywords, dropping the automatic-keywords marker.
        keywords.extend([
            k for k in force_force_list(
                get_value(result_source, 'thesaurus_terms.keyword'))
            if k != '* Automatic Keywords *'])

    # Calculate h-index together with i10-index.
    statistics['hindex'] = calculate_h_index(statistics_citations)
    statistics['i10index'] = calculate_i10_index(statistics_citations)

    if fields:
        statistics['fields'] = list(fields)

    # Return the top 25 keywords.
    if keywords:
        counter = Counter(keywords)
        statistics['keywords'] = [{
            'count': i[1],
            'keyword': i[0]
        } for i in counter.most_common(25)]

    return json.dumps(statistics)
def serialize(self, pid, record, links_factory=None):
    """Return a different metrics for a given author recid.

    :param pid: Persistent identifier instance.
    :param record: Record instance.
    :param links_factory: Factory function for the link generation,
        which are added to the response.
    """
    author_pid = pid.pid_value

    fields = set()
    keywords = []

    statistics = {}
    statistics['citations'] = 0
    statistics['publications'] = 0
    statistics['types'] = {}
    statistics_citations = {}

    search = LiteratureSearch().query({
        "match": {
            "authors.recid": author_pid
        }
    }).params(_source=[
        "citation_count",
        "control_number",
        "facet_inspire_doc_type",
        "facet_inspire_subjects",
        "keywords",
    ])

    for result in search.scan():
        result_source = result.to_dict()

        # Increment the count of the total number of publications.
        statistics['publications'] += 1

        # Increment the count of citations.
        citation_count = result_source.get('citation_count', 0)
        statistics['citations'] += citation_count
        statistics_citations[result_source['control_number']] = \
            citation_count

        # Count how many times certain type of publication was published.
        # Reset per record: previously a record without the field raised
        # NameError on the first iteration or silently reused (and
        # double-counted) the previous record's type.
        publication_type = None
        try:
            publication_type = result_source.get('facet_inspire_doc_type',
                                                 [])[0]
        except IndexError:
            pass

        if publication_type:
            if publication_type in statistics['types']:
                statistics['types'][publication_type] += 1
            else:
                statistics['types'][publication_type] = 1

        # Get fields.
        for field in result_source.get('facet_inspire_subjects', []):
            fields.add(field)

        # Get keywords, dropping the automatic-keywords marker.
        keywords.extend([
            k for k in force_list(get_value(result_source, 'keywords.value'))
            if k != '* Automatic Keywords *'
        ])

    # Calculate h-index together with i10-index.
    statistics['hindex'] = calculate_h_index(statistics_citations)
    statistics['i10index'] = calculate_i10_index(statistics_citations)

    if fields:
        statistics['fields'] = list(fields)

    # Return the top 25 keywords.
    if keywords:
        counter = Counter(keywords)
        statistics['keywords'] = [{
            'count': i[1],
            'keyword': i[0]
        } for i in counter.most_common(25)]

    return json.dumps(statistics)
def serialize(self, pid, record, links_factory=None):
    """Return a list of citations for a given author recid.

    :param pid: Persistent identifier instance.
    :param record: Record instance.
    :param links_factory: Factory function for the link generation,
        which are added to the response.
    """
    author_pid = pid.pid_value
    citations = {}

    search = LiteratureSearch().query({
        "match": {
            "authors.recid": author_pid
        }
    }).params(
        _source=[
            "authors.recid",
            "control_number",
            "self",
        ]
    )

    # For each publication co-authored by a given author...
    for result in search.scan():
        result_source = result.to_dict()

        recid = result_source['control_number']
        authors = set([i['recid'] for i in result_source['authors']])
        citations[recid] = {}

        nested_search = LiteratureSearch().query({
            "match": {
                "references.recid": recid
            }
        }).params(
            _source=[
                "authors.recid",
                "collections",
                "control_number",
                "earliest_date",
                "self",
            ]
        )

        # The source record that is being cited.
        citations[recid]['citee'] = dict(
            id=recid,
            record=result_source['self'],
        )
        citations[recid]['citers'] = []

        # Check all publications, which cite the parent record.
        for nested_result in nested_search.scan():
            nested_result_source = nested_result.to_dict()

            # Not every signature has a recid (at least for demo records).
            try:
                nested_authors = set(
                    [i['recid'] for i in nested_result_source['authors']]
                )
            except KeyError:
                nested_authors = set()

            citation = dict(
                citer=dict(
                    id=int(nested_result_source['control_number']),
                    record=nested_result_source['self']
                ),
                # If at least one author is shared, it's a self-citation.
                self_citation=len(authors & nested_authors) > 0,
            )

            # Get the earliest date of a citer.
            try:
                citation['date'] = nested_result_source['earliest_date']
            except KeyError:
                pass

            # Get status if a citer is published.
            # FIXME: As discussed with Sam, we should have a boolean flag
            # for this type of information.
            try:
                citation['published_paper'] = "Published" in [
                    i['primary'] for i in nested_result_source[
                        'collections']]
            except KeyError:
                citation['published_paper'] = False

            citations[recid]['citers'].append(citation)

    # Wrap in list(): on Python 3 dict.values() is a view object that
    # json.dumps cannot serialize; list() is a no-op change on Python 2.
    return json.dumps(list(citations.values()))
def serialize(self, pid, record, links_factory=None):
    """Return a list of publications for a given author recid.

    :param pid: Persistent identifier instance.
    :param record: Record instance.
    :param links_factory: Factory function for the link generation,
        which are added to the response.
    """
    author_pid = pid.pid_value
    publications = []

    search = LiteratureSearch().query({
        "match": {
            "authors.recid": author_pid
        }
    }).params(
        _source=[
            "accelerator_experiments",
            "earliest_date",
            "citation_count",
            "control_number",
            "facet_inspire_doc_type",
            "publication_info",
            "self",
            "keywords",
            "titles",
        ]
    )

    for result in search.scan():
        result_source = result.to_dict()

        publication = {}
        publication['id'] = int(result_source['control_number'])
        publication['record'] = result_source['self']
        publication['title'] = get_title(result_source)

        # Get the earliest date.
        try:
            publication['date'] = result_source['earliest_date']
        except KeyError:
            pass

        # Get publication type.
        try:
            publication['type'] = result_source.get(
                'facet_inspire_doc_type', [])[0]
        except IndexError:
            pass

        # Get citation count.
        try:
            publication['citations'] = result_source['citation_count']
        except KeyError:
            pass

        # Get journal. A placeholder dict is created optimistically and
        # deleted again if there is no journal_title (or no
        # publication_info at all).
        try:
            publication['journal'] = {}
            publication['journal']['title'] = result_source.get(
                'publication_info', [])[0]['journal_title']

            # Get journal id and $self. Note: if journal_recid exists but
            # journal_record is missing, 'id' is kept and 'record' is not.
            try:
                publication['journal']['id'] = result_source.get(
                    'publication_info', [])[0]['journal_recid']
                publication['journal']['record'] = result_source.get(
                    'publication_info', [])[0]['journal_record']
            except KeyError:
                pass
        except (IndexError, KeyError):
            del publication['journal']

        # Get collaborations.
        collaborations = set()
        for experiment in result_source.get('accelerator_experiments', []):
            collaborations.add(experiment.get('experiment'))

        if collaborations:
            publication['collaborations'] = list(collaborations)

        publications.append(publication)

    return json.dumps(publications)
def serialize(self, pid, record, links_factory=None):
    """Return a list of publications for a given author recid.

    :param pid: Persistent identifier instance.
    :param record: Record instance.
    :param links_factory: Factory function for the link generation,
        which are added to the response.
    """
    author_pid = pid.pid_value
    publications = []

    # 'nested' query: authors is a nested field here, so the match must be
    # scoped to the authors path.
    query = Q('match', authors__recid=author_pid)
    search = LiteratureSearch().query('nested', path='authors', query=query)\
        .params(_source=[
            'accelerator_experiments',
            'citation_count',
            'control_number',
            'earliest_date',
            'facet_inspire_doc_type',
            'keywords',
            'publication_info',
            'self',
            'titles',
        ])

    for result in search.scan():
        result_source = result.to_dict()

        publication = {}
        publication['id'] = int(result_source['control_number'])
        publication['record'] = result_source['self']
        publication['title'] = LiteratureReader(result_source).title

        # Get the earliest date.
        try:
            publication['date'] = result_source['earliest_date']
        except KeyError:
            pass

        # Get publication type.
        try:
            publication['type'] = result_source.get(
                'facet_inspire_doc_type', [])[0]
        except IndexError:
            pass

        # Get citation count.
        try:
            publication['citations'] = result_source['citation_count']
        except KeyError:
            pass

        # Get journal. A placeholder dict is created optimistically and
        # deleted again if there is no journal_title (or no
        # publication_info at all).
        try:
            publication['journal'] = {}
            publication['journal']['title'] = result_source.get(
                'publication_info', [])[0]['journal_title']

            # Get journal id and $self. Note: if journal_recid exists but
            # journal_record is missing, 'id' is kept and 'record' is not.
            try:
                publication['journal']['id'] = result_source.get(
                    'publication_info', [])[0]['journal_recid']
                publication['journal']['record'] = result_source.get(
                    'publication_info', [])[0]['journal_record']
            except KeyError:
                pass
        except (IndexError, KeyError):
            del publication['journal']

        # Get collaborations.
        collaborations = set()
        for experiment in result_source.get('accelerator_experiments', []):
            collaborations.add(experiment.get('experiment'))

        if collaborations:
            publication['collaborations'] = list(collaborations)

        publications.append(publication)

    return json.dumps(publications)