def test_to_paper(self, container_title, title, citeproc):
    """
    Build a paper from citeproc metadata and check the stored paper and
    the OaiRecord attached to it, field by field.
    """
    paper = self.test_class.to_paper(citeproc)

    # Ensure that paper is in database (i.e. created)
    assert paper.pk >= 1

    # Check paper fields; authors are compared pairwise, in order
    for stored, expected in zip(paper.authors_list, citeproc['author']):
        stored_name = stored['name']
        assert stored_name['first'] == expected['given']
        assert stored_name['last'] == expected['family']
        assert stored['affiliation'] == expected['affiliation'][0]['name']
        assert stored['orcid'] == expected['ORCID']
    expected_pubdate = date(*citeproc['issued']['date-parts'][0])
    assert paper.pubdate == expected_pubdate
    assert paper.title == title

    # Ensure that oairecord is in database (i.e. created)
    oairecord = OaiRecord.objects.get(about=paper)

    # Check oairecord fields
    doi = citeproc['DOI']
    assert oairecord.doi == doi
    assert oairecord.identifier == doi_to_crossref_identifier(doi)
    assert oairecord.issue == citeproc['issue']
    assert oairecord.journal_title == container_title
    assert oairecord.pages == citeproc['page']
    assert oairecord.pubdate == expected_pubdate
    assert oairecord.publisher_name == citeproc['publisher']
    assert oairecord.source == OaiSource.objects.get(identifier='crossref')
    assert oairecord.splash_url == doi_to_url(doi)
    assert oairecord.volume == citeproc['volume']
def create_oairecord(self, record):
    """
    Turn one line of the dump (a dict) into an OAI record and attach it to
    the paper carrying the line's DOI.

    The paper is looked up by DOI and created from the DOI when it is not
    found. Nothing is done when the line has no usable DOI, when the DOI
    prefix is listed in ``free_doi_prefixes``, when the paper cannot be
    obtained, or when the paper already points at the line's URL.
    """
    doi = to_doi(record['doi'])
    if not doi:
        return
    if doi.split('/')[0] in free_doi_prefixes:
        return

    paper = Paper.get_by_doi(doi)
    if not paper:
        try:
            paper = Paper.create_by_doi(doi)
        except (MetadataSourceException, ValueError):
            return
        if not paper:
            print('no such paper for doi {doi}'.format(doi=doi))
            return

    pdf_url = record['url']

    # just to speed things up a bit...
    if paper.pdf_url == pdf_url:
        return

    # Default: the record is attributed to the oadoi source and the
    # URL doubles as splash page.
    splash_url = pdf_url
    identifier = 'oadoi:' + pdf_url
    source = self.oadoi_source

    # Publisher-hosted copies are attributed to the crossref source
    # instead, with the DOI-derived URL as splash page.
    if record['host_type'] == 'publisher':
        splash_url = doi_to_url(doi)
        identifier = doi_to_crossref_identifier(doi)
        source = self.crossref_source

    oairecord = BareOaiRecord(
        paper=paper,
        doi=doi,
        pubtype=paper.doctype,
        source=source,
        identifier=identifier,
        splash_url=splash_url,
        pdf_url=pdf_url)

    try:
        paper.add_oairecord(oairecord)
        paper.update_availability()
        # TODO re-enable this
        #paper.update_index()
    except (DataError, ValueError):
        print('Record does not fit in the DB')
def test_get_oairecord_data(self, db, monkeypatch, container_title, issn, citeproc, journal):
    """
    The assertions on the result are deliberately lax, because the
    functions called underneath are tested on their own, too
    """
    def find_journal(issn, title):
        # Stub for Journal.find: always hand back the fixture journal
        return journal

    monkeypatch.setattr(Journal, 'find', find_journal)

    record_data = self.test_class._get_oairecord_data(citeproc)

    doi = citeproc['DOI']
    assert record_data['doi'] == doi
    assert record_data['description'] == citeproc['abstract']
    assert record_data['identifier'] == doi_to_crossref_identifier(doi)
    assert record_data['issn'] == issn
    assert record_data['issue'] == citeproc['issue']
    assert record_data['journal'] == journal
    assert record_data['journal_title'] == container_title
    assert record_data['pages'] == citeproc['page']
    assert record_data['pdf_url'] == ''  # Is not OA
    assert record_data['pubdate'] == date(*citeproc['issued']['date-parts'][0])
    assert record_data['publisher_name'] == citeproc['publisher']
    assert record_data['pubtype'] == citeproc['type']
    assert record_data['source'] == OaiSource.objects.get(identifier='crossref')
    assert record_data['splash_url'] == doi_to_url(doi)
    assert record_data['volume'] == citeproc['volume']
def _get_oairecord_data(cls, data):
    """
    Collect the fields of an OAI record from citeproc metadata.

    :param data: citeproc metadata
    :returns: Returns a dict, ready to be passed to a BarePaper instance
    :raises: CiteprocError
    """
    doi = cls._get_doi(data)
    landing_url = doi_to_url(doi)
    # NOTE(review): this reads the 'licenses' key, whereas
    # _create_publication reads 'license' -- confirm which key the
    # citeproc data actually carries.
    pdf_url = cls._get_pdf_url(doi, data.get('licenses', []), landing_url)

    container = cls._get_container(data)
    issn = cls._get_issn(data)
    journal = Journal.find(issn=issn, title=container)

    # The publisher name is cut to at most 512 characters
    publisher_name = data.get('publisher', '')[:512]
    publisher = cls._get_publisher(publisher_name, journal)

    return {
        'doi': doi,
        'description': cls._get_abstract(data),
        'identifier': doi_to_crossref_identifier(doi),
        'issn': issn,
        'issue': data.get('issue', ''),
        'journal': journal,
        'journal_title': container,
        'pages': data.get('page', ''),
        'pdf_url': pdf_url,
        'pubdate': cls._get_pubdate(data),
        'publisher': publisher,
        'publisher_name': publisher_name,
        'pubtype': cls._get_pubtype(data),
        'source': OaiSource.objects.get(identifier='crossref'),
        'splash_url': landing_url,
        'volume': data.get('volume', ''),
    }
def _create_publication(paper, metadata): if not metadata: return if not metadata.get('container-title'): return doi = to_doi(metadata.get('DOI', None)) title = metadata['container-title'] if isinstance(title, list): title = title[0] title = title[:512] issn = metadata.get('ISSN', None) if issn and isinstance(issn, list): issn = issn[0] # TODO pass all the ISSN to the RoMEO interface volume = metadata.get('volume', None) pages = metadata.get('page', None) issue = metadata.get('issue', None) date_dict = metadata.get('issued', dict()) pubdate = None if 'date-parts' in date_dict: dateparts = date_dict.get('date-parts')[0] pubdate = date_from_dateparts(dateparts) # for instance it outputs dates like 2014-2-3 publisher_name = metadata.get('publisher', None) if publisher_name: publisher_name = publisher_name[:512] pubtype = metadata.get('type', 'unknown') pubtype = CROSSREF_PUBTYPE_ALIASES.get(pubtype, pubtype) splash_url = doi_to_url(doi) # PDF availability pdf_url = None licenses = set([(license or {}).get('URL') for license in metadata.get('license', [])]) doi_prefix = doi.split('/')[0] if doi_prefix in free_doi_prefixes or any(map(is_oa_license, licenses)): pdf_url = splash_url # Lookup journal search_terms = {'jtitle': title} if issn: search_terms['issn'] = issn journal = fetch_journal(search_terms) publisher = None if journal: publisher = journal.publisher AliasPublisher.increment(publisher_name, journal.publisher) else: publisher = fetch_publisher(publisher_name) barepub = BareOaiRecord( paper=paper, journal_title=title, issue=issue, volume=volume, pubdate=pubdate, pages=pages, doi=doi, pubtype=pubtype, publisher_name=publisher_name, journal=journal, publisher=publisher, pdf_url=pdf_url, splash_url=splash_url, source=OaiSource.objects.get(identifier='crossref'), identifier=doi_to_crossref_identifier(doi)) rec = paper.add_oairecord(barepub) paper.update_availability() return paper, rec
def _create_publication(paper, metadata): if not metadata: return if not metadata.get('container-title'): return doi = to_doi(metadata.get('DOI', None)) title = metadata['container-title'] if isinstance(title, list): title = title[0] title = title[:512] issn = metadata.get('ISSN', None) if issn and isinstance(issn, list): issn = issn[0] # TODO pass all the ISSN to the RoMEO interface volume = metadata.get('volume', None) pages = metadata.get('page', None) issue = metadata.get('issue', None) date_dict = metadata.get('issued', dict()) pubdate = None if 'date-parts' in date_dict: dateparts = date_dict.get('date-parts')[0] pubdate = date_from_dateparts(dateparts) # for instance it outputs dates like 2014-2-3 publisher_name = metadata.get('publisher', None) if publisher_name: publisher_name = publisher_name[:512] pubtype = metadata.get('type', 'unknown') pubtype = CROSSREF_PUBTYPE_ALIASES.get(pubtype, pubtype) splash_url = doi_to_url(doi) # PDF availability pdf_url = None licenses = set([(license or {}).get('URL') for license in metadata.get('license', [])]) doi_prefix = doi.split('/')[0] if doi_prefix in free_doi_prefixes or any(map(is_oa_license, licenses)): pdf_url = splash_url # Lookup journal journal = Journal.find(issn=issn, title=title) publisher = None if journal: publisher = journal.publisher AliasPublisher.increment(publisher_name, journal.publisher) else: publisher = Publisher.find(publisher_name) barepub = BareOaiRecord( paper=paper, journal_title=title, issue=issue, volume=volume, pubdate=pubdate, pages=pages, doi=doi, pubtype=pubtype, publisher_name=publisher_name, journal=journal, publisher=publisher, pdf_url=pdf_url, splash_url=splash_url, source=OaiSource.objects.get(identifier='crossref'), identifier=doi_to_crossref_identifier(doi)) rec = paper.add_oairecord(barepub) paper.update_availability() return paper, rec
def _create_publication(paper, metadata): if not metadata: return if not 'container-title' in metadata or not metadata['container-title']: return doi = to_doi(metadata.get('DOI',None)) title = metadata['container-title'] if type(title) == type([]): title = title[0] title = title[:512] issn = metadata.get('ISSN',None) if issn and type(issn) == type([]): issn = issn[0] # TODO pass all the ISSN to the RoMEO interface volume = metadata.get('volume',None) pages = metadata.get('page',None) issue = metadata.get('issue',None) date_dict = metadata.get('issued',dict()) pubdate = None if 'date-parts' in date_dict: dateparts = date_dict.get('date-parts')[0] pubdate = date_from_dateparts(dateparts) # for instance it outputs dates like 2014-2-3 publisher_name = metadata.get('publisher', None) if publisher_name: publisher_name = publisher_name[:512] pubtype = metadata.get('type','unknown') pubtype = CROSSREF_PUBTYPE_ALIASES.get(pubtype, pubtype) # PDF availability pdf_url = None licenses = set([(license or {}).get('URL') for license in metadata.get('license', [])]) if any(map(is_oa_license, licenses)): pdf_url = doi_to_url(doi) splash_url = doi_to_url(doi) # Lookup journal search_terms = {'jtitle':title} if issn: search_terms['issn'] = issn journal = fetch_journal(search_terms) publisher = None if journal: publisher = journal.publisher AliasPublisher.increment(publisher_name, journal.publisher) else: publisher = fetch_publisher(publisher_name) barepub = BareOaiRecord( paper=paper, journal_title=title, issue=issue, volume=volume, pubdate=pubdate, pages=pages, doi=doi, pubtype=pubtype, publisher_name=publisher_name, journal=journal, publisher=publisher, pdf_url=pdf_url, splash_url=splash_url, source=crossref_oai_source, identifier=doi_to_crossref_identifier(doi)) rec = paper.add_oairecord(barepub) paper.update_availability() return paper, rec
def create_oairecord(self, record, update_index=True, create_missing_dois=True):
    """
    Process one line of the dump (a dict): attach each of its OA locations,
    as an OAI record, to the paper carrying the line's DOI.

    :param record: one line of the dump, as a dict
    :param update_index: whether to call ``paper.update_index()`` after a
        save triggered by a changed ``pdf_url``
    :param create_missing_dois: whether to create the paper from its DOI
        when no paper with that DOI is found
    """
    doi = to_doi(record['doi'])
    if not doi:
        return
    if doi.split('/')[0] in free_doi_prefixes:
        return
    oa_locations = record.get('oa_locations')
    if not oa_locations:
        return

    paper = Paper.get_by_doi(doi)
    if not paper:
        if not create_missing_dois:
            return
        try:
            paper = Paper.create_by_doi(doi)
        except (MetadataSourceException, ValueError):
            return
        if not paper:
            logger.info('no such paper for doi {doi}'.format(doi=doi))
            return
    logger.info(doi)
    paper.cache_oairecords()

    for location in oa_locations:
        pdf_url = location['url']

        # just to speed things up a bit...
        # (note that this leaves the whole method, not just this location)
        if paper.pdf_url == pdf_url:
            return

        # Default: the record is attributed to the oadoi source and the
        # URL doubles as splash page.
        splash_url = pdf_url
        identifier = 'oadoi:' + pdf_url
        source = self.oadoi_source

        # Publisher-hosted copies are attributed to the crossref source
        # instead, with the DOI-derived URL as splash page.
        if location['host_type'] == 'publisher':
            splash_url = doi_to_url(doi)
            identifier = doi_to_crossref_identifier(doi)
            source = self.crossref_source

        oairecord = BareOaiRecord(
            paper=paper,
            doi=doi,
            pubtype=paper.doctype,
            source=source,
            identifier=identifier,
            splash_url=splash_url,
            pdf_url=pdf_url)

        try:
            # We disable checks by DOI since we know the paper has been
            # looked up by DOI already.
            previous_pdf_url = paper.pdf_url
            paper.add_oairecord(oairecord, check_by_doi=False)
            # Deliberately calls the implementation found after Paper in
            # the MRO, skipping any override defined on Paper itself.
            super(Paper, paper).update_availability()
            if previous_pdf_url == paper.pdf_url:
                continue
            # Only save (and optionally reindex) when the PDF URL changed
            paper.save()
            if update_index:
                paper.update_index()
        except (DataError, ValueError):
            logger.warning('Record does not fit in the DB')