def create_paper(self, work):
    """
    Build a paper from ``work``, attach an OAI record and save it.

    :param work: object providing ``skipped``, ``authors_and_orcids``,
        ``title``, ``pubdate``, ``api_uri``, ``splash_url`` and ``pubtype``.
        It must not be marked as skipped.
    :returns: the object returned by ``self.associate_researchers`` once it
        has been saved and indexed, or None if a ValueError was raised
        while doing so.
    """
    assert not work.skipped

    # Bare (unsaved) paper built from the work's own metadata
    names, ids = work.authors_and_orcids
    bare = BarePaper.create(
        work.title,
        names,
        work.pubdate,
        visible=True,
        affiliations=None,
        orcids=ids,
    )
    oai_record = BareOaiRecord(
        source=self.oai_source,
        identifier=work.api_uri,
        splash_url=work.splash_url,
        pubtype=work.pubtype,
    )
    bare.add_oairecord(oai_record)

    try:
        saved = self.associate_researchers(Paper.from_bare(bare))
        saved.save()
        saved.update_index()
    except ValueError:
        return None
    return saved
def translate(self, header, metadata):
    """
    Creates a BarePaper from an OAI record.

    :param header: OAI header of the record; it is forwarded to
        ``get_source`` and ``add_oai_record`` and its ``identifier()``
        is used in the warning message.
    :param metadata: mapping of the record's metadata; ``'title'`` is
        read as a sequence whose first element is the title.
    :returns: the BarePaper, or None when the title, authors, publication
        date or source is missing, or when creating the paper or its
        record raised a ValueError.
    """
    # We need three things to create a paper:
    # - publication date
    pubdate = self.find_earliest_oai_date(metadata)
    # - authors
    authors = self.get_oai_authors(metadata)
    # - title
    if not metadata.get('title') or not authors or not pubdate:
        return None

    # Find the OAI source
    source = self.get_source(header, metadata)
    if not source:
        print("Invalid source from the proxy, skipping")
        return None

    # Create paper and record.
    # paper stays None if BarePaper.create itself raises, so the handler
    # below must not assume it exists.
    paper = None
    try:
        paper = BarePaper.create(metadata['title'][0], authors, pubdate)
        self.add_oai_record(header, metadata, source, paper)
        return paper
    except ValueError as e:
        print("Warning, OAI record "+header.identifier()+" skipped:\n"+unicode(e))
        if paper is not None:
            paper.update_availability()
    return None
def translate(self, header, metadata):
    """
    Creates a BarePaper from an OAI record.

    :param header: OAI header of the record; it is forwarded to
        ``add_oai_record`` and its ``identifier()`` is used in the
        warning message.
    :param metadata: mapping of the record's metadata; ``'title'`` is
        read as a sequence whose first element is the title.
    :returns: the BarePaper, or None when the title, authors or
        publication date is missing, or when creating the paper or its
        record raised a ValueError.
    """
    # We need three things to create a paper:
    # - publication date
    pubdate = self.find_earliest_oai_date(metadata)
    # - authors
    authors = self.get_oai_authors(metadata)
    # - title
    if not metadata.get('title') or not authors or not pubdate:
        logger.debug("No title, authors or pubdate")
        return None

    # Create paper and record.
    # paper stays None if BarePaper.create itself raises, so the handler
    # below must not assume it exists.
    paper = None
    try:
        paper = BarePaper.create(metadata['title'][0], authors, pubdate)
        self.add_oai_record(header, metadata, paper)
        return paper
    except ValueError as e:
        # Every logging argument needs its own placeholder in the message.
        logger.warning("OAI record %s skipped:\n%s",
                       header.identifier(), e, exc_info=True)
        if paper is not None:
            paper.update_availability()
    return None
def create_paper(self, data_paper):
    """
    Build a BarePaper from ``data_paper`` and attach an OAI record to it.

    :param data_paper: object providing ``skipped``, ``title``,
        ``authors``, ``pubdate``, ``orcids``, ``identifier``,
        ``splash_url`` and ``doctype``. It must not be marked as skipped.
    :returns: the BarePaper carrying the new record.
    """
    assert not data_paper.skipped

    bare = BarePaper.create(
        data_paper.title,
        data_paper.authors,
        data_paper.pubdate,
        visible=True,
        affiliations=None,
        orcids=data_paper.orcids,
    )
    oai_record = BareOaiRecord(
        source=orcid_oai_source(),
        identifier=data_paper.identifier,
        splash_url=data_paper.splash_url,
        pubtype=data_paper.doctype,
    )
    bare.add_oairecord(oai_record)
    return bare
def create_paper(self, data_paper):
    """
    Build a BarePaper from ``data_paper`` and attach an OAI record to it.

    :param data_paper: object providing ``skipped``, ``title``,
        ``authors``, ``pubdate``, ``affiliations``, ``identifier``,
        ``splash_url`` and ``doctype``. It must not be marked as skipped.
    :returns: the BarePaper carrying the new record.
    """
    assert not data_paper.skipped

    bare = BarePaper.create(
        data_paper.title,
        data_paper.authors,
        data_paper.pubdate,
        'VISIBLE',
        data_paper.affiliations,
    )
    oai_record = BareOaiRecord(
        source=orcid_oai_source,
        identifier=data_paper.identifier,
        splash_url=data_paper.splash_url,
        pubtype=data_paper.doctype,
    )
    bare.add_oairecord(oai_record)
    return bare
def test_add_author(self):
    """
    p.add_author adds the author at the right place
    """
    peter = BareName.create('Peter', 'Johnstone')
    xing = BareName.create('Xing', 'Li')
    john = BareName.create('John', 'Dubuc')
    paper = BarePaper.create(
        'The title', [peter], datetime.date(year=2012, month=1, day=9))

    # Adding without a position: the paper now has two authors
    paper.add_author(BareAuthor(name=john))
    self.assertEqual(len(paper.authors), 2)

    # Adding at position 1 is expected to give the order Peter, Xing, John
    paper.add_author(BareAuthor(name=xing), position=1)
    self.assertListEqual(paper.author_names(), [peter, xing, john])

    # The extra author is built outside the assertion so that only
    # add_author itself is expected to raise.
    extra = BareAuthor(name=BareName.create('Cantor', 'Bernstein'))
    with self.assertRaises(ValueError):
        paper.add_author(extra, position=8)
def create_paper(self, work):
    """
    Build a BarePaper from ``work`` and attach an OAI record to it.

    :param work: object providing ``skipped``, ``authors_and_orcids``,
        ``title``, ``pubdate``, ``api_uri``, ``splash_url`` and ``pubtype``.
        It must not be marked as skipped.
    :returns: the BarePaper carrying the new record.
    """
    assert not work.skipped

    names, ids = work.authors_and_orcids
    bare = BarePaper.create(
        work.title,
        names,
        work.pubdate,
        visible=True,
        affiliations=None,
        orcids=ids,
    )
    oai_record = BareOaiRecord(
        source=orcid_oai_source(),
        identifier=work.api_uri,
        splash_url=work.splash_url,
        pubtype=work.pubtype,
    )
    bare.add_oairecord(oai_record)
    return bare
def create_paper(self, work):
    """
    Build a BarePaper from ``work`` and attach an OAI record to it.

    :param work: object providing ``skipped``, ``authors_and_orcids``,
        ``title``, ``pubdate``, ``api_uri``, ``splash_url`` and ``pubtype``.
        It must not be marked as skipped.
    :returns: the BarePaper carrying the new record, whose source is
        ``self.oai_source``.
    """
    assert not work.skipped

    names, ids = work.authors_and_orcids
    bare = BarePaper.create(
        work.title,
        names,
        work.pubdate,
        visible=True,
        affiliations=None,
        orcids=ids,
    )
    oai_record = BareOaiRecord(
        source=self.oai_source,
        identifier=work.api_uri,
        splash_url=work.splash_url,
        pubtype=work.pubtype,
    )
    bare.add_oairecord(oai_record)
    return bare
def translate(self, header, metadata):
    """
    Creates a BarePaper from an OAI record.

    :param header: OAI header of the record; it is forwarded to
        ``add_oai_record`` and its ``identifier()`` is used in the
        warning message.
    :param metadata: mapping of the record's metadata; ``'title'`` is
        read as a sequence whose first element is the title.
    :returns: the BarePaper, or None when the title, authors or
        publication date is missing, or when creating the paper or its
        record raised a ValueError.
    """
    # We need three things to create a paper:
    # - publication date
    pubdate = self.find_earliest_oai_date(metadata)
    # - authors
    authors = self.get_oai_authors(metadata)
    # - title
    if not metadata.get('title') or not authors or not pubdate:
        logger.debug("No title, authors or pubdate")
        return None

    # Create paper and record.
    # paper stays None if BarePaper.create itself raises, so the handler
    # below must not assume it exists.
    paper = None
    try:
        paper = BarePaper.create(metadata['title'][0], authors, pubdate)
        self.add_oai_record(header, metadata, paper)
        return paper
    except ValueError as e:
        # Every logging argument needs its own placeholder in the message.
        logger.warning("OAI record %s skipped:\n%s",
                       header.identifier(), e, exc_info=True)
        if paper is not None:
            paper.update_availability()
    return None
def to_paper(cls, data):
    """
    Convert citeproc metadata into a paper object.

    All the data needed is collected first through ``cls._get_paper_data``
    and ``cls._get_oairecord_data``; only then are the bare models built,
    turned into a Paper and indexed.

    :param data: citeproc metadata. Note that CrossRef puts its citeproc
        into a message block.
    :returns: Paper object
    :raises: CiteprocError if ``data`` is not a dict
    """
    if not isinstance(data, dict):
        raise CiteprocError('Invalid metadaformat, expecting dict')

    # Collect everything before creating any model
    paper_fields = cls._get_paper_data(data)
    record_fields = cls._get_oairecord_data(data)

    bare = BarePaper.create(**paper_fields)
    record = BareOaiRecord(paper=bare, **record_fields)
    bare.add_oairecord(record)
    bare.update_availability()

    result = Paper.from_bare(bare)
    result.update_index()
    return result
def test_add_author(self):
    """
    p.add_author adds the author at the right place
    """
    peter = BareName.create('Peter', 'Johnstone')
    xing = BareName.create('Xing', 'Li')
    john = BareName.create('John', 'Dubuc')
    paper = BarePaper.create(
        'The title', [peter], datetime.date(year=2012, month=1, day=9))

    # Adding without a position: the paper now has two authors
    paper.add_author(BareAuthor(name=john))
    self.assertEqual(len(paper.authors), 2)

    # Adding at position 1 is expected to give the order Peter, Xing, John
    paper.add_author(BareAuthor(name=xing), position=1)
    self.assertListEqual(paper.author_names(), [peter, xing, john])

    # The extra author is built outside the assertion so that only
    # add_author itself is expected to raise.
    extra = BareAuthor(name=BareName.create('Cantor', 'Bernstein'))
    with self.assertRaises(ValueError):
        paper.add_author(extra, position=8)
def api_paper_query(request):
    """
    Look up or build a paper from a JSON query.

    The request body must be a UTF-8 encoded JSON object. If it has a
    non-empty ``doi`` field the paper is created from that DOI; otherwise
    ``title``, ``date`` and ``authors`` are validated and a BarePaper is
    built from them.

    :param request: object whose ``body`` holds the encoded JSON payload
    :returns: a dict with ``status`` set to ``'ok'`` and the JSON
        representation of the paper under ``paper``
    :raises: BadRequest when the payload or one of its fields is invalid,
        or when no paper could be obtained for the DOI
    """
    try:
        fields = json.loads(request.body.decode('utf-8'))
    except (ValueError, UnicodeDecodeError):
        raise BadRequest('Invalid JSON payload')
    # Valid JSON is not necessarily an object: a list or a scalar has no
    # .get() and must be rejected as a client error.
    if not isinstance(fields, dict):
        raise BadRequest('Invalid JSON payload')

    doi = fields.get('doi')
    if doi:
        p = None
        try:
            p = Paper.create_by_doi(doi, bare=True)
        except MetadataSourceException:
            # Reported to the client below, as p is still None
            pass
        if p is None:
            raise BadRequest('Could not find a paper with this DOI')
        return {'status': 'ok', 'paper': p.json()}

    title = fields.get('title')
    if not isinstance(title, unicode) or not title or len(title) > 512:
        raise BadRequest(
            'Invalid title, has to be a non-empty string shorter than 512 characters')

    date = fields.get('date')
    if not isinstance(date, unicode):
        raise BadRequest('A date is required')
    try:
        date = tolerant_datestamp_to_datetime(date)
    except ValueError as e:
        raise BadRequest(unicode(e))

    authors = fields.get('authors')
    if not isinstance(authors, list):
        raise BadRequest('A list of authors is expected')
    if not authors:
        raise BadRequest('No authors provided')

    parsed_authors = []
    for a in authors:
        author = None
        if not isinstance(a, dict):
            raise BadRequest('Invalid author')

        if 'first' in a and 'last' in a:
            if (not isinstance(a['first'], unicode)
                    or not isinstance(a['last'], unicode)
                    or not a['last']):
                raise BadRequest('Invalid (first,last) name provided')
            author = (a['first'], a['last'])
        elif 'plain' in a:
            if not isinstance(a['plain'], unicode) or not a['plain']:
                raise BadRequest('Invalid plain name provided')
            author = parse_comma_name(a['plain'])

        if author is None:
            raise BadRequest('Invalid author')

        parsed_authors.append(BareName.create(author[0], author[1]))

    try:
        p = BarePaper.create(title, parsed_authors, date)
    except ValueError:
        raise BadRequest('Invalid paper')

    return {'status': 'ok', 'paper': p.json()}
def save_doi_metadata(self, metadata, extra_orcids=None):
    """
    Given the metadata as Citeproc+JSON or from CrossRef, create the
    associated paper and publication

    :param metadata: dict of metadata; ``author``, ``title`` and ``DOI``
        are required, ``subtitle`` is optional
    :param extra_orcids: an optional orcids list, which will be unified
        with the orcids extracted from the metadata. This is useful for
        the ORCID interface.
    :returns: the paper, created if needed
    :raises: ValueError if the metadata is not a dict, if the authors,
        title, DOI or publication date are missing, or if an author
        cannot be converted to a name pair
    """
    # Normalize metadata
    if not isinstance(metadata, dict):
        raise ValueError('Invalid metadata format, expecting a dict')
    if not metadata.get('author'):
        raise ValueError('No author provided')

    if not metadata.get('title'):
        raise ValueError('No title')
        # the upstream function ensures that there is a non-empty title

    if not to_doi(metadata.get('DOI')):
        raise ValueError("No DOI, skipping")

    pubdate = get_publication_date(metadata)
    if pubdate is None:
        raise ValueError('No pubdate')

    title = metadata['title']
    # CrossRef metadata stores titles in lists
    if isinstance(title, list):
        title = title[0]
    subtitle = metadata.get('subtitle')
    if subtitle:
        if isinstance(subtitle, list):
            subtitle = subtitle[0]
        title += ': '+subtitle

    # Real lists are required here: name_pairs is searched and then
    # iterated again, which a lazy map object would not survive.
    name_pairs = [convert_to_name_pair(elem) for elem in metadata['author']]
    if None in name_pairs:
        raise ValueError('Invalid author')
    authors = [BareName.create_bare(first, last)
               for first, last in name_pairs]

    def get_affiliation(author_elem):
        # First affiliation carrying a name, None otherwise
        for dct in author_elem.get('affiliation', []):
            if 'name' in dct:
                return dct['name']

    def get_orcid(author_elem):
        # Any falsy validation result is normalized to None
        return validate_orcid(author_elem.get('ORCID')) or None

    new_orcids = [get_orcid(elem) for elem in metadata['author']]
    if extra_orcids:
        # The ORCID found in the metadata wins over the extra one
        orcids = [new or old
                  for (old, new) in zip(extra_orcids, new_orcids)]
    else:
        orcids = new_orcids
    affiliations = [get_affiliation(elem) for elem in metadata['author']]

    paper = BarePaper.create(title, authors, pubdate,
                             visible=True, affiliations=affiliations,
                             orcids=orcids)

    result = create_publication(paper, metadata)

    if result is None:  # Creating the publication failed!
        # Make sure the paper only appears if it is still associated
        # with another source.
        paper.update_visible()
    else:
        paper = result[0]

    return paper
def save_doi_metadata(self, metadata, extra_orcids=None):
    """
    Given the metadata as Citeproc+JSON or from CrossRef, create the
    associated paper and publication

    :param metadata: dict of metadata; ``author``, ``title`` and ``DOI``
        are required, ``subtitle`` is optional
    :param extra_orcids: an optional orcids list, which will be unified
        with the orcids extracted from the metadata. This is useful for
        the ORCID interface.
    :returns: the paper, created if needed
    :raises: ValueError if the metadata is not a dict, if the authors,
        title, DOI or publication date are missing, or if an author
        cannot be converted to a name pair
    """
    # Normalize metadata
    if not isinstance(metadata, dict):
        raise ValueError('Invalid metadata format, expecting a dict')
    if not metadata.get('author'):
        raise ValueError('No author provided')

    if not metadata.get('title'):
        raise ValueError('No title')
        # the upstream function ensures that there is a non-empty title

    if not to_doi(metadata.get('DOI')):
        raise ValueError("No DOI, skipping")

    pubdate = get_publication_date(metadata)
    if pubdate is None:
        raise ValueError('No pubdate')

    title = metadata['title']
    # CrossRef metadata stores titles in lists
    if isinstance(title, list):
        title = title[0]
    subtitle = metadata.get('subtitle')
    if subtitle:
        if isinstance(subtitle, list):
            subtitle = subtitle[0]
        title += ': '+subtitle

    name_pairs = [convert_to_name_pair(elem) for elem in metadata['author']]
    if None in name_pairs:
        raise ValueError('Invalid author')
    authors = [BareName.create_bare(first, last)
               for first, last in name_pairs]

    def get_affiliation(author_elem):
        # First affiliation carrying a name, None otherwise
        for dct in author_elem.get('affiliation', []):
            if 'name' in dct:
                return dct['name']

    def get_orcid(author_elem):
        # Any falsy validation result is normalized to None
        return validate_orcid(author_elem.get('ORCID')) or None

    new_orcids = [get_orcid(elem) for elem in metadata['author']]

    if extra_orcids:
        # remove the extra_orcids if they already exist on different authors
        set_of_extra_orcids = {x for x in extra_orcids if x is not None}
        new_orcids = [(x if x not in set_of_extra_orcids else None)
                      for x in new_orcids]
        # now do the union
        orcids = [new or old
                  for (old, new) in zip(extra_orcids, new_orcids)]
    else:
        orcids = new_orcids
    affiliations = [get_affiliation(elem) for elem in metadata['author']]

    paper = BarePaper.create(title, authors, pubdate,
                             visible=True, affiliations=affiliations,
                             orcids=orcids)

    result = create_publication(paper, metadata)

    if result is None:  # Creating the publication failed!
        # Make sure the paper only appears if it is still associated
        # with another source.
        paper.update_visible()
    else:
        paper = result[0]

    return paper
def fetch_orcid_records(self, id, profile=None, use_doi=True):
    """
    Queries ORCiD to retrieve the publications associated with a given ORCiD.
    It also fetches such papers from the CrossRef search interface.

    This is a generator: nothing runs, and no exception is raised, until
    it is iterated.

    :param id: the ORCiD identifier; it is passed through validate_orcid
    :param profile: The ORCID profile if it has already been fetched before (format: parsed JSON).
    :param use_doi: Fetch the publications by DOI when we find one (recommended, but slow)
    :returns: a generator, where all the papers found are yielded. (some of them could be in
            free form, hence not imported)
    :raises: MetadataSourceException if the identifier is not a valid ORCiD
    """
    crps = CrossRefPaperSource(self.ccf)

    # Cleanup iD:
    id = validate_orcid(id)
    if id is None:
        raise MetadataSourceException('Invalid ORCiD identifier')

    # Get ORCiD profile
    try:
        if profile is None:
            profile = OrcidProfile(id=id)
        else:
            profile = OrcidProfile(json=profile)
    except MetadataSourceException as e:
        print(e)
        return

    # Reference name
    ref_name = profile.name
    # curl -H "Accept: application/orcid+json" 'http://pub.orcid.org/v1.2/0000-0002-8612-8827/orcid-works' -L -i
    dois = []  # list of DOIs to fetch

    # Fetch publications
    pubs = jpath('orcid-profile/orcid-activities/orcid-works/orcid-work', profile, [])
    for pub in pubs:
        def j(path, default=None):
            return jpath(path, pub, default)

        # DOI
        doi = None
        for extid in j('work-external-identifiers/work-external-identifier', []):
            if extid.get('work-external-identifier-type') == 'DOI':
                doi = to_doi(jpath('work-external-identifier-id/value', extid))
                if doi:
                    # If a DOI is available, create the paper using metadata from CrossRef.
                    # We don't do it yet, we only store the DOI, so that we can fetch them
                    # by batch later.
                    dois.append(doi)

        if doi and use_doi:
            continue

        # Extract information from ORCiD

        # Title
        title = j('work-title/title/value')
        if title is None:
            print("Warning: Skipping ORCID publication: no title")
            # Actually skip the record, as the message announces
            continue

        # Type
        doctype = orcid_to_doctype(j('work-type', 'other'))

        # Contributors (ignored for now as they are very often not present)
        def get_contrib(js):
            return {
                'orcid': jpath('contributor-orcid', js),
                'name': jpath('credit-name/value', js),
            }
        contributors = [get_contrib(js)
                        for js in j('work-contributors/contributor', [])]

        author_names = [contrib['name'] for contrib in contributors
                        if contrib['name'] is not None]
        authors = [parse_comma_name(name) for name in author_names]
        # ORCiD internal id
        identifier = j('put-code')
        affiliations = [contrib['orcid'] for contrib in contributors]

        # Pubdate: try increasingly precise dates and keep the most
        # precise one that is valid.
        year = parse_int(j('publication-date/year/value'), 1970)
        month = parse_int(j('publication-date/month/value'), 1)
        day = parse_int(j('publication-date/day/value'), 1)
        pubdate = None
        try:
            pubdate = date(year=year, month=1, day=1)
            pubdate = date(year=year, month=month, day=1)
            pubdate = date(year=year, month=month, day=day)
        except ValueError:
            if pubdate is None:
                print("Invalid publication date in ORCID publication, skipping")
                continue

        # Citation type: metadata format
        citation_format = j('work-citation/work-citation-type')
        print(citation_format)
        bibtex = j('work-citation/citation')

        if bibtex is not None:
            try:
                entry = parse_bibtex(bibtex)

                if entry.get('author', []) == []:
                    print("Warning: Skipping ORCID publication: no authors.")
                    print(bibtex)
                if not authors:
                    # An entry without authors leaves the list empty; the
                    # record is then dropped by the check further down.
                    authors = entry.get('author', [])
            except ValueError:
                # Unparsable BibTeX: keep whatever ORCiD itself provided
                pass

        affiliations = affiliate_author_with_orcid(
            ref_name, id, authors, initial_affiliations=affiliations)

        authors = [name_lookup_cache.lookup(author) for author in authors]

        if not authors:
            print("No authors found, skipping")
            continue

        # Create paper:
        paper = BarePaper.create(title, authors, pubdate, 'VISIBLE', affiliations)

        record = BareOaiRecord(
            source=orcid_oai_source,
            identifier=identifier,
            splash_url='http://orcid.org/'+id,
            pubtype=doctype)

        paper.add_oairecord(record)

        yield paper

    if use_doi:
        for metadata in crps.search_for_dois_incrementally('', {'orcid': id}):
            try:
                paper = crps.save_doi_metadata(metadata)
                if paper:
                    yield paper
            except ValueError as e:
                print("Saving CrossRef record from ORCID failed: %s" % unicode(e))

        # Now we add the DOIs found in the ORCID profile.
        doi_metadata = fetch_dois(dois)
        for metadata in doi_metadata:
            try:
                authors = [convert_to_name_pair(elem)
                           for elem in metadata['author']]
                affiliations = affiliate_author_with_orcid(ref_name, id, authors)
                paper = crps.save_doi_metadata(metadata, affiliations)
                if not paper:
                    continue

                record = BareOaiRecord(
                    source=orcid_oai_source,
                    identifier='orcid:'+id+':'+metadata['DOI'],
                    splash_url='http://orcid.org/'+id,
                    pubtype=paper.doctype)
                paper.add_oairecord(record)
                yield paper
            except (KeyError, ValueError, TypeError):
                # Best effort: a malformed record is dropped silently
                pass
def setUp(self):
    """Create the two-author BarePaper fixture stored in ``self.ist``."""
    fixture_authors = [
        BareName.create('Alfred', 'Kastler'),
        BareName.create('John', 'Dubuc'),
    ]
    published = datetime.date(year=2015, month=3, day=2)
    self.ist = BarePaper.create(
        'Groundbreaking Results', fixture_authors, published)
def api_paper_query(request):
    """
    Look up or build a paper from a JSON query.

    The request body must be a UTF-8 encoded JSON object. If it has a
    non-empty ``doi`` field the paper is fetched by DOI, and created from
    it when none is found; otherwise ``title``, ``date`` and ``authors``
    are validated and a BarePaper is built from them.

    :param request: object whose ``body`` holds the encoded JSON payload
    :returns: a dict with ``status`` set to ``'ok'`` and the JSON
        representation of the paper under ``paper``
    :raises: BadRequest when the payload or one of its fields is invalid,
        or when no paper could be obtained for the DOI
    """
    try:
        fields = json.loads(request.body.decode('utf-8'))
    except (ValueError, UnicodeDecodeError):
        raise BadRequest('Invalid JSON payload')
    # Valid JSON is not necessarily an object: a list or a scalar has no
    # .get() and must be rejected as a client error.
    if not isinstance(fields, dict):
        raise BadRequest('Invalid JSON payload')

    doi = fields.get('doi')
    if doi:
        p = None
        try:
            p = Paper.get_by_doi(doi)
            if not p:
                p = Paper.create_by_doi(doi)
        except MetadataSourceException:
            # Reported to the client below if p is still None
            pass
        if p is None:
            raise BadRequest('Could not find a paper with this DOI')
        return {'status': 'ok', 'paper': p.json()}

    title = fields.get('title')
    if not isinstance(title, str) or not title or len(title) > 512:
        raise BadRequest(
            'Invalid title, has to be a non-empty string shorter than 512 characters')

    date = fields.get('date')
    if not isinstance(date, str):
        raise BadRequest('A date is required')
    try:
        date = tolerant_datestamp_to_datetime(date)
    except ValueError as e:
        raise BadRequest(str(e))

    authors = fields.get('authors')
    if not isinstance(authors, list):
        raise BadRequest('A list of authors is expected')
    if not authors:
        raise BadRequest('No authors provided')

    parsed_authors = []
    for a in authors:
        author = None
        if not isinstance(a, dict):
            raise BadRequest('Invalid author')

        if 'first' in a and 'last' in a:
            if (not isinstance(a['first'], str)
                    or not isinstance(a['last'], str)
                    or not a['last']):
                raise BadRequest('Invalid (first,last) name provided')
            author = (a['first'], a['last'])
        elif 'plain' in a:
            if not isinstance(a['plain'], str) or not a['plain']:
                raise BadRequest('Invalid plain name provided')
            author = parse_comma_name(a['plain'])

        if author is None:
            raise BadRequest('Invalid author')

        parsed_authors.append(BareName.create(author[0], author[1]))

    try:
        p = BarePaper.create(title, parsed_authors, date)
    except ValueError:
        raise BadRequest('Invalid paper')

    return {'status': 'ok', 'paper': p.json()}
def fetch_orcid_records(self, id, profile=None, use_doi=True):
    """
    Queries ORCiD to retrieve the publications associated with a given ORCiD.
    It also fetches such papers from the CrossRef search interface.

    This is a generator: nothing runs, and no exception is raised, until
    it is iterated.

    :param id: the ORCiD identifier; it is passed through validate_orcid
    :param profile: The ORCID profile if it has already been fetched before (format: parsed JSON).
    :param use_doi: Fetch the publications by DOI when we find one (recommended, but slow)
    :returns: a generator, where all the papers found are yielded. (some of them could be in
            free form, hence not imported)
    :raises: MetadataSourceException if the identifier is not a valid ORCiD
    """
    crps = CrossRefPaperSource(self.ccf)

    # Cleanup iD:
    id = validate_orcid(id)
    if id is None:
        raise MetadataSourceException('Invalid ORCiD identifier')

    # Get ORCiD profile
    try:
        if profile is None:
            profile = OrcidProfile(id=id)
        else:
            profile = OrcidProfile(json=profile)
    except MetadataSourceException as e:
        print(e)
        return

    # Reference name
    ref_name = profile.name
    # curl -H "Accept: application/orcid+json" 'http://pub.orcid.org/v1.2/0000-0002-8612-8827/orcid-works' -L -i
    dois = []  # list of DOIs to fetch

    # Fetch publications
    pubs = jpath('orcid-profile/orcid-activities/orcid-works/orcid-work', profile, [])
    for pub in pubs:
        def j(path, default=None):
            return jpath(path, pub, default)

        # DOI
        doi = None
        for extid in j('work-external-identifiers/work-external-identifier', []):
            if extid.get('work-external-identifier-type') == 'DOI':
                doi = to_doi(jpath('work-external-identifier-id/value', extid))
                if doi:
                    # If a DOI is available, create the paper using metadata from CrossRef.
                    # We don't do it yet, we only store the DOI, so that we can fetch them
                    # by batch later.
                    dois.append(doi)

        if doi and use_doi:
            continue

        # Extract information from ORCiD

        # Title
        title = j('work-title/title/value')
        if title is None:
            print("Warning: Skipping ORCID publication: no title")
            # Actually skip the record, as the message announces
            continue

        # Type
        doctype = orcid_to_doctype(j('work-type', 'other'))

        # Contributors (ignored for now as they are very often not present)
        def get_contrib(js):
            return {
                'orcid': jpath('contributor-orcid', js),
                'name': jpath('credit-name/value', js),
            }
        contributors = [get_contrib(js)
                        for js in j('work-contributors/contributor', [])]

        author_names = [contrib['name'] for contrib in contributors
                        if contrib['name'] is not None]
        authors = [parse_comma_name(name) for name in author_names]
        # ORCiD internal id
        identifier = j('put-code')
        affiliations = [contrib['orcid'] for contrib in contributors]

        # Pubdate: try increasingly precise dates and keep the most
        # precise one that is valid.
        year = parse_int(j('publication-date/year/value'), 1970)
        month = parse_int(j('publication-date/month/value'), 1)
        day = parse_int(j('publication-date/day/value'), 1)
        pubdate = None
        try:
            pubdate = date(year=year, month=1, day=1)
            pubdate = date(year=year, month=month, day=1)
            pubdate = date(year=year, month=month, day=day)
        except ValueError:
            if pubdate is None:
                print("Invalid publication date in ORCID publication, skipping")
                continue

        # Citation type: metadata format
        citation_format = j('work-citation/work-citation-type')
        print(citation_format)
        bibtex = j('work-citation/citation')

        if bibtex is not None:
            try:
                entry = parse_bibtex(bibtex)

                if entry.get('author', []) == []:
                    print("Warning: Skipping ORCID publication: no authors.")
                    print(bibtex)
                if not authors:
                    # An entry without authors leaves the list empty; the
                    # record is then dropped by the check further down.
                    authors = entry.get('author', [])
            except ValueError:
                # Unparsable BibTeX: keep whatever ORCiD itself provided
                pass

        affiliations = affiliate_author_with_orcid(
            ref_name, id, authors, initial_affiliations=affiliations)

        authors = [name_lookup_cache.lookup(author) for author in authors]

        if not authors:
            print("No authors found, skipping")
            continue

        # Create paper:
        paper = BarePaper.create(title, authors, pubdate, 'VISIBLE', affiliations)

        record = BareOaiRecord(
            source=orcid_oai_source,
            identifier=identifier,
            splash_url='http://orcid.org/'+id,
            pubtype=doctype)

        paper.add_oairecord(record)

        yield paper

    if use_doi:
        for metadata in crps.search_for_dois_incrementally('', {'orcid': id}):
            try:
                paper = crps.save_doi_metadata(metadata)
                if paper:
                    yield paper
            except ValueError as e:
                print("Saving CrossRef record from ORCID failed: %s" % unicode(e))

        # Now we add the DOIs found in the ORCID profile.
        doi_metadata = fetch_dois(dois)
        for metadata in doi_metadata:
            try:
                authors = [convert_to_name_pair(elem)
                           for elem in metadata['author']]
                affiliations = affiliate_author_with_orcid(ref_name, id, authors)
                paper = crps.save_doi_metadata(metadata, affiliations)
                if not paper:
                    continue

                record = BareOaiRecord(
                    source=orcid_oai_source,
                    identifier='orcid:'+id+':'+metadata['DOI'],
                    splash_url='http://orcid.org/'+id,
                    pubtype=paper.doctype)
                paper.add_oairecord(record)
                yield paper
            except (KeyError, ValueError, TypeError):
                # Best effort: a malformed record is dropped silently
                pass
def process_records(self, listRecords):
    """
    Turn harvested OAI records into papers.

    Records are skipped when none of their authors is known, when they
    have no title, no valid proxy source or no publication date.

    :param listRecords: iterable of records, each indexable as
        ``(header, metadata)`` where the header provides ``setSpec()`` and
        ``identifier()`` and the metadata exposes its fields in ``_map``
    :returns: a generator yielding the paper of each record that was
        saved successfully
    """
    for record in listRecords:
        metadata = record[1]._map

        authors = get_oai_authors(metadata)

        # Filter the record
        if all(not elem.is_known for elem in authors):
            print("No relevant author, continue")
            continue
        if not metadata.get('title'):
            continue

        # Find the source
        source_identifier = None
        for s in record[0].setSpec():
            if s.startswith(PROXY_SOURCE_PREFIX):
                source_identifier = s[len(PROXY_SOURCE_PREFIX):]
                break
        source = None
        if source_identifier:
            try:
                source = OaiSource.objects.get(identifier=source_identifier)
            except OaiSource.DoesNotExist:
                pass
        if not source:
            print("Invalid source '" + str(source_identifier) + "' from the proxy, skipping")
            continue

        # Find the DOI, if any: the first identifier or relation that
        # to_doi accepts. Either field may be missing from the metadata.
        doi = None
        candidates = metadata.get('identifier', []) + metadata.get('relation', [])
        for identifier in candidates:
            doi = to_doi(identifier)
            if doi:
                break

        # A publication date is necessary
        pubdate = find_earliest_oai_date(record)
        if not pubdate:
            print("No publication date, skipping")
            continue

        print('Saving record %s' % record[0].identifier())
        paper = BarePaper.create(metadata['title'][0], authors, pubdate)

        if doi:
            try:
                # Kept in its own name so the OAI metadata is not shadowed
                doi_metadata = crossref.fetch_metadata_by_DOI(doi)
                crossref.create_publication(paper, doi_metadata)
            except MetadataSourceException as e:
                print("Warning, metadata source exception while fetching DOI " +
                      doi + ":\n" + unicode(e))

        if paper is None:
            print("Paper creation failed, skipping")
            continue

        # Save the record
        # TODO: we should check record validity *BEFORE* creating the paper
        try:
            add_oai_record(record, source, paper)
            yield paper
        except ValueError as e:
            print("Warning, OAI record " + record[0].identifier() + " skipped:\n" + unicode(e))
            paper.update_availability()
def save_doi_metadata(self, metadata, extra_affiliations=None, allow_unknown_authors=False):
    """
    Given the metadata as Citeproc+JSON or from CrossRef, create the
    associated paper and publication

    :param metadata: dict of metadata; ``author``, ``title`` and ``DOI``
        are required, ``subtitle`` is optional
    :param extra_affiliations: an optional affiliations list, which will be unified
        with the affiliations extracted from the metadata. This is useful for the ORCID interface.
    :param allow_unknown_authors: create the paper even if no author matches our researchers
    :returns: the paper, created if needed
    :raises: ValueError if the metadata is not a dict, if the authors,
        title, DOI or publication date are missing, or if no suitable
        author is found
    """
    # Normalize metadata
    if not isinstance(metadata, dict):
        if metadata is not None:
            print("WARNING: Invalid metadata: type is "+str(type(metadata)))
            print("The doi proxy is doing something nasty!")
        raise ValueError('Invalid metadata format, expecting a dict')
    if 'author' not in metadata:
        raise ValueError('No author provided')

    if not metadata.get('title'):
        raise ValueError('No title')
        # the upstream function ensures that there is a non-empty title

    if not metadata.get('DOI'):
        raise ValueError("No DOI, skipping")

    pubdate = get_publication_date(metadata)
    if pubdate is None:
        raise ValueError('No pubdate')

    title = metadata['title']
    # CrossRef metadata stores titles in lists
    if isinstance(title, list):
        title = title[0]
    subtitle = metadata.get('subtitle')
    if subtitle:
        if isinstance(subtitle, list):
            subtitle = subtitle[0]
        title += ': '+subtitle

    # Convert every author first, then look the names up
    name_pairs = [convert_to_name_pair(elem) for elem in metadata['author']]
    looked_up = [name_lookup_cache.lookup(pair) for pair in name_pairs]
    authors = [author for author in looked_up if author is not None]
    if not authors or (not allow_unknown_authors
                       and not any(elem.is_known for elem in authors)):
        raise ValueError('No known author')

    def get_affiliation(author_elem):
        # First, look for an ORCID id
        orcid = validate_orcid(author_elem.get('ORCID'))
        if orcid:
            return orcid
        # Otherwise return the plain affiliation, if any
        for dct in author_elem.get('affiliation', []):
            if 'name' in dct:
                return dct['name']

    affiliations = [get_affiliation(elem) for elem in metadata['author']]
    # The extra affiliations are only merged when they line up one to one
    if extra_affiliations and len(affiliations) == len(extra_affiliations):
        for i, extra in enumerate(extra_affiliations):
            if affiliation_is_greater(extra, affiliations[i]):
                affiliations[i] = extra

    paper = BarePaper.create(title, authors, pubdate,
                             'VISIBLE', affiliations)

    result = create_publication(paper, metadata)

    if result is None:  # Creating the publication failed!
        paper.update_visibility()
        # Make sure the paper only appears if it is still associated
        # with another source.
        # TODO add unit test for this
    else:
        paper = result[0]

    return paper
def api_paper_query(request):
    """
    Look up a paper from a JSON query.

    The request body must be a UTF-8 encoded JSON object. If it has a
    non-empty ``doi`` field the paper is fetched by DOI, and created from
    it when none is found. Otherwise ``title``, ``date`` and ``authors``
    are validated and the paper with the matching fingerprint is searched
    for in the database.

    :param request: object whose ``body`` holds the encoded JSON payload
    :returns: a dict with ``status`` set to ``'ok'`` and the JSON
        representation of the paper under ``paper``, or the pair
        ``({'status': 'not found'}, 404)`` when no paper has the
        computed fingerprint
    :raises: BadRequest when the payload or one of its fields is invalid,
        or when no paper could be obtained for the DOI
    """
    try:
        fields = json.loads(request.body.decode('utf-8'))
    except (ValueError, UnicodeDecodeError):
        raise BadRequest('Invalid JSON payload')
    # Valid JSON is not necessarily an object: a list or a scalar has no
    # .get() and must be rejected as a client error.
    if not isinstance(fields, dict):
        raise BadRequest('Invalid JSON payload')

    doi = fields.get('doi')
    if doi:
        p = None
        try:
            p = Paper.get_by_doi(doi)
            if not p:
                p = Paper.create_by_doi(doi)
        except MetadataSourceException:
            # Reported to the client below if p is still None
            pass
        if p is None:
            raise BadRequest('Could not find a paper with this DOI')
        return {'status': 'ok', 'paper': p.json()}

    title = fields.get('title')
    if not isinstance(title, str) or not title or len(title) > 512:
        raise BadRequest(
            'Invalid title, has to be a non-empty string shorter than 512 characters')

    date = fields.get('date')
    if not isinstance(date, str):
        raise BadRequest('A date is required')
    try:
        date = tolerant_datestamp_to_datetime(date)
    except ValueError as e:
        raise BadRequest(str(e))

    authors = fields.get('authors')
    if not isinstance(authors, list):
        raise BadRequest('A list of authors is expected')
    if not authors:
        raise BadRequest('No authors provided')

    parsed_authors = []
    for a in authors:
        author = None
        if not isinstance(a, dict):
            raise BadRequest('Invalid author')

        if 'first' in a and 'last' in a:
            if (not isinstance(a['first'], str)
                    or not isinstance(a['last'], str)
                    or not a['last']):
                raise BadRequest('Invalid (first,last) name provided')
            author = (a['first'], a['last'])
        elif 'plain' in a:
            if not isinstance(a['plain'], str) or not a['plain']:
                raise BadRequest('Invalid plain name provided')
            author = parse_comma_name(a['plain'])

        if author is None:
            raise BadRequest('Invalid author')

        parsed_authors.append(BareName.create(author[0], author[1]))

    try:
        # Validate the metadata against our data model,
        # and compute the fingerprint to look up the paper in the DB.
        # This does NOT create a paper in the database - we do not want
        # to create papers for every search query we get!
        p = BarePaper.create(title, parsed_authors, date)
    except ValueError as e:
        raise BadRequest('Invalid paper: {}'.format(e))

    try:
        model_paper = Paper.objects.get(fingerprint=p.fingerprint)
        return {'status': 'ok', 'paper': model_paper.json()}
    except Paper.DoesNotExist:
        return {'status': 'not found'}, 404
def save_doi_metadata(self, metadata, extra_affiliations=None, allow_unknown_authors=False):
    """
    Given the metadata as Citeproc+JSON or from CrossRef, create the
    associated paper and publication

    :param metadata: dict of metadata; ``author``, ``title`` and ``DOI``
        are required, ``subtitle`` is optional
    :param extra_affiliations: an optional affiliations list, which will be unified
        with the affiliations extracted from the metadata. This is useful for the ORCID interface.
    :param allow_unknown_authors: create the paper even if no author matches our researchers
    :returns: the paper, created if needed
    :raises: ValueError if the metadata is not a dict, if the authors,
        title, DOI or publication date are missing, or if no suitable
        author is found
    """
    # Normalize metadata
    if not isinstance(metadata, dict):
        if metadata is not None:
            print("WARNING: Invalid metadata: type is " + str(type(metadata)))
            print("The doi proxy is doing something nasty!")
        raise ValueError('Invalid metadata format, expecting a dict')
    if 'author' not in metadata:
        raise ValueError('No author provided')

    if not metadata.get('title'):
        raise ValueError('No title')
        # the upstream function ensures that there is a non-empty title

    if not metadata.get('DOI'):
        raise ValueError("No DOI, skipping")

    pubdate = get_publication_date(metadata)
    if pubdate is None:
        raise ValueError('No pubdate')

    title = metadata['title']
    # CrossRef metadata stores titles in lists
    if isinstance(title, list):
        title = title[0]
    subtitle = metadata.get('subtitle')
    if subtitle:
        if isinstance(subtitle, list):
            subtitle = subtitle[0]
        title += ': ' + subtitle

    # Convert every author first, then look the names up
    name_pairs = [convert_to_name_pair(elem) for elem in metadata['author']]
    looked_up = [name_lookup_cache.lookup(pair) for pair in name_pairs]
    authors = [author for author in looked_up if author is not None]
    if not authors or (not allow_unknown_authors
                       and not any(elem.is_known for elem in authors)):
        raise ValueError('No known author')

    def get_affiliation(author_elem):
        # First, look for an ORCID id
        orcid = validate_orcid(author_elem.get('ORCID'))
        if orcid:
            return orcid
        # Otherwise return the plain affiliation, if any
        for dct in author_elem.get('affiliation', []):
            if 'name' in dct:
                return dct['name']

    affiliations = [get_affiliation(elem) for elem in metadata['author']]
    # The extra affiliations are only merged when they line up one to one
    if extra_affiliations and len(affiliations) == len(extra_affiliations):
        for i, extra in enumerate(extra_affiliations):
            if affiliation_is_greater(extra, affiliations[i]):
                affiliations[i] = extra

    paper = BarePaper.create(title, authors, pubdate,
                             'VISIBLE', affiliations)

    result = create_publication(paper, metadata)

    if result is None:  # Creating the publication failed!
        paper.update_visibility()
        # Make sure the paper only appears if it is still associated
        # with another source.
        # TODO add unit test for this
    else:
        paper = result[0]

    return paper
def setUp(self):
    """Build the bare paper fixture shared by the tests, stored as ``self.ist``."""
    fixture_authors = [
        BareName.create('Alfred', 'Kastler'),
        BareName.create('John', 'Dubuc'),
    ]
    fixture_pubdate = datetime.date(2015, 3, 2)
    self.ist = BarePaper.create(
        'Groundbreaking Results',
        fixture_authors,
        fixture_pubdate,
    )
def api_paper_query(request):
    """
    Look up a paper from a JSON query sent in the request body.

    The payload must be a JSON object. If it has a non-empty 'doi' entry,
    the paper is looked up (and created if needed) from that DOI.
    Otherwise 'title', 'date' and 'authors' are required; each author is
    an object with either 'first' and 'last' entries or a 'plain' entry.

    :param request: the request, whose ``body`` holds the UTF-8 encoded
                    JSON payload
    :returns: ``{'status': 'ok', 'paper': ...}`` when a paper is found,
              or the pair ``({'status': 'not found'}, 404)`` otherwise
    :raises BadRequest: if the payload is malformed or incomplete, or if
                        no paper could be obtained for the given DOI
    """
    try:
        fields = json.loads(request.body.decode('utf-8'))
    except (ValueError, UnicodeDecodeError):
        raise BadRequest('Invalid JSON payload')
    # Valid JSON can also be a list, a string, a number... which have no
    # .get(): reject them explicitly instead of failing further down.
    if not isinstance(fields, dict):
        raise BadRequest('Invalid JSON payload')

    doi = fields.get('doi')
    if doi:
        p = None
        try:
            p = Paper.get_by_doi(doi)
            if not p:
                p = Paper.create_by_doi(doi)
        except MetadataSourceException:
            # Reported to the client just below, as p is still None
            pass
        if p is None:
            raise BadRequest('Could not find a paper with this DOI')
        return {'status': 'ok', 'paper': p.json()}

    title = fields.get('title')
    if not isinstance(title, str) or not title or len(title) > 512:
        raise BadRequest(
            'Invalid title, has to be a non-empty string shorter than 512 characters')

    date = fields.get('date')
    if not isinstance(date, str):
        raise BadRequest('A date is required')
    try:
        date = tolerant_datestamp_to_datetime(date)
    except ValueError as e:
        raise BadRequest(str(e))

    authors = fields.get('authors')
    if not isinstance(authors, list):
        raise BadRequest('A list of authors is expected')
    if not authors:
        raise BadRequest('No authors provided')

    parsed_authors = []
    for a in authors:
        if not isinstance(a, dict):
            raise BadRequest('Invalid author')

        author = None
        if 'first' in a and 'last' in a:
            # The first name may be empty, the last name may not
            if (not isinstance(a['first'], str)
                    or not isinstance(a['last'], str)
                    or not a['last']):
                raise BadRequest('Invalid (first,last) name provided')
            author = (a['first'], a['last'])
        elif 'plain' in a:
            if not isinstance(a['plain'], str) or not a['plain']:
                raise BadRequest('Invalid plain name provided')
            author = parse_comma_name(a['plain'])

        if author is None:
            raise BadRequest('Invalid author')
        parsed_authors.append(BareName.create(author[0], author[1]))

    try:
        # Validate the metadata against our data model,
        # and compute the fingerprint to look up the paper in the DB.
        # This does NOT create a paper in the database - we do not want
        # to create papers for every search query we get!
        p = BarePaper.create(title, parsed_authors, date)
    except ValueError as e:
        raise BadRequest('Invalid paper: {}'.format(e))

    try:
        model_paper = Paper.objects.get(fingerprint=p.fingerprint)
    except Paper.DoesNotExist:
        return {'status': 'not found'}, 404
    return {'status': 'ok', 'paper': model_paper.json()}
def process_records(self, listRecords):
    """
    Turn harvested OAI records into bare papers.

    This is a generator: it yields one paper for each record that passes
    all the checks below and whose OAI record could be attached. Records
    failing a check are reported on standard output and skipped.

    :param listRecords: an iterable of records, where ``record[0]`` is
                        the header (providing ``setSpec()`` and
                        ``identifier()``) and ``record[1]._map`` is the
                        metadata mapping
    """
    for record in listRecords:
        metadata = record[1]._map
        authors = get_oai_authors(metadata)

        # Filter the record
        if not any(elem.is_known for elem in authors):
            print("No relevant author, continue")
            continue
        if not metadata.get('title'):
            continue

        # Find the source
        header = record[0]
        source_identifier = None
        for s in header.setSpec():
            if s.startswith(PROXY_SOURCE_PREFIX):
                source_identifier = s[len(PROXY_SOURCE_PREFIX):]
                break

        source = None
        if source_identifier:
            try:
                source = OaiSource.objects.get(identifier=source_identifier)
            except OaiSource.DoesNotExist:
                pass
        if not source:
            print("Invalid source '" + str(source_identifier) + "' from the proxy, skipping")
            continue

        # Find the DOI, if any: the first candidate that parses as one.
        # Either field may be missing from the metadata.
        doi = None
        for identifier in metadata.get('identifier', []) + metadata.get('relation', []):
            doi = to_doi(identifier)
            if doi:
                break

        # A publication date is necessary
        pubdate = find_earliest_oai_date(record)
        if not pubdate:
            print("No publication date, skipping")
            continue

        print('Saving record %s' % header.identifier())
        try:
            paper = BarePaper.create(metadata['title'][0], authors, pubdate)
        except ValueError:
            # A single invalid record should not abort the whole harvest
            paper = None
        if paper is None:
            print("Paper creation failed, skipping")
            continue

        if doi:
            try:
                doi_metadata = crossref.fetch_metadata_by_DOI(doi)
                crossref.create_publication(paper, doi_metadata)
            except MetadataSourceException as e:
                # Best effort only: the OAI record is still saved below
                print(u"Warning, metadata source exception while fetching DOI %s:\n%s" % (doi, e))

        # Save the record
        # TODO: we should check record validity *BEFORE* creating the paper
        try:
            add_oai_record(record, source, paper)
        except ValueError as e:
            print(u"Warning, OAI record %s skipped:\n%s" % (header.identifier(), e))
            paper.update_availability()
        else:
            yield paper