def test_create(self):
    """
    BarePaper.create raises ValueError when given invalid arguments.
    """
    authors = [
        BareName.create('Peter', 'Johnstone'),
        BareName.create('Xing', 'Li'),
    ]
    published = datetime.date(2014, 9, 4)

    # No title
    with self.assertRaises(ValueError):
        BarePaper.create('', authors, published)
    # No authors
    with self.assertRaises(ValueError):
        BarePaper.create('Excellent title', [], published)
    # No publication date
    with self.assertRaises(ValueError):
        BarePaper.create('Excellent title', authors, None)
    # Invalid visibility
    with self.assertRaises(ValueError):
        BarePaper.create('Excellent title', authors, published,
                         visible="something")
    # Not enough affiliations (one affiliation for two authors)
    with self.assertRaises(ValueError):
        BarePaper.create('Excellent title', authors, published,
                         affiliations=['ENS'])
def convert_authors(self, authors, orcids):
    """
    Convert (first, last) pairs to names and drop the entries without a name.

    :param authors: iterable of (first, last) pairs
    :param orcids: iterable paired positionally with ``authors``
    :returns: two lists of equal length (names, orcids), keeping only the
        positions for which BareName.create_bare did not return None
    """
    # Build every name up front, before pairing them with the ORCIDs.
    bare_names = [BareName.create_bare(first, last) for first, last in authors]

    kept_names = []
    kept_orcids = []
    for bare_name, orcid in zip(bare_names, orcids):
        if bare_name is None:
            continue
        kept_names.append(bare_name)
        kept_orcids.append(orcid)
    return kept_names, kept_orcids
def get_oai_authors(self, metadata):
    """
    Build the list of author names from the 'creator' entries of a
    metadata record.

    :param metadata: mapping with a 'creator' entry holding name strings
    :returns: list with one BareName.create_bare result per creator
    """
    # Parse every creator first, then turn the pairs into names.
    name_pairs = [parse_comma_name(creator) for creator in metadata['creator']]
    authors = []
    for first, last in name_pairs:
        authors.append(BareName.create_bare(first, last))
    return authors
def test_merge(self):
    """
    A paper whose recomputed fingerprint matches another paper's
    fingerprint gets merged into that paper.
    """
    # Get a paper with CrossRef metadata
    original = Paper.create_by_doi('10.1111/j.1744-6570.1953.tb01038.x')
    original = Paper.from_bare(original)

    # Create a copy with slight variations
    name_parts = [('M. H.', 'Jones'), ('R. H.', 'Haase'), ('S. F.', 'Hulbert')]
    names = [BareName.create_bare(first, last) for first, last in name_parts]
    variant = Paper.get_or_create(
        'A Survey of the Literature on Technical Positions',
        names,
        date(year=2011, month=1, day=1))

    # The two are not merged because of the difference in the title
    self.assertNotEqual(original, variant)

    # Fix the title of the second one
    variant.title = 'A Survey of the Literature on Job Analysis of Technical Positions'
    variant.save()

    # Check that the new fingerprint is equal to that of the first paper
    self.assertEqual(variant.new_fingerprint(), original.fingerprint)
    # and that the new fingerprint and the current one differ
    self.assertNotEqual(variant.new_fingerprint(), variant.fingerprint)
    # and that the first paper is the one found under its own fingerprint
    found = Paper.objects.filter(fingerprint=original.fingerprint).first()
    self.assertEqual(found, original)

    # The two papers should hence be merged together
    merged = variant.recompute_fingerprint_and_merge_if_needed()
    self.assertEqual(merged.pk, original.pk)
def test_update_authors(self):
    """
    Paper.update_authors yields the expected author names for each case.
    """
    # Each case: (names at creation, names passed to update_authors,
    # names expected from bare_author_names afterwards)
    cases = [
        (
            [('G.', 'Bodenhausen')],
            [('Geoffrey', 'Bodenhausen')],
            [('Geoffrey', 'Bodenhausen')],
        ),
        (
            [('L. F.', 'Jullien'), ('A.', 'Amarilli')],
            [('Ludovic', 'Jullien'), ('R.', 'Pérand'), ('Antoine', 'Amarilli')],
            [('Ludovic F.', 'Jullien'), ('R.', 'Pérand'), ('Antoine', 'Amarilli')],
        ),
    ]

    def to_bare_names(pairs):
        return [BareName.create_bare(first, last) for first, last in pairs]

    for before, after, expected in cases:
        paper = Paper.get_or_create(
            'This is a test paper',
            to_bare_names(before),
            datetime.date(year=2015, month=4, day=5))
        updated = [BareAuthor(name=name) for name in to_bare_names(after)]
        paper.update_authors(updated)
        self.assertEqual(paper.bare_author_names(), expected)
def test_paper_with_empty_slug(self):
    """
    A title made only of characters that slugify ignores gives an empty
    slug, and the paper page is still checked with it.
    """
    authors = [BareName.create('Jean', 'Saisrien')]
    pubdate = datetime.date(2016, 7, 2)
    paper = Paper.get_or_create('!@#$%^*()', authors, pubdate)

    self.assertEqual(paper.slug, '')
    self.checkPage('paper', args=[paper.pk, paper.slug])
def test_add_author(self):
    """
    add_author appends by default, inserts at ``position`` when given,
    and raises ValueError for position 8 on a three-author paper.
    """
    peter = BareName.create('Peter', 'Johnstone')
    xing = BareName.create('Xing', 'Li')
    john = BareName.create('John', 'Dubuc')
    expected_order = [peter, xing, john]

    paper = BarePaper.create(
        'The title', [peter], datetime.date(year=2012, month=1, day=9))

    # Without a position the author goes to the end
    paper.add_author(BareAuthor(name=john))
    self.assertEqual(len(paper.authors), 2)

    # With a position the author is inserted in between
    paper.add_author(BareAuthor(name=xing), position=1)
    self.assertListEqual(paper.author_names(), expected_order)

    # Build the author outside the `with` so only add_author is checked
    outsider = BareAuthor(name=BareName.create('Cantor', 'Bernstein'))
    with self.assertRaises(ValueError):
        paper.add_author(outsider, position=8)
def test_update_authors(self):
    """
    Check the author names left on a paper after update_authors.
    """
    # (names at creation, names given to update_authors, expected names)
    scenarios = (
        (
            [('G.', 'Bodenhausen')],
            [('Geoffrey', 'Bodenhausen')],
            [('Geoffrey', 'Bodenhausen')],
        ),
        (
            [('L. F.', 'Jullien'), ('A.', 'Amarilli')],
            [('Ludovic', 'Jullien'), ('R.', 'Pérand'), ('Antoine', 'Amarilli')],
            [('Ludovic F.', 'Jullien'), ('R.', 'Pérand'), ('Antoine', 'Amarilli')],
        ),
    )
    for initial_pairs, replacement_pairs, expected_pairs in scenarios:
        initial_names = []
        for first, last in initial_pairs:
            initial_names.append(BareName.create_bare(first, last))
        paper = Paper.get_or_create(
            'This is a test paper',
            initial_names,
            datetime.date(year=2015, month=4, day=5))

        replacement_authors = []
        for first, last in replacement_pairs:
            replacement_authors.append(
                BareAuthor(name=BareName.create_bare(first, last)))
        paper.update_authors(replacement_authors)

        self.assertEqual(paper.bare_author_names(), expected_pairs)
def test_paper_with_empty_slug(self, db, check_page):
    """
    A title made only of characters that slugify ignores gives an empty
    slug; the paper page is then checked with that slug.
    """
    author_names = [BareName.create('Jean', 'Saisrien')]
    paper = Paper.get_or_create(
        '!@#$%^*()', author_names, datetime.date(2016, 7, 2))
    # Force the paper to be visible even if it is an orphan
    paper.visible = True
    paper.save()

    assert paper.slug == ''
    # presumably 200 is the expected HTTP status — confirm against check_page
    check_page(200, 'paper', args=[paper.pk, paper.slug])
def test_create(self):
    """
    BarePaper.create raises ValueError for each invalid argument set.
    """
    names = [BareName.create('Peter', 'Johnstone'),
             BareName.create('Xing', 'Li')]
    pubdate = datetime.date(year=2014, month=9, day=4)

    # (positional arguments, keyword arguments) for BarePaper.create
    invalid_calls = [
        # No title
        (('', names, pubdate), {}),
        # No authors
        (('Excellent title', [], pubdate), {}),
        # No publication date
        (('Excellent title', names, None), {}),
        # Invalid visibility
        (('Excellent title', names, pubdate), {'visible': "something"}),
        # Not enough affiliations
        (('Excellent title', names, pubdate), {'affiliations': ['ENS']}),
    ]
    for args, kwargs in invalid_calls:
        self.assertRaises(ValueError, BarePaper.create, *args, **kwargs)
def test_paper_with_empty_slug(self):
    """
    A title made only of characters that slugify ignores gives an empty
    slug; the paper page is then checked with that slug.
    """
    title = '!@#$%^*()'
    authors = [BareName.create('Jean', 'Saisrien')]
    paper = Paper.get_or_create(title, authors, datetime.date(2016, 7, 2))
    # Force the paper to be visible even if it is an orphan
    paper.visible = True
    paper.save()

    self.assertEqual(paper.slug, '')
    self.checkPage('paper', args=[paper.pk, paper.slug])
def test_add_author(self):
    """
    add_author puts the new author at the requested place and rejects
    position 8 on a three-author paper with ValueError.
    """
    names = [BareName.create(first, last) for first, last in (
        ('Peter', 'Johnstone'),
        ('Xing', 'Li'),
        ('John', 'Dubuc'),
    )]
    first_name, middle_name, last_name = names

    pubdate = datetime.date(year=2012, month=1, day=9)
    p = BarePaper.create('The title', [first_name], pubdate)

    # Appended at the end when no position is given
    p.add_author(BareAuthor(name=last_name))
    self.assertEqual(len(p.authors), 2)

    # Inserted at index 1, which restores the order of `names`
    p.add_author(BareAuthor(name=middle_name), position=1)
    self.assertListEqual(p.author_names(), names)

    extra = BareAuthor(name=BareName.create('Cantor', 'Bernstein'))
    self.assertRaises(ValueError, p.add_author, extra, position=8)
def test_deposit_on_behalf_of(self):
    """
    Dry-run a deposit with ``on_behalf_of`` set in the preferences and
    expect the resulting status to be 'published'.
    """
    paper = Paper.create_by_doi('10.1007/978-3-662-47666-6_5')

    preferences = self.proto.get_preferences(self.user)
    # sample user id on the sandbox, with name "Jean Saisrien"
    preferences.on_behalf_of = 'mweg3'

    extra_author = BareAuthor(name=BareName.create_bare('Jean', 'Saisrien'))
    paper.add_author(extra_author)
    paper.save()

    deposit_fields = {
        'license': '58fd62fcda3e2400012ca5d3',
        'abstract': 'Salagadoola menchicka boola bibbidi-bobbidi-boo.',
        'subjects': ['59552884da3e240081ba32de'],
        'tags': 'Pumpkin, Mouse, Godmother',
    }
    request = self.dry_deposit(paper, **deposit_fields)
    self.assertEqualOrLog(request.status, 'published')
def test_deposit_on_behalf_of(self):
    """
    Deposit (dry run) a paper while ``on_behalf_of`` is set in the
    preferences, and check that the request status is 'published'.
    """
    paper = Paper.create_by_doi('10.1007/978-3-662-47666-6_5')
    prefs = self.proto.get_preferences(self.user)
    # 'mweg3' is a sample user id on the sandbox, with name "Jean Saisrien"
    prefs.on_behalf_of = 'mweg3'

    jean = BareName.create_bare('Jean', 'Saisrien')
    paper.add_author(BareAuthor(name=jean))
    paper.save()

    request = self.dry_deposit(
        paper,
        license='58fd62fcda3e2400012ca5d3',
        abstract='Salagadoola menchicka boola bibbidi-bobbidi-boo.',
        subjects=['59552884da3e240081ba32de'],
        tags='Pumpkin, Mouse, Godmother',
    )
    status = request.status
    self.assertEqualOrLog(status, 'published')
def authors_and_orcids(self):
    """
    :returns: two lists of equal length, the first with BareName objects
        representing authors, the second with ORCID ids (or None) for
        each of these authors. Authors for which no name could be built
        (BareName.create_bare returned None) are left out of both lists.
    """
    authors = self.authors
    orcids = affiliate_author_with_orcid(self.profile.name, self.id, authors)
    bare_names = [
        BareName.create_bare(first, last) for first, last in self.authors
    ]

    kept_names = []
    kept_orcids = []
    for bare_name, orcid in zip(bare_names, orcids):
        if bare_name is None:
            continue
        kept_names.append(bare_name)
        kept_orcids.append(orcid)
    return kept_names, kept_orcids
def _get_authors(cls, data):
    """
    Extract the author names from citeproc metadata.

    :param data: citeproc metadata
    :returns: non-empty list of BareName.create_bare results
    :raises: CiteprocAuthorError if 'author' is not a list, if any author
        cannot be converted to a name pair, or if the list is empty
    """
    raw_authors = data.get('author')
    if not isinstance(raw_authors, list):
        raise CiteprocAuthorError('No list of authors in metadata')

    name_pairs = [cls._convert_to_name_pair(raw) for raw in raw_authors]
    if None in name_pairs:
        raise CiteprocAuthorError('Author list compromised')

    bare_names = []
    for first, last in name_pairs:
        bare_names.append(BareName.create_bare(first, last))

    if bare_names:
        return bare_names
    raise CiteprocAuthorError('No list of authors in metadata')
def api_paper_query(request):
    """
    Look up a paper from a JSON request body, by DOI or by metadata.

    The body must be a JSON object. If it has a non-empty 'doi', the paper
    is fetched (or created) from that DOI. Otherwise 'title', 'date' and
    'authors' are validated and used to compute a fingerprint, which is
    looked up in the database.

    :param request: request whose ``body`` holds the UTF-8 JSON payload
    :returns: ``{'status': 'ok', 'paper': ...}`` when a paper is found,
        or the pair ``({'status': 'not found'}, 404)`` when no stored
        paper has the computed fingerprint
    :raises: BadRequest if the payload or any of its fields is invalid
    """
    try:
        fields = json.loads(request.body.decode('utf-8'))
    except (ValueError, UnicodeDecodeError):
        raise BadRequest('Invalid JSON payload')
    # Valid JSON can also be a list, a string, a number...: only an object
    # supports the .get() lookups below.
    if not isinstance(fields, dict):
        raise BadRequest('Invalid JSON payload')

    doi = fields.get('doi')
    if doi:
        p = None
        try:
            p = Paper.get_by_doi(doi)
            if not p:
                p = Paper.create_by_doi(doi)
        except MetadataSourceException:
            # Deliberate: p stays None and is reported just below
            pass
        if p is None:
            raise BadRequest('Could not find a paper with this DOI')
        return {'status': 'ok', 'paper': p.json()}

    title = fields.get('title')
    if not isinstance(title, str) or not title or len(title) > 512:
        raise BadRequest(
            'Invalid title, has to be a non-empty string shorter than 512 characters')

    date = fields.get('date')
    if not isinstance(date, str):
        raise BadRequest('A date is required')
    try:
        date = tolerant_datestamp_to_datetime(date)
    except ValueError as e:
        raise BadRequest(str(e))

    authors = fields.get('authors')
    if not isinstance(authors, list):
        raise BadRequest('A list of authors is expected')

    parsed_authors = []
    for a in authors:
        author = None
        if not isinstance(a, dict):
            raise BadRequest('Invalid author')

        # An author is either {'first': ..., 'last': ...} or {'plain': ...}
        if 'first' in a and 'last' in a:
            if not isinstance(a['first'], str) or not isinstance(a['last'], str) or not a['last']:
                raise BadRequest('Invalid (first,last) name provided')
            else:
                author = (a['first'], a['last'])
        elif 'plain' in a:
            if not isinstance(a['plain'], str) or not a['plain']:
                raise BadRequest('Invalid plain name provided')
            else:
                author = parse_comma_name(a['plain'])

        if author is None:
            raise BadRequest('Invalid author')

        parsed_authors.append(BareName.create(author[0], author[1]))

    if not authors:
        raise BadRequest('No authors provided')

    try:
        # Validate the metadata against our data model,
        # and compute the fingerprint to look up the paper in the DB.
        # This does NOT create a paper in the database - we do not want
        # to create papers for every search query we get!
        p = BarePaper.create(title, parsed_authors, date)
    except ValueError as e:
        raise BadRequest('Invalid paper: {}'.format(e))

    try:
        model_paper = Paper.objects.get(fingerprint=p.fingerprint)
        return {'status': 'ok', 'paper': model_paper.json()}
    except Paper.DoesNotExist:
        return {'status': 'not found'}, 404
def api_paper_query(request):
    """
    Answer a paper query given as a JSON object in the request body.

    With a non-empty 'doi' field the paper is fetched (or created) from
    that DOI. Otherwise the 'title', 'date' and 'authors' fields are
    validated, a fingerprint is computed from them, and the stored paper
    with that fingerprint is returned.

    :param request: request whose ``body`` holds the UTF-8 JSON payload
    :returns: ``{'status': 'ok', 'paper': ...}`` when a paper is found,
        or the pair ``({'status': 'not found'}, 404)`` when no stored
        paper has the computed fingerprint
    :raises: BadRequest if the payload or any of its fields is invalid
    """
    try:
        fields = json.loads(request.body.decode('utf-8'))
    except (ValueError, UnicodeDecodeError):
        raise BadRequest('Invalid JSON payload')
    # json.loads also accepts lists, strings and numbers; the .get()
    # lookups below need an object.
    if not isinstance(fields, dict):
        raise BadRequest('Invalid JSON payload')

    doi = fields.get('doi')
    if doi:
        p = None
        try:
            p = Paper.get_by_doi(doi)
            if not p:
                p = Paper.create_by_doi(doi)
        except MetadataSourceException:
            # Deliberate: p stays None and is reported just below
            pass
        if p is None:
            raise BadRequest('Could not find a paper with this DOI')
        return {'status': 'ok', 'paper': p.json()}

    title = fields.get('title')
    if not isinstance(title, str) or not title or len(title) > 512:
        raise BadRequest(
            'Invalid title, has to be a non-empty string shorter than 512 characters'
        )

    date = fields.get('date')
    if not isinstance(date, str):
        raise BadRequest('A date is required')
    try:
        date = tolerant_datestamp_to_datetime(date)
    except ValueError as e:
        raise BadRequest(str(e))

    authors = fields.get('authors')
    if not isinstance(authors, list):
        raise BadRequest('A list of authors is expected')

    parsed_authors = []
    for a in authors:
        author = None
        if not isinstance(a, dict):
            raise BadRequest('Invalid author')

        # An author is either {'first': ..., 'last': ...} or {'plain': ...}
        if 'first' in a and 'last' in a:
            if not isinstance(a['first'], str) or not isinstance(
                    a['last'], str) or not a['last']:
                raise BadRequest('Invalid (first,last) name provided')
            else:
                author = (a['first'], a['last'])
        elif 'plain' in a:
            if not isinstance(a['plain'], str) or not a['plain']:
                raise BadRequest('Invalid plain name provided')
            else:
                author = parse_comma_name(a['plain'])

        if author is None:
            raise BadRequest('Invalid author')

        parsed_authors.append(BareName.create(author[0], author[1]))

    if not authors:
        raise BadRequest('No authors provided')

    try:
        # Validate the metadata against our data model,
        # and compute the fingerprint to look up the paper in the DB.
        # This does NOT create a paper in the database - we do not want
        # to create papers for every search query we get!
        p = BarePaper.create(title, parsed_authors, date)
    except ValueError as e:
        raise BadRequest('Invalid paper: {}'.format(e))

    try:
        model_paper = Paper.objects.get(fingerprint=p.fingerprint)
        return {'status': 'ok', 'paper': model_paper.json()}
    except Paper.DoesNotExist:
        return {'status': 'not found'}, 404
def api_paper_query(request):
    """
    Build a paper from a JSON object in the request body and return it.

    With a non-empty 'doi' field the paper is created from that DOI.
    Otherwise the 'title', 'date' and 'authors' fields are validated and
    a BarePaper is built from them.

    :param request: request whose ``body`` holds the UTF-8 JSON payload
    :returns: ``{'status': 'ok', 'paper': ...}``
    :raises: BadRequest if the payload or any of its fields is invalid
    """
    # NOTE(review): the `unicode` checks below imply this block targets
    # Python 2 — confirm before porting.
    try:
        fields = json.loads(request.body.decode('utf-8'))
    except (ValueError, UnicodeDecodeError):
        raise BadRequest('Invalid JSON payload')
    # json.loads also accepts lists, strings and numbers; the .get()
    # lookups below need an object.
    if not isinstance(fields, dict):
        raise BadRequest('Invalid JSON payload')

    doi = fields.get('doi')
    if doi:
        p = None
        try:
            p = Paper.create_by_doi(doi, bare=True)
        except MetadataSourceException:
            # Deliberate: p stays None and is reported just below
            pass
        if p is None:
            raise BadRequest('Could not find a paper with this DOI')
        return {'status': 'ok', 'paper': p.json()}

    title = fields.get('title')
    if not isinstance(title, unicode) or not title or len(title) > 512:
        raise BadRequest(
            'Invalid title, has to be a non-empty string shorter than 512 characters'
        )

    date = fields.get('date')
    if not isinstance(date, unicode):
        raise BadRequest('A date is required')
    try:
        date = tolerant_datestamp_to_datetime(date)
    except ValueError as e:
        raise BadRequest(unicode(e))

    authors = fields.get('authors')
    if not isinstance(authors, list):
        raise BadRequest('A list of authors is expected')

    parsed_authors = []
    for a in authors:
        author = None
        if not isinstance(a, dict):
            raise BadRequest('Invalid author')

        # An author is either {'first': ..., 'last': ...} or {'plain': ...}
        if 'first' in a and 'last' in a:
            if not isinstance(a['first'], unicode) or not isinstance(
                    a['last'], unicode) or not a['last']:
                raise BadRequest('Invalid (first,last) name provided')
            else:
                author = (a['first'], a['last'])
        elif 'plain' in a:
            if not isinstance(a['plain'], unicode) or not a['plain']:
                raise BadRequest('Invalid plain name provided')
            else:
                author = parse_comma_name(a['plain'])

        if author is None:
            raise BadRequest('Invalid author')

        parsed_authors.append(BareName.create(author[0], author[1]))

    if not authors:
        raise BadRequest('No authors provided')

    try:
        p = BarePaper.create(title, parsed_authors, date)
    except ValueError:
        raise BadRequest('Invalid paper')

    return {'status': 'ok', 'paper': p.json()}
def save_doi_metadata(self, metadata, extra_orcids=None):
    """
    Given the metadata as Citeproc+JSON or from CrossRef, create the
    associated paper and publication

    :param metadata: dict with at least 'author', 'title' and 'DOI'
    :param extra_orcids: an optional orcids list, which will be unified
        with the orcids extracted from the metadata.
        This is useful for the ORCID interface.
    :returns: the paper, created if needed
    :raises: ValueError if the metadata is not a dict, or lacks a usable
        author list, title, DOI or publication date
    """
    # Normalize metadata
    if metadata is None or not isinstance(metadata, dict):
        raise ValueError('Invalid metadata format, expecting a dict')
    if not metadata.get('author'):
        raise ValueError('No author provided')

    if not metadata.get('title'):
        raise ValueError('No title')

    if not to_doi(metadata.get('DOI')):
        raise ValueError("No DOI, skipping")

    pubdate = get_publication_date(metadata)

    if pubdate is None:
        raise ValueError('No pubdate')

    title = metadata['title']
    # CrossRef metadata stores titles in lists
    if isinstance(title, list):
        title = title[0]
    subtitle = metadata.get('subtitle')
    if subtitle:
        if isinstance(subtitle, list):
            subtitle = subtitle[0]
        title += ': '+subtitle

    # Materialize the pairs: on Python 3 map() is a one-shot iterator, so
    # the membership test below would exhaust it and leave `authors` empty.
    name_pairs = list(map(convert_to_name_pair, metadata['author']))
    if None in name_pairs:
        raise ValueError('Invalid author')
    authors = [BareName.create_bare(first, last)
               for first, last in name_pairs]

    def get_affiliation(author_elem):
        # First affiliation that has a name, or None if there is none
        for dct in author_elem.get('affiliation', []):
            if 'name' in dct:
                return dct['name']

    def get_orcid(author_elem):
        # Validated ORCID of the author, or None
        orcid = validate_orcid(author_elem.get('ORCID'))
        if orcid:
            return orcid

    new_orcids = list(map(get_orcid, metadata['author']))
    if extra_orcids:
        # The ORCID from the metadata takes precedence over the extra one
        orcids = [new or old for (old, new) in zip(
            extra_orcids, new_orcids)]
    else:
        orcids = new_orcids
    affiliations = list(map(get_affiliation, metadata['author']))

    paper = BarePaper.create(title, authors, pubdate,
                             visible=True, affiliations=affiliations,
                             orcids=orcids)

    result = create_publication(paper, metadata)

    if result is None:  # Creating the publication failed!
        # Make sure the paper only appears if it is still associated
        # with another source.
        paper.update_visible()
    else:
        paper = result[0]

    return paper
def save_doi_metadata(self, metadata, extra_orcids=None):
    """
    Given the metadata as Citeproc+JSON or from CrossRef, create the
    associated paper and publication

    :param metadata: dict with at least 'author', 'title' and 'DOI'
    :param extra_orcids: an optional orcids list, which will be unified
        with the orcids extracted from the metadata.
        This is useful for the ORCID interface.
    :returns: the paper, created if needed
    :raises: ValueError if the metadata is not a dict, or lacks a usable
        author list, title, DOI or publication date
    """
    # Normalize metadata
    if metadata is None or not isinstance(metadata, dict):
        raise ValueError('Invalid metadata format, expecting a dict')
    if not metadata.get('author'):
        raise ValueError('No author provided')

    if not metadata.get('title'):
        raise ValueError('No title')

    if not to_doi(metadata.get('DOI')):
        raise ValueError("No DOI, skipping")

    pubdate = get_publication_date(metadata)

    if pubdate is None:
        raise ValueError('No pubdate')

    title = metadata['title']
    # CrossRef metadata stores titles in lists
    if isinstance(title, list):
        title = title[0]
    subtitle = metadata.get('subtitle')
    if subtitle:
        if isinstance(subtitle, list):
            subtitle = subtitle[0]
        title += ': '+subtitle

    name_pairs = list(map(convert_to_name_pair, metadata['author']))
    if None in name_pairs:
        raise ValueError('Invalid author')
    authors = [BareName.create_bare(first, last)
               for first, last in name_pairs]

    def get_affiliation(author_elem):
        # First affiliation that has a name, or None if there is none
        for dct in author_elem.get('affiliation', []):
            if 'name' in dct:
                return dct['name']

    def get_orcid(author_elem):
        # Validated ORCID of the author, or None
        orcid = validate_orcid(author_elem.get('ORCID'))
        if orcid:
            return orcid

    new_orcids = list(map(get_orcid, metadata['author']))
    if extra_orcids:
        # remove the extra_orcids if they already exist on different authors
        set_of_extra_orcids = {x for x in extra_orcids if x is not None}
        new_orcids = [(x if x not in set_of_extra_orcids else None)
                      for x in new_orcids]
        # now do the union
        orcids = [new or old for (old, new) in zip(
            extra_orcids, new_orcids)]
    else:
        orcids = new_orcids
    affiliations = list(map(get_affiliation, metadata['author']))

    paper = BarePaper.create(title, authors, pubdate,
                             visible=True, affiliations=affiliations,
                             orcids=orcids)

    result = create_publication(paper, metadata)

    if result is None:  # Creating the publication failed!
        # Make sure the paper only appears if it is still associated
        # with another source.
        paper.update_visible()
    else:
        paper = result[0]

    return paper
def setUp(self):
    """
    Store a two-author BarePaper in ``self.ist`` for the tests to use.
    """
    authors = [
        BareName.create('Alfred', 'Kastler'),
        BareName.create('John', 'Dubuc'),
    ]
    pubdate = datetime.date(year=2015, month=3, day=2)
    self.ist = BarePaper.create('Groundbreaking Results', authors, pubdate)
def api_paper_query(request):
    """
    Build or fetch a paper from a JSON object in the request body.

    With a non-empty 'doi' field the paper is fetched (or created) from
    that DOI. Otherwise the 'title', 'date' and 'authors' fields are
    validated and a BarePaper is built from them.

    :param request: request whose ``body`` holds the UTF-8 JSON payload
    :returns: ``{'status': 'ok', 'paper': ...}``
    :raises: BadRequest if the payload or any of its fields is invalid
    """
    try:
        fields = json.loads(request.body.decode('utf-8'))
    except (ValueError, UnicodeDecodeError):
        raise BadRequest('Invalid JSON payload')
    # json.loads also accepts lists, strings and numbers; the .get()
    # lookups below need an object.
    if not isinstance(fields, dict):
        raise BadRequest('Invalid JSON payload')

    doi = fields.get('doi')
    if doi:
        p = None
        try:
            p = Paper.get_by_doi(doi)
            if not p:
                p = Paper.create_by_doi(doi)
        except MetadataSourceException:
            # Deliberate: p stays None and is reported just below
            pass
        if p is None:
            raise BadRequest('Could not find a paper with this DOI')
        return {'status': 'ok', 'paper': p.json()}

    title = fields.get('title')
    if not isinstance(title, str) or not title or len(title) > 512:
        raise BadRequest(
            'Invalid title, has to be a non-empty string shorter than 512 characters')

    date = fields.get('date')
    if not isinstance(date, str):
        raise BadRequest('A date is required')
    try:
        date = tolerant_datestamp_to_datetime(date)
    except ValueError as e:
        raise BadRequest(str(e))

    authors = fields.get('authors')
    if not isinstance(authors, list):
        raise BadRequest('A list of authors is expected')

    parsed_authors = []
    for a in authors:
        author = None
        if not isinstance(a, dict):
            raise BadRequest('Invalid author')

        # An author is either {'first': ..., 'last': ...} or {'plain': ...}
        if 'first' in a and 'last' in a:
            if not isinstance(a['first'], str) or not isinstance(a['last'], str) or not a['last']:
                raise BadRequest('Invalid (first,last) name provided')
            else:
                author = (a['first'], a['last'])
        elif 'plain' in a:
            if not isinstance(a['plain'], str) or not a['plain']:
                raise BadRequest('Invalid plain name provided')
            else:
                author = parse_comma_name(a['plain'])

        if author is None:
            raise BadRequest('Invalid author')

        parsed_authors.append(BareName.create(author[0], author[1]))

    if not authors:
        raise BadRequest('No authors provided')

    try:
        p = BarePaper.create(title, parsed_authors, date)
    except ValueError:
        raise BadRequest('Invalid paper')

    return {'status': 'ok', 'paper': p.json()}
def setUp(self):
    """
    Create the BarePaper fixture ``self.ist`` with two authors.
    """
    alfred = BareName.create('Alfred', 'Kastler')
    john = BareName.create('John', 'Dubuc')
    self.ist = BarePaper.create(
        'Groundbreaking Results',
        [alfred, john],
        datetime.date(year=2015, month=3, day=2),
    )