def test_simple(self):
    # Plain "First Last", "Last, First", hyphenated and single-word names.
    cases = [
        ('Claire Mathieu', ('Claire', 'Mathieu')),
        ('Mathieu, Claire', ('Claire', 'Mathieu')),
        ('Kenyon-Mathieu, Claire', ('Claire', 'Kenyon-Mathieu')),
        ('Arvind', ('', 'Arvind')),
    ]
    for raw, expected in cases:
        self.assertEqual(parse_comma_name(raw), expected)
def test_simple(self):
    # Inputs and the (first, last) pairs they are expected to parse to,
    # kept in two parallel tuples.
    inputs = (
        'Claire Mathieu',
        'Mathieu, Claire',
        'Kenyon-Mathieu, Claire',
        'Arvind',
    )
    outputs = (
        ('Claire', 'Mathieu'),
        ('Claire', 'Mathieu'),
        ('Claire', 'Kenyon-Mathieu'),
        ('', 'Arvind'),
    )
    for text, pair in zip(inputs, outputs):
        self.assertEqual(parse_comma_name(text), pair)
def test_hard_cases(self):
    # TODO ?
    # Leading initials and multi-word last names without a comma.
    cases = [
        ('W. Timothy Gowers', ('W. Timothy', 'Gowers')),
        ('Guido van Rossum', ('Guido', 'van Rossum')),
        ('Éric Colin de Verdière', ('Éric', 'Colin de Verdière')),
    ]
    for raw, expected in cases:
        self.assertEqual(parse_comma_name(raw), expected)
def test_hard_cases(self):
    # TODO ?
    # Each expectation is checked in turn; the first mismatch fails the test.
    gowers = parse_comma_name('W. Timothy Gowers')
    self.assertEqual(gowers, ('W. Timothy', 'Gowers'))

    van_rossum = parse_comma_name('Guido van Rossum')
    self.assertEqual(van_rossum, ('Guido', 'van Rossum'))

    colin_de_verdiere = parse_comma_name('Éric Colin de Verdière')
    self.assertEqual(colin_de_verdiere, ('Éric', 'Colin de Verdière'))
def convert_to_name_pair(dct):
    """
    Turns a name dictionary into a normalized ('First', 'Last') pair.

    {'family': 'Last', 'given': 'First'} gives ('First', 'Last'); a
    dictionary with only 'family' gives an empty first name; a dictionary
    with only 'literal' has that string parsed by parse_comma_name.
    Returns None when none of these keys is usable.
    """
    if 'family' in dct:
        # A lone family name (the 'Arvind' case) gets an empty first name
        first = dct['given'] if 'given' in dct else ''
        pair = (first, dct['family'])
    elif 'literal' in dct:
        pair = parse_comma_name(dct['literal'])
    else:
        return None

    # A falsy parse result is handed back untouched
    if not pair:
        return pair
    return (normalize_name_words(pair[0]),
            normalize_name_words(pair[1]))
def name(self):
    """
    Returns a parsed version of the "credit name" in the ORCID profile.
    If there is no such name (missing or empty), returns the normalized
    given and family names on the profile (they should exist), with ''
    standing in for any that is absent.
    """
    name_item = jpath('orcid-profile/orcid-bio/personal-details', self.json)
    credit_name = jpath('credit-name/value', name_item)
    # An empty credit name carries no information: treat it like a missing
    # one and fall back to the given/family names instead of parsing ''.
    if credit_name:
        return parse_comma_name(credit_name)
    return (normalize_name_words(jpath('given-names/value', name_item, '')),
            normalize_name_words(jpath('family-name/value', name_item, '')))
def name(self):
    """
    Gives the name of the ORCID profile as a (first, last) pair.

    A non-empty "credit name" is parsed with parse_comma_name; otherwise
    the pair is built from the normalized given and family names of the
    profile, '' standing in for any that is absent.
    """
    details = jpath('person/name', self.json)
    credit = jpath('credit-name/value', details)
    if not credit:
        given = normalize_name_words(
            jpath('given-names/value', details, ''))
        family = normalize_name_words(
            jpath('family-name/value', details, ''))
        return (given, family)
    return parse_comma_name(credit)
def other_names(self):
    """
    Returns the list of other names listed on the ORCiD profile, as
    (first, last) pairs.
    This includes the (given,family) name if a credit name was defined.
    """
    name_item = jpath('orcid-profile/orcid-bio/personal-details', self.json)
    names = []
    credit_name = jpath('credit-name/value', name_item)
    if credit_name is not None:
        # Fall back to '' so that normalize_name_words always receives a
        # string, even when the profile lacks a given or family name.
        names.append(
            (normalize_name_words(jpath('given-names/value', name_item, '')),
             normalize_name_words(jpath('family-name/value', name_item, ''))))
    other_names = jpath('other-names/other-name', name_item, default=[])
    for name in other_names:
        val = name.get('value')
        # Entries without a value are skipped
        if val is not None:
            names.append(parse_comma_name(val))
    return names
def other_names(self):
    """
    Collects the alternative names of the ORCiD profile as (first, last)
    pairs.

    When a credit name is defined, the normalized (given, family) name
    comes first; it is followed by every "other name" entry that has a
    'content', parsed with parse_comma_name.
    """
    person = jpath('person', self.json)
    collected = []
    if jpath('name/credit-name/value', person) is not None:
        given = normalize_name_words(
            jpath('name/given-names/value', person, ''))
        family = normalize_name_words(
            jpath('name/family-name/value', person, ''))
        collected.append((given, family))

    entries = jpath('other-names/other-name', person, default=[])
    # Lazily pull each entry's content, dropping the entries that have none
    contents = (entry.get('content') for entry in entries)
    collected.extend(
        parse_comma_name(content)
        for content in contents
        if content is not None
    )
    return collected
def parse_authors_list(record):
    """
    Replace the 'author' field of the record by a list of parsed names.

    The record is modified in place and returned. An empty author field
    is removed from the record; a record without the field is returned
    unchanged.
    """
    if 'author' not in record:
        return record

    if not record['author']:
        del record['author']
        return record

    # Drop "et al" and "others", then flatten line breaks
    cleaned = OTHERS_RE.sub('', ET_AL_RE.sub('', record['author']))
    record['author'] = cleaned.replace('\n', ' ')

    # One parsed name per ' and '-separated chunk
    chunks = record['author'].split(' and ')
    record['author'] = [parse_comma_name(chunk.strip()) for chunk in chunks]
    return record
def other_names(self):
    """
    Lists the alternative names of the ORCiD profile as (first, last)
    pairs.

    The normalized (given, family) name is listed first when the profile
    defines a credit name; the parsed "other name" values follow.
    """
    details = jpath('orcid-profile/orcid-bio/personal-details', self.json)
    result = []

    has_credit_name = jpath('credit-name/value', details) is not None
    if has_credit_name:
        given = normalize_name_words(
            jpath('given-names/value', details, ''))
        family = normalize_name_words(
            jpath('family-name/value', details, ''))
        result.append((given, family))

    for entry in jpath('other-names/other-name', details, default=[]):
        value = entry.get('value')
        if value is None:
            # Nothing to parse for this entry
            continue
        result.append(parse_comma_name(value))
    return result
def api_paper_query(request):
    """
    Answers a paper query sent as a JSON object in the request body.

    If the object has a non-empty 'doi', a bare paper is created from that
    DOI and returned. Otherwise a bare paper is built from 'title' (unicode
    string, non-empty, at most 512 characters), 'date' (unicode datestamp)
    and 'authors' (non-empty list of {'first': ..., 'last': ...} or
    {'plain': ...} dictionaries).

    Returns {'status': 'ok', 'paper': <paper JSON>}.
    Raises BadRequest when the payload is not a JSON object or when any of
    the fields is missing or invalid.
    """
    try:
        fields = json.loads(request.body.decode('utf-8'))
    except (ValueError, UnicodeDecodeError):
        raise BadRequest('Invalid JSON payload')
    # Valid JSON can also be a list, a string or a number, none of which
    # supports the .get() lookups below.
    if not isinstance(fields, dict):
        raise BadRequest('Invalid JSON payload')

    doi = fields.get('doi')
    if doi:
        p = None
        try:
            p = Paper.create_by_doi(doi, bare=True)
        except MetadataSourceException:
            # Reported below like any other DOI we could not resolve
            pass
        if p is None:
            raise BadRequest('Could not find a paper with this DOI')
        return {'status': 'ok', 'paper': p.json()}

    title = fields.get('title')
    if not isinstance(title, unicode) or not title or len(title) > 512:
        raise BadRequest(
            'Invalid title, has to be a non-empty string shorter than 512 characters'
        )

    date = fields.get('date')
    if not isinstance(date, unicode):
        raise BadRequest('A date is required')
    try:
        date = tolerant_datestamp_to_datetime(date)
    except ValueError as e:
        raise BadRequest(unicode(e))

    authors = fields.get('authors')
    if not isinstance(authors, list):
        raise BadRequest('A list of authors is expected')
    if not authors:
        raise BadRequest('No authors provided')

    parsed_authors = []
    for a in authors:
        author = None
        if not isinstance(a, dict):
            raise BadRequest('Invalid author')

        if 'first' in a and 'last' in a:
            # The first name may be empty, the last name may not
            if (not isinstance(a['first'], unicode)
                    or not isinstance(a['last'], unicode)
                    or not a['last']):
                raise BadRequest('Invalid (first,last) name provided')
            author = (a['first'], a['last'])
        elif 'plain' in a:
            if not isinstance(a['plain'], unicode) or not a['plain']:
                raise BadRequest('Invalid plain name provided')
            author = parse_comma_name(a['plain'])

        if author is None:
            raise BadRequest('Invalid author')
        parsed_authors.append(BareName.create(author[0], author[1]))

    try:
        p = BarePaper.create(title, parsed_authors, date)
    except ValueError:
        raise BadRequest('Invalid paper')

    return {'status': 'ok', 'paper': p.json()}
def test_initial_capitalized(self):
    # An upper-cased last name written first is moved last and recased.
    cases = [
        ('MATHIEU Claire', ('Claire', 'Mathieu')),
        ('MATHIEU C.', ('C.', 'Mathieu')),
    ]
    for raw, expected in cases:
        self.assertEqual(parse_comma_name(raw), expected)
def test_collapsed_initials(self):
    # 'CS' is expanded to separate initials, whereas 'LI' is kept as a
    # last name.
    cases = [
        ('Badiou CS', ('C. S.', 'Badiou')),
        ('Tony LI', ('Tony', 'Li')),
    ]
    for raw, expected in cases:
        self.assertEqual(parse_comma_name(raw), expected)
def test_middle_initials(self):
    # The middle initial stays attached to the first name.
    parsed = parse_comma_name('Neal E. Young')
    self.assertEqual(parsed, ('Neal E.', 'Young'))
def test_final_initials(self):
    # Trailing initials become the first name; the second input ends with
    # a doubled dot.
    cases = [
        ('Mathieu C.', ('C.', 'Mathieu')),
        ('Gowers W. T..', ('W. T.', 'Gowers')),
    ]
    for raw, expected in cases:
        self.assertEqual(parse_comma_name(raw), expected)
def test_initial_initials(self):
    # Leading initials are kept together as the first name.
    cases = [
        ('C. Mathieu', ('C.', 'Mathieu')),
        ('N. E. Young', ('N. E.', 'Young')),
    ]
    for raw, expected in cases:
        self.assertEqual(parse_comma_name(raw), expected)
def test_middle_initials(self):
    # 'E.' is expected to end up in the first name, not the last name.
    expected = ('Neal E.', 'Young')
    self.assertEqual(parse_comma_name('Neal E. Young'), expected)
def test_initial_capitalized(self):
    # Upper-cased last name in first position, followed by a full first
    # name and then by an initial.
    full_first_name = parse_comma_name('MATHIEU Claire')
    self.assertEqual(full_first_name, ('Claire', 'Mathieu'))

    initial_only = parse_comma_name('MATHIEU C.')
    self.assertEqual(initial_only, ('C.', 'Mathieu'))
def test_final_capitalized(self):
    # An upper-cased last name in final position is recased.
    cases = [
        ('Claire MATHIEU', ('Claire', 'Mathieu')),
        ('C. MATHIEU', ('C.', 'Mathieu')),
    ]
    for raw, expected in cases:
        self.assertEqual(parse_comma_name(raw), expected)
def api_paper_query(request):
    """
    Answers a paper query sent as a JSON object in the request body.

    If the object has a non-empty 'doi', a bare paper is created from that
    DOI and returned. Otherwise a bare paper is built from 'title' (unicode
    string, non-empty, at most 512 characters), 'date' (unicode datestamp)
    and 'authors' (non-empty list of {'first': ..., 'last': ...} or
    {'plain': ...} dictionaries), and then passed through
    OaiPaperSource.fetch_accessibility before being returned.

    Returns {'status': 'ok', 'paper': <paper JSON>}.
    Raises BadRequest when the payload is not a JSON object or when any of
    the fields is missing or invalid.
    """
    try:
        fields = json.loads(request.body.decode('utf-8'))
    except (ValueError, UnicodeDecodeError):
        raise BadRequest('Invalid JSON payload')
    # Valid JSON can also be a list, a string or a number, none of which
    # supports the .get() lookups below.
    if not isinstance(fields, dict):
        raise BadRequest('Invalid JSON payload')

    doi = fields.get('doi')
    if doi:
        p = None
        try:
            p = Paper.create_by_doi(doi, bare=True)
        except MetadataSourceException:
            # Reported below like any other DOI we could not resolve
            pass
        if p is None:
            raise BadRequest('Could not find a paper with this DOI')
        return {'status':'ok','paper':p.json()}

    title = fields.get('title')
    if not isinstance(title, unicode) or not title or len(title) > 512:
        raise BadRequest('Invalid title, has to be a non-empty string shorter than 512 characters')

    date = fields.get('date')
    if not isinstance(date, unicode):
        raise BadRequest('A date is required')
    try:
        date = tolerant_datestamp_to_datetime(date)
    except ValueError as e:
        raise BadRequest(unicode(e))

    authors = fields.get('authors')
    if not isinstance(authors, list):
        raise BadRequest('A list of authors is expected')
    if not authors:
        raise BadRequest('No authors provided')

    parsed_authors = []
    for a in authors:
        author = None
        if not isinstance(a, dict):
            raise BadRequest('Invalid author')

        if 'first' in a and 'last' in a:
            # The first name may be empty, the last name may not
            if (not isinstance(a['first'], unicode)
                    or not isinstance(a['last'], unicode)
                    or not a['last']):
                raise BadRequest('Invalid (first,last) name provided')
            author = (a['first'], a['last'])
        elif 'plain' in a:
            if not isinstance(a['plain'], unicode) or not a['plain']:
                raise BadRequest('Invalid plain name provided')
            author = parse_comma_name(a['plain'])

        if author is None:
            raise BadRequest('Invalid author')
        parsed_authors.append(BareName.create(author[0], author[1]))

    try:
        p = BarePaper.create(title, parsed_authors, date)
    except ValueError:
        raise BadRequest('Invalid paper')

    # Imported here, as in the original code, rather than at module level
    import backend.oai as oai
    oaisource = oai.OaiPaperSource(max_results=10)
    p = oaisource.fetch_accessibility(p)

    return {'status':'ok','paper':p.json()}
def api_paper_query(request):
    """
    Answers a paper query sent as a JSON object in the request body.

    If the object has a non-empty 'doi', the paper with that DOI is fetched
    (and created from the DOI if Paper.get_by_doi finds nothing) and
    returned. Otherwise 'title' (non-empty string of at most 512
    characters), 'date' (datestamp string) and 'authors' (non-empty list of
    {'first': ..., 'last': ...} or {'plain': ...} dictionaries) are used to
    look up an existing paper by fingerprint.

    Returns {'status': 'ok', 'paper': <paper JSON>} on success, or the pair
    ({'status': 'not found'}, 404) when no paper has that fingerprint.
    Raises BadRequest when the payload is not a JSON object or when any of
    the fields is missing or invalid.
    """
    try:
        fields = json.loads(request.body.decode('utf-8'))
    except (ValueError, UnicodeDecodeError):
        raise BadRequest('Invalid JSON payload')
    # Valid JSON can also be a list, a string or a number, none of which
    # supports the .get() lookups below.
    if not isinstance(fields, dict):
        raise BadRequest('Invalid JSON payload')

    doi = fields.get('doi')
    if doi:
        p = None
        try:
            p = Paper.get_by_doi(doi)
            if not p:
                p = Paper.create_by_doi(doi)
        except MetadataSourceException:
            # Reported below like any other DOI we could not resolve
            pass
        if p is None:
            raise BadRequest('Could not find a paper with this DOI')
        return {'status': 'ok', 'paper': p.json()}

    title = fields.get('title')
    if not isinstance(title, str) or not title or len(title) > 512:
        raise BadRequest(
            'Invalid title, has to be a non-empty string shorter than 512 characters'
        )

    date = fields.get('date')
    if not isinstance(date, str):
        raise BadRequest('A date is required')
    try:
        date = tolerant_datestamp_to_datetime(date)
    except ValueError as e:
        raise BadRequest(str(e))

    authors = fields.get('authors')
    if not isinstance(authors, list):
        raise BadRequest('A list of authors is expected')
    if not authors:
        raise BadRequest('No authors provided')

    parsed_authors = []
    for a in authors:
        author = None
        if not isinstance(a, dict):
            raise BadRequest('Invalid author')

        if 'first' in a and 'last' in a:
            # The first name may be empty, the last name may not
            if (not isinstance(a['first'], str)
                    or not isinstance(a['last'], str)
                    or not a['last']):
                raise BadRequest('Invalid (first,last) name provided')
            author = (a['first'], a['last'])
        elif 'plain' in a:
            if not isinstance(a['plain'], str) or not a['plain']:
                raise BadRequest('Invalid plain name provided')
            author = parse_comma_name(a['plain'])

        if author is None:
            raise BadRequest('Invalid author')
        parsed_authors.append(BareName.create(author[0], author[1]))

    try:
        # Validate the metadata against our data model,
        # and compute the fingerprint to look up the paper in the DB.
        # This does NOT create a paper in the database - we do not want
        # to create papers for every search query we get!
        p = BarePaper.create(title, parsed_authors, date)
    except ValueError as e:
        raise BadRequest('Invalid paper: {}'.format(e))

    try:
        model_paper = Paper.objects.get(fingerprint=p.fingerprint)
        return {'status': 'ok', 'paper': model_paper.json()}
    except Paper.DoesNotExist:
        return {'status': 'not found'}, 404
def test_final_capitalized(self):
    # Upper-cased last name in final position, after a full first name
    # and then after an initial.
    full_first_name = parse_comma_name('Claire MATHIEU')
    self.assertEqual(full_first_name, ('Claire', 'Mathieu'))

    initial_only = parse_comma_name('C. MATHIEU')
    self.assertEqual(initial_only, ('C.', 'Mathieu'))
def api_paper_query(request):
    """
    Answers a paper query sent as a JSON object in the request body.

    If the object has a non-empty 'doi', the paper with that DOI is fetched
    (and created from the DOI if Paper.get_by_doi finds nothing) and
    returned. Otherwise 'title' (non-empty string of at most 512
    characters), 'date' (datestamp string) and 'authors' (non-empty list of
    {'first': ..., 'last': ...} or {'plain': ...} dictionaries) are used to
    look up an existing paper by fingerprint.

    Returns {'status': 'ok', 'paper': <paper JSON>} on success, or the pair
    ({'status': 'not found'}, 404) when no paper has that fingerprint.
    Raises BadRequest when the payload is not a JSON object or when any of
    the fields is missing or invalid.
    """
    try:
        fields = json.loads(request.body.decode('utf-8'))
    except (ValueError, UnicodeDecodeError):
        raise BadRequest('Invalid JSON payload')
    # Valid JSON can also be a list, a string or a number, none of which
    # supports the .get() lookups below.
    if not isinstance(fields, dict):
        raise BadRequest('Invalid JSON payload')

    doi = fields.get('doi')
    if doi:
        p = None
        try:
            p = Paper.get_by_doi(doi)
            if not p:
                p = Paper.create_by_doi(doi)
        except MetadataSourceException:
            # Reported below like any other DOI we could not resolve
            pass
        if p is None:
            raise BadRequest('Could not find a paper with this DOI')
        return {'status': 'ok', 'paper': p.json()}

    title = fields.get('title')
    if not isinstance(title, str) or not title or len(title) > 512:
        raise BadRequest(
            'Invalid title, has to be a non-empty string shorter than 512 characters')

    date = fields.get('date')
    if not isinstance(date, str):
        raise BadRequest('A date is required')
    try:
        date = tolerant_datestamp_to_datetime(date)
    except ValueError as e:
        raise BadRequest(str(e))

    authors = fields.get('authors')
    if not isinstance(authors, list):
        raise BadRequest('A list of authors is expected')
    if not authors:
        raise BadRequest('No authors provided')

    parsed_authors = []
    for a in authors:
        author = None
        if not isinstance(a, dict):
            raise BadRequest('Invalid author')

        if 'first' in a and 'last' in a:
            # The first name may be empty, the last name may not
            if (not isinstance(a['first'], str)
                    or not isinstance(a['last'], str)
                    or not a['last']):
                raise BadRequest('Invalid (first,last) name provided')
            author = (a['first'], a['last'])
        elif 'plain' in a:
            if not isinstance(a['plain'], str) or not a['plain']:
                raise BadRequest('Invalid plain name provided')
            author = parse_comma_name(a['plain'])

        if author is None:
            raise BadRequest('Invalid author')
        parsed_authors.append(BareName.create(author[0], author[1]))

    try:
        # Validate the metadata against our data model,
        # and compute the fingerprint to look up the paper in the DB.
        # This does NOT create a paper in the database - we do not want
        # to create papers for every search query we get!
        p = BarePaper.create(title, parsed_authors, date)
    except ValueError as e:
        raise BadRequest('Invalid paper: {}'.format(e))

    try:
        model_paper = Paper.objects.get(fingerprint=p.fingerprint)
        return {'status': 'ok', 'paper': model_paper.json()}
    except Paper.DoesNotExist:
        return {'status': 'not found'}, 404