def test_affiliation_is_greater_partial_order(self):
    """
    Check that affiliation_is_greater is never true in both directions
    for a pair of affiliations, nor for an affiliation compared with
    itself.
    """
    sample_pairs = [
        (None, None),
        (None, 'Cambridge'),
        ('0000-0002-8612-8827', 'ENS'),
        ('University of Oxford, Oxfordshire', '0000-0001-5892-7431'),
    ]
    for left, right in sample_pairs:
        # The two values may not each be greater than the other
        greater_both_ways = (affiliation_is_greater(left, right) and
                             affiliation_is_greater(right, left))
        self.assertFalse(greater_both_ways)
        # A value is never greater than itself
        self.assertFalse(affiliation_is_greater(left, left))
        self.assertFalse(affiliation_is_greater(right, right))
def update_author_names(self, new_author_names, new_affiliations=None):
    """
    Merge a new list of author names into the authors of this paper.

    The current authors and the new names are unified with
    unify_name_lists: authors missing from the paper are created,
    existing ones are renamed and repositioned where needed, and
    authors pruned by the unification are deleted. A new affiliation
    replaces the stored one only when affiliation_is_greater says so.

    :param new_author_names: list of Name instances (the order matters)
    :param new_affiliations: (optional) list of affiliation strings,
        one per entry of new_author_names
    """
    if new_affiliations is None:
        new_affiliations = [None] * len(new_author_names)
    assert len(new_author_names) == len(new_affiliations)

    # Drop the locally cached author list before reading it again
    if hasattr(self, 'authors'):
        del self.authors
    previous_authors = list(self.authors)

    # Invalidate cached properties
    if hasattr(self, 'interesting_authors'):
        del self.interesting_authors

    previous_names = [(prev.name.first, prev.name.last)
                      for prev in previous_authors]
    merged_names = unify_name_lists(previous_names, new_author_names)
    handled_indices = set()

    for position, (merged_name, (old_idx, new_idx)) in enumerate(merged_names):
        if old_idx is None:
            if merged_name is not None:
                # No existing author matches: create a new one
                name = Name.lookup_name(merged_name)
                name.save()
                author = Author(
                    paper=self,
                    name=name,
                    position=position,
                    affiliation=new_affiliations[new_idx])
                author.save()
            continue

        # From here on we are dealing with an existing author
        handled_indices.add(old_idx)
        author = previous_authors[old_idx]

        if merged_name is None:
            # The author was pruned because it already appears
            # elsewhere: delete it (only if it has an id)
            if author.id is not None:
                author.delete()
            continue

        changed_fields = []

        if old_idx != position:
            author.position = position
            changed_fields.append('position')

        current_name = (author.name.first, author.name.last)
        if merged_name != current_name:
            name = Name.lookup_name(merged_name)
            name.save()
            author.name = name
            changed_fields.append('name')

        if new_idx is not None:
            candidate = new_affiliations[new_idx]
            if affiliation_is_greater(candidate, author.affiliation):
                author.affiliation = candidate
                changed_fields.append('affiliation')
                # NOTE(review): kept inside the affiliation branch;
                # confirm against the original indentation.
                author.update_name_variants_if_needed()

        # Only touch the database when something actually changed
        if changed_fields:
            author.name.save_if_not_saved()
            author.save()

    # Just in case unify_name_lists pruned authors without telling us...
    for old_idx, author in enumerate(previous_authors):
        if old_idx in handled_indices:
            continue
        print("** Deleting author %d **" % author.pk)
        author.delete()

    # Invalidate our local cache
    if hasattr(self, 'authors'):
        del self.authors
def test_affiliation_is_greater(self):
    """
    Check the value returned by affiliation_is_greater on a few
    hand-picked pairs of affiliations.
    """
    # ((first affiliation, second affiliation), expected result)
    expectations = [
        ((None, None), False),
        ((None, 'UPenn'), False),
        (('UPenn', None), True),
        (('0000-0001-8633-6098', 'Ecole normale superieure, Paris'), True),
        (('Ecole normale superieure', 'Upenn'), True),
    ]
    for (first, second), expected in expectations:
        self.assertEqual(affiliation_is_greater(first, second), expected)
def save_doi_metadata(self, metadata, extra_affiliations=None, allow_unknown_authors=False):
    """
    Given the metadata as Citeproc+JSON or from CrossRef, create the
    associated paper and publication.

    :param metadata: dict of metadata; it must contain an 'author'
        entry, a non-empty 'title' and a non-empty 'DOI'
    :param extra_affiliations: an optional affiliations list, which will
        be unified with the affiliations extracted from the metadata.
        It is only taken into account when it has exactly one entry
        per author of the metadata.
        This is useful for the ORCID interface.
    :param allow_unknown_authors: create the paper even if no author
        matches our researchers
    :returns: the paper, created if needed
    :raises ValueError: if the metadata is not a dict, if the authors,
        title, DOI or publication date are missing, or if no usable
        (or, unless allow_unknown_authors is set, no known) author
        is found
    """
    # Normalize metadata
    if not isinstance(metadata, dict):
        if metadata is not None:
            # Parenthesised so that the statement parses (and prints the
            # same text) on both Python 2 and Python 3
            print("WARNING: Invalid metadata: type is " + str(type(metadata)))
            print("The doi proxy is doing something nasty!")
        raise ValueError('Invalid metadata format, expecting a dict')
    if 'author' not in metadata:
        raise ValueError('No author provided')

    if not metadata.get('title'):
        raise ValueError('No title')

    if not metadata.get('DOI'):
        raise ValueError("No DOI, skipping")
    # NOTE(review): the normalized DOI is not used below; the call is
    # kept so that its behaviour on malformed input is unchanged.
    to_doi(metadata['DOI'])

    pubdate = get_publication_date(metadata)
    if pubdate is None:
        raise ValueError('No pubdate')

    title = metadata['title']
    # CrossRef metadata stores titles in lists
    if isinstance(title, list):
        title = title[0]
    subtitle = metadata.get('subtitle')
    if subtitle:
        if isinstance(subtitle, list):
            subtitle = subtitle[0]
        title += ': ' + subtitle

    author_elems = metadata['author']
    name_pairs = [convert_to_name_pair(elem) for elem in author_elems]
    # One entry per metadata author; None where the lookup failed
    looked_up_names = [name_lookup_cache.lookup(pair) for pair in name_pairs]
    authors = [name for name in looked_up_names if name is not None]
    no_known_author = (not allow_unknown_authors and
                       all(not name.is_known for name in authors))
    if not authors or no_known_author:
        raise ValueError('No known author')

    def get_affiliation(author_elem):
        # First, look for an ORCID id
        orcid = validate_orcid(author_elem.get('ORCID'))
        if orcid:
            return orcid
        # Otherwise return the plain affiliation, if any
        for dct in author_elem.get('affiliation', []):
            if 'name' in dct:
                return dct['name']
        return None

    affiliations = [get_affiliation(elem) for elem in author_elems]
    if extra_affiliations and len(affiliations) == len(extra_affiliations):
        for i, extra in enumerate(extra_affiliations):
            if affiliation_is_greater(extra, affiliations[i]):
                affiliations[i] = extra

    # Authors whose lookup failed were dropped from `authors`: drop their
    # affiliations too, so that both lists stay aligned entry by entry.
    affiliations = [
        affiliation
        for name, affiliation in zip(looked_up_names, affiliations)
        if name is not None
    ]

    paper = BarePaper.create(title, authors, pubdate,
                             'VISIBLE', affiliations)

    result = create_publication(paper, metadata)

    if result is None:  # Creating the publication failed!
        paper.update_visibility()
        # Make sure the paper only appears if it is still associated
        # with another source.
        # TODO add unit test for this
    else:
        paper = result[0]

    return paper
def save_doi_metadata(self, metadata, extra_affiliations=None, allow_unknown_authors=False):
    """
    Given the metadata as Citeproc+JSON or from CrossRef, create the
    associated paper and publication.

    :param metadata: dict of metadata; it must contain an 'author'
        entry, a non-empty 'title' and a non-empty 'DOI'
    :param extra_affiliations: an optional affiliations list, which will
        be unified with the affiliations extracted from the metadata.
        It is only taken into account when it has exactly one entry
        per author of the metadata.
        This is useful for the ORCID interface.
    :param allow_unknown_authors: create the paper even if no author
        matches our researchers
    :returns: the paper, created if needed
    :raises ValueError: if the metadata is not a dict, if the authors,
        title, DOI or publication date are missing, or if no usable
        (or, unless allow_unknown_authors is set, no known) author
        is found
    """
    # Normalize metadata
    if not isinstance(metadata, dict):
        if metadata is not None:
            # Parenthesised so that the statement parses (and prints the
            # same text) on both Python 2 and Python 3
            print("WARNING: Invalid metadata: type is " + str(type(metadata)))
            print("The doi proxy is doing something nasty!")
        raise ValueError('Invalid metadata format, expecting a dict')
    if 'author' not in metadata:
        raise ValueError('No author provided')

    if not metadata.get('title'):
        raise ValueError('No title')

    if not metadata.get('DOI'):
        raise ValueError("No DOI, skipping")
    # NOTE(review): the normalized DOI is not used below; the call is
    # kept so that its behaviour on malformed input is unchanged.
    to_doi(metadata['DOI'])

    pubdate = get_publication_date(metadata)
    if pubdate is None:
        raise ValueError('No pubdate')

    title = metadata['title']
    # CrossRef metadata stores titles in lists
    if isinstance(title, list):
        title = title[0]
    subtitle = metadata.get('subtitle')
    if subtitle:
        if isinstance(subtitle, list):
            subtitle = subtitle[0]
        title += ': ' + subtitle

    author_elems = metadata['author']
    name_pairs = [convert_to_name_pair(elem) for elem in author_elems]
    # One entry per metadata author; None where the lookup failed
    looked_up_names = [name_lookup_cache.lookup(pair) for pair in name_pairs]
    authors = [name for name in looked_up_names if name is not None]
    no_known_author = (not allow_unknown_authors and
                       all(not name.is_known for name in authors))
    if not authors or no_known_author:
        raise ValueError('No known author')

    def get_affiliation(author_elem):
        # First, look for an ORCID id
        orcid = validate_orcid(author_elem.get('ORCID'))
        if orcid:
            return orcid
        # Otherwise return the plain affiliation, if any
        for dct in author_elem.get('affiliation', []):
            if 'name' in dct:
                return dct['name']
        return None

    affiliations = [get_affiliation(elem) for elem in author_elems]
    if extra_affiliations and len(affiliations) == len(extra_affiliations):
        for i, extra in enumerate(extra_affiliations):
            if affiliation_is_greater(extra, affiliations[i]):
                affiliations[i] = extra

    # Authors whose lookup failed were dropped from `authors`: drop their
    # affiliations too, so that both lists stay aligned entry by entry.
    affiliations = [
        affiliation
        for name, affiliation in zip(looked_up_names, affiliations)
        if name is not None
    ]

    paper = BarePaper.create(title, authors, pubdate,
                             'VISIBLE', affiliations)

    result = create_publication(paper, metadata)

    if result is None:  # Creating the publication failed!
        paper.update_visibility()
        # Make sure the paper only appears if it is still associated
        # with another source.
        # TODO add unit test for this
    else:
        paper = result[0]

    return paper
def update_author_names(self, new_author_names, new_affiliations=None):
    """
    Merge a new list of author names into the authors of this paper.

    The current authors and the new names are unified with
    unify_name_lists: authors missing from the paper are created,
    existing ones are renamed and repositioned where needed, and
    authors pruned by the unification are deleted. A new affiliation
    replaces the stored one only when affiliation_is_greater says so.

    :param new_author_names: list of Name instances (the order matters)
    :param new_affiliations: (optional) list of affiliation strings,
        one per entry of new_author_names
    """
    if new_affiliations is None:
        new_affiliations = [None] * len(new_author_names)
    assert len(new_author_names) == len(new_affiliations)

    # Drop the locally cached author list before reading it again
    if hasattr(self, 'authors'):
        del self.authors
    previous_authors = list(self.authors)

    # Invalidate cached properties
    if hasattr(self, 'interesting_authors'):
        del self.interesting_authors

    previous_names = [(prev.name.first, prev.name.last)
                      for prev in previous_authors]
    merged_names = unify_name_lists(previous_names, new_author_names)
    handled_indices = set()

    for position, (merged_name, (old_idx, new_idx)) in enumerate(merged_names):
        if old_idx is None:
            if merged_name is not None:
                # No existing author matches: create a new one
                name = Name.lookup_name(merged_name)
                name.save()
                author = Author(
                    paper=self,
                    name=name,
                    position=position,
                    affiliation=new_affiliations[new_idx])
                author.save()
            continue

        # From here on we are dealing with an existing author
        handled_indices.add(old_idx)
        author = previous_authors[old_idx]

        if merged_name is None:
            # The author was pruned because it already appears
            # elsewhere: delete it (only if it has an id)
            if author.id is not None:
                author.delete()
            continue

        changed_fields = []

        if old_idx != position:
            author.position = position
            changed_fields.append('position')

        current_name = (author.name.first, author.name.last)
        if merged_name != current_name:
            name = Name.lookup_name(merged_name)
            name.save()
            author.name = name
            changed_fields.append('name')

        if new_idx is not None:
            candidate = new_affiliations[new_idx]
            if affiliation_is_greater(candidate, author.affiliation):
                author.affiliation = candidate
                changed_fields.append('affiliation')
                # NOTE(review): kept inside the affiliation branch;
                # confirm against the original indentation.
                author.update_name_variants_if_needed()

        # Only touch the database when something actually changed
        if changed_fields:
            author.name.save_if_not_saved()
            author.save()

    # Just in case unify_name_lists pruned authors without telling us...
    for old_idx, author in enumerate(previous_authors):
        if old_idx in handled_indices:
            continue
        print("** Deleting author %d **" % author.pk)
        author.delete()

    # Invalidate our local cache
    if hasattr(self, 'authors'):
        del self.authors