def test_insertion(self):
    """
    Authors present in only one of the two lists are kept in the result.

    Their index pair carries None on the side of the list that does not
    contain them.
    """
    # Extra author at the end of the second list only
    appended = unify_name_lists(
        [('Jean', 'Dupont'), ('Marie', 'Dupré'),
         ('Alphonse', 'de Lamartine')],
        [('J.', 'Dupont'), ('M.', 'Dupré'), ('A.', 'de Lamartine'),
         ('R.', 'Badinter')])
    expected_appended = [
        (('Jean', 'Dupont'), (0, 0)),
        (('Marie', 'Dupré'), (1, 1)),
        (('Alphonse', 'de Lamartine'), (2, 2)),
        (('R.', 'Badinter'), (None, 3)),
    ]
    self.assertEqual(appended, expected_appended)

    # Extra author at the start of the first list only
    prepended = unify_name_lists(
        [('Élise', 'Chaumont'), ('Jean', 'Dupont'), ('Marie', 'Dupré'),
         ('Alphonse', 'de Lamartine')],
        [('J.', 'Dupont'), ('M.', 'Dupré'), ('A.', 'de Lamartine')])
    expected_prepended = [
        (('Élise', 'Chaumont'), (0, None)),
        (('Jean', 'Dupont'), (1, 0)),
        (('Marie', 'Dupré'), (2, 1)),
        (('Alphonse', 'de Lamartine'), (3, 2)),
    ]
    self.assertEqual(prepended, expected_prepended)
def test_same_last_name(self):
    """
    Two authors sharing a last name are paired by their first names.

    The second list gives them in the opposite order; either ordering of
    the unified result is accepted.
    """
    unified = unify_name_lists(
        [('Jean', 'Dupont'), ('Marie', 'Dupont')],
        [('M.', 'Dupont'), ('J. P.', 'Dupont')])
    jean = (('Jean P.', 'Dupont'), (0, 1))
    marie = (('Marie', 'Dupont'), (1, 0))
    # assertIn shows the actual value on failure, which
    # assertTrue(x in y) does not.
    self.assertIn(unified, [[jean, marie], [marie, jean]])
def test_insertion(self):
    """
    Check that an author missing from one list is inserted in the result.

    The index pair of such an author has None on the side of the list
    that does not contain it.
    """
    cases = [
        # (first list, second list, expected unification)
        (
            [('Jean', 'Dupont'), ('Marie', 'Dupré'),
             ('Alphonse', 'de Lamartine')],
            [('J.', 'Dupont'), ('M.', 'Dupré'), ('A.', 'de Lamartine'),
             ('R.', 'Badinter')],
            [(('Jean', 'Dupont'), (0, 0)),
             (('Marie', 'Dupré'), (1, 1)),
             (('Alphonse', 'de Lamartine'), (2, 2)),
             (('R.', 'Badinter'), (None, 3))],
        ),
        (
            [('Élise', 'Chaumont'), ('Jean', 'Dupont'), ('Marie', 'Dupré'),
             ('Alphonse', 'de Lamartine')],
            [('J.', 'Dupont'), ('M.', 'Dupré'), ('A.', 'de Lamartine')],
            [(('Élise', 'Chaumont'), (0, None)),
             (('Jean', 'Dupont'), (1, 0)),
             (('Marie', 'Dupré'), (2, 1)),
             (('Alphonse', 'de Lamartine'), (3, 2))],
        ),
    ]
    for first_list, second_list, expected in cases:
        self.assertEqual(
            unify_name_lists(first_list, second_list), expected)
def test_same_last_name(self):
    """
    Authors with the same last name must be told apart by first name.

    The two lists give the authors in opposite orders, and the unified
    list is accepted in either order.
    """
    acceptable = [
        [(('Jean P.', 'Dupont'), (0, 1)), (('Marie', 'Dupont'), (1, 0))],
        [(('Marie', 'Dupont'), (1, 0)), (('Jean P.', 'Dupont'), (0, 1))],
    ]
    # assertIn (rather than assertTrue(... in ...)) prints the actual
    # result when the check fails.
    self.assertIn(
        unify_name_lists(
            [('Jean', 'Dupont'), ('Marie', 'Dupont')],
            [('M.', 'Dupont'), ('J. P.', 'Dupont')]),
        acceptable)
def test_inverted(self):
    """
    First and last names are swapped in one of the two lists.

    Three entries with a non-None unified name are expected in the
    result.
    """
    # in the wild:
    # https://doi.org/10.1371/journal.pone.0156198
    # http://hdl.handle.net/11573/870611
    unified = unify_name_lists(
        [
            ('Sarnelli', 'Giovanni'),
            ("d'Alessandro", 'Alessandra'),
        ],
        [
            ('Giovanni', 'Sarnelli'),
            ('Giovanni', 'Domenico de Palma'),
            ('Alessandra', "D'Alessandro"),
        ])
    # Identity test against None, instead of the original `!= None`
    kept = [entry for entry in unified if entry[0] is not None]
    self.assertEqual(len(kept), 3)
def test_inverted(self):
    """
    One list gives the names as (last, first) instead of (first, last).

    The unified result must contain three entries whose name is not
    None.
    """
    # in the wild:
    # https://doi.org/10.1371/journal.pone.0156198
    # http://hdl.handle.net/11573/870611
    swapped = [
        ('Sarnelli', 'Giovanni'),
        ("d'Alessandro", 'Alessandra'),
    ]
    regular = [
        ('Giovanni', 'Sarnelli'),
        ('Giovanni', 'Domenico de Palma'),
        ('Alessandra', "D'Alessandro"),
    ]
    # `is not None` replaces the original `!= None` comparison
    named = [
        pair for pair in unify_name_lists(swapped, regular)
        if pair[0] is not None
    ]
    self.assertEqual(len(named), 3)
def test_simple(self):
    """
    Basic unification cases: empty lists, identical names, initials
    matched against full first names, and lists given in the same order.
    """
    cases = [
        # (first list, second list, expected unification)
        ([], [], []),
        (
            [('Jean', 'Dupont')],
            [('Jean', 'Dupont')],
            [(('Jean', 'Dupont'), (0, 0))],
        ),
        (
            [('Jean', 'Dupont')],
            [('J.', 'Dupont')],
            [(('Jean', 'Dupont'), (0, 0))],
        ),
        (
            [('Jean', 'Dupont')],
            [('J. F.', 'Dupont')],
            [(('Jean F.', 'Dupont'), (0, 0))],
        ),
        (
            [('Jean', 'Dupont'), ('Marie', 'Dupré'),
             ('Alphonse', 'de Lamartine')],
            [('J.', 'Dupont'), ('M.', 'Dupré'), ('A.', 'de Lamartine')],
            [(('Jean', 'Dupont'), (0, 0)),
             (('Marie', 'Dupré'), (1, 1)),
             (('Alphonse', 'de Lamartine'), (2, 2))],
        ),
        (
            [('Antonin', 'Delpeuch'), ('Anne', 'Preller')],
            [('Antonin', 'Delpeuch'), ('Anne', 'Preller')],
            [(('Antonin', 'Delpeuch'), (0, 0)),
             (('Anne', 'Preller'), (1, 1))],
        ),
    ]
    for first_list, second_list, expected in cases:
        self.assertEqual(
            unify_name_lists(first_list, second_list), expected)
def test_simple(self):
    """
    Straightforward unifications where both lists list the same authors
    in the same order (possibly with abbreviated first names).
    """
    # Nothing to unify
    nothing = unify_name_lists([], [])
    self.assertEqual(nothing, [])

    # Same single name on both sides
    identical = unify_name_lists([('Jean', 'Dupont')], [('Jean', 'Dupont')])
    self.assertEqual(identical, [(('Jean', 'Dupont'), (0, 0))])

    # An initial on one side, the full first name on the other
    initial = unify_name_lists([('Jean', 'Dupont')], [('J.', 'Dupont')])
    self.assertEqual(initial, [(('Jean', 'Dupont'), (0, 0))])

    # A second initial only known on one side
    two_initials = unify_name_lists(
        [('Jean', 'Dupont')], [('J. F.', 'Dupont')])
    self.assertEqual(two_initials, [(('Jean F.', 'Dupont'), (0, 0))])

    # Several authors, abbreviated in the second list
    several = unify_name_lists(
        [('Jean', 'Dupont'), ('Marie', 'Dupré'),
         ('Alphonse', 'de Lamartine')],
        [('J.', 'Dupont'), ('M.', 'Dupré'), ('A.', 'de Lamartine')])
    self.assertEqual(several, [
        (('Jean', 'Dupont'), (0, 0)),
        (('Marie', 'Dupré'), (1, 1)),
        (('Alphonse', 'de Lamartine'), (2, 2)),
    ])

    # Two identical lists of two authors
    same_lists = unify_name_lists(
        [('Antonin', 'Delpeuch'), ('Anne', 'Preller')],
        [('Antonin', 'Delpeuch'), ('Anne', 'Preller')])
    self.assertEqual(same_lists, [
        (('Antonin', 'Delpeuch'), (0, 0)),
        (('Anne', 'Preller'), (1, 1)),
    ])
def update_author_names(self, new_author_names, new_affiliations=None):
    """
    Improves the current list of authors by considering a new list of
    author names. Missing authors are added, and names are unified.
    If affiliations are provided, they will replace the old ones if
    they are more informative.

    Existing authors whose unified name comes back as None, or which do
    not appear in the unified list at all, are deleted.

    :param new_author_names: list of names for the new authors (the
        order matters). It is passed as-is to unify_name_lists, together
        with the (first, last) pairs of the existing authors.
    :param new_affiliations: (optional) list of affiliation strings for
        the new author names. Must have the same length as
        new_author_names; defaults to None for every new author.
    :raises AssertionError: if the two lists have different lengths.
    """
    if new_affiliations is None:
        new_affiliations = [None] * len(new_author_names)
    assert len(new_author_names) == len(new_affiliations)

    # Drop any cached author list before reading the current authors
    if hasattr(self, 'authors'):
        del self.authors
    old_authors = list(self.authors)

    # Invalidate cached properties
    if hasattr(self, 'interesting_authors'):
        del self.interesting_authors

    # Build a real list: on Python 3, map() would give a one-shot
    # iterator, which breaks any consumer needing len() or a second pass.
    old_names = [(a.name.first, a.name.last) for a in old_authors]
    unified_names = unify_name_lists(old_names, new_author_names)

    # Indices (in old_authors) of the authors mentioned in unified_names
    seen_old_names = set()
    for i, (new_name, (idx, new_idx)) in enumerate(unified_names):
        if idx is not None:  # Updating the name of an existing author
            seen_old_names.add(idx)
            author = old_authors[idx]
            if new_name is None:
                # Delete that author, it was pruned because it already
                # appears elsewhere
                if author.id is not None:
                    author.delete()
                continue

            # Names of the attributes changed on this author
            fields = []
            if idx != i:
                author.position = i
                fields.append('position')
            if new_name != (author.name.first, author.name.last):
                name = Name.lookup_name(new_name)
                name.save()
                author.name = name
                fields.append('name')
            if new_idx is not None and affiliation_is_greater(
                    new_affiliations[new_idx], author.affiliation):
                author.affiliation = new_affiliations[new_idx]
                fields.append('affiliation')
            # NOTE(review): called for every kept author, whether or not
            # a field changed above; confirm this is the intended nesting.
            author.update_name_variants_if_needed()
            if fields:
                author.name.save_if_not_saved()
                author.save()
        elif new_name is not None:  # Creating a new author
            name = Name.lookup_name(new_name)
            name.save()
            author = Author(paper=self, name=name, position=i,
                            affiliation=new_affiliations[new_idx])
            author.save()

    # Just in case unify_name_lists pruned authors without telling us…
    for idx, author in enumerate(old_authors):
        if idx not in seen_old_names:
            print("** Deleting author %d **" % author.pk)
            author.delete()

    # Invalidate our local cache
    if hasattr(self, 'authors'):
        del self.authors
def test_shallower_similarity(self):
    """
    Names differing by an accent, a hyphen and an extra initial are
    still paired together (only the index pair is checked).
    """
    unified = unify_name_lists(
        [('Clement F.', 'Pit Claudel')],
        [('Clément', 'Pit-Claudel')])
    first_entry = unified[0]
    self.assertEqual(first_entry[1], (0, 0))
def test_duplicates(self):
    """
    A name repeated in the first list is matched once; the second
    occurrence comes back with None as its unified name.
    """
    repeated = [('Jérémie', 'Boutier'), ('Jérémie', 'Boutier')]
    single = [('J.', 'Boutier')]
    expected = [
        (('Jérémie', 'Boutier'), (0, 0)),
        (None, (1, None)),
    ]
    self.assertEqual(unify_name_lists(repeated, single), expected)
def test_dirty_input(self):
    """
    Stray braces around accented letters in the second list do not
    prevent the names from being matched with their clean counterparts.
    """
    clean = [('Jérémie', 'Boutier'), ('Alphonse', 'Viger')]
    dirty = [('J{é}r{é}mie', 'Boutier'), ('A.', 'Viger')]
    expected = [
        (('Jérémie', 'Boutier'), (0, 0)),
        (('Alphonse', 'Viger'), (1, 1)),
    ]
    self.assertEqual(unify_name_lists(clean, dirty), expected)
def test_shallower_similarity(self):
    """
    An unaccented, unhyphenated name with an extra initial must be
    paired with its accented, hyphenated variant: the first entry of
    the result carries the index pair (0, 0).
    """
    without_accents = [('Clement F.', 'Pit Claudel')]
    with_accents = [('Clément', 'Pit-Claudel')]
    result = unify_name_lists(without_accents, with_accents)
    index_pair = result[0][1]
    self.assertEqual(index_pair, (0, 0))
def test_duplicates(self):
    """
    When the first list contains the same author twice, only the first
    occurrence is unified; the duplicate is returned with a None name
    and no index in the second list.
    """
    author = ('Jérémie', 'Boutier')
    result = unify_name_lists(
        [('Jérémie', 'Boutier'), ('Jérémie', 'Boutier')],
        [('J.', 'Boutier')])
    self.assertEqual(result, [(author, (0, 0)), (None, (1, None))])
def test_dirty_input(self):
    """
    First names containing braces around accented characters are
    unified with the clean spelling from the other list.
    """
    result = unify_name_lists(
        [('Jérémie', 'Boutier'), ('Alphonse', 'Viger')],
        [('J{é}r{é}mie', 'Boutier'), ('A.', 'Viger')])
    jeremie = (('Jérémie', 'Boutier'), (0, 0))
    alphonse = (('Alphonse', 'Viger'), (1, 1))
    self.assertEqual(result, [jeremie, alphonse])
def update_author_names(self, new_author_names, new_affiliations=None):
    """
    Improves the current list of authors by considering a new list of
    author names. Missing authors are added, and names are unified.
    If affiliations are provided, they will replace the old ones if
    they are more informative.

    Existing authors whose unified name comes back as None, or which do
    not appear in the unified list at all, are deleted.

    :param new_author_names: list of names for the new authors (the
        order matters). It is passed as-is to unify_name_lists, together
        with the (first, last) pairs of the existing authors.
    :param new_affiliations: (optional) list of affiliation strings for
        the new author names. Must have the same length as
        new_author_names; defaults to None for every new author.
    :raises AssertionError: if the two lists have different lengths.
    """
    if new_affiliations is None:
        new_affiliations = [None] * len(new_author_names)
    assert len(new_author_names) == len(new_affiliations)

    # Drop any cached author list before reading the current authors
    if hasattr(self, 'authors'):
        del self.authors
    old_authors = list(self.authors)

    # Invalidate cached properties
    if hasattr(self, 'interesting_authors'):
        del self.interesting_authors

    # Build a real list: on Python 3, map() would give a one-shot
    # iterator, which breaks any consumer needing len() or a second pass.
    old_names = [(a.name.first, a.name.last) for a in old_authors]
    unified_names = unify_name_lists(old_names, new_author_names)

    # Indices (in old_authors) of the authors mentioned in unified_names
    seen_old_names = set()
    for i, (new_name, (idx, new_idx)) in enumerate(unified_names):
        if idx is not None:  # Updating the name of an existing author
            seen_old_names.add(idx)
            author = old_authors[idx]
            if new_name is None:
                # Delete that author, it was pruned because it already
                # appears elsewhere
                if author.id is not None:
                    author.delete()
                continue

            # Names of the attributes changed on this author
            fields = []
            if idx != i:
                author.position = i
                fields.append('position')
            if new_name != (author.name.first, author.name.last):
                name = Name.lookup_name(new_name)
                name.save()
                author.name = name
                fields.append('name')
            if new_idx is not None and affiliation_is_greater(
                    new_affiliations[new_idx], author.affiliation):
                author.affiliation = new_affiliations[new_idx]
                fields.append('affiliation')
            # NOTE(review): called for every kept author, whether or not
            # a field changed above; confirm this is the intended nesting.
            author.update_name_variants_if_needed()
            if fields:
                author.name.save_if_not_saved()
                author.save()
        elif new_name is not None:  # Creating a new author
            name = Name.lookup_name(new_name)
            name.save()
            author = Author(paper=self, name=name, position=i,
                            affiliation=new_affiliations[new_idx])
            author.save()

    # Just in case unify_name_lists pruned authors without telling us…
    for idx, author in enumerate(old_authors):
        if idx not in seen_old_names:
            print("** Deleting author %d **" % author.pk)
            author.delete()

    # Invalidate our local cache
    if hasattr(self, 'authors'):
        del self.authors