def test_flattened_initials(self):
    """Unify a flattened pair of initials ('Jp.') with its spaced form.

    Both cases expect the merged first name to be hyphenated.
    """
    flattened = ('Jp.', 'Gendre')
    cases = (
        (('J. P.', 'Gendre'), ('J.-P.', 'Gendre')),
        (('J. Pierre', 'Gendre'), ('J.-Pierre', 'Gendre')),
    )
    for spaced, expected in cases:
        self.assertEqual(name_unification(spaced, flattened), expected)
def test_simple(self):
    """Unify identical names, an initial with a full name, and a name
    with a pair of initials."""
    cases = (
        # (first name, second name, expected unified name)
        (('Jean', 'Dupont'), ('Jean', 'Dupont'), ('Jean', 'Dupont')),
        (('J.', 'Dupont'), ('Jean', 'Dupont'), ('Jean', 'Dupont')),
        (('Anna', 'Erscher'), ('A. G.', 'Erscher'), ('Anna G.', 'Erscher')),
    )
    for left, right, expected in cases:
        self.assertEqual(name_unification(left, right), expected)
def test_fix_capitalization(self):
    """Expect the properly capitalized 'Marie-France' to be returned,
    whichever argument carries the miscapitalized variant."""
    miscapitalized = ('Marie-france', 'Sagot')
    capitalized = ('Marie-France', 'Sagot')
    argument_orders = (
        (miscapitalized, capitalized),
        (capitalized, miscapitalized),
    )
    for left, right in argument_orders:
        self.assertEqual(name_unification(left, right), capitalized)
def test_name_splitting_error(self):
    """Unify names whose first/last boundary was placed differently.

    In each pair one part of the name ('de', 'Colin') sits on a
    different side of the first/last split in the two inputs.
    """
    # TODO Not sure we can get that right with a reasonable rule
    cases = (
        (('Johannes G. de', 'Vries'),
         ('Johannes G.', 'de Vries'),
         ('Johannes G.', 'de Vries')),
        (('Éric Colin', 'de Verdière'),
         ('E.', 'Colin de Verdière'),
         ('Éric', 'Colin de Verdière')),
    )
    for left, right, expected in cases:
        self.assertEqual(name_unification(left, right), expected)
def test_simple(self):
    """Check the basic unification cases: equal names, initial versus
    full first name, and full first name versus two initials."""
    jean_dupont = ('Jean', 'Dupont')

    unified = name_unification(('Jean', 'Dupont'), ('Jean', 'Dupont'))
    self.assertEqual(unified, jean_dupont)

    unified = name_unification(('J.', 'Dupont'), ('Jean', 'Dupont'))
    self.assertEqual(unified, jean_dupont)

    unified = name_unification(('Anna', 'Erscher'), ('A. G.', 'Erscher'))
    self.assertEqual(unified, ('Anna G.', 'Erscher'))
def test_duplicated(self):
    """Unify first names that contain repeated parts, such as 'A Adi',
    'A A' or 'H-C Hsieh-Chung'."""
    # http://api.openaire.eu/oai_pmh?verb=GetRecord&metadataPrefix=oai_dc&identifier=oai:dnet:od_______567::b9714edf9d96bbe640f7a8d753be81b7
    cases = (
        # (first name, second name, expected unified name)
        (('A Adi', 'Shamir'), ('Adi', 'Shamir'), ('Adi', 'Shamir')),
        (('A A', 'Amarilli'), ('Antoine', 'Amarilli'),
         ('Antoine A.', 'Amarilli')),
        (('A A', 'Amarilli'), ('A', 'Amarilli'), ('A. A.', 'Amarilli')),
        (('H-C Hsieh-Chung', 'Chen'), ('Hsieh-Chung', 'Chen'),
         ('Hsieh-Chung', 'Chen')),
    )
    for left, right, expected in cases:
        self.assertEqual(name_unification(left, right), expected)
def test_hyphens(self):
    """Unify hyphenated or compound first names with a plain 'Jean',
    and check that a hyphenated last name still yields a result."""
    jean = ('Jean', 'Dupont')
    exact_cases = (
        (('J.-P.', 'Dupont'), ('Jean-P.', 'Dupont')),
        (('Jean Pierre', 'Dupont'), ('Jean Pierre', 'Dupont')),
        (('Jean-Pierre', 'Dupont'), ('Jean-Pierre', 'Dupont')),
    )
    for compound, expected in exact_cases:
        self.assertEqual(name_unification(compound, jean), expected)

    # For this one we don't check the output because ideally it
    # should be ('Clément F.', 'Pit-Claudel') but it is currently
    # ('Clement F.', 'Pit Claudel') which is still fine.
    unified = name_unification(('Clement F.', 'Pit Claudel'),
                               ('Clément', 'Pit-Claudel'))
    self.assertTrue(unified)
def test_duplicated(self):
    """Check unification when a first name repeats one of its own parts
    (e.g. 'A Adi' against 'Adi')."""
    # http://api.openaire.eu/oai_pmh?verb=GetRecord&metadataPrefix=oai_dc&identifier=oai:dnet:od_______567::b9714edf9d96bbe640f7a8d753be81b7
    unified = name_unification(('A Adi', 'Shamir'), ('Adi', 'Shamir'))
    self.assertEqual(unified, ('Adi', 'Shamir'))

    unified = name_unification(('A A', 'Amarilli'), ('Antoine', 'Amarilli'))
    self.assertEqual(unified, ('Antoine A.', 'Amarilli'))

    unified = name_unification(('A A', 'Amarilli'), ('A', 'Amarilli'))
    self.assertEqual(unified, ('A. A.', 'Amarilli'))

    unified = name_unification(('H-C Hsieh-Chung', 'Chen'),
                               ('Hsieh-Chung', 'Chen'))
    self.assertEqual(unified, ('Hsieh-Chung', 'Chen'))
def test_hyphens(self):
    """Check unification of hyphenated and space-separated name parts.

    Three cases pin the exact result; the last only requires a truthy
    result (see the comment next to it).
    """
    unified = name_unification(('J.-P.', 'Dupont'), ('Jean', 'Dupont'))
    self.assertEqual(unified, ('Jean-P.', 'Dupont'))

    unified = name_unification(('Jean Pierre', 'Dupont'), ('Jean', 'Dupont'))
    self.assertEqual(unified, ('Jean Pierre', 'Dupont'))

    unified = name_unification(('Jean-Pierre', 'Dupont'), ('Jean', 'Dupont'))
    self.assertEqual(unified, ('Jean-Pierre', 'Dupont'))

    # For this one we don't check the output because ideally it
    # should be ('Clément F.', 'Pit-Claudel') but it is currently
    # ('Clement F.', 'Pit Claudel') which is still fine.
    unified = name_unification(('Clement F.', 'Pit Claudel'),
                               ('Clément', 'Pit-Claudel'))
    self.assertTrue(unified)
def test_composite_last_name(self):
    """Expect the longer, composite last name ('Zappa Nardelli') to be
    kept when the other input only has 'Nardelli'."""
    # TODO this should be reasonably easy to get right
    composite = ('F.', 'Zappa Nardelli')
    shortened = ('Francesco', 'Nardelli')
    expected = ('Francesco', 'Zappa Nardelli')
    self.assertEqual(name_unification(composite, shortened), expected)
def test_quotes(self):
    """Names differing only in apostrophe style and case must unify.

    One last name uses a typographic apostrophe (’) and the other a
    plain one ('). Only the existence of a result is checked, not its
    exact value.
    """
    curly_quoted = ('Alessandra', "D’Alessandro")
    straight_quoted = ('A.', "d'Alessandro")
    # assertIsNotNone tests identity with None (instead of equality) and
    # produces a clearer failure message than assertNotEqual(..., None).
    self.assertIsNotNone(name_unification(curly_quoted, straight_quoted))
def test_empty_first_name(self):
    """Expect an empty first name to be filled in from the other name."""
    nameless = ('', 'Placet')
    complete = ('Vincent', 'Placet')
    self.assertEqual(name_unification(nameless, complete), complete)
def test_uncommon_order(self):
    """Expect a full first name to expand the second initial when that
    is the one it matches ('W. T.' + 'Timothy' -> 'W. Timothy')."""
    initials = ('W. T.', 'Gowers')
    full_name = ('Timothy', 'Gowers')
    expected = ('W. Timothy', 'Gowers')
    self.assertEqual(name_unification(initials, full_name), expected)
def test_fix_capitalization(self):
    """Check that 'Marie-france' is unified to 'Marie-France' in both
    argument orders."""
    expected = ('Marie-France', 'Sagot')

    unified = name_unification(('Marie-france', 'Sagot'),
                               ('Marie-France', 'Sagot'))
    self.assertEqual(unified, expected)

    unified = name_unification(('Marie-France', 'Sagot'),
                               ('Marie-france', 'Sagot'))
    self.assertEqual(unified, expected)
def test_uncommon_order(self):
    """Check that 'Timothy' is matched against the second initial of
    'W. T.' rather than the first."""
    unified = name_unification(('W. T.', 'Gowers'), ('Timothy', 'Gowers'))
    self.assertEqual(unified, ('W. Timothy', 'Gowers'))
def test_name_splitting_error(self):
    """Check unification when the two inputs disagree on where the
    first name ends and the last name begins."""
    # TODO Not sure we can get that right with a reasonable rule
    unified = name_unification(('Johannes G. de', 'Vries'),
                               ('Johannes G.', 'de Vries'))
    self.assertEqual(unified, ('Johannes G.', 'de Vries'))

    unified = name_unification(('Éric Colin', 'de Verdière'),
                               ('E.', 'Colin de Verdière'))
    self.assertEqual(unified, ('Éric', 'Colin de Verdière'))
def test_composite_last_name(self):
    """Check that a two-part last name is preserved when unified with a
    name carrying only its final part."""
    # TODO this should be reasonably easy to get right
    unified = name_unification(('F.', 'Zappa Nardelli'),
                               ('Francesco', 'Nardelli'))
    self.assertEqual(unified, ('Francesco', 'Zappa Nardelli'))
def test_quotes(self):
    """Check that differing apostrophe characters do not block unification.

    The inputs use "D’Alessandro" (typographic apostrophe) and
    "d'Alessandro" (plain apostrophe); the test only requires that some
    result is returned.
    """
    unified = name_unification(('Alessandra', "D’Alessandro"),
                               ('A.', "d'Alessandro"))
    # Identity check against None: the idiomatic replacement for
    # assertNotEqual(..., None), with a more informative failure message.
    self.assertIsNotNone(unified)
def test_empty_first_name(self):
    """Check that unifying with an empty first name yields the other
    input's first name."""
    unified = name_unification(('', 'Placet'), ('Vincent', 'Placet'))
    self.assertEqual(unified, ('Vincent', 'Placet'))
def test_flattened_initials(self):
    """Check that 'Jp.' is treated as two initials and merged with
    'J. P.' / 'J. Pierre' into a hyphenated first name."""
    unified = name_unification(('J. P.', 'Gendre'), ('Jp.', 'Gendre'))
    self.assertEqual(unified, ('J.-P.', 'Gendre'))

    unified = name_unification(('J. Pierre', 'Gendre'), ('Jp.', 'Gendre'))
    self.assertEqual(unified, ('J.-Pierre', 'Gendre'))