Beispiel #1
0
 def test_flattened_initials(self):
     """Check unification of spaced first names against the flattened 'Jp.'.

     Every case calls name_unification with a spaced first name as the
     first argument and ('Jp.', 'Gendre') as the second, and compares
     the result with the expected hyphenated tuple.
     """
     cases = [
         (('J. P.', 'Gendre'), ('J.-P.', 'Gendre')),
         (('J. Pierre', 'Gendre'), ('J.-Pierre', 'Gendre')),
     ]
     for spaced, expected in cases:
         unified = name_unification(spaced, ('Jp.', 'Gendre'))
         self.assertEqual(unified, expected)
Beispiel #2
0
 def test_simple(self):
     """Check name_unification on three basic name pairs.

     Covers two identical names, an initial paired with its full first
     name, and a full first name paired with two initials.
     """
     cases = (
         (('Jean', 'Dupont'), ('Jean', 'Dupont'), ('Jean', 'Dupont')),
         (('J.', 'Dupont'), ('Jean', 'Dupont'), ('Jean', 'Dupont')),
         (('Anna', 'Erscher'), ('A. G.', 'Erscher'), ('Anna G.', 'Erscher')),
     )
     for left, right, expected in cases:
         self.assertEqual(name_unification(left, right), expected)
Beispiel #3
0
 def test_fix_capitalization(self):
     """Expect 'Marie-France' whichever argument carries the lowercase 'f'."""
     lowercased = ('Marie-france', 'Sagot')
     capitalized = ('Marie-France', 'Sagot')
     # Same pair of names, tried in both argument orders.
     for first, second in ((lowercased, capitalized),
                           (capitalized, lowercased)):
         self.assertEqual(name_unification(first, second),
                          ('Marie-France', 'Sagot'))
Beispiel #4
0
 def test_name_splitting_error(self):
     """Check pairs whose first/last name boundary differs between inputs."""
     # TODO Not sure we can get that right with a reasonable rule
     cases = [
         (('Johannes G. de', 'Vries'),
          ('Johannes G.', 'de Vries'),
          ('Johannes G.', 'de Vries')),
         (('Éric Colin', 'de Verdière'),
          ('E.', 'Colin de Verdière'),
          ('Éric', 'Colin de Verdière')),
     ]
     for left, right, expected in cases:
         self.assertEqual(name_unification(left, right), expected)
Beispiel #5
0
 def test_simple(self):
     """Check name_unification on three basic name pairs."""
     # Two identical names.
     identical = name_unification(('Jean', 'Dupont'), ('Jean', 'Dupont'))
     self.assertEqual(identical, ('Jean', 'Dupont'))

     # An initial paired with the full first name.
     from_initial = name_unification(('J.', 'Dupont'), ('Jean', 'Dupont'))
     self.assertEqual(from_initial, ('Jean', 'Dupont'))

     # A full first name paired with two initials.
     with_middle = name_unification(('Anna', 'Erscher'), ('A. G.', 'Erscher'))
     self.assertEqual(with_middle, ('Anna G.', 'Erscher'))
Beispiel #6
0
 def test_duplicated(self):
     """Check first names that repeat a token as both initial and full form."""
     # http://api.openaire.eu/oai_pmh?verb=GetRecord&metadataPrefix=oai_dc&identifier=oai:dnet:od_______567::b9714edf9d96bbe640f7a8d753be81b7
     cases = [
         (('A Adi', 'Shamir'), ('Adi', 'Shamir'), ('Adi', 'Shamir')),
         (('A A', 'Amarilli'), ('Antoine', 'Amarilli'),
          ('Antoine A.', 'Amarilli')),
         (('A A', 'Amarilli'), ('A', 'Amarilli'), ('A. A.', 'Amarilli')),
         (('H-C Hsieh-Chung', 'Chen'), ('Hsieh-Chung', 'Chen'),
          ('Hsieh-Chung', 'Chen')),
     ]
     for left, right, expected in cases:
         self.assertEqual(name_unification(left, right), expected)
Beispiel #7
0
    def test_hyphens(self):
        """Check hyphenated, spaced and initialed first names against 'Jean'."""
        cases = [
            (('J.-P.', 'Dupont'), ('Jean-P.', 'Dupont')),
            (('Jean Pierre', 'Dupont'), ('Jean Pierre', 'Dupont')),
            (('Jean-Pierre', 'Dupont'), ('Jean-Pierre', 'Dupont')),
        ]
        for compound, expected in cases:
            unified = name_unification(compound, ('Jean', 'Dupont'))
            self.assertEqual(unified, expected)

        # For this one we don't check the output because ideally it
        # should be ('Clément F.', 'Pit-Claudel') but it is currently
        # ('Clement F.', 'Pit Claudel') which is still fine.
        accented = name_unification(('Clement F.', 'Pit Claudel'),
                                    ('Clément', 'Pit-Claudel'))
        self.assertTrue(accented)
Beispiel #8
0
 def test_duplicated(self):
     """Check first names that repeat a token as both initial and full form."""
     # http://api.openaire.eu/oai_pmh?verb=GetRecord&metadataPrefix=oai_dc&identifier=oai:dnet:od_______567::b9714edf9d96bbe640f7a8d753be81b7
     shamir = name_unification(('A Adi', 'Shamir'), ('Adi', 'Shamir'))
     self.assertEqual(shamir, ('Adi', 'Shamir'))

     amarilli_full = name_unification(('A A', 'Amarilli'),
                                      ('Antoine', 'Amarilli'))
     self.assertEqual(amarilli_full, ('Antoine A.', 'Amarilli'))

     amarilli_initials = name_unification(('A A', 'Amarilli'),
                                          ('A', 'Amarilli'))
     self.assertEqual(amarilli_initials, ('A. A.', 'Amarilli'))

     chen = name_unification(('H-C Hsieh-Chung', 'Chen'),
                             ('Hsieh-Chung', 'Chen'))
     self.assertEqual(chen, ('Hsieh-Chung', 'Chen'))
Beispiel #9
0
    def test_hyphens(self):
        """Check hyphenated, spaced and initialed first names against 'Jean'."""
        hyphenated_initials = name_unification(('J.-P.', 'Dupont'),
                                               ('Jean', 'Dupont'))
        self.assertEqual(hyphenated_initials, ('Jean-P.', 'Dupont'))

        spaced = name_unification(('Jean Pierre', 'Dupont'),
                                  ('Jean', 'Dupont'))
        self.assertEqual(spaced, ('Jean Pierre', 'Dupont'))

        hyphenated = name_unification(('Jean-Pierre', 'Dupont'),
                                      ('Jean', 'Dupont'))
        self.assertEqual(hyphenated, ('Jean-Pierre', 'Dupont'))

        # For this one we don't check the output because ideally it
        # should be ('Clément F.', 'Pit-Claudel') but it is currently
        # ('Clement F.', 'Pit Claudel') which is still fine.
        accented = name_unification(('Clement F.', 'Pit Claudel'),
                                    ('Clément', 'Pit-Claudel'))
        self.assertTrue(accented)
Beispiel #10
0
 def test_composite_last_name(self):
     """Expect the two-word last name when only one input carries it."""
     # TODO this should be reasonably easy to get right
     composite = ('F.', 'Zappa Nardelli')
     single = ('Francesco', 'Nardelli')
     unified = name_unification(composite, single)
     self.assertEqual(unified, ('Francesco', 'Zappa Nardelli'))
Beispiel #11
0
 def test_quotes(self):
     """Check that differing apostrophe styles still yield a result.

     The two last names differ in the apostrophe character (typographic
     ’ versus ASCII ') and in the case of the leading 'D'. Only the
     presence of a result is checked, not its exact value.
     """
     unified = name_unification(('Alessandra', "D’Alessandro"),
                                ('A.', "d'Alessandro"))
     # NOTE(review): presumably None signals that the names could not be
     # unified -- confirm against name_unification.
     # Identity check instead of `!= None`, which is the idiomatic way
     # to test for None and gives a clearer failure message.
     self.assertIsNotNone(unified)
Beispiel #12
0
 def test_empty_first_name(self):
     """Expect the non-empty first name when the other input's is ''."""
     nameless = ('', 'Placet')
     named = ('Vincent', 'Placet')
     unified = name_unification(nameless, named)
     self.assertEqual(unified, ('Vincent', 'Placet'))
Beispiel #13
0
 def test_uncommon_order(self):
     """Expect the full name to replace the second initial, not the first."""
     initials = ('W. T.', 'Gowers')
     full = ('Timothy', 'Gowers')
     unified = name_unification(initials, full)
     self.assertEqual(unified, ('W. Timothy', 'Gowers'))
Beispiel #14
0
 def test_fix_capitalization(self):
     """Expect 'Marie-France' whichever argument carries the lowercase 'f'."""
     expected = ('Marie-France', 'Sagot')

     # Lowercase variant passed first.
     lower_first = name_unification(('Marie-france', 'Sagot'),
                                    ('Marie-France', 'Sagot'))
     self.assertEqual(lower_first, expected)

     # Lowercase variant passed second.
     lower_second = name_unification(('Marie-France', 'Sagot'),
                                     ('Marie-france', 'Sagot'))
     self.assertEqual(lower_second, expected)
Beispiel #15
0
 def test_uncommon_order(self):
     """Expect the full name to replace the second initial, not the first."""
     expected = ('W. Timothy', 'Gowers')
     unified = name_unification(('W. T.', 'Gowers'),
                                ('Timothy', 'Gowers'))
     self.assertEqual(unified, expected)
Beispiel #16
0
 def test_name_splitting_error(self):
     """Check pairs whose first/last name boundary differs between inputs."""
     # TODO Not sure we can get that right with a reasonable rule
     de_vries = name_unification(('Johannes G. de', 'Vries'),
                                 ('Johannes G.', 'de Vries'))
     self.assertEqual(de_vries, ('Johannes G.', 'de Vries'))

     de_verdiere = name_unification(('Éric Colin', 'de Verdière'),
                                    ('E.', 'Colin de Verdière'))
     self.assertEqual(de_verdiere, ('Éric', 'Colin de Verdière'))
Beispiel #17
0
 def test_composite_last_name(self):
     """Expect the two-word last name when only one input carries it."""
     # TODO this should be reasonably easy to get right
     expected = ('Francesco', 'Zappa Nardelli')
     unified = name_unification(('F.', 'Zappa Nardelli'),
                                ('Francesco', 'Nardelli'))
     self.assertEqual(unified, expected)
Beispiel #18
0
 def test_quotes(self):
     """Check that differing apostrophe styles still yield a result.

     The two last names differ in the apostrophe character (typographic
     ’ versus ASCII ') and in the case of the leading 'D'. Only the
     presence of a result is checked, not its exact value.
     """
     typographic = ('Alessandra', "D’Alessandro")
     ascii_quote = ('A.', "d'Alessandro")
     # NOTE(review): presumably None signals that the names could not be
     # unified -- confirm against name_unification.
     # assertIsNotNone tests identity, the idiomatic None check, rather
     # than going through `!=` as assertNotEqual(..., None) does.
     self.assertIsNotNone(name_unification(typographic, ascii_quote))
Beispiel #19
0
 def test_empty_first_name(self):
     """Expect the non-empty first name when the other input's is ''."""
     expected = ('Vincent', 'Placet')
     unified = name_unification(('', 'Placet'), ('Vincent', 'Placet'))
     self.assertEqual(unified, expected)
Beispiel #20
0
 def test_flattened_initials(self):
     """Check unification of spaced first names against the flattened 'Jp.'."""
     # Two separate initials against the flattened form.
     both_initials = name_unification(('J. P.', 'Gendre'),
                                      ('Jp.', 'Gendre'))
     self.assertEqual(both_initials, ('J.-P.', 'Gendre'))

     # Initial followed by a full name against the flattened form.
     initial_and_name = name_unification(('J. Pierre', 'Gendre'),
                                         ('Jp.', 'Gendre'))
     self.assertEqual(initial_and_name, ('J.-Pierre', 'Gendre'))