def test_similarity_with_dates(self):
    """Check how birth and death dates affect the similarity ranking.

    Three scenarios are exercised, each anchored on a person with both
    dates filled in:

    1. identical names, with dates progressively missing;
    2. identical names, with birth dates increasingly far apart;
    3. near-identical long names, with and without dates.

    The pairs handed to ``assert_similarity_order`` are listed in the
    order the test expects (presumably most similar first -- confirm
    against the helper).
    """
    # Scenario 1: same name, fewer and fewer dates known.
    anchor = self._add_person('Lucky', geboortedatum='1000', sterfdatum='2000')
    no_birth = self._add_person('Lucky', geboortedatum='', sterfdatum='2000')
    no_death = self._add_person('Lucky', geboortedatum='1000', sterfdatum='')
    no_dates = self._add_person('Lucky', geboortedatum='', sterfdatum='')
    self.assert_similarity_order(
        [(anchor, other) for other in (anchor, no_birth, no_death, no_dates)]
    )

    # Scenario 2: same name and death date, birth date drifting away.
    near_birth = self._add_person('Lucky', geboortedatum='1001', sterfdatum='2000')
    far_birth = self._add_person('Lucky', geboortedatum='1900', sterfdatum='2000')
    self.assert_similarity_order(
        [(anchor, other) for other in (anchor, near_birth, far_birth)]
    )

    # Scenario 3: long names differing only in spelling of the surname.
    dated = self._add_person(
        'Lucky, Pozzo Vladimir Estragon',
        geboortedatum='1000',
        sterfdatum='2000',
    )
    dated_variant = self._add_person(
        'Luckie, Pozzo Vladimir Estragon',
        geboortedatum='1000',
        sterfdatum='2000',
    )
    undated = self._add_person(
        'Lucky, Pozzo Vladimir Estragon',
        geboortedatum='',
        sterfdatum='',
    )
    undated_variant = self._add_person(
        'Luckie, Pozzo Vladimir Estragon',
        geboortedatum='',
        sterfdatum='',
    )

    person_score = Similarity.similarity_score(dated, dated_variant)  #@UndefinedVariable
    name_score = Similarity.ratio(
        dated.get_names()[0],
        dated_variant.get_names()[0],
    )
    # Since both persons share the same birth and death dates, their
    # score should improve with respect to the "bare" name comparison.
    self.assertTrue(person_score > name_score)

    self.assert_similarity_order(
        [(dated, other) for other in (dated, dated_variant, undated, undated_variant)]
    )
def test_with_biodes_files(self):
    """Two biodes documents for the same person must be surely equal.

    Both fixtures describe Max van Dam with identical birth and death
    dates; they differ in name order ("Dam, Max van" vs "Max van Dam"),
    in the spelling of the death place and in the amount of metadata.
    """
    # NOTE(review): the <ref target=...> below contains bare '&'
    # characters; verify whether they should be '&amp;' in the fixture.
    rkd_xml = (
        '<biodes version="1.0.1"> '
        '<fileDesc> '
        '<title/> '
        '<ref target="http://www.rkd.nl/rkddb/dispatcher.aspx?action=search&database=ChoiceArtists&search=priref=19815"/> '
        '<publisher> '
        '<name>Rijksbureau voor Kunsthistorische Documentatie</name> '
        '<ref target="http://www.rkd.nl/"/> '
        '</publisher> '
        '</fileDesc> '
        '<person> '
        '<persName>Dam, Max van</persName> '
        '<event type="birth" when="1910-03-19"> '
        '<place>Winterswijk</place> '
        '</event> '
        '<event type="death" when="1943-09-20"> '
        '<place>Sóbibor (Polen)</place> '
        '</event> '
        '<state type="occupation">schilder</state> '
        '<state type="occupation">tekenaar</state> '
        '<idno type="id">19815</idno> '
        '</person> '
        '<biography> '
        '<text>Schilder, tekenaar. Geboren: 19 maart 1910, Winterswijk. Gestorven: 20 september 1943, Sóbibor (Polen). </text> '
        '</biography> '
        '</biodes>'
    )
    portal_xml = (
        ' <biodes version="1.0.1"> '
        '<fileDesc> '
        '<title/> '
        '<publisher/> '
        '</fileDesc> '
        '<person> '
        '<idno type="id">50019330</idno> '
        '<persName>Max van Dam</persName>'
        '<event type="birth" when="1910-03-19"><place>Winterswijk</place></event>'
        '<event type="death" when="1943-09-20"><place>Sobibor, Polen</place></event>'
        '<event type="funeral"><place/></event>'
        '<event type="baptism"><place/></event>'
        '<sex value="1"/>'
        '<state type="category" idno="8">Maatschappelijke bewegingen</state>'
        '<state type="floruit" from="" to=""><place/></state>'
        '</person> '
        '<biography><snippet source_id="jews/109.xml"/></biography> '
        '</biodes>'
    )

    first = self._add_person(xml_source=rkd_xml)
    second = self._add_person(xml_source=portal_xml)

    surely_equal = Similarity.are_surely_equal(first, second)  #@UndefinedVariable
    self.assertTrue(surely_equal)
def test_sanity(self):
    """Smoke test: build, compute and sort a Similarity without errors.

    No assertions are made; the test passes as long as nothing raises.
    """
    everyone = self.repo.get_persons()
    # The second person in the repository is compared against all persons.
    similarity = Similarity(everyone[1], everyone)
    similarity.compute()
    similarity.sort()