def test_biography(self):
    # Exercise the basic Biography life cycle: read an existing one, create
    # and save a new one, then set and read back birth/death values.
    repo = self.repo

    # get an existing biography; it is expected to carry a bioport id already
    existing_bio = list(repo.get_biographies())[5]
    # assertTrue instead of a bare ``assert``: it is not stripped under
    # ``python -O`` and reports through the unittest machinery like the
    # other checks in this test
    self.assertTrue(existing_bio.get_bioport_id())
    # it also has lots of other properties (inherited from BioDesDoc?)

    # create a source for the new biography
    source = Source(id='bioport_test')
    repo.save_source(source)

    # make a new biography
    bio = Biography(id='bioport_test/test_bio', source_id=source.id)
    self.assertEqual(bio.id, 'bioport_test/test_bio')
    bio.from_args(
        url_biografie='http://ladida/didum',
        naam_publisher='nogeensiets',
        url_publisher='http://pbulihser_url',
        naam='Tiedel Doodle Dum',
    )

    # save it
    self._save_biography(bio)

    # the new biography now also has a bioport_id
    self.assertTrue(bio.get_bioport_id())
    self.assertEqual(bio.title(), 'Tiedel Doodle Dum')

    # the biography and its person share the same bioport id
    person = bio.get_person()
    self.assertEqual(bio.get_bioport_id(), person.get_bioport_id())

    # setting a value twice keeps only the last value
    bio.set_value('geboortedatum', '2009-01-01')
    bio.set_value('geboortedatum', '2009-01-02')
    self.assertEqual(bio.get_value('geboortedatum'), '2009-01-02')

    # setting the birth place leaves the birth date untouched
    bio.set_value('geboortedatum', '2009-01-02')
    bio.set_value('geboorteplaats', 'nog een test')
    self.assertEqual(bio.get_value('geboortedatum'), '2009-01-02')

    # same for the death date and place
    bio.set_value('sterfdatum', '2010-01-02')
    bio.set_value('sterfplaats', 'nog een test')
    self.assertEqual(bio.get_value('sterfdatum'), '2010-01-02')

    # an empty string clears the value
    bio.set_value('sterfdatum', u'')
    self.assertEqual(bio.get_value('sterfdatum'), None)
def get_bio(self, bdate=None, ddate=None, bplace=None, dplace=None):
    """Create, save and return a new test biography.

    Each call increments ``self.x`` and uses the new value (as a string) as
    the biography id. Any of ``bdate``, ``ddate``, ``bplace`` and ``dplace``
    that is not None is stored under 'birth_date', 'death_date',
    'birth_place' and 'death_place' respectively before saving.
    """
    self.x += 1
    new_bio = Biography(id=str(self.x), source_id=u"knaw", repository=self.repo)
    new_bio.from_args(
        url_biografie='http://google.it',
        naam_publisher='jelle',
        url_publisher='http://gerbrandy.com',
        naam="gino",
    )

    # (field, value) pairs, applied in this order; None means "not given"
    optional_values = (
        ('birth_date', bdate),
        ('death_date', ddate),
        ('birth_place', bplace),
        ('death_place', dplace),
    )
    for field, value in optional_values:
        if value is None:
            continue
        new_bio.set_value(field, value)

    self._save_biography(new_bio)
    return new_bio
def test_snippet(self):
    # Check Biography.snippet() on two parsed biodes documents and on a
    # biography built from keyword arguments.

    # the part both test documents have in common, up to and including the
    # opening <biography> tag
    shared_head = (
        '<biodes version="1.0"> '
        '<fileDesc> '
        '<author>Nationaal Archief</author> '
        '<ref target="http://proxy.handle.net/10648/cd48fc47-2b91-42f6-bb63-c2de770135b1"/> '
        '<date when="1920"/> '
        '<publisher> '
        '<name>Nationaal Archief</name> '
        '<ref target="http://www.gahetna.nl/collectie/archief"/> '
        '</publisher> '
        '</fileDesc> '
        '<person> '
        '<persName>Jan Daniël Cornelis Carel Willem de Constant Rebecque</persName> '
        '</person> '
        '<biography> '
    )

    # a document whose biography text holds only a title and an empty span
    titled_doc = (
        '<?xml version="1.0" encoding="UTF-8"?> '
        '<!--2011-05-18 11:26:12--> '
        + shared_head +
        '<text> '
        '<title>Inventaris van het archief van De Constant Rebecque</title> '
        '<span></span> '
        '</text> '
        '</biography> '
        '</biodes> '
    )
    parsed = Biography().from_string(titled_doc)
    self.assertEqual(
        parsed.snippet(1000),
        'Inventaris van het archief van De Constant Rebecque',
    )

    # a document with an empty <biography> element yields an empty snippet
    empty_doc = shared_head + '</biography> </biodes> '
    parsed = Biography().from_string(empty_doc)
    self.assertEqual(parsed.snippet(), '')

    test_source = Source(id='bioport_test')
    self.repo.save_source(test_source)

    # make a new biography
    bio = Biography(id='bioport_test/test_bio', source_id=test_source.id)
    self.assertEqual(bio.id, 'bioport_test/test_bio')  # XXX Do we need this?

    long_text = (
        'Lemuel is in charge, he raises his hatchet on which the blood will never dry, '
        'but not to hit anyone, he will not hit anyone, he will not hit anyone any more, '
        'he will not touch anyone any more, either with it or with it or with it or with or '
        'or with it or with his hammer or with his stick or with his fist or in thought in dream '
        'I mean never he will never or with his pencil or with his stick or or light light '
        'I mean never there he will never never anything there any more'
    )
    bio.from_args(
        url_biografie='http://ladida/didum',
        naam_publisher='nogeensiets',
        url_publisher='http://pbulihser_url',
        naam='Tiedel Doodle Dum',
        tekst=long_text,
    )
    # a long text is cut down and gets an ellipsis
    self.assertEqual(bio.snippet(size=20), 'Lemuel is in...')

    # short texts come back unchanged
    for short_text in ('abc', 'ca. 1800-1900'):
        bio.set_value('text', short_text)
        self.assertEqual(bio.snippet(), short_text)
def _merge_biographies(bio1, bio2):
    """Try to merge bio1 and bio2 into a new Biography.

    The merged biography is created from bio1's source id and a copy of
    bio1's document (``bio1.to_string()``). Names, states and events of bio2
    that bio1 lacks are then added; elements from bio2 are deep-copied so
    that bio2 itself is not changed.

    Returns the merged Biography, or None when both biographies have a
    'birth' or 'death' event whose 'when' values are incompatible (neither
    value is contained in the other).

    Raises Exception when one of the single-valued fields (publisher name
    and url, biography url, sex, biography title) is set in both biographies
    with different values.
    """
    # single values that must be equal in both biographies
    single_valued_keys = ['name_publisher', 'url_publisher', 'url_biography', 'sex', 'title_biography']
    merged_args = {}
    for key in single_valued_keys:
        v1 = bio1.get_value(key)
        v2 = bio2.get_value(key)
        if v1 and v2 and v1 != v2:
            raise Exception('Cannot merge biographies because values for %s are different (%s and %s)' % (key, v1, v2))
        merged_args[key] = v1 or v2

    # work on a copy, so that appending bio2's names cannot alter a list
    # that bio1 may still hold on to
    names = list(bio1.get_names())
    for n2 in bio2.get_names():
        if n2 not in names:
            names.append(n2)
    merged_args['names'] = names

    merged_bio = Biography(source_id=bio1.source_id, biodes_document=bio1.to_string())
    merged_bio.from_args(**merged_args)

    # non-unique states: add every state of bio2 that bio1 does not have.
    # bio1 is not modified below, so its states are serialised only once.
    states1 = bio1.get_states()
    states2 = bio2.get_states()
    known_states = set(etree.tostring(s).strip() for s in states1)  # @UndefinedVariable
    for state in states2:
        if etree.tostring(state).strip() not in known_states:  # @UndefinedVariable
            # copy the state (instead of moving it, which will change bio2 as well)
            merged_bio._add_state_element(copy.deepcopy(state))

    # events of these types may occur only once in a biography
    unique_events = ['birth', 'death']

    existing_events = merged_bio.get_events()  # at this point: all events from bio1
    events2 = bio2.get_events()
    for bio2_event in events2:
        # NOTE(review): serialised on every iteration on purpose. Whether
        # existing_events reflects elements added during this loop depends
        # on get_events(), which is not visible here.
        if etree.tostring(bio2_event).strip() in [etree.tostring(e).strip() for e in existing_events]:  # @UndefinedVariable
            continue

        event_type = bio2_event.get('type')
        if event_type not in unique_events:
            # non-unique events are simply added
            merged_bio._add_event_element(copy.deepcopy(bio2_event))
            continue

        # this event can occur only once: check for consistency with an
        # eventual existing event and, if consistent, update accordingly
        bio1_event = merged_bio.get_event(type=event_type)
        if bio1_event is None:
            # no event of this type exists yet in bio1
            merged_bio._add_event_element(copy.deepcopy(bio2_event))
            continue

        when1 = bio1_event.get('when', '')
        when2 = bio2_event.get('when', '')
        if when1 and when2 and not (when1 in when2 or when2 in when1):
            # these are incompatible
            return None
        if when2 and when1 in when2:
            # bio2's date is at least as specific as bio1's (or bio1 has
            # none). The ``when2`` guard avoids writing an empty when=""
            # attribute when neither event has a date.
            bio1_event.set('when', when2)

    return merged_bio