Example #1
0
    def write(self, names):
        """Write one BioDes document per name in ``names``.

        ``names`` is sorted in place, so the caller's list is reordered, and
        ``self.total`` is set to its length. Names are numbered from 1 in
        sorted order; each is reported through ``self.print_progress`` and its
        document is passed to ``self.write_file`` together with that number.

        Every document gets placeholder publisher data ("XXX") and a
        biography URL on the production retroboeken/oldenbarnevelt site that
        carries the name as its SearchSource value.
        """
        # Fixed parts of the URL, built once instead of once per name.
        # (The development server is at
        # http://dev.inghist.nl/retrotest2010/oldenbarnevelt/ .)
        url_prefix = (
            "http://www.inghist.nl/retroboeken/oldenbarnevelt/"
            "#accessor=toc&accessor_href=toc%3FSearchSource%3D"
        )
        url_suffix = "%26correspondent%3D%26day1%3D%26month1%3D%26year1%3D%26day2%3D%26month2%3D%26year2%3D"

        names.sort()
        self.total = len(names)
        for index, name in enumerate(names, 1):
            self.print_progress(index, name)

            # The name is percent-quoted twice; presumably because it is
            # embedded in the already-quoted accessor_href value -- TODO confirm.
            encoded_name = urllib.quote(urllib.quote(name.encode('utf8')))

            bdes = BioDesDoc()
            bdes.from_args(
                naam=name,
                naam_publisher="XXX",
                url_publisher="http://XXX.nl",
                url_biografie=url_prefix + encoded_name + url_suffix,
            )
            self.write_file(bdes, index)
Example #2
0
    def test_add_note(self):
        """A typed note can be added and then replaced with add_or_update_note."""
        note_type = 'sometype'
        original_text = 'text of the note'

        document = BioDesDoc().from_xml(self.create_element())
        document.add_note(original_text, type=note_type)

        # Exactly one note exists, both overall and for this type.
        self.assertEqual(len(document.get_notes()), 1)
        self.assertEqual(len(document.get_notes(type=note_type)), 1)
        self.assertEqual(document.get_notes(type=note_type)[0].text, original_text)

        # Updating a note of the same type is expected to overwrite its text.
        document.add_or_update_note('note2', type=note_type)
        self.assertEqual(document.get_notes(type=note_type)[0].text, 'note2')
Example #3
0
    def test_from_args(self):
        """The 'local_id' given to from_args is returned by get_idno()."""
        expected_id = '123'

        document = BioDesDoc()
        document.from_args(
            url_biografie='http://www.gerbrandy.com/bio?a&b',
            url_publisher='http://www.gerbrandy.com',
            naam_publisher='Website van Jelle',
            titel_biografie='Bio van Jelle',
            naam='Jelle Gerbrandy',
            local_id=expected_id,
        )
        self.assertEqual(document.get_idno(), expected_id)
 def to_xml(self):
     """Build and return a BioDesDoc describing this person for Het Biografisch Portaal.

     The bioport id is read from the first biography, so at least one
     biography must exist. The document receives the person's names and
     sex, each of the birth/death/funeral/baptism/floruit events that is not
     None, every illustration, and one <bibl> element per biography whose
     source id is not 'bioport'.
     """
     biodes = BioDesDoc()
     bioport_id = self.get_biographies()[0].get_bioport_id()

     # add the basic information
     biodes.from_args(
         naam_publisher='Het Biografisch Portaal',
         url_biografie='http://www.biografischportaal.nl/persoon/%s' % bioport_id,
         url_publisher='http://www.biografischportaal.nl',
         namen=self.get_names(),
         bioport_id=bioport_id,
         sex=self.get_value('sex'),
     )

     # add the events that are present
     for kind in ['birth', 'death', 'funeral', 'baptism', 'floruit']:
         found = self.get_event(kind)
         if found is None:
             continue
         biodes._add_event_element(found)

     # add illustrations
     for illustration in self.get_illustrations():
         biodes._add_figure(url=illustration.source_url, head=illustration.caption)

     # add links to all sources except the 'bioport' source itself
     for biography in self.get_biographies():
         if biography.get_source().id == 'bioport':
             continue

         # construct a bibl element: publisher, link and authors
         bibl = SubElement(biodes.get_element_biography(), 'bibl')
         el_publisher = SubElement(bibl, 'publisher')
         el_publisher.text = biography.get_value('name_publisher')
         el_ref = SubElement(bibl, 'ref')
         el_ref.attrib['target'] = biography.get_value('url_biography')

         # One <author> per item; a falsy value adds none.
         # NOTE(review): assumes 'author' is a sequence of strings -- a plain
         # string would be split into characters; confirm against get_value.
         for author_name in biography.get_value('author') or ():
             el_author = SubElement(bibl, 'author')
             el_author.text = author_name

     return biodes
Example #5
0
 def test_round_trip(k, o, **kwargs):
     """Test a 'round trip': build a biodes element from keyword data, then read it back with 'to_dict'.

     k = the key
     o = the expected output
     kwargs : the data used to create the biodes document; when none is
         given, {k: o} is used

     Fails with the serialized document as the message when ``k`` is missing
     from the dictionary or maps to a value other than ``o``.

     NOTE(review): 'self' is not a parameter, so it has to come from an
     enclosing scope -- confirm against the surrounding test.
     """
     if not kwargs:
         kwargs = {k: o}
     el = self.create_element(**kwargs)
     doc = BioDesDoc().from_element(el)
     dct = doc.to_dict()
     # 'in' instead of dict.has_key(), which no longer exists in Python 3.
     assert k in dct, doc.to_string()
     assert dct[k] == o, '%s shoudl be "%s", not "%s"\n%s'  % (k, o,  dct[k], doc.to_string())
    def get_illustrations(self, default=None):
        """Return this document's figures as a list of ``Illustration`` objects.

        The (url, caption) pairs come from ``BioDesDoc.get_illustrations``.
        An empty caption is replaced by 'illustratie uit <source description>'.
        A URL that does not start with http://, https:// or file:// is
        treated as relative to the directory of ``self.source_url`` and is
        given a file:// prefix unless the joined result already has one.

        When ``self.repository`` is set, its image cache location and URL are
        passed on to every ``Illustration``; otherwise empty strings are used.

        ``default`` is not used by this method; it is kept so existing calls
        keep working, and defaults to None rather than a shared mutable list.
        """
        figures = BioDesDoc.get_illustrations(self)
        source = self.get_source()
        prefix = source.id

        images_cache_local = ''
        images_cache_url = ''
        if self.repository:
            images_cache_local = self.repository.images_cache_local
            images_cache_url = self.repository.images_cache_url

        result = []
        for url, caption in figures:
            if not caption:
                caption = 'illustratie uit %s' % source.description
            # https:// is included so secure absolute URLs are not mistaken
            # for relative paths and rewritten to file:// URLs.
            if not url.startswith(('http://', 'https://', 'file://')):
                # this is a relative url
                url = '/'.join((os.path.dirname(self.source_url), url))
                if not url.startswith('file://'):
                    url = 'file://' + url
            result.append(Illustration(
                url=url,
                images_cache_local=images_cache_local,
                images_cache_url=images_cache_url,
                prefix=prefix,
                caption=caption,
                link_url=self.get_value('url_biografie'),
            ))
        return result
Example #7
0
 def write(self, people):
     """Write one BioDes document for every not-yet-processed name in ``people``.

     ``people`` is iterated for its names and indexed by name to read the
     'url' entry. ``self.total`` is set to ``len(people)``. Names for which
     ``self.name_already_processed`` is true only increment ``self.skipped``.
     The number handed to ``self.write_file`` is the zero-based position in
     the iteration, so skipped names leave gaps in the numbering.
     """
     self.total = len(people)
     for position, person_name in enumerate(people):
         if self.name_already_processed(person_name):
             self.skipped += 1
             continue

         biography_url = people[person_name]['url']
         document = BioDesDoc()
         # Publisher details are placeholders.
         document.from_args(
             naam=person_name,
             naam_publisher="XXX",
             url_publisher="http://XXX.nl",
             url_biografie=biography_url,
         )
         self.write_file(document, position)
Example #8
0
 def test_replace_name(self):
     """_replace_name swaps the name at a given position for a new one."""
     document = BioDesDoc().from_xml(self.create_element())

     # Two added names are expected to bring the total to three.
     for full_name in ('Pietje Een', 'Pietje Twee'):
         document._add_a_name(Name(full_name))
     self.assertEqual(len(document.get_names()), 3)

     replacement = Name('Newt Newman')
     self.assertEqual(replacement.to_string(),
         u'<persName>Newt Newman</persName>')

     # Replace the name at index 1 and check that it serializes identically.
     document._replace_name(replacement, 1)
     self.assertEqual(document.get_names()[1].to_string(), replacement.to_string())
Example #9
0
 def test_read_write_events(self):
     """Write an event with add_or_update_event and read each field back.

     The same element object is re-inspected after each later update, so
     the test also expects updates to change that element in place.
     """
     document = BioDesDoc().from_xml(self.create_element())
     document.add_or_update_event(
         type='a',
         when="2009",
         notBefore="2010",
         notAfter="2011",
         date_text="2012",
         place="asd",
         place_id="-12345",
     )

     matching = document.get_events(type='a')
     self.assertEqual(len(matching), 1)
     event = matching[0]

     # Date attributes are stored on the event element itself.
     for attribute, expected in (('when', '2009'), ('notBefore', '2010'), ('notAfter', '2011')):
         self.assertEqual(event.get(attribute), expected)

     # Free-text date and place live in child elements.
     self.assertEqual(event.find('date').text, '2012')
     self.assertEqual(event.find('place').text, 'asd')
     self.assertEqual(event.find('place').get('key'), '-12345')

     # An empty date_text is expected to leave no <date> child.
     document.add_or_update_event(type='a', date_text='')
     self.assertEqual(event.find('date'), None)

     # An empty place is expected to keep the <place> child with empty text.
     document.add_or_update_event(type='a', place='')
     self.assertEqual(event.find('place').text, '')

     # An empty place_id is expected to leave an empty 'key' attribute.
     document.add_or_update_event(type='a', place_id='', place='')
     self.assertEqual(event.find('place').get('key'), '')
Example #10
0
 def process(self):
     """Collect names from the XML files in ``in/`` and write one BioDes document per unique name.

     Every ``<item>`` element contributes up to two names, the text of its
     first ``title/from`` and ``title/to`` elements, and adds 2 to
     ``self.total``. Names that are empty or consist only of dots and
     whitespace are reported with ``self.skip("null name")``; names seen
     before are reported with ``self.skip("dupe name")``.

     The remaining names, in first-seen order and numbered from 1, each get
     a document whose biography URL points at the retroboeken/archives site
     with the name as the correspondent value. The document is passed to
     ``self.write_file`` together with its number.
     """
     names = []
     seen = set()  # constant-time duplicate check; ``names`` keeps the order
     for filename in os.listdir('in'):
         tree = etree.parse(os.path.join('in', filename))
         for person in tree.xpath("//item"):
             self.total += 2

             # Read both names first; a missing element counts as None.
             candidates = []
             for path in ('title/from', 'title/to'):
                 matches = person.xpath(path)
                 candidates.append(matches[0].text if matches else None)

             for name in candidates:
                 # Covers None, '' and dots-only placeholders such as "....".
                 if not name or not name.replace('.', '').strip():
                     self.skip("null name")
                     continue
                 if name in seen:
                     self.skip("dupe name")
                     continue
                 seen.add(name)
                 names.append(name)

     # Fixed part of the URL, built once instead of once per name.
     url_prefix = (
         "http://www.inghist.nl/retroboeken/archives/"
         "#accessor=toc&accessor_href=toc%3Fcorrespondent%253Austring%253Autf-8%3D"
     )
     for index, name in enumerate(names, 1):
         # The name is percent-quoted twice; presumably because it is
         # embedded in the already-quoted accessor_href value -- TODO confirm.
         encoded_name = urllib.quote(urllib.quote(name.encode('utf8')))
         bdes = BioDesDoc()
         bdes.from_args(
             naam=name,
             naam_publisher="Instituut voor Nederlandse Geschiedenis",
             url_publisher="http://www.inghist.nl/",
             url_biografie=url_prefix + encoded_name,
         )
         self.write_file(bdes, index)
Example #11
0
 def test_to_dict(self):
     """bio.xml exposes a non-empty unicode 'geboortedatum' via to_dict and get_value."""
     key = 'geboortedatum'
     fixture_path = os.path.join(this_dir, 'bio.xml')

     document = BioDesDoc()
     document.from_url(fixture_path)

     as_dict = document.to_dict()
     assert key in as_dict, as_dict
     assert document.get_value(key)
     # The value must be exactly of the unicode string type.
     self.assertEqual(type(document.get_value(key)), type(u''))
Example #12
0
    def write(self, names):
        """Write one BioDes document for every new, sanitized name in ``names``.

        ``names`` is sorted in place and ``self.total`` is set to its length.
        Each name is passed through ``sanitize_name``; names for which
        ``self.name_already_processed`` is true are reported with
        ``self.skip("dupe name")``. The number handed to ``self.write_file``
        is the one-based position in the sorted list, so skipped names leave
        gaps in the numbering.
        """
        # Everything in the biography URL except the encoded name.
        url_prefix = "http://www.inghist.nl/retroboeken/gachard/#accessor=toc&accessor_href=toc%3FSearchSource%253Austring%253Autf-8%3D%26van_aan%3D%26correspondent%253Austring%253Autf-8%3D"

        names.sort()
        self.total = len(names)
        for position, raw_name in enumerate(names, 1):
            clean_name = sanitize_name(raw_name)
            if self.name_already_processed(clean_name):
                self.skip("dupe name")
                continue

            # The name is percent-quoted twice before it is appended.
            quoted_once = urllib.quote(clean_name.encode('utf8'))
            biography_url = url_prefix + urllib.quote(quoted_once)

            document = BioDesDoc()
            # Publisher details are placeholders.
            document.from_args(
                naam=clean_name,
                naam_publisher="XXX",
                url_publisher="http://XXX.nl",
                url_biografie=biography_url,
            )
            self.write_file(document, position)
Example #13
0
 def process(self, people_dict):
     """Write one BioDes document for every new, sanitized name in ``people_dict``.

     ``people_dict`` maps a name to a record with at least the keys 'born'
     and 'dead'. Names are handled in sorted order and ``self.total`` is set
     to their count. A progress line is printed for every name. Names for
     which ``self.name_already_processed`` is true (after ``sanitize_name``)
     only increment ``self.skipped``.

     The 'born' and 'dead' values are added as 'geboortedatum' and
     'sterfdatum' only when ``BioDesDoc.is_date`` accepts them. The number
     handed to ``self.write_file`` is the one-based position in the sorted
     names, so skipped names leave gaps in the numbering.
     """
     # sorted() works on both Python 2 and 3; keys().sort() fails on 3.
     people = sorted(people_dict)
     total = len(people)
     self.total = total

     # Fixed part of the production URL, built once instead of per name.
     # (The development server is at http://dev.inghist.nl/retrotest2010/groen/ .)
     url_prefix = (
         "http://www.inghist.nl/retroboeken/groen/"
         "#accessor=accessor_index&accessor_href=accessor_index%3FSearchSource%253Autf-8%253Austring%3D"
     )

     for x, raw_name in enumerate(people, 1):
         info = people_dict[raw_name]
         # A single parenthesised argument prints the same on Python 2 and 3.
         print("processing: %s/%s - %s" % (x, total, raw_name))
         name = sanitize_name(raw_name)
         if self.name_already_processed(name):
             self.skipped += 1
             continue

         # The name is percent-quoted twice; presumably because it is
         # embedded in the already-quoted accessor_href value -- TODO confirm.
         encoded_name = urllib.quote(urllib.quote(name.encode('utf8')))

         bdes = BioDesDoc()
         args = dict(
             naam=name,
             naam_publisher="XXX",
             url_publisher="http://XXX.nl",
             url_biografie=url_prefix + encoded_name,
         )

         # Dates are passed on only when they are recognised as dates.
         birth_date = info['born']
         death_date = info['dead']
         if bdes.is_date(birth_date):
             args['geboortedatum'] = birth_date
         if bdes.is_date(death_date):
             args['sterfdatum'] = death_date

         bdes.from_args(**args)
         self.write_file(bdes, x)
Example #14
0
 def test_add_delete_update_figure(self):
     """Exercise adding, removing and updating figures on a fresh document."""
     document = BioDesDoc().from_xml(self.create_element())
     self.assertEqual(len(document.get_figures()), 0)

     # Each add is expected to grow the figure list by one.
     additions = (('http://someref', 'some text'), ('http://someref2', 'some text2'))
     for expected_count, (uri, text) in enumerate(additions, 1):
         document.add_figure(uri=uri, text=text)
         self.assertEqual(len(document.get_figures()), expected_count)

     # get_figures() returns (index, element) pairs.
     first_index = document.get_figures()[0][0]
     second_index = document.get_figures()[1][0]

     document.remove_figure(second_index)
     self.assertEqual(len(document.get_figures()), 1)

     # Updating must change the remaining figure, not add another one.
     document.update_figure(index=first_index, uri='http://somerefx', text='some textx')
     self.assertEqual(len(document.get_figures()), 1)

     remaining_index, figure = document.get_figures()[0]
     self.assertEqual(remaining_index, 0)
     self.assertEqual(figure.find('graphic').get('url'), 'http://somerefx')
     self.assertEqual(figure.find('head').text, 'some textx')
Example #15
0
 def test_add_delete_update_extrafield(self):
     """Exercise adding, removing, updating and bulk-replacing extra fields."""
     document = BioDesDoc().from_xml(self.create_element())
     self.assertEqual(len(document.get_extrafields()), 0)

     # Each add is expected to grow the list by one.
     additions = (('sleutel', 'some value'), ('sleutel2', 'some value2'))
     for expected_count, (key, value) in enumerate(additions, 1):
         document.add_extrafield(key=key, value=value)
         self.assertEqual(len(document.get_extrafields()), expected_count)

     first_index, second_index = 0, 1
     document.remove_extrafield(second_index)
     self.assertEqual(len(document.get_extrafields()), 1)

     # Updating returns the changed element: key in 'target', value as text.
     updated = document.update_extrafield(index=first_index, key='sleuteldifferent', value='different value')
     self.assertEqual(len(document.get_extrafields()), 1)
     self.assertEqual(updated.get('target'), 'sleuteldifferent')
     self.assertEqual(updated.text, 'different value')

     # this is what happens when saving from the UI
     pairs = (('key0', 'some value'), ('key1', 'some value2'))

     document._replace_extrafields([])
     self.assertEqual(len(document.get_extrafields()), 0)
     for key, value in pairs:
         document.add_extrafield(key=key, value=value)
     for position, (key, _value) in enumerate(pairs):
         self.assertEqual(document.get_extrafields()[position].get('target'), key)

     # Replacing with the same pairs must keep the same keys in order.
     document._replace_extrafields(list(pairs))
     for position, (key, _value) in enumerate(pairs):
         self.assertEqual(document.get_extrafields()[position].get('target'), key)
Example #16
0
 def test_add_delete_update_reference(self):
     """Exercise adding, removing and updating references on a fresh document."""
     document = BioDesDoc().from_xml(self.create_element())
     self.assertEqual(len(document.get_references()), 0)

     # Each add is expected to grow the reference list by one.
     additions = (('http://someref', 'some text'), ('http://someref2', 'some text2'))
     for expected_count, (uri, text) in enumerate(additions, 1):
         document.add_reference(uri=uri, text=text)
         self.assertEqual(len(document.get_references()), expected_count)

     # get_references() returns (index, element) pairs.
     first_index = document.get_references()[0][0]
     second_index = document.get_references()[1][0]

     document.remove_reference(second_index)
     self.assertEqual(len(document.get_references()), 1)

     # Updating returns the changed element and must not add a new one.
     updated = document.update_reference(index=first_index, uri='http://somerefx', text='some textx')
     self.assertEqual(len(document.get_references()), 1)

     remaining_index, _element = document.get_references()[0]
     self.assertEqual(remaining_index, 0)
     self.assertEqual(updated.get('target'), 'http://somerefx')
     self.assertEqual(updated.text, 'some textx')
Example #17
0
    def test_relations(self):
        """Relations can be added, read back by kind, removed and re-added."""
        document = BioDesDoc().from_xml(self.create_element())

        # (person, relation) pairs, one for each relation kind under test.
        family = (
            ("Kwik", "partner"),
            ("Kwek", "child"),
            ("Kwak", "father"),
            ("Donald", "mother"),
            ("Dagobert", "parent"),
        )
        for person, relation in family:
            document.add_relation(person=person, relation=relation)

        for person, relation in family:
            self.assertEqual(document.get_relation(relation), [person])

        # make sure we are not reading the other names
        self.assertEqual(len(document.get_names()), 1)

        self.assertEqual(len(document.get_relations()), 5)
        # get_relations() returns (relation element, person element) pairs.
        pairs = [(el_relation.get('name'), el_person[0].text) for (el_relation, el_person) in document.get_relations()]

        self.assertTrue(('child', 'Kwek') in pairs, pairs)

        el_relation, el_person = document.get_relations()[1]
        relation_kind = el_relation.get('name')
        person_name = el_person[0].text
        position = el_relation.getparent().index(el_relation)

        # see if deleting and re-adding is sane
        document.remove_relation(position)
        self.assertEqual(len(document.get_relations()), 4)
        document.add_relation(person=person_name, relation=relation_kind)
        self.assertEqual(len(document.get_relations()), 5)
Example #18
0
 def test_read_write_states(self):
     """States can be written, updated, listed by type and removed by index."""
     document = BioDesDoc().from_xml(self.create_element())
     document.add_or_update_state(type='floruit', frm="1900", to="1910", place='Zohar', place_id='1')

     floruit = document.get_state(type='floruit')
     # 'frm' is expected to be stored as the 'from' attribute.
     for attribute, expected in (('from', '1900'), ('to', '1910'), ('type', 'floruit')):
         self.assertEqual(floruit.get(attribute), expected)
     self.assertEqual(floruit.find('place').text, 'Zohar')
     self.assertEqual(floruit.find('place').get('key'), '1')

     # Clearing place and place_id is expected to leave an empty 'key'.
     document.add_or_update_state(type='floruit', place_id='', place='')
     self.assertEqual(floruit.find('place').get('key'), '')
     states = document.get_states(type='floruit')
     self.assertEqual(states, [floruit])

     document.add_state(type='occupation', idno="1")
     # NOTE(review): ``states`` still holds the floruit list fetched above,
     # so this does not inspect the occupation states -- confirm the intent.
     self.assertEqual(len(states), 1)
     for idno in ("2", "3"):
         document.add_state(type='occupation', idno=idno)
     states = document.get_states(type='occupation')
     self.assertEqual(len(states), 3)

     # With a type, idx is expected to count within that type only.
     document.remove_state(type='occupation', idx=1)
     states = document.get_states(type='occupation')
     self.assertEqual(len(states), 2)
     self.assertEqual([s.get('idno') for s in states], ['1', '3'])

     # remove states by index number
     all_states = document.get_states()
     target = all_states[1]
     position = target.getparent().index(target)
     document.remove_state(idx=position)
     self.assertEqual(len(all_states)-1, len(document.get_states()))
Example #19
0
 def test_remove_name(self):
     """remove_name drops the name at an index and later names move up."""
     document = BioDesDoc().from_xml(self.create_element())

     # Two added names are expected to bring the total to three.
     for full_name in ('Pietje Een', 'Pietje Twee'):
         document._add_a_name(Name(full_name))
     self.assertEqual(document.get_names()[2].volledige_naam(), 'Pietje Twee', document.get_names() )
     self.assertEqual(len(document.get_names()), 3)

     # After removing index 1, 'Pietje Twee' is expected at index 1.
     document.remove_name(1)
     self.assertEqual(len(document.get_names()), 2)
     self.assertEqual(document.get_names()[1].volledige_naam(), 'Pietje Twee', document.get_names() )
Example #20
0
    def test_create_some_samples(self, **args):
        """Create a minimal and a maximal sample document and pass each to to_file.

        The file names used are 'biodes10_minimal.xml' and
        'biodes10_maximal.xml'. Nothing is asserted. ``args`` is accepted
        but not used.
        """
        # create a very simple file
        minimal = {
            'url_biografie':'http://www.gerbrandy.com/bio',
            'url_publisher':'http://www.gerbrandy.com',
            'naam_publisher':'Website van Jelle',
            'titel_biografie':'Bio van Jelle',
            'naam':'Jelle Gerbrandy',
        }

        sample = BioDesDoc()
        sample.from_args(**minimal)
        sample.to_file('biodes10_minimal.xml')

        # the most complex case includes everything
        # NOTE: 'naam' and 'beroep' are not supplied here.
        maximal = {
            'bioport_id':'biodesid',
            'url_biografie':'http://url_van_de_biografie',
            'url_publisher':'http://url_van_de_publisher',
            'titel_biografie':'titel van de biografie',
            'naam_publisher':'naam van depublisher',
            'auteur':'auteur',
            'prepositie':'prepositie',
            'voornaam':'voornaam',
            'intrapositie':'intrapositie',
            'geslachtsnaam': 'geslachtsnaam',
            'postpositie':'postpositie',
            'laatst_veranderd':'2009-11-11',
            'publicatiedatum':'2009-11-11',
            'geboortedatum':'2009-11-11',
            'geboortedatum_tekst':'2009-11-11 in tekst',
            'geboorteplaats':'geboorteplaats',
            'sterfdatum':'2011-11-11',
            'sterfdatum_tekst':'sterfdatum_tekst',
            'sterfplaats':'sterfplaats',
            'geslacht':'1',
            'illustraties':['http://illustratie1.jpg', 'http://illustratie2.jpg'],
            'namen':['Naam1', ('mr.', 'Jan', 'van', 'Voorbeeld', 'Esq.')],
            'namen_en':['John'],
            'tekst':'tekst van de biografie kan <em>Markup</em> <p>bevatten</p>',
        }
        # The same document object is filled a second time.
        sample.from_args(**maximal)

        sample._add_event(type='marriage', when='1901-12-12', text='getrouwd met marietje')

        # States are added in this order; 'occupation' occurs twice, the
        # second time with a place.
        states = (
            {'type': 'occupation', 'frm': '1940', 'to': '1960', 'text': 'schilder'},
            {'type': 'residence', 'frm': '1940', 'to': '1960', 'text': 'Amsterdam'},
            {'type': 'claim_to_fame', 'text': 'Superbekende persoon!'},
            {'type': 'occupation', 'frm': '1940', 'to': '1960', 'text': 'schilder', 'place': "Amsterdam"},
        )
        for state in states:
            sample.add_state(**state)

        sample.to_file('biodes10_maximal.xml')
Example #21
0
 def test_from_dict(self):
     """from_dict accepts the ``self.kw`` dictionary without raising."""
     keyword_data = self.kw
     document = BioDesDoc()
     # Nothing is asserted; the call only has to succeed.
     document.from_dict(keyword_data)
 def from_url(self, url):
     """Record ``url`` as the source, delegate to BioDesDoc.from_url and create the id.

     url -- passed unchanged to ``BioDesDoc.from_url``

     Returns ``self`` so calls can be chained.
     """
     # source_url is set before delegating; whether the base implementation
     # reads it is not visible from here.
     self.source_url = url
     BioDesDoc.from_url(self, url)
     # The id is created only after the document has been loaded.
     self.create_id()
     return self
Example #23
0
 def test_get_names(self):
     """bio.xml contains exactly one name, 'C. van Heynsbergen'."""
     document = BioDesDoc().from_url(os.path.join(this_dir, 'bio.xml'))
     self.assertEqual(len(document.get_names()), 1)

     only_name = document.get_names()[0]
     self.assertEqual(u'C. van Heynsbergen', only_name.volledige_naam())
Example #24
0
 def test_get_value(self):
     """get_value('namen') on bio.xml yields the single name 'C. van Heynsbergen'."""
     doc = BioDesDoc()
     doc.from_url(os.path.join(this_dir, 'bio.xml'))
     value = [n.volledige_naam() for n in doc.get_value('namen')]
     should_be = ['C. van Heynsbergen']
     # assertEqual replaces the deprecated failUnlessEqual alias (removed in
     # Python 3.12) and matches the other tests in this file.
     self.assertEqual(value, should_be)
Example #25
0
 def get_biodes_documents(self):
     """Yield, one at a time, the result of BioDesDoc().from_url for every ``//a/@href`` in ``self.root``."""
     hrefs = self.root.xpath('//a/@href')
     for href in hrefs:
         # A fresh document is created for each link.
         document = BioDesDoc()
         yield document.from_url(href)