コード例 #1
    def createMetsFile(self):
        """
        Add image references to the Goobi META.XML file when it has none.

        The file at ``self.meta_file`` is parsed into a dictionary tree. If
        ``mets_tools.containsImages`` reports that the tree holds no image
        entries, ``mets_tools.addImages`` is called with ``self.img_src`` and
        the resulting tree is written back to ``self.meta_file``. If image
        entries are already present the file is left untouched.

        NOTE(review): the previous description of this method spoke of using
        the DBC service to generate data for each article of a toc object and
        appending it to the existing meta.xml data. Nothing in this method
        does that -- confirm whether that text belongs to another method.
        """
        # legr: Parse the META.XML and put it into a dictionary tree
        dt, _ = dict_tools.parseXmlToDict(self.meta_file)

        # legr: Don't do anything if there already are FILE_nnnn's and
        # legr: PHYS_nnnn's in the META.XML
        # legr: todo : what if a step is pushed back because of missing
        # legr: todo : images? When they are added, this doesn't get updated.
        # legr: todo : I think we need a possibility to clean the mets-file.
        if not mets_tools.containsImages(dt):
            # legr: we are here because META.XML had no image entries, so add
            # legr: FILE_nnnn and PHYS_nnnn references to the actual files
            # legr: in /master_orig/
            dt = mets_tools.addImages(dt, self.img_src)
            # legr: now update the META.XML with the references from above
            # legr: it seems the overall META.XML file structure also gets
            # legr: somewhat reformatted by this write - METS standard?
            xml_tools.writeDictTreeToFile(dt, self.meta_file)
 def getVariables(self):
     # NOTE(review): the seven prose lines below read like this method's
     # docstring, but no quote delimiters are present in this excerpt, so the
     # block is not valid Python as shown -- restore them from the original.
     This method pulls in all the variables
     from the command line and the config file 
     that are necessary for its running.
     We need a path to our toc file, our meta.xml
     and a link to our DBC data service (eXist API).
     Errors in variables will lead to an 
     Exception being thrown.
     # Initialise attributes before the paths and settings are resolved.
     self.page_offset = None
     self.issnSet = False
     process_path = self.command_line.process_path
     # NOTE(review): this os.path.join( call is never closed in this excerpt;
     # presumably an argument line and the closing parenthesis are missing
     # -- confirm against the original file.
     toc_dir = os.path.join(
         self.getConfigItem('metadata_toc_path', section='process_folder_structure')
     # Presumably the name of the first '.toc' file found in toc_dir; the
     # helper itself is not visible here -- verify.
     toc_name = tools.getFirstFileWithExtension(toc_dir, '.toc')
     self.toc_file_path = os.path.join(toc_dir, toc_name)
     # Config item 'dbc_service' from section 'dbc'.
     self.service_url = self.getConfigItem('dbc_service', section='dbc')
     # NOTE(review): second unterminated os.path.join( call, same caveat as
     # for toc_dir above.
     self.meta_file = os.path.join(
         self.getConfigItem('metadata_goobi_file', section='process_files')
     # Parse initial Goobi METS file to a dictionary tree for processing
     self.meta_data,_ = dict_tools.parseXmlToDict(self.meta_file)
     # For pdf info
     pdf_input = self.getConfigItem('doc_limbpdf_path',
                                    section= 'process_folder_structure')
     self.pdf_input_dir = os.path.join(process_path, pdf_input)
     # parse for overlapping articles
     # NOTE(review): both getSetting( calls below end on a trailing comma with
     # no further arguments or closing parenthesis visible; the remaining
     # arguments cannot be determined from this excerpt.
     self.overlapping_articles = self.getSetting('overlapping_articles',
     # parse boolean from command line - for overlapping articles
     self.default_language = self.getSetting('default_language',
コード例 #3
    # NOTE(review): the enclosing function header is not part of this excerpt;
    # metadata, required_fields and anchor_file come from outside the visible
    # lines.
    # Map each required field name to the value of the matching element.
    data = dict()
    for elem in metadata:
        name = elem.getAttribute('name')
        if name in required_fields:
            # Presumably DOM elements: takes the value of the element's first
            # child node. Assumes that child exists -- TODO confirm how empty
            # elements should be handled.
            data[name] = elem.firstChild.nodeValue
    # Every required field must have been found; report the first missing one
    # together with anchor_file.
    for item in required_fields:
        if item not in data: 
            raise DataError("{0} missing value {1}".format(anchor_file, item))
    return data

if __name__ == '__main__':
    image_src = '/opt/digiverso/goobi/metadata/201/images/master_orig'
    src = '/opt/digiverso/goobi/metadata/201/meta.xml'
    #src = '/opt/digiverso/goobi/metadata/194/meta - complete data.xml'
    dest = os.path.join(os.path.dirname(src),'meta_new.xml')
    dict_tree,ns  = dict_tools.parseXmlToDict(src)
    # Add images - i.e. create phys struct map, file sec and set pathimages
    if not mets_tools.containsImages(dict_tree):
        dict_tree = mets_tools.addImages(dict_tree,image_src) 
    content = [{'name': 'Abstract', 'data' : 'From the Roman Empire...' }, 
               {'name' : 'TitleDocMain', 'data' : 'Return of the oppressed'},
               {'name' : 'Author', 'type' : 'person', 'fields' :
                    [{'tag' : 'displayName', 'data' : 'Turchin, Peter'},
                     {'tag' : 'firstName', 'data' : 'Peter'},
                     {'tag' : 'lastName', 'data' : 'Turchin'}]
    new_doc_struct = {'content':content,