def docTypeExists(dict_tree,doc_type):
    '''
    Tell whether the logical struct map contains at least one 'div'
    element whose TYPE attribute is doc_type.

    :param dict_tree: tree handed to
        log_struct_map_tools.getLogicalStructMap to obtain the logical
        struct map
    :param doc_type: TYPE value to search for, e.g. 'Article'
    :return: True if xml_tools.getAllSubTrees returns one or more
        matches, False otherwise
    '''
    mets_ns = '{http://www.loc.gov/METS/}'
    logical_map = log_struct_map_tools.getLogicalStructMap(dict_tree)
    matching_divs = xml_tools.getAllSubTrees(logical_map, mets_ns,
                                             'div', 'TYPE', doc_type)
    return len(matching_divs) > 0
def getDmdsWithContent(dict_tree, doc_type, metadata_key, content):
    '''
    Get a list of logical IDs (e.g. LOG_NNNN) for the divs with
    TYPE = doc_type whose linked dmdSec has metadata_key = content.
    E.g. a list of all articles where 'TitleDocMain' = content.

    :param dict_tree: dict tree of the METS document; the root is read
        from the key '{http://www.loc.gov/METS/}mets'
    :param doc_type: type of div to look for, e.g. 'Article'
    :param metadata_key: metadata key to look in, e.g. 'TitleDocMain'
    :param content: metadata content to check for, e.g. the name of
        the article
    :return: list with the '@ID' of every matching div, in the order
        the dmdSecs are stored; empty when nothing matches or when the
        tree has no mets root
    '''
    ns = '{http://www.loc.gov/METS/}'
    goobi_ns = '{http://meta.goobi.org/v1.5.1/}'
    mets = dict_tree.get(ns + 'mets', {})

    # Map the DMDID of each div of the requested type to that div's own ID,
    # so a dmdSec can be translated back to the logical ID callers use.
    typed_divs = xml_tools.getAllSubTrees(mets, ns, 'div', 'TYPE', doc_type)
    log_id_by_dmd_id = {div['@DMDID']: div['@ID']
                        for div in typed_divs if '@DMDID' in div}

    dmd_secs = mets.get(ns + 'dmdSec', [])
    if isinstance(dmd_secs, dict):
        # A lone dmdSec stored as a dict instead of a one-element list:
        # iterating it would walk its keys (strings) and fail on
        # dmd_sec['@ID'], so wrap it to treat it as a single section.
        dmd_secs = [dmd_secs]

    matching_log_ids = []
    for dmd_sec in dmd_secs:
        # Only dmdSecs referenced by a div of the requested type qualify.
        if '@ID' not in dmd_sec or dmd_sec['@ID'] not in log_id_by_dmd_id:
            continue
        # NOTE(review): this searches for a Goobi-namespaced 'dmdSec'
        # element with name = metadata_key, while getMetadata looks up
        # 'metadata' elements in the same namespace -- confirm that
        # 'dmdSec' is the intended element name here.
        entries = xml_tools.getAllSubTrees(dmd_sec, goobi_ns,
                                           'dmdSec', 'name', metadata_key)
        if any('#text' in entry and entry['#text'] == content
               for entry in entries):
            matching_log_ids.append(log_id_by_dmd_id[dmd_sec['@ID']])
    return matching_log_ids
def getMetadata(dmd_secs):
    '''
    Collect the Goobi 'metadata' subtrees of every dmd_sec into one
    flat list.

    For each dmd_sec the result of xml_tools.getAllSubTrees is either
    appended as is, or, when its first item is itself a list, that
    inner list is appended instead. Entries are concatenated in order;
    this function performs no de-duplication of its own.

    :param dmd_secs: an iterable of dmd_secs
    :return: a new list with the metadata entries of all dmd_secs
    :raises ValueError: if the lookup for one dmd_sec yields a list as
        first item together with further items
    '''
    goobi_ns = '{http://meta.goobi.org/v1.5.1/}'
    collected = []
    for section in dmd_secs:
        found = xml_tools.getAllSubTrees(section, goobi_ns, 'metadata')
        if not (found and isinstance(found[0], list)):
            # Flat (or empty) result: take the entries directly.
            collected.extend(found)
            continue
        # Nested result: exactly one inner list is expected.
        if len(found) > 1:
            raise ValueError('To many metadata lists')
        collected.extend(found[0])
    return collected