def addImages(dict_tree,image_src):
    '''
    Returns dict_tree after the images found in image_src have been
    registered in the physical struct map and in the file section.

    :param dict_tree: dict tree to insert images into
    :param image_src: folder in which the images exist
    :raises ValueError: if dict_tree has no physical struct map
    '''
    dict_tree = dmd_sec_tools.fixPathimagefiles(dict_tree, image_src)
    image_files = fs_tools.getImages(image_src)

    # --- Physical struct map ---
    struct_map = phys_struct_map_tools.get(dict_tree)
    if struct_map is None:
        raise ValueError('physical_struct_map is missing from dict_tree')

    # Rebuild the map when it is empty or does not validate against the
    # image folder. "or" short-circuits, so isValid is only consulted for
    # a non-empty map.
    needs_rebuild = (phys_struct_map_tools.isEmpty(struct_map)
                     or not phys_struct_map_tools.isValid(struct_map, image_src))
    if needs_rebuild:
        struct_map = phys_struct_map_tools.clear(struct_map)
        # One page entry per image found in image_src
        struct_map = phys_struct_map_tools.create(struct_map, len(image_files))
    # NOTE(review): the rebuilt struct map is never explicitly written back
    # into dict_tree here; presumably the helpers modify it in place - confirm.

    # --- File section ---
    file_section = file_sec_tools.create(file_sec_tools.get(dict_tree),
                                         image_files)
    return file_sec_tools.insert(dict_tree, file_section)
def addStructLinks(dict_tree,link_from,doc_struct_info):
    '''
    Adds struct links for a page range and returns an updated version of
    dict_tree with the links inserted.

    :param dict_tree: a dictionary tree with complete goobi metadata
    :param link_from: the logical section to link to, e.g. LOG_0001
    :param doc_struct_info: a dictionary with at least start_page and
        end_page
    :raises AssertionError: if doc_struct_info lacks start_page or end_page
    '''
    # Validate the input before touching the tree. The check is an explicit
    # raise rather than an "assert" statement so that it is not stripped
    # when Python runs with -O; the exception type and message are the same
    # as the assert-based check produced.
    for required_key in ('start_page', 'end_page'):
        if required_key not in doc_struct_info:
            raise AssertionError(
                'doc_struct_info must contain "{0}"'.format(required_key))
    start_page = doc_struct_info['start_page']
    end_page = doc_struct_info['end_page']

    # get / addLinksToStructLink / insert are helpers defined elsewhere in
    # this module.
    struct_link = get(dict_tree)

    # Build the page entries for the requested range from the physical
    # struct map
    physical_struct_map = phys_struct_map_tools.get(dict_tree)
    pages = phys_struct_map_tools.createPages(physical_struct_map,
                                              start_page,
                                              end_page)

    # Insert links and write the struct link section back into the tree
    struct_link = addLinksToStructLink(struct_link, link_from, pages)
    return insert(struct_link, dict_tree)
def addOffsetToPhysicalStructMap(dict_tree, page_offset):
    '''
    Applies page_offset to the physical struct map of dict_tree, unless an
    offset is already present. page_offset is a positive integer; passing
    None makes the call a no-op.

    :param dict_tree: dict tree holding the physical struct map
    :param page_offset: offset to add, or None to do nothing
    :return: always None
    '''
    if page_offset is None:
        return
    struct_map = phys_struct_map_tools.get(dict_tree)
    # Only add the offset when the book does not already have one
    if not phys_struct_map_tools.offsetExists(struct_map):
        phys_struct_map_tools.addOffset(struct_map, page_offset)
def containsImages(dict_tree):
    '''
    Returns True when both the file section and the physical struct map of
    dict_tree are non-empty, and False if one or both are empty.

    Per the original notes: the file section maps actual file paths to
    ID="FILE_nnnn", and structMap TYPE="PHYSICAL" maps those FILE_nnnn to
    PHYS_nnnn and adds ORDER and ORDERLABEL.

    NOTE(review): a function with this same name is defined again later in
    this file - confirm which one is intended and remove the other.

    :param dict_tree: dict tree to inspect
    '''
    file_sec_is_empty = file_sec_tools.isEmpty(file_sec_tools.get(dict_tree))
    struct_map_is_empty = phys_struct_map_tools.isEmpty(
        phys_struct_map_tools.get(dict_tree))
    return not (file_sec_is_empty or struct_map_is_empty)
def addAllPagesToRoot(logical_struct_map,dict_tree):
    '''
    Links every page of the physical struct map to the root element of the
    logical struct map and returns the updated dict_tree.

    :param logical_struct_map: the root of the logical struct map; must be
        a single dict carrying an '@ID' key
    :param dict_tree: dict tree to add the links to
    :raises ValueError: if logical_struct_map is not a dict
    '''
    # Anything other than a single dict is treated as several roots
    if not isinstance(logical_struct_map, dict):
        raise ValueError('Multiple root detected - not allowed')

    root_id = logical_struct_map['@ID']
    every_page = phys_struct_map_tools.getPages(
        phys_struct_map_tools.get(dict_tree), details=True)
    # Add all pages to root
    return struct_link_tools.addLinks(dict_tree, root_id, every_page)
def articleExists(dict_tree, title, start_page, end_page):
    '''
    Returns True if an article exists in dict_tree. The test is done by
    checking title, start page and end page.

    :param dict_tree: dict tree to search
    :param title: article title, matched against 'TitleDocMain'
    :param start_page: first page of the article
    :param end_page: last page of the article
    '''
    def getPages(ps, i):
        '''
        From ps, a list of (ORDER, ORDERLABEL) string tuples, return the
        i'th smallest numeric ORDER and the i'th smallest numeric
        ORDERLABEL (i=0 gives the lowest, i=-1 the highest). A column with
        no purely numeric values contributes nothing.
        '''
        # Uses the ps argument explicitly instead of reading the enclosing
        # function's variable through the closure.
        found = []
        for column in (0, 1):
            numbers = sorted(int(p[column]) for p in ps
                             if p[column].isdigit())
            if numbers:
                found.append(numbers[i])
        return found

    named_articles = dmd_sec_tools.getDmdsWithContent(dict_tree,
                                                      'Article',
                                                      'TitleDocMain',
                                                      title)
    for article in named_articles:
        # An article with the same title exists: compare its start and end
        # pages with the ones given.
        phys_pages = struct_link_tools.getPhysListByLogIds(dict_tree,
                                                           [article])
        physical_struct_map = phys_struct_map_tools.get(dict_tree)
        # pages: a list of (str,str) tuples, where first elem is ORDER and
        # second elem is ORDERLABEL
        pages = phys_struct_map_tools.getSelectedPages(physical_struct_map,
                                                       phys_pages)
        # Both ORDER and ORDERLABEL are checked because the two may differ
        # by a page offset.
        # NOTE(review): the collected page numbers are ints, so start_page
        # and end_page only match when given as ints - confirm callers.
        same_start_page = start_page in getPages(pages, 0)
        same_end_page = end_page in getPages(pages, -1)
        if same_start_page and same_end_page:
            return True
    return False
def containsImages(dict_tree):
    '''
    Returns True only if neither the file section nor the physical struct
    map of dict_tree is empty.

    NOTE(review): a function with this same name is also defined earlier
    in this file; being the later definition, this one appears to be the
    one in effect - confirm and remove the duplicate.

    :param dict_tree: dict tree to inspect
    '''
    # Both checks are always evaluated, file section first
    file_section = file_sec_tools.get(dict_tree)
    no_files = file_sec_tools.isEmpty(file_section)
    physical_struct_map = phys_struct_map_tools.get(dict_tree)
    no_pages = phys_struct_map_tools.isEmpty(physical_struct_map)

    if no_files:
        return False
    return not no_pages