def articleExists(dict_tree, title, start_page, end_page):
    '''
    Return True if dict_tree already contains an article with the given
    title whose first and last page match start_page and end_page.

    Candidate articles are looked up by title; each candidate is then
    confirmed by comparing its lowest and highest page numbers against
    start_page and end_page.

    :param dict_tree: tree handed on to the dmd_sec_tools,
        struct_link_tools and phys_struct_map_tools helpers
    :param title: title passed to dmd_sec_tools.getDmdsWithContent together
        with 'Article' and 'TitleDocMain'
    :param start_page: first page of the article. It is compared against
        integer page numbers, so e.g. the string '12' never matches
    :param end_page: last page of the article, compared the same way as
        start_page
    :return: True if a matching article is found, otherwise False
    '''
    def boundary_pages(page_pairs, index):
        # For each element of the pairs (0 = ORDER, 1 = ORDERLABEL) pick
        # the page number at position `index` of the sorted numbers
        # (0 = lowest, -1 = highest). Non-numeric entries are ignored, and
        # an element without any numeric entry contributes nothing.
        found = []
        for column in (0, 1):
            numbers = sorted(
                int(pair[column]) for pair in page_pairs
                if pair[column].isdigit())
            if numbers:
                found.append(numbers[index])
        return found

    named_articles = dmd_sec_tools.getDmdsWithContent(
        dict_tree, 'Article', 'TitleDocMain', title)
    for article in named_articles:
        # An article with the same title exists in dict_tree, so compare
        # its start and end pages with the ones given.
        phys_pages = struct_link_tools.getPhysListByLogIds(
            dict_tree, [article])
        physical_struct_map = phys_struct_map_tools.get(dict_tree)
        # pages: a list of (str, str) tuples, where the first element is
        # ORDER and the second element is ORDERLABEL.
        pages = phys_struct_map_tools.getSelectedPages(
            physical_struct_map, phys_pages)
        # Both ORDER and ORDERLABEL are checked because there may be an
        # offset between the two numberings.
        same_start_page = start_page in boundary_pages(pages, 0)
        same_end_page = end_page in boundary_pages(pages, -1)
        if same_start_page and same_end_page:
            return True
    return False
def addPagesOnLevel(logical_struct_map, dict_tree):
    '''
    Give every element of a logical struct map that has no links yet the
    physical pages of its children, handling subtrees first.

    :param logical_struct_map: either a single element (a dict with an
        '@ID' key and optionally a '{http://www.loc.gov/METS/}div' entry
        holding its children) or a list of such elements. Any other value
        is ignored.
    :param dict_tree: tree handed on to struct_link_tools; the links are
        added to it
    :return: the tree with the added links. dict_tree is returned on every
        path, including the ones where nothing needs to be done.
    '''
    div_key = '{http://www.loc.gov/METS/}div'

    if isinstance(logical_struct_map, list):
        # Multiple branches: process each one, carrying the tree along.
        for branch in logical_struct_map:
            dict_tree = addPagesOnLevel(branch, dict_tree)
        return dict_tree

    if not isinstance(logical_struct_map, dict):
        # Neither a single element nor a list of elements: nothing to do.
        return dict_tree

    # Only one root.
    parent_id = logical_struct_map['@ID']
    if struct_link_tools.exists(dict_tree, parent_id):
        # Current element already has pages.
        return dict_tree

    children_ids = log_struct_map_tools.getChildrenLogIds(logical_struct_map)
    if not children_ids:
        # No children, so there are no pages to inherit.
        return dict_tree

    if div_key in logical_struct_map:
        # Recursively handle the subtree before adding pages to this
        # element, so the children have their links when they are read.
        dict_tree = addPagesOnLevel(logical_struct_map[div_key], dict_tree)

    phys_list = struct_link_tools.getPhysListByLogIds(dict_tree, children_ids)
    # Add the children's pages (phys_list) to the parent (parent_id).
    return struct_link_tools.addLinks(dict_tree, parent_id, phys_list)