Example #1
0
def addImages(dict_tree,image_src):
    '''
    Insert the images found in image_src into the file section of dict_tree.

    Before the file section is built, the physical struct map is cleared and
    re-created (sized by the number of images) when it is empty or is not
    valid for image_src.

    :param dict_tree: dict tree to insert images into
    :param image_src: folder in which the images exist
    :returns: the dict tree returned by file_sec_tools.insert
    :raises ValueError: if no physical struct map can be found in dict_tree
    '''
    dict_tree = dmd_sec_tools.fixPathimagefiles(dict_tree, image_src)
    found_images = fs_tools.getImages(image_src)

    # --- Physical struct map ---
    phys_map = phys_struct_map_tools.get(dict_tree)
    if phys_map is None:
        raise ValueError('physical_struct_map is missing from dict_tree')
    needs_rebuild = (phys_struct_map_tools.isEmpty(phys_map) or
                     not phys_struct_map_tools.isValid(phys_map, image_src))
    if needs_rebuild:
        # Wipe whatever is there, then add pages for the images on disk.
        phys_map = phys_struct_map_tools.clear(phys_map)
        # NOTE(review): the rebuilt map is never handed back to dict_tree
        # here - presumably the helpers modify it in place; confirm.
        phys_struct_map_tools.create(phys_map, len(found_images))

    # --- File section ---
    current_file_section = file_sec_tools.get(dict_tree)
    new_file_section = file_sec_tools.create(current_file_section,
                                             found_images)
    return file_sec_tools.insert(dict_tree, new_file_section)
def addStructLinks(dict_tree,link_from,doc_struct_info):
    '''
    Link a logical section to a range of physical pages and return dict_tree
    with the updated struct link inserted.

    :param dict_tree: a dictionary tree with complete goobi metadata
    :param link_from: the logical section to link to, e.g. LOG_0001
    :param doc_struct_info: a dictionary with at least the keys
        'start_page' and 'end_page'
    :returns: the value returned by insert(struct_link, dict_tree)
    :raises AssertionError: if 'start_page' or 'end_page' is missing from
        doc_struct_info (only while assertions are enabled)
    '''
    # Fetch the existing struct link before validating, as callers may rely
    # on this lookup happening first.
    struct_link = get(dict_tree)

    # Both page boundaries must be supplied; start_page is checked first.
    for required_key in ('start_page', 'end_page'):
        assert required_key in doc_struct_info, \
               'doc_struct_info must contain "{0}"'.format(required_key)

    first_page = doc_struct_info['start_page']
    last_page = doc_struct_info['end_page']

    # Build the page entries covering first_page..last_page.
    phys_map = phys_struct_map_tools.get(dict_tree)
    linked_pages = phys_struct_map_tools.createPages(phys_map,
                                                     first_page,
                                                     last_page)

    # Add the links and write the struct link back into the tree.
    updated_struct_link = addLinksToStructLink(struct_link, link_from,
                                               linked_pages)
    return insert(updated_struct_link, dict_tree)
Example #3
0
def addOffsetToPhysicalStructMap(dict_tree, page_offset):
    '''
    Add page_offset to all orderlabels of the physical struct map.

    Nothing happens when page_offset is None, or when
    phys_struct_map_tools.offsetExists reports that an offset is already set.

    :param dict_tree: dict tree holding the physical struct map
    :param page_offset: a positive integer, or None to skip
    :returns: None in every case
    '''
    if page_offset is None:
        return
    phys_map = phys_struct_map_tools.get(dict_tree)
    # Only apply the offset once per book.
    if not phys_struct_map_tools.offsetExists(phys_map):
        phys_struct_map_tools.addOffset(phys_map, page_offset)
Example #4
0
def containsImages(dict_tree):
    '''
    Return True only when both the file section and the physical struct map
    of dict_tree are non-empty; False if one or both are empty.

    Per the original notes: the file section maps actual file paths to
    ID="FILE_nnnn", and structMap TYPE="PHYSICAL" maps those FILE_nnnn ids
    to PHYS_nnnn and adds ORDER and ORDERLABEL.

    :param dict_tree: dict tree to inspect
    '''
    # File section of the meta.xml
    file_section = file_sec_tools.get(dict_tree)
    file_sec_is_empty = file_sec_tools.isEmpty(file_section)
    # Physical struct map section of the meta.xml
    phys_map = phys_struct_map_tools.get(dict_tree)
    phys_map_is_empty = phys_struct_map_tools.isEmpty(phys_map)
    # Images are present only if neither section is empty
    return not (file_sec_is_empty or phys_map_is_empty)
Example #5
0
def addAllPagesToRoot(logical_struct_map,dict_tree):
    '''
    Link every page of the physical struct map to the root of the logical
    struct map and return the updated dict_tree.

    :param logical_struct_map: the logical root; must be a single dict
        carrying an '@ID' entry
    :param dict_tree: dict tree holding the physical struct map
    :returns: the dict tree returned by struct_link_tools.addLinks
    :raises ValueError: if logical_struct_map is not a dict
    '''
    # Anything other than a single dict is treated as several roots.
    if not isinstance(logical_struct_map, dict):
        raise ValueError('Multiple root detected - not allowed')

    root_id = logical_struct_map['@ID']
    phys_map = phys_struct_map_tools.get(dict_tree)
    every_page = phys_struct_map_tools.getPages(phys_map, details=True)
    # Attach the complete page list to the root element
    return struct_link_tools.addLinks(dict_tree, root_id, every_page)
Example #6
0
def articleExists(dict_tree, title, start_page, end_page):
    '''
    Returns true if an article exists in dict_tree.

    The test is done by checking title, start page and end page: an article
    matches when its title equals ``title`` and both its first and last page
    agree with ``start_page`` / ``end_page``. Each boundary is compared
    against both the ORDER and the ORDERLABEL value, because the two may
    differ by a page offset.

    :param dict_tree: dict tree to search
    :param title: value of 'TitleDocMain' the article must have
    :param start_page: first page of the article
    :param end_page: last page of the article
    :returns: True if a matching article is found, otherwise False
    '''
    def _numbers_at(page_pairs, index):
        # For each of the two columns (ORDER, ORDERLABEL) take the purely
        # numeric entries, sort them as integers and pick the one at index
        # (0 = lowest, -1 = highest). A column with no numeric entries
        # contributes nothing.
        # The helper now works on its own argument; previously the parameter
        # was ignored and the enclosing function's local was read instead.
        picked = []
        for column in (0, 1):
            numbers = sorted(int(pair[column]) for pair in page_pairs
                             if pair[column].isdigit())
            if numbers:
                picked.append(numbers[index])
        return picked

    named_articles = dmd_sec_tools.getDmdsWithContent(dict_tree,
                                                      'Article',
                                                      'TitleDocMain',
                                                      title)
    for article in named_articles:
        # An article with the same title exists; compare start and end pages.
        phys_pages = struct_link_tools.getPhysListByLogIds(dict_tree,
                                                           [article])
        physical_struct_map = phys_struct_map_tools.get(dict_tree)
        # pages: a list of (str, str) tuples, where the first element is
        # ORDER and the second element is ORDERLABEL.
        pages = phys_struct_map_tools.getSelectedPages(physical_struct_map,
                                                       phys_pages)
        # NOTE(review): the candidates below are ints, so start_page and
        # end_page presumably must be ints to ever match - confirm callers.
        same_start_page = start_page in _numbers_at(pages, 0)
        same_end_page = end_page in _numbers_at(pages, -1)
        if same_start_page and same_end_page:
            return True
    return False
def containsImages(dict_tree):
    '''
    Return True only when neither the file section nor the physical struct
    map of dict_tree is empty.

    :param dict_tree: dict tree to inspect
    '''
    # Both sections are always inspected, file section first.
    emptiness = [
        file_sec_tools.isEmpty(file_sec_tools.get(dict_tree)),
        phys_struct_map_tools.isEmpty(phys_struct_map_tools.get(dict_tree)),
    ]
    # Both file section and physical struct map must be non empty
    return not any(emptiness)