예제 #1
0
파일: split.py 프로젝트: 089git/calibre
def split(container, name, loc_or_xpath, before=True):
    '''
    Split the file specified by name at the position specified by loc_or_xpath.

    The top half replaces the contents of ``name``. The bottom half is stored
    under a newly generated ``<base>_split<N>.<ext>`` name, registered in the
    manifest and given a spine entry. Links inside both halves, and links in
    every other file of the container, are rewritten so that they keep
    pointing at anchors that moved into the bottom half.

    :param container: The container holding the file to split.
    :param name: Container name of the file to split.
    :param loc_or_xpath: Either an XPath expression (a string), whose first
        match is used as the split point, or a location that is resolved via
        node_from_loc().
    :param before: Forwarded unchanged to do_split(). Presumably selects
        whether the split happens before (True) or after the split point --
        confirm against do_split().
    :return: The container name of the newly created bottom file.
    :raises AbortError: If the split point is inside a table or is the
        <body> tag.
    '''

    root = container.parsed(name)
    if isinstance(loc_or_xpath, type('')):
        split_point = root.xpath(loc_or_xpath)[0]
    else:
        split_point = node_from_loc(root, loc_or_xpath)
    if in_table(split_point):
        raise AbortError('Cannot split inside tables')
    if split_point.tag.endswith('}body'):
        raise AbortError('Cannot split on the <body> tag')
    tree1, tree2 = do_split(split_point, container.log, before=before)
    root1, root2 = tree1.getroot(), tree2.getroot()
    # The empty fragment is treated as living in the top half, so a link to
    # the file itself (no fragment) keeps pointing at the top file.
    anchors_in_top = frozenset(root1.xpath('//*/@id')) | frozenset(root1.xpath('//*/@name')) | {''}
    anchors_in_bottom = frozenset(root2.xpath('//*/@id')) | frozenset(root2.xpath('//*/@name'))

    # Pick the first unused <base>_split<N>.<ext> name. An existing _split<N>
    # suffix is stripped first so that re-splitting does not stack suffixes.
    base, ext = name.rpartition('.')[0::2]
    base = re.sub(r'_split\d+$', '', base)
    nname, s = None, 0
    while not nname or container.exists(nname):
        s += 1
        nname = '%s_split%d.%s' % (base, s, ext)
    manifest_item = container.generate_item(nname, media_type=container.mime_map[name])
    bottom_name = container.href_to_name(manifest_item.get('href'), container.opf_name)

    # Fix links in the split trees. Every link that targets the file being
    # split is rewritten according to the half its anchor ended up in:
    # same half -> bare '#fragment', other half -> href of that file plus
    # the fragment. The top half is checked first, so an anchor present in
    # both halves resolves to the top file.
    for r in (root1, root2):
        for a in r.xpath('//*[@href]'):
            url = a.get('href')
            if url.startswith('#'):
                fname = name
            else:
                fname = container.href_to_name(url, name)
            if fname != name:
                continue
            fragment = urlparse(url).fragment
            if fragment in anchors_in_top:
                if r is root2:
                    # root2 is saved as bottom_name, so the href must be
                    # relative to that file.
                    a.set('href', '%s#%s' % (container.name_to_href(name, bottom_name), fragment))
                else:
                    a.set('href', '#' + fragment)
            elif fragment in anchors_in_bottom:
                if r is root1:
                    a.set('href', '%s#%s' % (container.name_to_href(bottom_name, name), fragment))
                else:
                    # Without this, a link such as "file.html#x" inside the
                    # bottom half would keep pointing at the original file,
                    # where anchor x no longer exists.
                    a.set('href', '#' + fragment)

    # Fix all links in the container that point to anchors in the bottom tree.
    # Iterate over a snapshot of the names: only the keys are needed, and
    # this works on both Python 2 and Python 3 mappings.
    for fname in tuple(container.mime_map):
        if fname not in {name, bottom_name}:
            repl = SplitLinkReplacer(fname, anchors_in_bottom, name, bottom_name, container)
            container.replace_links(fname, repl)

    container.replace(name, root1)
    container.replace(bottom_name, root2)

    spine = container.opf_xpath('//opf:spine')[0]
    # NOTE(review): if name is not present in the spine this loop runs to
    # completion and the new itemref is inserted after the last spine item
    # (and an empty spine would leave spine_item unbound) -- confirm that
    # callers only split files that are in the spine.
    for spine_item, spine_name, linear in container.spine_iter:
        if spine_name == name:
            break
    index = spine.index(spine_item) + 1

    # The new itemref copies the non-linear flag of the item it follows.
    si = spine.makeelement(OPF('itemref'), idref=manifest_item.get('id'))
    if not linear:
        si.set('linear', 'no')
    container.insert_into_xml(spine, si, index=index)
    container.dirty(container.opf_name)
    return bottom_name
예제 #2
0
파일: split.py 프로젝트: zwlistu/calibre
def split(container, name, loc_or_xpath, before=True, totals=None):
    '''
    Break the file called name into two files at the position given by
    loc_or_xpath. All links and references to the affected files are
    migrated automatically.

    :param loc_or_xpath: An XPath expression such as
        //h:div[@id="split_here"], or a *loc*, which is used internally to
        implement splitting in the preview panel.
    :param before: If True the split occurs before the identified element otherwise after it.
    :param totals: Used internally
    :return: The name of the newly created file that holds the bottom half.
    '''

    root = container.parsed(name)
    if not isinstance(loc_or_xpath, type('')):
        try:
            split_point = node_from_loc(root, loc_or_xpath, totals=totals)
        except MalformedMarkup:
            # The webkit HTML parser and the container parser disagree on the
            # node count. That can happen when the file is valid XML but
            # contains constructs like nested <p> tags, so re-parse with the
            # HTML 5 parser and retry once.
            root = container.parse_xhtml(
                container.raw_data(name), fname=name, force_html5_parse=True)
            try:
                split_point = node_from_loc(root, loc_or_xpath, totals=totals)
            except MalformedMarkup:
                raise MalformedMarkup(
                    _('The file %s has malformed markup. Try running the Fix HTML tool'
                      ' before splitting') % name)
            container.replace(name, root)
    else:
        split_point = root.xpath(loc_or_xpath)[0]

    if in_table(split_point):
        raise AbortError('Cannot split inside tables')
    if split_point.tag.endswith('}body'):
        raise AbortError('Cannot split on the <body> tag')

    top_tree, bottom_tree = do_split(split_point, container.log, before=before)
    top_root = top_tree.getroot()
    bottom_root = bottom_tree.getroot()

    def collect_anchors(tree_root, extra=()):
        # Every id and name attribute value in the tree, plus any extras.
        found = set(tree_root.xpath('//*/@id'))
        found.update(tree_root.xpath('//*/@name'))
        found.update(extra)
        return frozenset(found)

    # The empty fragment is counted as belonging to the top half.
    top_anchors = collect_anchors(top_root, extra=('',))
    bottom_anchors = collect_anchors(bottom_root)

    # Find the first <stem>_split<N>.<ext> name that is not yet taken.
    stem, ext = name.rpartition('.')[0::2]
    stem = re.sub(r'_split\d+$', '', stem)
    counter = 1
    candidate = '%s_split%d.%s' % (stem, counter, ext)
    while container.exists(candidate):
        counter += 1
        candidate = '%s_split%d.%s' % (stem, counter, ext)
    manifest_item = container.generate_item(
        candidate, media_type=container.mime_map[name])
    bottom_name = container.href_to_name(
        manifest_item.get('href'), container.opf_name)

    def relocated_href(fragment, inside_bottom):
        # New href for a link to `fragment` of the split file, as seen from
        # the bottom half (inside_bottom) or the top half. None means the
        # fragment is unknown and the link is left alone.
        if fragment in top_anchors:
            target_in_bottom = False
        elif fragment in bottom_anchors:
            target_in_bottom = True
        else:
            return None
        if target_in_bottom == inside_bottom:
            return '#' + fragment
        if target_in_bottom:
            return '%s#%s' % (container.name_to_href(bottom_name, name), fragment)
        return '%s#%s' % (container.name_to_href(name, bottom_name), fragment)

    # Fix links in the split trees
    for tree_root, inside_bottom in ((top_root, False), (bottom_root, True)):
        for elem in tree_root.xpath('//*[@href]'):
            href = elem.get('href')
            if href.startswith('#'):
                target = name
            else:
                target = container.href_to_name(href, name)
            if target != name:
                continue
            new_href = relocated_href(urlparse(href).fragment, inside_bottom)
            if new_href is not None:
                elem.set('href', new_href)

    # Fix all links in the container that point to anchors in the bottom tree
    halves = {name, bottom_name}
    for other_name, media_type in iteritems(container.mime_map):
        if other_name in halves:
            continue
        container.replace_links(
            other_name,
            SplitLinkReplacer(other_name, bottom_anchors, name, bottom_name, container))

    container.replace(name, top_root)
    container.replace(bottom_name, bottom_root)

    # Add a spine entry for the bottom file right after the spine item
    # selected by the loop below.
    spine = container.opf_xpath('//opf:spine')[0]
    for spine_item, spine_name, linear in container.spine_iter:
        if spine_name == name:
            break
    insert_at = spine.index(spine_item) + 1

    new_itemref = spine.makeelement(OPF('itemref'), idref=manifest_item.get('id'))
    if not linear:
        new_itemref.set('linear', 'no')
    container.insert_into_xml(spine, new_itemref, index=insert_at)
    container.dirty(container.opf_name)
    return bottom_name
예제 #3
0
파일: split.py 프로젝트: j-howell/calibre
def split(container, name, loc_or_xpath, before=True, totals=None):
    '''
    Cut the file specified by name in two at the position specified by
    loc_or_xpath. Links and references to the affected files are migrated
    automatically.

    :param loc_or_xpath: Should be an XPath expression such as
        //h:div[@id="split_here"]. A *loc* is also accepted; it is used
        internally to implement splitting in the preview panel.
    :param before: If True the split occurs before the identified element otherwise after it.
    :param totals: Used internally
    :return: The name of the new file containing the bottom half.
    '''

    root = container.parsed(name)
    if isinstance(loc_or_xpath, type('')):
        split_point = root.xpath(loc_or_xpath)[0]
    else:
        try:
            split_point = node_from_loc(root, loc_or_xpath, totals=totals)
        except MalformedMarkup:
            # Node counts from the webkit HTML parser and the container
            # parser differ. Valid XML with constructs such as nested <p>
            # tags can cause this, so force an HTML 5 parse and try again.
            reparsed_source = container.raw_data(name)
            root = container.parse_xhtml(reparsed_source, fname=name, force_html5_parse=True)
            try:
                split_point = node_from_loc(root, loc_or_xpath, totals=totals)
            except MalformedMarkup:
                raise MalformedMarkup(_('The file %s has malformed markup. Try running the Fix HTML tool'
                                        ' before splitting') % name)
            container.replace(name, root)

    if in_table(split_point):
        raise AbortError('Cannot split inside tables')
    if split_point.tag.endswith('}body'):
        raise AbortError('Cannot split on the <body> tag')

    upper_tree, lower_tree = do_split(split_point, container.log, before=before)
    upper_root = upper_tree.getroot()
    lower_root = lower_tree.getroot()

    # id and name attribute values of each half; the empty fragment is
    # treated as an anchor of the upper half.
    upper_anchors = frozenset(upper_root.xpath('//*/@id')).union(
        upper_root.xpath('//*/@name'), ('',))
    lower_anchors = frozenset(lower_root.xpath('//*/@id')).union(
        lower_root.xpath('//*/@name'))

    # Choose the first free <prefix>_split<N>.<ext> name.
    prefix, ext = name.rpartition('.')[0::2]
    prefix = re.sub(r'_split\d+$', '', prefix)
    serial = 0
    while True:
        serial += 1
        new_name = '%s_split%d.%s' % (prefix, serial, ext)
        if not container.exists(new_name):
            break
    manifest_item = container.generate_item(new_name, media_type=container.mime_map[name])
    bottom_name = container.href_to_name(manifest_item.get('href'), container.opf_name)

    # Fix links in the split trees
    for current_root in (upper_root, lower_root):
        in_lower = current_root is lower_root
        for node in current_root.xpath('//*[@href]'):
            href = node.get('href')
            # Only links that resolve to the file being split are of interest.
            if not href.startswith('#') and container.href_to_name(href, name) != name:
                continue
            frag = urlparse(href).fragment
            if frag in upper_anchors:
                if in_lower:
                    new_href = '%s#%s' % (container.name_to_href(name, bottom_name), frag)
                else:
                    new_href = '#' + frag
            elif frag in lower_anchors:
                if in_lower:
                    new_href = '#' + frag
                else:
                    new_href = '%s#%s' % (container.name_to_href(bottom_name, name), frag)
            else:
                continue
            node.set('href', new_href)

    # Fix all links in the container that point to anchors in the bottom tree
    for item_name, media_type in iteritems(container.mime_map):
        if item_name == name or item_name == bottom_name:
            continue
        replacer = SplitLinkReplacer(item_name, lower_anchors, name, bottom_name, container)
        container.replace_links(item_name, replacer)

    container.replace(name, upper_root)
    container.replace(bottom_name, lower_root)

    # Give the new file a spine entry just after the spine item picked by
    # the loop below.
    spine = container.opf_xpath('//opf:spine')[0]
    for spine_item, spine_name, linear in container.spine_iter:
        if spine_name == name:
            break
    position = spine.index(spine_item) + 1

    itemref = spine.makeelement(OPF('itemref'), idref=manifest_item.get('id'))
    if not linear:
        itemref.set('linear', 'no')
    container.insert_into_xml(spine, itemref, index=position)
    container.dirty(container.opf_name)
    return bottom_name
예제 #4
0
파일: split.py 프로젝트: kmshi/calibre
def split(container, name, loc_or_xpath, before=True):
    '''
    Split the file specified by name at the position specified by loc_or_xpath.

    The top half replaces the contents of ``name``. The bottom half is stored
    under the name of a newly generated manifest item and given a spine
    entry. Links inside both halves, and links in every other file of the
    container, are rewritten so that they keep pointing at anchors that
    moved into the bottom half.

    :param container: The container holding the file to split.
    :param name: Container name of the file to split.
    :param loc_or_xpath: Either an XPath expression (a string), whose first
        match is used as the split point, or a location that is resolved via
        node_from_loc().
    :param before: Forwarded unchanged to do_split(). Presumably selects
        whether the split happens before (True) or after the split point --
        confirm against do_split().
    :return: The container name of the newly created bottom file.
    :raises AbortError: If the split point is inside a table or is the
        <body> tag.
    '''

    root = container.parsed(name)
    if isinstance(loc_or_xpath, type('')):
        split_point = root.xpath(loc_or_xpath)[0]
    else:
        split_point = node_from_loc(root, loc_or_xpath)
    if in_table(split_point):
        raise AbortError('Cannot split inside tables')
    if split_point.tag.endswith('}body'):
        raise AbortError('Cannot split on the <body> tag')
    tree1, tree2 = do_split(split_point, container.log, before=before)
    root1, root2 = tree1.getroot(), tree2.getroot()
    # The empty fragment is treated as living in the top half, so a link to
    # the file itself (no fragment) keeps pointing at the top file.
    anchors_in_top = frozenset(root1.xpath('//*/@id')) | frozenset(
        root1.xpath('//*/@name')) | {''}
    anchors_in_bottom = frozenset(root2.xpath('//*/@id')) | frozenset(
        root2.xpath('//*/@name'))
    # NOTE(review): the existing name is passed in, so this presumably relies
    # on generate_item() deriving an unused href from it -- verify, otherwise
    # bottom_name would collide with name.
    manifest_item = container.generate_item(
        name, media_type=container.mime_map[name])
    bottom_name = container.href_to_name(manifest_item.get('href'),
                                         container.opf_name)

    # Fix links in the split trees. Every link that targets the file being
    # split is rewritten according to the half its anchor ended up in:
    # same half -> bare '#fragment', other half -> href of that file plus
    # the fragment. The top half is checked first, so an anchor present in
    # both halves resolves to the top file.
    for r in (root1, root2):
        for a in r.xpath('//*[@href]'):
            url = a.get('href')
            if url.startswith('#'):
                fname = name
            else:
                fname = container.href_to_name(url, name)
            if fname != name:
                continue
            fragment = urlparse(url).fragment
            if fragment in anchors_in_top:
                if r is root2:
                    # root2 is saved as bottom_name, so the href must be
                    # relative to that file.
                    a.set('href', '%s#%s' % (
                        container.name_to_href(name, bottom_name), fragment))
                else:
                    a.set('href', '#' + fragment)
            elif fragment in anchors_in_bottom:
                if r is root1:
                    a.set('href', '%s#%s' % (
                        container.name_to_href(bottom_name, name), fragment))
                else:
                    # Without this, a link such as "file.html#x" inside the
                    # bottom half would keep pointing at the original file,
                    # where anchor x no longer exists.
                    a.set('href', '#' + fragment)

    # Fix all links in the container that point to anchors in the bottom tree.
    # Iterate over a snapshot of the names: only the keys are needed, and
    # this works on both Python 2 and Python 3 mappings.
    for fname in tuple(container.mime_map):
        if fname not in {name, bottom_name}:
            repl = SplitLinkReplacer(fname, anchors_in_bottom, name,
                                     bottom_name, container)
            container.replace_links(fname, repl)

    container.replace(name, root1)
    container.replace(bottom_name, root2)

    spine = container.opf_xpath('//opf:spine')[0]
    # NOTE(review): if name is not present in the spine this loop runs to
    # completion and the new itemref is inserted after the last spine item
    # (and an empty spine would leave spine_item unbound) -- confirm that
    # callers only split files that are in the spine.
    for spine_item, spine_name, linear in container.spine_iter:
        if spine_name == name:
            break
    index = spine.index(spine_item) + 1

    # The new itemref copies the non-linear flag of the item it follows.
    si = spine.makeelement(OPF('itemref'), idref=manifest_item.get('id'))
    if not linear:
        si.set('linear', 'no')
    container.insert_into_xml(spine, si, index=index)
    container.dirty(container.opf_name)
    return bottom_name