def split(container, name, loc_or_xpath, before=True):
    '''
    Split the file specified by name at the position specified by
    loc_or_xpath. A new file is created for the bottom half, links that
    pointed at anchors which moved are updated, and the new file is added to
    the spine right after the original one.

    :param loc_or_xpath: Either an XPath expression (a string) whose first
        match is used as the split point, or a *loc* that is resolved with
        node_from_loc().
    :param before: Passed through to do_split(); selects whether the split
        happens before or after the split point.
    :return: The name of the newly created file holding the bottom half.
    :raises AbortError: If the split point is inside a table or is the
        <body> tag itself.
    '''
    root = container.parsed(name)
    if isinstance(loc_or_xpath, type('')):
        split_point = root.xpath(loc_or_xpath)[0]
    else:
        split_point = node_from_loc(root, loc_or_xpath)
    if in_table(split_point):
        raise AbortError('Cannot split inside tables')
    if split_point.tag.endswith('}body'):
        raise AbortError('Cannot split on the <body> tag')
    tree1, tree2 = do_split(split_point, container.log, before=before)
    root1, root2 = tree1.getroot(), tree2.getroot()
    # The empty fragment counts as belonging to the top file, so that links to
    # the file as a whole keep pointing at the top half.
    anchors_in_top = frozenset(root1.xpath('//*/@id')) | frozenset(root1.xpath('//*/@name')) | {''}
    anchors_in_bottom = frozenset(root2.xpath('//*/@id')) | frozenset(root2.xpath('//*/@name'))

    # Pick the first unused <base>_split<N>.<ext> name. Any existing _splitN
    # suffix is stripped first so repeated splits do not pile up suffixes.
    base, ext = name.rpartition('.')[0::2]
    base = re.sub(r'_split\d+$', '', base)
    nname, s = None, 0
    while not nname or container.exists(nname):
        s += 1
        nname = '%s_split%d.%s' % (base, s, ext)
    manifest_item = container.generate_item(nname, media_type=container.mime_map[name])
    bottom_name = container.href_to_name(manifest_item.get('href'), container.opf_name)

    # Fix links in the split trees. Every link that targets an anchor of the
    # original file is rewritten: links to an anchor in the same half become
    # plain '#fragment' links, links to the other half get an href computed
    # relative to the file the link will actually live in.
    for r in (root1, root2):
        for a in r.xpath('//*[@href]'):
            url = a.get('href')
            if url.startswith('#'):
                fname = name
            else:
                fname = container.href_to_name(url, name)
            if fname != name:
                continue
            fragment = urlparse(url).fragment
            if fragment in anchors_in_top:
                if r is root2:
                    a.set('href', '%s#%s' % (container.name_to_href(name, bottom_name), fragment))
                else:
                    a.set('href', '#' + fragment)
            elif fragment in anchors_in_bottom:
                if r is root1:
                    a.set('href', '%s#%s' % (container.name_to_href(bottom_name, name), fragment))
                else:
                    a.set('href', '#' + fragment)

    # Fix all links in the container that point to anchors in the bottom tree
    for fname in container.mime_map:
        if fname not in {name, bottom_name}:
            repl = SplitLinkReplacer(fname, anchors_in_bottom, name, bottom_name, container)
            container.replace_links(fname, repl)

    container.replace(name, root1)
    container.replace(bottom_name, root2)

    # Insert the new file into the spine directly after the original file,
    # inheriting its linear setting.
    # NOTE(review): if name is not in the spine the loop falls through and the
    # last spine item is used as the insertion point - confirm this is intended.
    spine = container.opf_xpath('//opf:spine')[0]
    for spine_item, spine_name, linear in container.spine_iter:
        if spine_name == name:
            break
    index = spine.index(spine_item) + 1

    si = spine.makeelement(OPF('itemref'), idref=manifest_item.get('id'))
    if not linear:
        si.set('linear', 'no')
    container.insert_into_xml(spine, si, index=index)
    container.dirty(container.opf_name)
    return bottom_name
def split(container, name, loc_or_xpath, before=True, totals=None):
    '''
    Split the file specified by name in two at the position specified by
    loc_or_xpath. All links and references to the affected files are migrated
    automatically.

    :param loc_or_xpath: Should be an XPath expression such as
        //h:div[@id="split_here"]. Can also be a *loc* which is used
        internally to implement splitting in the preview panel.
    :param before: If True the split occurs before the identified element
        otherwise after it.
    :param totals: Used internally
    :return: The name of the newly created file that holds the bottom half.
    '''
    root = container.parsed(name)
    if isinstance(loc_or_xpath, type('')):
        split_point = root.xpath(loc_or_xpath)[0]
    else:
        try:
            split_point = node_from_loc(root, loc_or_xpath, totals=totals)
        except MalformedMarkup:
            # The webkit HTML parser and the container parser disagree on the
            # node counts. That can happen when the file is valid XML but
            # contains constructs such as nested <p> tags, so re-parse it
            # with the HTML 5 parser and retry once.
            raw = container.raw_data(name)
            root = container.parse_xhtml(raw, fname=name, force_html5_parse=True)
            try:
                split_point = node_from_loc(root, loc_or_xpath, totals=totals)
            except MalformedMarkup:
                message = _('The file %s has malformed markup. Try running the Fix HTML tool'
                            ' before splitting') % name
                raise MalformedMarkup(message)
            container.replace(name, root)

    if in_table(split_point):
        raise AbortError('Cannot split inside tables')
    if split_point.tag.endswith('}body'):
        raise AbortError('Cannot split on the <body> tag')

    top_tree, bottom_tree = do_split(split_point, container.log, before=before)
    top_root = top_tree.getroot()
    bottom_root = bottom_tree.getroot()
    # The empty fragment is treated as an anchor of the top half
    top_anchors = frozenset(top_root.xpath('//*/@id')).union(
        top_root.xpath('//*/@name'), ('',))
    bottom_anchors = frozenset(bottom_root.xpath('//*/@id')).union(
        bottom_root.xpath('//*/@name'))

    # Find the first free <stem>_split<N>.<ext> name, ignoring any _splitN
    # suffix the current name already carries
    stem, _sep, ext = name.rpartition('.')
    stem = re.sub(r'_split\d+$', '', stem)
    counter = 1
    candidate = '%s_split%d.%s' % (stem, counter, ext)
    while container.exists(candidate):
        counter += 1
        candidate = '%s_split%d.%s' % (stem, counter, ext)
    manifest_item = container.generate_item(
        candidate, media_type=container.mime_map[name])
    bottom_name = container.href_to_name(
        manifest_item.get('href'), container.opf_name)

    # Fix links in the split trees
    for half in (top_root, bottom_root):
        link_in_top = half is top_root
        for elem in half.xpath('//*[@href]'):
            href = elem.get('href')
            target = name if href.startswith('#') else container.href_to_name(href, name)
            if target != name:
                continue
            fragment = urlparse(href).fragment
            if fragment in top_anchors:
                anchor_in_top = True
            elif fragment in bottom_anchors:
                anchor_in_top = False
            else:
                continue
            if anchor_in_top == link_in_top:
                # Link and anchor ended up in the same file
                elem.set('href', '#' + fragment)
            elif anchor_in_top:
                # Link in the bottom file pointing back into the top file
                elem.set('href', '%s#%s' % (
                    container.name_to_href(name, bottom_name), fragment))
            else:
                # Link in the top file pointing into the bottom file
                elem.set('href', '%s#%s' % (
                    container.name_to_href(bottom_name, name), fragment))

    # Fix all links in the container that point to anchors in the bottom tree
    for other_name, _media_type in iteritems(container.mime_map):
        if other_name in {name, bottom_name}:
            continue
        container.replace_links(other_name, SplitLinkReplacer(
            other_name, bottom_anchors, name, bottom_name, container))

    container.replace(name, top_root)
    container.replace(bottom_name, bottom_root)

    # Add the new file to the spine, right after the file that was split
    spine = container.opf_xpath('//opf:spine')[0]
    for itemref, itemref_name, is_linear in container.spine_iter:
        if itemref_name == name:
            break
    insert_at = spine.index(itemref) + 1

    new_itemref = spine.makeelement(OPF('itemref'), idref=manifest_item.get('id'))
    if not is_linear:
        new_itemref.set('linear', 'no')
    container.insert_into_xml(spine, new_itemref, index=insert_at)
    container.dirty(container.opf_name)
    return bottom_name
def split(container, name, loc_or_xpath, before=True, totals=None):
    '''
    Split the file specified by name at the position specified by
    loc_or_xpath. Splitting automatically migrates all links and references
    to the affected files.

    :param loc_or_xpath: Should be an XPath expression such as
        //h:div[@id="split_here"]. Can also be a *loc* which is used
        internally to implement splitting in the preview panel.
    :param before: If True the split occurs before the identified element
        otherwise after it.
    :param totals: Used internally
    :return: The name of the new file that contains the bottom half.
    '''
    root = container.parsed(name)
    if isinstance(loc_or_xpath, type('')):
        split_point = root.xpath(loc_or_xpath)[0]
    else:
        try:
            split_point = node_from_loc(root, loc_or_xpath, totals=totals)
        except MalformedMarkup:
            # Node counts from the webkit HTML parser and the container
            # parser differ. This happens for files that are valid XML but
            # contain things like nested <p> tags. Force a parse with the
            # HTML 5 parser and try once more.
            root = container.parse_xhtml(
                container.raw_data(name), fname=name, force_html5_parse=True)
            try:
                split_point = node_from_loc(root, loc_or_xpath, totals=totals)
            except MalformedMarkup:
                raise MalformedMarkup(
                    _('The file %s has malformed markup. Try running the Fix HTML tool'
                      ' before splitting') % name)
            container.replace(name, root)

    if in_table(split_point):
        raise AbortError('Cannot split inside tables')
    if split_point.tag.endswith('}body'):
        raise AbortError('Cannot split on the <body> tag')

    halves = do_split(split_point, container.log, before=before)
    root1, root2 = halves[0].getroot(), halves[1].getroot()
    ids_top, names_top = root1.xpath('//*/@id'), root1.xpath('//*/@name')
    anchors_in_top = frozenset(ids_top) | frozenset(names_top) | {''}
    ids_bottom, names_bottom = root2.xpath('//*/@id'), root2.xpath('//*/@name')
    anchors_in_bottom = frozenset(ids_bottom) | frozenset(names_bottom)

    # Choose an unused name of the form <base>_split<N>.<ext>
    parts = name.rpartition('.')
    base = re.sub(r'_split\d+$', '', parts[0])
    ext = parts[2]
    n = 0
    while True:
        n += 1
        nname = '%s_split%d.%s' % (base, n, ext)
        if not container.exists(nname):
            break
    manifest_item = container.generate_item(nname, media_type=container.mime_map[name])
    bottom_name = container.href_to_name(manifest_item.get('href'), container.opf_name)

    # Fix links in the split trees. The table is keyed by
    # (link lives in top half, anchor lives in top half); hrefs that cross
    # the split are computed lazily, only when such a link is found.
    new_href = {
        (True, True): lambda frag: '#' + frag,
        (False, False): lambda frag: '#' + frag,
        (True, False): lambda frag: '%s#%s' % (
            container.name_to_href(bottom_name, name), frag),
        (False, True): lambda frag: '%s#%s' % (
            container.name_to_href(name, bottom_name), frag),
    }
    for r in (root1, root2):
        link_in_top = r is root1
        for a in r.xpath('//*[@href]'):
            url = a.get('href')
            if not url.startswith('#') and container.href_to_name(url, name) != name:
                # Link to some other file, nothing to do
                continue
            frag = urlparse(url).fragment
            if frag in anchors_in_top:
                anchor_in_top = True
            elif frag in anchors_in_bottom:
                anchor_in_top = False
            else:
                continue
            a.set('href', new_href[link_in_top, anchor_in_top](frag))

    # Fix all links in the container that point to anchors in the bottom tree
    untouched = {name, bottom_name}
    for fname, media_type in iteritems(container.mime_map):
        if fname not in untouched:
            container.replace_links(fname, SplitLinkReplacer(
                fname, anchors_in_bottom, name, bottom_name, container))

    container.replace(name, root1)
    container.replace(bottom_name, root2)

    # Place the bottom half in the spine immediately after the top half
    spine = container.opf_xpath('//opf:spine')[0]
    for spine_item, spine_name, linear in container.spine_iter:
        if spine_name == name:
            break
    position = spine.index(spine_item) + 1

    si = spine.makeelement(OPF('itemref'), idref=manifest_item.get('id'))
    if not linear:
        si.set('linear', 'no')
    container.insert_into_xml(spine, si, index=position)
    container.dirty(container.opf_name)
    return bottom_name
def split(container, name, loc_or_xpath, before=True):
    '''
    Split the file specified by name at the position specified by
    loc_or_xpath. A new file is created for the bottom half, links that
    pointed at anchors which moved are updated, and the new file is added to
    the spine right after the original one.

    :param loc_or_xpath: Either an XPath expression (a string) whose first
        match is used as the split point, or a *loc* that is resolved with
        node_from_loc().
    :param before: Passed through to do_split(); selects whether the split
        happens before or after the split point.
    :return: The name of the newly created file holding the bottom half.
    :raises AbortError: If the split point is inside a table or is the
        <body> tag itself.
    '''
    root = container.parsed(name)
    if isinstance(loc_or_xpath, type('')):
        split_point = root.xpath(loc_or_xpath)[0]
    else:
        split_point = node_from_loc(root, loc_or_xpath)
    if in_table(split_point):
        raise AbortError('Cannot split inside tables')
    if split_point.tag.endswith('}body'):
        raise AbortError('Cannot split on the <body> tag')
    tree1, tree2 = do_split(split_point, container.log, before=before)
    root1, root2 = tree1.getroot(), tree2.getroot()
    # The empty fragment counts as belonging to the top file, so that links to
    # the file as a whole keep pointing at the top half.
    anchors_in_top = frozenset(root1.xpath('//*/@id')) | frozenset(
        root1.xpath('//*/@name')) | {''}
    anchors_in_bottom = frozenset(root2.xpath('//*/@id')) | frozenset(
        root2.xpath('//*/@name'))

    # NOTE(review): the original name is handed to generate_item();
    # presumably it derives an unused name from it - confirm.
    manifest_item = container.generate_item(
        name, media_type=container.mime_map[name])
    bottom_name = container.href_to_name(manifest_item.get('href'),
                                         container.opf_name)

    # Fix links in the split trees. Every link that targets an anchor of the
    # original file is rewritten: links to an anchor in the same half become
    # plain '#fragment' links, links to the other half get an href computed
    # relative to the file the link will actually live in.
    for r in (root1, root2):
        for a in r.xpath('//*[@href]'):
            url = a.get('href')
            if url.startswith('#'):
                fname = name
            else:
                fname = container.href_to_name(url, name)
            if fname != name:
                continue
            fragment = urlparse(url).fragment
            if fragment in anchors_in_top:
                if r is root2:
                    a.set('href', '%s#%s' % (
                        container.name_to_href(name, bottom_name), fragment))
                else:
                    a.set('href', '#' + fragment)
            elif fragment in anchors_in_bottom:
                if r is root1:
                    a.set('href', '%s#%s' % (
                        container.name_to_href(bottom_name, name), fragment))
                else:
                    a.set('href', '#' + fragment)

    # Fix all links in the container that point to anchors in the bottom tree
    for fname in container.mime_map:
        if fname not in {name, bottom_name}:
            repl = SplitLinkReplacer(fname, anchors_in_bottom, name,
                                     bottom_name, container)
            container.replace_links(fname, repl)

    container.replace(name, root1)
    container.replace(bottom_name, root2)

    # Insert the new file into the spine directly after the original file,
    # inheriting its linear setting.
    # NOTE(review): if name is not in the spine the loop falls through and the
    # last spine item is used as the insertion point - confirm this is intended.
    spine = container.opf_xpath('//opf:spine')[0]
    for spine_item, spine_name, linear in container.spine_iter:
        if spine_name == name:
            break
    index = spine.index(spine_item) + 1

    si = spine.makeelement(OPF('itemref'), idref=manifest_item.get('id'))
    if not linear:
        si.set('linear', 'no')
    container.insert_into_xml(spine, si, index=index)
    container.dirty(container.opf_name)
    return bottom_name