def node_from_loc(root, locs, totals=None):
    '''
    Resolve a *loc* (a sequence of child indices) to a node, starting from
    the first element whose local name is ``body``.

    :param root: Parsed tree root; must support ``xpath()``.
    :param locs: Child indices, one per level, followed downwards from the
        body element.
    :param totals: Optional sequence of expected child counts, one per level.
        When given, a mismatch at any level raises :class:`MalformedMarkup`.
    :return: The node reached after following every index in ``locs`` (the
        body element itself when ``locs`` is empty).
    '''
    current = root.xpath('//*[local-name()="body"]')[0]
    verify_counts = totals is not None
    for depth, child_index in enumerate(locs):
        # Passing etree.Element as the tag filter restricts the iteration to
        # element children (per the lxml documentation), so other node kinds
        # do not shift the indices.
        element_children = tuple(current.iterchildren(etree.Element))
        if verify_counts and totals[depth] != len(element_children):
            # The caller's view of the tree disagrees with ours at this level
            raise MalformedMarkup()
        current = element_children[child_index]
    return current
def add_id(container, name, loc, totals=None):
    '''
    Make sure the element at ``loc`` in the file ``name`` carries an ``id``
    attribute and return that id.

    An existing ``id`` is kept; otherwise the value produced by ``uuid_id()``
    is assigned. The item is committed with ``keep_parsed=True`` afterwards.

    :param container: The container holding the file.
    :param name: Name of the file inside the container.
    :param loc: Sequence of child indices, resolved by ``node_from_loc``.
    :param totals: Optional expected child counts, passed through to
        ``node_from_loc``.
    :return: The value of the element's ``id`` attribute.
    :raises MalformedMarkup: If the location cannot be resolved even after
        re-parsing the file with the HTML 5 parser.
    '''
    tree = container.parsed(name)
    try:
        target = node_from_loc(tree, loc, totals=totals)
    except MalformedMarkup:
        # The webkit HTML parser and the container parser disagree on the
        # node counts. This can happen when the file is valid XML but
        # contains constructs such as nested <p> tags, so re-parse it with
        # the HTML 5 parser and try once more.
        tree = container.parse_xhtml(
            container.raw_data(name), fname=name, force_html5_parse=True)
        try:
            target = node_from_loc(tree, loc, totals=totals)
        except MalformedMarkup:
            message = _(
                'The file %s has malformed markup. Try running the Fix HTML tool'
                ' before editing.') % name
            raise MalformedMarkup(message)
        # Only swap in the re-parsed tree once the location resolved in it
        container.replace(name, tree)

    element_id = target.get('id', uuid_id())
    target.set('id', element_id)
    container.commit_item(name, keep_parsed=True)
    return target.get('id')
def split(container, name, loc_or_xpath, before=True, totals=None):
    '''
    Split the file ``name`` in two at the position identified by
    ``loc_or_xpath``. Links and references to the affected files are
    migrated automatically.

    :param loc_or_xpath: An XPath expression such as
        //h:div[@id="split_here"] selecting the element to split at. May also
        be a *loc*, which is used internally to implement splitting in the
        preview panel.
    :param before: If True the split occurs before the identified element,
        otherwise after it.
    :param totals: Used internally
    :return: The name of the newly generated file that receives the second
        (bottom) tree.
    :raises AbortError: If the split point is inside a table or is the
        <body> tag.
    :raises MalformedMarkup: If a *loc* cannot be resolved even after
        re-parsing the file with the HTML 5 parser.
    '''
    text_type = type('')
    doc_root = container.parsed(name)
    if isinstance(loc_or_xpath, text_type):
        pivot = doc_root.xpath(loc_or_xpath)[0]
    else:
        try:
            pivot = node_from_loc(doc_root, loc_or_xpath, totals=totals)
        except MalformedMarkup:
            # The webkit HTML parser and the container parser disagree on the
            # node counts. This can happen when the file is valid XML but
            # contains constructs such as nested <p> tags, so re-parse it
            # with the HTML 5 parser and try once more.
            doc_root = container.parse_xhtml(
                container.raw_data(name), fname=name, force_html5_parse=True)
            try:
                pivot = node_from_loc(doc_root, loc_or_xpath, totals=totals)
            except MalformedMarkup:
                message = _(
                    'The file %s has malformed markup. Try running the Fix HTML tool'
                    ' before splitting') % name
                raise MalformedMarkup(message)
            container.replace(name, doc_root)

    if in_table(pivot):
        raise AbortError('Cannot split inside tables')
    if pivot.tag.endswith('}body'):
        raise AbortError('Cannot split on the <body> tag')

    top_tree, bottom_tree = do_split(pivot, container.log, before=before)
    top_root = top_tree.getroot()
    bottom_root = bottom_tree.getroot()

    def collect_anchors(tree_root):
        # Every id and name attribute value found in the tree
        ids = frozenset(tree_root.xpath('//*/@id'))
        names = frozenset(tree_root.xpath('//*/@name'))
        return ids | names

    # The empty fragment is counted as a top anchor, so links without a
    # fragment are treated as pointing into the top tree.
    anchors_in_top = collect_anchors(top_root) | {''}
    anchors_in_bottom = collect_anchors(bottom_root)

    # Pick the first unused <base>_split<N>.<ext> name, dropping any
    # _split<N> suffix the current name already carries.
    stem, sep, ext = name.rpartition('.')
    stem = re.sub(r'_split\d+$', '', stem)
    counter = 0
    while True:
        counter += 1
        candidate = '%s_split%d.%s' % (stem, counter, ext)
        if not container.exists(candidate):
            break
    manifest_item = container.generate_item(
        candidate, media_type=container.mime_map[name])
    bottom_name = container.href_to_name(
        manifest_item.get('href'), container.opf_name)

    # Fix links in the split trees
    for tree_root in (top_root, bottom_root):
        for link in tree_root.xpath('//*[@href]'):
            url = link.get('href')
            if url.startswith('#'):
                target = name
            else:
                target = container.href_to_name(url, name)
            if target != name:
                # Not a link into the file being split
                continue
            fragment = urlparse(url).fragment
            if fragment in anchors_in_top:
                # Anchor is in the top tree: only links located in the
                # bottom tree need a file prefix.
                if tree_root is bottom_root:
                    prefix = container.name_to_href(name, bottom_name)
                else:
                    prefix = ''
            elif fragment in anchors_in_bottom:
                # Anchor is in the bottom tree: only links located in the
                # top tree need a file prefix.
                if tree_root is top_root:
                    prefix = container.name_to_href(bottom_name, name)
                else:
                    prefix = ''
            else:
                continue
            link.set('href', '%s#%s' % (prefix, fragment))

    # Fix all links in the container that point to anchors in the bottom tree
    already_fixed = {name, bottom_name}
    for other_name, media_type in iteritems(container.mime_map):
        if other_name in already_fixed:
            continue
        replacer = SplitLinkReplacer(
            other_name, anchors_in_bottom, name, bottom_name, container)
        container.replace_links(other_name, replacer)

    container.replace(name, top_root)
    container.replace(bottom_name, bottom_root)

    # Insert the new file into the spine right after the entry found by the
    # loop below. If no spine entry matches ``name`` the loop simply ends on
    # the last entry, and that one is used.
    spine = container.opf_xpath('//opf:spine')[0]
    for spine_entry, entry_name, is_linear in container.spine_iter:
        if entry_name == name:
            break
    insert_at = spine.index(spine_entry) + 1

    new_itemref = spine.makeelement(
        OPF('itemref'), idref=manifest_item.get('id'))
    if not is_linear:
        new_itemref.set('linear', 'no')
    container.insert_into_xml(spine, new_itemref, index=insert_at)
    container.dirty(container.opf_name)
    return bottom_name