def parse_outline(raw, output_dir):
    """Turn an XML outline into a ``toc.ncx`` file inside *output_dir*.

    *raw* is decoded and cleaned, parsed with the recovering parser, and the
    first ``<outline>`` element in the document is walked. Every non-outline
    child that has text becomes a TOC entry pointing at ``index.html`` with
    the fragment ``'p' + page`` (``page`` defaults to ``'1'``). Nested
    ``<outline>`` elements are attached beneath the most recently added
    entry at the enclosing level, or to that level itself when it has no
    entries yet.

    The file is written only when more than two entries were collected;
    otherwise nothing is produced. Returns ``None``.
    """
    from lxml import etree
    from calibre.ebooks.oeb.parse_utils import RECOVER_PARSER

    decoded = xml_to_unicode(raw, strip_encoding_pats=True, assume_utf8=True)[0]
    raw = clean_xml_chars(decoded)
    document = etree.fromstring(raw, parser=RECOVER_PARSER)
    matches = document.xpath('(//outline)[1]')
    if not matches:
        return

    from calibre.ebooks.oeb.polish.toc import TOC, create_ncx
    top_outline = matches[0]
    toc = TOC()
    # One-element list so the nested function can update the running total.
    added = [0]

    def process_node(node, toc):
        for element in node.iterchildren('*'):
            if element.tag == 'outline':
                # A nested outline belongs to the entry added just before it.
                if toc.children:
                    process_node(element, toc.children[-1])
                else:
                    process_node(element, toc)
                continue
            if not element.text:
                continue
            page_number = element.get('page', '1')
            toc.add(element.text, 'index.html', 'p' + page_number)
            added[0] += 1

    process_node(top_outline, toc)
    if added[0] <= 2:
        return

    ncx_root = create_ncx(toc, (lambda x: x), 'pdftohtml', 'en', 'pdftohtml')
    ncx_path = os.path.join(output_dir, 'toc.ncx')
    with open(ncx_path, 'wb') as f:
        f.write(etree.tostring(
            ncx_root,
            pretty_print=True,
            with_tail=False,
            encoding='utf-8',
            xml_declaration=True,
        ))
def parse_outline(raw, output_dir):
    """Convert an XML outline into a ``toc.ncx`` file inside *output_dir*.

    *raw* is decoded and cleaned, parsed with the recovering parser, and the
    first ``<outline>`` element found in the document is walked to build a
    table of contents whose entries all point into ``index.html``.

    :param raw: The outline XML, as accepted by ``xml_to_unicode``.
    :param output_dir: Directory in which ``toc.ncx`` is created.
    :return: ``None``. The file is written only when more than two entries
        were collected; otherwise nothing is produced.
    """
    from lxml import etree
    from calibre.ebooks.oeb.parse_utils import RECOVER_PARSER
    raw = clean_xml_chars(
        xml_to_unicode(raw, strip_encoding_pats=True, assume_utf8=True)[0])
    outline = etree.fromstring(raw, parser=RECOVER_PARSER).xpath('(//outline)[1]')
    if outline:
        from calibre.ebooks.oeb.polish.toc import TOC, create_ncx
        outline = outline[0]
        toc = TOC()
        # One-element list so the nested function can update the total.
        count = [0]

        def process_node(node, toc):
            # Walk direct children only. Nested <outline> elements are handled
            # by the recursive call below; iterating over all descendants here
            # as well would visit their items a second time, duplicating
            # entries and inflating the count.
            for child in node.iterchildren('*'):
                if child.tag == 'outline':
                    # A nested outline hangs off the entry added just before
                    # it, or off this level if nothing has been added yet.
                    parent = toc.children[-1] if toc.children else toc
                    process_node(child, parent)
                elif child.text:
                    # Items without text would yield an untitled entry, so
                    # they are skipped and not counted.
                    page = child.get('page', '1')
                    # NOTE(review): the other parse_outline definition in this
                    # file uses 'p' + page as the fragment; confirm which form
                    # matches the anchors actually present in index.html.
                    toc.add(child.text, 'index.html', page)
                    count[0] += 1

        process_node(outline, toc)
        # Outlines with two or fewer entries are not written out.
        if count[0] > 2:
            root = create_ncx(toc, (lambda x: x), 'pdftohtml', 'en', 'pdftohtml')
            with open(os.path.join(output_dir, 'toc.ncx'), 'wb') as f:
                f.write(etree.tostring(
                    root, pretty_print=True, with_tail=False,
                    encoding='utf-8', xml_declaration=True))
def create_toc(mi, opf, html_name, lang):
    """Build a single-entry table of contents and pass it to ``create_ncx``.

    The one entry is labelled with the translated string ``'Start'`` and
    points at *html_name*. The identifier handed to ``create_ncx`` is the
    text of the last element in *opf* whose ``id`` attribute is
    ``"uuid_id"``, or ``''`` when no such element exists. ``mi.title`` and
    *lang* are forwarded unchanged.

    Returns whatever ``create_ncx`` returns.
    """
    identifier = ''
    for element in opf.xpath('//*[@id="uuid_id"]'):
        # Later matches overwrite earlier ones, so the last one wins.
        identifier = element.text

    start_toc = TOC()
    start_toc.add(_('Start'), html_name)

    def unchanged(x):
        return x

    return create_ncx(start_toc, unchanged, mi.title, lang, identifier)
def create_toc(mi, opf, html_name, lang):
    """Return the ``create_ncx`` result for a TOC holding one 'Start' entry.

    *opf* is searched for elements whose ``id`` attribute equals
    ``"uuid_id"``; the text of the last one found is used as the identifier
    argument, falling back to an empty string when there is no match. The
    TOC contains a single entry, titled with the translation of ``'Start'``
    and targeting *html_name*. ``mi.title`` and *lang* are passed through.
    """
    last_match = None
    for last_match in opf.xpath('//*[@id="uuid_id"]'):
        # Only the final match matters; keep advancing until exhausted.
        pass
    uuid = '' if last_match is None else last_match.text

    single_entry_toc = TOC()
    single_entry_toc.add(_('Start'), html_name)
    ncx = create_ncx(single_entry_toc, lambda x: x, mi.title, lang, uuid)
    return ncx