def create_inline_toc(container, title=None): ''' Create an inline (HTML) Table of Contents from an existing NCX table of contents. :param title: The title for this table of contents. ''' lang = get_book_language(container) default_title = 'Table of Contents' if lang: lang = lang_as_iso639_1(lang) or lang default_title = translate(lang, default_title) title = title or default_title toc = get_toc(container) if len(toc) == 0: return None toc_name = find_inline_toc(container) name = toc_name html = toc_to_html(toc, container, name, title, lang) raw = serialize(html, 'text/html') if name is None: name, c = 'toc.xhtml', 0 while container.has_name(name): c += 1 name = 'toc%d.xhtml' % c container.add_file(name, raw, spine_index=0) else: with container.open(name, 'wb') as f: f.write(raw) set_guide_item(container, 'toc', title, name, frag='calibre_generated_inline_toc') return name
def create_inline_toc(container, title=None): title = title or _('Table of Contents') toc = get_toc(container) if len(toc) == 0: return None toc_name = find_inline_toc(container) def process_node(html_parent, toc, level=1, indent=' '): li = html_parent.makeelement(XHTML('li')) li.tail = '\n' + (indent * level) html_parent.append(li) name, frag = toc.dest, toc.frag href = '#' if name: href = container.name_to_href(name, toc_name) if frag: href += '#' + frag a = li.makeelement(XHTML('a'), href=href) a.text = toc.title li.append(a) if len(toc) > 0: parent = li.makeelement(XHTML('ul')) li.append(parent) a.tail = '\n\n' + (indent * (level + 2)) parent.text = '\n' + (indent * (level + 3)) parent.tail = '\n\n' + (indent * (level + 1)) for child in toc: process_node(parent, child, level + 3) parent[-1].tail = '\n' + (indent * (level + 2)) E = ElementMaker(namespace=XHTML_NS, nsmap={None: XHTML_NS}) html = E.html( E.head( E.title(title), E.style(''' li { list-style-type: none; padding-left: 2em; margin-left: 0} a { text-decoration: none } a:hover { color: red }''', type='text/css'), ), E.body( E.h2(title), E.ul(), id="calibre_generated_inline_toc", )) name = toc_name for child in toc: process_node(html[1][1], child) pretty_html_tree(container, html) raw = serialize(html, 'text/html') if name is None: name, c = 'toc.xhtml', 0 while container.has_name(name): c += 1 name = 'toc%d.xhtml' % c container.add_file(name, raw, spine_index=0) else: with container.open(name, 'wb') as f: f.write(raw) return name
def pretty_xml(container, name, raw): ' Pretty print the XML represented as a string in raw. If ``name`` is the name of the OPF, extra OPF-specific prettying is performed. ' root = container.parse_xml(raw) if name == container.opf_name: pretty_opf(root) pretty_xml_tree(root) return serialize(root, 'text/xml')
def beautify_text(raw, syntax): from lxml import etree from calibre.ebooks.oeb.polish.parsing import parse from calibre.ebooks.oeb.polish.pretty import pretty_xml_tree, pretty_html_tree from calibre.ebooks.chardet import strip_encoding_declarations if syntax == 'xml': root = etree.fromstring(strip_encoding_declarations(raw)) pretty_xml_tree(root) elif syntax == 'css': import logging from calibre.ebooks.oeb.base import serialize, _css_logger from calibre.ebooks.oeb.polish.utils import setup_css_parser_serialization from css_parser import CSSParser, log setup_css_parser_serialization(tprefs['editor_tab_stop_width']) log.setLevel(logging.WARN) log.raiseExceptions = False parser = CSSParser(loglevel=logging.WARNING, # We dont care about @import rules fetcher=lambda x: (None, None), log=_css_logger) data = parser.parseString(raw, href='<string>', validate=False) return serialize(data, 'text/css') else: root = parse(raw, line_numbers=False) pretty_html_tree(None, root) return etree.tostring(root, encoding=unicode)
def create_book(mi, path, fmt='epub', opf_name='metadata.opf', html_name='start.xhtml', toc_name='toc.ncx'): ''' Create an empty book in the specified format at the specified location. ''' path = os.path.abspath(path) lang = 'und' opf = metadata_to_opf(mi, as_string=False) for l in opf.xpath('//*[local-name()="language"]'): if l.text: lang = l.text break lang = lang_as_iso639_1(lang) or lang opfns = OPF_NAMESPACES['opf'] m = opf.makeelement('{%s}manifest' % opfns) opf.insert(1, m) i = m.makeelement('{%s}item' % opfns, href=html_name, id='start') i.set('media-type', guess_type('a.xhtml')) m.append(i) i = m.makeelement('{%s}item' % opfns, href=toc_name, id='ncx') i.set('media-type', guess_type(toc_name)) m.append(i) s = opf.makeelement('{%s}spine' % opfns, toc="ncx") opf.insert(2, s) i = s.makeelement('{%s}itemref' % opfns, idref='start') s.append(i) CONTAINER = '''\ <?xml version="1.0"?> <container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container"> <rootfiles> <rootfile full-path="{0}" media-type="application/oebps-package+xml"/> </rootfiles> </container> '''.format(prepare_string_for_xml(opf_name, True)).encode('utf-8') HTML = P('templates/new_book.html', data=True).decode('utf-8').replace( '_LANGUAGE_', prepare_string_for_xml(lang, True) ).replace( '_TITLE_', prepare_string_for_xml(mi.title) ).replace( '_AUTHORS_', prepare_string_for_xml(authors_to_string(mi.authors)) ).encode('utf-8') h = parse(HTML) pretty_html_tree(None, h) HTML = serialize(h, 'text/html') ncx = etree.tostring(create_toc(mi, opf, html_name, lang), encoding='utf-8', xml_declaration=True, pretty_print=True) pretty_xml_tree(opf) opf = etree.tostring(opf, encoding='utf-8', xml_declaration=True, pretty_print=True) if fmt == 'azw3': with TemporaryDirectory('create-azw3') as tdir, CurrentDir(tdir): for name, data in ((opf_name, opf), (html_name, HTML), (toc_name, ncx)): with open(name, 'wb') as f: f.write(data) c = Container(os.path.dirname(os.path.abspath(opf_name)), opf_name, DevNull()) opf_to_azw3(opf_name, path, c) else: with ZipFile(path, 'w', compression=ZIP_STORED) as zf: zf.writestr('mimetype', b'application/epub+zip', compression=ZIP_STORED) zf.writestr('META-INF/', b'', 0755) zf.writestr('META-INF/container.xml', CONTAINER) zf.writestr(opf_name, opf) zf.writestr(html_name, HTML) zf.writestr(toc_name, ncx)
def beautify_text(raw, syntax): from lxml import etree from calibre.ebooks.oeb.polish.parsing import parse from calibre.ebooks.oeb.polish.pretty import pretty_xml_tree, pretty_html_tree from calibre.ebooks.chardet import strip_encoding_declarations if syntax == 'xml': root = etree.fromstring(strip_encoding_declarations(raw)) pretty_xml_tree(root) elif syntax == 'css': import logging from calibre.ebooks.oeb.base import serialize, _css_logger from calibre.ebooks.oeb.polish.utils import setup_cssutils_serialization from cssutils import CSSParser, log setup_cssutils_serialization(tprefs['editor_tab_stop_width']) log.setLevel(logging.WARN) log.raiseExceptions = False parser = CSSParser(loglevel=logging.WARNING, # We dont care about @import rules fetcher=lambda x: (None, None), log=_css_logger) data = parser.parseString(raw, href='<string>', validate=False) return serialize(data, 'text/css') else: root = parse(raw, line_numbers=False) pretty_html_tree(None, root) return etree.tostring(root, encoding=unicode)
def parse_html(raw): root = parse(raw, decoder=lambda x: x.decode('utf-8'), line_numbers=True, linenumber_attribute='data-lnum') ans = serialize(root, 'text/html') if not isinstance(ans, bytes): ans = ans.encode('utf-8') return ans
def serialize_item(self, name): data = self.parsed(name) if name == self.opf_name: self.format_opf() data = serialize(data, self.mime_map[name], pretty_print=name in self.pretty_print) if name == self.opf_name: # Needed as I can't get lxml to output opf:role and # not output <opf:metadata> as well data = re.sub(br"(<[/]{0,1})opf:", r"\1", data) return data
def serialize_item(self, name): data = self.parsed(name) if name == self.opf_name: self.format_opf() data = serialize(data, self.mime_map[name], pretty_print=name in self.pretty_print) if name == self.opf_name: # Needed as I can't get lxml to output opf:role and # not output <opf:metadata> as well data = re.sub(br'(<[/]{0,1})opf:', r'\1', data) return data
def parse_html(raw): root = parse(raw, decoder=lambda x:x.decode('utf-8'), line_numbers=True, linenumber_attribute='data-lnum') ans = serialize(root, 'text/html') if not isinstance(ans, bytes): ans = ans.encode('utf-8') return ans
def convert_epub3_nav(self, nav_path, opf, log, opts): from lxml import etree from calibre.ebooks.chardet import xml_to_unicode from calibre.ebooks.oeb.polish.parsing import parse from calibre.ebooks.oeb.base import EPUB_NS, XHTML, NCX_MIME, NCX, urlnormalize, urlunquote, serialize from calibre.ebooks.oeb.polish.toc import first_child from tempfile import NamedTemporaryFile with lopen(nav_path, 'rb') as f: raw = f.read() raw = xml_to_unicode(raw, strip_encoding_pats=True, assume_utf8=True)[0] root = parse(raw, log=log) ncx = etree.fromstring( '<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" version="2005-1" xml:lang="eng"><navMap/></ncx>' ) navmap = ncx[0] et = '{%s}type' % EPUB_NS bn = os.path.basename(nav_path) def add_from_li(li, parent): href = text = None for x in li.iterchildren(XHTML('a'), XHTML('span')): text = etree.tostring( x, method='text', encoding=unicode, with_tail=False).strip() or ' '.join( x.xpath('descendant-or-self::*/@title')).strip() href = x.get('href') if href: if href.startswith('#'): href = bn + href break np = parent.makeelement(NCX('navPoint')) parent.append(np) np.append(np.makeelement(NCX('navLabel'))) np[0].append(np.makeelement(NCX('text'))) np[0][0].text = text if href: np.append(np.makeelement(NCX('content'), attrib={'src': href})) return np def process_nav_node(node, toc_parent): for li in node.iterchildren(XHTML('li')): child = add_from_li(li, toc_parent) ol = first_child(li, XHTML('ol')) if child is not None and ol is not None: process_nav_node(ol, child) for nav in root.iterdescendants(XHTML('nav')): if nav.get(et) == 'toc': ol = first_child(nav, XHTML('ol')) if ol is not None: process_nav_node(ol, navmap) break else: return with NamedTemporaryFile(suffix='.ncx', dir=os.path.dirname(nav_path), delete=False) as f: f.write(etree.tostring(ncx, encoding='utf-8')) ncx_href = os.path.relpath(f.name, os.getcwdu()).replace(os.sep, '/') ncx_id = opf.create_manifest_item(ncx_href, NCX_MIME, append=True).get('id') for spine in opf.root.xpath('//*[local-name()="spine"]'): spine.set('toc', ncx_id) opts.epub3_nav_href = urlnormalize( os.path.relpath(nav_path).replace(os.sep, '/')) opts.epub3_nav_parsed = root if getattr(self, 'removed_cover', None): changed = False base_path = os.path.dirname(nav_path) for elem in root.xpath('//*[@href]'): href, frag = elem.get('href').partition('#')[::2] link_path = os.path.relpath( os.path.join(base_path, urlunquote(href)), base_path) abs_href = urlnormalize(link_path) if abs_href == self.removed_cover: changed = True elem.set('data-calibre-removed-titlepage', '1') if changed: with open(nav_path, 'wb') as f: f.write(serialize(root, 'application/xhtml+xml'))
def create_inline_toc(container, title=None): lang = get_book_language(container) default_title = 'Table of Contents' if lang: lang = lang_as_iso639_1(lang) or lang default_title = translate(lang, default_title) title = title or default_title toc = get_toc(container) if len(toc) == 0: return None toc_name = find_inline_toc(container) def process_node(html_parent, toc, level=1, indent=' ', style_level=2): li = html_parent.makeelement(XHTML('li')) li.tail = '\n'+ (indent*level) html_parent.append(li) name, frag = toc.dest, toc.frag href = '#' if name: href = container.name_to_href(name, toc_name) if frag: href += '#' + frag a = li.makeelement(XHTML('a'), href=href) a.text = toc.title li.append(a) if len(toc) > 0: parent = li.makeelement(XHTML('ul')) parent.set('class', 'level%d' % (style_level)) li.append(parent) a.tail = '\n\n' + (indent*(level+2)) parent.text = '\n'+(indent*(level+3)) parent.tail = '\n\n' + (indent*(level+1)) for child in toc: process_node(parent, child, level+3, style_level=style_level + 1) parent[-1].tail = '\n' + (indent*(level+2)) E = ElementMaker(namespace=XHTML_NS, nsmap={None:XHTML_NS}) html = E.html( E.head( E.title(title), E.style(''' li { list-style-type: none; padding-left: 2em; margin-left: 0} a { text-decoration: none } a:hover { color: red }''', type='text/css'), ), E.body( E.h2(title), E.ul(), id="calibre_generated_inline_toc", ) ) name = toc_name ul = html[1][1] ul.set('class', 'level1') for child in toc: process_node(ul, child) if lang: html.set('lang', lang) pretty_html_tree(container, html) raw = serialize(html, 'text/html') if name is None: name, c = 'toc.xhtml', 0 while container.has_name(name): c += 1 name = 'toc%d.xhtml' % c container.add_file(name, raw, spine_index=0) else: with container.open(name, 'wb') as f: f.write(raw) set_guide_item(container, 'toc', title, name, frag='calibre_generated_inline_toc') return name
def pretty_css(container, name, raw): ' Pretty print the CSS represented as a string in raw ' sheet = container.parse_css(raw) return serialize(sheet, 'text/css')
def pretty_html(container, name, raw): root = container.parse_xhtml(raw) pretty_html_tree(container, root) return serialize(root, 'text/html')
def pretty_xml(container, name, raw): root = container.parse_xml(raw) if name == container.opf_name: pretty_opf(root) pretty_xml_tree(root) return serialize(root, 'text/xml')
def create_book(mi, path, fmt='epub', opf_name='metadata.opf', html_name='start.xhtml', toc_name='toc.ncx'): ''' Create an empty book in the specified format at the specified location. ''' if fmt not in valid_empty_formats: raise ValueError('Cannot create empty book in the %s format' % fmt) if fmt == 'txt': with open(path, 'wb') as f: if not mi.is_null('title'): f.write(as_bytes(mi.title)) return if fmt == 'docx': from calibre.ebooks.conversion.plumber import Plumber from calibre.ebooks.docx.writer.container import DOCX from calibre.utils.logging import default_log p = Plumber('a.docx', 'b.docx', default_log) p.setup_options() # Use the word default of one inch page margins for x in 'left right top bottom'.split(): setattr(p.opts, 'margin_' + x, 72) DOCX(p.opts, default_log).write(path, mi, create_empty_document=True) return path = os.path.abspath(path) lang = 'und' opf = metadata_to_opf(mi, as_string=False) for l in opf.xpath('//*[local-name()="language"]'): if l.text: lang = l.text break lang = lang_as_iso639_1(lang) or lang opfns = OPF_NAMESPACES['opf'] m = opf.makeelement('{%s}manifest' % opfns) opf.insert(1, m) i = m.makeelement('{%s}item' % opfns, href=html_name, id='start') i.set('media-type', guess_type('a.xhtml')) m.append(i) i = m.makeelement('{%s}item' % opfns, href=toc_name, id='ncx') i.set('media-type', guess_type(toc_name)) m.append(i) s = opf.makeelement('{%s}spine' % opfns, toc="ncx") opf.insert(2, s) i = s.makeelement('{%s}itemref' % opfns, idref='start') s.append(i) CONTAINER = '''\ <?xml version="1.0"?> <container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container"> <rootfiles> <rootfile full-path="{0}" media-type="application/oebps-package+xml"/> </rootfiles> </container> '''.format(prepare_string_for_xml(opf_name, True)).encode('utf-8') HTML = P('templates/new_book.html', data=True).decode('utf-8').replace( '_LANGUAGE_', prepare_string_for_xml(lang, True)).replace( '_TITLE_', prepare_string_for_xml(mi.title)).replace( '_AUTHORS_', prepare_string_for_xml(authors_to_string( mi.authors))).encode('utf-8') h = parse(HTML) pretty_html_tree(None, h) HTML = serialize(h, 'text/html') ncx = etree.tostring(create_toc(mi, opf, html_name, lang), encoding='utf-8', xml_declaration=True, pretty_print=True) pretty_xml_tree(opf) opf = etree.tostring(opf, encoding='utf-8', xml_declaration=True, pretty_print=True) if fmt == 'azw3': with TemporaryDirectory('create-azw3') as tdir, CurrentDir(tdir): for name, data in ((opf_name, opf), (html_name, HTML), (toc_name, ncx)): with open(name, 'wb') as f: f.write(data) c = Container(os.path.dirname(os.path.abspath(opf_name)), opf_name, DevNull()) opf_to_azw3(opf_name, path, c) else: with ZipFile(path, 'w', compression=ZIP_STORED) as zf: zf.writestr('mimetype', b'application/epub+zip', compression=ZIP_STORED) zf.writestr('META-INF/', b'', 0o755) zf.writestr('META-INF/container.xml', CONTAINER) zf.writestr(opf_name, opf) zf.writestr(html_name, HTML) zf.writestr(toc_name, ncx)
def fix_html(container, raw): ' Fix any parsing errors in the HTML represented as a string in raw. Fixing is done using the HTML5 parsing algorithm. ' root = container.parse_xhtml(raw) return serialize(root, 'text/html')
def pretty_css(container, name, raw): sheet = container.parse_css(raw) return serialize(sheet, 'text/css')
def fix_html(container, raw): root = container.parse_xhtml(raw) return serialize(root, 'text/html')
def create_inline_toc(container, title=None): ''' Create an inline (HTML) Table of Contents from an existing NCX table of contents. :param title: The title for this table of contents. ''' lang = get_book_language(container) default_title = 'Table of Contents' if lang: lang = lang_as_iso639_1(lang) or lang default_title = translate(lang, default_title) title = title or default_title toc = get_toc(container) if len(toc) == 0: return None toc_name = find_inline_toc(container) def process_node(html_parent, toc, level=1, indent=' ', style_level=2): li = html_parent.makeelement(XHTML('li')) li.tail = '\n' + (indent * level) html_parent.append(li) name, frag = toc.dest, toc.frag href = '#' if name: href = container.name_to_href(name, toc_name) if frag: href += '#' + frag a = li.makeelement(XHTML('a'), href=href) a.text = toc.title li.append(a) if len(toc) > 0: parent = li.makeelement(XHTML('ul')) parent.set('class', 'level%d' % (style_level)) li.append(parent) a.tail = '\n\n' + (indent * (level + 2)) parent.text = '\n' + (indent * (level + 3)) parent.tail = '\n\n' + (indent * (level + 1)) for child in toc: process_node(parent, child, level + 3, style_level=style_level + 1) parent[-1].tail = '\n' + (indent * (level + 2)) E = ElementMaker(namespace=XHTML_NS, nsmap={None: XHTML_NS}) html = E.html( E.head( E.title(title), E.style(''' li { list-style-type: none; padding-left: 2em; margin-left: 0} a { text-decoration: none } a:hover { color: red }''', type='text/css'), ), E.body( E.h2(title), E.ul(), id="calibre_generated_inline_toc", )) name = toc_name ul = html[1][1] ul.set('class', 'level1') for child in toc: process_node(ul, child) if lang: html.set('lang', lang) pretty_html_tree(container, html) raw = serialize(html, 'text/html') if name is None: name, c = 'toc.xhtml', 0 while container.has_name(name): c += 1 name = 'toc%d.xhtml' % c container.add_file(name, raw, spine_index=0) else: with container.open(name, 'wb') as f: f.write(raw) set_guide_item(container, 'toc', title, name, frag='calibre_generated_inline_toc') return name
def parse_html(raw): root = parse(raw, decoder=lambda x: x.decode("utf-8"), line_numbers=True, linenumber_attribute="data-lnum") return serialize(root, "text/html").encode("utf-8")
def pretty_html(container, name, raw): ' Pretty print the HTML represented as a string in raw ' root = container.parse_xhtml(raw) pretty_html_tree(container, root) return serialize(root, 'text/html')
def convert_epub3_nav(self, nav_path, opf, log, opts): from lxml import etree from calibre.ebooks.chardet import xml_to_unicode from calibre.ebooks.oeb.polish.parsing import parse from calibre.ebooks.oeb.base import EPUB_NS, XHTML, NCX_MIME, NCX, urlnormalize, urlunquote, serialize from calibre.ebooks.oeb.polish.toc import first_child from tempfile import NamedTemporaryFile with lopen(nav_path, 'rb') as f: raw = f.read() raw = xml_to_unicode(raw, strip_encoding_pats=True, assume_utf8=True)[0] root = parse(raw, log=log) ncx = etree.fromstring('<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" version="2005-1" xml:lang="eng"><navMap/></ncx>') navmap = ncx[0] et = '{%s}type' % EPUB_NS bn = os.path.basename(nav_path) def add_from_li(li, parent): href = text = None for x in li.iterchildren(XHTML('a'), XHTML('span')): text = etree.tostring(x, method='text', encoding=unicode, with_tail=False).strip() or ' '.join(x.xpath('descendant-or-self::*/@title')).strip() href = x.get('href') if href: if href.startswith('#'): href = bn + href break np = parent.makeelement(NCX('navPoint')) parent.append(np) np.append(np.makeelement(NCX('navLabel'))) np[0].append(np.makeelement(NCX('text'))) np[0][0].text = text if href: np.append(np.makeelement(NCX('content'), attrib={'src':href})) return np def process_nav_node(node, toc_parent): for li in node.iterchildren(XHTML('li')): child = add_from_li(li, toc_parent) ol = first_child(li, XHTML('ol')) if child is not None and ol is not None: process_nav_node(ol, child) for nav in root.iterdescendants(XHTML('nav')): if nav.get(et) == 'toc': ol = first_child(nav, XHTML('ol')) if ol is not None: process_nav_node(ol, navmap) break else: return with NamedTemporaryFile(suffix='.ncx', dir=os.path.dirname(nav_path), delete=False) as f: f.write(etree.tostring(ncx, encoding='utf-8')) ncx_href = os.path.relpath(f.name, os.getcwdu()).replace(os.sep, '/') ncx_id = opf.create_manifest_item(ncx_href, NCX_MIME, append=True).get('id') for spine in opf.root.xpath('//*[local-name()="spine"]'): spine.set('toc', ncx_id) opts.epub3_nav_href = urlnormalize(os.path.relpath(nav_path).replace(os.sep, '/')) opts.epub3_nav_parsed = root if getattr(self, 'removed_cover', None): changed = False base_path = os.path.dirname(nav_path) for elem in root.xpath('//*[@href]'): href, frag = elem.get('href').partition('#')[::2] link_path = os.path.relpath(os.path.join(base_path, urlunquote(href)), base_path) abs_href = urlnormalize(link_path) if abs_href == self.removed_cover: changed = True elem.set('data-calibre-removed-titlepage', '1') if changed: with open(nav_path, 'wb') as f: f.write(serialize(root, 'application/xhtml+xml'))
def fix_html(raw): root = parse(raw) return serialize(root, 'text/html').decode('utf-8')
def create_book(mi, path, fmt='epub', opf_name='metadata.opf', html_name='start.xhtml', toc_name='toc.ncx'): ''' Create an empty book in the specified format at the specified location. ''' if fmt not in valid_empty_formats: raise ValueError('Cannot create empty book in the %s format' % fmt) if fmt == 'txt': with open(path, 'wb') as f: if not mi.is_null('title'): f.write(mi.title) return if fmt == 'docx': from calibre.ebooks.conversion.plumber import Plumber from calibre.ebooks.docx.writer.container import DOCX from calibre.utils.logging import default_log p = Plumber('a.docx', 'b.docx', default_log) p.setup_options() # Use the word default of one inch page margins for x in 'left right top bottom'.split(): setattr(p.opts, 'margin_' + x, 72) DOCX(p.opts, default_log).write(path, mi, create_empty_document=True) return path = os.path.abspath(path) lang = 'und' opf = metadata_to_opf(mi, as_string=False) for l in opf.xpath('//*[local-name()="language"]'): if l.text: lang = l.text break lang = lang_as_iso639_1(lang) or lang opfns = OPF_NAMESPACES['opf'] m = opf.makeelement('{%s}manifest' % opfns) opf.insert(1, m) i = m.makeelement('{%s}item' % opfns, href=html_name, id='start') i.set('media-type', guess_type('a.xhtml')) m.append(i) i = m.makeelement('{%s}item' % opfns, href=toc_name, id='ncx') i.set('media-type', guess_type(toc_name)) m.append(i) s = opf.makeelement('{%s}spine' % opfns, toc="ncx") opf.insert(2, s) i = s.makeelement('{%s}itemref' % opfns, idref='start') s.append(i) CONTAINER = '''\ <?xml version="1.0"?> <container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container"> <rootfiles> <rootfile full-path="{0}" media-type="application/oebps-package+xml"/> </rootfiles> </container> '''.format(prepare_string_for_xml(opf_name, True)).encode('utf-8') HTML = P('templates/new_book.html', data=True).decode('utf-8').replace( '_LANGUAGE_', prepare_string_for_xml(lang, True) ).replace( '_TITLE_', prepare_string_for_xml(mi.title) ).replace( '_AUTHORS_', prepare_string_for_xml(authors_to_string(mi.authors)) ).encode('utf-8') h = parse(HTML) pretty_html_tree(None, h) HTML = serialize(h, 'text/html') ncx = etree.tostring(create_toc(mi, opf, html_name, lang), encoding='utf-8', xml_declaration=True, pretty_print=True) pretty_xml_tree(opf) opf = etree.tostring(opf, encoding='utf-8', xml_declaration=True, pretty_print=True) if fmt == 'azw3': with TemporaryDirectory('create-azw3') as tdir, CurrentDir(tdir): for name, data in ((opf_name, opf), (html_name, HTML), (toc_name, ncx)): with open(name, 'wb') as f: f.write(data) c = Container(os.path.dirname(os.path.abspath(opf_name)), opf_name, DevNull()) opf_to_azw3(opf_name, path, c) else: with ZipFile(path, 'w', compression=ZIP_STORED) as zf: zf.writestr('mimetype', b'application/epub+zip', compression=ZIP_STORED) zf.writestr('META-INF/', b'', 0755) zf.writestr('META-INF/container.xml', CONTAINER) zf.writestr(opf_name, opf) zf.writestr(html_name, HTML) zf.writestr(toc_name, ncx)
def parse_html(raw): root = parse(raw, decoder=lambda x: x.decode('utf-8'), line_numbers=True, linenumber_attribute='data-lnum') return serialize(root, 'text/html').encode('utf-8')
def parse_html(raw): root = parse(raw, decoder=lambda x:x.decode('utf-8'), line_numbers=True, linenumber_attribute='data-lnum') return serialize(root, 'text/html').encode('utf-8')