def concat_books():
    """Flatten each book in TEST_FILES into a single HTML file.

    For every path in TEST_FILES the epub is loaded with ``_load_epub``,
    its metadata, OPF and NCX are parsed, and a fresh document is created
    with ``epub.new_doc`` using the detected language (``'UND'`` when
    ``find_language`` returns nothing).  Each ID in ``e.spine`` is looked
    up in ``e.manifest``; image entries are wrapped in an ``<img>``
    document, everything else is parsed through ``e.gettree``.  A
    page marker and the item's contents are then appended to the combined
    document via ``add_marker`` / ``add_guts``.

    The result is written to ``tests/xhtml/<basename of book>.html``.
    Progress (book path and each manifest file name) is printed to stdout.
    """
    for book in TEST_FILES:
        print(book)
        e = _load_epub(book, verbose=True)
        e.parse_meta()
        e.parse_opf()
        e.parse_ncx()
        lang = e.find_language() or 'UND'
        doc = epub.new_doc(lang=lang)

        for ID in e.spine:
            fn, mimetype = e.manifest[ID]
            print(fn)
            if mimetype.startswith('image'):
                # Images have no markup of their own; wrap them in a
                # minimal document so add_guts can treat them uniformly.
                tree = epub.new_doc(guts='<img src="%s" alt="" />' % fn)
            else:
                tree = e.gettree(fn, parse=_html_parse)
            add_marker(doc, 'espri-new-page-%s' % ID, fn)
            add_guts(tree, doc)

        # Serialise before touching the output file so that a failure here
        # does not leave a truncated/empty file behind.
        # NOTE(review): the replace() target reads as a single space in this
        # copy of the file; presumably it was originally some other
        # character or entity (e.g. a carriage return) -- confirm.
        html = lxml.etree.tostring(doc, encoding='utf-8', method='html').replace(' ', '')

        out_path = 'tests/xhtml/' + os.path.basename(book) + '.html'
        # 'with' guarantees the handle is closed even if the write fails.
        with open(out_path, 'w') as f:
            f.write(html + '\n')
def concat_chapters(fn):
    """Load the epub at path *fn* and return its items merged into one doc.

    The file is read, handed to ``epub.Epub.load``, and its metadata, OPF
    and NCX are parsed.  A new document is created with ``epub.new_doc``
    using the detected language (``'UND'`` when ``find_language`` returns
    nothing).  Each ID in ``e.order`` is looked up in ``e.manifest``; image
    entries are wrapped in an ``<img>`` document, everything else is
    parsed through ``e.gettree``.  A page marker and the item's contents
    are appended to the combined document via ``add_marker`` /
    ``add_guts``.  Each manifest file name is printed to stdout.

    Returns the combined document built by ``epub.new_doc``.
    """
    e = epub.Epub()
    # An EPUB is a ZIP container, so read it as binary, and use 'with' so
    # the handle is closed promptly instead of being left to the GC.
    with open(fn, 'rb') as epub_file:
        e.load(epub_file.read())
    e.parse_meta()
    e.parse_opf()
    e.parse_ncx()
    lang = e.find_language() or 'UND'

    # The result is not used here; the call is kept in case it has side
    # effects on the Epub object.  NOTE(review): confirm whether it is
    # still needed.
    e.find_probable_chapters()

    doc = epub.new_doc(lang=lang)
    # NOTE(review): this walks e.order whereas concat_books walks e.spine --
    # confirm the difference is intentional.
    for ID in e.order:
        # Named item_fn so the loop does not clobber the 'fn' parameter.
        item_fn, mimetype = e.manifest[ID]
        print(item_fn)
        if mimetype.startswith('image'):
            # Images have no markup of their own; wrap them in a minimal
            # document so add_guts can treat them uniformly.
            tree = epub.new_doc(guts='<img src="%s" alt="" />' % item_fn)
        else:
            tree = e.gettree(item_fn, parse=_html_parse)
        add_marker(doc, 'espri-new-page-%s' % ID, item_fn)
        add_guts(tree, doc)
    return doc
def test_new_doc():
    """Check that epub.new_doc(guts=...) round-trips its body content.

    For each sample string a document is built, its body element is
    located (first under the XHTML namespace, then un-namespaced), and
    the body's text, serialised children and tail are glued back
    together and compared with the input.
    """
    # XXX not very comprehensive.
    samples = ('', "hello", "<h1>HELLO!</h1>")
    for guts in samples:
        doc = epub.new_doc(guts=guts)

        # Prefer the namespaced <body>; fall back to a plain 'body' tag.
        body = next(doc.iter(epub.XHTMLNS + 'body'), None)
        if body is None:
            body = next(doc.iter('body'))
        else:
            print("got %s" % body)

        pieces = [body.text or '']
        pieces.extend(lxml.etree.tostring(child) for child in body)
        if body.tail is not None:
            pieces.append(body.tail)
        guts2 = ''.join(pieces)

        assert guts == guts2