def transform(wldoc, flags=None, **options):
    """
    Render a WL document as plain text using xslt/book2txt.xslt.

    wldoc: source document; a deep copy is transformed
    flags: iterable of flag names, each set to 'yes' as an attribute of
        the copy's root element; when 'raw-text' is among them, only the
        bare transformation result is returned
    options: forwarded as keyword arguments to document.transform()

    Returns an OutputFile built from UTF-8 encoded text. Unless 'raw-text'
    is requested, the text is wrapped in TEMPLATE together with
    description, url, license, source and contributors information.
    """
    xslt_path = os.path.join(os.path.dirname(__file__), 'xslt/book2txt.xslt')
    style = etree.parse(xslt_path)

    document = copy.deepcopy(wldoc)
    del wldoc
    document.swap_endlines()

    for flag in flags or ():
        document.edoc.getroot().set(flag, 'yes')

    result = document.transform(style, **options)

    # Raw text: skip the template entirely.
    if flags and 'raw-text' in flags:
        return OutputFile.from_string(unicode(result).encode('utf-8'))

    if document.book_info:
        dc = document.book_info
        description = dc.description
        url = document.book_info.url

        license_name = dc.license_description
        license_url = dc.license
        if license_url:
            license_description = (
                u"Ten utwór jest udostepniony na licencji %s: \n%s"
                % (license_name, license_url))
        else:
            license_description = (
                u"Ten utwór nie jest chroniony prawem autorskim i znajduje się "
                u"w domenie publicznej, co oznacza że możesz go swobodnie "
                u"wykorzystywać, publikować i rozpowszechniać. Jeśli utwór "
                u"opatrzony jest dodatkowymi materiałami (przypisy, motywy "
                u"literackie etc.), które podlegają prawu autorskiemu, to te "
                u"dodatkowe materiały udostępnione są na licencji Creative "
                u"Commons Uznanie Autorstwa – Na Tych Samych Warunkach 3.0 PL "
                u"(http://creativecommons.org/licenses/by-sa/3.0/)")

        source_name = dc.source_name
        source = ("\n\nTekst opracowany na podstawie: " + source_name) if source_name else ''

        # Technical editors and editors, de-duplicated, with empty entries dropped.
        people = set(p for p in (dc.technical_editors + dc.editors) if p)
        contributors = ', '.join(person.readable() for person in sorted(people))
        if contributors:
            contributors = "\n\nOpracowanie redakcyjne i przypisy: %s" % contributors
    else:
        # No metadata available: fall back to generic placeholders.
        description = 'Publikacja zrealizowana w ramach projektu Wolne Lektury (http://wolnelektury.pl).'
        url = '*' * 10
        license_description = ""
        source = ""
        contributors = ""

    filled = TEMPLATE % {
        'description': description,
        'url': url,
        'license_description': license_description,
        'text': unicode(result),
        'source': source,
        'contributors': contributors,
    }
    return OutputFile.from_string(filled.encode('utf-8'))
def transform(wldoc, verbose=False, cover=None, flags=None):
    """ produces a FB2 file

    wldoc: source document; a deep copy is transformed
    verbose: accepted, not used by this function
    cover: a cover.Cover object or True for default
        (accepted, not used by this function)
    flags: less-advertising, working-copy; each flag is set to 'yes'
        on the root element of the copy

    Returns an OutputFile built from the UTF-8 encoded transformation result.
    """
    document = deepcopy(wldoc)
    del wldoc

    for flag in flags or ():
        document.edoc.getroot().set(flag, 'yes')

    # Drop editorial notes: once with the default argument, once for 'abstrakt'.
    document.clean_ed_note()
    document.clean_ed_note('abstrakt')

    here = os.path.dirname(__file__)
    style = etree.parse(os.path.join(here, 'fb2/fb2.xslt'))

    # Restructure the tree before applying the stylesheet.
    replace_by_verse(document.edoc)
    sectionify(document.edoc)

    fb2_text = six.text_type(document.transform(style))
    return OutputFile.from_bytes(fb2_text.encode('utf-8'))
def transform(wldoc, verbose=False, cover=None, flags=None):
    """ produces a FB2 file

    wldoc: source document; a deep copy is transformed
    verbose: accepted, not used by this function
    cover: a cover.Cover object or True for default
        (accepted, not used by this function)
    flags: less-advertising, working-copy; each flag is set to 'yes'
        on the root element of the copy

    Returns an OutputFile built from the UTF-8 encoded transformation result.
    """
    document = deepcopy(wldoc)
    del wldoc

    for flag in flags or ():
        document.edoc.getroot().set(flag, 'yes')

    # Drop editorial notes: once with the default argument, once for 'abstrakt'.
    document.clean_ed_note()
    document.clean_ed_note('abstrakt')

    here = os.path.dirname(__file__)
    style = etree.parse(os.path.join(here, 'fb2/fb2.xslt'))

    # Restructure the tree before applying the stylesheet.
    replace_by_verse(document.edoc)
    sectionify(document.edoc)

    fb2_text = unicode(document.transform(style))
    return OutputFile.from_string(fb2_text.encode('utf-8'))
def transform(wldoc, verbose=False, sample=None, cover=None,
              use_kindlegen=False, flags=None, hyphenate=True, ilustr_path='',
              converter_path=None):
    """ produces a MOBI file

    wldoc: a WLDocument
    verbose: when false, converter stdout/stderr are discarded
    sample=n: generate sample e-book (with at least n paragraphs)
    cover: a cover.Cover factory overriding default
    use_kindlegen: run kindlegen instead of ebook-convert
    flags: less-advertising,
    hyphenate, ilustr_path: forwarded to document.as_epub()
    converter_path: override path to MOBI converter,
        either ebook-convert or kindlegen

    Returns an OutputFile for the generated temporary .mobi file.
    Raises subprocess.CalledProcessError if the converter exits non-zero.
    """
    document = deepcopy(wldoc)
    del wldoc
    epub = document.as_epub(verbose=verbose, sample=sample,
                            html_toc=True, cover=cover or True, flags=flags,
                            hyphenate=hyphenate, ilustr_path=ilustr_path,
                            output_type='mobi')

    # Only the name of the temporary file is needed; the converter writes it.
    output_file = NamedTemporaryFile(prefix='librarian', suffix='.mobi',
                                     delete=False)
    output_file.close()

    if use_kindlegen:
        # kindlegen is given only the base name of the output file
        # NOTE(review): presumably it writes next to the input EPUB - confirm
        command = [
            converter_path or 'kindlegen',
            '-c2',
            epub.get_filename(),
            '-o', os.path.basename(output_file.name),
        ]
    else:
        command = [
            converter_path or 'ebook-convert',
            epub.get_filename(),
            output_file.name,
            '--no-inline-toc',
            '--mobi-file-type=both',
            '--mobi-ignore-margins',
        ]

    if verbose:
        subprocess.check_call(command)
    else:
        # Close the null device handle even when the converter fails.
        with open(os.devnull, 'w') as devnull:
            subprocess.check_call(command, stdout=devnull, stderr=devnull)

    return OutputFile.from_filename(output_file.name)
def transform(wldoc, stylesheet='legacy', options=None, flags=None):
    """Transforms the WL document to XHTML.

    wldoc: source document; a deep copy is transformed
    stylesheet: stylesheet name passed to get_stylesheet()
    options: dict of keyword arguments for document.transform();
        'gallery' defaults to "''". The dict passed in is not modified.
    flags: iterable of flag names, each set to 'yes' on the root element

    Returns an OutputFile with the serialized HTML, or None when
    html_has_content() reports that the result has no content.

    Raises ValueError when a KeyError occurs (reported as an invalid
    stylesheet name) and ParseError on XMLSyntaxError / XSLTApplyError.
    """
    # Parse XSLT
    try:
        style_filename = get_stylesheet(stylesheet)
        style = etree.parse(style_filename)

        document = copy.deepcopy(wldoc)
        del wldoc
        document.swap_endlines()

        if flags:
            for flag in flags:
                document.edoc.getroot().set(flag, 'yes')

        document.clean_ed_note()
        document.clean_ed_note('abstrakt')

        # Work on a copy so that setdefault() does not leak into the
        # caller's dictionary.
        options = dict(options) if options else {}
        options.setdefault('gallery', "''")

        result = document.transform(style, **options)
        del document  # no longer needed large object :)

        if not html_has_content(result):
            return None

        add_anchors(result.getroot())
        add_table_of_themes(result.getroot())
        add_table_of_contents(result.getroot())

        return OutputFile.from_bytes(etree.tostring(
            result, method='html', xml_declaration=False,
            pretty_print=True, encoding='utf-8'))
    except KeyError:
        # The placeholder used to be left uninterpolated.
        raise ValueError("'%s' is not a valid stylesheet." % (stylesheet,))
    except (XMLSyntaxError, XSLTApplyError) as e:
        raise ParseError(e)
def transform(wldoc, verbose=False, sample=None, cover=None, flags=None):
    """ produces a MOBI file

    wldoc: a WLDocument
    verbose: when false, ebook-convert stdout/stderr are discarded
    sample=n: generate sample e-book (with at least n paragraphs)
    cover: a cover.Cover factory overriding default
    flags: less-advertising, ('with-full-fonts' is always appended)

    Returns an OutputFile for the generated temporary .mobi file.
    Raises subprocess.CalledProcessError if ebook-convert exits non-zero.
    """
    document = deepcopy(wldoc)
    del wldoc
    book_info = document.book_info

    # provide a cover by default
    if not cover:
        cover = DefaultEbookCover
    cover_file = NamedTemporaryFile(suffix='.png', delete=False)
    try:
        bound_cover = cover(book_info)
        bound_cover.save(cover_file)
        # Close (and thereby flush) the image before another process
        # reads it by name.
        cover_file.close()

        if bound_cover.uses_dc_cover:
            if document.book_info.cover_by:
                document.edoc.getroot().set('data-cover-by',
                                            document.book_info.cover_by)
            if document.book_info.cover_source:
                document.edoc.getroot().set('data-cover-source',
                                            document.book_info.cover_source)

        if not flags:
            flags = []
        flags = list(flags) + ['with-full-fonts']
        epub = document.as_epub(verbose=verbose, sample=sample, html_toc=True,
                                flags=flags,
                                style=get_resource('epub/style.css'))

        # Only the name of the temporary file is needed; ebook-convert
        # writes it.
        output_file = NamedTemporaryFile(prefix='librarian', suffix='.mobi',
                                         delete=False)
        output_file.close()

        command = ['ebook-convert', epub.get_filename(), output_file.name,
                   '--no-inline-toc', '--mobi-file-type=both',
                   '--subset-embedded-fonts', '--mobi-ignore-margins',
                   '--cover=%s' % cover_file.name]
        if verbose:
            subprocess.check_call(command)
        else:
            # Close the null device handle even when the converter fails.
            with open(os.devnull, 'w') as devnull:
                subprocess.check_call(command, stdout=devnull, stderr=devnull)

        return OutputFile.from_filename(output_file.name)
    finally:
        # Remove the temporary cover on failure as well as on success.
        cover_file.close()
        os.unlink(cover_file.name)
def transform(wldoc, stylesheet='legacy', options=None, flags=None):
    """Transforms the WL document to XHTML.

    wldoc: source document; a deep copy is transformed
    stylesheet: stylesheet name passed to get_stylesheet()
    options: dict of keyword arguments for document.transform();
        'gallery' defaults to "''". The dict passed in is not modified.
    flags: iterable of flag names, each set to 'yes' on the root element

    Returns an OutputFile with the serialized HTML, or None when
    html_has_content() reports that the result has no content.

    Raises ValueError when a KeyError occurs (reported as an invalid
    stylesheet name) and ParseError on XMLSyntaxError / XSLTApplyError.
    """
    # Parse XSLT
    try:
        style_filename = get_stylesheet(stylesheet)
        style = etree.parse(style_filename)

        document = copy.deepcopy(wldoc)
        del wldoc
        document.swap_endlines()

        if flags:
            for flag in flags:
                document.edoc.getroot().set(flag, 'yes')

        document.clean_ed_note()
        document.clean_ed_note('abstrakt')

        # Work on a copy so that setdefault() does not leak into the
        # caller's dictionary.
        options = dict(options) if options else {}
        options.setdefault('gallery', "''")

        result = document.transform(style, **options)
        del document  # no longer needed large object :)

        if not html_has_content(result):
            return None

        add_anchors(result.getroot())
        add_table_of_themes(result.getroot())
        add_table_of_contents(result.getroot())

        return OutputFile.from_string(etree.tostring(
            result, method='html', xml_declaration=False,
            pretty_print=True, encoding='utf-8'))
    except KeyError:
        # The placeholder used to be left uninterpolated.
        raise ValueError("'%s' is not a valid stylesheet." % (stylesheet,))
    except (XMLSyntaxError, XSLTApplyError) as e:
        raise ParseError(e)
def transform(wldoc, verbose=False, cover=None, flags=None):
    """ produces a FB2 file

    wldoc: source document; a deep copy is transformed
    verbose: accepted, not used by this function
    cover: a cover.Cover object or True for default
        (accepted, not used by this function)
    flags: less-advertising, working-copy; each flag is set to "yes"
        on the root element of the copy

    Returns an OutputFile built from the UTF-8 encoded transformation result.
    """
    document = deepcopy(wldoc)
    del wldoc

    for flag in flags or ():
        document.edoc.getroot().set(flag, "yes")

    here = os.path.dirname(__file__)
    style = etree.parse(os.path.join(here, "fb2/fb2.xslt"))

    fb2_text = unicode(document.transform(style))
    return OutputFile.from_string(fb2_text.encode("utf-8"))
def transform(wldoc, verbose=False, sample=None, cover=None, use_kindlegen=False, flags=None):
    """ produces a MOBI file

    wldoc: a WLDocument
    verbose: when false, converter stdout/stderr are discarded
    sample=n: generate sample e-book (with at least n paragraphs)
    cover: a cover.Cover factory overriding default
    use_kindlegen: run kindlegen instead of ebook-convert
    flags: less-advertising,

    Returns an OutputFile for the generated temporary .mobi file.
    Raises subprocess.CalledProcessError if the converter exits non-zero.
    """
    document = deepcopy(wldoc)
    del wldoc
    epub = document.as_epub(verbose=verbose, sample=sample, html_toc=True, cover=cover or True, flags=flags)

    # Only the name of the temporary file is needed; the converter writes it.
    output_file = NamedTemporaryFile(prefix="librarian", suffix=".mobi", delete=False)
    output_file.close()

    if use_kindlegen:
        # kindlegen is given only the base name of the output file
        # NOTE(review): presumably it writes next to the input EPUB - confirm
        command = ["kindlegen", "-c2", epub.get_filename(), "-o", os.path.basename(output_file.name)]
    else:
        command = [
            "ebook-convert",
            epub.get_filename(),
            output_file.name,
            "--no-inline-toc",
            "--mobi-file-type=both",
            "--mobi-ignore-margins",
        ]

    if verbose:
        subprocess.check_call(command)
    else:
        # Close the null device handle even when the converter fails.
        with open(os.devnull, "w") as devnull:
            subprocess.check_call(command, stdout=devnull, stderr=devnull)

    return OutputFile.from_filename(output_file.name)
def transform(wldoc, verbose=False, style=None, html_toc=False,
              sample=None, cover=None, flags=None, hyphenate=False,
              ilustr_path='', output_type='epub'):
    """ produces a EPUB file

    wldoc: source document; a deep copy is transformed
    verbose: when false, font-optimizer stdout/stderr are discarded
    style: path of the CSS file stored as OPS/style.css
        (defaults to the epub/style.css resource)
    html_toc: also write an HTML table of contents (OPS/toc.html)
    sample=n: generate sample e-book (with at least n paragraphs)
    cover: a cover.Cover factory or True for default
    flags: less-advertising, without-fonts, working-copy
    hyphenate: when true, a hyphenator from set_hyph_language() is passed
        to hyphenate_and_fix_conjunctions(); otherwise None is passed
    ilustr_path: if it is a directory, every file in it is stored under
        OPS/ and listed in the manifest
    output_type: passed as `outputtype` to the title and last-page XSLT

    Returns an OutputFile for the generated temporary .epub file.
    """
    xhtml_doctype = ('<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" '
                     '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">')

    def serialize_xhtml(tree):
        """ serializes an XHTML tree with XML declaration and DOCTYPE """
        return etree.tostring(
            tree, pretty_print=True, xml_declaration=True,
            encoding="utf-8", doctype=xhtml_doctype)

    def read_resource(name):
        """ returns the content of a resource file, closing the handle """
        with open(get_resource(name)) as resource:
            return resource.read()

    def transform_file(wldoc, chunk_counter=1, first=True, sample=None):
        """ processes one input file and proceeds to its children """

        replace_characters(wldoc.edoc.getroot())

        hyphenator = set_hyph_language(wldoc.edoc.getroot()) if hyphenate else None
        hyphenate_and_fix_conjunctions(wldoc.edoc.getroot(), hyphenator)

        # every input file will have a TOC entry,
        # pointing to starting chunk
        toc = TOC(wldoc.book_info.title, "part%d.html" % chunk_counter)
        chars = set()
        if first:
            # write book title page
            html_tree = xslt(wldoc.edoc, get_resource('epub/xsltTitle.xsl'),
                             outputtype=output_type)
            chars = used_chars(html_tree.getroot())
            zip.writestr('OPS/title.html', serialize_xhtml(html_tree))
            # add a title page TOC entry
            toc.add(u"Strona tytułowa", "title.html")
        elif wldoc.book_info.parts:
            # write title page for every parent
            if sample is not None and sample <= 0:
                chars = set()
                html_string = read_resource('epub/emptyChunk.html')
            else:
                html_tree = xslt(wldoc.edoc,
                                 get_resource('epub/xsltChunkTitle.xsl'))
                chars = used_chars(html_tree.getroot())
                html_string = serialize_xhtml(html_tree)
            zip.writestr('OPS/part%d.html' % chunk_counter, html_string)
            add_to_manifest(manifest, chunk_counter)
            add_to_spine(spine, chunk_counter)
            chunk_counter += 1

        if len(wldoc.edoc.getroot()) > 1:
            # rdf before style master
            main_text = wldoc.edoc.getroot()[1]
        else:
            # rdf in style master
            main_text = wldoc.edoc.getroot()[0]
            if main_text.tag == RDFNS('RDF'):
                main_text = None

        if main_text is not None:
            for chunk_xml in chop(main_text):
                empty = False
                if sample is not None:
                    if sample <= 0:
                        # sample budget used up: emit this chunk as empty
                        empty = True
                    else:
                        sample -= len(chunk_xml.xpath(
                            '//strofa|//akap|//akap_cd|//akap_dialog'))
                chunk_html, chunk_toc, chunk_chars = transform_chunk(
                    chunk_xml, chunk_counter, annotations, empty)

                toc.extend(chunk_toc)
                chars = chars.union(chunk_chars)
                zip.writestr('OPS/part%d.html' % chunk_counter, chunk_html)
                add_to_manifest(manifest, chunk_counter)
                add_to_spine(spine, chunk_counter)
                chunk_counter += 1

        for child in wldoc.parts():
            child_toc, chunk_counter, chunk_chars, sample = transform_file(
                child, chunk_counter, first=False, sample=sample)
            toc.append(child_toc)
            chars = chars.union(chunk_chars)

        return toc, chunk_counter, chars, sample

    document = deepcopy(wldoc)
    del wldoc

    if flags:
        for flag in flags:
            document.edoc.getroot().set(flag, 'yes')

    document.clean_ed_note()
    document.clean_ed_note('abstrakt')

    # add editors info
    editors = document.editors()
    if editors:
        document.edoc.getroot().set('editors', u', '.join(sorted(
            editor.readable() for editor in editors)))
    if document.book_info.funders:
        document.edoc.getroot().set('funders', u', '.join(
            document.book_info.funders))
    if document.book_info.thanks:
        document.edoc.getroot().set('thanks', document.book_info.thanks)

    opf = xslt(document.book_info.to_etree(),
               get_resource('epub/xsltContent.xsl'))
    manifest = opf.find('.//' + OPFNS('manifest'))
    guide = opf.find('.//' + OPFNS('guide'))
    spine = opf.find('.//' + OPFNS('spine'))

    output_file = NamedTemporaryFile(prefix='librarian', suffix='.epub',
                                     delete=False)
    zip = zipfile.ZipFile(output_file, 'w', zipfile.ZIP_DEFLATED)

    functions.reg_mathml_epub(zip)

    if os.path.isdir(ilustr_path):
        for i, filename in enumerate(os.listdir(ilustr_path)):
            file_path = os.path.join(ilustr_path, filename)
            zip.write(file_path, os.path.join('OPS', filename))
            image_id = 'image%s' % i
            manifest.append(etree.fromstring(
                '<item id="%s" href="%s" media-type="%s" />' % (
                    image_id, filename, guess_type(file_path)[0])))

    # write static elements
    mime = zipfile.ZipInfo()
    mime.filename = 'mimetype'
    mime.compress_type = zipfile.ZIP_STORED
    mime.extra = ''
    zip.writestr(mime, 'application/epub+zip')
    zip.writestr(
        'META-INF/container.xml',
        '<?xml version="1.0" ?>'
        '<container version="1.0" '
        'xmlns="urn:oasis:names:tc:opendocument:xmlns:container">'
        '<rootfiles><rootfile full-path="OPS/content.opf" '
        'media-type="application/oebps-package+xml" />'
        '</rootfiles></container>'
    )
    zip.write(get_resource('res/wl-logo-small.png'),
              os.path.join('OPS', 'logo_wolnelektury.png'))
    zip.write(get_resource('res/jedenprocent.png'),
              os.path.join('OPS', 'jedenprocent.png'))
    if not style:
        style = get_resource('epub/style.css')
    zip.write(style, os.path.join('OPS', 'style.css'))

    if cover:
        if cover is True:
            cover = DefaultEbookCover

        cover_file = StringIO()
        bound_cover = cover(document.book_info)
        bound_cover.save(cover_file)
        cover_name = 'cover.%s' % bound_cover.ext()
        zip.writestr(os.path.join('OPS', cover_name), cover_file.getvalue())
        del cover_file

        cover_tree = etree.parse(get_resource('epub/cover.html'))
        cover_tree.find('//' + XHTMLNS('img')).set('src', cover_name)
        zip.writestr('OPS/cover.html', serialize_xhtml(cover_tree))

        if bound_cover.uses_dc_cover:
            if document.book_info.cover_by:
                document.edoc.getroot().set('data-cover-by',
                                            document.book_info.cover_by)
            if document.book_info.cover_source:
                document.edoc.getroot().set('data-cover-source',
                                            document.book_info.cover_source)

        manifest.append(etree.fromstring(
            '<item id="cover" href="cover.html" media-type="application/xhtml+xml" />'))
        manifest.append(etree.fromstring(
            '<item id="cover-image" href="%s" media-type="%s" />' % (
                cover_name, bound_cover.mime_type())))
        spine.insert(0, etree.fromstring('<itemref idref="cover"/>'))
        opf.getroot()[0].append(etree.fromstring(
            '<meta name="cover" content="cover-image"/>'))
        guide.append(etree.fromstring(
            '<reference href="cover.html" type="cover" title="Okładka"/>'))

    # collects annotations; passed to transform_chunk() for every chunk
    annotations = etree.Element('annotations')

    toc_file = etree.fromstring(
        '<?xml version="1.0" encoding="utf-8"?><!DOCTYPE ncx PUBLIC '
        '"-//NISO//DTD ncx 2005-1//EN" '
        '"http://www.daisy.org/z3986/2005/ncx-2005-1.dtd">'
        '<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" xml:lang="pl" '
        'version="2005-1"><head></head><docTitle></docTitle><navMap>'
        '</navMap></ncx>'
    )
    nav_map = toc_file[-1]

    if html_toc:
        manifest.append(etree.fromstring(
            '<item id="html_toc" href="toc.html" media-type="application/xhtml+xml" />'))
        spine.append(etree.fromstring(
            '<itemref idref="html_toc" />'))
        guide.append(etree.fromstring(
            '<reference href="toc.html" type="toc" title="Spis treści"/>'))

    toc, chunk_counter, chars, sample = transform_file(document, sample=sample)

    if len(toc.children) < 2:
        toc.add(u"Początek utworu", "part1.html")

    # Last modifications in container files and EPUB creation
    if len(annotations) > 0:
        toc.add("Przypisy", "annotations.html")
        manifest.append(etree.fromstring(
            '<item id="annotations" href="annotations.html" media-type="application/xhtml+xml" />'))
        spine.append(etree.fromstring(
            '<itemref idref="annotations" />'))
        replace_by_verse(annotations)
        html_tree = xslt(annotations, get_resource('epub/xsltAnnotations.xsl'))
        chars = chars.union(used_chars(html_tree.getroot()))
        zip.writestr('OPS/annotations.html', serialize_xhtml(html_tree))

    toc.add("Wesprzyj Wolne Lektury", "support.html")
    manifest.append(etree.fromstring(
        '<item id="support" href="support.html" media-type="application/xhtml+xml" />'))
    spine.append(etree.fromstring(
        '<itemref idref="support" />'))
    html_string = read_resource('epub/support.html')
    chars.update(used_chars(etree.fromstring(html_string)))
    zip.writestr('OPS/support.html', html_string)

    toc.add("Strona redakcyjna", "last.html")
    manifest.append(etree.fromstring(
        '<item id="last" href="last.html" media-type="application/xhtml+xml" />'))
    spine.append(etree.fromstring(
        '<itemref idref="last" />'))
    html_tree = xslt(document.edoc, get_resource('epub/xsltLast.xsl'),
                     outputtype=output_type)
    chars.update(used_chars(html_tree.getroot()))
    zip.writestr('OPS/last.html', serialize_xhtml(html_tree))

    if not flags or 'without-fonts' not in flags:
        # strip fonts
        tmpdir = mkdtemp('-librarian-epub')
        try:
            cwd = os.getcwd()
        except OSError:
            cwd = None

        # One shared null-device handle instead of one leaked per font.
        dev_null = None if verbose else open(os.devnull, 'w')
        try:
            # subset.pl is invoked by relative name, so run from its directory
            os.chdir(os.path.join(
                os.path.dirname(os.path.realpath(__file__)), 'font-optimizer'))
            for fname in ('DejaVuSerif.ttf', 'DejaVuSerif-Bold.ttf',
                          'DejaVuSerif-Italic.ttf',
                          'DejaVuSerif-BoldItalic.ttf'):
                optimizer_call = ['perl', 'subset.pl', '--chars',
                                  ''.join(chars).encode('utf-8'),
                                  get_resource('fonts/' + fname),
                                  os.path.join(tmpdir, fname)]
                if verbose:
                    print("Running font-optimizer")
                    subprocess.check_call(optimizer_call)
                else:
                    subprocess.check_call(optimizer_call,
                                          stdout=dev_null, stderr=dev_null)
                zip.write(os.path.join(tmpdir, fname),
                          os.path.join('OPS', fname))
                manifest.append(etree.fromstring(
                    '<item id="%s" href="%s" media-type="application/x-font-truetype" />' % (
                        fname, fname)))
        finally:
            # Undo the side effects even if the optimizer fails.
            if dev_null is not None:
                dev_null.close()
            if cwd is not None:
                os.chdir(cwd)
            rmtree(tmpdir)

    zip.writestr('OPS/content.opf', etree.tostring(
        opf, pretty_print=True, xml_declaration=True, encoding="utf-8"))
    title = document.book_info.title
    attributes = "dtb:uid", "dtb:depth", "dtb:totalPageCount", "dtb:maxPageNumber"
    for st in attributes:
        meta = toc_file.makeelement(NCXNS('meta'))
        meta.set('name', st)
        meta.set('content', '0')
        toc_file[0].append(meta)
    # the first two meta elements are dtb:uid and dtb:depth
    toc_file[0][0].set('content', str(document.book_info.url))
    toc_file[0][1].set('content', str(toc.depth()))
    set_inner_xml(toc_file[1], ''.join(('<text>', title, '</text>')))

    # write TOC
    if html_toc:
        toc.add(u"Spis treści", "toc.html", index=1)
        zip.writestr('OPS/toc.html', toc.html().encode('utf-8'))
    toc.write_to_xml(nav_map)
    zip.writestr('OPS/toc.ncx', etree.tostring(
        toc_file, pretty_print=True, xml_declaration=True, encoding="utf-8"))
    zip.close()
    # ZipFile does not close a file object it was handed; release it here.
    output_file.close()

    return OutputFile.from_filename(output_file.name)
def transform(wldoc, verbose=False, save_tex=None, morefloats=None,
              cover=None, flags=None, customizations=None):
    """ produces a PDF file with XeLaTeX

    wldoc: a WLDocument
    verbose: prints all output from LaTeX
    save_tex: path to save the intermediary LaTeX file to
    morefloats (old/new/none): force specific morefloats
    cover: a cover.Cover factory or True for default
    flags: less-advertising,
    customizations: user requested customizations regarding various
        formatting parameters (passed to wl LaTeX class)

    Returns an OutputFile for the generated temporary .pdf file.
    Raises ParseError on XMLSyntaxError / XSLTApplyError, or when xelatex
    exits with a non-zero status.
    """

    # Parse XSLT
    try:
        book_info = wldoc.book_info
        document = load_including_children(wldoc)
        root = document.edoc.getroot()

        if cover:
            if cover is True:
                cover = DefaultEbookCover
            bound_cover = cover(book_info, width=1200)
            root.set('data-cover-width', str(bound_cover.width))
            root.set('data-cover-height', str(bound_cover.height))
            if bound_cover.uses_dc_cover:
                if book_info.cover_by:
                    root.set('data-cover-by', book_info.cover_by)
                if book_info.cover_source:
                    root.set('data-cover-source', book_info.cover_source)
        if flags:
            for flag in flags:
                root.set('flag-' + flag, 'yes')

        # check for LaTeX packages
        if morefloats:
            root.set('morefloats', morefloats.lower())
        elif package_available('morefloats', 'maxfloats=19'):
            root.set('morefloats', 'new')

        # add customizations
        if customizations is not None:
            root.set('customizations', u','.join(customizations))

        # add editors info
        editors = document.editors()
        if editors:
            root.set('editors', u', '.join(sorted(
                editor.readable() for editor in editors)))
        if document.book_info.funders:
            root.set('funders', u', '.join(document.book_info.funders))
        if document.book_info.thanks:
            root.set('thanks', document.book_info.thanks)

        # hack the tree
        move_motifs_inside(document.edoc)
        hack_motifs(document.edoc)
        parse_creator(document.edoc)
        substitute_hyphens(document.edoc)
        fix_hanging(document.edoc)
        fix_tables(document.edoc)

        # wl -> TeXML
        style_filename = get_stylesheet("wl2tex")
        style = etree.parse(style_filename)
        functions.reg_mathml_latex()

        # TeXML -> LaTeX
        temp = mkdtemp('-wl2pdf')

        for sponsor in book_info.sponsors:
            ins = etree.Element("data-sponsor", name=sponsor)
            logo = sponsor_logo(sponsor)
            if logo:
                fname = 'sponsor-%s' % os.path.basename(logo)
                shutil.copy(logo, os.path.join(temp, fname))
                ins.set('src', fname)
            root.insert(0, ins)

        if book_info.sponsor_note:
            root.set("sponsor-note", book_info.sponsor_note)

        texml = document.transform(style)

        if cover:
            # The file is named cover.png, so write it in binary mode.
            with open(os.path.join(temp, 'cover.png'), 'wb') as f:
                bound_cover.save(f, quality=80)

        del document  # no longer needed large object :)

        tex_path = os.path.join(temp, 'doc.tex')
        # Close the .tex file even if process() raises.
        with open(tex_path, 'w') as fout:
            process(StringIO(texml), fout, 'utf-8')
        del texml

        if save_tex:
            shutil.copy(tex_path, save_tex)

        # LaTeX -> PDF
        shutil.copy(get_resource('pdf/wl.cls'), temp)
        shutil.copy(get_resource('res/wl-logo.png'), temp)

        try:
            cwd = os.getcwd()
        except OSError:
            cwd = None
        os.chdir(temp)
        try:
            if verbose:
                p = call(['xelatex', tex_path])
            else:
                # Discard the output through the null device: pipes that
                # nobody reads can block the child once they fill up.
                with open(os.devnull, 'w') as devnull:
                    p = call(['xelatex', '-interaction=batchmode', tex_path],
                             stdout=devnull, stderr=devnull)
        finally:
            # Restore the working directory even if xelatex cannot be run.
            if cwd is not None:
                os.chdir(cwd)

        if p:
            # The temporary directory is left in place in this case,
            # as before.
            raise ParseError("Error parsing .tex file")

        output_file = NamedTemporaryFile(prefix='librarian', suffix='.pdf',
                                         delete=False)
        # Only the name is needed; the PDF is moved onto it.
        output_file.close()
        pdf_path = os.path.join(temp, 'doc.pdf')
        shutil.move(pdf_path, output_file.name)
        shutil.rmtree(temp)
        return OutputFile.from_filename(output_file.name)

    except (XMLSyntaxError, XSLTApplyError) as e:
        raise ParseError(e)
def output_file(self, *args, **kwargs):
    """Save into an in-memory buffer and wrap the bytes in an OutputFile.

    All positional and keyword arguments are forwarded to self.save()
    after the buffer argument.
    """
    buffer = BytesIO()
    self.save(buffer, *args, **kwargs)
    rendered = buffer.getvalue()
    return OutputFile.from_bytes(rendered)
def output_file(self, *args, **kwargs):
    """Save into an in-memory buffer and wrap the content in an OutputFile.

    All positional and keyword arguments are forwarded to self.save()
    after the buffer argument.
    """
    buffer = StringIO()
    self.save(buffer, *args, **kwargs)
    rendered = buffer.getvalue()
    return OutputFile.from_string(rendered)
def transform(wldoc, flags=None, **options):
    """
    Render a WL document as plain text using xslt/book2txt.xslt.

    wldoc: source document; a deep copy is transformed
    flags: iterable of flag names, each set to 'yes' as an attribute of
        the copy's root element; when 'raw-text' is among them, the
        transformation result is not wrapped in TEMPLATE
    options: forwarded as keyword arguments to document.transform();
        a 'wrapping' option, if present, is converted with str() first

    Returns an OutputFile whose content uses CRLF line endings and ends
    with a CRLF.
    """
    xslt_path = os.path.join(os.path.dirname(__file__), 'xslt/book2txt.xslt')
    style = etree.parse(xslt_path)

    document = copy.deepcopy(wldoc)
    del wldoc
    document.swap_endlines()

    for flag in flags or ():
        document.edoc.getroot().set(flag, 'yes')

    if 'wrapping' in options:
        options['wrapping'] = str(options['wrapping'])
    result = document.transform(style, **options)

    if flags and 'raw-text' in flags:
        # Raw text: skip the template entirely.
        encoded = unicode(result).encode('utf-8')
    else:
        if document.book_info:
            dc = document.book_info
            description = dc.description
            url = document.book_info.url

            license_name = dc.license_description
            license_url = dc.license
            if license_url:
                license_description = (
                    u"Ten utwór jest udostepniony na licencji %s: \n%s"
                    % (license_name, license_url))
            else:
                license_description = (
                    u"Ten utwór nie jest objęty majątkowym prawem autorskim i znajduje się "
                    u"w domenie publicznej, co oznacza że możesz go swobodnie wykorzystywać, "
                    u"publikować i rozpowszechniać. Jeśli utwór opatrzony jest dodatkowymi "
                    u"materiałami (przypisy, motywy literackie etc.), które podlegają prawu "
                    u"autorskiemu, to te dodatkowe materiały udostępnione są na licencji "
                    u"Creative Commons Uznanie Autorstwa – Na Tych Samych Warunkach 3.0 PL "
                    u"(http://creativecommons.org/licenses/by-sa/3.0/)")

            source_name = dc.source_name
            source = ("\n\nTekst opracowany na podstawie: " + source_name) if source_name else ''

            # Technical editors and editors, de-duplicated, empty entries dropped.
            people = set(p for p in (dc.technical_editors + dc.editors) if p)
            contributors = ', '.join(person.readable() for person in sorted(people))
            if contributors:
                contributors = "\n\nOpracowanie redakcyjne i przypisy: %s." % contributors

            funders = ', '.join(dc.funders)
            if funders:
                funders = u"\n\nPublikację wsparli i wsparły: %s." % funders

            publisher = '\n\nWydawca: ' + ', '.join(dc.publisher)

            # isbn_txt is optional on the metadata object.
            isbn_txt = getattr(dc, 'isbn_txt', None)
            isbn = ('\n\n' + isbn_txt) if isbn_txt else ''
        else:
            # No metadata available: fall back to generic placeholders.
            description = 'Publikacja zrealizowana w ramach projektu Wolne Lektury (http://wolnelektury.pl).'
            url = '*' * 10
            license_description = ""
            source = ""
            contributors = ""
            funders = ""
            publisher = ""
            isbn = ""

        filled = TEMPLATE % {
            'description': description,
            'url': url,
            'license_description': license_description,
            'text': unicode(result),
            'source': source,
            'contributors': contributors,
            'funders': funders,
            'publisher': publisher,
            'isbn': isbn,
        }
        encoded = filled.encode('utf-8')

    # Normalize every line ending to CRLF and terminate the last line.
    lines = encoded.splitlines()
    return OutputFile.from_string("\r\n".join(lines) + "\r\n")
def transform(wldoc, verbose=False, style=None, html_toc=False,
              sample=None, cover=None, flags=None):
    """ produces a EPUB file

    wldoc: a WLDocument; it is deep-copied, the caller's object is not modified
    verbose: if true, the font optimizer's output is not suppressed
    style: path to a CSS file to embed instead of the default epub/style.css
    html_toc: if true, an HTML table of contents (toc.html) is added
    sample=n: generate sample e-book (with at least n paragraphs)
    cover: a cover.Cover factory or True for default
    flags: less-advertising, without-fonts, working-copy

    Returns an OutputFile pointing at a temporary .epub file.
    Raises subprocess.CalledProcessError if the font optimizer fails.
    """

    xhtml_doctype = ('<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" '
                     '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">')

    def serialize_xhtml(tree):
        """ serializes a tree as UTF-8 XHTML 1.1 with an XML declaration """
        return etree.tostring(
            tree, pretty_print=True, xml_declaration=True,
            encoding="utf-8", doctype=xhtml_doctype)

    def read_resource(name):
        """ returns the contents of a bundled resource, closing the file """
        with open(get_resource(name)) as resource:
            return resource.read()

    def transform_file(wldoc, chunk_counter=1, first=True, sample=None):
        """ processes one input file and proceeds to its children """

        replace_characters(wldoc.edoc.getroot())

        hyphenator = set_hyph_language(wldoc.edoc.getroot())
        hyphenate_and_fix_conjunctions(wldoc.edoc.getroot(), hyphenator)

        # every input file will have a TOC entry,
        # pointing to starting chunk
        toc = TOC(wldoc.book_info.title, "part%d.html" % chunk_counter)
        chars = set()
        if first:
            # write book title page
            html_tree = xslt(wldoc.edoc, get_resource('epub/xsltTitle.xsl'))
            chars = used_chars(html_tree.getroot())
            epub_zip.writestr('OPS/title.html', serialize_xhtml(html_tree))
            # add a title page TOC entry
            toc.add(u"Strona tytułowa", "title.html")
        elif wldoc.book_info.parts:
            # write title page for every parent
            if sample is not None and sample <= 0:
                # sample budget used up: emit a placeholder chunk
                chars = set()
                html_string = read_resource('epub/emptyChunk.html')
            else:
                html_tree = xslt(wldoc.edoc, get_resource('epub/xsltChunkTitle.xsl'))
                chars = used_chars(html_tree.getroot())
                html_string = serialize_xhtml(html_tree)
            epub_zip.writestr('OPS/part%d.html' % chunk_counter, html_string)
            add_to_manifest(manifest, chunk_counter)
            add_to_spine(spine, chunk_counter)
            chunk_counter += 1

        if len(wldoc.edoc.getroot()) > 1:
            # rdf before style master
            main_text = wldoc.edoc.getroot()[1]
        else:
            # rdf in style master
            main_text = wldoc.edoc.getroot()[0]
            if main_text.tag == RDFNS('RDF'):
                main_text = None

        if main_text is not None:
            for chunk_xml in chop(main_text):
                empty = False
                if sample is not None:
                    if sample <= 0:
                        empty = True
                    else:
                        # NOTE(review): '//' is an absolute XPath, evaluated
                        # against the whole tree the chunk belongs to --
                        # confirm this is the intended paragraph count.
                        sample -= len(chunk_xml.xpath('//strofa|//akap|//akap_cd|//akap_dialog'))
                chunk_html, chunk_toc, chunk_chars = transform_chunk(
                    chunk_xml, chunk_counter, annotations, empty)

                toc.extend(chunk_toc)
                chars = chars.union(chunk_chars)
                epub_zip.writestr('OPS/part%d.html' % chunk_counter, chunk_html)
                add_to_manifest(manifest, chunk_counter)
                add_to_spine(spine, chunk_counter)
                chunk_counter += 1

        for child in wldoc.parts():
            child_toc, chunk_counter, chunk_chars, sample = transform_file(
                child, chunk_counter, first=False, sample=sample)
            toc.append(child_toc)
            chars = chars.union(chunk_chars)

        return toc, chunk_counter, chars, sample

    document = deepcopy(wldoc)
    del wldoc

    if flags:
        for flag in flags:
            document.edoc.getroot().set(flag, 'yes')

    # add editors info
    editors = document.editors()
    if editors:
        document.edoc.getroot().set('editors', u', '.join(sorted(
            editor.readable() for editor in editors)))
    if document.book_info.funders:
        document.edoc.getroot().set('funders', u', '.join(
            document.book_info.funders))
    if document.book_info.thanks:
        document.edoc.getroot().set('thanks', document.book_info.thanks)

    opf = xslt(document.book_info.to_etree(), get_resource('epub/xsltContent.xsl'))
    manifest = opf.find('.//' + OPFNS('manifest'))
    guide = opf.find('.//' + OPFNS('guide'))
    spine = opf.find('.//' + OPFNS('spine'))

    output_file = NamedTemporaryFile(prefix='librarian', suffix='.epub', delete=False)
    # named epub_zip so the builtin zip() is not shadowed
    epub_zip = zipfile.ZipFile(output_file, 'w', zipfile.ZIP_DEFLATED)

    # write static elements
    # the mimetype entry is written first and stored uncompressed
    mime = zipfile.ZipInfo()
    mime.filename = 'mimetype'
    mime.compress_type = zipfile.ZIP_STORED
    mime.extra = ''
    epub_zip.writestr(mime, 'application/epub+zip')
    epub_zip.writestr(
        'META-INF/container.xml',
        '<?xml version="1.0" ?>'
        '<container version="1.0" '
        'xmlns="urn:oasis:names:tc:opendocument:xmlns:container">'
        '<rootfiles><rootfile full-path="OPS/content.opf" '
        'media-type="application/oebps-package+xml" />'
        '</rootfiles></container>'
    )
    epub_zip.write(get_resource('res/wl-logo-small.png'),
                   os.path.join('OPS', 'logo_wolnelektury.png'))
    epub_zip.write(get_resource('res/jedenprocent.png'),
                   os.path.join('OPS', 'jedenprocent.png'))
    if not style:
        style = get_resource('epub/style.css')
    epub_zip.write(style, os.path.join('OPS', 'style.css'))

    if cover:
        if cover is True:
            cover = DefaultEbookCover

        cover_file = StringIO()
        bound_cover = cover(document.book_info)
        bound_cover.save(cover_file)
        cover_name = 'cover.%s' % bound_cover.ext()
        epub_zip.writestr(os.path.join('OPS', cover_name), cover_file.getvalue())
        del cover_file

        cover_tree = etree.parse(get_resource('epub/cover.html'))
        cover_tree.find('//' + XHTMLNS('img')).set('src', cover_name)
        epub_zip.writestr('OPS/cover.html', serialize_xhtml(cover_tree))

        if bound_cover.uses_dc_cover:
            if document.book_info.cover_by:
                document.edoc.getroot().set('data-cover-by', document.book_info.cover_by)
            if document.book_info.cover_source:
                document.edoc.getroot().set('data-cover-source', document.book_info.cover_source)

        manifest.append(etree.fromstring(
            '<item id="cover" href="cover.html" media-type="application/xhtml+xml" />'))
        manifest.append(etree.fromstring(
            '<item id="cover-image" href="%s" media-type="%s" />' % (cover_name, bound_cover.mime_type())))
        spine.insert(0, etree.fromstring('<itemref idref="cover"/>'))
        opf.getroot()[0].append(etree.fromstring('<meta name="cover" content="cover-image"/>'))
        guide.append(etree.fromstring('<reference href="cover.html" type="cover" title="Okładka"/>'))

    annotations = etree.Element('annotations')

    toc_file = etree.fromstring(
        '<?xml version="1.0" encoding="utf-8"?><!DOCTYPE ncx PUBLIC '
        '"-//NISO//DTD ncx 2005-1//EN" '
        '"http://www.daisy.org/z3986/2005/ncx-2005-1.dtd">'
        '<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" xml:lang="pl" '
        'version="2005-1"><head></head><docTitle></docTitle><navMap>'
        '</navMap></ncx>'
    )
    nav_map = toc_file[-1]

    if html_toc:
        manifest.append(etree.fromstring(
            '<item id="html_toc" href="toc.html" media-type="application/xhtml+xml" />'))
        spine.append(etree.fromstring(
            '<itemref idref="html_toc" />'))
        guide.append(etree.fromstring('<reference href="toc.html" type="toc" title="Spis treści"/>'))

    toc, chunk_counter, chars, sample = transform_file(document, sample=sample)

    if len(toc.children) < 2:
        toc.add(u"Początek utworu", "part1.html")

    # Last modifications in container files and EPUB creation
    if len(annotations) > 0:
        toc.add("Przypisy", "annotations.html")
        manifest.append(etree.fromstring(
            '<item id="annotations" href="annotations.html" media-type="application/xhtml+xml" />'))
        spine.append(etree.fromstring(
            '<itemref idref="annotations" />'))
        replace_by_verse(annotations)
        html_tree = xslt(annotations, get_resource('epub/xsltAnnotations.xsl'))
        chars = chars.union(used_chars(html_tree.getroot()))
        epub_zip.writestr('OPS/annotations.html', serialize_xhtml(html_tree))

    toc.add("Wesprzyj Wolne Lektury", "support.html")
    manifest.append(etree.fromstring(
        '<item id="support" href="support.html" media-type="application/xhtml+xml" />'))
    spine.append(etree.fromstring(
        '<itemref idref="support" />'))
    html_string = read_resource('epub/support.html')
    chars.update(used_chars(etree.fromstring(html_string)))
    epub_zip.writestr('OPS/support.html', html_string)

    toc.add("Strona redakcyjna", "last.html")
    manifest.append(etree.fromstring(
        '<item id="last" href="last.html" media-type="application/xhtml+xml" />'))
    spine.append(etree.fromstring(
        '<itemref idref="last" />'))
    html_tree = xslt(document.edoc, get_resource('epub/xsltLast.xsl'))
    chars.update(used_chars(html_tree.getroot()))
    epub_zip.writestr('OPS/last.html', serialize_xhtml(html_tree))

    if not flags or 'without-fonts' not in flags:
        # strip fonts
        tmpdir = mkdtemp('-librarian-epub')
        try:
            cwd = os.getcwd()
        except OSError:
            cwd = None

        # the same character list is used for every font file
        used = ''.join(chars).encode('utf-8')
        try:
            # subset.pl is run from inside the font-optimizer directory
            os.chdir(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'font-optimizer'))
            # Output is discarded through os.devnull rather than PIPE:
            # nobody reads the pipe, so a chatty child could block forever.
            with open(os.devnull, 'w') as devnull:
                for fname in ('DejaVuSerif.ttf', 'DejaVuSerif-Bold.ttf',
                              'DejaVuSerif-Italic.ttf', 'DejaVuSerif-BoldItalic.ttf'):
                    optimizer_call = ['perl', 'subset.pl', '--chars', used,
                                      get_resource('fonts/' + fname), os.path.join(tmpdir, fname)]
                    if verbose:
                        # single-argument call form prints the same text
                        # with or without print_function
                        print("Running font-optimizer")
                        subprocess.check_call(optimizer_call)
                    else:
                        subprocess.check_call(optimizer_call, stdout=devnull, stderr=devnull)
                    epub_zip.write(os.path.join(tmpdir, fname), os.path.join('OPS', fname))
                    manifest.append(etree.fromstring(
                        '<item id="%s" href="%s" media-type="application/x-font-truetype" />' % (fname, fname)))
        finally:
            # restore the caller's working directory and drop the scratch
            # directory even when the optimizer fails
            if cwd is not None:
                os.chdir(cwd)
            rmtree(tmpdir)

    epub_zip.writestr('OPS/content.opf', etree.tostring(
        opf, pretty_print=True, xml_declaration=True, encoding="utf-8"))
    title = document.book_info.title
    attributes = "dtb:uid", "dtb:depth", "dtb:totalPageCount", "dtb:maxPageNumber"
    for st in attributes:
        meta = toc_file.makeelement(NCXNS('meta'))
        meta.set('name', st)
        meta.set('content', '0')
        toc_file[0].append(meta)
    # the first two metas (dtb:uid, dtb:depth) get real values
    toc_file[0][0].set('content', str(document.book_info.url))
    toc_file[0][1].set('content', str(toc.depth()))
    # NOTE(review): title is inserted as raw XML -- confirm set_inner_xml
    # copes with titles containing '&' or '<'.
    set_inner_xml(toc_file[1], ''.join(('<text>', title, '</text>')))

    # write TOC
    if html_toc:
        toc.add(u"Spis treści", "toc.html", index=1)
        epub_zip.writestr('OPS/toc.html', toc.html().encode('utf-8'))
    toc.write_to_xml(nav_map)
    epub_zip.writestr('OPS/toc.ncx', etree.tostring(
        toc_file, pretty_print=True, xml_declaration=True, encoding="utf-8"))
    epub_zip.close()
    # ZipFile.close() does not close a file object it was handed,
    # so release the temporary file's handle explicitly.
    output_file.close()

    return OutputFile.from_filename(output_file.name)
def transform(wldoc, verbose=False, save_tex=None, morefloats=None,
              cover=None, flags=None, customizations=None, ilustr_path='',
              latex_dir=False):
    """ produces a PDF file with XeLaTeX

    wldoc: a WLDocument
    verbose: prints all output from LaTeX
    save_tex: path to save the intermediary LaTeX file to
    morefloats (old/new/none): force specific morefloats
    cover: a cover.Cover factory or True for default
    flags: less-advertising,
    customizations: user requested customizations regarding various
        formatting parameters (passed to wl LaTeX class)
    ilustr_path: directory that the 'src' of every <ilustr> element is
        resolved against
    latex_dir: if true, stop before running XeLaTeX and return the path of
        the temporary directory holding the LaTeX sources

    Returns an OutputFile pointing at a temporary .pdf file (or the
    temporary directory path when latex_dir is set).
    Raises ParseError on XML/XSLT errors or when XeLaTeX exits with a
    non-zero status.
    """

    # Parse XSLT
    try:
        book_info = wldoc.book_info
        document = load_including_children(wldoc)
        root = document.edoc.getroot()

        if cover:
            if cover is True:
                cover = make_cover
            bound_cover = cover(book_info, width=1200)
            root.set('data-cover-width', str(bound_cover.width))
            root.set('data-cover-height', str(bound_cover.height))
            if bound_cover.uses_dc_cover:
                if book_info.cover_by:
                    root.set('data-cover-by', book_info.cover_by)
                if book_info.cover_source:
                    root.set('data-cover-source', book_info.cover_source)
        if flags:
            for flag in flags:
                root.set('flag-' + flag, 'yes')

        # check for LaTeX packages
        if morefloats:
            root.set('morefloats', morefloats.lower())
        elif package_available('morefloats', 'maxfloats=19'):
            root.set('morefloats', 'new')

        # add customizations
        if customizations is not None:
            root.set('customizations', u','.join(customizations))

        # add editors info
        editors = document.editors()
        if editors:
            root.set(
                'editors',
                u', '.join(sorted(editor.readable() for editor in editors)))
        if document.book_info.funders:
            root.set('funders', u', '.join(document.book_info.funders))
        if document.book_info.thanks:
            root.set('thanks', document.book_info.thanks)

        # hack the tree
        move_motifs_inside(document.edoc)
        hack_motifs(document.edoc)
        parse_creator(document.edoc)
        substitute_hyphens(document.edoc)
        fix_hanging(document.edoc)
        fix_tables(document.edoc)
        mark_subauthors(document.edoc)

        # wl -> TeXML
        style_filename = get_stylesheet("wl2tex")
        style = etree.parse(style_filename)
        functions.reg_mathml_latex()

        # TeXML -> LaTeX
        temp = mkdtemp('-wl2pdf')

        # copy illustrations next to the .tex file
        for ilustr in document.edoc.findall("//ilustr"):
            shutil.copy(os.path.join(ilustr_path, ilustr.get("src")), temp)

        for sponsor in book_info.sponsors:
            ins = etree.Element("data-sponsor", name=sponsor)
            logo = sponsor_logo(sponsor)
            if logo:
                fname = 'sponsor-%s' % os.path.basename(logo)
                shutil.copy(logo, os.path.join(temp, fname))
                ins.set('src', fname)
            root.insert(0, ins)

        if book_info.sponsor_note:
            root.set("sponsor-note", book_info.sponsor_note)

        texml = document.transform(style)

        if cover:
            # binary mode: the cover is image data, not text
            with open(os.path.join(temp, 'cover.png'), 'wb') as f:
                bound_cover.save(f, quality=80)

        del document  # no longer needed large object :)

        tex_path = os.path.join(temp, 'doc.tex')
        # 'with' closes the file even when TeXML processing fails
        with open(tex_path, 'wb') as fout:
            process(six.BytesIO(texml), fout, 'utf-8')
        del texml

        if save_tex:
            shutil.copy(tex_path, save_tex)

        # LaTeX -> PDF
        shutil.copy(get_resource('pdf/wl.cls'), temp)
        shutil.copy(get_resource('res/wl-logo.png'), temp)

        if latex_dir:
            return temp

        try:
            cwd = os.getcwd()
        except OSError:
            cwd = None
        os.chdir(temp)

        try:
            # Output is discarded through os.devnull rather than PIPE:
            # nobody reads the pipe, so xelatex could block forever once
            # the pipe buffer fills up.
            with open(os.devnull, 'w') as devnull:
                # some things work better when compiled twice
                # (table of contents, [line numbers - disabled])
                for _ in range(2):
                    if verbose:
                        status = call(['xelatex', tex_path])
                    else:
                        status = call(['xelatex', '-interaction=batchmode', tex_path],
                                      stdout=devnull, stderr=devnull)
                    if status:
                        raise ParseError("Error parsing .tex file")
        finally:
            # restore the caller's working directory even when XeLaTeX fails
            if cwd is not None:
                os.chdir(cwd)

        output_file = NamedTemporaryFile(prefix='librarian', suffix='.pdf', delete=False)
        # only the reserved name is needed; release the handle before the
        # PDF is moved over it
        output_file.close()
        pdf_path = os.path.join(temp, 'doc.pdf')
        shutil.move(pdf_path, output_file.name)
        shutil.rmtree(temp)
        return OutputFile.from_filename(output_file.name)

    except (XMLSyntaxError, XSLTApplyError) as e:
        raise ParseError(e)