def convert(self, output_filename=None, **calibre_options):
    """ Tidy the HTML file ``self.filename`` and hand it to html2calibre().

        If a file named 'commandlineoptions.txt' exists next to
        ``self.filename``, its content is passed on as ``cmdopts`` after
        substituting the ``%(WORKDIR)s`` placeholder with the directory
        containing ``self.filename``.

        output_filename -- forwarded unchanged to html2calibre()
        calibre_options -- extra keyword arguments forwarded to html2calibre()

        Returns whatever html2calibre() returns.
    """
    workdir = os.path.dirname(self.filename)

    # check for commandlineoptions.txt file
    cmdopts = ''
    cmdopts_filename = os.path.join(workdir, 'commandlineoptions.txt')
    if os.path.exists(cmdopts_filename):
        # Context manager so the handle is closed deterministically
        # (the former file(...).read() relied on garbage collection).
        with open(cmdopts_filename) as fp:
            cmdopts = fp.read()
        cmdopts = cmdopts % dict(WORKDIR=workdir)

    tidy_filename = tidyhtml(self.filename, self.encoding)
    try:
        return html2calibre(tidy_filename,
                            output_filename,
                            cmdopts=cmdopts,
                            **calibre_options)
    finally:
        # Remove the tidied temporary file even if the conversion fails.
        os.unlink(tidy_filename)
def convert(self, output_filename=None, **options):
    """ Tidy the HTML file ``self.filename`` and hand it to html2pdf().

        output_filename -- forwarded unchanged to html2pdf()
        options -- extra keyword arguments forwarded to html2pdf()

        Returns whatever html2pdf() returns.
    """
    tidy_filename = tidyhtml(self.filename, self.encoding)
    try:
        return html2pdf(tidy_filename, output_filename, **options)
    finally:
        # Remove the tidied temporary file even if the conversion fails.
        os.unlink(tidy_filename)
def convert(self, filename, encoding='utf-8', tidy=True, output_filename=None, **kw):
    """ Convert a HTML file stored as 'filename' to FO using CSS2XSLFO.

        filename -- path of the HTML input file
        encoding -- passed to tidyhtml() when 'tidy' is true
        tidy -- if true, run the input through tidyhtml() first; the
                tidied temporary file is removed afterwards
        output_filename -- path of the FO file to write; a new temporary
                '.fo' file is created when omitted
        kw -- every entry is appended as key="value" to the css2xslfo
              command line; 'strip_base' is additionally passed to
              tidyhtml()

        Returns the name of the (post-processed) FO file.
        Raises IOError if css2xslfo.jar is missing and ConversionError
        if the converter exits with a non-zero status.
    """
    if tidy:
        filename = tidyhtml(filename, encoding, strip_base=kw.get('strip_base', False))

    try:
        if output_filename:
            fo_filename = output_filename
        else:
            fo_filename = newTempfile(suffix='.fo')

        csstoxslfo = os.path.abspath(os.path.join(dirname, 'lib', 'csstoxslfo', 'css2xslfo.jar'))
        if not os.path.exists(csstoxslfo):
            raise IOError('%s does not exist' % csstoxslfo)

        cmd = ('"%s" -Duser.language=en -Xms256m -Xmx256m -jar "%s" "%s" -fo "%s"'
               % (java, csstoxslfo, filename, fo_filename))
        # NOTE(review): all of kw (including 'strip_base' when given) is
        # appended unescaped to the command string - confirm that callers
        # only pass trusted values.
        for k in kw:
            cmd += ' %s="%s"' % (k, kw[k])

        status, output = runcmd(cmd)
        if status != 0:
            raise ConversionError('Error executing: %s' % cmd, output)
    finally:
        # remove tidy-ed file, also when the conversion failed
        if tidy:
            os.unlink(filename)

    # remove some stuff from the generated FO file causing
    # some conversion trouble either with XINC or XFC
    default_values = (('footnote', 'reset'),
                      ('unicode-bidi', 'embed'),
                      ('writing-mode', 'lr-tb'),
                      ('font-selection-strategy', 'character-by-character'),
                      ('line-height-shift-adjustment', 'disregard-shifts'),
                      ('page-break-after', 'avoid'),
                      ('page-break-before', 'avoid'),
                      ('page-break-inside', 'avoid'))
    box_attributes = ('margin-left', 'margin-right', 'margin-top', 'margin-bottom',
                      'padding-left', 'padding-right', 'padding-top', 'padding-bottom')
    pre_attributes = {'white-space-treatment': 'preserve',
                      'white-space-collapse': 'false',
                      'wrap-option': 'no-wrap',
                      'linefeed-treatment': 'preserve'}

    E = parse(fo_filename)

    ids_seen = set()  # set instead of list: O(1) duplicate check
    for node in E.getiterator():
        attrib = node.attrib

        # ensure that ID attributes are unique
        node_id = attrib.get('id')
        if node_id is not None:
            if node_id in ids_seen:
                del attrib['id']
            else:
                ids_seen.add(node_id)

        # drop attributes carrying one of the listed values
        for k, v in default_values:
            if attrib.get(k) == v:
                del attrib[k]

        # a bare '0' gets an explicit unit
        for attr in box_attributes:
            if attrib.get(attr) == '0':
                attrib[attr] = '0em'

        if attrib.get('page-break-after') == 'always':
            del attrib['page-break-after']
            attrib['break-after'] = 'page'

        if attrib.get('text-transform'):
            del attrib['text-transform']

        if attrib.get('white-space') == 'pre':
            del attrib['white-space']
            # node.text is None when the element has no leading text
            node.text = '\n' + (node.text or '').lstrip()
            for k, v in pre_attributes.items():
                attrib[k] = v

    fo_text = tostring(E.getroot())
    # avoid a linebreak through <li><p> (XFC)
    fo_text = fo_text.replace('<ns0:block ', '<ns0:block margin-top="0" margin-bottom="0" ')
    # Stripping plain '<ns0:block/>' stays disabled (original note:
    # "causes a crash with XINC").
    fo_text = fo_text.replace('<ns0:block margin-top="0" margin-bottom="0" />', '')

    # context manager closes the handle deterministically
    with open(fo_filename, 'wb') as fp:
        fp.write(fo_text)
    return fo_filename
def convert(self, filename, encoding="utf-8", tidy=True, output_filename=None, **kw):
    """Convert a HTML file stored as 'filename' to FO using CSS2XSLFO.

    filename -- path of the HTML input file
    encoding -- passed to tidyhtml() when 'tidy' is true
    tidy -- if true, run the input through tidyhtml() first; the tidied
            temporary file is removed afterwards
    output_filename -- path of the FO file to write; a new temporary
            '.fo' file is created when omitted
    kw -- every entry is appended as key="value" to the css2xslfo command
          line; 'strip_base' is additionally passed to tidyhtml()

    Returns the name of the (post-processed) FO file.
    Raises IOError if css2xslfo.jar is missing and ConversionError if the
    converter exits with a non-zero status.
    """
    if tidy:
        filename = tidyhtml(filename, encoding, strip_base=kw.get("strip_base", False))

    try:
        fo_filename = output_filename if output_filename else newTempfile(suffix=".fo")

        csstoxslfo = os.path.abspath(
            os.path.join(dirname, "lib", "csstoxslfo", "css2xslfo.jar")
        )
        if not os.path.exists(csstoxslfo):
            raise IOError("%s does not exist" % csstoxslfo)

        cmd = '"%s" -Duser.language=en -Xms256m -Xmx256m -jar "%s" "%s" -fo "%s"' % (
            java,
            csstoxslfo,
            filename,
            fo_filename,
        )
        # NOTE(review): all of kw (including "strip_base" when given) is
        # appended unescaped to the command string - confirm that callers
        # only pass trusted values.
        for k in kw:
            cmd += ' %s="%s"' % (k, kw[k])

        status, output = runcmd(cmd)
        if status != 0:
            raise ConversionError("Error executing: %s" % cmd, output)
    finally:
        # remove tidy-ed file, also when the conversion failed
        if tidy:
            os.unlink(filename)

    # remove some stuff from the generated FO file causing
    # some conversion trouble either with XINC or XFC
    default_values = (
        ("footnote", "reset"),
        ("unicode-bidi", "embed"),
        ("writing-mode", "lr-tb"),
        ("font-selection-strategy", "character-by-character"),
        ("line-height-shift-adjustment", "disregard-shifts"),
        ("page-break-after", "avoid"),
        ("page-break-before", "avoid"),
        ("page-break-inside", "avoid"),
    )
    box_attributes = (
        "margin-left",
        "margin-right",
        "margin-top",
        "margin-bottom",
        "padding-left",
        "padding-right",
        "padding-top",
        "padding-bottom",
    )
    pre_attributes = {
        "white-space-treatment": "preserve",
        "white-space-collapse": "false",
        "wrap-option": "no-wrap",
        "linefeed-treatment": "preserve",
    }

    E = parse(fo_filename)

    ids_seen = set()  # set instead of list: O(1) duplicate check
    for node in E.getiterator():
        attrib = node.attrib

        # ensure that ID attributes are unique
        node_id = attrib.get("id")
        if node_id is not None:
            if node_id in ids_seen:
                del attrib["id"]
            else:
                ids_seen.add(node_id)

        # drop attributes carrying one of the listed values
        for k, v in default_values:
            if attrib.get(k) == v:
                del attrib[k]

        # a bare "0" gets an explicit unit
        for attr in box_attributes:
            if attrib.get(attr) == "0":
                attrib[attr] = "0em"

        if attrib.get("page-break-after") == "always":
            del attrib["page-break-after"]
            attrib["break-after"] = "page"

        if attrib.get("text-transform"):
            del attrib["text-transform"]

        if attrib.get("white-space") == "pre":
            del attrib["white-space"]
            # node.text is None when the element has no leading text
            node.text = "\n" + (node.text or "").lstrip()
            for k, v in pre_attributes.items():
                attrib[k] = v

    fo_text = tostring(E.getroot())
    # avoid a linebreak through <li><p> (XFC)
    fo_text = fo_text.replace(
        "<ns0:block ", '<ns0:block margin-top="0" margin-bottom="0" '
    )
    # Stripping plain '<ns0:block/>' stays disabled (original note:
    # "causes a crash with XINC").
    fo_text = fo_text.replace('<ns0:block margin-top="0" margin-bottom="0" />', "")

    # context manager closes the handle deterministically
    with open(fo_filename, "wb") as fp:
        fp.write(fo_text)
    return fo_filename
def convert(self, filename, encoding='utf-8', tidy=True, output_filename=None, **kw):
    """ Convert a HTML file stored as 'filename' to FO using CSS2XSLFO.

        filename -- path of the HTML input file
        encoding -- passed to tidyhtml() when 'tidy' is true
        tidy -- if true, run the input through tidyhtml() first; the
                tidied temporary file is removed afterwards
        output_filename -- path of the FO file to write; a new temporary
                '.fo' file is created when omitted
        kw -- every entry is appended as key="value" to the css2xslfo
              command line; 'strip_base' is additionally passed to
              tidyhtml()

        Returns the name of the (post-processed) FO file.
        Raises IOError if css2xslfo.jar is missing and ConversionError
        if the converter exits with a non-zero status.
    """
    if tidy:
        filename = tidyhtml(
            filename, encoding, strip_base=kw.get('strip_base', False))

    try:
        if output_filename:
            fo_filename = output_filename
        else:
            fo_filename = newTempfile(suffix='.fo')

        csstoxslfo = os.path.abspath(
            os.path.join(dirname, 'lib', 'csstoxslfo', 'css2xslfo.jar'))
        if not os.path.exists(csstoxslfo):
            raise IOError('%s does not exist' % csstoxslfo)

        cmd_parts = [
            '"%s"' % java,
            '-Duser.language=en -Xms256m -Xmx256m',
            '-jar "%s"' % csstoxslfo,
            '"%s"' % filename,
            '-fo "%s"' % fo_filename,
        ]
        # NOTE(review): all of kw (including 'strip_base' when given) is
        # appended unescaped to the command string - confirm that callers
        # only pass trusted values.
        for k in kw:
            cmd_parts.append('%s="%s"' % (k, kw[k]))
        cmd = ' '.join(cmd_parts)

        status, output = runcmd(cmd)
        if status != 0:
            raise ConversionError('Error executing: %s' % cmd, output)
    finally:
        # remove tidy-ed file, also when the conversion failed
        if tidy:
            os.unlink(filename)

    # remove some stuff from the generated FO file causing
    # some conversion trouble either with XINC or XFC
    default_values = (
        ('footnote', 'reset'),
        ('unicode-bidi', 'embed'),
        ('writing-mode', 'lr-tb'),
        ('font-selection-strategy', 'character-by-character'),
        ('line-height-shift-adjustment', 'disregard-shifts'),
        ('page-break-after', 'avoid'),
        ('page-break-before', 'avoid'),
        ('page-break-inside', 'avoid'),
    )
    box_attributes = (
        'margin-left', 'margin-right', 'margin-top', 'margin-bottom',
        'padding-left', 'padding-right', 'padding-top', 'padding-bottom',
    )
    pre_attributes = {
        'white-space-treatment': 'preserve',
        'white-space-collapse': 'false',
        'wrap-option': 'no-wrap',
        'linefeed-treatment': 'preserve'
    }

    E = parse(fo_filename)

    ids_seen = set()  # set instead of list: O(1) duplicate check
    for node in E.getiterator():
        attrib = node.attrib

        # ensure that ID attributes are unique
        node_id = attrib.get('id')
        if node_id is not None:
            if node_id in ids_seen:
                del attrib['id']
            else:
                ids_seen.add(node_id)

        # drop attributes carrying one of the listed values
        for k, v in default_values:
            if attrib.get(k) == v:
                del attrib[k]

        # a bare '0' gets an explicit unit
        for attr in box_attributes:
            if attrib.get(attr) == '0':
                attrib[attr] = '0em'

        if attrib.get('page-break-after') == 'always':
            del attrib['page-break-after']
            attrib['break-after'] = 'page'

        if attrib.get('text-transform'):
            del attrib['text-transform']

        if attrib.get('white-space') == 'pre':
            del attrib['white-space']
            # node.text is None when the element has no leading text
            node.text = '\n' + (node.text or '').lstrip()
            for k, v in pre_attributes.items():
                attrib[k] = v

    fo_text = tostring(E.getroot())
    # avoid a linebreak through <li><p> (XFC)
    fo_text = fo_text.replace(
        '<ns0:block ',
        '<ns0:block margin-top="0" margin-bottom="0" '
    )
    # Stripping plain '<ns0:block/>' stays disabled (original note:
    # "causes a crash with XINC").
    fo_text = fo_text.replace(
        '<ns0:block margin-top="0" margin-bottom="0" />', '')

    # context manager closes the handle deterministically
    with open(fo_filename, 'wb') as fp:
        fp.write(fo_text)
    return fo_filename