def add_markup_text(self, mimetype, mimedata):
    """Add markup text to the output, converting it via pandoc if needed.

    :param mimetype: mimetype of ``mimedata``; used as key into
        ``MARKUP_FORMAT_CONVERTER`` to find the matching pandoc input format.
    :param mimedata: the markup text itself.
    :raises KnitpyOutputException: if the pandoc conversion fails.
    """
    if mimetype == "text/html":
        # Workaround for some pandoc weirdness: pandoc interprets indented html
        # as code and formats it with pre, so drop all linefeeds and the
        # whitespace around every line.
        mimedata = "".join(piece.strip() for piece in mimedata.split("\n"))
        # pandas adds multiple spaces if one element in a column is long but
        # the rest is short. Collapse these runs, as pandoc doesn't like them.
        mimedata = re.sub(' +', ' ', mimedata)

    to_format = "markdown"
    source_format = MARKUP_FORMAT_CONVERTER[mimetype]
    # Markup which is already markdown or already in the final export format
    # can be included "asis"; everything else is converted to markdown first.
    if source_format not in (to_format, self.export_config.pandoc_export_format):
        if "<table" in mimedata:
            # There is a bug in pandoc <=1.13.2, where a th in a normal tr
            # triggers "only text" conversion.
            self.log.debug(
                "Trying to fix tables for conversion with pandoc (bug in pandoc <=1.13.2).")
            mimedata = self._fix_html_tables_old_pandoc(mimedata)
        try:
            self.log.debug("Converting markup of type '%s' to '%s' via pandoc...",
                           mimetype, to_format)
            mimedata = pandoc(mimedata, to=to_format, format=source_format)
        except Exception as err:
            # RuntimeErrors are pypandoc errors and are only logged at debug
            # level; anything else is reported via log.exception.
            if isinstance(err, RuntimeError):
                report = self.log.debug
            else:
                report = self.log.exception
            report("Could not convert mime data of type '%s' to output format '%s'.",
                   mimetype, to_format)
            raise KnitpyOutputException(str(err))

    # surround the markup with newlines to separate it from neighbouring output
    for chunk in ("\n", mimedata, "\n"):
        self.add_asis(chunk)
def add_markup_text(self, mimetype, mimedata):
    """Add markup text to the output, converting it via pandoc if needed.

    :param mimetype: mimetype of ``mimedata``; used as key into
        ``MARKUP_FORMAT_CONVERTER`` to find the matching pandoc input format.
    :param mimedata: the markup text itself.
    :raises KnitpyOutputException: if the pandoc conversion fails.
    """
    if mimetype == "text/html":
        # Workaround for some pandoc weirdness: pandoc interprets indented html
        # as code and formats it with pre, so drop all linefeeds and the
        # whitespace around every line.
        stripped_lines = [raw.strip() for raw in mimedata.split("\n")]
        mimedata = "".join(stripped_lines)
        # pandas adds multiple spaces if one element in a column is long but
        # the rest is short. Collapse these runs, as pandoc doesn't like them.
        mimedata = re.sub(' +', ' ', mimedata)

    to_format = "markdown"
    pandoc_input_format = MARKUP_FORMAT_CONVERTER[mimetype]
    # Markup which is already markdown or already in the final export format
    # can be included "asis"; everything else is converted to markdown first.
    include_directly = pandoc_input_format in (
        to_format, self.export_config.pandoc_export_format)

    if not include_directly:
        if "<table" in mimedata:
            # There is a bug in pandoc <=1.13.2, where a th in a normal tr
            # triggers "only text" conversion.
            self.log.debug(
                "Trying to fix tables for conversion with pandoc (bug in pandoc <=1.13.2).")
            mimedata = self._fix_html_tables_old_pandoc(mimedata)
        failure_msg = "Could not convert mime data of type '%s' to output format '%s'."
        try:
            self.log.debug("Converting markup of type '%s' to '%s' via pandoc...",
                           mimetype, to_format)
            mimedata = pandoc(mimedata, to=to_format, format=pandoc_input_format)
        except RuntimeError as err:
            # these are pypandoc errors
            self.log.debug(failure_msg, mimetype, to_format)
            raise KnitpyOutputException(str(err))
        except Exception as err:
            self.log.exception(failure_msg, mimetype, to_format)
            raise KnitpyOutputException(str(err))

    # surround the markup with newlines to separate it from neighbouring output
    self.add_asis("\n")
    self.add_asis(mimedata)
    self.add_asis("\n")
def render(self, filename, output=None):
    """Convert the document at ``filename`` to the requested output format(s).

    The conversion runs with the document's directory as working directory;
    the previous working directory is restored afterwards, even if the
    conversion fails.

    :param filename: path of the input document; it is expanded via
        ``expand_path`` and made absolute before use.
    :param output: ``None`` to use the default export format, ``"all"`` to use
        every format listed under ``output`` in the document metadata (the
        default format is kept if nothing is listed), or the name of a single
        output format (checked via ``_ensure_valid_output``).
    :returns: list with the path of every written output file, located in the
        directory of the input document.
    :raises IOError: if the expanded path does not exist.
    """
    converted_docs = []
    # save here to change back after the conversion
    orig_cwd = getcwd()
    needs_chdir = False
    # save the original filename as passed in so we can include it in output
    input_filename = filename
    # expand $HOME and so on...
    filename = expand_path(filename)
    filename = os.path.abspath(filename)
    self.log.info("Converting %s..." % filename)
    basedir = os.path.dirname(filename)
    basename = os.path.splitext(os.path.basename(filename))[0]

    # No reason to continue past this point if we don't have a valid file
    # path, so check it here and provide a clear failure message, since this
    # is a common mistake.
    if not os.path.exists(filename):
        raise IOError('File not found: %s\nExpanded path: %s' % (input_filename, filename))

    try:
        # It's easier if we just change wd to the dir of the file
        if unicode_type(basedir) != getcwd():
            os.chdir(basedir)
            needs_chdir = True
            self.log.info("Changing to working dir: %s" % basedir)
        filename = os.path.basename(filename)
        outputdir_name = os.path.splitext(basename)[0] + "_files"

        # parse the input document
        parsed, metadata = self.parse_document(filename)

        # get the output formats
        # order: kwarg overwrites default overwrites document
        output_formats = [self._outputs[self.default_export_format]]
        if output is None:
            self.log.debug("Converting to default output format [%s]!"
                           % self.default_export_format)
        elif output == "all":
            outputs = metadata.get("output", None)
            # if nothing is specified, we keep the default
            if outputs is None:
                self.log.debug("Did not find any specified output formats: using only default!")
            else:
                output_formats = []
                for fmt_name, config in iteritems(outputs):
                    fod = self.get_output_format(fmt_name, config)
                    output_formats.append(fod)
                self.log.debug("Converting to all specified output formats: %s"
                               % [fmt.name for fmt in output_formats])
        else:
            self._ensure_valid_output(output)
            output_formats = [self._outputs[output]]

        for final_format in output_formats:
            self.log.info("Converting document %s to %s", filename, final_format.name)
            # TODO: build a proper way to specify final output...

            md_temp = TemporaryOutputDocument(fileoutputs=outputdir_name,
                                              export_config=final_format,
                                              log=self.log, parent=self)

            # get the temporary md file
            self.convert(parsed, md_temp)
            if final_format.keep_md or self.keep_md:
                mdfilename = basename + "." + final_format.name + ".md"
                self.log.info("Saving the temporary markdown as '%s'." % mdfilename)
                # TODO: remove the first yaml metadata block and
                # put "#<title>\n<author>\n<date>" before the rest
                with codecs.open(mdfilename, 'w+b', 'UTF-8') as f:
                    f.write(md_temp.content)

            # convert the md file to the final filetype
            input_format = ("markdown"
                            "+autolink_bare_uris"
                            "+ascii_identifiers"
                            "+tex_math_single_backslash-implicit_figures"
                            "+fenced_code_attributes")

            # NOTE(review): `--smart` is only accepted by pandoc < 2.0 -- confirm
            # which pandoc versions have to be supported.
            extra = ["--smart",  # typographically correct output (curly quotes, etc)
                     "--email-obfuscation", "none",  # do not obfuscate email names with javascript
                     "--self-contained",  # include img/scripts as data urls
                     "--standalone",  # html with header + footer
                     "--section-divs",
                     ]

            outfilename = basename + "." + final_format.file_extension

            # the return value is irrelevant, as we pass in a filename
            pandoc(source=md_temp.content, to=final_format.pandoc_export_format,
                   format=input_format, extra_args=extra, outputfile=outfilename)
            self.log.info("Written final output: %s" % outfilename)
            converted_docs.append(os.path.join(basedir, outfilename))
    finally:
        # always go back to where we started, also when the conversion failed
        if needs_chdir:
            os.chdir(orig_cwd)
    return converted_docs
def render(self, filename, output=None):
    """Convert the document at ``filename`` to the requested output format(s).

    The conversion runs with the document's directory as working directory;
    the previous working directory is restored afterwards, even if the
    conversion fails.

    :param filename: path of the input document; it is expanded via
        ``expand_path`` and made absolute before use.
    :param output: ``None`` to use the default export format, ``"all"`` to use
        every format listed under ``output`` in the document metadata (the
        default format is kept if nothing is listed), or the name of a single
        output format (checked via ``_ensure_valid_output``).
    :returns: list with the path of every written output file, located in the
        directory of the input document.
    """
    converted_docs = []
    # save here to change back after the conversion
    orig_cwd = os.getcwd()
    needs_chdir = False
    # expand $HOME and so on...
    filename = expand_path(filename)
    filename = os.path.abspath(filename)
    self.log.info("Converting %s..." % filename)
    basedir = os.path.dirname(filename)
    basename = os.path.splitext(os.path.basename(filename))[0]

    try:
        # It's easier if we just change wd to the dir of the file
        if unicode_type(basedir) != py3compat.getcwd():
            os.chdir(basedir)
            needs_chdir = True
            self.log.info("Changing to working dir: %s" % basedir)
        filename = os.path.basename(filename)
        outputdir_name = os.path.splitext(basename)[0] + "_files"

        # parse the input document
        parsed, metadata = self.parse_document(filename)

        # get the output formats
        # order: kwarg overwrites default overwrites document
        output_formats = [self._outputs[self.default_export_format]]
        if output is None:
            self.log.debug("Converting to default output format [%s]!"
                           % self.default_export_format)
        elif output == "all":
            outputs = metadata.get("output", None)
            # if nothing is specified, we keep the default
            if outputs is None:
                self.log.debug("Did not find any specified output formats: using only default!")
            else:
                output_formats = []
                for fmt_name, config in iteritems(outputs):
                    fod = self.get_output_format(fmt_name, config)
                    output_formats.append(fod)
                self.log.debug("Converting to all specified output formats: %s"
                               % [fmt.name for fmt in output_formats])
        else:
            self._ensure_valid_output(output)
            output_formats = [self._outputs[output]]

        for final_format in output_formats:
            self.log.info("Converting document %s to %s", filename, final_format.name)
            # TODO: build a proper way to specify final output...

            md_temp = TemporaryOutputDocument(fileoutputs=outputdir_name,
                                              export_config=final_format,
                                              log=self.log, parent=self)

            # get the temporary md file
            self.convert(parsed, md_temp)
            if final_format.keep_md or self.keep_md:
                mdfilename = basename + "." + final_format.name + ".md"
                self.log.info("Saving the temporary markdown as '%s'." % mdfilename)
                # TODO: remove the first yaml metadata block and
                # put "#<title>\n<author>\n<date>" before the rest
                with codecs.open(mdfilename, 'w+b', 'UTF-8') as f:
                    f.write(md_temp.content)

            # convert the md file to the final filetype
            input_format = ("markdown"
                            "+autolink_bare_uris"
                            "+ascii_identifiers"
                            "+tex_math_single_backslash-implicit_figures"
                            "+fenced_code_attributes")

            # NOTE(review): `--smart` is only accepted by pandoc < 2.0 -- confirm
            # which pandoc versions have to be supported.
            extra = ["--smart",  # typographically correct output (curly quotes, etc)
                     "--email-obfuscation", "none",  # do not obfuscate email names with javascript
                     "--self-contained",  # include img/scripts as data urls
                     "--standalone",  # html with header + footer
                     "--section-divs",
                     ]

            outfilename = basename + "." + final_format.file_extension

            # the return value is irrelevant, as we pass in a filename
            pandoc(source=md_temp.content, to=final_format.pandoc_export_format,
                   format=input_format, extra_args=extra, outputfile=outfilename)
            self.log.info("Written final output: %s" % outfilename)
            converted_docs.append(os.path.join(basedir, outfilename))
    finally:
        # always go back to where we started, also when the conversion failed
        if needs_chdir:
            os.chdir(orig_cwd)
    return converted_docs