Example #1
0
    def add_markup_text(self, mimetype, mimedata):
        """
        Add markup of the given mimetype to the output as "asis" text.

        Unless the format registered for ``mimetype`` in
        ``MARKUP_FORMAT_CONVERTER`` is already markdown or the configured
        pandoc export format, the data is first converted to markdown via
        pandoc. The result is emitted through ``add_asis``, framed by a
        newline on each side.

        Raises ``KnitpyOutputException`` if the pandoc conversion fails.
        """
        if mimetype == "text/html":
            # pandoc treats indented html as code and wraps it in <pre>, so
            # strip every line and glue the lines together without linefeeds.
            mimedata = "".join(piece.strip() for piece in mimedata.split("\n"))
            # pandas pads short cells with runs of spaces when another cell in
            # the column is long; pandoc doesn't like them, so collapse them.
            mimedata = re.sub(' +', ' ', mimedata)

        to_format = "markdown"
        source_format = MARKUP_FORMAT_CONVERTER[mimetype]
        # formats which can be included "asis" without any conversion
        passthrough_formats = (to_format,
                               self.export_config.pandoc_export_format)

        if source_format not in passthrough_formats:
            if "<table" in mimedata:
                # pandoc <=1.13.2 falls back to "only text" conversion when a
                # th appears in a normal tr, so rewrite the tables first.
                self.log.debug(
                    "Trying to fix tables for conversion with pandoc (bug in pandoc <=1.13.2).")
                mimedata = self._fix_html_tables_old_pandoc(mimedata)

            try:
                self.log.debug(
                    "Converting markup of type '%s' to '%s' via pandoc...",
                    mimetype, to_format)
                mimedata = pandoc(mimedata, to=to_format, format=source_format)
            except Exception as err:
                failure = "Could not convert mime data of type '%s' to output format '%s'."
                if isinstance(err, RuntimeError):
                    # these are pypandoc errors: no traceback needed
                    self.log.debug(failure, mimetype, to_format)
                else:
                    self.log.exception(failure, mimetype, to_format)
                raise KnitpyOutputException(str(err))

        for chunk in ("\n", mimedata, "\n"):
            self.add_asis(chunk)
Example #2
0
    def add_markup_text(self, mimetype, mimedata):
        """
        Emit ``mimedata`` (markup of type ``mimetype``) into the document "asis".

        Html input is first flattened to a single line. If the format that
        ``MARKUP_FORMAT_CONVERTER`` maps ``mimetype`` to is neither markdown
        nor the pandoc export format of the current export config, the data is
        converted to markdown with pandoc before it is added.

        Raises ``KnitpyOutputException`` when pandoc cannot convert the data.
        """
        is_html = mimetype == "text/html"
        if is_html:
            # workaround for some pandoc weirdness: indented html is taken for
            # code and formatted with pre, so drop all linefeeds/indentation.
            stripped_lines = [raw.strip() for raw in mimedata.split("\n")]
            flattened = "".join(stripped_lines)
            # pandas emits multiple spaces when one element in a column is
            # long and the rest is short; squeeze them into a single space.
            mimedata = re.sub(' +', ' ', flattened)

        to_format = "markdown"
        converter_format = MARKUP_FORMAT_CONVERTER[mimetype]
        export_format = self.export_config.pandoc_export_format
        # only convert when the data can't be included "asis" already
        needs_conversion = converter_format not in (to_format, export_format)

        if needs_conversion:
            if "<table" in mimedata:
                # Bug in pandoc <=1.13.2: a th inside a normal tr triggers
                # "only text" conversion.
                msg = "Trying to fix tables for conversion with pandoc (bug in pandoc <=1.13.2)."
                self.log.debug(msg)
                mimedata = self._fix_html_tables_old_pandoc(mimedata)

            failure_msg = "Could not convert mime data of type '%s' to output format '%s'."
            try:
                self.log.debug("Converting markup of type '%s' to '%s' via pandoc...",
                               mimetype, to_format)
                mimedata = pandoc(mimedata,
                                  to=to_format,
                                  format=converter_format)
            except RuntimeError as err:
                # these are pypandoc errors
                self.log.debug(failure_msg, mimetype, to_format)
                raise KnitpyOutputException(str(err))
            except Exception as err:
                # anything else is unexpected: log it including the traceback
                self.log.exception(failure_msg, mimetype, to_format)
                raise KnitpyOutputException(str(err))

        self.add_asis("\n")
        self.add_asis(mimedata)
        self.add_asis("\n")
Example #3
0
    def render(self, filename, output=None):
        """
        Convert the filename to the given output format(s)

        :param filename: path of the input document; it is passed through
            ``expand_path`` and ``os.path.abspath`` before use.
        :param output: ``None`` converts to the default export format,
            ``"all"`` converts to every format listed under ``output`` in the
            document metadata (or the default if none is listed); any other
            value is used as the name of a single output format.
        :returns: list with the path of each written document, built by
            joining the input file's directory and the output filename.
        :raises IOError: if the expanded ``filename`` does not exist.

        The working directory is switched to the directory of the input file
        during the conversion and is restored afterwards, also when the
        conversion fails with an exception.
        """
        converted_docs = []

        # save here to change back after the conversion.
        orig_cwd = getcwd()
        needs_chdir = False

        # save original filename as passed in so we can include it in output
        input_filename = filename

        # expand $HOME and so on...
        filename = expand_path(filename)
        filename = os.path.abspath(filename)
        self.log.info("Converting %s...", filename)

        basedir = os.path.dirname(filename)
        basename = os.path.splitext(os.path.basename(filename))[0]

        # no reason to continue past this point if we dont have
        # a valid file path, so check it here and provide a clear
        # failure message since this a common mistake
        if not os.path.exists(filename):
            raise IOError('File not found: %s\nExpanded path: %s' % (input_filename, filename))

        try:
            # It's easier if we just change wd to the dir of the file
            if unicode_type(basedir) != getcwd():
                os.chdir(basedir)
                needs_chdir = True
                self.log.info("Changing to working dir: %s", basedir)
                filename = os.path.basename(filename)

            # NOTE(review): basename already had its extension removed above,
            # so this second splitext also drops a further dotted suffix
            # ("a.b.ext" -> "a_files") -- confirm that this is intended.
            outputdir_name = os.path.splitext(basename)[0] + "_files"

            # parse the input document
            parsed, metadata = self.parse_document(filename)

            # get the output formats
            # order: kwarg overwrites default overwrites document
            output_formats = [self._outputs[self.default_export_format]]
            if output is None:
                self.log.debug("Converting to default output format [%s]!",
                               self.default_export_format)
            elif output == "all":
                outputs = metadata.get("output", None)
                # if nothing is specified, we keep the default
                if outputs is None:
                    self.log.debug("Did not find any specified output formats: using only default!")
                else:
                    output_formats = [self.get_output_format(fmt_name, config)
                                      for fmt_name, config in iteritems(outputs)]
                    self.log.debug("Converting to all specified output formats: %s",
                                   [fmt.name for fmt in output_formats])
            else:
                self._ensure_valid_output(output)
                output_formats = [self._outputs[output]]

            for final_format in output_formats:
                self.log.info("Converting document %s to %s", filename, final_format.name)
                # TODO: build a proper way to specify final output...

                md_temp = TemporaryOutputDocument(fileoutputs=outputdir_name,
                                                  export_config=final_format,
                                                  log=self.log, parent=self)

                # get the temporary md file
                self.convert(parsed, md_temp)
                if final_format.keep_md or self.keep_md:
                    mdfilename = basename+"."+final_format.name+".md"
                    self.log.info("Saving the temporary markdown as '%s'.", mdfilename)
                    # TODO: remove the first yaml metadata block and
                    # put "#<title>\n<author>\n<date>" before the rest
                    with codecs.open(mdfilename, 'w+b','UTF-8') as f:
                        f.write(md_temp.content)

                # convert the md file to the final filetype
                input_format = "markdown" \
                               "+autolink_bare_uris" \
                               "+ascii_identifiers" \
                               "+tex_math_single_backslash-implicit_figures" \
                               "+fenced_code_attributes"

                extra = ["--smart", # typographically correct output (curly quotes, etc)
                         "--email-obfuscation", "none", #do not obfuscation email names with javascript
                         "--self-contained", # include img/scripts as data urls
                         "--standalone", # html with header + footer
                         "--section-divs",
                         ]

                outfilename = basename+"." +final_format.file_extension

                # the return value is irrelevant, as we pass in a filename
                pandoc(source=md_temp.content,
                       to=final_format.pandoc_export_format,
                       format=input_format,
                       extra_args=extra,
                       outputfile=outfilename)
                self.log.info("Written final output: %s", outfilename)
                converted_docs.append(os.path.join(basedir, outfilename))
        finally:
            # always go back, so a failed conversion doesn't leave the
            # process in the directory of the document
            if needs_chdir:
                os.chdir(orig_cwd)
        return converted_docs
Example #4
0
    def render(self, filename, output=None):
        """
        Convert the filename to the given output format(s)

        :param filename: path of the input document; it is passed through
            ``expand_path`` and ``os.path.abspath`` before use.
        :param output: ``None`` converts to the default export format,
            ``"all"`` converts to every format listed under ``output`` in the
            document metadata (or the default if none is listed); any other
            value is used as the name of a single output format.
        :returns: list with the path of each written document, built by
            joining the input file's directory and the output filename.

        The working directory is switched to the directory of the input file
        during the conversion and is restored afterwards, also when the
        conversion fails with an exception.
        """
        converted_docs = []

        # save here to change back after the conversion.
        orig_cwd = os.getcwd()
        needs_chdir = False

        # expand $HOME and so on...
        filename = expand_path(filename)
        filename = os.path.abspath(filename)
        self.log.info("Converting %s...", filename)

        basedir = os.path.dirname(filename)
        basename = os.path.splitext(os.path.basename(filename))[0]

        try:
            # It's easier if we just change wd to the dir of the file
            if unicode_type(basedir) != py3compat.getcwd():
                os.chdir(basedir)
                needs_chdir = True
                self.log.info("Changing to working dir: %s", basedir)
                filename = os.path.basename(filename)

            # NOTE(review): basename already had its extension removed above,
            # so this second splitext also drops a further dotted suffix
            # ("a.b.ext" -> "a_files") -- confirm that this is intended.
            outputdir_name = os.path.splitext(basename)[0] + "_files"

            # parse the input document
            parsed, metadata = self.parse_document(filename)

            # get the output formats
            # order: kwarg overwrites default overwrites document
            output_formats = [self._outputs[self.default_export_format]]
            if output is None:
                self.log.debug("Converting to default output format [%s]!",
                               self.default_export_format)
            elif output == "all":
                outputs = metadata.get("output", None)
                # if nothing is specified, we keep the default
                if outputs is None:
                    self.log.debug("Did not find any specified output formats: using only default!")
                else:
                    output_formats = [self.get_output_format(fmt_name, config)
                                      for fmt_name, config in iteritems(outputs)]
                    self.log.debug("Converting to all specified output formats: %s",
                                   [fmt.name for fmt in output_formats])
            else:
                self._ensure_valid_output(output)
                output_formats = [self._outputs[output]]

            for final_format in output_formats:
                self.log.info("Converting document %s to %s", filename, final_format.name)
                # TODO: build a proper way to specify final output...

                md_temp = TemporaryOutputDocument(fileoutputs=outputdir_name,
                                                  export_config=final_format,
                                                  log=self.log, parent=self)

                # get the temporary md file
                self.convert(parsed, md_temp)
                if final_format.keep_md or self.keep_md:
                    mdfilename = basename+"."+final_format.name+".md"
                    self.log.info("Saving the temporary markdown as '%s'.", mdfilename)
                    # TODO: remove the first yaml metadata block and
                    # put "#<title>\n<author>\n<date>" before the rest
                    with codecs.open(mdfilename, 'w+b','UTF-8') as f:
                        f.write(md_temp.content)

                # convert the md file to the final filetype
                input_format = "markdown" \
                               "+autolink_bare_uris" \
                               "+ascii_identifiers" \
                               "+tex_math_single_backslash-implicit_figures" \
                               "+fenced_code_attributes"

                extra = ["--smart", # typographically correct output (curly quotes, etc)
                         "--email-obfuscation", "none", #do not obfuscation email names with javascript
                         "--self-contained", # include img/scripts as data urls
                         "--standalone", # html with header + footer
                         "--section-divs",
                         ]

                outfilename = basename+"." +final_format.file_extension

                # the return value is irrelevant, as we pass in a filename
                pandoc(source=md_temp.content,
                       to=final_format.pandoc_export_format,
                       format=input_format,
                       extra_args=extra,
                       outputfile=outfilename)
                self.log.info("Written final output: %s", outfilename)
                converted_docs.append(os.path.join(basedir, outfilename))
        finally:
            # always go back, so a failed conversion doesn't leave the
            # process in the directory of the document
            if needs_chdir:
                os.chdir(orig_cwd)
        return converted_docs