Beispiel #1
0
def run_pandoc(pandoc_options, md_fn, ext, verbose):
    """Invoke pandoc on ``md_fn`` and return the path of the produced file.

    The output path is taken from ``pandoc_options['output']`` when that
    entry is not ``None`` and from ``md_fn`` otherwise; in both cases the
    last suffix is swapped for ``ext`` so that e.g. a ``.pdf`` target still
    yields a ``.tex`` file when ``ext == 'tex'``.

    Parameters
    ----------
    pandoc_options : dict
        Pandoc options; must contain an ``'output'`` key.  The dict is
        modified in place: ``'output'`` is overwritten with the resolved
        path, and it is then handed to ``fix_citation_options`` (defined
        elsewhere; presumably adjusts it further -- confirm).
    md_fn : pathlib.Path
        Input file; needs ``.parent`` and ``.stem``.
    ext : str
        Either ``'pdf'`` or ``'tex'``.
    verbose : bool
        When true, print the pandoc command line and the elapsed time.

    Returns
    -------
    pathlib.Path
        The output file name, in case the caller wants to view it later.
    """
    assert ext in ('pdf', 'tex')
    assert isinstance(pandoc_options, dict)

    requested = pandoc_options['output']
    base = md_fn if requested is None else Path(requested)
    # Rebuild the name from the stem so the extension always matches ``ext``
    out_fn = base.parent / (base.stem + f'.{ext}')

    pandoc_options['output'] = out_fn
    fix_citation_options(pandoc_options, ext)
    pandoc_args = options2arguments(pandoc_options)
    pandoc_args.append(str(md_fn))

    if verbose:
        command_line = " ".join(pandoc_args)
        print('[pandocmk] Pandoc call:')
        print(f'    pandoc {command_line}')
        started = time.perf_counter()

    panflute.run_pandoc(args=pandoc_args)

    if verbose:
        elapsed = time.perf_counter() - started
        print(f'[pandocmk] Pandoc call completed in  {elapsed:0.1f} seconds')

    return out_fn
Beispiel #2
0
def test_env():
    """Check which kinds of ``Doc`` carry pandoc environment information.

    A ``Doc`` built directly, or obtained through ``convert_text``, is
    expected to have no pandoc version and an empty reader-options dict.
    The last step runs pandoc with the ``assert_env.py`` filter (which
    presumably performs the positive check inside the filter -- confirm).
    """

    def expect_no_env(document):
        # Neither the version nor any reader options should be populated
        assert document.pandoc_version is None
        assert isinstance(document.pandoc_reader_options,
                          dict) and not document.pandoc_reader_options
        print(' - No environment vars; as expected')

    # Case 1: a Doc() created by panflute has no environment vars
    print('\n - Testing Doc() created by panflute:')
    expect_no_env(pf.Doc())

    # Case 2: a Doc() created by running convert_text also doesn't
    print('\n - Testing Doc() created by panflute.convert_text():')
    sample = Path("./tests/sample_files/fenced/example.md")
    markdown_text = sample.read_text(encoding='utf-8')
    as_json = pf.convert_text(markdown_text,
                              input_format='markdown',
                              output_format='json',
                              standalone=True)
    round_tripped = pf.convert_text(as_json,
                                    input_format='json',
                                    output_format='panflute',
                                    standalone=True)
    expect_no_env(round_tripped)

    # Case 3: a Doc() seen from inside a filter run by pandoc
    print('\n - Testing Doc() as created by a filter:')
    pf.run_pandoc(text='Hello!',
                  args=['--filter=./tests/filters/assert_env.py'])
    print(' - Found environment vars; as expected')
Beispiel #3
0
def output_section(doc, heading, content):
    """Write one section to its own ``.md`` file by calling pandoc.

    The section text is the space-joined ``pf.stringify`` of every element
    in ``content``.  The file name is the lower-cased heading with spaces
    replaced by underscores, plus ``.md``.

    Parameters
    ----------
    doc
        Not used by this function; kept for interface compatibility.
    heading : str
        Section title; used both as the ``title`` metadata value and as the
        basis of the output file name.
    content : iterable
        Elements accepted by ``pf.stringify``.
    """
    text = ' '.join(pf.stringify(c) for c in content)
    title = heading
    # Append the extension instead of using Path.with_suffix(): with_suffix
    # treats everything after the last '.' in the title as an existing
    # suffix and drops it (e.g. "1. Intro" would become "1.md").
    filename = Path(title.lower().replace(' ', '_') + '.md')
    pf.run_pandoc(text=text,
                  args=[
                      f'--metadata=title:{title}',
                      # Arguments are passed to pandoc as separate argv
                      # entries, so '-o name' in a single entry would make
                      # the leading space part of the file name.
                      f'--output={filename}',
                      '--defaults=create_document'
                  ])
Beispiel #4
0
def inner_run_pandoc(pandoc_args):
    """Run pandoc and report whether a non-fatal error occurred.

    Returns ``False`` when pandoc ran without raising ``IOError`` and
    ``True`` when it raised one that ``error_is_fatal`` (defined elsewhere)
    does not consider fatal.  A fatal error terminates the program through
    ``SystemExit`` so that e.g. a LaTeX "Undefined control sequence" does
    not produce a huge traceback; see
    https://stackoverflow.com/questions/17784849/print-an-error-message-without-printing-a-traceback-and-close-the-program-when-a
    """
    try:
        panflute.run_pandoc(args=pandoc_args)
    except IOError as err:
        if not error_is_fatal(err):
            return True  # an error happened, but the caller may carry on
        raise SystemExit()
    else:
        return False  # no error
Beispiel #5
0
def prepare(doc):
    """Build a customised highlighting theme and store its output in ``doc``.

    Starts from pandoc's ``kate`` highlight style, overrides a few colours
    and styles using the ``shadecolor``, ``addcolor`` and ``rmcolor``
    metadata values, writes the result to a temporary ``.theme`` file and
    renders a tiny C++ snippet with it for LaTeX and HTML.  The two results
    are stored as raw blocks under the ``highlighting-macros`` and
    ``highlighting-css`` metadata keys.
    """
    datadir = doc.get_metadata('datadir')

    theme = json.loads(pf.run_pandoc(args=['--print-highlight-style', 'kate']))
    theme['background-color'] = '#' + doc.get_metadata('shadecolor')

    styles = theme['text-styles']
    styles['BuiltIn'] = styles['Normal']
    styles['Comment']['italic'] = True
    styles['ControlFlow'] = styles['DataType']
    styles['Keyword'] = styles['DataType']
    styles['Variable']['text-color'] = '#' + doc.get_metadata('addcolor')
    styles['String']['text-color'] = '#' + doc.get_metadata('rmcolor')

    with tempfile.NamedTemporaryFile('w', suffix='.theme') as theme_file:
        json.dump(theme, theme_file)
        # pandoc opens the file by name, so the data has to be on disk
        theme_file.flush()

        template = os.path.join(datadir, 'template', 'highlighting')
        targets = (('highlighting-macros', 'latex'),
                   ('highlighting-css', 'html'))
        for meta_key, output_format in targets:
            rendered = pf.convert_text('`_`{.cpp}',
                                       output_format=output_format,
                                       extra_args=[
                                           '--highlight-style',
                                           theme_file.name,
                                           '--template',
                                           template,
                                       ])
            doc.metadata[meta_key] = pf.MetaBlocks(
                pf.RawBlock(rendered, output_format))
Beispiel #6
0
def prepare(doc):
    """Generate highlighting macros/CSS from a tweaked ``kate`` theme.

    The theme printed by ``pandoc --print-highlight-style kate`` is loaded
    as JSON, adjusted with the ``shadecolor``, ``addcolor`` and ``rmcolor``
    metadata values, and dumped to a temporary ``.theme`` file.  A one-token
    C++ snippet is then converted with that theme and the ``highlighting``
    template found under ``<datadir>/template``; the LaTeX result goes to
    ``highlighting-macros`` and the HTML result to ``highlighting-css``.
    """
    datadir = doc.get_metadata('datadir')

    kate_json = pf.run_pandoc(args=['--print-highlight-style', 'kate'])
    palette = json.loads(kate_json)
    palette['background-color'] = '#' + doc.get_metadata('shadecolor')

    token_styles = palette['text-styles']
    token_styles['BuiltIn'] = token_styles['Normal']
    token_styles['Comment']['italic'] = True
    token_styles['ControlFlow'] = token_styles['DataType']
    token_styles['Keyword'] = token_styles['DataType']
    token_styles['Variable']['text-color'] = (
        '#' + doc.get_metadata('addcolor'))
    token_styles['String']['text-color'] = '#' + doc.get_metadata('rmcolor')

    with tempfile.NamedTemporaryFile('w', suffix='.theme') as handle:
        json.dump(palette, handle)
        handle.flush()  # make sure pandoc sees the full theme on disk

        def highlighting_meta(output_format):
            # Render the snippet and wrap it as a raw block of that format
            template_path = os.path.join(datadir, 'template', 'highlighting')
            pandoc_extra = ['--highlight-style', handle.name,
                            '--template', template_path]
            snippet = pf.convert_text('`_`{.cpp}',
                                      output_format=output_format,
                                      extra_args=pandoc_extra)
            return pf.MetaBlocks(pf.RawBlock(snippet, output_format))

        doc.metadata['highlighting-macros'] = highlighting_meta('latex')
        doc.metadata['highlighting-css'] = highlighting_meta('html')
Beispiel #7
0
def panflute2output(elem, format="json", doc=None):
    """Serialise panflute element(s) to ``format`` via pandoc.

    Parameters
    ----------
    elem
        A single element, a list, or a ``pf.ListContainer``; a single
        element is wrapped in a list.
    format : str
        Target format.  ``"json"`` returns the dumped pandoc AST directly;
        anything else is produced by piping that AST through pandoc.
    doc
        Optional document to use as a template.  It is deep-copied, so the
        caller's object is not modified; the copy gets ``elem`` as content.

    Returns
    -------
    str
        The JSON AST, or pandoc's output for ``format``.
    """
    # NOTE(review): the API version is pinned here; the reason is not
    # visible in this function -- confirm before changing.
    pinned_api = (1, 17, 3, 1)

    elements = elem if isinstance(elem, (list, pf.ListContainer)) else [elem]

    if doc is not None:
        doc = copy.deepcopy(doc)
        doc.content = elements
        doc.format = format
        doc.api_version = pinned_api
    else:
        doc = pf.Doc(*elements, format=format, api_version=pinned_api)

    with io.StringIO() as buffer:
        pf.dump(doc, buffer)
        ast = buffer.getvalue()

    if format == "json":
        return ast

    return pf.run_pandoc(
        text=ast, args=["-f", "json", "-t", format, "--wrap=none"])
Beispiel #8
0
def apply_filter(in_object,
                 filter_func=None,
                 out_format="panflute",
                 in_format="markdown",
                 strip_meta=False,
                 strip_blank_lines=False,
                 replace_api_version=True,
                 dry_run=False,
                 **kwargs):
    """Convenience function to apply one or more panflute filters
    to a string, list of string lines, pandoc AST or panflute.Doc.

    Parameters
    ----------
    in_object: str or list[str] or dict or panflute.Doc
        The input.  A list/tuple of lines is joined with newlines; a dict
        is treated as a pandoc JSON AST.  A ``panflute.Doc`` is used as-is,
        and its ``format`` attribute is overwritten with ``out_format``.
    filter_func:
        The filter function, or a list/tuple/set of filter functions.  Each
        is called as ``func(doc, **kwargs)`` and must return the document to
        pass on.  ``None`` (the default) applies no filter.
    out_format: str
        For use by pandoc or, if 'panflute', return the panflute.Doc
    in_format: str
        The pandoc input format; must be 'json' when ``in_object`` is a dict
    strip_meta: bool
        Strip the document metadata before final conversion
    strip_blank_lines: bool
        Replace each pair of consecutive newlines in the output string
        with a single newline
    replace_api_version: bool
        For dict input only, if True,
        find the api_version of the available pandoc and
        reformat the json as appropriate
    dry_run: bool
        If True, return the Doc object, before applying the filter
    kwargs:
        Passed on to every filter function

    Returns
    -------
    str or panflute.Doc
        The Doc when ``dry_run`` is True or ``out_format`` is 'panflute',
        otherwise the converted string.

    Raises
    ------
    AssertionError
        If ``in_object`` is a dict and ``in_format`` is not 'json'.
    ValueError
        If a dict ``in_object`` lacks one of the required top-level keys.
    TypeError
        If ``in_object`` is of an unsupported type.

    """
    if isinstance(in_object, pf.Doc):
        pass
    elif isinstance(in_object, dict):
        if in_format != "json":
            raise AssertionError("the in_format for a dict should be json, "
                                 "not {}".format(in_format))
        for required in ("meta", "blocks", "pandoc-api-version"):
            if required not in in_object:
                raise ValueError(
                    "the in_object does not contain a '{}' key".format(
                        required))
        if replace_api_version:
            # run pandoc on a null object, to get the correct api version
            null_raw = pf.run_pandoc("", args=["-t", "json"])
            null_stream = io.StringIO(null_raw)
            api_version = pf.load(null_stream).api_version

            # see panflute.load, w.r.t to legacy version
            if api_version is None:
                in_object = [{
                    "unMeta": in_object["meta"]
                }, in_object["blocks"]]
            else:
                ans = OrderedDict()
                ans["pandoc-api-version"] = api_version
                ans["meta"] = in_object["meta"]
                ans["blocks"] = in_object["blocks"]
                in_object = ans
        in_str = json.dumps(in_object)
    elif isinstance(in_object, (list, tuple)):
        in_str = "\n".join(in_object)
    elif isinstance(in_object, string_types):
        in_str = in_object
    else:
        raise TypeError("object not accepted: {}".format(in_object))

    if isinstance(in_object, pf.Doc):
        doc = in_object
    else:
        doc = pf.convert_text(in_str, input_format=in_format, standalone=True)

    doc.format = out_format

    if dry_run:
        return doc

    # Normalise filter_func to an iterable of callables.  The default of
    # None means "no filters" rather than an attempt to call None.
    if filter_func is None:
        filter_func = []
    elif not isinstance(filter_func, (list, tuple, set)):
        filter_func = [filter_func]

    out_doc = doc
    for func in filter_func:
        out_doc = func(out_doc, **kwargs)  # type: Doc

    # post-process Doc
    if strip_meta:
        out_doc.metadata = {}
    if out_format == "panflute":
        return out_doc

    # create out str
    out_str = pf.convert_text(out_doc,
                              input_format="panflute",
                              output_format=out_format)

    # post-process final str
    if strip_blank_lines:
        out_str = out_str.replace("\n\n", "\n")

    return out_str
def format_image(image, doc):
    # type: (Image, Doc) -> Element
    """Format an image element for the document's output format.

    Originally adapted from:
    `pandoc-fignos <https://github.com/tomduck/pandoc-fignos/>`_

    The identifier and attributes are read from the parent ``Span`` when
    that span carries ``LABELLED_IMAGE_CLASS``, otherwise from the image
    itself.

    Returns
    -------
    - ``None`` if ``image`` is not a ``pf.Image`` or the format is not
      handled (the element is left untouched);
    - for tex/latex, a raw ``tex`` inline holding a figure environment;
    - for rst, the image itself, given a caption if it had none;
    - for html/html5, the image, wrapped in an anchor when it has an
      identifier.
    """
    if not isinstance(image, pf.Image):
        return None

    parent = image.parent
    if (isinstance(parent, pf.Span)
            and LABELLED_IMAGE_CLASS in parent.classes):
        source = parent
    else:
        source = image
    identifier = source.identifier
    attributes = source.attributes

    out_format = doc.format

    if out_format in ("html", "html5"):
        # TODO formatting, name by count
        return _wrap_in_anchor(image, identifier) if identifier else image

    if out_format in ("rst", ):
        if not image.content.list:
            # If the container is empty, then pandoc will assign an iterative
            # reference identifier to it (image0, image1).
            # However, this iterator restarts for each markdown cell,
            # which can lead to reference clashes.
            # Therefore we specifically assign the identifier here, as its url
            # TODO does this identifier need to be sanitized?
            # (it works fine in the tests)
            image.content = pf.ListContainer(pf.Str(str(image.url)))
        # TODO formatting and span identifier (convert width/height to %)
        return image

    if out_format not in ("tex", "latex"):
        return None

    # --- tex / latex ---
    # The caption is the image's inline content rendered to LaTeX by pandoc
    caption_doc = Doc(pf.Para(*image.content))
    caption_doc.api_version = doc.api_version
    if image.content:
        caption_json = json.dumps(caption_doc.to_json())
        caption = pf.run_pandoc(caption_json,
                                args=["-f", "json", "-t", "latex"]).strip()
    else:
        caption = ""

    options = attributes.get("placement", "")

    # An empty size leaves the sizing to the figure template
    # (original note: max width set as 0.9\linewidth)
    if "width" in attributes:
        fraction = convert_units(attributes["width"], "fraction")
        size = "width={0}\\linewidth".format(fraction)
    elif "height" in attributes:
        fraction = convert_units(attributes["height"], "fraction")
        size = "height={0}\\paperheight".format(fraction)
    else:
        size = ""

    fields = dict(options=options,
                  path=image.url,
                  caption=caption,
                  size=size)
    if identifier:
        latex = LATEX_FIG_LABELLED.format(label=identifier, **fields)
    else:
        latex = LATEX_FIG_UNLABELLED.format(**fields)

    return pf.RawInline(latex, format="tex")
Beispiel #10
0
def format_image(image, doc):
    # type: (Image, Doc) -> Element
    """Convert an image into the representation used for ``doc.format``.

    Originally adapted from:
    `pandoc-fignos <https://github.com/tomduck/pandoc-fignos/>`_

    When the image sits inside a ``Span`` tagged with
    ``LABELLED_IMAGE_CLASS``, the span supplies the identifier and
    attributes; otherwise they come from the image.

    Returns ``None`` for non-images and for unhandled formats, a raw
    ``tex`` inline for tex/latex, the unchanged image for rst, and for
    html/html5 the image (wrapped in an anchor if it has an identifier).
    """
    if not isinstance(image, pf.Image):
        return None

    holder = image.parent
    labelled = (isinstance(holder, pf.Span)
                and LABELLED_IMAGE_CLASS in holder.classes)
    owner = holder if labelled else image
    identifier = owner.identifier
    attributes = owner.attributes

    target = doc.format

    if target in ("rst", ):
        # TODO formatting and span identifier (convert width/height to %)
        return image

    if target in ("html", "html5"):
        # TODO formatting, name by count
        if not identifier:
            return image
        return _wrap_in_anchor(image, identifier)

    if target not in ("tex", "latex"):
        return None

    # tex / latex: render the caption content through pandoc
    new_doc = Doc(pf.Para(*image.content))
    new_doc.api_version = doc.api_version
    caption = ""
    if image.content:
        serialised = json.dumps(new_doc.to_json())
        caption = pf.run_pandoc(serialised,
                                args=["-f", "json", "-t", "latex"]).strip()

    options = attributes.get("placement", "")

    # Empty unless an explicit width or height attribute is present
    # (original note: max width set as 0.9\linewidth)
    size = ''
    if "width" in attributes:
        size = 'width={0}\\linewidth'.format(
            convert_units(attributes['width'], "fraction"))
    elif "height" in attributes:
        size = 'height={0}\\paperheight'.format(
            convert_units(attributes['height'], "fraction"))

    common = {
        "options": options,
        "path": image.url,
        "caption": caption,
        "size": size,
    }
    if identifier:
        latex = LATEX_FIG_LABELLED.format(label=identifier, **common)
    else:
        latex = LATEX_FIG_UNLABELLED.format(**common)

    return pf.RawInline(latex, format="tex")