def run_pandoc(pandoc_options, md_fn, ext, verbose):
    """Run pandoc on ``md_fn`` and return the path of the output file.

    The output file name is derived from ``pandoc_options['output']`` when
    it is set, otherwise from ``md_fn``; in both cases the final extension
    is forced to ``ext``. The computed path is written back into
    ``pandoc_options['output']`` before the options are turned into
    command-line arguments.

    Parameters
    ----------
    pandoc_options : dict
        Pandoc options; must contain an ``'output'`` key (may be ``None``).
        Mutated in place.
    md_fn : pathlib.Path
        Input file handed to pandoc as the last argument.
    ext : str
        Either ``'pdf'`` or ``'tex'``.
    verbose : bool
        When true, print the pandoc call and how long it took.

    Returns
    -------
    pathlib.Path
        The output file name, so the caller can open it later.
    """
    assert ext in ('pdf', 'tex')
    assert isinstance(pandoc_options, dict)

    # Pick the path whose stem names the output, then force the extension
    # (this guarantees e.g. a .tex output when a .tex is required).
    requested = pandoc_options['output']
    base = md_fn if requested is None else Path(requested)
    out_fn = base.parent / (base.stem + f'.{ext}')
    pandoc_options['output'] = out_fn

    fix_citation_options(pandoc_options, ext)
    cmd_args = options2arguments(pandoc_options)
    cmd_args.append(str(md_fn))

    if verbose:
        print('[pandocmk] Pandoc call:')
        print(f' pandoc {" ".join(cmd_args)}')

    started = time.perf_counter()
    panflute.run_pandoc(args=cmd_args)
    if verbose:
        elapsed = time.perf_counter() - started
        print(f'[pandocmk] Pandoc call completed in {elapsed:0.1f} seconds')

    return out_fn
def test_env():
    """Check which kinds of ``Doc`` carry pandoc environment information.

    A ``Doc`` built directly, and one produced by ``convert_text``, are
    expected to have no pandoc version and empty reader options. A filter
    run through pandoc is expected to see them; that is asserted inside
    the ``assert_env.py`` filter itself.
    """

    def expect_no_environment(document):
        # Shared assertions for the two "no environment" scenarios.
        assert document.pandoc_version is None
        reader_options = document.pandoc_reader_options
        assert isinstance(reader_options, dict) and not reader_options
        print(' - No environment vars; as expected')

    # A Doc() created by panflute has no environment vars
    print('\n - Testing Doc() created by panflute:')
    expect_no_environment(pf.Doc())

    # A Doc() created by running convert_text also doesn't
    print('\n - Testing Doc() created by panflute.convert_text():')
    sample_path = Path("./tests/sample_files/fenced/example.md")
    with sample_path.open(encoding='utf-8') as handle:
        markdown_text = handle.read()
    json_pandoc = pf.convert_text(
        markdown_text,
        input_format='markdown',
        output_format='json',
        standalone=True,
    )
    round_tripped = pf.convert_text(
        json_pandoc,
        input_format='json',
        output_format='panflute',
        standalone=True,
    )
    expect_no_environment(round_tripped)

    print('\n - Testing Doc() as created by a filter:')
    pf.run_pandoc(text='Hello!',
                  args=['--filter=./tests/filters/assert_env.py'])
    print(' - Found environment vars; as expected')
def output_section(doc, heading, content):
    """Write one section to its own Markdown file by calling pandoc.

    The section text is the space-joined ``pf.stringify`` of every element
    in ``content``. The output file is named after the heading: lower-cased,
    spaces replaced by underscores, with ``.md`` appended.

    Parameters
    ----------
    doc
        Unused; kept so the signature stays compatible with callers.
    heading : str
        Section title; also used as the document ``title`` metadata.
    content : iterable
        Panflute elements that make up the section body.
    """
    text = ' '.join(pf.stringify(c) for c in content)
    title = heading

    # Append the extension instead of using Path.with_suffix(): with_suffix
    # would replace everything after the last dot of a title such as
    # "Section 1.2" (yielding "section_1.md") and raises on an empty name.
    filename = Path(title.lower().replace(' ', '_') + '.md')

    pf.run_pandoc(text=text, args=[
        f'--metadata=title:{title}',
        # The arguments are passed to pandoc as separate argv entries, so
        # "-o name" in a single entry would make the leading space part of
        # the file name. The long "--output=" form avoids that.
        f'--output={filename}',
        '--defaults=create_document',
    ])
def inner_run_pandoc(pandoc_args):
    """Run pandoc and report whether it failed.

    Returns ``False`` when the pandoc call succeeds and ``True`` when it
    raised an ``IOError`` that ``error_is_fatal`` does not consider fatal.
    For a fatal error (a LaTeX error such as "Undefined control sequence",
    etc.) the program is stopped with ``SystemExit`` so that no huge
    traceback is printed. See
    https://stackoverflow.com/questions/17784849/print-an-error-message-without-printing-a-traceback-and-close-the-program-when-a
    """
    try:
        panflute.run_pandoc(args=pandoc_args)
    except IOError as err:
        if not error_is_fatal(err):
            return True  # error = True
        raise SystemExit()
    else:
        return False  # error = False
def prepare(doc):
    """Build a customised highlighting theme and store its macros in ``doc``.

    Starts from pandoc's ``kate`` highlight style, overrides a few styles
    using the ``shadecolor``, ``addcolor`` and ``rmcolor`` metadata values,
    writes the result to a temporary ``.theme`` file and renders the
    ``template/highlighting`` template under the ``datadir`` metadata value
    once for LaTeX and once for HTML. The results are stored in the
    ``highlighting-macros`` and ``highlighting-css`` metadata fields.
    """
    datadir = doc.get_metadata('datadir')

    theme = json.loads(
        pf.run_pandoc(args=['--print-highlight-style', 'kate']))
    theme['background-color'] = '#' + doc.get_metadata('shadecolor')

    styles = theme['text-styles']
    # Each target style reuses the style object of an existing one.
    for target, source in (('BuiltIn', 'Normal'),
                           ('ControlFlow', 'DataType'),
                           ('Keyword', 'DataType')):
        styles[target] = styles[source]
    styles['Comment']['italic'] = True
    styles['Variable']['text-color'] = '#' + doc.get_metadata('addcolor')
    styles['String']['text-color'] = '#' + doc.get_metadata('rmcolor')

    # The theme file must stay alive while pandoc reads it, so both
    # conversions happen inside the ``with`` block.
    with tempfile.NamedTemporaryFile('w', suffix='.theme') as f:
        json.dump(theme, f)
        f.flush()

        template = os.path.join(datadir, 'template', 'highlighting')
        for meta_key, output_format in (('highlighting-macros', 'latex'),
                                        ('highlighting-css', 'html')):
            rendered = pf.convert_text(
                '`_`{.cpp}',
                output_format=output_format,
                extra_args=[
                    '--highlight-style', f.name,
                    '--template', template,
                ])
            doc.metadata[meta_key] = pf.MetaBlocks(
                pf.RawBlock(rendered, output_format))
def prepare(doc):
    """Generate highlighting macros/CSS from a tweaked ``kate`` style.

    The ``kate`` highlight style printed by pandoc is loaded as JSON,
    adjusted with colours taken from the document metadata (``shadecolor``,
    ``addcolor``, ``rmcolor``) and dumped to a temporary ``.theme`` file.
    That theme is then used to render ``<datadir>/template/highlighting``
    for the ``latex`` and ``html`` output formats; the two results are
    stored as raw blocks in ``doc.metadata['highlighting-macros']`` and
    ``doc.metadata['highlighting-css']``.
    """
    datadir = doc.get_metadata('datadir')
    kate_json = pf.run_pandoc(args=['--print-highlight-style', 'kate'])
    style_def = json.loads(kate_json)

    shade = doc.get_metadata('shadecolor')
    style_def['background-color'] = '#' + shade

    by_token = style_def['text-styles']
    normal_style = by_token['Normal']
    by_token['BuiltIn'] = normal_style
    by_token['Comment']['italic'] = True
    datatype_style = by_token['DataType']
    by_token['ControlFlow'] = datatype_style
    by_token['Keyword'] = datatype_style
    by_token['Variable']['text-color'] = '#' + doc.get_metadata('addcolor')
    by_token['String']['text-color'] = '#' + doc.get_metadata('rmcolor')

    with tempfile.NamedTemporaryFile('w', suffix='.theme') as f:
        json.dump(style_def, f)
        # Flush so pandoc, reading the file by name, sees the full theme.
        f.flush()

        def render_as_raw_block(output_format):
            # Convert a tiny C++ snippet; the template output carries the
            # highlighting definitions for the requested format.
            template_path = os.path.join(
                datadir, 'template', 'highlighting')
            converted = pf.convert_text(
                '`_`{.cpp}',
                output_format=output_format,
                extra_args=['--highlight-style', f.name,
                            '--template', template_path])
            return pf.MetaBlocks(pf.RawBlock(converted, output_format))

        doc.metadata['highlighting-macros'] = render_as_raw_block('latex')
        doc.metadata['highlighting-css'] = render_as_raw_block('html')
def panflute2output(elem, format="json", doc=None):
    """Serialise panflute element(s) to JSON or to another pandoc format.

    Parameters
    ----------
    elem
        A single element, or a ``list`` / ``pf.ListContainer`` of elements.
        A single element is wrapped in a list.
    format : str
        ``"json"`` returns the dumped pandoc AST; any other value is passed
        to pandoc as the target (``-t``) format.
    doc : pf.Doc or None
        When given, a deep copy of it is used as the container document
        (its content, format and api_version are replaced); otherwise a
        fresh ``pf.Doc`` is created.

    Returns
    -------
    str
        The JSON AST, or pandoc's output for the requested format.
    """
    api_version = (1, 17, 3, 1)

    if not isinstance(elem, (list, pf.ListContainer)):
        elem = [elem]

    if doc is not None:
        # Work on a copy so the caller's document is left untouched.
        doc = copy.deepcopy(doc)
        doc.content = elem
        doc.format = format
        doc.api_version = api_version
    else:
        doc = pf.Doc(*elem, format=format, api_version=api_version)

    with io.StringIO() as buffer:
        pf.dump(doc, buffer)
        serialized = buffer.getvalue()

    if format == "json":
        return serialized
    return pf.run_pandoc(
        text=serialized,
        args=["-f", "json", "-t", format, "--wrap=none"])
def apply_filter(in_object, filter_func=None, out_format="panflute",
                 in_format="markdown", strip_meta=False,
                 strip_blank_lines=False, replace_api_version=True,
                 dry_run=False, **kwargs):
    """Convenience function to apply panflute filter(s) to a string,
    list of string lines, pandoc AST or panflute.Doc.

    Parameters
    ----------
    in_object: str or list[str] or dict or panflute.Doc
        a list/tuple of lines is joined with newlines; a dict must be a
        pandoc JSON AST with 'meta', 'blocks' and 'pandoc-api-version' keys
    filter_func:
        the filter function or a list/tuple/set of filter functions;
        each is called as ``func(doc, **kwargs)`` and must return the Doc.
        If None, no filter is applied
    out_format: str
        for use by pandoc or, if 'panflute', return the panflute.Doc
    in_format: str
        the pandoc input format; must be 'json' for a dict ``in_object``
    strip_meta: bool
        strip the document metadata before final conversion
    strip_blank_lines: bool
        replace each pair of consecutive newlines in the output string
        with a single newline (one pass)
    replace_api_version: bool
        for dict input only, if True, find the api_version of the available
        pandoc and reformat the json as appropriate
    dry_run: bool
        If True, return the Doc object, before applying the filter
    kwargs:
        passed on to every filter function

    Returns
    -------
    str or panflute.Doc
        the Doc when ``dry_run`` is set or ``out_format`` is 'panflute',
        otherwise the converted string

    Raises
    ------
    AssertionError
        if ``in_object`` is a dict and ``in_format`` is not 'json'
    ValueError
        if a dict ``in_object`` lacks one of the required keys
    TypeError
        if ``in_object`` is of an unsupported type
    """
    if isinstance(in_object, pf.Doc):
        doc = in_object
    else:
        if isinstance(in_object, dict):
            if in_format != "json":
                raise AssertionError(
                    "the in_format for a dict should be json, "
                    "not {}".format(in_format))
            for required_key in ("meta", "blocks", "pandoc-api-version"):
                if required_key not in in_object:
                    raise ValueError(
                        "the in_object does not contain a '{}' key".format(
                            required_key))
            if replace_api_version:
                # run pandoc on a null object, to get the correct api version
                null_raw = pf.run_pandoc("", args=["-t", "json"])
                null_stream = io.StringIO(null_raw)
                api_version = pf.load(null_stream).api_version
                # see panflute.load, w.r.t to legacy version
                if api_version is None:
                    in_object = [{"unMeta": in_object["meta"]},
                                 in_object["blocks"]]
                else:
                    ans = OrderedDict()
                    ans["pandoc-api-version"] = api_version
                    ans["meta"] = in_object["meta"]
                    ans["blocks"] = in_object["blocks"]
                    in_object = ans
            in_str = json.dumps(in_object)
        elif isinstance(in_object, (list, tuple)):
            in_str = "\n".join(in_object)
        elif isinstance(in_object, string_types):
            in_str = in_object
        else:
            raise TypeError("object not accepted: {}".format(in_object))
        doc = pf.convert_text(in_str, input_format=in_format,
                              standalone=True)

    doc.format = out_format

    if dry_run:
        return doc

    # Normalise filter_func to an iterable of callables. The default None
    # means "no filters" rather than an attempt to call None.
    if filter_func is None:
        filter_func = []
    elif not isinstance(filter_func, (list, tuple, set)):
        filter_func = [filter_func]

    out_doc = doc
    for func in filter_func:
        out_doc = func(out_doc, **kwargs)  # type: Doc

    # post-process Doc
    if strip_meta:
        out_doc.metadata = {}
    if out_format == "panflute":
        return out_doc

    # create out str
    out_str = pf.convert_text(out_doc, input_format="panflute",
                              output_format=out_format)

    # post-process final str
    if strip_blank_lines:
        out_str = out_str.replace("\n\n", "\n")

    return out_str
def format_image(image, doc):
    # type: (Image, Doc) -> Element
    """Format an image element for the document's output format.

    Originally adapted from:
    `pandoc-fignos <https://github.com/tomduck/pandoc-fignos/>`_

    The identifier and attributes are taken from the parent ``Span`` when
    that span carries ``LABELLED_IMAGE_CLASS``, otherwise from the image
    itself.

    Returns a ``tex`` ``RawInline`` figure for tex/latex, the (possibly
    modified) image for rst, the image optionally wrapped in an anchor for
    html/html5, and ``None`` for non-images and any other format.
    """
    if not isinstance(image, pf.Image):
        return None

    parent = image.parent
    has_label_span = (isinstance(parent, pf.Span)
                      and LABELLED_IMAGE_CLASS in parent.classes)
    label_source = parent if has_label_span else image
    identifier = label_source.identifier
    attributes = label_source.attributes

    target = doc.format

    if target in ("tex", "latex"):
        # Render the caption inlines to LaTeX through a throw-away document.
        caption_doc = Doc(pf.Para(*image.content))
        caption_doc.api_version = doc.api_version
        caption = ""
        if image.content:
            caption = pf.run_pandoc(
                json.dumps(caption_doc.to_json()),
                args=["-f", "json", "-t", "latex"]).strip()

        options = attributes.get("placement", "")

        # Width takes precedence over height; both are given as fractions.
        if "width" in attributes:
            size = "width={0}\\linewidth".format(
                convert_units(attributes["width"], "fraction"))
        elif "height" in attributes:
            size = "height={0}\\paperheight".format(
                convert_units(attributes["height"], "fraction"))
        else:
            size = ""

        common = dict(options=options, path=image.url,
                      caption=caption, size=size)
        if identifier:
            latex = LATEX_FIG_LABELLED.format(label=identifier, **common)
        else:
            latex = LATEX_FIG_UNLABELLED.format(**common)
        return pf.RawInline(latex, format="tex")

    if target in ("rst", ):
        if not image.content.list:
            # If the container is empty, then pandoc will assign an
            # iterative reference identifier to it (image0, image1).
            # However, this iterator restarts for each markdown cell,
            # which can lead to reference clashes.
            # Therefore we specifically assign the identifier here,
            # as its url.
            # TODO does this identifier need to be sanitized?
            # (it works fine in the tests)
            identifier = image.url
            image.content = pf.ListContainer(pf.Str(str(identifier)))
        # TODO formatting and span identifier (convert width/height to %)
        return image

    if target in ("html", "html5"):
        # TODO formatting, name by count
        return _wrap_in_anchor(image, identifier) if identifier else image

    return None
def format_image(image, doc):
    # type: (Image, Doc) -> Element
    """Convert an image into the representation used by ``doc.format``.

    Originally adapted from:
    `pandoc-fignos <https://github.com/tomduck/pandoc-fignos/>`_

    If the image sits inside a ``Span`` that has ``LABELLED_IMAGE_CLASS``,
    the span supplies the identifier and attributes; otherwise the image
    does. tex/latex output yields a raw ``tex`` figure, rst returns the
    image unchanged, html/html5 wraps the image in an anchor when it has
    an identifier, and everything else (including non-image elements)
    yields ``None``.
    """
    if not isinstance(image, pf.Image):
        return None

    span = None
    enclosing = image.parent
    if isinstance(enclosing, pf.Span):
        if LABELLED_IMAGE_CLASS in enclosing.classes:
            span = enclosing

    holder = image if span is None else span
    identifier, attributes = holder.identifier, holder.attributes

    fmt = doc.format
    if fmt in ("html", "html5"):
        # TODO formatting, name by count
        if not identifier:
            return image
        return _wrap_in_anchor(image, identifier)

    if fmt in ("rst", ):
        # TODO formatting and span identifier (convert width/height to %)
        return image

    if fmt not in ("tex", "latex"):
        return None

    # tex / latex: the caption is produced by running the image's inline
    # content through pandoc as a stand-alone paragraph.
    new_doc = Doc(pf.Para(*image.content))
    new_doc.api_version = doc.api_version
    if image.content:
        ast_json = json.dumps(new_doc.to_json())
        caption = pf.run_pandoc(
            ast_json, args=["-f", "json", "-t", "latex"]).strip()
    else:
        caption = ""

    options = attributes.get("placement", "")

    size = ''
    if "width" in attributes:
        fraction = convert_units(attributes['width'], "fraction")
        size = 'width={0}\\linewidth'.format(fraction)
    elif "height" in attributes:
        fraction = convert_units(attributes['height'], "fraction")
        size = 'height={0}\\paperheight'.format(fraction)

    if identifier:
        latex = LATEX_FIG_LABELLED.format(
            label=identifier,
            options=options,
            path=image.url,
            caption=caption,
            size=size,
        )
    else:
        latex = LATEX_FIG_UNLABELLED.format(
            options=options,
            path=image.url,
            caption=caption,
            size=size,
        )
    return pf.RawInline(latex, format="tex")