def main(ctx, filter_to, input_file, read, output, to, standalone, self_contained):
    """Run the Knitty pandoc filter over stdin and write the result to stdout.

    Derives a working-directory name from the output/input file name (or
    'stdout'), forwards any extra pandoc args, and — for ipynb output —
    post-processes the result through the panflute ``action`` filter.

    Raises:
        KnittyError: if ``filter_to`` is empty.
    """
    if not filter_to:
        raise KnittyError(f"Invalid Pandoc filter arg: '{filter_to}'")
    # Map markdown-family formats to a common 'md' suffix for the dir name.
    fmts = dict(commonmark='md', markdown='md', gfm='md')
    if output and (output != '-'):
        dir_name = p.basename(output).replace('.', '_')
    elif input_file and (input_file != '-'):
        dir_name = p.basename(input_file).replace('.', '_') + '_' + fmts.get(filter_to, filter_to)
    else:
        dir_name = 'stdout' + '_' + fmts.get(filter_to, filter_to)
    # Copy so we don't mutate the caller's ctx.args in place
    # (the original appended directly to ctx.args).
    pandoc_extra_args = list(ctx.args)
    if standalone:
        pandoc_extra_args.append('--standalone')
    if self_contained:
        pandoc_extra_args.append('--self-contained')
    out = knitty_pandoc_filter(sys.stdin.read(),
                               name=dir_name,
                               filter_to=filter_to,
                               standalone=standalone,
                               self_contained=self_contained,
                               pandoc_format=read,
                               pandoc_extra_args=pandoc_extra_args)
    if filter_to == 'ipynb':
        # Round-trip through panflute so `action` can post-process notebooks.
        with io.StringIO(out) as f:
            doc = pf.load(f)
        pf.run_filter(action, doc=doc)
        with io.StringIO() as f:
            pf.dump(doc, f)
            out = f.getvalue()
    sys.stdout.write(out)
def main():
    """Load markdown.md, build a Doc, and run the ExtractBulletList filter."""
    with open("markdown.md", "r") as f:
        md = f.read()
    doc = pf.Doc(*pf.convert_text(md), format="docx")
    # Bug fix: the original used .format(*doc.content), which unpacks every
    # element as a positional argument — it raises IndexError on an empty
    # document and silently ignores all but the first element. Format the
    # content list itself instead.
    pf.debug("doc: {}".format(doc.content))
    ebl = ExtractBulletList()
    pf.run_filter(ebl.action, doc=doc)
def test_all():
    """Round-trip a fenced-code sample through pandoc JSON and assert that
    applying the YAML filters leaves the serialized document unchanged."""
    fn = Path("./tests/sample_files/fenced/example.md")
    print(f'\n - Loading markdown "{fn}"')
    with fn.open(encoding='utf-8') as f:
        markdown_text = f.read()
    print(' - Converting Markdown to JSON')
    json_pandoc = pf.convert_text(markdown_text, input_format='markdown', output_format='json', standalone=True)
    print(' - Constructing Doc() object')
    doc = pf.convert_text(json_pandoc, input_format='json', output_format='panflute', standalone=True)
    print(' - Applying YAML filter...')
    pf.run_filter(pf.yaml_filter, tag='spam', function=fenced_action, doc=doc)
    # Serialize back to JSON and compare with the pre-filter serialization;
    # the 'spam' filter is expected to be a no-op on this sample.
    json_panflute = pf.convert_text(doc, input_format='panflute', output_format='json', standalone=True)
    print(' Are both JSON files equal?')
    print(f' - Length: {len(json_pandoc) == len(json_panflute)} ({len(json_pandoc)} vs {len(json_panflute)})')
    print(f' - Content: {json_pandoc == json_panflute}')
    assert json_pandoc == json_panflute
    print(' - Applying Strict YAML filter...')
    pf.run_filter(pf.yaml_filter, tag='eggs', function=fenced_action, doc=doc, strict_yaml=True)
    json_panflute = pf.convert_text(doc, input_format='panflute', output_format='json', standalone=True)
    print(' Are both JSON files equal?')
    print(f' - Length: {len(json_pandoc) == len(json_panflute)} ({len(json_pandoc)} vs {len(json_panflute)})')
    print(f' - Content: {json_pandoc == json_panflute}')
    assert json_pandoc == json_panflute
    print(' - Done!')
def main():
    """Execute the mintmod filter; empty paragraphs are removed on finalize."""
    is_debug = bool(os.environ.get("INNOCONV_DEBUG"))

    def _finalize(doc):
        """Post-process the document once the filter pass completes."""
        remove_empty_paragraphs(doc)
        # remove_annotations must not happen in subprocesses — only run it
        # when we are not inside a recursive invocation.
        if not os.getenv("INNOCONV_RECURSION_DEPTH"):
            remove_annotations(doc)

    run_filter(MintmodFilterAction(debug=is_debug).filter, finalize=_finalize)
def main(doc=None):
    """Entry point: run the filter with its prepare/finalize hooks."""
    result = panflute.run_filter(action, prepare=prepare, finalize=finalize, doc=doc)
    return result
def main(doc=None):
    """Apply the YAML-options filter to fenced code blocks tagged 'sometag'."""
    return pf.run_filter(
        pf.yaml_filter,
        prepare=prepare,
        finalize=finalize,
        tag='sometag',
        function=fenced_action,
        doc=doc,
    )
def main(doc=None):
    """Standard panflute entry point.

    NOTE: The main function has to be exactly like this if we want to be
    able to run filters automatically with '-F panflute'.
    """
    ret = pf.run_filter(action, prepare=prepare, finalize=finalize, doc=doc)
    return ret
def main(doc=None, extract_formats=True):
    # type: (Doc) -> None
    """Run the citation-processing filter.

    If ``extract_formats`` is true, citations defined in latex, rst or html
    formats are converted to special Span elements.
    """
    return pf.run_filter(process_citations, prepare=prepare, finalize=finalize, doc=doc)
def main(doc=None):
    """Run the typography filter, logging start and end markers."""
    logging.debug("Start pandoc filter 'typography.py'")
    result = pf.run_filter(action, prepare=prepare, finalize=finalize, doc=doc)
    logging.debug("End pandoc filter 'typography.py'")
    return result
def run(args):
    """Extract CppCoreGuidelines code examples grouped by rule id.

    Converts the markdown file to pandoc JSON, collects headers/codeblocks
    via the `action` filter, then walks backwards from each codeblock to
    find its governing rule header, classifies the example, and writes the
    grouped result as JSON.

    :param args: [input markdown path, output json path]
    :return: -1 on usage error, otherwise None (implicit).
    """
    if not args or len(args) < 2:
        print('Usage: rexex.py <path of CppCoreGuidelines.md> <output file>', file=sys.stderr)
        return -1
    input_file = args[0]
    output_file = args[1]
    data = pypandoc.convert_file(input_file, to='json')
    doc = panflute.load(io.StringIO(data))
    # Side-channel lists populated by `action` during the filter walk.
    doc.headers = []
    doc.codeblocks = []
    doc = panflute.run_filter(action, doc=doc)
    # rules[rule_id][example_class] -> list of codeblock texts
    # NOTE(review): keys must be str for json.dumps — presumably
    # match_rule_id / classify_example return strings; confirm.
    rules = collections.defaultdict(lambda: collections.defaultdict(list))
    for codeblock in doc.codeblocks:
        possible_header = codeblock
        header_match = match_rule_id(possible_header)
        example_match = match_example_header(possible_header)
        # Walk backwards through preceding siblings until a rule header is
        # found, remembering the nearest "example" header on the way.
        while not header_match:
            if not possible_header:
                print('ERROR: cannot identify rule of codeblock\n{}'.format(
                    codeblock.text))
                break
            if not example_match:
                example_match = match_example_header(possible_header)
            # NOTE(review): if .prev becomes None here, match_rule_id(None)
            # is called on the next line — assumes it tolerates None; confirm.
            possible_header = possible_header.prev
            header_match = match_rule_id(possible_header)
        if not header_match:
            continue
        example_class = classify_example(codeblock.text, str(example_match))
        rules[header_match][example_class].append(codeblock.text)
    with open(output_file, 'w') as output:
        output.write(json.dumps(rules))
def main(doc=None):
    """Run the filter, decoding stdin as UTF-8."""
    stdin_utf8 = codecs.getreader('utf8')(sys.stdin)
    return panflute.run_filter(filter, prepare=init_metavars, finalize=finalize,
                               doc=doc, input_stream=stdin_utf8)
def main(doc=None):
    """Run the Google-Drive link filter."""
    return run_filter(handle_google_drive, prepare=prepare, finalize=finalize, doc=doc)
def main(doc=None):
    """Dispatch fenced code blocks to the listing handler by language tag."""
    handlers = dict.fromkeys(('python', 'bash', 'sql'), fenced_listing)
    return pf.run_filter(pf.yaml_filter, doc=doc, tags=handlers)
def main(doc=None, input_stream=None, output_stream=None):
    """Run the filter on the given streams (panflute defaults to stdio)."""
    return pf.run_filter(action, prepare=prepare, finalize=finalize, doc=doc,
                         input_stream=input_stream, output_stream=output_stream)
def main(doc=None):
    """Convert native pandoc tables into the pantable CSV table format:

    - in code-block with class table
    - metadata in YAML
    - table in CSV
    """
    result = panflute.run_filter(table2csv, doc=doc)
    return result
def main(doc=None):
    """Configure logging, then run the filter over UTF-8 decoded stdin."""
    logging.basicConfig(format='%(levelname)s:%(message)s', level=LOG_LEVEL)
    stdin_utf8 = codecs.getreader('utf8')(sys.stdin)
    return panflute.run_filter(filter, prepare=init_metavars, finalize=finalize,
                               doc=doc, input_stream=stdin_utf8)
def main(doc=None):
    """Run the moodle filter, logging start and end markers."""
    logging.info("Start pandoc filter 'my_moodle_filter'")
    result = pf.run_filter(action, prepare=prepare, finalize=finalize, doc=doc)
    logging.info("End pandoc filter 'my_moodle_filter'")
    return result
def main(doc=None):
    """Run the include filter.

    Arguments
    ---------
    doc: pandoc document
    """
    return run_filter(include, doc=doc)
def main(doc=None):
    """Run the block filter with its prepare hook.

    Arguments
    ---------
    doc: pandoc document
    """
    return run_filter(block, prepare=prepare, doc=doc)
def main(doc=None):
    """Run the image filter with its prepare/finalize hooks.

    Arguments
    ---------
    doc: pandoc document
    """
    return run_filter(image, prepare=prepare, finalize=finalize, doc=doc)
def main(doc=None):
    """Parse fenced code blocks with class ``odootable`` through
    panflute.yaml_filter using ``convert2table`` (strict YAML mode)."""
    return panflute.run_filter(
        panflute.yaml_filter,
        tag='odootable',
        function=convert2table,
        strict_yaml=True,
        doc=doc,
    )
def main(doc: panflute.Doc = None):
    """Convert all tables to the CSV table format defined in pantable

    - in code-block with class table
    - metadata in YAML
    - table in CSV
    """
    return panflute.run_filter(table_to_codeblock, doc=doc)
def main(doc=None):
    """Run the style filter: start logging, do the work, log completion.

    :param doc: document to parse
    :return: parsed document
    """
    logging.debug("Start pandoc filter 'style.py'")
    result = pf.run_filter(action, prepare=prepare, finalize=finalize, doc=doc)
    logging.debug("End pandoc filter 'style.py'")
    return result
def with_markdown(content, space, name):
    """Use pandoc to get markdown from MediaWiki format.

    Best-effort: on any failure the original content is kept and the page
    name is appended to the failure log.
    """
    try:
        as_json = pypandoc.convert_text(content, 'json', format='mediawiki')
        doc = panflute.load(io.StringIO(as_json))
        panflute.run_filter(drop_loose_categories, doc=doc)
        panflute.run_filter(rewrite_internal_links, doc=doc)
        content = back_to_markdown(doc)
    except Exception:
        # Deliberately broad: conversion errors should never abort the run.
        click.echo('Failed to parse content! Continuing ...\n')
        with open(FAILURE_LOG, 'a') as handle:
            handle.write('Failed to parse content. Could not re-write links '
                         'and drop categories for page {}\n'.format(name))
    return convert_image_format(content)
def main(doc=None):
    """Dispatch fenced blocks (table/figure/figures/stlog) via yaml_filter."""
    handlers = {
        'table': table_fenced_action,
        'figure': figure_fenced_action,
        'figures': figures_fenced_action,
        'stlog': stlog_fenced_action,
    }
    return pf.run_filter(action=pf.yaml_filter, prepare=prepare,
                         finalize=finalize, tags=handlers, doc=doc,
                         stop_if=stop_if)
def main(doc=None):
    """Remove empty headings from Vimwiki file.

    Pandoc filter using panflute.
    """
    document = pf.load()
    # Repeated five times — presumably once per heading level, so that
    # removing a heading can cascade to its now-empty parent.
    for _ in range(5):
        document = pf.run_filter(action, prepare=prepare, finalize=finalize,
                                 doc=document)
    return pf.dump(document)
def main(doc: panflute.Doc = None):
    """A pandoc filter converting a CSV table in a code block.

    Fenced code blocks with class table are parsed by panflute.yaml_filter
    using the function :func:`pantable.codeblock_to_table.codeblock_to_table`.
    """
    return panflute.run_filter(panflute.yaml_filter, tag='table',
                               function=codeblock_to_table, strict_yaml=True,
                               doc=doc)
def main(doc=None):
    """Main function called by the script.

    Arguments
    ---------
    doc: The pandoc document

    Returns
    -------
    The modified pandoc document
    """
    return run_filter(admonition, doc=doc, prepare=prepare, finalize=finalize)
def main(doc=None):
    """Run the include_exclude filter, logging a banner and the elapsed time."""
    banner = "=" * 78
    logging.info(banner)
    logging.info(
        f"THIS IS include_exclude.py release {release}. A pandoc filter using panflute."
    )
    logging.info("(C) in 2018-2021 by Norman Markgraf")
    logging.debug("Start pandoc filter 'include_exclude'")
    start = time.perf_counter()
    result = pf.run_filter(action, prepare=prepare, finalize=finalize, doc=doc)
    elapsed_time = time.perf_counter() - start
    logging.debug("End pandoc filter 'include_exclude'")
    logging.info(f"Running time: {elapsed_time} seconds.")
    logging.info(banner)
    return result
def convert_markdown(markdown_files, output):
    """Converts markdown file's mermaid code blocks to image blocks.

    It does this by:

    * Convert the markdown file to JSON, which include various details such as styling
    * Then find all mermaid code blocks
    * Save the code block to `input.mmd`
    * Use mermaid-cli to export `input.mmd` to a png file
    * Finally replace all code blocks with the image blocks referencing the new image
    * Convert JSON to markdown
    * Save new markdown file

    Where a mermaid code block looks something like::

        ```mermaid
        graph LR
        A --> B
        ```

    Args:
        markdown_files (:obj:`list` of :obj:`str`): List of paths of the markdown files, we will parse/convert.
        output (str): Path to the output folder where the new markdown files will be saved.
    """
    for markdown_file in markdown_files:
        logger.info(f"Exporting {markdown_file} mermaid code blocks to images.")
        doc = convert_markdown_to_json(markdown_file)
        try:
            doc = panflute.run_filter(export_mermaid_blocks, doc=doc, output=output)
        except subprocess.CalledProcessError as e:
            # NOTE(review): the message says "Skiping file" but sys.exit(1)
            # aborts the whole run — confirm whether `continue` was intended.
            logger.error(f"Failed to convert mermaid code block to image. Skiping file. {e}")
            sys.exit(1)
        except OSError as e:
            # Same mismatch as above: logs "Skipping" but exits the process.
            logger.error(f"Failed to open/create `input.mmd`, check file permissions. Skipping file. {e}")
            sys.exit(1)
        file_name = os.path.basename(markdown_file)
        new_file_name = os.path.join(output, file_name)
        # Presumably mutates doc in place (return value is unused) — swaps
        # mermaid code blocks for image elements; confirm against its def.
        replace_mermaid_blocks_with_images(doc)
        save_new_file(doc, new_file_name)
def test_all():
    """Load a pandoc JSON fixture, dump it back out, and verify the trivial,
    YAML, and strict-YAML filters all leave the serialized doc unchanged."""
    input_fn = './tests/fenced/input.json'
    output_fn = './tests/fenced/output.json'
    # Test fenced filter
    print('\nLoading JSON...')
    with open(input_fn, encoding='utf-8') as f:
        doc = pf.load(f)
    print('Dumping JSON...')
    with open(output_fn, mode='w', encoding='utf-8') as f:
        pf.dump(doc, f)
        # Trailing newline so the dump matches the fixture byte-for-byte.
        f.write('\n')
    print(' - Done!')
    print('\nComparing...')
    with open(input_fn, encoding='utf-8') as f:
        input_data = f.read()
    with open(output_fn, encoding='utf-8') as f:
        output_data = f.read()
    print('Are both files the same?')
    print(' - Length:', len(input_data) == len(output_data), len(input_data), len(output_data))
    print(' - Content:', input_data == output_data)
    print('\nApplying trivial filter...')
    pf.run_filter(empty_filter, doc=doc)
    print(' - Done!')
    dump_and_compare(doc, input_fn, output_fn)
    print('\nApplying YAML filter...')
    pf.run_filter(pf.yaml_filter, tag='spam', function=fenced_action, doc=doc)
    print(' - Done!')
    dump_and_compare(doc, input_fn, output_fn)
    print('\nApplying Strict YAML filter...')
    pf.run_filter(pf.yaml_filter, tag='eggs', function=fenced_action, doc=doc, strict_yaml=True)
    print(' - Done!')
    dump_and_compare(doc, input_fn, output_fn)
def main(doc=None):
    """Standard panflute entry point for this filter."""
    result = pf.run_filter(action, doc=doc)
    return result
def main(doc=None):
    """Run the caps filter over the document."""
    result = run_filter(caps, doc=doc)
    return result
def prepare(doc):
    """Build the list of target .html filenames from the 'files' metadata
    glob and strip the document down to its first element (the TOC)."""
    # Create file list
    fns = doc.get_metadata('files')
    pf.debug('-' * 64)
    pf.debug('Expanding pattern:', fns)
    fns = glob.glob(fns)
    pf.debug('Files:', fns)
    # Links will point at the rendered .html, not the source .md.
    fns = [fn.replace('.md', '.html') for fn in fns]
    doc.fns = fns
    pf.debug('-' * 64)
    # Clear all contents except TOC
    doc.content.list = doc.content.list[:1]


def action(e, doc):
    """Rewrite link targets inside the Div with identifier 'TOC'."""
    if isinstance(e, pf.Div) and e.identifier=='TOC':
        e.walk(fix_links)


def fix_links(e, doc):
    """Prefix each TOC link with the current filename; advance to the next
    filename at every top-level list item.

    NOTE(review): correctness depends on walk order — each Link must be
    visited while doc.fns[0] is still its file; confirm traversal order.
    """
    if isinstance(e, pf.Link):
        e.url = doc.fns[0] + e.url
    elif isinstance(e, pf.ListItem) and isinstance(e.parent.parent, pf.Div):
        doc.fns.pop(0)  # Switch to the next filename


if __name__ == '__main__':
    pf.run_filter(action, prepare=prepare)
def main(doc=None):
    """Run the filter with its finalize hook."""
    result = pf.run_filter(action, finalize=finalize, doc=doc)
    return result
def main(doc=None):
    """Run the filter with its prepare/finalize hooks."""
    return pf.run_filter(action, prepare=prepare, finalize=finalize, doc=doc)
def main(doc=None):
    """Parse fenced code blocks tagged 'csv' through the YAML filter."""
    return pf.run_filter(
        pf.yaml_filter,
        tag='csv',
        function=fenced_action,
        doc=doc,
    )
""" Render superscript with html <sup> tags: >> echo 2^10^ is 1024 | pandoc --to=markdown -F html_superscript.py 2<sup>10</sup> is 1024 """ import panflute as pf def action(elem, doc): if isinstance(elem, pf.Superscript) and doc.format == 'markdown': text = '<sup>' + pf.stringify(elem) + '</sup>' return pf.RawInline(text) if __name__ == '__main__': pf.run_filter(action)
def main(doc=None):
    """Run the tip filter with its prepare/finalize hooks."""
    return run_filter(_tip, doc=doc, prepare=_prepare, finalize=_finalize)
import io  # Bug fix: io was used below (io.StringIO) but never imported.

import pypandoc
import panflute


def prepare(doc):
    """Attach empty collector lists to the document before the walk."""
    doc.images = []
    doc.links = []


def action(elem, doc):
    """Collect every Image and Link element encountered during the walk."""
    if isinstance(elem, panflute.Image):
        doc.images.append(elem)
    elif isinstance(elem, panflute.Link):
        doc.links.append(elem)


if __name__ == '__main__':
    # Convert the markdown file to pandoc JSON, load it, collect the URLs,
    # and print them grouped by kind.
    data = pypandoc.convert_file('example.md', 'json')
    f = io.StringIO(data)
    doc = panflute.load(f)
    doc = panflute.run_filter(action, prepare=prepare, doc=doc)
    print("\nImages:")
    for image in doc.images:
        print(image.url)
    print("\nLinks:")
    for link in doc.links:
        print(link.url)