def run(args):
    """Extract code examples per rule from CppCoreGuidelines.md into JSON.

    args[0] is the guidelines markdown path, args[1] the output JSON path.
    Returns -1 on bad usage; otherwise writes the rules mapping and returns None.
    """
    if not args or len(args) < 2:
        print('Usage: rexex.py <path of CppCoreGuidelines.md> <output file>', file=sys.stderr)
        return -1
    input_file, output_file = args[0], args[1]

    # Convert the markdown to Pandoc JSON and collect headers/codeblocks
    # via the module-level `action` filter.
    ast_json = pypandoc.convert_file(input_file, to='json')
    doc = panflute.load(io.StringIO(ast_json))
    doc.headers = []
    doc.codeblocks = []
    doc = panflute.run_filter(action, doc=doc)

    rules = collections.defaultdict(lambda: collections.defaultdict(list))
    for codeblock in doc.codeblocks:
        # Walk backwards from the code block until a rule-id header is found,
        # remembering the closest "Example"-style header on the way.
        candidate = codeblock
        rule_match = match_rule_id(candidate)
        example_match = match_example_header(candidate)
        while not rule_match:
            if not candidate:
                print('ERROR: cannot identify rule of codeblock\n{}'.format(codeblock.text))
                break
            if not example_match:
                example_match = match_example_header(candidate)
            candidate = candidate.prev
            rule_match = match_rule_id(candidate)
        if not rule_match:
            continue
        example_class = classify_example(codeblock.text, str(example_match))
        rules[rule_match][example_class].append(codeblock.text)

    with open(output_file, 'w') as output:
        output.write(json.dumps(rules))
def main(ctx, filter_to, input_file, read, output, to, standalone, self_contained):
    """Run the knitty pandoc filter over stdin and write the result to stdout."""
    if not filter_to:
        raise KnittyError(f"Invalid Pandoc filter arg: '{filter_to}'")

    # Short folder-name suffixes for the markdown-family output formats.
    fmts = {'commonmark': 'md', 'markdown': 'md', 'gfm': 'md'}
    suffix = fmts.get(filter_to, filter_to)
    if output and output != '-':
        dir_name = p.basename(output).replace('.', '_')
    elif input_file and input_file != '-':
        dir_name = p.basename(input_file).replace('.', '_') + '_' + suffix
    else:
        dir_name = 'stdout_' + suffix

    pandoc_extra_args = ctx.args
    if standalone:
        pandoc_extra_args.append('--standalone')
    if self_contained:
        pandoc_extra_args.append('--self-contained')

    out = knitty_pandoc_filter(
        sys.stdin.read(),
        name=dir_name,
        filter_to=filter_to,
        standalone=standalone,
        self_contained=self_contained,
        pandoc_format=read,
        pandoc_extra_args=pandoc_extra_args,
    )

    if filter_to == 'ipynb':
        # Round-trip the output through panflute so `action` can
        # post-process the notebook AST.
        with io.StringIO(out) as stream:
            doc = pf.load(stream)
        pf.run_filter(action, doc=doc)
        with io.StringIO() as stream:
            pf.dump(doc, stream)
            out = stream.getvalue()
    sys.stdout.write(out)
def convert_markdown_to_json(markdown_file):
    """Convert a markdown file to Pandoc JSON and load it as a panflute Doc.

    The returned document gets an empty ``mermaid`` dict attached, used later
    to record code blocks that must be replaced with rendered images.

    Args:
        markdown_file (str): Path of the markdown file to parse/convert.

    Returns:
        panflute.Doc: Pandoc document container (exits the process if pandoc
        is missing on the host).
    """
    try:
        ast_json = pypandoc.convert_file(str(markdown_file), "json")
    except OSError as e:
        # pypandoc raises OSError when the pandoc binary cannot be found.
        logger.error(f"Pandoc is not installed on the host machine. {e}")
        sys.exit(1)
    document = panflute.load(io.StringIO(ast_json))
    document.mermaid = {}
    return document
def run_filters(
    actions,
    prepare=None,
    finalize=None,
    input_stream=None,
    output_stream=None,
    doc=None,
    **kwargs,
):
    """Apply several filter actions to a document in sequence.

    When *doc* is None the document is loaded from *input_stream* and, after
    filtering, dumped to *output_stream*; otherwise the filtered document is
    returned to the caller. Extra ``kwargs`` are bound onto each action.
    """
    owns_io = doc is None
    if owns_io:
        doc = pf.load(input_stream=input_stream)
    if prepare is not None:
        prepare(doc)
    for action in actions:
        if kwargs:
            # _BeforeAction objects bind the extra kwargs themselves;
            # plain callables are wrapped with functools.partial.
            if isinstance(action, _BeforeAction):
                action.partial(**kwargs)
            else:
                action = functools.partial(action, **kwargs)
        doc = doc.walk(action, doc)
    if finalize is not None:
        finalize(doc)
    if owns_io:
        pf.dump(doc, output_stream=output_stream)
        return None
    return doc
def main(doc: Optional[Doc] = None) -> None:
    """Read a Pandoc JSON document from stdin, tangle it, and dump it back."""
    import sys
    import io
    import panflute

    raw = sys.stdin.read()
    doc = panflute.load(io.StringIO(raw))
    tangle.prepare(doc)
    # Tangle pass first, then this module's own action.
    for step in (tangle.action, action):
        doc = doc.walk(step)
    panflute.dump(doc)
def main(doc=None):
    """Remove empty headings from a Vimwiki file.

    Pandoc filter using panflute.

    Args:
        doc: Optional pre-loaded panflute Doc. When None (the normal filter
            invocation) the document is read from stdin.

    Returns:
        None: ``pf.dump`` writes the document to stdout and returns None.
    """
    # Bug fix: the original ignored the ``doc`` argument and always
    # re-loaded from stdin.
    newdoc = pf.load() if doc is None else doc
    # Removing a heading can leave its parent heading empty, so re-run the
    # filter once per possible heading level (pandoc has at most ~6, 5
    # passes matches the original behavior).
    for _ in range(5):
        newdoc = pf.run_filter(action, prepare=prepare, finalize=finalize,
                               doc=newdoc)
    return pf.dump(newdoc)
def main():
    """Manubot pandoc filter entry point: process citations in a document."""
    from manubot.command import setup_logging_and_errors, exit_if_error_handler_fired

    diagnostics = setup_logging_and_errors()
    args = parse_args()
    # Let panflute handle io to sys.stdout / sys.stdin to set utf-8 encoding.
    # args.input=None for stdin, args.output=None for stdout
    doc = pf.load(input_stream=args.input)
    level_name = doc.get_metadata("manubot-log-level", "WARNING")
    diagnostics["logger"].setLevel(getattr(logging, level_name))
    process_citations(doc)
    pf.dump(doc, output_stream=args.output)
    if doc.get_metadata("manubot-fail-on-errors", False):
        exit_if_error_handler_fired(diagnostics["error_handler"])
def inner_test_idempotent(input_fn, output_fn):
    """Check that load -> dump round-trips a Pandoc JSON file unchanged,
    before and after applying a trivial (no-op) filter."""

    def write_doc(document):
        # Dump the doc plus a trailing newline, matching pandoc's output.
        with open(output_fn, mode='w', encoding='utf-8') as fh:
            pf.dump(document, fh)
            fh.write('\n')

    def read_both():
        with open(input_fn, encoding='utf-8') as fh:
            src = fh.read()
        with open(output_fn, encoding='utf-8') as fh:
            dst = fh.read()
        return src, dst

    print('\nLoading JSON...')
    with open(input_fn, encoding='utf-8') as fh:
        doc = pf.load(fh)

    print('Dumping JSON...')
    write_doc(doc)
    print(' - Done!')

    print('\nComparing...')
    input_data, output_data = read_both()
    print('Are both files the same?')
    print(' - Length:', len(input_data) == len(output_data), len(input_data), len(output_data))
    print(' - Content:', input_data == output_data)

    print('\nApplying trivial filter...')
    doc = doc.walk(action=empty_test, doc=doc)
    print(' - Done!')

    print(' - Dumping JSON...')
    write_doc(doc)
    print(' - Done!')

    print(' - Comparing...')
    input_data, output_data = read_both()
    print(' - Are both files the same?')
    print(' - Length:', len(input_data) == len(output_data), len(input_data), len(output_data))
    print(' - Content:', input_data == output_data)
    assert input_data == output_data
def get_doc_from_markup(markup, style="innoconv-debug", output=None, lang="de"):
    """Run panzer on markup and return the parsed panflute Doc.

    Returns None if panzer's JSON output could not be decoded; raises
    RuntimeError if panzer exits non-zero.
    """
    command = [
        "panzer",
        "---panzer-support",
        PANZER_SUPPORT_DIR,
        "--metadata=style:{}".format(style),
        "--metadata=lang:{}".format(lang),
        "--from=latex+raw_tex",
        "--to=json",
        "--standalone",
    ]
    if output:
        command.append("--output={}".format(output))
    process = subprocess.Popen(
        command,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        env=os.environ.copy(),
    )
    process.stdin.write(markup.encode(ENCODING))
    try:
        outs, errs = process.communicate(timeout=30)
    except subprocess.TimeoutExpired:
        # Kill a hung panzer and collect whatever it produced.
        process.kill()
        outs, errs = process.communicate()
    stderr_text = errs.decode(ENCODING).strip()
    if stderr_text:
        pf.debug(stderr_text)
    if process.returncode != 0:
        raise RuntimeError("Failed to run panzer!")
    json_raw = outs.decode(ENCODING)
    try:
        return pf.load(StringIO(json_raw))
    except JSONDecodeError:
        log("Couldn't decode JSON: {}".format(json_raw))
def inner_test_idempotent(input_fn, output_fn):
    """Verify a load/dump round-trip is byte-stable, with and without a
    no-op filter pass."""

    def dump_to_output(d):
        # Match pandoc's trailing newline on dump.
        with open(output_fn, mode='w', encoding='utf-8') as out:
            pf.dump(d, out)
            out.write('\n')

    def slurp(path):
        with open(path, encoding='utf-8') as src:
            return src.read()

    print('\nLoading JSON...')
    with open(input_fn, encoding='utf-8') as src:
        doc = pf.load(src)

    print('Dumping JSON...')
    dump_to_output(doc)
    print(' - Done!')

    print('\nComparing...')
    input_data = slurp(input_fn)
    output_data = slurp(output_fn)
    print('Are both files the same?')
    print(' - Length:', len(input_data) == len(output_data), len(input_data), len(output_data))
    print(' - Content:', input_data == output_data)

    print('\nApplying trivial filter...')
    doc = doc.walk(action=empty_test, doc=doc)
    print(' - Done!')

    print(' - Dumping JSON...')
    dump_to_output(doc)
    print(' - Done!')

    print(' - Comparing...')
    input_data = slurp(input_fn)
    output_data = slurp(output_fn)
    print(' - Are both files the same?')
    print(' - Length:', len(input_data) == len(output_data), len(input_data), len(output_data))
    print(' - Content:', input_data == output_data)
    assert input_data == output_data
def run():
    """Benchmark: load a Pandoc JSON file, run a no-op filter, dump it."""
    print('\nLoading JSON...')
    source = 'benchmark.json'
    target = 'panflute.json'
    with open(source, encoding='utf-8') as handle:
        doc = pf.load(handle)

    print('\nApplying trivial filter...')
    doc = doc.walk(action=empty_test, doc=doc)

    print('Dumping JSON...')
    with open(target, mode='w', encoding='utf-8') as handle:
        pf.dump(doc, handle)
        handle.write('\n')
    print(' - Done!')
def pandoc_filters():
    """
    run a set of ipypublish pandoc filters directly on the pandoc AST,
    via ``pandoc --filter ipubpandoc``
    """
    doc = pf.load()

    # in an rmarkdown file, the metadata will be under a root `jupyter` key
    jmeta = doc.get_metadata('jupyter', {})
    meta = pf.tools.meta2builtin(doc.metadata)
    if 'jupyter' in meta and hasattr(meta["jupyter"], 'items'):
        # Hoist the jupyter sub-metadata to the top level.
        # NOTE(review): assumes these three statements belong inside this
        # branch (indentation was lost in the source) — confirm upstream.
        jmeta = meta.pop("jupyter")
        meta.update(jmeta)
        doc.metadata = meta  # builtin2meta(meta)

    apply_filters = doc.get_metadata(IPUB_META_ROUTE + ".apply_filters",
                                     default=True)
    convert_raw = doc.get_metadata(IPUB_META_ROUTE + ".convert_raw",
                                   default=True)

    if apply_filters:
        if convert_raw:
            filters = [
                prepare_raw.main,
                prepare_cites.main,
                prepare_labels.main,
                format_cite_elements.main,
                format_raw_spans.main,
                format_label_elements.main,
                rmarkdown_to_mpe.main,
            ]
        else:
            filters = [
                prepare_cites.main,
                prepare_labels.main,
                format_cite_elements.main,
                format_label_elements.main,
                rmarkdown_to_mpe.main,
            ]
    else:
        filters = []

    out_doc = doc
    for func in filters:
        out_doc = func(out_doc)  # type: pf.Doc

    # TODO strip meta?
    # Bug fix: dump the filtered document, not the original input —
    # filters that return a new Doc were previously discarded.
    pf.dump(out_doc)
def run():
    """Load benchmark.json, apply a trivial filter, and write panflute.json."""
    print('\nLoading JSON...')
    in_path, out_path = 'benchmark.json', 'panflute.json'
    with open(in_path, encoding='utf-8') as reader:
        document = pf.load(reader)

    print('\nApplying trivial filter...')
    document = document.walk(action=empty_test, doc=document)

    print('Dumping JSON...')
    with open(out_path, mode='w', encoding='utf-8') as writer:
        pf.dump(document, writer)
        writer.write('\n')
    print(' - Done!')
def test_all():
    """Round-trip and filter tests for the fenced-code YAML filters."""
    input_fn = './tests/fenced/input.json'
    output_fn = './tests/fenced/output.json'

    # Test fenced filter
    print('\nLoading JSON...')
    with open(input_fn, encoding='utf-8') as handle:
        doc = pf.load(handle)

    print('Dumping JSON...')
    with open(output_fn, mode='w', encoding='utf-8') as handle:
        pf.dump(doc, handle)
        handle.write('\n')
    print(' - Done!')

    print('\nComparing...')
    with open(input_fn, encoding='utf-8') as handle:
        input_data = handle.read()
    with open(output_fn, encoding='utf-8') as handle:
        output_data = handle.read()
    print('Are both files the same?')
    print(' - Length:', len(input_data) == len(output_data), len(input_data), len(output_data))
    print(' - Content:', input_data == output_data)

    print('\nApplying trivial filter...')
    pf.run_filter(empty_filter, doc=doc)
    print(' - Done!')
    dump_and_compare(doc, input_fn, output_fn)

    # The two YAML passes differ only in tag and strictness.
    for label, kwargs in (
        ('YAML', dict(tag='spam', function=fenced_action)),
        ('Strict YAML', dict(tag='eggs', function=fenced_action, strict_yaml=True)),
    ):
        print('\nApplying {} filter...'.format(label))
        pf.run_filter(pf.yaml_filter, doc=doc, **kwargs)
        print(' - Done!')
        dump_and_compare(doc, input_fn, output_fn)
def inner_test_stringify(input_fn, output_fn):
    """Write panflute's and pandocfilters' stringify output to temp files
    for comparison. (``output_fn`` is accepted for interface parity but
    unused, as in the original.)"""
    panflute_txt = './tests/temp_panflute.txt'
    benchmark_txt = './tests/temp_benchmark.txt'
    print('Testing stringify()')

    with open(input_fn, encoding='utf-8') as handle:
        doc = pf.load(handle)
    text = pf.stringify(doc)
    with open(panflute_txt, encoding='utf-8', mode='w') as handle:
        handle.write(text)

    with open(input_fn, encoding='utf-8') as handle:
        raw_doc = json.load(handle)
    text = pandocfilters.stringify(raw_doc)
    with open(benchmark_txt, encoding='utf-8', mode='w') as handle:
        handle.write(text)
def inner_test_stringify(input_fn, output_fn):
    """Dump stringify() results from panflute and from pandocfilters into
    temp files so they can be diffed. ``output_fn`` is unused (kept for
    interface parity)."""
    out_benchmark = './tests/temp_benchmark.txt'
    out_panflute = './tests/temp_panflute.txt'
    print('Testing stringify()')

    # panflute version
    with open(input_fn, encoding='utf-8') as src:
        stringified = pf.stringify(pf.load(src))
    with open(out_panflute, encoding='utf-8', mode='w') as dst:
        dst.write(stringified)

    # pandocfilters (reference) version
    with open(input_fn, encoding='utf-8') as src:
        stringified = pandocfilters.stringify(json.load(src))
    with open(out_benchmark, encoding='utf-8', mode='w') as dst:
        dst.write(stringified)
def main() -> None:
    """Entangled filter pipeline: tangle, annotate, then doctest the doc."""
    ## ------ begin <<load-document>>[0]
    import io
    import sys

    doc = panflute.load(io.StringIO(sys.stdin.read()))
    ## ------ end
    doc.config = read_config()
    # Each stage prepares the document, then walks it with its action.
    for stage in (tangle, annotate, doctest):
        stage.prepare(doc)
        doc = doc.walk(stage.action)
    panflute.dump(doc)
def pandoc_filters():
    """
    run a set of rst2myst pandoc filters directly on the pandoc AST,
    via ``pandoc --filter rst2myst``
    """
    doc = pf.load()

    # NOTE(review): these metadata reads are currently unused (the filter
    # list below is empty); kept for parity with the ipypublish variant —
    # confirm before removing.
    meta = pf.tools.meta2builtin(doc.metadata)
    apply_filters = doc.get_metadata(IPUB_META_ROUTE + ".apply_filters",
                                     default=True)
    convert_raw = doc.get_metadata(IPUB_META_ROUTE + ".convert_raw",
                                   default=True)

    filters = [
        # Filters
    ]

    out_doc = doc
    for func in filters:
        out_doc = func(out_doc)  # type: pf.Doc

    # Bug fix: dump the filtered document, not the original input, so
    # filters that return a new Doc are not silently discarded.
    pf.dump(out_doc)
def with_markdown(content, space, name):
    """Use pandoc to get markdown from MediaWiki format.

    Best-effort: on any parse failure the original content is kept, a note
    is appended to FAILURE_LOG, and processing continues.
    """
    try:
        ast_json = pypandoc.convert_text(content, 'json', format='mediawiki')
        doc = panflute.load(io.StringIO(ast_json))
        # Strip loose category tags, then fix up internal wiki links.
        for filter_func in (drop_loose_categories, rewrite_internal_links):
            panflute.run_filter(filter_func, doc=doc)
        content = back_to_markdown(doc)
    except Exception:
        click.echo('Failed to parse content! Continuing ...\n')
        with open(FAILURE_LOG, 'a') as handle:
            handle.write(('Failed to parse content. Could not re-write links '
                          'and drop categories for page {}\n'.format(name)))
    return convert_image_format(content)
def test_all():
    """Exercise the empty, YAML, and strict-YAML fenced filters in turn."""
    source_json = './tests/fenced/input.json'
    target_json = './tests/fenced/output.json'

    # Test fenced filter
    print('\nLoading JSON...')
    with open(source_json, encoding='utf-8') as reader:
        document = pf.load(reader)

    print('Dumping JSON...')
    with open(target_json, mode='w', encoding='utf-8') as writer:
        pf.dump(document, writer)
        writer.write('\n')
    print(' - Done!')

    print('\nComparing...')
    with open(source_json, encoding='utf-8') as reader:
        before = reader.read()
    with open(target_json, encoding='utf-8') as reader:
        after = reader.read()
    print('Are both files the same?')
    print(' - Length:', len(before) == len(after), len(before), len(after))
    print(' - Content:', before == after)

    print('\nApplying trivial filter...')
    pf.run_filter(empty_filter, doc=document)
    print(' - Done!')
    dump_and_compare(document, source_json, target_json)

    print('\nApplying YAML filter...')
    pf.run_filter(pf.yaml_filter, tag='spam', function=fenced_action,
                  doc=document)
    print(' - Done!')
    dump_and_compare(document, source_json, target_json)

    print('\nApplying Strict YAML filter...')
    pf.run_filter(pf.yaml_filter, tag='eggs', function=fenced_action,
                  doc=document, strict_yaml=True)
    print(' - Done!')
    dump_and_compare(document, source_json, target_json)
def test():
    """Exercise Doc.get_metadata lookups against the heavy-metadata fixture."""
    # chcp 65001 --> might be required if running from cmd on Windows
    print('\nLoading JSON...')
    fn = "./tests/input/heavy_metadata/benchmark.json"
    with open(fn, encoding='utf-8') as f:
        doc = pf.load(f)

    meta = doc.get_metadata('title')
    assert meta == "Lorem Ipsum: Title"
    meta = doc.get_metadata('title', builtin=False)
    # Idiom fix: isinstance() instead of type() == comparison.
    assert isinstance(meta, pf.MetaInlines)

    # foobar key doesn't exist, so the supplied default is returned
    meta = doc.get_metadata('foobar', True)
    # Idiom fix: identity check for the boolean singleton.
    assert meta is True
    meta = doc.get_metadata('foobar', 123)
    assert meta == 123

    meta = doc.get_metadata('abstract')
    assert meta.startswith('Bring to the table win-win')

    # Dotted keys traverse nested metadata maps.
    meta = doc.get_metadata('key1.key1-1')
    assert meta == ['value1-1-1', 'value1-1-2']

    meta = doc.get_metadata('amsthm.plain')
    assert isinstance(meta, list)
    assert meta[0]['Theorem'] == 'Lemma'

    print('--')
    # Empty key returns the whole metadata mapping.
    meta = doc.get_metadata('')
    assert len(meta) > 10
    print('\nDone...')
def apply_to_json(in_json, filter_func):
    # type: (dict, FunctionType) -> dict
    """Round-trip a Pandoc JSON dict through a panflute filter function."""
    stream = io.StringIO(u(json.dumps(in_json)))
    filtered = filter_func(pf.load(stream))  # type: Doc
    return filtered.to_json()
def empty_test(element, doc):
    # No-op filter action: returning None leaves every element unchanged.
    return


def test_filter(element, doc):
    # Drops Header elements and appends '!!' to the text of every Str.
    if type(element) == pf.Header:
        return []
    if type(element) == pf.Str:
        element.text = element.text + '!!'
        return element


# NOTE(review): the remainder of this chunk is a module-level test script
# (load -> dump -> compare) that is cut off mid-statement at the end of the
# visible source; the final `with open(...)` has no body here.
print('\nLoading JSON...')
with open(input_fn, encoding='utf-8') as f:
    doc = pf.load(f)
print('Dumping JSON...')
with open(output_fn, mode='w', encoding='utf-8') as f:
    pf.dump(doc, f)
    f.write('\n')
print(' - Done!')
print('\nComparing...')
with open(input_fn, encoding='utf-8') as f:
    input_data = f.read()
with open(output_fn, encoding='utf-8') as f:
import panflute as pf

# NOTE(review): this chunk is a usage sketch / API notes, not runnable code —
# several statements below are illustrative only (e.g. `some_filter` is
# undefined and `pf.dump(filename=None)` omits the doc argument).
doc = pf.load(filename=None)  # If no fn, from stdin
# fmt = pf.format() ???
# Attributes available on the loaded document:
doc.content
doc.metadata
doc.raw_metadata
doc.format
doc = pf.walk(doc, some_filter)
doc_json = doc.to_json()
pf.dump(filename=None)  # if no fn, to stdout
import io

import pypandoc
import panflute


def prepare(doc):
    """Initialise per-document accumulators for collected images and links."""
    doc.images = []
    doc.links = []


def action(elem, doc):
    """Collect every Image and Link element encountered during the walk."""
    if isinstance(elem, panflute.Image):
        doc.images.append(elem)
    elif isinstance(elem, panflute.Link):
        doc.links.append(elem)


if __name__ == '__main__':
    data = pypandoc.convert_file('example.md', 'json')
    # Bug fix: ``io`` was used here without being imported.
    f = io.StringIO(data)
    doc = panflute.load(f)
    doc = panflute.run_filter(action, prepare=prepare, doc=doc)

    print("\nImages:")
    for image in doc.images:
        print(image.url)

    print("\nLinks:")
    for link in doc.links:
        print(link.url)
def _scanner(node, env, path, arg=None):
    """
    Attempt to scan the final target for images and bibliographies

    In Pandoc flavored MarkDown, the only "included" files are the images
    and the bibliographies.  We need to tell SCons about these, but we
    don't want to do this by hand.  To do this, we directly use Pandoc's
    json output and analyze the document tree for the images and the
    metadata for bibliographies.  We need to operate on the filtered
    syntax tree so we can get the final filtered version.  The logic
    should work on any input format Pandoc can translate into its AST.
    Note you must respect Pandoc's bibliography file rules.  The command
    line arguments will override files specified in the YAML block of the
    header file.

    This logic is primarily aimed at the MarkDown sources, but it should
    work with the other plain text sources too.  However, this is not
    rigorously tested.  For LaTeX sources, you should really just use the
    SCons builder to have the right thing done.
    """
    import panflute
    logger = logging.getLogger(__name__ + ".scanner")
    # Grab the base command SCons will run and remove the output flag.
    # This does assume the user did not override the command variable
    # and hard code the output.
    cmd = shlex.split(env.subst_target_source("$PANDOCCOM"))
    for flag in ("-o", "--output"):
        try:
            cmd.remove(flag)
        except ValueError:
            # They specified the other flag
            pass
    # If the user provided the --from flag, we need to move it to the
    # beginning of the command.
    newidx = 1
    for idx, item in enumerate(cmd):
        match = re.match(r"(-f|--from=?)([-+\w]*)?", item)
        if match:
            cmd[newidx:newidx] = [cmd.pop(idx)]
            newidx += 1
            if not match.group(2):
                # Separate-argument form: move the value too.
                cmd[newidx:newidx] = [cmd.pop(idx + 1)]
                newidx += 1
    logger.debug("initial command: '{0}'".format(" ".join(cmd)))
    # Now parse the command line for the known arguments with files that
    # are needed to generate the final document.  But, we want to make
    # sure the file is actually in the build tree and not simply an
    # installed executable or file.  To do this, we map destinations in an
    # :class:`argparse.ArgumentParser` to Pandoc flags.  We do not want to
    # deal with searching all over creation so we do not deal with the
    # data directory.
    #
    # .. note:: This does not deal with the --resource-path flag which
    #    provides additional search paths for Pandoc.
    arguments = {
        "filter": ("-F", "--filter"),
        "lua": ("--lua-filter", ),
        "metadata": ("--metadata-file", ),
        "abbreviations": ("--abbreviations", ),
        "highlight": ("--highlight-style", ),
        "syntax": ("--syntax-definition", ),
        "header": ("-H", "--include-in-header"),
        "before": ("-B", "--include-before-body"),
        "after": ("-A", "--include-after-body"),
        "css": ("-c", "--css"),
        "reference": ("--reference-doc", ),
        "epubcover": ("--epub-cover-image", ),
        "epubmeta": ("--epub-metadata", ),
        "epubfont": ("--epub-embed-font", ),
        "bibliography": ("--bibliography", ),
        "csl": ("--csl", ),
        "citeabbrev": ("--citation-abbreviations", ),
    }
    parser = argparse.ArgumentParser()
    for dest in arguments:
        parser.add_argument(*arguments[dest], dest=dest, action="append",
                            default=[])
    # Add the target format in case it was specified as this overrides the
    # output format.  We also need the data directory for finding
    # installed filters.
    parser.add_argument("-t", "--to")
    parser.add_argument("--data-dir", dest="datadir")
    parser.add_argument("--template", default="default")
    args, _ = parser.parse_known_args(cmd)
    files = []
    for dest in arguments:
        files.extend(
            [env.File(x) for x in getattr(args, dest) if os.path.exists(x)])
    # Now we need to determine the files inside the document that will
    # influence the output.  The best parser for a Pandoc document is
    # Pandoc itself; we interrupt processing before the Writer by calling
    # Pandoc with JSON output and piping it through each filter.  The
    # output format is passed as an argument to each filter so we must
    # replicate that behavior to ensure the syntax tree has the final
    # files.
    #
    # If the user provided the ``--to`` flag (with possible extensions),
    # that _is_ the output format.  Otherwise, we take the format from the
    # file extension.  The only exception is the 'beamer' output.
    if args.to:
        if args.to == "beamer":
            format = "latex"
        else:
            format = re.match(r"(\w+)[-+]?", args.to).group(1)
    else:
        _, format = os.path.splitext(str(node))
        format = format[1:]
    # Now that we have the format, we can figure out if the template was
    # defined and inside the project.  First, we need the root of the
    # build and the template.
    template = args.template
    # Add the extension if needed.
    _, ext = os.path.splitext(template)
    if ext == "":
        template = template + "." + format
    # First, check that the file exists or is findable in the data
    # directory.
    if not os.path.exists(template):
        if args.datadir:
            template = os.path.join(args.datadir, "templates", template)
    if os.path.exists(template) and format not in ("docx", "pptx"):
        files.append(env.File(template))

    def run_command(cmd, proc=None):
        """Helper function for running a command, chaining stdout to stdin."""
        logger = logging.getLogger(__name__ + ".scanner.run_command")
        logger.debug("command: '{0}'".format(" ".join(cmd)))
        proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                                stdin=proc.stdout if proc else None)
        return proc

    # We need to run each filter in order; however, we also need to run
    # any Lua filters in their proper location.  We consume the command
    # until we find a filter and run each stage.  We start by processing
    # the input files.
    proc = None
    cmd_ = []
    cmd0 = [_detect(env), "--from", "json", "--to", "json"] + (
        ["--data-dir={0}".format(args.datadir)] if args.datadir else [])
    sources = [x.path for x in node.sources if os.path.exists(x.path)]
    while cmd and sources:
        # Grab the first item off the list
        item = cmd.pop(0)
        # Is this a 'to' flag?  Drop it (and its value, if separate).
        match = re.match(r"(-T|--to=?)([-+\w+]+)?", item)
        if match:
            if not match.group(2):
                cmd.pop(0)
            continue
        # Determine if it is a filter
        match = re.match(r"(-F|--filter=?)([-\w/.]+)?", item)
        if match:
            # Grab the filter
            filt = match.group(2) if match.group(2) else cmd.pop(0)
            logger.debug("cmd : '{0}'".format(" ".join(cmd)))
            logger.debug("item: '{0}'".format(item))
            logger.debug("filt: '{0}'".format(filt))
            logger.debug("cmd_: '{0}'".format(" ".join(cmd_)))
            # First, deal with any intervening commands
            if cmd_:
                if proc:
                    proc = run_command(cmd0 + cmd_, proc)
                else:
                    # If this is the first filter, we need to process the
                    # input files.
                    cmd_.extend(["--to", "json"])
                    cmd_.extend(sources)
                    proc = run_command(cmd_)
            # Now figure out the filter.
            cmd_ = _find_filter(filt, args.datadir, env)
            proc = run_command(cmd_ + [format], proc)
            cmd_ = []
        else:
            # Otherwise, put it on the running command.
            cmd_.append(item)
    # Now process any arguments after the last filter
    if cmd_:
        if proc:
            proc = run_command(cmd0 + cmd_, proc)
        else:
            # If we have no filters, process the sources.
            cmd_.extend(["--to", "json"])
            cmd_.extend(sources)
            proc = run_command(cmd_)
    doc = panflute.load(proc.stdout) if proc else None

    def _path(x):
        """A helper for getting the path right"""
        root = os.path.dirname(str(node))
        if os.path.commonprefix([root, x]) == root:
            return env.File(x)
        else:
            return env.File(os.path.join(root, x))

    # For images, we only concern ourselves with outputs that are a final
    # stage ('docx', 'pptx', 'html', 'epub', ...).  Intermediate formats
    # like 'markdown' and 'latex' still need another SCons stage, which is
    # when the scanning needs to be done.  PDF is excluded because SCons
    # has a better built-in scanner for it.
    skip = (
        "asciidoc",
        "commonmark",
        "context",
        "gfm",
        "json",
        "latex",
        "markdown",
        "markdown_mmd",
        "markdown_phpextra",
        "markdown_strict",
        "native",
        "org",
        "plain",
        "rst",
        "tex",
    )
    if format not in skip:
        def walk(src):
            """Walk the tree and find images and bibliographies"""
            if isinstance(src, panflute.Image):
                return [src.url]
            else:
                tmp = [walk(y) for y in getattr(src, "content", [])]
                return [y for z in tmp for y in z if y]
        images = [x for x in walk(doc) if x]
        logger.debug("images: {0}".format(images))
        files.extend([_path(x) for x in images])
    # And, finally, check the metadata for a bibliography file
    if doc:
        if not args.bibliography:
            bibs = doc.metadata.content.get("bibliography", [])
            if bibs:
                files.extend(
                    [_path(x.text) for x in getattr(bibs, "content", [bibs])])
    logger.debug("{0!s}: {1!s}".format(node, [str(x) for x in files]))
    return files
def main():
    """Filter entry point: read the AST from stdin, apply the gloss filters,
    and write the result to stdout."""
    document = pf.load(input_stream=sys.stdin)
    merge_settings(document)
    filtered = pf.run_filters([gloss, gloss_refs], doc=document)
    pf.dump(filtered, output_stream=sys.stdout)
def apply_filter(in_object, filter_func=None, out_format="panflute",
                 in_format="markdown", strip_meta=False,
                 strip_blank_lines=False, replace_api_version=True,
                 dry_run=False, **kwargs):
    # type: (list[str], FunctionType) -> str
    """convenience function to apply a panflute filter(s) to a string,
    list of string lines, pandoc AST or panflute.Doc

    Parameters
    ----------
    in_object: str or list[str] or dict
        can also be panflute.Doc
    filter_func:
        the filter function or a list of filter functions
    out_format: str
        for use by pandoc or, if 'panflute', return the panflute.Doc
    in_format="markdown": str
    strip_meta=False: bool
        strip the document metadata before final conversion
    strip_blank_lines: bool
        strip any blank lines or space from the start and end
    replace_api_version: bool
        for dict input only, if True, find the api_version of the available
        pandoc and reformat the json as appropriate
    dry_run: bool
        If True, return the Doc object, before applying the filter
    kwargs:
        to parse to filter func

    Returns
    -------
    str

    """
    if isinstance(in_object, pf.Doc):
        pass
    elif isinstance(in_object, dict):
        if not in_format == "json":
            raise AssertionError("the in_format for a dict should be json, "
                                 "not {}".format(in_format))
        # Bug fix: the original messages read "does contain" where
        # "does not contain" was meant.
        if "meta" not in in_object:
            raise ValueError("the in_object does not contain a 'meta' key")
        if "blocks" not in in_object:
            raise ValueError("the in_object does not contain a 'blocks' key")
        if "pandoc-api-version" not in in_object:
            raise ValueError(
                "the in_object does not contain a 'pandoc-api-version' key")
        if replace_api_version:
            # run pandoc on a null object, to get the correct api version
            null_raw = pf.run_pandoc("", args=["-t", "json"])
            null_stream = io.StringIO(null_raw)
            api_version = pf.load(null_stream).api_version

            # see panflute.load, w.r.t the legacy (pre-api-version) format
            if api_version is None:
                in_object = [{"unMeta": in_object["meta"]},
                             in_object["blocks"]]
            else:
                ans = OrderedDict()
                ans["pandoc-api-version"] = api_version
                ans["meta"] = in_object["meta"]
                ans["blocks"] = in_object["blocks"]
                in_object = ans
        in_str = json.dumps(in_object)
    elif isinstance(in_object, (list, tuple)):
        in_str = "\n".join(in_object)
    elif isinstance(in_object, string_types):
        in_str = in_object
    else:
        raise TypeError("object not accepted: {}".format(in_object))

    if not isinstance(in_object, pf.Doc):
        doc = pf.convert_text(in_str, input_format=in_format, standalone=True)
    else:
        doc = in_object

    doc.format = out_format

    if dry_run:
        return doc

    if not isinstance(filter_func, (list, tuple, set)):
        filter_func = [filter_func]

    out_doc = doc
    for func in filter_func:
        out_doc = func(out_doc, **kwargs)  # type: Doc

    # post-process Doc
    if strip_meta:
        out_doc.metadata = {}
    if out_format == "panflute":
        return out_doc

    # create out str
    out_str = pf.convert_text(out_doc, input_format="panflute",
                              output_format=out_format)

    # post-process final str
    if strip_blank_lines:
        out_str = out_str.replace("\n\n", "\n")

    return out_str