def main():
    """Entry point for the manubot pandoc filter.

    Reads a pandoc AST from ``args.input`` (stdin when None), processes
    citations, and writes the transformed AST to ``args.output`` (stdout
    when None). Honors the ``manubot-log-level`` and
    ``manubot-fail-on-errors`` metadata fields.
    """
    from manubot.command import setup_logging_and_errors, exit_if_error_handler_fired

    diagnostics = setup_logging_and_errors()
    args = parse_args()
    # Let panflute handle io to sys.stdout / sys.stdin to set utf-8 encoding.
    # args.input=None for stdin, args.output=None for stdout
    doc = pf.load(input_stream=args.input)
    level_name = doc.get_metadata("manubot-log-level", "WARNING")
    diagnostics["logger"].setLevel(getattr(logging, level_name))
    process_citations(doc)
    pf.dump(doc, output_stream=args.output)
    if doc.get_metadata("manubot-fail-on-errors", False):
        exit_if_error_handler_fired(diagnostics["error_handler"])
def inner_test_idempotent(input_fn, output_fn):
    """Check that load -> dump round-trips a JSON doc unchanged, and that a
    trivial walk leaves the serialized form identical.

    Args:
        input_fn: path of the reference pandoc-JSON file.
        output_fn: path the re-serialized document is written to.
    """
    print('\nLoading JSON...')
    with open(input_fn, encoding='utf-8') as f:
        doc = pf.load(f)

    print('Dumping JSON...')
    with open(output_fn, mode='w', encoding='utf-8') as f:
        pf.dump(doc, f)
        f.write('\n')
    print(' - Done!')

    print('\nComparing...')
    with open(input_fn, encoding='utf-8') as f:
        input_data = f.read()
    with open(output_fn, encoding='utf-8') as f:
        output_data = f.read()
    print('Are both files the same?')
    print(' - Length:', len(input_data) == len(output_data), len(input_data), len(output_data))
    print(' - Content:', input_data == output_data)

    print('\nApplying trivial filter...')
    doc = doc.walk(action=empty_test, doc=doc)
    print(' - Done!')

    # The dump/compare sequence previously duplicated dump_and_compare()
    # line-for-line; delegate to it instead.
    dump_and_compare(doc, input_fn, output_fn)
def dump_and_compare(doc, input_fn, output_fn):
    """Serialize *doc* to *output_fn* and assert it is byte-identical to *input_fn*."""
    print(' - Dumping JSON...')
    with open(output_fn, mode='w', encoding='utf-8') as fout:
        pf.dump(doc, fout)
        fout.write('\n')
    print(' - Done!')

    print(' - Comparing...')
    with open(input_fn, encoding='utf-8') as fin:
        input_data = fin.read()
    with open(output_fn, encoding='utf-8') as fin:
        output_data = fin.read()

    print(' - Are both files the same?')
    lengths_match = len(input_data) == len(output_data)
    print(' - Length:', lengths_match, len(input_data), len(output_data))
    print(' - Content:', input_data == output_data)
    assert input_data == output_data
def run():
    """Benchmark driver: load benchmark.json, walk it with the no-op filter,
    and dump the result to panflute.json."""
    src = 'benchmark.json'
    dst = 'panflute.json'
    print('\nLoading JSON...')
    with open(src, encoding='utf-8') as fin:
        doc = pf.load(fin)

    print('\nApplying trivial filter...')
    doc = doc.walk(action=empty_test, doc=doc)

    print('Dumping JSON...')
    with open(dst, mode='w', encoding='utf-8') as fout:
        pf.dump(doc, fout)
        fout.write('\n')
    print(' - Done!')
def pandoc_filters():
    """run a set of ipypublish pandoc filters directly on the pandoc AST,
    via ``pandoc --filter ipubpandoc``
    """
    doc = pf.load()

    # in an rmarkdown file, the metadata will be under a root `jupyter` key
    jmeta = doc.get_metadata('jupyter', {})
    meta = pf.tools.meta2builtin(doc.metadata)
    if 'jupyter' in meta and hasattr(meta["jupyter"], 'items'):
        jmeta = meta.pop("jupyter")
        meta.update(jmeta)
        doc.metadata = meta  # builtin2meta(meta)

    apply_filters = doc.get_metadata(IPUB_META_ROUTE + ".apply_filters", default=True)
    convert_raw = doc.get_metadata(IPUB_META_ROUTE + ".convert_raw", default=True)

    if apply_filters:
        if convert_raw:
            filters = [
                prepare_raw.main,
                prepare_cites.main,
                prepare_labels.main,
                format_cite_elements.main,
                format_raw_spans.main,
                format_label_elements.main,
                rmarkdown_to_mpe.main,
            ]
        else:
            filters = [
                prepare_cites.main,
                prepare_labels.main,
                format_cite_elements.main,
                format_label_elements.main,
                rmarkdown_to_mpe.main,
            ]
    else:
        filters = []

    out_doc = doc
    for func in filters:
        out_doc = func(out_doc)  # type: pf.Doc

    # TODO strip meta?
    # BUG FIX: dump the filtered document; the original dumped `doc`, which
    # silently discarded every filter's output when a filter returned a new Doc.
    pf.dump(out_doc)
def test_all():
    """Exercise JSON round-tripping plus the empty, YAML and strict-YAML filters."""
    input_fn = './tests/fenced/input.json'
    output_fn = './tests/fenced/output.json'

    # Test fenced filter
    print('\nLoading JSON...')
    with open(input_fn, encoding='utf-8') as fin:
        doc = pf.load(fin)

    print('Dumping JSON...')
    with open(output_fn, mode='w', encoding='utf-8') as fout:
        pf.dump(doc, fout)
        fout.write('\n')
    print(' - Done!')

    print('\nComparing...')
    with open(input_fn, encoding='utf-8') as fin:
        input_data = fin.read()
    with open(output_fn, encoding='utf-8') as fin:
        output_data = fin.read()
    print('Are both files the same?')
    print(' - Length:', len(input_data) == len(output_data), len(input_data), len(output_data))
    print(' - Content:', input_data == output_data)

    # Each filter below mutates doc in place; serialization must stay stable.
    print('\nApplying trivial filter...')
    pf.run_filter(empty_filter, doc=doc)
    print(' - Done!')
    dump_and_compare(doc, input_fn, output_fn)

    print('\nApplying YAML filter...')
    pf.run_filter(pf.yaml_filter, tag='spam', function=fenced_action, doc=doc)
    print(' - Done!')
    dump_and_compare(doc, input_fn, output_fn)

    print('\nApplying Strict YAML filter...')
    pf.run_filter(pf.yaml_filter, tag='eggs', function=fenced_action, doc=doc,
                  strict_yaml=True)
    print(' - Done!')
    dump_and_compare(doc, input_fn, output_fn)
def run_test(name, action):
    """Round-trip a markdown test file through a named filter.

    Reads ``tests/<name>.md``, parses it into a panflute Doc, applies the
    *action* function found in module ``filters/<name>.py``, then converts
    the result back to markdown, printing input and output.

    Args:
        name: basename of both the test file and the filter module.
        action: name of the filter function inside that module.
    """
    input_fn = os.path.join('tests', name + '.md')

    # Read markdown, convert to JSON and then to elements
    with open(input_fn, encoding='utf-8') as f:
        md = f.read()
    print('~' * 80)
    print(' ' * 30, 'INPUT')
    print('~' * 80)
    print(md)
    print('~' * 80, '\n')

    print('... Parsing markdown')
    doc = pf.convert_text(md, output_format='doc')
    doc.format = 'markdown'
    # FIX: isinstance is the correct type check (type(...) == ... breaks on subclasses)
    assert isinstance(doc, pf.Doc)
    print(' Done.')

    # Walk through AST
    sys.path.append('filters')
    print('... Importing module')
    mod = importlib.import_module(name)
    print(' Done.')
    f_action = mod.__dict__[action]
    print('... Applying filters')
    altered = doc.walk(f_action, doc)
    print(' Done.')

    # Convert AST into JSON
    print('... Converting document into JSON')
    with io.StringIO() as f:
        pf.dump(altered, f)
        contents = f.getvalue()
    print(' Done.')

    # Convert JSON into markdown
    print('... Converting JSON into markdown')
    md = pf.convert_text(contents, input_format='json', output_format='markdown')
    print(' Done.')

    print('~' * 80)
    print(' ' * 30, 'OUTPUT')
    print('~' * 80)
    print(md)
    print('~' * 80, '\n')
def test_all():
    """Build a one-row table with and without a header row and dump each."""
    para_a = pf.Para(pf.Str("a"))
    para_b = pf.Para(pf.Str("b"))
    row = pf.TableRow(pf.TableCell(para_a), pf.TableCell(para_b))

    plain_table = pf.Table(row)
    headed_table = pf.Table(row, header=row)
    print(plain_table.header)
    print(headed_table.header)

    # Serializing each table inside a Doc must succeed for both variants.
    for table in (plain_table, headed_table):
        with io.StringIO() as buf:
            pf.dump(pf.Doc(table), buf)
            print(buf.getvalue())
def save_new_file(doc, new_file_name):
    """Saves the new markdown content to a file.

    Serializes *doc* to pandoc JSON in memory, then lets pypandoc render it
    as GitHub-flavored markdown directly into *new_file_name*. Exits the
    process on an OSError (e.g. missing write permission).

    Args:
        doc (panflute.Doc): Pandoc document container, has a mermaid attribute, where the code block \
        index and image path are stored.
        new_file_name (str): Path where to save the new markdown file.
    """
    logger.info(f"Saving new file to {new_file_name}.")
    buffer = io.StringIO()
    panflute.dump(doc, buffer)
    json_contents = buffer.getvalue()
    buffer.close()
    try:
        pypandoc.convert_text(json_contents, "markdown_github", "json",
                              outputfile=new_file_name)
    except OSError as e:
        logger.error(f"Failed to save file, check permissions. {e}.")
        sys.exit(1)
def pandoc_filters():
    """run a set of rst2myst pandoc filters directly on the pandoc AST,
    via ``pandoc --filter rst2myst``
    """
    doc = pf.load()

    # Metadata toggles reserved for future filters (no filter is wired up yet).
    apply_filters = doc.get_metadata(IPUB_META_ROUTE + ".apply_filters", default=True)
    convert_raw = doc.get_metadata(IPUB_META_ROUTE + ".convert_raw", default=True)

    filters = [
        # Filters
    ]

    out_doc = doc
    for func in filters:
        out_doc = func(out_doc)  # type: pf.Doc

    # FIX: dump the (potentially replaced) filtered document, not the original.
    # Currently identical since `filters` is empty, but dumping `doc` would
    # silently drop filter output once filters are added. Also removed the
    # unused `meta = pf.tools.meta2builtin(...)` local.
    pf.dump(out_doc)
def main() -> None:
    """Read a pandoc JSON document from stdin, run the tangle, annotate and
    doctest passes over it, and dump the result to stdout."""
    ## ------ begin <<load-document>>[0]
    import io
    import sys

    json_stream = io.StringIO(sys.stdin.read())
    doc = panflute.load(json_stream)
    ## ------ end
    doc.config = read_config()

    # Each pass gets a prepare() hook followed by a full AST walk.
    for pass_module in (tangle, annotate, doctest):
        pass_module.prepare(doc)
        doc = doc.walk(pass_module.action)

    panflute.dump(doc)
def main(doc=None):
    """Remove empty headings from Vimwiki file.

    Pandoc filter using panflute.

    Args:
        doc: an existing panflute Doc to process; loaded from stdin when None.
    """
    # BUG FIX: the `doc` argument was accepted but ignored — stdin was always
    # read. Honor it, falling back to pf.load() only when no doc is supplied.
    newdoc = pf.load() if doc is None else doc
    # Run repeatedly: removing a subheading can leave its parent heading
    # empty, so several passes are needed to reach a fixed point.
    for _ in range(5):
        newdoc = pf.run_filter(action, prepare=prepare, finalize=finalize, doc=newdoc)
    return pf.dump(newdoc)
def panflute2output(elem, format="json", doc=None):
    """Render a panflute element (or list of elements) to *format*.

    Wraps *elem* in a Doc (or a deep copy of *doc* with its content replaced),
    serializes it to pandoc JSON, and — unless the target format is json —
    hands the JSON to pandoc for conversion.
    """
    if not isinstance(elem, (list, pf.ListContainer)):
        elem = [elem]

    if doc is None:
        doc = pf.Doc(*elem, format=format, api_version=(1, 17, 3, 1))
    else:
        # Never mutate the caller's document.
        doc = copy.deepcopy(doc)
        doc.content = elem
        doc.format = format
        doc.api_version = (1, 17, 3, 1)

    with io.StringIO() as buf:
        pf.dump(doc, buf)
        ast = buf.getvalue()

    if format == "json":
        return ast
    return pf.run_pandoc(
        text=ast,
        args=["-f", "json", "-t", format, "--wrap=none"])
import panflute as pf

# NOTE(review): this reads as an API sketch rather than runnable code —
# pf.format() is marked "???" and pf.dump() below is missing its doc
# argument; confirm intent before relying on it.
doc = pf.load(filename=None)  # If no fn, from stdin
# fmt = pf.format() ???
# doc.content doc.metadata doc.raw_metadata doc.format
doc = pf.walk(doc, some_filter)
doc_json = doc.to_json()
pf.dump(filename=None)  # if no fn, to stdout
) extra_args = [] extra_args += ["--wrap=none"] # extra_args += ["--columns=100"] extra_args += ["--reference-links"] mdtext: str = pf.convert_text( doc, input_format="panflute", output_format=output_format, standalone=True, extra_args=extra_args ) # type: ignore mdtext = apply_regex_fixes(mdtext, markdown_fixes, job=job) json_str = "" if job.is_debug: with io.StringIO() as fs: pf.dump(doc, fs) json_str = json.dumps(json.loads(fs.getvalue()), indent=2) # Prettifies JSON if job.is_dry_run: if job.is_bugreport: print("BUGREPORT:\n------------") print(f"pandoc -f mediawiki -t {output_format} {' '.join(extra_args)} <<EOF\n{text}\nEOF") print(json_str) print("------------") return job.complete(result={"text": mdtext, "debug": json_str, "path": file_path}) else: os.makedirs(os.path.dirname(file_path), exist_ok=True) # Ensure dir exists with open(file_path, "w") as f: # Manually write YAML header as not supported yet for Commonmark https://github.com/jgm/pandoc/issues/6629 f.write("---\n") yaml.dump(context["raw_metadata"], f, allow_unicode=True)
def doc_to_json_str(doc):
    """Serialize a panflute Doc to its pandoc JSON string.

    Args:
        doc: the panflute document to serialize.

    Returns:
        The pandoc JSON representation as a str.
    """
    # FIX: use a context manager so the in-memory buffer is always closed
    # (the original leaked the StringIO; also matches back_to_content's style).
    with io.StringIO() as stream:
        pf.dump(doc, stream)
        return stream.getvalue()
def back_to_content(document):
    """Return the pandoc JSON serialization of *document* as a string."""
    buffer = io.StringIO()
    try:
        panflute.dump(document, buffer)
        return buffer.getvalue()
    finally:
        buffer.close()
def main():
    """Pandoc filter entry point: load from stdin, merge settings, apply the
    gloss filters, and dump the result to stdout."""
    doc = pf.load(input_stream=sys.stdin)
    merge_settings(doc)
    filtered = pf.run_filters([gloss, gloss_refs], doc=doc)
    pf.dump(filtered, output_stream=sys.stdout)
def test_filter(element, doc):
    """Trivial filter: drop Header elements, append '!!' to every Str."""
    if type(element) == pf.Header:
        # Returning [] deletes the element from the tree.
        return []
    if type(element) == pf.Str:
        element.text = element.text + '!!'
        return element


# NOTE(review): the script below appears truncated mid-comparison (the
# ' - Content:' check present in sibling snippets is missing here), and it
# uses input_fn/output_fn which must be defined earlier in the original file.
print('\nLoading JSON...')
with open(input_fn, encoding='utf-8') as f:
    doc = pf.load(f)

print('Dumping JSON...')
with open(output_fn, mode='w', encoding='utf-8') as f:
    pf.dump(doc, f)
    f.write('\n')
print(' - Done!')

print('\nComparing...')
with open(input_fn, encoding='utf-8') as f:
    input_data = f.read()
with open(output_fn, encoding='utf-8') as f:
    output_data = f.read()
print('Are both files the same?')
print(' - Length:', len(input_data) == len(output_data), len(input_data), len(output_data))
def test_all():
    """Round-trip and equivalence checks for pf.convert_text.

    Verifies: (1) markdown and TeX parse into the expected element trees and
    serialize identically to hand-built benchmarks; (2) md->json->md and
    md->panflute->md conversions are lossless, with and without standalone;
    (3) the same holds for markdown and gfm tables.
    """
    # --- markdown parses to the expected benchmark tree ---
    md = 'Some *markdown* **text** ~xyz~'
    c_md = pf.convert_text(md)
    b_md = [
        pf.Para(
            pf.Str("Some"), pf.Space,
            pf.Emph(pf.Str("markdown")), pf.Space,
            pf.Strong(pf.Str("text")), pf.Space,
            pf.Subscript(pf.Str("xyz")))
    ]
    print("Benchmark MD:")
    print(b_md)
    print("Converted MD:")
    print(c_md)
    assert repr(c_md) == repr(b_md)

    # Both trees must also serialize to identical JSON.
    with io.StringIO() as f:
        doc = pf.Doc(*c_md)
        pf.dump(doc, f)
        c_md_dump = f.getvalue()
    with io.StringIO() as f:
        doc = pf.Doc(*b_md)
        pf.dump(doc, f)
        b_md_dump = f.getvalue()
    assert c_md_dump == b_md_dump

    # ----------------------
    print()

    # --- TeX math and raw inlines parse to the expected tree ---
    tex = r'Some $x^y$ or $x_n = \sqrt{a + b}$ \textit{a}'
    c_tex = pf.convert_text(tex)
    b_tex = [
        pf.Para(
            pf.Str("Some"), pf.Space,
            pf.Math("x^y", format='InlineMath'), pf.Space,
            pf.Str("or"), pf.Space,
            pf.Math(r"x_n = \sqrt{a + b}", format='InlineMath'), pf.Space,
            pf.RawInline(r"\textit{a}", format='tex'))
    ]
    print("Benchmark TEX:")
    print(b_tex)
    print("Converted TEX:")
    print(c_tex)
    assert repr(c_tex) == repr(b_tex)

    with io.StringIO() as f:
        doc = pf.Doc(*c_tex)
        pf.dump(doc, f)
        c_tex_dump = f.getvalue()
    with io.StringIO() as f:
        doc = pf.Doc(*b_tex)
        pf.dump(doc, f)
        b_tex_dump = f.getvalue()
    assert c_tex_dump == b_tex_dump

    # --- lossless round-trips through json and panflute formats ---
    print("\nBack and forth conversions... md->json->md")
    md = 'Some *markdown* **text** ~xyz~'
    print("[MD]", md)
    md2json = pf.convert_text(md, input_format='markdown', output_format='json')
    print("[JSON]", md2json)
    md2json2md = pf.convert_text(md2json, input_format='json', output_format='markdown')
    print("[MD]", md2json2md)
    assert md == md2json2md

    print("\nBack and forth conversions... md->panflute->md")
    md = 'Some *markdown* **text** ~xyz~'
    print("[MD]", md)
    md2panflute = pf.convert_text(md, input_format='markdown', output_format='panflute')
    print("[PANFLUTE]", md2panflute)
    md2panflute2md = pf.convert_text(md2panflute, input_format='panflute', output_format='markdown')
    print("[MD]", md2panflute2md)
    assert md == md2panflute2md

    print("\nBack and forth conversions... md->panflute(standalone)->md")
    md = 'Some *markdown* **text** ~xyz~'
    print("[MD]", md)
    md2panflute = pf.convert_text(md, input_format='markdown', output_format='panflute',
                                  standalone=True)
    print("[PANFLUTE]", md2panflute)
    md2panflute2md = pf.convert_text(md2panflute, input_format='panflute', output_format='markdown')
    print("[MD]", md2panflute2md)
    assert md == md2panflute2md

    print(
        "\nBack and forth conversions... md table -> json(standalone) -> md table"
    )
    # NOTE(review): the exact line breaks/spacing of this multiline table
    # literal were lost in extraction; reconstructed as a simple-table —
    # confirm against the original file.
    md = """lorem

---  ---
x    y
---  ---

ipsum"""
    print("[MD]", repr(md))
    md2json = pf.convert_text(md, input_format='markdown', output_format='json',
                              standalone=True)
    print("[json]", md2json)
    md2json2md = pf.convert_text(md2json, input_format='json', output_format='markdown')
    print("[MD]", repr(md2json2md))
    assert md == md2json2md

    print(
        "\nBack and forth conversions... md table -> panflute(standalone) -> md table"
    )
    print("[MD]", repr(md))
    md2panflute = pf.convert_text(md, input_format='markdown', output_format='panflute',
                                  standalone=True)
    print("[PANFLUTE]", md2panflute)
    md2panflute2md = pf.convert_text(md2panflute, input_format='panflute', output_format='markdown')
    print("[MD]", repr(md2panflute2md))
    assert md == md2panflute2md

    print(
        "\nBack and forth conversions... gfm table (empty) -> json(standalone) -> gfm table (empty)"
    )
    # NOTE(review): multiline literal reconstructed (see note above).
    md = """lorem

| x | y |
| - | - |

ipsum"""
    print("[MD]", repr(md))
    md2json = pf.convert_text(md, input_format='gfm', output_format='json',
                              standalone=True)
    print("[json]", md2json)
    md2json2md = pf.convert_text(md2json, input_format='json', output_format='gfm')
    print("[MD]", repr(md2json2md))
    assert md == md2json2md

    print(
        "\nBack and forth conversions... gfm table (empty) -> panflute(standalone) -> gfm table (empty)"
    )
    print("[MD]", repr(md))
    md2panflute = pf.convert_text(md, input_format='gfm', output_format='panflute',
                                  standalone=True)
    print("[PANFLUTE]", md2panflute)
    md2panflute2md = pf.convert_text(md2panflute, input_format='panflute', output_format='gfm')
    print("[MD]", repr(md2panflute2md))
    assert md == md2panflute2md
def convert(self):
    """Convert the note's attributed text chunks into a rendered document string.

    Walks self._attr_chunks() and rebuilds the structure as a panflute Doc:
    (possibly nested) lists and checklists, tables, and styled text spans.
    Returns the pandoc JSON serialization of the resulting document.
    """
    doc = panflute.Doc(
        api_version=(1, 17, 5),
        metadata={
            'pagetitle': self.title,
        },
    )
    doc.content.append(panflute.Header(panflute.Str(self.title)))
    # Open list state keyed by nesting depth; each value holds the list's
    # class and its panflute element (under 'pf').
    lists = {}
    # Table assembly state: rows are accumulated and the Table object is only
    # built at the end (see the placeholder-div comment below).
    tables = {}
    table_rows = {}
    table_cells = {}
    for chunk in self._attr_chunks():
        self.logger.debug(chunk)
        container = panflute.Para()
        cdiv = panflute.Div(container)

        # Handle lists
        if 'list_class' in chunk[0]['attrs']:
            lc = chunk[0]['attrs']['list_class']
            check_state = None
            # checked/unchecked are modeled as a 'checklist' list class with
            # the check state carried separately.
            if lc in ['checked', 'unchecked']:
                check_state = lc
                lc = 'checklist'
            ld = chunk[0]['attrs']['list_depth']

            # prune any lists that are lower than us, they're finished
            for i in list(lists.keys()):
                if i > ld:
                    lists.pop(i)

            # non-homogenous list types can be immediately adjacent without
            # ending up merged
            if ld in lists and lists[ld]['class'] != lc:
                lists.pop(ld)

            # checklists are a special case, they can't contain other lists
            if lc != 'checklist' and lists and lists[1][
                    'class'] == 'checklist':
                lists = {}

            # make sure any intermediate lists were created, including
            # the top level because boxnotes
            for i in range(1, ld + 1):
                if i not in lists:
                    lists[i] = self._list(lc, i)
                    if i != ld:
                        lists[i]['pf'].content.append(panflute.ListItem())
                    lp = lists[i]['pf']
                    if lc == 'checklist':
                        lp = panflute.Div(lp, classes=['checklist'])
                    if i == 1:
                        doc.content.append(lp)
                    else:
                        lists[i - 1]['pf'].content[-1].content.append(lp)

            # set the container for the other subchunks
            container = panflute.Plain()
            cdiv.content = [container]
            cdiv.classes.append(lc)
            if check_state:
                cdiv.classes.append(check_state)
            lists[ld]['pf'].content.append(panflute.ListItem(cdiv))

            if check_state == 'checked':
                container.content.append(panflute.Str(CHECKED))
            elif check_state == 'unchecked':
                container.content.append(panflute.Str(UNCHECKED))

        elif 'table_id' in chunk[-1]['attrs']:
            table_id = chunk[-1]['attrs']['table_id']
            row_id = chunk[-1]['attrs']['table_row']
            cell_id = row_id + chunk[-1]['attrs']['table_col']
            if table_id not in tables:
                # There's some magic in the constructor for panflute tables
                # that isn't exposed in any other way, so we can't create
                # the table until we've finished populating the rows.
                # Instead, use a placeholder div to locate it within the
                # document.
                tables[table_id] = {
                    'div': panflute.Div(),
                    'rows': [],
                }
                doc.content.append(tables[table_id]['div'])
            if row_id not in table_rows:
                table_rows[row_id] = panflute.TableRow()
                tables[table_id]['rows'].append(table_rows[row_id])
            if cell_id not in table_cells:
                cdiv = panflute.Div(panflute.Plain())
                table_cells[cell_id] = panflute.TableCell(cdiv)
                table_rows[row_id].content.append(table_cells[cell_id])
            # Write subsequent text into the cell's inner Plain container.
            container = table_cells[cell_id].content[0].content[0]
        else:
            # Plain paragraph chunk: any open lists are finished.
            lists = {}
            doc.content.append(cdiv)

        if 'align' in chunk[0]['attrs']:
            cdiv.attributes['style'] = 'text-align: ' + chunk[0]['attrs'][
                'align'] + ';'

        for subchunk in chunk:
            if subchunk['newlines'] > 1:
                # we've had an extra linebreak, no more adding on to lists
                lists = {}

            # don't do anything with markers
            if subchunk['text'] == '*' and 'lmkr' in subchunk['attrs']:
                continue

            # Links wrap the rest of this subchunk's content.
            scont = container
            if 'href' in subchunk['attrs']:
                scont = panflute.Link(url=subchunk['attrs']['href'])
                container.content.append(scont)

            if 'image' in subchunk['attrs']:
                scont.content.append(
                    panflute.Image(
                        url=self._image(subchunk['attrs']['author'],
                                        subchunk['attrs']['image'])))
                continue

            # Text: split embedded newlines into Str + LineBreak runs.
            span = panflute.Span()
            lines = subchunk['text'].splitlines()
            while lines:
                subtext = lines.pop(0)
                span.content.append(panflute.Str(subtext))
                if lines:
                    span.content.append(panflute.LineBreak())
            if 'font' in subchunk['attrs']:
                color = subchunk['attrs']['font'].get('color', '000000')
                size = subchunk['attrs']['font'].get('size', 'medium')
                span.classes.append('font-size-' + size)
                span.classes.append('font-color-' + color)
                # I don't actually know what the possible colors are and I
                # don't feel like finding out, so just inject it as an
                # inline style.
                if color != '000000':
                    span.attributes['style'] = 'color: #' + color + ';'
            if subchunk['attrs'].get('underline'):
                span.classes.append('underline')
            if subchunk['attrs'].get('bold'):
                span = panflute.Strong(span)
            if subchunk['attrs'].get('italic'):
                span = panflute.Emph(span)
            if subchunk['attrs'].get('strikethrough'):
                span = panflute.Strikeout(span)
            scont.content.append(span)

    # Actually create the tables
    for x in tables:
        tables[x]['div'].content.append(panflute.Table(*tables[x]['rows']))

    with io.StringIO() as f:
        panflute.dump(doc, f)
        return f.getvalue()