def toJSONFilter():
    doc = json.loads(sys.stdin.read())
    if len(sys.argv) > 1:
        format = sys.argv[1]
    else:
        format = ""

    # first, process metadata (title and subtitle)
    result_meta = doc[0]['unMeta']
    doc = walk(doc, extract_metadata, format, result_meta)

    # We need a title, use a default if unset
    if 'title' not in result_meta:
        title = {'c': 'Untitled', 't': 'Str'}
        result_meta['title'] = {"c": [title], "t": "MetaInlines"}
    doc[0]['unMeta'] = result_meta

    # then, fix page breaks
    doc = walk(doc, fix_pagebreaks, format, result_meta)
    # then, fix underline
    doc = walk(doc, fix_underline, format, result_meta)
    # then, customize horizontal rules (otherwise they're hardcoded in Writers/LaTeX.hs)
    doc = walk(doc, fix_hr, format, result_meta)

    json.dump(doc, sys.stdout)
def stringify(x, format):
    """Walks the tree x and returns concatenated string content,
    leaving out all formatting.
    """
    result = []

    def go(key, val, format, meta):
        if key in ['Str', 'MetaString']:
            result.append(val)
        elif key == 'Code':
            result.append(val[1])
        elif key == 'Math':
            # Modified from the stringify function in the pandocfilters package
            if format == 'latex':
                result.append('$' + val[1] + '$')
            else:
                result.append(val[1])
        elif key == 'LineBreak':
            result.append(" ")
        elif key == 'Space':
            result.append(" ")
        elif key == 'Note':
            # Do not stringify value from Note node
            del val[:]

    walk(x, go, format, {})
    return ''.join(result)
def quotify(x):
    """Replaces Quoted elements in element list 'x' with quoted strings.

    Pandoc uses the Quoted element in its json when --smart is enabled.
    Output to TeX/pdf automatically triggers --smart.

    stringify() ignores Quoted elements.  Use quotify() first to replace
    Quoted elements in 'x' with quoted strings.  'x' should be a deep copy
    so that the underlying document is left untouched.

    Returns x."""

    def _quotify(key, value, fmt, meta):  # pylint: disable=unused-argument
        """Replaces Quoted elements with quoted strings."""
        if key == 'Quoted':
            ret = []
            quote = '"' if value[0]['t'] == 'DoubleQuote' else "'"
            if value[1][0]['t'] == 'Str':
                value[1][0]['c'] = quote + value[1][0]['c']
            else:
                ret.append(Str(quote))
            if value[1][-1]['t'] == 'Str':
                value[1][-1]['c'] = value[1][-1]['c'] + quote
                ret += value[1]
            else:
                ret += value[1] + [Str(quote)]
            return ret
        return None

    return walk(walk(x, _quotify, '', {}), join_strings, '', {})
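# A minimal usage sketch for quotify() above (the inline list is hand-written
# for illustration; Str and join_strings are assumed to be in scope as in the
# snippet): the Quoted element is flattened into plain quoted Str content on a
# deep copy, so the original AST is left untouched.
import copy

inlines = [{'t': 'Quoted',
            'c': [{'t': 'DoubleQuote'}, [{'t': 'Str', 'c': 'hello'}]]}]

flattened = quotify(copy.deepcopy(inlines))
# flattened should now be roughly [{'t': 'Str', 'c': '"hello"'}]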
def test_insert_secnos_factory_2(self):
    """Tests insert_secnos_factory() #2."""

    ## test.md: ---\nxnos-number-sections: True\n...\n\n# Title\n\n$$ x $$\n ##

    # Command: pandoc test.md -t json
    src = eval(r'''{"blocks": [{"t": "Header", "c": [1, ["title", [], []], [{"t": "Str", "c": "Title"}]]}, {"t": "Para", "c": [{"t": "Math", "c": [{"t": "DisplayMath"}, " x "]}]}], "pandoc-api-version": [1, 17, 5, 1], "meta": {"xnos-number-sections": {"t": "MetaBool", "c": True}}}''')

    # Check src against current pandoc
    md = subprocess.Popen(
        ('echo', '---\nxnos-number-sections: True\n...\n\n# Title\n\n$$ x $$\n'),
        stdout=subprocess.PIPE)
    output = eval(subprocess.check_output(
        'pandoc -t json'.split(),
        stdin=md.stdout).strip().decode("utf-8").replace('true', 'True'))
    self.assertEqual(src, output)

    expected = eval(r'''{"blocks": [{"t": "Header", "c": [1, ["title", [], []], [{"t": "Str", "c": "Title"}]]}, {"t": "Para", "c": [{"t": "Math", "c": [{"t": "DisplayMath"}, " x "]}]}], "pandoc-api-version": [1, 17, 5, 1], "meta": {"xnos-number-sections": {"t": "MetaBool", "c": True}}}''')

    # Make the comparison
    meta = src['meta']
    fmt = 'html'
    attach_attrs_math = attach_attrs_factory(Math, allow_space=True)
    insert_secnos = insert_secnos_factory(Math)
    tmp = walk(src, attach_attrs_math, fmt, meta)
    self.assertEqual(walk(tmp, insert_secnos, fmt, meta), expected)
def checker(self, input_data, bib_data):
    walk(input_data, extractCites, 'json', {})
    alterMetadata(input_data['meta'])
    json_file = input_data['meta']['bibliography']['c'][0]['c']
    with open(json_file) as f:
        test_data = json.load(f)
    self.assertEqual(len(bib_data), len(test_data))
    for i in range(len(bib_data)):
        self.assertDictEqual(bib_data[i], test_data[i])
def liftHeaders(key, value, fmt, meta):
    if key == 'Header':
        level, attrs, content = value
        level -= 1
        if level == 1:
            content = walk(content, upcase, fmt, meta)
        return Header(level, attrs, content)
def textbook(key, value, format, meta):
    if key == "Header":
        [level, [ident, classes, keyvals], inlines] = value
        if (level == 5 or level == 1) and not "unnumbered" in classes:
            return Header(level, [ident, classes + ["unnumbered"], keyvals], inlines)
    if key == "CodeBlock":
        [[ident, classes, keyvals], code] = value
        if format == "html":
            return RawBlock("html", "<pre>" + process_html(code) + "</pre>")
    if key == "Code":
        [[ident, classes, keyvals], code] = value
        if format == "html":
            return RawInline("html", "<code>" + process_html(code) + "</code>")
    if key == "Image":
        [attr, inlines, [src, tit]] = value
        if format != "icml":
            return Image(attr, inlines, [src.replace(".pdf", ".png"), tit])
    if key == "Div":
        [[ident, classes, keyvals], blocks] = value
        if format == "docx":
            if "numbers" in classes:
                return Null()
            if any(cls in classes for cls in ["keyterm", "keyterms", "didyouknow",
                                              "syntax", "quickcheck", "program"]):
                return Div([ident, classes, keyvals],
                           [HorizontalRule()]
                           + walk(blocks, textbook, format, meta)
                           + [HorizontalRule()])
def test_use_refs_factory_8(self):
    """Tests use_refs_factory() #8."""

    ## test.md: @fig:1:

    # pandoc-1.15.2 doesn't detect references that end in a colon.  This
    # was fixed in subsequent versions of pandoc.  There is a trivial
    # workaround; use "{@fig:1}:" instead.  This is demonstrated in the
    # next unit test.  Given that there is a trivial work-around, this is
    # probably not worth fixing.

    # Command: pandoc-1.15.2 test.md -t json
    src = eval(r'''[{"unMeta":{}},[{"t":"Para","c":[{"t":"Str","c":"@fig:1:"}]}]]''')

    # Check against pandoc-1.15.2
    md = subprocess.Popen(('echo', '@fig:1:'), stdout=subprocess.PIPE)
    output = eval(subprocess.check_output(
        (PANDOC1p15 + ' -t json').split(), stdin=md.stdout).strip())
    self.assertEqual(src, output)

    # Generate expected using current pandoc
    md = subprocess.Popen(('echo', '@fig:1:'), stdout=subprocess.PIPE)
    expected = eval(subprocess.check_output(
        'pandoc -t json'.split(), stdin=md.stdout).strip())

    # Make the comparison
    process_refs = process_refs_factory(['fig:1'])
    self.assertEqual(walk(src, process_refs, {}, ''), expected)
def toJSONFilters(actions):
    """Converts a list of actions into a filter that reads a JSON-formatted
    pandoc document from stdin, transforms it by walking the tree with the
    actions, and returns a new JSON-formatted pandoc document to stdout.

    The argument is a list of functions action(key, value, format, meta),
    where key is the type of the pandoc object (e.g. 'Str', 'Para'), value
    is the contents of the object (e.g. a string for 'Str', a list of
    inline elements for 'Para'), format is the target output format (which
    will be taken for the first command line argument if present), and meta
    is the document's metadata.

    If the function returns None, the object to which it applies will
    remain unchanged.  If it returns an object, the object will be
    replaced.  If it returns a list, the list will be spliced in to the
    list to which the target object belongs.  (So, returning an empty list
    deletes the object.)
    """
    try:
        input_stream = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8')
    except AttributeError:
        # Python 2 does not have sys.stdin.buffer.
        # REF: http://stackoverflow.com/questions/2467928/python-unicodeencodeerror-when-reading-from-stdin
        input_stream = codecs.getreader("utf-8")(sys.stdin)

    doc = json.loads(input_stream.read())
    if len(sys.argv) > 1:
        format = sys.argv[1]
    else:
        format = ""

    altered = reduce(lambda x, action: walk(x, action, format, doc[0]['unMeta']),
                     actions, doc)
    addListings(altered, format, altered[0]['unMeta'])

    json.dump(altered, sys.stdout)
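# A minimal, hypothetical action illustrating the contract documented above:
# returning an object replaces the element, returning a list splices it into
# the parent list (an empty list deletes it), and returning None leaves the
# element unchanged. The name `shout` is made up for this example.
from pandocfilters import Str

def shout(key, value, format, meta):
    if key == 'Str':
        return Str(value.upper())   # replace the element
    if key == 'HorizontalRule':
        return []                   # delete the element
    return None                     # leave everything else unchanged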
def main():
    doc = pf.json.loads(pf.sys.stdin.read())

    if len(pf.sys.argv) > 1:
        format = pf.sys.argv[1]
    else:
        format = ""

    metadata = doc[0]['unMeta']
    args = {k: v['c'] for k, v in metadata.items()}
    autoref = args.get('autoref', True)
    numbersections = args.get('numbersections', True)

    refmanager = ReferenceManager(autoref=autoref,
                                  numbersections=numbersections)

    altered = doc
    for action in refmanager.reference_filter:
        altered = pf.walk(altered, action, format, metadata)

    # Need to ensure the LaTeX template knows about figures and tables
    # by adding to metadata (only if it's not already specified).
    if format == 'latex' or format == 'beamer':
        if refmanager.table_exists and 'tables' not in metadata:
            metadata['tables'] = pf.elt('MetaBool', 1)(True)
        if refmanager.figure_exists and 'graphics' not in metadata:
            metadata['graphics'] = pf.elt('MetaBool', 1)(True)

    altered[0]['unMeta'] = metadata

    pf.json.dump(altered, pf.sys.stdout)
def filter_in(self, instring):
    def _filter(key, value, format, meta):
        # remove HTML specific stuff
        if key == "Link":
            # remove relative path prefix and .html suffix
            internal, [href, text] = value
            if href.endswith(".html"):
                href = href[:-5]
            # FIXME: this stupid detection will not work
            # or just leave the full path?
            # if href.startswith("./"):
            #     href = href[2:]
            # elif href.startswith("../"):
            #     href = href[3:]
            return pandocfilters.Link(internal, [href, text])

        # TODO: it's implemented in filter_pre, but could be useful anyway since
        # html may not be the only input format; the most generic way should be
        # implemented
        # if key == "Header":
        #     level, classes, internal = value
        #
        #     # record top level
        #     if self.heading_top_level == 0:
        #         self.heading_top_level = level
        #
        #     # ensure we start from h1 in output
        #     if level > self.heading_top_level:
        #         level -= self.heading_top_level
        #
        #     return pandocfilters.Header(level, classes, internal)

    doc = json.loads(instring)
    altered = pandocfilters.walk(doc, _filter, self.format, doc[0]["unMeta"])
    return json.dumps(altered)
def latexstringify(x):
    result = []

    def go(key, val, format, meta):
        if key == 'Str':
            result.append(val)
        elif key == 'Code':
            result.append(val[1])
        elif key == 'Math':
            result.append('$' + val[1] + '$')
        elif key == 'LineBreak':
            result.append(" ")
        elif key == 'Space':
            result.append(" ")
        elif key == 'RawInline' and val[0] == 'tex':
            result.append(val[1])

    pf.walk(x, go, "", {})
    return ''.join(result).replace('%', '\\%')
def test_replace_refs_factory(self):
    """Tests replace_refs_factory."""

    ## test.md: As shown in @fig:1. ##

    # Command: pandoc-1.15.2 test.md -t json
    src = eval(r'''[{"unMeta":{}},[{"t":"Para","c":[{"t":"Str","c":"As"},{"t":"Space","c":[]},{"t":"Str","c":"shown"},{"t":"Space","c":[]},{"t":"Str","c":"in"},{"t":"Space","c":[]},{"t":"Cite","c":[["",[],[]],[{"citationSuffix":[],"citationNoteNum":0,"citationMode":{"t":"AuthorInText","c":[]},"citationPrefix":[],"citationId":"fig:one","citationHash":0}],[{"t":"Str","c":"@fig:one"}]]},{"t":"Str","c":"."}]}]]''')

    # Hand-coded
    expected = eval(r'''[{"unMeta":{}},[{"t":"Para","c":[{"t":"Str","c":"As"},{"t":"Space","c":[]},{"t":"Str","c":"shown"},{"t":"Space","c":[]},{"t":"Str","c":"in"},{"t":"Space","c":[]},{"t":"Str","c":"fig."},{"t":"Space","c":[]},{'t':'Link','c':[['',[],[]],[{'t':'Str','c':'1'}],['#fig:one','']]},{"t":"Str","c":"."}]}]]''')

    # Make the comparison
    replace_refs = replace_refs_factory({'fig:one': 1}, True, False,
                                        ['fig.', 'figs.'],
                                        ['Figure', 'Figures'], 'figure')
    self.assertEqual(
        walk(walk(src, replace_refs, {}, ''), join_strings, {}, ''),
        expected)
def stringify_maths(x):
    """Walks the tree x and returns concatenated string content,
    leaving out all formatting.
    """
    result = []

    def go(key, val, format, meta):
        if key in ['Str', 'MetaString']:
            result.append(val)
        elif key == 'Code':
            result.append(val[1])
        elif key == 'Math':
            result.append("$" + val[1] + "$")
        elif key == 'LineBreak':
            result.append(" ")
        elif key == 'Space':
            result.append(" ")

    pf.walk(x, go, "", {})
    return ''.join(result)
def toJSONFilter(actions):
    """Modified from pandocfilters to accept a list of actions (to apply in
    series) as well as a single action.

    Converts an action into a filter that reads a JSON-formatted pandoc
    document from stdin, transforms it by walking the tree with the action,
    and returns a new JSON-formatted pandoc document to stdout.

    The argument is a function action(key, value, format, meta), where key
    is the type of the pandoc object (e.g. 'Str', 'Para'), value is the
    contents of the object (e.g. a string for 'Str', a list of inline
    elements for 'Para'), format is the target output format (which will be
    taken for the first command line argument if present), and meta is the
    document's metadata.

    If the function returns None, the object to which it applies will
    remain unchanged.  If it returns an object, the object will be
    replaced.  If it returns a list, the list will be spliced in to the
    list to which the target object belongs.  (So, returning an empty list
    deletes the object.)
    """
    doc = pf.json.loads(pf.sys.stdin.read())
    if len(pf.sys.argv) > 1:
        format = pf.sys.argv[1]
    else:
        format = ""

    if 'meta' in doc:
        metadata = doc['meta']
    elif doc[0]:  # old API
        metadata = doc[0]['unMeta']

    if type(actions) is type(toJSONFilter):
        altered = pf.walk(doc, actions, format, metadata)
    elif type(actions) is list:
        altered = doc
        for action in actions:
            altered = pf.walk(altered, action, format, metadata)

    pf.json.dump(altered, pf.sys.stdout)
def main(): """Filters the document AST.""" # Get the output format, document and metadata fmt = args.fmt doc = json.loads(STDIN.read()) meta = doc[0]['unMeta'] # Process the metadata variables process(meta) # First pass altered = functools.reduce(lambda x, action: walk(x, action, fmt, meta), [attach_attrs_table, process_tables, detach_attrs_table], doc) # Second pass process_refs = process_refs_factory(references.keys()) replace_refs = replace_refs_factory(references, cleveref_default, plusname, starname, 'table') altered = functools.reduce(lambda x, action: walk(x, action, fmt, meta), [repair_refs, process_refs, replace_refs], altered) # Assemble supporting TeX if fmt == 'latex': tex = ['% Tablenos directives'] # Change caption name if captionname != 'Table': tex.append(r'\renewcommand{\tablename}{%s}'%captionname) if len(tex) > 1: altered[1] = [RawBlock('tex', '\n'.join(tex))] + altered[1] # Dump the results json.dump(altered, STDOUT) # Flush stdout STDOUT.flush()
def toJSONFilter(filters=[], metafilters=[]):
    reader = codecs.getreader('utf8')
    doc = json.loads(reader(sys.stdin).read())
    if len(sys.argv) > 1:
        format = sys.argv[1]
    else:
        format = ""
    altered = doc
    for action in filters:
        altered = walk(altered, action, format, doc['meta'])
    for action in metafilters:
        action(altered['meta'])
    json.dump(altered, sys.stdout)
def main(): """Filters the document AST.""" # Get the output format, document and metadata fmt = sys.argv[1] if len(sys.argv) > 1 else '' doc = pandocfilters.json.loads(STDIN.read()) meta = doc[0]['unMeta'] # Replace attributed equations and references in the AST altered = functools.reduce(lambda x, action: walk(x, action, fmt, meta), [replace_attreqs, replace_refs], doc) # Dump the results pandocfilters.json.dump(altered, STDOUT)
def test_repair_refs_2(self):
    """Tests repair_refs() #2."""

    ## test.md: Eqs. {@eq:1}a and {@eq:1}b. ##

    # Command: pandoc-1.17.2 test.md -f markdown+autolink_bare_uris -t json
    src = eval(r'''[{"unMeta":{}},[{"t":"Para","c":[{"t":"Str","c":"Eqs."},{"t":"Space","c":[]},{"t":"Link","c":[["",[],[]],[{"t":"Str","c":"{@eq"}],["mailto:%7B@eq",""]]},{"t":"Str","c":":1}a"},{"t":"Space","c":[]},{"t":"Str","c":"and"},{"t":"Space","c":[]},{"t":"Link","c":[["",[],[]],[{"t":"Str","c":"{@eq"}],["mailto:%7B@eq",""]]},{"t":"Str","c":":1}b."}]}]]''')

    # Command: pandoc-1.17.2 test.md -t json
    expected = eval(r'''[{"unMeta":{}},[{"t":"Para","c":[{"t":"Str","c":"Eqs."},{"t":"Space","c":[]},{"t":"Str","c":"{"},{"t":"Cite","c":[[{"citationSuffix":[],"citationNoteNum":0,"citationMode":{"t":"AuthorInText","c":[]},"citationPrefix":[],"citationId":"eq:1","citationHash":0}],[{"t":"Str","c":"@eq:1"}]]},{"t":"Str","c":"}a"},{"t":"Space","c":[]},{"t":"Str","c":"and"},{"t":"Space","c":[]},{"t":"Str","c":"{"},{"t":"Cite","c":[[{"citationSuffix":[],"citationNoteNum":0,"citationMode":{"t":"AuthorInText","c":[]},"citationPrefix":[],"citationId":"eq:1","citationHash":0}],[{"t":"Str","c":"@eq:1"}]]},{"t":"Str","c":"}b."}]}]]''')

    # Make the comparison
    pandocxnos.init('1.17.2')
    self.assertEqual(walk(src, repair_refs, {}, ''), expected)
    pandocxnos.init(PANDOCVERSION)
def test_repair_refs_8(self):
    """Tests repair_refs() #8."""

    ## test.md: {@fig:1}-{@fig:3} ##

    # Command: pandoc-1.17.2 test.md -f markdown+autolink_bare_uris -t json
    src = eval(r'''[{"unMeta":{}},[{"t":"Para","c":[{"t":"Link","c":[["",[],[]],[{"t":"Str","c":"{@fig"}],["mailto:%7B@fig",""]]},{"t":"Str","c":":"},{"t":"Link","c":[["",[],[]],[{"t":"Str","c":"1}-{@fig"}],["mailto:1%7D-%7B@fig",""]]},{"t":"Str","c":":3}"}]}]]''')

    # Command: pandoc test.md -t json
    expected = eval(r'''[{"unMeta":{}},[{"t":"Para","c":[{"t":"Str","c":"{"},{"t":"Cite","c":[[{"citationSuffix":[],"citationNoteNum":0,"citationMode":{"t":"AuthorInText","c":[]},"citationPrefix":[],"citationId":"fig:1","citationHash":0}],[{"t":"Str","c":"@fig:1"}]]},{"t":"Str","c":"}-{"},{"t":"Cite","c":[[{"citationSuffix":[],"citationNoteNum":0,"citationMode":{"t":"AuthorInText","c":[]},"citationPrefix":[],"citationId":"fig:3","citationHash":0}],[{"t":"Str","c":"@fig:3"}]]},{"t":"Str","c":"}"}]}]]''')

    # Make the comparison
    pandocxnos.init('1.17.2')
    self.assertEqual(walk(src, repair_refs, {}, ''), expected)
    pandocxnos.init(PANDOCVERSION)
# ==============================================================================
def toJSONFilters(input_stream, output_stream, *actions):
# ==============================================================================
    """Modified version of pandoc filter. This version of pandoc filter is able
    to read from any input stream (not only from stdin) and writes to any
    output stream (not only stdout).
    """
    import pandocfilters

    doc = json.loads(input_stream.read())
    fmt = "json"
    altered = functools.reduce(
        lambda x, action: pandocfilters.walk(x, action, fmt, doc[0]['unMeta']),
        actions, doc)
    json.dump(altered, output_stream)
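# A short usage sketch for the stream-based toJSONFilters above (the AST and
# the upper_str action are made up for illustration, and the function's own
# json/functools imports are assumed to be in scope). Any file-like objects
# work, not just stdin/stdout.
import io
import json

ast = [{"unMeta": {}},
       [{"t": "Para", "c": [{"t": "Str", "c": "hello"}]}]]

def upper_str(key, value, fmt, meta):
    # Upper-case every Str node; leave everything else alone
    if key == 'Str':
        return {"t": "Str", "c": value.upper()}

in_stream = io.StringIO(json.dumps(ast))
out_stream = io.StringIO()
toJSONFilters(in_stream, out_stream, upper_str)
print(out_stream.getvalue())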
def dollarfy(x):
    """Replaces Math elements in element list 'x' with a $-enclosed string.

    stringify() passes through TeX math.  Use dollarfy(x) first to replace
    Math elements with math strings set in dollars.  'x' should be a deep
    copy so that the underlying document is left untouched.

    Returns 'x'."""

    def _dollarfy(key, value, fmt, meta):  # pylint: disable=unused-argument
        """Replaces Math elements"""
        if key == 'Math':
            return Str('$' + value[1] + '$')
        return None

    return walk(x, _dollarfy, '', {})
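# A brief usage sketch for dollarfy() above (the caption inlines are
# hand-written for illustration; stringify here is pandocfilters' own, which
# emits Math content without dollar signs, hence the dollarfy pass on a deep
# copy first):
import copy
from pandocfilters import stringify

caption = [{'t': 'Str', 'c': 'area'},
           {'t': 'Space', 'c': []},
           {'t': 'Math', 'c': [{'t': 'InlineMath'}, r'\pi r^2']}]

text = stringify(dollarfy(copy.deepcopy(caption)))
# text should be roughly 'area $\pi r^2$', with the original caption untouched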
def pandoc(app, docname, source):
    args = ["pandoc", "-f", "markdown", "-t", "json", "--bibliography=h5md.bib"]
    proc = Popen(args, stdin=PIPE, stdout=PIPE)
    indata = source[0].encode(app.config.source_encoding)
    outdata, _ = proc.communicate(indata)
    outdata = loads(outdata)
    outdata = walk(outdata, escape_link, None, None)
    outdata = dumps(outdata)

    args = ["pandoc", "-f", "json", "-t", "rst"]
    if os.path.exists(docname + ".rst"):
        args += ["-A", docname + ".rst"]
    proc = Popen(args, stdin=PIPE, stdout=PIPE)
    indata = source[0].encode(app.config.source_encoding)
    outdata, _ = proc.communicate(outdata)
    outdata = outdata.replace(".. code::", ".. code-block::")
    source[0] = outdata.decode(app.config.source_encoding)
def main():
    doc = pf.json.loads(pf.sys.stdin.read())

    if len(pf.sys.argv) > 1:
        format = pf.sys.argv[1]
    else:
        format = ""

    metadata = doc[0]['unMeta']
    args = {k: v['c'] for k, v in metadata.items()}
    autoref = args.get('autoref', True)

    refmanager = ReferenceManager(autoref=autoref)

    altered = doc
    for action in refmanager.reference_filter:
        altered = pf.walk(altered, action, format, metadata)

    pf.json.dump(altered, pf.sys.stdout)
def test_repair_refs_2(self):
    """Tests repair_refs() #2."""

    ## test.md: Eqs. {@eq:1}a and {@eq:1}b. ##

    # Command: pandoc-1.17.2 test.md -f markdown+autolink_bare_uris -t json
    src = eval(
        r'''[{"unMeta":{}},[{"t":"Para","c":[{"t":"Str","c":"Eqs."},{"t":"Space","c":[]},{"t":"Link","c":[["",[],[]],[{"t":"Str","c":"{@eq"}],["mailto:%7B@eq",""]]},{"t":"Str","c":":1}a"},{"t":"Space","c":[]},{"t":"Str","c":"and"},{"t":"Space","c":[]},{"t":"Link","c":[["",[],[]],[{"t":"Str","c":"{@eq"}],["mailto:%7B@eq",""]]},{"t":"Str","c":":1}b."}]}]]''')

    # Command: pandoc-1.17.2 test.md -t json
    expected = eval(
        r'''[{"unMeta":{}},[{"t":"Para","c":[{"t":"Str","c":"Eqs."},{"t":"Space","c":[]},{"t":"Str","c":"{"},{"t":"Cite","c":[[{"citationSuffix":[],"citationNoteNum":0,"citationMode":{"t":"AuthorInText","c":[]},"citationPrefix":[],"citationId":"eq:1","citationHash":0}],[{"t":"Str","c":"@eq:1"}]]},{"t":"Str","c":"}a"},{"t":"Space","c":[]},{"t":"Str","c":"and"},{"t":"Space","c":[]},{"t":"Str","c":"{"},{"t":"Cite","c":[[{"citationSuffix":[],"citationNoteNum":0,"citationMode":{"t":"AuthorInText","c":[]},"citationPrefix":[],"citationId":"eq:1","citationHash":0}],[{"t":"Str","c":"@eq:1"}]]},{"t":"Str","c":"}b."}]}]]''')

    # Make the comparison
    pandocxnos.init('1.17.2')
    self.assertEqual(walk(src, repair_refs, {}, ''), expected)
    pandocxnos.init(PANDOCVERSION)
def main():
    # This grabs the output of `pandoc` as json file, retrieves `metadata` to
    # check for draft status, and runs the document through `handle_comments`.
    # Then adds any needed entries to `metadata` and passes the output back out
    # to `pandoc`. This code is modeled after
    # <https://github.com/aaren/pandoc-reference-filter>.
    global DRAFT

    document = json.loads(sys.stdin.read())

    if len(sys.argv) > 1:
        format = sys.argv[1]
    else:
        format = ''

    if 'meta' in document:  # new API
        metadata = document['meta']
    elif document[0]:  # old API
        metadata = document[0]['unMeta']

    if 'draft' in metadata:
        DRAFT = metadata['draft']['c']
    else:
        DRAFT = False

    newDocument = document
    newDocument = walk(newDocument, handle_comments, format, metadata)

    # Need to ensure the LaTeX/beamer template knows if `mdframed` package is
    # required (when `<!box>` has been used).
    if (format == 'latex' or format == 'beamer') and USED_BOX:
        MetaList = elt('MetaList', 1)
        MetaInlines = elt('MetaInlines', 1)
        rawinlines = [MetaInlines([RawInline('tex',
                                             '\\RequirePackage{mdframed}')])]
        if 'header-includes' in metadata:
            headerIncludes = metadata['header-includes']
            if headerIncludes['t'] == 'MetaList':
                rawinlines += headerIncludes['c']
            else:  # headerIncludes['t'] == 'MetaInlines'
                rawinlines += [headerIncludes]
        metadata['header-includes'] = MetaList(rawinlines)
        newDocument['meta'] = metadata

    json.dump(newDocument, sys.stdout)
def test_repair_refs_1(self):
    """Tests repair_refs() #1."""

    ## test.md: {@doe:1999} ##

    # Command: pandoc-1.17.2 test.md -f markdown+autolink_bare_uris -t json
    src = eval(
        r'''[{"unMeta":{}},[{"t":"Para","c":[{"t":"Link","c":[["",[],[]],[{"t":"Str","c":"{@doe"}],["mailto:%7B@doe",""]]},{"t":"Str","c":":1999}"}]}]]''')

    # Command: pandoc-1.17.2 test.md -t json
    expected = eval(
        r'''[{"unMeta":{}},[{"t":"Para","c":[{"t":"Str","c":"{"},{"t":"Cite","c":[[{"citationSuffix":[],"citationNoteNum":0,"citationMode":{"t":"AuthorInText","c":[]},"citationPrefix":[],"citationId":"doe:1999","citationHash":0}],[{"t":"Str","c":"@doe:1999"}]]},{"t":"Str","c":"}"}]}]]''')

    # Make the comparison
    pandocxnos.init('1.17.2')
    self.assertEqual(walk(src, repair_refs, '', {}), expected)
    pandocxnos.init(PANDOCVERSION)
def main(): """Filters the document AST.""" # pylint: disable=global-statement global PANDOCVERSION global AttrMath # Get the output format and document fmt = args.fmt doc = json.loads(STDIN.read()) # Initialize pandocxnos # pylint: disable=too-many-function-args PANDOCVERSION = pandocxnos.init(args.pandocversion, doc) # Element primitives AttrMath = elt('Math', 2) # Chop up the doc meta = doc['meta'] if PANDOCVERSION >= '1.18' else doc[0]['unMeta'] blocks = doc['blocks'] if PANDOCVERSION >= '1.18' else doc[1:] # First pass attach_attrs_math = attach_attrs_factory(Math, allow_space=True) detach_attrs_math = detach_attrs_factory(Math) insert_secnos = insert_secnos_factory(Math) delete_secnos = delete_secnos_factory(Math) altered = functools.reduce(lambda x, action: walk(x, action, fmt, meta), [ attach_attrs_math, insert_secnos, process_figs, delete_secnos, detach_attrs_math ], blocks) # Update the doc if PANDOCVERSION >= '1.18': doc['blocks'] = altered else: doc = doc[:1] + altered # Dump the results json.dump(doc, STDOUT) # Flush stdout STDOUT.flush()
def __apply_filters(self, json_ast, filters, path, meta=None):
    """add MarkDown extensions with Pandoc filters"""
    if meta is None:
        meta = []
    try:
        filter_ = None
        fmt = self.PANDOC_FORMAT_NAME
        for filter_ in filters:
            # reset chapter count for next filter which may count chapters
            if isinstance(meta, dict):
                if "chapter" in meta:
                    meta["chapter"] = 1
            json_ast = pandocfilters.walk(json_ast, filter_, fmt, meta)
    except KeyError as e:  # API clash(?)
        raise errors.StructuralError(
            ("Incompatible Pandoc API found, while "
             "applying filter %s (ABI clash?).\nKeyError: %s")
            % (filter_.__name__, str(e)),
            path,
        )
def test_process_refs_factory_6(self):
    """Tests process_refs_factory() #6."""

    ## test.md: See xxx{+@tbl:1}xxx. ##

    # Command: pandoc test.md -t json
    src = eval(r'''{"blocks":[{"t":"Para","c":[{"t":"Str","c":"See"},{"t":"Space"},{"t":"Str","c":"xxx{+"},{"t":"Cite","c":[[{"citationSuffix":[],"citationNoteNum":0,"citationMode":{"t":"AuthorInText"},"citationPrefix":[],"citationId":"tbl:1","citationHash":0}],[{"t":"Str","c":"@tbl:1"}]]},{"t":"Str","c":"}xxx."}]}],"pandoc-api-version":[1,17,5,1],"meta":{}}''')

    # Check src against current pandoc
    md = subprocess.Popen(('echo', 'See xxx{+@tbl:1}xxx.'),
                          stdout=subprocess.PIPE)
    output = eval(subprocess.check_output(
        'pandoc -t json'.split(), stdin=md.stdout).strip())
    self.assertEqual(src, output)

    # Hand-coded (braces stripped, modifier extracted, attributes added)
    expected = eval(r'''{"meta":{},"blocks":[{"t":"Para","c":[{"t":"Str","c":"See"},{"t":"Space"},{"t":"Str","c":"xxx"},{"t":"Cite","c":[["",[],[["modifier","+"]]],[{"citationSuffix":[],"citationNoteNum":0,"citationMode":{"t":"AuthorInText"},"citationPrefix":[],"citationId":"tbl:1","citationHash":0}],[{"t":"Str","c":"@tbl:1"}]]},{"t":"Str","c":"xxx."}]}],"pandoc-api-version":[1,17,5,1]}''')

    # Make the comparison
    process_refs = process_refs_factory(['tbl:1'])
    self.assertEqual(walk(src, process_refs, '', {}), expected)
def test_process_refs_factory_7(self):
    """Tests process_refs_factory() #7."""

    ## test.md: See [+@eq:1]. ##

    # Command: pandoc test.md -t json
    src = eval(r'''{"blocks":[{"t":"Para","c":[{"t":"Str","c":"See"},{"t":"Space"},{"t":"Cite","c":[[{"citationSuffix":[],"citationNoteNum":0,"citationMode":{"t":"NormalCitation"},"citationPrefix":[{"t":"Str","c":"+"}],"citationId":"eq:1","citationHash":0}],[{"t":"Str","c":"[+@eq:1]"}]]},{"t":"Str","c":"."}]}],"pandoc-api-version":[%s],"meta":{}}'''%PANDOC_API_VERSION)

    # Check src against current pandoc
    md = subprocess.Popen(('echo', 'See [+@eq:1].'), stdout=subprocess.PIPE)
    output = eval(subprocess.check_output(
        'pandoc -t json'.split(), stdin=md.stdout).strip())
    self.assertEqual(src, output)

    # Hand-coded (modifier extracted, attributes added)
    expected = eval(r'''{"meta":{},"blocks":[{"t":"Para","c":[{"t":"Str","c":"See"},{"t":"Space"},{"t":"Cite","c":[["",[],[["modifier","+"]]],[{"citationSuffix":[],"citationNoteNum":0,"citationMode":{"t":"NormalCitation"},"citationPrefix":[],"citationId":"eq:1","citationHash":0}],[{"t":"Str","c":"[+@eq:1]"}]]},{"t":"Str","c":"."}]}],"pandoc-api-version":[%s]}'''%PANDOC_API_VERSION)

    # Make the comparison
    process_refs = process_refs_factory(None, ['eq:1'], 0)
    self.assertEqual(walk(src, process_refs, '', {}), expected)
def update_math(j, equations, use_png=True):
    equation_index = [0]

    def walker(key, value, format, meta):
        if key == 'Math':
            equation = equations[equation_index[0]]
            equation_index[0] += 1
            if use_png:
                image_path = equation.png_path
                geometry = equation.png_geometry
                zoom = png_zoom / png_zoom_fudge
                zoom_depth = png_zoom
                depth_fudge = png_depth_fudge
                mime = 'image/png'
            else:
                image_path = equation.svg_path
                geometry = equation.svg_geometry
                zoom = 1
                zoom_depth = 1
                depth_fudge = 0
                mime = 'image/svg'
            new_image_path = relocate_autogenerated_resource(image_path, mime)
            options = {}
            if equation.inline:
                options['classes'] = ['inlinemath', 'math']
            else:
                options['classes'] = ['displaymath', 'math']
            options['alt_text'] = equation.alt_text()
            options['attrs'] = [
                ['style', 'vertical-align: -{}px'.format(
                    (geometry[2] / zoom_depth) - depth_fudge)],
                ['width', str(geometry[0] / zoom) + 'px'],
                ['height', str((geometry[1] + geometry[2]) / zoom) + 'px'],
            ]
            return create_image_element(new_image_path, **options)

    return pf.walk(j, walker, None, None)
def test_detach_attrs_factory(self):
    """Tests filter_attrs_factory()."""

    detach_attrs_math = detach_attrs_factory(Math)

    ## Use 'expected' from test_attach_attrs_factory ##
    src = eval(r'''{"meta":{},"blocks":[{"t":"Para","c":[{"t":"Math","c":[["eq:1",[],[["tag","B.1"]]],{"t":"DisplayMath"}," y = f(x) "]}]}],"pandoc-api-version":[%s]}'''%PANDOC_API_VERSION)

    # test.md: $$ y = f(x) $$
    # Command: pandoc test.md -t json
    expected = eval(r'''{"blocks":[{"t":"Para","c":[{"t":"Math","c":[{"t":"DisplayMath"}," y = f(x) "]}]}],"pandoc-api-version":[%s],"meta":{}}'''%PANDOC_API_VERSION)

    # Check expected against current pandoc
    md = subprocess.Popen(('echo', '$$ y = f(x) $$'), stdout=subprocess.PIPE)
    output = eval(subprocess.check_output(
        'pandoc -t json'.split(), stdin=md.stdout).strip())
    self.assertEqual(expected, output)

    # Make the comparison
    self.assertEqual(walk(src, detach_attrs_math, '', {}), expected)
def test_attach_attrs_factory(self):
    """Tests attach_attrs_math()."""

    attach_attrs_math = attach_attrs_factory(Math, 0, allow_space=True)

    ## test.md: $$ y = f(x) $${#eq:1 tag="B.1"} ##

    # Command: pandoc test.md -t json
    src = eval(r'''{"blocks":[{"t":"Para","c":[{"t":"Math","c":[{"t":"DisplayMath"}," y = f(x) "]},{"t":"Str","c":"{#eq:1"},{"t":"Space"},{"t":"Str","c":"tag="},{"t":"Quoted","c":[{"t":"DoubleQuote"},[{"t":"Str","c":"B.1"}]]},{"t":"Str","c":"}"}]}],"pandoc-api-version":[%s],"meta":{}}'''%PANDOC_API_VERSION)

    # Check src against current pandoc
    md = subprocess.Popen(('echo', '$$ y = f(x) $${#eq:1 tag="B.1"}'),
                          stdout=subprocess.PIPE)
    output = eval(subprocess.check_output(
        'pandoc -t json'.split(), stdin=md.stdout).strip())
    self.assertEqual(src, output)

    # Hand-coded (attributes deleted)
    expected = eval(r'''{"meta":{},"blocks":[{"t":"Para","c":[{"t":"Math","c":[["eq:1",[],[["tag","B.1"]]],{"t":"DisplayMath"}," y = f(x) "]}]}],"pandoc-api-version":[%s]}'''%PANDOC_API_VERSION)

    # Make the comparison
    self.assertEqual(walk(src, attach_attrs_math, '', {}), expected)
def test_use_refs_factory_7(self):
    """Tests use_refs_factory() #7."""

    ## test.md: {+@tbl:one}-{@tbl:four} provide the data. ##

    # Command: pandoc test.md -t json
    src = eval(r'''{"blocks":[{"t":"Para","c":[{"t":"Str","c":"{+"},{"t":"Cite","c":[[{"citationSuffix":[],"citationNoteNum":0,"citationMode":{"t":"AuthorInText"},"citationPrefix":[],"citationId":"tbl:one","citationHash":0}],[{"t":"Str","c":"@tbl:one"}]]},{"t":"Str","c":"}-{"},{"t":"Cite","c":[[{"citationSuffix":[],"citationNoteNum":0,"citationMode":{"t":"AuthorInText"},"citationPrefix":[],"citationId":"tbl:four","citationHash":0}],[{"t":"Str","c":"@tbl:four"}]]},{"t":"Str","c":"}"},{"t":"Space"},{"t":"Str","c":"provide"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"data."}]}],"pandoc-api-version":[%s],"meta":{}}'''%PANDOC_API_VERSION)

    # Check src against current pandoc
    md = subprocess.Popen(
        ('echo', '{+@tbl:one}-{@tbl:four} provide the data.'),
        stdout=subprocess.PIPE)
    output = eval(subprocess.check_output(
        'pandoc -t json'.split(), stdin=md.stdout).strip())
    self.assertEqual(src, output)

    # Hand-coded
    expected = eval(r'''{"meta":{},"blocks":[{"t":"Para","c":[{"t":"Cite","c":[["",[],[["modifier","+"]]],[{"citationSuffix":[],"citationNoteNum":0,"citationMode":{"t":"AuthorInText"},"citationPrefix":[],"citationId":"tbl:one","citationHash":0}],[{"t":"Str","c":"@tbl:one"}]]},{"t":"Str","c":"-"},{"t":"Cite","c":[["",[],[]],[{"citationSuffix":[],"citationNoteNum":0,"citationMode":{"t":"AuthorInText"},"citationPrefix":[],"citationId":"tbl:four","citationHash":0}],[{"t":"Str","c":"@tbl:four"}]]},{"t":"Space"},{"t":"Str","c":"provide"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"data."}]}],"pandoc-api-version":[%s]}'''%PANDOC_API_VERSION)

    # Make the comparison
    process_refs = process_refs_factory(None, ['tbl:one', 'tbl:four'], 0)
    self.assertEqual(walk(src, process_refs, '', {}), expected)
def test_process_refs_factory_9(self):
    """Tests process_refs_factory() #9."""

    ## test.md: {@fig:1}:

    # See previous unit test

    # Command: pandoc-1.15.2 test.md -t json
    src = eval(r'''[{"unMeta":{}},[{"t":"Para","c":[{"t":"Str","c":"{"},{"t":"Cite","c":[[{"citationSuffix":[],"citationNoteNum":0,"citationMode":{"t":"AuthorInText","c":[]},"citationPrefix":[],"citationId":"fig:1","citationHash":0}],[{"t":"Str","c":"@fig:1"}]]},{"t":"Str","c":"}:"}]}]]''')

    # Check against pandoc-1.15.2
    md = subprocess.Popen(('echo', '{@fig:1}:'), stdout=subprocess.PIPE)
    output = eval(subprocess.check_output(
        (PANDOC1p15 + ' -t json').split(), stdin=md.stdout).strip())
    self.assertEqual(src, output)

    # Hand-coded
    expected = eval(r'''[{"unMeta":{}},[{"t":"Para","c":[{"t":"Cite","c":[["",[],[]],[{"citationSuffix":[],"citationNoteNum":0,"citationMode":{"t":"AuthorInText","c":[]},"citationPrefix":[],"citationId":"fig:1","citationHash":0}],[{"t":"Str","c":"@fig:1"}]]},{"t":"Str","c":":"}]}]]''')

    # Make the comparison
    process_refs = process_refs_factory(None, ['fig:1'], 0)
    self.assertEqual(walk(src, process_refs, {}, ''), expected)
def test_process_refs_factory_2(self):
    """Tests process_refs_factory() #2."""

    ## test.md: (@eq:one) ##

    # Command: pandoc test.md -t json
    src = eval(
        r'''{"blocks":[{"t":"Para","c":[{"t":"Str","c":"("},{"t":"Cite","c":[[{"citationSuffix":[],"citationNoteNum":0,"citationMode":{"t":"AuthorInText"},"citationPrefix":[],"citationId":"eq:one","citationHash":0}],[{"t":"Str","c":"@eq:one"}]]},{"t":"Str","c":")"}]}],"pandoc-api-version":[%s],"meta":{}}'''%PANDOC_API_VERSION)

    # Check src against current pandoc
    md = subprocess.Popen(('echo', '(@eq:one)'), stdout=subprocess.PIPE)
    output = eval(subprocess.check_output(
        'pandoc -t json'.split(), stdin=md.stdout).strip())
    self.assertEqual(src, output)

    # Hand-coded (attributes added)
    expected = eval(r'''{"blocks":[{"t":"Para","c":[{"t":"Str","c":"("},{"t":"Cite","c":[["",[],[]],[{"citationSuffix":[],"citationNoteNum":0,"citationMode":{"t":"AuthorInText"},"citationPrefix":[],"citationId":"eq:one","citationHash":0}],[{"t":"Str","c":"@eq:one"}]]},{"t":"Str","c":")"}]}],"pandoc-api-version":[%s],"meta":{}}'''%PANDOC_API_VERSION)

    # Make the comparison
    process_refs = process_refs_factory(None, ['eq:one'], 0)
    self.assertEqual(walk(src, process_refs, '', {}), expected)
def test_attach_attrs_factory(self):
    """Tests attach_attrs_math()."""

    attach_attrs_math = attach_attrs_factory(Math, allow_space=True)

    ## test.md: $$ y = f(x) $${#eq:1 tag="B.1"} ##

    # Command: pandoc test.md -t json
    src = eval(r'''{"blocks":[{"t":"Para","c":[{"t":"Math","c":[{"t":"DisplayMath"}," y = f(x) "]},{"t":"Str","c":"{#eq:1"},{"t":"Space"},{"t":"Str","c":"tag="},{"t":"Quoted","c":[{"t":"DoubleQuote"},[{"t":"Str","c":"B.1"}]]},{"t":"Str","c":"}"}]}],"pandoc-api-version":[1,17,5,1],"meta":{}}''')

    # Check src against current pandoc
    md = subprocess.Popen(('echo', '$$ y = f(x) $${#eq:1 tag="B.1"}'),
                          stdout=subprocess.PIPE)
    output = eval(subprocess.check_output(
        'pandoc -t json'.split(), stdin=md.stdout).strip())
    self.assertEqual(src, output)

    # Hand-coded (attributes deleted)
    expected = eval(r'''{"meta":{},"blocks":[{"t":"Para","c":[{"t":"Math","c":[["eq:1",[],[["tag","B.1"]]],{"t":"DisplayMath"}," y = f(x) "]}]}],"pandoc-api-version":[1,17,5,1]}''')

    # Make the comparison
    self.assertEqual(walk(src, attach_attrs_math, '', {}), expected)
def test_process_refs_factory_9(self):
    """Tests process_refs_factory() #9."""

    ## test.md: {@fig:1}:

    # See previous unit test

    # Command: pandoc-1.15.2 test.md -t json
    src = eval(r'''[{"unMeta":{}},[{"t":"Para","c":[{"t":"Str","c":"{"},{"t":"Cite","c":[[{"citationSuffix":[],"citationNoteNum":0,"citationMode":{"t":"AuthorInText","c":[]},"citationPrefix":[],"citationId":"fig:1","citationHash":0}],[{"t":"Str","c":"@fig:1"}]]},{"t":"Str","c":"}:"}]}]]''')

    # Check against pandoc-1.15.2
    md = subprocess.Popen(('echo', '{@fig:1}:'), stdout=subprocess.PIPE)
    output = eval(subprocess.check_output(
        (PANDOC1p15 + ' -t json').split(), stdin=md.stdout).strip())
    self.assertEqual(src, output)

    # Hand-coded
    expected = eval(r'''[{"unMeta":{}},[{"t":"Para","c":[{"t":"Cite","c":[["",[],[]],[{"citationSuffix":[],"citationNoteNum":0,"citationMode":{"t":"AuthorInText","c":[]},"citationPrefix":[],"citationId":"fig:1","citationHash":0}],[{"t":"Str","c":"@fig:1"}]]},{"t":"Str","c":":"}]}]]''')

    # Make the comparison
    process_refs = process_refs_factory(['fig:1'])
    self.assertEqual(walk(src, process_refs, {}, ''), expected)
def test_detach_attrs_factory(self):
    """Tests filter_attrs_factory()."""

    detach_attrs_math = detach_attrs_factory(Math)

    ## Use 'expected' from test_attach_attrs_factory ##
    src = eval(r'''{"meta":{},"blocks":[{"t":"Para","c":[{"t":"Math","c":[["eq:1",[],[["tag","B.1"]]],{"t":"DisplayMath"}," y = f(x) "]}]}],"pandoc-api-version":[1,17,5,1]}''')

    # test.md: $$ y = f(x) $$
    # Command: pandoc test.md -t json
    expected = eval(r'''{"blocks":[{"t":"Para","c":[{"t":"Math","c":[{"t":"DisplayMath"}," y = f(x) "]}]}],"pandoc-api-version":[1,17,5,1],"meta":{}}''')

    # Check expected against current pandoc
    md = subprocess.Popen(('echo', '$$ y = f(x) $$'), stdout=subprocess.PIPE)
    output = eval(subprocess.check_output(
        'pandoc -t json'.split(), stdin=md.stdout).strip())
    self.assertEqual(expected, output)

    # Make the comparison
    self.assertEqual(walk(src, detach_attrs_math, '', {}), expected)
def test_use_refs_factory_7(self):
    """Tests use_refs_factory() #7."""

    ## test.md: {+@tbl:one}-{@tbl:four} provide the data. ##

    # Command: pandoc test.md -t json
    src = eval(r'''{"blocks":[{"t":"Para","c":[{"t":"Str","c":"{+"},{"t":"Cite","c":[[{"citationSuffix":[],"citationNoteNum":0,"citationMode":{"t":"AuthorInText"},"citationPrefix":[],"citationId":"tbl:one","citationHash":0}],[{"t":"Str","c":"@tbl:one"}]]},{"t":"Str","c":"}-{"},{"t":"Cite","c":[[{"citationSuffix":[],"citationNoteNum":0,"citationMode":{"t":"AuthorInText"},"citationPrefix":[],"citationId":"tbl:four","citationHash":0}],[{"t":"Str","c":"@tbl:four"}]]},{"t":"Str","c":"}"},{"t":"Space"},{"t":"Str","c":"provide"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"data."}]}],"pandoc-api-version":[1,17,5,1],"meta":{}}''')

    # Check src against current pandoc
    md = subprocess.Popen(
        ('echo', '{+@tbl:one}-{@tbl:four} provide the data.'),
        stdout=subprocess.PIPE)
    output = eval(subprocess.check_output(
        'pandoc -t json'.split(), stdin=md.stdout).strip())
    self.assertEqual(src, output)

    # Hand-coded
    expected = eval(r'''{"meta":{},"blocks":[{"t":"Para","c":[{"t":"Cite","c":[["",[],[["modifier","+"]]],[{"citationSuffix":[],"citationNoteNum":0,"citationMode":{"t":"AuthorInText"},"citationPrefix":[],"citationId":"tbl:one","citationHash":0}],[{"t":"Str","c":"@tbl:one"}]]},{"t":"Str","c":"-"},{"t":"Cite","c":[["",[],[]],[{"citationSuffix":[],"citationNoteNum":0,"citationMode":{"t":"AuthorInText"},"citationPrefix":[],"citationId":"tbl:four","citationHash":0}],[{"t":"Str","c":"@tbl:four"}]]},{"t":"Space"},{"t":"Str","c":"provide"},{"t":"Space"},{"t":"Str","c":"the"},{"t":"Space"},{"t":"Str","c":"data."}]}],"pandoc-api-version":[1,17,5,1]}''')

    # Make the comparison
    process_refs = process_refs_factory(['tbl:one', 'tbl:four'])
    self.assertEqual(walk(src, process_refs, '', {}), expected)
def test_contentfilter_create_back_links(self):
    """meta contains the key 'image_id_back' with the chapter number '3' as
    value.  ast contains a RawBlock which includes the id in a paragraph:
    'image_id'.  The paragraph should contain the backlink with the target
    'image_id_back' after the filter."""
    ast = {
        "blocks": [
            {
                "t": "RawBlock",
                "c": [
                    "html",
                    '<p id="image_id" class="header" data-level="2">Image</p>',
                ],
            }
        ]
    }
    meta = {"ids": {"image_id_back": 3}}
    ast = pandocfilters.walk(
        ast, pandoc.contentfilter.epub_create_back_links, "epub", meta
    )
    self.assertTrue(
        '<a href=\\"ch003.xhtml#image_id_back\\">Image</a>' in json.dumps(ast)
    )
def __apply_filters(self, json_ast, file_path):
    """Process MAGSBS-specific markdown extensions using Pandoc filters.
    `file_path` is relative to the current directory and points to the
    file being converted."""
    try:
        filter_ = None
        fmt = self.PANDOC_FORMAT_NAME
        for filter_ in self.CONTENT_FILTERS:
            json_ast = pandocfilters.walk(json_ast, filter_, fmt, [])
    except KeyError as e:  # API clash(?)
        raise errors.StructuralError(
            ("Incompatible Pandoc API found, while "
             "applying filter %s (ABI clash?).\nKeyError: %s")
            % (filter_.__name__, str(e)),
            file_path,
        )

    # use GleeTeX if configured
    if self.get_profile() is ConversionProfile.Blind:
        try:
            # this alters the Pandoc document AST -- no return required
            contentfilter.convert_formulas(file_path, "bilder", json_ast)
        except errors.MathError as err:
            HtmlConverter.__handle_error(file_path, err)
def __convert_document(self, path, file_cache, converter, conf):
    """Convert a document by a given path.  It takes a converter which takes
    actual care of the underlying format.  The file cache caches the list of
    files in the lecture.  The list of files within a lecture is required to
    build navigation links.  This function also inserts a page navigation bar
    to navigate between chapters and the table of contents."""
    # if the output file name exists and is newer than the original, it
    # doesn't need to be converted again
    if not converter.needs_update(path):
        return
    with open(path, 'r', encoding='utf-8') as f:
        document = f.read()
    if not document:
        return  # skip empty documents
    if self.IS_CHAPTER.search(os.path.basename(path)):
        try:
            nav_start, nav_end = generate_page_navigation(
                path, file_cache,
                mparser.extract_page_numbers_from_par(
                    mparser.file2paragraphs(document)))
        except errors.FormattingError as e:
            e.path = path
            raise e
        document = '{}\n\n{}\n\n{}\n'.format(nav_start, document, nav_end)
    json_ast = self.load_json(document)
    # add MarkDown extensions with Pandoc filters
    try:
        filter = None
        for filter in Pandoc.CONTENT_FILTERS:
            json_ast = pandocfilters.walk(json_ast, filter,
                                          conf[MetaInfo.Format], [])
        converter.convert(json_ast, contentfilter.get_title(json_ast), path)
    except KeyError as e:  # API clash(?)
        raise errors.StructuralError(
            ("Incompatible Pandoc API found, while "
             "applying filter %s (ABI clash?).\nKeyError: %s") %
            (filter.__name__, str(e)), path)
def deemph(key, val, fmt, meta):
    if key == 'Emph':
        return walk(val, caps, fmt, meta)
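# The caps action that deemph() walks with is not part of this excerpt; in the
# stock pandocfilters examples it simply upper-cases every Str node. A minimal
# sketch of such a helper:
from pandocfilters import Str

def caps(key, val, fmt, meta):
    # Upper-case plain text; leave all other elements unchanged
    if key == 'Str':
        return Str(val.upper())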
#!/usr/bin/python
import sys, pprint, json

pp = pprint.PrettyPrinter(indent=4)

from pandocfilters import walk

code_blocks = []

def extract(key, value, fmt, meta):
    if key == 'CodeBlock':
        [[ident, classes, keyvals], code] = value
        value = [[ident, classes, [['test_id', str(len(code_blocks))]]], code]
        code_blocks.append({'c': value, 't': 'CodeBlock'})

if __name__ == "__main__":
    doc = json.loads(sys.stdin.read())
    if len(sys.argv) > 1:
        format = sys.argv[1]
    else:
        format = ""
    walk(doc, extract, format, doc[0]['unMeta'])
    altered = [{'unMeta': {}}, code_blocks]
    json.dump(altered, sys.stdout)

# for CHP in `cat chapters.txt`; do pandoc --filter ./dev/extract-codeblocks.py --to markdown md/$CHP.md -o code/$CHP.md; done
import codecs
import json
import sys

from pandocfilters import walk


def extract_contents(blocks):
    for block in blocks:
        if block["t"] == "Div":
            if block["c"][0][0] == "content":
                for child in block["c"][1]:
                    yield child
            else:
                for result in extract_contents(block["c"][1]):
                    yield result


def remove_moin(key, value, format, meta):
    if key == "Span" and "anchor" in value[0][1]:
        return []
    if key == "Div" and "table-of-contents" in value[0][1]:
        return []
    if key == "Link":
        if "nonexistent" in value[0][1]:
            return value[1]
        value[0][1] = [c for c in value[0][1] if c not in ("http", "https")]
        return {"t": key, "c": value}


if __name__ == "__main__":
    doc = json.load(codecs.getreader("utf-8")(sys.stdin))
    doc["blocks"] = list(extract_contents(doc["blocks"]))
    doc = walk(doc, remove_moin, "", {})
    json.dump(doc, codecs.getwriter("utf-8")(sys.stdout))
change all \ref{<something>} to the correct equation number, based on the
labelMap. In the third and last pass we process extra math commands.
"""

STDIN = io.TextIOWrapper(sys.stdin.buffer, 'utf-8', 'strict')
STDOUT = io.TextIOWrapper(sys.stdout.buffer, 'utf-8', 'strict')
STDERR = io.TextIOWrapper(sys.stderr.buffer, 'utf-8', 'strict')

fmt = "markdown"
doc = json.loads(STDIN.read())
meta = doc['meta']
blocks = doc['blocks']

# first pass
altered = functools.reduce(lambda x, action: pf.walk(x, action, fmt, meta),
                           [createLabelMapAndAddEqNum], blocks)
eprint(labelMap)

# second pass
altered = functools.reduce(lambda x, action: pf.walk(x, action, fmt, meta),
                           [applyLabelMapToText], altered)

# update the document
doc['blocks'] = altered
def upcase(key, value, fmt, meta):
    if key == 'Str':
        return Str(value.upper())


def liftHeaders(key, value, fmt, meta):
    if key == 'Header':
        level, attrs, content = value
        level -= 1
        if level == 1:
            content = walk(content, upcase, fmt, meta)
        return Header(level, attrs, content)


_man_link_re = re.compile(r'^(#|(?:\./)?[^/]+\.\d\.html$)')

def dropLinks(key, value, fmt, meta):
    if key == 'Link':
        text, link = value
        url, title = link
        if _man_link_re.match(url) is not None:
            return text


doc = json.load(sys.stdin, object_pairs_hook=OrderedDict)
doc = liftTitle(doc)
doc = walk(doc, liftHeaders, 'man', doc[0]['unMeta'])
doc = walk(doc, dropLinks, 'man', doc[0]['unMeta'])
json.dump(doc, sys.stdout)
def main(): """Filters the document AST.""" # pylint: disable=global-statement global PANDOCVERSION global Image # Get the output format and document fmt = args.fmt doc = json.loads(STDIN.read()) # Initialize pandocxnos # pylint: disable=too-many-function-args PANDOCVERSION = pandocxnos.init(args.pandocversion, doc) # Element primitives if PANDOCVERSION < '1.16': Image = elt('Image', 2) # Chop up the doc meta = doc['meta'] if PANDOCVERSION >= '1.18' else doc[0]['unMeta'] blocks = doc['blocks'] if PANDOCVERSION >= '1.18' else doc[1:] # Process the metadata variables process(meta) # First pass attach_attrs_image = attach_attrs_factory(Image, extract_attrs=_extract_attrs) detach_attrs_image = detach_attrs_factory(Image) insert_secnos = insert_secnos_factory(Image) delete_secnos = delete_secnos_factory(Image) filters = [insert_secnos, process_figures, delete_secnos] \ if PANDOCVERSION >= '1.16' else \ [attach_attrs_image, insert_secnos, process_figures, delete_secnos, detach_attrs_image] altered = functools.reduce(lambda x, action: walk(x, action, fmt, meta), filters, blocks) # Second pass process_refs = process_refs_factory(references.keys()) replace_refs = replace_refs_factory(references, cleveref_default, plusname, starname, 'figure') altered = functools.reduce(lambda x, action: walk(x, action, fmt, meta), [repair_refs, process_refs, replace_refs], altered) # Insert supporting TeX if fmt == 'latex': rawblocks = [] if has_unnumbered_figures: rawblocks += [ RawBlock('tex', TEX0), RawBlock('tex', TEX1), RawBlock('tex', TEX2) ] if captionname != 'Figure': rawblocks += [RawBlock('tex', TEX3 % captionname)] insert_rawblocks = insert_rawblocks_factory(rawblocks) altered = functools.reduce( lambda x, action: walk(x, action, fmt, meta), [insert_rawblocks], altered) # Update the doc if PANDOCVERSION >= '1.18': doc['blocks'] = altered else: doc = doc[:1] + altered # Dump the results json.dump(doc, STDOUT) # Flush stdout STDOUT.flush()
def scaps(key, val, fmt, meta):
    if fmt == "ms":
        if key == 'SmallCaps':
            return walk(val, upstr, fmt, meta)
    if k == 'Header':
        return structure_header(v, f, m)


# Supported syntax:
#   [ipe]: start a new in-paragraph enumeration, use \item for the individual items
#   [/ipe]: end an in-paragraph enumeration
def filter_paraenum(k, v, f, m):
    if k == 'Str':
        value = v
        if value.startswith('[') and value.endswith(']'):
            content = value[1:-1]
            if content == 'ipe':
                return li('\\begin{inparaenum}[(1)]')
            elif content == '/ipe':
                return li('\\end{inparaenum}')


if __name__ == '__main__':
    doc = json.loads(sys.stdin.read())
    if len(sys.argv) > 1:
        format = sys.argv[1]
    else:
        format = ''
    doc = pf.walk(doc, filter_structure, format, doc[0]['unMeta'])
    doc = pf.walk(doc, filter_paraenum, format, doc[0]['unMeta'])
    doc = pf.walk(doc, ImageWalker().filter, format, doc[0]['unMeta'])
    json.dump(doc, sys.stdout)
def f(key, val, fmt, meta):
    if key == 'Table':
        rows = val[4]
        new_rows = walk(rows, g, fmt, meta)
        return Table(val[0], val[1], val[2], val[3], new_rows)
def transform(doc, meta, actions):
    altered = doc
    for action in actions:
        altered = walk(altered, action, format, meta)
    return altered
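# A small usage sketch for transform() above (the document and the exclaim
# action are made up for illustration). Note that transform() reads a
# module-level name `format` rather than taking the output format as a
# parameter, so this sketch assumes it lives in the same module and defines
# that name before the call.
from pandocfilters import walk, Str

format = 'html'  # resolved by transform() at call time

def exclaim(key, value, fmt, meta):
    # Hypothetical action: append an exclamation mark to every Str
    if key == 'Str':
        return Str(value + '!')

doc = {"pandoc-api-version": [1, 22], "meta": {},
       "blocks": [{"t": "Para", "c": [{"t": "Str", "c": "hello"}]}]}

altered = transform(doc, doc["meta"], [exclaim])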
import json
import sys
import re
from collections import OrderedDict

import pandocfilters
from pandocfilters import walk, Link

MetaString = pandocfilters.elt('MetaString', 1)

_man_link_re = re.compile(r'^man:(.*)\((\d)\)')

def interpretManLinks(key, value, fmt, meta):
    if key == 'Link':
        text, link = value
        url, title = link
        match = _man_link_re.match(url)
        if match is not None:
            html_url = "%s.%s.html" % (match.group(1), match.group(2))
            return Link(text, (html_url, title))
        else:
            return None

doc = json.load(sys.stdin, object_pairs_hook=OrderedDict)
doc = walk(doc, interpretManLinks, 'man', doc[0]['unMeta'])
json.dump(doc, sys.stdout)
def main(stdin=STDIN, stdout=STDOUT, stderr=STDERR):
    """Filters the document AST."""

    # pylint: disable=global-statement
    global PANDOCVERSION
    global Image

    # Read the command-line arguments
    parser = argparse.ArgumentParser(\
      description='Pandoc figure numbers filter.')
    parser.add_argument(\
      '--version', action='version',
      version='%(prog)s {version}'.format(version=__version__))
    parser.add_argument('fmt')
    parser.add_argument('--pandocversion', help='The pandoc version.')
    args = parser.parse_args()

    # Get the output format and document
    fmt = args.fmt
    doc = json.loads(stdin.read())

    # Initialize pandocxnos
    PANDOCVERSION = pandocxnos.init(args.pandocversion, doc)

    # Element primitives
    if PANDOCVERSION < '1.16':
        Image = elt('Image', 2)

    # Chop up the doc
    meta = doc['meta'] if PANDOCVERSION >= '1.18' else doc[0]['unMeta']
    blocks = doc['blocks'] if PANDOCVERSION >= '1.18' else doc[1:]

    # Process the metadata variables
    process(meta)

    # First pass
    replace = PANDOCVERSION >= '1.16'
    attach_attrs_image = attach_attrs_factory(Image,
                                              extract_attrs=_extract_attrs,
                                              replace=replace)
    detach_attrs_image = detach_attrs_factory(Image)
    insert_secnos_img = insert_secnos_factory(Image)
    delete_secnos_img = delete_secnos_factory(Image)
    insert_secnos_div = insert_secnos_factory(Div)
    delete_secnos_div = delete_secnos_factory(Div)
    altered = functools.reduce(lambda x, action: walk(x, action, fmt, meta),
                               [attach_attrs_image,
                                insert_secnos_img, insert_secnos_div,
                                process_figures,
                                delete_secnos_img, delete_secnos_div,
                                detach_attrs_image], blocks)

    # Second pass
    process_refs = process_refs_factory(LABEL_PATTERN, targets.keys())
    replace_refs = replace_refs_factory(targets, cleveref, False,
                                        plusname if not capitalise \
                                          or plusname_changed else
                                        [name.title() for name in plusname],
                                        starname)
    attach_attrs_span = attach_attrs_factory(Span, replace=True)
    altered = functools.reduce(
        lambda x, action: walk(x, action, fmt, meta),
        [repair_refs, process_refs, replace_refs, attach_attrs_span],
        altered)

    if fmt in ['latex', 'beamer']:
        add_tex(meta)

    # Update the doc
    if PANDOCVERSION >= '1.18':
        doc['blocks'] = altered
    else:
        doc = doc[:1] + altered

    # Dump the results
    json.dump(doc, stdout)

    # Flush stdout
    stdout.flush()
def f(key, val, fmt, meta):
    if key == 'Table':
        rows = val[4]
        new_rows = walk(rows, g, fmt, meta)
        return Table(*val[0:4], new_rows)
def replace_fragments(block):
    """Replace a designated fragment with a stored fragment."""
    def _iter():
        for line in block.splitlines():
            match = FRAGMENT_RE.match(line)
            if match:
                fragment_name = match.group('fragment_name')
                indentation = match.group('indentation')
                logger.debug("Found 'fragment' %s in %s", fragment_name, path)
                fragment = fragments[fragment_name]
                yield from indent_fragment(fragment, indentation)
            else:
                yield line
    return "\n".join(_iter())


if __name__ == "__main__":
    print("This will appear inside the __main__ if block")
    tree = json.loads(stdin.read())
    walk(tree, action, '', {})

    for fragment, content in fragments.items():
        for path, content in files.items():
            content = list(map(replace_fragments, content))
            files[path] = content

    for path, content in files.items():
        with open(path, 'w') as fd:
            logger.debug("Finalizing file %s", path)
            fd.write("\n\n\n".join(content))