def reformat_spaced_repetition_text(key, value, format, meta):
    """Split a ``Q: ... A: ...`` paragraph into question and answer Divs.

    Pandoc filter action. A ``Para`` whose first inline has the content
    ``'Q:'`` is split at its first ``SoftBreak``: the inlines before the
    break become the question, the break and everything after it become the
    answer. Any other element is left untouched.

    Args:
        key: Pandoc element type name.
        value: Element content; for a ``Para`` this is a list of inlines.
        format: Output format (unused).
        meta: Document metadata (unused).

    Returns:
        ``[question Div, answer Div]`` for a matching paragraph, else ``None``.
    """
    if key != 'Para' or not value:
        # Not a paragraph, or an empty one: nothing to inspect.
        return None

    # Inlines without content (e.g. Space, SoftBreak) have no 'c' key, so the
    # lookup must not assume it exists.
    if value[0].get('c') != 'Q:':
        return None

    # There is always a soft line break before the 'A:'; everything from that
    # break onwards belongs to the answer. Without a break the whole
    # paragraph is the question and the answer is empty.
    split_at = next(
        (i for i, inline in enumerate(value) if inline['t'] == 'SoftBreak'),
        len(value),
    )
    para_question = value[:split_at]
    para_answer = value[split_at:]

    # see constructor doc:
    # https://github.com/jgm/pandocfilters/blob/master/pandocfilters.py
    return [
        Div(attributes({'class': 'question'}), [Plain(para_question)]),
        Div(attributes({'class': 'answer'}), [Plain(para_answer)]),
    ]
def test_div_with_title():
    """A Div with the configured classes and a ``title`` attribute is
    rewritten in place so its content sits between raw ``tex`` begin/end
    blocks, with the title passed in square brackets."""
    init()

    # Metadata mapping the environment name "test" to class1 + class2.
    class_list = {
        't': 'MetaList',
        'c': [
            {'t': 'MetaInlines', 'c': [{'t': 'Str', 'c': name}]}
            for name in ('class1', 'class2')
        ],
    }
    meta = {
        'pandoc-latex-environment': {
            't': 'MetaMap',
            'c': {'test': class_list},
        }
    }

    attr = ['', ['class1', 'class2'], [['title', 'theTitle']]]
    body = {'t': 'Plain', 'c': [{'t': 'Str', 'c': 'content'}]}

    # The JSON round trip yields independent plain-data copies.
    src = json.loads(json.dumps(Div(attr, [body])))
    expected = json.loads(
        json.dumps(
            Div(attr, [
                {'t': 'RawBlock', 'c': ['tex', '\\begin{test}[theTitle]']},
                body,
                {'t': 'RawBlock', 'c': ['tex', '\\end{test}']},
            ])))

    pandoc_latex_environment.environment(src['t'], src['c'], 'latex', meta)

    assert json.loads(json.dumps(src)) == expected
def test_div_with_id():
    """A Div with the configured classes and an identifier is rewritten in
    place so its content sits between raw ``tex`` begin/end blocks, with a
    label for the identifier appended to the begin command."""
    init()

    # Metadata mapping the environment name "test" to class1 + class2.
    class_list = {
        't': 'MetaList',
        'c': [
            {'t': 'MetaInlines', 'c': [{'t': 'Str', 'c': name}]}
            for name in ('class1', 'class2')
        ],
    }
    meta = {
        'pandoc-latex-environment': {
            't': 'MetaMap',
            'c': {'test': class_list},
        }
    }

    attr = ['identifier', ['class1', 'class2'], []]
    body = {'t': 'Plain', 'c': [{'t': 'Str', 'c': 'content'}]}

    # The JSON round trip yields independent plain-data copies.
    src = json.loads(json.dumps(Div(attr, [body])))
    expected = json.loads(
        json.dumps(
            Div(attr, [
                {'t': 'RawBlock',
                 'c': ['tex', '\\begin{test} \\label{identifier}']},
                body,
                {'t': 'RawBlock', 'c': ['tex', '\\end{test}']},
            ])))

    pandoc_latex_environment.environment(src['t'], src['c'], 'latex', meta)

    assert json.loads(json.dumps(src)) == expected
def tikz(key, value, format, meta):
    """Pandoc filter action replacing floats and rewriting cross-references.

    Behaviour by element type:

    * ``Div`` whose string form contains ``"figure*"``: bumps ``N_FIGS`` and
      returns a Div with id ``fig:N`` holding one raw-HTML block built from
      the module-level ``fig`` template and ``floatNNN.png``.
    * ``Table``: bumps ``N_TABS`` and returns a Div with id ``tab:N`` built
      the same way.
    * ``Link``: the target (with ``#`` removed) is treated as a label. A
      comma-separated target returns a list of links joined by ``","``
      strings; a single known label has its text and target rewritten in
      place; an ``eq:`` label returns an inline-math eqref; a ``sec:`` label
      has its displayed text trimmed in place.
    * ``Span`` whose string form contains ``"label"``: its first inline's
      content is blanked in place.
    * ``Para``: split so every display-math inline gets its own paragraph.

    ``html``, ``fig``, ``link`` and ``label_to_fignum`` are defined elsewhere
    in this file.

    Returns:
        A replacement element, a list of replacements, or ``None`` when the
        element is kept (possibly after in-place mutation).
    """
    global N_FIGS, N_TABS

    if key == 'Div':
        if "figure*" in str(value):
            N_FIGS += 1
            [[_ident, classes, kvs], _contents] = value
            placeholder = html(fig.format(path=f"float{N_FIGS:03}.png"))
            return Div([f"fig:{N_FIGS}", classes, kvs], [placeholder])

    elif key == 'Table':
        N_TABS += 1
        placeholder = html(fig.format(path=f"float{N_TABS:03}.png"))
        return Div([f"tab:{N_TABS}", [], []], [placeholder])

    elif key == 'Link':
        label = value[-1][0].replace("#", "")
        parts = label.split(",")
        if len(parts) > 1:
            # Several labels in one reference: emit one link per label,
            # separated by literal commas.
            links = []
            for part in parts:
                pref = part.split(":")[0]
                number = label_to_fignum[part]
                links.append(link(href=f"#{pref}:{number}", label=f"{number}"))
                links.append(Str(","))
            del links[-1]  # drop the trailing separator
            return links
        if label in label_to_fignum:
            pref = label.split(":")[0]
            value[-2][0]['c'] = f"{label_to_fignum[label]}"
            value[-1][0] = f"#{pref}:{label_to_fignum[label]}"
        if "eq:" in label:
            # Backslash is escaped explicitly; "\e" is not a valid escape
            # sequence and triggers a warning on current Python versions.
            return Math({'t': 'InlineMath'}, f"\\eqref{{{label}}}")
        if "sec:" in label:
            # Strip the surrounding characters and keep the part after ':'.
            value[-2][0]['c'] = value[-2][0]['c'][1:-1].split(":")[1]

    elif key == 'Span' and "label" in str(value):
        value[-1][0]['c'] = ""

    elif key == 'Para':
        paragraphs = []
        pending = []
        for inline in value:
            if inline['t'] == 'Math' and inline['c'][0]['t'] == 'DisplayMath':
                # Flush preceding text only if there is some, so that
                # leading or adjacent display math yields no empty Para.
                if pending:
                    paragraphs.append(Para(pending))
                    pending = []
                paragraphs.append(Para([inline]))
            else:
                pending.append(inline)
        if pending:
            paragraphs.append(Para(pending))
        if paragraphs:
            return paragraphs

    return None
def test_empty():
    """A Div carrying none of the configured classes is left unchanged."""
    init()

    # Metadata mapping the environment name "test" to class1 + class2.
    class_list = {
        't': 'MetaList',
        'c': [
            {'t': 'MetaInlines', 'c': [{'t': 'Str', 'c': name}]}
            for name in ('class1', 'class2')
        ],
    }
    meta = {
        'pandoc-latex-environment': {
            't': 'MetaMap',
            'c': {'test': class_list},
        }
    }

    attr = ['', [], []]
    body = {'t': 'Plain', 'c': [{'t': 'Str', 'c': 'content'}]}

    # Source and expectation start from the same element; the JSON round
    # trip makes them independent copies.
    src = json.loads(json.dumps(Div(attr, [body])))
    expected = json.loads(json.dumps(Div(attr, [body])))

    pandoc_latex_environment.environment(src['t'], src['c'], 'latex', meta)

    assert json.loads(json.dumps(src)) == expected
def process_theorems(key, value, fmt, meta):  # pylint: disable=unused-argument
    """Rewrite theorem items found in definition lists.

    A ``DefinitionList`` is cut into runs of consecutive items that agree on
    ``_is_theorem``. Each theorem run becomes a ``Div`` of class
    ``theoremnos`` holding the markup produced for its items; each other run
    is re-emitted as its own ``DefinitionList``.

    Returns:
        The list of replacement blocks for a ``DefinitionList``, otherwise
        ``None``.
    """
    if key != 'DefinitionList':
        return None

    # Cut the items into alternating runs. The first run is expected to be a
    # theorem run; if it is not, the expectation simply flips on item one.
    runs = []
    current = []
    expecting_theorem = True
    for entry in value:
        if _is_theorem(entry) != expecting_theorem:
            expecting_theorem = not expecting_theorem
            if current:
                runs.append(current)
            current = []
        current.append(entry)
    if current:
        runs.append(current)

    # Turn every run into a single output block.
    result = []
    for run in runs:
        if not _is_theorem(run[0]):
            # Regular (unnumbered) items stay a definition list.
            result.append(DefinitionList(run))
            continue
        # Numbered items: collect the markup of every entry in the run.
        markup = []
        for item in run:
            thm = _process_theorem(item[0][0]['c'], fmt)
            markup = markup + _add_markup(fmt, thm, item)
        result.append(Div(['', ['theoremnos'], []], markup))
    return result
def replaceCodelistingForLstlisting(token):
    r"""Convert a ``codelisting`` wrapper into a raw ``lstlisting`` block.

    ``token`` is indexed as ``[attr, blocks]``. It is converted only when
    ``blocks[0]`` and ``blocks[3]`` are the raw LaTeX blocks
    ``\begin{codelisting}`` and ``\end{codelisting}``; ``blocks[2]`` then
    supplies the listing options (via ``extractParameters``) and the source
    text (its ``['c'][1]``).

    Args:
        token: ``[attr, blocks]`` content of a Div.

    Returns:
        A ``Div`` with the original attributes and a single ``Plain`` of raw
        LaTeX inlines, or ``token`` itself when it is not a codelisting
        wrapper.
    """
    blocks = token[1]
    # The wrapper is recognised by its 1st and 4th child; a shorter Div can
    # never match, and indexing it would raise IndexError.
    if len(blocks) < 4:
        return token

    begin_token = RawBlock('latex', '\\begin{codelisting}')
    end_token = RawBlock('latex', '\\end{codelisting}')
    if (begin_token, end_token) != (blocks[0], blocks[3]):
        return token

    code_block = blocks[2]

    # Begin command with its key=value options.
    params = extractParameters(code_block)
    options = ",".join(
        "{}={}".format(name, item) for name, item in params.items())
    inlines = [RawInline('latex', '\\begin{lstlisting}[' + options + "]\n")]

    # One raw inline per source line, each newline-terminated.
    inlines.extend(
        RawInline('latex', source_line + "\n")
        for source_line in code_block['c'][1].split('\n'))

    # End command.
    inlines.append(RawInline('latex', '\\end{lstlisting}'))

    return Div(token[0], [Plain(inlines)])
def insert_table_labels(key, val, fmt, meta):
    '''
    Prefix a table caption with "Table N: " and wrap the table in a div
    whose id is table-N.

    The caption (val[0]) is searched for a Span whose only attribute is
    data-label. For the first such span a new table index is allocated, the
    label is registered in label_map, the span is removed from the caption
    and the prefix is inserted in its place at the front.
    '''
    if key != 'Table':
        return None

    caption = val[0]
    for position, inline in enumerate(caption):
        if inline['t'] != 'Span':
            continue
        keyvals = inline['c'][0][2]
        is_label_span = (len(keyvals) == 1 and
                         len(keyvals[0]) == 2 and
                         keyvals[0][0] == 'data-label')
        if not is_label_span:
            continue

        label = keyvals[0][1]
        number = incr_latest_index('table')
        ref_index = 'table-%d' % number
        label_map[label] = Label(
            ref_string='Table %d' % number,
            ref_index=ref_index,
            prev_strings=['table', 'tab.'],
        )
        # Swap the marker span for the visible prefix. Mutating the caption
        # is safe here because the loop is left immediately afterwards.
        caption.pop(position)
        caption.insert(0, Str('Table %d: ' % number))
        return Div([ref_index, ['engrafo-table'], []], [Table(*val)])

    return None
def textbook(key, value, format, meta):
    """Pandoc filter action applying textbook-specific tweaks.

    * ``Header`` of level 1 or 5 without the ``unnumbered`` class gets that
      class added.
    * ``CodeBlock`` / ``Code`` are emitted as raw ``<pre>`` / ``<code>`` HTML
      (through ``process_html``) when the output format is ``html``.
    * ``Image`` sources have ``.pdf`` replaced by ``.png`` unless the output
      format is ``icml``.
    * For ``docx``, a ``Div`` of class ``numbers`` is dropped and a Div of one
      of the boxed classes is framed by horizontal rules, with its content
      filtered recursively.

    Returns:
        The replacement element, or ``None`` to keep the element.
    """
    if key == "Header":
        level, (ident, classes, keyvals), inlines = value
        if level in (1, 5) and "unnumbered" not in classes:
            new_attr = [ident, classes + ["unnumbered"], keyvals]
            return Header(level, new_attr, inlines)

    elif key == "CodeBlock":
        (ident, classes, keyvals), code = value
        if format == "html":
            return RawBlock("html", "<pre>" + process_html(code) + "</pre>")

    elif key == "Code":
        (ident, classes, keyvals), code = value
        if format == "html":
            return RawInline("html",
                             "<code>" + process_html(code) + "</code>")

    elif key == "Image":
        attr, inlines, (src, tit) = value
        if format != "icml":
            return Image(attr, inlines, [src.replace(".pdf", ".png"), tit])

    elif key == "Div":
        (ident, classes, keyvals), blocks = value
        if format != "docx":
            return None
        if "numbers" in classes:
            return Null()
        boxed_classes = [
            "keyterm", "keyterms", "didyouknow", "syntax", "quickcheck",
            "program"
        ]
        if any(cls in classes for cls in boxed_classes):
            framed = ([HorizontalRule()] +
                      walk(blocks, textbook, format, meta) +
                      [HorizontalRule()])
            return Div([ident, classes, keyvals], framed)

    return None
def plain_output(text: str,
                 pandoc_format: str = "markdown",
                 pandoc_extra_args: list = None,
                 pandoc: bool = False) -> list:
    """Turn output text into a list of pandoc blocks.

    Args:
        text: The output text.
        pandoc_format: Passed to ``tokenize_block`` when ``pandoc`` is true.
        pandoc_extra_args: Passed to ``tokenize_block`` when ``pandoc`` is
            true.
        pandoc: If true, delegate to ``tokenize_block``; otherwise wrap the
            text verbatim.

    Returns:
        The result of ``tokenize_block``, or a one-element list holding a
        ``Div`` of class ``output`` around a ``CodeBlock`` with ``text``.
    """
    if not pandoc:
        verbatim = CodeBlock(['', [], []], text)
        return [Div(['', ['output'], []], [verbatim])]
    return tokenize_block(text, pandoc_format, pandoc_extra_args)
def action(key, value, fmt, meta):  # pylint: disable=unused-argument
    """Surround ``noindent`` Divs with the PRE and POST elements.

    Returns:
        ``[PRE, Div, POST]`` for a Div whose classes include ``noindent``,
        otherwise ``None``.
    """
    if key != 'Div':
        return None
    div_classes = PandocAttributes(value[0], 'pandoc').classes
    if 'noindent' not in div_classes:
        return None
    return [PRE, Div(*value), POST]
def behead(key, value, format, meta):
    """Wrap a ``code-block`` Div in demo-browser markup.

    The Div must contain exactly two blocks: one whose first inline is a
    link, and the code block itself. The link text (its ``Str`` and ``Space``
    inlines) becomes the browser-bar title and the link target becomes a
    "View on GitHub" button, which is omitted when the target is blank.

    Both values are HTML-escaped before being placed into raw HTML, so file
    names or URLs containing ``&``, ``<``, ``>`` or quotes cannot break the
    generated markup.

    Returns:
        A ``Div`` of class ``demo__example`` for a ``code-block`` Div,
        otherwise ``None``.
    """
    # Imported locally and by name: in this file ``html`` is already the
    # callable used to build raw HTML blocks.
    from html import escape

    if key != 'Div':
        return None
    [[ident, classes, kvs], content] = value
    if "code-block" not in classes:
        return None

    [link, code_block] = content
    link_content = link['c'][0]['c']

    # Rebuild the visible link text from its Str/Space inlines.
    pieces = []
    for inline in link_content[1]:
        if inline['t'] == 'Str':
            pieces.append(str(inline['c']))
        elif inline['t'] == 'Space':
            pieces.append(' ')
    source_file = ''.join(pieces)
    target = link_content[2][0]

    browser_bar = Div([ident, ['browser-bar'], kvs],
                      [html(escape(source_file))])
    if len(target.strip()) == 0:
        browser_content = [browser_bar, code_block]
    else:
        github_link = html('<a class = "btn-demo-example" href = "' +
                           escape(target, quote=True) +
                           '">View on GitHub</a>')
        browser_content = [browser_bar, github_link, code_block]

    demo_example_browser = Div([ident, ['demo__example-browser'], kvs],
                               browser_content)
    return Div([ident, ['demo__example'], kvs], [demo_example_browser])
def lois(key, value, format, meta):
    """Render Divs of class ``loi`` as a LaTeX environment or HTML blockquote.

    * ``latex``: the Div's blocks are returned between raw ``loi`` begin/end
      blocks; a non-empty identifier adds a LaTeX label to the begin command.
    * ``html`` / ``html5``: the Div is returned with its blocks placed
      between raw HTML blocks that open and close
      ``<div class="loi"><blockquote>``.

    Returns:
        The replacement (a list of blocks for LaTeX, a ``Div`` for HTML), or
        ``None`` for other elements and formats.
    """
    if key != 'Div':
        return None
    [[ident, classes, kvs], contents] = value
    if "loi" not in classes:
        return None

    if format == "latex":
        label = "" if ident == "" else '\\label{' + ident + '}'
        return ([latex('\\begin{loi}' + label)] + contents +
                [latex('\\end{loi}')])

    if format == "html" or format == "html5":
        # ``contents`` is a list of blocks, so it cannot be concatenated
        # into the HTML string; emit the opening and closing tags as
        # separate raw blocks around it instead.
        newcontents = ([html('<div class="loi"><blockquote> ')] + contents +
                       [html('</blockquote></div>')])
        return Div([ident, classes, kvs], newcontents)

    return None
def theorems(key, value, format, meta):
    """Render Divs of class ``theorem`` for LaTeX or HTML output.

    * ``latex``: the Div's blocks are returned between raw ``theorem``
      begin/end blocks; a non-empty identifier adds a LaTeX label to the
      begin command.
    * ``html`` / ``html5``: the global ``theoremcount`` is incremented and
      the Div is returned with its blocks wrapped in a definition list
      titled "Theorem N".

    Returns:
        The replacement (a list of blocks for LaTeX, a ``Div`` for HTML), or
        ``None`` for other elements and formats.
    """
    global theoremcount

    if key != 'Div':
        return None
    [[ident, classes, kvs], contents] = value
    if "theorem" not in classes:
        return None

    if format == "latex":
        label = "" if ident == "" else '\\label{' + ident + '}'
        return ([latex('\\begin{theorem}' + label)] + contents +
                [latex('\\end{theorem}')])

    if format == "html" or format == "html5":
        theoremcount = theoremcount + 1
        # Open the <dl> that the trailing raw block closes; without it the
        # generated HTML has an unmatched </dl>.
        newcontents = [
            html('<dl><dt>Theorem ' + str(theoremcount) + '</dt>'),
            html('<dd>')
        ] + contents + [html('</dd>\n</dl>')]
        return Div([ident, classes, kvs], newcontents)

    return None
def insert_cite_labels(key, val, fmt, meta):
    """Number a ``bibitem`` Div and register its label.

    For a Div whose classes include ``bibitem``, a new ``cite`` index is
    allocated, the Div's ``label`` attribute is recorded in ``label_map``
    with the reference text ``[N]``, and the Div's identifier is set to
    ``cite-N``.

    Returns:
        The re-built ``Div`` for a bibitem, otherwise ``None``.
    """
    is_bibitem = (key == 'Div' and val and val[0] and val[0][1] and
                  'bibitem' in val[0][1])
    if not is_bibitem:
        return None

    attrs = val[0]
    label = dict(attrs[2])['label']
    number = incr_latest_index('cite')
    ref_index = 'cite-%d' % number
    label_map[label] = Label(
        ref_string='[%d]' % number,
        ref_index=ref_index,
        prev_strings=[],
    )
    attrs[0] = ref_index
    return Div(*val)
def admonitions(key, value, fmt, meta):
    """Render admonition Divs as styled HTML boxes for ``ipynb`` output.

    Applies to a Div carrying any class listed in ``admonition_types``. The
    first block, stringified, is the header. The subtype is the lower-cased
    header when the Div lacks the ``admonition`` class; otherwise it is the
    first entry of ``admonition_subtypes`` contained in the lower-cased
    header, or ``"notfound"``. The subtype selects the background colour and
    icon class; the remaining blocks are kept between the opening and
    closing raw HTML.

    Returns:
        The rewritten ``Div``, or ``None`` when the element does not apply.
    """
    if key != 'Div':
        return None
    [[ident, classes, kvs], contents] = value
    applies = (any(item in classes for item in admonition_types)
               and fmt == "ipynb")
    if not applies:
        return None

    header = stringify(contents[0])
    header_lower = header.lower()
    if "admonition" not in classes:
        admonition_subtype = header_lower
    else:
        admonition_subtype = next(
            (subtype for subtype in admonition_subtypes
             if subtype in header_lower),
            "notfound",
        )

    opening = ('<div style="background-color: ' +
               admonition_colors[admonition_subtype] +
               '; margin: 10px 0px; padding:12px;">'
               '<p style="font-size: x-large"><i class="' +
               admonition_icons[admonition_subtype] +
               '"></i> <b>' + header + '</b></p>')
    newcontents = [html(opening)] + contents[1:] + [html('</div>')]
    return Div([ident, classes, kvs], newcontents)
def make_figures(key, val, fmt, meta):
    """
    Turn a paragraph containing images,

        <p><img alt="some caption"><img></p>

    into a figure div,

        <div class="engrafo-figure"><img><img>
        <span class="engrafo-figcaption">some caption</span></div>

    Only the Image inlines of the paragraph are carried over. Paragraphs
    without any image are left alone.
    """
    if key != 'Para' or not val:
        return None

    figure_items = [inline for inline in val if inline['t'] == 'Image']
    if not figure_items:
        return None

    # Pandoc gives every image in a figure the same caption, so the first
    # image's alt text serves as the caption.
    caption = figure_items[0]['c'][1]
    # Pandoc sets the alt text to "image" when there is none.
    placeholder_alt = [{u'c': u'image', u't': u'Str'}]
    if caption and caption != placeholder_alt:
        figure_items.append(Span(['', ['engrafo-figcaption'], []], caption))

    # Pandoc requires that a Div has a Para in it, so a single Para wraps
    # all the children.
    return Div(['', ['engrafo-figure'], []], [Para(figure_items)])
def insert_figure_labels(key, val, fmt, meta):
    """
    Prefix figure captions with "Figure N" and set the id of the figure
    element to figure-N.

    Works on Divs whose class list is exactly ['engrafo-figure'], i.e. the
    output of the make_figures filter. The alt text of the first image is
    searched for a Span whose only attribute is data-label; for the first
    such span a new figure index is allocated, the label is registered in
    label_map, the span is removed and the prefix is inserted at the front
    of the alt text (followed by ": " when other alt text remains).
    """
    if key != 'Div' or val[0][1] != ['engrafo-figure']:
        return None

    first_para = val[1][0]
    image = first_para['c'][0]
    if image['t'] != 'Image':
        return None

    alt = image['c'][1]
    for position, inline in enumerate(alt):
        if inline['t'] != 'Span':
            continue
        keyvals = inline['c'][0][2]
        is_label_span = (len(keyvals) == 1 and
                         len(keyvals[0]) == 2 and
                         keyvals[0][0] == 'data-label')
        if not is_label_span:
            continue

        label = keyvals[0][1]
        number = incr_latest_index('figure')
        ref_index = 'figure-%d' % number
        label_map[label] = Label(
            ref_string='Figure %d' % number,
            ref_index=ref_index,
            prev_strings=['figure', 'fig.', 'fig'],
        )
        # Set ID on div
        val[0][0] = ref_index
        # Swap the marker span for the visible prefix. Mutating the alt
        # text is safe because the loop is left immediately afterwards.
        alt.pop(position)
        prefix = 'Figure %d' % number
        if alt:
            prefix += ': '
        alt.insert(0, Str(prefix))
        return Div(*val)

    return None
def theorems(key, value, format, meta):
    """Render Divs whose class names one of ``environments``.

    The entries of ``environments`` are tried in order; the first one present
    in the Div's classes for which the format is handled decides the result.

    * ``latex``: the Div's blocks are returned between raw begin/end blocks
      for that environment; a non-empty identifier adds a LaTeX label.
    * ``html`` / ``html5``: the global ``theoremcount`` is incremented, the
      class ``math`` is appended to the Div's classes, and the Div is
      returned with its blocks wrapped in a definition list titled with the
      capitalised environment name and the count.

    Returns:
        The replacement (a list of blocks for LaTeX, a ``Div`` for HTML), or
        ``None`` when nothing applies.
    """
    global theoremcount

    if key != 'Div':
        return None
    [[ident, classes, kvs], contents] = value

    for env in environments:
        if env not in classes:
            continue

        if format == "latex":
            label = '\\label{' + ident + '}' if ident != "" else ""
            opening = latex('\\begin{{{0}}}'.format(env) + label)
            return [opening] + contents + [latex('\\end{{{0}}}'.format(env))]

        if format in ("html", "html5"):
            theoremcount = theoremcount + 1
            classes.append("math")
            title = ('<dl><dt class="math">{0} '.format(env.capitalize()) +
                     str(theoremcount) + '</dt>')
            newcontents = ([html(title), html('<dd class="math">')] +
                           contents + [html('</dd>\n</dl>')])
            return Div([ident, classes, kvs], newcontents)

    return None
def notebook_convert(key, value, format, meta):
    """Expand ``{% notebook <path> %}`` paragraphs and demote headers.

    * A ``Para`` spelling a notebook tag is replaced by a ``Div`` built from
      the converted notebook: the notebook is converted to HTML, that HTML
      is converted to pandoc JSON, the Div arguments are read from the first
      block of the result, and the intermediate HTML is removed again.
    * Every ``Header`` has its level increased by one and its last inline
      (the anchor link) dropped, in place.

    Paragraphs that do not form a complete notebook tag, including empty
    ones and short ``{%...%}`` tags, are left untouched.

    Returns:
        The replacement ``Div`` or ``Header``, or ``None``.
    """
    if key == "Para" and _is_notebook_tag(value):
        notebook_path = value[4]["c"]
        convert_notebook_to_html(notebook_path)
        html_as_json = convert_html_to_json(
            notebook_path.replace(".ipynb", ".html"))
        # Arguments for Div(), taken from the first block of the converted
        # document.
        div_args = tuple(
            json.loads(html_as_json)["blocks"][0]["c"][1][0]["c"])
        sys.stderr.write("Converting notebook {}\n".format(notebook_path))
        remove_html(notebook_path)
        return Div(*div_args)

    if key == "Header":
        # Increment headers by 1
        value[0] += 1
        # Remove anchor links
        value[-1] = value[-1][:-1]
        return Header(*value)

    return None


def _is_notebook_tag(inlines):
    """Return True if *inlines* spell ``{% notebook <path> ... %}``."""
    # The tag needs at least: "{%", space, "notebook", space, path.
    if len(inlines) < 5:
        return False

    def text(inline):
        # Inlines such as Space have no 'c'; others hold non-string content.
        content = inline.get("c")
        return content if isinstance(content, str) else ""

    return (text(inlines[0])[0:2] == "{%"
            and text(inlines[-1])[-2:] == "%}"
            and inlines[2].get("c") == "notebook"
            and isinstance(inlines[4].get("c"), str))
def box(key, value, format, meta):
    """Render ``alert``, ``question`` and ``box`` Divs.

    The first of these classes found on the Div (checked in that order)
    decides the handling:

    * ``alert``: for ``latex`` the blocks are wrapped in ``boxAlert``; for
      ``html`` / ``html5`` the global ``theoremcount`` is incremented and
      the Div is returned with its blocks wrapped in a definition list
      titled "Theorem N".
    * ``question``: for ``latex`` the blocks are wrapped in ``boxQuestion``.
    * ``box``: for ``latex`` the blocks are wrapped in a ``tcolorbox`` with
      fixed arc options.

    In every LaTeX case a non-empty identifier adds a label to the begin
    command.

    Returns:
        The replacement (a list of blocks for LaTeX, a ``Div`` for HTML), or
        ``None`` when nothing applies.
    """
    global theoremcount

    if key != 'Div':
        return None
    [[ident, classes, kvs], contents] = value

    if "alert" in classes:
        if format == "latex":
            return _box_latex_env('\\begin{boxAlert}', '\\end{boxAlert}',
                                  ident, contents)
        if format == "html" or format == "html5":
            theoremcount = theoremcount + 1
            # Open the <dl> that the trailing raw block closes; without it
            # the generated HTML has an unmatched </dl>.
            newcontents = [
                html('<dl><dt>Theorem ' + str(theoremcount) + '</dt>'),
                html('<dd>')
            ] + contents + [html('</dd>\n</dl>')]
            return Div([ident, classes, kvs], newcontents)
    elif "question" in classes:
        if format == "latex":
            return _box_latex_env('\\begin{boxQuestion}',
                                  '\\end{boxQuestion}', ident, contents)
    elif "box" in classes:
        if format == "latex":
            return _box_latex_env(
                '\\begin{tcolorbox}[arc=4mm,outer arc=1mm]',
                '\\end{tcolorbox}', ident, contents)

    return None


def _box_latex_env(begin, end, ident, contents):
    """Wrap *contents* in raw LaTeX *begin*/*end* blocks, labelled by *ident* if set."""
    label = "" if ident == "" else '\\label{' + ident + '}'
    return [latex(begin + label)] + contents + [latex(end)]
def process_display_math(key, val, fmt, meta):
    """
    Pull display math out of paragraphs.

    Block-level math arrives inside paragraphs, so a paragraph containing
    display math is split: each display-math inline becomes a Div of class
    engrafo-equation (with the id and children returned by
    insert_equation_labels), and the inlines between them stay in their own
    paragraphs. Empty stretches, i.e. before leading math, after trailing
    math and between adjacent equations, produce no paragraph.

    Returns the list of replacement blocks, or None when the element is not
    a paragraph or contains no display math.
    """
    if key != 'Para' or not val:
        return None

    blocks = []
    text_start = 0  # index of the first inline not yet emitted
    found_math = False
    for i, child in enumerate(val):
        if child['t'] != 'Math' or child['c'][0]['t'] != 'DisplayMath':
            continue
        found_math = True
        # The equation-label filter doesn't seem to work when it runs after
        # this one in walk, so it is called directly from here.
        equation_id, new_children = insert_equation_labels(child['c'])
        if i > text_start:
            blocks.append(Para(val[text_start:i]))
        blocks.append(
            Div([equation_id, ['engrafo-equation'], []],
                [Para(new_children)]))
        text_start = i + 1

    if not found_math:
        return None
    if text_start < len(val):
        blocks.append(Para(val[text_start:]))
    return blocks
def process_latex_envs(key, value, oformat, meta):
    r'''
    Convert LaTeX environments found in RawBlocks into Divs.

    A LaTeX RawBlock matching `env_pattern` (i.e. `\begin{env_name}` ...
    `\end{env_name}`) becomes a Div whose class is the environment name,
    after translation through `env_conversions`. The Div carries a
    `markdown` attribute so its contents can be processed again by Pandoc,
    plus the per-environment running number and the environment title.

    The environment body is converted with a nested Pandoc call and filtered
    with `latex_prefilter`. This is needed for custom environments whose
    bodies contain more text and math, because Pandoc stopped at the
    RawBlock. For the nested calls to work, the Pandoc extension
    `+markdown_in_html_blocks` must be enabled as well.

    A label found in the body by `label_pattern` becomes the Div identifier;
    it is removed from the body and a paragraph built from
    `label_to_mathjax` is put in front of the Div's blocks.

    Returns `None` for anything that is not a LaTeX RawBlock, `[]` for a
    LaTeX RawBlock that is not an environment, and the new Div otherwise.
    '''
    global environment_counters

    if key != 'RawBlock' or value[0] != 'latex':
        return None

    env_match = env_pattern.search(value[1])
    if env_match is None:
        return []

    groups = env_match.groups()
    env_name = env_conversions.get(groups[0], groups[0])
    env_title = "" if groups[1] is None else groups[1]
    env_body = groups[2]

    # Advance the running number for this kind of environment.
    env_num = environment_counters.get(env_name, 0) + 1
    environment_counters[env_name] = env_num

    env_label = ""
    label_div = None
    label_match = label_pattern.search(env_body)
    if label_match is not None:
        env_label = label_match.group(2)
        # XXX: For the Pandoc-types in use, Div values have to be Block
        # elements and not Inlines, which Span is. The Span is wrapped in a
        # Para to produce the requisite Block value.
        label_div = Para([label_to_mathjax(env_label, env_tag=env_num)])
        # Remove the latex label string from the original content.
        env_body = env_body.replace(label_match.group(1), '')

    # Div AST objects:
    # type Attr = (String, [String], [(String, String)])
    # Attributes: identifier, classes, key-value pairs
    div_attr = [
        env_label,
        [env_name],
        [['markdown', ''],
         ["env-number", str(env_num)],
         ['title-name', env_title]],
    ]

    pandoc_logger.debug(u"env_body (pre-processed): {}\n".format(
        str(env_body)))

    # XXX: Nested processing!
    env_body_proc = pypandoc.convert_text(
        env_body, 'json', format='latex',
        extra_args=('-s', '-R', '--wrap=none'))

    pandoc_logger.debug(u"env_body (pandoc processed): {}\n".format(
        env_body_proc))

    env_body_filt = applyJSONFilters(
        [latex_prefilter], env_body_proc, format='json')

    div_blocks = json.loads(env_body_filt)['blocks']
    if label_div is not None:
        div_blocks = [label_div] + div_blocks

    div_res = Div(div_attr, div_blocks)
    pandoc_logger.debug("div_res: {}\n".format(div_res))
    return div_res
def plain_output(text):
    """Wrap *text* in a ``CodeBlock`` inside a ``Div`` of class ``output``."""
    verbatim = CodeBlock(['', [], []], text)
    return Div(['', ['output'], []], [verbatim])
def insert_section_labels(key, val, fmt, meta):
    '''
    Insert section numbers into headings, e.g.

        1 This is a top level heading
        1.1 This is a subsection
        A This is a top-level appendix
        A.1 This is an appendix subheader

    A raw appendix command switches to appendix numbering, resets the
    top-level counter and is replaced by an empty div with
    id=engrafo-appendix-below.

    Headers of level 3 and deeper are only shifted down one level. Level 1
    and 2 headers advance the counters in sec_lengths, register their label
    in label_map (if they have one that is not registered yet), get a
    section-number span in front unless they are unnumbered, receive the id
    section-<number>, and are shifted down one level.
    '''
    global is_appendix

    if key == 'RawBlock' and val[1] == r'\appendix':
        is_appendix = True
        sec_lengths[0] = 0
        return Div(['engrafo-appendix-below', [], []], [])

    if key != 'Header':
        return None

    level, attrs, children = val

    # Ignore \subsubsection{}, \paragraph{} and smaller
    if level >= 3:
        return Header(level + 1, attrs, children)

    unnumbered = 'unnumbered' in val[1][1]
    label = attrs[0]

    # Advance the counter for this level and reset all deeper levels.
    sec_lengths[level - 1] += 1
    sec_lengths[level:] = [0] * (len(sec_lengths) - level)

    counters = sec_lengths[:level]
    if is_appendix:
        # appendix: h1 is alpha
        parts = [
            chr(count + ord('A') - 1) if depth == 0 else str(count)
            for depth, count in enumerate(counters)
        ]
    else:
        parts = [str(count) for count in counters]
    sec_number = '.'.join(parts)

    if label and label not in label_map:
        if is_appendix:
            ref_string = 'Appendix %s' % sec_number
            ref_index = 'appendix-%s' % sec_number
            prev_strings = ['appendix', 'app.']
        else:
            ref_string = 'Section %s' % sec_number
            ref_index = 'section-%s' % sec_number
            prev_strings = ['section', 'sec.']
        label_map[label] = Label(
            ref_string=ref_string,
            ref_index=ref_index,
            prev_strings=prev_strings,
        )

    if not unnumbered:
        number_span = Span(['', ['section-number'], []], [Str(sec_number)])
        children = [number_span] + children

    attrs[0] = 'section-%s' % sec_number.lower()

    # Decrease levels one more than Pandoc outputs (<h1> -> <h2>)
    return Header(level + 1, attrs, children)