def links(key, value, format, meta):
    """Rewrite ``Link`` elements; leave every other element untouched.

    A link whose target satisfies ``is_absolute`` becomes a Span holding the
    link text in curly quotes followed by the URL in parentheses.  Any other
    link becomes a ``Cite`` whose citation id is the part of the target that
    follows the last ``#``.

    Returns the replacement element, or None when ``key`` is not ``'Link'``.
    """
    if key != 'Link':
        return None

    _, title, target = value
    url = target[0]

    if is_absolute(url):
        quoted_title = [
            Str(u'\u201c'),
            Span(attributes({}), title),
            Str(u'\u201d'),
        ]
        url_in_parens = [Space(), Str('('), Str(url), Str(')')]
        return Span(attributes({}), quoted_title + url_in_parens)

    # Internal link: only the fragment after the last '#' identifies the
    # citation.
    anchor = url.rpartition('#')[2]
    citation = [{
        "citationSuffix": [],
        "citationNoteNum": 0,
        "citationMode": {
            "t": "NormalCitation"
        },
        "citationPrefix": [],
        "citationId": anchor,
        "citationHash": 0
    }]
    return Cite(citation, [Str("[@{0}]".format(anchor))])
def fix_underline(key, value, format, meta):
    """Wrap elements recognised by ``isUnderline`` in a LaTeX ``\\uline{}``.

    Returns a three-element list (raw opening macro, a Span rebuilt from
    ``value[0]`` and ``value[1]``, raw closing brace), or None when
    ``isUnderline(key, value)`` is falsy.
    """
    if not isUnderline(key, value):
        return None

    opening = RawInline('latex', '\\uline{')
    body = Span(value[0], value[1])
    closing = RawInline('latex', '}')
    return [opening, body, closing]
def inline_footnotes(key, val, fmt, meta):
    """
    Turn each Pandoc ``Note`` into a Span with class ``engrafo-footnote`` so
    it can be post-processed into a Distill footnote.  The note's blocks are
    passed through ``flatten_blocks`` to form the Span's content.

    Returns None for every other element type.
    """
    if key != 'Note':
        return None

    footnote_attrs = ['', ['engrafo-footnote'], []]
    return Span(footnote_attrs, flatten_blocks(val))
def metavars(key, value, format, meta):
    """Interpolate metadata fields into ``Str`` elements.

    When the text of a ``Str`` matches the module-level ``pattern``, group 1
    of the match is taken as a metadata field name and looked up in ``meta``.

    Returns:
        * a Span (class ``interpolated``, attribute ``field``) wrapping the
          field's inlines when the field is a ``MetaInlines``;
        * a ``Str`` when the field is a ``MetaString``;
        * None (element left unchanged) otherwise, including when the field
          is not present in ``meta``.
    """
    if key != 'Str':
        return None

    m = pattern.match(value)
    if not m:
        return None

    field = m.group(1)
    result = meta.get(field, {})
    # A missing field yields the empty-dict default, which has no 't' key;
    # look it up defensively so an unknown placeholder is simply left alone
    # instead of raising KeyError.
    meta_type = result.get('t', '')

    if 'MetaInlines' in meta_type:
        return Span(attributes({'class': 'interpolated', 'field': field}),
                    result['c'])
    if 'MetaString' in meta_type:
        return Str(result['c'])
    return None
def replace_cite_references(key, val, fmt, meta):
    """Replace a ``Cite`` with a link to its entry in ``label_map``.

    The citation id of the first citation is used as the label.  A known
    label yields a one-element list holding a Link (class ``engrafo-cite``)
    to ``#<ref_id>``; an empty or unknown label yields a placeholder Span
    reading ``[?]``.

    Returns None for every other element type.
    """
    if key != 'Cite':
        return None

    label = val[0][0]['citationId']
    known = label and label in label_map

    if not known:
        # TODO: below doesn't work yet
        missing_attrs = ['', ['engrafo-cite', 'engrafo-missing-cite'], []]
        return Span(missing_attrs, [Str('[?]')])

    # Entries are three-element records; the third (prev_strings) is unused
    # here.
    ref_string, ref_id, _ = label_map[label]
    target = ['#%s' % ref_id, '']
    return [Link(['', ['engrafo-cite'], []], [Str(ref_string)], target)]
def interpretManLinks(key, value, fmt, meta):
    """Rewrite links whose URL matches ``_man_link_re``.

    * If the stringified link text starts with ``"lenskit"`` the link is
      replaced by its bare text inlines.
    * Otherwise it becomes a Span of the text followed by
      `` (<group 1 in bold>(<group 2>))``.

    Returns None for non-links and for links whose URL does not match.
    """
    if key != 'Link':
        return None

    # NOTE(review): unpacks a two-element Link payload (text, target) --
    # confirm this matches the pandoc version in use.
    text, link = value
    url, title = link

    match = _man_link_re.match(url)
    if match is None:
        return None

    plain_text = stringify(text)
    if plain_text.startswith("lenskit"):
        return text

    suffix = [
        Str(" ("),
        Strong([Str(match.group(1))]),
        Str("(%s))" % (match.group(2), )),
    ]
    return Span(attributes(None), text + suffix)
def make_figures(key, val, fmt, meta):
    """
    Turn a paragraph containing images, e.g.

      <p><img alt="some caption"><img></p>

    into

      <div class="engrafo-figure"><img><img>
      <span class="engrafo-figcaption">some caption</span></div>

    Only the ``Image`` children of the paragraph are carried over.  Returns
    None for non-paragraphs, empty paragraphs and paragraphs with no image.
    """
    if key != 'Para' or not val:
        return

    figure_parts = []
    for node in val:
        if node['t'] == 'Image':
            figure_parts.append(node)
    if not figure_parts:
        return

    # Pandoc gives each image in a figure the same caption, so the first
    # image's caption serves for the whole figure.
    caption = figure_parts[0]['c'][1]

    # Pandoc sets alt text to "image" if there is none
    placeholder_caption = [{u'c': u'image', u't': u'Str'}]
    if caption and caption != placeholder_caption:
        figure_parts.append(Span(['', ['engrafo-figcaption'], []], caption))

    # Pandoc requires that a Div has a Para in it, so a single Para wraps
    # all the collected children.
    return Div(['', ['engrafo-figure'], []], [Para(figure_parts)])
def label_to_mathjax(env_label, label_postfix='_span', env_tag=None):
    r"""
    Build a hidden Span carrying a labeled dummy MathJax equation.

    This is a hack that hijacks MathJax's numbering system: the Span holds
    a raw LaTeX ``equation`` environment with `\label{env_label}` (and an
    optional `\tag{env_tag}`), so references like `\ref{env_label}` can be
    resolved by MathJax.

    Arguments
    =========
    env_label: str
        The label token (i.e. `\label{env_label}`).
    label_postfix: str (Optional)
        String appended to `env_label` to form the generated Span's id.
    env_tag: int (Optional)
        Tag for the labeled content (e.g. equation number).  Omitted from
        the equation when None.

    Returns
    =======
    The Span AST object (styled `display:none;visibility:hidden`) that
    contains the raw LaTeX equation.
    """
    # Assemble the dummy equation piece by piece.
    pieces = [r'$$\begin{equation}']
    if env_tag is not None:
        pieces.append(r'\tag{{{}}}'.format(env_tag))
    pieces.append(r'\label{{{}}}'.format(env_label))
    pieces.append(r'\end{equation}$$')
    equation = ''.join(pieces)

    # Hide the display of the equation hack in a Span.
    span_attrs = [
        env_label + label_postfix,
        [],
        [["style", "display:none;visibility:hidden"]],
    ]
    return Span(span_attrs, [RawInline('latex', equation)])
def textohtml(key, value, format, meta):
    """Translate selected raw TeX inlines into HTML-oriented elements.

    Only ``RawInline`` elements whose format is ``"tex"`` are considered:

    * the first rule in ``trans`` whose regex matches yields a classed Span
      followed by a Space; the Span text is either the rule's literal
      ``cont`` (when the rule's ``key`` is ``'Str'``) or the match group
      named/numbered by ``cont``;
    * ``cboxStart`` / ``cboxEnd`` matches yield raw HTML span tags;
    * an ``image`` match yields an Image whose source is group 1.

    Returns None when nothing applies.
    """
    if key != 'RawInline':
        return None

    fmt, s = value
    if fmt != "tex":
        return None

    for rule in trans:
        hit = rule['re'].match(s)
        if not hit:
            continue
        span_attrs = attributes({'class': rule['class']})
        if rule['key'] == 'Str':
            content = rule['cont']
        else:
            content = hit.group(rule['cont'])
        return [Span(span_attrs, [Str(content)]), Space()]

    if cboxStart.match(s):
        return RawInline("html", "<span class='cbox'>")
    if cboxEnd.match(s):
        return RawInline("html", "</span>")

    img = image.match(s)
    if img:
        # Three-argument Image (attr, caption, target): should work for
        # pandocfilters >= 1.3.0.  The two-argument form
        # Image([Str("description")], [src, ""]) works only for < 1.3.0.
        return Image(['', [], []], [Str("description")], [img.group(1), ""])

    return None
def process_image(key, value, oformat, meta):
    r'''
    Rewrite filename in Image AST object--adding paths from the
    meta information and/or LaTeX `\graphicspaths` directive.

    This can be used to reassign paths to image file names when the
    meta information has only one entry.  It will also wrap LaTeX-labeled
    Image objects in a Span--for later referencing/linking, say.

    Reads the module-level `figure_dirs` and `fig_fname_ext`, and records
    every rewritten file name in the module-level `processed_figures`
    mapping as `[label, number]` (both None when the image has no label).

    Returns a one-element list holding the new Image (or the Span wrapping
    it), or None when `key` is not "Image" or the rewritten file name has
    already been processed.
    '''
    if key != "Image":
        return None

    # TODO: Find and use labels.
    # TODO: Perhaps check that it's a valid file?
    new_value = copy(value[2])
    new_fig_fname = rename_find_fig(new_value[0], figure_dirs, fig_fname_ext)

    pandoc_logger.debug("figure_dirs: {}\tfig_fname_ext: {}\n".format(
        figure_dirs, fig_fname_ext))
    pandoc_logger.debug("new_value: {}\tnew_fig_fname: {}\n".format(
        new_value, new_fig_fname))

    # XXX: Avoid an endless loop of Image replacements.
    if new_fig_fname in processed_figures:
        return None

    processed_figures[new_fig_fname] = [None, None]
    new_value[0] = new_fig_fname

    new_image = Image(value[0], value[1], new_value)
    wrapped_image = new_image

    # The label, when present, is looked up as the first key/value pair of
    # the attributes of the last caption inline.  Captions that are empty
    # or do not end with such an element make the lookup fail; that simply
    # means "no label", so only the lookup errors are caught -- a bare
    # `except` would also hide KeyboardInterrupt and genuine bugs.
    fig_label = None
    try:
        fig_label_obj = value[1][-1]['c'][0][-1][0]
        pandoc_logger.debug("fig_label_obj: {}\n".format(fig_label_obj))
        if fig_label_obj[0] == 'data-label':
            fig_label = fig_label_obj[1]
    except (IndexError, KeyError, TypeError):
        pandoc_logger.debug(
            "no figure label found for: {}\n".format(new_fig_fname))

    if fig_label is not None:
        processed_figures[new_fig_fname][0] = fig_label

        # The figure number is the count of figures recorded so far.
        env_num = len(processed_figures)
        processed_figures[new_fig_fname][1] = env_num

        # Wrap the image in a Span with an `id`, so that it can be
        # referenced in HTML.
        hack_span = label_to_mathjax(fig_label, env_tag=env_num)
        wrapped_image = Span([copy(fig_label), [], []],
                             [hack_span, new_image])

    pandoc_logger.debug("wrapped_image: {}\n".format(wrapped_image))

    return [wrapped_image]
def _cite_replacement(key, value, fmt, meta):
    """Returns context-dependent content to replace a Cite element.

    `value` is the Cite payload: `value[0]` is parsed as the element's
    attributes, `value[-2]` is the citation list (only its first entry is
    consulted) and `value[-1]` is the Cite's inline content.

    The function also reads several names that are not defined in this
    block (`references`, `allow_implicit_refs`, `use_cleveref_default`,
    `use_eqref`, `plusname`, `starname`, ...).

    Returns:
      * for `fmt == 'latex'`: a tex RawInline holding a cref/Cref, eqref
        or ref macro;
      * for other formats: a list of inlines (optional reference name,
        then the number as Str/Math, wrapped in a Link unless `nolink` is
        set or the target is unknown);
      * in either case, when the stringified Cite content is enclosed in
        square brackets, a `Span(None, ...)` that also carries the
        citation prefix and suffix.
    """

    assert key == 'Cite'

    # Extract the attributes
    attrs = PandocAttributes(value[0], 'pandoc')

    # Check if the nolink attribute is set
    nolink = attrs['nolink'].capitalize() == 'True' if 'nolink' in attrs \
        else False

    # Extract the label
    label = value[-2][0]['citationId']
    # With implicit references enabled, an unknown "prefix:name" label falls
    # back to the part after the last colon when that part is a known label.
    if allow_implicit_refs and not label in references and ':' in label:
        testlabel = label.split(':')[-1]
        if testlabel in references:
            label = testlabel

    # Get the target metadata; typecast it as a Target for easier access
    target = references[label] if label in references else None
    if target and not isinstance(target, Target):
        target = Target(*target)

    # Issue a warning for duplicate targets
    if _WARNINGLEVEL and target and target.has_duplicate:
        msg = textwrap.dedent("""
            %s: Referenced label has duplicate: %s
        """ % (_FILTERNAME, label))
        STDERR.write(msg)
        STDERR.flush()

    # Get the replacement value ('??' marks an unresolved reference)
    text = str(target.num) if target else '??'

    # Choose between \Cref, \cref and \ref
    # Without an explicit modifier both flags fall back to
    # use_cleveref_default.
    use_cleveref = attrs['modifier'] in ['*', '+'] \
        if 'modifier' in attrs else use_cleveref_default
    is_plus_ref = attrs['modifier'] == '+' if 'modifier' in attrs \
        else use_cleveref_default
    refname = plusname[0] if is_plus_ref else starname[0]  # Reference name

    # The replacement content depends on the output format
    if fmt == 'latex':
        # LaTeX output: emit a macro and let LaTeX resolve the number.
        if use_cleveref:
            macro = r'\cref' if is_plus_ref else r'\Cref'
            ret = RawInline('tex', r'%s{%s}' % (macro, label))
        elif use_eqref:
            ret = RawInline('tex', r'\eqref{%s}' % label)
        else:
            ret = RawInline('tex', r'\ref{%s}' % label)
        if nolink:  # https://tex.stackexchange.com/a/323919
            # Rewrites the RawInline's text in place.
            ret['c'][1] = \
                r'{\protect\NoHyper' + ret['c'][1] + r'\protect\endNoHyper}'
    else:
        # Other formats: the number is written out explicitly.
        if use_eqref:
            # NOTE(review): once the parentheses are added the `$...$`
            # test below can no longer match -- confirm this is intended.
            text = '(' + text + ')'
        elem = Math({"t":"InlineMath", "c":[]}, text[1:-1]) \
            if text.startswith('$') and text.endswith('$') \
            else Str(text)
        if not nolink and target:
            # For epub formats with a known section number the link is
            # prefixed with that chapter's file name.
            prefix = 'ch%03d.xhtml' % target.secno \
                if fmt in ['epub', 'epub2', 'epub3'] and \
                target.secno else ''
            # Pandoc versions before 1.16 get a two-argument Link; later
            # versions get the three-argument form with attributes.
            elem = elt('Link', 2)([elem], ['%s#%s' % (prefix, label), '']) \
                if version(_PANDOCVERSION) < version('1.16') else \
                Link(['', [], []], [elem], ['%s#%s' % (prefix, label), ''])
        ret = ([Str(refname + NBSP)] if use_cleveref else []) + [elem]

    # If the Cite was square-bracketed then wrap everything in a span
    s = stringify(value[-1])

    # pandoc strips off intervening space between the prefix and the Cite;
    # we may have to add it back in
    prefix = value[-2][0]['citationPrefix']
    spacer = [Space()] \
        if prefix and not stringify(prefix).endswith(('{', '+', '*', '!')) \
        else []
    if s.startswith('[') and s.endswith(']'):
        # `ret` is a single element for latex and a list otherwise, hence
        # the conditional wrapping before concatenation.
        els = value[-2][0]['citationPrefix'] + \
            spacer + ([ret] if fmt == 'latex' else ret) + \
            value[-2][0]['citationSuffix']
        # We don't yet know if there will be attributes, so leave them
        # as None.  This is fixed later when attributes are processed.
        ret = Span(None, els)

    return ret
def replace_references(key, val, fmt, meta):
    '''
    Replace

    [Str("Foo"), Space(), RawInline("latex", "figref")]

    with

    [Str("Foo"), Space(), Link([Str("Figure"), Space(), Str("7")])]

    and

    [Str("Figure"), Space(), RawInline("latex", "figref")]

    with

    [Link([Str("Figure"), Space(), Str("7")])]

    also works with abbreviations.

    Only elements whose content `val` is a list are processed; for those a
    rebuilt element `{'t': key, 'c': ...}` is returned.  Other elements
    yield None (left unchanged).  References whose label is missing from
    `label_map` are kept and followed by a "?" marker Span.
    '''
    if not isinstance(val, list):
        return None

    altered = []
    for i, obj in enumerate(val):
        new_objs = [obj]
        if (isinstance(obj, dict) and obj['t'] == 'RawInline'
                and obj['c'][0] == 'latex'):
            label = match_ref(obj['c'][1])
            if not label:
                # NOTE(review): raw LaTeX that is not a reference is
                # dropped from the output here -- confirm this is intended.
                continue

            if label in label_map:
                ref_string, ref_id, prev_strings = label_map[label]
                prev = val[i - 1] if i > 0 else None
                prevprev = val[i - 2] if i > 1 else None
                # A reference can be the very first child (prev is None)
                # and siblings need not be element dicts, so check before
                # indexing into them.
                prev_type = prev.get('t') if isinstance(prev, dict) else None

                new_objs = []
                # handle "Table ", "(Table" etc.
                if (prev_strings and isinstance(prevprev, dict)
                        and prev_type == 'Space' and 'c' in prevprev
                        and prevprev.get('t') == 'Str'):
                    prevprev_lower = prevprev['c'].lower()
                    for needle in prev_strings:
                        if prevprev_lower.endswith(needle):
                            # Drop the word and the space already copied to
                            # the output; the link text replaces them.
                            altered = altered[:-2]
                            prefix = prevprev_lower[:-len(needle)]
                            if prefix:
                                new_objs.append(Str(prefix))
                            # Strip once only, even if several needles
                            # happen to match.
                            break

                # hack around bug in pandoc where non-breaking space
                # doesn't tokenize properly
                if (prev_strings and prev_type == 'Str'
                        and prev['c'].replace(u'\xa0', ' ').strip().lower()
                        in prev_strings):
                    altered = altered[:-1]

                link_content = [Str(ref_string)]
                new_objs += [
                    Link(['', [], []], link_content, ['#%s' % ref_id, ''])
                ]
            else:
                new_objs += [
                    Space(),
                    Span(['', ['engrafo-missing-ref'], []], [Str('?')])
                ]

        altered += new_objs

    return {'t': key, 'c': altered}
def insert_section_labels(key, val, fmt, meta):
    '''
    Number level-1 and level-2 headings and register their labels, e.g.

    1 This is a top level heading
    1.1 This is a subsection
    A This is a top-level appendix
    A.1 This is an appendix subheader
    etc.

    A raw `\\appendix` block switches to appendix numbering, restarts the
    top-level counter and is replaced by an empty Div with id
    `engrafo-appendix-below`.

    Every Header is returned one level deeper than Pandoc produced it
    (<h1> -> <h2>); headers of level 3 and above are otherwise unchanged.
    Mutates the module-level `sec_lengths`, `label_map` and `is_appendix`.
    Returns None for all other elements.
    '''
    global is_appendix

    if key == 'RawBlock' and val[1] == r'\appendix':
        is_appendix = True
        sec_lengths[0] = 0
        return Div(['engrafo-appendix-below', [], []], [])

    if key != 'Header':
        return None

    level, attrs, children = val

    # Ignore \subsubsection{}, \paragraph{} and smaller
    if level >= 3:
        return Header(level + 1, attrs, children)

    unnumbered = 'unnumbered' in attrs[1]
    label = attrs[0]

    # Advance this level's counter and reset every deeper one.
    sec_lengths[level - 1] += 1
    sec_lengths[level:] = [0] * (len(sec_lengths) - level)

    parts = []
    for depth, count in enumerate(sec_lengths[:level]):
        if is_appendix and depth == 0:
            # appendix: h1 is alpha
            parts.append(chr(count + ord('A') - 1))
        else:
            parts.append(str(count))
    sec_number = '.'.join(parts)

    if label and label not in label_map:
        if is_appendix:
            string_fmt, index_fmt = 'Appendix %s', 'appendix-%s'
            prev_strings = ['appendix', 'app.']
        else:
            string_fmt, index_fmt = 'Section %s', 'section-%s'
            prev_strings = ['section', 'sec.']
        # NOTE(review): for appendices the registered ref_index
        # ('appendix-X') differs from the header id assigned below
        # ('section-x') -- confirm links resolve as intended.
        label_map[label] = Label(
            ref_string=string_fmt % sec_number,
            ref_index=index_fmt % sec_number,
            prev_strings=prev_strings,
        )

    if not unnumbered:
        number_span = Span(['', ['section-number'], []], [Str(sec_number)])
        children = [number_span] + children

    # The header id is derived from the section number, numbered or not.
    attrs[0] = 'section-%s' % sec_number.lower()

    # Decrease levels one more than Pandoc outputs (<h1> -> <h2>)
    return Header(level + 1, attrs, children)
def makeSpan(contents, classes="", author="", date=""):
    """Build a Span around ``contents``.

    ``classes`` is a whitespace-separated string that is split into a list
    of class names; ``author`` and ``date`` are passed along as further
    attributes.  The attribute dict is converted with ``attributes``.
    """
    span_attrs = attributes({
        'classes': classes.split(),
        'author': author,
        'date': date,
    })
    return Span(span_attrs, contents)
def filter_main(key, value, format, meta):
    """Main filter action: rewrite one element, or return None to keep it.

    Handled elements:

    * ``CodeBlock`` starting with ``%%%%lyxblog-raw`` and a newline
      (case-insensitive): replaced by a raw HTML block of the remaining
      text.
    * display ``Math`` not starting with ``\\begin{``: wrapped in ``align``
      when it contains ``\\label{``, otherwise in ``align*``.
    * ``Span`` whose only key/value attribute is ``label``: its content is
      emptied.
    * ``Header`` ending in such a label Span: the label becomes the
      header's id (``value[1]`` is updated in place) and the Span is
      removed.
    * inline ``Math`` of the form ``\\ref{name}``: replaced by an HTML link
      to ``#name``, using text embedded in the name or the number found in
      ``sec_name_to_num`` / ``img_name_to_num``.
    * ``Para`` whose first inline is an ``Image``: replaced by a paragraph
      with one re-sourced, captioned image; consumes the next entry of
      ``image_info`` and advances the module-level ``image_idx``.

    Empty paragraphs, headers without inlines and images without a caption
    are tolerated rather than raising IndexError.
    """
    global image_idx

    if key == 'CodeBlock':
        text = value[1]
        m = re.match(r'%%%%lyxblog-raw\n(.*)', text, flags=re.DOTALL | re.I)
        if m:
            return RawBlock('html', m.group(1))

    elif key == 'Math' and value[0]['t'] == 'DisplayMath':  # i.e. not inline
        # MathJax supports labels and eq. numbering only for AMS envs, so
        # non-AMS envs are converted into AMS envs.
        latex = value[1]
        if not latex.startswith(r'\begin{'):  # not AMS env
            # We assume there are no comments inside math blocks (if the
            # file is produced by LyX, there shouldn't be any).
            if r'\label{' in latex:
                fixed = r'\begin{align}' + latex + r'\end{align}'
            else:  # no labels => no numbering
                fixed = r'\begin{align*}' + latex + r'\end{align*}'
            return Math(value[0], fixed)

    elif key == 'Span':
        # This supports general labels (i.e. labels not in equations,
        # captions or section headers).
        _, _, key_values = value[0]
        if len(key_values) == 1 and key_values[0][0] == 'label':
            # Remove the text from the label.
            return Span(value[0], [])

    elif key == 'Header':
        content = value[2]
        # A header may have no inlines at all; nothing to do then.
        if content and content[-1]['t'] == 'Span':
            (_, _, key_values), _ = content[-1]['c']
            if len(key_values) == 1 and key_values[0][0] == 'label':
                # Label the header itself (id) and delete the label-span.
                value[1][0] = key_values[0][1]
                return Header(value[0], value[1], content[:-1])

    elif key == 'Math' and value[0]['t'] == 'InlineMath':
        if value[1].startswith('\\ref{') and value[1].endswith('}'):
            name = value[1][len('\\ref{'):-1]

            # Try to extract the text from the label itself.
            # (=00007B and =00007D represent '{' and '}' and are in the TeX
            # file produced by LyX.)
            m = re.match(r'.*=00007B([^}]+)=00007D$', name)
            if m:
                return RawInline(
                    'html', '<a href="#{}">{}</a>'.format(name, m.group(1)))

            # Only references to sections and images are handled here.
            # (Mathjax already handles the equations.)
            num = sec_name_to_num.get(name, img_name_to_num.get(name, None))
            if num:
                return RawInline(
                    'html', '<a href="#{}">{}</a>'.format(name, num))

    elif key == 'Para' and value and value[0]['t'] == 'Image':
        # NOTE:
        # In pandoc 2, a Para[Image] where Image.title is 'fig:' becomes
        # a <figure> with a <figcaption>.
        (_, _, style), alt, (src, title) = value[0]['c']
        style = dict(style)
        # The width is read as a percentage string such as '80%'; the image
        # is centred by giving it equal margins on both sides.
        width = float(style.get('width', '100.0%')[:-1])
        margin = (100 - width) / 2

        # The original source is replaced by the next entry of image_info.
        src = image_info[image_idx]
        image_idx += 1

        label = ''
        # An image without a caption has an empty alt list.
        if alt and alt[-1]['t'] == 'Span':
            key_values = dict(alt[-1]['c'][0][2])
            if 'label' in key_values:
                # Remove the label from the caption (it is passed to
                # make_attrs as the image's label instead).
                alt = alt[:-1]
                label = key_values['label']

        fake_class = '{}:{:.5}%'.format(UID2, margin)
        img_attrs = make_attrs(label, [fake_class], {'width': '100%'})

        # image_idx was already advanced, so figure numbers start at 1.
        caption = [Emph([Str('Figure {}.'.format(image_idx))])]
        if title == 'fig:':
            caption += [Space()] + alt

        return Para([Image(img_attrs, caption, (src, 'fig:'))])

    return None