def listof(key, value, format, meta):
    """Expand a ``{tag}`` placeholder paragraph into a list of collected items.

    Written as a pandoc filter action. ``Header`` elements update the
    module-level ``headers2`` section counters. A paragraph consisting of a
    single ``Str`` of the form ``{name}``, ``{name:1.2}`` or ``{name:#.#}``
    is looked up in the module-level ``collections`` mapping.

    Args:
        key: Type of the pandoc element being visited.
        value: Contents of the element. For the ``{{name}`` escape form the
            string is modified in place (one leading brace is dropped).
        format: Target output format; ``'latex'`` gets a dedicated branch.
        meta: Document metadata; ``toccolor`` is read for LaTeX output.

    Returns:
        A ``Para`` wrapping a raw TeX command for LaTeX output, a
        ``BulletList`` of links for other formats, or ``None`` when the
        element is not a placeholder for a known collection.
    """
    global headers2

    # Is it a header? Keep the running section numbers up to date.
    if key == 'Header':
        level, (_ident, classes, _attributes), _content = value
        if 'unnumbered' not in classes:
            headers2[level - 1] = headers2[level - 1] + 1
            # Entering a new section resets every deeper counter.
            for index in range(level, 6):
                headers2[index] = 0

    # Only a paragraph made of exactly one string can be a placeholder.
    if not (key == 'Para' and len(value) == 1 and value[0]['t'] == 'Str'):
        return None

    # Is it {tag}?
    result = re.match(
        r'^{(?P<name>(?P<prefix>[a-zA-Z][\w.-]*)(?P<section>\:((?P<sharp>#(\.#)*)|(\d+(\.\d+)*)))?)}$',
        value[0]['c'],
    )

    if not result:
        # Special case where the paragraph starts with '{{...': drop one brace
        # so the text is rendered literally as '{...}'.
        if re.match(r'^{{[a-zA-Z][\w.-]*}$', value[0]['c']):
            value[0]['c'] = value[0]['c'][1:]
        return None

    prefix = result.group('prefix')
    sharp = result.group('sharp')

    # Get the collection name
    if sharp is None:
        name = result.group('name')
    else:
        # '#', '#.#', '#.#.#' ... -> depth 1, 2, 3 ... of the current section
        level = (len(sharp) - 1) // 2 + 1
        name = prefix + ':' + '.'.join(map(str, headers2[:level]))

    # Is it an existing collection?
    if name not in collections:
        return None

    if format == 'latex':
        # Special case for LaTeX output: let TeX build the list itself.
        if 'toccolor' in meta:
            linkcolor = '\\hypersetup{linkcolor=' + stringify(meta['toccolor']['c'], format) + '}'
        else:
            linkcolor = '\\hypersetup{linkcolor=black}'
        suffix = '' if sharp is None else '_'
        return Para([RawInline('tex', linkcolor + '\\makeatletter\\@starttoc{' + name + suffix + '}\\makeatother')])

    # Other formats: build a bullet list of links to the collected items.
    elements = []
    for item in collections[name]:
        target = ['#' + prefix + ':' + item['identifier'], '']
        if pandocVersion() < '1.16':
            # pandoc 1.15: Link has no attributes
            link = Link([Str(item['text'])], target)
        else:
            # pandoc 1.16: Link carries an attribute triple
            link = Link(['', [], []], [Str(item['text'])], target)
        elements.append([Plain([link])])

    return BulletList(elements)
def mkBeginSup():
    """Return the raw LaTeX inline that opens a textsuperscript group."""
    opening = '\\textsuperscript{'
    return RawInline('latex', opening)
def mkInputListings(src):
    """Return a raw LaTeX inline that inputs ``src`` as a listing.

    The listing is requested with ``style=scala``.
    """
    command = "\\lstinputlisting[style=scala]{" + src + "}"
    return RawInline('latex', command)
def mkIncludegraphics(src):
    """Return a raw LaTeX inline including an image from the ``img/`` folder.

    Only the part of ``src`` before its first ``.`` is used as the file name.
    """
    stem = src.split('.')[0]
    command = "\\includegraphics{img/" + stem + "}"
    return RawInline('latex', command)
def mkInputListings(src):
    """Return a raw LaTeX inline that inputs ``src`` as a listing.

    The listing is requested with ``style=rust``; ``src`` is passed through
    ``scriptify`` before being placed in the command.
    """
    target = scriptify(src)
    command = "\\lstinputlisting[style=rust]{" + target + "}"
    return RawInline('latex', command)
def latex(code):
    """Wrap ``code`` in a raw inline element tagged as LaTeX."""
    inline = RawInline('latex', code)
    return inline
def filter_main(key, value, format, meta):
    r"""Pandoc filter action that adapts LyX-exported content for HTML.

    Handled elements:

    * ``CodeBlock`` whose text starts with a ``%%%%lyxblog-raw`` line is
      emitted as a raw HTML block.
    * Display ``Math`` that does not start with ``\begin{`` is wrapped in
      ``align`` (when it contains ``\label{``) or ``align*``.
    * ``Span`` carrying a single ``label`` attribute has its text removed.
    * ``Header`` ending in such a label span takes the label as its
      identifier and loses the span.
    * Inline ``Math`` of the form ``\ref{name}`` becomes an HTML link.
    * ``Para`` starting with an ``Image`` is rebuilt with a numbered caption,
      consuming the next entry of the module-level ``image_info``.

    Args:
        key: Type of the pandoc element being visited.
        value: Contents of the element (a Header's attributes are mutated in
            place when a label is applied).
        format: Target output format (unused).
        meta: Document metadata (unused).

    Returns:
        A replacement element, or ``None`` to leave the element unchanged.
    """
    global image_idx

    if key == 'CodeBlock':
        raw = re.match(r'%%%%lyxblog-raw\n(.*)', value[1],
                       flags=re.DOTALL | re.I)
        if raw:
            return RawBlock('html', raw[1])

    elif key == 'Math' and value[0]['t'] == 'DisplayMath':  # i.e. not inline
        # MathJax supports labels and equation numbering only for AMS
        # environments, so non-AMS display math is wrapped in one.
        source = value[1]
        if not source.startswith(r'\begin{'):  # not an AMS environment
            # We assume there are no comments inside math blocks (if the file
            # is produced by LyX, there shouldn't be any).
            # No label => no numbering, hence the starred environment.
            env = 'align*' if source.find(r'\label{') == -1 else 'align'
            fixed = '\\begin{' + env + '}' + source + '\\end{' + env + '}'
            return Math(value[0], fixed)

    elif key == 'Span':
        # General labels (i.e. labels not in equations, captions or section
        # headers): keep the span as an anchor but drop its text.
        _ident, _classes, key_values = value[0]
        if len(key_values) == 1 and key_values[0][0] == 'label':
            return Span(value[0], [])

    elif key == 'Header':
        content = value[2]
        # A header may have no inline content at all; nothing to relabel then.
        if content and content[-1]['t'] == 'Span':
            [_ident, _classes, key_values], _text = content[-1]['c']
            if len(key_values) == 1 and key_values[0][0] == 'label':
                # Label the header itself (its id) and delete the label span.
                value[1][0] = key_values[0][1]
                return Header(value[0], value[1], content[:-1])

    elif key == 'Math' and value[0]['t'] == 'InlineMath':
        ref = value[1]
        if ref.startswith('\\ref{') and ref[-1] == '}':
            name = ref[len('\\ref{'):-1]
            # Try to extract the link text from the label itself.
            # (=00007B and =00007D represent '{' and '}' and are in the TeX
            # file produced by LyX.)
            embedded = re.match(r'.*=00007B([^}]+)=00007D$', name)
            if embedded:
                return RawInline(
                    'html', '<a href="#{}">{}</a>'.format(name, embedded[1]))

            # Only references to sections and images are handled here.
            # (MathJax already handles the equations.)
            num = sec_name_to_num.get(name, img_name_to_num.get(name, None))
            if num:
                return RawInline(
                    'html', '<a href="#{}">{}</a>'.format(name, num))

    elif key == 'Para' and value and value[0]['t'] == 'Image':
        # NOTE:
        #   In pandoc 2, a Para[Image] where Image.title is 'fig:' becomes
        #   a <figure> with a <figcaption>.
        [_ident, _classes, style], alt, [src, title] = value[0]['c']
        style = dict(style)
        # The width is expected to be a percentage such as '80%'.
        width = float(style.get('width', '100.0%')[:-1])
        margin = (100 - width) / 2

        # The image source comes from image_info, in document order.
        src = image_info[image_idx]
        image_idx += 1

        label = ''
        # An image may have an empty caption, so guard before peeking at it.
        if alt and alt[-1]['t'] == 'Span':
            _ident, _classes, key_values = alt[-1]['c'][0]  # attr
            key_values = dict(key_values)
            if 'label' in key_values:
                # Remove the label from the caption (it is attached to the
                # image attributes instead).
                alt = alt[:-1]
                label = key_values['label']

        fake_class = '{}:{:.5}%'.format(UID2, margin)
        img_attrs = make_attrs(label, [fake_class], {'width': '100%'})

        caption = [Emph([Str('Figure {}.'.format(image_idx))])]
        if title == 'fig:':
            caption += [Space()] + alt

        return Para([Image(img_attrs, caption, (src, 'fig:'))])
def latex_in_line(x):
    """Wrap ``x`` in a raw inline element tagged as LaTeX."""
    inline = RawInline('latex', x)
    return inline
def make_rst_ref(x):
    """Return a raw reStructuredText inline holding a ``:ref:`` role for ``x``."""
    role = ":ref:`" + x + "`"
    return RawInline('rst', role)
def minted(key, value, format, meta):
    """
    Use minted for code in LaTeX, and ``<code>`` markup for HTML.

    Code text and language are HTML-escaped before being placed in HTML
    markup, so characters such as ``<`` and ``&`` in a listing cannot break
    the generated page.

    Args:
        key:    type of pandoc object
        value:  contents of pandoc object
        format: target output format
        meta:   document metadata

    Returns:
        A one-element list holding the raw replacement element, or ``None``
        when the element, the format, or the language is not handled.
    """
    # Imported by name so that it cannot clash with anything called `html`.
    from html import escape

    if format not in ('latex', 'html') or key not in ('CodeBlock', 'Code'):
        return None

    body, language, params, source_file = unpack(value, meta)
    if language is None:
        return None

    if format == 'latex':
        if key == 'Code':
            inline = r'\mintinline[' + params + ']{' + language + '}{' + body + '}'
            return [RawInline(format, inline)]
        if source_file is None:
            begin = r'\begin{' + language + 'code*}{' + params + '}\n'
            end = '\n' + r'\end{' + language + 'code*}'
            return [RawBlock(format, begin + body + end)]
        content = r'\inputminted[' + params + ']{' + language + '}{' + source_file + '}'
        return [RawBlock(format, content)]

    # HTML output
    css_class = 'language-%s' % escape(str(language), quote=True)
    if key == 'Code':
        markup = '<code class="%s">%s</code>' % (css_class, escape(str(body)))
        return [RawInline(format, markup)]
    if source_file is None:
        markup = '<pre><code class="%s">%s</code></pre>' % (
            css_class, escape(str(body)))
        return [RawBlock(format, markup)]
    # NOTE(review): this emits a LaTeX \inputminted command as raw HTML, which
    # looks unintended; kept as-is because the desired HTML behaviour for
    # external source files cannot be determined from this function alone.
    content = r'\inputminted[' + params + ']{' + language + '}{' + source_file + '}'
    return [RawBlock(format, content)]
def _cite_replacement(key, value, fmt, meta):
    """Returns context-dependent content to replace a Cite element.

    Args:
        key: element type; must be 'Cite'.
        value: contents of the Cite element. value[0] holds the attributes,
            value[-2] the citation list and value[-1] the inline content.
        fmt: target output format.
        meta: document metadata (not used here).

    Returns:
        For 'latex' a RawInline; for other formats a list of inlines. When
        the stringified content is wrapped in square brackets, a Span (with
        attributes left as None) containing the prefix, the replacement and
        the suffix.
    """
    assert key == 'Cite'

    # Extract the attributes
    attrs = PandocAttributes(value[0], 'pandoc')

    # Check if the nolink attribute is set
    nolink = attrs['nolink'].capitalize() == 'True' if 'nolink' in attrs \
        else False

    # Extract the label
    label = value[-2][0]['citationId']
    # With implicit refs allowed, fall back to the part after the last ':'
    # when the full label is unknown but that shorter label is known.
    if allow_implicit_refs and not label in references and ':' in label:
        testlabel = label.split(':')[-1]
        if testlabel in references:
            label = testlabel

    # Get the target metadata; typecast it as a Target for easier access
    target = references[label] if label in references else None
    if target and not isinstance(target, Target):
        target = Target(*target)

    # Issue a warning for duplicate targets
    if _WARNINGLEVEL and target and target.has_duplicate:
        msg = textwrap.dedent(""" %s: Referenced label has duplicate: %s """ % (_FILTERNAME, label))
        STDERR.write(msg)
        STDERR.flush()

    # Get the replacement value ('??' marks an unresolved reference)
    text = str(target.num) if target else '??'

    # Choose between \Cref, \cref and \ref
    use_cleveref = attrs['modifier'] in ['*', '+'] \
        if 'modifier' in attrs else use_cleveref_default
    is_plus_ref = attrs['modifier'] == '+' if 'modifier' in attrs \
        else use_cleveref_default
    refname = plusname[0] if is_plus_ref else starname[0]  # Reference name

    # The replacement content depends on the output format
    if fmt == 'latex':
        # LaTeX resolves the reference itself; only the macro is chosen here.
        if use_cleveref:
            macro = r'\cref' if is_plus_ref else r'\Cref'
            ret = RawInline('tex', r'%s{%s}' % (macro, label))
        elif use_eqref:
            ret = RawInline('tex', r'\eqref{%s}' % label)
        else:
            ret = RawInline('tex', r'\ref{%s}' % label)
        if nolink:  # https://tex.stackexchange.com/a/323919
            ret['c'][1] = \
                r'{\protect\NoHyper' + ret['c'][1] + r'\protect\endNoHyper}'
    else:
        # Other formats get the number/tag hard-coded into the output.
        if use_eqref:
            text = '(' + text + ')'

        # Text delimited by '$' is emitted as inline math
        elem = Math({"t":"InlineMath", "c":[]}, text[1:-1]) \
            if text.startswith('$') and text.endswith('$') \
            else Str(text)

        if not nolink and target:
            # For epub output the link is prefixed with a chapter file name
            # built from target.secno
            prefix = 'ch%03d.xhtml' % target.secno \
                if fmt in ['epub', 'epub2', 'epub3'] and \
                target.secno else ''

            # The Link constructor differs before pandoc 1.16
            elem = elt('Link', 2)([elem], ['%s#%s' % (prefix, label), '']) \
                if _PANDOCVERSION < '1.16' else \
                Link(['', [], []], [elem], ['%s#%s' % (prefix, label), ''])

        ret = ([Str(refname), Space()] if use_cleveref else []) + [elem]

    # If the Cite was square-bracketed then wrap everything in a span
    s = stringify(value[-1])

    # pandoc strips off intervening space between the prefix and the Cite;
    # we may have to add it back in
    prefix = value[-2][0]['citationPrefix']
    spacer = [Space()] \
        if prefix and not stringify(prefix).endswith(('{', '+', '*', '!')) \
        else []
    if s.startswith('[') and s.endswith(']'):
        # ret is a single element for latex and a list otherwise
        els = value[-2][0]['citationPrefix'] + \
            spacer + ([ret] if fmt == 'latex' else ret) + \
            value[-2][0]['citationSuffix']
        # We don't yet know if there will be attributes, so leave them
        # as None.  This is fixed later when attributes are processed.
        ret = Span(None, els)

    return ret
def mkIncludegraphics(src):
    """Return a raw LaTeX inline that includes the image ``src``.

    ``88x31.png`` is mapped to ``pdf/by-nc-sa.pdf``; any other name is
    looked up under ``img/``.
    """
    path = "pdf/by-nc-sa.pdf" if src == '88x31.png' else "img/" + src
    return RawInline('latex', "\\includegraphics{" + path + "}")
def fix_underline(key, value, format, meta):
    """Wrap an underline element in a LaTeX ``uline`` command.

    Returns a list of three inlines (opening raw LaTeX, a ``Span`` rebuilt
    from ``value``, closing raw LaTeX) when ``isUnderline`` accepts the
    element, and ``None`` otherwise.
    """
    if not isUnderline(key, value):
        return None

    opening = RawInline('latex', '\\uline{')
    wrapped = Span(value[0], value[1])
    closing = RawInline('latex', '}')
    return [opening, wrapped, closing]
def collect(key, value, format, meta):
    """Record tagged spans into the module-level ``collections`` mapping.

    Written as a pandoc filter action. ``Header`` elements update the
    module-level ``headers`` section counters. A ``Span`` whose identifier
    looks like ``category:identifier`` is stored under ``category`` and under
    one section-qualified name per active header level (``category:1``,
    ``category:1.2``, ...).

    Args:
        key: Type of the pandoc element being visited.
        value: Contents of the element. For LaTeX output, a raw TeX inline is
            inserted at the front of the span's content in place.
        format: Target output format.
        meta: Document metadata (unused).

    Returns:
        Always ``None``; the function works through side effects only.
    """
    global headers, collections

    # Is it a header? Keep the correct numbered headers in the headers array
    if key == 'Header':
        level, (_ident, classes, _attributes), _content = value
        if 'unnumbered' not in classes:
            headers[level - 1] = headers[level - 1] + 1
            # Entering a new section resets every deeper counter.
            for index in range(level, 6):
                headers[index] = 0
        return None

    if key != 'Span':
        return None

    # Get the Span
    (anchor, _classes, _other), text = value

    # Is the anchor of the form "category:identifier"?
    result = re.match(r'^([a-zA-Z][\w.-]*):([\w.-]+)$', anchor)
    if not result:
        return None

    category = result.group(1)
    identifier = result.group(2)

    # Text stored for the item (stringified from a copy of the content)
    string = stringify(deepcopy(text), format)

    is_latex = format == 'latex'

    # The atomic name always receives the item
    names = [category]
    if is_latex:
        latex = '\\phantomsection\\addcontentsline{' + category + '}{figure}{' + string + '}'

    # One alternate name per active header level; stop at the first level
    # whose counter is not positive.
    for depth in range(6):
        if not headers[depth] > 0:
            break
        alt_name = category + ':' + '.'.join(map(str, headers[:depth + 1]))
        names.append(alt_name)
        if is_latex:
            # Complete the latex output (plain and '_'-suffixed lists)
            latex = latex + '\\phantomsection\\addcontentsline{' + alt_name + '}{figure}{' + string + '}'
            latex = latex + '\\phantomsection\\addcontentsline{' + alt_name + '_}{figure}{' + string + '}'

    for collection_name in names:
        # Create the collection on first use, then append the item
        collections.setdefault(collection_name, []).append(
            {'identifier': identifier, 'text': string})

    # Special case for LaTeX output
    if is_latex:
        text.insert(0, RawInline('tex', latex))
        value[1] = text

    return None
def _process_table(value, fmt):
    """Processes the table.  Returns a dict containing table properties.

    Args:
        value: contents of the Table element. value[0] holds the attributes
            and value[1] the caption; both may be modified in place.
        fmt: target output format.

    Returns:
        A dict with the keys 'is_unnumbered', 'is_unreferenceable',
        'is_tagged' and 'attrs'.
    """
    # pylint: disable=global-statement
    global Nreferences            # Global references counter
    global has_unnumbered_tables  # Flags unnumbered tables were found
    global cursec                 # Current section

    # Parse the table
    attrs, caption = value[:2]

    # Initialize the return value
    table = {
        'is_unnumbered': False,
        'is_unreferenceable': False,
        'is_tagged': False,
        'attrs': attrs,
    }

    # Bail out if the label does not conform
    if not LABEL_PATTERN.match(attrs[0]):
        has_unnumbered_tables = True
        table['is_unnumbered'] = True
        table['is_unreferenceable'] = True
        return table

    # Process unreferenceable tables
    if attrs[0] == 'tbl:':
        # Make up a unique description
        attrs[0] = 'tbl:' + str(uuid.uuid4())
        table['is_unreferenceable'] = True
        unreferenceable.append(attrs[0])

    # For html, hard-code in the section numbers as tags
    kvs = PandocAttributes(attrs, 'pandoc').kvs
    if numbersections and fmt in ['html', 'html5'] and 'tag' not in kvs:
        if kvs['secno'] != cursec:
            # New section: restart the per-section counter
            cursec = kvs['secno']
            Nreferences = 1
        kvs['tag'] = cursec + '.' + str(Nreferences)
        Nreferences += 1

    # Save to the global references tracker
    table['is_tagged'] = 'tag' in kvs
    if table['is_tagged']:
        # Remove any surrounding quotes.  startswith/endswith are used so
        # that an empty tag does not raise IndexError.
        tag = kvs['tag']
        if tag.startswith('"') and tag.endswith('"'):
            kvs['tag'] = tag.strip('"')
        elif tag.startswith("'") and tag.endswith("'"):
            kvs['tag'] = tag.strip("'")
        references[attrs[0]] = kvs['tag']
    else:
        Nreferences += 1
        references[attrs[0]] = Nreferences

    # Adjust caption depending on the output format
    if fmt in ['latex', 'beamer']:
        # Append a \label if this is referenceable
        if not table['is_unreferenceable']:
            value[1] += [RawInline('tex', r'\label{%s}' % attrs[0])]
        return table

    # Hard-code in the caption name and number/tag
    ref = references[attrs[0]]
    if isinstance(ref, int):  # Numbered reference
        els = [Str('%d:' % ref)]
    else:  # Tagged reference
        assert isinstance(ref, STRTYPES)
        if ref.startswith('$') and ref.endswith('$'):  # Math
            math = ref.replace(' ', r'\ ')[1:-1]
            els = [Math({"t": "InlineMath", "c": []}, math), Str(':')]
        else:  # Text
            els = [Str(ref + ':')]

    heading = [Str(captionname), Space()] + els
    if fmt in ['html', 'html5']:
        # For html the caption name and number are wrapped in a span
        heading = [RawInline('html', r'<span>')] + heading + \
            [RawInline('html', r'</span>')]
    value[1] = heading + [Space()] + list(caption)

    return table
def context(s):
    """Wrap ``s`` in a raw inline element tagged as ConTeXt."""
    inline = RawInline('context', s)
    return inline
def create_definition(item):
    """Return a raw HTML inline of the form ``{{word}}``.

    ``word`` is the second entry of ``item['c']`` with surrounding
    whitespace stripped.
    """
    contents = item['c']
    word = contents[1].strip()
    return RawInline('html', '{{' + word + '}}')
def latex(s):
    """Wrap ``s`` in a raw inline element tagged as LaTeX."""
    inline = RawInline('latex', s)
    return inline
def external_footnotes(key, value, format, meta):
    """Replace each ``Note`` with a numbered superscript link.

    Increments the module-level ``footnote_count`` for every note and
    returns a raw HTML inline of the form ``<sup>[N](target)</sup>``, where
    ``target`` is read from ``meta['footnote_file']['c']``. Other elements
    are left unchanged (``None`` is returned).
    """
    global footnote_count

    if key != 'Note':
        return None

    footnote_count = footnote_count + 1
    number = str(footnote_count)
    target = meta['footnote_file']['c']
    return RawInline('html', "<sup>[" + number + "](" + target + ")</sup>")
def _process_figure(value, fmt):
    """Processes the figure.  Returns a dict containing figure properties.

    Args:
        value: contents of the element wrapping the image. value[0]['c']
            holds the image, whose first two items are the attributes and
            the caption; both may be modified in place.
        fmt: target output format.

    Returns:
        A dict with the keys 'is_unnumbered', 'is_unreferenceable',
        'is_tagged' and 'attrs'.
    """
    # pylint: disable=global-statement
    global Nreferences
    global has_unnumbered_figures

    # Parse the image
    image = value[0]['c']
    attrs, caption = image[:2]

    # Initialize the return value
    fig = {
        'is_unnumbered': False,
        'is_unreferenceable': False,
        'is_tagged': False,
        'attrs': attrs,
    }

    # Bail out if the label does not conform
    if not LABEL_PATTERN.match(attrs[0]):
        has_unnumbered_figures = True
        fig['is_unnumbered'] = True
        fig['is_unreferenceable'] = True
        return fig

    # Process unreferenceable figures
    if attrs[0] == 'fig:':
        # Make up a unique description
        attrs[0] = attrs[0] + str(uuid.uuid4())
        fig['is_unreferenceable'] = True
        unreferenceable.append(attrs[0])

    # Save to the global references tracker
    kvs = PandocAttributes(attrs, 'pandoc').kvs
    fig['is_tagged'] = 'tag' in kvs
    if fig['is_tagged']:
        # Remove any surrounding quotes.  startswith/endswith are used so
        # that an empty tag does not raise IndexError.
        tag = kvs['tag']
        if tag.startswith('"') and tag.endswith('"'):
            kvs['tag'] = tag.strip('"')
        elif tag.startswith("'") and tag.endswith("'"):
            kvs['tag'] = tag.strip("'")
        references[attrs[0]] = kvs['tag']
    else:
        Nreferences += 1
        references[attrs[0]] = Nreferences

    # Adjust caption depending on the output format
    if fmt == 'latex':
        # Append a \label if this is referenceable
        if not fig['is_unreferenceable']:
            image[1] += [RawInline('tex', r'\label{%s}' % attrs[0])]
        return fig

    # Hard-code in the caption name and number/tag
    ref = references[attrs[0]]
    if isinstance(ref, int):  # Numbered reference
        els = [Str('%d:' % ref)]
    else:  # Tagged reference
        assert type(ref) in STRTYPES
        if ref.startswith('$') and ref.endswith('$'):  # Math
            math = ref.replace(' ', r'\ ')[1:-1]
            els = [Math({"t": "InlineMath", "c": []}, math), Str(':')]
        else:  # Text
            els = [Str(ref + ':')]

    # Both numbered and tagged captions use the configured caption name
    # (the tagged form used to hard-code 'Table' for figures).
    image[1] = [Str(captionname), Space()] + els + [Space()] + list(caption)

    return fig
def mkEndSup():
    """Return the raw LaTeX inline (a closing brace) that ends a group."""
    closing = '}'
    return RawInline('latex', closing)
def latex(text):
    """Wrap ``text`` in a raw inline element tagged as LaTeX."""
    inline = RawInline('latex', text)
    return inline
def filter(key, value, fmt, meta):
    """Pandoc filter action converting Markdown elements to raw LaTeX.

    Handled elements: ``Header`` (serialised as a ``header-json`` raw block),
    ``CodeBlock`` (listings environment), ``Link`` (listing inclusion or
    chapter reference), ``Image`` (remote images are downloaded into ``img/``
    with curl), ``Str`` and ``Code`` (TeX escaping), ``RawInline`` HTML
    ``<img>`` / ``<sup>`` tags, and ``Para`` elements that start with raw
    HTML.

    Args:
        key: Type of the pandoc element being visited.
        value: Contents of the element (code text is normalised in place).
        fmt: Target output format (unused).
        meta: Document metadata (unused).

    Returns:
        A replacement element or list of elements, or ``None`` to leave the
        element unchanged.
    """
    if key == 'Header':
        return RawBlock('header-json', json.dumps(value))

    elif key == 'CodeBlock':
        # CodeBlock Attr String
        value[1] = value[1].replace('\uFFFD', '?')
        [[_ident, classes, _kvs], code] = value
        # Code blocks without any class are valid; treat them as untyped.
        lang = classes[0].split(',')[0] if classes else ''
        if lang == 'rust':
            return mkListingsEnvironment(code, lang)
        return mkListingsEnvironment(code)

    elif key == 'Link':
        # Link Attr [Inline] Target
        [_, inlines, [href, _]] = value
        if inlines == [Str("include")]:
            return mkInputListings(href)
        elif (not href.startswith("http")) and href.endswith(".md"):
            src = re.search(r'(?:./)?(.+\.md)', href).group(1)
            return mkRef(src)

    elif key == 'Image':
        [_, _, [src, _]] = value
        if src.startswith("http"):
            import subprocess
            import sys

            fileName = src.split("/")[-1]
            # The URL comes from the document, so it is passed to curl as an
            # argument instead of being spliced into a shell command line.
            try:
                subprocess.call(["curl", "-O", src], cwd="img")
            except OSError as err:
                # Best effort, as before: a failed download must not abort
                # the conversion.
                sys.stderr.write("could not download %s: %s\n" % (src, err))
            return mkIncludegraphics(fileName)

    elif key == 'Str':
        text = value.replace('\uFFFD', '?').replace('〜', '~')
        text = escape_tex(text)
        text = scriptify(text, '')
        return RawInline('latex', text)

    elif key == 'Code':
        # Code Attr String
        value[1] = value[1].replace('\uFFFD', '?')
        value[1] = value[1].replace('+CHARPIPE+', '|')
        # LaTeX commands cannot be used inside \lstinline, so use \texttt
        if RE_SCRIPT.search(value[1]):
            s = escape_tex(value[1])
            s = scriptify(s, '')
            return RawInline('latex', r'\texttt{%s}' % s)

    elif key == 'RawInline':
        [t, s] = value
        if t == 'html' and '<img' in s:
            # Leave the element alone when the tag has no img/ source.
            found = re.search(r'src="img/(.+?)"', s)
            if found:
                return mkIncludegraphics(found.group(1))
        elif t == 'html' and s == '<sup>':
            return mkBeginSup()
        elif t == 'html' and s == '</sup>':
            return mkEndSup()

    elif key == 'Para':
        if value and value[0]['t'] == 'RawInline':
            raw_fmt, content = value[0]['c']
            if raw_fmt == 'html' and '<img' in content:
                found = re.search(r'src="(img/.+?)"', content)
                if not found:
                    # No img/ source to build a figure from.
                    return None
                src = found.group(1)
                cls = re.search(r'class="(.+?)"', content)
                if cls:
                    cls = cls.group(1)
                width = re.search(r'style="width: *(\d+)%;?', content)
                if width:
                    # Percentage -> scale factor
                    width = float(width.group(1)) / 100
                return mkFigure(src, align=cls, scale=width)
            elif raw_fmt == 'html' and 'class="caption"' in content:
                # Vertical space after a caption paragraph
                return [Para(value), RawBlock('latex', r'\vspace{1em}')]
            elif raw_fmt == 'html' and 'class="filename"' in content:
                # Vertical space before a filename paragraph
                return [RawBlock('latex', r'\vspace{1em}'), Para(value)]
def html(text):
    """Wrap ``text`` in a raw inline element tagged as HTML."""
    inline = RawInline('html', text)
    return inline
def mkRef(src):
    """Return a raw LaTeX inline referencing ``src``, followed by "章"."""
    reference = "\\ref{" + src + u"}章"
    return RawInline('latex', reference)
def handle_tab(format):
    """Return a raw OpenXML tab run for ``docx`` output.

    Any other ``format`` yields ``None``.
    """
    if format != "docx":
        return None
    tab_run = "<w:r><w:tab/></w:r>"
    return RawInline("openxml", tab_run)
def latex(x):
    """Wrap ``x`` in a raw inline element tagged as LaTeX."""
    inline = RawInline('latex', x)
    return inline
def htmlInline(x):
    """Wrap ``x`` in a raw inline element tagged as HTML."""
    inline = RawInline('html', x)
    return inline
def mkIncludegraphics(src):
    """Return a raw LaTeX inline including ``src`` from the ``img/`` folder."""
    command = "\\includegraphics{img/" + src + "}"
    return RawInline('latex', command)
def docx(text):
    """Wrap ``text`` in a raw inline element tagged as OpenXML."""
    inline = RawInline('openxml', text)
    return inline