def behead(key, value, format, meta):
    """Pandoc filter action that adapts LaTeX-derived nodes for reST output.

    * ``Header`` with an identifier: the identifier is cleared and an explicit
      reST label block is emitted in front of the header.
    * ``Str`` longer than three characters that starts with ``[`` (optionally
      after one leading character): turned into a reference via
      ``make_rst_ref``.
    * ``Image``: ``.png`` is appended to the image target.
    * raw LaTeX ``RawBlock``: a clearpage command is dropped, text matching
      ``singlefig_rx`` becomes an image, text matching ``listing_rx`` is
      dispatched to the module-level callable named by the first group;
      anything else triggers a warning on stderr.

    Returns the replacement node(s), ``[]`` to delete the node, or ``None``
    to leave it untouched.
    """
    global singlefig_rx

    if key == 'Header':
        # pandoc does not emit labels before sections -> insert one
        label = value[1][0]
        if not label:
            return None
        target = '.. _' + label + ':\n\n'
        value[1][0] = ''
        return [make_rst_block(target), Header(value[0], value[1], value[2])]

    if key == 'Str':
        # pandoc generates [refname] as strings for \ref{refname} -> fix
        if len(value) > 3:
            if value[0] == '[':
                return make_rst_ref(value[1:-1])
            if value[1] == '[':
                return make_rst_ref(value[2:-1])
        return None

    if key == 'Image':
        # images don't have file endings in .tex -> add .png
        target = [value[2][0] + ".png", value[2][1]]
        return Image(value[0], value[1], target)

    if key == 'RawBlock' and value[0] == 'latex':
        raw = value[1]
        # simply drop \clearpage
        if raw == '\\clearpage':
            return []
        # convert \singlefig to an Image node
        figure = singlefig_rx.match(raw)
        if figure:
            return make_rst_image(figure.group(1), figure.group(2),
                                  figure.group(3))
        listing = listing_rx.match(raw)
        if listing:
            # NOTE(review): the handler name comes straight from the document
            # text and is looked up among *all* module globals.
            handler = globals()[listing.group(1)]
            return handler(parse_range(listing.group(2)), listing.group(3))
        sys.stderr.write("WARNING: unrecognized raw LaTeX" + str(value) + '\n')

    return None
def docmap(key, value, format, meta):
    """Pandoc filter action that inlines included files and prefixes header ids.

    * ``Header``: the identifier is prefixed with the module-level ``REFFILE``
      marker and classes/attributes are dropped.  On a level-1 header whose
      metadata carries ``fromfile``, ``REFFILE`` is first updated to
      ``~~<file with .md replaced by .html>~~`` and the file name is echoed to
      stderr.
    * ``CodeBlock`` with class ``include``: every line is treated as a path;
      each readable file is converted with ``md_to_json``, filtered
      recursively with this same action, and its blocks are spliced in place
      of the code block.  Unreadable paths produce a warning and are skipped.

    Returns a replacement node, a flat list of blocks, or ``None`` to leave
    the node unchanged.
    """
    global REFFILE

    if key == 'Header':
        level, attr, inline = value
        ids, _classes, _keyvals = attr
        # Change the reference file if we see a new level-1 header
        if level == 1 and 'fromfile' in meta:
            # raw string: "\." in a normal literal is an invalid escape
            reffile = re.sub(r"\.md", ".html", meta['fromfile']['c'])
            REFFILE = "~~" + reffile + "~~"
            sys.stderr.write(reffile + "\n")
        return Header(level, [REFFILE + str(ids), [], []], inline)

    if key == 'CodeBlock':
        [[_ident, classes, keyvals], code] = value
        if "include" not in classes:
            return None

        included = []
        for line in code.splitlines():
            path = line.strip()
            if not os.path.isfile(path):
                sys.stderr.write(
                    "WARNING: Can't read file '" + path + "'. Skipping.\n")
                continue

            heading_level, _ = get_value(keyvals, "heading-level")
            if not heading_level:
                heading_level = 0

            contents = get_contents_of_file(path, heading_level)
            doc = json.loads(md_to_json(contents))

            # New API: {"meta": ..., "blocks": ...}; old API: [{"unMeta": ...}, blocks]
            if isinstance(doc, dict):
                file_meta = doc.get('meta', {})
            elif doc and doc[0]:
                file_meta = doc[0]['unMeta']
            else:
                file_meta = {}

            # Record the originating file so nested headers get the right prefix
            file_meta['fromfile'] = {u'c': path, u't': 'MetaString'}
            altered = walk(doc, docmap, format, file_meta)
            # The old list-shaped document keeps its blocks at index 1
            blocks = altered['blocks'] if isinstance(altered, dict) else altered[1]
            included.append(blocks)

        # Flatten the per-file block lists into one list of blocks
        return [block for blocks in included for block in blocks]

    return None
def process(key, value, format, meta):
    """Pandoc filter action producing LaTeX for slides.

    * ``RawBlock`` of type ``tikz`` / ``algorithm``: wrapped in the matching
      LaTeX environment and returned as a raw ``latex`` block.
    * ``BlockQuote``: rendered as a ``block`` environment; the inlines of any
      header found inside the quote are collected as the block title and the
      headers themselves are removed.
    * level-1 ``Header``: remembered in the module-level ``lastheader``.
    * ``HorizontalRule``: replaced by the remembered header, re-filtered.

    Returns the replacement node(s) or ``None`` to keep the node.
    """
    global lastheader

    if key == 'RawBlock':
        raw_kind, body = value
        environments = {
            'tikz': (r'\begin{tikzpicture}[thick,scale=0.8,transform shape]',
                     r'\end{tikzpicture}'),
            'algorithm': (r'{\scriptsize\begin{algorithmic}[1]',
                          r'\end{algorithmic}}'),
        }
        if raw_kind in environments:
            opening, closing = environments[raw_kind]
            return RawBlock('latex', opening + body + closing)

    if key == 'BlockQuote':
        title = [RawInline('latex', r'\begin{block}{')]

        def extract_title(key, value, format, meta):
            # Headers inside the quote become the block title and vanish
            if key == 'Header':
                title.extend(value[2])
                return []
            return process(key, value, format, meta)

        body = walk(value, extract_title, format, meta)
        title.append(RawInline('latex', '}'))
        opening = [Para(title)]
        closing = [RawBlock('latex', r'\end{block}')]
        return opening + body + closing

    if key == 'Header' and value[0] == 1:
        lastheader = Header(*value)

    if key == 'HorizontalRule':
        return walk(lastheader, process, format, meta)
def demote_net_core_1_2(key, value, format, meta):
    """Turn headers whose id starts with ``net-core-`` into level-2 headers.

    The new header keeps the original attributes; its content is element 1 of
    the ``c`` payload of the header's first inline.  Every other node is left
    untouched (``None`` is returned).
    """
    if key != 'Header':
        return None

    attr = value[1]
    if not attr[0].startswith('net-core-'):
        return None

    # presumably the first inline is a Link whose payload[1] holds its text
    replacement_inlines = value[2][0]['c'][1]
    return Header(2, attr, replacement_inlines)
def liftHeaders(key, value, fmt, meta):
    """Promote every header by one level.

    Headers that end up at level 1 additionally have their content run
    through the ``upcase`` action.  Non-header nodes are left alone.
    """
    if key != 'Header':
        return None

    old_level, attrs, content = value
    new_level = old_level - 1
    if new_level == 1:
        content = walk(content, upcase, fmt, meta)
    return Header(new_level, attrs, content)
def behead(key, value, format, meta):
    """Pandoc filter action for reST output that also tracks the last node.

    Every visited node is recorded with ``store(key, value)`` exactly once,
    after the decision about the node has been made and before the
    replacement is built.

    * ``Header`` with an identifier: identifier cleared, reST label emitted
      in front of the header.
    * ``Str`` following the string ``CAF`` (as recorded in ``last_element``)
      and consisting of alphanumerics: a space is inserted in front.
    * ``Str`` longer than three characters starting with ``[`` (optionally
      after one character): converted with ``mk_ref``.
    * ``Image``: ``.png`` appended to the target.

    Returns the replacement node(s) or ``None``.
    """
    global last_element

    build = None  # deferred constructor for the replacement, if any

    if key == 'Header':
        # pandoc does not emit labels before sections -> insert
        label = value[1][0]
        if label:
            target = ".. _" + label + ":\n\n"
            value[1][0] = ""
            build = lambda: [rb(target),
                             Header(value[0], value[1], value[2])]
    elif key == 'Str':
        if last_element == ('Str', 'CAF') and value.isalnum():
            # pandoc does not parse \xspace correctly -> whitespace after CAF
            build = lambda: [Space(), Str(value)]
        elif len(value) > 3:
            # pandoc generates [refname] as strings for \ref{refname} -> fix
            if value[0] == '[':
                build = lambda: mk_ref(value[1:-1])
            elif value[1] == '[':
                build = lambda: mk_ref(value[2:-1])
    elif key == 'Image':
        # images don't have file endings in .tex -> add .png
        build = lambda: Image(value[0], value[1],
                              [value[2][0] + ".png", value[2][1]])

    store(key, value)
    if build is None:
        return None
    return build()
def ruby_kenten(key, val, fmt, meta):
    """Convert ruby and kenten (emphasis dot) text markup in ``Str`` nodes.

    Ruby markup is "base" followed by a reading in double angle brackets,
    optionally introduced by a bar; kenten markup is text wrapped in two
    pairs of double angle brackets.  For ``latex`` the markup becomes ruby /
    kenten macros, for HTML-like formats (``html``, ``html5``, ``epub``,
    ``epub3``) it becomes ``<ruby>`` elements.

    ``Header`` nodes get their identifier replaced by the ``c`` payload of
    their first inline.  Nodes that are neither ``Header`` nor ``Str`` are
    left untouched.

    Returns a ``RawInline`` when markup was converted for a supported format,
    a ``Str`` when nothing matched (a bar directly in front of an opening
    bracket is still removed), and ``None`` when markup was found but the
    output format is not supported.

    Fixes over the previous version: matched text is escaped before being
    reused as a regex pattern (a leading bar or punctuation in the base text
    used to be interpreted as regex syntax), unsupported formats no longer
    hit an undefined ``filtered_str``, and the "did anything match" state is
    an explicit flag instead of a ``locals()`` probe.
    """
    ruby_pattern = r'(?:(?:[\||](?:\p{Hiragana}|\p{Katakana}|\p{Han}|ー|\p{P}|█)+?)|(?:\p{Han}+?))《(?!.*《).*?》'
    kenten_pattern = r'《《(?:\p{Hiragana}|\p{Katakana}|\p{Han}|\p{P}|ー)+?》》'
    html_formats = ('html', 'html5', 'epub', 'epub3')

    if key == 'Header':
        # NOTE(review): assumes the header's first inline carries a string
        # payload - confirm for headers starting with non-Str inlines.
        val[1][0] = val[2][0]['c']
        return Header(val[0], val[1], val[2])
    if key != 'Str':
        return

    filtered_val = val
    matched = False  # set as soon as any of the loops below sees a match

    for matched_vals in regex.findall(ruby_pattern, filtered_val):
        matched = True
        base = regex.search(r'(((?<=[\||])(.*?)(?=《))|(\p{Han}*?(?=《)))',
                            matched_vals).groups(1)[0]
        ruby = regex.search(r'((?<=《)(.*?)(?=》))',
                            matched_vals).groups(1)[1]
        # A bar inside the reading splits it into groups
        if regex.search(r'(.*)?[\||](?!.*《)(?!.*[\||])', ruby):
            filtered_ruby = regex.search(r'^((.*?)(?=[\||]))', ruby)[0]
        else:
            filtered_ruby = ruby
        for grouped_ruby in regex.findall(
                r'(((?<=[\||])(.*?)(?=[\||]))|((?<=[\||])(.*)(?=$)))', ruby):
            if fmt == 'latex':
                filtered_ruby = r'%s|%s' % (filtered_ruby, grouped_ruby[0])
            elif fmt in html_formats:
                filtered_ruby = r'%s%s' % (filtered_ruby, grouped_ruby[0])
        ruby = filtered_ruby

        if fmt == 'latex':
            filtered_str = r'\\ruby{%s}{%s}' % (base, ruby)
        elif fmt in html_formats:
            filtered_str = (r'<ruby><rb>%s</rb><rp>'
                            '《</rp><rt>%s</rt><rp>》'
                            '</rp></ruby>') % (base, ruby)
        else:
            # Unsupported output format: nothing to substitute
            continue
        # Escape the matched text so it is searched for literally
        filtered_val = regex.sub(regex.escape(matched_vals), filtered_str,
                                 filtered_val)

    for matched_vals in regex.findall(kenten_pattern, filtered_val):
        matched = True
        base = regex.search(r'《《(.+?)》》', matched_vals).groups(0)[0]
        if fmt == 'latex':
            filtered_str = r'\\kenten{%s}' % base
        elif fmt in html_formats:
            # one emphasis dot per base character
            kenten = r'・' * len(base)
            filtered_str = (r'<ruby><rb>%s</rb><rp>'
                            '《</rp><rt>%s</rt><rp>》'
                            '</rp></ruby>') % (base, kenten)
        else:
            continue
        filtered_val = regex.sub(regex.escape(matched_vals), filtered_str,
                                 filtered_val)

    if fmt == 'latex':
        for matched_vals in regex.findall(r'…', filtered_val):
            matched = True
            filtered_val = regex.sub(regex.escape(matched_vals), r'…',
                                     filtered_val)
        for matched_vals in regex.findall(r'―', filtered_val):
            matched = True
            filtered_val = regex.sub(regex.escape(matched_vals), r'—',
                                     filtered_val)

    # A bar directly before an opening bracket only escapes the bracket
    filtered_val = regex.sub(r'[\||]《', r'《', filtered_val)

    if matched:
        if fmt == 'latex':
            return RawInline('latex', r'%s' % filtered_val)
        if fmt in html_formats:
            return RawInline('html', r'%s' % filtered_val)
    else:
        return Str(filtered_val)
def preprocess(key, value, format, meta):
    """Prefix header anchors and rewrite links with the current file name.

    The prefix is ``normalize(meta["file"]["c"])``.

    * ``Header``: the anchor becomes ``<prefix>-<anchor>`` and an explicit
      attribute-list snippet carrying that anchor is appended to the header
      text as raw HTML.
    * ``Link`` starting with ``#`` (same document): target becomes
      ``<prefix>-<fragment>``.
    * ``Link`` containing ``#`` elsewhere (other document): target becomes
      ``#`` + ``normalize(link)``.

    Other nodes, and links without ``#`` (including empty targets, which used
    to raise ``IndexError``), are left unchanged.
    """
    prefix = normalize(meta["file"]["c"])

    # Header - set anchor name explicitly
    if key == "Header":
        [level, [anchor, classes, keyvals], header] = value
        anchor = prefix + "-" + anchor
        # Append attribute list with prefixed anchor
        header.append(RawInline("html", " {{ #{} }}".format(anchor)))
        return Header(level, [anchor, classes, keyvals], header)

    # Link - correct links
    if key == "Link":
        [attr, text, [link, title]] = value
        if link.startswith("#"):
            # This document
            # NOTE(review): unlike the branch below, the rewritten target has
            # no leading "#" - confirm this is intended.
            ref = prefix + "-" + link[1:]
            return Link(attr, text, [ref, title])
        if "#" in link:
            # That document
            anchor = "#" + normalize(link)
            return Link(attr, text, [anchor, title])

    return None
def textbook(key, value, format, meta):
    """Pandoc filter action with per-format tweaks for the textbook build.

    * ``Header`` of level 1 or 5 without the ``unnumbered`` class gets that
      class added.
    * ``CodeBlock`` / ``Code`` for ``html``: emitted as raw ``<pre>`` /
      ``<code>`` with the code passed through ``process_html``.
    * ``Image`` for every format except ``icml``: ``.pdf`` in the source path
      is replaced by ``.png``.
    * ``Div`` for ``docx``: divs with class ``numbers`` are removed; divs with
      one of the call-out classes are framed by horizontal rules (content is
      filtered recursively).

    Returns the replacement node or ``None`` to keep the node.
    """
    if key == "Header":
        level, (ident, classes, keyvals), inlines = value
        if level in (5, 1) and "unnumbered" not in classes:
            return Header(level, [ident, classes + ["unnumbered"], keyvals],
                          inlines)
        return None

    if key == "CodeBlock":
        (ident, classes, keyvals), code = value
        if format != "html":
            return None
        return RawBlock("html", "<pre>" + process_html(code) + "</pre>")

    if key == "Code":
        (ident, classes, keyvals), code = value
        if format != "html":
            return None
        return RawInline("html", "<code>" + process_html(code) + "</code>")

    if key == "Image":
        attr, inlines, (src, tit) = value
        if format == "icml":
            return None
        return Image(attr, inlines, [src.replace(".pdf", ".png"), tit])

    if key == "Div":
        (ident, classes, keyvals), blocks = value
        if format != "docx":
            return None
        if "numbers" in classes:
            return Null()
        callouts = ("keyterm", "keyterms", "didyouknow", "syntax",
                    "quickcheck", "program")
        if any(cls in classes for cls in callouts):
            framed = ([HorizontalRule()]
                      + walk(blocks, textbook, format, meta)
                      + [HorizontalRule()])
            return Div([ident, classes, keyvals], framed)

    return None
def contain(key, value, format, meta):
    """Give level-1 headers a ``data-background-size: contain`` attribute.

    Headers that already carry a ``data-background-size`` attribute, headers
    of other levels and all other nodes are left untouched.
    """
    if key != 'Header' or value[0] != 1:
        return None

    keyvals = value[1][2]
    present = [pair[0] for pair in keyvals]
    if 'data-background-size' in present:
        return None

    keyvals.append([u'data-background-size', u'contain'])
    return Header(value[0], value[1], value[2])
def filter_main(key, value, format, meta):
    """Replace the attributes of every header with the module-level ``UID``.

    The header keeps its level and content; its identifier becomes ``UID``
    and its classes and key/value attributes are emptied, so the header can
    be located reliably in the generated HTML.
    """
    if key != 'Header':
        return None

    level, content = value[0], value[2]
    return Header(level, [UID, [], []], content)
def fragile(key, value, format, meta):
    """Make headers/frames fragile.

    Only active for the ``beamer`` output format: the class ``fragile`` is
    appended to the class list of every header.
    """
    if format != 'beamer' or key != 'Header':
        return None

    level, attr, contents = value
    header_classes = attr[1]
    header_classes.append('fragile')
    return Header(level, attr, contents)
def behead(key, value, format_, meta):
    """Flatten headers into paragraphs or push them down two levels.

    * level >= 3: replaced by a paragraph with the header text in bold.
    * level 2: replaced by a paragraph with the header text in small caps.
    * any lower level: kept as a header, two levels deeper (capped at 6).

    Non-header nodes are left untouched.
    """
    if key != 'Header':
        return None

    level, attr, inlines = value
    if level >= 3:
        return Para([Strong(inlines)])
    if level >= 2:
        return Para([SmallCaps(inlines)])

    # increase heading levels
    # (adding 'unnumbered' to the classes here caused "Duplicate identifier"
    # warnings, so the attributes are passed through unchanged)
    deeper = min(6, level + 2)
    return Header(deeper, attr, inlines)
def promote_and_capitalize_sections(key, value, format, meta):
    """Upper-case and promote the standard man-page section headers.

    A header whose ``Str`` inlines, joined by single spaces and lower-cased,
    equal one of the well-known section names is turned into a level-1
    header with all of its ``Str`` inlines upper-cased (in place).  All other
    nodes are left untouched.
    """
    if key != 'Header':
        return None

    inlines = value[2]
    words = [node['c'] for node in inlines if node['t'] == 'Str']
    title = ' '.join(words).lower()

    standard_sections = ['name', 'synopsis', 'description', 'options',
                         'examples', 'environment variables']
    if title not in standard_sections:
        return None

    # capitalize
    for node in inlines:
        if node['t'] == 'Str':
            node['c'] = node['c'].upper()

    # promote
    return Header(1, value[1], inlines)
def buttonize(key, value, format, meta):
    """Render ``[text >](url)`` links as HTML buttons and normalise header ids.

    * ``Link`` whose visible text ends in ``>``, for ``html``/``html5``
      output: replaced by a raw ``<a class='button'>`` element (the trailing
      ``>`` is dropped from the text).
    * ``Header``: underscores in the identifier are replaced by hyphens.

    Both Link layouts are accepted: ``[inlines, target]`` (pandoc < 1.16) and
    ``[attr, inlines, target]`` (newer).  Links with empty text no longer
    raise ``IndexError``.

    Returns the replacement node or ``None`` to keep the node.
    """
    if key == 'Link':
        # The last two elements are (inlines, target) in both API versions
        inlines, (href, title) = value[-2:]
        linktext = stringify(inlines).strip()
        if format in ("html", "html5") and linktext.endswith(">"):
            # NOTE(review): title/href/text are interpolated unescaped.
            html = "<a class='button' title='%s' href='%s'>%s</a>" % (
                title, href, linktext[:-1])
            return RawInline("html", html)
    elif key == 'Header':
        level, (identifier, classes, kv), inlines = value
        identifier = identifier.replace("_", "-")
        return Header(level, (identifier, classes, kv), inlines)

    return None
def cscify(key, value, format, meta):
    """Attach theme background images to specially classed title slides.

    The theme name is read from ``meta['theme']`` (first inline of the value)
    and falls back to ``'default'`` when that lookup fails.

    * markdown: a level-1 ``Header`` without a ``data-background`` attribute
      has each special class (``title-en``, ``title-fi``, ``author``,
      ``section``) replaced by a generic slide class and gains a
      ``data-background`` attribute pointing at the theme image.
    * reST: a ``Div`` whose first block is a ``Header`` and which carries one
      of the special classes is replaced by that header, modified as above.

    Compared with the previous version the ``except`` is narrowed to lookup
    errors, the ``key`` parameter is no longer clobbered by a loop variable,
    and an empty ``Div`` no longer raises ``IndexError``.

    Returns the replacement header or ``None`` to keep the node.
    """
    # image location depends on the theme
    try:
        theme = meta['theme']['c'][0]['c']
    except (KeyError, IndexError, TypeError):
        theme = 'default'
    template = u'theme/{0}/img/%s.png'.format(theme)

    special_classes = ['title-en', 'title-fi', 'author', 'section']
    # generic class per special class; everything else is a title slide
    slide_class = {'author': u'author-slide', 'section': u'section-slide'}

    # markdown: special class names trigger loading of a data background image
    # and replacement with a corresponding generic class name
    if key == 'Header' and value[0] == 1:
        classes, keyvals = value[1][1], value[1][2]
        if 'data-background' not in [pair[0] for pair in keyvals]:
            for name in special_classes:
                if name in classes:
                    classes.remove(name)
                    keyvals.append([u'data-background', template % name])
                    classes.append(slide_class.get(name, u'title-slide'))
            return Header(value[0], value[1], value[2])

    # reST: special class name in a container Div triggers the same as above,
    # but only the modified Header is returned
    elif key == 'Div' and value[1] and value[1][0]['t'] == 'Header':
        for name in special_classes:
            if name in value[0][1]:
                header = value[1][0]['c']
                header[1][2].append([u'data-background', template % name])
                header[1][1].append(slide_class.get(name, u'title-slide'))
                return Header(header[0], header[1], header[2])

    return None
def addpoints(key, value, format, meta):
    """Collect the points of exam questions and show them in the header.

    For a header carrying one or more ``punkte`` attributes the values are
    summed, added to the module-level ``points`` total, and the question is
    recorded in ``questions`` with a short ``(nP)`` suffix.  The header text
    itself gets a right-aligned ``(n Punkt)`` / ``(n Punkte)`` suffix.
    Headers without points and all other nodes are left untouched.
    """
    global points

    if key != 'Header':
        return None

    level, (ident, classes, keyvals), content = value
    awarded = [int(v) for [k, v] in keyvals if k == "punkte"]
    if not awarded:
        return None

    total = sum(awarded)
    points += total
    questions.append(content + [Space(), Str("(" + str(total) + "P)")])

    unit = "Punkt)" if total == 1 else "Punkte)"
    content.extend([
        Space(),
        RawInline("tex", "\\hfill"),
        Space(),
        Str("(" + str(total)),
        Space(),
        Str(unit),
    ])
    return Header(level, [ident, classes, keyvals], content)
def add_to_headers(key, val, fmt, meta):
    """Insert the module-level ``jump`` node in front of noteworthy headers.

    The module-level ``depth`` tracks the current nesting: a header deeper
    than ``depth`` is the first header of a larger section, so ``depth`` is
    increased by one and no jump is placed; a shallower header lowers
    ``depth`` to its level.  Any remaining header of level 3 or less is
    returned preceded by ``jump``.
    """
    global depth

    # only header nodes are of interest
    if key != 'Header':
        return None

    lvl, attr, inline = val

    if lvl > depth:
        # first header of a larger section: record it, place no jump
        depth += 1
        return None

    if lvl < depth:
        # bring depth down to this level
        depth = lvl

    if lvl <= 3:
        return [jump, Header(lvl, attr, inline)]
    return None
def header_numbering(key, value, format, meta):
    """Number headers that start with ``Question -``.

    When the first three inlines of a header are the string ``Question``, a
    space and the string ``-``, the dash is replaced by the running counter
    ``header_numbering.count``, which is then incremented.  Every header is
    returned (re-built); other nodes are left untouched.

    Only the structural probe of the header content is guarded now: headers
    that are too short or contain inlines without a payload simply do not
    match, while genuine errors (for example an uninitialised counter) are no
    longer silently swallowed by a bare ``except``.
    """
    # Only headers
    if key != 'Header':
        return None

    level, desc, content = value

    # Look for headers starting with ("Question -")
    try:
        is_question = (
            content[0]['t'] == 'Str' and content[0]['c'] == 'Question'
            and content[1]['t'] == 'Space'
            and content[2]['t'] == 'Str' and content[2]['c'] == '-'
        )
    except (IndexError, KeyError, TypeError):
        is_question = False

    if is_question:
        # Change the '-' string into the actual number
        content[2] = Str(str(header_numbering.count))
        header_numbering.count += 1

    return Header(level, desc, content)
def slides(k, v, fmt, meta):
    """Keep only the document parts between slide markers.

    HTML comment markers in raw blocks drive two module-level flags:

    * ``BEGIN SLIDESNOHEADER``: stop dropping content, no new frame.
    * ``BEGIN SLIDES``: stop dropping content and start a new frame.
    * ``END SLIDES``: start dropping content.

    When a new frame is pending the current node is replaced by an empty
    level-1 header with id ``section``; while ``incomment`` is set every node
    is removed.  Otherwise the node is kept.
    """
    global incomment
    global newframe

    if k == "RawBlock":
        _raw_format, text = v
        # (marker, new incomment value, new newframe value or None to keep)
        transitions = (
            ("<!-- BEGIN SLIDESNOHEADER -->", False, False),
            ("<!-- BEGIN SLIDES -->", False, True),
            ("<!-- END SLIDES -->", True, None),
        )
        for marker, comment_state, frame_state in transitions:
            if re.search(marker, text):
                incomment = comment_state
                if frame_state is not None:
                    newframe = frame_state
                break

    if newframe:
        newframe = False
        return Header(1, ["section", [], []], [])

    if incomment:
        return []
    return None
def notebook_convert(key, value, format, meta):
    """Expand ``{% notebook file.ipynb %}`` tags and demote headers.

    * ``Para`` that starts with ``{%``, ends with ``%}`` and has ``notebook``
      as its third inline: the notebook named by the fifth inline is
      converted to HTML, the HTML is converted to pandoc JSON, the temporary
      HTML is removed, and the paragraph is replaced by the ``Div`` found in
      the first block of the converted document.
    * ``Header``: level increased by one and the last inline (the anchor
      link) removed.

    Paragraphs that are too short to be a tag, or whose inlines carry no
    string payload, are now skipped instead of raising ``IndexError`` /
    ``KeyError``.
    """
    if key == "Para" and len(value) >= 5:
        # .get(): inlines such as Space may have no "c" payload at all
        is_notebook_tag = (
            value[0].get("c", "")[0:2] == "{%"
            and value[-1].get("c", "")[-2:] == "%}"
            and value[2].get("c") == "notebook"
        )
        if is_notebook_tag:
            notebook_path = value[4]["c"]
            convert_notebook_to_html(notebook_path)
            converted = json.loads(
                convert_html_to_json(notebook_path.replace(".ipynb", ".html")))
            # Remove unMeta: only the payload of the wrapped Div is kept
            tuple_notebook = tuple(converted["blocks"][0]["c"][1][0]["c"])
            sys.stderr.write("Converting notebook {}\n".format(notebook_path))
            remove_html(notebook_path)
            return Div(*tuple_notebook)

    if key == "Header":
        # Increment headers by 1
        value[0] += 1
        # Remove anchor links
        value[-1] = value[-1][:-1]
        return Header(*value)

    return None
def titlizeHeadings(key, value, format, meta):
    """Rewrite every header's text in title case.

    The header content is flattened to plain text with ``stringify``, passed
    through ``titleize`` and stored back as a single ``Str``; level and
    attributes are kept.  Other nodes are left untouched.
    """
    if key != 'Header':
        return None

    level, attr, inlines = value
    plain_title = stringify(inlines)
    return Header(level, attr, [Str(titleize(plain_title))])
def insert_section_labels(key, val, fmt, meta):
    '''
    Prefix headings with their section number, e.g.

      1 This is a top level heading
      1.1 This is a subsection
      A This is a top-level appendix
      A.1 This is an appendix subheader

    The appendix marker (a raw block) switches numbering to appendix mode and
    is replaced by an empty div with id ``engrafo-appendix-below``.

    Headers of level 3 and deeper are not numbered, only pushed down one
    level.  For level 1 and 2 headers the counters in ``sec_lengths`` are
    advanced, a ``Label`` is registered in ``label_map`` for a not yet known
    header id, the number is prepended as a ``section-number`` span (unless
    the header is ``unnumbered``), the id becomes ``section-<number>`` and
    the header is pushed down one level.
    '''
    global is_appendix

    if key == 'RawBlock' and val[1] == r'\appendix':
        is_appendix = True
        sec_lengths[0] = 0
        return Div(['engrafo-appendix-below', [], []], [])

    if key != 'Header':
        return None

    level, attrs, children = val

    # Ignore \subsubsection{}, \paragraph{} and smaller
    if level >= 3:
        return Header(level + 1, attrs, children)

    unnumbered = 'unnumbered' in attrs[1]
    label = attrs[0]

    # Advance this level's counter and reset all deeper ones
    sec_lengths[level - 1] += 1
    sec_lengths[level:] = [0] * (len(sec_lengths) - level)

    parts = []
    for position, count in enumerate(sec_lengths[:level]):
        if is_appendix and position == 0:
            # appendix: h1 is alpha
            parts.append(chr(count + ord('A') - 1))
        else:
            parts.append(str(count))
    sec_number = '.'.join(parts)

    if label and label not in label_map:
        if is_appendix:
            ref_string = 'Appendix %s' % sec_number
            ref_index = 'appendix-%s' % sec_number
            prev_strings = ['appendix', 'app.']
        else:
            ref_string = 'Section %s' % sec_number
            ref_index = 'section-%s' % sec_number
            prev_strings = ['section', 'sec.']
        label_map[label] = Label(
            ref_string=ref_string,
            ref_index=ref_index,
            prev_strings=prev_strings,
        )

    if not unnumbered:
        number_span = Span(['', ['section-number'], []], [Str(sec_number)])
        children = [number_span] + children

    attrs[0] = 'section-%s' % sec_number.lower()

    # Decrease levels one more than Pandoc outputs (<h1> -> <h2>)
    return Header(level + 1, attrs, children)
def format_man(key, val, fmt, meta):
    """Simplify a document for man-page output.

    * ``Link``: replaced by element 1 of its payload.
    * level-1 ``Header``: rebuilt from its payload after running the ``caps``
      action over it.

    Other nodes are left untouched.
    """
    if key == "Link":
        replacement = val[1]
        return replacement

    is_top_level_header = key == "Header" and val[0] == 1
    if is_top_level_header:
        capitalised = walk(val, caps, fmt, meta)
        return Header(*capitalised)

    return None
def change_head_title(key, value, format, meta):
    """Push every header two levels down.

    The level is increased in place on the incoming value and the header is
    rebuilt from it.  Other nodes are left untouched.
    """
    if key != 'Header':
        return None

    value[0] = value[0] + 2
    return Header(*value)
def headinglinks(k, v, fmt, meta):
    """Turn the text of every header into a link to the header's own anchor.

    The previous body called ``Header()`` without arguments, which cannot
    build a header node; the intended behaviour was only present as
    commented-out code.  The header keeps its level and attributes and its
    content is wrapped in a single ``Link`` targeting ``#<identifier>``.

    NOTE(review): uses the three-argument ``Link(attr, inlines, target)``
    form - confirm against the pandocfilters version in use.
    """
    if k != "Header":
        return None

    level, attrs, contents = v
    self_link = Link(["", [], []], contents, ["#" + attrs[0], ""])
    return Header(level, attrs, [self_link])
def dec_header(key, value, format, _):
    """Raise headers by ``LEVELS`` levels where that is possible.

    A header whose level is greater than the module-level ``LEVELS`` has
    ``LEVELS`` subtracted from its level; shallower headers keep their level.
    Every header is rebuilt; other nodes are left untouched.
    """
    if key != 'Header':
        return None

    level, attr, inline = value
    new_level = level - LEVELS if level > LEVELS else level
    return Header(new_level, attr, inline)
def fix_head(key, value, format, meta):
    """Remove the bold wrapper from level-1 headers that start with ``Strong``.

    When the first inline of a level-1 header is a ``Strong`` element, its
    children are spliced into the header in its place.  Headers of other
    levels, headers not starting with ``Strong`` and all other nodes are left
    untouched.

    Compared with the previous version, empty headers no longer raise
    ``IndexError`` and inlines following the ``Strong`` element are kept
    instead of being dropped.
    """
    if key != 'Header' or value[0] != 1:
        return None

    inlines = value[2]
    if not inlines:
        return None

    first = inlines[0]
    if first['t'] == 'Strong' and isinstance(first['c'], list):
        # unwrap the bold text but keep whatever follows it
        value[2] = first['c'] + inlines[1:]
        return Header(value[0], value[1], value[2])

    return None
def inc_header(key, value, format, _):
    """Push headers down by ``LEVELS`` levels where that is possible.

    A header is moved ``LEVELS`` levels deeper only if the result does not
    exceed the module-level ``MAX_LEVEL``; otherwise it keeps its level.
    Every header is rebuilt; other nodes are left untouched.
    """
    if key != 'Header':
        return None

    level, attr, inline = value
    shifted = level + LEVELS
    new_level = shifted if shifted <= MAX_LEVEL else level
    return Header(new_level, attr, inline)
def mycite(key, value, fmt, meta):
    """Pandoc filter action adding ConTeXt markup; only active for ``context``.

    * ``Div``: logs the div attributes via ``warning`` and, when the list
      ``['poetry']`` is an element of the attributes, wraps the div content
      in raw ConTeXt start/stop-lines blocks (mutating ``value`` in place).
    * ``Header`` with class ``red``: content wrapped in a red colour group
      and the header is returned.
    * ``Para``: mutated in place in three passes - ``{.red}`` markers colour
      the line they appear on, line breaks are replaced by a strut snippet,
      and strings starting with ``http:`` are wrapped in a hyphenated-url
      macro.

    Only the ``Header`` branch returns a value; the other branches rely on
    the in-place mutation of ``value``.

    NOTE(review): the nesting below was reconstructed from a
    whitespace-collapsed source - confirm against the original layout.
    """
    if key == 'Div' and fmt == 'context':
        warning(value[0])
        try:
            # membership test against the attr triple [id, classes, keyvals]
            if ['poetry'] in value[0]:
                value[1] = [
                    RawBlock('context', "\startlines\setupindenting[no]")
                ] + value[1] + [RawBlock('context', "\stoplines")]
        except KeyError:
            warning("non poetry div")
    if key == 'Header' and fmt == 'context':
        style = value[1][1]
        if "red" in style:
            value[2] = [context("\color[red]{")] + value[2] + [context('}')]
            return Header(value[0], value[1], value[2])
    if key == 'Para' and fmt == 'context':
        keysToInsert = []
        redKeys = []
        urlKeys = []
        lastLineBreak = 0
        # Pass 1: find "{.red}" markers; each yields (start of its line, its index)
        if type(value) is list:
            for key, val in enumerate(value):
                if val == {u'c': [], u't': u'LineBreak'}:
                    lastLineBreak = key + 1
                if val == {u'c': u'{.red}', u't': u'Str'}:
                    redKeys.append((lastLineBreak, key))
                    # blank the marker itself
                    val['c'] = ''
            # 'inserted' compensates for the index shift of earlier insertions
            inserted = 0
            for key in redKeys:
                start, end = key
                value.insert(int(start) + inserted, context("\color[red]{"))
                value.insert(end + inserted, context("}"))
                inserted = inserted + 2
        lastLineBreak = 0
        # Pass 2: replace every LineBreak by a newline-plus-strut snippet
        if type(value) is list:
            for key, val in enumerate(value):
                if val == {u'c': [], u't': u'LineBreak'}:
                    keysToInsert.append((lastLineBreak, key))
                    lastLineBreak = key + 1
            inserted = 0
            for key in keysToInsert:
                start, end = key
                #value.insert(start+inserted,context("\strut "))
                value[end] = context("\n\strut ")
                #inserted=inserted+1
        # Pass 3: wrap strings starting with "http:" in a hyphenated-url macro
        if type(value) is list:
            for key, val in enumerate(value):
                # 'unicode' is the Python 2 text type
                if 'c' in val and type(val['c']) is unicode and val[
                        't'] == 'Str' and re.match(r'http:', val['c']):
                    #val=[context("\hyphenatedurl{")]+[val]+[context("}")]
                    #val['c']="\hyphenatedurl{%s}" % (val['c'])
                    urlKeys.append((key, key + 2))
            inserted = 0
            for start, end in urlKeys:
                value.insert(int(start) + inserted, context("\hyphenatedurl{"))
                value.insert(end + inserted, context("}"))
                inserted = inserted + 2