def add_link_to_answers(doc):
    """Append a "(?)" link to the matching answer at the end of each question.

    A question section is a ``Div`` carrying the "section" class whose first
    block is a ``Header`` carrying the "question" class.  For every such
    section whose header has a non-empty identifier, a ``Link`` targeting
    ``#answer-<identifier>`` is appended to the inlines of the section's last
    block.  The document is modified in place; nothing is returned.
    """
    # First pass: collect the question sections without touching the tree.
    sections = []
    for elt, _path in pandoc.iter(doc, path=True):
        if not (isinstance(elt, Div) and "section" in elt[0][1]):
            continue
        _, blocks = elt
        if not blocks or not isinstance(blocks[0], Header):
            continue
        _, header_attr, _ = blocks[0][:]
        _, classes, _ = header_attr
        if "question" in classes:
            sections.append(elt)

    # Second pass: attach the marker.  Not perfect, but a marker anyway.
    # (A LaTeX ``\faQuestionCircle`` glyph was considered as an alternative.)
    for section in sections:
        _, blocks = section
        _, header_attr, _ = blocks[0][:]
        identifier, _, _ = header_attr
        if not identifier:
            continue

        # `blocks` always holds at least the header, so blocks[-1] exists.
        needs_space = True
        last = blocks[-1]
        if not isinstance(last, (Plain, Para)):
            # The last block cannot hold inlines: give the link its own block.
            blocks.append(Plain([]))
            needs_space = False
        elif isinstance(last, Para):
            para_inlines = last[0]
            if not para_inlines:
                # Empty paragraph: put the link in it, without a leading
                # space (indexing [-1] here would raise IndexError).
                needs_space = False
            else:
                tail = para_inlines[-1]
                ends_with_display = isinstance(tail, Image) or (
                    isinstance(tail, Math) and tail[0] == DisplayMath()
                )
                if ends_with_display:
                    # Do not glue the link to an image or a displayed
                    # formula: start a new paragraph for it.
                    blocks.append(Para([]))
                    needs_space = False

        if needs_space:
            symbol = Strong([Space(), Str("(?)")])
        else:
            symbol = Strong([Str("(?)")])
        attr = ("", [], [])
        target = ("#answer-" + identifier, "")
        link = Link(attr, [symbol], target)
        blocks[-1][0].append(link)
def remove(doc, needs_removal):
    """Selected content removal (two-pass algorithm).

    Return a deep copy of `doc` from which every element `elt` such that
    `needs_removal(elt)` is true has been deleted.  The deletion happens in
    place on the copy, so the caller's document is left untouched.
    """
    pruned = copy.deepcopy(doc)

    # Pass 1: record where each match lives, as (holder, index) pairs taken
    # from the last component of its path.
    targets = [
        trail[-1]
        for node, trail in pandoc.iter(pruned, path=True)
        if needs_removal(node)
    ]

    # Pass 2: delete in reverse document order, so that the indices of the
    # matches still to be processed remain valid.
    for holder, position in reversed(targets):
        del holder[position]
    return pruned
def add_link_to_answers(doc):
    """Append a "(Solution.)" link to the answer at the end of each question.

    A question section is a ``Div`` carrying the "section" class whose first
    block is a ``Header`` carrying the "question" class.  For every such
    section whose header has a non-empty identifier, the inlines
    ``(`` + link + ``.)`` are appended to the section's last block, where the
    link reads "Solution", carries the "no-parenthesis" class and targets
    ``#answer-<identifier>``.  The document is modified in place; nothing is
    returned.
    """
    # First pass: collect the question sections without touching the tree.
    sections = []
    for elt, _path in pandoc.iter(doc, path=True):
        if not (isinstance(elt, Div) and "section" in elt[0][1]):
            continue
        _, blocks = elt
        if not blocks or not isinstance(blocks[0], Header):
            continue
        _, header_attr, _ = blocks[0][:]
        _, classes, _ = header_attr
        if "question" in classes:
            sections.append(elt)

    # Second pass: attach the link.  Not perfect, but a marker anyway.
    for section in sections:
        _, blocks = section
        _, header_attr, _ = blocks[0][:]
        identifier, _, _ = header_attr
        if not identifier:
            continue

        # `blocks` always holds at least the header, so blocks[-1] exists.
        need_space = True
        last = blocks[-1]
        if not isinstance(last, (Plain, Para)):
            # The last block cannot hold inlines: give the link its own block.
            blocks.append(Plain([]))
            need_space = False
        elif isinstance(last, Para):
            para_inlines = last[0]
            if not para_inlines:
                # Empty paragraph: put the link in it, without a leading
                # space (indexing [-1] here would raise IndexError).
                need_space = False
            else:
                tail = para_inlines[-1]
                ends_with_display = isinstance(tail, Image) or (
                    isinstance(tail, Math) and tail[0] == DisplayMath()
                )
                if ends_with_display:
                    # Do not glue the link to an image or a displayed
                    # formula: start a new paragraph for it.
                    blocks.append(Para([]))
                    need_space = False

        attr = ("", ["no-parenthesis"], [])
        target = ("#answer-" + identifier, "")
        link = Link(attr, [Str("Solution")], target)
        link_wrapper = [Str("("), link, Str(".)")]
        if need_space:
            link_wrapper = [Space()] + link_wrapper
        blocks[-1][0].extend(link_wrapper)
def unpack_divs(doc):
    """Unpack divs (two-pass, in-place algorithm).

    Replace every ``Div`` found in `doc` by the blocks it contains, then
    return `doc` (the same object, modified in place).
    """
    # Pass 1: locate the divs and remember where they live and what they hold.
    found = []
    for node, trail in pandoc.iter(doc, path=True):
        if not isinstance(node, Div):
            continue
        holder, position = trail[-1]
        # Blocks are always held in lists (cf. the document model).
        assert isinstance(holder, list)
        found.append((holder, position, node[1]))

    # Pass 2: splice the contents over each div, in reverse document order
    # so that the positions recorded for the remaining matches stay valid.
    for holder, position, children in reversed(found):
        holder[position:position + 1] = children
    return doc
def divify(doc, level=None):
    """Encapsulate section content -- separated by headers -- in divs.

    With `level` set, every header of that level is wrapped, together with
    the blocks that follow it up to the next header of the same or a smaller
    level number, in a ``Div`` with the "section" class.  With `level` left
    to ``None``, levels 4, 3, 2 and 1 are processed in that order.  `doc` is
    modified in place.
    """
    # Note for the HTML backend:
    #   - div.section is turned into section automatically but ...
    #   - the TOC generation is broken. That happens because of the
    #     extra div hierarchy, not specifically the "section" class.
    #     Reference: <https://github.com/jgm/pandoc/issues/997>;
    #     marked as wontfix.
    # Note: issue with references; the bibliography will be added later,
    # out of the section.
    if level is None:
        for depth in (4, 3, 2, 1):
            divify(doc, level=depth)
        return

    # Pass 1: compute the extent [start, end) of each section at this level.
    spans = []
    for node, trail in pandoc.iter(doc, path=True):
        if not (isinstance(node, Header) and node[0] == level):
            continue
        holder, start = trail[-1]
        # The section stops at the first later sibling that is a header of
        # the same or a smaller level number; otherwise it runs to the end
        # of the holder (end is None).
        end = next(
            (
                position
                for position in range(start + 1, len(holder))
                if isinstance(holder[position], Header)
                and holder[position][0] <= level
            ),
            None,
        )
        assert holder[start:end]  # not empty, at least a header
        spans.append((holder, start, end))

    # Pass 2: wrap, in reverse document order to keep the indices valid.
    for holder, start, end in reversed(spans):
        wrapper = Div(("", ["section"], []), holder[start:end])
        holder[start:end] = [wrapper]
def make_level_4_section_headings_inline(doc):
    """Turn level-4 headers into bold run-in titles of the following text.

    Each level-4 ``Header`` is removed from the front of its holder and
    re-inserted, as a ``Span`` holding the title in bold followed by a LaTeX
    ``\\quad``, at the start of the first paragraph of that holder.  `doc` is
    modified in place.
    """
    # Pass 1: build the replacement span for every level-4 header.
    pending = []
    for node, trail in pandoc.iter(doc, path=True):
        if not isinstance(node, Header):
            continue
        level, attr, title = node[:]
        if level != 4:
            continue
        run_in = Span(attr, [Strong(title), RawInline("latex", r"\quad")])
        holder, _ = trail[-1]
        assert isinstance(holder, list)
        pending.append((holder, run_in))

    # Pass 2: drop each header and prepend its span to the next paragraph.
    for holder, run_in in pending:
        # The header is expected to be the first block of its holder.
        assert isinstance(holder[0], Header)
        holder.pop(0)
        if not holder or not isinstance(holder[0], (Plain, Para)):
            # Nothing to host the title: create an empty paragraph for it.
            holder.insert(0, Para([]))
        host_inlines = holder[0][0]
        # Vertical space before the title; see
        # <https://tex.stackexchange.com/questions/48753/obtaining-the-default-section-spacing-into-the-titlespacing-parameters>
        spacing = RawInline("latex", r"\vspace{3.25ex plus 1ex minus .2ex}")
        host_inlines[0:0] = [spacing, run_in]
def detect_image(elt):
    """Return True if any element yielded by ``pandoc.iter(elt)`` is an Image.

    Returns False when the traversal yields no ``Image`` at all (including
    when it yields nothing).
    """
    # `any` stops at the first Image and makes the "scan everything before
    # answering False" intent explicit, instead of an `else` clause on a
    # loop that never breaks.
    return any(isinstance(node, Image) for node in pandoc.iter(elt))
divs.append(("unpack", index, contents)) # Reverse document order is needed not to invalidate # the remaining matches indices. for action, index, contents in reversed(divs): del root[index] if action == "unpack": root[index:index] = contents return doc code_doc = make_code_doc(doc) src = "" for elt in pandoc.iter(code_doc): if isinstance(elt, CodeBlock): code = elt attr, text = code[:] _, classes, _ = attr[:] if "no-exec" not in classes and "notebook" not in classes: src += text + "\n" with open(".tmp.py", "w") as output: output.write(src) exec(src, {"__file__": __file__}) # Document Filter # ------------------------------------------------------------------------------ # TODO: pandoc naming: is "path" well named? Cause I am tempted to also # name path the last component ... the "path" representation is
return doc notebook_doc = make_notebook_doc(doc) VIDEO_TEMPLATE = ''' from IPython.display import HTML HTML(""" <video controls style="width:100%;"> <source src="{src}" type="{type}"> </video> """) ''' video_elements = [] for elt, path in pandoc.iter(notebook_doc, path=True): if isinstance(elt, RawBlock): format, content = elt[:] if format == Format("html"): parser = lxml.etree.HTMLParser() tree = lxml.etree.parse(io.StringIO(content), parser) html = tree.getroot() if html is not None and len(html): videos = list(html.iter("video")) if videos: video = videos[0] source = video.find("source") src = source.attrib["src"] type_ = source.attrib["type"] code = CodeBlock(("", [], []), VIDEO_TEMPLATE.format(src=src,
def handle_typed_sections(doc):
    """Decorate the headers of typed sections (theorem, exercise, ...).

    For every ``Header`` whose classes include at least one known type:

    - the "unnumbered" and "unlisted" classes are added to it;
    - its title is prefixed with the label of the first matching type and an
      en dash, when that type has a label;
    - for exercises and questions, a parenthesised difficulty mark is
      appended for each of the classes "zero" to "four" that is present;
    - a paragraph holding a raw LaTeX ``\\addcontentsline`` command, built
      from the original (undecorated) title, is inserted right after it.

    `doc` is modified in place; nothing is returned.  A typed header whose
    level is not in 1..5 raises ``KeyError``.
    """
    # Type class -> label shown in front of the title (None: no label).
    types = {
        "theorem": "Théorème",
        "corollary": "Corollaire",
        "definition": "Définition",
        "lemma": "Lemme",
        "proposition": "Proposition",
        "remark": "Remarque",
        "exercise": "Exercice",
        "example": "Exemple",
        "question": None,
        "answer": None,
    }
    # Header level -> LaTeX sectioning unit used in the table of contents.
    levels = {
        1: "section",
        2: "subsection",
        3: "subsubsection",
        4: "paragraph",
        5: "subparagraph",
    }
    # Difficulty class -> TeX source of its mark, in the order in which the
    # marks are appended.  (r"\mathord{\pmb{\infty}}" was also considered.)
    bullet = r"\mathord{\bullet}"
    difficulty_marks = (
        ("zero", r"\mathord{\boldsymbol{\circ}}"),
        ("one", bullet),
        ("two", bullet * 2),
        ("three", bullet * 3),
        ("four", bullet * 4),
    )

    todos = []
    for elt, path in pandoc.iter(doc, path=True):
        if not isinstance(elt, Header):
            continue
        header = elt
        level, attr, inlines = header
        _, classes, _ = attr
        shared = [type_ for type_ in classes if type_ in types]
        if not shared:
            continue
        classes.extend(["unnumbered", "unlisted"])
        holder, index = path[-1]
        # Render the undecorated title to LaTeX for the TOC entry.
        # NOTE(review): `map` is presumably the name exported by the pandoc
        # types, not the builtin -- confirm against the file's imports.
        minidoc = Pandoc(Meta(map()), [Para(inlines)])
        latex_title = pandoc.write(minidoc, format="latex").strip()
        todos.append((holder, index, level, latex_title))
        label = types[shared[0]]
        if label:
            inlines = [Str(label), Space(), Str("–"), Space()] + inlines
        if "exercise" in classes or "question" in classes:
            for class_name, tex in difficulty_marks:
                if class_name in classes:
                    inlines += [
                        Space(),
                        Str("("),
                        Math(InlineMath(), tex),
                        Str(")"),
                    ]
        header[2] = inlines

    # Insert the TOC commands in reverse document order: inserting forward
    # would shift the later headers of a shared holder by one, and their
    # command would then land before the header instead of after it.
    for holder, index, level, latex_title in reversed(todos):
        latex_code = (
            r"\addcontentsline{toc}{" + levels[level] + "}{" + latex_title + "}"
        )
        holder.insert(index + 1, Para([RawInline("latex", latex_code)]))

    # NOTE(review): `found` is not defined anywhere in the visible code, and
    # nothing visible consumes it; this pass records the (holder, index)
    # location of every raw HTML element, most recent first.  It may belong
    # to another function -- confirm before relying on it.
    for elt, path in pandoc.iter(doc, path=True):
        if isinstance(elt, (RawInline, RawBlock)):
            raw_format = elt[0]
            if raw_format[0] == "html":
                holder, i = path[-1]
                found.insert(0, (holder, i))
def remove_images(doc):
    """Delete every ``Image`` element from `doc`, in place.

    Nothing is returned.
    """
    # Pass 1: locate the images first.  Deleting while `pandoc.iter` is
    # still walking the tree would change the containers under the
    # traversal.
    targets = [
        trail[-1]
        for node, trail in pandoc.iter(doc, path=True)
        if isinstance(node, Image)
    ]
    # Pass 2: delete in reverse document order, so that the indices of the
    # matches still to be processed remain valid.
    for holder, position in reversed(targets):
        del holder[position]