def render_colon_fence(self, token: Token):
    """Render a code fence with ``:`` colon delimiters."""
    # TODO remove deprecation after v0.13.0
    match = REGEX_ADMONTION.match(token.info.strip())
    if match and match.groupdict()["name"] in list(STD_ADMONITIONS) + ["figure"]:
        classes = match.groupdict()["classes"][1:].split(",")
        name = match.groupdict()["name"]
        if classes and classes[0]:
            self.current_node.append(
                self.reporter.warning(
                    "comma-separated classes are deprecated, "
                    "use `:class:` option instead",
                    line=token_line(token),
                )
            )
            # we assume that no other options have been used
            token.content = f":class: {' '.join(classes)}\n\n" + token.content
        if name == "figure":
            self.current_node.append(
                self.reporter.warning(
                    ":::{figure} is deprecated, use :::{figure-md} instead",
                    line=token_line(token),
                )
            )
            name = "figure-md"
        token.info = f"{{{name}}} {match.groupdict()['title']}"

    if token.content.startswith(":::"):
        # the content starts with a nested fence block,
        # but must distinguish between ``:options:``, so we add a new line
        token.content = "\n" + token.content

    return self.render_fence(token)
def _anchor_func(state: StateCore):
    for idx, token in enumerate(state.tokens):
        if token.type != "heading_open":
            continue
        level = int(token.tag[1])
        if level not in selected_levels:
            continue
        inline_token = state.tokens[idx + 1]
        assert inline_token.children is not None
        title = "".join(
            child.content
            for child in inline_token.children
            if child.type in ["text", "code_inline"]
        )
        slug = unique_slug(slug_func(title), slugs)
        token.attrSet("id", slug)
        if permalink:
            link_tokens = [
                Token(
                    "link_open",
                    "a",
                    1,
                    attrs=[["class", "header-anchor"], ["href", f"#{slug}"]],
                ),
                Token("html_block", "", 0, content=permalinkSymbol),
                Token("link_close", "a", -1),
            ]
            if permalinkBefore:
                inline_token.children = (
                    link_tokens
                    + ([Token("text", "", 0, content=" ")] if permalinkSpace else [])
                    + inline_token.children
                )
            else:
                inline_token.children.extend(
                    ([Token("text", "", 0, content=" ")] if permalinkSpace else [])
                    + link_tokens
                )
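# Usage sketch for the rule above. Assumption: this closure matches the core
# rule installed by mdit_py_plugins.anchors.anchors_plugin, which provides the
# selected_levels / slug_func / permalink* options it closes over.
from markdown_it import MarkdownIt
from mdit_py_plugins.anchors import anchors_plugin

md = MarkdownIt().use(anchors_plugin, max_level=2, permalink=True)
print(md.render("# My Title"))
# e.g. '<h1 id="my-title">My Title <a class="header-anchor" href="#my-title">\u00b6</a></h1>'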
def parse(self, inputstring: str, document: nodes.document) -> None:
    """Parse source text.

    :param inputstring: The source string to parse
    :param document: The root docutils node to add AST elements to
    """
    config = document.settings.env.myst_config
    parser = default_parser(config)
    parser.options["document"] = document
    env: dict = {}
    tokens = parser.parse(inputstring, env)
    if not tokens or tokens[0].type != "front_matter":
        # we always add front matter, so that we can merge it with global keys,
        # specified in the sphinx configuration
        tokens = [Token("front_matter", "", 0, content="{}", map=[0, 0])] + tokens

    header_text = None
    if tokens[0].type == "front_matter":
        # Hugo article migration:
        # get the title from the (YAML) front matter,
        # falling back to the file (or section) name
        import pathlib

        path = pathlib.Path(document.current_source)
        title = path.stem
        if title in ("index", "_index"):
            title = path.parent.stem
        try:
            import yaml

            data = yaml.safe_load(tokens[0].content)
            title = data["title"]
        except Exception:
            pass
        header_text = Token("text", "", 0, content=title, map=tokens[0].map)
        tokens = [
            tokens[0],
            Token("heading_open", "h1", 1, content="{}", map=header_text.map),
            Token(
                "inline", "", 0, content="{}", map=header_text.map,
                children=[header_text],
            ),
            Token("heading_close", "h1", -1, content="{}", map=header_text.map),
        ] + tokens[1:]

    parser.renderer.render(tokens, parser.options, env)
def parse(self, inputstring: str, document: nodes.document):
    """Parse source text.

    Args:
        inputstring: The source string to parse
        document: The root docutils node to add AST elements to
    """
    try:
        config = document.settings.env.myst_config
    except Exception:
        config = MdParserConfig(renderer="docutils")
    parser = default_parser(config)
    parser.options["document"] = document
    env = AttrDict()
    tokens = parser.parse(inputstring, env)
    if not tokens or tokens[0].type != "front_matter":
        # we always add front matter, so that we can merge it with global keys,
        # specified in the sphinx configuration
        tokens = [
            Token(
                type="front_matter",
                tag="",
                nesting=0,
                content="{}",  # noqa: P103
                map=[0, 0],
            ),
        ] + tokens
    parser.renderer.render(tokens, parser.options, env)
def parse_code_cell(cell, start_line):
    tokens = [
        Token(
            "nb_code_cell",
            "",
            0,
            meta={"cell": cell},
            map=[start_line, start_line],
        )
    ]
    for i, output in enumerate(cell["outputs"]):
        if output["output_type"] == "display_data":
            if "text/markdown" in output["data"]:
                new_code_cell = deepcopy(cell)
                new_code_cell["metadata"]["tags"] = (
                    new_code_cell["metadata"].get("tags", []) + ["remove-input"]
                )
                cell["outputs"] = cell["outputs"][:i]
                new_code_cell["outputs"] = new_code_cell["outputs"][i + 1:]
                tokens.extend(parse_block(output["data"]["text/markdown"], start_line))
                if new_code_cell["outputs"]:
                    tokens.extend(parse_code_cell(new_code_cell, start_line))
                break
    return tokens
def test_nest_tokens():
    tokens = nest_tokens(
        [
            Token("start", "", 0),
            Token("open", "", 1),
            Token("open_inner", "", 1),
            Token("inner", "", 0),
            Token("close_inner", "", -1),
            Token("close", "", -1),
            Token("end", "", 0),
        ]
    )
    assert [t.type for t in tokens] == ["start", "open", "end"]
    assert isinstance(tokens[0], Token)
    assert isinstance(tokens[1], NestedTokens)
    assert isinstance(tokens[2], Token)

    nested = tokens[1]
    assert nested.opening.type == "open"
    assert nested.closing.type == "close"
    assert len(nested.children) == 1
    assert nested.children[0].type == "open_inner"

    nested2 = nested.children[0]
    assert nested2.opening.type == "open_inner"
    assert nested2.closing.type == "close_inner"
    assert len(nested2.children) == 1
    assert nested2.children[0].type == "inner"
def test_footnote_inline():
    md = MarkdownIt().use(footnote_plugin)
    src = r"^[a]"
    tokens = []
    state = StateInline(src, md, {}, tokens)
    state.env = {"footnotes": {"refs": {":a": -1}}}
    index.footnote_inline(state, False)
    # print([t.as_dict() for t in tokens])
    assert [t.as_dict() for t in tokens] == [
        {
            "type": "footnote_ref",
            "tag": "",
            "nesting": 0,
            "attrs": None,
            "map": None,
            "level": 0,
            "children": None,
            "content": "",
            "markup": "",
            "info": "",
            "meta": {"id": 0},
            "block": False,
            "hidden": False,
        }
    ]
    assert state.env == {
        "footnotes": {
            "refs": {":a": -1},
            "list": {
                0: {
                    "content": "a",
                    "tokens": [
                        Token(
                            type="text", tag="", nesting=0, attrs=None, map=None,
                            level=0, children=None, content="a", markup="",
                            info="", meta={}, block=False, hidden=False,
                        )
                    ],
                }
            },
        }
    }
def test_comment_token():
    md = MarkdownIt("commonmark").use(myst_block_plugin)
    tokens = md.parse("\n\n% abc \n%def")
    expected_token = Token(
        type="myst_line_comment",
        tag="",
        nesting=0,
        map=[2, 4],
        level=0,
        children=None,
        content=" abc\ndef",
        markup="%",
        info="",
        meta={},
        block=True,
        hidden=False,
    )
    expected_token.attrSet("class", "myst-line-comment")
    assert tokens == [expected_token]
def render_image(self, token: Token):
    img_node = nodes.image()
    self.add_line_and_source_path(img_node, token)
    destination = token.attrGet("src") or ""

    if self.config.get("relative-images", None) is not None and not is_external_url(
        destination, None, True
    ):
        # make the path relative to an "including" document
        destination = os.path.normpath(
            os.path.join(
                self.config.get("relative-images", ""),
                os.path.normpath(destination),
            )
        )

    img_node["uri"] = destination
    img_node["alt"] = self.renderInlineAsText(token.children or [])
    title = token.attrGet("title")
    if title:
        img_node["title"] = title
    self.current_node.append(img_node)
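# A quick look at the inline token this renderer consumes: in plain
# markdown-it-py, CommonMark image syntax yields an "image" token whose
# src/title live in attrs and whose alt text lives in children.
from markdown_it import MarkdownIt

inline = MarkdownIt().parse('![alt text](images/pic.png "a title")')[1]
image = inline.children[0]
assert image.type == "image"
assert image.attrGet("src") == "images/pic.png"
assert image.attrGet("title") == "a title"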
def test_block_token():
    md = MarkdownIt("commonmark").use(myst_block_plugin)
    tokens = md.parse("+++")
    assert tokens == [
        Token(
            type="myst_block_break", tag="hr", nesting=0,
            attrs=[["class", "myst-block"]], map=[0, 1], level=0,
            children=None, content="", markup="+++", info="", meta={},
            block=True, hidden=False,
        )
    ]
    tokens = md.parse("\n+ + + abc")
    assert tokens == [
        Token(
            type="myst_block_break", tag="hr", nesting=0,
            attrs=[["class", "myst-block"]], map=[1, 2], level=0,
            children=None, content="abc", markup="+++", info="", meta={},
            block=True, hidden=False,
        )
    ]
def _ensure_anchors_in_place(heading_tokens: Sequence[Token]) -> None:
    """Mutate heading tokens so that HTML anchors are in place.

    Add HTML anchor to heading token sequence if it is not already there.
    Don't add the slug value, we don't know it yet. The slug value will
    have to be inserted after calling this.
    """
    # Remove possible existing anchor
    anchor_start_idx = None
    anchor_end_idx = None
    inline_root = heading_tokens[1]
    assert inline_root.children is not None, "inline token's children must not be None"
    for child_idx, child_tkn in enumerate(inline_root.children):
        if child_tkn.type != "html_inline":
            continue
        if re.match(r"<a\s", child_tkn.content):
            anchor_start_idx = child_idx
            anchor_end_idx = child_idx
        if anchor_start_idx is not None and child_tkn.content == "</a>":
            anchor_end_idx = child_idx
    if anchor_start_idx is not None:
        assert anchor_end_idx is not None
        inline_root.children = (
            inline_root.children[:anchor_start_idx]
            + inline_root.children[anchor_end_idx + 1:]
        )
        # Remove trailing whitespace from the heading
        if (
            anchor_start_idx != 0
            and inline_root.children[anchor_start_idx - 1].type == "text"
        ):
            inline_root.children[anchor_start_idx - 1].content = inline_root.children[
                anchor_start_idx - 1
            ].content.rstrip()

    # Add the type of anchor we want
    anchor_text = ""
    link_tokens = [
        Token("html_inline", "", 0, content='<a name="{slug}">'),
        Token("text", "", 0, content=anchor_text),
        Token("html_inline", "", 0, content="</a>"),
    ]
    inline_root.children += link_tokens
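# Minimal driver sketch for the helper above, assuming heading_tokens is the
# usual [heading_open, inline, heading_close] triple produced by markdown-it:
from markdown_it import MarkdownIt

heading_tokens = MarkdownIt().parse("# My Title")
_ensure_anchors_in_place(heading_tokens)
inline = heading_tokens[1]
# the children now end with the placeholder anchor: <a name="{slug}">, "", </a>
print([(t.type, t.content) for t in inline.children[-3:]])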
def parse(self, inputstring: str, document: nodes.document) -> None:
    """Parse source text.

    :param inputstring: The source string to parse
    :param document: The root docutils node to add AST elements to
    """
    config = MdParserConfig(renderer="docutils", enable_extensions=["linkify"])
    parser = default_parser(config)
    parser.options["document"] = document
    env = AttrDict()
    tokens = parser.parse(inputstring, env)
    if not tokens or tokens[0].type != "front_matter":
        # we always add front matter, so that we can merge it with global keys,
        # specified in the sphinx configuration
        tokens = [Token("front_matter", "", 0, content="{}", map=[0, 0])] + tokens
    parser.renderer.render(tokens, parser.options, env)
def todoify(token: Token, token_constructor):
    token.children.insert(0, make_checkbox(token, token_constructor))
    token.children[1].content = token.children[1].content[3:]
    token.content = token.content[3:]

    if use_label_wrapper:
        if use_label_after:
            token.children.pop()
            # Replaced number generator from original plugin with uuid.
            checklist_id = f"task-item-{uuid4()}"
            token.children[0].content = (
                token.children[0].content[0:-1] + f' id="{checklist_id}">'
            )
            token.children.append(
                after_label(token.content, checklist_id, token_constructor)
            )
        else:
            token.children.insert(0, begin_label(token_constructor))
            token.children.append(end_label(token_constructor))
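# Usage sketch. Assumption: todoify belongs to a task-list plugin equivalent
# to mdit_py_plugins.tasklists, whose `label` / `label_after` arguments map to
# the use_label_wrapper / use_label_after closure variables above.
from markdown_it import MarkdownIt
from mdit_py_plugins.tasklists import tasklists_plugin

md = MarkdownIt().use(tasklists_plugin, enabled=True, label=True, label_after=True)
print(md.render("- [x] done\n- [ ] todo"))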
def test_comment_token():
    md = MarkdownIt("commonmark").use(myst_block_plugin)
    tokens = md.parse("\n\n% abc")
    assert tokens == [
        Token(
            type="myst_line_comment", tag="", nesting=0,
            attrs=[["class", "myst-line-comment"]], map=[2, 3], level=0,
            children=None, content="abc", markup="%", info="", meta={},
            block=True, hidden=False,
        )
    ]
def handle_cross_reference(self, token: Token, destination: str):
    """Create nodes for references that are not immediately resolvable."""
    wrap_node = addnodes.pending_xref(
        refdoc=self.doc_env.docname,
        reftarget=unquote(destination),
        reftype="myst",
        refdomain=None,  # Added to enable cross-linking
        refexplicit=len(token.children or []) > 0,
        refwarn=True,
    )
    self.add_line_and_source_path(wrap_node, token)
    title = token.attrGet("title")
    if title:
        wrap_node["title"] = title
    self.current_node.append(wrap_node)

    inner_node = nodes.inline("", "", classes=["xref", "myst"])
    wrap_node.append(inner_node)
    with self.current_node_context(inner_node):
        self.render_children(token)
def test_emptyStr():
    md = MarkdownIt()
    tokens = md.parseInline("")
    assert tokens == [
        Token(
            type="inline", tag="", nesting=0, attrs=None, map=[0, 1], level=0,
            children=[], content="", markup="", info="", meta={},
            block=False, hidden=False,
        )
    ]
def test_token():
    md = MarkdownIt("commonmark").use(front_matter_plugin)
    tokens = md.parse("---\na: 1\n---")
    # print(tokens)
    assert tokens == [
        Token(
            type="front_matter", tag="", nesting=0, attrs=None, map=[0, 3],
            level=0, children=None, content="a: 1", markup="---", info="",
            meta={}, block=True, hidden=True,
        )
    ]
def render_fence(self, token: Token):
    text = token.content
    if token.info:
        # Ensure that we'll have an empty string if info exists but is only spaces
        token.info = token.info.strip()
    language = token.info.split()[0] if token.info else ""

    if not self.config.get("commonmark_only", False) and language == "{eval-rst}":
        # copy necessary elements (source, line no, env, reporter)
        newdoc = make_document()
        newdoc["source"] = self.document["source"]
        newdoc.settings = self.document.settings
        newdoc.reporter = self.reporter
        # pad the line numbers artificially so they offset with the fence block
        pseudosource = ("\n" * token_line(token)) + token.content
        # actually parse the rst into our document
        MockRSTParser().parse(pseudosource, newdoc)
        for node in newdoc:
            if node["names"]:
                self.document.note_explicit_target(node, node)
        self.current_node.extend(newdoc[:])
        return
    elif (
        not self.config.get("commonmark_only", False)
        and language.startswith("{")
        and language.endswith("}")
    ):
        return self.render_directive(token)

    if not language:
        try:
            sphinx_env = self.document.settings.env
            language = sphinx_env.temp_data.get(
                "highlight_language", sphinx_env.config.highlight_language
            )
        except AttributeError:
            pass
    if not language:
        language = self.config.get("highlight_language", "")
    node = nodes.literal_block(text, text, language=language)
    self.add_line_and_source_path(node, token)
    self.current_node.append(node)
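# Quick, self-contained illustration of the language extraction above (pure
# Python, no Sphinx needed): the first word of the fence info selects the branch.
for info in ("{eval-rst}", "{note} Title", "python", "   "):
    info = info.strip()
    language = info.split()[0] if info else ""
    print(repr(info), "->", repr(language))
# '{eval-rst}'   -> '{eval-rst}'  (rendered via the embedded RST parser)
# '{note} Title' -> '{note}'      (dispatched to render_directive)
# 'python'       -> 'python'      (plain literal block with highlighting)
# ''             -> ''            (falls back to the default highlight language)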
def render_nb_initialise(self, token: SyntaxTreeNode) -> None:
    env = cast(BuildEnvironment, self.sphinx_env)
    metadata = self.nb_client.nb_metadata
    special_keys = ["kernelspec", "language_info", "source_map"]
    for key in special_keys:
        if key in metadata:
            # save these special keys on the metadata, rather than as docinfo
            # note, sphinx_book_theme checks kernelspec is in the metadata
            env.metadata[env.docname][key] = metadata.get(key)

    # forward the remaining metadata to the front_matter renderer
    special_keys.append("widgets")
    top_matter = {k: v for k, v in metadata.items() if k not in special_keys}
    self.render_front_matter(
        Token(  # type: ignore
            "front_matter",
            "",
            0,
            map=[0, 0],
            content=top_matter,  # type: ignore[arg-type]
        ),
    )
def footnote_tail(state: StateBlock, *args, **kwargs):
    """Post-processing step, to move footnote tokens to end of the token stream.

    Also removes un-referenced tokens.
    """
    insideRef = False
    refTokens = {}

    if "footnotes" not in state.env:
        return

    current = []
    tok_filter = []
    for tok in state.tokens:
        if tok.type == "footnote_reference_open":
            insideRef = True
            current = []
            currentLabel = tok.meta["label"]
            tok_filter.append(False)
            continue

        if tok.type == "footnote_reference_close":
            insideRef = False
            # prepend ':' to avoid conflict with Object.prototype members
            refTokens[":" + currentLabel] = current
            tok_filter.append(False)
            continue

        if insideRef:
            current.append(tok)

        tok_filter.append(not insideRef)

    state.tokens = [t for t, f in zip(state.tokens, tok_filter) if f]

    if "list" not in state.env.get("footnotes", {}):
        return
    foot_list = state.env["footnotes"]["list"]

    token = Token("footnote_block_open", "", 1)
    state.tokens.append(token)

    for i, foot_note in foot_list.items():
        token = Token("footnote_open", "", 1)
        token.meta = {"id": i, "label": foot_note.get("label", None)}
        # TODO propagate line positions of original foot note
        # (but don't store in token.map, because this is used for scroll syncing)
        state.tokens.append(token)

        if "tokens" in foot_note:
            tokens = []

            token = Token("paragraph_open", "p", 1)
            token.block = True
            tokens.append(token)

            token = Token("inline", "", 0)
            token.children = foot_note["tokens"]
            token.content = foot_note["content"]
            tokens.append(token)

            token = Token("paragraph_close", "p", -1)
            token.block = True
            tokens.append(token)

        elif "label" in foot_note:
            tokens = refTokens[":" + foot_note["label"]]

        state.tokens.extend(tokens)
        if state.tokens[len(state.tokens) - 1].type == "paragraph_close":
            lastParagraph = state.tokens.pop()
        else:
            lastParagraph = None

        t = (
            foot_note["count"]
            if (("count" in foot_note) and (foot_note["count"] > 0))
            else 1
        )
        j = 0
        while j < t:
            token = Token("footnote_anchor", "", 0)
            token.meta = {"id": i, "subId": j, "label": foot_note.get("label", None)}
            state.tokens.append(token)
            j += 1

        if lastParagraph:
            state.tokens.append(lastParagraph)

    token = Token("footnote_close", "", -1)
    state.tokens.append(token)

    token = Token("footnote_block_close", "", -1)
    state.tokens.append(token)
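# End-to-end usage sketch for the footnote rules in this file. Assumption:
# footnote_tail (and footnote_def below) match the rules registered by
# mdit_py_plugins.footnote.footnote_plugin.
from markdown_it import MarkdownIt
from mdit_py_plugins.footnote import footnote_plugin

md = MarkdownIt().use(footnote_plugin)
print(md.render("A statement.[^1]\n\n[^1]: And its footnote."))
# footnote_tail moves the definition body into a trailing
# <section class="footnotes"> block, after the referencing paragraph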
def test_serialization():
    token = Token("name", "tag", 0, children=[Token("other", "tag2", 0)])
    assert token == Token.from_dict(token.as_dict())
def footnote_def(state: StateBlock, startLine: int, endLine: int, silent: bool):
    """Process footnote block definition"""

    start = state.bMarks[startLine] + state.tShift[startLine]
    maximum = state.eMarks[startLine]

    # line should be at least 5 chars - "[^x]:"
    if start + 4 > maximum:
        return False
    if state.srcCharCode[start] != 0x5B:  # /* [ */
        return False
    if state.srcCharCode[start + 1] != 0x5E:  # /* ^ */
        return False

    pos = start + 2
    while pos < maximum:
        if state.srcCharCode[pos] == 0x20:
            return False
        if state.srcCharCode[pos] == 0x5D:  # /* ] */
            break
        pos += 1

    if pos == start + 2:  # no empty footnote labels
        return False
    pos += 1
    if pos + 1 >= maximum or state.srcCharCode[pos] != 0x3A:  # /* : */
        return False
    if silent:
        return True
    pos += 1

    label = state.src[start + 2:pos - 2]
    state.env.setdefault("footnotes", {}).setdefault("refs", {})[":" + label] = -1

    open_token = Token("footnote_reference_open", "", 1)
    open_token.meta = {"label": label}
    open_token.level = state.level
    state.level += 1
    state.tokens.append(open_token)

    oldBMark = state.bMarks[startLine]
    oldTShift = state.tShift[startLine]
    oldSCount = state.sCount[startLine]
    oldParentType = state.parentType

    posAfterColon = pos
    initial = offset = (
        state.sCount[startLine]
        + pos
        - (state.bMarks[startLine] + state.tShift[startLine])
    )
    while pos < maximum:
        ch = state.srcCharCode[pos]
        if isSpace(ch):
            if ch == 0x09:
                offset += 4 - offset % 4
            else:
                offset += 1
        else:
            break
        pos += 1

    state.tShift[startLine] = pos - posAfterColon
    state.sCount[startLine] = offset - initial

    state.bMarks[startLine] = posAfterColon
    state.blkIndent += 4
    state.parentType = "footnote"

    if state.sCount[startLine] < state.blkIndent:
        state.sCount[startLine] += state.blkIndent

    state.md.block.tokenize(state, startLine, endLine, True)

    state.parentType = oldParentType
    state.blkIndent -= 4
    state.tShift[startLine] = oldTShift
    state.sCount[startLine] = oldSCount
    state.bMarks[startLine] = oldBMark

    open_token.map = [startLine, state.line]

    token = Token("footnote_reference_close", "", -1)
    state.level -= 1
    token.level = state.level
    state.tokens.append(token)

    return True
def test_parseInline():
    md = MarkdownIt()
    tokens = md.parseInline("abc\n\n> xyz")
    assert tokens == [
        Token(
            type="inline", tag="", nesting=0, attrs=None, map=[0, 1], level=0,
            children=[
                Token(
                    type="text", tag="", nesting=0, attrs=None, map=None,
                    level=0, children=None, content="abc", markup="", info="",
                    meta={}, block=False, hidden=False,
                ),
                Token(
                    type="softbreak", tag="br", nesting=0, attrs=None, map=None,
                    level=0, children=None, content="", markup="", info="",
                    meta={}, block=False, hidden=False,
                ),
                Token(
                    type="softbreak", tag="br", nesting=0, attrs=None, map=None,
                    level=0, children=None, content="", markup="", info="",
                    meta={}, block=False, hidden=False,
                ),
                Token(
                    type="text", tag="", nesting=0, attrs=None, map=None,
                    level=0, children=None, content="> xyz", markup="", info="",
                    meta={}, block=False, hidden=False,
                ),
            ],
            content="abc\n\n> xyz", markup="", info="", meta={},
            block=False, hidden=False,
        )
    ]
def test_basic():
    md = MarkdownIt().use(myst_role_plugin)
    src = "{abc}``` a ```"
    tokens = md.parse(src)
    print(tokens)
    assert tokens == [
        Token(
            type="paragraph_open", tag="p", nesting=1, attrs=None, map=[0, 1],
            level=0, children=None, content="", markup="", info="", meta={},
            block=True, hidden=False,
        ),
        Token(
            type="inline", tag="", nesting=0, attrs=None, map=[0, 1], level=1,
            children=[
                Token(
                    type="myst_role", tag="", nesting=0, attrs=None, map=None,
                    level=0, children=None, content=" a ", markup="", info="",
                    meta={"name": "abc"}, block=False, hidden=False,
                )
            ],
            content="{abc}``` a ```", markup="", info="", meta={},
            block=True, hidden=False,
        ),
        Token(
            type="paragraph_close", tag="p", nesting=-1, attrs=None, map=None,
            level=0, children=None, content="", markup="", info="", meta={},
            block=True, hidden=False,
        ),
    ]
def test_footnote_tail():
    md = MarkdownIt()
    tokens = [
        Token(
            type="footnote_reference_open", tag="", nesting=1, attrs=None,
            map=None, level=0, children=None, content="", markup="", info="",
            meta={"label": "a"}, block=False, hidden=False,
        ),
        Token(
            type="paragraph_open", tag="p", nesting=1, attrs=None, map=[0, 1],
            level=1, children=None, content="", markup="", info="", meta={},
            block=True, hidden=False,
        ),
        Token(
            type="inline", tag="", nesting=0, attrs=None, map=[0, 1], level=2,
            children=[], content="xyz", markup="", info="", meta={},
            block=True, hidden=False,
        ),
        Token(
            type="paragraph_close", tag="p", nesting=-1, attrs=None, map=None,
            level=1, children=None, content="", markup="", info="", meta={},
            block=True, hidden=False,
        ),
        Token(
            type="footnote_reference_close", tag="", nesting=-1, attrs=None,
            map=None, level=0, children=None, content="", markup="", info="",
            meta={}, block=False, hidden=False,
        ),
        Token("other", "", 0),
    ]
    env = {"footnotes": {"refs": {":a": 0}, "list": {0: {"label": "a", "count": 1}}}}
    state = StateBlock("", md, env, tokens)
    index.footnote_tail(state)
    assert state.tokens == [
        Token(
            type="other", tag="", nesting=0, attrs=None, map=None, level=0,
            children=None, content="", markup="", info="", meta={},
            block=False, hidden=False,
        ),
        Token(
            type="footnote_block_open", tag="", nesting=1, attrs=None, map=None,
            level=0, children=None, content="", markup="", info="", meta={},
            block=False, hidden=False,
        ),
        Token(
            type="footnote_open", tag="", nesting=1, attrs=None, map=None,
            level=0, children=None, content="", markup="", info="",
            meta={"id": 0, "label": "a"}, block=False, hidden=False,
        ),
        Token(
            type="paragraph_open", tag="p", nesting=1, attrs=None, map=[0, 1],
            level=1, children=None, content="", markup="", info="", meta={},
            block=True, hidden=False,
        ),
        Token(
            type="inline", tag="", nesting=0, attrs=None, map=[0, 1], level=2,
            children=[], content="xyz", markup="", info="", meta={},
            block=True, hidden=False,
        ),
        Token(
            type="footnote_anchor", tag="", nesting=0, attrs=None, map=None,
            level=0, children=None, content="", markup="", info="",
            meta={"id": 0, "subId": 0, "label": "a"}, block=False, hidden=False,
        ),
        Token(
            type="paragraph_close", tag="p", nesting=-1, attrs=None, map=None,
            level=1, children=None, content="", markup="", info="", meta={},
            block=True, hidden=False,
        ),
        Token(
            type="footnote_close", tag="", nesting=-1, attrs=None, map=None,
            level=0, children=None, content="", markup="", info="", meta={},
            block=False, hidden=False,
        ),
        Token(
            type="footnote_block_close", tag="", nesting=-1, attrs=None,
            map=None, level=0, children=None, content="", markup="", info="",
            meta={}, block=False, hidden=False,
        ),
    ]
def test_token():
    token = Token("name", "tag", 0)
    assert token.as_dict() == {
        "type": "name",
        "tag": "tag",
        "nesting": 0,
        "attrs": None,
        "map": None,
        "level": 0,
        "children": None,
        "content": "",
        "markup": "",
        "info": "",
        "meta": {},
        "block": False,
        "hidden": False,
    }
    token.attrSet("a", "b")
    assert token.attrGet("a") == "b"
    token.attrJoin("a", "c")
    assert token.attrGet("a") == "b c"
    token.attrPush(["x", "y"])
    assert token.attrGet("x") == "y"
    assert token.attrIndex("a") == 0
    assert token.attrIndex("x") == 1
    assert token.attrIndex("j") == -1
def nb_to_tokens(
    ntbk: nbf.NotebookNode, config: MdParserConfig, renderer_plugin: str
) -> Tuple[MarkdownIt, AttrDict, List[Token]]:
    """Parse the notebook content to a list of syntax tokens and an env,
    containing global data like reference definitions.
    """
    # setup the markdown parser
    # Note we disable front matter parsing,
    # because this is taken from the actual notebook metadata
    md = default_parser(config)
    md.disable("front_matter", ignoreInvalid=True)
    md.renderer = SphinxNBRenderer(md)
    # make a sandbox where all the parsing global data,
    # like reference definitions will be stored
    env = AttrDict()
    rules = md.core.ruler.get_active_rules()

    # First only run pre-inline chains,
    # so we can collect all reference definitions, etc, before assessing references
    def parse_block(src, start_line):
        with md.reset_rules():
            # enable only rules up to block
            md.core.ruler.enableOnly(rules[: rules.index("inline")])
            tokens = md.parse(src, env)
        for token in tokens:
            if token.map:
                token.map = [start_line + token.map[0], start_line + token.map[1]]
        for dup_ref in env.get("duplicate_refs", []):
            if "fixed" not in dup_ref:
                dup_ref["map"] = [
                    start_line + dup_ref["map"][0],
                    start_line + dup_ref["map"][1],
                ]
                dup_ref["fixed"] = True
        return tokens

    block_tokens = []
    source_map = ntbk.metadata.get("source_map", None)

    # get language lexer name
    langinfo = ntbk.metadata.get("language_info", {})
    lexer = langinfo.get("pygments_lexer", langinfo.get("name", None))
    # TODO log warning if lexer is still None

    for cell_index, nb_cell in enumerate(ntbk.cells):

        # if the source_map has been stored (for text-based notebooks),
        # we use that to define the starting line for each cell,
        # otherwise we set a pseudo base that represents the cell index
        start_line = source_map[cell_index] if source_map else (cell_index + 1) * 10000
        start_line += 1  # use base 1 rather than 0

        # Skip empty cells
        if len(nb_cell["source"].strip()) == 0:
            continue

        # skip cells tagged for removal
        # TODO this logic should be deferred to a transform
        tags = nb_cell.metadata.get("tags", [])
        if ("remove_cell" in tags) or ("remove-cell" in tags):
            continue

        if nb_cell["cell_type"] == "markdown":
            # we add the cell index to tokens,
            # so they can be included in the error logging
            block_tokens.extend(parse_block(nb_cell["source"], start_line))

        elif nb_cell["cell_type"] == "code":
            # here we do nothing but store the cell as a custom token
            block_tokens.append(
                Token(
                    "nb_code_cell",
                    "",
                    0,
                    meta={"cell": nb_cell, "lexer": lexer, "renderer": renderer_plugin},
                    map=[start_line, start_line],
                )
            )

    # Now all definitions have been gathered,
    # we run inline and post-inline chains, to expand the text.
    # Note we assume here that these rules never require the actual source text,
    # only acting on the existing tokens
    state = StateCore(None, md, env, block_tokens)
    with md.reset_rules():
        md.core.ruler.enableOnly(rules[rules.index("inline"):])
        md.core.process(state)

    # Add the front matter.
    # Note that myst_parser serialises dict/list like keys, when rendering to
    # docutils docinfo. These could be read back with `json.loads`.
    state.tokens = [
        Token("front_matter", "", 0, content={k: v for k, v in ntbk.metadata.items()})
    ] + state.tokens

    # If there are widgets, this will embed the state of all widgets in a script
    if contains_widgets(ntbk):
        state.tokens.append(
            Token("jupyter_widget_state", "", 0, meta={"state": get_widgets(ntbk)})
        )
    return md, env, state.tokens
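# The block/inline two-phase split used above, shown in isolation with plain
# markdown-it-py (reset_rules, get_active_rules and enableOnly are real
# MarkdownIt APIs, also used verbatim in the function above):
from markdown_it import MarkdownIt

md = MarkdownIt()
rules = md.core.ruler.get_active_rules()
with md.reset_rules():
    md.core.ruler.enableOnly(rules[: rules.index("inline")])
    tokens = md.parse("a [link][ref]\n\n[ref]: https://example.com", {})
inline = next(t for t in tokens if t.type == "inline")
print(inline.content, inline.children)  # content kept; children not yet parsed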
def test_store_labels():
    md = MarkdownIt()
    md.options["store_labels"] = True
    src = "[a]\n\n![a]\n\n[a]: ijk"
    tokens = md.parse(src)
    # print(tokens)
    assert tokens == [
        Token(
            type="paragraph_open", tag="p", nesting=1, attrs=None, map=[0, 1],
            level=0, children=None, content="", markup="", info="", meta={},
            block=True, hidden=False,
        ),
        Token(
            type="inline", tag="", nesting=0, attrs=None, map=[0, 1], level=1,
            children=[
                Token(
                    type="link_open", tag="a", nesting=1,
                    attrs=[["href", "ijk"]], map=None, level=0, children=None,
                    content="", markup="", info="", meta={"label": "A"},
                    block=False, hidden=False,
                ),
                Token(
                    type="text", tag="", nesting=0, attrs=None, map=None,
                    level=1, children=None, content="a", markup="", info="",
                    meta={}, block=False, hidden=False,
                ),
                Token(
                    type="link_close", tag="a", nesting=-1, attrs=None,
                    map=None, level=0, children=None, content="", markup="",
                    info="", meta={}, block=False, hidden=False,
                ),
            ],
            content="[a]", markup="", info="", meta={}, block=True, hidden=False,
        ),
        Token(
            type="paragraph_close", tag="p", nesting=-1, attrs=None, map=None,
            level=0, children=None, content="", markup="", info="", meta={},
            block=True, hidden=False,
        ),
        Token(
            type="paragraph_open", tag="p", nesting=1, attrs=None, map=[2, 3],
            level=0, children=None, content="", markup="", info="", meta={},
            block=True, hidden=False,
        ),
        Token(
            type="inline", tag="", nesting=0, attrs=None, map=[2, 3], level=1,
            children=[
                Token(
                    type="image", tag="img", nesting=0,
                    attrs=[["src", "ijk"], ["alt", ""]], map=None, level=0,
                    children=[
                        Token(
                            type="text", tag="", nesting=0, attrs=None,
                            map=None, level=0, children=None, content="a",
                            markup="", info="", meta={}, block=False,
                            hidden=False,
                        )
                    ],
                    content="a", markup="", info="", meta={"label": "A"},
                    block=False, hidden=False,
                )
            ],
            content="![a]", markup="", info="", meta={}, block=True, hidden=False,
        ),
        Token(
            type="paragraph_close", tag="p", nesting=-1, attrs=None, map=None,
            level=0, children=None, content="", markup="", info="", meta={},
            block=True, hidden=False,
        ),
    ]
def test_use_existing_env():
    md = MarkdownIt()
    src = "[a]\n\n[c]: ijk"
    env = AttrDict(
        {
            "references": {
                "A": {"title": "", "href": "abc", "map": [0, 1]},
                "B": {"title": "", "href": "xyz", "map": [2, 3]},
            }
        }
    )
    tokens = md.parse(src, env)
    # print(tokens)
    assert tokens == [
        Token(
            type="paragraph_open", tag="p", nesting=1, attrs=None, map=[0, 1],
            level=0, children=None, content="", markup="", info="", meta={},
            block=True, hidden=False,
        ),
        Token(
            type="inline", tag="", nesting=0, attrs=None, map=[0, 1], level=1,
            children=[
                Token(
                    type="link_open", tag="a", nesting=1,
                    attrs=[["href", "abc"]], map=None, level=0, children=None,
                    content="", markup="", info="", meta={}, block=False,
                    hidden=False,
                ),
                Token(
                    type="text", tag="", nesting=0, attrs=None, map=None,
                    level=1, children=None, content="a", markup="", info="",
                    meta={}, block=False, hidden=False,
                ),
                Token(
                    type="link_close", tag="a", nesting=-1, attrs=None,
                    map=None, level=0, children=None, content="", markup="",
                    info="", meta={}, block=False, hidden=False,
                ),
            ],
            content="[a]", markup="", info="", meta={}, block=True, hidden=False,
        ),
        Token(
            type="paragraph_close", tag="p", nesting=-1, attrs=None, map=None,
            level=0, children=None, content="", markup="", info="", meta={},
            block=True, hidden=False,
        ),
    ]
    assert env == {
        "references": {
            "A": {"title": "", "href": "abc", "map": [0, 1]},
            "B": {"title": "", "href": "xyz", "map": [2, 3]},
            "C": {"title": "", "href": "ijk", "map": [2, 3]},
        }
    }
def notebook_to_tokens(
    notebook: NotebookNode,
    mdit_parser: MarkdownIt,
    mdit_env: dict[str, Any],
    logger: LoggerType,
) -> list[Token]:
    # disable front-matter, since this is taken from the notebook
    mdit_parser.disable("front_matter", ignoreInvalid=True)
    # mdit_env stores global state, such as reference definitions

    # Parse block tokens only first, leaving inline parsing to a second phase
    # (required to collect all reference definitions, before assessing references).
    block_tokens = [Token("nb_initialise", "", 0, map=[0, 0])]
    for cell_index, nb_cell in enumerate(notebook.cells):

        # skip empty cells
        if len(nb_cell["source"].strip()) == 0:
            continue

        # skip cells tagged for removal
        tags = nb_cell.metadata.get("tags", [])
        if ("remove_cell" in tags) or ("remove-cell" in tags):
            continue

        # generate tokens
        tokens: list[Token]
        if nb_cell["cell_type"] == "markdown":
            # https://nbformat.readthedocs.io/en/5.1.3/format_description.html#markdown-cells
            # TODO if cell has tag output-caption, then use as caption for next/preceding cell?
            tokens = [
                Token(
                    "nb_cell_markdown_open",
                    "",
                    1,
                    hidden=True,
                    meta={
                        "index": cell_index,
                        "metadata": nb_node_to_dict(nb_cell["metadata"]),
                    },
                    map=[0, len(nb_cell["source"].splitlines()) - 1],
                ),
            ]
            with mdit_parser.reset_rules():
                # enable only rules up to block
                rules = mdit_parser.core.ruler.get_active_rules()
                mdit_parser.core.ruler.enableOnly(rules[: rules.index("inline")])
                tokens.extend(mdit_parser.parse(nb_cell["source"], mdit_env))
            tokens.append(Token("nb_cell_markdown_close", "", -1, hidden=True))
        elif nb_cell["cell_type"] == "raw":
            # https://nbformat.readthedocs.io/en/5.1.3/format_description.html#raw-nbconvert-cells
            tokens = [
                Token(
                    "nb_cell_raw",
                    "code",
                    0,
                    content=nb_cell["source"],
                    meta={
                        "index": cell_index,
                        "metadata": nb_node_to_dict(nb_cell["metadata"]),
                    },
                    map=[0, 0],
                )
            ]
        elif nb_cell["cell_type"] == "code":
            # https://nbformat.readthedocs.io/en/5.1.3/format_description.html#code-cells
            # we don't copy the outputs here, since this would
            # greatly increase the memory consumption,
            # instead they will be referenced by the cell index
            tokens = [
                Token(
                    "nb_cell_code",
                    "code",
                    0,
                    content=nb_cell["source"],
                    meta={
                        "index": cell_index,
                        "metadata": nb_node_to_dict(nb_cell["metadata"]),
                    },
                    map=[0, 0],
                )
            ]
        else:
            pass  # TODO create warning

        # update token's source lines, using either a source_map (index -> line),
        # set when converting to a notebook, or a pseudo base of the cell index
        smap = notebook.metadata.get("source_map", None)
        start_line = smap[cell_index] if smap else (cell_index + 1) * 10000
        start_line += 1  # use base 1 rather than 0
        for token in tokens:
            if token.map:
                token.map = [start_line + token.map[0], start_line + token.map[1]]
        # also update the source lines for duplicate references
        for dup_ref in mdit_env.get("duplicate_refs", []):
            if "fixed" not in dup_ref:
                dup_ref["map"] = [
                    start_line + dup_ref["map"][0],
                    start_line + dup_ref["map"][1],
                ]
                dup_ref["fixed"] = True

        # add tokens to list
        block_tokens.extend(tokens)

    block_tokens.append(Token("nb_finalise", "", 0, map=[0, 0]))

    # Now all definitions have been gathered, run the inline parsing phase
    state = StateCore("", mdit_parser, mdit_env, block_tokens)
    with mdit_parser.reset_rules():
        rules = mdit_parser.core.ruler.get_active_rules()
        mdit_parser.core.ruler.enableOnly(rules[rules.index("inline"):])
        mdit_parser.core.process(state)

    return state.tokens