Code example #1
    def render_colon_fence(self, token: Token):
        """Render a code fence with ``:`` colon delimiters."""

        # TODO remove deprecation after v0.13.0
        match = REGEX_ADMONTION.match(token.info.strip())
        if match and match.groupdict()["name"] in list(STD_ADMONITIONS) + ["figure"]:
            classes = match.groupdict()["classes"][1:].split(",")
            name = match.groupdict()["name"]
            if classes and classes[0]:
                self.current_node.append(
                    self.reporter.warning(
                        "comma-separated classes are deprecated, "
                        "use `:class:` option instead",
                        line=token_line(token),
                    ))
                # we assume that no other options have been used
                token.content = f":class: {' '.join(classes)}\n\n" + token.content
            if name == "figure":
                self.current_node.append(
                    self.reporter.warning(
                        ":::{figure} is deprecated, "
                        "use :::{figure-md} instead",
                        line=token_line(token),
                    ))
                name = "figure-md"

            token.info = f"{{{name}}} {match.groupdict()['title']}"

        if token.content.startswith(":::"):
            # the content starts with a nested fence block,
            # but we must distinguish it from ``:options:`` lines, so add a newline
            token.content = "\n" + token.content

        return self.render_fence(token)
Code example #2
    def _anchor_func(state: StateCore):
        for (idx, token) in enumerate(state.tokens):
            if token.type != "heading_open":
                continue
            level = int(token.tag[1])
            if level not in selected_levels:
                continue
            inline_token = state.tokens[idx + 1]
            assert inline_token.children is not None
            title = "".join(child.content for child in inline_token.children
                            if child.type in ["text", "code_inline"])
            slug = unique_slug(slug_func(title), slugs)
            token.attrSet("id", slug)

            if permalink:
                link_tokens = [
                    Token(
                        "link_open",
                        "a",
                        1,
                        attrs=[["class", "header-anchor"],
                               ["href", f"#{slug}"]],
                    ),
                    Token("html_block", "", 0, content=permalinkSymbol),
                    Token("link_close", "a", -1),
                ]
                space = ([Token("text", "", 0, content=" ")]
                         if permalinkSpace else [])
                if permalinkBefore:
                    inline_token.children = (
                        link_tokens + space + inline_token.children)
                else:
                    inline_token.children.extend(space + link_tokens)
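
A usage sketch for context: assuming _anchor_func above is the core rule produced by the anchors_plugin factory in mdit-py-plugins (which closes over selected_levels, slug_func, permalink and the other options), it would typically be enabled like this:

from markdown_it import MarkdownIt
from mdit_py_plugins.anchors import anchors_plugin

md = MarkdownIt().use(anchors_plugin, min_level=1, max_level=2, permalink=True)
# heading_open tokens now carry an "id" attribute, and the heading's inline
# children gain an <a class="header-anchor" href="#..."> permalink
html = md.render("# My title")
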
Code example #3
File: patch.py Project: ousttrue/vrm.dev
def parse(self, inputstring: str, document: nodes.document) -> None:
    """Parse source text.
    :param inputstring: The source string to parse
    :param document: The root docutils node to add AST elements to
    """
    config = document.settings.env.myst_config
    parser = default_parser(config)
    parser.options["document"] = document
    env: dict = {}
    tokens = parser.parse(inputstring, env)
    if not tokens or tokens[0].type != "front_matter":
        # we always add front matter, so that we can merge it with global keys,
        # specified in the sphinx configuration
        tokens = [Token("front_matter", "", 0, content="{}", map=[0, 0])
                  ] + tokens

    header_text = None
    if tokens[0].type == 'front_matter':
        # Hugo article migration:
        # * get title from front matter (YAML)
        import pathlib
        path = pathlib.Path(document.current_source)
        title = path.stem
        if title in ('index', '_index'):
            title = path.parent.stem

        try:
            import yaml
            data = yaml.safe_load(tokens[0].content)
            title = data['title']
        except Exception:
            # no parsable front-matter title; keep the path-derived title
            pass

        header_text = Token("text", "", 0, content=title, map=tokens[0].map)
        tokens = [
            tokens[0],
            Token("heading_open", "h1", 1, content="{}", map=header_text.map),
            Token("inline",
                  "",
                  0,
                  content="{}",
                  map=header_text.map,
                  children=[header_text]),
            Token("heading_close", "h1", -1, content="{}", map=header_text.map)
        ] + tokens[1:]

    parser.renderer.render(tokens, parser.options, env)
Code example #4
    def parse(
        self,
        inputstring: str,
        document: nodes.document,
    ):
        """
        Parse source text.

        Args:
            inputstring: The source string to parse
            document: The root docutils node to add AST elements to
        """

        try:
            config = document.settings.env.myst_config
        except Exception:
            config = MdParserConfig(renderer="docutils")

        parser = default_parser(config)
        parser.options["document"] = document
        env = AttrDict()
        tokens = parser.parse(inputstring, env)
        if not tokens or tokens[0].type != "front_matter":
            # we always add front matter, so that we can merge it with global keys,
            # specified in the sphinx configuration
            tokens = [
                Token(
                    type="front_matter",
                    tag="",
                    nesting=0,
                    content="{}",  # noqa: P103
                    map=[0, 0],
                ),
            ] + tokens
        parser.renderer.render(tokens, parser.options, env)
Code example #5
File: parser.py Project: tonyfast/MyST-NB
def parse_code_cell(cell, start_line):
    tokens = [
        Token(
            "nb_code_cell",
            "",
            0,
            meta={"cell": cell},
            map=[start_line, start_line],
        )
    ]
    for i, output in enumerate(cell["outputs"]):
        if output["output_type"] == "display_data":
            if "text/markdown" in output["data"]:
                new_code_cell = deepcopy(cell)
                new_code_cell["metadata"]["tags"] = (
                    new_code_cell["metadata"].get("tags", []) + ["remove-input"])
                cell["outputs"] = cell["outputs"][:i]
                new_code_cell["outputs"] = new_code_cell["outputs"][i + 1:]
                tokens.extend(
                    parse_block(output["data"]["text/markdown"], start_line))
                if new_code_cell["outputs"]:
                    tokens.extend(parse_code_cell(new_code_cell, start_line))
                break
    return tokens
Code example #6
File: test_token.py Project: wna-se/markdown-it-py
def test_nest_tokens():
    tokens = nest_tokens([
        Token("start", "", 0),
        Token("open", "", 1),
        Token("open_inner", "", 1),
        Token("inner", "", 0),
        Token("close_inner", "", -1),
        Token("close", "", -1),
        Token("end", "", 0),
    ])
    assert [t.type for t in tokens] == ["start", "open", "end"]
    assert isinstance(tokens[0], Token)
    assert isinstance(tokens[1], NestedTokens)
    assert isinstance(tokens[2], Token)

    nested = tokens[1]
    assert nested.opening.type == "open"
    assert nested.closing.type == "close"
    assert len(nested.children) == 1
    assert nested.children[0].type == "open_inner"

    nested2 = nested.children[0]
    assert nested2.opening.type == "open_inner"
    assert nested2.closing.type == "close_inner"
    assert len(nested2.children) == 1
    assert nested2.children[0].type == "inner"
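
A quick sketch of running nest_tokens over real parser output, assuming the same markdown_it.token helpers the test imports:

from markdown_it import MarkdownIt
from markdown_it.token import nest_tokens

flat = MarkdownIt().parse("some *emphasised* text")
nested = nest_tokens(flat)
# the paragraph_open/paragraph_close pair collapses into a NestedTokens wrapper
assert nested[0].opening.type == "paragraph_open"
assert nested[0].closing.type == "paragraph_close"
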
Code example #7
def test_footnote_inline():

    md = MarkdownIt().use(footnote_plugin)
    src = r"^[a]"
    tokens = []
    state = StateInline(src, md, {}, tokens)
    state.env = {"footnotes": {"refs": {":a": -1}}}
    index.footnote_inline(state, False)
    # print([t.as_dict() for t in tokens])
    assert [t.as_dict() for t in tokens] == [{
        "type": "footnote_ref",
        "tag": "",
        "nesting": 0,
        "attrs": None,
        "map": None,
        "level": 0,
        "children": None,
        "content": "",
        "markup": "",
        "info": "",
        "meta": {
            "id": 0
        },
        "block": False,
        "hidden": False,
    }]
    assert state.env == {
        "footnotes": {
            "refs": {
                ":a": -1
            },
            "list": {
                0: {
                    "content":
                    "a",
                    "tokens": [
                        Token(
                            type="text",
                            tag="",
                            nesting=0,
                            attrs=None,
                            map=None,
                            level=0,
                            children=None,
                            content="a",
                            markup="",
                            info="",
                            meta={},
                            block=False,
                            hidden=False,
                        )
                    ],
                }
            },
        }
    }
Code example #8
def test_comment_token():
    md = MarkdownIt("commonmark").use(myst_block_plugin)
    tokens = md.parse("\n\n% abc \n%def")
    expected_token = Token(
        type="myst_line_comment",
        tag="",
        nesting=0,
        map=[2, 4],
        level=0,
        children=None,
        content=" abc\ndef",
        markup="%",
        info="",
        meta={},
        block=True,
        hidden=False,
    )
    expected_token.attrSet("class", "myst-line-comment")
    assert tokens == [expected_token]
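
A minimal setup sketch for the plugin this test exercises, assuming it ships as mdit_py_plugins.myst_blocks:

from markdown_it import MarkdownIt
from mdit_py_plugins.myst_blocks import myst_block_plugin

md = MarkdownIt("commonmark").use(myst_block_plugin)
tokens = md.parse("% a line comment")
assert tokens[0].type == "myst_line_comment"
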
Code example #9
    def render_image(self, token: Token):
        img_node = nodes.image()
        self.add_line_and_source_path(img_node, token)
        destination = token.attrGet("src") or ""

        if self.config.get("relative-images",
                           None) is not None and not is_external_url(
                               destination, None, True):
            # make the path relative to an "including" document
            destination = os.path.normpath(
                os.path.join(
                    self.config.get("relative-images", ""),
                    os.path.normpath(destination),
                ))

        img_node["uri"] = destination

        img_node["alt"] = self.renderInlineAsText(token.children or [])
        title = token.attrGet("title")
        if title:
            img_node["title"] = token.attrGet("title")
        self.current_node.append(img_node)
Code example #10
def test_block_token():
    md = MarkdownIt("commonmark").use(myst_block_plugin)
    tokens = md.parse("+++")
    assert tokens == [
        Token(
            type="myst_block_break",
            tag="hr",
            nesting=0,
            attrs=[["class", "myst-block"]],
            map=[0, 1],
            level=0,
            children=None,
            content="",
            markup="+++",
            info="",
            meta={},
            block=True,
            hidden=False,
        )
    ]

    tokens = md.parse("\n+ + + abc")
    assert tokens == [
        Token(
            type="myst_block_break",
            tag="hr",
            nesting=0,
            attrs=[["class", "myst-block"]],
            map=[1, 2],
            level=0,
            children=None,
            content="abc",
            markup="+++",
            info="",
            meta={},
            block=True,
            hidden=False,
        )
    ]
Code example #11
def _ensure_anchors_in_place(heading_tokens: Sequence[Token]) -> None:
    """Mutate heading tokens so that HTML anchors are in place.

    Add HTML anchor to heading token sequence if it is not already
    there. Don't add the slug value, we don't know it yet. The slug
    value will have to be inserted after calling this.
    """
    # Remove possible existing anchor
    anchor_start_idx = None
    anchor_end_idx = None
    inline_root = heading_tokens[1]
    assert inline_root.children is not None, "inline token's children must not be None"
    for child_idx, child_tkn in enumerate(inline_root.children):
        if child_tkn.type != "html_inline":
            continue
        if re.match(r"<a\s", child_tkn.content):
            anchor_start_idx = child_idx
            anchor_end_idx = child_idx
        if anchor_start_idx is not None and child_tkn.content == "</a>":
            anchor_end_idx = child_idx
    if anchor_start_idx is not None:
        assert anchor_end_idx is not None
        inline_root.children = (inline_root.children[:anchor_start_idx] +
                                inline_root.children[anchor_end_idx + 1:])
        # Remove trailing whitespace from the heading
        if (anchor_start_idx != 0
                and inline_root.children[anchor_start_idx - 1].type == "text"):
            prev_token = inline_root.children[anchor_start_idx - 1]
            prev_token.content = prev_token.content.rstrip()

    # Add the type of anchor we want
    anchor_text = ""
    link_tokens = [
        Token("html_inline", "", 0, content='<a name="{slug}">'),
        Token("text", "", 0, content=anchor_text),
        Token("html_inline", "", 0, content="</a>"),
    ]
    inline_root.children += link_tokens
Code example #12
    def parse(self, inputstring: str, document: nodes.document) -> None:
        """Parse source text.
        :param inputstring: The source string to parse
        :param document: The root docutils node to add AST elements to
        """
        config = MdParserConfig(renderer="docutils", enable_extensions=['linkify'])
        parser = default_parser(config)
        parser.options["document"] = document
        env = AttrDict()

        tokens = parser.parse(inputstring, env)
        if not tokens or tokens[0].type != "front_matter":
            # we always add front matter, so that we can merge it with global keys,
            # specified in the sphinx configuration
            tokens = [Token("front_matter", "", 0, content="{}", map=[0, 0])] + tokens
        parser.renderer.render(tokens, parser.options, env)
Code example #13
    def todoify(token: Token, token_constructor):
        token.children.insert(0, make_checkbox(token, token_constructor))
        token.children[1].content = token.children[1].content[3:]
        token.content = token.content[3:]

        if use_label_wrapper:
            if use_label_after:
                token.children.pop()

                # Replaced number generator from original plugin with uuid.
                checklist_id = f"task-item-{uuid4()}"
                token.children[0].content = (
                    token.children[0].content[:-1] + f' id="{checklist_id}">')
                token.children.append(
                    after_label(token.content, checklist_id, token_constructor))
            else:
                token.children.insert(0, begin_label(token_constructor))
                token.children.append(end_label(token_constructor))
Code example #14
def test_comment_token():
    md = MarkdownIt("commonmark").use(myst_block_plugin)
    tokens = md.parse("\n\n% abc")
    assert tokens == [
        Token(
            type="myst_line_comment",
            tag="",
            nesting=0,
            attrs=[["class", "myst-line-comment"]],
            map=[2, 3],
            level=0,
            children=None,
            content="abc",
            markup="%",
            info="",
            meta={},
            block=True,
            hidden=False,
        )
    ]
Code example #15
    def handle_cross_reference(self, token: Token, destination: str):
        """Create nodes for references that are not immediately resolvable."""
        wrap_node = addnodes.pending_xref(
            refdoc=self.doc_env.docname,
            reftarget=unquote(destination),
            reftype="myst",
            refdomain=None,  # Added to enable cross-linking
            refexplicit=len(token.children or []) > 0,
            refwarn=True,
        )
        self.add_line_and_source_path(wrap_node, token)
        title = token.attrGet("title")
        if title:
            wrap_node["title"] = title
        self.current_node.append(wrap_node)

        inner_node = nodes.inline("", "", classes=["xref", "myst"])
        wrap_node.append(inner_node)
        with self.current_node_context(inner_node):
            self.render_children(token)
Code example #16
File: test_main.py Project: firasm/markdown-it-py
def test_emptyStr():
    md = MarkdownIt()
    tokens = md.parseInline("")
    assert tokens == [
        Token(
            type="inline",
            tag="",
            nesting=0,
            attrs=None,
            map=[0, 1],
            level=0,
            children=[],
            content="",
            markup="",
            info="",
            meta={},
            block=False,
            hidden=False,
        )
    ]
Code example #17
def test_token():
    md = MarkdownIt("commonmark").use(front_matter_plugin)
    tokens = md.parse("---\na: 1\n---")
    # print(tokens)
    assert tokens == [
        Token(
            type="front_matter",
            tag="",
            nesting=0,
            attrs=None,
            map=[0, 3],
            level=0,
            children=None,
            content="a: 1",
            markup="---",
            info="",
            meta={},
            block=True,
            hidden=True,
        )
    ]
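
The front_matter token carries the raw YAML as its content, so a consumer typically deserialises it (as the parser in code example #3 does); a sketch reusing the tokens from the test above:

import yaml

metadata = yaml.safe_load(tokens[0].content)
assert metadata == {"a": 1}
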
Code example #18
    def render_fence(self, token: Token):
        text = token.content
        if token.info:
            # Ensure that we'll have an empty string if info exists but is only spaces
            token.info = token.info.strip()
        language = token.info.split()[0] if token.info else ""

        if not self.config.get("commonmark_only",
                               False) and language == "{eval-rst}":
            # copy necessary elements (source, line no, env, reporter)
            newdoc = make_document()
            newdoc["source"] = self.document["source"]
            newdoc.settings = self.document.settings
            newdoc.reporter = self.reporter
            # pad the line numbers artificially so they offset with the fence block
            pseudosource = ("\n" * token_line(token)) + token.content
            # actually parse the rst into our document
            MockRSTParser().parse(pseudosource, newdoc)
            for node in newdoc:
                if node["names"]:
                    self.document.note_explicit_target(node, node)
            self.current_node.extend(newdoc[:])
            return
        elif (not self.config.get("commonmark_only", False)
              and language.startswith("{") and language.endswith("}")):
            return self.render_directive(token)

        if not language:
            try:
                sphinx_env = self.document.settings.env
                language = sphinx_env.temp_data.get(
                    "highlight_language", sphinx_env.config.highlight_language)
            except AttributeError:
                pass
        if not language:
            language = self.config.get("highlight_language", "")
        node = nodes.literal_block(text, text, language=language)
        self.add_line_and_source_path(node, token)
        self.current_node.append(node)
Code example #19
    def render_nb_initialise(self, token: SyntaxTreeNode) -> None:
        env = cast(BuildEnvironment, self.sphinx_env)
        metadata = self.nb_client.nb_metadata
        special_keys = ["kernelspec", "language_info", "source_map"]
        for key in special_keys:
            if key in metadata:
                # save these special keys on the metadata, rather than as docinfo
                # note, sphinx_book_theme checks kernelspec is in the metadata
                env.metadata[env.docname][key] = metadata.get(key)

        # forward the remaining metadata to the front_matter renderer
        special_keys.append("widgets")
        top_matter = {
            k: v
            for k, v in metadata.items() if k not in special_keys
        }
        self.render_front_matter(
            Token(  # type: ignore
                "front_matter",
                "",
                0,
                map=[0, 0],
                content=top_matter,  # type: ignore[arg-type]
            )
        )
Code example #20
def footnote_tail(state: StateBlock, *args, **kwargs):
    """Post-processing step, to move footnote tokens to end of the token stream.

    Also removes un-referenced tokens.
    """

    insideRef = False
    refTokens = {}

    if "footnotes" not in state.env:
        return

    current = []
    tok_filter = []
    for tok in state.tokens:

        if tok.type == "footnote_reference_open":
            insideRef = True
            current = []
            currentLabel = tok.meta["label"]
            tok_filter.append(False)
            continue

        if tok.type == "footnote_reference_close":
            insideRef = False
            # prepend ':' to avoid conflict with Object.prototype members
            refTokens[":" + currentLabel] = current
            tok_filter.append(False)
            continue

        if insideRef:
            current.append(tok)

        tok_filter.append(not insideRef)

    state.tokens = [t for t, f in zip(state.tokens, tok_filter) if f]

    if "list" not in state.env.get("footnotes", {}):
        return
    foot_list = state.env["footnotes"]["list"]

    token = Token("footnote_block_open", "", 1)
    state.tokens.append(token)

    for i, foot_note in foot_list.items():
        token = Token("footnote_open", "", 1)
        token.meta = {"id": i, "label": foot_note.get("label", None)}
        # TODO propagate line positions of original foot note
        # (but don't store in token.map, because this is used for scroll syncing)
        state.tokens.append(token)

        if "tokens" in foot_note:
            tokens = []

            token = Token("paragraph_open", "p", 1)
            token.block = True
            tokens.append(token)

            token = Token("inline", "", 0)
            token.children = foot_note["tokens"]
            token.content = foot_note["content"]
            tokens.append(token)

            token = Token("paragraph_close", "p", -1)
            token.block = True
            tokens.append(token)

        elif "label" in foot_note:
            tokens = refTokens[":" + foot_note["label"]]

        state.tokens.extend(tokens)
        if state.tokens[-1].type == "paragraph_close":
            lastParagraph = state.tokens.pop()
        else:
            lastParagraph = None

        t = (foot_note["count"] if
             (("count" in foot_note) and (foot_note["count"] > 0)) else 1)
        j = 0
        while j < t:
            token = Token("footnote_anchor", "", 0)
            token.meta = {
                "id": i,
                "subId": j,
                "label": foot_note.get("label", None)
            }
            state.tokens.append(token)

        if lastParagraph:
            state.tokens.append(lastParagraph)

        token = Token("footnote_close", "", -1)
        state.tokens.append(token)

    token = Token("footnote_block_close", "", -1)
    state.tokens.append(token)
Code example #21
File: test_token.py Project: wna-se/markdown-it-py
def test_serialization():
    token = Token("name", "tag", 0, children=[Token("other", "tag2", 0)])
    assert token == Token.from_dict(token.as_dict())
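
Because as_dict() yields plain built-in types (at least for tokens without nested children), the round trip extends naturally to JSON persistence; a small sketch:

import json

from markdown_it.token import Token

token = Token("name", "tag", 0)
restored = Token.from_dict(json.loads(json.dumps(token.as_dict())))
assert restored == token
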
Code example #22
def footnote_def(state: StateBlock, startLine: int, endLine: int,
                 silent: bool):
    """Process footnote block definition"""

    start = state.bMarks[startLine] + state.tShift[startLine]
    maximum = state.eMarks[startLine]

    # line should be at least 5 chars - "[^x]:"
    if start + 4 > maximum:
        return False

    if state.srcCharCode[start] != 0x5B:  # /* [ */
        return False
    if state.srcCharCode[start + 1] != 0x5E:  # /* ^ */
        return False

    pos = start + 2
    while pos < maximum:
        if state.srcCharCode[pos] == 0x20:
            return False
        if state.srcCharCode[pos] == 0x5D:  # /* ] */
            break
        pos += 1

    if pos == start + 2:  # no empty footnote labels
        return False
    pos += 1
    if pos + 1 >= maximum or state.srcCharCode[pos] != 0x3A:  # /* : */
        return False
    if silent:
        return True
    pos += 1

    label = state.src[start + 2:pos - 2]
    state.env.setdefault("footnotes", {}).setdefault("refs",
                                                     {})[":" + label] = -1

    open_token = Token("footnote_reference_open", "", 1)
    open_token.meta = {"label": label}
    open_token.level = state.level
    state.level += 1
    state.tokens.append(open_token)

    oldBMark = state.bMarks[startLine]
    oldTShift = state.tShift[startLine]
    oldSCount = state.sCount[startLine]
    oldParentType = state.parentType

    posAfterColon = pos
    initial = offset = (state.sCount[startLine] + pos -
                        (state.bMarks[startLine] + state.tShift[startLine]))

    while pos < maximum:
        ch = state.srcCharCode[pos]

        if isSpace(ch):
            if ch == 0x09:
                offset += 4 - offset % 4
            else:
                offset += 1

        else:
            break

        pos += 1

    state.tShift[startLine] = pos - posAfterColon
    state.sCount[startLine] = offset - initial

    state.bMarks[startLine] = posAfterColon
    state.blkIndent += 4
    state.parentType = "footnote"

    if state.sCount[startLine] < state.blkIndent:
        state.sCount[startLine] += state.blkIndent

    state.md.block.tokenize(state, startLine, endLine, True)

    state.parentType = oldParentType
    state.blkIndent -= 4
    state.tShift[startLine] = oldTShift
    state.sCount[startLine] = oldSCount
    state.bMarks[startLine] = oldBMark

    open_token.map = [startLine, state.line]

    token = Token("footnote_reference_close", "", -1)
    state.level -= 1
    token.level = state.level
    state.tokens.append(token)

    return True
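
These block-level and inline rules are wired together by the footnote plugin; a typical end-to-end sketch, assuming the mdit-py-plugins packaging:

from markdown_it import MarkdownIt
from mdit_py_plugins.footnote import footnote_plugin

md = MarkdownIt().use(footnote_plugin)
html = md.render("Here is a footnote reference[^1].\n\n[^1]: The definition.")
# the footnote bodies are rendered in a footnotes section appended at the end
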
Code example #23
File: test_main.py Project: firasm/markdown-it-py
def test_parseInline():
    md = MarkdownIt()
    tokens = md.parseInline("abc\n\n> xyz")
    assert tokens == [
        Token(
            type="inline",
            tag="",
            nesting=0,
            attrs=None,
            map=[0, 1],
            level=0,
            children=[
                Token(
                    type="text",
                    tag="",
                    nesting=0,
                    attrs=None,
                    map=None,
                    level=0,
                    children=None,
                    content="abc",
                    markup="",
                    info="",
                    meta={},
                    block=False,
                    hidden=False,
                ),
                Token(
                    type="softbreak",
                    tag="br",
                    nesting=0,
                    attrs=None,
                    map=None,
                    level=0,
                    children=None,
                    content="",
                    markup="",
                    info="",
                    meta={},
                    block=False,
                    hidden=False,
                ),
                Token(
                    type="softbreak",
                    tag="br",
                    nesting=0,
                    attrs=None,
                    map=None,
                    level=0,
                    children=None,
                    content="",
                    markup="",
                    info="",
                    meta={},
                    block=False,
                    hidden=False,
                ),
                Token(
                    type="text",
                    tag="",
                    nesting=0,
                    attrs=None,
                    map=None,
                    level=0,
                    children=None,
                    content="> xyz",
                    markup="",
                    info="",
                    meta={},
                    block=False,
                    hidden=False,
                ),
            ],
            content="abc\n\n> xyz",
            markup="",
            info="",
            meta={},
            block=False,
            hidden=False,
        )
    ]
Code example #24
def test_basic():
    md = MarkdownIt().use(myst_role_plugin)
    src = "{abc}``` a ```"
    tokens = md.parse(src)
    print(tokens)
    assert tokens == [
        Token(
            type="paragraph_open",
            tag="p",
            nesting=1,
            attrs=None,
            map=[0, 1],
            level=0,
            children=None,
            content="",
            markup="",
            info="",
            meta={},
            block=True,
            hidden=False,
        ),
        Token(
            type="inline",
            tag="",
            nesting=0,
            attrs=None,
            map=[0, 1],
            level=1,
            children=[
                Token(
                    type="myst_role",
                    tag="",
                    nesting=0,
                    attrs=None,
                    map=None,
                    level=0,
                    children=None,
                    content=" a ",
                    markup="",
                    info="",
                    meta={"name": "abc"},
                    block=False,
                    hidden=False,
                )
            ],
            content="{abc}``` a ```",
            markup="",
            info="",
            meta={},
            block=True,
            hidden=False,
        ),
        Token(
            type="paragraph_close",
            tag="p",
            nesting=-1,
            attrs=None,
            map=None,
            level=0,
            children=None,
            content="",
            markup="",
            info="",
            meta={},
            block=True,
            hidden=False,
        ),
    ]
Code example #25
def test_footnote_tail():
    md = MarkdownIt()

    tokens = [
        Token(
            **{
                "type": "footnote_reference_open",
                "tag": "",
                "nesting": 1,
                "attrs": None,
                "map": None,
                "level": 0,
                "children": None,
                "content": "",
                "markup": "",
                "info": "",
                "meta": {
                    "label": "a"
                },
                "block": False,
                "hidden": False,
            }),
        Token(
            **{
                "type": "paragraph_open",
                "tag": "p",
                "nesting": 1,
                "attrs": None,
                "map": [0, 1],
                "level": 1,
                "children": None,
                "content": "",
                "markup": "",
                "info": "",
                "meta": {},
                "block": True,
                "hidden": False,
            }),
        Token(
            **{
                "type": "inline",
                "tag": "",
                "nesting": 0,
                "attrs": None,
                "map": [0, 1],
                "level": 2,
                "children": [],
                "content": "xyz",
                "markup": "",
                "info": "",
                "meta": {},
                "block": True,
                "hidden": False,
            }),
        Token(
            **{
                "type": "paragraph_close",
                "tag": "p",
                "nesting": -1,
                "attrs": None,
                "map": None,
                "level": 1,
                "children": None,
                "content": "",
                "markup": "",
                "info": "",
                "meta": {},
                "block": True,
                "hidden": False,
            }),
        Token(
            **{
                "type": "footnote_reference_close",
                "tag": "",
                "nesting": -1,
                "attrs": None,
                "map": None,
                "level": 0,
                "children": None,
                "content": "",
                "markup": "",
                "info": "",
                "meta": {},
                "block": False,
                "hidden": False,
            }),
        Token("other", "", 0),
    ]
    env = {
        "footnotes": {
            "refs": {
                ":a": 0
            },
            "list": {
                0: {
                    "label": "a",
                    "count": 1
                }
            }
        }
    }
    state = StateBlock("", md, env, tokens)
    index.footnote_tail(state)
    assert state.tokens == [
        Token(
            type="other",
            tag="",
            nesting=0,
            attrs=None,
            map=None,
            level=0,
            children=None,
            content="",
            markup="",
            info="",
            meta={},
            block=False,
            hidden=False,
        ),
        Token(
            type="footnote_block_open",
            tag="",
            nesting=1,
            attrs=None,
            map=None,
            level=0,
            children=None,
            content="",
            markup="",
            info="",
            meta={},
            block=False,
            hidden=False,
        ),
        Token(
            type="footnote_open",
            tag="",
            nesting=1,
            attrs=None,
            map=None,
            level=0,
            children=None,
            content="",
            markup="",
            info="",
            meta={
                "id": 0,
                "label": "a"
            },
            block=False,
            hidden=False,
        ),
        Token(
            type="paragraph_open",
            tag="p",
            nesting=1,
            attrs=None,
            map=[0, 1],
            level=1,
            children=None,
            content="",
            markup="",
            info="",
            meta={},
            block=True,
            hidden=False,
        ),
        Token(
            type="inline",
            tag="",
            nesting=0,
            attrs=None,
            map=[0, 1],
            level=2,
            children=[],
            content="xyz",
            markup="",
            info="",
            meta={},
            block=True,
            hidden=False,
        ),
        Token(
            type="footnote_anchor",
            tag="",
            nesting=0,
            attrs=None,
            map=None,
            level=0,
            children=None,
            content="",
            markup="",
            info="",
            meta={
                "id": 0,
                "subId": 0,
                "label": "a"
            },
            block=False,
            hidden=False,
        ),
        Token(
            type="paragraph_close",
            tag="p",
            nesting=-1,
            attrs=None,
            map=None,
            level=1,
            children=None,
            content="",
            markup="",
            info="",
            meta={},
            block=True,
            hidden=False,
        ),
        Token(
            type="footnote_close",
            tag="",
            nesting=-1,
            attrs=None,
            map=None,
            level=0,
            children=None,
            content="",
            markup="",
            info="",
            meta={},
            block=False,
            hidden=False,
        ),
        Token(
            type="footnote_block_close",
            tag="",
            nesting=-1,
            attrs=None,
            map=None,
            level=0,
            children=None,
            content="",
            markup="",
            info="",
            meta={},
            block=False,
            hidden=False,
        ),
    ]
Code example #26
File: test_token.py Project: wna-se/markdown-it-py
def test_token():
    token = Token("name", "tag", 0)
    assert token.as_dict() == {
        "type": "name",
        "tag": "tag",
        "nesting": 0,
        "attrs": None,
        "map": None,
        "level": 0,
        "children": None,
        "content": "",
        "markup": "",
        "info": "",
        "meta": {},
        "block": False,
        "hidden": False,
    }
    token.attrSet("a", "b")
    assert token.attrGet("a") == "b"
    token.attrJoin("a", "c")
    assert token.attrGet("a") == "b c"
    token.attrPush(["x", "y"])
    assert token.attrGet("x") == "y"
    assert token.attrIndex("a") == 0
    assert token.attrIndex("x") == 1
    assert token.attrIndex("j") == -1
Code example #27
File: parser.py Project: foster999/MyST-NB
def nb_to_tokens(
        ntbk: nbf.NotebookNode, config: MdParserConfig,
        renderer_plugin: str) -> Tuple[MarkdownIt, AttrDict, List[Token]]:
    """Parse the notebook content to a list of syntax tokens and an env,
    containing global data like reference definitions.
    """
    md = default_parser(config)
    # setup the markdown parser
    # Note we disable front matter parsing,
    # because this is taken from the actual notebook metadata
    md.disable("front_matter", ignoreInvalid=True)
    md.renderer = SphinxNBRenderer(md)
    # make a sandbox where all the parsing global data,
    # like reference definitions will be stored
    env = AttrDict()
    rules = md.core.ruler.get_active_rules()

    # First only run pre-inline chains
    # so we can collect all reference definitions, etc, before assessing references
    def parse_block(src, start_line):
        with md.reset_rules():
            # enable only rules up to block
            md.core.ruler.enableOnly(rules[:rules.index("inline")])
            tokens = md.parse(src, env)
        for token in tokens:
            if token.map:
                token.map = [
                    start_line + token.map[0], start_line + token.map[1]
                ]
        for dup_ref in env.get("duplicate_refs", []):
            if "fixed" not in dup_ref:
                dup_ref["map"] = [
                    start_line + dup_ref["map"][0],
                    start_line + dup_ref["map"][1],
                ]
                dup_ref["fixed"] = True
        return tokens

    block_tokens = []
    source_map = ntbk.metadata.get("source_map", None)

    # get language lexer name
    langinfo = ntbk.metadata.get("language_info", {})
    lexer = langinfo.get("pygments_lexer", langinfo.get("name", None))
    # TODO log warning if lexer is still None

    for cell_index, nb_cell in enumerate(ntbk.cells):

        # if the source_map has been stored (for text-based notebooks),
        # we use that to define the starting line for each cell;
        # otherwise, we set a pseudo base that represents the cell index
        start_line = source_map[cell_index] if source_map else (cell_index + 1) * 10000
        start_line += 1  # use base 1 rather than 0

        # Skip empty cells
        if len(nb_cell["source"].strip()) == 0:
            continue

        # skip cells tagged for removal
        # TODO this logic should be deferred to a transform
        tags = nb_cell.metadata.get("tags", [])
        if ("remove_cell" in tags) or ("remove-cell" in tags):
            continue

        if nb_cell["cell_type"] == "markdown":

            # we add the cell index to tokens,
            # so they can be included in the error logging,
            block_tokens.extend(parse_block(nb_cell["source"], start_line))

        elif nb_cell["cell_type"] == "code":
            # here we do nothing but store the cell as a custom token
            block_tokens.append(
                Token(
                    "nb_code_cell",
                    "",
                    0,
                    meta={
                        "cell": nb_cell,
                        "lexer": lexer,
                        "renderer": renderer_plugin
                    },
                    map=[start_line, start_line],
                ))

    # Now all definitions have been gathered,
    # we run inline and post-inline chains, to expand the text.
    # Note we assume here that these rules never require the actual source text,
    # only acting on the existing tokens
    state = StateCore(None, md, env, block_tokens)
    with md.reset_rules():
        md.core.ruler.enableOnly(rules[rules.index("inline"):])
        md.core.process(state)

    # Add the front matter.
    # Note that myst_parser serialises dict/list like keys, when rendering to
    # docutils docinfo. These could be read back with `json.loads`.
    state.tokens = [
        Token("front_matter", "", 0, content=dict(ntbk.metadata))
    ] + state.tokens

    # If there are widgets, this will embed the state of all widgets in a script
    if contains_widgets(ntbk):
        state.tokens.append(
            Token("jupyter_widget_state",
                  "",
                  0,
                  meta={"state": get_widgets(ntbk)}))

    return md, env, state.tokens
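
The two-phase trick above (block rules first, then inline rules over the collected tokens) can be reduced to a standalone sketch, using only APIs that already appear in the function:

from markdown_it import MarkdownIt
from markdown_it.rules_core.state_core import StateCore

md = MarkdownIt()
env: dict = {}
rules = md.core.ruler.get_active_rules()

# phase 1: run only the rules up to (but excluding) "inline",
# collecting reference definitions into env
with md.reset_rules():
    md.core.ruler.enableOnly(rules[:rules.index("inline")])
    tokens = md.parse("a [link][ref]\n\n[ref]: https://example.com", env)

# phase 2: run the inline and post-inline rules over the stored tokens
state = StateCore("", md, env, tokens)
with md.reset_rules():
    md.core.ruler.enableOnly(rules[rules.index("inline"):])
    md.core.process(state)
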
Code example #28
def test_store_labels():
    md = MarkdownIt()
    md.options["store_labels"] = True
    src = "[a]\n\n![a]\n\n[a]: ijk"
    tokens = md.parse(src)
    # print(tokens)
    assert tokens == [
        Token(
            type="paragraph_open",
            tag="p",
            nesting=1,
            attrs=None,
            map=[0, 1],
            level=0,
            children=None,
            content="",
            markup="",
            info="",
            meta={},
            block=True,
            hidden=False,
        ),
        Token(
            type="inline",
            tag="",
            nesting=0,
            attrs=None,
            map=[0, 1],
            level=1,
            children=[
                Token(
                    type="link_open",
                    tag="a",
                    nesting=1,
                    attrs=[["href", "ijk"]],
                    map=None,
                    level=0,
                    children=None,
                    content="",
                    markup="",
                    info="",
                    meta={"label": "A"},
                    block=False,
                    hidden=False,
                ),
                Token(
                    type="text",
                    tag="",
                    nesting=0,
                    attrs=None,
                    map=None,
                    level=1,
                    children=None,
                    content="a",
                    markup="",
                    info="",
                    meta={},
                    block=False,
                    hidden=False,
                ),
                Token(
                    type="link_close",
                    tag="a",
                    nesting=-1,
                    attrs=None,
                    map=None,
                    level=0,
                    children=None,
                    content="",
                    markup="",
                    info="",
                    meta={},
                    block=False,
                    hidden=False,
                ),
            ],
            content="[a]",
            markup="",
            info="",
            meta={},
            block=True,
            hidden=False,
        ),
        Token(
            type="paragraph_close",
            tag="p",
            nesting=-1,
            attrs=None,
            map=None,
            level=0,
            children=None,
            content="",
            markup="",
            info="",
            meta={},
            block=True,
            hidden=False,
        ),
        Token(
            type="paragraph_open",
            tag="p",
            nesting=1,
            attrs=None,
            map=[2, 3],
            level=0,
            children=None,
            content="",
            markup="",
            info="",
            meta={},
            block=True,
            hidden=False,
        ),
        Token(
            type="inline",
            tag="",
            nesting=0,
            attrs=None,
            map=[2, 3],
            level=1,
            children=[
                Token(
                    type="image",
                    tag="img",
                    nesting=0,
                    attrs=[["src", "ijk"], ["alt", ""]],
                    map=None,
                    level=0,
                    children=[
                        Token(
                            type="text",
                            tag="",
                            nesting=0,
                            attrs=None,
                            map=None,
                            level=0,
                            children=None,
                            content="a",
                            markup="",
                            info="",
                            meta={},
                            block=False,
                            hidden=False,
                        )
                    ],
                    content="a",
                    markup="",
                    info="",
                    meta={"label": "A"},
                    block=False,
                    hidden=False,
                )
            ],
            content="![a]",
            markup="",
            info="",
            meta={},
            block=True,
            hidden=False,
        ),
        Token(
            type="paragraph_close",
            tag="p",
            nesting=-1,
            attrs=None,
            map=None,
            level=0,
            children=None,
            content="",
            markup="",
            info="",
            meta={},
            block=True,
            hidden=False,
        ),
    ]
Code example #29
def test_use_existing_env():
    md = MarkdownIt()
    src = "[a]\n\n[c]: ijk"
    env = AttrDict({
        "references": {
            "A": {
                "title": "",
                "href": "abc",
                "map": [0, 1]
            },
            "B": {
                "title": "",
                "href": "xyz",
                "map": [2, 3]
            },
        }
    })
    tokens = md.parse(src, env)
    # print(tokens)
    assert tokens == [
        Token(
            type="paragraph_open",
            tag="p",
            nesting=1,
            attrs=None,
            map=[0, 1],
            level=0,
            children=None,
            content="",
            markup="",
            info="",
            meta={},
            block=True,
            hidden=False,
        ),
        Token(
            type="inline",
            tag="",
            nesting=0,
            attrs=None,
            map=[0, 1],
            level=1,
            children=[
                Token(
                    type="link_open",
                    tag="a",
                    nesting=1,
                    attrs=[["href", "abc"]],
                    map=None,
                    level=0,
                    children=None,
                    content="",
                    markup="",
                    info="",
                    meta={},
                    block=False,
                    hidden=False,
                ),
                Token(
                    type="text",
                    tag="",
                    nesting=0,
                    attrs=None,
                    map=None,
                    level=1,
                    children=None,
                    content="a",
                    markup="",
                    info="",
                    meta={},
                    block=False,
                    hidden=False,
                ),
                Token(
                    type="link_close",
                    tag="a",
                    nesting=-1,
                    attrs=None,
                    map=None,
                    level=0,
                    children=None,
                    content="",
                    markup="",
                    info="",
                    meta={},
                    block=False,
                    hidden=False,
                ),
            ],
            content="[a]",
            markup="",
            info="",
            meta={},
            block=True,
            hidden=False,
        ),
        Token(
            type="paragraph_close",
            tag="p",
            nesting=-1,
            attrs=None,
            map=None,
            level=0,
            children=None,
            content="",
            markup="",
            info="",
            meta={},
            block=True,
            hidden=False,
        ),
    ]
    assert env == {
        "references": {
            "A": {
                "title": "",
                "href": "abc",
                "map": [0, 1]
            },
            "B": {
                "title": "",
                "href": "xyz",
                "map": [2, 3]
            },
            "C": {
                "title": "",
                "href": "ijk",
                "map": [2, 3]
            },
        }
    }
Code example #30
def notebook_to_tokens(
    notebook: NotebookNode,
    mdit_parser: MarkdownIt,
    mdit_env: dict[str, Any],
    logger: LoggerType,
) -> list[Token]:
    # disable front-matter, since this is taken from the notebook
    mdit_parser.disable("front_matter", ignoreInvalid=True)
    # this stores global state, such as reference definitions

    # Parse block tokens only first, leaving inline parsing to a second phase
    # (required to collect all reference definitions, before assessing references).
    block_tokens = [Token("nb_initialise", "", 0, map=[0, 0])]
    for cell_index, nb_cell in enumerate(notebook.cells):

        # skip empty cells
        if len(nb_cell["source"].strip()) == 0:
            continue

        # skip cells tagged for removal
        tags = nb_cell.metadata.get("tags", [])
        if ("remove_cell" in tags) or ("remove-cell" in tags):
            continue

        # generate tokens
        tokens: list[Token]
        if nb_cell["cell_type"] == "markdown":
            # https://nbformat.readthedocs.io/en/5.1.3/format_description.html#markdown-cells
            # TODO if cell has tag output-caption, then use as caption for next/preceding cell?
            tokens = [
                Token(
                    "nb_cell_markdown_open",
                    "",
                    1,
                    hidden=True,
                    meta={
                        "index": cell_index,
                        "metadata": nb_node_to_dict(nb_cell["metadata"]),
                    },
                    map=[0, len(nb_cell["source"].splitlines()) - 1],
                ),
            ]
            with mdit_parser.reset_rules():
                # enable only rules up to block
                rules = mdit_parser.core.ruler.get_active_rules()
                mdit_parser.core.ruler.enableOnly(
                    rules[:rules.index("inline")])
                tokens.extend(mdit_parser.parse(nb_cell["source"], mdit_env))
            tokens.append(
                Token(
                    "nb_cell_markdown_close",
                    "",
                    -1,
                    hidden=True,
                )
            )
        elif nb_cell["cell_type"] == "raw":
            # https://nbformat.readthedocs.io/en/5.1.3/format_description.html#raw-nbconvert-cells
            tokens = [
                Token(
                    "nb_cell_raw",
                    "code",
                    0,
                    content=nb_cell["source"],
                    meta={
                        "index": cell_index,
                        "metadata": nb_node_to_dict(nb_cell["metadata"]),
                    },
                    map=[0, 0],
                )
            ]
        elif nb_cell["cell_type"] == "code":
            # https://nbformat.readthedocs.io/en/5.1.3/format_description.html#code-cells
            # we don't copy the outputs here, since this would
            # greatly increase the memory consumption,
            # instead they will be referenced by the cell index
            tokens = [
                Token(
                    "nb_cell_code",
                    "code",
                    0,
                    content=nb_cell["source"],
                    meta={
                        "index": cell_index,
                        "metadata": nb_node_to_dict(nb_cell["metadata"]),
                    },
                    map=[0, 0],
                )
            ]
        else:
            pass  # TODO create warning

        # update token's source lines, using either a source_map (index -> line),
        # set when converting to a notebook, or a pseudo base of the cell index
        smap = notebook.metadata.get("source_map", None)
        start_line = smap[cell_index] if smap else (cell_index + 1) * 10000
        start_line += 1  # use base 1 rather than 0
        for token in tokens:
            if token.map:
                token.map = [
                    start_line + token.map[0], start_line + token.map[1]
                ]
        # also update the source lines for duplicate references
        for dup_ref in mdit_env.get("duplicate_refs", []):
            if "fixed" not in dup_ref:
                dup_ref["map"] = [
                    start_line + dup_ref["map"][0],
                    start_line + dup_ref["map"][1],
                ]
                dup_ref["fixed"] = True

        # add tokens to list
        block_tokens.extend(tokens)

    block_tokens.append(Token("nb_finalise", "", 0, map=[0, 0]))

    # Now all definitions have been gathered, run the inline parsing phase
    state = StateCore("", mdit_parser, mdit_env, block_tokens)
    with mdit_parser.reset_rules():
        rules = mdit_parser.core.ruler.get_active_rules()
        mdit_parser.core.ruler.enableOnly(rules[rules.index("inline"):])
        mdit_parser.core.process(state)

    return state.tokens