Code example #1
    def parse_markdown(
        self, text: str, parent: Optional[nodes.Node] = None
    ) -> List[nodes.Node]:
        """Parse text as CommonMark, in a new document."""
        parser = default_parser(MdParserConfig(commonmark_only=True))

        # setup parent node
        if parent is None:
            parent = nodes.container()
            self.add_source_and_line(parent)
        parser.options["current_node"] = parent

        # setup containing document
        new_doc = make_document(self.node.source)
        new_doc.settings = self.document.settings
        new_doc.reporter = self.document.reporter
        parser.options["document"] = new_doc

        # use the node docname, where possible, to deal with single document builds
        with mock.patch.dict(
            self.env.temp_data, {"docname": self.env.path2doc(self.node.source)}
        ):
            parser.render(text)

        # TODO are there any transforms we should retroactively carry out?
        return parent.children
Code example #2
    def parse(
        self,
        inputstring: str,
        document: nodes.document,
    ):
        """
        Parse source text.

        Args:
            inputstring: The source string to parse
            document: The root docutils node to add AST elements to
        """

        try:
            config = document.settings.env.myst_config
        except Exception:
            config = MdParserConfig(renderer="docutils")

        parser = default_parser(config)
        parser.options["document"] = document
        env = AttrDict()
        tokens = parser.parse(inputstring, env)
        if not tokens or tokens[0].type != "front_matter":
            # we always add front matter, so that we can merge it with global keys,
            # specified in the sphinx configuration
            tokens = [
                Token(
                    type="front_matter",
                    tag="",
                    nesting=0,
                    content="{}",  # noqa: P103
                    map=[0, 0],
                ),
            ] + tokens
        parser.renderer.render(tokens, parser.options, env)
Code example #3
File: cli.py Project: tfiers/MyST-Parser
def print_anchors(args=None):
    """ """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "input",
        nargs="?",
        type=argparse.FileType("r"),
        default=sys.stdin,
        help="Input file (default stdin)",
    )
    parser.add_argument(
        "-o",
        "--output",
        type=argparse.FileType("w"),
        default=sys.stdout,
        help="Output file (default stdout)",
    )
    parser.add_argument("-l",
                        "--level",
                        type=int,
                        default=2,
                        help="Maximum heading level.")
    args = parser.parse_args(args)
    parser = default_parser(
        MdParserConfig(renderer="html", heading_anchors=args.level))

    def _filter_plugin(state):
        state.tokens = [
            t for t in state.tokens
            if t.type.startswith("heading_") and int(t.tag[1]) <= args.level
        ]

    parser.use(lambda p: p.core.ruler.push("filter", _filter_plugin))
    text = parser.render(args.input.read())
    args.output.write(text)
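A minimal invocation sketch for this helper; the file name and heading level below are placeholders (with no positional argument it reads from stdin and writes the rendered anchors to stdout):

# hypothetical call: print HTML anchors for headings up to level 3
print_anchors(["README.md", "-l", "3"])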
Code example #4
File: myst.py Project: theniteshsingh/jupytext
def matches_mystnb(
    text,
    ext=None,
    requires_meta=True,
    code_directive=CODE_DIRECTIVE,
    raw_directive=RAW_DIRECTIVE,
):
    """Attempt to distinguish a file as myst, only given its extension and content.

    :param ext: the extension of the file
    :param requires_meta: requires the file to contain top matter metadata
    :param code_directive: the name of the directive to search for containing code cells
    :param raw_directive: the name of the directive to search for containing raw cells
    """
    # is the extension uniquely associated with myst (i.e. not just .md)
    if ext and "." + ("." + ext).rsplit(".", 1)[1] in myst_extensions(no_md=True):
        return True

    # might the text contain metadata front matter
    if requires_meta and not text.startswith("---"):
        return False

    try:
        # parse markdown file up to the block level (i.e. don't worry about inline text)
        parser = default_parser("html", disable_syntax=["inline"])
        tokens = parser.parse(text + "\n")
    except (TypeError, ValueError) as err:
        warnings.warn("myst-parse failed unexpectedly: {}".format(err))
        return False

    # Is the format information available in the jupytext text representation?
    if tokens and tokens[0].type == "front_matter":
        try:
            metadata = yaml.safe_load(tokens[0].content)
        except (yaml.parser.ParserError, yaml.scanner.ScannerError):
            pass
        else:
            try:
                if (
                    metadata.get("jupytext", {})
                    .get("text_representation", {})
                    .get("format_name", "")
                    == MYST_FORMAT_NAME
                ):
                    return True
            except AttributeError:
                pass

    # is there at least one fenced code block with a code/raw directive language
    for token in tokens:
        if token.type == "fence" and (
            token.info.startswith(code_directive)
            or token.info.startswith(raw_directive)
        ):
            return True

    return False
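A usage sketch, assuming the module-level constants (CODE_DIRECTIVE, RAW_DIRECTIVE) and myst_extensions are available as in the project above; the file path is illustrative:

from pathlib import Path

# hypothetical check: should this Markdown file be treated as a MyST notebook?
text = Path("notebook.md").read_text()
if matches_mystnb(text, ext=".md"):
    print("treat as MyST notebook")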
Code example #5
 def parse_markdown(
     self, text: str, parent: Optional[nodes.Node] = None
 ) -> List[nodes.Node]:
     """Parse text as CommonMark, in a new document."""
     parser = default_parser(MdParserConfig(commonmark_only=True))
     parent = parent or nodes.container()
     parser.options["current_node"] = parent
     parser.render(text)
     # TODO are there any transforms we should retroactively carry out?
     return parent.children
Code example #6
File: patch.py Project: ousttrue/vrm.dev
def parse(self, inputstring: str, document: nodes.document) -> None:
    """Parse source text.
    :param inputstring: The source string to parse
    :param document: The root docutils node to add AST elements to
    """
    config = document.settings.env.myst_config
    parser = default_parser(config)
    parser.options["document"] = document
    env: dict = {}
    tokens = parser.parse(inputstring, env)
    if not tokens or tokens[0].type != "front_matter":
        # we always add front matter, so that we can merge it with global keys,
        # specified in the sphinx configuration
        tokens = [Token("front_matter", "", 0, content="{}", map=[0, 0])
                  ] + tokens

    header_text = None
    if tokens[0].type == 'front_matter':
        #
        # Hugo article migration
        #
        # * get title from frontmatter(yaml)
        #
        import pathlib
        path = pathlib.Path(document.current_source)
        title = path.stem
        if title in ('index', '_index'):
            title = path.parent.stem

        try:
            import yaml
            data = yaml.safe_load(tokens[0].content)
            title = data['title']
        except Exception:
            # fall back to the filename-derived title
            pass

        header_text = Token("text", "", 0, content=title, map=tokens[0].map)
        tokens = [
            tokens[0],
            Token("heading_open", "h1", 1, content="{}", map=header_text.map),
            Token("inline",
                  "",
                  0,
                  content="{}",
                  map=header_text.map,
                  children=[header_text]),
            Token("heading_close", "h1", -1, content="{}", map=header_text.map)
        ] + tokens[1:]

    parser.renderer.render(tokens, parser.options, env)
Code example #7
File: sphinx_parser.py Project: tfiers/MyST-Parser
    def parse(
        self, inputstring: str, document: nodes.document, renderer: str = "sphinx"
    ):
        """Parse source text.

        :param inputstring: The source string to parse
        :param document: The root docutils node to add AST elements to
        """
        if renderer == "sphinx":
            config = document.settings.env.myst_config
        else:
            config = MdParserConfig()
        parser = default_parser(config)
        parser.options["document"] = document
        parser.render(inputstring)
Code example #8
    def parse(self, inputstring: str, document: nodes.document) -> None:
        """Parse source text.
        :param inputstring: The source string to parse
        :param document: The root docutils node to add AST elements to
        """
        config = MdParserConfig(renderer="docutils", enable_extensions=['linkify'])
        parser = default_parser(config)
        parser.options["document"] = document
        env = AttrDict()

        tokens = parser.parse(inputstring, env)
        if not tokens or tokens[0].type != "front_matter":
            # we always add front matter, so that we can merge it with global keys,
            # specified in the sphinx configuration
            tokens = [Token("front_matter", "", 0, content="{}", map=[0, 0])] + tokens
        parser.renderer.render(tokens, parser.options, env)
Code example #9
def to_sphinx(
    filename: str,
    parser_config: Optional[MdParserConfig] = None,
    options=None,
    env=None,
    document=None,
    conf=None,
    srcdir=None,
    with_builder="singlehtml",
):
    """Render text to the docutils AST (before transforms)

    :param text: the text to render
    :param options: options to update the parser with
    :param env: The sandbox environment for the parse
        (will contain e.g. reference definitions)
    :param document: the docutils root node to use (otherwise a new one will be created)
    :param in_sphinx_env: initialise a minimal sphinx environment (useful for testing)
    :param conf: the sphinx conf.py as a dictionary
    :param srcdir: to parse to the mock sphinx env

    :returns: docutils document
    """
    from myst_parser.docutils_renderer import make_document

    md = default_parser(parser_config or MdParserConfig())
    if options:
        md.options.update(options)
    md.options["document"] = document or make_document()

    force_all = False

    with mock_sphinx_env_compat(
            conf=conf,
            srcdir=srcdir,
            document=md.options["document"],
            with_builder=with_builder,
    ) as app:
        app.build(force_all, (filename, ))
        filehtml = Path(filename).with_suffix(".html").name
        output = (Path(app.outdir) / filehtml).read_text()
        return get_div_body(output)
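A sketch of calling this test utility directly; the file path is a placeholder and the default singlehtml builder is used:

# hypothetical: build a single MyST file with a mock Sphinx app and inspect the HTML body
body = to_sphinx("docs/example.md", parser_config=MdParserConfig())
print(body)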
Code example #10
def replace_admonition_in_cell_source(cell_str):
    """Returns cell source with admonition replaced by its generated HTML.
    """
    config = MdParserConfig(renderer="docutils")
    parser = default_parser(config)
    tokens = parser.parse(cell_str)

    admonition_tokens = [
        t for t in tokens if t.type == "fence" and t.info in all_directive_names
    ]

    cell_lines = cell_str.splitlines()
    new_cell_str = cell_str

    for t in admonition_tokens:
        adm_begin, adm_end = t.map
        adm_src = "\n".join(cell_lines[adm_begin:adm_end])
        adm_doc = parser.render(adm_src)
        adm_html = admonition_html(adm_doc)
        new_cell_str = new_cell_str.replace(adm_src, adm_html)

    return new_cell_str
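A usage sketch, assuming all_directive_names and admonition_html from the same module cover the {note} directive; the cell source is illustrative:

# hypothetical: replace an admonition in a markdown cell with its rendered HTML
cell_src = "```{note}\nThis admonition becomes HTML.\n```"
print(replace_admonition_in_cell_source(cell_src))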
Code example #11
def myst_to_notebook(
    text,
    code_directive=CODE_DIRECTIVE,
    raw_directive=RAW_DIRECTIVE,
    add_source_map=False,
):
    """Convert text written in the myst format to a notebook.

    :param text: the file text
    :param code_directive: the name of the directive to search for containing code cells
    :param raw_directive: the name of the directive to search for containing raw cells
    :param add_source_map: add a `source_map` key to the notebook metadata,
        which is a list of the starting source line number for each cell.

    :raises MystMetadataParsingError: if the metadata block is not valid JSON/YAML

    NOTE: we assume here that all of these directives are at the top-level,
    i.e. not nested in other directives.
    """
    # parse markdown file up to the block level (i.e. don't worry about inline text)
    parser = default_parser("html", disable_syntax=["inline"])
    tokens = parser.parse(text + "\n")
    lines = text.splitlines()
    md_start_line = 0

    # get the document metadata
    metadata_nb = {}
    if tokens[0].type == "front_matter":
        metadata = tokens.pop(0)
        md_start_line = metadata.map[1]
        try:
            metadata_nb = yaml.safe_load(metadata.content)
        except (yaml.parser.ParserError, yaml.scanner.ScannerError) as error:
            raise MystMetadataParsingError(
                "Notebook metadata: {}".format(error))

    # create an empty notebook
    nbf_version = nbf.v4
    kwargs = {"metadata": nbf.from_dict(metadata_nb)}
    notebook = nbf_version.new_notebook(**kwargs)
    source_map = []  # this is a list of the starting line number for each cell

    def _flush_markdown(start_line, token, md_metadata):
        """When we find a cell we check if there is preceding text.o"""
        endline = token.map[0] if token else len(lines)
        md_source = strip_blank_lines("\n".join(lines[start_line:endline]))
        meta = nbf.from_dict(md_metadata)
        if md_source:
            source_map.append(start_line)
            notebook.cells.append(
                nbf_version.new_markdown_cell(source=md_source, metadata=meta))

    # iterate through the tokens to identify notebook cells
    nesting_level = 0
    md_metadata = {}

    for token in tokens:

        nesting_level += token.nesting

        if nesting_level != 0:
            # we ignore fenced blocks that are nested, e.g. as part of lists, etc.
            continue

        if token.type == "fence" and token.info.startswith(code_directive):
            _flush_markdown(md_start_line, token, md_metadata)
            options, body_lines = read_fenced_cell(token, len(notebook.cells),
                                                   "Code")
            meta = nbf.from_dict(options)
            source_map.append(token.map[0] + 1)
            notebook.cells.append(
                nbf_version.new_code_cell(source="\n".join(body_lines),
                                          metadata=meta))
            md_metadata = {}
            md_start_line = token.map[1]

        elif token.type == "fence" and token.info.startswith(raw_directive):
            _flush_markdown(md_start_line, token, md_metadata)
            options, body_lines = read_fenced_cell(token, len(notebook.cells),
                                                   "Raw")
            meta = nbf.from_dict(options)
            source_map.append(token.map[0] + 1)
            notebook.cells.append(
                nbf_version.new_raw_cell(source="\n".join(body_lines),
                                         metadata=meta))
            md_metadata = {}
            md_start_line = token.map[1]

        elif token.type == "myst_block_break":
            _flush_markdown(md_start_line, token, md_metadata)
            md_metadata = read_cell_metadata(token, len(notebook.cells))
            md_start_line = token.map[1]

    _flush_markdown(md_start_line, None, md_metadata)

    if add_source_map:
        notebook.metadata["source_map"] = source_map
    return notebook
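A round-trip sketch; nbformat is used to write the resulting notebook, and the file paths are placeholders:

from pathlib import Path
import nbformat

# hypothetical: convert a MyST markdown file to an .ipynb notebook
nb = myst_to_notebook(Path("example.md").read_text(), add_source_map=True)
nbformat.write(nb, "example.ipynb")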
Code example #12
File: parser.py Project: foster999/MyST-NB
def nb_to_tokens(
        ntbk: nbf.NotebookNode, config: MdParserConfig,
        renderer_plugin: str) -> Tuple[MarkdownIt, AttrDict, List[Token]]:
    """Parse the notebook content to a list of syntax tokens and an env,
    containing global data like reference definitions.
    """
    md = default_parser(config)
    # setup the markdown parser
    # Note we disable front matter parsing,
    # because this is taken from the actual notebook metadata
    md.disable("front_matter", ignoreInvalid=True)
    md.renderer = SphinxNBRenderer(md)
    # make a sandbox where all the parsing global data,
    # like reference definitions will be stored
    env = AttrDict()
    rules = md.core.ruler.get_active_rules()

    # First only run pre-inline chains
    # so we can collect all reference definitions, etc, before assessing references
    def parse_block(src, start_line):
        with md.reset_rules():
            # enable only rules up to block
            md.core.ruler.enableOnly(rules[:rules.index("inline")])
            tokens = md.parse(src, env)
        for token in tokens:
            if token.map:
                token.map = [
                    start_line + token.map[0], start_line + token.map[1]
                ]
        for dup_ref in env.get("duplicate_refs", []):
            if "fixed" not in dup_ref:
                dup_ref["map"] = [
                    start_line + dup_ref["map"][0],
                    start_line + dup_ref["map"][1],
                ]
                dup_ref["fixed"] = True
        return tokens

    block_tokens = []
    source_map = ntbk.metadata.get("source_map", None)

    # get language lexer name
    langinfo = ntbk.metadata.get("language_info", {})
    lexer = langinfo.get("pygments_lexer", langinfo.get("name", None))
    # TODO log warning if lexer is still None

    for cell_index, nb_cell in enumerate(ntbk.cells):

        # if the source_map has been stored (for text-based notebooks),
        # we use that to define the starting line for each cell
        # otherwise, we set a pseudo base that represents the cell index
        start_line = source_map[cell_index] if source_map else (cell_index +
                                                                1) * 10000
        start_line += 1  # use base 1 rather than 0

        # Skip empty cells
        if len(nb_cell["source"].strip()) == 0:
            continue

        # skip cells tagged for removal
        # TODO this logic should be deferred to a transform
        tags = nb_cell.metadata.get("tags", [])
        if ("remove_cell" in tags) or ("remove-cell" in tags):
            continue

        if nb_cell["cell_type"] == "markdown":

            # we add the cell index to tokens,
            # so they can be included in the error logging,
            block_tokens.extend(parse_block(nb_cell["source"], start_line))

        elif nb_cell["cell_type"] == "code":
            # here we do nothing but store the cell as a custom token
            block_tokens.append(
                Token(
                    "nb_code_cell",
                    "",
                    0,
                    meta={
                        "cell": nb_cell,
                        "lexer": lexer,
                        "renderer": renderer_plugin
                    },
                    map=[start_line, start_line],
                ))

    # Now all definitions have been gathered,
    # we run inline and post-inline chains, to expand the text.
    # Note we assume here that these rules never require the actual source text,
    # only acting on the existing tokens
    state = StateCore(None, md, env, block_tokens)
    with md.reset_rules():
        md.core.ruler.enableOnly(rules[rules.index("inline"):])
        md.core.process(state)

    # Add the front matter.
    # Note that myst_parser serialises dict/list like keys, when rendering to
    # docutils docinfo. These could be read back with `json.loads`.
    state.tokens = [
        Token("front_matter",
              "",
              0,
              content=({k: v
                        for k, v in ntbk.metadata.items()}))
    ] + state.tokens

    # If there are widgets, this will embed the state of all widgets in a script
    if contains_widgets(ntbk):
        state.tokens.append(
            Token("jupyter_widget_state",
                  "",
                  0,
                  meta={"state": get_widgets(ntbk)}))

    return md, env, state.tokens
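A sketch of driving this function from a notebook on disk; the file name and the renderer_plugin value are assumptions:

# hypothetical: tokenise an existing notebook before rendering
ntbk = nbf.read("example.ipynb", as_version=4)
md, env, tokens = nb_to_tokens(ntbk, MdParserConfig(), "default")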
Code example #13
def to_model(myst):
    md = default_parser("docutils")
    tokens = md.parse(myst)
    sections = _split_sections(tokens)
    return sections
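A minimal call sketch; the MyST source string is illustrative and _split_sections comes from the same module:

sections = to_model("# Title\n\nSome intro text.\n\n## Subsection\n\nMore text.\n")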
    """