Example #1
    def read(cls, lines):
        # first line
        start_line = lines.lineno + 1
        line = cls.convert_leading_tabs(next(lines).lstrip()).split(">", 1)[1]
        if len(line) > 0 and line[0] == " ":
            line = line[1:]
        line_buffer = [line]

        # set booleans
        in_code_fence = CodeFence.start(line)
        in_block_code = BlockCode.start(line)
        blank_line = line.strip() == ""

        # loop
        next_line = lines.peek()
        while not cls.transition(next_line):
            stripped = cls.convert_leading_tabs(next_line.lstrip())
            prepend = 0
            if stripped[0] == ">":
                # has leader, not lazy continuation
                prepend += 1
                if stripped[1] == " ":
                    prepend += 1
                stripped = stripped[prepend:]
                in_code_fence = CodeFence.start(stripped)
                in_block_code = BlockCode.start(stripped)
                blank_line = stripped.strip() == ""
                line_buffer.append(stripped)
            elif in_code_fence or in_block_code or blank_line:
                # not paragraph continuation text
                break
            else:
                # lazy continuation, preserve whitespace
                line_buffer.append(next_line)
            next(lines)
            next_line = lines.peek()

        # block level tokens are parsed here, so that link_definitions
        # in quotes can be recognized before span-level tokenizing.
        Paragraph.parse_setext = False
        try:
            child_tokens = tokenizer.tokenize_block(
                SourceLines(line_buffer, start_line=start_line)
            )
        finally:
            Paragraph.parse_setext = True
        return cls(
            children=child_tokens,
            position=Position.from_source_lines(lines, start_line=start_line),
        )
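
For orientation, the read() hook above consumes lines from a line stream, buffers the quote body, and tokenizes it recursively with tokenize_block. A minimal sketch of exercising this kind of hook through vanilla mistletoe's top-level API; this assumes the vanilla mistletoe package is installed, while the fork these snippets come from (with SourceLines and Position) may use different class names and import paths.

# A minimal sketch, assuming vanilla mistletoe; token class names and import
# paths vary between versions and forks.
import mistletoe
from mistletoe import Document

doc = Document(["> quoted text\n", "> with a second line\n"])
quote = doc.children[0]                         # the block-quote token produced by read()
print(type(quote).__name__)                     # e.g. "Quote" in vanilla mistletoe
print(mistletoe.markdown("> quoted *text*\n"))  # renders the quote to HTML
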
Example #2
    def read(cls, lines):
        # first line
        line = cls.convert_leading_tabs(next(lines).lstrip()).split('>', 1)[1]
        if len(line) > 0 and line[0] == ' ':
            line = line[1:]
        line_buffer = [line]

        # set booleans
        in_code_fence = CodeFence.start(line)
        in_block_code = BlockCode.start(line)
        blank_line = line.strip() == ''

        # loop
        next_line = lines.peek()
        while (next_line is not None
                and next_line.strip() != ''
                and not Heading.start(next_line)
                and not CodeFence.start(next_line)
                and not ThematicBreak.start(next_line)
                and not List.start(next_line)
                and not Setting.start(next_line)):
            stripped = cls.convert_leading_tabs(next_line.lstrip())
            prepend = 0
            if stripped[0] == '>':
                # has leader, not lazy continuation
                prepend += 1
                if stripped[1] == ' ':
                    prepend += 1
                stripped = stripped[prepend:]
                in_code_fence = CodeFence.start(stripped)
                in_block_code = BlockCode.start(stripped)
                blank_line = stripped.strip() == ''
                line_buffer.append(stripped)
            elif in_code_fence or in_block_code or blank_line:
                # not paragraph continuation text
                break
            else:
                # lazy continuation, preserve whitespace
                line_buffer.append(next_line)
            next(lines)
            next_line = lines.peek()

        # block level tokens are parsed here, so that footnotes
        # in quotes can be recognized before span-level tokenizing.
        Paragraph.parse_setext = False
        parse_buffer = tokenizer.tokenize_block(line_buffer, _token_types)
        Paragraph.parse_setext = True
        return parse_buffer
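
Both quote readers are driven by a block tokenizer that asks each registered token type whether it starts at the current line and, if so, hands it the line stream so read() can consume as many lines as it needs. The following is a simplified sketch of that dispatch loop, not the actual tokenizer implementation; the names mirror the snippets above (start, read, _token_types).

# Simplified sketch of the dispatch loop behind tokenize_block; not the real code.
def tokenize_block_sketch(lines, token_types):
    tokens = []
    line = lines.peek()
    while line is not None:
        for token_type in token_types:
            if token_type.start(line):          # cheap per-line check
                token = token_type.read(lines)  # consumes as many lines as it needs
                if token is not None:
                    tokens.append(token)
                break
        else:
            next(lines)                         # no block token claimed this line
        line = lines.peek()
    return tokens
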
Example #3
    def read(cls, lines, prev_marker=None):
        next_marker = None
        lines.anchor()
        prepend = -1
        leader = None
        line_buffer = []

        # first line
        line = next(lines)
        prepend, leader = prev_marker if prev_marker else cls.parse_marker(line)
        line = line.replace(leader + '\t', leader + '   ', 1).replace('\t', '    ')
        empty_first_line = line[prepend:].strip() == ''
        if not empty_first_line:
            line_buffer.append(line[prepend:])
        next_line = lines.peek()
        if empty_first_line and next_line is not None and next_line.strip() == '':
            parse_buffer = tokenizer.tokenize_block([next(lines)], _token_types)
            next_line = lines.peek()
            if next_line is not None:
                marker_info = cls.parse_marker(next_line)
                if marker_info is not None:
                    next_marker = marker_info
            return (parse_buffer, prepend, leader), next_marker

        # loop
        newline = 0
        while True:
            # no more lines
            if next_line is None:
                # strip off newlines
                if newline:
                    lines.backstep()
                    del line_buffer[-newline:]
                break
            next_line = next_line.replace('\t', '    ')
            # not in continuation
            if not cls.in_continuation(next_line, prepend):
                # directly followed by another token
                if cls.other_token(next_line):
                    if newline:
                        lines.backstep()
                        del line_buffer[-newline:]
                    break
                # next_line is a new list item
                marker_info = cls.parse_marker(next_line)
                if marker_info is not None:
                    next_marker = marker_info
                    break
                # not another item, has newlines -> not continuation
                if newline:
                    lines.backstep()
                    del line_buffer[-newline:]
                    break
            next(lines)
            line = next_line
            stripped = line.lstrip(' ')
            diff = len(line) - len(stripped)
            if diff > prepend:
                stripped = ' ' * (diff - prepend) + stripped
            line_buffer.append(stripped)
            newline = newline + 1 if next_line.strip() == '' else 0
            next_line = lines.peek()

        # block-level tokens are parsed here, so that footnotes can be
        # recognized before span-level parsing.
        parse_buffer = tokenizer.tokenize_block(line_buffer, _token_types)
        return (parse_buffer, prepend, leader), next_marker
Example #4
    def read(cls, lines, prev_marker=None):
        next_marker = None
        lines.anchor()
        prepend = -1
        leader = None
        start_line = lines.lineno
        line_buffer = []

        # first line
        line = next(lines)
        prepend, leader = prev_marker if prev_marker else cls.parse_marker(line)
        line = line.replace(leader + "\t", leader + "   ", 1).replace("\t", "    ")
        empty_first_line = line[prepend:].strip() == ""
        if not empty_first_line:
            line_buffer.append(line[prepend:])
        next_line = lines.peek()
        if empty_first_line and next_line is not None and next_line.strip() == "":
            child_tokens = tokenizer.tokenize_block(
                SourceLines([next(lines)], start_line=lines.lineno)
            )
            next_line = lines.peek()
            if next_line is not None:
                marker_info = cls.parse_marker(next_line)
                if marker_info is not None:
                    next_marker = marker_info
            return cls(
                children=child_tokens,
                loose=child_tokens.loose,
                prepend=prepend,
                leader=leader,
                next_marker=next_marker,
                position=Position.from_source_lines(lines, start_line=start_line),
            )

        # loop
        newline = 0
        while True:
            # no more lines
            if next_line is None:
                # strip off newlines
                if newline:
                    lines.backstep()
                    del line_buffer[-newline:]
                break
            next_line = next_line.replace("\t", "    ")
            # not in continuation
            if not cls.in_continuation(next_line, prepend):
                # directly followed by another token
                if cls.transition(next_line):
                    if newline:
                        lines.backstep()
                        del line_buffer[-newline:]
                    break
                # next_line is a new list item
                marker_info = cls.parse_marker(next_line)
                if marker_info is not None:
                    next_marker = marker_info
                    break
                # not another item, has newlines -> not continuation
                if newline:
                    lines.backstep()
                    del line_buffer[-newline:]
                    break
            next(lines)
            line = next_line
            stripped = line.lstrip(" ")
            diff = len(line) - len(stripped)
            if diff > prepend:
                stripped = " " * (diff - prepend) + stripped
            line_buffer.append(stripped)
            newline = newline + 1 if next_line.strip() == "" else 0
            next_line = lines.peek()

        child_tokens = tokenizer.tokenize_block(
            SourceLines(line_buffer, start_line=start_line)
        )

        return cls(
            children=child_tokens,
            loose=child_tokens.loose,
            prepend=prepend,
            leader=leader,
            next_marker=next_marker,
            position=Position.from_source_lines(lines, start_line=start_line),
        )
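
As with the quote examples, the list-item readers are easiest to observe through the package's top-level API. A minimal sketch, again assuming vanilla mistletoe is installed; the read() hooks above additionally track prepend, leader, and next_marker internally, but the resulting token tree has the same shape.

# A minimal sketch, assuming vanilla mistletoe; class names may differ in forks.
from mistletoe import Document

doc = Document([
    "- first item\n",
    "- second item\n",
    "  with a lazy continuation line\n",
])
lst = doc.children[0]                            # the List token
for item in lst.children:                        # each ListItem produced by read()
    print(type(item).__name__, [type(child).__name__ for child in item.children])
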
Example #5
    def run_block(self):
        # tokenize the source lines into block-level tokens within the
        # parser state established by set_state()
        with self.set_state():
            self._blocks = block_tokenizer.tokenize_block(
                self._lines, block_token._token_types
            )
        return self._blocks
Example #6
    def parse(self, inputstring, document):

        # de-serialize the notebook
        ntbk = nbf.reads(inputstring, nbf.NO_CONVERT)

        # This is a container for top-level markdown tokens
        # which we will add to as we walk the document
        mkdown_tokens = []  # type: list[BlockToken]

        # First we ensure that we are using a 'clean' global context
        # for parsing, which is set up with the MyST parsing tokens;
        # the logger will report on duplicate link/footnote definitions, etc.
        parse_context = ParseContext(
            find_blocks=SphinxNBRenderer.default_block_tokens,
            find_spans=SphinxNBRenderer.default_span_tokens,
            logger=SPHINX_LOGGER,
        )
        set_parse_context(parse_context)

        for cell_index, nb_cell in enumerate(ntbk.cells):

            # Skip empty cells
            if len(nb_cell["source"].strip()) == 0:
                continue

            # skip cells tagged for removal
            tags = nb_cell.metadata.get("tags", [])
            if "remove_cell" in tags:
                continue

            if nb_cell["cell_type"] == "markdown":

                # we add the document path and cell index
                # to the source lines, so they can be included in the error logging
                # NOTE: currently the logic to report metadata is not written
                # into SphinxRenderer, but this will be introduced in a later update
                lines = SourceLines(
                    nb_cell["source"],
                    uri=document["source"],
                    metadata={"cell_index": cell_index},
                    standardize_ends=True,
                )

                # parse the source markdown text;
                # at this point span/inline level tokens are not yet processed, but
                # link/footnote definitions are collected/stored in the global context
                mkdown_tokens.extend(tokenize_block(lines))

                # TODO for md cells, think of a way to implement the previous
                # `if "hide_input" in tags:` logic

            elif nb_cell["cell_type"] == "code":
                # here we do nothing but store the cell as a custom token
                mkdown_tokens.append(
                    NbCodeCell(
                        cell=nb_cell,
                        position=Position(
                            line_start=0,
                            uri=document["source"],
                            data={"cell_index": cell_index},
                        ),
                    ))

        # Now all definitions have been gathered, we walk the tokens and
        # process any inline text
        for token in mkdown_tokens + list(
                get_parse_context().foot_definitions.values()):
            token.expand_spans()

        # If there are widgets, this will embed the state of all widgets in a script
        if contains_widgets(ntbk):
            mkdown_tokens.insert(0,
                                 JupyterWidgetState(state=get_widgets(ntbk)))

        # create the front matter token
        front_matter = FrontMatter(content=ntbk.metadata, position=None)

        # Finally, we create the top-level markdown document
        markdown_doc = Document(
            children=mkdown_tokens,
            front_matter=front_matter,
            link_definitions=parse_context.link_definitions,
            footnotes=parse_context.foot_definitions,
            footref_order=parse_context.foot_references,
        )

        self.reporter = document.reporter
        self.config = self.default_config.copy()
        try:
            new_cfg = document.settings.env.config.myst_config
            self.config.update(new_cfg)
        except AttributeError:
            pass

        # Remove all the mime prefixes from "glue" step.
        # This way, writing properly captures the glued images
        replace_mime = []
        for cell in ntbk.cells:
            if hasattr(cell, "outputs"):
                for out in cell.outputs:
                    if "data" in out:
                        # Only do the mimebundle replacing for the scrapbook outputs
                        mime_prefix = (
                            out.get("metadata", {})
                            .get("scrapbook", {})
                            .get("mime_prefix")
                        )
                        if mime_prefix:
                            out["data"] = {
                                key.replace(mime_prefix, ""): val
                                for key, val in out["data"].items()
                            }
                            replace_mime.append(out)

        # Write the notebook's output to disk. This changes metadata in notebook cells
        path_doc = Path(document.settings.env.docname)
        doc_relpath = path_doc.parent
        doc_filename = path_doc.name
        build_dir = Path(document.settings.env.app.outdir).parent
        output_dir = build_dir.joinpath("jupyter_execute", doc_relpath)
        write_notebook_output(ntbk, str(output_dir), doc_filename)

        # Now add back the mime prefixes to the right outputs so they aren't rendered
        # until called from the role/directive
        for out in replace_mime:
            out["data"] = {
                f"{GLUE_PREFIX}{key}": val
                for key, val in out["data"].items()
            }

        # Update our glue key list with new ones defined in this page
        glue_domain = NbGlueDomain.from_env(document.settings.env)
        glue_domain.add_notebook(ntbk, path_doc)

        # render the Markdown AST to docutils AST
        renderer = SphinxNBRenderer(parse_context=parse_context,
                                    document=document,
                                    current_node=None)
        renderer.render(markdown_doc)
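
The notebook parser above leans on nbformat for deserialization and cell iteration before any markdown tokenizing happens. A minimal sketch of that cell-walking pattern in isolation, assuming nbformat is installed; "notebook.ipynb" is a placeholder path, and the tag filtering mirrors the "remove_cell" logic in the example.

# A minimal sketch of the cell iteration used above; "notebook.ipynb" is a
# placeholder path, not a file referenced by the example.
import nbformat as nbf

ntbk = nbf.read("notebook.ipynb", nbf.NO_CONVERT)
for cell_index, cell in enumerate(ntbk.cells):
    tags = cell.metadata.get("tags", [])
    if not cell["source"].strip() or "remove_cell" in tags:
        continue  # skip empty cells and cells tagged for removal
    print(cell_index, cell["cell_type"], len(cell["source"]))
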