Python TextNodeの例、mau.parsers.nodes.TextNode Pythonの例

コード例 #1

0

ファイルを表示

    def parse_sentence(self, stop_tokens=None):
        """Parse a run of styled text into a single SentenceNode.

        Nodes are read with ``parse_styled_text`` until it returns
        ``None``. EOF and EOL tokens are always added to the given
        ``stop_tokens``. Consecutive WordNode results are merged into
        one TextNode; every other node is kept as it is, in order.
        """
        import itertools

        def is_word(node):
            return node.__class__ == WordNode

        terminators = (stop_tokens or set()).union(
            {Token(TokenTypes.EOF), Token(TokenTypes.EOL)}
        )

        # Keep reading styled text until the parser has nothing left
        parsed = []
        while True:
            node = self.parse_styled_text(terminators)
            if node is None:
                break
            parsed.append(node)

        # Collapse each run of adjacent words into one TextNode
        merged = []
        for words, run in itertools.groupby(parsed, is_word):
            if not words:
                merged.extend(run)
                continue
            merged.append(TextNode("".join(n.value for n in run)))

        return SentenceNode(content=merged)

コード例 #2

0

ファイルを表示

    def _parse_escaped_char(self):
        """Save the character that follows a backslash as a TextNode.

        The backslash is dropped when the escaped value is found in
        the string "{}"; otherwise it is kept in front of the value.
        """
        # Consume the backslash itself
        self.get_token(TokenTypes.LITERAL, "\\")

        escaped = self.get_token().value
        text = escaped if escaped in "{}" else f"\\{escaped}"

        self._save(TextNode(text))

コード例 #3

0

ファイルを表示

ファイル: preprocess_variables_parser.py プロジェクト: Project-Mau/mau

    def _parse_verbatim(self):
        """Save a backtick-delimited span, backticks included, as a TextNode."""
        # Opening backtick
        self.get_token(TokenTypes.LITERAL, "`")

        # Everything up to the closing backtick (or the end of input)
        stop_tokens = [Token(TokenTypes.LITERAL, "`"), Token(TokenTypes.EOF)]
        inner = self.collect_join(stop_tokens, preserve_escaped_stop_tokens=True)

        # Closing backtick
        self.get_token(TokenTypes.LITERAL, "`")

        # The backticks are put back around the collected text
        self._save(TextNode(f"`{inner}`"))

コード例 #4

0

ファイルを表示

ファイル: preprocess_variables_parser.py プロジェクト: Project-Mau/mau

    def _parse_curly(self):
        """Replace a ``{name}`` reference with the value of the variable.

        The text between the curly braces is looked up in
        ``self.variables``. A dotted name such as ``namespace.name``
        is resolved one component at a time, so each component but the
        last must map to a nested mapping. The value found is saved as
        a TextNode.

        Raises:
            PreprocessError: if any component of the name cannot be
                resolved. The message reports the full dotted name.
        """
        self.get_token(TokenTypes.LITERAL, "{")
        variable_name = self.collect_join(
            [Token(TokenTypes.LITERAL, "}"), Token(TokenTypes.EOF)]
        )
        self.get_token(TokenTypes.LITERAL, "}")

        # Walk the dotted path. A name without dots is a single lookup,
        # and names with several dots no longer fail on tuple unpacking.
        variable_value = self.variables
        try:
            for key in variable_name.split("."):
                variable_value = variable_value[key]
        except (KeyError, TypeError) as error:
            # TypeError covers a namespace that is not a mapping
            raise PreprocessError(
                f'Attribute "{variable_name}" has not been defined'
            ) from error

        self._save(TextNode(variable_value))

コード例 #5

0

ファイルを表示

 def parse(self):
     """Run the parser, then collapse all saved nodes into one TextNode.

     The string form of every node value is concatenated, in order,
     and ``self.nodes`` is replaced by a list with that single node.
     """
     self._parse()
     joined = "".join(str(node.value) for node in self.nodes)
     self.nodes = [TextNode(joined)]

コード例 #6

0

ファイルを表示

 def _parse_pass(self):
     """Save the value of the next token, unchanged, as a TextNode."""
     token = self.get_token()
     self._save(TextNode(token.value))

コード例 #7

0

ファイルを表示

    def _parse_source_engine(self, content, secondary_content, kwargs):
        # Parse a source block in the form
        #
        # [source, language, attributes...]
        # ----
        # content
        # ----
        #
        # Source blocks support the following attributes
        #
        # callouts=":" The separator used by callouts
        # highlight="@" The special character to turn on highlight
        #
        # [source, language, attributes...]
        # ----
        # content:1:
        # ----
        #
        # [source, language, attributes...]
        # ----
        # content:@:
        # ----
        #
        # Callout descriptions can be added to the block
        # as secondary content with the syntax
        #
        # [source, language, attributes...]
        # ----
        # content:name:
        # ----
        # <name>: <description>
        #
        # Since Mau uses Pygments, the attribute language
        # is one of the langauges supported by that tool.

        # Get the delimiter for callouts (":" by default)
        delimiter = kwargs.pop("callouts", ":")

        # A dictionary that contains callout markers in
        # the form {linenum:name}
        callout_markers = {}

        # Get the marker for highlighted lines ("@" by default)
        highlight_marker = kwargs.pop("highlight", "@")

        # A list of highlighted lines
        highlighted_lines = []

        # This is a list of all lines that might contain
        # a callout. They will be further processed
        # later to be sure.
        lines_with_callouts = [(linenum, line)
                               for linenum, line in enumerate(content)
                               if line.endswith(delimiter)]

        # Each line in the previous list is processed
        # and stored if it contains a callout
        for linenum, line in lines_with_callouts:
            # Remove the final delimiter
            line = line[:-1]

            splits = line.split(delimiter)
            if len(splits) < 2:
                # It's a trap! There are no separators left
                continue

            # Get the callout and the line
            callout_name = splits[-1]
            line = delimiter.join(splits[:-1])

            content[linenum] = line

            # Check if we want to just highlight the line
            if callout_name == highlight_marker:
                highlighted_lines.append(linenum)
            else:
                callout_markers[linenum] = callout_name

        # A dictionary that contains the text for each
        # marker in the form {name:text}
        callout_contents = {}

        # If there was secondary content it should be formatted
        # with callout names followed by colon and the
        # callout text.
        for line in secondary_content:
            if ":" not in line:
                self.error(
                    f"Callout description should be written as 'name: text'. Missing ':' in '{line}'"
                )

            name, text = line.split(":")

            if name not in callout_markers.values():
                self.error(
                    f"Callout {name} has not been created in the source code")

            text = text.strip()

            callout_contents[name] = text

        # Put markers and contents together
        callouts = {"markers": callout_markers, "contents": callout_contents}

        # Source blocks must preserve the content literally
        textlines = [TextNode(line) for line in content]

        return textlines, callouts, highlighted_lines

コード例 #8

0

ファイルを表示

    def _parse_block(self):
        """Parse a delimited block and save it as a BlockNode.

        A block has the form

            [block_type]
            ----
            Content
            ----
            Optional secondary content

        Blocks are delimited by 4 consecutive identical characters.

        The block type is the first unnamed argument collected by
        ``self.argsparser``. The attributes "classes", "condition",
        "preprocessor" and "engine" are consumed here; the remaining
        ones are passed to the node.

        Returns:
            None. When the block has a condition that is not
            satisfied the method returns without saving a node.

        Raises:
            TokenError: if the delimiter is not made of 4 identical
                characters.
            EngineError: if the requested engine is not one of
                "raw", "mau", "source" or "default".
        """
        # Get the delimiter and check the length
        delimiter = self.get_token(TokenTypes.TEXT).value
        if len(delimiter) != 4 or len(set(delimiter)) != 1:
            raise TokenError
        self.get_token(TokenTypes.EOL)

        # Collect everything until the next delimiter
        content = self._collect_lines(
            [Token(TokenTypes.TEXT, delimiter),
             Token(TokenTypes.EOF)])
        self.force_token(TokenTypes.TEXT, delimiter)
        self.get_token(TokenTypes.EOL)

        # Get the optional secondary content
        secondary_content = self._collect_lines(
            [Token(TokenTypes.EOL),
             Token(TokenTypes.EOF)])

        # Consume the title
        title = self._pop_title()

        # The first unnamed argument is the block type
        blocktype = self.argsparser.pop()

        # If there is a block alias for blocktype replace it
        # otherwise use the blocktype we already have
        blocktype = self.block_aliases.get(blocktype, blocktype)

        # Assign names
        self.argsparser.set_names_and_defaults(
            self.block_names.get(blocktype, []),
            self.block_defaults.get(blocktype, {}))

        # Consume the attributes
        args, kwargs = self.argsparser.get_arguments_and_reset()

        # Extract classes and convert them into a list,
        # skipping empty entries
        classes = [i for i in kwargs.pop("classes", "").split(",") if i]

        # Extract condition if present and process it
        condition = kwargs.pop("condition", "")

        # Run this only if there is a condition on this block
        if condition:
            try:
                # The condition should be either test:variable:value or test:variable:
                test, variable, value = condition.split(":")
            except ValueError:
                self.error(
                    f'Condition {condition} is not in the form "test:variable:value" or "test:variable:"'
                )

            # If there is no value use True
            if not value:
                value = True

            # Check if the variable matches the value and apply the requested test.
            # Any test other than "if" requires the variable NOT to match.
            match = self.variables.get(variable) == value
            expected = test == "if"

            # If the condition is not satisfied return
            if match is not expected:
                return

        # Extract the preprocessor
        preprocessor = kwargs.pop("preprocessor", "none")

        # Extract the engine
        engine = kwargs.pop("engine", "default")

        # Create the node parameters according to the engine
        if engine in ["raw", "mau"]:
            # Engine "raw" doesn't process the content,
            # so we just pass it untouched in the form of
            # a TextNode per line. The same is true for "mau"
            # as the visitor will have to fire up a new parser
            # to process the content.
            content = [TextNode(line) for line in content]
            secondary_content = [TextNode(line) for line in secondary_content]
        elif engine == "source":
            # Engine "source" extracts the content (source code),
            # the callouts, and the highlights.
            # The default language is "text".
            content, callouts, highlights = self._parse_source_engine(
                content, secondary_content, kwargs)
            secondary_content = []

            kwargs["callouts"] = callouts
            kwargs["highlights"] = highlights
            kwargs.setdefault("language", "text")
        elif engine == "default":
            # This is the default engine and it parses
            # both content and secondary content using a new parser
            # but then merges headers and footnotes into the
            # current one.

            # Parse the primary and secondary content and record footnotes
            pc = MainParser(variables=self.variables).analyse(
                "\n".join(content))
            ps = MainParser(variables=self.variables).analyse(
                "\n".join(secondary_content))
            content = pc.nodes
            secondary_content = ps.nodes

            # NOTE(review): only the footnotes and headers of the primary
            # content are merged, those of the secondary content are
            # not - confirm that this is intended
            self.footnote_defs.extend(pc.footnote_defs)
            self.headers.extend(pc.headers)
        else:
            raise EngineError(f"Engine {engine} is not available")

        self._save(
            BlockNode(
                blocktype=blocktype,
                content=content,
                secondary_content=secondary_content,
                args=args,
                classes=classes,
                engine=engine,
                preprocessor=preprocessor,
                kwargs=kwargs,
                title=title,
            ))

コード例 #9

0

ファイルを表示

    def _parse_source_block(self, content, secondary_content, title, args,
                            kwargs):
        """Build a SourceNode from the lines of a source block and save it.

        A line carries a callout when it ends with a name wrapped in
        the callouts delimiter (``code:name:``); when the name is the
        highlight marker the line is highlighted instead. Each line of
        secondary content describes a callout as ``name: text``.

        Args:
            content: list of source lines. It is modified in place:
                callout and highlight markers are removed from it.
            secondary_content: iterable of callout description lines.
            title: title passed to the node.
            args: unnamed attributes, merged into ``kwargs`` through
                ``merge_args`` under the name "language".
            kwargs: named attributes. "callouts" (default ":") and
                "highlight" (default "@") are removed from it.

        Raises:
            ParseError: if a description has no colon or names a
                callout that is not in the source code.
        """
        delimiter = kwargs.pop("callouts", ":")
        highlight_marker = kwargs.pop("highlight", "@")

        # A dictionary that contains callout markers in
        # the form {linenum:name}
        callout_markers = {}

        # A list of highlighted lines
        highlighted_lines = []

        for linenum, line in enumerate(content):
            if not line.endswith(delimiter):
                continue

            # Remove the whole final delimiter, which may be
            # longer than a single character
            line = line[: -len(delimiter)]

            # Split on the last delimiter left: code before, name after
            code, separator, callout_name = line.rpartition(delimiter)
            if not separator:
                # It's a trap! There are no separators left
                continue

            content[linenum] = code

            if callout_name == highlight_marker:
                highlighted_lines.append(linenum)
            else:
                callout_markers[linenum] = callout_name

        # A dictionary that contains the text for each
        # marker in the form {name:text}
        callout_contents = {}

        # Names are collected once instead of scanning the
        # markers for every description
        known_names = set(callout_markers.values())

        for line in secondary_content:
            if ":" not in line:
                raise ParseError(
                    f"Callout description should be written as 'linuenum: text'. Missing ':' in '{line}'"
                )

            # Split on the first colon only, so that the
            # description itself may contain colons
            name, _, text = line.partition(":")

            if name not in known_names:
                raise ParseError(
                    f"Callout {name} has not been created in the source code")

            callout_contents[name] = text.strip()

        # Put markers and contents together
        callouts = {"markers": callout_markers, "contents": callout_contents}

        # Source blocks must preserve the content literally
        textlines = [TextNode(line) for line in content]

        _, kwargs = merge_args(args, kwargs, ["language"])

        language = kwargs.pop("language", "text")

        self._save(
            SourceNode(
                language,
                callouts=callouts,
                highlights=highlighted_lines,
                delimiter=delimiter,
                code=textlines,
                title=title,
                kwargs=kwargs,
            ))

コード例 #10

0

ファイルを表示

    def _parse_raw_block(self, content, args, kwargs):
        """Save a RawNode that holds one TextNode per line of content.

        ``args`` and ``kwargs`` are accepted but not used.
        """
        raw_lines = list(map(TextNode, content))
        self._save(RawNode(content=raw_lines))

コード例 #11

0

ファイルを表示

ファイル: main_parser.py プロジェクト: AlexNodex/mau

    def _parse_source_block(self, content, secondary_content, title, args,
                            kwargs):
        """Build a SourceNode from the lines of a source block and save it.

        A line carries a callout when it ends with a name wrapped in
        the callouts delimiter (``code:name:``). Each line of secondary
        content gives the text of a callout as ``name: text``.

        Args:
            content: list of source lines. It is modified in place:
                callout markers are removed from it.
            secondary_content: iterable of callout description lines.
            title: title passed to the node.
            args: unnamed attributes, merged into ``kwargs`` through
                ``merge_args`` under the name "language".
            kwargs: named attributes. "callouts" (default ":") is
                removed from it.

        Raises:
            ParseError: if a description has no colon or names a
                callout that is not in the source code.
        """
        delimiter = kwargs.pop("callouts", ":")

        # This removes callouts from the source code and
        # maps callout names to line numbers
        # {callout_name:linenum}
        callout_name_to_linenum = {}

        for linenum, line in enumerate(content):
            if not line.endswith(delimiter):
                continue

            # Remove the whole final delimiter, which may be
            # longer than a single character
            line = line[: -len(delimiter)]

            # Split on the last delimiter left: code before, name after
            code, separator, callout_name = line.rpartition(delimiter)
            if not separator:
                # It's a trap! There are no separators left
                continue

            content[linenum] = code
            callout_name_to_linenum[callout_name] = linenum

        # This reads the callout text and connects
        # it with the text line
        # {linenum:(callout_name,text)}
        callouts = {}
        for line in secondary_content:
            if ":" not in line:
                raise ParseError(
                    f"Callout description should be written as 'name: text'. Missing ':' in '{line}'"
                )

            # Prefer the documented "name: text" form, but accept
            # "name:text" as well. Splitting only once lets the text
            # contain further separators.
            separator = ": " if ": " in line else ":"
            callout_name, text = line.split(separator, 1)

            if callout_name not in callout_name_to_linenum:
                raise ParseError(f"Callout {callout_name} can't be found")

            linenum = callout_name_to_linenum[callout_name]
            callouts[linenum] = (callout_name, text)

        # Source blocks must preserve the content literally
        textlines = [TextNode(line) for line in content]

        _, kwargs = merge_args(args, kwargs, ["language"])

        language = kwargs.pop("language", "text")

        self._save(
            SourceNode(
                language,
                callouts=callouts,
                delimiter=delimiter,
                code=textlines,
                title=title,
                kwargs=kwargs,
            ))