Beispiel #1
0
    def is_html_block(
        line_to_parse: str,
        start_index: int,
        extracted_whitespace: Optional[str],
        token_stack: List[StackToken],
    ) -> Tuple[Optional[str], Optional[str]]:
        """
        Check whether the text at the given index opens an HTML block.

        The whitespace in front of the position must pass the "length of at
        most 3" check and the character at `start_index` must be the HTML block
        start character.  When both hold, the block type and the remaining tag
        text reported by `__determine_html_block_type` are returned; otherwise
        both elements of the returned pair are None.
        """

        assert extracted_whitespace is not None

        indent_is_acceptable = ParserHelper.is_length_less_than_or_equal_to(
            extracted_whitespace, 3
        )
        if not indent_is_acceptable or not ParserHelper.is_character_at_index(
            line_to_parse,
            start_index,
            HtmlHelper.__html_block_start_character,
        ):
            return None, None

        block_type, tag_remainder = HtmlHelper.__determine_html_block_type(
            token_stack,
            line_to_parse,
            start_index,
        )
        return block_type, tag_remainder
Beispiel #2
0
    def parse_html_block(parser_state, position_marker, extracted_whitespace):
        """
        Start an HTML block if the current position meets the criteria for one.

        Returns the list of tokens produced.  The list is empty when the
        position does not start an HTML block; otherwise it holds whatever
        `close_open_blocks_fn` returned for open paragraphs, followed by the
        new HTML block token.  On success an HTML block entry is also pushed
        onto `parser_state.token_stack`.
        """

        if not ParserHelper.is_length_less_than_or_equal_to(extracted_whitespace, 3):
            return []
        if not ParserHelper.is_character_at_index(
            position_marker.text_to_parse,
            position_marker.index_number,
            HtmlHelper.__html_block_start_character,
        ):
            return []

        block_type, tag_remainder = HtmlHelper.__determine_html_block_type(
            parser_state,
            position_marker.text_to_parse,
            position_marker.index_number,
        )
        if not block_type:
            return []

        # Only paragraphs are asked to close before the HTML block opens.
        emitted_tokens, _, _ = parser_state.close_open_blocks_fn(
            parser_state, only_these_blocks=[ParagraphStackToken],
        )
        parser_state.token_stack.append(
            HtmlBlockStackToken(block_type, tag_remainder)
        )
        emitted_tokens.append(
            HtmlBlockMarkdownToken(position_marker, extracted_whitespace)
        )
        return emitted_tokens
Beispiel #3
0
    def parse_setext_headings(
        parser_state,
        position_marker,
        extracted_whitespace,
        this_bq_count,
        stack_bq_count,
    ):
        """
        Turn the open paragraph into a setext heading if this line underlines it.

        The line qualifies when its leading whitespace passes the "length of at
        most 3" check, it starts with one of the setext characters, a paragraph
        is on top of the token stack, both block quote counts agree, and nothing
        but whitespace follows the run of setext characters.

        Returns the list of new tokens: empty when the line is not a setext
        underline, otherwise a single end token for the heading.
        """

        heading_tokens = []
        is_candidate = (
            ParserHelper.is_length_less_than_or_equal_to(extracted_whitespace, 3)
            and ParserHelper.is_character_at_index_one_of(
                position_marker.text_to_parse,
                position_marker.index_number,
                LeafBlockProcessor.__setext_characters,
            )
            and parser_state.token_stack[-1].is_paragraph
            and (this_bq_count == stack_bq_count)
        )
        if not is_candidate:
            return heading_tokens

        line_text = position_marker.text_to_parse
        underline_start = position_marker.index_number
        underline_char = line_text[underline_start]

        _, underline_end = ParserHelper.collect_while_character(
            line_text, underline_start, underline_char
        )
        line_end_index, trailing_whitespace = ParserHelper.extract_whitespace(
            line_text, underline_end
        )
        if line_end_index != len(line_text):
            return heading_tokens

        # The paragraph has no matching stack token to close through the usual
        # close_open_blocks path: its document token is swapped for a heading
        # token in place, so the end token is emitted by hand here.
        heading_tokens.append(
            EndMarkdownToken(
                MarkdownToken.token_setext_heading,
                extracted_whitespace,
                trailing_whitespace,
                None,
            )
        )

        # Walk backwards through the document to the most recent paragraph.
        document_tokens = parser_state.token_document
        paragraph_index = len(document_tokens) - 1
        while not document_tokens[paragraph_index].is_paragraph:
            paragraph_index -= 1

        paragraph_token = document_tokens[paragraph_index]
        document_tokens[paragraph_index] = SetextHeadingMarkdownToken(
            underline_char,
            underline_end - underline_start,
            paragraph_token.extra_data,
            position_marker,
            paragraph_token,
        )
        del parser_state.token_stack[-1]
        return heading_tokens
    def is_olist_start(
        parser_state,
        line_to_parse,
        start_index,
        extracted_whitespace,
        skip_whitespace_check=False,
        adj_ws=None,
    ):
        """
        Determine if we have the start of an numbered or ordered list.

        Args:
            parser_state: Parser state; only `token_stack` is read here.
            line_to_parse: The line being examined.
            start_index: Index in `line_to_parse` where the digits may start.
            extracted_whitespace: Whitespace found before `start_index`.
            skip_whitespace_check: When True, the "length of at most 3" check
                on the leading whitespace is bypassed.
            adj_ws: Whitespace to use for that check instead of
                `extracted_whitespace`; defaults to `extracted_whitespace`.

        Returns:
            A tuple `(is_start, index, my_count, end_whitespace_index)`:
            whether an ordered list starts here, the index just past the run of
            digits, the number of digits, and the first value returned by
            `ParserHelper.extract_whitespace` for the text after the list
            character.  When no digit is found at `start_index` (or the
            whitespace check fails) the tuple is `(False, None, None, -1)`.
        """
        is_start = False
        end_whitespace_index = -1
        index = None
        my_count = None
        if adj_ws is None:
            adj_ws = extracted_whitespace
        if (
            ParserHelper.is_length_less_than_or_equal_to(adj_ws, 3)
            or skip_whitespace_check
        ) and ParserHelper.is_character_at_index_one_of(
            line_to_parse, start_index, string.digits
        ):
            index = start_index
            while ParserHelper.is_character_at_index_one_of(
                line_to_parse, index, string.digits
            ):
                index += 1
            my_count = index - start_index
            olist_index_number = line_to_parse[start_index:index]
            LOGGER.debug("olist?%s<<count>>%s<<", olist_index_number, my_count)
            # Slice instead of indexing: when the digits run to the end of the
            # line, `index` equals len(line_to_parse) and `line_to_parse[index]`
            # would raise IndexError from what is only a debug statement.
            LOGGER.debug("olist>>%s", line_to_parse[index : index + 1])
            LOGGER.debug("index+1>>%s>>len>>%s", index + 1, len(line_to_parse))

            end_whitespace_index, _ = ParserHelper.extract_whitespace(
                line_to_parse, index + 1
            )
            LOGGER.debug(
                "end_whitespace_index>>%s>>len>>%s>>%s",
                end_whitespace_index,
                len(line_to_parse),
                olist_index_number,
            )

            # A list starts here only if all of the following hold:
            #  - at most 9 digits, followed by one of the list characters;
            #  - it is not the case that a paragraph is open outside of a list
            #    and the item is either empty to the end of the line or
            #    numbered something other than "1";
            #  - the list character is followed by whitespace or ends the line.
            if (
                my_count <= 9
                and ParserHelper.is_character_at_index_one_of(
                    line_to_parse,
                    index,
                    ListBlockProcessor.__olist_start_characters,
                )
                and not (
                    parser_state.token_stack[-1].is_paragraph
                    and not parser_state.token_stack[-2].is_list
                    and (
                        (end_whitespace_index == len(line_to_parse))
                        or olist_index_number != "1"
                    )
                )
                and (
                    ParserHelper.is_character_at_index_whitespace(
                        line_to_parse, index + 1
                    )
                    or ((index + 1) == len(line_to_parse))
                )
            ):
                is_start = True

        LOGGER.debug("is_olist_start>>result>>%s", is_start)
        return is_start, index, my_count, end_whitespace_index
Beispiel #5
0
    def parse_setext_headings(
        parser_state: ParserState,
        position_marker: PositionMarker,
        extracted_whitespace: Optional[str],
        block_quote_data: BlockQuoteData,
    ) -> List[MarkdownToken]:
        """
        Convert the open paragraph into a setext heading when this line
        underlines it.

        The line is considered only if its leading whitespace passes the
        "length of at most 3" check, it starts with a setext character, a
        paragraph is on top of the token stack and the current and stack block
        quote counts match.  The heading is then created only when the line is
        not a paragraph continuation and nothing but whitespace follows the run
        of setext characters.

        Returns the list that `__create_setext_token` was given to fill; it is
        empty when no heading was created.
        """

        heading_tokens: List[MarkdownToken] = []
        assert extracted_whitespace is not None

        if not ParserHelper.is_length_less_than_or_equal_to(extracted_whitespace, 3):
            return heading_tokens
        if not ParserHelper.is_character_at_index_one_of(
            position_marker.text_to_parse,
            position_marker.index_number,
            LeafBlockProcessor.__setext_characters,
        ):
            return heading_tokens
        if not parser_state.token_stack[-1].is_paragraph:
            return heading_tokens
        if block_quote_data.current_count != block_quote_data.stack_count:
            return heading_tokens

        # Evaluated before the line is scanned, exactly as the check order
        # above requires.
        continues_paragraph = LeafBlockProcessor.__adjust_continuation_for_active_list(
            parser_state, position_marker
        )

        line_text = position_marker.text_to_parse
        underline_start = position_marker.index_number
        _, underline_end = ParserHelper.collect_while_character(
            line_text, underline_start, line_text[underline_start]
        )
        assert underline_end is not None
        line_end_index, trailing_whitespace = ParserHelper.extract_whitespace(
            line_text, underline_end
        )

        if continues_paragraph or line_end_index != len(line_text):
            return heading_tokens

        LeafBlockProcessor.__create_setext_token(
            parser_state,
            position_marker,
            underline_end,
            heading_tokens,
            extracted_whitespace,
            trailing_whitespace,
        )
        return heading_tokens
Beispiel #6
0
    def is_thematic_break(
        line_to_parse: str,
        start_index: int,
        extracted_whitespace: Optional[str],
        skip_whitespace_check: bool = False,
        whitespace_allowed_between_characters: bool = True,
    ) -> Tuple[Optional[str], Optional[int]]:
        """
        Decide whether the line, from `start_index` on, is a thematic break.

        The rest of the line must consist of at least three copies of the
        character found at `start_index` (one of the thematic break
        characters), optionally mixed with whitespace when
        `whitespace_allowed_between_characters` is set.  The leading whitespace
        must pass the "length of at most 3" check unless
        `skip_whitespace_check` is True.

        Returns the break character and the index at which the scan ended
        (the line length), or `(None, None)` when there is no thematic break.
        """

        assert extracted_whitespace is not None
        starts_with_break_character = ParserHelper.is_character_at_index_one_of(
            line_to_parse, start_index, LeafBlockProcessor.__thematic_break_characters
        )
        POGGER.debug("skip_whitespace_check>>$", skip_whitespace_check)
        POGGER.debug("is_thematic_character>>$", starts_with_break_character)

        indent_is_acceptable = (
            ParserHelper.is_length_less_than_or_equal_to(extracted_whitespace, 3)
            or skip_whitespace_check
        )
        if not (indent_is_acceptable and starts_with_break_character):
            return None, None

        break_character = line_to_parse[start_index]
        line_length = len(line_to_parse)
        scan_index = start_index
        break_character_count = 0

        while scan_index < line_length:
            is_gap = (
                whitespace_allowed_between_characters
                and ParserHelper.is_character_at_index_whitespace(
                    line_to_parse, scan_index
                )
            )
            if not is_gap:
                if line_to_parse[scan_index] != break_character:
                    break  # pragma: no cover
                break_character_count += 1
            scan_index += 1

        POGGER.debug("char_count>>$", break_character_count)
        POGGER.debug("index>>$", scan_index)
        POGGER.debug("line_to_parse_size>>$", line_length)
        if break_character_count >= 3 and scan_index == line_length:
            return break_character, scan_index
        return None, None
    def __close_indented_block_if_indent_not_there(parser_state, extracted_whitespace):
        """
        Close the indented code block on top of the stack when the whitespace
        passes the "length of at most 3" check.

        Returns the tokens produced: the block's close token followed by the
        tokens from `extract_markdown_tokens_back_to_blank_line`, or an empty
        list when nothing was closed.
        """

        top_of_stack = parser_state.token_stack[-1]
        must_close = (
            top_of_stack.is_indented_code_block
            and ParserHelper.is_length_less_than_or_equal_to(extracted_whitespace, 3)
        )
        if not must_close:
            return []

        closing_tokens = [top_of_stack.generate_close_token()]
        del parser_state.token_stack[-1]
        closing_tokens.extend(
            ContainerBlockProcessor.extract_markdown_tokens_back_to_blank_line(
                parser_state
            )
        )
        return closing_tokens
    def is_block_quote_start(line_to_parse,
                             start_index,
                             extracted_whitespace,
                             adj_ws=None):
        """
        Report whether a block quote section starts at `start_index`.

        The whitespace checked is `adj_ws` when supplied, otherwise
        `extracted_whitespace`; it must pass the "length of at most 3" check
        and the character at `start_index` must be the block quote character.
        """

        whitespace_to_check = extracted_whitespace if adj_ws is None else adj_ws
        return bool(
            ParserHelper.is_length_less_than_or_equal_to(whitespace_to_check, 3)
            and ParserHelper.is_character_at_index(
                line_to_parse,
                start_index,
                BlockQuoteProcessor.__block_quote_character,
            )
        )
Beispiel #9
0
    def is_atx_heading(
        line_to_parse: str,
        start_index: int,
        extracted_whitespace: Optional[str],
        skip_whitespace_check: bool = False,
    ) -> Tuple[bool, Optional[int], Optional[int], Optional[str]]:
        """
        Decide whether an ATX heading starts at `start_index`.

        A heading needs a run of at most six ATX characters that is followed
        either by at least one space or by the end of the line.  The leading
        whitespace must pass the "length of at most 3" check unless
        `skip_whitespace_check` is True.

        Returns `(True, index_after_spaces, hash_count, spaces_after_hashes)`
        for a heading and `(False, None, None, None)` otherwise.
        """

        assert extracted_whitespace is not None

        indent_is_acceptable = (
            ParserHelper.is_length_less_than_or_equal_to(extracted_whitespace, 3)
            or skip_whitespace_check
        )
        if not indent_is_acceptable or not ParserHelper.is_character_at_index(
            line_to_parse,
            start_index,
            LeafBlockProcessor.__atx_character,
        ):
            return False, None, None, None

        hash_count, after_hashes_index = ParserHelper.collect_while_character(
            line_to_parse,
            start_index,
            LeafBlockProcessor.__atx_character,
        )
        assert after_hashes_index is not None

        # Only space characters are collected after the run of ATX characters.
        _, text_start_index = ParserHelper.collect_while_character(
            line_to_parse, after_hashes_index, " "
        )
        spaces_after_hashes = line_to_parse[after_hashes_index:text_start_index]

        assert hash_count is not None
        if hash_count > 6:
            return False, None, None, None
        if not spaces_after_hashes and text_start_index != len(line_to_parse):
            return False, None, None, None

        return True, text_start_index, hash_count, spaces_after_hashes
    def is_ulist_start(
        parser_state,
        line_to_parse,
        start_index,
        extracted_whitespace,
        skip_whitespace_check=False,
        adj_ws=None,
    ):
        """
        Report whether an un-numbered (bullet) list starts at `start_index`.

        Returns `(is_start, after_all_whitespace_index)`.  The second element
        is -1 unless the line passed the initial marker checks, in which case
        it is the first value returned by `ParserHelper.extract_whitespace`
        for the text after the marker.
        """
        LOGGER.debug("is_ulist_start>>pre>>")
        is_start = False
        after_all_whitespace_index = -1
        whitespace_to_check = extracted_whitespace if adj_ws is None else adj_ws

        # A marker is one of the list start characters followed by whitespace
        # or by the end of the line.
        looks_like_marker = (
            (
                ParserHelper.is_length_less_than_or_equal_to(whitespace_to_check, 3)
                or skip_whitespace_check
            )
            and ParserHelper.is_character_at_index_one_of(
                line_to_parse,
                start_index,
                ListBlockProcessor.__ulist_start_characters,
            )
            and (
                ParserHelper.is_character_at_index_whitespace(
                    line_to_parse, start_index + 1
                )
                or ((start_index + 1) == len(line_to_parse))
            )
        )

        if looks_like_marker:
            LOGGER.debug("is_ulist_start>>mid>>")
            after_all_whitespace_index, _ = ParserHelper.extract_whitespace(
                line_to_parse, start_index + 1
            )
            LOGGER.debug(
                "after_all_whitespace_index>>%s>>len>>%s",
                str(after_all_whitespace_index),
                str(len(line_to_parse)),
            )

            break_character, _ = LeafBlockProcessor.is_thematic_break(
                line_to_parse, start_index, extracted_whitespace
            )
            if not break_character:
                # An item that is empty to the end of the line does not start a
                # list while a paragraph is open outside of any list.
                stack = parser_state.token_stack
                is_start = not (
                    stack[-1].is_paragraph
                    and not stack[-2].is_list
                    and (after_all_whitespace_index == len(line_to_parse))
                )

        LOGGER.debug("is_ulist_start>>result>>%s", str(is_start))
        return is_start, after_all_whitespace_index
    def __is_link_reference_definition(
        parser_state: ParserState,
        line_to_parse: str,
        start_index: int,
        extracted_whitespace: Optional[str],
    ) -> bool:
        """
        Determine whether or not we have the start of a link reference definition.

        Args:
            parser_state: Parser state; only the top of `token_stack` is read.
            line_to_parse: The line being examined.
            start_index: Index in `line_to_parse` of the candidate start
                character.
            extracted_whitespace: Whitespace found before `start_index`; must
                not be None.

        Returns:
            False when a paragraph is on top of the token stack, when the
            whitespace fails the "length of at most 3" check, when the
            character at `start_index` is not a definition start character, or
            when the text after that character ends in a backslash that is not
            itself preceded by a backslash pairing with it.  True otherwise.
        """

        if parser_state.token_stack[-1].is_paragraph:
            return False

        assert extracted_whitespace is not None
        if not (
            ParserHelper.is_length_less_than_or_equal_to(extracted_whitespace, 3)
            and ParserHelper.is_character_at_index_one_of(
                line_to_parse,
                start_index,
                LinkReferenceDefinitionHelper.__lrd_start_character,
            )
        ):
            return False

        remaining_line = line_to_parse[start_index + 1 :]
        if not (
            remaining_line
            and remaining_line[-1] == InlineHelper.backslash_character
        ):
            return True

        # The search must begin at the start of `remaining_line`.  A separate
        # local is used so that the caller's `start_index`, which is an offset
        # into `line_to_parse` and not into `remaining_line`, can never be
        # used as the initial search position by mistake.
        remaining_line_size = len(remaining_line)
        search_index = 0
        found_index = remaining_line.find(
            InlineHelper.backslash_character, search_index
        )
        POGGER.debug(">>$<<$", remaining_line, remaining_line_size)
        POGGER.debug(">>$<<$", remaining_line, search_index)
        POGGER.debug(">>$<<", found_index)

        # Step over each backslash together with the character it is paired
        # with, until either no backslash remains or the one found is the
        # final character of the line.
        while found_index != -1 and found_index < (remaining_line_size - 1):
            search_index = found_index + 2
            POGGER.debug(">>$<<$", remaining_line, search_index)
            found_index = remaining_line.find(
                InlineHelper.backslash_character, search_index
            )
            POGGER.debug(">>$<<", found_index)
        POGGER.debug(">>>>>>>$<<", found_index)

        # If the scan stopped on the final character, the trailing backslash
        # is unpaired and the definition cannot continue.
        return found_index != remaining_line_size - 1
Beispiel #12
0
    def __is_link_reference_definition(position_marker, line_to_parse,
                                       start_index, extracted_whitespace):
        """
        Report whether a link reference definition may start at `start_index`.

        The answer is False while a paragraph is on top of the token stack.
        Otherwise the whitespace must pass the "length of at most 3" check and
        the character at `start_index` must be one of the definition start
        characters.
        """

        if position_marker.token_stack[-1].is_paragraph:
            return False

        return bool(
            ParserHelper.is_length_less_than_or_equal_to(extracted_whitespace, 3)
            and ParserHelper.is_character_at_index_one_of(
                line_to_parse,
                start_index,
                LinkReferenceDefinitionHelper.__lrd_start_character,
            )
        )
Beispiel #13
0
    def is_fenced_code_block(
        line_to_parse: str,
        start_index: int,
        extracted_whitespace: Optional[str],
        skip_whitespace_check: bool = False,
    ) -> Tuple[bool, Optional[int], Optional[str], Optional[int]]:
        """
        Decide whether a fenced code block starts at `start_index`.

        A fence is a run of at least three copies of the fence character found
        at `start_index`.  The leading whitespace must pass the "length of at
        most 3" check unless `skip_whitespace_check` is True.

        Returns `(True, index_after_whitespace, whitespace_before_info_string,
        fence_length)` for a fence and `(False, None, None, None)` otherwise.
        """

        assert extracted_whitespace is not None

        indent_is_acceptable = (
            skip_whitespace_check
            or ParserHelper.is_length_less_than_or_equal_to(extracted_whitespace, 3)
        )
        if not indent_is_acceptable or not ParserHelper.is_character_at_index_one_of(
            line_to_parse,
            start_index,
            LeafBlockProcessor.__fenced_code_block_start_characters,
        ):
            return False, None, None, None

        POGGER.debug("ifcb:collected_count>>$<<$<<", line_to_parse, start_index)
        fence_length, after_fence_index = ParserHelper.collect_while_character(
            line_to_parse, start_index, line_to_parse[start_index]
        )
        POGGER.debug("ifcb:collected_count:$", fence_length)
        assert fence_length is not None
        assert after_fence_index is not None

        info_string_index, whitespace_before_info = ParserHelper.extract_whitespace(
            line_to_parse, after_fence_index
        )

        if fence_length < 3:
            return False, None, None, None

        POGGER.debug("ifcb:True")
        return True, info_string_index, whitespace_before_info, fence_length
Beispiel #14
0
    def is_thematic_break(
        line_to_parse,
        start_index,
        extracted_whitespace,
        skip_whitespace_check=False,
    ):
        """
        Decide whether the line, from `start_index` on, is a thematic break.

        The rest of the line must consist only of whitespace and at least
        three copies of the thematic break character found at `start_index`.
        The leading whitespace must pass the "length of at most 3" check
        unless `skip_whitespace_check` is True.

        Returns the break character and the index at which the scan ended
        (the line length), or `(None, None)` when there is no thematic break.
        """

        indent_is_acceptable = (
            ParserHelper.is_length_less_than_or_equal_to(extracted_whitespace, 3)
            or skip_whitespace_check
        )
        if not indent_is_acceptable or not ParserHelper.is_character_at_index_one_of(
            line_to_parse,
            start_index,
            LeafBlockProcessor.__thematic_break_characters,
        ):
            return None, None

        break_character = line_to_parse[start_index]
        line_length = len(line_to_parse)
        scan_index = start_index
        break_character_count = 0

        while scan_index < line_length:
            if not ParserHelper.is_character_at_index_whitespace(
                line_to_parse, scan_index
            ):
                if line_to_parse[scan_index] != break_character:
                    break
                break_character_count += 1
            scan_index += 1

        if break_character_count >= 3 and scan_index == line_length:
            return break_character, scan_index
        return None, None
Beispiel #15
0
    def is_fenced_code_block(
        line_to_parse,
        start_index,
        extracted_whitespace,
        skip_whitespace_check=False,
    ):
        """
        Decide whether a fenced code block starts at `start_index`.

        A fence is a run of at least three copies of the fence character found
        at `start_index`.  The leading whitespace must pass the "length of at
        most 3" check unless `skip_whitespace_check` is True.

        Returns `(True, index_after_whitespace, whitespace_before_info_string,
        fence_length)` for a fence and `(False, None, None, None)` otherwise.
        """

        indent_is_acceptable = (
            ParserHelper.is_length_less_than_or_equal_to(extracted_whitespace, 3)
            or skip_whitespace_check
        )
        if not indent_is_acceptable or not ParserHelper.is_character_at_index_one_of(
            line_to_parse,
            start_index,
            LeafBlockProcessor.__fenced_code_block_start_characters,
        ):
            return False, None, None, None

        LOGGER.debug(
            "ifcb:collected_count>>%s<<%s<<", line_to_parse, str(start_index)
        )
        fence_length, after_fence_index = ParserHelper.collect_while_character(
            line_to_parse, start_index, line_to_parse[start_index]
        )
        LOGGER.debug("ifcb:collected_count:%s", str(fence_length))

        info_string_index, whitespace_before_info = ParserHelper.extract_whitespace(
            line_to_parse, after_fence_index
        )

        if fence_length < 3:
            return False, None, None, None

        LOGGER.debug("ifcb:True")
        return True, info_string_index, whitespace_before_info, fence_length
Beispiel #16
0
    def parse_atx_headings(parser_state, position_marker,
                           extracted_whitespace):
        """
        Handle the parsing of an atx heading.

        The line is taken from `position_marker.text_to_parse`, starting at
        `position_marker.index_number`.  It is treated as a heading when the
        leading whitespace passes the "length of at most 3" check, the line
        starts with a run of at most six atx characters, and that run is
        followed by whitespace or by the end of the line.

        Returns the list of new tokens.  It is empty when the line is not an
        atx heading.  Otherwise it holds whatever `close_open_blocks_fn`
        returned, followed by the heading start token, a text token for the
        heading content and the matching end token.
        """

        new_tokens = []

        if ParserHelper.is_length_less_than_or_equal_to(
                extracted_whitespace,
                3) and ParserHelper.is_character_at_index(
                    position_marker.text_to_parse,
                    position_marker.index_number,
                    LeafBlockProcessor.__atx_character,
                ):
            # Count the opening run of atx characters, then the whitespace
            # that separates it from the heading text.
            hash_count, new_index = ParserHelper.collect_while_character(
                position_marker.text_to_parse,
                position_marker.index_number,
                LeafBlockProcessor.__atx_character,
            )
            (
                non_whitespace_index,
                extracted_whitespace_at_start,
            ) = ParserHelper.extract_whitespace(position_marker.text_to_parse,
                                                new_index)

            if hash_count <= 6 and (extracted_whitespace_at_start
                                    or non_whitespace_index == len(
                                        position_marker.text_to_parse)):

                # NOTE(review): presumably closes any blocks still open before
                # the heading starts -- confirm against close_open_blocks_fn.
                new_tokens, _, _ = parser_state.close_open_blocks_fn(
                    parser_state, new_tokens)
                remaining_line = position_marker.text_to_parse[
                    non_whitespace_index:]
                (
                    end_index,
                    extracted_whitespace_at_end,
                ) = ParserHelper.extract_whitespace_from_end(remaining_line)
                # Walk backwards over any atx characters that sit just before
                # `end_index`, counting them as a possible closing sequence.
                remove_trailing_count = 0
                while (end_index > 0 and remaining_line[end_index - 1]
                       == LeafBlockProcessor.__atx_character):
                    end_index -= 1
                    remove_trailing_count += 1
                extracted_whitespace_before_end = ""
                if remove_trailing_count:
                    if end_index > 0:
                        if ParserHelper.is_character_at_index_whitespace(
                                remaining_line, end_index - 1):
                            # The trailing run is preceded by whitespace: drop
                            # the run, then separate the whitespace in front of
                            # it from the heading text.
                            remaining_line = remaining_line[:end_index]
                            (
                                end_index,
                                extracted_whitespace_before_end,
                            ) = ParserHelper.extract_whitespace_from_end(
                                remaining_line)
                            remaining_line = remaining_line[:end_index]
                        else:
                            # The trailing run directly follows a
                            # non-whitespace character: the line is left as it
                            # is and no closing sequence is recorded.
                            extracted_whitespace_at_end = ""
                            remove_trailing_count = 0
                    else:
                        # The walk reached the start of the line, so there is
                        # no heading text at all.
                        remaining_line = ""
                else:
                    # No trailing atx characters: only split the trailing
                    # whitespace off the heading text.
                    extracted_whitespace_at_end = remaining_line[end_index:]
                    remaining_line = remaining_line[0:end_index]
                start_token = AtxHeadingMarkdownToken(
                    hash_count,
                    remove_trailing_count,
                    extracted_whitespace,
                    position_marker,
                )
                new_tokens.append(start_token)
                new_tokens.append(
                    TextMarkdownToken(remaining_line,
                                      extracted_whitespace_at_start))
                end_token = EndMarkdownToken(
                    "atx",
                    extracted_whitespace_at_end,
                    extracted_whitespace_before_end,
                    None,
                )
                # Link the end token back to the start token it closes.
                end_token.start_markdown_token = start_token
                new_tokens.append(end_token)
        return new_tokens