def is_olist_start(
        parser_state,
        line_to_parse,
        start_index,
        extracted_whitespace,
        skip_whitespace_check=False,
        adj_ws=None,
    ):
        """
        Determine if we have the start of an numbered or ordered list.

        A start is reported when all of the following hold:
          * the leading whitespace (adj_ws, defaulting to
            extracted_whitespace) is at most 3 characters long, or
            skip_whitespace_check is set;
          * one to nine digits begin at start_index;
          * the digits are followed by one of the ordered-list start
            characters, which is itself followed by whitespace or the end
            of the line;
          * the candidate is not interrupting a paragraph that is outside
            of a list with either a number other than "1" or with
            end_whitespace_index equal to the length of the line.

        Args:
            parser_state: Parser state; only its token_stack is read.
            line_to_parse: The line being examined.
            start_index: Index at which the list number is expected.
            extracted_whitespace: Whitespace found before start_index.
            skip_whitespace_check: When True, ignore the 3 character limit.
            adj_ws: Optional replacement for extracted_whitespace in the
                leading whitespace check.

        Returns:
            A tuple (is_start, index, my_count, end_whitespace_index).
            index is the position just past the digits and my_count is the
            number of digits; both are None when no digits were examined.
            end_whitespace_index is the first value returned by
            ParserHelper.extract_whitespace for the text after the list
            character, or -1 when no digits were examined.
        """
        is_start = False
        end_whitespace_index = -1
        index = None
        my_count = None
        if adj_ws is None:
            adj_ws = extracted_whitespace
        if (
            ParserHelper.is_length_less_than_or_equal_to(adj_ws, 3)
            or skip_whitespace_check
        ) and ParserHelper.is_character_at_index_one_of(
            line_to_parse, start_index, string.digits
        ):
            # Walk past the run of digits that makes up the list number.
            index = start_index
            while ParserHelper.is_character_at_index_one_of(
                line_to_parse, index, string.digits
            ):
                index += 1
            my_count = index - start_index
            olist_index_number = line_to_parse[start_index:index]
            LOGGER.debug("olist?%s<<count>>%s<<", olist_index_number, my_count)
            # The digits may run to the very end of the line, leaving index
            # one past the last character.  Slice rather than index so that
            # this debug statement can never raise an IndexError.
            LOGGER.debug("olist>>%s", line_to_parse[index : index + 1])
            LOGGER.debug(
                "index+1>>%s>>len>>%s", index + 1, len(line_to_parse)
            )

            end_whitespace_index, _ = ParserHelper.extract_whitespace(
                line_to_parse, index + 1
            )
            LOGGER.debug(
                "end_whitespace_index>>%s>>len>>%s>>%s",
                end_whitespace_index,
                len(line_to_parse),
                olist_index_number,
            )

            if (
                my_count <= 9
                and ParserHelper.is_character_at_index_one_of(
                    line_to_parse,
                    index,
                    ListBlockProcessor.__olist_start_characters,
                )
                # Inside a paragraph that is not part of a list, only a
                # non-empty item numbered "1" is accepted as a list start.
                and not (
                    parser_state.token_stack[-1].is_paragraph
                    and not parser_state.token_stack[-2].is_list
                    and (
                        (end_whitespace_index == len(line_to_parse))
                        or olist_index_number != "1"
                    )
                )
                # The list character must be followed by whitespace or be
                # the last character on the line.
                and (
                    ParserHelper.is_character_at_index_whitespace(
                        line_to_parse, index + 1
                    )
                    or ((index + 1) == len(line_to_parse))
                )
            ):
                is_start = True

        LOGGER.debug("is_olist_start>>result>>%s", is_start)
        return is_start, index, my_count, end_whitespace_index
Example #2
0
    def is_thematic_break(
        line_to_parse: str,
        start_index: int,
        extracted_whitespace: Optional[str],
        skip_whitespace_check: bool = False,
        whitespace_allowed_between_characters: bool = True,
    ) -> Tuple[Optional[str], Optional[int]]:
        """
        Decide whether the line, from start_index onwards, is a thematic break.

        The line qualifies when the leading whitespace is at most 3 characters
        long (or skip_whitespace_check is set), the character at start_index is
        a thematic break character, and the rest of the line holds nothing but
        that same character (at least 3 times in total) and, when
        whitespace_allowed_between_characters is set, whitespace.

        Returns the break character and the index at the end of the line, or
        (None, None) if the line is not a thematic break.
        """

        assert extracted_whitespace is not None
        is_thematic_character = ParserHelper.is_character_at_index_one_of(
            line_to_parse, start_index, LeafBlockProcessor.__thematic_break_characters
        )
        POGGER.debug("skip_whitespace_check>>$", skip_whitespace_check)
        POGGER.debug("is_thematic_character>>$", is_thematic_character)

        indent_is_acceptable = (
            ParserHelper.is_length_less_than_or_equal_to(extracted_whitespace, 3)
            or skip_whitespace_check
        )
        if not (indent_is_acceptable and is_thematic_character):
            return None, None

        break_character = line_to_parse[start_index]
        line_length = len(line_to_parse)
        scan_index = start_index
        marker_count = 0

        # Consume break characters (and optionally whitespace) until something
        # else shows up or the line runs out.
        while scan_index < line_length:
            if (
                whitespace_allowed_between_characters
                and ParserHelper.is_character_at_index_whitespace(
                    line_to_parse, scan_index
                )
            ):
                scan_index += 1
                continue
            if line_to_parse[scan_index] != break_character:
                break  # pragma: no cover
            scan_index += 1
            marker_count += 1

        POGGER.debug("char_count>>$", marker_count)
        POGGER.debug("index>>$", scan_index)
        POGGER.debug("line_to_parse_size>>$", line_length)

        # Only a scan that consumed the whole line, with enough markers, counts.
        if marker_count >= 3 and scan_index == line_length:
            return break_character, scan_index
        return None, None
def test_is_character_at_index_whitespace_without_whitespace():
    """
    Make sure that a string without whitespace at the index is handled properly.
    """

    # Arrange
    text_to_check = "a"
    index_to_check = 0
    expected_result = False

    # Act
    actual_result = ParserHelper.is_character_at_index_whitespace(
        text_to_check, index_to_check
    )

    # Assert
    assert expected_result == actual_result
def test_is_character_at_index_whitespace_with_whitespace_at_end():
    """
    Make sure that whitespace in the final position of the string is detected.
    """

    # Arrange
    text_to_check = "this is a test "
    index_to_check = len(text_to_check) - 1
    expected_result = True

    # Act
    actual_result = ParserHelper.is_character_at_index_whitespace(
        text_to_check, index_to_check
    )

    # Assert
    assert expected_result == actual_result
def test_is_character_at_index_whitespace_with_low_index():
    """
    Make sure that an index below the start of the string is handled properly.
    """

    # Arrange
    text_to_check = "this is a test"
    index_to_check = -1
    expected_result = False

    # Act
    actual_result = ParserHelper.is_character_at_index_whitespace(
        text_to_check, index_to_check
    )

    # Assert
    assert expected_result == actual_result
    def is_ulist_start(
        parser_state,
        line_to_parse,
        start_index,
        extracted_whitespace,
        skip_whitespace_check=False,
        adj_ws=None,
    ):
        """
        Decide whether the line, at start_index, begins an unordered list item.

        The leading whitespace (adj_ws if given, otherwise
        extracted_whitespace) must be at most 3 characters long unless
        skip_whitespace_check is set, the character at start_index must be one
        of the unordered list start characters, and that character must be
        followed by whitespace or the end of the line.  A line that is a
        thematic break is never a list start, and neither is a candidate that
        sits on top of a paragraph outside of a list when
        after_all_whitespace_index equals the length of the line.

        Returns a tuple of (is_start, after_all_whitespace_index), where the
        index is -1 if the list character checks did not pass.
        """
        LOGGER.debug("is_ulist_start>>pre>>")
        is_start = False
        after_all_whitespace_index = -1
        if adj_ws is None:
            adj_ws = extracted_whitespace

        looks_like_list_marker = (
            (
                ParserHelper.is_length_less_than_or_equal_to(adj_ws, 3)
                or skip_whitespace_check
            )
            and ParserHelper.is_character_at_index_one_of(
                line_to_parse,
                start_index,
                ListBlockProcessor.__ulist_start_characters,
            )
            and (
                ParserHelper.is_character_at_index_whitespace(
                    line_to_parse, start_index + 1
                )
                or ((start_index + 1) == len(line_to_parse))
            )
        )

        if looks_like_list_marker:
            LOGGER.debug("is_ulist_start>>mid>>")
            after_all_whitespace_index, _ = ParserHelper.extract_whitespace(
                line_to_parse, start_index + 1
            )
            line_length = len(line_to_parse)
            LOGGER.debug(
                "after_all_whitespace_index>>%s>>len>>%s",
                str(after_all_whitespace_index),
                str(line_length),
            )

            # A thematic break wins over a list start that uses the same
            # character.
            break_character, _ = LeafBlockProcessor.is_thematic_break(
                line_to_parse, start_index, extracted_whitespace
            )
            if not break_character and (
                not parser_state.token_stack[-1].is_paragraph
                or parser_state.token_stack[-2].is_list
                or after_all_whitespace_index != line_length
            ):
                is_start = True

        LOGGER.debug("is_ulist_start>>result>>%s", str(is_start))
        return is_start, after_all_whitespace_index
    def __ensure_stack_at_level(
        parser_state,
        this_bq_count,
        stack_bq_count,
        extracted_whitespace,
        position_marker,
        original_start_index,
    ):
        """
        Bring the token stack up to the block quote depth found on this line.

        When the line has more block quote levels than the stack, any open
        paragraph or indented code block is closed first, and then one
        BlockQuoteStackToken and one BlockQuoteMarkdownToken are added for
        every missing level.

        Returns the list of tokens produced and the updated stack count.
        """

        if not this_bq_count > stack_bq_count:
            return [], stack_bq_count

        produced_tokens, _, _ = parser_state.close_open_blocks_fn(
            parser_state,
            only_these_blocks=[ParagraphStackToken, IndentedCodeBlockStackToken],
        )

        source_text = position_marker.text_to_parse
        while this_bq_count > stack_bq_count:
            parser_state.token_stack.append(BlockQuoteStackToken())
            stack_bq_count += 1

            # Each new level is positioned at its own block quote character.
            level_position_marker = PositionMarker(
                position_marker.line_number,
                original_start_index,
                source_text,
            )
            produced_tokens.append(
                BlockQuoteMarkdownToken(extracted_whitespace, level_position_marker)
            )

            assert (
                source_text[original_start_index]
                == BlockQuoteProcessor.__block_quote_character
            )
            # Step over the block quote character and at most one whitespace
            # character that follows it.
            original_start_index += 1
            if ParserHelper.is_character_at_index_whitespace(
                source_text, original_start_index
            ):
                original_start_index += 1

        return produced_tokens, stack_bq_count
Example #8
0
    def is_thematic_break(
        line_to_parse,
        start_index,
        extracted_whitespace,
        skip_whitespace_check=False,
    ):
        """
        Decide whether the line, from start_index onwards, is a thematic break.

        The leading whitespace must be at most 3 characters long (unless
        skip_whitespace_check is set), the character at start_index must be a
        thematic break character, and the rest of the line may only contain
        that same character (at least 3 times in total) and whitespace.

        Returns the break character and the index at the end of the line, or
        (None, None) if the line is not a thematic break.
        """

        if not (
            (
                ParserHelper.is_length_less_than_or_equal_to(extracted_whitespace, 3)
                or skip_whitespace_check
            )
            and ParserHelper.is_character_at_index_one_of(
                line_to_parse,
                start_index,
                LeafBlockProcessor.__thematic_break_characters,
            )
        ):
            return None, None

        break_character = line_to_parse[start_index]
        line_length = len(line_to_parse)
        scan_index = start_index
        marker_count = 0

        # Consume whitespace and break characters until something else is
        # found or the line is exhausted.
        while scan_index < line_length:
            if ParserHelper.is_character_at_index_whitespace(
                line_to_parse, scan_index
            ):
                scan_index += 1
                continue
            if line_to_parse[scan_index] != break_character:
                break
            scan_index += 1
            marker_count += 1

        if marker_count >= 3 and scan_index == line_length:
            return break_character, scan_index
        return None, None
Example #9
0
    def parse_atx_headings(parser_state, position_marker,
                           extracted_whitespace):
        """
        Try to parse the current line as an atx heading.

        The line is a heading when the leading whitespace is at most 3
        characters long, a run of 1 to 6 atx characters starts at the current
        index, and that run is followed by whitespace or the end of the line.
        A closing run of atx characters is removed from the heading text only
        if it is preceded by whitespace or makes up the entire text.

        Returns the new tokens (closing tokens for open blocks, the heading
        token, its text token and its end token), or an empty list if the line
        is not an atx heading.
        """

        new_tokens = []

        if not ParserHelper.is_length_less_than_or_equal_to(
                extracted_whitespace, 3):
            return new_tokens

        line_text = position_marker.text_to_parse
        atx_character = LeafBlockProcessor.__atx_character
        if not ParserHelper.is_character_at_index(
                line_text, position_marker.index_number, atx_character):
            return new_tokens

        hash_count, after_hashes_index = ParserHelper.collect_while_character(
            line_text, position_marker.index_number, atx_character)
        text_start_index, extracted_whitespace_at_start = (
            ParserHelper.extract_whitespace(line_text, after_hashes_index))

        is_heading = hash_count <= 6 and (
            extracted_whitespace_at_start
            or text_start_index == len(line_text))
        if not is_heading:
            return new_tokens

        new_tokens, _, _ = parser_state.close_open_blocks_fn(
            parser_state, new_tokens)

        remaining_line = line_text[text_start_index:]
        end_index, extracted_whitespace_at_end = (
            ParserHelper.extract_whitespace_from_end(remaining_line))

        # Count the atx characters sitting directly before the trailing
        # whitespace; they may be a closing sequence.
        remove_trailing_count = 0
        while (end_index > 0
               and remaining_line[end_index - 1] == atx_character):
            end_index -= 1
            remove_trailing_count += 1

        extracted_whitespace_before_end = ""
        if not remove_trailing_count:
            # No closing sequence: split the text from its trailing whitespace.
            extracted_whitespace_at_end = remaining_line[end_index:]
            remaining_line = remaining_line[0:end_index]
        elif not end_index > 0:
            # The text is nothing but the closing sequence.
            remaining_line = ""
        elif ParserHelper.is_character_at_index_whitespace(
                remaining_line, end_index - 1):
            # A proper closing sequence: drop it, then split off the
            # whitespace that separated it from the text.
            remaining_line = remaining_line[:end_index]
            end_index, extracted_whitespace_before_end = (
                ParserHelper.extract_whitespace_from_end(remaining_line))
            remaining_line = remaining_line[:end_index]
        else:
            # The atx characters touch the text, so they are part of it.
            extracted_whitespace_at_end = ""
            remove_trailing_count = 0

        start_token = AtxHeadingMarkdownToken(
            hash_count,
            remove_trailing_count,
            extracted_whitespace,
            position_marker,
        )
        new_tokens.append(start_token)

        text_token = TextMarkdownToken(remaining_line,
                                       extracted_whitespace_at_start)
        new_tokens.append(text_token)

        end_token = EndMarkdownToken(
            "atx",
            extracted_whitespace_at_end,
            extracted_whitespace_before_end,
            None,
        )
        end_token.start_markdown_token = start_token
        new_tokens.append(end_token)
        return new_tokens
Example #10
0
    def __handle_link_reference_definition(
        self,
        context: PluginScanContext,
        token: MarkdownToken,
        num_container_tokens: int,
    ) -> None:
        """
        Check a link reference definition found inside the current block quote.

        An error is reported when the definition has leading whitespace, when
        the whitespace before the link destination has whitespace right after
        a newline, and when the whitespace before the link title has
        whitespace right after a newline.  Afterwards, the block quote line
        index for this container level is advanced by the number of lines
        that the definition covers.
        """
        enclosing_block_quote = cast(
            BlockQuoteMarkdownToken, self.__container_tokens[-1]
        )
        assert enclosing_block_quote.leading_spaces is not None
        lrd_token = cast(LinkReferenceDefinitionMarkdownToken, token)
        newline = ParserHelper.newline_character

        # Whitespace before the definition itself.
        if lrd_token.extracted_whitespace:
            whitespace_length = len(lrd_token.extracted_whitespace)
            self.report_next_token_error(
                context,
                token,
                column_number_delta=-(lrd_token.column_number - whitespace_length),
            )

        # Whitespace following a newline before the link destination.
        destination_whitespace = lrd_token.link_destination_whitespace
        assert destination_whitespace is not None
        newline_index = destination_whitespace.find(newline)
        if newline_index != -1 and ParserHelper.is_character_at_index_whitespace(
            destination_whitespace, newline_index + 1
        ):
            self.__report_lrd_error(
                lrd_token,
                num_container_tokens,
                context,
                token,
                enclosing_block_quote,
            )

        # Whitespace following a newline before the link title.
        title_whitespace = lrd_token.link_title_whitespace
        assert title_whitespace is not None
        newline_index = title_whitespace.find(newline)
        if newline_index != -1 and ParserHelper.is_character_at_index_whitespace(
            title_whitespace, newline_index + 1
        ):
            assert lrd_token.link_name_debug is not None
            line_number_delta = (
                lrd_token.link_name_debug.count(newline)
                + title_whitespace.count(newline)
                + 1
            )

            # Find the block quote prefix of the line holding the title.
            prefix_index = (
                self.__bq_line_index[num_container_tokens] + line_number_delta
            )
            block_quote_prefixes = enclosing_block_quote.leading_spaces.split(
                newline
            )
            title_line_prefix = block_quote_prefixes[prefix_index]

            self.report_next_token_error(
                context,
                token,
                line_number_delta=line_number_delta,
                column_number_delta=-(len(title_line_prefix) + 1),
            )

        # Account for every line that the definition spans.
        assert lrd_token.link_name_debug is not None
        assert lrd_token.link_title_raw is not None
        self.__bq_line_index[num_container_tokens] += (
            1
            + lrd_token.link_name_debug.count(newline)
            + destination_whitespace.count(newline)
            + title_whitespace.count(newline)
            + lrd_token.link_title_raw.count(newline)
        )
    def __count_block_quote_starts(
        line_to_parse,
        start_index,
        stack_bq_count,
        is_top_of_stack_fenced_code_block,
    ):
        """
        Having detected a block quote character (">") on a line, continue to consume
        and count while the block quote pattern is there.

        Args:
            line_to_parse: The line being examined.
            start_index: Index of the block quote character that was detected.
            stack_bq_count: Number of block quotes already open on the stack.
            is_top_of_stack_fenced_code_block: True if the token on top of the
                stack is a fenced code block.

        Returns:
            A tuple of (this_bq_count, start_index, adjusted_line,
            last_block_quote_index):
              * this_bq_count is the number of block quote characters counted.
              * start_index is the index at which scanning stopped.
              * adjusted_line is line_to_parse with any tab that was skipped
                as whitespace replaced by spaces.
              * last_block_quote_index is the index just past the last block
                quote character counted, or -1 if none were counted.
        """

        this_bq_count = 0
        last_block_quote_index = -1
        adjusted_line = line_to_parse
        if stack_bq_count == 0 and is_top_of_stack_fenced_code_block:
            # Inside a fenced code block with no open block quotes, nothing is
            # counted and the index is moved back by one.
            # NOTE(review): presumably so the caller treats the character as
            # code block content - confirm against the caller.
            start_index -= 1
        else:
            # Count the block quote character that the caller detected and
            # step past it.
            this_bq_count += 1
            start_index += 1
            last_block_quote_index = start_index

            LOGGER.debug(
                "stack_bq_count--%s--is_top_of_stack_fenced_code_block--%s",
                str(stack_bq_count),
                str(is_top_of_stack_fenced_code_block),
            )

            while True:
                # Skip a single whitespace character after the block quote
                # character, if there is one.
                if ParserHelper.is_character_at_index_whitespace(
                        adjusted_line, start_index):
                    if adjusted_line[start_index] == "\t":
                        # Replace the tab with a run of spaces, so only the
                        # first of those spaces is skipped below.
                        # NOTE(review): calculate_length presumably returns
                        # the width of the tab at this position - confirm.
                        adjusted_tab_length = ParserHelper.calculate_length(
                            "\t", start_index=start_index)
                        LOGGER.debug("adj--%s--",
                                     adjusted_line.replace("\t", "\\t"))
                        adjusted_line = (adjusted_line[0:start_index] +
                                         "".rjust(adjusted_tab_length) +
                                         adjusted_line[start_index + 1:])
                        LOGGER.debug("--%s--",
                                     adjusted_line.replace("\t", "\\t"))
                    start_index += 1

                # Within a fenced code block, never count more block quote
                # characters than are already open on the stack.
                if is_top_of_stack_fenced_code_block and (this_bq_count >=
                                                          stack_bq_count):
                    break

                # Stop at the end of the line or at anything that is not
                # another block quote character.
                if start_index == len(
                        adjusted_line
                ) or ParserHelper.is_character_at_index_not(
                        adjusted_line,
                        start_index,
                        BlockQuoteProcessor.__block_quote_character,
                ):
                    break
                # Another block quote character: count it and step past it.
                this_bq_count += 1
                start_index += 1
                last_block_quote_index = start_index

            LOGGER.debug(
                "__count_block_quote_starts--%s--%s--",
                str(start_index),
                adjusted_line.replace("\t", "\\t"),
            )
        return this_bq_count, start_index, adjusted_line, last_block_quote_index