Example #1
0
    def __parse_angle_link_destination(source_text, new_index):
        """
        Collect a link destination that is wrapped in angle brackets.

        The index is advanced by one before collecting starts (presumably to
        step over the opening bracket -- the caller is not visible here).
        Text is gathered up to the next break character; a backslash
        sequence is copied through exactly as it appears in the source and
        collecting then resumes after it.

        Returns a tuple of (index just past the angle-link end character,
        collected text).  If collecting stops on anything other than that
        end character, (-1, "") is returned instead.
        """

        destination = ""
        new_index += 1
        while True:
            new_index, plain_run = ParserHelper.collect_until_one_of_characters(
                source_text, new_index,
                LinkHelper.__angle_link_destination_breaks)
            destination += plain_run
            if not ParserHelper.is_character_at_index(
                    source_text, new_index, InlineHelper.backslash_character):
                break

            # The backslash handler decides how far the escape extends; the
            # raw source characters it covers are kept in the destination.
            escape_start = new_index
            escape_response = InlineHelper.handle_inline_backslash(
                InlineRequest(source_text, escape_start))
            new_index = escape_response.new_index
            destination += source_text[escape_start:new_index]

        if not ParserHelper.is_character_at_index(source_text, new_index,
                                                  LinkHelper.__angle_link_end):
            return -1, ""
        return new_index + 1, destination
Example #2
0
    def __process_inline_link_body(source_text, new_index):
        """
        Process the body of an inline link, starting at the supplied index.

        Leading whitespace is skipped.  Unless the inline-end character is
        found straight away, a link destination is parsed; if that works,
        whitespace is skipped again and a link title is parsed when the next
        character is not the inline-end character.  The body must then
        finish with the inline-end character.

        Returns a tuple of (inline_link, pre_inline_link, inline_title,
        pre_inline_title, new_index).  new_index is -1 if any step failed,
        otherwise it is the index just past the inline-end character.
        """

        LOGGER.debug("process_inline_link_body>>%s<<", source_text[new_index:])
        inline_link = pre_inline_link = ""
        inline_title = pre_inline_title = ""
        new_index, _ = ParserHelper.extract_any_whitespace(
            source_text, new_index)
        LOGGER.debug("new_index>>%s>>source_text[]>>%s>", str(new_index),
                     source_text[new_index:])
        body_is_empty = ParserHelper.is_character_at_index(
            source_text, new_index, LinkHelper.__link_format_inline_end)
        if not body_is_empty:
            destination_parts = LinkHelper.__parse_link_destination(
                source_text, new_index)
            inline_link, pre_inline_link, new_index, _ = destination_parts
            if new_index != -1:
                LOGGER.debug("before ws>>%s<", source_text[new_index:])
                new_index, _ = ParserHelper.extract_any_whitespace(
                    source_text, new_index)
                LOGGER.debug("after ws>>%s>", source_text[new_index:])
                needs_title_parse = ParserHelper.is_character_at_index_not(
                    source_text, new_index,
                    LinkHelper.__link_format_inline_end)
                if needs_title_parse:
                    title_parts = LinkHelper.__parse_link_title(
                        source_text, new_index)
                    inline_title, pre_inline_title, new_index = title_parts
                if new_index != -1:
                    new_index, _ = ParserHelper.extract_any_whitespace(
                        source_text, new_index)
        LOGGER.debug(
            "inline_link>>%s>>inline_title>>%s>new_index>%s>",
            str(inline_link),
            str(inline_title),
            str(new_index),
        )
        if new_index != -1:
            # Whatever was parsed, the body is only valid if it is closed here.
            at_inline_end = ParserHelper.is_character_at_index(
                source_text, new_index,
                LinkHelper.__link_format_inline_end)
            new_index = new_index + 1 if at_inline_end else -1
        LOGGER.debug(
            "process_inline_link_body>>inline_link>>%s>>inline_title>>%s>new_index>%s>",
            str(inline_link),
            str(inline_title),
            str(new_index),
        )
        return inline_link, pre_inline_link, inline_title, pre_inline_title, new_index
Example #3
0
    def extract_bounded_string(source_text, new_index, close_character,
                               start_character):
        """
        Extract the text that runs up to an unnested close character.

        Scanning starts at new_index.  Backslash sequences are copied through
        exactly as they appear in the source.  When a start_character is
        supplied, each occurrence opens a nesting level that a later
        close_character has to end before the final close is recognised.

        Returns (index just past the close character, extracted text) when
        the close character is found at nesting level zero, otherwise
        (index where scanning stopped, None).
        """
        stop_characters = InlineHelper.backslash_character + close_character
        if start_character:
            stop_characters += start_character
        depth = 0
        LOGGER.debug(
            "extract_bounded_string>>new_index>>%s>>data>>%s>>",
            str(new_index),
            source_text[new_index:],
        )
        next_index, data = ParserHelper.collect_until_one_of_characters(
            source_text, new_index, stop_characters)
        LOGGER.debug(">>next_index1>>%s>>data>>%s>>", str(next_index), data)
        while next_index < len(source_text):
            # An unnested close character ends the bounded string.
            if source_text[next_index] == close_character and depth == 0:
                break

            if ParserHelper.is_character_at_index(
                    source_text, next_index, InlineHelper.backslash_character):
                LOGGER.debug("pre-back>>next_index>>%s>>", str(next_index))
                escape_start = next_index
                escape_response = InlineHelper.handle_inline_backslash(
                    InlineRequest(source_text, escape_start))
                next_index = escape_response.new_index
                data += source_text[escape_start:next_index]
            elif start_character is not None and ParserHelper.is_character_at_index(
                    source_text, next_index, start_character):
                LOGGER.debug("pre-start>>next_index>>%s>>", str(next_index))
                data += start_character
                next_index += 1
                depth += 1
            else:
                assert ParserHelper.is_character_at_index(
                    source_text, next_index, close_character)
                LOGGER.debug("pre-close>>next_index>>%s>>", str(next_index))
                data += close_character
                next_index += 1
                depth -= 1

            next_index, trailing_text = ParserHelper.collect_until_one_of_characters(
                source_text, next_index, stop_characters)
            # Logged before the newly collected text is added to data.
            LOGGER.debug("back>>next_index>>%s>>data>>%s>>", str(next_index),
                         data)
            data += trailing_text
        LOGGER.debug(">>next_index2>>%s>>data>>%s>>", str(next_index), data)

        found_close = (ParserHelper.is_character_at_index(
            source_text, next_index, close_character) and depth == 0)
        if not found_close:
            LOGGER.debug(
                "extract_bounded_string>>ran out of string>>next_index>>%s",
                str(next_index))
            return next_index, None
        LOGGER.debug("extract_bounded_string>>found-close")
        return next_index + 1, data
Example #4
0
    def __parse_non_angle_link_destination(source_text, new_index):
        """
        Collect a link destination that is not wrapped in angle brackets.

        Text is gathered up to each break character.  Backslash sequences are
        copied through as they appear in the source, nest characters raise
        the nesting level, and unnest characters lower it.  Collecting stops
        at an unnest character seen at level zero, or at any other break.

        Returns (index where collecting stopped, collected text), or
        (-1, None) if the nesting level is not back at zero when it stops.
        """

        destination = ""
        depth = 0
        while True:
            LOGGER.debug(
                "collected_destination>>%s<<source_text<<%s>>nesting_level>>%s>>",
                str(destination),
                source_text[new_index:],
                str(depth),
            )
            new_index, plain_run = ParserHelper.collect_until_one_of_characters(
                source_text, new_index, LinkHelper.__non_angle_link_breaks)
            destination += plain_run
            LOGGER.debug(">>>>>>%s<<<<<", source_text[new_index:])
            if ParserHelper.is_character_at_index(
                    source_text, new_index, InlineHelper.backslash_character):
                LOGGER.debug("backslash")
                escape_start = new_index
                escape_response = InlineHelper.handle_inline_backslash(
                    InlineRequest(source_text, escape_start))
                new_index = escape_response.new_index
                destination += source_text[escape_start:new_index]
            elif ParserHelper.is_character_at_index(
                    source_text, new_index, LinkHelper.__non_angle_link_nest):
                LOGGER.debug("+1")
                depth += 1
                destination += LinkHelper.__non_angle_link_nest
                new_index += 1
            elif ParserHelper.is_character_at_index(
                    source_text, new_index,
                    LinkHelper.__non_angle_link_unnest):
                LOGGER.debug("-1")
                if depth == 0:
                    # An unnest with nothing open is not part of the destination.
                    break
                destination += LinkHelper.__non_angle_link_unnest
                new_index += 1
                depth -= 1
            else:
                break
        LOGGER.debug("collected_destination>>%s", str(destination))
        if depth != 0:
            return -1, None
        return new_index, destination
Example #5
0
    def extract_link_label(line_to_parse,
                           new_index,
                           include_reference_colon=True):
        """
        Extract a link label, starting at the supplied index.

        Text is gathered up to each label break character, with backslash
        sequences copied through as they appear in the source.  The label
        must finish with the label end character and, when
        include_reference_colon is set, be followed directly by the
        definition character.

        Returns (True, index after the label, label text) on success.  On
        failure the result is (False, -1, None), except when the label end
        character is missing, in which case it is (False, index where
        collecting stopped, None).
        """
        label_text = ""
        while True:
            new_index, plain_run = ParserHelper.collect_until_one_of_characters(
                line_to_parse, new_index, LinkHelper.__link_label_breaks)
            label_text += plain_run
            if ParserHelper.is_character_at_index(
                    line_to_parse, new_index,
                    InlineHelper.backslash_character):
                escape_start = new_index
                escape_response = InlineHelper.handle_inline_backslash(
                    InlineRequest(line_to_parse, escape_start))
                new_index = escape_response.new_index
                label_text += line_to_parse[escape_start:new_index]
                continue
            if ParserHelper.is_character_at_index(
                    line_to_parse, new_index, LinkHelper.link_label_start):
                LOGGER.debug(">> unescaped [, bailing")
                return False, -1, None
            break

        LOGGER.debug("look for ]>>%s<<", line_to_parse[new_index:])
        label_is_closed = ParserHelper.is_character_at_index(
            line_to_parse, new_index, LinkHelper.link_label_end)
        if not label_is_closed:
            LOGGER.debug(">> no end ], bailing")
            return False, new_index, None
        new_index += 1

        if include_reference_colon:
            LOGGER.debug("look for :>>%s<<", line_to_parse[new_index:])
            has_definition_character = ParserHelper.is_character_at_index(
                line_to_parse,
                new_index,
                LinkHelper.__link_label_is_definition_character,
            )
            if not has_definition_character:
                LOGGER.debug(">> no :, bailing")
                return False, -1, None
            new_index += 1

        return True, new_index, label_text
Example #6
0
    def is_html_block(
        line_to_parse: str,
        start_index: int,
        extracted_whitespace: Optional[str],
        token_stack: List[StackToken],
    ) -> Tuple[Optional[str], Optional[str]]:
        """
        Decide whether the text at start_index would begin an HTML block.

        The check only goes ahead when the extracted whitespace is at most
        three characters long and the character at start_index is the HTML
        block start character.

        Returns (html_block_type, remaining_html_tag) as determined by
        __determine_html_block_type, or (None, None) when the preconditions
        are not met.
        """

        assert extracted_whitespace is not None
        indent_is_acceptable = ParserHelper.is_length_less_than_or_equal_to(
            extracted_whitespace, 3
        )
        if not (
            indent_is_acceptable
            and ParserHelper.is_character_at_index(
                line_to_parse,
                start_index,
                HtmlHelper.__html_block_start_character,
            )
        ):
            return None, None

        html_block_type, remaining_html_tag = HtmlHelper.__determine_html_block_type(
            token_stack,
            line_to_parse,
            start_index,
        )
        return html_block_type, remaining_html_tag
Example #7
0
    def parse_html_block(parser_state, position_marker, extracted_whitespace):
        """
        Start an HTML block if the current position qualifies for one.

        The position qualifies when the extracted whitespace is at most three
        characters long, the character at the marker's index is the HTML
        block start character, and __determine_html_block_type reports a
        block type.  In that case any open paragraph blocks are closed, an
        HtmlBlockStackToken is pushed onto the token stack, and an
        HtmlBlockMarkdownToken is appended to the returned tokens.

        Returns the list of new tokens, which is empty when no HTML block
        was started.
        """

        if not (
            ParserHelper.is_length_less_than_or_equal_to(extracted_whitespace, 3)
            and ParserHelper.is_character_at_index(
                position_marker.text_to_parse,
                position_marker.index_number,
                HtmlHelper.__html_block_start_character,
            )
        ):
            return []

        html_block_type, remaining_html_tag = HtmlHelper.__determine_html_block_type(
            parser_state,
            position_marker.text_to_parse,
            position_marker.index_number,
        )
        if not html_block_type:
            return []

        new_tokens, _, _ = parser_state.close_open_blocks_fn(
            parser_state, only_these_blocks=[ParagraphStackToken],
        )
        parser_state.token_stack.append(
            HtmlBlockStackToken(html_block_type, remaining_html_tag)
        )
        new_tokens.append(
            HtmlBlockMarkdownToken(position_marker, extracted_whitespace)
        )
        return new_tokens
Example #8
0
    def __parse_link_title(source_text, new_index):
        """
        Parse the title portion of an inline link.

        The character at new_index selects the delimiter style: the single
        or double title character (closed by the same character), or the
        parenthesis-open character (closed by the parenthesis-close
        character, with nested opens tracked).  Any other character is a
        failure.

        Returns (title with backslashes handled, title as extracted,
        new_index).  new_index is -1 when no title delimiter was found; the
        two titles are None when extract_bounded_string returned no text.
        """

        LOGGER.debug("parse_link_title>>new_index>>%s>>",
                     source_text[new_index:])
        # Each entry: (opening character, closing character, nesting character).
        delimiter_styles = (
            (LinkHelper.__link_title_single,
             LinkHelper.__link_title_single,
             None),
            (LinkHelper.__link_title_double,
             LinkHelper.__link_title_double,
             None),
            (LinkHelper.__link_title_parenthesis_open,
             LinkHelper.__link_title_parenthesis_close,
             LinkHelper.__link_title_parenthesis_open),
        )
        ex_title = ""
        for opener, closer, nester in delimiter_styles:
            if ParserHelper.is_character_at_index(source_text, new_index,
                                                  opener):
                new_index, ex_title = InlineHelper.extract_bounded_string(
                    source_text, new_index + 1, closer, nester)
                break
        else:
            new_index = -1
        LOGGER.debug(
            "parse_link_title>>new_index>>%s>>ex_link>>%s>>",
            str(new_index),
            str(ex_title),
        )
        # Keep the title as extracted before backslashes are handled.
        pre_ex_title = ex_title
        if ex_title is not None:
            unescaped_title = InlineHelper.handle_backslashes(
                ex_title, add_text_signature=False)
            ex_title = InlineHelper.append_text(
                "",
                unescaped_title,
                add_text_signature=False,
            )
        LOGGER.debug("parse_link_title>>pre>>%s>>", str(pre_ex_title))
        LOGGER.debug("parse_link_title>>after>>%s>>", str(ex_title))

        return ex_title, pre_ex_title, new_index
Example #9
0
    def __handle_next_extract_bounded_string_item(
        source_text: str,
        next_index: int,
        extracted_parts: List[str],
        start_character: Optional[str],
        nesting_level: int,
        close_character: str,
        break_characters: str,
    ) -> Tuple[int, int]:
        """
        Consume the break character at next_index and the plain text after it.

        A backslash sequence is appended to extracted_parts as it appears in
        the source.  A start character is appended and raises the nesting
        level; otherwise the character must be the close character, which is
        appended and lowers the nesting level.  The text up to the next
        break character is then appended as well.

        Returns (index of the next break character or end of collection,
        updated nesting level).  extracted_parts is modified in place.
        """

        if ParserHelper.is_character_at_index(
                source_text, next_index, InlineHelper.backslash_character):
            POGGER.debug("pre-back>>next_index>>$>>", next_index)
            escape_start = next_index
            escape_response = InlineHelper.handle_inline_backslash(
                InlineRequest(source_text, escape_start))
            assert escape_response.new_index is not None
            next_index = escape_response.new_index
            extracted_parts.append(source_text[escape_start:next_index])
        else:
            if start_character is not None and ParserHelper.is_character_at_index(
                    source_text, next_index, start_character):
                POGGER.debug("pre-start>>next_index>>$>>", next_index)
                boundary_character, depth_change = start_character, 1
            else:
                assert ParserHelper.is_character_at_index(source_text, next_index,
                                                          close_character)
                POGGER.debug("pre-close>>next_index>>$>>", next_index)
                boundary_character, depth_change = close_character, -1
            # Both boundary kinds are one character wide and kept in the output.
            extracted_parts.append(boundary_character)
            next_index += 1
            nesting_level += depth_change

        following_index, following_text = ParserHelper.collect_until_one_of_characters(
            source_text, next_index, break_characters)
        assert following_text is not None
        assert following_index is not None
        extracted_parts.append(following_text)
        return following_index, nesting_level
Example #10
0
    def __parse_raw_open_tag(text_to_parse):
        """
        Parse the current line as if it is an open tag, and determine if it is valid.

        After the tag name, attributes are parsed for as long as whitespace
        was extracted and the next character can start an attribute name.
        One `__html_tag_start` character is then allowed directly before the
        required `__html_tag_end` character.

        Returns a tuple of (valid_raw_html, end_parse_index): the tag text up
        to, but not including, the tag end character and the index just past
        that character.  Every failure, including an attribute that cannot
        be parsed, yields (None, -1).
        """

        end_parse_index = -1
        valid_raw_html = None
        tag_name = HtmlHelper.__parse_raw_tag_name(text_to_parse, 0)
        if tag_name:
            parse_index, extracted_whitespace = ParserHelper.extract_any_whitespace(
                text_to_parse, len(tag_name)
            )
            while (
                extracted_whitespace
                and ParserHelper.is_character_at_index_one_of(
                    text_to_parse,
                    parse_index,
                    HtmlHelper.__tag_attribute_name_start,
                )
            ):
                (
                    parse_index,
                    extracted_whitespace,
                ) = HtmlHelper.__parse_tag_attributes(text_to_parse, parse_index)
                if parse_index is None:
                    # Report the failure in the same (html, index) shape as
                    # every other exit, rather than leaking the attribute
                    # parser's (index, whitespace) pair to the caller.
                    return None, -1

            if ParserHelper.is_character_at_index(
                text_to_parse, parse_index, HtmlHelper.__html_tag_start
            ):
                parse_index += 1

            if ParserHelper.is_character_at_index(
                text_to_parse, parse_index, HtmlHelper.__html_tag_end
            ):
                valid_raw_html = text_to_parse[0:parse_index]
                end_parse_index = parse_index + 1

        return valid_raw_html, end_parse_index
Example #11
0
    def __parse_raw_open_tag(text_to_parse: str) -> Tuple[Optional[str], int]:
        """
        Treat the text as an open tag and report whether it is a valid one.

        After the tag name, attributes are parsed for as long as whitespace
        was extracted and the next character can start an attribute name.
        One `__html_tag_start` character is then allowed directly before the
        required `__html_tag_end` character.

        Returns (tag text up to but excluding the tag end character, index
        just past that character), or (None, -1) when the text is not a
        valid open tag.
        """

        tag_name = HtmlHelper.__parse_raw_tag_name(text_to_parse, 0)
        if not tag_name:
            return None, -1

        parse_index, extracted_whitespace = ParserHelper.extract_any_whitespace(
            text_to_parse, len(tag_name)
        )
        assert parse_index is not None
        while extracted_whitespace and ParserHelper.is_character_at_index_one_of(
            text_to_parse,
            parse_index,
            HtmlHelper.__tag_attribute_name_start,
        ):
            parse_index, extracted_whitespace = HtmlHelper.__parse_tag_attributes(
                text_to_parse, parse_index
            )
            if parse_index is None:
                return None, -1

        if ParserHelper.is_character_at_index(
            text_to_parse, parse_index, HtmlHelper.__html_tag_start
        ):
            parse_index += 1

        if not ParserHelper.is_character_at_index(
            text_to_parse, parse_index, HtmlHelper.__html_tag_end
        ):
            return None, -1
        return text_to_parse[:parse_index], parse_index + 1
Example #12
0
    def __check_for_special_html_blocks(
        line_to_parse: str, character_index: int
    ) -> Optional[str]:
        """
        Identify HTML block types 2 through 5 from their opening characters.

        When the character at character_index is the shared 2-to-5 start
        character, the text after it is matched against the type 2, type 4
        and type 5 continuations, in that order.  Otherwise the character
        itself is matched against the type 3 continuation.

        Returns the matching HtmlHelper block type constant, or None when
        nothing matches or character_index is at or beyond the end of the
        line.
        """

        if character_index >= len(line_to_parse):
            return None

        if ParserHelper.is_character_at_index(
            line_to_parse, character_index, HtmlHelper.__html_block_2_to_5_start
        ):
            following_index = character_index + 1
            if ParserHelper.are_characters_at_index(
                line_to_parse,
                following_index,
                HtmlHelper.__html_block_2_continued_start,
            ):
                return HtmlHelper.html_block_2
            if ParserHelper.is_character_at_index_one_of(
                line_to_parse,
                following_index,
                HtmlHelper.__html_block_4_continued_start,
            ):
                return HtmlHelper.html_block_4
            if ParserHelper.are_characters_at_index(
                line_to_parse,
                following_index,
                HtmlHelper.__html_block_5_continued_start,
            ):
                return HtmlHelper.html_block_5
            return None

        if ParserHelper.is_character_at_index(
            line_to_parse,
            character_index,
            HtmlHelper.__html_block_3_continued_start,
        ):
            return HtmlHelper.html_block_3
        return None
    def is_block_quote_start(line_to_parse,
                             start_index,
                             extracted_whitespace,
                             adj_ws=None):
        """
        Report whether a block quote section starts at start_index.

        The whitespace that is measured is adj_ws when it is supplied,
        otherwise extracted_whitespace.  Returns True only when that
        whitespace is at most three characters long and the character at
        start_index is the block quote character; otherwise returns False.
        """

        leading_whitespace = extracted_whitespace if adj_ws is None else adj_ws
        return bool(
            ParserHelper.is_length_less_than_or_equal_to(leading_whitespace, 3)
            and ParserHelper.is_character_at_index(
                line_to_parse, start_index,
                BlockQuoteProcessor.__block_quote_character))
Example #14
0
    def is_atx_heading(
        line_to_parse: str,
        start_index: int,
        extracted_whitespace: Optional[str],
        skip_whitespace_check: bool = False,
    ) -> Tuple[bool, Optional[int], Optional[int], Optional[str]]:
        """
        Decide whether an ATX heading starts at start_index.

        The extracted whitespace must be at most three characters long,
        unless skip_whitespace_check is set, and the character at
        start_index must be the ATX character.  The run of ATX characters
        may be at most six long and must be followed by at least one space
        or by the end of the line.

        Returns (True, index after the spaces that follow the run, length of
        the run, those spaces) for a heading, otherwise
        (False, None, None, None).
        """

        assert extracted_whitespace is not None
        indent_is_acceptable = (
            ParserHelper.is_length_less_than_or_equal_to(extracted_whitespace, 3)
            or skip_whitespace_check
        )
        if not (
            indent_is_acceptable
            and ParserHelper.is_character_at_index(
                line_to_parse,
                start_index,
                LeafBlockProcessor.__atx_character,
            )
        ):
            return False, None, None, None

        hash_count, new_index = ParserHelper.collect_while_character(
            line_to_parse,
            start_index,
            LeafBlockProcessor.__atx_character,
        )
        assert new_index is not None

        _, non_whitespace_index = ParserHelper.collect_while_character(
            line_to_parse, new_index, " "
        )
        extracted_whitespace_at_start = line_to_parse[new_index:non_whitespace_index]

        assert hash_count is not None
        if hash_count > 6:
            return False, None, None, None
        if not extracted_whitespace_at_start and non_whitespace_index != len(
            line_to_parse
        ):
            # The run is followed directly by other text, so it is not a heading.
            return False, None, None, None
        return (
            True,
            non_whitespace_index,
            hash_count,
            extracted_whitespace_at_start,
        )
def test_is_character_at_index_with_whitespace():
    """
    Verify that a match is reported when the character at the index is the requested one.
    """

    # Arrange
    text_under_test = "a"
    probe_index = 0
    wanted_character = "a"
    should_match = True

    # Act
    did_match = ParserHelper.is_character_at_index(
        text_under_test, probe_index, wanted_character)

    # Assert
    assert should_match == did_match
Example #16
0
 def __parse_raw_close_tag(text_to_parse):
     """
     Treat the text as a close tag and report whether it is a valid one.

     The text must begin with the `__html_tag_start` character and a tag
     name must be parsed from index 1.  Anything after the length of that
     tag name has to be whitespace running to the end of the text.

     Returns text_to_parse when it is a valid close tag, otherwise None.
     """
     if not ParserHelper.is_character_at_index(
         text_to_parse, 0, HtmlHelper.__html_tag_start
     ):
         return None

     tag_name = HtmlHelper.__parse_raw_tag_name(text_to_parse, 1)
     if not tag_name:
         return None

     text_length = len(text_to_parse)
     parse_index = len(tag_name)
     if parse_index != text_length:
         parse_index, _ = ParserHelper.extract_whitespace(
             text_to_parse, parse_index
         )
     return text_to_parse if parse_index == text_length else None
def test_is_character_at_index_without_whitespace():
    """
    Verify that no match is reported when the character at the index differs from the requested one.
    """

    # Arrange
    text_under_test = "this is a test"
    probe_index = 0
    wanted_character = "b"
    should_match = False

    # Act
    did_match = ParserHelper.is_character_at_index(
        text_under_test, probe_index, wanted_character)

    # Assert
    assert should_match == did_match
def test_is_character_at_index_with_character_at_end():
    """
    Verify that a match is reported when the requested character is the last one in the string.
    """

    # Arrange
    text_under_test = "this is a test!"
    probe_index = len(text_under_test) - 1
    wanted_character = "!"
    should_match = True

    # Act
    did_match = ParserHelper.is_character_at_index(
        text_under_test, probe_index, wanted_character)

    # Assert
    assert should_match == did_match
Example #19
0
 def __parse_raw_close_tag(text_to_parse: str) -> Optional[str]:
     """
     Parse the current line as if it is a close tag, and determine if it is valid.

     The text must begin with the `__html_tag_start` character and a tag
     name must be parsed from index 1.  Anything after the length of that
     tag name has to be whitespace running to the end of the text.

     Returns text_to_parse when it is a valid close tag, otherwise None.
     """
     valid_raw_html = None
     if ParserHelper.is_character_at_index(
         text_to_parse, 0, HtmlHelper.__html_tag_start
     ):
         if tag_name := HtmlHelper.__parse_raw_tag_name(text_to_parse, 1):
             parse_index: Optional[int] = len(tag_name)
             assert parse_index is not None
             text_to_parse_size = len(text_to_parse)
             if parse_index != text_to_parse_size:
                 parse_index, _ = ParserHelper.extract_whitespace(
                     text_to_parse, parse_index
                 )
             if parse_index == text_to_parse_size:
                 valid_raw_html = text_to_parse
     # Hand the computed result back; without this the function always
     # yielded None regardless of the input.
     return valid_raw_html
def test_is_character_at_index_with_high_index():
    """
    Verify that no match is reported when the index equals the length of the string.
    """

    # Arrange
    text_under_test = "this is a test"
    probe_index = len(text_under_test)
    wanted_character = "a"
    should_match = False

    # Act
    did_match = ParserHelper.is_character_at_index(
        text_under_test, probe_index, wanted_character)

    # Assert
    assert should_match == did_match
def test_is_character_at_index_with_empty_string():
    """
    Verify that looking for a character in an empty string produces no match.
    """

    # Arrange: there is no text at all, so index 0 has nothing to match.
    text = ""
    first_index = 0
    character_to_find = "a"
    expected = False

    # Act
    actual = ParserHelper.is_character_at_index(
        text, first_index, character_to_find
    )

    # Assert
    assert expected == actual
Example #22
0
 def extract_bounded_string(
     source_text: str,
     new_index: int,
     close_character: str,
     start_character: Optional[str],
 ) -> Tuple[Optional[int], Optional[str]]:
     """
     Pull out the text that runs from new_index up to a closing character.

     Scanning ends at the first close_character encountered while the nesting
     level is zero.  On success the result is the index just past that
     character together with the joined text pieces; if the text runs out
     first, the result is the index where scanning stopped and None.
     """
     # The backslash and the close character always interrupt collection; the
     # start character joins them only when one was supplied.
     stop_characters = f"{InlineHelper.backslash_character}{close_character}"
     if start_character:
         stop_characters = f"{stop_characters}{start_character}"

     depth: int = 0
     POGGER.debug(
         "extract_bounded_string>>new_index>>$>>data>>$>>",
         new_index,
         source_text[new_index:],
     )
     scan_index, first_piece = ParserHelper.collect_until_one_of_characters(
         source_text, new_index, stop_characters
     )
     assert first_piece is not None
     pieces: List[str] = [first_piece]
     POGGER.debug(">>next_index1>>$>>data>>$>>", scan_index, first_piece)
     assert scan_index is not None

     while scan_index < len(source_text):
         # An unnested close character ends the scan.
         if source_text[scan_index] == close_character and depth == 0:
             break
         # The helper receives the pieces list and hands back the updated
         # index and nesting level.
         (
             scan_index,
             depth,
         ) = InlineHelper.__handle_next_extract_bounded_string_item(
             source_text,
             scan_index,
             pieces,
             start_character,
             depth,
             close_character,
             stop_characters,
         )
         assert scan_index is not None
         POGGER.debug("back>>next_index>>$>>data>>$>>", scan_index,
                      first_piece)

     POGGER.debug(">>next_index2>>$>>data>>$>>", scan_index, first_piece)
     assert scan_index is not None

     if not (ParserHelper.is_character_at_index(
             source_text, scan_index, close_character) and depth == 0):
         POGGER.debug(
             "extract_bounded_string>>ran out of string>>next_index>>$",
             scan_index)
         return scan_index, None

     POGGER.debug("extract_bounded_string>>found-close")
     return scan_index + 1, "".join(pieces)
Example #23
0
    def parse_atx_headings(parser_state, position_marker,
                           extracted_whitespace):
        """
        Handle the parsing of an atx heading.

        Returns the list of new tokens; the list stays empty when the line is
        not accepted as an atx heading.
        """

        new_tokens = []

        # Guard: the leading whitespace must pass the length check against 3
        # and the atx character must be at the current index.
        if not (ParserHelper.is_length_less_than_or_equal_to(
                extracted_whitespace, 3)
                and ParserHelper.is_character_at_index(
                    position_marker.text_to_parse,
                    position_marker.index_number,
                    LeafBlockProcessor.__atx_character,
                )):
            return new_tokens

        hash_count, after_hashes_index = ParserHelper.collect_while_character(
            position_marker.text_to_parse,
            position_marker.index_number,
            LeafBlockProcessor.__atx_character,
        )
        whitespace_result = ParserHelper.extract_whitespace(
            position_marker.text_to_parse, after_hashes_index)
        non_whitespace_index, extracted_whitespace_at_start = whitespace_result

        # Guard: at most six atx characters, followed either by whitespace or
        # by the end of the line.
        if not (hash_count <= 6 and
                (extracted_whitespace_at_start or non_whitespace_index
                 == len(position_marker.text_to_parse))):
            return new_tokens

        new_tokens, _, _ = parser_state.close_open_blocks_fn(
            parser_state, new_tokens)
        remaining_line = position_marker.text_to_parse[non_whitespace_index:]
        end_index, extracted_whitespace_at_end = (
            ParserHelper.extract_whitespace_from_end(remaining_line))

        # Step backwards over any run of atx characters at the end.
        remove_trailing_count = 0
        while end_index > 0:
            if (remaining_line[end_index - 1]
                    != LeafBlockProcessor.__atx_character):
                break
            end_index -= 1
            remove_trailing_count += 1

        extracted_whitespace_before_end = ""
        if not remove_trailing_count:
            # No closing run: only split off what follows end_index.
            extracted_whitespace_at_end = remaining_line[end_index:]
            remaining_line = remaining_line[0:end_index]
        elif end_index <= 0:
            # The closing run reaches the start of the remaining text.
            remaining_line = ""
        elif ParserHelper.is_character_at_index_whitespace(
                remaining_line, end_index - 1):
            # Whitespace precedes the closing run: drop the run, then split
            # off the whitespace in front of it.
            remaining_line = remaining_line[:end_index]
            end_index, extracted_whitespace_before_end = (
                ParserHelper.extract_whitespace_from_end(remaining_line))
            remaining_line = remaining_line[:end_index]
        else:
            # The run is not preceded by whitespace, so it is not treated as
            # a closing run and the text is left untouched.
            extracted_whitespace_at_end = ""
            remove_trailing_count = 0

        start_token = AtxHeadingMarkdownToken(
            hash_count,
            remove_trailing_count,
            extracted_whitespace,
            position_marker,
        )
        new_tokens.append(start_token)

        text_token = TextMarkdownToken(remaining_line,
                                       extracted_whitespace_at_start)
        new_tokens.append(text_token)

        end_token = EndMarkdownToken(
            "atx",
            extracted_whitespace_at_end,
            extracted_whitespace_before_end,
            None,
        )
        end_token.start_markdown_token = start_token
        new_tokens.append(end_token)
        return new_tokens
Example #24
0
    def __parse_link_destination(source_text, new_index):
        """
        Parse an inline link's link destination.

        Returns a 4-tuple: the encoded destination, the destination as it was
        before backslash handling, the updated index, and the slice of
        source_text between the starting and updated index.  The tuple
        (None, None, -1, None) is returned when the non-angle form yields no
        destination or when the destination contains a newline.
        """

        LOGGER.debug("parse_link_destination>>new_index>>%s>>",
                     source_text[new_index:])
        start_index = new_index

        starts_with_angle = ParserHelper.is_character_at_index(
            source_text, new_index, LinkHelper.__angle_link_start)
        if not starts_with_angle:
            LOGGER.debug(
                ">parse_non_angle_link_destination>new_index>%s>%s",
                str(new_index),
                str(source_text[new_index:]),
            )
            new_index, ex_link = LinkHelper.__parse_non_angle_link_destination(
                source_text, new_index)
            LOGGER.debug(
                ">parse_non_angle_link_destination>new_index>%s>ex_link>%s>",
                str(new_index),
                str(ex_link),
            )
            # An empty result from the non-angle form is a failure.
            if not ex_link:
                return None, None, -1, None
        else:
            LOGGER.debug(
                ">parse_angle_link_destination>new_index>%s>%s",
                str(new_index),
                str(source_text[new_index:]),
            )
            new_index, ex_link = LinkHelper.__parse_angle_link_destination(
                source_text, new_index)
            LOGGER.debug(
                ">parse_angle_link_destination>new_index>%s>ex_link>%s>",
                str(new_index),
                ex_link,
            )

        parsed_ok = new_index != -1

        # A destination containing a newline is rejected.
        if parsed_ok and "\n" in ex_link:
            return None, None, -1, None
        LOGGER.debug(
            "handle_backslashes>>new_index>>%s>>ex_link>>%s>>",
            str(new_index),
            str(ex_link),
        )

        # Keep the destination as it was before backslash handling.
        pre_handle_link = ex_link
        if parsed_ok and ex_link:
            ex_link = InlineHelper.handle_backslashes(ex_link,
                                                      add_text_signature=False)
        LOGGER.debug(
            "urllib.parse.quote>>ex_link>>%s>>",
            str(ex_link).replace(InlineHelper.backspace_character, "\\b"),
        )

        ex_link = LinkHelper.__encode_link_destination(ex_link)
        LOGGER.debug(
            "parse_link_destination>>new_index>>%s>>ex_link>>%s>>",
            str(new_index),
            str(ex_link),
        )
        consumed_text = source_text[start_index:new_index]
        return ex_link, pre_handle_link, new_index, consumed_text
Example #25
0
    def __parse_tag_attributes(text_to_parse, start_index):
        """
        Handle the parsing of the attributes for an open tag.

        Returns the index and text produced by the last whitespace extraction,
        or (None, -1) when a quoted value is missing its closing quote.
        """
        name_end_index, _ = ParserHelper.collect_while_one_of_characters(
            text_to_parse, start_index, HtmlHelper.__tag_attribute_name_characters
        )
        end_name_index, extracted_whitespace = ParserHelper.extract_any_whitespace(
            text_to_parse, name_end_index
        )

        # No name/value separator: there is no value to parse.
        if not ParserHelper.is_character_at_index(
            text_to_parse,
            end_name_index,
            HtmlHelper.__html_attribute_name_value_separator,
        ):
            return end_name_index, extracted_whitespace

        value_start_index, extracted_whitespace = ParserHelper.extract_any_whitespace(
            text_to_parse, end_name_index + 1
        )

        # Try the single-quote form first, then the double-quote form; the
        # loop's else branch handles a value that starts with neither.
        for quote_characters in (
            HtmlHelper.__html_attribute_value_single,
            HtmlHelper.__html_attribute_value_double,
        ):
            if not ParserHelper.is_character_at_index_one_of(
                text_to_parse, value_start_index, quote_characters
            ):
                continue
            value_end_index, _ = ParserHelper.collect_until_character(
                text_to_parse, value_start_index + 1, quote_characters
            )
            # The value must be terminated by the same quote.
            if not ParserHelper.is_character_at_index(
                text_to_parse, value_end_index, quote_characters
            ):
                return None, -1
            value_end_index += 1
            break
        else:
            value_end_index, _ = ParserHelper.collect_until_one_of_characters(
                text_to_parse,
                value_start_index,
                HtmlHelper.__unquoted_attribute_value_stop,
            )

        end_name_index, extracted_whitespace = ParserHelper.extract_any_whitespace(
            text_to_parse, value_end_index
        )
        return end_name_index, extracted_whitespace
Example #26
0
 def __look_for_link_formats(source_text, new_index, text_from_blocks):
     """
     Look for links in the various formats.

     Checks for the inline form first, then the collapsed reference form and
     finally the full reference form.  Returns an 8-tuple: link, pre-link,
     title, pre-title, updated index (-1 unless a branch sets it), whether a
     reference lookup was tried, the extracted label, and the label type.
     """
     inline_link = pre_inline_link = ""
     inline_title = pre_inline_title = ""
     ex_label = label_type = ""
     update_index = -1
     tried_full_reference_form = False

     if ParserHelper.is_character_at_index(
             source_text, new_index, LinkHelper.__link_format_inline_start):
         LOGGER.debug("inline reference?")
         inline_body = LinkHelper.__process_inline_link_body(
             source_text, new_index + 1)
         (
             inline_link,
             pre_inline_link,
             inline_title,
             pre_inline_title,
             update_index,
         ) = inline_body
         label_type = "inline"
     elif ParserHelper.is_character_at_index(
             source_text, new_index,
             LinkHelper.__link_format_reference_start):
         LOGGER.debug("collapsed reference?")
         after_open_index = new_index + 1
         if not ParserHelper.is_character_at_index(
                 source_text, after_open_index,
                 LinkHelper.__link_format_reference_end):
             # The reference end does not follow directly, so try to extract
             # a label for the full reference form.
             LOGGER.debug("full reference?")
             LOGGER.debug(">>did_extract>>%s>",
                          source_text[after_open_index:])
             label_result = LinkHelper.extract_link_label(
                 source_text,
                 after_open_index,
                 include_reference_colon=False)
             did_extract, after_label_index, ex_label = label_result
             LOGGER.debug(
                 ">>did_extract>>%s>after_label_index>%s>ex_label>%s>",
                 str(did_extract),
                 str(after_label_index),
                 str(ex_label),
             )
             if did_extract:
                 tried_full_reference_form = True
                 label_type = "full"
                 update_index, inline_link, inline_title = LinkHelper.__look_up_link(
                     ex_label, after_label_index, "full reference")
         else:
             # The reference end follows directly: collapsed form, looked up
             # with the text gathered from the blocks.
             LOGGER.debug("collapsed reference")
             LOGGER.debug(">>%s>>", text_from_blocks)
             update_index, inline_link, inline_title = LinkHelper.__look_up_link(
                 text_from_blocks,
                 after_open_index + 1,
                 "collapsed reference",
             )
             tried_full_reference_form = True
             label_type = "collapsed"

     return (
         inline_link,
         pre_inline_link,
         inline_title,
         pre_inline_title,
         update_index,
         tried_full_reference_form,
         ex_label,
         label_type,
     )
Example #27
0
    def __prepare_for_create_atx_heading(
        parser_state: ParserState,
        position_marker: PositionMarker,
        new_tokens: List[MarkdownToken],
        non_whitespace_index: int,
    ) -> Tuple[StackToken, str, int, str, str, List[MarkdownToken]]:
        """
        Gather the pieces needed to create an atx heading.

        Records the current top of the token stack, closes the open blocks,
        and splits the text from non_whitespace_index onward into the heading
        text, the count of trailing atx characters removed, and the whitespace
        found before and after that trailing run.  The incoming new_tokens is
        replaced by the tokens returned from closing the open blocks.
        """
        old_top_of_stack = parser_state.token_stack[-1]
        remaining_line = position_marker.text_to_parse[non_whitespace_index:]
        remove_trailing_count = 0
        extracted_whitespace_before_end = ""

        new_tokens, _ = parser_state.close_open_blocks_fn(parser_state)
        end_index, extracted_whitespace_at_end = (
            ParserHelper.extract_whitespace_from_end(remaining_line)
        )

        # Step backwards over any run of atx characters at the end.
        while end_index > 0:
            if remaining_line[end_index - 1] != LeafBlockProcessor.__atx_character:
                break
            end_index -= 1
            remove_trailing_count += 1

        if not remove_trailing_count:
            # No closing run: only split off what follows end_index.
            extracted_whitespace_at_end = remaining_line[end_index:]
            remaining_line = remaining_line[:end_index]
        elif end_index <= 0:
            # The closing run reaches the start of the remaining text.
            remaining_line = ""
        elif ParserHelper.is_character_at_index(remaining_line, end_index - 1, " "):
            # A space precedes the closing run: drop the run, then split off
            # the spaces in front of it.
            remaining_line = remaining_line[:end_index]
            _, new_non_whitespace_index = (
                ParserHelper.collect_backwards_while_character(
                    remaining_line, len(remaining_line) - 1, " "
                )
            )
            assert new_non_whitespace_index is not None
            end_index = new_non_whitespace_index
            extracted_whitespace_before_end = remaining_line[end_index:]
            remaining_line = remaining_line[:end_index]
        else:
            # The run is not preceded by a space, so it is not treated as a
            # closing run and the text is left untouched.
            extracted_whitespace_at_end = ""
            remove_trailing_count = 0

        return (
            old_top_of_stack,
            remaining_line,
            remove_trailing_count,
            extracted_whitespace_before_end,
            extracted_whitespace_at_end,
            new_tokens,
        )
Example #28
0
    def __parse_tag_attributes(
        text_to_parse: str, start_index: int
    ) -> Tuple[Optional[int], Optional[str]]:
        """
        Handle the parsing of the attributes for an open tag.

        Returns the index and text produced by the last whitespace extraction,
        or (None, None) when a quoted value is missing its closing quote.
        """
        name_end_index, _ = ParserHelper.collect_while_one_of_characters(
            text_to_parse, start_index, HtmlHelper.__tag_attribute_name_characters
        )
        assert name_end_index is not None
        end_name_index, extracted_whitespace = ParserHelper.extract_any_whitespace(
            text_to_parse, name_end_index
        )
        assert end_name_index is not None

        # No name/value separator: there is no value to parse.
        if not ParserHelper.is_character_at_index(
            text_to_parse,
            end_name_index,
            HtmlHelper.__html_attribute_name_value_separator,
        ):
            return end_name_index, extracted_whitespace

        value_start_index, extracted_whitespace = ParserHelper.extract_any_whitespace(
            text_to_parse, end_name_index + 1
        )
        assert value_start_index is not None

        value_end_index: Optional[int] = None
        # Try the single-quote form first, then the double-quote form; the
        # loop's else branch handles a value that starts with neither.
        for quote_characters in (
            HtmlHelper.__html_attribute_value_single,
            HtmlHelper.__html_attribute_value_double,
        ):
            if not ParserHelper.is_character_at_index_one_of(
                text_to_parse, value_start_index, quote_characters
            ):
                continue
            value_end_index, _ = ParserHelper.collect_until_character(
                text_to_parse, value_start_index + 1, quote_characters
            )
            assert value_end_index is not None
            # The value must be terminated by the same quote.
            if not ParserHelper.is_character_at_index(
                text_to_parse, value_end_index, quote_characters
            ):
                return None, None
            value_end_index += 1
            break
        else:
            value_end_index, _ = ParserHelper.collect_until_one_of_characters(
                text_to_parse,
                value_start_index,
                HtmlHelper.__unquoted_attribute_value_stop,
            )
        assert value_end_index is not None

        end_name_index, extracted_whitespace = ParserHelper.extract_any_whitespace(
            text_to_parse, value_end_index
        )
        return end_name_index, extracted_whitespace