Beispiel #1
0
    def extract_optional_attribute_value(line_to_parse, value_index):
        """
        Determine and extract an optional attribute value.
        """

        line_size = len(line_to_parse)
        scan_index, _ = ParserHelper.extract_whitespace(line_to_parse, value_index)

        # If the line ends, or the next character is not the name/value
        # separator, there is no value part to extract.
        if scan_index >= line_size or (
            line_to_parse[scan_index]
            != HtmlHelper.__html_attribute_name_value_separator
        ):
            return scan_index

        # Step over the separator and any whitespace before the value.
        scan_index, _ = ParserHelper.extract_whitespace(
            line_to_parse, scan_index + 1
        )
        if scan_index >= line_size:
            return -1

        value_start_character = line_to_parse[scan_index]
        if value_start_character in (
            HtmlHelper.__html_attribute_value_double,
            HtmlHelper.__html_attribute_value_single,
        ):
            # Quoted value: consume up to the matching quote character.
            scan_index, _ = ParserHelper.collect_until_character(
                line_to_parse,
                scan_index + 1,
                value_start_character,
            )
            if scan_index == line_size:
                # Unterminated quote.
                return -1
            scan_index += 1
        else:
            # Unquoted value: consume until a terminator character; an empty
            # collection means the value is malformed.
            (
                scan_index,
                collected_value,
            ) = ParserHelper.collect_until_one_of_characters(
                line_to_parse,
                scan_index,
                HtmlHelper.__html_tag_attribute_value_terminators,
            )
            if not collected_value:
                scan_index = -1
        return scan_index
    def look_for_pragmas(
        position_marker: PositionMarker,
        line_to_parse: str,
        container_depth: int,
        extracted_whitespace: Optional[str],
        parser_properties: ParseBlockPassProperties,
    ) -> bool:
        """
        Look for a pragma in the current line.
        """

        # Pragmas are only recognized at the top level with no leading
        # whitespace.
        if container_depth or extracted_whitespace:
            return False
        if not (
            line_to_parse.startswith(PragmaToken.pragma_prefix)
            or line_to_parse.startswith(PragmaToken.pragma_alternate_prefix)
        ):
            return False

        have_alternate_prefix = line_to_parse.startswith(
            PragmaToken.pragma_alternate_prefix
        )
        prefix_length = len(
            PragmaToken.pragma_alternate_prefix
            if have_alternate_prefix
            else PragmaToken.pragma_prefix
        )
        start_index, _ = ParserHelper.extract_whitespace(
            line_to_parse, prefix_length
        )
        pragma_body = line_to_parse[start_index:].rstrip().lower()
        if pragma_body.startswith(PragmaToken.pragma_title) and pragma_body.endswith(
            PragmaToken.pragma_suffix
        ):
            # Alternate-prefix pragmas are stored under a negated line number
            # so the two prefix forms can be distinguished later.
            map_key = (
                -position_marker.line_number
                if have_alternate_prefix
                else position_marker.line_number
            )
            parser_properties.pragma_lines[map_key] = line_to_parse
            return True
        return False
Beispiel #3
0
    def parse_setext_headings(
        parser_state,
        position_marker,
        extracted_whitespace,
        this_bq_count,
        stack_bq_count,
    ):
        """
        Handle the parsing of an setext heading.

        Returns the list of new tokens produced (the heading's end token),
        or an empty list if the line is not a valid setext underline.
        """

        new_tokens = []
        # A setext underline is only possible when the indent is at most 3,
        # the current character is one of the setext characters, the block
        # being built is a paragraph, and the block-quote level is unchanged.
        if (ParserHelper.is_length_less_than_or_equal_to(
                extracted_whitespace, 3)
                and ParserHelper.is_character_at_index_one_of(
                    position_marker.text_to_parse,
                    position_marker.index_number,
                    LeafBlockProcessor.__setext_characters,
                ) and parser_state.token_stack[-1].is_paragraph
                and (this_bq_count == stack_bq_count)):
            # Consume the run of identical underline characters.
            _, collected_to_index = ParserHelper.collect_while_character(
                position_marker.text_to_parse,
                position_marker.index_number,
                position_marker.text_to_parse[position_marker.index_number],
            )
            (
                after_whitespace_index,
                extra_whitespace_after_setext,
            ) = ParserHelper.extract_whitespace(position_marker.text_to_parse,
                                                collected_to_index)
            # Only trailing whitespace may follow the underline characters.
            if after_whitespace_index == len(position_marker.text_to_parse):

                # This is unusual.  Normally, close_open_blocks is used to close off
                # blocks based on the stack token.  However, since the setext takes
                # the last paragraph of text (see case 61) and translates it
                # into a heading, this has to be done separately, as there is no
                # stack token to close.
                new_tokens.append(
                    EndMarkdownToken(
                        MarkdownToken.token_setext_heading,
                        extracted_whitespace,
                        extra_whitespace_after_setext,
                        None,
                    ))
                # Walk backwards through the document to the paragraph token
                # that this heading replaces.
                token_index = len(parser_state.token_document) - 1
                while not parser_state.token_document[token_index].is_paragraph:
                    token_index -= 1

                replacement_token = SetextHeadingMarkdownToken(
                    position_marker.text_to_parse[
                        position_marker.index_number],
                    collected_to_index - position_marker.index_number,
                    parser_state.token_document[token_index].extra_data,
                    position_marker,
                    parser_state.token_document[token_index],
                )
                # Swap the paragraph token for the heading token and pop the
                # now-translated paragraph off the stack.
                parser_state.token_document[token_index] = replacement_token
                del parser_state.token_stack[-1]
        return new_tokens
    def is_olist_start(
        parser_state,
        line_to_parse,
        start_index,
        extracted_whitespace,
        skip_whitespace_check=False,
        adj_ws=None,
    ):
        """
        Determine if we have the start of an numbered or ordered list.
        """
        is_start, index, my_count = False, None, None
        end_whitespace_index = -1
        if adj_ws is None:
            adj_ws = extracted_whitespace

        indent_is_acceptable = skip_whitespace_check or \
            ParserHelper.is_length_less_than_or_equal_to(adj_ws, 3)
        if indent_is_acceptable and ParserHelper.is_character_at_index_one_of(
                line_to_parse, start_index, string.digits):
            # Consume the run of digits that forms the list number.
            index = start_index
            while ParserHelper.is_character_at_index_one_of(
                    line_to_parse, index, string.digits):
                index += 1
            my_count = index - start_index
            olist_index_number = line_to_parse[start_index:index]
            LOGGER.debug("olist?%s<<count>>%s<<", olist_index_number,
                         str(my_count))
            LOGGER.debug("olist>>%s", str(line_to_parse[index]))
            LOGGER.debug("index+1>>%s>>len>>%s", str(index + 1),
                         str(len(line_to_parse)))

            end_whitespace_index, _ = ParserHelper.extract_whitespace(
                line_to_parse, index + 1)
            LOGGER.debug(
                "end_whitespace_index>>%s>>len>>%s>>%s",
                str(end_whitespace_index),
                str(len(line_to_parse)),
                olist_index_number,
            )

            has_start_character = ParserHelper.is_character_at_index_one_of(
                line_to_parse, index,
                ListBlockProcessor.__olist_start_characters)
            is_followed_properly = ParserHelper.is_character_at_index_whitespace(
                line_to_parse, index + 1) or \
                (index + 1) == len(line_to_parse)
            # Interrupting a paragraph (no list already open) is only
            # allowed for a non-empty item numbered "1".
            would_break_paragraph = (
                parser_state.token_stack[-1].is_paragraph
                and not parser_state.token_stack[-2].is_list
                and (end_whitespace_index == len(line_to_parse)
                     or olist_index_number != "1"))
            if (my_count <= 9 and has_start_character
                    and not would_break_paragraph and is_followed_properly):
                is_start = True

        LOGGER.debug("is_olist_start>>result>>%s", str(is_start))
        return is_start, index, my_count, end_whitespace_index
Beispiel #5
0
    def parse_setext_headings(
        parser_state: ParserState,
        position_marker: PositionMarker,
        extracted_whitespace: Optional[str],
        block_quote_data: BlockQuoteData,
    ) -> List[MarkdownToken]:

        """
        Handle the parsing of an setext heading.
        """

        new_tokens: List[MarkdownToken] = []
        assert extracted_whitespace is not None
        line_text = position_marker.text_to_parse
        underline_start_index = position_marker.index_number

        # A setext underline candidate needs: indent <= 3, a setext character
        # at the current index, an open paragraph, and an unchanged
        # block-quote level.
        is_candidate = (
            ParserHelper.is_length_less_than_or_equal_to(extracted_whitespace, 3)
            and ParserHelper.is_character_at_index_one_of(
                line_text,
                underline_start_index,
                LeafBlockProcessor.__setext_characters,
            )
            and parser_state.token_stack[-1].is_paragraph
            and block_quote_data.current_count == block_quote_data.stack_count
        )
        if is_candidate:
            is_paragraph_continuation = (
                LeafBlockProcessor.__adjust_continuation_for_active_list(
                    parser_state, position_marker
                )
            )

            # Consume the run of identical underline characters, then any
            # trailing whitespace.
            _, collected_to_index = ParserHelper.collect_while_character(
                line_text,
                underline_start_index,
                line_text[underline_start_index],
            )
            assert collected_to_index is not None
            (
                after_whitespace_index,
                extra_whitespace_after_setext,
            ) = ParserHelper.extract_whitespace(line_text, collected_to_index)

            only_whitespace_remains = after_whitespace_index == len(line_text)
            if only_whitespace_remains and not is_paragraph_continuation:
                LeafBlockProcessor.__create_setext_token(
                    parser_state,
                    position_marker,
                    collected_to_index,
                    new_tokens,
                    extracted_whitespace,
                    extra_whitespace_after_setext,
                )
        return new_tokens
Beispiel #6
0
    def __complete_inline_block_processing(
        inline_blocks,
        source_text,
        start_index,
        current_string,
        end_string,
        starting_whitespace,
        is_setext,
    ):
        """
        Finish inline processing: flush any remaining source text into a final
        text token, then resolve emphasis over the collected blocks.
        """
        have_processed_once = bool(inline_blocks) or start_index != 0

        def _escaped(value):
            # Render newlines visibly for the debug log.
            return str(value).replace("\n", "\\n")

        LOGGER.debug("__cibp>inline_blocks>%s<", _escaped(inline_blocks))
        LOGGER.debug("__cibp>source_text>%s<", _escaped(source_text))
        LOGGER.debug("__cibp>start_index>%s<", _escaped(start_index))
        LOGGER.debug("__cibp>current_string>%s<", _escaped(current_string))
        LOGGER.debug("__cibp>end_string>%s<", _escaped(end_string))
        LOGGER.debug(
            "__cibp>starting_whitespace>%s<",
            _escaped(starting_whitespace),
        )
        LOGGER.debug("__cibp>is_setext>%s<", _escaped(is_setext))

        # After a hard break, leading whitespace is shifted onto the end
        # string instead of the text token.
        if (inline_blocks and inline_blocks[-1].token_name
                == MarkdownToken.token_inline_hard_break):
            start_index, extracted_whitespace = ParserHelper.extract_whitespace(
                source_text, start_index)
            end_string = (
                extracted_whitespace
                if end_string is None
                else end_string + extracted_whitespace
            )

        if start_index < len(source_text):
            current_string = InlineHelper.append_text(
                current_string, source_text[start_index:])

        if end_string is not None:
            LOGGER.debug("xx-end-lf>%s<", end_string.replace("\n", "\\n"))
        if current_string or not have_processed_once:
            inline_blocks.append(
                TextMarkdownToken(
                    current_string,
                    starting_whitespace,
                    end_whitespace=end_string,
                ))
        LOGGER.debug(
            ">>%s<<",
            str(inline_blocks).replace("\n", "\\n").replace("\x02", "\\x02"))

        return EmphasisHelper.resolve_inline_emphasis(inline_blocks, None)
Beispiel #7
0
    def __check_for_normal_html_blocks(
        remaining_html_tag: str, line_to_parse: str, character_index: int
    ) -> Optional[str]:
        """
        Check for the the html blocks that are harder to identify: 1, 6-7.
        """

        # Block type 1 is decided purely by the tag name.
        if HtmlHelper.__is_valid_block_1_tag_name(remaining_html_tag):
            return HtmlHelper.html_block_1

        html_block_type = None
        (
            adjusted_remaining_html_tag,
            line_to_parse_size,
            is_end_tag,
        ) = HtmlHelper.__check_for_normal_html_blocks_adjust_tag(
            remaining_html_tag, line_to_parse, character_index
        )

        complete_parse_index: Optional[int] = 0
        if adjusted_remaining_html_tag in HtmlHelper.__html_block_6_start:
            html_block_type = HtmlHelper.html_block_6
        elif is_end_tag:
            is_complete, complete_parse_index = HtmlHelper.is_complete_html_end_tag(
                adjusted_remaining_html_tag, line_to_parse, character_index
            )
            if is_complete:
                html_block_type = HtmlHelper.html_block_7
                character_index = complete_parse_index
        else:
            (
                is_complete,
                complete_parse_index,
            ) = HtmlHelper.is_complete_html_start_tag(
                adjusted_remaining_html_tag, line_to_parse, character_index
            )
            if is_complete:
                assert complete_parse_index is not None
                html_block_type = HtmlHelper.html_block_7
                character_index = complete_parse_index

        if html_block_type == HtmlHelper.html_block_7:
            # Block 7 additionally requires only whitespace after the tag.
            new_index, _ = ParserHelper.extract_whitespace(
                line_to_parse, character_index
            )
            if new_index != line_to_parse_size:
                html_block_type = None
        return html_block_type
    def __is_front_matter_valid(
        collected_lines: List[str],
    ) -> Union[Dict[str, str], str]:
        """
        Validate the collected front-matter lines.

        Returns a dictionary mapping lowercased keyword names to their values
        on success, or a string describing why the front matter is invalid.
        """

        ascii_letters_and_digits = f"{string.ascii_letters}{string.digits}_-"

        current_title = ""
        current_value = ""
        value_map: Dict[str, str] = {}

        for next_line in collected_lines:
            # Fixed format string: the placeholder is "$", not "$s"; the old
            # form logged a stray "s" after each substituted value.
            POGGER.debug("Next fm:>$<", next_line)
            next_index, _ = ParserHelper.extract_whitespace(next_line, 0)
            assert next_index is not None
            if next_index >= 4:
                # An indent of 4+ spaces continues the current keyword's value.
                POGGER.debug("Indented line established.")
                if not current_title:
                    return "Continuation line encountered before a keyword line."
                current_value += f"\n{next_line.strip()}"
                POGGER.debug("current_value>$<", current_value)
            else:
                if not next_line.strip():
                    return "Blank line encountered before end of metadata."

                POGGER.debug("Non-indented line established.")
                if current_title:
                    POGGER.debug("Adding '$' as '$'.", current_title,
                                 current_value)
                    # Keys are stored lowercased, consistent with the final
                    # entry added after the loop; previously this insert kept
                    # the original case, producing mixed-case keys.
                    value_map[current_title.lower()] = current_value

                # A keyword line must start with `keyword:`.
                (
                    next_index,
                    collected_title,
                ) = ParserHelper.collect_while_one_of_characters(
                    next_line, next_index, ascii_letters_and_digits)
                assert next_index is not None
                assert collected_title is not None
                current_title = collected_title
                if next_index < len(
                        next_line) and next_line[next_index] == ":":
                    current_value = next_line[next_index + 1:].strip()
                else:
                    return "Newline did not start with `keyword:`."
        if current_title:
            POGGER.debug("Adding final '$' as '$'.", current_title,
                         current_value)
            value_map[current_title.lower()] = current_value

            # This is specifically to trigger test_front_matter_20.
            assert current_title != "test" or current_value != "assert"
        if not value_map:
            return "No valid metadata header lines were found."
        return value_map
Beispiel #9
0
    def is_complete_html_end_tag(tag_name, line_to_parse, next_char_index):
        """
        Determine if the supplied information is a completed end of tag specification.
        """

        has_valid_name = HtmlHelper.is_valid_tag_name(tag_name)
        after_whitespace_index, _ = ParserHelper.extract_whitespace(
            line_to_parse, next_char_index
        )
        # Complete only when the next non-whitespace character closes the tag.
        found_tag_end = (
            after_whitespace_index < len(line_to_parse)
            and line_to_parse[after_whitespace_index] == HtmlHelper.__html_tag_end
        )
        return has_valid_name and found_tag_end, after_whitespace_index + 1
Beispiel #10
0
    def __check_for_normal_html_blocks(
        remaining_html_tag, line_to_parse, character_index
    ):
        """
        Check for the the html blocks that are harder to identify: 1, 6-7.
        """

        # Block type 1 is decided purely by the tag name.
        if HtmlHelper.__is_valid_block_1_tag_name(remaining_html_tag):
            return HtmlHelper.html_block_1

        html_block_type = None
        adjusted_remaining_html_tag, is_end_tag = remaining_html_tag, False
        if adjusted_remaining_html_tag.startswith(HtmlHelper.__html_tag_start):
            # A leading tag-start marker means this is an end tag.
            adjusted_remaining_html_tag = adjusted_remaining_html_tag[1:]
            is_end_tag = True
        if (
            character_index < len(line_to_parse)
            and line_to_parse[character_index] == HtmlHelper.__html_tag_end
            and adjusted_remaining_html_tag.endswith(HtmlHelper.__html_tag_start)
        ):
            adjusted_remaining_html_tag = adjusted_remaining_html_tag[:-1]

        if adjusted_remaining_html_tag in HtmlHelper.__html_block_6_start:
            html_block_type = HtmlHelper.html_block_6
        elif is_end_tag:
            (
                is_complete,
                complete_parse_index,
            ) = HtmlHelper.is_complete_html_end_tag(
                adjusted_remaining_html_tag, line_to_parse, character_index
            )
            if is_complete:
                html_block_type, character_index = (
                    HtmlHelper.html_block_7,
                    complete_parse_index,
                )
        else:
            is_complete, complete_parse_index = HtmlHelper.is_complete_html_start_tag(
                adjusted_remaining_html_tag, line_to_parse, character_index
            )
            if is_complete:
                html_block_type, character_index = (
                    HtmlHelper.html_block_7,
                    complete_parse_index,
                )

        if html_block_type == HtmlHelper.html_block_7:
            # Block 7 additionally requires only whitespace after the tag.
            after_tag_index, _ = ParserHelper.extract_whitespace(
                line_to_parse, character_index
            )
            if after_tag_index != len(line_to_parse):
                html_block_type = None
        return html_block_type
def test_empty_string_with_good_index():
    """
    Make sure that an empty string is handled properly with a good index
    """

    # Arrange
    input_string, start_index = "", 0

    # Act
    actual_output = ParserHelper.extract_whitespace(input_string, start_index)

    # Assert
    assert actual_output == (0, "")
def test_simple_case_from_middle_with_whitespace():
    """
    Make sure that we test a simple extraction from the middle of the string with whitespace
    """

    # Arrange
    input_string, start_index = "this is a test", 4

    # Act
    actual_output = ParserHelper.extract_whitespace(input_string, start_index)

    # Assert
    assert actual_output == (5, " ")
def test_empty_string_with_bad_left_index():
    """
    Make sure that an empty string is handled properly with an index that is too far to the left.
    """

    # Arrange
    input_string, start_index = "", -1

    # Act
    actual_output = ParserHelper.extract_whitespace(input_string, start_index)

    # Assert: an out-of-range index yields no result at all.
    assert actual_output == (None, None)
Beispiel #14
0
    def is_complete_html_end_tag(
        tag_name: str, line_to_parse: str, next_char_index: int
    ) -> Tuple[bool, int]:
        """
        Determine if the supplied information is a completed end of tag specification.
        """

        non_whitespace_index, _ = ParserHelper.extract_whitespace(
            line_to_parse, next_char_index
        )
        assert non_whitespace_index is not None
        # Complete only when the tag name is valid and the next non-whitespace
        # character closes the tag.
        is_valid = (
            HtmlHelper.is_valid_tag_name(tag_name)
            and non_whitespace_index < len(line_to_parse)
            and line_to_parse[non_whitespace_index] == HtmlHelper.__html_tag_end
        )
        return is_valid, non_whitespace_index + 1
    def is_ulist_start(
        parser_state,
        line_to_parse,
        start_index,
        extracted_whitespace,
        skip_whitespace_check=False,
        adj_ws=None,
    ):
        """
        Determine if we have the start of an un-numbered list.
        """
        LOGGER.debug("is_ulist_start>>pre>>")
        is_start = False
        after_all_whitespace_index = -1
        if adj_ws is None:
            adj_ws = extracted_whitespace

        indent_is_acceptable = skip_whitespace_check or \
            ParserHelper.is_length_less_than_or_equal_to(adj_ws, 3)
        has_ulist_character = ParserHelper.is_character_at_index_one_of(
            line_to_parse, start_index,
            ListBlockProcessor.__ulist_start_characters)
        is_followed_properly = ParserHelper.is_character_at_index_whitespace(
            line_to_parse, start_index + 1) or \
            (start_index + 1) == len(line_to_parse)
        if indent_is_acceptable and has_ulist_character and is_followed_properly:

            LOGGER.debug("is_ulist_start>>mid>>")
            after_all_whitespace_index, _ = ParserHelper.extract_whitespace(
                line_to_parse, start_index + 1)
            LOGGER.debug(
                "after_all_whitespace_index>>%s>>len>>%s",
                str(after_all_whitespace_index),
                str(len(line_to_parse)),
            )

            is_break, _ = LeafBlockProcessor.is_thematic_break(
                line_to_parse, start_index, extracted_whitespace)
            # A thematic break wins over a list start, and an empty list item
            # may not interrupt a paragraph when no list is already open.
            interrupts_paragraph = (
                parser_state.token_stack[-1].is_paragraph
                and not parser_state.token_stack[-2].is_list
                and after_all_whitespace_index == len(line_to_parse))
            if not is_break and not interrupts_paragraph:
                is_start = True

        LOGGER.debug("is_ulist_start>>result>>%s", str(is_start))
        return is_start, after_all_whitespace_index
Beispiel #16
0
 def __parse_raw_close_tag(text_to_parse):
     """
     Parse the current line as if it is a close tag, and determine if it is valid.
     """
     if not ParserHelper.is_character_at_index(
         text_to_parse, 0, HtmlHelper.__html_tag_start
     ):
         return None
     tag_name = HtmlHelper.__parse_raw_tag_name(text_to_parse, 1)
     if not tag_name:
         return None
     # After the tag name, only trailing whitespace may remain for the
     # close tag to be valid.
     parse_index, text_size = len(tag_name), len(text_to_parse)
     if parse_index != text_size:
         parse_index, _ = ParserHelper.extract_whitespace(
             text_to_parse, parse_index
         )
     return text_to_parse if parse_index == text_size else None
Beispiel #17
0
 def __parse_raw_close_tag(text_to_parse: str) -> Optional[str]:
     """
     Parse the current line as if it is a close tag, and determine if it is valid.

     Returns the original text when it forms a valid close tag, else None.
     """
     valid_raw_html = None
     if ParserHelper.is_character_at_index(
         text_to_parse, 0, HtmlHelper.__html_tag_start
     ):
         if tag_name := HtmlHelper.__parse_raw_tag_name(text_to_parse, 1):
             parse_index: Optional[int] = len(tag_name)
             assert parse_index is not None
             text_to_parse_size = len(text_to_parse)
             # After the tag name, only trailing whitespace may remain.
             if parse_index != text_to_parse_size:
                 parse_index, _ = ParserHelper.extract_whitespace(
                     text_to_parse, parse_index
                 )
             if parse_index == text_to_parse_size:
                 valid_raw_html = text_to_parse
     # BUG FIX: the computed result was previously discarded because the
     # function ended without a return statement (always yielding None).
     return valid_raw_html
Beispiel #18
0
    def __add_recombined_whitespace(did_recombine, source_text,
                                    inline_response, end_string, is_setext):
        """
        When a recombine occurred, move whitespace found at the response's
        new index out of the source and onto the end string.
        """

        def _visible(value):
            # Render newlines and \x02 markers visibly for the debug log.
            return str(value).replace("\n", "\\n").replace("\x02", "\\x02")

        LOGGER.debug("__arw>>did_recombine>>%s>>", str(did_recombine))
        LOGGER.debug("__arw>>end_string>>%s>>", _visible(end_string))
        if did_recombine:
            LOGGER.debug("__arw>>source_text>>%s>>", _visible(source_text))
            new_index, extracted_whitespace = ParserHelper.extract_whitespace(
                source_text, inline_response.new_index)
            LOGGER.debug(
                "__arw>>%s>>",
                _visible(source_text[0:inline_response.new_index]),
            )
            LOGGER.debug(
                "__arw>>%s>>",
                _visible(source_text[inline_response.new_index:]),
            )
            LOGGER.debug(
                "__arw>>extracted_whitespace>>%s>>",
                _visible(extracted_whitespace),
            )
            if extracted_whitespace:
                inline_response.new_index = new_index
                end_string = (
                    end_string + extracted_whitespace
                    if end_string
                    else extracted_whitespace
                )
                if is_setext:
                    # A \x02 marker is appended in the setext case.
                    end_string += "\x02"
                LOGGER.debug("__arw>>end_string>>%s>>", _visible(end_string))
        return end_string
Beispiel #19
0
    def is_fenced_code_block(
        line_to_parse: str,
        start_index: int,
        extracted_whitespace: Optional[str],
        skip_whitespace_check: bool = False,
    ) -> Tuple[bool, Optional[int], Optional[str], Optional[int]]:
        """
        Determine if we have the start of a fenced code block.
        """

        assert extracted_whitespace is not None
        no_fence_found = (False, None, None, None)
        indent_is_acceptable = skip_whitespace_check or \
            ParserHelper.is_length_less_than_or_equal_to(extracted_whitespace, 3)
        if not indent_is_acceptable or not ParserHelper.is_character_at_index_one_of(
            line_to_parse,
            start_index,
            LeafBlockProcessor.__fenced_code_block_start_characters,
        ):
            return no_fence_found

        POGGER.debug("ifcb:collected_count>>$<<$<<", line_to_parse, start_index)
        collected_count, new_index = ParserHelper.collect_while_character(
            line_to_parse, start_index, line_to_parse[start_index]
        )
        POGGER.debug("ifcb:collected_count:$", collected_count)
        assert collected_count is not None
        assert new_index is not None
        (
            non_whitespace_index,
            extracted_whitespace_before_info_string,
        ) = ParserHelper.extract_whitespace(line_to_parse, new_index)

        # A fence requires at least three identical fence characters.
        if collected_count < 3:
            return no_fence_found
        POGGER.debug("ifcb:True")
        return (
            True,
            non_whitespace_index,
            extracted_whitespace_before_info_string,
            collected_count,
        )
    def __handle_blank_line_init(
            from_main_transform: bool,
            input_line: str) -> Tuple[Optional[List[type]], bool, int, str]:
        """
        Work out which block types may be closed for this blank line and split
        off the line's leading whitespace.
        """
        do_include_block_quotes = from_main_transform
        if from_main_transform:
            close_only_these_blocks: Optional[List[type]] = None
        else:
            # Outside the main transform, only paragraphs may be closed.
            close_only_these_blocks = [ParagraphStackToken]

        POGGER.debug("hbl>>from_main_transform>>$", from_main_transform)
        POGGER.debug("hbl>>close_only_these_blocks>>$",
                     close_only_these_blocks)
        POGGER.debug("hbl>>do_include_block_quotes>>$",
                     do_include_block_quotes)

        non_whitespace_index, extracted_whitespace = ParserHelper.extract_whitespace(
            input_line, 0)
        assert extracted_whitespace is not None
        assert non_whitespace_index is not None
        return (
            close_only_these_blocks,
            do_include_block_quotes,
            non_whitespace_index,
            extracted_whitespace,
        )
Beispiel #21
0
    def is_fenced_code_block(
        line_to_parse,
        start_index,
        extracted_whitespace,
        skip_whitespace_check=False,
    ):
        """
        Determine if we have the start of a fenced code block.
        """

        no_fence_found = (False, None, None, None)
        indent_is_acceptable = skip_whitespace_check or \
            ParserHelper.is_length_less_than_or_equal_to(extracted_whitespace, 3)
        if not indent_is_acceptable or not ParserHelper.is_character_at_index_one_of(
                line_to_parse,
                start_index,
                LeafBlockProcessor.__fenced_code_block_start_characters,
        ):
            return no_fence_found

        LOGGER.debug("ifcb:collected_count>>%s<<%s<<", line_to_parse,
                     str(start_index))
        collected_count, new_index = ParserHelper.collect_while_character(
            line_to_parse, start_index, line_to_parse[start_index])
        LOGGER.debug("ifcb:collected_count:%s", str(collected_count))
        (
            non_whitespace_index,
            extracted_whitespace_before_info_string,
        ) = ParserHelper.extract_whitespace(line_to_parse, new_index)

        # A fence requires at least three identical fence characters.
        if collected_count < 3:
            return no_fence_found
        LOGGER.debug("ifcb:True")
        return (
            True,
            non_whitespace_index,
            extracted_whitespace_before_info_string,
            collected_count,
        )
Beispiel #22
0
    def __adjust_for_block_quote_start(
        force_me,
        original_line_to_parse,
        last_block_quote_index,
        position_marker,
        extracted_whitespace,
    ):
        """
        Block quotes cause indents, which need to be handled specifically.

        Returns a tuple of (did_process, special_parse_start_index,
        whitespace_to_parse, block_quote_adjust_delta).  Adjustment only
        happens when the whitespace after the block quote character in the
        original line contains a tab; otherwise the defaults are returned.
        """

        # Defaults: no special handling, parse the whitespace as extracted.
        did_process = False
        special_parse_start_index = 0
        whitespace_to_parse = extracted_whitespace
        block_quote_adjust_delta = 0

        LOGGER.debug(
            "last_block_quote_index>>%s>>force_me>>%s",
            str(last_block_quote_index),
            str(force_me),
        )
        if last_block_quote_index or force_me:
            LOGGER.debug(
                "original_line_to_parse>[%s]>>last_block_quote_index>>%s",
                original_line_to_parse.replace("\t", "\\t"),
                str(last_block_quote_index),
            )
            # Whitespace in the original (untranslated) line immediately
            # following the block quote character.
            (
                block_quote_after_whitespace_index,
                during_original_whitespace,
            ) = ParserHelper.extract_whitespace(original_line_to_parse,
                                                last_block_quote_index)
            LOGGER.debug(
                "during_original_whitespace>[%s]",
                during_original_whitespace.replace("\t", "\\t"),
            )
            # Only tab characters in that whitespace require the special
            # indent adjustment below.
            if "\t" in during_original_whitespace:

                did_process = True
                LOGGER.debug(
                    ".text_to_parse>[%s]",
                    position_marker.text_to_parse.replace("\t", "\\t"),
                )
                LOGGER.debug(".index_number>>%s",
                             str(position_marker.index_number))
                LOGGER.debug(".index_indent>>%s",
                             str(position_marker.index_indent))
                LOGGER.debug("last_block_quote_index>>%s",
                             str(last_block_quote_index))

                # Make sure everything after the whitespace remains the same.
                text_after_original_whitespace = original_line_to_parse[
                    block_quote_after_whitespace_index:]
                text_after_whitespace = position_marker.text_to_parse[
                    position_marker.index_number:]
                LOGGER.debug(
                    "text_after_original_whitespace>[%s]",
                    text_after_original_whitespace.replace("\t", "\\t"),
                )
                LOGGER.debug(
                    "text_after_whitespace>[%s]",
                    text_after_whitespace.replace("\t", "\\t"),
                )
                assert text_after_original_whitespace == text_after_whitespace

                # Make sure the whitespace is within expected bounds.
                during_current_whitespace = position_marker.text_to_parse[
                    position_marker.index_number -
                    len(extracted_whitespace):position_marker.index_number]
                LOGGER.debug(
                    "during_current_whitespace>[%s]",
                    during_current_whitespace.replace("\t", "\\t"),
                )
                LOGGER.debug(
                    "during_original_whitespace>[%s]",
                    during_original_whitespace.replace("\t", "\\t"),
                )

                current_whitespace_length = len(during_current_whitespace)
                # NOTE(review): calculate_length presumably expands tabs to
                # tab stops; the -1 appears to discount the block quote
                # character itself — confirm against ParserHelper.
                original_whitespace_length = (ParserHelper.calculate_length(
                    during_original_whitespace,
                    start_index=last_block_quote_index) - 1)
                LOGGER.debug(
                    "current_whitespace_length[%s],original_whitespace_length[%s]",
                    str(current_whitespace_length),
                    str(original_whitespace_length),
                )
                assert current_whitespace_length <= original_whitespace_length

                special_parse_start_index = last_block_quote_index + 1
                # A leading tab keeps the whole whitespace run; two leading
                # tabs additionally shift the adjustment delta back by one.
                if during_original_whitespace[0] == "\t":
                    whitespace_to_parse = during_original_whitespace
                    if (len(during_original_whitespace) > 1
                            and during_original_whitespace[1] == "\t"):
                        block_quote_adjust_delta = -1
                else:
                    # Leading space: drop the first whitespace character.
                    whitespace_to_parse = during_original_whitespace[1:]

        return (
            did_process,
            special_parse_start_index,
            whitespace_to_parse,
            block_quote_adjust_delta,
        )
Beispiel #23
0
    def is_complete_html_start_tag(
        tag_name: str, line_to_parse: str, next_char_index: int
    ) -> Tuple[bool, Optional[int]]:
        """
        Determine if the supplied information is a completed start of tag specification.

        Returns a tuple of (is-complete-start-tag, index after the tag and
        any trailing whitespace).
        """

        # A tag is only a candidate when its name is valid and it is not
        # one of the "block 1" names handled elsewhere.
        is_tag_valid = HtmlHelper.is_valid_tag_name(
            tag_name
        ) and not HtmlHelper.__is_valid_block_1_tag_name(tag_name)

        non_whitespace_index, extracted_whitespace = ParserHelper.extract_whitespace(
            line_to_parse, next_char_index
        )
        assert non_whitespace_index is not None
        are_attributes_valid: bool = True
        line_to_parse_size: int = len(line_to_parse)
        # Consume attribute name / optional value pairs until a tag
        # delimiter is reached or an attribute fails to parse (-1).
        while (
            is_tag_valid
            and extracted_whitespace
            and are_attributes_valid
            and 0 <= non_whitespace_index < line_to_parse_size
            and line_to_parse[non_whitespace_index]
            not in [HtmlHelper.__html_tag_end, HtmlHelper.__html_tag_start]
        ):

            non_whitespace_index = HtmlHelper.extract_html_attribute_name(
                line_to_parse, non_whitespace_index
            )
            assert non_whitespace_index is not None
            are_attributes_valid = non_whitespace_index != -1
            if not are_attributes_valid:
                break
            non_whitespace_index = HtmlHelper.extract_optional_attribute_value(
                line_to_parse, non_whitespace_index
            )
            assert non_whitespace_index is not None
            are_attributes_valid = non_whitespace_index != -1
            if not are_attributes_valid:
                break
            (
                non_whitespace_index,
                extracted_whitespace,
            ) = ParserHelper.extract_whitespace(line_to_parse, non_whitespace_index)
            assert non_whitespace_index is not None

        if non_whitespace_index < line_to_parse_size:
            if line_to_parse[non_whitespace_index] == HtmlHelper.__html_tag_start:
                non_whitespace_index += 1
            # Bounds-check before reading: the optional tag-start character
            # consumed above may have been the last character on the line,
            # in which case the unguarded lookup raised an IndexError.
            is_end_of_tag_present = (
                non_whitespace_index < line_to_parse_size
                and line_to_parse[non_whitespace_index] == HtmlHelper.__html_tag_end
            )
            if is_end_of_tag_present:
                non_whitespace_index += 1
        else:
            is_end_of_tag_present = False

        # Any trailing whitespace must bring us exactly to end-of-line for
        # the tag to count as complete.
        non_whitespace_index, _ = ParserHelper.extract_whitespace(
            line_to_parse, non_whitespace_index
        )
        return (
            (
                is_tag_valid
                and is_end_of_tag_present
                and non_whitespace_index == line_to_parse_size
                and are_attributes_valid
            ),
            non_whitespace_index,
        )
Beispiel #24
0
    def __select_line_ending(
        new_tokens: List[MarkdownToken],
        is_proper_hard_break: bool,
        line_number: int,
        adj_hard_column: int,
        current_string: str,
        removed_end_whitespace: str,
        removed_end_whitespace_size: int,
        whitespace_to_add: Optional[str],
        append_to_current_string: str,
        end_string: Optional[str],
        remaining_line: str,
        inline_blocks: List[MarkdownToken],
        is_setext: bool,
    ) -> Tuple[str, Optional[str], str, Optional[str], str]:
        """
        Decide how the end of the current line is represented: a backslash
        hard break, a whitespace (two-or-more spaces) hard break, or a
        normal line ending.  Appends any hard-break token to new_tokens and
        returns the adjusted (current_string, whitespace_to_add,
        append_to_current_string, end_string, remaining_line).
        """
        if is_proper_hard_break:
            # Backslash hard break: drop the trailing backslash from the
            # accumulated string.
            POGGER.debug(">>proper hard break")
            new_tokens.append(
                HardBreakMarkdownToken(InlineHelper.backslash_character,
                                       line_number, adj_hard_column - 1))
            current_string, whitespace_to_add = current_string[:-1], None
            append_to_current_string = ""
        elif removed_end_whitespace_size >= 2:
            # Two or more trailing spaces also form a hard break.
            POGGER.debug(">>whitespace hard break")
            new_tokens.append(
                HardBreakMarkdownToken(removed_end_whitespace, line_number,
                                       adj_hard_column))
            whitespace_to_add = None
            append_to_current_string = ""
        else:
            POGGER.debug(">>normal end")
            POGGER.debug("<<is_setext<<$<<", is_setext)
            POGGER.debug("<<inline_blocks<<$<<", inline_blocks)
            POGGER.debug("<<current_string<<$<<", current_string)
            POGGER.debug("<<remaining_line<<$<<", remaining_line)
            POGGER.debug("<<end_string<<$<<", end_string)
            POGGER.debug("<<removed_end_whitespace<<$<<",
                         removed_end_whitespace)
            # Setext special case: after a hard break with nothing pending,
            # fold the leading whitespace of the next line into end_string.
            if (is_setext and inline_blocks
                    and inline_blocks[-1].is_inline_hard_break
                    and not current_string):
                new_index, ex_ws = ParserHelper.extract_whitespace(
                    remaining_line, 0)
                POGGER.debug("<<new_index<<$<<", new_index)
                POGGER.debug("<<ex_ws<<$<<", ex_ws)
                assert new_index
                end_string = f"{ex_ws}{ParserHelper.whitespace_split_character}"
                remaining_line = remaining_line[new_index:]

            end_string = InlineHelper.modify_end_string(
                end_string, removed_end_whitespace)
            POGGER.debug("<<end_string<<$<<", end_string)

        POGGER.debug(
            "<<append_to_current_string<<$<<",
            append_to_current_string,
        )
        POGGER.debug(
            "<<whitespace_to_add<<$<<",
            whitespace_to_add,
        )
        POGGER.debug("<<remaining_line<<$<<", remaining_line)
        POGGER.debug("<<end_string<<$<<", end_string)
        POGGER.debug("<<current_string<<$<<", current_string)
        return (
            current_string,
            whitespace_to_add,
            append_to_current_string,
            end_string,
            remaining_line,
        )
Beispiel #25
0
    def parse_atx_headings(parser_state, position_marker,
                           extracted_whitespace):
        """
        Handle the parsing of an atx heading.

        Returns the list of new tokens (empty if the line is not an atx
        heading): start token, text token, and matching end token.
        """

        new_tokens = []

        # An atx heading may be indented at most 3 characters and must
        # start with the atx character ('#').
        if ParserHelper.is_length_less_than_or_equal_to(
                extracted_whitespace,
                3) and ParserHelper.is_character_at_index(
                    position_marker.text_to_parse,
                    position_marker.index_number,
                    LeafBlockProcessor.__atx_character,
                ):
            hash_count, new_index = ParserHelper.collect_while_character(
                position_marker.text_to_parse,
                position_marker.index_number,
                LeafBlockProcessor.__atx_character,
            )
            (
                non_whitespace_index,
                extracted_whitespace_at_start,
            ) = ParserHelper.extract_whitespace(position_marker.text_to_parse,
                                                new_index)

            # At most 6 hashes, and the hashes must be followed by
            # whitespace or end-of-line.
            if hash_count <= 6 and (extracted_whitespace_at_start
                                    or non_whitespace_index == len(
                                        position_marker.text_to_parse)):

                new_tokens, _, _ = parser_state.close_open_blocks_fn(
                    parser_state, new_tokens)
                remaining_line = position_marker.text_to_parse[
                    non_whitespace_index:]
                (
                    end_index,
                    extracted_whitespace_at_end,
                ) = ParserHelper.extract_whitespace_from_end(remaining_line)
                # Strip an optional run of trailing hashes (closing
                # sequence), remembering how many were removed.
                remove_trailing_count = 0
                while (end_index > 0 and remaining_line[end_index - 1]
                       == LeafBlockProcessor.__atx_character):
                    end_index -= 1
                    remove_trailing_count += 1
                extracted_whitespace_before_end = ""
                if remove_trailing_count:
                    if end_index > 0:
                        # Closing hashes only count if preceded by
                        # whitespace; otherwise they are heading text.
                        if ParserHelper.is_character_at_index_whitespace(
                                remaining_line, end_index - 1):
                            remaining_line = remaining_line[:end_index]
                            (
                                end_index,
                                extracted_whitespace_before_end,
                            ) = ParserHelper.extract_whitespace_from_end(
                                remaining_line)
                            remaining_line = remaining_line[:end_index]
                        else:
                            extracted_whitespace_at_end = ""
                            remove_trailing_count = 0
                    else:
                        # Line was nothing but hashes: heading text is empty.
                        remaining_line = ""
                else:
                    extracted_whitespace_at_end = remaining_line[end_index:]
                    remaining_line = remaining_line[0:end_index]
                start_token = AtxHeadingMarkdownToken(
                    hash_count,
                    remove_trailing_count,
                    extracted_whitespace,
                    position_marker,
                )
                new_tokens.append(start_token)
                new_tokens.append(
                    TextMarkdownToken(remaining_line,
                                      extracted_whitespace_at_start))
                end_token = EndMarkdownToken(
                    "atx",
                    extracted_whitespace_at_end,
                    extracted_whitespace_before_end,
                    None,
                )
                end_token.start_markdown_token = start_token
                new_tokens.append(end_token)
        return new_tokens
    def __parse_line_for_leaf_blocks(
        parser_state,
        xposition_marker,
        this_bq_count,
        removed_chars_at_start,
        no_para_start_if_empty,
        ignore_link_definition_start,
        original_line_to_parsex,
        last_block_quote_index,
        last_list_start_index,
    ):
        """
        Parse the contents of a line for a leaf block.

        Tries each leaf block type in priority order (fenced code, link
        reference definition, HTML block, atx heading, indented code,
        setext heading, thematic break) and falls back to a paragraph.
        Returns (pre_tokens + new tokens, requeue information).
        """
        LOGGER.debug(
            "Leaf Line:%s:", xposition_marker.text_to_parse.replace("\t", "\\t")
        )
        new_tokens = []

        requeue_line_info = RequeueLineInfo()
        # TODO rename to avoid collision with parameter
        original_line_to_parse = xposition_marker.text_to_parse[
            xposition_marker.index_number :
        ]
        (new_index_number, extracted_whitespace,) = ParserHelper.extract_whitespace(
            xposition_marker.text_to_parse, xposition_marker.index_number
        )
        # Re-anchor the position marker past any leading whitespace.
        position_marker = PositionMarker(
            xposition_marker.line_number,
            new_index_number,
            xposition_marker.text_to_parse,
            index_indent=xposition_marker.index_indent,
        )

        # An open indented code block closes if this line is not indented
        # far enough to continue it.
        pre_tokens = ContainerBlockProcessor.__close_indented_block_if_indent_not_there(
            parser_state, extracted_whitespace
        )

        # Each __handle_* call below short-circuits once one of them has
        # consumed the line (outer_processed becomes True).
        outer_processed = False
        outer_processed = ContainerBlockProcessor.__handle_fenced_code_block(
            parser_state,
            outer_processed,
            position_marker,
            extracted_whitespace,
            new_tokens,
        )

        (
            outer_processed,
            requeue_line_info.lines_to_requeue,
            requeue_line_info.force_ignore_first_as_lrd,
        ) = ContainerBlockProcessor.__handle_link_reference_definition(
            parser_state,
            outer_processed,
            position_marker,
            extracted_whitespace,
            original_line_to_parse,
            ignore_link_definition_start,
            pre_tokens,
        )

        outer_processed = ContainerBlockProcessor.__handle_html_block(
            parser_state,
            outer_processed,
            position_marker,
            extracted_whitespace,
            new_tokens,
        )

        if not outer_processed:
            assert not new_tokens
            # Try the remaining leaf block types in order; the first one
            # that produces tokens wins, paragraph is the final fallback.
            new_tokens = LeafBlockProcessor.parse_atx_headings(
                parser_state, position_marker, extracted_whitespace
            )
            if not new_tokens:
                new_tokens = LeafBlockProcessor.parse_indented_code_block(
                    parser_state,
                    position_marker,
                    extracted_whitespace,
                    removed_chars_at_start,
                    original_line_to_parsex,
                    last_block_quote_index,
                    last_list_start_index,
                )
            if not new_tokens:
                stack_bq_count = BlockQuoteProcessor.count_of_block_quotes_on_stack(
                    parser_state
                )
                new_tokens = LeafBlockProcessor.parse_setext_headings(
                    parser_state,
                    position_marker,
                    extracted_whitespace,
                    this_bq_count,
                    stack_bq_count,
                )
            if not new_tokens:
                stack_bq_count = BlockQuoteProcessor.count_of_block_quotes_on_stack(
                    parser_state
                )
                new_tokens = LeafBlockProcessor.parse_thematic_break(
                    parser_state,
                    position_marker,
                    extracted_whitespace,
                    this_bq_count,
                    stack_bq_count,
                )
            if not new_tokens:
                stack_bq_count = BlockQuoteProcessor.count_of_block_quotes_on_stack(
                    parser_state
                )
                new_tokens = LeafBlockProcessor.parse_paragraph(
                    parser_state,
                    position_marker,
                    extracted_whitespace,
                    this_bq_count,
                    no_para_start_if_empty,
                    stack_bq_count,
                )

        # assert new_tokens or did_complete_lrd or did_pause_lrd or lines_to_requeue
        LOGGER.debug(">>leaf--adding>>%s", str(new_tokens))
        pre_tokens.extend(new_tokens)
        return pre_tokens, requeue_line_info
Beispiel #27
0
    def process_link_reference_definition(
        parser_state,
        position_marker,
        original_line_to_parse,
        extracted_whitespace,
    ):
        """
        Process a link deference definition.  Note, this requires a lot of work to
        handle properly because of partial definitions across lines.

        Returns a tuple of (did_start_or_continue, did_complete_lrd,
        did_pause_lrd, lines_to_requeue, force_ignore_first_as_lrd,
        new_tokens).
        """
        line_to_parse = position_marker.text_to_parse
        start_index = position_marker.index_number

        did_pause_lrd = False
        lines_to_requeue = []
        new_tokens = []
        force_ignore_first_as_lrd = False

        was_started = False
        is_blank_line = not line_to_parse and not start_index
        # If a definition is already in progress, re-parse it with this
        # line joined onto the accumulated continuation lines.
        if parser_state.token_stack[-1].was_link_definition_started:
            was_started = True
            LOGGER.debug(
                ">>continuation_lines>>%s<<",
                str(parser_state.token_stack[-1].continuation_lines),
            )
            line_to_parse = parser_state.token_stack[-1].get_joined_lines(
                line_to_parse)
            start_index, extracted_whitespace = ParserHelper.extract_whitespace(
                line_to_parse, 0)
            LOGGER.debug(">>line_to_parse>>%s<<",
                         line_to_parse.replace("\n", "\\n"))

        if was_started:
            LOGGER.debug(">>parse_link_reference_definition>>was_started")
            (
                did_complete_lrd,
                end_lrd_index,
                parsed_lrd_tuple,
            ) = LinkReferenceDefinitionHelper.__parse_link_reference_definition(
                parser_state,
                line_to_parse,
                start_index,
                extracted_whitespace,
                is_blank_line,
            )
            LOGGER.debug(
                ">>parse_link_reference_definition>>was_started>>did_complete_lrd>>%s>>end_lrd_index>>%s>>len(line_to_parse)>>%s",
                str(did_complete_lrd),
                str(end_lrd_index),
                str(len(line_to_parse)),
            )

            # Hard failure: the joined text neither completes a definition
            # nor can it still become one; back lines off the end to find
            # any valid definition hiding in the accumulated text.
            if not (did_complete_lrd or
                    (not is_blank_line and not did_complete_lrd and
                     (end_lrd_index == len(line_to_parse)))):
                LOGGER.debug(
                    ">>parse_link_reference_definition>>was_started>>GOT HARD FAILURE"
                )
                (
                    is_blank_line,
                    line_to_parse,
                    did_complete_lrd,
                    end_lrd_index,
                    parsed_lrd_tuple,
                ) = LinkReferenceDefinitionHelper.__process_lrd_hard_failure(
                    parser_state, original_line_to_parse, lines_to_requeue)
        else:
            (
                did_complete_lrd,
                end_lrd_index,
                parsed_lrd_tuple,
            ) = LinkReferenceDefinitionHelper.__parse_link_reference_definition(
                parser_state,
                line_to_parse,
                start_index,
                extracted_whitespace,
                is_blank_line,
            )
            LOGGER.debug(
                ">>parse_link_reference_definition>>did_complete_lrd>>%s>>end_lrd_index>>%s>>len(line_to_parse)>>%s",
                str(did_complete_lrd),
                str(end_lrd_index),
                str(len(line_to_parse)),
            )
        # Parsing consumed the whole line without finishing: pause and
        # wait for the next line to continue the definition.
        if (end_lrd_index >= 0 and end_lrd_index == len(line_to_parse)
                and not is_blank_line):
            LinkReferenceDefinitionHelper.__add_line_for_lrd_continuation(
                parser_state,
                position_marker,
                was_started,
                original_line_to_parse,
                extracted_whitespace,
            )
            did_pause_lrd = True
        elif was_started:
            # The in-progress definition has ended (completed or failed).
            (
                force_ignore_first_as_lrd,
                new_tokens,
            ) = LinkReferenceDefinitionHelper.__stop_lrd_continuation(
                parser_state,
                did_complete_lrd,
                parsed_lrd_tuple,
                end_lrd_index,
                original_line_to_parse,
                is_blank_line,
            )
        else:
            LOGGER.debug(">>parse_link_reference_definition>>other")

        return (
            did_complete_lrd or end_lrd_index != -1,
            did_complete_lrd,
            did_pause_lrd,
            lines_to_requeue,
            force_ignore_first_as_lrd,
            new_tokens,
        )
    def parse_line_for_container_blocks(
        parser_state,
        position_marker,
        ignore_link_definition_start,
        container_depth=0,
        foobar=None,
        init_bq=None,
    ):
        """
        Parse the line, taking care to handle any container blocks before deciding
        whether or not to pass the (remaining parts of the) line to the leaf block
        processor.

        Returns a tuple of (container-level tokens, remaining line text,
        requeue information — None when called recursively with a
        non-zero container_depth).
        """
        # TODO work on removing this
        line_to_parse = position_marker.text_to_parse
        original_line_to_parse = position_marker.text_to_parse + ""

        LOGGER.debug("Line:%s:", position_marker.text_to_parse)
        no_para_start_if_empty = False

        start_index, extracted_whitespace = ParserHelper.extract_whitespace(
            line_to_parse, 0
        )

        # Snapshot of the container state needed by the block quote and
        # list handlers below.
        (
            current_container_blocks,
            adj_ws,
            stack_bq_count,
            this_bq_count,
        ) = ContainerBlockProcessor.__calculate_for_container_blocks(
            parser_state, line_to_parse, extracted_whitespace, foobar, init_bq,
        )

        new_position_marker = PositionMarker(
            position_marker.line_number, start_index, position_marker.text_to_parse
        )

        # Phase 1: block quote handling.
        end_container_indices = ContainerIndices(-1, -1, -1)
        (
            did_process,
            was_container_start,
            end_container_indices.block_index,
            this_bq_count,
            stack_bq_count,
            line_to_parse,
            start_index,
            leaf_tokens,
            container_level_tokens,
            removed_chars_at_start,
            did_blank,
            last_block_quote_index,
        ) = BlockQuoteProcessor.handle_block_quote_block(
            parser_state,
            new_position_marker,
            extracted_whitespace,
            adj_ws,
            this_bq_count,
            stack_bq_count,
        )
        LOGGER.debug("text>>%s>>", line_to_parse)
        LOGGER.debug(">>container_level_tokens>>%s", str(container_level_tokens))
        LOGGER.debug(">>did_blank>>%s", did_blank)
        # A blank line inside a block quote finishes processing here.
        if did_blank:
            container_level_tokens.extend(leaf_tokens)
            return container_level_tokens, line_to_parse, RequeueLineInfo()

        # TODO refactor so it doesn't need this!
        new_position_marker = PositionMarker(
            position_marker.line_number, start_index, line_to_parse
        )

        LOGGER.debug(
            "pre-ulist>>#%s#%s#%s#",
            str(position_marker.index_number),
            str(position_marker.index_indent),
            position_marker.text_to_parse.replace("\t", "\\t"),
        )
        LOGGER.debug(
            "pre-ulist>>#%s#%s#%s#",
            str(new_position_marker.index_number),
            str(new_position_marker.index_indent),
            new_position_marker.text_to_parse.replace("\t", "\\t"),
        )
        # Phase 2: unordered list handling.
        (
            did_process,
            was_container_start,
            end_container_indices.ulist_index,
            no_para_start_if_empty,
            line_to_parse,
            resultant_tokens,
            removed_chars_at_start,
        ) = ListBlockProcessor.handle_ulist_block(
            parser_state,
            did_process,
            was_container_start,
            no_para_start_if_empty,
            new_position_marker,
            extracted_whitespace,
            adj_ws,
            stack_bq_count,
            this_bq_count,
            removed_chars_at_start,
            current_container_blocks,
        )
        container_level_tokens.extend(resultant_tokens)
        LOGGER.debug(
            "post-ulist>>#%s#%s#%s#",
            str(position_marker.index_number),
            str(position_marker.index_indent),
            position_marker.text_to_parse.replace("\t", "\\t"),
        )
        LOGGER.debug(
            "post-ulist>>#%s#%s#%s#",
            str(new_position_marker.index_number),
            str(new_position_marker.index_indent),
            new_position_marker.text_to_parse.replace("\t", "\\t"),
        )
        LOGGER.debug("text>>%s>>", line_to_parse)

        new_position_marker = PositionMarker(
            position_marker.line_number, start_index, line_to_parse
        )

        LOGGER.debug(
            "pre-olist>>#%s#%s#%s#",
            str(position_marker.index_number),
            str(position_marker.index_indent),
            position_marker.text_to_parse.replace("\t", "\\t"),
        )
        LOGGER.debug(
            "pre-olist>>#%s#%s#%s#",
            str(new_position_marker.index_number),
            str(new_position_marker.index_indent),
            new_position_marker.text_to_parse.replace("\t", "\\t"),
        )
        # Phase 3: ordered list handling.
        (
            did_process,
            was_container_start,
            end_container_indices.olist_index,
            no_para_start_if_empty,
            line_to_parse,
            resultant_tokens,
            removed_chars_at_start,
        ) = ListBlockProcessor.handle_olist_block(
            parser_state,
            did_process,
            was_container_start,
            no_para_start_if_empty,
            new_position_marker,
            extracted_whitespace,
            adj_ws,
            stack_bq_count,
            this_bq_count,
            removed_chars_at_start,
            current_container_blocks,
        )
        container_level_tokens.extend(resultant_tokens)
        LOGGER.debug(
            "post-olist>>#%s#%s#%s#",
            str(position_marker.index_number),
            str(position_marker.index_indent),
            position_marker.text_to_parse.replace("\t", "\\t"),
        )
        LOGGER.debug(
            "post-olist>>#%s#%s#%s#",
            str(new_position_marker.index_number),
            str(new_position_marker.index_indent),
            new_position_marker.text_to_parse.replace("\t", "\\t"),
        )
        LOGGER.debug("text>>%s>>", line_to_parse.replace(" ", "\\s"))

        LOGGER.debug("last_block_quote_index>>%s", str(last_block_quote_index))

        LOGGER.debug("olist_index>>%s", str(end_container_indices.olist_index))
        LOGGER.debug("ulist_index>>%s", str(end_container_indices.ulist_index))
        LOGGER.debug("block_index>>%s", str(end_container_indices.block_index))

        # Record where the innermost list (if any) started; a block quote
        # index must line up with the block quote character position.
        last_list_start_index = 0
        if end_container_indices.block_index != -1:
            assert last_block_quote_index in (
                end_container_indices.block_index - 1,
                end_container_indices.block_index,
            )
        elif end_container_indices.olist_index != -1:
            last_list_start_index = end_container_indices.olist_index
        elif end_container_indices.ulist_index != -1:
            last_list_start_index = end_container_indices.ulist_index

        # Phase 4: recurse into nested containers, unless a fenced code
        # block is open (its content is taken literally).
        if not parser_state.token_stack[-1].is_fenced_code_block:
            new_position_marker = PositionMarker(
                position_marker.line_number, start_index, line_to_parse
            )
            LOGGER.debug(
                "__handle_nested_container_blocks>>%s>>",
                line_to_parse.replace(" ", "\\s"),
            )
            (
                line_to_parse,
                leaf_tokens,
                container_level_tokens,
                no_para_start_if_empty,
            ) = ContainerBlockProcessor.__handle_nested_container_blocks(
                parser_state,
                container_depth,
                this_bq_count,
                stack_bq_count,
                no_para_start_if_empty,
                new_position_marker,
                end_container_indices,
                leaf_tokens,
                container_level_tokens,
                was_container_start,
            )
            LOGGER.debug("text>>%s>>", line_to_parse.replace(" ", "\\s"))

        LOGGER.debug("removed_chars_at_start>>>%s", str(removed_chars_at_start))

        # Recursive (nested) calls stop here; only the outermost call
        # goes on to leaf block processing.
        if container_depth:
            assert not leaf_tokens
            LOGGER.debug(">>>>>>>>%s<<<<<<<<<<", line_to_parse)
            return container_level_tokens, line_to_parse, None

        LOGGER.debug(
            ">>__process_list_in_progress>>%s>>", line_to_parse.replace(" ", "\\s")
        )
        (
            did_process,
            line_to_parse,
            container_level_tokens,
        ) = ContainerBlockProcessor.__process_list_in_progress(
            parser_state,
            did_process,
            line_to_parse,
            start_index,
            container_level_tokens,
            extracted_whitespace,
        )
        LOGGER.debug(
            ">>__process_list_in_progress>>%s>>", line_to_parse.replace(" ", "\\s")
        )
        ContainerBlockProcessor.__process_lazy_lines(
            parser_state,
            leaf_tokens,
            this_bq_count,
            stack_bq_count,
            line_to_parse,
            extracted_whitespace,
            did_process,
            container_level_tokens,
        )
        LOGGER.debug("text>>%s>>", line_to_parse.replace(" ", "\\s"))

        # TODO refactor to make indent unnecessary?
        # How many characters the container handling consumed from the
        # front of the original line.
        calculated_indent = len(original_line_to_parse) - len(line_to_parse)
        LOGGER.debug(">>indent>>%s", str(calculated_indent))

        # Phase 5: hand what is left of the line to the leaf processor.
        newer_position_marker = PositionMarker(
            position_marker.line_number,
            start_index,
            line_to_parse,
            index_indent=calculated_indent,
        )
        leaf_tokens, requeue_line_info = ContainerBlockProcessor.__process_leaf_tokens(
            parser_state,
            leaf_tokens,
            newer_position_marker,
            this_bq_count,
            removed_chars_at_start,
            no_para_start_if_empty,
            ignore_link_definition_start,
            original_line_to_parse,
            last_block_quote_index,
            last_list_start_index,
        )

        container_level_tokens.extend(leaf_tokens)
        LOGGER.debug(
            "clt-end>>%s>>%s<<",
            str(len(container_level_tokens)),
            str(container_level_tokens),
        )
        return container_level_tokens, line_to_parse, requeue_line_info
Beispiel #29
0
    def __process_lrd_hard_failure(parser_state, original_line_to_parse,
                                   lines_to_requeue):
        """
        Recover from a hard LRD failure.  Continuation lines were appended to
        the original line, which obscures whether a valid link reference
        definition is buried in the accumulated text.  Peel continuation
        lines off the end, one per pass, re-parsing the remainder each time
        until it parses as a complete LRD or no continuation lines remain.
        """
        is_blank_line, line_to_parse = None, None
        did_complete_lrd, end_lrd_index, parsed_lrd_tuple = None, None, None

        parser_state.token_stack[-1].add_continuation_line(
            original_line_to_parse)

        keep_trying = True
        while keep_trying and parser_state.token_stack[-1].continuation_lines:
            # Bind the top-of-stack token for this pass only; the parse call
            # below may alter the stack, so do not cache across iterations.
            stack_top = parser_state.token_stack[-1]
            LOGGER.debug(
                "continuation_lines>>%s<<",
                str(stack_top.continuation_lines),
            )

            # Move the most recent continuation line over to the requeue list.
            lines_to_requeue.append(stack_top.continuation_lines[-1])
            LOGGER.debug(
                ">>continuation_line>>%s",
                str(stack_top.continuation_lines[-1]),
            )
            del stack_top.continuation_lines[-1]
            LOGGER.debug(
                ">>lines_to_requeue>>%s>>%s",
                str(lines_to_requeue),
                str(len(lines_to_requeue)),
            )
            LOGGER.debug(
                ">>continuation_lines>>%s<<",
                str(stack_top.continuation_lines),
            )

            # Re-parse the remaining joined text as if followed by a blank
            # line, dropping the trailing newline first.
            is_blank_line = True
            line_to_parse = stack_top.get_joined_lines("")[:-1]
            start_index, extracted_whitespace = ParserHelper.extract_whitespace(
                line_to_parse, 0)
            LOGGER.debug(">>line_to_parse>>%s<<",
                         line_to_parse.replace("\n", "\\n"))
            (
                did_complete_lrd,
                end_lrd_index,
                parsed_lrd_tuple,
            ) = LinkReferenceDefinitionHelper.__parse_link_reference_definition(
                parser_state,
                line_to_parse,
                start_index,
                extracted_whitespace,
                is_blank_line,
            )
            LOGGER.debug(
                ">>parse_link_reference_definition>>was_started>>did_complete_lrd>>%s>>end_lrd_index>>%s>>len(line_to_parse)>>%s",
                str(did_complete_lrd),
                str(end_lrd_index),
                str(len(line_to_parse)),
            )
            keep_trying = not did_complete_lrd
        return (
            is_blank_line,
            line_to_parse,
            did_complete_lrd,
            end_lrd_index,
            parsed_lrd_tuple,
        )
Beispiel #30
0
    def is_complete_html_start_tag(tag_name, line_to_parse, next_char_index):
        """
        Determine if the supplied information is a completed start of tag specification.
        """

        # The tag name itself must be valid and must not be one of the
        # "block 1" tag names, which are handled elsewhere.
        is_tag_valid = HtmlHelper.is_valid_tag_name(
            tag_name
        ) and not HtmlHelper.__is_valid_block_1_tag_name(tag_name)

        parse_index, extracted_whitespace = ParserHelper.extract_whitespace(
            line_to_parse, next_char_index
        )

        # Scan "name" / "name=value" attribute pairs until the tag's closing
        # characters appear, the line ends, or an attribute fails to parse.
        are_attributes_valid = True
        while is_tag_valid and extracted_whitespace and are_attributes_valid:
            if not 0 <= parse_index < len(line_to_parse):
                break
            if line_to_parse[parse_index] in (
                HtmlHelper.__html_tag_end,
                HtmlHelper.__html_tag_start,
            ):
                break

            parse_index = HtmlHelper.extract_html_attribute_name(
                line_to_parse, parse_index
            )
            if parse_index == -1:
                are_attributes_valid = False
                break
            parse_index = HtmlHelper.extract_optional_attribute_value(
                line_to_parse, parse_index
            )
            if parse_index == -1:
                are_attributes_valid = False
                break
            (
                parse_index,
                extracted_whitespace,
            ) = ParserHelper.extract_whitespace(line_to_parse, parse_index)

        # An optional "start" character may precede the required "end"
        # character that completes the tag.
        is_end_of_tag_present = False
        if (
            parse_index < len(line_to_parse)
            and line_to_parse[parse_index] == HtmlHelper.__html_tag_start
        ):
            parse_index += 1
        if (
            parse_index < len(line_to_parse)
            and line_to_parse[parse_index] == HtmlHelper.__html_tag_end
        ):
            parse_index += 1
            is_end_of_tag_present = True

        # Only trailing whitespace may follow a completed start tag.
        parse_index, _ = ParserHelper.extract_whitespace(
            line_to_parse, parse_index
        )
        at_eol = parse_index == len(line_to_parse)
        is_complete = (
            is_tag_valid
            and is_end_of_tag_present
            and at_eol
            and are_attributes_valid
        )
        return is_complete, parse_index