Example #1
0
    def parse_setext_headings(
        parser_state,
        position_marker,
        extracted_whitespace,
        this_bq_count,
        stack_bq_count,
    ):
        """
        Handle the parsing of an setext heading.

        Returns the list of new tokens that were created (an end token for
        the heading, if one was found).  As a side effect, the matched
        paragraph token in parser_state.token_document is replaced in place
        with a SetextHeadingMarkdownToken and the paragraph entry is popped
        off of parser_state.token_stack.
        """

        new_tokens = []
        # A setext underline requires: at most 3 spaces of leading
        # whitespace, the current character being one of the setext
        # characters, an open paragraph on the stack, and the line's
        # block-quote count matching the count recorded on the stack.
        if (ParserHelper.is_length_less_than_or_equal_to(
                extracted_whitespace, 3)
                and ParserHelper.is_character_at_index_one_of(
                    position_marker.text_to_parse,
                    position_marker.index_number,
                    LeafBlockProcessor.__setext_characters,
                ) and parser_state.token_stack[-1].is_paragraph
                and (this_bq_count == stack_bq_count)):
            # Consume the full run of the repeated underline character.
            _, collected_to_index = ParserHelper.collect_while_character(
                position_marker.text_to_parse,
                position_marker.index_number,
                position_marker.text_to_parse[position_marker.index_number],
            )
            (
                after_whitespace_index,
                extra_whitespace_after_setext,
            ) = ParserHelper.extract_whitespace(position_marker.text_to_parse,
                                                collected_to_index)
            # The underline only counts if nothing but whitespace follows it
            # to the end of the line.
            if after_whitespace_index == len(position_marker.text_to_parse):

                # This is unusual.  Normally, close_open_blocks is used to close off
                # blocks based on the stack token.  However, since the setext takes
                # the last paragraph of text (see case 61) and translates it
                # into a heading, this has to be done separately, as there is no
                # stack token to close.
                new_tokens.append(
                    EndMarkdownToken(
                        MarkdownToken.token_setext_heading,
                        extracted_whitespace,
                        extra_whitespace_after_setext,
                        None,
                    ))
                # Walk backwards through the document to locate the paragraph
                # token that this underline converts into a heading.
                token_index = len(parser_state.token_document) - 1
                while not parser_state.token_document[token_index].is_paragraph:
                    token_index -= 1

                replacement_token = SetextHeadingMarkdownToken(
                    position_marker.text_to_parse[
                        position_marker.index_number],
                    collected_to_index - position_marker.index_number,
                    parser_state.token_document[token_index].extra_data,
                    position_marker,
                    parser_state.token_document[token_index],
                )
                # Swap the paragraph token for the heading token and pop the
                # now-stale paragraph entry off of the stack.
                parser_state.token_document[token_index] = replacement_token
                del parser_state.token_stack[-1]
        return new_tokens
Example #2
0
    def is_atx_heading(
        line_to_parse: str,
        start_index: int,
        extracted_whitespace: Optional[str],
        skip_whitespace_check: bool = False,
    ) -> Tuple[bool, Optional[int], Optional[int], Optional[str]]:
        """
        Determine whether or not an ATX Heading is about to start.

        Returns a (found, index-after-whitespace, hash-count,
        whitespace-after-hashes) tuple, with the trailing elements None
        when no heading start was found.
        """

        assert extracted_whitespace is not None
        indent_is_acceptable = ParserHelper.is_length_less_than_or_equal_to(
            extracted_whitespace, 3
        ) or skip_whitespace_check
        if not (
            indent_is_acceptable
            and ParserHelper.is_character_at_index(
                line_to_parse,
                start_index,
                LeafBlockProcessor.__atx_character,
            )
        ):
            return False, None, None, None

        # Measure the run of "#" characters, then the spaces that follow it.
        hash_count, new_index = ParserHelper.collect_while_character(
            line_to_parse,
            start_index,
            LeafBlockProcessor.__atx_character,
        )
        assert new_index is not None
        _, non_whitespace_index = ParserHelper.collect_while_character(
            line_to_parse, new_index, " "
        )
        extracted_whitespace_at_start = line_to_parse[
            new_index:non_whitespace_index
        ]

        assert hash_count is not None
        # A valid heading has 1-6 hashes followed by whitespace or line end.
        if hash_count <= 6 and (
            extracted_whitespace_at_start
            or non_whitespace_index == len(line_to_parse)
        ):
            return (
                True,
                non_whitespace_index,
                hash_count,
                extracted_whitespace_at_start,
            )
        return False, None, None, None
Example #3
0
    def parse_setext_headings(
        parser_state: ParserState,
        position_marker: PositionMarker,
        extracted_whitespace: Optional[str],
        block_quote_data: BlockQuoteData,
    ) -> List[MarkdownToken]:
        """
        Handle the parsing of an setext heading.

        Returns the list of new tokens created for the heading, empty when
        the current line is not a valid setext underline.
        """

        new_tokens: List[MarkdownToken] = []
        assert extracted_whitespace is not None
        # Eligibility: limited indent, a setext character at the current
        # position, an open paragraph, and matching block-quote counts.
        is_possible_setext = (
            ParserHelper.is_length_less_than_or_equal_to(extracted_whitespace, 3)
            and ParserHelper.is_character_at_index_one_of(
                position_marker.text_to_parse,
                position_marker.index_number,
                LeafBlockProcessor.__setext_characters,
            )
            and parser_state.token_stack[-1].is_paragraph
            and (block_quote_data.current_count == block_quote_data.stack_count)
        )
        if not is_possible_setext:
            return new_tokens

        is_paragraph_continuation = (
            LeafBlockProcessor.__adjust_continuation_for_active_list(
                parser_state, position_marker
            )
        )

        # Consume the run of the underline character, then any trailing
        # whitespace after it.
        line_text = position_marker.text_to_parse
        _, collected_to_index = ParserHelper.collect_while_character(
            line_text,
            position_marker.index_number,
            line_text[position_marker.index_number],
        )
        assert collected_to_index is not None
        (
            after_whitespace_index,
            extra_whitespace_after_setext,
        ) = ParserHelper.extract_whitespace(line_text, collected_to_index)

        # Only emit the heading if the underline is not a list continuation
        # and nothing but whitespace follows it on the line.
        if not is_paragraph_continuation and after_whitespace_index == len(
            line_text
        ):
            LeafBlockProcessor.__create_setext_token(
                parser_state,
                position_marker,
                collected_to_index,
                new_tokens,
                extracted_whitespace,
                extra_whitespace_after_setext,
            )
        return new_tokens
def test_simple_case_from_start_without_whitespace():
    """
    Make sure that we test a simple extraction from the start of the string without whitespace.
    """

    # Arrange: index 2 points at the first non-space character, so no run
    # of the match character should be collected.
    source = "  this is a test"
    starting_point = 2
    match_character = " "

    # Act
    result = ParserHelper.collect_while_character(
        source, starting_point, match_character
    )

    # Assert: zero characters collected, index unchanged.
    assert result == (0, 2)
def test_empty_string_with_good_index():
    """
    Make sure that an empty string is handled properly with a good index
    """

    # Arrange: index 0 is the only valid index into an empty string.
    source = ""
    starting_point = 0
    match_character = " "

    # Act
    result = ParserHelper.collect_while_character(
        source, starting_point, match_character
    )

    # Assert: zero characters collected, index unchanged.
    assert result == (0, 0)
def test_empty_string_with_bad_left_index():
    """
    Make sure that an empty string is handled properly with an index that is too far to the left.
    """

    # Arrange: a negative index is out of range for the function.
    source = ""
    starting_point = -1
    match_character = " "

    # Act
    result = ParserHelper.collect_while_character(
        source, starting_point, match_character
    )

    # Assert: an invalid index yields the (None, None) failure pair.
    assert result == (None, None)
Example #7
0
 def __compile(
     cls, found_value: str
 ) -> Tuple[List[Union[str, Tuple[int, str]]], bool, Optional[str]]:
     """
     Compile a comma-separated heading specification into matchable parts.

     Returns (compiled parts, whether any wildcards were seen, error text);
     on any validation failure the parts list is empty and the error text
     describes the problem.
     """
     compiled_lines: List[Union[str, Tuple[int, str]]] = []
     are_any_wildcards = False
     for next_part in found_value.split(","):
         if next_part == "*":
             # Adjacent wildcards are ambiguous and therefore rejected.
             if compiled_lines and compiled_lines[-1] == "*":
                 return (
                     [],
                     False,
                     "Two wildcard elements cannot be next to each other.",
                 )
             are_any_wildcards = True
             compiled_lines.append(next_part)
             continue
         # A literal element is "#"+ whitespace text, with 1-6 hashes.
         count, new_index = ParserHelper.collect_while_character(
             next_part, 0, "#")
         if not count:
             return [], False, "Element must start with hash characters (#)."
         if count > 6:
             return (
                 [],
                 False,
                 "Element must start with between 1 and 6 hash characters (#).",
             )
         assert next_part is not None
         assert new_index is not None
         new_index, extracted_whitespace = ParserHelper.extract_any_whitespace(
             next_part, new_index)
         if not extracted_whitespace:
             return (
                 [],
                 False,
                 "Element must have at least one space character after any hash characters (#).",
             )
         if len(next_part) == new_index:
             return (
                 [],
                 False,
                 "Element must have at least one non-space character after any space characters.",
             )
         compiled_lines.append((count, next_part[new_index:]))
     return compiled_lines, are_any_wildcards, None
Example #8
0
    def is_fenced_code_block(
        line_to_parse: str,
        start_index: int,
        extracted_whitespace: Optional[str],
        skip_whitespace_check: bool = False,
    ) -> Tuple[bool, Optional[int], Optional[str], Optional[int]]:
        """
        Determine if we have the start of a fenced code block.

        Returns a (found, index-after-whitespace, whitespace-before-info,
        fence-character-count) tuple, with trailing elements None when no
        fence start was found.
        """

        assert extracted_whitespace is not None
        indent_is_acceptable = skip_whitespace_check or (
            ParserHelper.is_length_less_than_or_equal_to(extracted_whitespace, 3)
        )
        if not (
            indent_is_acceptable
            and ParserHelper.is_character_at_index_one_of(
                line_to_parse,
                start_index,
                LeafBlockProcessor.__fenced_code_block_start_characters,
            )
        ):
            return False, None, None, None

        POGGER.debug("ifcb:collected_count>>$<<$<<", line_to_parse, start_index)
        # Measure the run of the fence character at the current position.
        fence_character = line_to_parse[start_index]
        collected_count, new_index = ParserHelper.collect_while_character(
            line_to_parse, start_index, fence_character
        )
        POGGER.debug("ifcb:collected_count:$", collected_count)
        assert collected_count is not None
        assert new_index is not None
        (
            non_whitespace_index,
            extracted_whitespace_before_info_string,
        ) = ParserHelper.extract_whitespace(line_to_parse, new_index)

        # A fence requires at least three of the fence character.
        if collected_count < 3:
            return False, None, None, None
        POGGER.debug("ifcb:True")
        return (
            True,
            non_whitespace_index,
            extracted_whitespace_before_info_string,
            collected_count,
        )
Example #9
0
    def __parse_raw_declaration(text_to_parse):
        """
        Parse a possible raw html declaration sequence, and return if it is valid.

        Returns text_to_parse when it starts a valid declaration, else None.
        """

        # A declaration must open with the declaration start character.
        if not ParserHelper.is_character_at_index_one_of(
            text_to_parse, 0, HtmlHelper.__raw_declaration_start_character
        ):
            return None

        # Collect the declaration name immediately after the opener.
        parse_index, declaration_name = ParserHelper.collect_while_one_of_characters(
            text_to_parse, 1, HtmlHelper.__html_block_4_continued_start
        )
        if not declaration_name:
            return None

        # The name must be followed by at least one whitespace character.
        whitespace_count, _ = ParserHelper.collect_while_character(
            text_to_parse, parse_index, HtmlHelper.__raw_declaration_whitespace
        )
        return text_to_parse if whitespace_count else None
Example #10
0
    def is_fenced_code_block(
        line_to_parse,
        start_index,
        extracted_whitespace,
        skip_whitespace_check=False,
    ):
        """
        Determine if we have the start of a fenced code block.

        Returns a (found, index-after-whitespace, whitespace-before-info,
        fence-character-count) tuple, with trailing elements None when no
        fence start was found.
        """

        has_fence_prefix = (
            ParserHelper.is_length_less_than_or_equal_to(extracted_whitespace, 3)
            or skip_whitespace_check
        ) and ParserHelper.is_character_at_index_one_of(
            line_to_parse,
            start_index,
            LeafBlockProcessor.__fenced_code_block_start_characters,
        )
        if not has_fence_prefix:
            return False, None, None, None

        LOGGER.debug("ifcb:collected_count>>%s<<%s<<", line_to_parse,
                     str(start_index))
        # Measure the run of the fence character at the current position.
        collected_count, new_index = ParserHelper.collect_while_character(
            line_to_parse, start_index, line_to_parse[start_index])
        LOGGER.debug("ifcb:collected_count:%s", str(collected_count))
        (
            non_whitespace_index,
            extracted_whitespace_before_info_string,
        ) = ParserHelper.extract_whitespace(line_to_parse, new_index)

        # A fence requires at least three of the fence character.
        if collected_count < 3:
            return False, None, None, None
        LOGGER.debug("ifcb:True")
        return (
            True,
            non_whitespace_index,
            extracted_whitespace_before_info_string,
            collected_count,
        )
Example #11
0
    def __handle_inline_special(
        source_text,
        next_index,
        inline_blocks,
        special_length,
        remaining_line,
        current_string_unresolved,
    ):
        """
        Handle the collection of special inline characters for later processing.

        Depending on the sequence at next_index this either collects a run
        of an emphasis character, delegates a possible link/image close to
        LinkHelper, or consumes the sequence verbatim, and returns an
        InlineResponse carrying the new index and the produced token(s).
        """
        preceding_two = None
        following_two = None
        new_token = None
        repeat_count = 1
        is_active = True
        consume_rest_of_line = False
        special_sequence = source_text[next_index:next_index + special_length]
        if special_length == 1 and special_sequence in EmphasisHelper.inline_emphasis:
            # Emphasis character: collect the entire run of that character,
            # and capture up to two characters on either side of the run for
            # the later emphasis-resolution pass.
            repeat_count, new_index = ParserHelper.collect_while_character(
                source_text, next_index, special_sequence)
            special_sequence = source_text[next_index:new_index]

            preceding_two = source_text[max(0, next_index - 2):next_index]
            following_two = source_text[new_index:min(len(source_text
                                                          ), new_index + 2)]
        else:
            if special_sequence[0] == LinkHelper.link_label_end:
                # The sequence starts with a link-label close; ask LinkHelper
                # whether it actually closes a link or image, which may
                # produce a token and may consume the rest of the line.
                LOGGER.debug(
                    "\nPOSSIBLE LINK CLOSE_FOUND>>%s>>%s>>",
                    str(special_length),
                    special_sequence,
                )
                LOGGER.debug(">>inline_blocks>>%s<<", str(inline_blocks))
                LOGGER.debug(">>remaining_line>>%s<<", str(remaining_line))
                LOGGER.debug(">>current_string_unresolved>>%s<<",
                             str(current_string_unresolved))
                LOGGER.debug(">>source_text>>%s<<", source_text[next_index:])
                LOGGER.debug("")
                (
                    new_index,
                    is_active,
                    new_token,
                    consume_rest_of_line,
                ) = LinkHelper.look_for_link_or_image(
                    inline_blocks,
                    source_text,
                    next_index,
                    remaining_line,
                    current_string_unresolved,
                )
                LOGGER.debug(">>inline_blocks>>%s<<", str(inline_blocks))
                LOGGER.debug(">>new_token>>%s<<", str(new_token))
                LOGGER.debug(">>source_text>>%s<<", source_text[new_index:])
                LOGGER.debug(">>consume_rest_of_line>>%s<<",
                             str(consume_rest_of_line))
            else:
                # Any other special sequence is consumed verbatim.
                repeat_count = special_length
                new_index = next_index + special_length

        # If no token was produced above (e.g. the link lookup failed),
        # record the sequence as a special-text token instead.
        if not new_token:
            new_token = SpecialTextMarkdownToken(special_sequence,
                                                 repeat_count, preceding_two,
                                                 following_two, is_active)

        inline_response = InlineResponse()
        inline_response.new_string = ""
        inline_response.new_index = new_index
        inline_response.new_tokens = [new_token]
        inline_response.consume_rest_of_line = consume_rest_of_line
        return inline_response
Example #12
0
    def parse_atx_headings(parser_state, position_marker,
                           extracted_whitespace):
        """
        Handle the parsing of an atx heading.

        Returns the list of new tokens: the tokens produced by closing the
        open blocks, then the heading start token, its text token, and an
        end token linked back to the start token.
        """

        new_tokens = []

        # An ATX heading requires at most 3 spaces of indent and the ATX
        # character ("#") at the current position.
        if ParserHelper.is_length_less_than_or_equal_to(
                extracted_whitespace,
                3) and ParserHelper.is_character_at_index(
                    position_marker.text_to_parse,
                    position_marker.index_number,
                    LeafBlockProcessor.__atx_character,
                ):
            # Count the opening hashes and the whitespace after them.
            hash_count, new_index = ParserHelper.collect_while_character(
                position_marker.text_to_parse,
                position_marker.index_number,
                LeafBlockProcessor.__atx_character,
            )
            (
                non_whitespace_index,
                extracted_whitespace_at_start,
            ) = ParserHelper.extract_whitespace(position_marker.text_to_parse,
                                                new_index)

            # At most 6 hashes, and they must be followed by whitespace or
            # the end of the line.
            if hash_count <= 6 and (extracted_whitespace_at_start
                                    or non_whitespace_index == len(
                                        position_marker.text_to_parse)):

                new_tokens, _, _ = parser_state.close_open_blocks_fn(
                    parser_state, new_tokens)
                remaining_line = position_marker.text_to_parse[
                    non_whitespace_index:]
                (
                    end_index,
                    extracted_whitespace_at_end,
                ) = ParserHelper.extract_whitespace_from_end(remaining_line)
                # Count any closing run of hashes at the end of the text.
                remove_trailing_count = 0
                while (end_index > 0 and remaining_line[end_index - 1]
                       == LeafBlockProcessor.__atx_character):
                    end_index -= 1
                    remove_trailing_count += 1
                extracted_whitespace_before_end = ""
                if remove_trailing_count:
                    if end_index > 0:
                        # The closing hashes only count as a closing sequence
                        # when preceded by whitespace; capture and trim it.
                        if ParserHelper.is_character_at_index_whitespace(
                                remaining_line, end_index - 1):
                            remaining_line = remaining_line[:end_index]
                            (
                                end_index,
                                extracted_whitespace_before_end,
                            ) = ParserHelper.extract_whitespace_from_end(
                                remaining_line)
                            remaining_line = remaining_line[:end_index]
                        else:
                            # No whitespace before the hashes: they belong to
                            # the heading text, not to a closing sequence.
                            extracted_whitespace_at_end = ""
                            remove_trailing_count = 0
                    else:
                        # The heading text was nothing but closing hashes.
                        remaining_line = ""
                else:
                    # No closing hashes: split off the trailing whitespace.
                    extracted_whitespace_at_end = remaining_line[end_index:]
                    remaining_line = remaining_line[0:end_index]
                start_token = AtxHeadingMarkdownToken(
                    hash_count,
                    remove_trailing_count,
                    extracted_whitespace,
                    position_marker,
                )
                new_tokens.append(start_token)
                new_tokens.append(
                    TextMarkdownToken(remaining_line,
                                      extracted_whitespace_at_start))
                end_token = EndMarkdownToken(
                    "atx",
                    extracted_whitespace_at_end,
                    extracted_whitespace_before_end,
                    None,
                )
                end_token.start_markdown_token = start_token
                new_tokens.append(end_token)
        return new_tokens