Example #1
0
    def extract_optional_attribute_value(line_to_parse, value_index):
        """
        Skip over the optional value portion of an attribute, if one is present.

        Whitespace is skipped starting at value_index.  If the line ends there,
        or the next character is not the attribute name/value separator, no
        value is present and the index that was reached is returned unchanged.

        Otherwise, whitespace after the separator is skipped and the value is
        consumed as a double-quoted, single-quoted, or unquoted value.  The
        return value is the index just past the value, or -1 when:
        - the line ends right after the separator (and any whitespace),
        - a quoted value is collected all the way to the end of the line, or
        - an unquoted value turns out to be empty.
        """

        line_length = len(line_to_parse)
        current_index, _ = ParserHelper.extract_whitespace(line_to_parse, value_index)

        # Without a separator there is no value to consume.
        if (
            current_index >= line_length
            or line_to_parse[current_index]
            != HtmlHelper.__html_attribute_name_value_separator
        ):
            return current_index

        current_index, _ = ParserHelper.extract_whitespace(
            line_to_parse, current_index + 1
        )

        # A separator with nothing after it is not a valid value.
        if current_index >= line_length:
            return -1

        opening_character = line_to_parse[current_index]

        # Work out which quote character, if any, opens the value.
        if opening_character == HtmlHelper.__html_attribute_value_double:
            closing_character = HtmlHelper.__html_attribute_value_double
        elif opening_character == HtmlHelper.__html_attribute_value_single:
            closing_character = HtmlHelper.__html_attribute_value_single
        else:
            # Unquoted value: it must contain at least one character.
            current_index, unquoted_text = ParserHelper.collect_until_one_of_characters(
                line_to_parse,
                current_index,
                HtmlHelper.__html_tag_attribute_value_terminators,
            )
            return current_index if unquoted_text else -1

        # Quoted value: collect from just after the opening quote.
        current_index, _ = ParserHelper.collect_until_character(
            line_to_parse, current_index + 1, closing_character
        )
        if current_index == line_length:
            # Reached the end of the line instead of stopping before it.
            return -1

        # Step past the character that the collection stopped on.
        return current_index + 1
Example #2
0
def test_simple_case_from_middle():
    """
    Verify that collecting from the middle of a string stops at the next
    occurrence of the character to match.
    """

    # Arrange
    source_text = "this is a test"
    expected_result = (7, "is")

    # Act
    actual_result = ParserHelper.collect_until_character(source_text, 5, " ")

    # Assert
    assert expected_result == actual_result
Example #3
0
def test_empty_string_with_good_index():
    """
    Verify that collecting from an empty string at index 0 yields index 0
    and no collected text.
    """

    # Arrange
    source_text = ""
    expected_result = (0, "")

    # Act
    actual_result = ParserHelper.collect_until_character(source_text, 0, " ")

    # Assert
    assert expected_result == actual_result
Example #4
0
def test_empty_string_with_bad_left_index():
    """
    Verify that a start index to the left of an empty string is rejected,
    with None reported for both the index and the collected text.
    """

    # Arrange
    source_text = ""
    expected_result = (None, None)

    # Act
    actual_result = ParserHelper.collect_until_character(source_text, -1, " ")

    # Assert
    assert expected_result == actual_result
Example #5
0
def test_already_on_whitespace():
    """
    Verify that starting directly on the character to match collects nothing
    and leaves the index where it started.
    """

    # Arrange
    source_text = "this is a test"
    expected_result = (9, "")

    # Act
    actual_result = ParserHelper.collect_until_character(source_text, 9, " ")

    # Assert
    assert expected_result == actual_result
Example #6
0
    def __parse_tag_attributes(text_to_parse, start_index):
        """
        Parse a single attribute (name plus optional value) of an open tag.

        Returns a pair of the index reached after the attribute and any
        whitespace following it, together with that whitespace.  If a quoted
        value is not closed by its matching quote character, (None, -1) is
        returned instead.
        """
        name_end_index, _ = ParserHelper.collect_while_one_of_characters(
            text_to_parse, start_index, HtmlHelper.__tag_attribute_name_characters
        )
        after_name_index, trailing_whitespace = ParserHelper.extract_any_whitespace(
            text_to_parse, name_end_index
        )

        # An attribute without a separator has no value; nothing more to parse.
        if not ParserHelper.is_character_at_index(
            text_to_parse,
            after_name_index,
            HtmlHelper.__html_attribute_name_value_separator,
        ):
            return after_name_index, trailing_whitespace

        value_start_index, _ = ParserHelper.extract_any_whitespace(
            text_to_parse, after_name_index + 1
        )

        # Try each quote style in turn: single quotes first, then double quotes.
        for quote_character in (
            HtmlHelper.__html_attribute_value_single,
            HtmlHelper.__html_attribute_value_double,
        ):
            if not ParserHelper.is_character_at_index_one_of(
                text_to_parse, value_start_index, quote_character
            ):
                continue

            value_end_index, _ = ParserHelper.collect_until_character(
                text_to_parse, value_start_index + 1, quote_character
            )
            # The collection must have stopped on the matching closing quote.
            if not ParserHelper.is_character_at_index(
                text_to_parse, value_end_index, quote_character
            ):
                return None, -1
            value_end_index += 1
            break
        else:
            # Neither quote style matched, so treat the value as unquoted.
            value_end_index, _ = ParserHelper.collect_until_one_of_characters(
                text_to_parse,
                value_start_index,
                HtmlHelper.__unquoted_attribute_value_stop,
            )

        after_value_index, trailing_whitespace = ParserHelper.extract_any_whitespace(
            text_to_parse, value_end_index
        )
        return after_value_index, trailing_whitespace
Example #7
0
    def __parse_tag_attributes(
        text_to_parse: str, start_index: int
    ) -> Tuple[Optional[int], Optional[str]]:
        """
        Parse a single attribute (name plus optional value) of an open tag.

        Returns a pair of the index reached after the attribute and any
        whitespace following it, together with that whitespace.  If a quoted
        value is not closed by its matching quote character, (None, None) is
        returned instead.
        """
        name_end_index, _ = ParserHelper.collect_while_one_of_characters(
            text_to_parse, start_index, HtmlHelper.__tag_attribute_name_characters
        )
        assert name_end_index is not None
        after_name_index, trailing_whitespace = ParserHelper.extract_any_whitespace(
            text_to_parse, name_end_index
        )
        assert after_name_index is not None

        # An attribute without a separator has no value; nothing more to parse.
        if not ParserHelper.is_character_at_index(
            text_to_parse,
            after_name_index,
            HtmlHelper.__html_attribute_name_value_separator,
        ):
            return after_name_index, trailing_whitespace

        value_start_index, _ = ParserHelper.extract_any_whitespace(
            text_to_parse, after_name_index + 1
        )
        assert value_start_index is not None

        # Decide which quote character, if any, opens the value.  Single
        # quotes are checked before double quotes.
        quote_character: Optional[str] = None
        if ParserHelper.is_character_at_index_one_of(
            text_to_parse,
            value_start_index,
            HtmlHelper.__html_attribute_value_single,
        ):
            quote_character = HtmlHelper.__html_attribute_value_single
        elif ParserHelper.is_character_at_index_one_of(
            text_to_parse,
            value_start_index,
            HtmlHelper.__html_attribute_value_double,
        ):
            quote_character = HtmlHelper.__html_attribute_value_double

        value_end_index: Optional[int] = None
        if quote_character is None:
            # Unquoted value: collect up to the first stop character.
            value_end_index, _ = ParserHelper.collect_until_one_of_characters(
                text_to_parse,
                value_start_index,
                HtmlHelper.__unquoted_attribute_value_stop,
            )
        else:
            # Quoted value: collect from just after the opening quote.
            value_end_index, _ = ParserHelper.collect_until_character(
                text_to_parse, value_start_index + 1, quote_character
            )
            assert value_end_index is not None
            # The collection must have stopped on the matching closing quote.
            if not ParserHelper.is_character_at_index(
                text_to_parse, value_end_index, quote_character
            ):
                return None, None
            value_end_index += 1
        assert value_end_index is not None

        after_value_index, trailing_whitespace = ParserHelper.extract_any_whitespace(
            text_to_parse, value_end_index
        )
        return after_value_index, trailing_whitespace