def extract_optional_attribute_value(line_to_parse, value_index):
    """
    Determine and extract an optional attribute value.

    Scanning starts at value_index.  Leading whitespace is skipped and, if
    the next character is not the attribute name/value separator (or the
    line has ended), the attribute has no value and the index reached so
    far is returned.

    Otherwise the value after the separator is consumed.  The return value
    is the index just past the value, or -1 when:
      - nothing but whitespace follows the separator,
      - a quoted value's scan runs to the end of the line without finding
        the closing quote, or
      - an unquoted value turns out to be empty.
    """
    line_length = len(line_to_parse)
    current_index, _ = ParserHelper.extract_whitespace(line_to_parse, value_index)

    # No separator present (or end of line): the value is simply absent.
    if (
        current_index >= line_length
        or line_to_parse[current_index]
        != HtmlHelper.__html_attribute_name_value_separator
    ):
        return current_index

    # Step over the separator and any whitespace that follows it.
    current_index, _ = ParserHelper.extract_whitespace(
        line_to_parse, current_index + 1
    )
    if current_index >= line_length:
        return -1

    opening_character = line_to_parse[current_index]

    # Quoted values: double quotes are checked first, then single quotes.
    for quote_character in (
        HtmlHelper.__html_attribute_value_double,
        HtmlHelper.__html_attribute_value_single,
    ):
        if opening_character == quote_character:
            closing_index, _ = ParserHelper.collect_until_character(
                line_to_parse, current_index + 1, quote_character
            )
            # Reaching the end of the line means the quote was never closed.
            if closing_index == line_length:
                return -1
            return closing_index + 1

    # Unquoted value: collect up to the first terminator character.
    end_index, unquoted_text = ParserHelper.collect_until_one_of_characters(
        line_to_parse,
        current_index,
        HtmlHelper.__html_tag_attribute_value_terminators,
    )
    return end_index if unquoted_text else -1
def test_simple_case_from_middle():
    """
    Verify a simple extraction that begins in the middle of the string:
    collecting from index 5 up to the next space yields "is" and stops at
    index 7.
    """

    # Arrange
    source_text = "this is a test"
    expected_result = (7, "is")

    # Act
    actual_result = ParserHelper.collect_until_character(source_text, 5, " ")

    # Assert
    assert expected_result == actual_result
def test_empty_string_with_good_index():
    """
    Verify that an empty string combined with a valid index (0) produces
    an empty collection that stops at index 0.
    """

    # Arrange
    source_text = ""
    expected_result = (0, "")

    # Act
    actual_result = ParserHelper.collect_until_character(source_text, 0, " ")

    # Assert
    assert expected_result == actual_result
def test_empty_string_with_bad_left_index():
    """
    Verify that an empty string combined with an index that is too far to
    the left (-1) is reported as a (None, None) result.
    """

    # Arrange
    source_text = ""
    expected_result = (None, None)

    # Act
    actual_result = ParserHelper.collect_until_character(source_text, -1, " ")

    # Assert
    assert expected_result == actual_result
def test_already_on_whitespace():
    """
    Verify extraction when the starting index (9) already sits on the
    character being searched for: nothing is collected and the index does
    not move.
    """

    # Arrange
    source_text = "this is a test"
    expected_result = (9, "")

    # Act
    actual_result = ParserHelper.collect_until_character(source_text, 9, " ")

    # Assert
    assert expected_result == actual_result
def __parse_tag_attributes(text_to_parse, start_index):
    """
    Handle the parsing of the attributes for an open tag.

    Starting at start_index, the attribute name characters are consumed,
    followed by any whitespace.  If a name/value separator comes next, the
    value (single-quoted, double-quoted, or unquoted) and the whitespace
    after it are consumed as well.

    Returns a tuple of (index after the attribute and its trailing
    whitespace, that trailing whitespace).  Returns (None, -1) when a
    quoted value is not closed by its matching quote character.
    """
    name_end_index, _ = ParserHelper.collect_while_one_of_characters(
        text_to_parse, start_index, HtmlHelper.__tag_attribute_name_characters
    )
    resume_index, trailing_whitespace = ParserHelper.extract_any_whitespace(
        text_to_parse, name_end_index
    )

    # An attribute without a separator has no value to parse.
    if not ParserHelper.is_character_at_index(
        text_to_parse,
        resume_index,
        HtmlHelper.__html_attribute_name_value_separator,
    ):
        return resume_index, trailing_whitespace

    value_start_index, _ = ParserHelper.extract_any_whitespace(
        text_to_parse, resume_index + 1
    )

    # Quoted values: single quotes are checked first, then double quotes.
    for quote_character in (
        HtmlHelper.__html_attribute_value_single,
        HtmlHelper.__html_attribute_value_double,
    ):
        if ParserHelper.is_character_at_index_one_of(
            text_to_parse, value_start_index, quote_character
        ):
            closing_index, _ = ParserHelper.collect_until_character(
                text_to_parse, value_start_index + 1, quote_character
            )
            # The scan must have stopped on the matching closing quote.
            if not ParserHelper.is_character_at_index(
                text_to_parse, closing_index, quote_character
            ):
                return None, -1
            value_end_index = closing_index + 1
            break
    else:
        # Unquoted value: collect up to the first stop character.
        value_end_index, _ = ParserHelper.collect_until_one_of_characters(
            text_to_parse,
            value_start_index,
            HtmlHelper.__unquoted_attribute_value_stop,
        )

    resume_index, trailing_whitespace = ParserHelper.extract_any_whitespace(
        text_to_parse, value_end_index
    )
    return resume_index, trailing_whitespace
def __parse_tag_attributes(
    text_to_parse: str, start_index: int
) -> Tuple[Optional[int], Optional[str]]:
    """
    Handle the parsing of the attributes for an open tag.

    Starting at start_index, the attribute name characters are consumed,
    followed by any whitespace.  If a name/value separator comes next, the
    value (single-quoted, double-quoted, or unquoted) and the whitespace
    after it are consumed as well.

    Returns a tuple of (index after the attribute and its trailing
    whitespace, that trailing whitespace).  Returns (None, None) when a
    quoted value is not closed by its matching quote character.
    """
    name_end_index, _ = ParserHelper.collect_while_one_of_characters(
        text_to_parse, start_index, HtmlHelper.__tag_attribute_name_characters
    )
    assert name_end_index is not None
    resume_index, trailing_whitespace = ParserHelper.extract_any_whitespace(
        text_to_parse, name_end_index
    )
    assert resume_index is not None

    # An attribute without a separator has no value to parse.
    if not ParserHelper.is_character_at_index(
        text_to_parse,
        resume_index,
        HtmlHelper.__html_attribute_name_value_separator,
    ):
        return resume_index, trailing_whitespace

    value_start_index, _ = ParserHelper.extract_any_whitespace(
        text_to_parse, resume_index + 1
    )
    assert value_start_index is not None

    # Work out which quote character, if any, opens the value.  Single
    # quotes are checked first, then double quotes.
    quote_character: Optional[str] = None
    if ParserHelper.is_character_at_index_one_of(
        text_to_parse, value_start_index, HtmlHelper.__html_attribute_value_single
    ):
        quote_character = HtmlHelper.__html_attribute_value_single
    elif ParserHelper.is_character_at_index_one_of(
        text_to_parse, value_start_index, HtmlHelper.__html_attribute_value_double
    ):
        quote_character = HtmlHelper.__html_attribute_value_double

    value_end_index: Optional[int]
    if quote_character is None:
        # Unquoted value: collect up to the first stop character.
        value_end_index, _ = ParserHelper.collect_until_one_of_characters(
            text_to_parse,
            value_start_index,
            HtmlHelper.__unquoted_attribute_value_stop,
        )
    else:
        value_end_index, _ = ParserHelper.collect_until_character(
            text_to_parse, value_start_index + 1, quote_character
        )
        assert value_end_index is not None
        # The scan must have stopped on the matching closing quote.
        if not ParserHelper.is_character_at_index(
            text_to_parse, value_end_index, quote_character
        ):
            return None, None
        value_end_index += 1
    assert value_end_index is not None

    resume_index, trailing_whitespace = ParserHelper.extract_any_whitespace(
        text_to_parse, value_end_index
    )
    return resume_index, trailing_whitespace