def extract_optional_attribute_value(line_to_parse, value_index):
    """
    Determine and extract an optional attribute value.

    Returns the index after the (optional) value, the index where the scan
    stopped when no value was present, or -1 when the value is malformed
    (an unterminated quoted value or an empty unquoted value).
    """
    non_whitespace_index, _ = ParserHelper.extract_whitespace(
        line_to_parse, value_index
    )
    # Without the "=" separator there is no value at all; report where the
    # scan stopped so the caller can continue from there.
    if (
        non_whitespace_index < len(line_to_parse)
        and line_to_parse[non_whitespace_index]
        != HtmlHelper.__html_attribute_name_value_separator
    ) or non_whitespace_index >= len(line_to_parse):
        return non_whitespace_index

    # Skip the separator and any whitespace before the value itself.
    non_whitespace_index += 1
    non_whitespace_index, _ = ParserHelper.extract_whitespace(
        line_to_parse, non_whitespace_index
    )
    if non_whitespace_index >= len(line_to_parse):
        return -1

    first_character_of_value = line_to_parse[non_whitespace_index]
    if first_character_of_value in (
        HtmlHelper.__html_attribute_value_double,
        HtmlHelper.__html_attribute_value_single,
    ):
        # Quoted value: the two quote styles previously had duplicated
        # branches; scan to the matching quote, which must be closed
        # before the end of the line.
        non_whitespace_index, _ = ParserHelper.collect_until_character(
            line_to_parse,
            non_whitespace_index + 1,
            first_character_of_value,
        )
        if non_whitespace_index == len(line_to_parse):
            return -1
        non_whitespace_index += 1
    else:
        # Unquoted value: scan to the first terminator; an empty value is
        # not allowed.
        (
            non_whitespace_index,
            extracted_text,
        ) = ParserHelper.collect_until_one_of_characters(
            line_to_parse,
            non_whitespace_index,
            HtmlHelper.__html_tag_attribute_value_terminators,
        )
        if not extracted_text:
            non_whitespace_index = -1
    return non_whitespace_index
def look_for_pragmas(
    position_marker: PositionMarker,
    line_to_parse: str,
    container_depth: int,
    extracted_whitespace: Optional[str],
    parser_properties: ParseBlockPassProperties,
) -> bool:
    """
    Look for a pragma in the current line.

    A pragma is only recognized at container depth 0 with no leading
    whitespace; recognized lines are recorded in
    parser_properties.pragma_lines keyed by (signed) line number.
    """
    # Pragmas are only valid at the top level with no indent.
    if container_depth or extracted_whitespace:
        return False

    has_alternate_prefix = line_to_parse.startswith(
        PragmaToken.pragma_alternate_prefix
    )
    if not has_alternate_prefix and not line_to_parse.startswith(
        PragmaToken.pragma_prefix
    ):
        return False

    active_prefix = (
        PragmaToken.pragma_alternate_prefix
        if has_alternate_prefix
        else PragmaToken.pragma_prefix
    )
    after_prefix_index, _ = ParserHelper.extract_whitespace(
        line_to_parse, len(active_prefix)
    )
    pragma_body = line_to_parse[after_prefix_index:].rstrip().lower()
    if not pragma_body.startswith(PragmaToken.pragma_title) or not pragma_body.endswith(
        PragmaToken.pragma_suffix
    ):
        return False

    # The alternate prefix is recorded with a negated line number so the
    # two prefix forms can be told apart later.
    line_key = (
        -position_marker.line_number
        if has_alternate_prefix
        else position_marker.line_number
    )
    parser_properties.pragma_lines[line_key] = line_to_parse
    return True
def parse_setext_headings(
    parser_state,
    position_marker,
    extracted_whitespace,
    this_bq_count,
    stack_bq_count,
):
    """
    Handle the parsing of an setext heading.

    Returns the list of new tokens; the list is empty unless the current
    line is a valid setext underline for the currently open paragraph.
    """
    new_tokens = []
    # A setext underline needs at most 3 spaces of indent, one of the setext
    # characters at the current index, an open paragraph on the stack, and a
    # block-quote count that matches the stack.
    if (ParserHelper.is_length_less_than_or_equal_to(
            extracted_whitespace, 3) and ParserHelper.is_character_at_index_one_of(
                position_marker.text_to_parse,
                position_marker.index_number,
                LeafBlockProcessor.__setext_characters,
            ) and parser_state.token_stack[-1].is_paragraph
            and (this_bq_count == stack_bq_count)):
        # Measure the run of identical underline characters.
        _, collected_to_index = ParserHelper.collect_while_character(
            position_marker.text_to_parse,
            position_marker.index_number,
            position_marker.text_to_parse[position_marker.index_number],
        )
        (
            after_whitespace_index,
            extra_whitespace_after_setext,
        ) = ParserHelper.extract_whitespace(position_marker.text_to_parse,
                                            collected_to_index)
        # Only trailing whitespace may follow the underline characters.
        if after_whitespace_index == len(position_marker.text_to_parse):
            # This is unusual. Normally, close_open_blocks is used to close off
            # blocks based on the stack token. However, since the setext takes
            # the last paragraph of text (see case 61) and translates it
            # into a heading, this has to be done separately, as there is no
            # stack token to close.
            new_tokens.append(
                EndMarkdownToken(
                    MarkdownToken.token_setext_heading,
                    extracted_whitespace,
                    extra_whitespace_after_setext,
                    None,
                ))
            # Walk back to the paragraph token that this underline promotes.
            token_index = len(parser_state.token_document) - 1
            while not parser_state.token_document[token_index].is_paragraph:
                token_index -= 1
            replacement_token = SetextHeadingMarkdownToken(
                position_marker.text_to_parse[position_marker.index_number],
                collected_to_index - position_marker.index_number,
                parser_state.token_document[token_index].extra_data,
                position_marker,
                parser_state.token_document[token_index],
            )
            # Replace the paragraph start token with the heading token and
            # pop the matching paragraph entry off the stack.
            parser_state.token_document[token_index] = replacement_token
            del parser_state.token_stack[-1]
    return new_tokens
def is_olist_start(
    parser_state,
    line_to_parse,
    start_index,
    extracted_whitespace,
    skip_whitespace_check=False,
    adj_ws=None,
):
    """
    Determine if we have the start of an numbered or ordered list.

    Returns (is_start, index, my_count, end_whitespace_index); index,
    my_count and end_whitespace_index only carry useful values when a
    candidate number was found.
    """
    is_start = False
    end_whitespace_index = -1
    index = None
    my_count = None
    # When no container-adjusted whitespace is supplied, fall back to the
    # raw extracted whitespace for the indent check.
    if adj_ws is None:
        adj_ws = extracted_whitespace

    # A candidate needs at most 3 spaces of indent (unless that check is
    # skipped) and a digit at the start index.
    if (ParserHelper.is_length_less_than_or_equal_to(adj_ws, 3)
            or skip_whitespace_check
        ) and ParserHelper.is_character_at_index_one_of(
            line_to_parse, start_index, string.digits):
        # Consume the full run of digits.
        index = start_index
        while ParserHelper.is_character_at_index_one_of(
                line_to_parse, index, string.digits):
            index += 1
        my_count = index - start_index
        olist_index_number = line_to_parse[start_index:index]
        LOGGER.debug("olist?%s<<count>>%s<<", olist_index_number, str(my_count))
        LOGGER.debug("olist>>%s", str(line_to_parse[index]))
        LOGGER.debug("index+1>>%s>>len>>%s", str(index + 1),
                     str(len(line_to_parse)))

        end_whitespace_index, _ = ParserHelper.extract_whitespace(
            line_to_parse, index + 1)
        LOGGER.debug(
            "end_whitespace_index>>%s>>len>>%s>>%s",
            str(end_whitespace_index),
            str(len(line_to_parse)),
            olist_index_number,
        )

        # At most 9 digits, then a list marker character, then whitespace or
        # end-of-line.  A list may only interrupt a paragraph when the
        # number is "1" and content follows the marker.
        if (my_count <= 9 and ParserHelper.is_character_at_index_one_of(
                line_to_parse, index,
                ListBlockProcessor.__olist_start_characters)
                and not (parser_state.token_stack[-1].is_paragraph
                         and not parser_state.token_stack[-2].is_list
                         and ((end_whitespace_index == len(line_to_parse))
                              or olist_index_number != "1"))
                and (ParserHelper.is_character_at_index_whitespace(
                    line_to_parse, index + 1)
                     or ((index + 1) == len(line_to_parse)))):
            is_start = True

    LOGGER.debug("is_olist_start>>result>>%s", str(is_start))
    return is_start, index, my_count, end_whitespace_index
def parse_setext_headings(
    parser_state: ParserState,
    position_marker: PositionMarker,
    extracted_whitespace: Optional[str],
    block_quote_data: BlockQuoteData,
) -> List[MarkdownToken]:
    """
    Handle the parsing of an setext heading.

    Returns the list of new tokens; the list is empty unless the current
    line is a valid setext underline for the currently open paragraph.
    """
    new_tokens: List[MarkdownToken] = []
    assert extracted_whitespace is not None
    # A setext underline needs at most 3 spaces of indent, one of the setext
    # characters at the parse index, an open paragraph on the stack, and a
    # block-quote count that matches the stack.
    if (
        ParserHelper.is_length_less_than_or_equal_to(extracted_whitespace, 3)
        and ParserHelper.is_character_at_index_one_of(
            position_marker.text_to_parse,
            position_marker.index_number,
            LeafBlockProcessor.__setext_characters,
        )
        and parser_state.token_stack[-1].is_paragraph
        and (block_quote_data.current_count == block_quote_data.stack_count)
    ):
        is_paragraph_continuation = (
            LeafBlockProcessor.__adjust_continuation_for_active_list(
                parser_state, position_marker
            )
        )
        # Measure the run of identical underline characters.
        _, collected_to_index = ParserHelper.collect_while_character(
            position_marker.text_to_parse,
            position_marker.index_number,
            position_marker.text_to_parse[position_marker.index_number],
        )
        assert collected_to_index is not None
        (
            after_whitespace_index,
            extra_whitespace_after_setext,
        ) = ParserHelper.extract_whitespace(
            position_marker.text_to_parse, collected_to_index
        )
        # Only trailing whitespace may follow the underline characters, and
        # a paragraph continuation inside an active list disqualifies it.
        if not is_paragraph_continuation and after_whitespace_index == len(
            position_marker.text_to_parse
        ):
            LeafBlockProcessor.__create_setext_token(
                parser_state,
                position_marker,
                collected_to_index,
                new_tokens,
                extracted_whitespace,
                extra_whitespace_after_setext,
            )
    return new_tokens
def __complete_inline_block_processing(
    inline_blocks,
    source_text,
    start_index,
    current_string,
    end_string,
    starting_whitespace,
    is_setext,
):
    """
    Finish inline processing: flush any remaining source text into a final
    text token, then resolve any outstanding emphasis.
    """
    # If no tokens were emitted and we never advanced, nothing has been
    # processed yet; an (empty) text token must still be emitted below.
    have_processed_once = len(inline_blocks) != 0 or start_index != 0

    LOGGER.debug("__cibp>inline_blocks>%s<",
                 str(inline_blocks).replace("\n", "\\n"))
    LOGGER.debug("__cibp>source_text>%s<",
                 str(source_text).replace("\n", "\\n"))
    LOGGER.debug("__cibp>start_index>%s<",
                 str(start_index).replace("\n", "\\n"))
    LOGGER.debug("__cibp>current_string>%s<",
                 str(current_string).replace("\n", "\\n"))
    LOGGER.debug("__cibp>end_string>%s<",
                 str(end_string).replace("\n", "\\n"))
    LOGGER.debug(
        "__cibp>starting_whitespace>%s<",
        str(starting_whitespace).replace("\n", "\\n"),
    )
    LOGGER.debug("__cibp>is_setext>%s<", str(is_setext).replace("\n", "\\n"))

    # After a trailing hard break, the following whitespace is folded into
    # the end string instead of the visible text.
    if (inline_blocks and inline_blocks[-1].token_name
            == MarkdownToken.token_inline_hard_break):
        start_index, extracted_whitespace = ParserHelper.extract_whitespace(
            source_text, start_index)
        if end_string is None:
            end_string = extracted_whitespace
        else:
            end_string += extracted_whitespace

    # Anything left in the source text joins the pending string.
    if start_index < len(source_text):
        current_string = InlineHelper.append_text(
            current_string, source_text[start_index:])

    if end_string is not None:
        LOGGER.debug("xx-end-lf>%s<", end_string.replace("\n", "\\n"))
    if current_string or not have_processed_once:
        inline_blocks.append(
            TextMarkdownToken(current_string,
                              starting_whitespace,
                              end_whitespace=end_string))
    LOGGER.debug(
        ">>%s<<",
        str(inline_blocks).replace("\n", "\\n").replace("\x02", "\\x02"))
    return EmphasisHelper.resolve_inline_emphasis(inline_blocks, None)
def __check_for_normal_html_blocks(
    remaining_html_tag: str, line_to_parse: str, character_index: int
) -> Optional[str]:
    """
    Check for the the html blocks that are harder to identify: 1, 6-7.

    Returns the detected html block type, or None if no block was detected.
    """
    html_block_type = None

    if HtmlHelper.__is_valid_block_1_tag_name(remaining_html_tag):
        html_block_type = HtmlHelper.html_block_1
    else:
        # Normalize the tag text for lookup (and learn whether it is an
        # end tag) before classifying it.
        (
            adjusted_remaining_html_tag,
            line_to_parse_size,
            is_end_tag,
        ) = HtmlHelper.__check_for_normal_html_blocks_adjust_tag(
            remaining_html_tag, line_to_parse, character_index
        )
        complete_parse_index: Optional[int] = 0
        if adjusted_remaining_html_tag in HtmlHelper.__html_block_6_start:
            html_block_type = HtmlHelper.html_block_6
        elif is_end_tag:
            is_complete, complete_parse_index = HtmlHelper.is_complete_html_end_tag(
                adjusted_remaining_html_tag, line_to_parse, character_index
            )
            if is_complete:
                html_block_type, character_index = (
                    HtmlHelper.html_block_7,
                    complete_parse_index,
                )
        else:
            (
                is_complete,
                complete_parse_index,
            ) = HtmlHelper.is_complete_html_start_tag(
                adjusted_remaining_html_tag, line_to_parse, character_index
            )
            if is_complete:
                assert complete_parse_index is not None
                html_block_type, character_index = (
                    HtmlHelper.html_block_7,
                    complete_parse_index,
                )
        # Block type 7 additionally requires that only whitespace follow
        # the completed tag on the line.
        if html_block_type == HtmlHelper.html_block_7:
            new_index, _ = ParserHelper.extract_whitespace(
                line_to_parse, character_index
            )
            if new_index != line_to_parse_size:
                html_block_type = None
    return html_block_type
def __is_front_matter_valid(
    collected_lines: List[str],
) -> Union[Dict[str, str], str]:
    """
    Validate the collected front-matter lines.

    Returns the keyword/value map on success, or an error string that
    describes why the front matter is not valid.
    """
    # Characters allowed in a front-matter keyword.
    ascii_letters_and_digits = f"{string.ascii_letters}{string.digits}_-"

    current_title = ""
    current_value = ""
    value_map: Dict[str, str] = {}

    for next_line in collected_lines:
        POGGER.debug("Next fm:>$s<", next_line)
        next_index, _ = ParserHelper.extract_whitespace(next_line, 0)
        assert next_index is not None
        if next_index >= 4:
            # A line indented by 4+ spaces continues the current value.
            POGGER.debug("Indented line established.")
            if not current_title:
                return "Continuation line encountered before a keyword line."
            current_value += f"\n{next_line.strip()}"
            POGGER.debug("current_value>$<", current_value)
        else:
            if not next_line.strip():
                return "Blank line encountered before end of metadata."

            POGGER.debug("Non-indented line established.")
            if current_title:
                # Commit the previous keyword before starting a new one.
                # NOTE(review): unlike the final commit below, this key is
                # NOT passed through .lower() -- confirm whether earlier
                # keywords are meant to keep their original case.
                POGGER.debug("Adding '$' as '$'.", current_title, current_value)
                value_map[current_title] = current_value

            (
                next_index,
                collected_title,
            ) = ParserHelper.collect_while_one_of_characters(
                next_line, next_index, ascii_letters_and_digits)
            assert next_index is not None
            assert collected_title is not None
            current_title = collected_title
            if next_index < len(next_line) and next_line[next_index] == ":":
                current_value = next_line[next_index + 1:].strip()
            else:
                return "Newline did not start with `keyword:`."
    if current_title:
        POGGER.debug("Adding final '$' as '$'.", current_title, current_value)
        value_map[current_title.lower()] = current_value

        # This is specifically to trigger test_front_matter_20.
        assert current_title != "test" or current_value != "assert"

    if not value_map:
        return "No valid metadata header lines were found."
    return value_map
def is_complete_html_end_tag(tag_name, line_to_parse, next_char_index):
    """
    Determine if the supplied information is a completed end of tag
    specification.

    Returns (is_complete, index_after_tag_end_character).
    """
    tag_name_is_valid = HtmlHelper.is_valid_tag_name(tag_name)
    after_whitespace_index, _ = ParserHelper.extract_whitespace(
        line_to_parse, next_char_index
    )
    # The only thing allowed after the tag name (and any whitespace) is
    # the tag-end character itself.
    found_tag_end = (
        after_whitespace_index < len(line_to_parse)
        and line_to_parse[after_whitespace_index] == HtmlHelper.__html_tag_end
    )
    return tag_name_is_valid and found_tag_end, after_whitespace_index + 1
def __check_for_normal_html_blocks(
    remaining_html_tag, line_to_parse, character_index
):
    """
    Check for the the html blocks that are harder to identify: 1, 6-7.

    Returns the detected html block type, or None if no block was detected.
    """
    html_block_type = None

    if HtmlHelper.__is_valid_block_1_tag_name(remaining_html_tag):
        html_block_type = HtmlHelper.html_block_1
    else:
        # Normalize the tag for lookup: note whether it is an end tag and
        # trim the tag punctuation off both ends.
        adjusted_remaining_html_tag = remaining_html_tag
        is_end_tag = False
        if adjusted_remaining_html_tag.startswith(HtmlHelper.__html_tag_start):
            adjusted_remaining_html_tag = adjusted_remaining_html_tag[1:]
            is_end_tag = True
        if (
            character_index < len(line_to_parse)
            and line_to_parse[character_index] == HtmlHelper.__html_tag_end
            and adjusted_remaining_html_tag.endswith(HtmlHelper.__html_tag_start)
        ):
            adjusted_remaining_html_tag = adjusted_remaining_html_tag[0:-1]
        if adjusted_remaining_html_tag in HtmlHelper.__html_block_6_start:
            html_block_type = HtmlHelper.html_block_6
        elif is_end_tag:
            is_complete, complete_parse_index = HtmlHelper.is_complete_html_end_tag(
                adjusted_remaining_html_tag, line_to_parse, character_index
            )
            if is_complete:
                html_block_type = HtmlHelper.html_block_7
                character_index = complete_parse_index
        else:
            (
                is_complete,
                complete_parse_index,
            ) = HtmlHelper.is_complete_html_start_tag(
                adjusted_remaining_html_tag, line_to_parse, character_index
            )
            if is_complete:
                html_block_type = HtmlHelper.html_block_7
                character_index = complete_parse_index
        # Block type 7 additionally requires that only whitespace follow
        # the completed tag on the line.
        if html_block_type == HtmlHelper.html_block_7:
            new_index, _ = ParserHelper.extract_whitespace(
                line_to_parse, character_index
            )
            if new_index != len(line_to_parse):
                html_block_type = None
    return html_block_type
def test_empty_string_with_good_index():
    """
    Make sure that an empty string is handled properly with a good index
    """
    # Arrange
    string_to_examine = ""
    index_to_start_at = 0

    # Act
    actual_output = ParserHelper.extract_whitespace(
        string_to_examine, index_to_start_at
    )

    # Assert
    assert actual_output == (0, "")
def test_simple_case_from_middle_with_whitespace():
    """
    Make sure that we test a simple extraction from the middle of the string
    with whitespace
    """
    # Arrange
    string_to_examine = "this is a test"
    index_to_start_at = 4

    # Act
    actual_output = ParserHelper.extract_whitespace(
        string_to_examine, index_to_start_at
    )

    # Assert
    assert actual_output == (5, " ")
def test_empty_string_with_bad_left_index():
    """
    Make sure that an empty string is handled properly with an index that
    is too far to the left.
    """
    # Arrange
    string_to_examine = ""
    index_to_start_at = -1

    # Act
    actual_output = ParserHelper.extract_whitespace(
        string_to_examine, index_to_start_at
    )

    # Assert
    assert actual_output == (None, None)
def is_complete_html_end_tag(
    tag_name: str, line_to_parse: str, next_char_index: int
) -> Tuple[bool, int]:
    """
    Determine if the supplied information is a completed end of tag
    specification.

    Returns (is_complete, index_after_tag_end_character).
    """
    end_index, _ = ParserHelper.extract_whitespace(line_to_parse, next_char_index)
    assert end_index is not None
    # The only thing allowed after the tag name (and any whitespace) is
    # the tag-end character itself.
    found_tag_end = (
        end_index < len(line_to_parse)
        and line_to_parse[end_index] == HtmlHelper.__html_tag_end
    )
    is_complete = HtmlHelper.is_valid_tag_name(tag_name) and found_tag_end
    return is_complete, end_index + 1
def is_ulist_start(
    parser_state,
    line_to_parse,
    start_index,
    extracted_whitespace,
    skip_whitespace_check=False,
    adj_ws=None,
):
    """
    Determine if we have the start of an un-numbered list.

    Returns (is_start, after_all_whitespace_index);
    after_all_whitespace_index is -1 when no start was detected.
    """
    LOGGER.debug("is_ulist_start>>pre>>")
    is_start = False
    after_all_whitespace_index = -1
    # When no container-adjusted whitespace is supplied, fall back to the
    # raw extracted whitespace for the indent check.
    if adj_ws is None:
        adj_ws = extracted_whitespace

    # A candidate needs at most 3 spaces of indent (unless that check is
    # skipped), a list start character at the start index, and whitespace
    # or end-of-line right after that character.
    if ((ParserHelper.is_length_less_than_or_equal_to(adj_ws, 3)
         or skip_whitespace_check)
            and ParserHelper.is_character_at_index_one_of(
                line_to_parse, start_index,
                ListBlockProcessor.__ulist_start_characters)
            and (ParserHelper.is_character_at_index_whitespace(
                line_to_parse, start_index + 1)
                 or ((start_index + 1) == len(line_to_parse)))):
        LOGGER.debug("is_ulist_start>>mid>>")
        after_all_whitespace_index, _ = ParserHelper.extract_whitespace(
            line_to_parse, start_index + 1)
        LOGGER.debug(
            "after_all_whitespace_index>>%s>>len>>%s",
            str(after_all_whitespace_index),
            str(len(line_to_parse)),
        )

        # A thematic break takes precedence over a list start, and an empty
        # list item may not interrupt a paragraph.
        is_break, _ = LeafBlockProcessor.is_thematic_break(
            line_to_parse, start_index, extracted_whitespace)
        if not is_break and not (
                parser_state.token_stack[-1].is_paragraph
                and not parser_state.token_stack[-2].is_list
                and (after_all_whitespace_index == len(line_to_parse))):
            is_start = True

    LOGGER.debug("is_ulist_start>>result>>%s", str(is_start))
    return is_start, after_all_whitespace_index
def __parse_raw_close_tag(text_to_parse):
    """
    Parse the current line as if it is a close tag, and determine if it is valid.

    Returns the original text when it is a valid close tag, otherwise None.
    """
    if not ParserHelper.is_character_at_index(
        text_to_parse, 0, HtmlHelper.__html_tag_start
    ):
        return None

    tag_name = HtmlHelper.__parse_raw_tag_name(text_to_parse, 1)
    if not tag_name:
        return None

    # Only trailing whitespace may follow the tag name.
    scan_index = len(tag_name)
    if scan_index != len(text_to_parse):
        scan_index, _ = ParserHelper.extract_whitespace(text_to_parse, scan_index)
    return text_to_parse if scan_index == len(text_to_parse) else None
def __parse_raw_close_tag(text_to_parse: str) -> Optional[str]:
    """
    Parse the current line as if it is a close tag, and determine if it is valid.

    Returns the original text when it is a valid close tag, otherwise None.
    """
    valid_raw_html = None
    if ParserHelper.is_character_at_index(
        text_to_parse, 0, HtmlHelper.__html_tag_start
    ):
        if tag_name := HtmlHelper.__parse_raw_tag_name(text_to_parse, 1):
            parse_index: Optional[int] = len(tag_name)
            assert parse_index is not None
            text_to_parse_size = len(text_to_parse)
            # Only trailing whitespace may follow the tag name.
            if parse_index != text_to_parse_size:
                parse_index, _ = ParserHelper.extract_whitespace(
                    text_to_parse, parse_index
                )
            if parse_index == text_to_parse_size:
                valid_raw_html = text_to_parse
    # BUG FIX: the computed result was never returned, so the function
    # always produced None even for a valid close tag.
    return valid_raw_html
def __add_recombined_whitespace(did_recombine, source_text, inline_response,
                                end_string, is_setext):
    """
    After a recombine, move any whitespace at the response's new index out
    of the visible text and onto the end string.

    Returns the possibly-updated end string; inline_response.new_index is
    advanced past that whitespace as a side effect.
    """
    LOGGER.debug("__arw>>did_recombine>>%s>>", str(did_recombine))
    LOGGER.debug(
        "__arw>>end_string>>%s>>",
        str(end_string).replace("\n", "\\n").replace("\x02", "\\x02"),
    )
    if did_recombine:
        LOGGER.debug(
            "__arw>>source_text>>%s>>",
            str(source_text).replace("\n", "\\n").replace("\x02", "\\x02"),
        )
        new_index, extracted_whitespace = ParserHelper.extract_whitespace(
            source_text, inline_response.new_index)
        LOGGER.debug(
            "__arw>>%s>>",
            str(source_text[0:inline_response.new_index]).replace(
                "\n", "\\n").replace("\x02", "\\x02"),
        )
        LOGGER.debug(
            "__arw>>%s>>",
            str(source_text[inline_response.new_index:]).replace(
                "\n", "\\n").replace("\x02", "\\x02"),
        )
        LOGGER.debug(
            "__arw>>extracted_whitespace>>%s>>",
            str(extracted_whitespace).replace("\n", "\\n").replace(
                "\x02", "\\x02"),
        )
        if extracted_whitespace:
            inline_response.new_index = new_index
            if end_string:
                end_string += extracted_whitespace
            else:
                end_string = extracted_whitespace
            # Setext headings get a \x02 marker appended to the end string.
            if is_setext:
                end_string += "\x02"
            LOGGER.debug(
                "__arw>>end_string>>%s>>",
                str(end_string).replace("\n", "\\n").replace("\x02", "\\x02"),
            )
    return end_string
def is_fenced_code_block(
    line_to_parse: str,
    start_index: int,
    extracted_whitespace: Optional[str],
    skip_whitespace_check: bool = False,
) -> Tuple[bool, Optional[int], Optional[str], Optional[int]]:
    """
    Determine if we have the start of a fenced code block.

    Returns (is_fence, index_after_fence, whitespace_before_info_string,
    fence_character_count); the last three elements are None when no fence
    was found.
    """
    assert extracted_whitespace is not None
    indent_is_acceptable = (
        skip_whitespace_check
        or ParserHelper.is_length_less_than_or_equal_to(extracted_whitespace, 3)
    )
    if not indent_is_acceptable or not ParserHelper.is_character_at_index_one_of(
        line_to_parse,
        start_index,
        LeafBlockProcessor.__fenced_code_block_start_characters,
    ):
        return False, None, None, None

    POGGER.debug("ifcb:collected_count>>$<<$<<", line_to_parse, start_index)
    fence_count, index_after_fence = ParserHelper.collect_while_character(
        line_to_parse, start_index, line_to_parse[start_index]
    )
    POGGER.debug("ifcb:collected_count:$", fence_count)
    assert fence_count is not None
    assert index_after_fence is not None
    (
        after_fence_whitespace_index,
        whitespace_before_info_string,
    ) = ParserHelper.extract_whitespace(line_to_parse, index_after_fence)

    # A valid fence needs at least three identical fence characters.
    if fence_count < 3:
        return False, None, None, None
    POGGER.debug("ifcb:True")
    return (
        True,
        after_fence_whitespace_index,
        whitespace_before_info_string,
        fence_count,
    )
def __handle_blank_line_init(
    from_main_transform: bool, input_line: str
) -> Tuple[Optional[List[type]], bool, int, str]:
    """
    Set up blank-line handling: decide which stack block types may be
    closed and split the line into leading whitespace and the first
    non-whitespace index.
    """
    # Outside of the main transform, only paragraphs may be closed and
    # block quotes are left alone.
    if from_main_transform:
        restrict_close_to: Optional[List[type]] = None
    else:
        restrict_close_to = [ParagraphStackToken]
    POGGER.debug("hbl>>from_main_transform>>$", from_main_transform)
    POGGER.debug("hbl>>close_only_these_blocks>>$", restrict_close_to)
    POGGER.debug("hbl>>do_include_block_quotes>>$", from_main_transform)

    first_non_ws_index, leading_whitespace = ParserHelper.extract_whitespace(
        input_line, 0
    )
    assert leading_whitespace is not None
    assert first_non_ws_index is not None
    return (
        restrict_close_to,
        from_main_transform,
        first_non_ws_index,
        leading_whitespace,
    )
def is_fenced_code_block(
    line_to_parse,
    start_index,
    extracted_whitespace,
    skip_whitespace_check=False,
):
    """
    Determine if we have the start of a fenced code block.

    Returns (is_fence, index_after_fence, whitespace_before_info_string,
    fence_character_count); the last three elements are None when no fence
    was found.
    """
    # A fence needs at most 3 spaces of indent (unless that check is
    # skipped) and a fence character at the start index.
    if (ParserHelper.is_length_less_than_or_equal_to(
            extracted_whitespace, 3) or skip_whitespace_check
        ) and ParserHelper.is_character_at_index_one_of(
            line_to_parse,
            start_index,
            LeafBlockProcessor.__fenced_code_block_start_characters,
        ):
        LOGGER.debug("ifcb:collected_count>>%s<<%s<<", line_to_parse,
                     str(start_index))
        # Count the run of identical fence characters.
        collected_count, new_index = ParserHelper.collect_while_character(
            line_to_parse, start_index, line_to_parse[start_index])
        LOGGER.debug("ifcb:collected_count:%s", str(collected_count))
        (
            non_whitespace_index,
            extracted_whitespace_before_info_string,
        ) = ParserHelper.extract_whitespace(line_to_parse, new_index)
        # A valid fence needs at least three fence characters.
        if collected_count >= 3:
            LOGGER.debug("ifcb:True")
            return (
                True,
                non_whitespace_index,
                extracted_whitespace_before_info_string,
                collected_count,
            )
    return False, None, None, None
def __adjust_for_block_quote_start(
    force_me,
    original_line_to_parse,
    last_block_quote_index,
    position_marker,
    extracted_whitespace,
):
    """
    Block quotes cause indents, which need to be handled specifically.

    Returns (did_process, special_parse_start_index, whitespace_to_parse,
    block_quote_adjust_delta); did_process is only True when the whitespace
    after the block quote character contains a tab.
    """
    did_process = False
    special_parse_start_index = 0
    whitespace_to_parse = extracted_whitespace
    block_quote_adjust_delta = 0
    LOGGER.debug(
        "last_block_quote_index>>%s>>force_me>>%s",
        str(last_block_quote_index),
        str(force_me),
    )
    if last_block_quote_index or force_me:
        LOGGER.debug(
            "original_line_to_parse>[%s]>>last_block_quote_index>>%s",
            original_line_to_parse.replace("\t", "\\t"),
            str(last_block_quote_index),
        )
        (
            block_quote_after_whitespace_index,
            during_original_whitespace,
        ) = ParserHelper.extract_whitespace(original_line_to_parse,
                                            last_block_quote_index)
        LOGGER.debug(
            "during_original_whitespace>[%s]",
            during_original_whitespace.replace("\t", "\\t"),
        )
        # Only tab-bearing whitespace needs the special handling below.
        if "\t" in during_original_whitespace:
            did_process = True
            LOGGER.debug(
                ".text_to_parse>[%s]",
                position_marker.text_to_parse.replace("\t", "\\t"),
            )
            LOGGER.debug(".index_number>>%s", str(position_marker.index_number))
            LOGGER.debug(".index_indent>>%s", str(position_marker.index_indent))
            LOGGER.debug("last_block_quote_index>>%s",
                         str(last_block_quote_index))

            # Make sure everything after the whitespace remains the same.
            text_after_original_whitespace = original_line_to_parse[
                block_quote_after_whitespace_index:]
            text_after_whitespace = position_marker.text_to_parse[
                position_marker.index_number:]
            LOGGER.debug(
                "text_after_original_whitespace>[%s]",
                text_after_original_whitespace.replace("\t", "\\t"),
            )
            LOGGER.debug(
                "text_after_whitespace>[%s]",
                text_after_whitespace.replace("\t", "\\t"),
            )
            assert text_after_original_whitespace == text_after_whitespace

            # Make sure the whitespace is within expected bounds.
            during_current_whitespace = position_marker.text_to_parse[
                position_marker.index_number -
                len(extracted_whitespace):position_marker.index_number]
            LOGGER.debug(
                "during_current_whitespace>[%s]",
                during_current_whitespace.replace("\t", "\\t"),
            )
            LOGGER.debug(
                "during_original_whitespace>[%s]",
                during_original_whitespace.replace("\t", "\\t"),
            )
            current_whitespace_length = len(during_current_whitespace)
            # The tab-aware length, minus 1 for the block quote character.
            original_whitespace_length = (ParserHelper.calculate_length(
                during_original_whitespace,
                start_index=last_block_quote_index) - 1)
            LOGGER.debug(
                "current_whitespace_length[%s],original_whitespace_length[%s]",
                str(current_whitespace_length),
                str(original_whitespace_length),
            )
            assert current_whitespace_length <= original_whitespace_length

            special_parse_start_index = last_block_quote_index + 1
            # When the whitespace starts with a tab, keep it whole; a
            # second leading tab additionally shifts the adjustment delta.
            if during_original_whitespace[0] == "\t":
                whitespace_to_parse = during_original_whitespace
                if (len(during_original_whitespace) > 1
                        and during_original_whitespace[1] == "\t"):
                    block_quote_adjust_delta = -1
            else:
                whitespace_to_parse = during_original_whitespace[1:]

    return (
        did_process,
        special_parse_start_index,
        whitespace_to_parse,
        block_quote_adjust_delta,
    )
def is_complete_html_start_tag(
    tag_name: str, line_to_parse: str, next_char_index: int
) -> Tuple[bool, Optional[int]]:
    """
    Determine if the supplied information is a completed start of tag
    specification.

    Returns (is_complete, index_after_tag).
    """
    # Block 1 tag names (handled elsewhere) are excluded here.
    is_tag_valid = HtmlHelper.is_valid_tag_name(
        tag_name
    ) and not HtmlHelper.__is_valid_block_1_tag_name(tag_name)

    non_whitespace_index, extracted_whitespace = ParserHelper.extract_whitespace(
        line_to_parse, next_char_index
    )
    assert non_whitespace_index is not None
    are_attributes_valid: bool = True
    line_to_parse_size: int = len(line_to_parse)
    # Consume attribute name/value pairs until the tag is closed or one of
    # the helpers reports an invalid attribute (-1).  Each attribute must
    # be preceded by whitespace.
    while (
        is_tag_valid
        and extracted_whitespace
        and are_attributes_valid
        and 0 <= non_whitespace_index < line_to_parse_size
        and line_to_parse[non_whitespace_index]
        not in [HtmlHelper.__html_tag_end, HtmlHelper.__html_tag_start]
    ):
        non_whitespace_index = HtmlHelper.extract_html_attribute_name(
            line_to_parse, non_whitespace_index
        )
        assert non_whitespace_index is not None
        are_attributes_valid = non_whitespace_index != -1
        if not are_attributes_valid:
            break
        non_whitespace_index = HtmlHelper.extract_optional_attribute_value(
            line_to_parse, non_whitespace_index
        )
        assert non_whitespace_index is not None
        are_attributes_valid = non_whitespace_index != -1
        if not are_attributes_valid:
            break
        (
            non_whitespace_index,
            extracted_whitespace,
        ) = ParserHelper.extract_whitespace(line_to_parse, non_whitespace_index)
        assert non_whitespace_index is not None

    if non_whitespace_index < line_to_parse_size:
        # Allow an optional "/" (self closing) before the tag end.
        if line_to_parse[non_whitespace_index] == HtmlHelper.__html_tag_start:
            non_whitespace_index += 1
        # NOTE(review): if that "/" is the last character on the line, the
        # next read is out of range -- presumably guarded by the callers;
        # confirm.
        is_end_of_tag_present = (
            line_to_parse[non_whitespace_index] == HtmlHelper.__html_tag_end
        )
        if is_end_of_tag_present:
            non_whitespace_index += 1
    else:
        is_end_of_tag_present = False

    non_whitespace_index, _ = ParserHelper.extract_whitespace(
        line_to_parse, non_whitespace_index
    )
    # Complete only if everything was valid and the tag consumed the whole
    # line (allowing for trailing whitespace).
    return (
        (
            is_tag_valid
            and is_end_of_tag_present
            and non_whitespace_index == line_to_parse_size
            and are_attributes_valid
        ),
        non_whitespace_index,
    )
def __select_line_ending(
    new_tokens: List[MarkdownToken],
    is_proper_hard_break: bool,
    line_number: int,
    adj_hard_column: int,
    current_string: str,
    removed_end_whitespace: str,
    removed_end_whitespace_size: int,
    whitespace_to_add: Optional[str],
    append_to_current_string: str,
    end_string: Optional[str],
    remaining_line: str,
    inline_blocks: List[MarkdownToken],
    is_setext: bool,
) -> Tuple[str, Optional[str], str, Optional[str], str]:
    """
    Decide how the end of the current line is represented: a backslash
    hard break, a whitespace (2+ spaces) hard break, or a normal ending.

    Returns the updated (current_string, whitespace_to_add,
    append_to_current_string, end_string, remaining_line) values; the
    matching token, if any, is appended to new_tokens.
    """
    if is_proper_hard_break:
        POGGER.debug(">>proper hard break")
        new_tokens.append(
            HardBreakMarkdownToken(InlineHelper.backslash_character,
                                   line_number, adj_hard_column - 1))
        # Drop the trailing backslash from the visible text.
        current_string, whitespace_to_add = current_string[:-1], None
        append_to_current_string = ""
    elif removed_end_whitespace_size >= 2:
        POGGER.debug(">>whitespace hard break")
        new_tokens.append(
            HardBreakMarkdownToken(removed_end_whitespace, line_number,
                                   adj_hard_column))
        whitespace_to_add = None
        append_to_current_string = ""
    else:
        POGGER.debug(">>normal end")
        POGGER.debug("<<is_setext<<$<<", is_setext)
        POGGER.debug("<<inline_blocks<<$<<", inline_blocks)
        POGGER.debug("<<current_string<<$<<", current_string)
        POGGER.debug("<<remaining_line<<$<<", remaining_line)
        POGGER.debug("<<end_string<<$<<", end_string)
        POGGER.debug("<<removed_end_whitespace<<$<<", removed_end_whitespace)
        # Inside a setext heading, whitespace following a hard break is
        # folded into the end string instead of the visible text.
        if (is_setext and inline_blocks
                and inline_blocks[-1].is_inline_hard_break
                and not current_string):
            new_index, ex_ws = ParserHelper.extract_whitespace(
                remaining_line, 0)
            POGGER.debug("<<new_index<<$<<", new_index)
            POGGER.debug("<<ex_ws<<$<<", ex_ws)
            assert new_index
            end_string = f"{ex_ws}{ParserHelper.whitespace_split_character}"
            remaining_line = remaining_line[new_index:]

        end_string = InlineHelper.modify_end_string(
            end_string, removed_end_whitespace)
        POGGER.debug("<<end_string<<$<<", end_string)
    POGGER.debug(
        "<<append_to_current_string<<$<<",
        append_to_current_string,
    )
    POGGER.debug(
        "<<whitespace_to_add<<$<<",
        whitespace_to_add,
    )
    POGGER.debug("<<remaining_line<<$<<", remaining_line)
    POGGER.debug("<<end_string<<$<<", end_string)
    POGGER.debug("<<current_string<<$<<", current_string)
    return (
        current_string,
        whitespace_to_add,
        append_to_current_string,
        end_string,
        remaining_line,
    )
def parse_atx_headings(parser_state, position_marker, extracted_whitespace):
    """
    Handle the parsing of an atx heading.

    Returns the list of new tokens; the list is empty unless the current
    line is a valid atx heading.
    """
    new_tokens = []
    # An atx heading needs at most 3 spaces of indent and the atx character
    # at the current index.
    if ParserHelper.is_length_less_than_or_equal_to(
            extracted_whitespace, 3) and ParserHelper.is_character_at_index(
                position_marker.text_to_parse,
                position_marker.index_number,
                LeafBlockProcessor.__atx_character,
            ):
        hash_count, new_index = ParserHelper.collect_while_character(
            position_marker.text_to_parse,
            position_marker.index_number,
            LeafBlockProcessor.__atx_character,
        )
        (
            non_whitespace_index,
            extracted_whitespace_at_start,
        ) = ParserHelper.extract_whitespace(position_marker.text_to_parse,
                                            new_index)
        # At most 6 hashes, and the hashes must be followed by whitespace
        # or the end of the line.
        if hash_count <= 6 and (extracted_whitespace_at_start
                                or non_whitespace_index == len(
                                    position_marker.text_to_parse)):
            new_tokens, _, _ = parser_state.close_open_blocks_fn(
                parser_state, new_tokens)
            remaining_line = position_marker.text_to_parse[
                non_whitespace_index:]
            (
                end_index,
                extracted_whitespace_at_end,
            ) = ParserHelper.extract_whitespace_from_end(remaining_line)
            # Count any optional closing run of atx characters.
            remove_trailing_count = 0
            while (end_index > 0 and remaining_line[end_index - 1]
                   == LeafBlockProcessor.__atx_character):
                end_index -= 1
                remove_trailing_count += 1
            extracted_whitespace_before_end = ""
            if remove_trailing_count:
                if end_index > 0:
                    if ParserHelper.is_character_at_index_whitespace(
                            remaining_line, end_index - 1):
                        # Closing hashes preceded by whitespace: strip them
                        # and the whitespace before them from the text.
                        remaining_line = remaining_line[:end_index]
                        (
                            end_index,
                            extracted_whitespace_before_end,
                        ) = ParserHelper.extract_whitespace_from_end(
                            remaining_line)
                        remaining_line = remaining_line[:end_index]
                    else:
                        # No whitespace before the closing hashes, so they
                        # are part of the heading text itself.
                        extracted_whitespace_at_end = ""
                        remove_trailing_count = 0
                else:
                    # The line was nothing but closing hashes.
                    remaining_line = ""
            else:
                extracted_whitespace_at_end = remaining_line[end_index:]
                remaining_line = remaining_line[0:end_index]
            start_token = AtxHeadingMarkdownToken(
                hash_count,
                remove_trailing_count,
                extracted_whitespace,
                position_marker,
            )
            new_tokens.append(start_token)
            new_tokens.append(
                TextMarkdownToken(remaining_line,
                                  extracted_whitespace_at_start))
            end_token = EndMarkdownToken(
                "atx",
                extracted_whitespace_at_end,
                extracted_whitespace_before_end,
                None,
            )
            end_token.start_markdown_token = start_token
            new_tokens.append(end_token)
    return new_tokens
def __parse_line_for_leaf_blocks(
    parser_state,
    xposition_marker,
    this_bq_count,
    removed_chars_at_start,
    no_para_start_if_empty,
    ignore_link_definition_start,
    original_line_to_parsex,
    last_block_quote_index,
    last_list_start_index,
):
    """
    Parse the contents of a line for a leaf block.

    Tries each leaf-block handler in priority order (fenced code, link
    reference definition, HTML block, ATX heading, indented code, SetExt
    heading, thematic break, paragraph) until one produces tokens.

    Returns a tuple of (tokens produced, RequeueLineInfo describing any
    lines that must be re-processed).
    """
    LOGGER.debug(
        "Leaf Line:%s:", xposition_marker.text_to_parse.replace("\t", "\\t")
    )
    new_tokens = []
    requeue_line_info = RequeueLineInfo()
    # Renamed from original_line_to_parse to avoid confusion with the
    # original_line_to_parsex parameter: this is the unconsumed remainder
    # of the current line, starting at the marker's index.
    remaining_line_to_parse = xposition_marker.text_to_parse[
        xposition_marker.index_number :
    ]
    (new_index_number, extracted_whitespace,) = ParserHelper.extract_whitespace(
        xposition_marker.text_to_parse, xposition_marker.index_number
    )
    position_marker = PositionMarker(
        xposition_marker.line_number,
        new_index_number,
        xposition_marker.text_to_parse,
        index_indent=xposition_marker.index_indent,
    )
    pre_tokens = ContainerBlockProcessor.__close_indented_block_if_indent_not_there(
        parser_state, extracted_whitespace
    )
    # Each of these handlers marks the line as "outer processed" if it
    # claims the line, which short-circuits the later leaf-block attempts.
    outer_processed = False
    outer_processed = ContainerBlockProcessor.__handle_fenced_code_block(
        parser_state,
        outer_processed,
        position_marker,
        extracted_whitespace,
        new_tokens,
    )
    (
        outer_processed,
        requeue_line_info.lines_to_requeue,
        requeue_line_info.force_ignore_first_as_lrd,
    ) = ContainerBlockProcessor.__handle_link_reference_definition(
        parser_state,
        outer_processed,
        position_marker,
        extracted_whitespace,
        remaining_line_to_parse,
        ignore_link_definition_start,
        pre_tokens,
    )
    outer_processed = ContainerBlockProcessor.__handle_html_block(
        parser_state,
        outer_processed,
        position_marker,
        extracted_whitespace,
        new_tokens,
    )
    if not outer_processed:
        assert not new_tokens
        # Try the remaining leaf blocks in order; the paragraph handler is
        # the catch-all at the end.
        new_tokens = LeafBlockProcessor.parse_atx_headings(
            parser_state, position_marker, extracted_whitespace
        )
        if not new_tokens:
            new_tokens = LeafBlockProcessor.parse_indented_code_block(
                parser_state,
                position_marker,
                extracted_whitespace,
                removed_chars_at_start,
                original_line_to_parsex,
                last_block_quote_index,
                last_list_start_index,
            )
        if not new_tokens:
            stack_bq_count = BlockQuoteProcessor.count_of_block_quotes_on_stack(
                parser_state
            )
            new_tokens = LeafBlockProcessor.parse_setext_headings(
                parser_state,
                position_marker,
                extracted_whitespace,
                this_bq_count,
                stack_bq_count,
            )
        if not new_tokens:
            stack_bq_count = BlockQuoteProcessor.count_of_block_quotes_on_stack(
                parser_state
            )
            new_tokens = LeafBlockProcessor.parse_thematic_break(
                parser_state,
                position_marker,
                extracted_whitespace,
                this_bq_count,
                stack_bq_count,
            )
        if not new_tokens:
            stack_bq_count = BlockQuoteProcessor.count_of_block_quotes_on_stack(
                parser_state
            )
            new_tokens = LeafBlockProcessor.parse_paragraph(
                parser_state,
                position_marker,
                extracted_whitespace,
                this_bq_count,
                no_para_start_if_empty,
                stack_bq_count,
            )
    # assert new_tokens or did_complete_lrd or did_pause_lrd or lines_to_requeue
    LOGGER.debug(">>leaf--adding>>%s", str(new_tokens))
    pre_tokens.extend(new_tokens)
    return pre_tokens, requeue_line_info
def process_link_reference_definition(
    parser_state,
    position_marker,
    original_line_to_parse,
    extracted_whitespace,
):
    """
    Process a link reference definition.  Note, this requires a lot of work to
    handle properly because of partial definitions across lines.

    Returns a 6-tuple:
    (lrd_processed, did_complete_lrd, did_pause_lrd, lines_to_requeue,
     force_ignore_first_as_lrd, new_tokens).
    """
    line_to_parse = position_marker.text_to_parse
    start_index = position_marker.index_number
    did_pause_lrd = False
    lines_to_requeue = []
    new_tokens = []
    force_ignore_first_as_lrd = False
    was_started = False
    is_blank_line = not line_to_parse and not start_index
    # If a definition was already in progress, join the buffered
    # continuation lines with the current line and re-scan from column 0.
    if parser_state.token_stack[-1].was_link_definition_started:
        was_started = True
        LOGGER.debug(
            ">>continuation_lines>>%s<<",
            str(parser_state.token_stack[-1].continuation_lines),
        )
        line_to_parse = parser_state.token_stack[-1].get_joined_lines(
            line_to_parse
        )
        start_index, extracted_whitespace = ParserHelper.extract_whitespace(
            line_to_parse, 0
        )
        LOGGER.debug(
            ">>line_to_parse>>%s<<", line_to_parse.replace("\n", "\\n")
        )
    if was_started:
        LOGGER.debug(">>parse_link_reference_definition>>was_started")
        (
            did_complete_lrd,
            end_lrd_index,
            parsed_lrd_tuple,
        ) = LinkReferenceDefinitionHelper.__parse_link_reference_definition(
            parser_state,
            line_to_parse,
            start_index,
            extracted_whitespace,
            is_blank_line,
        )
        LOGGER.debug(
            ">>parse_link_reference_definition>>was_started>>did_complete_lrd>>%s>>end_lrd_index>>%s>>len(line_to_parse)>>%s",
            str(did_complete_lrd),
            str(end_lrd_index),
            str(len(line_to_parse)),
        )
        # Hard failure: the joined text neither completed a definition nor
        # consumed the whole line; back out lines until something parses.
        if not (
            did_complete_lrd
            or (
                not is_blank_line
                and not did_complete_lrd
                and (end_lrd_index == len(line_to_parse))
            )
        ):
            LOGGER.debug(
                ">>parse_link_reference_definition>>was_started>>GOT HARD FAILURE"
            )
            (
                is_blank_line,
                line_to_parse,
                did_complete_lrd,
                end_lrd_index,
                parsed_lrd_tuple,
            ) = LinkReferenceDefinitionHelper.__process_lrd_hard_failure(
                parser_state, original_line_to_parse, lines_to_requeue
            )
    else:
        (
            did_complete_lrd,
            end_lrd_index,
            parsed_lrd_tuple,
        ) = LinkReferenceDefinitionHelper.__parse_link_reference_definition(
            parser_state,
            line_to_parse,
            start_index,
            extracted_whitespace,
            is_blank_line,
        )
        LOGGER.debug(
            ">>parse_link_reference_definition>>did_complete_lrd>>%s>>end_lrd_index>>%s>>len(line_to_parse)>>%s",
            str(did_complete_lrd),
            str(end_lrd_index),
            str(len(line_to_parse)),
        )
    # The whole (non-blank) line was consumed without completing: pause and
    # buffer it as a continuation line for the next call.
    if (
        end_lrd_index >= 0
        and end_lrd_index == len(line_to_parse)
        and not is_blank_line
    ):
        LinkReferenceDefinitionHelper.__add_line_for_lrd_continuation(
            parser_state,
            position_marker,
            was_started,
            original_line_to_parse,
            extracted_whitespace,
        )
        did_pause_lrd = True
    elif was_started:
        # An in-progress definition just ended (completed or abandoned).
        (
            force_ignore_first_as_lrd,
            new_tokens,
        ) = LinkReferenceDefinitionHelper.__stop_lrd_continuation(
            parser_state,
            did_complete_lrd,
            parsed_lrd_tuple,
            end_lrd_index,
            original_line_to_parse,
            is_blank_line,
        )
    else:
        LOGGER.debug(">>parse_link_reference_definition>>other")
    return (
        did_complete_lrd or end_lrd_index != -1,
        did_complete_lrd,
        did_pause_lrd,
        lines_to_requeue,
        force_ignore_first_as_lrd,
        new_tokens,
    )
def parse_line_for_container_blocks(
    parser_state,
    position_marker,
    ignore_link_definition_start,
    container_depth=0,
    foobar=None,
    init_bq=None,
):
    """
    Parse the line, taking care to handle any container blocks (block quote,
    unordered list, ordered list) before deciding whether or not to pass the
    (remaining parts of the) line to the leaf block processor.

    Returns a tuple of (container_level_tokens, line_to_parse,
    requeue_line_info) — requeue_line_info is None on the nested
    (container_depth > 0) path.
    """
    # TODO work on removing this
    line_to_parse = position_marker.text_to_parse
    # Keep the untouched line so the consumed-indent length can be computed
    # later.  (Strings are immutable, so no explicit copy is needed.)
    original_line_to_parse = position_marker.text_to_parse
    LOGGER.debug("Line:%s:", position_marker.text_to_parse)
    no_para_start_if_empty = False
    start_index, extracted_whitespace = ParserHelper.extract_whitespace(
        line_to_parse, 0
    )
    (
        current_container_blocks,
        adj_ws,
        stack_bq_count,
        this_bq_count,
    ) = ContainerBlockProcessor.__calculate_for_container_blocks(
        parser_state,
        line_to_parse,
        extracted_whitespace,
        foobar,
        init_bq,
    )
    new_position_marker = PositionMarker(
        position_marker.line_number, start_index, position_marker.text_to_parse
    )
    end_container_indices = ContainerIndices(-1, -1, -1)
    # Block quotes are handled first; they may consume a prefix of the line
    # and adjust the start index for the list handlers below.
    (
        did_process,
        was_container_start,
        end_container_indices.block_index,
        this_bq_count,
        stack_bq_count,
        line_to_parse,
        start_index,
        leaf_tokens,
        container_level_tokens,
        removed_chars_at_start,
        did_blank,
        last_block_quote_index,
    ) = BlockQuoteProcessor.handle_block_quote_block(
        parser_state,
        new_position_marker,
        extracted_whitespace,
        adj_ws,
        this_bq_count,
        stack_bq_count,
    )
    LOGGER.debug("text>>%s>>", line_to_parse)
    LOGGER.debug(">>container_level_tokens>>%s", str(container_level_tokens))
    LOGGER.debug(">>did_blank>>%s", did_blank)
    if did_blank:
        container_level_tokens.extend(leaf_tokens)
        return container_level_tokens, line_to_parse, RequeueLineInfo()

    # TODO refactor so it doesn't need this!
    new_position_marker = PositionMarker(
        position_marker.line_number, start_index, line_to_parse
    )
    LOGGER.debug(
        "pre-ulist>>#%s#%s#%s#",
        str(position_marker.index_number),
        str(position_marker.index_indent),
        position_marker.text_to_parse.replace("\t", "\\t"),
    )
    LOGGER.debug(
        "pre-ulist>>#%s#%s#%s#",
        str(new_position_marker.index_number),
        str(new_position_marker.index_indent),
        new_position_marker.text_to_parse.replace("\t", "\\t"),
    )
    (
        did_process,
        was_container_start,
        end_container_indices.ulist_index,
        no_para_start_if_empty,
        line_to_parse,
        resultant_tokens,
        removed_chars_at_start,
    ) = ListBlockProcessor.handle_ulist_block(
        parser_state,
        did_process,
        was_container_start,
        no_para_start_if_empty,
        new_position_marker,
        extracted_whitespace,
        adj_ws,
        stack_bq_count,
        this_bq_count,
        removed_chars_at_start,
        current_container_blocks,
    )
    container_level_tokens.extend(resultant_tokens)
    LOGGER.debug(
        "post-ulist>>#%s#%s#%s#",
        str(position_marker.index_number),
        str(position_marker.index_indent),
        position_marker.text_to_parse.replace("\t", "\\t"),
    )
    LOGGER.debug(
        "post-ulist>>#%s#%s#%s#",
        str(new_position_marker.index_number),
        str(new_position_marker.index_indent),
        new_position_marker.text_to_parse.replace("\t", "\\t"),
    )
    LOGGER.debug("text>>%s>>", line_to_parse)

    new_position_marker = PositionMarker(
        position_marker.line_number, start_index, line_to_parse
    )
    LOGGER.debug(
        "pre-olist>>#%s#%s#%s#",
        str(position_marker.index_number),
        str(position_marker.index_indent),
        position_marker.text_to_parse.replace("\t", "\\t"),
    )
    LOGGER.debug(
        "pre-olist>>#%s#%s#%s#",
        str(new_position_marker.index_number),
        str(new_position_marker.index_indent),
        new_position_marker.text_to_parse.replace("\t", "\\t"),
    )
    (
        did_process,
        was_container_start,
        end_container_indices.olist_index,
        no_para_start_if_empty,
        line_to_parse,
        resultant_tokens,
        removed_chars_at_start,
    ) = ListBlockProcessor.handle_olist_block(
        parser_state,
        did_process,
        was_container_start,
        no_para_start_if_empty,
        new_position_marker,
        extracted_whitespace,
        adj_ws,
        stack_bq_count,
        this_bq_count,
        removed_chars_at_start,
        current_container_blocks,
    )
    container_level_tokens.extend(resultant_tokens)
    LOGGER.debug(
        "post-olist>>#%s#%s#%s#",
        str(position_marker.index_number),
        str(position_marker.index_indent),
        position_marker.text_to_parse.replace("\t", "\\t"),
    )
    LOGGER.debug(
        "post-olist>>#%s#%s#%s#",
        str(new_position_marker.index_number),
        str(new_position_marker.index_indent),
        new_position_marker.text_to_parse.replace("\t", "\\t"),
    )
    LOGGER.debug("text>>%s>>", line_to_parse.replace(" ", "\\s"))
    LOGGER.debug("last_block_quote_index>>%s", str(last_block_quote_index))
    LOGGER.debug("olist_index>>%s", str(end_container_indices.olist_index))
    LOGGER.debug("ulist_index>>%s", str(end_container_indices.ulist_index))
    LOGGER.debug("block_index>>%s", str(end_container_indices.block_index))

    # Record where the innermost started container began, preferring the
    # block quote index over either list index.
    last_list_start_index = 0
    if end_container_indices.block_index != -1:
        assert last_block_quote_index in (
            end_container_indices.block_index - 1,
            end_container_indices.block_index,
        )
    elif end_container_indices.olist_index != -1:
        last_list_start_index = end_container_indices.olist_index
    elif end_container_indices.ulist_index != -1:
        last_list_start_index = end_container_indices.ulist_index

    if not parser_state.token_stack[-1].is_fenced_code_block:
        new_position_marker = PositionMarker(
            position_marker.line_number, start_index, line_to_parse
        )
        LOGGER.debug(
            "__handle_nested_container_blocks>>%s>>",
            line_to_parse.replace(" ", "\\s"),
        )
        (
            line_to_parse,
            leaf_tokens,
            container_level_tokens,
            no_para_start_if_empty,
        ) = ContainerBlockProcessor.__handle_nested_container_blocks(
            parser_state,
            container_depth,
            this_bq_count,
            stack_bq_count,
            no_para_start_if_empty,
            new_position_marker,
            end_container_indices,
            leaf_tokens,
            container_level_tokens,
            was_container_start,
        )
        LOGGER.debug("text>>%s>>", line_to_parse.replace(" ", "\\s"))

    LOGGER.debug("removed_chars_at_start>>>%s", str(removed_chars_at_start))
    if container_depth:
        # Nested invocation: leaf processing is the outermost call's job.
        assert not leaf_tokens
        LOGGER.debug(">>>>>>>>%s<<<<<<<<<<", line_to_parse)
        return container_level_tokens, line_to_parse, None

    LOGGER.debug(
        ">>__process_list_in_progress>>%s>>", line_to_parse.replace(" ", "\\s")
    )
    (
        did_process,
        line_to_parse,
        container_level_tokens,
    ) = ContainerBlockProcessor.__process_list_in_progress(
        parser_state,
        did_process,
        line_to_parse,
        start_index,
        container_level_tokens,
        extracted_whitespace,
    )
    LOGGER.debug(
        ">>__process_list_in_progress>>%s>>", line_to_parse.replace(" ", "\\s")
    )
    ContainerBlockProcessor.__process_lazy_lines(
        parser_state,
        leaf_tokens,
        this_bq_count,
        stack_bq_count,
        line_to_parse,
        extracted_whitespace,
        did_process,
        container_level_tokens,
    )
    LOGGER.debug("text>>%s>>", line_to_parse.replace(" ", "\\s"))

    # TODO refactor to make indent unnecessary?
    # The indent is the number of characters consumed by container handling.
    calculated_indent = len(original_line_to_parse) - len(line_to_parse)
    LOGGER.debug(">>indent>>%s", str(calculated_indent))
    newer_position_marker = PositionMarker(
        position_marker.line_number,
        start_index,
        line_to_parse,
        index_indent=calculated_indent,
    )
    leaf_tokens, requeue_line_info = ContainerBlockProcessor.__process_leaf_tokens(
        parser_state,
        leaf_tokens,
        newer_position_marker,
        this_bq_count,
        removed_chars_at_start,
        no_para_start_if_empty,
        ignore_link_definition_start,
        original_line_to_parse,
        last_block_quote_index,
        last_list_start_index,
    )
    container_level_tokens.extend(leaf_tokens)
    LOGGER.debug(
        "clt-end>>%s>>%s<<",
        str(len(container_level_tokens)),
        str(container_level_tokens),
    )
    return container_level_tokens, line_to_parse, requeue_line_info
def __process_lrd_hard_failure(parser_state, original_line_to_parse, lines_to_requeue):
    """
    In cases of a hard failure, we have had continuations to the original line
    that make it a bit more difficult to figure out if we have an actual good
    LRD in the mix somehow.  So take lines off the end while we have lines.

    Appends the removed lines to lines_to_requeue (mutated in place) and
    returns the state of the last parse attempt as a tuple:
    (is_blank_line, line_to_parse, did_complete_lrd, end_lrd_index,
     parsed_lrd_tuple).
    """
    is_blank_line = None
    line_to_parse = None
    did_complete_lrd = None
    end_lrd_index = None
    parsed_lrd_tuple = None
    do_again = True
    parser_state.token_stack[-1].add_continuation_line(original_line_to_parse)
    # Pop continuation lines off the end, one per iteration, re-parsing the
    # shrinking joined text until a complete definition is found or no lines
    # remain.
    while do_again and parser_state.token_stack[-1].continuation_lines:
        LOGGER.debug(
            "continuation_lines>>%s<<",
            str(parser_state.token_stack[-1].continuation_lines),
        )
        # Move the last continuation line onto the requeue list.
        lines_to_requeue.append(
            parser_state.token_stack[-1].continuation_lines[-1]
        )
        LOGGER.debug(
            ">>continuation_line>>%s",
            str(parser_state.token_stack[-1].continuation_lines[-1]),
        )
        del parser_state.token_stack[-1].continuation_lines[-1]
        LOGGER.debug(
            ">>lines_to_requeue>>%s>>%s",
            str(lines_to_requeue),
            str(len(lines_to_requeue)),
        )
        LOGGER.debug(
            ">>continuation_lines>>%s<<",
            str(parser_state.token_stack[-1].continuation_lines),
        )
        is_blank_line = True
        line_to_parse = parser_state.token_stack[-1].get_joined_lines("")
        # Drop the trailing character left by joining with an empty line.
        line_to_parse = line_to_parse[0:-1]
        start_index, extracted_whitespace = ParserHelper.extract_whitespace(
            line_to_parse, 0
        )
        LOGGER.debug(
            ">>line_to_parse>>%s<<", line_to_parse.replace("\n", "\\n")
        )
        (
            did_complete_lrd,
            end_lrd_index,
            parsed_lrd_tuple,
        ) = LinkReferenceDefinitionHelper.__parse_link_reference_definition(
            parser_state,
            line_to_parse,
            start_index,
            extracted_whitespace,
            is_blank_line,
        )
        LOGGER.debug(
            ">>parse_link_reference_definition>>was_started>>did_complete_lrd>>%s>>end_lrd_index>>%s>>len(line_to_parse)>>%s",
            str(did_complete_lrd),
            str(end_lrd_index),
            str(len(line_to_parse)),
        )
        do_again = not did_complete_lrd
    return (
        is_blank_line,
        line_to_parse,
        did_complete_lrd,
        end_lrd_index,
        parsed_lrd_tuple,
    )
def is_complete_html_start_tag(tag_name, line_to_parse, next_char_index):
    """
    Determine if the supplied information is a completed start of tag
    specification.

    Returns a tuple of (is_complete_start_tag, index_after_scanning).
    """
    # The tag name must be valid and must not be one of the "block 1" names.
    is_tag_valid = HtmlHelper.is_valid_tag_name(
        tag_name
    ) and not HtmlHelper.__is_valid_block_1_tag_name(tag_name)

    scan_index, leading_whitespace = ParserHelper.extract_whitespace(
        line_to_parse, next_char_index
    )
    attributes_ok = True
    # Consume "name[=value]" attribute pairs, each of which must be preceded
    # by whitespace, until a tag terminator or the end of the line.
    while (
        is_tag_valid
        and leading_whitespace
        and attributes_ok
        and (0 <= scan_index < len(line_to_parse))
        and line_to_parse[scan_index]
        not in (HtmlHelper.__html_tag_end, HtmlHelper.__html_tag_start)
    ):
        scan_index = HtmlHelper.extract_html_attribute_name(
            line_to_parse, scan_index
        )
        if scan_index == -1:
            attributes_ok = False
            break
        scan_index = HtmlHelper.extract_optional_attribute_value(
            line_to_parse, scan_index
        )
        if scan_index == -1:
            attributes_ok = False
            break
        scan_index, leading_whitespace = ParserHelper.extract_whitespace(
            line_to_parse, scan_index
        )

    # Accept an optional "/" (self-closing) followed by the mandatory ">".
    is_end_of_tag_present = False
    if (
        scan_index < len(line_to_parse)
        and line_to_parse[scan_index] == HtmlHelper.__html_tag_start
    ):
        scan_index += 1
    if (
        scan_index < len(line_to_parse)
        and line_to_parse[scan_index] == HtmlHelper.__html_tag_end
    ):
        scan_index += 1
        is_end_of_tag_present = True

    # Only trailing whitespace may follow the closing ">".
    scan_index, _ = ParserHelper.extract_whitespace(line_to_parse, scan_index)
    at_eol = scan_index == len(line_to_parse)

    is_complete = (
        is_tag_valid and is_end_of_tag_present and at_eol and attributes_ok
    )
    return is_complete, scan_index