def __process_inline_link_body(source_text, new_index):
    """
    Given that an inline link has been identified, process its body.

    Starting at new_index, whitespace is skipped.  Unless the inline link end
    character comes next, a link destination is parsed, followed by an
    optional link title, with whitespace allowed around each of them.

    Returns a tuple of (link, pre-link, title, pre-title, index).  The index
    is one past the inline link end character when the body is closed
    properly, and -1 when a parsing step fails or that character is missing.
    The link and title values are empty strings unless the matching parser
    was called, in which case they are whatever that parser returned.
    """
    LOGGER.debug("process_inline_link_body>>%s<<", source_text[new_index:])
    destination = pre_destination = title = pre_title = ""

    new_index, _ = ParserHelper.extract_any_whitespace(source_text, new_index)
    LOGGER.debug(
        "new_index>>%s>>source_text[]>>%s>", new_index, source_text[new_index:]
    )

    body_is_empty = ParserHelper.is_character_at_index(
        source_text, new_index, LinkHelper.__link_format_inline_end
    )
    if not body_is_empty:
        (
            destination,
            pre_destination,
            new_index,
            _,
        ) = LinkHelper.__parse_link_destination(source_text, new_index)

        # An index of -1 from either parser marks a failure; once that
        # happens the remaining steps are skipped.
        if new_index != -1:
            LOGGER.debug("before ws>>%s<", source_text[new_index:])
            new_index, _ = ParserHelper.extract_any_whitespace(
                source_text, new_index
            )
            LOGGER.debug("after ws>>%s>", source_text[new_index:])

            # Anything other than the end character here is treated as the
            # start of a link title.
            if ParserHelper.is_character_at_index_not(
                source_text, new_index, LinkHelper.__link_format_inline_end
            ):
                title, pre_title, new_index = LinkHelper.__parse_link_title(
                    source_text, new_index
                )
            if new_index != -1:
                new_index, _ = ParserHelper.extract_any_whitespace(
                    source_text, new_index
                )

    LOGGER.debug(
        "inline_link>>%s>>inline_title>>%s>new_index>%s>",
        destination,
        title,
        new_index,
    )

    if new_index != -1:
        # The body is only valid if it is terminated by the end character.
        is_closed = ParserHelper.is_character_at_index(
            source_text, new_index, LinkHelper.__link_format_inline_end
        )
        new_index = new_index + 1 if is_closed else -1

    LOGGER.debug(
        "process_inline_link_body>>inline_link>>%s>>inline_title>>%s>new_index>%s>",
        destination,
        title,
        new_index,
    )
    return destination, pre_destination, title, pre_title, new_index
def extract_link_title(line_to_parse, new_index, is_blank_line):
    """
    Extract the link reference definition's optional link title.

    Whitespace at new_index is skipped first.  A title is only parsed when
    some whitespace was skipped and text remains after it.

    Returns a 6-tuple:
      (did succeed, index, title, pre-title, skipped whitespace, consumed text)
    where the consumed text is the slice of line_to_parse from the end of the
    skipped whitespace up to the returned index.  On failure the first item
    is False and the last four items are None.
    """
    title = ""
    pre_title = ""

    LOGGER.debug("before ws>>%s>", line_to_parse[new_index:])
    new_index, leading_ws = ParserHelper.extract_any_whitespace(
        line_to_parse, new_index
    )
    LOGGER.debug(
        "after ws>>%s>ex_ws>%s",
        line_to_parse[new_index:],
        leading_ws.replace("\n", "\\n"),
    )
    title_start = new_index
    line_length = len(line_to_parse)

    # Reaching the end of the text is rejected unless the caller flagged a
    # blank line.
    if new_index == line_length and not is_blank_line:
        return False, new_index, None, None, None, None

    if leading_ws and new_index < line_length:
        title, pre_title, new_index = LinkHelper.__parse_link_title(
            line_to_parse, new_index
        )
        # Both failure signals (an index of -1, or a title of None) report
        # the index exactly as the title parser returned it.
        if new_index == -1 or title is None:
            return False, new_index, None, None, None, None

    consumed_text = line_to_parse[title_start:new_index]
    return True, new_index, title, pre_title, leading_ws, consumed_text
def __verify_link_definition_end(line_to_parse, new_index):
    """
    Verify that the link reference definition ends properly.

    Whitespace starting at new_index is skipped; the definition ends properly
    only if doing so reaches the end of line_to_parse.

    Returns (True, index after the whitespace, skipped whitespace) on success
    and (False, -1, None) when other characters remain.
    """
    LOGGER.debug("look for EOL-ws>>%s<<", line_to_parse[new_index:])
    end_index, trailing_ws = ParserHelper.extract_any_whitespace(
        line_to_parse, new_index
    )
    LOGGER.debug("look for EOL>>%s<<", line_to_parse[end_index:])

    if end_index >= len(line_to_parse):
        return True, end_index, trailing_ws

    LOGGER.debug(">> characters left at EOL, bailing")
    return False, -1, None
def __verify_link_definition_end(
    line_to_parse: str, new_index: Optional[int]
) -> Tuple[bool, Optional[int], Optional[str]]:
    """
    Verify that the link reference definition ends properly.

    Whitespace starting at new_index is skipped; the definition ends properly
    only if doing so reaches the end of line_to_parse.

    Returns (True, index after the whitespace, skipped whitespace) on success
    and (False, -1, None) when other characters remain.
    """
    assert new_index is not None
    POGGER.debug("look for EOL-ws>>$<<", line_to_parse[new_index:])

    end_index, trailing_ws = ParserHelper.extract_any_whitespace(
        line_to_parse, new_index
    )
    assert end_index is not None
    POGGER.debug("look for EOL>>$<<", line_to_parse[end_index:])

    if end_index >= len(line_to_parse):
        return True, end_index, trailing_ws

    POGGER.debug(">> characters left at EOL, bailing")
    return False, -1, None
def __compile(
    cls, found_value: str
) -> Tuple[List[Union[str, Tuple[int, str]]], bool, Optional[str]]:
    """
    Compile a comma-separated specification into a list of elements.

    Each comma-separated element must be either the wildcard "*" or a run of
    1 to 6 hash characters, followed by whitespace (as recognized by
    ParserHelper.extract_any_whitespace), followed by at least one more
    character.  Wildcards are stored as the string "*"; every other element
    is stored as a (hash count, remaining text) tuple.

    Returns (compiled elements, whether any wildcard was seen, error text).
    When an element is invalid, the result is ([], False, message); otherwise
    the error text is None.
    """
    compiled: List[Union[str, Tuple[int, str]]] = []
    saw_wildcard = False

    for element in found_value.split(","):
        if element == "*":
            if compiled and compiled[-1] == "*":
                return (
                    [],
                    False,
                    "Two wildcard elements cannot be next to each other.",
                )
            compiled.append(element)
            saw_wildcard = True
            continue

        hash_count, after_hashes = ParserHelper.collect_while_character(
            element, 0, "#"
        )
        if not hash_count:
            return [], False, "Element must start with hash characters (#)."
        if hash_count > 6:
            return (
                [],
                False,
                "Element must start with between 1 and 6 hash characters (#).",
            )
        assert after_hashes is not None

        text_start, separator = ParserHelper.extract_any_whitespace(
            element, after_hashes
        )
        if not separator:
            return (
                [],
                False,
                "Element must have at least one space character after any hash characters (#).",
            )
        if text_start == len(element):
            return (
                [],
                False,
                "Element must have at least one non-space character after any space characters.",
            )
        compiled.append((hash_count, element[text_start:]))

    return compiled, saw_wildcard, None
def __parse_raw_open_tag(text_to_parse):
    """
    Parse the current line as if it is an open tag, and determine if it is valid.

    The text must begin with a tag name (per HtmlHelper.__parse_raw_tag_name),
    followed by zero or more attributes that are each preceded by whitespace,
    an optional HtmlHelper.__html_tag_start character, and finally the
    HtmlHelper.__html_tag_end character.

    Returns (valid_raw_html, end_parse_index).  On success, valid_raw_html is
    the text up to (not including) the tag end character and end_parse_index
    is the index just past that character.  On any failure the result is
    (None, -1).
    """
    tag_name = HtmlHelper.__parse_raw_tag_name(text_to_parse, 0)
    if not tag_name:
        return None, -1

    parse_index, extracted_whitespace = ParserHelper.extract_any_whitespace(
        text_to_parse, len(tag_name)
    )

    # An attribute is only accepted when whitespace separates it from what
    # came before, so the loop stops as soon as no whitespace was extracted.
    while extracted_whitespace and ParserHelper.is_character_at_index_one_of(
        text_to_parse,
        parse_index,
        HtmlHelper.__tag_attribute_name_start,
    ):
        (
            parse_index,
            extracted_whitespace,
        ) = HtmlHelper.__parse_tag_attributes(text_to_parse, parse_index)
        if parse_index is None:
            # Report failure using this function's own contract instead of
            # forwarding the attribute parser's second value, which is
            # whitespace-or-sentinel and not an end index.
            return None, -1

    # NOTE(review): presumably this is the "/" of a self-closing tag; confirm
    # against the definition of HtmlHelper.__html_tag_start.
    if ParserHelper.is_character_at_index(
        text_to_parse, parse_index, HtmlHelper.__html_tag_start
    ):
        parse_index += 1

    if not ParserHelper.is_character_at_index(
        text_to_parse, parse_index, HtmlHelper.__html_tag_end
    ):
        return None, -1
    return text_to_parse[0:parse_index], parse_index + 1
def __parse_raw_open_tag(text_to_parse: str) -> Tuple[Optional[str], int]:
    """
    Parse the current line as if it is an open tag, and determine if it is valid.

    The text must begin with a tag name (per HtmlHelper.__parse_raw_tag_name),
    followed by zero or more attributes that are each preceded by whitespace,
    an optional HtmlHelper.__html_tag_start character, and finally the
    HtmlHelper.__html_tag_end character.

    Returns (valid_raw_html, end_parse_index).  On success, valid_raw_html is
    the text up to (not including) the tag end character and end_parse_index
    is the index just past that character.  On any failure the result is
    (None, -1).
    """
    tag_name = HtmlHelper.__parse_raw_tag_name(text_to_parse, 0)
    if not tag_name:
        return None, -1

    cursor, gap = ParserHelper.extract_any_whitespace(text_to_parse, len(tag_name))
    assert cursor is not None

    # Each attribute must be separated from what precedes it by whitespace.
    while gap and ParserHelper.is_character_at_index_one_of(
        text_to_parse,
        cursor,
        HtmlHelper.__tag_attribute_name_start,
    ):
        cursor, gap = HtmlHelper.__parse_tag_attributes(text_to_parse, cursor)
        if cursor is None:
            return None, -1

    if ParserHelper.is_character_at_index(
        text_to_parse, cursor, HtmlHelper.__html_tag_start
    ):
        cursor += 1

    if not ParserHelper.is_character_at_index(
        text_to_parse, cursor, HtmlHelper.__html_tag_end
    ):
        return None, -1
    return text_to_parse[:cursor], cursor + 1
def __parse_tag_attributes(text_to_parse, start_index):
    """
    Handle the parsing of the attributes for an open tag.

    Parses one attribute starting at start_index: an attribute name, then an
    optional name/value separator followed by a single-quoted, double-quoted,
    or unquoted value.  Whitespace is allowed around the separator and is
    consumed after the attribute.

    Returns (index after the attribute and trailing whitespace, that trailing
    whitespace).  If a quoted value has no closing quote, returns (None, -1).
    """
    name_end, _ = ParserHelper.collect_while_one_of_characters(
        text_to_parse, start_index, HtmlHelper.__tag_attribute_name_characters
    )
    after_name, trailing_ws = ParserHelper.extract_any_whitespace(
        text_to_parse, name_end
    )

    has_value = ParserHelper.is_character_at_index(
        text_to_parse,
        after_name,
        HtmlHelper.__html_attribute_name_value_separator,
    )
    if not has_value:
        return after_name, trailing_ws

    value_start, _ = ParserHelper.extract_any_whitespace(
        text_to_parse, after_name + 1
    )

    # Single-quoted values are tried first, then double-quoted ones; both are
    # handled the same way apart from the quote character itself.
    for quote in (
        HtmlHelper.__html_attribute_value_single,
        HtmlHelper.__html_attribute_value_double,
    ):
        if not ParserHelper.is_character_at_index_one_of(
            text_to_parse, value_start, quote
        ):
            continue
        value_end, _ = ParserHelper.collect_until_character(
            text_to_parse, value_start + 1, quote
        )
        if not ParserHelper.is_character_at_index(text_to_parse, value_end, quote):
            return None, -1
        value_end += 1  # step over the closing quote
        break
    else:
        # Neither quote character matched: treat it as an unquoted value.
        value_end, _ = ParserHelper.collect_until_one_of_characters(
            text_to_parse,
            value_start,
            HtmlHelper.__unquoted_attribute_value_stop,
        )

    after_value, trailing_ws = ParserHelper.extract_any_whitespace(
        text_to_parse, value_end
    )
    return after_value, trailing_ws
def __parse_tag_attributes(
    text_to_parse: str, start_index: int
) -> Tuple[Optional[int], Optional[str]]:
    """
    Handle the parsing of the attributes for an open tag.

    Parses one attribute starting at start_index: an attribute name, then an
    optional name/value separator followed by a single-quoted, double-quoted,
    or unquoted value.  Whitespace is allowed around the separator and is
    consumed after the attribute.

    Returns (index after the attribute and trailing whitespace, that trailing
    whitespace).  If a quoted value has no closing quote, returns (None, None).
    """
    name_end, _ = ParserHelper.collect_while_one_of_characters(
        text_to_parse, start_index, HtmlHelper.__tag_attribute_name_characters
    )
    assert name_end is not None
    after_name, trailing_ws = ParserHelper.extract_any_whitespace(
        text_to_parse, name_end
    )
    assert after_name is not None

    if not ParserHelper.is_character_at_index(
        text_to_parse,
        after_name,
        HtmlHelper.__html_attribute_name_value_separator,
    ):
        # A bare attribute name with no value.
        return after_name, trailing_ws

    value_start, _ = ParserHelper.extract_any_whitespace(
        text_to_parse, after_name + 1
    )
    assert value_start is not None

    # Pick the quote character that opens the value, if any.  The generator
    # is lazy, so the double quote is only tested when the single quote did
    # not match.
    candidate_quotes = (
        HtmlHelper.__html_attribute_value_single,
        HtmlHelper.__html_attribute_value_double,
    )
    opening_quote = next(
        (
            quote
            for quote in candidate_quotes
            if ParserHelper.is_character_at_index_one_of(
                text_to_parse, value_start, quote
            )
        ),
        None,
    )

    value_end: Optional[int]
    if opening_quote is None:
        value_end, _ = ParserHelper.collect_until_one_of_characters(
            text_to_parse,
            value_start,
            HtmlHelper.__unquoted_attribute_value_stop,
        )
        assert value_end is not None
    else:
        value_end, _ = ParserHelper.collect_until_character(
            text_to_parse, value_start + 1, opening_quote
        )
        assert value_end is not None
        if not ParserHelper.is_character_at_index(
            text_to_parse, value_end, opening_quote
        ):
            return None, None
        value_end += 1  # step over the closing quote

    after_value, trailing_ws = ParserHelper.extract_any_whitespace(
        text_to_parse, value_end
    )
    return after_value, trailing_ws