def parse_setext_headings(
    parser_state,
    position_marker,
    extracted_whitespace,
    this_bq_count,
    stack_bq_count,
):
    """
    Handle the parsing of an setext heading.

    Args:
        parser_state: parser state; `token_stack` and `token_document` are read
            and mutated in place when a setext heading is recognized.
        position_marker: carries `text_to_parse` and `index_number`, the line
            and position currently being examined.
        extracted_whitespace: leading whitespace already removed from the line;
            must be at most 3 characters long for a setext heading.
        this_bq_count: current block-quote count — presumably the count for the
            line being parsed; TODO confirm against caller.
        stack_bq_count: block-quote count recorded on the stack; must match
            this_bq_count for a setext heading to apply.

    Returns:
        list of new tokens (only the setext end token, if one was produced).
    """
    new_tokens = []
    # A setext heading requires: <=3 leading spaces, a setext marker character
    # at the current position, an open paragraph on the stack, and matching
    # block-quote counts.
    if (ParserHelper.is_length_less_than_or_equal_to(
            extracted_whitespace, 3) and ParserHelper.is_character_at_index_one_of(
                position_marker.text_to_parse,
                position_marker.index_number,
                LeafBlockProcessor.__setext_characters,
            ) and parser_state.token_stack[-1].is_paragraph
            and (this_bq_count == stack_bq_count)):
        # Consume the full run of the marker character (e.g. "---" or "===").
        _, collected_to_index = ParserHelper.collect_while_character(
            position_marker.text_to_parse,
            position_marker.index_number,
            position_marker.text_to_parse[position_marker.index_number],
        )
        (
            after_whitespace_index,
            extra_whitespace_after_setext,
        ) = ParserHelper.extract_whitespace(position_marker.text_to_parse,
                                            collected_to_index)
        # Only whitespace may follow the marker run for this to be a heading.
        if after_whitespace_index == len(position_marker.text_to_parse):
            # This is unusual. Normally, close_open_blocks is used to close off
            # blocks based on the stack token. However, since the setext takes
            # the last paragraph of text (see case 61) and translates it
            # into a heading, this has to be done separately, as there is no
            # stack token to close.
            new_tokens.append(
                EndMarkdownToken(
                    MarkdownToken.token_setext_heading,
                    extracted_whitespace,
                    extra_whitespace_after_setext,
                    None,
                ))
            # Walk backwards to the paragraph token that the heading replaces.
            token_index = len(parser_state.token_document) - 1
            while not parser_state.token_document[token_index].is_paragraph:
                token_index -= 1
            replacement_token = SetextHeadingMarkdownToken(
                position_marker.text_to_parse[position_marker.index_number],
                collected_to_index - position_marker.index_number,
                parser_state.token_document[token_index].extra_data,
                position_marker,
                parser_state.token_document[token_index],
            )
            # Swap the paragraph token for the heading and pop the paragraph
            # from the stack.
            parser_state.token_document[token_index] = replacement_token
            del parser_state.token_stack[-1]
    return new_tokens
def is_atx_heading(
    line_to_parse: str,
    start_index: int,
    extracted_whitespace: Optional[str],
    skip_whitespace_check: bool = False,
) -> Tuple[bool, Optional[int], Optional[int], Optional[str]]:
    """
    Determine whether or not an ATX Heading is about to start.

    Returns a 4-tuple: (found, index after the whitespace that follows the
    hashes, number of hash characters, the whitespace between the hashes and
    the text) — all-None except the leading False when no heading starts here.
    """
    assert extracted_whitespace is not None
    leading_whitespace_ok = (
        ParserHelper.is_length_less_than_or_equal_to(extracted_whitespace, 3)
        or skip_whitespace_check
    )
    if leading_whitespace_ok and ParserHelper.is_character_at_index(
        line_to_parse,
        start_index,
        LeafBlockProcessor.__atx_character,
    ):
        # Count the run of '#' characters starting the candidate heading.
        hash_count, new_index = ParserHelper.collect_while_character(
            line_to_parse,
            start_index,
            LeafBlockProcessor.__atx_character,
        )
        assert new_index is not None
        # Skip any spaces that separate the hashes from the heading text.
        _, non_whitespace_index = ParserHelper.collect_while_character(
            line_to_parse, new_index, " "
        )
        extracted_whitespace_at_start = line_to_parse[new_index:non_whitespace_index]

        assert hash_count is not None
        # At most 6 hashes; they must be followed by a space or end-of-line.
        is_heading = hash_count <= 6 and (
            bool(extracted_whitespace_at_start)
            or non_whitespace_index == len(line_to_parse)
        )
        if is_heading:
            return (
                True,
                non_whitespace_index,
                hash_count,
                extracted_whitespace_at_start,
            )
    return False, None, None, None
def parse_setext_headings(
    parser_state: ParserState,
    position_marker: PositionMarker,
    extracted_whitespace: Optional[str],
    block_quote_data: BlockQuoteData,
) -> List[MarkdownToken]:
    """
    Handle the parsing of an setext heading.

    A setext heading is only recognized when the current line sits under an
    open paragraph, starts (after at most 3 spaces) with a run of setext
    marker characters, and the block-quote counts agree.
    """
    new_tokens: List[MarkdownToken] = []
    assert extracted_whitespace is not None
    looks_like_setext = (
        ParserHelper.is_length_less_than_or_equal_to(extracted_whitespace, 3)
        and ParserHelper.is_character_at_index_one_of(
            position_marker.text_to_parse,
            position_marker.index_number,
            LeafBlockProcessor.__setext_characters,
        )
        and parser_state.token_stack[-1].is_paragraph
        and (block_quote_data.current_count == block_quote_data.stack_count)
    )
    if looks_like_setext:
        is_paragraph_continuation = (
            LeafBlockProcessor.__adjust_continuation_for_active_list(
                parser_state, position_marker
            )
        )
        # Consume the full run of the marker character under the cursor.
        _, collected_to_index = ParserHelper.collect_while_character(
            position_marker.text_to_parse,
            position_marker.index_number,
            position_marker.text_to_parse[position_marker.index_number],
        )
        assert collected_to_index is not None
        (
            after_whitespace_index,
            extra_whitespace_after_setext,
        ) = ParserHelper.extract_whitespace(
            position_marker.text_to_parse, collected_to_index
        )
        # Only trailing whitespace may follow the marker run; a paragraph
        # continuation disqualifies the line as a heading underline.
        if not is_paragraph_continuation and after_whitespace_index == len(
            position_marker.text_to_parse
        ):
            LeafBlockProcessor.__create_setext_token(
                parser_state,
                position_marker,
                collected_to_index,
                new_tokens,
                extracted_whitespace,
                extra_whitespace_after_setext,
            )
    return new_tokens
def test_simple_case_from_start_without_whitespace():
    """
    Make sure that we test a simple extraction from the start of the string without whitespace.
    """
    # Arrange
    input_string = " this is a test"
    start_index = 2
    character_to_match = " "
    expected_output = (0, 2)

    # Act
    actual_output = ParserHelper.collect_while_character(
        input_string,
        start_index,
        character_to_match,
    )

    # Assert
    assert actual_output == expected_output
def test_empty_string_with_good_index():
    """
    Make sure that an empty string is handled properly with a good index
    """
    # Arrange
    input_string = ""
    start_index = 0
    character_to_match = " "
    expected_output = (0, 0)

    # Act
    actual_output = ParserHelper.collect_while_character(
        input_string,
        start_index,
        character_to_match,
    )

    # Assert
    assert actual_output == expected_output
def test_empty_string_with_bad_left_index():
    """
    Make sure that an empty string is handled properly with an index that is too far to the left.
    """
    # Arrange
    input_string = ""
    start_index = -1
    character_to_match = " "
    expected_output = (None, None)

    # Act
    actual_output = ParserHelper.collect_while_character(
        input_string,
        start_index,
        character_to_match,
    )

    # Assert
    assert actual_output == expected_output
def __compile(
    cls, found_value: str
) -> Tuple[List[Union[str, Tuple[int, str]]], bool, Optional[str]]:
    """
    Compile a comma-separated specification into a list of elements.

    Each element is either the wildcard string "*" or a (hash_count, text)
    tuple.  Returns (compiled_elements, any_wildcards_present, error_message);
    on the first invalid element the list is empty and the error is set.
    """
    compiled_lines: List[Union[str, Tuple[int, str]]] = []
    are_any_wildcards = False
    for next_part in found_value.split(","):
        if next_part == "*":
            # Adjacent wildcards are ambiguous, so reject them outright.
            if compiled_lines and compiled_lines[-1] == "*":
                return (
                    [],
                    False,
                    "Two wildcard elements cannot be next to each other.",
                )
            compiled_lines.append(next_part)
            are_any_wildcards = True
            continue
        # Non-wildcard elements look like ATX headings: 1-6 '#', whitespace,
        # then at least one non-space character.
        count, new_index = ParserHelper.collect_while_character(next_part, 0, "#")
        if not count:
            return [], False, "Element must start with hash characters (#)."
        if count > 6:
            return (
                [],
                False,
                "Element must start with between 1 and 6 hash characters (#).",
            )
        assert next_part is not None
        assert new_index is not None
        new_index, extracted_whitespace = ParserHelper.extract_any_whitespace(
            next_part, new_index
        )
        if not extracted_whitespace:
            return (
                [],
                False,
                "Element must have at least one space character after any hash characters (#).",
            )
        if len(next_part) == new_index:
            return (
                [],
                False,
                "Element must have at least one non-space character after any space characters.",
            )
        compiled_lines.append((count, next_part[new_index:]))
    return compiled_lines, are_any_wildcards, None
def is_fenced_code_block(
    line_to_parse: str,
    start_index: int,
    extracted_whitespace: Optional[str],
    skip_whitespace_check: bool = False,
) -> Tuple[bool, Optional[int], Optional[str], Optional[int]]:
    """
    Determine if we have the start of a fenced code block.

    Returns (found, index of the info string, whitespace before the info
    string, fence length); all-None after the leading False when the line
    does not open a fence.
    """
    assert extracted_whitespace is not None
    whitespace_permits_fence = (
        skip_whitespace_check
        or ParserHelper.is_length_less_than_or_equal_to(extracted_whitespace, 3)
    )
    if whitespace_permits_fence and ParserHelper.is_character_at_index_one_of(
        line_to_parse,
        start_index,
        LeafBlockProcessor.__fenced_code_block_start_characters,
    ):
        POGGER.debug("ifcb:collected_count>>$<<$<<", line_to_parse, start_index)
        # Count the run of the fence character actually present at the index.
        collected_count, new_index = ParserHelper.collect_while_character(
            line_to_parse, start_index, line_to_parse[start_index]
        )
        POGGER.debug("ifcb:collected_count:$", collected_count)
        assert collected_count is not None
        assert new_index is not None
        (
            non_whitespace_index,
            extracted_whitespace_before_info_string,
        ) = ParserHelper.extract_whitespace(line_to_parse, new_index)
        # A fence needs at least three fence characters.
        if collected_count >= 3:
            POGGER.debug("ifcb:True")
            return (
                True,
                non_whitespace_index,
                extracted_whitespace_before_info_string,
                collected_count,
            )
    return False, None, None, None
def __parse_raw_declaration(text_to_parse):
    """
    Parse a possible raw html declaration sequence, and return if it is valid.

    Returns the original text when it is a valid declaration, else None.
    """
    valid_raw_html = None
    if ParserHelper.is_character_at_index_one_of(
        text_to_parse, 0, HtmlHelper.__raw_declaration_start_character
    ):
        # Collect the declaration name that follows the start character.
        parse_index, declaration_name = ParserHelper.collect_while_one_of_characters(
            text_to_parse, 1, HtmlHelper.__html_block_4_continued_start
        )
        if declaration_name:
            # The name must be followed by at least one whitespace character.
            whitespace_count, _ = ParserHelper.collect_while_character(
                text_to_parse, parse_index, HtmlHelper.__raw_declaration_whitespace
            )
            if whitespace_count:
                valid_raw_html = text_to_parse
    return valid_raw_html
def is_fenced_code_block(
    line_to_parse,
    start_index,
    extracted_whitespace,
    skip_whitespace_check=False,
):
    """
    Determine if we have the start of a fenced code block.
    """
    can_start_here = (
        ParserHelper.is_length_less_than_or_equal_to(extracted_whitespace, 3)
        or skip_whitespace_check
    )
    if can_start_here and ParserHelper.is_character_at_index_one_of(
        line_to_parse,
        start_index,
        LeafBlockProcessor.__fenced_code_block_start_characters,
    ):
        LOGGER.debug(
            "ifcb:collected_count>>%s<<%s<<", line_to_parse, str(start_index)
        )
        # Count the run of the fence character actually found at start_index.
        collected_count, new_index = ParserHelper.collect_while_character(
            line_to_parse, start_index, line_to_parse[start_index]
        )
        LOGGER.debug("ifcb:collected_count:%s", str(collected_count))
        (
            non_whitespace_index,
            extracted_whitespace_before_info_string,
        ) = ParserHelper.extract_whitespace(line_to_parse, new_index)
        # At least three fence characters are required to open a fence.
        if collected_count >= 3:
            LOGGER.debug("ifcb:True")
            return (
                True,
                non_whitespace_index,
                extracted_whitespace_before_info_string,
                collected_count,
            )
    return False, None, None, None
def __handle_inline_special(
    source_text,
    next_index,
    inline_blocks,
    special_length,
    remaining_line,
    current_string_unresolved,
):
    """
    Handle the collection of special inline characters for later processing.

    Args:
        source_text: full text being parsed for inline elements.
        next_index: index of the special character within source_text.
        inline_blocks: inline tokens collected so far; passed to and possibly
            mutated by LinkHelper.look_for_link_or_image.
        special_length: number of characters in the special sequence to grab.
        remaining_line: remainder of the current line; forwarded to the link
            lookup helper.
        current_string_unresolved: unresolved text accumulated so far;
            forwarded to the link lookup helper.

    Returns:
        InlineResponse with new_string="", the index to resume parsing at,
        a single new token, and the consume_rest_of_line flag.
    """
    preceding_two = None
    following_two = None
    new_token = None
    repeat_count = 1
    is_active = True
    consume_rest_of_line = False
    special_sequence = source_text[next_index:next_index + special_length]
    if special_length == 1 and special_sequence in EmphasisHelper.inline_emphasis:
        # Emphasis character: swallow the whole run and capture up to two
        # characters of context on each side for later emphasis resolution.
        repeat_count, new_index = ParserHelper.collect_while_character(
            source_text, next_index, special_sequence)
        special_sequence = source_text[next_index:new_index]
        preceding_two = source_text[max(0, next_index - 2):next_index]
        following_two = source_text[new_index:min(len(source_text), new_index + 2)]
    else:
        if special_sequence[0] == LinkHelper.link_label_end:
            # Closing "]": this may complete a link or image, so hand off to
            # the link helper, which may itself produce the new token.
            LOGGER.debug(
                "\nPOSSIBLE LINK CLOSE_FOUND>>%s>>%s>>",
                str(special_length),
                special_sequence,
            )
            LOGGER.debug(">>inline_blocks>>%s<<", str(inline_blocks))
            LOGGER.debug(">>remaining_line>>%s<<", str(remaining_line))
            LOGGER.debug(">>current_string_unresolved>>%s<<",
                         str(current_string_unresolved))
            LOGGER.debug(">>source_text>>%s<<", source_text[next_index:])
            LOGGER.debug("")
            (
                new_index,
                is_active,
                new_token,
                consume_rest_of_line,
            ) = LinkHelper.look_for_link_or_image(
                inline_blocks,
                source_text,
                next_index,
                remaining_line,
                current_string_unresolved,
            )
            LOGGER.debug(">>inline_blocks>>%s<<", str(inline_blocks))
            LOGGER.debug(">>new_token>>%s<<", str(new_token))
            LOGGER.debug(">>source_text>>%s<<", source_text[new_index:])
            LOGGER.debug(">>consume_rest_of_line>>%s<<",
                         str(consume_rest_of_line))
        else:
            # Any other special sequence is consumed verbatim.
            repeat_count = special_length
            new_index = next_index + special_length
    if not new_token:
        # No token produced above (e.g. emphasis or plain special sequence):
        # record it as a special-text token for later resolution.
        new_token = SpecialTextMarkdownToken(special_sequence, repeat_count,
                                             preceding_two, following_two,
                                             is_active)
    inline_response = InlineResponse()
    inline_response.new_string = ""
    inline_response.new_index = new_index
    inline_response.new_tokens = [new_token]
    inline_response.consume_rest_of_line = consume_rest_of_line
    return inline_response
def parse_atx_headings(parser_state, position_marker, extracted_whitespace):
    """
    Handle the parsing of an atx heading.

    Args:
        parser_state: parser state; `close_open_blocks_fn` is invoked to close
            any open blocks before the heading tokens are emitted.
        position_marker: carries `text_to_parse` and `index_number`, the line
            and position currently being examined.
        extracted_whitespace: leading whitespace already removed from the line;
            must be at most 3 characters long for an ATX heading.

    Returns:
        list of new tokens: any close tokens, then the heading start token,
        its text token, and the heading end token — or empty when the line is
        not an ATX heading.
    """
    new_tokens = []
    if ParserHelper.is_length_less_than_or_equal_to(
            extracted_whitespace, 3) and ParserHelper.is_character_at_index(
                position_marker.text_to_parse,
                position_marker.index_number,
                LeafBlockProcessor.__atx_character,
            ):
        # Count the run of leading '#' characters and find the start of the
        # heading text.
        hash_count, new_index = ParserHelper.collect_while_character(
            position_marker.text_to_parse,
            position_marker.index_number,
            LeafBlockProcessor.__atx_character,
        )
        (
            non_whitespace_index,
            extracted_whitespace_at_start,
        ) = ParserHelper.extract_whitespace(position_marker.text_to_parse,
                                            new_index)
        # At most 6 hashes, followed by whitespace or end-of-line.
        if hash_count <= 6 and (extracted_whitespace_at_start
                                or non_whitespace_index == len(
                                    position_marker.text_to_parse)):
            new_tokens, _, _ = parser_state.close_open_blocks_fn(
                parser_state, new_tokens)
            remaining_line = position_marker.text_to_parse[
                non_whitespace_index:]
            (
                end_index,
                extracted_whitespace_at_end,
            ) = ParserHelper.extract_whitespace_from_end(remaining_line)
            # Count trailing '#' characters; a closing hash run is removed
            # from the heading text.
            remove_trailing_count = 0
            while (end_index > 0 and remaining_line[end_index - 1] ==
                   LeafBlockProcessor.__atx_character):
                end_index -= 1
                remove_trailing_count += 1
            extracted_whitespace_before_end = ""
            if remove_trailing_count:
                if end_index > 0:
                    if ParserHelper.is_character_at_index_whitespace(
                            remaining_line, end_index - 1):
                        # Valid closing run: strip the hashes, then strip and
                        # remember the whitespace that preceded them.
                        remaining_line = remaining_line[:end_index]
                        (
                            end_index,
                            extracted_whitespace_before_end,
                        ) = ParserHelper.extract_whitespace_from_end(
                            remaining_line)
                        remaining_line = remaining_line[:end_index]
                    else:
                        # Hashes not preceded by whitespace are part of the
                        # text, not a closing run — undo the trailing count.
                        extracted_whitespace_at_end = ""
                        remove_trailing_count = 0
                else:
                    # The whole remainder was hashes: heading text is empty.
                    remaining_line = ""
            else:
                # No closing run: split text from its trailing whitespace.
                extracted_whitespace_at_end = remaining_line[end_index:]
                remaining_line = remaining_line[0:end_index]
            start_token = AtxHeadingMarkdownToken(
                hash_count,
                remove_trailing_count,
                extracted_whitespace,
                position_marker,
            )
            new_tokens.append(start_token)
            new_tokens.append(
                TextMarkdownToken(remaining_line,
                                  extracted_whitespace_at_start))
            end_token = EndMarkdownToken(
                "atx",
                extracted_whitespace_at_end,
                extracted_whitespace_before_end,
                None,
            )
            # Link the end token back to its start token.
            end_token.start_markdown_token = start_token
            new_tokens.append(end_token)
    return new_tokens