def __handle_html_block(
    parser_state,
    outer_processed,
    position_marker,
    extracted_whitespace,
    new_tokens,
):
    """
    Run the HTML block handling for the current line.

    If the line has not been processed yet and the top of the token stack is
    not an HTML block, HtmlHelper.parse_html_block is called and its tokens are
    appended to new_tokens.  If the top of the token stack is an HTML block
    afterwards, HtmlHelper.check_normal_html_block_end is called, its tokens
    (asserted to be non-empty) are appended as well, and the line is reported
    as processed.

    Returns the possibly updated outer_processed flag.
    """
    LOGGER.debug(">>position_marker>>ttp>>%s>>", position_marker.text_to_parse)
    LOGGER.debug(">>position_marker>>in>>%s>>", str(position_marker.index_number))
    LOGGER.debug(">>position_marker>>ln>>%s>>", str(position_marker.line_number))

    if not (outer_processed or parser_state.token_stack[-1].is_html_block):
        new_tokens.extend(
            HtmlHelper.parse_html_block(
                parser_state,
                position_marker,
                extracted_whitespace,
            )
        )

    # The stack top is read again on purpose: the call above receives
    # parser_state and presumably may have changed what is on top.
    if not parser_state.token_stack[-1].is_html_block:
        return outer_processed

    end_tokens = HtmlHelper.check_normal_html_block_end(
        parser_state,
        position_marker.text_to_parse,
        position_marker.index_number,
        extracted_whitespace,
    )
    assert end_tokens
    new_tokens.extend(end_tokens)
    return True
def __handle_blank_line_token_stack(
    parser_state: ParserState,
) -> Tuple[Optional[List[MarkdownToken]], bool, Optional[RequeueLineInfo]]:
    """
    Decide how a blank line interacts with the element on top of the token stack.

    Returns a tuple of:
    - the tokens produced by the handling, or None when no branch set them,
    - whether the stack top had a link definition started (in which case the
      link reference definition is processed with an empty position marker),
    - the requeue information returned by that processing, otherwise None.
    """
    is_processing_list, in_index = LeafBlockProcessor.check_for_list_in_process(
        parser_state
    )
    stack_top = parser_state.token_stack[-1]
    POGGER.debug(
        "hbl>>is_processing_list>>$>>in_index>>$>>last_stack>>$",
        is_processing_list,
        in_index,
        stack_top,
    )

    requeue_line_info: Optional[RequeueLineInfo] = None
    new_tokens: Optional[List[MarkdownToken]] = None
    force_default_handling: bool = stack_top.was_link_definition_started

    if force_default_handling:
        POGGER.debug(
            "hbl>>process_link_reference_definition>>stopping link definition"
        )
        blank_marker = PositionMarker(-1, 0, "")
        lrd_result = LinkReferenceDefinitionHelper.process_link_reference_definition(
            parser_state, blank_marker, "", "", "", 0, 0
        )
        _, _, did_pause_lrd, requeue_line_info, new_tokens = lrd_result
        assert not did_pause_lrd
        POGGER.debug(
            "hbl<<process_link_reference_definition>>stopping link definition"
        )
        return new_tokens, force_default_handling, requeue_line_info

    if stack_top.is_code_block:
        if parser_state.count_of_block_quotes_on_stack():
            POGGER.debug("hbl>>code block within block quote")
        else:
            POGGER.debug("hbl>>code block")
            # Only a code block outside of any block quote yields an empty list.
            new_tokens = []
    elif stack_top.is_html_block:
        POGGER.debug("hbl>>check_blank_html_block_end")
        new_tokens = HtmlHelper.check_blank_html_block_end(parser_state)
        POGGER.debug("hbl<<check_blank_html_block_end")
    elif (
        is_processing_list
        and parser_state.token_document[-1].is_blank_line
        and parser_state.token_document[-2].is_list_start
    ):
        # The document ends with a list start followed by a blank line, and
        # this is another blank line: close the open blocks up to the list.
        POGGER.debug("hbl>>double blank in list")
        new_tokens, _ = TokenizedMarkdown.__close_open_blocks(
            parser_state, until_this_index=in_index, include_lists=True
        )
        POGGER.debug("hbl<<double blank in list")

    return new_tokens, force_default_handling, requeue_line_info
def test_simple_alphabetic_tag_name():
    """
    A lowercase, letters-only name is expected to be a valid tag name.
    """

    # Arrange
    tag_name = "script"
    expected = True

    # Act
    actual = HtmlHelper.is_valid_tag_name(tag_name)

    # Assert
    assert actual == expected
def test_empty_tag_name():
    """
    An empty string is expected to be rejected as a tag name.
    """

    # Arrange
    tag_name = ""
    expected = False

    # Act
    actual = HtmlHelper.is_valid_tag_name(tag_name)

    # Assert
    assert actual == expected
def test_simple_mixed_case_alphanumeric_tag_name():
    """
    A name mixing upper and lower case letters is expected to be a valid tag name.
    """

    # Arrange
    tag_name = "ScRiPt"
    expected = True

    # Act
    actual = HtmlHelper.is_valid_tag_name(tag_name)

    # Assert
    assert actual == expected
def test_simple_dashed_bad_name():
    """
    A name using an underscore as its separator is expected to be rejected
    as a tag name.
    """

    # Arrange
    tag_name = "x_new"
    expected = False

    # Act
    actual = HtmlHelper.is_valid_tag_name(tag_name)

    # Assert
    assert actual == expected
def test_simple_dashed_tag_name():
    """
    A name containing a dash is expected to be a valid tag name.
    """

    # Arrange
    tag_name = "x-new"
    expected = True

    # Act
    actual = HtmlHelper.is_valid_tag_name(tag_name)

    # Assert
    assert actual == expected
def test_dashed_attribute_name():
    """
    An attribute name containing a dash is extracted in full: starting at the
    first character of the name, the expected index is that of the equals
    sign following it.
    """

    # Arrange
    tag_text = "<form accept-charset='foo'>"
    name_start = 6
    expected_index = 20

    # Act
    actual_index = HtmlHelper.extract_html_attribute_name(tag_text, name_start)

    # Assert
    assert actual_index == expected_index
def test_attribute_name_equals_sign_and_close():
    """
    An equals sign followed directly by the closing bracket carries no value;
    the extraction is expected to report -1.
    """

    # Arrange
    tag_text = "<meta http:equiv=>"
    value_start = 16
    expected_index = -1

    # Act
    actual_index = HtmlHelper.extract_optional_attribute_value(tag_text, value_start)

    # Assert
    assert actual_index == expected_index
def test_double_quoted_attribute_name_following_value_not_empty():
    """
    A non-empty double quoted value after the attribute name is consumed; the
    expected index is that of the closing bracket.
    """

    # Arrange
    tag_text = '<meta http:equiv="foo">'
    value_start = 16
    expected_index = 22

    # Act
    actual_index = HtmlHelper.extract_optional_attribute_value(tag_text, value_start)

    # Assert
    assert actual_index == expected_index
def test_invalid_attribute_name():
    """
    An attribute name containing an asterisk is expected to yield -1.
    """

    # Arrange
    tag_text = "<meta http*equiv='foo'>"
    name_start = 6
    expected_index = -1

    # Act
    actual_index = HtmlHelper.extract_html_attribute_name(tag_text, name_start)

    # Assert
    assert actual_index == expected_index
def test_no_attribute_name_following_value_and_no_close():
    """
    With neither a value nor a closing bracket after the attribute name, the
    expected index is the length of the text.
    """

    # Arrange
    tag_text = "<meta http:equiv"
    value_start = 16
    expected_index = len(tag_text)

    # Act
    actual_index = HtmlHelper.extract_optional_attribute_value(tag_text, value_start)

    # Assert
    assert actual_index == expected_index
def test_coloned_attribute_name():
    """
    An attribute name containing a colon is extracted in full: the expected
    index is that of the equals sign following the name.
    """

    # Arrange
    tag_text = "<meta http:equiv='foo'>"
    name_start = 6
    expected_index = 16

    # Act
    actual_index = HtmlHelper.extract_html_attribute_name(tag_text, name_start)

    # Assert
    assert actual_index == expected_index
def test_invalid_attribute_name_start():
    """
    An attribute name that begins with a dash is expected to yield -1.
    """

    # Arrange
    tag_text = "<meta -http='foo'>"
    name_start = 6
    expected_index = -1

    # Act
    actual_index = HtmlHelper.extract_html_attribute_name(tag_text, name_start)

    # Assert
    assert actual_index == expected_index
def test_simple_attribute_name():
    """
    A plain alphabetic attribute name is extracted: the expected index is
    that of the equals sign following the name.
    """

    # Arrange
    tag_text = "<a href='foo'>"
    name_start = 3
    expected_index = 7

    # Act
    actual_index = HtmlHelper.extract_html_attribute_name(tag_text, name_start)

    # Assert
    assert actual_index == expected_index
def test_single_quoted_attribute_name_following_value_and_no_close():
    """
    A single quoted value that ends the text (no closing bracket) is consumed;
    the expected index is the length of the text.
    """

    # Arrange
    tag_text = "<meta http:equiv='foo'"
    value_start = 16
    expected_index = len(tag_text)

    # Act
    actual_index = HtmlHelper.extract_optional_attribute_value(tag_text, value_start)

    # Assert
    assert actual_index == expected_index
def test_non_quoted_attribute_name_following_value_empty():
    """
    An equals sign followed by a second equals sign leaves the unquoted value
    empty; the extraction is expected to report -1.
    """

    # Arrange
    tag_text = "<meta http:equiv=="
    value_start = 16
    expected_index = -1

    # Act
    actual_index = HtmlHelper.extract_optional_attribute_value(tag_text, value_start)

    # Assert
    assert actual_index == expected_index
def test_single_quoted_attribute_name_following_value_and_whitespace_around():
    """
    Whitespace on both sides of the equals sign is tolerated before a single
    quoted value; the expected index is that of the closing bracket.
    """

    # Arrange
    tag_text = "<meta http:equiv = 'foo'>"
    value_start = 16
    expected_index = 24

    # Act
    actual_index = HtmlHelper.extract_optional_attribute_value(tag_text, value_start)

    # Assert
    assert actual_index == expected_index
def test_attribute_name_runs_out_of_string():
    """
    An attribute name that reaches the end of the text with nothing after it
    is expected to yield -1.
    """

    # Arrange
    tag_text = "<meta httpequiv"
    name_start = 6
    expected_index = -1

    # Act
    actual_index = HtmlHelper.extract_html_attribute_name(tag_text, name_start)

    # Assert
    assert actual_index == expected_index
def test_complete_html_start_tag_with_single_attribute_with_whitespace():
    """
    A start tag with one single quoted attribute, padded with whitespace
    around the equals sign and before the close, is expected to be complete
    with the returned index at the end of the text.
    """

    # Arrange
    tag_name = "a"
    text_after_name = " show = '1' >"
    start_index = 0
    expected_valid = True

    # Act
    actual_valid, end_index = HtmlHelper.is_complete_html_start_tag(
        tag_name, text_after_name, start_index
    )

    # Assert
    assert actual_valid == expected_valid
    assert end_index == 13
def test_complete_html_end_tag_with_no_more_string():
    """
    An end tag with no text left after the tag name is expected to be
    reported as not complete, with a returned index of 1.
    """

    # Arrange
    tag_name = "a"
    text_after_name = ""
    start_index = 0
    expected_valid = False

    # Act
    actual_valid, end_index = HtmlHelper.is_complete_html_end_tag(
        tag_name, text_after_name, start_index
    )

    # Assert
    assert actual_valid == expected_valid
    assert end_index == 1
def test_complete_html_start_tag_with_normal_opening_tag():
    """
    A start tag with a single attribute that has no value is expected to be
    complete, with the returned index at the end of the text.
    """

    # Arrange
    tag_name = "a"
    text_after_name = " show>"
    start_index = 0
    expected_valid = True

    # Act
    actual_valid, end_index = HtmlHelper.is_complete_html_start_tag(
        tag_name, text_after_name, start_index
    )

    # Assert
    assert actual_valid == expected_valid
    assert end_index == 6
def test_simple_complete_html_end_tag():
    """
    An end tag whose name is followed directly by the closing bracket is
    expected to be complete, with a returned index of 1.
    """

    # Arrange
    tag_name = "a"
    text_after_name = ">"
    start_index = 0
    expected_valid = True

    # Act
    actual_valid, end_index = HtmlHelper.is_complete_html_end_tag(
        tag_name, text_after_name, start_index
    )

    # Assert
    assert actual_valid == expected_valid
    assert end_index == 1
def test_complete_html_end_tag_with_bad_attribute():
    """
    An end tag that carries an attribute is expected to be reported as not
    complete, with a returned index of 2.
    """

    # Arrange
    tag_name = "a"
    text_after_name = " foo>"
    start_index = 0
    expected_valid = False

    # Act
    actual_valid, end_index = HtmlHelper.is_complete_html_end_tag(
        tag_name, text_after_name, start_index
    )

    # Assert
    assert actual_valid == expected_valid
    assert end_index == 2
def test_simple_complete_html_start_tag_with_bad_tag_name():
    """
    A start tag whose tag name contains an asterisk is expected to be
    reported as not complete, with a returned index of 1.
    """

    # Arrange
    tag_name = "a*b"
    text_after_name = ">"
    start_index = 0
    expected_valid = False

    # Act
    actual_valid, end_index = HtmlHelper.is_complete_html_start_tag(
        tag_name, text_after_name, start_index
    )

    # Assert
    assert actual_valid == expected_valid
    assert end_index == 1
def test_complete_html_start_tag_with_invalidly_named_no_value_attributes():
    """
    A start tag whose only attribute has an asterisk in its name (and no
    value) is expected to be reported as not complete, with a returned index
    of 1.
    """

    # Arrange
    tag_name = "a"
    text_after_name = " sh*ow>"
    start_index = 0
    expected_valid = False

    # Act
    actual_valid, end_index = HtmlHelper.is_complete_html_start_tag(
        tag_name, text_after_name, start_index
    )

    # Assert
    assert actual_valid == expected_valid
    assert end_index == 1
def handle_angle_brackets(inline_request):
    """
    Resolve an opening angle bracket into one of three outcomes: a URI
    autolink, an email autolink, or raw HTML.  When none of them applies, the
    response carries the angle bracket itself as text and advances by one
    character.
    """
    open_index = inline_request.next_index
    line_text = inline_request.source_text
    closing_angle_index = line_text.find(InlineHelper.__angle_bracket_end, open_index)

    new_token = None
    # Skip the attempts when there is no closing bracket at all, or when it is
    # the character directly after the search start.
    if closing_angle_index not in (-1, open_index + 1):
        inner_start = open_index + 1
        between_brackets = line_text[inner_start:closing_angle_index]
        remaining_line = line_text[inner_start:]
        closing_angle_index += 1

        # Autolinks are tried first, URI before email.
        new_token = InlineHelper.__parse_valid_uri_autolink(
            between_brackets
        ) or InlineHelper.__parse_valid_email_autolink(between_brackets)

        if not new_token:
            LOGGER.debug(">>between_brackets>>%s", str(between_brackets))
            new_token, after_index = HtmlHelper.parse_raw_html(
                between_brackets, remaining_line
            )
            LOGGER.debug(">>new_token>>%s", str(new_token))
            # after_index is relative to remaining_line, so it is shifted back
            # into source_text coordinates.
            if after_index != -1:
                closing_angle_index = after_index + open_index + 1

    inline_response = InlineResponse()
    if not new_token:
        inline_response.new_string = InlineHelper.angle_bracket_start
        inline_response.new_index = open_index + 1
        return inline_response

    inline_response.new_string = ""
    inline_response.new_index = closing_angle_index
    inline_response.new_tokens = [new_token]
    return inline_response
def handle_angle_brackets(inline_request: InlineRequest) -> InlineResponse:
    """
    Resolve an opening angle bracket into one of three outcomes: a URI
    autolink, an email autolink, or raw HTML.  When none of them applies, the
    response carries the angle bracket itself as text and advances by one
    character.  In every case the line/column deltas of the response are
    computed from the text that was consumed.
    """
    open_index = inline_request.next_index
    closing_angle_index = inline_request.source_text.find(
        InlineHelper.__angle_bracket_end, open_index
    )

    new_token: Optional[MarkdownToken] = None
    between_brackets = None

    # Skip the attempts when there is no closing bracket at all, or when it is
    # the character directly after the search start.
    if closing_angle_index not in (-1, open_index + 1):
        inner_start = open_index + 1
        between_brackets = inline_request.source_text[inner_start:closing_angle_index]
        remaining_line = inline_request.source_text[inner_start:]
        closing_angle_index += 1

        assert inline_request.line_number is not None
        assert inline_request.column_number is not None
        assert inline_request.remaining_line is not None
        new_column_number = inline_request.column_number + len(
            inline_request.remaining_line
        )

        # Autolinks are tried first, URI before email.
        new_token = InlineHelper.__parse_valid_uri_autolink(
            between_brackets, inline_request.line_number, new_column_number
        )
        if not new_token:
            new_token = InlineHelper.__parse_valid_email_autolink(
                between_brackets, inline_request.line_number, new_column_number
            )
        if not new_token:
            new_token, after_index = HtmlHelper.parse_raw_html(
                between_brackets,
                remaining_line,
                inline_request.line_number,
                new_column_number,
                inline_request,
            )
            if after_index != -1:
                # after_index is relative to remaining_line; the consumed text
                # is then taken from the token's raw_tag.
                closing_angle_index = after_index + open_index + 1
                html_token = cast(RawHtmlMarkdownToken, new_token)
                between_brackets = html_token.raw_tag

    inline_response = InlineResponse()
    if new_token:
        inline_response.new_string = ""
        inline_response.new_index = closing_angle_index
        inline_response.new_tokens = [new_token]
        between_brackets = f"{InlineHelper.angle_bracket_start}{between_brackets}{InlineHelper.__angle_bracket_end}"
    else:
        inline_response.new_string = InlineHelper.angle_bracket_start
        inline_response.new_index = open_index + 1
        between_brackets = InlineHelper.angle_bracket_start

    deltas = ParserHelper.calculate_deltas(between_brackets)
    (
        inline_response.delta_line_number,
        inline_response.delta_column_number,
    ) = deltas
    return inline_response
def is_paragraph_ending_leaf_block_start(
    parser_state: ParserState,
    line_to_parse: str,
    start_index: int,
    extracted_whitespace: Optional[str],
    exclude_thematic_break: bool = False,
) -> bool:
    """
    Determine whether we have a valid leaf block start.

    The line is checked, in order, for a thematic break, an HTML block, a
    fenced code block and an ATX heading; the checks stop at the first match.

    Args:
        parser_state: Parser state; its token_stack is passed to the HTML
            block check.
        line_to_parse: The line being examined.
        start_index: Index within line_to_parse handed to each check.
        extracted_whitespace: Whitespace handed to each check.
        exclude_thematic_break: Must be False; any truthy value trips the
            assertion below.

    Returns:
        True if any of the four checks reports a start, otherwise False.
    """

    # TODO Can be Removed?
    # NOTE(review): presumably refers to exclude_thematic_break, which is only
    # logged and asserted to be False -- confirm before removing.
    POGGER.debug(
        "is_paragraph_ending_leaf_block_start, ex=$", exclude_thematic_break
    )
    assert not exclude_thematic_break

    is_thematic_break_start, _ = LeafBlockProcessor.is_thematic_break(
        line_to_parse,
        start_index,
        extracted_whitespace,
        skip_whitespace_check=True,
    )
    is_leaf_block_start = bool(is_thematic_break_start)
    POGGER.debug(
        "is_paragraph_ending_leaf_block_start>>is_theme_break>>$",
        is_leaf_block_start,
    )

    if not is_leaf_block_start:
        is_html_block_start, _ = HtmlHelper.is_html_block(
            line_to_parse,
            start_index,
            extracted_whitespace,
            parser_state.token_stack,
        )
        is_leaf_block_start = bool(is_html_block_start)
        POGGER.debug(
            "is_paragraph_ending_leaf_block_start>>is_html_block>>$",
            is_leaf_block_start,
        )

    if not is_leaf_block_start:
        is_leaf_block_start, _, _, _ = LeafBlockProcessor.is_fenced_code_block(
            line_to_parse, start_index, extracted_whitespace
        )
        POGGER.debug(
            "is_paragraph_ending_leaf_block_start>>is_fenced_code_block>>$",
            is_leaf_block_start,
        )

    if not is_leaf_block_start:
        is_leaf_block_start, _, _, _ = LeafBlockProcessor.is_atx_heading(
            line_to_parse, start_index, extracted_whitespace
        )
        POGGER.debug(
            "is_paragraph_ending_leaf_block_start>>is_atx_heading>>$",
            is_leaf_block_start,
        )

    POGGER.debug(
        "is_paragraph_ending_leaf_block_start<<$",
        is_leaf_block_start,
    )
    return is_leaf_block_start