def __calculate_adjusted_whitespace(
    parser_state, current_container_blocks, line_to_parse, extracted_whitespace, foobar=None,
):
    """
    Based on the last container on the stack, determine what the
    adjusted whitespace is.
    """
    adjusted_whitespace = extracted_whitespace

    # Walk the container stack from the top down, looking for an open list.
    list_stack_index = len(parser_state.token_stack) - 1
    while list_stack_index >= 0 and not parser_state.token_stack[list_stack_index].is_list:
        list_stack_index -= 1

    if list_stack_index < 0:
        # No list container is open.
        LOGGER.debug("PLFCB>>No Started lists")
        assert not current_container_blocks
        if foobar is None:
            LOGGER.debug("PLFCB>>No Started Block Quote")
        else:
            # A block quote is active; drop its prefix from the whitespace.
            LOGGER.debug("PLFCB>>Started Block Quote")
            adjusted_whitespace = extracted_whitespace[foobar:]
    else:
        assert current_container_blocks
        LOGGER.debug(
            "PLFCB>>Started list-last stack>>%s",
            str(parser_state.token_stack[list_stack_index]),
        )

        # Find the most recent list token in the document token stream.
        document_index = len(parser_state.token_document) - 1
        while document_index >= 0 and not (
            parser_state.token_document[document_index].is_any_list_token
        ):
            document_index -= 1
        LOGGER.debug(
            "PLFCB>>Started list-last token>>%s",
            str(parser_state.token_document[document_index]),
        )
        assert document_index >= 0

        old_start_index = parser_state.token_document[document_index].indent_level
        ws_len = ParserHelper.calculate_length(extracted_whitespace)
        LOGGER.debug(
            "old_start_index>>%s>>ws_len>>%s", str(old_start_index), str(ws_len)
        )
        if ws_len >= old_start_index:
            # Enough whitespace to continue the list: consume the list indent.
            LOGGER.debug("RELINE:%s:", line_to_parse)
            adjusted_whitespace = extracted_whitespace[old_start_index:]
        else:
            LOGGER.debug("DOWNGRADE")
    return adjusted_whitespace
def test_calculate_length_space_then_tab():
    """
    Make sure that a string with a single space then a tab is handled properly.
    """
    # Arrange
    test_input = " \t"

    # Act
    computed_length = ParserHelper.calculate_length(test_input)

    # Assert: the space advances to column 1, the tab then jumps to tab stop 4.
    assert computed_length == 4
def test_calculate_length_empty_string():
    """
    Make sure that an empty string is handled properly.
    """
    # Arrange
    test_input = ""

    # Act
    computed_length = ParserHelper.calculate_length(test_input)

    # Assert: nothing to measure.
    assert computed_length == 0
def test_calculate_length_multiple_tabs():
    """
    Make sure that a string with multiple tabs is handled properly.
    """
    for tab_count in range(1, 10):
        # Arrange
        tab_string = "\t" * tab_count

        # Act
        computed_length = ParserHelper.calculate_length(tab_string)

        # Assert: each tab advances to the next 4-column tab stop.
        assert computed_length == tab_count * 4
def test_calculate_length_tab_after_0_index_start():
    """
    Make sure that a string with a tab is handled properly after a start of 0.
    """
    # Arrange
    tab_input = "\t"
    initial_column = 0

    # Act
    computed_length = ParserHelper.calculate_length(tab_input, initial_column)

    # Assert: from column 0, a tab moves to tab stop 4.
    assert computed_length == 4
def test_calculate_length_tab_after_4_index_start():
    """
    Make sure that a string with a tab is handled properly after a start of 4.

    Note that with a start of 4, a tab moves it to the next tab stop at 8,
    specifying that 4 space characters should be added.
    """
    # Arrange
    tab_input = "\t"
    initial_column = 4

    # Act
    computed_length = ParserHelper.calculate_length(tab_input, initial_column)

    # Assert: column 4 -> tab stop 8, i.e. a width of 4.
    assert computed_length == 4
def parse_fenced_code_block(
    parser_state: ParserState,
    position_marker: PositionMarker,
    extracted_whitespace: Optional[str],
) -> Tuple[List[MarkdownToken], Optional[str]]:
    """
    Handle the parsing of a fenced code block.

    Returns the new tokens produced for this line together with the
    (possibly rewritten) extracted whitespace.
    """
    POGGER.debug(
        "line>>$>>index>>$>>",
        position_marker.text_to_parse,
        position_marker.index_number,
    )
    produced_tokens: List[MarkdownToken] = []
    (
        is_fence_start,
        non_whitespace_index,
        extracted_whitespace_before_info_string,
        collected_count,
    ) = LeafBlockProcessor.is_fenced_code_block(
        position_marker.text_to_parse,
        position_marker.index_number,
        extracted_whitespace,
    )

    # The stack top is only consulted before any helper mutates the stack.
    stack_top = parser_state.token_stack[-1]
    if is_fence_start and not stack_top.is_html_block:
        assert collected_count is not None
        assert non_whitespace_index is not None
        if stack_top.is_fenced_code_block:
            # A fence line while a fenced block is open can only close it.
            LeafBlockProcessor.__check_for_fenced_end(
                parser_state,
                position_marker,
                collected_count,
                non_whitespace_index,
                extracted_whitespace,
                produced_tokens,
            )
        else:
            produced_tokens = LeafBlockProcessor.__process_fenced_start(
                parser_state,
                position_marker,
                non_whitespace_index,
                collected_count,
                extracted_whitespace,
                extracted_whitespace_before_info_string,
            )
    elif stack_top.is_fenced_code_block:
        # Ordinary content line inside an open fenced block: strip the
        # block's opening indent from the leading whitespace.
        fenced_token = cast(FencedCodeBlockStackToken, stack_top)
        if fenced_token.whitespace_start_count and extracted_whitespace:
            current_whitespace_length = ParserHelper.calculate_length(
                extracted_whitespace
            )
            whitespace_left = max(
                0,
                current_whitespace_length - fenced_token.whitespace_start_count,
            )
            POGGER.debug("previous_ws>>$", current_whitespace_length)
            POGGER.debug("whitespace_left>>$", whitespace_left)
            stripped_count = current_whitespace_length - whitespace_left
            removed_whitespace = ParserHelper.create_replace_with_nothing_marker(
                ParserHelper.repeat_string(
                    ParserHelper.space_character, stripped_count
                )
            )
            kept_whitespace = ParserHelper.repeat_string(
                ParserHelper.space_character, whitespace_left
            )
            extracted_whitespace = f"{removed_whitespace}{kept_whitespace}"
    return produced_tokens, extracted_whitespace
def __process_fenced_start(
    parser_state: ParserState,
    position_marker: PositionMarker,
    non_whitespace_index: int,
    collected_count: int,
    extracted_whitespace: Optional[str],
    extracted_whitespace_before_info_string: Optional[str],
) -> List[MarkdownToken]:
    """
    Process a candidate fenced code block start line.

    When the start is valid, close any open paragraph, emit a
    FencedCodeBlockMarkdownToken, and push a matching stack token.
    Returns the new tokens produced (possibly empty).
    """
    POGGER.debug("pfcb->check")
    new_tokens: List[MarkdownToken] = []
    # A backtick fence may not contain backticks in its info string;
    # tilde fences have no such restriction.
    if (
        position_marker.text_to_parse[position_marker.index_number]
        == LeafBlockProcessor.__fenced_start_tilde
        or LeafBlockProcessor.__fenced_start_backtick
        not in position_marker.text_to_parse[non_whitespace_index:]
    ):
        POGGER.debug("pfcb->start")
        # Split the rest of the line into the info string and its trailer.
        (
            after_extracted_text_index,
            extracted_text,
        ) = ParserHelper.extract_until_whitespace(
            position_marker.text_to_parse, non_whitespace_index
        )
        assert extracted_text is not None
        text_after_extracted_text = position_marker.text_to_parse[
            after_extracted_text_index:
        ]
        # Remember the stack top before closing blocks, for list correction below.
        old_top_of_stack = parser_state.token_stack[-1]
        new_tokens, _ = parser_state.close_open_blocks_fn(
            parser_state,
            only_these_blocks=[ParagraphStackToken],
        )
        # Keep the pre-backslash-resolution forms for round-tripping.
        pre_extracted_text, pre_text_after_extracted_text = (
            extracted_text,
            text_after_extracted_text,
        )
        assert extracted_text is not None
        extracted_text = InlineHelper.handle_backslashes(extracted_text)
        text_after_extracted_text = InlineHelper.handle_backslashes(
            text_after_extracted_text
        )
        # Only record the "pre" forms when backslash handling changed something.
        if pre_extracted_text == extracted_text:
            pre_extracted_text = ""
        if pre_text_after_extracted_text == text_after_extracted_text:
            pre_text_after_extracted_text = ""
        assert extracted_whitespace is not None
        assert extracted_whitespace_before_info_string is not None
        new_token = FencedCodeBlockMarkdownToken(
            position_marker.text_to_parse[position_marker.index_number],
            collected_count,
            extracted_text,
            pre_extracted_text,
            text_after_extracted_text,
            pre_text_after_extracted_text,
            extracted_whitespace,
            extracted_whitespace_before_info_string,
            position_marker,
        )
        new_tokens.append(new_token)
        assert extracted_whitespace is not None
        # Record the open block so later lines can be matched against it;
        # whitespace_start_count is the indent to strip from content lines.
        parser_state.token_stack.append(
            FencedCodeBlockStackToken(
                code_fence_character=position_marker.text_to_parse[
                    position_marker.index_number
                ],
                fence_character_count=collected_count,
                whitespace_start_count=ParserHelper.calculate_length(
                    extracted_whitespace
                ),
                matching_markdown_token=new_token,
            )
        )
        POGGER.debug("StackToken-->$<<", parser_state.token_stack[-1])
        POGGER.debug(
            "StackToken>start_markdown_token-->$<<",
            parser_state.token_stack[-1].matching_markdown_token,
        )
        LeafBlockProcessor.correct_for_leaf_block_start_in_list(
            parser_state,
            position_marker.index_indent,
            old_top_of_stack,
            new_tokens,
        )
    return new_tokens
def parse_fenced_code_block(
    parser_state,
    position_marker,
    extracted_whitespace,
):
    """
    Handle the parsing of a fenced code block.

    Returns the new tokens produced for this line and the (possibly
    rewritten) extracted whitespace.
    """
    LOGGER.debug(
        "line>>%s>>index>>%s>>",
        position_marker.text_to_parse,
        position_marker.index_number,
    )
    new_tokens = []
    (
        is_fence_start,
        non_whitespace_index,
        extracted_whitespace_before_info_string,
        collected_count,
    ) = LeafBlockProcessor.is_fenced_code_block(
        position_marker.text_to_parse,
        position_marker.index_number,
        extracted_whitespace,
    )
    if is_fence_start and not parser_state.token_stack[-1].is_html_block:
        if parser_state.token_stack[-1].is_fenced_code_block:
            # A fence while a fenced block is open: check for a valid close
            # (same character, at least as many of them, nothing after).
            LOGGER.debug("pfcb->end")
            if (
                parser_state.token_stack[-1].code_fence_character
                == position_marker.text_to_parse[position_marker.index_number]
                and collected_count
                >= parser_state.token_stack[-1].fence_character_count
                and non_whitespace_index >= len(position_marker.text_to_parse)
            ):
                new_end_token = parser_state.token_stack[-1].generate_close_token(
                    extracted_whitespace
                )
                new_tokens.append(new_end_token)
                new_end_token.start_markdown_token = parser_state.token_stack[
                    -1
                ].start_markdown_token
                new_end_token.extra_end_data = str(collected_count)
                new_end_token.compose_data_field()
                # Fenced block is closed; pop it off the stack.
                del parser_state.token_stack[-1]
        else:
            LOGGER.debug("pfcb->check")
            # Backtick fences may not have backticks in the info string;
            # tilde fences may.
            if (
                position_marker.text_to_parse[position_marker.index_number]
                == LeafBlockProcessor.__fenced_start_tilde
                or LeafBlockProcessor.__fenced_start_backtick
                not in position_marker.text_to_parse[non_whitespace_index:]
            ):
                LOGGER.debug("pfcb->start")
                # Split the remainder into info string and trailing text.
                (
                    after_extracted_text_index,
                    extracted_text,
                ) = ParserHelper.extract_until_whitespace(
                    position_marker.text_to_parse, non_whitespace_index
                )
                text_after_extracted_text = position_marker.text_to_parse[
                    after_extracted_text_index:
                ]
                new_tokens, _, _ = parser_state.close_open_blocks_fn(
                    parser_state,
                    only_these_blocks=[ParagraphStackToken],
                )
                # Keep pre-backslash-resolution forms for round-tripping.
                pre_extracted_text = extracted_text
                pre_text_after_extracted_text = text_after_extracted_text
                extracted_text = InlineHelper.handle_backslashes(
                    extracted_text, add_text_signature=False
                )
                text_after_extracted_text = InlineHelper.handle_backslashes(
                    text_after_extracted_text, add_text_signature=False
                )
                # Only record the "pre" forms when something changed.
                if pre_extracted_text == extracted_text:
                    pre_extracted_text = ""
                if pre_text_after_extracted_text == text_after_extracted_text:
                    pre_text_after_extracted_text = ""
                new_token = FencedCodeBlockMarkdownToken(
                    position_marker.text_to_parse[position_marker.index_number],
                    collected_count,
                    extracted_text,
                    pre_extracted_text,
                    text_after_extracted_text,
                    pre_text_after_extracted_text,
                    extracted_whitespace,
                    extracted_whitespace_before_info_string,
                    position_marker,
                )
                new_tokens.append(new_token)
                # Record the open block; whitespace_start_count is the indent
                # to strip from subsequent content lines.
                parser_state.token_stack.append(
                    FencedCodeBlockStackToken(
                        code_fence_character=position_marker.text_to_parse[
                            position_marker.index_number
                        ],
                        fence_character_count=collected_count,
                        whitespace_start_count=ParserHelper.calculate_length(
                            extracted_whitespace
                        ),
                        start_markdown_token=new_token,
                    )
                )
                LOGGER.debug(
                    "StackToken-->%s<<", str(parser_state.token_stack[-1])
                )
                LOGGER.debug(
                    "StackToken>start_markdown_token-->%s<<",
                    str(parser_state.token_stack[-1].start_markdown_token),
                )
    elif (
        parser_state.token_stack[-1].is_fenced_code_block
        and parser_state.token_stack[-1].whitespace_start_count
        and extracted_whitespace
    ):
        # Content line inside an open fenced block: strip the opening indent
        # from the leading whitespace, marking the removed part with the
        # replace-with-nothing escape sequence ("\a...\a\x03\a").
        current_whitespace_length = ParserHelper.calculate_length(
            extracted_whitespace
        )
        whitespace_left = max(
            0,
            current_whitespace_length
            - parser_state.token_stack[-1].whitespace_start_count,
        )
        LOGGER.debug("previous_ws>>%s", str(current_whitespace_length))
        LOGGER.debug("whitespace_left>>%s", str(whitespace_left))
        removed_whitespace = (
            "\a"
            + "".rjust(current_whitespace_length - whitespace_left, " ")
            + "\a\x03\a"
        )
        extracted_whitespace = removed_whitespace + "".rjust(
            whitespace_left, " "
        )
    return new_tokens, extracted_whitespace
def __adjust_for_block_quote_start(
    force_me,
    original_line_to_parse,
    last_block_quote_index,
    position_marker,
    extracted_whitespace,
):
    """
    Block quotes cause indents, which need to be handled specifically.

    Returns a tuple of (did_process, special_parse_start_index,
    whitespace_to_parse, block_quote_adjust_delta).  Special handling only
    kicks in when the whitespace after the last block quote character in the
    original line contains a tab.
    """
    did_process = False
    special_parse_start_index = 0
    whitespace_to_parse = extracted_whitespace
    block_quote_adjust_delta = 0
    LOGGER.debug(
        "last_block_quote_index>>%s>>force_me>>%s",
        str(last_block_quote_index),
        str(force_me),
    )
    if last_block_quote_index or force_me:
        LOGGER.debug(
            "original_line_to_parse>[%s]>>last_block_quote_index>>%s",
            original_line_to_parse.replace("\t", "\\t"),
            str(last_block_quote_index),
        )
        # Whitespace in the *original* (pre-tab-expansion) line, right after
        # the last ">" of the block quote prefix.
        (
            block_quote_after_whitespace_index,
            during_original_whitespace,
        ) = ParserHelper.extract_whitespace(
            original_line_to_parse, last_block_quote_index
        )
        LOGGER.debug(
            "during_original_whitespace>[%s]",
            during_original_whitespace.replace("\t", "\\t"),
        )
        # Only tabs in that whitespace need the special handling below.
        if "\t" in during_original_whitespace:
            did_process = True
            LOGGER.debug(
                ".text_to_parse>[%s]",
                position_marker.text_to_parse.replace("\t", "\\t"),
            )
            LOGGER.debug(".index_number>>%s", str(position_marker.index_number))
            LOGGER.debug(".index_indent>>%s", str(position_marker.index_indent))
            LOGGER.debug(
                "last_block_quote_index>>%s", str(last_block_quote_index)
            )

            # Make sure everything after the whitespace remains the same.
            text_after_original_whitespace = original_line_to_parse[
                block_quote_after_whitespace_index:
            ]
            text_after_whitespace = position_marker.text_to_parse[
                position_marker.index_number:
            ]
            LOGGER.debug(
                "text_after_original_whitespace>[%s]",
                text_after_original_whitespace.replace("\t", "\\t"),
            )
            LOGGER.debug(
                "text_after_whitespace>[%s]",
                text_after_whitespace.replace("\t", "\\t"),
            )
            assert text_after_original_whitespace == text_after_whitespace

            # Make sure the whitespace is within expected bounds.
            during_current_whitespace = position_marker.text_to_parse[
                position_marker.index_number
                - len(extracted_whitespace):position_marker.index_number
            ]
            LOGGER.debug(
                "during_current_whitespace>[%s]",
                during_current_whitespace.replace("\t", "\\t"),
            )
            LOGGER.debug(
                "during_original_whitespace>[%s]",
                during_original_whitespace.replace("\t", "\\t"),
            )
            current_whitespace_length = len(during_current_whitespace)
            # The "- 1" drops the single space that belongs to the ">" prefix.
            original_whitespace_length = (
                ParserHelper.calculate_length(
                    during_original_whitespace,
                    start_index=last_block_quote_index,
                )
                - 1
            )
            LOGGER.debug(
                "current_whitespace_length[%s],original_whitespace_length[%s]",
                str(current_whitespace_length),
                str(original_whitespace_length),
            )
            assert current_whitespace_length <= original_whitespace_length

            special_parse_start_index = last_block_quote_index + 1
            if during_original_whitespace[0] == "\t":
                # Leading tab: keep the whole original whitespace; a second
                # tab requires a -1 column adjustment downstream.
                whitespace_to_parse = during_original_whitespace
                if (
                    len(during_original_whitespace) > 1
                    and during_original_whitespace[1] == "\t"
                ):
                    block_quote_adjust_delta = -1
            else:
                # Leading space belongs to the ">" prefix; parse the rest.
                whitespace_to_parse = during_original_whitespace[1:]

    return (
        did_process,
        special_parse_start_index,
        whitespace_to_parse,
        block_quote_adjust_delta,
    )
def __count_block_quote_starts(
    line_to_parse,
    start_index,
    stack_bq_count,
    is_top_of_stack_fenced_code_block,
):
    """
    Having detected a block quote character (">") on a line, continue to consume
    and count while the block quote pattern is there.

    Returns (this_bq_count, start_index, adjusted_line, last_block_quote_index);
    adjusted_line is line_to_parse with any tab directly after a ">" expanded
    to spaces.
    """
    this_bq_count = 0
    last_block_quote_index = -1
    adjusted_line = line_to_parse
    if stack_bq_count == 0 and is_top_of_stack_fenced_code_block:
        # NOTE(review): with no open quotes and a fenced block on top, the
        # ">" is handed back (index rewound) instead of starting a quote --
        # confirm against callers.
        start_index -= 1
    else:
        # Count the ">" that triggered this call.
        this_bq_count += 1
        start_index += 1
        last_block_quote_index = start_index
        LOGGER.debug(
            "stack_bq_count--%s--is_top_of_stack_fenced_code_block--%s",
            str(stack_bq_count),
            str(is_top_of_stack_fenced_code_block),
        )
        while True:
            # Each ">" may be followed by one optional whitespace character.
            if ParserHelper.is_character_at_index_whitespace(
                adjusted_line, start_index
            ):
                if adjusted_line[start_index] == "\t":
                    # Expand the tab in place to its tab-stop width so later
                    # column arithmetic sees a space-only prefix.
                    adjusted_tab_length = ParserHelper.calculate_length(
                        "\t", start_index=start_index
                    )
                    LOGGER.debug(
                        "adj--%s--", adjusted_line.replace("\t", "\\t")
                    )
                    adjusted_line = (
                        adjusted_line[0:start_index]
                        + "".rjust(adjusted_tab_length)
                        + adjusted_line[start_index + 1:]
                    )
                    LOGGER.debug("--%s--", adjusted_line.replace("\t", "\\t"))
                start_index += 1
            if is_top_of_stack_fenced_code_block and (
                this_bq_count >= stack_bq_count
            ):
                # Enough ">" consumed to remain inside the fenced block's quote;
                # do not consume any deeper nesting.
                break
            if start_index == len(
                adjusted_line
            ) or ParserHelper.is_character_at_index_not(
                adjusted_line,
                start_index,
                BlockQuoteProcessor.__block_quote_character,
            ):
                # End of line or no further ">": stop counting.
                break
            this_bq_count += 1
            start_index += 1
            last_block_quote_index = start_index
        LOGGER.debug(
            "__count_block_quote_starts--%s--%s--",
            str(start_index),
            adjusted_line.replace("\t", "\\t"),
        )
    return this_bq_count, start_index, adjusted_line, last_block_quote_index
def __pre_list(
    parser_state,
    line_to_parse,
    start_index,
    extracted_whitespace,
    marker_width_minus_one,
    stack_bq_count,
    this_bq_count,
):
    """
    Handle the processing of the first part of the list.

    Computes the list item's indent level and how the whitespace around the
    list marker is split up.  Returns (indent_level, remaining_whitespace,
    ws_after_marker, after_marker_ws_index, ws_before_marker,
    container_level_tokens, stack_bq_count).
    """
    # Whitespace immediately after the list marker.
    (
        after_marker_ws_index,
        after_marker_whitespace,
    ) = ParserHelper.extract_whitespace(line_to_parse, start_index + 1)
    ws_after_marker = ParserHelper.calculate_length(
        after_marker_whitespace, start_index=start_index + 1
    )
    LOGGER.debug(
        "after-marker>>%s>>total=%s",
        after_marker_whitespace,
        str(ws_after_marker),
    )
    ws_before_marker = ParserHelper.calculate_length(extracted_whitespace)
    LOGGER.debug(
        "--ws_before_marker>>%s>>marker_width_minus_one>>%s",
        str(ws_before_marker),
        str(marker_width_minus_one),
    )
    LOGGER.debug("--%s--%s", str(start_index), str(start_index + 1))
    # assert "\t" not in after_marker_whitespace
    # Reconcile list nesting with any surrounding block quotes.
    (
        container_level_tokens,
        stack_bq_count,
    ) = ListBlockProcessor.__handle_list_nesting(
        parser_state, stack_bq_count, this_bq_count
    )
    LOGGER.debug(
        ">>>>>XX>>%s>>%s<<", str(after_marker_ws_index), str(len(line_to_parse))
    )
    if after_marker_ws_index == len(line_to_parse):
        # Marker is followed only by whitespace to end of line: treat the
        # item as empty, with a default indent of marker width + 1.
        LOGGER.debug("BOOOOOOOM")
        indent_level = 2 + marker_width_minus_one
        remaining_whitespace = ws_after_marker
        ws_after_marker = 0
    else:
        indent_level = (
            ws_before_marker + 1 + ws_after_marker + marker_width_minus_one
        )
        remaining_whitespace = 0
    LOGGER.debug(
        "ws_after_marker>>%s<<indent_level<<%s<<rem<<%s<<",
        str(ws_after_marker),
        str(indent_level),
        str(remaining_whitespace),
    )
    if ws_after_marker > 4:
        # More than 4 spaces after the marker: only one space belongs to the
        # marker; the rest is content (indented code inside the item).
        indent_level = indent_level - ws_after_marker + 1
        remaining_whitespace = ws_after_marker - 1
        ws_after_marker = 1
    LOGGER.debug(
        "ws_after_marker>>%s<<indent_level<<%s<<rem<<%s<<",
        str(ws_after_marker),
        str(indent_level),
        str(remaining_whitespace),
    )
    return (
        indent_level,
        remaining_whitespace,
        ws_after_marker,
        after_marker_ws_index,
        ws_before_marker,
        container_level_tokens,
        stack_bq_count,
    )
def list_in_process(
    parser_state,
    line_to_parse,
    start_index,
    extracted_whitespace,
    ind,
):
    """
    Handle the processing of a line where there is a list in process.

    `ind` is the index of the in-process list on the token stack.  Returns
    (container_level_tokens, line_to_parse) where the line may have had the
    list indent stripped from it.
    """
    container_level_tokens = []
    LOGGER.debug("!!!!!FOUND>>%s", str(parser_state.token_stack[ind]))
    LOGGER.debug(
        "!!!!!FOUND>>%s", str(parser_state.token_stack[ind].extra_data)
    )
    requested_list_indent = parser_state.token_stack[ind].indent_level
    before_ws_length = parser_state.token_stack[ind].ws_before_marker
    LOGGER.debug(
        "!!!!!requested_list_indent>>%s,before_ws=%s",
        str(requested_list_indent),
        str(before_ws_length),
    )
    leading_space_length = ParserHelper.calculate_length(extracted_whitespace)
    # Probe for a new list start on this line (whitespace check skipped so
    # deeply indented starts are still recognized).
    started_ulist, _ = ListBlockProcessor.is_ulist_start(
        parser_state,
        line_to_parse,
        start_index,
        extracted_whitespace,
        skip_whitespace_check=True,
    )
    started_olist, _, _, _ = ListBlockProcessor.is_olist_start(
        parser_state,
        line_to_parse,
        start_index,
        extracted_whitespace,
        skip_whitespace_check=True,
    )
    allow_list_continue = True
    if leading_space_length >= 4 and (started_ulist or started_olist):
        # A 4+ space "list start" after a blank line is indented code,
        # not a continuation of this list.
        allow_list_continue = not parser_state.token_document[-1].is_blank_line
    LOGGER.debug(
        "leading_space_length>>%s>>requested_list_indent>>%s>>is_in_paragraph>>%s",
        str(leading_space_length),
        str(requested_list_indent),
        str(parser_state.token_stack[-1].is_paragraph),
    )
    used_indent = None
    if leading_space_length >= requested_list_indent and allow_list_continue:
        # Line is indented enough to stay inside the current list item.
        LOGGER.debug("before>>%s>>", line_to_parse.replace(" ", "\\s"))
        (
            line_to_parse,
            used_indent,
        ) = ListBlockProcessor.__adjust_line_for_list_in_process(
            line_to_parse,
            start_index,
            extracted_whitespace,
            leading_space_length,
            requested_list_indent,
        )
        LOGGER.debug(
            "after>>%s>>%s>>",
            line_to_parse.replace(" ", "\\s"),
            used_indent.replace(" ", "\\s"),
        )
    else:
        # Not indented enough for the full item indent; a paragraph
        # continuation ("lazy" line) only needs the pre-marker indent.
        requested_list_indent = requested_list_indent - before_ws_length
        LOGGER.debug(
            "leading_space_length>>%s>>adj requested_list_indent>>%s>>%s<<",
            str(leading_space_length),
            str(requested_list_indent),
            str(parser_state.token_stack[-1].is_paragraph),
        )
        if (
            parser_state.token_stack[-1].is_paragraph
            and leading_space_length >= requested_list_indent
            and allow_list_continue
        ):
            LOGGER.debug(
                ">>line_to_parse>>%s>>", line_to_parse.replace("\n", "\\n")
            )
            (
                line_to_parse,
                used_indent,
            ) = ListBlockProcessor.__adjust_line_for_list_in_process(
                line_to_parse,
                start_index,
                extracted_whitespace,
                leading_space_length,
                requested_list_indent,
            )
            LOGGER.debug(
                ">>line_to_parse>>%s>>", line_to_parse.replace("\n", "\\n")
            )
            LOGGER.debug(
                ">>used_indent>>%s>>", used_indent.replace("\n", "\\n")
            )
        else:
            # The line ends this list (or some of its nesting levels).
            container_level_tokens = ListBlockProcessor.__check_for_list_closures(
                parser_state,
                line_to_parse,
                start_index,
                extracted_whitespace,
                ind,
            )
            if parser_state.token_stack[-1].is_list:
                # Still inside an (outer) list: re-adjust against its indent.
                requested_list_indent = parser_state.token_stack[-1].indent_level
                LOGGER.debug(">>line_to_parse>>%s>>", line_to_parse)
                LOGGER.debug(">>start_index>>%s", str(start_index))
                LOGGER.debug(
                    ">>requested_list_indent>>%s", str(requested_list_indent)
                )
                LOGGER.debug(">>before_ws_length>>%s", str(before_ws_length))
                (
                    line_to_parse,
                    _,
                ) = ListBlockProcessor.__adjust_line_for_list_in_process(
                    line_to_parse,
                    start_index,
                    extracted_whitespace,
                    requested_list_indent,
                    before_ws_length,
                )
    if used_indent is not None:
        # Record the stripped indent on the list token for round-tripping.
        parser_state.token_stack[ind].matching_markdown_token.add_leading_spaces(
            used_indent
        )
    return container_level_tokens, line_to_parse