def check_normal_html_block_end(
    parser_state, line_to_parse, start_index, extracted_whitespace,
):
    """
    Check to see if we have encountered the end of the current HTML block
    via text on a normal line.
    """
    new_tokens = [
        TextMarkdownToken(line_to_parse[start_index:], extracted_whitespace)
    ]

    is_block_terminated = False
    adj_line = line_to_parse[start_index:]
    if parser_state.token_stack[-1].html_block_type == HtmlHelper.html_block_1:
        for next_end_tag in HtmlHelper.__html_block_1_end_tags:
            if next_end_tag in adj_line:
                is_block_terminated = True
    elif parser_state.token_stack[-1].html_block_type == HtmlHelper.html_block_2:
        is_block_terminated = HtmlHelper.__html_block_2_end in adj_line
    elif parser_state.token_stack[-1].html_block_type == HtmlHelper.html_block_3:
        is_block_terminated = HtmlHelper.__html_block_3_end in adj_line
    elif parser_state.token_stack[-1].html_block_type == HtmlHelper.html_block_4:
        is_block_terminated = HtmlHelper.__html_block_4_end in adj_line
    elif parser_state.token_stack[-1].html_block_type == HtmlHelper.html_block_5:
        is_block_terminated = HtmlHelper.__html_block_5_end in adj_line

    if is_block_terminated:
        terminated_block_tokens, _, _ = parser_state.close_open_blocks_fn(
            parser_state,
            only_these_blocks=[type(parser_state.token_stack[-1])],
        )
        assert terminated_block_tokens
        new_tokens.extend(terminated_block_tokens)
    return new_tokens
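
# Worked example (illustrative): with a type-1 HTML block open, a line such as
# "</script>" contains one of __html_block_1_end_tags, so is_block_terminated
# becomes True and the block's end token is appended after the text token.
# Per the CommonMark spec, type-1 blocks end on a line containing "</script>",
# "</pre>", or "</style>"; type 2 ends on "-->"; type 3 on "?>"; type 4 on
# ">"; type 5 on "]]>". Types 6 and 7 have no branch here because they end on
# a blank line rather than on text within a normal line.
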
def __handle_fenced_code_block(
    parser_state,
    outer_processed,
    position_marker,
    extracted_whitespace,
    new_tokens,
):
    """
    Take care of the processing for fenced code blocks.
    """

    if not parser_state.token_stack[-1].was_link_definition_started:
        (
            fenced_tokens,
            extracted_whitespace,
        ) = LeafBlockProcessor.parse_fenced_code_block(
            parser_state,
            position_marker,
            extracted_whitespace,
        )
        outer_processed = False
        if fenced_tokens:
            new_tokens.extend(fenced_tokens)
            outer_processed = True
        elif parser_state.token_stack[-1].is_fenced_code_block:
            new_tokens.append(
                TextMarkdownToken(
                    position_marker.text_to_parse[position_marker.index_number :],
                    extracted_whitespace,
                )
            )
            outer_processed = True
    return outer_processed
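
# Worked example (illustrative): once a fenced code block is already open on
# the stack, a line like "plain text" yields no new fenced tokens from
# parse_fenced_code_block, so the elif branch emits it verbatim as a text
# token; returning outer_processed == True signals the caller that the line
# has been fully consumed here.
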
def parse_paragraph(
    parser_state,
    position_marker,
    extracted_whitespace,
    this_bq_count,
    no_para_start_if_empty,
    stack_bq_count,
):
    """
    Handle the parsing of a paragraph.
    """
    new_tokens = []

    if no_para_start_if_empty and position_marker.index_number >= len(
        position_marker.text_to_parse
    ):
        LOGGER.debug("Escaping paragraph due to empty w/ blank")
        return [
            BlankLineMarkdownToken(
                extracted_whitespace, position_marker, len(extracted_whitespace)
            )
        ]

    LOGGER.debug(
        "parse_paragraph>stack_bq_count>%s>this_bq_count>%s<",
        str(stack_bq_count),
        str(this_bq_count),
    )

    if (
        len(parser_state.token_document) >= 2
        and parser_state.token_document[-1].is_blank_line
        and parser_state.token_document[-2].is_any_list_token
    ):
        did_find, last_list_index = LeafBlockProcessor.check_for_list_in_process(
            parser_state
        )
        assert did_find
        new_tokens, _, _ = parser_state.close_open_blocks_fn(
            parser_state, until_this_index=last_list_index
        )
    if stack_bq_count != 0 and this_bq_count == 0:
        new_tokens, _, _ = parser_state.close_open_blocks_fn(
            parser_state,
            only_these_blocks=[BlockQuoteStackToken],
            include_block_quotes=True,
        )

    if not parser_state.token_stack[-1].is_paragraph:
        parser_state.token_stack.append(ParagraphStackToken())
        new_tokens.append(
            ParagraphMarkdownToken(extracted_whitespace, position_marker)
        )
        extracted_whitespace = ""

    new_tokens.append(
        TextMarkdownToken(
            position_marker.text_to_parse[position_marker.index_number :],
            extracted_whitespace,
        )
    )
    return new_tokens
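
# Worked example (illustrative): for "- item\n\nplain", the blank line sits at
# the top of token_document with the list token just beneath it, so the open
# list context is unwound via close_open_blocks_fn with until_this_index
# before "plain" starts its own paragraph. Likewise, unquoted text reaching
# here while a block quote is still open (stack_bq_count != 0 with
# this_bq_count == 0) closes the quote before the paragraph is emitted.
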
def __complete_inline_block_processing(
    inline_blocks,
    source_text,
    start_index,
    current_string,
    end_string,
    starting_whitespace,
    is_setext,
):
    """
    Finish off the processing of the inline block, flushing any remaining
    text and resolving any outstanding emphasis.
    """
    have_processed_once = len(inline_blocks) != 0 or start_index != 0

    LOGGER.debug(
        "__cibp>inline_blocks>%s<", str(inline_blocks).replace("\n", "\\n")
    )
    LOGGER.debug("__cibp>source_text>%s<", str(source_text).replace("\n", "\\n"))
    LOGGER.debug("__cibp>start_index>%s<", str(start_index))
    LOGGER.debug(
        "__cibp>current_string>%s<", str(current_string).replace("\n", "\\n")
    )
    LOGGER.debug("__cibp>end_string>%s<", str(end_string).replace("\n", "\\n"))
    LOGGER.debug(
        "__cibp>starting_whitespace>%s<",
        str(starting_whitespace).replace("\n", "\\n"),
    )
    LOGGER.debug("__cibp>is_setext>%s<", str(is_setext))

    if (
        inline_blocks
        and inline_blocks[-1].token_name == MarkdownToken.token_inline_hard_break
    ):
        start_index, extracted_whitespace = ParserHelper.extract_whitespace(
            source_text, start_index
        )
        if end_string is None:
            end_string = extracted_whitespace
        else:
            end_string += extracted_whitespace

    if start_index < len(source_text):
        current_string = InlineHelper.append_text(
            current_string, source_text[start_index:]
        )

    if end_string is not None:
        LOGGER.debug("xx-end-lf>%s<", end_string.replace("\n", "\\n"))
    if current_string or not have_processed_once:
        inline_blocks.append(
            TextMarkdownToken(
                current_string, starting_whitespace, end_whitespace=end_string
            )
        )
    LOGGER.debug(
        ">>%s<<", str(inline_blocks).replace("\n", "\\n").replace("\x02", "\\x02")
    )

    return EmphasisHelper.resolve_inline_emphasis(inline_blocks, None)
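
# Worked example (illustrative): per CommonMark, a hard line break is a
# backslash or two-plus spaces before the newline, and the leading whitespace
# of the following line is not rendered. When the last inline token is a hard
# break, that whitespace is extracted here and folded into end_string so the
# token stream can still reproduce the original source exactly.
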
def parse_inline(coalesced_results):
    """
    Parse and resolve any inline elements.
    """
    LOGGER.info("coalesced_results")
    LOGGER.info("-----")
    for next_token in coalesced_results:
        LOGGER.info(
            ">>%s<<", str(next_token).replace("\t", "\\t").replace("\n", "\\n")
        )
    LOGGER.info("-----")

    coalesced_list = []
    coalesced_list.extend(coalesced_results[0:1])
    for coalesce_index in range(1, len(coalesced_results)):
        if coalesced_results[coalesce_index].is_text and (
            coalesced_list[-1].is_paragraph
            or coalesced_list[-1].is_setext
            or coalesced_list[-1].is_atx_heading
            or coalesced_list[-1].is_code_block
        ):
            if coalesced_list[-1].is_code_block:
                encoded_text = InlineHelper.append_text(
                    "", coalesced_results[coalesce_index].token_text
                )
                processed_tokens = [
                    TextMarkdownToken(
                        encoded_text,
                        coalesced_results[coalesce_index].extracted_whitespace,
                    )
                ]
            elif coalesced_list[-1].is_setext:
                combined_text = coalesced_results[coalesce_index].token_text
                LOGGER.debug(
                    "combined_text>>%s", combined_text.replace("\n", "\\n")
                )
                processed_tokens = InlineProcessor.__process_inline_text_block(
                    coalesced_results[coalesce_index].token_text.replace(
                        "\t", "    "
                    ),
                    whitespace_to_recombine=coalesced_results[
                        coalesce_index
                    ].extracted_whitespace.replace("\t", "    "),
                    is_setext=True,
                )
                LOGGER.debug(
                    "processed_tokens>>%s",
                    str(processed_tokens).replace("\n", "\\n"),
                )
            elif coalesced_list[-1].is_atx_heading:
                processed_tokens = InlineProcessor.__process_inline_text_block(
                    coalesced_results[coalesce_index].token_text.replace(
                        "\t", "    "
                    ),
                    coalesced_results[coalesce_index].extracted_whitespace.replace(
                        "\t", "    "
                    ),
                )
            else:
                assert coalesced_list[-1].is_paragraph
                LOGGER.debug(
                    ">>before_add_ws>>%s>>add>>%s>>",
                    str(coalesced_list[-1]),
                    str(coalesced_results[coalesce_index].extracted_whitespace),
                )
                coalesced_list[-1].add_whitespace(
                    coalesced_results[coalesce_index].extracted_whitespace.replace(
                        "\t", "    "
                    )
                )
                LOGGER.debug(">>after_add_ws>>%s", str(coalesced_list[-1]))
                processed_tokens = InlineProcessor.__process_inline_text_block(
                    coalesced_results[coalesce_index].token_text.replace(
                        "\t", "    "
                    ),
                )
            coalesced_list.extend(processed_tokens)
        else:
            coalesced_list.append(coalesced_results[coalesce_index])
    return coalesced_list
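
# Design note: text that follows a code block token is only passed through
# InlineHelper.append_text for encoding and is never inline-parsed, matching
# CommonMark's rule that code block content is literal. Text under setext
# headings, atx headings, and paragraphs gets the full inline pass via
# __process_inline_text_block instead.
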
def __process_inline_text_block(
    source_text,
    starting_whitespace="",
    whitespace_to_recombine=None,
    is_setext=False,
):
    """
    Process a text block for any inline items.
    """
    inline_blocks = []
    start_index = 0
    if whitespace_to_recombine and " " in whitespace_to_recombine:
        source_text = InlineProcessor.__recombine_with_whitespace(
            source_text, whitespace_to_recombine
        )
    else:
        whitespace_to_recombine = None

    current_string = ""
    current_string_unresolved = ""
    end_string = ""

    inline_response = InlineResponse()

    next_index = ParserHelper.index_any_of(
        source_text,
        InlineProcessor.__valid_inline_text_block_sequence_starts,
        start_index,
    )
    LOGGER.debug("__process_inline_text_block>>is_setext>>%s", str(is_setext))
    LOGGER.debug(
        "__process_inline_text_block>>%s>>%s",
        source_text.replace("\n", "\\n"),
        str(start_index),
    )
    while next_index != -1:

        inline_response.clear_fields()
        reset_current_string = False
        whitespace_to_add = None

        LOGGER.debug(
            "__process_inline_text_block>>%s>>%s", str(start_index), str(next_index)
        )
        remaining_line = source_text[start_index:next_index]

        inline_request = InlineRequest(
            source_text,
            next_index,
            inline_blocks,
            remaining_line,
            current_string_unresolved,
        )
        if source_text[next_index] in InlineProcessor.__inline_character_handlers:
            LOGGER.debug("handler(before)>>%s<<", source_text[next_index])
            proc_fn = InlineProcessor.__inline_character_handlers[
                source_text[next_index]
            ]
            inline_response = proc_fn(inline_request)
            LOGGER.debug("handler(after)>>%s<<", source_text[next_index])
        else:
            assert source_text[next_index] == "\n"
            LOGGER.debug(
                "end_string(before)>>%s<<",
                str(end_string).replace("\n", "\\n").replace("\x02", "\\x02"),
            )
            (
                inline_response.new_string,
                whitespace_to_add,
                inline_response.new_index,
                inline_response.new_tokens,
                remaining_line,
                end_string,
                current_string,
            ) = InlineHelper.handle_line_end(
                next_index, remaining_line, end_string, current_string
            )
            LOGGER.debug(
                "handle_line_end>>new_tokens>>%s<<",
                str(inline_response.new_tokens)
                .replace("\n", "\\n")
                .replace("\x02", "\\x02"),
            )
            if not inline_response.new_tokens:
                end_string = InlineProcessor.__add_recombined_whitespace(
                    bool(whitespace_to_recombine),
                    source_text,
                    inline_response,
                    end_string,
                    is_setext,
                )
                LOGGER.debug(
                    "handle_line_end>>%s<<",
                    source_text[inline_response.new_index :]
                    .replace("\n", "\\n")
                    .replace("\x02", "\\x02"),
                )
            LOGGER.debug(
                "end_string(after)>>%s<<",
                str(end_string).replace("\n", "\\n").replace("\x02", "\\x02"),
            )

        LOGGER.debug(
            "new_string-->%s<--",
            str(inline_response.new_string).replace("\n", "\\n"),
        )
        LOGGER.debug("new_index-->%s<--", str(inline_response.new_index))
        LOGGER.debug(
            "new_tokens-->%s<--",
            str(inline_response.new_tokens).replace("\n", "\\n"),
        )
        LOGGER.debug(
            "new_string_unresolved-->%s<--",
            str(inline_response.new_string_unresolved).replace("\n", "\\n"),
        )
        LOGGER.debug(
            "consume_rest_of_line-->%s<--",
            str(inline_response.consume_rest_of_line),
        )
        LOGGER.debug(
            "original_string-->%s<--",
            str(inline_response.original_string).replace("\n", "\\n"),
        )

        if inline_response.consume_rest_of_line:
            inline_response.new_string = ""
            reset_current_string = True
            inline_response.new_tokens = None
        else:
            current_string = InlineHelper.append_text(
                current_string, remaining_line
            )
            current_string_unresolved = InlineHelper.append_text(
                current_string_unresolved, remaining_line
            )

        LOGGER.debug(
            "current_string>>%s<<",
            str(current_string).replace("\n", "\\n").replace("\x02", "\\x02"),
        )
        LOGGER.debug(
            "current_string_unresolved>>%s<<",
            str(current_string_unresolved)
            .replace("\n", "\\n")
            .replace("\x02", "\\x02"),
        )
        if inline_response.new_tokens:
            if current_string:
                # assert end_string is None
                inline_blocks.append(
                    TextMarkdownToken(
                        current_string,
                        starting_whitespace,
                        end_whitespace=end_string,
                    )
                )
                reset_current_string = True
                starting_whitespace = ""
                end_string = None

            inline_blocks.extend(inline_response.new_tokens)

        if reset_current_string:
            current_string = ""
            current_string_unresolved = ""

        (
            start_index,
            next_index,
            end_string,
            current_string,
            current_string_unresolved,
        ) = InlineProcessor.__complete_inline_loop(
            source_text,
            inline_response.new_index,
            end_string,
            whitespace_to_add,
            current_string,
            current_string_unresolved,
            inline_response.new_string_unresolved,
            inline_response.new_string,
            inline_response.original_string,
        )
        LOGGER.debug(
            "<<current_string<<%s<<%s<<",
            str(len(current_string)),
            current_string.replace("\b", "\\b")
            .replace("\a", "\\a")
            .replace("\n", "\\n"),
        )
        LOGGER.debug(
            "<<current_string_unresolved<<%s<<%s<<",
            str(len(current_string_unresolved)),
            current_string_unresolved.replace("\b", "\\b")
            .replace("\a", "\\a")
            .replace("\n", "\\n"),
        )

    LOGGER.debug("<<__complete_inline_block_processing<<")
    return InlineProcessor.__complete_inline_block_processing(
        inline_blocks,
        source_text,
        start_index,
        current_string,
        end_string,
        starting_whitespace,
        is_setext,
    )
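
# Design note: the loop above repeatedly scans source_text for the next
# character in __valid_inline_text_block_sequence_starts and dispatches it
# through the __inline_character_handlers table; "\n" is the one start
# character handled directly in this function, which is why the else branch
# asserts on it before calling InlineHelper.handle_line_end.
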
def parse_atx_headings(parser_state, position_marker, extracted_whitespace):
    """
    Handle the parsing of an atx heading.
    """
    new_tokens = []

    if ParserHelper.is_length_less_than_or_equal_to(
        extracted_whitespace, 3
    ) and ParserHelper.is_character_at_index(
        position_marker.text_to_parse,
        position_marker.index_number,
        LeafBlockProcessor.__atx_character,
    ):
        hash_count, new_index = ParserHelper.collect_while_character(
            position_marker.text_to_parse,
            position_marker.index_number,
            LeafBlockProcessor.__atx_character,
        )
        (
            non_whitespace_index,
            extracted_whitespace_at_start,
        ) = ParserHelper.extract_whitespace(
            position_marker.text_to_parse, new_index
        )
        if hash_count <= 6 and (
            extracted_whitespace_at_start
            or non_whitespace_index == len(position_marker.text_to_parse)
        ):
            new_tokens, _, _ = parser_state.close_open_blocks_fn(
                parser_state, new_tokens
            )
            remaining_line = position_marker.text_to_parse[non_whitespace_index:]
            (
                end_index,
                extracted_whitespace_at_end,
            ) = ParserHelper.extract_whitespace_from_end(remaining_line)
            remove_trailing_count = 0
            while (
                end_index > 0
                and remaining_line[end_index - 1]
                == LeafBlockProcessor.__atx_character
            ):
                end_index -= 1
                remove_trailing_count += 1
            extracted_whitespace_before_end = ""
            if remove_trailing_count:
                if end_index > 0:
                    if ParserHelper.is_character_at_index_whitespace(
                        remaining_line, end_index - 1
                    ):
                        remaining_line = remaining_line[:end_index]
                        (
                            end_index,
                            extracted_whitespace_before_end,
                        ) = ParserHelper.extract_whitespace_from_end(remaining_line)
                        remaining_line = remaining_line[:end_index]
                    else:
                        extracted_whitespace_at_end = ""
                        remove_trailing_count = 0
                else:
                    remaining_line = ""
            else:
                extracted_whitespace_at_end = remaining_line[end_index:]
                remaining_line = remaining_line[0:end_index]

            start_token = AtxHeadingMarkdownToken(
                hash_count,
                remove_trailing_count,
                extracted_whitespace,
                position_marker,
            )
            new_tokens.append(start_token)
            new_tokens.append(
                TextMarkdownToken(remaining_line, extracted_whitespace_at_start)
            )
            end_token = EndMarkdownToken(
                "atx",
                extracted_whitespace_at_end,
                extracted_whitespace_before_end,
                None,
            )
            end_token.start_markdown_token = start_token
            new_tokens.append(end_token)
    return new_tokens
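
# Worked example (illustrative): for "  ## Heading ##  ", extracted_whitespace
# is "  " (at most 3 spaces, so still a heading) and hash_count is 2; the
# closing-run loop trims remaining_line back to "Heading" with
# remove_trailing_count == 2, while the trailing "  " and the space before
# "##" travel on the end token so the original line can be reconstructed.
# Had the closing hashes not been preceded by whitespace ("## Heading##"),
# they would be kept as literal text, per CommonMark.
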
def parse_indented_code_block(
    parser_state,
    position_marker,
    extracted_whitespace,
    removed_chars_at_start,
    original_line_to_parse,
    last_block_quote_index,
    last_list_start_index,
):
    """
    Handle the parsing of an indented code block.
    """
    new_tokens = []

    if (
        ParserHelper.is_length_greater_than_or_equal_to(
            extracted_whitespace, 4, start_index=removed_chars_at_start
        )
        and not parser_state.token_stack[-1].is_paragraph
    ):
        if not parser_state.token_stack[-1].is_indented_code_block:
            parser_state.token_stack.append(IndentedCodeBlockStackToken())

            LOGGER.debug(">>__adjust_for_list_start")
            (
                did_process,
                offset_index,
                last_block_quote_index,
            ) = LeafBlockProcessor.__adjust_for_list_start(
                original_line_to_parse,
                last_list_start_index,
                last_block_quote_index,
            )
            LOGGER.debug("<<__adjust_for_list_start<<%s", str(did_process))

            force_me = False
            kludge_adjust = 0
            if not did_process:
                LOGGER.debug(">>>>%s", str(parser_state.token_stack[-2]))
                if parser_state.token_stack[-2].is_list:
                    LOGGER.debug(
                        ">>indent>>%s",
                        parser_state.token_stack[-2].indent_level,
                    )
                    last_block_quote_index = 0
                    kludge_adjust = 1
                    force_me = True

            LOGGER.debug(">>__adjust_for_block_quote_start")
            (
                did_process,
                special_parse_start_index,
                whitespace_to_parse,
                block_quote_adjust_delta,
            ) = LeafBlockProcessor.__adjust_for_block_quote_start(
                force_me,
                original_line_to_parse,
                last_block_quote_index,
                position_marker,
                extracted_whitespace,
            )
            LOGGER.debug("<<__adjust_for_block_quote_start<<%s", str(did_process))

            LOGGER.debug(
                "__recalculate_whitespace>>%s>>%s",
                whitespace_to_parse,
                str(offset_index),
            )
            (
                accumulated_whitespace_count,
                actual_whitespace_index,
                adj_ws,
                left_ws,
            ) = LeafBlockProcessor.__recalculate_whitespace(
                special_parse_start_index, whitespace_to_parse, offset_index
            )

            # TODO revisit with tabs
            line_number = position_marker.line_number
            column_number = (
                position_marker.index_number
                + position_marker.index_indent
                - len(extracted_whitespace)
                + 1
            )
            if special_parse_start_index:
                column_number = (
                    actual_whitespace_index
                    + special_parse_start_index
                    + block_quote_adjust_delta
                )
                LOGGER.debug(
                    "column_number(%s)=actual_whitespace_index(%s)+special_parse_start_index(%s)+block_quote_adjust_delta(%s)",
                    str(column_number),
                    str(actual_whitespace_index),
                    str(special_parse_start_index),
                    str(block_quote_adjust_delta),
                )
                excess_whitespace_count = (
                    accumulated_whitespace_count - 4 - offset_index
                )
                LOGGER.debug(
                    "excess_whitespace_count(%s)=accumulated_whitespace_count(%s)-4-offset_index(%s)",
                    str(excess_whitespace_count),
                    str(accumulated_whitespace_count),
                    str(offset_index),
                )
                LOGGER.debug("before>>%s>>", left_ws.replace("\t", "\\t"))
                if excess_whitespace_count:
                    excess_whitespace_count -= kludge_adjust
                    left_ws = " ".rjust(excess_whitespace_count) + left_ws
                LOGGER.debug("after>>%s>>", left_ws.replace("\t", "\\t"))
            else:
                column_number += actual_whitespace_index
            LOGGER.debug("column_number>>%s", str(column_number))

            new_tokens.append(
                IndentedCodeBlockMarkdownToken(adj_ws, line_number, column_number)
            )
            extracted_whitespace = left_ws
            LOGGER.debug(
                "left_ws>>%s<<", extracted_whitespace.replace("\t", "\\t")
            )
        new_tokens.append(
            TextMarkdownToken(
                position_marker.text_to_parse[position_marker.index_number :],
                extracted_whitespace,
            )
        )
    return new_tokens
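
# Worked example (illustrative): the line "    code" carries four leading
# spaces, so is_length_greater_than_or_equal_to(extracted_whitespace, 4)
# holds and an indented code block is opened. The is_paragraph guard encodes
# the CommonMark rule that an indented chunk cannot interrupt a paragraph:
# "text\n    more" stays one paragraph rather than starting a code block.
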