예제 #1
0
    def check_normal_html_block_end(
        parser_state, line_to_parse, start_index, extracted_whitespace,
    ):
        """
        Check to see if we have encountered the end of the current HTML block
        via text on a normal line.

        The remainder of the line (from start_index onwards) is always emitted
        as a text token.  If that remainder contains the end sequence for the
        type of HTML block on top of the token stack, the block is closed and
        the tokens produced by closing it are appended after the text token.

        Returns the list of new tokens.
        """

        adj_line = line_to_parse[start_index:]
        new_tokens = [TextMarkdownToken(adj_line, extracted_whitespace)]

        html_block_type = parser_state.token_stack[-1].html_block_type
        is_block_terminated = False
        if html_block_type == HtmlHelper.html_block_1:
            # CommonMark defines the end condition for this block type as
            # case-insensitive (e.g. `</SCRIPT>` closes a `<script>` block),
            # so compare lower-cased text on both sides.
            folded_line = adj_line.lower()
            is_block_terminated = any(
                next_end_tag.lower() in folded_line
                for next_end_tag in HtmlHelper.__html_block_1_end_tags
            )
        elif html_block_type == HtmlHelper.html_block_2:
            is_block_terminated = HtmlHelper.__html_block_2_end in adj_line
        elif html_block_type == HtmlHelper.html_block_3:
            is_block_terminated = HtmlHelper.__html_block_3_end in adj_line
        elif html_block_type == HtmlHelper.html_block_4:
            is_block_terminated = HtmlHelper.__html_block_4_end in adj_line
        elif html_block_type == HtmlHelper.html_block_5:
            is_block_terminated = HtmlHelper.__html_block_5_end in adj_line

        if is_block_terminated:
            # Only close blocks of the same class as the one on top of the stack.
            terminated_block_tokens, _, _ = parser_state.close_open_blocks_fn(
                parser_state, only_these_blocks=[type(parser_state.token_stack[-1])],
            )
            assert terminated_block_tokens
            new_tokens.extend(terminated_block_tokens)
        return new_tokens
 def __handle_fenced_code_block(
     parser_state,
     outer_processed,
     position_marker,
     extracted_whitespace,
     new_tokens,
 ):
     """
     Take care of the processing for fenced code blocks.

     While a link definition is in progress on top of the token stack,
     nothing is attempted and the incoming outer_processed value is handed
     back as-is.  Otherwise the line is offered to the fenced code block
     parser; any tokens it produces, or a plain text token when the line is
     inside an already open fenced block, are added to new_tokens.

     Returns True if the line was consumed here, False if it was not, or
     the unchanged outer_processed when the check was skipped.
     """
     if parser_state.token_stack[-1].was_link_definition_started:
         return outer_processed

     fence_result = LeafBlockProcessor.parse_fenced_code_block(
         parser_state, position_marker, extracted_whitespace,
     )
     fenced_tokens, extracted_whitespace = fence_result

     if fenced_tokens:
         new_tokens.extend(fenced_tokens)
         return True

     if parser_state.token_stack[-1].is_fenced_code_block:
         # Inside an open fence: the rest of the line is literal text.
         line_remainder = position_marker.text_to_parse[
             position_marker.index_number :
         ]
         new_tokens.append(TextMarkdownToken(line_remainder, extracted_whitespace))
         return True

     return False
예제 #3
0
    def parse_paragraph(
        parser_state,
        position_marker,
        extracted_whitespace,
        this_bq_count,
        no_para_start_if_empty,
        stack_bq_count,
    ):
        """
        Handle the parsing of a paragraph.

        If no_para_start_if_empty is set and nothing remains on the line past
        the current index, a single blank line token is returned instead.
        Otherwise any blocks that must be closed first are closed, a paragraph
        is started if one is not already on top of the token stack, and the
        remainder of the line is emitted as a text token.

        Returns the list of new tokens, in document order.
        """
        if no_para_start_if_empty and position_marker.index_number >= len(
                position_marker.text_to_parse):
            LOGGER.debug("Escaping paragraph due to empty w/ blank")
            return [
                BlankLineMarkdownToken(extracted_whitespace, position_marker,
                                       len(extracted_whitespace))
            ]

        LOGGER.debug(
            "parse_paragraph>stack_bq_count>%s>this_bq_count>%s<",
            str(stack_bq_count),
            str(this_bq_count),
        )

        new_tokens = []

        # A blank line directly after a list token: close blocks down to the
        # list that is still in progress before starting the paragraph.
        if (len(parser_state.token_document) >= 2
                and parser_state.token_document[-1].is_blank_line
                and parser_state.token_document[-2].is_any_list_token):

            did_find, last_list_index = LeafBlockProcessor.check_for_list_in_process(
                parser_state)
            assert did_find
            closed_tokens, _, _ = parser_state.close_open_blocks_fn(
                parser_state, until_this_index=last_list_index)
            new_tokens.extend(closed_tokens)

        # Block quotes are open on the stack but this line has none: close them.
        # The results are accumulated rather than assigned so that tokens
        # produced by the list close above are not discarded when both
        # conditions apply to the same line.
        if stack_bq_count != 0 and this_bq_count == 0:
            closed_tokens, _, _ = parser_state.close_open_blocks_fn(
                parser_state,
                only_these_blocks=[BlockQuoteStackToken],
                include_block_quotes=True,
            )
            new_tokens.extend(closed_tokens)

        if not parser_state.token_stack[-1].is_paragraph:
            parser_state.token_stack.append(ParagraphStackToken())
            new_tokens.append(
                ParagraphMarkdownToken(extracted_whitespace, position_marker))
            # The whitespace now belongs to the paragraph token, not the text.
            extracted_whitespace = ""

        new_tokens.append(
            TextMarkdownToken(
                position_marker.text_to_parse[position_marker.index_number:],
                extracted_whitespace,
            ))
        return new_tokens
예제 #4
0
    def __complete_inline_block_processing(
        inline_blocks,
        source_text,
        start_index,
        current_string,
        end_string,
        starting_whitespace,
        is_setext,
    ):
        """
        Finish off a text block after the inline scanning loop has ended.

        Any text left in source_text from start_index onwards is folded into
        current_string, a final text token is appended to inline_blocks when
        there is text to emit (or when nothing at all was processed), and the
        resulting list is passed through emphasis resolution.

        Returns whatever EmphasisHelper.resolve_inline_emphasis returns.
        """
        nothing_processed_yet = len(inline_blocks) == 0 and start_index == 0

        for debug_format, debug_value in (
            ("__cibp>inline_blocks>%s<", inline_blocks),
            ("__cibp>source_text>%s<", source_text),
            ("__cibp>start_index>%s<", start_index),
            ("__cibp>current_string>%s<", current_string),
            ("__cibp>end_string>%s<", end_string),
            ("__cibp>starting_whitespace>%s<", starting_whitespace),
            ("__cibp>is_setext>%s<", is_setext),
        ):
            LOGGER.debug(debug_format, str(debug_value).replace("\n", "\\n"))

        last_is_hard_break = bool(inline_blocks) and (
            inline_blocks[-1].token_name == MarkdownToken.token_inline_hard_break
        )
        if last_is_hard_break:
            # Whitespace right after a hard break is moved out of the text
            # and recorded as end whitespace instead.
            start_index, skipped_whitespace = ParserHelper.extract_whitespace(
                source_text, start_index
            )
            end_string = (
                skipped_whitespace
                if end_string is None
                else end_string + skipped_whitespace
            )

        if start_index < len(source_text):
            leftover_text = source_text[start_index:]
            current_string = InlineHelper.append_text(current_string, leftover_text)

        if end_string is not None:
            LOGGER.debug("xx-end-lf>%s<", end_string.replace("\n", "\\n"))

        if current_string or nothing_processed_yet:
            final_text_token = TextMarkdownToken(
                current_string, starting_whitespace, end_whitespace=end_string
            )
            inline_blocks.append(final_text_token)

        printable_blocks = (
            str(inline_blocks).replace("\n", "\\n").replace("\x02", "\\x02")
        )
        LOGGER.debug(">>%s<<", printable_blocks)

        return EmphasisHelper.resolve_inline_emphasis(inline_blocks, None)
예제 #5
0
    def parse_inline(coalesced_results):
        """
        Parse and resolve any inline elements.

        Walks the token list and, for every text token whose preceding entry
        in the output list is a paragraph, setext heading, atx heading or code
        block token, replaces that text token with the tokens produced for it.
        Every other token is copied through unchanged.

        Returns the newly built token list.
        """
        LOGGER.info("coalesced_results")
        LOGGER.info("-----")
        for logged_token in coalesced_results:
            printable_token = (
                str(logged_token).replace("\t", "\\t").replace("\n", "\\n")
            )
            LOGGER.info(">>%s<<", printable_token)
        LOGGER.info("-----")

        coalesced_list = list(coalesced_results[0:1])
        for text_candidate in coalesced_results[1:]:
            owner_token = coalesced_list[-1]
            if not text_candidate.is_text or not (
                owner_token.is_paragraph
                or owner_token.is_setext
                or owner_token.is_atx_heading
                or owner_token.is_code_block
            ):
                coalesced_list.append(text_candidate)
                continue

            raw_text = text_candidate.token_text
            raw_whitespace = text_candidate.extracted_whitespace

            if owner_token.is_code_block:
                # Code block text is not scanned for inline elements.
                encoded_text = InlineHelper.append_text("", raw_text)
                processed_tokens = [TextMarkdownToken(encoded_text, raw_whitespace)]
            elif owner_token.is_setext:
                LOGGER.debug("combined_text>>%s", raw_text.replace("\n", "\\n"))
                processed_tokens = InlineProcessor.__process_inline_text_block(
                    raw_text.replace("\t", "    "),
                    whitespace_to_recombine=raw_whitespace.replace("\t", "    "),
                    is_setext=True,
                )
                LOGGER.debug(
                    "processed_tokens>>%s",
                    str(processed_tokens).replace("\n", "\\n"),
                )
            elif owner_token.is_atx_heading:
                processed_tokens = InlineProcessor.__process_inline_text_block(
                    raw_text.replace("\t", "    "),
                    raw_whitespace.replace("\t", "    "),
                )
            else:
                assert owner_token.is_paragraph
                LOGGER.debug(
                    ">>before_add_ws>>%s>>add>>%s>>",
                    str(owner_token),
                    str(raw_whitespace),
                )
                # The leading whitespace is recorded on the paragraph token
                # rather than handed to the inline scan.
                owner_token.add_whitespace(raw_whitespace.replace("\t", "    "))
                LOGGER.debug(">>after_add_ws>>%s", str(owner_token))
                processed_tokens = InlineProcessor.__process_inline_text_block(
                    raw_text.replace("\t", "    ")
                )

            coalesced_list.extend(processed_tokens)
        return coalesced_list
예제 #6
0
    def __process_inline_text_block(
        source_text,
        starting_whitespace="",
        whitespace_to_recombine=None,
        is_setext=False,
    ):
        """
        Process a text block for any inline items.

        Scans source_text for the characters listed in
        __valid_inline_text_block_sequence_starts.  Each hit is dispatched to
        the matching entry in __inline_character_handlers, or treated as a
        line end when no handler is registered for it.  Plain text between
        hits is accumulated and flushed as a text token whenever a handler
        produces new tokens.

        starting_whitespace is attached to the first text token that is
        flushed and is then reset to an empty string.
        whitespace_to_recombine is only used when it contains a space; it is
        then merged into source_text before scanning, otherwise it is ignored.
        is_setext is forwarded to the recombination and completion helpers.

        Returns the result of __complete_inline_block_processing.
        """

        inline_blocks = []
        start_index = 0
        if whitespace_to_recombine and " " in whitespace_to_recombine:
            source_text = InlineProcessor.__recombine_with_whitespace(
                source_text, whitespace_to_recombine)
        else:
            # Normalise to None so that bool(whitespace_to_recombine) below is
            # False whenever no recombination took place.
            whitespace_to_recombine = None

        current_string = ""
        current_string_unresolved = ""
        end_string = ""

        inline_response = InlineResponse()

        next_index = ParserHelper.index_any_of(
            source_text,
            InlineProcessor.__valid_inline_text_block_sequence_starts,
            start_index,
        )
        LOGGER.debug("__process_inline_text_block>>is_setext>>%s",
                     str(is_setext))
        LOGGER.debug(
            "__process_inline_text_block>>%s>>%s",
            source_text.replace("\n", "\\n"),
            str(start_index),
        )
        # NOTE(review): index_any_of presumably yields -1 once no further
        # sequence start is found -- confirm against ParserHelper.
        while next_index != -1:

            inline_response.clear_fields()
            reset_current_string = False
            whitespace_to_add = None

            LOGGER.debug("__process_inline_text_block>>%s>>%s",
                         str(start_index), str(next_index))
            # Plain text between the previous position and this sequence start.
            remaining_line = source_text[start_index:next_index]

            inline_request = InlineRequest(
                source_text,
                next_index,
                inline_blocks,
                remaining_line,
                current_string_unresolved,
            )
            if source_text[
                    next_index] in InlineProcessor.__inline_character_handlers:
                LOGGER.debug("handler(before)>>%s<<", source_text[next_index])
                proc_fn = InlineProcessor.__inline_character_handlers[
                    source_text[next_index]]
                inline_response = proc_fn(inline_request)
                LOGGER.debug("handler(after)>>%s<<", source_text[next_index])
            else:
                # The only sequence start without a registered handler is
                # expected to be the newline character.
                assert source_text[next_index] == "\n"
                LOGGER.debug(
                    "end_string(before)>>%s<<",
                    str(end_string).replace("\n",
                                            "\\n").replace("\x02", "\\x02"),
                )
                (
                    inline_response.new_string,
                    whitespace_to_add,
                    inline_response.new_index,
                    inline_response.new_tokens,
                    remaining_line,
                    end_string,
                    current_string,
                ) = InlineHelper.handle_line_end(next_index, remaining_line,
                                                 end_string, current_string)
                LOGGER.debug(
                    "handle_line_end>>new_tokens>>%s<<",
                    str(inline_response.new_tokens).replace(
                        "\n", "\\n").replace("\x02", "\\x02"),
                )
                # Only when the line end produced no tokens is end_string
                # passed through __add_recombined_whitespace.
                if not inline_response.new_tokens:
                    end_string = InlineProcessor.__add_recombined_whitespace(
                        bool(whitespace_to_recombine),
                        source_text,
                        inline_response,
                        end_string,
                        is_setext,
                    )
                LOGGER.debug(
                    "handle_line_end>>%s<<",
                    source_text[inline_response.new_index:].replace(
                        "\n", "\\n").replace("\x02", "\\x02"),
                )
                LOGGER.debug(
                    "end_string(after)>>%s<<",
                    str(end_string).replace("\n",
                                            "\\n").replace("\x02", "\\x02"),
                )

            LOGGER.debug(
                "new_string-->%s<--",
                str(inline_response.new_string).replace("\n", "\\n"),
            )
            LOGGER.debug("new_index-->%s<--", str(inline_response.new_index))
            LOGGER.debug(
                "new_tokens-->%s<--",
                str(inline_response.new_tokens).replace("\n", "\\n"),
            )
            LOGGER.debug(
                "new_string_unresolved-->%s<--",
                str(inline_response.new_string_unresolved).replace(
                    "\n", "\\n"),
            )
            LOGGER.debug(
                "consume_rest_of_line-->%s<--",
                str(inline_response.consume_rest_of_line),
            )
            LOGGER.debug(
                "original_string-->%s<--",
                str(inline_response.original_string).replace("\n", "\\n"),
            )

            if inline_response.consume_rest_of_line:
                # The handler asked for the rest of the line to be consumed:
                # drop the pending text and any tokens from this response.
                inline_response.new_string = ""
                reset_current_string = True
                inline_response.new_tokens = None
            else:
                # Otherwise the plain text before the sequence start is added
                # to both the resolved and unresolved accumulators.
                current_string = InlineHelper.append_text(
                    current_string, remaining_line)
                current_string_unresolved = InlineHelper.append_text(
                    current_string_unresolved, remaining_line)

            LOGGER.debug(
                "current_string>>%s<<",
                str(current_string).replace("\n",
                                            "\\n").replace("\x02", "\\x02"),
            )
            LOGGER.debug(
                "current_string_unresolved>>%s<<",
                str(current_string_unresolved).replace("\n", "\\n").replace(
                    "\x02", "\\x02"),
            )
            if inline_response.new_tokens:
                # Flush accumulated text as its own token first, so it lands
                # in front of the tokens produced by this response.
                if current_string:
                    # assert end_string is None
                    inline_blocks.append(
                        TextMarkdownToken(
                            current_string,
                            starting_whitespace,
                            end_whitespace=end_string,
                        ))
                    reset_current_string = True
                    # Leading whitespace is only attached to the first text
                    # token that gets flushed.
                    starting_whitespace = ""
                    end_string = None

                inline_blocks.extend(inline_response.new_tokens)

            if reset_current_string:
                current_string = ""
                current_string_unresolved = ""

            # Advance the scan state; the helper returns the next start index,
            # the next sequence-start index and the updated accumulators.
            (
                start_index,
                next_index,
                end_string,
                current_string,
                current_string_unresolved,
            ) = InlineProcessor.__complete_inline_loop(
                source_text,
                inline_response.new_index,
                end_string,
                whitespace_to_add,
                current_string,
                current_string_unresolved,
                inline_response.new_string_unresolved,
                inline_response.new_string,
                inline_response.original_string,
            )
            LOGGER.debug(
                "<<current_string<<%s<<%s<<",
                str(len(current_string)),
                current_string.replace("\b", "\\b").replace("\a",
                                                            "\\a").replace(
                                                                "\n", "\\n"),
            )
            LOGGER.debug(
                "<<current_string_unresolved<<%s<<%s<<",
                str(len(current_string_unresolved)),
                current_string_unresolved.replace("\b", "\\b").replace(
                    "\a", "\\a").replace("\n", "\\n"),
            )

        LOGGER.debug("<<__complete_inline_block_processing<<")
        # No sequence starts remain: hand the leftover state to the completion
        # helper, which produces the value returned from this function.
        return InlineProcessor.__complete_inline_block_processing(
            inline_blocks,
            source_text,
            start_index,
            current_string,
            end_string,
            starting_whitespace,
            is_setext,
        )
예제 #7
0
    def parse_atx_headings(parser_state, position_marker,
                           extracted_whitespace):
        """
        Handle the parsing of an atx heading.

        A heading is recognised when the leading whitespace is at most three
        characters long, the line continues with one to six atx characters,
        and those are followed by whitespace or the end of the line.  On a
        match, open blocks are closed and a start token, a text token and an
        end token are produced.  An optional closing run of atx characters is
        stripped from the text when it is preceded by whitespace or makes up
        the whole heading text.

        Returns the list of new tokens, which is empty when the line is not
        an atx heading.
        """

        new_tokens = []
        line_text = position_marker.text_to_parse

        begins_with_atx = ParserHelper.is_length_less_than_or_equal_to(
            extracted_whitespace, 3
        ) and ParserHelper.is_character_at_index(
            line_text,
            position_marker.index_number,
            LeafBlockProcessor.__atx_character,
        )
        if not begins_with_atx:
            return new_tokens

        hash_count, after_hashes_index = ParserHelper.collect_while_character(
            line_text,
            position_marker.index_number,
            LeafBlockProcessor.__atx_character,
        )
        text_start_index, leading_whitespace = ParserHelper.extract_whitespace(
            line_text, after_hashes_index
        )

        is_valid_heading = hash_count <= 6 and (
            leading_whitespace or text_start_index == len(line_text)
        )
        if not is_valid_heading:
            return new_tokens

        new_tokens, _, _ = parser_state.close_open_blocks_fn(parser_state, new_tokens)

        heading_text = line_text[text_start_index:]
        end_index, trailing_whitespace = ParserHelper.extract_whitespace_from_end(
            heading_text
        )

        # Walk backwards over any closing run of atx characters.
        closing_hash_count = 0
        while (
            end_index > 0
            and heading_text[end_index - 1] == LeafBlockProcessor.__atx_character
        ):
            end_index -= 1
            closing_hash_count += 1

        whitespace_before_closing = ""
        if not closing_hash_count:
            # No closing run: just split off the trailing whitespace.
            trailing_whitespace = heading_text[end_index:]
            heading_text = heading_text[:end_index]
        elif end_index <= 0:
            # The heading text consisted solely of the closing run.
            heading_text = ""
        elif ParserHelper.is_character_at_index_whitespace(
            heading_text, end_index - 1
        ):
            # A real closing sequence: strip it and the whitespace before it.
            heading_text = heading_text[:end_index]
            (
                end_index,
                whitespace_before_closing,
            ) = ParserHelper.extract_whitespace_from_end(heading_text)
            heading_text = heading_text[:end_index]
        else:
            # The run is glued to the text, so it stays part of the heading.
            trailing_whitespace = ""
            closing_hash_count = 0

        start_token = AtxHeadingMarkdownToken(
            hash_count,
            closing_hash_count,
            extracted_whitespace,
            position_marker,
        )
        new_tokens.append(start_token)

        text_token = TextMarkdownToken(heading_text, leading_whitespace)
        new_tokens.append(text_token)

        end_token = EndMarkdownToken(
            "atx",
            trailing_whitespace,
            whitespace_before_closing,
            None,
        )
        end_token.start_markdown_token = start_token
        new_tokens.append(end_token)
        return new_tokens
예제 #8
0
    def parse_indented_code_block(
        parser_state,
        position_marker,
        extracted_whitespace,
        removed_chars_at_start,
        original_line_to_parse,
        last_block_quote_index,
        last_list_start_index,
    ):
        """
        Handle the parsing of an indented code block

        The line is only handled when the length check on extracted_whitespace
        (threshold 4, with removed_chars_at_start passed as start_index)
        succeeds and the top of the token stack is not a paragraph.

        If no indented code block is open yet, one is pushed on the token
        stack and an IndentedCodeBlockMarkdownToken with a computed line and
        column number is emitted.  In either case the text from the current
        index onwards is then emitted as a text token.

        Returns the list of new tokens, which is empty when the line is not
        handled here.
        """

        new_tokens = []

        if (ParserHelper.is_length_greater_than_or_equal_to(
                extracted_whitespace, 4, start_index=removed_chars_at_start)
                and not parser_state.token_stack[-1].is_paragraph):
            if not parser_state.token_stack[-1].is_indented_code_block:
                parser_state.token_stack.append(IndentedCodeBlockStackToken())

                LOGGER.debug(">>__adjust_for_list_start")
                (
                    did_process,
                    offset_index,
                    last_block_quote_index,
                ) = LeafBlockProcessor.__adjust_for_list_start(
                    original_line_to_parse,
                    last_list_start_index,
                    last_block_quote_index,
                )
                LOGGER.debug("<<__adjust_for_list_start<<%s", str(did_process))

                force_me = False
                kludge_adjust = 0
                if not did_process:
                    # token_stack[-2] is the entry that was on top of the stack
                    # before the code block stack token was pushed above.
                    LOGGER.debug(">>>>%s", str(parser_state.token_stack[-2]))
                    if parser_state.token_stack[-2].is_list:
                        LOGGER.debug(
                            ">>indent>>%s",
                            parser_state.token_stack[-2].indent_level,
                        )
                        # Directly inside a list with no list-start adjustment:
                        # force the block quote adjustment from index 0 and
                        # take one off any excess whitespace further down.
                        last_block_quote_index = 0
                        kludge_adjust = 1
                        force_me = True

                LOGGER.debug(">>__adjust_for_block_quote_start")
                (
                    did_process,
                    special_parse_start_index,
                    whitespace_to_parse,
                    block_quote_adjust_delta,
                ) = LeafBlockProcessor.__adjust_for_block_quote_start(
                    force_me,
                    original_line_to_parse,
                    last_block_quote_index,
                    position_marker,
                    extracted_whitespace,
                )
                LOGGER.debug("<<__adjust_for_block_quote_start<<%s",
                             str(did_process))

                LOGGER.debug(
                    "__recalculate_whitespace>>%s>>%s",
                    whitespace_to_parse,
                    str(offset_index),
                )
                (
                    accumulated_whitespace_count,
                    actual_whitespace_index,
                    adj_ws,
                    left_ws,
                ) = LeafBlockProcessor.__recalculate_whitespace(
                    special_parse_start_index, whitespace_to_parse,
                    offset_index)

                # TODO revisit with tabs
                line_number = position_marker.line_number
                # Default column: 1-based position of the start of the
                # extracted whitespace, including the marker's index_indent.
                column_number = (position_marker.index_number +
                                 position_marker.index_indent -
                                 len(extracted_whitespace) + 1)
                if special_parse_start_index:
                    # A non-zero special start index replaces the default
                    # column with one derived from the adjusted values.
                    column_number = (actual_whitespace_index +
                                     special_parse_start_index +
                                     block_quote_adjust_delta)
                    LOGGER.debug(
                        "column_number(%s)=actual_whitespace_index(%s)+special_parse_start_index(%s)+block_quote_adjust_delta(%s)",
                        str(column_number),
                        str(actual_whitespace_index),
                        str(special_parse_start_index),
                        str(block_quote_adjust_delta),
                    )
                    excess_whitespace_count = (accumulated_whitespace_count -
                                               4 - offset_index)
                    LOGGER.debug(
                        "excess_whitespace_count(%s)=accumulated_whitespace_count(%s)-4-offset_index(%s)",
                        str(excess_whitespace_count),
                        str(accumulated_whitespace_count),
                        str(offset_index),
                    )
                    LOGGER.debug("before>>%s>>", left_ws.replace("\t", "\\t"))
                    if excess_whitespace_count:
                        excess_whitespace_count -= kludge_adjust
                        # NOTE(review): " ".rjust(n) never returns fewer than
                        # one space, so at least one space is prepended even
                        # if the adjusted count is 0 -- confirm this is intended.
                        left_ws = " ".rjust(excess_whitespace_count) + left_ws
                    LOGGER.debug("after>>%s>>", left_ws.replace("\t", "\\t"))
                else:
                    column_number += actual_whitespace_index
                LOGGER.debug("column_number>>%s", str(column_number))
                new_tokens.append(
                    IndentedCodeBlockMarkdownToken(adj_ws, line_number,
                                                   column_number))
                # The text token below carries only the recalculated leftover
                # whitespace.
                extracted_whitespace = left_ws
                LOGGER.debug("left_ws>>%s<<",
                             extracted_whitespace.replace("\t", "\\t"))
            # Emitted for both a newly opened and an already open code block.
            new_tokens.append(
                TextMarkdownToken(
                    position_marker.text_to_parse[position_marker.
                                                  index_number:],
                    extracted_whitespace,
                ))
        return new_tokens