Beispiel #1
0
    def is_html_block(
        line_to_parse: str,
        start_index: int,
        extracted_whitespace: Optional[str],
        token_stack: List[StackToken],
    ) -> Tuple[Optional[str], Optional[str]]:
        """
        Determine if the current sequence of characters would start a html block element.
        """

        assert extracted_whitespace is not None
        # Candidate only if indented at most 3 spaces and sitting on the
        # HTML block start character.
        is_candidate = ParserHelper.is_length_less_than_or_equal_to(
            extracted_whitespace, 3
        ) and ParserHelper.is_character_at_index(
            line_to_parse,
            start_index,
            HtmlHelper.__html_block_start_character,
        )
        if not is_candidate:
            return None, None
        return HtmlHelper.__determine_html_block_type(
            token_stack,
            line_to_parse,
            start_index,
        )
Beispiel #2
0
    def parse_html_block(parser_state, position_marker, extracted_whitespace):
        """
        Determine if we have the criteria that we need to start an HTML block.
        """

        # Bail out early unless the line is indented at most 3 spaces and
        # starts with the HTML block start character.
        if not (
            ParserHelper.is_length_less_than_or_equal_to(extracted_whitespace, 3)
            and ParserHelper.is_character_at_index(
                position_marker.text_to_parse,
                position_marker.index_number,
                HtmlHelper.__html_block_start_character,
            )
        ):
            return []

        (
            html_block_type,
            remaining_html_tag,
        ) = HtmlHelper.__determine_html_block_type(
            parser_state,
            position_marker.text_to_parse,
            position_marker.index_number,
        )
        if not html_block_type:
            return []

        # Close any open paragraph, push the new HTML block onto the stack,
        # and emit the corresponding markdown token.
        new_tokens, _, _ = parser_state.close_open_blocks_fn(
            parser_state, only_these_blocks=[ParagraphStackToken],
        )
        parser_state.token_stack.append(
            HtmlBlockStackToken(html_block_type, remaining_html_tag)
        )
        new_tokens.append(
            HtmlBlockMarkdownToken(position_marker, extracted_whitespace)
        )
        return new_tokens
Beispiel #3
0
    def append_text(
        string_to_append_to: str,
        text_to_append: str,
        alternate_escape_map: Optional[Dict[str, str]] = None,
        add_text_signature: bool = True,
    ) -> str:
        """
        Append the text to the given string, doing any needed encoding as we go.
        """

        escape_map = (
            alternate_escape_map
            if alternate_escape_map
            else InlineHelper.__html_character_escape_map
        )
        searchable_keys = "".join(escape_map.keys())

        collected_parts = [string_to_append_to]
        scan_from = 0
        while True:
            hit_index = ParserHelper.index_any_of(
                text_to_append, searchable_keys, scan_from
            )
            if hit_index == -1:
                break
            replacement = escape_map[text_to_append[hit_index]]
            if add_text_signature:
                # Wrap the escape in replacement markers so the original
                # character can be recovered later.
                replacement = ParserHelper.create_replacement_markers(
                    text_to_append[hit_index], replacement
                )
            collected_parts.append(text_to_append[scan_from:hit_index])
            collected_parts.append(replacement)
            scan_from = hit_index + 1

        if scan_from < len(text_to_append):
            collected_parts.append(text_to_append[scan_from:])
        return "".join(collected_parts)
    def __munge(cls, show_whitespace: bool, log_format: str, args: List[Any]) -> str:
        """
        Interleave the `$`-delimited pieces of `log_format` with the formatted
        arguments and return the resulting string.

        Even slots of the result hold the literal format pieces; odd slots hold
        the matching argument, rendered with whitespace made visible when
        `show_whitespace` is set, otherwise just made visible.

        Raises Exception when the number of `$` placeholders does not match
        the number of arguments.
        """
        split_log_format = log_format.split("$")
        split_log_format_length = len(split_log_format)
        args_length = len(args)
        if split_log_format_length != args_length + 1:
            raise Exception(
                "The number of $ substitution characters does not equal the number of arguments in the list."
            )

        recipient_array: List[str] = [""] * (split_log_format_length + args_length)
        for next_array_index in range(split_log_format_length + args_length):
            # Integer division, not int(x / 2): same result here, but avoids
            # the needless float round-trip.
            half_index = next_array_index // 2
            if next_array_index % 2 == 0:
                recipient_array[next_array_index] = split_log_format[half_index]
            elif show_whitespace:
                recipient_array[
                    next_array_index
                ] = ParserHelper.make_whitespace_visible(args[half_index])
            else:
                recipient_array[next_array_index] = ParserHelper.make_value_visible(
                    args[half_index]
                )
        return "".join(recipient_array)
Beispiel #5
0
    def __parse_angle_link_destination(source_text, new_index):
        """
        Parse a link destination that is included in angle brackets.
        """

        destination_parts = []
        new_index += 1
        continue_scanning = True
        while continue_scanning:
            continue_scanning = False
            new_index, collected_part = ParserHelper.collect_until_one_of_characters(
                source_text, new_index,
                LinkHelper.__angle_link_destination_breaks)
            destination_parts.append(collected_part)
            if ParserHelper.is_character_at_index(
                    source_text, new_index, InlineHelper.backslash_character):
                # Keep the backslash sequence verbatim and resume scanning
                # after it.
                backslash_start_index = new_index
                inline_response = InlineHelper.handle_inline_backslash(
                    InlineRequest(source_text, new_index))
                new_index = inline_response.new_index
                destination_parts.append(
                    source_text[backslash_start_index:new_index])
                continue_scanning = True

        if ParserHelper.is_character_at_index(source_text, new_index,
                                              LinkHelper.__angle_link_end):
            return new_index + 1, "".join(destination_parts)
        # No closing angle bracket: the destination is invalid.
        return -1, ""
Beispiel #6
0
    def __encode_link_destination(link_to_encode):
        """
        Percent-encode a link destination, keeping valid %xx escapes intact
        and rewriting bare "%" and "&" characters.
        """
        link_length = len(link_to_encode)
        encoded_parts = []
        percent_index, before_data = ParserHelper.collect_until_one_of_characters(
            link_to_encode, 0,
            LinkHelper.__special_link_destination_characters)
        encoded_parts.append(
            urllib.parse.quote(before_data,
                               safe=LinkHelper.__link_safe_characters))
        while percent_index < link_length:
            special_character = link_to_encode[percent_index]
            percent_index += 1
            if special_character == "%":
                hex_guess_characters = link_to_encode[
                    percent_index:percent_index + 2]
                is_existing_escape = False
                if len(hex_guess_characters) == 2:
                    try:
                        int(hex_guess_characters, 16)
                        is_existing_escape = True
                    except ValueError:
                        is_existing_escape = False
                if is_existing_escape:
                    # Already a valid %xx escape: pass it through unchanged.
                    encoded_parts.append("%" + hex_guess_characters)
                    percent_index += 2
                else:
                    encoded_parts.append("%25")
            else:
                assert special_character == "&"
                encoded_parts.append("&amp;")

            percent_index, before_data = ParserHelper.collect_until_one_of_characters(
                link_to_encode,
                percent_index,
                LinkHelper.__special_link_destination_characters,
            )
            encoded_parts.append(
                urllib.parse.quote(before_data,
                                   safe=LinkHelper.__link_safe_characters))
        return "".join(encoded_parts)
Beispiel #7
0
    def extract_optional_attribute_value(line_to_parse, value_index):
        """
        Determine and extract an optional attribute value.

        Returns the index just past the attribute value (or past the name,
        when no "=" separator follows it), or -1 when the value is malformed,
        e.g. an unterminated quoted value or an empty unquoted value.
        """

        non_whitespace_index, _ = ParserHelper.extract_whitespace(
            line_to_parse, value_index
        )
        line_size = len(line_to_parse)
        if non_whitespace_index >= line_size or (
            line_to_parse[non_whitespace_index]
            != HtmlHelper.__html_attribute_name_value_separator
        ):
            # No "=" separator: the attribute has no value, which is legal.
            return non_whitespace_index

        non_whitespace_index, _ = ParserHelper.extract_whitespace(
            line_to_parse, non_whitespace_index + 1
        )
        if non_whitespace_index >= line_size:
            return -1

        first_character_of_value = line_to_parse[non_whitespace_index]
        if first_character_of_value in (
            HtmlHelper.__html_attribute_value_double,
            HtmlHelper.__html_attribute_value_single,
        ):
            # Quoted value: both quote styles behave identically, collecting
            # up to the matching quote character.
            non_whitespace_index, _ = ParserHelper.collect_until_character(
                line_to_parse,
                non_whitespace_index + 1,
                first_character_of_value,
            )
            if non_whitespace_index == line_size:
                # Unterminated quoted value.
                return -1
            non_whitespace_index += 1
        else:
            # Unquoted value: collect until a terminator character; an empty
            # collection means there was no value at all.
            (
                non_whitespace_index,
                extracted_text,
            ) = ParserHelper.collect_until_one_of_characters(
                line_to_parse,
                non_whitespace_index,
                HtmlHelper.__html_tag_attribute_value_terminators,
            )
            if not extracted_text:
                return -1
        return non_whitespace_index
Beispiel #8
0
    def __handle_text_token(
        cls,
        output_html: str,
        next_token: MarkdownToken,
        transform_state: TransformState,
    ) -> str:
        """
        Handle the text token.
        """
        text_token = cast(TextMarkdownToken, next_token)
        resolved_text = ParserHelper.resolve_all_from_text(text_token.token_text)

        if transform_state.is_in_code_block:
            # Code blocks keep their (resolved) leading whitespace.
            combined_parts = [
                output_html,
                ParserHelper.resolve_all_from_text(
                    text_token.extracted_whitespace),
                resolved_text,
            ]
        elif transform_state.is_in_html_block:
            # HTML blocks keep the raw whitespace and add a trailing newline.
            combined_parts = [
                output_html,
                text_token.extracted_whitespace,
                resolved_text,
                ParserHelper.newline_character,
            ]
        else:
            combined_parts = [output_html, resolved_text]
        return "".join(combined_parts)
Beispiel #9
0
    def __handle_numeric_character_reference(source_text, new_index):
        """
        Handle a character reference that is numeric in nature.

        `new_index` points at the character that introduced the numeric
        reference; returns a tuple of the replacement string, the index just
        past the parsed reference, and the original reference text including
        the closing ";" (None when the reference was not terminated).
        """

        original_reference = None
        new_index += 1
        translated_reference = -1
        # A leading hex-start character ("x"/"X") selects a hexadecimal
        # reference; otherwise the digits are interpreted as decimal.
        if new_index < len(source_text) and (
                source_text[new_index]
                in InlineHelper.__hex_character_reference_start_character):
            hex_char = source_text[new_index]
            new_index += 1
            end_index, collected_string = ParserHelper.collect_while_one_of_characters(
                source_text, new_index, string.hexdigits)
            LOGGER.debug(
                "&#x>>a>>%s>>b>>%s>>%s",
                str(end_index),
                str(collected_string),
                str(len(source_text)),
            )
            delta = end_index - new_index
            LOGGER.debug("delta>>%s>>", str(delta))
            # Only 1 to 6 hexadecimal digits form a translatable reference.
            if 1 <= delta <= 6:
                translated_reference = int(collected_string, 16)
            # Rebuild the raw "&#x..." text for use when the reference turns
            # out to be invalid or as the recorded original text.
            new_string = (
                InlineHelper.character_reference_start_character +
                InlineHelper.__numeric_character_reference_start_character +
                hex_char + collected_string)
            new_index = end_index
        else:
            end_index, collected_string = ParserHelper.collect_while_one_of_characters(
                source_text, new_index, string.digits)
            LOGGER.debug(
                "&#>>a>>%s>>b>>%s>>%s",
                str(end_index),
                str(collected_string),
                str(len(source_text)),
            )
            delta = end_index - new_index
            LOGGER.debug("delta>>%s>>", str(delta))
            # Only 1 to 7 decimal digits form a translatable reference.
            if 1 <= delta <= 7:
                translated_reference = int(collected_string)
            new_string = (
                InlineHelper.character_reference_start_character +
                InlineHelper.__numeric_character_reference_start_character +
                collected_string)
            new_index = end_index

        # The reference only takes effect if the digits were valid and the
        # terminator character follows; a code point of 0 is replaced with
        # the invalid-reference substitute character.
        if (translated_reference >= 0 and new_index < len(source_text)
                and source_text[new_index]
                == InlineHelper.__character_reference_end_character):
            new_index += 1
            original_reference = new_string + ";"
            if translated_reference == 0:
                new_string = InlineHelper.__invalid_reference_character_substitute
            else:
                new_string = chr(translated_reference)
        return new_string, new_index, original_reference
Beispiel #10
0
    def parse_setext_headings(
        parser_state,
        position_marker,
        extracted_whitespace,
        this_bq_count,
        stack_bq_count,
    ):
        """
        Handle the parsing of an setext heading.

        Returns the list of new tokens (the setext heading's end token) or
        an empty list when the current line does not complete a setext
        heading.
        """

        new_tokens = []
        # A setext underline requires at most 3 leading spaces, must start
        # with a setext character, must directly follow a paragraph, and the
        # block-quote depth must be unchanged.
        if (ParserHelper.is_length_less_than_or_equal_to(
                extracted_whitespace, 3)
                and ParserHelper.is_character_at_index_one_of(
                    position_marker.text_to_parse,
                    position_marker.index_number,
                    LeafBlockProcessor.__setext_characters,
                ) and parser_state.token_stack[-1].is_paragraph
                and (this_bq_count == stack_bq_count)):
            # Consume the run of identical setext characters, then any
            # whitespace that follows it.
            _, collected_to_index = ParserHelper.collect_while_character(
                position_marker.text_to_parse,
                position_marker.index_number,
                position_marker.text_to_parse[position_marker.index_number],
            )
            (
                after_whitespace_index,
                extra_whitespace_after_setext,
            ) = ParserHelper.extract_whitespace(position_marker.text_to_parse,
                                                collected_to_index)
            # Only a valid underline if nothing but whitespace remains after
            # the run of setext characters.
            if after_whitespace_index == len(position_marker.text_to_parse):

                # This is unusual.  Normally, close_open_blocks is used to close off
                # blocks based on the stack token.  However, since the setext takes
                # the last paragraph of text (see case 61) and translates it
                # into a heading, this has to be done separately, as there is no
                # stack token to close.
                new_tokens.append(
                    EndMarkdownToken(
                        MarkdownToken.token_setext_heading,
                        extracted_whitespace,
                        extra_whitespace_after_setext,
                        None,
                    ))
                # Walk backwards to the most recent paragraph token and
                # replace it in-place with a setext heading token built from
                # its data; the paragraph's stack entry is then discarded.
                token_index = len(parser_state.token_document) - 1
                while not parser_state.token_document[token_index].is_paragraph:
                    token_index -= 1

                replacement_token = SetextHeadingMarkdownToken(
                    position_marker.text_to_parse[
                        position_marker.index_number],
                    collected_to_index - position_marker.index_number,
                    parser_state.token_document[token_index].extra_data,
                    position_marker,
                    parser_state.token_document[token_index],
                )
                parser_state.token_document[token_index] = replacement_token
                del parser_state.token_stack[-1]
        return new_tokens
Beispiel #11
0
    def __calculate_backtick_between_text(
            inline_request: InlineRequest, new_index: int,
            end_backtick_start_index: int) -> Tuple[str, str, str, str]:
        """
        Isolate the text between the opening and closing backticks, trim a
        single leading/trailing space or newline when the remainder is
        non-blank, and encode special characters and newlines.
        """
        between_text = inline_request.source_text[
            new_index:end_backtick_start_index]
        original_between_text = between_text
        leading_whitespace = ""
        trailing_whitespace = ""
        POGGER.debug(
            "after_collect>$>>$>>$<<",
            between_text,
            end_backtick_start_index,
            inline_request.source_text[end_backtick_start_index:],
        )
        strippable_characters = [
            ParserHelper.space_character, ParserHelper.newline_character
        ]
        if (len(between_text) > 2
                and between_text[0] in strippable_characters
                and between_text[-1] in strippable_characters):
            stripped_between_attempt = between_text[1:-1]
            # Only strip if something non-blank remains.
            if stripped_between_attempt.strip():
                leading_whitespace = between_text[0]
                trailing_whitespace = between_text[-1]
                between_text = stripped_between_attempt

        replaced_newline = ParserHelper.create_replacement_markers(
            ParserHelper.newline_character, ParserHelper.space_character)
        POGGER.debug("between_text>>$<<", between_text)
        between_text = ParserHelper.escape_special_characters(between_text)
        POGGER.debug("between_text>>$<<", between_text)
        POGGER.debug(
            "leading_whitespace>>$<<",
            leading_whitespace,
        )
        POGGER.debug(
            "trailing_whitespace>>$<<",
            trailing_whitespace,
        )
        # Newlines inside code spans render as spaces; record that with
        # replacement markers in all three pieces.
        between_text = between_text.replace(
            ParserHelper.newline_character, replaced_newline)
        leading_whitespace = leading_whitespace.replace(
            ParserHelper.newline_character, replaced_newline)
        trailing_whitespace = trailing_whitespace.replace(
            ParserHelper.newline_character, replaced_newline)
        return (
            between_text,
            original_between_text,
            leading_whitespace,
            trailing_whitespace,
        )
Beispiel #12
0
    def parse_setext_headings(
        parser_state: ParserState,
        position_marker: PositionMarker,
        extracted_whitespace: Optional[str],
        block_quote_data: BlockQuoteData,
    ) -> List[MarkdownToken]:

        """
        Handle the parsing of an setext heading.
        """

        new_tokens: List[MarkdownToken] = []
        assert extracted_whitespace is not None
        # The underline must be indented at most 3 spaces, start with a
        # setext character, follow a paragraph, and leave the block-quote
        # depth unchanged.
        starts_like_setext = ParserHelper.is_length_less_than_or_equal_to(
            extracted_whitespace, 3
        ) and ParserHelper.is_character_at_index_one_of(
            position_marker.text_to_parse,
            position_marker.index_number,
            LeafBlockProcessor.__setext_characters,
        )
        if (
            starts_like_setext
            and parser_state.token_stack[-1].is_paragraph
            and block_quote_data.current_count == block_quote_data.stack_count
        ):
            is_paragraph_continuation = (
                LeafBlockProcessor.__adjust_continuation_for_active_list(
                    parser_state, position_marker
                )
            )

            # Consume the run of identical setext characters, then any
            # trailing whitespace.
            _, collected_to_index = ParserHelper.collect_while_character(
                position_marker.text_to_parse,
                position_marker.index_number,
                position_marker.text_to_parse[position_marker.index_number],
            )
            assert collected_to_index is not None
            (
                after_whitespace_index,
                extra_whitespace_after_setext,
            ) = ParserHelper.extract_whitespace(
                position_marker.text_to_parse, collected_to_index
            )

            # Only whitespace may follow the setext characters, and the line
            # must not be a lazy paragraph continuation.
            if not is_paragraph_continuation and after_whitespace_index == len(
                position_marker.text_to_parse
            ):
                LeafBlockProcessor.__create_setext_token(
                    parser_state,
                    position_marker,
                    collected_to_index,
                    new_tokens,
                    extracted_whitespace,
                    extra_whitespace_after_setext,
                )
        return new_tokens
Beispiel #13
0
 def __parse_raw_tag_name(text_to_parse: str, start_index: int) -> str:
     """
     Parse a HTML tag name from the string.
     """
     if not ParserHelper.is_character_at_index_one_of(
         text_to_parse, start_index, HtmlHelper.__valid_tag_name_start
     ):
         return ""
     # First character is a valid start; consume the remaining name
     # characters and return everything up to the end of the name.
     end_of_name_index, _ = ParserHelper.collect_while_one_of_characters(
         text_to_parse, start_index + 1, HtmlHelper.__valid_tag_name_characters
     )
     return text_to_parse[:end_of_name_index]
Beispiel #14
0
    def __parse_non_angle_link_destination(source_text, new_index):
        """
        Parse a link destination that is not included in angle brackets.

        Returns a tuple of the index just past the destination and the
        collected destination text, or (-1, None) when the nesting
        characters are unbalanced.
        """

        collected_destination = ""
        nesting_level = 0
        keep_collecting = True
        while keep_collecting:
            LOGGER.debug(
                "collected_destination>>%s<<source_text<<%s>>nesting_level>>%s>>",
                str(collected_destination),
                source_text[new_index:],
                str(nesting_level),
            )
            keep_collecting = False
            # Collect plain destination characters up to the next backslash
            # or nesting-relevant break character.
            new_index, before_part = ParserHelper.collect_until_one_of_characters(
                source_text, new_index, LinkHelper.__non_angle_link_breaks)
            collected_destination = collected_destination + before_part
            LOGGER.debug(">>>>>>%s<<<<<", source_text[new_index:])
            if ParserHelper.is_character_at_index(
                    source_text, new_index, InlineHelper.backslash_character):
                # Keep the backslash sequence verbatim and continue scanning.
                LOGGER.debug("backslash")
                old_new_index = new_index
                inline_request = InlineRequest(source_text, new_index)
                inline_response = InlineHelper.handle_inline_backslash(
                    inline_request)
                new_index = inline_response.new_index
                collected_destination = (collected_destination +
                                         source_text[old_new_index:new_index])
                keep_collecting = True
            elif ParserHelper.is_character_at_index(
                    source_text, new_index, LinkHelper.__non_angle_link_nest):
                # Opening nest character: track the depth and keep going.
                LOGGER.debug("+1")
                nesting_level += 1
                collected_destination += LinkHelper.__non_angle_link_nest
                new_index += 1
                keep_collecting = True
            elif ParserHelper.is_character_at_index(
                    source_text, new_index,
                    LinkHelper.__non_angle_link_unnest):
                # Closing nest character: consumed only while inside a nested
                # pair; at depth 0 it terminates the destination.
                LOGGER.debug("-1")
                if nesting_level != 0:
                    collected_destination += LinkHelper.__non_angle_link_unnest
                    new_index += 1
                    nesting_level -= 1
                    keep_collecting = True
        ex_link = collected_destination
        LOGGER.debug("collected_destination>>%s", str(collected_destination))
        # Unbalanced nesting invalidates the whole destination.
        if nesting_level != 0:
            return -1, None
        return new_index, ex_link
    def __is_front_matter_valid(
        collected_lines: List[str], ) -> Union[Dict[str, str], str]:
        """
        Validate the collected front-matter lines.

        Returns the parsed key/value map on success, or a string describing
        why the metadata is not valid.
        """

        ascii_letters_and_digits = f"{string.ascii_letters}{string.digits}_-"

        current_title = ""
        current_value = ""
        value_map: Dict[str, str] = {}

        for next_line in collected_lines:
            POGGER.debug("Next fm:>$s<", next_line)
            next_index, _ = ParserHelper.extract_whitespace(next_line, 0)
            assert next_index is not None
            if next_index >= 4:
                # Lines indented 4+ spaces continue the previous value.
                POGGER.debug("Indented line established.")
                if not current_title:
                    return "Continuation line encountered before a keyword line."
                current_value += f"\n{next_line.strip()}"
                POGGER.debug("current_value>$<", current_value)
            else:
                if not next_line.strip():
                    return "Blank line encountered before end of metadata."

                POGGER.debug("Non-indented line established.")
                if current_title:
                    POGGER.debug("Adding '$' as '$'.", current_title,
                                 current_value)
                    # Normalize the key to lowercase, consistent with the
                    # final entry added after the loop (the original stored
                    # mid-document keys unnormalized).
                    value_map[current_title.lower()] = current_value

                (
                    next_index,
                    collected_title,
                ) = ParserHelper.collect_while_one_of_characters(
                    next_line, next_index, ascii_letters_and_digits)
                assert next_index is not None
                assert collected_title is not None
                current_title = collected_title
                if next_index < len(
                        next_line) and next_line[next_index] == ":":
                    current_value = next_line[next_index + 1:].strip()
                else:
                    return "Newline did not start with `keyword:`."
        if current_title:
            POGGER.debug("Adding final '$' as '$'.", current_title,
                         current_value)
            value_map[current_title.lower()] = current_value

            # This is specifically to trigger test_front_matter_20.
            assert current_title != "test" or current_value != "assert"
        if not value_map:
            return "No valid metadata header lines were found."
        return value_map
Beispiel #16
0
    def is_thematic_break(
        line_to_parse: str,
        start_index: int,
        extracted_whitespace: Optional[str],
        skip_whitespace_check: bool = False,
        whitespace_allowed_between_characters: bool = True,
    ) -> Tuple[Optional[str], Optional[int]]:
        """
        Determine whether or not we have a thematic break.
        """

        assert extracted_whitespace is not None
        is_thematic_character = ParserHelper.is_character_at_index_one_of(
            line_to_parse, start_index, LeafBlockProcessor.__thematic_break_characters
        )
        POGGER.debug("skip_whitespace_check>>$", skip_whitespace_check)
        POGGER.debug("is_thematic_character>>$", is_thematic_character)
        whitespace_is_acceptable = (
            ParserHelper.is_length_less_than_or_equal_to(extracted_whitespace, 3)
            or skip_whitespace_check
        )
        if not (whitespace_is_acceptable and is_thematic_character):
            return None, None

        start_char = line_to_parse[start_index]
        line_to_parse_size = len(line_to_parse)
        index = start_index
        char_count = 0
        # Count repeated break characters, optionally skipping interior
        # whitespace; anything else ends the scan.
        while index < line_to_parse_size:
            if (
                whitespace_allowed_between_characters
                and ParserHelper.is_character_at_index_whitespace(
                    line_to_parse, index
                )
            ):
                index += 1
            elif line_to_parse[index] == start_char:
                index += 1
                char_count += 1
            else:
                break  # pragma: no cover

        POGGER.debug("char_count>>$", char_count)
        POGGER.debug("index>>$", index)
        POGGER.debug("line_to_parse_size>>$", line_to_parse_size)
        # A break needs at least 3 characters and must consume the whole line.
        if char_count >= 3 and index == line_to_parse_size:
            return start_char, index
        return None, None
Beispiel #17
0
    def compare_versus_expected(
        cls,
        stream_name,
        actual_stream,
        expected_text,
        additional_text=None,
        log_extra=None,
    ):
        """
        Do a thorough comparison of the actual stream against the expected text.

        When `additional_text` is supplied, `expected_text` must appear at the
        start of the stream and each additional block must appear somewhere in
        it; otherwise the stream must equal `expected_text` (both stripped).
        Raises AssertionError on any mismatch.
        """

        if additional_text:
            # Fix: the second string was missing its f-prefix, so the
            # placeholders were printed literally in the failure message.
            assert actual_stream.getvalue().strip().startswith(expected_text.strip()), (
                f"Block\n---\n{expected_text}\n---\nwas not found at the start of"
                + f"\n---\n{actual_stream.getvalue()}\nExtra:{log_extra}"
            )

            for next_text_block in additional_text:
                was_found = next_text_block.strip() in actual_stream.getvalue().strip()
                diff = difflib.ndiff(
                    next_text_block.strip().splitlines(),
                    actual_stream.getvalue().strip().splitlines(),
                )

                diff_values = ParserHelper.newline_character.join(list(diff))
                print(diff_values, file=sys.stderr)
                if not was_found:
                    raise AssertionError(
                        f"Block\n---\n{next_text_block}\n---\nwas not found in\n---\n{actual_stream.getvalue()}"
                    )
        elif actual_stream.getvalue().strip() != expected_text.strip():
            diff = difflib.ndiff(
                expected_text.splitlines(), actual_stream.getvalue().splitlines()
            )

            diff_values = f"{ParserHelper.newline_character.join(list(diff))}\n---\n"

            LOGGER.warning(
                "actual>>%s",
                ParserHelper.make_value_visible(actual_stream.getvalue()),
            )
            print(
                f"WARN>actual>>{ParserHelper.make_value_visible(actual_stream.getvalue())}"
            )
            LOGGER.warning("expect>>%s", ParserHelper.make_value_visible(expected_text))
            print(f"WARN>expect>>{ParserHelper.make_value_visible(expected_text)}")
            if log_extra:
                print(f"log_extra:{log_extra}")
            raise AssertionError(f"{stream_name} not as expected:\n{diff_values}")
Beispiel #18
0
    def extract_link_label(line_to_parse,
                           new_index,
                           include_reference_colon=True):
        """
        Extract the link reference definition's link label.
        """
        label_parts = []
        keep_collecting = True
        while keep_collecting:
            keep_collecting = False
            new_index, collected_part = ParserHelper.collect_until_one_of_characters(
                line_to_parse, new_index, LinkHelper.__link_label_breaks)
            label_parts.append(collected_part)
            if ParserHelper.is_character_at_index(
                    line_to_parse, new_index,
                    InlineHelper.backslash_character):
                # Keep the backslash sequence verbatim and resume scanning
                # after it.
                backslash_start = new_index
                inline_response = InlineHelper.handle_inline_backslash(
                    InlineRequest(line_to_parse, new_index))
                new_index = inline_response.new_index
                label_parts.append(line_to_parse[backslash_start:new_index])
                keep_collecting = True
            elif ParserHelper.is_character_at_index(
                    line_to_parse, new_index, LinkHelper.link_label_start):
                LOGGER.debug(">> unescaped [, bailing")
                return False, -1, None

        LOGGER.debug("look for ]>>%s<<", line_to_parse[new_index:])
        if not ParserHelper.is_character_at_index(line_to_parse, new_index,
                                                  LinkHelper.link_label_end):
            LOGGER.debug(">> no end ], bailing")
            return False, new_index, None
        new_index += 1

        if include_reference_colon:
            LOGGER.debug("look for :>>%s<<", line_to_parse[new_index:])
            if not ParserHelper.is_character_at_index(
                    line_to_parse,
                    new_index,
                    LinkHelper.__link_label_is_definition_character,
            ):
                LOGGER.debug(">> no :, bailing")
                return False, -1, None
            new_index += 1

        return True, new_index, "".join(label_parts)
Beispiel #19
0
 def __parse_raw_tag_name(text_to_parse, start_index):
     """
     Parse a HTML tag name from the string, returning an empty string
     when no valid tag-name start character is found at start_index.
     """
     if not ParserHelper.is_character_at_index_one_of(
         text_to_parse, start_index, HtmlHelper.__valid_tag_name_start
     ):
         return ""
     scan_index = start_index + 1
     while ParserHelper.is_character_at_index_one_of(
         text_to_parse, scan_index, HtmlHelper.__valid_tag_name_characters
     ):
         scan_index += 1
     # NOTE(review): the slice deliberately starts at 0, not start_index,
     # so any prefix before the name is kept — presumably a leading "/";
     # confirm against callers before changing.
     return text_to_parse[0:scan_index]
Beispiel #20
0
    def __search_for_matches(
        self,
        string_to_check: str,
        context: PluginScanContext,
        token: MarkdownToken,
        same_line_offset: int = 0,
        start_x_offset: int = 0,
        start_y_offset: int = 0,
    ) -> None:
        """
        Scan the supplied text for case-insensitive occurrences of every
        configured proper name and hand each hit off for closer inspection.
        """
        string_to_check = ParserHelper.remove_all_from_text(string_to_check)
        haystack_lower = string_to_check.lower()
        for proper_name in self.__proper_name_list:
            needle = proper_name.lower()
            scan_from = 0
            hit_index = haystack_lower.find(needle, scan_from)
            while hit_index != -1:
                self.__search_for_possible_matches(
                    string_to_check,
                    haystack_lower,
                    scan_from,
                    hit_index,
                    start_x_offset,
                    start_y_offset,
                    same_line_offset,
                    proper_name,
                    context,
                    token,
                )
                # Resume just past the current occurrence.
                scan_from = hit_index + len(proper_name)
                hit_index = haystack_lower.find(needle, scan_from)
Beispiel #21
0
 def __search_for_possible_matches(
     self,
     string_to_check: str,
     string_to_check_lower: str,
     search_start: int,
     found_index: int,
     start_x_offset: int,
     start_y_offset: int,
     same_line_offset: int,
     next_name: str,
     context: PluginScanContext,
     token: MarkdownToken,
 ) -> None:
     """
     Translate a raw substring hit at found_index into line/column deltas
     relative to the token's position, then hand the candidate off to
     __check_for_proper_match for final validation/reporting.
     """
     # Count the newlines between search_start and the hit, yielding the
     # line delta and the column delta on the hit's own line.
     col_adjust, line_adjust = ParserHelper.adjust_for_newlines(
         string_to_check_lower, search_start, found_index)
     # The same-line offset only applies while still on the starting line
     # of the starting token.
     if line_adjust == 0 and start_y_offset == 0:
         col_adjust -= same_line_offset
     line_adjust += start_y_offset
     # NOTE(review): the sign games below appear to encode "negative
     # column means absolute/special" for the downstream check — confirm
     # against __check_for_proper_match before touching these branches.
     if col_adjust == 0 and start_x_offset:
         col_adjust += (-start_x_offset
                        if start_x_offset > 0 else -(-start_x_offset - 1))
         col_adjust = -col_adjust
     elif col_adjust > 0 and start_x_offset:
         col_adjust += -start_x_offset - 1
         col_adjust = -col_adjust
     self.__check_for_proper_match(
         string_to_check,
         found_index,
         next_name,
         context,
         token,
         line_adjust,
         col_adjust,
     )
Beispiel #22
0
    def extract_link_destination(line_to_parse, new_index, is_blank_line):
        """
        Extract the link reference definition's link destination, first
        skipping over any whitespace that precedes it.
        """
        new_index, prefix_whitespace = ParserHelper.collect_while_one_of_characters(
            line_to_parse, new_index, Constants.whitespace)
        # Running off the end of a non-blank line means there is nothing
        # left to serve as a destination.
        if not is_blank_line and new_index == len(line_to_parse):
            return False, new_index, None, None, None, None

        LOGGER.debug("LD>>%s<<", line_to_parse[new_index:])
        (
            inline_link,
            pre_inline_link,
            new_index,
            inline_raw_link,
        ) = LinkHelper.__parse_link_destination(line_to_parse, new_index)
        if new_index == -1:
            return False, -1, None, None, None, None
        return (True, new_index, inline_link, pre_inline_link,
                prefix_whitespace, inline_raw_link)
Beispiel #23
0
    def __parse_valid_uri_autolink(
            text_to_parse: str, line_number: int,
            column_number: int) -> Optional[UriAutolinkMarkdownToken]:
        """
        Parse a possible uri autolink, returning a token only when the
        entire text is a valid scheme, a ':', and a whitespace-free path.
        """
        # An embedded '<' or a non-letter first character disqualifies
        # the candidate immediately.
        if (InlineHelper.angle_bracket_start in text_to_parse
                or text_to_parse[0] not in string.ascii_letters):
            return None
        scan_index, scheme_rest = ParserHelper.collect_while_one_of_characters(
            text_to_parse, 1, InlineHelper.__valid_scheme_characters)
        assert scan_index is not None
        full_scheme = f"{text_to_parse[0]}{scheme_rest}"
        text_length = len(text_to_parse)
        # Scheme must be 2-32 characters and be followed by the ':'.
        if not (2 <= len(full_scheme) <= 32 and scan_index < text_length
                and text_to_parse[scan_index]
                == InlineHelper.__scheme_end_character):
            return None
        scan_index += 1
        # The path runs to the end of the text with no control/space
        # characters allowed.
        while scan_index < text_length and ord(text_to_parse[scan_index]) > 32:
            scan_index += 1
        if scan_index == text_length:
            return UriAutolinkMarkdownToken(text_to_parse, line_number,
                                            column_number)
        return None
Beispiel #24
0
    def look_for_pragmas(
        position_marker: PositionMarker,
        line_to_parse: str,
        container_depth: int,
        extracted_whitespace: Optional[str],
        parser_properties: ParseBlockPassProperties,
    ) -> bool:
        """
        Look for a pragma in the current line, recording it in the parser
        properties and reporting True when one is found.
        """
        # Pragmas are only recognized at top level with no indentation.
        if container_depth or extracted_whitespace:
            return False
        was_extended_prefix = line_to_parse.startswith(
            PragmaToken.pragma_alternate_prefix)
        if not (was_extended_prefix
                or line_to_parse.startswith(PragmaToken.pragma_prefix)):
            return False

        matched_prefix = (PragmaToken.pragma_alternate_prefix
                          if was_extended_prefix else PragmaToken.pragma_prefix)
        start_index, _ = ParserHelper.extract_whitespace(
            line_to_parse, len(matched_prefix))
        remaining_line = line_to_parse[start_index:].rstrip().lower()
        if not (remaining_line.startswith(PragmaToken.pragma_title)
                and remaining_line.endswith(PragmaToken.pragma_suffix)):
            return False
        # Extended-prefix pragmas are keyed by the negated line number to
        # distinguish them from the normal form.
        line_key = (-position_marker.line_number
                    if was_extended_prefix else position_marker.line_number)
        parser_properties.pragma_lines[line_key] = line_to_parse
        return True
Beispiel #25
0
    def extract_html_attribute_name(string_to_parse: str, string_index: int) -> int:
        """
        Attempt to extract the attribute name from the provided string,
        returning the index just past the name, or -1 on failure.
        """
        parse_length = len(string_to_parse)
        if (string_index >= parse_length
                or string_to_parse[string_index]
                not in HtmlHelper.__attribute_start_characters):
            return -1

        name_end_index, __ = ParserHelper.collect_while_one_of_characters(
            string_to_parse, string_index + 1,
            HtmlHelper.__attribute_other_characters)
        assert name_end_index is not None

        # The name is only valid if terminated by '=', an attribute
        # separator, or the start/end of the tag.
        if name_end_index < parse_length and string_to_parse[name_end_index] in (
            HtmlHelper.__html_attribute_name_value_separator,
            HtmlHelper.__html_attribute_separator,
            HtmlHelper.__html_tag_start,
            HtmlHelper.__html_tag_end,
        ):
            return name_end_index
        return -1
Beispiel #26
0
    def __determine_html_block_type(parser_state, line_to_parse, start_index):
        """
        Determine the type of the html block that we are starting.
        """
        character_index, remaining_html_tag = start_index + 1, ""

        # Special block types (comments, CDATA, etc.) are matched first.
        html_block_type = HtmlHelper.__check_for_special_html_blocks(
            line_to_parse, character_index)
        if not html_block_type:
            # Otherwise collect the tag name and classify it normally.
            (
                character_index,
                remaining_html_tag,
            ) = ParserHelper.collect_until_one_of_characters(
                line_to_parse, character_index, HtmlHelper.__html_tag_name_end)
            remaining_html_tag = remaining_html_tag.lower()
            html_block_type = HtmlHelper.__check_for_normal_html_blocks(
                remaining_html_tag, line_to_parse, character_index)
        # Type 7 blocks cannot interrupt a paragraph.
        if not html_block_type or (
                html_block_type == HtmlHelper.html_block_7
                and parser_state.token_stack[-1].is_paragraph):
            return None, None
        return html_block_type, remaining_html_tag
Beispiel #27
0
 def __evaluate_possible_url(
     self,
     source_text: str,
     url_prefix: str,
     found_index: int,
     context: PluginScanContext,
     token: MarkdownToken,
 ) -> None:
     """
     Report the next token if the prefix hit at found_index looks like a
     bare url: preceded by start-of-text/space/newline and followed by
     "//" plus at least one non-boundary character.
     """
     boundary_characters = (" ", ParserHelper.newline_character)
     if found_index != 0 and source_text[found_index - 1] not in boundary_characters:
         return
     after_prefix = source_text[found_index + len(url_prefix):]
     if (len(after_prefix) < 3 or not after_prefix.startswith("//")
             or after_prefix[2] in boundary_characters):
         return
     (
         column_number_delta,
         line_number_delta,
     ) = ParserHelper.adjust_for_newlines(source_text, 0, found_index)
     self.report_next_token_error(
         context,
         token,
         line_number_delta=line_number_delta,
         column_number_delta=column_number_delta,
     )
Beispiel #28
0
 def extract_link_title(line_to_parse, new_index, is_blank_line):
     """
     Extract the link reference definition's optional link title.
     """
     LOGGER.debug("before ws>>%s>", line_to_parse[new_index:])
     new_index, extracted_ws = ParserHelper.extract_any_whitespace(
         line_to_parse, new_index)
     LOGGER.debug(
         "after ws>>%s>ex_ws>%s",
         line_to_parse[new_index:],
         extracted_ws.replace("\n", "\\n"),
     )
     title_start, line_size = new_index, len(line_to_parse)
     if not is_blank_line and new_index == line_size:
         return False, new_index, None, None, None, None
     found_title, raw_title = "", ""
     # Only look for a title when whitespace separated it from the
     # destination and there is text left on the line.
     if extracted_ws and new_index < line_size:
         found_title, raw_title, new_index = LinkHelper.__parse_link_title(
             line_to_parse, new_index)
         if new_index == -1:
             return False, -1, None, None, None, None
         if found_title is None:
             return False, new_index, None, None, None, None
     return (
         True,
         new_index,
         found_title,
         raw_title,
         extracted_ws,
         line_to_parse[title_start:new_index],
     )
Beispiel #29
0
    def __parse_link_title(source_text, new_index):
        """
        Parse an inline link's link title.
        """
        LOGGER.debug("parse_link_title>>new_index>>%s>>",
                     source_text[new_index:])
        # Each entry: (opening character, closing character, nestable
        # opening character for balanced matching).
        bounding_pairs = (
            (LinkHelper.__link_title_single,
             LinkHelper.__link_title_single, None),
            (LinkHelper.__link_title_double,
             LinkHelper.__link_title_double, None),
            (LinkHelper.__link_title_parenthesis_open,
             LinkHelper.__link_title_parenthesis_close,
             LinkHelper.__link_title_parenthesis_open),
        )
        ex_title = ""
        for start_character, close_character, open_character in bounding_pairs:
            if ParserHelper.is_character_at_index(source_text, new_index,
                                                  start_character):
                new_index, ex_title = InlineHelper.extract_bounded_string(
                    source_text, new_index + 1, close_character,
                    open_character)
                break
        else:
            new_index = -1
        LOGGER.debug(
            "parse_link_title>>new_index>>%s>>ex_link>>%s>>",
            str(new_index),
            str(ex_title),
        )
        pre_ex_title = ex_title
        if ex_title is not None:
            # Resolve backslash escapes into the final title text while
            # keeping the raw form in pre_ex_title.
            ex_title = InlineHelper.append_text(
                "",
                InlineHelper.handle_backslashes(ex_title,
                                                add_text_signature=False),
                add_text_signature=False,
            )
        LOGGER.debug("parse_link_title>>pre>>%s>>", str(pre_ex_title))
        LOGGER.debug("parse_link_title>>after>>%s>>", str(ex_title))

        return ex_title, pre_ex_title, new_index
    def is_block_quote_start(line_to_parse,
                             start_index,
                             extracted_whitespace,
                             adj_ws=None):
        """
        Determine if we have the start of a block quote section: at most
        three spaces of (adjusted) whitespace followed by '>'.
        """
        effective_ws = extracted_whitespace if adj_ws is None else adj_ws
        return bool(
            ParserHelper.is_length_less_than_or_equal_to(effective_ws, 3)
            and ParserHelper.is_character_at_index(
                line_to_parse, start_index,
                BlockQuoteProcessor.__block_quote_character))