def test_simple_case_from_end():
    """
    Make sure that we test a simple extraction from the end of the string.
    """
    # Arrange
    source_text = "this is a test"
    extract_from = 10
    expected = (len(source_text), "test")

    # Act
    result = ParserHelper.extract_until_whitespace(source_text, extract_from)

    # Assert
    assert result == expected
def test_empty_string_with_good_index():
    """
    Make sure that an empty string is handled properly with a good index
    """
    # Arrange
    source_text = ""
    extract_from = 0
    expected = (0, "")

    # Act
    result = ParserHelper.extract_until_whitespace(source_text, extract_from)

    # Assert
    assert result == expected
def test_empty_string_with_bad_left_index():
    """
    Make sure that an empty string is handled properly with an index that is too far to the left.
    """
    # Arrange
    source_text = ""
    extract_from = -1
    expected = (None, None)

    # Act
    result = ParserHelper.extract_until_whitespace(source_text, extract_from)

    # Assert
    assert result == expected
def test_already_on_whitespace():
    """
    Make sure that we test extracting while already on a whitespace character.
    """
    # Arrange
    source_text = "this is a test"
    extract_from = 9
    expected = (9, "")

    # Act
    result = ParserHelper.extract_until_whitespace(source_text, extract_from)

    # Assert
    assert result == expected
Example #5
0
    def next_line(self, context: PluginScanContext, line: str) -> None:
        """
        Event that a new line is being processed.
        """
        # Advance to the next leaf token once this line reaches its line number.
        next_token_index = self.__leaf_token_index + 1
        if next_token_index < len(self.__leaf_tokens) and self.__line_index == \
                self.__leaf_tokens[next_token_index].line_number:
            self.__leaf_token_index = next_token_index

        current_length = len(line)
        effective_limit = self.__line_length
        exceeds_limit = False
        if current_length > self.__minimum_line_length:
            exceeds_limit, effective_limit = self.__is_really_longer(
                current_length, effective_limit)

        if exceeds_limit:
            if self.__strict_mode:
                should_trigger = True
            else:
                next_space_index, _ = ParserHelper.extract_until_whitespace(
                    line, effective_limit)
                if self.__stern_mode:
                    should_trigger = current_length == next_space_index
                else:
                    should_trigger = current_length != next_space_index

            if should_trigger:
                self.report_next_line_error(
                    context,
                    1,
                    extra_error_information=(
                        f"Expected: {effective_limit}, Actual: {current_length}"
                    ))
        self.__line_index += 1
Example #6
0
    def __process_fenced_start(
        parser_state: ParserState,
        position_marker: PositionMarker,
        non_whitespace_index: int,
        collected_count: int,
        extracted_whitespace: Optional[str],
        extracted_whitespace_before_info_string: Optional[str],
    ) -> List[MarkdownToken]:
        """
        Process the possible start of a fenced code block.

        When the fence can start, this closes any open paragraph, emits a
        FencedCodeBlockMarkdownToken, and pushes the matching stack token.
        Returns the list of newly created tokens (empty when the fence does
        not start).
        """
        POGGER.debug("pfcb->check")
        new_tokens: List[MarkdownToken] = []
        # A tilde fence can always start; a backtick fence only starts when
        # the remainder of the line contains no further backtick character.
        if (
            position_marker.text_to_parse[position_marker.index_number]
            == LeafBlockProcessor.__fenced_start_tilde
            or LeafBlockProcessor.__fenced_start_backtick
            not in position_marker.text_to_parse[non_whitespace_index:]
        ):
            POGGER.debug("pfcb->start")
            (
                after_extracted_text_index,
                extracted_text,
            ) = ParserHelper.extract_until_whitespace(
                position_marker.text_to_parse, non_whitespace_index
            )
            assert extracted_text is not None
            text_after_extracted_text = position_marker.text_to_parse[
                after_extracted_text_index:
            ]

            old_top_of_stack = parser_state.token_stack[-1]
            new_tokens, _ = parser_state.close_open_blocks_fn(
                parser_state,
                only_these_blocks=[ParagraphStackToken],
            )

            # Remember the pre-backslash-processing values; they are recorded
            # on the token only when backslash handling changed something.
            pre_extracted_text, pre_text_after_extracted_text = (
                extracted_text,
                text_after_extracted_text,
            )

            extracted_text = InlineHelper.handle_backslashes(extracted_text)
            text_after_extracted_text = InlineHelper.handle_backslashes(
                text_after_extracted_text
            )

            if pre_extracted_text == extracted_text:
                pre_extracted_text = ""
            if pre_text_after_extracted_text == text_after_extracted_text:
                pre_text_after_extracted_text = ""

            assert extracted_whitespace is not None
            assert extracted_whitespace_before_info_string is not None
            new_token = FencedCodeBlockMarkdownToken(
                position_marker.text_to_parse[position_marker.index_number],
                collected_count,
                extracted_text,
                pre_extracted_text,
                text_after_extracted_text,
                pre_text_after_extracted_text,
                extracted_whitespace,
                extracted_whitespace_before_info_string,
                position_marker,
            )
            new_tokens.append(new_token)
            parser_state.token_stack.append(
                FencedCodeBlockStackToken(
                    code_fence_character=position_marker.text_to_parse[
                        position_marker.index_number
                    ],
                    fence_character_count=collected_count,
                    whitespace_start_count=ParserHelper.calculate_length(
                        extracted_whitespace
                    ),
                    matching_markdown_token=new_token,
                )
            )
            POGGER.debug("StackToken-->$<<", parser_state.token_stack[-1])
            POGGER.debug(
                "StackToken>start_markdown_token-->$<<",
                parser_state.token_stack[-1].matching_markdown_token,
            )

            LeafBlockProcessor.correct_for_leaf_block_start_in_list(
                parser_state,
                position_marker.index_indent,
                old_top_of_stack,
                new_tokens,
            )
        return new_tokens
Example #7
0
    def parse_fenced_code_block(
        parser_state,
        position_marker,
        extracted_whitespace,
    ):
        """
        Handle the parsing of a fenced code block

        Returns a tuple of (new_tokens, extracted_whitespace); the whitespace
        may be rewritten when the line sits inside an indented fence.
        """

        LOGGER.debug(
            "line>>%s>>index>>%s>>",
            position_marker.text_to_parse,
            position_marker.index_number,
        )
        new_tokens = []
        (
            is_fence_start,
            non_whitespace_index,
            extracted_whitespace_before_info_string,
            collected_count,
        ) = LeafBlockProcessor.is_fenced_code_block(
            position_marker.text_to_parse,
            position_marker.index_number,
            extracted_whitespace,
        )
        if is_fence_start and not parser_state.token_stack[-1].is_html_block:
            # Already inside a fenced block: this fence line may close it.
            if parser_state.token_stack[-1].is_fenced_code_block:
                LOGGER.debug("pfcb->end")

                # Closing fence must use the same character as the opening
                # fence, be at least as long, and be followed by nothing else.
                if (parser_state.token_stack[-1].code_fence_character
                        == position_marker.text_to_parse[
                            position_marker.index_number] and collected_count
                        >= parser_state.token_stack[-1].fence_character_count
                        and non_whitespace_index >= len(
                            position_marker.text_to_parse)):
                    new_end_token = parser_state.token_stack[
                        -1].generate_close_token(extracted_whitespace)
                    new_tokens.append(new_end_token)
                    new_end_token.start_markdown_token = parser_state.token_stack[
                        -1].start_markdown_token
                    new_end_token.extra_end_data = str(collected_count)
                    new_end_token.compose_data_field()
                    del parser_state.token_stack[-1]
            else:
                LOGGER.debug("pfcb->check")
                # A tilde fence can always start; a backtick fence only starts
                # when the remainder of the line has no further backticks.
                if (position_marker.text_to_parse[position_marker.index_number]
                        == LeafBlockProcessor.__fenced_start_tilde
                        or LeafBlockProcessor.__fenced_start_backtick not in
                        position_marker.text_to_parse[non_whitespace_index:]):
                    LOGGER.debug("pfcb->start")
                    (
                        after_extracted_text_index,
                        extracted_text,
                    ) = ParserHelper.extract_until_whitespace(
                        position_marker.text_to_parse, non_whitespace_index)
                    text_after_extracted_text = position_marker.text_to_parse[
                        after_extracted_text_index:]

                    new_tokens, _, _ = parser_state.close_open_blocks_fn(
                        parser_state,
                        only_these_blocks=[ParagraphStackToken],
                    )

                    # Keep pre-backslash-processing values; they are recorded
                    # on the token only when backslash handling changed them.
                    pre_extracted_text = extracted_text
                    pre_text_after_extracted_text = text_after_extracted_text

                    extracted_text = InlineHelper.handle_backslashes(
                        extracted_text, add_text_signature=False)
                    text_after_extracted_text = InlineHelper.handle_backslashes(
                        text_after_extracted_text, add_text_signature=False)

                    if pre_extracted_text == extracted_text:
                        pre_extracted_text = ""
                    if pre_text_after_extracted_text == text_after_extracted_text:
                        pre_text_after_extracted_text = ""

                    new_token = FencedCodeBlockMarkdownToken(
                        position_marker.text_to_parse[
                            position_marker.index_number],
                        collected_count,
                        extracted_text,
                        pre_extracted_text,
                        text_after_extracted_text,
                        pre_text_after_extracted_text,
                        extracted_whitespace,
                        extracted_whitespace_before_info_string,
                        position_marker,
                    )
                    new_tokens.append(new_token)
                    parser_state.token_stack.append(
                        FencedCodeBlockStackToken(
                            code_fence_character=position_marker.text_to_parse[
                                position_marker.index_number],
                            fence_character_count=collected_count,
                            whitespace_start_count=ParserHelper.
                            calculate_length(extracted_whitespace),
                            start_markdown_token=new_token,
                        ))
                    LOGGER.debug("StackToken-->%s<<",
                                 str(parser_state.token_stack[-1]))
                    LOGGER.debug(
                        "StackToken>start_markdown_token-->%s<<",
                        str(parser_state.token_stack[-1].start_markdown_token),
                    )
        elif (parser_state.token_stack[-1].is_fenced_code_block
              and parser_state.token_stack[-1].whitespace_start_count
              and extracted_whitespace):
            # Content line inside an indented fence: rebuild the leading
            # whitespace so the fence's indent is wrapped in the
            # "\a...\a\x03\a" marker sequence and only the excess remains.
            # NOTE(review): marker semantics presumably match ParserHelper's
            # replacement-marker handling — confirm against that helper.
            current_whitespace_length = ParserHelper.calculate_length(
                extracted_whitespace)
            whitespace_left = max(
                0,
                current_whitespace_length -
                parser_state.token_stack[-1].whitespace_start_count,
            )
            LOGGER.debug("previous_ws>>%s", str(current_whitespace_length))
            LOGGER.debug("whitespace_left>>%s", str(whitespace_left))
            removed_whitespace = ("\a" + "".rjust(
                current_whitespace_length - whitespace_left, " ") + "\a\x03\a")
            extracted_whitespace = removed_whitespace + "".rjust(
                whitespace_left, " ")
        return new_tokens, extracted_whitespace
Example #8
0
    def compile_single_pragma(
        scan_file: str,
        next_line_number: int,
        pragma_lines: Dict[int, str],
        all_ids: Dict[str, FoundPlugin],
        document_pragmas: Dict[int, Set[str]],
        log_pragma_failure: Callable[[str, int, str], None],
    ) -> None:
        """
        Compile a single pragma line, validating it before adding it to the dictionary of pragmas.
        """
        # A non-positive line number signals the alternate pragma prefix.
        if next_line_number > 0:
            actual_line_number = next_line_number
            prefix_length = len(PragmaToken.pragma_prefix)
        else:
            actual_line_number = -next_line_number
            prefix_length = len(PragmaToken.pragma_alternate_prefix)

        line_after_prefix = pragma_lines[next_line_number][
            prefix_length:].rstrip()
        after_whitespace_index, _ = ParserHelper.extract_whitespace(
            line_after_prefix, 0)
        assert after_whitespace_index is not None
        # Strip the pragma title and suffix, leaving "<command> <arguments>".
        title_end = after_whitespace_index + len(PragmaToken.pragma_title)
        command_data = line_after_prefix[
            title_end:-len(PragmaToken.pragma_suffix)]
        after_command_index, command = ParserHelper.extract_until_whitespace(
            command_data, 0)
        assert command is not None
        command = command.lower()

        if not command:
            log_pragma_failure(
                scan_file,
                actual_line_number,
                "Inline configuration specified without command.",
            )
        elif command != "disable-next-line":
            log_pragma_failure(
                scan_file,
                actual_line_number,
                f"Inline configuration command '{command}' not understood.",
            )
        else:
            processed_ids = set()
            for raw_id in command_data[after_command_index:].split(","):
                candidate_id = raw_id.strip().lower()
                if not candidate_id:
                    log_pragma_failure(
                        scan_file,
                        actual_line_number,
                        f"Inline configuration command '{command}' specified a plugin with a blank id.",
                    )
                elif candidate_id in all_ids:
                    processed_ids.add(all_ids[candidate_id].plugin_id)
                else:
                    log_pragma_failure(
                        scan_file,
                        actual_line_number,
                        f"Inline configuration command '{command}' unable to find a plugin with the id '{candidate_id}'.",
                    )

            if processed_ids:
                document_pragmas[actual_line_number + 1] = processed_ids