def test_simple_case_from_end():
    """
    Verify extraction of a final word that runs to the end of the string.
    """
    # Arrange
    source_text = "this is a test"
    scan_start = 10
    expected = (len(source_text), "test")

    # Act
    result = ParserHelper.extract_until_whitespace(source_text, scan_start)

    # Assert
    assert result == expected
def test_empty_string_with_good_index():
    """
    Verify that an empty string with a valid index yields an empty extraction.
    """
    # Arrange
    source_text = ""
    scan_start = 0
    expected = (0, "")

    # Act
    result = ParserHelper.extract_until_whitespace(source_text, scan_start)

    # Assert
    assert result == expected
def test_empty_string_with_bad_left_index():
    """
    Verify that an index left of the string's start yields a (None, None) pair.
    """
    # Arrange
    source_text = ""
    scan_start = -1
    expected = (None, None)

    # Act
    result = ParserHelper.extract_until_whitespace(source_text, scan_start)

    # Assert
    assert result == expected
def test_already_on_whitespace():
    """
    Verify that starting on a whitespace character extracts nothing and
    leaves the index unchanged.
    """
    # Arrange
    source_text = "this is a test"
    scan_start = 9
    expected = (9, "")

    # Act
    result = ParserHelper.extract_until_whitespace(source_text, scan_start)

    # Assert
    assert result == expected
def next_line(self, context: PluginScanContext, line: str) -> None:
    """
    Event that a new line is being processed.

    Tracks which leaf token the current line belongs to, then compares the
    line's length against the configured limit, applying strict/stern
    behavior before reporting an error for the line.
    """
    # Advance the leaf-token cursor when the current line is the line on
    # which the next leaf token starts.
    if (self.__leaf_token_index + 1 < len(self.__leaf_tokens)
            and self.__line_index ==
            self.__leaf_tokens[self.__leaf_token_index + 1].line_number):
        self.__leaf_token_index += 1

    line_length = len(line)
    compare_length = self.__line_length
    is_actually_longer = False
    if line_length > self.__minimum_line_length:
        # The helper may substitute a different limit to compare against
        # (it returns a possibly-adjusted compare_length).
        is_actually_longer, compare_length = self.__is_really_longer(
            line_length, compare_length)
    if is_actually_longer:
        trigger_rule = False
        if self.__strict_mode:
            # Strict mode: any line over the limit triggers the rule.
            trigger_rule = True
        else:
            # Find where the run of non-whitespace characters starting at
            # the limit ends; a word already in progress at the boundary
            # is forgiven unless stern mode inverts that allowance.
            next_space_index, _ = ParserHelper.extract_until_whitespace(
                line, compare_length)
            if self.__stern_mode:
                trigger_rule = line_length == next_space_index
            else:
                trigger_rule = line_length != next_space_index
        if trigger_rule:
            extra_error_information = (
                f"Expected: {compare_length}, Actual: {line_length}")
            self.report_next_line_error(
                context, 1, extra_error_information=extra_error_information)
    # Always advance the line counter, whether or not the rule fired.
    self.__line_index += 1
def __process_fenced_start(
    parser_state: ParserState,
    position_marker: PositionMarker,
    non_whitespace_index: int,
    collected_count: int,
    extracted_whitespace: Optional[str],
    extracted_whitespace_before_info_string: Optional[str],
) -> List[MarkdownToken]:
    """
    Process the possible start of a fenced code block.

    If the fence character/info-string combination is legal (tilde fences,
    or backtick fences whose info string contains no backtick), close any
    open paragraph, emit a FencedCodeBlockMarkdownToken, and push a
    matching stack token.

    Returns the list of new tokens produced (empty when no block starts).
    """
    POGGER.debug("pfcb->check")
    new_tokens: List[MarkdownToken] = []
    # A backtick-fenced block may not have a backtick in its info string;
    # tilde fences have no such restriction.
    if (
        position_marker.text_to_parse[position_marker.index_number]
        == LeafBlockProcessor.__fenced_start_tilde
        or LeafBlockProcessor.__fenced_start_backtick
        not in position_marker.text_to_parse[non_whitespace_index:]
    ):
        POGGER.debug("pfcb->start")
        # Split the rest of the line into the info string (first word)
        # and everything after it.
        (
            after_extracted_text_index,
            extracted_text,
        ) = ParserHelper.extract_until_whitespace(
            position_marker.text_to_parse, non_whitespace_index
        )
        assert extracted_text is not None
        text_after_extracted_text = position_marker.text_to_parse[
            after_extracted_text_index:
        ]

        # Remember the stack top before closing blocks, for list correction
        # at the end.
        old_top_of_stack = parser_state.token_stack[-1]
        new_tokens, _ = parser_state.close_open_blocks_fn(
            parser_state,
            only_these_blocks=[ParagraphStackToken],
        )

        # Keep the pre-backslash-processing text so the token can record
        # the original form only when backslash handling changed it.
        pre_extracted_text, pre_text_after_extracted_text = (
            extracted_text,
            text_after_extracted_text,
        )

        assert extracted_text is not None
        extracted_text = InlineHelper.handle_backslashes(extracted_text)
        text_after_extracted_text = InlineHelper.handle_backslashes(
            text_after_extracted_text
        )

        # An empty "pre" field signals that no backslash rewriting occurred.
        if pre_extracted_text == extracted_text:
            pre_extracted_text = ""
        if pre_text_after_extracted_text == text_after_extracted_text:
            pre_text_after_extracted_text = ""

        assert extracted_whitespace is not None
        assert extracted_whitespace_before_info_string is not None
        new_token = FencedCodeBlockMarkdownToken(
            position_marker.text_to_parse[position_marker.index_number],
            collected_count,
            extracted_text,
            pre_extracted_text,
            text_after_extracted_text,
            pre_text_after_extracted_text,
            extracted_whitespace,
            extracted_whitespace_before_info_string,
            position_marker,
        )
        new_tokens.append(new_token)
        assert extracted_whitespace is not None
        parser_state.token_stack.append(
            FencedCodeBlockStackToken(
                code_fence_character=position_marker.text_to_parse[
                    position_marker.index_number
                ],
                fence_character_count=collected_count,
                whitespace_start_count=ParserHelper.calculate_length(
                    extracted_whitespace
                ),
                matching_markdown_token=new_token,
            )
        )
        POGGER.debug("StackToken-->$<<", parser_state.token_stack[-1])
        POGGER.debug(
            "StackToken>start_markdown_token-->$<<",
            parser_state.token_stack[-1].matching_markdown_token,
        )
        LeafBlockProcessor.correct_for_leaf_block_start_in_list(
            parser_state,
            position_marker.index_indent,
            old_top_of_stack,
            new_tokens,
        )
    return new_tokens
def parse_fenced_code_block(
    parser_state,
    position_marker,
    extracted_whitespace,
):
    """
    Handle the parsing of a fenced code block.

    Depending on parser state, this either closes an open fenced block when
    a matching closing fence is found, opens a new fenced block, or — for a
    text line inside an open block — strips the block's leading whitespace
    allowance from extracted_whitespace.

    Returns a (new_tokens, extracted_whitespace) pair; extracted_whitespace
    may have been rewritten with removal markers.
    """
    LOGGER.debug(
        "line>>%s>>index>>%s>>",
        position_marker.text_to_parse,
        position_marker.index_number,
    )
    new_tokens = []
    (
        is_fence_start,
        non_whitespace_index,
        extracted_whitespace_before_info_string,
        collected_count,
    ) = LeafBlockProcessor.is_fenced_code_block(
        position_marker.text_to_parse,
        position_marker.index_number,
        extracted_whitespace,
    )
    if is_fence_start and not parser_state.token_stack[-1].is_html_block:
        if parser_state.token_stack[-1].is_fenced_code_block:
            LOGGER.debug("pfcb->end")
            # A closing fence must use the same character, be at least as
            # long as the opening fence, and have nothing after it.
            if (parser_state.token_stack[-1].code_fence_character ==
                    position_marker.text_to_parse[
                        position_marker.index_number]
                    and collected_count >=
                    parser_state.token_stack[-1].fence_character_count
                    and non_whitespace_index >= len(
                        position_marker.text_to_parse)):
                new_end_token = parser_state.token_stack[
                    -1].generate_close_token(extracted_whitespace)
                new_tokens.append(new_end_token)
                new_end_token.start_markdown_token = parser_state.token_stack[
                    -1].start_markdown_token
                new_end_token.extra_end_data = str(collected_count)
                new_end_token.compose_data_field()
                del parser_state.token_stack[-1]
        else:
            LOGGER.debug("pfcb->check")
            # A backtick-fenced block may not have a backtick in its info
            # string; tilde fences have no such restriction.
            if (position_marker.text_to_parse[position_marker.index_number]
                    == LeafBlockProcessor.__fenced_start_tilde
                    or LeafBlockProcessor.__fenced_start_backtick not in
                    position_marker.text_to_parse[non_whitespace_index:]):
                LOGGER.debug("pfcb->start")
                # Split the rest of the line into the info string (first
                # word) and everything after it.
                (
                    after_extracted_text_index,
                    extracted_text,
                ) = ParserHelper.extract_until_whitespace(
                    position_marker.text_to_parse, non_whitespace_index)
                text_after_extracted_text = position_marker.text_to_parse[
                    after_extracted_text_index:]

                new_tokens, _, _ = parser_state.close_open_blocks_fn(
                    parser_state,
                    only_these_blocks=[ParagraphStackToken],
                )

                # Keep the pre-backslash-processing text so the token can
                # record the original form only when it actually changed.
                pre_extracted_text = extracted_text
                pre_text_after_extracted_text = text_after_extracted_text

                extracted_text = InlineHelper.handle_backslashes(
                    extracted_text, add_text_signature=False)
                text_after_extracted_text = InlineHelper.handle_backslashes(
                    text_after_extracted_text, add_text_signature=False)

                # An empty "pre" field signals that no rewriting occurred.
                if pre_extracted_text == extracted_text:
                    pre_extracted_text = ""
                if pre_text_after_extracted_text == text_after_extracted_text:
                    pre_text_after_extracted_text = ""

                new_token = FencedCodeBlockMarkdownToken(
                    position_marker.text_to_parse[
                        position_marker.index_number],
                    collected_count,
                    extracted_text,
                    pre_extracted_text,
                    text_after_extracted_text,
                    pre_text_after_extracted_text,
                    extracted_whitespace,
                    extracted_whitespace_before_info_string,
                    position_marker,
                )
                new_tokens.append(new_token)
                parser_state.token_stack.append(
                    FencedCodeBlockStackToken(
                        code_fence_character=position_marker.text_to_parse[
                            position_marker.index_number],
                        fence_character_count=collected_count,
                        whitespace_start_count=ParserHelper.
                        calculate_length(extracted_whitespace),
                        start_markdown_token=new_token,
                    ))
                LOGGER.debug("StackToken-->%s<<",
                             str(parser_state.token_stack[-1]))
                LOGGER.debug(
                    "StackToken>start_markdown_token-->%s<<",
                    str(parser_state.token_stack[-1].start_markdown_token),
                )
    elif (parser_state.token_stack[-1].is_fenced_code_block
          and parser_state.token_stack[-1].whitespace_start_count
          and extracted_whitespace):
        # Inside an open fenced block: the block's opening indentation is
        # consumed, so rewrite the leading whitespace with removal markers
        # (\a...\a\x03\a) and keep only the excess.
        current_whitespace_length = ParserHelper.calculate_length(
            extracted_whitespace)
        whitespace_left = max(
            0,
            current_whitespace_length -
            parser_state.token_stack[-1].whitespace_start_count,
        )
        LOGGER.debug("previous_ws>>%s", str(current_whitespace_length))
        LOGGER.debug("whitespace_left>>%s", str(whitespace_left))
        removed_whitespace = ("\a" + "".rjust(
            current_whitespace_length - whitespace_left, " ") + "\a\x03\a")
        extracted_whitespace = removed_whitespace + "".rjust(
            whitespace_left, " ")
    return new_tokens, extracted_whitespace
def compile_single_pragma(
    scan_file: str,
    next_line_number: int,
    pragma_lines: Dict[int, str],
    all_ids: Dict[str, FoundPlugin],
    document_pragmas: Dict[int, Set[str]],
    log_pragma_failure: Callable[[str, int, str], None],
) -> None:
    """
    Compile a single pragma line, validating it before adding it to the
    dictionary of pragmas.

    Args:
        scan_file: Path of the file being scanned, used in failure reports.
        next_line_number: Key into pragma_lines; a negative value encodes
            that the line used the alternate pragma prefix, with the real
            line number being its absolute value.
        pragma_lines: Map of (signed) line number to raw pragma line text.
        all_ids: Map of plugin id/alias to its FoundPlugin, used to
            normalize and validate referenced plugin ids.
        document_pragmas: Output map of line number to the set of plugin
            ids disabled on that line; updated on success.
        log_pragma_failure: Callback (file, line, message) invoked for any
            validation failure.
    """
    # The sign of the key selects which prefix length to strip; the actual
    # line number is always positive.
    if next_line_number > 0:
        prefix_length = len(PragmaToken.pragma_prefix)
        actual_line_number = next_line_number
    else:
        prefix_length = len(PragmaToken.pragma_alternate_prefix)
        actual_line_number = -next_line_number

    line_after_prefix = pragma_lines[next_line_number][
        prefix_length:].rstrip()
    after_whitespace_index, _ = ParserHelper.extract_whitespace(
        line_after_prefix, 0)
    assert after_whitespace_index is not None
    # Strip the pragma title at the front and the suffix at the end,
    # leaving "<command> <arguments>".
    command_data = line_after_prefix[
        after_whitespace_index +
        len(PragmaToken.pragma_title):-len(PragmaToken.pragma_suffix)]
    after_command_index, command = ParserHelper.extract_until_whitespace(
        command_data, 0)
    assert command is not None
    command = command.lower()

    if not command:
        log_pragma_failure(
            scan_file,
            actual_line_number,
            "Inline configuration specified without command.",
        )
    elif command == "disable-next-line":
        # Arguments are a comma-separated list of plugin ids to disable.
        ids_to_disable = command_data[after_command_index:].split(",")
        processed_ids = set()
        for next_id in ids_to_disable:
            next_id = next_id.strip().lower()
            if not next_id:
                log_pragma_failure(
                    scan_file,
                    actual_line_number,
                    f"Inline configuration command '{command}' specified a plugin with a blank id.",
                )
            elif next_id in all_ids:
                # Store the canonical plugin id, not the alias used here.
                normalized_id = all_ids[next_id].plugin_id
                processed_ids.add(normalized_id)
            else:
                log_pragma_failure(
                    scan_file,
                    actual_line_number,
                    f"Inline configuration command '{command}' unable to find a plugin with the id '{next_id}'.",
                )
        if processed_ids:
            # The pragma disables plugins on the line AFTER the pragma.
            document_pragmas[actual_line_number + 1] = processed_ids
    else:
        log_pragma_failure(
            scan_file,
            actual_line_number,
            f"Inline configuration command '{command}' not understood.",
        )