import os

import pytest

import tokenizer

# Module-level fixtures assumed by the tests below; the names come from the
# tests themselves, but the concrete paths are illustrative, not confirmed:
ROOT_PATH = os.path.dirname(os.path.abspath(__file__))
FILENAME = os.path.join(ROOT_PATH, 'data', 'sample.txt')
STOPFILE = os.path.join(ROOT_PATH, 'data', 'stopwords.txt')


def test_remove_stop_words(input_tokens, input_stop_words, input_case,
                           tokens_with_stopwords_removed):
    """Test removal of stop words from the tokenized text."""
    stats = tokenizer.TextStatistics(FILENAME, STOPFILE, input_case)
    stats.token_list = input_tokens
    stats.stopwords = input_stop_words
    stats.number_of_tokens_with_stop_words_removed()
    assert stats.no_stopword_count == tokens_with_stopwords_removed

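# The arguments above are presumably supplied by a pytest.mark.parametrize
# decorator on the test. A hypothetical case, with illustrative values not
# taken from the original suite, might look like:
#
# @pytest.mark.parametrize(
#     'input_tokens, input_stop_words, input_case, '
#     'tokens_with_stopwords_removed',
#     [(['the', 'quick', 'brown', 'fox'], {'the'}, False, 3)],
# )
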
def test_char_type_count(test_input, letters, numbers, punctuation):
    """Test counting letters, numbers, and punctuation characters."""
    stats = tokenizer.TextStatistics(FILENAME, STOPFILE, False)
    stats.chars = test_input
    stats.get_char_type_count()
    assert stats.letters == letters
    assert stats.numbers == numbers
    assert stats.punctuation == punctuation

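# A hypothetical parametrization for the test above (values illustrative):
# the string 'ab1.' contains two letters, one digit, and one punctuation
# character.
#
# @pytest.mark.parametrize(
#     'test_input, letters, numbers, punctuation',
#     [('ab1.', 2, 1, 1)],
# )
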
def test_average_len(test_input, expected_output):
    """Test computing the average word length in the text file."""
    stats = tokenizer.TextStatistics(FILENAME, None, False)
    stats.token_list = test_input
    stats.get_average_len()
    assert stats.average_len == expected_output

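# A hypothetical case for the test above (values illustrative):
# ['cat', 'horse'] averages (3 + 5) / 2 = 4.0 characters per token.
#
# @pytest.mark.parametrize(
#     'test_input, expected_output',
#     [(['cat', 'horse'], 4.0)],
# )
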
def test_get_unique_count(test_input_tokens, test_input_case, expected_output):
    """Test counting unique words in the text file."""
    stats = tokenizer.TextStatistics(FILENAME, STOPFILE, test_input_case)
    stats.token_list = test_input_tokens
    stats.get_unique_count()
    assert stats.unique_tokens == expected_output

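# A hypothetical parametrization for the test above (values illustrative;
# the semantics of the case flag are assumed, not confirmed by this file):
#
# @pytest.mark.parametrize(
#     'test_input_tokens, test_input_case, expected_output',
#     [(['apple', 'banana', 'apple'], False, 2)],
# )
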
def test_stopwords_file_not_exist():
    """Test that a non-existent stop words file raises IOError."""
    with pytest.raises(IOError):
        stats = tokenizer.TextStatistics(FILENAME, 'dummy.txt', False)
        stats.read_stop_list()

def test_read_None_stopfile():
    """Test that stopwords default to an empty set when the stop file is None."""
    stats = tokenizer.TextStatistics(FILENAME, None, False)
    assert stats.stopwords == set()

def test_read_stop_list(test_input, test_input_case, expected_output):
    """Test reading the stop words file."""
    stopfile = os.path.join(ROOT_PATH, 'data', test_input)
    stats = tokenizer.TextStatistics(FILENAME, stopfile, test_input_case)
    assert stats.stopwords == expected_output

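# A hypothetical parametrization for the test above; the file name and its
# expected contents are illustrative, not actual fixtures from data/:
#
# @pytest.mark.parametrize(
#     'test_input, test_input_case, expected_output',
#     [('stopwords.txt', False, {'a', 'an', 'the'})],
# )
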
def test_read_file_not_exist():
    """Test that reading a non-existent input file raises IOError."""
    with pytest.raises(IOError):
        tokenizer.TextStatistics('dummy.txt', STOPFILE, False)

def test_file_read(test_input, expected_tokenlist, expected_chars):
    """Test file reading and tokenizing."""
    filename = os.path.join(ROOT_PATH, 'data', test_input)
    stats = tokenizer.TextStatistics(filename, STOPFILE, False)
    assert stats.token_list == expected_tokenlist
    assert stats.chars == expected_chars

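# A hypothetical parametrization for the test above; the fixture file name
# and expected values are illustrative:
#
# @pytest.mark.parametrize(
#     'test_input, expected_tokenlist, expected_chars',
#     [('tiny.txt', ['hello', 'world'], 'hello world')],
# )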