예제 #1
0
def test_remove_stop_words(input_tokens, input_stop_words, input_case,
                           tokens_with_stopwords_removed):
    """Test the removal of stop words from text file functionality"""
    stats = tokenizer.TextStatistics(FILENAME, STOPFILE, input_case)
    stats.token_list = input_tokens
    stats.stopwords = input_stop_words
    stats.number_of_tokens_with_stop_words_removed()
    assert stats.no_stopword_count == tokens_with_stopwords_removed
예제 #2
0
def test_char_type_count(test_input, letters, numbers, punctuation):
    """Test to get type of each character"""
    stats = tokenizer.TextStatistics(FILENAME, STOPFILE, False)
    stats.chars = test_input
    stats.get_char_type_count()
    assert stats.letters == letters
    assert stats.numbers == numbers
    assert stats.punctuation == punctuation
예제 #3
0
def test_average_len(test_input, expected_output):
    """Test to compute average length of words in the text file"""
    stats = tokenizer.TextStatistics(FILENAME, None, False)
    stats.token_list = test_input
    stats.get_average_len()
    assert stats.average_len == expected_output
예제 #4
0
def test_get_unique_count(test_input_tokens, test_input_case, expected_output):
    """Test to get unique words in the text file"""
    stats = tokenizer.TextStatistics(FILENAME, STOPFILE, test_input_case)
    stats.token_list = test_input_tokens
    stats.get_unique_count()
    assert stats.unique_tokens == expected_output
예제 #5
0
def test_stopwords_file_not_exist():
    """Test reading Non existing files"""
    with pytest.raises(IOError):
        stats = tokenizer.TextStatistics(FILENAME, 'dummy.txt', False)
        stats.read_stop_list()
예제 #6
0
def test_read_None_stopfile():
    """Test for the stop file=None"""
    stats = tokenizer.TextStatistics(FILENAME, None, False)
    assert stats.stopwords == set()
예제 #7
0
def test_read_stop_list(test_input, test_input_case, expected_output):
    """Test reading stopwords file"""
    stopfile = os.path.join(ROOT_PATH, 'data', test_input)
    stats = tokenizer.TextStatistics(FILENAME, stopfile, test_input_case)
    assert stats.stopwords == expected_output
예제 #8
0
def test_read_file_not_exist():
    """Test file reading and tokenizing for Non existing files"""
    with pytest.raises(IOError):
        tokenizer.TextStatistics('dummy.txt', STOPFILE, False)
예제 #9
0
def test_file_read(test_input, expected_tokenlist, expected_chars):
    """Test file reading and tokenizing"""
    filename = os.path.join(ROOT_PATH, 'data', test_input)
    stats = tokenizer.TextStatistics(filename, STOPFILE, False)
    assert stats.token_list == expected_tokenlist
    assert stats.chars == expected_chars