import os

import pytest

import tokenizer

# Module-level fixtures assumed by the tests below; the names come from the
# tests themselves, but the concrete paths are illustrative, not confirmed:
ROOT_PATH = os.path.dirname(os.path.abspath(__file__))
FILENAME = os.path.join(ROOT_PATH, 'data', 'sample.txt')
STOPFILE = os.path.join(ROOT_PATH, 'data', 'stopwords.txt')


def test_remove_stop_words(input_tokens, input_stop_words, input_case,
                           tokens_with_stopwords_removed):
    """Test removal of stop words from the tokenized text."""
    stats = tokenizer.TextStatistics(FILENAME, STOPFILE, input_case)
    stats.token_list = input_tokens
    stats.stopwords = input_stop_words
    stats.number_of_tokens_with_stop_words_removed()
    assert stats.no_stopword_count == tokens_with_stopwords_removed

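# The arguments above are presumably supplied by a pytest.mark.parametrize
# decorator on the test. A hypothetical case, with illustrative values not
# taken from the original suite, might look like:
#
# @pytest.mark.parametrize(
#     'input_tokens, input_stop_words, input_case, '
#     'tokens_with_stopwords_removed',
#     [(['the', 'quick', 'brown', 'fox'], {'the'}, False, 3)],
# )
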
def test_char_type_count(test_input, letters, numbers, punctuation):
    """Test counting letters, numbers, and punctuation characters."""
    stats = tokenizer.TextStatistics(FILENAME, STOPFILE, False)
    stats.chars = test_input
    stats.get_char_type_count()
    assert stats.letters == letters
    assert stats.numbers == numbers
    assert stats.punctuation == punctuation

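# A hypothetical parametrization for the test above (values illustrative):
# the string 'ab1.' contains two letters, one digit, and one punctuation
# character.
#
# @pytest.mark.parametrize(
#     'test_input, letters, numbers, punctuation',
#     [('ab1.', 2, 1, 1)],
# )
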
def test_average_len(test_input, expected_output):
    """Test computing the average word length in the text file."""
    stats = tokenizer.TextStatistics(FILENAME, None, False)
    stats.token_list = test_input
    stats.get_average_len()
    assert stats.average_len == expected_output

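# A hypothetical case for the test above (values illustrative):
# ['cat', 'horse'] averages (3 + 5) / 2 = 4.0 characters per token.
#
# @pytest.mark.parametrize(
#     'test_input, expected_output',
#     [(['cat', 'horse'], 4.0)],
# )
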
def test_get_unique_count(test_input_tokens, test_input_case, expected_output):
    """Test counting unique words in the text file."""
    stats = tokenizer.TextStatistics(FILENAME, STOPFILE, test_input_case)
    stats.token_list = test_input_tokens
    stats.get_unique_count()
    assert stats.unique_tokens == expected_output

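# A hypothetical parametrization for the test above (values illustrative;
# the semantics of the case flag are assumed, not confirmed by this file):
#
# @pytest.mark.parametrize(
#     'test_input_tokens, test_input_case, expected_output',
#     [(['apple', 'banana', 'apple'], False, 2)],
# )
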
def test_stopwords_file_not_exist():
    """Test that a non-existent stop words file raises IOError."""
    with pytest.raises(IOError):
        stats = tokenizer.TextStatistics(FILENAME, 'dummy.txt', False)
        stats.read_stop_list()

def test_read_None_stopfile():
    """Test that stopwords default to an empty set when the stop file is None."""
    stats = tokenizer.TextStatistics(FILENAME, None, False)
    assert stats.stopwords == set()

def test_read_stop_list(test_input, test_input_case, expected_output):
    """Test reading the stop words file."""
    stopfile = os.path.join(ROOT_PATH, 'data', test_input)
    stats = tokenizer.TextStatistics(FILENAME, stopfile, test_input_case)
    assert stats.stopwords == expected_output

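# A hypothetical parametrization for the test above; the file name and its
# expected contents are illustrative, not actual fixtures from data/:
#
# @pytest.mark.parametrize(
#     'test_input, test_input_case, expected_output',
#     [('stopwords.txt', False, {'a', 'an', 'the'})],
# )
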
def test_read_file_not_exist():
    """Test that reading a non-existent input file raises IOError."""
    with pytest.raises(IOError):
        tokenizer.TextStatistics('dummy.txt', STOPFILE, False)

def test_file_read(test_input, expected_tokenlist, expected_chars):
    """Test file reading and tokenizing."""
    filename = os.path.join(ROOT_PATH, 'data', test_input)
    stats = tokenizer.TextStatistics(filename, STOPFILE, False)
    assert stats.token_list == expected_tokenlist
    assert stats.chars == expected_chars

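# A hypothetical parametrization for the test above; the fixture file name
# and expected values are illustrative:
#
# @pytest.mark.parametrize(
#     'test_input, expected_tokenlist, expected_chars',
#     [('tiny.txt', ['hello', 'world'], 'hello world')],
# )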