Python TextStatistics 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: tokens.tokenizer

메소드/함수: TextStatistics

hotexamples.com에서의 예제들: 9

Python TextStatistics - 예제 9개를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한, Python의 tokens.tokenizer.TextStatistics를 실제로 사용하는 예제 가운데 평가가 가장 높은 것들입니다. 예제를 평가해 주시면 예제 품질을 높이는 데 도움이 됩니다.

예제 #1

파일 보기

def test_remove_stop_words(input_tokens, input_stop_words, input_case,
                           tokens_with_stopwords_removed):
    """Verify the token count left after stop words are removed.

    The instance's ``token_list`` and ``stopwords`` attributes are
    overwritten with the supplied inputs, the counting method is invoked,
    and ``no_stopword_count`` is compared with the expected value.
    """
    text_stats = tokenizer.TextStatistics(FILENAME, STOPFILE, input_case)
    # Replace the instance state with the test inputs so the count depends
    # only on the arguments handed to this test.
    text_stats.token_list = input_tokens
    text_stats.stopwords = input_stop_words
    text_stats.number_of_tokens_with_stop_words_removed()
    remaining = text_stats.no_stopword_count
    assert remaining == tokens_with_stopwords_removed

예제 #2

파일 보기

def test_char_type_count(test_input, letters, numbers, punctuation):
    """Verify the per-category character counters.

    ``chars`` is overwritten with ``test_input`` before
    ``get_char_type_count()`` runs; afterwards the ``letters``, ``numbers``
    and ``punctuation`` attributes are checked, in that order.
    """
    text_stats = tokenizer.TextStatistics(FILENAME, STOPFILE, False)
    text_stats.chars = test_input
    text_stats.get_char_type_count()
    # Checked one at a time, in the same order as the attribute names, so
    # the first mismatch is the one reported.
    expectations = (
        ("letters", letters),
        ("numbers", numbers),
        ("punctuation", punctuation),
    )
    for attribute, expected in expectations:
        assert getattr(text_stats, attribute) == expected

예제 #3

파일 보기

def test_average_len(test_input, expected_output):
    """Verify the average token length computed by ``get_average_len()``.

    The instance is built with ``None`` as the stop-file argument and its
    ``token_list`` is overwritten with ``test_input`` before the
    computation.
    """
    text_stats = tokenizer.TextStatistics(FILENAME, None, False)
    text_stats.token_list = test_input
    text_stats.get_average_len()
    computed_average = text_stats.average_len
    assert computed_average == expected_output

예제 #4

파일 보기

def test_get_unique_count(test_input_tokens, test_input_case, expected_output):
    """Verify the result that ``get_unique_count()`` stores in ``unique_tokens``.

    ``token_list`` is overwritten with ``test_input_tokens`` and
    ``test_input_case`` is forwarded as the constructor's third argument.
    """
    text_stats = tokenizer.TextStatistics(FILENAME, STOPFILE, test_input_case)
    text_stats.token_list = test_input_tokens
    text_stats.get_unique_count()
    unique_result = text_stats.unique_tokens
    assert unique_result == expected_output

예제 #5

파일 보기

def test_stopwords_file_not_exist():
    """Expect ``IOError`` when the stop-word file path is ``'dummy.txt'``.

    NOTE(review): both the constructor call and ``read_stop_list()`` sit
    inside the ``raises`` block, so either one raising satisfies the test;
    confirm which call is actually expected to fail.
    """
    missing_stopfile = 'dummy.txt'
    with pytest.raises(IOError):
        tokenizer.TextStatistics(
            FILENAME, missing_stopfile, False).read_stop_list()

예제 #6

파일 보기

def test_read_None_stopfile():
    """Verify ``stopwords`` equals an empty set when the stop file is ``None``."""
    text_stats = tokenizer.TextStatistics(FILENAME, None, False)
    loaded_stopwords = text_stats.stopwords
    assert loaded_stopwords == set()

예제 #7

파일 보기

def test_read_stop_list(test_input, test_input_case, expected_output):
    """Verify the stop words available on the instance after construction.

    ``test_input`` is a file name resolved under ``ROOT_PATH/data``; the
    resulting path is passed as the stop-file argument.
    """
    stopfile_path = os.path.join(ROOT_PATH, 'data', test_input)
    loaded_stopwords = tokenizer.TextStatistics(
        FILENAME, stopfile_path, test_input_case).stopwords
    assert loaded_stopwords == expected_output

예제 #8

파일 보기

def test_read_file_not_exist():
    """Expect ``IOError`` from the constructor when the input file is ``'dummy.txt'``."""
    missing_input = 'dummy.txt'
    with pytest.raises(IOError):
        tokenizer.TextStatistics(missing_input, STOPFILE, False)

예제 #9

파일 보기

def test_file_read(test_input, expected_tokenlist, expected_chars):
    """Verify ``token_list`` and ``chars`` after constructing from a data file.

    ``test_input`` is a file name resolved under ``ROOT_PATH/data``.
    """
    data_path = os.path.join(ROOT_PATH, 'data', test_input)
    text_stats = tokenizer.TextStatistics(data_path, STOPFILE, False)
    # Tokens are checked before characters, so a tokenizing failure is
    # reported first.
    actual_tokens = text_stats.token_list
    assert actual_tokens == expected_tokenlist
    actual_chars = text_stats.chars
    assert actual_chars == expected_chars