Example #1
def test_remove_stopwords(_nltk, text_data, tokenized_test_list_main):
    assert (
        to_list(remove_stopwords(to_list(to_lower(tokenized_test_list_main))))
        == text_data["answer_key_remove_stop_words"]
    ), "Output did not correctly remove NLTK stopwords from documents"
    assert (
        to_list(
            remove_stopwords(
                to_list(to_lower(tokenized_test_list_main)),
                custom_stopwords=text_data["more_words"],
            )
        )
        == text_data["answer_key_remove_stop_words_more"]
    ), "Output did not correctly remove NLTK stopwords and custom added stopwords from documents"
Example #2
def test_remove_punct(_nltk, text_data, tokenized_test_list_main):
    assert (
        to_list(remove_punct(tokenized_test_list_main))
        == text_data["answer_key_remove_punct"]
    ), "Output did not correctly remove punctuation inside of words from documents"
    assert (
        to_list(
            remove_punct(tokenized_test_list_main, remove_all=True, replace_char="|")
        )
        == text_data["answer_key_replace_all_punct_with_pipe"]
    ), "Output did not correctly replace all punctuation with the | character"
    assert (
        to_list(remove_punct(tokenized_test_list_main, remove_all=True))
        == text_data["answer_key_remove_all_punct"]
    ), "Output did not correctly remove all punctuation from documents"
Example #3
def test_remove_single_char_and_spaces(text_data):
    assert (
        to_list(
            remove_single_char_and_spaces(
                text_data["test_list_single_char_and_spaces"]
            )
        )
        == text_data["answer_key_single_char_and_spaces"]
    ), "Output did not correctly remove all instances of single character words and spaces from documents"
Example #4
def test_custom_pipeline(_nltk, text_data):
    def shout(text_docs_bow):
        # Custom pipeline step: upper-case every token in every document
        return [[word.upper() for word in doc] for doc in text_docs_bow]

    assert (
        to_list(
            preprocess_texts(
                text_data["test_list_custom"],
                custom_pipeline=["tokenize", shout],
            )
        )
        == text_data["answer_key_custom"]
    ), "Output did not correctly incorporate custom function into preprocessing pipeline"
Example #5
def test_bag_of_words(_nltk, text_data):
    bag = to_list(
        bag_of_words_to_docs(preprocess_texts(text_data["test_list_main"]))
    )

    assert len(text_data["test_list_main"]) == len(
        bag
    ), "Number of documents in input does not match number of documents in output"
    assert isinstance(bag, list), "Output is not of the expected return type of list"
    assert isinstance(
        bag[0], str
    ), "Output does not contain expected type of string inside of return value"
Example #6
def test_remove_digits(text_data):
    assert (
        to_list(remove_digits(text_data["test_list_digits"]))
        == text_data["answer_key_remove_digits"]
    ), "Output did not correctly remove all digits from documents"
Example #7
def test_to_lower(_nltk, text_data, tokenized_test_list_main):
    assert (
        to_list(to_lower(tokenized_test_list_main)) == text_data["answer_key_lower"]
    ), "Output did not correctly convert documents to lowercase"
Example #8
def test_tokenizer(_nltk, text_data):
    assert (
        to_list(tokenize(text_data["test_list_main"]))
        == text_data["answer_key_tokenized"]
    ), "Output did not correctly tokenize documents"
Example #9
@pytest.fixture  # decorator inferred: the tests above consume this as a pytest fixture
def tokenized_test_list_main(text_data):
    return to_list(tokenize(text_data["test_list_main"]))