Python preprocess_text Examples

Programming Language: Python

Namespace/Package Name: nlprov.preprocessing

Method/Function: preprocess_text

Examples at hotexamples.com: 14

Python preprocess_text - 14 examples found. These are the top-rated real-world Python examples of nlprov.preprocessing.preprocess_text extracted from open source projects. You can rate examples to help us improve their quality.

Example #1

Show file

def test_whitespace_removal(whitespace_removal_actual,
                            whitespace_removal_expected):
    """Compare preprocess_text output against the whitespace fixture.

    Lowercasing and the English-language option are switched off, and the
    regex is set to ``(?!).*`` -- an empty negative lookahead, which can
    never match. NOTE(review): presumably this neutralises the regex step so
    only whitespace handling is exercised; confirm against preprocess_text.
    """
    options = {
        'lowercase': False,
        'regex': '(?!).*',
        'eng_lang': False,
    }
    result = preprocess_text(whitespace_removal_actual, **options)
    pd.testing.assert_series_equal(whitespace_removal_expected, result)

Example #2

Show file

def test_regex_default(sents_regex, sents_default_expected):
    """Run preprocess_text without passing ``regex`` and check the result.

    ``lowercase`` and ``eng_lang`` are both disabled; ``regex`` is left at
    whatever default preprocess_text defines.
    """
    actual = preprocess_text(sents_regex, lowercase=False, eng_lang=False)
    pd.testing.assert_series_equal(sents_default_expected, actual)

Example #3

Show file

def test_lowercase(lowercase_actual, lowercase_expected):
    """Check preprocess_text with ``lowercase`` left at its default.

    The regex ``(?!).*`` is an empty negative lookahead and cannot match
    anything; ``eng_lang`` is disabled.
    NOTE(review): presumably the default for ``lowercase`` is True -- confirm.
    """
    settings = dict(regex='(?!).*', eng_lang=False)
    result = preprocess_text(lowercase_actual, **settings)
    pd.testing.assert_series_equal(lowercase_expected, result)

Example #4

Show file

def test_stopword_default(stopword_sents):
    """With every option at its default, the fixture must come back unchanged.

    The expected value is the input series itself.
    """
    pd.testing.assert_series_equal(
        stopword_sents,
        preprocess_text(stopword_sents),
    )

Example #5

Show file

def test_stopword_removal(stopword_sents, stopword_removal_expected):
    """Check preprocess_text output when ``stop_words=True`` is requested."""
    result = preprocess_text(
        stopword_sents,
        stop_words=True,
    )
    pd.testing.assert_series_equal(stopword_removal_expected, result)

Example #6

Show file

def test_stem_and_lemma(stem_actual):
    """Requesting ``stem`` and ``lemma`` together must raise an exception.

    Any ``Exception`` subclass satisfies the check; ``eng_lang`` is disabled.
    """
    conflicting_options = {'stem': True, 'lemma': True, 'eng_lang': False}
    with pytest.raises(Exception):
        preprocess_text(stem_actual, **conflicting_options)

Example #7

Show file

def test_language(language_actual, language_expected):
    """Check preprocess_text output when ``eng_lang=True`` is set explicitly."""
    result = preprocess_text(
        language_actual,
        eng_lang=True,
    )
    pd.testing.assert_series_equal(language_expected, result)

Example #8

Show file

def test_token_list2(token_list_actual2, token_list_expected2):
    """Check ``token_list=True`` output with lemmatisation switched off.

    ``eng_lang`` is disabled as well.
    """
    options = dict(lemma=False, token_list=True, eng_lang=False)
    tokens = preprocess_text(token_list_actual2, **options)
    pd.testing.assert_series_equal(token_list_expected2, tokens)

Example #9

Show file

def test_stem(stem_actual, stem_expected):
    """Check preprocess_text output with ``stem=True`` and ``eng_lang=False``."""
    stemmed = preprocess_text(
        stem_actual,
        stem=True,
        eng_lang=False,
    )
    pd.testing.assert_series_equal(stem_expected, stemmed)

Example #10

Show file

def test_token_list(lemma_actual, token_list_expected):
    """Check ``token_list=True`` output with lemmatisation switched on.

    ``eng_lang`` is disabled.
    """
    options = {
        'lemma': True,
        'token_list': True,
        'eng_lang': False,
    }
    tokens = preprocess_text(lemma_actual, **options)
    pd.testing.assert_series_equal(token_list_expected, tokens)

Example #11

Show file

def test_nan_replace(nan_removal_actual, nan_replace_expected):
    """Check preprocess_text output when ``nan_handling='bad'`` is passed.

    ``eng_lang`` is disabled.
    NOTE(review): how the value 'bad' is interpreted (e.g. as a replacement
    string for missing entries) is not visible here -- confirm against
    preprocess_text.
    """
    result = preprocess_text(
        nan_removal_actual, nan_handling='bad', eng_lang=False
    )
    pd.testing.assert_series_equal(nan_replace_expected, result)

Example #12

Show file

def test_dict_replace(dict_replace_actual, dict_replace_expected, sample_dict):
    """Check preprocess_text output when ``sample_dict`` is the ``replace_dict``.

    ``eng_lang`` is disabled.
    """
    options = dict(replace_dict=sample_dict, eng_lang=False)
    replaced = preprocess_text(dict_replace_actual, **options)
    pd.testing.assert_series_equal(dict_replace_expected, replaced)

Example #13

Show file

def test_regex_cases(sents_regex, expected, regex):
    """Check preprocess_text output for a caller-supplied ``regex`` value.

    ``lowercase`` and ``eng_lang`` are both disabled; ``regex`` is forwarded
    to preprocess_text unchanged.
    """
    options = {
        'lowercase': False,
        'regex': regex,
        'eng_lang': False,
    }
    actual = preprocess_text(sents_regex, **options)
    pd.testing.assert_series_equal(expected, actual)

Example #14

Show file

File: nlp_example.py Project: sara-von-hein-shaw/nlprov

"""
Copyright © 2020 Johnson & Johnson
"""

import pandas as pd
from nlprov.preprocessing import preprocess_text
from nlprov.vectorize import vectorize_text, vectorize_new_text
from nlprov.similarity_calc import similarity_calculation

# Sample corpus: irregular whitespace, mixed case, punctuation-only strings,
# and French / German / mixed English-Spanish sentences.
# The backslash in the third entry is escaped explicitly ("\\-"): a bare "\-"
# is an invalid escape sequence that newer Python versions warn about. The
# resulting string value is unchanged.
text = pd.Series(data=[
    "  Combination  of   spaces.    ", "MixEd CASe", ",./;'[]\\-=",
    '<>?:"{}|_+', '!@#$%^&*()`~"', "lemmas needed",
    "ducks and cats and ponies are not similar", "c'est français",
    "das ist deutsch", "this is una mezcla"
])

# Preprocess with default options, then vectorize. vectorize_text returns two
# values: the vectorized text and a second object that is reused below.
# NOTE(review): vec_obj is presumably the fitted vectorizer -- confirm.
preprocessed_text = preprocess_text(text)
vec_text, vec_obj = vectorize_text(preprocessed_text)

# Push a new sentence through the same preprocessing and vectorize it with the
# object returned by the first vectorization.
new_text = pd.Series(data=["ducks and cats are not similar"])
new_preprocessed_text = preprocess_text(new_text)
new_vec_text = vectorize_new_text(new_preprocessed_text, vec_obj)

# Compare the new sentence's vectors against the original corpus vectors.
similarity = similarity_calculation(new_vec_text, vec_text)