def test_remove_digits_punctuation(self):
    """A series holding only `string.punctuation` must come back unchanged."""
    punctuation_only = pd.Series(string.punctuation)
    expected = pd.Series(string.punctuation)

    cleaned = preprocessing.remove_digits(punctuation_only)

    self.assertEqual(cleaned, expected)
def test_remove_digits_end(self):
    """A block of digits at the very end of the text must be cleaned out."""
    cleaned = preprocessing.remove_digits(
        pd.Series("end digits needs to be cleaned out 123")
    )
    expected = pd.Series("end digits needs to be cleaned out ")

    self.assertEqual(cleaned, expected)
def test_remove_digits_phone(self):
    """Every digit group of a phone-number-like string is removed; the `+` stays."""
    phone_number = pd.Series("+41 1234 5678")
    expected = pd.Series("+ ")

    cleaned = preprocessing.remove_digits(phone_number)

    self.assertEqual(cleaned, expected)
def test_remove_digits_start(self):
    """A block of digits at the very start of the text must be cleaned out."""
    cleaned = preprocessing.remove_digits(
        pd.Series("123 starting digits needs to be cleaned out")
    )
    expected = pd.Series(" starting digits needs to be cleaned out")

    self.assertEqual(cleaned, expected)
def test_remove_digits_brackets(self):
    """Digits enclosed in brackets are removed; brackets and `$` are kept."""
    bracketed = pd.Series("Digits in bracket (123 $) needs to be cleaned out")
    expected = pd.Series("Digits in bracket ( $) needs to be cleaned out")

    cleaned = preprocessing.remove_digits(bracketed)

    self.assertEqual(cleaned, expected)
def test_remove_digits_any(self):
    """With `only_blocks=False`, digits embedded in a word (`h1n1`) go as well."""
    mixed = pd.Series("remove block of digits 1234 h1n1")
    expected = pd.Series("remove block of digits h n ")

    cleaned = preprocessing.remove_digits(mixed, only_blocks=False)

    self.assertEqual(cleaned, expected)
from texthero import preprocessing import pandas as pd """ Test `remove_digits` """ text = "remove_digits remove all the 1234 digits of a pandas series. H1N1" text_preprocessed = "remove_digits remove all the digits of a pandas series. H1N1" text_preprocessed_block_false = "remove_digits remove all the digits of a pandas series. HN" assert preprocessing.remove_digits(pd.Series(text)).equals( pd.Series(text_preprocessed)) assert preprocessing.remove_digits(pd.Series(text), only_blocks=False).equals( pd.Series(text_preprocessed_block_false)) """ Test `remove_punctuations` """ text = "hello." text_preprocessed = "hello " assert preprocessing.remove_punctuation(pd.Series(text)).equals( pd.Series(text_preprocessed)) """ Test `remove_diacritics` """ text = "hèllo" text_preprocessed = "hello" assert preprocessing.remove_diacritics(pd.Series(text)).equals(
from texthero import preprocessing
import pandas as pd

""" Test `remove_digits` """

# Table of scenarios, checked in order. Each entry is
# (raw text, expected text, extra keyword arguments for `remove_digits`).
_REMOVE_DIGITS_CASES = (
    # Check block
    (
        "remove block of digits 1234 h1n1",
        "remove block of digits h1n1",
        {},
    ),
    # Check with only_blocks = False
    (
        "remove block of digits 1234 h1n1",
        "remove block of digits h n ",
        {"only_blocks": False},
    ),
    # Check in brackets
    (
        "Digits in bracket (123 $) needs to be cleaned out",
        "Digits in bracket ( $) needs to be cleaned out",
        {},
    ),
    # Check start digits
    (
        "123 starting digits needs to be cleaned out",
        " starting digits needs to be cleaned out",
        {},
    ),
    # Check end digits
    (
        "end digits needs to be cleaned out 123",
        "end digits needs to be cleaned out ",
        {},
    ),
)

for raw_text, expected_text, options in _REMOVE_DIGITS_CASES:
    raw_series = pd.Series(raw_text)
    expected_series = pd.Series(expected_text)
    assert preprocessing.remove_digits(raw_series, **options).equals(expected_series)