def test_remove_digits_punctuation(self):
     """`remove_digits` is expected to return punctuation-only text unchanged."""
     expected = pd.Series(string.punctuation)
     cleaned = preprocessing.remove_digits(pd.Series(string.punctuation))
     self.assertEqual(cleaned, expected)
 def test_remove_digits_end(self):
     """A trailing `123` is expected to be replaced by a single space."""
     expected = pd.Series("end digits needs to be cleaned out  ")
     cleaned = preprocessing.remove_digits(
         pd.Series("end digits needs to be cleaned out 123")
     )
     self.assertEqual(cleaned, expected)
 def test_remove_digits_phone(self):
     """Each digit group of a phone number is expected to collapse to one space.

     Only the leading `+` and whitespace remain in the expected result.
     """
     expected = pd.Series("+     ")
     cleaned = preprocessing.remove_digits(pd.Series("+41 1234 5678"))
     self.assertEqual(cleaned, expected)
 def test_remove_digits_start(self):
     """A leading `123` is expected to be replaced by a single space."""
     expected = pd.Series("  starting digits needs to be cleaned out")
     cleaned = preprocessing.remove_digits(
         pd.Series("123 starting digits needs to be cleaned out")
     )
     self.assertEqual(cleaned, expected)
 def test_remove_digits_brackets(self):
     """Digits right after an opening bracket are expected to become one space.

     The bracket itself and the `$` that follows must be left in place.
     """
     expected = pd.Series("Digits in bracket (  $) needs to be cleaned out")
     cleaned = preprocessing.remove_digits(
         pd.Series("Digits in bracket (123 $) needs to be cleaned out")
     )
     self.assertEqual(cleaned, expected)
 def test_remove_digits_any(self):
     """With `only_blocks=False`, digits embedded in a word are expected to go too.

     Both the standalone `1234` and the digits inside `h1n1` are replaced by
     spaces in the expected result.
     """
     expected = pd.Series("remove block of digits   h n ")
     cleaned = preprocessing.remove_digits(
         pd.Series("remove block of digits 1234 h1n1"),
         only_blocks=False,
     )
     self.assertEqual(cleaned, expected)
from texthero import preprocessing
import pandas as pd
"""
Test `remove_digits`
"""

# One sentence, checked in two modes: the default call and `only_blocks=False`.
text = "remove_digits remove all the 1234 digits of a pandas series. H1N1"
text_preprocessed = "remove_digits remove all the digits of a pandas series. H1N1"
text_preprocessed_block_false = "remove_digits remove all the  digits of a pandas series. HN"

# Default call: the expected text drops the standalone `1234` and keeps `H1N1`.
cleaned_default = preprocessing.remove_digits(pd.Series(text))
assert cleaned_default.equals(pd.Series(text_preprocessed))

# only_blocks=False: the expected text also loses the digits inside `H1N1`.
cleaned_everywhere = preprocessing.remove_digits(pd.Series(text), only_blocks=False)
assert cleaned_everywhere.equals(pd.Series(text_preprocessed_block_false))
"""
Test `remove_punctuation`
"""

# A trailing full stop is expected to be replaced by a single space.
text = "hello."
text_preprocessed = "hello "

without_punctuation = preprocessing.remove_punctuation(pd.Series(text))
assert without_punctuation.equals(pd.Series(text_preprocessed))
"""
Test `remove_diacritics`
"""

text = "hèllo"
text_preprocessed = "hello"

assert preprocessing.remove_diacritics(pd.Series(text)).equals(
from texthero import preprocessing
import pandas as pd
"""
Test `remove_digits`
"""

# Table of `remove_digits` scenarios: (input text, keyword arguments, expected text).
# An empty kwargs dict means the function is called with its defaults.
remove_digits_cases = [
    # Standalone block is replaced; digits inside `h1n1` are kept.
    (
        "remove block of digits 1234 h1n1",
        {},
        "remove block of digits   h1n1",
    ),
    # only_blocks=False: digits inside words are replaced as well.
    (
        "remove block of digits 1234 h1n1",
        {"only_blocks": False},
        "remove block of digits   h n ",
    ),
    # Digits directly after an opening bracket.
    (
        "Digits in bracket (123 $) needs to be cleaned out",
        {},
        "Digits in bracket (  $) needs to be cleaned out",
    ),
    # Digits at the very start of the text.
    (
        "123 starting digits needs to be cleaned out",
        {},
        "  starting digits needs to be cleaned out",
    ),
    # Digits at the very end of the text.
    (
        "end digits needs to be cleaned out 123",
        {},
        "end digits needs to be cleaned out  ",
    ),
]

for raw_text, call_kwargs, expected_text in remove_digits_cases:
    s = pd.Series(raw_text)
    s_true = pd.Series(expected_text)
    assert preprocessing.remove_digits(s, **call_kwargs).equals(s_true)