Esempio n. 1
0
 def test_pipeline_default(self):
     # Exercise preprocessing.clean with no explicit pipeline argument.
     # The fixtures cover markup-like residue plus punctuation, a missing
     # value (pd.NA), and a string with digits and accented characters.
     raw_texts = [
         "Amazon! < br />< br /> If I was going to order any soft drink online, it would be Diet Coke with Lime",
         pd.NA,
         "-1234. Mère, Françoise, noël",
     ]
     # Expected result per input, in the same order; the missing value is
     # expected to come back as an empty string.
     expected_texts = [
         "amazon going order soft drink online would diet coke lime",
         "",
         "mere francoise noel",
     ]

     cleaned = preprocessing.clean(pd.Series(raw_texts))

     # NOTE(review): assertEqual on pd.Series presumably relies on the
     # enclosing test case registering a Series equality function
     # (e.g. via addTypeEqualityFunc) -- confirm against the class setup.
     self.assertEqual(cleaned, pd.Series(expected_texts))
Esempio n. 2
0
 def test_pipeline_stopwords(self):
     # Exercise preprocessing.clean with an explicit two-step pipeline:
     # lowercase first, then remove_stopwords.
     steps = [preprocessing.lowercase, preprocessing.remove_stopwords]

     # The hyphenated token and the newline are expected to survive; the
     # "And on" part is expected to collapse to a single space.
     expected = pd.Series("e-i-e-i-o\n ")
     actual = preprocessing.clean(pd.Series("E-I-E-I-O\nAnd on"), pipeline=steps)

     # NOTE(review): assertEqual on pd.Series presumably relies on the
     # enclosing test case registering a Series equality function
     # (e.g. via addTypeEqualityFunc) -- confirm against the class setup.
     self.assertEqual(actual, expected)