def test_pipeline_default(self):
    """Check `preprocessing.clean` when no explicit pipeline is passed.

    The input Series mixes markup-like residue and punctuation, a missing
    value (`pd.NA`), and text with digits and accented characters. The
    result is compared against the expected cleaned Series.
    """
    raw = pd.Series(
        [
            "Amazon! < br />< br /> If I was going to order any soft drink online, it would be Diet Coke with Lime",
            pd.NA,
            "-1234. Mère, Françoise, noël",
        ]
    )
    expected = pd.Series(
        [
            "amazon going order soft drink online would diet coke lime",
            # The missing value is expected to come back as an empty string.
            "",
            "mere francoise noel",
        ]
    )

    actual = preprocessing.clean(raw)

    self.assertEqual(actual, expected)
def test_pipeline_stopwords(self):
    """Check `preprocessing.clean` with a custom two-step pipeline.

    The pipeline applies `preprocessing.lowercase` followed by
    `preprocessing.remove_stopwords`. The expected output keeps the
    hyphenated token and the newline, with only a single space after it.
    """
    steps = [preprocessing.lowercase, preprocessing.remove_stopwords]
    raw = pd.Series("E-I-E-I-O\nAnd on")
    expected = pd.Series("e-i-e-i-o\n ")

    actual = preprocessing.clean(raw, pipeline=steps)

    self.assertEqual(actual, expected)