def test_CountVectorizerWrapper_on_Serie():
    """Check the output shape of CountVectorizerWrapper fed a single column.

    The wrapper is fitted on ``df["text_col"]`` (the column itself rather
    than the whole frame).  Both ``fit_transform`` and a subsequent
    ``transform`` must return a 2-D result with one row per input entry and
    one column per name reported by ``get_feature_names()``.
    """
    sample = get_sample_df(size=100, seed=123)
    text_series = sample["text_col"]
    wrapper = CountVectorizerWrapper()

    # Order matters: ``fit_transform`` fits the wrapper, ``transform`` then
    # reuses that fitted state on the same data.
    for method_name in ("fit_transform", "transform"):
        result = getattr(wrapper, method_name)(text_series)

        assert len(result.shape) == 2
        assert result.shape[0] == text_series.shape[0]
        assert result.shape[1] == len(wrapper.get_feature_names())
def test_CountVectorizerWrapper():
    """Check the feature-name prefix produced by CountVectorizerWrapper.

    Three selections of the text column are exercised:

    * by name on the sample DataFrame -> names start with ``text_col__BAG``;
    * by position ``2`` on the same DataFrame -> same ``text_col__BAG`` prefix;
    * by position ``2`` on the raw ``df.values`` array -> names start with
      ``2__BAG``.
    """

    def assert_feature_prefix(data, columns, expected_prefix):
        # Fit a fresh wrapper restricted to ``columns`` and verify that every
        # generated feature name begins with ``expected_prefix``.
        wrapper = CountVectorizerWrapper(columns_to_use=columns)
        wrapper.fit(data)
        feature_names = wrapper.get_feature_names()
        assert all(name.startswith(expected_prefix) for name in feature_names)

    df = get_sample_df(size=100, seed=123)

    # Column selected by label.
    assert_feature_prefix(df, ["text_col"], "text_col__BAG")

    # Column selected by integer position on a DataFrame.
    assert_feature_prefix(df, [2], "text_col__BAG")

    # Same position on the underlying array: the test expects the index
    # itself to be used as the prefix.
    assert_feature_prefix(df.values, [2], "2__BAG")