Beispiel #1
0
def test_CountVectorizerWrapper_on_Serie():
    """Check output shapes when the wrapper is fed a single selected column.

    The ``text_col`` column of the sample frame is passed directly to a
    default-configured ``CountVectorizerWrapper``.  Both ``fit_transform``
    and a subsequent ``transform`` must return a 2-D result with one row per
    input entry and one column per reported feature name.
    """
    sample = get_sample_df(size=100, seed=123)
    text = sample["text_col"]
    wrapper = CountVectorizerWrapper()

    def check_shape(result):
        # Rank first, so the unpacking below cannot fail on a non-2-D result.
        assert len(result.shape) == 2
        n_rows, n_cols = result.shape
        assert n_rows == text.shape[0]
        assert n_cols == len(wrapper.get_feature_names())

    # Fit and transform in one step, then verify the result.
    check_shape(wrapper.fit_transform(text))

    # Transforming again with the already-fitted wrapper must give the same shape.
    check_shape(wrapper.transform(text))
Beispiel #2
0
def test_CountVectorizerWrapper():
    """Check the feature-name prefix for different ``columns_to_use`` inputs.

    Three cases are fitted, each with a fresh wrapper:

    * column selected by name on the sample frame -> names start with
      ``"text_col__BAG"``;
    * column selected by position ``2`` on the sample frame -> same prefix;
    * column selected by position ``2`` on the frame's raw ``.values`` ->
      names start with ``"2__BAG"``.
    """
    sample = get_sample_df(size=100, seed=123)

    def fitted_feature_names(columns, data):
        # Build a new wrapper for every case so no fitted state is shared.
        wrapper = CountVectorizerWrapper(columns_to_use=columns)
        wrapper.fit(data)
        return wrapper.get_feature_names()

    # Selection by column name.
    by_name = fitted_feature_names(["text_col"], sample)
    assert all(name.startswith("text_col__BAG") for name in by_name)

    # Selection by position on the frame: the test expects the column's name
    # to be used as the prefix.
    by_position = fitted_feature_names([2], sample)
    assert all(name.startswith("text_col__BAG") for name in by_position)

    # Selection by position on the raw values: the test expects the position
    # itself to be used as the prefix.
    on_values = fitted_feature_names([2], sample.values)
    assert all(name.startswith("2__BAG") for name in on_values)