Ejemplo n.º 1
0
def test_subset_categories_in_test(df):
    """Check that train and test splits transform to the same column count.

    The transformer is fitted on the first five rows of ``df`` only and is
    then applied to the remaining rows. Both outputs must have the same
    number of columns. Judging by the test name, the held-out rows are
    presumably meant to contain only a subset of the categories seen while
    fitting -- confirm against the ``df`` fixture.
    """
    feature_cols = ["a", "b", "c", "d"]

    train = df[:5]
    X_train = train[feature_cols]
    y_train = train[["e"]].values.ravel()

    # Only the features of the held-out rows are needed; no target is used
    # at transform time.
    X_test = df[5:][feature_cols]

    trf = PatsyTransformer("a + np.log(a) + b + c + d - 1")
    trf.fit(X_train, y_train)

    n_test_cols = trf.transform(X_test).shape[1]
    n_train_cols = trf.transform(X_train).shape[1]
    assert n_test_cols == n_train_cols
Ejemplo n.º 2
0
def test_transform_dummy2(df):
    """Check the output shape for the formula ``a + b + c + d``.

    Fits the transformer on columns ``a``-``d`` of ``df`` (with column ``e``
    as the target) and expects the transformed output to have 6 rows and
    6 columns.
    """
    X, y = df[["a", "b", "c", "d"]], df[["e"]]
    tf = PatsyTransformer("a + b + c + d")
    transformed = tf.fit(X, y).transform(X)
    assert transformed.shape == (6, 6)
Ejemplo n.º 3
0
def test_mult_usage(df):
    """Check the output shape for the interaction formula ``a*b - 1``.

    In patsy's formula language ``a*b`` is shorthand for ``a + b + a:b`` and
    ``- 1`` removes the intercept, so 3 columns are expected for the 6 rows
    (assuming ``a`` and ``b`` are numeric in the ``df`` fixture).
    """
    X, y = df[["a", "b", "c", "d"]], df[["e"]]
    tf = PatsyTransformer("a*b - 1")
    transformed = tf.fit(X, y).transform(X)
    assert transformed.shape == (6, 3)
Ejemplo n.º 4
0
def test_apply_numpy_transform(df):
    """Check that a NumPy call inside the formula adds its own column.

    The formula ``a + np.log(a) + b - 1`` is expected to produce 3 columns
    for the 6 rows of ``df``.
    """
    features = df[["a", "b", "c", "d"]]
    target = df[["e"]]

    transformer = PatsyTransformer("a + np.log(a) + b - 1")
    fitted = transformer.fit(features, target)
    transformed = fitted.transform(features)

    expected_shape = (6, 3)
    assert transformed.shape == expected_shape
Ejemplo n.º 5
0
def test_min_sign_usage(df):
    """Check the output shape for the formula ``a + b - 1``.

    With the ``- 1`` term the transformed output is expected to have
    2 columns for the 6 rows of ``df``.
    """
    features = df[["a", "b", "c", "d"]]
    target = df[["e"]]

    transformer = PatsyTransformer("a + b - 1")
    transformed = transformer.fit(features, target).transform(features)

    n_rows, n_cols = 6, 2
    assert transformed.shape == (n_rows, n_cols)
Ejemplo n.º 6
0
def test_basic_usage(df):
    """Check the output shape for the plain formula ``a + b``.

    The transformed output is expected to have 3 columns for the 6 rows of
    ``df`` (presumably the implicit intercept plus ``a`` and ``b``).
    """
    features = df[["a", "b", "c", "d"]]
    target = df[["e"]]

    fitted = PatsyTransformer("a + b").fit(features, target)
    transformed = fitted.transform(features)

    assert transformed.shape == (6, 3)
Ejemplo n.º 7
0
def test_return_type_dataframe(df):
    """Check that ``return_type="dataframe"`` yields a pandas DataFrame."""
    features = df[["a", "b", "c", "d"]]
    target = df[["e"]]

    transformer = PatsyTransformer("a + b - 1", return_type="dataframe")
    fitted = transformer.fit(features, target)
    result = fitted.transform(features)

    assert isinstance(result, pd.DataFrame)
Ejemplo n.º 8
0
def test_return_type_dmatrix(df):
    """Check that ``return_type="matrix"`` yields an object with ``design_info``."""
    features = df[["a", "b", "c", "d"]]
    target = df[["e"]]

    transformer = PatsyTransformer("a + b - 1", return_type="matrix")
    fitted = transformer.fit(features, target)
    result = fitted.transform(features)

    # A DesignMatrix is detected through its ``design_info`` attribute, as per
    # https://patsy.readthedocs.io/en/latest/API-reference.html#patsy.DesignMatrix
    assert hasattr(result, "design_info")