def test_subset_categories_in_test(df):
    """Column count must match between train and held-out transforms.

    The transformer is fitted on the first five rows of ``df`` and then
    applied to the remaining rows; both outputs must have the same number
    of columns.
    """
    feature_cols = ["a", "b", "c", "d"]

    df_train = df[:5]
    X_train = df_train[feature_cols]
    y_train = df_train[["e"]].values.ravel()

    # Only the features of the held-out rows are needed: the target is never
    # passed to ``transform``, so it is not extracted here.
    # NOTE(review): presumably these rows carry only a subset of the
    # categorical levels seen in training -- confirm against the df fixture.
    X_test = df[5:][feature_cols]

    trf = PatsyTransformer("a + np.log(a) + b + c + d - 1")
    trf.fit(X_train, y_train)

    n_test_cols = trf.transform(X_test).shape[1]
    n_train_cols = trf.transform(X_train).shape[1]
    assert n_test_cols == n_train_cols
def test_transform_dummy2(df):
    """Fitting and transforming with ``a + b + c + d`` yields shape (6, 6)."""
    X, y = df[["a", "b", "c", "d"]], df[["e"]]
    tf = PatsyTransformer("a + b + c + d")

    # Fit/transform once and assert on the result; the previous debug print
    # ran the whole pipeline a second time and cluttered the test output.
    transformed = tf.fit(X, y).transform(X)
    assert transformed.shape == (6, 6)
def test_mult_usage(df):
    """The ``a*b - 1`` formula must produce an output of shape (6, 3)."""
    X, y = df[["a", "b", "c", "d"]], df[["e"]]
    tf = PatsyTransformer("a*b - 1")

    # Fit/transform once and assert on the result; the previous debug print
    # ran the whole pipeline a second time and cluttered the test output.
    transformed = tf.fit(X, y).transform(X)
    assert transformed.shape == (6, 3)
def test_apply_numpy_transform(df):
    """A formula containing ``np.log(a)`` must transform to shape (6, 3)."""
    features = df[["a", "b", "c", "d"]]
    target = df[["e"]]

    transformer = PatsyTransformer("a + np.log(a) + b - 1")
    fitted = transformer.fit(features, target)
    design = fitted.transform(features)

    assert design.shape == (6, 3)
def test_min_sign_usage(df):
    """The ``a + b - 1`` formula must transform to shape (6, 2)."""
    features = df[["a", "b", "c", "d"]]
    target = df[["e"]]

    transformer = PatsyTransformer("a + b - 1")
    fitted = transformer.fit(features, target)
    design = fitted.transform(features)

    assert design.shape == (6, 2)
def test_basic_usage(df):
    """The plain ``a + b`` formula must transform to shape (6, 3)."""
    features = df[["a", "b", "c", "d"]]
    target = df[["e"]]

    transformer = PatsyTransformer("a + b")
    fitted = transformer.fit(features, target)
    design = fitted.transform(features)

    assert design.shape == (6, 3)
def test_return_type_dataframe(df):
    """With ``return_type="dataframe"`` the output is a ``pd.DataFrame``."""
    features = df[["a", "b", "c", "d"]]
    target = df[["e"]]

    transformer = PatsyTransformer("a + b - 1", return_type="dataframe")
    fitted = transformer.fit(features, target)
    result = fitted.transform(features)

    assert isinstance(result, pd.DataFrame)
def test_return_type_dmatrix(df):
    """With ``return_type="matrix"`` the output exposes ``design_info``."""
    features = df[["a", "b", "c", "d"]]
    target = df[["e"]]

    transformer = PatsyTransformer("a + b - 1", return_type="matrix")
    fitted = transformer.fit(features, target)
    result = fitted.transform(features)

    # A DesignMatrix is recognised by its ``design_info`` attribute, as per
    # https://patsy.readthedocs.io/en/latest/API-reference.html#patsy.DesignMatrix
    assert hasattr(result, "design_info")