def test_patsy_deviation_coding(self):
    """Check the design matrices expected from patsy ``Sum`` (deviation) coding."""
    frame = pdml.ModelFrame({'X': [1, 2, 3, 4, 5],
                             'Y': [1, 3, 2, 2, 1],
                             'Z': [1, 1, 1, 2, 2]},
                            target='Z', index=['a', 'b', 'c', 'd', 'e'])

    # X holds five distinct values: intercept plus four contrast columns.
    x_columns = ['Intercept', 'C(X, Sum)[S.1]', 'C(X, Sum)[S.2]',
                 'C(X, Sum)[S.3]', 'C(X, Sum)[S.4]']
    x_result = frame.transform('C(X, Sum)')
    x_expected = pd.DataFrame({'Intercept': [1, 1, 1, 1, 1],
                               'C(X, Sum)[S.1]': [1, 0, 0, 0, -1],
                               'C(X, Sum)[S.2]': [0, 1, 0, 0, -1],
                               'C(X, Sum)[S.3]': [0, 0, 1, 0, -1],
                               'C(X, Sum)[S.4]': [0, 0, 0, 1, -1]},
                              index=['a', 'b', 'c', 'd', 'e'],
                              columns=x_columns,
                              dtype=float)
    tm.assert_frame_equal(x_result, x_expected)

    # Y holds three distinct values: intercept plus two contrast columns.
    y_columns = ['Intercept', 'C(Y, Sum)[S.1]', 'C(Y, Sum)[S.2]']
    y_result = frame.transform('C(Y, Sum)')
    y_expected = pd.DataFrame({'Intercept': [1, 1, 1, 1, 1],
                               'C(Y, Sum)[S.1]': [1, -1, 0, 0, 1],
                               'C(Y, Sum)[S.2]': [0, -1, 1, 1, 0]},
                              index=['a', 'b', 'c', 'd', 'e'],
                              columns=y_columns,
                              dtype=float)
    tm.assert_frame_equal(y_result, y_expected)
def test_patsy_matrices(self):
    """Check that a two-sided formula yields a ModelFrame targeting the left-hand side."""
    row_labels = ['a', 'b', 'c']
    features = pd.DataFrame({'A': [1, 2, 3],
                             'B': [4, 5, 6],
                             'C': [7, 8, 9]},
                            index=row_labels,
                            columns=['A', 'B', 'C'])
    original_target = pd.Series([10, 11, 12], index=row_labels)
    model_frame = pdml.ModelFrame(features, target=original_target)

    transformed = model_frame.transform('A ~ B + C')

    # Container type, shape and labels.
    self.assertIsInstance(transformed, pdml.ModelFrame)
    self.assertEqual(transformed.shape, (3, 4))
    tm.assert_index_equal(transformed.index, pd.Index(['a', 'b', 'c']))
    tm.assert_index_equal(transformed.columns,
                          pd.Index(['A', 'Intercept', 'B', 'C']))

    # Full contents: left-hand side first, then the design matrix.
    expected_frame = pd.DataFrame({'A': [1, 2, 3],
                                   'Intercept': [1, 1, 1],
                                   'B': [4, 5, 6],
                                   'C': [7, 8, 9]},
                                  index=['a', 'b', 'c'],
                                  columns=['A', 'Intercept', 'B', 'C'],
                                  dtype=float)
    tm.assert_frame_equal(transformed, expected_frame)

    # The target of the result is expected to be column 'A' as floats.
    expected_target = pd.Series([1, 2, 3], index=['a', 'b', 'c'],
                                name='A', dtype=float)
    tm.assert_series_equal(transformed.target, expected_target)
    self.assertEqual(transformed.target.name, 'A')
    self.assertEqual(transformed.target_name, 'A')
def test_frame_target_object_set(self):
    """Check assigning ``target`` on a frame whose column labels are datetimes."""
    y2014 = datetime.datetime(2014, 1, 1)
    y2015 = datetime.datetime(2015, 1, 1)
    y2016 = datetime.datetime(2016, 1, 1)
    raw = pd.DataFrame({y2014: [1, 2, 3],
                        y2015: [4, 5, 6],
                        y2016: [7, 8, 9]},
                       index=['a', 'b', 'c'])

    model_frame = pdml.ModelFrame(raw)
    model_frame.target = pd.Series(['A', 'B', 'C'],
                                   index=['a', 'b', 'c'], name=5)

    self.assertIsInstance(model_frame, pdml.ModelFrame)
    self.assertEqual(model_frame.shape, (3, 4))
    tm.assert_index_equal(model_frame.index, pd.Index(['a', 'b', 'c']))
    expected_columns = pd.Index([5, y2014, y2015, y2016])
    tm.assert_index_equal(model_frame.columns, expected_columns)
    tm.assert_frame_equal(model_frame.data, raw)
    expected_target = pd.Series(['A', 'B', 'C'],
                                index=['a', 'b', 'c'], name=5)
    tm.assert_series_equal(model_frame.target, expected_target)
    self.assertEqual(model_frame.target.name, 5)

    # name will be ignored if ModelFrame already has a target
    model_frame.target = pd.Series([10, 11, 12],
                                   index=['a', 'b', 'c'], name='X')

    self.assertIsInstance(model_frame, pdml.ModelFrame)
    self.assertEqual(model_frame.shape, (3, 4))
    tm.assert_index_equal(model_frame.index, pd.Index(['a', 'b', 'c']))
    expected_columns = pd.Index([5, y2014, y2015, y2016])
    tm.assert_index_equal(model_frame.columns, expected_columns)
    tm.assert_frame_equal(model_frame.data, raw)
    expected_target = pd.Series([10, 11, 12],
                                index=['a', 'b', 'c'], name=5)
    tm.assert_series_equal(model_frame.target, expected_target)
    self.assertEqual(model_frame.target.name, 5)
def test_multioutput(self):
    """Compare the accessor-built MultiOutputRegressor against plain scikit-learn.

    Two identically configured regressors are fitted on the same data: one
    built through the ModelFrame accessors (``reg1``) and one built directly
    from scikit-learn (``reg2``). Predictions of the former, obtained through
    ``ModelFrame.predict``, must equal predictions of the latter.
    """
    # http://scikit-learn.org/stable/auto_examples/ensemble/plot_random_forest_regression_multioutput.html#sphx-glr-auto-examples-ensemble-plot-random-forest-regression-multioutput-py

    from sklearn.multioutput import MultiOutputRegressor
    from sklearn.ensemble import RandomForestRegressor

    # Create a random dataset
    rng = np.random.RandomState(1)
    X = np.sort(200 * rng.rand(600, 1) - 100, axis=0)
    y = np.array([np.pi * np.sin(X).ravel(), np.pi * np.cos(X).ravel()]).T
    y += (0.5 - rng.rand(*y.shape))

    df = pdml.ModelFrame(X, target=y)

    max_depth = 30

    # Estimator built through the ModelFrame accessors.
    rf1 = df.ensemble.RandomForestRegressor(max_depth=max_depth,
                                            random_state=self.random_state)
    reg1 = df.multioutput.MultiOutputRegressor(rf1)

    # Reference estimator built directly from scikit-learn.
    rf2 = RandomForestRegressor(max_depth=max_depth,
                                random_state=self.random_state)
    reg2 = MultiOutputRegressor(rf2)

    df.fit(reg1)
    reg2.fit(X, y)

    # Predict with the accessor-built estimator. The previous code predicted
    # with ``reg2`` on both sides, so ``reg1`` was fitted but never verified.
    result = df.predict(reg1)
    expected = pd.DataFrame(reg2.predict(X))
    tm.assert_frame_equal(result, expected)
def test_frame_init_df_series(self):
    """Check ModelFrame construction from a DataFrame plus a Series target.

    Covers a Series named '.target', a Series whose index does not match the
    data (expected to raise ValueError) and a Series with a custom name.
    """
    # initialization by dataframe and no-named series
    df = pd.DataFrame({'A': [1, 2, 3],
                       'B': [4, 5, 6],
                       'C': [7, 8, 9]},
                      index=['a', 'b', 'c'],
                      columns=['A', 'B', 'C'])
    s = pd.Series([1, 2, 3], index=['a', 'b', 'c'], name='.target')

    mdf = pdml.ModelFrame(df, target=s)
    self.assertIsInstance(mdf, pdml.ModelFrame)
    self.assertEqual(mdf.shape, (3, 4))
    tm.assert_index_equal(mdf.index, pd.Index(['a', 'b', 'c']))
    tm.assert_index_equal(mdf.columns, pd.Index(['.target', 'A', 'B', 'C']))
    tm.assert_frame_equal(mdf.data, df)
    tm.assert_series_equal(mdf.target, s)
    self.assertEqual(mdf.target.name, '.target')
    self.assertEqual(mdf.target_name, '.target')

    # Default integer index does not match the data's 'a', 'b', 'c' labels.
    s = pd.Series([1, 2, 3])
    # pytest.raises(match=...) replaces the deprecated assertRaisesRegexp
    # alias, matching the other tests in this file.
    with pytest.raises(ValueError,
                       match='data and target must have equal index'):
        mdf = pdml.ModelFrame(df, target=s)

    s = pd.Series([1, 2, 3], index=['a', 'b', 'c'], name='XXX')
    mdf = pdml.ModelFrame(df, target=s)

    self.assertIsInstance(mdf, pdml.ModelFrame)
    self.assertEqual(mdf.shape, (3, 4))
    tm.assert_index_equal(mdf.index, pd.Index(['a', 'b', 'c']))
    tm.assert_index_equal(mdf.columns, pd.Index(['XXX', 'A', 'B', 'C']))
    tm.assert_frame_equal(mdf.data, df)
    tm.assert_series_equal(mdf.target, s)
    self.assertEqual(mdf.target.name, 'XXX')
    self.assertEqual(mdf.target_name, 'XXX')
def test_multioutput(self):
    """Compare the accessor-built MultiOutputRegressor against plain scikit-learn.

    Two identically configured regressors are fitted on the same data: one
    built through the ModelFrame accessors (``reg1``) and one built directly
    from scikit-learn (``reg2``). Predictions of the former, obtained through
    ``ModelFrame.predict``, must equal predictions of the latter.
    """
    # http://scikit-learn.org/stable/auto_examples/ensemble/plot_random_forest_regression_multioutput.html#sphx-glr-auto-examples-ensemble-plot-random-forest-regression-multioutput-py

    from sklearn.multioutput import MultiOutputRegressor
    from sklearn.ensemble import RandomForestRegressor

    # Create a random dataset
    rng = np.random.RandomState(1)
    X = np.sort(200 * rng.rand(600, 1) - 100, axis=0)
    y = np.array([np.pi * np.sin(X).ravel(), np.pi * np.cos(X).ravel()]).T
    y += (0.5 - rng.rand(*y.shape))

    df = pdml.ModelFrame(X, target=y)

    max_depth = 30

    # Estimator built through the ModelFrame accessors.
    rf1 = df.ensemble.RandomForestRegressor(max_depth=max_depth,
                                            random_state=self.random_state)
    reg1 = df.multioutput.MultiOutputRegressor(rf1)

    # Reference estimator built directly from scikit-learn.
    rf2 = RandomForestRegressor(max_depth=max_depth,
                                random_state=self.random_state)
    reg2 = MultiOutputRegressor(rf2)

    df.fit(reg1)
    reg2.fit(X, y)

    # Predict with the accessor-built estimator. The previous code predicted
    # with ``reg2`` on both sides, so ``reg1`` was fitted but never verified.
    result = df.predict(reg1)
    expected = pd.DataFrame(reg2.predict(X))
    tm.assert_frame_equal(result, expected)
def test_frame_data_proparty_series(self):
    """Check assigning a Series (or an unsupported list) to ``ModelFrame.data``."""

    def build_frame():
        # A fresh frame per scenario, because each one reassigns ``data``.
        return pdml.ModelFrame({'A': [1, 2, 3], 'B': [4, 5, 6]},
                               target=[7, 8, 9],
                               index=['a', 'b', 'c'])

    # A column selected from the frame itself.
    frame = build_frame()
    frame.data = frame['A']
    wanted = pdml.ModelFrame({'A': [1, 2, 3]},
                             target=[7, 8, 9],
                             index=['a', 'b', 'c'])
    tm.assert_frame_equal(frame, wanted)

    # An independent Series with its own name.
    frame = build_frame()
    frame.data = pd.Series([1, 2, 3], name='x', index=['a', 'b', 'c'])
    wanted = pdml.ModelFrame({'x': [1, 2, 3]},
                             target=[7, 8, 9],
                             index=['a', 'b', 'c'])
    tm.assert_frame_equal(frame, wanted)

    # A plain list is expected to be rejected.
    frame = build_frame()
    with self.assertRaises(TypeError):
        frame.data = [1, 2, 3]
def test_frame_data_proparty_series(self):
    """Check assigning a Series (or an unsupported list) to ``ModelFrame.data``."""

    def build_frame():
        # A fresh frame per scenario, because each one reassigns ``data``.
        return pdml.ModelFrame({'A': [1, 2, 3], 'B': [4, 5, 6]},
                               target=[7, 8, 9],
                               index=['a', 'b', 'c'])

    # A column selected from the frame itself.
    frame = build_frame()
    frame.data = frame['A']
    wanted = pdml.ModelFrame({'A': [1, 2, 3]},
                             target=[7, 8, 9],
                             index=['a', 'b', 'c'])
    tm.assert_frame_equal(frame, wanted)

    # An independent Series with its own name.
    frame = build_frame()
    frame.data = pd.Series([1, 2, 3], name='x', index=['a', 'b', 'c'])
    wanted = pdml.ModelFrame({'x': [1, 2, 3]},
                             target=[7, 8, 9],
                             index=['a', 'b', 'c'])
    tm.assert_frame_equal(frame, wanted)

    # A plain list is expected to be rejected.
    frame = build_frame()
    with pytest.raises(TypeError):
        frame.data = [1, 2, 3]
def test_frame_init_df_duplicated(self):
    """Check behaviour when the target's name collides with a data column."""
    # initialization by dataframe and duplicated target
    plain = pd.DataFrame({'A': [1, 2, 3],
                          'B': [4, 5, 6],
                          'C': [7, 8, 9]},
                         columns=['A', 'B', 'C'])
    clashing = pd.Series([10, 11, 12], name='A')

    msg = "data and target must have unique names"
    with pytest.raises(ValueError, match=msg):
        pdml.ModelFrame(plain, target=clashing)

    # Assigning the clashing target afterwards is expected to overwrite
    # column 'A' rather than raise.
    frame = pdml.ModelFrame({'A': [1, 2, 3],
                             'B': [4, 5, 6],
                             'C': [7, 8, 9]},
                            columns=['A', 'B', 'C'])
    frame.target = pd.Series([10, 11, 12], name='A')

    wanted = pdml.ModelFrame({'A': [10, 11, 12],
                              'B': [4, 5, 6],
                              'C': [7, 8, 9]},
                             columns=['A', 'B', 'C'])
    tm.assert_frame_equal(frame, wanted)
def test_LabelEncoder_frame(self):
    """Check LabelEncoder fit/transform, fit_transform and inverse_transform on a frame."""
    labels = np.array(['X', 'Y', 'Z', 'X'])
    frame = pdml.ModelFrame(labels, index=['a', 'b', 'c', 'd'],
                            columns=['A'])

    # Separate fit and transform steps.
    encoder = frame.pp.LabelEncoder()
    frame.fit(encoder)
    encoded = frame.transform(encoder)

    codes = np.array([0, 1, 2, 0]).reshape(-1, 1)

    self.assertIsInstance(encoded, pdml.ModelFrame)
    self.assert_numpy_array_almost_equal(encoded.values, codes)
    tm.assert_index_equal(encoded.columns, frame.columns)
    tm.assert_index_equal(encoded.index, frame.index)

    # Combined fit_transform with a fresh encoder.
    encoder = frame.pp.LabelEncoder()
    encoded = frame.fit_transform(encoder)

    self.assertIsInstance(encoded, pdml.ModelFrame)
    self.assert_numpy_array_almost_equal(encoded.values, codes)
    tm.assert_index_equal(encoded.columns, frame.columns)
    tm.assert_index_equal(encoded.index, frame.index)

    # Round trip back to the original labels.
    restored = encoded.inverse_transform(encoder)
    self.assertIsInstance(restored, pdml.ModelFrame)
    tm.assert_frame_equal(restored, frame)
def test_LabelEncoder_frame(self):
    """Check LabelEncoder fit/transform, fit_transform and inverse_transform on a frame."""
    labels = np.array(['X', 'Y', 'Z', 'X'])
    frame = pdml.ModelFrame(labels, index=['a', 'b', 'c', 'd'],
                            columns=['A'])

    # Separate fit and transform steps.
    encoder = frame.pp.LabelEncoder()
    frame.fit(encoder)
    encoded = frame.transform(encoder)

    codes = np.array([0, 1, 2, 0]).reshape(-1, 1)

    self.assertIsInstance(encoded, pdml.ModelFrame)
    self.assert_numpy_array_almost_equal(encoded.values, codes)
    tm.assert_index_equal(encoded.columns, frame.columns)
    tm.assert_index_equal(encoded.index, frame.index)

    # Combined fit_transform with a fresh encoder.
    encoder = frame.pp.LabelEncoder()
    encoded = frame.fit_transform(encoder)

    self.assertIsInstance(encoded, pdml.ModelFrame)
    self.assert_numpy_array_almost_equal(encoded.values, codes)
    tm.assert_index_equal(encoded.columns, frame.columns)
    tm.assert_index_equal(encoded.index, frame.index)

    # Round trip back to the original labels.
    restored = encoded.inverse_transform(encoder)
    self.assertIsInstance(restored, pdml.ModelFrame)
    tm.assert_frame_equal(restored, frame)
def test_FunctionTransformer(self):
    """Check that a FunctionTransformer adding one shifts only the data columns."""
    frame = pdml.ModelFrame(datasets.load_iris())

    add_one = frame.pp.FunctionTransformer(func=lambda x: x + 1)
    frame.fit(add_one)
    transformed = frame.transform(add_one)

    # Build the expectation by shifting the data part of a copy.
    wanted = frame.copy()
    wanted.data = wanted.data + 1

    self.assertIsInstance(transformed, pdml.ModelFrame)
    tm.assert_frame_equal(transformed, wanted)
def test_transform_standard(self):
    """Check that groupby-transform on a ModelFrame matches plain pandas."""
    # check pandas standard transform works
    group_keys = ['A', 'B', 'A', 'A', 'A', 'B', 'B', 'B']
    plain = pd.DataFrame({'A': group_keys,
                          'B': np.random.randn(8),
                          'C': np.random.randn(8)})
    wrapped = pdml.ModelFrame(plain)

    from_pandas = plain.groupby('A').transform('mean')
    from_model_frame = wrapped.groupby('A').transform('mean')
    tm.assert_frame_equal(from_pandas, from_model_frame)
def test_grid_search(self):
    """Check that ``model_selection.describe`` mirrors GridSearchCV's ``cv_results_`` for an SVC."""
    rbf_grid = {'kernel': ['rbf'],
                'gamma': [1e-3, 1e-4],
                'C': [1, 10, 100]}
    linear_grid = {'kernel': ['linear'],
                   'C': [1, 10, 100]}
    tuned_parameters = [rbf_grid, linear_grid]

    frame = pdml.ModelFrame(datasets.load_digits())
    search = frame.model_selection.GridSearchCV(frame.svm.SVC(C=1),
                                                tuned_parameters,
                                                cv=5)

    with tm.RNGContext(1):
        frame.fit(search)

    described = frame.model_selection.describe(search)
    wanted = pd.DataFrame(search.cv_results_)

    self.assertIsInstance(described, pdml.ModelFrame)
    tm.assert_frame_equal(described, wanted)
def test_grid_search(self):
    """Check that ``model_selection.describe`` mirrors GridSearchCV's ``cv_results_`` for XGBClassifier."""
    tuned_parameters = [{'max_depth': [3, 4],
                         'n_estimators': [50, 100]}]

    frame = pdml.ModelFrame(datasets.load_digits())
    search = frame.model_selection.GridSearchCV(frame.xgb.XGBClassifier(),
                                                tuned_parameters,
                                                cv=5)

    with tm.RNGContext(1):
        frame.fit(search)

    described = frame.model_selection.describe(search)
    wanted = pd.DataFrame(search.cv_results_)

    self.assertIsInstance(described, pdml.ModelFrame)
    tm.assert_frame_equal(described, wanted)
def test_frame_init_dict_list_series_index(self):
    """Check construction from a dict of lists plus a Series target that carries an index."""
    # initialization by dict of lists and an indexed series
    raw = {'A': [1, 2, 3], 'B': [4, 5, 6], 'C': [7, 8, 9]}
    indexed_target = pd.Series([9, 8, 7], name='X', index=['a', 'b', 'c'])

    model_frame = pdml.ModelFrame(raw, target=indexed_target)

    self.assertIsInstance(model_frame, pdml.ModelFrame)
    self.assertEqual(model_frame.shape, (3, 4))
    # The frame is expected to adopt the target's index labels.
    tm.assert_index_equal(model_frame.index, pd.Index(['a', 'b', 'c']))
    tm.assert_index_equal(model_frame.columns,
                          pd.Index(['X', 'A', 'B', 'C']))

    wanted_data = pd.DataFrame(raw, index=['a', 'b', 'c'])
    tm.assert_frame_equal(model_frame.data, wanted_data)
    tm.assert_series_equal(model_frame.target, indexed_target)
    self.assertEqual(model_frame.target.name, 'X')
    self.assertEqual(model_frame.target_name, 'X')
def test_frame_init_dict_list_series_index(self):
    """Check construction from a dict of lists plus a Series target that carries an index."""
    # initialization by dict of lists and an indexed series
    raw = {'A': [1, 2, 3], 'B': [4, 5, 6], 'C': [7, 8, 9]}
    indexed_target = pd.Series([9, 8, 7], name='X', index=['a', 'b', 'c'])

    model_frame = pdml.ModelFrame(raw, target=indexed_target)

    self.assertIsInstance(model_frame, pdml.ModelFrame)
    self.assertEqual(model_frame.shape, (3, 4))
    # The frame is expected to adopt the target's index labels.
    tm.assert_index_equal(model_frame.index, pd.Index(['a', 'b', 'c']))
    tm.assert_index_equal(model_frame.columns,
                          pd.Index(['X', 'A', 'B', 'C']))

    wanted_data = pd.DataFrame(raw, index=['a', 'b', 'c'])
    tm.assert_frame_equal(model_frame.data, wanted_data)
    tm.assert_series_equal(model_frame.target, indexed_target)
    self.assertEqual(model_frame.target.name, 'X')
    self.assertEqual(model_frame.target_name, 'X')
def test_frame_init_dict_list(self):
    """Check construction with list targets, a column-name target and no target at all."""
    # initialization by dataframe and list
    plain = pd.DataFrame({'A': [1, 2, 3],
                          'B': [4, 5, 6],
                          'C': [7, 8, 9]},
                         index=['a', 'b', 'c'],
                         columns=['A', 'B', 'C'])
    target_values = [1, 2, 3]
    model_frame = pdml.ModelFrame(plain, target=target_values)

    self.assertIsInstance(model_frame, pdml.ModelFrame)
    self.assertEqual(model_frame.shape, (3, 4))
    tm.assert_index_equal(model_frame.index, pd.Index(['a', 'b', 'c']))
    tm.assert_index_equal(model_frame.columns,
                          pd.Index(['.target', 'A', 'B', 'C']))
    tm.assert_frame_equal(model_frame.data, plain)
    wanted_target = pd.Series([1, 2, 3], index=['a', 'b', 'c'],
                              name='.target')
    tm.assert_series_equal(model_frame.target, wanted_target)
    self.assertEqual(model_frame.target.name, '.target')
    self.assertEqual(model_frame.target_name, '.target')

    # Dict of lists plus a list target: default integer index expected.
    raw = {'A': [1, 2, 3], 'B': [4, 5, 6], 'C': [7, 8, 9]}
    target_values = [1, 2, 3]
    model_frame = pdml.ModelFrame(raw, target=target_values)

    self.assertIsInstance(model_frame, pdml.ModelFrame)
    self.assertEqual(model_frame.shape, (3, 4))
    tm.assert_index_equal(model_frame.index, pd.Index([0, 1, 2]))
    tm.assert_index_equal(model_frame.columns,
                          pd.Index(['.target', 'A', 'B', 'C']))
    wanted_data = pd.DataFrame(raw)
    tm.assert_frame_equal(model_frame.data, wanted_data)
    wanted_target = pd.Series([1, 2, 3], index=[0, 1, 2], name='.target')
    tm.assert_series_equal(model_frame.target, wanted_target)
    self.assertEqual(model_frame.target.name, '.target')
    self.assertEqual(model_frame.target_name, '.target')

    # Same dict, with an existing column designated as target by name.
    model_frame = pdml.ModelFrame(raw, target='A')

    self.assertIsInstance(model_frame, pdml.ModelFrame)
    self.assertEqual(model_frame.shape, (3, 3))
    tm.assert_index_equal(model_frame.index, pd.Index([0, 1, 2]))
    tm.assert_index_equal(model_frame.columns, pd.Index(['A', 'B', 'C']))
    wanted_data = pd.DataFrame(raw)
    tm.assert_frame_equal(model_frame.data, wanted_data[['B', 'C']])
    tm.assert_series_equal(model_frame.target, wanted_data['A'])
    self.assertEqual(model_frame.target.name, 'A')
    self.assertEqual(model_frame.target_name, 'A')

    # No target given: every column is data, target name keeps its default.
    model_frame = pdml.ModelFrame({'A': [1, 2, 3],
                                   'B': [4, 5, 6],
                                   'C': [7, 8, 9]},
                                  index=['a', 'b', 'c'],
                                  columns=['A', 'B', 'C'])

    self.assertIsInstance(model_frame, pdml.ModelFrame)
    self.assertEqual(model_frame.shape, (3, 3))
    tm.assert_index_equal(model_frame.index, pd.Index(['a', 'b', 'c']))
    tm.assert_index_equal(model_frame.columns, pd.Index(['A', 'B', 'C']))
    tm.assert_frame_equal(model_frame.data, model_frame)
    self.assertEqual(model_frame.target_name, '.target')
def test_frame_init_df_none(self):
    """Check construction from a DataFrame with an explicit ``target=None``."""
    # initialization by dataframe and none
    plain = pd.DataFrame({'A': [1, 2, 3],
                          'B': [4, 5, 6],
                          'C': [7, 8, 9]},
                         index=['a', 'b', 'c'],
                         columns=['A', 'B', 'C'])

    model_frame = pdml.ModelFrame(plain, target=None)

    self.assertIsInstance(model_frame, pdml.ModelFrame)
    self.assertEqual(model_frame.shape, (3, 3))
    tm.assert_index_equal(model_frame.index, pd.Index(['a', 'b', 'c']))
    tm.assert_index_equal(model_frame.columns, pd.Index(['A', 'B', 'C']))
    tm.assert_frame_equal(model_frame.data, plain)

    # Data is present, the target is absent and its name keeps the default.
    self.assertTrue(model_frame.has_data())
    self.assertTrue(model_frame.target is None)
    self.assertEqual(model_frame.target_name, '.target')
def test_FunctionTransformer(self):
    """Check that a FunctionTransformer adding one shifts only the data columns.

    The test is skipped when ``pdml.compat._SKLEARN_ge_017`` is false.
    """
    if not pdml.compat._SKLEARN_ge_017:
        # Use unittest's own skip mechanism instead of importing the
        # unmaintained ``nose`` package only to raise its SkipTest.
        self.skipTest('FunctionTransformer requires scikit-learn >= 0.17')

    iris = datasets.load_iris()
    df = pdml.ModelFrame(iris)

    mod1 = df.pp.FunctionTransformer(func=lambda x: x + 1)
    df.fit(mod1)
    result = df.transform(mod1)

    # Expected frame: same target, data columns shifted by one.
    exp = df.copy()
    exp.data = exp.data + 1

    self.assertIsInstance(result, pdml.ModelFrame)
    tm.assert_frame_equal(result, exp)
def test_frame_init_df_df(self):
    """Check construction from a DataFrame with a DataFrame target (multi and single column)."""
    # initialization by dataframe and dataframe
    features = pd.DataFrame({'A': [1, 2, 3],
                             'B': [4, 5, 6],
                             'C': [7, 8, 9]},
                            index=['a', 'b', 'c'],
                            columns=['A', 'B', 'C'])
    multi_target = pd.DataFrame({'t1': [10, 11, 12],
                                 't2': [13, 14, 15]},
                                index=['a', 'b', 'c'])

    model_frame = pdml.ModelFrame(features, target=multi_target)

    self.assertIsInstance(model_frame, pdml.ModelFrame)
    self.assertEqual(model_frame.shape, (3, 5))
    all_columns = ['t1', 't2', 'A', 'B', 'C']
    wanted = pd.DataFrame({'t1': [10, 11, 12],
                           't2': [13, 14, 15],
                           'A': [1, 2, 3],
                           'B': [4, 5, 6],
                           'C': [7, 8, 9]},
                          index=['a', 'b', 'c'],
                          columns=all_columns)
    tm.assert_frame_equal(model_frame, wanted)
    tm.assert_index_equal(model_frame.index, pd.Index(['a', 'b', 'c']))
    tm.assert_index_equal(model_frame.columns,
                          pd.Index(['t1', 't2', 'A', 'B', 'C']))
    tm.assert_frame_equal(model_frame.data, features)
    tm.assert_frame_equal(model_frame.target, multi_target)
    tm.assert_index_equal(model_frame.target.columns,
                          pd.Index(['t1', 't2']))
    tm.assert_index_equal(model_frame.target_name, pd.Index(['t1', 't2']))
    self.assertTrue(model_frame.has_multi_targets())

    # A target with the default integer index does not line up with the data.
    unaligned_target = pd.DataFrame({'t1': [10, 11, 12],
                                     't2': [13, 14, 15]})
    msg = 'data and target must have equal index'
    with pytest.raises(ValueError, match=msg):
        model_frame = pdml.ModelFrame(features, target=unaligned_target)

    # single column DataFrame will results in single target column
    single_target = pd.DataFrame({'t1': [10, 11, 12]},
                                 index=['a', 'b', 'c'])
    model_frame = pdml.ModelFrame(features, target=single_target)

    self.assertIsInstance(model_frame, pdml.ModelFrame)
    self.assertEqual(model_frame.shape, (3, 4))
    tm.assert_index_equal(model_frame.index, pd.Index(['a', 'b', 'c']))
    tm.assert_index_equal(model_frame.columns,
                          pd.Index(['t1', 'A', 'B', 'C']))
    tm.assert_frame_equal(model_frame.data, features)

    wanted_series = pd.Series([10, 11, 12], name='t1',
                              index=['a', 'b', 'c'])
    tm.assert_series_equal(model_frame.target, wanted_series)
    self.assertEqual(model_frame.target_name, 't1')
def test_frame_init_df_array_series(self):
    """Check construction from a 2-D ndarray with a Series target and explicit labels."""
    target_series = pd.Series([1, 2, 3], index=['a', 'b', 'c'],
                              name='.target')
    matrix = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])

    model_frame = pdml.ModelFrame(matrix, target=target_series,
                                  index=['a', 'b', 'c'],
                                  columns=['A', 'B', 'C'])

    self.assertIsInstance(model_frame, pdml.ModelFrame)
    self.assertEqual(model_frame.shape, (3, 4))
    tm.assert_index_equal(model_frame.index, pd.Index(['a', 'b', 'c']))
    tm.assert_index_equal(model_frame.columns,
                          pd.Index(['.target', 'A', 'B', 'C']))

    wanted_data = pd.DataFrame(np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]),
                               index=['a', 'b', 'c'],
                               columns=['A', 'B', 'C'])
    tm.assert_frame_equal(model_frame.data, wanted_data)
    tm.assert_series_equal(model_frame.target, target_series)
    self.assertEqual(model_frame.target.name, '.target')
    self.assertEqual(model_frame.target_name, '.target')
def test_frame_target_object(self):
    """Check selecting the target by a datetime column label."""
    y2014 = datetime.datetime(2014, 1, 1)
    y2015 = datetime.datetime(2015, 1, 1)
    y2016 = datetime.datetime(2016, 1, 1)
    raw = pd.DataFrame({y2014: [1, 2, 3],
                        y2015: [4, 5, 6],
                        y2016: [7, 8, 9]},
                       index=['a', 'b', 'c'])

    model_frame = pdml.ModelFrame(raw, target=datetime.datetime(2016, 1, 1))

    self.assertIsInstance(model_frame, pdml.ModelFrame)
    self.assertEqual(model_frame.shape, (3, 3))
    tm.assert_index_equal(model_frame.index, pd.Index(['a', 'b', 'c']))

    wanted_columns = pd.DatetimeIndex(['2014-01-01', '2015-01-01',
                                       '2016-01-01'])
    tm.assert_index_equal(model_frame.columns, wanted_columns)
    # The first two columns remain as data; the last one is the target.
    tm.assert_frame_equal(model_frame.data, raw.iloc[:, :2])

    wanted_target = pd.Series([7, 8, 9], index=['a', 'b', 'c'],
                              name=pd.Timestamp('2016-01-01'))
    tm.assert_series_equal(model_frame.target, wanted_target)
    self.assertEqual(model_frame.target.name, datetime.datetime(2016, 1, 1))
    self.assertEqual(model_frame.target_name, datetime.datetime(2016, 1, 1))
def test_grid_search(self):
    """Check ``grid_search.describe`` against recorded scores for an XGBClassifier grid."""
    tuned_parameters = [{'max_depth': [3, 4],
                         'n_estimators': [50, 100]}]

    frame = pdml.ModelFrame(datasets.load_digits())
    search = frame.grid_search.GridSearchCV(frame.xgb.XGBClassifier(),
                                            tuned_parameters,
                                            cv=5)

    with tm.RNGContext(1):
        frame.fit(search)

    described = frame.grid_search.describe(search)

    # Recorded per-combination scores and the parameters that produced them.
    score_means = [0.89705064, 0.91764051, 0.91263216, 0.91930996]
    score_stds = [0.03244061, 0.03259985, 0.02764891, 0.0266436]
    wanted = pd.DataFrame({'mean': score_means,
                           'std': score_stds,
                           'max_depth': [3, 3, 4, 4],
                           'n_estimators': [50, 100, 50, 100]},
                          columns=['mean', 'std', 'max_depth',
                                   'n_estimators'])

    self.assertIsInstance(described, pdml.ModelFrame)
    tm.assert_frame_equal(described, wanted)
def test_frame_init_df_none(self):
    """Check construction from a DataFrame with an explicit ``target=None``."""
    # initialization by dataframe and none
    plain = pd.DataFrame({'A': [1, 2, 3],
                          'B': [4, 5, 6],
                          'C': [7, 8, 9]},
                         index=['a', 'b', 'c'],
                         columns=['A', 'B', 'C'])

    model_frame = pdml.ModelFrame(plain, target=None)

    self.assertIsInstance(model_frame, pdml.ModelFrame)
    self.assertEqual(model_frame.shape, (3, 3))
    tm.assert_index_equal(model_frame.index, pd.Index(['a', 'b', 'c']))
    tm.assert_index_equal(model_frame.columns, pd.Index(['A', 'B', 'C']))
    tm.assert_frame_equal(model_frame.data, plain)

    # Data is present, the target is absent and its name keeps the default.
    self.assertTrue(model_frame.has_data())
    self.assertTrue(model_frame.target is None)
    self.assertEqual(model_frame.target_name, '.target')
def test_frame_target_object_set(self):
    """Check assigning ``target`` on a frame whose column labels are datetimes."""
    y2014 = datetime.datetime(2014, 1, 1)
    y2015 = datetime.datetime(2015, 1, 1)
    y2016 = datetime.datetime(2016, 1, 1)
    raw = pd.DataFrame({y2014: [1, 2, 3],
                        y2015: [4, 5, 6],
                        y2016: [7, 8, 9]},
                       index=['a', 'b', 'c'])

    model_frame = pdml.ModelFrame(raw)
    model_frame.target = pd.Series(['A', 'B', 'C'],
                                   index=['a', 'b', 'c'], name=5)

    self.assertIsInstance(model_frame, pdml.ModelFrame)
    self.assertEqual(model_frame.shape, (3, 4))
    tm.assert_index_equal(model_frame.index, pd.Index(['a', 'b', 'c']))
    expected_columns = pd.Index([5, y2014, y2015, y2016])
    tm.assert_index_equal(model_frame.columns, expected_columns)
    tm.assert_frame_equal(model_frame.data, raw)
    expected_target = pd.Series(['A', 'B', 'C'],
                                index=['a', 'b', 'c'], name=5)
    tm.assert_series_equal(model_frame.target, expected_target)
    self.assertEqual(model_frame.target.name, 5)

    # name will be ignored if ModelFrame already has a target
    model_frame.target = pd.Series([10, 11, 12],
                                   index=['a', 'b', 'c'], name='X')

    self.assertIsInstance(model_frame, pdml.ModelFrame)
    self.assertEqual(model_frame.shape, (3, 4))
    tm.assert_index_equal(model_frame.index, pd.Index(['a', 'b', 'c']))
    expected_columns = pd.Index([5, y2014, y2015, y2016])
    tm.assert_index_equal(model_frame.columns, expected_columns)
    tm.assert_frame_equal(model_frame.data, raw)
    expected_target = pd.Series([10, 11, 12],
                                index=['a', 'b', 'c'], name=5)
    tm.assert_series_equal(model_frame.target, expected_target)
    self.assertEqual(model_frame.target.name, 5)
def test_frame_init_df_array_series(self):
    """Check construction from a 2-D ndarray with a Series target and explicit labels."""
    target_series = pd.Series([1, 2, 3], index=['a', 'b', 'c'],
                              name='.target')
    matrix = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])

    model_frame = pdml.ModelFrame(matrix, target=target_series,
                                  index=['a', 'b', 'c'],
                                  columns=['A', 'B', 'C'])

    self.assertIsInstance(model_frame, pdml.ModelFrame)
    self.assertEqual(model_frame.shape, (3, 4))
    tm.assert_index_equal(model_frame.index, pd.Index(['a', 'b', 'c']))
    tm.assert_index_equal(model_frame.columns,
                          pd.Index(['.target', 'A', 'B', 'C']))

    wanted_data = pd.DataFrame(np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]),
                               index=['a', 'b', 'c'],
                               columns=['A', 'B', 'C'])
    tm.assert_frame_equal(model_frame.data, wanted_data)
    tm.assert_series_equal(model_frame.target, target_series)
    self.assertEqual(model_frame.target.name, '.target')
    self.assertEqual(model_frame.target_name, '.target')
def test_frame_init_df_duplicated_columns(self):
    """Check that overlapping data/target column names yield two-level columns."""
    # initialization by dataframe and dataframe which have same columns
    features = pd.DataFrame({'A': [1, 2, 3],
                             'B': [4, 5, 6],
                             'C': [7, 8, 9]})
    overlapping_target = pd.DataFrame({'A': [10, 11, 12],
                                       'B': [13, 14, 15]})

    model_frame = pdml.ModelFrame(features, target=overlapping_target)

    # Expected layout: target columns first, each tagged with its origin.
    column_keys = [('.target', 'A'), ('.target', 'B'),
                   ('.data', 'A'), ('.data', 'B'), ('.data', 'C')]
    wanted_columns = pd.MultiIndex.from_tuples(column_keys)
    wanted = pd.DataFrame({('.target', 'A'): [10, 11, 12],
                           ('.target', 'B'): [13, 14, 15],
                           ('.data', 'A'): [1, 2, 3],
                           ('.data', 'B'): [4, 5, 6],
                           ('.data', 'C'): [7, 8, 9]},
                          columns=wanted_columns)
    tm.assert_frame_equal(model_frame, wanted)
def test_frame_init_df_str(self):
    """Check designating the target by column name at construction.

    Also verifies that naming a column absent from the data raises
    ValueError with the expected message.
    """
    # initialization by dataframe and str
    df = pd.DataFrame({'A': [1, 2, 3],
                       'B': [4, 5, 6],
                       'C': [7, 8, 9]},
                      index=['a', 'b', 'c'],
                      columns=['A', 'B', 'C'])

    mdf = pdml.ModelFrame(df, target='A')
    self.assertIsInstance(mdf, pdml.ModelFrame)
    self.assertEqual(mdf.shape, (3, 3))
    tm.assert_index_equal(mdf.index, pd.Index(['a', 'b', 'c']))
    tm.assert_index_equal(mdf.columns, pd.Index(['A', 'B', 'C']))
    tm.assert_frame_equal(mdf.data, df[['B', 'C']])
    tm.assert_series_equal(mdf.target, df['A'])
    self.assertEqual(mdf.target.name, 'A')
    self.assertEqual(mdf.target_name, 'A')

    msg = "Specified target 'X' is not included in data"
    # pytest.raises(match=...) replaces the deprecated assertRaisesRegexp
    # alias, matching the other tests in this file.
    with pytest.raises(ValueError, match=msg):
        mdf = pdml.ModelFrame(df, target='X')
def test_LabelBinarizer2(self):
    """Check LabelBinarizer on a ModelSeries and on the iris target."""
    labels = np.array(['X', 'Y', 'Z', 'X'])
    series = pdml.ModelSeries(labels)

    binarizer = series.preprocessing.LabelBinarizer()
    series.fit(binarizer)

    one_hot = series.transform(binarizer)
    self.assertIsInstance(one_hot, pdml.ModelFrame)

    wanted = pd.DataFrame({0: [1, 0, 0, 1],
                           1: [0, 1, 0, 0],
                           2: [0, 0, 1, 0]})
    tm.assert_frame_equal(one_hot, wanted)

    # Re-fit the same binarizer on the iris target (three blocks of 50 rows).
    iris_frame = pdml.ModelFrame(datasets.load_iris())
    iris_frame.target.fit(binarizer)
    one_hot = iris_frame.target.transform(binarizer)

    wanted = pd.DataFrame({0: [1] * 50 + [0] * 100,
                           1: [0] * 50 + [1] * 50 + [0] * 50,
                           2: [0] * 100 + [1] * 50})
    tm.assert_frame_equal(one_hot, wanted)

    # Assign the binarized frame back as a multi-column target.
    iris_frame = pdml.ModelFrame(datasets.load_iris())
    iris_frame.target.fit(binarizer)
    iris_frame.target = iris_frame.target.transform(binarizer)

    self.assertEqual(iris_frame.shape, (150, 7))
    tm.assert_frame_equal(iris_frame.target, wanted)
def test_LabelBinarizer2(self):
    """Check LabelBinarizer on a ModelSeries and on the iris target."""
    labels = np.array(['X', 'Y', 'Z', 'X'])
    series = pdml.ModelSeries(labels)

    binarizer = series.preprocessing.LabelBinarizer()
    series.fit(binarizer)

    one_hot = series.transform(binarizer)
    self.assertIsInstance(one_hot, pdml.ModelFrame)

    wanted = pd.DataFrame({0: [1, 0, 0, 1],
                           1: [0, 1, 0, 0],
                           2: [0, 0, 1, 0]})
    tm.assert_frame_equal(one_hot, wanted)

    # Re-fit the same binarizer on the iris target (three blocks of 50 rows).
    iris_frame = pdml.ModelFrame(datasets.load_iris())
    iris_frame.target.fit(binarizer)
    one_hot = iris_frame.target.transform(binarizer)

    wanted = pd.DataFrame({0: [1] * 50 + [0] * 100,
                           1: [0] * 50 + [1] * 50 + [0] * 50,
                           2: [0] * 100 + [1] * 50})
    tm.assert_frame_equal(one_hot, wanted)

    # Assign the binarized frame back as a multi-column target.
    iris_frame = pdml.ModelFrame(datasets.load_iris())
    iris_frame.target.fit(binarizer)
    iris_frame.target = iris_frame.target.transform(binarizer)

    self.assertEqual(iris_frame.shape, (150, 7))
    tm.assert_frame_equal(iris_frame.target, wanted)
def test_frame_init_df_duplicated(self):
    """Check behaviour when the target's name collides with a data column.

    Construction with such a target must raise ValueError, whereas assigning
    it to an existing ModelFrame is expected to overwrite the column.
    """
    # initialization by dataframe and duplicated target
    df = pd.DataFrame({'A': [1, 2, 3],
                       'B': [4, 5, 6],
                       'C': [7, 8, 9]},
                      columns=['A', 'B', 'C'])
    s = pd.Series([10, 11, 12], name='A')

    msg = "data and target must have unique names"
    # pytest.raises(match=...) replaces the deprecated assertRaisesRegexp
    # alias, matching the other tests in this file.
    with pytest.raises(ValueError, match=msg):
        pdml.ModelFrame(df, target=s)

    df = pdml.ModelFrame({'A': [1, 2, 3],
                          'B': [4, 5, 6],
                          'C': [7, 8, 9]},
                         columns=['A', 'B', 'C'])
    df.target = pd.Series([10, 11, 12], name='A')
    expected = pdml.ModelFrame({'A': [10, 11, 12],
                                'B': [4, 5, 6],
                                'C': [7, 8, 9]},
                               columns=['A', 'B', 'C'])
    tm.assert_frame_equal(df, expected)
def test_frame_init_df_duplicated_columns(self):
    """Check that overlapping data/target column names yield two-level columns."""
    # initialization by dataframe and dataframe which have same columns
    features = pd.DataFrame({'A': [1, 2, 3],
                             'B': [4, 5, 6],
                             'C': [7, 8, 9]})
    overlapping_target = pd.DataFrame({'A': [10, 11, 12],
                                       'B': [13, 14, 15]})

    model_frame = pdml.ModelFrame(features, target=overlapping_target)

    # Expected layout: target columns first, each tagged with its origin.
    column_keys = [('.target', 'A'), ('.target', 'B'),
                   ('.data', 'A'), ('.data', 'B'), ('.data', 'C')]
    wanted_columns = pd.MultiIndex.from_tuples(column_keys)
    wanted = pd.DataFrame({('.target', 'A'): [10, 11, 12],
                           ('.target', 'B'): [13, 14, 15],
                           ('.data', 'A'): [1, 2, 3],
                           ('.data', 'B'): [4, 5, 6],
                           ('.data', 'C'): [7, 8, 9]},
                          columns=wanted_columns)
    tm.assert_frame_equal(model_frame, wanted)
def test_grid_search(self):
    """Check ``model_selection.describe`` against recorded scores for an SVC grid."""
    rbf_grid = {'kernel': ['rbf'],
                'gamma': [1e-3, 1e-4],
                'C': [1, 10, 100]}
    linear_grid = {'kernel': ['linear'],
                   'C': [1, 10, 100]}
    tuned_parameters = [rbf_grid, linear_grid]

    frame = pdml.ModelFrame(datasets.load_digits())
    search = frame.model_selection.GridSearchCV(frame.svm.SVC(C=1),
                                                tuned_parameters,
                                                cv=5)

    with tm.RNGContext(1):
        frame.fit(search)

    described = frame.model_selection.describe(search)

    # Recorded scores: six rbf combinations followed by three linear ones.
    score_means = [0.97161937, 0.9476906, 0.97273233,
                   0.95937674, 0.97273233, 0.96271564,
                   0.94936004, 0.94936004, 0.94936004]
    score_stds = [0.01546977, 0.0221161, 0.01406514,
                  0.02295168, 0.01406514, 0.01779749,
                  0.01911084, 0.01911084, 0.01911084]
    # gamma does not apply to the linear kernel, hence NaN there.
    gammas = [0.001, 0.0001, 0.001, 0.0001, 0.001, 0.0001,
              np.nan, np.nan, np.nan]
    wanted = pd.DataFrame({'mean': score_means,
                           'std': score_stds,
                           'C': [1, 1, 10, 10, 100, 100, 1, 10, 100],
                           'gamma': gammas,
                           'kernel': ['rbf'] * 6 + ['linear'] * 3},
                          columns=['mean', 'std', 'C', 'gamma', 'kernel'])

    self.assertIsInstance(described, pdml.ModelFrame)
    tm.assert_frame_equal(described, wanted)
def test_grid_search(self):
    """Check ``grid_search.describe`` against recorded scores for an XGBClassifier grid."""
    tuned_parameters = [{'max_depth': [3, 4],
                         'n_estimators': [50, 100]}]

    frame = pdml.ModelFrame(datasets.load_digits())
    search = frame.grid_search.GridSearchCV(frame.xgb.XGBClassifier(),
                                            tuned_parameters,
                                            cv=5)

    with tm.RNGContext(1):
        frame.fit(search)

    described = frame.grid_search.describe(search)

    # Recorded per-combination scores and the parameters that produced them.
    score_means = [0.89705064, 0.91764051, 0.91263216, 0.91930996]
    score_stds = [0.03244061, 0.03259985, 0.02764891, 0.0266436]
    wanted = pd.DataFrame({'mean': score_means,
                           'std': score_stds,
                           'max_depth': [3, 3, 4, 4],
                           'n_estimators': [50, 100, 50, 100]},
                          columns=['mean', 'std', 'max_depth',
                                   'n_estimators'])

    self.assertIsInstance(described, pdml.ModelFrame)
    tm.assert_frame_equal(described, wanted)
def test_frame_target_object(self):
    """Check selecting the target by a datetime column label."""
    y2014 = datetime.datetime(2014, 1, 1)
    y2015 = datetime.datetime(2015, 1, 1)
    y2016 = datetime.datetime(2016, 1, 1)
    raw = pd.DataFrame({y2014: [1, 2, 3],
                        y2015: [4, 5, 6],
                        y2016: [7, 8, 9]},
                       index=['a', 'b', 'c'])

    model_frame = pdml.ModelFrame(raw, target=datetime.datetime(2016, 1, 1))

    self.assertIsInstance(model_frame, pdml.ModelFrame)
    self.assertEqual(model_frame.shape, (3, 3))
    tm.assert_index_equal(model_frame.index, pd.Index(['a', 'b', 'c']))

    wanted_columns = pd.DatetimeIndex(['2014-01-01', '2015-01-01',
                                       '2016-01-01'])
    tm.assert_index_equal(model_frame.columns, wanted_columns)
    # The first two columns remain as data; the last one is the target.
    tm.assert_frame_equal(model_frame.data, raw.iloc[:, :2])

    wanted_target = pd.Series([7, 8, 9], index=['a', 'b', 'c'],
                              name=pd.Timestamp('2016-01-01'))
    tm.assert_series_equal(model_frame.target, wanted_target)
    self.assertEqual(model_frame.target.name, datetime.datetime(2016, 1, 1))
    self.assertEqual(model_frame.target_name, datetime.datetime(2016, 1, 1))
def test_frame_init_df_df(self):
    """Check construction from a DataFrame with a DataFrame target.

    Covers a two-column target (multi-target frame), a target whose index
    does not match the data (expected to raise ValueError) and a
    single-column target, which is expected to behave like a Series target.
    """
    # initialization by dataframe and dataframe
    df = pd.DataFrame({'A': [1, 2, 3],
                       'B': [4, 5, 6],
                       'C': [7, 8, 9]},
                      index=['a', 'b', 'c'],
                      columns=['A', 'B', 'C'])
    target = pd.DataFrame({'t1': [10, 11, 12],
                           't2': [13, 14, 15]},
                          index=['a', 'b', 'c'])
    mdf = pdml.ModelFrame(df, target=target)

    self.assertIsInstance(mdf, pdml.ModelFrame)
    self.assertEqual(mdf.shape, (3, 5))
    expected = pd.DataFrame({'t1': [10, 11, 12],
                             't2': [13, 14, 15],
                             'A': [1, 2, 3],
                             'B': [4, 5, 6],
                             'C': [7, 8, 9]},
                            index=['a', 'b', 'c'],
                            columns=['t1', 't2', 'A', 'B', 'C'])
    tm.assert_frame_equal(mdf, expected)
    tm.assert_index_equal(mdf.index, pd.Index(['a', 'b', 'c']))
    tm.assert_index_equal(mdf.columns,
                          pd.Index(['t1', 't2', 'A', 'B', 'C']))
    tm.assert_frame_equal(mdf.data, df)
    tm.assert_frame_equal(mdf.target, target)
    tm.assert_index_equal(mdf.target.columns, pd.Index(['t1', 't2']))
    tm.assert_index_equal(mdf.target_name, pd.Index(['t1', 't2']))
    self.assertTrue(mdf.has_multi_targets())

    # Default integer index does not match the data's 'a', 'b', 'c' labels.
    target = pd.DataFrame({'t1': [10, 11, 12],
                           't2': [13, 14, 15]})
    msg = 'data and target must have equal index'
    # pytest.raises(match=...) replaces the deprecated assertRaisesRegexp
    # alias, matching the other tests in this file.
    with pytest.raises(ValueError, match=msg):
        mdf = pdml.ModelFrame(df, target=target)

    # single column DataFrame will results in single target column
    target = pd.DataFrame({'t1': [10, 11, 12]}, index=['a', 'b', 'c'])
    mdf = pdml.ModelFrame(df, target=target)
    self.assertIsInstance(mdf, pdml.ModelFrame)
    self.assertEqual(mdf.shape, (3, 4))
    tm.assert_index_equal(mdf.index, pd.Index(['a', 'b', 'c']))
    tm.assert_index_equal(mdf.columns, pd.Index(['t1', 'A', 'B', 'C']))
    tm.assert_frame_equal(mdf.data, df)

    target = pd.Series([10, 11, 12], name='t1', index=['a', 'b', 'c'])
    tm.assert_series_equal(mdf.target, target)
    self.assertEqual(mdf.target_name, 't1')
def test_frame_init_df_str(self):
    """Check designating the target by column name, including an unknown name."""
    # initialization by dataframe and str
    plain = pd.DataFrame({'A': [1, 2, 3],
                          'B': [4, 5, 6],
                          'C': [7, 8, 9]},
                         index=['a', 'b', 'c'],
                         columns=['A', 'B', 'C'])

    model_frame = pdml.ModelFrame(plain, target='A')

    self.assertIsInstance(model_frame, pdml.ModelFrame)
    self.assertEqual(model_frame.shape, (3, 3))
    tm.assert_index_equal(model_frame.index, pd.Index(['a', 'b', 'c']))
    tm.assert_index_equal(model_frame.columns, pd.Index(['A', 'B', 'C']))
    # Column 'A' becomes the target; the remaining columns are the data.
    tm.assert_frame_equal(model_frame.data, plain[['B', 'C']])
    tm.assert_series_equal(model_frame.target, plain['A'])
    self.assertEqual(model_frame.target.name, 'A')
    self.assertEqual(model_frame.target_name, 'A')

    # A name missing from the columns is expected to be rejected.
    msg = "Specified target 'X' is not included in data"
    with pytest.raises(ValueError, match=msg):
        model_frame = pdml.ModelFrame(plain, target='X')
def test_grid_search(self):
    """Check ``model_selection.describe`` against recorded scores for an SVC grid."""
    rbf_grid = {'kernel': ['rbf'],
                'gamma': [1e-3, 1e-4],
                'C': [1, 10, 100]}
    linear_grid = {'kernel': ['linear'],
                   'C': [1, 10, 100]}
    tuned_parameters = [rbf_grid, linear_grid]

    frame = pdml.ModelFrame(datasets.load_digits())
    search = frame.model_selection.GridSearchCV(frame.svm.SVC(C=1),
                                                tuned_parameters,
                                                cv=5)

    with tm.RNGContext(1):
        frame.fit(search)

    described = frame.model_selection.describe(search)

    # Recorded scores: six rbf combinations followed by three linear ones.
    score_means = [0.97161937, 0.9476906, 0.97273233,
                   0.95937674, 0.97273233, 0.96271564,
                   0.94936004, 0.94936004, 0.94936004]
    score_stds = [0.01546977, 0.0221161, 0.01406514,
                  0.02295168, 0.01406514, 0.01779749,
                  0.01911084, 0.01911084, 0.01911084]
    # gamma does not apply to the linear kernel, hence NaN there.
    gammas = [0.001, 0.0001, 0.001, 0.0001, 0.001, 0.0001,
              np.nan, np.nan, np.nan]
    wanted = pd.DataFrame({'mean': score_means,
                           'std': score_stds,
                           'C': [1, 1, 10, 10, 100, 100, 1, 10, 100],
                           'gamma': gammas,
                           'kernel': ['rbf'] * 6 + ['linear'] * 3},
                          columns=['mean', 'std', 'C', 'gamma', 'kernel'])

    self.assertIsInstance(described, pdml.ModelFrame)
    tm.assert_frame_equal(described, wanted)
def test_frame_init_df_series(self):
    """Initialise a ModelFrame from a DataFrame and a Series target.

    A Series sharing the data's index is accepted and its name is used as
    the target column name (checked for '.target' and 'XXX'); a Series
    with a different index must raise ``ValueError``.
    """
    row_labels = ['a', 'b', 'c']
    frame = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6], 'C': [7, 8, 9]},
                         index=row_labels, columns=['A', 'B', 'C'])

    # aligned, named series: the series name becomes the target column
    for target_label in ('.target', 'XXX'):
        series = pd.Series([1, 2, 3], index=row_labels, name=target_label)
        model = pdml.ModelFrame(frame, target=series)

        self.assertIsInstance(model, pdml.ModelFrame)
        self.assertEqual(model.shape, (3, 4))
        tm.assert_index_equal(model.index, pd.Index(row_labels))
        tm.assert_index_equal(model.columns,
                              pd.Index([target_label, 'A', 'B', 'C']))
        tm.assert_frame_equal(model.data, frame)
        tm.assert_series_equal(model.target, series)
        self.assertEqual(model.target.name, target_label)
        self.assertEqual(model.target_name, target_label)

    # series built without an index does not share the data's labels
    unaligned = pd.Series([1, 2, 3])
    with pytest.raises(ValueError,
                       match='data and target must have equal index'):
        model = pdml.ModelFrame(frame, target=unaligned)
def test_train_test_split_keep_index(self):
    """Check ``train_test_split`` keeps row labels unless asked to reset.

    The split made with ``self.random_state`` is compared against fixed
    expected rows, first for a frame without a target and then for a
    frame with one, each with and without ``reset_index=True``.
    """
    labels = 'a b c d e f g h'.split(' ')
    train_rows = ['g', 'a', 'e', 'f', 'd', 'h']
    test_rows = ['c', 'b']

    # frame without a target
    df = pdml.ModelFrame({'A': [1, 2, 3, 4, 5, 6, 7, 8],
                          'B': [1, 2, 3, 4, 5, 6, 7, 8]},
                         index=labels)
    exp_train = df.loc[train_rows]
    exp_test = df.loc[test_rows]

    tr, te = df.ms.train_test_split(random_state=self.random_state)
    tm.assert_frame_equal(tr, exp_train)
    tm.assert_frame_equal(te, exp_test)

    tr, te = df.ms.train_test_split(random_state=self.random_state,
                                    reset_index=True)
    tm.assert_frame_equal(tr, exp_train.reset_index(drop=True))
    tm.assert_frame_equal(te, exp_test.reset_index(drop=True))

    # frame with a target: target values must follow the same rows
    df = pdml.ModelFrame({'A': [1, 2, 3, 4, 5, 6, 7, 8],
                          'B': [1, 2, 3, 4, 5, 6, 7, 8]},
                         index=labels,
                         target=[1, 2, 3, 4, 5, 6, 7, 8])
    exp_train = df.loc[train_rows]
    exp_test = df.loc[test_rows]
    exp_train_target = np.array([7, 1, 5, 6, 4, 8])
    exp_test_target = np.array([3, 2])

    tr, te = df.ms.train_test_split(random_state=self.random_state)
    tm.assert_frame_equal(tr, exp_train)
    tm.assert_numpy_array_equal(tr.target.values, exp_train_target)
    tm.assert_frame_equal(te, exp_test)
    tm.assert_numpy_array_equal(te.target.values, exp_test_target)

    tr, te = df.ms.train_test_split(random_state=self.random_state,
                                    reset_index=True)
    tm.assert_frame_equal(tr, exp_train.reset_index(drop=True))
    tm.assert_numpy_array_equal(tr.target.values, exp_train_target)
    tm.assert_frame_equal(te, exp_test.reset_index(drop=True))
    tm.assert_numpy_array_equal(te.target.values, exp_test_target)
def test_frame_data_proparty(self):
    """Exercise setting and deleting the ``data`` property of a ModelFrame.

    Covers replacing the data with a DataFrame and with a target-less
    ModelFrame, the two rejected assignments (data containing the target
    column name; a ModelFrame that has a target), and ``del mdf.data``,
    after which only the target column remains.
    """
    df = pd.DataFrame({'A': [1, 2, 3],
                       'B': [4, 5, 6],
                       'C': [7, 8, 9]},
                      index=['a', 'b', 'c'],
                      columns=['A', 'B', 'C'])
    s = pd.Series([1, 2, 3], index=['a', 'b', 'c'], name='.target')
    mdf = pdml.ModelFrame(df, target=s)
    self.assertIsInstance(mdf, pdml.ModelFrame)

    new = pd.DataFrame({'X': [1, 2, 3],
                        'Y': [4, 5, 6]},
                       index=['a', 'b', 'c'],
                       columns=['X', 'Y'])
    # set data property
    mdf.data = new
    self.assertIsInstance(mdf, pdml.ModelFrame)
    self.assertEqual(mdf.shape, (3, 3))
    tm.assert_index_equal(mdf.index, pd.Index(['a', 'b', 'c']))
    tm.assert_index_equal(mdf.columns, pd.Index(['.target', 'X', 'Y']))
    tm.assert_frame_equal(mdf.data, new)
    tm.assert_series_equal(mdf.target, s)
    self.assertEqual(mdf.target.name, '.target')
    self.assertEqual(mdf.target_name, '.target')

    new = pdml.ModelFrame({'M': [1, 2, 3],
                           'N': [4, 5, 6]},
                          index=['a', 'b', 'c'],
                          columns=['M', 'N'])
    # set data property
    mdf.data = new
    self.assertIsInstance(mdf, pdml.ModelFrame)
    self.assertEqual(mdf.shape, (3, 3))
    tm.assert_index_equal(mdf.index, pd.Index(['a', 'b', 'c']))
    tm.assert_index_equal(mdf.columns, pd.Index(['.target', 'M', 'N']))
    tm.assert_frame_equal(mdf.data, new)
    tm.assert_series_equal(mdf.target, s)
    self.assertEqual(mdf.target.name, '.target')
    self.assertEqual(mdf.target_name, '.target')

    new = pd.DataFrame({'.target': [1, 2, 3],
                        'K': [4, 5, 6]},
                       index=['a', 'b', 'c'])

    # unable to set data if passed value has the same column as the target
    # (pytest.raises replaces the deprecated assertRaisesRegexp alias,
    # matching the sibling tests in this file)
    msg = "Passed data has the same column name as the target '.target'"
    with pytest.raises(ValueError, match=msg):
        mdf.data = new

    # unable to set ModelFrame with target attribute
    msg = "Cannot update with ModelFrame which has target attribute"
    with pytest.raises(ValueError, match=msg):
        mdf.data = mdf

    # set delete property
    del mdf.data
    self.assertIsInstance(mdf, pdml.ModelFrame)
    self.assertEqual(mdf.shape, (3, 1))
    tm.assert_index_equal(mdf.index, pd.Index(['a', 'b', 'c']))
    tm.assert_index_equal(mdf.columns, pd.Index(['.target']))
    # assertIsNone reports the actual value on failure
    self.assertIsNone(mdf.data)
    tm.assert_series_equal(mdf.target, s)
    self.assertEqual(mdf.target.name, '.target')
    self.assertEqual(mdf.target_name, '.target')
def test_split_keep_index(self):
    """Check ``model_selection.split`` keeps labels unless asked to reset.

    A 3-fold ``KFold`` over eight labelled rows must yield three
    (train, test) pairs equal to fixed positional slices of the frame;
    with ``reset_index=True`` the same rows come back with a fresh index.
    """
    df = pdml.ModelFrame({'A': [1, 2, 3, 4, 5, 6, 7, 8],
                          'B': [1, 2, 3, 4, 5, 6, 7, 8]},
                         index='a b c d e f g h'.split(' '))
    kf = df.model_selection.KFold(3, random_state=self.random_state)

    # (train, test) rows expected for each of the three folds
    expected_folds = [
        (df.iloc[3:, :], df.iloc[:3, :]),
        (df.iloc[[0, 1, 2, 6, 7], :], df.iloc[3:6, :]),
        (df.iloc[:6, :], df.iloc[6:, :]),
    ]

    # original labels are kept by default
    folded = list(df.model_selection.split(kf))
    self.assertEqual(len(folded), 3)
    for fold, (exp_train, exp_test) in zip(folded, expected_folds):
        tm.assert_frame_equal(fold[0], exp_train)
        tm.assert_frame_equal(fold[1], exp_test)

    # reset_index=True drops the labels
    folded = list(df.model_selection.split(kf, reset_index=True))
    self.assertEqual(len(folded), 3)
    for fold, (exp_train, exp_test) in zip(folded, expected_folds):
        tm.assert_frame_equal(fold[0], exp_train.reset_index(drop=True))
        tm.assert_frame_equal(fold[1], exp_test.reset_index(drop=True))
def test_frame_init_dict_list(self):
    """Initialise a ModelFrame from DataFrame/dict data and list/str targets.

    Four cases are checked: DataFrame + list target, dict + list target,
    dict + target column name, and dict data with no target at all.
    """
    # --- DataFrame data with a plain list as target
    labels = ['a', 'b', 'c']
    frame = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6], 'C': [7, 8, 9]},
                         index=labels, columns=['A', 'B', 'C'])
    target_values = [1, 2, 3]
    model = pdml.ModelFrame(frame, target=target_values)

    self.assertIsInstance(model, pdml.ModelFrame)
    self.assertEqual(model.shape, (3, 4))
    tm.assert_index_equal(model.index, pd.Index(labels))
    tm.assert_index_equal(model.columns,
                          pd.Index(['.target', 'A', 'B', 'C']))
    tm.assert_frame_equal(model.data, frame)
    # the list target is compared as a '.target' series on the data's index
    exp_target = pd.Series([1, 2, 3], index=labels, name='.target')
    tm.assert_series_equal(model.target, exp_target)
    self.assertEqual(model.target.name, '.target')
    self.assertEqual(model.target_name, '.target')

    # --- dict data with a plain list as target
    raw = {'A': [1, 2, 3],
           'B': [4, 5, 6],
           'C': [7, 8, 9]}
    target_values = [1, 2, 3]
    model = pdml.ModelFrame(raw, target=target_values)

    self.assertIsInstance(model, pdml.ModelFrame)
    self.assertEqual(model.shape, (3, 4))
    tm.assert_index_equal(model.index, pd.Index([0, 1, 2]))
    tm.assert_index_equal(model.columns,
                          pd.Index(['.target', 'A', 'B', 'C']))
    exp_data = pd.DataFrame(raw)
    tm.assert_frame_equal(model.data, exp_data)
    exp_target = pd.Series([1, 2, 3], index=[0, 1, 2], name='.target')
    tm.assert_series_equal(model.target, exp_target)
    self.assertEqual(model.target.name, '.target')
    self.assertEqual(model.target_name, '.target')

    # --- dict data, target given as a column name
    model = pdml.ModelFrame(raw, target='A')

    self.assertIsInstance(model, pdml.ModelFrame)
    self.assertEqual(model.shape, (3, 3))
    tm.assert_index_equal(model.index, pd.Index([0, 1, 2]))
    tm.assert_index_equal(model.columns, pd.Index(['A', 'B', 'C']))
    exp_frame = pd.DataFrame(raw)
    tm.assert_frame_equal(model.data, exp_frame[['B', 'C']])
    tm.assert_series_equal(model.target, exp_frame['A'])
    self.assertEqual(model.target.name, 'A')
    self.assertEqual(model.target_name, 'A')

    # --- dict data without any target
    model = pdml.ModelFrame({'A': [1, 2, 3],
                             'B': [4, 5, 6],
                             'C': [7, 8, 9]},
                            index=labels,
                            columns=['A', 'B', 'C'])

    self.assertIsInstance(model, pdml.ModelFrame)
    self.assertEqual(model.shape, (3, 3))
    tm.assert_index_equal(model.index, pd.Index(labels))
    tm.assert_index_equal(model.columns, pd.Index(['A', 'B', 'C']))
    # with no target, the data view equals the whole frame
    tm.assert_frame_equal(model.data, model)
    self.assertEqual(model.target_name, '.target')
def test_frame_target_proparty(self):
    """Exercise assigning to the ``target`` property of a ModelFrame.

    Covers a Series named '.target', a Series with another name (expected
    to warn and keep the '.target' name), a Series with a mismatched index
    (``ValueError``), a list, a list of the wrong length (``ValueError``)
    and ``None``, after which no target column remains.
    """
    df = pd.DataFrame({'A': [1, 2, 3],
                       'B': [4, 5, 6],
                       'C': [7, 8, 9]},
                      index=['a', 'b', 'c'],
                      columns=['A', 'B', 'C'])
    s = pd.Series([1, 2, 3], index=['a', 'b', 'c'])
    mdf = pdml.ModelFrame(df, target=s)

    new = pd.Series([4, 5, 6], index=['a', 'b', 'c'], name='.target')
    # set target property
    mdf.target = new
    self.assertIsInstance(mdf, pdml.ModelFrame)
    self.assertEqual(mdf.shape, (3, 4))
    tm.assert_index_equal(mdf.index, pd.Index(['a', 'b', 'c']))
    tm.assert_index_equal(mdf.columns,
                          pd.Index(['.target', 'A', 'B', 'C']))
    tm.assert_frame_equal(mdf.data, df)
    tm.assert_series_equal(mdf.target, new)
    self.assertEqual(mdf.target.name, '.target')
    self.assertEqual(mdf.target_name, '.target')

    # a differently named series must warn; the column stays '.target'
    with tm.assert_produces_warning(UserWarning):
        new = pd.Series([4, 5, 6], index=['a', 'b', 'c'], name='xxx')
        # set target property
        mdf.target = new
    self.assertIsInstance(mdf, pdml.ModelFrame)
    self.assertEqual(mdf.shape, (3, 4))
    tm.assert_index_equal(mdf.index, pd.Index(['a', 'b', 'c']))
    tm.assert_index_equal(mdf.columns,
                          pd.Index(['.target', 'A', 'B', 'C']))
    tm.assert_frame_equal(mdf.data, df)
    exp_target = pd.Series(new, name='.target')
    tm.assert_series_equal(mdf.target, exp_target)
    self.assertEqual(mdf.target.name, '.target')
    self.assertEqual(mdf.target_name, '.target')

    # pytest.raises(match=...) replaces the deprecated assertRaisesRegexp
    # alias and matches the style of the sibling tests in this file
    new = pd.Series([4, 5, 6], name='.target')
    with pytest.raises(ValueError,
                       match='data and target must have equal index'):
        mdf.target = new

    # set target property
    mdf.target = [7, 8, 9]
    self.assertIsInstance(mdf, pdml.ModelFrame)
    self.assertEqual(mdf.shape, (3, 4))
    tm.assert_index_equal(mdf.index, pd.Index(['a', 'b', 'c']))
    tm.assert_index_equal(mdf.columns,
                          pd.Index(['.target', 'A', 'B', 'C']))
    tm.assert_frame_equal(mdf.data, df)
    expected = pd.Series([7, 8, 9], index=['a', 'b', 'c'], name='.target')
    tm.assert_series_equal(mdf.target, expected)
    self.assertEqual(mdf.target.name, '.target')
    self.assertEqual(mdf.target_name, '.target')

    # The wording of the length-mismatch error depends on the pandas
    # version (the newer revision of this test switches on pandas 0.23),
    # so accept either message.
    msg = ('Wrong number of items passed 2, placement implies 3'
           '|Length of passed values is 2, index implies 3')
    with pytest.raises(ValueError, match=msg):
        mdf.target = [1, 2]

    # set target property
    mdf.target = None
    self.assertIsInstance(mdf, pdml.ModelFrame)
    self.assertEqual(mdf.shape, (3, 3))
    tm.assert_index_equal(mdf.index, pd.Index(['a', 'b', 'c']))
    tm.assert_index_equal(mdf.columns, pd.Index(['A', 'B', 'C']))
    tm.assert_frame_equal(mdf.data, df)
    self.assertEqual(mdf.target_name, '.target')
def test_frame_init_df_target_setter(self):
    """Assign DataFrame targets to a ModelFrame created without a target.

    Three assignments are checked: an initial two-column target, a second
    two-column target (expected to warn and be renamed to the existing
    target names), and a three-column target that replaces the old one.
    """
    index = ['a', 'b', 'c']
    data_cols = ['A', 'B', 'C']
    data_values = {'A': [1, 2, 3], 'B': [4, 5, 6], 'C': [7, 8, 9]}
    df = pd.DataFrame(data_values, index=index, columns=data_cols)
    mdf = pdml.ModelFrame(df)
    self.assertFalse(mdf.has_target())

    def check_state(target_values, target_cols):
        # Assert that mdf holds the given target followed by the data.
        all_cols = target_cols + data_cols
        combined = dict(data_values)
        combined.update(target_values)

        self.assertIsInstance(mdf, pdml.ModelFrame)
        self.assertEqual(mdf.shape, (3, len(all_cols)))
        expected = pd.DataFrame(combined, index=index, columns=all_cols)
        tm.assert_frame_equal(mdf, expected)
        tm.assert_index_equal(mdf.index, pd.Index(index))
        tm.assert_index_equal(mdf.columns, pd.Index(all_cols))
        tm.assert_frame_equal(mdf.data, df)
        expected = pd.DataFrame(target_values, index=index)
        tm.assert_frame_equal(mdf.target, expected)
        tm.assert_index_equal(mdf.target.columns, pd.Index(target_cols))
        tm.assert_index_equal(mdf.target_name, pd.Index(target_cols))
        self.assertTrue(mdf.has_multi_targets())

    # first assignment: two target columns on a target-less frame
    mdf.target = pd.DataFrame({'t1': [10, 11, 12], 't2': [13, 14, 15]},
                              index=index)
    check_state({'t1': [10, 11, 12], 't2': [13, 14, 15]}, ['t1', 't2'])

    # when the target has the same length as the target_name,
    # it is renamed to the existing target ['t1', 't2']
    with tm.assert_produces_warning(UserWarning):
        mdf.target = pd.DataFrame({'x1': [20, 21, 22],
                                   'x2': [23, 24, 25]},
                                  index=index)
    check_state({'t1': [20, 21, 22], 't2': [23, 24, 25]}, ['t1', 't2'])

    # when the target has a different length from the target_name,
    # the target is replaced
    mdf.target = pd.DataFrame({'x1': [20, 21, 22],
                               'x2': [23, 24, 25],
                               'x3': [25, 26, 27]},
                              index=index)
    check_state({'x1': [20, 21, 22],
                 'x2': [23, 24, 25],
                 'x3': [25, 26, 27]},
                ['x1', 'x2', 'x3'])
def test_split_keep_index(self):
    """Verify row labels survive ``model_selection.split`` by default.

    Uses a 3-fold ``KFold`` on an eight-row frame and compares each
    (train, test) pair with positional slices of the frame, then repeats
    with ``reset_index=True`` where the labels are expected to be dropped.
    """
    frame = pdml.ModelFrame({'A': [1, 2, 3, 4, 5, 6, 7, 8],
                             'B': [1, 2, 3, 4, 5, 6, 7, 8]},
                            index='a b c d e f g h'.split(' '))
    splitter = frame.model_selection.KFold(
        3, random_state=self.random_state)

    def expected_pairs(reset):
        # Build the expected (train, test) frames for the three folds.
        pairs = [(frame.iloc[3:, :], frame.iloc[:3, :]),
                 (frame.iloc[[0, 1, 2, 6, 7], :], frame.iloc[3:6, :]),
                 (frame.iloc[:6, :], frame.iloc[6:, :])]
        if not reset:
            return pairs
        return [(train.reset_index(drop=True),
                 test.reset_index(drop=True))
                for train, test in pairs]

    # labels preserved
    folds = [f for f in frame.model_selection.split(splitter)]
    self.assertEqual(len(folds), 3)
    for position, (exp_train, exp_test) in enumerate(expected_pairs(False)):
        tm.assert_frame_equal(folds[position][0], exp_train)
        tm.assert_frame_equal(folds[position][1], exp_test)

    # labels reset
    folds = [f for f in
             frame.model_selection.split(splitter, reset_index=True)]
    self.assertEqual(len(folds), 3)
    for position, (exp_train, exp_test) in enumerate(expected_pairs(True)):
        tm.assert_frame_equal(folds[position][0], exp_train)
        tm.assert_frame_equal(folds[position][1], exp_test)
def test_frame_init_df_target_setter(self):
    """Set multi-column targets on a ModelFrame that starts without one.

    Checks the initial two-column assignment, a same-width reassignment
    (expected to warn and keep the existing target names) and a
    three-column reassignment that replaces the target columns.
    """
    rows = ['a', 'b', 'c']
    features = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6], 'C': [7, 8, 9]},
                            index=rows, columns=['A', 'B', 'C'])
    model = pdml.ModelFrame(features)
    self.assertFalse(model.has_target())

    # --- initial target with columns t1, t2
    first_target = pd.DataFrame({'t1': [10, 11, 12], 't2': [13, 14, 15]},
                                index=rows)
    model.target = first_target

    self.assertIsInstance(model, pdml.ModelFrame)
    self.assertEqual(model.shape, (3, 5))
    two_target_cols = ['t1', 't2', 'A', 'B', 'C']
    exp_frame = pd.DataFrame({'t1': [10, 11, 12],
                              't2': [13, 14, 15],
                              'A': [1, 2, 3],
                              'B': [4, 5, 6],
                              'C': [7, 8, 9]},
                             index=rows, columns=two_target_cols)
    tm.assert_frame_equal(model, exp_frame)
    tm.assert_index_equal(model.index, pd.Index(rows))
    tm.assert_index_equal(model.columns, pd.Index(two_target_cols))
    tm.assert_frame_equal(model.data, features)
    tm.assert_frame_equal(model.target, first_target)
    tm.assert_index_equal(model.target.columns, pd.Index(['t1', 't2']))
    tm.assert_index_equal(model.target_name, pd.Index(['t1', 't2']))
    self.assertTrue(model.has_multi_targets())

    # --- same number of columns: values replaced, names t1/t2 kept
    second_target = pd.DataFrame({'x1': [20, 21, 22], 'x2': [23, 24, 25]},
                                 index=rows)
    with tm.assert_produces_warning(UserWarning):
        # when the target has the same length as the target_name,
        # it is renamed to the existing target ['t1', 't2']
        model.target = second_target

    self.assertIsInstance(model, pdml.ModelFrame)
    self.assertEqual(model.shape, (3, 5))
    exp_frame = pd.DataFrame({'t1': [20, 21, 22],
                              't2': [23, 24, 25],
                              'A': [1, 2, 3],
                              'B': [4, 5, 6],
                              'C': [7, 8, 9]},
                             index=rows, columns=two_target_cols)
    tm.assert_frame_equal(model, exp_frame)
    tm.assert_index_equal(model.index, pd.Index(rows))
    tm.assert_index_equal(model.columns, pd.Index(two_target_cols))
    tm.assert_frame_equal(model.data, features)
    exp_target = pd.DataFrame({'t1': [20, 21, 22], 't2': [23, 24, 25]},
                              index=rows)
    tm.assert_frame_equal(model.target, exp_target)
    tm.assert_index_equal(model.target.columns, pd.Index(['t1', 't2']))
    tm.assert_index_equal(model.target_name, pd.Index(['t1', 't2']))
    self.assertTrue(model.has_multi_targets())

    # --- different number of columns
    third_target = pd.DataFrame({'x1': [20, 21, 22],
                                 'x2': [23, 24, 25],
                                 'x3': [25, 26, 27]},
                                index=rows)
    # when the target has a different length from the target_name,
    # the target is replaced
    model.target = third_target

    self.assertIsInstance(model, pdml.ModelFrame)
    self.assertEqual(model.shape, (3, 6))
    three_target_cols = ['x1', 'x2', 'x3', 'A', 'B', 'C']
    exp_frame = pd.DataFrame({'x1': [20, 21, 22],
                              'x2': [23, 24, 25],
                              'x3': [25, 26, 27],
                              'A': [1, 2, 3],
                              'B': [4, 5, 6],
                              'C': [7, 8, 9]},
                             index=rows, columns=three_target_cols)
    tm.assert_frame_equal(model, exp_frame)
    tm.assert_index_equal(model.index, pd.Index(rows))
    tm.assert_index_equal(model.columns, pd.Index(three_target_cols))
    tm.assert_frame_equal(model.data, features)
    tm.assert_frame_equal(model.target, third_target)
    tm.assert_index_equal(model.target.columns,
                          pd.Index(['x1', 'x2', 'x3']))
    tm.assert_index_equal(model.target_name,
                          pd.Index(['x1', 'x2', 'x3']))
    self.assertTrue(model.has_multi_targets())
def test_frame_target_proparty(self):
    """Assign various values to ``ModelFrame.target`` and check the result.

    Covers a '.target' series, a differently named series (expected to
    warn and keep '.target'), a series with a mismatched index
    (``ValueError``), a list, a too-short list (``ValueError``, with a
    message that depends on the pandas version) and ``None``.
    """
    rows = ['a', 'b', 'c']
    with_target_cols = ['.target', 'A', 'B', 'C']
    features = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6], 'C': [7, 8, 9]},
                            index=rows, columns=['A', 'B', 'C'])
    initial = pd.Series([1, 2, 3], index=rows)
    model = pdml.ModelFrame(features, target=initial)

    # --- series already named '.target'
    replacement = pd.Series([4, 5, 6], index=rows, name='.target')
    model.target = replacement
    self.assertIsInstance(model, pdml.ModelFrame)
    self.assertEqual(model.shape, (3, 4))
    tm.assert_index_equal(model.index, pd.Index(rows))
    tm.assert_index_equal(model.columns, pd.Index(with_target_cols))
    tm.assert_frame_equal(model.data, features)
    tm.assert_series_equal(model.target, replacement)
    self.assertEqual(model.target.name, '.target')
    self.assertEqual(model.target_name, '.target')

    # --- series with another name: warns, column name stays '.target'
    with tm.assert_produces_warning(UserWarning):
        replacement = pd.Series([4, 5, 6], index=rows, name='xxx')
        model.target = replacement
    self.assertIsInstance(model, pdml.ModelFrame)
    self.assertEqual(model.shape, (3, 4))
    tm.assert_index_equal(model.index, pd.Index(rows))
    tm.assert_index_equal(model.columns, pd.Index(with_target_cols))
    tm.assert_frame_equal(model.data, features)
    renamed = pd.Series(replacement, name='.target')
    tm.assert_series_equal(model.target, renamed)
    self.assertEqual(model.target.name, '.target')
    self.assertEqual(model.target_name, '.target')

    # --- series whose index does not match the frame
    replacement = pd.Series([4, 5, 6], name='.target')
    with pytest.raises(ValueError,
                       match='data and target must have equal index'):
        model.target = replacement

    # --- plain list
    model.target = [7, 8, 9]
    self.assertIsInstance(model, pdml.ModelFrame)
    self.assertEqual(model.shape, (3, 4))
    tm.assert_index_equal(model.index, pd.Index(rows))
    tm.assert_index_equal(model.columns, pd.Index(with_target_cols))
    tm.assert_frame_equal(model.data, features)
    from_list = pd.Series([7, 8, 9], index=rows, name='.target')
    tm.assert_series_equal(model.target, from_list)
    self.assertEqual(model.target.name, '.target')
    self.assertEqual(model.target_name, '.target')

    # --- list of the wrong length; message selected by the compat flag
    msg = ('Length of passed values is 2, index implies 3'
           if pdml.compat._PANDAS_ge_023
           else 'Wrong number of items passed 2, placement implies 3')
    with pytest.raises(ValueError, match=msg):
        model.target = [1, 2]

    # --- None removes the target column
    model.target = None
    self.assertIsInstance(model, pdml.ModelFrame)
    self.assertEqual(model.shape, (3, 3))
    tm.assert_index_equal(model.index, pd.Index(rows))
    tm.assert_index_equal(model.columns, pd.Index(['A', 'B', 'C']))
    tm.assert_frame_equal(model.data, features)
    self.assertEqual(model.target_name, '.target')
def test_train_test_split_keep_index(self):
    """Verify ``train_test_split`` preserves labels unless they are reset.

    The same fixed expected train/test rows are checked for a frame
    without a target and for a frame with one, both with the default
    behaviour and with ``reset_index=True``.
    """
    row_labels = 'a b c d e f g h'.split(' ')
    in_train = ['g', 'a', 'e', 'f', 'd', 'h']
    in_test = ['c', 'b']

    def split(frame, **options):
        # Split with the fixture's random state plus any extra options.
        return frame.ms.train_test_split(random_state=self.random_state,
                                         **options)

    # without target
    frame = pdml.ModelFrame({'A': [1, 2, 3, 4, 5, 6, 7, 8],
                             'B': [1, 2, 3, 4, 5, 6, 7, 8]},
                            index=row_labels)

    train, test = split(frame)
    tm.assert_frame_equal(train, frame.loc[in_train])
    tm.assert_frame_equal(test, frame.loc[in_test])

    train, test = split(frame, reset_index=True)
    tm.assert_frame_equal(train,
                          frame.loc[in_train].reset_index(drop=True))
    tm.assert_frame_equal(test,
                          frame.loc[in_test].reset_index(drop=True))

    # with target
    frame = pdml.ModelFrame({'A': [1, 2, 3, 4, 5, 6, 7, 8],
                             'B': [1, 2, 3, 4, 5, 6, 7, 8]},
                            index=row_labels,
                            target=[1, 2, 3, 4, 5, 6, 7, 8])

    train, test = split(frame)
    tm.assert_frame_equal(train, frame.loc[in_train])
    tm.assert_numpy_array_equal(train.target.values,
                                np.array([7, 1, 5, 6, 4, 8]))
    tm.assert_frame_equal(test, frame.loc[in_test])
    tm.assert_numpy_array_equal(test.target.values, np.array([3, 2]))

    train, test = split(frame, reset_index=True)
    tm.assert_frame_equal(train,
                          frame.loc[in_train].reset_index(drop=True))
    tm.assert_numpy_array_equal(train.target.values,
                                np.array([7, 1, 5, 6, 4, 8]))
    tm.assert_frame_equal(test,
                          frame.loc[in_test].reset_index(drop=True))
    tm.assert_numpy_array_equal(test.target.values, np.array([3, 2]))
def test_frame_data_proparty(self):
    """Set, reject and delete values through ``ModelFrame.data``.

    Data is replaced by a DataFrame and by a target-less ModelFrame; data
    that reuses the target column name, or a ModelFrame that has a target,
    must raise ``ValueError``; deleting the data leaves only the target.
    """
    rows = ['a', 'b', 'c']
    features = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6], 'C': [7, 8, 9]},
                            index=rows, columns=['A', 'B', 'C'])
    target = pd.Series([1, 2, 3], index=rows, name='.target')
    model = pdml.ModelFrame(features, target=target)
    self.assertIsInstance(model, pdml.ModelFrame)

    # --- replace data with a plain DataFrame
    replacement = pd.DataFrame({'X': [1, 2, 3], 'Y': [4, 5, 6]},
                               index=rows, columns=['X', 'Y'])
    model.data = replacement
    self.assertIsInstance(model, pdml.ModelFrame)
    self.assertEqual(model.shape, (3, 3))
    tm.assert_index_equal(model.index, pd.Index(rows))
    tm.assert_index_equal(model.columns, pd.Index(['.target', 'X', 'Y']))
    tm.assert_frame_equal(model.data, replacement)
    tm.assert_series_equal(model.target, target)
    self.assertEqual(model.target.name, '.target')
    self.assertEqual(model.target_name, '.target')

    # --- replace data with a ModelFrame that has no target
    replacement = pdml.ModelFrame({'M': [1, 2, 3], 'N': [4, 5, 6]},
                                  index=rows, columns=['M', 'N'])
    model.data = replacement
    self.assertIsInstance(model, pdml.ModelFrame)
    self.assertEqual(model.shape, (3, 3))
    tm.assert_index_equal(model.index, pd.Index(rows))
    tm.assert_index_equal(model.columns, pd.Index(['.target', 'M', 'N']))
    tm.assert_frame_equal(model.data, replacement)
    tm.assert_series_equal(model.target, target)
    self.assertEqual(model.target.name, '.target')
    self.assertEqual(model.target_name, '.target')

    # --- data that reuses the target's column name is rejected
    clashing = pd.DataFrame({'.target': [1, 2, 3], 'K': [4, 5, 6]},
                            index=rows)
    msg = "Passed data has the same column name as the target '.target'"
    with pytest.raises(ValueError, match=msg):
        model.data = clashing

    # --- a ModelFrame that carries a target is rejected
    msg = "Cannot update with ModelFrame which has target attribute"
    with pytest.raises(ValueError, match=msg):
        model.data = model

    # --- deleting the data leaves only the target column
    del model.data
    self.assertIsInstance(model, pdml.ModelFrame)
    self.assertEqual(model.shape, (3, 1))
    tm.assert_index_equal(model.index, pd.Index(rows))
    tm.assert_index_equal(model.columns, pd.Index(['.target']))
    self.assertTrue(model.data is None)
    tm.assert_series_equal(model.target, target)
    self.assertEqual(model.target.name, '.target')
    self.assertEqual(model.target_name, '.target')