def test_frame_target_object_set(self):
    # A target Series named with a non-string object (the int 5) is set on a
    # frame whose data columns are labelled with datetimes.
    rows = ['a', 'b', 'c']
    y2014 = datetime.datetime(2014, 1, 1)
    y2015 = datetime.datetime(2015, 1, 1)
    y2016 = datetime.datetime(2016, 1, 1)
    df = pd.DataFrame({y2014: [1, 2, 3], y2015: [4, 5, 6], y2016: [7, 8, 9]},
                      index=rows)
    mdf = pdml.ModelFrame(df)
    mdf.target = pd.Series(['A', 'B', 'C'], index=rows, name=5)

    self.assertIsInstance(mdf, pdml.ModelFrame)
    self.assertEqual(mdf.shape, (3, 4))
    tm.assert_index_equal(mdf.index, pd.Index(rows))
    exp_columns = pd.Index([5, y2014, y2015, y2016])
    tm.assert_index_equal(mdf.columns, exp_columns)
    tm.assert_frame_equal(mdf.data, df)
    exp_target = pd.Series(['A', 'B', 'C'], index=rows, name=5)
    tm.assert_series_equal(mdf.target, exp_target)
    self.assertEqual(mdf.target.name, 5)

    # name will be ignored if ModelFrame already has a target
    mdf.target = pd.Series([10, 11, 12], index=rows, name='X')

    self.assertIsInstance(mdf, pdml.ModelFrame)
    self.assertEqual(mdf.shape, (3, 4))
    tm.assert_index_equal(mdf.index, pd.Index(rows))
    exp_columns = pd.Index([5, y2014, y2015, y2016])
    tm.assert_index_equal(mdf.columns, exp_columns)
    tm.assert_frame_equal(mdf.data, df)
    exp_target = pd.Series([10, 11, 12], index=rows, name=5)
    tm.assert_series_equal(mdf.target, exp_target)
    self.assertEqual(mdf.target.name, 5)
def test_patsy_matrices(self):
    # Transforming with the formula 'A ~ B + C' is expected to yield a
    # ModelFrame with target 'A' and data columns Intercept, B and C.
    rows = ['a', 'b', 'c']
    df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6], 'C': [7, 8, 9]},
                      index=rows, columns=['A', 'B', 'C'])
    target = pd.Series([10, 11, 12], index=rows)
    mdf = pdml.ModelFrame(df, target=target)

    result = mdf.transform('A ~ B + C')

    self.assertIsInstance(result, pdml.ModelFrame)
    self.assertEqual(result.shape, (3, 4))
    tm.assert_index_equal(result.index, pd.Index(rows))
    out_columns = ['A', 'Intercept', 'B', 'C']
    tm.assert_index_equal(result.columns, pd.Index(out_columns))

    exp_frame = pd.DataFrame({'A': [1, 2, 3],
                              'Intercept': [1, 1, 1],
                              'B': [4, 5, 6],
                              'C': [7, 8, 9]},
                             index=rows, columns=out_columns, dtype=float)
    tm.assert_frame_equal(result, exp_frame)

    exp_target = pd.Series([1, 2, 3], index=rows, name='A', dtype=float)
    tm.assert_series_equal(result.target, exp_target)
    self.assertEqual(result.target.name, 'A')
    self.assertEqual(result.target_name, 'A')
def test_inverse_transform(self):
    # transform / inverse_transform through ModelFrame are compared with
    # the same estimator class applied directly to the raw iris arrays.
    iris = datasets.load_iris()
    df = pdml.ModelFrame(iris)

    for name in ['PCA']:
        wrapped = getattr(df.decomposition, name)()
        plain = getattr(decomposition, name)()

        df.fit(wrapped)
        plain.fit(iris.data, iris.target)

        forward = df.transform(wrapped)
        forward_ref = plain.transform(iris.data)

        self.assertIsInstance(forward, pdml.ModelFrame)
        tm.assert_series_equal(df.target, forward.target)
        self.assert_numpy_array_almost_equal(forward.data.values,
                                             forward_ref)

        backward = df.inverse_transform(wrapped)
        backward_ref = plain.inverse_transform(iris.data)

        self.assertIsInstance(backward, pdml.ModelFrame)
        tm.assert_series_equal(df.target, backward.target)
        self.assert_numpy_array_almost_equal(backward.data.values,
                                             backward_ref)
        tm.assert_index_equal(backward.columns, df.columns)
def test_frame_init_df_series(self):
    # Initialization from a DataFrame plus a named Series target: the
    # target is expected to appear as the leading column under its own name.
    df = pd.DataFrame({'A': [1, 2, 3],
                       'B': [4, 5, 6],
                       'C': [7, 8, 9]},
                      index=['a', 'b', 'c'],
                      columns=['A', 'B', 'C'])
    s = pd.Series([1, 2, 3], index=['a', 'b', 'c'], name='.target')

    mdf = pdml.ModelFrame(df, target=s)
    self.assertIsInstance(mdf, pdml.ModelFrame)
    self.assertEqual(mdf.shape, (3, 4))
    tm.assert_index_equal(mdf.index, pd.Index(['a', 'b', 'c']))
    tm.assert_index_equal(mdf.columns, pd.Index(['.target', 'A', 'B', 'C']))
    tm.assert_frame_equal(mdf.data, df)
    tm.assert_series_equal(mdf.target, s)
    self.assertEqual(mdf.target.name, '.target')
    self.assertEqual(mdf.target_name, '.target')

    # A target with the default integer index does not line up with the
    # 'a'/'b'/'c' data index and must be rejected.  pytest.raises is used
    # because assertRaisesRegexp is a deprecated unittest alias.
    s = pd.Series([1, 2, 3])
    msg = 'data and target must have equal index'
    with pytest.raises(ValueError, match=msg):
        pdml.ModelFrame(df, target=s)

    # Same as the first case, with a different target name.
    s = pd.Series([1, 2, 3], index=['a', 'b', 'c'], name='XXX')
    mdf = pdml.ModelFrame(df, target=s)

    self.assertIsInstance(mdf, pdml.ModelFrame)
    self.assertEqual(mdf.shape, (3, 4))
    tm.assert_index_equal(mdf.index, pd.Index(['a', 'b', 'c']))
    tm.assert_index_equal(mdf.columns, pd.Index(['XXX', 'A', 'B', 'C']))
    tm.assert_frame_equal(mdf.data, df)
    tm.assert_series_equal(mdf.target, s)
    self.assertEqual(mdf.target.name, 'XXX')
    self.assertEqual(mdf.target_name, 'XXX')
def test_frame_init_df_df(self):
    # initialization by dataframe and dataframe
    rows = ['a', 'b', 'c']
    feature_cols = ['A', 'B', 'C']
    target_values = {'t1': [10, 11, 12], 't2': [13, 14, 15]}

    df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6], 'C': [7, 8, 9]},
                      index=rows, columns=feature_cols)
    target = pd.DataFrame(target_values, index=rows)
    mdf = pdml.ModelFrame(df, target=target)

    self.assertIsInstance(mdf, pdml.ModelFrame)
    self.assertEqual(mdf.shape, (3, 5))

    all_cols = ['t1', 't2'] + feature_cols
    expected = pd.DataFrame({'t1': [10, 11, 12],
                             't2': [13, 14, 15],
                             'A': [1, 2, 3],
                             'B': [4, 5, 6],
                             'C': [7, 8, 9]},
                            index=rows, columns=all_cols)
    tm.assert_frame_equal(mdf, expected)
    tm.assert_index_equal(mdf.index, pd.Index(rows))
    tm.assert_index_equal(mdf.columns, pd.Index(all_cols))
    tm.assert_frame_equal(mdf.data, df)
    tm.assert_frame_equal(mdf.target, target)
    tm.assert_index_equal(mdf.target.columns, pd.Index(['t1', 't2']))
    tm.assert_index_equal(mdf.target_name, pd.Index(['t1', 't2']))
    self.assertTrue(mdf.has_multi_targets())

    # a target frame built without the data's index is rejected
    target = pd.DataFrame(target_values)
    msg = 'data and target must have equal index'
    with pytest.raises(ValueError, match=msg):
        mdf = pdml.ModelFrame(df, target=target)

    # single column DataFrame will results in single target column
    target = pd.DataFrame({'t1': [10, 11, 12]}, index=rows)
    mdf = pdml.ModelFrame(df, target=target)

    self.assertIsInstance(mdf, pdml.ModelFrame)
    self.assertEqual(mdf.shape, (3, 4))
    tm.assert_index_equal(mdf.index, pd.Index(rows))
    tm.assert_index_equal(mdf.columns, pd.Index(['t1'] + feature_cols))
    tm.assert_frame_equal(mdf.data, df)

    target = pd.Series([10, 11, 12], name='t1', index=rows)
    tm.assert_series_equal(mdf.target, target)
    self.assertEqual(mdf.target_name, 't1')
def test_series_groupby(self):
    # groupby on a ModelSeries returns a ModelSeriesGroupBy, and the
    # extracted group is a ModelSeries that keeps the name.
    series = pdml.ModelSeries([1, 2, 1, 2], name='X')
    self.assertIsInstance(series, pdml.ModelSeries)

    by_key = series.groupby([1, 1, 1, 2])
    self.assertIsInstance(by_key, pdml.core.groupby.ModelSeriesGroupBy)

    first_group = by_key.get_group(1)
    self.assertIsInstance(first_group, pdml.ModelSeries)

    exp_group = pd.Series([1, 2, 1], index=[0, 1, 2], name='X')
    tm.assert_series_equal(first_group, exp_group)
    self.assertEqual(first_group.name, 'X')
def test_fit_transform_PCA(self):
    # fit_transform with PCA through ModelFrame is compared with
    # scikit-learn's PCA run directly on the iris arrays.
    iris = datasets.load_iris()
    df = pdml.ModelFrame(iris)

    wrapped = df.decomposition.PCA()
    plain = decomposition.PCA()

    result = df.fit_transform(wrapped)
    reference = plain.fit_transform(iris.data, iris.target)

    self.assertIsInstance(result, pdml.ModelFrame)
    tm.assert_series_equal(df.target, result.target)
    self.assert_numpy_array_almost_equal(result.data.values, reference)
def test_fit_transform_KernelPCA(self):
    # fit_transform with KernelPCA through ModelFrame is compared with
    # scikit-learn's KernelPCA; only the first 40 columns are compared.
    iris = datasets.load_iris()
    df = pdml.ModelFrame(iris)

    wrapped = df.decomposition.KernelPCA()
    plain = decomposition.KernelPCA()

    result = df.fit_transform(wrapped)
    reference = plain.fit_transform(iris.data, iris.target)

    self.assertIsInstance(result, pdml.ModelFrame)
    tm.assert_series_equal(df.target, result.target)

    actual_head = result.data.values[:, :40]
    reference_head = reference[:, :40]
    self.assert_numpy_array_almost_equal(actual_head, reference_head)
def test_frame_init_dict_list_series_index(self):
    # initialization by a dict of lists and an indexed, named Series target;
    # the resulting frame is expected to carry the target's index
    rows = ['a', 'b', 'c']
    data = {'A': [1, 2, 3], 'B': [4, 5, 6], 'C': [7, 8, 9]}
    target = pd.Series([9, 8, 7], name='X', index=rows)

    mdf = pdml.ModelFrame(data, target=target)

    self.assertIsInstance(mdf, pdml.ModelFrame)
    self.assertEqual(mdf.shape, (3, 4))
    tm.assert_index_equal(mdf.index, pd.Index(rows))
    tm.assert_index_equal(mdf.columns, pd.Index(['X', 'A', 'B', 'C']))

    exp_data = pd.DataFrame(data, index=rows)
    tm.assert_frame_equal(mdf.data, exp_data)
    tm.assert_series_equal(mdf.target, target)
    self.assertEqual(mdf.target.name, 'X')
    self.assertEqual(mdf.target_name, 'X')
def test_frame_init_df_df(self):
    # Initialization from a DataFrame of data and a DataFrame of targets.
    df = pd.DataFrame({'A': [1, 2, 3],
                       'B': [4, 5, 6],
                       'C': [7, 8, 9]},
                      index=['a', 'b', 'c'],
                      columns=['A', 'B', 'C'])
    target = pd.DataFrame({'t1': [10, 11, 12],
                           't2': [13, 14, 15]},
                          index=['a', 'b', 'c'])
    mdf = pdml.ModelFrame(df, target=target)

    self.assertIsInstance(mdf, pdml.ModelFrame)
    self.assertEqual(mdf.shape, (3, 5))

    # Target columns are expected first, followed by the data columns.
    expected = pd.DataFrame({'t1': [10, 11, 12],
                             't2': [13, 14, 15],
                             'A': [1, 2, 3],
                             'B': [4, 5, 6],
                             'C': [7, 8, 9]},
                            index=['a', 'b', 'c'],
                            columns=['t1', 't2', 'A', 'B', 'C'])
    tm.assert_frame_equal(mdf, expected)
    tm.assert_index_equal(mdf.index, pd.Index(['a', 'b', 'c']))
    tm.assert_index_equal(mdf.columns,
                          pd.Index(['t1', 't2', 'A', 'B', 'C']))
    tm.assert_frame_equal(mdf.data, df)
    tm.assert_frame_equal(mdf.target, target)
    tm.assert_index_equal(mdf.target.columns, pd.Index(['t1', 't2']))
    tm.assert_index_equal(mdf.target_name, pd.Index(['t1', 't2']))
    self.assertTrue(mdf.has_multi_targets())

    # A target frame with the default integer index does not match the
    # data index.  pytest.raises is used because assertRaisesRegexp is a
    # deprecated unittest alias.
    target = pd.DataFrame({'t1': [10, 11, 12], 't2': [13, 14, 15]})
    msg = 'data and target must have equal index'
    with pytest.raises(ValueError, match=msg):
        pdml.ModelFrame(df, target=target)

    # single column DataFrame will result in a single target column
    target = pd.DataFrame({'t1': [10, 11, 12]}, index=['a', 'b', 'c'])
    mdf = pdml.ModelFrame(df, target=target)

    self.assertIsInstance(mdf, pdml.ModelFrame)
    self.assertEqual(mdf.shape, (3, 4))
    tm.assert_index_equal(mdf.index, pd.Index(['a', 'b', 'c']))
    tm.assert_index_equal(mdf.columns, pd.Index(['t1', 'A', 'B', 'C']))
    tm.assert_frame_equal(mdf.data, df)

    target = pd.Series([10, 11, 12], name='t1', index=['a', 'b', 'c'])
    tm.assert_series_equal(mdf.target, target)
    self.assertEqual(mdf.target_name, 't1')
def _assert_fit_transform(self, df, exp_data, model1, model2):
    # Shared check: df.fit_transform(model1) must match
    # model2.fit_transform(exp_data) and keep target, index and columns.
    actual = df.fit_transform(model1)
    reference = model2.fit_transform(exp_data)

    self.assertIsInstance(actual, pdml.ModelFrame)

    # target is unchanged (or still absent when df has none)
    if not df.has_target():
        self.assertIsNone(actual.target)
    else:
        tm.assert_series_equal(df.target, actual.target)

    self.assert_numpy_array_almost_equal(actual.data.values, reference)

    # index and columns are kept
    tm.assert_index_equal(actual.index, df.index)
    tm.assert_index_equal(actual.columns, df.columns)
def test_frame_target_object(self):
    # A non-string column label (a datetime) is passed as the target.
    rows = ['a', 'b', 'c']
    target_label = datetime.datetime(2016, 1, 1)
    df = pd.DataFrame({datetime.datetime(2014, 1, 1): [1, 2, 3],
                       datetime.datetime(2015, 1, 1): [4, 5, 6],
                       target_label: [7, 8, 9]},
                      index=rows)

    mdf = pdml.ModelFrame(df, target=target_label)

    self.assertIsInstance(mdf, pdml.ModelFrame)
    self.assertEqual(mdf.shape, (3, 3))
    tm.assert_index_equal(mdf.index, pd.Index(rows))

    exp_columns = pd.DatetimeIndex(['2014-01-01', '2015-01-01',
                                    '2016-01-01'])
    tm.assert_index_equal(mdf.columns, exp_columns)
    tm.assert_frame_equal(mdf.data, df.iloc[:, :2])

    exp_target = pd.Series([7, 8, 9], index=rows,
                           name=pd.Timestamp('2016-01-01'))
    tm.assert_series_equal(mdf.target, exp_target)
    self.assertEqual(mdf.target.name, target_label)
    self.assertEqual(mdf.target_name, target_label)
def test_frame_init_df_array_series(self):
    # initialization by a 2-D numpy array (with explicit index/columns)
    # and a named Series target
    rows = ['a', 'b', 'c']
    cols = ['A', 'B', 'C']
    target = pd.Series([1, 2, 3], index=rows, name='.target')
    values = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])

    mdf = pdml.ModelFrame(values, target=target, index=rows, columns=cols)

    self.assertIsInstance(mdf, pdml.ModelFrame)
    self.assertEqual(mdf.shape, (3, 4))
    tm.assert_index_equal(mdf.index, pd.Index(rows))
    tm.assert_index_equal(mdf.columns, pd.Index(['.target'] + cols))

    exp_data = pd.DataFrame(np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]),
                            index=rows, columns=cols)
    tm.assert_frame_equal(mdf.data, exp_data)
    tm.assert_series_equal(mdf.target, target)
    self.assertEqual(mdf.target.name, '.target')
    self.assertEqual(mdf.target_name, '.target')
def test_frame_target_object_set(self):
    # Setting a target named with the int 5 on a frame with datetime
    # column labels, then overwriting it with a differently named Series.
    idx = ['a', 'b', 'c']
    dates = [datetime.datetime(2014, 1, 1),
             datetime.datetime(2015, 1, 1),
             datetime.datetime(2016, 1, 1)]
    df = pd.DataFrame({dates[0]: [1, 2, 3],
                       dates[1]: [4, 5, 6],
                       dates[2]: [7, 8, 9]},
                      index=idx)
    mdf = pdml.ModelFrame(df)

    def check_state(target_values):
        # assertions shared by both assignments below
        self.assertIsInstance(mdf, pdml.ModelFrame)
        self.assertEqual(mdf.shape, (3, 4))
        tm.assert_index_equal(mdf.index, pd.Index(idx))
        expected = pd.Index([5] + dates)
        tm.assert_index_equal(mdf.columns, expected)
        tm.assert_frame_equal(mdf.data, df)
        expected = pd.Series(target_values, index=idx, name=5)
        tm.assert_series_equal(mdf.target, expected)
        self.assertEqual(mdf.target.name, 5)

    mdf.target = pd.Series(['A', 'B', 'C'], index=idx, name=5)
    check_state(['A', 'B', 'C'])

    # name will be ignored if ModelFrame already has a target
    mdf.target = pd.Series([10, 11, 12], index=idx, name='X')
    check_state([10, 11, 12])
def test_frame_init_df_str(self):
    # Initialization from a DataFrame with the target given as a column
    # name: that column becomes the target, the rest stays as data.
    df = pd.DataFrame({'A': [1, 2, 3],
                       'B': [4, 5, 6],
                       'C': [7, 8, 9]},
                      index=['a', 'b', 'c'],
                      columns=['A', 'B', 'C'])
    mdf = pdml.ModelFrame(df, target='A')

    self.assertIsInstance(mdf, pdml.ModelFrame)
    self.assertEqual(mdf.shape, (3, 3))
    tm.assert_index_equal(mdf.index, pd.Index(['a', 'b', 'c']))
    tm.assert_index_equal(mdf.columns, pd.Index(['A', 'B', 'C']))
    tm.assert_frame_equal(mdf.data, df[['B', 'C']])
    tm.assert_series_equal(mdf.target, df['A'])
    self.assertEqual(mdf.target.name, 'A')
    self.assertEqual(mdf.target_name, 'A')

    # A target name missing from the data must raise.  pytest.raises is
    # used because assertRaisesRegexp is a deprecated unittest alias.
    msg = "Specified target 'X' is not included in data"
    with pytest.raises(ValueError, match=msg):
        pdml.ModelFrame(df, target='X')
def test_frame_data_none(self):
    # Construction with data=None: invalid without a list-like target,
    # valid (target-only frame) when a Series target is supplied.
    # pytest.raises is used because assertRaisesRegexp is a deprecated
    # unittest alias.
    msg = "ModelFrame must have either data or target"
    with pytest.raises(ValueError, match=msg):
        pdml.ModelFrame(None)

    msg = "target must be list-like when data is None"
    with pytest.raises(ValueError, match=msg):
        pdml.ModelFrame(None, target='X')

    # initialization without data
    s = pd.Series([1, 2, 3], index=['a', 'b', 'c'], name='.target')
    mdf = pdml.ModelFrame(None, target=s)

    self.assertIsInstance(mdf, pdml.ModelFrame)
    self.assertEqual(mdf.shape, (3, 1))
    tm.assert_index_equal(mdf.index, pd.Index(['a', 'b', 'c']))
    tm.assert_index_equal(mdf.columns, pd.Index(['.target']))
    self.assertIsNone(mdf.data)
    self.assertFalse(mdf.has_data())
    tm.assert_series_equal(mdf.target, s)
    self.assertEqual(mdf.target.name, '.target')
    self.assertEqual(mdf.target_name, '.target')
def test_frame_data_none(self):
    # data=None is rejected without a list-like target and accepted with
    # a Series target, producing a target-only frame.
    no_input_msg = "ModelFrame must have either data or target"
    with pytest.raises(ValueError, match=no_input_msg):
        pdml.ModelFrame(None)

    scalar_target_msg = "target must be list-like when data is None"
    with pytest.raises(ValueError, match=scalar_target_msg):
        pdml.ModelFrame(None, target='X')

    # initialization without data
    rows = ['a', 'b', 'c']
    target = pd.Series([1, 2, 3], index=rows, name='.target')
    mdf = pdml.ModelFrame(None, target=target)

    self.assertIsInstance(mdf, pdml.ModelFrame)
    self.assertEqual(mdf.shape, (3, 1))
    tm.assert_index_equal(mdf.index, pd.Index(rows))
    tm.assert_index_equal(mdf.columns, pd.Index(['.target']))
    self.assertTrue(mdf.data is None)
    self.assertFalse(mdf.has_data())
    tm.assert_series_equal(mdf.target, target)
    self.assertEqual(mdf.target.name, '.target')
    self.assertEqual(mdf.target_name, '.target')
def test_frame_target_object(self):
    # The target is selected by a datetime column label rather than a str.
    idx = ['a', 'b', 'c']
    first, second, last = [datetime.datetime(year, 1, 1)
                           for year in (2014, 2015, 2016)]
    df = pd.DataFrame({first: [1, 2, 3],
                       second: [4, 5, 6],
                       last: [7, 8, 9]},
                      index=idx)
    mdf = pdml.ModelFrame(df, target=datetime.datetime(2016, 1, 1))

    self.assertIsInstance(mdf, pdml.ModelFrame)
    self.assertEqual(mdf.shape, (3, 3))
    tm.assert_index_equal(mdf.index, pd.Index(idx))

    date_strings = ['2014-01-01', '2015-01-01', '2016-01-01']
    tm.assert_index_equal(mdf.columns, pd.DatetimeIndex(date_strings))
    tm.assert_frame_equal(mdf.data, df.iloc[:, :2])

    target_name = pd.Timestamp('2016-01-01')
    expected = pd.Series([7, 8, 9], index=idx, name=target_name)
    tm.assert_series_equal(mdf.target, expected)
    self.assertEqual(mdf.target.name, datetime.datetime(2016, 1, 1))
    self.assertEqual(mdf.target_name, datetime.datetime(2016, 1, 1))
def test_frame_groupby(self):
    # groupby on a ModelFrame returns a ModelFrameGroupBy whose groups are
    # ModelFrames with a ModelSeries target.
    data = pd.DataFrame({'A': [1, 2, 1, 2],
                         'B': [4, 5, 6, 7],
                         'C': [7, 8, 9, 10]},
                        columns=['A', 'B', 'C'])
    target = pd.Series([1, 2, 3, 4])
    mdf = pdml.ModelFrame(data, target=target)
    self.assertIsInstance(mdf, pdml.ModelFrame)

    by_a = mdf.groupby('A')
    self.assertIsInstance(by_a, pdml.core.groupby.ModelFrameGroupBy)

    group = by_a.get_group(1)
    self.assertIsInstance(group, pdml.ModelFrame)

    # rows 0 and 2 are the ones where A == 1
    exp_target = pd.Series([1, 3], index=[0, 2], name='.target')
    tm.assert_series_equal(group.target, exp_target)
    self.assertIsInstance(group.target, pdml.ModelSeries)
def test_LabelEncoder_series(self):
    # LabelEncoder through ModelSeries: fit + transform, fit_transform,
    # and the round trip back via inverse_transform.
    labels = np.array(['X', 'Y', 'Z', 'X'])
    s = pdml.ModelSeries(labels, index=['a', 'b', 'c', 'd'])

    encoder = s.pp.LabelEncoder()
    s.fit(encoder)
    encoded = s.transform(encoder)
    exp_codes = np.array([0, 1, 2, 0])

    self.assertIsInstance(encoded, pdml.ModelSeries)
    self.assert_numpy_array_almost_equal(encoded.values, exp_codes)
    tm.assert_index_equal(encoded.index, s.index)

    # a fresh encoder, fitted and applied in one step
    encoder = s.pp.LabelEncoder()
    encoded = s.fit_transform(encoder)

    self.assertIsInstance(encoded, pdml.ModelSeries)
    self.assert_numpy_array_almost_equal(encoded.values, exp_codes)

    restored = encoded.inverse_transform(encoder)
    self.assertIsInstance(restored, pdml.ModelSeries)
    tm.assert_series_equal(restored, s)
def test_frame_init_df_str(self):
    # initialization by dataframe and str
    rows = ['a', 'b', 'c']
    cols = ['A', 'B', 'C']
    df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6], 'C': [7, 8, 9]},
                      index=rows, columns=cols)

    mdf = pdml.ModelFrame(df, target='A')

    self.assertIsInstance(mdf, pdml.ModelFrame)
    self.assertEqual(mdf.shape, (3, 3))
    tm.assert_index_equal(mdf.index, pd.Index(rows))
    tm.assert_index_equal(mdf.columns, pd.Index(cols))

    exp_data = df[['B', 'C']]
    exp_target = df['A']
    tm.assert_frame_equal(mdf.data, exp_data)
    tm.assert_series_equal(mdf.target, exp_target)
    self.assertEqual(mdf.target.name, 'A')
    self.assertEqual(mdf.target_name, 'A')

    # a target name that is not a column of the data is rejected
    missing_msg = "Specified target 'X' is not included in data"
    with pytest.raises(ValueError, match=missing_msg):
        pdml.ModelFrame(df, target='X')
def test_frame_init_dict_list(self):
    # initialization by dataframe and list
    rows = ['a', 'b', 'c']
    cols = ['A', 'B', 'C']
    df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6], 'C': [7, 8, 9]},
                      index=rows, columns=cols)
    values = [1, 2, 3]
    mdf = pdml.ModelFrame(df, target=values)

    self.assertIsInstance(mdf, pdml.ModelFrame)
    self.assertEqual(mdf.shape, (3, 4))
    tm.assert_index_equal(mdf.index, pd.Index(rows))
    tm.assert_index_equal(mdf.columns, pd.Index(['.target'] + cols))
    tm.assert_frame_equal(mdf.data, df)

    exp_target = pd.Series([1, 2, 3], index=rows, name='.target')
    tm.assert_series_equal(mdf.target, exp_target)
    self.assertEqual(mdf.target.name, '.target')
    self.assertEqual(mdf.target_name, '.target')

    # plain dict data with a list target: default integer index expected
    raw = {'A': [1, 2, 3], 'B': [4, 5, 6], 'C': [7, 8, 9]}
    values = [1, 2, 3]
    mdf = pdml.ModelFrame(raw, target=values)

    self.assertIsInstance(mdf, pdml.ModelFrame)
    self.assertEqual(mdf.shape, (3, 4))
    tm.assert_index_equal(mdf.index, pd.Index([0, 1, 2]))
    tm.assert_index_equal(mdf.columns, pd.Index(['.target'] + cols))

    exp_data = pd.DataFrame(raw)
    tm.assert_frame_equal(mdf.data, exp_data)

    exp_target = pd.Series([1, 2, 3], index=[0, 1, 2], name='.target')
    tm.assert_series_equal(mdf.target, exp_target)
    self.assertEqual(mdf.target.name, '.target')
    self.assertEqual(mdf.target_name, '.target')

    # plain dict data with the target given by column name
    mdf = pdml.ModelFrame(raw, target='A')

    self.assertIsInstance(mdf, pdml.ModelFrame)
    self.assertEqual(mdf.shape, (3, 3))
    tm.assert_index_equal(mdf.index, pd.Index([0, 1, 2]))
    tm.assert_index_equal(mdf.columns, pd.Index(cols))

    exp_data = pd.DataFrame(raw)
    tm.assert_frame_equal(mdf.data, exp_data[['B', 'C']])
    tm.assert_series_equal(mdf.target, exp_data['A'])
    self.assertEqual(mdf.target.name, 'A')
    self.assertEqual(mdf.target_name, 'A')

    # dict data with explicit index/columns and no target
    mdf = pdml.ModelFrame({'A': [1, 2, 3], 'B': [4, 5, 6], 'C': [7, 8, 9]},
                          index=rows, columns=cols)

    self.assertIsInstance(mdf, pdml.ModelFrame)
    self.assertEqual(mdf.shape, (3, 3))
    tm.assert_index_equal(mdf.index, pd.Index(rows))
    tm.assert_index_equal(mdf.columns, pd.Index(cols))
    tm.assert_frame_equal(mdf.data, mdf)
    self.assertEqual(mdf.target_name, '.target')
def test_frame_init_df_series(self):
    # initialization by dataframe and a named, index-aligned series
    rows = ['a', 'b', 'c']
    df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6], 'C': [7, 8, 9]},
                      index=rows, columns=['A', 'B', 'C'])

    def check_named_target(name):
        # build a frame with a target called ``name`` and verify it
        s = pd.Series([1, 2, 3], index=rows, name=name)
        mdf = pdml.ModelFrame(df, target=s)

        self.assertIsInstance(mdf, pdml.ModelFrame)
        self.assertEqual(mdf.shape, (3, 4))
        tm.assert_index_equal(mdf.index, pd.Index(rows))
        tm.assert_index_equal(mdf.columns,
                              pd.Index([name, 'A', 'B', 'C']))
        tm.assert_frame_equal(mdf.data, df)
        tm.assert_series_equal(mdf.target, s)
        self.assertEqual(mdf.target.name, name)
        self.assertEqual(mdf.target_name, name)

    check_named_target('.target')

    # a series with the default integer index does not match the data
    unaligned = pd.Series([1, 2, 3])
    index_msg = 'data and target must have equal index'
    with pytest.raises(ValueError, match=index_msg):
        pdml.ModelFrame(df, target=unaligned)

    check_named_target('XXX')
def test_frame_data_proparty(self):
    # Exercises the ``data`` property of ModelFrame: setting it from a
    # DataFrame and from a target-less ModelFrame, the two rejected
    # assignments, and deleting it.
    df = pd.DataFrame({'A': [1, 2, 3],
                       'B': [4, 5, 6],
                       'C': [7, 8, 9]},
                      index=['a', 'b', 'c'],
                      columns=['A', 'B', 'C'])
    s = pd.Series([1, 2, 3], index=['a', 'b', 'c'], name='.target')
    mdf = pdml.ModelFrame(df, target=s)
    self.assertIsInstance(mdf, pdml.ModelFrame)

    new = pd.DataFrame({'X': [1, 2, 3],
                        'Y': [4, 5, 6]},
                       index=['a', 'b', 'c'],
                       columns=['X', 'Y'])
    # set data property
    mdf.data = new

    self.assertIsInstance(mdf, pdml.ModelFrame)
    self.assertEqual(mdf.shape, (3, 3))
    tm.assert_index_equal(mdf.index, pd.Index(['a', 'b', 'c']))
    tm.assert_index_equal(mdf.columns, pd.Index(['.target', 'X', 'Y']))
    tm.assert_frame_equal(mdf.data, new)
    tm.assert_series_equal(mdf.target, s)
    self.assertEqual(mdf.target.name, '.target')
    self.assertEqual(mdf.target_name, '.target')

    new = pdml.ModelFrame({'M': [1, 2, 3],
                           'N': [4, 5, 6]},
                          index=['a', 'b', 'c'],
                          columns=['M', 'N'])
    # set data property
    mdf.data = new

    self.assertIsInstance(mdf, pdml.ModelFrame)
    self.assertEqual(mdf.shape, (3, 3))
    tm.assert_index_equal(mdf.index, pd.Index(['a', 'b', 'c']))
    tm.assert_index_equal(mdf.columns, pd.Index(['.target', 'M', 'N']))
    tm.assert_frame_equal(mdf.data, new)
    tm.assert_series_equal(mdf.target, s)
    self.assertEqual(mdf.target.name, '.target')
    self.assertEqual(mdf.target_name, '.target')

    new = pd.DataFrame({'.target': [1, 2, 3],
                        'K': [4, 5, 6]},
                       index=['a', 'b', 'c'])

    # unable to set data if passed value has the same column as the target
    # (pytest.raises replaces the deprecated assertRaisesRegexp alias)
    msg = "Passed data has the same column name as the target '.target'"
    with pytest.raises(ValueError, match=msg):
        mdf.data = new

    # unable to set ModelFrame with target attribute
    msg = "Cannot update with ModelFrame which has target attribute"
    with pytest.raises(ValueError, match=msg):
        mdf.data = mdf

    # set delete property
    del mdf.data

    self.assertIsInstance(mdf, pdml.ModelFrame)
    self.assertEqual(mdf.shape, (3, 1))
    tm.assert_index_equal(mdf.index, pd.Index(['a', 'b', 'c']))
    tm.assert_index_equal(mdf.columns, pd.Index(['.target']))
    self.assertIsNone(mdf.data)
    tm.assert_series_equal(mdf.target, s)
    self.assertEqual(mdf.target.name, '.target')
    self.assertEqual(mdf.target_name, '.target')
def test_frame_init_dict_list(self):
    # initialization by dataframe and list
    letters = ['a', 'b', 'c']
    positions = [0, 1, 2]
    names = ['A', 'B', 'C']

    def check_default_target(mdf, index_labels, exp_data):
        # frame built with an unnamed list target: it is expected to be
        # stored under the '.target' name ahead of the data columns
        self.assertIsInstance(mdf, pdml.ModelFrame)
        self.assertEqual(mdf.shape, (3, 4))
        tm.assert_index_equal(mdf.index, pd.Index(index_labels))
        tm.assert_index_equal(mdf.columns,
                              pd.Index(['.target', 'A', 'B', 'C']))
        tm.assert_frame_equal(mdf.data, exp_data)
        expected = pd.Series([1, 2, 3], index=index_labels, name='.target')
        tm.assert_series_equal(mdf.target, expected)
        self.assertEqual(mdf.target.name, '.target')
        self.assertEqual(mdf.target_name, '.target')

    df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6], 'C': [7, 8, 9]},
                      index=letters, columns=names)
    s = [1, 2, 3]
    check_default_target(pdml.ModelFrame(df, target=s), letters, df)

    # the same content as a plain dict gets a default integer index
    df = {'A': [1, 2, 3], 'B': [4, 5, 6], 'C': [7, 8, 9]}
    s = [1, 2, 3]
    mdf = pdml.ModelFrame(df, target=s)
    check_default_target(mdf, positions, pd.DataFrame(df))

    # target picked out of the dict by key
    mdf = pdml.ModelFrame(df, target='A')
    self.assertIsInstance(mdf, pdml.ModelFrame)
    self.assertEqual(mdf.shape, (3, 3))
    tm.assert_index_equal(mdf.index, pd.Index(positions))
    tm.assert_index_equal(mdf.columns, pd.Index(names))
    expected = pd.DataFrame(df)
    tm.assert_frame_equal(mdf.data, expected[['B', 'C']])
    tm.assert_series_equal(mdf.target, expected['A'])
    self.assertEqual(mdf.target.name, 'A')
    self.assertEqual(mdf.target_name, 'A')

    # no target supplied
    mdf = pdml.ModelFrame({'A': [1, 2, 3], 'B': [4, 5, 6], 'C': [7, 8, 9]},
                          index=letters, columns=names)
    self.assertIsInstance(mdf, pdml.ModelFrame)
    self.assertEqual(mdf.shape, (3, 3))
    tm.assert_index_equal(mdf.index, pd.Index(letters))
    tm.assert_index_equal(mdf.columns, pd.Index(names))
    tm.assert_frame_equal(mdf.data, mdf)
    self.assertEqual(mdf.target_name, '.target')
def test_frame_target_proparty(self):
    # Exercises the ``target`` property of ModelFrame: assignment from a
    # Series (matching and non-matching name), from a list, the rejected
    # assignments, and clearing it with None.
    df = pd.DataFrame({'A': [1, 2, 3],
                       'B': [4, 5, 6],
                       'C': [7, 8, 9]},
                      index=['a', 'b', 'c'],
                      columns=['A', 'B', 'C'])
    s = pd.Series([1, 2, 3], index=['a', 'b', 'c'])
    mdf = pdml.ModelFrame(df, target=s)

    new = pd.Series([4, 5, 6], index=['a', 'b', 'c'], name='.target')
    # set target property
    mdf.target = new

    self.assertIsInstance(mdf, pdml.ModelFrame)
    self.assertEqual(mdf.shape, (3, 4))
    tm.assert_index_equal(mdf.index, pd.Index(['a', 'b', 'c']))
    tm.assert_index_equal(mdf.columns, pd.Index(['.target', 'A', 'B', 'C']))
    tm.assert_frame_equal(mdf.data, df)
    tm.assert_series_equal(mdf.target, new)
    self.assertEqual(mdf.target.name, '.target')
    self.assertEqual(mdf.target_name, '.target')

    # A Series with a different name is expected to trigger a UserWarning;
    # the existing '.target' name is kept (asserted below).
    with tm.assert_produces_warning(UserWarning):
        new = pd.Series([4, 5, 6], index=['a', 'b', 'c'], name='xxx')
        # set target property
        mdf.target = new

    self.assertIsInstance(mdf, pdml.ModelFrame)
    self.assertEqual(mdf.shape, (3, 4))
    tm.assert_index_equal(mdf.index, pd.Index(['a', 'b', 'c']))
    tm.assert_index_equal(mdf.columns, pd.Index(['.target', 'A', 'B', 'C']))
    tm.assert_frame_equal(mdf.data, df)
    exp_target = pd.Series(new, name='.target')
    tm.assert_series_equal(mdf.target, exp_target)
    self.assertEqual(mdf.target.name, '.target')
    self.assertEqual(mdf.target_name, '.target')

    # A target with a mismatching index is rejected.  pytest.raises is
    # used because assertRaisesRegexp is a deprecated unittest alias.
    new = pd.Series([4, 5, 6], name='.target')
    msg = 'data and target must have equal index'
    with pytest.raises(ValueError, match=msg):
        mdf.target = new

    # set target property
    mdf.target = [7, 8, 9]

    self.assertIsInstance(mdf, pdml.ModelFrame)
    self.assertEqual(mdf.shape, (3, 4))
    tm.assert_index_equal(mdf.index, pd.Index(['a', 'b', 'c']))
    tm.assert_index_equal(mdf.columns, pd.Index(['.target', 'A', 'B', 'C']))
    tm.assert_frame_equal(mdf.data, df)
    expected = pd.Series([7, 8, 9], index=['a', 'b', 'c'], name='.target')
    tm.assert_series_equal(mdf.target, expected)
    self.assertEqual(mdf.target.name, '.target')
    self.assertEqual(mdf.target_name, '.target')

    # The length-mismatch message depends on the pandas version, so pick
    # the expected text from the compat flag instead of hard-coding one.
    if pdml.compat._PANDAS_ge_023:
        msg = 'Length of passed values is 2, index implies 3'
    else:
        msg = 'Wrong number of items passed 2, placement implies 3'
    with pytest.raises(ValueError, match=msg):
        mdf.target = [1, 2]

    # set target property
    mdf.target = None

    self.assertIsInstance(mdf, pdml.ModelFrame)
    self.assertEqual(mdf.shape, (3, 3))
    tm.assert_index_equal(mdf.index, pd.Index(['a', 'b', 'c']))
    tm.assert_index_equal(mdf.columns, pd.Index(['A', 'B', 'C']))
    tm.assert_frame_equal(mdf.data, df)
    self.assertEqual(mdf.target_name, '.target')
def test_frame_target_proparty(self):
    # Walks the ``target`` property through Series, list and None
    # assignments, including the assignments that must be rejected.
    rows = ['a', 'b', 'c']
    cols = ['A', 'B', 'C']
    df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6], 'C': [7, 8, 9]},
                      index=rows, columns=cols)
    s = pd.Series([1, 2, 3], index=rows)
    mdf = pdml.ModelFrame(df, target=s)

    def check_frame():
        # layout assertions shared by every successful assignment
        self.assertIsInstance(mdf, pdml.ModelFrame)
        self.assertEqual(mdf.shape, (3, 4))
        tm.assert_index_equal(mdf.index, pd.Index(rows))
        tm.assert_index_equal(mdf.columns, pd.Index(['.target'] + cols))
        tm.assert_frame_equal(mdf.data, df)

    def check_target(exp_target):
        tm.assert_series_equal(mdf.target, exp_target)
        self.assertEqual(mdf.target.name, '.target')
        self.assertEqual(mdf.target_name, '.target')

    new = pd.Series([4, 5, 6], index=rows, name='.target')
    # set target property
    mdf.target = new
    check_frame()
    check_target(new)

    with tm.assert_produces_warning(UserWarning):
        new = pd.Series([4, 5, 6], index=rows, name='xxx')
        # set target property
        mdf.target = new
    check_frame()
    check_target(pd.Series(new, name='.target'))

    # index mismatch between data and target
    new = pd.Series([4, 5, 6], name='.target')
    index_msg = 'data and target must have equal index'
    with pytest.raises(ValueError, match=index_msg):
        mdf.target = new

    # set target property
    mdf.target = [7, 8, 9]
    check_frame()
    check_target(pd.Series([7, 8, 9], index=rows, name='.target'))

    # the length-mismatch message differs between pandas versions
    length_msg = ('Length of passed values is 2, index implies 3'
                  if pdml.compat._PANDAS_ge_023
                  else 'Wrong number of items passed 2, placement implies 3')
    with pytest.raises(ValueError, match=length_msg):
        mdf.target = [1, 2]

    # set target property
    mdf.target = None

    self.assertIsInstance(mdf, pdml.ModelFrame)
    self.assertEqual(mdf.shape, (3, 3))
    tm.assert_index_equal(mdf.index, pd.Index(rows))
    tm.assert_index_equal(mdf.columns, pd.Index(cols))
    tm.assert_frame_equal(mdf.data, df)
    self.assertEqual(mdf.target_name, '.target')
def test_frame_data_proparty(self):
    # Walks the ``data`` property through replacement, the two rejected
    # assignments, and deletion.
    rows = ['a', 'b', 'c']
    df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6], 'C': [7, 8, 9]},
                      index=rows, columns=['A', 'B', 'C'])
    s = pd.Series([1, 2, 3], index=rows, name='.target')
    mdf = pdml.ModelFrame(df, target=s)
    self.assertIsInstance(mdf, pdml.ModelFrame)

    def check_target_kept():
        tm.assert_series_equal(mdf.target, s)
        self.assertEqual(mdf.target.name, '.target')
        self.assertEqual(mdf.target_name, '.target')

    def check_replaced(new_data, data_cols):
        # state expected right after ``mdf.data = new_data``
        self.assertIsInstance(mdf, pdml.ModelFrame)
        self.assertEqual(mdf.shape, (3, 3))
        tm.assert_index_equal(mdf.index, pd.Index(rows))
        tm.assert_index_equal(mdf.columns,
                              pd.Index(['.target'] + data_cols))
        tm.assert_frame_equal(mdf.data, new_data)
        check_target_kept()

    new = pd.DataFrame({'X': [1, 2, 3], 'Y': [4, 5, 6]},
                       index=rows, columns=['X', 'Y'])
    # set data property
    mdf.data = new
    check_replaced(new, ['X', 'Y'])

    new = pdml.ModelFrame({'M': [1, 2, 3], 'N': [4, 5, 6]},
                          index=rows, columns=['M', 'N'])
    # set data property
    mdf.data = new
    check_replaced(new, ['M', 'N'])

    new = pd.DataFrame({'.target': [1, 2, 3], 'K': [4, 5, 6]}, index=rows)

    # unable to set data if passed value has the same column as the target
    clash_msg = "Passed data has the same column name as the target '.target'"
    with pytest.raises(ValueError, match=clash_msg):
        mdf.data = new

    # unable to set ModelFrame with target attribute
    has_target_msg = "Cannot update with ModelFrame which has target attribute"
    with pytest.raises(ValueError, match=has_target_msg):
        mdf.data = mdf

    # set delete property
    del mdf.data

    self.assertIsInstance(mdf, pdml.ModelFrame)
    self.assertEqual(mdf.shape, (3, 1))
    tm.assert_index_equal(mdf.index, pd.Index(rows))
    tm.assert_index_equal(mdf.columns, pd.Index(['.target']))
    self.assertTrue(mdf.data is None)
    check_target_kept()