Example #1
0
    def test_frame_target_object_set(self):
        """Assign ``target`` on a frame whose column labels are datetimes.

        The first assignment is expected to add a target column named after
        the Series (``5``).  The second assignment passes a Series named
        ``'X'``; the test expects the values to be replaced while the
        existing target name ``5`` is kept.
        """
        labels = ['a', 'b', 'c']
        years = [datetime.datetime(2014, 1, 1),
                 datetime.datetime(2015, 1, 1),
                 datetime.datetime(2016, 1, 1)]
        df = pd.DataFrame({years[0]: [1, 2, 3],
                           years[1]: [4, 5, 6],
                           years[2]: [7, 8, 9]},
                          index=labels)
        mdf = pdml.ModelFrame(df)

        # (values, name passed to the setter); the name of the second Series
        # is expected to be ignored because a target already exists
        assignments = [(['A', 'B', 'C'], 5), ([10, 11, 12], 'X')]
        for values, given_name in assignments:
            mdf.target = pd.Series(values, index=labels, name=given_name)

            self.assertIsInstance(mdf, pdml.ModelFrame)
            self.assertEqual(mdf.shape, (3, 4))
            tm.assert_index_equal(mdf.index, pd.Index(labels))
            exp_columns = pd.Index([5] + years)
            tm.assert_index_equal(mdf.columns, exp_columns)
            tm.assert_frame_equal(mdf.data, df)
            exp_target = pd.Series(values, index=labels, name=5)
            tm.assert_series_equal(mdf.target, exp_target)
            self.assertEqual(mdf.target.name, 5)
Example #2
0
    def test_patsy_matrices(self):
        """Transform a ModelFrame with the formula string ``'A ~ B + C'``.

        The result is expected to be a float-valued ModelFrame with columns
        ``A``, ``Intercept``, ``B``, ``C`` and with ``A`` as its target.
        """
        labels = ['a', 'b', 'c']
        frame = pd.DataFrame({'A': [1, 2, 3],
                              'B': [4, 5, 6],
                              'C': [7, 8, 9]},
                             index=labels,
                             columns=['A', 'B', 'C'])
        target = pd.Series([10, 11, 12], index=labels)
        mdf = pdml.ModelFrame(frame, target=target)

        result = mdf.transform('A ~ B + C')

        exp_columns = ['A', 'Intercept', 'B', 'C']
        self.assertIsInstance(result, pdml.ModelFrame)
        self.assertEqual(result.shape, (3, 4))
        tm.assert_index_equal(result.index, pd.Index(labels))
        tm.assert_index_equal(result.columns, pd.Index(exp_columns))

        exp_frame = pd.DataFrame({'A': [1, 2, 3],
                                  'Intercept': [1, 1, 1],
                                  'B': [4, 5, 6],
                                  'C': [7, 8, 9]},
                                 index=labels,
                                 columns=exp_columns,
                                 dtype=float)
        tm.assert_frame_equal(result, exp_frame)

        exp_target = pd.Series([1, 2, 3], index=labels, name='A', dtype=float)
        tm.assert_series_equal(result.target, exp_target)
        self.assertEqual(result.target.name, 'A')
        self.assertEqual(result.target_name, 'A')
Example #3
0
    def test_inverse_transform(self):
        """Compare frame-level ``transform``/``inverse_transform`` with the plain estimator.

        For each listed decomposition model, one instance is created through
        the ModelFrame accessor and one directly from ``decomposition``; both
        are fitted on the iris data and their outputs compared.
        """
        iris = datasets.load_iris()
        df = pdml.ModelFrame(iris)

        for name in ['PCA']:
            via_frame = getattr(df.decomposition, name)()
            reference = getattr(decomposition, name)()

            df.fit(via_frame)
            reference.fit(iris.data, iris.target)

            # forward transform
            transformed = df.transform(via_frame)
            exp_transformed = reference.transform(iris.data)

            self.assertIsInstance(transformed, pdml.ModelFrame)
            tm.assert_series_equal(df.target, transformed.target)
            self.assert_numpy_array_almost_equal(transformed.data.values,
                                                 exp_transformed)

            # inverse transform
            # NOTE(review): the reference call is given the original
            # iris.data rather than the transformed output -- confirm intent
            inversed = df.inverse_transform(via_frame)
            exp_inversed = reference.inverse_transform(iris.data)

            self.assertIsInstance(inversed, pdml.ModelFrame)
            tm.assert_series_equal(df.target, inversed.target)
            self.assert_numpy_array_almost_equal(inversed.data.values,
                                                 exp_inversed)
            tm.assert_index_equal(inversed.columns, df.columns)
Example #4
0
    def test_frame_init_df_series(self):
        """Build a ModelFrame from a DataFrame and a Series target.

        Three cases are exercised: a target named ``'.target'``, a target
        whose index differs from the data's index (expected to raise
        ``ValueError``), and a target with the custom name ``'XXX'``.
        """
        df = pd.DataFrame({'A': [1, 2, 3],
                           'B': [4, 5, 6],
                           'C': [7, 8, 9]},
                          index=['a', 'b', 'c'],
                          columns=['A', 'B', 'C'])
        s = pd.Series([1, 2, 3], index=['a', 'b', 'c'], name='.target')

        mdf = pdml.ModelFrame(df, target=s)
        self.assertIsInstance(mdf, pdml.ModelFrame)
        self.assertEqual(mdf.shape, (3, 4))
        tm.assert_index_equal(mdf.index, pd.Index(['a', 'b', 'c']))
        tm.assert_index_equal(mdf.columns, pd.Index(['.target', 'A', 'B', 'C']))
        tm.assert_frame_equal(mdf.data, df)
        tm.assert_series_equal(mdf.target, s)
        self.assertEqual(mdf.target.name, '.target')
        self.assertEqual(mdf.target_name, '.target')

        # this Series gets the default integer index, which differs from the
        # data's ['a', 'b', 'c'] labels
        s = pd.Series([1, 2, 3])
        # assertRaisesRegex is used because the assertRaisesRegexp alias is
        # deprecated and no longer exists in Python 3.12+
        with self.assertRaisesRegex(ValueError,
                                    'data and target must have equal index'):
            pdml.ModelFrame(df, target=s)

        s = pd.Series([1, 2, 3], index=['a', 'b', 'c'], name='XXX')
        mdf = pdml.ModelFrame(df, target=s)
        self.assertIsInstance(mdf, pdml.ModelFrame)
        self.assertEqual(mdf.shape, (3, 4))
        tm.assert_index_equal(mdf.index, pd.Index(['a', 'b', 'c']))
        tm.assert_index_equal(mdf.columns, pd.Index(['XXX', 'A', 'B', 'C']))
        tm.assert_frame_equal(mdf.data, df)
        tm.assert_series_equal(mdf.target, s)
        self.assertEqual(mdf.target.name, 'XXX')
        self.assertEqual(mdf.target_name, 'XXX')
Example #5
0
    def test_frame_init_df_df(self):
        """Build a ModelFrame from a DataFrame with a DataFrame target.

        A two-column target is expected to give a multi-target frame, a
        target with a non-matching index is expected to raise ``ValueError``,
        and a one-column target is expected to yield a single Series target.
        """
        labels = ['a', 'b', 'c']
        feature_cols = ['A', 'B', 'C']
        target_cols = ['t1', 't2']

        df = pd.DataFrame({'A': [1, 2, 3],
                           'B': [4, 5, 6],
                           'C': [7, 8, 9]},
                          index=labels,
                          columns=feature_cols)
        target = pd.DataFrame({'t1': [10, 11, 12],
                               't2': [13, 14, 15]},
                              index=labels)
        mdf = pdml.ModelFrame(df, target=target)

        self.assertIsInstance(mdf, pdml.ModelFrame)
        self.assertEqual(mdf.shape, (3, 5))
        all_cols = target_cols + feature_cols
        exp_frame = pd.DataFrame({'t1': [10, 11, 12],
                                  't2': [13, 14, 15],
                                  'A': [1, 2, 3],
                                  'B': [4, 5, 6],
                                  'C': [7, 8, 9]},
                                 index=labels,
                                 columns=all_cols)
        tm.assert_frame_equal(mdf, exp_frame)
        tm.assert_index_equal(mdf.index, pd.Index(labels))
        tm.assert_index_equal(mdf.columns, pd.Index(all_cols))
        tm.assert_frame_equal(mdf.data, df)
        tm.assert_frame_equal(mdf.target, target)
        tm.assert_index_equal(mdf.target.columns, pd.Index(target_cols))
        tm.assert_index_equal(mdf.target_name, pd.Index(target_cols))
        self.assertTrue(mdf.has_multi_targets())

        # target built without the data's row labels
        target = pd.DataFrame({'t1': [10, 11, 12], 't2': [13, 14, 15]})
        with pytest.raises(ValueError,
                           match='data and target must have equal index'):
            mdf = pdml.ModelFrame(df, target=target)

        # a single-column DataFrame results in a single target column
        target = pd.DataFrame({'t1': [10, 11, 12]}, index=labels)
        mdf = pdml.ModelFrame(df, target=target)
        self.assertIsInstance(mdf, pdml.ModelFrame)
        self.assertEqual(mdf.shape, (3, 4))
        tm.assert_index_equal(mdf.index, pd.Index(labels))
        tm.assert_index_equal(mdf.columns, pd.Index(['t1'] + feature_cols))
        tm.assert_frame_equal(mdf.data, df)

        exp_target = pd.Series([10, 11, 12], name='t1', index=labels)
        tm.assert_series_equal(mdf.target, exp_target)
        self.assertEqual(mdf.target_name, 't1')
Example #6
0
    def test_series_groupby(self):
        """Group a ModelSeries and check the type, values and name of one group."""
        series = pdml.ModelSeries([1, 2, 1, 2], name='X')
        self.assertIsInstance(series, pdml.ModelSeries)

        # first three rows share key 1, the last row has key 2
        keys = [1, 1, 1, 2]
        grouped = series.groupby(keys)
        self.assertIsInstance(grouped, pdml.core.groupby.ModelSeriesGroupBy)

        first_group = grouped.get_group(1)
        self.assertIsInstance(first_group, pdml.ModelSeries)
        tm.assert_series_equal(
            first_group,
            pd.Series([1, 2, 1], index=[0, 1, 2], name='X'))
        self.assertEqual(first_group.name, 'X')
Example #7
0
    def test_fit_transform_PCA(self):
        """``fit_transform`` with PCA through the frame matches the plain estimator."""
        iris = datasets.load_iris()
        df = pdml.ModelFrame(iris)

        via_frame = df.decomposition.PCA()
        reference = decomposition.PCA()

        actual = df.fit_transform(via_frame)
        wanted = reference.fit_transform(iris.data, iris.target)

        self.assertIsInstance(actual, pdml.ModelFrame)
        # the target column is expected to pass through unchanged
        tm.assert_series_equal(df.target, actual.target)
        self.assert_numpy_array_almost_equal(actual.data.values, wanted)
Example #8
0
    def test_fit_transform_KernelPCA(self):
        """``fit_transform`` with KernelPCA through the frame matches the plain estimator.

        Only the first 40 output columns are compared.
        """
        iris = datasets.load_iris()
        df = pdml.ModelFrame(iris)

        via_frame = df.decomposition.KernelPCA()
        reference = decomposition.KernelPCA()

        actual = df.fit_transform(via_frame)
        wanted = reference.fit_transform(iris.data, iris.target)

        self.assertIsInstance(actual, pdml.ModelFrame)
        tm.assert_series_equal(df.target, actual.target)

        actual_head = actual.data.values[:, :40]
        wanted_head = wanted[:, :40]
        self.assert_numpy_array_almost_equal(actual_head, wanted_head)
Example #9
0
    def test_frame_init_dict_list_series_index(self):
        """Build a ModelFrame from a dict of lists and a labelled Series target.

        The data dict carries no index of its own; the resulting frame is
        expected to use the target's ``['a', 'b', 'c']`` index.
        """
        labels = ['a', 'b', 'c']
        raw = {'A': [1, 2, 3], 'B': [4, 5, 6], 'C': [7, 8, 9]}
        target = pd.Series([9, 8, 7], name='X', index=labels)

        mdf = pdml.ModelFrame(raw, target=target)

        self.assertIsInstance(mdf, pdml.ModelFrame)
        self.assertEqual(mdf.shape, (3, 4))
        tm.assert_index_equal(mdf.index, pd.Index(labels))
        tm.assert_index_equal(mdf.columns, pd.Index(['X', 'A', 'B', 'C']))
        tm.assert_frame_equal(mdf.data, pd.DataFrame(raw, index=labels))
        tm.assert_series_equal(mdf.target, target)
        self.assertEqual(mdf.target.name, 'X')
        self.assertEqual(mdf.target_name, 'X')
Example #10
0
    def test_frame_init_dict_list_series_index(self):
        """Initialise from a plain dict plus a Series target that has an index.

        The frame's index is expected to come from the target Series, and the
        target column (``'X'``) is expected to be placed first.
        """
        columns = {'A': [1, 2, 3], 'B': [4, 5, 6], 'C': [7, 8, 9]}
        row_labels = ['a', 'b', 'c']
        target = pd.Series([9, 8, 7], name='X', index=row_labels)
        mdf = pdml.ModelFrame(columns, target=target)

        # container type and shape
        self.assertIsInstance(mdf, pdml.ModelFrame)
        self.assertEqual(mdf.shape, (3, 4))

        # axes
        tm.assert_index_equal(mdf.index, pd.Index(row_labels))
        tm.assert_index_equal(mdf.columns, pd.Index(['X', 'A', 'B', 'C']))

        # data / target split
        exp_data = pd.DataFrame(columns, index=row_labels)
        tm.assert_frame_equal(mdf.data, exp_data)
        tm.assert_series_equal(mdf.target, target)
        self.assertEqual(mdf.target.name, 'X')
        self.assertEqual(mdf.target_name, 'X')
Example #11
0
    def test_frame_init_df_df(self):
        """Build a ModelFrame from a DataFrame with a DataFrame target.

        A two-column target is expected to give a multi-target frame, a
        target with a non-matching index is expected to raise ``ValueError``,
        and a one-column target is expected to yield a single Series target.
        """
        df = pd.DataFrame({'A': [1, 2, 3],
                           'B': [4, 5, 6],
                           'C': [7, 8, 9]},
                          index=['a', 'b', 'c'],
                          columns=['A', 'B', 'C'])
        target = pd.DataFrame({'t1': [10, 11, 12],
                               't2': [13, 14, 15]},
                              index=['a', 'b', 'c'])
        mdf = pdml.ModelFrame(df, target=target)

        self.assertIsInstance(mdf, pdml.ModelFrame)
        self.assertEqual(mdf.shape, (3, 5))
        expected = pd.DataFrame({'t1': [10, 11, 12],
                                 't2': [13, 14, 15],
                                 'A': [1, 2, 3],
                                 'B': [4, 5, 6],
                                 'C': [7, 8, 9]},
                                index=['a', 'b', 'c'],
                                columns=['t1', 't2', 'A', 'B', 'C'])
        tm.assert_frame_equal(mdf, expected)
        tm.assert_index_equal(mdf.index, pd.Index(['a', 'b', 'c']))
        tm.assert_index_equal(mdf.columns,
                              pd.Index(['t1', 't2', 'A', 'B', 'C']))
        tm.assert_frame_equal(mdf.data, df)
        tm.assert_frame_equal(mdf.target, target)
        tm.assert_index_equal(mdf.target.columns, pd.Index(['t1', 't2']))
        tm.assert_index_equal(mdf.target_name, pd.Index(['t1', 't2']))
        self.assertTrue(mdf.has_multi_targets())

        # target built without the data's ['a', 'b', 'c'] row labels
        target = pd.DataFrame({'t1': [10, 11, 12], 't2': [13, 14, 15]})
        msg = 'data and target must have equal index'
        # assertRaisesRegex is used because the assertRaisesRegexp alias is
        # deprecated and no longer exists in Python 3.12+
        with self.assertRaisesRegex(ValueError, msg):
            pdml.ModelFrame(df, target=target)

        # a single-column DataFrame results in a single target column
        target = pd.DataFrame({'t1': [10, 11, 12]}, index=['a', 'b', 'c'])
        mdf = pdml.ModelFrame(df, target=target)
        self.assertIsInstance(mdf, pdml.ModelFrame)
        self.assertEqual(mdf.shape, (3, 4))
        tm.assert_index_equal(mdf.index, pd.Index(['a', 'b', 'c']))
        tm.assert_index_equal(mdf.columns, pd.Index(['t1', 'A', 'B', 'C']))
        tm.assert_frame_equal(mdf.data, df)

        target = pd.Series([10, 11, 12], name='t1', index=['a', 'b', 'c'])
        tm.assert_series_equal(mdf.target, target)
        self.assertEqual(mdf.target_name, 't1')
Example #12
0
    def _assert_fit_transform(self, df, exp_data, model1, model2):
        """Check ``df.fit_transform(model1)`` against ``model2.fit_transform(exp_data)``.

        Asserts that the result is a ModelFrame, that the target is unchanged
        (or ``None`` when ``df`` has no target), that the transformed values
        are almost equal to the reference output, and that index and columns
        are the same as those of ``df``.
        """
        actual = df.fit_transform(model1)
        reference = model2.fit_transform(exp_data)

        self.assertIsInstance(actual, pdml.ModelFrame)

        if not df.has_target():
            self.assertIsNone(actual.target)
        else:
            # target must come through untouched
            tm.assert_series_equal(df.target, actual.target)

        self.assert_numpy_array_almost_equal(actual.data.values, reference)

        # both axes are expected to be preserved by the transform
        tm.assert_index_equal(actual.index, df.index)
        tm.assert_index_equal(actual.columns, df.columns)
    def _assert_fit_transform(self, df, exp_data, model1, model2):
        """Compare a frame-level ``fit_transform`` with a reference estimator.

        ``model1`` is applied through ``df.fit_transform`` and ``model2`` is
        applied directly to ``exp_data``.  The helper asserts the result type,
        the handling of the target, the transformed values, and that index
        and columns equal those of ``df``.
        """
        transformed = df.fit_transform(model1)
        wanted = model2.fit_transform(exp_data)
        self.assertIsInstance(transformed, pdml.ModelFrame)

        had_target = df.has_target()
        if had_target:
            # the target is not expected to be modified by the transform
            tm.assert_series_equal(df.target, transformed.target)
        else:
            self.assertIsNone(transformed.target)

        self.assert_numpy_array_almost_equal(transformed.data.values, wanted)

        # index and columns are kept
        tm.assert_index_equal(transformed.index, df.index)
        tm.assert_index_equal(transformed.columns, df.columns)
Example #14
0
    def test_frame_target_object(self):
        """Select an existing column as target using a ``datetime`` label."""
        labels = ['a', 'b', 'c']
        target_key = datetime.datetime(2016, 1, 1)
        df = pd.DataFrame({datetime.datetime(2014, 1, 1): [1, 2, 3],
                           datetime.datetime(2015, 1, 1): [4, 5, 6],
                           target_key: [7, 8, 9]},
                          index=labels)
        mdf = pdml.ModelFrame(df, target=target_key)

        self.assertIsInstance(mdf, pdml.ModelFrame)
        self.assertEqual(mdf.shape, (3, 3))
        tm.assert_index_equal(mdf.index, pd.Index(labels))

        exp_columns = pd.DatetimeIndex(['2014-01-01', '2015-01-01',
                                        '2016-01-01'])
        tm.assert_index_equal(mdf.columns, exp_columns)

        # the first two columns are expected to remain as data
        tm.assert_frame_equal(mdf.data, df.iloc[:, :2])

        exp_target = pd.Series([7, 8, 9], index=labels,
                               name=pd.Timestamp('2016-01-01'))
        tm.assert_series_equal(mdf.target, exp_target)
        self.assertEqual(mdf.target.name, target_key)
        self.assertEqual(mdf.target_name, target_key)
Example #15
0
    def test_frame_init_df_array_series(self):
        """Build a ModelFrame from a NumPy array plus a named Series target."""
        labels = ['a', 'b', 'c']
        names = ['A', 'B', 'C']
        rows = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]

        target = pd.Series([1, 2, 3], index=labels, name='.target')
        mdf = pdml.ModelFrame(np.array(rows), target=target,
                              index=labels, columns=names)

        self.assertIsInstance(mdf, pdml.ModelFrame)
        self.assertEqual(mdf.shape, (3, 4))
        tm.assert_index_equal(mdf.index, pd.Index(labels))
        tm.assert_index_equal(mdf.columns, pd.Index(['.target', 'A', 'B', 'C']))

        # a fresh array is built for the expectation, as in the frame above
        exp_data = pd.DataFrame(np.array(rows), index=labels, columns=names)
        tm.assert_frame_equal(mdf.data, exp_data)
        tm.assert_series_equal(mdf.target, target)
        self.assertEqual(mdf.target.name, '.target')
        self.assertEqual(mdf.target_name, '.target')
Example #16
0
    def test_frame_target_object_set(self):
        """Set ``target`` twice on a frame with datetime column labels.

        After the first assignment the target is expected to be named ``5``.
        The second assignment uses a Series named ``'X'``; the test expects
        that name to be ignored and the target to stay named ``5``.
        """
        row_labels = ['a', 'b', 'c']
        df = pd.DataFrame(
            {
                datetime.datetime(2014, 1, 1): [1, 2, 3],
                datetime.datetime(2015, 1, 1): [4, 5, 6],
                datetime.datetime(2016, 1, 1): [7, 8, 9]
            },
            index=row_labels)
        mdf = pdml.ModelFrame(df)

        def check_state(target_values):
            # shared assertions for the frame after each target assignment
            self.assertIsInstance(mdf, pdml.ModelFrame)
            self.assertEqual(mdf.shape, (3, 4))
            tm.assert_index_equal(mdf.index, pd.Index(row_labels))
            exp_columns = pd.Index([
                5,
                datetime.datetime(2014, 1, 1),
                datetime.datetime(2015, 1, 1),
                datetime.datetime(2016, 1, 1)
            ])
            tm.assert_index_equal(mdf.columns, exp_columns)
            tm.assert_frame_equal(mdf.data, df)
            exp_target = pd.Series(target_values, index=row_labels, name=5)
            tm.assert_series_equal(mdf.target, exp_target)
            self.assertEqual(mdf.target.name, 5)

        mdf.target = pd.Series(['A', 'B', 'C'], index=row_labels, name=5)
        check_state(['A', 'B', 'C'])

        # name will be ignored if ModelFrame already has a target
        mdf.target = pd.Series([10, 11, 12], index=row_labels, name='X')
        check_state([10, 11, 12])
Example #17
0
    def test_frame_init_df_array_series(self):
        """Initialise from a 3x3 array with explicit index/columns and a Series target."""
        row_labels = ['a', 'b', 'c']
        col_labels = ['A', 'B', 'C']
        target = pd.Series([1, 2, 3], index=row_labels, name='.target')

        values = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
        mdf = pdml.ModelFrame(values,
                              target=target,
                              index=row_labels,
                              columns=col_labels)

        self.assertIsInstance(mdf, pdml.ModelFrame)
        self.assertEqual(mdf.shape, (3, 4))
        tm.assert_index_equal(mdf.index, pd.Index(row_labels))
        exp_columns = pd.Index(['.target', 'A', 'B', 'C'])
        tm.assert_index_equal(mdf.columns, exp_columns)

        # the expectation is built from its own, separately created array
        exp_values = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
        exp_data = pd.DataFrame(exp_values,
                                index=row_labels,
                                columns=col_labels)
        tm.assert_frame_equal(mdf.data, exp_data)
        tm.assert_series_equal(mdf.target, target)
        self.assertEqual(mdf.target.name, '.target')
        self.assertEqual(mdf.target_name, '.target')
Example #18
0
    def test_frame_init_df_str(self):
        """Build a ModelFrame from a DataFrame, naming the target column by string.

        ``target='A'`` is expected to mark the existing column ``A`` as the
        target; a name that is not a column is expected to raise
        ``ValueError``.
        """
        df = pd.DataFrame({'A': [1, 2, 3],
                           'B': [4, 5, 6],
                           'C': [7, 8, 9]},
                          index=['a', 'b', 'c'],
                          columns=['A', 'B', 'C'])

        mdf = pdml.ModelFrame(df, target='A')
        self.assertIsInstance(mdf, pdml.ModelFrame)
        self.assertEqual(mdf.shape, (3, 3))
        tm.assert_index_equal(mdf.index, pd.Index(['a', 'b', 'c']))
        tm.assert_index_equal(mdf.columns, pd.Index(['A', 'B', 'C']))
        tm.assert_frame_equal(mdf.data, df[['B', 'C']])
        tm.assert_series_equal(mdf.target, df['A'])
        self.assertEqual(mdf.target.name, 'A')
        self.assertEqual(mdf.target_name, 'A')

        msg = "Specified target 'X' is not included in data"
        # assertRaisesRegex is used because the assertRaisesRegexp alias is
        # deprecated and no longer exists in Python 3.12+
        with self.assertRaisesRegex(ValueError, msg):
            pdml.ModelFrame(df, target='X')
Example #19
0
    def test_frame_data_none(self):
        """Construct a ModelFrame with ``data=None``.

        Passing neither data nor target, or a plain string target without
        data, is expected to raise ``ValueError``.  Passing only a Series
        target is expected to give a one-column frame with no data.
        """
        # assertRaisesRegex is used because the assertRaisesRegexp alias is
        # deprecated and no longer exists in Python 3.12+
        msg = "ModelFrame must have either data or target"
        with self.assertRaisesRegex(ValueError, msg):
            pdml.ModelFrame(None)

        msg = "target must be list-like when data is None"
        with self.assertRaisesRegex(ValueError, msg):
            pdml.ModelFrame(None, target='X')

        # initialization without data
        s = pd.Series([1, 2, 3], index=['a', 'b', 'c'], name='.target')
        mdf = pdml.ModelFrame(None, target=s)

        self.assertIsInstance(mdf, pdml.ModelFrame)
        self.assertEqual(mdf.shape, (3, 1))
        tm.assert_index_equal(mdf.index, pd.Index(['a', 'b', 'c']))
        tm.assert_index_equal(mdf.columns, pd.Index(['.target']))
        # assertIsNone reports the actual value on failure
        self.assertIsNone(mdf.data)
        self.assertFalse(mdf.has_data())
        tm.assert_series_equal(mdf.target, s)
        self.assertEqual(mdf.target.name, '.target')
        self.assertEqual(mdf.target_name, '.target')
Example #20
0
    def test_frame_data_none(self):
        """Construct a ModelFrame without data.

        Two invalid calls are expected to raise ``ValueError`` with specific
        messages; a call with only a Series target is expected to produce a
        frame that has a target but no data.
        """
        with pytest.raises(ValueError,
                           match="ModelFrame must have either data or target"):
            mdf = pdml.ModelFrame(None)

        with pytest.raises(ValueError,
                           match="target must be list-like when data is None"):
            mdf = pdml.ModelFrame(None, target='X')

        # only a target is supplied
        labels = ['a', 'b', 'c']
        target = pd.Series([1, 2, 3], index=labels, name='.target')
        mdf = pdml.ModelFrame(None, target=target)

        self.assertIsInstance(mdf, pdml.ModelFrame)
        self.assertEqual(mdf.shape, (3, 1))
        tm.assert_index_equal(mdf.index, pd.Index(labels))
        tm.assert_index_equal(mdf.columns, pd.Index(['.target']))
        self.assertIsNone(mdf.data)
        self.assertFalse(mdf.has_data())
        tm.assert_series_equal(mdf.target, target)
        self.assertEqual(mdf.target.name, '.target')
        self.assertEqual(mdf.target_name, '.target')
Example #21
0
    def test_frame_target_object(self):
        """Use a ``datetime`` object as the target column label.

        The frame keeps all three datetime columns; the 2016 column is
        expected to be exposed as the target and the other two as data.
        """
        y2014 = datetime.datetime(2014, 1, 1)
        y2015 = datetime.datetime(2015, 1, 1)
        y2016 = datetime.datetime(2016, 1, 1)
        rows = ['a', 'b', 'c']

        df = pd.DataFrame(
            {
                y2014: [1, 2, 3],
                y2015: [4, 5, 6],
                y2016: [7, 8, 9]
            },
            index=rows)
        mdf = pdml.ModelFrame(df, target=y2016)

        self.assertIsInstance(mdf, pdml.ModelFrame)
        self.assertEqual(mdf.shape, (3, 3))
        tm.assert_index_equal(mdf.index, pd.Index(rows))
        tm.assert_index_equal(
            mdf.columns,
            pd.DatetimeIndex(['2014-01-01', '2015-01-01', '2016-01-01']))
        tm.assert_frame_equal(mdf.data, df.iloc[:, :2])
        tm.assert_series_equal(
            mdf.target,
            pd.Series([7, 8, 9], index=rows, name=pd.Timestamp('2016-01-01')))
        self.assertEqual(mdf.target.name, y2016)
        self.assertEqual(mdf.target_name, y2016)
Example #22
0
    def test_frame_groupby(self):
        """Group a ModelFrame by a column and inspect the target of one group."""
        frame = pd.DataFrame(
            {
                'A': [1, 2, 1, 2],
                'B': [4, 5, 6, 7],
                'C': [7, 8, 9, 10]
            },
            columns=['A', 'B', 'C'])
        target = pd.Series([1, 2, 3, 4])

        mdf = pdml.ModelFrame(frame, target=target)
        self.assertIsInstance(mdf, pdml.ModelFrame)

        grouped = mdf.groupby('A')
        self.assertIsInstance(grouped, pdml.core.groupby.ModelFrameGroupBy)

        group = grouped.get_group(1)
        self.assertIsInstance(group, pdml.ModelFrame)

        # rows 0 and 2 have A == 1, so their target values are expected
        exp_target = pd.Series([1, 3], index=[0, 2], name='.target')
        tm.assert_series_equal(group.target, exp_target)
        self.assertIsInstance(group.target, pdml.ModelSeries)
    def test_LabelEncoder_series(self):
        """Apply a LabelEncoder to a ModelSeries and invert it again.

        Covers separate ``fit`` + ``transform``, combined ``fit_transform``,
        and ``inverse_transform`` back to the original series.
        """
        s = pdml.ModelSeries(np.array(['X', 'Y', 'Z', 'X']),
                             index=['a', 'b', 'c', 'd'])
        exp_codes = None

        # fit and transform as two steps
        encoder = s.pp.LabelEncoder()
        s.fit(encoder)
        encoded = s.transform(encoder)

        exp_codes = np.array([0, 1, 2, 0])

        self.assertIsInstance(encoded, pdml.ModelSeries)
        self.assert_numpy_array_almost_equal(encoded.values, exp_codes)
        tm.assert_index_equal(encoded.index, s.index)

        # fit_transform in one step with a fresh encoder
        encoder = s.pp.LabelEncoder()
        encoded = s.fit_transform(encoder)

        self.assertIsInstance(encoded, pdml.ModelSeries)
        self.assert_numpy_array_almost_equal(encoded.values, exp_codes)

        # round trip back to the original labels
        decoded = encoded.inverse_transform(encoder)
        self.assertIsInstance(decoded, pdml.ModelSeries)
        tm.assert_series_equal(decoded, s)
Example #24
0
    def test_frame_init_df_str(self):
        """Initialise from a DataFrame with the target given as a column name.

        An existing column name is expected to select that column as target;
        an unknown name is expected to raise ``ValueError``.
        """
        labels = ['a', 'b', 'c']
        names = ['A', 'B', 'C']
        df = pd.DataFrame({'A': [1, 2, 3],
                           'B': [4, 5, 6],
                           'C': [7, 8, 9]},
                          index=labels,
                          columns=names)

        mdf = pdml.ModelFrame(df, target='A')
        self.assertIsInstance(mdf, pdml.ModelFrame)
        self.assertEqual(mdf.shape, (3, 3))
        tm.assert_index_equal(mdf.index, pd.Index(labels))
        tm.assert_index_equal(mdf.columns, pd.Index(names))
        tm.assert_frame_equal(mdf.data, df[['B', 'C']])
        tm.assert_series_equal(mdf.target, df['A'])
        self.assertEqual(mdf.target.name, 'A')
        self.assertEqual(mdf.target_name, 'A')

        # 'X' is not one of the columns
        with pytest.raises(
                ValueError,
                match="Specified target 'X' is not included in data"):
            mdf = pdml.ModelFrame(df, target='X')
Example #25
0
    def test_LabelEncoder_series(self):
        """LabelEncoder on a ModelSeries: encode, re-encode, and decode."""
        raw = np.array(['X', 'Y', 'Z', 'X'])
        series = pdml.ModelSeries(raw, index=['a', 'b', 'c', 'd'])

        model = series.pp.LabelEncoder()
        series.fit(model)
        codes = series.transform(model)

        # 'X' -> 0, 'Y' -> 1, 'Z' -> 2 is the encoding the test expects
        wanted = np.array([0, 1, 2, 0])

        self.assertIsInstance(codes, pdml.ModelSeries)
        self.assert_numpy_array_almost_equal(codes.values, wanted)
        tm.assert_index_equal(codes.index, series.index)

        model = series.pp.LabelEncoder()
        codes = series.fit_transform(model)

        self.assertIsInstance(codes, pdml.ModelSeries)
        self.assert_numpy_array_almost_equal(codes.values, wanted)

        restored = codes.inverse_transform(model)
        self.assertIsInstance(restored, pdml.ModelSeries)
        tm.assert_series_equal(restored, series)
Example #26
0
    def test_frame_init_dict_list(self):
        """Initialise a ModelFrame from DataFrame/dict data with list or string targets.

        Four cases are exercised in order: DataFrame data with a list target,
        dict data with a list target, dict data with a column-name target,
        and dict data with explicit index/columns and no target.
        """
        labels = ['a', 'b', 'c']
        names = ['A', 'B', 'C']
        with_target = ['.target', 'A', 'B', 'C']

        # case 1: DataFrame data, list target
        frame = pd.DataFrame({'A': [1, 2, 3],
                              'B': [4, 5, 6],
                              'C': [7, 8, 9]},
                             index=labels,
                             columns=names)
        target_values = [1, 2, 3]
        mdf = pdml.ModelFrame(frame, target=target_values)
        self.assertIsInstance(mdf, pdml.ModelFrame)
        self.assertEqual(mdf.shape, (3, 4))
        tm.assert_index_equal(mdf.index, pd.Index(labels))
        tm.assert_index_equal(mdf.columns, pd.Index(with_target))
        tm.assert_frame_equal(mdf.data, frame)
        exp_target = pd.Series([1, 2, 3], index=labels, name='.target')
        tm.assert_series_equal(mdf.target, exp_target)
        self.assertEqual(mdf.target.name, '.target')
        self.assertEqual(mdf.target_name, '.target')

        # case 2: dict data, list target -> integer index is expected
        raw = {'A': [1, 2, 3],
               'B': [4, 5, 6],
               'C': [7, 8, 9]}
        target_values = [1, 2, 3]
        mdf = pdml.ModelFrame(raw, target=target_values)
        self.assertIsInstance(mdf, pdml.ModelFrame)
        self.assertEqual(mdf.shape, (3, 4))
        tm.assert_index_equal(mdf.index, pd.Index([0, 1, 2]))
        tm.assert_index_equal(mdf.columns, pd.Index(with_target))
        exp_data = pd.DataFrame(raw)
        tm.assert_frame_equal(mdf.data, exp_data)
        exp_target = pd.Series([1, 2, 3], index=[0, 1, 2], name='.target')
        tm.assert_series_equal(mdf.target, exp_target)
        self.assertEqual(mdf.target.name, '.target')
        self.assertEqual(mdf.target_name, '.target')

        # case 3: dict data, target selected by column name
        mdf = pdml.ModelFrame(raw, target='A')
        self.assertIsInstance(mdf, pdml.ModelFrame)
        self.assertEqual(mdf.shape, (3, 3))
        tm.assert_index_equal(mdf.index, pd.Index([0, 1, 2]))
        tm.assert_index_equal(mdf.columns, pd.Index(names))
        exp_data = pd.DataFrame(raw)
        tm.assert_frame_equal(mdf.data, exp_data[['B', 'C']])
        tm.assert_series_equal(mdf.target, exp_data['A'])
        self.assertEqual(mdf.target.name, 'A')
        self.assertEqual(mdf.target_name, 'A')

        # case 4: dict data with explicit index/columns and no target
        mdf = pdml.ModelFrame({'A': [1, 2, 3],
                               'B': [4, 5, 6],
                               'C': [7, 8, 9]},
                              index=labels,
                              columns=names)
        self.assertIsInstance(mdf, pdml.ModelFrame)
        self.assertEqual(mdf.shape, (3, 3))
        tm.assert_index_equal(mdf.index, pd.Index(labels))
        tm.assert_index_equal(mdf.columns, pd.Index(names))
        # without a target, the data is expected to equal the whole frame
        tm.assert_frame_equal(mdf.data, mdf)
        self.assertEqual(mdf.target_name, '.target')
Example #27
0
    def test_frame_init_df_series(self):
        """Initialise a ModelFrame from a DataFrame and a Series target.

        Checks a target named ``'.target'``, a target whose index does not
        match the data (expected ``ValueError``), and a target named
        ``'XXX'``.
        """
        labels = ['a', 'b', 'c']
        df = pd.DataFrame({'A': [1, 2, 3],
                           'B': [4, 5, 6],
                           'C': [7, 8, 9]},
                          index=labels,
                          columns=['A', 'B', 'C'])

        # target carrying the name '.target'
        target = pd.Series([1, 2, 3], index=labels, name='.target')
        mdf = pdml.ModelFrame(df, target=target)
        self.assertIsInstance(mdf, pdml.ModelFrame)
        self.assertEqual(mdf.shape, (3, 4))
        tm.assert_index_equal(mdf.index, pd.Index(labels))
        exp_columns = pd.Index(['.target', 'A', 'B', 'C'])
        tm.assert_index_equal(mdf.columns, exp_columns)
        tm.assert_frame_equal(mdf.data, df)
        tm.assert_series_equal(mdf.target, target)
        self.assertEqual(mdf.target.name, '.target')
        self.assertEqual(mdf.target_name, '.target')

        # target with a default integer index instead of the data's labels
        target = pd.Series([1, 2, 3])
        msg = 'data and target must have equal index'
        with pytest.raises(ValueError, match=msg):
            mdf = pdml.ModelFrame(df, target=target)

        # target carrying a custom name
        target = pd.Series([1, 2, 3], index=labels, name='XXX')
        mdf = pdml.ModelFrame(df, target=target)
        self.assertIsInstance(mdf, pdml.ModelFrame)
        self.assertEqual(mdf.shape, (3, 4))
        tm.assert_index_equal(mdf.index, pd.Index(labels))
        tm.assert_index_equal(mdf.columns, pd.Index(['XXX', 'A', 'B', 'C']))
        tm.assert_frame_equal(mdf.data, df)
        tm.assert_series_equal(mdf.target, target)
        self.assertEqual(mdf.target.name, 'XXX')
        self.assertEqual(mdf.target_name, 'XXX')
Example #28
0
    def test_frame_data_proparty(self):
        """Check setting, rejecting and deleting ``ModelFrame.data``.

        Steps covered:
          * replace the data with a plain DataFrame;
          * replace the data with a target-less ModelFrame;
          * reject data containing a column named like the target;
          * reject a ModelFrame that itself has a target;
          * delete the data, leaving only the target column.
        """
        df = pd.DataFrame({'A': [1, 2, 3],
                           'B': [4, 5, 6],
                           'C': [7, 8, 9]},
                          index=['a', 'b', 'c'],
                          columns=['A', 'B', 'C'])
        s = pd.Series([1, 2, 3], index=['a', 'b', 'c'], name='.target')

        mdf = pdml.ModelFrame(df, target=s)
        self.assertIsInstance(mdf, pdml.ModelFrame)

        new = pd.DataFrame({'X': [1, 2, 3],
                            'Y': [4, 5, 6]},
                           index=['a', 'b', 'c'],
                           columns=['X', 'Y'])
        # set data property
        mdf.data = new

        self.assertIsInstance(mdf, pdml.ModelFrame)
        self.assertEqual(mdf.shape, (3, 3))
        tm.assert_index_equal(mdf.index, pd.Index(['a', 'b', 'c']))
        tm.assert_index_equal(mdf.columns, pd.Index(['.target', 'X', 'Y']))
        tm.assert_frame_equal(mdf.data, new)
        tm.assert_series_equal(mdf.target, s)
        self.assertEqual(mdf.target.name, '.target')
        self.assertEqual(mdf.target_name, '.target')

        new = pdml.ModelFrame({'M': [1, 2, 3],
                               'N': [4, 5, 6]},
                              index=['a', 'b', 'c'],
                              columns=['M', 'N'])

        # set data property
        mdf.data = new

        self.assertIsInstance(mdf, pdml.ModelFrame)
        self.assertEqual(mdf.shape, (3, 3))
        tm.assert_index_equal(mdf.index, pd.Index(['a', 'b', 'c']))
        tm.assert_index_equal(mdf.columns, pd.Index(['.target', 'M', 'N']))
        tm.assert_frame_equal(mdf.data, new)
        tm.assert_series_equal(mdf.target, s)
        self.assertEqual(mdf.target.name, '.target')
        self.assertEqual(mdf.target_name, '.target')

        new = pd.DataFrame({'.target': [1, 2, 3],
                            'K': [4, 5, 6]},
                           index=['a', 'b', 'c'])

        # unable to set data if passed value has the same column as the target
        # (assertRaisesRegex is used because the assertRaisesRegexp alias is
        # deprecated and no longer exists in Python 3.12+)
        msg = "Passed data has the same column name as the target '.target'"
        with self.assertRaisesRegex(ValueError, msg):
            mdf.data = new

        # unable to set ModelFrame with target attribute
        msg = "Cannot update with ModelFrame which has target attribute"
        with self.assertRaisesRegex(ValueError, msg):
            mdf.data = mdf

        # deleting the data property leaves only the target column
        del mdf.data
        self.assertIsInstance(mdf, pdml.ModelFrame)
        self.assertEqual(mdf.shape, (3, 1))
        tm.assert_index_equal(mdf.index, pd.Index(['a', 'b', 'c']))
        tm.assert_index_equal(mdf.columns, pd.Index(['.target']))
        self.assertIsNone(mdf.data)
        tm.assert_series_equal(mdf.target, s)
        self.assertEqual(mdf.target.name, '.target')
        self.assertEqual(mdf.target_name, '.target')
Example #29
0
    def test_frame_init_dict_list(self):
        """Build a ModelFrame from DataFrame or dict data.

        The target is given as a plain list, as the name of an existing
        column, or omitted entirely.
        """
        raw = {'A': [1, 2, 3], 'B': [4, 5, 6], 'C': [7, 8, 9]}
        labels = ['a', 'b', 'c']
        features = ['A', 'B', 'C']
        with_target = pd.Index(['.target'] + features)

        # DataFrame data + list target: target takes the frame's index
        df = pd.DataFrame(raw, index=labels, columns=features)
        mdf = pdml.ModelFrame(df, target=[1, 2, 3])
        self.assertIsInstance(mdf, pdml.ModelFrame)
        self.assertEqual(mdf.shape, (3, 4))
        tm.assert_index_equal(mdf.index, pd.Index(labels))
        tm.assert_index_equal(mdf.columns, with_target)
        tm.assert_frame_equal(mdf.data, df)
        tm.assert_series_equal(
            mdf.target,
            pd.Series([1, 2, 3], index=labels, name='.target'))
        self.assertEqual(mdf.target.name, '.target')
        self.assertEqual(mdf.target_name, '.target')

        # dict data + list target: default integer index
        mdf = pdml.ModelFrame(raw, target=[1, 2, 3])
        self.assertIsInstance(mdf, pdml.ModelFrame)
        self.assertEqual(mdf.shape, (3, 4))
        tm.assert_index_equal(mdf.index, pd.Index([0, 1, 2]))
        tm.assert_index_equal(mdf.columns, with_target)
        tm.assert_frame_equal(mdf.data, pd.DataFrame(raw))
        tm.assert_series_equal(
            mdf.target,
            pd.Series([1, 2, 3], index=[0, 1, 2], name='.target'))
        self.assertEqual(mdf.target.name, '.target')
        self.assertEqual(mdf.target_name, '.target')

        # dict data + column name as target: that column becomes the target
        mdf = pdml.ModelFrame(raw, target='A')
        self.assertIsInstance(mdf, pdml.ModelFrame)
        self.assertEqual(mdf.shape, (3, 3))
        tm.assert_index_equal(mdf.index, pd.Index([0, 1, 2]))
        tm.assert_index_equal(mdf.columns, pd.Index(features))
        plain = pd.DataFrame(raw)
        tm.assert_frame_equal(mdf.data, plain[['B', 'C']])
        tm.assert_series_equal(mdf.target, plain['A'])
        self.assertEqual(mdf.target.name, 'A')
        self.assertEqual(mdf.target_name, 'A')

        # dict data, no target: every column is data
        mdf = pdml.ModelFrame({'A': [1, 2, 3],
                               'B': [4, 5, 6],
                               'C': [7, 8, 9]},
                              index=labels, columns=features)
        self.assertIsInstance(mdf, pdml.ModelFrame)
        self.assertEqual(mdf.shape, (3, 3))
        tm.assert_index_equal(mdf.index, pd.Index(labels))
        tm.assert_index_equal(mdf.columns, pd.Index(features))
        tm.assert_frame_equal(mdf.data, mdf)
        self.assertEqual(mdf.target_name, '.target')
Example #30
0
    def test_frame_target_proparty(self):
        """Check assignments to ``ModelFrame.target``.

        Steps covered:
          * assign a Series named '.target';
          * assign a Series with another name (a UserWarning is expected and
            the target keeps the name '.target');
          * reject a Series whose index differs from the data;
          * assign a plain list;
          * reject a list of the wrong length;
          * assign None, which removes the target column.
        """
        df = pd.DataFrame({'A': [1, 2, 3],
                           'B': [4, 5, 6],
                           'C': [7, 8, 9]},
                          index=['a', 'b', 'c'],
                          columns=['A', 'B', 'C'])
        s = pd.Series([1, 2, 3], index=['a', 'b', 'c'])
        mdf = pdml.ModelFrame(df, target=s)

        new = pd.Series([4, 5, 6], index=['a', 'b', 'c'], name='.target')
        # set target property
        mdf.target = new

        self.assertIsInstance(mdf, pdml.ModelFrame)
        self.assertEqual(mdf.shape, (3, 4))
        tm.assert_index_equal(mdf.index, pd.Index(['a', 'b', 'c']))
        tm.assert_index_equal(mdf.columns, pd.Index(['.target', 'A', 'B', 'C']))
        tm.assert_frame_equal(mdf.data, df)
        tm.assert_series_equal(mdf.target, new)
        self.assertEqual(mdf.target.name, '.target')
        self.assertEqual(mdf.target_name, '.target')

        with tm.assert_produces_warning(UserWarning):
            new = pd.Series([4, 5, 6], index=['a', 'b', 'c'], name='xxx')
            # set target property
            mdf.target = new

            self.assertIsInstance(mdf, pdml.ModelFrame)
            self.assertEqual(mdf.shape, (3, 4))
            tm.assert_index_equal(mdf.index, pd.Index(['a', 'b', 'c']))
            tm.assert_index_equal(mdf.columns, pd.Index(['.target', 'A', 'B', 'C']))
            tm.assert_frame_equal(mdf.data, df)

            exp_target = pd.Series(new, name='.target')
            tm.assert_series_equal(mdf.target, exp_target)
            self.assertEqual(mdf.target.name, '.target')
            self.assertEqual(mdf.target_name, '.target')

        # assertRaisesRegex is used because the assertRaisesRegexp alias is
        # deprecated and no longer exists in Python 3.12+
        new = pd.Series([4, 5, 6], name='.target')
        with self.assertRaisesRegex(ValueError, 'data and target must have equal index'):
            mdf.target = new

        # set target property
        mdf.target = [7, 8, 9]

        self.assertIsInstance(mdf, pdml.ModelFrame)
        self.assertEqual(mdf.shape, (3, 4))
        tm.assert_index_equal(mdf.index, pd.Index(['a', 'b', 'c']))
        tm.assert_index_equal(mdf.columns, pd.Index(['.target', 'A', 'B', 'C']))
        tm.assert_frame_equal(mdf.data, df)
        expected = pd.Series([7, 8, 9], index=['a', 'b', 'c'], name='.target')
        tm.assert_series_equal(mdf.target, expected)
        self.assertEqual(mdf.target.name, '.target')
        self.assertEqual(mdf.target_name, '.target')

        # the wording of the length-mismatch error differs between pandas
        # versions, so accept either form
        msg = ('Wrong number of items passed 2, placement implies 3|'
               'Length of passed values is 2, index implies 3')
        with self.assertRaisesRegex(ValueError, msg):
            mdf.target = [1, 2]

        # set target property
        mdf.target = None

        self.assertIsInstance(mdf, pdml.ModelFrame)
        self.assertEqual(mdf.shape, (3, 3))
        tm.assert_index_equal(mdf.index, pd.Index(['a', 'b', 'c']))
        tm.assert_index_equal(mdf.columns, pd.Index(['A', 'B', 'C']))
        tm.assert_frame_equal(mdf.data, df)
        self.assertEqual(mdf.target_name, '.target')
Example #31
0
    def test_frame_target_proparty(self):
        """Exercise the ``target`` setter of a ModelFrame.

        Valid assignments (named Series, list, None) and invalid ones
        (index mismatch, wrong length) are applied in turn to one frame.
        """
        labels = ['a', 'b', 'c']
        features = ['A', 'B', 'C']
        df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6], 'C': [7, 8, 9]},
                          index=labels, columns=features)
        mdf = pdml.ModelFrame(df, target=pd.Series([1, 2, 3], index=labels))

        # a series already named '.target' is taken as-is
        new = pd.Series([4, 5, 6], index=labels, name='.target')
        mdf.target = new

        self.assertIsInstance(mdf, pdml.ModelFrame)
        self.assertEqual(mdf.shape, (3, 4))
        tm.assert_index_equal(mdf.index, pd.Index(labels))
        tm.assert_index_equal(mdf.columns, pd.Index(['.target'] + features))
        tm.assert_frame_equal(mdf.data, df)
        tm.assert_series_equal(mdf.target, new)
        self.assertEqual(mdf.target.name, '.target')
        self.assertEqual(mdf.target_name, '.target')

        # a differently named series is expected to warn; the target keeps
        # the name '.target'
        with tm.assert_produces_warning(UserWarning):
            new = pd.Series([4, 5, 6], index=labels, name='xxx')
            mdf.target = new

            self.assertIsInstance(mdf, pdml.ModelFrame)
            self.assertEqual(mdf.shape, (3, 4))
            tm.assert_index_equal(mdf.index, pd.Index(labels))
            tm.assert_index_equal(mdf.columns,
                                  pd.Index(['.target'] + features))
            tm.assert_frame_equal(mdf.data, df)

            renamed = pd.Series(new, name='.target')
            tm.assert_series_equal(mdf.target, renamed)
            self.assertEqual(mdf.target.name, '.target')
            self.assertEqual(mdf.target_name, '.target')

        # a series with a default integer index does not align with the data
        new = pd.Series([4, 5, 6], name='.target')
        index_msg = 'data and target must have equal index'
        with pytest.raises(ValueError, match=index_msg):
            mdf.target = new

        # a plain list is accepted
        mdf.target = [7, 8, 9]

        self.assertIsInstance(mdf, pdml.ModelFrame)
        self.assertEqual(mdf.shape, (3, 4))
        tm.assert_index_equal(mdf.index, pd.Index(labels))
        tm.assert_index_equal(mdf.columns, pd.Index(['.target'] + features))
        tm.assert_frame_equal(mdf.data, df)
        tm.assert_series_equal(
            mdf.target,
            pd.Series([7, 8, 9], index=labels, name='.target'))
        self.assertEqual(mdf.target.name, '.target')
        self.assertEqual(mdf.target_name, '.target')

        # the error text for a wrong-length list depends on the pandas version
        msg = ('Length of passed values is 2, index implies 3'
               if pdml.compat._PANDAS_ge_023
               else 'Wrong number of items passed 2, placement implies 3')

        with pytest.raises(ValueError, match=msg):
            mdf.target = [1, 2]

        # assigning None removes the target column
        mdf.target = None

        self.assertIsInstance(mdf, pdml.ModelFrame)
        self.assertEqual(mdf.shape, (3, 3))
        tm.assert_index_equal(mdf.index, pd.Index(labels))
        tm.assert_index_equal(mdf.columns, pd.Index(features))
        tm.assert_frame_equal(mdf.data, df)
        self.assertEqual(mdf.target_name, '.target')
Example #32
0
    def test_frame_data_proparty(self):
        """Exercise the ``data`` setter and deleter of a ModelFrame.

        The data is replaced by a DataFrame and by a target-less
        ModelFrame, two invalid replacements are rejected, and finally the
        data is deleted so that only the target column remains.
        """
        labels = ['a', 'b', 'c']
        df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6], 'C': [7, 8, 9]},
                          index=labels, columns=['A', 'B', 'C'])
        s = pd.Series([1, 2, 3], index=labels, name='.target')

        mdf = pdml.ModelFrame(df, target=s)
        self.assertIsInstance(mdf, pdml.ModelFrame)

        # replace the data with a plain DataFrame
        new = pd.DataFrame({'X': [1, 2, 3], 'Y': [4, 5, 6]},
                           index=labels, columns=['X', 'Y'])
        mdf.data = new

        self.assertIsInstance(mdf, pdml.ModelFrame)
        self.assertEqual(mdf.shape, (3, 3))
        tm.assert_index_equal(mdf.index, pd.Index(labels))
        tm.assert_index_equal(mdf.columns, pd.Index(['.target', 'X', 'Y']))
        tm.assert_frame_equal(mdf.data, new)
        tm.assert_series_equal(mdf.target, s)
        self.assertEqual(mdf.target.name, '.target')
        self.assertEqual(mdf.target_name, '.target')

        # replace the data with a ModelFrame that has no target
        new = pdml.ModelFrame({'M': [1, 2, 3], 'N': [4, 5, 6]},
                              index=labels, columns=['M', 'N'])
        mdf.data = new

        self.assertIsInstance(mdf, pdml.ModelFrame)
        self.assertEqual(mdf.shape, (3, 3))
        tm.assert_index_equal(mdf.index, pd.Index(labels))
        tm.assert_index_equal(mdf.columns, pd.Index(['.target', 'M', 'N']))
        tm.assert_frame_equal(mdf.data, new)
        tm.assert_series_equal(mdf.target, s)
        self.assertEqual(mdf.target.name, '.target')
        self.assertEqual(mdf.target_name, '.target')

        # data containing a column named like the target must be rejected
        new = pd.DataFrame({'.target': [1, 2, 3], 'K': [4, 5, 6]},
                           index=labels)
        msg = "Passed data has the same column name as the target '.target'"
        with pytest.raises(ValueError, match=msg):
            mdf.data = new

        # a ModelFrame that has its own target must be rejected
        msg = "Cannot update with ModelFrame which has target attribute"
        with pytest.raises(ValueError, match=msg):
            mdf.data = mdf

        # deleting the data leaves only the target column
        del mdf.data
        self.assertIsInstance(mdf, pdml.ModelFrame)
        self.assertEqual(mdf.shape, (3, 1))
        tm.assert_index_equal(mdf.index, pd.Index(labels))
        tm.assert_index_equal(mdf.columns, pd.Index(['.target']))
        self.assertIsNone(mdf.data)
        tm.assert_series_equal(mdf.target, s)
        self.assertEqual(mdf.target.name, '.target')
        self.assertEqual(mdf.target_name, '.target')