コード例 #1
0
 def test_objectmapper_deprecated(self):
     """Legacy ``lda``/``qda`` accessors warn but still expose the estimators.

     Reading an estimator through either accessor must emit a
     ``FutureWarning`` and return the very same object that ``da`` exposes
     under the same attribute name.
     """
     df = pdml.ModelFrame([])
     legacy_accessors = (
         ('lda', 'LinearDiscriminantAnalysis'),
         ('qda', 'QuadraticDiscriminantAnalysis'),
     )
     for accessor_name, estimator_name in legacy_accessors:
         # The whole lookup chain stays inside the context manager, exactly
         # like the hand-written attribute accesses it replaces.
         with tm.assert_produces_warning(FutureWarning):
             exposed = getattr(getattr(df, accessor_name), estimator_name)
             self.assertIs(exposed, getattr(da, estimator_name))
コード例 #2
0
 def test_objectmapper_deprecated(self):
     """Check that the old ``lda`` and ``qda`` accessors raise FutureWarning.

     For each accessor, the estimator reached through the frame must be the
     identical object found on ``da`` under the same name.
     """
     frame = pdml.ModelFrame([])
     for accessor_name, estimator_name in (
             ('lda', 'LinearDiscriminantAnalysis'),
             ('qda', 'QuadraticDiscriminantAnalysis')):
         with tm.assert_produces_warning(FutureWarning):
             # Accessor lookup and estimator lookup both happen inside the
             # warning check, as in the unrolled form of this test.
             accessor = getattr(frame, accessor_name)
             self.assertIs(getattr(accessor, estimator_name),
                           getattr(da, estimator_name))
コード例 #3
0
    def test_predict_automatic(self):
        """Prediction properties work without an explicit predict call.

        After ``df.fit`` the properties ``predicted``, ``proba``,
        ``log_proba`` and ``decision`` are read directly; each result is
        compared with the matching method of a plain ``svm`` estimator that
        was fitted on the same iris data with the same random state.
        """
        with warnings.catch_warnings():
            # Report every UserWarning, even repeated ones, so that
            # assert_produces_warning can observe them.
            warnings.simplefilter("always", UserWarning)

            iris = datasets.load_iris()
            # Build the frame once; a second identical construction was
            # redundant.
            df = pdml.ModelFrame(iris)

            model = 'SVC'
            mod1 = getattr(df.svm, model)(probability=True,
                                          random_state=self.random_state)
            mod2 = getattr(svm, model)(probability=True,
                                       random_state=self.random_state)

            df.fit(mod1)
            mod2.fit(iris.data, iris.target)

            # test automatically calls related methods
            with tm.assert_produces_warning(UserWarning):
                result = df.predicted
            expected = mod2.predict(iris.data)

            self.assertIsInstance(result, pdml.ModelSeries)
            self.assert_numpy_array_almost_equal(result.values, expected)

            # No warning is asserted for ``proba``.
            result = df.proba
            expected = mod2.predict_proba(iris.data)

            self.assertIsInstance(result, pdml.ModelFrame)
            tm.assert_index_equal(result.index, df.index)
            self.assert_numpy_array_almost_equal(result.values, expected)

            with tm.assert_produces_warning(UserWarning):
                result = df.log_proba
            expected = mod2.predict_log_proba(iris.data)

            self.assertIsInstance(result, pdml.ModelFrame)
            tm.assert_index_equal(result.index, df.index)
            self.assert_numpy_array_almost_equal(result.values, expected)

            # No warning is asserted for ``decision``.
            result = df.decision
            expected = mod2.decision_function(iris.data)

            self.assertIsInstance(result, pdml.ModelFrame)
            tm.assert_index_equal(result.index, df.index)
            self.assert_numpy_array_almost_equal(result.values, expected)

        # NOTE(review): this runs after catch_warnings has already restored
        # the previous filters, so it changes the process-wide warning filter
        # for whatever runs next. Kept to preserve existing behavior.
        warnings.simplefilter("default")
コード例 #4
0
ファイル: test_frame.py プロジェクト: sinhrks/pandas-ml
    def test_predict_automatic(self):
        """Read prediction properties directly after fitting an estimator.

        ``predicted``, ``proba``, ``log_proba`` and ``decision`` are accessed
        on the fitted frame without calling the predict methods explicitly.
        Each value is checked against the corresponding method of a plain
        ``svm`` estimator fitted on the same iris data.
        """
        with warnings.catch_warnings():
            # "always" keeps repeated UserWarnings visible to
            # assert_produces_warning.
            warnings.simplefilter("always", UserWarning)

            iris = datasets.load_iris()
            # One frame is enough; the duplicate construction was dropped.
            df = pdml.ModelFrame(iris)

            model = 'SVC'
            mod1 = getattr(df.svm, model)(probability=True,
                                          random_state=self.random_state)
            mod2 = getattr(svm, model)(probability=True,
                                       random_state=self.random_state)

            df.fit(mod1)
            mod2.fit(iris.data, iris.target)

            # test automatically calls related methods
            with tm.assert_produces_warning(UserWarning):
                result = df.predicted
            expected = mod2.predict(iris.data)

            self.assertIsInstance(result, pdml.ModelSeries)
            self.assert_numpy_array_almost_equal(result.values, expected)

            # ``proba`` is read without a warning assertion.
            result = df.proba
            expected = mod2.predict_proba(iris.data)

            self.assertIsInstance(result, pdml.ModelFrame)
            tm.assert_index_equal(result.index, df.index)
            self.assert_numpy_array_almost_equal(result.values, expected)

            with tm.assert_produces_warning(UserWarning):
                result = df.log_proba
            expected = mod2.predict_log_proba(iris.data)

            self.assertIsInstance(result, pdml.ModelFrame)
            tm.assert_index_equal(result.index, df.index)
            self.assert_numpy_array_almost_equal(result.values, expected)

            # ``decision`` is read without a warning assertion.
            result = df.decision
            expected = mod2.decision_function(iris.data)

            self.assertIsInstance(result, pdml.ModelFrame)
            tm.assert_index_equal(result.index, df.index)
            self.assert_numpy_array_almost_equal(result.values, expected)

        # NOTE(review): executed outside the catch_warnings block, so this
        # alters the global warning filter for subsequent code. Retained so
        # the surrounding suite behaves as before.
        warnings.simplefilter("default")
コード例 #5
0
    def test_StratifiedShuffleSplit(self):
        """The frame's StratifiedShuffleSplit matches ``ms`` and can be iterated.

        First the index pairs produced through ``df.model_selection`` are
        compared with those from ``ms.StratifiedShuffleSplit`` using the same
        random state. Then ``df.model_selection.iterate`` is checked to warn
        with ``FutureWarning`` and to yield ModelFrame pairs whose rows add
        up to the full frame.
        """
        iris = datasets.load_iris()
        df = pdml.ModelFrame(iris)
        sf1 = df.model_selection.StratifiedShuffleSplit(
            random_state=self.random_state)
        sf2 = ms.StratifiedShuffleSplit(random_state=self.random_state)

        # consume generator
        ind1 = list(sf1.split(df.data.values, df.target.values))
        ind2 = list(sf2.split(iris.data, iris.target))

        for i1, i2 in zip(ind1, ind2):
            self.assertIsInstance(i1, tuple)
            self.assertEqual(len(i1), 2)
            self.assertIsInstance(i2, tuple)
            self.assertEqual(len(i2), 2)
            # Compare against the reference splitter; the first check used to
            # compare i1[0] with itself and could never fail.
            self.assert_numpy_array_equal(i1[0], i2[0])
            self.assert_numpy_array_equal(i1[1], i2[1])

        sf1 = df.model_selection.StratifiedShuffleSplit(
            random_state=self.random_state)
        with tm.assert_produces_warning(FutureWarning):
            gen = df.model_selection.iterate(sf1)

        # StratifiedShuffleSplit is not a subclass of BaseCrossValidator
        for train_df, test_df in gen:
            self.assertIsInstance(train_df, pdml.ModelFrame)
            self.assertIsInstance(test_df, pdml.ModelFrame)
            self.assert_index_equal(df.columns, train_df.columns)
            self.assert_index_equal(df.columns, test_df.columns)

            # assertTrue(a, b) treats b as the failure message, so the old
            # check always passed; compare row counts explicitly instead.
            self.assertEqual(df.shape[0],
                             train_df.shape[0] + test_df.shape[0])
コード例 #6
0
    def test_StratifiedShuffleSplit(self):
        """Compare the frame's StratifiedShuffleSplit with ``ms`` and iterate it.

        The index pairs obtained via ``df.model_selection`` must equal those
        of ``ms.StratifiedShuffleSplit`` for the same random state.
        ``df.model_selection.iterate`` must warn with ``FutureWarning`` and
        yield ModelFrame pairs that together cover every row of ``df``.
        """
        iris = datasets.load_iris()
        df = pdml.ModelFrame(iris)
        sf1 = df.model_selection.StratifiedShuffleSplit(random_state=self.random_state)
        sf2 = ms.StratifiedShuffleSplit(random_state=self.random_state)

        # consume generator
        ind1 = list(sf1.split(df.data.values, df.target.values))
        ind2 = list(sf2.split(iris.data, iris.target))

        for i1, i2 in zip(ind1, ind2):
            self.assertIsInstance(i1, tuple)
            self.assertEqual(len(i1), 2)
            self.assertIsInstance(i2, tuple)
            self.assertEqual(len(i2), 2)
            # The train indices were previously compared with themselves;
            # check them against the reference splitter's output.
            tm.assert_numpy_array_equal(i1[0], i2[0])
            tm.assert_numpy_array_equal(i1[1], i2[1])

        sf1 = df.model_selection.StratifiedShuffleSplit(random_state=self.random_state)
        with tm.assert_produces_warning(FutureWarning):
            gen = df.model_selection.iterate(sf1)

        # StratifiedShuffleSplit is not a subclass of BaseCrossValidator
        for train_df, test_df in gen:
            self.assertIsInstance(train_df, pdml.ModelFrame)
            self.assertIsInstance(test_df, pdml.ModelFrame)
            tm.assert_index_equal(df.columns, train_df.columns)
            tm.assert_index_equal(df.columns, test_df.columns)

            # The second argument of assertTrue is only a message, so the
            # old assertion was vacuous. Row counts must add up.
            self.assertEqual(df.shape[0], train_df.shape[0] + test_df.shape[0])
コード例 #7
0
    def test_iterate(self):
        """``model_selection.iterate`` warns and yields train/test ModelFrames.

        Each yielded pair must keep the original columns, and the train and
        test row counts must add up to the row count of the source frame.
        """
        df = pdml.ModelFrame(datasets.load_iris())
        kf = df.model_selection.KFold(4, random_state=self.random_state)

        with tm.assert_produces_warning(FutureWarning):
            gen = df.model_selection.iterate(kf)

        for train_df, test_df in gen:
            self.assertIsInstance(train_df, pdml.ModelFrame)
            self.assertIsInstance(test_df, pdml.ModelFrame)
            self.assert_index_equal(df.columns, train_df.columns)
            self.assert_index_equal(df.columns, test_df.columns)

            # assertTrue(a, b) uses b as the failure message and therefore
            # always passed. Assert the row-count identity for real, using
            # the row axis of both parts.
            self.assertEqual(df.shape[0],
                             train_df.shape[0] + test_df.shape[0])
コード例 #8
0
    def test_iterate(self):
        """Iterating a KFold through ``model_selection.iterate``.

        The call must emit ``FutureWarning``. Every train/test pair must be
        a ModelFrame with the original columns, and the two parts together
        must contain as many rows as the source frame.
        """
        df = pdml.ModelFrame(datasets.load_iris())
        kf = df.model_selection.KFold(4, random_state=self.random_state)

        with tm.assert_produces_warning(FutureWarning):
            gen = df.model_selection.iterate(kf)

        for train_df, test_df in gen:
            self.assertIsInstance(train_df, pdml.ModelFrame)
            self.assertIsInstance(test_df, pdml.ModelFrame)
            tm.assert_index_equal(df.columns, train_df.columns)
            tm.assert_index_equal(df.columns, test_df.columns)

            # The former assertTrue(a, b) never compared anything (b is the
            # message argument) and mixed rows with columns.
            self.assertEqual(df.shape[0], train_df.shape[0] + test_df.shape[0])
コード例 #9
0
    def test_StratifiedShuffleSplit(self):
        """The frame's cross_validation StratifiedShuffleSplit matches ``cv``.

        Index pairs from ``df.cross_validation.StratifiedShuffleSplit`` are
        compared with ``cv.StratifiedShuffleSplit`` built from the iris
        target with the same random state. Iterating the splitter through
        ``df.cross_validation.iterate`` must warn with ``UserWarning`` and
        yield ModelFrame pairs whose rows add up to the full frame.
        """
        iris = datasets.load_iris()
        df = pdml.ModelFrame(iris)
        sf1 = df.cross_validation.StratifiedShuffleSplit(random_state=self.random_state)
        sf2 = cv.StratifiedShuffleSplit(iris.target, random_state=self.random_state)
        for idx1, idx2 in zip(sf1, sf2):
            self.assert_numpy_array_equal(idx1[0], idx2[0])
            self.assert_numpy_array_equal(idx1[1], idx2[1])

        sf1 = df.cross_validation.StratifiedShuffleSplit(random_state=self.random_state)
        with tm.assert_produces_warning(UserWarning):
            # StratifiedShuffleSplit is not a subclass of PartitionIterator
            for train_df, test_df in df.cross_validation.iterate(sf1):
                self.assertIsInstance(train_df, pdml.ModelFrame)
                self.assertIsInstance(test_df, pdml.ModelFrame)
                self.assert_index_equal(df.columns, train_df.columns)
                self.assert_index_equal(df.columns, test_df.columns)

                # assertTrue(a, b) only checks that a is truthy (b is the
                # message); compare the row counts explicitly instead.
                self.assertEqual(df.shape[0], train_df.shape[0] + test_df.shape[0])
コード例 #10
0
    def test_StratifiedShuffleSplit(self):
        """Check cross_validation.StratifiedShuffleSplit against ``cv``.

        With equal random states, the splitter created through the frame
        must yield the same index pairs as ``cv.StratifiedShuffleSplit``
        built from the iris target. ``df.cross_validation.iterate`` must
        warn with ``UserWarning`` and produce ModelFrame pairs that together
        hold every row of ``df``.
        """
        iris = datasets.load_iris()
        df = pdml.ModelFrame(iris)
        sf1 = df.cross_validation.StratifiedShuffleSplit(
            random_state=self.random_state)
        sf2 = cv.StratifiedShuffleSplit(iris.target,
                                        random_state=self.random_state)
        for idx1, idx2 in zip(sf1, sf2):
            self.assert_numpy_array_equal(idx1[0], idx2[0])
            self.assert_numpy_array_equal(idx1[1], idx2[1])

        sf1 = df.cross_validation.StratifiedShuffleSplit(
            random_state=self.random_state)
        with tm.assert_produces_warning(UserWarning):
            # StratifiedShuffleSplit is not a subclass of PartitionIterator
            for train_df, test_df in df.cross_validation.iterate(sf1):
                self.assertIsInstance(train_df, pdml.ModelFrame)
                self.assertIsInstance(test_df, pdml.ModelFrame)
                self.assert_index_equal(df.columns, train_df.columns)
                self.assert_index_equal(df.columns, test_df.columns)

                # The old assertTrue(a, b) passed unconditionally because b
                # is the message argument, and it used the column axis of
                # the test part. Row counts are what must add up.
                self.assertEqual(df.shape[0],
                                 train_df.shape[0] + test_df.shape[0])
コード例 #11
0
    def test_frame_init_df_target_setter(self):
        """Assign DataFrame targets to a ModelFrame created without a target.

        Three assignments are exercised in order:

        1. a two-column target on a frame that has no target yet;
        2. another two-column target, which must warn and end up under the
           existing target names ``t1``/``t2``;
        3. a three-column target, which replaces the target columns.
        """
        row_labels = ['a', 'b', 'c']
        feature_names = ['A', 'B', 'C']
        feature_values = {'A': [1, 2, 3], 'B': [4, 5, 6], 'C': [7, 8, 9]}

        # initialization by dataframe and dataframe
        df = pd.DataFrame(feature_values, index=row_labels,
                          columns=feature_names)
        mdf = pdml.ModelFrame(df)
        self.assertFalse(mdf.has_target())

        def check_state(target_names, target_values, expected_target):
            # Shared assertions: target columns come first, followed by the
            # untouched feature columns.
            all_names = target_names + feature_names
            combined = dict(feature_values)
            combined.update(target_values)
            expected = pd.DataFrame(combined, index=row_labels,
                                    columns=all_names)

            self.assertIsInstance(mdf, pdml.ModelFrame)
            self.assertEqual(mdf.shape, (3, len(all_names)))
            tm.assert_frame_equal(mdf, expected)
            tm.assert_index_equal(mdf.index, pd.Index(row_labels))
            tm.assert_index_equal(mdf.columns, pd.Index(all_names))
            tm.assert_frame_equal(mdf.data, df)
            tm.assert_frame_equal(mdf.target, expected_target)
            tm.assert_index_equal(mdf.target.columns, pd.Index(target_names))
            tm.assert_index_equal(mdf.target_name, pd.Index(target_names))
            self.assertTrue(mdf.has_multi_targets())

        initial_values = {'t1': [10, 11, 12], 't2': [13, 14, 15]}
        target = pd.DataFrame(initial_values, index=row_labels)
        mdf.target = target
        check_state(['t1', 't2'], initial_values, target)

        target = pd.DataFrame({'x1': [20, 21, 22], 'x2': [23, 24, 25]},
                              index=row_labels)
        with tm.assert_produces_warning(UserWarning):
            # when the target has the same length as the target_name,
            # is renamed to existing target ['t1', 't2']
            mdf.target = target
        renamed_values = {'t1': [20, 21, 22], 't2': [23, 24, 25]}
        check_state(['t1', 't2'], renamed_values,
                    pd.DataFrame(renamed_values, index=row_labels))

        wider_values = {'x1': [20, 21, 22],
                        'x2': [23, 24, 25],
                        'x3': [25, 26, 27]}
        target = pd.DataFrame(wider_values, index=row_labels)
        # when the target has the different length as the target_name,
        # target is being replaced
        mdf.target = target
        check_state(['x1', 'x2', 'x3'], wider_values, target)
コード例 #12
0
    def test_frame_target_proparty(self):
        """Exercise the ``target`` setter of a ModelFrame with a single target.

        Covers assigning a Series named ``.target``, a Series with another
        name (must warn and be stored as ``.target``), a Series with a
        mismatching index (ValueError), a plain list, a list of the wrong
        length (ValueError), and ``None`` (drops the target column).
        """
        row_labels = ['a', 'b', 'c']
        df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6], 'C': [7, 8, 9]},
                          index=row_labels, columns=['A', 'B', 'C'])
        mdf = pdml.ModelFrame(df, target=pd.Series([1, 2, 3],
                                                   index=row_labels))

        def check_with_target(expected_target):
            # Assertions shared by every state in which a target is present.
            self.assertIsInstance(mdf, pdml.ModelFrame)
            self.assertEqual(mdf.shape, (3, 4))
            tm.assert_index_equal(mdf.index, pd.Index(row_labels))
            tm.assert_index_equal(mdf.columns,
                                  pd.Index(['.target', 'A', 'B', 'C']))
            tm.assert_frame_equal(mdf.data, df)
            tm.assert_series_equal(mdf.target, expected_target)
            self.assertEqual(mdf.target.name, '.target')
            self.assertEqual(mdf.target_name, '.target')

        # set target property
        new = pd.Series([4, 5, 6], index=row_labels, name='.target')
        mdf.target = new
        check_with_target(new)

        with tm.assert_produces_warning(UserWarning):
            # set target property
            new = pd.Series([4, 5, 6], index=row_labels, name='xxx')
            mdf.target = new
            # The checks stay inside the warning context, as before.
            check_with_target(pd.Series(new, name='.target'))

        new = pd.Series([4, 5, 6], name='.target')
        with pytest.raises(ValueError,
                           match='data and target must have equal index'):
            mdf.target = new

        # set target property
        mdf.target = [7, 8, 9]
        check_with_target(pd.Series([7, 8, 9], index=row_labels,
                                    name='.target'))

        msg = ('Length of passed values is 2, index implies 3'
               if pdml.compat._PANDAS_ge_023
               else 'Wrong number of items passed 2, placement implies 3')
        with pytest.raises(ValueError, match=msg):
            mdf.target = [1, 2]

        # set target property
        mdf.target = None

        self.assertIsInstance(mdf, pdml.ModelFrame)
        self.assertEqual(mdf.shape, (3, 3))
        tm.assert_index_equal(mdf.index, pd.Index(row_labels))
        tm.assert_index_equal(mdf.columns, pd.Index(['A', 'B', 'C']))
        tm.assert_frame_equal(mdf.data, df)
        self.assertEqual(mdf.target_name, '.target')
コード例 #13
0
ファイル: test_frame.py プロジェクト: sinhrks/pandas-ml
    def test_frame_init_df_target_setter(self):
        """Set multi-column targets on a ModelFrame that starts without one.

        The test walks through three target assignments: an initial
        two-column target, a second two-column target that must warn and be
        stored under the existing names ``t1``/``t2``, and finally a
        three-column target that replaces the target columns.
        """
        labels = ['a', 'b', 'c']
        data_columns = ['A', 'B', 'C']
        data_values = {'A': [1, 2, 3], 'B': [4, 5, 6], 'C': [7, 8, 9]}

        # initialization by dataframe and dataframe
        df = pd.DataFrame(data_values, index=labels, columns=data_columns)
        mdf = pdml.ModelFrame(df)
        self.assertFalse(mdf.has_target())

        def verify(names, values, target_frame):
            # Common checks after each assignment; target columns are
            # expected in front of the data columns.
            columns = names + data_columns
            merged = dict(data_values)
            merged.update(values)
            expected = pd.DataFrame(merged, index=labels, columns=columns)

            self.assertIsInstance(mdf, pdml.ModelFrame)
            self.assertEqual(mdf.shape, (3, len(columns)))
            tm.assert_frame_equal(mdf, expected)
            tm.assert_index_equal(mdf.index, pd.Index(labels))
            tm.assert_index_equal(mdf.columns, pd.Index(columns))
            tm.assert_frame_equal(mdf.data, df)
            tm.assert_frame_equal(mdf.target, target_frame)
            tm.assert_index_equal(mdf.target.columns, pd.Index(names))
            tm.assert_index_equal(mdf.target_name, pd.Index(names))
            self.assertTrue(mdf.has_multi_targets())

        first = {'t1': [10, 11, 12], 't2': [13, 14, 15]}
        target = pd.DataFrame(first, index=labels)
        mdf.target = target
        verify(['t1', 't2'], first, target)

        target = pd.DataFrame({'x1': [20, 21, 22], 'x2': [23, 24, 25]},
                              index=labels)
        with tm.assert_produces_warning(UserWarning):
            # when the target has the same length as the target_name,
            # is renamed to existing target ['t1', 't2']
            mdf.target = target
        second = {'t1': [20, 21, 22], 't2': [23, 24, 25]}
        verify(['t1', 't2'], second, pd.DataFrame(second, index=labels))

        third = {'x1': [20, 21, 22],
                 'x2': [23, 24, 25],
                 'x3': [25, 26, 27]}
        target = pd.DataFrame(third, index=labels)
        # when the target has the different length as the target_name,
        # target is being replaced
        mdf.target = target
        verify(['x1', 'x2', 'x3'], third, target)
コード例 #14
0
ファイル: test_frame.py プロジェクト: sinhrks/pandas-ml
    def test_frame_target_proparty(self):
        """Exercise the ``target`` setter of a single-target ModelFrame.

        Covers assigning a Series named ``.target``, a Series with another
        name (must warn and be stored as ``.target``), a Series whose index
        does not match (ValueError), a plain list, a list of the wrong
        length (ValueError), and ``None`` (removes the target column).
        """
        # assertRaisesRegexp is a deprecated alias that newer Python versions
        # no longer provide; use the modern name when the TestCase has it.
        assert_raises_regex = (getattr(self, 'assertRaisesRegex', None) or
                               self.assertRaisesRegexp)

        df = pd.DataFrame({'A': [1, 2, 3],
                           'B': [4, 5, 6],
                           'C': [7, 8, 9]},
                          index=['a', 'b', 'c'],
                          columns=['A', 'B', 'C'])
        s = pd.Series([1, 2, 3], index=['a', 'b', 'c'])
        mdf = pdml.ModelFrame(df, target=s)

        new = pd.Series([4, 5, 6], index=['a', 'b', 'c'], name='.target')
        # set target property
        mdf.target = new

        self.assertIsInstance(mdf, pdml.ModelFrame)
        self.assertEqual(mdf.shape, (3, 4))
        tm.assert_index_equal(mdf.index, pd.Index(['a', 'b', 'c']))
        tm.assert_index_equal(mdf.columns, pd.Index(['.target', 'A', 'B', 'C']))
        tm.assert_frame_equal(mdf.data, df)
        tm.assert_series_equal(mdf.target, new)
        self.assertEqual(mdf.target.name, '.target')
        self.assertEqual(mdf.target_name, '.target')

        with tm.assert_produces_warning(UserWarning):
            new = pd.Series([4, 5, 6], index=['a', 'b', 'c'], name='xxx')
            # set target property
            mdf.target = new

            self.assertIsInstance(mdf, pdml.ModelFrame)
            self.assertEqual(mdf.shape, (3, 4))
            tm.assert_index_equal(mdf.index, pd.Index(['a', 'b', 'c']))
            tm.assert_index_equal(mdf.columns, pd.Index(['.target', 'A', 'B', 'C']))
            tm.assert_frame_equal(mdf.data, df)

            exp_target = pd.Series(new, name='.target')
            tm.assert_series_equal(mdf.target, exp_target)
            self.assertEqual(mdf.target.name, '.target')
            self.assertEqual(mdf.target_name, '.target')

        new = pd.Series([4, 5, 6], name='.target')
        with assert_raises_regex(ValueError, 'data and target must have equal index'):
            mdf.target = new

        # set target property
        mdf.target = [7, 8, 9]

        self.assertIsInstance(mdf, pdml.ModelFrame)
        self.assertEqual(mdf.shape, (3, 4))
        tm.assert_index_equal(mdf.index, pd.Index(['a', 'b', 'c']))
        tm.assert_index_equal(mdf.columns, pd.Index(['.target', 'A', 'B', 'C']))
        tm.assert_frame_equal(mdf.data, df)
        expected = pd.Series([7, 8, 9], index=['a', 'b', 'c'], name='.target')
        tm.assert_series_equal(mdf.target, expected)
        self.assertEqual(mdf.target.name, '.target')
        self.assertEqual(mdf.target_name, '.target')

        # The wording of the length-mismatch error differs between pandas
        # releases, so accept either known message.
        length_mismatch = ('Length of passed values is 2, index implies 3'
                           '|Wrong number of items passed 2, placement implies 3')
        with assert_raises_regex(ValueError, length_mismatch):
            mdf.target = [1, 2]

        # set target property
        mdf.target = None

        self.assertIsInstance(mdf, pdml.ModelFrame)
        self.assertEqual(mdf.shape, (3, 3))
        tm.assert_index_equal(mdf.index, pd.Index(['a', 'b', 'c']))
        tm.assert_index_equal(mdf.columns, pd.Index(['A', 'B', 'C']))
        tm.assert_frame_equal(mdf.data, df)
        self.assertEqual(mdf.target_name, '.target')
コード例 #15
0
ファイル: test_frame.py プロジェクト: the872/pandas-ml
    def test_frame_target_proparty(self):
        """Exercise the ``target`` setter of a single-target ModelFrame.

        Covers assigning a Series named ``.target``, a Series with another
        name (must warn and be stored as ``.target``), a Series whose index
        does not match (ValueError), a plain list, a list of the wrong
        length (ValueError), and ``None`` (removes the target column).
        """
        # assertRaisesRegexp is a deprecated alias that newer Python versions
        # no longer provide; use the modern name when the TestCase has it.
        assert_raises_regex = (getattr(self, 'assertRaisesRegex', None) or
                               self.assertRaisesRegexp)

        df = pd.DataFrame({
            'A': [1, 2, 3],
            'B': [4, 5, 6],
            'C': [7, 8, 9]
        },
                          index=['a', 'b', 'c'],
                          columns=['A', 'B', 'C'])
        s = pd.Series([1, 2, 3], index=['a', 'b', 'c'])
        mdf = pdml.ModelFrame(df, target=s)

        new = pd.Series([4, 5, 6], index=['a', 'b', 'c'], name='.target')
        # set target property
        mdf.target = new

        # assertIsInstance reports the actual type on failure, unlike
        # assertTrue(isinstance(...)).
        self.assertIsInstance(mdf, pdml.ModelFrame)
        self.assertEqual(mdf.shape, (3, 4))
        self.assert_index_equal(mdf.index, pd.Index(['a', 'b', 'c']))
        self.assert_index_equal(mdf.columns,
                                pd.Index(['.target', 'A', 'B', 'C']))
        self.assert_frame_equal(mdf.data, df)
        self.assert_series_equal(mdf.target, new)
        self.assertEqual(mdf.target.name, '.target')
        self.assertEqual(mdf.target_name, '.target')

        with tm.assert_produces_warning(UserWarning):
            new = pd.Series([4, 5, 6], index=['a', 'b', 'c'], name='xxx')
            # set target property
            mdf.target = new

            self.assertIsInstance(mdf, pdml.ModelFrame)
            self.assertEqual(mdf.shape, (3, 4))
            self.assert_index_equal(mdf.index, pd.Index(['a', 'b', 'c']))
            self.assert_index_equal(mdf.columns,
                                    pd.Index(['.target', 'A', 'B', 'C']))
            self.assert_frame_equal(mdf.data, df)

            exp_target = pd.Series(new, name='.target')
            self.assert_series_equal(mdf.target, exp_target)
            self.assertEqual(mdf.target.name, '.target')
            self.assertEqual(mdf.target_name, '.target')

        new = pd.Series([4, 5, 6], name='.target')
        with assert_raises_regex(ValueError,
                                 'data and target must have equal index'):
            mdf.target = new

        # set target property
        mdf.target = [7, 8, 9]

        self.assertIsInstance(mdf, pdml.ModelFrame)
        self.assertEqual(mdf.shape, (3, 4))
        self.assert_index_equal(mdf.index, pd.Index(['a', 'b', 'c']))
        self.assert_index_equal(mdf.columns,
                                pd.Index(['.target', 'A', 'B', 'C']))
        self.assert_frame_equal(mdf.data, df)
        expected = pd.Series([7, 8, 9], index=['a', 'b', 'c'], name='.target')
        self.assert_series_equal(mdf.target, expected)
        self.assertEqual(mdf.target.name, '.target')
        self.assertEqual(mdf.target_name, '.target')

        # The wording of the length-mismatch error differs between pandas
        # releases, so accept either known message.
        length_mismatch = (
            'Length of passed values is 2, index implies 3'
            '|Wrong number of items passed 2, placement implies 3')
        with assert_raises_regex(ValueError, length_mismatch):
            mdf.target = [1, 2]

        # set target property
        mdf.target = None

        self.assertIsInstance(mdf, pdml.ModelFrame)
        self.assertEqual(mdf.shape, (3, 3))
        self.assert_index_equal(mdf.index, pd.Index(['a', 'b', 'c']))
        self.assert_index_equal(mdf.columns, pd.Index(['A', 'B', 'C']))
        self.assert_frame_equal(mdf.data, df)
        self.assertEqual(mdf.target_name, '.target')