コード例 #1
0
    def test_transform_series_int(self):
        """Compare integer ``ModelSeries`` transforms with the ``pp`` estimators.

        For every model name in the list, the estimator obtained through
        ``s.preprocessing`` is applied via ``ModelSeries`` and the estimator
        of the same name from ``pp`` is applied to the raw array reshaped to
        a single column.  Both the ``fit`` + ``transform`` path and the
        ``fit_transform`` path must return a ``ModelSeries`` whose values
        match the flattened reference output.
        """
        # Function-scope import: keeps this method independent of the
        # module-level import block.
        import sys

        arr = np.array([1, 2, 3, 1, 2, 3, 1, 2, 3])
        s = pdml.ModelSeries(arr, index='a b c d e f g h i'.split(' '))

        # reshape arr to 2d
        arr = arr.reshape(-1, 1)

        models = ['Binarizer', 'Imputer', 'StandardScaler']
        # Equivalent to the former ``pd.compat.PY3`` flag, which newer pandas
        # releases no longer provide; the interpreter version is read directly.
        if sys.version_info[0] < 3:
            # MinMaxScaler raises TypeError in ufunc on Python 3, so it is
            # only exercised on Python 2.
            models.append('MinMaxScaler')

        for model in models:
            # Path 1: explicit fit followed by transform.
            mod1 = getattr(s.preprocessing, model)()
            mod2 = getattr(pp, model)()
            s.fit(mod1)
            mod2.fit(arr)

            result = s.transform(mod1)
            expected = mod2.transform(arr).flatten()

            self.assertIsInstance(result, pdml.ModelSeries)
            self.assert_numpy_array_almost_equal(result.values, expected)

            # Path 2: fresh estimators, combined fit_transform.
            mod1 = getattr(s.preprocessing, model)()
            mod2 = getattr(pp, model)()

            result = s.fit_transform(mod1)
            expected = mod2.fit_transform(arr).flatten()

            self.assertIsInstance(result, pdml.ModelSeries)
            self.assert_numpy_array_almost_equal(result.values, expected)
コード例 #2
0
    def test_LabelBinarizer2(self):
        """Binarize string labels and the iris target with ``LabelBinarizer``.

        Three scenarios are checked:

        1. A string ``ModelSeries`` is transformed into a ``ModelFrame`` with
           one indicator column per distinct label.
        2. The same binarizer is re-fitted on the target of a ``ModelFrame``
           built from ``datasets.load_iris()`` and the transformed target is
           compared with the expected three-column frame.
        3. The binarized target is assigned back to ``df.target``; the frame
           shape must then be ``(150, 7)`` and ``df.target`` must equal the
           expected frame.
        """
        arr = np.array(['X', 'Y', 'Z', 'X'])
        s = pdml.ModelSeries(arr)

        lb = s.preprocessing.LabelBinarizer()
        s.fit(lb)

        binarized = s.transform(lb)
        # assertIsInstance reports the actual type on failure, unlike
        # assertTrue(isinstance(...)).
        self.assertIsInstance(binarized, pdml.ModelFrame)

        expected = pd.DataFrame({0: [1, 0, 0, 1], 1: [0, 1, 0, 0], 2: [0, 0, 1, 0]})
        self.assert_frame_equal(binarized, expected)

        # Re-fit the same binarizer instance on the iris target.
        df = pdml.ModelFrame(datasets.load_iris())
        df.target.fit(lb)
        binarized = df.target.transform(lb)

        expected = pd.DataFrame({0: [1] * 50 + [0] * 100,
                                 1: [0] * 50 + [1] * 50 + [0] * 50,
                                 2: [0] * 100 + [1] * 50})
        self.assert_frame_equal(binarized, expected)

        # Assign the multi-column result back as the frame's target.
        df = pdml.ModelFrame(datasets.load_iris())
        df.target.fit(lb)
        df.target = df.target.transform(lb)
        self.assertEqual(df.shape, (150, 7))
        self.assert_frame_equal(df.target, expected)
コード例 #3
0
    def test_transform_series_float(self):
        """Compare float ``ModelSeries`` transforms with the ``pp`` estimators.

        For each listed model, the estimator obtained through
        ``s.preprocessing`` is applied via ``ModelSeries`` and the estimator
        of the same name from ``pp`` is applied to the raw array.  Both the
        ``fit`` + ``transform`` path and the ``fit_transform`` path must
        return a ``ModelSeries`` whose values match the flattened reference
        output.
        """
        # ``np.float`` was only an alias of the builtin ``float`` and has been
        # removed from recent NumPy releases; the builtin yields the same dtype.
        arr = np.array([1, 2, 3, 1, 2, 3, 1, 2, 3], dtype=float)
        s = pdml.ModelSeries(arr, index='a b c d e f g h i'.split(' '))

        models = ['Binarizer', 'Imputer', 'StandardScaler', 'MinMaxScaler']
        for model in models:
            # Path 1: explicit fit followed by transform.
            mod1 = getattr(s.preprocessing, model)()
            mod2 = getattr(pp, model)()

            s.fit(mod1)
            mod2.fit(arr)

            result = s.transform(mod1)
            expected = mod2.transform(arr).flatten()

            self.assertIsInstance(result, pdml.ModelSeries)
            self.assert_numpy_array_almost_equal(result.values, expected)

            # Path 2: fresh estimators, combined fit_transform.
            mod1 = getattr(s.preprocessing, model)()
            mod2 = getattr(pp, model)()

            result = s.fit_transform(mod1)
            expected = mod2.fit_transform(arr).flatten()

            self.assertIsInstance(result, pdml.ModelSeries)
            self.assert_numpy_array_almost_equal(result.values, expected)
コード例 #4
0
    def test_transform_series_float(self, algo):
        """Compare one float ``ModelSeries`` transform with its ``pp`` estimator.

        Parameters
        ----------
        algo : str
            Attribute name looked up on both ``s.preprocessing`` and ``pp``
            to obtain the estimator class under test (presumably supplied by
            a test parametrization -- confirm against the decorator).

        The series-based result must be a ``ModelSeries`` whose values match
        the flattened output of the reference estimator applied to the array
        reshaped to a single column, for both the ``fit`` + ``transform`` and
        the ``fit_transform`` paths.
        """
        # ``np.float`` was only an alias of the builtin ``float`` and has been
        # removed from recent NumPy releases; the builtin yields the same dtype.
        arr = np.array([1, 2, 3, 1, 2, 3, 1, 2, 3], dtype=float)
        s = pdml.ModelSeries(arr, index='a b c d e f g h i'.split(' '))

        # reshape arr to 2d
        arr = arr.reshape(-1, 1)

        # Path 1: explicit fit followed by transform.
        mod1 = getattr(s.preprocessing, algo)()
        mod2 = getattr(pp, algo)()

        s.fit(mod1)
        mod2.fit(arr)

        result = s.transform(mod1)
        expected = mod2.transform(arr).flatten()

        self.assertIsInstance(result, pdml.ModelSeries)
        self.assert_numpy_array_almost_equal(result.values, expected)

        # Path 2: fresh estimators, combined fit_transform.
        mod1 = getattr(s.preprocessing, algo)()
        mod2 = getattr(pp, algo)()

        result = s.fit_transform(mod1)
        expected = mod2.fit_transform(arr).flatten()

        self.assertIsInstance(result, pdml.ModelSeries)
        self.assert_numpy_array_almost_equal(result.values, expected)
コード例 #5
0
    def test_series_instance(self):
        """Check that construction and label selection both yield ``ModelSeries``."""
        s = pdml.ModelSeries([1, 2, 3, 4, 5], index=['A', 'B', 'C', 'D', 'E'])
        # assertIsInstance reports the actual type on failure, unlike
        # assertTrue(isinstance(...)).
        self.assertIsInstance(s, pdml.ModelSeries)

        # Selecting a list of labels must keep the subclass type.
        s = s[['A', 'B']]
        self.assertEqual(len(s), 2)
        self.assertIsInstance(s, pdml.ModelSeries)
コード例 #6
0
    def test_preprocessing_normalize(self):
        """Check ``s.preprocessing.normalize()`` against ``pp.normalize``.

        The reference value is computed by casting the series values to
        float, promoting them to a single-row 2-d array, calling
        ``pp.normalize`` and taking that row.  The result must be a
        ``ModelSeries`` with matching values and the original index.
        """
        s = pdml.ModelSeries([1, 2, 3, 4, 5], index=['A', 'B', 'C', 'D', 'E'])
        self.assertIsInstance(s, pdml.ModelSeries)
        result = s.preprocessing.normalize()
        # ``np.float`` was only an alias of the builtin ``float`` and has been
        # removed from recent NumPy releases; the builtin yields the same dtype.
        expected = pp.normalize(np.atleast_2d(s.values.astype(float)))[0, :]

        self.assertIsInstance(result, pdml.ModelSeries)
        self.assert_numpy_array_almost_equal(result.values, expected)
        self.assert_index_equal(result.index, s.index)
コード例 #7
0
ファイル: test_groupby.py プロジェクト: zstarpak/pandas-ml
    def test_series_groupby(self):
        """Group a named ``ModelSeries`` and inspect one of the groups.

        The groupby object must be a ``ModelSeriesGroupBy``; the group for
        key ``1`` must be a ``ModelSeries`` equal to the first three
        elements and must keep the series name ``'X'``.
        """
        series = pdml.ModelSeries([1, 2, 1, 2], name='X')
        self.assertIsInstance(series, pdml.ModelSeries)

        # The first three rows share key 1, the last row has key 2.
        keys = [1, 1, 1, 2]
        by_key = series.groupby(keys)
        self.assertIsInstance(by_key, pdml.core.groupby.ModelSeriesGroupBy)

        first_group = by_key.get_group(1)
        self.assertIsInstance(first_group, pdml.ModelSeries)

        wanted = pd.Series([1, 2, 1], index=[0, 1, 2], name='X')
        tm.assert_series_equal(first_group, wanted)
        self.assertEqual(first_group.name, 'X')
コード例 #8
0
    def test_series_to_frame(self):
        """Check ``ModelSeries.to_frame`` result type and column labels.

        Covers an unnamed series (default column ``0``) and a named series
        (column taken from the series name), each with and without an
        explicit ``name`` argument.  Every result must be a ``ModelFrame``.
        """
        # Unnamed series.
        s = pdml.ModelSeries([1, 2, 3, 4, 5])
        # assertIsInstance reports the actual type on failure, unlike
        # assertTrue(isinstance(...)).
        self.assertIsInstance(s, pdml.ModelSeries)

        df = s.to_frame()
        self.assertIsInstance(df, pdml.ModelFrame)
        self.assert_index_equal(df.columns, pd.Index([0]))

        df = s.to_frame(name='x')
        self.assertIsInstance(df, pdml.ModelFrame)
        self.assert_index_equal(df.columns, pd.Index(['x']))

        # Named series: an explicit ``name`` argument overrides the series name.
        s = pdml.ModelSeries([1, 2, 3, 4, 5], name='name')
        self.assertIsInstance(s, pdml.ModelSeries)

        df = s.to_frame()
        self.assertIsInstance(df, pdml.ModelFrame)
        self.assert_index_equal(df.columns, pd.Index(['name']))

        df = s.to_frame(name='x')
        self.assertIsInstance(df, pdml.ModelFrame)
        self.assert_index_equal(df.columns, pd.Index(['x']))
コード例 #9
0
    def test_Imputer(self):
        """Check that ``Imputer`` fills the missing value of a ``ModelSeries``.

        The series ``[1, nan, 3, 2]`` is expected to become ``[1, 2, 3, 2]``
        through both the ``fit`` + ``transform`` path and the
        ``fit_transform`` path, each returning a ``ModelSeries``.
        """
        arr = np.array([1, np.nan, 3, 2])
        s = pdml.ModelSeries(arr)

        # Path 1: explicit fit followed by transform.
        mod1 = s.pp.Imputer(axis=1)
        s.fit(mod1)
        result = s.transform(mod1)

        expected = np.array([1, 2, 3, 2])

        # assertIsInstance reports the actual type on failure, unlike
        # assertTrue(isinstance(...)).
        self.assertIsInstance(result, pdml.ModelSeries)
        self.assert_numpy_array_almost_equal(result.values, expected)

        # Path 2: fresh estimator, combined fit_transform.
        mod1 = s.pp.Imputer(axis=1)
        result = s.fit_transform(mod1)

        self.assertIsInstance(result, pdml.ModelSeries)
        self.assert_numpy_array_almost_equal(result.values, expected)
コード例 #10
0
    def test_LabelEncoder_series(self):
        """Round-trip a string ``ModelSeries`` through ``LabelEncoder``.

        Encoding must give a ``ModelSeries`` of integer codes (the
        ``fit`` + ``transform`` result must also keep the original index),
        and ``inverse_transform`` on the ``fit_transform`` result must
        reproduce the original series.
        """
        labels = np.array(['X', 'Y', 'Z', 'X'])
        series = pdml.ModelSeries(labels, index=['a', 'b', 'c', 'd'])
        codes = np.array([0, 1, 2, 0])

        # Path 1: explicit fit followed by transform.
        encoder = series.pp.LabelEncoder()
        series.fit(encoder)
        encoded = series.transform(encoder)

        self.assertIsInstance(encoded, pdml.ModelSeries)
        self.assert_numpy_array_almost_equal(encoded.values, codes)
        tm.assert_index_equal(encoded.index, series.index)

        # Path 2: fresh encoder, combined fit_transform.
        encoder = series.pp.LabelEncoder()
        encoded = series.fit_transform(encoder)

        self.assertIsInstance(encoded, pdml.ModelSeries)
        self.assert_numpy_array_almost_equal(encoded.values, codes)

        # Decoding the codes must give back the original series.
        decoded = encoded.inverse_transform(encoder)
        self.assertIsInstance(decoded, pdml.ModelSeries)
        tm.assert_series_equal(decoded, series)
コード例 #11
0
    def test_LabelEncoder(self):
        """Round-trip a string ``ModelSeries`` through ``LabelEncoder``.

        Encoding must give a ``ModelSeries`` of integer codes with the
        original index, via both ``fit`` + ``transform`` and
        ``fit_transform``; ``inverse_transform`` must return a
        ``ModelSeries`` whose values equal the original labels.
        """
        arr = np.array(['X', 'Y', 'Z', 'X'])
        s = pdml.ModelSeries(arr, index=['a', 'b', 'c', 'd'])

        # Path 1: explicit fit followed by transform.
        mod1 = s.pp.LabelEncoder()
        s.fit(mod1)
        result = s.transform(mod1)

        expected = np.array([0, 1, 2, 0])

        # assertIsInstance reports the actual type on failure, unlike
        # assertTrue(isinstance(...)).
        self.assertIsInstance(result, pdml.ModelSeries)
        self.assert_numpy_array_almost_equal(result.values, expected)
        self.assert_index_equal(result.index, pd.Index(['a', 'b', 'c', 'd']))

        # Path 2: fresh encoder, combined fit_transform.
        mod1 = s.pp.LabelEncoder()
        result = s.fit_transform(mod1)

        self.assertIsInstance(result, pdml.ModelSeries)
        self.assert_numpy_array_almost_equal(result.values, expected)

        inversed = result.inverse_transform(mod1)
        self.assertIsInstance(inversed, pdml.ModelSeries)
        self.assert_numpy_array_equal(inversed.values.flatten(), arr)
        # NOTE(review): this checks ``result.index`` (the fit_transform
        # output), not ``inversed.index`` -- confirm which was intended.
        self.assert_index_equal(result.index, pd.Index(['a', 'b', 'c', 'd']))
コード例 #12
0
    def test_LabelBinarizer(self):
        """Round-trip an integer ``ModelSeries`` through ``LabelBinarizer``.

        Binarizing must give a ``ModelFrame`` holding one indicator column
        per distinct label, via both ``fit`` + ``transform`` and
        ``fit_transform``; ``inverse_transform`` must return a ``ModelFrame``
        whose flattened values equal the original labels.
        """
        labels = np.array([1, 2, 3, 2])
        series = pdml.ModelSeries(labels, index=['a', 'b', 'c', 'd'])
        indicators = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1], [0, 1, 0]])

        # Path 1: explicit fit followed by transform.
        binarizer = series.pp.LabelBinarizer()
        series.fit(binarizer)
        binarized = series.transform(binarizer)

        self.assertIsInstance(binarized, pdml.ModelFrame)
        self.assert_numpy_array_almost_equal(binarized.values, indicators)
        tm.assert_index_equal(binarized.index, series.index)

        # Path 2: fresh binarizer, combined fit_transform.
        binarizer = series.pp.LabelBinarizer()
        binarized = series.fit_transform(binarizer)

        self.assertIsInstance(binarized, pdml.ModelFrame)
        self.assert_numpy_array_almost_equal(binarized.values, indicators)

        restored = binarized.inverse_transform(binarizer)
        self.assertIsInstance(restored, pdml.ModelFrame)
        self.assert_numpy_array_almost_equal(restored.values.flatten(), labels)
        # NOTE(review): the index check targets the fit_transform output, not
        # the inverse-transformed frame -- confirm which was intended.
        tm.assert_index_equal(binarized.index, series.index)