예제 #1
0
    def test_min_max_scaler_1d(self):
        """Exercise MinMaxScaler on one-dimensional input.

        A random 1D array is fitted and transformed; the result must have
        minimum 0 and maximum 1, and ``inverse_transform`` must give back
        the original values. The range check is then repeated with a plain
        Python list as input.
        """
        random_state = np.random.RandomState(0)
        samples = random_state.randn(5)
        # Keep an untouched copy to compare against after the round trip.
        samples_reference = samples.copy()

        scaler = MinMaxScaler()
        scaled = scaler.fit(samples).transform(samples)
        assert_array_almost_equal(scaled.min(axis=0), 0.0)
        assert_array_almost_equal(scaled.max(axis=0), 1.0)

        # Undoing the scaling has to reproduce the original data.
        restored = scaler.inverse_transform(scaled)
        assert_array_almost_equal(restored, samples_reference)

        # Same range check, this time feeding a 1D list.
        samples = [0., 1., 2, 0.4, 1.]
        scaler = MinMaxScaler()
        scaled = scaler.fit(samples).transform(samples)
        assert_array_almost_equal(scaled.min(axis=0), 0.0)
        assert_array_almost_equal(scaled.max(axis=0), 1.0)
예제 #2
0
    def test_min_max_scaler_1d(self):
        """Verify MinMaxScaler output range and round trip for 1D data."""

        def _assert_unit_range(values):
            # The scaled data is expected to have minimum 0 and maximum 1.
            assert_array_almost_equal(values.min(axis=0), 0.0)
            assert_array_almost_equal(values.max(axis=0), 1.0)

        generator = np.random.RandomState(0)
        array_input = generator.randn(5)
        pristine = array_input.copy()

        scaler = MinMaxScaler()
        array_scaled = scaler.fit(array_input).transform(array_input)
        _assert_unit_range(array_scaled)

        # inverse_transform must recover the values we started from.
        round_tripped = scaler.inverse_transform(array_scaled)
        assert_array_almost_equal(round_tripped, pristine)

        # A plain 1D list is accepted as well.
        list_input = [0., 1., 2, 0.4, 1.]
        scaler = MinMaxScaler()
        list_scaled = scaler.fit(list_input).transform(list_input)
        _assert_unit_range(list_scaled)
예제 #3
0
    def test_min_max_scaler_sparse_boston_data(self):
        """Check MinMaxScaler on the sparse boston housing training data.

        The boston data is chosen because, per the original author's note,
        its third column is one-hot encoded: the stock sklearn rescaler
        would set every value of such a column to zero, whereas this
        scaler keeps them at 1.
        """
        X_train, Y_train, _, _ = get_dataset('boston', make_sparse=True)
        stored_before = len(X_train.data)
        # Per-column maxima expected after scaling: 0 for the first
        # column, 1 for the remaining twelve.
        expected_max_values = np.array([0] + [1] * 12).reshape((1, -1))

        scaler = MinMaxScaler()
        scaler.fit(X_train, Y_train)
        transformation = scaler.transform(X_train)

        dense = transformation.todense()
        assert_array_almost_equal(np.array(dense.min(axis=0)),
                                  np.zeros((1, 13)))
        assert_array_almost_equal(np.array(dense.max(axis=0)),
                                  expected_max_values)
        # The result must remain sparse with the same number of stored
        # entries as the input.
        self.assertTrue(sparse.issparse(transformation))
        self.assertEqual(stored_before, len(transformation.data))
예제 #4
0
    def test_min_max_scaler_sparse_boston_data(self):
        """Scale the sparse boston housing data and validate the result.

        According to the original comment, boston is used because column
        three is one-hot encoded; the regular sklearn rescaler would turn
        that column into all zeros, while this scaler leaves the values
        at 1. The test checks the column-wise minima and maxima and that
        the transformed matrix is still sparse with an unchanged number of
        stored entries.
        """
        X_train, Y_train, _unused_X, _unused_Y = get_dataset(
            'boston', make_sparse=True)
        n_stored = len(X_train.data)

        column_maxima = [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
        column_maxima = np.array(column_maxima).reshape((1, -1))
        column_minima = np.zeros((1, 13))

        scaler = MinMaxScaler()
        scaler.fit(X_train, Y_train)
        scaled = scaler.transform(X_train)

        observed_min = np.array(scaled.todense().min(axis=0))
        assert_array_almost_equal(observed_min, column_minima)
        observed_max = np.array(scaled.todense().max(axis=0))
        assert_array_almost_equal(observed_max, column_maxima)

        # Sparsity must survive the transformation.
        self.assertTrue(sparse.issparse(scaled))
        self.assertEqual(n_stored, len(scaled.data))