Beispiel #1
0
    def test_standard_scaler_sparse_boston_data(self):
        """Check StandardScaler on the sparse variant of the 'boston' dataset.

        The scaler is fitted on the training split and applied to that same
        split.  For the first 13 ``indptr`` segments of the result, the mean
        and variance of the explicitly stored values are computed, then:

        * segments 1, 2 and 4..12 must have mean ~0 and variance ~1
          (two decimal places);
        * segment 3 must have mean 1 and variance 0;
        * the result must still be sparse and must store exactly as many
          values as the input did.
        """
        # Only the training split is used; the test split is discarded.
        X_train, Y_train, _, _ = get_dataset('boston', make_sparse=True)
        stored_before = len(X_train.data)

        scaler = StandardScaler()
        scaler.fit(X_train, Y_train)
        scaled = scaler.transform(X_train)

        def stored_values(idx):
            # Explicitly stored entries of the idx-th indptr segment
            # (presumably one feature column if the matrix is CSC -- TODO
            # confirm the storage format returned by get_dataset).
            lo = scaled.indptr[idx]
            hi = scaled.indptr[idx + 1]
            return scaled.data[lo:hi]

        n_segments = 13
        seg_means = np.array([stored_values(idx).mean()
                              for idx in range(n_segments)])
        seg_vars = np.array([stored_values(idx).var()
                             for idx in range(n_segments)])

        # NOTE(review): segment 0 is computed above but never asserted on,
        # and segment 3 is handled separately below -- the reason is not
        # visible from this test alone.
        regular_segments = [idx for idx in range(1, n_segments) if idx != 3]
        for idx in regular_segments:
            self.assertAlmostEqual(seg_means[idx], 0, places=2)
            self.assertAlmostEqual(seg_vars[idx], 1, places=2)

        # Segment 3: mean 1 and zero variance, i.e. all of its stored values
        # are expected to be (almost) equal to 1 after scaling.
        self.assertAlmostEqual(seg_means[3], 1)
        self.assertAlmostEqual(seg_vars[3], 0)

        # Scaling must neither densify the matrix nor change the number of
        # explicitly stored entries.
        self.assertTrue(scipy.sparse.issparse(scaled))
        self.assertEqual(stored_before, len(scaled.data))