def test_standard_scaler_sparse_boston_data(self):
    """StandardScaler on the sparse Boston dataset.

    Checks that fitting/transforming a sparse matrix (a) standardizes the
    explicitly stored values of the non-constant columns to ~0 mean and
    ~1 variance, (b) leaves the result sparse, and (c) neither adds nor
    drops any stored entries (same nnz before and after).
    """
    # Test-split arrays are unused here; only the training data is scaled.
    X_train, Y_train, _, _ = get_dataset('boston', make_sparse=True)
    num_data_points = len(X_train.data)

    scaler = StandardScaler()
    scaler.fit(X_train, Y_train)
    tr = scaler.transform(X_train)

    # Per-column statistics over the explicitly stored values only.
    # Slicing tr.data by consecutive indptr entries yields one column per
    # index — NOTE(review): this assumes CSC layout; confirm the scaler
    # returns CSC for the 13 Boston features.
    n_features = 13
    means = np.array([tr.data[tr.indptr[i]:tr.indptr[i + 1]].mean()
                      for i in range(n_features)])
    variances = np.array([tr.data[tr.indptr[i]:tr.indptr[i + 1]].var()
                          for i in range(n_features)])

    # Column 0 is deliberately skipped and column 3 is asserted separately:
    # its stored entries all equal 1 (mean 1, variance 0), i.e. a binary
    # indicator column whose non-zeros are constant.
    for i in chain(range(1, 3), range(4, n_features)):
        self.assertAlmostEqual(means[i], 0, 2)
        self.assertAlmostEqual(variances[i], 1, 2)
    self.assertAlmostEqual(means[3], 1)
    self.assertAlmostEqual(variances[3], 0)

    # The transform must preserve sparsity and the exact number of
    # explicitly stored entries.
    self.assertTrue(scipy.sparse.issparse(tr))
    self.assertEqual(num_data_points, len(tr.data))