def test_X_normalization_transformer(self):
    """Check that normalizing X standardizes the features and nothing else.

    Fits a ``NormalizationTransformer`` with ``transform_X=True`` on the
    dataset returned by ``self.load_solubility_data()``, applies it to that
    same dataset, and verifies that:

    * ids, y and w read back from the dataset are unchanged;
    * every column of the transformed X has (numerically) zero mean;
    * every column whose original std was non-zero now has unit std.
    """
    # NOTE(review): another method with this exact name appears later in the
    # file; if both live in the same class the later one shadows this one --
    # confirm which variant is meant to run.
    solubility_dataset = self.load_solubility_data()
    normalization_transformer = NormalizationTransformer(
        transform_X=True, dataset=solubility_dataset)

    # Read the dataset contents before and after the transform.
    # NOTE(review): the "unchanged" checks are only meaningful if these
    # attributes hand back copies rather than views that transform() mutates
    # -- confirm against the dataset implementation.
    X, y, w, ids = (solubility_dataset.X, solubility_dataset.y,
                    solubility_dataset.w, solubility_dataset.ids)
    normalization_transformer.transform(solubility_dataset)
    X_t, y_t, w_t, ids_t = (solubility_dataset.X, solubility_dataset.y,
                            solubility_dataset.w, solubility_dataset.ids)

    # Ids must be unchanged. assert_array_equal also compares shapes, so a
    # transform that dropped or added rows fails here instead of being
    # silently truncated by zip().
    np.testing.assert_array_equal(ids, ids_t)

    # y and w must be untouched because this is an X transformer.
    np.testing.assert_allclose(y, y_t)
    np.testing.assert_allclose(w, w_t)

    # Every column of the transformed X should have zero mean.
    mean = X_t.mean(axis=0)
    assert np.amax(np.abs(mean)) < 1e-7

    # Columns whose original std is (close to) zero are skipped: per the
    # original test, entries with zero std are not normalized.
    orig_std_array = X.std(axis=0)
    std_array = X_t.std(axis=0)
    nonconstant = ~np.isclose(orig_std_array, 0)
    # Same tolerances as np.isclose(std, 1) with default arguments.
    np.testing.assert_allclose(
        std_array[nonconstant], 1.0, rtol=1e-5, atol=1e-8)
def test_X_normalization_transformer(self):
    """Check that normalizing X standardizes the features and nothing else.

    Fits a ``NormalizationTransformer`` with ``transform_X=True`` on the
    dataset returned by ``self.load_solubility_data()``, applies it to that
    same dataset, and compares ``to_numpy()`` snapshots taken before and
    after the transform:

    * ids, y and w are unchanged;
    * every column of the transformed X has (numerically) zero mean;
    * every column whose original std was non-zero now has unit std.
    """
    solubility_dataset = self.load_solubility_data()
    normalization_transformer = NormalizationTransformer(
        transform_X=True, dataset=solubility_dataset)

    # Snapshot the dataset on either side of the transform; the transform's
    # return value is ignored and the dataset itself is re-read.
    X, y, w, ids = solubility_dataset.to_numpy()
    normalization_transformer.transform(solubility_dataset)
    X_t, y_t, w_t, ids_t = solubility_dataset.to_numpy()

    # Ids must be unchanged. assert_array_equal also compares shapes, so a
    # transform that dropped or added rows fails here instead of being
    # silently truncated by zip().
    np.testing.assert_array_equal(ids, ids_t)

    # y and w must be untouched because this is an X transformer.
    np.testing.assert_allclose(y, y_t)
    np.testing.assert_allclose(w, w_t)

    # Every column of the transformed X should have zero mean.
    mean = X_t.mean(axis=0)
    assert np.amax(np.abs(mean)) < 1e-7

    # Columns whose original std is (close to) zero are skipped: per the
    # original test, entries with zero std are not normalized.
    orig_std_array = X.std(axis=0)
    std_array = X_t.std(axis=0)
    nonconstant = ~np.isclose(orig_std_array, 0)
    # Same tolerances as np.isclose(std, 1) with default arguments.
    np.testing.assert_allclose(
        std_array[nonconstant], 1.0, rtol=1e-5, atol=1e-8)
def test_y_normalization_transformer(self):
    """Check that normalizing y standardizes the labels and nothing else.

    Fits a ``NormalizationTransformer`` with ``transform_y=True`` on the
    dataset returned by ``self.load_solubility_data()``, applies it to that
    same dataset, and verifies that:

    * ids, X and w read back from the dataset are unchanged;
    * the transformed y has zero mean and unit std overall;
    * ``untransform`` maps the transformed y back to the original y.
    """
    # NOTE(review): another method with this exact name appears later in the
    # file; if both live in the same class the later one shadows this one --
    # confirm which variant is meant to run.
    solubility_dataset = self.load_solubility_data()
    normalization_transformer = NormalizationTransformer(
        transform_y=True, dataset=solubility_dataset)

    # Read the dataset contents before and after the transform.
    # NOTE(review): the "unchanged" checks are only meaningful if these
    # attributes hand back copies rather than views that transform() mutates
    # -- confirm against the dataset implementation.
    X, y, w, ids = (solubility_dataset.X, solubility_dataset.y,
                    solubility_dataset.w, solubility_dataset.ids)
    normalization_transformer.transform(solubility_dataset)
    X_t, y_t, w_t, ids_t = (solubility_dataset.X, solubility_dataset.y,
                            solubility_dataset.w, solubility_dataset.ids)

    # Ids must be unchanged. assert_array_equal also compares shapes, so a
    # transform that dropped or added rows fails here instead of being
    # silently truncated by zip().
    np.testing.assert_array_equal(ids, ids_t)

    # X and w must be untouched because this is a y transformer.
    np.testing.assert_allclose(X, X_t)
    np.testing.assert_allclose(w, w_t)

    # The transformed labels should be standardized.
    assert np.isclose(y_t.mean(), 0.)
    assert np.isclose(y_t.std(), 1.)

    # Round trip: untransform must recover the original labels.
    np.testing.assert_allclose(normalization_transformer.untransform(y_t), y)
def test_y_normalization_transformer(self):
    """Check that normalizing y standardizes the labels and nothing else.

    Fits a ``NormalizationTransformer`` with ``transform_y=True`` on the
    dataset returned by ``self.load_solubility_data()``, applies it to that
    same dataset, and compares ``to_numpy()`` snapshots taken before and
    after the transform:

    * ids, X and w are unchanged;
    * the transformed y has zero mean and unit std overall;
    * ``untransform`` maps the transformed y back to the original y.
    """
    solubility_dataset = self.load_solubility_data()
    normalization_transformer = NormalizationTransformer(
        transform_y=True, dataset=solubility_dataset)

    # Snapshot the dataset on either side of the transform; the transform's
    # return value is ignored and the dataset itself is re-read.
    X, y, w, ids = solubility_dataset.to_numpy()
    normalization_transformer.transform(solubility_dataset)
    X_t, y_t, w_t, ids_t = solubility_dataset.to_numpy()

    # Ids must be unchanged. assert_array_equal also compares shapes, so a
    # transform that dropped or added rows fails here instead of being
    # silently truncated by zip().
    np.testing.assert_array_equal(ids, ids_t)

    # X and w must be untouched because this is a y transformer.
    np.testing.assert_allclose(X, X_t)
    np.testing.assert_allclose(w, w_t)

    # The transformed labels should be standardized.
    assert np.isclose(y_t.mean(), 0.)
    assert np.isclose(y_t.std(), 1.)

    # Round trip: untransform must recover the original labels.
    np.testing.assert_allclose(normalization_transformer.untransform(y_t), y)