def scale_features(self):
    """Return a new DataFrame holding the z-score scaled feature data.

    The scaling itself is delegated to ``stats.scale_features``; this
    method forwards the instance's ``metadata_string`` and ``prefix`` to
    it and then re-wraps the result in ``DataFrame`` with the same two
    settings.
    """
    scaled = stats.scale_features(
        self,
        metadata_string=self.metadata_string,
        prefix=self.prefix,
    )
    # Wrap the result again so the returned object carries the same
    # metadata settings as this instance.
    return DataFrame(
        scaled,
        metadata_string=self.metadata_string,
        prefix=self.prefix,
    )
def test_scale_features_with_metadata():
    """Scaling a frame with a ``Metadata_plate`` column keeps its layout.

    Only the shape and the column order of the output are checked here;
    the scaled values themselves are not inspected.
    """
    x_values = np.random.normal(loc=5, scale=1, size=1000)
    y_values = np.random.normal(loc=10, scale=5, size=1000)
    plate_labels = ["A"] * 1000

    rows = list(zip(x_values, y_values, plate_labels))
    df = pd.DataFrame(rows, columns=["x", "y", "Metadata_plate"])

    out = stats.scale_features(df)

    assert out.shape == df.shape
    assert out.columns.tolist() == df.columns.tolist()
def test_scale_features_no_metadata():
    """Scaled feature columns have mean 0 and standard deviation 1.

    The input frame has two numeric columns and no metadata column. Each
    output column is checked against a 1e-6 tolerance, using ``np.std``
    with its default arguments for the standard deviation.
    """
    x_values = np.random.normal(loc=5, scale=1, size=1000)
    y_values = np.random.normal(loc=10, scale=5, size=1000)

    df = pd.DataFrame(list(zip(x_values, y_values)), columns=["x", "y"])

    out = stats.scale_features(df)

    tolerance = 1e-6
    for column in ("x", "y"):
        assert abs(out[column].mean() - 0.0) < tolerance
        assert abs(np.std(out[column]) - 1.0) < tolerance