def test_correlated_w_true(density):
    """Check the sparsity of ``w_true`` and that a supplied ``w_true`` is kept."""
    n_features = 50

    # The generated coefficients must have int(density * n_features) nonzeros.
    _, _, w_generated = make_correlated_data(
        n_features=n_features, density=density, random_state=42
    )
    support = w_generated.nonzero()[0]
    assert len(support) == int(density * n_features)

    # When w_true is passed in, the returned coefficients must equal it.
    _, _, w_returned = make_correlated_data(
        n_features=n_features, density=density, w_true=w_generated
    )
    assert all(w_returned == w_generated)
def test_correlated_validation_check(param_name, p_range):
    """Check that out-of-range values of ``param_name`` raise a ValueError.

    ``p_range`` is a ``(min_val, max_val)`` pair. A lower bound equal to 0 is
    probed with -1 and an upper bound equal to 1 is probed with 2; any other
    bound is not probed.
    """
    min_val, max_val = p_range
    pattern = f'{param_name}.* should be chosen in .*{min_val}, {max_val}'

    # (bound, value that enables the probe, out-of-range value to pass)
    probes = ((min_val, 0, -1), (max_val, 1, 2))
    for bound, trigger, bad_value in probes:
        if bound != trigger:
            continue
        with pytest.raises(ValueError, match=pattern):
            make_correlated_data(**{param_name: bad_value})
def test_correlated():
    """Check the shapes of ``X``, ``y`` and ``w_true`` for a single task."""
    n_samples, n_features = 101, 52
    X, y, w_true = make_correlated_data(
        n_samples=n_samples, n_features=n_features, random_state=42
    )

    expected_shapes = (
        (X, (n_samples, n_features)),
        (y, (n_samples, )),
        (w_true, (n_features, )),
    )
    for array, shape in expected_shapes:
        assert array.shape == shape
def test_correlated_n_tasks(n_tasks):
    """Check the dimensionality of ``y`` and ``w_true`` depending on ``n_tasks``."""
    _, y, w_true = make_correlated_data(n_tasks=n_tasks, random_state=42)

    if n_tasks != 1:
        # Several tasks: y is 2D and has as many columns as w_true.
        assert y.ndim == 2
        assert y.shape[1] == w_true.shape[1]
        return

    # Single task: both the targets and the coefficients are 1D.
    assert y.ndim == 1
    assert w_true.ndim == 1
def get_data(self):
    """Simulate correlated data and return it along with the feature count.

    Returns
    -------
    tuple
        ``(self.n_features, data)`` where ``data`` is a dict with the keys
        ``'X'`` and ``'y'``.
    """
    X, y, _ = make_correlated_data(
        self.n_samples,
        self.n_features,
        rho=self.rho,
        # Seed a dedicated RandomState from the configured random_state.
        random_state=np.random.RandomState(self.random_state),
    )
    return self.n_features, {'X': X, 'y': y}
def test_correlated_snr(snr):
    """Check that ``y`` relates to ``X @ w_true`` as requested by ``snr``."""
    X, y, w_true = make_correlated_data(
        n_samples=10000, n_features=50, snr=snr, random_state=42
    )
    signal = X @ w_true

    if snr == 0:
        # The noiseless prediction must be (almost) uncorrelated with y.
        assert abs(np.corrcoef(signal, y)[0, 1]) < 1e-2
        return

    if snr == np.inf:
        # y must match the noiseless prediction.
        np.testing.assert_allclose(y, signal)
        return

    # Otherwise the ratio of signal norm to residual norm must equal snr.
    residual = y - signal
    np.testing.assert_allclose(snr, norm(signal) / norm(residual))
def test_correlated_sparse_X(X_density):
    """Check the type and the density of ``X`` for a given ``X_density``.

    A value outside ``(0, 1]`` must raise a ``ValueError``. ``X_density == 1``
    must give an ``np.ndarray``; any other valid value must give a
    ``sparse.csc_matrix`` whose number of stored entries matches the requested
    density with a 5% relative tolerance.
    """
    if not 0 < X_density <= 1:
        np.testing.assert_raises(
            ValueError, make_correlated_data, X_density=X_density
        )
        return

    X, y, _ = make_correlated_data(X_density=X_density, random_state=0)

    if X_density == 1:
        assert isinstance(X, np.ndarray)
        return

    assert isinstance(X, sparse.csc_matrix)
    # check that X's density is equal to X_density up to sampling noise.
    # One entry of X.indices per stored value, so its length is the number of
    # stored entries; no arithmetic on the indices themselves is needed.
    n_stored = len(X.indices)
    np.testing.assert_allclose(
        X_density * X.shape[0] * X.shape[1], n_stored, rtol=0.05
    )
def get_data(self):
    """Simulate a classification problem and split it into train and test.

    Returns
    -------
    tuple
        ``(self.n_features, data)`` where ``data`` is a dict with the keys
        ``'X_train'``, ``'X_test'``, ``'y_train'`` and ``'y_test'``.
    """
    n_train = self.n_samples

    # Draw twice as many samples: first half for train, second half for test.
    X, y, _ = make_correlated_data(
        2 * n_train, self.n_features, rho=self.rho,
        random_state=self.random_state
    )

    # Labels are the sign of the mean-centered targets, to make the
    # classification problem balanced.
    y -= np.mean(y)
    y = np.sign(y)

    data = {
        'X_train': X[:n_train],
        'X_test': X[n_train:],
        'y_train': y[:n_train],
        'y_test': y[n_train:],
    }
    return self.n_features, data