def features_normal_cov_toeplitz(n_samples: int = 200, n_features: int = 30,
                                 cov_corr: float = 0.5):
    """Normal features generator with Toeplitz covariance.

    An example of features obtained as samples of a centered Gaussian
    vector with a Toeplitz covariance matrix.

    Parameters
    ----------
    n_samples : `int`, default=200
        Number of samples

    n_features : `int`, default=30
        Number of features

    cov_corr : `float`, default=0.5
        Correlation coefficient of the Toeplitz correlation matrix

    Returns
    -------
    output : `numpy.ndarray`, shape=(n_samples, n_features)
        n_samples realizations of a Gaussian vector with the described
        covariance
    """
    cov = toeplitz(cov_corr ** np.arange(0, n_features))
    return np.random.multivariate_normal(np.zeros(n_features), cov,
                                         size=n_samples)
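# A minimal usage sketch (not from the original source), assuming this
# snippet's own imports: `import numpy as np` and
# `from scipy.linalg import toeplitz`. It draws a small feature matrix and
# checks that the empirical column correlations are close to
# cov_corr ** |i - j|.
features = features_normal_cov_toeplitz(n_samples=5000, n_features=5,
                                        cov_corr=0.5)
print(features.shape)                        # (5000, 5)
print(np.corrcoef(features, rowvar=False))   # entry (i, j) close to 0.5 ** |i - j|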
def simu_linreg(x, n, std=1., corr=0.5):
    """Simulation for the least-squares problem.

    Parameters
    ----------
    x : ndarray, shape (d,)
        The coefficients of the model
    n : int
        Sample size
    std : float, default=1.
        Standard-deviation of the noise
    corr : float, default=0.5
        Correlation of the features matrix

    Returns
    -------
    A : ndarray, shape (n, d)
        The design matrix.
    b : ndarray, shape (n,)
        The targets.
    """
    d = x.shape[0]
    cov = toeplitz(corr ** np.arange(0, d))
    A = multivariate_normal(np.zeros(d), cov, size=n)
    noise = std * randn(n)
    b = A.dot(x) + noise
    return A, b
def simu_linreg(coefs=None, n_samples=1000, n_features=1000, corr=0.5,
                random_state=42):
    """Simulation of a linear regression model.

    Parameters
    ----------
    coefs : `numpy.array`, shape (n_features,)
        Coefficients of the model. If None, they are drawn at random.

    n_samples : `int`, default=1000
        Number of samples to simulate

    n_features : `int`, default=1000
        Number of features to simulate

    corr : `float`, default=0.5
        Correlation between features i and j is corr^|i - j|

    random_state : `int`, default=42
        Seed of the pseudo-random number generator

    Returns
    -------
    X : `numpy.ndarray`, shape (n_samples, n_features)
        Simulated features matrix. Each row is a sample of a centered
        Gaussian vector with covariance given by the Toeplitz matrix

    y : `numpy.array`, shape (n_samples,)
        Simulated targets
    """
    rng = check_random_state(random_state)
    if coefs is None:
        coefs = rng.randn(n_features)
    cov = toeplitz(corr ** np.arange(0, n_features))
    X = rng.multivariate_normal(np.zeros(n_features), cov, size=n_samples)
    y = X.dot(coefs) + rng.randn(n_samples)
    return X, y
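# A hedged usage sketch (not in the original): assumes `import numpy as np`,
# `from scipy.linalg import toeplitz` and
# `from sklearn.utils import check_random_state`, which this snippet appears
# to rely on.
X, y = simu_linreg(n_samples=2000, n_features=20, corr=0.5, random_state=0)
print(X.shape, y.shape)   # (2000, 20) (2000,)
# Ordinary least squares returns a coefficient vector of the same shape as `coefs`.
coefs_hat, *_ = np.linalg.lstsq(X, y, rcond=None)
print(coefs_hat.shape)    # (20,)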
def build_data_linear(true_model_param: torch.FloatTensor,
                      n_samples=NB_OF_POINTS_BY_DEVICE, n_dimensions=DIM,
                      n_devices: int = 1, with_seed: bool = False,
                      without_noise=False, features_corr=0.6, labels_std=0.4):
    """Build data for least-squares regression.

    Args:
        true_model_param: the true parameters of the model.
        n_samples: number of samples per device.
        n_dimensions: dimension of the problem.
        n_devices: number of devices.
        with_seed: true if we want to seed the pseudo-random number generator.
        without_noise: true if the labels should be generated without noise.
        features_corr: correlation coefficient used to generate the data points.
        labels_std: standard deviation of the noise added to the labels.

    Returns:
        If more than one device, two lists of pytorch tensors (features and
        labels); otherwise a single pair of tensors.
    """
    X, Y = [], []
    for i in range(n_devices):
        # Construction of a covariance matrix
        cov = toeplitz(features_corr ** np.arange(0, n_dimensions))
        if with_seed:
            np.random.seed(0)
        x = torch.from_numpy(
            multivariate_normal(np.zeros(n_dimensions), cov,
                                size=floor(n_samples)).astype(dtype=np.float64))

        # Simulation of the labels
        y = x.mv(true_model_param) + BIAS

        # Optionally add noise to the labels
        if not without_noise:
            if with_seed:
                y += torch.normal(0, labels_std, size=(floor(n_samples), 1),
                                  generator=torch.manual_seed(0),
                                  dtype=torch.float64)[0]
            else:
                y += torch.normal(0, labels_std, size=(floor(n_samples), 1),
                                  dtype=torch.float64)[0]

        X.append(x)
        Y.append(y)

    if n_devices == 1:
        return X[0], Y[0]
    return X, Y
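# A hypothetical usage sketch, not from the original repository. It assumes
# `import torch`, `import numpy as np`, `from math import floor`,
# `from scipy.linalg import toeplitz` and a `multivariate_normal` such as
# `numpy.random.multivariate_normal`; DIM and BIAS below are illustrative
# stand-ins for the module-level constants of the same names.
DIM = 10
BIAS = 2.
true_params = torch.ones(DIM, dtype=torch.float64)
x, y = build_data_linear(true_params, n_samples=500, n_dimensions=DIM,
                         n_devices=1, labels_std=0.4)
print(x.shape, y.shape)   # torch.Size([500, 10]) torch.Size([500])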
def simu_linreg(x, n_samples, std=1., corr=0.5):
    """Simulation for the least-squares problem."""
    d = x.shape[0]
    cov = toeplitz(corr ** np.arange(0, d))
    A = multivariate_normal(np.zeros(d), cov, size=n_samples)
    noise = std * randn(n_samples)
    b = A.dot(x) + noise
    return A, b
def build_data_logistic(true_model_param: torch.FloatTensor,
                        n_samples=NB_OF_POINTS_BY_DEVICE, n_dimensions=DIM,
                        n_devices: int = 1, with_seed: bool = False,
                        features_corr=0.6, labels_std=0.4):
    """Build data for logistic regression.

    Args:
        true_model_param: the true parameters of the model.
        n_samples: number of samples per device.
        n_dimensions: dimension of the problem.
        n_devices: number of devices.
        with_seed: true if we want to seed the pseudo-random number generator.
        features_corr: correlation coefficient used to generate the data points.
        labels_std: standard deviation of the noise added to the labels.

    Returns:
        If more than one device, two lists of pytorch tensors (features and
        labels); otherwise a single pair of tensors.
    """
    X, Y = [], []
    model_copy = deepcopy(true_model_param)
    for i in range(n_devices):
        # We use two different models to simulate non-i.i.d. data.
        if i % 2 == 0:
            model_copy[(i + 1) % n_dimensions] *= -1
        else:
            model_copy = deepcopy(true_model_param)

        # Construction of a covariance matrix
        cov = toeplitz(features_corr ** np.arange(0, n_dimensions))
        if not with_seed:
            np.random.seed(0)
        sign = np.array([1 for j in range(n_dimensions)])
        if i % 2 == 0:
            sign[i % n_dimensions] = -1
        x = torch.from_numpy(sign * multivariate_normal(
            np.zeros(n_dimensions), cov,
            size=floor(n_samples)).astype(dtype=np.float64))

        # Simulation of the labels
        # NB: the synthetic logistic dataset is used to show how Artemis is
        # used in non-i.i.d. settings, which is why we don't introduce a bias
        # here.
        y = torch.bernoulli(torch.sigmoid(x.mv(model_copy.T)))
        y[y == 0] = -1

        X.append(x)
        Y.append(y)

    if n_devices == 1:
        return X[0], Y[0]
    return X, Y
def simu_linreg_data(n_samples=5000, n_features=50, interc=-1., p_nnz=0.3):
    """Simulate a sparse-design linear regression dataset."""
    np.random.seed(123)

    # Alternating-sign, exponentially decaying true weights
    idx = np.arange(1, n_features + 1)
    weights = (-1) ** (idx - 1) * np.exp(-idx / 10.)

    # Gaussian features with Toeplitz covariance
    corr = 0.5
    cov = toeplitz(corr ** np.arange(0, n_features))
    X = np.random.multivariate_normal(np.zeros(n_features), cov,
                                      size=n_samples)

    # Sparsify the design and drop any all-zero rows
    X *= np.random.binomial(1, p_nnz, size=X.shape)
    idx = np.nonzero(X.sum(axis=1))
    X = X[idx]
    n_samples = X.shape[0]

    # Noisy targets, with an optional intercept
    noise = np.random.randn(n_samples)
    y = X.dot(weights) + noise
    if interc:
        y += interc
    return X, y
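# A small usage sketch (an assumption, not from the source), assuming
# `import numpy as np` and `from scipy.linalg import toeplitz`. It only
# inspects the shapes and the sparsity of the simulated design.
X, y = simu_linreg_data(n_samples=1000, n_features=50, interc=-1., p_nnz=0.3)
print(X.shape, y.shape)   # roughly (1000, 50) and (1000,), minus any all-zero rows
print(np.mean(X != 0))    # close to p_nnz = 0.3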
def features_normal_cov_toeplitz(n_samples=200, n_features=10, rho=0.5):
    """Features obtained as samples of a centered Gaussian vector
    with a Toeplitz covariance matrix.

    Parameters
    ----------
    n_samples : `int`, default=200
        Number of samples

    n_features : `int`, default=10
        Number of features

    rho : `float`, default=0.5
        Correlation coefficient of the Toeplitz correlation matrix

    Returns
    -------
    output : `np.ndarray`, shape=(n_samples, n_features)
        The simulated features matrix
    """
    cov = toeplitz(rho ** np.arange(0, n_features))
    return np.random.multivariate_normal(np.zeros(n_features), cov,
                                         size=n_samples)
def test_mm_mixed_norm_bayes():
    """Basic test of the mm_mixed_norm_bayes function"""
    # First we define the problem size and the location of the active sources.
    n_features = 16
    n_samples = 24
    n_times = 5
    X_true = np.zeros((n_features, n_times))

    # Active sources at indices 5 and 10
    X_true[5, :] = 2.
    X_true[10, :] = 2.

    # Construction of a covariance matrix
    rng = np.random.RandomState(0)

    # Set the correlation of each simulated source
    corr = [0.6, 0.95]
    cov = []
    for c in corr:
        this_cov = toeplitz(c ** np.arange(0, n_features // len(corr)))
        cov.append(this_cov)
    cov = np.array(linalg.block_diag(*cov))

    # Simulation of the design matrix / forward operator
    G = rng.multivariate_normal(np.zeros(n_features), cov, size=n_samples)

    # Simulation of the data with some noise
    M = G.dot(X_true)
    M += 0.1 * np.std(M) * rng.randn(n_samples, n_times)

    n_orient = 1

    # Define the regularization parameter and run the solver
    lambda_max = norm_l2inf(np.dot(G.T, M), n_orient)
    lambda_ref = 0.3 * lambda_max

    K = 10
    random_state = 0  # set random seed to make results replicable
    out = mm_mixed_norm_bayes(M, G, lambda_ref, n_orient=n_orient, K=K,
                              random_state=random_state)

    Xs, active_sets = out[:2]
    lpp_samples, lppMAP, pobj_l2half = out[2:]
    freq_occ = np.mean(active_sets, axis=0)

    assert_equal(np.argsort(freq_occ)[-2:], [9, 5])
    assert len(Xs) == K
    assert lpp_samples.shape == (K, )
    assert pobj_l2half.shape == (K, )
    assert lppMAP.shape == (K, )

    out = mm_mixed_norm_bayes(M, G, lambda_ref, n_orient=n_orient, K=K,
                              return_samples=True, random_state=random_state)

    Xs, active_sets = out[:2]
    lpp_samples, lppMAP, pobj_l2half = out[2:-2]
    X_samples, gamma_samples = out[-2:]
    freq_occ = np.mean(active_sets, axis=0)

    assert_equal(np.argsort(freq_occ)[-2:], [9, 5])
    assert len(Xs) == K
    assert lpp_samples.shape == (K, )
    assert pobj_l2half.shape == (K, )
    assert lppMAP.shape == (K, )
    assert X_samples.shape == (K, n_features, n_times, 2)
    assert gamma_samples.shape == (K, n_features, 2)
lambda_percent = 20.
K = 1000

X_true = np.zeros((n_features, n_times))

# Active sources at indices 4 and 14
X_true[4, :] = 1.
X_true[14, :] = 1.

###############################################################################
# Construction of a covariance matrix

rng = np.random.RandomState(0)

# Set the correlation of each simulated source
corr = [0.5, 0.95]
cov = []
for c in corr:
    this_cov = toeplitz(c ** np.arange(0, n_features // len(corr)))
    cov.append(this_cov)
cov = np.array(linalg.block_diag(*cov))

plt.matshow(cov)
plt.gca().xaxis.set_ticks_position('bottom')
plt.title('True Covariance')

###############################################################################
# Simulation of the design matrix / forward operator

G = rng.multivariate_normal(np.zeros(n_features), cov, size=n_samples)

plt.matshow(G.T.dot(G))
plt.gca().xaxis.set_ticks_position('bottom')
plt.title("Feature covariance")
n_samples = 10
n_times = 1
lambda_percent = 50.
K = 5000

X_true = np.zeros((n_features, n_times))

# Active sources at indices 4 and 14
X_true[4, :] = 1.
X_true[14, :] = 1.

###############################################################################
# Construction of a covariance matrix

rng = np.random.RandomState(0)

# Set the correlation of each simulated source
corr = 0.95
cov = toeplitz(corr ** np.arange(0, n_features // 2))

###############################################################################
# Simulation of the design matrix / forward operator

G = rng.multivariate_normal(np.zeros(len(cov)), cov, size=n_samples)
G = np.concatenate((G, G), axis=1)
G /= np.linalg.norm(G, axis=0)[np.newaxis, :]  # normalize columns

plt.matshow(G.T.dot(G))
plt.title("Feature covariance")

###############################################################################
# Simulation of the data with some noise

M = G.dot(X_true)
M += 0.2 * np.max(np.abs(M)) * rng.randn(n_samples, n_times)