def make_sparse_low_rank(n_dim_obs=3, n_dim_lat=2, T=10, epsilon=1e-3,
                         n_samples=50, **kwargs):
    """Build cumulative sequences of sparse, low-rank and difference matrices.

    One sparse SPD matrix and one low-rank matrix are drawn first. Each of
    the following ``T - 1`` steps adds a freshly drawn sparse SPD matrix to
    the running sparse term and a freshly drawn low-rank matrix to the
    running low-rank term.

    Parameters
    ----------
    n_dim_obs : int
        Side length of every generated square matrix.
    n_dim_lat : int
        Forwarded as ``effective_rank`` to ``make_low_rank_matrix``.
    T : int
        Number of steps; the returned lists always hold at least one entry.
    epsilon, n_samples, **kwargs
        Accepted but not used by this function.

    Returns
    -------
    Ks : list of ndarray
        Running sparse terms.
    Kobs : list of ndarray
        ``K - L`` at every step.
    Ls : list of ndarray
        Running low-rank terms.
    """
    from sklearn.datasets import make_sparse_spd_matrix, make_low_rank_matrix

    sparse_term = make_sparse_spd_matrix(n_dim_obs)
    low_rank_term = make_low_rank_matrix(
        n_dim_obs, n_dim_obs, effective_rank=n_dim_lat)

    sparse_seq = [sparse_term]
    low_rank_seq = [low_rank_term]
    observed_seq = [sparse_term - low_rank_term]

    for _ in range(1, T):
        # Draw order (sparse first, then low-rank) is kept so that a seeded
        # global RNG yields the same sequence.
        sparse_term = sparse_term + make_sparse_spd_matrix(n_dim_obs)
        low_rank_term = low_rank_term + make_low_rank_matrix(
            n_dim_obs, n_dim_obs, effective_rank=n_dim_lat)
        sparse_seq.append(sparse_term)
        low_rank_seq.append(low_rank_term)
        observed_seq.append(sparse_term - low_rank_term)

    return sparse_seq, observed_seq, low_rank_seq
def get_sparse_high_correlations(dim=25, seed=1, rep_num=1000,
                                 sparsity_alpha=0.9):
    """Get a sparse inverse covariance matrix with high correlations.

    The method draws a few matrices and returns the one whose implied
    correlation matrix has the highest mean absolute value (measured on the
    lower triangle, diagonal included).

    Args:
      dim: the dimension of the matrix to be returned.
      seed: seed for reproducibility; note that it seeds NumPy's global RNG.
      rep_num: number of matrices to draw and choose from; must be >= 1.
      sparsity_alpha: sparsity parameter, forwarded as ``alpha`` to
        make_sparse_spd_matrix.

    Returns:
      A sparse inverse covariance matrix.

    Raises:
      ValueError: if ``rep_num`` is smaller than 1, or if no drawn candidate
        produced a positive mean absolute correlation.
    """
    if rep_num < 1:
        raise ValueError(
            "rep_num must be at least 1, got {!r}".format(rep_num))

    np.random.seed(seed)
    best_candidate = None
    max_mean = 0
    for _ in range(rep_num):
        candidate_matrix = make_sparse_spd_matrix(
            dim, alpha=sparsity_alpha, smallest_coef=.4, largest_coef=.7)
        # Turn the implied covariance into a correlation matrix by dividing
        # columns and then rows by the standard deviations.
        candidate_correlations = np.linalg.inv(candidate_matrix)
        diag_part = np.sqrt(
            np.expand_dims(np.diag(candidate_correlations), axis=0))
        candidate_correlations /= diag_part
        candidate_correlations /= diag_part.transpose()
        cur_mean = np.tril(np.abs(candidate_correlations)).mean()
        if max_mean < cur_mean:
            best_candidate = candidate_matrix
            max_mean = cur_mean

    if best_candidate is None:
        # Previously this surfaced as an UnboundLocalError on return.
        raise ValueError("no candidate matrix could be selected")
    return best_candidate
def test_matrix(n, sparse=False, d=-0.5):
    """
    Returns symmetric matrices on which to test algorithms

    Inputs:

        n: int, matrix size

        sparse: bool (False), if True return a sparse (CSC) matrix generated
            by make_sparse_spd_matrix, otherwise a dense matrix.

        d: float (-0.5), offset added to every entry of a random diagonal
            matrix before it is sandwiched between the singular vectors
            (dense case only).

    Output:

        A: n-by-n matrix; a scipy sparse CSC matrix when sparse=True,
            otherwise a dense array.
    """
    if sparse:
        #
        # Sparse matrix
        #
        # The dimension is passed positionally: its keyword name differs
        # between scikit-learn releases (``dim`` vs ``n_dim``).
        A = make_sparse_spd_matrix(n, alpha=0.95, norm_diag=False,
                                   smallest_coef=.1, largest_coef=.9)
        return sp.csc_matrix(A)

    #
    # Full matrix
    #
    X = np.random.rand(n, n)
    X = X + X.T
    U, _, V = linalg.svd(np.dot(X.T, X))
    # NOTE(review): ``d`` is broadcast onto all entries, not only the
    # diagonal, so the result is not guaranteed positive definite - confirm
    # that this is intended.
    return np.dot(np.dot(U, d + np.diag(np.random.rand(n))), V)
def gm_params_generator(d, k, sparse_proba=None, alpha=5,
                        min_center_dist=None):
    """Generate weights, centers and covariances of a Gaussian mixture.

    Centers are drawn uniformly in ``[-alpha / 2, alpha / 2]^d`` and redrawn
    until each new center is at least ``min_center_dist`` away from all
    centers accepted so far.

    Parameters
    ----------
    d : int
        Dimension of the space.
    k : int
        Number of mixture components.
    sparse_proba : float or None
        If None, each covariance is ``alpha * 1e-2 / k**2 * (I + A A^T)``
        for a random ``A``. Otherwise each covariance is the inverse of a
        matrix from ``make_sparse_spd_matrix(d, alpha=sparse_proba)``.
    alpha : float
        Size of the grid the centers are drawn from.
    min_center_dist : float or None
        Minimum pairwise distance between centers; defaults to
        ``alpha / sqrt(k)``.

    Returns
    -------
    weights : ndarray of shape (k,)
        Mixture weights summing to one.
    centers : list of ndarray
        The accepted centers.
    cov : list of ndarray, or ndarray of shape (k, d, d)
        Covariances (a list in the dense case, an array in the sparse case).

    Notes
    -----
    The rejection sampling does not terminate if no admissible center
    exists for the requested ``min_center_dist``.
    """
    # Splitting the grid into k cells gives a typical spacing of
    # alpha / sqrt(k), used as the default separation.
    if min_center_dist is None:
        min_center_dist = alpha / np.sqrt(k)

    centers = [alpha * (np.random.rand(1, d)[0] - 0.5)]
    for _ in range(k - 1):
        while True:
            center = alpha * (np.random.rand(1, d)[0] - 0.5)
            distances = np.linalg.norm(np.array(centers) - center, axis=1)
            if not np.any(distances < min_center_dist):
                break
        centers.append(center)

    if sparse_proba is None:
        # The 1 / k^2 factor shrinks the components to limit overlap.
        factors = [np.random.rand(d, d) for _ in range(k)]
        cov = [alpha * 1e-2 / (k ** 2) * (np.eye(d) + np.dot(a, a.T))
               for a in factors]
    else:
        # Sparsity is imposed on the precision matrix, then inverted.
        cov = np.array([
            np.linalg.inv(make_sparse_spd_matrix(d, alpha=sparse_proba))
            for _ in range(k)
        ])

    p = np.random.randint(1000, size=(1, k))[0]
    weights = 1.0 * p / p.sum()
    return weights, centers, cov
def prototype_adjacency(self, n_block_features, alpha):
    """Build a new graph.

    Doc for ".create(n_features, alpha)"

    Parameters
    -----------
    n_block_features : int
        Dimension of the generated square matrix.

    alpha : float (0,1)
        The complexity / sparsity factor.
        This is (1 - alpha_0) in sklearn.datasets.make_sparse_spd_matrix
        where alpha_0 is the probability that a coefficient is zero.

    Returns
    -----------
    The matrix produced by make_sparse_spd_matrix, with coefficients drawn
    between ``self.spd_low`` and ``self.spd_high`` using ``self.prng``.
    """
    zero_probability = np.abs(1.0 - alpha)
    generator_options = {
        "alpha": zero_probability,
        "smallest_coef": self.spd_low,
        "largest_coef": self.spd_high,
        "random_state": self.prng,
    }
    return make_sparse_spd_matrix(n_block_features, **generator_options)
def make_correlation_matrix(asvs, prefix, norm_diag=1, alpha=0.9,
                            smallest_coef=0.1, largest_coef=0.9):
    """Create a sparse, symmetric positive definite correlation matrix.

    The matrix has one row/column per entry of ``asvs`` and is also written
    to the current working directory through ``write_table`` under the
    title ``'<prefix>.correlationMatrix'``.

    Parameters
    ----------
    asvs : sequence
        Labels used for the rows and columns; its length sets the size.
    prefix : str
        Prefix of the written table's title.
    norm_diag : bool or int
        Whether to normalize the output matrix to make the leading diagonal
        elements all 1.
    alpha : float
        The probability that a coefficient is zero. Larger values enforce
        more sparsity.
    smallest_coef : float
        The value of the smallest coefficient.
    largest_coef : float
        The value of the largest coefficient.

    Returns
    -------
    ndarray
        The generated matrix.
    """
    # The historical default is the integer 1; newer scikit-learn releases
    # validate ``norm_diag`` as a strict boolean, so coerce it here.
    corrMatrix = make_sparse_spd_matrix(len(asvs),
                                        alpha=alpha,
                                        norm_diag=bool(norm_diag),
                                        smallest_coef=smallest_coef,
                                        largest_coef=largest_coef)
    # Called for its side effect (writing the table); the returned frame is
    # not needed here.
    write_table(corrMatrix,
                outputDir=os.getcwd(),
                title='{}.correlationMatrix'.format(prefix),
                rows=asvs,
                columns=asvs,
                dataframe=True)
    return corrMatrix
def gm_params_gen(d, k):
    """Draw random parameters of a ``k``-component Gaussian mixture in ``d`` dims.

    Returns
    -------
    weights : ndarray of shape (k,)
        Random integers normalised to sum to one.
    centers : ndarray of shape (k, d)
        Integer centers in ``[-10, 10)``.
    cov : ndarray of shape (k, d, d)
        Inverses of matrices drawn with ``make_sparse_spd_matrix(d)``.
    """
    centers = np.random.randint(20, size=(k, d)) - 10

    covariances = []
    for _ in range(k):
        precision = make_sparse_spd_matrix(d)
        covariances.append(np.linalg.inv(precision))
    cov = np.array(covariances)

    counts = np.random.randint(1000, size=(1, k))[0]
    weights = 1.0 * counts / counts.sum()
    return weights, centers, cov
def random_er_network(n_features, alpha, random_state=np.random.RandomState(1)):
    """Draw a sparse SPD matrix whose random coefficients all equal 0.7.

    Parameters
    ----------
    n_features : int
        Dimension of the square matrix.
    alpha : float
        Probability that a coefficient is zero.
    random_state : numpy.random.RandomState
        Random generator. The default instance is created once, when the
        function is defined, and is therefore shared (and advanced) across
        all calls that rely on the default.

    Returns
    -------
    The matrix returned by ``make_sparse_spd_matrix``.
    """
    coefficient = 0.7  # smallest and largest coefficient coincide
    options = {
        "alpha": alpha,
        "smallest_coef": coefficient,
        "largest_coef": coefficient,
        "random_state": random_state,
    }
    return make_sparse_spd_matrix(n_features, **options)
def inv_cov(self, low=0.3, upper=0.6, p=0.2, symmetric=True) -> np.array:
    """Generate an inverse covariance matrix for ``self.n_features`` features.

    Parameters
    ----------
    low : float, default = 0.3
        Lower bound of inverse covariance values between features.

    upper : float, default = 0.6
        Upper bound of inverse covariance values between features.

    p : float > 0, default = 0.2
        Probability of edge between nodes in random graph, ie inverse
        covariance matrix sparsity. ``1 - p`` is forwarded as ``alpha``.

    symmetric : bool, default = True
        Accepted but not used by this method.

    Returns
    -------
    S : array (n_features, n_features)
        Randomly generated sparse SPD matrix, used as inverse covariance.
    """
    # A fresh integer seed is drawn from the instance generator.
    rs = self.rng.integers(10000)
    # The dimension is passed positionally: its keyword name differs
    # between scikit-learn releases (``dim`` vs ``n_dim``).
    return make_sparse_spd_matrix(self.n_features,
                                  alpha=1 - p,
                                  smallest_coef=low,
                                  largest_coef=upper,
                                  random_state=rs)
def generate_random_sparse_psd(p, zero_entry_chance=0.75):
    """
    Draw a random sparse matrix with ``datasets.make_sparse_spd_matrix``.

    :param p: The dimension.
    :param zero_entry_chance: Zero-entry chance, forwarded as ``alpha``.
    :return: The generated array.
    """
    sparse_matrix = datasets.make_sparse_spd_matrix(
        p, alpha=zero_entry_chance)
    return sparse_matrix
def test_neighbourhood_selection_overall_cv(self):
    """Smoke-test ``NeighbourhoodSelectionCV`` on Gaussian samples.

    A sparse SPD precision matrix is drawn and inverted into a covariance,
    and ``n`` samples are drawn from the matching zero-mean normal. The
    test only checks that fitting completes without raising.
    """
    p = 10
    n = 200
    # alpha is passed by keyword: scikit-learn >= 1.0 accepts only the
    # dimension positionally.
    K = make_sparse_spd_matrix(p, alpha=0.7)
    C = np.linalg.inv(K)
    X = np.random.multivariate_normal(np.zeros(p), C, n)
    ns = nitk.NeighbourhoodSelectionCV()
    ns.fit(X)
def test_sparse_inv_covariance(self, q, alpha_ratio):
    """Compare DRS and A2DR on sparse inverse covariance estimation.

    Parameters
    ----------
    q : int
        Dimension of the matrix.
    alpha_ratio : float
        The l1 weight is ``alpha_ratio`` times the largest absolute
        off-diagonal entry of the sample covariance.
    """
    # minimize -log(det(S)) + trace(S*Q) + \alpha*||S||_1 subject to S is symmetric PSD.

    # Problem data.
    p = 1000  # Number of samples.
    ratio = 0.9  # Fraction of zeros in S.
    # alpha is passed by keyword: scikit-learn >= 1.0 accepts only the
    # dimension positionally.
    S_true = sparse.csc_matrix(make_sparse_spd_matrix(q, alpha=ratio))
    Sigma = sparse.linalg.inv(S_true).todense()
    z_sample = sp.linalg.sqrtm(Sigma).dot(np.random.randn(q, p))
    Q = np.cov(z_sample)

    # Largest absolute off-diagonal entry of Q scales the l1 weight.
    mask = np.ones(Q.shape, dtype=bool)
    np.fill_diagonal(mask, 0)
    alpha_max = np.max(np.abs(Q)[mask])
    alpha = alpha_ratio * alpha_max  # 0.001 for q = 100, 0.01 for q = 50

    # Convert problem to standard form.
    # f_1(S) = -log(det(S)) + trace(S*Q) on symmetric PSD matrices, f_2(S) = \alpha*||S||_1.
    # A_1 = I, A_2 = -I, b = 0.
    prox_list = [
        lambda v, t: prox_neg_log_det(
            v.reshape((q, q), order='C'), t, lin_term=t * Q).ravel(order='C'),
        lambda v, t: prox_norm1(v, t * alpha)
    ]
    A_list = [sparse.eye(q * q), -sparse.eye(q * q)]
    b = np.zeros(q * q)

    # Solve with DRS.
    drs_result = a2dr(prox_list, A_list, b, anderson=False, precond=True,
                      max_iter=self.MAX_ITER)
    print('Finished DRS.')

    # Solve with A2DR.
    a2dr_result = a2dr(prox_list, A_list, b, anderson=True, precond=True,
                       max_iter=self.MAX_ITER)
    # lam_accel = 0 seems to work well sometimes, although it oscillates a lot.
    a2dr_S = a2dr_result["x_vals"][-1].reshape((q, q), order='C')
    self.compare_total(drs_result, a2dr_result)
    print('Finished A2DR.')
    print('recovered sparsity = {}'.format(
        np.sum(a2dr_S != 0) * 1.0 / a2dr_S.shape[0]**2))
def add_noise(theta, p, alpha, threshold=0.1):
    """Perturb the off-diagonal part of ``theta`` with sparse random noise.

    Parameters
    ----------
    theta : ndarray of shape (p, p)
        Matrix to perturb. It is left untouched; a copy has its diagonal
        zeroed before the noise is added.
    p : int
        Dimension of the noise matrix.
    alpha : float
        Forwarded as ``alpha`` to ``make_sparse_spd_matrix``.
    threshold : float
        The noise coefficients are drawn between ``-threshold`` and
        ``threshold``.

    Returns
    -------
    theta_star
        Result of ``cov_nearest`` applied to the noise plus the
        zero-diagonal copy of ``theta``.
    """
    # The dimension is passed positionally: its keyword name differs
    # between scikit-learn releases (``dim`` vs ``n_dim``).
    # NOTE(review): recent scikit-learn releases may reject a negative
    # ``smallest_coef`` - confirm against the pinned version.
    noise_mat = make_sparse_spd_matrix(p,
                                       alpha=alpha,
                                       norm_diag=False,
                                       smallest_coef=-threshold,
                                       largest_coef=threshold)
    # np.fill_diagonal works in place, so operate on a copy rather than
    # wiping the diagonal of the caller's matrix.
    off_diagonal_theta = np.array(theta, copy=True)
    np.fill_diagonal(off_diagonal_theta, 0.0)
    # NOTE(review): the clipping threshold is fixed at 0.1 and does not
    # follow the ``threshold`` argument - confirm that this is intended.
    theta_star = cov_nearest(noise_mat + off_diagonal_theta,
                             method="clipped",
                             threshold=0.1)
    return theta_star
def test_neighbourhood_selection(self):
    """
    Create a sparse matrix that we attempt to estimate

    Samples are drawn from the zero-mean normal whose precision is the
    sparse matrix; the test only checks that fitting completes.
    """
    p = 10
    n = 200
    l = 0.5
    # alpha is passed by keyword: scikit-learn >= 1.0 accepts only the
    # dimension positionally.
    K = make_sparse_spd_matrix(p, alpha=0.7)
    C = np.linalg.inv(K)
    X = np.random.multivariate_normal(np.zeros(p), C, n)
    ns = nitk.NeighbourhoodSelection(l)
    ns.fit(X)
def _gista(self, theta0, S, _lambdas, verbose=False):
    """Run the G-ISTA proximal gradient iteration and return the estimate.

    Reference:
    https://papers.nips.cc/paper/4574-iterative-thresholding-algorithm-for-sparse-inverse-covariance-estimation.pdf

    Parameters
    ----------
    theta0 : array
        Starting point of the iteration.
    S : array
        Passed to the smooth term ``self.sfunc`` (presumably the sample
        covariance - verify against the caller).
    _lambdas
        Passed to the non-smooth term ``self.nsfunc`` (presumably the
        regularisation weights - verify against the caller).
    verbose : bool
        When True, print the initial objective values and the duality gap
        of every iteration.

    Returns
    -------
    theta : array
        The iterate after at most ``self.max_iters`` steps, or earlier when
        the duality gap criterion triggers.
    """
    theta = theta0
    # Initial step size: squared smallest eigenvalue of the starting point.
    t = min(np.linalg.eigvals(theta0))**2
    p = len(theta)
    if verbose:
        print(f'f(X,S) = {self.sfunc.eval(theta0, S)}')
        print(f'g(X,rho) = {self.nsfunc.eval(theta0, _lambdas)}')
        print(
            f'Initial Objective: {self._pgm_objective(theta0, S, _lambdas)}'
        )
    # NOTE(review): this check reads ``self.theta0`` while the rest of the
    # method uses the ``theta0`` argument - confirm they are meant to agree.
    if self._pgm_objective(self.theta0, S, _lambdas) > 10000:
        # Skip, bad starting point
        theta = make_sparse_spd_matrix(p,
                                       alpha=0.5,
                                       norm_diag=False,
                                       smallest_coef=-1.0,
                                       largest_coef=1.0)
    for i in range(self.max_iters):
        # Repair the iterate whenever it is no longer positive definite.
        if not _is_pos_def(theta):
            print('Clipped Precision matrix')
            theta = cov_nearest(theta, method="clipped", threshold=0.1)
        if self.ss_type == 'backtracking':
            t = self._step_size(theta, S, _lambdas, t)
        delta = self._duality_gap(p, theta, S, _lambdas)
        if verbose:
            print(f'Duality Gap: {delta}.')
        # The gap only stops the loop when ``self.dual_gap`` is enabled.
        if delta < self.epsilon and self.dual_gap:
            print(f'iterations: {i}')
            print(f'Duality Gap: {delta} < {self.epsilon}. Exiting.')
            break
        # Proximal step: gradient step on the smooth term followed by the
        # prox of the non-smooth term.
        theta_k1 = self.nsfunc.prox(
            theta - t * self.sfunc.gradient(theta, S), _lambdas)
        if self.ss_type == 'backtracking':
            # Uses both the new and the previous iterate, so it must run
            # before ``theta`` is overwritten.
            t = _set_next_inital_step_size(theta_k1, theta)
        theta = theta_k1
    return theta
def makeDataset(n_dimensions, n_total, random_states=(None, None)):
    '''
    Generate a n_dimensions-D dataset by sampling from two Gaussian of fixed
    properties.

    Inputs:
        n_dimension   = number of dimensions
        n_total       = total number of events to generate
        random_states = pair of numpy.random.RandomState objects, or
                        integers to seed internal RandomState objects; the
                        first entry drives the first Gaussian, the second
                        entry the second Gaussian
    Output:
        array containing generated n_dimensional-D data, with the
        int(0.667 * n_total) events of the first Gaussian followed by the
        int(0.333 * n_total) events of the second

    Note: the mean is drawn after seeding NumPy's global RNG with 52.
    '''
    # Create the covariance matrices for the two component Gaussians
    # random_states are specified for reproducibility
    # The dimension is passed positionally: its keyword name differs
    # between scikit-learn releases (``dim`` vs ``n_dim``).
    cov1 = make_sparse_spd_matrix(
        n_dimensions,
        alpha=0.1,
        random_state=47,
        norm_diag=True,
    )
    # alpha is a probability; the former value of -0.5 meant "no entry is
    # zeroed", which 0.0 expresses while staying inside [0, 1].
    cov2 = make_sparse_spd_matrix(
        n_dimensions,
        alpha=0.0,
        random_state=1701,
        norm_diag=True,
    )

    # Create mean position of first Gaussian.
    np.random.seed(52)
    mu1 = np.random.rand(1, n_dimensions)[0]

    # Create data from first Gaussian component
    X1 = stats.multivariate_normal.rvs(
        mean=mu1,
        cov=cov1,
        size=int(0.667 * n_total),
        random_state=random_states[0],
    )

    # Second Gaussian mean is fixed to be shifted by -1 from that of first
    X2 = stats.multivariate_normal.rvs(
        mean=mu1 - 1.,
        cov=cov2,
        size=int(0.333 * n_total),
        random_state=random_states[1],
    )

    return np.append(X1, X2, axis=0)
def main():
    """Generate the datasets, evaluate every model and write the CSV reports.

    Writes the original coefficients, dumps every 20th generated sigma
    matrix, and finally writes one accuracy row per alpha to
    ``accuracy_comparison.csv``. All settings are read from module-level
    names.
    """
    with open(OUTPUT_DIR + 'coef_original_beta.csv', 'w') as coef_file:
        coef_file.write(','.join(map(str, beta[0])))

    # Generate dataset: N examples for every alpha.
    X_in_all_alpha = {}
    y_in_all_alpha = {}
    for a in all_alpha:
        features_per_example = []
        labels_per_example = []
        for example_no in range(1, N + 1):
            # Generate a sparse symmetric definite positive matrix.
            sigma = make_sparse_spd_matrix(p,
                                           alpha=a,
                                           smallest_coef=-1,
                                           largest_coef=1,
                                           norm_diag=False)
            if example_no % 20 == 0:
                dump_path = OUTPUT_DIR + \
                    'sigma_alpha={}_{}th-example.txt'.format(a, example_no)
                sigma.tofile(dump_path, sep=",", format="%s")
            X, y = generate_data(n, PARAMS['prob'], mu, sigma, beta)
            features_per_example.append(X)
            labels_per_example.append(y)
        X_in_all_alpha[a] = features_per_example
        y_in_all_alpha[a] = labels_per_example

    result = {}
    for m in models:
        train_acc, test_acc = evaluation(all_alpha,
                                         X_in_all_alpha,
                                         y_in_all_alpha,
                                         N,
                                         p,
                                         n,
                                         model=m)
        result[m] = (train_acc, test_acc)

    header = [
        'alpha', 'lasso-train', 'lasso-test', 'dlda-train', 'dlda-test',
        'svm-train', 'svm-test', 'tc-train', 'tc-test'
    ]
    reported_models = ('lasso', 'dlda', 'svm', 'tc')
    with open(OUTPUT_DIR + 'accuracy_comparison.csv', 'w') as fout:
        fout.write(','.join(header))
        fout.write('\n')
        for i, a in enumerate(all_alpha):
            row = [a]
            for model_name in reported_models:
                # Train accuracy first, then test accuracy.
                row.append(result[model_name][0][i])
                row.append(result[model_name][1][i])
            fout.write(','.join(map(str, row)))
            fout.write('\n')
def sample_data():
    """Draw a random non-negative sparse matrix and a random integer ``k``.

    Returns
    -------
    X : ndarray of shape (n_samples, n_samples)
        Element-wise absolute value of a sparse SPD matrix, with
        ``n_samples`` drawn uniformly from ``[100, 30000)`` and the zero
        probability drawn from a Beta(10, 1) distribution.
    k : int
        Integer drawn uniformly from ``[5, 50)``.
    """
    n_samples = np.random.randint(100, 30000)
    # The result is unused, but the draw is kept so that the global RNG
    # stream (and therefore ``k``) is unchanged for seeded runs.
    np.random.choice(np.linspace(0.1, 3))
    alpha = beta(10, 1).rvs()
    # The dimension is passed positionally: its keyword name differs
    # between scikit-learn releases (``dim`` vs ``n_dim``).
    X = np.abs(
        make_sparse_spd_matrix(n_samples,
                               alpha=alpha,
                               norm_diag=False,
                               smallest_coef=0.1,
                               largest_coef=0.7))
    k = np.random.randint(5, 50)
    return X, k
def test_correlation_permutation(self):
    """
    Generates a distribution with a sparse covariance matrix
    and sees if the non-zero values are correctly picked up
    by the correlation permuter

    The thresholded matrices are computed but not compared, so this is
    currently a smoke test.
    """
    p = 10
    n = 200
    # alpha is passed by keyword: scikit-learn >= 1.0 accepts only the
    # dimension positionally.
    C = make_sparse_spd_matrix(p, alpha=0.7)
    X = np.random.multivariate_normal(np.zeros(p), C, n)
    corr_model = correlation_permuter.CorrelationPermutationNetwork()
    corr_model.fit(X)
    corr = corr_model.correlation_
    C = methods.threshold_matrix(C, 0.001, binary=True)
    corr = methods.threshold_matrix(corr, 0.001, binary=True)
def _new_graph(n_features, alpha):
    """Draw a sparse precision matrix and the matching unit-diagonal covariance.

    The precision is generated with all random coefficients equal to 0.7
    using the module-level ``prng``. Both matrices are then rescaled so
    that the covariance has ones on its diagonal.

    Returns
    -------
    cov, prec : ndarray of shape (n_features, n_features)
    """
    global prng
    precision = make_sparse_spd_matrix(
        n_features,
        alpha=alpha,  # prob that a coeff is zero
        smallest_coef=0.7,
        largest_coef=0.7,
        random_state=prng)
    covariance = np.linalg.inv(precision)

    std = np.sqrt(np.diag(covariance))
    std_column = std[:, np.newaxis]
    # Columns first, then rows, exactly as in the two-step in-place scaling.
    covariance /= std
    covariance /= std_column
    precision *= std
    precision *= std_column
    return covariance, precision
def test_scio_bic_columnwise(self):
    """
    Generates a distribution with a sparse precision matrix
    and sees if the non-zero values are correctly picked up
    by SCIO using BIC over each column

    The thresholded matrices are computed but not compared, so this is
    currently a smoke test.
    """
    p = 10
    n = 200
    # alpha is passed by keyword: scikit-learn >= 1.0 accepts only the
    # dimension positionally.
    K = make_sparse_spd_matrix(p, alpha=0.7)
    C = np.linalg.inv(K)
    X = np.random.multivariate_normal(np.zeros(p), C, n)
    sc = SCIOColumnBIC()
    sc.fit(X)
    K = methods.threshold_matrix(K, 0.001, binary=True)
    prec_ = methods.threshold_matrix(sc.precision_, 0.001, binary=True)
def test_clime_cv(self):
    """
    Sees how CLIME performs when we use cross validation
    to select lambda

    Both the R estimate and the fitted precision are printed; nothing is
    asserted.
    """
    p = 50
    n = 10
    # alpha is passed by keyword: scikit-learn >= 1.0 accepts only the
    # dimension positionally.
    K = make_sparse_spd_matrix(p, alpha=0.7)
    C = np.linalg.inv(K)
    X = np.random.multivariate_normal(np.zeros(p), C, n)
    l = 0.5
    r_prec = self._estimate_precision_matrix_using_r(X, l)
    print(r_prec)
    cl = CLIMECV(True)
    cl.fit(X)
    print(cl.precision_)
def make_data(n_samples, n_features):
    """Sample standardised Gaussian data from a sparse precision matrix.

    A ``RandomState(1)`` generator drives both the precision matrix and the
    samples, so the output is reproducible.

    Returns
    -------
    X : ndarray of shape (n_samples, n_features)
        Samples, centred and scaled to unit standard deviation per column.
    cov : ndarray
        Covariance rescaled to have a unit diagonal.
    prec : ndarray
        Precision matrix rescaled consistently with ``cov``.
    """
    rng = np.random.RandomState(1)
    precision = make_sparse_spd_matrix(n_features,
                                       alpha=.98,
                                       smallest_coef=.4,
                                       largest_coef=.7,
                                       random_state=rng)
    covariance = np.linalg.inv(precision)

    std = np.sqrt(np.diag(covariance))
    std_column = std[:, np.newaxis]
    covariance /= std
    covariance /= std_column
    precision *= std
    precision *= std_column

    zero_mean = np.zeros(n_features)
    samples = rng.multivariate_normal(zero_mean, covariance, size=n_samples)
    samples -= samples.mean(axis=0)
    samples /= samples.std(axis=0)
    return samples, covariance, precision
def test_scaled_lasso_precision_network(self):
    """
    We test our implementation of the scaled lasso based precision
    matrix estimation against that of the authors.

    This sometimes fails, as long as the tolerance is low that's ok
    """
    p = 10
    n = 200
    # alpha is passed by keyword: scikit-learn >= 1.0 accepts only the
    # dimension positionally.
    K = make_sparse_spd_matrix(p, alpha=0.7)
    C = np.linalg.inv(K)
    X = np.random.multivariate_normal(np.zeros(p), C, n)
    sli = scaled_lasso.ScaledLassoInference()
    sli.fit(X)
    prec_r = self._estimate_precision_matrix_using_r(X)
    assert_array_almost_equal(prec_r, sli.precision_, decimal=1)
def test_graphical_lasso_cv(random_state=1):
    """Smoke-test the verbose mode of ``GraphicalLassoCV``.

    Data are sampled from a sparse multivariate normal and the estimator is
    fitted while stdout is temporarily replaced by an in-memory buffer.
    """
    n_dims = 5
    sample_count = 6
    rng = check_random_state(random_state)

    # Sample data from a sparse multivariate normal
    precision = make_sparse_spd_matrix(n_dims, alpha=0.96, random_state=rng)
    covariance = linalg.inv(precision)
    X = rng.multivariate_normal(np.zeros(n_dims), covariance,
                                size=sample_count)

    # Capture stdout, to smoke test the verbose mode
    saved_stdout = sys.stdout
    try:
        sys.stdout = StringIO()
        # We need verbose very high so that Parallel prints on stdout
        estimator = GraphicalLassoCV(verbose=100, alphas=5, tol=1e-1)
        estimator.fit(X)
    finally:
        sys.stdout = saved_stdout
def test_scio_with_diag_penalty(self):
    """
    Generates a distribution with a sparse precision matrix
    and sees if the non-zero values are correctly picked up
    by SCIO

    The diagonal is penalised, and the result is compared against the R
    estimate to four decimals.
    """
    p = 10
    n = 200
    # alpha is passed by keyword: scikit-learn >= 1.0 accepts only the
    # dimension positionally.
    K = make_sparse_spd_matrix(p, alpha=0.7)
    C = np.linalg.inv(K)
    X = np.random.multivariate_normal(np.zeros(p), C, n)
    l = 0.5
    sc = SCIO(l, penalize_diag=True)
    sc.fit(X)
    r_prec = self._estimate_precision_matrix_using_r(X, l, True)
    assert_array_almost_equal(r_prec, sc.precision_, decimal=4)
def get_synthetic_data(n_samples,
                       n_features,
                       precision_matrix=None,
                       alpha=0.98,
                       seed=1):
    """Generate synthetic data from a (possibly random) precision matrix.

    The covariance is obtained by inverting the precision matrix; both are
    rescaled so that the covariance has a unit diagonal before sampling.

    Args:
        n_samples (int): number of samples (rows) to draw.
        n_features (int): number of variables (columns).
        precision_matrix (array-like, optional): precision matrix to use.
            It is copied, so the caller's array is never modified. When
            None, a sparse SPD matrix is generated. Defaults to None.
        alpha (float, optional): forwarded as ``alpha`` to
            make_sparse_spd_matrix when the precision matrix is generated.
            Defaults to 0.98.
        seed (int, optional): seed of the random generator. Defaults to 1.

    Returns:
        tuple: a tuple with two elements. The first is a pd.DataFrame
        representing the data, standardised per column, with columns named
        "gene<i>" and rows named "sample<i>". The second is the rescaled
        precision matrix used to generate the data.
    """
    prng = np.random.RandomState(seed)
    if precision_matrix is None:
        prec = make_sparse_spd_matrix(n_features,
                                      alpha=alpha,
                                      smallest_coef=.1,
                                      largest_coef=.9,
                                      random_state=prng)
    else:
        # Work on a float copy: the in-place rescaling below would
        # otherwise overwrite the caller's matrix (and fail for ints).
        prec = np.array(precision_matrix, dtype=float)

    cov = linalg.inv(prec)
    d = np.sqrt(np.diag(cov))
    cov /= d
    cov /= d[:, np.newaxis]
    prec *= d
    prec *= d[:, np.newaxis]

    X = prng.multivariate_normal(np.zeros(n_features), cov, size=n_samples)
    X -= X.mean(axis=0)
    X /= X.std(axis=0)

    X = pd.DataFrame(X)
    X.columns = ["gene" + str(i) for i in X.columns]
    X.index = ["sample" + str(i) for i in X.index]
    return X, prec
def generate_latent_network(n_obs=100, n_lat=10, n_samples=500,
                            sparsity_obs=0.3, sparsity_lat=0.7,
                            sparsityinter=0.3, random_state=None):
    """Generate a precision matrix with latent variables and sample from it.

    One sparse SPD matrix of size ``n_obs + n_lat`` is drawn and split into
    a latent block (first ``n_lat`` rows/columns), an observed block and
    the block coupling the two.

    Parameters
    ----------
    n_obs : int
        Number of observed variables.
    n_lat : int
        Number of latent variables.
    n_samples : int
        Number of samples to draw.
    sparsity_obs : float
        Forwarded as ``alpha`` to ``make_sparse_spd_matrix``.
    sparsity_lat, sparsityinter : float
        Accepted but not used by this function.
    random_state
        Forwarded to ``make_sparse_spd_matrix`` only; the samples are drawn
        from NumPy's global RNG.

    Returns
    -------
    T_obs : ndarray
        Observed precision: observed block minus the latent contribution.
    T_true : ndarray
        Observed-observed block of the generated matrix.
    H_true : ndarray
        Latent-latent block.
    K_true : ndarray
        Observed-latent block.
    samples : ndarray of shape (n_samples, n_obs)
        Zero-mean normal samples with covariance ``inv(T_obs)``.
    """
    # The dimension is passed positionally: its keyword name differs
    # between scikit-learn releases (``dim`` vs ``n_dim``).
    A = make_sparse_spd_matrix(n_obs + n_lat, alpha=sparsity_obs,
                               random_state=random_state)

    T_true = A[n_lat:, n_lat:]
    K_true = A[n_lat:, :n_lat]
    H_true = A[0:n_lat, 0:n_lat]

    # The coupling block is damped before the latent part is marginalised.
    per_cov = K_true * 0.3
    latent_effect = per_cov.dot(np.linalg.inv(H_true)).dot(per_cov.T)
    T_obs = T_true - latent_effect
    print(is_pos_semi_def(latent_effect))

    samples = np.random.multivariate_normal(np.zeros(n_obs),
                                            np.linalg.inv(T_obs), n_samples)
    return T_obs, T_true, H_true, K_true, samples
def test_clime(self):
    """
    Generates a distribution with a sparse precision matrix
    and sees if the non-zero values are correctly picked up
    by the CLIME

    The fitted precision is compared against the R estimate to two
    decimals.
    """
    p = 50
    n = 10
    # alpha is passed by keyword: scikit-learn >= 1.0 accepts only the
    # dimension positionally.
    K = make_sparse_spd_matrix(p, alpha=0.7)
    C = np.linalg.inv(K)
    X = np.random.multivariate_normal(np.zeros(p), C, n)
    l = 0.5
    r_prec = self._estimate_precision_matrix_using_r(X, l)
    print(r_prec)
    cl = CLIME(l, True)
    cl.fit(X)
    assert_array_almost_equal(r_prec, cl.precision_, decimal=2)
def para_gen(n_area):
    """Generate a random ``n_area`` x ``n_area`` parameter matrix.

    One of three constructions is used, selected by the module-level
    ``which_data`` (1, 2 or 3; any other value picks one at random):

    1. the negative of a sparse SPD matrix;
    2. a random matrix shifted, rescaled and sparsified until its largest
       real eigenvalue part is non-positive;
    3. a sparse random matrix made antisymmetric and rescaled.

    The module-level ``scale`` multiplies the coefficient bounds used by
    constructions 1 and 3.
    """
    sparsity = np.random.uniform(0.1, 0.7)
    lower_b = np.random.uniform(-0.1, 0.1) * scale
    upper_b = np.random.uniform(lower_b, 0.1) * scale

    if which_data == 1:
        which_kind = 1.0 / 6
    elif which_data == 2:
        which_kind = 1.0 / 2
    elif which_data == 3:
        which_kind = 5.0 / 6
    else:
        which_kind = np.random.rand()

    def rand_1(n):
        return np.random.uniform(-1, 1, n)

    if which_kind < 1.0 / 3:
        # alpha is passed by keyword: scikit-learn >= 1.0 accepts only the
        # dimension positionally.
        tmp = -make_sparse_spd_matrix(n_area,
                                      alpha=1 - sparsity,
                                      smallest_coef=lower_b,
                                      largest_coef=upper_b)
    elif which_kind < 2.0 / 3:
        while True:
            lower_b = np.random.uniform(-0.1, 0.1)
            upper_b = np.random.uniform(lower_b, 0.1)
            tmp = np.random.uniform(-1, 1, (n_area, n_area))
            # Shift the spectrum so that the largest real part becomes 0.
            tmp = tmp - np.sort(np.real(
                LA.eig(tmp)[0]))[-1] * np.eye(n_area)
            fmax = np.amax(tmp)
            fmin = np.amin(tmp)
            # Affinely map the entries onto [lower_b, upper_b].
            tmp = (upper_b - lower_b) / (fmax - fmin) * tmp + (
                lower_b * fmax - upper_b * fmin) / (fmax - fmin)
            # ``toarray()`` replaces the ``.A`` shorthand, which recent
            # SciPy releases no longer provide on sparse matrices.
            pattern = sp.random(n_area, n_area, density=sparsity).toarray()
            tmp = (abs(pattern) > 0) * tmp
            if np.sort(np.real(LA.eig(tmp)[0]))[-1] <= 0:
                break
    else:
        tmp = sp.random(n_area, n_area, density=sparsity,
                        data_rvs=rand_1).toarray()
        tmp = (tmp - tmp.T) / 2
        tmp = abs(lower_b) / np.amax(abs(tmp)) * tmp
    return tmp
def instance(n, p, alpha, rho):
    """Generate Gaussian data from a sparse precision matrix.

    Parameters
    ----------
    n : int
        Number of samples.
    p : int
        Number of variables.
    alpha : float
        Forwarded as ``alpha`` to ``make_sparse_spd_matrix``.
    rho : float
        Used as both the smallest and the largest coefficient.

    Returns
    -------
    X : ndarray of shape (n, p)
        Samples divided by ``sqrt(n)``.
    prec : ndarray of shape (p, p)
        Precision matrix, rescaled so the covariance has a unit diagonal.
    nonzero : ndarray
        Positions, within the flattened off-diagonal entries, of the
        non-zero precision coefficients.
    """
    # Generate the data
    precision = make_sparse_spd_matrix(p,
                                       alpha=alpha,
                                       smallest_coef=rho,
                                       largest_coef=rho,
                                       norm_diag=True)

    off_diagonal_mask = np.logical_not(np.identity(p, dtype=bool))
    nonzero = np.nonzero(precision[off_diagonal_mask] != 0)[0]

    covariance = np.linalg.inv(precision)
    std = np.sqrt(np.diag(covariance))
    std_column = std[:, np.newaxis]
    covariance /= std
    covariance /= std_column
    precision *= std
    precision *= std_column

    X = np.random.multivariate_normal(np.zeros(p), covariance, size=n)
    X /= np.sqrt(n)
    return X, precision, nonzero
# Copyright: INRIA import numpy as np from scipy import linalg from sklearn.datasets import make_sparse_spd_matrix from sklearn.covariance import GraphLassoCV, ledoit_wolf import pylab as pl ############################################################################## # Generate the data n_samples = 60 n_features = 20 prng = np.random.RandomState(1) prec = make_sparse_spd_matrix(n_features, alpha=.98, smallest_coef=.4, largest_coef=.7, random_state=prng) cov = linalg.inv(prec) d = np.sqrt(np.diag(cov)) cov /= d cov /= d[:, np.newaxis] prec *= d prec *= d[:, np.newaxis] X = prng.multivariate_normal(np.zeros(n_features), cov, size=n_samples) X -= X.mean(axis=0) X /= X.std(axis=0) ############################################################################## # Estimate the covariance emp_cov = np.dot(X.T, X) / n_samples
# Script: generate a background and a foreground Gaussian dataset whose
# precision matrices differ by a sparse "delta", and save everything as CSV
# files in the current directory.
from optparse import OptionParser, Option
import numpy as np
from sklearn.datasets import make_sparse_spd_matrix

# Command-line options.
parser = OptionParser()
parser.add_option("-n", "--nodes", dest="nodes", type="int", default=10, help="Number of nodes")
parser.add_option("-s", "--bkgrnd_sparsity", dest="bkgrnd_sparsity", type="float", default=0.95, help="Sparsity of generated precision matrix")
parser.add_option("-d", "--delta_sparsity", dest="delta_sparsity", type="float", default=0.95, help="Sparsity of generated delta precision matrix")
parser.add_option("-b", "--bkgrnd_datapoints", dest="bkgrnd_datapoints", type="int", default=100000, help="Number of background datapoints")
parser.add_option("-f", "--foregrnd_datapoints", dest="foregrnd_datapoints", type="int", default=100000, help="Number of foreground datapoints")
(options, args) = parser.parse_args()

# Both datasets share a zero mean vector.
mean = [0.0 for n in range(options.nodes)]

# Fixed seeds (1 and 100) make the two precision matrices reproducible.
bkgrnd_prec = make_sparse_spd_matrix(options.nodes, alpha=options.bkgrnd_sparsity, smallest_coef=0.5, largest_coef=0.9, random_state=np.random.RandomState(1), norm_diag=True)
delta_prec = make_sparse_spd_matrix(options.nodes, alpha=options.delta_sparsity, smallest_coef=0.5, largest_coef=0.9, random_state=np.random.RandomState(100), norm_diag=True)
# The foreground precision is the background precision plus the delta.
foregrnd_prec = bkgrnd_prec + delta_prec

bkgrnd_cov = np.linalg.inv(bkgrnd_prec)
foregrnd_cov = np.linalg.inv(foregrnd_prec)

# The samples come from NumPy's global RNG, which is not seeded here.
bkgrnd_data = np.random.multivariate_normal(mean,bkgrnd_cov,options.bkgrnd_datapoints)
foregrnd_data = np.random.multivariate_normal(mean,foregrnd_cov,options.foregrnd_datapoints)

# Write the parameters and both datasets.
np.savetxt('mean.csv', mean, delimiter=',')
np.savetxt('bkgrnd_prec.csv', bkgrnd_prec, delimiter=',')
np.savetxt('delta_prec.csv', delta_prec, delimiter=',')
np.savetxt('foregrnd_prec.csv', foregrnd_prec, delimiter=',')
np.savetxt('bkgrnd_data.csv', bkgrnd_data, delimiter=',')
np.savetxt('foregrnd_data.csv', foregrnd_data, delimiter=',')