def test_gowl_vs_glasso_duality_gap_3(self):
    """
    Duality Gap goes negative in this case. Should that happen?

    Builds a precision matrix from three block specifications, samples
    standardized data from it, fits GOWL and graphical lasso, then plots
    the estimates and reports fit and clustering evaluations for both.
    """
    np.random.seed(680)
    p = 10

    # (idx, block_value) for each requested block; every block shares the
    # same dimension and size bounds.
    block_specs = [
        Block(dim=p, idx=idx, block_min_size=2, block_max_size=6, block_value=value)
        for idx, value in ((0, 0.9), (1, -0.9), (3, -0.5))
    ]
    theta_star, true_blocks, theta_blocks = generate_theta_star_gowl(
        p=p,
        alpha=0.5,
        noise=0.1,
        blocks=block_specs,
    )

    lam1 = 0.001  # controls sparsity
    lam2 = 0.01  # encourages equality of coefficients
    # NOTE(review): rho is computed here but never read again in this test.
    rho = oscar_weights(lam1, lam2, (p ** 2 - p) / 2)

    theta_star = theta_star[0]
    n = 100
    X = standardize(
        np.random.multivariate_normal(np.zeros(p), np.linalg.inv(theta_star), n)
    )
    S = np.cov(X.T)
    theta_0 = np.linalg.inv(S)  # starting point handed to GOWL

    gowl = GOWLModel(X, S, theta_0, lam1, lam2, 'backtracking', max_iters=100000)
    gowl.fit()
    theta_gowl = gowl.theta_hat

    # NOTE(review): if GraphicalLasso is scikit-learn's estimator, fit()
    # expects the data matrix rather than a covariance -- confirm that
    # passing S here is intended.
    glasso = GraphicalLasso(max_iter=200)
    glasso.fit(S)
    theta_glasso = glasso.get_precision()

    print(f'Non zero entries in precision matrix {np.count_nonzero(theta_gowl)}')

    plot_multiple_theta_matrices_2d(
        [theta_blocks, theta_star, theta_glasso, theta_gowl],
        [f"Blocks: {len(true_blocks)}", 'True Theta', 'GLASSO', 'GOWL'],
    )

    _fit_evaluations(theta_star, theta_glasso, 3, 'GLASSO')
    _fit_evaluations(theta_star, theta_gowl, 3, 'GOWL')

    # Clustering calls stay in the original order (GOWL, GLASSO, truth).
    labels_gowl = spectral_clustering(theta=theta_gowl, K=4)
    labels_glasso = spectral_clustering(theta=theta_glasso, K=4)
    labels_true = spectral_clustering(theta=theta_blocks, K=4).flatten()
    _cluster_evaluations(labels_true, labels_gowl, 'GOWL')
    _cluster_evaluations(labels_true, labels_glasso, 'GLASSO')
def test_ccgowl_vs_grab_2(self):
    """Compare CCGOWL and GRAB estimates on one synthetic data set (p = 50).

    Generates synthetic data, fits CCGOWL on the first data matrix and
    GRAB on the first sample covariance, plots the matrices and reports
    fit and clustering evaluations against ``theta_blocks``.
    """
    np.random.seed(680)
    p = 50

    # Arguments for the synthetic-data generator.
    n_true_blocks = int(p * 0.1)
    block_min_size = p * 0.1
    block_max_size = p * 0.4
    alpha, noise = 0.5, 0.1
    (thetas_with_noise,
     theta_blocks,
     scov_matrices,
     X_matrices) = generate_synthetic_data(
        n_true_blocks, p, block_min_size, block_max_size, alpha, noise, 1
    )
    S = scov_matrices[0]

    # CCGOWL regularization values (chosen for p = 50).
    ccl_1, ccl_2 = 0.1, 0.004
    ccgowl = CCGOWLModel(X_matrices[0], ccl_1, ccl_2)
    ccgowl.fit()
    theta_ccgowl = ccgowl.theta_hat

    grab_options = {
        'lmbda': .2,
        'K': 10,
        'o_size': .3,  # The size of overlap, as an input parameter
        'max_iter': 20,
        'tol': 1e-4,
        'dual_max_iter': 600,
        'dual_tol': 1e-4,
    }
    theta_grab, grab_blocks = grab.BCD(S, **grab_options)
    theta_grab = np.asarray(theta_grab)

    # The "Blocks" title counts the blocks returned by grab.BCD.
    plot_multiple_theta_matrices_2d(
        [S, theta_blocks, thetas_with_noise[0], theta_grab, theta_ccgowl],
        ['Sample Covariance', f"Blocks: {len(grab_blocks)}", 'True Theta', 'GRAB', 'CCGOWL'],
    )

    _fit_evaluations(theta_blocks, theta_grab, 1, 'GRAB')
    _fit_evaluations(theta_blocks, theta_ccgowl, 1, 'CCGOWL')

    # Clustering calls stay in the original order (CCGOWL, GRAB, truth).
    labels_ccgowl = spectral_clustering(theta=theta_ccgowl, K=2)
    labels_grab = spectral_clustering(theta=theta_grab, K=2)
    labels_true = spectral_clustering(theta=theta_blocks, K=2).flatten()
    _cluster_evaluations(labels_true, labels_ccgowl, 'CCGOWL')
    _cluster_evaluations(labels_true, labels_grab, 'GRAB')
def test_ccgowl_vs_grab_1(self):
    """Compare CCGOWL and GRAB estimates on a small single-block problem (p = 10).

    Samples standardized data from a generated precision matrix, fits
    CCGOWL on the data and GRAB on the sample covariance, plots the
    matrices and reports fit and clustering evaluations for both
    estimates against the true precision matrix / block structure.
    """
    np.random.seed(680)
    p = 10
    n_blocks = 1
    theta_star, blocks, theta_blocks = generate_theta_star_gowl(
        p=p,
        alpha=0.5,
        noise=0.1,
        n_blocks=n_blocks,
        block_min_size=2,
        block_max_size=6,
    )
    theta_star = theta_star[0]
    sigma = np.linalg.inv(theta_star)
    n = 100
    X = np.random.multivariate_normal(np.zeros(p), sigma, n)
    X = standardize(X)
    S = np.cov(X.T)

    lam1 = 0.05263158
    lam2 = 0.05263158

    model = CCGOWLModel(X, lam1, lam2)
    model.fit()
    theta_ccgowl = model.theta_hat

    lmbda = .2
    K = 10
    o_size = .3  # The size of overlap, as an input parameter
    max_iter = 20
    tol = 1e-4
    dual_max_iter = 600
    dual_tol = 1e-4
    # `blocks` is rebound here to the blocks returned by grab.BCD, so the
    # plot title below counts GRAB's blocks.
    theta_grab, blocks = grab.BCD(
        S,
        lmbda=lmbda,
        K=K,
        o_size=o_size,
        max_iter=max_iter,
        tol=tol,
        dual_max_iter=dual_max_iter,
        dual_tol=dual_tol,
    )
    theta_grab = np.asarray(theta_grab)

    # All CCGOWL reporting uses the fitted estimate. Evaluating an all-zero
    # placeholder matrix would make the CCGOWL numbers meaningless.
    print('Non zero entries in precision matrix {}'.format(np.count_nonzero(theta_ccgowl)))

    plot_multiple_theta_matrices_2d(
        [S, theta_blocks, theta_star, theta_grab, theta_ccgowl],
        ['Sample Covariance', f"Blocks: {len(blocks)}", 'True Theta', 'GRAB', 'CCGOWL'],
    )

    _fit_evaluations(theta_star, theta_grab, 1, 'GRAB')
    _fit_evaluations(theta_star, theta_ccgowl, 1, 'CCGOWL')

    y_hat_ccgowl = spectral_clustering(theta=theta_ccgowl, K=2)
    y_hat_grab = spectral_clustering(theta=theta_grab, K=2)
    y_true = spectral_clustering(theta=theta_blocks, K=2).flatten()
    _cluster_evaluations(y_true, y_hat_ccgowl, 'CCGOWL')
    _cluster_evaluations(y_true, y_hat_grab, 'GRAB')
def test_gowl_vs_grab_1(self):
    """Compare GOWL and GRAB estimates on a small single-block problem (p = 10).

    Samples standardized data from a generated precision matrix, runs
    GRAB on the sample covariance and GOWL on the data, plots the
    matrices and reports fit and clustering evaluations for both.
    """
    np.random.seed(680)
    p = 10
    theta_star, _, theta_blocks = generate_theta_star_gowl(
        p=p,
        alpha=0.5,
        noise=0.1,
        n_blocks=1,
        block_min_size=2,
        block_max_size=6,
    )
    theta_star = theta_star[0]

    n = 100
    X = standardize(
        np.random.multivariate_normal(np.zeros(p), np.linalg.inv(theta_star), n)
    )
    S = np.cov(X.T)

    lam1 = 0.001  # controls sparsity
    lam2 = 0.01  # encourages equality of coefficients

    # GRAB runs first, then GOWL -- same order as before.
    theta_grab, _ = grab.BCD(
        S,
        lmbda=.2,
        K=10,
        o_size=.3,  # The size of overlap, as an input parameter
        max_iter=20,
        tol=1e-4,
        dual_max_iter=600,
        dual_tol=1e-4,
    )
    theta_grab = np.asarray(theta_grab)

    # NOTE(review): test_gowl_vs_glasso_duality_gap_3 passes an extra
    # theta_0 argument between S and lam1 -- confirm which call matches
    # the GOWLModel signature.
    gowl = GOWLModel(X, S, lam1, lam2, 'backtracking', max_iters=100000)
    gowl.fit()
    theta_gowl = gowl.theta_hat

    print(f'Non zero entries in precision matrix {np.count_nonzero(theta_gowl)}')

    plot_multiple_theta_matrices_2d(
        [theta_blocks, theta_star, theta_grab, theta_gowl],
        [f"1 Block of Size 2", 'True Theta', 'GRAB', 'GOWL'],
    )

    _fit_evaluations(theta_star, theta_grab, 1, 'GRAB')
    _fit_evaluations(theta_star, theta_gowl, 1, 'GOWL')

    # Clustering calls stay in the original order (GOWL, GRAB, truth).
    labels_gowl = spectral_clustering(theta=theta_gowl, K=2)
    labels_grab = spectral_clustering(theta=theta_grab, K=2)
    labels_true = spectral_clustering(theta=theta_blocks, K=2).flatten()
    _cluster_evaluations(labels_true, labels_gowl, 'GOWL')
    _cluster_evaluations(labels_true, labels_grab, 'GRAB')
def run(self):
    """Fit ``self.model`` and store reference and predicted pair groupings.

    After fitting, the number of clusters ``K`` is taken as the count of
    distinct values in the strict lower triangle of the frame returned by
    ``compute_true_group``. The estimate is then clustered into ``K``
    groups, the labels are written into the strict lower triangle of a
    ``p x p`` matrix, and both label frames are normalized before being
    grouped with ``pairs_in_clusters``.

    Results are stored on ``self.true_groups`` and
    ``self.predicted_groups``; nothing is returned.
    """
    np.random.seed(680)
    self.model.fit()
    estimate = self.model.theta_hat

    lower = np.tril_indices(self.p, -1)  # strict lower-triangle positions

    reference_df = compute_true_group(estimate, self.info)
    K = len(np.unique(reference_df.values[lower].tolist()))

    # One integer label per strict-lower-triangle entry.
    labels = [int(label) for label in spectral_clustering(theta=estimate, K=K)]
    label_matrix = np.zeros((self.p, self.p))
    label_matrix[lower] = labels
    predicted_df = convert_to_df_with_labels(self.info, label_matrix.copy())

    reference_df = normalize_dfs(reference_df, self.info, self.p)
    predicted_df = normalize_dfs(predicted_df, self.info, self.p)

    self.true_groups = pairs_in_clusters(reference_df, K)
    self.predicted_groups = pairs_in_clusters(predicted_df, K)