Example #1
0
    def test_gowl_vs_glasso_duality_gap_3(self):
        """
        Duality Gap goes negative in this case. Should that happen?
        """
        np.random.seed(680)
        p = 10
        blocks = [
            Block(dim=p,
                  idx=0,
                  block_min_size=2,
                  block_max_size=6,
                  block_value=0.9),
            Block(dim=p,
                  idx=1,
                  block_min_size=2,
                  block_max_size=6,
                  block_value=-0.9),
            Block(dim=p,
                  idx=3,
                  block_min_size=2,
                  block_max_size=6,
                  block_value=-0.5),
        ]
        theta_star, blocks, theta_blocks = generate_theta_star_gowl(p=p,
                                                                    alpha=0.5,
                                                                    noise=0.1,
                                                                    blocks=blocks)
        lam1 = 0.001  # controls sparsity
        lam2 = 0.01  # encourages equality of coefficients
        rho = oscar_weights(lam1, lam2, (p ** 2 - p) / 2)

        theta_star = theta_star[0]
        sigma = np.linalg.inv(theta_star)
        n = 100
        X = np.random.multivariate_normal(np.zeros(p), sigma, n)
        X = standardize(X)
        S = np.cov(X.T)

        theta_0 = np.linalg.inv(S)
        model = GOWLModel(X, S, theta_0, lam1, lam2, 'backtracking', max_iters=100000)
        model.fit()
        theta_gowl = model.theta_hat

        gl = GraphicalLasso(max_iter=200)
        gl.fit(S)
        theta_glasso = gl.get_precision()

        print('Non zero entries in precision matrix {}'.format(np.count_nonzero(theta_gowl)))
        plot_multiple_theta_matrices_2d([theta_blocks, theta_star, theta_glasso, theta_gowl],
                                        [f"Blocks: {len(blocks)}", 'True Theta', 'GLASSO', 'GOWL'])

        _fit_evaluations(theta_star, theta_glasso, 3, 'GLASSO')
        _fit_evaluations(theta_star, theta_gowl, 3, 'GOWL')

        y_hat_gowl = spectral_clustering(theta=theta_gowl, K=4)
        y_hat_glasso = spectral_clustering(theta=theta_glasso, K=4)
        y_true = spectral_clustering(theta=theta_blocks, K=4).flatten()
        _cluster_evaluations(y_true, y_hat_gowl, 'GOWL')
        _cluster_evaluations(y_true, y_hat_glasso, 'GLASSO')
Example #2
0
    def test_ccgowl_vs_grab_2(self):
        np.random.seed(680)
        p = 50
        block_percentage = 0.1

        alpha = 0.5
        noise = 0.1
        K = int(p * block_percentage)
        block_min_size = p * 0.1
        block_max_size = p * 0.4
        ccl_1 = 0.1  # p = 50
        ccl_2 = 0.004  # p = 50

        thetas_with_noise, theta_blocks, scov_matrices, X_matrices = generate_synthetic_data(K,
                                                                                             p,
                                                                                             block_min_size,
                                                                                             block_max_size,
                                                                                             alpha,
                                                                                             noise,
                                                                                             1)

        S = scov_matrices[0]
        model = CCGOWLModel(X_matrices[0], ccl_1, ccl_2)
        model.fit()
        theta_ccgowl = model.theta_hat

        lmbda = .2
        K = 10
        o_size = .3  # The size of overlap, as an input parameter
        max_iter = 20
        tol = 1e-4
        dual_max_iter = 600
        dual_tol = 1e-4
        theta_grab, blocks = grab.BCD(S,
                                      lmbda=lmbda,
                                      K=K,
                                      o_size=o_size,
                                      max_iter=max_iter,
                                      tol=tol,
                                      dual_max_iter=dual_max_iter,
                                      dual_tol=dual_tol)

        theta_grab = np.asarray(theta_grab)

        plot_multiple_theta_matrices_2d([S, theta_blocks, thetas_with_noise[0], theta_grab, theta_ccgowl],
                                        ['Sample Covariance', f"Blocks: {len(blocks)}", 'True Theta', 'GRAB', 'CCGOWL'])

        _fit_evaluations(theta_blocks, theta_grab, 1, 'GRAB')
        _fit_evaluations(theta_blocks, theta_ccgowl, 1, 'CCGOWL')

        y_hat_gowl = spectral_clustering(theta=theta_ccgowl, K=2)
        y_hat_grab = spectral_clustering(theta=theta_grab, K=2)
        y_true = spectral_clustering(theta=theta_blocks, K=2).flatten()
        _cluster_evaluations(y_true, y_hat_gowl, 'CCGOWL')
        _cluster_evaluations(y_true, y_hat_grab, 'GRAB')
Example #3
0
    def test_ccgowl_vs_grab_1(self):
        np.random.seed(680)
        p = 10
        n_blocks = 1
        theta_star, blocks, theta_blocks = generate_theta_star_gowl(p=p,
                                                                    alpha=0.5,
                                                                    noise=0.1,
                                                                    n_blocks=n_blocks,
                                                                    block_min_size=2,
                                                                    block_max_size=6)
        theta_star = theta_star[0]
        sigma = np.linalg.inv(theta_star)
        n = 100
        X = np.random.multivariate_normal(np.zeros(p), sigma, n)
        X = standardize(X)
        S = np.cov(X.T)

        lam1 = 0.05263158
        lam2 = 0.05263158

        theta_owl = np.zeros((p, p))
        model = CCGOWLModel(X, lam1, lam2)
        model.fit()
        theta_ccgowl = model.theta_hat

        lmbda = .2
        K = 10
        o_size = .3  # The size of overlap, as an input parameter
        max_iter = 20
        tol = 1e-4
        dual_max_iter = 600
        dual_tol = 1e-4
        theta_grab, blocks = grab.BCD(S,
                                      lmbda=lmbda,
                                      K=K,
                                      o_size=o_size,
                                      max_iter=max_iter,
                                      tol=tol,
                                      dual_max_iter=dual_max_iter,
                                      dual_tol=dual_tol)

        theta_grab = np.asarray(theta_grab)

        print('Non zero entries in precision matrix {}'.format(np.count_nonzero(theta_owl)))
        plot_multiple_theta_matrices_2d([S, theta_blocks, theta_star, theta_grab, theta_ccgowl],
                                        ['Sample Covariance', f"Blocks: {len(blocks)}", 'True Theta', 'GRAB', 'CCGOWL'])

        _fit_evaluations(theta_star, theta_grab, 1, 'GRAB')
        _fit_evaluations(theta_star, theta_owl, 1, 'GOWL')

        y_hat_gowl = spectral_clustering(theta=theta_owl, K=2)
        y_hat_grab = spectral_clustering(theta=theta_grab, K=2)
        y_true = spectral_clustering(theta=theta_blocks, K=2).flatten()
        _cluster_evaluations(y_true, y_hat_gowl, 'CCGOWL')
        _cluster_evaluations(y_true, y_hat_grab, 'GRAB')
    def test_gowl_vs_grab_1(self):
        np.random.seed(680)
        p = 10
        n_blocks = 1
        theta_star, blocks, theta_blocks = generate_theta_star_gowl(p=p,
                                                                    alpha=0.5,
                                                                    noise=0.1,
                                                                    n_blocks=n_blocks,
                                                                    block_min_size=2,
                                                                    block_max_size=6)
        theta_star = theta_star[0]
        sigma = np.linalg.inv(theta_star)
        n = 100
        X = np.random.multivariate_normal(np.zeros(p), sigma, n)
        X = standardize(X)
        S = np.cov(X.T)

        lam1 = 0.001  # controls sparsity
        lam2 = 0.01  # encourages equality of coefficients

        lmbda = .2
        K = 10
        o_size = .3  # The size of overlap, as an input parameter
        max_iter = 20
        tol = 1e-4
        dual_max_iter = 600
        dual_tol = 1e-4
        theta_grab, blocks = grab.BCD(S,
                                      lmbda=lmbda,
                                      K=K,
                                      o_size=o_size,
                                      max_iter=max_iter,
                                      tol=tol,
                                      dual_max_iter=dual_max_iter,
                                      dual_tol=dual_tol)

        theta_grab = np.asarray(theta_grab)

        model = GOWLModel(X, S, lam1, lam2, 'backtracking', max_iters=100000)
        model.fit()
        theta_gowl = model.theta_hat

        print('Non zero entries in precision matrix {}'.format(np.count_nonzero(theta_gowl)))
        plot_multiple_theta_matrices_2d([theta_blocks, theta_star, theta_grab, theta_gowl],
                                        [f"1 Block of Size 2", 'True Theta', 'GRAB', 'GOWL'])

        _fit_evaluations(theta_star, theta_grab, 1, 'GRAB')
        _fit_evaluations(theta_star, theta_gowl, 1, 'GOWL')

        y_hat_gowl = spectral_clustering(theta=theta_gowl, K=2)
        y_hat_grab = spectral_clustering(theta=theta_grab, K=2)
        y_true = spectral_clustering(theta=theta_blocks, K=2).flatten()
        _cluster_evaluations(y_true, y_hat_gowl, 'GOWL')
        _cluster_evaluations(y_true, y_hat_grab, 'GRAB')
    def run(self):
        np.random.seed(680)

        self.model.fit()
        theta_hat = self.model.theta_hat

        y_true_clusters_df = compute_true_group(theta_hat, self.info)
        K = len(np.unique(y_true_clusters_df.values[np.tril_indices(self.p, -1)].tolist()))

        theta_clusters = spectral_clustering(theta=theta_hat, K=K)
        theta_clusters = [int(cluster) for cluster in theta_clusters]
        theta_mat_clusters = np.zeros((self.p, self.p))
        theta_mat_clusters[np.tril_indices(self.p, -1)] = theta_clusters
        clusters_df = convert_to_df_with_labels(self.info, theta_mat_clusters.copy())

        y_true_clusters_df = normalize_dfs(y_true_clusters_df, self.info, self.p)
        clusters_df = normalize_dfs(clusters_df, self.info, self.p)

        self.true_groups = pairs_in_clusters(y_true_clusters_df, K)
        self.predicted_groups = pairs_in_clusters(clusters_df, K)