Example #1
0
    def test_MX_knockoff_dist(self):
        """Validate model-X knockoffs on a scaled AR1 design and daibarber2016 designs.

        Every design is passed to ``self.check_valid_mxknockoffs`` twice:
        once with the true ``mu`` and ``Sigma`` supplied ("oracle"), and once
        with neither supplied (labelled "ESTIMATED" in the failure message).
        """
        np.random.seed(110)
        n = 100000
        copies = 3
        p = 5
        scale = 4  # multiplier applied to the AR1 matrix below

        # Check with a non-correlation matrix
        V = scale * graphs.AR1(p=p, rho=0.5)
        mu = np.random.randn(p)
        print(f"true mu: {mu}")
        X, _, _, _, _ = graphs.sample_data(
            corr_matrix=V,
            n=n,
            mu=mu,
            p=p,
        )
        print(f"X mean: {X.mean(axis=0)}")

        # Check validity for oracle cov matrix.
        # The label now reports the multiplier actually used to build V.
        self.check_valid_mxknockoffs(X,
                                     mu=mu,
                                     Sigma=V,
                                     copies=1,
                                     msg=f'ORACLE {scale}*AR1(rho=0.5)')

        # Check validity for estimated cov matrix
        self.check_valid_mxknockoffs(X,
                                     copies=copies,
                                     msg=f'ESTIMATED {scale}*AR1(rho=0.5)')

        # Check for many types of data
        for rho in [0.1, 0.9]:
            for gamma in [0.5, 1]:
                # NOTE(review): `method` is never forwarded to
                # check_valid_mxknockoffs, so both iterations exercise the
                # same configuration on freshly sampled data. Confirm whether
                # the helper accepts a `method` argument and pass it if so.
                for method in ['mvr', 'sdp']:

                    mu = 10 * np.random.randn(p)
                    X, _, _, _, corr_matrix, _ = graphs.daibarber2016_graph(
                        n=n, p=p, gamma=gamma, rho=rho, mu=mu)
                    label = f'daibarber graph, rho = {rho}, gamma = {gamma}'

                    # Check validity for oracle correlation matrix
                    self.check_valid_mxknockoffs(
                        X,
                        mu=mu,
                        Sigma=corr_matrix,
                        copies=copies,
                        msg=label)

                    # Check validity for estimation
                    self.check_valid_mxknockoffs(
                        X,
                        copies=copies,
                        msg=f'ESTIMATED {label}')
Example #2
0
    @classmethod
    def setUpClass(cls):
        """Create the data sets and ``GroupKnockoffEval`` objects shared by the tests.

        Two fixtures are stored as class attributes:

        * ``X, y, beta, corr_matrix, groups, link, gkval`` -- n=200, p=30,
          q=0.4, binomial response, sparsity 0.5.
        * ``X2, y2, beta2, corr_matrix2, groups2, link2, gkval2`` -- n=1000,
          p=100, q=0.2, binomial response, gamma=0.01, with every feature in
          its own group and the evaluator built with ``method='ASDP'``.

        unittest invokes this hook on the class with no arguments, so it has
        to be a classmethod.
        """
        # Create dgp
        cls.n = 200
        cls.p = 30
        cls.q = 0.4
        np.random.seed(110)
        cls.X, cls.y, cls.beta, _, cls.corr_matrix, cls.groups = graphs.daibarber2016_graph(
            n=cls.n, p=cls.p, y_dist='binomial', sparsity=0.5)
        cls.link = graphs.create_correlation_tree(cls.corr_matrix,
                                                  method='average')

        # Create class
        cls.gkval = GroupKnockoffEval(cls.corr_matrix,
                                      cls.q,
                                      cls.beta,
                                      verbose=False,
                                      feature_stat_kwargs={'use_pyglm': False})

        # Repeat, but with gamma = 0.01 and a larger p
        cls.n2 = 1000
        cls.p2 = 100
        cls.q2 = 0.2
        np.random.seed(110)  # re-seed so the second fixture is reproducible on its own
        cls.X2, cls.y2, cls.beta2, _, cls.corr_matrix2, _ = graphs.daibarber2016_graph(
            n=cls.n2, p=cls.p2, gamma=0.01, y_dist='binomial')
        # One group per feature, labelled 1..p2 (the groups returned by the
        # graph call above are discarded).
        cls.groups2 = np.arange(0, cls.p2, 1) + 1
        cls.link2 = graphs.create_correlation_tree(cls.corr_matrix2,
                                                   method='average')

        # Create class
        cls.gkval2 = GroupKnockoffEval(cls.corr_matrix2,
                                       cls.q2,
                                       cls.beta2,
                                       feature_stat_kwargs={'use_pyglm': True},
                                       verbose=True,
                                       method='ASDP')
Example #3
0
    def test_daibarber2016_sample(self):
        """Check the default output of ``graphs.daibarber2016_graph``.

        Verifies that the returned covariance matrix equals an independently
        built block matrix (p=1000, groups of 5, rho=0.5, gamma=0) and that
        the returned ``beta`` has 100 nonzero entries spread over 20 groups.
        """
        # Values the default call is expected to correspond to
        p = 1000
        groupsize = 5

        # Check that defaults are correct - start w cov matrix
        _, _, beta, _, V, _ = graphs.daibarber2016_graph()

        # Construct expected cov matrix -  this is a different
        # construction than the actual function
        def construct_expected_V(p, groupsize, rho, gamma):
            """Return a block matrix: 1 on the diagonal, rho in-group, gamma*rho elsewhere."""
            # Construct groups with rho ingroup correlation
            block = np.zeros((groupsize, groupsize)) + rho
            block += (1 - rho) * np.eye(groupsize)
            blocks = [block] * (p // groupsize)
            expected = sp.linalg.block_diag(*blocks)

            # Add gamma between-group correlations: block_diag leaves exact
            # zeros off the blocks, which are overwritten here
            expected[expected == 0] = gamma * rho
            return expected

        expected = construct_expected_V(p=p,
                                        groupsize=groupsize,
                                        rho=0.5,
                                        gamma=0)

        # Test equality with actual one
        np.testing.assert_array_almost_equal(
            V,
            expected,
            err_msg='Default daibarber2016 cov matrix is incorrect')

        # Check number of nonzero groups: integer division maps a feature
        # index to the index of its (consecutive) group
        nonzero_inds = np.arange(0, p, 1)[beta != 0]
        num_nonzero_groups = np.unique(nonzero_inds // groupsize).shape[0]
        self.assertEqual(
            num_nonzero_groups,
            20,
            msg=
            f'Default daibarber2016 beta has {num_nonzero_groups} nonzero groups, expected 20'
        )

        # Check number of nonzero features
        num_nonzero_features = (beta != 0).sum()
        self.assertEqual(
            num_nonzero_features,
            100,
            msg=
            f'Default daibarber2016 beta has {num_nonzero_features} nonzero features, expected 100'
        )
Example #4
0
    def test_FX_knockoff_dist(self):
        """Check the Gram matrix of fixed-X knockoffs on daibarber2016 designs.

        For each (rho, gamma, method) combination, knockoffs are generated
        with ``fixedX=True``. After rescaling by the square roots of the
        diagonal of ``X.T @ X``, the Gram matrix of ``[X, knockoff]`` must
        equal ``[[Sigma, Sigma - S], [Sigma - S, Sigma]]`` to 5 decimals,
        where ``Sigma`` is the Gram matrix of the rescaled ``X`` and ``S``
        is the rescaled S matrix returned by ``gaussian_knockoffs``.
        """
        n = 500
        p = 5
        for rho in [0.1, 0.9]:
            for gamma in [0.5, 1]:
                for method in ['mvr', 'sdp']:
                    # X values (only the design matrix is needed here)
                    X, _, _, _, _, _ = graphs.daibarber2016_graph(
                        n=n, p=p, gamma=gamma, rho=rho)

                    # Knockoffs and S matrix; copies is 1 for gamma = 0.5
                    # and 2 for gamma = 1
                    all_knockoffs, S = knockoffs.gaussian_knockoffs(
                        X=X,
                        fixedX=True,
                        copies=int(gamma) + 1,
                        method=method,
                        return_S=True,
                        verbose=False)

                    # Scale properly so we can calculate: divide each column
                    # by the square root of the matching diagonal entry of
                    # X.T @ X. Only the last knockoff copy is checked.
                    scale = np.sqrt(np.diag(np.dot(X.T, X)).reshape(1, -1))
                    X = X / scale
                    knockoff_copy = all_knockoffs[:, :, -1] / scale
                    S = S / np.outer(scale, scale)

                    # Compute empirical (scaled) cov matrix
                    features = np.concatenate([X, knockoff_copy], axis=1)
                    G_hat = np.dot(features.T, features)

                    # Calculate what this should be
                    Sigma = np.dot(X.T, X)
                    G = np.block([
                        [Sigma, Sigma - S],
                        [Sigma - S, Sigma],
                    ])

                    # Test G has correct structure
                    msg = (
                        "Feature-knockoff cov matrix has incorrect values "
                        f"for daibarber graph, FX knockoffs, rho = {rho}, gamma = {gamma}"
                    )
                    np.testing.assert_array_almost_equal(G_hat,
                                                         G,
                                                         decimal=5,
                                                         err_msg=msg)
Example #5
0
    def test_error_raising(self):
        """Check that ``gaussian_knockoffs`` raises ``LinAlgError`` in two cases.

        1. An S matrix (the identity) is supplied together with a
           rho=0.8, gamma=1 daibarber2016 correlation matrix.
        2. Fixed-X knockoffs are requested with n = p = 100.
        """
        # Generate data
        n = 100
        p = 100
        X, _, _, _, corr_matrix, _ = graphs.daibarber2016_graph(
            n=n, p=p, gamma=1, rho=0.8)
        S_bad = np.eye(p)

        # Supplying S_bad must be rejected with the FDR-violation message
        with self.assertRaisesRegex(
                np.linalg.LinAlgError,
                "meaning FDR control violations are extremely likely"):
            knockoffs.gaussian_knockoffs(X=X,
                                         Sigma=corr_matrix,
                                         S=S_bad,
                                         verbose=False)

        # Test FX knockoff violations
        with self.assertRaisesRegex(
                np.linalg.LinAlgError,
                "FX knockoffs can't be generated with n"):
            knockoffs.gaussian_knockoffs(
                X=X,
                Sigma=corr_matrix,
                S=None,
                fixedX=True,
            )
Example #6
0
    def test_easy_sdp(self):
        """Check SDP / ASDP S matrices on daibarber2016 correlation matrices.

        With one group per feature, the S matrix from both
        ``compute_S_matrix`` and ``gaussian_knockoffs`` must equal the
        identity to 2 decimals. With the groups returned by the graph, the
        S matrix from 'sdp' and 'ASDP' must equal, to 2 decimals, the
        correlation matrix returned by the same graph call with gamma=0.
        Each S matrix is also passed to ``self.check_S_properties``.
        """
        sdp_msg = 'solve_group_SDP does not produce optimal S matrix (daibarber graphs)'
        asdp_msg = 'solve_group_ASDP does not produce optimal S matrix (daibarber graphs)'

        # Test non-group SDP first
        n, p = 200, 50
        X, _, _, _, corr_matrix, groups = graphs.daibarber2016_graph(
            n=n, p=p, gamma=0.3)

        # Every feature forms its own group, labelled 1..p
        singleton_groups = np.arange(0, p, 1) + 1

        # Keyword arguments common to every gaussian_knockoffs call below
        shared = dict(X=X, Sigma=corr_matrix, return_S=True, verbose=False)

        # S matrix computed directly
        S_direct = knockoffs.compute_S_matrix(
            Sigma=corr_matrix,
            groups=singleton_groups,
            method='sdp',
            verbose=True,
        )
        np.testing.assert_array_almost_equal(S_direct,
                                             np.eye(p),
                                             decimal=2,
                                             err_msg=sdp_msg)
        self.check_S_properties(corr_matrix, S_direct, singleton_groups)

        # Repeat for gaussian_knockoffs method
        _, S_via_knockoffs = knockoffs.gaussian_knockoffs(
            groups=singleton_groups, method='sdp', **shared)
        np.testing.assert_array_almost_equal(S_via_knockoffs,
                                             np.eye(p),
                                             decimal=2,
                                             err_msg=sdp_msg)
        self.check_S_properties(corr_matrix, S_via_knockoffs,
                                singleton_groups)

        # Test slightly harder case: the target is the correlation matrix
        # the graph returns when gamma=0
        _, _, _, _, expected_out, _ = graphs.daibarber2016_graph(
            n=n, p=p, gamma=0)
        _, S_grouped = knockoffs.gaussian_knockoffs(
            groups=groups, method='sdp', **shared)
        np.testing.assert_almost_equal(S_grouped,
                                       expected_out,
                                       decimal=2,
                                       err_msg=sdp_msg)
        self.check_S_properties(corr_matrix, S_grouped, groups)

        # Repeat for ASDP
        _, S_grouped_asdp = knockoffs.gaussian_knockoffs(
            groups=groups, method='ASDP', max_block=10, **shared)
        np.testing.assert_almost_equal(S_grouped_asdp,
                                       expected_out,
                                       decimal=2,
                                       err_msg=asdp_msg)
        self.check_S_properties(corr_matrix, S_grouped_asdp, groups)