def test_MX_knockoff_dist(self):
    """Check model-X knockoff validity on a scaled AR1 and Dai-Barber designs.

    For each design the data are sampled once and handed to
    ``self.check_valid_mxknockoffs`` twice: first with the true ``mu`` and
    ``Sigma`` (the "oracle" case) and then with neither, so that the
    helper has to work from ``X`` alone (the "estimated" case).
    """
    np.random.seed(110)
    n = 100000
    copies = 3
    p = 5

    # Check with a non-correlation matrix: an AR1 matrix multiplied by a
    # constant. The constants are named so the failure labels below cannot
    # drift away from the matrix that is actually tested.
    cov_scale = 4
    ar1_rho = 0.5
    V = cov_scale * graphs.AR1(p=p, rho=ar1_rho)
    mu = np.random.randn(p)
    print(f"true mu: {mu}")
    X, _, _, _, _ = graphs.sample_data(
        corr_matrix=V,
        n=n,
        mu=mu,
        p=p,
    )
    print(f"X mean: {X.mean(axis=0)}")

    ar1_label = f'{cov_scale}*AR1(rho={ar1_rho})'

    # Check validity for oracle cov matrix (single knockoff copy)
    self.check_valid_mxknockoffs(
        X, mu=mu, Sigma=V, copies=1, msg=f'ORACLE {ar1_label}'
    )

    # Check validity for estimated cov matrix
    self.check_valid_mxknockoffs(
        X, copies=copies, msg=f'ESTIMATED {ar1_label}'
    )

    # Check for many types of data
    for rho in [0.1, 0.9]:
        for gamma in [0.5, 1]:
            # NOTE(review): the method name is never forwarded to
            # check_valid_mxknockoffs, so both iterations exercise the
            # same code path on fresh random data. The loop is kept so
            # the number of sampled datasets is unchanged; confirm
            # whether `method=` was meant to be passed through.
            for _method in ['mvr', 'sdp']:
                mu = 10 * np.random.randn(p)
                X, _, _, _, corr_matrix, _ = graphs.daibarber2016_graph(
                    n=n, p=p, gamma=gamma, rho=rho, mu=mu
                )

                # Check validity for oracle correlation matrix
                self.check_valid_mxknockoffs(
                    X,
                    mu=mu,
                    Sigma=corr_matrix,
                    copies=copies,
                    msg=f'daibarber graph, rho = {rho}, gamma = {gamma}',
                )

                # Check validity for estimation
                self.check_valid_mxknockoffs(
                    X,
                    copies=copies,
                    msg=f'ESTIMATED daibarber graph, rho = {rho}, gamma = {gamma}',
                )
@classmethod
def setUpClass(cls):
    """Create the two data-generating processes shared by the tests.

    unittest calls this hook on the class itself with no arguments, so it
    has to be a classmethod; everything is stored as class attributes.

    First fixture (``n``, ``p``, ``q``, ``X``, ``y``, ``beta``,
    ``corr_matrix``, ``groups``, ``link``, ``gkval``): a binomial-response
    Dai-Barber graph with ``sparsity=0.5``.

    Second fixture (same names with a ``2`` suffix): a larger
    binomial-response Dai-Barber graph with ``gamma=0.01`` in which every
    feature is assigned its own group label.
    """
    # Create dgp
    cls.n = 200
    cls.p = 30
    cls.q = 0.4
    np.random.seed(110)
    (
        cls.X,
        cls.y,
        cls.beta,
        _,
        cls.corr_matrix,
        cls.groups,
    ) = graphs.daibarber2016_graph(
        n=cls.n, p=cls.p, y_dist='binomial', sparsity=0.5
    )
    cls.link = graphs.create_correlation_tree(
        cls.corr_matrix, method='average'
    )

    # Create class
    cls.gkval = GroupKnockoffEval(
        cls.corr_matrix,
        cls.q,
        cls.beta,
        verbose=False,
        feature_stat_kwargs={'use_pyglm': False},
    )

    # Repeat, but with gamma = 0.01 and larger n and p
    cls.n2 = 1000
    cls.p2 = 100
    cls.q2 = 0.2
    # Re-seed so the second fixture does not depend on how many random
    # draws the first one consumed
    np.random.seed(110)
    (
        cls.X2,
        cls.y2,
        cls.beta2,
        _,
        cls.corr_matrix2,
        _,
    ) = graphs.daibarber2016_graph(
        n=cls.n2, p=cls.p2, gamma=0.01, y_dist='binomial'
    )
    # Group labels 1..p2: one group per feature
    cls.groups2 = np.arange(0, cls.p2, 1) + 1
    cls.link2 = graphs.create_correlation_tree(
        cls.corr_matrix2, method='average'
    )

    # Create class
    cls.gkval2 = GroupKnockoffEval(
        cls.corr_matrix2,
        cls.q2,
        cls.beta2,
        feature_stat_kwargs={'use_pyglm': True},
        verbose=True,
        method='ASDP',
    )
def test_daibarber2016_sample(self):
    """Check the default output of ``graphs.daibarber2016_graph``.

    Verifies three things about a call with no arguments: the returned
    covariance matrix equals an independently constructed block matrix,
    the nonzero entries of ``beta`` fall in the expected number of
    groups, and ``beta`` has the expected number of nonzero entries.
    """
    # Values the defaults are expected to correspond to
    p = 1000
    groupsize = 5
    expected_groups = 20
    expected_features = 100

    # Check that defaults are correct - start w cov matrix
    _, _, beta, _, V, _ = graphs.daibarber2016_graph()

    # Construct expected cov matrix - this is a different
    # construction than the actual function
    def construct_expected_V(p, groupsize, rho, gamma):
        # Groups with rho in-group correlation and a unit diagonal
        block = np.full((groupsize, groupsize), rho, dtype=float)
        np.fill_diagonal(block, 1.0)
        expected = sp.linalg.block_diag(*([block] * (p // groupsize)))

        # Add gamma between-group correlations: every entry outside the
        # diagonal blocks is still exactly zero at this point
        expected[expected == 0] = gamma * rho
        return expected

    expected = construct_expected_V(
        p=p, groupsize=groupsize, rho=0.5, gamma=0
    )

    # Test equality with actual one
    np.testing.assert_array_almost_equal(
        V,
        expected,
        err_msg='Default daibarber2016 cov matrix is incorrect',
    )

    # Check number of nonzero groups: consecutive runs of `groupsize`
    # feature indices are treated as one group
    nonzero_inds = np.flatnonzero(beta)
    num_nonzero_groups = np.unique(nonzero_inds // groupsize).size
    self.assertEqual(
        num_nonzero_groups,
        expected_groups,
        msg=(
            f'Default daibarber2016 beta has {num_nonzero_groups} '
            f'nonzero groups, expected {expected_groups}'
        ),
    )

    # Check number of nonzero features
    num_nonzero_features = np.count_nonzero(beta)
    self.assertEqual(
        num_nonzero_features,
        expected_features,
        msg=(
            f'Default daibarber2016 beta has {num_nonzero_features} '
            f'nonzero features, expected {expected_features}'
        ),
    )
def test_FX_knockoff_dist(self):
    """Check the Gram matrix of fixed-X knockoffs on Dai-Barber designs.

    For every (rho, gamma, method) combination, fixed-X knockoffs are
    generated, the features and the last knockoff copy are rescaled by the
    column norms of ``X``, and the Gram matrix of ``[X, knockoff]`` is
    compared with ``[[Sigma, Sigma - S], [Sigma - S, Sigma]]`` where
    ``Sigma`` is the Gram matrix of the rescaled ``X``.
    """
    n = 500
    p = 5
    for rho in [0.1, 0.9]:
        for gamma in [0.5, 1]:
            for method in ['mvr', 'sdp']:
                # Sample the design matrix
                X, _, _, _, corr_matrix, _ = graphs.daibarber2016_graph(
                    n=n, p=p, gamma=gamma, rho=rho
                )

                # NOTE(review): computed but never used below
                trivial_groups = np.arange(0, p, 1) + 1

                # Generate knockoffs together with the S matrix
                fx_options = dict(
                    fixedX=True,
                    copies=int(gamma) + 1,
                    method=method,
                    return_S=True,
                    verbose=False,
                )
                all_knockoffs, S = knockoffs.gaussian_knockoffs(
                    X=X, **fx_options
                )

                # Column norms of X as a (1, p) row, so that dividing
                # rescales every column of X and of the knockoffs
                raw_gram = np.dot(X.T, X)
                scale = np.sqrt(np.diag(raw_gram)).reshape(1, -1)
                X = X / scale
                knockoff_copy = all_knockoffs[:, :, -1] / scale
                S = S / np.outer(scale, scale)

                # Empirical (scaled) Gram matrix of features + knockoffs
                features = np.concatenate([X, knockoff_copy], axis=1)
                G_hat = np.dot(features.T, features)

                # What the Gram matrix should look like
                Sigma = np.dot(X.T, X)
                off_diag = Sigma - S
                G = np.block([[Sigma, off_diag], [off_diag, Sigma]])

                # Test G has correct structure
                msg = (
                    "Feature-knockoff cov matrix has incorrect values "
                    f"for daibarber graph, FX knockoffs, rho = {rho}, gamma = {gamma}"
                )
                np.testing.assert_array_almost_equal(
                    G_hat, G, decimal=5, err_msg=msg
                )
def test_error_raising(self):
    """Check that ``gaussian_knockoffs`` raises on two invalid requests.

    Both calls are expected to raise ``np.linalg.LinAlgError`` with a
    message matching the given pattern: one when an identity ``S`` matrix
    is supplied together with the graph's correlation matrix, and one
    when fixed-X knockoffs are requested for data with ``n = p = 100``.
    """
    # Generate data
    n = 100
    p = 100
    X, _, _, _, corr_matrix, _ = graphs.daibarber2016_graph(
        n=n, p=p, gamma=1, rho=0.8
    )

    # An explicitly supplied identity S matrix should be rejected
    S_bad = np.eye(p)
    with self.assertRaisesRegex(
        np.linalg.LinAlgError,
        "meaning FDR control violations are extremely likely",
    ):
        knockoffs.gaussian_knockoffs(
            X=X, Sigma=corr_matrix, S=S_bad, verbose=False
        )

    # Test FX knockoff violations
    with self.assertRaisesRegex(
        np.linalg.LinAlgError,
        "FX knockoffs can't be generated with n",
    ):
        knockoffs.gaussian_knockoffs(
            X=X,
            Sigma=corr_matrix,
            S=None,
            fixedX=True,
        )
def test_easy_sdp(self):
    """Check SDP / ASDP S matrices on Dai-Barber graphs.

    With one group per feature, the S matrix from both
    ``knockoffs.compute_S_matrix`` and ``knockoffs.gaussian_knockoffs`` is
    compared with the identity. With the graph's own groups, the S matrix
    from the 'sdp' and 'ASDP' methods is compared with the correlation
    matrix of a ``gamma=0`` graph. Every comparison uses 2 decimals and is
    followed by ``self.check_S_properties``.
    """
    n = 200
    p = 50
    X, _, _, _, corr_matrix, groups = graphs.daibarber2016_graph(
        n=n, p=p, gamma=0.3
    )

    sdp_msg = 'solve_group_SDP does not produce optimal S matrix (daibarber graphs)'
    asdp_msg = 'solve_group_ASDP does not produce optimal S matrix (daibarber graphs)'
    identity = np.eye(p)
    # Keyword arguments common to every gaussian_knockoffs call below
    shared = dict(X=X, Sigma=corr_matrix, return_S=True, verbose=False)

    # Test non-group SDP first: group labels 1..p, one per feature
    trivial_groups = np.arange(0, p, 1) + 1
    S_triv = knockoffs.compute_S_matrix(
        Sigma=corr_matrix,
        groups=trivial_groups,
        method='sdp',
        verbose=True,
    )
    np.testing.assert_array_almost_equal(
        S_triv, identity, decimal=2, err_msg=sdp_msg
    )
    self.check_S_properties(corr_matrix, S_triv, trivial_groups)

    # Repeat for gaussian_knockoffs method
    _, S_triv2 = knockoffs.gaussian_knockoffs(
        groups=trivial_groups, method='sdp', **shared
    )
    np.testing.assert_array_almost_equal(
        S_triv2, identity, decimal=2, err_msg=sdp_msg
    )
    self.check_S_properties(corr_matrix, S_triv2, trivial_groups)

    # Test slightly harder case: the graph's own groups, compared with
    # the correlation matrix of a gamma=0 graph
    _, _, _, _, expected_out, _ = graphs.daibarber2016_graph(
        n=n, p=p, gamma=0
    )
    _, S_harder = knockoffs.gaussian_knockoffs(
        groups=groups, method='sdp', **shared
    )
    np.testing.assert_almost_equal(
        S_harder, expected_out, decimal=2, err_msg=sdp_msg
    )
    self.check_S_properties(corr_matrix, S_harder, groups)

    # Repeat for ASDP
    _, S_harder_ASDP = knockoffs.gaussian_knockoffs(
        groups=groups, method='ASDP', max_block=10, **shared
    )
    np.testing.assert_almost_equal(
        S_harder_ASDP, expected_out, decimal=2, err_msg=asdp_msg
    )
    self.check_S_properties(corr_matrix, S_harder_ASDP, groups)