Example #1
    def test_trueER_sample(self):
        """ ER sampling following nodewise knockoffs paper """

        # Try er = Q
        p = 500
        delta = 0.5
        np.random.seed(110)
        _, _, _, Q, V = graphs.sample_data(p=p, delta=delta, method='qer')

        prop_nonzero = (np.abs(Q) > 0.001).mean()
        self.assertTrue(
            abs(prop_nonzero - delta) < 0.02,
            "True (Q)ErdosRenyi sampler fails to give correct sparsity")

        mean_val = (Q.sum() - np.diag(Q).sum()) / (p**2 - p)
        self.assertTrue(
            abs(mean_val) < 0.1,
            "True (Q)ErdosRenyi sampler fails to give correct mean val")

        # Try er = V
        delta = 0.1
        np.random.seed(110)
        _, _, _, Q, V = graphs.sample_data(p=p, delta=delta, method='ver')

        prop_nonzero = (np.abs(V) > 0.001).mean()
        self.assertTrue(
            abs(prop_nonzero - delta) < 0.02,
            "True (V)ErdosRenyi sampler fails to give correct sparsity")

        mean_val = (V.sum() - np.diag(V).sum()) / (p**2 - p)
        self.assertTrue(
            abs(mean_val) < 0.1,
            "True (V)ErdosRenyi sampler fails to give correct mean val")
Example #2
    def test_AR1_sample(self):

        # Check that rho parameter works
        rho = 0.3
        p = 500
        _, _, _, _, Sigma = graphs.sample_data(p=p, method='AR1', rho=rho)
        np.testing.assert_almost_equal(
            np.diag(Sigma, k=1),
            np.array([rho for _ in range(p - 1)]),
            decimal=4,
            err_msg="Rho parameter for AR1 graph sampling fails")

        # Error testing
        def ARsample():
            graphs.sample_data(method='AR1', rho=1.5)

        self.assertRaisesRegex(ValueError,
                               "must be a correlation between -1 and 1",
                               ARsample)

        # Check that a, b parameters work
        np.random.seed(110)
        a = 100
        b = 100
        _, _, _, _, Sigma = graphs.sample_data(p=500, method='AR1', a=a, b=b)
        mean_rho = np.diag(Sigma, k=1).mean()
        expected = a / (a + b)
        np.testing.assert_almost_equal(
            mean_rho,
            a / (a + b),
            decimal=2,
            err_msg=
            f'random AR1 gen has unexpected avg rho {mean_rho} vs {expected} ')
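
The a / (a + b) target above is the mean of a Beta(a, b) distribution; whether sample_data actually draws rho from Beta(a, b) is an assumption here, but the arithmetic behind expected can be sanity-checked directly:

import numpy as np

# Hedged sketch: a / (a + b) is the mean of a Beta(a, b) distribution.
# That sample_data draws rho this way is an assumption, not taken from the test.
a, b = 100, 100
draws = np.random.beta(a, b, size=100_000)
print(draws.mean())   # ~0.5
print(a / (a + b))    # 0.5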
Example #3
    def test_dsliu2020_sample(self):

        rho = 0.8
        n = 500
        p = 500
        _, _, beta, _, _ = graphs.sample_data(
            rho=rho,
            gamma=1,
            p=p,
            n=n,
            sparsity=0.1,
            method='daibarber2016',
            coeff_dist='dsliu2020',
        )
        self.assertTrue((beta != 0).sum() == 50,
                        f"Sparsity constraint for dsliu2020 violated")

        p = 2000
        _, _, beta, _, _ = graphs.sample_data(
            rho=rho,
            gamma=1,
            p=p,
            n=n,
            sparsity=0.025,
            method='daibarber2016',
            coeff_dist='dsliu2020',
        )
        self.assertTrue((beta != 0).sum() == 50,
                        f"Sparsity constraint for dsliu2020 violated")
Example #4
    def test_coeff_dist(self):

        # Test normal
        np.random.seed(110)
        p = 1000
        _, _, beta, _, _ = graphs.sample_data(p=p,
                                              sparsity=1,
                                              coeff_size=1,
                                              coeff_dist='normal',
                                              sign_prob=0)
        expected = 1
        mean_est = beta.mean()
        self.assertTrue(
            np.abs(mean_est - expected) < 0.1,
            msg=
            f"coeff_dist (normal) mean is wrong: expected mean 1 but got mean {mean_est}"
        )

        # Test uniform
        np.random.seed(110)
        p = 1000
        _, _, beta, _, _ = graphs.sample_data(p=p,
                                              sparsity=1,
                                              coeff_size=1,
                                              coeff_dist='uniform',
                                              sign_prob=0)
        expected = 0.75
        mean_est = beta.mean()
        self.assertTrue(
            np.abs(mean_est - expected) < 0.1,
            msg=
            f"coeff_dist (uniform) mean is wrong: expected mean 1 but got mean {mean_est}"
        )
        maxbeta = np.max(beta)
        self.assertTrue(
            maxbeta <= 1,
            msg=
            f'coeff_dist (uniform) produces max beta abs of {maxbeta} > 1 for coeff_size = 1'
        )
        minbeta = np.min(beta)
        self.assertTrue(
            minbeta >= 0.5,
            msg=
            f'coeff_dist (uniform) produces min beta abs of {minbeta} < 0.5 for coeff_size = 1'
        )

        # Test Value-Error
        def sample_bad_dist():
            graphs.sample_data(p=100, coeff_dist='baddist')

        self.assertRaisesRegex(ValueError,
                               "must be 'none', 'normal', or 'uniform'",
                               sample_bad_dist)
Example #5
    def test_compatibility_error(self):
        """ Ensures metro class errors when you pass a non-compatible
		proposal matrix """

        # Fake data
        np.random.seed(110)
        n = 5
        p = 200
        X, _, _, Q, V = graphs.sample_data(method='AR1', rho=0.3, n=n, p=p)

        # Metro sampler, proposal params
        def incorrect_undir_graph():
            metro_sampler = metro.MetropolizedKnockoffSampler(
                lf=lambda x: np.log(x).sum(),
                X=X,
                mu=np.zeros(p),
                V=V,
                undir_graph=np.eye(p),
                S=np.eye(p),
            )

        # Make sure the ValueError is raised
        self.assertRaisesRegex(ValueError,
                               "Precision matrix Q is not compatible",
                               incorrect_undir_graph)
Example #6
    def test_ar1_sample(self):

        # Fake data
        np.random.seed(110)
        n = 30000
        p = 8
        X, _, _, Q, V = graphs.sample_data(method='AR1', n=n, p=p)
        _, S = knockadapt.knockoffs.gaussian_knockoffs(X=X,
                                                       Sigma=V,
                                                       method='mvr',
                                                       return_S=True)

        # Graph structure + junction tree
        Q_graph = (np.abs(Q) > 1e-5)
        Q_graph = Q_graph - np.eye(p)

        # Metro sampler + likelihood
        mvn = stats.multivariate_normal(mean=np.zeros(p), cov=V)

        def mvn_likelihood(X):
            return mvn.logpdf(X)

        gamma = 0.9999
        metro_sampler = metro.MetropolizedKnockoffSampler(
            lf=mvn_likelihood,
            X=X,
            mu=np.zeros(p),
            V=V,
            undir_graph=Q_graph,
            S=S,
            gamma=gamma,
        )

        # Output knockoffs
        Xk = metro_sampler.sample_knockoffs()

        # Acceptance rate should be at least gamma (up to a small tolerance)
        acc_rate = metro_sampler.final_acc_probs.mean()
        self.assertTrue(
            acc_rate - gamma > -1e-3,
            msg=
            f'For AR1 gaussian design, metro has acc_rate={acc_rate} < gamma={gamma}'
        )

        # Check covariance matrix
        features = np.concatenate([X, Xk], axis=1)
        emp_corr_matrix = np.corrcoef(features.T)
        G = np.concatenate([
            np.concatenate([V, V - S]),
            np.concatenate([V - S, V]),
        ],
                           axis=1)

        np.testing.assert_almost_equal(
            emp_corr_matrix,
            G,
            decimal=2,
            err_msg=
            f"For AR1 gaussian design, metro does not match theoretical matrix"
        )
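
The nested np.concatenate calls above assemble the joint covariance of (X, Xk): V on the diagonal blocks and V - S off the diagonals. A small sketch (not part of the original test) of an equivalent construction using np.block:

import numpy as np

def joint_knockoff_cov(V, S):
    # Joint covariance of (X, Xk): V on the diagonal blocks, V - S off-diagonal,
    # matching the G built with np.concatenate in the test above.
    return np.block([
        [V, V - S],
        [V - S, V],
    ])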
Example #7
    def test_t_sample(self):

        # Check that we get the right covariance matrix
        np.random.seed(110)
        n = 100000
        p = 5
        X, _, _, Q, V = graphs.sample_data(n=n,
                                           p=p,
                                           method='AR1',
                                           x_dist='ar1t',
                                           df_t=5)

        emp_corr = np.corrcoef(X.T)
        np.testing.assert_array_almost_equal(
            V,
            emp_corr,
            decimal=2,
            err_msg=
            f"ar1t empirical correlation matrix does not match theoretical one"
        )

        # Check that this fails correctly for non-ar1-method
        def non_ar1_t():
            graphs.sample_data(n=n, p=p, method='ver', x_dist='ar1t')

        self.assertRaisesRegex(ValueError, "should equal 'ar1'", non_ar1_t)
Example #8
    def test_consistency_of_inferring_sigma(self):
        """ Checks that the same knockoffs are produced
        whether you infer the covariance matrix first and
        pass it to the gaussian_knockoffs generator, or
        you let the generator do the work for you
        """

        n = 25
        p = 300
        rho = 0.5
        X, _, _, _, _ = graphs.sample_data(n=n, p=p, rho=rho, method='AR1')

        # Method 1: infer cov first
        V, _ = utilities.estimate_covariance(X, tol=1e-2)
        np.random.seed(110)
        Xk1 = knockoffs.gaussian_knockoffs(X=X,
                                           Sigma=V,
                                           method='sdp',
                                           max_epochs=1)

        # Method 2: infer the covariance inside the generator
        np.random.seed(110)
        Xk2 = knockoffs.gaussian_knockoffs(X=X, method='sdp', max_epochs=1)
        np.testing.assert_array_almost_equal(
            Xk1, Xk2, 5, err_msg='Knockoff gen is inconsistent')
Example #9
    def test_MX_knockoff_dist(self):

        # Test knockoff construction for mvr and SDP
        # on equicorrelated matrices
        np.random.seed(110)
        n = 100000
        copies = 3
        p = 5

        # Check with a non-correlation matrix
        V = 4 * graphs.AR1(p=p, rho=0.5)
        mu = np.random.randn(p)
        print(f"true mu: {mu}")
        X, _, _, _, _ = graphs.sample_data(
            corr_matrix=V,
            n=n,
            mu=mu,
            p=p,
        )
        print(f"X mean: {X.mean(axis=0)}")

        # Check validity for oracle cov matrix
        self.check_valid_mxknockoffs(X,
                                     mu=mu,
                                     Sigma=V,
                                     copies=1,
                                     msg=f'ORACLE 4*AR1(rho=0.5)')

        # Check validity for estimated cov matrix
        self.check_valid_mxknockoffs(X,
                                     copies=3,
                                     msg=f'ESTIMATED 4*AR1(rho=0.5)')

        # Check for many types of data
        for rho in [0.1, 0.9]:
            for gamma in [0.5, 1]:
                for method in ['mvr', 'sdp']:

                    mu = 10 * np.random.randn(p)
                    X, _, _, _, corr_matrix, _ = graphs.daibarber2016_graph(
                        n=n, p=p, gamma=gamma, rho=rho, mu=mu)

                    # Check validity for oracle correlation matrix
                    self.check_valid_mxknockoffs(
                        X,
                        mu=mu,
                        Sigma=corr_matrix,
                        copies=copies,
                        msg=f'daibarber graph, rho = {rho}, gamma = {gamma}')

                    # Check validity for estimation
                    self.check_valid_mxknockoffs(
                        X,
                        copies=copies,
                        msg=
                        f'ESTIMATED daibarber graph, rho = {rho}, gamma = {gamma}'
                    )
Example #10
    def test_beta_gen(self):

        # Test sparsity
        p = 100
        _, _, beta, _, _ = graphs.sample_data(
            p=p,
            sparsity=0.3,
            coeff_size=0.3,
        )
        self.assertTrue((beta != 0).sum() == 30,
                        msg='sparsity parameter yields incorrect sparsity')
        abs_coefs = np.unique(np.abs(beta[beta != 0]))
        np.testing.assert_array_almost_equal(
            abs_coefs,
            np.array([0.3]),
            err_msg='beta generation yields incorrect coefficients')

        # Test number of selections for groups
        sparsity = 0.2
        groups = np.concatenate([np.arange(0, 50, 1), np.arange(0, 50, 1)])
        _, _, beta, _, _ = graphs.sample_data(
            p=p,
            sparsity=sparsity,
            groups=groups,
        )

        # First, test that the correct number of features is chosen
        num_groups = np.unique(groups).shape[0]
        expected_nonnull_features = sparsity * p
        self.assertTrue(
            (beta != 0).sum() == expected_nonnull_features,
            msg='sparsity for groups chooses incorrect number of features')

        # Check that the correct number of GROUPS has been chosen
        expected_nonnull_groups = sparsity * num_groups
        selected_groups = np.unique(groups[beta != 0]).shape[0]
        self.assertTrue(
            selected_groups == expected_nonnull_groups,
            msg='group sparsity parameter does not choose nonzero coeffs group-wise'
        )
Example #11
    def test_proposal_covs(self):

        # Fake data
        np.random.seed(110)
        n = 5
        p = 200
        X, _, _, Q, V = graphs.sample_data(method='AR1', rho=0.1, n=n, p=p)

        # Metro sampler, proposal params
        metro_sampler = metro.MetropolizedKnockoffSampler(
            lf=lambda x: np.log(x).sum(),
            X=X,
            mu=np.zeros(p),
            V=V,
            undir_graph=np.abs(Q) > 1e-3,
            S=np.eye(p),
        )

        # Test that proposal likelihood is correct
        mu = np.zeros(2 * p)
        mvn = stats.multivariate_normal(mean=mu, cov=metro_sampler.G)

        # Scipy likelihood
        features = mvn.rvs()
        scipy_likelihood = mvn.logpdf(features)

        # Calculate a new likelihood using the proposal params
        X = features[0:p].reshape(1, -1)
        Xstar = features[p:].reshape(1, -1)

        # Base likelihood for first p variables
        loglike = stats.multivariate_normal(mean=np.zeros(p), cov=V).logpdf(X)

        # Likelihood of jth variable given first j - 1
        prev_proposals = None
        for j in range(p):

            # Check that loglike matches the scipy likelihood at this point
            scipy_likelihood = stats.multivariate_normal(
                mean=np.zeros(p + j),
                cov=metro_sampler.G[0:p + j,
                                    0:p + j]).logpdf(features[0:p + j])
            self.assertTrue(
                np.abs(loglike - scipy_likelihood) < 0.001,
                f"Proposal likelihood for j={j-1} fails: output {loglike}, expected {scipy_likelihood} (scipy)"
            )

            # Add loglike
            loglike += metro_sampler.q_ll(Xjstar=Xstar[:, j],
                                          X=X,
                                          prev_proposals=prev_proposals)
            prev_proposals = Xstar[:, 0:j + 1]
Example #12
    def test_dirichlet_matrices(self):
        """ Simple test that ensures there are no errors, we get corr matrix 
		with expected eigenvalues"""

        # Try one with low temp
        p = 2000
        temp = 0.1
        np.random.seed(110)
        _, _, _, Q, V = graphs.sample_data(p=p, temp=temp, method='dirichlet')
        np.testing.assert_almost_equal(
            np.diag(V),
            np.ones(p),
            decimal=6,
            err_msg=f"DirichletCorr generation {V} is not a correlation matrix"
        )
        min_eig = np.linalg.eigh(V)[0].min()
        self.assertTrue(
            min_eig < 0.003,
            msg=
            f"Minimum eigenvalue of dirichlet {min_eig} should be <=0.001 when temp={temp}"
        )

        # Try 2 with high temp
        temp = 10
        np.random.seed(110)
        _, _, _, Q, V = graphs.sample_data(p=p, temp=temp, method='dirichlet')
        np.testing.assert_almost_equal(
            np.diag(V),
            np.ones(p),
            decimal=6,
            err_msg=f"DirichletCorr generation {V} is not a correlation matrix"
        )
        min_eig = np.linalg.eigh(V)[0].min()
        self.assertTrue(
            min_eig > 0.001,
            msg=
            f"Minimum eigenvalue of dirichlet {min_eig} should be >=0.001 when temp={temp}"
        )
Example #13
    def test_dot_corr_matrices(self):
        """ Tests wishart and uniform corr matrices """

        d = 1000
        p = 4
        _, _, _, _, Sigma = graphs.sample_data(p=p, d=d, method='wishart')
        np.testing.assert_almost_equal(
            Sigma,
            np.eye(p),
            decimal=1,
            err_msg=
            f'random Wishart generation {Sigma} unexpectedly deviates from the identity'
        )

        # Repeat for the uniform case
        _, _, _, _, Sigma = graphs.sample_data(p=p, d=d, method='uniformdot')
        expected = 0.25 * np.eye(p) + 0.75 * np.ones((p, p))
        np.testing.assert_almost_equal(
            Sigma,
            expected,
            decimal=1,
            err_msg=
            f'random unifdot generation {Sigma} unexpectedly deviates from {expected}'
        )
Example #14
    def test_partialcorr_sample(self):

        p = 50
        rho = 0.99
        _, _, _, _, V = graphs.sample_data(p=p, method='partialcorr', rho=rho)
        diag_diff = np.mean(np.abs(np.diag(V) - 1))
        self.assertTrue(
            diag_diff < 1e-4,
            f'Partial corr corr_matrix={V} for rho={rho} is not a correlation matrix'
        )
        pairwise_corr = V[0, 1]
        expected = -1 / (p - 1)
        self.assertTrue(
            np.abs(pairwise_corr - expected) < 1e-4,
            f"Partial corr pairwise_corr {pairwise_corr} deviates from expectation {expected} for rho={rho}"
        )
Example #15
    def test_beta_sign_prob(self):

        # Test signs of beta
        p = 100
        for sign_prob in [0, 1]:
            _, _, beta, _, _ = graphs.sample_data(p=p,
                                                  sparsity=1,
                                                  coeff_size=1,
                                                  sign_prob=sign_prob)
            sum_beta = beta.sum()
            expected = p * (1 - 2 * sign_prob)
            self.assertTrue(
                sum_beta == expected,
                msg=
                f"sign_prob ({sign_prob}) fails to correctly control sign of beta"
            )
Example #16
	def test_covariance_estimation(self):

		# Random data
		np.random.seed(110)
		n = 50
		p = 100
		rho = 0.3
		V = (1-rho)*np.eye(p) + (rho)*np.ones((p,p))
		X,_,_,_,_ = graphs.sample_data(n=n, corr_matrix=V)

		# Estimate covariance matrix
		Vest,_ = utilities.estimate_covariance(X, tol=1e-2)
		# Entrywise root-mean-square error between the estimate and the truth
		frobenius = np.sqrt(np.power(Vest - V, 2).mean())
		self.assertTrue(
			frobenius < 0.2,
			f"High-dimensional covariance estimation error is too large (rms error = {frobenius})"
		)
Example #17
    def test_gmliu2019_sample(self):

        n = 300
        p = 1000
        rho = 0.8
        np.random.seed(110)
        _, _, beta, _, _ = graphs.sample_data(
            rho=rho,
            gamma=1,
            p=p,
            n=n,
            sparsity=0.06,
            method='daibarber2016',
            coeff_dist='gmliu2019',
        )
        self.assertTrue((beta != 0).sum() == 60,
                        f"Sparsity constraint for gmliu2019 violated")
Example #18
		def non_sklearn_backend_cvscore():
			X, y, beta, _, corr_matrix = graphs.sample_data(
				n = n, p = p, y_dist = 'binomial', 
				coeff_size = 100, sign_prob = 1
			)
			groups = np.random.randint(1, p+1, size=(p,))
			groups = utilities.preprocess_groups(groups)
			pyglm_logit = kstats.LassoStatistic()
			pyglm_logit.fit(
				X,
				knockoffs,
				y,
				use_pyglm=True,
				group_lasso=True,
				groups=groups,
				cv_score=True,
			)
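
This closure is only a fragment: n, p, knockoffs, and the imported modules come from the enclosing test. A minimal sketch of the presumed surrounding scaffolding, with illustrative values and a hypothetical assertion wrapper (neither is taken from the source):

		# Hypothetical enclosing setup (values are illustrative only, not from the source)
		n, p = 100, 50
		np.random.seed(110)
		X0, _, _, _, Sigma = graphs.sample_data(n=n, p=p)
		knockoffs = knockadapt.knockoffs.gaussian_knockoffs(X=X0, Sigma=Sigma)[:, :, 0]

		# In the full test the closure is presumably handed to an assertRaises-style
		# check; the expected exception type and message are assumptions here.
		# self.assertRaises(ValueError, non_sklearn_backend_cvscore)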
Example #19
    def test_beta_corr_signals(self):

        # Test signals are grouped together
        p = 4
        sparsity = 0.5
        expected_nn = int(sparsity * p)
        for j in range(10):
            _, _, beta, _, _ = graphs.sample_data(p=p,
                                                  sparsity=0.5,
                                                  corr_signals=True)
            nn_flags = (beta != 0)
            self.assertTrue(
                nn_flags.sum() == expected_nn,
                f"Corr_signals breaks sparsity (beta = {beta}, should have {expected_nn} non-nulls)"
            )
            first_nonzero = np.where(nn_flags)[0].min()
            self.assertTrue(
                nn_flags[first_nonzero + 1],
                f"Corr_signals does not produce correlated signals (beta = {beta})"
            )
Example #20
    def test_nested_AR1(self):

        # Check that a, b parameters work
        np.random.seed(110)
        a = 100
        b = 40
        _, _, _, _, Sigma = graphs.sample_data(p=500,
                                               method='nestedar1',
                                               a=a,
                                               b=b,
                                               nest_size=2,
                                               num_nests=1)
        mean_rho = np.diag(Sigma, k=1).mean()
        expected = a / (2 * (a + b)) + (a / (a + b))**2 / 2
        np.testing.assert_almost_equal(
            mean_rho,
            expected,
            decimal=2,
            err_msg=
            f'random nested AR1 gen has unexpected avg rho {mean_rho}, should be ~ {expected} '
        )
Example #21
	def test_misaligned_covariance_estimation(self):

		# Inputs
		seed = 110
		sample_kwargs = {
			'n':640,
			'p':300,
			'method':'daibarber2016',
			'gamma':1,
			'rho':0.8,
		}

		# Extract a couple of constants
		n = sample_kwargs['n']
		p = sample_kwargs['p']

		# Create data generating process
		np.random.seed(seed)
		X, y, beta, _, V = graphs.sample_data(**sample_kwargs)  

		# Make sure this does not raise an error
		# (even though the design is ill-conditioned, which can make the graphical lasso fail)
		utilities.estimate_covariance(X, shrinkage='graphicallasso')
Example #22
    def test_tblock_sample(self):

        # Fake data --> we want the right cov matrix
        np.random.seed(110)
        n = 1000000
        p = 4
        df_t = 6
        X, _, _, Q, V = graphs.sample_data(n=n,
                                           p=p,
                                           method='daibarber2016',
                                           gamma=0,
                                           x_dist='blockt',
                                           group_size=2,
                                           df_t=df_t)
        emp_corr = np.cov(X.T)

        # Check the empirical covariance matrix
        np.testing.assert_array_almost_equal(
            V,
            emp_corr,
            decimal=2,
            err_msg=
            f"t-block empirical correlation matrix does not match theoretical one"
        )
Example #23
    def test_logistic(self):

        np.random.seed(110)

        p = 50
        X, y, beta, Q, corr_matrix = graphs.sample_data(p=p, y_dist='binomial')

        # Test outputs are binary
        y_vals = np.unique(y)
        np.testing.assert_array_almost_equal(
            y_vals,
            np.array([0, 1]),
            err_msg='Binomial flag not producing binary responses')

        # Test conditional mean for a single X val - start by
        # sampling ys
        N = 5000
        X_repeated = np.repeat(X[0], N).reshape(p, N).T
        ys = graphs.sample_response(X_repeated, beta, y_dist='binomial')

        # Then check that the theoretical/empirical mean are the same
        cond_mean = 1 / (1 + np.exp(-1 * np.dot(X_repeated[0], beta)))
        emp_cond_mean = ys.mean(axis=0)
        np.testing.assert_almost_equal(cond_mean, emp_cond_mean, decimal=2)
Example #24
        def bad_xdist():
            graphs.sample_data(method='ver', x_dist='t_dist')
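
This fragment only defines the failing call; in the full test it is presumably passed to an assertRaises-style check, like the other invalid-argument closures in these examples. The exception type is an assumption here and the exact message is not shown, so only a hedged sketch of the wrapper is given:

        # Presumed usage inside the test method; the error message raised for
        # x_dist='t_dist' is not shown in these examples, so only the type is checked.
        self.assertRaises(ValueError, bad_xdist)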
Example #25
    def test_gibbs_sample(self):

        # Check that we get a decent correlation matrix
        # with the right type of Q matrix
        np.random.seed(110)
        n = 50000
        p = 9
        X, _, _, Q, V = graphs.sample_data(
            n=n,
            p=p,
            method='ising',
            x_dist='gibbs',
        )
        # Mean test
        np.testing.assert_almost_equal(
            X.mean(),
            0,
            decimal=1,
            err_msg=
            f"Ising sampler has unexpected mean (expected 0, got {X.mean()})")
        # Test Q
        expected_edges = 4 * p - (4 * np.sqrt(p))
        num_edges = (Q != 0).sum()
        self.assertTrue(
            num_edges == expected_edges,
            f"Ising gibbs dist has unexpected number of edges ({num_edges}, expected {expected_edges})"
        )
        # Check the non-grid-based method
        X, _, _, Q, V = graphs.sample_data(
            n=n,
            p=p,
            method=3,
            x_dist='gibbs',
            y_dist='binomial',
        )
        # Mean test
        np.testing.assert_almost_equal(
            X.mean(),
            0,
            decimal=1,
            err_msg=
            f"Gibbs (non-ising) sampler has unexpected mean (expected 0, got {X.mean()})"
        )
        # Test consistency of Q
        X2, _, _, Q2, V2 = graphs.sample_data(n=n,
                                              p=p,
                                              gibbs_graph=Q,
                                              method=3,
                                              x_dist='gibbs',
                                              y_dist='binomial')
        np.testing.assert_array_almost_equal(
            Q,
            Q2,
            decimal=5,
            err_msg=
            f"Gibbs (non-ising) sampler is not consistent when Q passed in")
        self.assertTrue(
            np.abs(V - V2).mean() < 0.01,
            msg=f"Gibbs (non-ising) data is not consistent when Q passed in")

        # Test this works without errors for n < p
        _ = graphs.sample_data(n=5,
                               p=p,
                               method=3,
                               x_dist='gibbs',
                               y_dist='binomial')
Example #26
        def non_ar1_t():
            graphs.sample_data(n=n, p=p, method='ver', x_dist='ar1t')
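
This is the same closure defined inside test_t_sample (Example #7), where it is wrapped as follows:

        self.assertRaisesRegex(ValueError, "should equal 'ar1'", non_ar1_t)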
Example #27
    def test_dense_sample(self):

        # Fake data
        np.random.seed(110)
        n = 10000
        p = 4

        X, _, _, Q, V = graphs.sample_data(method='daibarber2016',
                                           rho=0.6,
                                           n=n,
                                           p=p,
                                           gamma=1,
                                           group_size=p)
        _, S = knockadapt.knockoffs.gaussian_knockoffs(X=X,
                                                       Sigma=V,
                                                       method='mvr',
                                                       return_S=True)

        # Undirected graph and junction tree ordering
        Q_graph = (np.abs(Q) > 1e-5)
        Q_graph = Q_graph - np.eye(p)
        undir_graph = nx.Graph(Q_graph)
        width, T = tree_processing.treewidth_decomp(undir_graph)
        order, active_frontier = tree_processing.get_ordering(T)

        # Metro sampler and likelihood
        mvn = stats.multivariate_normal(mean=np.zeros(p), cov=V)

        def mvn_likelihood(X):
            return mvn.logpdf(X)

        gamma = 0.99999
        metro_sampler = metro.MetropolizedKnockoffSampler(
            lf=mvn_likelihood,
            X=X,
            mu=np.zeros(p),
            V=V,
            order=order,
            active_frontier=active_frontier,
            gamma=gamma,
            S=S,
            metro_verbose=True)

        # Output knockoffs
        Xk = metro_sampler.sample_knockoffs()

        # Acceptance rate should be at least gamma (up to a small tolerance)
        acc_rate = metro_sampler.final_acc_probs.mean()
        self.assertTrue(
            acc_rate - gamma > -1e-3,
            msg=
            f'For equi gaussian design, metro has acc_rate={acc_rate} < gamma={gamma}'
        )

        # Check covariance matrix
        features = np.concatenate([X, Xk], axis=1)
        emp_corr_matrix = np.corrcoef(features.T)
        G = np.concatenate([
            np.concatenate([V, V - S]),
            np.concatenate([V - S, V]),
        ],
                           axis=1)

        np.testing.assert_almost_equal(
            emp_corr_matrix,
            G,
            decimal=2,
            err_msg=
            f"For equi gaussian design, metro does not match theoretical matrix"
        )
Example #28
        def sample_bad_dist():
            graphs.sample_data(p=100, coeff_dist='baddist')
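
This closure also appears inside test_coeff_dist (Example #4), where it is wrapped as follows:

        self.assertRaisesRegex(ValueError,
                               "must be 'none', 'normal', or 'uniform'",
                               sample_bad_dist)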
Example #29
        def ARsample():
            graphs.sample_data(method='AR1', rho=1.5)
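
This closure also appears inside test_AR1_sample (Example #2), where it is wrapped as follows:

        self.assertRaisesRegex(ValueError,
                               "must be a correlation between -1 and 1",
                               ARsample)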
Example #30
	def test_debiased_lasso(self):

		# Create data generating process
		n = 200
		p = 20
		rho = 0.3
		np.random.seed(110)
		X, y, beta, _, corr_matrix = graphs.sample_data(
			n = n, p = p, y_dist = 'gaussian', 
			coeff_size = 100, sign_prob = 0.5,
			method = 'daibarber2016',
			rho=rho
		)       
		groups = np.arange(1, p+1, 1)

		# Create knockoffs
		knockoffs, S = knockadapt.knockoffs.gaussian_knockoffs(
			X=X, 
			groups=groups,
			Sigma=corr_matrix,
			return_S=True,
			verbose=False,
			sdp_verbose=False,
			S = (1-rho)*np.eye(p)
		)
		knockoffs = knockoffs[:, :, 0]
		G = np.concatenate([
				np.concatenate([corr_matrix, corr_matrix-S]),
				np.concatenate([corr_matrix-S, corr_matrix])],
				axis=1
			)
		Ginv = utilities.chol2inv(G)

		# Debiased lasso - test accuracy
		dlasso_stat = kstats.LassoStatistic()
		dlasso_stat.fit(
			X,
			knockoffs,
			y,
			use_lars=False,
			cv_score=False,
			debias=True,
			Ginv=Ginv
		)
		W = dlasso_stat.W
		l2norm = np.power(W - beta, 2).mean()
		self.assertTrue(l2norm > 1,
			msg = f'Debiased lasso fits gaussian data very poorly (l2norm = {l2norm} between real/fitted coeffs)'
		)

		# Test that this throws the correct errors
		# first for Ginv
		def debiased_lasso_sans_Ginv():
			dlasso_stat.fit(
				X,
				knockoffs,
				y,
				use_lars=False,
				cv_score=False,
				debias=True,
				Ginv=None
			)
		self.assertRaisesRegex(
			ValueError, "Ginv must be provided",
			debiased_lasso_sans_Ginv
		)

		# Second for logistic data
		y = np.random.binomial(1, 0.5, n)
		def binomial_debiased_lasso():
			dlasso_stat.fit(
				X,
				knockoffs,
				y,
				use_lars=False,
				cv_score=False,
				debias=True,
				Ginv=Ginv,
			)
		self.assertRaisesRegex(
			ValueError, "Debiased lasso is not implemented for binomial data",
			binomial_debiased_lasso
		)