예제 #1
0
def test_reconstruction(s=3,
                        n=200,
                        p=50,
                        snr=7,
                        rho=0.1,
                        split_frac=0.8,
                        lam_frac=0.7,
                        ndraw=100,
                        burnin=200,
                        bootstrap=True,
                        solve_args={
                            'min_its': 50,
                            'tol': 1.e-10
                        },
                        reference_known=False):
    """Exercise the target sampler's reconstruction map on a split logistic fit.

    Draws a logistic instance, fits ``split_glm_group_lasso`` through
    ``multiple_queries`` and, only when every truly nonzero coefficient was
    selected, samples from the sampler returned by ``glm_target`` and runs
    ``reconstruction_map`` and ``log_randomization_density`` on that sample.

    Parameters
    ----------
    s, n, p, snr, rho
        Forwarded as keyword arguments to ``logistic_instance``.
    split_frac
        ``int(split_frac * n)`` is passed to ``split_glm_group_lasso``
        (presumably the subsample size -- confirm against that class).
    lam_frac
        Multiplier applied to the simulated penalty level.
    ndraw, burnin
        Forwarded to ``target_sampler.sample``.
    bootstrap, solve_args, reference_known
        Accepted but not used anywhere in this body. ``solve_args`` keeps
        its dict default for signature compatibility; it is never mutated.

    Returns
    -------
    tuple or None
        ``logdens.shape`` on success; ``None`` when nothing was selected or
        when the selected set does not contain the true support.
    """
    X, y, beta, _ = logistic_instance(n=n, p=p, s=s, rho=rho, snr=snr)

    m = int(split_frac * n)
    nonzero = np.where(beta)[0]

    loss = rr.glm.logistic(X, y)
    epsilon = 1. / np.sqrt(n)

    # Penalty level: lam_frac times the average, over 2000 simulated 0/1
    # response vectors, of the largest absolute entry of X^T e.
    lam = lam_frac * np.mean(
        np.fabs(np.dot(X.T, np.random.binomial(1, 1. / 2, (n, 2000)))).max(0))
    W = np.ones(p) * lam
    W[0] = 0  # use at least some unpenalized
    penalty = rr.group_lasso(np.arange(p),
                             weights=dict(zip(np.arange(p), W)),
                             lagrange=1.)

    M_est = split_glm_group_lasso(loss, epsilon, m, penalty)
    mv = multiple_queries([M_est])
    mv.solve()

    # Look the selection up once instead of re-reading it at every use.
    active_vars = M_est.selection_variable['variables']

    if np.sum(active_vars) == 0:
        return None

    # Only proceed when the true support was fully recovered.
    if not set(nonzero).issubset(np.nonzero(active_vars)[0]):
        return None

    target_sampler, _ = glm_target(loss, active_vars, mv)

    target_sample = target_sampler.sample(ndraw=ndraw,
                                          burnin=burnin,
                                          keep_opt=True)

    # The reconstruction map is called for its own sake; its result is not
    # inspected here.
    target_sampler.reconstruction_map(target_sample)
    logdens = target_sampler.log_randomization_density(target_sample)
    return logdens.shape
def test_multiple_queries_individual_coeff_small(ndraw=10000,
                                                 burnin=2000,
                                                 bootstrap=True):
    """Compute one two-sided selective p-value per selected coefficient.

    Fits a Laplace-randomized ``glm_group_lasso`` on a small logistic
    instance (``s=3, n=100, p=20``) and, when every truly nonzero
    coefficient was selected, tests each selected coordinate on its own via
    ``glm_target(..., subset=...)`` and ``hypothesis_test``.

    Parameters
    ----------
    ndraw, burnin
        Forwarded to ``target_sampler.hypothesis_test``.
    bootstrap
        Forwarded to ``glm_target``.

    Returns
    -------
    tuple or None
        ``(pvalues, is_nonzero)`` where ``pvalues[j]`` is the p-value for
        the ``j``-th selected variable and ``is_nonzero[j]`` says whether
        that variable is in the true support. ``None`` when the selected
        set does not contain the true support.
    """
    s, n, p = 3, 100, 20

    randomizer = randomization.laplace((p,), scale=1)
    X, y, beta, _ = logistic_instance(n=n, p=p, s=s, rho=0, snr=20.)

    nonzero = np.where(beta)[0]
    lam_frac = 3.

    loss = rr.glm.logistic(X, y)
    epsilon = 1.

    # Penalty level: lam_frac times the average, over 10000 simulated 0/1
    # response vectors, of the largest absolute entry of X^T e.
    lam = lam_frac * np.mean(
        np.fabs(np.dot(X.T, np.random.binomial(1, 1. / 2, (n, 10000)))).max(0))
    W = np.ones(p) * lam
    W[0] = 0  # use at least some unpenalized
    penalty = rr.group_lasso(np.arange(p),
                             weights=dict(zip(np.arange(p), W)), lagrange=1.)

    # randomization
    M_est = glm_group_lasso(loss, epsilon, penalty, randomizer)
    mv = multiple_queries([M_est])
    mv.solve()

    active_vars = M_est.selection_variable['variables']

    nactive = np.sum(active_vars)
    active_set = np.nonzero(active_vars)[0]

    print(nonzero, active_set)

    # Only proceed when the true support was fully recovered.
    if not set(nonzero).issubset(active_set):
        return None

    def test_stat(x):
        """Identity test statistic, shared by every coordinate."""
        return x

    pvalues = []
    for j in range(nactive):

        print(j)
        # Builtin ``bool`` instead of the ``np.bool`` alias, which was
        # removed in NumPy 1.24 and raises AttributeError there.
        subset = np.zeros(p, bool)
        subset[active_set[j]] = True
        target_sampler, target_observed = glm_target(loss,
                                                     active_vars,
                                                     mv,
                                                     subset=subset,
                                                     bootstrap=bootstrap,
                                                     reference=np.zeros((1,)))

        pval = target_sampler.hypothesis_test(test_stat,
                                              target_observed,
                                              alternative='twosided',
                                              ndraw=ndraw,
                                              burnin=burnin)
        pvalues.append(pval)

    return pvalues, [active_set[j] in nonzero for j in range(nactive)]