def test_reconstruction(s=3,
                        n=200,
                        p=50,
                        snr=7,
                        rho=0.1,
                        split_frac=0.8,
                        lam_frac=0.7,
                        ndraw=100,
                        burnin=200,
                        bootstrap=True,
                        solve_args={'min_its': 50, 'tol': 1.e-10},
                        reference_known=False):
    """Exercise the target sampler's reconstruction map and log-density.

    Draws a logistic instance, solves a single ``split_glm_group_lasso``
    query through ``multiple_queries`` and, when every truly nonzero
    coefficient was selected, samples from the ``glm_target`` sampler and
    evaluates ``reconstruction_map`` and ``log_randomization_density`` on
    that sample.

    Parameters
    ----------
    s, n, p, snr, rho
        Forwarded unchanged to ``logistic_instance``.
    split_frac
        ``int(split_frac * n)`` is passed to ``split_glm_group_lasso``
        (presumably the subsample size -- confirm against that class).
    lam_frac
        Multiplier applied to the simulated penalty level.
    ndraw, burnin
        Forwarded to ``target_sampler.sample``.
    bootstrap, solve_args, reference_known
        Not read by this function.  ``solve_args`` keeps its original dict
        default for signature compatibility; it is never mutated here.

    Returns
    -------
    tuple or None
        ``logdens.shape`` when the selected set contains the true support;
        ``None`` when nothing is selected or the support is not covered.
    """
    X, y, beta, _ = logistic_instance(n=n, p=p, s=s, rho=rho, snr=snr)

    m = int(split_frac * n)
    nonzero = np.where(beta)[0]

    loss = rr.glm.logistic(X, y)
    epsilon = 1. / np.sqrt(n)

    # Penalty level: lam_frac times the mean, over 2000 simulated Bernoulli(1/2)
    # response vectors, of the largest absolute entry of X^T y.
    simulated = np.random.binomial(1, 1. / 2, (n, 2000))
    lam = lam_frac * np.mean(np.fabs(np.dot(X.T, simulated)).max(0))

    W = np.ones(p) * lam
    W[0] = 0  # use at least some unpenalized
    penalty = rr.group_lasso(np.arange(p),
                             weights=dict(zip(np.arange(p), W)),
                             lagrange=1.)

    M_est = split_glm_group_lasso(loss, epsilon, m, penalty)
    mv = multiple_queries([M_est])
    mv.solve()

    active_vars = M_est.selection_variable['variables']

    if np.sum(active_vars) == 0:
        return None

    # Only continue when the selected set covers the true support.
    if not set(nonzero).issubset(np.nonzero(active_vars)[0]):
        return None

    target_sampler, _ = glm_target(loss, active_vars, mv)

    target_sample = target_sampler.sample(ndraw=ndraw,
                                          burnin=burnin,
                                          keep_opt=True)

    # The reconstruction is computed only to check the call runs; its
    # value is discarded.
    target_sampler.reconstruction_map(target_sample)
    logdens = target_sampler.log_randomization_density(target_sample)
    return logdens.shape
def test_multiple_queries_individual_coeff_small(ndraw=10000, burnin=2000, bootstrap=True):
    """Compute one two-sided p-value per selected coefficient on a small problem.

    Draws a logistic instance with ``s, n, p = 3, 100, 20``, solves a single
    ``glm_group_lasso`` query with a Laplace randomizer through
    ``multiple_queries`` and, when every truly nonzero coefficient was
    selected, runs ``hypothesis_test`` on each selected coordinate in turn.

    Parameters
    ----------
    ndraw, burnin
        Forwarded to ``target_sampler.hypothesis_test``.
    bootstrap
        Forwarded to ``glm_target``.

    Returns
    -------
    tuple or None
        ``(pvalues, is_nonzero)`` where ``is_nonzero[j]`` tells whether the
        j-th selected variable belongs to the true support; ``None`` when
        the selected set does not contain the true support.
    """
    s, n, p = 3, 100, 20

    randomizer = randomization.laplace((p,), scale=1)
    X, y, beta, _ = logistic_instance(n=n, p=p, s=s, rho=0, snr=20.)

    nonzero = np.where(beta)[0]
    lam_frac = 3.

    loss = rr.glm.logistic(X, y)
    epsilon = 1.

    # Penalty level: lam_frac times the mean, over 10000 simulated
    # Bernoulli(1/2) response vectors, of the largest absolute entry of X^T y.
    simulated = np.random.binomial(1, 1. / 2, (n, 10000))
    lam = lam_frac * np.mean(np.fabs(np.dot(X.T, simulated)).max(0))

    W = np.ones(p) * lam
    W[0] = 0  # use at least some unpenalized
    penalty = rr.group_lasso(np.arange(p),
                             weights=dict(zip(np.arange(p), W)),
                             lagrange=1.)

    # randomization
    M_est = glm_group_lasso(loss, epsilon, penalty, randomizer)

    mv = multiple_queries([M_est])
    mv.solve()

    active_vars = M_est.selection_variable['variables']
    nactive = np.sum(active_vars)
    active_set = np.nonzero(active_vars)[0]

    print(nonzero, active_set)

    # Only continue when the selected set covers the true support.
    if not set(nonzero).issubset(active_set):
        return None

    def test_stat(x):
        # Identity statistic: the target itself is the test statistic.
        return x

    pvalues = []
    for j in range(nactive):
        print(j)

        # Builtin ``bool`` instead of ``np.bool``: that alias was removed in
        # NumPy 1.24 and raises AttributeError there.
        subset = np.zeros(p, bool)
        subset[active_set[j]] = True

        target_sampler, target_observed = glm_target(loss,
                                                     active_vars,
                                                     mv,
                                                     subset=subset,
                                                     bootstrap=bootstrap,
                                                     reference=np.zeros((1,)))

        pval = target_sampler.hypothesis_test(test_stat,
                                              target_observed,
                                              alternative='twosided',
                                              ndraw=ndraw,
                                              burnin=burnin)
        pvalues.append(pval)

    return pvalues, [active_set[j] in nonzero for j in range(nactive)]