def test_data_carving_IC(n=100,
                         p=200,
                         s=7,
                         sigma=5,
                         rho=0.3,
                         snr=7.,
                         split_frac=0.9,
                         ndraw=5000,
                         burnin=1000,
                         df=np.inf,
                         coverage=0.90,
                         compute_intervals=False):
    """
    Redraw problems until the first-stage selection contains variables
    0..s-1, then compare data carving with data splitting on that draw.

    Every attempt draws a new problem from `instance`, shuffles the row
    indices, keeps the first ``int(n * split_frac)`` of them as the first
    stage and runs `info_crit_stop` on that subset with cost ``log(n)``.
    Attempts whose selected set does not contain ``range(s)`` are
    discarded.  There is no cap on the number of attempts.

    Returns
    -------
    tuple
        ``(carve_null, split_null, carve_alt, split_alt, counter,
        carve_coverage, split_coverage)`` where

        * ``carve_*`` / ``split_*`` are elements 1 and 3 of each row
          returned by `data_carving_IC` (presumably the carving and
          splitting p-values -- confirm against `data_carving_IC`),
          split by whether the selected variable index is ``>= s``
          ("null") or ``< s`` ("alt");
        * ``counter`` is the number of attempts made;
        * ``*_coverage`` hold, per result row, whether the projection
          coefficient of ``X.dot(beta)`` on the selected columns lies
          strictly inside the interval stored at positions 2 (carving)
          and 4 (splitting) of the row.
    """
    counter = 0
    while True:
        counter += 1

        # `sigma` is deliberately rebound to the value the generator returns.
        X, y, beta, active, sigma = instance(n=n,
                                             p=p,
                                             s=s,
                                             sigma=sigma,
                                             rho=rho,
                                             snr=snr,
                                             df=df)
        mu = np.dot(X, beta)

        # Random first-stage subset of the rows.
        splitn = int(n * split_frac)
        indices = np.arange(n)
        np.random.shuffle(indices)
        stage_one = indices[:splitn]

        FS = info_crit_stop(y, X, sigma, cost=np.log(n), subset=stage_one)

        # Only keep draws in which every one of the first s variables
        # was selected; otherwise try again.
        if not set(range(s)).issubset(FS.active):
            continue

        results, FS = data_carving_IC(y, X, sigma,
                                      stage_one=stage_one,
                                      splitting=True,
                                      ndraw=ndraw,
                                      burnin=burnin,
                                      coverage=coverage,
                                      compute_intervals=compute_intervals,
                                      cost=np.log(n))

        carve = [row[1] for row in results]
        split = [row[3] for row in results]

        # Target of inference: least-squares coefficients of the true mean
        # on the selected columns (the last entry of FS.variables is dropped).
        X_selected = X[:, FS.variables[:-1]]
        truth = np.dot(np.linalg.pinv(X_selected), mu)

        carve_coverage = []
        split_coverage = []
        for row, target in zip(results, truth):
            _, _, carve_int, _, split_int = row
            carve_coverage.append((carve_int[0] < target) * (target < carve_int[1]))
            split_coverage.append((split_int[0] < target) * (target < split_int[1]))

        # Positions (within FS.active) of variables outside / inside 0..s-1.
        null_positions = [j for j, var in enumerate(FS.active) if var >= s]
        alt_positions = [j for j, var in enumerate(FS.active) if var < s]

        return ([carve[j] for j in null_positions],
                [split[j] for j in null_positions],
                [carve[j] for j in alt_positions],
                [split[j] for j in alt_positions],
                counter,
                carve_coverage,
                split_coverage)
def test_BIC(k=10, do_sample=True):
    """
    Run `info_crit_stop` with cost ``log(n)`` on pure-noise data and return
    the last entry of every pivot of the final model.

    The design has ``n = 100`` rows and ``p = 200`` columns: i.i.d. standard
    normal entries plus a shared per-row normal term scaled by 0.4, with each
    column then divided by ``std * sqrt(n)``.  The response is independent
    normal noise with standard deviation 0.5.

    Parameters
    ----------
    k : int
        Accepted for backward compatibility; not used.
    do_sample : bool
        If true, pivots are requested with ``saturated=False``,
        ``burnin=5000`` and ``ndraw=5000``; otherwise `model_pivots` is
        called with its defaults.

    Returns
    -------
    list
        ``pivot[-1]`` for every item returned by ``FS.model_pivots``.
    """
    n, p = 100, 200
    noise_sd = 0.5  # single source for the noise level used below

    X = np.random.standard_normal((n, p)) + 0.4 * np.random.standard_normal(n)[:, None]
    X /= (X.std(0)[None, :] * np.sqrt(n))

    # Length follows n instead of a second hard-coded 100.
    Y = np.random.standard_normal(n) * noise_sd

    FS = info_crit_stop(Y, X, noise_sd, cost=np.log(n))
    final_model = len(FS.variables) - 1

    if do_sample:
        pivots = FS.model_pivots(final_model, saturated=False, burnin=5000, ndraw=5000)
    else:
        pivots = FS.model_pivots(final_model)

    # The loop variable is not called `p`, so it cannot clobber the column
    # count (list-comprehension variables leak on Python 2).
    return [pivot[-1] for pivot in pivots]
def test_BIC(do_sample=True, ndraw=8000, burnin=2000, force=False):
    """
    Run `info_crit_stop` with cost ``log(n)`` on a default
    `gaussian_instance` and return pivots for the selected variables that
    are not in the instance's active set.

    Parameters
    ----------
    do_sample : bool
        If true, request pivots with ``saturated=False`` and the given
        ``burnin`` / ``ndraw``; otherwise call `model_pivots` with only
        ``which_var``.
    ndraw, burnin : int
        Forwarded to ``FS.model_pivots`` when ``do_sample`` is true.
    force : bool
        Compute pivots even when the active set is not contained in
        ``FS.variables``.

    Returns
    -------
    list
        The last entry of each item returned by ``FS.model_pivots``, or an
        empty list when the active set was not fully selected and ``force``
        is false.
    """
    X, Y, beta, active, sigma, _ = gaussian_instance()
    n, p = X.shape

    FS = info_crit_stop(Y, X, sigma, cost=np.log(n))
    final_model = len(FS.variables)

    active = set(list(active))
    screened = active.issubset(FS.variables)
    if not (screened or force):
        return []

    # Only selected variables outside the active set are tested.
    which_var = [var for var in FS.variables if var not in active]

    if do_sample:
        pivots = FS.model_pivots(final_model,
                                 saturated=False,
                                 burnin=burnin,
                                 ndraw=ndraw,
                                 which_var=which_var)
    else:
        pivots = FS.model_pivots(final_model, which_var=which_var)

    return [entry[-1] for entry in pivots]
def test_BIC(do_sample=True, ndraw=8000, burnin=2000, nsim=None, force=False):
    """
    Run `info_crit_stop` with cost ``log(n)`` on a default `instance` and
    return pivots for the selected variables outside the active set.

    Parameters
    ----------
    do_sample : bool
        If true, `model_pivots` is additionally given ``saturated=False``,
        ``burnin`` and ``ndraw``.
    ndraw, burnin : int
        Only used when ``do_sample`` is true.
    nsim
        Accepted but not used by this function.
    force : bool
        Compute pivots even if the active set is not a subset of
        ``FS.variables``.

    Returns
    -------
    list
        ``pval[-1]`` for each item from ``FS.model_pivots``; ``[]`` when the
        active set was not fully selected and ``force`` is false.
    """
    X, Y, beta, active, sigma = instance()
    n, p = X.shape

    FS = info_crit_stop(Y, X, sigma, cost=np.log(n))
    final_model = len(FS.variables)

    active = set(list(active))
    if not active.issubset(FS.variables) and not force:
        return []

    # Restrict the pivots to selected variables that are not truly active.
    pivot_options = {'which_var': [v for v in FS.variables if v not in active]}
    if do_sample:
        pivot_options.update(saturated=False, burnin=burnin, ndraw=ndraw)

    return [pval[-1] for pval in FS.model_pivots(final_model, **pivot_options)]
def test_BIC(k=10, do_sample=True):
    """
    Pure-noise check of `info_crit_stop` with cost ``log(n)``.

    Builds a 100 x 200 design (standard normal entries plus a shared
    per-row normal term scaled by 0.4, columns divided by
    ``std * sqrt(n)``), draws a response of independent normal noise with
    standard deviation 0.5, runs `info_crit_stop` and returns the last entry
    of each pivot of the final model.

    Parameters
    ----------
    k : int
        Unused; kept so existing callers keep working.
    do_sample : bool
        True: call ``FS.model_pivots`` with ``saturated=False``,
        ``burnin=5000``, ``ndraw=5000``.  False: call it with defaults.

    Returns
    -------
    list
        Last element of every item returned by ``FS.model_pivots``.
    """
    n, p = 100, 200
    sigma = 0.5  # noise level, shared by the response and the selection call

    common = np.random.standard_normal
    X = common((n, p)) + 0.4 * common(n)[:, None]
    X /= (X.std(0)[None, :] * np.sqrt(n))

    # Sized by n so the response cannot drift out of step with the design.
    Y = common(n) * sigma

    FS = info_crit_stop(Y, X, sigma, cost=np.log(n))
    final_model = len(FS.variables) - 1

    sampler_args = {}
    if do_sample:
        sampler_args = dict(saturated=False, burnin=5000, ndraw=5000)

    # `pv` rather than `p`: avoids shadowing the number of columns, which
    # the comprehension would overwrite on Python 2.
    return [pv[-1] for pv in FS.model_pivots(final_model, **sampler_args)]
def test_data_carving_IC(nsim=500,
                         n=100,
                         p=200,
                         s=7,
                         sigma=5,
                         rho=0.3,
                         signal=7.,
                         split_frac=0.9,
                         ndraw=5000,
                         burnin=1000,
                         df=np.inf,
                         coverage=0.90,
                         compute_intervals=False):
    """
    Try up to `nsim` draws until the first-stage selection contains the
    instance's active set, then compare data carving with data splitting.

    Each attempt draws a problem from `gaussian_instance`, shuffles the row
    indices, keeps the first ``int(n * split_frac)`` as the first stage and
    runs `info_crit_stop` on that subset with cost ``log(n)``.  The first
    attempt whose selected set contains every index in ``active`` is passed
    to `data_carving_IC` and its results are returned.

    Returns
    -------
    tuple or None
        ``(carve_null, split_null, carve_alt, split_alt, counter,
        carve_coverage, split_coverage)``.  ``carve`` / ``split`` are
        elements 1 and 3 of each result row (presumably the carving and
        splitting p-values -- confirm against `data_carving_IC`).  "alt"
        entries belong to selected variables in ``active``, "null" entries
        to the others.  ``counter`` is the number of attempts used.  The
        coverage lists record whether the projection coefficient of
        ``X.dot(beta)`` lies strictly inside the interval at positions 2
        and 4 of each row.  If no attempt within `nsim` succeeds the loop
        ends and the function returns ``None``.
    """
    counter = 0

    while counter < nsim:
        counter += 1
        X, y, beta, active, sigma, _ = gaussian_instance(n=n,
                                                         p=p,
                                                         s=s,
                                                         sigma=sigma,
                                                         rho=rho,
                                                         signal=signal,
                                                         df=df)
        mu = np.dot(X, beta)
        splitn = int(n * split_frac)
        indices = np.arange(n)
        np.random.shuffle(indices)
        stage_one = indices[:splitn]

        FS = info_crit_stop(y, X, sigma, cost=np.log(n), subset=stage_one)

        true_active = set(active)
        if not true_active.issubset(FS.active):
            continue

        results, FS = data_carving_IC(y, X, sigma,
                                      stage_one=stage_one,
                                      splitting=True,
                                      ndraw=ndraw,
                                      burnin=burnin,
                                      coverage=coverage,
                                      compute_intervals=compute_intervals,
                                      cost=np.log(n))

        carve_split = [(r[1], r[3]) for r in results]
        carve = np.array(carve_split)[:, 0]
        split = np.array(carve_split)[:, 1]

        # Least-squares coefficients of the true mean on the selected
        # columns (the last entry of FS.variables is dropped).
        Xa = X[:, FS.variables[:-1]]
        truth = np.dot(np.linalg.pinv(Xa), mu)

        split_coverage = []
        carve_coverage = []
        for result, t in zip(results, truth):
            _, _, ci, _, si = result
            carve_coverage.append((ci[0] < t) * (t < ci[1]))
            split_coverage.append((si[0] < t) * (t < si[1]))

        print(carve, 'carve')
        print(split, 'split')
        print(results, 'results')

        # Classify by membership in `active`, the same set the screening
        # check above uses.  The previous `i < s` / `i >= s` test was only
        # right when the active variables were exactly 0..s-1.
        is_signal = [var in true_active for var in FS.active]

        return ([carve[j] for j, hit in enumerate(is_signal) if not hit],
                [split[j] for j, hit in enumerate(is_signal) if not hit],
                [carve[j] for j, hit in enumerate(is_signal) if hit],
                [split[j] for j, hit in enumerate(is_signal) if hit],
                counter,
                carve_coverage,
                split_coverage)
def test_data_carving_IC(n=600,
                         p=100,
                         s=10,
                         sigma=5,
                         rho=0.25,
                         signal=(3.5,5.),
                         split_frac=0.9,
                         ndraw=25000,
                         burnin=5000,
                         df=np.inf,
                         coverage=0.90,
                         compute_intervals=False):
    """
    Compute two-sided p-values for each variable selected by
    `info_crit_stop`, using a variance estimated from the selected model.

    Steps, as performed below:

    1. Draw a problem from `gaussian_instance` (``equicorrelated=False``)
       and run `info_crit_stop` with cost ``log(n)`` on a random
       ``split_frac`` fraction of the rows.
    2. Fit least squares on the selected columns; set the mean of
       ``FS.constraints()`` to the fitted values and its covariance to
       ``sigma_E**2 * I``, where ``sigma_E`` is the residual norm divided
       by ``sqrt(n - #selected)``.
    3. Draw ``Z`` with `sample_from_constraints`.
    4. For every selected variable, refit without it, weight the draws by
       ``exp(Z.dot((mu_r - mu_E) / sigma_E**2))`` and build a
       `discrete_family` on the draws of that variable's coefficient; the
       p-value is ``2 * min(c, 1 - c)`` with ``c = fam.cdf(0, x=observed)``.

    `coverage` and `compute_intervals` are accepted but not used here.
    ``sigma_E`` and ``sigma`` are printed as a side effect.

    Returns
    -------
    list of tuple
        ``(pvalue, beta[var])`` for each selected variable, in the order of
        ``FS.active``.
    """
    X, y, beta, active, sigma, _ = gaussian_instance(n=n,
                                                     p=p,
                                                     s=s,
                                                     sigma=sigma,
                                                     rho=rho,
                                                     signal=signal,
                                                     df=df,
                                                     equicorrelated=False)
    mu = np.dot(X, beta)  # computed as in the original; not used below

    # Random first-stage subset of the rows.
    splitn = int(n * split_frac)
    indices = np.arange(n)
    np.random.shuffle(indices)
    stage_one = indices[:splitn]

    FS = info_crit_stop(y, X, sigma, cost=np.log(n), subset=stage_one)
    con = FS.constraints()

    # Least-squares fit on the selected columns.
    X_E = X[:, FS.active]
    X_Ei = np.linalg.pinv(X_E)
    beta_bar = X_Ei.dot(y)
    mu_E = X_E.dot(beta_bar)

    residual_norm = np.linalg.norm(y - mu_E)
    sigma_E = residual_norm / np.sqrt(n - len(FS.active))
    variance_E = sigma_E**2

    # Re-centre and re-scale the constraint object in place before sampling.
    con.mean[:] = mu_E
    con.covariance = variance_E * np.identity(n)

    print(sigma_E, sigma)
    Z = sample_from_constraints(con,
                                y,
                                ndraw=ndraw,
                                burnin=burnin)

    pvalues = []
    for idx, var in enumerate(FS.active):
        remaining = copy(FS.active)
        remaining.remove(var)

        X_r = X[:, remaining]  # restricted design
        coef_r = np.linalg.pinv(X_r).dot(y)
        mu_r = X_r.dot(coef_r)
        delta_mu = (mu_r - mu_E) / variance_E

        # Weights applied to the draws for the fit without `var`.
        W = np.exp(Z.dot(delta_mu))
        fam = discrete_family(Z.dot(X_Ei[idx].T), W)

        one_sided = fam.cdf(0, x=beta_bar[idx])
        two_sided = 2 * min(one_sided, 1 - one_sided)
        pvalues.append((two_sided, beta[var]))

    return pvalues