def test_data_carving_logistic(n=500,
                               p=300,
                               s=5,
                               sigma=5,
                               rho=0.05,
                               snr=4.,
                               split_frac=0.8,
                               ndraw=8000,
                               burnin=2000,
                               df=np.inf,
                               coverage=0.90,
                               compute_intervals=True,
                               nsim=None,
                               use_full_cov=False):
    """Run data carving and data splitting on simulated logistic data.

    Each attempt draws a design and coefficients with ``instance``, samples
    binary responses from the logistic of ``X.dot(beta)``, prepends an
    unpenalized intercept column, and fits ``data_carving.logistic`` on a
    random ``split_frac`` share of the rows.  Attempts repeat until the
    selected set ``DC.active`` contains columns ``0..s`` (intercept plus the
    first ``s`` features).

    Parameters
    ----------
    n, p, s, sigma, rho, snr, df
        Forwarded unchanged to ``instance`` on every attempt.
    split_frac : float
        Fraction of rows used as the first (selection) stage.
    ndraw, burnin : int
        Forwarded to ``DC.hypothesis_test``.
    use_full_cov : bool
        Forwarded to ``DS.fit``.
    coverage, compute_intervals, nsim
        Accepted for interface compatibility; not used by this function.

    Returns
    -------
    list of tuple
        One 9-tuple per attempt.  Failed attempts contribute
        ``(None, None, None, None, counter, nan, nan, TP, FP)``.  The final,
        successful attempt contributes
        ``(carve[k:], split[k:], carve[:k], split[:k], counter, nan, nan,
        TP, FP)`` with ``k = s + 1``, where ``carve`` / ``split`` hold the
        results of ``hypothesis_test`` for each entry of ``DC.active``
        (presumably p-values -- confirm against the library).

    Notes
    -----
    The loop has no iteration cap: it runs until screening succeeds.
    """
    # Column 0 of the augmented design is the intercept, so the set that must
    # be selected is columns 0..s.  This is computed once: bumping `s` inside
    # the retry loop would make every failed attempt ask `instance` for one
    # more nonzero coefficient and widen the screening set.
    n_signal = s + 1
    n_stage_one = int(n * split_frac)

    counter = 0
    return_value = []

    while True:
        counter += 1

        # Only the design and coefficients are used.  The sigma returned by
        # `instance` is deliberately not fed back into later draws so every
        # attempt uses the caller's settings.
        X, _, beta, _, _ = instance(n=n,
                                    p=p,
                                    s=s,
                                    sigma=sigma,
                                    rho=rho,
                                    snr=snr,
                                    df=df)
        mu = X.dot(beta)
        prob = np.exp(mu) / (1 + np.exp(mu))

        X = np.hstack([np.ones((n, 1)), X])
        z = np.random.binomial(1, prob)

        idx = np.arange(n)
        np.random.shuffle(idx)
        stage_one = idx[:n_stage_one]

        # Unit penalty weight on every feature, none on the intercept.
        lam_theor = np.ones(p + 1)
        lam_theor[0] = 0.

        DC = data_carving.logistic(X, z,
                                   feature_weights=lam_theor,
                                   stage_one=stage_one)
        DC.fit()

        # Data splitting is only attempted when the held-out rows outnumber
        # the selected variables.
        data_split = len(DC.active) < n - n_stage_one
        if data_split:
            DS = data_splitting.logistic(X, z,
                                         feature_weights=lam_theor,
                                         stage_one=stage_one)
            DS.fit(use_full_cov=use_full_cov)
        else:
            print('not enough data for data splitting second stage')
            print(DC.active)

        n_selected = DC.active.shape[0]

        if not set(range(n_signal)).issubset(DC.active):
            # Screening failed: record the selection counts and retry.
            TP = len(set(DC.active).intersection(range(n_signal)))
            FP = n_selected - TP
            return_value.append(
                (None, None, None, None, counter, np.nan, np.nan, TP, FP))
            continue

        carve = []
        split = []
        for var in DC.active:
            carve.append(DC.hypothesis_test(var, burnin=burnin, ndraw=ndraw))
            if data_split:
                split.append(DS.hypothesis_test(var))
            else:
                # Placeholder uniform draw when data splitting is unavailable.
                split.append(np.random.sample())

        TP = n_signal
        FP = n_selected - TP
        # NOTE(review): slicing at n_signal assumes DC.active lists columns
        # 0..s first (e.g. sorted order) -- confirm against data_carving.
        return_value.append((carve[n_signal:], split[n_signal:],
                             carve[:n_signal], split[:n_signal],
                             counter, np.nan, np.nan, TP, FP))
        return return_value
def test_data_carving_logistic(n=500,
                               p=300,
                               s=5,
                               sigma=5,
                               rho=0.05,
                               snr=4.,
                               split_frac=0.8,
                               ndraw=8000,
                               burnin=2000,
                               df=np.inf,
                               coverage=0.90,
                               compute_intervals=True,
                               nsim=None,
                               use_full_cov=False):
    """Run data carving and data splitting on simulated logistic data.

    Each attempt draws a design and coefficients with ``instance``, samples
    binary responses from the logistic of ``X.dot(beta)``, prepends an
    unpenalized intercept column, and fits ``data_carving.logistic`` on a
    random ``split_frac`` share of the rows.  Attempts repeat until the
    selected set ``DC.active`` contains columns ``0..s`` (intercept plus the
    first ``s`` features).

    Parameters
    ----------
    n, p, s, sigma, rho, snr, df
        Forwarded unchanged to ``instance`` on every attempt.
    split_frac : float
        Fraction of rows used as the first (selection) stage.
    ndraw, burnin : int
        Forwarded to ``DC.hypothesis_test``.
    use_full_cov : bool
        Forwarded to ``DS.fit``.
    coverage, compute_intervals, nsim
        Accepted for interface compatibility; not used by this function.

    Returns
    -------
    list of tuple
        One 9-tuple per attempt.  Failed attempts contribute
        ``(None, None, None, None, counter, nan, nan, TP, FP)``.  The final,
        successful attempt contributes
        ``(carve[k:], split[k:], carve[:k], split[:k], counter, nan, nan,
        TP, FP)`` with ``k = s + 1``, where ``carve`` / ``split`` hold the
        results of ``hypothesis_test`` for each entry of ``DC.active``
        (presumably p-values -- confirm against the library).

    Notes
    -----
    The loop has no iteration cap: it runs until screening succeeds.
    """
    # The intercept occupies column 0, so screening requires columns 0..s.
    # Fixed once up front: incrementing `s` inside the retry loop would
    # change both the simulated sparsity and the screening set after every
    # failed attempt.
    n_signal = s + 1
    n_stage_one = int(n * split_frac)

    counter = 0
    return_value = []

    while True:
        counter += 1

        # Only X and beta are needed; the returned sigma is intentionally
        # dropped so that retries keep using the caller-supplied value.
        X, _, beta, _, _ = instance(n=n,
                                    p=p,
                                    s=s,
                                    sigma=sigma,
                                    rho=rho,
                                    snr=snr,
                                    df=df)
        mu = X.dot(beta)
        prob = np.exp(mu) / (1 + np.exp(mu))

        X = np.hstack([np.ones((n, 1)), X])
        z = np.random.binomial(1, prob)

        idx = np.arange(n)
        np.random.shuffle(idx)
        stage_one = idx[:n_stage_one]

        # Unit penalty weight on every feature, none on the intercept.
        lam_theor = np.ones(p + 1)
        lam_theor[0] = 0.

        DC = data_carving.logistic(X, z,
                                   feature_weights=lam_theor,
                                   stage_one=stage_one)
        DC.fit()

        # Data splitting needs more held-out rows than selected variables.
        data_split = len(DC.active) < n - n_stage_one
        if data_split:
            DS = data_splitting.logistic(X, z,
                                         feature_weights=lam_theor,
                                         stage_one=stage_one)
            DS.fit(use_full_cov=use_full_cov)
        else:
            print('not enough data for data splitting second stage')
            print(DC.active)

        n_selected = DC.active.shape[0]

        if not set(range(n_signal)).issubset(DC.active):
            # Screening failed: log the counts for this attempt and redraw.
            TP = len(set(DC.active).intersection(range(n_signal)))
            FP = n_selected - TP
            return_value.append(
                (None, None, None, None, counter, np.nan, np.nan, TP, FP))
            continue

        carve = []
        split = []
        for var in DC.active:
            carve.append(DC.hypothesis_test(var, burnin=burnin, ndraw=ndraw))
            if data_split:
                split.append(DS.hypothesis_test(var))
            else:
                # Placeholder uniform draw when data splitting is unavailable.
                split.append(np.random.sample())

        TP = n_signal
        FP = n_selected - TP
        # NOTE(review): slicing at n_signal assumes DC.active lists columns
        # 0..s first (e.g. sorted order) -- confirm against data_carving.
        return_value.append((carve[n_signal:], split[n_signal:],
                             carve[:n_signal], split[:n_signal],
                             counter, np.nan, np.nan, TP, FP))
        return return_value
def test_data_carving_logistic(n=700,
                               p=300,
                               s=5,
                               rho=0.05,
                               signal=12.,
                               split_frac=0.8,
                               ndraw=8000,
                               burnin=2000,
                               df=np.inf,
                               compute_intervals=True,
                               use_full_cov=False,
                               return_only_screening=True):
    """Run data carving and data splitting once on a logistic instance.

    Draws ``(X, y, ...)`` from ``logistic_instance``, prepends an unpenalized
    intercept column, fits ``data_carving.logistic`` on a random
    ``split_frac`` share of the rows and, when the selected set contains the
    intercept and every truly active column, tests each selected variable.

    Parameters
    ----------
    n, p, s, rho, signal
        Forwarded to ``logistic_instance`` (with ``equicorrelated=False``).
    split_frac : float
        Fraction of rows used as the first (selection) stage.
    ndraw, burnin : int
        Forwarded to ``DC.hypothesis_test``.
    df, compute_intervals, use_full_cov, return_only_screening
        Accepted for interface compatibility; not used by this function.

    Returns
    -------
    tuple or None
        ``(carve, split, active)`` when screening succeeds: ``carve`` and
        ``split`` are lists with one ``hypothesis_test`` result per entry of
        ``DC.active`` (presumably p-values -- confirm against the library),
        and ``active`` is a boolean mask of length ``p + 1`` marking the
        intercept and the truly active columns.  ``None`` when the selected
        set misses any of those columns.
    """
    X, y, _, true_active, _ = logistic_instance(n=n,
                                                p=p,
                                                s=s,
                                                rho=rho,
                                                signal=signal,
                                                equicorrelated=False)

    X = np.hstack([np.ones((n, 1)), X])

    # Shift feature indices by one for the new intercept column and treat the
    # intercept (column 0) as part of the true support.
    true_active = [0] + list(np.array(true_active) + 1)

    n_stage_one = int(n * split_frac)
    idx = np.arange(n)
    np.random.shuffle(idx)
    stage_one = idx[:n_stage_one]

    # Unit penalty weight on every feature, none on the intercept.
    lam_theor = np.ones(p + 1)
    lam_theor[0] = 0.

    DC = data_carving.logistic(X, y,
                               feature_weights=lam_theor,
                               stage_one=stage_one)
    DC.fit()

    # Data splitting is only attempted when the held-out rows outnumber the
    # selected variables.
    data_split = len(DC.active) < n - n_stage_one
    if data_split:
        DS = data_splitting.logistic(X, y,
                                     feature_weights=lam_theor,
                                     stage_one=stage_one)
        # NOTE(review): the `use_full_cov` argument is ignored here and True
        # is always passed; left unchanged because honouring the parameter
        # would flip the default behaviour -- confirm the intent.
        DS.fit(use_full_cov=True)
    else:
        print('not enough data for data splitting second stage')
        print(DC.active)

    print(true_active, DC.active)

    if not set(true_active).issubset(DC.active):
        # Screening failed; callers receive None.
        return None

    carve = []
    split = []
    for var in DC.active:
        carve.append(DC.hypothesis_test(var, burnin=burnin, ndraw=ndraw))
        if data_split:
            split.append(DS.hypothesis_test(var))
        else:
            # Placeholder uniform draw when data splitting is unavailable.
            split.append(np.random.sample())

    # Builtin `bool` instead of the `np.bool` alias, which was removed in
    # NumPy 1.24 and raises AttributeError there.
    active = np.zeros(p + 1, bool)
    active[true_active] = True

    return carve, split, active