def test_data_carving_coxph(n=400,
                            p=20,
                            split_frac=0.8,
                            lam_frac=1.2,
                            ndraw=8000,
                            burnin=2000,
                            df=np.inf,
                            compute_intervals=True,
                            return_only_screening=True):
    """
    Run one simulation comparing `data_carving.coxph` with
    `data_splitting.coxph` on data with no truly active features.

    A standard-normal design `X` of shape `(n, p)`, standard-exponential
    `T` and Bernoulli(0.5) `S` are drawn independently of each other
    (presumably survival times and event indicators -- confirm against
    the `coxph` API).  The carving model is fit with a random subset of
    `int(n * split_frac)` observations as `stage_one`, and every entry of
    `DC.active` is then tested.

    Parameters
    ----------
    n : int
        Number of observations.
    p : int
        Number of features.
    split_frac : float
        Fraction of the observations placed in `stage_one`.
    ndraw, burnin : int
        Forwarded to `DC.hypothesis_test`.
    lam_frac, df, compute_intervals, return_only_screening
        Accepted for signature compatibility; not read in this body.

    Returns
    -------
    (carve, split, active) : tuple or None
        `carve` holds one `DC.hypothesis_test` result per entry of
        `DC.active`; `split` holds the matching `DS.hypothesis_test`
        result, or a `np.random.sample()` draw when the data-splitting
        fit was skipped; `active` is a boolean array shaped like
        `DC.active` with the entries indexed by `true_active` set (none
        here, since `true_active` is empty).  `None` is returned only if
        `true_active` is not contained in `DC.active`, which cannot
        happen while `true_active` is empty.
    """
    X = np.random.standard_normal((n, p))
    T = np.random.standard_exponential(n)
    S = np.random.binomial(1, 0.5, size=(n,))

    # No feature is designated as truly active in this simulation.
    true_active = []

    # Random subset of the observations used for the first stage.
    idx = np.arange(n)
    np.random.shuffle(idx)
    n_stage_one = int(n * split_frac)
    stage_one = idx[:n_stage_one]

    # Weight 10 on every feature except feature 0, which gets weight 0
    # (presumably leaving it unpenalized -- confirm against `coxph`).
    lam_theor = 10. * np.ones(p)
    lam_theor[0] = 0.

    DC = data_carving.coxph(X, T, S, feature_weights=lam_theor,
                            stage_one=stage_one)
    DC.fit()

    # Data splitting is only attempted when the selected set is smaller
    # than the number of observations held out of stage one.
    if len(DC.active) < n - n_stage_one:
        DS = data_splitting.coxph(X, T, S, feature_weights=lam_theor,
                                  stage_one=stage_one)
        DS.fit(use_full_cov=True)
        data_split = True
    else:
        print('not enough data for data splitting second stage')
        print(DC.active)
        data_split = False

    if set(true_active).issubset(DC.active):
        carve = []
        split = []
        for var in DC.active:
            carve.append(DC.hypothesis_test(var, burnin=burnin, ndraw=ndraw))
            if data_split:
                split.append(DS.hypothesis_test(var))
            else:
                # Placeholder draw when no data-splitting fit exists.
                split.append(np.random.sample())

        # Use the builtin ``bool``: the ``np.bool`` alias was deprecated in
        # NumPy 1.20 and removed in 1.24, where it raises AttributeError.
        active = np.zeros_like(DC.active, dtype=bool)
        active[true_active] = 1
        return carve, split, active
def test_data_carving_coxph(n=100,
                            p=20,
                            split_frac=0.8,
                            lam_frac=1.2,
                            ndraw=8000,
                            burnin=2000,
                            df=np.inf,
                            coverage=0.90,
                            compute_intervals=True,
                            nsim=None):
    """
    Simulate data, fit `data_carving.coxph` (and, when possible,
    `data_splitting.coxph`) and collect per-variable test results.

    The simulation is repeated until the first `s` feature indices are all
    contained in `DC.active`; with `s = 0` that holds on the first pass.

    Parameters
    ----------
    n, p : int
        Shape `(n, p)` of the standard-normal design matrix.
    split_frac : float
        `int(n * split_frac)` shuffled observation indices form `stage_one`.
    ndraw, burnin : int
        Forwarded to `DC.hypothesis_test`.
    lam_frac, df, coverage, compute_intervals, nsim
        Accepted for signature compatibility; not read in this body.

    Returns
    -------
    list of tuple
        One 9-tuple per attempt:
        `(carve[s:], split[s:], carve[:s], split[:s], counter,
        carve_coverage, split_coverage, TP, FP)`.  Failed attempts store
        `None` for the first four entries; both coverage entries are
        always `np.nan`.
    """
    # Features 0..s-1 are the ones required to be in DC.active
    # (presumably the true support); none in this simulation.
    s = 0
    required = range(s)

    return_value = []
    counter = 0

    while True:
        counter += 1

        # The order of these draws fixes the random stream; keep it.
        X = np.random.standard_normal((n, p))
        T = np.random.standard_exponential(n)
        S = np.random.binomial(1, 0.5, size=(n,))

        shuffled = np.arange(n)
        np.random.shuffle(shuffled)
        n_first = int(n * split_frac)
        stage_one = shuffled[:n_first]

        weights = np.ones(p) * 10.
        weights[0] = 0.

        DC = data_carving.coxph(X, T, S,
                                feature_weights=weights,
                                stage_one=stage_one)
        DC.fit()

        # Data splitting needs more held-out observations than selected
        # variables.
        data_split = len(DC.active) < n - n_first
        if data_split:
            DS = data_splitting.coxph(X, T, S,
                                      feature_weights=weights,
                                      stage_one=stage_one)
            DS.fit()
        else:
            print('not enough data for data splitting second stage')
            print(DC.active)

        # Guard clause: record the failed attempt and simulate again.
        if not set(required).issubset(DC.active):
            TP = len(set(DC.active).intersection(required))
            FP = DC.active.shape[0] - TP
            return_value.append(
                (None, None, None, None, counter, np.nan, np.nan, TP, FP))
            continue

        # Tuple elements are evaluated left to right, so the carving test
        # and the splitting test (or its random placeholder) stay
        # interleaved per variable.
        paired = [
            (DC.hypothesis_test(var, burnin=burnin, ndraw=ndraw),
             DS.hypothesis_test(var) if data_split else np.random.sample())
            for var in DC.active
        ]
        carve = [pair[0] for pair in paired]
        split = [pair[1] for pair in paired]

        # Evaluated as in the original, although the result is not used.
        X_selected = X[:, DC.active]

        carve_coverage = np.nan
        split_coverage = np.nan

        TP = s
        FP = DC.active.shape[0] - TP
        return_value.append(
            (carve[s:], split[s:], carve[:s], split[:s], counter,
             carve_coverage, split_coverage, TP, FP))
        break

    return return_value
def test_data_carving_coxph(n=100, p=20, split_frac=0.8, lam_frac=1.2,
                            ndraw=8000, burnin=2000, df=np.inf,
                            coverage=0.90, compute_intervals=True,
                            nsim=None):
    """
    Repeat a carving-versus-splitting simulation until the required
    features are selected, and return a record of every attempt.

    Each attempt draws a standard-normal `(n, p)` design, a
    standard-exponential vector and a Bernoulli(0.5) vector, fits
    `data_carving.coxph` with a random `stage_one` subset of size
    `int(n * split_frac)`, and, if the selected set is small enough,
    fits `data_splitting.coxph` on the same split.

    Parameters
    ----------
    n, p : int
        Number of observations and of features.
    split_frac : float
        Fraction of the observations used as `stage_one`.
    ndraw, burnin : int
        Passed through to `DC.hypothesis_test`.
    lam_frac, df, coverage, compute_intervals, nsim
        Not read inside this function.

    Returns
    -------
    list of tuple
        9-tuples `(carve[s:], split[s:], carve[:s], split[:s], counter,
        carve_coverage, split_coverage, TP, FP)`; an attempt that fails
        the screening check contributes
        `(None, None, None, None, counter, nan, nan, TP, FP)` instead.
        With `s = 0` the screening check always passes, so the list has
        exactly one entry.
    """
    attempts = []
    counter = 0
    finished = False

    while not finished:
        counter += 1

        # Random draws happen in this fixed order on every attempt.
        X = np.random.standard_normal((n, p))
        T = np.random.standard_exponential(n)
        S = np.random.binomial(1, 0.5, size=(n,))

        # Size of the index set range(s) that must appear in DC.active.
        s = 0

        order = np.arange(n)
        np.random.shuffle(order)
        cutoff = int(n * split_frac)
        stage_one = order[:cutoff]

        # Weight 10. everywhere except a zero weight on feature 0.
        feature_weights = np.full(p, 10.)
        feature_weights[0] = 0.

        DC = data_carving.coxph(X, T, S, feature_weights=feature_weights,
                                stage_one=stage_one)
        DC.fit()

        if len(DC.active) >= n - cutoff:
            print('not enough data for data splitting second stage')
            print(DC.active)
            data_split = False
        else:
            DS = data_splitting.coxph(X, T, S,
                                      feature_weights=feature_weights,
                                      stage_one=stage_one)
            DS.fit()
            data_split = True

        if set(range(s)).issubset(DC.active):
            carve_results = []
            split_results = []
            for j in DC.active:
                carve_results.append(
                    DC.hypothesis_test(j, burnin=burnin, ndraw=ndraw))
                # Without a data-splitting fit a uniform draw stands in.
                split_results.append(
                    DS.hypothesis_test(j) if data_split
                    else np.random.sample())

            # Kept from the original; the selected columns are not used.
            Xa = X[:, DC.active]

            TP = s
            FP = DC.active.shape[0] - TP
            record = (carve_results[s:], split_results[s:],
                      carve_results[:s], split_results[:s],
                      counter, np.nan, np.nan, TP, FP)
            finished = True
        else:
            TP = len(set(DC.active).intersection(range(s)))
            FP = DC.active.shape[0] - TP
            record = (None, None, None, None, counter,
                      np.nan, np.nan, TP, FP)

        attempts.append(record)

    return attempts