def test_unbatched(self):
    """Check data_dependent_threshhold on single (unbatched) W vectors."""
    # Mostly-positive W at fdr=0.2: the threshold should equal the
    # smallest |W| value (0.5 here, NOT 0 -- the original assertion
    # message wrongly claimed 0; it now reports the true expectation).
    W1 = np.array([1, -2, 3, 6, 3, -2, 1, 2, 5, 3, 0.5, 1, 1, 1, 1, 1, 1, 1])
    T1 = data_dependent_threshhold(W1, fdr=0.2)
    expected = np.abs(W1).min()
    self.assertTrue(
        T1 == expected,
        msg=f'Incorrect data dependent threshhold: T1 should be {expected}, not {T1}'
    )
    # All-negative W: no threshold can control the FDR, so expect inf.
    W2 = np.array([-1, -2, -3])
    T2 = data_dependent_threshhold(W2, fdr=0.3)
    self.assertTrue(
        T2 == np.inf,
        msg=f'Incorrect data dependent threshhold: T2 should be inf, not {T2}'
    )
    # One large negative among many positives: threshold lands at 5.
    W3 = np.array([-5, -1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
    T3 = data_dependent_threshhold(W3, fdr=0.2)
    self.assertTrue(
        T3 == 5,
        msg=f'Incorrect data dependent threshhold: T3 should be 5, not {T3}'
    )
def Z2selections(Z, groups, q, **kwargs):
    """Turn Z statistics into knockoff selections.

    Aggregates Z into group-level W statistics, computes the
    data-dependent threshold at nominal FDR level ``q``, and flags
    every group whose W statistic clears that threshold.

    Returns a tuple ``(selected_flags, W)`` where ``selected_flags``
    is a float32 0/1 array and ``W`` is the array of W statistics.
    """
    # Aggregate Z statistics into W statistics
    W_stats = kstats.combine_Z_stats(Z, groups, **kwargs)
    # Threshold at the requested FDR level and flag selections
    threshold = kstats.data_dependent_threshhold(W=W_stats, fdr=q)
    flags = (W_stats >= threshold).astype("float32")
    return flags, W_stats
def test_batched(self):
    """Check data_dependent_threshhold on a batch of stacked W vectors."""
    # Three W vectors stacked as columns of a (10, 3) array:
    # all-positive, mixed-sign, and all-negative.
    all_pos = np.array([1] * 10)
    mixed = np.array([-2, -1, 1, 2, 3, 4, 5, 6, 7, 8])
    all_neg = np.array([-1] * 10)
    combined = np.stack([all_pos, mixed, all_neg]).transpose()
    Ts = data_dependent_threshhold(combined, fdr=0.2)
    # One threshold per column; the all-negative column has no
    # valid threshold, hence inf.
    expected = np.array([1, 2, np.inf])
    np.testing.assert_array_almost_equal(
        Ts,
        expected,
        err_msg=f"Incorrect data dependent threshhold (batched): Ts should be {expected}, not {Ts}"
    )
def check_kstat_fit(
    self,
    fstat,
    fstat_name,
    fstat_kwargs=None,
    min_power=0.8,
    max_l2norm=9,
    seed=110,
    group_features=False,
    **sample_kwargs
):
    """
    Fit ``fstat`` on synthetic knockoff data and sanity-check the fit.

    :param fstat: class instance inheriting from FeatureStatistic.
    :param fstat_name: name used in failure messages.
    :param fstat_kwargs: extra kwargs forwarded to ``fstat.fit``
        (default None, treated as an empty dict).
    :param min_power: minimum acceptable selection power at FDR 0.2.
    :param max_l2norm: maximum acceptable mean squared error between
        the fitted (pairwise) W statistics and ``|beta|``.
    :param seed: numpy random seed for reproducibility.
    :param group_features: if True, assign features to random groups;
        otherwise each feature is its own group.
    :param sample_kwargs: forwarded to ``graphs.sample_data``.
    """
    # FIX: the default was a mutable dict literal (``fstat_kwargs={}``),
    # which is shared across calls; use a None sentinel instead.
    if fstat_kwargs is None:
        fstat_kwargs = {}

    # Add defaults to sample kwargs
    if 'method' not in sample_kwargs:
        sample_kwargs['method'] = 'daibarber2016'
    if 'gamma' not in sample_kwargs:
        sample_kwargs['gamma'] = 1
    if 'n' not in sample_kwargs:
        sample_kwargs['n'] = 200
    if 'p' not in sample_kwargs:
        sample_kwargs['p'] = 50
    if 'rho' not in sample_kwargs:
        sample_kwargs['rho'] = 0.5
    if 'y_dist' not in sample_kwargs:
        sample_kwargs['y_dist'] = 'gaussian'
    n = sample_kwargs['n']
    p = sample_kwargs['p']
    rho = sample_kwargs['rho']
    y_dist = sample_kwargs['y_dist']

    # Create data generating process
    np.random.seed(seed)
    X, y, beta, _, corr_matrix = graphs.sample_data(**sample_kwargs)

    # Create groups: random grouping or one group per feature
    if group_features:
        groups = np.random.randint(1, p + 1, size=(p,))
        groups = utilities.preprocess_groups(groups)
    else:
        groups = np.arange(1, p + 1, 1)

    # Create knockoffs with a fixed equicorrelated S matrix
    knockoffs, S = knockadapt.knockoffs.gaussian_knockoffs(
        X=X,
        groups=groups,
        Sigma=corr_matrix,
        return_S=True,
        verbose=False,
        sdp_verbose=False,
        S=(1 - rho) * np.eye(p),
    )
    knockoffs = knockoffs[:, :, 0]

    # Fit the feature statistic and extract W / threshold
    fstat.fit(
        X, knockoffs, y, groups=groups, **fstat_kwargs,
    )
    W = fstat.W
    T = data_dependent_threshhold(W, fdr=0.2)

    # Test L2 norm between fitted statistics and |beta|.
    # When groups are nontrivial, compare per-feature pairwise stats
    # instead of the group-level W.
    m = np.unique(groups).shape[0]
    if m == p:
        pair_W = W
    else:
        pair_W = kstats.combine_Z_stats(fstat.Z, pair_agg='cd')
    l2norm = np.power(pair_W - np.abs(beta), 2)
    l2norm = l2norm.mean()
    self.assertTrue(
        l2norm < max_l2norm,
        msg=f'{fstat_name} fits {y_dist} data very poorly (l2norm = {l2norm} btwn real {beta} / fitted {pair_W} coeffs)'
    )

    # Test power for non-grouped setting.
    # (For group setting, power will be much lower.)
    selections = (W >= T).astype('float32')
    group_nnulls = utilities.fetch_group_nonnulls(beta, groups)
    power = ((group_nnulls != 0) * selections).sum() / np.sum(group_nnulls != 0)
    fdp = ((group_nnulls == 0) * selections).sum() / max(np.sum(selections), 1)
    self.assertTrue(
        power >= min_power,
        msg=f"Power {power} for {fstat_name} in equicor case (n={n},p={p},rho={rho}, y_dist {y_dist}, grouped={group_features}) should be > {min_power}. W stats are {W}, beta is {beta}"
    )