Example #1
	def test_unbatched(self):

		# The threshold should equal the smallest |W| value in this mixed-sign case
		W1 = np.array([1, -2, 3, 6, 3, -2, 1, 2, 5, 3, 0.5, 1, 1, 1, 1, 1, 1, 1])
		T1 = data_dependent_threshhold(W1, fdr=0.2)
		expected = np.abs(W1).min()
		self.assertTrue(T1==expected, msg=f'Incorrect data dependent threshhold: T1 should be {expected}, not {T1}')

		# With only negative W statistics, no valid threshold exists, so T should be infinite
		W2 = np.array([-1, -2, -3])
		T2 = data_dependent_threshhold(W2, fdr=0.3)
		self.assertTrue(T2==np.inf, msg=f'Incorrect data dependent threshhold: T2 should be inf, not {T2}')

		# Mostly positive W statistics with one large negative value
		W3 = np.array([-5, -1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
		T3 = data_dependent_threshhold(W3, fdr=0.2)
		self.assertTrue(T3 == 5, msg=f'Incorrect data dependent threshhold: T3 should be 5, not {T3}')
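For context, here is a minimal usage sketch of the function these tests exercise. The import path is an assumption inferred from the kstats alias used in the later examples; only the call signature (a W array plus an fdr level) and the selection rule W >= T are taken from the snippets themselves.

import numpy as np
# Assumed import path; the later examples access the same function through the kstats alias
from knockadapt.knockoff_stats import data_dependent_threshhold

# Hypothetical W statistics: positive values favor the original feature,
# negative values mean its knockoff "won" the comparison
W = np.array([3.0, -0.5, 2.0, 1.5, -1.0, 4.0, 0.8, 2.5])

# Data-dependent threshold targeting a nominal FDR of 20%
T = data_dependent_threshhold(W, fdr=0.2)

# Features whose W statistic clears the threshold are selected
selected = W >= T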
Example #2
def Z2selections(Z, groups, q, **kwargs):
	"""Converts Z statistics into knockoff selection flags at nominal FDR level q."""

	# Combine feature/knockoff Z statistics into group-level W statistics
	W = kstats.combine_Z_stats(Z, groups, **kwargs)

	# Calculate selections 
	T = kstats.data_dependent_threshhold(W=W, fdr=q)
	selected_flags = (W >= T).astype("float32")
	return selected_flags, W
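Note that Z2selections returns both the float32 selection flags and the underlying W statistics, so callers can reuse the same W vector downstream, for example to compute empirical power and FDP as Example #4 does.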
Example #3
	def test_batched(self):

		# Three batches of W statistics, stacked so that each column is one batch
		W1 = np.array([1]*10)
		W2 = np.array([-2, -1, 1, 2, 3, 4, 5, 6, 7, 8])
		W3 = np.array([-1]*10)
		combined = np.stack([W1, W2, W3]).transpose()
		Ts = data_dependent_threshhold(combined, fdr=0.2)
		expected = np.array([1, 2, np.inf])
		np.testing.assert_array_almost_equal(
			Ts, expected, 
			err_msg = f"Incorrect data dependent threshhold (batched): Ts should be {expected}, not {Ts}"
		)
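In the batched case, each column of the input is treated as an independent vector of W statistics, so the function returns one threshold per column; the all-negative column W3 admits no valid threshold, hence the expected value of np.inf.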
Example #4
	def check_kstat_fit(
		self,
		fstat,
		fstat_name,
		fstat_kwargs={},
		min_power=0.8,
		max_l2norm=9,
		seed=110,
		group_features=False,
		**sample_kwargs
	):
		""" fstat should be a class instance inheriting from FeatureStatistic """

		# Add defaults to sample kwargs
		sample_kwargs.setdefault('method', 'daibarber2016')
		sample_kwargs.setdefault('gamma', 1)
		sample_kwargs.setdefault('n', 200)
		sample_kwargs.setdefault('p', 50)
		sample_kwargs.setdefault('rho', 0.5)
		sample_kwargs.setdefault('y_dist', 'gaussian')
		n = sample_kwargs['n']
		p = sample_kwargs['p']
		rho = sample_kwargs['rho']
		y_dist = sample_kwargs['y_dist']

		# Create data generating process
		np.random.seed(seed)
		X, y, beta, _, corr_matrix = graphs.sample_data(**sample_kwargs)

		# Create groups
		if group_features:
			groups = np.random.randint(1, p+1, size=(p,))
			groups = utilities.preprocess_groups(groups)
		else:
			groups = np.arange(1, p+1, 1)

		# Create knockoffs, passing S explicitly so no S-matrix computation is needed
		knockoffs, S = knockadapt.knockoffs.gaussian_knockoffs(
			X=X,
			groups=groups,
			Sigma=corr_matrix,
			return_S=True,
			verbose=False,
			sdp_verbose=False,
			S=(1-rho)*np.eye(p)
		)
		# Reduce the knockoff array to 2D by taking the first knockoff copy
		knockoffs = knockoffs[:, :, 0]

		# Fit and extract coeffs/T
		fstat.fit(
			X,
			knockoffs,
			y,
			groups=groups,
			**fstat_kwargs,
		)
		W = fstat.W
		T = data_dependent_threshhold(W, fdr = 0.2)

		# Test fit quality: mean squared error between fitted coefficients and |beta|
		m = np.unique(groups).shape[0]
		if m == p:
			pair_W = W
		else:
			pair_W = kstats.combine_Z_stats(fstat.Z, pair_agg='cd')
		l2norm = np.power(pair_W - np.abs(beta), 2)
		l2norm = l2norm.mean()
		self.assertTrue(l2norm < max_l2norm,
			msg = f'{fstat_name} fits {y_dist} data very poorly (l2norm = {l2norm} btwn real {beta} / fitted {pair_W} coeffs)'
		)

		# Test power for non-grouped setting.
		# (For group setting, power will be much lower.)
		selections = (W >= T).astype('float32')
		group_nnulls = utilities.fetch_group_nonnulls(beta, groups)
		power = ((group_nnulls != 0)*selections).sum()/np.sum(group_nnulls != 0)
		fdp = ((group_nnulls == 0)*selections).sum()/max(np.sum(selections), 1)
		self.assertTrue(
			power >= min_power,
			msg = f"Power {power} for {fstat_name} in equicor case (n={n},p={p},rho={rho}, y_dist {y_dist}, grouped={group_features}) should be > {min_power}. W stats are {W}, beta is {beta}"
		)
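The power and FDP computations at the end of check_kstat_fit reduce to simple indicator arithmetic over the group-level ground truth. A self-contained sketch on hypothetical toy arrays:

import numpy as np

# Hypothetical ground truth and selections, for illustration only
group_nnulls = np.array([1.5, 0.0, -2.0, 0.0, 0.7])  # nonzero entries mark non-null groups
selections = np.array([1.0, 1.0, 1.0, 0.0, 0.0])  # 1.0 means the group was selected

# Power: fraction of non-null groups that were selected
power = ((group_nnulls != 0) * selections).sum() / np.sum(group_nnulls != 0)

# FDP: fraction of selections that are null groups, guarding against zero selections
fdp = ((group_nnulls == 0) * selections).sum() / max(np.sum(selections), 1)

print(power, fdp)  # 0.666..., 0.333...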