# Shared imports. Helpers referenced below but defined elsewhere in the
# codebase (projected_langevin, discrete_family, the instance generators,
# glm_group_lasso, pairs_bootstrap_glm, bootstrap_cov, the randomization
# module, projection_cone / projection_cone_nosign, _grad_log_wishart_white)
# are assumed to be imported at module level alongside these.
import numpy as np
from scipy.stats import laplace, norm as ndist
import regreg.api as rr


def main(n=50):
    from regreg.atoms._isotonic import _isotonic_regression
    import matplotlib.pyplot as plt

    initial = np.ones(n) + 0.01 * np.random.standard_normal(n)
    grad_map = lambda val: _grad_log_wishart_white(val, n)

    def projection_map(vals):
        # project onto the isotonic cone, then floor at a small positive value
        iso = np.zeros_like(vals)
        _isotonic_regression(vals, np.ones_like(vals), iso)
        vals = np.asarray(iso)
        return np.maximum(vals, 1.e-6)

    sampler = projected_langevin(initial, grad_map, projection_map, 0.01)
    sampler = iter(sampler)

    path = [initial.copy()]
    for _ in range(200):
        print(sampler.state)
        next(sampler)
        path.append(sampler.state.copy())
    path = np.array(path)

    for i in range(5):
        plt.plot(path[:, i])
    plt.show()
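# For reference, a minimal sketch of the `projected_langevin` iterator used
# throughout this module, matching the interface assumed above (`.state`,
# advancing via `next`). This is an illustration of the projected Langevin
# update under those assumptions, not necessarily the packaged
# implementation: one step moves the state along the log-density gradient,
# adds Gaussian noise scaled by sqrt(2 * step), and projects back onto the
# constraint set.


class _projected_langevin_sketch(object):

    def __init__(self, initial, gradient_map, projection_map, stepsize):
        self.state = initial.copy()
        self.gradient_map = gradient_map
        self.projection_map = projection_map
        self.stepsize = stepsize

    def __iter__(self):
        return self

    def __next__(self):
        # Euler-Maruyama step followed by projection onto the constraint set
        candidate = (self.state
                     + self.stepsize * self.gradient_map(self.state)
                     + np.sqrt(2 * self.stepsize)
                     * np.random.standard_normal(self.state.shape))
        self.state[:] = self.projection_map(candidate)
        return self.state

    next = __next__  # Python 2 compatibility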
def main(n=50):
    from sklearn.isotonic import IsotonicRegression
    import matplotlib.pyplot as plt

    initial = np.ones(n) + 0.01 * np.random.standard_normal(n)
    grad_map = lambda val: _grad_log_wishart_white(val, n)

    def projection_map(vals):
        # isotonic projection with values floored at 1e-6
        iso = IsotonicRegression(y_min=1.e-6)
        vals = np.asarray(vals)
        return iso.fit_transform(np.arange(vals.shape[0]), vals)

    sampler = projected_langevin(initial, grad_map, projection_map, 0.01)
    sampler = iter(sampler)

    path = [initial.copy()]
    for _ in range(200):
        print(sampler.state)
        next(sampler)
        path.append(sampler.state.copy())
    path = np.array(path)

    for i in range(5):
        plt.plot(path[:, i])
    plt.show()
def pval(vec_state, full_gradient, full_projection, move_data,
         bootstrap_samples, X, y, nonzero, active):
    """
    Compute two-sided selective p-values for each active coordinate by
    sampling from the selective distribution with projected Langevin,
    conditioning on the projection of the data away from the tested
    direction.
    """
    n, p = X.shape

    y0 = y.copy()

    null = []
    alt = []

    X_E = X[:, active]
    ndata = y.shape[0]

    active_set = np.where(active)[0]
    print("true nonzero", nonzero, "active set", active_set)

    if set(nonzero).issubset(active_set):
        for j, idx in enumerate(active_set):
            eta = X[:, idx]

            keep = np.ones(p, dtype=bool)
            keep[idx] = False

            linear_part = X[:, keep].T
            P = np.dot(linear_part.T, np.linalg.pinv(linear_part).T)
            I = np.identity(linear_part.shape[1])
            R = I - P

            sampler = projected_langevin(vec_state.copy(),
                                         full_gradient,
                                         full_projection,
                                         1. / (2 * p))
            samples = []
            boot_samples = bootstrap_samples(y0, P, R)

            for _ in range(1000):
                next(sampler)
                new_data = move_data(sampler.state, boot_samples)
                sampler.state[:ndata] = new_data
                samples.append(sampler.state.copy())

            samples = np.array(samples)
            data_samples = samples[:, :n]

            pop = [np.dot(eta, z) for z in data_samples]
            obs = np.dot(eta, y0)

            fam = discrete_family(pop, np.ones_like(pop))
            pval = fam.cdf(0, obs)
            pval = 2 * min(pval, 1 - pval)
            print("observed:", obs, "p value:", pval)

            if idx in nonzero:
                alt.append(pval)
            else:
                null.append(pval)

    return null, alt
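# Throughout these tests the p-value comes from embedding the sampled
# statistics in a `discrete_family` and evaluating its CDF at natural
# parameter 0. With equal weights and tilt 0 the family reduces to the
# empirical distribution of the samples, so a minimal sketch of the
# computation (illustrative helper, not the library API) is:


def _empirical_two_sided_pval(pop, obs):
    pop = np.asarray(pop)
    cdf = np.mean(pop <= obs)      # empirical CDF at the observed statistic
    return 2 * min(cdf, 1 - cdf)   # two-sided, as in 2 * min(fam.cdf(0, obs), ...)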
def test_kfstep(k=4, s=3, n=100, p=10, Langevin_steps=10000, burning=2000):
    X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, random_signs=True,
                                                   s=s, sigma=1., rho=0,
                                                   signal=10)
    epsilon = 0.
    randomization = laplace(loc=0, scale=1.)

    j_seq = np.empty(k, dtype=int)
    s_seq = np.empty(k)

    left = np.ones(p, dtype=bool)
    obs = 0

    initial_state = np.zeros(n + np.sum([i for i in range(p - k + 1, p + 1)]))
    initial_state[:n] = y.copy()

    mat = [np.zeros((n, ncol)) for ncol in range(p, p - k, -1)]

    curr = n

    keep = np.zeros(p, dtype=bool)

    for i in range(k):
        X_left = X[:, left]
        X_selected = X[:, ~left]
        if np.sum(left) < p:
            # regress out the already-selected columns
            P_perp = np.identity(n) - X_selected.dot(np.linalg.pinv(X_selected))
            mat[i] = P_perp.dot(X_left)
        else:
            mat[i] = X

        mat_complete = np.zeros((n, p))
        mat_complete[:, left] = mat[i]

        T = np.dot(mat[i].T, y)
        T_complete = np.dot(mat_complete.T, y)

        obs = np.max(np.abs(T))
        keep = np.copy(~left)

        random_Z = randomization.rvs(T.shape[0])
        T_random = T + random_Z
        initial_state[curr:(curr + p - i)] = T_random  # initializing subgradients
        curr = curr + p - i

        j_seq[i] = np.argmax(np.abs(T_random))
        s_seq[i] = np.sign(T_random[j_seq[i]])

        T_complete[left] += random_Z
        left[np.argmax(np.abs(T_complete))] = False

    # conditioning
    linear_part = X[:, keep].T
    P = np.dot(linear_part.T, np.linalg.pinv(linear_part).T)
    I = np.identity(linear_part.shape[1])
    R = I - P

    def full_projection(state, n=n, p=p, k=k):
        """Project each block of subgradients onto its selection cone."""
        new_state = np.empty(state.shape, float)
        new_state[:n] = state[:n]
        curr = n
        for i in range(k):
            projection = projection_cone(p - i, j_seq[i], s_seq[i])
            new_state[curr:(curr + p - i)] = projection(state[curr:(curr + p - i)])
            curr = curr + p - i
        return new_state

    def full_gradient(state, n=n, p=p, k=k, X=X, mat=mat):
        data = state[:n]

        grad = np.empty(n + np.sum([i for i in range(p - k + 1, p + 1)]))
        grad[:n] = -data

        curr = n
        for i in range(k):
            subgrad = state[curr:(curr + p - i)]
            sign_vec = np.sign(-mat[i].T.dot(data) + subgrad)
            grad[curr:(curr + p - i)] = -sign_vec
            curr = curr + p - i
            grad[:n] += mat[i].dot(sign_vec)

        return grad

    sampler = projected_langevin(initial_state,
                                 full_gradient,
                                 full_projection,
                                 1. / p)

    samples = []
    for i in range(Langevin_steps):
        old_state = sampler.state.copy()
        old_data = old_state[:n]
        next(sampler)
        new_state = sampler.state.copy()
        new_data = new_state[:n]
        # keep the conditioned-on projection of the data fixed
        new_data = np.dot(P, old_data) + np.dot(R, new_data)
        sampler.state[:n] = new_data
        if i > burning:
            samples.append(sampler.state.copy())

    samples = np.array(samples)
    Z = samples[:, :n]

    pop = np.abs(mat[k - 1].T.dot(Z.T)).max(0)
    fam = discrete_family(pop, np.ones_like(pop))
    pval = fam.cdf(0, obs)
    pval = 2 * min(pval, 1 - pval)

    print('pvalue:', pval)
    return pval
def test_fstep(s=0, n=50, p=10, weights="gumbel",
               randomization_dist="logistic",
               Langevin_steps=10000, burning=1000):

    X, y, _, nonzero, sigma = instance(n=n, p=p, random_signs=True, s=s,
                                       sigma=1., rho=0)
    epsilon = 0.

    if randomization_dist == "laplace":
        randomization = laplace(loc=0, scale=1.)
        random_Z = randomization.rvs(p)
    if randomization_dist == "logistic":
        random_Z = np.random.logistic(loc=0, scale=1, size=p)

    T = np.dot(X.T, y)
    T_random = T + random_Z
    T_abs = np.abs(T_random)
    j_star = np.argmax(T_abs)
    s_star = np.sign(T_random[j_star])

    # this is the subgradient part of the projection
    projection = projection_cone(p, j_star, s_star)

    def full_projection(state, n=n, p=p):
        """State is (y, u) -- first n coordinates are y, last p are u."""
        new_state = np.empty(state.shape, float)
        new_state[:n] = state[:n]
        new_state[n:] = projection(state[n:])
        return new_state

    obs = np.max(np.abs(T))

    eta_star = np.zeros(p)
    eta_star[j_star] = s_star

    def full_gradient(state, n=n, p=p, X=X, y=y):
        alpha = state[:n]
        subgrad = state[n:]

        mat = np.dot(X.T, np.diag(y))
        omega = -mat.dot(alpha) + subgrad

        if randomization_dist == "laplace":
            randomization_derivative = np.sign(omega)
        if randomization_dist == "logistic":
            randomization_derivative = -(np.exp(-omega) - 1) / (np.exp(-omega) + 1)
        if randomization_dist == "normal":
            randomization_derivative = omega

        grad = np.empty(state.shape, float)
        grad[:n] = np.dot(mat.T, randomization_derivative)

        if weights == "normal":
            grad[:n] -= alpha
        if weights == "gumbel":
            gumbel_beta = np.sqrt(6) / (1.14 * np.pi)
            euler = 0.57721
            gumbel_mu = -gumbel_beta * euler
            gumbel_sigma = 1. / 1.14
            grad[:n] -= (1. - np.exp(-(alpha * gumbel_sigma - gumbel_mu)
                                     / gumbel_beta)) * gumbel_sigma / gumbel_beta

        grad[n:] = -randomization_derivative

        return grad

    state = np.zeros(n + p)
    state[:n] = np.zeros(n)
    state[n:] = T_random

    sampler = projected_langevin(state, full_gradient, full_projection, 1. / p)
    samples = []

    for i in range(Langevin_steps):
        next(sampler)
        if i > burning:
            samples.append(sampler.state.copy())

    samples = np.array(samples)
    Z = samples[:, :n]
    print(Z.shape)

    mat = np.dot(X.T, np.diag(y))
    pop = np.abs(np.dot(mat, Z.T)).max(0)
    fam = discrete_family(pop, np.ones_like(pop))
    pval = fam.cdf(0, obs)
    pval = 2 * min(pval, 1 - pval)

    print('pvalue:', pval)
    return pval
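# The Gumbel-weight branch above uses the score of a Gumbel(mu, beta)
# density: with z = (x - mu) / beta, log f(x) = -log(beta) - z - exp(-z),
# so d/dx log f(x) = -(1 - exp(-z)) / beta. Here x = alpha * gumbel_sigma,
# and the chain rule contributes the extra factor gumbel_sigma; the
# constants (beta = sqrt(6) / (1.14 * pi), mu = -beta * euler,
# sigma = 1 / 1.14) standardize alpha to mean zero and unit variance.
# A direct transcription of that score, factored out for clarity:


def _gumbel_score(alpha):
    gumbel_beta = np.sqrt(6) / (1.14 * np.pi)
    euler = 0.57721
    gumbel_mu = -gumbel_beta * euler
    gumbel_sigma = 1. / 1.14
    z = (alpha * gumbel_sigma - gumbel_mu) / gumbel_beta
    return -(1. - np.exp(-z)) * gumbel_sigma / gumbel_beta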
def test_fstep(s=0, n=100, p=10, Langevin_steps=10000, burning=2000,
               condition_on_sign=True):

    X, y, _, nonzero, sigma = instance(n=n, p=p, random_signs=True, s=s,
                                       sigma=1., rho=0)
    epsilon = 0.

    randomization = laplace(loc=0, scale=1.)
    random_Z = randomization.rvs(p)
    T = np.dot(X.T, y)
    T_random = T + random_Z
    T_abs = np.abs(T_random)
    j_star = np.argmax(T_abs)
    s_star = np.sign(T_random[j_star])

    # this is the subgradient part of the projection
    if condition_on_sign:
        projection = projection_cone(p, j_star, s_star)
    else:
        projection = projection_cone_nosign(p, j_star)

    def full_projection(state, n=n, p=p):
        """State is (y, u) -- first n coordinates are y, last p are u."""
        new_state = np.empty(state.shape, float)
        new_state[:n] = state[:n]
        new_state[n:] = projection(state[n:])
        return new_state

    obs = np.max(np.abs(T))

    eta_star = np.zeros(p)
    eta_star[j_star] = s_star

    def full_gradient(state, n=n, p=p, X=X):
        data = state[:n]
        subgrad = state[n:]
        sign_vec = np.sign(-X.T.dot(data) + subgrad)

        grad = np.empty(state.shape, float)
        grad[n:] = -sign_vec
        grad[:n] = -(data - X.dot(sign_vec))

        return grad

    state = np.zeros(n + p)
    state[:n] = y
    state[n:] = T_random

    sampler = projected_langevin(state, full_gradient, full_projection, 1. / p)
    samples = []

    for i in range(Langevin_steps):
        next(sampler)
        if i > burning:
            samples.append(sampler.state.copy())

    samples = np.array(samples)
    Z = samples[:, :n]

    pop = np.abs(X.T.dot(Z.T)).max(0)
    fam = discrete_family(pop, np.ones_like(pop))
    pval = fam.cdf(0, obs)
    pval = 2 * min(pval, 1 - pval)

    print('pvalue:', pval)
    return pval
def pval(vec_state, full_gradient, full_projection, X, y, obs_residuals,
         nonzero, active):
    """
    Compute a two-sided selective p-value for the norm of the active-set
    statistic by sampling the bootstrap weights with projected Langevin.
    """
    n, p = X.shape

    y0 = y.copy()

    null = []
    alt = []

    X_E = X[:, active]
    ndata = y.shape[0]

    active_set = np.where(active)[0]
    print("true nonzero", nonzero, "active set", active_set)

    if set(nonzero).issubset(active_set):

        sampler = projected_langevin(vec_state.copy(),
                                     full_gradient,
                                     full_projection,
                                     1. / p)
        samples = []

        for _ in range(6000):
            next(sampler)
            samples.append(sampler.state.copy())

        samples = np.array(samples)
        alpha_samples = samples[:, :n]

        data_samples = [np.dot(X[:, active].T,
                               np.diag(obs_residuals).dot(alpha_samples[i, :]))
                        for i in range(len(samples))]

        pop = [np.linalg.norm(z) for z in data_samples]
        obs = np.linalg.norm(np.dot(X[:, active].T, y0))

        fam = discrete_family(pop, np.ones_like(pop))
        pval = fam.cdf(0, obs)
        pval = 2 * min(pval, 1 - pval)
        print("observed:", obs, "p value:", pval)

        null.append(pval)

    return null, alt
def pval(vec_state, full_projection, X, y, obs_residuals, signs, lam, epsilon,
         nonzero, active):
    """
    Compute two-sided selective p-values for each active coordinate of a
    randomized lasso, conditioning on the projection of the data away from
    the coordinate being tested.
    """
    n, p = X.shape

    y0 = y.copy()

    null = []
    alt = []

    X_E = X[:, active]
    ndata = y.shape[0]
    inactive = ~active
    nalpha = n

    active_set = np.where(active)[0]
    print("true nonzero", nonzero, "active set", active_set)

    if set(nonzero).issubset(active_set):

        for j, idx in enumerate(active_set):
            eta = X[:, idx]
            keep = np.copy(active)
            keep[idx] = False

            linear_part = X[:, keep].T
            P = np.dot(linear_part.T, np.linalg.pinv(linear_part).T)
            I = np.identity(linear_part.shape[1])
            R = I - P

            fixed_part = np.dot(X.T, np.dot(P, y))
            hessian = np.dot(X.T, X)
            B = hessian + epsilon * np.identity(p)
            A = B[:, active]
            matXTR = X.T.dot(R)

            def full_gradient(vec_state, fixed_part=fixed_part, R=R,
                              obs_residuals=obs_residuals, signs=signs, X=X,
                              lam=lam, epsilon=epsilon, data0=y,
                              hessian=hessian, A=A, matXTR=matXTR,
                              nalpha=nalpha, active=active, inactive=inactive):
                nactive = np.sum(active)
                ninactive = np.sum(inactive)

                alpha = vec_state[:nalpha]
                betaE = vec_state[nalpha:(nalpha + nactive)]
                cube = vec_state[(nalpha + nactive):]

                p = X.shape[1]
                beta_full = np.zeros(p)
                beta_full[active] = betaE
                subgradient = np.zeros(p)
                subgradient[inactive] = lam * cube
                subgradient[active] = lam * signs

                opt_vec = epsilon * beta_full + subgradient

                weighted_residuals = np.diag(obs_residuals).dot(alpha)
                omega = (-fixed_part - np.dot(matXTR, weighted_residuals)
                         + np.dot(hessian, beta_full) + opt_vec)
                sign_vec = np.sign(omega)

                mat = np.dot(matXTR, np.diag(obs_residuals))

                _gradient = np.zeros(nalpha + nactive + ninactive)
                _gradient[:nalpha] = -np.ones(nalpha) + np.dot(mat.T, sign_vec)
                _gradient[nalpha:(nalpha + nactive)] = -np.dot(A.T, sign_vec)
                _gradient[(nalpha + nactive):] = -lam * sign_vec[inactive]

                return _gradient

            sampler = projected_langevin(vec_state.copy(),
                                         full_gradient,
                                         full_projection,
                                         1. / p)
            samples = []

            for _ in range(5000):
                next(sampler)
                samples.append(sampler.state.copy())

            samples = np.array(samples)
            alpha_samples = samples[:, :n]

            residuals_samples = [np.diag(obs_residuals).dot(alpha_samples[i, :])
                                 for i in range(len(samples))]

            pop = [np.inner(eta, np.dot(P, y0) + np.dot(R, z))
                   for z in residuals_samples]
            obs = np.inner(eta, y0)

            fam = discrete_family(pop, np.ones_like(pop))
            pval = fam.cdf(0, obs)
            pval = 2 * min(pval, 1 - pval)
            print("observed:", obs, "p value:", pval)

            if idx in nonzero:
                alt.append(pval)
            else:
                null.append(pval)

    return null, alt
def test_overall_null_two_queries():
    s, n, p = 5, 200, 20

    randomizer = randomization.laplace((p,), scale=0.5)
    X, y, beta, _ = logistic_instance(n=n, p=p, s=s, rho=0.1, snr=14)

    nonzero = np.where(beta)[0]
    lam_frac = 1.

    loss = rr.glm.logistic(X, y)
    epsilon = 1. / np.sqrt(n)

    lam = lam_frac * np.mean(
        np.fabs(np.dot(X.T, np.random.binomial(1, 1. / 2, (n, 10000)))).max(0))
    W = np.ones(p) * lam
    W[0] = 0  # use at least some unpenalized
    penalty = rr.group_lasso(np.arange(p),
                             weights=dict(zip(np.arange(p), W)),
                             lagrange=1.)

    # first randomization
    M_est1 = glm_group_lasso(loss, epsilon, penalty, randomizer)
    M_est1.solve()
    bootstrap_score1 = M_est1.setup_sampler(scaling=2.)

    # second randomization
    M_est2 = glm_group_lasso(loss, epsilon, penalty, randomizer)
    M_est2.solve()
    bootstrap_score2 = M_est2.setup_sampler(scaling=2.)

    # we take target to be union of two active sets
    active = (M_est1.selection_variable['variables']
              + M_est2.selection_variable['variables'])

    if set(nonzero).issubset(np.nonzero(active)[0]):
        boot_target, target_observed = pairs_bootstrap_glm(loss, active)

        # target are all true null coefficients selected
        sampler = lambda: np.random.choice(n, size=(n,), replace=True)
        target_cov, cov1, cov2 = bootstrap_cov(
            sampler, boot_target,
            cross_terms=(bootstrap_score1, bootstrap_score2))

        active_set = np.nonzero(active)[0]
        inactive_selected = I = [i for i in np.arange(active_set.shape[0])
                                 if active_set[i] not in nonzero]

        # is it enough only to bootstrap the inactive ones?
        # seems so...

        if not I:
            return None

        A1, b1 = M_est1.linear_decomposition(cov1[I], target_cov[I][:, I],
                                             target_observed[I])
        A2, b2 = M_est2.linear_decomposition(cov2[I], target_cov[I][:, I],
                                             target_observed[I])

        target_inv_cov = np.linalg.inv(target_cov[I][:, I])

        initial_state = np.hstack([target_observed[I],
                                   M_est1.observed_opt_state,
                                   M_est2.observed_opt_state])

        ntarget = len(I)
        target_slice = slice(0, ntarget)
        opt_slice1 = slice(ntarget, p + ntarget)
        opt_slice2 = slice(p + ntarget, 2 * p + ntarget)

        def target_gradient(state):
            # with many samplers, we will add up the `target_slice` component
            # of many target_grads, and only once do the Gaussian addition
            # of full_grad

            target = state[target_slice]
            opt_state1 = state[opt_slice1]
            opt_state2 = state[opt_slice2]
            target_grad1 = M_est1.randomization_gradient(target, (A1, b1),
                                                         opt_state1)
            target_grad2 = M_est2.randomization_gradient(target, (A2, b2),
                                                         opt_state2)

            full_grad = np.zeros_like(state)
            full_grad[opt_slice1] = -target_grad1[1]
            full_grad[opt_slice2] = -target_grad2[1]
            full_grad[target_slice] -= target_grad1[0] + target_grad2[0]
            full_grad[target_slice] -= target_inv_cov.dot(target)

            return full_grad

        def target_projection(state):
            opt_state1 = state[opt_slice1]
            state[opt_slice1] = M_est1.projection(opt_state1)
            opt_state2 = state[opt_slice2]
            state[opt_slice2] = M_est2.projection(opt_state2)
            return state

        target_langevin = projected_langevin(initial_state,
                                             target_gradient,
                                             target_projection,
                                             .5 / (2 * p + 1))

        Langevin_steps = 10000
        burning = 2000
        samples = []
        for i in range(Langevin_steps):
            next(target_langevin)
            if i >= burning:
                samples.append(target_langevin.state[target_slice].copy())

        test_stat = lambda x: np.linalg.norm(x)
        observed = test_stat(target_observed[I])
        sample_test_stat = np.array([test_stat(x) for x in samples])

        family = discrete_family(sample_test_stat,
                                 np.ones_like(sample_test_stat))
        pval = family.ccdf(0, observed)
        return pval, False
def test_one_inactive_coordinate_handcoded():
    s, n, p = 5, 200, 20

    randomizer = randomization.laplace((p,), scale=1.)
    X, y, beta, _ = logistic_instance(n=n, p=p, s=s, rho=0.1, snr=14)

    nonzero = np.where(beta)[0]
    lam_frac = 1.

    loss = rr.glm.logistic(X, y)
    epsilon = 1.

    lam = lam_frac * np.mean(
        np.fabs(np.dot(X.T, np.random.binomial(1, 1. / 2, (n, 10000)))).max(0))
    W = np.ones(p) * lam
    W += lam * np.arange(p) / 200
    W[0] = 0
    penalty = rr.group_lasso(np.arange(p),
                             weights=dict(zip(np.arange(p), W)),
                             lagrange=1.)

    print(lam)

    # our randomization
    M_est1 = glm_group_lasso(loss, epsilon, penalty, randomizer)
    M_est1.solve()
    bootstrap_score1 = M_est1.setup_sampler()

    active = M_est1.selection_variable['variables']
    if set(nonzero).issubset(np.nonzero(active)[0]):
        boot_target, target_observed = pairs_bootstrap_glm(loss, active)

        # target are all true null coefficients selected
        sampler = lambda: np.random.choice(n, size=(n,), replace=True)
        target_cov, cov1 = bootstrap_cov(sampler, boot_target,
                                         cross_terms=(bootstrap_score1,))

        # have checked that covariance up to here agrees with other
        # test_glm_langevin example

        active_set = np.nonzero(active)[0]
        inactive_selected = I = [i for i in np.arange(active_set.shape[0])
                                 if active_set[i] not in nonzero]

        # is it enough only to bootstrap the inactive ones?
        # seems so...

        if not I:
            return None

        # take the first inactive one
        I = I[:1]
        A1, b1 = M_est1.linear_decomposition(cov1[I], target_cov[I][:, I],
                                             target_observed[I])

        print(I, 'I', target_observed[I])
        target_inv_cov = np.linalg.inv(target_cov[I][:, I])

        initial_state = np.hstack([target_observed[I],
                                   M_est1.observed_opt_state])

        ntarget = len(I)
        target_slice = slice(0, ntarget)
        opt_slice1 = slice(ntarget, p + ntarget)

        def target_gradient(state):
            # with many samplers, we will add up the `target_slice` component
            # of many target_grads, and only once do the Gaussian addition
            # of full_grad

            target = state[target_slice]
            opt_state1 = state[opt_slice1]
            target_grad1 = M_est1.randomization_gradient(target, (A1, b1),
                                                         opt_state1)

            full_grad = np.zeros_like(state)
            full_grad[opt_slice1] = -target_grad1[1]
            full_grad[target_slice] -= target_grad1[0]
            full_grad[target_slice] -= target_inv_cov.dot(target)

            return full_grad

        def target_projection(state):
            opt_state1 = state[opt_slice1]
            state[opt_slice1] = M_est1.projection(opt_state1)
            return state

        target_langevin = projected_langevin(initial_state,
                                             target_gradient,
                                             target_projection,
                                             1. / p)

        Langevin_steps = 10000
        burning = 2000
        samples = []
        for i in range(Langevin_steps + burning):
            next(target_langevin)
            if i > burning:
                samples.append(target_langevin.state[target_slice].copy())

        test_stat = lambda x: x
        observed = test_stat(target_observed[I])
        sample_test_stat = np.array([test_stat(x) for x in samples])

        family = discrete_family(sample_test_stat,
                                 np.ones_like(sample_test_stat))
        pval = family.ccdf(0, observed)
        pval = 2 * min(pval, 1 - pval)

        _i = I[0]
        naive_Z = target_observed[_i] / np.sqrt(target_cov[_i, _i])
        naive_pval = ndist.sf(np.fabs(naive_Z))
        naive_pval = 2 * min(naive_pval, 1 - naive_pval)
        print('naive Z', naive_Z, naive_pval)
        return pval, naive_pval, False
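# An illustrative driver (not part of the original tests; the helper name is
# hypothetical): repeat the handcoded test and collect selective vs. naive
# p-values. Under the null the selective p-values should look roughly
# uniform, while the naive ones concentrate near zero.


def _compare_pvalues(nsim=50):
    selective, naive = [], []
    for _ in range(nsim):
        result = test_one_inactive_coordinate_handcoded()
        if result is None:  # screening failed for this replication
            continue
        pv, naive_pv, _ = result
        selective.append(pv)
        naive.append(naive_pv)
    return np.array(selective), np.array(naive)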
def pval(vec_state, full_projection, X, obs_residuals, beta_unpenalized,
         full_null, signs, lam, epsilon, nonzero, active, Sigma, weights,
         randomization_dist, randomization_scale, Langevin_steps, step_size,
         burning, X_scaled):
    """
    Compute a two-sided selective p-value for the norm of the unpenalized
    estimate over the active set, sampling the bootstrap weights with
    projected Langevin.
    """
    n, p = X.shape

    null = []
    alt = []

    X_E = X[:, active]
    inactive = ~active
    nalpha = n

    nactive = np.sum(active)
    ninactive = np.sum(inactive)

    active_set = np.where(active)[0]
    print("true nonzero", nonzero, "active set", active_set)

    XEpinv = np.linalg.pinv(X[:, active])
    hessian = np.dot(X.T, X)
    hessian_restricted = hessian[:, active]

    mat = XEpinv.dot(np.diag(obs_residuals))

    SigmaE_inv = np.linalg.inv(Sigma[:nactive, :nactive])

    if set(nonzero).issubset(active_set):

        def full_gradient(vec_state, obs_residuals=obs_residuals, lam=lam,
                          epsilon=epsilon, active=active, inactive=inactive):
            nactive = np.sum(active)
            ninactive = np.sum(inactive)

            alpha = vec_state[:n]
            betaE = vec_state[n:(n + nactive)]
            cube = vec_state[(n + nactive):]

            p = X.shape[1]
            beta_full = np.zeros(p)
            beta_full[active] = betaE
            subgradient = np.zeros(p)
            subgradient[inactive] = lam * cube
            subgradient[active] = lam * signs

            opt_vec = epsilon * beta_full + subgradient

            beta_bar_boot = mat.dot(alpha)
            omega = (-full_null - np.dot(hessian_restricted, beta_bar_boot)
                     + np.dot(hessian_restricted, betaE) + opt_vec)

            if randomization_dist == "laplace":
                # sign(w), w = grad + epsilon * beta + lambda * u
                randomization_derivative = np.sign(omega) / randomization_scale
            if randomization_dist == "logistic":
                randomization_derivative = -(np.exp(-omega) - 1) / (np.exp(-omega) + 1)

            A = hessian + epsilon * np.identity(nactive + ninactive)
            A_restricted = A[:, active]

            _gradient = np.zeros(n + nactive + ninactive)

            # saturated model
            mat_q = np.dot(hessian_restricted, mat)
            _gradient[:n] = np.dot(mat_q.T, randomization_derivative)

            if weights == 'exponential':
                _gradient[:n] -= np.ones(n)
            if weights == "normal":
                _gradient[:n] -= alpha
            if weights == "gamma":
                _gradient[:n] = 3. / (alpha + 2) - 2
            if weights == "gumbel":
                gumbel_beta = np.sqrt(6) / (1.14 * np.pi)
                euler = 0.57721
                gumbel_mu = -gumbel_beta * euler
                gumbel_sigma = 1. / 1.14
                _gradient[:n] -= (1. - np.exp(-(alpha * gumbel_sigma - gumbel_mu)
                                              / gumbel_beta)) * gumbel_sigma / gumbel_beta
            if weights == "neutral":
                _gradient[:n] -= np.dot(mat.T, np.dot(SigmaE_inv, beta_bar_boot))

            _gradient[n:(n + nactive)] = -A_restricted.T.dot(randomization_derivative)
            _gradient[(n + nactive):] = -lam * randomization_derivative[inactive]

            return _gradient

        sampler = projected_langevin(vec_state.copy(),
                                     full_gradient,
                                     full_projection,
                                     1. / p)
        samples = []

        for i in range(Langevin_steps):
            next(sampler)
            if i > burning:
                samples.append(sampler.state.copy())

        samples = np.array(samples)
        alpha_samples = samples[:, :n]

        beta_bars = [np.dot(XEpinv, np.diag(obs_residuals)).dot(alpha_samples[i, :].T)
                     for i in range(len(samples))]
        pop = [np.linalg.norm(z) for z in beta_bars]
        obs = np.linalg.norm(beta_unpenalized)

        fam = discrete_family(pop, np.ones_like(pop))
        pval = fam.cdf(0, obs)
        pval = 2 * min(pval, 1 - pval)

        print("observed:", obs, "p value:", pval)
        return [pval], [0]
def pval(vec_state, full_projection, X, obs_residuals, beta_unpenalized,
         full_null, signs, lam, epsilon, nonzero, active, Sigma, weights,
         randomization_dist, randomization_scale, Langevin_steps, step_size,
         burning, X_scaled):
    """
    Compute two-sided selective p-values coordinate by coordinate over the
    active set, conditioning on the sufficient statistic for the nuisance
    parameters.
    """
    n, p = X.shape

    null = []
    alt = []

    X_E = X[:, active]
    inactive = ~active
    nalpha = n

    nactive = np.sum(active)
    ninactive = np.sum(inactive)

    active_set = np.where(active)[0]
    print("true nonzero", nonzero, "active set", active_set)

    XEpinv = np.linalg.pinv(X[:, active])
    hessian = np.dot(X.T, X)
    hessian_restricted = hessian[:, active]

    mat = XEpinv.dot(np.diag(obs_residuals))

    if not X_scaled:
        # scale once, outside the loop over coordinates
        full_null = full_null / np.sqrt(n)
        hessian = hessian / np.sqrt(n)
        hessian_restricted = hessian_restricted / np.sqrt(n)

    if set(nonzero).issubset(active_set):

        for j, idx in enumerate(active_set):
            eta = np.zeros(nactive)
            eta[j] = 1

            sigma_eta_sq = Sigma[j, j]

            linear_part = (np.identity(nactive)
                           - np.outer(np.dot(Sigma, eta), eta) / sigma_eta_sq)
            # sufficient statistic for the nuisance
            T_minus_j = np.dot(linear_part, beta_unpenalized)
            c = np.dot(Sigma, eta) / sigma_eta_sq

            fixed_part = full_null + hessian_restricted.dot(T_minus_j)
            XXc = hessian_restricted.dot(c)

            def full_gradient(vec_state, fixed_part=fixed_part,
                              obs_residuals=obs_residuals, eta=eta, lam=lam,
                              epsilon=epsilon, active=active,
                              inactive=inactive):
                nactive = np.sum(active)
                ninactive = np.sum(inactive)

                alpha = vec_state[:n]
                betaE = vec_state[n:(n + nactive)]
                cube = vec_state[(n + nactive):]

                beta_full = np.zeros(p)
                beta_full[active] = betaE
                subgradient = np.zeros(p)
                subgradient[inactive] = lam * cube
                subgradient[active] = lam * signs

                opt_vec = epsilon * beta_full + subgradient

                beta_bar_j_boot = np.inner(mat[j, :], alpha)
                omega = (-fixed_part - XXc * beta_bar_j_boot
                         + np.dot(hessian_restricted, betaE) + opt_vec)

                if randomization_dist == "laplace":
                    # sign(w), w = grad + epsilon * beta + lambda * u
                    randomization_derivative = np.sign(omega) / randomization_scale
                if randomization_dist == "logistic":
                    omega_scaled = omega / randomization_scale
                    randomization_derivative = -(np.exp(-omega_scaled) - 1) / (np.exp(-omega_scaled) + 1)
                    randomization_derivative /= randomization_scale
                if randomization_dist == "normal":
                    randomization_derivative = omega / (randomization_scale ** 2)

                A = hessian + epsilon * np.identity(nactive + ninactive)
                A_restricted = A[:, active]

                _gradient = np.zeros(n + nactive + ninactive)

                # saturated model
                mat_q = np.outer(XXc, eta).dot(mat)
                _gradient[:n] = np.dot(mat_q.T, randomization_derivative)

                if weights == 'exponential':
                    _gradient[:n] -= np.ones(n)
                if weights == "normal":
                    _gradient[:n] -= alpha
                if weights == "gamma":
                    _gradient[:n] = 3. / (alpha + 2) - 2
                if weights == "gumbel":
                    gumbel_beta = np.sqrt(6) / (1.14 * np.pi)
                    euler = 0.57721
                    gumbel_mu = -gumbel_beta * euler
                    gumbel_sigma = 1. / 1.14
                    _gradient[:n] -= (1. - np.exp(-(alpha * gumbel_sigma - gumbel_mu)
                                                  / gumbel_beta)) * gumbel_sigma / gumbel_beta
                if weights == "neutral":
                    _gradient[:n] -= (beta_bar_j_boot / sigma_eta_sq) * np.dot(mat.T, eta)

                _gradient[n:(n + nactive)] = -A_restricted.T.dot(randomization_derivative)
                _gradient[(n + nactive):] = -lam * randomization_derivative[inactive]

                return _gradient

            sampler = projected_langevin(vec_state.copy(),
                                         full_gradient,
                                         full_projection,
                                         step_size)
            samples = []

            for i in range(Langevin_steps):
                next(sampler)
                if (i > burning) and (i % 3 == 0):  # thin the chain
                    samples.append(sampler.state.copy())

            samples = np.array(samples)
            alpha_samples = samples[:, :n]
            beta_samples = samples[:, n:(n + nactive)]

            beta_bars = [np.dot(XEpinv, np.diag(obs_residuals)).dot(alpha_samples[i, :].T)
                         for i in range(len(samples))]
            pop = [z[j] for z in beta_bars]
            obs = beta_unpenalized[j]

            fam = discrete_family(pop, np.ones_like(pop))
            pval = fam.cdf(0, obs)
            pval = 2 * min(pval, 1 - pval)
            print("observed:", obs, "p value:", pval)

            if idx in nonzero:
                alt.append(pval)
            else:
                null.append(pval)

    return null, alt
def test_simple_problem(n=100, randomization_dist="logistic", threshold=1,
                        weights="neutral", Langevin_steps=10000, burning=0):
    step_size = 1. / n

    y = np.random.standard_normal(n)
    obs = np.sqrt(n) * np.mean(y)

    if randomization_dist == "logistic":
        omega = np.random.logistic(loc=0, scale=1, size=1)

    # only proceed if the randomized statistic crosses the threshold
    if obs + omega < threshold:
        return -1

    initial_state = np.zeros(n)
    y_cs = (y - np.mean(y)) / np.sqrt(n)

    def full_projection(state):
        return state

    def full_gradient(state, n=n, y_cs=y_cs):
        gradient = np.zeros(n)
        if weights == "normal":
            gradient -= state
        if weights == "gumbel":
            gumbel_beta = np.sqrt(6) / (1.14 * np.pi)
            euler = 0.57721
            gumbel_mu = -gumbel_beta * euler
            gumbel_sigma = 1. / 1.14
            gradient -= (1. - np.exp(-(state * gumbel_sigma - gumbel_mu)
                                     / gumbel_beta)) * gumbel_sigma / gumbel_beta
        if weights == "logistic":
            gradient = np.divide(np.exp(-state) - 1, np.exp(-state) + 1)
        if weights == "neutral":
            gradient = -np.inner(state, y_cs) * y_cs

        omega = -np.inner(y_cs, state) + threshold
        if randomization_dist == "logistic":
            randomization_derivative = -1. / (1 + np.exp(-omega))
        gradient -= y_cs * randomization_derivative

        return gradient

    sampler = projected_langevin(initial_state.copy(),
                                 full_gradient,
                                 full_projection,
                                 step_size)

    samples = []
    for i in range(Langevin_steps):
        next(sampler)
        if i > burning:
            samples.append(sampler.state.copy())

    alphas = np.array(samples)

    pop = [np.inner(y_cs, alphas[i, :]) for i in range(alphas.shape[0])]

    fam = discrete_family(pop, np.ones_like(pop))
    pval = fam.cdf(0, obs)
    pval = 2 * min(pval, 1 - pval)

    print("observed:", obs, "p value:", pval)
    return pval
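# An illustrative smoke test (not from the original module; the helper name
# is hypothetical): run the simple problem repeatedly and inspect the null
# p-values, which should be roughly uniform on [0, 1]. Replications where
# the randomized statistic does not cross the threshold return -1 and are
# discarded.


def _check_uniformity(nsim=200):
    pvals = [test_simple_problem() for _ in range(nsim)]
    pvals = np.array([pv for pv in pvals if pv >= 0])
    print("fraction below 0.05:", np.mean(pvals < 0.05))
    return pvals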