import numpy as np
import nose.tools as nt

from selection import affine as AC


def test_conditional_simple():
    # Constraint: X1 + X2 <= 1.
    A = np.ones((1, 2))
    b = np.array([1])
    con = AC.constraints(A, b)

    # Condition on X2 = 2.
    C = np.array([[0, 1]])
    d = np.array([2])
    new_con = con.conditional(C, d)

    # Find a feasible initial point: project a Gaussian draw onto
    # the affine subspace {w : Cw = d}, retrying until the
    # inequality constraints also hold.
    while True:
        W = np.random.standard_normal(2)
        W -= np.dot(np.linalg.pinv(C), np.dot(C, W) - d)
        if con(W):
            break

    Z1 = AC.sample_from_constraints(new_con, W, ndraw=10000)

    # Direct rejection sampler from the conditional distribution:
    # given X2 = 2, the constraint forces X1 <= -1.
    counter = 0
    new_sample = []
    while True:
        W = np.random.standard_normal()
        if W < -1:
            new_sample.append(W)
            counter += 1
        if counter >= 10000:
            break

    # Compare the two samples with an approximate two-sample Z-statistic.
    a1 = Z1[:, 0]
    a2 = np.array(new_sample)
    test = np.fabs((a1.mean() - a2.mean()) /
                   (np.std(a1) * np.sqrt(2)) * np.sqrt(10000))
    nt.assert_true(test < 5)
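# A minimal closed-form check of what test_conditional_simple samples
# (a sketch, not part of the original test): conditional on X2 = 2, the
# constraint X1 + X2 <= 1 truncates X1 to (-inf, -1], so its law is a
# standard normal truncated at -1 and its mean is available exactly.
from scipy.stats import truncnorm

# E[X1 | X1 <= -1] = -phi(1) / Phi(-1), approximately -1.525;
# Z1[:, 0].mean() above should land close to this value.
exact_conditional_mean = truncnorm.mean(-np.inf, -1)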
def test_conditional():
    p = 200
    k1, k2 = 5, 3
    b = np.random.standard_normal((k1,))
    A = np.random.standard_normal((k1, p))
    con = AC.constraints(A, b)
    w = np.random.standard_normal(p)
    con.mean = w

    # Condition on the k2 affine equalities Cw = d.
    C = np.random.standard_normal((k2, p))
    d = np.random.standard_normal(k2)
    new_con = con.conditional(C, d)

    # Find an initial point feasible for both constraint sets.
    while True:
        W = np.random.standard_normal(p)
        W -= np.dot(np.linalg.pinv(C), np.dot(C, W) - d)
        if new_con(W) and con(W):
            break

    Z = AC.sample_from_constraints(new_con, W, ndraw=5000)

    # Every draw should satisfy the conditioning equalities exactly...
    nt.assert_true(np.linalg.norm(np.dot(Z, C.T) - d[None, :]) < 1.e-7)

    # ...and, up to a vanishing fraction, the inequality constraints.
    tol = 0
    V = (np.dot(Z, new_con.linear_part.T) - new_con.offset[None, :]).max(1)
    V2 = (np.dot(Z, con.linear_part.T) - con.offset[None, :]).max(1)
    print('failing:', (V > tol).sum(), (V2 > tol).sum(),
          np.linalg.norm(np.dot(C, W) - d))
    nt.assert_true(np.sum(V > tol) < 0.001 * V.shape[0])
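# Both tests above locate a feasible starting point by projecting a
# Gaussian draw onto {w : Cw = d} and retrying until the inequalities
# hold. A reusable sketch of that pattern (find_feasible is a
# hypothetical helper, not part of selection.affine):
def find_feasible(con, C=None, d=None, maxiter=1000):
    for _ in range(maxiter):
        w = np.random.standard_normal(con.dim)
        if C is not None:
            # project onto the affine subspace {w : Cw = d}
            w -= np.dot(np.linalg.pinv(C), np.dot(C, w) - d)
        if con(w):
            return w
    raise ValueError('no feasible point found')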
def power(n, snr, pos, rho=0.25, muval=np.linspace(0, 5, 51)):
    # parameters() and constraints() are helpers defined elsewhere
    # in this script.
    X, mu, beta = parameters(n, rho, pos)

    # Form the correct constraints and sample the selection event.
    con, initial = constraints(X, pos)
    Z_selection = sample_from_constraints(con, initial,
                                          ndraw=4000000,
                                          burnin=100000)
    S0 = np.dot(X.T, Z_selection.T).T
    W0 = np.ones(S0.shape[0])
    dfam0 = discrete_family(S0[:, pos], W0)
    one_sided_acceptance_region = dfam0.one_sided_acceptance(0)

    def one_sided_power(mu):
        L, U = one_sided_acceptance_region
        return 1 - (dfam0.cdf(mu, U) - dfam0.cdf(mu, L))

    power_fig = plt.figure(figsize=(8, 8))
    power_ax = power_fig.gca()
    power_ax.set_ylabel('Power', fontsize=20)
    power_ax.set_xlabel(r'Effect size $\mu$', fontsize=20)

    full_power = np.array([one_sided_power(m) for m in muval])
    print(full_power)
    power_ax.plot(muval, full_power, label='Reduced model UMPU',
                  linewidth=7, alpha=0.5)
    power_ax.legend(loc='lower right')
    power_ax.set_xlim([0, 5])
    power_ax.plot([snr, snr], [0, 1], 'k--')
    print(one_sided_power(snr))
    return power_fig, {'full': full_power}
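# How the curve is computed, in one line: one_sided_acceptance(0) returns
# the acceptance region (L, U) of the test of mu = 0, and the power at mu
# is the probability of landing outside it,
#     power(mu) = 1 - (F_mu(U) - F_mu(L)),
# where F_mu is the CDF of the exponentially tilted discrete family.
# An illustrative call (argument values hypothetical):
#
#     fig, curves = power(n=100, snr=3., pos=0)
#     fig.savefig('power_curve.png', dpi=300)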
def simulation(n, snr, pos, rho=0.25, ndraw=5000, burnin=1000):
    X, mu, beta = parameters(n, rho, pos)
    con, initial = constraints(X, pos)
    con.mean = snr * mu / np.sqrt(2)
    Z_selection = sample_from_constraints(con, initial,
                                          ndraw=ndraw, burnin=burnin)
    Z_inference_pos = (np.random.standard_normal(Z_selection.shape[0])
                       + snr / np.sqrt(2))
    return (np.dot(X.T, Z_selection.T)[pos] + Z_inference_pos) / np.sqrt(2)
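# The return line is the data-carving combination: a selection-half and an
# independent inference-half coordinate averaged with weights 1/sqrt(2),
# which preserves unit variance since
#     Var((A + B) / sqrt(2)) = (Var A + Var B) / 2 = 1
# for independent unit-variance A and B.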
def test_sampling():
    """
    Check that the sampler reproduces the mean and covariance
    of an (effectively unconstrained) Gaussian approximately correctly.
    """
    # np.inf offsets make the inequality constraints vacuous.
    C = AC.constraints(np.identity(3), np.inf * np.ones(3))
    C.mean = np.array([3, 4, 5.2])
    W = np.random.standard_normal((5, 3))
    S = np.dot(W.T, W) / 30.
    C.covariance = S
    V = AC.sample_from_constraints(C, np.zeros(3), ndraw=500000)
    nt.assert_true(np.linalg.norm(V.mean(0) - C.mean) < 0.01)
    # Empirical second moment minus outer product of the means
    # is the (biased) sample covariance; compare it to S.
    nt.assert_true(np.linalg.norm(np.einsum('ij,ik->ijk', V, V).mean(0)
                                  - np.outer(V.mean(0), V.mean(0)) - S) < 0.01)
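# The einsum expression above is just the biased sample covariance;
# an equivalent, more readable sketch of the same quantity:
def empirical_cov(V):
    # rowvar=False: rows are draws, columns are coordinates;
    # bias=True matches the mean-of-outer-products form used above.
    return np.cov(V, rowvar=False, bias=True)
# so the final assertion is equivalent to
#     np.linalg.norm(empirical_cov(V) - S) < 0.01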
def cone_with_slice(angles, ai, hull, which, fill_args={}, ax=None,
                    label=None, suffix='', Y=None, symmetric=False):
    # NOTE: `symmetric` was previously read from an implicit module-level
    # global; it is exposed here as a keyword argument.
    ax, poly, constraint, rays = cone_rays(angles, ai, hull, which,
                                           ax=ax, fill_args=fill_args)
    representation = constraints(-constraint.T, np.zeros(2))
    # Draw Y before using it to pick the maximizing direction
    # (the original computed eta from Y before the None check).
    if Y is None:
        Y = sample_from_constraints(representation)
    eta_idx = np.argmax(np.dot(hull.points, Y))
    eta = 40 * hull.points[eta_idx]

    ax.fill(poly[:, 0], poly[:, 1], label=r'$A_{(M,H_0)}$', **fill_args)
    if symmetric:
        ax.fill(-poly[:, 0], -poly[:, 1], **fill_args)
    legend_args = {'scatterpoints': 1, 'fontsize': 30, 'loc': 'lower left'}
    ax.legend(**legend_args)
    ax.figure.savefig('fig_onesparse1.png', dpi=300)

    ax.scatter(Y[0], Y[1], c='k', s=150, label=label)
    Vp, _, Vm = representation.bounds(eta, Y)[:3]
    Yperp = Y - (np.dot(eta, Y) / np.linalg.norm(eta)**2 * eta)
    if Vm == np.inf:
        Vm = 10000
    width_points = np.array([(Yperp + Vp * eta / np.linalg.norm(eta)**2),
                             (Yperp + Vm * eta / np.linalg.norm(eta)**2)])
    ax.plot(width_points[:, 0], width_points[:, 1], '-', c='k', linewidth=4)
    ax.legend(**legend_args)
    ax.figure.savefig('fig_onesparse2.png', dpi=300)
    return ax, poly, constraint, rays
def test_simulate_nonwhitened():
    n, p = 50, 200
    X = np.random.standard_normal((n, p))
    # A rank-deficient covariance (rank <= n < p): the sampler must
    # handle non-white, degenerate Gaussians.
    cov = np.dot(X.T, X)
    W = np.random.standard_normal((3, p))
    con = AC.constraints(W, np.ones(3), covariance=cov)

    # Rejection-sample a feasible initial point.
    while True:
        z = np.random.standard_normal(p)
        if np.dot(W, z).max() <= 1:
            break

    Z = AC.sample_from_constraints(con, z)
    nt.assert_true((np.dot(Z, W.T) - 1).max() < 0)
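# Sanity sketch for the covariance above (names local to this test, not
# part of the package): with g ~ N(0, I_n), the vector gX has covariance
# X^T X = cov, so an unconstrained draw from the test's Gaussian is simply
def unconstrained_draw(X):
    # one draw from N(0, X^T X)
    return np.dot(np.random.standard_normal(X.shape[0]), X)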
def draw_sample(mu, cutoff, nsample=10000):
    if mu >= cutoff - 4:
        # Rejection sampling is cheap when the truncation region
        # carries non-negligible probability.
        sample = []
        while True:
            candidate = np.random.standard_normal(1000000) + mu
            candidate = candidate[candidate > cutoff]
            sample.extend(candidate)
            if len(sample) > nsample:
                break
        sample = np.array(sample)
        # Add the second, unconstrained N(mu, 1) coordinate.
        sample += np.random.standard_normal(sample.shape) + mu
    else:
        # Deep in the tail, fall back to hit-and-run sampling.
        constraint = affine.constraints(np.array([[-1, 0.]]),
                                        np.array([-cutoff]))
        constraint.mean = np.array([mu, mu])
        sample = affine.sample_from_constraints(constraint,
                                                np.array([cutoff + 0.1, 0]),
                                                ndraw=2000000,
                                                direction_of_interest=np.array([1, 1.]))
        # Thin to roughly nsample draws; integer division keeps the
        # slice step an int under Python 3.
        sample = sample.sum(1)[::(2000000 // nsample)]
    return sample
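# A hedged consistency check (helper hypothetical, not in the original
# script): the mean of draw_sample's output has a closed form,
#     E[X1 + X2] = mu + phi(cutoff - mu) / Phi(mu - cutoff) + mu,
# where X1 ~ N(mu, 1) truncated to (cutoff, inf) and X2 ~ N(mu, 1), so
# either branch's sample mean can be compared against it.
from scipy.stats import norm as ndist

def truncated_sum_mean(mu, cutoff):
    return 2 * mu + ndist.pdf(cutoff - mu) / ndist.cdf(mu - cutoff)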
def test_pivots_intervals():
    A, b = np.random.standard_normal((4, 30)), np.random.standard_normal(4)
    con = AC.constraints(A, b)
    while True:
        w = np.random.standard_normal(30)
        if con(w):
            break
    # keep only the final draw of the chain
    Z = AC.sample_from_constraints(con, w)[-1]
    u = np.zeros(con.dim)
    u[4] = 1

    # call pivot
    con.pivot(u, Z)
    con.pivot(u, Z, alternative='less')
    con.pivot(u, Z, alternative='greater')
    con.interval(u, Z, UMAU=True)
    con.interval(u, Z, UMAU=False)
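# If pivot() is correct, its values are Uniform(0, 1) under the null, so a
# stronger (and slower) variant of this smoke test could aggregate many
# pivots and test uniformity (sketch only; loop size hypothetical):
#
#     from scipy.stats import kstest
#     P = [con.pivot(u, AC.sample_from_constraints(con, w)[-1])
#          for _ in range(200)]
#     assert kstest(P, 'uniform').pvalue > 1e-3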
def test_chisq_central():
    # assumes: import statsmodels.api as sm; import matplotlib.pyplot as plt;
    # chisq is the selection module providing quadratic_test.
    n, p = 4, 10
    A, b = np.random.standard_normal((n, p)), np.zeros(n)
    con = AC.constraints(A, b)

    while True:
        z = np.random.standard_normal(p)
        if con(z):
            break

    S = np.identity(p)[:3]
    Z = AC.sample_from_constraints(con, z, ndraw=10000)

    # Thin the chain by 10 to reduce autocorrelation between p-values;
    # integer division keeps the range argument an int.
    P = []
    for i in range(Z.shape[0] // 10):
        P.append(chisq.quadratic_test(Z[10 * i], S, con))

    ecdf = sm.distributions.ECDF(P)
    plt.clf()
    x = np.linspace(0, 1, 101)
    plt.plot(x, ecdf(x), c='red')
    plt.plot([0, 1], [0, 1], c='blue', linewidth=2)

    # A Uniform(0, 1) sample has mean 1/2 and standard deviation 1/sqrt(12).
    nt.assert_true(np.fabs(np.mean(P) - 0.5) < 0.03)
    nt.assert_true(np.fabs(np.std(P) - 1 / np.sqrt(12)) < 0.03)
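# Why 1/sqrt(12) above: for U ~ Uniform(0, 1),
#     Var U = E[U^2] - (E U)^2 = 1/3 - 1/4 = 1/12,
# so sd(U) = 1/sqrt(12), approximately 0.2887.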
import os

import numpy as np
import matplotlib.pyplot as plt

from selection import affine
from selection.discrete_family import discrete_family
from scipy.stats import norm as ndist

# One-sided selection event {Z1 > cutoff} at level 0.95.
cutoff = ndist.ppf(0.95)

null_constraint = affine.constraints(np.array([[-1, 0.]]),
                                     np.array([-cutoff]))
null_sample = affine.sample_from_constraints(null_constraint,
                                             np.array([4, 2.]),
                                             ndraw=100000).sum(1)
null_dbn = discrete_family(null_sample, np.ones_like(null_sample))


def power(mu, ndraw=100000, keep_every=100):
    constraint = affine.constraints(np.array([[-1, 0.]]),
                                    np.array([-cutoff]))
    constraint.mean = np.array([mu, mu])
    # Thin the chain to roughly independent draws.
    sample = affine.sample_from_constraints(constraint,
                                            np.array([4, 2.]),
                                            ndraw=ndraw)[::keep_every]
    print(sample.mean(0))
    sample = sample.sum(1)
    decisions = []
    for s in sample:
        decisions.append(null_dbn.one_sided_test(0, s,
                                                 alternative='greater'))
    print(np.mean(decisions))
    return np.mean(decisions)
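# A hedged usage sketch (grid and draw counts hypothetical): trace out the
# estimated power curve over a coarse grid of mean shifts.
if __name__ == '__main__':
    mugrid = np.linspace(0., 3., 7)
    curve = [power(m, ndraw=20000) for m in mugrid]
    plt.plot(mugrid, curve)
    plt.xlabel(r'$\mu$')
    plt.ylabel('Estimated power')
    plt.savefig('power_sketch.png', dpi=150)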
def marginal(n, snr, pos, rho=0.25, ndraw=5000, burnin=1000,
             nsim=5000, sigma=1.):
    X, mu, beta = parameters(n, rho, pos)
    Psplit = []
    Pselect = []
    hypotheses = []

    for _ in range(nsim):
        Y_select = (snr * mu / np.sqrt(2)
                    + np.random.standard_normal(n)) * sigma
        con, _, select_pos, sign = covtest(X, Y_select, sigma=sigma,
                                           exact=True)
        cond_ncp = snr * np.dot(X.T[select_pos], mu) / np.sqrt(2) * sign
        # was the correct variable, with the correct sign, selected?
        correct = (sign == +1) and (pos == select_pos)
        hypotheses.append(correct)

        Y_null = sample_from_constraints(con, Y_select,
                                         ndraw=ndraw, burnin=burnin)
        Z_null = (np.dot(X.T[select_pos], Y_null.T)
                  + sigma * np.random.standard_normal(ndraw)) / np.sqrt(2)
        Z_inference = sigma * (cond_ncp + np.random.standard_normal())
        Z_observed = (np.dot(X.T[select_pos], Y_select) * sign
                      + Z_inference) / np.sqrt(2)
        dfam = discrete_family(Z_null, np.ones(Z_null.shape))
        Pselect.append(dfam.ccdf(0, Z_observed))
        if sign == +1:
            Psplit.append(ndist.sf(Z_inference / sigma))
        else:
            Psplit.append(ndist.cdf(Z_inference / sigma))

    Ugrid = np.linspace(0, 1, 101)
    Psplit = np.array(Psplit)
    Pselect = np.array(Pselect)
    hypotheses = np.array(hypotheses, bool)

    # Plot of the marginal distribution of p-values.
    fig1 = plt.figure(figsize=(8, 8))
    ax1 = fig1.gca()
    ax1.plot(Ugrid, ECDF(Psplit)(Ugrid), label='Sample splitting',
             c='red', linewidth=5, alpha=0.5)
    ax1.plot(Ugrid, ECDF(Pselect)(Ugrid),
             label='Selected using $i^*(Z_S)$',
             c='blue', linewidth=5, alpha=0.5)
    ax1.set_xlabel('P-value, $p$', fontsize=20)
    ax1.set_ylabel('ECDF($p$)', fontsize=20)
    ax1.plot([0.05, 0.05], [0, 1], 'k--')
    ax1.legend(loc='lower right')

    # Conditional distribution of p-values, given that selection chose
    # the correct position and sign.
    fig2 = plt.figure(figsize=(8, 8))
    ax2 = fig2.gca()
    ax2.plot(Ugrid, ECDF(Psplit[hypotheses])(Ugrid),
             label='Sample splitting', c='red', linewidth=5, alpha=0.5)
    ax2.plot(Ugrid, ECDF(Pselect[hypotheses])(Ugrid),
             label='Selected using $i^*(Z_S)$',
             c='blue', linewidth=5, alpha=0.5)
    ax2.set_xlabel('P-value, $p$', fontsize=20)
    ax2.set_ylabel('ECDF($p$)', fontsize=20)
    ax2.plot([0.05, 0.05], [0, 1], 'k--')
    ax2.legend(loc='lower right')

    dbn1 = {'split': Psplit,
            'select': Pselect,
            'hypotheses': hypotheses}
    return fig1, fig2, dbn1
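# marginal() relies on parameters() defined elsewhere in this script; the
# library imports it needs would be (module paths as used elsewhere in
# this package, stated as an assumption):
#
#     from selection.covtest import covtest
#     from selection.affine import sample_from_constraints
#     from selection.discrete_family import discrete_family
#     from scipy.stats import norm as ndist
#     from statsmodels.distributions.empirical_distribution import ECDF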
import os
from glob import glob

import numpy as np
import matplotlib.pyplot as plt

from selection import affine
from selection.discrete_family import discrete_family
from scipy.stats import norm as ndist
from sklearn.isotonic import IsotonicRegression

# Fixed selection threshold for this script.
cutoff = 3.

null_constraint = affine.constraints(np.array([[-1, 0.]]),
                                     np.array([-cutoff]))
null_sample = affine.sample_from_constraints(null_constraint,
                                             np.array([4, 2.]),
                                             ndraw=100000).sum(1)
null_dbn = discrete_family(null_sample, np.ones_like(null_sample))