예제 #1
0
def test_conditional_simple():
    """
    Compare draws from a conditioned constraint with plain rejection sampling.

    The constraint X1 + X2 <= 1 is conditioned on X2 = 2.  The first
    coordinate of the resulting draws is compared, via a scaled difference
    of means, with standard normal draws that are kept only when they fall
    below -1.
    """
    # inequality constraint: X1+X2<= 1
    con = AC.constraints(np.ones((1, 2)), np.array([1]))

    # equality constraint: X2=2
    C = np.array([[0, 1]])
    d = np.array([2])
    new_con = con.conditional(C, d)

    # Draw a point, project it so that C.dot(point) == d, and retry until
    # the inequality constraint accepts it.
    C_pinv = np.linalg.pinv(C)
    while True:
        start = np.random.standard_normal(2)
        start = start - np.dot(C_pinv, np.dot(C, start) - d)
        if con(start):
            break
    Z1 = AC.sample_from_constraints(new_con, start, ndraw=10000)

    # Reference sample: one scalar draw at a time, keeping those below -1.
    accepted = []
    while len(accepted) < 10000:
        draw = np.random.standard_normal()  # conditional distribution
        if draw < -1:
            accepted.append(draw)

    a1 = Z1[:, 0]
    a2 = np.array(accepted)
    scaled_gap = (a1.mean() - a2.mean()) / (np.std(a1) * np.sqrt(2)) * np.sqrt(10000)
    nt.assert_true(np.fabs(scaled_gap) < 5)
예제 #2
0
def test_conditional():
    """
    Check draws from a randomly generated, conditioned constraint.

    Every draw must satisfy the equality constraint ``C z = d`` up to a
    small norm tolerance, and fewer than 0.1% of the draws may violate
    the inequalities of the conditioned constraint.
    """
    p = 200
    k1, k2 = 5, 3

    offset = np.random.standard_normal((k1,))
    linear = np.random.standard_normal((k1, p))
    con = AC.constraints(linear, offset)
    con.mean = np.random.standard_normal(p)

    C = np.random.standard_normal((k2, p))
    d = np.random.standard_normal(k2)
    new_con = con.conditional(C, d)

    # Project random points onto {C W = d} until both constraints accept one.
    C_pinv = np.linalg.pinv(C)
    while True:
        W = np.random.standard_normal(p)
        W = W - np.dot(C_pinv, np.dot(C, W) - d)
        if new_con(W) and con(W):
            break

    Z = AC.sample_from_constraints(new_con, W, ndraw=5000)

    tol = 0

    equality_gap = np.dot(Z, C.T) - d[None, :]
    nt.assert_true(np.linalg.norm(equality_gap) < 1.e-7)

    def largest_slack(constraint):
        # per-draw maximum of (linear_part . z - offset) over the rows
        return (np.dot(Z, constraint.linear_part.T) - constraint.offset[None, :]).max(1)

    V = largest_slack(new_con)
    V2 = largest_slack(con)
    print('failing:', (V > tol).sum(), (V2 > tol).sum(), np.linalg.norm(np.dot(C, W) - d))
    nt.assert_true(np.sum(V > tol) < 0.001 * V.shape[0])
예제 #3
0
def test_conditional():
    """
    Check draws from a randomly generated, conditioned constraint.

    Every draw must satisfy the equality constraint ``C z = d`` up to a
    small norm tolerance, and fewer than 0.1% of the draws may violate
    the inequalities of the conditioned constraint.
    """
    p = 200
    k1, k2 = 5, 3

    offset = np.random.standard_normal((k1,))
    linear = np.random.standard_normal((k1, p))
    con = AC.constraints(linear, offset)
    con.mean = np.random.standard_normal(p)

    C = np.random.standard_normal((k2, p))
    d = np.random.standard_normal(k2)
    new_con = con.conditional(C, d)

    # Project random points onto {C W = d} until both constraints accept one.
    C_pinv = np.linalg.pinv(C)
    while True:
        W = np.random.standard_normal(p)
        W = W - np.dot(C_pinv, np.dot(C, W) - d)
        if new_con(W) and con(W):
            break

    Z = AC.sample_from_constraints(new_con, W, ndraw=5000)

    tol = 0

    equality_gap = np.dot(Z, C.T) - d[None, :]
    nt.assert_true(np.linalg.norm(equality_gap) < 1.e-7)

    def largest_slack(constraint):
        # per-draw maximum of (linear_part . z - offset) over the rows
        return (np.dot(Z, constraint.linear_part.T) - constraint.offset[None, :]).max(1)

    V = largest_slack(new_con)
    V2 = largest_slack(con)
    print('failing:', (V > tol).sum(), (V2 > tol).sum(), np.linalg.norm(np.dot(C, W) - d))
    nt.assert_true(np.sum(V > tol) < 0.001 * V.shape[0])
예제 #4
0
def test_conditional_simple():
    """
    Compare draws from a conditioned constraint with plain rejection sampling.

    The constraint X1 + X2 <= 1 is conditioned on X2 = 2.  The first
    coordinate of the resulting draws is compared, via a scaled difference
    of means, with standard normal draws that are kept only when they fall
    below -1.
    """
    # inequality constraint: X1+X2<= 1
    con = AC.constraints(np.ones((1, 2)), np.array([1]))

    # equality constraint: X2=2
    C = np.array([[0, 1]])
    d = np.array([2])
    new_con = con.conditional(C, d)

    # Draw a point, project it so that C.dot(point) == d, and retry until
    # the inequality constraint accepts it.
    C_pinv = np.linalg.pinv(C)
    while True:
        start = np.random.standard_normal(2)
        start = start - np.dot(C_pinv, np.dot(C, start) - d)
        if con(start):
            break
    Z1 = AC.sample_from_constraints(new_con, start, ndraw=10000)

    # Reference sample: one scalar draw at a time, keeping those below -1.
    accepted = []
    while len(accepted) < 10000:
        draw = np.random.standard_normal()  # conditional distribution
        if draw < -1:
            accepted.append(draw)

    a1 = Z1[:, 0]
    a2 = np.array(accepted)
    scaled_gap = (a1.mean() - a2.mean()) / (np.std(a1) * np.sqrt(2)) * np.sqrt(10000)
    nt.assert_true(np.fabs(scaled_gap) < 5)
예제 #5
0
def power(n, snr, pos, rho=0.25,
          muval = np.linspace(0,5,51),
          ndraw=4000000, burnin=100000):
    """
    Plot a one-sided power curve over a grid of effect sizes.

    A sample is drawn from the constraints built by ``constraints(X, pos)``,
    mapped through ``X.T``, and column `pos` is wrapped in a
    ``discrete_family`` with unit weights.  Power at an effect size is
    computed as one minus the family's probability of the acceptance
    region returned by ``one_sided_acceptance(0)``.

    Parameters
    ----------
    n, rho, pos : forwarded to ``parameters``; `pos` is also passed to
        ``constraints`` and selects the column of the statistic.
    snr : effect size marked with a dashed vertical line; its power is
        also printed.
    muval : effect sizes at which the power is evaluated and plotted.
    ndraw, burnin : forwarded to ``sample_from_constraints``.

    Returns
    -------
    (power_fig, {'full': full_power}) where `full_power` is the array of
    power values, one per entry of `muval`.
    """
    X, mu, beta = parameters(n, rho, pos)

    # form the correct constraints

    con, initial = constraints(X, pos)

    Z_selection = sample_from_constraints(con, initial, ndraw=ndraw, burnin=burnin)
    S0 = np.dot(X.T, Z_selection.T).T
    W0 = np.ones(S0.shape[0])
    dfam0 = discrete_family(S0[:,pos], W0)

    L, U = dfam0.one_sided_acceptance(0)

    def one_sided_power(mu):
        return 1 - (dfam0.cdf(mu,U) - dfam0.cdf(mu, L))

    power_fig = plt.figure(figsize=(8,8))
    power_ax = power_fig.gca()
    power_ax.set_ylabel('Power', fontsize=20)
    # raw string: '\m' is not a valid escape sequence
    power_ax.set_xlabel(r'Effect size $\mu$', fontsize=20)
    full_power = np.array([one_sided_power(m) for m in muval])
    print(full_power)
    power_ax.plot(muval, full_power, label='Reduced model UMPU', linewidth=7, alpha=0.5)
    # the legend is built only once a labelled line exists
    power_ax.legend(loc='lower right')
    power_ax.set_xlim([0,5])
    power_ax.plot([snr,snr],[0,1], 'k--')
    print(one_sided_power(snr))
    return power_fig, {'full':full_power}
def simulation(n, snr, pos, rho=0.25, ndraw=5000, burnin=1000):
    """
    Combine constrained selection-stage draws with fresh inference-stage noise.

    The constraint mean is set to ``snr * mu / sqrt(2)`` before sampling.
    The returned value is entry `pos` of ``X.T`` applied to the sampled
    points, plus standard normal noise shifted by ``snr / sqrt(2)``, all
    divided by ``sqrt(2)``.
    """
    root_two = np.sqrt(2)

    X, mu, beta = parameters(n, rho, pos)
    con, initial = constraints(X, pos)
    con.mean = snr * mu / root_two

    Z_selection = sample_from_constraints(con, initial, ndraw=ndraw, burnin=burnin)

    # one noise draw per row of the sample
    n_rows = Z_selection.shape[0]
    Z_inference_pos = np.random.standard_normal(n_rows) + snr / root_two

    selected_stat = np.dot(X.T, Z_selection.T)[pos]
    return (selected_stat + Z_inference_pos) / root_two
예제 #7
0
def power(mu, ndraw=100000, keep_every=100):
    """
    Estimate the rejection rate of the one-sided test at mean `mu`.

    Samples are drawn from a constraint with mean ``(mu, mu)``, thinned by
    keeping every `keep_every`-th draw, and summed across coordinates.
    Each sum is passed to ``null_dbn.one_sided_test`` with
    ``alternative='greater'``.  `cutoff` and `null_dbn` are read from
    module scope.

    The column means of the thinned sample and the rejection rate are
    printed; the rejection rate (mean of the test decisions) is returned.
    """
    constraint = affine.constraints(np.array([[-1,0.]]), np.array([-cutoff]))
    constraint.mean = np.array([mu,mu])
    sample = affine.sample_from_constraints(constraint, np.array([4,2.]),
                                            ndraw=ndraw)[::keep_every]
    # single-argument print() behaves the same on Python 2 and Python 3
    print(sample.mean(0))
    sample = sample.sum(1)
    decisions = [null_dbn.one_sided_test(0, s, alternative='greater')
                 for s in sample]
    rejection_rate = np.mean(decisions)
    print(rejection_rate)
    return rejection_rate
예제 #8
0
def simulation(n, snr, pos, rho=0.25, ndraw=5000, burnin=1000):
    """
    Combine constrained selection-stage draws with fresh inference-stage noise.

    The constraint mean is set to ``snr * mu / sqrt(2)`` before sampling.
    The returned value is entry `pos` of ``X.T`` applied to the sampled
    points, plus standard normal noise shifted by ``snr / sqrt(2)``, all
    divided by ``sqrt(2)``.
    """
    root_two = np.sqrt(2)

    X, mu, beta = parameters(n, rho, pos)
    con, initial = constraints(X, pos)
    con.mean = snr * mu / root_two

    Z_selection = sample_from_constraints(con, initial, ndraw=ndraw, burnin=burnin)

    # one noise draw per row of the sample
    n_rows = Z_selection.shape[0]
    Z_inference_pos = np.random.standard_normal(n_rows) + snr / root_two

    selected_stat = np.dot(X.T, Z_selection.T)[pos]
    return (selected_stat + Z_inference_pos) / root_two
예제 #9
0
def power(mu, ndraw=100000, keep_every=100):
    """
    Estimate the rejection rate of the one-sided test at mean `mu`.

    Samples are drawn from a constraint with mean ``(mu, mu)``, thinned by
    keeping every `keep_every`-th draw, and summed across coordinates.
    Each sum is passed to ``null_dbn.one_sided_test`` with
    ``alternative='greater'``.  `cutoff` and `null_dbn` are read from
    module scope.

    The column means of the thinned sample and the rejection rate are
    printed; the rejection rate (mean of the test decisions) is returned.
    """
    constraint = affine.constraints(np.array([[-1, 0.]]), np.array([-cutoff]))
    constraint.mean = np.array([mu, mu])
    sample = affine.sample_from_constraints(constraint,
                                            np.array([4, 2.]),
                                            ndraw=ndraw)[::keep_every]
    # single-argument print() behaves the same on Python 2 and Python 3
    print(sample.mean(0))
    sample = sample.sum(1)
    decisions = [null_dbn.one_sided_test(0, s, alternative='greater')
                 for s in sample]
    rejection_rate = np.mean(decisions)
    print(rejection_rate)
    return rejection_rate
예제 #10
0
def test_sampling():
    """
    See that means and covariances are approximately correct
    """
    C = AC.constraints(np.identity(3), np.inf * np.ones(3))
    C.mean = np.array([3, 4, 5.2])

    gaussian = np.random.standard_normal((5, 3))
    S = np.dot(gaussian.T, gaussian) / 30.
    C.covariance = S

    V = AC.sample_from_constraints(C, np.zeros(3), ndraw=500000)

    # empirical first and second moments of the draws
    sample_mean = V.mean(0)
    second_moment = np.einsum('ij,ik->ijk', V, V).mean(0)
    sample_cov = second_moment - np.outer(sample_mean, sample_mean)

    nt.assert_true(np.linalg.norm(sample_mean - C.mean) < 0.01)
    nt.assert_true(np.linalg.norm(sample_cov - S) < 0.01)
예제 #11
0
def cone_with_slice(angles,
                    ai,
                    hull,
                    which,
                    fill_args=None,
                    ax=None,
                    label=None,
                    suffix='',
                    Y=None):
    """
    Draw the cone produced by ``cone_rays``, a point `Y`, and the segment
    through `Y` along the direction `eta`.

    Two figures are saved as a side effect: 'fig_onesparse1.png' (filled
    cone only) and 'fig_onesparse2.png' (cone, point and segment).

    Parameters
    ----------
    angles, ai, hull, which : forwarded to ``cone_rays``; ``hull.points``
        is also used to choose the direction `eta`.
    fill_args : keyword arguments for ``ax.fill``; a fresh empty dict is
        used when omitted.
    ax : axes forwarded to ``cone_rays``.
    label : legend label for the scattered point `Y`.
    suffix : unused; kept for interface compatibility.
    Y : point to display; sampled from the constraint when omitted.

    Returns
    -------
    (ax, poly, constraint, rays) as returned by ``cone_rays``.
    """
    # avoid sharing one mutable default dict across calls
    if fill_args is None:
        fill_args = {}

    ax, poly, constraint, rays = cone_rays(angles,
                                           ai,
                                           hull,
                                           which,
                                           ax=ax,
                                           fill_args=fill_args)

    representation = constraints(-constraint.T, np.zeros(2))

    # Y has to be resolved before it is used to pick eta below.
    if Y is None:
        # NOTE(review): confirm sample_from_constraints may be called
        # without a starting point and yields a single 2-vector here.
        Y = sample_from_constraints(representation)

    # eta: the hull point with the largest inner product with Y, scaled by 40
    eta_idx = np.argmax(np.dot(hull.points, Y))
    eta = 40 * hull.points[eta_idx]
    eta_norm_sq = np.linalg.norm(eta)**2

    legend_args = {'scatterpoints': 1, 'fontsize': 30, 'loc': 'lower left'}

    ax.fill(poly[:, 0], poly[:, 1], label=r'$A_{(M,H_0)}$', **fill_args)
    # NOTE(review): `symmetric` is not defined in this function; it is
    # presumably a module-level flag -- confirm.
    if symmetric:
        ax.fill(-poly[:, 0], -poly[:, 1], **fill_args)

    ax.legend(**legend_args)
    ax.figure.savefig('fig_onesparse1.png', dpi=300)

    ax.scatter(Y[0], Y[1], c='k', s=150, label=label)

    Vp, _, Vm = representation.bounds(eta, Y)[:3]

    # component of Y orthogonal to eta
    Yperp = Y - (np.dot(eta, Y) / eta_norm_sq * eta)

    # an infinite bound cannot be plotted; substitute a large finite value
    if Vm == np.inf:
        Vm = 10000

    width_points = np.array([(Yperp + Vp * eta / eta_norm_sq),
                             (Yperp + Vm * eta / eta_norm_sq)])

    ax.plot(width_points[:, 0], width_points[:, 1], '-', c='k', linewidth=4)
    ax.legend(**legend_args)
    ax.figure.savefig('fig_onesparse2.png', dpi=300)

    return ax, poly, constraint, rays
예제 #12
0
def test_sampling():
    """
    See that means and covariances are approximately correct
    """
    C = AC.constraints(np.identity(3), np.inf * np.ones(3))
    C.mean = np.array([3, 4, 5.2])

    gaussian = np.random.standard_normal((5, 3))
    S = np.dot(gaussian.T, gaussian) / 30.
    C.covariance = S

    V = AC.sample_from_constraints(C, np.zeros(3), ndraw=500000)

    # empirical first and second moments of the draws
    sample_mean = V.mean(0)
    second_moment = np.einsum('ij,ik->ijk', V, V).mean(0)
    sample_cov = second_moment - np.outer(sample_mean, sample_mean)

    nt.assert_true(np.linalg.norm(sample_mean - C.mean) < 0.01)
    nt.assert_true(np.linalg.norm(sample_cov - S) < 0.01)
예제 #13
0
def test_simulate_nonwhitened():
    """
    Sample under an explicit covariance and check that every draw
    strictly satisfies ``W z < 1`` in each row.
    """
    n, p = 50, 200

    design = np.random.standard_normal((n, p))
    cov = np.dot(design.T, design)

    W = np.random.standard_normal((3, p))
    con = AC.constraints(W, np.ones(3), covariance=cov)

    # redraw until the starting point satisfies all three inequalities
    z = np.random.standard_normal(p)
    while not (np.dot(W, z).max() <= 1):
        z = np.random.standard_normal(p)

    Z = AC.sample_from_constraints(con, z)
    worst_slack = (np.dot(Z, W.T) - 1).max()
    nt.assert_true(worst_slack < 0)
예제 #14
0
def test_simulate_nonwhitened():
    """
    Sample under an explicit covariance and check that every draw
    strictly satisfies ``W z < 1`` in each row.
    """
    n, p = 50, 200

    design = np.random.standard_normal((n, p))
    cov = np.dot(design.T, design)

    W = np.random.standard_normal((3, p))
    con = AC.constraints(W, np.ones(3), covariance=cov)

    # redraw until the starting point satisfies all three inequalities
    z = np.random.standard_normal(p)
    while not (np.dot(W, z).max() <= 1):
        z = np.random.standard_normal(p)

    Z = AC.sample_from_constraints(con, z)
    worst_slack = (np.dot(Z, W.T) - 1).max()
    nt.assert_true(worst_slack < 0)
예제 #15
0
def draw_sample(mu, cutoff, nsample=10000):
    """
    Draw sums of two normal coordinates with mean `mu`, where the first
    coordinate is restricted to exceed `cutoff`.

    When ``mu >= cutoff - 4`` plain rejection sampling is used: batches of
    1,000,000 draws are filtered by ``> cutoff`` until more than `nsample`
    have been kept, and an independent normal draw with mean `mu` is added
    to each.  All kept draws are returned, so the result is longer than
    `nsample` in this branch.

    Otherwise the sample comes from ``affine.sample_from_constraints`` and
    is thinned with step ``2000000 // nsample``.

    Returns
    -------
    1-d numpy array of sampled sums.
    """
    if mu >= cutoff - 4:
        sample = []
        while True:
            candidate = np.random.standard_normal(1000000) + mu
            candidate = candidate[candidate > cutoff]
            sample.extend(candidate)
            if len(sample) > nsample:
                break
        sample = np.array(sample)
        sample += np.random.standard_normal(sample.shape) + mu
    else:
        ndraw = 2000000
        constraint = affine.constraints(np.array([[-1,0.]]), np.array([-cutoff]))
        constraint.mean = np.array([mu,mu])
        sample = affine.sample_from_constraints(constraint, np.array([cutoff + 0.1,0]),
                                                ndraw=ndraw,
                                                direction_of_interest=np.array([1,1.]))
        # floor division: a slice step must be an int on Python 3 as well
        sample = sample.sum(1)[::(ndraw // nsample)]
    return sample
예제 #16
0
def cone_with_slice(angles, ai, hull, which, fill_args=None, ax=None, label=None,
                    suffix='', 
                    Y=None):
    """
    Draw the cone produced by ``cone_rays``, a point `Y`, and the segment
    through `Y` along the direction `eta`.

    Two figures are saved as a side effect: 'fig_onesparse1.png' (filled
    cone only) and 'fig_onesparse2.png' (cone, point and segment).

    Parameters
    ----------
    angles, ai, hull, which : forwarded to ``cone_rays``; ``hull.points``
        is also used to choose the direction `eta`.
    fill_args : keyword arguments for ``ax.fill``; a fresh empty dict is
        used when omitted.
    ax : axes forwarded to ``cone_rays``.
    label : legend label for the scattered point `Y`.
    suffix : unused; kept for interface compatibility.
    Y : point to display; sampled from the constraint when omitted.

    Returns
    -------
    (ax, poly, constraint, rays) as returned by ``cone_rays``.
    """
    # avoid sharing one mutable default dict across calls
    if fill_args is None:
        fill_args = {}

    ax, poly, constraint, rays = cone_rays(angles, ai, hull, which, ax=ax, fill_args=fill_args)

    representation = constraints(-constraint.T, np.zeros(2))

    # Y has to be resolved before it is used to pick eta below.
    if Y is None:
        # NOTE(review): confirm sample_from_constraints may be called
        # without a starting point and yields a single 2-vector here.
        Y = sample_from_constraints(representation)

    # eta: the hull point with the largest inner product with Y, scaled by 40
    eta_idx = np.argmax(np.dot(hull.points, Y))
    eta = 40 * hull.points[eta_idx]
    eta_norm_sq = np.linalg.norm(eta)**2

    legend_args = {'scatterpoints':1, 'fontsize':30, 'loc':'lower left'}

    ax.fill(poly[:,0], poly[:,1], label=r'$A_{(M,H_0)}$', **fill_args)
    # NOTE(review): `symmetric` is not defined in this function; it is
    # presumably a module-level flag -- confirm.
    if symmetric:
        ax.fill(-poly[:,0], -poly[:,1], **fill_args)

    ax.legend(**legend_args)
    ax.figure.savefig('fig_onesparse1.png', dpi=300)

    ax.scatter(Y[0], Y[1], c='k', s=150, label=label)

    Vp, _, Vm = representation.bounds(eta, Y)[:3]

    # component of Y orthogonal to eta
    Yperp = Y - (np.dot(eta, Y) / eta_norm_sq * eta)

    # an infinite bound cannot be plotted; substitute a large finite value
    if Vm == np.inf:
        Vm = 10000

    width_points = np.array([(Yperp + Vp*eta / eta_norm_sq),
                             (Yperp + Vm*eta / eta_norm_sq)])

    ax.plot(width_points[:,0], width_points[:,1], '-', c='k', linewidth=4)
    ax.legend(**legend_args)
    ax.figure.savefig('fig_onesparse2.png', dpi=300)

    return ax, poly, constraint, rays
예제 #17
0
def test_pivots_intervals():
    """
    Smoke test for ``pivot`` and ``interval``.

    ``pivot`` is called with its default alternative and with 'less' and
    'greater'; ``interval`` is called with UMAU on and off.  Return values
    are not inspected.
    """
    linear = np.random.standard_normal((4, 30))
    offset = np.random.standard_normal(4)
    con = AC.constraints(linear, offset)

    # redraw until the constraint accepts the starting point
    w = np.random.standard_normal(30)
    while not con(w):
        w = np.random.standard_normal(30)

    # last draw of the sample
    Z = AC.sample_from_constraints(con, w)[-1]

    # direction: indicator of coordinate 4
    u = np.zeros(con.dim)
    u[4] = 1

    # call pivot
    con.pivot(u, Z)
    for alternative in ('less', 'greater'):
        con.pivot(u, Z, alternative=alternative)

    for use_umau in (True, False):
        con.interval(u, Z, UMAU=use_umau)
예제 #18
0
def test_pivots_intervals():
    """
    Smoke test for ``pivot`` and ``interval``.

    ``pivot`` is called with its default alternative and with 'less' and
    'greater'; ``interval`` is called with UMAU on and off.  Return values
    are not inspected.
    """
    linear = np.random.standard_normal((4, 30))
    offset = np.random.standard_normal(4)
    con = AC.constraints(linear, offset)

    # redraw until the constraint accepts the starting point
    w = np.random.standard_normal(30)
    while not con(w):
        w = np.random.standard_normal(30)

    # last draw of the sample
    Z = AC.sample_from_constraints(con, w)[-1]

    # direction: indicator of coordinate 4
    u = np.zeros(con.dim)
    u[4] = 1

    # call pivot
    con.pivot(u, Z)
    for alternative in ('less', 'greater'):
        con.pivot(u, Z, alternative=alternative)

    for use_umau in (True, False):
        con.interval(u, Z, UMAU=use_umau)
예제 #19
0
def power(n, snr, pos, rho=0.25, muval=np.linspace(0, 5, 51),
          ndraw=4000000, burnin=100000):
    """
    Plot a one-sided power curve over a grid of effect sizes.

    A sample is drawn from the constraints built by ``constraints(X, pos)``,
    mapped through ``X.T``, and column `pos` is wrapped in a
    ``discrete_family`` with unit weights.  Power at an effect size is
    computed as one minus the family's probability of the acceptance
    region returned by ``one_sided_acceptance(0)``.

    Parameters
    ----------
    n, rho, pos : forwarded to ``parameters``; `pos` is also passed to
        ``constraints`` and selects the column of the statistic.
    snr : effect size marked with a dashed vertical line; its power is
        also printed.
    muval : effect sizes at which the power is evaluated and plotted.
    ndraw, burnin : forwarded to ``sample_from_constraints``.

    Returns
    -------
    (power_fig, {'full': full_power}) where `full_power` is the array of
    power values, one per entry of `muval`.
    """
    X, mu, beta = parameters(n, rho, pos)

    # form the correct constraints

    con, initial = constraints(X, pos)

    Z_selection = sample_from_constraints(con,
                                          initial,
                                          ndraw=ndraw,
                                          burnin=burnin)
    S0 = np.dot(X.T, Z_selection.T).T
    W0 = np.ones(S0.shape[0])
    dfam0 = discrete_family(S0[:, pos], W0)

    L, U = dfam0.one_sided_acceptance(0)

    def one_sided_power(mu):
        return 1 - (dfam0.cdf(mu, U) - dfam0.cdf(mu, L))

    power_fig = plt.figure(figsize=(8, 8))
    power_ax = power_fig.gca()
    power_ax.set_ylabel('Power', fontsize=20)
    # raw string: '\m' is not a valid escape sequence
    power_ax.set_xlabel(r'Effect size $\mu$', fontsize=20)
    full_power = np.array([one_sided_power(m) for m in muval])
    print(full_power)
    power_ax.plot(muval,
                  full_power,
                  label='Reduced model UMPU',
                  linewidth=7,
                  alpha=0.5)
    # the legend is built only once a labelled line exists
    power_ax.legend(loc='lower right')
    power_ax.set_xlim([0, 5])
    power_ax.plot([snr, snr], [0, 1], 'k--')
    print(one_sided_power(snr))
    return power_fig, {'full': full_power}
def test_chisq_central():
    """
    Check that p-values from ``chisq.quadratic_test`` have mean close to
    1/2 and standard deviation close to 1/sqrt(12), the moments of a
    uniform distribution on [0, 1].

    Every tenth draw of the constrained sample is tested.  The empirical
    CDF of the p-values is also plotted against the diagonal.
    """
    n, p = 4, 10
    A, b = np.random.standard_normal((n, p)), np.zeros(n)
    con = AC.constraints(A,b)

    # redraw until the constraint accepts the starting point
    while True:
        z = np.random.standard_normal(p)
        if con(z):
            break

    S = np.identity(p)[:3]
    Z = AC.sample_from_constraints(con, z, ndraw=10000)
    # floor division: range() needs an int on Python 3 as well
    P = [chisq.quadratic_test(Z[10*i], S, con)
         for i in range(Z.shape[0] // 10)]
    ecdf = sm.distributions.ECDF(P)

    plt.clf()
    x = np.linspace(0,1,101)
    plt.plot(x, ecdf(x), c='red')
    plt.plot([0,1],[0,1], c='blue', linewidth=2)
    nt.assert_true(np.fabs(np.mean(P)-0.5) < 0.03)
    nt.assert_true(np.fabs(np.std(P)-1/np.sqrt(12)) < 0.03)
예제 #21
0
def test_chisq_central():
    """
    Check that p-values from ``chisq.quadratic_test`` have mean close to
    1/2 and standard deviation close to 1/sqrt(12), the moments of a
    uniform distribution on [0, 1].

    Every tenth draw of the constrained sample is tested.  The empirical
    CDF of the p-values is also plotted against the diagonal.
    """
    n, p = 4, 10
    A, b = np.random.standard_normal((n, p)), np.zeros(n)
    con = AC.constraints(A, b)

    # redraw until the constraint accepts the starting point
    while True:
        z = np.random.standard_normal(p)
        if con(z):
            break

    S = np.identity(p)[:3]
    Z = AC.sample_from_constraints(con, z, ndraw=10000)
    # floor division: range() needs an int on Python 3 as well
    P = [chisq.quadratic_test(Z[10 * i], S, con)
         for i in range(Z.shape[0] // 10)]
    ecdf = sm.distributions.ECDF(P)

    plt.clf()
    x = np.linspace(0, 1, 101)
    plt.plot(x, ecdf(x), c='red')
    plt.plot([0, 1], [0, 1], c='blue', linewidth=2)
    nt.assert_true(np.fabs(np.mean(P) - 0.5) < 0.03)
    nt.assert_true(np.fabs(np.std(P) - 1 / np.sqrt(12)) < 0.03)
예제 #22
0
import os
import numpy as np
import matplotlib.pyplot as plt
from selection import affine
from selection.discrete_family import discrete_family
from scipy.stats import norm as ndist

# Threshold: the 0.95 quantile of the standard normal distribution.
cutoff = ndist.ppf(0.95)

# Reference sample built once at import time and reused by power().
# NOTE(review): the constraint presumably encodes -x[0] <= -cutoff,
# i.e. x[0] >= cutoff -- confirm against affine.constraints.
null_constraint = affine.constraints(np.array([[-1, 0.]]), np.array([-cutoff]))
# 100000 draws starting from (4, 2); each draw is reduced to the sum of
# its two coordinates.
null_sample = affine.sample_from_constraints(null_constraint,
                                             np.array([4, 2.]),
                                             ndraw=100000).sum(1)
# Discrete family over the summed draws with unit weights.
null_dbn = discrete_family(null_sample, np.ones_like(null_sample))


def power(mu, ndraw=100000, keep_every=100):
    """
    Estimate the rejection rate of the one-sided test at mean `mu`.

    Samples are drawn from a constraint with mean ``(mu, mu)``, thinned by
    keeping every `keep_every`-th draw, and summed across coordinates.
    Each sum is passed to ``null_dbn.one_sided_test`` with
    ``alternative='greater'``.  `cutoff` and `null_dbn` are read from
    module scope.

    The column means of the thinned sample and the rejection rate are
    printed; the rejection rate (mean of the test decisions) is returned.
    """
    constraint = affine.constraints(np.array([[-1, 0.]]), np.array([-cutoff]))
    constraint.mean = np.array([mu, mu])
    sample = affine.sample_from_constraints(constraint,
                                            np.array([4, 2.]),
                                            ndraw=ndraw)[::keep_every]
    # single-argument print() behaves the same on Python 2 and Python 3
    print(sample.mean(0))
    sample = sample.sum(1)
    decisions = [null_dbn.one_sided_test(0, s, alternative='greater')
                 for s in sample]
    rejection_rate = np.mean(decisions)
    print(rejection_rate)
    return rejection_rate

예제 #23
0
def _plot_pvalue_ecdfs(Ugrid, Psplit, Pselect):
    """Plot the ECDFs of two p-value samples on a new 8x8 figure and return it."""
    fig = plt.figure(figsize=(8,8))
    ax = fig.gca()
    ax.plot(Ugrid, ECDF(Psplit)(Ugrid), label='Sample splitting', c='red', linewidth=5, alpha=0.5)
    ax.plot(Ugrid, ECDF(Pselect)(Ugrid), label='Selected using $i^*(Z_S)$', c='blue', linewidth=5, alpha=0.5)
    ax.set_xlabel('P-value, $p$', fontsize=20)
    ax.set_ylabel('ECDF($p$)', fontsize=20)
    # reference line at p = 0.05
    ax.plot([0.05,0.05],[0,1], 'k--')
    ax.legend(loc='lower right')
    return fig


def marginal(n, snr, pos, rho=0.25, ndraw=5000,
             burnin=1000, nsim=5000, sigma=1.):
    """
    Simulate p-values from sample splitting and from the selective
    procedure, and plot their empirical CDFs.

    For each of `nsim` replications a response is generated, ``covtest``
    returns a constraint together with a selected position and sign, and
    a reference sample is drawn from that constraint.  A "select" p-value
    is computed from a ``discrete_family`` built on the reference sample
    and a "split" p-value from a normal tail probability.

    Parameters
    ----------
    n, rho, pos : forwarded to ``parameters``; `pos` is also compared with
        the selected position.
    snr : multiplier applied to `mu` when generating the data.
    ndraw, burnin : forwarded to ``sample_from_constraints``.
    nsim : number of replications.
    sigma : noise scale.

    Returns
    -------
    (fig1, fig2, dbn1): `fig1` shows all p-values, `fig2` only those from
    replications where the sign was +1 and the selected position equalled
    `pos`; `dbn1` is a dict with keys 'split', 'select' and 'hypotheses'.
    """
    X, mu, beta = parameters(n, rho, pos)

    Psplit = []
    Pselect = []
    hypotheses = []

    for _ in range(nsim):
        Y_select = (snr * mu / np.sqrt(2) + np.random.standard_normal(n)) * sigma
        con, _, select_pos, sign = covtest(X, Y_select, sigma=sigma, exact=True)

        cond_ncp = snr * np.dot(X.T[select_pos], mu) / np.sqrt(2) * sign

        correct = (sign == +1) and (pos == select_pos)
        hypotheses.append(correct)
        Y_null = sample_from_constraints(con, Y_select, ndraw=ndraw, burnin=burnin)
        Z_null = (np.dot(X.T[select_pos], Y_null.T) + sigma * np.random.standard_normal(ndraw)) / np.sqrt(2)
        Z_inference = sigma * (cond_ncp + np.random.standard_normal())
        Z_observed = (np.dot(X.T[select_pos], Y_select) * sign + Z_inference) / np.sqrt(2)
        dfam = discrete_family(Z_null, np.ones(Z_null.shape))
        Pselect.append(dfam.ccdf(0, Z_observed))
        if sign == +1:
            Psplit.append(ndist.sf(Z_inference / sigma))
        else:
            Psplit.append(ndist.cdf(Z_inference / sigma))

    Ugrid = np.linspace(0,1,101)

    Psplit = np.array(Psplit)
    Pselect = np.array(Pselect)
    # builtin bool: the np.bool alias was removed in NumPy 1.24
    hypotheses = np.array(hypotheses, bool)

    # plot of marginal distribution of p-values
    fig1 = _plot_pvalue_ecdfs(Ugrid, Psplit, Pselect)

    # conditional distribution of p-values
    # conditioned on selection choosing correct position and sign
    fig2 = _plot_pvalue_ecdfs(Ugrid, Psplit[hypotheses], Pselect[hypotheses])

    dbn1 = {
        'split': Psplit,
        'select': Pselect,
        'hypotheses': hypotheses,
    }

    return fig1, fig2, dbn1
예제 #24
0
def _plot_pvalue_ecdfs(Ugrid, Psplit, Pselect):
    """Plot the ECDFs of two p-value samples on a new 8x8 figure and return it."""
    fig = plt.figure(figsize=(8, 8))
    ax = fig.gca()
    ax.plot(Ugrid,
            ECDF(Psplit)(Ugrid),
            label='Sample splitting',
            c='red',
            linewidth=5,
            alpha=0.5)
    ax.plot(Ugrid,
            ECDF(Pselect)(Ugrid),
            label='Selected using $i^*(Z_S)$',
            c='blue',
            linewidth=5,
            alpha=0.5)
    ax.set_xlabel('P-value, $p$', fontsize=20)
    ax.set_ylabel('ECDF($p$)', fontsize=20)
    # reference line at p = 0.05
    ax.plot([0.05, 0.05], [0, 1], 'k--')
    ax.legend(loc='lower right')
    return fig


def marginal(n,
             snr,
             pos,
             rho=0.25,
             ndraw=5000,
             burnin=1000,
             nsim=5000,
             sigma=1.):
    """
    Simulate p-values from sample splitting and from the selective
    procedure, and plot their empirical CDFs.

    For each of `nsim` replications a response is generated, ``covtest``
    returns a constraint together with a selected position and sign, and
    a reference sample is drawn from that constraint.  A "select" p-value
    is computed from a ``discrete_family`` built on the reference sample
    and a "split" p-value from a normal tail probability.

    Parameters
    ----------
    n, rho, pos : forwarded to ``parameters``; `pos` is also compared with
        the selected position.
    snr : multiplier applied to `mu` when generating the data.
    ndraw, burnin : forwarded to ``sample_from_constraints``.
    nsim : number of replications.
    sigma : noise scale.

    Returns
    -------
    (fig1, fig2, dbn1): `fig1` shows all p-values, `fig2` only those from
    replications where the sign was +1 and the selected position equalled
    `pos`; `dbn1` is a dict with keys 'split', 'select' and 'hypotheses'.
    """
    X, mu, beta = parameters(n, rho, pos)

    Psplit = []
    Pselect = []
    hypotheses = []

    for _ in range(nsim):
        Y_select = (snr * mu / np.sqrt(2) +
                    np.random.standard_normal(n)) * sigma
        con, _, select_pos, sign = covtest(X,
                                           Y_select,
                                           sigma=sigma,
                                           exact=True)

        cond_ncp = snr * np.dot(X.T[select_pos], mu) / np.sqrt(2) * sign

        correct = (sign == +1) and (pos == select_pos)
        hypotheses.append(correct)
        Y_null = sample_from_constraints(con,
                                         Y_select,
                                         ndraw=ndraw,
                                         burnin=burnin)
        Z_null = (np.dot(X.T[select_pos], Y_null.T) +
                  sigma * np.random.standard_normal(ndraw)) / np.sqrt(2)
        Z_inference = sigma * (cond_ncp + np.random.standard_normal())
        Z_observed = (np.dot(X.T[select_pos], Y_select) * sign +
                      Z_inference) / np.sqrt(2)
        dfam = discrete_family(Z_null, np.ones(Z_null.shape))
        Pselect.append(dfam.ccdf(0, Z_observed))
        if sign == +1:
            Psplit.append(ndist.sf(Z_inference / sigma))
        else:
            Psplit.append(ndist.cdf(Z_inference / sigma))

    Ugrid = np.linspace(0, 1, 101)

    Psplit = np.array(Psplit)
    Pselect = np.array(Pselect)
    # builtin bool: the np.bool alias was removed in NumPy 1.24
    hypotheses = np.array(hypotheses, bool)

    # plot of marginal distribution of p-values
    fig1 = _plot_pvalue_ecdfs(Ugrid, Psplit, Pselect)

    # conditional distribution of p-values
    # conditioned on selection choosing correct position and sign
    fig2 = _plot_pvalue_ecdfs(Ugrid, Psplit[hypotheses], Pselect[hypotheses])

    dbn1 = {
        'split': Psplit,
        'select': Pselect,
        'hypotheses': hypotheses,
    }

    return fig1, fig2, dbn1
예제 #25
0
import os
from glob import glob
import numpy as np
import matplotlib.pyplot as plt
from selection import affine 
from selection.discrete_family import discrete_family
from scipy.stats import norm as ndist
from sklearn.isotonic import IsotonicRegression

# Threshold used by the constraint below.
cutoff = 3.
# Reference sample built once at import time.
# NOTE(review): the constraint presumably encodes -x[0] <= -cutoff,
# i.e. x[0] >= cutoff -- confirm against affine.constraints.
null_constraint = affine.constraints(np.array([[-1,0.]]), np.array([-cutoff]))
# 100000 draws starting from (4, 2); each draw is reduced to the sum of
# its two coordinates.
null_sample = affine.sample_from_constraints(null_constraint, np.array([4,2.]),
                                             ndraw=100000).sum(1)
# Discrete family over the summed draws with unit weights.
null_dbn = discrete_family(null_sample, np.ones_like(null_sample))

def draw_sample(mu, cutoff, nsample=10000):
    if mu >= cutoff - 4:
        sample = []
        while True:
            candidate = np.random.standard_normal(1000000) + mu
            candidate = candidate[candidate > cutoff]
            sample.extend(candidate)
            if len(sample) > nsample:
                break
        sample = np.array(sample)
        sample += np.random.standard_normal(sample.shape) + mu
    else:
        constraint = affine.constraints(np.array([[-1,0.]]), np.array([-cutoff]))
	constraint.mean = np.array([mu,mu])
        sample = affine.sample_from_constraints(constraint, np.array([cutoff + 0.1,0]),
                                                ndraw=2000000,