Example #1
def test_stack():

    A, b = np.random.standard_normal((4,30)), np.random.standard_normal(4)

    con1 = AC.constraints(A,b)

    A, b = np.random.standard_normal((5,30)), np.random.standard_normal(5)
    E, f = np.random.standard_normal((3,30)), np.random.standard_normal(3)

    con2 = AC.constraints(A,b)

    return AC.stack(con1, con2)
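A quick sanity check on what `AC.stack` returns: the stacked object should enforce both sets of inequalities at once. A minimal sketch, assuming `selection.affine` is imported as `AC` as in these tests, and that stacking concatenates the inequality rows:

import numpy as np
from selection import affine as AC

A1, b1 = np.random.standard_normal((4, 30)), np.random.standard_normal(4)
A2, b2 = np.random.standard_normal((5, 30)), np.random.standard_normal(5)
stacked = AC.stack(AC.constraints(A1, b1), AC.constraints(A2, b2))

# 4 + 5 = 9 inequality rows if stack concatenates the linear parts
assert stacked.linear_part.shape == (9, 30)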
Example #4
def test_conditional():

    p = 200
    k1, k2 = 5, 3
    b = np.random.standard_normal((k1,))
    A = np.random.standard_normal((k1,p))
    con = AC.constraints(A,b)
    w = np.random.standard_normal(p)
    con.mean = w
    C = np.random.standard_normal((k2,p))
    d = np.random.standard_normal(k2)
    new_con = con.conditional(C, d)

    while True:
        W = np.random.standard_normal(p)
        W -= np.dot(np.linalg.pinv(C), np.dot(C, W) - d)  
        if new_con(W) and con(W):
            break

    Z = AC.sample_from_constraints(new_con, W, ndraw=5000)

    tol = 0
    
    nt.assert_true(np.linalg.norm(np.dot(Z, C.T) - d[None,:]) < 1.e-7)

    V = (np.dot(Z, new_con.linear_part.T) - new_con.offset[None,:]).max(1)
    V2 = (np.dot(Z, con.linear_part.T) - con.offset[None,:]).max(1)
    print('failing:', 
           (V>tol).sum(), 
           (V2>tol).sum(), 
           np.linalg.norm(np.dot(C, W) - d))
    nt.assert_true(np.sum(V > tol) < 0.001*V.shape[0])
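The step `W -= np.dot(np.linalg.pinv(C), np.dot(C, W) - d)` projects the Gaussian draw onto the affine subspace {x : Cx = d}, which is why the starting point satisfies the equality exactly. A standalone check of that identity in plain NumPy:

import numpy as np

C = np.random.standard_normal((3, 200))
d = np.random.standard_normal(3)
W = np.random.standard_normal(200)
W -= np.dot(np.linalg.pinv(C), np.dot(C, W) - d)

# C has full row rank almost surely, so C pinv(C) = I and C W = d
assert np.allclose(np.dot(C, W), d)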
Example #5
def test_conditional_simple():

    A = np.ones((1,2))
    b = np.array([1])
    con = AC.constraints(A,b) #X1+X2<= 1

    C = np.array([[0,1]])
    d = np.array([2])   #X2=2

    new_con = con.conditional(C,d)
    while True:
        W = np.random.standard_normal(2)
        W -= np.dot(np.linalg.pinv(C), np.dot(C, W) - d)  
        if con(W):
            break
    Z1 = AC.sample_from_constraints(new_con, W, ndraw=10000)

    counter = 0
    new_sample = []
    while True:
        W = np.random.standard_normal() # conditional distribution
        if W < -1:
            new_sample.append(W)
            counter += 1

        if counter >= 10000:
            break

    a1 = Z1[:,0]
    a2 = np.array(new_sample)
    test = np.fabs((a1.mean() - a2.mean()) / (np.std(a1) * np.sqrt(2)) * np.sqrt(10000))
    nt.assert_true(test < 5)
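Conditioning on X2 = 2 when the inequality is X1 + X2 <= 1 forces X1 <= -1, which is exactly the law the rejection loop above draws from. The truncated-normal mean gives an independent check on the sampler (scipy only; compare with Z1[:, 0].mean() above):

from scipy.stats import norm

# E[X | X < a] = -phi(a) / Phi(a) for X ~ N(0, 1); here a = -1
a = -1.
theoretical_mean = -norm.pdf(a) / norm.cdf(a)
print(theoretical_mean)  # roughly -1.525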
Example #6
def power(n, snr, pos, rho=0.25,
          muval = np.linspace(0,5,51)):

    X, mu, beta = parameters(n, rho, pos)

    # form the correct constraints

    con, initial = constraints(X, pos)

    Z_selection = sample_from_constraints(con, initial, ndraw=4000000, burnin=100000)
    S0 = np.dot(X.T, Z_selection.T).T
    W0 = np.ones(S0.shape[0])
    dfam0 = discrete_family(S0[:,pos], W0)

    one_sided_acceptance_region = dfam0.one_sided_acceptance(0)
    def one_sided_power(mu):
        L, U = one_sided_acceptance_region
        return 1 - (dfam0.cdf(mu,U) - dfam0.cdf(mu, L))

    power_fig = plt.figure(figsize=(8,8))
    power_ax = power_fig.gca()
    power_ax.set_ylabel('Power', fontsize=20)
    power_ax.legend(loc='lower right')
    power_ax.set_xlabel(r'Effect size $\mu$', fontsize=20)
    full_power = np.array([one_sided_power(m) for m in muval])
    print(full_power)
    power_ax.plot(muval, full_power, label='Reduced model UMPU', linewidth=7, alpha=0.5)
    power_ax.legend(loc='lower right')
    power_ax.set_xlim([0,5])
    power_ax.plot([snr,snr],[0,1], 'k--')
    print(one_sided_power(snr))
    return power_fig, {'full':full_power}
Example #10
def simulation(n, snr, pos, rho=0.25, ndraw=5000, burnin=1000):

    X, mu, beta = parameters(n, rho, pos)
    con, initial = constraints(X, pos)

    con.mean = snr * mu / np.sqrt(2)
    Z_selection = sample_from_constraints(con,
                                          initial,
                                          ndraw=ndraw,
                                          burnin=burnin)
    Z_inference_pos = np.random.standard_normal(
        Z_selection.shape[0]) + snr / np.sqrt(2)
    return (np.dot(X.T, Z_selection.T)[pos] + Z_inference_pos) / np.sqrt(2)
def cone_with_slice(angles,
                    ai,
                    hull,
                    which,
                    fill_args={},
                    ax=None,
                    label=None,
                    suffix='',
                    Y=None):

    ax, poly, constraint, rays = cone_rays(angles,
                                           ai,
                                           hull,
                                           which,
                                           ax=ax,
                                           fill_args=fill_args)
    representation = constraints(-constraint.T, np.zeros(2))

    # Y is needed to choose the ray below, so draw it first when not supplied
    if Y is None:
        Y = sample_from_constraints(representation, np.zeros(2))[-1]

    eta_idx = np.argmax(np.dot(hull.points, Y))
    eta = 40 * hull.points[eta_idx]

    ax.fill(poly[:, 0], poly[:, 1], label=r'$A_{(M,H_0)}$', **fill_args)
    if symmetric:  # 'symmetric' is a module-level flag in the original source
        ax.fill(-poly[:, 0], -poly[:, 1], **fill_args)

    legend_args = {'scatterpoints': 1, 'fontsize': 30, 'loc': 'lower left'}
    ax.legend(**legend_args)
    ax.figure.savefig('fig_onesparse1.png', dpi=300)

    ax.scatter(Y[0], Y[1], c='k', s=150, label=label)

    Vp, _, Vm = representation.bounds(eta, Y)[:3]

    Yperp = Y - (np.dot(eta, Y) / np.linalg.norm(eta)**2 * eta)

    if Vm == np.inf:
        Vm = 10000

    width_points = np.array([(Yperp + Vp * eta / np.linalg.norm(eta)**2),
                             (Yperp + Vm * eta / np.linalg.norm(eta)**2)])

    ax.plot(width_points[:, 0], width_points[:, 1], '-', c='k', linewidth=4)
    legend_args = {'scatterpoints': 1, 'fontsize': 30, 'loc': 'lower left'}
    ax.legend(**legend_args)
    ax.figure.savefig('fig_onesparse2.png', dpi=300)

    return ax, poly, constraint, rays
Example #13
def test_sampling():
    """
    See that means and covariances are approximately correct
    """
    C = AC.constraints(np.identity(3), np.inf*np.ones(3))
    C.mean = np.array([3,4,5.2])
    W = np.random.standard_normal((5,3))
    S = np.dot(W.T, W) / 30.
    C.covariance = S
    V = AC.sample_from_constraints(C, np.zeros(3), ndraw=500000)

    nt.assert_true(np.linalg.norm(V.mean(0)-C.mean) < 0.01)
    nt.assert_true(np.linalg.norm(np.einsum('ij,ik->ijk', V, V).mean(0) - 
                                  np.outer(V.mean(0), V.mean(0)) - S) < 0.01)
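The einsum expression above is the biased empirical covariance written by hand; np.cov states the same check more directly. A minimal equivalence sketch:

import numpy as np

V = np.random.standard_normal((1000, 3))
byhand = np.einsum('ij,ik->ijk', V, V).mean(0) - np.outer(V.mean(0), V.mean(0))

# identical to the biased (divide-by-N) sample covariance
assert np.allclose(byhand, np.cov(V.T, bias=True))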
Example #15
    def full_sim(L, b, p):
        k, q = L.shape
        A1 = np.random.standard_normal((p, q))
        A2 = L[:p]
        A3 = np.array([np.arange(q)**(i / 2.) for i in range(1, 4)])

        con = AC.constraints((L, b), None)  # older tuple-based constraints API

        def sim(A):

            y = C.simulate_from_constraints(con)
            return quadratic_test(y, np.identity(con.dim), con)

        return sim(A1), sim(A2), sim(A3)
Example #17
def test_chisq_noncentral():

    mu = np.arange(6)
    ncp = np.linalg.norm(mu[:3])**2

    A, b = np.random.standard_normal((4, 6)), np.zeros(4)
    con = AC.constraints(A, b, mean=mu)

    ro.r('fncp=%f' % ncp)
    ro.r('f = function(x) {pchisq(x,3,ncp=fncp)}')

    def F(x):
        if x != np.inf:
            return np.array(ro.r('f(%f)' % x))
        else:
            return np.array([1.])

    nsim = 2000
    P = []
    for i in range(nsim):
        Z = AC.simulate_from_constraints(con, mu=mu)
        print(i)
        u = 0 * Z
        u[:3] = Z[:3] / np.linalg.norm(Z[:3])
        L, V, U = con.pivots(u, Z)[:3]
        if L > 0:
            Ln = L**2
            Un = U**2
            Vn = V**2
        else:
            Ln = 0
            Un = U**2
            Vn = V**2

        if U < 0:
            raise ValueError('upper truncation point should be nonnegative')
        P.append(np.array((F(Un) - F(Vn)) / (F(Un) - F(Ln))))

    P = np.array(P).reshape(-1)
    P = P[P > 0]
    P = P[P < 1]

    ecdf = sm.distributions.ECDF(P)

    plt.clf()
    x = np.linspace(0, 1, 101)
    plt.plot(x, ecdf(x), c='red')
    plt.plot([0, 1], [0, 1], c='blue', linewidth=2)
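The rpy2 round-trip only evaluates a noncentral chi-squared CDF, so scipy's ncx2 is a drop-in replacement that avoids R entirely. A sketch, assuming the same `ncp` as in the test above:

import numpy as np
from scipy.stats import ncx2

def F(x):
    # CDF of a chi-squared with 3 df and noncentrality ncp,
    # playing the role of R's pchisq(x, 3, ncp=fncp)
    if x != np.inf:
        return ncx2.cdf(x, 3, ncp)
    return 1.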
Example #18
def test_simulate_nonwhitened():
    n, p = 50, 200

    X = np.random.standard_normal((n,p))
    cov = np.dot(X.T, X)

    W = np.random.standard_normal((3,p))
    con = AC.constraints(W, np.ones(3), covariance=cov)

    while True:
        z = np.random.standard_normal(p)
        if np.dot(W, z).max() <= 1:
            break

    Z = AC.sample_from_constraints(con, z)
    nt.assert_true((np.dot(Z, W.T) - 1).max() < 0)
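This test, like several others on this page, finds a feasible starting point by rejection sampling. A small hypothetical helper (not part of the package) makes the pattern reusable:

import numpy as np

def find_feasible(A, b, max_tries=100000):
    # draw standard normals until A z <= b holds coordinatewise
    for _ in range(max_tries):
        z = np.random.standard_normal(A.shape[1])
        if (np.dot(A, z) <= b).all():
            return z
    raise RuntimeError('no feasible point found')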
Example #21
def draw_sample(mu, cutoff, nsample=10000):
    if mu >= cutoff - 4:
        sample = []
        while True:
            candidate = np.random.standard_normal(1000000) + mu
            candidate = candidate[candidate > cutoff]
            sample.extend(candidate)
            if len(sample) > nsample:
                break
        sample = np.array(sample)
        sample += np.random.standard_normal(sample.shape) + mu
    else:
        constraint = affine.constraints(np.array([[-1,0.]]), np.array([-cutoff]))
        constraint.mean = np.array([mu,mu])
        sample = affine.sample_from_constraints(constraint, np.array([cutoff + 0.1,0]),
                                                ndraw=2000000,
                                                direction_of_interest=np.array([1,1.]))
        # integer step so the slice thins 2000000 draws down to about nsample
        sample = sample.sum(1)[::(2000000 // nsample)]
    return sample
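A quick usage sketch: when mu is within 4 units of the cutoff, the rejection branch is taken and no MCMC is needed, so plain NumPy suffices:

sample = draw_sample(1., 3., nsample=5000)
print(sample.mean(), sample.std())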
Example #24
def test_pivots_intervals():

    A, b = np.random.standard_normal((4,30)), np.random.standard_normal(4)

    con = AC.constraints(A,b)
    while True:
        w = np.random.standard_normal(30)
        if con(w):
            break

    Z = AC.sample_from_constraints(con, w)[-1]
    u = np.zeros(con.dim)
    u[4] = 1

    # call pivot
    con.pivot(u, Z)
    con.pivot(u, Z, alternative='less')
    con.pivot(u, Z, alternative='greater')

    con.interval(u, Z, UMAU=True)
    con.interval(u, Z, UMAU=False)
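Under the null, the pivot should be close to Uniform(0, 1); a rough follow-on sanity check reusing the objects above (thinned draws to reduce autocorrelation):

Z_many = AC.sample_from_constraints(con, w, ndraw=2000)[::20]
pivots = [con.pivot(u, z) for z in Z_many]

# mean near 0.5 if the pivot is approximately Uniform(0, 1)
print(np.mean(pivots))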
Example #26
def test_chisq_central():

    n, p = 4, 10
    A, b = np.random.standard_normal((n, p)), np.zeros(n)
    con = AC.constraints(A, b)

    while True:
        z = np.random.standard_normal(p)
        if con(z):
            break

    S = np.identity(p)[:3]
    Z = AC.sample_from_constraints(con, z, ndraw=10000)
    P = []
    for i in range(Z.shape[0] // 10):
        P.append(chisq.quadratic_test(Z[10 * i], S, con))
    ecdf = sm.distributions.ECDF(P)

    plt.clf()
    x = np.linspace(0, 1, 101)
    plt.plot(x, ecdf(x), c='red')
    plt.plot([0, 1], [0, 1], c='blue', linewidth=2)
    nt.assert_true(np.fabs(np.mean(P) - 0.5) < 0.03)
    nt.assert_true(np.fabs(np.std(P) - 1 / np.sqrt(12)) < 0.03)
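The mean/std assertions are a coarse uniformity check; a Kolmogorov-Smirnov test against Uniform(0, 1) is a sharper alternative using the same P:

from scipy.stats import kstest

stat, pval = kstest(P, 'uniform')
print(stat, pval)  # small statistic / non-tiny p-value indicates a good fit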
Example #28
def forward_step(X,
                 Y,
                 sigma=None,
                 nstep=5,
                 exact=False,
                 burnin=1000,
                 ndraw=5000):
    """
    A simple implementation of forward stepwise
    that uses the `reduced_covtest` iteratively
    after adjusting fully for the selected variable.

    This implementation is not efficient, in
    that it computes more SVDs than it really has to.

    Parameters
    ----------

    X : np.float((n,p))

    Y : np.float(n)

    sigma : float (optional) 
        Noise level (not needed for reduced).

    nstep : int
        How many steps of forward stepwise?

    exact : bool
        Which version of covtest should we use?

    burnin : int
        How many iterations until we start
        recording samples?

    ndraw : int
        How many samples should we return?

    tests : ['reduced_known', 'covtest', 'reduced_unknown']
        Which test to use? A subset of the above sequence.

    """

    n, p = X.shape
    FS = forward_stepwise(X, Y)

    spacings_P = []
    covtest_P = []
    reduced_Pknown = []
    reduced_Punknown = []

    for i in range(nstep):
        FS.next()

        # covtest
        if FS.P[i] is not None:
            RX = X - FS.P[i](X)
            RY = Y - FS.P[i](Y)
            covariance = np.identity(n) - np.dot(FS.P[i].U, FS.P[i].U.T)
        else:
            RX = X
            RY = Y
            covariance = None
        RX -= RX.mean(0)[None, :]
        RX /= RX.std(0)[None, :]

        con, pval, idx, sign = covtest(RX,
                                       RY,
                                       sigma=sigma,
                                       covariance=covariance,
                                       exact=exact)
        covtest_P.append(pval)

        # reduced

        eta = RX[:, idx] * sign
        Acon = constraints(FS.A, np.zeros(FS.A.shape[0]))
        Acon.covariance *= sigma**2
        if i > 0:
            U = FS.P[-2].U.T
            Uy = np.dot(U, Y)
            Bcon = Acon.conditional(U, Uy)
        else:
            Bcon = Acon

        spacings_P.append(Acon.pivot(eta, Y))

        reduced_pval, _, _ = gibbs_test(Bcon,
                                        Y,
                                        eta,
                                        ndraw=ndraw,
                                        burnin=burnin,
                                        sigma_known=sigma is not None,
                                        alternative='greater')
        reduced_Pknown.append(reduced_pval)

        reduced_pval, _, _ = gibbs_test(Bcon,
                                        Y,
                                        eta,
                                        ndraw=ndraw,
                                        burnin=burnin,
                                        sigma_known=False,
                                        alternative='greater')
        reduced_Punknown.append(reduced_pval)

    return covtest_P, reduced_Pknown, reduced_Punknown, spacings_P, FS.variables
Example #29
import os
import numpy as np
import matplotlib.pyplot as plt
from selection import affine
from selection.discrete_family import discrete_family
from scipy.stats import norm as ndist

cutoff = ndist.ppf(0.95)

null_constraint = affine.constraints(np.array([[-1, 0.]]), np.array([-cutoff]))
null_sample = affine.sample_from_constraints(null_constraint,
                                             np.array([4, 2.]),
                                             ndraw=100000).sum(1)
null_dbn = discrete_family(null_sample, np.ones_like(null_sample))


def power(mu, ndraw=100000, keep_every=100):
    constraint = affine.constraints(np.array([[-1, 0.]]), np.array([-cutoff]))
    constraint.mean = np.array([mu, mu])
    sample = affine.sample_from_constraints(constraint,
                                            np.array([4, 2.]),
                                            ndraw=ndraw)[::keep_every]
    print(sample.mean(0))
    sample = sample.sum(1)
    decisions = []
    for s in sample:
        decisions.append(null_dbn.one_sided_test(0, s, alternative='greater'))
    print(np.mean(decisions))
    return np.mean(decisions)
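With the module-level null_dbn in place, power can be swept over a grid of means to trace a power curve; a minimal usage sketch (small ndraw keeps it fast):

muvals = np.linspace(0., 3., 7)
curve = [power(m, ndraw=20000) for m in muvals]
plt.plot(muvals, curve)
plt.xlabel(r'$\mu$')
plt.ylabel('Power')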

Example #32
def sample_split(X,
                 Y,
                 sigma=None,
                 nstep=10,
                 burnin=1000,
                 ndraw=5000,
                 reduced=True):

    n, p = X.shape
    half_n = int(n / 2)
    X1, Y1 = X[:half_n, :] * 1., Y[:half_n] * 1.
    X1 -= X1.mean(0)[None, :]
    Y1 -= Y1.mean()

    X2, Y2 = X[half_n:], Y[half_n:]
    X2 -= X2.mean(0)[None, :]
    Y2 -= Y2.mean()

    FS_half = forward_stepwise(X1, Y1)  # sample splitting model
    FS_full = forward_stepwise(X.copy(), Y.copy())  # full data model

    spacings_P = []
    split_P = []
    reduced_Pknown = []
    reduced_Punknown = []
    covtest_P = []

    for i in range(nstep):

        FS_half.next()

        if FS_half.P[i] is not None:
            RX = FS_half.X - FS_half.P[i](FS_half.X)
            RY = FS_half.Y - FS_half.P[i](FS_half.Y)
            covariance = centering(FS_half.Y.shape[0]) - np.dot(
                FS_half.P[i].U, FS_half.P[i].U.T)
        else:
            RX = FS_half.X
            RY = FS_half.Y
            covariance = centering(FS_half.Y.shape[0])

        RX -= RX.mean(0)[None, :]
        RX /= (RX.std(0)[None, :] * np.sqrt(RX.shape[0]))

        # covtest on half -- not saved

        con, pval, idx, sign = covtest(RX,
                                       RY,
                                       sigma=sigma,
                                       covariance=covariance,
                                       exact=True)

        # spacings on half -- not saved

        eta1 = RX[:, idx] * sign
        Acon = constraints(FS_half.A,
                           np.zeros(FS_half.A.shape[0]),
                           covariance=centering(FS_half.Y.shape[0]))
        Acon.covariance *= sigma**2
        Acon.pivot(eta1, FS_half.Y)

        # sample split

        eta2 = np.linalg.pinv(X2[:, FS_half.variables])[-1]
        eta_sigma = np.linalg.norm(eta2) * sigma
        split_P.append(2 * ndist.sf(np.fabs((eta2 * Y2).sum() / eta_sigma)))

        # inference on full mu using split model, this \beta^+_s.

        zero_block = np.zeros((Acon.linear_part.shape[0], (n - half_n)))
        linear_part = np.hstack([Acon.linear_part, zero_block])
        Fcon = constraints(linear_part, Acon.offset, covariance=centering(n))
        Fcon.covariance *= sigma**2

        if i > 0:
            U = np.linalg.pinv(X[:, FS_half.variables[:-1]])
            Uy = np.dot(U, Y)
            Fcon = Fcon.conditional(U, Uy)

        eta_full = np.linalg.pinv(X[:, FS_half.variables])[-1]

        if reduced:
            reduced_pval = gibbs_test(Fcon,
                                      Y,
                                      eta_full,
                                      ndraw=ndraw,
                                      burnin=burnin,
                                      sigma_known=sigma is not None,
                                      alternative='twosided')[0]
            reduced_Pknown.append(reduced_pval)

            reduced_pval = gibbs_test(Fcon,
                                      Y,
                                      eta_full,
                                      ndraw=ndraw,
                                      burnin=burnin,
                                      sigma_known=False,
                                      alternative='twosided')[0]
            reduced_Punknown.append(reduced_pval)

        # now use all the data

        FS_full.next()
        if FS_full.P[i] is not None:
            RX = X - FS_full.P[i](X)
            RY = Y - FS_full.P[i](Y)
            covariance = centering(RY.shape[0]) - np.dot(
                FS_full.P[i].U, FS_full.P[i].U.T)
        else:
            RX = X
            RY = Y.copy()
            covariance = centering(RY.shape[0])
        RX -= RX.mean(0)[None, :]
        RX /= RX.std(0)[None, :]

        con, pval, idx, sign = covtest(RX,
                                       RY,
                                       sigma=sigma,
                                       covariance=covariance,
                                       exact=False)
        covtest_P.append(pval)

        # spacings on full data

        eta1 = RX[:, idx] * sign
        Acon = constraints(FS_full.A, np.zeros(FS_full.A.shape[0]),
                           centering(RY.shape[0]))
        Acon.covariance *= sigma**2
        spacings_P.append(Acon.pivot(eta1, Y))

    return split_P, reduced_Pknown, reduced_Punknown, spacings_P, covtest_P, FS_half.variables
import os
from glob import glob
import numpy as np
import matplotlib.pyplot as plt
from selection import affine 
from selection.discrete_family import discrete_family
from scipy.stats import norm as ndist
from sklearn.isotonic import IsotonicRegression

cutoff = 3.
null_constraint = affine.constraints(np.array([[-1,0.]]), np.array([-cutoff]))
null_sample = affine.sample_from_constraints(null_constraint, np.array([4,2.]),
                                             ndraw=100000).sum(1)
null_dbn = discrete_family(null_sample, np.ones_like(null_sample))
