# Examples of selective inference with `covtest` / `reduced_covtest` from the
# `selection` package (https://github.com/selective-inference/Python-software).
# The import paths below follow that package but may differ across versions;
# the `parameters` and `centering` helpers come from the original scripts and
# are assumed to be defined alongside this code.

import itertools

import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm as ndist
from statsmodels.distributions.empirical_distribution import ECDF

from selection.algorithms.covtest import covtest, reduced_covtest
from selection.algorithms.forward_step import forward_stepwise
from selection.constraints.affine import (constraints, gibbs_test,
                                          sample_from_constraints)
from selection.distributions.discrete_family import discrete_family


def constraints_for_position(X, pos):
    # Rejection sampling: draw null responses until `covtest` selects
    # variable `pos` with a positive sign, then return that selection
    # event's constraint and the accepted draw. (Renamed from `constraints`
    # so it does not shadow the affine `constraints` class imported above.)
    n, p = X.shape
    while True:
        Y = np.random.standard_normal(n)
        con, _, idx, sign = covtest(X, Y, sigma=1)
        if idx == pos and sign == +1:
            initial = Y.copy()
            break
    return con, initial
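
# Illustrative sketch only (not part of the original scripts): recover the
# selection constraint for variable 0 of a random standardized design. The
# rejection loop above can take many iterations, so keep p small.
def _demo_constraints_for_position():
    np.random.seed(0)
    X = np.random.standard_normal((25, 5))
    X /= X.std(0)[None, :]          # standardize columns, as in the tests
    con, y0 = constraints_for_position(X, 0)
    print('accepted draw (first 3 coords):', y0[:3])
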
def test_covtest():
    # Smoke test on a null model: exercise both covtest variants and
    # reduced_covtest, with and without an explicit covariance; only the
    # last p-value computed is returned.

    n, p = 30, 50
    X = np.random.standard_normal((n,p)) + np.random.standard_normal(n)[:,None]
    X /= X.std(0)[None,:]
    Y = np.random.standard_normal(n) * 1.5

    for exact, covariance in itertools.product([True, False],
                                               [None, np.identity(n)]):
        con, pval, idx, sign = covtest(X, Y, sigma=1.5, exact=exact,
                                       covariance=covariance)
    for covariance in [None, np.identity(n)]:
        con, pval, idx, sign = reduced_covtest(X, Y, sigma=1.5,
                                               covariance=covariance)

    return pval
def sample_split(X, Y, sigma=None,
                 nstep=10,
                 burnin=1000,
                 ndraw=5000,
                 reduced=True):
    # Run forward stepwise on one half of the data, then compare split-sample,
    # covtest, spacings and (optionally) reduced Gibbs p-values for the full
    # data at each step. `sigma` scales the constraint covariance below, so it
    # must be supplied despite the None default.

    n, p = X.shape
    half_n = int(n/2)
    X1, Y1 = X[:half_n,:]*1., Y[:half_n]*1.
    X1 -= X1.mean(0)[None,:]
    Y1 -= Y1.mean()

    X2, Y2 = X[half_n:], Y[half_n:]
    X2 -= X2.mean(0)[None,:]
    Y2 -= Y2.mean()

    FS_half = forward_stepwise(X1, Y1) # sample splitting model
    FS_full = forward_stepwise(X.copy(), Y.copy()) # full data model
    
    spacings_P = []
    split_P = []
    reduced_Pknown = []
    reduced_Punknown = []
    covtest_P = []

    for i in range(nstep):

        FS_half.next()

        if FS_half.P[i] is not None:
            RX = FS_half.X - FS_half.P[i](FS_half.X)
            RY = FS_half.Y - FS_half.P[i](FS_half.Y)
            covariance = centering(FS_half.Y.shape[0]) - np.dot(FS_half.P[i].U, FS_half.P[i].U.T)
        else:
            RX = FS_half.X
            RY = FS_half.Y
            covariance = centering(FS_half.Y.shape[0])

        RX -= RX.mean(0)[None,:]
        RX /= (RX.std(0)[None,:] * np.sqrt(RX.shape[0]))

        # covtest on half -- not saved

        con, pval, idx, sign = covtest(RX, RY, sigma=sigma,
                                       covariance=covariance,
                                       exact=True)

        # spacings on half -- not saved

        eta1 = RX[:,idx] * sign
        Acon = constraints(FS_half.A, np.zeros(FS_half.A.shape[0]),
                           covariance=centering(FS_half.Y.shape[0]))
        Acon.covariance *= sigma**2
        Acon.pivot(eta1, FS_half.Y)

        # sample split

        eta2 = np.linalg.pinv(X2[:,FS_half.variables])[-1]
        eta_sigma = np.linalg.norm(eta2) * sigma
        split_P.append(2*ndist.sf(np.fabs((eta2*Y2).sum() / eta_sigma)))

        # inference on full mu using split model, this \beta^+_s.

        zero_block = np.zeros((Acon.linear_part.shape[0], (n-half_n)))
        linear_part = np.hstack([Acon.linear_part, zero_block])
        Fcon = constraints(linear_part, Acon.offset,
                           covariance=centering(n))
        Fcon.covariance *= sigma**2

        if i > 0:
            U = np.linalg.pinv(X[:,FS_half.variables[:-1]])
            Uy = np.dot(U, Y)
            Fcon = Fcon.conditional(U, Uy)

        eta_full = np.linalg.pinv(X[:,FS_half.variables])[-1]

        if reduced:
            reduced_pval = gibbs_test(Fcon, Y, eta_full,
                                      ndraw=ndraw,
                                      burnin=burnin,
                                      sigma_known=sigma is not None,
                                      alternative='twosided')[0]
            reduced_Pknown.append(reduced_pval)

            reduced_pval = gibbs_test(Fcon, Y, eta_full,
                                      ndraw=ndraw,
                                      burnin=burnin,
                                      sigma_known=False,
                                      alternative='twosided')[0]
            reduced_Punknown.append(reduced_pval)


        # now use all the data

        FS_full.next()
        if FS_full.P[i] is not None:
            RX = X - FS_full.P[i](X)
            RY = Y - FS_full.P[i](Y)
            covariance = centering(RY.shape[0]) - np.dot(FS_full.P[i].U, FS_full.P[i].U.T)
        else:
            RX = X
            RY = Y.copy()
            covariance = centering(RY.shape[0])
        RX -= RX.mean(0)[None,:]
        RX /= RX.std(0)[None,:]

        con, pval, idx, sign = covtest(RX, RY, sigma=sigma,
                                       covariance=covariance,
                                       exact=False)
        covtest_P.append(pval)

        # spacings on full data

        eta1 = RX[:,idx] * sign
        Acon = constraints(FS_full.A, np.zeros(FS_full.A.shape[0]),
                           centering(RY.shape[0]))
        Acon.covariance *= sigma**2
        spacings_P.append(Acon.pivot(eta1, Y))

    return split_P, reduced_Pknown, reduced_Punknown, spacings_P, covtest_P, FS_half.variables
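
# Hedged usage sketch (not from the original scripts): run the split-vs-full
# comparison on pure noise with known sigma; sizes are kept small so the
# Gibbs sampler finishes quickly. Requires the assumed `centering` helper.
def _demo_sample_split():
    np.random.seed(2)
    X = np.random.standard_normal((60, 12))
    Y = np.random.standard_normal(60)
    (split_P, reduced_Pknown, reduced_Punknown,
     spacings_P, covtest_P, variables) = sample_split(
         X, Y, sigma=1., nstep=3, burnin=200, ndraw=1000)
    print('split p-values:', np.round(split_P, 3))
    print('variables entered on half data:', variables)
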
def marginal(n, snr, pos, rho=0.25, ndraw=5000,
             burnin=1000, nsim=5000, sigma=1.):
    # Compare sample-splitting p-values with selection-adjusted p-values
    # (conditioning on the covtest selection event) over nsim draws; the
    # design, mean and coefficients come from the external `parameters`
    # helper.

    X, mu, beta = parameters(n, rho, pos)

    Psplit = []
    Pselect = []
    hypotheses = []

    for _ in range(nsim):
        Y_select = (snr * mu / np.sqrt(2) + np.random.standard_normal(n)) * sigma
        con, _, select_pos, sign = covtest(X, Y_select, sigma=sigma, exact=True)

        cond_ncp = snr * np.dot(X.T[select_pos], mu) / np.sqrt(2) * sign

        correct = (sign == +1) and (pos == select_pos)
        hypotheses.append(correct)
        Y_null = sample_from_constraints(con, Y_select, ndraw=ndraw, burnin=burnin)
        Z_null = (np.dot(X.T[select_pos], Y_null.T) + sigma * np.random.standard_normal(ndraw)) / np.sqrt(2)
        Z_inference = sigma * (cond_ncp + np.random.standard_normal())
        Z_observed = (np.dot(X.T[select_pos], Y_select) * sign + Z_inference) / np.sqrt(2)
        dfam = discrete_family(Z_null, np.ones(Z_null.shape))
        Pselect.append(dfam.ccdf(0, Z_observed))
        if sign == +1:
            Psplit.append(ndist.sf(Z_inference / sigma))
        else:
            Psplit.append(ndist.cdf(Z_inference / sigma))

    Ugrid = np.linspace(0,1,101)

    Psplit = np.array(Psplit)
    Pselect = np.array(Pselect)
    hypotheses = np.array(hypotheses, bool)  # np.bool was removed in NumPy 1.24

    # plot of marginal distribution of p-values

    fig1 = plt.figure(figsize=(8,8))
    ax1 = fig1.gca()
    ax1.plot(Ugrid, ECDF(Psplit)(Ugrid), label='Sample splitting', c='red', linewidth=5, alpha=0.5)
    ax1.plot(Ugrid, ECDF(Pselect)(Ugrid), label='Selected using $i^*(Z_S)$', c='blue', linewidth=5, alpha=0.5)
    ax1.set_xlabel('P-value, $p$', fontsize=20)
    ax1.set_ylabel('ECDF($p$)', fontsize=20)
    ax1.plot([0.05,0.05],[0,1], 'k--')
    ax1.legend(loc='lower right')
    
    # conditional distribution of p-values
    # conditioned on selection choosing correct position and sign

    fig2 = plt.figure(figsize=(8,8))
    ax2 = fig2.gca()
    ax2.plot(Ugrid, ECDF(Psplit[hypotheses])(Ugrid), label='Sample splitting', c='red', linewidth=5, alpha=0.5)
    ax2.plot(Ugrid, ECDF(Pselect[hypotheses])(Ugrid), label='Selected using $i^*(Z_S)$', c='blue', linewidth=5, alpha=0.5)
    ax2.set_xlabel('P-value, $p$', fontsize=20)
    ax2.set_ylabel('ECDF($p$)', fontsize=20)
    ax2.plot([0.05,0.05],[0,1], 'k--')
    ax2.legend(loc='lower right')

    dbn1 = {}
    dbn1['split'] = Psplit
    dbn1['select'] = Pselect
    dbn1['hypotheses'] = hypotheses

    return fig1, fig2, dbn1
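
# Hedged usage sketch: a short Monte Carlo run of `marginal`. It assumes the
# external `parameters` helper accepts these arguments; `pos=9` is just an
# illustrative position in the design.
def _demo_marginal():
    np.random.seed(3)
    fig1, fig2, dbn = marginal(n=100, snr=3., pos=9,
                               ndraw=1000, burnin=200, nsim=100)
    print('fraction of correct selections:', dbn['hypotheses'].mean())
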
def forward_step(X, Y, sigma=None,
                 nstep=5,
                 exact=False,
                 burnin=1000,
                 ndraw=5000):
    """
    A simple implementation of forward stepwise
    that uses the `reduced_covtest` iteratively
    after adjusting fully for the selected variable.

    This implementation is not efficient, in
    that it computes more SVDs than it really has to.

    Parameters
    ----------

    X : np.float((n,p))

    Y : np.float(n)

    sigma : float
        Noise level. Marked optional in the signature, but this
        implementation uses it to scale the constraint covariance,
        so it must be supplied.

    nstep : int
        How many steps of forward stepwise?

    exact : bool
        Use the exact version of `covtest`?

    burnin : int
        How many iterations until we start
        recording samples?

    ndraw : int
        How many samples should we return?

    """

    n, p = X.shape
    FS = forward_stepwise(X, Y)

    spacings_P = []
    covtest_P = []
    reduced_Pknown = []
    reduced_Punknown = []

    for i in range(nstep):
        FS.next()

        # covtest
        if FS.P[i] is not None:
            RX = X - FS.P[i](X)
            RY = Y - FS.P[i](Y)
            covariance = np.identity(n) - np.dot(FS.P[i].U, FS.P[i].U.T)
        else:
            RX = X
            RY = Y
            covariance = None
        RX -= RX.mean(0)[None,:]
        RX /= RX.std(0)[None,:]

        con, pval, idx, sign = covtest(RX, RY, sigma=sigma,
                                       covariance=covariance,
                                       exact=exact)
        covtest_P.append(pval)

        # reduced

        eta = RX[:,idx] * sign
        Acon = constraints(FS.A, np.zeros(FS.A.shape[0]))
        Acon.covariance *= sigma**2
        if i > 0:
            U = FS.P[-2].U.T
            Uy = np.dot(U, Y)
            Bcon = Acon.conditional(U, Uy)
        else:
            Bcon = Acon

        spacings_P.append(Acon.pivot(eta, Y))

        reduced_pval, _, _ = gibbs_test(Bcon, Y, eta,
                                        ndraw=ndraw,
                                        burnin=burnin,
                                        sigma_known=sigma is not None,
                                        alternative='greater')
        reduced_Pknown.append(reduced_pval)

        reduced_pval, _, _ = gibbs_test(Bcon, Y, eta,
                                        ndraw=ndraw,
                                        burnin=burnin,
                                        sigma_known=False,
                                        alternative='greater')
        reduced_Punknown.append(reduced_pval)

    return covtest_P, reduced_Pknown, reduced_Punknown, spacings_P, FS.variables
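
# Hedged usage sketch (not from the original docstring): three steps of
# forward stepwise with selection-adjusted p-values on a pure-noise design.
# `sigma` must be given, since it scales the constraint covariance.
def _demo_forward_step():
    np.random.seed(4)
    X = np.random.standard_normal((50, 10))
    Y = np.random.standard_normal(50)
    cov_P, known_P, unknown_P, spacings_P, variables = forward_step(
        X, Y, sigma=1., nstep=3, burnin=200, ndraw=1000)
    print('covtest p-values:', np.round(cov_P, 3))
    print('variables entered:', variables)
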
def simulation(n, snr, pos, rho=0.25, nsim=5000, sigma=1.5):
    # Compare tests of the first forward-stepwise step: covtest (asymptotic
    # and exact), a max-|Z| test, reduced covtests with sigma unknown/known,
    # and a fixed 1-sparse z-test. The design comes from the external
    # `parameters` helper.

    # Design, mean vector and parameter vector

    X, mu, beta = parameters(n, rho, pos)

    Pcov = []
    Pexact = []
    Pu = []
    Pr = []
    Pfixed = []
    Pmax = []
    hypotheses = []

    # Set seed

    np.random.seed(0)

    # Max test

    max_stat = np.fabs(np.dot(X.T, np.random.standard_normal((n, 10000)))).max(0) * sigma
    max_fam = discrete_family(max_stat, np.ones(max_stat.shape))
    max_fam.theta = 0

    for i in range(nsim):
        Y = (snr * mu + np.random.standard_normal(n)) * sigma
        Z = np.dot(X.T, Y)

        # did this find the correct position and sign?
        correct = np.all(np.less_equal(np.fabs(Z), Z[pos]))
        hypotheses.append(correct)

        Pcov.append(covtest(X, Y, sigma=sigma, exact=False)[1])
        Pexact.append(covtest(X, Y, sigma=sigma, exact=True)[1])
        Pfixed.append(2 * ndist.sf(np.fabs(np.dot(X.T, Y))[pos] / sigma))
        Pu.append(reduced_covtest(X, Y, burnin=500, ndraw=5000)[1])
        Pr.append(reduced_covtest(X, Y, burnin=500, ndraw=5000, sigma=sigma)[1])
        p = max_fam.ccdf(0, np.fabs(np.dot(X.T, Y)).max())
        Pmax.append(p)

    Ugrid = np.linspace(0,1,101)

    Pcov = np.array(Pcov)
    Pexact = np.array(Pexact)
    Pu = np.array(Pu)
    Pr = np.array(Pr)
    Pfixed = np.array(Pfixed)
    Pmax = np.array(Pmax)

    # plot of marginal distribution of p-values

    fig1 = plt.figure(figsize=(8,8))
    ax1 = fig1.gca()
    ax1.plot(Ugrid, ECDF(Pcov)(Ugrid), label='Full (asymptotic)', c='red', linewidth=5, alpha=0.5)
    ax1.plot(Ugrid, ECDF(Pexact)(Ugrid), label='Full (exact)', c='k', linewidth=5, alpha=0.5)
    ax1.plot(Ugrid, ECDF(Pmax)(Ugrid), label='Max test', c='cyan', linewidth=5, alpha=0.5)
    ax1.plot(Ugrid, ECDF(Pu)(Ugrid), label=r'Selected 1-sparse, $\sigma$ unknown', c='blue', linewidth=5, alpha=0.5)
    ax1.plot(Ugrid, ECDF(Pr)(Ugrid), label=r'Selected 1-sparse, $\sigma$ known', c='green', linewidth=5, alpha=0.5)
    ax1.plot(Ugrid, ECDF(Pfixed)(Ugrid), label=r'Fixed 1-sparse, $\sigma$ known', c='yellow', linewidth=5, alpha=0.5)
    ax1.set_xlabel('P-value, $p$', fontsize=20)
    ax1.set_ylabel('ECDF($p$)', fontsize=20)
    ax1.plot([0.05,0.05],[0,1], 'k--')
    ax1.legend(loc='lower right')
    
    # conditional distribution of p-values
    # conditioned on selection choosing correct position and sign

    fig2 = plt.figure(figsize=(8,8))
    hypotheses = np.array(hypotheses, bool)  # np.bool was removed in NumPy 1.24
    ax2 = fig2.gca()
    ax2.plot(Ugrid, ECDF(Pcov[hypotheses])(Ugrid), label='Full (asymptotic)', c='red', linewidth=5, alpha=0.5)
    ax2.plot(Ugrid, ECDF(Pexact[hypotheses])(Ugrid), label='Full (exact)', c='k', linewidth=5, alpha=0.5)
    ax2.plot(Ugrid, ECDF(Pu[hypotheses])(Ugrid), label=r'Selected 1-sparse, $\sigma$ unknown', c='blue', linewidth=5, alpha=0.5)
    ax2.plot(Ugrid, ECDF(Pr[hypotheses])(Ugrid), label=r'Selected 1-sparse, $\sigma$ known', c='green', linewidth=5, alpha=0.5)
    ax2.set_xlabel('P-value, $p$', fontsize=20)
    ax2.set_ylabel('ECDF($p$)', fontsize=20)
    ax2.plot([0.05,0.05],[0,1], 'k--')
    ax2.legend(loc='lower right')

    dbn1 = {}
    dbn1['exact'] = Pexact
    dbn1['covtest'] = Pcov
    dbn1['unknown'] = Pu
    dbn1['known'] = Pr
    dbn1['fixed'] = Pfixed
    dbn1['max'] = Pmax
    dbn1['hypotheses'] = hypotheses

    return fig1, fig2, dbn1
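
# Hedged usage sketch (assumes the external `parameters` helper): a small run
# of `simulation`. Note that it reseeds NumPy's global generator internally.
def _demo_simulation():
    fig1, fig2, dbn = simulation(n=100, snr=3., pos=9, nsim=50)
    print('median covtest p-value:', np.median(dbn['covtest']))
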