Exemple #1
0
def simulate_null():
    """
    Simulate one pure-noise data set and return forward stepwise pivots.

    A 100 x 40 design is drawn as independent standard normal entries
    plus 0.4 times a standard normal value shared across each row, and
    every column is divided by its standard deviation times sqrt(100).
    The response is independent Gaussian noise with standard deviation
    0.5 (it does not depend on the design).  Five steps of
    `forward_stepwise` are run.

    Returns
    -------

    pivots : list
        The last element of every item produced by
        `FS.model_pivots(3)`.

    """
    nrow, ncol = 100, 40
    noise_sd = 0.5

    # Draw order matters for reproducibility under a fixed seed:
    # first the (nrow, ncol) block, then the per-row shared component.
    design = np.random.standard_normal((nrow, ncol))
    shared = np.random.standard_normal(nrow)
    design = design + 0.4 * shared[:, None]
    design /= design.std(0)[None, :] * np.sqrt(nrow)

    response = np.random.standard_normal(nrow) * noise_sd

    FS = forward_stepwise(design, response, sigma=noise_sd)
    for _ in range(5):
        FS.next()

    return [pivot[-1] for pivot in FS.model_pivots(3)]
Exemple #2
0
def test_FS():
    """
    Smoke test for `forward_stepwise` on simulated noise.

    Builds a 100 x 40 design (independent standard normal entries plus
    0.4 times a per-row shared standard normal, columns divided by
    their standard deviation times sqrt(n)) and a pure-noise response
    with standard deviation 0.5.  Thirty steps are taken; after each
    one `FS.check_constraints()` must be truthy.  Finally
    `FS.model_pivots(3)` and `FS.model_quadratic(3)` are called; their
    return values are not inspected.

    Raises
    ------

    ValueError
        If `FS.check_constraints()` is falsy after any step.

    """
    n, p = 100, 40
    sigma = 0.5

    X = np.random.standard_normal((n, p)) + 0.4 * np.random.standard_normal(n)[:, None]
    X /= (X.std(0)[None, :] * np.sqrt(n))

    # Tie the response length to n instead of repeating the literal 100,
    # so the two cannot drift apart if n is edited.
    Y = np.random.standard_normal(n) * sigma

    FS = forward_stepwise(X, Y, sigma=sigma)

    for _ in range(30):
        FS.next()
        if not FS.check_constraints():
            raise ValueError('constraints not satisfied')

    # print(...) with one pre-formatted string is valid on Python 3 and
    # prints exactly what the former Python 2 print statement printed.
    print('first 30 variables selected %s' % (FS.variables,))

    # Raw string: '\p' is not a valid escape sequence; the emitted text
    # is unchanged.
    # NOTE(review): the message mentions the 10th model but the calls
    # below pass 3 -- confirm which one is intended.
    print(r'M^{\pm} for the 10th selected model knowing that we performed 30 steps of forward stepwise')

    FS.model_pivots(3)
    FS.model_quadratic(3)
Exemple #3
0
def sample_split(X,
                 Y,
                 sigma=None,
                 nstep=10,
                 burnin=1000,
                 ndraw=5000,
                 reduced=True):
    """
    Run forward stepwise on the first half of the rows and on all
    rows, collecting several p-values / pivots at each step.

    The first `int(n / 2)` rows are copied and centered and used to
    fit the "half" model.  The remaining rows are centered and used
    for a sample-splitting test of the variable most recently added
    by the half model.  A second `forward_stepwise` instance is fit
    to copies of the full `X` and `Y`.

    Parameters
    ----------

    X : array with `X.shape == (n, p)`

    Y : array indexed along its first axis like the rows of `X`

    sigma : float
        Noise level.  The default is None, but `sigma` is multiplied
        and squared unconditionally inside the loop, and `sigma**2`
        raises TypeError for None.
        NOTE(review): confirm that callers always pass a number.

    nstep : int
        Number of steps taken on both the half and the full model.

    burnin : int
        Forwarded to `gibbs_test` as `burnin`.

    ndraw : int
        Forwarded to `gibbs_test` as `ndraw`.

    reduced : bool
        If True, run the two `gibbs_test` calls at every step;
        if False, both reduced lists are returned empty.

    Returns
    -------

    split_P : list
        One sample-splitting p-value per step, `2 * ndist.sf(|z|)`
        (`ndist` is presumably a standard normal distribution object
        -- confirm against the file's imports).

    reduced_Pknown : list
        First element of `gibbs_test(...)` called with
        `sigma_known=sigma is not None`.

    reduced_Punknown : list
        First element of `gibbs_test(...)` called with
        `sigma_known=False`.

    spacings_P : list
        Values of `Acon.pivot(eta1, Y)` for the full-data model.

    covtest_P : list
        p-values returned by `covtest(..., exact=False)` on the
        full data.

    variables
        `FS_half.variables` after the last step.

    Notes
    -----

    Assuming numpy arrays, this function modifies its inputs:
    `X2` / `Y2` are slices (not copies) of `X` / `Y` and are centered
    in place, and in the full-data section `RX` can be `X` itself
    when it is centered and scaled in place.

    """

    n, p = X.shape
    half_n = int(n / 2)
    # `* 1.` yields new arrays, so centering the first half leaves X, Y alone.
    X1, Y1 = X[:half_n, :] * 1., Y[:half_n] * 1.
    X1 -= X1.mean(0)[None, :]
    Y1 -= Y1.mean()

    # NOTE(review): unlike X1/Y1 these are plain slices; for numpy arrays the
    # in-place centering below also rewrites the matching rows of X and Y.
    X2, Y2 = X[half_n:], Y[half_n:]
    X2 -= X2.mean(0)[None, :]
    Y2 -= Y2.mean()

    FS_half = forward_stepwise(X1, Y1)  # sample splitting model
    # The copies are taken after the second half was centered above.
    FS_full = forward_stepwise(X.copy(), Y.copy())  # full data model

    spacings_P = []
    split_P = []
    reduced_Pknown = []
    reduced_Punknown = []
    covtest_P = []

    for i in range(nstep):

        FS_half.next()

        # Residualize with FS_half.P[i] when it is available.
        if FS_half.P[i] is not None:
            RX = FS_half.X - FS_half.P[i](FS_half.X)
            RY = FS_half.Y - FS_half.P[i](FS_half.Y)
            covariance = centering(FS_half.Y.shape[0]) - np.dot(
                FS_half.P[i].U, FS_half.P[i].U.T)
        else:
            # RX is FS_half.X itself here (no copy), so the in-place
            # centering / scaling below writes into that array.
            RX = FS_half.X
            RY = FS_half.Y
            covariance = centering(FS_half.Y.shape[0])

        RX -= RX.mean(0)[None, :]
        RX /= (RX.std(0)[None, :] * np.sqrt(RX.shape[0]))

        # covtest on half -- not saved
        # (only `idx` and `sign` are used afterwards)

        con, pval, idx, sign = covtest(RX,
                                       RY,
                                       sigma=sigma,
                                       covariance=covariance,
                                       exact=True)

        # spacings on half -- not saved

        eta1 = RX[:, idx] * sign
        Acon = constraints(FS_half.A,
                           np.zeros(FS_half.A.shape[0]),
                           covariance=centering(FS_half.Y.shape[0]))
        Acon.covariance *= sigma**2  # TypeError if sigma is None
        Acon.pivot(eta1, FS_half.Y)  # return value discarded

        # sample split
        # eta2 is the last row of the pseudo-inverse of the second-half
        # design restricted to the half model's selected variables.

        eta2 = np.linalg.pinv(X2[:, FS_half.variables])[-1]
        eta_sigma = np.linalg.norm(eta2) * sigma
        split_P.append(2 * ndist.sf(np.fabs((eta2 * Y2).sum() / eta_sigma)))

        # inference on full mu using split model, this \beta^+_s.
        # The half-model constraints are padded with zero columns for the
        # n - half_n second-half rows so they apply to a length-n vector.

        zero_block = np.zeros((Acon.linear_part.shape[0], (n - half_n)))
        linear_part = np.hstack([Acon.linear_part, zero_block])
        Fcon = constraints(linear_part, Acon.offset, covariance=centering(n))
        Fcon.covariance *= sigma**2

        if i > 0:
            # Condition on U.dot(Y), where U is the pseudo-inverse of the
            # columns for all but the most recently selected variable.
            U = np.linalg.pinv(X[:, FS_half.variables[:-1]])
            Uy = np.dot(U, Y)
            Fcon = Fcon.conditional(U, Uy)
        else:
            Fcon = Fcon  # no-op: nothing to condition on at the first step

        eta_full = np.linalg.pinv(X[:, FS_half.variables])[-1]

        if reduced:
            # First call: sigma treated as known whenever one was supplied.
            reduced_pval = gibbs_test(Fcon,
                                      Y,
                                      eta_full,
                                      ndraw=ndraw,
                                      burnin=burnin,
                                      sigma_known=sigma is not None,
                                      alternative='twosided')[0]
            reduced_Pknown.append(reduced_pval)

            # Second call: identical except sigma is treated as unknown.
            reduced_pval = gibbs_test(Fcon,
                                      Y,
                                      eta_full,
                                      ndraw=ndraw,
                                      burnin=burnin,
                                      sigma_known=False,
                                      alternative='twosided')[0]
            reduced_Punknown.append(reduced_pval)

        # now use all the data

        FS_full.next()
        if FS_full.P[i] is not None:
            RX = X - FS_full.P[i](X)
            RY = Y - FS_full.P[i](Y)
            covariance = centering(RY.shape[0]) - np.dot(
                FS_full.P[i].U, FS_full.P[i].U.T)
        else:
            # NOTE(review): RX is X itself (Y is copied, X is not), so the
            # in-place centering / scaling below modifies the caller's X.
            RX = X
            RY = Y.copy()
            covariance = centering(RY.shape[0])
        RX -= RX.mean(0)[None, :]
        RX /= RX.std(0)[None, :]

        con, pval, idx, sign = covtest(RX,
                                       RY,
                                       sigma=sigma,
                                       covariance=covariance,
                                       exact=False)
        covtest_P.append(pval)

        # spacings on full data
        # (third positional argument of `constraints` is presumably the
        # covariance, as in the keyword calls above -- TODO confirm)

        eta1 = RX[:, idx] * sign
        Acon = constraints(FS_full.A, np.zeros(FS_full.A.shape[0]),
                           centering(RY.shape[0]))
        Acon.covariance *= sigma**2
        spacings_P.append(Acon.pivot(eta1, Y))

    return split_P, reduced_Pknown, reduced_Punknown, spacings_P, covtest_P, FS_half.variables
def sample_split(X, Y, sigma=None,
                 nstep=10,
                 burnin=1000,
                 ndraw=5000,
                 reduced=True):
    """
    Run forward stepwise on the first half of the rows and on all
    rows, collecting several p-values / pivots at each step.

    The first `int(n/2)` rows are copied and centered and used to
    fit the "half" model.  The remaining rows are centered and used
    for a sample-splitting test of the variable most recently added
    by the half model.  A second `forward_stepwise` instance is fit
    to copies of the full `X` and `Y`.

    Parameters
    ----------

    X : array with `X.shape == (n, p)`

    Y : array indexed along its first axis like the rows of `X`

    sigma : float
        Noise level.  The default is None, but `sigma` is multiplied
        and squared unconditionally inside the loop, and `sigma**2`
        raises TypeError for None.
        NOTE(review): confirm that callers always pass a number.

    nstep : int
        Number of steps taken on both the half and the full model.

    burnin : int
        Forwarded to `gibbs_test` as `burnin`.

    ndraw : int
        Forwarded to `gibbs_test` as `ndraw`.

    reduced : bool
        If True, run the two `gibbs_test` calls at every step;
        if False, both reduced lists are returned empty.

    Returns
    -------

    split_P : list
        One sample-splitting p-value per step, `2*ndist.sf(|z|)`
        (`ndist` is presumably a standard normal distribution object
        -- confirm against the file's imports).

    reduced_Pknown : list
        First element of `gibbs_test(...)` called with
        `sigma_known=sigma is not None`.

    reduced_Punknown : list
        First element of `gibbs_test(...)` called with
        `sigma_known=False`.

    spacings_P : list
        Values of `Acon.pivot(eta1, Y)` for the full-data model.

    covtest_P : list
        p-values returned by `covtest(..., exact=False)` on the
        full data.

    variables
        `FS_half.variables` after the last step.

    Notes
    -----

    Assuming numpy arrays, this function modifies its inputs:
    `X2` / `Y2` are slices (not copies) of `X` / `Y` and are centered
    in place, and in the full-data section `RX` can be `X` itself
    when it is centered and scaled in place.

    """

    n, p = X.shape
    half_n = int(n/2)
    # `*1.` yields new arrays, so centering the first half leaves X, Y alone.
    X1, Y1 = X[:half_n,:]*1., Y[:half_n]*1.
    X1 -= X1.mean(0)[None,:]
    Y1 -= Y1.mean()

    # NOTE(review): unlike X1/Y1 these are plain slices; for numpy arrays the
    # in-place centering below also rewrites the matching rows of X and Y.
    X2, Y2 = X[half_n:], Y[half_n:]
    X2 -= X2.mean(0)[None,:]
    Y2 -= Y2.mean()

    FS_half = forward_stepwise(X1, Y1) # sample splitting model
    # The copies are taken after the second half was centered above.
    FS_full = forward_stepwise(X.copy(), Y.copy()) # full data model
    
    spacings_P = []
    split_P = []
    reduced_Pknown = []
    reduced_Punknown = []
    covtest_P = []

    for i in range(nstep):

        FS_half.next()

        # Residualize with FS_half.P[i] when it is available.
        if FS_half.P[i] is not None:
            RX = FS_half.X - FS_half.P[i](FS_half.X)
            RY = FS_half.Y - FS_half.P[i](FS_half.Y)
            covariance = centering(FS_half.Y.shape[0]) - np.dot(FS_half.P[i].U, FS_half.P[i].U.T)
        else:
            # RX is FS_half.X itself here (no copy), so the in-place
            # centering / scaling below writes into that array.
            RX = FS_half.X
            RY = FS_half.Y
            covariance = centering(FS_half.Y.shape[0])

        RX -= RX.mean(0)[None,:]
        RX /= (RX.std(0)[None,:] * np.sqrt(RX.shape[0]))

        # covtest on half -- not saved
        # (only `idx` and `sign` are used afterwards)

        con, pval, idx, sign = covtest(RX, RY, sigma=sigma,
                                       covariance=covariance,
                                       exact=True)

        # spacings on half -- not saved

        eta1 = RX[:,idx] * sign
        Acon = constraints(FS_half.A, np.zeros(FS_half.A.shape[0]),
                           covariance=centering(FS_half.Y.shape[0]))
        Acon.covariance *= sigma**2  # TypeError if sigma is None
        Acon.pivot(eta1, FS_half.Y)  # return value discarded

        # sample split
        # eta2 is the last row of the pseudo-inverse of the second-half
        # design restricted to the half model's selected variables.

        eta2 = np.linalg.pinv(X2[:,FS_half.variables])[-1]
        eta_sigma = np.linalg.norm(eta2) * sigma
        split_P.append(2*ndist.sf(np.fabs((eta2*Y2).sum() / eta_sigma)))

        # inference on full mu using split model, this \beta^+_s.
        # The half-model constraints are padded with zero columns for the
        # n-half_n second-half rows so they apply to a length-n vector.

        zero_block = np.zeros((Acon.linear_part.shape[0], (n-half_n)))
        linear_part = np.hstack([Acon.linear_part, zero_block])
        Fcon = constraints(linear_part, Acon.offset,
                           covariance=centering(n))
        Fcon.covariance *= sigma**2

        if i > 0:
            # Condition on U.dot(Y), where U is the pseudo-inverse of the
            # columns for all but the most recently selected variable.
            U = np.linalg.pinv(X[:,FS_half.variables[:-1]])
            Uy = np.dot(U, Y)
            Fcon = Fcon.conditional(U, Uy)
        else:
            Fcon = Fcon  # no-op: nothing to condition on at the first step

        eta_full = np.linalg.pinv(X[:,FS_half.variables])[-1]

        if reduced:
            # First call: sigma treated as known whenever one was supplied.
            reduced_pval = gibbs_test(Fcon, Y, eta_full,
                                      ndraw=ndraw,
                                      burnin=burnin,
                                      sigma_known=sigma is not None,
                                      alternative='twosided')[0]
            reduced_Pknown.append(reduced_pval)

            # Second call: identical except sigma is treated as unknown.
            reduced_pval = gibbs_test(Fcon, Y, eta_full,
                                      ndraw=ndraw,
                                      burnin=burnin,
                                      sigma_known=False,
                                      alternative='twosided')[0]
            reduced_Punknown.append(reduced_pval)


        # now use all the data

        FS_full.next()
        if FS_full.P[i] is not None:
            RX = X - FS_full.P[i](X)
            RY = Y - FS_full.P[i](Y)
            covariance = centering(RY.shape[0]) - np.dot(FS_full.P[i].U, FS_full.P[i].U.T)
        else:
            # NOTE(review): RX is X itself (Y is copied, X is not), so the
            # in-place centering / scaling below modifies the caller's X.
            RX = X
            RY = Y.copy()
            covariance = centering(RY.shape[0])
        RX -= RX.mean(0)[None,:]
        RX /= RX.std(0)[None,:]

        con, pval, idx, sign = covtest(RX, RY, sigma=sigma,
                                       covariance=covariance,
                                       exact=False)
        covtest_P.append(pval)

        # spacings on full data
        # (third positional argument of `constraints` is presumably the
        # covariance, as in the keyword calls above -- TODO confirm)

        eta1 = RX[:,idx] * sign
        Acon = constraints(FS_full.A, np.zeros(FS_full.A.shape[0]),
                           centering(RY.shape[0]))
        Acon.covariance *= sigma**2
        spacings_P.append(Acon.pivot(eta1, Y))

    return split_P, reduced_Pknown, reduced_Punknown, spacings_P, covtest_P, FS_half.variables
def forward_step(X, Y, sigma=None,
                 nstep=5,
                 exact=False,
                 burnin=1000,
                 ndraw=5000):
    """
    A simple implementation of forward stepwise
    that, at every step, runs `covtest`, a spacings
    pivot and two `gibbs_test` calls after adjusting
    fully for the selected variable.

    This implementation is not efficient, in
    that it computes more SVDs than it really has to.

    Parameters
    ----------

    X : np.float((n,p))

    Y : np.float(n)

    sigma : float
        Noise level.  The default is None, but `sigma**2` is
        evaluated at every step and raises TypeError for None.
        NOTE(review): confirm that callers always pass a number.

    nstep : int
        How many steps of forward stepwise?

    exact : bool
        Which version of covtest should we use?
        (Forwarded to `covtest` as `exact`.)

    burnin : int
        How many iterations until we start
        recording samples?  (Forwarded to `gibbs_test`.)

    ndraw : int
        How many samples should we return?
        (Forwarded to `gibbs_test`.)

    Returns
    -------

    covtest_P : list
        p-value returned by `covtest` at each step.

    reduced_Pknown : list
        First value returned by `gibbs_test` with
        `sigma_known=sigma is not None`.

    reduced_Punknown : list
        First value returned by `gibbs_test` with
        `sigma_known=False`.

    spacings_P : list
        Value of `Acon.pivot(eta, Y)` at each step.

    variables
        `FS.variables` after the last step.

    Notes
    -----

    Earlier documentation listed a `tests` parameter; the signature
    has no such parameter and all tests are always run.

    When `FS.P[i]` is None, `RX` is `X` itself and is centered and
    scaled in place, so (for numpy arrays) the caller's `X` is
    modified.

    """

    n, p = X.shape
    FS = forward_stepwise(X, Y)

    spacings_P = []
    covtest_P = []
    reduced_Pknown = []
    reduced_Punknown = []

    for i in range(nstep):
        FS.next()

        # covtest
        if FS.P[i] is not None:
            # Residualize X and Y with FS.P[i]; the covariance passed to
            # covtest is the identity minus U U^T for that object's U.
            RX = X - FS.P[i](X)
            RY = Y - FS.P[i](Y)
            covariance = np.identity(n) - np.dot(FS.P[i].U, FS.P[i].U.T)
        else:
            # NOTE(review): no copies here -- the in-place operations on RX
            # below act directly on the caller's X.
            RX = X
            RY = Y
            covariance = None
        RX -= RX.mean(0)[None,:]
        RX /= RX.std(0)[None,:]

        # Only `pval`, `idx` and `sign` are used; `con` is ignored.
        con, pval, idx, sign = covtest(RX, RY, sigma=sigma,
                                       covariance=covariance,
                                       exact=exact)
        covtest_P.append(pval)

        # reduced

        # Signed column of the residualized design picked out by covtest.
        eta = RX[:,idx] * sign
        Acon = constraints(FS.A, np.zeros(FS.A.shape[0]))
        Acon.covariance *= sigma**2  # TypeError if sigma is None
        if i > 0:
            # Condition on U.dot(Y) using the second-to-last entry of FS.P
            # (presumably the projection for the previously selected
            # variables -- TODO confirm).
            U = FS.P[-2].U.T
            Uy = np.dot(U, Y)
            Bcon = Acon.conditional(U, Uy)
        else:
            Bcon = Acon

        # The spacings pivot uses the unconditioned constraints (Acon);
        # the gibbs tests below use the conditioned ones (Bcon).
        spacings_P.append(Acon.pivot(eta, Y))

        reduced_pval, _, _ = gibbs_test(Bcon, Y, eta,
                                        ndraw=ndraw,
                                        burnin=burnin,
                                        sigma_known=sigma is not None,
                                        alternative='greater')
        reduced_Pknown.append(reduced_pval)

        # Same test again, this time treating sigma as unknown.
        reduced_pval, _, _ = gibbs_test(Bcon, Y, eta,
                                        ndraw=ndraw,
                                        burnin=burnin,
                                        sigma_known=False,
                                        alternative='greater')
        reduced_Punknown.append(reduced_pval)

    return covtest_P, reduced_Pknown, reduced_Punknown, spacings_P, FS.variables
Exemple #6
0
def forward_step(X,
                 Y,
                 sigma=None,
                 nstep=5,
                 exact=False,
                 burnin=1000,
                 ndraw=5000):
    """
    A simple implementation of forward stepwise
    that, at every step, runs `covtest`, a spacings
    pivot and two `gibbs_test` calls after adjusting
    fully for the selected variable.

    This implementation is not efficient, in
    that it computes more SVDs than it really has to.

    Parameters
    ----------

    X : np.float((n,p))

    Y : np.float(n)

    sigma : float
        Noise level.  The default is None, but `sigma**2` is
        evaluated at every step and raises TypeError for None.
        NOTE(review): confirm that callers always pass a number.

    nstep : int
        How many steps of forward stepwise?

    exact : bool
        Which version of covtest should we use?
        (Forwarded to `covtest` as `exact`.)

    burnin : int
        How many iterations until we start
        recording samples?  (Forwarded to `gibbs_test`.)

    ndraw : int
        How many samples should we return?
        (Forwarded to `gibbs_test`.)

    Returns
    -------

    covtest_P : list
        p-value returned by `covtest` at each step.

    reduced_Pknown : list
        First value returned by `gibbs_test` with
        `sigma_known=sigma is not None`.

    reduced_Punknown : list
        First value returned by `gibbs_test` with
        `sigma_known=False`.

    spacings_P : list
        Value of `Acon.pivot(eta, Y)` at each step.

    variables
        `FS.variables` after the last step.

    Notes
    -----

    Earlier documentation listed a `tests` parameter; the signature
    has no such parameter and all tests are always run.

    When `FS.P[i]` is None, `RX` is `X` itself and is centered and
    scaled in place, so (for numpy arrays) the caller's `X` is
    modified.

    """

    n, p = X.shape
    FS = forward_stepwise(X, Y)

    spacings_P = []
    covtest_P = []
    reduced_Pknown = []
    reduced_Punknown = []

    for i in range(nstep):
        FS.next()

        # covtest
        if FS.P[i] is not None:
            # Residualize X and Y with FS.P[i]; the covariance passed to
            # covtest is the identity minus U U^T for that object's U.
            RX = X - FS.P[i](X)
            RY = Y - FS.P[i](Y)
            covariance = np.identity(n) - np.dot(FS.P[i].U, FS.P[i].U.T)
        else:
            # NOTE(review): no copies here -- the in-place operations on RX
            # below act directly on the caller's X.
            RX = X
            RY = Y
            covariance = None
        RX -= RX.mean(0)[None, :]
        RX /= RX.std(0)[None, :]

        # Only `pval`, `idx` and `sign` are used; `con` is ignored.
        con, pval, idx, sign = covtest(RX,
                                       RY,
                                       sigma=sigma,
                                       covariance=covariance,
                                       exact=exact)
        covtest_P.append(pval)

        # reduced

        # Signed column of the residualized design picked out by covtest.
        eta = RX[:, idx] * sign
        Acon = constraints(FS.A, np.zeros(FS.A.shape[0]))
        Acon.covariance *= sigma**2  # TypeError if sigma is None
        if i > 0:
            # Condition on U.dot(Y) using the second-to-last entry of FS.P
            # (presumably the projection for the previously selected
            # variables -- TODO confirm).
            U = FS.P[-2].U.T
            Uy = np.dot(U, Y)
            Bcon = Acon.conditional(U, Uy)
        else:
            Bcon = Acon

        # The spacings pivot uses the unconditioned constraints (Acon);
        # the gibbs tests below use the conditioned ones (Bcon).
        spacings_P.append(Acon.pivot(eta, Y))

        reduced_pval, _, _ = gibbs_test(Bcon,
                                        Y,
                                        eta,
                                        ndraw=ndraw,
                                        burnin=burnin,
                                        sigma_known=sigma is not None,
                                        alternative='greater')
        reduced_Pknown.append(reduced_pval)

        # Same test again, this time treating sigma as unknown.
        reduced_pval, _, _ = gibbs_test(Bcon,
                                        Y,
                                        eta,
                                        ndraw=ndraw,
                                        burnin=burnin,
                                        sigma_known=False,
                                        alternative='greater')
        reduced_Punknown.append(reduced_pval)

    return covtest_P, reduced_Pknown, reduced_Punknown, spacings_P, FS.variables