Example #1
def main(n=50):

    from regreg.atoms._isotonic import _isotonic_regression
    import matplotlib.pyplot as plt
    initial = np.ones(n) + 0.01 * np.random.standard_normal(n)
    grad_map = lambda val: _grad_log_wishart_white(val, n)

    def projection_map(vals):
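        # isotonic regression projects the values onto nondecreasing sequences;
        # the floor keeps them strictly positive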
        iso = np.zeros_like(vals)
        _isotonic_regression(vals, np.ones_like(vals), iso)
        vals = np.asarray(iso)
        return np.maximum(vals, 1.e-6)

    sampler = projected_langevin(initial, grad_map, projection_map, 0.01)
    sampler = iter(sampler)

    path = [initial.copy()]
    for _ in range(200):
        print(sampler.state)
        sampler.next()
        path.append(sampler.state.copy())
    path = np.array(path)

    for i in range(5):
        plt.plot(path[:, i])
    plt.show()
Example #2
def main(n=50):

    import matplotlib.pyplot as plt
    from sklearn.isotonic import IsotonicRegression

    initial = np.ones(n) + 0.01 * np.random.standard_normal(n)
    grad_map = lambda val: _grad_log_wishart_white(val, n)

    def projection_map(vals):
        # fit an increasing isotonic regression to the values and keep them
        # bounded away from zero
        iso = IsotonicRegression(y_min=1.e-6)
        vals = iso.fit_transform(np.arange(len(vals)), np.asarray(vals))
        return np.maximum(vals, 1.e-6)

    sampler = projected_langevin(initial, grad_map, projection_map, 0.01)
    sampler = iter(sampler)

    path = [initial.copy()]
    for _ in range(200):
        print(sampler.state)
        sampler.next()
        path.append(sampler.state.copy())
    path = np.array(path)

    for i in range(5):
        plt.plot(path[:, i])
    plt.show()
Example #3
def pval(vec_state, full_gradient, full_projection, move_data,
         bootstrap_samples, X, y, nonzero, active):
    """
    """

    n, p = X.shape

    y0 = y.copy()

    null = []
    alt = []

    X_E = X[:, active]
    ndata = y.shape[0]

    active_set = np.where(active)[0]

    print "true nonzero ", nonzero, "active set", active_set

    if set(nonzero).issubset(active_set):
        for j, idx in enumerate(active_set):
            eta = X[:, idx]
            #keep = np.copy(active)
            keep = np.ones(p, dtype=bool)
            keep[idx] = False

            linear_part = X[:, keep].T

            P = np.dot(linear_part.T, np.linalg.pinv(linear_part).T)
            I = np.identity(linear_part.shape[1])
            R = I - P

            sampler = projected_langevin(vec_state.copy(), full_gradient,
                                         full_projection, 1. / (2 * p))

            samples = []

            boot_samples = bootstrap_samples(y0, P, R)

            for _ in range(1000):
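                # one Langevin step, then refresh the data block of the state
                # from the bootstrap samples before recording it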
                sampler.next()
                new_data = move_data(sampler.state, boot_samples)
                sampler.state[:ndata] = new_data
                samples.append(sampler.state.copy())

            samples = np.array(samples)
            data_samples = samples[:, :n]

            pop = [np.dot(eta, z) for z in data_samples]
            obs = np.dot(eta, y0)

            fam = discrete_family(pop, np.ones_like(pop))
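            # two-sided p-value of the observed eta'y under the equal-weight
            # empirical distribution of the sampled values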
            pval = fam.cdf(0, obs)
            pval = 2 * min(pval, 1 - pval)
            print "observed: ", obs, "p value: ", pval
            #if pval < 0.0001:
            #    print obs, pval, np.percentile(pop, [0.2,0.4,0.6,0.8,1.0])
            if idx in nonzero:
                alt.append(pval)
            else:
                null.append(pval)

    return null, alt
Example #4
def test_kfstep(k=4, s=3, n=100, p=10, Langevin_steps=10000, burning=2000):

    X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, random_signs=True, s=s, sigma=1.,rho=0, signal=10)
    epsilon = 0.

    randomization = laplace(loc=0, scale=1.)

    j_seq = np.empty(k, dtype=int)
    s_seq = np.empty(k)

    left = np.ones(p, dtype=bool)
    obs = 0

    initial_state = np.zeros(n + np.sum([i for i in range(p-k+1,p+1)]))
    initial_state[:n] = y.copy()

    # placeholder list of design matrices, one per step; filled in the loop below
    mat = [np.zeros((n, ncol)) for ncol in range(p, p - k, -1)]

    curr = n

    keep = np.zeros(p, dtype=bool)

    for i in range(k):
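        # step i of the forward stepwise path: project the remaining columns off
        # the selected ones, randomize the scores, and record the maximizer and
        # its sign for the projection cone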
        X_left = X[:,left]
        X_selected = X[:, ~left]
        if (np.sum(left)<p):
            P_perp = np.identity(n) - X_selected.dot(np.linalg.pinv(X_selected))
            mat[i] = P_perp.dot(X_left)
        else:
            mat[i] = X

        mat_complete = np.zeros((n,p))
        mat_complete[:, left] = mat[i]

        T = np.dot(mat[i].T, y)
        T_complete = np.dot(mat_complete.T, y)

        obs = np.max(np.abs(T))
        keep = np.copy(~left)

        random_Z = randomization.rvs(T.shape[0])
        T_random = T + random_Z
        initial_state[curr:(curr+p-i)] = T_random # initializing subgradients
        curr = curr + p-i

        j_seq[i] = np.argmax(np.abs(T_random))
        s_seq[i] = np.sign(T_random[j_seq[i]])

        #def find_index(v, idx1):
        #    _sumF = 0
        #    _sumT = 0
        #    idx = idx1+1
        #    for i in range(v.shape[0]):
        #        if (v[i] == False):
        #            _sumF = _sumF + 1
        #        else:
        #           _sumT = _sumT + 1
        #        if _sumT >= idx: break
        #    return (_sumT + _sumF-1)

        T_complete[left] += random_Z
        left[np.argmax(np.abs(T_complete))] = False


    # conditioning
    linear_part = X[:, keep].T
    P = np.dot(linear_part.T, np.linalg.pinv(linear_part).T)
    I = np.identity(linear_part.shape[1])
    R = I - P


    def full_projection(state, n=n, p=p, k=k):
        """
        """
        new_state = np.empty(state.shape, float)
        new_state[:n] = state[:n]
        curr = n
        for i in range(k):
            projection = projection_cone(p-i, j_seq[i], s_seq[i])
            new_state[curr:(curr+p-i)] = projection(state[curr:(curr+p-i)])
            curr = curr+p-i
        return new_state


    def full_gradient(state, n=n, p=p, k=k, X=X, mat=mat):
        data = state[:n]

        grad = np.empty(n + np.sum([i for i in range(p-k+1,p+1)]))
        grad[:n] = - data

        curr = n
        for i in range(k):
            subgrad = state[curr:(curr+p-i)]

            sign_vec = np.sign(-mat[i].T.dot(data) + subgrad)
            grad[curr:(curr + p - i)] = -sign_vec
            curr = curr+p-i
            grad[:n] += mat[i].dot(sign_vec)

        return grad



    sampler = projected_langevin(initial_state,
                                 full_gradient,
                                 full_projection,
                                 1./p)
    samples = []


    for i in range(Langevin_steps):
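        # after the burn-in counter, advance the sampler and replace the data
        # block by P(old data) + R(new data), so the conditioned component
        # stays fixed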
        if i>burning:
            old_state = sampler.state.copy()
            old_data = old_state[:n]
            sampler.next()
            new_state = sampler.state.copy()
            new_data = new_state[:n]
            new_data = np.dot(P, old_data) + np.dot(R, new_data)
            sampler.state[:n] = new_data
            samples.append(sampler.state.copy())


    samples = np.array(samples)
    Z = samples[:,:n]

    pop = np.abs(mat[k-1].T.dot(Z.T)).max(0)
    fam = discrete_family(pop, np.ones_like(pop))
    pval = fam.cdf(0, obs)
    pval = 2 * min(pval, 1 - pval)

    #stop

    print('pvalue:', pval)
    return pval
Example #5
def test_fstep(s=0, n=50, p=10, weights="gumbel", randomization_dist="logistic",
               Langevin_steps=10000, burning=1000):

    X, y, _, nonzero, sigma = instance(n=n, p=p, random_signs=True, s=s, sigma=1.,rho=0)
    epsilon = 0.
    if randomization_dist == "laplace":
        randomization = laplace(loc=0, scale=1.)
        random_Z = randomization.rvs(p)
    if randomization_dist=="logistic":
        random_Z = np.random.logistic(loc=0, scale=1, size=p)

    T = np.dot(X.T,y)
    T_random = T + random_Z
    T_abs = np.abs(T_random)
    j_star = np.argmax(T_abs)
    s_star = np.sign(T_random[j_star])

    # this is the subgradient part of the projection
    projection = projection_cone(p, j_star, s_star)


    def full_projection(state, n=n, p=p):
        """
        State is (y, u) -- first n coordinates are y, last p are u.
        """
        new_state = np.empty(state.shape, float)
        new_state[:n] = state[:n]
        new_state[n:] = projection(state[n:])
        return new_state


    obs = np.max(np.abs(T))
    eta_star = np.zeros(p)
    eta_star[j_star] = s_star


    def full_gradient(state, n=n, p=p, X=X, y=y):
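        # gradient of the log-density of (alpha, u): omega = -X' diag(y) alpha + u
        # gives the randomization term, and the chosen `weights` density gives
        # the term for alpha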
        #data = state[:n]

        alpha = state[:n]
        subgrad = state[n:]

        mat = np.dot(X.T, np.diag(y))
        omega = - mat.dot(alpha) + subgrad

        if randomization_dist == "laplace":
            randomization_derivative = np.sign(omega)
        if randomization_dist == "logistic":
            randomization_derivative = -(np.exp(-omega) - 1) / (np.exp(-omega) + 1)
        if randomization_dist == "normal":
            randomization_derivative = omega

        grad = np.empty(state.shape, float)
        #grad[:n] = - (data - X.dot(randomization_derivative))
        grad[:n] = np.dot(mat.T,randomization_derivative)

        if weights == "normal":
            grad[:n] -= alpha
        if (weights == "gumbel"):
            gumbel_beta = np.sqrt(6) / (1.14 * np.pi)
            euler = 0.57721
            gumbel_mu = -gumbel_beta * euler
            gumbel_sigma = 1. / 1.14
            grad[:n] -= (1. - np.exp(-(alpha * gumbel_sigma - gumbel_mu) / gumbel_beta)) * gumbel_sigma / gumbel_beta

        grad[n:] = - randomization_derivative

        return grad



    state = np.zeros(n+p)
    #state[:n] = y
    state[:n] = np.zeros(n)
    state[n:] = T_random

    sampler = projected_langevin(state,
                                 full_gradient,
                                 full_projection,
                                 1./p)
    samples = []

    for i in range(Langevin_steps):
        sampler.next()
        if (i>burning):
            samples.append(sampler.state.copy())

    samples = np.array(samples)
    Z = samples[:,:n]
    print Z.shape

    mat = np.dot(X.T,np.diag(y))

    #pop = [np.linalg.norm(np.dot(mat, Z[i,:].T)) for i in range(Z.shape[0])]
    #obs = np.linalg.norm(np.dot(X.T,y))
    pop = np.abs(np.dot(mat, Z.T)).max(0)
    fam = discrete_family(pop, np.ones_like(pop))
    pval = fam.cdf(0, obs)
    pval = 2 * min(pval, 1 - pval)

    #stop

    print 'pvalue:', pval
    return pval
Example #6
def test_fstep(s=0, n=100, p=10, Langevin_steps=10000, burning=2000, condition_on_sign=True):

    X, y, _, nonzero, sigma = instance(n=n, p=p, random_signs=True, s=s, sigma=1.,rho=0)
    epsilon = 0.
    randomization = laplace(loc=0, scale=1.)

    random_Z = randomization.rvs(p)
    T = np.dot(X.T,y) 
    T_random = T + random_Z
    T_abs = np.abs(T_random)
    j_star = np.argmax(T_abs)
    s_star = np.sign(T_random[j_star])

    # this is the subgradient part of the projection

    if condition_on_sign:
        projection = projection_cone(p, j_star, s_star)
    else:
        projection = projection_cone_nosign(p, j_star)

    def full_projection(state, n=n, p=p):
        """
        State is (y, u) -- first n coordinates are y, last p are u.
        """
        new_state = np.empty(state.shape, float)
        new_state[:n] = state[:n]
        new_state[n:] = projection(state[n:])
        return new_state

    obs = np.max(np.abs(T))
    eta_star = np.zeros(p)
    eta_star[j_star] = s_star

    def full_gradient(state, n=n, p=p, X=X):
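        # sign(-X'y + u) is the score of the Laplace randomization; it enters
        # the data block through X and the subgradient block directly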
        data = state[:n]
        subgrad = state[n:]
        sign_vec = np.sign(-X.T.dot(data) + subgrad)

        grad = np.empty(state.shape, float)
        grad[n:] = - sign_vec

        grad[:n] = - (data - X.dot(sign_vec))
        return grad



    state = np.zeros(n+p)
    state[:n] = y
    state[n:] = T_random

    sampler = projected_langevin(state,
                                 full_gradient,
                                 full_projection,
                                 1./p)
    samples = []

    for i in range(Langevin_steps):
        if i>burning:
            sampler.next()
            samples.append(sampler.state.copy())

    samples = np.array(samples)
    Z = samples[:,:n]

    pop = np.abs(X.T.dot(Z.T)).max(0)
    fam = discrete_family(pop, np.ones_like(pop))
    pval = fam.cdf(0, obs)
    pval = 2 * min(pval, 1 - pval)

    #stop

    print 'pvalue:', pval
    return pval
Example #7
def pval(vec_state, full_gradient, full_projection,
         X, y, obs_residuals,
         nonzero, active):
    """
    """
    
    n, p = X.shape

    y0 = y.copy()

    null = []
    alt = []

    X_E = X[:, active]
    ndata = y.shape[0]

    active_set = np.where(active)[0]

    print "true nonzero ", nonzero, "active set", active_set

    if set(nonzero).issubset(active_set):
        #for j, idx in enumerate(active_set):
            #eta = X[:, idx]
            #keep = np.copy(active)
            #keep = np.ones(p, dtype=bool)
            #keep[idx] = False

            #linear_part = X[:,keep].T

            #P = np.dot(linear_part.T, np.linalg.pinv(linear_part).T)
            #I = np.identity(linear_part.shape[1])
            #R = I - P

            #fixed_part  = np.dot(P, np.dot(X.T, y0))

            sampler = projected_langevin(vec_state.copy(),
                                         full_gradient,
                                         full_projection,
                                         1. / p)

            samples = []

            #boot_samples = bootstrap_samples(y0, P, R)

            for _ in range(6000):
                sampler.next()
                samples.append(sampler.state.copy())

            samples = np.array(samples)
            alpha_samples = samples[:, :n]

            data_samples = [np.dot(X[:, active].T, np.diag(obs_residuals).dot(alpha_samples[i,:])) for i in range(len(samples))]

            pop = [np.linalg.norm(z) for z in data_samples]
            obs = np.linalg.norm(np.dot(X[:, active].T, y0))
            #obs = np.linalg.norm(y0)

            fam = discrete_family(pop, np.ones_like(pop))
            pval = fam.cdf(0, obs)
            pval = 2 * min(pval, 1-pval)
            print "observed: ", obs, "p value: ", pval
            #if pval < 0.0001:
            #    print obs, pval, np.percentile(pop, [0.2,0.4,0.6,0.8,1.0])
            #if idx in nonzero:
            #    alt.append(pval)
            #else:
            null.append(pval)


    return null, alt
Example #8
def pval(vec_state, full_projection, X, y, obs_residuals, signs, lam, epsilon,
         nonzero, active):
    """
    """

    n, p = X.shape

    y0 = y.copy()

    null = []
    alt = []

    X_E = X[:, active]
    ndata = y.shape[0]
    inactive = ~active
    nalpha = n

    active_set = np.where(active)[0]

    print "true nonzero ", nonzero, "active set", active_set

    if set(nonzero).issubset(active_set):
        for j, idx in enumerate(active_set):
            eta = X[:, idx]
            keep = np.copy(active)
            #keep = np.ones(p, dtype=bool)
            keep[idx] = False

            linear_part = X[:, keep].T

            P = np.dot(linear_part.T, np.linalg.pinv(linear_part).T)
            I = np.identity(linear_part.shape[1])
            R = I - P

            fixed_part = np.dot(X.T, np.dot(P, y))
            hessian = np.dot(X.T, X)
            B = hessian + epsilon * np.identity(p)
            A = B[:, active]

            matXTR = X.T.dot(R)

            def full_gradient(vec_state,
                              fixed_part=fixed_part,
                              R=R,
                              obs_residuals=obs_residuals,
                              signs=signs,
                              X=X,
                              lam=lam,
                              epsilon=epsilon,
                              data0=y,
                              hessian=hessian,
                              A=A,
                              matXTR=matXTR,
                              nalpha=nalpha,
                              active=active,
                              inactive=inactive):
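                # gradient of the log-density in (alpha, betaE, cube): omega
                # reconstructs the randomization from the weighted residuals,
                # the restricted Hessian and the penalty terms, and its sign
                # plays the role of the Laplace score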

                nactive = np.sum(active)
                ninactive = np.sum(inactive)

                alpha = vec_state[:nalpha]
                betaE = vec_state[nalpha:(nalpha + nactive)]
                cube = vec_state[(nalpha + nactive):]

                p = X.shape[1]
                beta_full = np.zeros(p)
                beta_full[active] = betaE
                subgradient = np.zeros(p)
                subgradient[inactive] = lam * cube
                subgradient[active] = lam * signs

                opt_vec = epsilon * beta_full + subgradient

                # omega = -  np.dot(X.T, np.diag(obs_residuals).dot(alpha))/np.sum(alpha) + np.dot(hessian, beta_full) + opt_vec
                weighted_residuals = np.diag(obs_residuals).dot(alpha)
                omega = -fixed_part - np.dot(matXTR,
                                             weighted_residuals) + np.dot(
                                                 hessian, beta_full) + opt_vec
                sign_vec = np.sign(omega)

                #mat = np.dot(X.T, np.diag(obs_residuals))
                mat = np.dot(matXTR, np.diag(obs_residuals))
                _gradient = np.zeros(nalpha + nactive + ninactive)
                _gradient[:nalpha] = -np.ones(nalpha) + np.dot(mat.T, sign_vec)
                _gradient[nalpha:(nalpha + nactive)] = -np.dot(A.T, sign_vec)
                _gradient[(nalpha + nactive):] = -lam * sign_vec[inactive]

                return _gradient

            sampler = projected_langevin(vec_state.copy(), full_gradient,
                                         full_projection, 1. / p)

            samples = []

            for _ in range(5000):
                sampler.next()
                samples.append(sampler.state.copy())

            samples = np.array(samples)
            alpha_samples = samples[:, :n]

            residuals_samples = [
                np.diag(obs_residuals).dot(alpha_samples[i, :])
                for i in range(len(samples))
            ]

            pop = [
                np.inner(eta,
                         np.dot(P, y0) + np.dot(R, z))
                for z in residuals_samples
            ]
            obs = np.inner(eta, y0)

            fam = discrete_family(pop, np.ones_like(pop))
            pval = fam.cdf(0, obs)
            pval = 2 * min(pval, 1 - pval)
            print "observed: ", obs, "p value: ", pval
            #if pval < 0.0001:
            #    print obs, pval, np.percentile(pop, [0.2,0.4,0.6,0.8,1.0])
            if idx in nonzero:
                alt.append(pval)
            else:
                null.append(pval)

    return null, alt
Example #9
def test_overall_null_two_queries():
    s, n, p = 5, 200, 20

    randomizer = randomization.laplace((p, ), scale=0.5)
    X, y, beta, _ = logistic_instance(n=n, p=p, s=s, rho=0.1, snr=14)

    nonzero = np.where(beta)[0]
    lam_frac = 1.

    loss = rr.glm.logistic(X, y)
    epsilon = 1. / np.sqrt(n)

    lam = lam_frac * np.mean(
        np.fabs(np.dot(X.T, np.random.binomial(1, 1. / 2, (n, 10000)))).max(0))
    W = np.ones(p) * lam
    W[0] = 0  # use at least some unpenalized
    penalty = rr.group_lasso(np.arange(p),
                             weights=dict(zip(np.arange(p), W)),
                             lagrange=1.)
    # first randomization

    M_est1 = glm_group_lasso(loss, epsilon, penalty, randomizer)
    M_est1.solve()
    bootstrap_score1 = M_est1.setup_sampler(scaling=2.)

    # second randomization

    M_est2 = glm_group_lasso(loss, epsilon, penalty, randomizer)
    M_est2.solve()
    bootstrap_score2 = M_est2.setup_sampler(scaling=2.)

    # we take target to be union of two active sets

    active = M_est1.selection_variable[
        'variables'] + M_est2.selection_variable['variables']

    if set(nonzero).issubset(np.nonzero(active)[0]):
        boot_target, target_observed = pairs_bootstrap_glm(loss, active)

        # target are all true null coefficients selected

        sampler = lambda: np.random.choice(n, size=(n, ), replace=True)
        target_cov, cov1, cov2 = bootstrap_cov(sampler,
                                               boot_target,
                                               cross_terms=(bootstrap_score1,
                                                            bootstrap_score2))

        active_set = np.nonzero(active)[0]
        inactive_selected = I = [
            i for i in np.arange(active_set.shape[0])
            if active_set[i] not in nonzero
        ]

        # is it enough only to bootstrap the inactive ones?
        # seems so...

        if not I:
            return None

        A1, b1 = M_est1.linear_decomposition(cov1[I], target_cov[I][:, I],
                                             target_observed[I])
        A2, b2 = M_est2.linear_decomposition(cov2[I], target_cov[I][:, I],
                                             target_observed[I])

        target_inv_cov = np.linalg.inv(target_cov[I][:, I])

        initial_state = np.hstack([
            target_observed[I], M_est1.observed_opt_state,
            M_est2.observed_opt_state
        ])

        ntarget = len(I)
        target_slice = slice(0, ntarget)
        opt_slice1 = slice(ntarget, p + ntarget)
        opt_slice2 = slice(p + ntarget, 2 * p + ntarget)

        def target_gradient(state):
            # with many samplers, we will add up the `target_slice` component
            # many target_grads
            # and only once do the Gaussian addition of full_grad

            target = state[target_slice]
            opt_state1 = state[opt_slice1]
            opt_state2 = state[opt_slice2]
            target_grad1 = M_est1.randomization_gradient(
                target, (A1, b1), opt_state1)
            target_grad2 = M_est2.randomization_gradient(
                target, (A2, b2), opt_state2)

            full_grad = np.zeros_like(state)
            full_grad[opt_slice1] = -target_grad1[1]
            full_grad[opt_slice2] = -target_grad2[1]
            full_grad[target_slice] -= target_grad1[0] + target_grad2[0]
            full_grad[target_slice] -= target_inv_cov.dot(target)

            return full_grad

        def target_projection(state):
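            # project each block of optimization variables back onto its
            # feasible set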
            opt_state1 = state[opt_slice1]
            state[opt_slice1] = M_est1.projection(opt_state1)
            opt_state2 = state[opt_slice2]
            state[opt_slice2] = M_est2.projection(opt_state2)
            return state

        target_langevin = projected_langevin(initial_state, target_gradient,
                                             target_projection,
                                             .5 / (2 * p + 1))

        Langevin_steps = 10000
        burning = 2000
        samples = []
        for i in range(Langevin_steps):
            target_langevin.next()
            if (i >= burning):
                samples.append(target_langevin.state[target_slice].copy())

        test_stat = lambda x: np.linalg.norm(x)
        observed = test_stat(target_observed[I])
        sample_test_stat = np.array([test_stat(x) for x in samples])

        family = discrete_family(sample_test_stat,
                                 np.ones_like(sample_test_stat))
        pval = family.ccdf(0, observed)
        return pval, False
Example #10
def test_one_inactive_coordinate_handcoded():
    s, n, p = 5, 200, 20

    randomizer = randomization.laplace((p, ), scale=1.)
    X, y, beta, _ = logistic_instance(n=n, p=p, s=s, rho=0.1, snr=14)

    nonzero = np.where(beta)[0]
    lam_frac = 1.

    loss = rr.glm.logistic(X, y)
    epsilon = 1.

    lam = lam_frac * np.mean(
        np.fabs(np.dot(X.T, np.random.binomial(1, 1. / 2, (n, 10000)))).max(0))
    W = np.ones(p) * lam
    W += lam * np.arange(p) / 200
    W[0] = 0
    penalty = rr.group_lasso(np.arange(p),
                             weights=dict(zip(np.arange(p), W)),
                             lagrange=1.)

    print(lam)
    # our randomization

    M_est1 = glm_group_lasso(loss, epsilon, penalty, randomizer)
    M_est1.solve()
    bootstrap_score1 = M_est1.setup_sampler()

    active = M_est1.selection_variable['variables']
    if set(nonzero).issubset(np.nonzero(active)[0]):
        boot_target, target_observed = pairs_bootstrap_glm(loss, active)

        # target are all true null coefficients selected

        sampler = lambda: np.random.choice(n, size=(n, ), replace=True)
        target_cov, cov1 = bootstrap_cov(sampler,
                                         boot_target,
                                         cross_terms=(bootstrap_score1, ))

        # have checked that covariance up to here agrees with other test_glm_langevin example

        active_set = np.nonzero(active)[0]
        inactive_selected = I = [
            i for i in np.arange(active_set.shape[0])
            if active_set[i] not in nonzero
        ]

        # is it enough only to bootstrap the inactive ones?
        # seems so...

        if not I:
            return None

        # take the first inactive one
        I = I[:1]
        A1, b1 = M_est1.linear_decomposition(cov1[I], target_cov[I][:, I],
                                             target_observed[I])

        print(I, 'I', target_observed[I])
        target_inv_cov = np.linalg.inv(target_cov[I][:, I])

        initial_state = np.hstack(
            [target_observed[I], M_est1.observed_opt_state])

        ntarget = len(I)
        target_slice = slice(0, ntarget)
        opt_slice1 = slice(ntarget, p + ntarget)

        def target_gradient(state):
            # with many samplers, we will add up the `target_slice` component
            # many target_grads
            # and only once do the Gaussian addition of full_grad

            target = state[target_slice]
            opt_state1 = state[opt_slice1]
            target_grad1 = M_est1.randomization_gradient(
                target, (A1, b1), opt_state1)

            full_grad = np.zeros_like(state)
            full_grad[opt_slice1] = -target_grad1[1]
            full_grad[target_slice] -= target_grad1[0]
            full_grad[target_slice] -= target_inv_cov.dot(target)

            return full_grad

        def target_projection(state):
            opt_state1 = state[opt_slice1]
            state[opt_slice1] = M_est1.projection(opt_state1)
            return state

        target_langevin = projected_langevin(initial_state, target_gradient,
                                             target_projection, 1. / p)

        Langevin_steps = 10000
        burning = 2000
        samples = []
        for i in range(Langevin_steps + burning):
            target_langevin.next()
            if (i > burning):
                samples.append(target_langevin.state[target_slice].copy())

        test_stat = lambda x: x
        observed = test_stat(target_observed[I])
        sample_test_stat = np.array([test_stat(x) for x in samples])

        family = discrete_family(sample_test_stat,
                                 np.ones_like(sample_test_stat))
        pval = family.ccdf(0, observed)
        pval = 2 * min(pval, 1 - pval)

        _i = I[0]
        naive_Z = target_observed[_i] / np.sqrt(target_cov[_i, _i])
        naive_pval = ndist.sf(np.fabs(naive_Z))
        naive_pval = 2 * min(naive_pval, 1 - naive_pval)
        print('naive Z', naive_Z, naive_pval)
        return pval, naive_pval, False
Example #11
def pval(vec_state, full_projection, X, obs_residuals, beta_unpenalized,
         full_null, signs, lam, epsilon, nonzero, active, Sigma, weights,
         randomization_dist, randomization_scale, Langevin_steps, step_size,
         burning, X_scaled):
    """
    """

    n, p = X.shape

    null = []
    alt = []

    X_E = X[:, active]
    inactive = ~active
    nalpha = n
    nactive = np.sum(active)
    ninactive = np.sum(inactive)

    active_set = np.where(active)[0]

    print "true nonzero ", nonzero, "active set", active_set

    XEpinv = np.linalg.pinv(X[:, active])
    hessian = np.dot(X.T, X)
    hessian_restricted = hessian[:, active]

    mat = XEpinv.dot(np.diag(obs_residuals))

    SigmaE_inv = np.linalg.inv(Sigma[:nactive, :nactive])

    if set(nonzero).issubset(active_set):

        def full_gradient(vec_state,
                          obs_residuals=obs_residuals,
                          lam=lam,
                          epsilon=epsilon,
                          active=active,
                          inactive=inactive):
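            # reconstruct omega from the bootstrapped fit and penalty terms, take
            # the derivative of the chosen randomization density, and add the
            # weight-distribution term for alpha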

            nactive = np.sum(active)
            ninactive = np.sum(inactive)

            alpha = vec_state[:n]
            betaE = vec_state[n:(n + nactive)]
            cube = vec_state[(n + nactive):]

            p = X.shape[1]
            beta_full = np.zeros(p)
            beta_full[active] = betaE
            subgradient = np.zeros(p)
            subgradient[inactive] = lam * cube
            subgradient[active] = lam * signs

            opt_vec = epsilon * beta_full + subgradient

            beta_bar_boot = mat.dot(alpha)
            omega = -full_null - np.dot(
                hessian_restricted, beta_bar_boot) + np.dot(
                    hessian_restricted, betaE) + opt_vec

            if randomization_dist == "laplace":
                randomization_derivative = np.sign(
                    omega
                ) / randomization_scale  # sign(w), w=grad+\epsilon*beta+lambda*u
            if randomization_dist == "logistic":
                randomization_derivative = -(np.exp(-omega) - 1) / (
                    np.exp(-omega) + 1)

            A = hessian + epsilon * np.identity(nactive + ninactive)
            A_restricted = A[:, active]

            _gradient = np.zeros(n + nactive + ninactive)

            # saturated model
            mat_q = np.dot(hessian_restricted, mat)

            _gradient[:n] = np.dot(mat_q.T, randomization_derivative)

            if (weights == 'exponential'):
                _gradient[:n] -= np.ones(n)
            if (weights == "normal"):
                _gradient[:n] -= alpha
            if weights == "gamma":
                _gradient[:n] = 3. / (alpha + 2) - 2

            if (weights == "gumbel"):
                gumbel_beta = np.sqrt(6) / (1.14 * np.pi)
                euler = 0.57721
                gumbel_mu = -gumbel_beta * euler
                gumbel_sigma = 1. / 1.14
                _gradient[:n] -= (
                    1. - np.exp(-(alpha * gumbel_sigma - gumbel_mu) /
                                gumbel_beta)) * gumbel_sigma / gumbel_beta

            if weights == "neutral":
                _gradient[:n] -= np.dot(mat.T, np.dot(SigmaE_inv,
                                                      beta_bar_boot))

            _gradient[n:(
                n + nactive)] = -A_restricted.T.dot(randomization_derivative)
            _gradient[(n +
                       nactive):] = -lam * randomization_derivative[inactive]

            # selected model
            # _gradient[:nactive] = - (np.dot(Sigma_T_inv, data[:nactive]) + np.dot(hessian[:, active].T, sign_vec))
            # _gradient[ndata:(ndata + nactive)] = np.dot(A_restricted.T, sign_vec)
            # _gradient[(ndata + nactive):] = lam * sign_vec[inactive]

            return _gradient

        sampler = projected_langevin(vec_state.copy(), full_gradient,
                                     full_projection, 1. / p)

        samples = []

        for i in range(Langevin_steps):
            sampler.next()
            if (i > burning):
                samples.append(sampler.state.copy())

        samples = np.array(samples)
        alpha_samples = samples[:, :n]

        beta_bars = [
            np.dot(XEpinv, np.diag(obs_residuals)).dot(alpha_samples[i, :].T)
            for i in range(len(samples))
        ]

        pop = [np.linalg.norm(z) for z in beta_bars]
        obs = np.linalg.norm(beta_unpenalized)

        fam = discrete_family(pop, np.ones_like(pop))
        pval = fam.cdf(0, obs)
        pval = 2 * min(pval, 1 - pval)
        print "observed: ", obs, "p value: ", pval
        #if pval < 0.0001:
        #    print obs, pval, np.percentile(pop, [0.2,0.4,0.6,0.8,1.0])
        #if idx in nonzero:
        #    alt.append(pval)
        #else:
        #    null.append(pval)

    return [pval], [0]
Example #12
def pval(vec_state, full_projection, X, obs_residuals, beta_unpenalized,
         full_null, signs, lam, epsilon, nonzero, active, Sigma, weights,
         randomization_dist, randomization_scale, Langevin_steps, step_size,
         burning, X_scaled):
    """
    """

    n, p = X.shape

    null = []
    alt = []

    X_E = X[:, active]
    inactive = ~active
    nalpha = n
    nactive = np.sum(active)
    ninactive = np.sum(inactive)

    active_set = np.where(active)[0]

    print "true nonzero ", nonzero, "active set", active_set

    XEpinv = np.linalg.pinv(X[:, active])
    hessian = np.dot(X.T, X)
    hessian_restricted = hessian[:, active]
    mat = XEpinv.dot(np.diag(obs_residuals))

    if set(nonzero).issubset(active_set):
        for j, idx in enumerate(active_set):
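            # test the idx-th selected variable: split the unpenalized fit into
            # the eta-direction and the nuisance T_minus_j, then sample the
            # remaining randomness with projected Langevin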
            #if j>0:
            #    break
            eta = np.zeros(nactive)
            eta[j] = 1
            sigma_eta_sq = Sigma[j, j]

            linear_part = np.identity(nactive) - (
                np.outer(np.dot(Sigma, eta), eta) / sigma_eta_sq)
            #P = np.dot(linear_part.T, np.linalg.pinv(linear_part).T)
            #T_minus_j = np.dot(P, beta_unpenalized)
            T_minus_j = np.dot(
                linear_part,
                beta_unpenalized)  # sufficient stat for the nuisance
            c = np.dot(Sigma, eta) / sigma_eta_sq
            fixed_part = full_null + hessian_restricted.dot(T_minus_j)

            XXc = hessian_restricted.dot(c)

            if not X_scaled:
                fixed_part /= np.sqrt(n)
                hessian /= np.sqrt(n)
                hessian_restricted /= np.sqrt(n)
                XXc /= np.sqrt(n)

            def full_gradient(vec_state,
                              fixed_part=fixed_part,
                              obs_residuals=obs_residuals,
                              eta=eta,
                              lam=lam,
                              epsilon=epsilon,
                              active=active,
                              inactive=inactive):

                nactive = np.sum(active)
                ninactive = np.sum(inactive)

                alpha = vec_state[:n]
                betaE = vec_state[n:(n + nactive)]
                cube = vec_state[(n + nactive):]

                beta_full = np.zeros(p)
                beta_full[active] = betaE
                subgradient = np.zeros(p)
                subgradient[inactive] = lam * cube
                subgradient[active] = lam * signs

                opt_vec = epsilon * beta_full + subgradient

                beta_bar_j_boot = np.inner(mat[j, :], alpha)
                omega = -fixed_part - XXc * beta_bar_j_boot + np.dot(
                    hessian_restricted, betaE) + opt_vec

                if randomization_dist == "laplace":
                    randomization_derivative = np.sign(
                        omega
                    ) / randomization_scale  # sign(w), w=grad+\epsilon*beta+lambda*u
                if randomization_dist == "logistic":
                    omega_scaled = omega / randomization_scale
                    randomization_derivative = -(np.exp(-omega_scaled) - 1) / (
                        np.exp(-omega_scaled) + 1)
                    randomization_derivative /= randomization_scale
                if randomization_dist == "normal":
                    randomization_derivative = omega / (randomization_scale**2)

                A = hessian + epsilon * np.identity(nactive + ninactive)
                A_restricted = A[:, active]

                _gradient = np.zeros(n + nactive + ninactive)

                # saturated model
                mat_q = np.outer(XXc, eta).dot(mat)

                _gradient[:n] = np.dot(mat_q.T, randomization_derivative)

                if (weights == 'exponential'):
                    _gradient[:n] -= np.ones(n)
                if (weights == "normal"):
                    _gradient[:n] -= alpha
                if weights == "gamma":
                    _gradient[:n] = 3. / (alpha + 2) - 2
                if (weights == "gumbel"):
                    gumbel_beta = np.sqrt(6) / (1.14 * np.pi)
                    euler = 0.57721
                    gumbel_mu = -gumbel_beta * euler
                    gumbel_sigma = 1. / 1.14
                    _gradient[:n] -= (
                        1. - np.exp(-(alpha * gumbel_sigma - gumbel_mu) /
                                    gumbel_beta)) * gumbel_sigma / gumbel_beta

                if weights == "neutral":
                    _gradient[:n] -= (beta_bar_j_boot / sigma_eta_sq) * np.dot(
                        mat.T, eta)

                _gradient[n:(
                    n +
                    nactive)] = -A_restricted.T.dot(randomization_derivative)
                _gradient[(
                    n + nactive):] = -lam * randomization_derivative[inactive]

                # selected model
                # _gradient[:nactive] = - (np.dot(Sigma_T_inv, data[:nactive]) + np.dot(hessian[:, active].T, sign_vec))
                # _gradient[ndata:(ndata + nactive)] = np.dot(A_restricted.T, sign_vec)
                # _gradient[(ndata + nactive):] = lam * sign_vec[inactive]

                return _gradient

            sampler = projected_langevin(vec_state.copy(), full_gradient,
                                         full_projection, step_size)

            samples = []

            for i in range(Langevin_steps):
                sampler.next()
                if (i > burning) and (i % 3 == 0):
                    samples.append(sampler.state.copy())

            samples = np.array(samples)
            alpha_samples = samples[:, :n]
            beta_samples = samples[:, n:(n + nactive)]

            beta_bars = [
                np.dot(XEpinv,
                       np.diag(obs_residuals)).dot(alpha_samples[i, :].T)
                for i in range(len(samples))
            ]
            pop = [z[j] for z in beta_bars]
            obs = beta_unpenalized[j]

            #pop = [np.linalg.norm(beta_samples[i,:]) for i in range(beta_samples.shape[0])]
            #obs = np.linalg.norm(vec_state[n:(n+nactive)])

            fam = discrete_family(pop, np.ones_like(pop))
            pval = fam.cdf(0, obs)
            pval = 2 * min(pval, 1 - pval)
            print "observed: ", obs, "p value: ", pval
            #if pval < 0.0001:
            #    print obs, pval, np.percentile(pop, [0.2,0.4,0.6,0.8,1.0])
            if idx in nonzero:
                alt.append(pval)
            else:
                null.append(pval)

    return null, alt
Example #13
def test_simple_problem(n=100,
                        randomization_dist="logistic",
                        threshold=1,
                        weights="neutral",
                        Langevin_steps=10000,
                        burning=0):
    step_size = 1. / n

    y = np.random.standard_normal(n)
    obs = np.sqrt(n) * np.mean(y)

    if randomization_dist == "logistic":
        omega = np.random.logistic(loc=0, scale=1, size=1)

    if (obs + omega < threshold):
        return -1

    #initial_state = np.ones(n)
    initial_state = np.zeros(n)

    y_cs = (y - np.mean(y)) / np.sqrt(n)

    def full_projection(state):
        return state

    def full_gradient(state, n=n, y_cs=y_cs):
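        # gradient of the log-density of the bootstrap weights plus the logistic
        # randomization term through omega = threshold - <y_cs, state>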

        gradient = np.zeros(n)
        if weights == "normal":
            gradient -= state
        if (weights == "gumbel"):
            gumbel_beta = np.sqrt(6) / (1.14 * np.pi)
            euler = 0.57721
            gumbel_mu = -gumbel_beta * euler
            gumbel_sigma = 1. / 1.14
            gradient -= (1. - np.exp(-(state * gumbel_sigma - gumbel_mu) /
                                     gumbel_beta)) * gumbel_sigma / gumbel_beta
        if weights == "logistic":
            gradient = np.divide(np.exp(-state) - 1, np.exp(-state) + 1)

        if weights == "neutral":
            gradient = -np.inner(state, y_cs) * y_cs

        omega = -np.inner(y_cs, state) + threshold
        if randomization_dist == "logistic":
            randomization_derivative = -1. / (1 + np.exp(-omega))

        gradient -= y_cs * randomization_derivative

        return gradient

    sampler = projected_langevin(initial_state.copy(), full_gradient,
                                 full_projection, step_size)

    samples = []

    for i in range(Langevin_steps):
        sampler.next()
        if (i > burning):
            samples.append(sampler.state.copy())

    alphas = np.array(samples)

    pop = [np.inner(y_cs, alphas[i, :]) for i in range(alphas.shape[0])]

    fam = discrete_family(pop, np.ones_like(pop))
    pval = fam.cdf(0, obs)
    pval = 2 * min(pval, 1 - pval)
    print "observed: ", obs, "p value: ", pval
    return pval