Esempio n. 1
0
def interval(mu, ndraw=100000, keep_every=100):
    """Simulate lengths of intervals returned by ``dbn.interval`` at ``mu``.

    Observations and reference samples are drawn with the module-level
    ``draw_sample`` and ``cutoff``.  Every observation is standardized with
    the mean and standard deviation of a reference sample, an interval is
    computed from a ``discrete_family`` built on that standardized reference
    sample, and the interval length is mapped back to the original scale by
    dividing by the standard deviation.

    Progress is checkpointed to ``umau_lengths<mu>.npz`` (``mu`` formatted
    with ``%0.2f``).  If that file already exists, its ``lengths`` array is
    loaded and extended by this run.

    Parameters
    ----------
    mu : float
        Effect size passed to ``draw_sample``.  For ``mu < 10`` one reference
        sample is shared by many observations and refreshed every 1000
        observations; otherwise a fresh reference sample is drawn for each of
        the 2500 observations.
    ndraw, keep_every : int
        Unused; kept so existing callers keep working.

    Returns
    -------
    tuple
        ``(mean, standard deviation, median)`` of all collected lengths.
    """
    results_file = 'umau_lengths%0.2f.npz' % mu

    # Resume from an earlier checkpoint when there is one.
    if os.path.exists(results_file):
        lengths = list(np.load(results_file)['lengths'])
    else:
        lengths = []

    def _reference_family(sample):
        # Standardize the reference sample and wrap it, equally weighted,
        # in a discrete family; also return the centering and scaling used.
        center, spread = sample.mean(), sample.std()
        standardized = (sample - center) / spread
        family = discrete_family(standardized, np.ones_like(standardized))
        return family, center, spread

    def _checkpoint():
        # Report running summaries and persist what has been collected.
        print('%s %s' % (np.median(lengths), np.mean(lengths)))
        np.savez(results_file, lengths=lengths, mu=mu)

    if mu < 10:
        dbn, mean, scale = _reference_family(
            draw_sample(mu, cutoff, nsample=50000)[:50000])
        dbn.theta = 0.
        new_sample = draw_sample(mu, cutoff, nsample=2500)[:2500]
        for i, s in enumerate(new_sample):
            try:
                _interval = dbn.interval((s - mean) / scale)
                lengths.append(np.fabs(_interval[1] - _interval[0]) / scale)
            except Exception:
                # Best effort: a failed interval is skipped, but
                # KeyboardInterrupt / SystemExit are no longer swallowed.
                print('exception raised')
            if i % 20 == 0 and i > 0:
                _checkpoint()
            if i % 1000 == 0 and i > 0:
                # Refresh the reference sample.  The family is rebuilt
                # together with ``mean`` and ``scale`` so that later
                # observations are standardized consistently with the
                # family they are compared against.
                dbn, mean, scale = _reference_family(
                    draw_sample(mu, cutoff, nsample=50000)[:50000])
                dbn.theta = 0.
            print(i)
    else:
        for i in range(2500):
            big_sample = draw_sample(mu, cutoff, nsample=50000)[:50000]
            # The last draw plays the role of the observation; the rest form
            # the reference sample.
            s, big_sample = big_sample[-1], big_sample[:-1]
            dbn, mean, scale = _reference_family(big_sample)
            try:
                _interval = dbn.interval((s - mean) / scale)
                lengths.append(np.fabs(_interval[1] - _interval[0]) / scale)
            except Exception:
                print('exception raised')
            print(i)
            if i % 10 == 0 and i > 0:
                _checkpoint()

    print('final %s' % (np.mean(lengths),))
    return (np.mean(lengths), np.std(lengths), np.median(lengths))
def power_onesided(n, snr, pos, rho=0.25, ndraw=10000,
                   muval = np.linspace(0,5,51), burnin=1000):
    """Plot one-sided power curves and return the figure and two curves.

    Three curves are drawn against ``muval``: the closed-form sample
    splitting power, the power of the test built from the reference sample
    returned by ``simulation``, and the ``'full'`` curve returned by
    ``power_full``.  ``simulation`` and ``power_full`` are defined elsewhere.

    Parameters
    ----------
    n, pos, rho, ndraw, burnin
        Forwarded to ``simulation`` (``n``, ``pos``, ``rho`` also to
        ``power_full``).
    snr : float
        Effect size marked by a dashed vertical line; the power of the
        ``simulation``-based test at this value is printed.  Also forwarded
        to ``power_full``.
    muval : array-like
        Effect sizes at which the curves are evaluated.

    Returns
    -------
    (figure, dict)
        The matplotlib figure and ``{'umpu': ..., 'split': ...}`` holding the
        two curves computed here.

    Raises
    ------
    ValueError
        If any entry of ``muval`` is negative.
    """
    # ``simulation`` is called with 0 in its second position -- presumably a
    # null (no-signal) reference sample; confirm against its definition.
    S0 = simulation(n, 0, pos, rho=rho, ndraw=ndraw, burnin=burnin)
    dfam0 = discrete_family(S0, np.ones(S0.shape))

    # Upper end of the one-sided acceptance region at parameter 0.
    cutoff = dfam0.one_sided_acceptance(0, alternative='greater')[1]

    def UMPU_power_onesided(mu):
        return dfam0.ccdf(mu, cutoff)

    def sample_split_onesided(mu, alpha=0.05):
        # Power of a level-``alpha`` one-sided z-test whose mean is
        # ``mu / sqrt(2)``.
        if np.any(mu < 0):
            raise ValueError('mu is negative: in null hypothesis')
        z_cutoff = ndist.ppf(1 - alpha)
        return np.squeeze(1 - ndist.cdf(z_cutoff - mu / np.sqrt(2)))

    power_fig = plt.figure(figsize=(8,8))
    power_ax = power_fig.gca()
    power_ax.set_ylabel('Power', fontsize=20)
    # Raw string: '\m' is not a valid escape sequence in a normal literal.
    power_ax.set_xlabel(r'Effect size $\mu$', fontsize=20)

    P_split = np.array(sample_split_onesided(muval))
    power_ax.plot(muval, P_split, label='Sample splitting', c='red',
                  linewidth=5, alpha=0.5)

    P_UMPU = np.array([UMPU_power_onesided(m) for m in muval])
    power_ax.plot(muval, P_UMPU, label=r'Selected using $i^*(Z_S)$',
                  linewidth=5, alpha=0.5)

    P_full = power_full(n, snr, pos, rho=rho, muval=muval)[1]['full']
    power_ax.plot(muval, P_full, label=r'Selected using $i^*(Z)$',
                  color='blue', linewidth=5, alpha=0.5)

    print(UMPU_power_onesided(snr))

    # The legend is built once, after every labelled curve exists.
    power_ax.legend(loc='lower right')
    power_ax.set_xlim([0,5])
    power_ax.plot([snr,snr], [0,1], 'k--')
    return power_fig, {'umpu':P_UMPU, 'split':P_split}
Esempio n. 3
0
def power(n, snr, pos, rho=0.25,
          muval = np.linspace(0,5,51),
          ndraw=4000000, burnin=100000):
    """Plot the power curve of the test built from constrained samples.

    Samples are drawn with ``sample_from_constraints`` from the constraints
    returned by ``constraints(X, pos)``, mapped through ``X.T``, and column
    ``pos`` is used as an equally weighted ``discrete_family``.  The power at
    ``mu`` is one minus the difference of ``dfam0.cdf(mu, U)`` and
    ``dfam0.cdf(mu, L)``, where ``(L, U)`` is the acceptance region returned
    by ``dfam0.one_sided_acceptance(0)``.

    Parameters
    ----------
    n, rho, pos
        Forwarded to ``parameters``; ``pos`` also selects the column used.
    snr : float
        Effect size marked by a dashed vertical line; the power there is
        printed.
    muval : array-like
        Effect sizes at which the curve is evaluated.
    ndraw, burnin : int
        Forwarded to ``sample_from_constraints``.  The defaults are the
        values that used to be hard-coded.

    Returns
    -------
    (figure, dict)
        The matplotlib figure and ``{'full': curve}``.
    """
    X, mu, beta = parameters(n, rho, pos)

    # form the correct constraints
    con, initial = constraints(X, pos)

    Z_selection = sample_from_constraints(con, initial, ndraw=ndraw,
                                          burnin=burnin)
    S0 = np.dot(X.T, Z_selection.T).T
    dfam0 = discrete_family(S0[:,pos], np.ones(S0.shape[0]))

    L, U = dfam0.one_sided_acceptance(0)

    def one_sided_power(mu):
        return 1 - (dfam0.cdf(mu, U) - dfam0.cdf(mu, L))

    power_fig = plt.figure(figsize=(8,8))
    power_ax = power_fig.gca()
    power_ax.set_ylabel('Power', fontsize=20)
    # Raw string: '\m' is not a valid escape sequence in a normal literal.
    power_ax.set_xlabel(r'Effect size $\mu$', fontsize=20)

    full_power = np.array([one_sided_power(m) for m in muval])
    print(full_power)
    power_ax.plot(muval, full_power, label='Reduced model UMPU', linewidth=7,
                  alpha=0.5)

    # Build the legend only after the labelled curve exists; calling it on
    # empty axes warns and produces an empty legend.
    power_ax.legend(loc='lower right')
    power_ax.set_xlim([0,5])
    power_ax.plot([snr,snr],[0,1], 'k--')
    print(one_sided_power(snr))
    return power_fig, {'full':full_power}
Esempio n. 4
0
def interval(mu, ndraw=100000, keep_every=100):
    """Simulate lengths of ``dbn.equal_tailed_interval`` intervals at ``mu``.

    Observations and reference samples are drawn with the module-level
    ``draw_sample`` and ``cutoff``.  Each of the 2500 observations is
    standardized with the mean and standard deviation of the current
    reference sample, an interval is computed from a ``discrete_family``
    built on that standardized reference sample, and the interval length is
    mapped back to the original scale by dividing by the standard deviation.

    Progress is checkpointed to ``lengths<mu>.npz`` (``mu`` formatted with
    ``%0.2f``).  If that file already exists, its ``lengths`` array is loaded
    and extended by this run.

    Parameters
    ----------
    mu : float
        Effect size passed to ``draw_sample``.
    ndraw, keep_every : int
        Unused; kept so existing callers keep working.

    Returns
    -------
    tuple
        ``(mean, standard deviation, median)`` of all collected lengths.
    """
    results_file = 'lengths%0.2f.npz' % mu

    # Resume from an earlier checkpoint when there is one.
    if os.path.exists(results_file):
        lengths = list(np.load(results_file)['lengths'])
    else:
        lengths = []

    def _reference_family():
        # Draw a reference sample, standardize it and wrap it, equally
        # weighted, in a discrete family with theta reset to 0.
        reference = draw_sample(mu, cutoff, nsample=50000)[:50000]
        center, spread = reference.mean(), reference.std()
        standardized = (reference - center) / spread
        family = discrete_family(standardized, np.ones_like(standardized))
        family.theta = 0.
        return family, center, spread

    dbn, mean, scale = _reference_family()
    new_sample = draw_sample(mu, cutoff, nsample=2500)[:2500]
    for i, s in enumerate(new_sample):
        try:
            _interval = dbn.equal_tailed_interval((s - mean) / scale)
            lengths.append(np.fabs(_interval[1] - _interval[0]) / scale)
        except Exception:
            # Best effort: a failed interval is skipped, but
            # KeyboardInterrupt / SystemExit are no longer swallowed.
            print('exception raised')
        if i % 20 == 0 and i > 0:
            print('%s %s' % (np.median(lengths), np.mean(lengths)))
            np.savez(results_file, lengths=lengths, mu=mu)
        if i % 1000 == 0 and i > 0:
            # Refresh the reference sample.  The family is rebuilt together
            # with ``mean`` and ``scale`` so that later observations are
            # standardized consistently with the family they are compared
            # against.
            dbn, mean, scale = _reference_family()
        # Reset after every observation, as the original loop did.
        dbn.theta = 0.
        print(i)
    return (np.mean(lengths), np.std(lengths), np.median(lengths))
Esempio n. 5
0
def form_dbn(mu, samples):
    """Pool stored samples near ``mu`` into one reweighted discrete family.

    ``samples`` is indexed by keys ``'mu<l>'`` for integers ``l``.  Only the
    integers in ``-6..12`` within distance 2 of ``mu`` are used.  A draw
    ``x`` stored under ``'mu<l>'`` receives weight ``exp((mu - l) * x)``.

    Returns the resulting ``discrete_family`` with ``theta`` set to 0.
    """
    grid = np.arange(-6, 13)
    nearby = grid[np.fabs(grid - mu) <= 2]

    pooled = []
    log_weights = []
    for l in nearby:
        draws = samples['mu%d' % l]
        pooled.append(draws)
        log_weights.append((mu - l) * draws)

    family = discrete_family(np.hstack(pooled),
                             np.exp(np.hstack(log_weights)))
    family.theta = 0.
    return family
Esempio n. 6
0
def power_onesided(n,
                   snr,
                   pos,
                   rho=0.25,
                   ndraw=10000,
                   muval=np.linspace(0, 5, 51),
                   burnin=1000):
    """Plot one-sided power curves and return the figure and two curves.

    Three curves are drawn against ``muval``: the closed-form sample
    splitting power, the power of the test built from the reference sample
    returned by ``simulation``, and the ``'full'`` curve returned by
    ``power_full``.  ``simulation`` and ``power_full`` are defined elsewhere.

    Parameters
    ----------
    n, pos, rho, ndraw, burnin
        Forwarded to ``simulation`` (``n``, ``pos``, ``rho`` also to
        ``power_full``).
    snr : float
        Effect size marked by a dashed vertical line; the power of the
        ``simulation``-based test at this value is printed.  Also forwarded
        to ``power_full``.
    muval : array-like
        Effect sizes at which the curves are evaluated.

    Returns
    -------
    (figure, dict)
        The matplotlib figure and ``{'umpu': ..., 'split': ...}`` holding the
        two curves computed here.

    Raises
    ------
    ValueError
        If any entry of ``muval`` is negative.
    """
    # ``simulation`` is called with 0 in its second position -- presumably a
    # null (no-signal) reference sample; confirm against its definition.
    S0 = simulation(n, 0, pos, rho=rho, ndraw=ndraw, burnin=burnin)
    dfam0 = discrete_family(S0, np.ones(S0.shape))

    # Upper end of the one-sided acceptance region at parameter 0.
    cutoff = dfam0.one_sided_acceptance(0, alternative='greater')[1]

    def UMPU_power_onesided(mu):
        return dfam0.ccdf(mu, cutoff)

    def sample_split_onesided(mu, alpha=0.05):
        # Power of a level-``alpha`` one-sided z-test whose mean is
        # ``mu / sqrt(2)``.
        if np.any(mu < 0):
            raise ValueError('mu is negative: in null hypothesis')
        z_cutoff = ndist.ppf(1 - alpha)
        return np.squeeze(1 - ndist.cdf(z_cutoff - mu / np.sqrt(2)))

    power_fig = plt.figure(figsize=(8, 8))
    power_ax = power_fig.gca()
    power_ax.set_ylabel('Power', fontsize=20)
    # Raw string: '\m' is not a valid escape sequence in a normal literal.
    power_ax.set_xlabel(r'Effect size $\mu$', fontsize=20)

    P_split = np.array(sample_split_onesided(muval))
    P_UMPU = np.array([UMPU_power_onesided(m) for m in muval])
    P_full = power_full(n, snr, pos, rho=rho, muval=muval)[1]['full']

    shared_style = dict(linewidth=5, alpha=0.5)
    power_ax.plot(muval, P_split, label='Sample splitting', c='red',
                  **shared_style)
    power_ax.plot(muval, P_UMPU, label=r'Selected using $i^*(Z_S)$',
                  **shared_style)
    power_ax.plot(muval, P_full, label=r'Selected using $i^*(Z)$',
                  color='blue', **shared_style)

    print(UMPU_power_onesided(snr))

    # The legend is built once, after every labelled curve exists.
    power_ax.legend(loc='lower right')
    power_ax.set_xlim([0, 5])
    power_ax.plot([snr, snr], [0, 1], 'k--')
    return power_fig, {'umpu': P_UMPU, 'split': P_split}
Esempio n. 7
0
def power(n, snr, pos, rho=0.25, muval=np.linspace(0, 5, 51),
          ndraw=4000000, burnin=100000):
    """Plot the power curve of the test built from constrained samples.

    Samples are drawn with ``sample_from_constraints`` from the constraints
    returned by ``constraints(X, pos)``, mapped through ``X.T``, and column
    ``pos`` is used as an equally weighted ``discrete_family``.  The power at
    ``mu`` is one minus the difference of ``dfam0.cdf(mu, U)`` and
    ``dfam0.cdf(mu, L)``, where ``(L, U)`` is the acceptance region returned
    by ``dfam0.one_sided_acceptance(0)``.

    Parameters
    ----------
    n, rho, pos
        Forwarded to ``parameters``; ``pos`` also selects the column used.
    snr : float
        Effect size marked by a dashed vertical line; the power there is
        printed.
    muval : array-like
        Effect sizes at which the curve is evaluated.
    ndraw, burnin : int
        Forwarded to ``sample_from_constraints``.  The defaults are the
        values that used to be hard-coded.

    Returns
    -------
    (figure, dict)
        The matplotlib figure and ``{'full': curve}``.
    """
    X, mu, beta = parameters(n, rho, pos)

    # form the correct constraints
    con, initial = constraints(X, pos)

    Z_selection = sample_from_constraints(con,
                                          initial,
                                          ndraw=ndraw,
                                          burnin=burnin)
    S0 = np.dot(X.T, Z_selection.T).T
    dfam0 = discrete_family(S0[:, pos], np.ones(S0.shape[0]))

    lower, upper = dfam0.one_sided_acceptance(0)

    def one_sided_power(mu):
        return 1 - (dfam0.cdf(mu, upper) - dfam0.cdf(mu, lower))

    full_power = np.array([one_sided_power(m) for m in muval])
    print(full_power)

    power_fig = plt.figure(figsize=(8, 8))
    power_ax = power_fig.gca()
    power_ax.set_ylabel('Power', fontsize=20)
    # Raw string: '\m' is not a valid escape sequence in a normal literal.
    power_ax.set_xlabel(r'Effect size $\mu$', fontsize=20)
    power_ax.plot(muval,
                  full_power,
                  label='Reduced model UMPU',
                  linewidth=7,
                  alpha=0.5)
    # Build the legend only after the labelled curve exists; calling it on
    # empty axes warns and produces an empty legend.
    power_ax.legend(loc='lower right')
    power_ax.set_xlim([0, 5])
    power_ax.plot([snr, snr], [0, 1], 'k--')
    print(one_sided_power(snr))
    return power_fig, {'full': full_power}
def test_discreteExFam():
    """Smoke test for ``discrete_family`` with Poisson(1) weights on 0..99.

    The calls below exercise public and private methods of the family and
    print a few results.  There are no assertions, so the test fails only if
    one of the calls raises.
    """
    support = np.arange(100)
    pois = discrete_family(support, poisson.pmf(support, 1))

    # Printed as a single tuple, exactly as the Python 2 statement
    # ``print (a, b)`` did; the double parentheses keep that output under
    # both Python 2 and Python 3.
    print((
        pois._leftCutFromRight(theta=0.4618311, rightCut=(5, 0.5)),
        pois._test2RejectsLeft(theta=2.39, observed=5, auxVar=0.5),
    ))
    print(pois.interval(observed=5, alpha=0.05, randomize=True, auxVar=0.5))

    # Distance of the total mass of ``pdf(0)`` from 1.
    print(abs(1 - sum(pois.pdf(0))))
    pois.ccdf(0, 3, 0.4)

    print(pois.Var(np.log(2), lambda x: x))
    print(pois.Cov(np.log(2), lambda x: x, lambda x: x))

    left_cut = (0, 0.01)
    lc = pois._rightCutFromLeft(0, left_cut)
    # Round trip: the original left cut followed by the one recovered from
    # the right cut.  Formatted explicitly so both values are printed on
    # Python 3 as well.
    print('%s %s' % (left_cut, pois._leftCutFromRight(0, lc)))

    pois._rightCutFromLeft(-10, left_cut)
    pois._critCovFromLeft(-10, left_cut)

    pois._critCovFromLeft(0, left_cut)
    pois._critCovFromRight(0, lc)

    pois._critCovFromLeft(5, (5, 1))

    pois._test2RejectsLeft(np.log(5), 5)
    pois._test2RejectsRight(np.log(5), 5)

    pois._test2RejectsLeft(np.log(20), 5)
    pois._test2RejectsRight(np.log(0.1), 5)

    print(pois._inter2Upper(5, auxVar=0.5))
    print(pois.interval(5, auxVar=0.5))
def test_discreteExFam():
    """Smoke test for ``discrete_family`` with Poisson(1) weights on 0..99.

    The calls below exercise public and private methods of the family and
    print a few results.  There are no assertions, so the test fails only if
    one of the calls raises.
    """
    X = np.arange(100)
    pois = discrete_family(X, poisson.pmf(X, 1))

    # Under Python 2, ``print(a, b)`` printed the tuple ``(a, b)``; the
    # double parentheses keep that output under Python 3 too.
    first_pair = (pois._leftCutFromRight(theta=0.4618311, rightCut=(5, .5)),
                  pois._test2RejectsLeft(theta=2.39, observed=5, auxVar=.5))
    print(first_pair)
    print(pois.interval(observed=5, alpha=.05, randomize=True, auxVar=.5))

    # Distance of the total mass of ``pdf(0)`` from 1.
    print(abs(1 - sum(pois.pdf(0))))
    pois.ccdf(0, 3, .4)

    print(pois.Var(np.log(2), lambda x: x))
    print(pois.Cov(np.log(2), lambda x: x, lambda x: x))

    lc = pois._rightCutFromLeft(0, (0, .01))
    # ``print(0, 0.01), value`` printed both parts under Python 2 but drops
    # ``value`` under Python 3, so the line is formatted explicitly.
    print('%s %s' % ((0, 0.01), pois._leftCutFromRight(0, lc)))

    pois._rightCutFromLeft(-10, (0, .01))
    pois._critCovFromLeft(-10, (0, .01))

    pois._critCovFromLeft(0, (0, .01))
    pois._critCovFromRight(0, lc)

    pois._critCovFromLeft(5, (5, 1))

    # Left/right rejection checks at observed value 5 for several thetas.
    for reject, theta in ((pois._test2RejectsLeft, np.log(5)),
                          (pois._test2RejectsRight, np.log(5)),
                          (pois._test2RejectsLeft, np.log(20)),
                          (pois._test2RejectsRight, np.log(.1))):
        reject(theta, 5)

    print(pois._inter2Upper(5, auxVar=.5))
    print(pois.interval(5, auxVar=.5))
Esempio n. 10
0
import os
import numpy as np
import matplotlib.pyplot as plt
from selection import affine
from selection.discrete_family import discrete_family
from scipy.stats import norm as ndist

# 0.95 quantile of the standard normal distribution.
cutoff = ndist.ppf(0.95)

# Reference sample drawn once at import time: 100000 draws from
# ``affine.sample_from_constraints`` with [4, 2] as its second argument,
# reduced to the sum of each draw's coordinates (``.sum(1)``).
# NOTE(review): presumably the constraint encodes A x <= b, i.e. the first
# coordinate is at least ``cutoff`` -- confirm against ``selection.affine``.
null_constraint = affine.constraints(np.array([[-1, 0.]]), np.array([-cutoff]))
null_sample = affine.sample_from_constraints(null_constraint,
                                             np.array([4, 2.]),
                                             ndraw=100000).sum(1)
# Equally weighted discrete family on the reference sample.
null_dbn = discrete_family(null_sample, np.ones_like(null_sample))


def power(mu, ndraw=100000, keep_every=100):
    """Estimate the rejection rate of the one-sided test at mean ``mu``.

    Draws ``ndraw`` points from ``affine.sample_from_constraints`` with the
    constraint mean set to ``(mu, mu)``, keeps every ``keep_every``-th draw,
    sums each kept draw's coordinates and tests every sum against the
    module-level ``null_dbn`` with ``alternative='greater'``.

    The coordinate-wise mean of the kept draws and the rejection rate are
    printed; the rejection rate is returned.
    """
    selection_event = affine.constraints(np.array([[-1, 0.]]),
                                         np.array([-cutoff]))
    selection_event.mean = np.array([mu, mu])

    draws = affine.sample_from_constraints(selection_event,
                                           np.array([4, 2.]),
                                           ndraw=ndraw)
    thinned = draws[::keep_every]
    print(thinned.mean(0))

    totals = thinned.sum(1)
    decisions = [null_dbn.one_sided_test(0, total, alternative='greater')
                 for total in totals]

    rejection_rate = np.mean(decisions)
    print(rejection_rate)
    return rejection_rate

Esempio n. 11
0
import os
from glob import glob
import numpy as np
import matplotlib.pyplot as plt
from selection import affine 
from selection.discrete_family import discrete_family
from scipy.stats import norm as ndist
from sklearn.isotonic import IsotonicRegression

# Threshold used in the constraint below and by the sampling code in this
# module.
cutoff = 3.
# Reference sample drawn once at import time: 100000 draws from
# ``affine.sample_from_constraints`` with [4, 2] as its second argument,
# reduced to the sum of each draw's coordinates (``.sum(1)``).
# NOTE(review): presumably the constraint encodes A x <= b, i.e. the first
# coordinate is at least ``cutoff`` -- confirm against ``selection.affine``.
null_constraint = affine.constraints(np.array([[-1,0.]]), np.array([-cutoff]))
null_sample = affine.sample_from_constraints(null_constraint, np.array([4,2.]),
                                             ndraw=100000).sum(1)
# Equally weighted discrete family on the reference sample.
null_dbn = discrete_family(null_sample, np.ones_like(null_sample))

def draw_sample(mu, cutoff, nsample=10000):
    if mu >= cutoff - 4:
        sample = []
        while True:
            candidate = np.random.standard_normal(1000000) + mu
            candidate = candidate[candidate > cutoff]
            sample.extend(candidate)
            if len(sample) > nsample:
                break
        sample = np.array(sample)
        sample += np.random.standard_normal(sample.shape) + mu
    else:
        constraint = affine.constraints(np.array([[-1,0.]]), np.array([-cutoff]))
	constraint.mean = np.array([mu,mu])
        sample = affine.sample_from_constraints(constraint, np.array([cutoff + 0.1,0]),
                                                ndraw=2000000,
Esempio n. 12
0
def marginal(n,
             snr,
             pos,
             rho=0.25,
             ndraw=5000,
             burnin=1000,
             nsim=5000,
             sigma=1.):
    """Simulate p-values for sample splitting and the selective test.

    For each of ``nsim`` replications a selection response is generated,
    ``covtest`` picks a position and sign, a reference sample is drawn with
    ``sample_from_constraints`` and two p-values are recorded: the selective
    one from ``discrete_family.ccdf`` and the sample-splitting one from the
    normal tail of the held-out statistic.

    Parameters
    ----------
    n, rho, pos
        Forwarded to ``parameters``; ``pos`` is also the position counted as
        the correct selection.
    snr : float
        Multiplier applied to the mean vector.
    ndraw, burnin : int
        Forwarded to ``sample_from_constraints``.
    nsim : int
        Number of replications.
    sigma : float
        Noise scale.

    Returns
    -------
    (fig1, fig2, dict)
        ``fig1`` shows the ECDFs of all p-values, ``fig2`` the ECDFs
        restricted to replications where the selected sign is +1 and the
        selected position equals ``pos``.  The dict holds the arrays under
        ``'split'``, ``'select'`` and ``'hypotheses'``.
    """
    X, mu, beta = parameters(n, rho, pos)
    root2 = np.sqrt(2)

    Psplit = []
    Pselect = []
    hypotheses = []

    for _ in range(nsim):
        Y_select = (snr * mu / root2 +
                    np.random.standard_normal(n)) * sigma
        con, _, select_pos, sign = covtest(X,
                                           Y_select,
                                           sigma=sigma,
                                           exact=True)
        x_selected = X.T[select_pos]

        cond_ncp = snr * np.dot(x_selected, mu) / root2 * sign

        hypotheses.append((sign == +1) and (pos == select_pos))

        Y_null = sample_from_constraints(con,
                                         Y_select,
                                         ndraw=ndraw,
                                         burnin=burnin)
        Z_null = (np.dot(x_selected, Y_null.T) +
                  sigma * np.random.standard_normal(ndraw)) / root2
        Z_inference = sigma * (cond_ncp + np.random.standard_normal())
        Z_observed = (np.dot(x_selected, Y_select) * sign +
                      Z_inference) / root2

        dfam = discrete_family(Z_null, np.ones(Z_null.shape))
        Pselect.append(dfam.ccdf(0, Z_observed))

        # Upper tail when the selected sign is +1, lower tail otherwise.
        tail = ndist.sf if sign == +1 else ndist.cdf
        Psplit.append(tail(Z_inference / sigma))

    Ugrid = np.linspace(0, 1, 101)

    Psplit = np.array(Psplit)
    Pselect = np.array(Pselect)
    # ``np.bool`` was removed from NumPy; the builtin ``bool`` is the same
    # dtype on every NumPy version.
    hypotheses = np.array(hypotheses, bool)

    def _ecdf_figure(curves):
        # One 8x8 figure with an ECDF curve per (p-values, label, colour)
        # entry and a dashed vertical line at 0.05.
        fig = plt.figure(figsize=(8, 8))
        ax = fig.gca()
        for pvalues, label, colour in curves:
            ax.plot(Ugrid,
                    ECDF(pvalues)(Ugrid),
                    label=label,
                    c=colour,
                    linewidth=5,
                    alpha=0.5)
        ax.set_xlabel('P-value, $p$', fontsize=20)
        ax.set_ylabel('ECDF($p$)', fontsize=20)
        ax.plot([0.05, 0.05], [0, 1], 'k--')
        ax.legend(loc='lower right')
        return fig

    # plot of marginal distribution of p-values
    fig1 = _ecdf_figure([
        (Psplit, 'Sample splitting', 'red'),
        (Pselect, 'Selected using $i^*(Z_S)$', 'blue'),
    ])

    # conditional distribution of p-values
    # conditioned on selection choosing correct position and sign
    fig2 = _ecdf_figure([
        (Psplit[hypotheses], 'Sample splitting', 'red'),
        (Pselect[hypotheses], 'Selected using $i^*(Z_S)$', 'blue'),
    ])

    dbn1 = {
        'split': Psplit,
        'select': Pselect,
        'hypotheses': hypotheses,
    }

    return fig1, fig2, dbn1
def marginal(n, snr, pos, rho=0.25, ndraw=5000,
             burnin=1000, nsim=5000, sigma=1.):
    """Simulate p-values for sample splitting and the selective test.

    For each of ``nsim`` replications a selection response is generated,
    ``covtest`` picks a position and sign, a reference sample is drawn with
    ``sample_from_constraints`` and two p-values are recorded: the selective
    one from ``discrete_family.ccdf`` and the sample-splitting one from the
    normal tail of the held-out statistic.

    Parameters
    ----------
    n, rho, pos
        Forwarded to ``parameters``; ``pos`` is also the position counted as
        the correct selection.
    snr : float
        Multiplier applied to the mean vector.
    ndraw, burnin : int
        Forwarded to ``sample_from_constraints``.
    nsim : int
        Number of replications.
    sigma : float
        Noise scale.

    Returns
    -------
    (fig1, fig2, dict)
        ``fig1`` shows the ECDFs of all p-values, ``fig2`` the ECDFs
        restricted to replications where the selected sign is +1 and the
        selected position equals ``pos``.  The dict holds the arrays under
        ``'split'``, ``'select'`` and ``'hypotheses'``.
    """
    X, mu, beta = parameters(n, rho, pos)
    root2 = np.sqrt(2)

    Psplit, Pselect, hypotheses = [], [], []

    for _ in range(nsim):
        Y_select = (snr * mu / root2 + np.random.standard_normal(n)) * sigma
        con, _, select_pos, sign = covtest(X, Y_select, sigma=sigma, exact=True)
        x_selected = X.T[select_pos]

        cond_ncp = snr * np.dot(x_selected, mu) / root2 * sign

        hypotheses.append((sign == +1) and (pos == select_pos))

        Y_null = sample_from_constraints(con, Y_select, ndraw=ndraw, burnin=burnin)
        Z_null = (np.dot(x_selected, Y_null.T)
                  + sigma * np.random.standard_normal(ndraw)) / root2
        Z_inference = sigma * (cond_ncp + np.random.standard_normal())
        Z_observed = (np.dot(x_selected, Y_select) * sign + Z_inference) / root2

        dfam = discrete_family(Z_null, np.ones(Z_null.shape))
        Pselect.append(dfam.ccdf(0, Z_observed))

        # Upper tail when the selected sign is +1, lower tail otherwise.
        if sign == +1:
            Psplit.append(ndist.sf(Z_inference / sigma))
        else:
            Psplit.append(ndist.cdf(Z_inference / sigma))

    Ugrid = np.linspace(0,1,101)

    Psplit = np.array(Psplit)
    Pselect = np.array(Pselect)
    # ``np.bool`` was removed from NumPy; the builtin ``bool`` is the same
    # dtype on every NumPy version.
    hypotheses = np.array(hypotheses, bool)

    def _ecdf_figure(curves):
        # One 8x8 figure with an ECDF curve per (p-values, label, colour)
        # entry and a dashed vertical line at 0.05.
        fig = plt.figure(figsize=(8,8))
        ax = fig.gca()
        for pvalues, label, colour in curves:
            ax.plot(Ugrid, ECDF(pvalues)(Ugrid), label=label, c=colour,
                    linewidth=5, alpha=0.5)
        ax.set_xlabel('P-value, $p$', fontsize=20)
        ax.set_ylabel('ECDF($p$)', fontsize=20)
        ax.plot([0.05,0.05],[0,1], 'k--')
        ax.legend(loc='lower right')
        return fig

    # plot of marginal distribution of p-values
    fig1 = _ecdf_figure([(Psplit, 'Sample splitting', 'red'),
                         (Pselect, 'Selected using $i^*(Z_S)$', 'blue')])

    # conditional distribution of p-values
    # conditioned on selection choosing correct position and sign
    fig2 = _ecdf_figure([(Psplit[hypotheses], 'Sample splitting', 'red'),
                         (Pselect[hypotheses], 'Selected using $i^*(Z_S)$', 'blue')])

    dbn1 = {'split': Psplit, 'select': Pselect, 'hypotheses': hypotheses}

    return fig1, fig2, dbn1
Esempio n. 14
0
def simulation(n, snr, pos, rho=0.25, nsim=5000, sigma=1.5):
    """Simulate and plot p-values of several tests.

    The global NumPy random state is seeded with 0.  For each of ``nsim``
    responses the following p-values are recorded: ``covtest`` with
    ``exact=False`` and ``exact=True``, a two-sided normal p-value at the
    fixed position ``pos``, ``reduced_covtest`` without and with ``sigma``,
    and a max test calibrated on 10000 simulated null maxima.

    Parameters
    ----------
    n, rho, pos
        Forwarded to ``parameters``; ``pos`` is also the position whose
        selection counts as correct.
    snr : float
        Multiplier applied to the mean vector.
    nsim : int
        Number of simulated responses.
    sigma : float
        Noise scale.

    Returns
    -------
    (fig1, fig2, dict)
        ``fig1`` shows the ECDFs of all six p-value collections, ``fig2``
        the ECDFs of four of them restricted to replications in which
        ``Z[pos]`` dominates every ``|Z|``.  The dict holds the arrays under
        ``'exact'``, ``'covtest'``, ``'unknown'``, ``'known'``, ``'fixed'``,
        ``'max'`` and ``'hypotheses'``.
    """
    # Design, mean vector and parameter vector
    X, mu, beta = parameters(n, rho, pos)

    Pcov = []
    Pexact = []
    Pu = []
    Pr = []
    Pfixed = []
    Pmax = []
    hypotheses = []

    # Set seed
    np.random.seed(0)

    # Max test: reference distribution of max |X^T noise| scaled by sigma.
    max_stat = np.fabs(np.dot(X.T, np.random.standard_normal(
        (n, 10000)))).max(0) * sigma
    max_fam = discrete_family(max_stat, np.ones(max_stat.shape))
    max_fam.theta = 0

    for _ in range(nsim):
        Y = (snr * mu + np.random.standard_normal(n)) * sigma
        Z = np.dot(X.T, Y)
        absZ = np.fabs(Z)  # computed once and reused below

        # did this find the correct position and sign?
        hypotheses.append(np.all(np.less_equal(absZ, Z[pos])))

        # The order of the calls is kept so the random stream is consumed
        # in the same sequence as before.
        Pcov.append(covtest(X, Y, sigma=sigma, exact=False)[1])
        Pexact.append(covtest(X, Y, sigma=sigma, exact=True)[1])
        Pfixed.append(2 * ndist.sf(absZ[pos] / sigma))
        Pu.append(reduced_covtest(X, Y, burnin=500, ndraw=5000)[1])
        Pr.append(
            reduced_covtest(X, Y, burnin=500, ndraw=5000, sigma=sigma)[1])
        Pmax.append(max_fam.ccdf(0, absZ.max()))

    Ugrid = np.linspace(0, 1, 101)

    Pcov = np.array(Pcov)
    Pexact = np.array(Pexact)
    Pu = np.array(Pu)
    Pr = np.array(Pr)
    Pfixed = np.array(Pfixed)
    Pmax = np.array(Pmax)
    # ``np.bool`` was removed from NumPy; the builtin ``bool`` is the same
    # dtype on every NumPy version.
    hypotheses = np.array(hypotheses, bool)

    def _ecdf_figure(curves):
        # One 8x8 figure with an ECDF curve per (p-values, label, colour)
        # entry and a dashed vertical line at 0.05.
        fig = plt.figure(figsize=(8, 8))
        ax = fig.gca()
        for pvalues, label, colour in curves:
            ax.plot(Ugrid,
                    ECDF(pvalues)(Ugrid),
                    label=label,
                    c=colour,
                    linewidth=5,
                    alpha=0.5)
        ax.set_xlabel('P-value, $p$', fontsize=20)
        ax.set_ylabel('ECDF($p$)', fontsize=20)
        ax.plot([0.05, 0.05], [0, 1], 'k--')
        ax.legend(loc='lower right')
        return fig

    # ``Pcov`` comes from ``exact=False`` and ``Pexact`` from ``exact=True``;
    # the labels now say so (they used to be swapped).
    conditional = [
        (Pcov, 'Full (asymptotic)', 'red'),
        (Pexact, 'Full (exact)', 'k'),
        (Pu, r'Selected 1-sparse, $\sigma$ unknown', 'blue'),
        (Pr, r'Selected 1-sparse, $\sigma$ known', 'green'),
    ]
    marginal_curves = (conditional[:2] +
                       [(Pmax, 'Max test', 'cyan')] +
                       conditional[2:] +
                       [(Pfixed, r'Fixed 1-sparse, $\sigma$ known', 'yellow')])

    # plot of marginal distribution of p-values
    fig1 = _ecdf_figure(marginal_curves)

    # conditional distribution of p-values
    # conditioned on selection choosing correct position and sign
    fig2 = _ecdf_figure([(pvalues[hypotheses], label, colour)
                         for pvalues, label, colour in conditional])

    dbn1 = {
        'exact': Pexact,
        'covtest': Pcov,
        'unknown': Pu,
        'known': Pr,
        'fixed': Pfixed,
        'max': Pmax,
        'hypotheses': hypotheses,
    }

    return fig1, fig2, dbn1
Esempio n. 15
0
def simulation(n, snr, pos, rho=0.25, nsim=5000, sigma=1.5):
    """Simulate and plot p-values of several tests.

    The global NumPy random state is seeded with 0.  For each of ``nsim``
    responses the following p-values are recorded: ``covtest`` with
    ``exact=False`` and ``exact=True``, a two-sided normal p-value at the
    fixed position ``pos``, ``reduced_covtest`` without and with ``sigma``,
    and a max test calibrated on 10000 simulated null maxima.

    Parameters
    ----------
    n, rho, pos
        Forwarded to ``parameters``; ``pos`` is also the position whose
        selection counts as correct.
    snr : float
        Multiplier applied to the mean vector.
    nsim : int
        Number of simulated responses.
    sigma : float
        Noise scale.

    Returns
    -------
    (fig1, fig2, dict)
        ``fig1`` shows the ECDFs of all six p-value collections, ``fig2``
        the ECDFs of four of them restricted to replications in which
        ``Z[pos]`` dominates every ``|Z|``.  The dict holds the arrays under
        ``'exact'``, ``'covtest'``, ``'unknown'``, ``'known'``, ``'fixed'``,
        ``'max'`` and ``'hypotheses'``.
    """
    # Design, mean vector and parameter vector
    X, mu, beta = parameters(n, rho, pos)

    Pcov, Pexact, Pu, Pr, Pfixed, Pmax = [], [], [], [], [], []
    hypotheses = []

    # Set seed
    np.random.seed(0)

    # Max test: reference distribution of max |X^T noise| scaled by sigma.
    null_noise = np.random.standard_normal((n, 10000))
    max_stat = np.fabs(np.dot(X.T, null_noise)).max(0) * sigma
    max_fam = discrete_family(max_stat, np.ones(max_stat.shape))
    max_fam.theta = 0

    for _ in range(nsim):
        Y = (snr * mu + np.random.standard_normal(n)) * sigma
        Z = np.dot(X.T, Y)
        absZ = np.fabs(Z)  # computed once and reused below

        # did this find the correct position and sign?
        hypotheses.append(np.all(np.less_equal(absZ, Z[pos])))

        # The order of the calls is kept so the random stream is consumed
        # in the same sequence as before.
        Pcov.append(covtest(X, Y, sigma=sigma, exact=False)[1])
        Pexact.append(covtest(X, Y, sigma=sigma, exact=True)[1])
        Pfixed.append(2 * ndist.sf(absZ[pos] / sigma))
        Pu.append(reduced_covtest(X, Y, burnin=500, ndraw=5000)[1])
        Pr.append(reduced_covtest(X, Y, burnin=500, ndraw=5000, sigma=sigma)[1])
        Pmax.append(max_fam.ccdf(0, absZ.max()))

    Ugrid = np.linspace(0,1,101)

    Pcov = np.array(Pcov)
    Pexact = np.array(Pexact)
    Pu = np.array(Pu)
    Pr = np.array(Pr)
    Pfixed = np.array(Pfixed)
    Pmax = np.array(Pmax)
    # ``np.bool`` was removed from NumPy; the builtin ``bool`` is the same
    # dtype on every NumPy version.
    hypotheses = np.array(hypotheses, bool)

    def _ecdf_figure(curves):
        # One 8x8 figure with an ECDF curve per (p-values, label, colour)
        # entry and a dashed vertical line at 0.05.
        fig = plt.figure(figsize=(8,8))
        ax = fig.gca()
        for pvalues, label, colour in curves:
            ax.plot(Ugrid, ECDF(pvalues)(Ugrid), label=label, c=colour,
                    linewidth=5, alpha=0.5)
        ax.set_xlabel('P-value, $p$', fontsize=20)
        ax.set_ylabel('ECDF($p$)', fontsize=20)
        ax.plot([0.05,0.05],[0,1], 'k--')
        ax.legend(loc='lower right')
        return fig

    # ``Pcov`` comes from ``exact=False`` and ``Pexact`` from ``exact=True``;
    # the labels now say so (they used to be swapped).
    full_asymptotic = (Pcov, 'Full (asymptotic)', 'red')
    full_exact = (Pexact, 'Full (exact)', 'k')
    max_test = (Pmax, 'Max test', 'cyan')
    sigma_unknown = (Pu, r'Selected 1-sparse, $\sigma$ unknown', 'blue')
    sigma_known = (Pr, r'Selected 1-sparse, $\sigma$ known', 'green')
    fixed_known = (Pfixed, r'Fixed 1-sparse, $\sigma$ known', 'yellow')

    # plot of marginal distribution of p-values
    fig1 = _ecdf_figure([full_asymptotic, full_exact, max_test,
                         sigma_unknown, sigma_known, fixed_known])

    # conditional distribution of p-values
    # conditioned on selection choosing correct position and sign
    fig2 = _ecdf_figure([
        (pvalues[hypotheses], label, colour)
        for pvalues, label, colour in (full_asymptotic, full_exact,
                                       sigma_unknown, sigma_known)
    ])

    dbn1 = {}
    dbn1['exact'] = Pexact
    dbn1['covtest'] = Pcov
    dbn1['unknown'] = Pu
    dbn1['known'] = Pr
    dbn1['fixed'] = Pfixed
    dbn1['max'] = Pmax
    dbn1['hypotheses'] = hypotheses

    return fig1, fig2, dbn1