def test_data_carving_IC(n=100,
                         p=200,
                         s=7,
                         sigma=5,
                         rho=0.3,
                         snr=7.,
                         split_frac=0.9,
                         ndraw=5000,
                         burnin=1000,
                         df=np.inf,
                         coverage=0.90,
                         compute_intervals=False):
    """Draw instances until screening succeeds, then run `data_carving_IC`.

    Each attempt draws a fresh `instance(...)`, shuffles the row indices and
    keeps the first ``int(n * split_frac)`` of them as `stage_one`, and runs
    `info_crit_stop` on that subset with ``cost=log(n)``.  The attempt is
    accepted only when the selected set (`FS.active`) contains every index in
    ``range(s)``; otherwise a new instance is drawn.  There is no upper bound
    on the number of attempts.

    Returns
    -------
    tuple
        ``(carve_ge_s, split_ge_s, carve_lt_s, split_lt_s, counter,
        carve_coverage, split_coverage)`` where `carve` / `split` are entries
        1 and 3 of each result row (presumably the carving and splitting
        p-values -- confirm against `data_carving_IC`), partitioned by whether
        the selected variable index is ``>= s`` or ``< s``; `counter` is the
        number of instances drawn; the coverage lists record whether entries
        2 and 4 of each result row (used as intervals) strictly contain the
        projected true coefficient.
    """
    bic_cost = np.log(n)
    n_stage_one = int(n * split_frac)
    attempts = 0

    while True:
        attempts += 1
        X, y, beta, active, sigma = instance(n=n, p=p, s=s, sigma=sigma,
                                             rho=rho, snr=snr, df=df)

        # Random split: the first n_stage_one shuffled rows drive selection.
        order = np.arange(n)
        np.random.shuffle(order)
        stage_one = order[:n_stage_one]

        screen = info_crit_stop(y, X, sigma, cost=bic_cost, subset=stage_one)
        if not set(range(s)).issubset(screen.active):
            # Screening failed; draw another instance.
            continue

        results, FS = data_carving_IC(y, X, sigma,
                                      stage_one=stage_one,
                                      splitting=True,
                                      ndraw=ndraw,
                                      burnin=burnin,
                                      coverage=coverage,
                                      compute_intervals=compute_intervals,
                                      cost=bic_cost)

        carve = [row[1] for row in results]
        split = [row[3] for row in results]

        # Target of inference: the true mean projected onto the selected
        # columns (all of FS.variables except the last one).
        mu = np.dot(X, beta)
        selected_design = X[:, FS.variables[:-1]]
        truth = np.dot(np.linalg.pinv(selected_design), mu)

        carve_coverage = []
        split_coverage = []
        for (_, _, carve_ci, _, split_ci), target in zip(results, truth):
            carve_coverage.append(
                (carve_ci[0] < target) * (target < carve_ci[1]))
            split_coverage.append(
                (split_ci[0] < target) * (target < split_ci[1]))

        at_or_above_s = [j for j, var in enumerate(FS.active) if var >= s]
        below_s = [j for j, var in enumerate(FS.active) if var < s]

        return ([carve[j] for j in at_or_above_s],
                [split[j] for j in at_or_above_s],
                [carve[j] for j in below_s],
                [split[j] for j in below_s],
                attempts, carve_coverage, split_coverage)
def test_BIC(k=10, do_sample=True):
    """Run `info_crit_stop` on pure-noise data and return the last pivot entries.

    Builds a 100 x 200 design whose columns share a common random component
    (weight 0.4), scales each column by its standard deviation times
    ``sqrt(n)``, and draws a response with standard deviation 0.5 that does
    not depend on the design.  `k` is accepted but not used.

    Parameters
    ----------
    do_sample : bool
        If true, call ``FS.model_pivots`` with ``saturated=False`` and
        5000 burn-in / 5000 draws; otherwise call it with its defaults.

    Returns
    -------
    list
        The last element of every item returned by ``FS.model_pivots``.
    """
    n, p = 100, 200
    noise_sd = 0.5

    # Draw order matters for reproducibility: design noise first, then the
    # shared per-row component, then the response.
    idiosyncratic = np.random.standard_normal((n, p))
    shared = np.random.standard_normal(n)
    X = idiosyncratic + 0.4 * shared[:, None]
    X /= X.std(0)[None, :] * np.sqrt(n)

    Y = np.random.standard_normal(n) * noise_sd

    FS = info_crit_stop(Y, X, noise_sd, cost=np.log(n))
    final_model = len(FS.variables) - 1

    if do_sample:
        pivots = FS.model_pivots(final_model, saturated=False,
                                 burnin=5000, ndraw=5000)
    else:
        pivots = FS.model_pivots(final_model)
    return [pivot[-1] for pivot in pivots]
# Example no. 3
0
def test_BIC(do_sample=True, ndraw=8000, burnin=2000,
             force=False):
    """Return the last pivot entries for selected variables outside the truth.

    Draws a default `gaussian_instance()`, runs `info_crit_stop` with
    ``cost=log(n)`` and, provided every truly active variable was selected
    (or `force` is set), asks ``FS.model_pivots`` for the selected variables
    that are not in the true active set.

    Parameters
    ----------
    do_sample : bool
        If true, call ``model_pivots`` with ``saturated=False`` and the given
        `burnin` / `ndraw`; otherwise use its defaults.
    ndraw, burnin : int
        Sampler settings, only forwarded when `do_sample` is true.
    force : bool
        Compute the pivots even when screening failed.

    Returns
    -------
    list
        The last element of each pivot, or ``[]`` when screening failed and
        `force` is false.
    """
    X, Y, _, true_active, sigma, _ = gaussian_instance()
    n, _ = X.shape
    FS = info_crit_stop(Y, X, sigma, cost=np.log(n))
    final_model = len(FS.variables)

    true_active = set(true_active)
    if not (true_active.issubset(FS.variables) or force):
        return []

    which_var = [var for var in FS.variables if var not in true_active]

    if do_sample:
        pivots = FS.model_pivots(final_model,
                                 saturated=False,
                                 burnin=burnin,
                                 ndraw=ndraw,
                                 which_var=which_var)
    else:
        pivots = FS.model_pivots(final_model, which_var=which_var)
    return [pivot[-1] for pivot in pivots]
def test_BIC(do_sample=True, ndraw=8000, burnin=2000, nsim=None,
             force=False):
    """Return the last pivot entries for selected-but-inactive variables.

    Uses a default `instance()` draw and `info_crit_stop` with
    ``cost=log(n)``.  Pivots are only computed when the true active set is
    contained in ``FS.variables`` or when `force` is set; otherwise an empty
    list is returned.  `nsim` is accepted but not used in this function.

    Parameters
    ----------
    do_sample : bool
        If true, ``model_pivots`` is called with ``saturated=False`` and the
        given `burnin` / `ndraw`; otherwise only `which_var` is passed.
    ndraw, burnin : int
        Sampler settings forwarded when `do_sample` is true.
    force : bool
        Skip the screening requirement.

    Returns
    -------
    list
        The last element of each pivot returned by ``FS.model_pivots``.
    """
    X, Y, _, truth, sigma = instance()
    n, _ = X.shape
    FS = info_crit_stop(Y, X, sigma, cost=np.log(n))
    final_model = len(FS.variables)

    truth = set(truth)
    screened = truth.issubset(FS.variables)
    if not (screened or force):
        return []

    # Only variables that were selected but are not truly active are tested.
    pivot_options = {'which_var': [v for v in FS.variables if v not in truth]}
    if do_sample:
        pivot_options.update(saturated=False, burnin=burnin, ndraw=ndraw)

    return [pval[-1]
            for pval in FS.model_pivots(final_model, **pivot_options)]
def test_BIC(k=10, do_sample=True):
    """Return the last pivot entries of the final `info_crit_stop` model on noise.

    The design is 100 x 200 standard normal noise plus 0.4 times a per-row
    standard normal term shared by all columns, with every column divided by
    its standard deviation times ``sqrt(n)``.  The response is independent
    noise with standard deviation 0.5.  `k` is accepted but not used.

    Parameters
    ----------
    do_sample : bool
        When true, ``FS.model_pivots`` receives ``saturated=False``,
        ``burnin=5000`` and ``ndraw=5000``; when false it is called with the
        model index only.

    Returns
    -------
    list
        The last element of each item produced by ``FS.model_pivots``.
    """
    n, p = 100, 200
    response_sd = 0.5

    # The (n, p) block is drawn before the shared column, as in the
    # left-to-right evaluation of a single sum expression.
    design = np.random.standard_normal((n, p))
    row_effect = np.random.standard_normal(n)[:, None]
    X = design + 0.4 * row_effect
    column_scale = X.std(0)[None, :] * np.sqrt(n)
    X /= column_scale

    Y = np.random.standard_normal(n) * response_sd

    FS = info_crit_stop(Y, X, response_sd, cost=np.log(n))
    final_model = len(FS.variables) - 1

    sampler_options = (dict(saturated=False, burnin=5000, ndraw=5000)
                       if do_sample else {})
    return [entry[-1]
            for entry in FS.model_pivots(final_model, **sampler_options)]
# Example no. 6
0
def test_data_carving_IC(nsim=500,
                         n=100,
                         p=200,
                         s=7,
                         sigma=5,
                         rho=0.3,
                         signal=7.,
                         split_frac=0.9,
                         ndraw=5000,
                         burnin=1000,
                         df=np.inf,
                         coverage=0.90,
                         compute_intervals=False):
    """Try up to `nsim` draws; run `data_carving_IC` on the first screened one.

    Each attempt draws a `gaussian_instance`, shuffles the row indices and
    keeps the first ``int(n * split_frac)`` as `stage_one`, and runs
    `info_crit_stop` on that subset with ``cost=log(n)``.  The first attempt
    whose selected set (`FS.active`) contains every truly active variable is
    analysed with `data_carving_IC` and returned.

    The carve values, split values and raw results are printed before
    returning.

    Returns
    -------
    tuple or None
        ``(carve_ge_s, split_ge_s, carve_lt_s, split_lt_s, counter,
        carve_coverage, split_coverage)``: entries 1 and 3 of each result row
        (presumably carving / splitting p-values -- confirm against
        `data_carving_IC`) partitioned by whether the selected variable index
        is ``>= s`` or ``< s``, the number of instances drawn, and per-variable
        indicators of whether entries 2 and 4 (used as intervals) strictly
        contain the projected true coefficient.  ``None`` is returned
        implicitly when none of the `nsim` attempts passes screening.
    """
    bic_cost = np.log(n)
    n_stage_one = int(n * split_frac)
    attempts = 0

    while attempts < nsim:
        attempts += 1
        X, y, beta, active, sigma, _ = gaussian_instance(n=n, p=p, s=s,
                                                         sigma=sigma,
                                                         rho=rho,
                                                         signal=signal,
                                                         df=df)

        # Random split: the first n_stage_one shuffled rows drive selection.
        order = np.arange(n)
        np.random.shuffle(order)
        stage_one = order[:n_stage_one]

        screen = info_crit_stop(y, X, sigma, cost=bic_cost, subset=stage_one)
        if not set(active).issubset(screen.active):
            # Screening failed; try another instance.
            continue

        results, FS = data_carving_IC(y, X, sigma,
                                      stage_one=stage_one,
                                      splitting=True,
                                      ndraw=ndraw,
                                      burnin=burnin,
                                      coverage=coverage,
                                      compute_intervals=compute_intervals,
                                      cost=bic_cost)

        # One row per result: column 0 is the carve value, column 1 the
        # split value.
        paired = np.array([(row[1], row[3]) for row in results])
        carve = paired[:, 0]
        split = paired[:, 1]

        # Target of inference: the true mean projected onto the selected
        # columns (all of FS.variables except the last one).
        mu = np.dot(X, beta)
        selected_design = X[:, FS.variables[:-1]]
        truth = np.dot(np.linalg.pinv(selected_design), mu)

        carve_coverage = []
        split_coverage = []
        for (_, _, carve_ci, _, split_ci), target in zip(results, truth):
            carve_coverage.append(
                (carve_ci[0] < target) * (target < carve_ci[1]))
            split_coverage.append(
                (split_ci[0] < target) * (target < split_ci[1]))

        print(carve, 'carve')
        print(split, 'split')
        print(results, 'results')

        at_or_above_s = [j for j, var in enumerate(FS.active) if var >= s]
        below_s = [j for j, var in enumerate(FS.active) if var < s]

        return ([carve[j] for j in at_or_above_s],
                [split[j] for j in at_or_above_s],
                [carve[j] for j in below_s],
                [split[j] for j in below_s],
                attempts, carve_coverage, split_coverage)
# Example no. 7
0
def test_data_carving_IC(n=600,
                         p=100,
                         s=10,
                         sigma=5,
                         rho=0.25,
                         signal=(3.5,5.),
                         split_frac=0.9,
                         ndraw=25000,
                         burnin=5000,
                         df=np.inf,
                         coverage=0.90,
                         compute_intervals=False):
    """Compute sampling-based two-sided p-values for each selected variable.

    Steps, in order:

    1. Draw a `gaussian_instance` (``equicorrelated=False``) and pick a
       random `stage_one` subset of ``int(n * split_frac)`` rows.
    2. Run `info_crit_stop` on that subset with ``cost=log(n)`` and take its
       constraints.
    3. Fit least squares on the selected columns, estimate the noise level
       from the residuals, and overwrite the constraints' mean and
       covariance with the fitted mean and ``sigma_E**2 * I``.
    4. Sample from the constraints starting at `y`.
    5. For every selected variable, refit without it, weight each sample by
       ``exp(Z . (mu_r - mu_E) / sigma_E**2)``, and form a two-sided p-value
       from a `discrete_family` over the sampled coefficient.

    `coverage` and `compute_intervals` are accepted but not used here.  The
    estimated and true noise levels are printed before sampling.

    Returns
    -------
    list of tuple
        One ``(pvalue, beta[var])`` pair per selected variable, in the order
        of ``FS.active``.
    """
    X, y, beta, active, sigma, _ = gaussian_instance(n=n,
                                                     p=p,
                                                     s=s,
                                                     sigma=sigma,
                                                     rho=rho,
                                                     signal=signal,
                                                     df=df,
                                                     equicorrelated=False)
    # Kept from the original; the value is not used below.
    mu = np.dot(X, beta)

    n_stage_one = int(n * split_frac)
    order = np.arange(n)
    np.random.shuffle(order)
    stage_one = order[:n_stage_one]

    FS = info_crit_stop(y, X, sigma, cost=np.log(n), subset=stage_one)
    con = FS.constraints()

    # Least-squares fit on the selected columns and a residual-based
    # estimate of the noise standard deviation.
    X_E = X[:, FS.active]
    X_E_pinv = np.linalg.pinv(X_E)
    beta_bar = X_E_pinv.dot(y)
    mu_E = X_E.dot(beta_bar)
    residual_df = n - len(FS.active)
    sigma_E = np.linalg.norm(y - mu_E) / np.sqrt(residual_df)

    # Re-centre the constraints at the fitted selected model.
    con.mean[:] = mu_E
    con.covariance = sigma_E**2 * np.identity(n)

    print(sigma_E, sigma)
    Z = sample_from_constraints(con,
                                y,
                                ndraw=ndraw,
                                burnin=burnin)

    pvalues = []
    for idx, var in enumerate(FS.active):
        # Selected set with `var` dropped.
        remaining = copy(FS.active)
        remaining.remove(var)

        X_r = X[:, remaining]
        mu_r = X_r.dot(np.linalg.pinv(X_r).dot(y))
        tilt = (mu_r - mu_E) / sigma_E**2

        weights = np.exp(Z.dot(tilt))
        sampled_coef = Z.dot(X_E_pinv[idx].T)
        family = discrete_family(sampled_coef, weights)

        one_sided = family.cdf(0, x=beta_bar[idx])
        two_sided = 2 * min(one_sided, 1 - one_sided)
        pvalues.append((two_sided, beta[var]))

    return pvalues