コード例 #1
0
def test_data_carving_logistic(n=500,
                               p=300,
                               s=5,
                               sigma=5,
                               rho=0.05,
                               snr=4.,
                               split_frac=0.8,
                               ndraw=8000,
                               burnin=2000,
                               df=np.inf,
                               coverage=0.90,
                               compute_intervals=True,
                               nsim=None,
                               use_full_cov=False):
    """Run data carving and data splitting on simulated logistic data.

    Design matrices are drawn with ``instance`` until the fitted
    ``data_carving.logistic`` model selects the intercept together with the
    first ``s`` features.  Every attempt, successful or not, contributes one
    tuple to the returned list.

    Parameters
    ----------
    n, p, s, sigma, rho, snr, df
        Forwarded unchanged to ``instance``.
    split_frac : float
        Fraction of the ``n`` rows used as the first stage.
    ndraw, burnin : int
        Forwarded to ``DC.hypothesis_test``.
    use_full_cov : bool
        Forwarded to ``DS.fit``.
    coverage, compute_intervals, nsim
        Accepted for interface compatibility; not used by this function.

    Returns
    -------
    list of tuple
        One 9-tuple per attempt.  Failed attempts are
        ``(None, None, None, None, counter, nan, nan, TP, FP)``.  The final,
        successful attempt is ``(carve[k:], split[k:], carve[:k], split[:k],
        counter, nan, nan, TP, FP)`` with ``k = s + 1``, where ``carve`` and
        ``split`` hold the ``hypothesis_test`` results for each variable in
        ``DC.active``, in that order.
    """
    # Number of columns that must be selected for screening to succeed: the
    # ``s`` signal features plus the prepended intercept.  This is computed
    # once, outside the loop; bumping ``s`` itself on every pass would make
    # each retry simulate one more signal variable than the previous one.
    n_true = s + 1
    n_stage_one = int(n * split_frac)

    counter = 0
    return_value = []

    while True:
        counter += 1
        # NOTE(review): ``sigma`` is rebound from the return value and passed
        # back in on a retry -- confirm ``instance`` returns it unchanged.
        X, _, beta, _, sigma = instance(n=n,
                                        p=p,
                                        s=s,
                                        sigma=sigma,
                                        rho=rho,
                                        snr=snr,
                                        df=df)

        mu = X.dot(beta)
        # Logistic mean written as 1 / (1 + exp(-mu)).  The algebraically
        # equal exp(mu) / (1 + exp(mu)) turns into inf / inf = nan as soon as
        # exp(mu) overflows, which np.random.binomial rejects.
        with np.errstate(over='ignore'):
            prob = 1. / (1. + np.exp(-mu))

        # Column 0 becomes the intercept, so features shift right by one.
        X = np.hstack([np.ones((n, 1)), X])
        z = np.random.binomial(1, prob)

        idx = np.arange(n)
        np.random.shuffle(idx)
        stage_one = idx[:n_stage_one]

        # Unit weight on every feature, zero weight on the intercept.
        lam_theor = 1.0 * np.ones(p + 1)
        lam_theor[0] = 0.
        DC = data_carving.logistic(X, z, feature_weights=lam_theor,
                                   stage_one=stage_one)

        DC.fit()

        # Data splitting is only attempted when the selected model is smaller
        # than the number of held-out rows.
        if len(DC.active) < n - n_stage_one:
            DS = data_splitting.logistic(X, z, feature_weights=lam_theor,
                                         stage_one=stage_one)
            DS.fit(use_full_cov=use_full_cov)
            data_split = True
        else:
            print('not enough data for data splitting second stage')
            print(DC.active)
            data_split = False

        if set(range(n_true)).issubset(DC.active):
            carve = []
            split = []
            for var in DC.active:
                carve.append(DC.hypothesis_test(var, burnin=burnin, ndraw=ndraw))
                if data_split:
                    split.append(DS.hypothesis_test(var))
                else:
                    # Placeholder draw when data splitting was not possible.
                    split.append(np.random.sample())

            split_coverage = np.nan
            carve_coverage = np.nan

            TP = n_true
            FP = DC.active.shape[0] - TP
            v = (carve[n_true:], split[n_true:], carve[:n_true],
                 split[:n_true], counter, carve_coverage, split_coverage,
                 TP, FP)
            return_value.append(v)
            break
        else:
            TP = len(set(DC.active).intersection(range(n_true)))
            FP = DC.active.shape[0] - TP
            v = (None, None, None, None, counter, np.nan, np.nan, TP, FP)
            return_value.append(v)

    return return_value
コード例 #2
0
def test_data_carving_logistic(n=500,
                               p=300,
                               s=5,
                               sigma=5,
                               rho=0.05,
                               snr=4.,
                               split_frac=0.8,
                               ndraw=8000,
                               burnin=2000,
                               df=np.inf,
                               coverage=0.90,
                               compute_intervals=True,
                               nsim=None,
                               use_full_cov=False):
    """Simulate logistic data and compare data carving with data splitting.

    A fresh design is drawn from ``instance`` on every pass of the loop.  The
    loop ends on the first pass where ``data_carving.logistic`` selects the
    intercept column and the first ``s`` feature columns; one result tuple is
    recorded for every pass, including the failed ones.

    Parameters
    ----------
    n, p, s, sigma, rho, snr, df
        Passed straight through to ``instance``.
    split_frac : float
        Share of the ``n`` observations assigned to the first stage.
    ndraw, burnin : int
        Passed to ``DC.hypothesis_test``.
    use_full_cov : bool
        Passed to ``DS.fit``.
    coverage, compute_intervals, nsim
        Kept in the signature for compatibility; unused here.

    Returns
    -------
    list of tuple
        A 9-tuple per pass.  Passes that fail screening yield
        ``(None, None, None, None, counter, nan, nan, TP, FP)``.  The last
        element of the list comes from the successful pass and is
        ``(carve[k:], split[k:], carve[:k], split[:k], counter, nan, nan,
        TP, FP)`` with ``k = s + 1``; ``carve`` / ``split`` are the
        ``hypothesis_test`` results over ``DC.active`` in order.
    """
    # Intercept plus the ``s`` signal columns.  Fixed before the loop so that
    # a retry neither changes the sparsity requested from ``instance`` nor
    # widens the screening set (the previous in-loop ``s += 1`` did both).
    n_true = s + 1
    n_stage_one = int(n * split_frac)

    counter = 0
    return_value = []

    while True:
        counter += 1
        # NOTE(review): ``sigma`` is overwritten by the value ``instance``
        # returns and reused on the next pass -- confirm it is unchanged.
        X, _, beta, _, sigma = instance(n=n,
                                        p=p,
                                        s=s,
                                        sigma=sigma,
                                        rho=rho,
                                        snr=snr,
                                        df=df)

        mu = X.dot(beta)
        # 1 / (1 + exp(-mu)) equals exp(mu) / (1 + exp(mu)) but cannot become
        # inf / inf = nan when exp(mu) overflows; a nan probability would make
        # np.random.binomial raise.
        with np.errstate(over='ignore'):
            prob = 1. / (1. + np.exp(-mu))

        # Prepend the intercept column; feature j moves to column j + 1.
        X = np.hstack([np.ones((n, 1)), X])
        z = np.random.binomial(1, prob)

        idx = np.arange(n)
        np.random.shuffle(idx)
        stage_one = idx[:n_stage_one]

        # Penalty weight 1 for every feature, 0 for the intercept.
        lam_theor = 1.0 * np.ones(p + 1)
        lam_theor[0] = 0.
        DC = data_carving.logistic(X,
                                   z,
                                   feature_weights=lam_theor,
                                   stage_one=stage_one)

        DC.fit()

        # Only fit the data-splitting model when fewer variables were
        # selected than there are rows outside the first stage.
        if len(DC.active) < n - n_stage_one:
            DS = data_splitting.logistic(X,
                                         z,
                                         feature_weights=lam_theor,
                                         stage_one=stage_one)
            DS.fit(use_full_cov=use_full_cov)
            data_split = True
        else:
            print('not enough data for data splitting second stage')
            print(DC.active)
            data_split = False

        if set(range(n_true)).issubset(DC.active):
            carve = []
            split = []
            for var in DC.active:
                carve.append(
                    DC.hypothesis_test(var, burnin=burnin, ndraw=ndraw))
                if data_split:
                    split.append(DS.hypothesis_test(var))
                else:
                    # Stand-in random draw when no split model was fitted.
                    split.append(np.random.sample())

            split_coverage = np.nan
            carve_coverage = np.nan

            TP = n_true
            FP = DC.active.shape[0] - TP
            v = (carve[n_true:], split[n_true:], carve[:n_true],
                 split[:n_true], counter, carve_coverage, split_coverage,
                 TP, FP)
            return_value.append(v)
            break
        else:
            TP = len(set(DC.active).intersection(range(n_true)))
            FP = DC.active.shape[0] - TP
            v = (None, None, None, None, counter, np.nan, np.nan, TP, FP)
            return_value.append(v)

    return return_value
コード例 #3
0
def test_data_carving_logistic(n=700,
                               p=300,
                               s=5,
                               rho=0.05,
                               signal=12.,
                               split_frac=0.8,
                               ndraw=8000,
                               burnin=2000,
                               df=np.inf,
                               compute_intervals=True,
                               use_full_cov=False,
                               return_only_screening=True):
    """Run data carving and data splitting on one simulated logistic dataset.

    A single dataset is drawn from ``logistic_instance``, an intercept column
    is prepended, and ``data_carving.logistic`` is fitted on a random
    first-stage subset.  Hypothesis tests are only run when every truly
    active column (intercept included) was selected.

    Parameters
    ----------
    n, p, s, rho, signal
        Forwarded to ``logistic_instance`` (with ``equicorrelated=False``).
    split_frac : float
        Fraction of the ``n`` rows used as the first stage.
    ndraw, burnin : int
        Forwarded to ``DC.hypothesis_test``.
    df, compute_intervals, use_full_cov, return_only_screening
        Accepted for interface compatibility; not used by this function.

    Returns
    -------
    tuple or None
        ``(carve, split, active)`` when screening succeeds: ``carve`` and
        ``split`` are lists of ``hypothesis_test`` results, one per variable
        in ``DC.active``, and ``active`` is a boolean array of length
        ``p + 1`` marking the truly active columns.  ``None`` when at least
        one truly active column was not selected.
    """
    X, y, beta, true_active, _ = logistic_instance(n=n,
                                                   p=p,
                                                   s=s,
                                                   rho=rho,
                                                   signal=signal,
                                                   equicorrelated=False)

    # Column 0 becomes the intercept, so every feature index shifts by one
    # and the intercept itself is counted as truly active.
    X = np.hstack([np.ones((n, 1)), X])
    shifted = np.array(true_active)
    shifted += 1
    true_active = [0] + list(shifted)

    n_stage_one = int(n * split_frac)
    idx = np.arange(n)
    np.random.shuffle(idx)
    stage_one = idx[:n_stage_one]

    # Unit weight on every feature, zero weight on the intercept.
    lam_theor = 1.0 * np.ones(p + 1)
    lam_theor[0] = 0.
    DC = data_carving.logistic(X,
                               y,
                               feature_weights=lam_theor,
                               stage_one=stage_one)

    DC.fit()

    # Data splitting is only attempted when the selected model is smaller
    # than the number of held-out rows.
    if len(DC.active) < n - n_stage_one:
        DS = data_splitting.logistic(X,
                                     y,
                                     feature_weights=lam_theor,
                                     stage_one=stage_one)
        # NOTE(review): the ``use_full_cov`` argument is ignored here and
        # True is always passed -- confirm whether that is intentional.
        DS.fit(use_full_cov=True)
        data_split = True
    else:
        print('not enough data for data splitting second stage')
        print(DC.active)
        data_split = False

    print(true_active, DC.active)
    if not set(true_active).issubset(DC.active):
        # Screening failed: there is nothing to test.
        return None

    carve = []
    split = []
    for var in DC.active:
        carve.append(DC.hypothesis_test(var, burnin=burnin, ndraw=ndraw))
        if data_split:
            split.append(DS.hypothesis_test(var))
        else:
            # Placeholder draw when data splitting was not possible.
            split.append(np.random.sample())

    # Builtin ``bool`` instead of the ``np.bool`` alias, which was removed
    # in NumPy 1.24 and raises AttributeError there.
    active = np.zeros(p + 1, bool)
    active[true_active] = True
    return carve, split, active