Example #1

# These examples assume NumPy, scikit-learn, and the local modules that
# ship with the parametric-Lasso selective-inference code (gen_data,
# util, parametric_lasso, ci, and the cross-validation helpers used in
# Example #6).
import numpy as np
from sklearn import linear_model

import ci
import gen_data
import parametric_lasso
import util


def run():
    n = 100
    p = 5
    lamda = 0.05
    beta_vec = [1, 1, 0, 0, 0]
    cov = np.identity(n)

    threshold = 20

    X, y, _ = gen_data.generate(n, p, beta_vec)

    clf = linear_model.Lasso(alpha=lamda, fit_intercept=False)
    clf.fit(X, y)
    bh = clf.coef_

    y = y.reshape((n, 1))

    A, XA, Ac, XAc, bhA = util.construct_A_XA_Ac_XAc_bhA(X, bh, n, p)

    if len(A) == 0:
        return None

    for j_selected in A:
        etaj, etajTy = util.construct_test_statistic(j_selected, XA, y, A)

        list_zk, list_bhz, list_active_set = parametric_lasso.run_parametric_lasso(
            X, y, lamda, etaj, n, p, threshold)
        p_value = util.p_value(A, bh, list_active_set, list_zk, list_bhz, etaj,
                               etajTy, cov)

        print('Feature', j_selected + 1, ' True Beta:', beta_vec[j_selected],
              ' p-value:', '{:.4f}'.format(p_value))
        print("==========")
Example #2

def run():
    n = 100
    p = 5
    lamda = 1
    beta_vec = [2, 2, 0, 0, 0]
    cov = np.identity(n)

    threshold = 20

    X, y, true_y = gen_data.generate(n, p, beta_vec)

    clf = linear_model.Lasso(alpha=lamda, fit_intercept=False, tol=1e-10)
    clf.fit(X, y)
    bh = clf.coef_

    y = y.reshape((n, 1))
    true_y = true_y.reshape((n, 1))

    A, XA, Ac, XAc, bhA = util.construct_A_XA_Ac_XAc_bhA(X, bh, n, p)

    if len(A) == 0:
        return None

    rand_value = np.random.randint(len(A))
    j_selected = A[rand_value]

    etaj, etajTy = util.construct_test_statistic(j_selected, XA, y, A)

    list_zk, list_bhz, list_active_set = parametric_lasso.run_parametric_lasso(
        X, y, lamda, etaj, n, p, threshold)

    tn_mu = np.dot(etaj.T, true_y)[0][0]
    pivot = util.pivot(A, bh, list_active_set, list_zk, list_bhz, etaj, etajTy,
                       cov, tn_mu, 'A')

    return pivot
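Because this variant returns a pivot rather than printing a p-value, the usual sanity check is that the pivot is Uniform(0, 1) when the conditioning is valid. A minimal sketch of that check, assuming SciPy is available; the trial count and the Kolmogorov-Smirnov test are choices made here, not part of the original code:

from scipy import stats

def check_pivot_uniformity(n_trials=200):
    # run() returns None when the Lasso selects nothing, so skip those trials
    pivots = [run() for _ in range(n_trials)]
    pivots = [pv for pv in pivots if pv is not None]
    # Under valid conditioning the pivot is Uniform(0, 1)
    stat, p = stats.kstest(pivots, 'uniform')
    print('KS statistic: {:.4f}, p-value: {:.4f}'.format(stat, p))
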
Example #3
def run():
    alpha = 0.05
    n = 100
    p = 5
    lamda = 0.05
    beta_vec = [1, 1, 0, 0, 0]
    cov = np.identity(n)

    threshold = 20

    X, y, _ = gen_data.generate(n, p, beta_vec)

    clf = linear_model.Lasso(alpha=lamda, fit_intercept=False)
    clf.fit(X, y)
    bh = clf.coef_

    y = y.reshape((n, 1))

    A, XA, Ac, XAc, bhA = util.construct_A_XA_Ac_XAc_bhA(X, bh, n, p)

    if len(A) == 0:
        return None

    for j_selected in A:
        etaj, etajTy = util.construct_test_statistic(j_selected, XA, y, A)

        list_zk, list_bhz, list_active_set = parametric_lasso.run_parametric_lasso(
            X, y, lamda, etaj, n, p, threshold)
        confidence_interval = ci.compute_ci(A, bh, list_active_set, list_zk,
                                            list_bhz, etaj, etajTy, cov,
                                            beta_vec[j_selected], alpha)

        print(
            'Feature', j_selected + 1, ' True Beta:', beta_vec[j_selected],
            ' CI: ' + '[{:.2f}'.format(confidence_interval[0]) +
            ', {:.2f}]'.format(confidence_interval[1]), ' CI Length',
            '{:.2f}'.format(confidence_interval[1] - confidence_interval[0]))
        print("==========")
Example #4
    fold) + '.p'  # Where is validation data?

label_column_name = 'category'  # Which metadata column holds the category label?
n_category = 39  # Number of categories
batch_size_tr = 39  # Batch size of training data
batch_size_val = 39  # Batch size of validation data
n_epoch = 50  # Number of epochs
learning_rate = 0.001  # Learning rate

# Pin the following ops to "gpu_device"
with tf.device(gpu_device):
    # If "gen_data" is True, generate the dataset in .p format
    if gen_data:
        generate(metadata_path=metadata_path,
                 data_path=traindata_path,
                 batch_size=batch_size_tr,
                 label_column_name=label_column_name,
                 is_training=True,
                 fold=fold)
        generate(metadata_path=metadata_path,
                 data_path=validdata_path,
                 batch_size=batch_size_val,
                 label_column_name=label_column_name,
                 is_training=False,
                 fold=fold)

    # Calculate the mean of each channel
    #- Load the training data (.p); note that "dataframe" is an instance
    patch_mean = np.array([0, 0, 0], np.float32)  # Init.
    dataframe = load(traindata_path, batch_size_tr)  # Instance
    #- Accumulate per-channel means over all training patches
    for i, row in dataframe.dataframe.iterrows():
        patch = row['patch']
        patch_mean[0] += np.mean(patch[:, :, 0])
        patch_mean[1] += np.mean(patch[:, :, 1])
        patch_mean[2] += np.mean(patch[:, :, 2])
    patch_mean = patch_mean / len(dataframe.dataframe['patch'])
Example #5
save_tsne = False                                               # Save the t-SNE result?
fold = 1                                                        # Which fold (k) is used as the data?
rec_name = 'result/tsne_conv.csv'                               # Record "xs", "ys", and "label" into this .csv
pretrain_weights = 'saver_tr_nf_mspdb_2048_2048_592_k1.npz'     # Which weights are transferred?
metadata_path = 'dataset/metadata_5fcv_box.csv'                 # Where is the metadata?
data_path = 'dataset/data.p'                                    # Where is the data in .p format?
batch_size = 1                                                  # Batch size
label_column_name = 'category'                                  # Which column of metadata.csv holds the category label?


# Pin the following ops to "gpu_device"
with tf.device(gpu_device):
    # If "gen_data" is True, generate a dataset in .p format
    if gen_data:
        #- Training data (k = "fold") -> .p
        generate(metadata_path=metadata_path, data_path=data_path,
                 batch_size=batch_size, label_column_name=label_column_name,
                 is_training=True, fold=fold)

    # Calculate mean of each channel
    #- Load data (.p)
    patch_mean = np.array([0, 0, 0], np.float32)        # Initialize mean of each channel
    dataframe = load(data_path, batch_size)    # Instance
    #- Calculate mean of each channel
    for i, row in dataframe.dataframe.iterrows():
        patch = row['patch']
        patch_mean[0] += np.mean(patch[:, :, 0])
        patch_mean[1] += np.mean(patch[:, :, 1])
        patch_mean[2] += np.mean(patch[:, :, 2])
    patch_mean = patch_mean / len(dataframe.dataframe['patch'])
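If every stored patch has the same shape (H, W, 3), the loop above collapses into a single vectorized computation; the stacking step below is an assumption about the stored patches, not part of the original script:

    patches = np.stack(dataframe.dataframe['patch'].values)       # (N, H, W, 3)
    patch_mean = patches.mean(axis=(0, 1, 2)).astype(np.float32)  # one mean per channel

Either way, patch_mean is typically subtracted from each input patch at feed time so the network sees zero-centered channels.
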
    #- Delete "dataframe" from memory
    del dataframe
Example #6
def run():
    n = 100
    p = 5
    # Candidate lambdas: a geometric grid 2^-10, ..., 2^10
    list_lamda = [2.0 ** k for k in range(-10, 11)]

    beta_vec = [1, 1, 0, 0, 0]

    cov = np.identity(n)

    threshold = 20

    X, y, true_y = gen_data.generate(n, p, beta_vec)

    cutoff = int(4 * n / 5)

    X_train = X[:cutoff, :]
    y_train = y[:cutoff]

    X_val = X[cutoff:n, :]
    y_val = y[cutoff:n]

    min_cv_error = np.inf
    lamda = None
    lamda_idx = None

    for i, each_lamda in enumerate(list_lamda):

        clf_lamda = linear_model.Lasso(alpha=each_lamda, fit_intercept=False)
        clf_lamda.fit(X_train, y_train)
        bh_lamda = clf_lamda.coef_
        bh_lamda = bh_lamda.reshape((len(bh_lamda), 1))
        temp_cv_error = 0.5 * sum(
            (y_val - (np.dot(X_val, bh_lamda)).flatten())**2)
        if temp_cv_error < min_cv_error:
            min_cv_error = temp_cv_error
            lamda = each_lamda
            lamda_idx = i

    clf = linear_model.Lasso(alpha=lamda, fit_intercept=False)
    clf.fit(X, y)
    bh = clf.coef_

    y = y.reshape((n, 1))
    true_y = true_y.reshape((n, 1))

    A, XA, Ac, XAc, bhA = util.construct_A_XA_Ac_XAc_bhA(X, bh, n, p)

    if len(A) == 0:
        return None

    for j_selected in A:

        etaj, etajTy = util.construct_test_statistic(j_selected, XA, y, A)

        a, b = compute_a_b(y, etaj, n)
        a_flatten = a.flatten()
        b_flatten = b.flatten()
        a_train = (a_flatten[:cutoff]).reshape((cutoff, 1))
        b_train = (b_flatten[:cutoff]).reshape((cutoff, 1))

        a_val = (a_flatten[cutoff:n]).reshape((n - cutoff, 1))
        b_val = (b_flatten[cutoff:n]).reshape((n - cutoff, 1))

        list_zk_min_lamda, list_bhz_min_lamda, list_active_set_min_lamda, list_etaAkz_min_lamda, list_bhAz_min_lamda = \
            parametric_lasso.run_parametric_lasso_cv(X_train, list_lamda[lamda_idx], X_train.shape[0], p, threshold, a_train, b_train)

        piecewise_quadratic_min_lamda = construct_piecewise_quadratic(
            a_val, b_val, X_val, list_zk_min_lamda, list_active_set_min_lamda,
            list_etaAkz_min_lamda, list_bhAz_min_lamda)

        set_piecewise_funct = [piecewise_quadratic_min_lamda]
        set_list_zk = [list_zk_min_lamda]

        for i in range(len(list_lamda)):
            if i == lamda_idx:
                continue

            list_zk_i, list_bhz_i, list_active_set_i, list_etaAkz_i, list_bhAz_i = \
                parametric_lasso.run_parametric_lasso_cv(X_train, list_lamda[i], X_train.shape[0], p, threshold, a_train, b_train)

            piecewise_quadratic_i = construct_piecewise_quadratic(
                a_val, b_val, X_val, list_zk_i, list_active_set_i,
                list_etaAkz_i, list_bhAz_i)

            set_piecewise_funct.append(piecewise_quadratic_i)
            set_list_zk.append(list_zk_i)

        z_interval_cv = construct_z_interval_cv(set_piecewise_funct,
                                                set_list_zk)

        list_zk, list_bhz, list_active_set = parametric_lasso.run_parametric_lasso(
            X, y, lamda, etaj, n, p, threshold)

        z_interval_m = construct_m_z_interval(A, list_active_set, list_zk)

        z_interval = construct_z_interval(z_interval_m, z_interval_cv)

        pivot = util.pivot_with_specified_interval(z_interval, etaj, etajTy,
                                                   cov, 0)

        p_value = 2 * min(1 - pivot, pivot)

        print('Feature', j_selected + 1, ' True Beta:', beta_vec[j_selected],
              ' p-value:', '{:.4f}'.format(p_value))
        print("==========")
Example #7
rec_name = 'result/test' + str(fold) + '.csv'  # Record results into this .csv file
pretrain_weights = 'saver_weights.npz'  # Which weights are transferred?
metadata_path = 'dataset/metadata_box_test_0.csv'  # Where is the metadata?
testdata_path = 'dataset/test.p'  # Where is the test data in .p format?

label_column_name = 'category'  # Which column of metadata.csv holds the category label?
batch_size_test = 1  # Batch size of test data

# Pin the following ops to "gpu_device"
with tf.device(gpu_device):
    # If "gen_data" is True, generate a dataset in .p format
    if gen_data:
        generate(metadata_path=metadata_path,
                 data_path=testdata_path,
                 batch_size=batch_size_test,
                 label_column_name=label_column_name,
                 is_training=False,
                 fold=fold)

    # Calculate mean of each channel
    #- Load data (.p)
    patch_mean = np.array([0, 0, 0],
                          np.float32)  # Initialize mean of each channel
    dataframe = load(testdata_path, batch_size_test)  # Instance
    #- Calculate mean of each channel
    for i, row in dataframe.dataframe.iterrows():
        patch = row['patch']
        patch_mean[0] += np.mean(patch[:, :, 0])
        patch_mean[1] += np.mean(patch[:, :, 1])
        patch_mean[2] += np.mean(patch[:, :, 2])
    patch_mean = patch_mean / len(dataframe.dataframe['patch'])