def run():
    """Fit a lasso on synthetic data and print a selective-inference
    p-value for every feature in the active set.

    Returns None (after printing) — or None immediately when the lasso
    selects nothing.
    """
    n = 100                     # sample size
    p = 5                       # number of features
    lamda = 0.05                # lasso penalty
    beta_vec = [1, 1, 0, 0, 0]  # true coefficients of the generating model
    cov = np.identity(n)        # noise covariance assumed by the test
    threshold = 20              # search range for the parametric lasso path

    X, y, _ = gen_data.generate(n, p, beta_vec)

    # FIX: the `normalize` keyword was removed from sklearn's Lasso in
    # scikit-learn 1.2 (deprecated since 1.0) and now raises TypeError.
    # The previous normalize=False matches the current default behavior,
    # so dropping the argument is behavior-preserving.
    clf = linear_model.Lasso(alpha=lamda, fit_intercept=False)
    clf.fit(X, y)
    bh = clf.coef_

    y = y.reshape((n, 1))

    # Active set A and the associated design sub-matrices.
    A, XA, Ac, XAc, bhA = util.construct_A_XA_Ac_XAc_bhA(X, bh, n, p)
    if len(A) == 0:
        return None

    for j_selected in A:
        # Test statistic eta_j^T y for the selected feature.
        etaj, etajTy = util.construct_test_statistic(j_selected, XA, y, A)
        list_zk, list_bhz, list_active_set = parametric_lasso.run_parametric_lasso(
            X, y, lamda, etaj, n, p, threshold)
        p_value = util.p_value(A, bh, list_active_set, list_zk, list_bhz,
                               etaj, etajTy, cov)
        print('Feature', j_selected + 1, ' True Beta:', beta_vec[j_selected],
              ' p-value:', '{:.4f}'.format(p_value))
        print("==========")
def run():
    """Fit a lasso on synthetic data, pick one selected feature at random,
    and return the selective pivot for it.

    Returns the pivot value (a float from util.pivot), or None when the
    lasso selects no features.
    """
    n = 100                     # sample size
    p = 5                       # number of features
    lamda = 1                   # lasso penalty
    beta_vec = [2, 2, 0, 0, 0]  # true coefficients of the generating model
    cov = np.identity(n)        # noise covariance assumed by the test
    threshold = 20              # search range for the parametric lasso path

    X, y, true_y = gen_data.generate(n, p, beta_vec)

    # FIX: the `normalize` keyword was removed from sklearn's Lasso in
    # scikit-learn 1.2 and now raises TypeError; normalize=False matched
    # the current default, so the argument is dropped. tol is kept tight
    # so the KKT-based active set is reliable.
    clf = linear_model.Lasso(alpha=lamda, fit_intercept=False, tol=1e-10)
    clf.fit(X, y)
    bh = clf.coef_

    y = y.reshape((n, 1))
    true_y = true_y.reshape((n, 1))

    # Active set A and the associated design sub-matrices.
    A, XA, Ac, XAc, bhA = util.construct_A_XA_Ac_XAc_bhA(X, bh, n, p)
    if len(A) == 0:
        return None

    # Test one uniformly random feature from the active set.
    rand_value = np.random.randint(len(A))
    j_selected = A[rand_value]

    etaj, etajTy = util.construct_test_statistic(j_selected, XA, y, A)
    list_zk, list_bhz, list_active_set = parametric_lasso.run_parametric_lasso(
        X, y, lamda, etaj, n, p, threshold)

    # eta_j^T (true mean), used to center the truncated distribution.
    tn_mu = np.dot(etaj.T, true_y)[0][0]
    pivot = util.pivot(A, bh, list_active_set, list_zk, list_bhz,
                       etaj, etajTy, cov, tn_mu, 'A')
    return pivot
def run():
    """Fit a lasso on synthetic data and print a selective confidence
    interval for every feature in the active set.

    Returns None (after printing) — or None immediately when the lasso
    selects nothing.
    """
    alpha = 0.05                # significance level passed to ci.compute_ci
    n = 100                     # sample size
    p = 5                       # number of features
    lamda = 0.05                # lasso penalty
    beta_vec = [1, 1, 0, 0, 0]  # true coefficients of the generating model
    cov = np.identity(n)        # noise covariance assumed by the interval
    threshold = 20              # search range for the parametric lasso path

    X, y, _ = gen_data.generate(n, p, beta_vec)

    # FIX: the `normalize` keyword was removed from sklearn's Lasso in
    # scikit-learn 1.2 and now raises TypeError; normalize=False matched
    # the current default, so the argument is dropped.
    clf = linear_model.Lasso(alpha=lamda, fit_intercept=False)
    clf.fit(X, y)
    bh = clf.coef_

    y = y.reshape((n, 1))

    # Active set A and the associated design sub-matrices.
    A, XA, Ac, XAc, bhA = util.construct_A_XA_Ac_XAc_bhA(X, bh, n, p)
    if len(A) == 0:
        return None

    for j_selected in A:
        etaj, etajTy = util.construct_test_statistic(j_selected, XA, y, A)
        list_zk, list_bhz, list_active_set = parametric_lasso.run_parametric_lasso(
            X, y, lamda, etaj, n, p, threshold)
        confidence_interval = ci.compute_ci(A, bh, list_active_set, list_zk,
                                            list_bhz, etaj, etajTy, cov,
                                            beta_vec[j_selected], alpha)
        print(
            'Feature', j_selected + 1,
            ' True Beta:', beta_vec[j_selected],
            ' CI: ' + '[{:.2f}'.format(confidence_interval[0]) + ', {:.2f}]'.format(confidence_interval[1]),
            ' CI Length', '{:.2f}'.format(confidence_interval[1] - confidence_interval[0]))
        print("==========")
# NOTE(review): this chunk begins mid-assignment — `... + str(fold) + '.p'`
# is the tail of a path variable (presumably `validdata_path`) whose
# left-hand side lies outside this view. Kept byte-identical.
fold) + '.p' # Where is validation data?

# Training-script configuration.
label_column_name = 'category' # In the metadata, which index indicates category?
n_category = 39 # The number of categories;
batch_size_tr = 39 # Batch size of training data
batch_size_val = 39 # Batch size of validation data
n_epoch = 50 # Epoch
learning_rate = 0.001 # Learning rate

# With "gpu_device": pin the graph to the configured device.
with tf.device(gpu_device):
    # If "gen_data" = True, generate dataset in .p format
    if gen_data:
        # Pickle the training fold.
        generate(metadata_path=metadata_path, data_path=traindata_path,
                 batch_size=batch_size_tr, label_column_name=label_column_name,
                 is_training=True, fold=fold)
        # NOTE(review): this call builds the validation set
        # (is_training=False) yet passes batch_size_tr; batch_size_val is
        # defined above but never used in this chunk — confirm intent
        # (both are 39 here, so there is no behavioral difference today).
        generate(metadata_path=metadata_path, data_path=validdata_path,
                 batch_size=batch_size_tr, label_column_name=label_column_name,
                 is_training=False, fold=fold)
    else:
        pass

    # Calculate mean of each channel
    #- Load the training data (.p); Note that "dataframe" is an instance
    patch_mean = np.array([0, 0, 0], np.float32) # Init.
    dataframe = load(traindata_path, batch_size_tr) # Instance
save_tsne = False # save tSNE? fold = 1 # Which Fold(k) will be used as a data? rec_name = 'result/tsne_conv.csv' # Record "xs", "ys", and "label" as "rec_name".csv pretrain_weights = 'saver_tr_nf_mspdb_2048_2048_592_k1.npz' # Which weights are you going to transfer? metadata_path = 'dataset/metadata_5fcv_box.csv' # where is meta-data? data_path = 'dataset/data.p' # Where is data in .p format batch_size = 1 # Batch size label_column_name = 'category' # @metdata.csv, Which index indicates category? # With "gpu_device" with tf.device(gpu_device): # If "gen_data" = True, generate a dataset in .p format if gen_data: #- Training data(k="fold") -> .p generate(metadata_path = metadata_path, data_path = data_path, batch_size = batch_size, label_column_name=label_column_name, is_training = True, fold=fold) else: pass # Calculate mean of each channel #- Load data (.p) patch_mean = np.array([0, 0, 0], np.float32) # Initialize mean of each channel dataframe = load(data_path, batch_size) # Instance #- Calculate mean of each channel for i, row in dataframe.dataframe.iterrows(): patch = row['patch'] patch_mean[0] += np.mean(patch[:, :, 0]) patch_mean[1] += np.mean(patch[:, :, 1]) patch_mean[2] += np.mean(patch[:, :, 2]) patch_mean = patch_mean / len(dataframe.dataframe['patch']) #- Delete "dataframe" from memory
def run():
    """Tune the lasso penalty on a held-out split, refit on all data, and
    print, for each selected feature, a p-value that also conditions on
    the penalty-selection event.

    Returns None (after printing) — or None immediately when the lasso
    selects nothing.
    """
    n = 100                     # sample size
    p = 5                       # number of features
    # Candidate penalties 2^-10 .. 2^10 (same values/types as the original
    # literal list: floats for negative exponents, ints otherwise).
    list_lamda = [2**k for k in range(-10, 11)]
    beta_vec = [1, 1, 0, 0, 0]  # true coefficients of the generating model
    cov = np.identity(n)        # noise covariance assumed by the test
    threshold = 20              # search range for the parametric lasso path

    X, y, true_y = gen_data.generate(n, p, beta_vec)

    # 80/20 train/validation split used for penalty selection.
    cutoff = int(4 * n / 5)
    X_train = X[:cutoff, :]
    y_train = y[:cutoff]
    X_val = X[cutoff:n, :]
    y_val = y[cutoff:n]

    # FIX: np.Inf was removed in NumPy 2.0; np.inf is the supported name.
    min_cv_error = np.inf
    lamda = None
    lamda_idx = None
    for i in range(len(list_lamda)):
        each_lamda = list_lamda[i]
        # FIX: the `normalize` keyword was removed from sklearn's Lasso in
        # scikit-learn 1.2; normalize=False matched the current default,
        # so the argument is dropped (here and in the refit below).
        clf_lamda = linear_model.Lasso(alpha=each_lamda, fit_intercept=False)
        clf_lamda.fit(X_train, y_train)
        bh_lamda = clf_lamda.coef_
        bh_lamda = bh_lamda.reshape((len(bh_lamda), 1))
        # Squared-error validation loss for this penalty.
        temp_cv_error = 0.5 * sum(
            (y_val - (np.dot(X_val, bh_lamda)).flatten())**2)
        if temp_cv_error < min_cv_error:
            min_cv_error = temp_cv_error
            lamda = each_lamda
            lamda_idx = i

    # Refit on the full data with the winning penalty.
    clf = linear_model.Lasso(alpha=lamda, fit_intercept=False)
    clf.fit(X, y)
    bh = clf.coef_

    y = y.reshape((n, 1))
    true_y = true_y.reshape((n, 1))

    A, XA, Ac, XAc, bhA = util.construct_A_XA_Ac_XAc_bhA(X, bh, n, p)
    if len(A) == 0:
        return None

    for j_selected in A:
        etaj, etajTy = util.construct_test_statistic(j_selected, XA, y, A)

        # Decompose y along the test direction: y(z) = a + b*z, split into
        # the same train/validation rows as above.
        a, b = compute_a_b(y, etaj, n)
        a_flatten = a.flatten()
        b_flatten = b.flatten()
        a_train = (a_flatten[:cutoff]).reshape((cutoff, 1))
        b_train = (b_flatten[:cutoff]).reshape((cutoff, 1))
        a_val = (a_flatten[cutoff:n]).reshape((n - cutoff, 1))
        b_val = (b_flatten[cutoff:n]).reshape((n - cutoff, 1))

        # Piecewise-quadratic validation error along z for the winning
        # penalty ...
        list_zk_min_lamda, list_bhz_min_lamda, list_active_set_min_lamda, list_etaAkz_min_lamda, list_bhAz_min_lamda = \
            parametric_lasso.run_parametric_lasso_cv(X_train, list_lamda[lamda_idx], X_train.shape[0], p,
                                                     threshold, a_train, b_train)
        piecewise_quadratic_min_lamda = construct_piecewise_quadratic(
            a_val, b_val, X_val, list_zk_min_lamda, list_active_set_min_lamda,
            list_etaAkz_min_lamda, list_bhAz_min_lamda)

        set_piecewise_funct = [piecewise_quadratic_min_lamda]
        set_list_zk = [list_zk_min_lamda]

        # ... and for every competing penalty.
        for i in range(len(list_lamda)):
            if i == lamda_idx:
                continue
            list_zk_i, list_bhz_i, list_active_set_i, list_etaAkz_i, list_bhAz_i = \
                parametric_lasso.run_parametric_lasso_cv(X_train, list_lamda[i], X_train.shape[0], p,
                                                         threshold, a_train, b_train)
            piecewise_quadratic_i = construct_piecewise_quadratic(
                a_val, b_val, X_val, list_zk_i, list_active_set_i,
                list_etaAkz_i, list_bhAz_i)
            set_piecewise_funct.append(piecewise_quadratic_i)
            set_list_zk.append(list_zk_i)

        # Conditioning region: z-values where the chosen penalty wins the
        # validation comparison, intersected with the z-values where the
        # full-data lasso reproduces the observed active set.
        z_interval_cv = construct_z_interval_cv(set_piecewise_funct, set_list_zk)

        list_zk, list_bhz, list_active_set = parametric_lasso.run_parametric_lasso(
            X, y, lamda, etaj, n, p, threshold)
        z_interval_m = construct_m_z_interval(A, list_active_set, list_zk)

        z_interval = construct_z_interval(z_interval_m, z_interval_cv)

        # Two-sided p-value from the truncated pivot (null mean 0).
        pivot = util.pivot_with_specified_interval(z_interval, etaj, etajTy, cov, 0)
        p_value = 2 * min(1 - pivot, pivot)
        print('Feature', j_selected + 1, ' True Beta:', beta_vec[j_selected],
              ' p-value:', '{:.4f}'.format(p_value))
        print("==========")
rec_name = 'result/test' + str( fold) + '.csv' # Record results into .csv; Name of the .csv file pretrain_weights = 'saver_weights.npz' # Which weights are you going to transfer? metadata_path = 'dataset/metadata_box_test_0.csv' # where is meta-data? testdata_path = 'dataset/test.p' # Where is validation data in .p format label_column_name = 'category' # @metdata.csv, Which index indicates category? batch_size_test = 1 # Batch size of test data # With "gpu_device" with tf.device(gpu_device): # If "gen_data" = True, generate a dataset in .p format if gen_data: generate(metadata_path=metadata_path, data_path=testdata_path, batch_size=batch_size_test, label_column_name=label_column_name, is_training=False, fold=fold) else: pass # Calculate mean of each channel #- Load data (.p) patch_mean = np.array([0, 0, 0], np.float32) # Initialize mean of each channel dataframe = load(testdata_path, batch_size_test) # Instance #- Calculate mean of each channel for i, row in dataframe.dataframe.iterrows(): patch = row['patch'] patch_mean[0] += np.mean(patch[:, :, 0]) patch_mean[1] += np.mean(patch[:, :, 1])