def cal_gama_feature_table(XL_train, YL_train, XU_train, YU_train, gama_list, gama_feature_table_path=None):
    """Run ``lsfs`` once per gamma and tabulate the returned feature orders.

    For every value in ``gama_list`` the training inputs are copied, the
    labeled targets are passed through ``read_data.label_n1_to_nc`` and
    ``lsfs(XL, YL, XU, gama=gama)`` is called. The first element of each
    result becomes one row of the table.

    Args:
        XL_train: Labeled samples; ``XL_train.shape[1]`` gives the number of
            table columns.
        YL_train: Labels belonging to ``XL_train``.
        XU_train: Unlabeled samples.
        YU_train: Kept for signature compatibility. The selection never reads
            it, so it is no longer copied or converted here.
        gama_list: Iterable of gamma values to evaluate.
        gama_feature_table_path: Optional CSV path; when given (not ``None``)
            the table is also written there.

    Returns:
        pandas.DataFrame indexed by ``str(gama)`` with columns
        ``0 .. XL_train.shape[1] - 1``.
    """
    feature_count = XL_train.shape[1]
    gama_label = []
    feature_order_list = []

    for gama in gama_list:
        # Fresh copies for every gamma so each lsfs call starts from the
        # caller's untouched data.
        XL, YL, XU = XL_train.copy(), YL_train.copy(), XU_train.copy()
        print(str(gama) + " : lsfs feature select start")
        YL = read_data.label_n1_to_nc(YL)

        feature_order, _time_dual = lsfs(XL, YL, XU, gama=gama)
        feature_order_list.append(feature_order)
        gama_label.append(str(gama))

    print("finished feature select!")

    gama_feature_table = pd.DataFrame(
        data=np.array(feature_order_list),
        index=gama_label,
        columns=range(feature_count),
    )

    if gama_feature_table_path is not None:
        gama_feature_table.to_csv(gama_feature_table_path)

    return gama_feature_table
def compute_acc_diff_gama_fearture(XL_train, YL_train, XU_train, YU_train, gama_array, idx_array,
                                   output_file_name="feature_order"):
    """Evaluate the accuracy obtained from the ``lsfs`` feature order of each gamma.

    For every gamma, ``lsfs`` is run on copies of the training inputs (with
    the labeled targets converted by ``read_data.label_n1_to_nc``) and the
    resulting feature order is handed to
    ``evaluate.cal_many_acc_by_idx2`` together with the original,
    unconverted training data and ``idx_array``.

    Args:
        XL_train: Labeled samples.
        YL_train: Labels belonging to ``XL_train``.
        XU_train: Unlabeled samples.
        YU_train: Labels of the unlabeled samples; only forwarded to the
            evaluation helper.
        gama_array: Iterable of gamma values to try.
        idx_array: Forwarded unchanged to ``evaluate.cal_many_acc_by_idx2``.
        output_file_name: Forwarded to ``lsfs`` as ``output_file_name``.

    Returns:
        list with one entry per gamma, in ``gama_array`` order; each entry is
        the value returned by ``evaluate.cal_many_acc_by_idx2``. Previously
        the collected results were discarded and ``None`` was returned.
    """
    XL, YL, XU = XL_train.copy(), YL_train.copy(), XU_train.copy()
    YL = read_data.label_n1_to_nc(YL)

    data = []
    for gama in gama_array:
        # NOTE(review): the same XL/YL/XU objects are passed to every lsfs
        # call; if lsfs modifies its inputs in place, later gammas see the
        # modified data -- confirm.
        feature_order, _time_dual = lsfs(XL, YL, XU, output_file_name=output_file_name, gama=gama)
        acc_array = evaluate.cal_many_acc_by_idx2(
            XL_train, YL_train, XU_train, YU_train, feature_order, idx_array
        )
        data.append(acc_array)

    return data
def run_accuracy(fun, XL_train, YL_train, XU_train, YU_train, sel_num=5, output_file_name="feature_order"):
    """Run one feature selector and report the accuracy of its selection.

    ``fun`` is called as ``fun(XL, YL, XU, output_file_name=...)`` on copies
    of the training inputs. When ``fun.__name__`` is ``"lsfs"`` (compared
    case-insensitively) the label copies are first converted with
    ``read_data.label_n1_to_nc``. The returned feature order is then used by
    ``evaluate.select_data`` (with ``sel_num``) on the original data, and the
    selection is scored with ``evaluate.run_acc``.

    Args:
        fun: Feature selection callable returning ``(feature_order, time)``.
        XL_train: Labeled samples.
        YL_train: Labels belonging to ``XL_train``.
        XU_train: Unlabeled samples.
        YU_train: Labels of the unlabeled samples.
        sel_num: Forwarded to ``evaluate.select_data`` as ``sel_num``.
        output_file_name: Forwarded to ``fun`` as ``output_file_name``.

    Returns:
        Tuple ``(feature_order, time_dual, accuracy)``.
    """
    labeled_x = XL_train.copy()
    labeled_y = YL_train.copy()
    unlabeled_x = XU_train.copy()
    unlabeled_y = YU_train.copy()

    wants_converted_labels = fun.__name__.lower() == "lsfs"
    if wants_converted_labels:
        labeled_y = read_data.label_n1_to_nc(labeled_y)
        # The converted unlabeled targets are not read afterwards; the call
        # is kept so the function does exactly what it did before.
        unlabeled_y = read_data.label_n1_to_nc(unlabeled_y)

    ranking, elapsed = fun(labeled_x, labeled_y, unlabeled_x, output_file_name=output_file_name)

    # Scoring works on the caller's original data, not on the copies above.
    chosen_x, chosen_y = evaluate.select_data(
        XL_train, YL_train, XU_train, YU_train, ranking, sel_num=sel_num
    )
    accuracy = evaluate.run_acc(chosen_x, chosen_y)
    print("accuracy", ":", accuracy)

    return ranking, elapsed, accuracy
# Row label used in the result table for every supported non-LSFS selector,
# keyed by the selector function's ``__name__``.
_BASELINE_SELECTOR_LABELS = {
    "fisher_score": "Fisher score",
    "laplacian_score_feature_order2": "Laplacian score",
    "lsdf": "LSDF",
    "prpc": "PRPC",
    "sSelect": "sSelect",
}


def _call_baseline_selector(fun, fun_name, XL, YL, XU):
    """Call a non-LSFS selector with the argument set used for its name."""
    if fun_name == "fisher_score":
        return fun(XL, YL)
    if fun_name == "laplacian_score_feature_order2":
        return fun(XL, k=10)
    if fun_name == "sSelect":
        return fun(XL, YL, XU, k=10, theta=10, namuda=0.1)
    # "lsdf" and "prpc" take the same (XL, YL, XU) arguments.
    return fun(XL, YL, XU)


def _append_time_line(text):
    """Append one line to ``time.txt`` in the current working directory."""
    with open("time.txt", "a+") as f:
        print(text, file=f)


def cal_feature_order_table(XL_train, YL_train, XU_train, YU_train,fun_list, gama_list, feature_order_table_path = None):
    """Run several feature selectors and tabulate their feature orders.

    Selectors are recognised by ``__name__``. ``lsfs`` is run once per value
    in ``gama_list`` (rows labelled ``"LSFS :<gama>"``); ``fisher_score``,
    ``laplacian_score_feature_order2``, ``lsdf``, ``prpc`` and ``sSelect``
    are run once each. The elapsed time reported by every run is appended to
    ``time.txt``.

    Args:
        XL_train: Labeled samples; ``XL_train.shape[1]`` gives the number of
            table columns.
        YL_train: Labels belonging to ``XL_train``.
        XU_train: Unlabeled samples.
        YU_train: Kept for signature compatibility; no selector receives it,
            so it is no longer copied or converted here.
        fun_list: Iterable of selector callables.
        gama_list: Gamma values used for ``lsfs``.
        feature_order_table_path: Optional CSV path; when given (not
            ``None``) the table is also written there.

    Returns:
        pandas.DataFrame with one row per executed selector run and columns
        ``0 .. XL_train.shape[1] - 1``.

    Raises:
        ValueError: If ``fun_list`` contains a function whose name is not one
            of the supported selectors. This is checked before any selector
            runs. Previously such an entry either failed with an unbound
            ``feature_order`` or silently re-used the previous selector's
            result.
    """
    fun_list = list(fun_list)
    unsupported = [
        fun.__name__
        for fun in fun_list
        if fun.__name__ != "lsfs" and fun.__name__ not in _BASELINE_SELECTOR_LABELS
    ]
    if unsupported:
        raise ValueError(
            "unsupported feature selection function(s): " + ", ".join(unsupported)
        )

    feature_count = XL_train.shape[1]
    exc_fun_label = []
    feature_order_list = []

    for fun in fun_list:
        XL, YL, XU = XL_train.copy(), YL_train.copy(), XU_train.copy()
        fun_name = fun.__name__
        print(fun_name + " feature select start")

        if fun_name == "lsfs":
            YL = read_data.label_n1_to_nc(YL)
            for gama in gama_list:
                # NOTE(review): XL/YL/XU are shared by all gammas of this
                # selector; if lsfs modifies its inputs in place, later
                # gammas see the modified data -- confirm.
                feature_order, time_dual = lsfs(XL, YL, XU, gama=gama)
                exc_fun_label.append("LSFS :" + str(gama))
                feature_order_list.append(feature_order)
                _append_time_line(fun_name + " " + str(gama) + " time : " + str(time_dual))
        else:
            feature_order, time_dual = _call_baseline_selector(fun, fun_name, XL, YL, XU)
            exc_fun_label.append(_BASELINE_SELECTOR_LABELS[fun_name])
            feature_order_list.append(feature_order)
            _append_time_line(fun_name + " time : " + str(time_dual))

    # NOTE(review): the original indentation was lost; the separator is
    # assumed to be written once per call (after all selectors) -- confirm.
    _append_time_line("======================================================================")

    print("finished feature select!")

    feature_order_table = pd.DataFrame(
        data=np.array(feature_order_list),
        index=exc_fun_label,
        columns=range(feature_count),
    )

    if feature_order_table_path is not None:
        feature_order_table.to_csv(feature_order_table_path)

    return feature_order_table
# Build the two output file stems for the objective-value plots. Both share
# the directory and the "<dataset>_<example_rate>_<feature_rate>_g<gama>"
# suffix and differ only in the "nei"/"wai" part of the file name
# (presumably inner/outer iterations -- confirm).
_obj_fun_dir = "..\\result\\iter_objFun\\"
_dataset_name = file_path.split("\\")[-2]
_run_tag = "_".join([_dataset_name, str(example_rate), str(feature_rate)]) + "_g" + str(gama)
output_file_name_nei = _obj_fun_dir + "lsfs_iter_objFun_result_nei_" + _run_tag
output_file_name_wai = _obj_fun_dir + "lsfs_iter_objFun_result_wai_" + _run_tag

# Load the labeled/unlabeled split for the configured sampling rates.
XL_train, YL_train, XU_train, YU_train = get_data(
    file_path,
    selected_data_file_name,
    selected_cluster_name_file_name,
    unselected_data_file_name,
    unselected_cluster_name_file_name,
    example_rate,
    feature_rate,
)

# Work on copies so the loaded training data stays untouched; both label
# sets are converted with read_data.label_n1_to_nc.
XL, YL, XU, YU = (
    XL_train.copy(),
    YL_train.copy(),
    XU_train.copy(),
    YU_train.copy(),
)
YL = read_data.label_n1_to_nc(YL)
YU = read_data.label_n1_to_nc(YU)

# get_lsfs_obj_value receives the "nei" image path; the values it returns are
# plotted to the "wai" image path.
_nei_png = output_file_name_nei + ".png"
iterations_wai = get_lsfs_obj_value(XL, YL, XU, gama=gama, output_file_path=_nei_png)

_wai_png = output_file_name_wai + ".png"
plot_iter_obj_funValue(
    iterations_wai,
    xlabel_name=x_label,
    ylabel_name=y_label,
    output_file_path=_wai_png,
)

print("gama : ", gama, " finish")