def bootstrap():
    """Run the GA-based LTR experiment on every dataset and print its FPA.

    For each ``(dataset, filename)`` pair yielded by
    ``Processing().import_single_data()`` the data is split into training and
    testing parts, a ``pyGaft`` run maximises the ``LTR`` objective on the
    training part, and the coefficient vector read back from the reloaded
    ``best_fit`` module is used to score the testing rows with a dot product.
    The filename, the coefficients and the resulting FPA are printed.

    Relies on a module-level ``best_fit`` import so that it can be reloaded.
    """
    for dataset, filename in Processing().import_single_data():
        print(filename)

        train_X, train_y, test_X, test_y = Processing().separate_data(dataset)
        # The objective receives plain Python lists rather than arrays.
        train_X = train_X.tolist()
        train_y = train_y.tolist()

        from PyOptimize.General_Opt import Test_function

        def LTR(a, **kwargs):
            # Thin adapter so the optimiser gets a plain function.
            return Test_function().LTR(a, **kwargs)

        optimiser = pyGaft(
            objfunc=LTR,
            var_bounds=[(-2, 2)] * 20,
            individual_size=50,
            max_iter=10,
            max_or_min='max',
            X=train_X,
            y=train_y,
        )
        optimiser.run()

        # Re-execute best_fit to pick up the result of the run that just ended.
        importlib.reload(best_fit)
        last_entry = best_fit.best_fit[-1]
        a, fitness_value = last_entry[-2], last_entry[-1]
        print(f'a = {a}')

        pred_y = [np.dot(test_x, a) for test_x in test_X]
        fpa = PerformanceMeasure(test_y, pred_y).FPA()
        print(f'fpa = {fpa}')
def bootstrap():
    """Compare a GA-trained cost-sensitive RankSVM with a plain RankSVM.

    For each ``(dataset, filename)`` pair yielded by
    ``Processing().import_single_data()``:

    1. The training set is expected to be sorted in descending order
       (per the original notes this is already done inside ``Processing``).
    2. ``RankSVM.transform_pairwise`` builds ``P`` and ``r``. Per the original
       notes, ``P`` is a matrix whose rows are differences of two samples and
       ``r`` is a vector that is all ones because the data is sorted.
    3. ``PerformanceMeasure(...).calc_UN(type='cs')`` computes ``u`` and ``n``
       from the training targets.
    4. ``pyGaft`` minimises ``Loss`` over ``P, r, u, n`` to obtain ``w``.
    5. ``w`` is read back from the ``best_fit`` module and used by
       ``RankSVM.predict3`` on the testing data.

    A RankSVM fitted directly on the shuffled training data is evaluated as a
    baseline. Diagnostics and both FPA values are printed; nothing is returned.
    """
    # Loop-invariant imports and the objective adapter are set up once.
    from sklearn.utils import shuffle
    from PyOptimize.General_Opt import Test_function

    def Loss(x, **kwargs):
        # Thin adapter so the optimiser gets a plain function.
        return Test_function().Loss(x, **kwargs)

    for dataset, filename in Processing().import_single_data():
        print(filename)
        training_data_X, training_data_y, testing_data_X, testing_data_y = (
            Processing().separate_data(dataset))

        # Steps 1-2: pairwise transform of the (already sorted) training set.
        P, r = RankSVM().transform_pairwise(training_data_X, training_data_y)
        P = P.tolist()
        r = r.tolist()
        print('type of P ', type(P[0][0]), 'type of r ', type(r[0]))

        # Step 3: compute u and n from the training targets.
        u, n = PerformanceMeasure(training_data_y).calc_UN(type='cs')
        print(type(u[0]), type(n[0]))

        # Step 4: let the genetic algorithm search for w.
        pyGaft(objfunc=Loss, var_bounds=[(-2, 2)] * 20, individual_size=50,
               max_iter=200, max_or_min='min', P=P, r=r, u=u, n=n).run()

        # Step 5: read w back from best_fit. Import-then-reload always
        # re-executes the module, so the values come from the run that just
        # finished even if best_fit was already imported elsewhere. The
        # previous "import on first iteration" logic could return a stale,
        # cached module in that case.
        importlib.invalidate_caches()
        best_fit_module = importlib.reload(importlib.import_module('best_fit'))
        w = best_fit_module.best_fit[-1][-2]
        print('w = ', w)

        rs_pred_y = np.around(RankSVM(w=w).predict3(testing_data_X))
        rs_fpa = PerformanceMeasure(testing_data_y, rs_pred_y).FPA()
        print('rs_fpa:', rs_fpa)

        # Baseline: plain RankSVM fitted on the shuffled training data.
        X_shuf, y_shuf = shuffle(training_data_X, training_data_y)
        rs2 = RankSVM().fit(X_shuf, y_shuf)
        rs_pred_y2 = np.around(rs2.predict2(testing_data_X))
        rs_fpa2 = PerformanceMeasure(testing_data_y, rs_pred_y2).FPA()
        # NOTE(review): the AAE result is not reported anywhere; the call is
        # kept in case AAE() has side effects - confirm and drop if not.
        PerformanceMeasure(testing_data_y, rs_pred_y2).AAE()
        print('rs_fpa2:', rs_fpa2)
def bootstrap(dataset):
    """Run one bootstrap round of every model on ``dataset`` and record metrics.

    The dataset is split with ``Processing().separate_data``. The following
    models are trained and evaluated on the testing part, in this order:

    * GA-optimised RankSVM weights for ``calc_UN`` types ``'cs'``
      (cost-sensitive), ``'ir'`` and ``'svm'``
    * GA-optimised LTR, RankSVM, DecisionTreeRegressor, LinearRegression and
      BayesianRidge on the original training data
    * the same five models after ``RandomUnderSampler`` resampling
    * the same five models after ``Smote`` oversampling

    For every model FPA, PofB20, PofD20 and ranking are computed with
    ``PerformanceMeasure`` and appended to the matching module-level
    ``<prefix>_fpa_list`` / ``_pofb_list`` / ``_pofd_list`` / ``_ranking_list``.

    Args:
        dataset: passed unchanged to ``Processing().separate_data``.

    Returns:
        None. Results are accumulated in module-level lists.

    Relies on module-level ``Loss`` and ``LTR`` objective functions and on
    ``shuffle`` being importable at module scope.
    """
    training_data_X, training_data_y, testing_data_X, testing_data_y = (
        Processing().separate_data(dataset))

    # The pairwise transform is shared by all three GA-weighted RankSVM runs.
    P, r = RankSVM().transform_pairwise(training_data_X, training_data_y)
    P = P.tolist()
    r = r.tolist()

    def latest_ga_solution():
        """Return the solution vector of the GA run that just finished."""
        # Import-then-reload always re-executes best_fit. The old logic did a
        # plain `import` for the first stage of every call, which returns the
        # cached module (left over from the previous call) without re-reading
        # it, so the first model silently reused stale weights.
        importlib.invalidate_caches()
        module = importlib.reload(importlib.import_module('best_fit'))
        return module.best_fit[-1][-2]

    def record(pred_y, fpa_list, pofb_list, pofd_list, ranking_list):
        """Score ``pred_y`` against the testing targets and store all metrics."""
        fpa_list.append(PerformanceMeasure(testing_data_y, pred_y).FPA())
        pofb = PerformanceMeasure(testing_data_y, pred_y).PofB20()
        pofd = PerformanceMeasure(testing_data_y, pred_y).PofD20()
        ranking = PerformanceMeasure(testing_data_y, pred_y).ranking()
        pofb_list.append(pofb)
        pofd_list.append(pofd)
        ranking_list.append(ranking)

    def weighted_ranksvm_predictions(un_type):
        """Optimise RankSVM weights with the GA for one ``calc_UN`` type."""
        u, n = PerformanceMeasure(training_data_y).calc_UN(type=un_type)
        pyGaft(objfunc=Loss, var_bounds=[(-1, 1)] * 20, individual_size=500,
               max_iter=2, max_or_min='min', P=P, r=r, u=u, n=n).run()
        return RankSVM(w=latest_ga_solution()).predict3(testing_data_X)

    def ltr_predictions(train_X, train_y):
        """Optimise LTR coefficients with the GA and score the testing rows."""
        # NOTE(review): not referenced here; kept from the original,
        # presumably so the module behind the global LTR objective is loaded.
        from PyOptimize.General_Opt import Test_function  # noqa: F401
        pyGaft(objfunc=LTR, var_bounds=[(-20, 20)] * 20, individual_size=100,
               max_iter=2, max_or_min='max',
               X=train_X.tolist(), y=train_y.tolist()).run()
        a = latest_ga_solution()
        return [np.dot(test_x, a) for test_x in testing_data_X]

    def ranksvm_predictions(train_X, train_y, round_output):
        """Fit a plain RankSVM on shuffled data and predict the testing rows."""
        # Per the original note: the data is shuffled so that r is not all
        # ones; RankSVM raises an error when every r equals 1.
        shuf_X, shuf_y = shuffle(train_X, train_y)
        model = RankSVM(C=1.0).fit(shuf_X, shuf_y)
        pred_y = model.predict2(testing_data_X)
        return np.around(pred_y) if round_output else pred_y

    def regressor_predictions(model, train_X, train_y):
        """Fit a scikit-learn style regressor and predict the testing rows."""
        return model.fit(train_X, train_y).predict(testing_data_X)

    # --- GA-weighted RankSVM variants --------------------------------------
    # Cost-sensitive ranking SVM.
    record(weighted_ranksvm_predictions('cs'),
           csrs_fpa_list, csrs_pofb_list, csrs_pofd_list, csrs_ranking_list)
    # IR SVM.
    record(weighted_ranksvm_predictions('ir'),
           irsvm_fpa_list, irsvm_pofb_list, irsvm_pofd_list,
           irsvm_ranking_list)
    # Original TODO: a variant that drops the other parameter still needs to
    # be added here.
    record(weighted_ranksvm_predictions('svm'),
           svm_fpa_list, svm_pofb_list, svm_pofd_list, svm_ranking_list)

    # --- Models trained on the original training data ----------------------
    record(ltr_predictions(training_data_X, training_data_y),
           ltr_fpa_list, ltr_pofb_list, ltr_pofd_list, ltr_ranking_list)
    # Only this RankSVM run rounds its predictions; the resampled runs below
    # do not (behaviour kept from the original).
    record(ranksvm_predictions(training_data_X, training_data_y, True),
           rs_fpa_list, rs_pofb_list, rs_pofd_list, rs_ranking_list)
    record(regressor_predictions(DecisionTreeRegressor(),
                                 training_data_X, training_data_y),
           dtr_fpa_list, dtr_pofb_list, dtr_pofd_list, dtr_ranking_list)
    record(regressor_predictions(linear_model.LinearRegression(),
                                 training_data_X, training_data_y),
           lr_fpa_list, lr_pofb_list, lr_pofd_list, lr_ranking_list)
    record(regressor_predictions(BayesianRidge(),
                                 training_data_X, training_data_y),
           brr_fpa_list, brr_pofb_list, brr_pofd_list, brr_ranking_list)

    # --- Same models after random under-sampling of the training data ------
    rus_X, rus_y, _ = RandomUnderSampler(
        ratio=1.0, return_indices=True).fit_sample(
            training_data_X, training_data_y)
    record(ltr_predictions(rus_X, rus_y),
           rus_ltr_fpa_list, rus_ltr_pofb_list, rus_ltr_pofd_list,
           rus_ltr_ranking_list)
    record(ranksvm_predictions(rus_X, rus_y, False),
           rus_rs_fpa_list, rus_rs_pofb_list, rus_rs_pofd_list,
           rus_rs_ranking_list)
    record(regressor_predictions(DecisionTreeRegressor(), rus_X, rus_y),
           rus_dtr_fpa_list, rus_dtr_pofb_list, rus_dtr_pofd_list,
           rus_dtr_ranking_list)
    record(regressor_predictions(linear_model.LinearRegression(),
                                 rus_X, rus_y),
           rus_lr_fpa_list, rus_lr_pofb_list, rus_lr_pofd_list,
           rus_lr_ranking_list)
    record(regressor_predictions(BayesianRidge(), rus_X, rus_y),
           rus_brr_fpa_list, rus_brr_pofb_list, rus_brr_pofd_list,
           rus_brr_ranking_list)

    # --- Same models after Smote oversampling of the training data ---------
    smote_X, smote_y = Smote(training_data_X, training_data_y,
                             ratio=1.0, k=5).over_sampling_addorginaldata()
    record(ltr_predictions(smote_X, smote_y),
           smote_ltr_fpa_list, smote_ltr_pofb_list, smote_ltr_pofd_list,
           smote_ltr_ranking_list)
    record(ranksvm_predictions(smote_X, smote_y, False),
           smote_rs_fpa_list, smote_rs_pofb_list, smote_rs_pofd_list,
           smote_rs_ranking_list)
    record(regressor_predictions(DecisionTreeRegressor(), smote_X, smote_y),
           smote_dtr_fpa_list, smote_dtr_pofb_list, smote_dtr_pofd_list,
           smote_dtr_ranking_list)
    # The original appended the undefined name `smote_lr_rankig` here, which
    # raised NameError; the computed ranking is stored instead.
    # NOTE(review): the target list name `smote_lr_rankig_list` is kept as
    # written because its definition is outside this block; it looks like a
    # misspelling of `smote_lr_ranking_list` - verify against the module.
    record(regressor_predictions(linear_model.LinearRegression(),
                                 smote_X, smote_y),
           smote_lr_fpa_list, smote_lr_pofb_list, smote_lr_pofd_list,
           smote_lr_rankig_list)
    record(regressor_predictions(BayesianRidge(), smote_X, smote_y),
           smote_brr_fpa_list, smote_brr_pofb_list, smote_brr_pofd_list,
           smote_brr_ranking_list)
def bootstrap(dataset):
    """Evaluate the cost-sensitive ranking SVM for several feature counts.

    The dataset is split with ``Processing().separate_data``. ``IG`` then
    selects 2, 3, 5, 8 and 13 features. For each selection a GA run minimises
    ``Loss`` with one variable per selected feature, and the resulting weights
    are read back from the reloaded ``best_fit`` module. The FPA on the
    testing part is appended to the matching module-level
    ``csrs_fpa_list<k>``.

    Args:
        dataset: passed unchanged to ``Processing().separate_data``.

    Returns:
        None. Results are accumulated in module-level lists.

    Relies on a module-level ``best_fit`` import and ``Loss`` objective.
    """
    training_data_X, training_data_y, testing_data_X, testing_data_y = (
        Processing().separate_data(dataset))

    # (number of selected features, list that collects the FPA for it)
    runs = (
        (2, csrs_fpa_list2),
        (3, csrs_fpa_list3),
        (5, csrs_fpa_list5),
        (8, csrs_fpa_list8),
        (13, csrs_fpa_list13),
    )

    # All selections are computed before any GA run, as in the original.
    selections = [
        IG(training_data_X, training_data_y,
           testing_data_X, testing_data_y).getSelectedFeature(feature_count)
        for feature_count, _ in runs
    ]

    for (feature_count, fpa_list), selection in zip(runs, selections):
        train_X, train_y, test_X, test_y = selection

        P, r = RankSVM().transform_pairwise(train_X, train_y)
        P = P.tolist()
        r = r.tolist()
        u, n = PerformanceMeasure(train_y).calc_UN(type='cs')

        # One GA variable per selected feature.
        pyGaft(objfunc=Loss, var_bounds=[(-1, 1)] * feature_count,
               individual_size=500, max_iter=200, max_or_min='min',
               P=P, r=r, u=u, n=n).run()

        # Re-execute best_fit to pick up the result of the run that just ended.
        importlib.reload(best_fit)
        w = best_fit.best_fit[-1][-2]

        pred_y = RankSVM(w=w).predict3(test_X)
        fpa_list.append(PerformanceMeasure(test_y, pred_y).FPA())

    # Debug leftover from the original; kept so the printed output is unchanged.
    first = False
    print(f'first={first}')
def bootstrap(training_data_X, training_data_y, testing_data_X, testing_data_y,
              dataset, trainingfilename, testingfilename):
    """Train every model on one training set and record FPA / PofB20.

    The training file name is appended to the module-level
    ``trainingdataname`` list and printed. The following models are then
    trained and evaluated on the testing data, in this order:

    * GA-optimised RankSVM weights for ``calc_UN`` types ``'cs'``
      (cost-sensitive), ``'ir'`` and ``'svm'``
    * GA-optimised LTR
    * RankSVM, DecisionTreeRegressor, LinearRegression and BayesianRidge
      fitted directly on the training data

    For every model FPA and PofB20 are computed with ``PerformanceMeasure``
    and appended to the matching module-level ``<prefix>_fpa_list`` and
    ``<prefix>_pofb_list``.

    Args:
        training_data_X: training features; must support ``.tolist()``.
        training_data_y: training targets; must support ``.tolist()``.
        testing_data_X: testing features.
        testing_data_y: testing targets.
        dataset: unused; kept for interface compatibility.
        trainingfilename: label of the training data, recorded and printed.
        testingfilename: unused; kept for interface compatibility.

    Returns:
        None. Results are accumulated in module-level lists.

    Relies on module-level ``Loss`` and ``LTR`` objective functions and on
    ``shuffle`` being importable at module scope.
    """
    trainingdataname.append(trainingfilename)
    print('trainingdata', trainingfilename)

    # The pairwise transform is shared by all three GA-weighted RankSVM runs.
    P, r = RankSVM().transform_pairwise(training_data_X, training_data_y)
    P = P.tolist()
    r = r.tolist()

    def latest_ga_solution():
        """Return the solution vector of the GA run that just finished."""
        # Import-then-reload always re-executes best_fit. The old logic did a
        # plain `import` for the first stage of every call, which returns the
        # cached module (left over from the previous call) without re-reading
        # it, so the first model silently reused stale weights.
        importlib.invalidate_caches()
        module = importlib.reload(importlib.import_module('best_fit'))
        return module.best_fit[-1][-2]

    def record(pred_y, fpa_list, pofb_list):
        """Score ``pred_y`` against the testing targets and store both metrics."""
        fpa = PerformanceMeasure(testing_data_y, pred_y).FPA()
        pofb = PerformanceMeasure(testing_data_y, pred_y).PofB20()
        fpa_list.append(fpa)
        pofb_list.append(pofb)

    def weighted_ranksvm_predictions(un_type):
        """Optimise RankSVM weights with the GA for one ``calc_UN`` type."""
        u, n = PerformanceMeasure(training_data_y).calc_UN(type=un_type)
        pyGaft(objfunc=Loss, var_bounds=[(-1, 1)] * 20, individual_size=500,
               max_iter=200, max_or_min='min', P=P, r=r, u=u, n=n).run()
        return RankSVM(w=latest_ga_solution()).predict3(testing_data_X)

    def regressor_predictions(model):
        """Fit a scikit-learn style regressor and predict the testing rows."""
        fitted = model.fit(training_data_X, training_data_y)
        return fitted.predict(testing_data_X)

    # Cost-sensitive ranking SVM.
    record(weighted_ranksvm_predictions('cs'), csrs_fpa_list, csrs_pofb_list)
    # IR SVM.
    record(weighted_ranksvm_predictions('ir'), irsvm_fpa_list, irsvm_pofb_list)
    # Original TODO: a variant that drops the other parameter still needs to
    # be added here.
    record(weighted_ranksvm_predictions('svm'), svm_fpa_list, svm_pofb_list)

    # LTR optimised by the GA on the raw training data.
    # NOTE(review): not referenced here; kept from the original, presumably so
    # the module behind the global LTR objective is loaded.
    from PyOptimize.General_Opt import Test_function  # noqa: F401
    pyGaft(objfunc=LTR, var_bounds=[(-20, 20)] * 20, individual_size=100,
           max_iter=200, max_or_min='max',
           X=training_data_X.tolist(), y=training_data_y.tolist()).run()
    a = latest_ga_solution()
    record([np.dot(test_x, a) for test_x in testing_data_X],
           ltr_fpa_list, ltr_pofb_list)

    # Ranking SVM, DTR, LR and BRR trained on the original dataset.
    # Per the original note: the data is shuffled so that r is not all ones;
    # RankSVM raises an error when every r equals 1.
    shuf_X, shuf_y = shuffle(training_data_X, training_data_y)
    rs = RankSVM(C=1.0).fit(shuf_X, shuf_y)
    record(np.around(rs.predict2(testing_data_X)), rs_fpa_list, rs_pofb_list)

    record(regressor_predictions(DecisionTreeRegressor()),
           dtr_fpa_list, dtr_pofb_list)
    record(regressor_predictions(linear_model.LinearRegression()),
           lr_fpa_list, lr_pofb_list)
    record(regressor_predictions(BayesianRidge()),
           brr_fpa_list, brr_pofb_list)