def nn_feature_selection_wrap(training_feature, training_label, alpha):
    """Cross-validate the NN predictor on every pair of the five feature columns.

    For each 2-combination of ``feature0`` .. ``feature4`` the rows are split
    into 10 stratified, shuffled folds.  Every fold is scored through
    ``nn_distance_calculate`` -> ``nn_predict`` -> ``cal_score`` and the fold
    scores are averaged with ``CalList.list_average_cal``.

    Args:
        training_feature: pandas DataFrame that contains at least the columns
            ``feature0`` .. ``feature4``.
        training_label: pandas DataFrame with a ``label`` column.  It is split
            with the same fold indices as ``training_feature``, so the two are
            assumed to be row-aligned.
        alpha: value forwarded unchanged to ``nn_predict`` and stored with
            every result row.

    Returns:
        Whatever ``CalParList.return_result()`` produces after one row
        (averaged scores, ``alpha`` and the two feature names) has been
        appended per feature pair.
    """
    Result_List = CalParList(3, "alpha", "feature_0", "feature_1")
    skf = StratifiedKFold(n_splits=10, shuffle=True)
    feature_avaliable = ['feature0', 'feature1', 'feature2', 'feature3', 'feature4']
    labels = training_label['label']

    for feature_a, feature_b in itertools.combinations(feature_avaliable, 2):
        selected = [feature_a, feature_b]
        Cal_Result_List = CalList()
        # NOTE(review): shuffle=True without random_state means each feature
        # pair is scored on a different random fold assignment.
        for train_index, test_index in skf.split(training_feature, labels):
            # split() yields *positional* indices, so rows must be taken with
            # iloc; label-based loc is only equivalent on a default RangeIndex.
            X_train = training_feature.iloc[train_index].loc[:, selected]
            X_val = training_feature.iloc[test_index].loc[:, selected]
            y_train = training_label.iloc[train_index]
            y_val = training_label.iloc[test_index]

            dis_1, dis_2 = nn_distance_calculate(X_val, X_train, y_train)
            y_pred_temp = nn_predict(dis_1, dis_2, alpha, X_val)
            Precall, f1_score, BER, FPR = cal_score(y_pred_temp, y_val['label'])
            Cal_Result_List.list_append(Precall, f1_score, BER, FPR)

        # list_average_cal returns the scores in a different order than
        # cal_score does; unpack by name and re-order for list_append.
        Precall, FPR, BER, f1_score = Cal_Result_List.list_average_cal()
        Result_List.list_append(Precall, f1_score, BER, FPR,
                                alpha, feature_a, feature_b)

    return Result_List.return_result()
def parameter_adjust(X_train, y_train, sample_amount, data_ratio):
    """Grid-search two SVM hyper-parameter exponents on freshly drawn samples.

    For every ``(gamma_exp, C_exp)`` pair a new sample is drawn with
    ``choose_data_seperately`` and evaluated by ``SVM_cross_validation``, which
    receives ``2**C_exp`` and ``2**gamma_exp`` as its third and fourth
    arguments.

    Args:
        X_train: feature data handed to ``choose_data_seperately``.
        y_train: label data handed to ``choose_data_seperately``.
        sample_amount: total number of rows to draw per grid point.
        data_ratio: desired ``label_1 : label_2`` ratio of the drawn sample;
            ``label_1_amount`` is ``int(sample_amount * ratio / (ratio + 1))``
            and ``label_2_amount`` is the remainder.

    Returns:
        Whatever ``CalParList.return_result()`` produces after one row
        (scores, ``gamma_exp``, ``C_exp`` and the reported time) has been
        appended per grid point.
    """
    start = time.time()
    label_1_amount = int(sample_amount * (data_ratio / (data_ratio + 1)))
    label_2_amount = int(sample_amount - label_1_amount)
    Result_List = CalParList(3, "gamma_exp", "C_exp", "time")

    gamma_exponents = range(-15, 4, 2)  # -15, -13, ..., 1, 3
    C_exponents = range(-5, 16, 2)      # -5, -3, ..., 13, 15

    for gamma_exp in gamma_exponents:
        for C_exp in C_exponents:
            # A new sample is drawn for every grid point (as before), so the
            # scores of different grid points come from different samples.
            sample_feature, sample_label = choose_data_seperately(
                X_train, y_train, label_1_amount, label_2_amount)
            tpr, fpr, BER, f1_score, time_var = SVM_cross_validation(
                sample_feature, sample_label, 2**C_exp, 2**gamma_exp)
            Result_List.list_append(tpr, f1_score, BER, fpr,
                                    gamma_exp, C_exp, time_var)

    print("the total executing time:%5.1fminute" % ((time.time() - start) / 60))
    return Result_List.return_result()
def nn_predict_with_distance_adjust_presion(training_feature, training_label,
                                            alpha_lower_bound, alpha_higher_bound):
    """Scan ``alpha`` in steps of 0.001 over a closed interval.

    Each alpha is evaluated with ``NN_cross_validation`` and the scores are
    stored together with the alpha value.

    The grid is generated from an integer step counter instead of repeatedly
    adding 0.001 to a float: accumulated rounding error could otherwise push
    the last value slightly above ``alpha_higher_bound`` (silently skipping
    the end point) and would record values such as 0.40100000000000002.

    Args:
        training_feature: feature data forwarded to ``NN_cross_validation``.
        training_label: label data forwarded to ``NN_cross_validation``.
        alpha_lower_bound: first alpha to evaluate (inclusive).
        alpha_higher_bound: last alpha to evaluate (inclusive).  If it is
            smaller than ``alpha_lower_bound`` nothing is evaluated.

    Returns:
        Whatever ``CalParList.return_result()`` produces after one row has
        been appended per alpha.
    """
    step = 0.001
    Result_List = CalParList(1, "alpha")

    span = alpha_higher_bound - alpha_lower_bound
    # The small epsilon keeps an end point that is an exact multiple of
    # `step` away from the start from being lost to float division error.
    step_count = int(span / step + 1e-9) + 1 if span >= 0 else 0

    for k in range(step_count):
        # Rounding removes representation noise from lower + k * step.
        alpha = round(alpha_lower_bound + k * step, 10)
        Precall, FPR, BER, f1_score = NN_cross_validation(
            training_feature, training_label, alpha)
        Result_List.list_append(Precall, f1_score, BER, FPR, alpha)

    return Result_List.return_result()
def kNN_data_ratio_adjust(training_feature, training_label, k_value):
    """Measure kNN cross-validation scores while shrinking the label-1 sample.

    ``label_2_amount`` is fixed at 10000.  ``label_1_amount`` starts at 40000
    and is reduced by 5000 while it is above 10000, then by 2500, until it is
    no longer greater than 2000.  For every size, five samples are drawn with
    ``choose_data_seperately`` and evaluated with ``kNN_cross_validation``;
    the scores and the reported times are averaged.

    Args:
        training_feature: feature data handed to ``choose_data_seperately``.
        training_label: label data handed to ``choose_data_seperately``.
        k_value: forwarded unchanged to ``kNN_cross_validation``.

    Returns:
        Whatever ``CalParList.return_result()`` produces after one row
        (averaged scores, both amounts, their ratio and the mean time) has
        been appended per sample size.
    """
    start = time.time()
    label_1_amount = 40000
    label_2_amount = 10000
    iter_amount = 5
    Result_List = CalParList(4, "label_1_amount", "label_2_amount", "ratio", "time")

    # The former `train_data = pd.concat(...)` was never read; it only copied
    # the whole data set, so it has been dropped.
    while label_1_amount > 2000:
        Cal_Result_List = CalList()
        time_list_temp = []
        for _ in range(iter_amount):
            sample_feature, sample_label = choose_data_seperately(
                training_feature, training_label, label_1_amount, label_2_amount)
            Precall, FPR, BER, f1_score, time_var = kNN_cross_validation(
                sample_feature, sample_label, k_value)
            Cal_Result_List.list_append(Precall, f1_score, BER, FPR)
            time_list_temp.append(time_var)

        Precall, FPR, BER, f1_score = Cal_Result_List.list_average_cal()
        time_ave = sum(time_list_temp) / len(time_list_temp)
        Result_List.list_append(Precall, f1_score, BER, FPR,
                                label_1_amount, label_2_amount,
                                label_1_amount / label_2_amount, time_ave)

        # Coarse steps for large samples, finer steps once at or below 10000.
        if label_1_amount > 10000:
            label_1_amount -= 5000
        else:
            label_1_amount -= 2500

    print("the total executing time:%5.1fminute" % ((time.time() - start) / 60))
    return Result_List.return_result()
def nn_predict_with_distance_adjust(training_feature, training_label):
    """Scan ``alpha`` from 0.10 to 0.90 with a finer grid between 0.40 and 0.60.

    The step is 0.01 while ``0.40 <= alpha <= 0.60`` and 0.05 otherwise, which
    yields 0.10, 0.15, ..., 0.35, 0.40, 0.41, ..., 0.60, 0.61, 0.66, ..., 0.86.
    Each alpha is evaluated with ``NN_cross_validation``.

    The grid is stepped in integer hundredths.  Accumulating the step in a
    float reached 0.39999999999999997 instead of 0.4, so the ``0.4 <= alpha``
    test failed and the fine grid did not start until about 0.45.

    Args:
        training_feature: feature data forwarded to ``NN_cross_validation``.
        training_label: label data forwarded to ``NN_cross_validation``.

    Returns:
        Whatever ``CalParList.return_result()`` produces after one row has
        been appended per alpha.
    """
    Result_List = CalParList(1, "alpha")

    alpha_pct = 10  # alpha expressed in exact integer hundredths
    while alpha_pct <= 90:
        alpha = alpha_pct / 100.0
        Precall, FPR, BER, f1_score = NN_cross_validation(
            training_feature, training_label, alpha)
        Result_List.list_append(Precall, f1_score, BER, FPR, alpha)
        # Same stepping rule as before, evaluated on exact integers.
        alpha_pct += 1 if 40 <= alpha_pct <= 60 else 5

    return Result_List.return_result()
def sample_amount_choice(X_train, y_train, sample_amount_upper_bound):
    """Record SVM cross-validation scores for progressively smaller samples.

    Starting at ``sample_amount_upper_bound`` and continuing while the size is
    greater than 800, the current size is evaluated several times (3 repeats
    initially, 10 once the size has been reduced by subtraction).  Each repeat
    draws a sample with ``choose_data_together`` and scores it with
    ``SVM_cross_validation(..., 1, 'auto')``.  The size is halved while above
    10000, reduced by 2000 while above 2000, and by 100 otherwise.

    Args:
        X_train: feature data handed to ``choose_data_together``.
        y_train: label data handed to ``choose_data_together``.
        sample_amount_upper_bound: first (largest) sample size to evaluate.

    Returns:
        Whatever ``CalParList.return_result()`` produces after one row
        (averaged scores, sample size and mean time) has been appended per
        sample size.
    """
    began_at = time.time()
    summary = CalParList(2, "sample_amount", "time")
    current_size = sample_amount_upper_bound
    repeats = 3

    while current_size > 800:
        run_scores = CalList()
        run_times = []
        for _ in range(repeats):
            print("current sample amount:%d" % current_size)
            sample_feature, sample_label = choose_data_together(
                X_train, y_train, current_size)
            tpr, fpr, BER, f1_score, time_var = SVM_cross_validation(
                sample_feature, sample_label, 1, 'auto')
            run_scores.list_append(tpr, f1_score, BER, fpr)
            run_times.append(time_var)

        Precall, FPR, BER, f1_score = run_scores.list_average_cal()
        mean_time = sum(run_times) / len(run_times)
        summary.list_append(Precall, f1_score, BER, FPR, current_size, mean_time)

        if current_size > 10000:
            # Large samples: halve, keep the current repeat count.
            current_size = int(current_size / 2)
        else:
            # Smaller samples: subtract a fixed amount and use 10 repeats.
            current_size = current_size - (100 if current_size <= 2000 else 2000)
            repeats = 10

    print("the total executing time:%5.1fminute" % ((time.time() - began_at) / 60))
    return summary.return_result()
def SVC_data_ratio_adjust(X_train, y_train, sample_amount):
    """Record SVM cross-validation scores for a range of label ratios.

    The ratio starts at 4, is halved while above 2, lowered by 0.1 down to
    0.8, and lowered by 0.25 below that; the loop runs while the ratio is
    greater than 0.2.  That gives 4, 2, 1.9, ..., 0.8, 0.7, 0.45.  For every
    ratio, five samples are drawn with ``choose_data_seperately`` and scored
    with ``SVM_cross_validation(..., 1, 'auto')``.

    The ratio is stepped in integer hundredths.  With repeated float
    subtraction, the branch taken at the 0.8 threshold and the termination at
    0.2 depended on accumulated rounding error rather than on the schedule.

    Args:
        X_train: feature data handed to ``choose_data_seperately``.
        y_train: label data handed to ``choose_data_seperately``.
        sample_amount: total number of rows per sample; it is divided into
            ``label_1_amount = int(sample_amount * ratio / (ratio + 1))`` and
            the remainder ``label_2_amount``.

    Returns:
        Whatever ``CalParList.return_result()`` produces after one row
        (averaged scores, both amounts, their quotient and the mean time) has
        been appended per ratio.
    """
    start = time.time()
    Result_List = CalParList(4, "label_1_amount", "label_2_amount", "ratio", "time")
    iter_amount = 5

    ratio_pct = 400  # data_ratio expressed in exact integer hundredths
    while ratio_pct > 20:
        data_ratio = ratio_pct / 100.0
        label_1_amount = int(sample_amount * (data_ratio / (data_ratio + 1)))
        label_2_amount = int(sample_amount - label_1_amount)

        Cal_Result_List = CalList()
        time_list_temp = []
        for _ in range(iter_amount):
            sample_feature, sample_label = choose_data_seperately(
                X_train, y_train, label_1_amount, label_2_amount)
            tpr, fpr, BER, f1_score, time_var = SVM_cross_validation(
                sample_feature, sample_label, 1, 'auto')
            time_list_temp.append(time_var)
            Cal_Result_List.list_append(tpr, f1_score, BER, fpr)

        Precall, FPR, BER, f1_score = Cal_Result_List.list_average_cal()
        time_ave = sum(time_list_temp) / len(time_list_temp)
        Result_List.list_append(Precall, f1_score, BER, FPR,
                                label_1_amount, label_2_amount,
                                label_1_amount / label_2_amount, time_ave)

        # Same schedule as before (halve above 2, -0.25 below 0.8, else -0.1),
        # evaluated on exact integers.
        if ratio_pct > 200:
            ratio_pct //= 2
        elif ratio_pct < 80:
            ratio_pct -= 25
        else:
            ratio_pct -= 10

    print("the total executing time:%5.1fminute" % ((time.time() - start) / 60))
    return Result_List.return_result()
def kNN_k_parameter_adjust(training_feature, training_label, k_start=5, k_stop=10):
    """Record kNN cross-validation scores for a range of ``k`` values.

    ``k`` starts at ``k_start`` and grows by 2 while it is below 27 and by 5
    afterwards; the scan runs while ``k < k_stop``.  With the defaults this
    evaluates k = 5, 7, 9, exactly as the previously hard-coded loop did.

    The bounds used to be fixed at 5 and 10, which made the larger step for
    ``k >= 27`` unreachable; passing a larger ``k_stop`` now reaches it.

    Args:
        training_feature: feature data forwarded to ``kNN_cross_validation``.
        training_label: label data forwarded to ``kNN_cross_validation``.
        k_start: first ``k`` to evaluate.
        k_stop: exclusive upper bound for ``k``.

    Returns:
        Whatever ``CalParList.return_result()`` produces after one row
        (scores, ``k`` and the reported time) has been appended per ``k``.
    """
    Result_List = CalParList(2, "k_value", "time")

    k_value = k_start
    while k_value < k_stop:
        Precall, FPR, BER, f1_score, time_var = kNN_cross_validation(
            training_feature, training_label, k_value)
        Result_List.list_append(Precall, f1_score, BER, FPR, k_value, time_var)
        # Step of 2 for small k, step of 5 once k has reached 27.
        k_value += 2 if k_value < 27 else 5

    return Result_List.return_result()