# train_feature = train_feature.sample(frac=1).reset_index(drop=True)
# test_feature = test_feature.sample(frac=1).reset_index(drop=True)

# Load the previously selected feature names, one name per line.
print(feature_select_path)
if os.path.exists(feature_select_path):
    feature_list = []
    with open(feature_select_path, 'r+') as f:
        for item in f.readlines():
            feature_list.append(item.split('\n')[0])
else:
    # Fixed: the original only printed here and then crashed later with a
    # NameError on `feature_list`; fail fast with a clear error instead.
    print('无法找到feature list,请重新运行特征选择')
    raise FileNotFoundError(feature_select_path)

X_ = train_feature[feature_list]
Y_ = train_feature['feature_label']
print(len(Y_))

# Rebalance the classes before training (majority label -1, minority label 1).
X, Y = undersampling(X_.values, Y_.values, majority_class=-1, minority_class=1,
                     maj_proportion=n_maj, min_proportion=n_min)

# Report the class balance after undersampling.
# NOTE(review): variable name `num_negitive` (sic) kept — it may be referenced
# by code outside this chunk.
num_negitive = sum(1 for label in Y if label == -1)
num_positive = len(Y) - num_negitive
print("the number of negitive smaple is {0},and the number of positive sample is{1}".format(num_negitive,num_positive))

sensitivity, recall, fscore, MACC = model_training_stack(X, Y, True, 'feature_label', n_maj, n_min)

# Persist the result for this feature subset.
save_path = '/home/deep/heart_science/result3/' + str(feature_subset[i])
with open(save_path, 'w+') as f:
    f.write('sensitivity is ')
# Split each of the three training tables into features (X) and label (Y).
# By convention the LAST entry of each feature list is the label column name,
# so pop it off before selecting the feature columns.
feature_label0 = feature_list0[-1]
feature_label1 = feature_list1[-1]
feature_label2 = feature_list2[-1]
feature_list0.remove(feature_label0)
feature_list1.remove(feature_label1)
feature_list2.remove(feature_label2)

X_0 = train_data0[feature_list0]
Y_0 = train_data0[feature_label0]
X_1 = train_data1[feature_list1]
Y_1 = train_data1[feature_label1]
X_2 = train_data2[feature_list2]
Y_2 = train_data2[feature_label2]

# Rebalance each dataset independently (majority label -1, minority label 1).
X0, Y0 = undersampling(X_0.values, Y_0.values, majority_class=-1, minority_class=1, maj_proportion=n_maj, min_proportion=n_min)
X1, Y1 = undersampling(X_1.values, Y_1.values, majority_class=-1, minority_class=1, maj_proportion=n_maj, min_proportion=n_min)
X2, Y2 = undersampling(X_2.values, Y_2.values, majority_class=-1, minority_class=1, maj_proportion=n_maj, min_proportion=n_min)

# Train the stacked model on each rebalanced dataset.
sensitivity0, recall0, fscore0, MACC0 = model_training_stack(X0, Y0, True, 'feature_label', n_maj, n_min)
sensitivity1, recall1, fscore1, MACC1 = model_training_stack(X1, Y1, True, 'feature_label', n_maj, n_min)
sensitivity2, recall2, fscore2, MACC2 = model_training_stack(X2, Y2, True, 'feature_label', n_maj, n_min)

# Output paths for the three runs (paths spelled as in the original).
save_path0 = '/home/deep/heart_science/dicuss0.txt'
save_path1 = '/home/deep/heart_science/dicuss1.txt'
save_path2 = '/home/deep/heart_science/dicuss2.txt'
def GSFS(feature_data, good_feature, feature_score):
    """Generalized Sequential Forward Selection.

    Seeds the selected set with the single highest-scoring feature, then
    repeatedly takes the L(=2) candidates with the best relevance/redundancy
    ratio, tries adding each pair to the selected set, and keeps whichever
    candidate set gave the best stacked-model precision so far.

    Parameters:
        feature_data: DataFrame holding the candidate feature columns plus a
            'feature_label' column.
        good_feature: list of candidate feature names (mutated in place).
        feature_score: list of scores aligned with good_feature (mutated in
            place).

    Returns:
        list of selected feature names (feature_B).
    """
    continue_flag = True
    last_precision = 0
    L = 2
    score_list = feature_score
    feature_A = good_feature   # candidate features still available
    feature_B = []             # features already selected

    # Seed with the single best-scoring feature.
    best_feature = feature_A[score_list.index(np.max(score_list))]
    feature_B.append(best_feature)
    feature_A.remove(best_feature)
    score_list.remove(np.max(score_list))

    # Fixed: saved_feature was unbound if no candidate improved precision in
    # the very first iteration.
    saved_feature = []
    while continue_flag:
        # Rank candidates by relevance divided by redundancy w.r.t. the
        # already-selected set (score may be negative).
        ipt_degree = {}
        for i in range(len(feature_A)):
            str_feature = [str(feature_A[i])]
            Df = score_list[i] / cal_rongyu(feature_data[str_feature],
                                            feature_data[feature_B],
                                            len(feature_B))
            ipt_degree[feature_A[i]] = Df
        sort = sorted(zip(ipt_degree.values(), ipt_degree.keys()), reverse=True)
        print(sort)

        if len(sort) > 1:
            # Take the top-L candidates and evaluate every pair of them.
            buffer = [sort[i][1] for i in range(L)]
            for i in range(len(buffer)):
                for j in range(i + 1, len(buffer)):
                    # (removed dead `if i == j: continue` — j always exceeds i)
                    try_feature = [buffer[j], buffer[i]]
                    test_feature = feature_B + try_feature
                    print(test_feature)
                    X_ = feature_data[test_feature]
                    Y_ = feature_data['feature_label']
                    X_train, Y_train = undersampling(X_.values, Y_.values,
                                                     majority_class=-1,
                                                     minority_class=1,
                                                     maj_proportion=n_maj,
                                                     min_proportion=n_min)
                    # Fixed: model_training_stack returns 4 values everywhere
                    # else in this file; unpacking 3 here raised ValueError.
                    now_precision, _, _, _ = model_training_stack(
                        X_train, Y_train, True, 'feature_label', n_maj, n_min)
                    if now_precision > last_precision:
                        saved_feature = try_feature
                        last_precision = now_precision
        else:
            # Only one candidate left: evaluate it on its own.
            buffer = [sort[0][1]]
            for i in range(len(buffer)):
                try_feature = [buffer[i]]
                test_feature = feature_B + try_feature
                print(test_feature)
                X_ = feature_data[test_feature]
                Y_ = feature_data['feature_label']
                X_train, Y_train = undersampling(X_.values, Y_.values,
                                                 majority_class=-1,
                                                 minority_class=1,
                                                 maj_proportion=n_maj,
                                                 min_proportion=n_min)
                now_precision, _, _, _ = model_training_stack(
                    X_train, Y_train, True, 'feature_label', n_maj, n_min)
                if now_precision > last_precision:
                    saved_feature = try_feature
                    last_precision = now_precision

        print("此轮迭代完毕!最好的precision是", last_precision)
        print("添加了属性", saved_feature)
        feature_B += saved_feature
        # Drop every buffered candidate (tried or not) from the pool.
        # Fixed: the original shrank feature_A but never score_list, so the
        # score_list[i]/feature_A[i] pairing drifted after the first round
        # (the period>0 branch of LRS shows paired deletion is the intended
        # invariant).
        for item in buffer:
            idx = feature_A.index(item)
            del feature_A[idx]
            del score_list[idx]
        saved_feature = []
        if len(buffer) < 2:
            continue_flag = False
    return feature_B
def LRS(feature_data, good_feature, feature_score):
    """Plus-L Minus-R style selection run over several periods.

    Each period restarts from the full candidate pool minus the features
    already chosen, then repeatedly buffers the L(=3) best candidates by
    relevance/redundancy ratio and searches over dropping 1, 2, or 3 of them
    (the nested i/j/k loops), keeping the drop that maximises the stacked
    model's precision.

    Parameters:
        feature_data: DataFrame with candidate feature columns plus a
            'feature_label' column.
        good_feature: list of candidate feature names (copied, not mutated).
        feature_score: list of scores aligned with good_feature (copied).

    Returns:
        list of selected feature names (feature_B).
    """
    for period in range(periods):
        print('it is number ', period)
        if period == 0:
            continue_flag = True
            best_precision = 0
            L = 3
            score_list = copy.copy(feature_score)
            feature_A = copy.copy(good_feature)   # candidate pool
            feature_B = []                        # selected features
            # Seed with the single best-scoring feature.
            best_feature = feature_A[score_list.index(np.max(score_list))]
            feature_B.append(best_feature)
            feature_A.remove(best_feature)
            score_list.remove(np.max(score_list))
        else:
            continue_flag = True
            score_list = copy.copy(feature_score)
            feature_A = copy.copy(good_feature)
            # Remove already-selected features (and their scores) from the
            # fresh candidate pool, keeping the two lists aligned.
            for x in range(len(feature_B)):
                print(feature_B[x])
                position = feature_A.index(feature_B[x])
                print(position)
                del score_list[position]
                del feature_A[position]
            print(len(score_list))
            print(len(feature_A))

        # Fixed: saved_feature was unbound if the first pass found no
        # precision improvement.
        saved_feature = []
        while continue_flag:
            # Rank candidates by relevance divided by redundancy w.r.t. the
            # already-selected set (score may be negative).
            ipt_degree = {}
            for i in range(len(feature_A)):
                str_feature = [str(feature_A[i])]
                Df = score_list[i] / cal_rongyu(feature_data[str_feature],
                                                feature_data[feature_B],
                                                len(feature_B))
                ipt_degree[feature_A[i]] = Df
            sort = sorted(zip(ipt_degree.values(), ipt_degree.keys()),
                          reverse=True)
            print(sort)

            # Buffer the top L candidates (fewer if the pool has run low).
            if len(sort) >= 3:
                buffer = [sort[i][1] for i in range(L)]
            else:
                buffer = [sort[i][1] for i in range(len(sort))]

            for i in range(len(buffer)):
                try_feature_1 = list(buffer)
                test_feature_1 = list(feature_B) + try_feature_1
                index1 = int(i + len(feature_B))
                del test_feature_1[index1]   # try the buffer minus feature i
                X_ = feature_data[test_feature_1]
                Y_ = feature_data['feature_label']
                X_train, Y_train = undersampling(X_.values, Y_.values,
                                                 majority_class=-1,
                                                 minority_class=1,
                                                 maj_proportion=n_maj,
                                                 min_proportion=n_min)
                now_precision, _, _, _ = model_training_stack(
                    X_train, Y_train, True, 'feature_label', n_maj, n_min)
                if now_precision > best_precision:
                    # Fixed: `option = try_feature_1` aliased the working
                    # candidate list, so `del option[i]` silently shortened
                    # it for the inner loops; take a copy instead.
                    option = list(try_feature_1)
                    del option[i]
                    saved_feature = option
                    best_precision = now_precision

                # Additionally try dropping a second feature.
                for j in range(len(try_feature_1) - i):
                    if i == j:
                        continue
                    try_feature_2 = list(try_feature_1)
                    test_feature_2 = list(feature_B) + try_feature_2
                    index2 = int(j + index1)
                    del test_feature_2[index2 - 1]
                    X_ = feature_data[test_feature_2]
                    Y_ = feature_data['feature_label']
                    X_train, Y_train = undersampling(X_.values, Y_.values,
                                                     majority_class=-1,
                                                     minority_class=1,
                                                     maj_proportion=n_maj,
                                                     min_proportion=n_min)
                    # Fixed: this call unpacked only 3 of the 4 return
                    # values and raised ValueError.
                    now_precision, _, _, _ = model_training_stack(
                        X_train, Y_train, True, 'feature_label', n_maj, n_min)
                    if now_precision > best_precision:
                        option = list(try_feature_2)   # fixed aliasing (see above)
                        del option[j]
                        saved_feature = option
                        best_precision = now_precision

                    # Additionally try dropping a third feature.
                    for k in range(len(try_feature_2) - j):
                        if i == j or i == k or j == k:
                            continue
                        try_feature_3 = list(try_feature_2)
                        test_feature_3 = list(feature_B) + try_feature_3
                        del test_feature_3[int(k + index2) - 1]
                        X_ = feature_data[test_feature_3]
                        Y_ = feature_data['feature_label']
                        X_train, Y_train = undersampling(
                            X_.values, Y_.values, majority_class=-1,
                            minority_class=1, maj_proportion=n_maj,
                            min_proportion=n_min)
                        now_precision, _, _, _ = model_training_stack(
                            X_train, Y_train, True, 'feature_label', n_maj,
                            n_min)
                        if now_precision > best_precision:
                            option = list(try_feature_3)   # fixed aliasing
                            del option[k]
                            saved_feature = option
                            best_precision = now_precision

            print("此轮迭代完毕!最好的precision是", best_precision)
            print("添加了属性", saved_feature)
            feature_B += saved_feature
            # Drop every buffered candidate from the pool.
            # Fixed: keep score_list aligned with feature_A (the original
            # only shrank feature_A, so the scores drifted — the period>0
            # branch above shows paired deletion is the intended invariant).
            for item in buffer:
                idx = feature_A.index(item)
                del feature_A[idx]
                del score_list[idx]
            print(feature_B)
            saved_feature = []
            if len(buffer) < 3:
                continue_flag = False
    return feature_B
def BDS(feature_data, good_feature, feature_score):
    """Bidirectional search: grow a forward set while shrinking a backward set.

    Each iteration ranks the remaining candidates by relevance/redundancy,
    tries adding the best one to the forward-selected set, and tries removing
    the best backward candidate from the retained set, keeping each move only
    when it improves the stacked model's precision. Stops when the two sets
    converge.

    Parameters:
        feature_data: DataFrame with candidate feature columns plus a
            'feature_label' column.
        good_feature: list of candidate feature names (copied, not mutated).
        feature_score: list of scores aligned with good_feature (copied).

    Returns:
        list of retained feature names (backward_sub).
    """
    last_precisionf = 0
    last_precisionb = 0
    continue_flag = True
    feature_A = copy.copy(good_feature)
    feature_A.remove(good_feature[0])
    forward_sub = [good_feature[0]]           # forward: already-selected set
    backward_sub = copy.copy(good_feature)    # backward: still-retained set
    backward_sub.remove(good_feature[-1])
    moved_list = [good_feature[-1]]           # backward: already-removed set
    score_list1 = copy.copy(feature_score)
    score_list1.remove(score_list1[0])
    # NOTE(review): score_list1 is never trimmed when feature_A shrinks below,
    # so score/feature alignment degrades over iterations — same pre-existing
    # issue as the other selectors; confirm intended scoring before relying
    # on the ranking.
    while continue_flag:
        ipt_degree1 = {}
        ipt_degree2 = {}
        # Fixed: `range(feature_A)` passed a list to range() → TypeError.
        for i in range(len(feature_A)):
            str_feature = [str(feature_A[i])]
            Df = score_list1[i] / cal_rongyu(feature_data[str_feature],
                                             feature_data[forward_sub],
                                             len(forward_sub))
            ipt_degree1[feature_A[i]] = Df
        sort1 = sorted(zip(ipt_degree1.values(), ipt_degree1.keys()),
                       reverse=True)
        print(sort1)
        # Fixed: `range(backward_sub)` → TypeError; also score the i-th
        # retained feature rather than feature_A[i], and key the dict on the
        # feature name (a list is unhashable).
        for i in range(len(backward_sub)):
            str_feature = [str(backward_sub[i])]
            Df = score_list1[i] / cal_rongyu(feature_data[str_feature],
                                             feature_data[moved_list],
                                             len(moved_list))
            ipt_degree2[backward_sub[i]] = Df
        sort2 = sorted(zip(ipt_degree2.values(), ipt_degree2.keys()),
                       reverse=True)
        print(sort2)

        # Forward step: try adding the best candidate not already discarded
        # by the backward pass.
        if sort1[0][1] not in moved_list:
            try_feature = [sort1[0][1]]
            test_feature1 = try_feature + forward_sub
            # Fixed: the original removed the list object `try_feature` from
            # feature_A (ValueError); remove the feature name itself.
            feature_A.remove(sort1[0][1])
            X_ = feature_data[test_feature1]
            Y_ = feature_data['feature_label']
            X_train, Y_train = undersampling(X_.values, Y_.values,
                                             majority_class=-1,
                                             minority_class=1,
                                             maj_proportion=n_maj,
                                             min_proportion=n_min)
            now_precision, _, _, _ = model_training_stack(
                X_train, Y_train, True, 'feature_label', n_maj, n_min)
            if now_precision > last_precisionf:
                last_precisionf = now_precision
                # Fixed: appended the wrapper list, nesting a list inside
                # forward_sub; append the feature name.
                forward_sub.append(sort1[0][1])
        else:
            feature_A.remove(sort1[0][1])

        # Backward step: try removing the best backward candidate, unless the
        # forward pass already committed to it.
        if sort2[0][1] not in forward_sub:
            try_feature = sort2[0][1]
            # Fixed: `backward_sub.remove(...)` returns None, so the original
            # bound test_feature2 = None and crashed; build the trial set
            # explicitly and only commit the removal if it helped.
            test_feature2 = [feat for feat in backward_sub
                             if feat != try_feature]
            moved_list.append(try_feature)
            X_ = feature_data[test_feature2]
            Y_ = feature_data['feature_label']
            X_train, Y_train = undersampling(X_.values, Y_.values,
                                             majority_class=-1,
                                             minority_class=1,
                                             maj_proportion=n_maj,
                                             min_proportion=n_min)
            now_precision, _, _, _ = model_training_stack(
                X_train, Y_train, True, 'feature_label', n_maj, n_min)
            if now_precision > last_precisionb:
                last_precisionb = now_precision
                # Fixed: the original removed the feature before training and
                # then tried to remove it again (ValueError); remove once,
                # only on improvement — TODO confirm this matches the
                # intended backward policy.
                backward_sub.remove(try_feature)

        if backward_sub == forward_sub:
            continue_flag = False
    return backward_sub