def getMyRFAcclist(treenum_list):
    """Evaluate the optimized random forest for several requested forest sizes.

    For every entry of *treenum_list* the function looks up a depth with
    ``getBestDepth``, builds a ``randomForest`` on the training split of
    ``DataSet3.csv``, replaces its trees with the result of the AUC
    optimization followed by the similarity optimization, and scores the
    forest with ``accuracy_metric``.

    Args:
        treenum_list: Iterable of tree counts; one forest is trained per entry.

    Returns:
        list: One accuracy value per entry of *treenum_list*, in the same
        order, as returned by ``accuracy_metric``.
    """
    # Fixed seed so that every run draws the same random numbers.
    seed(1)

    filename = 'DataSet3.csv'  # data already processed with SMOTE
    min_size = 1
    sample_ratio = 1
    # Keep as small as possible, but int(feature_ratio * (len(train[0]) - 1))
    # must stay greater than 1.
    feature_ratio = 0.3
    same_value = 20   # inner-product difference; below this counts as similar
    same_rate = 0.63  # tree similarity; above this counts as similar

    # Load the data. The second returned value (feature names in the original
    # code) is not needed here.
    dataset, _features = load_csv(filename)
    # NOTE(review): feature_ratio is passed as the second argument of
    # split_train_test - confirm this is the intended split ratio.
    traindata, testdata = split_train_test(dataset, feature_ratio)

    # Compare forests built with different numbers of trees.
    acc_num_list = []  # [remaining tree count, accuracy] pairs, for the report
    acc_list = []      # accuracy only; this is the return value
    for trees_num in treenum_list:
        # Optimization 1: pick the depth reported as best for this tree count.
        max_depth = getBestDepth(min_size, sample_ratio, trees_num,
                                 feature_ratio, traindata, testdata)
        print('max_depth is ', max_depth)

        # Initialize and grow the random forest.
        myRF = randomForest(trees_num, max_depth, min_size, sample_ratio,
                            feature_ratio)
        myRF.build_randomforest(traindata)
        print('Tree_number: ', len(myRF.trees))

        # Compute the AUC of every tree in the forest.
        auc_list = caculateAUC_1.caculateRFAUC(testdata, myRF.trees)
        # AUC optimization: form a new forest from the high-AUC trees.
        newTempForest = auc_optimization(auc_list, trees_num, myRF.trees)
        # Similarity optimization.
        myRF.trees = similarity_optimization(newTempForest, same_value,
                                             same_rate)

        # Evaluate on the test data.
        # NOTE(review): testdata[:-1] leaves out the last test row - confirm
        # that this is intentional.
        acc = myRF.accuracy_metric(testdata[:-1])
        print('myRF1_模型准确率:', acc, '%')

        acc_num_list.append([len(myRF.trees), acc])
        acc_list.append(acc)

    # NOTE(review): the label is hard-coded and does not reflect the actual
    # contents of treenum_list.
    print('trees_num from 20 to 60: ', acc_num_list)
    return acc_list
# print('max_depth is ',max_depth) # 初始化随机森林 myRF = randomForest(trees_num, max_depth, min_size, sample_ratio, feature_ratio) # 生成随机森林 myRF.build_randomforest(traindata) print('Tree_number: ', myRF.trees.__len__()) acc = myRF.accuracy_metric(testdata[:-1]) print('传统RF模型准确率:', acc, '%') # 画出某棵树用以可视化观察(这里是第一棵树) # plotTree.creatPlot(myRF.trees[0], features) # 计算森林中每棵树的AUC auc_list = caculateAUC_1.caculateRFAUC(testdata, myRF.trees) # 画出每棵树的auc——柱状图 # plotTree.plotAUCbar(auc_list.__len__(),auc_list) # 选取AUC高的决策数形成新的森林(auc优化) newTempForest = auc_optimization(auc_list, trees_num, myRF.trees) # 相似度优化 myRF.trees = similarity_optimization(newTempForest, same_value, same_rate) print('优化后Tree_number: ', myRF.trees.__len__()) # 测试评估 acc = myRF.accuracy_metric(testdata[:-1]) # print('优化后模型准确率:', acc, '%') print('myRF1_模型准确率:', acc, '%') # 画出某棵树用以可视化观察(这里是第一棵树) # plotTree.creatPlot(myRF.trees[0], features)