def show_result(filename, q=50, lr=0.1, thresh=0.01, epoch=10000, test_size=0.3):
    """Compare standard ABP against the improved ABP on one dataset file.

    Trains both variants on the same split, plots their training loss and
    accuracy curves, then prints test-set accuracy/precision/recall/F1 for each.

    :param filename: dataset path of the form "dir/name.ext"
    :param q: number of hidden units passed to ABP
    :param lr: learning rate
    :param thresh: convergence threshold
    :param epoch: maximum training epochs
    :param test_size: fraction of the data held out for testing
    """
    dataSet_name = filename.split('/')[1].split('.')[0]
    print("\033[31m------------------------" + dataSet_name + "------------------------\033[0m")

    dataSet = BP_revalue(filename)
    train_data, test_data = utils.splitDataSet1(dataSet, test_size=test_size)

    # Train both variants on the identical training split.
    v2, gamma2, w2, out2, errHistory2, accHistory2 = ABP(
        train_data, q=q, lr=lr, thresh=thresh, epoch=epoch)
    v3, gamma3, w3, out3, errHistory3, accHistory3 = ABP(
        train_data, q=q, lr=lr, thresh=thresh, epoch=epoch, pro=True)

    # Loss curves. The x-axis is scaled by 10, so histories are presumably
    # sampled every 10 epochs — TODO confirm against ABP.
    plt.plot(np.arange(len(errHistory2)) * 10, errHistory2, 'r', label='ABP')
    plt.plot(np.arange(len(errHistory3)) * 10, errHistory3, 'b', label='改进ABP')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.title(dataSet_name + "-ABP/改进ABP训练损失变化图")
    plt.rcParams['font.sans-serif'] = ['SimHei']  # CJK-capable font for titles
    plt.legend()
    plt.show()

    # Accuracy curves.
    plt.plot(np.arange(len(accHistory2)) * 10, accHistory2, 'r', label='ABP')
    plt.plot(np.arange(len(accHistory3)) * 10, accHistory3, 'b', label='改进ABP')
    # plt.ylim(0, 1)
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.title(dataSet_name + "-ABP/改进ABP训练准确率变化图")
    plt.rcParams['font.sans-serif'] = ['SimHei']
    plt.legend()
    plt.show()

    # Test-set metrics for standard ABP.
    err, acc, real_list, predict_list = calErr(test_data, v2, gamma2, w2, out2)
    print("------------ABP算法------------")
    acc, p, r, f1 = utils.calAccuracy(predict_list, real_list)
    print("正确率:{:.2%}\t查准率:{:.4f}\t查全率:{:.4f}\tF1:{:.4f}".format(
        acc, p, r, f1))

    # Test-set metrics for the improved ABP.
    err, acc, real_list, predict_list = calErr(test_data, v3, gamma3, w3, out3)
    print("------------改进ABP算法------------")
    acc, p, r, f1 = utils.calAccuracy(predict_list, real_list)
    print("正确率:{:.2%}\t查准率:{:.4f}\t查全率:{:.4f}\tF1:{:.4f}".format(
        acc, p, r, f1))
def evaluate_train_epoch(fixednet, trainDataLoader, criterion, device,
                         optimizer, bAuxiliary, auxiliary_weight):
    """Run one training epoch over trainDataLoader.

    :param fixednet: network to train; its forward returns (logits, logits_aux)
    :param trainDataLoader: iterable yielding (inputs, targets) batches
    :param criterion: loss function applied to logits vs targets
    :param device: torch device the batches are moved to
    :param optimizer: optimizer stepping fixednet's parameters
    :param bAuxiliary: if True, add the auxiliary-head loss to the main loss
    :param auxiliary_weight: weight applied to the auxiliary loss term
    :returns: (average loss, average top-1 accuracy, average top-5 accuracy)
    """
    fixednet.train()
    objs = utils.AvgrageMeter()
    top1 = utils.AvgrageMeter()
    top5 = utils.AvgrageMeter()
    for batch_idx, (traininputs, traintargets) in enumerate(trainDataLoader):
        traininputs, traintargets = traininputs.to(device), traintargets.to(
            device)
        optimizer.zero_grad()
        logits, logits_aux = fixednet(traininputs)
        loss = criterion(logits, traintargets)
        if bAuxiliary:
            loss_aux = criterion(logits_aux, traintargets)
            loss += auxiliary_weight * loss_aux
        loss.backward()
        # Clip gradients to a max norm of 5 before stepping.
        nn.utils.clip_grad_norm_(fixednet.parameters(), 5)
        optimizer.step()
        # BUG FIX: this accuracy computation was commented out while
        # prec1/prec5 are still used below, which raises NameError on the
        # first batch. It must run every iteration (mirrors
        # evaluate_test_epoch).
        prec1, prec5 = utils.calAccuracy(logits, traintargets, topk=(1, 5))
        tmpBatchSize = traininputs.size(0)
        objs.update(loss.data, tmpBatchSize)
        top1.update(prec1.data, tmpBatchSize)
        top5.update(prec5.data, tmpBatchSize)
    return objs.avg, top1.avg, top5.avg
def test(filename):
    """Train an XGBoost classifier on one dataset file and report metrics.

    Splits off 30% of the data for testing, boosts 500 rounds with
    multi:softmax, and prints accuracy / precision / recall / F1.

    :param filename: dataset path of the form "dir/name.ext"
    """
    X, Y_, _ = utils.getData(filename)
    Y = XGboost_revalue(Y_)
    dataSet_name = filename.split('/')[1].split('.')[0]
    print("------------------------" + dataSet_name + "------------------------")
    train_data, train_label, test_data, test_label = utils.splitDataSet(
        X, Y, test_size=0.3)

    # Wrap both splits in XGBoost's DMatrix container.
    dtrain = xgb.DMatrix(train_data, label=train_label)
    dtest = xgb.DMatrix(test_data, label=test_label)

    # Booster hyper-parameters.
    parameters = {
        'eta': 0.01,
        'subsample': 0.75,
        'objective': 'multi:softmax',  # multiclass; predict() yields labels
        'num_class': 2,                # number of classes to predict
        'max_depth': 8,                # depth of the boosted trees
    }
    num_round = 500  # number of boosting iterations

    bst = xgb.train(parameters, dtrain, num_round)
    preds = bst.predict(dtest)  # class labels under multi:softmax

    acc, p, r, f1 = utils.calAccuracy(preds, test_label)
    print("正确率:{:.2%}\t查准率:{:.4f}\t查全率:{:.4f}\tF1:{:.4f}".format(
        acc, p, r, f1))
def SBC(train_data, train_label, valid_data, valid_label):
    """
    SBC: selective naive Bayes — greedy backward attribute elimination driven
    by validation accuracy.
    Reference: Langley P, Sage S. Induction of selective Bayesian
    classifiers[M]//Uncertainty Proceedings 1994. Morgan Kaufmann, 1994: 399-406.
    :param train_data: training features (pandas DataFrame)
    :param train_label: training labels
    :param valid_data: validation features (pandas DataFrame)
    :param valid_label: validation labels
    :returns: p1_best — prior probability of the positive class
    :returns: px1_list_best — per-attribute conditional probabilities for the
        positive class (2-D list; discrete values directly, continuous ones as
        mean and variance)
    :returns: px0_list_best — same, for the negative class
    :returns: col_del — names of the columns that were removed
    """
    current_data = train_data  # working set; columns are dropped greedily
    col_name = train_data.columns.tolist()

    # Baseline: model trained on all attributes.
    p1_best, px1_list_best, px0_list_best = train(train_data, train_label,
                                                  is_Laplacian=True)
    pred = predict(valid_data, p1_best, px1_list_best, px0_list_best)
    max_acc, p, r, f1 = utils.calAccuracy(pred, valid_label)
    print("当保留所有列的时候,准确率为:", max_acc)

    col_del = []  # columns removed so far
    for col in col_name:  # greedily try removing each column in turn
        # NOTE(review): the baseline call uses is_Laplacian=True but this one
        # does not — confirm whether the asymmetry is intentional.
        p1, px1_list, px0_list = train(current_data.drop(columns=[col]),
                                       train_label)
        pred = predict(valid_data.drop(columns=[col]), p1, px1_list, px0_list)
        acc, p, r, f1 = utils.calAccuracy(pred, valid_label)
        if acc >= max_acc:  # removal did not hurt validation accuracy: keep it
            current_data = current_data.drop(columns=[col])
            valid_data = valid_data.drop(columns=[col])
            col_del.append(col)
            # BUG FIX: the new accuracy and the previous maximum were passed
            # in the wrong order (the message claims acc >= previous max).
            print("由于删除【{}】列后准确率由{}大于等于此前最大准确率{}因此删除该列!".format(
                col, acc, max_acc))
            max_acc = acc
            p1_best = p1
            px1_list_best = px1_list
            px0_list_best = px0_list
            # BUG FIX: was `len(current_data.columns == 1)`, which counts the
            # columns (truthy whenever any remain) instead of testing whether
            # exactly one column is left.
            if len(current_data.columns) == 1:
                break
    return p1_best, px1_list_best, px0_list_best, col_del
def evaluate_test_epoch(fixednet, testDataLoader, criterion, device):
    """Evaluate fixednet over testDataLoader.

    :param fixednet: network to evaluate
    :param testDataLoader: iterable yielding (inputs, targets) batches
    :param criterion: loss function applied to logits vs targets
    :param device: torch device the batches are moved to
    :returns: (average loss, average top-1 accuracy, average top-5 accuracy)
    """
    fixednet.eval()
    objs = utils.AvgrageMeter()
    top1 = utils.AvgrageMeter()
    top5 = utils.AvgrageMeter()
    # NOTE(review): no torch.no_grad() here, so autograd state is tracked
    # during evaluation — confirm whether that is intentional. Also note the
    # forward call is unpacked as a single value here but as a tuple in
    # evaluate_train_epoch — verify the model's eval-mode return shape.
    for inputs, targets in testDataLoader:
        inputs = inputs.to(device)
        targets = targets.to(device)
        logits = fixednet(inputs)
        loss = criterion(logits, targets)
        prec1, prec5 = utils.calAccuracy(logits, targets, topk=(1, 5))
        batch_n = inputs.size(0)
        objs.update(loss.data, batch_n)
        top1.update(prec1.data, batch_n)
        top5.update(prec5.data, batch_n)
        # break
    return objs.avg, top1.avg, top5.avg
# ----- Naive Bayes on the haberman dataset -----
test_size = 0.3
seed = 1111
print(
    "\033[31m------------------------haberman------------------------\033[0m"
)
print("\033[4;32m*************朴素贝叶斯*************\033[0m")
X, Y_, col_name = utils.getData("data/haberman.data")
Y = Bayes_revalue(Y_)
col_name = col_name.tolist()[:-1]  # drop the label column's name
train_data, train_label, test_data, test_label = utils.splitDataSet(
    X, Y, test_size=test_size, seed=seed)
train_data = pd.DataFrame(train_data, columns=col_name)
test_data = pd.DataFrame(test_data, columns=col_name)
p1, px1_list, px0_list = train(train_data, train_label, is_Laplacian=True)
pred = predict(test_data, p1, px1_list, px0_list)
acc, p, r, f1 = utils.calAccuracy(pred, test_label)
print("正确率:{:.2%}\t查准率:{:.4f}\t查全率:{:.4f}\tF1:{:.4f}".format(
    acc, p, r, f1))

# ----- Structure-extended naive Bayes (AODE) on the same dataset -----
print("\033[4;32m*************朴素贝叶斯结构扩展*************\033[0m")
filename = "data/haberman.data"  # haberman.data【】\heart.dat【】
dataSet = utils.getDataSet(filename)
train_data, test_data = utils.splitDataSet1(dataSet,
                                            test_size=test_size,
                                            seed=seed)
train_data, test_data = pd.DataFrame(train_data), pd.DataFrame(test_data)
# Last column is the label; everything before it is a feature.
train_label = train_data.iloc[:, -1].astype(int)
train_data = train_data.iloc[:, :-1].astype(int)
test_label = test_data.iloc[:, -1].astype(int)
test_data = test_data.iloc[:, :-1].astype(int)
pred = AODE_Predict(train_data, train_label, test_data)
# ----- KNN on the heart dataset -----
filename = "data/heart.dat"  # haberman.data【3,0.3】\heart.dat【】
dataSet = utils.getDataSet(filename)
train_data, test_data = utils.splitDataSet1(dataSet, test_size=0.3)
test_data_data = test_data[:, :-1]
real_label = [int(label) for label in test_data[:, -1]]
# Candidate K values; earlier runs also tried
# [1,2,3,4,5,6,7,8,9,10,15,17,23], [3,7,11], [3,6,11].
for K in [3, 5, 7, 11]:
    pred = KNN_Predict(train_data=train_data, test_data=test_data_data, K=K)
    print("------------" + filename + "---KNN算法---K=" + str(K) + "------------")
    # print("true:", real_label)
    # print("pred:", pred_label)
    acc, p, r, f1 = utils.calAccuracy(pred, real_label)
    print("正确率:{:.2%}\t查准率:{:.4f}\t查全率:{:.4f}\tF1:{:.4f}".format(acc, p, r, f1))

# ----- KNN on the haberman dataset -----
filename = "data/haberman.data"  # haberman.data【3,0.3】\heart.dat【】
dataSet = utils.getDataSet(filename)
train_data, test_data = utils.splitDataSet1(dataSet, test_size=0.3)
print()
test_data_data = test_data[:, :-1]
real_label = [int(label) for label in test_data[:, -1]]
for K in [3, 5, 7, 11]:
    pred = KNN_Predict(train_data=train_data, test_data=test_data_data, K=K)
    print("------------" + filename + "---KNN算法---K=" + str(K) + "------------")
    # print("true:", real_label)