コード例 #1
0
def show_result(filename,
                q=50,
                lr=0.1,
                thresh=0.01,
                epoch=10000,
                test_size=0.3):
    """Train ABP and improved ABP on one data set, plot their training
    loss/accuracy curves, and print test-set metrics for both models.

    :param filename: data file path like "data/xxx.data"; the text between
        '/' and '.' is used as the data-set display name
    :param q: number of hidden units passed to ABP
    :param lr: learning rate
    :param thresh: convergence threshold for training
    :param epoch: maximum number of training epochs
    :param test_size: fraction of the data held out for testing
    """
    dataSet_name = filename.split('/')[1].split('.')[0]
    print("\033[31m------------------------" + dataSet_name +
          "------------------------\033[0m")
    dataSet = BP_revalue(filename)
    train_data, test_data = utils.splitDataSet1(dataSet, test_size=test_size)

    # Enable a CJK-capable font once, up front (the original set it
    # redundantly after each title; rcParams is resolved at draw time).
    plt.rcParams['font.sans-serif'] = ['SimHei']

    v2, gamma2, w2, out2, errHistory2, accHistory2 = ABP(train_data,
                                                         q=q,
                                                         lr=lr,
                                                         thresh=thresh,
                                                         epoch=epoch)
    v3, gamma3, w3, out3, errHistory3, accHistory3 = ABP(train_data,
                                                         q=q,
                                                         lr=lr,
                                                         thresh=thresh,
                                                         epoch=epoch,
                                                         pro=True)

    # Histories are recorded every 10 epochs, hence the x-axis scaling.
    _plot_history(errHistory2, errHistory3, 'Loss',
                  dataSet_name + "-ABP/改进ABP训练损失变化图")
    _plot_history(accHistory2, accHistory3, 'Accuracy',
                  dataSet_name + "-ABP/改进ABP训练准确率变化图")

    # Test-set evaluation of plain ABP, then of improved ABP.
    _report_metrics(test_data, v2, gamma2, w2, out2,
                    "------------ABP算法------------")
    _report_metrics(test_data, v3, gamma3, w3, out3,
                    "------------改进ABP算法------------")


def _plot_history(base_history, pro_history, ylabel, title):
    """Plot one training metric for ABP (red) vs improved ABP (blue)."""
    plt.plot(np.arange(len(base_history)) * 10, base_history, 'r',
             label='ABP')
    plt.plot(np.arange(len(pro_history)) * 10, pro_history, 'b',
             label='改进ABP')
    plt.xlabel('Epoch')
    plt.ylabel(ylabel)
    plt.title(title)
    plt.legend()
    plt.show()


def _report_metrics(test_data, v, gamma, w, out, header):
    """Evaluate one trained model on *test_data* and print its metrics."""
    err, acc, real_list, predict_list = calErr(test_data, v, gamma, w, out)
    print(header)
    acc, p, r, f1 = utils.calAccuracy(predict_list, real_list)
    print("正确率:{:.2%}\t查准率:{:.4f}\t查全率:{:.4f}\tF1:{:.4f}".format(
        acc, p, r, f1))
コード例 #2
0
def evaluate_train_epoch(fixednet, trainDataLoader, criterion, device,
                         optimizer, bAuxiliary, auxiliary_weight):
    """Run one training epoch over *trainDataLoader*.

    :param fixednet: network to train; forward returns (logits, logits_aux)
    :param trainDataLoader: iterable of (inputs, targets) batches
    :param criterion: loss function
    :param device: device the batches are moved to
    :param optimizer: optimizer stepped once per batch
    :param bAuxiliary: if truthy, add the auxiliary-head loss
    :param auxiliary_weight: weight of the auxiliary loss term
    :returns: (average loss, average top-1 accuracy, average top-5 accuracy)
    """
    fixednet.train()

    objs = utils.AvgrageMeter()  # running average of the loss
    top1 = utils.AvgrageMeter()  # running top-1 accuracy
    top5 = utils.AvgrageMeter()  # running top-5 accuracy

    for batch_idx, (traininputs, traintargets) in enumerate(trainDataLoader):
        traininputs, traintargets = traininputs.to(device), traintargets.to(
            device)

        optimizer.zero_grad()
        logits, logits_aux = fixednet(traininputs)
        loss = criterion(logits, traintargets)
        if bAuxiliary:
            loss_aux = criterion(logits_aux, traintargets)
            loss += auxiliary_weight * loss_aux

        loss.backward()
        # Gradient clipping stabilises training of the derived network.
        nn.utils.clip_grad_norm_(fixednet.parameters(), 5)
        optimizer.step()

        prec1, prec5 = utils.calAccuracy(logits, traintargets, topk=(1, 5))
        tmpBatchSize = traininputs.size(0)
        # .item() extracts a detached Python scalar; the original used the
        # deprecated `.data`, which keeps tensor/graph references alive.
        objs.update(loss.item(), tmpBatchSize)
        top1.update(prec1.item(), tmpBatchSize)
        top5.update(prec5.item(), tmpBatchSize)

    return objs.avg, top1.avg, top5.avg
コード例 #3
0
def test(filename):
    """Train an XGBoost classifier on *filename* and print test metrics."""
    features, raw_labels, _ = utils.getData(filename)
    labels = XGboost_revalue(raw_labels)
    dataSet_name = filename.split('/')[1].split('.')[0]
    print("------------------------" + dataSet_name +
          "------------------------")
    train_data, train_label, test_data, test_label = utils.splitDataSet(
        features, labels, test_size=0.3)
    # Wrap both splits in XGBoost's native DMatrix format.
    train_matrix = xgb.DMatrix(train_data, label=train_label)
    test_matrix = xgb.DMatrix(test_data, label=test_label)
    # Boosting hyper-parameters.
    booster_params = {
        'eta': 0.01,                   # learning rate
        'subsample': 0.75,             # row sampling per boosting round
        'objective': 'multi:softmax',  # error evaluation for multiclass tasks
        'num_class': 2,                # number of classes to predict
        'max_depth': 8,                # depth of the trees in the boosting process
    }
    rounds = 500  # the number of training iterations
    booster = xgb.train(booster_params, train_matrix, rounds)
    predictions = booster.predict(test_matrix)  # predicted class per row
    acc, p, r, f1 = utils.calAccuracy(predictions, test_label)
    print("正确率:{:.2%}\t查准率:{:.4f}\t查全率:{:.4f}\tF1:{:.4f}".format(
        acc, p, r, f1))
コード例 #4
0
def SBC(train_data, train_label, valid_data, valid_label):
    """
    SBC: Selective Bayesian Classifier — improves naive Bayes by greedily
    removing attributes while validation accuracy does not decrease.
    Reference: Langley P, Sage S. Induction of selective Bayesian
    classifiers[M]//Uncertainty Proceedings 1994. Morgan Kaufmann, 1994: 399-406.
    :param train_data: training data (DataFrame, one column per attribute)
    :param train_label: training labels
    :param valid_data: validation data used to score candidate deletions
    :param valid_label: validation labels
    :returns: p1: prior probability of the positive class
    :returns: px1_list: per-attribute conditionals of the positive class
        (2-D list; discrete attributes give probabilities directly, while
        continuous ones give mean and variance)
    :returns: px0_list: per-attribute conditionals of the negative class
        (same layout as px1_list)
    :returns: col_del: names of the deleted columns
    """
    current_data = train_data  # working copy; columns are removed greedily
    col_name = train_data.columns.tolist()
    p1_best, px1_list_best, px0_list_best = train(train_data,
                                                  train_label,
                                                  is_Laplacian=True)  # all columns kept
    pred = predict(valid_data, p1_best, px1_list_best, px0_list_best)
    max_acc, p, r, f1 = utils.calAccuracy(pred, valid_label)
    print("当保留所有列的时候,准确率为:", max_acc)
    col_del = []  # columns deleted so far
    for col in col_name:  # try deleting each column in turn
        p1, px1_list, px0_list = train(current_data.drop(columns=[col]),
                                       train_label)
        pred = predict(valid_data.drop(columns=[col]), p1, px1_list, px0_list)
        acc, p, r, f1 = utils.calAccuracy(pred, valid_label)
        if acc >= max_acc:  # deleting this column does not hurt accuracy
            current_data = current_data.drop(columns=[col])  # drop it and keep the new model
            valid_data = valid_data.drop(columns=[col])
            col_del.append(col)
            # BUG FIX: the message reads "accuracy became {new} >= previous
            # best {old}"; the original passed (max_acc, acc) swapped.
            print("由于删除【{}】列后准确率由{}大于等于此前最大准确率{}因此删除该列!".format(
                col, acc, max_acc))
            max_acc = acc
            p1_best = p1
            px1_list_best = px1_list
            px0_list_best = px0_list
            # BUG FIX: the original tested len(columns == 1) — that is the
            # column count itself (always truthy), so the search stopped
            # after the first deletion. Stop only when one column remains.
            if len(current_data.columns) == 1:
                break
    return p1_best, px1_list_best, px0_list_best, col_del
コード例 #5
0
def evaluate_test_epoch(fixednet, testDataLoader, criterion, device):
    """Evaluate *fixednet* over the test loader.

    :param fixednet: trained network; forward returns logits here
        (NOTE(review): the training loop's forward returns a 2-tuple —
        presumably eval mode disables the auxiliary head; confirm)
    :param testDataLoader: iterable of (inputs, targets) batches
    :param criterion: loss function
    :param device: device the batches are moved to
    :returns: (average loss, average top-1 accuracy, average top-5 accuracy)
    """
    import torch  # local import: only needed to disable autograd below

    fixednet.eval()
    objs = utils.AvgrageMeter()  # running average of the loss
    top1 = utils.AvgrageMeter()  # running top-1 accuracy
    top5 = utils.AvgrageMeter()  # running top-5 accuracy

    # Inference only: disable gradient tracking to save memory and time
    # (the original built the autograd graph for every test batch).
    with torch.no_grad():
        for batch_idx, (testinputs, testtargets) in enumerate(testDataLoader):
            testinputs, testtargets = testinputs.to(device), testtargets.to(device)
            logits = fixednet(testinputs)
            loss = criterion(logits, testtargets)
            prec1, prec5 = utils.calAccuracy(logits, testtargets, topk=(1, 5))
            n = testinputs.size(0)
            # .item() extracts detached Python scalars (original used the
            # deprecated `.data`).
            objs.update(loss.item(), n)
            top1.update(prec1.item(), n)
            top5.update(prec5.item(), n)

    return objs.avg, top1.avg, top5.avg
コード例 #6
0
    # --- Naive Bayes on the haberman data set ---------------------------
    test_size = 0.3
    seed = 1111  # fixed seed so the train/test split is reproducible
    print(
        "\033[31m------------------------haberman------------------------\033[0m"
    )
    print("\033[4;32m*************朴素贝叶斯*************\033[0m")
    X, Y_, col_name = utils.getData("data/haberman.data")
    Y = Bayes_revalue(Y_)
    col_name = col_name.tolist()[:-1]  # drop the label column name
    train_data, train_label, test_data, test_label = utils.splitDataSet(
        X, Y, test_size=test_size, seed=seed)
    train_data = pd.DataFrame(train_data, columns=col_name)
    test_data = pd.DataFrame(test_data, columns=col_name)
    p1, px1_list, px0_list = train(train_data, train_label, is_Laplacian=True)
    pred = predict(test_data, p1, px1_list, px0_list)
    acc, p, r, f1 = utils.calAccuracy(pred, test_label)
    print("正确率:{:.2%}\t查准率:{:.4f}\t查全率:{:.4f}\tF1:{:.4f}".format(
        acc, p, r, f1))

    # --- Structure-extended naive Bayes (AODE) on the same data ---------
    print("\033[4;32m*************朴素贝叶斯结构扩展*************\033[0m")
    filename = "data/haberman.data"  # choices: haberman.data / heart.dat
    dataSet = utils.getDataSet(filename)
    train_data, test_data = utils.splitDataSet1(dataSet,
                                                test_size=test_size,
                                                seed=seed)
    train_data, test_data = pd.DataFrame(train_data), pd.DataFrame(test_data)
    # Last column is the label; the rest are (integer-coded) attributes.
    train_label = train_data.iloc[:, -1].astype(int)
    train_data = train_data.iloc[:, :-1].astype(int)
    test_label = test_data.iloc[:, -1].astype(int)
    test_data = test_data.iloc[:, :-1].astype(int)
    pred = AODE_Predict(train_data, train_label, test_data)
コード例 #7
0
    # --- KNN on the heart data set --------------------------------------
    filename="data/heart.dat"  # choices: haberman.data / heart.dat
    dataSet = utils.getDataSet(filename)
    train_data, test_data = utils.splitDataSet1(dataSet, test_size=0.3)

    test_data_data=test_data[:,:-1]   # feature columns only
    real_label=list(test_data[:,-1])  # ground-truth labels (last column)
    for i in range(len(real_label)):
        real_label[i]=int(real_label[i])

    # KNN evaluation over several neighbourhood sizes K.
    for K in [3,5,7,11]:
        pred=KNN_Predict(train_data=train_data,test_data=test_data_data,K=K)
        print("------------"+filename+"---KNN算法---K="+str(K)+"------------")
        # print("true:", real_label)
        # print("pred:", pred_label)
        acc, p, r, f1 = utils.calAccuracy(pred, real_label)
        print("正确率:{:.2%}\t查准率:{:.4f}\t查全率:{:.4f}\tF1:{:.4f}".format(acc, p, r, f1))

    # Load the second data set and repeat the same train/test split.
    filename="data/haberman.data"  # choices: haberman.data / heart.dat
    dataSet = utils.getDataSet(filename)
    train_data, test_data = utils.splitDataSet1(dataSet, test_size=0.3)
    print()
    test_data_data=test_data[:,:-1]   # feature columns only
    real_label=list(test_data[:,-1])  # ground-truth labels (last column)
    for i in range(len(real_label)):
        real_label[i]=int(real_label[i])
    for K in [3,5,7,11]:
        pred=KNN_Predict(train_data=train_data,test_data=test_data_data,K=K)
        print("------------"+filename+"---KNN算法---K="+str(K)+"------------")
        # print("true:", real_label)