コード例 #1
0
 def test_adaboost(self):
     """Fit AdaBoost on the ``loadHorseColic()`` split and require >0.7 accuracy.

     The accuracy on the held-out part is printed before it is asserted so
     the value is visible even when the check passes.
     """
     features_train, labels_train, features_test, labels_test = loadHorseColic()
     model = AdaBoostClassifier()
     model.fit(features_train, labels_train)
     predicted = model.predict(features_test)
     # Score once and reuse the value for both the printout and the assertion.
     accuracy = accuracy_score(predicted, labels_test)
     print(accuracy)
     assert accuracy > 0.7
コード例 #2
0
ファイル: train.py プロジェクト: SunDoge/ML2017-lab-03
def test_breast_cancer():
    """Compare the project AdaBoost with ``SkAdaBoostClassifier`` on breast-cancer data.

    Both classifiers are trained on the same 67/33 split (fixed seed 42) and a
    classification report is printed for each so the two can be compared.
    """
    clf = AdaBoostClassifier(n_weakers_limit=50)
    # ``return_X_y`` must be passed by keyword: current scikit-learn declares
    # it keyword-only, so the old positional ``True`` raises a TypeError.
    X, y = load_breast_cancer(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.33, random_state=42)

    # Project implementation.
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    print(classification_report(y_test, y_pred))

    # Reference implementation (presumably scikit-learn's AdaBoost - confirm
    # against the file's imports), trained with its default settings.
    skclf = SkAdaBoostClassifier()
    skclf.fit(X_train, y_train)
    print(classification_report(y_test, skclf.predict(X_test)))
コード例 #3
0
def process_boost(report_path="D:/testing/python/classifier_report.txt"):
    """Train a 20-learner AdaBoost model and write its validation report.

    Args:
        report_path: File the classification report is written to. Defaults
            to the location that used to be hard-coded, so existing callers
            are unaffected; pass another path on machines without that
            directory.
    """
    x_train, y_train, x_valid, y_valid = load_and_split()
    n_weakers_limit = 20
    # The weak-learner *class* (not an instance) is handed to the booster.
    adaBoost = AdaBoostClassifier(DecisionTreeClassifier, n_weakers_limit)
    adaBoost.fit(x_train, y_train)
    # Predict on the validation split.
    predict_list = adaBoost.predict(x_valid)
    # NOTE(review): target_names are matched to the sorted label values;
    # confirm that the smaller label really encodes 'face'.
    target_names = ['face', 'non_face']
    report = classification_report(y_valid,
                                   predict_list,
                                   target_names=target_names)
    with open(report_path, "w") as f:
        f.write(report)
コード例 #4
0
ファイル: train.py プロジェクト: SunXingX/ML2017-lab-03
    def train(train_X, train_y):
        """Fit a 5-learner AdaBoost of depth-3 trees and report on the training set.

        Prints the summed training error, writes a classification report via
        ``write_report`` and returns the fitted booster.
        """
        base_tree = DecisionTreeClassifier(max_depth=3)
        booster = AdaBoostClassifier(base_tree, 5)
        booster.fit(train_X, train_y)
        predictions = booster.predict(train_X)

        # Absolute differences larger than ``ep`` are clamped to 1 before
        # summing; smaller differences are summed as they are.
        errors = np.abs(predictions - train_y)
        errors[errors > ep] = 1
        print("错误预测的个数为: ", np.sum(errors))

        report = classification_report(train_y,
                                       predictions,
                                       target_names=['人脸', '非人脸'])
        write_report(
            "/home/sun/ComputerScience/MachineLearning/Experiments/Experiment_three/ML2017-lab-03/report.txt",
            report)
        return booster
コード例 #5
0
ファイル: train.py プロジェクト: SunDoge/ML2017-lab-03
def test_image():
    """Train AdaBoost on cached image features and write ``report.txt``.

    The features and labels are read from ``X.pkl`` / ``y.pkl``. Those caches
    were produced by a now-disabled step that ran ``get_features`` over both
    image collections (label 0 for faces, 1 for non-faces) and stored the
    results with ``AdaBoostClassifier.save``.
    """
    root = 'datasets/original/'
    # The collections are still created although the cached features below
    # are what the model is actually trained on.
    face = io.imread_collection(root + 'face/*.jpg')
    nonface = io.imread_collection(root + 'nonface/*.jpg')
    class_names = ['face', 'nonface']

    cached_features = AdaBoostClassifier.load('X.pkl')
    cached_labels = AdaBoostClassifier.load('y.pkl')

    X_train, X_test, y_train, y_test = train_test_split(
        np.array(cached_features),
        np.array(cached_labels),
        test_size=0.33,
        random_state=42)

    print('start training')

    model = AdaBoostClassifier(n_weakers_limit=50)
    model.fit(X_train, y_train)
    predicted = model.predict(X_test)

    with open('report.txt', 'w') as out:
        summary = classification_report(y_test, predicted,
                                        target_names=class_names)
        # print(..., file=f) appends a newline after the report; keep it.
        out.write(summary + '\n')
コード例 #6
0
ファイル: train.py プロジェクト: SunDoge/ML2017-lab-03
def test_xor():
    """Fit both AdaBoost implementations on the four-point XOR table.

    Each model is trained and evaluated on the same four samples and its
    classification report is printed.
    """
    points = np.array([[1, 1], [1, 0], [0, 1], [0, 0]])
    targets = np.array([0, 1, 1, 0])

    # Project implementation with a generous weak-learner budget.
    own_model = AdaBoostClassifier(n_weakers_limit=1000)
    own_model.fit(points, targets)
    own_predictions = own_model.predict(points)
    print(classification_report(targets, own_predictions))

    # Reference implementation with default settings.
    reference_model = SkAdaBoostClassifier()
    reference_model.fit(points, targets)
    print(classification_report(targets, reference_model.predict(points)))
コード例 #7
0
ファイル: train.py プロジェクト: Fantasy333/ML2017-lab-03
        X.append(NPDFeature(np.array(img)).extract())
        y.append(-1)
        print(i)

    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.33,
                                                        random_state=42)
    ada = AdaBoostClassifier(DecisionTreeClassifier, 20)

    X_train = np.array(X_train)
    X_test = np.array(X_test)
    y_train = np.array(y_train)
    y_test = np.array(y_test)
    y_train = y_train.reshape((y_train.shape[0], 1))
    y_test = y_test.reshape((y_test.shape[0], 1))

    ada.fit(X_train, y_train)
    h = ada.predict(X_test)
    yes = 0
    no = 0
    for i in range(0, len(h)):
        if (h[i] == y_test[i]): yes += 1
        if (h[i] != y_test[i]): no += 1
    print(yes, "   ", no)

    report = classification_report(y_test, h, target_names=["nonface", "face"])

    file = open('report.txt', 'w')
    file.write(report)
    file.close()
コード例 #8
0
ファイル: train.py プロジェクト: yangjianscut/MLlab
    # write your code here
    X, y = read_data()
    with open("data.pickle", "wb") as f:
        pickle.dump((X, y), f, pickle.HIGHEST_PROTOCOL)
    with open("data.pickle", "rb") as f:
        X, y = pickle.load(f)

    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.1,
                                                        random_state=2)

    classifiler = AdaBoostClassifier(DecisionTreeClassifier, 7)
    classifiler.fit(X_train, y_train)

    pred_y_test = classifiler.predict(X_test)
    report = classification_report(y_test,
                                   pred_y_test,
                                   labels=[-1, 1],
                                   target_names=['face', 'nonface'])
    print(report)
    '''
    pred_test_error_list = []
    pred_test_error_list.append(None)
    pred_train_error_list = []
    pred_train_error_list.append(None)
    for iter_s in tqdm(range(1, 20), desc='test', leave=True):
        classifiler = AdaBoostClassifier(DecisionTreeClassifier, iter_s)
        classifiler.fit(X_train, y_train)
        pred_y_train = classifiler.predict(X_train)
        error_rate_count_train = 0
コード例 #9
0
ファイル: train.py プロジェクト: LiangXiaoEr/ML_Lab_three
    with open("datasets/processed/label", 'wb') as label:
        pickle.dump(y, label)


if __name__ == "__main__":
    # Entry point: load the pickled features/labels, train AdaBoost on a
    # 70/30 split and store the validation report.
    print("Processing Images-------")
    # process_images()  (feature extraction step, currently disabled)
    print("Loading data------------")
    with open("datasets/processed/feature", 'rb') as feature:
        X = pickle.load(feature)
    with open("datasets/processed/label", 'rb') as label:
        y = pickle.load(label)
    print("Shape of data after processing: ", X.shape, y.shape)

    # Split the dataset with a fixed seed.
    X_train, X_valid, y_train, y_valid = train_test_split(
        X, y, test_size=0.30, random_state=42)
    print(X_train.shape, y_train.shape, X_valid.shape, y_valid.shape)

    # Ten boosted depth-3 trees.
    weak_learner = DecisionTreeClassifier(max_depth=3)
    clf = AdaBoostClassifier(weak_learner, 10)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_valid)

    # Flatten the labels so the element-wise comparison lines up.
    acc = np.mean(y_pred == y_valid.reshape(-1))
    print(acc)

    # The report is encoded because the file is opened in binary mode.
    with open("datasets/report.txt", 'wb') as file:
        report = classification_report(
            y_valid, y_pred, target_names=['face', 'nonface'])
        file.write(report.encode())
コード例 #10
0
        pickle.dump(X_train, output)
        pickle.dump(X_vali, output)
        pickle.dump(y_train, output)
        pickle.dump(y_vali, output)
        output.close()

    #create adaboost/weak classifier
    dtc = DecisionTreeClassifier(random_state=0,
                                 max_depth=3,
                                 max_features="sqrt")
    classifier = AdaBoostClassifier(dtc, 15)
    #train classifiers
    classifier.fit(X_train, y_train)
    dtc.fit(X_train, y_train)
    #do prediction
    result = classifier.predict(X_vali)
    weakresult = dtc.predict(X_vali)

    #calculate predicting accuracy for both
    adacount = 0
    weakcount = 0
    for i in range(0, result.shape[0]):
        if (np.abs(result[i] - 1) < np.abs(result[i] + 1)):
            result[i] = 1
        else:
            result[i] = -1
        if result[i] == y_vali[i]:
            adacount = adacount + 1
        if weakresult[i] == y_vali[i]:
            weakcount = weakcount + 1
    print("adaboost accuracy: " + str(adacount / result.shape[0]))
コード例 #11
0
ファイル: train.py プロジェクト: delbertbeta/ML2018-lab-03
                npd = feature.NPDFeature(imageData)
                features.append(npd.extract())
        AdaBoostClassifier.save(features, 'features.dump')

    features = np.array(features)
    print(features.shape)

    X_train, X_val, y_train, y_val = train_test_split(features,
                                                      y,
                                                      test_size=0.25)

    classifier = AdaBoostClassifier(DecisionTreeClassifier, 5)
    classifier.fit(X_train, y_train)

    score = classifier.predict_scores(X_val, y_val)
    predict = classifier.predict(X_val)

    y_val = np.array(list(map(lambda x: int(x), y_val.reshape(1, -1)[0])))
    predict = np.array(list(map(lambda x: int(x), predict.reshape(1, -1)[0])))

    print(predict)
    print(y_val)

    reportContent = 'score = ' + str(score) + '\n'
    reportContent += classification_report(y_val, predict)

    with open('classifier_report.txt', 'w') as report:
        report.write(reportContent)

    pass
コード例 #12
0
    max_score = 0
    report = ''

    #弱分类器数目从5到50进行实验
    while (num_weak_classifier <= 50):
        print('弱分类器数量:', num_weak_classifier)
        num_list.append(num_weak_classifier)

        #定义弱分类器
        b = DecisionTreeClassifier(splitter='random', max_depth=4)
        #定义adaboost分类器
        a = AdaBoostClassifier(b, num_weak_classifier)
        #使用训练集进行训练
        a.fit(X_train, y_train)
        #对测试集进行分类
        y_pred = a.predict(X_val)
        #计算准确率
        correct = 0
        for i in range(y_pred.shape[0]):
            if (y_pred[i] == y_val[i]):
                correct += 1
        score = correct / y_val.shape[0]
        print('准确率:', score)
        pred_score_list.append(score)

        #生成准确率最高时的报告
        if (score > max_score):
            max_score = score
            report = classification_report(y_val, y_pred)
        num_weak_classifier += 1
コード例 #13
0
    y_test = y_test.reshape(75, 1)
    return y_train, y_test


def acc(y_test, y_preds):
    """Threshold ``y_preds`` to +1/-1 in place and print the match rate.

    Positive scores become 1, scores that are zero or negative become -1.
    Each entry of ``y_test`` is indexed with ``[0]`` to obtain its label. The
    fraction of matching entries is printed; nothing is returned.
    """
    for idx, score in enumerate(y_preds):
        if score > 0:
            y_preds[idx] = 1
        elif score <= 0:
            y_preds[idx] = -1

    hits = sum(
        1 for predicted, actual in zip(y_preds, y_test)
        if int(predicted) == int(actual[0])
    )
    print('arr:', hits / len(y_test))


if __name__ == "__main__":
    # Entry point: boost ten decision trees and print the test accuracy.
    X = trainX()
    X_test = testX()
    y_train, y_test = dataY()

    # Settings of the weak learner handed to the booster.
    tree_options = dict(max_depth=50,
                        min_samples_leaf=50,
                        random_state=30,
                        criterion='gini')
    clf = tree.DecisionTreeClassifier(**tree_options)

    gbdt = AdaBoostClassifier(clf, 10)
    gbdt.fit(X, y_train)
    y_preds = gbdt.predict(X_test)

    # acc() thresholds y_preds in place and prints the match rate.
    acc(y_test, y_preds)
コード例 #14
0
ファイル: train.py プロジェクト: MuyiLi/ML2017-lab-03
# Build the data set: at most ``n_limit`` samples per class, faces labelled
# +1 and non-faces labelled -1.
features_face_array = np.array(features_face)[0:n_limit]

n_pos_sample = features_face_array.shape[0]
n_feature = features_face_array.shape[1]

features_nonface = load('features_nonface')
features_nonface_array = np.array(features_nonface)[0:n_limit]
n_neg_sample = features_nonface_array.shape[0]

X = np.concatenate((features_face_array, features_nonface_array), axis=0)

y = np.array([1] * n_pos_sample + [-1] * n_neg_sample)

# No random_state is given, so the 80/20 split differs between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# A single ``dt()`` model scored with ``model.score`` was used earlier as a
# baseline check; it is disabled in favour of the boosted model below.

Ada = AdaBoostClassifier(dt, 10)
Ada.fit(X_train, y_train)
pred = Ada.predict(X_test)

# scikit-learn metrics take (y_true, y_pred). Accuracy is symmetric, but the
# classification report is not: with the arguments swapped, precision and
# recall trade places and "support" counts predictions instead of true labels.
acc = accuracy_score(y_test, pred)
print('acc:', acc)

content = classification_report(y_test, pred)
# The context manager closes the file even if the write fails.
with open('report.txt', 'w') as f:
    f.write(content)
コード例 #15
0
ファイル: train.py プロジェクト: zedom1/Class_Project
    else:
        path_face = './datasets/original/face/'
        path_non_face = './datasets/original/nonface/'
        for i in os.listdir(path_face):
            features = processImage(Image.open(path_face + i))
            X.append(features)
            y.append(1)

        for i in os.listdir(path_non_face):
            features = processImage(Image.open(path_non_face + i))
            X.append(features)
            y.append(0)
        with open("data_x", "wb") as f:
            pickle.dump(X, f)
        with open("data_y", "wb") as f:
            pickle.dump(y, f)
    return X, y


if __name__ == "__main__":
    # Entry point: train a 10-learner AdaBoost model on a shuffled 80/20
    # split of the data returned by getData() and print the report.
    X, y = getData()
    split_options = {"test_size": 0.2, "shuffle": True}
    X_train, X_val, y_train, y_val = train_test_split(X, y, **split_options)

    # The weak-learner class (not an instance) is passed to the booster.
    clf = AdaBoostClassifier(DecisionTreeClassifier, 10)
    clf.fit(X_train, y_train)

    predict = clf.predict(X_val)
    print(classification_report(y_val, predict))
コード例 #16
0
        face_im=face_im.convert('L')#灰度化
        face_im=face_im.resize((24,24)) #缩小尺寸
        nonface_im=nonface_im.convert('L')
        nonface_im=nonface_im.resize((24,24)) 
        face.append(np.array(face_im))#转为ndarray
        nonface.append(np.array(nonface_im))
    feature_face=[]
    feature_nonface=[]
    for i in range(500) :
        feature_face.append(NPDFeature(face[i]).extract())
        feature_nonface.append(NPDFeature(nonface[i]).extract())
    # #缓存特征
    # AdaBoostClassifier.save(feature_face,'feature_face')
    # AdaBoostClassifier.save(feature_nonface,'feature_nonface')
    # #读取缓存的特征
    # feature_face=np.array(AdaBoostClassifier.load('feature_face'))
    # feature_nonface=np.array(AdaBoostClassifier.load('feature_nonface'))

    data=np.row_stack((feature_face,feature_nonface))
    label=np.concatenate((np.ones(500),-np.ones(500)))
    X_train,X_validation,y_train,y_validation=train_test_split(data,label,test_size=0.3,random_state=1000)
    #Adaboost 20个分类器,每个决策树只有一个节点
    model=AdaBoostClassifier(DecisionTreeClassifier,20)
    model.fit(X_train,y_train)#训练模型
    y_pre=model.predict(X_validation)#预测
    with open('report.txt',mode='w') as f:
        f.write(classification_report(y_pre,y_validation))
    #单个分类器
    model=DecisionTreeClassifier(max_depth=1).fit(X_train,y_train)
    y_pre=model.predict(X_validation)
    print(classification_report(y_pre,y_validation))
コード例 #17
0
ファイル: train.py プロジェクト: VincentYiu1996/ML2017-lab-03
        # train_y.append(-1)

    clf = AdaBoostClassifier(tree.DecisionTreeClassifier, 10)

    #cal NPD
    for i in range(num_face + num_nonface):
        NPD.append(NPDFeature(imgs[i]).extract())

    #save NPD
    # clf.save(NPD,'output')

    #load NPD
    # NPD=np.array(clf.load('output'))

    train_x = np.row_stack((NPD[0:100], NPD[500:600]))
    train_y = np.append(np.ones((1, 100)), np.linspace(-1, -1, 100))
    test_x = np.row_stack((NPD[200:300], NPD[700:800]))
    test_y = np.append(np.ones((1, 100)), np.linspace(-1, -1, 100))

    clf.fit(train_x, train_y)
    y = clf.predict(test_x)

    hit = 0
    for i in range(test_x.shape[0]):
        if (y[i] == test_y[i]):
            hit += 1
    print("hit rate:", hit / test_x.shape[0])

    with open('report.txt', 'w') as f:
        f.write(classification_report(test_y, y))
コード例 #18
0
    trainLabel.extend(nonfaceLabel)
    trainLabel.extend(faceLabel)
    X_train, X_test, Y_train, Y_test = train_test_split(trainImage, trainLabel, test_size = 0.33)
    return np.array(X_train), np.array(X_test), np.array(Y_train), np.array(Y_test)


if __name__ == "__main__":
    # Entry point: evaluate a single decision tree and a 20-learner AdaBoost
    # model on the same split; each report is written to a file and printed.
    # dataProcess() is left disabled here.
    trainData, testData, trainLabel, testLabel = divideData(
        './dataFeature/face', './dataFeature/nonface')

    # --- Baseline: one depth-2 tree fitted with uniform sample weights ---
    clf = DecisionTreeClassifier(max_depth=2)
    row, col = trainData.shape
    weightArray = [(1 / row)] * row
    clf.fit(trainData, trainLabel, weightArray)
    predictY = clf.predict(testData)
    rate = calculateAcc(predictY, testLabel)
    print(rate)
    target_name = ['class-1', 'class1']
    with open("./report.txt", 'w') as fp:
        tree_report = classification_report(
            testLabel, predictY, target_names=target_name)
        fp.write(tree_report)
    print(tree_report)

    # --- Boosted model: 20 weak learners ---
    clf2 = AdaBoostClassifier(DecisionTreeClassifier, 20)
    clf2.fit(trainData, trainLabel)
    predictY2 = clf2.predict(testData)
    rate = calculateAcc(predictY2, testLabel)
    print(rate)
    target_name = ['class-1', 'class1']
    with open("./report1.txt", 'w') as fp:
        boost_report = classification_report(
            testLabel, predictY2, target_names=target_name)
        fp.write(boost_report)
    print(boost_report)
コード例 #19
0
                         str(i).rjust(3, '0') + ".jpg")
        img = cv2.resize(img, dsize=(24, 24))
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)  # 转换了灰度化
        npd = NPDFeature(img)
        x.append(npd.extract())
        y.append(1)
    for i in tqdm(range(500)):
        img = cv2.imread("./datasets/original/nonface/nonface_" +
                         str(i).rjust(3, '0') + ".jpg")
        img = cv2.resize(img, dsize=(24, 24))
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)  # 转换了灰度化
        npd = NPDFeature(img)
        x.append(npd.extract())
        y.append(-1)
    x_train, x_val, y_train, y_val = train_test_split(x, y, test_size=0.2)

    print('begin train data')
    ada = AdaBoostClassifier()
    ada.fit(x_train, y_train)
    y_predict = ada.predict(x_val, threshold=0)
    print(
        classification_report(y_val,
                              y_predict,
                              target_names=["nonface", "face"],
                              digits=4))
    with open("report.txt", "w") as f:
        f.write(
            classification_report(y_val,
                                  y_predict,
                                  target_names=["nonface", "face"],
                                  digits=4))
コード例 #20
0
ファイル: train.py プロジェクト: cshzh/ML2017-lab-03
        image = np.array(image)
        feature = NPDFeature(image).extract()
        if 'nonface' in image_path:
            y[i] = -1
        else:
            y[i] = 1
        X[i, :] = feature
    np.savez(features_save_path, X, y)

    # load features
    npzfile = np.load(features_save_path)
    X = npzfile['arr_0']
    y = npzfile['arr_1']

    # Split the dataset into training set and validation set
    X_train, X_validation, y_train, y_validation = train_test_split(
        X, y, test_size=0.44, random_state=42, shuffle=True)
    adaboost_classifier = AdaBoostClassifier(max_number_classifier=1)
    # Train the model
    adaboost_classifier.fit(X_train, y_train)
    y_predict = adaboost_classifier.predict(X_validation)
    target_names = ['non_face', 'face']
    accuracy = np.mean(y_predict == y_validation)
    print(accuracy)
    results = classification_report(y_validation,
                                    y_predict,
                                    target_names=target_names)
    with open(results_path, 'w+') as f:
        f.write(results)
    print(results)
コード例 #21
0
# Train the ensemble with CLASSIFIER_NUM base classifiers.
classifier.train(trainloader, classifier_num=CLASSIFIER_NUM)

# MNIST test split, iterated in order with the loader's default batch size.
test_dataset = torchvision.datasets.MNIST(
    root='./data',
    train=False,
    download=True,
    transform=transforms.ToTensor(),
)
test_dataloader = torch.utils.data.DataLoader(test_dataset, shuffle=False)

# Evaluate the boosted classifier: count predictions equal to the target.
correct = 0
for data, target in test_dataloader:
    # Move the batch to the configured device before predicting.
    data = data.to(device)
    target = target.to(device)

    category = classifier.predict(data)
    # .item() requires a single-element target tensor.
    target_category = target.cpu().numpy().item()
    if category == target_category:
        correct += 1

total = len(test_dataloader.dataset)
accuracy = correct / total
print('\nTest dataset: AdaBoostClassifier accuracy: {}/{} ({:.2f}%)\n'.format(
    correct, total, accuracy * 100.0))

# Test the base classifier
for i in range(CLASSIFIER_NUM):
    correct = 0
    for batch_index, (data, target) in enumerate(test_dataloader):
        # Copy data to GPU if needed
        data = data.to(device)
        target = target.to(device)

        category = classifier.predict_using_base_classifier(i, data)
コード例 #22
0
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from ensemble import AdaBoostClassifier
# The grayscale images are read directly; they live in the "original" folder.
FACE_DIR = 'G:\\Users\\qqqqqq1997520\\Desktop\\original\\face\\face'
NONFACE_DIR = 'G:\\Users\\qqqqqq1997520\\Desktop\\original\\face\\nonface'
path1 = [os.path.join(FACE_DIR, f) for f in os.listdir(FACE_DIR)]
path2 = [os.path.join(NONFACE_DIR, f) for f in os.listdir(NONFACE_DIR)]

# Images 0-499 are faces, 500-999 are non-faces. Indexing (rather than
# slicing) keeps the IndexError when a folder holds fewer than 500 files.
im = ([plt.imread(path1[i]) for i in range(500)]
      + [plt.imread(path2[i]) for i in range(500)])

# One NPD feature vector per image.
_feature = [NPDFeature(image).extract() for image in im]
feature_data = array(_feature)

# Labels follow the image order: +1 for faces, -1 for non-faces.
y = array([1] * 500 + [-1] * 500)
X_train, X_vali, y_train, y_vali = train_test_split(
    feature_data, y, test_size=0.3, random_state=37)

# A throw-away one-learner booster used to be built before the data was
# loaded and was overwritten here without being used; only this one is needed.
ABC = AdaBoostClassifier(DecisionTreeClassifier(), 20)
ABC.fit(X_train, y_train)
predict = ABC.predict(X_vali, 0)

# classification_report pairs target_names with the *sorted* labels, i.e.
# [-1, 1]. The non-face name must therefore come first; the previous
# ["Y", "N"] order reported faces (label 1) as "N".
classification_name = ["N", "Y"]
with open('report.txt', 'w') as f:
    f.write(classification_report(y_vali, predict,
                                  target_names=classification_name))
コード例 #23
0
ファイル: train.py プロジェクト: ymNickole/ML2017-lab-03
        dataset = np.array(samples)
        with open('tmp.pkl', 'wb') as output:
            pickle.dump(dataset, output, True)

    with open('tmp.pkl', 'rb') as input:
        dataset = pickle.load(input)
        print(dataset.shape)
    # 将数据集切分为训练集和验证集
    X_train = dataset[:dataset.shape[0] * 3 // 4, :dataset.shape[1] - 1]
    y_train = dataset[:dataset.shape[0] * 3 // 4, dataset.shape[1] - 1]
    X_validation = dataset[dataset.shape[0] * 3 // 4:, :dataset.shape[1] - 1]
    y_validation = dataset[dataset.shape[0] * 3 // 4:, dataset.shape[1] - 1]
    return X_train, X_validation, y_train, y_validation


if __name__ == "__main__":
    # Entry point: train a 20-learner AdaBoost model, print its validation
    # accuracy and save the classification report.
    X_train, X_validation, y_train, y_validation = loadDataSet()

    abc = AdaBoostClassifier(DecisionTreeClassifier, 20)
    abc.fit(X_train, y_train)
    final_pre_y = abc.predict(X_validation)

    # Count the positions where prediction and label disagree.
    error = sum(
        1 for i in range(final_pre_y.shape[0])
        if final_pre_y[i] != y_validation[i]
    )
    accuracy = 1 - error / y_validation.shape[0]
    print('accuracy: %f' % accuracy)

    report = classification_report(y_validation,
                                   final_pre_y,
                                   target_names=['face', 'nonface'])
    print(report)
    with open('report.txt', 'w') as f:
        f.write(report)
コード例 #24
0
ファイル: train.py プロジェクト: sxyzc/ML_2017_Lab3
    #将X_data与y_data分开
    X_data,y_data = Data[:,:-1],Data[:,-1]

    #切分训练集与验证集
    X_train,X_test,y_train,y_test = train_test_split(X_data,y_data,test_size=0.3,random_state=10)

    print(len(y_train),len(y_test))

    #进行AdaBoost训练
    mode = tree.DecisionTreeClassifier(max_depth=1)
    adaboost=AdaBoostClassifier(mode,20)
    adaboost.fit(X_train,y_train)

    #得到预测结果
    y_predict=adaboost.predict(X_test)

    #输出正确率
    count=0
    for i in range(len(y_test)):
        if y_test[i]==y_predict[i]:
            count=count+1
    target_names = ['1', '-1']
    print(count/len(y_test))

    #调用classification_report获得预测结果
    report=classification_report(y_test, y_predict, target_names=target_names)

    #写入report.txt
    with open("report.txt", 'w') as f:
        f.write(report)
コード例 #25
0
ファイル: train.py プロジェクト: we1m1n/MachineLearning
            img = Image.open(nonfaces_path[i])
            img = img.convert('L').resize((24, 24))
            nf = NPDFeature(np.array(img))
            train[i * 2 + 1] = nf.extract()
        AdaBoostClassifier.save(train, 'train.txt')

    try:
        X = AdaBoostClassifier.load("train.txt")
    except IOError:
        Feature_extract()
        X = AdaBoostClassifier.load("train.txt")

    Y = np.zeros((1000, 1))
    for i in range(1000):
        Y[i] = (i + 1) % 2
    Y = np.where(Y > 0, 1, -1)

    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2)

    booster = AdaBoostClassifier(DecisionTreeClassifier, 15)
    booster.fit(X_train, Y_train)
    predict = booster.predict(X_test)
    wrong_count = 0
    for j in range(predict.shape[0]):
        if predict[j] != Y_test[j]:
            wrong_count += 1
    AdaBoostClassifier.save(classification_report(Y_test, predict),
                            "classifier_report.txt")
    pass
コード例 #26
0
    print('X_train.shape', X_train.shape)
    print('X_val.shape', X_val.shape)
    print('y_train.shape', y_train.shape)
    print('y_val.shape', y_val.shape)

    # 尝试使用不同深度的决策树,不同数量的决策树来进行建模和预测
    result = []
    max_depth = 4
    max_num_tree = 10
    for depth in range(1, max_depth + 1):
        result_item = []
        for num_tree in range(1, max_num_tree + 1):
            adaboostclassifier = AdaBoostClassifier(
                DecisionTreeClassifier(max_depth=depth), num_tree)
            adaboostclassifier.fit(X_train, y_train)
            pre_label = adaboostclassifier.predict(X_val)
            correct = [1 if a == b else 0 for (a, b) in zip(pre_label, y_val)]
            accurary = sum(correct) / len(correct)
            result_item.append(accurary * 100)

            report = classification_report(y_val,
                                           pre_label,
                                           labels=[-1, 1],
                                           target_names=["face", "nonface"])
            model_num = (depth - 1) * 10 + num_tree
            with open('report.txt', 'a') as f:
                f.write('\nmodel ' + str(model_num) + ':\n')
                f.write('number of decision tree:' + str(num_tree) + '\n')
                f.write('max_depth of decision tree:' + str(depth) + '\n')
                f.write(report)
            print("model " + str(model_num) + "/40 is finished")
コード例 #27
0
    plt.ylabel('Accuracy')
    plt.plot(range(len(validation_score_list)), validation_score_list)
    #plt.grid()
    plt.show()


if __name__ == "__main__":
    # Entry point: preprocess the images, train AdaBoost on the pickled
    # features, plot the per-iteration validation scores and save a report.
    pre_image()
    with open('features', "rb") as f:
        x = pickle.load(f)
    with open('labels', "rb") as f:
        y = pickle.load(f)

    X_train, X_test, y_train, y_test = train_test_split(
        x, y, test_size=0.33, random_state=0)

    maxIteration = 10
    booster = AdaBoostClassifier(DecisionTreeClassifier(max_depth=3),
                                 maxIteration)
    # This project's fit() returns a pair: the object used for prediction
    # and the list of validation scores.
    s, validation_score_list = booster.fit(X_train, y_train)
    predict_y = s.predict(X_test)

    acc_plot(validation_score_list)

    # Binary mode, hence the explicit encode().
    with open('report.txt', "wb") as f:
        report = classification_report(
            y_test, predict_y, target_names=["face", "nonface"])
        f.write(report.encode())
コード例 #28
0
if __name__ == "__main__":
    # Entry point: load images, extract NPD features, train AdaBoost and
    # assemble the report text.
    load_img()
    npd_feature()

    # Convert the collected features/labels to arrays; labels become a column.
    img_features = np.array(img_features)
    img_labels = np.array(img_labels).reshape((-1, 1))
    print(img_features.shape)
    print(img_features)

    X_train, X_val, y_train, y_val = train_test_split(
        img_features, img_labels, test_size=0.25)
    print(X_train.shape, X_val.shape, y_train.shape, y_val.shape)

    ada = AdaBoostClassifier(DecisionTreeClassifier, WEAKERS_LIMIT)
    ada.fit(X_train, y_train)

    y_predict = ada.predict(X_val)
    acc = ada.predict_scores(X_val, y_val)

    print(acc)

    # Flatten both arrays to 1-D vectors of Python ints for the report.
    y_val = np.array([int(value) for value in y_val.reshape(1, -1)[0]])
    y_predict = np.array(
        [int(value) for value in y_predict.reshape(1, -1)[0]])

    print(y_predict)
    print(y_val)

    reportContent = 'Accuracy = ' + str(acc) + '\n'
    reportContent += classification_report(y_val, y_predict)
コード例 #29
0
ファイル: train.py プロジェクト: dishonored36/lab_3
    # 数据集加标签,并划分训练集,验证集
    Data = loadData("data")
    label = np.ones(1000)
    label[500:] = -1
    train_x, train_y, validation_x, validation_y = split(Data, label, 0.4)
    saveData("train", train_x)
    saveData("label", train_y)
    saveData("validation", validation_x)
    saveData("target", validation_y)

    train = loadData("train")
    train_x = np.array(train)
    label = loadData("label")
    train_y = np.array(label)
    validation = loadData("validation")
    test_x = np.array(validation)
    target = loadData("target")
    test_y = np.array(target)
    weakClassifier = DecisionTreeClassifier(max_depth=3)
    cls = AdaBoostClassifier(weakClassifier, num_classifier)
    cls = cls.fit(train_x, train_y)
    result_adaboost = cls.predict(test_x, 0)
    print('adaboost result: ', result_adaboost)
    print('accuracy: ', validate_result(result_adaboost, test_y))
    target_names = {'nonface', 'face'}
    output = open('report.txt', 'w')
    output.write(
        classification_report(test_y,
                              result_adaboost,
                              target_names=target_names))
コード例 #30
0
# Train a 10-learner AdaBoost model on the pickled data set and print a
# classification report for a random 25% hold-out.
import pickle

import numpy as np
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

from ensemble import AdaBoostClassifier

# NOTE: pickle.load executes arbitrary code from the file; only use a
# dataset.pkl that comes from a trusted source.
with open("dataset.pkl", "rb") as file:
    dataset = pickle.load(file)

# The last column holds the label; it is kept as a column vector.
X = dataset[:, :-1]
y = dataset[:, -1].reshape((-1, 1))

# No random_state is given, so the split differs between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)

# The weak-learner class (not an instance) is passed to the booster.
Classifier = AdaBoostClassifier(DecisionTreeClassifier, 10)
Classifier.fit(X_train, y_train)

y_pre = Classifier.predict(X_test)

print(
    classification_report(y_test,
                          y_pre,
                          target_names=["Face", "Not Face"],
                          digits=4))