Example 1
dataY = np.array(dataY)  
# axis=0 concatenates along rows: the arrays are stacked vertically
# axis=1 concatenates along columns: the arrays are placed side by side
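# For example (illustrative only):
#   np.concatenate(([[1, 2]], [[3, 4]]), axis=0) -> [[1, 2], [3, 4]]
#   np.concatenate(([[1, 2]], [[3, 4]]), axis=1) -> [[1, 2, 3, 4]]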
dataX = np.concatenate((dataX[0],dataX[1]),axis=0) 
dataY = np.concatenate((dataY[0],dataY[1]),axis=0) 
dataY = dataY.reshape((len(dataY), 1))  # ensure the labels have shape (n_samples, 1)

# Divide the dataset
x_train, x_validation, y_train, y_validation = train_test_split(dataX, dataY, test_size=0.2, random_state=42)


# In[3]:


# Train the model
model = ensemble.AdaBoostClassifier(weak_classifier=DecisionTreeClassifier, n_weakers_limit=40)
model.fit(X=x_train, y=y_train)


# In[4]:


# Predict
y_train_pred=model.predict(X=x_train,threshold=0)
y_validation_pred=model.predict(X=x_validation,threshold=0)


# In[12]:


# Verify the accuracy on the validation set 
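# The snippet ends here. A minimal sketch of the check the comment above
# describes, assuming sklearn's accuracy_score (not part of the original):
from sklearn.metrics import accuracy_score
print('train accuracy:', accuracy_score(y_train, y_train_pred))
print('validation accuracy:', accuracy_score(y_validation, y_validation_pred))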
Example 2
    y = features_dataset[:, features_dataset.shape[1] - 1:]

    np.save("X.npy", X)
    np.save("y.npy", y)


if __name__ == "__main__":
    #normalize()
    #face_features = np.load("./datasets/face_features.npy")
    #nonface_features = np.load("./datasets/nonface_features.npy")
    #generateDateset(face_features, nonface_features)

    X = np.load("X.npy")
    y = np.load("y.npy")
    mode = DecisionTreeClassifier(criterion='gini')
    adaBoost = ensemble.AdaBoostClassifier(mode, 10)
    xTrain, xValidation, yTrain, yValidation = train_test_split(
        X, y, test_size=0.5, random_state=42)
    m = adaBoost.n_weakers_limit
    for i in range(m):
        mode = adaBoost.fit(xTrain, yTrain)
        xTest = adaBoost.predict(xValidation)
        errorRate = 0
        for j in range(xValidation.shape[0]):
            if xTest[j] != yValidation[j]:
                errorRate = errorRate + adaBoost.weight[j]
        if errorRate > 0.5:
            break
        alpha = math.log((1 - errorRate) / errorRate) / 2
        z = 0
        for k in range(adaBoost.weight.shape[0]):
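            # The snippet is cut off here. A hypothetical completion showing the
            # standard AdaBoost re-weighting, w_k <- w_k * exp(-alpha * y_k * h(x_k)),
            # followed by normalization (not part of the original):
            adaBoost.weight[k] *= math.exp(-alpha * yValidation[k] * xTest[k])
            z += adaBoost.weight[k]
        adaBoost.weight = adaBoost.weight / z  # normalize so the weights sum to 1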
Example 3
import numpy as np
import pickle
import ensemble
from sklearn.metrics import classification_report

if __name__ == "__main__":
    # Split into training and validation sets
    print('loading features!')
    dataX = pickle.load(open('trainingX.data','rb'))
    dataY = pickle.load(open('trainingY.data','rb'))
    print('load success!')
    print(dataX)
    print(dataX.shape)
    x_train = dataX[0:900]
    y_train = dataY[0:900].flatten()
    x_valid = dataX[900:1000]
    y_valid = dataY[900:1000].flatten()
    classifier = ensemble.AdaBoostClassifier('sklearn.tree.DecisionTreeClassifier', 3)
    classifier.fit(x_train, y_train)
    y_pred = classifier.predict(x_valid, 0)
    print(classification_report(y_valid, y_pred))
    with open('laclassifier_report1.txt', 'w+') as f:
        f.write(classification_report(y_valid, y_pred))

Example 4
    x_train = train[:, 0: -1]
    y_train = train[:, -1]

    x_test = test[:, 0: -1]
    y_test = test[:, -1]

    return x_train, y_train, x_test, y_test

# Begin training
if __name__ == "__main__":
    process.process_data()

    x_train, y_train, x_test, y_test = get_data()

    # Train AdaBoost with an increasing number of weak learners and report
    # test performance for each setting (the original repeated this block for
    # 1, 2, 5 and 10 learners; the snippet was cut off inside the last block)
    for n_weakers in (1, 2, 5, 10):
        adaBoost = ensemble.AdaBoostClassifier(DecisionTreeClassifier, n_weakers)
        adaBoost.fit(x_train, y_train)
        ytest_ = adaBoost.predict(x_test)
        print(classification_report(y_test, ytest_))
Example 5
for i in range(sample_num):
    extract_feature(pic_type='face', feature_type=face_feature)
    extract_feature(pic_type='nonface', feature_type=nonface_feature)

# Split the dataset into training and validation sets
temp_pos = np.ones(sample_num)
temp_neg = -temp_pos
all_attribute = np.concatenate((face_feature, nonface_feature), axis=0)
all_label = np.concatenate((temp_pos.reshape(-1, 1), temp_neg.reshape(-1, 1)),
                           axis=0)
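# The block above assigns label +1 to the face samples and -1 to the non-face
# samples, stacking features and labels in the same face-first order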
X_train, X_valid, Y_train, Y_valid = train_test_split(all_attribute,
                                                      all_label,
                                                      test_size=0.2)

# Build the strong classifier
clf = ensemble.AdaBoostClassifier(weak_classifier=weak_clf,
                                  n_weakers_limit=weak_clf_num)
clf.fit(X_train, Y_train)

# Predict on the validation set with the strong classifier
y_pred = clf.predict(X_valid)
y_pred = y_pred.reshape(-1).tolist()
y_true = Y_valid.reshape(-1).tolist()

# Write the prediction report to a file
fout = codecs.open('classifier_report.txt', 'w', 'utf-8')
target_names = ['face', 'nonface']
result = classification_report(y_true, y_pred, target_names=target_names)
fout.write(result)
fout.close()

print(result)
Example 6
    X_train, X_val, y_train, y_val = train_test_split(X,
                                                      y,
                                                      test_size=0.2,
                                                      random_state=214)
    DTC = tree.DecisionTreeClassifier(max_depth=2)
    # Train a new model with the number of base classifiers ranging from 2 to 10.
    # To make the boosting iterations easier to follow, the tree depth is set to 2 or 4.
    # for i in range(2, 10):
    #     My_AdaBoost = ensemble.AdaBoostClassifier(DTC, i)
    #     My_AdaBoost.fit(X_train, y_train)
    #     My_AdaBoost.predict(X_val)
    #     My_AdaBoost.is_good_enough(y_val)
    #     My_AdaBoost.save(My_AdaBoost, '2_val_' + str(i) + '_model')

    # Load the previously trained models and run predictions
    My_AdaBoost = ensemble.AdaBoostClassifier(DTC)
    f = open('Tree_depth_4_reports.txt', 'a')
    acc = []
    for i in range(2, 10):
        My_model = My_AdaBoost.load('model/4_' + str(i) + '_model')
        My_model.predict(X_train)
        My_model.is_good_enough(y_train)
        y_predict = My_model.predict(X_val)
        accuracy = accuracy_score(y_val, y_predict)
        acc.append(accuracy)
        # print(accuracy)
        report = My_model.is_good_enough(y_val)
        f.write('Classifier_Num = ' + str(i) + ':\n' + str(report) + '\n\n')
    f.close()
    draw_predict(acc)
Example 7
    neg_dir = pwd + 'datasets/original/nonface/'  # directory of negative examples

    # Load the feature matrix from file; if the features have not been
    # extracted and saved yet, extract them first, then load
    if not os.path.exists('a.npy'):
        transeToNPD(pos_dir, neg_dir)
    X, y = loadDataSet()  # load in both cases so X and y are always defined

    # Split the features and labels, holding out 33.3% for validation
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.33)

    # Create the decision tree base learner
    DTC = tree.DecisionTreeClassifier(max_depth=6)

    # Create the AdaBoost classifier with at most 10 base learners
    myAdaBoost = ensemble.AdaBoostClassifier(DTC, 10)

    # Train the AdaBoost model
    myAdaBoost.fit(X_train, y_train)

    # Save the AdaBoost model
    myAdaBoost.save(myAdaBoost, 'train_model')

    # Predict on the validation set; the predicted labels are kept inside the AdaBoost object
    myAdaBoost.predict(X_val)

    validation = myAdaBoost.is_good_enough(y_val)
    print(validation)
    fh = open('report.txt', 'w')
    fh.write(validation)
    fh.close()
Example 8
import numpy as np
import ensemble
from sklearn import tree

labels = np.load('labels.npy')
features = np.load('features.npy')

train_size = 800
X = features[:train_size]
y = labels[:train_size]

rng_state = np.random.get_state()
np.random.shuffle(X)
np.random.set_state(rng_state)
np.random.shuffle(y)
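# The two shuffles above permute X and y in unison: saving the RNG state and
# restoring it before the second shuffle guarantees both arrays receive the
# same permutation. Note that the slice is taken before shuffling, so only the
# first train_size samples are ever used. An equivalent single-permutation
# variant (not in the original) would be:
#     perm = np.random.permutation(len(X))
#     X, y = X[perm], y[perm]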

ada = ensemble.AdaBoostClassifier(tree.DecisionTreeClassifier, 12)

ada.fit(X, y)
Example 9
#import os
#import numpy as np
#import pickle
#from PIL import Image
#import feature
#x = []
#path_face = "C:/Users/asus/project3/datasets/original/face/"
#for img_face in os.listdir(path_face):
#    img = np.array(Image.open(path_face + img_face).resize((24, 24)).convert('L'))
#    x.append(feature.NPDFeature(img).extract())
#path_nonface = "C:/Users/asus/project3/datasets/original/nonface/"
#for img_nonface in os.listdir(path_nonface):
#    img = np.array(Image.open(path_nonface + img_nonface).resize((24, 24)).convert('L'))
#    x.append(feature.NPDFeature(img).extract())
#pickle.dump(np.array(x), open('datasets.pkl', 'wb'))
import numpy as np
import pickle
import ensemble
from sklearn.model_selection import train_test_split  # sklearn.cross_validation was removed in modern scikit-learn
from sklearn.tree import DecisionTreeClassifier
x = pickle.load(open('datasets.pkl', 'rb'))
y = np.ones(1000)
y[500:] = -1  # label the 500 non-face samples; the original y[500:999] left the last sample at +1
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.33, random_state=0)
DTclf = DecisionTreeClassifier(max_depth=4)
ADBTclf = ensemble.AdaBoostClassifier(DTclf, 20).fit(x_train, y_train)
y_predict = ADBTclf.predict(x_test, 0)
print(1 - np.sum(y_predict != y_test) / np.shape(x_test)[0])