dataY = np.array(dataY)
# axis=0 concatenates along rows (stacks the arrays vertically);
# axis=1 concatenates along columns (joins them horizontally)
dataX = np.concatenate((dataX[0], dataX[1]), axis=0)
dataY = np.concatenate((dataY[0], dataY[1]), axis=0)
dataY = dataY.reshape((len(dataY), 1))  # make sure the label array has shape (n_samples, 1)

# Divide the dataset
x_train, x_validation, y_train, y_validation = train_test_split(
    dataX, dataY, test_size=0.2, random_state=42)


# In[3]:


# Train the model
model = ensemble.AdaBoostClassifier(
    weak_classifier=DecisionTreeClassifier,
    n_weakers_limit=40)
model.fit(X=x_train, y=y_train)


# In[4]:


# Predict
y_train_pred = model.predict(X=x_train, threshold=0)
y_validation_pred = model.predict(X=x_validation, threshold=0)


# In[12]:


# Verify the accuracy on the validation set
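# A minimal way to finish that check, assuming labels take values in {-1, +1}
# (the original verification code was cut off at this point, so this is a
# sketch rather than the author's own code):
train_acc = np.mean(y_train_pred.reshape(-1) == y_train.reshape(-1))
validation_acc = np.mean(y_validation_pred.reshape(-1) == y_validation.reshape(-1))
print('train accuracy:', train_acc)
print('validation accuracy:', validation_acc)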
y = features_dataset[:, features_dataset.shape[1] - 1:]
np.save("X.npy", X)
np.save("y.npy", y)


if __name__ == "__main__":
    # normalize()
    # face_features = np.load("./datasets/face_features.npy")
    # nonface_features = np.load("./datasets/nonface_features.npy")
    # generateDateset(face_features, nonface_features)
    X = np.load("X.npy")
    y = np.load("y.npy")
    mode = DecisionTreeClassifier(criterion='gini')
    adaBoost = ensemble.AdaBoostClassifier(mode, 10)
    xTrain, xValidation, yTrain, yValidation = train_test_split(
        X, y, test_size=0.5, random_state=42)
    m = adaBoost.n_weakers_limit
    for i in range(m):
        mode = adaBoost.fit(xTrain, yTrain)
        xTest = adaBoost.predict(xValidation)
        # weighted error of this round's classifier on the held-out samples
        errorRate = 0
        for j in range(xValidation.shape[0]):
            if xTest[j] != yValidation[j]:
                errorRate = errorRate + adaBoost.weight[j]
        if errorRate > 0.5:
            break
        alpha = math.log((1 - errorRate) / errorRate) / 2
        z = 0
        for k in range(adaBoost.weight.shape[0]):
            # (the snippet was truncated here; the standard AdaBoost step
            # re-weights each sample and accumulates the normalising constant z)
            adaBoost.weight[k] *= np.exp(-alpha * yValidation[k] * xTest[k])
            z += adaBoost.weight[k]
        adaBoost.weight /= z
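# The re-weighting loop above is the standard AdaBoost update,
# w_k <- w_k * exp(-alpha * y_k * h(x_k)) / Z. For reference, the same step can
# be written vectorised; the function below is an illustrative sketch, not the
# project's actual API:
import numpy as np

def reweight(weights, alpha, y_true, y_pred):
    """One AdaBoost re-weighting pass; y_true and y_pred take values in {-1, +1}."""
    new_w = weights * np.exp(-alpha * y_true * y_pred)  # misclassified samples get heavier
    return new_w / new_w.sum()                          # normalise so the weights sum to 1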
import numpy as np
import pickle
import ensemble
from sklearn.metrics import classification_report
from sklearn.tree import DecisionTreeClassifier

if __name__ == "__main__":
    # split into a training set and a validation set
    print('loading features!')
    dataX = pickle.load(open('trainingX.data', 'rb'))
    dataY = pickle.load(open('trainingY.data', 'rb'))
    print('load success!')
    print(dataX)
    print(dataX.shape)
    x_train = dataX[0:900]
    y_train = dataY[0:900].flatten()
    x_valid = dataX[900:1000]
    y_valid = dataY[900:1000].flatten()
    # pass the weak-learner class itself rather than its name as a string
    classifier = ensemble.AdaBoostClassifier(DecisionTreeClassifier, 3)
    classifier.fit(x_train, y_train)
    y_pred = classifier.predict(x_valid, 0)
    print(classification_report(y_valid, y_pred))
    with open('laclassifier_report1.txt', 'w+') as f:
        f.write(classification_report(y_valid, y_pred))  # the with-block closes f automatically
    x_train = train[:, 0:-1]
    y_train = train[:, -1]
    x_test = test[:, 0:-1]
    y_test = test[:, -1]
    return x_train, y_train, x_test, y_test


# Begin to train
if __name__ == "__main__":
    process.process_data()
    x_train, y_train, x_test, y_test = get_data()
    # train and evaluate AdaBoost with an increasing number of weak learners
    for n_weakers in (1, 2, 5, 10):
        adaBoost = ensemble.AdaBoostClassifier(DecisionTreeClassifier, n_weakers)
        adaBoost.fit(x_train, y_train)
        ytest_ = adaBoost.predict(x_test)
        print(classification_report(y_test, ytest_))
for i in range(sample_num):
    extract_feature(pic_type='face', feature_type=face_feature)
    extract_feature(pic_type='nonface', feature_type=nonface_feature)

# split the dataset into a training set and a validation set
temp_pos = np.ones(sample_num)
temp_neg = -temp_pos
all_attribute = np.concatenate((face_feature, nonface_feature), axis=0)
all_label = np.concatenate((temp_pos.reshape(-1, 1), temp_neg.reshape(-1, 1)), axis=0)
X_train, X_valid, Y_train, Y_valid = train_test_split(all_attribute, all_label, test_size=0.2)

# build the strong classifier
clf = ensemble.AdaBoostClassifier(weak_classifier=weak_clf, n_weakers_limit=weak_clf_num)
clf.fit(X_train, Y_train)

# predict on the validation set with the strong classifier
y_pred = clf.predict(X_valid)
y_pred = y_pred.reshape(-1).tolist()
y_true = Y_valid.reshape(-1).tolist()

# write out the prediction report; pass labels explicitly so target_names
# pairs up correctly (+1 = face, -1 = nonface)
fout = codecs.open('classifier_report.txt', 'w', 'utf-8')
target_names = ['face', 'nonface']
result = classification_report(y_true, y_pred, labels=[1, -1], target_names=target_names)
fout.write(result)
fout.close()
print(result)
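# extract_feature above is project-specific helper code. Based on the feature
# extraction pattern used elsewhere in this repo (PIL plus the course-provided
# feature.NPDFeature), it plausibly reduces to the following sketch; the helper
# name and path argument are illustrative assumptions:
import numpy as np
from PIL import Image
from feature import NPDFeature

def extract_npd_feature(img_path):
    """Grayscale an image, resize it to 24x24, and return its NPD feature vector."""
    img = np.array(Image.open(img_path).resize((24, 24)).convert('L'))
    return NPDFeature(img).extract()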
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=214)
DTC = tree.DecisionTreeClassifier(max_depth=2)

# Train a new model, varying the number of base classifiers from 2 to 10.
# To make the boosting iterations easier to follow, the tree depth is set to 2 or 4.
# for i in range(2, 10):
#     My_AdaBoost = ensemble.AdaBoostClassifier(DTC, i)
#     My_AdaBoost.fit(X_train, y_train)
#     My_AdaBoost.predict(X_val)
#     My_AdaBoost.is_good_enough(y_val)
#     My_AdaBoost.save(My_AdaBoost, '2_val_' + str(i) + '_model')

# Load the previously trained models and run prediction
My_AdaBoost = ensemble.AdaBoostClassifier(DTC)
f = open('Tree_depth_4_reports.txt', 'a')
acc = []
for i in range(2, 10):
    My_model = My_AdaBoost.load('model/4_' + str(i) + '_model')
    My_model.predict(X_train)
    My_model.is_good_enough(y_train)
    y_predict = My_model.predict(X_val)
    accuracy = accuracy_score(y_val, y_predict)
    acc.append(accuracy)
    report = My_model.is_good_enough(y_val)
    f.write('Classifier_Num = ' + str(i) + ':\n' + str(report) + '\n\n')
f.close()
draw_predict(acc)
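# The save/load calls above assume the custom AdaBoostClassifier can persist
# itself. A minimal sketch of such helpers with pickle (the function names are
# illustrative; the project's ensemble module may expose them as methods):
import pickle

def save_model(model, filename):
    """Serialise a trained model to disk."""
    with open(filename, 'wb') as f:
        pickle.dump(model, f)

def load_model(filename):
    """Restore a previously saved model."""
    with open(filename, 'rb') as f:
        return pickle.load(f)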
neg_dir = pwd + 'datasets/original/nonface/'  # negative-sample directory

# Load the feature matrix from file; if the file has not been generated yet
# (features not extracted), extract the features and save them first.
if not os.path.exists('a.npy'):
    transeToNPD(pos_dir, neg_dir)
X, y = loadDataSet()  # load in either case; originally X and y were only set in the else-branch

# split the features and labels, with 33.3% held out as the test set
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.33)

# declare the decision tree
DTC = tree.DecisionTreeClassifier(max_depth=6)
# declare the AdaBoost class with at most 10 base learners
myAdaBoost = ensemble.AdaBoostClassifier(DTC, 10)
# train the AdaBoost model
myAdaBoost.fit(X_train, y_train)
# save the AdaBoost model
myAdaBoost.save(myAdaBoost, 'train_model')
# predict on the test set; the produced labels are kept inside the AdaBoost instance
myAdaBoost.predict(X_val)
validation = myAdaBoost.is_good_enough(y_val)
print(validation)
fh = open('report.txt', 'w')
fh.write(validation)
fh.close()
import numpy as np
import ensemble
from sklearn import tree

labels = np.load('labels.npy')
features = np.load('features.npy')

train_size = 800
X = features[:train_size]
y = labels[:train_size]

# shuffle X and y in unison: save the RNG state, shuffle X, restore the
# state, then shuffle y with the identical sequence of swaps
rng_state = np.random.get_state()
np.random.shuffle(X)
np.random.set_state(rng_state)
np.random.shuffle(y)

ada = ensemble.AdaBoostClassifier(tree.DecisionTreeClassifier, 12)
ada.fit(X, y)
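# The get_state/set_state trick above keeps X and y aligned while shuffling.
# An equivalent and arguably clearer alternative is one shared permutation of
# the row indices (a sketch, not part of the original script):
def unison_shuffle(X, y, seed=None):
    """Shuffle two arrays with the same random permutation of their rows."""
    rng = np.random.default_rng(seed)
    idx = rng.permutation(len(X))
    return X[idx], y[idx]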
# The commented-out block below generates datasets.pkl from the raw images:
# import os
# import numpy as np
# import pickle
# from PIL import Image
# import feature
#
# x = []
# path_face = "C:/Users/asus/project3/datasets/original/face/"
# for img_face in os.listdir(path_face):
#     x.append(feature.NPDFeature(np.array(Image.open(path_face + img_face).resize((24, 24)).convert('L'))).extract())
# path_nonface = "C:/Users/asus/project3/datasets/original/nonface/"
# for img_nonface in os.listdir(path_nonface):
#     x.append(feature.NPDFeature(np.array(Image.open(path_nonface + img_nonface).resize((24, 24)).convert('L'))).extract())
# pickle.dump(np.array(x), open('datasets.pkl', 'wb'))

import numpy as np
import pickle
import ensemble
from sklearn.model_selection import train_test_split  # sklearn.cross_validation was removed in 0.20
from sklearn.tree import DecisionTreeClassifier

x = pickle.load(open('datasets.pkl', 'rb'))
y = np.ones(1000)
y[500:] = -1  # was y[500:999], which left the last sample mislabelled as +1

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.33, random_state=0)
DTclf = DecisionTreeClassifier(max_depth=4)
ADBTclf = ensemble.AdaBoostClassifier(DTclf, 20)
ADBTclf.fit(x_train, y_train)
y_predict = ADBTclf.predict(x_test, 0)  # call predict as a normal instance method
print(1 - np.sum(y_predict != y_test) / x_test.shape[0])  # accuracy
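# Equivalently, sklearn's accuracy_score yields the same number with clearer intent:
from sklearn.metrics import accuracy_score
print(accuracy_score(y_test, y_predict))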