def main():
    learner = lambda d, l: adaboost.train(
        stump.random, stump.predict, data=d, labels=l, iters=3000)
    codes, predictors = train('data/8newsgroup/train.trec', 20, learner,
                              adaboost.predict)
    train_error = test('data/8newsgroup/train.trec', codes, predictors)
    test_error = test('data/8newsgroup/test.trec', codes, predictors)
    print "\ntrain_err=%.6f, test_err=%.6f" % (train_error, test_error)
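# The functional interface above (stump.random to propose weak learners,
# stump.predict to evaluate them) is defined elsewhere. A minimal hedged
# sketch of what such an adaboost.train could look like; the gen/predict
# signatures are assumptions, and only the (alphas, predictors) return shape
# is implied by the calling code. Assumes data/labels are NumPy arrays and
# predictions are +/-1 vectors.
import numpy as np

def train_sketch(gen, predict, data, labels, iters):
    n = len(labels)
    w = np.full(n, 1.0 / n)                        # uniform point weights
    alphas, predictors = [], []
    for _ in range(iters):
        stump = gen(data)                          # assumed signature
        preds = predict(stump, data)               # vector of +/-1
        err = float(np.clip(w[preds != labels].sum(), 1e-10, 1 - 1e-10))
        alpha = 0.5 * np.log((1.0 - err) / err)    # classic AdaBoost weight
        w *= np.exp(-alpha * labels * preds)       # upweight mistakes
        w /= w.sum()
        alphas.append(alpha)
        # Store a callable so downstream code can call predictor(data).
        predictors.append(lambda X, s=stump: predict(s, X))
    return alphas, predictors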
def test_train(self):
    print("Test Train:")
    data_mat, labels_arr = adb.load_simple_data()
    classifies_arr, est_agg = adb.train(data_mat, labels_arr, 9)
    self.assertEqual(len(classifies_arr), 3)
    self.assertEqual(0, classifies_arr[-1]['dim'])
    self.assertEqual('lt', classifies_arr[-1]['ineq'])
    self.assertEqual(0.9, classifies_arr[-1]['thresh'])
    self.assertEqual(0.8958797346, round(classifies_arr[-1]['alpha'], 10))
def test_classify(self):
    print("Test Classify:")
    print("Classify Simple Data:")
    data_mat, labels_arr = adb.load_simple_data()
    classifies_arr, est_agg = adb.train(data_mat, labels_arr, 30)
    pred = adb.classify([[5, 5], [0, 0]], classifies_arr)
    res = np.matrix([[1.], [-1.]])
    self.assertEqual(True, (pred == res).all())

    print("Classify Loaded Data:")
    datArr, labelArr = adb.load_data_set('horseColicTraining2.txt')
    classiferArray, aggClassEst = adb.train(datArr, labelArr, 10)
    testArr, testLabelArr = adb.load_data_set('horseColicTest2.txt')
    prediction10 = adb.classify(testArr, classiferArray)
    errArr = np.mat(np.ones((67, 1)))
    err_rate = errArr[prediction10 != np.mat(testLabelArr).T].sum() / 67
    self.assertEqual(16.0 / 67, err_rate)
    print("Test Error: %f%%" % (err_rate * 100))

    # Plot the ROC curve and compute the AUC.
    val_auc = adb.plot_roc(aggClassEst, labelArr)
    self.assertLessEqual(0.8582969635, round(val_auc, 10))
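# load_simple_data and adb.classify are not shown here. The toy set appears
# to be the classic five 2-D points (the same values show up, commented out,
# in a later snippet), and the 'dim'/'thresh'/'ineq'/'alpha' keys asserted
# above imply a one-feature threshold stump. A hedged reconstruction:
import numpy as np

def load_simple_data_sketch():
    data_mat = np.matrix([[1.0, 2.1],
                          [2.0, 1.1],
                          [1.3, 1.0],
                          [1.0, 1.0],
                          [2.0, 1.0]])
    class_labels = [1.0, 1.0, -1.0, -1.0, 1.0]
    return data_mat, class_labels

def stump_classify(data_matrix, dim, thresh, ineq):
    # 'lt' sends values <= thresh to -1; otherwise values > thresh go to -1.
    ret = np.ones((np.shape(data_matrix)[0], 1))
    if ineq == 'lt':
        ret[data_matrix[:, dim] <= thresh] = -1.0
    else:
        ret[data_matrix[:, dim] > thresh] = -1.0
    return ret

def classify_sketch(data, classifier_arr):
    # Sum the alpha-weighted stump votes and take the sign.
    data_matrix = np.mat(data)
    agg = np.mat(np.zeros((np.shape(data_matrix)[0], 1)))
    for c in classifier_arr:
        agg += c['alpha'] * stump_classify(data_matrix,
                                           c['dim'], c['thresh'], c['ineq'])
    return np.sign(agg)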
def active_learning(filenames, random=False):
    data, labels = load_data(filenames, delimiter=",",
                             label_map={0.0: -1.0, 1.0: 1.0})
    total = len(data)

    # Shuffle data and labels in unison by reusing the same RNG state.
    state = np.random.get_state()
    np.random.shuffle(data)
    np.random.set_state(state)
    np.random.shuffle(labels)

    # Seed the labeled pool with the last 5% of the shuffled data.
    step = int(ceil(0.025 * len(labels)))
    sample_data = np.copy(data[-step * 2:])
    sample_labels = np.copy(labels[-step * 2:])
    data = np.delete(data, np.s_[-step * 2:], axis=0)
    labels = np.delete(labels, np.s_[-step * 2:])

    for x in range(19):
        assert len(data) + len(sample_data) == total
        assert len(labels) + len(sample_labels) == total

        # Train on the current labeled sample.
        alphas, predictors = adaboost.train(
            stump.random, stump.predict,
            data=sample_data, labels=sample_labels, iters=1000)

        # Calculate the error on the sample and on the remaining pool.
        train_error = adaboost.test(alphas, predictors,
                                    data=sample_data, labels=sample_labels)
        test_error = adaboost.test(alphas, predictors, data=data, labels=labels)
        sample = (x + 2) * 0.025
        msg = "sample=%.3f, train_err=%.6f, test_err=%.6f"
        print msg % (sample, train_error, test_error)

        # Pick new sample points.
        if random:
            sample_data = np.append(sample_data, data[-step:], axis=0)
            sample_labels = np.append(sample_labels, labels[-step:])
            data = np.delete(data, np.s_[-step:], axis=0)
            labels = np.delete(labels, np.s_[-step:])
        else:
            # Uncertainty sampling: take the points with the smallest
            # absolute ensemble margin.
            margins = np.absolute(sum([
                alpha * predictor(data)
                for alpha, predictor in zip(alphas, predictors)
            ]))
            uncertain = margins.argsort()[:step]
            sample_data = np.append(sample_data, data[uncertain], axis=0)
            sample_labels = np.append(sample_labels, labels[uncertain])
            data = np.delete(data, uncertain, axis=0)
            labels = np.delete(labels, uncertain)
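# The non-random branch above is margin-based uncertainty sampling: points
# with the smallest absolute ensemble score are the ones the current model is
# least sure about, so they get labeled first. adaboost.test is not shown; a
# hedged sketch consistent with how it is called:
import numpy as np

def test_sketch(alphas, predictors, data, labels):
    # Aggregate the weighted votes, take the sign, return the error fraction.
    agg = sum(alpha * predictor(data)
              for alpha, predictor in zip(alphas, predictors))
    return float(np.mean(np.sign(agg) != labels))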
def uci(folder):
    data, labels = load_data(folder)
    for step in STEPS:
        # Shuffle data and labels in unison.
        state = np.random.get_state()
        np.random.shuffle(data)
        np.random.set_state(state)
        np.random.shuffle(labels)

        i = int(ceil(step * data.shape[DATA_AXIS]))
        alphas, predictors = adaboost.train(stump.random, stump.predict,
                                            data=data[:i], labels=labels[:i],
                                            iters=500)
        train_error = adaboost.test(alphas, predictors,
                                    data=data[:i], labels=labels[:i])
        test_error = adaboost.test(alphas, predictors,
                                   data=data[i:], labels=labels[i:])
        msg = "sample=%.2f, train_err=%.6f, test_err=%.6f"
        print msg % (step, train_error, test_error)
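# uci() relies on module-level constants not shown in this snippet. A hedged
# usage sketch; both values below are assumptions:
from math import ceil

DATA_AXIS = 0                        # rows are samples
STEPS = [0.05, 0.1, 0.2, 0.5]        # training fractions to sweep
# uci('data/uci/some_dataset') then trains on the first ceil(step * n) rows
# and reports train/test error at each fraction.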
# Resolve conflicts: the two columns of a hypothesis pair shouldn't be equal.
if column_1 == column_2:
    if column_2 != total_num_of_cols:
        column_2 += 1
    else:
        column_2 -= 1
random_hyp_pairs.append((column_1, column_2))

# Run training for the different orientations. Each run returns a dict of
# dicts keyed by a particular orientation; the dicts are pipelined from one
# training stage to the next, so the final output is a dict of dicts with
# four keys (0, 90, 180, 270).
print("Max iterations set to: ", max_iterations)
alphas_for_0 = adaboost.train(data, 0, random_hyp_pairs, max_iterations, hyp_alphas)
alphas_for_90 = adaboost.train(data, 90, random_hyp_pairs, max_iterations, alphas_for_0)
alphas_for_180 = adaboost.train(data, 180, random_hyp_pairs, max_iterations, alphas_for_90)
alphas_for_270 = adaboost.train(data, 270, random_hyp_pairs, max_iterations, alphas_for_180)

# Store model params in a pickle to retain the dictionary structure.
pickle.dump(alphas_for_270, f, protocol=pickle.HIGHEST_PROTOCOL)
end = datetime.datetime.now()
print("Finished training in", end - start)
# Training ends.
f.close()

# ----------------------------------#
#             TESTING               #
# ----------------------------------#
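# Each (column_1, column_2) pair above indexes two pixel columns; the implied
# weak hypothesis compares their intensities. A hedged sketch of that test:
def pixel_pair_hypothesis(row, column_1, column_2):
    # Boolean outcome that adaboost.train can weight per orientation class.
    return row[column_1] > row[column_2]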
import numpy as np
import adaboost


def createDataSet(filename):
    # Parse whitespace-separated rows: all but the last column are features,
    # the last column is the label.
    with open(filename, 'r') as f:
        lines = f.readlines()
    dataSet, labels = [], []
    for line in lines:
        data = line.split()
        dataSet.append(list(map(float, data[:-1])))
        labels.append(int(float(data[-1])))
    return np.array(dataSet), np.array(labels)


trainingSet, trainingLabels = createDataSet('data/horse/training.txt')
stumps = adaboost.train(trainingSet, trainingLabels)

testSet, testLabels = createDataSet('data/horse/test.txt')
res = adaboost.classify(testSet, stumps)
a, b = np.sum(res == testLabels), len(testLabels)
print('Correctness: %d/%d = %.2f%%' % (a, b, a / b * 100))
    return mat


train_face_features = features_to_mat(features, train_face_integral_images)
train_face_labels = [1.] * len(train_face_features)
train_non_face_features = features_to_mat(features, train_non_face_integral_images)
train_non_face_labels = [-1.] * len(train_non_face_features)
test_face_features = features_to_mat(features, test_face_integral_images)
test_non_face_features = features_to_mat(features, test_non_face_integral_images)

adaboost_classifiers = adaboost.train(
    train_face_features + train_non_face_features,
    train_face_labels + train_non_face_labels,
    10)


def output_round(features, ada_classifiers, round_num):
    best_feature_idx = ada_classifiers[round_num - 1]['index']
    best_feature = features[best_feature_idx]
    print("\nAdaboost rounds: %d" % round_num)

    # Print the image with the top feature rectangle drawn on it.
    if best_feature.type == FeatureType.TWO_HORIZONTAL:
        printed_img = draw_feature_2h(open_face(test_face_images[0]), best_feature)
        print("\nType: TWO_HORIZONTAL")
    elif best_feature.type == FeatureType.TWO_VERTICAL:
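# The `return mat` opening this snippet is the tail of features_to_mat. A
# hedged reconstruction: evaluate every Haar-like feature on every integral
# image. It has to return a list (not an ndarray) for the `+` concatenation
# of face and non-face rows above to behave as intended; the feature.compute
# method name is an assumption.
def features_to_mat_sketch(features, integral_images):
    mat = [[feature.compute(ii) for feature in features]
           for ii in integral_images]
    return mat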
with open(join(mypath, file), "r") as f:
    templist = []
    for line in f:
        for word in line.split():
            if word not in templist and word.isnumeric():  # len(word) > 1
                templist.append(word)
    templist.append("spmsg" not in file)
    development_mails.append(templist)

print("Training bayes and adaboost...")
for i in range(len(generalHyperParameters)):
    print(i)
    correctBayes = 0
    correctAdaboost = 0
    basic_classifiers_with_weights = adaboost.train(
        trainMailsList, trainHam, trainSpam, sortedIGs,
        generalHyperParameters[i])  # maybe allMailsList as well
    bayes_probabilities = naiveBayes.train(
        sortedIGs, allWords, trainHam, trainSpam,
        generalHyperParameters[i])  # parameters for naive Bayes
    for incoming in development_mails:
        if adaboost.predict(basic_classifiers_with_weights, sortedIGs,
                            incoming, generalHyperParameters[i]) == incoming[-1]:
            correctAdaboost += 1
        if naiveBayes.predict(bayes_probabilities, incoming, sortedIGs,
                              trainHam / (trainHam + trainSpam),
                              1 - trainHam / (trainHam + trainSpam),
                              generalHyperParameters[i]) == incoming[-1]:
            correctBayes += 1
    if correctBayes > maxBayes:
if __name__ == "__main__":
    writer = csv.writer(open("F12.csv", "w", encoding="utf-8", newline=""))
    traindataSet, valdataSet = loadDataSet('newTrain2.csv')
    testdataSet = loadTestSet('newTest2.csv')
    """
    traindataSet = [[1., 2.1, 1.0],
                    [2., 1.1, 1.0],
                    [1.3, 1., -1.0],
                    [1., 1., -1.0],
                    [2., 1., 1.0]]
    """
    trainLabels = [row[-1] for row in traindataSet]
    valLabels = [row[-1] for row in valdataSet]
    valdataSet = [row[:-1] for row in valdataSet]

    for depth in range(10, 11):
        for numOfClassifier in range(16, 21):
            print("\nnumofClassifier: " + str(numOfClassifier) +
                  " depth: " + str(depth))
            weekCartClassList, trainResult = adaboost.train(
                traindataSet, numOfClassifier, depth)
            trainF1 = printResult(trainResult, trainLabels)
            valResult = adaboost.predict(valdataSet, weekCartClassList)
            valF1 = printResult(valResult, valLabels)
            # testResult = adaboost.predict(testdataSet, weekCartClassList)
            # outputResult(testResult, numOfClassifier, depth, trainF1, 0)
            writer.writerow([trainF1, valF1])
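# printResult is not shown in this snippet; a hedged sketch that computes and
# prints the F1 score of the positive (1.0) class, matching how its return
# value is logged to F12.csv:
def printResult_sketch(predictions, labels):
    tp = sum(1 for p, y in zip(predictions, labels) if p == 1.0 and y == 1.0)
    fp = sum(1 for p, y in zip(predictions, labels) if p == 1.0 and y != 1.0)
    fn = sum(1 for p, y in zip(predictions, labels) if p != 1.0 and y == 1.0)
    precision = tp / (tp + fp) if tp + fp else 0.0
    recall = tp / (tp + fn) if tp + fn else 0.0
    f1 = (2 * precision * recall / (precision + recall)
          if precision + recall else 0.0)
    print("F1: %.4f" % f1)
    return f1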
def train(data, labels):
    learner = lambda d, l: adaboost.train(
        stump.random, stump.predict, data=d, labels=l, iters=5000)
    return ecoc.train(data, labels, 50, learner, adaboost.predict)
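# ecoc.train(data, labels, 50, learner, adaboost.predict) reads as a
# 50-column error-correcting output code: each column relabels the classes as
# +/-1 and gets its own boosted binary learner. A hedged sketch of the
# training half of that idea:
import numpy as np

def ecoc_train_sketch(data, labels, n_columns, learner):
    classes = np.unique(labels)
    # Random +/-1 code matrix: one row per class, one column per learner.
    codes = np.random.choice([-1.0, 1.0], size=(len(classes), n_columns))
    index = {c: i for i, c in enumerate(classes)}
    learners = []
    for col in range(n_columns):
        binary_labels = np.array([codes[index[y], col] for y in labels])
        learners.append(learner(data, binary_labels))
    return codes, learners
# At decode time a sample is assigned the class whose code row is closest,
# in Hamming distance, to the vector of per-column predictions.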
import pickle

import numpy as np

import cascade
import data_loader
import feature

train_x, train_y, test_x, test_y = data_loader.load()
train_f, i_f = feature.get_features(train_x)

# Keep the first 472 test items plus a random 2000-item slice of the rest.
test_selection_index = np.concatenate(
    [np.arange(472), np.random.choice(19572, 2000, replace=False) + 472])
test_x = test_x[test_selection_index]
test_y = test_y[test_selection_index]

# Load the boosted classifier from disk, or train and cache it.
try:
    with open("classifier.pkl", "rb") as f:
        classifier = pickle.load(f)
except FileNotFoundError:
    classifier = adaboost.train(train_f, train_y, 10)
    with open("classifier.pkl", "wb") as f:
        pickle.dump(classifier, f, protocol=pickle.HIGHEST_PROTOCOL)

# Load the cascade from disk, or train and cache it.
# 0.99, 0.4, 0.01
try:
    with open("cascade.pkl", "rb") as f:
        cascade_classifier = pickle.load(f)
except FileNotFoundError:
    cascade_classifier = cascade.train_cascade(train_f, train_y, 0.99, 0.4, 0.01)
    with open("cascade.pkl", "wb") as f:
        pickle.dump(cascade_classifier, f, protocol=pickle.HIGHEST_PROTOCOL)

test_f, i_f = feature.get_features(test_x)
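# The 0.99, 0.4, 0.01 arguments read as Viola-Jones stage targets: at least
# 99% detection and at most 40% false positives per stage, stopping once the
# overall false-positive rate drops below 1%. That reading is an assumption,
# since train_cascade is not shown. A hedged sketch of how such a cascade
# would classify, with stages assumed to be (score_fn, threshold) pairs:
def cascade_predict_sketch(stages, x):
    for score_fn, threshold in stages:
        if score_fn(x) < threshold:
            return -1.0    # rejected early; most negatives exit here cheaply
    return 1.0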
# Designed by Junbo Zhao
# 12/23/2013
# This is a simple demo, training and testing the adaboost classifier.

from numpy import *

import data
import adaboost

# The following data files are only used to show how this program works.
# The two files are generated by the function randomData().
# It is better to use your own data to see the power of adaboost!
# Face recognition problems are a good fit for adaboost.
trainData, label = data.readData('train.txt', 'train')
# trainData, label = data.loadSimpleData()
testData = data.readData('test.txt', 'test')

classifier = adaboost.train(trainData, label, 150)
adaboost.test(testData, classifier)
input()
from __future__ import print_function, division

import numpy as np

import data
import adaboost

dataMat, labels = data.loadSimpleData()
print("dataMat:", dataMat)
print("labels:", labels)
adaboost.train(dataMat, labels)
from metrics import acc
import numpy as np

cross_validate = True

if __name__ == "__main__":
    # First obtain our training and testing data.
    Xt, Yt, Xv = load_validation_data()

    if cross_validate:
        # For cross-validation, hold out part of the training set.
        Xt1, Xt2, Yt1, Yt2 = shuffle_split(Xt, Yt)

        classifiers = [
            adaboost.train(Xt1, Yt1),
            extra_randomized_trees.train(Xt1, Yt1),
            gradient_boost.train(Xt1, Yt1),
            random_forest.train(Xt1, Yt1),
            logistic_regression.train(Xt1, Yt1),
        ]

        # Train another classifier on the ensemble's output training
        # predictions for each sample in the training data.
        training_predictions = np.mat(
            [[c.predict(sample)[0] for c in classifiers] for sample in Xt1])
        meta_classifier = logistic_regression.train(training_predictions, Yt1)

        # Check results on the held-out split.
        print "Accuracy for individual classifiers:", [
            acc(Yt2, c.predict(Xt2)) for c in classifiers]
        predictions = np.mat([c.predict(Xt2) for c in classifiers]).transpose()
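        # Hedged continuation: the natural next step for this stacking setup
        # is to score the meta-classifier on the same held-out split, feeding
        # it the stacked base-model predictions.
        stacked = meta_classifier.predict(predictions)
        print "Accuracy for stacked ensemble:", acc(Yt2, stacked)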