def getModel():
    featureMat = loadFeatures()
    print("features loading over...")
    # shuffle rows in place with numpy; Python's random.shuffle corrupts
    # 2-D arrays because its swap assignment duplicates row views
    np.random.shuffle(featureMat)
    # train on the first 5000 rows; the label sits in the last column
    train_data = featureMat[:5000, :-1]
    train_label = featureMat[:5000, -1].reshape(-1, 1)
    clf = Adaboost(n_estimators=200, debug=True)
    clf.fit(train_data, train_label)
    return clf

def train(instances, args):
    """Instantiate the predictor requested by args.algorithm and train it."""
    if args.algorithm == "averaged_perceptron":
        predictor = Weighted_Perceptron(args.online_learning_rate,
                                        args.online_training_iterations)
    elif args.algorithm == "perceptron":
        # use a simple perceptron model
        predictor = Perceptron(args.online_learning_rate,
                               args.online_training_iterations)
    elif args.algorithm == "mc_perceptron":
        predictor = MC_Perceptron(args.online_training_iterations)
    elif args.algorithm == "margin_perceptron":
        predictor = Margin_Perceptron(args.online_learning_rate,
                                      args.online_training_iterations)
    elif args.algorithm == "pegasos":
        predictor = Pegasos(args.pegasos_lambda, args.online_training_iterations)
    elif args.algorithm == "knn":
        predictor = Standard_knn(args.knn)
    elif args.algorithm == "distance_knn":
        predictor = Distance_knn(args.knn)
    elif args.algorithm == "adaboost":
        predictor = Adaboost(args.num_boosting_iterations)
    elif args.algorithm == "lambda_means":
        predictor = Lambda_Means(args.cluster_lambda,
                                 args.clustering_training_iterations)
    elif args.algorithm == "nb_clustering":
        predictor = Naive_Bayes(args.num_clusters,
                                args.clustering_training_iterations)
    else:
        raise ValueError("Unknown algorithm name. Options are 'averaged_perceptron', "
                         "'perceptron', 'mc_perceptron', 'margin_perceptron', "
                         "'pegasos', 'knn', 'distance_knn', 'adaboost', "
                         "'lambda_means', and 'nb_clustering'. "
                         "You passed: " + str(args.algorithm))
    # train the chosen predictor on the data and return it
    predictor.train(instances)
    return predictor

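# A minimal sketch of the argparse setup the dispatcher above assumes; the
# flag names mirror the attributes read from `args`, but the defaults are
# assumptions, not the project's actual values.
import argparse

parser = argparse.ArgumentParser(description="Train the selected model.")
parser.add_argument("--algorithm", required=True)
parser.add_argument("--online-learning-rate", type=float, default=1.0)
parser.add_argument("--online-training-iterations", type=int, default=5)
parser.add_argument("--pegasos-lambda", type=float, default=1e-4)
parser.add_argument("--knn", type=int, default=5)
parser.add_argument("--num-boosting-iterations", type=int, default=10)
parser.add_argument("--cluster-lambda", type=float, default=0.0)
parser.add_argument("--clustering-training-iterations", type=int, default=10)
parser.add_argument("--num-clusters", type=int, default=3)
# argparse maps dashes to underscores, e.g. args.online_learning_rate
args = parser.parse_args()
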
def handle_predict(argv):
    # NOTE: parsing a model file with literal_eval trusts its contents;
    # only load files this program wrote itself.
    with open(argv[3], "r") as f:
        model = f.readline().strip('\n')  # first line holds the model type; unused here
        hypothesis = f.readline()
    hypothesis = literal_eval(hypothesis)
    tree = Adaboost()
    tree.define_positive_class(lambda x: x.classification == 'en')
    tree.define_classes(processing.classes)
    tree.define_attributes(processing.attr_definitions)
    examples = process_file(argv[4], training=False)
    examples = tree.create_examples(examples)
    return tree.classify(examples, hypothesis)

def train_adaboost(train, test, model_parameters=(1, "SAMME", 200)):
    # one-vs-one scheme: train a binary Adaboost for every pair of classes
    # and let each pairwise model vote for one of its two classes
    num_classifier = len(train)
    predicted_label = numpy.zeros((num_classifier, test.shape[0]))
    predicted_max_label = numpy.zeros(test.shape[0])
    for i in range(num_classifier):
        for j in range(i):
            adaboost = Adaboost(train[i], train[j], model_parameters)
            adaboost.adaboost_train()
            label_1, label_2 = adaboost.adaboost_predict(test[:, 0:-1])
            predicted_label[i, :] += label_1
            predicted_label[j, :] += label_2
    # pick the class with the most votes for each test example
    compare_matrix = (predicted_label == numpy.max(predicted_label, axis=0))
    for i in range(compare_matrix.shape[1]):
        for j in range(compare_matrix.shape[0]):
            if compare_matrix[j][i] == 1:
                predicted_max_label[i] = j
    print_report('Adaboost', test[:, -1], predicted_max_label)

def train(instances):
    # instantiate the requested model, then train it on the instances
    p = None
    if args.algorithm == 'knn':
        p = Knn(max_size, args.knn)
    elif args.algorithm == 'adaboost':
        p = Adaboost(max_size, max_max_index, args.num_boosting_iterations)
    if p is not None:
        p.train(instances)
    return p

def run_adaboost(X, y, data1, data2):
    # average test accuracy over n random train/test splits
    n = 50
    total_avg = 0
    for _ in range(n):
        # split the data into train and test; use a fresh random split each
        # run, otherwise all n runs would be identical
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)
        train_x = []
        test_x = []
        train_y = y_train.to_numpy()
        test_y = y_test.to_numpy()
        for _, row in X_train.iterrows():
            train_x.append(Point(row[data1], row[data2]))
        for _, row in X_test.iterrows():
            test_x.append(Point(row[data1], row[data2]))
        # call adaboost
        ad = Adaboost()
        ad.train(train_x, train_y)
        predicts = ad.predict(test_x, test_y)
        hits = 0
        for i in range(len(predicts)):
            if predicts[i] == test_y[i]:
                hits += 1
        total_avg += hits / len(predicts)
    print("success rate", total_avg / n)

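# A minimal sketch of the Point container assumed by run_adaboost above
# (hypothetical; the project's own class may differ).
class Point:
    def __init__(self, x, y):
        self.x = x  # value of the first selected feature column (data1)
        self.y = y  # value of the second selected feature column (data2)
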
def handle_train(argv):
    examples = process_file(argv[2], training=True)
    if argv[4] == "dt":
        tree = DecisionTree()
    else:
        tree = Adaboost()
    tree.define_positive_class(lambda x: x.classification == 'en')
    tree.define_classes(processing.classes)
    tree.define_attributes(processing.attr_definitions)
    tree.load_examples(examples)
    tree.generate(tree.examples)
    # persist the model type on the first line, the hypothesis on the second
    with open(argv[3], "w") as f:
        f.write(argv[4] + "\n")
        f.write(str(tree.tree))
    tree.print()

def train(examples, hypothesis_out, algo):
    """
    Train a model according to the user's choice.

    :param examples: the training data
    :param hypothesis_out: the file in which to store the model
    :param algo: the algorithm the user chose; "dt" stands for decision tree,
        "ada" stands for Adaboost
    :return: None
    """
    if algo == DECISION_TREE:
        model = DecisionTree(train_file=examples, out_file=hypothesis_out)
    elif algo == ADABOOST:
        model = Adaboost(train_file=examples, out_file=hypothesis_out)
    else:
        print("Please use \"dt\" for a decision tree model or \"ada\" for an Adaboost model")
        sys.exit()
    model.train()

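# The algorithm-name constants compared against above are assumed (based on
# the error message) to hold the CLI strings:
DECISION_TREE = "dt"
ADABOOST = "ada"
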
def handle_predict(argv):
    with open(argv[2], "r") as f:
        model = f.readline().strip('\n')  # first line names the model type
        hypothesis = f.readline()
    hypothesis = literal_eval(hypothesis)
    if model == "dt":
        tree = DecisionTree()
    else:
        tree = Adaboost()
    tree.define_positive_class(lambda x: x.classification == 'en')
    tree.define_classes(processing.classes)
    tree.define_attributes(processing.attr_definitions)
    examples = process_file(argv[3], training=False)
    examples = tree.create_examples(examples)
    for classification in tree.classify(examples, hypothesis):
        print(classification)

def train(instances, algorithm, rate, iterations, lambd, knn, boost_iter):
    if algorithm == "averaged_perceptron":
        predictor = Weighted_Perceptron(rate, iterations)
    elif algorithm == "perceptron":
        # use a simple perceptron model
        predictor = Perceptron(rate, iterations)
    elif algorithm == "margin_perceptron":
        predictor = Margin_Perceptron(rate, iterations)
    elif algorithm == "pegasos":
        predictor = Pegasos(lambd, iterations)
    elif algorithm == "knn":
        predictor = Standard_knn(knn)
    elif algorithm == "distance_knn":
        predictor = Distance_knn(knn)
    elif algorithm == "adaboost":
        predictor = Adaboost(boost_iter)
    else:
        raise ValueError("Unknown algorithm name. Options are 'averaged_perceptron', "
                         "'perceptron', 'margin_perceptron', 'pegasos', 'knn', "
                         "'distance_knn', and 'adaboost'. "
                         "You passed: " + str(algorithm))
    # train the chosen predictor on the data and return it
    predictor.train(instances)
    return predictor

def main():
    # Read in the dataset
    X = np.loadtxt('banknote_auth/data_banknote_auth.csv', delimiter=',')
    Y = np.loadtxt('banknote_auth/labels_banknote_auth.csv', dtype=str)

    # Map labels to {-1, 1}
    labels = list(set(Y))
    Y = np.array([1 if y == labels[0] else -1 for y in Y])

    # Split the dataset; training_ratio is the fraction used for training
    training_ratio = 0.5
    n = len(Y)
    perm = np.random.permutation(n)
    train_data_size = int(n * training_ratio)
    print("Total examples:", n)
    print("Training examples:", train_data_size)
    train_idx, test_idx = perm[:train_data_size], perm[train_data_size:]
    train_data = X[train_idx, :]
    train_label = Y[train_idx]
    test_data = X[test_idx, :]
    test_label = Y[test_idx]

    # Maximum number of weak learners to be used in Adaboost
    max_num_weak_learners = 69

    # Train and test error
    train_error = []
    test_error = []

    # Train Adaboost, adding one weak learner at a time
    model = Adaboost()
    for m in range(1, max_num_weak_learners + 1):
        print("Training Adaboost with %d weak learners" % m)
        model.add_learner(train_data, train_label)
        train_error.append(model.prediction_error(train_data, train_label))
        test_error.append(model.prediction_error(test_data, test_label))

    print("Initial Training Error=%.4f Testing Error=%.4f" % (train_error[0], test_error[0]))
    print("Final Training Error=%.4f Testing Error=%.4f" % (train_error[-1], test_error[-1]))
    plot_results(X, Y, train_idx, test_idx, train_error, test_error)

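# A minimal sketch (an assumption, not the original project's code) of the
# incremental Adaboost interface used above: add_learner() fits one decision
# stump on the current example weights, and prediction_error() evaluates the
# weighted vote of all stumps added so far. Labels are assumed to be in
# {-1, 1}, and each add_learner() call must receive the same training set,
# since the example weights persist across calls.
import numpy as np

class Adaboost:
    def __init__(self):
        self.stumps = []   # (feature index, threshold, polarity)
        self.alphas = []   # vote weight of each stump
        self.w = None      # current example weights

    def add_learner(self, X, y):
        n = len(y)
        if self.w is None:
            self.w = np.full(n, 1.0 / n)
        # exhaustive search for the stump with lowest weighted error
        best, best_err = None, np.inf
        for f in range(X.shape[1]):
            for t in np.unique(X[:, f]):
                for polarity in (1, -1):
                    pred = np.where(X[:, f] < t, polarity, -polarity)
                    err = np.sum(self.w[pred != y])
                    if err < best_err:
                        best_err, best = err, (f, t, polarity)
        alpha = 0.5 * np.log((1 - best_err) / (best_err + 1e-10))
        f, t, polarity = best
        pred = np.where(X[:, f] < t, polarity, -polarity)
        # reweight: misclassified examples gain weight
        self.w *= np.exp(-alpha * y * pred)
        self.w /= self.w.sum()
        self.stumps.append(best)
        self.alphas.append(alpha)

    def predict(self, X):
        agg = np.zeros(X.shape[0])
        for (f, t, polarity), alpha in zip(self.stumps, self.alphas):
            agg += alpha * np.where(X[:, f] < t, polarity, -polarity)
        return np.sign(agg)

    def prediction_error(self, X, y):
        return np.mean(self.predict(X) != y)
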
def handle_train(argv, size=None, depth=None):
    # size and depth are accepted for interface compatibility but unused here
    examples = process_file(argv[2], training=True)
    tree = Adaboost()
    tree.define_positive_class(lambda x: x.classification == 'en')
    tree.define_classes(processing.classes)
    tree.define_attributes(processing.attr_definitions)
    tree.load_examples(examples)
    tree.generate(tree.examples)
    # persist the model type on the first line, the hypothesis on the second
    with open(argv[3], "w") as f:
        f.write("ada" + "\n")
        f.write(str(tree.tree))
    tree.print()

print('Start time', time())

if model == 'best':
    model = 'nnet'

# K-Nearest neighbors
if model == 'nearest' and phase == 'train':
    model = knn.train(image_list)
    serialize_to_file(model, model_file)
elif model == 'nearest' and phase == 'test':
    model = deserialize_from_file(model_file)
    knn.test(image_list, model)
# Adaboost
elif model == "adaboost" and phase == "train":
    params = Adaboost(image_list).adaboost()
    serialize_to_file(params, model_file)
elif model == "adaboost" and phase == "test":
    params = deserialize_from_file(model_file)
    Adaboost(image_list).adaboost_test(image_list, params)
# Neural net
elif model == 'nnet' and phase == 'train':
    net = neural_net.train(image_list)
    serialize_to_file(net, model_file)
elif model == 'nnet' and phase == 'test':
    net = deserialize_from_file(model_file)
    neural_net.test(net, image_list)

print('End time', time())

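# A plausible sketch of the serialize/deserialize helpers used above; the
# pickle-based implementation here is an assumption, not the project's code.
import pickle

def serialize_to_file(obj, path):
    # write any picklable model or parameter object to disk
    with open(path, 'wb') as f:
        pickle.dump(obj, f)

def deserialize_from_file(path):
    # load a previously serialized object
    with open(path, 'rb') as f:
        return pickle.load(f)
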
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split

from adaboost import Adaboost


def accuracy(y_true, y_pred):
    return np.sum(y_true == y_pred) / len(y_true)


data = datasets.load_breast_cancer()
X = data.data
y = data.target
y[y == 0] = -1  # Adaboost expects labels in {-1, 1}

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=5)

# Adaboost classification with 5 weak classifiers
clf = Adaboost(n_clf=5)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

acc = accuracy(y_test, y_pred)
print("Accuracy:", acc)

import numpy as np

from adaboost import Adaboost
from classifier_weak import ClassifierWeak
from data_manager import DataManager

adb = Adaboost(200, 200)
dm = DataManager()
percentage = 60


def count_correct_predictions(predictions, tags):
    hits = 0
    for p, t in zip(predictions, tags):
        hits += 1 if p == t else 0
    return hits


def print_results(prediction_training, prediction_test, training_tags, test_tags):
    size_training = len(training_tags)
    size_test = len(test_tags)
    training_hits = count_correct_predictions(prediction_training, training_tags)
    test_hits = count_correct_predictions(prediction_test, test_tags)
    training_hits_percentage = training_hits * 100 / size_training
    test_hits_percentage = test_hits * 100 / size_test
    print('\nNumber of weak classifiers per strong: ' + str(adb.T_CLASSIFIERS))

pca = PCA()
X = pca.transform(X, n_components=5)  # Reduce to 5 dimensions

# ..........................
#  TRAIN / TEST SPLIT
# ..........................
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)
# Rescale labels for Adaboost to {-1, 1}
rescaled_y_train = 2 * y_train - np.ones(np.shape(y_train))
rescaled_y_test = 2 * y_test - np.ones(np.shape(y_test))

# .......
#  SETUP
# .......
adaboost = Adaboost(n_clf=8)
naive_bayes = NaiveBayes()
knn = KNN(k=4)
logistic_regression = LogisticRegression()
mlp = MultilayerPerceptron(n_hidden=20)
perceptron = Perceptron()
decision_tree = DecisionTree()
random_forest = RandomForest(n_estimators=150)
support_vector_machine = SupportVectorMachine(C=1, kernel=rbf_kernel)

# ........
#  TRAIN
# ........
print("Training:")
print("\tAdaboost")
adaboost.fit(X_train, rescaled_y_train)

def train(instances, algorithm, high_idx, learn_rate, iterate, peg_lambda,
          k_val, T, clus_lambda, K, clus_iter):
    # instantiate the requested model, then run `iterate` training passes
    if algorithm == "perceptron":
        classifier = Perceptron(instances, high_idx, learn_rate)
    elif algorithm == "averaged_perceptron":
        classifier = AveragePerceptron(instances, high_idx, learn_rate)
    elif algorithm == "pegasos":
        classifier = Pegasos(instances, high_idx, peg_lambda)
    elif algorithm == "margin_perceptron":
        classifier = PerceptronMargin(instances, high_idx, learn_rate, iterate)
    elif algorithm == "knn":
        classifier = KNN(instances, k_val, high_idx)
    elif algorithm == "distance_knn":
        classifier = Distance_KNN(instances, k_val, high_idx)
    elif algorithm == "adaboost":
        classifier = Adaboost(instances, T, high_idx)
    elif algorithm == "lambda_means":
        classifier = Lambda_Means2(instances, high_idx, clus_lambda, clus_iter)
    elif algorithm == "nb_clustering":
        classifier = Naive_Bayes(instances, high_idx, K)
    elif algorithm == "mc_perceptron":
        classifier = MC_Perceptron(instances, high_idx)
    else:
        return None
    # every model follows the same protocol: repeat train() once per pass
    for _ in range(iterate):
        classifier.train(instances)
    return classifier