def cross_validate_rf_model(X_data, y_data):
    k_parameters = [10, 50, 100]
    params = {"k": k_parameters}  # hyperparameter grid for the wrapper
    model = md.RandomForest()
    print("Cross validation Random Forest Model")
    model.cross_validate_model(params, X_data, y_data, 5)
    print()
    return model
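# Minimal usage sketch (not from the original script): the CSV path and label
# column below are placeholder assumptions.
import pandas as pd

df = pd.read_csv("training_data.csv")               # hypothetical input file
X_data, y_data = df.drop(columns=["label"]), df["label"]
rf_model = cross_validate_rf_model(X_data, y_data)  # 5-fold CV over k in {10, 50, 100}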
import config
import models
import numpy as np  # needed for np.sqrt in the objective below


def huber_approx_obj(preds, dtrain):
    '''xgboost objective approximating mean absolute error (pseudo-Huber loss)'''
    d = preds - dtrain  # use preds - dtrain.get_label() when called from xgb.train()
    h = 1  # h is the delta parameter of the pseudo-Huber loss
    scale = 1 + (d / h)**2
    scale_sqrt = np.sqrt(scale)
    grad = d / scale_sqrt
    hess = 1 / scale / scale_sqrt
    return grad, hess


# NOTE: this dict shadows the imported `models` module from here on.
models = {
    "dt": models.DecisionTree(),
    "rf": models.RandomForest(),
    "lr": models.LR(),
    "xgb": models.XGBoost(),
    "svm": models.SVM(),
    "lgb": models.LGB(),
    # "mlp": models.MLP(),
    "lstm": models.LSTM()
}

# To get the final accuracy, report the mean across runs; the mean absolute
# error is reported as a percentage of performance.
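# For reference: a minimal sketch (not from the original script) showing how a
# custom objective of this form plugs into xgb.train(); the synthetic data and
# booster parameters below are assumptions for illustration only.
import numpy as np
import xgboost as xgb


def huber_approx_obj_dmatrix(preds, dtrain):
    # Same pseudo-Huber gradient/hessian, but reading labels from the DMatrix.
    d = preds - dtrain.get_label()
    h = 1
    scale = 1 + (d / h)**2
    scale_sqrt = np.sqrt(scale)
    return d / scale_sqrt, 1 / scale / scale_sqrt


rng = np.random.default_rng(0)
X_demo = rng.normal(size=(256, 10))
y_demo = X_demo[:, 0] + rng.normal(scale=0.1, size=256)
dtrain = xgb.DMatrix(X_demo, label=y_demo)
booster = xgb.train({"max_depth": 4, "eta": 0.1}, dtrain,
                    num_boost_round=50, obj=huber_approx_obj_dmatrix)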
def main():
    data = Data()
    logistic_regression = models.LogisticRegression()
    neural_network = models.NeuralNet()
    svm = models.SupportVectorMachine(C=1.0, kernel='rbf', gamma='scale')
    random_forest = models.RandomForest(n_estimators=100,
                                        max_depth=None,
                                        random_state=None)

    # Process dataset
    training_data_features, training_data_labels, mnist_test_data_features, mnist_test_data_labels, \
        usps_test_data_features, usps_test_data_labels, combined_test_data_features, combined_test_data_labels = \
        data.pre_process()

    # Logistic Regression
    logistic_regression.fit(training_data_features,
                            training_data_labels,
                            learning_rate=0.01,
                            epochs=500)
    accuracy_mnist, confusion_mnist = logistic_regression.predict(
        mnist_test_data_features, mnist_test_data_labels)
    accuracy_usps, confusion_usps = logistic_regression.predict(
        usps_test_data_features, usps_test_data_labels)
    accuracy_combined, confusion_combined = logistic_regression.predict(
        combined_test_data_features, combined_test_data_labels)
    print_and_plot('Logistic Regression', accuracy_mnist, accuracy_usps,
                   accuracy_combined, confusion_mnist, confusion_usps,
                   confusion_combined)

    # Neural Network
    neural_network.fit(training_data_features, training_data_labels, epochs=10)
    accuracy_mnist, confusion_mnist = neural_network.predict(
        mnist_test_data_features, mnist_test_data_labels)
    accuracy_usps, confusion_usps = neural_network.predict(
        usps_test_data_features, usps_test_data_labels)
    accuracy_combined, confusion_combined = neural_network.predict(
        combined_test_data_features, combined_test_data_labels)
    print_and_plot('Neural Network', accuracy_mnist, accuracy_usps,
                   accuracy_combined, confusion_mnist, confusion_usps,
                   confusion_combined)

    # Support Vector Machine
    svm.fit(training_data_features, training_data_labels)
    accuracy_mnist, confusion_mnist = svm.predict(mnist_test_data_features,
                                                  mnist_test_data_labels)
    accuracy_usps, confusion_usps = svm.predict(usps_test_data_features,
                                                usps_test_data_labels)
    accuracy_combined, confusion_combined = svm.predict(
        combined_test_data_features, combined_test_data_labels)
    print_and_plot('SVM', accuracy_mnist, accuracy_usps, accuracy_combined,
                   confusion_mnist, confusion_usps, confusion_combined)

    # Random Forest
    random_forest.fit(training_data_features, training_data_labels)
    accuracy_mnist, confusion_mnist = random_forest.predict(
        mnist_test_data_features, mnist_test_data_labels)
    accuracy_usps, confusion_usps = random_forest.predict(
        usps_test_data_features, usps_test_data_labels)
    accuracy_combined, confusion_combined = random_forest.predict(
        combined_test_data_features, combined_test_data_labels)
    print_and_plot('Random Forest', accuracy_mnist, accuracy_usps,
                   accuracy_combined, confusion_mnist, confusion_usps,
                   confusion_combined)
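# `print_and_plot` is defined elsewhere in this project; purely for orientation,
# a hypothetical version could look like the sketch below (the report format and
# output file name are assumptions, not the project's actual helper).
import matplotlib.pyplot as plt


def print_and_plot_sketch(title, acc_mnist, acc_usps, acc_combined,
                          cm_mnist, cm_usps, cm_combined):
    """Hypothetical helper: print accuracies and plot the three confusion matrices."""
    print(f"{title}: MNIST={acc_mnist:.4f}, USPS={acc_usps:.4f}, "
          f"Combined={acc_combined:.4f}")
    fig, axes = plt.subplots(1, 3, figsize=(12, 4))
    for ax, cm, name in zip(axes, (cm_mnist, cm_usps, cm_combined),
                            ("MNIST", "USPS", "Combined")):
        ax.imshow(cm, cmap="Blues")
        ax.set_title(f"{title} - {name}")
        ax.set_xlabel("Predicted")
        ax.set_ylabel("True")
    plt.tight_layout()
    plt.savefig(f"{title.replace(' ', '_').lower()}_confusion.png")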
#loaded_model = load("model_SVC.joblib") #SVM.test_svm_classifier(loaded_model, val_data, val_labels) loaded_model = load_model("models/best_model_DNN_Adam.h5") NN.test_neural_network(loaded_model, val_data, val_labels) if __name__ == "__main__": total_features = 545333 # total unique features testing_set_size = 1500 # set site that will be used to create random test set malware_ratio = 0.3 # malware ratio in the set size print("Creating data-labels...") onehot.create_list_of_apps() # function from set_one_encoding.py # initialize sklearn models GNB = models.GaussianNaiveBayes() MNB = models.MultinomialNaiveBayes() CNB = models.ComplementNaiveBayes() BNB = models.BernoulliNaiveBayes() DT = models.DecisionTree() RF = models.RandomForest() KNN = models.KNearestNeighbors() LR = models.LogRegression() SVM = models.SupportVectorMachine() val_runs = 8 #evaluate_models(val_runs) evaluate_on_test_set()
feature_cols = x_train.columns
dot_data = StringIO()
export_graphviz(tree, out_file=dot_data, filled=True, rounded=True,
                special_characters=True, feature_names=feature_cols,
                class_names=['0', '1'])
graph = pydotplus.graph_from_dot_data(dot_data.getvalue())
graph.write_png('../data/tree_entropy_{}.png'.format(datetime.date.today()))
Image(graph.create_png())

# ---------------------------------------------------------- RandomForest
r_forest = models.RandomForest(x_train, y_train.isFraud).fit()
y_train['r_forest'] = r_forest.predict(x_train)
y_test['r_forest'] = r_forest.predict(x_test)
y_validation['r_forest'] = r_forest.predict(x_validation)

cm_r_forest_train = helpers.confusion_matrix(y_train, ['Fraude', 'r_forest'])
cm_r_forest_test = helpers.confusion_matrix(y_test, ['Fraude', 'r_forest'])
cm_r_forest_val = helpers.confusion_matrix(y_validation, ['Fraude', 'r_forest'])

# ---------------------------------------------------------- Neural Net
nn_model = Sequential()
nn_model.add(Dense(15, input_dim=29, activation='relu'))
nn_model.add(Dense(15, activation='relu'))
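# The network construction above is cut off; a minimal sketch of how it might be
# finished and trained. The output layer, loss, optimizer, epoch count and the
# `isFraud` label column used here are assumptions, not taken from the original script.
nn_model.add(Dense(1, activation='sigmoid'))  # binary fraud / not-fraud output
nn_model.compile(optimizer='adam', loss='binary_crossentropy',
                 metrics=['accuracy'])
nn_model.fit(x_train, y_train['isFraud'], epochs=10, batch_size=256,
             validation_data=(x_validation, y_validation['isFraud']))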
def predict(in_fname, lin_n_cv_iters, n_cv_iters, regularizations, n_labs,
            age_index, gender_index, out_fname, nn_out_fname=None,
            verbose=False, emb_fnames=None):

    if verbose:
        print("loading data")

    X_train, Y_train, X_validation, Y_validation, X_test, Y_test = features.get_data(
        in_fname)

    emb_data_list = [None]
    emb_fname_list = ['']
    if emb_fnames is not None:
        for emb_fname in emb_fnames:
            emb_data_list.append(emb.get_emb_data(emb_fname))
            emb_fname_list.append(emb_fname)

    if verbose:
        print("training, validating and testing models")

    results = []
    for e, emb_data in enumerate(emb_data_list):
        if verbose:
            print(str(e))

        if verbose:
            print("-->L2")
        model = models.L2(X_train, Y_train, X_validation, Y_validation,
                          X_test, Y_test, n_labs, emb_data)
        if lin_n_cv_iters == -1:
            params = [[False, True], regularizations]
        else:
            params = [['sample', False, True],
                      ['uniform', regularizations[0], regularizations[-1]]]
        model.crossvalidate(params=params,
                            param_names=['fit_intercept', 'C'],
                            n_cv_iters=lin_n_cv_iters)
        model.test()
        s = model.summarize()
        s['emb_fname'] = emb_fname_list[e]
        results.append(s)

        if verbose:
            print("-->L1")
        model = models.L1(X_train, Y_train, X_validation, Y_validation,
                          X_test, Y_test, n_labs, age_index, gender_index,
                          emb_data)
        if lin_n_cv_iters == -1:
            params = [[False, True], regularizations]
        else:
            params = [['sample', False, True],
                      ['uniform', regularizations[0], regularizations[-1]]]
        model.crossvalidate(params=params,
                            param_names=['fit_intercept', 'C'],
                            n_cv_iters=lin_n_cv_iters)
        model.test()
        s = model.summarize()
        s['emb_fname'] = emb_fname_list[e]
        results.append(s)

        if verbose:
            print("-->RandomForest")
        model = models.RandomForest(X_train, Y_train, X_validation,
                                    Y_validation, X_test, Y_test, emb_data)
        if n_cv_iters == -1:
            params = [[1, 10, 20], [1, 3, 10],
                      ['sqrt_n_features', 'n_features'], [1, 3, 10],
                      [1, 3, 10], [True, False], ['gini', 'entropy']]
        else:
            params = [['randint', 1, 20], ['randint', 1, 10],
                      ['sample', 'sqrt_n_features', 'n_features'],
                      ['randint', 1, 10], ['randint', 1, 10],
                      ['sample', True, False], ['sample', 'gini', 'entropy']]
        param_names = [
            'n_estimators', 'max_depth', 'max_features', 'min_samples_split',
            'min_samples_leaf', 'bootstrap', 'criterion'
        ]
        model.crossvalidate(params=params, param_names=param_names,
                            n_cv_iters=n_cv_iters)
        model.test()
        s = model.summarize()
        s['emb_fname'] = emb_fname_list[e]
        results.append(s)

        if emb_data is not None:
            if verbose:
                print("-->Only embeddings")
            model = models.L(emb_data[0], Y_train, emb_data[1], Y_validation,
                             emb_data[2], Y_test, None)
            if lin_n_cv_iters == -1:
                params = [['l1', 'l2'], [False, True], regularizations]
            else:
                params = [['sample', 'l1', 'l2'], ['sample', False, True],
                          ['uniform', regularizations[0], regularizations[-1]]]
            model.crossvalidate(params=params,
                                param_names=['penalty', 'fit_intercept', 'C'],
                                n_cv_iters=lin_n_cv_iters)
            model.test()
            s = model.summarize()
            s['emb_fname'] = emb_fname_list[e]
            results.append(s)

    with open(out_fname, 'w') as fout:
        fout.write(yaml.dump(results))

    if nn_out_fname is not None:
        best_model = nn.evaluate(nn_out_fname, n_cv_iters, 20, X_train,
                                 Y_train, X_validation, Y_validation, X_test,
                                 Y_test, 45, models=['cnn2'],
                                 random_seed=345, verbose=verbose)
import feature_extraction
import numpy as np
import torch
import matplotlib.pyplot as plt  # needed for plt.figure() below
from sklearn.model_selection import cross_val_score
from sklearn.metrics import precision_score

import models
from models import train_test_split

data, labels, name_list = feature_extraction.raw_data(two_cat=True)

tree_counts = [5, 10, 25, 50, 100, 1000]
plt.figure()
for t in tree_counts:
    rf = models.RandomForest(n_estimators=t)
    rf.train(data, labels, name_list)
    sc = rf.test(rf.X_valid, rf.y_valid)

    # construct ROC curve for the random forest
    rf_voting = rf.clf.predict_proba(data)
    thresholds = np.linspace(0, 1, t + 1)
    tpr = np.zeros((thresholds.size, ))
    fpr = np.zeros((thresholds.size, ))
    for th in range(len(thresholds)):
        thresh = thresholds[th]
        pos = (rf_voting[:, 1] > thresh).astype(int)
        tpr[th] = np.sum((pos == 1) & (labels == 1)) / np.sum(labels)
        fpr[th] = np.sum((pos == 1) * (labels == 0)).astype(float) / np.sum(labels == 0)
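# For comparison (not part of the original script): the same curve can be
# obtained with scikit-learn's built-in helpers, here using the probabilities
# from the last loop iteration above.
from sklearn.metrics import roc_curve, auc

fpr_sk, tpr_sk, _ = roc_curve(labels, rf_voting[:, 1])
print("AUC with", t, "trees:", auc(fpr_sk, tpr_sk))
plt.plot(fpr_sk, tpr_sk, label="{} trees".format(t))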
    for train_index, test_index in group_kfold.split(Xdata, Ydata, groups):
        model.train(Xdata[train_index], Ydata[train_index])
        Ypred = model.test(Xdata[test_index])
        confusion = sklearn.metrics.confusion_matrix(Ydata[test_index], Ypred,
                                                     labels=features.labels)
        if sum_confusion is None:
            sum_confusion = np.zeros(confusion.shape)
        sum_confusion += confusion
    return sum_confusion / k


def select_best_model(Xdata, Ydata, models):
    avg_accuracies = [(i, k_fold_cross_validate(Xdata, Ydata, 4, model))
                      for i, model in enumerate(models)]
    print(avg_accuracies)
    return max(avg_accuracies, key=operator.itemgetter(1))


allfeatures = features.compute_or_read_features()
Xdata, Ydata = to_numpy_arrays(allfeatures)
models = [models.RandomForest(200, 'gini'),
          models.LogisticRegression(),
          models.SVMNonLinear('rbf'),
          models.SVMNonLinear('sigmoid'),
          models.NeuralNet(),
          models.KNN()]

#best = select_best_model(Xdata, Ydata, models)
#print(best)

for model in models:
    cm = k_fold_confusion_matrix(Xdata, Ydata, 4, model)
    save_confusion_matrix(cm, model._name)
    print(f"Confusion matrix for {model._name} saved")
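# `k_fold_cross_validate` is defined elsewhere in the project; a minimal sketch
# of what an accuracy-averaging variant could look like, assuming the same
# group-aware splitting used above (names suffixed with _sketch are hypothetical).
import numpy as np
import sklearn.metrics
import sklearn.model_selection


def k_fold_cross_validate_sketch(Xdata, Ydata, k, model, groups=None):
    """Hypothetical helper: mean accuracy over k (optionally group-aware) folds."""
    if groups is not None:
        splitter = sklearn.model_selection.GroupKFold(n_splits=k)
    else:
        splitter = sklearn.model_selection.KFold(n_splits=k, shuffle=True)
    accuracies = []
    for train_index, test_index in splitter.split(Xdata, Ydata, groups):
        model.train(Xdata[train_index], Ydata[train_index])
        Ypred = model.test(Xdata[test_index])
        accuracies.append(sklearn.metrics.accuracy_score(Ydata[test_index], Ypred))
    return np.mean(accuracies)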
    'n_estimators': [7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
    'max_depth': [2, 3, 4, 5, 6, None],
    'random_state': [42]
}

models = {
    'MLP': {
        'build_fn': m.build_MLP((24, )),
        'params': param_grid_MLP
    },
    'Decision_tree': {
        'build_fn': m.DecisionTreeModel(train=False),
        'params': param_grid_Dt
    },
    'Random_forest': {
        'build_fn': m.RandomForest(train=False),
        'params': param_grid_random_forest
    },
    'svm': {
        'build_fn': m.SVM(train=False),
        'params': param_grid_svm
    }
}

# Find the best parameters for a given model; a parameter grid must be provided.
if finetune:
    print("Finetuning ...")
    model = models[model_name]['build_fn']
    param_grid = models[model_name]['params']
    gs, fitted_model, pred = search_pipeline(X_train, X_test,
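# `search_pipeline` is a project helper whose body is not shown here (the call
# above is truncated); one plausible shape, sketched with scikit-learn's
# GridSearchCV. The signature, scoring and return values are assumptions.
from sklearn.model_selection import GridSearchCV


def search_pipeline_sketch(X_train, X_test, y_train, estimator, param_grid, cv=5):
    """Hypothetical grid search: returns the search object, best model and test predictions."""
    gs = GridSearchCV(estimator, param_grid, cv=cv, scoring='accuracy', n_jobs=-1)
    gs.fit(X_train, y_train)
    fitted_model = gs.best_estimator_
    pred = fitted_model.predict(X_test)
    return gs, fitted_model, pred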
def main():
    data = Data()
    logistic_regression = models.LogisticRegression()
    neural_network = models.NeuralNet()
    svm = models.SupportVectorMachine(C=1.0, kernel='rbf', gamma='scale')
    random_forest = models.RandomForest(n_estimators=100,
                                        max_depth=None,
                                        random_state=None)
    discriminant_analysis = DiscriminantAnalysis()
    vaecnn = deep_learning_models.VAEConvolutionNeuralNet(
        input_data.read_data_sets("data", one_hot=True), (28, 28), (28, 28))

    # Process dataset
    training_data_features, training_data_labels, mnist_test_data_features, mnist_test_data_labels, \
        usps_test_data_features, usps_test_data_labels, combined_test_data_features, combined_test_data_labels = \
        data.pre_process()

    # Discriminant Analysis
    IMAGE_SIZE = int(training_data_features.shape[-1]**0.5)
    discriminant_analysis.fit(
        training_data_features.reshape((-1, IMAGE_SIZE, IMAGE_SIZE)),
        training_data_labels)
    accuracy_mnist, confusion_mnist = discriminant_analysis.predict(
        'MNIST dataset',
        mnist_test_data_features.reshape((-1, IMAGE_SIZE, IMAGE_SIZE)),
        mnist_test_data_labels)
    accuracy_usps, confusion_usps = discriminant_analysis.predict(
        'USPS dataset',
        usps_test_data_features.reshape((-1, IMAGE_SIZE, IMAGE_SIZE)),
        usps_test_data_labels)
    accuracy_combined, confusion_combined = discriminant_analysis.predict(
        'Combined dataset',
        combined_test_data_features.reshape((-1, IMAGE_SIZE, IMAGE_SIZE)),
        combined_test_data_labels)
    print_and_plot('Bayesian Discriminant Analysis', accuracy_mnist,
                   accuracy_usps, accuracy_combined, confusion_mnist,
                   confusion_usps, confusion_combined)

    # Logistic Regression
    logistic_regression.fit(training_data_features,
                            training_data_labels,
                            learning_rate=0.01,
                            epochs=500)
    accuracy_mnist, confusion_mnist = logistic_regression.predict(
        mnist_test_data_features, mnist_test_data_labels)
    accuracy_usps, confusion_usps = logistic_regression.predict(
        usps_test_data_features, usps_test_data_labels)
    accuracy_combined, confusion_combined = logistic_regression.predict(
        combined_test_data_features, combined_test_data_labels)
    print_and_plot('Logistic Regression', accuracy_mnist, accuracy_usps,
                   accuracy_combined, confusion_mnist, confusion_usps,
                   confusion_combined)

    # Neural Network
    neural_network.fit(training_data_features, training_data_labels, epochs=10)
    accuracy_mnist, confusion_mnist = neural_network.predict(
        mnist_test_data_features, mnist_test_data_labels)
    accuracy_usps, confusion_usps = neural_network.predict(
        usps_test_data_features, usps_test_data_labels)
    accuracy_combined, confusion_combined = neural_network.predict(
        combined_test_data_features, combined_test_data_labels)
    print_and_plot('Neural Network', accuracy_mnist, accuracy_usps,
                   accuracy_combined, confusion_mnist, confusion_usps,
                   confusion_combined)

    # Support Vector Machine
    svm.fit(training_data_features, training_data_labels)
    accuracy_mnist, confusion_mnist = svm.predict(mnist_test_data_features,
                                                  mnist_test_data_labels)
    accuracy_usps, confusion_usps = svm.predict(usps_test_data_features,
                                                usps_test_data_labels)
    accuracy_combined, confusion_combined = svm.predict(
        combined_test_data_features, combined_test_data_labels)
    print_and_plot('SVM', accuracy_mnist, accuracy_usps, accuracy_combined,
                   confusion_mnist, confusion_usps, confusion_combined)

    # Random Forest
    random_forest.fit(training_data_features, training_data_labels)
    accuracy_mnist, confusion_mnist = random_forest.predict(
        mnist_test_data_features, mnist_test_data_labels)
    accuracy_usps, confusion_usps = random_forest.predict(
        usps_test_data_features, usps_test_data_labels)
    accuracy_combined, confusion_combined = random_forest.predict(
        combined_test_data_features, combined_test_data_labels)
    print_and_plot('Random Forest', accuracy_mnist, accuracy_usps,
                   accuracy_combined, confusion_mnist, confusion_usps,
                   confusion_combined)

    # Restricted Boltzmann Machine
    num_hidden_nodes_list = [20, 100, 500]
    for num_hidden_nodes in num_hidden_nodes_list:
        rbm = deep_learning_models.RBM(
            images=input_data.read_data_sets("data", one_hot=True),
            n_components=num_hidden_nodes,
            learning_rate=0.02,
            batch_size=100,
            n_iter=1000,
            random_state=0)
        rbm.fit()
        rbm.gibbs_sampling(1000)
        rbm.generate_images(num_hidden_nodes)

    # Variational Auto Encoders
    code_unit_list = [2, 8, 16]
    for code_unit in code_unit_list:
        vae = deep_learning_models.VAE(
            input_data.read_data_sets("data", one_hot=True), code_unit)
        vae.generate_images(epochs=20)

    # Variational Auto Encoders with Convolutional Neural Networks
    vaecnn.encode()
    vaecnn.decode()
    vaecnn.compile_()
    vaecnn.train(epochs=10, batch_size=100)
def train_external_detector():
    train_data, train_labels, test_data, test_labels = create_sets()
    trained_model = tf.keras.models.load_model('best_model_Adam.h5')

    predict_original = trained_model.predict(train_data)
    confusion = confusion_matrix(train_labels,
                                 np.argmax(predict_original, axis=1))
    TP = confusion[1, 1]
    TN = confusion[0, 0]
    FP = confusion[0, 1]
    FN = confusion[1, 0]
    FNR_original = FN / float(FN + TP) * 100
    FPR = FP / float(FP + TN) * 100
    accuracy = ((TP + TN) / float(TP + TN + FP + FN)) * 100
    print(confusion)
    print("Original FP:", FP, "- FN:", FN, "- TP:", TP, "- TN", TN)
    print("Original Accuracy:", accuracy, "- FPR:", FPR, "- FNR:", FNR_original)

    average_changes = 0
    amount_malwares = 0
    averageChanges = 0
    # the numpy array will be filled dynamically
    adversarial_data = np.zeros((0, 3880), dtype=float)
    for i in range(len(train_data)):
        if train_labels[i] == 1:
            x = train_data[i:i + 1]
            # print("x: ", x)
            # print(x.shape)
            try:
                adv_x, changes = craft_adversarial_samples(x, 0, trained_model, 1)
                # print(adv_x)
                # append the adversarial data to the numpy array
                adversarial_data = np.concatenate((adversarial_data, adv_x))
                if changes >= 0:
                    average_changes += changes
                    amount_malwares += 1
            except NameError:
                pass
            except ValueError:
                pass
    if amount_malwares > 0:
        averageChanges += (average_changes / float(amount_malwares))

    train_data, train_labels, test_data, test_labels = create_sets()

    predictions = trained_model.predict(train_data)
    confusion = confusion_matrix(train_labels, np.argmax(predictions, axis=1))
    print(confusion)
    TP = confusion[1, 1]
    TN = confusion[0, 0]
    FP = confusion[0, 1]
    FN = confusion[1, 0]
    FNR = FN / float(FN + TP) * 100
    FPR = FP / float(FP + TN) * 100
    accuracy = ((TP + TN) / float(TP + TN + FP + FN)) * 100
    print("Adversarial FP:", FP, "- FN:", FN, "- TP:", TP, "- TN", TN)
    print("Adversarial Accuracy:", accuracy, "- FPR:", FPR, "- FNR:", FNR)
    print("Misclassification Rate:", FNR - FNR_original)
    print("Distortion:", averageChanges)

    predictions = trained_model.predict(adversarial_data)
    adversarial_labels = np.ones((len(adversarial_data), ), dtype=int)
    confusion = confusion_matrix(adversarial_labels,
                                 np.argmax(predictions, axis=1))
    print(confusion)
    TP = confusion[1, 1]
    TN = confusion[0, 0]
    FP = confusion[0, 1]
    FN = confusion[1, 0]
    FNR = FN / float(FN + TP) * 100
    FPR = FP / float(FP + TN) * 100
    accuracy = ((TP + TN) / float(TP + TN + FP + FN)) * 100
    print("Adversarial FP:", FP, "- FN:", FN, "- TP:", TP, "- TN", TN)
    print("Adversarial Accuracy:", accuracy, "- FPR:", FPR, "- FNR:", FNR)
    print("Misclassification Rate:", FNR - FNR_original)
    print("Distortion:", averageChanges)
    print(changes_dict)

    del predict_original, FNR_original, predictions, confusion, TP, TN, FP, FN, FNR, FPR, accuracy

    # concatenate legitimate data with the produced adversarial input
    final_train_data = np.concatenate((train_data, adversarial_data))
    print("final train data shape:", final_train_data.shape)
    train_labels = np.zeros((len(train_labels), ), dtype=int)  # fill with 0 (the original class)
    print("train labels shape:", train_labels.shape)
    adversarial_labels = np.ones((len(adversarial_data), ), dtype=int)  # fill with 1 (the adversarial class)
    print("adversarial labels:", adversarial_labels.shape)
    final_train_labels = np.concatenate((train_labels, adversarial_labels))
    print("final labels shape:", final_train_labels.shape)
    print("Unique classes:", np.unique(final_train_labels))
    del train_data, train_labels, adversarial_data, adversarial_labels

    # shuffle the set (sklearn's shuffle returns copies, so capture the result)
    final_train_data, final_train_labels = shuffle(final_train_data,
                                                   final_train_labels,
                                                   random_state=123)

    # train with the augmented dataset (adversarial examples belong to class '1')
    model = generate_neural_network(total_features, [200, 200], 0.2, 0.001,
                                    "glorot_uniform", "zeros", "relu", 2)
    train_neural_network(model,
                         epochs=30,
                         batch_size=150,
                         features=final_train_data,
                         labels=final_train_labels,
                         verbose=2,
                         validation=True,
                         val_data=final_train_data,
                         val_labels=final_train_labels,
                         callbacks=True,
                         path=dir_path + "logs/fit/",
                         model_name="external_detector_2")

    GNB = models.GaussianNaiveBayes()
    MNB = models.MultinomialNaiveBayes()
    CNB = models.ComplementNaiveBayes()
    BNB = models.BernoulliNaiveBayes()
    DT = models.DecisionTree()
    RF = models.RandomForest()
    KNN = models.KNearestNeighbors()
    LR = models.LogRegression()
    SVM = models.SupportVectorMachine()

    model = GNB.train_gaussian_naive_bayes_classifier(
        final_train_data, final_train_labels)  # train Naive Bayes
    score_GNB = GNB.evaluate_gaussian_naive_bayes_classifier(
        model, final_train_data, final_train_labels)  # test performance
    print("GNB", score_GNB)

    model = MNB.train_multi_naive_bayes_classifier(final_train_data,
                                                   final_train_labels)
    score_MNB = MNB.evaluate_multi_naive_bayes_classifier(
        model, final_train_data, final_train_labels)
    print("MNB", score_MNB)

    model = CNB.train_complement_naive_bayes_classifier(
        final_train_data, final_train_labels)
    score_CNB = CNB.evaluate_complement_naive_bayes_classifier(
        model, final_train_data, final_train_labels)
    print("CNB", score_CNB)

    model = BNB.train_bernoulli_naive_bayes_classifier(final_train_data,
                                                       final_train_labels)
    score_BNB = BNB.evaluate_bernoulli_naive_bayes_classifier(
        model, test_data, test_labels)
    print("BNB", score_BNB)

    model = DT.train_decision_tree_classifier(
        final_train_data, final_train_labels)  # train Decision Tree classifier
    score_dt = DT.evaluate_decision_tree_classifier(model, final_train_data,
                                                    final_train_labels)
    print("DT:", score_dt)

    model = LR.train_logistic_regression_classifier(
        final_train_data, final_train_labels)  # train Logistic Regression
    score_lr = LR.evaluate_logistic_regression_classifier(
        model, final_train_data, final_train_labels)
    print("LR", score_lr)

    model = KNN.train_knn_classifier(
        final_train_data, final_train_labels)  # train k-Nearest Neighbors classifier
    score_knn = KNN.evaluate_knn_classifier(model, final_train_data,
                                            final_train_labels)
    print("KNN", score_knn)

    model = SVM.train_svm_classifier(
        final_train_data, final_train_labels)  # train Support Vector Machine
    score_svm = SVM.evaluate_svm_classifier(model, final_train_data,
                                            final_train_labels)
    print("SVM", score_svm)

    model = RF.train_random_forest_classifier(
        final_train_data, final_train_labels)  # train Random Forest
    score_rf = RF.evaluate_random_forest_classifier(model, final_train_data,
                                                    final_train_labels)
    print("RF:", score_rf)
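# Once the external detector is trained, it could be used to screen incoming
# samples before they reach the main malware classifier. A minimal sketch; the
# saved-model path and the single-sample reshaping are assumptions.
import numpy as np
import tensorflow as tf

detector = tf.keras.models.load_model("models/external_detector_2.h5")  # hypothetical path


def is_adversarial(sample):
    """Flag one feature vector as adversarial (class 1) or legitimate (class 0)."""
    probs = detector.predict(sample.reshape(1, -1))
    return int(np.argmax(probs, axis=1)[0]) == 1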
def main():
    data = Data()
    logistic_regression = models.LogisticRegression()
    neural_network = models.NeuralNet()
    svm = models.SupportVectorMachine(C=1.0, kernel='rbf', gamma='scale')
    random_forest = models.RandomForest(n_estimators=100,
                                        max_depth=None,
                                        random_state=None)
    discriminant_analysis = DiscriminantAnalysis()

    # Process dataset
    training_data_features, training_data_labels, mnist_test_data_features, mnist_test_data_labels, \
        usps_test_data_features, usps_test_data_labels, combined_test_data_features, combined_test_data_labels = \
        data.pre_process()

    # Discriminant Analysis
    IMAGE_SIZE = int(training_data_features.shape[-1]**0.5)
    discriminant_analysis.fit(
        training_data_features.reshape((-1, IMAGE_SIZE, IMAGE_SIZE)),
        training_data_labels)
    accuracy_mnist, confusion_mnist = discriminant_analysis.predict(
        'MNIST dataset',
        mnist_test_data_features.reshape((-1, IMAGE_SIZE, IMAGE_SIZE)),
        mnist_test_data_labels)
    accuracy_usps, confusion_usps = discriminant_analysis.predict(
        'USPS dataset',
        usps_test_data_features.reshape((-1, IMAGE_SIZE, IMAGE_SIZE)),
        usps_test_data_labels)
    accuracy_combined, confusion_combined = discriminant_analysis.predict(
        'Combined dataset',
        combined_test_data_features.reshape((-1, IMAGE_SIZE, IMAGE_SIZE)),
        combined_test_data_labels)
    print_and_plot('Bayesian Discriminant Analysis', accuracy_mnist,
                   accuracy_usps, accuracy_combined, confusion_mnist,
                   confusion_usps, confusion_combined)

    # Logistic Regression
    logistic_regression.fit(training_data_features,
                            training_data_labels,
                            learning_rate=0.01,
                            epochs=500)
    accuracy_mnist, confusion_mnist = logistic_regression.predict(
        mnist_test_data_features, mnist_test_data_labels)
    accuracy_usps, confusion_usps = logistic_regression.predict(
        usps_test_data_features, usps_test_data_labels)
    accuracy_combined, confusion_combined = logistic_regression.predict(
        combined_test_data_features, combined_test_data_labels)
    print_and_plot('Logistic Regression', accuracy_mnist, accuracy_usps,
                   accuracy_combined, confusion_mnist, confusion_usps,
                   confusion_combined)

    # Neural Network
    neural_network.fit(training_data_features, training_data_labels, epochs=10)
    accuracy_mnist, confusion_mnist = neural_network.predict(
        mnist_test_data_features, mnist_test_data_labels)
    accuracy_usps, confusion_usps = neural_network.predict(
        usps_test_data_features, usps_test_data_labels)
    accuracy_combined, confusion_combined = neural_network.predict(
        combined_test_data_features, combined_test_data_labels)
    print_and_plot('Neural Network', accuracy_mnist, accuracy_usps,
                   accuracy_combined, confusion_mnist, confusion_usps,
                   confusion_combined)

    # Support Vector Machine
    svm.fit(training_data_features, training_data_labels)
    accuracy_mnist, confusion_mnist = svm.predict(mnist_test_data_features,
                                                  mnist_test_data_labels)
    accuracy_usps, confusion_usps = svm.predict(usps_test_data_features,
                                                usps_test_data_labels)
    accuracy_combined, confusion_combined = svm.predict(
        combined_test_data_features, combined_test_data_labels)
    print_and_plot('SVM', accuracy_mnist, accuracy_usps, accuracy_combined,
                   confusion_mnist, confusion_usps, confusion_combined)

    # Random Forest
    random_forest.fit(training_data_features, training_data_labels)
    accuracy_mnist, confusion_mnist = random_forest.predict(
        mnist_test_data_features, mnist_test_data_labels)
    accuracy_usps, confusion_usps = random_forest.predict(
        usps_test_data_features, usps_test_data_labels)
    accuracy_combined, confusion_combined = random_forest.predict(
        combined_test_data_features, combined_test_data_labels)
    print_and_plot('Random Forest', accuracy_mnist, accuracy_usps,
                   accuracy_combined, confusion_mnist, confusion_usps,
                   confusion_combined)