from DataInterface import get_pendigits_dataset, get_car_dataset, split_dataset
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

if __name__ == '__main__':
    # Hold out a fixed 25% test set, then train the RBF-kernel SVM on
    # increasingly large slices of the remaining data to build a learning curve.
    dataset = get_car_dataset()
    data = split_dataset(dataset, 0.25)
    train, test = data
    features_test, labels_test = test

    training_accuracy = []
    testing_accuracy = []
    for train_size in range(6, 99, 2):
        train_size /= 100.0
        data = split_dataset(train, 1 - train_size)
        train2, test2 = data
        features_train, labels_train = train2

        rbf_kernel_svm_clf = SVC(kernel='rbf', gamma='auto', C=10)
        rbf_kernel_svm_clf.fit(features_train, labels_train)

        predictions = rbf_kernel_svm_clf.predict(features_train)
        training_accuracy.append(accuracy_score(labels_train, predictions))

        predictions = rbf_kernel_svm_clf.predict(features_test)
        testing_accuracy.append(accuracy_score(labels_test, predictions))
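# --- Illustrative sketch, not part of the original project -------------------
# split_dataset is imported from DataInterface but not shown here. A minimal
# sketch of what it might look like, assuming each dataset is a
# (features, labels) pair and the second argument is the held-out fraction
# (the name, signature, and return layout are assumptions, not the project's
# actual code):
from sklearn.model_selection import train_test_split

def split_dataset(dataset, test_fraction):
    """Split a (features, labels) dataset into (train, test) pairs."""
    features, labels = dataset
    features_train, features_test, labels_train, labels_test = train_test_split(
        features, labels, test_size=test_fraction)
    return (features_train, labels_train), (features_test, labels_test)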
num_iter = 10
print(
    "Now training and testing decision tree on car dataset for 10 runs of train/test split:\n"
)
features_name = [
    "buying", "maint", "doors", "persons", "lug_boot", "safety"
]
dataset = get_car_dataset()
accuracies = 0
max_depth = 10
start_time = time.time()
print('To prune the tree, the maximum depth is set to ' + str(max_depth))

# Average the accuracy over num_iter independent 75/25 train/test splits.
for _ in range(num_iter):
    data = split_dataset(dataset, 0.25)
    train, test = data
    features_train, labels_train = train
    dt = train_decision_tree(data, max_depth)
    accuracy = test_decision_tree(data, dt)
    accuracies += accuracy
duration = time.time() - start_time

print("Average accuracy is {0:.3f}.\n".format(accuracies / num_iter))
print("The run time is " + str(duration) + " sec.")
print("\nComplete.\n")
print(
    "-----------------------------------------------------------------\n")
print(
    "Now training and testing decision tree on pen digits dataset for 10 runs of train/test split:\n"
)
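# --- Illustrative sketch, not part of the original project -------------------
# train_decision_tree and test_decision_tree are defined elsewhere in this
# project. A plausible minimal version, assuming they wrap scikit-learn's
# DecisionTreeClassifier and accept the (train, test) pair produced by
# split_dataset (names and signatures are assumptions):
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

def train_decision_tree(data, max_depth):
    """Fit a depth-limited decision tree on the training half of data."""
    (features_train, labels_train), _ = data
    dt = DecisionTreeClassifier(max_depth=max_depth)
    dt.fit(features_train, labels_train)
    return dt

def test_decision_tree(data, dt):
    """Return accuracy of the fitted tree on the test half of data."""
    _, (features_test, labels_test) = data
    return accuracy_score(labels_test, dt.predict(features_test))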
if __name__ == '__main__':
    num_iter = 10  # number of train/test splits

    print("\nNow training and testing on the car dataset with " +
          str(num_iter) + " runs of train/test splits:\n")
    data = get_car_dataset()
    hidden_layers = (10, 10, 10, 10)
    print("The neural network has {} hidden layers, each layer has size: "
          .format(len(hidden_layers)), end="")
    for layer in hidden_layers:
        print(layer, end=" ")
    print('\n')

    accuracies = 0
    start_time = time.time()
    for _ in range(num_iter):
        train, test = split_dataset(data, 0.25)
        mlp = training(train, hidden_layers)
        accuracy = testing(mlp, test)
        accuracies += accuracy
    duration = time.time() - start_time

    print("The average classification rate is {0:.3f}.\n".format(accuracies / num_iter))
    print("The run time is " + str(duration) + " sec.")
    print("\nComplete\n----------------------------------\n")

    print("Now training and testing on the pen digits dataset with " +
          str(num_iter) + " runs of train/test splits:\n")
    data = get_pendigits_dataset()
    hidden_layers = (10, 10)
    print("The neural network has {} hidden layers, each layer has size: "
          .format(len(hidden_layers)), end="")
    for layer in hidden_layers:
        print(layer, end=" ")
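# --- Illustrative sketch, not part of the original project -------------------
# training and testing are helper functions defined elsewhere in this project.
# A minimal sketch, assuming they wrap scikit-learn's MLPClassifier and accept
# the (features, labels) pairs produced by split_dataset (these names and
# signatures are assumptions):
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score

def training(train, hidden_layers):
    """Fit a multilayer perceptron with the given hidden-layer sizes."""
    features_train, labels_train = train
    mlp = MLPClassifier(hidden_layer_sizes=hidden_layers, max_iter=1000)
    mlp.fit(features_train, labels_train)
    return mlp

def testing(mlp, test):
    """Return the classification accuracy of the fitted network on test data."""
    features_test, labels_test = test
    return accuracy_score(labels_test, mlp.predict(features_test))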
print( "\nNow training and testing boosted version of decision tree on car dataset with train/test split for 10 times:\n" ) dataset = get_car_dataset() feature_names = [ "buying", "maint", "doors", "persons", "lug_boot", "safety" ] max_depth = 4 print("To prune the decision tree, the maximum depth is set to " + str(max_depth)) start_time = time.time() accuracies = 0 for _ in range(10): data = split_dataset(dataset, 0.3) train, test = data features_train, labels_train = train clf = train_boosting(data, max_depth) accuracy = test_boosting(data, clf) accuracies += accuracy # save_trees_as_png(clf, feature_names, 'car') # print("Trees visualization written to the current folder.") duration = time.time() - start_time print("The average training accuracy over 10 runs is {0:.3f}.\n".format( accuracies / 10)) print("The run time is " + str(duration) + " sec.") print("\nComplete.\n") print( "-----------------------------------------------------------------\n")