def sklearn_mlp_metrics(x, y, x_labels, y_labels, one_vs_rest=False, hn=(64,), activation='relu'):
    """Cross-validate a multilayer perceptron and report its metrics.

    Builds a NeuralNetwork over (x, y), runs cross-validated MLP prediction
    (optionally one-vs-rest), plots/prints the confusion matrix and basic
    metrics, and returns a single-row summary.

    Returns:
        list: ``[[label, accuracy, f1, predictions, confusion_matrix]]``.
    """
    label = 'Multilayer Perceptron'
    network = NeuralNetwork(x, y, x_labels, y_labels,
                            hidden_neurons=hn, activation_fun=activation)
    predictions = network.crossval_mlp(one_vs_rest)
    confusion = network.plot_confusion_matrix(predictions, label=label)
    network.show_basic_metrics(predictions, label=label)
    accuracy, f1_score = network.count_basic_metrics(predictions)
    summary_row = [label, accuracy, f1_score, predictions, confusion]
    return [summary_row]
def create_model(model_type, labels):
    """Build the (feature extractor, classifier) pair for *model_type*.

    Supported types are ``"sparse"``, ``"dense"`` and ``"nn"``; imports are
    deferred to the selected branch so only the needed backend is loaded.

    Args:
        model_type: one of "sparse", "dense", "nn".
        labels: label set handed to the classifier.

    Returns:
        tuple: ``(features, model)``.

    Raises:
        ValueError: for any unrecognized *model_type*.
    """
    if model_type == "sparse":
        from classifiers.sparse_perceptron import SparsePerceptron
        from features.sparse_features import SparseFeatureExtractor
        extractor = SparseFeatureExtractor()
        classifier = SparsePerceptron(labels, min_update=Config().min_update)
        return extractor, classifier
    if model_type == "dense":
        from features.embedding import FeatureEmbedding
        from classifiers.dense_perceptron import DensePerceptron
        extractor = dense_features_wrapper(FeatureEmbedding)
        classifier = DensePerceptron(labels, num_features=extractor.num_features())
        return extractor, classifier
    if model_type == "nn":
        from features.indexer import FeatureIndexer
        from classifiers.neural_network import NeuralNetwork
        extractor = dense_features_wrapper(FeatureIndexer)
        classifier = NeuralNetwork(labels, inputs=extractor.feature_types)
        return extractor, classifier
    raise ValueError("Invalid model type: '%s'" % model_type)
# Gather run configuration from the argument parser.
adwin_delta = argument_parser.get_delta()
training_set_ratio = argument_parser.get_training_set_ratio()
neighbors_number = argument_parser.get_neighbors_number()
kernel = argument_parser.get_kernel()
regulation = argument_parser.get_regulation()
max_iters = argument_parser.get_iterations()
n_of_hidden = argument_parser.get_n_of_hidden_layers()
algorithm = argument_parser.get_algorithm()
printing = argument_parser.is_printing()

data, labels = load_data(filename)

# Map each algorithm name to a zero-argument factory so that only the
# selected classifier is constructed (previously all four were built
# eagerly, each receiving the full dataset, just to pick one).
classifier_factories = {
    'bayes': lambda: Bayes(data, labels, training_set_ratio),
    'knn': lambda: KNN(data, labels, training_set_ratio, neighbors_number),
    'nn': lambda: NeuralNetwork(data, labels, training_set_ratio,
                                n_of_hidden, max_iters),
    'svm': lambda: SVM(data, labels, training_set_ratio, kernel, regulation),
}
classifier = classifier_factories[algorithm]()

# Train, evaluate, and collect the per-class metric tables.
classifier.train()
classifier.test()
accuracy_table = classifier.get_accuracy_table()
precision_table = classifier.get_precision_table()
sensitivity_table = classifier.get_sensitivity_table()
specificity_table = classifier.get_specificity_table()
metrics = (accuracy_table, precision_table, sensitivity_table, specificity_table)
args = ArgumentParser().get_arguments()

# Load the dataset; the last column holds the class label.
data = load_dataset(args.dataset)
labels = data[data.columns[-1]].unique()
data = data.apply(factorize)
fraction = args.training_fraction
class_args = args.class_args

# BUG FIX: the original used `args.classifier is 0` (identity comparison on
# ints), which only works via CPython's small-int cache and raises a
# SyntaxWarning on modern Pythons. Use value equality and a lookup table.
# Index 0 means "run every classifier"; 1..5 pick a single one.
classifier_types = [DecisionTree, Bayes, SVM, KNeighbors, NeuralNetwork]
if args.classifier == 0:
    selected_types = classifier_types
elif 1 <= args.classifier <= len(classifier_types):
    selected_types = [classifier_types[args.classifier - 1]]
else:
    # Previously an out-of-range value left `classifiers` undefined and
    # crashed later with a NameError; fail fast with a clear message instead.
    raise ValueError("Invalid classifier index: %r" % (args.classifier,))
classifiers = [cls(data, labels, fraction, class_args)
               for cls in selected_types]

fig, ax = plt.subplots()
print_basic_stats(args.dataset_name, args.training_percent)
from statistics.confusion_matrix import confusion_matrix
from statistics.performance import compute_performance_metrics, compute_auc

if __name__ == '__main__':
    '''
    Classify data changing balancing ratio.

    Loads the oversampled Homesite split, extracts hidden-layer activations
    from a pre-trained ANN, then grid-searches AdaBoost over the number of
    estimators on top of those activations.
    '''
    # Alternative split with ratio 2.5:
    # load_path = "../homesite_data/resources/oversampled_normalized_data_ratio_2.5.bin"
    load_path = "../homesite_data/resources/oversampled_normalized_data_ratio_2.bin"
    homesite = Data()
    homesite.load_sliptted_data(load_path)
    del homesite.test_x  # Deleted to save memory.

    # Use the ANN's hidden layer as a learned feature extractor.
    clf_ann = NeuralNetwork(path="../homesite_data/ann_weights.bin",
                            lr=0.00005, lamb=0)
    train_output_ann = clf_ann.get_hidden_output(homesite.train_x)
    validation_output_ann = clf_ann.get_hidden_output(homesite.validation_x)
    # Optionally concatenate raw features with the ANN activations:
    # train_output_ann = np.hstack((train_output_ann, homesite.train_x))
    # validation_output_ann = np.hstack((validation_output_ann, homesite.validation_x))

    for c in range(1, 10):
        # Train classifier.
        # NOTE: print statements converted to function-call form so the
        # script is valid under both Python 2 and Python 3.
        print("Training classifier.")
        clf = AdaBoostClassifier(n_estimators=1 + 100 * c)
        clf.fit(train_output_ann, homesite.train_y)

        # Test classifier: keep the probability of the positive class.
        print('Testing classifier.')
        predicted_labels = clf.predict_proba(validation_output_ann)[:, 1]
# Grid search over the ANN hidden-layer width.
# (Result arrays can be persisted with save_np_array into
#  ../homesite_data/results/ann_grid_search_*.bin if needed.)
for i in range(0, 10):
    # Hidden units: 1 on the first pass, then 10, 20, ..., 90.
    if i == 0:
        a = 1
    else:
        a = i * 10

    clf = NeuralNetwork(input_units=644, hidden_units=a, output_units=2,
                        lr=0.05, lamb=0)

    # Train classifier.
    # NOTE: print statements converted to function-call form so the
    # script is valid under both Python 2 and Python 3.
    print("Training classifier.")
    clf.fit(homesite, batch_size=128, max_iterations=500, save_interval=500,
            path="../homesite_data/resources/ann_weights.bin")

    # Test classifier: probability of the positive class.
    print('Testing classifier.')
    predicted_labels = clf.predict_proba(homesite.validation_x)[:, 1]

    # Show final results: confusion matrix against the argmax-decoded
    # one-hot validation labels, then derived metrics.
    results = confusion_matrix(np.argmax(homesite.validation_y, axis=1),
                               np.round(predicted_labels))
    accuracy, precision, recall = compute_performance_metrics(results)
# Accumulators for a mean ROC curve across folds.
mean_tpr = 0.0
mean_fpr = np.linspace(0, 1, 100)
all_tpr = []
c = 50  # hidden-layer width under evaluation

# NOTE(review): this is the old StratifiedKFold API (labels + n_folds
# positionally) — confirm the installed scikit-learn version supports it.
cvs = StratifiedKFold(homesite.train_y, n_folds=5)

# Train classifier.
# NOTE: print statements converted to function-call form so the script is
# valid under both Python 2 and Python 3.
print("\nTraining classifier param %d" % c)
for i, (train, test) in enumerate(cvs):
    print(i)

    # Rebalance the training fold only (never the held-out fold).
    sm = OverSampler(verbose=False, ratio=2.5)
    train_oversampled_x, train_oversampled_train_y = sm.fit_transform(
        homesite.train_x[train], homesite.train_y[train])

    clf = NeuralNetwork(input_units=644, hidden_units=c, output_units=2,
                        lr=0.05, lamb=0)

    data.train_x = train_oversampled_x
    data.train_y = train_oversampled_train_y

    # One-hot encode the fold labels (2 classes) as a boolean matrix.
    n = 2
    data.train_y = data.train_y.flatten()
    o_h = np.zeros((len(data.train_y), n))
    o_h[np.arange(len(data.train_y)), data.train_y.astype(int)] = 1
    data.train_y = o_h.astype(bool)

    clf.fit(data, batch_size=128, max_iterations=500, save_interval=500,
            path="../../../homesite_data/resources/ann_weights.bin")