Ejemplo n.º 1
0
def sklearn_mlp_metrics(x, y, x_labels, y_labels, one_vs_rest=False, hn=(64,), activation='relu'):
    """Evaluate a multilayer perceptron and collect its results in one row.

    A ``NeuralNetwork`` is built from ``x``, ``y``, ``x_labels`` and
    ``y_labels`` with ``hn`` as ``hidden_neurons`` and ``activation`` as
    ``activation_fun``. Predictions come from ``crossval_mlp(one_vs_rest)``;
    they are then passed to ``plot_confusion_matrix``, ``show_basic_metrics``
    and ``count_basic_metrics``.

    Returns:
        A one-element list ``[[label, acc, f1, predictions, cm]]`` where
        ``acc`` and ``f1`` are the two values returned by
        ``count_basic_metrics`` and ``cm`` is whatever
        ``plot_confusion_matrix`` returns.
    """
    model_name = 'Multilayer Perceptron'
    network = NeuralNetwork(
        x, y, x_labels, y_labels,
        hidden_neurons=hn,
        activation_fun=activation
    )

    predictions = network.crossval_mlp(one_vs_rest)

    # Reporting: both calls receive the same display label.
    confusion = network.plot_confusion_matrix(predictions, label=model_name)
    network.show_basic_metrics(predictions, label=model_name)

    accuracy, f1_score = network.count_basic_metrics(predictions)
    return [[model_name, accuracy, f1_score, predictions, confusion]]
Ejemplo n.º 2
0
def create_model(model_type, labels):
    """Return the ``(features, model)`` pair for the requested model type.

    The project modules for each variant are imported lazily, so only the
    selected variant's dependencies are loaded.

    Args:
        model_type: One of ``"sparse"``, ``"dense"`` or ``"nn"``.
        labels: Passed as the first argument to the chosen classifier.

    Returns:
        A tuple ``(features, model)``.

    Raises:
        ValueError: If ``model_type`` is not one of the supported names.
    """
    if model_type == "sparse":
        from classifiers.sparse_perceptron import SparsePerceptron
        from features.sparse_features import SparseFeatureExtractor
        extractor = SparseFeatureExtractor()
        classifier = SparsePerceptron(labels, min_update=Config().min_update)
        return extractor, classifier

    if model_type == "dense":
        from features.embedding import FeatureEmbedding
        from classifiers.dense_perceptron import DensePerceptron
        extractor = dense_features_wrapper(FeatureEmbedding)
        # The perceptron is sized from the wrapped extractor.
        classifier = DensePerceptron(labels, num_features=extractor.num_features())
        return extractor, classifier

    if model_type == "nn":
        from features.indexer import FeatureIndexer
        from classifiers.neural_network import NeuralNetwork
        extractor = dense_features_wrapper(FeatureIndexer)
        classifier = NeuralNetwork(labels, inputs=extractor.feature_types)
        return extractor, classifier

    raise ValueError("Invalid model type: '%s'" % model_type)
Ejemplo n.º 3
0
# Read the run configuration from the argument parser.
adwin_delta = argument_parser.get_delta()
training_set_ratio = argument_parser.get_training_set_ratio()
neighbors_number = argument_parser.get_neighbors_number()
kernel = argument_parser.get_kernel()
regulation = argument_parser.get_regulation()
max_iters = argument_parser.get_iterations()
n_of_hidden = argument_parser.get_n_of_hidden_layers()
algorithm = argument_parser.get_algorithm()
printing = argument_parser.is_printing()

data, labels = load_data(filename)

# Maps each algorithm name to a zero-argument factory. Only the selected
# classifier is constructed; the others (and the options that apply only to
# them) are never touched.
classifiers = {
    'bayes': lambda: Bayes(data, labels, training_set_ratio),
    'knn': lambda: KNN(data, labels, training_set_ratio, neighbors_number),
    'nn': lambda: NeuralNetwork(data, labels, training_set_ratio, n_of_hidden,
                                max_iters),
    'svm': lambda: SVM(data, labels, training_set_ratio, kernel, regulation),
}

if algorithm not in classifiers:
    # Same exception type as a plain dict lookup, with a readable message.
    raise KeyError("Unknown algorithm %r; expected one of %s"
                   % (algorithm, ', '.join(sorted(classifiers))))

classifier = classifiers[algorithm]()

classifier.train()
classifier.test()

# Collect the per-metric tables produced by the tested classifier.
accuracy_table = classifier.get_accuracy_table()
precision_table = classifier.get_precision_table()
sensitivity_table = classifier.get_sensitivity_table()
specificity_table = classifier.get_specificity_table()

metrics = (accuracy_table, precision_table, sensitivity_table,
           specificity_table)
Ejemplo n.º 4
0
args = ArgumentParser().get_arguments()

data = load_dataset(args.dataset)
# Take the distinct values of the last column before `factorize` is applied
# to the frame.
labels = data[data.columns[-1]].unique()
data = data.apply(factorize)

fraction = args.training_fraction
class_args = args.class_args

# args.classifier selects what to build: 0 builds every classifier, 1-5 build
# a single one. The lookup compares by value; the previous `is <int>` checks
# compared object identity, which only worked thanks to CPython's small-int
# cache and emits a SyntaxWarning on Python 3.8+.
_classifier_choices = {
    0: (DecisionTree, Bayes, SVM, KNeighbors, NeuralNetwork),
    1: (DecisionTree,),
    2: (Bayes,),
    3: (SVM,),
    4: (KNeighbors,),
    5: (NeuralNetwork,),
}
if args.classifier in _classifier_choices:
    classifiers = [
        _classifier_type(data, labels, fraction, class_args)
        for _classifier_type in _classifier_choices[args.classifier]
    ]
# NOTE: as before, any other value leaves `classifiers` unbound.

fig, ax = plt.subplots()

print_basic_stats(args.dataset_name, args.training_percent)
Ejemplo n.º 5
0
from statistics.confusion_matrix import confusion_matrix
from statistics.performance import compute_performance_metrics, compute_auc

if __name__ == '__main__':
    # Original note: "Classify data changing balancing ratio."
    # The dataset variant (balancing ratio in the file name) is chosen through
    # load_path below.

    # Fit AdaBoost classifiers of increasing size on the hidden-layer outputs
    # of a neural network.
    # load_path = "../homesite_data/resources/oversampled_normalized_data_ratio_2.5.bin"
    load_path = "../homesite_data/resources/oversampled_normalized_data_ratio_2.bin"
    homesite = Data()
    homesite.load_sliptted_data(load_path)
    del homesite.test_x  # Deleted to save memory.

    # NOTE(review): presumably this loads previously trained weights from
    # `path` -- confirm against NeuralNetwork's constructor.
    clf_ann = NeuralNetwork(path="../homesite_data/ann_weights.bin", lr=0.00005,
                            lamb=0)
    # The network's hidden-layer outputs become the features for AdaBoost.
    train_output_ann = clf_ann.get_hidden_output(homesite.train_x)
    validation_output_ann = clf_ann.get_hidden_output(homesite.validation_x)
    # train_output_ann = np.hstack((train_output_ann, homesite.train_x))
    # validation_output_ann = np.hstack((validation_output_ann, homesite.validation_x))

    for c in range(1, 10):
        # Train classifier with 101, 201, ..., 901 estimators.
        # print(...) with a single argument behaves the same on Python 2 and 3.
        print("Training classifier.")
        clf = AdaBoostClassifier(n_estimators=1 + 100 * c)
        clf.fit(train_output_ann, homesite.train_y)

        # Test classifier: keep column 1 of the predicted probabilities.
        print('Testing classifier.')
        predicted_labels = clf.predict_proba(validation_output_ann)[:, 1]
Ejemplo n.º 6
0
#         print 'Saving result.', i * 10
#         save_np_array("../homesite_data/results/ann_grid_search_accuracy.bin", np.array(accuracy_history))
#         save_np_array("../homesite_data/results/ann_grid_search_precision.bin", np.array(precision_history))
#         save_np_array("../homesite_data/results/ann_grid_search_recall.bin", np.array(recall_history))
#         save_np_array("../homesite_data/results/ann_grid_search_auc.bin", np.array(auc_history))
#
#         del clf

    for i in range(10):
        # Creating classifier.
        # Hidden-layer size for this run: 1 on the first pass, then
        # 10, 20, ..., 90.
        a = 1 if i == 0 else i * 10

        clf = NeuralNetwork(input_units=644, hidden_units=a, output_units=2,
                            lr=0.05, lamb=0)

        # Train classifier.
        # print(...) with a single argument behaves the same on Python 2 and 3.
        print("Training classifier.")

        clf.fit(homesite, batch_size=128,
                max_iterations=500, save_interval=500,
                path="../homesite_data/resources/ann_weights.bin")

        # Test classifier: keep column 1 of the predicted probabilities.
        print('Testing classifier.')
        predicted_labels = clf.predict_proba(homesite.validation_x)[:, 1]

        # Show final results.
        # validation_y is reduced to a class index per row with argmax
        # (assumes one row per sample with one column per class -- confirm);
        # the predicted scores are rounded to the nearest integer.
        results = confusion_matrix(np.argmax(homesite.validation_y, axis=1),
                                   np.round(predicted_labels))
        accuracy, precision, recall = compute_performance_metrics(results)
Ejemplo n.º 7
0
    # Accumulators that are not used within this excerpt (presumably for
    # averaging ROC curves across folds -- confirm against the code below it).
    mean_tpr = 0.0
    mean_fpr = np.linspace(0, 1, 100)
    all_tpr = []
    c = 50  # hidden_units value used for every fold
    # NOTE(review): labels passed to the constructor, `n_folds`, and direct
    # iteration over the object look like the legacy sklearn.cross_validation
    # API -- confirm the installed scikit-learn version supports it.
    cvs = StratifiedKFold(homesite.train_y, n_folds=5)

    # Train classifier.
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print("\nTraining classifier param %d" % c)

    for i, (train, test) in enumerate(cvs):
        print(i)
        # Oversample only the training indices of this fold.
        sm = OverSampler(verbose=False, ratio=2.5)
        train_oversampled_x, train_oversampled_train_y = sm.fit_transform(
            homesite.train_x[train], homesite.train_y[train])

        clf = NeuralNetwork(input_units=644, hidden_units=c, output_units=2,
                            lr=0.05, lamb=0)
        # NOTE(review): `data` is not defined in this excerpt -- confirm it is
        # the container that fit() is meant to read train_x / train_y from.
        data.train_x = train_oversampled_x
        data.train_y = train_oversampled_train_y

        # One-hot encode the flattened labels into an (n_samples, n) boolean
        # array: row k has True in the column given by label k.
        n = 2
        data.train_y = data.train_y.flatten()
        o_h = np.zeros((len(data.train_y), n))
        o_h[np.arange(len(data.train_y)), data.train_y.astype(int)] = 1
        data.train_y = o_h.astype(bool)

        clf.fit(data,
                batch_size=128,
                max_iterations=500,
                save_interval=500,
                path="../../../homesite_data/resources/ann_weights.bin")