Example 1
import csv

# id3_factory, perceptron_factory, and evaluate are helpers defined elsewhere.
def Additional_tests():
    results = []

    id3 = id3_factory()
    avg_accuracy, avg_error = evaluate(id3, 2)
    results.append([1, avg_accuracy, avg_error])

    perceptron = perceptron_factory()
    avg_accuracy, avg_error = evaluate(perceptron, 2)
    results.append([2, avg_accuracy, avg_error])

    with open("experiments12.csv", "w") as csv_file:
        writer = csv.writer(csv_file, delimiter=',', lineterminator='\n')
        for row in results:
            writer.writerow(row)
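Several of these examples call evaluate(factory, k) together with helpers such as knn_factory and split_crosscheck_groups without showing their definitions. As a purely illustrative sketch of the contract they imply (a factory whose train() builds a classifier with classify(), and a k-fold evaluate returning (avg_accuracy, avg_error)), something like the following could sit behind them; the fold format, method names, and error definition here are assumptions, not the original implementation:

# Illustrative sketch only; folds is assumed to be a list of
# (features, labels) pairs produced by split_crosscheck_groups.
def evaluate_sketch(classifier_factory, folds):
    accuracies = []
    k = len(folds)
    for i in range(k):
        test_x, test_y = folds[i]
        train_x = [x for j in range(k) if j != i for x in folds[j][0]]
        train_y = [y for j in range(k) if j != i for y in folds[j][1]]
        clf = classifier_factory.train(train_x, train_y)  # assumed factory API
        correct = sum(clf.classify(x) == y for x, y in zip(test_x, test_y))
        accuracies.append(correct / len(test_y))
    avg_accuracy = sum(accuracies) / k
    return avg_accuracy, 1.0 - avg_accuracy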
Example 2
import csv

def KNN_test():
    results = []

    for k in [1, 3, 5, 7, 13]:
        knn_k = knn_factory(k)
        avg_accuracy, avg_error = evaluate(knn_k, 2)
        results.append([k, avg_accuracy, avg_error])

    with open("experiments6.csv", "w") as csv_file:
        writer = csv.writer(csv_file, delimiter=',', lineterminator='\n')
        for row in results:
            writer.writerow(row)
Example 3
import pandas

def run_pass(cfg, imagecache='images', n_images=100):
    ''' pipeline.run_pass(cfg) -> metrics

    Given a set of parameters, 'cfg', of the format defined below, run
    a single pass through the pipeline. This includes loading a dataset,
    downloading the images, filtering undesirable images, training the
    CNN classifier, and producing the resulting scoring metrics. Returns
    a dictionary containing the scoring metrics.

    The format of the input parameters is a dictionary. An example of
    the structure is given below:

        {'dataset_filename': '100marvelcharacters.csv',
         'base_search_term': 'Marvel Comic Character',
         'search_options': {'style': 'lineart'},
         'optimizer': ('SGD', {'lr': 0.0001, 'momentum': 0.9}),
         'test_size': 0.3,
         'val_size': 0.1}

    'dataset_filename' is the path to the dataset to load.

    'base_search_term' is a string appended to the character name when
    executing the Google image search.

    'search_options' is the kwargs dictionary passed to
    download.generate_search_url().

    'optimizer' is a tuple containing the name of the optimizer in
    torch.optim, and the kwargs dictionary passed to its constructor.

    'test_size' is the fraction of the data to hold out for the test
    set; 'val_size' is the fraction to hold out for validation.
    '''

    dataset = pandas.read_csv(cfg['dataset_filename'])
    imgdir = prepare_imageset(dataset,
                              cfg['base_search_term'],
                              cfg['search_options'],
                              imagecache,
                              download_count=n_images)

    splitdir = train_test_split(imgdir, cfg['test_size'], cfg['val_size'])

    opt_name, opt_kwargs = cfg['optimizer']
    stats_trn, stats_val, stats_tst = classifier.evaluate(
        splitdir, opt_name, opt_kwargs)

    return {'train': stats_trn, 'val': stats_val, 'test': stats_tst}
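A minimal usage sketch for run_pass, reusing the cfg values from the docstring example above:

cfg = {
    'dataset_filename': '100marvelcharacters.csv',
    'base_search_term': 'Marvel Comic Character',
    'search_options': {'style': 'lineart'},
    'optimizer': ('SGD', {'lr': 0.0001, 'momentum': 0.9}),
    'test_size': 0.3,
    'val_size': 0.1,
}
metrics = run_pass(cfg, imagecache='images', n_images=100)
print(metrics['test'])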
Example 4
import os

import pandas as pd
import torch

# MonoTextData, CNNClassifier, evaluate, and compute_bleu are project helpers.
def main(args):
    data_pth = "data/%s" % args.data_name
    train_pth = os.path.join(data_pth, "train_data.txt")
    train_data = MonoTextData(train_pth, True, vocab=100000)
    vocab = train_data.vocab
    source_pth = os.path.join(data_pth, "test_data.txt")
    target_pth = args.target_path
    eval_data = MonoTextData(target_pth, True, vocab=vocab)
    source = pd.read_csv(source_pth, names=['label', 'content'], sep='\t')
    target = pd.read_csv(target_pth, names=['label', 'content'], sep='\t')

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Classification Accuracy
    model = CNNClassifier(len(vocab), 300, [1, 2, 3, 4, 5], 500,
                          0.5).to(device)
    model.load_state_dict(
        torch.load("checkpoint/%s-classifier.pt" % args.data_name))
    model.eval()
    eval_data, eval_label = eval_data.create_data_batch_labels(
        64, device, batch_first=True)
    acc = 100 * evaluate(model, eval_data, eval_label)
    print("Acc: %.2f" % acc)

    # BLEU Score: compute_bleu expects a list of reference lists and a
    # list of hypothesis token lists.
    total_bleu = 0.0
    sources = []
    targets = []
    for i in range(source.shape[0]):
        s = source.content[i].split()
        t = target.content[i].split()
        sources.append([s])
        targets.append(t)

    total_bleu += compute_bleu(sources, targets)[0]
    total_bleu *= 100
    print("Bleu: %.2f" % total_bleu)
Example 5
        #scheduler.step() # <- update scheduler

        epoch_loss = running_loss / n_s_train  # <- calculate epoch loss

        print("Epoch %4d\tLoss : %s" % (epoch_idx + 1, epoch_loss))

        if math.isnan(epoch_loss): continue  # if loss is NaN, skip this epoch

        if (epoch_idx + 1) % 1 == 0:  # evaluation interval (currently every epoch)

            clf.eval()  # Classifier evaluation mode: ON

            # ----------------------------------------------------------------------------------------------- #
            # ZERO-SHOT ACCURACY
            acc_zsl = evaluate(model=clf,
                               x=x_u_test,
                               y=y_u_test_ix,
                               attrs=u_attr)
            # ------------------------------------------------------- #
            # GENERALIZED SEEN ACCURACY
            acc_g_seen = evaluate(model=clf,
                                  x=x_s_test,
                                  y=y_s_test,
                                  attrs=attr)
            # ------------------------------------------------------- #
            # GENERALIZED UNSEEN ACCURACY
            acc_g_unseen = evaluate(model=clf,
                                    x=x_u_test,
Example 6
import matplotlib.pyplot as plt

import utils
# split_crosscheck_groups, knn_factory, and evaluate are defined elsewhere.

# question 3.2

patients, labels, test = utils.load_data()
split_crosscheck_groups([patients, labels], 2)

# question 5.1

k_list = [1, 3, 5, 7, 13]
accuracy_list = []

file_name = 'experiments6.csv'
with open(file_name, 'w') as file:
    for k in k_list:
        knn_f = knn_factory(k)
        accuracy, error = evaluate(knn_f, 2)
        file.write("%s,%s,%s\n" % (k, accuracy, error))
        accuracy_list.append(accuracy)

# question 5.2

plt.plot(k_list, accuracy_list)
plt.xlabel('K value')
plt.ylabel('Average accuracy')
plt.title('Part B, question 5.2')
plt.show()

# questions 7.1, 7.2

file_name = 'experiments12.csv'
Example 7
    # (excerpt from a larger function: np is numpy, LabelSpreading comes from
    # sklearn.semi_supervised, and clf is a project module)
    x_all = np.concatenate((train_data, test_data))  # concatenate train and test data (for structure exploitation)
    test_labels_none = -1 * np.ones([test_labels.shape[0]])  # test labels are set to -1 (unlabeled)
    y_all = np.concatenate((train_labels, test_labels_none))  # concatenate the train labels and the -1 test labels

    consist_model = LabelSpreading(gamma=4, max_iter=60)
    consist_model.fit(x_all, y_all)
    clf.evaluate_sub('consistency model', test_labels,
                     consist_model.predict(test_data))

    lgr_model = clf.classifier('LGR', train_data, train_labels)
    clf.evaluate('LGR', lgr_model, test_data, test_labels)

    knn_model = clf.classifier('KNN', train_data, train_labels)
    clf.evaluate('KNN', knn_model, test_data, test_labels)

    bnb_model = clf.classifier('BNB', train_data, train_labels)
    clf.evaluate('BNB', bnb_model, test_data, test_labels)

    svm_model = clf.classifier('SVM', train_data, train_labels)
    clf.evaluate('SVM', svm_model, test_data, test_labels)

    dtc_model = clf.classifier('DTC', train_data, train_labels)
    clf.evaluate('DTC', dtc_model, test_data, test_labels)

    model_to_attack = clf.classifier('MLP', train_data, train_labels)
    # count the number of adversarial examples the models can resist
Example 8
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.linear_model import Perceptron

# 'classifier' is the local module providing the factories; 'examples' and
# 'labels' come from the dataset loaded earlier in the script.
data = []
data.append(examples)
data.append(labels)
data_new = []
data_new.append(SelectKBest(f_classif, k=100).fit_transform(examples, labels))
data_new.append(labels)
classifier.split_crosscheck_groups(data_new, 2)

print("using CUT data\n")

decision_tree = classifier.sklearn_factory_wrapper(RandomForestClassifier())  # note: wraps a random forest despite the name
perceptron = classifier.sklearn_factory_wrapper(Perceptron())
knn = classifier.knn_factory(7)
print("knn and perceptron: \n")
ensemble = classifier.ensemble_factory([knn, perceptron])
accuracy, error = classifier.evaluate(ensemble, 2)
print("%.3f, %.3f\n" % (accuracy, error))

print("knn and decision tree: \n")
ensemble = classifier.ensemble_factory([knn, decision_tree])
accuracy, error = classifier.evaluate(ensemble, 2)
print("%.3f, %.3f\n" % (accuracy, error))

print("all three: \n")
ensemble = classifier.ensemble_factory([knn, perceptron, decision_tree])
accuracy, error = classifier.evaluate(ensemble, 2)
print("%.3f, %.3f\n" % (accuracy, error))

print("knn alone: \n")
ensemble = classifier.ensemble_factory([knn])
accuracy, error = classifier.evaluate(ensemble, 2)
print("%.3f, %.3f\n" % (accuracy, error))
Example 9
#--------------- Produce accuracy rates and graphs ------------------

clf_names_and_results = []

for c in classifier_factory_list:

    accuracy_list_for_classifier = []
    classifier_name = c().to_string()

    for v in best_num_list:
        selector = SelectKBest(score_func=f_classif, k=v)
        selector.fit(patients, labels)
        newData = selector.transform(patients)
        split_crosscheck_groups((newData, labels), num_folds_num)
        # Create a fresh classifier from the factory
        clf = c()
        accuracy, error = evaluate(clf, num_folds_num)
        accuracy_list_for_classifier.append(accuracy)

    clf_names_and_results.append(
        (classifier_name, accuracy_list_for_classifier))

    # accuracy_list_for_all_classifiers.append(accuracy_list_for_classifier)

present_graphs(best_num_list, clf_names_and_results)

# --------------------- Code that produces the dot file for the ID3 tree presented in the report ---------------------
clf = tree.DecisionTreeClassifier()
clf = clf.fit(patients, labels)
tree.export_graphviz(clf, out_file='tree.dot')
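The exported tree.dot can then be rendered with Graphviz (e.g. dot -Tpng tree.dot -o tree.png) to produce the tree figure for the report.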
Example 10
import csv

import matplotlib.pyplot as plt
from sklearn import preprocessing, tree
from sklearn.linear_model import Perceptron

# load_data, split_crosscheck_groups, knn_factory, evaluate, triple_model,
# and write_prediction are project helpers.
def main():
    # Variables used for debug
    skip_knn = True
    skip_tree = True
    skip_perc = True

    train_features, train_labels, test_features = load_data('data/Data.pickle')

    # Split once the dataset to two folds.
    folds = 2
    #split_crosscheck_groups(train_features, train_labels, folds)

    if not skip_knn:
        # Evaluating KNN with different k value:
        k_list = [1, 3, 5, 7, 13]
        acc_list = []
        err_list = []
        with open('experiments6.csv', mode='w', newline='') as csv_file:
            exp_writer = csv.writer(csv_file)
            for k in k_list:
                knn_fac = knn_factory(k)
                err, acc = evaluate(knn_fac, folds)
                print("k=", k, " acc=", acc, " err=", err)
                exp_writer.writerow([k, acc, err])
                acc_list.append(acc)
                err_list.append(err)

        # Plot KNN Results
        plt.subplot(2, 1, 1)
        plt.plot(k_list, acc_list, '--', color='g')
        plt.plot(k_list, acc_list, 'bo')
        plt.ylabel("Accuracy")
        plt.xlabel("k")
        plt.xticks(k_list)
        plt.subplot(2, 1, 2)
        plt.plot(k_list, err_list, '--', color='r')
        plt.plot(k_list, err_list, 'bo')
        plt.ylabel("Error")
        plt.xlabel("k")
        plt.xticks(k_list)
        plt.tight_layout()
        plt.show()

    # Perform classification for Perceptron and Tree and write to files.
    with open('experiments12.csv', mode='w', newline='') as csv_file:
        exp_writer = csv.writer(csv_file)
        if not skip_tree:
            # Decision Tree experiment
            myTree = tree.DecisionTreeClassifier(criterion="entropy")
            err, acc = evaluate(myTree, folds)
            print("tree acc=", acc, " tree err=", err)
            exp_writer.writerow([1, acc, err])

        if not skip_perc:
            # Perceptron experiment
            myPerc = Perceptron(tol=1e-3, random_state=0)
            err, acc = evaluate(myPerc, folds)
            print("perceptron acc=", acc, " perceptron err=", err)
            exp_writer.writerow([2, acc, err])

    # Competition: Classify test_features
    print("Triple model")
    my_model = triple_model()
    my_model.fit(train_features, train_labels)
    res = my_model.final_predict(preprocessing.scale(test_features))
    write_prediction(res)
Example 11
from datetime import datetime

from sklearn.metrics import confusion_matrix

# classifier, data_manipulation, read_data, read_labels, reshape_list, and
# split_dataset are project helpers.
def main():
    ######
    # mat = get_data.read_data("C:/Users/Furkan/Desktop/Bitirme/dataset/video4.mp4")
    ######
    print("Dataset has been reading...")
    data = read_data()
    labels = read_labels()
    print("Dataset has been read.")

    print('Applying contrast stretching manipulation...')
    contrasted_data, contrasted_labels = data_manipulation.apply_contrast(
        data, labels)
    print('Size of contrasted data: %d' % len(contrasted_data))

    print('Applying rotation manipulation...')
    rotated_data, rotated_labels = data_manipulation.apply_rotation(
        contrasted_data, labels)
    print('Size of rotated data: %d' % len(rotated_data))

    print('Applying shifting manipulation...')
    shifted_data, shifted_labels = data_manipulation.apply_shifting(
        contrasted_data, labels)
    print('Size of shifted data: %d' % len(shifted_data))

    print('Applying flipping manipulation...')
    flipped_data, flipped_labels = data_manipulation.apply_horizontal_flip(
        contrasted_data, labels)
    print('Size of flipped data: %d' % len(flipped_data))

    print('Concatenating manipulated data')
    concat_data = rotated_data + shifted_data + contrasted_data + flipped_data
    # concat_data = data  # use this for the 2.5k-sample original data

    print("Reshaping images...")
    reshaped_concat_data = reshape_list(concat_data)
    print('Shape of data: %s' % str(reshaped_concat_data[0].shape))

    print("PCA has been applying...")
    data_pca = data_manipulation.pca(reshaped_concat_data)
    print("PCA has been applied.")

    data = data_pca
    concat_labels = labels

    print("Spliting dataset into training and test set...")
    X_train, X_test, y_train, y_test = split_dataset(data,
                                                     concat_labels[:len(data)])

    start = datetime.now()
    print("Appyling K-Nearest Neighbours Classifier...")
    knn_labels = classifier.knn_classifier(X_train, y_train, X_test)
    print("Evaluating accuracy...")
    classifier.evaluate(y_test, knn_labels)
    cm_knn = confusion_matrix(y_test, knn_labels)
    print("Confusion matrix: %s \n\t" % str(cm_knn))
    print("Running time: %s" % str(datetime.now() - start))

    start = datetime.now()
    print("Appyling Support Vector Machines Classifier...")
    svm_labels = classifier.svm_classifier(X_train, y_train, X_test)
    print("Evaluating accuracy...")
    classifier.evaluate(y_test, svm_labels)
    cm_svm = confusion_matrix(y_test, svm_labels)
    print("Confusion matrix: %s \n\t" % str(cm_svm))
    print("Running time: %s" % str(datetime.now() - start))

    start = datetime.now()
    print("Appyling Naive Bayes Classifier...")
    nbc_labels = classifier.naive_bayes_classifier(X_train, y_train, X_test)
    print("Evaluating accuracy...")
    classifier.evaluate(y_test, nbc_labels)
    cm_nbc = confusion_matrix(y_test, nbc_labels)
    print("Confusion matrix: %s \n\t" % str(cm_nbc))
    print("Running time: %s" % str(datetime.now() - start))

    start = datetime.now()
    print("Appyling Decision Tree Classifier...")
    dtc_labels = classifier.decision_tree_classifier(X_train, y_train, X_test)
    print("Evaluating accuracy...")
    classifier.evaluate(y_test, dtc_labels)
    cm_dtc = confusion_matrix(y_test, dtc_labels)
    print("Confusion matrix: %s \n\t" % str(cm_dtc))
    print("Running time: %s" % str(datetime.now() - start))

    start = datetime.now()
    print("Appyling Random Forest Classifier...")
    rfc_labels = classifier.random_forest_classifier(X_train, y_train, X_test)
    print("Evaluating accuracy...")
    classifier.evaluate(y_test, rfc_labels)
    cm_rfc = confusion_matrix(y_test, rfc_labels)
    print("Confusion matrix: %s \n\t" % str(cm_rfc))
    print("Running time: %s" % str(datetime.now() - start))
    """print("Applying K-Means Clustering...")
Example 12
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.linear_model import Perceptron

# 'classifier' is the local module providing the factories; load_data is a
# project helper.

examples, labels, test = load_data()
data = []
data.append(examples)
data.append(labels)

for feature_num in range(5, 180, 8):
    print("data cut to: %d\n" % feature_num)
    data_new = []
    data_new.append(
        SelectKBest(f_classif, k=feature_num).fit_transform(examples, labels))
    data_new.append(labels)
    classifier.split_crosscheck_groups(data_new, 2)

    forest = classifier.sklearn_factory_wrapper(RandomForestClassifier())
    perceptron = classifier.sklearn_factory_wrapper(Perceptron())
    knn = classifier.knn_factory(7)

    print("forest perf:\n")
    accuracy, error = classifier.evaluate(forest, 2)
    print("%.3f, %.3f\n" % (accuracy, error))
    print("perc perf:\n")
    accuracy, error = classifier.evaluate(perceptron, 2)
    print("%.3f, %.3f\n" % (accuracy, error))
    print("knn perf:\n")
    accuracy, error = classifier.evaluate(knn, 2)
    print("%.3f, %.3f\n" % (accuracy, error))
    print("-----------------------------------\n")