# Beispiel #1 (Example #1)
# 0
def run_part1_digit_2(k):
    """Classify the digit test set with a k-nearest-neighbor majority vote.

    k -- number of nearest training neighbors consulted per test example.

    Prints per-example progress, overall accuracy, wall-clock testing time,
    and a 10x10 confusion matrix (rows = true label, cols = predicted).
    """
    print("Importing data...")
    (_, traindata, _, trainlabel) = feature_map_part1_1(train_raw_digit_dataset, {' ': 0, '#': 1, '+': 1}, (28, 28), 10)
    (_, testdata, _, testlabel) = feature_map_part1_1(test_raw_digit_dataset, {' ': 0, '#': 1, '+': 1}, (28, 28), 10)

    print("Testing")
    start_time = time.time()
    preds_test = []
    for counter, example in enumerate(testdata, start=1):
        diffs = distance(example, traindata)
        #diffs = sin_distance(example, traindata)
        #diffs = manhattan_distance(example, traindata)
        idx = np.argsort(diffs)
        # Labels of the k closest training examples.
        neighbor_rank = trainlabel[idx][0:k]
        # BUG FIX: the original summed the label *values* of matching
        # neighbors (i * count_i), which biases argmax toward larger digits
        # and makes class 0 unable to win.  Count the votes instead.
        hist = [np.sum(neighbor_rank == i) for i in np.arange(0, 10)]
        preds_test.append(np.argmax(hist))
        print(str(counter) + "/" + str(len(testlabel)), end='\r')
        sys.stdout.write("\033[K")
    preds_test = np.array(preds_test)
    print("Accuracy: " + str(np.sum(preds_test == testlabel) / (len(testlabel) * 1.0) * 100) + "%")
    end_time = time.time()
    print('Testing finished in %.3f seconds' % (end_time - start_time))
    # confusion_matrix[i][j] = # of examples with true label i predicted as j.
    confusion_matrix = np.array([np.array([np.sum(preds_test[testlabel == i] == j) \
                                    for j in np.arange(0, 10)]) \
                                for i in np.arange(0, 10)])
    print('Confusion Matrix:')
    print('\n'.join([''.join(['{:6}'.format(item) for item in row]) for row in confusion_matrix]))
# Beispiel #2 (Example #2)
# 0
def run_part1_digit_ec4():
    """Train an SVM on binarized digit images and report test accuracy.

    Maps background to 0 and both ink characters ('#', '+') to 1, fits a
    default ``svm.SVC`` classifier, then prints accuracy and a 10x10
    confusion matrix (rows = true label, cols = predicted).
    """
    print("Importing data...")
    _, traindata, _, trainlabel = feature_map_part1_1(
        train_raw_digit_dataset, {' ': 0, '#': 1, '+': 1}, (28, 28), 10)
    _, testdata, _, testlabel = feature_map_part1_1(
        test_raw_digit_dataset, {' ': 0, '#': 1, '+': 1}, (28, 28), 10)
    clf = svm.SVC()

    print("Training...")
    clf.fit(traindata, trainlabel)
    print("Testing")
    preds_test = clf.predict(testdata)

    accuracy = np.sum(preds_test == testlabel) / (len(testlabel) * 1.0) * 100
    print("\nAccuracy: " + str(accuracy) + "%")
    # confusion_matrix[i][j] = # of examples with true label i predicted as j.
    confusion_matrix = np.array(
        [np.array([np.sum(preds_test[testlabel == i] == j)
                   for j in np.arange(0, 10)])
         for i in np.arange(0, 10)])
    print('Confusion Matrix:')
    rows = [''.join('{:6}'.format(item) for item in row)
            for row in confusion_matrix]
    print('\n'.join(rows))
# Beispiel #3 (Example #3)
# 0
def run_extra_credit_1_audio(k):
    """Run the part-1 pipeline on the segmented audio dataset.

    k -- forwarded as the first argument of ``run``.

    Audio "images" are 25x10 with two classes; blanks map to 1 and '%' to 0.
    """
    print("================AUDIO - BINARY FEATURE================")
    print("Importing data...")

    train_dataset = feature_map_part1_1(
        train_segmented_dataset, {' ': 1, '%': 0}, (25, 10), 2)
    test_dataset = feature_map_part1_1(
        test_segmented_dataset, {' ': 1, '%': 0}, (25, 10), 2)

    model, _, examples, confusion_matrix = run(k, train_dataset, test_dataset)
    print("=====================================================\n")
# Beispiel #4 (Example #4)
# 0
def run_part1_face_1(k):
    """Run the part-1 pipeline on the face dataset with binary features.

    Face images are 70x60 with two classes; blanks map to 0 and '#' to 1.

    NOTE(review): the parameter ``k`` is never used — ``run`` is invoked
    with a hard-coded first argument of 0.1.  Confirm whether that constant
    is intentional (cf. the audio variant, which forwards ``k``).
    """
    print("================FACE - BINARY FEATURE================")
    print("Importing data...")

    train_dataset = feature_map_part1_1(
        train_raw_face_dataset, {' ': 0, '#': 1}, (70, 60), 2)
    test_dataset = feature_map_part1_1(
        test_raw_face_dataset, {' ': 0, '#': 1}, (70, 60), 2)

    run(0.1, train_dataset, test_dataset)

    print("=====================================================\n")
# Beispiel #5 (Example #5)
# 0
def run_part1_digit_extra_1(k):
    """Run the part-1 pipeline on digits with a ternary feature map.

    Unlike the binary variants, '#' maps to 1 and '+' to 2, so edge pixels
    ('+') are distinguished from solid ink ('#').

    NOTE(review): the parameter ``k`` is never used — ``run`` is invoked
    with a hard-coded first argument of 0.1.  Confirm whether that constant
    is intentional.
    """
    print("================DIGIT - TERNARY FEATURE================")
    print("Importing data...")

    train_dataset = feature_map_part1_1(
        train_raw_digit_dataset, {' ': 0, '#': 1, '+': 2}, (28, 28), 10)
    test_dataset = feature_map_part1_1(
        test_raw_digit_dataset, {' ': 0, '#': 1, '+': 2}, (28, 28), 10)

    run(0.1, train_dataset, test_dataset)

    print("=====================================================\n")
# Beispiel #6 (Example #6)
# 0
def run_part1_digit_1(learning_rate, include_bias, random_init, rand_set,
                      epoch_num):
    """Train a 10-class perceptron on binarized digit images.

    learning_rate -- base step size; decayed each epoch (see ``alpha``).
    include_bias  -- if True, learn and update a per-class bias term.
    random_init   -- if True, start weights/bias uniform in [0, 1);
                     otherwise start at zero.
    rand_set      -- if True, visit training examples in a fresh random
                     order each epoch.
    epoch_num     -- the epoch loop runs epoch_num + 1 times; error rates
                     are recorded at the START of each iteration, so the
                     curves include the untrained (epoch-0) error.

    Prints per-epoch train/test error, final accuracy, a confusion matrix,
    and shows a learning-curve figure plus a weight-vector visualization.

    NOTE(review): a later function in this file reuses the name
    ``run_part1_digit_1``; whichever definition comes last wins at import
    time — confirm which one callers expect.
    """
    print("Importing data...")
    (_, traindata, _,
     trainlabel) = feature_map_part1_1(train_raw_digit_dataset, {
         ' ': 0,
         '#': 1,
         '+': 1
     }, (28, 28), 10)
    (_, testdata, _, testlabel) = feature_map_part1_1(test_raw_digit_dataset, {
        ' ': 0,
        '#': 1,
        '+': 1
    }, (28, 28), 10)

    # w = np.zeros((10, 28 * 28))
    # One weight vector (row) per digit class; random init is the default
    # and is overwritten below when random_init is False.
    w = np.random.rand(10, len(traindata[0]))
    bias = np.random.rand(10)

    if (not random_init):
        w = np.zeros((10, len(traindata[0])))
        bias = np.zeros(10)
    if (not include_bias):
        # Bias stays zero: the update loop below only touches it when
        # include_bias is True.
        bias = np.zeros(10)

    print("Training...")
    trainerr = []
    testerr = []
    for epoch in np.arange(0, epoch_num + 1):
        # Permute the dataset
        p_ind = np.arange(0, len(trainlabel))
        if rand_set:
            p_ind = np.random.permutation(p_ind)
        p_traindata = traindata[p_ind]
        p_trainlabel = trainlabel[p_ind]
        # print(traindata.shape)
        # print(p_traindata.shape)
        # print(trainlabel.shape)
        # print(p_trainlabel.shape)
        # print(p_ind.shape)
        # break

        # Epoch test
        # Evaluated BEFORE this epoch's updates, so trainerr[0]/testerr[0]
        # reflect the initial (untrained) weights.
        preds_test = np.argmax(
            np.dot(w, testdata.transpose()) + bias.reshape(10, 1), 0)
        preds_train = np.argmax(
            np.dot(w, traindata.transpose()) + bias.reshape(10, 1), 0)
        trainerr.append(
            np.sum(preds_train != trainlabel) / (len(trainlabel) * 1.0))
        testerr.append(
            np.sum(preds_test != testlabel) / (len(testlabel) * 1.0))
        print("Epoch " + str(epoch) + ": " + str(trainerr[epoch] * 100) +
              "% " + str(testerr[epoch] * 100) + "%",
              end='\r')
        sys.stdout.write("\033[K")

        # Train for one epoch
        # Learning-rate decay: alpha = learning_rate * 1000 / (1000 + t).
        alpha = learning_rate * 1000.0 / (1000.0 + epoch + 1)
        for (example, label) in zip(p_traindata, p_trainlabel):
            pred = np.argmax(np.dot(w, example) + bias)
            if (pred != label):
                # Multi-class perceptron update: on a mistake, pull the true
                # class's weights toward the example and push the predicted
                # class's weights away.
                w[label] += alpha * example
                w[pred] -= alpha * example
                if (include_bias):
                    bias[label] += alpha
                    bias[pred] -= alpha

    print("Testing")
    preds_test = np.argmax(
        np.dot(w, testdata.transpose()) + bias.reshape(10, 1), 0)
    print("Accuracy: " +
          str(np.sum(preds_test == testlabel) /
              (len(testlabel) * 1.0) * 100) + "%")
    # confusion_matrix[i][j] = # of examples with true label i predicted as j.
    confusion_matrix = np.array([np.array([np.sum(preds_test[testlabel == i] == j) \
                                    for j in np.arange(0, 10)]) \
                                for i in np.arange(0, 10)])
    print('Confusion Matrix:')
    print('\n'.join([
        ''.join(['{:6}'.format(item) for item in row])
        for row in confusion_matrix
    ]))

    # Plot learning curve
    fig1 = plt.figure(figsize=(10, 6))
    plt.plot(np.arange(0, epoch_num + 1), trainerr, label='Training Error')
    plt.plot(np.arange(0, epoch_num + 1), testerr, label='Testing Error')
    plt.xlabel('Epoch', fontsize=14)
    plt.ylabel('Error rate', fontsize=14)
    plt.xlim(xmin=0, xmax=epoch_num)
    plt.ylim(ymin=0, ymax=1)
    plt.legend()
    plt.grid(True)
    plt.title('Learning Curves', fontsize=16)

    # EC 2
    # Render each class's learned weight vector as a 28x28 heat map:
    # classes 0-4 on the top row, 5-9 on the bottom row.
    fig2, axes1 = plt.subplots(nrows=2, ncols=5, figsize=(10, 6))
    fig2.subplots_adjust(left=0.03,
                         right=0.95,
                         top=0.93,
                         bottom=0.05,
                         wspace=0.30,
                         hspace=0.05)
    for i in np.arange(0, 5):
        axstop = axes1[0][i]
        axsbot = axes1[1][i]
        imtop = axstop.imshow(np.reshape(w[i], (28, 28)),
                              interpolation='nearest',
                              cmap="jet")
        imbot = axsbot.imshow(np.reshape(w[i + 5], (28, 28)),
                              interpolation='nearest',
                              cmap="jet")
        axstop.set_title(str(i))
        axsbot.set_title(str(i + 5))
        axstop.set_axis_off()
        axsbot.set_axis_off()
        cbartop = plt.colorbar(imtop, ax=axstop, fraction=0.046, pad=0.04)
        cbartop.locator = ticker.MaxNLocator(nbins=5)
        cbartop.update_ticks()
        cbarbot = plt.colorbar(imbot, ax=axsbot, fraction=0.046, pad=0.04)
        cbarbot.locator = ticker.MaxNLocator(nbins=5)
        cbarbot.update_ticks()
    plt.suptitle('Weight Vectors Visualization', fontsize=16)

    plt.show()
# Beispiel #7 (Example #7)
# 0
def run_part1_digit_1(k):
    """Run the part-1 classifier on binary digit features and visualize it.

    Trains/evaluates via ``run(0.1, ...)``, then produces two figures:
    example test images with extreme posterior probabilities, and log-odds
    maps for the four most-confused class pairs.

    NOTE(review): the parameter ``k`` is never used — ``run`` receives a
    hard-coded 0.1 (presumably a smoothing constant; confirm against
    ``run``'s signature).  Also, this definition reuses the name of an
    earlier ``run_part1_digit_1`` in this file and will shadow it at
    import time.
    """
    print("================DIGIT - BINARY FEATURE================")
    print("Importing data...")
    train_dataset = feature_map_part1_1(train_raw_digit_dataset, {
        ' ': 0,
        '#': 1,
        '+': 1
    }, (28, 28), 10)
    test_dataset = feature_map_part1_1(test_raw_digit_dataset, {
        ' ': 0,
        '#': 1,
        '+': 1
    }, (28, 28), 10)

    (model, _, examples, confusion_matrix) = run(0.1, train_dataset,
                                                 test_dataset)

    # Zero the diagonal so only misclassifications remain, then take the
    # 4 largest off-diagonal entries as (true, predicted) confusion pairs.
    confusion_matrix_ndig = np.array(confusion_matrix)
    np.fill_diagonal(confusion_matrix_ndig, 0)
    confusion_pairs = largest_indices(confusion_matrix_ndig, 4)
    confusion_pairs = list(zip(confusion_pairs[0], confusion_pairs[1]))

    (priors, distributions) = model

    # Figure 1: for each of 5 classes, show the example pair with the
    # lowest (left two panels) and highest (right two panels) posterior.
    # Pixels are inverted (1 - x) so ink renders dark on the Greys cmap.
    fig1, axes1 = plt.subplots(nrows=5, ncols=4, figsize=(6, 7.5))
    fig1.subplots_adjust(left=0.07,
                         right=0.92,
                         top=0.93,
                         bottom=0.05,
                         wspace=0.05,
                         hspace=0.05)
    for i in np.arange(0, 5):
        axs = axes1[i]
        ims = [axs[0].imshow(np.reshape(1-examples[2*i][0], (28, 28)), interpolation = 'nearest', cmap="Greys"), \
                axs[1].imshow(np.reshape(1-examples[2*i][1], (28, 28)), interpolation = 'nearest', cmap="Greys"), \
                axs[2].imshow(np.reshape(1-examples[2*i+1][0], (28, 28)), interpolation = 'nearest', cmap="Greys"), \
                axs[3].imshow(np.reshape(1-examples[2*i+1][1], (28, 28)), interpolation = 'nearest', cmap="Greys")]
        for j in np.arange(0, 4):
            axs[j].set_axis_off()
    plt.suptitle(
        'Example Pairs with Lowest(left) and Highest(right) posterior probability',
        fontsize=12)

    # Figure 2: per confused pair, plot log P(pixel|class1), log P(pixel|class2)
    # and their difference (the log-odds map).  d[1] is presumably the
    # probability of the pixel taking value 1 — confirm against the model.
    fig2, axes2 = plt.subplots(nrows=4, ncols=3, figsize=(6, 8))
    fig2.subplots_adjust(left=0.05,
                         right=0.92,
                         top=0.95,
                         bottom=0.05,
                         wspace=0.35,
                         hspace=0.01)
    for pairi in np.arange(0, 4):
        axs = axes2[pairi]
        logp1 = np.log(
            np.array([d[1] for d in distributions[confusion_pairs[pairi][0]]]))
        logp2 = np.log(
            np.array([d[1] for d in distributions[confusion_pairs[pairi][1]]]))
        ims = [axs[0].imshow(np.reshape(logp1, (28, 28)), interpolation = 'nearest', cmap='jet'), \
                axs[1].imshow(np.reshape(logp2, (28, 28)), interpolation = 'nearest', cmap='jet'), \
                axs[2].imshow(np.reshape(logp1 - logp2, (28, 28)), interpolation = 'nearest', cmap='jet')]
        for j in np.arange(0, 3):
            axs[j].set_axis_off()
            cbar = plt.colorbar(ims[j], ax=axs[j], fraction=0.046, pad=0.04)
            cbar.locator = ticker.MaxNLocator(nbins=5)
            cbar.update_ticks()
    plt.suptitle('Odds ratios', fontsize=16)
    plt.show()

    print("=====================================================\n")