# Example #1
def run_neural_network_with_best_set_of_features_and_binary(
        train_images, train_labels, test_images, test_labels):
    """Train on binary-encoded labels and report train/test accuracy.

    Both image sets are passed through ``mean_stdeviation_normalization``
    (each set is normalised by its own call), an 800-unit hidden layer is
    initialised with uniform weights in [-0.1, 0.1] and biases of one, and
    ``gradient_descent`` is run for 1000 iterations.

    Args:
        train_images: Training inputs handed to the normaliser.
        train_labels: Training labels; element ``[0]`` is what gets encoded
            with ``convert_labels_to_binary``.
        test_images: Held-out inputs handed to the normaliser.
        test_labels: Held-out labels, forwarded to the accuracy helper.

    Returns:
        None. The accuracy helpers are called for their side effects.
    """
    r = 0.001
    lr = 0.00001
    batch_size = 60000
    iteration_length = 1000
    hid_layer_size = 800

    hid_w = np.random.uniform(-0.1, 0.1, (in_layer_size, hid_layer_size))
    out_w = np.random.uniform(-0.1, 0.1,
                              (hid_layer_size, out_layer_size_binary_format))

    hid_b = np.ones(hid_layer_size)
    out_b = np.ones(out_layer_size_binary_format)

    norm_1_train = mean_stdeviation_normalization(train_images)
    norm_1_test = mean_stdeviation_normalization(test_images)

    # The return value is discarded and the same arrays are evaluated below,
    # so gradient_descent presumably updates the weights in place.
    # NOTE(review): the final argument was truncated in the source; it is
    # restored as batch_size (the only unused local) -- confirm against the
    # gradient_descent signature.
    gradient_descent(norm_1_train, convert_labels_to_binary(train_labels[0]),
                     hid_w, out_w, hid_b, out_b, iteration_length, r, lr,
                     batch_size)
    train_data_accuracy_with_binary_format(norm_1_train, train_labels, hid_w,
                                           hid_b, out_w, out_b)
    test_data_accuracy_with_binary_format(norm_1_test, test_labels, hid_w,
                                          hid_b, out_w, out_b)
# Example #2
def test_the_effect_of_different_initial_values(train_images, train_labels,
                                                test_images, test_labels):
    """Train two networks that differ only in their initial weight range.

    The first run draws weights uniformly from [-10, 10], the second from
    [0, 1]. Each run uses a 150-unit hidden layer, biases of one and 500
    iterations on the un-normalised images, then calls the train and test
    accuracy helpers.

    Args:
        train_images: Training inputs, used as given.
        train_labels: Training labels; element ``[0]`` is one-hot encoded.
        test_images: Held-out inputs.
        test_labels: Held-out labels.

    Returns:
        None.
    """
    r = 0.001
    lr = 0.00001
    batch_size = 60000
    iteration_length = 500
    hid_layer_size = 150

    # One pass per initialisation range; the order of random draws matches
    # the original copy-pasted sequence (hidden weights, then output weights).
    for low, high in ((-10, 10), (0, 1)):
        hid_w = np.random.uniform(low, high,
                                  (in_layer_size, hid_layer_size))
        out_w = np.random.uniform(low, high,
                                  (hid_layer_size, out_layer_size))

        hid_b = np.ones(hid_layer_size)
        out_b = np.ones(out_layer_size)

        # NOTE(review): the trailing arguments of these three calls were
        # truncated in the source. They are restored as batch_size and the
        # output bias, mirroring the complete calls elsewhere in this file
        # -- confirm against the helper signatures.
        gradient_descent(train_images,
                         convert_labels_to_one_hot(train_labels[0]),
                         hid_w, out_w, hid_b, out_b, iteration_length, r, lr,
                         batch_size)
        train_data_accuracy(train_images, train_labels, hid_w, hid_b, out_w,
                            out_b)
        test_data_accuracy(test_images, test_labels, hid_w, hid_b, out_w,
                           out_b)
# Example #3
def test_the_effect_of_linear_activation_function(train_images, train_labels,
                                                  test_images, test_labels):
    """Train the network, then score it with a linear hidden activation.

    After 100 iterations of ``gradient_descent`` on normalised images, the
    forward pass is recomputed here with ``linear`` applied to the hidden
    layer and no activation on the output layer. The fraction of samples
    whose arg-max prediction matches the one-hot target is printed for the
    training set and for the test set.

    Args:
        train_images: Training inputs handed to the normaliser.
        train_labels: Training labels; element ``[0]`` is one-hot encoded.
        test_images: Held-out inputs handed to the normaliser.
        test_labels: Held-out labels; element ``[0]`` is one-hot encoded.

    Returns:
        None. Two accuracy lines are printed.
    """
    r = 0.001
    lr = 0.00001
    batch_size = 60000
    iteration_length = 100
    hid_layer_size = 150

    hid_w = np.random.uniform(-0.1, 0.1, (in_layer_size, hid_layer_size))
    out_w = np.random.uniform(-0.1, 0.1, (hid_layer_size, out_layer_size))

    hid_b = np.ones(hid_layer_size)
    out_b = np.ones(out_layer_size)

    norm_1_train = mean_stdeviation_normalization(train_images)
    norm_1_test = mean_stdeviation_normalization(test_images)

    # NOTE(review): the final argument was truncated in the source; it is
    # restored as batch_size (the only unused local) -- confirm against the
    # gradient_descent signature.
    gradient_descent(norm_1_train, convert_labels_to_one_hot(train_labels[0]),
                     hid_w, out_w, hid_b, out_b, iteration_length, r, lr,
                     batch_size)

    _print_linear_model_accuracy(
        "Training Acc= ",
        np.array(norm_1_train),
        np.array(convert_labels_to_one_hot(train_labels[0])),
        hid_w, hid_b, out_w, out_b)

    _print_linear_model_accuracy(
        "New Data Acc= ",
        np.array(norm_1_test),
        np.array(convert_labels_to_one_hot(test_labels[0])),
        hid_w, hid_b, out_w, out_b)


def _print_linear_model_accuracy(label, inputs, targets,
                                 hid_w, hid_b, out_w, out_b):
    """Print the arg-max accuracy of a linear-activation forward pass."""
    hidden = linear(inputs.dot(hid_w) + hid_b)
    outputs = hidden.dot(out_w) + out_b  # output layer is left un-activated
    hits = sum(1 for target, output in zip(targets, outputs)
               if np.argmax(target) == np.argmax(output))
    print(label, hits / len(targets) * 100, "%")
# Example #4
def test_the_model_once(train_images, train_labels, test_images, test_labels):
    """Run a single training pass with the baseline settings.

    Uses a 150-unit hidden layer, uniform weights in [-0.1, 0.1], biases of
    one and 100 iterations on normalised images, then calls the train and
    test accuracy helpers.

    Args:
        train_images: Training inputs handed to the normaliser.
        train_labels: Training labels; element ``[0]`` is one-hot encoded.
        test_images: Held-out inputs handed to the normaliser.
        test_labels: Held-out labels.

    Returns:
        None.
    """
    r = 0.001
    lr = 0.00001
    batch_size = 60000
    iteration_length = 100
    hid_layer_size = 150

    hid_w = np.random.uniform(-0.1, 0.1, (in_layer_size, hid_layer_size))
    out_w = np.random.uniform(-0.1, 0.1, (hid_layer_size, out_layer_size))

    hid_b = np.ones(hid_layer_size)
    out_b = np.ones(out_layer_size)

    norm_1_train = mean_stdeviation_normalization(train_images)
    norm_1_test = mean_stdeviation_normalization(test_images)

    # NOTE(review): the final argument was truncated in the source; it is
    # restored as batch_size (the only unused local) -- confirm against the
    # gradient_descent signature.
    gradient_descent(norm_1_train, convert_labels_to_one_hot(train_labels[0]),
                     hid_w, out_w, hid_b, out_b, iteration_length, r, lr,
                     batch_size)
    train_data_accuracy(norm_1_train, train_labels, hid_w, hid_b, out_w, out_b)
    test_data_accuracy(norm_1_test, test_labels, hid_w, hid_b, out_w, out_b)
# Example #5
def test_the_effect_of_different_learning_rates(train_images, train_labels,
                                                test_images, test_labels):
    """Train three freshly initialised networks with ``lr`` of 0.1, 1 and 5.

    Every run uses a 50-unit hidden layer, uniform weights in [-0.1, 0.1],
    biases of one, ``r = 0.1`` and 500 iterations on the un-normalised
    images, then calls the train and test accuracy helpers.

    Args:
        train_images: Training inputs, used as given.
        train_labels: Training labels; element ``[0]`` is one-hot encoded.
        test_images: Held-out inputs.
        test_labels: Held-out labels.

    Returns:
        None.
    """
    hid_layer_size = 50
    r = 0.1
    batch_size = 60000
    iteration_length = 500

    # Weights are re-drawn before each run, in the same order as the original
    # copy-pasted sequence, so the random stream is consumed identically.
    for lr in (0.1, 1, 5):
        hid_w = np.random.uniform(-0.1, 0.1, (in_layer_size, hid_layer_size))
        out_w = np.random.uniform(-0.1, 0.1, (hid_layer_size, out_layer_size))

        hid_b = np.ones(hid_layer_size)
        out_b = np.ones(out_layer_size)

        # NOTE(review): the trailing arguments of gradient_descent (and of
        # the accuracy calls in the 2nd and 3rd runs) were truncated in the
        # source. They are restored as batch_size and the output bias,
        # mirroring the complete calls of the first run -- confirm against
        # the helper signatures.
        gradient_descent(train_images,
                         convert_labels_to_one_hot(train_labels[0]),
                         hid_w, out_w, hid_b, out_b, iteration_length, r, lr,
                         batch_size)
        train_data_accuracy(train_images, train_labels, hid_w, hid_b, out_w,
                            out_b)
        test_data_accuracy(test_images, test_labels, hid_w, hid_b, out_w,
                           out_b)
def main():
    """Prompt for a question number (1-6) and run the matching experiment.

    The choice is read from standard input, ``mnist_all.mat`` is loaded and
    NumPy's random generator is seeded before dispatching:

    * ``1`` saves a figure of ten random sample images per digit to
      ``sample_digits_<digit>.png``.
    * ``2`` only prints a notice.
    * ``3`` prints the gradients returned by ``nn.basic_gradient`` next to
      forward finite-difference estimates, for weights and for biases.
    * ``4`` runs ``nn.gradient_descent`` and draws each column of ``W`` as a
      28x28 image.
    * ``5`` does the same and pickles ``(W, b)`` to ``nn.pkl``.
    * ``6`` loads ``nn.pkl`` and draws the loss contour over two weights
      together with descent paths with and without momentum, caching the
      intermediate results in ``C.pkl``, ``w_paths.pkl`` and
      ``w_paths_2.pkl``.

    Any other input prints an error message.

    NOTE(review): no ``plt.show()`` appears anywhere, and questions 4-6 have
    no ``plt.savefig()``, so their figures are built but never shown or saved
    here. Question 6 also draws both contour plots on the same figure. The
    corresponding lines may have been lost from the source -- confirm.
    """
    print("Please select question to display output for:\n" +
          "\t1 2 3 4 5 6")
    question = raw_input()

    from scipy.io import loadmat
    M = loadmat("mnist_all.mat")
    np.random.seed(10007)  # Ensure consistent results

    # Written with float literals so the value does not collapse to 0 under
    # Python 2 integer division (this file uses raw_input/cPickle).
    init_scale = 1.0 / 140

    if question == '1':
        print("Example images used in the report are being displayed.")
        for i in range(10):
            m = np.vstack((M['train' + str(i)], M['test' + str(i)]))
            digits = m[np.random.randint(0, len(m), 10)]
            plt.figure(i + 1, figsize=(10, 4))
            for j in range(10):
                plt.subplot(2, 5, j + 1)
                plt.imshow(digits[j].reshape((28, 28)), cmap=plt.cm.gray)
            plt.savefig('sample_digits_' + str(i) + '.png')

    elif question == '2':
        print("Nothing to display; all content presented fully in the report.")

    elif question == '3':
        print("Displaying correctness of gradient descent.")
        h = 1e-5
        print("Correctness for changes in weights.")
        for i in range(10):
            y = np.random.randint(10)
            m = M['train' + str(y)] / 255.0
            y = [int(j == y) for j in range(10)]
            y = np.tile(y, (len(m), 1))
            W = np.random.normal(0, init_scale, (28 * 28, 10))
            b = np.random.normal(0, init_scale, 10)
            dW, db = nn.basic_gradient(W, m, b, y)

            # Perturb one randomly chosen weight and compare the analytic
            # gradient with the forward-difference slope of the loss.
            x_c = np.random.randint(W.shape[0])
            y_c = np.random.randint(W.shape[1])
            W_mod = W.copy()
            W_mod[x_c, y_c] += h
            dW_est = (nn.NLL(nn.basic_network(W_mod, m, b), y) -
                      nn.NLL(nn.basic_network(W, m, b), y)) / h

            print("Random point {} (weights). Computed, Estimate: {:f} {:f}"
                  "".format(i, dW[x_c, y_c], dW_est))
        print("Correctness for changes in biases.")
        for i in range(10):
            y = np.random.randint(10)
            m = M['train' + str(y)] / 255.0
            y = [int(j == y) for j in range(10)]
            y = np.tile(y, (len(m), 1))
            W = np.random.normal(0, init_scale, (28 * 28, 10))
            b = np.random.normal(0, init_scale, 10)
            dW, db = nn.basic_gradient(W, m, b, y)

            x_c = np.random.randint(b.shape[0])
            b_mod = b.copy()
            b_mod[x_c] += h
            db_est = (nn.NLL(nn.basic_network(W, m, b_mod), y) -
                      nn.NLL(nn.basic_network(W, m, b), y)) / h

            print("Random point {} (biases). Computed, Estimate: {:f} {:f}"
                  "".format(i, db[x_c], db_est))

    elif question == '4':
        print("Starting basic gradient descent.")
        W = np.random.normal(0, init_scale, (28 * 28, 10))
        b = np.random.normal(0, init_scale, 10)

        x, y = _load_digit_set(M, 'train')
        xv, yv = _load_digit_set(M, 'test')

        # NOTE(review): this call was truncated after "momentum=0," in the
        # source; any further keyword arguments are lost and the callee's
        # defaults are used instead -- confirm.
        W, b = nn.gradient_descent(W, x, b, y, xv, yv, momentum=0)
        _show_weight_images(W)

    elif question == '5':
        print("Starting gradient descent with momentum.")
        W = np.random.normal(0, init_scale, (28 * 28, 10))
        b = np.random.normal(0, init_scale, 10)

        x, y = _load_digit_set(M, 'train')
        xv, yv = _load_digit_set(M, 'test')

        # NOTE(review): this call was truncated after "yv," in the source;
        # the momentum setting (and anything else that followed) is lost, so
        # the callee's defaults are used -- confirm.
        W, b = nn.gradient_descent(W, x, b, y, xv, yv)
        _show_weight_images(W)

        with open('nn.pkl', 'wb') as f:
            cPickle.dump((W, b), f)

    elif question == '6':
        # The pickle files below are local caches written by this script;
        # unpickling is unsafe on files from an untrusted source.
        try:
            with open('nn.pkl', 'rb') as f:
                W, b = cPickle.load(f)
        except IOError:
            print('Trained neural network does not exists. '
                  'Please run question 5 to train and store network.')
            # Nothing below can run without W and b.
            return

        x, y = _load_digit_set(M, 'train')

        size = 0.4
        w, z = 13, 13
        n1, n2 = 0, 1
        row = w * 28 + z  # flat index of pixel (13, 13) in a 28x28 image
        w1o, w2o = W[row, n1], W[row, n2]
        w1v = np.linspace(w1o - size, w1o + size, 10)
        w2v = np.linspace(w2o - size, w2o + size, 10)
        meshw, meshz = np.meshgrid(w1v, w2v)

        try:
            with open('C.pkl', 'rb') as f:
                C = cPickle.load(f)
        except IOError:
            # Cache miss: evaluate the loss on the 10x10 grid of the two
            # selected weights (W is modified in place while doing so).
            C = np.empty((w1v.size, w2v.size))
            for i, w1 in enumerate(w1v):
                for j, w2 in enumerate(w2v):
                    print(i, j)
                    W[row, n1] = w1
                    W[row, n2] = w2
                    C[i, j] = nn.NLL(nn.basic_network(W, x, b), y)
            with open('C.pkl', 'wb') as f:
                cPickle.dump(C, f)

        for cache_file, momentum in (('w_paths.pkl', 0.25),
                                     ('w_paths_2.pkl', 0.8)):
            plt.contour(meshw, meshz, C.T, cmap=plt.cm.gray)

            w1, w2, w1m, w2m = _cached_weight_paths(
                cache_file, W, x, b, y, row, n1, n2,
                w1o - 0.3, w2o - 0.3, momentum)

            plt.plot(w1, w2, 'bo-')
            plt.plot(w1m, w2m, 'ro-')
            plt.legend(('No Momentum', 'Momentum'))

    else:
        print("Invalid question selected. Please try again.")


def _load_digit_set(M, prefix):
    """Stack M[prefix + '0'..'9'] into scaled inputs and one-hot targets."""
    base = np.array([1] + [0] * 9)
    # 255.0 keeps the scaling a true division under Python 2 as well.
    x = np.vstack([M[prefix + str(i)] for i in range(10)]) / 255.0
    # Rolling the base vector by i puts the single 1 at index i.
    y = np.vstack([np.tile(np.roll(base, i),
                           (M[prefix + str(i)].shape[0], 1))
                   for i in range(10)])
    return x, y


def _show_weight_images(W):
    """Draw every column of W as a 28x28 grayscale image in a 2x5 grid."""
    plt.figure(figsize=(10, 4))
    for i, column in enumerate(W.T):
        plt.subplot(2, 5, i + 1)
        plt.imshow(column.reshape((28, 28)), cmap=plt.cm.gray)


def _cached_weight_paths(cache_file, W, x, b, y, row, n1, n2,
                         start1, start2, momentum, steps=10, rate=6e-3):
    """Return (w1, w2, w1m, w2m) descent paths, using cache_file if present.

    On a cache miss, two weights of W (modified in place) are stepped from
    the start point, once with plain gradient steps and once with a momentum
    term, and the four paths are pickled to cache_file.
    """
    try:
        with open(cache_file, 'rb') as f:
            return cPickle.load(f)
    except IOError:
        pass  # cache miss: compute the paths below

    w1 = [start1]
    w2 = [start2]
    for _step in range(steps):
        W[row, n1] = w1[-1]
        W[row, n2] = w2[-1]
        dW, _ = nn.basic_gradient(W, x, b, y)
        w1.append(w1[-1] - dW[row, n1] * rate)
        w2.append(w2[-1] - dW[row, n2] * rate)

    w1m = [start1]
    w2m = [start2]
    dW = np.zeros_like(W)  # running update carried between steps
    for _step in range(steps):
        W[row, n1] = w1m[-1]
        W[row, n2] = w2m[-1]
        dW_n, _ = nn.basic_gradient(W, x, b, y)
        dW = dW * momentum - dW_n * rate
        w1m.append(w1m[-1] + dW[row, n1])
        w2m.append(w2m[-1] + dW[row, n2])

    paths = (w1, w2, w1m, w2m)
    with open(cache_file, 'wb') as f:
        cPickle.dump(paths, f)
    return paths