Esempio n. 1
0
    # A simple strided convnet
    #
    # NOTE: this fragment uses Python 2 syntax (xrange, print statement).
    # NOTE(review): batch_size is not defined in the visible code; it must be
    # provided by the enclosing scope.
    #
    # Every initializer below draws standard-normal values of the requested
    # shape and scales them by sqrt(1 / (a + b)).
    # NOTE(review): a and b look like the sizes of the layer's input and output
    # volumes (28x28 -> 13x13x20 -> 5x5x40 -> 100 -> 10) -- confirm.
    layers = [
        # NOTE(review): the Conv shape tuple is presumably
        # (kernel_h, kernel_w, in_channels, out_channels) -- confirm against Conv.
        Conv((4, 4, 1, 20), strides=2, activation=lkrelu, filter_init=lambda shp: np.random.normal(size=shp) * np.sqrt(1.0 / (28*28 + 13*13*20)) ),
        Conv((5, 5, 20, 40), strides=2, activation=lkrelu, filter_init=lambda shp:  np.random.normal(size=shp) *  np.sqrt(1.0 / (13*13*20 + 5*5*40)) ),
        Flatten((5, 5, 40)),
        FullyConnected((5*5*40, 100), activation=sigmoid, weight_init=lambda shp: np.random.normal(size=shp) * np.sqrt(1.0 / (5*5*40 + 100.))),
        # Final layer has 10 outputs with a linear activation; the loss handed
        # to Network below is cross_entropy.
        FullyConnected((100, 10), activation=linear, weight_init=lambda shp: np.random.normal(size=shp) * np.sqrt(1.0 / (110.)))
    ]
    lr = 0.001
    # NOTE(review): k is not used anywhere in the visible code.
    k = 2000
    net = Network(layers, lr=lr, loss=cross_entropy)

    # load_data returns three parts; the middle one (v) is not used in the
    # visible code (presumably a validation split -- confirm).
    (train_data_X, train_data_Y), v, (tx, ty) = mnist_loader.load_data('./data/mnist.pkl.gz')
    # One-hot encode the labels into 10 classes.
    train_data_Y = one_hot(train_data_Y, size=10)
    ty = one_hot(ty, size=10)
    # Reshape the inputs to (N, 28, 28, 1) for the convolutional layers.
    train_data_X = np.reshape(train_data_X, [-1, 28, 28, 1])
    tx = np.reshape(tx, [-1, 28, 28, 1])
    # Despite the name, each "epoch" here is a single minibatch update, not a
    # full pass over the training set.
    for epoch in xrange(100000):
        # Permute all training indices and keep the first batch_size of them,
        # i.e. a random minibatch without repeated samples.
        shuffled_index = np.random.permutation(train_data_X.shape[0])

        batch_train_X = train_data_X[shuffled_index[:batch_size]]
        batch_train_Y = train_data_Y[shuffled_index[:batch_size]]
        net.train_step((batch_train_X, batch_train_Y))

        # The reported loss comes from an extra forward pass on the same
        # minibatch, made after the update step.
        loss = np.sum(cross_entropy.compute((net.forward(batch_train_X), batch_train_Y)))
        print 'Epoch: %d loss : %f' % (epoch, loss)
        # Quick check on the first 500 test samples at steps 1, 1001, 2001, ...
        if epoch % 1000 == 1:
            print 'Accuracy on first 500 test set\'s batch : %f' % accuracy(net, tx[:500], ty[:500])
        # Full test-set evaluation at steps 4999, 9999, ...
        if epoch % 5000 == 5000 - 1:
            print 'Accuracy over all test set %f' % accuracy(net, tx, ty)
    # If you want to continue training a network uncomment the lines below.
    # NOTE: unpickling can execute arbitrary code -- only load a "network.nn"
    # file that you created yourself.
    # with open("network.nn", "rb") as checkpoint_file:
    #     net = pickle.load(checkpoint_file)

    # View the training inputs as a stack of 28x28 arrays with 3 channels.
    train_data_X = np.reshape(train_data_X, [-1, 28, 28, 3])

    try:
        # Each "epoch" is a single minibatch update, not a full pass over the
        # training set.
        for epoch in range(100000):
            shuffled_index = np.random.permutation(train_data_X.shape[0])

            # Get minibatch of training data: the first batch_size entries of
            # a fresh random permutation of all sample indices.
            batch_train_X = train_data_X[shuffled_index[:batch_size]]
            batch_train_Y = train_data_Y[shuffled_index[:batch_size]]
            net.train_step((batch_train_X, batch_train_Y))

            # NOTE(review): this value is computed but never read in the
            # visible code; kept because net.forward may have side effects.
            loss = np.sum(
                mse.compute((net.forward(batch_train_X), batch_train_Y)))

            if epoch % 200 == 0:
                # Report accuracy() over the whole training set divided by the
                # number of samples, then checkpoint the network.
                # NOTE(review): whether accuracy() returns a count or a
                # fraction is not visible here -- confirm the division.
                print(
                    accuracy(net, train_data_X, train_data_Y) /
                    len(train_data_X))
                # Use a context manager so the checkpoint is flushed and the
                # file handle closed before training continues.
                with open("network.nn", "wb") as checkpoint_file:
                    pickle.dump(net, checkpoint_file)

    except KeyboardInterrupt:
        # Ctrl-C stops training gracefully: report and save the current state
        # instead of losing it.
        print(accuracy(net, train_data_X, train_data_Y))
        with open("network.nn", "wb") as checkpoint_file:
            pickle.dump(net, checkpoint_file)

# Final report on the training data, then persist the network to disk.
print(accuracy(net, train_data_X, train_data_Y))
# Context manager guarantees the checkpoint is flushed and the handle closed.
with open("network.nn", "wb") as checkpoint_file:
    pickle.dump(net, checkpoint_file)