Example #1
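# Note: these standalone examples assume module-level imports that the
# snippets themselves omit: numpy as np, theano, theano.tensor as T, pickle,
# and theano.tensor.shared_randomstreams.RandomStreams, plus the project's
# own helpers (Simple_Discriminator, Simple_Generator, LMLP, LCNN, rmsprop,
# generate_data, load_data_test, example_graph, example_predict).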
def example_train(n_epochs=100, batch_size=20, gradient_reg=1.0):
    import timeit
    print_initial_parameters = False
    print_end_parameters = False
    print_initial_gradient_cost = False
    print_initial_gradient_norms = False
    plot_time = 10

    fake_x_data = generate_data(10000, 0)
    real_x_data = generate_data(10000, 3)
    fake_x_valid = generate_data(1000, 0)
    real_x_valid = generate_data(1000, 3)
    index = T.lscalar()
    x_fake = T.matrix('x_f')
    x_real = T.matrix('x_r')

    n_train_batches = fake_x_data.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = fake_x_valid.get_value(
        borrow=True).shape[0] // batch_size

    print('... building the model')
    rng = np.random.RandomState(1000)
    network = Simple_Discriminator(
        rng=rng,
        input_fake=x_fake,
        input_real=x_real,
        #info_layers=[(5,1,20),(5,20,20),(5,20,20),(5,20,1)]
        info_layers=[(5, 1, 20), (1, 20, 1)])
    cost = -network.mean_difference + gradient_reg / (1.0 -
                                                      network.gradient_cost)

    if print_initial_parameters:
        print('printing initial parameters')
        for param in network.params:
            print(param.get_value())

    get_max_gradient = theano.function(inputs=[],
                                       outputs=network.max_gradient,
                                       givens={})

    get_gradient_norms = theano.function(
        inputs=[],
        outputs=[layer.gradient_norms for layer in network.layers],
        givens={})
    get_gradient_cost = theano.function(inputs=[],
                                        outputs=network.gradient_cost,
                                        givens={})

    if print_initial_gradient_cost:
        print('initial gradient cost: %f ' % get_gradient_cost())
    if print_initial_gradient_norms:
        print('printing gradient norms')
        for matrix in get_gradient_norms():
            print(matrix)
    validate_model = theano.function(
        inputs=[index],
        outputs=network.mean_difference,
        givens={
            x_fake: fake_x_valid[index * batch_size:(index + 1) * batch_size],
            x_real: real_x_valid[index * batch_size:(index + 1) * batch_size]
        })
    updates = rmsprop(cost, network.params)
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x_fake: fake_x_data[index * batch_size:(index + 1) * batch_size],
            x_real: real_x_data[index * batch_size:(index + 1) * batch_size]
        })
    print('... training')

    validation_frequency = n_train_batches
    plot_frequency = n_train_batches * plot_time
    start_time = timeit.default_timer()

    epoch = 0

    while (epoch < n_epochs):
        for minibatch_index in range(n_train_batches):
            minibatch_avg_cost = train_model(minibatch_index)
            iteration = epoch * n_train_batches + minibatch_index

            if (iteration + 1) % validation_frequency == 0:
                # compute the mean difference on the validation set
                validation_losses = [
                    validate_model(i) for i in range(n_valid_batches)
                ]
                this_validation_loss = np.mean(validation_losses)
                this_gradient_max = get_max_gradient()

                print(
                    'epoch %i, minibatch %i/%i, validation mean difference %f, max_gradient %f'
                    % (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss, this_gradient_max))
            if (iteration + 1) % plot_frequency == 0:
                with open('test_discriminator_model.pkl', 'wb') as f:
                    pickle.dump(network, f)
                example_graph()
        epoch += 1

    end_time = timeit.default_timer()
    print('The code ran for %.2fs' % (end_time - start_time))
    if print_end_parameters:
        print('printing end parameters')
        for param in network.params:
            print(param.get_value())
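Example #1 regularizes the discriminator with gradient_reg / (1.0 - network.gradient_cost), a barrier term that blows up as the gradient cost approaches 1 and so softly enforces a Lipschitz-style bound. The gradient_cost attribute lives inside the Simple_Discriminator class, which is not shown; the following is a minimal sketch of one way such a term can be built in Theano, and every name in it is an assumption rather than the project's actual code.

import theano.tensor as T

def gradient_cost_sketch(output, inp):
    # mean squared norm of d(output)/d(input); a barrier like
    # reg / (1.0 - gradient_cost) diverges as this approaches 1,
    # discouraging input gradients of norm >= 1
    grads = T.grad(output.sum(), inp)
    return (grads ** 2).sum(axis=1).mean()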
Example #2
def example_train(data, n_epochs=100, batch_size=20):
    import timeit

    d_shape = [(5, 2, 20), (5, 20, 20), (5, 20, 1)]
    g_shape = [(5, 1, 20), (5, 20, 20), (5, 20, 2)]
    r_shape = [(5, 2, 20), (5, 20, 20), (5, 20, 1)]

    validation_frequency = 1
    plot_time = 10
    initial_r_train = 20

    g_per_epoch = 5
    r_per_epoch = 9

    data_train = data[0]
    data_valid = data[1]

    n_train_batches = data_train.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = data_valid.get_value(borrow=True).shape[0] // batch_size

    rng = np.random.RandomState(1001)
    trng = RandomStreams(seed=234)
    print('... building the model')

    index = T.lscalar()
    x_rand = T.matrix('x_rand')
    x_real = T.matrix('x_real')
    x_fake = T.matrix('x_fake')  # unused here: the discriminator reads generator.output directly

    generator = Simple_Generator(
        rng=rng,
        input_rand=x_rand,
        g_shape=g_shape,
        r_shape=r_shape
    )
    discriminator = Simple_Discriminator(
        rng=rng,
        input_fake=generator.output,
        input_real=x_real,
        info_layers=d_shape
    )

    # the 1.0 before the / is a hyperparameter
    g_cost = generator.mse(x_rand) + (discriminator.output).mean()
    g_updates = rmsprop(g_cost, generator.g_params)

    r_cost = generator.mse(x_rand) + 1.0 / (1.0 - generator.gradient_cost)
    r_updates = rmsprop(r_cost, generator.r_params)

    d_cost = -discriminator.mean_difference + 1.0 / (1.0 - discriminator.gradient_cost)
    d_updates = rmsprop(d_cost, discriminator.params)

    train_generator = theano.function(
        inputs=[],
        outputs=g_cost,
        updates=g_updates,
        givens={
            x_rand: trng.uniform(
                size=(batch_size, g_shape[0][1]),
                low=-1.0,
                high=1.0
            )
        }
    )
    train_reversor = theano.function(
        inputs=[],
        outputs=r_cost,
        updates=r_updates,
        givens={
            x_rand: trng.uniform(
                size=(batch_size, g_shape[0][1]),
                low=-1.0,
                high=1.0
            )
        }
    )
    train_discriminator = theano.function(
        inputs=[index],
        outputs=d_cost,
        updates=d_updates,
        givens={
            x_real: data_train[index * batch_size:(index + 1) * batch_size],
            x_rand: trng.uniform(
                size=(batch_size, g_shape[0][1]),
                low=-1.0,
                high=1.0
            )
        }
    )

    validate_model = theano.function(
        inputs=[index],
        outputs=discriminator.mean_difference,
        givens={
            x_real: data_valid[index * batch_size:(index + 1) * batch_size],
            x_rand: trng.uniform(
                size=(batch_size, g_shape[0][1]),
                low=-1.0,
                high=1.0
            )
        }
    )

    get_max_gradient = theano.function(
        inputs=[],
        outputs=(generator.max_gradient, discriminator.max_gradient),
        givens={}
    )
    get_reversor_error = theano.function(
        inputs=[],
        outputs=generator.mse(x_rand),
        givens={
            x_rand: trng.uniform(
                size=(batch_size, g_shape[0][1]),
                low=-1.0,
                high=1.0
            )
        }
    )
    print('... training')

    start_time = timeit.default_timer()

    for _ in range(initial_r_train):
        _ = train_reversor()
    for epoch in range(n_epochs):
        for minibatch_index in range(n_train_batches):
            _ = train_discriminator(minibatch_index)
        for _ in range(g_per_epoch):
            _ = train_generator()
        for _ in range(r_per_epoch):
            _ = train_reversor()
                
        if epoch % validation_frequency == 0:
            validation_losses = [validate_model(i)
                                 for i in range(n_valid_batches)]
            this_validation_loss = np.mean(validation_losses)
            max_grad = get_max_gradient()

            print('epoch %i, mean difference %f, r_error %f, r_grad %f, d_grad %f' %
                  (epoch,
                   this_validation_loss,
                   get_reversor_error(),
                   max_grad[0],
                   max_grad[1]))
        if epoch % plot_time == 0:
            with open('test_GAN_g.pkl', 'wb') as f:
                pickle.dump(generator.generator, f)
            with open('test_GAN_r.pkl', 'wb') as f:
                pickle.dump(generator.reversor, f)
            with open('test_GAN_d.pkl', 'wb') as f:
                pickle.dump(discriminator, f)
            example_graph()

    end_time = timeit.default_timer()
    print('The code ran for %.2fs' % (end_time - start_time))
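Example #2 expects data to be a pair of Theano shared variables (training set first, validation set second) with two features per row, matching the (5, 2, 20) input layer of d_shape. The following is a sketch of how a caller might assemble that argument; the Gaussian toy data is made up for illustration.

import numpy as np
import theano

def as_shared(arr):
    # wrap a numpy array as a Theano shared variable
    return theano.shared(np.asarray(arr, dtype=theano.config.floatX),
                         borrow=True)

rng = np.random.RandomState(0)
data = (as_shared(rng.normal(3.0, 1.0, size=(10000, 2))),  # training set
        as_shared(rng.normal(3.0, 1.0, size=(1000, 2))))   # validation set
# example_train(data)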
Example #3
def test_mnist(n_epoch=1000, batch_size=500):
    from load_mnist import load_data_mnist
    import timeit

    valid_time = 1
    scale_factor = 0.01
    # one entry per layer
    CNN_shape = [[batch_size, 28, 28, 5, 5, 1, 2, 20],
                 [batch_size, 24, 24, 5, 5, 20, 2, 50]]
    fc_info = [[2, 20 * 20 * 50, 500], [2, 500, 10]]

    datasets = load_data_mnist()

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    print('... building the model')

    index = T.lscalar()
    x = T.matrix('x')
    y = T.matrix('y')
    rng = np.random.RandomState(1001)
    reshaped_input = x.reshape((batch_size, 1, 28, 28))
    convnet = LCNN(rng, input=reshaped_input, shape_layers=CNN_shape)
    fc_layer_input = convnet.output.flatten(2)
    fc_layer = LMLP(rng, input=fc_layer_input, info_layers=fc_info)
    print('number of parameters: ' + str(fc_layer.n_params + convnet.n_params))
    params = convnet.params + fc_layer.params
    scale_params = convnet.scale_params + fc_layer.scale_params
    max_gradient = convnet.max_gradient * fc_layer.max_gradient
    get_gradient_max = theano.function(inputs=[],
                                       outputs=max_gradient,
                                       givens={})
    cost = fc_layer.mse(y * scale_factor)
    validate_model = theano.function(
        inputs=[index],
        outputs=cost,
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })
    prediction = T.argmax(fc_layer.output, axis=1)
    ground_truth = T.argmax(y, axis=1)
    accuracy = T.mean(T.eq(prediction, ground_truth))
    validate_model_acc = theano.function(
        inputs=[index],
        outputs=accuracy,
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    updates = rmsprop(cost, params)
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })
    # apply each network's scale_params expression as an in-place parameter
    # update (a rescaling pass run after every minibatch)
    rescale_model = theano.function(inputs=[],
                                    updates=list(zip(params, scale_params)))
    print('... training')

    start_time = timeit.default_timer()

    for epoch in range(n_epoch):
        if epoch % valid_time == 0:
            validation_losses = [
                validate_model(i) for i in range(n_valid_batches)
            ]
            this_validation_loss = np.mean(validation_losses)
            validation_acc = [
                validate_model_acc(i) for i in range(n_valid_batches)
            ]
            this_validation_acc = np.mean(validation_acc)
            print('epoch %i, mse %f, g_max %f, acc %f' %
                  (epoch, this_validation_loss, get_gradient_max(),
                   this_validation_acc))

        for minibatch_index in range(n_train_batches):
            minibatch_avg_cost = train_model(minibatch_index)
            rescale_model()

    end_time = timeit.default_timer()
    print('The code ran for %.2fs' % (end_time - start_time))
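Because Example #3 declares y = T.matrix('y') and scores accuracy with T.argmax(y, axis=1), its load_data_mnist must return one-hot label matrices rather than the integer label vectors of the standard Theano MNIST loader. A sketch of that conversion, assuming raw integer labels as input:

import numpy as np

def to_one_hot(labels, n_classes=10):
    # integer class labels -> one-hot rows, matching y = T.matrix('y') above
    out = np.zeros((labels.shape[0], n_classes), dtype='float32')
    out[np.arange(labels.shape[0]), labels] = 1.0
    return out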
Example #4
def example_train(n_epochs=1000, batch_size=200):
    import timeit
    g_shape = [(5, 1, 20), (5, 20, 20), (5, 20, 2)]
    r_shape = [(5, 2, 20), (5, 20, 20), (5, 20, 1)]
    g_per_epoch = 20
    r_per_epoch = 10
    plot_time = 10
    print_validation_g_parameters = False
    print_validation_r_parameters = False
    initial_reversor_train = 1000
    print('... building the model')
    rng = np.random.RandomState(1001)
    trng = RandomStreams(seed=234)
    x_rand = T.matrix('x_rand')

    generator = Simple_Generator(rng=rng,
                                 input_rand=x_rand,
                                 g_shape=g_shape,
                                 r_shape=r_shape)

    r_cost = generator.mse(x_rand) + 1.0 / (1.0 - generator.gradient_cost)
    r_updates = rmsprop(r_cost, generator.r_params)

    # penalize the squared deviation of x1**2 + x2**2 from 1, pushing
    # generated points toward the unit circle
    f = lambda x: ((1.0 - T.dot(x**2, np.array([[1.0], [1.0]])))**2).mean()
    cost = f(generator.output)

    g_cost = generator.mse(x_rand) + cost
    g_updates = rmsprop(g_cost, generator.g_params)

    train_reversor = theano.function(inputs=[],
                                     outputs=r_cost,
                                     updates=r_updates,
                                     givens={
                                         x_rand:
                                         trng.uniform(size=(batch_size,
                                                            g_shape[0][1]),
                                                      low=-1.0,
                                                      high=1.0)
                                     })

    train_generator = theano.function(inputs=[],
                                      outputs=g_cost,
                                      updates=g_updates,
                                      givens={
                                          x_rand:
                                          trng.uniform(size=(batch_size,
                                                             g_shape[0][1]),
                                                       low=-1.0,
                                                       high=1.0)
                                      })
    test_generator = theano.function(inputs=[],
                                     outputs=cost,
                                     givens={
                                         x_rand:
                                         trng.uniform(size=(batch_size,
                                                            g_shape[0][1]),
                                                      low=-1.0,
                                                      high=1.0)
                                     })
    test_reversor = theano.function(inputs=[],
                                    outputs=generator.mse(x_rand),
                                    givens={
                                        x_rand:
                                        trng.uniform(size=(batch_size,
                                                           g_shape[0][1]),
                                                     low=-1.0,
                                                     high=1.0)
                                    })
    get_max_gradient = theano.function(inputs=[],
                                       outputs=generator.max_gradient)

    print('... training')
    start_time = timeit.default_timer()
    for _ in range(initial_reversor_train):
        _ = train_reversor()
    for epoch in range(n_epochs):
        if epoch % plot_time == 0:
            if print_validation_g_parameters:
                print('printing generator parameters')
                for param in generator.g_params:
                    print(param.get_value())
            if print_validation_r_parameters:
                print('printing reversor parameters')
                for param in generator.r_params:
                    print(param.get_value())
            with open('test_generator_model.pkl', 'wb') as f:
                pickle.dump(generator.generator, f)
            with open('test_reversor_model.pkl', 'wb') as f:
                pickle.dump(generator.reversor, f)
            example_graph()

        for _ in range(g_per_epoch):
            _ = train_generator()

        for _ in range(r_per_epoch):
            _ = train_reversor()

        gen_cost = test_generator()
        rev_cost = test_reversor()
        max_grad = get_max_gradient()
        print(
            'epoch %i, generator cost %f, reversor cost %f, max gradient %f' %
            (epoch, gen_cost, rev_cost, max_grad))

    end_time = timeit.default_timer()
    print('The code ran for %.2fs' % (end_time - start_time))
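The target in Example #4 is the unit circle: f penalizes the squared deviation of x1**2 + x2**2 from 1, so minimizing it pushes generated points onto the circle. A quick numpy check of the same expression:

import numpy as np

x = np.array([[1.0, 0.0], [0.0, 1.0], [2.0, 0.0]])
cost = ((1.0 - (x ** 2) @ np.array([[1.0], [1.0]])) ** 2).mean()
# the first two rows lie on the unit circle and contribute 0;
# the third has squared norm 4, so cost = (1 - 4)**2 / 3 = 3.0
print(cost)  # 3.0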
Example #5
def example_train(n_epochs=1000, batch_size=20, gradient_reg=1.0, data_num=2):
    # note: gradient_reg is accepted but unused in this example
    plot_time = 100

    import timeit
    datasets = load_data_test(data_num)

    train_set_x, train_set_y = datasets[1]
    valid_set_x, valid_set_y = datasets[0]

    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size

    print('... building the model')

    index = T.lscalar()
    x = T.matrix('x')
    y = T.matrix('y')

    rng = np.random.RandomState(1001)

    # construct the MLP class
    network = LMLP(rng=rng,
                   input=x,
                   info_layers=[(5, 1, 20), (5, 20, 100), (5, 100, 20),
                                (5, 20, 1)])
    cost = network.mse(y)
    validate_model = theano.function(
        inputs=[index],
        outputs=network.mse(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })
    get_gradient_max = theano.function(inputs=[],
                                       outputs=network.max_gradient,
                                       givens={})
    num_params = len(network.params)
    updates = rmsprop(cost, network.params)
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })
    # apply the network's scale_params expressions as in-place parameter
    # updates (a rescaling pass run after every minibatch)
    rescale_model = theano.function(inputs=[],
                                    updates=list(
                                        zip(network.params,
                                            network.scale_params)))
    print('... training')

    validation_frequency = n_train_batches
    plot_frequency = n_train_batches * plot_time
    start_time = timeit.default_timer()

    epoch = 0

    while (epoch < n_epochs):
        for minibatch_index in range(n_train_batches):
            minibatch_avg_cost = train_model(minibatch_index)
            rescale_model()

            iteration = epoch * n_train_batches + minibatch_index
            if (iteration + 1) % validation_frequency == 0:
                # compute the mean squared error on the validation set
                validation_losses = [
                    validate_model(i) for i in range(n_valid_batches)
                ]
                this_validation_loss = np.mean(validation_losses)
                this_gradient_max = get_gradient_max()

                print('epoch %i, minibatch %i/%i, mse %f, g_max %f' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss, this_gradient_max))
            if (iteration + 1) % plot_frequency == 0:
                with open('test_mlp_model.pkl', 'wb') as f:
                    pickle.dump(network, f)
                example_predict(1000, data_num)
        epoch += 1

    end_time = timeit.default_timer()
    print('The code ran for %.2fs' % (end_time - start_time))
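Every example optimizes with a project-specific rmsprop(cost, params) helper that is not shown. Below is a minimal sketch of a standard RMSProp update in Theano that returns the updates list these theano.function calls expect; the learning rate, decay, and epsilon values are assumptions, not the project's actual settings.

import numpy as np
import theano
import theano.tensor as T

def rmsprop_sketch(cost, params, lr=0.001, rho=0.9, epsilon=1e-6):
    updates = []
    for param in params:
        grad = T.grad(cost, param)
        # running average of squared gradients, one accumulator per parameter
        acc = theano.shared(np.zeros_like(param.get_value()))
        acc_new = rho * acc + (1.0 - rho) * grad ** 2
        updates.append((acc, acc_new))
        updates.append((param, param - lr * grad / T.sqrt(acc_new + epsilon)))
    return updates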