def example_train(n_epochs=100, batch_size=20, gradient_reg=1.0):
    import timeit

    # debug flags
    print_initial_parameters = False
    print_initial_gradient_cost = False
    print_initial_gradient_norms = False
    print_end_parameters = False
    plot_time = 10

    # synthetic 1-D data: "fake" samples centred at 0, "real" samples at 3
    fake_x_data = generate_data(10000, 0)
    real_x_data = generate_data(10000, 3)
    fake_x_valid = generate_data(1000, 0)
    real_x_valid = generate_data(1000, 3)

    index = T.lscalar()
    x_fake = T.matrix('x_f')
    x_real = T.matrix('x_r')

    n_train_batches = fake_x_data.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = fake_x_valid.get_value(borrow=True).shape[0] // batch_size

    print('... building the model')
    rng = np.random.RandomState(1000)

    network = Simple_Discriminator(
        rng=rng,
        input_fake=x_fake,
        input_real=x_real,
        # info_layers=[(5, 1, 20), (5, 20, 20), (5, 20, 20), (5, 20, 1)]
        info_layers=[(5, 1, 20), (1, 20, 1)])

    # maximise the real/fake gap while keeping the gradient bound away from 1
    cost = -network.mean_difference + gradient_reg / (1.0 - network.gradient_cost)

    if print_initial_parameters:
        print('printing initial parameters')
        for param in network.params:
            print(param.get_value())

    get_max_gradient = theano.function(inputs=[], outputs=network.max_gradient)
    get_gradient_norms = theano.function(
        inputs=[],
        outputs=[layer.gradient_norms for layer in network.layers])
    get_gradient_cost = theano.function(inputs=[], outputs=network.gradient_cost)

    if print_initial_gradient_cost:
        print('initial gradient cost: %f' % get_gradient_cost())
    if print_initial_gradient_norms:
        print('printing gradient norms')
        for matrix in get_gradient_norms():
            print(matrix)

    validate_model = theano.function(
        inputs=[index],
        outputs=network.mean_difference,
        givens={
            x_fake: fake_x_valid[index * batch_size:(index + 1) * batch_size],
            x_real: real_x_valid[index * batch_size:(index + 1) * batch_size]
        })

    updates = rmsprop(cost, network.params)
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x_fake: fake_x_data[index * batch_size:(index + 1) * batch_size],
            x_real: real_x_data[index * batch_size:(index + 1) * batch_size]
        })

    print('... training')
    validation_frequency = n_train_batches
    plot_frequency = n_train_batches * plot_time
    start_time = timeit.default_timer()

    epoch = 0
    while epoch < n_epochs:
        for minibatch_index in range(n_train_batches):
            train_model(minibatch_index)
            iter = epoch * n_train_batches + minibatch_index
            if (iter + 1) % validation_frequency == 0:
                # compute the mean real/fake difference on the validation set
                validation_losses = [
                    validate_model(i) for i in range(n_valid_batches)
                ]
                this_validation_loss = np.mean(validation_losses)
                this_gradient_max = get_max_gradient()
                print(
                    'epoch %i, minibatch %i/%i, validation mean difference %f, max_gradient %f'
                    % (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss, this_gradient_max))
            if (iter + 1) % plot_frequency == 0:
                with open('test_discriminator_model.pkl', 'wb') as f:
                    pickle.dump(network, f)
                example_graph()
        epoch += 1

    end_time = timeit.default_timer()
    print('The code ran for %.2fs' % (end_time - start_time))

    if print_end_parameters:
        print('printing end parameters')
        for param in network.params:
            print(param.get_value())
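
# Every trainer in this file builds its update list from a module-level
# helper with the signature rmsprop(cost, params). The repository's own
# implementation is not shown here; the sketch below is one standard Theano
# formulation of RMSProp, with illustrative hyperparameters (lr, rho, eps
# are assumptions, not values taken from this code).
def rmsprop_sketch(cost, params, lr=0.001, rho=0.9, eps=1e-6):
    """Return a Theano update list implementing vanilla RMSProp."""
    grads = T.grad(cost, params)
    updates = []
    for param, grad in zip(params, grads):
        # running average of the squared gradient, one accumulator per param
        acc = theano.shared(param.get_value() * 0.0,
                            broadcastable=param.broadcastable)
        new_acc = rho * acc + (1.0 - rho) * grad ** 2
        updates.append((acc, new_acc))
        # divide the step by the root of the accumulated square
        updates.append((param, param - lr * grad / T.sqrt(new_acc + eps)))
    return updates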
def example_train(data, n_epochs=100, batch_size=20):
    import timeit

    # layer shapes for the discriminator, generator, and reversor
    d_shape = [(5, 2, 20), (5, 20, 20), (5, 20, 1)]
    g_shape = [(5, 1, 20), (5, 20, 20), (5, 20, 2)]
    r_shape = [(5, 2, 20), (5, 20, 20), (5, 20, 1)]

    validation_frequency = 1
    plot_time = 10
    initial_r_train = 20
    g_per_epoch = 5
    r_per_epoch = 9

    data_train = data[0]
    data_valid = data[1]
    n_train_batches = data_train.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = data_valid.get_value(borrow=True).shape[0] // batch_size

    rng = np.random.RandomState(1001)
    trng = RandomStreams(seed=234)

    print('... building the model')
    index = T.lscalar()
    x_rand = T.matrix('x_rand')
    x_real = T.matrix('x_real')

    generator = Simple_Generator(
        rng=rng,
        input_rand=x_rand,
        g_shape=g_shape,
        r_shape=r_shape)
    discriminator = Simple_Discriminator(
        rng=rng,
        input_fake=generator.output,
        input_real=x_real,
        info_layers=d_shape)

    g_cost = generator.mse(x_rand) + discriminator.output.mean()
    g_updates = rmsprop(g_cost, generator.g_params)
    # the 1.0 before the / in r_cost and d_cost is a hyperparameter
    r_cost = generator.mse(x_rand) + 1.0 / (1.0 - generator.gradient_cost)
    r_updates = rmsprop(r_cost, generator.r_params)
    d_cost = -discriminator.mean_difference + 1.0 / (1.0 - discriminator.gradient_cost)
    d_updates = rmsprop(d_cost, discriminator.params)

    train_generator = theano.function(
        inputs=[],
        outputs=g_cost,
        updates=g_updates,
        givens={
            x_rand: trng.uniform(
                size=(batch_size, g_shape[0][1]), low=-1.0, high=1.0)
        })
    train_reversor = theano.function(
        inputs=[],
        outputs=r_cost,
        updates=r_updates,
        givens={
            x_rand: trng.uniform(
                size=(batch_size, g_shape[0][1]), low=-1.0, high=1.0)
        })
    train_discriminator = theano.function(
        inputs=[index],
        outputs=d_cost,
        updates=d_updates,
        givens={
            x_real: data_train[index * batch_size:(index + 1) * batch_size],
            x_rand: trng.uniform(
                size=(batch_size, g_shape[0][1]), low=-1.0, high=1.0)
        })
    validate_model = theano.function(
        inputs=[index],
        outputs=discriminator.mean_difference,
        givens={
            x_real: data_valid[index * batch_size:(index + 1) * batch_size],
            x_rand: trng.uniform(
                size=(batch_size, g_shape[0][1]), low=-1.0, high=1.0)
        })
    get_max_gradient = theano.function(
        inputs=[],
        outputs=[generator.max_gradient, discriminator.max_gradient])
    get_reversor_error = theano.function(
        inputs=[],
        outputs=generator.mse(x_rand),
        givens={
            x_rand: trng.uniform(
                size=(batch_size, g_shape[0][1]), low=-1.0, high=1.0)
        })

    print('... training')
    start_time = timeit.default_timer()

    # warm up the reversor before adversarial training starts
    for _ in range(initial_r_train):
        train_reversor()

    for epoch in range(n_epochs):
        for minibatch_index in range(n_train_batches):
            train_discriminator(minibatch_index)
        for _ in range(g_per_epoch):
            train_generator()
        for _ in range(r_per_epoch):
            train_reversor()
        if epoch % validation_frequency == 0:
            validation_losses = [validate_model(i) for i in range(n_valid_batches)]
            this_validation_loss = np.mean(validation_losses)
            max_grad = get_max_gradient()
            print('epoch %i, mean difference %f, r_error %f, r_grad %f, d_grad %f' %
                  (epoch, this_validation_loss, get_reversor_error(),
                   max_grad[0], max_grad[1]))
        if epoch % plot_time == 0:
            with open('test_GAN_g.pkl', 'wb') as f:
                pickle.dump(generator.generator, f)
            with open('test_GAN_r.pkl', 'wb') as f:
                pickle.dump(generator.reversor, f)
            with open('test_GAN_d.pkl', 'wb') as f:
                pickle.dump(discriminator, f)
            example_graph()

    end_time = timeit.default_timer()
    print('The code ran for %.2fs' % (end_time - start_time))
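
# Hypothetical invocation of the GAN trainer above. It assumes `data` is a
# (train, valid) pair of Theano shared variables holding 2-column real
# samples, matching d_shape's input width of 2 and the slicing in the givens
# above. The unit-circle dataset is purely illustrative.
def _make_circle_data(n, seed=0):
    """Shared variable of n noisy points on the unit circle (illustration)."""
    rng = np.random.RandomState(seed)
    theta = rng.uniform(0.0, 2.0 * np.pi, size=(n, 1))
    pts = np.hstack([np.cos(theta), np.sin(theta)])
    pts += rng.normal(scale=0.05, size=pts.shape)
    return theano.shared(pts.astype(theano.config.floatX), borrow=True)

# example_train((_make_circle_data(10000), _make_circle_data(1000, seed=1)))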
def test_mnist(n_epoch=1000, batch_size=500):
    from load_mnist import load_data_mnist
    import timeit

    valid_time = 1
    scale_factor = 0.01
    # one entry per layer
    CNN_shape = [[batch_size, 28, 28, 5, 5, 1, 2, 20],
                 [batch_size, 24, 24, 5, 5, 20, 2, 50]]
    fc_info = [[2, 20 * 20 * 50, 500], [2, 500, 10]]

    datasets = load_data_mnist()
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute the number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    print('... building the model')
    index = T.lscalar()
    x = T.matrix('x')
    y = T.matrix('y')  # one-hot labels
    rng = np.random.RandomState(1001)

    reshaped_input = x.reshape((batch_size, 1, 28, 28))
    convnet = LCNN(rng, input=reshaped_input, shape_layers=CNN_shape)
    fc_layer_input = convnet.output.flatten(2)
    fc_layer = LMLP(rng, input=fc_layer_input, info_layers=fc_info)
    print('number of parameters: ' + str(fc_layer.n_params + convnet.n_params))

    params = convnet.params + fc_layer.params
    scale_params = convnet.scale_params + fc_layer.scale_params
    max_gradient = convnet.max_gradient * fc_layer.max_gradient
    get_gradient_max = theano.function(inputs=[], outputs=max_gradient)

    cost = fc_layer.mse(y * scale_factor)
    validate_model = theano.function(
        inputs=[index],
        outputs=cost,
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    prediction = T.argmax(fc_layer.output, axis=1)
    ground_truth = T.argmax(y, axis=1)
    accuracy = T.mean(T.eq(prediction, ground_truth))
    validate_model_acc = theano.function(
        inputs=[index],
        outputs=accuracy,
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    updates = rmsprop(cost, params)
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })
    # swap each parameter for its rescaled version after every epoch
    rescale_model = theano.function(inputs=[],
                                    updates=list(zip(params, scale_params)))

    print('... training')
    start_time = timeit.default_timer()
    for epoch in range(n_epoch):
        if epoch % valid_time == 0:
            validation_losses = [validate_model(i) for i in range(n_valid_batches)]
            this_validation_loss = np.mean(validation_losses)
            validation_acc = [validate_model_acc(i) for i in range(n_valid_batches)]
            this_validation_acc = np.mean(validation_acc)
            print('epoch %i, mse %f, g_max %f, acc %f' %
                  (epoch, this_validation_loss, get_gradient_max(),
                   this_validation_acc))
        for minibatch_index in range(n_train_batches):
            train_model(minibatch_index)
        rescale_model()

    end_time = timeit.default_timer()
    print('The code ran for %.2fs' % (end_time - start_time))
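
# test_mnist relies on the scale_params exposed by LCNN/LMLP, which
# rescale_model swaps in for params once per epoch. This file does not define
# them; a plausible reading (an assumption, not confirmed by this code) is a
# per-layer renormalisation that shrinks any weight whose norm exceeds a
# bound, keeping the product of layer gradients (max_gradient) under control.
# A toy sketch of one such rule on a single weight matrix, using the
# Frobenius norm as a cheap upper bound on the operator norm:
def scale_param_sketch(W, max_norm=5.0):
    """Symbolic W rescaled so its Frobenius norm stays <= max_norm."""
    norm = T.sqrt((W ** 2).sum())
    return W * T.minimum(1.0, max_norm / (norm + 1e-8))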
def example_train(n_epochs=1000, batch_size=200):
    import timeit

    g_shape = [(5, 1, 20), (5, 20, 20), (5, 20, 2)]
    r_shape = [(5, 2, 20), (5, 20, 20), (5, 20, 1)]
    g_per_epoch = 20
    r_per_epoch = 10
    plot_time = 10
    print_validation_g_parameters = False
    print_validation_r_parameters = False
    initial_reversor_train = 1000

    print('... building the model')
    rng = np.random.RandomState(1001)
    trng = RandomStreams(seed=234)
    x_rand = T.matrix('x_rand')

    generator = Simple_Generator(rng=rng,
                                 input_rand=x_rand,
                                 g_shape=g_shape,
                                 r_shape=r_shape)

    r_cost = generator.mse(x_rand) + 1.0 / (1.0 - generator.gradient_cost)
    r_updates = rmsprop(r_cost, generator.r_params)

    # target cost: push generated points onto the unit circle x^2 + y^2 = 1
    f = lambda x: ((1.0 - T.dot(x ** 2, np.array([[1.0], [1.0]]))) ** 2).mean()
    cost = f(generator.output)
    g_cost = generator.mse(x_rand) + cost
    g_updates = rmsprop(g_cost, generator.g_params)

    train_reversor = theano.function(
        inputs=[],
        outputs=r_cost,
        updates=r_updates,
        givens={
            x_rand: trng.uniform(size=(batch_size, g_shape[0][1]),
                                 low=-1.0, high=1.0)
        })
    train_generator = theano.function(
        inputs=[],
        outputs=g_cost,
        updates=g_updates,
        givens={
            x_rand: trng.uniform(size=(batch_size, g_shape[0][1]),
                                 low=-1.0, high=1.0)
        })
    test_generator = theano.function(
        inputs=[],
        outputs=cost,
        givens={
            x_rand: trng.uniform(size=(batch_size, g_shape[0][1]),
                                 low=-1.0, high=1.0)
        })
    test_reversor = theano.function(
        inputs=[],
        outputs=generator.mse(x_rand),
        givens={
            x_rand: trng.uniform(size=(batch_size, g_shape[0][1]),
                                 low=-1.0, high=1.0)
        })
    get_max_gradient = theano.function(inputs=[], outputs=generator.max_gradient)

    print('... training')
    start_time = timeit.default_timer()

    # warm up the reversor before alternating training starts
    for _ in range(initial_reversor_train):
        train_reversor()

    for epoch in range(n_epochs):
        if epoch % plot_time == 0:
            if print_validation_g_parameters:
                print('printing generator parameters')
                for param in generator.g_params:
                    print(param.get_value())
            if print_validation_r_parameters:
                print('printing reversor parameters')
                for param in generator.r_params:
                    print(param.get_value())
            with open('test_generator_model.pkl', 'wb') as f:
                pickle.dump(generator.generator, f)
            with open('test_reversor_model.pkl', 'wb') as f:
                pickle.dump(generator.reversor, f)
            example_graph()
        for _ in range(g_per_epoch):
            train_generator()
        for _ in range(r_per_epoch):
            train_reversor()
        gen_cost = test_generator()
        rev_cost = test_reversor()
        max_grad = get_max_gradient()
        print('epoch %i, generator cost %f, reversor cost %f, max gradient %f' %
              (epoch, gen_cost, rev_cost, max_grad))

    end_time = timeit.default_timer()
    print('The code ran for %.2fs' % (end_time - start_time))
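
# Numeric sanity check of the target cost f used above: (1 - (x^2 + y^2))^2
# vanishes exactly on the unit circle and grows with the distance from it, so
# minimising f pushes generated points onto the circle. Plain NumPy,
# illustration only.
def _check_circle_cost():
    on_circle = np.array([[1.0, 0.0], [0.0, -1.0]])
    off_circle = np.array([[0.0, 0.0], [2.0, 0.0]])
    for pts in (on_circle, off_circle):
        print(((1.0 - (pts ** 2).sum(axis=1)) ** 2).mean())
    # prints 0.0 for the points on the circle, 5.0 for the points off it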
def example_train(n_epochs=1000, batch_size=20, gradient_reg=1.0, data_num=2):
    import timeit

    plot_time = 100
    # gradient_reg is accepted for interface parity with the other trainers
    # but unused here: the cost below is plain MSE with no gradient term

    datasets = load_data_test(data_num)
    train_set_x, train_set_y = datasets[1]
    valid_set_x, valid_set_y = datasets[0]
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size

    print('... building the model')
    index = T.lscalar()
    x = T.matrix('x')
    y = T.matrix('y')
    rng = np.random.RandomState(1001)

    # construct the MLP
    network = LMLP(rng=rng,
                   input=x,
                   info_layers=[(5, 1, 20), (5, 20, 100), (5, 100, 20),
                                (5, 20, 1)])
    cost = network.mse(y)

    validate_model = theano.function(
        inputs=[index],
        outputs=network.mse(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })
    get_gradient_max = theano.function(inputs=[], outputs=network.max_gradient)

    updates = rmsprop(cost, network.params)
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })
    # swap each parameter for its rescaled version after every minibatch
    rescale_model = theano.function(
        inputs=[],
        updates=list(zip(network.params, network.scale_params)))

    print('... training')
    validation_frequency = n_train_batches
    plot_frequency = n_train_batches * plot_time
    start_time = timeit.default_timer()

    epoch = 0
    while epoch < n_epochs:
        for minibatch_index in range(n_train_batches):
            train_model(minibatch_index)
            rescale_model()
            iter = epoch * n_train_batches + minibatch_index
            if (iter + 1) % validation_frequency == 0:
                # compute the MSE on the validation set
                validation_losses = [
                    validate_model(i) for i in range(n_valid_batches)
                ]
                this_validation_loss = np.mean(validation_losses)
                this_gradient_max = get_gradient_max()
                print('epoch %i, minibatch %i/%i, mse %f, g_max %f' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss, this_gradient_max))
            if (iter + 1) % plot_frequency == 0:
                with open('test_mlp_model.pkl', 'wb') as f:
                    pickle.dump(network, f)
                example_predict(1000, data_num)
        epoch += 1

    end_time = timeit.default_timer()
    print('The code ran for %.2fs' % (end_time - start_time))
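
# Hypothetical entry point for the regression experiment above; the guard and
# argument values are illustrative, and load_data_test / example_predict are
# assumed to be this module's own helpers.
if __name__ == '__main__':
    example_train(n_epochs=1000, batch_size=20, data_num=2)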