def train_rbm():
    batch_size = 20
    learning_rate = 0.1
    n_training_epochs = 15
    n_visible=28*28
    n_hidden=500
    n_contrastive_divergence_steps=15
    persistent_contrastive_divergence=True

    rng = np.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2 ** 30))

    train_set, valid_set, test_set = get_dataset('mnist')
    train_set_x, _ = load_dataset(train_set)
    test_set_x, _ = load_dataset(test_set)
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size

    x = T.matrix('x')

    if persistent_contrastive_divergence:
        persistent_chain = theano.shared(
            np.zeros((batch_size, n_hidden), dtype=theano.config.floatX), borrow=True)
    else:
        persistent_chain = None

    rbm = RBM.create_with_random_weights(n_visible, n_hidden, rng)

    # persistent contrastive divergence with n_contrastive_divergence_steps steps
    cost, updates = rbm.get_cost_updates(
        x, learning_rate, number_of_gibbs_steps=n_contrastive_divergence_steps,
        theano_rng=theano_rng, persistent_state=persistent_chain)

    minibatch_index = T.iscalar('minibatch_index')

    train_rbm = theano.function(
        inputs=[minibatch_index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[minibatch_index*batch_size:(minibatch_index+1)*batch_size],
        }
    )

    start_time = time.time()

    for epoch in range(n_training_epochs):
        epoch_start_time = time.time()
        costs = []
        for batch_index in range(n_train_batches):
            costs.append(train_rbm(batch_index))
        print('Training epoch %d of %d, cost is %f, took %.1fs' %
              (epoch, n_training_epochs, np.mean(costs), time.time() - epoch_start_time))
    filters = tile_raster_images(X=rbm.W.get_value(borrow=True).T, img_shape=(28, 28))
    cv2.imshow('filter', filters)
    cv2.waitKey(-1)
    cv2.destroyWindow('filter')

    print ('Training took %d minutes' % ((time.time()-start_time)/60.))

    return rbm.get_parameter_values()
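
# The call to rbm.get_cost_updates() above hides the block Gibbs sampling that
# contrastive divergence is built on.  The sketch below shows one v -> h -> v
# Gibbs step for a binary RBM, using only names this module already relies on
# (np, theano, T, RandomStreams).  W, b_hidden and b_visible stand for the
# RBM's shared parameters; this is an illustrative assumption about what the
# RBM class does internally, not its actual implementation.
def _gibbs_vhv_sketch(v0, W, b_hidden, b_visible, theano_rng):
    # p(h = 1 | v) and a binary sample of the hidden units
    h_mean = T.nnet.sigmoid(T.dot(v0, W) + b_hidden)
    h_sample = theano_rng.binomial(size=h_mean.shape, n=1, p=h_mean,
                                   dtype=theano.config.floatX)
    # p(v = 1 | h) and a binary sample of the visible units
    v_mean = T.nnet.sigmoid(T.dot(h_sample, W.T) + b_visible)
    v_sample = theano_rng.binomial(size=v_mean.shape, n=1, p=v_mean,
                                   dtype=theano.config.floatX)
    return h_sample, v_mean, v_sample
# CD-k chains k such steps starting from the data; persistent CD instead keeps
# the chain state in `persistent_chain` between parameter updates.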
def test_check_mnist():
    train_set, valid_set, test_set = get_dataset('mnist')
    assert (len(train_set), len(valid_set), len(test_set)) == (2, 2, 2)
    # mnist pictures are 28x28 = 784 of float grayscale values
    assert (train_set[0].shape, train_set[1].shape) == ((50000, 784), (50000,))
    assert (valid_set[0].shape, valid_set[1].shape) == ((10000, 784), (10000,))
    assert (test_set[0].shape, test_set[1].shape) == ((10000, 784), (10000,))
    assert (train_set[0].dtype, train_set[1].dtype) == (np.float32, np.int64)
def run_3_denoising_autoencoder(corruption_level=0.3):
    batch_size = 20
    learning_rate = 0.01
    training_epochs = 250
    n_in=28*28
    n_hidden=500
    rng = np.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2 ** 30))

    train_set, _, _ = get_dataset('mnist')
    train_set_x, train_set_y = load_dataset(train_set)

    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size

    x = T.matrix('x')

    corrupted_input = theano_rng.binomial(size=x.shape, n=1, p=1-corruption_level, dtype=theano.config.floatX)*x
    reconstructed, params = autoencoder(corrupted_input, n_in, n_hidden, rng)
    cost = mean_cross_entropy(reconstructed, x)

    minibatch_index = T.iscalar('minibatch_index')
    train_model = theano.function(
        inputs=[minibatch_index],
        outputs=[cost],
        updates=[[p, p - learning_rate*T.grad(cost, p)]
                 for p in params],
        givens={
            x: train_set_x[minibatch_index*batch_size:(minibatch_index+1)*batch_size],
        },
        profile=False
    )

    start_time = time.time()

    print('Going to run the training with floatX=%s' % (theano.config.floatX))
    for epoch in range(training_epochs):
        costs = []
        epoch_start_time = time.time()
        for minibatch_index in range(n_train_batches):
            costs.append(train_model(minibatch_index))
        print("Mean costs at epoch %d is %f%% (ran for %.1fs)" % (epoch, np.mean(costs), time.time() - epoch_start_time))

    total_time = time.time()-start_time
    print('The training code ran for %.1fs over %d epochs (%.2f epochs/sec)' % (total_time, epoch + 1, (epoch + 1)/total_time))

    filters = tile_raster_images(X=params[0].get_value(borrow=True).T,
                                 img_shape=(28, 28), tile_shape=(23, 22),
                                 tile_spacing=(1, 1))
    filters = cv2.resize(filters, dsize=None, fx=1., fy=1.)
    cv2.imshow('filters', filters)
    cv2.waitKey(-1)
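
# A minimal sketch of the two helpers used above, assuming the usual
# tied-weight denoising autoencoder: encode with sigmoid(x W + b_hidden),
# decode with sigmoid(h W^T + b_visible), and score with the mean per-example
# cross-entropy.  The project's real autoencoder() and mean_cross_entropy()
# may differ (e.g. untied weights); treat this as an illustration only.
def _autoencoder_sketch(x, n_in, n_hidden, rng):
    bound = 4. * np.sqrt(6. / (n_in + n_hidden))
    W = theano.shared(
        np.asarray(rng.uniform(low=-bound, high=bound, size=(n_in, n_hidden)),
                   dtype=theano.config.floatX),
        name='W', borrow=True)
    b_hidden = theano.shared(np.zeros(n_hidden, dtype=theano.config.floatX), borrow=True)
    b_visible = theano.shared(np.zeros(n_in, dtype=theano.config.floatX), borrow=True)
    hidden = T.nnet.sigmoid(T.dot(x, W) + b_hidden)
    reconstructed = T.nnet.sigmoid(T.dot(hidden, W.T) + b_visible)
    return reconstructed, [W, b_hidden, b_visible]
def _mean_cross_entropy_sketch(reconstructed, x):
    # cross-entropy summed over pixels, averaged over the minibatch
    return T.mean(-T.sum(x * T.log(reconstructed) + (1 - x) * T.log(1 - reconstructed), axis=1))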
def run_rnn_rbm_training(trained_model_filename, reuse_pretrained=False, num_epochs = 1):
    batch_size = 100
    train_set_files, valid_set_files, test_set_files = get_dataset('nottingham')

    if reuse_pretrained:
        with open(trained_model_filename, 'rb') as f:
            trained_model_params = cPickle.load(f)
    else:
        trained_model_params = None

    model = RnnRBM(network_parameters=trained_model_params)
    model.train(train_set_files, batch_size, num_epochs)

    with open(trained_model_filename, 'wb') as f:
        cPickle.dump(model.get_params(), f, cPickle.HIGHEST_PROTOCOL)
def train_dbn():
    batch_size = 10
    finetune_learning_rate = 0.1
    pretrain_learning_rate = 0.01
    n_pretraining_epochs = 100
    n_finetune_training_epochs = 100
    n_in = 28 * 28
    n_out = 10
    hidden_layers_sizes = [1000, 1000, 1000]
    n_contrastive_divergence_steps = 1
    persistent_contrastive_divergence = True

    rng = np.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2 ** 30))

    train_set, valid_set, test_set = get_dataset("mnist")
    train_set_x, train_set_y = load_dataset(train_set)
    valid_set_x, valid_set_y = load_dataset(valid_set)
    test_set_x, test_set_y = load_dataset(test_set)
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size

    test_batch_size = valid_set_x.get_value(borrow=True).shape[0]
    n_validation_batches = valid_set_x.get_value(borrow=True).shape[0] // test_batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // test_batch_size

    x = T.matrix("x")
    y = T.ivector("y")

    minibatch_index = T.iscalar("minibatch_index")

    mlp_output, mlp_params, mlp_layers_description = deep_mlp(x, n_in, n_out, hidden_layers_sizes, rng)
    pretrain_functions = []
    for layer_input, (W, b) in mlp_layers_description:
        rbm_layer = RBM(W=W, b_hidden=b)

        if persistent_contrastive_divergence:
            n_hidden = b.get_value(borrow=True).shape[0]
            persistent_chain = theano.shared(np.zeros((batch_size, n_hidden), dtype=theano.config.floatX), borrow=True)
        else:
            persistent_chain = None

        layer_cost, layer_updates = rbm_layer.get_cost_updates(
            layer_input, pretrain_learning_rate, n_contrastive_divergence_steps, theano_rng, persistent_chain
        )

        pretrain_rbm = theano.function(
            inputs=[minibatch_index],
            outputs=layer_cost,
            updates=layer_updates,
            givens={x: train_set_x[minibatch_index * batch_size : (minibatch_index + 1) * batch_size]},
        )
        pretrain_functions.append(pretrain_rbm)

    # PRETRAINING
    start_time = time.time()
    for i, pretrain_function in enumerate(pretrain_functions):
        layer_start_time = time.time()
        for epoch in range(n_pretraining_epochs):
            epoch_start_time = time.time()
            costs = []
            for batch_index in range(n_train_batches):
                costs.append(pretrain_function(batch_index))
            print(
                "Training epoch %d of %d, cost is %f, took %.1fs"
                % (epoch, n_pretraining_epochs, np.mean(costs), time.time() - epoch_start_time)
            )
        print("Pretraining of layer %d took %d min" % (i, (time.time() - layer_start_time) / 60.0))

    print("Pre training took %d minutes" % ((time.time() - start_time) / 60.0))

    # FINETUNING
    y_predict = T.argmax(mlp_output, axis=1)
    finetune_cost = negative_log_likelihood_loss(mlp_output, y)

    finetune_train_model = theano.function(
        inputs=[minibatch_index],
        outputs=[finetune_cost],
        updates=[[p, p - finetune_learning_rate * T.grad(finetune_cost, p)] for p in mlp_params],
        givens={
            x: train_set_x[minibatch_index * batch_size : (minibatch_index + 1) * batch_size],
            y: train_set_y[minibatch_index * batch_size : (minibatch_index + 1) * batch_size],
        },
        profile=False,
    )

    validation_model = theano.function(
        inputs=[minibatch_index],
        outputs=one_zero_loss(y_predict, y),
        givens={
            x: valid_set_x[minibatch_index * test_batch_size : (minibatch_index + 1) * test_batch_size],
            y: valid_set_y[minibatch_index * test_batch_size : (minibatch_index + 1) * test_batch_size],
        },
    )

    test_model = theano.function(
        inputs=[minibatch_index],
        outputs=one_zero_loss(y_predict, y),
        givens={
            x: test_set_x[minibatch_index * test_batch_size : (minibatch_index + 1) * test_batch_size],
            y: test_set_y[minibatch_index * test_batch_size : (minibatch_index + 1) * test_batch_size],
        },
    )

    start_time = time.time()

    def main_loop():
        patience = 4 * n_train_batches
        patience_increase = 2
        improvement_threshold = 0.995
        validation_frequency = min(n_train_batches, patience // 2)
        test_score = 0.0
        best_validation_loss = np.inf

        print("Going to run the finetuning training with floatX=%s" % (theano.config.floatX))
        for epoch in range(n_finetune_training_epochs):
            epoch_start = time.time()
            for minibatch_index_value in range(n_train_batches):
                finetune_train_model(minibatch_index_value)
                iteration = epoch * n_train_batches + minibatch_index_value
                if (iteration + 1) % validation_frequency == 0.0:
                    validation_cost = np.mean([validation_model(i) for i in range(n_validation_batches)])
                    print("epoch %i, validation error %f %%" % (epoch, validation_cost * 100.0))
                    if validation_cost < best_validation_loss:
                        if validation_cost < best_validation_loss * improvement_threshold:
                            patience = max(patience, iteration * patience_increase)
                        best_validation_loss = validation_cost
                        test_score = np.mean([test_model(i) for i in range(n_test_batches)])
                        print("  epoch %i, minibatch test error of best model %f %%" % (epoch, test_score * 100.0))
                if patience <= iteration:
                    return epoch, best_validation_loss, test_score
            print(
                " - finished epoch %d out of %d in %.1fs"
                % (epoch, n_finetune_training_epochs, time.time() - epoch_start)
            )
        return epoch, best_validation_loss, test_score

    epoch, best_validation_loss, test_score = main_loop()

    total_time = time.time() - start_time
    print(
        "Optimization complete in %d min with best validation score of %f %%, with test performance %f %%"
        % (total_time / 60, best_validation_loss * 100.0, test_score * 100.0)
    )
    print("The code run for %d epochs, with %f epochs/sec" % (epoch, epoch / total_time))
def run_2_lenet_training(n_epochs=200):
    batch_size = 500
    learning_rate = 0.1
    n_hidden = 500
    n_out=10
    rng = np.random.RandomState(23455)
    number_of_kernels = [20, 50]

    train_set, valid_set, test_set = get_dataset('mnist')
    train_set_x, train_set_y = load_dataset(train_set)
    valid_set_x, valid_set_y = load_dataset(valid_set)
    test_set_x, test_set_y = load_dataset(test_set)

    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size

    test_batch_size = batch_size
    n_validation_batches = valid_set_x.get_value(borrow=True).shape[0] // test_batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // test_batch_size

    x = T.matrix('x')
    y = T.ivector('y')

    layer_0_input = x.reshape((batch_size, 1, 28, 28))

    conv_layer_0_out, conv_layer_0_params = conv_poll_layer(
        layer_0_input,
        feature_maps_count_in=1,
        feature_maps_count_out=number_of_kernels[0],
        filter_shape=(5, 5),
        maxpool_shape=(2, 2),
        image_shape=(batch_size, 1, 28, 28),
        rng=rng
    )

    # filtering from the previous layer reduces the image size to (28-5+1 , 28-5+1) = (24, 24)
    # maxpooling from the prev. layer reduces this further to (24/2, 24/2) = (12, 12)
    conv_layer_1_out, conv_layer_1_params = conv_poll_layer(
        conv_layer_0_out,
        feature_maps_count_in=number_of_kernels[0],
        feature_maps_count_out=number_of_kernels[1],
        filter_shape=(5, 5),
        maxpool_shape=(2, 2),
        image_shape=(batch_size, number_of_kernels[0], 12, 12),
        rng=rng
    )

    hidden_layer_output, hidden_layer_params = hidden_layer(
        conv_layer_1_out.flatten(2),
        n_in=number_of_kernels[1]*4*4,  # each feature map from layer 1 is 4x4: (12 - 5 + 1) / 2 = 4
        n_out=n_hidden,
        rng=rng)

    output_layer_output, output_layer_params = logistic_layer(hidden_layer_output, n_hidden, n_out)

    y_predict = T.argmax(output_layer_output, axis=1)

    cost = negative_log_likelihood_loss(output_layer_output, y)

    minibatch_index = T.iscalar('minibatch_index')

    all_parameters = (conv_layer_0_params + conv_layer_1_params + hidden_layer_params + output_layer_params)

    train_model_impl = theano.function(
        inputs=[minibatch_index],
        outputs=[],
        updates=[[p, p - learning_rate*T.grad(cost, p)]
                 for p in all_parameters],
        givens={
            x: train_set_x[minibatch_index*batch_size:(minibatch_index+1)*batch_size],
            y: train_set_y[minibatch_index*batch_size:(minibatch_index+1)*batch_size],
        },
        profile=False
    )

    def train_model(*args):
        return train_model_impl(*args)

    validation_model_impl = theano.function(
        inputs=[minibatch_index],
        outputs=one_zero_loss(y_predict, y),
        givens={
            x: valid_set_x[minibatch_index*test_batch_size:(minibatch_index+1)*test_batch_size],
            y: valid_set_y[minibatch_index*test_batch_size:(minibatch_index+1)*test_batch_size],
        }
    )

    def validation_model(*args):
        return validation_model_impl(*args)

    test_model_impl = theano.function(
        inputs=[minibatch_index],
        outputs=one_zero_loss(y_predict, y),
        givens={
            x: test_set_x[minibatch_index*test_batch_size:(minibatch_index+1)*test_batch_size],
            y: test_set_y[minibatch_index*test_batch_size:(minibatch_index+1)*test_batch_size],
        }
    )

    def test_model(*args):
        return test_model_impl(*args)

    start_time = time.time()

    def main_loop():
        patience = 10000
        patience_increase = 2
        improvement_threshold = 0.995
        validation_frequency = n_train_batches
        test_score = 0.
        best_validation_loss = np.inf

        print('Going to run the training with floatX=%s' % (theano.config.floatX))
        for epoch in range(n_epochs):
            for minibatch_index in range(n_train_batches):
                batch_start = time.time()
                train_model(minibatch_index)
                print('Ran training iteration in %.2fs' % (time.time() - batch_start))
                iteration = epoch*n_train_batches + minibatch_index
                if (iteration + 1) % validation_frequency == 0.:
                    validation_cost = np.mean([validation_model(i) for i in range(n_validation_batches)])
                    print('epoch %i, validation error %f %%' % (epoch, validation_cost * 100.))
                    if validation_cost < best_validation_loss:
                        if validation_cost < best_validation_loss*improvement_threshold:
                            patience = max(patience, iteration*patience_increase)
                        best_validation_loss = validation_cost
                        test_score = np.mean([test_model(i) for i in range(n_test_batches)])
                        print('  epoch %i, minibatch test error of best model %f %%' % (epoch, test_score * 100.))
                if patience <= iteration:
                    return epoch, best_validation_loss, test_score
        return epoch, best_validation_loss, test_score

    epoch, best_validation_loss, test_score = main_loop()

    total_time = time.time()-start_time
    print('Optimization complete in %.1fs with best validation score of %f %%, with test performance %f %%' %
          (total_time, best_validation_loss * 100., test_score * 100.))
    print('The code ran for %d epochs, at %.1f sec per epoch' % (epoch + 1, total_time / (epoch + 1)))

    with open('trained_lenet.pkl', 'wb') as f:
        pickle.dump([p.get_value(borrow=True) for p in all_parameters], f, protocol=pickle.HIGHEST_PROTOCOL)
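
# The hard-coded shapes above (28 -> 24 -> 12 for layer 0, 12 -> 8 -> 4 for
# layer 1) follow from a "valid" convolution followed by non-overlapping
# max-pooling.  A small helper reproducing that arithmetic, so the 12 and the
# 4*4 used for the hidden layer's n_in can be checked; this is only a sketch,
# not part of conv_poll_layer().
def _conv_pool_output_size(image_size, filter_size, pool_size):
    # valid convolution shrinks the map to image_size - filter_size + 1,
    # then the pooling divides it by pool_size
    return (image_size - filter_size + 1) // pool_size
# e.g. _conv_pool_output_size(28, 5, 2) == 12 and
#      _conv_pool_output_size(12, 5, 2) == 4, matching the shapes above.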
def run_1_mlp():

    batch_size = 20
    learning_rate = 0.01
    n_epochs = 10
    L1_reg_coeff = 0.00
    L2_reg_coeff = 0.0001
    n_in=28*28
    n_hidden=500
    n_out=10
    rng = np.random.RandomState(1234)

    train_set, valid_set, test_set = get_dataset('mnist')
    train_set_x, train_set_y = load_dataset(train_set)
    valid_set_x, valid_set_y = load_dataset(valid_set)
    test_set_x, test_set_y = load_dataset(test_set)

    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size

    # evaluate on the whole validation/test set in a single batch
    test_batch_size = valid_set_x.get_value(borrow=True).shape[0]
    n_validation_batches = valid_set_x.get_value(borrow=True).shape[0] // test_batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // test_batch_size

    x = T.matrix('x')
    y = T.ivector('y')

    hidden_layer_output, hidden_layer_params = hidden_layer(x, n_in, n_hidden, rng)
    output_layer_output, output_layer_params = logistic_layer(hidden_layer_output, n_hidden, n_out)

    y_predict = T.argmax(output_layer_output, axis=1)

    # weights decay
    L1 = abs(hidden_layer_params[0]).sum() + abs(output_layer_params[0]).sum()
    L2 = T.sqr(hidden_layer_params[0]).sum() + T.sqr(output_layer_params[0]).sum()

    cost = negative_log_likelihood_loss(output_layer_output, y) + L1_reg_coeff*L1 + L2_reg_coeff*L2

    minibatch_index = T.iscalar('minibatch_index')

    train_model = theano.function(
        inputs=[minibatch_index],
        outputs=[],
        updates=[[p, p - learning_rate*T.grad(cost, p)]
                 for p in (output_layer_params + hidden_layer_params)],
        givens={
            x: train_set_x[minibatch_index*batch_size:(minibatch_index+1)*batch_size],
            y: train_set_y[minibatch_index*batch_size:(minibatch_index+1)*batch_size],
        },
        profile=True
    )

    validation_model = theano.function(
        inputs=[minibatch_index],
        outputs=one_zero_loss(y_predict, y),
        givens={
            x: valid_set_x[minibatch_index*test_batch_size:(minibatch_index+1)*test_batch_size],
            y: valid_set_y[minibatch_index*test_batch_size:(minibatch_index+1)*test_batch_size],
        }
    )

    test_model = theano.function(
        inputs=[minibatch_index],
        outputs=one_zero_loss(y_predict, y),
        givens={
            x: test_set_x[minibatch_index*test_batch_size:(minibatch_index+1)*test_batch_size],
            y: test_set_y[minibatch_index*test_batch_size:(minibatch_index+1)*test_batch_size],
        }
    )

    start_time = time.time()

    def main_loop():
        patience = 10000
        patience_increase = 2
        improvement_threshold = 0.995
        validation_frequency = n_train_batches
        test_score = 0.
        best_validation_loss = np.inf

        print('Going to run the training with floatX=%s' % (theano.config.floatX))
        for epoch in range(n_epochs):
            for minibatch_index in range(n_train_batches):
                train_model(minibatch_index)

                iteration = epoch*n_train_batches + minibatch_index
                if (iteration + 1) % validation_frequency == 0.:
                    validation_cost = np.mean([validation_model(i) for i in range(n_validation_batches)])
                    print('epoch %i, validation error %f %%' % (epoch, validation_cost * 100.))
                    if validation_cost < best_validation_loss:
                        if validation_cost < best_validation_loss*improvement_threshold:
                            patience = max(patience, iteration*patience_increase)
                        best_validation_loss = validation_cost
                        test_score = np.mean([test_model(i) for i in range(n_test_batches)])
                        print('  epoch %i, minibatch test error of best model %f %%' % (epoch, test_score * 100.))
                if patience <= iteration:
                    return epoch, best_validation_loss, test_score
        return epoch, best_validation_loss, test_score

    epoch, best_validation_loss, test_score = main_loop()

    total_time = time.time()-start_time
    print('Optimization complete in %.1fs with best validation score of %f %%, with test performance %f %%' %
          (total_time, best_validation_loss * 100., test_score * 100.))
    print('The code ran for %d epochs (%.2f epochs/sec)' % (epoch + 1, (epoch + 1)/total_time))
def run_7_lstm_training():
    train_set, valid_set, test_set = get_dataset('imdb')

    ydim = 2  # n_out
    dim_proj = 128
    n_words = 10000  # this is implied in preprocessed imdb dataset
    use_dropout = True
    optimizer = adadelta
    valid_batch_size=64
    validFreq=370
    max_epochs= 100
    batch_size=16
    lrate=0.0001
    dispFreq=10
    patience=10

    W_embedding, W_lstm, U_lstm, b_lstm, W_classifier, b_classifier = init_params(n_words, dim_proj, ydim)

    # use_noise is for dropout
    (use_noise, x, mask, y, f_pred_prob, f_pred, cost) = build_model(
        W_embedding, W_lstm, U_lstm, b_lstm, W_classifier, b_classifier, dim_proj, use_dropout)

    grads = T.grad(cost, wrt=[W_embedding, W_lstm, U_lstm, b_lstm, W_classifier, b_classifier])

    lr = T.scalar(name='learning_rate')
    f_grad_shared, f_update = optimizer(lr, [W_embedding, W_lstm, U_lstm, b_lstm, W_classifier, b_classifier],
                                        grads, x, mask, y, cost)

    print('Optimization')

    validation_minibatch_indices = get_minibatches_idx(len(valid_set[0]), valid_batch_size)
    test_minibatch_indices = get_minibatches_idx(len(test_set[0]), valid_batch_size)

    history_errs = []
    bad_counter = 0  # validations without sufficient improvement (for early stopping)
    uidx = 0  # the number of updates done
    estop = False  # early stop
    start_time = time.time()
    try:
        for eidx in range(max_epochs):
            n_samples = 0

            # Get new shuffled index for the training set.
            training_minibatches_indices = get_minibatches_idx(len(train_set[0]), batch_size, shuffle=True)

            for train_index in training_minibatches_indices:
                uidx += 1
                use_noise.set_value(1.)

                # Select the random examples for this minibatch
                current_x = [train_set[0][t] for t in train_index]
                current_y = [train_set[1][t] for t in train_index]

                # Convert to numpy.ndarray format.
                # Note that prepare_data swaps the axes: it returns arrays of
                # shape (minibatch maxlen, n_samples); see the sketch after this function.
                current_x, current_mask = prepare_data(current_x)
                n_samples += current_x.shape[1]

                cost = f_grad_shared(current_x, current_mask, current_y)
                f_update(lrate)

                if uidx % dispFreq == 0:
                    print('Epoch %d Update %d Cost %f' % (eidx, uidx, cost))

                if uidx % validFreq == 0:
                    use_noise.set_value(0.)
                    train_err = pred_error(f_pred, train_set, training_minibatches_indices)
                    valid_err = pred_error(f_pred, valid_set, validation_minibatch_indices)
                    test_err = pred_error(f_pred, test_set, test_minibatch_indices)

                    history_errs.append([valid_err, test_err])

                    if (valid_err <= np.array(history_errs)[:, 0].min()):
                        bad_counter = 0

                    print('Train %s Valid %s Test %s' % (train_err, valid_err, test_err))

                    if (len(history_errs) > patience and
                            valid_err >= np.array(history_errs)[:-patience, 0].min()):
                        bad_counter += 1
                        if bad_counter > patience:
                            print('Early Stop!')
                            estop = True
                            break

            print('Seen %d samples' % n_samples)

            if estop:
                break

    except KeyboardInterrupt:
        print "Training interupted"

    end_time = time.time()
    use_noise.set_value(0.)
    train_minibatch_indices_sorted = get_minibatches_idx(len(train_set[0]), batch_size)
    train_err = pred_error(f_pred, train_set, train_minibatch_indices_sorted)
    valid_err = pred_error(f_pred, valid_set, validation_minibatch_indices)
    test_err = pred_error(f_pred, test_set, test_minibatch_indices)

    print('Train %s Valid %s Test %s' % (train_err, valid_err, test_err))
    print('The code ran for %d epochs (%.2f sec/epoch)' % (
        (eidx + 1), (end_time - start_time) / (1. * (eidx + 1))))
    print('Training took %.1fs' % (end_time - start_time))
    return train_err, valid_err, test_err
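
# prepare_data() above turns a list of variable-length integer sequences into
# a dense matrix plus a mask, swapping the axes so the result has shape
# (maxlen, n_samples).  A minimal numpy sketch of that behaviour, assuming
# zero padding only; the project's prepare_data() may also truncate overly
# long sequences.
def _prepare_data_sketch(seqs):
    lengths = [len(s) for s in seqs]
    n_samples = len(seqs)
    maxlen = max(lengths)
    x = np.zeros((maxlen, n_samples)).astype('int64')
    x_mask = np.zeros((maxlen, n_samples)).astype(theano.config.floatX)
    for idx, s in enumerate(seqs):
        x[:lengths[idx], idx] = s        # one column per sample
        x_mask[:lengths[idx], idx] = 1.  # 1 where the sequence has real tokens
    return x, x_mask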
def run_4_stacked_autoencoder():
    batch_size = 1
    finetune_learning_rate = 0.1
    finetune_training_epochs = 50

    pretrain_learning_rate = 0.001
    pretraining_epochs = 15
    n_in=28*28
    hidden_layers_sizes=[1000, 1000, 1000]
    corruption_levels = [.1, .2, .3]
    n_out=10
    rng = np.random.RandomState(89677)
    theano_rng = RandomStreams(rng.randint(2 ** 30))

    train_set, valid_set, test_set = get_dataset('mnist')
    train_set_x, train_set_y = load_dataset(train_set)
    valid_set_x, valid_set_y = load_dataset(valid_set)
    test_set_x, test_set_y = load_dataset(test_set)

    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size

    test_batch_size = valid_set_x.get_value(borrow=True).shape[0]
    n_validation_batches = valid_set_x.get_value(borrow=True).shape[0] // test_batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // test_batch_size

    # construct deep mlp
    x = T.matrix('x')
    y = T.ivector('y')
    mlp_output, mlp_params, layers_description = deep_mlp(x, n_in=n_in, n_out=n_out, hidden_layers_sizes=hidden_layers_sizes, rng=rng)

    minibatch_index = T.iscalar('minibatch_index')

    # pretrain
    pretraining_models = []
    for i, (layer_input, (W, b_hidden)) in enumerate(layers_description):
        corrupted_input = theano_rng.binomial(
            size=layer_input.shape, n=1, p=1-corruption_levels[i], dtype=theano.config.floatX)*layer_input
        reconstructed_output, autoencoder_params = autoencoder(corrupted_input, W, b_hidden)
        pretraining_cost = mean_cross_entropy(reconstructed_output, layer_input)
        pretraining_model = theano.function(
            inputs=[minibatch_index],
            outputs=[pretraining_cost],
            updates=[[p, p - pretrain_learning_rate*T.grad(pretraining_cost, p)]
                     for p in autoencoder_params],
            givens={
                x: train_set_x[minibatch_index*batch_size:(minibatch_index+1)*batch_size],
            }
        )
        pretraining_models.append(pretraining_model)

    y_predict = T.argmax(mlp_output, axis=1)
    finetune_cost = negative_log_likelihood_loss(mlp_output, y)

    finetune_train_model = theano.function(
        inputs=[minibatch_index],
        outputs=[finetune_cost],
        updates=[[p, p - finetune_learning_rate*T.grad(finetune_cost, p)]
                 for p in mlp_params],
        givens={
            x: train_set_x[minibatch_index*batch_size:(minibatch_index+1)*batch_size],
            y: train_set_y[minibatch_index*batch_size:(minibatch_index+1)*batch_size]
        },
        profile=False
    )

    validation_model = theano.function(
        inputs=[minibatch_index],
        outputs=one_zero_loss(y_predict, y),
        givens={
            x: valid_set_x[minibatch_index*test_batch_size:(minibatch_index+1)*test_batch_size],
            y: valid_set_y[minibatch_index*test_batch_size:(minibatch_index+1)*test_batch_size],
        }
    )

    test_model = theano.function(
        inputs=[minibatch_index],
        outputs=one_zero_loss(y_predict, y),
        givens={
            x: test_set_x[minibatch_index*test_batch_size:(minibatch_index+1)*test_batch_size],
            y: test_set_y[minibatch_index*test_batch_size:(minibatch_index+1)*test_batch_size],
        }
    )

    for i, pretraining_model in enumerate(pretraining_models):
        pretraining_start_time = time.time()

        print('Going to run the pretraining for layer %d with floatX=%s' % (i, theano.config.floatX))
        for epoch in range(pretraining_epochs):
            costs = []
            epoch_start_time = time.time()
            for minibatch_index_value in range(n_train_batches):
                costs.append(pretraining_model(minibatch_index_value))
            print("Layer %d: mean costs at epoch %d is %f%% (ran for %.1fs)" %
                  (i, epoch, np.mean(costs), time.time() - epoch_start_time))

        total_pretraining_time = time.time()-pretraining_start_time
        print('The pretraining for layer %d ran for %.1fs over %d epochs (%.2f epochs/sec)' %
              (i, total_pretraining_time, epoch + 1, (epoch + 1)/total_pretraining_time))

    start_time = time.time()

    def main_loop():
        patience = 10 * n_train_batches
        patience_increase = 2
        improvement_threshold = 0.995
        validation_frequency = min(n_train_batches, patience // 2)
        test_score = 0.
        best_validation_loss = np.inf

        print('Going to run the finetuning training with floatX=%s' % (theano.config.floatX))
        for epoch in range(finetune_training_epochs):
            epoch_start = time.time()
            for minibatch_index_value in range(n_train_batches):
                finetune_train_model(minibatch_index_value)
                iteration = epoch*n_train_batches + minibatch_index_value
                if (iteration + 1) % validation_frequency == 0.:
                    validation_cost = np.mean([validation_model(i) for i in range(n_validation_batches)])
                    print('epoch %i, validation error %f %%' % (epoch, validation_cost * 100.))
                    if validation_cost < best_validation_loss:
                        if validation_cost < best_validation_loss*improvement_threshold:
                            patience = max(patience, iteration*patience_increase)
                        best_validation_loss = validation_cost
                        test_score = np.mean([test_model(i) for i in range(n_test_batches)])
                        print('  epoch %i, minibatch test error of best model %f %%' % (epoch, test_score * 100.))
                if patience <= iteration:
                    return epoch, best_validation_loss, test_score
            print(' - finished epoch %d out of %d in %.1fs' %
                  (epoch, finetune_training_epochs, time.time() - epoch_start))
        return epoch, best_validation_loss, test_score

    epoch, best_validation_loss, test_score = main_loop()

    total_time = time.time()-start_time
    print('Optimization complete in %.1fs with best validation score of %f %%, with test performance %f %%' %
          (total_time, best_validation_loss * 100., test_score * 100.))
    print('The code ran for %d epochs (%.2f epochs/sec)' % (epoch + 1, (epoch + 1)/total_time))
def sample_from_trained_rbm(w_init, b_hidden_init, b_visible_init):

    # for sampling from trained model
    n_chains = 20
    n_samples = 10

    train_set, valid_set, test_set = get_dataset('mnist')

    test_set_x, _ = load_dataset(test_set)
    # sample from trained RBM
    number_of_test_samples = test_set_x.get_value(borrow=True).shape[0]
    # pick random test examples, with which to initialize the persistent chain
    rng = np.random.RandomState(123)
    test_idx = rng.randint(number_of_test_samples - n_chains)
    persistent_vis_chain = theano.shared(
        np.asarray(
            test_set_x.get_value(borrow=True)[test_idx:test_idx + n_chains],
            dtype=theano.config.floatX
        )
    )

    theano_rng = RandomStreams(rng.randint(2 ** 30))
    plot_every = 1000

    rbm = RBM(w_init, b_hidden_init, b_visible_init)

    (hidden_samples, hidden_activations, hidden_linear_activations,
     visible_samples, visible_activations, linear_visible_activations), sampling_updates = theano.scan(
        fn=lambda x: rbm.gibbs_update_visible_hidden_visible(x, theano_rng),
        outputs_info=[None, None, None, persistent_vis_chain, None, None],
        n_steps=plot_every
    )

    sampling_updates[persistent_vis_chain] = visible_samples[-1]
    sample_fn = theano.function(
        [],
        [
            visible_activations[-1],
            visible_samples[-1]
        ],
        updates=sampling_updates
    )

    image_data = np.zeros(
        (29 * n_samples + 1, 29 * n_chains - 1),
        dtype='uint8'
    )
    for idx in range(n_samples):
        # generate `plot_every` intermediate samples that we discard,
        # because successive samples in the chain are too correlated
        vis_activations, vis_sample = sample_fn()
        print(' ... plotting sample %d' % idx)
        image_data[29 * idx:29 * idx + 28, :] = tile_raster_images(
            X=vis_activations,
            img_shape=(28, 28),
            tile_shape=(1, n_chains),
            tile_spacing=(1, 1)
        )

    image_data = cv2.resize(image_data, dsize=None, fx=2., fy=2.)
    cv2.imshow('sampling', image_data)
    cv2.waitKey(-1)
def run_0_logistic_regression():
    batch_size = 600
    learning_rate = 0.13
    n_epochs = 1000

    train_set, valid_set, test_set = get_dataset('mnist')
    train_set_x, train_set_y = load_dataset(train_set)
    valid_set_x, valid_set_y = load_dataset(valid_set)
    test_set_x, test_set_y = load_dataset(test_set)

    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_validation_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    x = T.matrix('x')
    y = T.ivector('y')

    n_in=28*28
    n_out=10

    W = theano.shared(
        np.zeros((n_in, n_out), dtype=theano.config.floatX),
        name='W',
        borrow=True)
    b = theano.shared(
        np.zeros((n_out,), dtype=theano.config.floatX),
        name='b',
        borrow=True
    )
    py_given_x = T.nnet.softmax(T.dot(x, W)+b)
    y_predict = T.argmax(py_given_x, axis=1)

    cost = negative_log_likelihood_loss(py_given_x, y)

    minibatch_index = T.iscalar('minibatch_index')

    train_model = theano.function(
        inputs=[minibatch_index],
        outputs=[],
        updates=(
            [W, W - learning_rate*T.grad(cost, W)],
            [b, b - learning_rate*T.grad(cost, b)],
        ),
        givens={
            x: train_set_x[minibatch_index*batch_size:(minibatch_index+1)*batch_size],
            y: train_set_y[minibatch_index*batch_size:(minibatch_index+1)*batch_size],
        }
    )

    validation_model = theano.function(
        inputs=[minibatch_index],
        outputs=one_zero_loss(y_predict, y),
        givens={
            x: valid_set_x[minibatch_index*batch_size:(minibatch_index+1)*batch_size],
            y: valid_set_y[minibatch_index*batch_size:(minibatch_index+1)*batch_size],
        }
    )

    test_model = theano.function(
        inputs=[minibatch_index],
        outputs=one_zero_loss(y_predict, y),
        givens={
            x: test_set_x[minibatch_index*batch_size:(minibatch_index+1)*batch_size],
            y: test_set_y[minibatch_index*batch_size:(minibatch_index+1)*batch_size],
        }
    )

    start_time = time.time()

    def main_loop():
        patience = 5000
        patience_increase = 2
        improvement_threshold = 0.995
        validation_frequency = n_train_batches
        test_score = 0.
        best_validation_loss = np.inf
        for epoch in range(n_epochs):
            for minibatch_index in range(n_train_batches):
                train_model(minibatch_index)

                iteration = epoch*n_train_batches + minibatch_index
                if (iteration + 1) % validation_frequency == 0.:
                    validation_cost = np.mean([validation_model(i) for i in range(n_validation_batches)])
                    print('epoch %i, validation error %f %%' % (epoch, validation_cost * 100.))
                    if validation_cost < best_validation_loss:
                        if validation_cost < best_validation_loss*improvement_threshold:
                            patience = max(patience, iteration*patience_increase)
                        best_validation_loss = validation_cost
                        test_score = np.mean([test_model(i) for i in range(n_test_batches)])
                        print('  epoch %i, minibatch test error of best model %f %%' % (epoch, test_score * 100.))
                if patience <= iteration:
                    return epoch, best_validation_loss, test_score
        return epoch, best_validation_loss, test_score

    epoch, best_validation_loss, test_score = main_loop()

    total_time = time.time()-start_time
    print('Optimization complete in %.1fs with best validation score of %f %%, with test performance %f %%' %
          (total_time, best_validation_loss * 100., test_score * 100.))
    print('The code ran for %d epochs (%.2f epochs/sec)' % (epoch + 1, (epoch + 1)/total_time))

    assert abs(best_validation_loss - 0.075) < 1e-6
    assert abs(test_score - 0.07489583) < 1e-6
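
# negative_log_likelihood_loss() and one_zero_loss() are shared by all the
# classifiers in this file.  A minimal sketch of what they are assumed to
# compute: the mean negative log-probability of the correct class under the
# softmax output, and the mean misclassification rate.  The project's own
# helpers may differ in detail.
def _negative_log_likelihood_loss_sketch(py_given_x, y):
    # pick the log-probability of the correct class for every minibatch row
    return -T.mean(T.log(py_given_x)[T.arange(y.shape[0]), y])
def _one_zero_loss_sketch(y_predict, y):
    # fraction of examples where the arg-max prediction differs from the label
    return T.mean(T.neq(y_predict, y))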