Example #1
 def _set_model(self, data_set, errors, batch_size=None):
     set_x, set_y = data_set
     if batch_size is None:
         model = my_theano.function([], errors, givens={
             self.x: set_x,
             self.y: set_y
         })
     else:
         index = self.index
         model = my_theano.function([index], errors, givens={
             self.x: self._get_mini_batch(set_x, batch_size, index),
             self.y: self._get_mini_batch(set_y, batch_size, index),
         })
     return model
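
The _get_mini_batch helper called in the batched branch is not shown in any of these examples. A minimal sketch, assuming it simply slices the shared dataset by the symbolic minibatch index (the same index * batch_size pattern that the SGD examples below write out inline in their givens):

 def _get_mini_batch(self, data, batch_size, index):
     # Hypothetical helper: take a symbolic slice of the shared dataset so it
     # can be substituted for self.x / self.y inside givens.
     return data[index * batch_size:(index + 1) * batch_size]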
Example #2
    def _set_train_model(self, train_set, cost, batch_size, learning_rate):
        print('compiling train model..')

        # compute gradients of weights and biases
        updates = []
        for i in range(self.depth):
            g_w = T.grad(cost, self.weights[i])
            g_b = T.grad(cost, self.biases[i])
            updates += [(self.weights[i], self.weights[i] - learning_rate * g_w),
                        (self.biases[i], self.biases[i] - learning_rate * g_b)]

        # print(self.train_set[1:3])
        train_set_x, train_set_y = train_set
        index = self.index
        self.train_model = my_theano.function([index], cost, updates=updates, givens={
            self.x: self._get_mini_batch(train_set_x, batch_size, index),
            self.y: self._get_mini_batch(train_set_y, batch_size, index)
        })
        if any([x.op.__class__.__name__ in ['Gemv', 'CGemv', 'Gemm', 'CGemm'] for x in
                self.train_model.maker.fgraph.toposort()]):
            print('Used the cpu')

        elif any([x.op.__class__.__name__ in ['GpuGemm', 'GpuGemv'] for x in
                  self.train_model.maker.fgraph.toposort()]):
            print('Used the gpu')

        else:
            print('ERROR, not able to tell if my_theano used the cpu or the gpu')
            print(self.train_model.maker.fgraph.toposort())
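
Once compiled, self.train_model takes only a minibatch index. A sketch of a driver loop that would call it, assuming n_epochs and n_train_batches are computed elsewhere (as in the SGD examples further down):

    def _train_loop(self, n_epochs, n_train_batches):
        # Hypothetical driver: call the compiled train_model once per
        # minibatch index in every epoch.
        for epoch in range(n_epochs):
            for minibatch_index in range(n_train_batches):
                minibatch_cost = self.train_model(minibatch_index)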
Example #3
 def get_b(self):
     print('_get_b:')
     layer = self.weighted_layers[1]
     print(layer.b.get_value())
     # print(layer.w.get_value())
     _get_b = my_theano.function([], layer.b)
     print(_get_b())
Example #4
    def set_train_model(self, train_set, cost_func, batch_size, learning_rate, l1_a=0.0, l2_a=0.0001):

        cost = cost_func(self.p_y, self.y) \
               + self.l1 * l1_a + self.l2 * l2_a

        print('compiling train model..')

        # compute gradients of weights and biases
        updates = []
        for i in range(self.depth):
            g_w = T.grad(cost, self.weights[i])
            g_b = T.grad(cost, self.biases[i])
            updates += [(self.weights[i], self.weights[i] - learning_rate * g_w),
                        (self.biases[i], self.biases[i] - learning_rate * g_b)]

        # print(self.train_set[1:3])
        train_set_x, train_set_y = train_set
        index = self.index
        self.train_model = my_theano.function([index], cost, updates=updates, givens={
            self.x: self._get_mini_batch(train_set_x, batch_size, index),
            self.y: self._get_mini_batch(train_set_y, batch_size, index)
        })

        # check if using gpu
        tu.check_gpu(self.train_model)
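
tu.check_gpu is not defined in any of these snippets; presumably it factors out the toposort inspection that Example #2 writes inline. A sketch under that assumption:

def check_gpu(compiled_fn):
    # Hypothetical stand-in for tu.check_gpu: inspect the optimized graph of a
    # compiled function and report whether CPU or GPU BLAS ops were selected.
    ops = [node.op.__class__.__name__ for node in compiled_fn.maker.fgraph.toposort()]
    if any(name in ('Gemv', 'CGemv', 'Gemm', 'CGemm') for name in ops):
        print('Used the cpu')
    elif any(name in ('GpuGemm', 'GpuGemv') for name in ops):
        print('Used the gpu')
    else:
        print('ERROR, not able to tell if my_theano used the cpu or the gpu')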
Example #5
    def set_train_model(self, train_set, cost_func, batch_size, learning_rate, l1_a=0.0, l2_a=0.0001):
        self.p_y = self.forward(self.x, batch_size)
        cost = cost_func(self.p_y, self.y) + self.l1 * l1_a + self.l2 * l2_a

        # set early stopping patience
        self.patience = 20
        self.lest_valid_error = np.inf

        print('compiling train model..')

        # compute gradients of weights and biases
        updates = []
        for layer in reversed(self.weighted_layers):
            g_w = T.grad(cost, layer.w)
            g_b = T.grad(cost, layer.b)
            updates += [(layer.w, layer.w - learning_rate * g_w),
                        (layer.b, layer.b - learning_rate * g_b)]

        train_set_x, train_set_y = train_set
        index = self.index
        self.train_model = my_theano.function([index], cost, updates=updates, givens={
            self.x: self._get_mini_batch(train_set_x, batch_size, index),
            self.y: self._get_mini_batch(train_set_y, batch_size, index)
        })

        # check if using gpu
        tu.check_gpu(self.train_model)
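
self.patience and self.lest_valid_error are initialized here but never read in this snippet. One plausible way a training driver could use them for early stopping; a sketch only, since valid_model and the surrounding loop are not shown in these examples:

    def _early_stop_check(self, n_valid_batches):
        # Hypothetical helper using the attributes set above; self.valid_model
        # is assumed to be a compiled validation function.
        valid_error = np.mean([self.valid_model(i) for i in range(n_valid_batches)])
        if valid_error < self.lest_valid_error:
            self.lest_valid_error = valid_error
            self.patience = 20           # reset patience on improvement
            return False
        self.patience -= 1
        return self.patience <= 0        # True -> stop training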
Example #6
 def _set_model(self, data_set, errors, batch_size=None):
     set_x, set_y = data_set
     if batch_size is None:
         model = my_theano.function([], errors, givens={
             self.x: set_x,
             self.y: set_y
         })
     else:
         raise NotImplementedError
     return model
Example #7
 def _set_model(self, data_set, errors, batch_size=None):
     set_x, set_y = data_set
     if batch_size is None:
         model = my_theano.function([],
                                    errors,
                                    givens={
                                        self.x: set_x,
                                        self.y: set_y
                                    })
     else:
         index = self.index
         model = my_theano.function(
             [index],
             errors,
             givens={
                 self.x: self._get_mini_batch(set_x, batch_size, index),
                 self.y: self._get_mini_batch(set_y, batch_size, index),
             })
     return model
Example #8
def predict():
    """
    An example of how to load a trained model and use it
    to predict labels.
    """

    # load the saved model
    classifier = pickle.load(open('best_model.pkl', 'rb'))

    # compile a predictor function
    predict_model = my_theano.function(
        inputs=[classifier.input],
        outputs=classifier.y_pred)

    # We can test it on some examples from the test set
    dataset = 'mnist.pkl.gz'
    datasets = load_data(dataset)
    test_set_x, test_set_y = datasets[2]
    test_set_x = test_set_x.get_value()

    predicted_values = predict_model(test_set_x[:10])
    print("Predicted values for the first 10 examples in test set:")
    print(predicted_values)
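
To sanity-check the output, the predictions can be compared against the true labels. A small sketch that could be appended to predict(), assuming test_set_y is the usual int32 cast of a shared variable returned by load_data, so it has to be materialized with eval():

    # Pull the true labels for the same ten examples and compare by eye.
    true_values = test_set_y.eval()[:10]
    print("True values for the first 10 examples in test set:")
    print(true_values)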
Example #9
    def set_train_model(self,
                        train_set,
                        cost_func,
                        batch_size,
                        learning_rate,
                        l1_a=0.0,
                        l2_a=0.0001):
        self.p_y = self.forward(self.x, batch_size)
        cost = cost_func(self.p_y, self.y) + self.l1 * l1_a + self.l2 * l2_a

        # set early stopping patience
        self.patience = 20
        self.lest_valid_error = np.inf

        print('compiling train model..')

        # compute gradients of weights and biases
        updates = []
        for layer in reversed(self.weighted_layers):
            g_w = T.grad(cost, layer.w)
            g_b = T.grad(cost, layer.b)
            updates += [(layer.w, layer.w - learning_rate * g_w),
                        (layer.b, layer.b - learning_rate * g_b)]

        train_set_x, train_set_y = train_set
        index = self.index
        self.train_model = my_theano.function(
            [index],
            cost,
            updates=updates,
            givens={
                self.x: self._get_mini_batch(train_set_x, batch_size, index),
                self.y: self._get_mini_batch(train_set_y, batch_size, index)
            })

        # check if using gpu
        tu.check_gpu(self.train_model)
Example #10
def sgd_optimization_mnist(learning_rate=0.13, n_epochs=1000,
                           dataset='data/mnist/mnist.pkl.gz',
                           batch_size=600):
    """
    Demonstrate stochastic gradient descent optimization of a log-linear
    model

    This is demonstrated on MNIST.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: the path of the MNIST dataset file from
                 http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz

    """
    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    # generate symbolic variables for input (x and y represent a
    # minibatch)
    x = T.matrix('x')  # data, presented as rasterized images
    y = T.ivector('y')  # labels, presented as 1D vector of [int] labels

    # construct the logistic regression class
    # Each MNIST image has size 28*28
    classifier = LogisticRegression(input=x, n_in=28 * 28, n_out=10)

    # the cost we minimize during training is the negative log likelihood of
    # the model in symbolic format
    cost = classifier.negative_log_likelihood(y)

    # compiling a Theano function that computes the mistakes that are made by
    # the model on a minibatch
    test_model = my_theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    validate_model = my_theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
            y: valid_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    # compute the gradient of cost with respect to theta = (W,b)
    g_W = T.grad(cost=cost, wrt=classifier.W)
    g_b = T.grad(cost=cost, wrt=classifier.b)

    # start-snippet-3
    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs.
    updates = [(classifier.W, classifier.W - learning_rate * g_W),
               (classifier.b, classifier.b - learning_rate * g_b)]

    # compiling a Theano function `train_model` that returns the cost, but in
    # the same time updates the parameter of the model based on the rules
    # defined in `updates`
    train_model = my_theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )
    # end-snippet-3

    ###############
    # TRAIN MODEL #
    ###############
    print('... training the model')
    # early-stopping parameters
    patience = 5000  # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
                                  # found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                  # considered significant
    validation_frequency = min(n_train_batches, patience // 2)
                                  # go through this many
                                  # minibatches before checking the network
                                  # on the validation set; in this case we
                                  # check every epoch

    best_validation_loss = numpy.inf
    test_score = 0.
    start_time = timeit.default_timer()

    done_looping = False
    epoch = 0
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in range(n_train_batches):

            minibatch_avg_cost = train_model(minibatch_index)
            # iteration number
            iter = (epoch - 1) * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:
                # compute zero-one loss on validation set
                validation_losses = [validate_model(i)
                                     for i in range(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)

                print(
                    'epoch %i, minibatch %i/%i, validation error %f %%' %
                    (
                        epoch,
                        minibatch_index + 1,
                        n_train_batches,
                        this_validation_loss * 100.
                    )
                )

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    #improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss *  \
                       improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    best_validation_loss = this_validation_loss
                    # test it on the test set

                    test_losses = [test_model(i)
                                   for i in range(n_test_batches)]
                    test_score = numpy.mean(test_losses)

                    print(
                        (
                            '     epoch %i, minibatch %i/%i, test error of'
                            ' best model %f %%'
                        ) %
                        (
                            epoch,
                            minibatch_index + 1,
                            n_train_batches,
                            test_score * 100.
                        )
                    )

                    # save the best model
                    with open('best_model.pkl', 'wb') as f:
                        pickle.dump(classifier, f)

            if patience <= iter:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print(
        (
            'Optimization complete with best validation score of %f %%, '
            'with test performance %f %%'
        )
        % (best_validation_loss * 100., test_score * 100.)
    )
    print('The code ran for %d epochs, with %f epochs/sec' % (
        epoch, 1. * epoch / (end_time - start_time)))
    print(('The code for file ' +
           os.path.split(__file__)[1] +
           ' ran for %.1fs' % ((end_time - start_time))), file=sys.stderr)
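
A minimal sketch of an entry point tying the training and prediction functions together, assuming sgd_optimization_mnist and predict live in the same module (as in the tutorial this code follows):

if __name__ == '__main__':
    # Train and pickle the best model, then reload it and predict a few labels.
    sgd_optimization_mnist()
    predict()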
Example #11
 def get_b(self):
     print(self.biases[0].get_value())
     print(self.weights[0].get_value())
     _get_b = my_theano.function([], self.biases[0])
     print(_get_b())
Example #12
File: mlp.py Project: yxiaohan/nn
def test_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=1000,
             dataset='mnist.pkl.gz', batch_size=600, n_hidden=50):
    """
    Demonstrate stochastic gradient descent optimization for a multilayer
    perceptron

    This is demonstrated on MNIST.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
    gradient)

    :type L1_reg: float
    :param L1_reg: L1-norm's weight when added to the cost (see
    regularization)

    :type L2_reg: float
    :param L2_reg: L2-norm's weight when added to the cost (see
    regularization)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: the path of the MNIST dataset file from
                 http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz


   """
    datasets = mnist.MNIST()

    train_set_x, train_set_y = datasets.theano_train_set()
    valid_set_x, valid_set_y = datasets.theano_valid_set()
    test_set_x, test_set_y = datasets.theano_test_set()

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
                        # [int] labels

    rng = numpy.random.RandomState(1234)

    # construct the MLP class
    classifier = MLP(
        rng=rng,
        input=x,
        n_in=28 * 28,
        n_hidden=n_hidden,
        n_out=10
    )

    # start-snippet-4
    # the cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); cost is expressed
    # here symbolically
    cost = (
        classifier.negative_log_likelihood(y)
        + L1_reg * classifier.L1
        + L2_reg * classifier.L2_sqr
    )
    # end-snippet-4

    # compiling a Theano function that computes the mistakes that are made
    # by the model on a minibatch
    test_model = my_theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    validate_model = my_theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    # start-snippet-5
    # compute the gradient of cost with respect to theta (sorted in params)
    # the resulting gradients will be stored in a list gparams
    gparams = [T.grad(cost, param) for param in classifier.params]

    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs

    # given two lists of the same length, A = [a1, a2, a3, a4] and
    # B = [b1, b2, b3, b4], zip generates a list C of same size, where each
    # element is a pair formed from the two lists :
    #    C = [(a1, b1), (a2, b2), (a3, b3), (a4, b4)]
    updates = [
        (param, param - learning_rate * gparam)
        for param, gparam in zip(classifier.params, gparams)
    ]

    # compiling a Theano function `train_model` that returns the cost, but
    # in the same time updates the parameter of the model based on the rules
    # defined in `updates`
    train_model = my_theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )
    # end-snippet-5

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    # early-stopping parameters
    patience = 10000  # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
                           # found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience // 2)
                                  # go through this many
                                  # minibatches before checking the network
                                  # on the validation set; in this case we
                                  # check every epoch

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in range(n_train_batches):

            minibatch_avg_cost = train_model(minibatch_index)
            # iteration number
            iter = (epoch - 1) * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:
                # compute zero-one loss on validation set
                validation_losses = [validate_model(i) for i
                                     in range(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)

                print(
                    'epoch %i, minibatch %i/%i, validation error %f %%' %
                    (
                        epoch,
                        minibatch_index + 1,
                        n_train_batches,
                        this_validation_loss * 100.
                    )
                )

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    #improve patience if loss improvement is good enough
                    if (
                        this_validation_loss < best_validation_loss *
                        improvement_threshold
                    ):
                        patience = max(patience, iter * patience_increase)

                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    test_losses = [test_model(i) for i
                                   in range(n_test_batches)]
                    test_score = numpy.mean(test_losses)

                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iter:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print(('Optimization complete. Best validation score of %f %% '
           'obtained at iteration %i, with test performance %f %%') %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print(('The code for file ' +
           os.path.split(__file__)[1] +
           ' ran for %.2fm / %f epochs per sec' % ((end_time - start_time) / 60., epoch/(end_time-start_time))), file=sys.stderr)
Example #13
from my_theano import function, config, shared, sandbox
import my_theano.sandbox.cuda.basic_ops
import my_theano.tensor as T
import numpy
import time

vlen = 10 * 30 * 768  # 10 x #cores x # threads per core
iters = 1000

rng = numpy.random.RandomState(22)
x = shared(numpy.asarray(rng.rand(vlen), 'float32'))
f = function([], T.exp(x))
print(f.maker.fgraph.toposort())
t0 = time.time()
for i in range(iters):
    r = f()
t1 = time.time()
print("Looping %d times took %f seconds" % (iters, t1 - t0))
print("Result is %s" % (r,))
print("Numpy result is %s" % (numpy.asarray(r),))
if numpy.any([isinstance(x.op, T.Elemwise) for x in f.maker.fgraph.toposort()]):
    print('Used the cpu')
else:
    print('Used the gpu')
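
Whether this snippet reports 'Used the gpu' depends on the Theano device configuration. One way to select the device is the THEANO_FLAGS environment variable, which Theano reads when it is first imported; a sketch (the exact flag values depend on the installed Theano/CUDA versions):

# Select the device before the first my_theano import; 'device' and 'floatX'
# are standard Theano config flags.
import os
os.environ['THEANO_FLAGS'] = 'device=gpu,floatX=float32'
import my_theano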