Esempio n. 1
0
def sync_tparams(tparams, dup_tparams):
    """Synchronize parameters through their multiverso-side duplicates.

    For every key in ``dup_tparams`` the duplicate is filled with the first
    element of the matching ``tparams`` entry, all multiverso shared
    variables are synchronized, and the first element of each duplicate is
    then written back into ``tparams`` as a one-element array of dtype
    ``fX``.

    Relies on the module-level names ``np``, ``dup_size`` and ``fX``.

    :param tparams: mapping from name to an object exposing
        ``get_value``/``set_value``; must contain every key of
        ``dup_tparams``. Each entry is updated in place.
    :param dup_tparams: mapping from name to the duplicated variable.
        NOTE(review): presumably multiverso shared variables holding
        ``dup_size`` elements -- confirm against the caller.
    """
    try:
        from multiverso.theano_ext import sharedvar
    except ImportError:
        # fall back to the copy bundled with the enclosing package
        from ..multiverso_.theano_ext import sharedvar

    # ``items()`` exists on both Python 2 and 3, whereas ``iteritems()`` is
    # Python 2 only and raises AttributeError on Python 3.
    for kk, vv in dup_tparams.items():
        # broadcast the first element of the original over the duplicate
        vv.set_value(np.ones(dup_size) * tparams[kk].get_value()[0])
    sharedvar.sync_all_mv_shared_vars()
    for kk, vv in dup_tparams.items():
        # copy the synchronized value back as a shape-(1,) array
        tparams[kk].set_value(
            np.array([vv.get_value()[0]], dtype=fX).reshape((1, )))
Esempio n. 2
0
    def _test_sharedvar(self, row, col):
        """Check that a ``row`` x ``col`` multiverso shared variable syncs.

        A zero-initialised shared matrix ``W`` is incremented by a fixed
        ``delta`` (values 1..row*col) twice per iteration and then
        synchronized. After iteration ``i`` the test asserts that element
        ``j`` of the flattened matrix equals
        ``(j + 1) * (i + 1) * 2 * mv.workers_num()``.

        :param row: number of rows of the shared matrix
        :param col: number of columns of the shared matrix
        """
        W = sharedvar.mv_shared(value=np.zeros((row, col),
                                               dtype=theano.config.floatX),
                                name='W',
                                borrow=True)
        delta = np.arange(1, row * col + 1,
                          dtype=theano.config.floatX).reshape((row, col))
        train_model = theano.function([], updates=[(W, W + delta)])
        mv.barrier()

        # ``range`` instead of the Python 2 only ``xrange`` so the test also
        # runs on Python 3
        for i in range(100):
            train_model()
            train_model()
            sharedvar.sync_all_mv_shared_vars()
            mv.barrier()
            # to get the newest value, we must sync again
            sharedvar.sync_all_mv_shared_vars()
            for j, actual in enumerate(W.get_value().reshape(-1)):
                self.assertEqual((j + 1) * (i + 1) * 2 * mv.workers_num(),
                                 actual)
            mv.barrier()
Esempio n. 3
0
    def _test_sharedvar(self, row, col):
        """Verify synchronization of a ``row`` x ``col`` shared matrix.

        ``W`` starts as zeros and receives ``delta`` (the values
        1..row*col laid out row by row) twice in every iteration before
        being synchronized. The assertion expects element ``j`` of the
        flattened matrix to equal
        ``(j + 1) * (i + 1) * 2 * mv.workers_num()`` after iteration ``i``.

        :param row: number of rows of the shared matrix
        :param col: number of columns of the shared matrix
        """
        W = sharedvar.mv_shared(
            value=np.zeros(
                (row, col),
                dtype=theano.config.floatX
            ),
            name='W',
            borrow=True
        )
        delta = np.arange(
            1, row * col + 1, dtype=theano.config.floatX
        ).reshape((row, col))
        train_model = theano.function([], updates=[(W, W + delta)])
        mv.barrier()

        # ``xrange`` is Python 2 only; ``range`` works on both 2 and 3
        for i in range(100):
            train_model()
            train_model()
            sharedvar.sync_all_mv_shared_vars()
            mv.barrier()
            # to get the newest value, we must sync again
            sharedvar.sync_all_mv_shared_vars()
            expected_scale = (i + 1) * 2 * mv.workers_num()
            for j, actual in enumerate(W.get_value().reshape(-1)):
                self.assertEqual((j + 1) * expected_scale, actual)
            mv.barrier()
Esempio n. 4
0
# MULTIVERSO: all the workers will synchronize at the place you call barrier
mv.barrier()

# train model
batch_size = 50

for i in range(50):
    for start in range(0, len(x_train), batch_size):
        # every process only trains the batches assigned to itself; floor
        # division keeps the batch index an integer on Python 2 and 3 alike
        if start // batch_size % workers_num != worker_id:
            continue
        x_batch = x_train[start:start + batch_size]
        t_batch = t_train[start:start + batch_size]
        cost = train(x_batch, t_batch)

        # MULTIVERSO: sync value with multiverso after every batch
        sharedvar.sync_all_mv_shared_vars()

    # MULTIVERSO: all the workers will synchronize at the place you call barrier
    mv.barrier()  # barrier every epoch

    # master will calc the accuracy
    if mv.is_master_worker():
        predictions_test = predict(x_test)
        accuracy = np.mean(predictions_test == labels_test)

        # single parenthesized argument: prints identically on Python 2 and 3
        print("epoch %d - accuracy: %.4f" % (i + 1, accuracy))

# MULTIVERSO: You must call shutdown at the end of the file
mv.shutdown()
Esempio n. 5
0
# MULTIVERSO: all the workers will synchronize at the place you call barrier
mv.barrier()


# train model
batch_size = 50

for i in range(50):
    for start in range(0, len(x_train), batch_size):
        # every process only trains the batches assigned to itself; ``//``
        # keeps the batch index integral regardless of the Python version
        if start // batch_size % workers_num != worker_id:
            continue
        x_batch = x_train[start:start + batch_size]
        t_batch = t_train[start:start + batch_size]
        cost = train(x_batch, t_batch)

        # MULTIVERSO: sync value with multiverso after every batch
        sharedvar.sync_all_mv_shared_vars()

    # MULTIVERSO: all the workers will synchronize at the place you call barrier
    mv.barrier()  # barrier every epoch

    # master will calc the accuracy
    if mv.is_master_worker():
        predictions_test = predict(x_test)
        accuracy = np.mean(predictions_test == labels_test)

        # function-call form is valid on both Python 2 and Python 3
        print("epoch %d - accuracy: %.4f" % (i + 1, accuracy))

# MULTIVERSO: You must call shutdown at the end of the file
mv.shutdown()
Esempio n. 6
0
def sgd_optimization_mnist(learning_rate=0.13,
                           n_epochs=1000,
                           dataset='mnist.pkl.gz',
                           batch_size=600):
    """
    Demonstrate stochastic gradient descent optimization of a log-linear
    model, with the training minibatches distributed over multiverso
    workers

    This is demonstrated on MNIST.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: the path of the MNIST dataset file from
                 http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz

    :type batch_size: int
    :param batch_size: number of examples in each minibatch

    The master worker prints the validation error once per epoch and, when
    training ends, prints the test error and pickles the classifier to
    ``model.pkl`` in the current working directory.

    """
    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # MULTIVERSO: you should call mv.init before call multiverso apis
    mv.init()
    # MULTIVERSO: every process has distinct worker id
    worker_id = mv.worker_id()

    # MULTIVERSO: mv.workers_num will return the number of workers
    total_worker = mv.workers_num()

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    # generate symbolic variables for input (x and y represent a
    # minibatch)
    x = T.matrix('x')  # data, presented as rasterized images
    y = T.ivector('y')  # labels, presented as 1D vector of [int] labels

    # construct the logistic regression class
    # Each MNIST image has size 28*28
    classifier = LogisticRegression(input=x, n_in=28 * 28, n_out=10)

    # the cost we minimize during training is the negative log likelihood of
    # the model in symbolic format
    cost = classifier.negative_log_likelihood(y)

    # compiling a Theano function that computes the mistakes that are made by
    # the model on a minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # compute the gradient of cost with respect to theta = (W,b)
    g_W = T.grad(cost=cost, wrt=classifier.W)
    g_b = T.grad(cost=cost, wrt=classifier.b)

    # start-snippet-3
    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs.
    updates = [(classifier.W, classifier.W - learning_rate * g_W),
               (classifier.b, classifier.b - learning_rate * g_b)]

    # compiling a Theano function `train_model` that returns the cost, but in
    # the same time updates the parameter of the model based on the rules
    # defined in `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })
    # end-snippet-3

    ###############
    # TRAIN MODEL #
    ###############
    print('... training the model')
    validation_frequency = n_train_batches
    start_time = timeit.default_timer()

    # Defined up front so the final report cannot hit an unbound name when no
    # validation pass ran (n_epochs <= 0, or fewer training examples than
    # batch_size so that n_train_batches == 0).
    validation_loss = float('nan')
    epoch = 0

    while epoch < n_epochs:
        epoch = epoch + 1
        for minibatch_index in range(n_train_batches):
            # MULTIVERSO: we distribute the batches to different workers.
            # A worker will only train batches belonged to itself
            if minibatch_index % total_worker == worker_id:
                # the returned minibatch cost is not used here
                train_model(minibatch_index)
                # MULTIVERSO: when you want to commit all the delta of
                # parameters produced by mv_shared and update the latest
                # parameters from parameter server, you can call this function to
                # synchronize the values
                sharedvar.sync_all_mv_shared_vars()

            # global minibatch counter across epochs (0-based)
            iteration = (epoch - 1) * n_train_batches + minibatch_index

            # MULTIVERSO: only master worker will output the model
            if (mv.is_master_worker()
                    and (iteration + 1) % validation_frequency == 0):
                # compute zero-one loss on validation set
                validation_losses = [
                    validate_model(i) for i in range(n_valid_batches)
                ]
                validation_loss = numpy.mean(validation_losses)

                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       validation_loss * 100.))
        # MULTIVERSO: all the workers will synchronize at the place you call barrier
        mv.barrier()

    # MULTIVERSO: You should make sure only one process will output the result.
    # Otherwise results will be outputted repeatedly
    if mv.is_master_worker():
        end_time = timeit.default_timer()

        test_losses = [test_model(i) for i in range(n_test_batches)]
        test_score = numpy.mean(test_losses)

        print(('Optimization complete with validation score of %f %%,'
               'with test performance %f %%') %
              (validation_loss * 100., test_score * 100.))
        print('The code run for %d epochs, with %f epochs/sec' %
              (epoch, 1. * epoch / (end_time - start_time)))
        print(('The code for file ' + os.path.split(__file__)[1] +
               ' ran for %.1fs' % ((end_time - start_time))),
              file=sys.stderr)

        # save the model
        with open('model.pkl', 'wb') as f:
            pickle.dump(classifier, f)
    # MULTIVERSO: You must call shutdown at the end of the file
    mv.shutdown()