def sync_tparams(tparams, dup_tparams):
    """Synchronize the first element of each parameter through Multiverso.

    For every key in ``dup_tparams`` the first element of the matching
    ``tparams`` value is broadcast into the duplicate variable, all
    Multiverso shared variables are synchronized, and the first element of
    the synchronized duplicate is written back into ``tparams`` as a
    shape ``(1,)`` array of dtype ``fX``.

    :param tparams: mapping from name to an object exposing ``get_value`` /
        ``set_value``; only element ``[0]`` of each value is read, and each
        value is overwritten in place.
    :param dup_tparams: mapping with the same keys; its values are filled
        with ``np.ones(dup_size)`` scaled by the parameter's first element
        (presumably Multiverso shared variables -- confirm against caller).
    :returns: ``None``; ``tparams`` and ``dup_tparams`` are mutated.
    """
    try:
        from multiverso.theano_ext import sharedvar
    except ImportError:
        # Fall back to the package-relative ``multiverso_`` module.
        from ..multiverso_.theano_ext import sharedvar

    # ``items()`` is available on both Python 2 and Python 3, whereas
    # ``iteritems()`` only exists on Python 2.
    for name, duplicate in dup_tparams.items():
        first_value = tparams[name].get_value()[0]
        duplicate.set_value(np.ones(dup_size) * first_value)

    sharedvar.sync_all_mv_shared_vars()

    for name, duplicate in dup_tparams.items():
        synced_value = duplicate.get_value()[0]
        tparams[name].set_value(
            np.array([synced_value], dtype=fX).reshape((1,)))
def _test_sharedvar(self, row, col):
    """Check that a ``(row, col)`` Multiverso shared variable accumulates updates.

    A zero-initialised shared matrix ``W`` is incremented by a fixed
    ``delta`` (the values ``1 .. row * col`` in row-major order) twice per
    round for 100 rounds.  After each round the flattened element ``j`` is
    asserted to equal ``(j + 1) * (i + 1) * 2 * mv.workers_num()``, where
    ``i`` is the zero-based round index.

    :param row: number of rows of the shared matrix.
    :param col: number of columns of the shared matrix.
    """
    float_type = theano.config.floatX
    W = sharedvar.mv_shared(
        value=np.zeros((row, col), dtype=float_type),
        name='W',
        borrow=True,
    )
    delta = np.arange(1, row * col + 1, dtype=float_type).reshape((row, col))
    train_model = theano.function([], updates=[(W, W + delta)])
    mv.barrier()

    # ``range`` works on both Python 2 and Python 3 (``xrange`` is
    # Python 2 only).
    for i in range(100):
        # Apply the delta twice locally before synchronizing.
        train_model()
        train_model()
        sharedvar.sync_all_mv_shared_vars()
        mv.barrier()
        # to get the newest value, we must sync again
        sharedvar.sync_all_mv_shared_vars()

        # Loop-invariant factor of the expected value for this round.
        round_scale = (i + 1) * 2 * mv.workers_num()
        for j, actual in enumerate(W.get_value().reshape(-1)):
            self.assertEqual((j + 1) * round_scale, actual)
        # Barrier again before the next round of updates starts.
        mv.barrier()
def _test_sharedvar(self, row, col):
    """Check that a ``(row, col)`` Multiverso shared variable accumulates updates.

    A zero-initialised shared matrix ``W`` is incremented by a fixed
    ``delta`` (the values ``1 .. row * col`` in row-major order) twice per
    round for 100 rounds.  After each round the flattened element ``j`` is
    asserted to equal ``(j + 1) * (i + 1) * 2 * mv.workers_num()``, where
    ``i`` is the zero-based round index.

    :param row: number of rows of the shared matrix.
    :param col: number of columns of the shared matrix.
    """
    float_type = theano.config.floatX
    W = sharedvar.mv_shared(
        value=np.zeros((row, col), dtype=float_type),
        name='W',
        borrow=True,
    )
    delta = np.arange(1, row * col + 1, dtype=float_type).reshape((row, col))
    train_model = theano.function([], updates=[(W, W + delta)])
    mv.barrier()

    # ``range`` works on both Python 2 and Python 3 (``xrange`` is
    # Python 2 only).
    for i in range(100):
        # Apply the delta twice locally before synchronizing.
        train_model()
        train_model()
        sharedvar.sync_all_mv_shared_vars()
        mv.barrier()
        # to get the newest value, we must sync again
        sharedvar.sync_all_mv_shared_vars()

        # Loop-invariant factor of the expected value for this round.
        round_scale = (i + 1) * 2 * mv.workers_num()
        for j, actual in enumerate(W.get_value().reshape(-1)):
            self.assertEqual((j + 1) * round_scale, actual)
        # Barrier again before the next round of updates starts.
        mv.barrier()
# MULTIVERSO: all the workers will synchronize at the place you call barrier
mv.barrier()

# train model
batch_size = 50
for i in range(50):
    for start in range(0, len(x_train), batch_size):
        # every process only trains the batches assigned to itself;
        # `//` keeps the batch index an integer on Python 2 and Python 3
        if start // batch_size % workers_num != worker_id:
            continue
        x_batch = x_train[start:start + batch_size]
        t_batch = t_train[start:start + batch_size]
        cost = train(x_batch, t_batch)
        # MULTIVERSO: sync value with multiverso after every batch
        sharedvar.sync_all_mv_shared_vars()

    # MULTIVERSO: all the workers will synchronize at the place you call barrier
    mv.barrier()  # barrier every epoch

    # master will calc the accuracy
    if mv.is_master_worker():
        predictions_test = predict(x_test)
        accuracy = np.mean(predictions_test == labels_test)
        # parenthesised single-argument form is valid on Python 2 and 3
        print("epoch %d - accuracy: %.4f" % (i + 1, accuracy))

# MULTIVERSO: You must call shutdown at the end of the file
mv.shutdown()
mv.barrier()

# train model
batch_size = 50
for i in range(50):
    for start in range(0, len(x_train), batch_size):
        # every process only trains the batches assigned to itself;
        # `//` keeps the batch index an integer on Python 2 and Python 3
        if start // batch_size % workers_num != worker_id:
            continue
        x_batch = x_train[start:start + batch_size]
        t_batch = t_train[start:start + batch_size]
        cost = train(x_batch, t_batch)
        # MULTIVERSO: sync value with multiverso after every batch
        sharedvar.sync_all_mv_shared_vars()

    # MULTIVERSO: all the workers will synchronize at the place you call barrier
    mv.barrier()  # barrier every epoch

    # master will calc the accuracy
    if mv.is_master_worker():
        predictions_test = predict(x_test)
        accuracy = np.mean(predictions_test == labels_test)
        # parenthesised single-argument form is valid on Python 2 and 3
        print("epoch %d - accuracy: %.4f" % (i + 1, accuracy))

# MULTIVERSO: You must call shutdown at the end of the file
mv.shutdown()
def sgd_optimization_mnist(learning_rate=0.13, n_epochs=1000,
                           dataset='mnist.pkl.gz',
                           batch_size=600):
    """
    Demonstrate stochastic gradient descent optimization of a log-linear
    model, with the minibatches distributed over Multiverso workers

    This is demonstrated on MNIST.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: the path of the MNIST dataset file from
                 http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz

    :type batch_size: int
    :param batch_size: number of examples in each minibatch

    On the master worker the final test score is printed and the trained
    classifier is pickled to ``model.pkl`` in the current working
    directory.  ``mv.init()`` and ``mv.shutdown()`` are both called by this
    function.
    """
    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # MULTIVERSO: you should call mv.init before calling multiverso apis
    mv.init()
    # MULTIVERSO: every process has a distinct worker id
    worker_id = mv.worker_id()
    # MULTIVERSO: mv.workers_num will return the number of workers
    total_worker = mv.workers_num()

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    # generate symbolic variables for input (x and y represent a
    # minibatch)
    x = T.matrix('x')  # data, presented as rasterized images
    y = T.ivector('y')  # labels, presented as 1D vector of [int] labels

    # construct the logistic regression class
    # Each MNIST image has size 28*28
    classifier = LogisticRegression(input=x, n_in=28 * 28, n_out=10)

    # the cost we minimize during training is the negative log likelihood of
    # the model in symbolic format
    cost = classifier.negative_log_likelihood(y)

    # compiling a Theano function that computes the mistakes that are made by
    # the model on a minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # compute the gradient of cost with respect to theta = (W,b)
    g_W = T.grad(cost=cost, wrt=classifier.W)
    g_b = T.grad(cost=cost, wrt=classifier.b)

    # start-snippet-3
    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs.
    updates = [(classifier.W, classifier.W - learning_rate * g_W),
               (classifier.b, classifier.b - learning_rate * g_b)]

    # compiling a Theano function `train_model` that returns the cost, but at
    # the same time updates the parameters of the model based on the rules
    # defined in `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })
    # end-snippet-3

    ###############
    # TRAIN MODEL #
    ###############
    print('... training the model')

    # validate once per pass over the training minibatches
    validation_frequency = n_train_batches

    # Stays NaN if no validation ever runs (e.g. n_epochs == 0 or fewer
    # training samples than batch_size), so the final report on the master
    # cannot fail on an unbound name.
    validation_loss = float('nan')

    start_time = timeit.default_timer()

    epoch = 0
    while epoch < n_epochs:
        epoch += 1
        for minibatch_index in range(n_train_batches):
            # MULTIVERSO: we distribute the batches to different workers.
            # A worker will only train the batches that belong to itself
            if minibatch_index % total_worker == worker_id:
                train_model(minibatch_index)
                # MULTIVERSO: when you want to commit all the delta of
                # parameters produced by mv_shared and update the latest
                # parameters from parameter server, you can call this
                # function to synchronize the values
                sharedvar.sync_all_mv_shared_vars()

            # global minibatch counter (named to avoid shadowing `iter`)
            iteration = (epoch - 1) * n_train_batches + minibatch_index

            # MULTIVERSO: only master worker will output the model
            if (mv.is_master_worker() and
                    (iteration + 1) % validation_frequency == 0):
                # compute zero-one loss on validation set
                validation_losses = [validate_model(i)
                                     for i in range(n_valid_batches)]
                validation_loss = numpy.mean(validation_losses)

                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       validation_loss * 100.))

        # MULTIVERSO: all the workers will synchronize at the place you call
        # barrier
        # NOTE(review): assumed to run once per epoch, outside the minibatch
        # loop -- confirm against the original layout.
        mv.barrier()

    # MULTIVERSO: You should make sure only one process will output the result.
    # Otherwise results will be outputted repeatedly
    if mv.is_master_worker():
        end_time = timeit.default_timer()

        test_losses = [test_model(i) for i in range(n_test_batches)]
        test_score = numpy.mean(test_losses)

        print(('Optimization complete with validation score of %f %%,'
               'with test performance %f %%')
              % (validation_loss * 100., test_score * 100.))
        print('The code run for %d epochs, with %f epochs/sec' % (
            epoch, 1. * epoch / (end_time - start_time)))
        print(('The code for file ' +
               os.path.split(__file__)[1] +
               ' ran for %.1fs' % ((end_time - start_time))), file=sys.stderr)

        # save the model
        with open('model.pkl', 'wb') as f:
            pickle.dump(classifier, f)

    # MULTIVERSO: You must call shutdown at the end of the file
    mv.shutdown()