def train_offline(self, data, mean=None, std=None): print 'training....' train_samples=300000 val_samples=1000 test_samples=1000 batchSize = self.batchSize learning_rate = self.learning_rate momentum = self.momentum def gradient_updates_momentum(cost, params, learning_rate, momentum): updates = [] for param in params: param_update = theano.shared(param.get_value()*0., broadcastable=param.broadcastable) updates.append((param, param - learning_rate*param_update)) updates.append((param_update, momentum*param_update + (1. - momentum)*T.grad(cost, param))) return updates rng = numpy.random.RandomState(23455) # training data d = data.gen_samples_offline( nsamples=train_samples, purpose='train', patchSize=patchSize, mean=mean, std=std) data_mean = d[2] data_std = d[3] train_set_x, train_set_y = shared_dataset((d[0],d[1]), doCastLabels=True) d = data.gen_samples_offline( nsamples=val_samples, purpose='validate', patchSize=patchSize, mean=data_mean, std=data_std) valid_set_x, valid_set_y = shared_dataset((d[0],d[1]), doCastLabels=True) d = data.gen_samples_offline( nsamples=test_samples, purpose='test', patchSize=patchSize, mean=data_mean, std=data_std) test_set_x, test_set_y = shared_dataset((d[0],d[1]), doCastLabels=True) # compute number of minibatches for training, validation and testing n_train_batches = train_samples / batchSize n_valid_batches = val_samples / batchSize n_test_batches = test_samples / batchSize # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch # start-snippet-1 x = self.x #T.matrix('x') # the data is presented as rasterized images y = T.ivector('y') # the labels are presented as 1D vector of # [int] labels cost = self.cost(y) lr = T.scalar('learning_rate') m = T.scalar('momentum') learning_rate_shared = theano.shared(np.float32(learning_rate)) momentum_shared = theano.shared(np.float32(momentum)) print 'training data....' print 'min: ', np.min( train_set_x.eval() ) print 'max: ', np.max( train_set_x.eval() ) print 'n_train_batches:',n_train_batches print 'n_valid_batches:',n_valid_batches print 'n_test_batches:',n_test_batches # create a function to compute the mistakes that are made by the model test_model = theano.function( [index], self.errors(y), givens={ x: test_set_x[index * batchSize: (index + 1) * batchSize], y: test_set_y[index * batchSize: (index + 1) * batchSize] } ) validate_model = theano.function( [index], self.errors(y), givens={ x: valid_set_x[index * batchSize: (index + 1) * batchSize], y: valid_set_y[index * batchSize: (index + 1) * batchSize] } ) gparams = [] for param in self.params: gparam = T.grad(cost, param) gparams.append(gparam) updates = gradient_updates_momentum(cost, self.params, lr, m) train_model = theano.function(inputs=[index], outputs=cost, updates=updates, givens={ x: train_set_x[index * batchSize:(index + 1) * batchSize], y: train_set_y[index * batchSize:(index + 1) * batchSize], lr: learning_rate_shared, m: momentum_shared}) ############### # TRAIN MODEL # ############### print '... training' best_validation_loss = numpy.inf best_iter = 0 decrease_epoch = 1 decrease_patience = 1 test_score = 0. doResample = True validation_frequency = 1 start_time = time.clock() epoch = 0 done_looping = False last_avg_validation_loss = 0 avg_validation_losses = [] while (epoch < n_epochs) and (not self.done): minibatch_avg_costs = [] epoch = epoch + 1 if doResample and epoch>1: # and len(avg_validation_losses) > 0: epoch=0 avg = np.mean(avg_validation_losses) diff = abs(avg-last_avg_validation_loss) last_avg_validation_loss = avg avg_validation_losses = [] d = data.gen_samples_offline( nsamples=train_samples, purpose='train', patchSize=patchSize, mean=mean, std=std) dx = d[0] dy = d[1] train_set_x.set_value(np.float32(dx)) train_set_y.set_value(np.int32(dy)) for minibatch_index in xrange(n_train_batches): if self.done: break train_cost = train_model(minibatch_index) minibatch_avg_costs.append( train_cost ) # iteration number iter = (epoch - 1) * n_train_batches + minibatch_index if (iter + 1) % validation_frequency == 0: #self.save() # compute zero-one loss on validation set validation_losses = np.array([validate_model(i) for i in xrange(n_valid_batches)]) this_validation_loss = numpy.sum(validation_losses) * 100.0 / val_samples msg = 'epoch %i, minibatch %i/%i, training error %.3f, validation error %.2f %%' % (epoch, minibatch_index + 1, n_train_batches, minibatch_avg_costs[-1], this_validation_loss) print(msg) # if we got the best validation score until now if this_validation_loss < best_validation_loss: best_validation_loss = this_validation_loss best_iter = iter self.save() print "New best score!" end_time = time.clock() print(('Optimization complete. Best validation score of %f %% ' 'obtained at iteration %i, with test performance %f %%') % (best_validation_loss * 100., best_iter + 1, test_score * 100.)) print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.))
def train_mlp(learning_rate=0.01, n_epochs=10, batch_size=500, n_hidden=[500], patchSize=19, train_samples=1000, val_samples=10000, test_samples=1000, doResample=False, validation_frequency = 1, activation=rectified_linear, doEmailUpdate=False, momentum=0.0): def adadelta_updates(parameters,gradients,rho,eps): # create variables to store intermediate updates gradients_sq = [ theano.shared(np.zeros(p.get_value().shape, dtype=theano.config.floatX)) for p in parameters ] deltas_sq = [ theano.shared(np.zeros(p.get_value().shape, dtype=theano.config.floatX)) for p in parameters ] # calculates the new "average" delta for the next iteration gradients_sq_new = [ rho*g_sq + (1-rho)*(g**2) for g_sq,g in zip(gradients_sq,gradients) ] # calculates the step in direction. The square root is an approximation to getting the RMS for the average value deltas = [ (T.sqrt(d_sq+eps)/T.sqrt(g_sq+eps))*grad for d_sq,g_sq,grad in zip(deltas_sq,gradients_sq_new,gradients) ] # calculates the new "average" deltas for the next step. deltas_sq_new = [ rho*d_sq + (1-rho)*(d**2) for d_sq,d in zip(deltas_sq,deltas) ] # Prepare it as a list f gradient_sq_updates = zip(gradients_sq,gradients_sq_new) deltas_sq_updates = zip(deltas_sq,deltas_sq_new) parameters_updates = [ (p,p - d) for p,d in zip(parameters,deltas) ] return gradient_sq_updates + deltas_sq_updates + parameters_updates def gradient_updates_momentum(cost, params, learning_rate, momentum): updates = [] for param in params: param_update = theano.shared(param.get_value()*0., broadcastable=param.broadcastable) updates.append((param, param - learning_rate*param_update)) updates.append((param_update, momentum*param_update + (1. - momentum)*T.grad(cost, param))) return updates if doEmailUpdate: gmail_pwd = getpass.getpass() rng = numpy.random.RandomState(1234) data, norm_mean, norm_std, grayImages, labelImages, maskImages = generate_experiment_data_supervised(purpose='train', nsamples=train_samples, patchSize=patchSize, balanceRate=0.5, data_mean=0.5, data_std=1.0) train_set_x, train_set_y = shared_dataset(data, doCastLabels=True) data = generate_experiment_data_supervised(purpose='validate', nsamples=val_samples, patchSize=patchSize, balanceRate=0.5, data_mean=norm_mean, data_std=norm_std)[0] valid_set_x, valid_set_y = shared_dataset(data, doCastLabels=True) data = generate_experiment_data_supervised(purpose='test', nsamples=test_samples, patchSize=patchSize, balanceRate=0.5, data_mean=norm_mean, data_std=norm_std)[0] test_set_x, test_set_y = shared_dataset(data, doCastLabels=True) # compute number of minibatches for training, validation and testing n_train_batches = train_samples / batch_size n_valid_batches = val_samples / 1000 n_test_batches = test_samples / 1000 learning_rate_shared = theano.shared(np.float32(learning_rate)) momentum_shared = theano.shared(np.float32(momentum)) ###################### # BUILD ACTUAL MODEL # ###################### print '... building the model' # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch x = T.matrix('x') # the data is presented as rasterized images y = T.ivector('y') # the labels are presented as 1D vector of # [int] labels lr = T.scalar('learning_rate') m = T.scalar('momentum') # construct the MLP class classifier = MLP(rng=rng, input=x, n_in=patchSize**2, n_hidden=n_hidden, n_out=2, activation=activation) cost = classifier.negative_log_likelihood(y) test_model = theano.function(inputs=[index], outputs=classifier.errors(y), givens={ x: test_set_x[index * batch_size:(index + 1) * batch_size], y: test_set_y[index * batch_size:(index + 1) * batch_size]}) validate_model = theano.function(inputs=[index], outputs=classifier.errors(y), givens={ x: valid_set_x[index * batch_size:(index + 1) * batch_size], y: valid_set_y[index * batch_size:(index + 1) * batch_size]}) gparams = [] for param in classifier.params: gparam = T.grad(cost, param) gparams.append(gparam) #SGD # updates = [] # for param, gparam in zip(classifier.params, gparams): # updates.append((param, param - lr * gparam)) #updates = adadelta_updates(classifier.params, gparams, lr, 0.000001) updates = gradient_updates_momentum(cost, classifier.params, lr, m) train_model = theano.function(inputs=[index], outputs=cost, updates=updates, givens={ x: train_set_x[index * batch_size:(index + 1) * batch_size], y: train_set_y[index * batch_size:(index + 1) * batch_size], lr: learning_rate_shared, m: momentum_shared}) print '... training' best_validation_loss = numpy.inf best_iter = 0 decrease_epoch = 1 decrease_patience = 1 test_score = 0. start_time = time.clock() epoch = 0 done_looping = False # start pool for data print "Starting worker." pool = multiprocessing.Pool(processes=1) futureData = pool.apply_async(stupid_map_wrapper, [[generate_experiment_data_supervised,True, 'train', train_samples, patchSize, 0.5, 0.5, 1.0]]) while (epoch < n_epochs) and (not done_looping): minibatch_avg_costs = [] epoch = epoch + 1 if doResample and epoch>1: print "Waiting for data." data = futureData.get() print "GOT NEW DATA" train_set_x.set_value(np.float32(data[0])) train_set_y.set_value(np.int32(data[1])) futureData = pool.apply_async(stupid_map_wrapper, [[generate_experiment_data_supervised,True, 'train', train_samples, patchSize, 0.5, 0.5, 1.0]]) # try: # data = futureData.get(timeout=1) # print "GOT NEW DATA" # train_set_x.set_value(np.float32(data[0])) # train_set_y.set_value(np.int32(data[1])) # futureData = pool.apply_async(stupid_map_wrapper, [[generate_experiment_data_supervised,True, 'train', train_samples, patchSize, 0.5, norm_mean, 1.0]]) # except multiprocessing.TimeoutError: # print "TIMEOUT, TRAINING ANOTHER ROUND WITH CURRENT DATA" # pass # for minibatch_index in xrange(n_train_batches): minibatch_avg_costs.append(train_model(minibatch_index)) # iteration number iter = (epoch - 1) * n_train_batches + minibatch_index if (iter + 1) % validation_frequency == 0: classifier.save_MLP('current.pkl') # compute zero-one loss on validation set validation_losses = np.array([validate_model(i) for i in xrange(n_valid_batches)]) this_validation_loss = numpy.mean(validation_losses*100.0) msg = 'epoch %i, minibatch %i/%i, training error %.3f, validation error %.2f %%' % (epoch, minibatch_index + 1, n_train_batches, minibatch_avg_costs[-1], this_validation_loss) print(msg) classifier.trainingCost.append(minibatch_avg_costs[-1]) classifier.validationError.append(this_validation_loss*100) # if we got the best validation score until now if this_validation_loss < best_validation_loss: best_validation_loss = this_validation_loss best_iter = iter classifier.save_MLP('best_so_far.pkl') print "New best score!" if doEmailUpdate: send_email(gmail_pwd, msg) # test it on the test set #test_losses = [test_model(i) for i # in xrange(n_test_batches)] #test_score = numpy.mean(test_losses) # #print(('epoch %i, minibatch %i/%i, test error of ' # 'best model %f %%') % # (epoch, minibatch_index + 1, n_train_batches, # test_score * 100.)) pool.close() pool.join() print "Pool closed." end_time = time.clock() print(('Optimization complete. Best validation score of %f %% ' 'obtained at iteration %i, with test performance %f %%') % (best_validation_loss * 100., best_iter + 1, test_score * 100.)) print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.)) return classifier
def train(self, offline=False, data=None, mean=None, std=None ): print 'mlp.train' def gradient_updates_momentum(cost, params, learning_rate, momentum): updates = [] for param in params: param_update = theano.shared(param.get_value()*0., broadcastable=param.broadcastable) updates.append((param, param - learning_rate*param_update)) updates.append((param_update, momentum*param_update + (1. - momentum)*T.grad(cost, param))) return updates patchSize = self.patchSize batchSize = self.batchSize learning_rate = self.learning_rate momentum = self.momentum rng = numpy.random.RandomState(1234) tx, ty, vx, vy, reset = data.sample() train_samples = len(ty) val_samples = len(vy) train_set_x, train_set_y = shared_dataset((tx, ty), doCastLabels=True) if val_samples > 0: valid_set_x, valid_set_y = shared_dataset((vx, vy), doCastLabels=True) if reset: self.best_validation_loss = numpy.inf # compute number of minibatches for training, validation and testing n_train_batches = train_samples / batchSize n_valid_batches = val_samples / 1000 #batchSize # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch # start-snippet-1 x = self.x #T.matrix('x') # the data is presented as rasterized images y = T.ivector('y') # the labels are presented as 1D vector of # [int] labels cost = self.cost(y) lr = T.scalar('learning_rate') m = T.scalar('momentum') learning_rate_shared = theano.shared(np.float32(learning_rate)) momentum_shared = theano.shared(np.float32(momentum)) print 'training data....' print 'n_train_batches:',n_train_batches print 'n_valid_batches:',n_valid_batches print 'train_samples:', train_samples print 'val_samples:', val_samples print 'best_validation:', self.best_validation_loss if val_samples > 0: validate_model = theano.function( [index], self.errors(y), givens={ x: valid_set_x[index * batchSize: (index + 1) * batchSize], y: valid_set_y[index * batchSize: (index + 1) * batchSize] } ) predict_samples = theano.function( [], outputs=T.neq(self.y_pred, y), givens={ x: train_set_x, y: train_set_y, } ) gparams = [] for param in self.params: gparam = T.grad(cost, param) gparams.append(gparam) updates = gradient_updates_momentum(cost, self.params, lr, m) train_model = theano.function(inputs=[index], outputs=cost, updates=updates, givens={ x: train_set_x[index * batchSize:(index + 1) * batchSize], y: train_set_y[index * batchSize:(index + 1) * batchSize], lr: learning_rate_shared, m: momentum_shared}) ############### # TRAIN MODEL # ############### print '... training' validation_frequency = 1 start_time = time.clock() minibatch_avg_costs = [] iter = 0 epoch = 0 self.best_train_error = np.inf last_train_error = numpy.inf for minibatch_index in xrange(n_train_batches): if self.done: break train_cost = train_model(minibatch_index) minibatch_avg_costs.append( train_cost ) # iteration number iter = (epoch - 1) * n_train_batches + minibatch_index if n_valid_batches == 0: train_error = minibatch_avg_costs[-1].item(0) print minibatch_index, '-', train_error if train_error < self.best_train_error: self.best_train_error = train_error self.save() if n_valid_batches > 0 and (iter + 1) % validation_frequency == 0: # compute zero-one loss on validation set validation_losses = np.array([validate_model(i) for i in xrange(n_valid_batches)]) #this_validation_loss = numpy.sum(validation_losses) * 100.0 / val_samples this_validation_loss = numpy.mean(validation_losses*100.0) elapsed_time = time.clock() - start_time data.report_stats( self.id, elapsed_time, minibatch_index, this_validation_loss, minibatch_avg_costs[-1].item(0)) # if we got the best validation score until now if this_validation_loss < self.best_validation_loss: self.best_validation_loss = this_validation_loss self.save() print "New best score!" #if n_valid_batches == 0: # self.save() if not self.offline: probs = predict_samples() data.p[ data.i_train ] = probs data.save_stats()
def train_online(self, data): print 'train online...' def gradient_updates_momentum(cost, params, learning_rate, momentum): updates = [] for param in params: param_update = theano.shared(param.get_value()*0., broadcastable=param.broadcastable) updates.append((param, param - learning_rate*param_update)) updates.append((param_update, momentum*param_update + (1. - momentum)*T.grad(cost, param))) return updates # DATA INITIALIZATION d = data.sample() train_x = d[0] train_y = d[1] valid_x = d[2] valid_y = d[3] reset = d[4] if reset: self.best_validation_loss = numpy.inf print 'best_validation:', self.best_validation_loss train_samples = len(train_y) valid_samples = len(valid_y) if self.resample: self.lr_shared.set_value( np.float32(self.learning_rate) ) self.m_shared.set_value( np.float32(self.momentum) ) else: self.resample = True self.y = T.ivector('y') # the labels are presented as 1D vector of [int] labels self.lr = T.scalar('learning_rate') self.m = T.scalar('momentum') self.lr_shared = theano.shared(np.float32(self.learning_rate)) self.m_shared = theano.shared(np.float32(self.momentum)) index = T.lscalar() # index to a [mini]batch x = self.x y = self.y lr = self.lr m = self.m lr_shared = self.lr_shared m_shared = self.m_shared patchSize = self.patchSize batchSize = self.batchSize train_set_x, train_set_y = shared_dataset((train_x, train_y), doCastLabels=True) if valid_samples > 0: valid_set_x, valid_set_y = shared_dataset((valid_x, valid_y), doCastLabels=True) # compute number of minibatches for training, validation n_train_batches = train_samples / batchSize n_valid_batches = valid_samples / batchSize #BUILD THE MODEL cost = self.cost(y) if valid_samples > 0: validate_model = theano.function( [index], self.errors(y), givens={ x: valid_set_x[index * batchSize: (index + 1) * batchSize], y: valid_set_y[index * batchSize: (index + 1) * batchSize] } ) predict_samples = theano.function( inputs=[index], outputs=T.neq(self.mlp.y_pred, self.y), givens={ x: train_set_x[index * batchSize: (index + 1) * batchSize], y: train_set_y[index * batchSize: (index + 1) * batchSize] } ) gparams = [] for param in self.params: gparam = T.grad(cost, param) gparams.append(gparam) updates = gradient_updates_momentum(cost, self.params, lr, m) train_model = theano.function(inputs=[index], outputs=cost, updates=updates, givens={ x: train_set_x[index * batchSize:(index + 1) * batchSize], y: train_set_y[index * batchSize:(index + 1) * batchSize], lr: lr_shared, m: m_shared}) # TRAIN THE MODEL print '... training' best_iter = 0 validation_frequency = 1 start_time = time.clock() elapsed_time = 0 iter = 0 minibatch_avg_costs = [] minibatch_index = 0 count1 = 0 count2 = 0 while (elapsed_time < self.trainTime)\ and (minibatch_index<n_train_batches)\ and (not self.done): train_cost = train_model(minibatch_index) #print '----->traincost:', type(train_cost), train_cost minibatch_avg_costs.append(train_cost) #print 'minibatch_index:', minibatch_index, 'n_train_batches:',n_train_batches, self.batchSize, probs = predict_samples(minibatch_index) indices = data.i_train[minibatch_index * batchSize:(minibatch_index + 1) * batchSize] data.p[ indices ] = probs #print 'probs:', probs iter += 1 if (iter + 1) % validation_frequency == 0 and n_valid_batches > 0: validation_losses = np.array([validate_model(i) for i in xrange(n_valid_batches)]) this_validation_loss = numpy.sum(validation_losses) * 100.0 / valid_samples elapsed_time = time.clock() - start_time data.report_stats( self.id, elapsed_time, minibatch_index, this_validation_loss, minibatch_avg_costs[-1].item(0)) # if we got the best validation score until now count1 += len(np.where(probs==0)[0]) count2 += len(np.where(probs==1)[0]) data.add_validation_loss( this_validation_loss ) if this_validation_loss < self.best_validation_loss: self.best_validation_loss = this_validation_loss best_iter = iter print '===>saving....' self.save() print "New best score!" # advance to next mini batch minibatch_index += 1 # update elapsed time elapsed_time = time.clock() - start_time data.save_stats() p = data.p[ data.i_train ] n_bad = len( np.where( p == 1 )[0] ) error = float(n_bad)/len(p) print '----------' print 'accuracy:', data.accuracy print 'error:', error print 'lerror:', self.error print 'probi:', np.bincount( np.int64( p ) ) if n_valid_batches == 0: self.save() elapsed_time = time.clock() - start_time msg = 'The code ran for' status = '%f seconds' % (elapsed_time) Utility.report_status( msg, status )
def train_offline(self, data, mean=None, std=None): print 'training....' train_samples = 700000 val_samples = 5000 test_samples = 1000 n_epochs = 5000 patchSize = self.patchSize batchSize = 50 #self.batchSize learning_rate = self.learning_rate momentum = 0.9 #self.momentum def gradient_updates_momentum(cost, params, learning_rate, momentum): updates = [] for param in params: param_update = theano.shared(param.get_value() * 0., broadcastable=param.broadcastable) updates.append((param, param - learning_rate * param_update)) updates.append((param_update, momentum * param_update + (1. - momentum) * T.grad(cost, param))) return updates rng = numpy.random.RandomState(1234) # training data d = data.gen_samples_offline(nsamples=train_samples, purpose='train', patchSize=patchSize, mean=mean, std=std) data_mean = d[2] data_std = d[3] train_set_x, train_set_y = shared_dataset((d[0], d[1]), doCastLabels=True) d = data.gen_samples_offline(nsamples=val_samples, purpose='validate', patchSize=patchSize, mean=data_mean, std=data_std) valid_set_x, valid_set_y = shared_dataset((d[0], d[1]), doCastLabels=True) d = data.gen_samples_offline(nsamples=test_samples, purpose='test', patchSize=patchSize, mean=data_mean, std=data_std) test_set_x, test_set_y = shared_dataset((d[0], d[1]), doCastLabels=True) ''' d = gen_data_supervised( purpose='train', nsamples=train_samples, patchSize=patchSize, balanceRate=0.5, data_mean=mean, data_std=std) data = d[0] train_set_x, train_set_y = shared_dataset(data, doCastLabels=True) #print 'data:', np.shape(data) #print 'train:', np.shape(train_set_x), np.shape(train_set_y) #print 'valid:', np.shape(valid_set_x), np.shape(valid_set_y) #print 'test :', np.shape(test_set_x), np.shape(test_set_y) norm_mean = d[1] norm_std = d[2] grayImages = d[3] labelImages = d[4] maskImages = d[5] print 'norm_std:', norm_std print 'norm_mean:',norm_mean # validation data d = gen_data_supervised( purpose='validate', nsamples=val_samples, patchSize=patchSize, balanceRate=0.5, data_mean=norm_mean, data_std=norm_std)[0] valid_set_x, valid_set_y = shared_dataset(d, doCastLabels=True) # test data d = gen_data_supervised( purpose='test', nsamples=test_samples, patchSize=patchSize, balanceRate=0.5, data_mean=norm_mean, data_std=norm_std)[0] test_set_x, test_set_y = shared_dataset(d, doCastLabels=True) ''' # compute number of minibatches for training, validation and testing n_train_batches = train_samples / batchSize n_valid_batches = val_samples / 1000 #batchSize n_test_batches = test_samples / 1000 #batchSize # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch # start-snippet-1 x = self.x #T.matrix('x') # the data is presented as rasterized images y = T.ivector('y') # the labels are presented as 1D vector of # [int] labels cost = self.cost(y) lr = T.scalar('learning_rate') m = T.scalar('momentum') learning_rate_shared = theano.shared(np.float32(learning_rate)) momentum_shared = theano.shared(np.float32(momentum)) print 'training data....' print 'min: ', np.min(train_set_x.eval()) print 'max: ', np.max(train_set_x.eval()) print 'n_train_batches:', n_train_batches print 'n_valid_batches:', n_valid_batches print 'n_test_batches:', n_test_batches # create a function to compute the mistakes that are made by the model test_model = theano.function( [index], self.errors(y), givens={ x: test_set_x[index * batchSize:(index + 1) * batchSize], y: test_set_y[index * batchSize:(index + 1) * batchSize] }) validate_model = theano.function( [index], self.errors(y), givens={ x: valid_set_x[index * batchSize:(index + 1) * batchSize], y: valid_set_y[index * batchSize:(index + 1) * batchSize] }) gparams = [] for param in self.params: gparam = T.grad(cost, param) gparams.append(gparam) updates = gradient_updates_momentum(cost, self.params, lr, m) train_model = theano.function( inputs=[index], outputs=cost, updates=updates, givens={ x: train_set_x[index * batchSize:(index + 1) * batchSize], y: train_set_y[index * batchSize:(index + 1) * batchSize], lr: learning_rate_shared, m: momentum_shared }) ############### # TRAIN MODEL # ############### print '... training' best_validation_loss = numpy.inf best_iter = 0 decrease_epoch = 1 decrease_patience = 1 test_score = 0. doResample = True validation_frequency = 1 start_time = time.clock() epoch = 0 done_looping = False print 'lr:', learning_rate print 'patchsizwe:', patchSize print 'm:', momentum print 'n_train_batches:', n_train_batches print 'n_valid_batches:', n_valid_batches print 'n_test_batches:', n_test_batches # start pool for data print "Starting worker." ''' pool = multiprocessing.Pool(processes=1) futureData = pool.apply_async( stupid_map_wrapper, [[gen_data_supervised,True, 'train', train_samples, patchSize, 0.5, 0.5, 1.0]]) ''' last_avg_validation_loss = 0 avg_validation_losses = [] while (epoch < n_epochs) and (not self.done): minibatch_avg_costs = [] epoch = epoch + 1 if doResample and epoch > 1: # and len(avg_validation_losses) > 0: epoch = 0 avg = np.mean(avg_validation_losses) diff = abs(avg - last_avg_validation_loss) last_avg_validation_loss = avg avg_validation_losses = [] #if diff < 0.025: print 'resampling...' print 'diff:', diff print 'last_avg_validation_loss:', last_avg_validation_loss ''' d = gen_data_supervised( purpose='train', nsamples=train_samples, patchSize=patchSize, balanceRate=0.5, data_mean=mean, data_std=std) data = d[0] train_set_x.set_value(np.float32(data[0])) train_set_y.set_value(np.int32(data[1])) ''' d = data.gen_samples_offline(nsamples=train_samples, purpose='train', patchSize=patchSize, mean=mean, std=std) dx = d[0] dy = d[1] train_set_x.set_value(np.float32(dx)) train_set_y.set_value(np.int32(dy)) for minibatch_index in xrange(n_train_batches): if self.done: break train_cost = train_model(minibatch_index) minibatch_avg_costs.append(train_cost) # iteration number iter = (epoch - 1) * n_train_batches + minibatch_index if (iter + 1) % validation_frequency == 0: #self.save() # compute zero-one loss on validation set validation_losses = np.array( [validate_model(i) for i in xrange(n_valid_batches)]) #this_validation_loss = numpy.sum(validation_losses) * 100.0 / val_samples this_validation_loss = numpy.mean(validation_losses * 100.0) avg_validation_losses.append(this_validation_loss * 100) msg = 'epoch %i, minibatch %i/%i, training error %.3f, validation error %.2f %%' % ( epoch, minibatch_index + 1, n_train_batches, minibatch_avg_costs[-1], this_validation_loss) print(msg) # if we got the best validation score until now if this_validation_loss < best_validation_loss: best_validation_loss = this_validation_loss best_iter = iter self.save() print "New best score!" #pool.close() #pool.join() print "Pool closed." end_time = time.clock() print(('Optimization complete. Best validation score of %f %% ' 'obtained at iteration %i, with test performance %f %%') % (best_validation_loss * 100., best_iter + 1, test_score * 100.)) print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.))
def train_online(self, data): print 'train online...' def gradient_updates_momentum(cost, params, learning_rate, momentum): updates = [] for param in params: param_update = theano.shared(param.get_value()*0., broadcastable=param.broadcastable) updates.append((param, param - learning_rate*param_update)) updates.append((param_update, momentum*param_update + (1. - momentum)*T.grad(cost, param))) return updates # DATA INITIALIZATION d = data.sample() train_x = d[0] train_y = d[1] valid_x = d[2] valid_y = d[3] reset = d[4] if reset: self.best_validation_loss = numpy.inf train_samples = len(train_y) valid_samples = len(valid_y) print 'valid_samples:',valid_samples print 'train_samples:', train_samples if self.resample: self.lr_shared.set_value( np.float32(self.learning_rate) ) self.m_shared.set_value( np.float32(self.momentum) ) else: self.resample = True self.y = T.ivector('y') # the labels are presented as 1D vector of [int] labels self.lr = T.scalar('learning_rate') self.m = T.scalar('momentum') self.lr_shared = theano.shared(np.float32(self.learning_rate)) self.m_shared = theano.shared(np.float32(self.momentum)) index = T.lscalar() # index to a [mini]batch x = self.x y = self.y lr = self.lr m = self.m lr_shared = self.lr_shared m_shared = self.m_shared patchSize = self.patchSize batchSize = self.batchSize train_set_x, train_set_y = shared_dataset((train_x, train_y), doCastLabels=True) if valid_samples > 0: valid_set_x, valid_set_y = shared_dataset((valid_x, valid_y), doCastLabels=True) # compute number of minibatches for training, validation n_train_batches = train_samples / batchSize n_valid_batches = valid_samples / batchSize #BUILD THE MODEL cost = self.cost(y) if valid_samples > 0: validate_model = theano.function( [index], self.errors(y), givens={ x: valid_set_x[index * batchSize: (index + 1) * batchSize], y: valid_set_y[index * batchSize: (index + 1) * batchSize] } ) ''' predict_samples = theano.function( inputs=[index], outputs=T.neq(self.y_pred, self.y), givens={ x: train_set_x[index * batchSize: (index + 1) * batchSize], y: train_set_y[index * batchSize: (index + 1) * batchSize] } ) ''' predict_samples = theano.function( [], outputs=T.neq(self.y_pred, self.y), givens={ x: train_set_x, y: train_set_y, } ) gparams = [] for param in self.params: gparam = T.grad(cost, param) gparams.append(gparam) updates = gradient_updates_momentum(cost, self.params, lr, m) train_model = theano.function(inputs=[index], outputs=cost, updates=updates, givens={ x: train_set_x[index * batchSize:(index + 1) * batchSize], y: train_set_y[index * batchSize:(index + 1) * batchSize], lr: lr_shared, m: m_shared}) # TRAIN THE MODEL print '... training' print 'self.best_validation_loss:', self.best_validation_loss best_iter = 0 validation_frequency = 1 start_time = time.clock() elapsed_time = 0 iter = 0 minibatch_avg_costs = [] minibatch_index = 0 #while (elapsed_time < self.trainTime)\ # and (minibatch_index<n_train_batches)\ # and (not self.done): while (minibatch_index<n_train_batches) and (not self.done): if (elapsed_time >= self.trainTime): break train_cost = train_model(minibatch_index) # test the trained samples against the target # values to measure the training performance i = minibatch_index ''' probs = predict_samples(minibatch_index) #print 'probs:', probs.shape i_batch = data.i_train[ i * batchSize:(i+1)*batchSize ] data.p[ i_batch ] = probs ''' ''' good = np.where( probs == 0)[0] bad = np.where( probs == 1)[0] print 'bad:', len(bad) print 'good:', len(good) #print probs ''' #print '----->traincost:', type(train_cost), train_cost minibatch_avg_costs.append(train_cost) iter += 1 #iter = (epoch - 1) * n_train_batches + minibatch_index if (iter + 1) % validation_frequency == 0 and valid_samples > 0: validation_losses = np.array([validate_model(i) for i in xrange(n_valid_batches)]) this_validation_loss = numpy.sum(validation_losses) * 100.0 / valid_samples elapsed_time = time.clock() - start_time ''' self.reportTrainingStats(elapsed_time, minibatch_index, this_validation_loss, minibatch_avg_costs[-1].item(0)) ''' print this_validation_loss, '/', self.best_validation_loss data.add_validation_loss( this_validation_loss ) # if we got the best validation score until now if this_validation_loss < self.best_validation_loss: self.best_validation_loss = this_validation_loss best_iter = iter self.save() print "New best score!" # advance to next mini batch minibatch_index += 1 # update elapsed time elapsed_time = time.clock() - start_time if valid_samples == 0: self.save() probs = predict_samples() data.p[ data.i_train ] = probs elapsed_time = time.clock() - start_time msg = 'The code an for' status = '%f seconds' % (elapsed_time) Utility.report_status( msg, status ) print 'done...'
def train(self, offline=False, data=None, mean=None, std=None): print 'mlp.train' def gradient_updates_momentum(cost, params, learning_rate, momentum): updates = [] for param in params: param_update = theano.shared(param.get_value() * 0., broadcastable=param.broadcastable) updates.append((param, param - learning_rate * param_update)) updates.append((param_update, momentum * param_update + (1. - momentum) * T.grad(cost, param))) return updates patchSize = self.patchSize batchSize = self.batchSize learning_rate = self.learning_rate momentum = self.momentum rng = numpy.random.RandomState(1234) tx, ty, vx, vy, reset = data.sample() train_samples = len(ty) val_samples = len(vy) train_set_x, train_set_y = shared_dataset((tx, ty), doCastLabels=True) if val_samples > 0: valid_set_x, valid_set_y = shared_dataset((vx, vy), doCastLabels=True) if reset: self.best_validation_loss = numpy.inf # compute number of minibatches for training, validation and testing n_train_batches = train_samples / batchSize n_valid_batches = val_samples / 1000 #batchSize # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch # start-snippet-1 x = self.x #T.matrix('x') # the data is presented as rasterized images y = T.ivector('y') # the labels are presented as 1D vector of # [int] labels cost = self.cost(y) lr = T.scalar('learning_rate') m = T.scalar('momentum') learning_rate_shared = theano.shared(np.float32(learning_rate)) momentum_shared = theano.shared(np.float32(momentum)) print 'training data....' print 'n_train_batches:', n_train_batches print 'n_valid_batches:', n_valid_batches print 'train_samples:', train_samples print 'val_samples:', val_samples print 'best_validation:', self.best_validation_loss if val_samples > 0: validate_model = theano.function( [index], self.errors(y), givens={ x: valid_set_x[index * batchSize:(index + 1) * batchSize], y: valid_set_y[index * batchSize:(index + 1) * batchSize] }) predict_samples = theano.function([], outputs=T.neq(self.y_pred, y), givens={ x: train_set_x, y: train_set_y, }) gparams = [] for param in self.params: gparam = T.grad(cost, param) gparams.append(gparam) updates = gradient_updates_momentum(cost, self.params, lr, m) train_model = theano.function( inputs=[index], outputs=cost, updates=updates, givens={ x: train_set_x[index * batchSize:(index + 1) * batchSize], y: train_set_y[index * batchSize:(index + 1) * batchSize], lr: learning_rate_shared, m: momentum_shared }) ############### # TRAIN MODEL # ############### print '... training' validation_frequency = 1 start_time = time.clock() minibatch_avg_costs = [] iter = 0 epoch = 0 self.best_train_error = np.inf last_train_error = numpy.inf for minibatch_index in xrange(n_train_batches): if self.done: break train_cost = train_model(minibatch_index) minibatch_avg_costs.append(train_cost) # iteration number iter = (epoch - 1) * n_train_batches + minibatch_index if n_valid_batches == 0: train_error = minibatch_avg_costs[-1].item(0) print minibatch_index, '-', train_error if train_error < self.best_train_error: self.best_train_error = train_error self.save() if n_valid_batches > 0 and (iter + 1) % validation_frequency == 0: # compute zero-one loss on validation set validation_losses = np.array( [validate_model(i) for i in xrange(n_valid_batches)]) #this_validation_loss = numpy.sum(validation_losses) * 100.0 / val_samples this_validation_loss = numpy.mean(validation_losses * 100.0) elapsed_time = time.clock() - start_time data.report_stats(self.id, elapsed_time, minibatch_index, this_validation_loss, minibatch_avg_costs[-1].item(0)) # if we got the best validation score until now if this_validation_loss < self.best_validation_loss: self.best_validation_loss = this_validation_loss self.save() print "New best score!" #if n_valid_batches == 0: # self.save() if not self.offline: probs = predict_samples() data.p[data.i_train] = probs data.save_stats()
def train_online(self, data): print 'train online...' def gradient_updates_momentum(cost, params, learning_rate, momentum): updates = [] for param in params: param_update = theano.shared(param.get_value() * 0., broadcastable=param.broadcastable) updates.append((param, param - learning_rate * param_update)) updates.append((param_update, momentum * param_update + (1. - momentum) * T.grad(cost, param))) return updates # DATA INITIALIZATION d = data.sample() train_x = d[0] train_y = d[1] valid_x = d[2] valid_y = d[3] reset = d[4] if reset: self.best_validation_loss = numpy.inf train_samples = len(train_y) valid_samples = len(valid_y) print 'valid_samples:', valid_samples print 'train_samples:', train_samples if self.resample: self.lr_shared.set_value(np.float32(self.learning_rate)) self.m_shared.set_value(np.float32(self.momentum)) else: self.resample = True self.y = T.ivector( 'y') # the labels are presented as 1D vector of [int] labels self.lr = T.scalar('learning_rate') self.m = T.scalar('momentum') self.lr_shared = theano.shared(np.float32(self.learning_rate)) self.m_shared = theano.shared(np.float32(self.momentum)) index = T.lscalar() # index to a [mini]batch x = self.x y = self.y lr = self.lr m = self.m lr_shared = self.lr_shared m_shared = self.m_shared patchSize = self.patchSize batchSize = self.batchSize train_set_x, train_set_y = shared_dataset((train_x, train_y), doCastLabels=True) if valid_samples > 0: valid_set_x, valid_set_y = shared_dataset((valid_x, valid_y), doCastLabels=True) # compute number of minibatches for training, validation n_train_batches = train_samples / batchSize n_valid_batches = valid_samples / batchSize #BUILD THE MODEL cost = self.cost(y) if valid_samples > 0: validate_model = theano.function( [index], self.errors(y), givens={ x: valid_set_x[index * batchSize:(index + 1) * batchSize], y: valid_set_y[index * batchSize:(index + 1) * batchSize] }) ''' predict_samples = theano.function( inputs=[index], outputs=T.neq(self.y_pred, self.y), givens={ x: train_set_x[index * batchSize: (index + 1) * batchSize], y: train_set_y[index * batchSize: (index + 1) * batchSize] } ) ''' predict_samples = theano.function([], outputs=T.neq(self.y_pred, self.y), givens={ x: train_set_x, y: train_set_y, }) gparams = [] for param in self.params: gparam = T.grad(cost, param) gparams.append(gparam) updates = gradient_updates_momentum(cost, self.params, lr, m) train_model = theano.function( inputs=[index], outputs=cost, updates=updates, givens={ x: train_set_x[index * batchSize:(index + 1) * batchSize], y: train_set_y[index * batchSize:(index + 1) * batchSize], lr: lr_shared, m: m_shared }) # TRAIN THE MODEL print '... training' print 'self.best_validation_loss:', self.best_validation_loss best_iter = 0 validation_frequency = 1 start_time = time.clock() elapsed_time = 0 iter = 0 minibatch_avg_costs = [] minibatch_index = 0 #while (elapsed_time < self.trainTime)\ # and (minibatch_index<n_train_batches)\ # and (not self.done): while (minibatch_index < n_train_batches) and (not self.done): if (elapsed_time >= self.trainTime): break train_cost = train_model(minibatch_index) # test the trained samples against the target # values to measure the training performance i = minibatch_index ''' probs = predict_samples(minibatch_index) #print 'probs:', probs.shape i_batch = data.i_train[ i * batchSize:(i+1)*batchSize ] data.p[ i_batch ] = probs ''' ''' good = np.where( probs == 0)[0] bad = np.where( probs == 1)[0] print 'bad:', len(bad) print 'good:', len(good) #print probs ''' #print '----->traincost:', type(train_cost), train_cost minibatch_avg_costs.append(train_cost) iter += 1 #iter = (epoch - 1) * n_train_batches + minibatch_index if (iter + 1) % validation_frequency == 0 and valid_samples > 0: validation_losses = np.array( [validate_model(i) for i in xrange(n_valid_batches)]) this_validation_loss = numpy.sum( validation_losses) * 100.0 / valid_samples elapsed_time = time.clock() - start_time ''' self.reportTrainingStats(elapsed_time, minibatch_index, this_validation_loss, minibatch_avg_costs[-1].item(0)) ''' print this_validation_loss, '/', self.best_validation_loss data.add_validation_loss(this_validation_loss) # if we got the best validation score until now if this_validation_loss < self.best_validation_loss: self.best_validation_loss = this_validation_loss best_iter = iter self.save() print "New best score!" # advance to next mini batch minibatch_index += 1 # update elapsed time elapsed_time = time.clock() - start_time if valid_samples == 0: self.save() probs = predict_samples() data.p[data.i_train] = probs elapsed_time = time.clock() - start_time msg = 'The code an for' status = '%f seconds' % (elapsed_time) Utility.report_status(msg, status) print 'done...'
def train_online(self, data): print 'train online...' def gradient_updates_momentum(cost, params, learning_rate, momentum): updates = [] for param in params: param_update = theano.shared(param.get_value() * 0., broadcastable=param.broadcastable) updates.append((param, param - learning_rate * param_update)) updates.append((param_update, momentum * param_update + (1. - momentum) * T.grad(cost, param))) return updates # DATA INITIALIZATION d = data.sample() train_x = d[0] train_y = d[1] valid_x = d[2] valid_y = d[3] reset = d[4] if reset: self.best_validation_loss = numpy.inf print 'best_validation:', self.best_validation_loss train_samples = len(train_y) valid_samples = len(valid_y) if self.resample: self.lr_shared.set_value(np.float32(self.learning_rate)) self.m_shared.set_value(np.float32(self.momentum)) else: self.resample = True self.y = T.ivector( 'y') # the labels are presented as 1D vector of [int] labels self.lr = T.scalar('learning_rate') self.m = T.scalar('momentum') self.lr_shared = theano.shared(np.float32(self.learning_rate)) self.m_shared = theano.shared(np.float32(self.momentum)) index = T.lscalar() # index to a [mini]batch x = self.x y = self.y lr = self.lr m = self.m lr_shared = self.lr_shared m_shared = self.m_shared patchSize = self.patchSize batchSize = self.batchSize train_set_x, train_set_y = shared_dataset((train_x, train_y), doCastLabels=True) if valid_samples > 0: valid_set_x, valid_set_y = shared_dataset((valid_x, valid_y), doCastLabels=True) # compute number of minibatches for training, validation n_train_batches = train_samples / batchSize n_valid_batches = valid_samples / batchSize #BUILD THE MODEL cost = self.cost(y) if valid_samples > 0: validate_model = theano.function( [index], self.errors(y), givens={ x: valid_set_x[index * batchSize:(index + 1) * batchSize], y: valid_set_y[index * batchSize:(index + 1) * batchSize] }) predict_samples = theano.function( inputs=[index], outputs=T.neq(self.mlp.y_pred, self.y), givens={ x: train_set_x[index * batchSize:(index + 1) * batchSize], y: train_set_y[index * batchSize:(index + 1) * batchSize] }) gparams = [] for param in self.params: gparam = T.grad(cost, param) gparams.append(gparam) updates = gradient_updates_momentum(cost, self.params, lr, m) train_model = theano.function( inputs=[index], outputs=cost, updates=updates, givens={ x: train_set_x[index * batchSize:(index + 1) * batchSize], y: train_set_y[index * batchSize:(index + 1) * batchSize], lr: lr_shared, m: m_shared }) # TRAIN THE MODEL print '... training' best_iter = 0 validation_frequency = 1 start_time = time.clock() elapsed_time = 0 iter = 0 minibatch_avg_costs = [] minibatch_index = 0 count1 = 0 count2 = 0 while (elapsed_time < self.trainTime)\ and (minibatch_index<n_train_batches)\ and (not self.done): train_cost = train_model(minibatch_index) #print '----->traincost:', type(train_cost), train_cost minibatch_avg_costs.append(train_cost) #print 'minibatch_index:', minibatch_index, 'n_train_batches:',n_train_batches, self.batchSize, probs = predict_samples(minibatch_index) indices = data.i_train[minibatch_index * batchSize:(minibatch_index + 1) * batchSize] data.p[indices] = probs #print 'probs:', probs iter += 1 if (iter + 1) % validation_frequency == 0 and n_valid_batches > 0: validation_losses = np.array( [validate_model(i) for i in xrange(n_valid_batches)]) this_validation_loss = numpy.sum( validation_losses) * 100.0 / valid_samples elapsed_time = time.clock() - start_time data.report_stats(self.id, elapsed_time, minibatch_index, this_validation_loss, minibatch_avg_costs[-1].item(0)) # if we got the best validation score until now count1 += len(np.where(probs == 0)[0]) count2 += len(np.where(probs == 1)[0]) data.add_validation_loss(this_validation_loss) if this_validation_loss < self.best_validation_loss: self.best_validation_loss = this_validation_loss best_iter = iter print '===>saving....' self.save() print "New best score!" # advance to next mini batch minibatch_index += 1 # update elapsed time elapsed_time = time.clock() - start_time data.save_stats() p = data.p[data.i_train] n_bad = len(np.where(p == 1)[0]) error = float(n_bad) / len(p) print '----------' print 'accuracy:', data.accuracy print 'error:', error print 'lerror:', self.error print 'probi:', np.bincount(np.int64(p)) if n_valid_batches == 0: self.save() elapsed_time = time.clock() - start_time msg = 'The code ran for' status = '%f seconds' % (elapsed_time) Utility.report_status(msg, status)
def evaluate_lenet5(learning_rate=0.0001, n_epochs=20000, nkerns=[48,48,48], kernelSizes=[5,5,5], hiddenSizes=[200], doResample=True, batch_size=1, patchSize=65, train_samples=50000, val_samples=10000, test_samples=1000, validation_frequency = 100, doEmailUpdate=False, momentum=0.98, filename='tmp_cnn.pkl'): def gradient_updates_momentum(cost, params, learning_rate, momentum): updates = [] for param in params: param_update = theano.shared(param.get_value()*0., broadcastable=param.broadcastable) updates.append((param, param - learning_rate*param_update)) updates.append((param_update, momentum*param_update + (1. - momentum)*T.grad(cost, param))) return updates rng = numpy.random.RandomState(23455) data, norm_mean, norm_std, grayImages, labelImages, maskImages = generate_experiment_data_supervised(purpose='train', nsamples=train_samples, patchSize=patchSize, balanceRate=0.5, data_mean=0.5, data_std=1.0) train_set_x, train_set_y = shared_dataset(data, doCastLabels=True) data = generate_experiment_data_supervised(purpose='validate', nsamples=val_samples, patchSize=patchSize, balanceRate=0.5, data_mean=norm_mean, data_std=norm_std)[0] valid_set_x, valid_set_y = shared_dataset(data, doCastLabels=True) data = generate_experiment_data_supervised(purpose='test', nsamples=test_samples, patchSize=patchSize, balanceRate=0.5, data_mean=norm_mean, data_std=norm_std)[0] test_set_x, test_set_y = shared_dataset(data, doCastLabels=True) # compute number of minibatches for training, validation and testing n_train_batches = train_samples / batch_size n_valid_batches = val_samples / batch_size n_test_batches = test_samples / batch_size learning_rate_shared = theano.shared(np.float32(learning_rate)) momentum_shared = theano.shared(np.float32(momentum)) # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch # start-snippet-1 x = T.matrix('x') # the data is presented as rasterized images y = T.ivector('y') # the labels are presented as 1D vector of # [int] labels lr = T.scalar('learning_rate') m = T.scalar('momentum') if doEmailUpdate: gmail_pwd = getpass.getpass() ###################### # BUILD ACTUAL MODEL # ###################### print '... building the model' classifier = CNN(input=x, batch_size=batch_size, patchSize=patchSize, rng=rng, nkerns=nkerns, kernelSizes=kernelSizes, hiddenSizes=hiddenSizes, fileName=filename) cost = classifier.cost(y) # create a function to compute the mistakes that are made by the model test_model = theano.function( [index], classifier.errors(y), givens={ x: test_set_x[index * batch_size: (index + 1) * batch_size], y: test_set_y[index * batch_size: (index + 1) * batch_size] } ) validate_model = theano.function( [index], classifier.errors(y), givens={ x: valid_set_x[index * batch_size: (index + 1) * batch_size], y: valid_set_y[index * batch_size: (index + 1) * batch_size] } ) gparams = [] for param in classifier.params: gparam = T.grad(cost, param) gparams.append(gparam) #SGD # updates = [] # for param, gparam in zip(classifier.params, gparams): # updates.append((param, param - lr * gparam)) #updates = adadelta_updates(classifier.params, gparams, lr, 0.000001) updates = gradient_updates_momentum(cost, classifier.params, lr, m) train_model = theano.function(inputs=[index], outputs=cost, updates=updates, givens={ x: train_set_x[index * batch_size:(index + 1) * batch_size], y: train_set_y[index * batch_size:(index + 1) * batch_size], lr: learning_rate_shared, m: momentum_shared}) ############### # TRAIN MODEL # ############### print '... training' best_validation_loss = numpy.inf best_iter = 0 decrease_epoch = 1 decrease_patience = 1 test_score = 0. start_time = time.clock() epoch = 0 done_looping = False # start pool for data print "Starting worker." pool = multiprocessing.Pool(processes=1) futureData = pool.apply_async(stupid_map_wrapper, [[generate_experiment_data_supervised,True, 'train', train_samples, patchSize, 0.5, 0.5, 1.0]]) while (epoch < n_epochs) and (not done_looping): minibatch_avg_costs = [] epoch = epoch + 1 if doResample and epoch>1: print "Waiting for data." data = futureData.get() print "GOT NEW DATA" train_set_x.set_value(np.float32(data[0])) train_set_y.set_value(np.int32(data[1])) futureData = pool.apply_async(stupid_map_wrapper, [[generate_experiment_data_supervised,True, 'train', train_samples, patchSize, 0.5, 0.5, 1.0]]) # try: # data = futureData.get(timeout=1) # print "GOT NEW DATA" # train_set_x.set_value(np.float32(data[0])) # train_set_y.set_value(np.int32(data[1])) # futureData = pool.apply_async(stupid_map_wrapper, [[generate_experiment_data_supervised,True, 'train', train_samples, patchSize, 0.5, norm_mean, 1.0]]) # except multiprocessing.TimeoutError: # print "TIMEOUT, TRAINING ANOTHER ROUND WITH CURRENT DATA" # pass # for minibatch_index in xrange(n_train_batches): minibatch_avg_costs.append(train_model(minibatch_index)) # iteration number iter = (epoch - 1) * n_train_batches + minibatch_index if (iter + 1) % validation_frequency == 0: classifier.save_CNN('current_cnn.pkl') # compute zero-one loss on validation set validation_losses = np.array([validate_model(i) for i in xrange(n_valid_batches)]) this_validation_loss = numpy.sum(validation_losses) * 100.0 / val_samples msg = 'epoch %i, minibatch %i/%i, training error %.3f, validation error %.2f %%' % (epoch, minibatch_index + 1, n_train_batches, minibatch_avg_costs[-1], this_validation_loss) print(msg) classifier.trainingCost.append(minibatch_avg_costs[-1]) classifier.validationError.append(this_validation_loss*100) # if we got the best validation score until now if this_validation_loss < best_validation_loss: best_validation_loss = this_validation_loss best_iter = iter classifier.save_CNN('best_cnn_so_far.pkl') print "New best score!" if doEmailUpdate: send_email(gmail_pwd, msg) # test it on the test set #test_losses = [test_model(i) for i # in xrange(n_test_batches)] #test_score = numpy.mean(test_losses) # #print(('epoch %i, minibatch %i/%i, test error of ' # 'best model %f %%') % # (epoch, minibatch_index + 1, n_train_batches, # test_score * 100.)) pool.close() pool.join() print "Pool closed." end_time = time.clock() print(('Optimization complete. Best validation score of %f %% ' 'obtained at iteration %i, with test performance %f %%') % (best_validation_loss * 100., best_iter + 1, test_score * 100.)) print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.)) return classifier
def train_mlp(learning_rate=0.01, n_epochs=10, batch_size=500, n_hidden=[500], patchSize=39, train_samples=10000, val_samples=10000, test_samples=10000, doResample=False, validation_frequency = 50, dropout_rate=0.0, activation=rectified_linear, doEmailUpdate=False, momentum=0.9): def adadelta_updates(parameters,gradients,rho,eps): # create variables to store intermediate updates gradients_sq = [ theano.shared(np.zeros(p.get_value().shape, dtype=theano.config.floatX)) for p in parameters ] deltas_sq = [ theano.shared(np.zeros(p.get_value().shape, dtype=theano.config.floatX)) for p in parameters ] # calculates the new "average" delta for the next iteration gradients_sq_new = [ rho*g_sq + (1-rho)*(g**2) for g_sq,g in zip(gradients_sq,gradients) ] # calculates the step in direction. The square root is an approximation to getting the RMS for the average value deltas = [ (T.sqrt(d_sq+eps)/T.sqrt(g_sq+eps))*grad for d_sq,g_sq,grad in zip(deltas_sq,gradients_sq_new,gradients) ] # calculates the new "average" deltas for the next step. deltas_sq_new = [ rho*d_sq + (1-rho)*(d**2) for d_sq,d in zip(deltas_sq,deltas) ] # Prepare it as a list f gradient_sq_updates = zip(gradients_sq,gradients_sq_new) deltas_sq_updates = zip(deltas_sq,deltas_sq_new) parameters_updates = [ (p,p - d) for p,d in zip(parameters,deltas) ] return gradient_sq_updates + deltas_sq_updates + parameters_updates def gradient_updates_momentum(cost, params, learning_rate, momentum): updates = [] for param in params: param_update = theano.shared(param.get_value()*0., broadcastable=param.broadcastable) updates.append((param, param - learning_rate*param_update)) updates.append((param_update, momentum*param_update + (1. - momentum)*T.grad(cost, param))) return updates if doEmailUpdate: gmail_pwd = getpass.getpass() rng = numpy.random.RandomState(1234) data, norm_mean, norm_std, grayImages, labelImages, maskImages = generate_experiment_data_supervised(purpose='train', nsamples=train_samples, patchSize=patchSize, balanceRate=0.5, data_mean=0.5, data_std=1.0) train_set_x, train_set_y = shared_dataset(data, doCastLabels=True) data = generate_experiment_data_supervised(purpose='validate', nsamples=val_samples, patchSize=patchSize, balanceRate=0.5, data_mean=norm_mean, data_std=norm_std)[0] valid_set_x, valid_set_y = shared_dataset(data, doCastLabels=True) data = generate_experiment_data_supervised(purpose='test', nsamples=test_samples, patchSize=patchSize, balanceRate=0.5, data_mean=norm_mean, data_std=norm_std)[0] test_set_x, test_set_y = shared_dataset(data, doCastLabels=True) # compute number of minibatches for training, validation and testing n_train_batches = train_samples / batch_size n_valid_batches = val_samples / 1000 n_test_batches = test_samples / 1000 learning_rate_shared = theano.shared(np.float32(learning_rate)) momentum_shared = theano.shared(np.float32(momentum)) ###################### # BUILD ACTUAL MODEL # ###################### print '... building the model' # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch x = T.matrix('x') # the data is presented as rasterized images y = T.ivector('y') # the labels are presented as 1D vector of # [int] labels lr = T.scalar('learning_rate') m = T.scalar('momentum') # construct the MLP class classifier = MLP_dropout(rng=rng, input=x, n_in=patchSize**2, n_hidden=n_hidden, n_out=2, dropout_rate=dropout_rate, activation=activation) cost = classifier.dropout_negative_log_likelihood(y) test_model = theano.function(inputs=[index], outputs=classifier.errors(y), givens={ x: test_set_x[index * batch_size:(index + 1) * batch_size], y: test_set_y[index * batch_size:(index + 1) * batch_size]}) validate_model = theano.function(inputs=[index], outputs=classifier.errors(y), givens={ x: valid_set_x[index * batch_size:(index + 1) * batch_size], y: valid_set_y[index * batch_size:(index + 1) * batch_size]}) gparams = [] for param in classifier.params: gparam = T.grad(cost, param) gparams.append(gparam) #SGD updates = [] for param, gparam in zip(classifier.params, gparams): updates.append((param, param - lr * gparam)) #updates = adadelta_updates(classifier.params, gparams, lr, 0.000001) # updates = gradient_updates_momentum(cost, classifier.params, lr, m) train_model = theano.function(inputs=[index], outputs=cost, updates=updates, givens={ x: train_set_x[index * batch_size:(index + 1) * batch_size], y: train_set_y[index * batch_size:(index + 1) * batch_size], lr: learning_rate_shared})#, #m: momentum_shared}) print '... training' best_validation_loss = numpy.inf best_iter = 0 decrease_epoch = 1 decrease_patience = 1 test_score = 0. start_time = time.clock() epoch = 0 done_looping = False # start pool for data print "Starting worker." pool = multiprocessing.Pool(processes=1) futureData = pool.apply_async(stupid_map_wrapper, [[generate_experiment_data_supervised,True, 'train', train_samples, patchSize, 0.5, 0.5, 1.0]]) while (epoch < n_epochs) and (not done_looping): minibatch_avg_costs = [] epoch = epoch + 1 if epoch % 10 == 0: classifier.save_MLP('train_progress.pkl') if doResample and epoch>1: print "Waiting for data." data = futureData.get() print "GOT NEW DATA" train_set_x.set_value(np.float32(data[0])) train_set_y.set_value(np.int32(data[1])) futureData = pool.apply_async(stupid_map_wrapper, [[generate_experiment_data_supervised,True, 'train', train_samples, patchSize, 0.5, 0.5, 1.0]]) # try: # data = futureData.get(timeout=1) # print "GOT NEW DATA" # train_set_x.set_value(np.float32(data[0])) # train_set_y.set_value(np.int32(data[1])) # futureData = pool.apply_async(stupid_map_wrapper, [[generate_experiment_data_supervised,True, 'train', train_samples, patchSize, 0.5, norm_mean, 1.0]]) # except multiprocessing.TimeoutError: # print "TIMEOUT, TRAINING ANOTHER ROUND WITH CURRENT DATA" # pass # for minibatch_index in xrange(n_train_batches): minibatch_avg_costs.append(train_model(minibatch_index)) # iteration number iter = (epoch - 1) * n_train_batches + minibatch_index if (iter + 1) % validation_frequency == 0: # compute zero-one loss on validation set validation_losses = [validate_model(i) for i in xrange(n_valid_batches)] this_validation_loss = numpy.mean(validation_losses) msg = 'epoch %i, minibatch %i/%i, training error %f, validation error %f %%' % (epoch, minibatch_index + 1, n_train_batches, minibatch_avg_costs[-1], this_validation_loss * 100.) print(msg) classifier.trainingCost.append(minibatch_avg_costs[-1]) classifier.validationError.append(this_validation_loss*100) # if we got the best validation score until now if this_validation_loss < best_validation_loss: best_validation_loss = this_validation_loss best_iter = iter classifier.save_MLP('best_so_far.pkl') print "New best score!" if doEmailUpdate: send_email(gmail_pwd, msg) # test it on the test set #test_losses = [test_model(i) for i # in xrange(n_test_batches)] #test_score = numpy.mean(test_losses) # #print(('epoch %i, minibatch %i/%i, test error of ' # 'best model %f %%') % # (epoch, minibatch_index + 1, n_train_batches, # test_score * 100.)) pool.close() pool.join() print "Pool closed." end_time = time.clock() print(('Optimization complete. Best validation score of %f %% ' 'obtained at iteration %i, with test performance %f %%') % (best_validation_loss * 100., best_iter + 1, test_score * 100.)) print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.)) return classifier