def fit(self, train, test=None, validate_every=100, show_norms=False,
        show_output=False, error_logging=True):
    """ Fit model

    Pass in test to compute test error and report during training.

    train : ndarray (T x n_in)

    validate_every : int
        in terms of number of epochs
    """
    if test is not None:
        self.interactive = True
        test_set = self.shared_dataset(test)
    else:
        self.interactive = False

    train_set = self.shared_dataset(train)

    # compute number of minibatches for training
    # note that cases are the first dimension here
    n_train = train_set.get_value(borrow=True).shape[0]
    n_train_batches = int(np.ceil(1.0 * n_train / self.batch_size))
    if self.interactive:
        n_test = test_set.get_value(borrow=True).shape[0]
        n_test_batches = int(np.ceil(1.0 * n_test / self.batch_size))

    # validate_every is specified in terms of epochs
    validation_frequency = validate_every * n_train_batches

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    logger.info('... building the model')

    index = T.lscalar('index')    # index to a [mini]batch
    n_ex = T.lscalar('n_ex')      # total number of examples
    # learning rate (may change)
    l_r = T.scalar('l_r', dtype=theano.config.floatX)
    mom = T.scalar('mom', dtype=theano.config.floatX)  # momentum

    # Proper implementation of variable-batch-size evaluation.
    # Note that classifier.errors() returns the mean error,
    # but the last batch may be a smaller size,
    # so we keep around the effective_batch_size (whose last element may
    # be smaller than the rest)
    # and weight the reported error by the batch size when we average.
    # Also, by keeping batch_start and batch_stop as symbolic variables,
    # we make the theano function easier to read.
    batch_start = index * self.batch_size
    batch_stop = T.minimum(n_ex, (index + 1) * self.batch_size)
    effective_batch_size = batch_stop - batch_start

    get_batch_size = theano.function(inputs=[index, n_ex],
                                     outputs=effective_batch_size)

    compute_train_error = []
    compute_each_train_error = []
    compute_test_error = []
    train_model = []
    for da in self.estimator.dA_layers:
        loss = da.loss
        updates = da.get_updates(l_r, mom)

        f_ctrain = theano.function(
            inputs=[index, n_ex],
            outputs=loss,
            givens={self.x: train_set[batch_start:batch_stop]},
            mode=mode)
        compute_train_error.append(f_ctrain)

        f_cetrain = theano.function(
            inputs=[index, n_ex],
            outputs=da.each_loss,
            givens={self.x: train_set[batch_start:batch_stop]},
            mode=mode)
        compute_each_train_error.append(f_cetrain)

        if self.interactive:
            f_ctest = theano.function(
                inputs=[index, n_ex],
                outputs=loss,
                givens={self.x: test_set[batch_start:batch_stop]},
                mode=mode)
            compute_test_error.append(f_ctest)

        # compiling a Theano function `train_model` that returns the
        # cost, but at the same time updates the parameters of the
        # model based on the rules defined in `updates`
        f_train = theano.function(
            inputs=[index, n_ex, l_r, mom],
            outputs=loss,
            updates=updates,
            givens={self.x: train_set[batch_start:batch_stop]},
            mode=mode)
        train_model.append(f_train)

    ###############
    # TRAIN MODEL #
    ###############
    keyMonitoringThread = Threads.KeyMonitoringThread()
    keyMonitoringThread.start()
    initial_learning_rate = self.learning_rate
    t0 = time.time()
    for n in xrange(self.estimator.n_layers):
        logger.info('... training dA layer[%d]' % n)
        epoch = 0
        this_train_loss = np.inf
        stopFlg = False
        t0_l = time.time()
        self.learning_rate = initial_learning_rate
        self.errorlog.append([])

        while (epoch < self.n_epochs) and \
                (this_train_loss > self.t_error) and (stopFlg is False):
            epoch = epoch + 1
            effective_momentum = self.final_momentum \
                if epoch > self.momentum_switchover \
                else self.initial_momentum

            for minibatch_idx in xrange(n_train_batches):
                minibatch_avg_cost = train_model[n](minibatch_idx, n_train,
                                                    self.learning_rate,
                                                    effective_momentum)

                # iteration number (how many weight updates have we made?)
                # epoch is 1-based, index is 0-based
                iter = (epoch - 1) * n_train_batches + minibatch_idx + 1

                if iter % validation_frequency == 0:
                    # compute loss on training set
                    train_losses = [compute_train_error[n](i, n_train)
                                    for i in xrange(n_train_batches)]
                    train_batch_sizes = [get_batch_size(i, n_train)
                                         for i in xrange(n_train_batches)]
                    this_train_loss = np.average(train_losses,
                                                 weights=train_batch_sizes)

                    # compute each output unit's loss on the training set
                    if error_logging is True:
                        train_each_losses = np.array(
                            [compute_each_train_error[n](i, n_train)
                             for i in xrange(n_train_batches)])
                        train_batch_sizes_for_each = []
                        for i in xrange(train_each_losses.shape[1]):
                            train_batch_sizes_for_each.append(
                                train_batch_sizes)

                        this_train_each_loss = np.average(
                            train_each_losses.T,
                            weights=train_batch_sizes_for_each, axis=1)
                        el = np.r_[np.array([epoch]), this_train_each_loss]
                        self.errorlog[n] = np.vstack((self.errorlog[n], el)) \
                            if len(self.errorlog[n]) > 0 \
                            else np.array([el])
                        # save the error history as a PNG
                        self.save_errorlog_png()
                        # self.save_errorlog_png(fname='dALayer%d' % n)

                    if self.interactive:
                        test_losses = [compute_test_error[n](i, n_test)
                                       for i in xrange(n_test_batches)]
                        test_batch_sizes = [get_batch_size(i, n_test)
                                            for i in xrange(n_test_batches)]
                        this_test_loss = np.average(test_losses,
                                                    weights=test_batch_sizes)

                        logger.info('*** dA layer[%d] *** epoch %i, mb %i/%i,'
                                    ' tr loss %f te loss %f lr: %f mom: %f' %
                                    (n, epoch, minibatch_idx + 1,
                                     n_train_batches, this_train_loss,
                                     this_test_loss, self.learning_rate,
                                     effective_momentum))
                    else:
                        logger.info('*** dA layer[%d] *** epoch %i, mb %i/%i,'
                                    ' train loss %f lr: %f mom: %f' %
                                    (n, epoch, minibatch_idx + 1,
                                     n_train_batches, this_train_loss,
                                     self.learning_rate,
                                     effective_momentum))

                    self.optional_output(train_set, show_norms, show_output)

            self.learning_rate *= self.learning_rate_decay

            # save a snapshot of the parameters during training
            if self.snapshot_every is not None:
                if (epoch + 1) % self.snapshot_every == 0:
                    date_obj = datetime.datetime.now()
                    date_str = date_obj.strftime('%Y-%m-%d-%H:%M:%S')
                    class_name = self.__class__.__name__
                    fname = '%s.%s-snapshot-%d' % (class_name, date_str,
                                                   epoch + 1)
                    self.save(fpath=self.snapshot_path, fname=fname)

            # poll for command input, received on a separate thread
            var = keyMonitoringThread.GetInput()
            # if 'q' was received, cut training short
            if var == 'q':
                stopFlg = True

        h, m = divmod(time.time() - t0_l, 3600)
        m, s = divmod(m, 60)
        print "*** dA layer[%d] *** Elapsed time: %d hour %d min %f sec" % (
            n, int(h), int(m), s)

    h, m = divmod(time.time() - t0, 3600)
    m, s = divmod(m, 60)
    print "Elapsed time: %d hour %d min %f sec" % (int(h), int(m), s)

    # stop the command-input thread
    # (if training finished normally by meeting the stopping criteria,
    # rather than being cut short by a 'q' input, the thread has to be
    # terminated explicitly)
    keyMonitoringThread.Stop()
    print 'Press any key...'
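Both fit() methods in this file rely on the same weighted-averaging trick for the ragged final minibatch: per-batch mean losses are combined with weights equal to each batch's effective size, which reproduces the exact mean over all examples. A minimal NumPy-only sketch of that step, standalone and outside the class (the _weighted_loss_demo name and the example numbers are illustrative, not from the original code):

import numpy as np

def _weighted_loss_demo():
    # mean loss reported for each minibatch (as compute_train_error does)
    losses = np.array([0.5, 0.3, 0.9])
    # effective batch sizes: 237 examples with batch_size 100
    # leave a smaller final batch of 37
    sizes = np.array([100, 100, 37])
    # a naive mean, (0.5 + 0.3 + 0.9) / 3 = 0.567, would overweight
    # the 37 examples in the final batch
    return np.average(losses, weights=sizes)  # = 113.3 / 237 = 0.478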
def fit(self, X_train, Y_train, X_test=None, Y_test=None,
        validate_every=100, optimizer='sgd', compute_zero_one=False,
        show_norms=True, show_output=True, error_logging=True):
    """ Fit model

    Pass in X_test, Y_test to compute test error and report during
    training.

    X_train : ndarray (T x n_in)
    Y_train : ndarray (T x n_out)

    validate_every : int
        in terms of number of epochs

    optimizer : string
        Optimizer type.
        Possible values:
            'sgd'  : batch stochastic gradient descent
            'cg'   : nonlinear conjugate gradient algorithm
                     (scipy.optimize.fmin_cg)
            'bfgs' : quasi-Newton method of Broyden, Fletcher, Goldfarb,
                     and Shanno (scipy.optimize.fmin_bfgs)
            'l_bfgs_b' : Limited-memory BFGS (scipy.optimize.fmin_l_bfgs_b)

    compute_zero_one : bool
        in the case of binary output, compute zero-one error in addition
        to cross-entropy error

    show_norms : bool
        Show L2 norms of individual parameter groups while training.

    show_output : bool
        Show the model output on the first training case while training.
    """
    if X_test is not None:
        assert(Y_test is not None)
        self.interactive = True
        test_set_x, test_set_y = self.shared_dataset((X_test, Y_test))
    else:
        self.interactive = False

    train_set_x, train_set_y = self.shared_dataset((X_train, Y_train))

    # compute number of minibatches for training
    # note that cases are the second dimension, not the first
    n_train = train_set_x.get_value(borrow=True).shape[1]
    n_train_batches = int(np.ceil(1.0 * n_train / self.batch_size))
    if self.interactive:
        n_test = test_set_x.get_value(borrow=True).shape[1]
        n_test_batches = int(np.ceil(1.0 * n_test / self.batch_size))

    # validate_every is specified in terms of epochs
    validation_frequency = validate_every * n_train_batches

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    logger.info('... building the model')

    index = T.lscalar('index')    # index to a [mini]batch
    n_ex = T.lscalar('n_ex')      # total number of examples
    # learning rate (may change)
    l_r = T.scalar('l_r', dtype=theano.config.floatX)
    mom = T.scalar('mom', dtype=theano.config.floatX)  # momentum

    cost = self.estimator.loss(self.y) \
        + self.L1_reg * self.estimator.L1 \
        + self.L2_reg * self.estimator.L2_sqr

    # Proper implementation of variable-batch-size evaluation.
    # Note that classifier.errors() returns the mean error,
    # but the last batch may be a smaller size,
    # so we keep around the effective_batch_size (whose last element may
    # be smaller than the rest)
    # and weight the reported error by the batch size when we average.
    # Also, by keeping batch_start and batch_stop as symbolic variables,
    # we make the theano function easier to read.
    batch_start = index * self.batch_size
    batch_stop = T.minimum(n_ex, (index + 1) * self.batch_size)
    effective_batch_size = batch_stop - batch_start

    get_batch_size = theano.function(inputs=[index, n_ex],
                                     outputs=effective_batch_size)
    compute_train_error = theano.function(
        inputs=[index, n_ex],
        outputs=self.estimator.loss(self.y),
        givens={self.x: train_set_x[:, batch_start:batch_stop],
                self.y: train_set_y[:, batch_start:batch_stop]},
        mode=mode)
    compute_train_each_error = theano.function(
        inputs=[index, n_ex],
        outputs=self.estimator.each_loss(self.y),
        givens={self.x: train_set_x[:, batch_start:batch_stop],
                self.y: train_set_y[:, batch_start:batch_stop]},
        mode=mode)

    if self.interactive:
        compute_test_error = theano.function(
            inputs=[index, n_ex],
            outputs=self.estimator.loss(self.y),
            givens={self.x: test_set_x[:, batch_start:batch_stop],
                    self.y: test_set_y[:, batch_start:batch_stop]},
            mode=mode)

    self.get_norms = {}
    for param in self.estimator.params:
        self.get_norms[param] = theano.function(
            inputs=[], outputs=self.estimator.l2_norms[param], mode=mode)

    # compute the gradient of cost with respect to theta using BPTT
    gtheta = T.grad(cost, self.estimator.theta)

    if optimizer == 'sgd':
        updates = {}
        theta = self.estimator.theta
        theta_update = self.estimator.theta_update
        # careful here: an update to a shared variable
        # cannot depend on the updated value of another shared variable,
        # since updates happen in parallel,
        # so we need to be explicit
        upd = mom * theta_update - l_r * gtheta
        updates[theta_update] = upd
        updates[theta] = theta + upd

        # compiling a Theano function `train_model` that returns the
        # cost, but at the same time updates the parameters of the
        # model based on the rules defined in `updates`
        train_model = theano.function(
            inputs=[index, n_ex, l_r, mom],
            outputs=cost,
            updates=updates,
            givens={self.x: train_set_x[:, batch_start:batch_stop],
                    self.y: train_set_y[:, batch_start:batch_stop]},
            mode=mode)

        ###############
        # TRAIN MODEL #
        ###############
        logger.info('... training')
        epoch = 0
        this_train_loss = np.inf
        stopFlg = False
        keyMonitoringThread = Threads.KeyMonitoringThread()
        keyMonitoringThread.start()
        t0 = time.time()

        while (epoch < self.n_epochs) and \
                (this_train_loss > self.t_error) and (stopFlg is False):
            epoch = epoch + 1
            effective_momentum = self.final_momentum \
                if epoch > self.momentum_switchover \
                else self.initial_momentum

            for minibatch_idx in xrange(n_train_batches):
                minibatch_avg_cost = train_model(minibatch_idx, n_train,
                                                 self.learning_rate,
                                                 effective_momentum)

                # iteration number (how many weight updates have we made?)
                # epoch is 1-based, index is 0-based
                iter = (epoch - 1) * n_train_batches + minibatch_idx + 1

                if iter % validation_frequency == 0:
                    # compute loss on training set
                    train_losses = [compute_train_error(i, n_train)
                                    for i in xrange(n_train_batches)]
                    train_batch_sizes = [get_batch_size(i, n_train)
                                         for i in xrange(n_train_batches)]
                    this_train_loss = np.average(train_losses,
                                                 weights=train_batch_sizes)

                    # compute each output unit's loss on the training set
                    if error_logging is True:
                        train_each_losses = np.array(
                            [compute_train_each_error(i, n_train)
                             for i in xrange(n_train_batches)])
                        train_batch_sizes_for_each = []
                        for i in xrange(self.n_out):
                            train_batch_sizes_for_each.append(
                                train_batch_sizes)

                        this_train_each_loss = np.average(
                            train_each_losses.T,
                            weights=train_batch_sizes_for_each, axis=1)
                        el = np.r_[np.array([epoch]), this_train_each_loss]
                        self.errorlog = np.vstack((self.errorlog, el)) \
                            if len(self.errorlog) > 0 \
                            else np.array([el])
                        # save the error history as a PNG
                        self.save_errorlog_png()

                    if self.interactive:
                        test_losses = [compute_test_error(i, n_test)
                                       for i in xrange(n_test_batches)]
                        test_batch_sizes = [get_batch_size(i, n_test)
                                            for i in xrange(n_test_batches)]
                        this_test_loss = np.average(
                            test_losses, weights=test_batch_sizes)

                        logger.info('epoch %i, mb %i/%i, tr loss %f '
                                    'te loss %f lr: %f mom: %f' %
                                    (epoch, minibatch_idx + 1,
                                     n_train_batches, this_train_loss,
                                     this_test_loss, self.learning_rate,
                                     effective_momentum))
                    else:
                        logger.info('epoch %i, mb %i/%i, train loss %f'
                                    ' lr: %f mom: %f' %
                                    (epoch, minibatch_idx + 1,
                                     n_train_batches, this_train_loss,
                                     self.learning_rate,
                                     effective_momentum))

                    self.optional_output(train_set_x, show_norms,
                                         show_output)

            self.learning_rate *= self.learning_rate_decay

            # save a snapshot of the parameters during training
            if self.snapshot_every is not None:
                if (epoch + 1) % self.snapshot_every == 0:
                    date_obj = datetime.datetime.now()
                    date_str = date_obj.strftime('%Y-%m-%d-%H:%M:%S')
                    class_name = self.__class__.__name__
                    fname = '%s.%s-snapshot-%d' % (class_name, date_str,
                                                   epoch + 1)
                    self.save(fpath=self.snapshot_path, fname=fname)

            # poll for command input, received on a separate thread
            var = keyMonitoringThread.GetInput()
            # if 'q' was received, cut training short
            if var == 'q':
                stopFlg = True
                keyMonitoringThread.Stop()

        h, m = divmod(time.time() - t0, 3600)
        m, s = divmod(m, 60)
        print "Elapsed time: %d hour %d min %f sec" % (int(h), int(m), s)

        # stop the command-input thread
        # (if training finished normally by meeting the stopping criteria,
        # rather than being cut short by a 'q' input, the thread has to be
        # terminated explicitly)
        keyMonitoringThread.Stop()
        print 'Press any key...'

    elif optimizer in ('cg', 'bfgs', 'l_bfgs_b'):
        # compile a theano function that returns the cost of a minibatch
        batch_cost = theano.function(
            inputs=[index, n_ex],
            outputs=cost,
            givens={self.x: train_set_x[:, batch_start:batch_stop],
                    self.y: train_set_y[:, batch_start:batch_stop]},
            mode=mode, name="batch_cost")

        # compile a theano function that returns the gradient of the
        # minibatch cost with respect to theta
        batch_grad = theano.function(
            inputs=[index, n_ex],
            outputs=T.grad(cost, self.estimator.theta),
            givens={self.x: train_set_x[:, batch_start:batch_stop],
                    self.y: train_set_y[:, batch_start:batch_stop]},
            mode=mode, name="batch_grad")

        # creates a function that computes the average cost on the
        # training set
        def train_fn(theta_value):
            theta_value = np.array(theta_value,
                                   dtype=theano.config.floatX)
            self.estimator.theta.set_value(theta_value, borrow=True)
            train_losses = [batch_cost(i, n_train)
                            for i in xrange(n_train_batches)]
            train_batch_sizes = [get_batch_size(i, n_train)
                                 for i in xrange(n_train_batches)]
            return np.average(train_losses, weights=train_batch_sizes)

        # creates a function that computes the average gradient of the
        # cost with respect to theta
        def train_fn_grad(theta_value):
            theta_value = np.array(theta_value,
                                   dtype=theano.config.floatX)
            self.estimator.theta.set_value(theta_value, borrow=True)
            train_grads = [batch_grad(i, n_train)
                           for i in xrange(n_train_batches)]
            train_batch_sizes = [get_batch_size(i, n_train)
                                 for i in xrange(n_train_batches)]
            return np.average(train_grads, weights=train_batch_sizes,
                              axis=0)

        # validation function, prints useful output after each iteration
        def callback(theta_value):
            self.epoch += 1
            if (self.epoch) % validate_every == 0:
                theta_value = np.array(theta_value,
                                       dtype=theano.config.floatX)
                self.estimator.theta.set_value(theta_value, borrow=True)
                # compute loss on training set
                train_losses = [compute_train_error(i, n_train)
                                for i in xrange(n_train_batches)]
                train_batch_sizes = [get_batch_size(i, n_train)
                                     for i in xrange(n_train_batches)]
                this_train_loss = np.average(train_losses,
                                             weights=train_batch_sizes)

                # compute each output unit's loss on the training set
                if error_logging is True:
                    train_each_losses = np.array(
                        [compute_train_each_error(i, n_train)
                         for i in xrange(n_train_batches)])
                    train_batch_sizes_for_each = []
                    for i in xrange(self.n_out):
                        train_batch_sizes_for_each.append(
                            train_batch_sizes)

                    this_train_each_loss = np.average(
                        train_each_losses.T,
                        weights=train_batch_sizes_for_each, axis=1)
                    el = np.r_[np.array([self.epoch]),
                               this_train_each_loss]
                    self.errorlog = np.vstack((self.errorlog, el)) \
                        if len(self.errorlog) > 0 \
                        else np.array([el])
                    # save the error history as a PNG
                    self.save_errorlog_png(fname=optimizer)

                if self.interactive:
                    test_losses = [compute_test_error(i, n_test)
                                   for i in xrange(n_test_batches)]
                    test_batch_sizes = [get_batch_size(i, n_test)
                                        for i in xrange(n_test_batches)]
                    this_test_loss = np.average(test_losses,
                                                weights=test_batch_sizes)
                    logger.info('epoch %i, tr loss %f, te loss %f' %
                                (self.epoch, this_train_loss,
                                 this_test_loss))
                else:
                    logger.info('epoch %i, train loss %f' %
                                (self.epoch, this_train_loss))

                self.optional_output(train_set_x, show_norms, show_output)

        ###############
        # TRAIN MODEL #
        ###############
        logger.info('... training')
        # using a scipy optimizer
        import scipy.optimize
        if optimizer == 'cg':
            of = scipy.optimize.fmin_cg
        elif optimizer == 'bfgs':
            of = scipy.optimize.fmin_bfgs
        elif optimizer == 'l_bfgs_b':
            of = scipy.optimize.fmin_l_bfgs_b
        logger.info("Optimizing using %s..." % of.__name__)
        start_time = time.clock()

        # keep track of epochs externally;
        # these get updated through callback
        self.epoch = 0

        # the interface to l_bfgs_b differs from that of cg and bfgs;
        # however, scipy 0.11 unifies them under scipy.optimize.minimize
        if optimizer == 'cg' or optimizer == 'bfgs':
            best_theta = of(
                f=train_fn,
                x0=self.estimator.theta.get_value(),
                # x0=np.zeros(self.estimator.theta.get_value().shape,
                #             dtype=theano.config.floatX),
                fprime=train_fn_grad,
                callback=callback,
                disp=1,
                retall=1,
                maxiter=self.n_epochs)
        elif optimizer == 'l_bfgs_b':
            best_theta, f_best_theta, info = of(
                func=train_fn,
                x0=self.estimator.theta.get_value(),
                fprime=train_fn_grad,
                iprint=validate_every,
                maxfun=self.n_epochs)  # max number of function evaluations

        end_time = time.clock()
        h, m = divmod(end_time - start_time, 3600)
        m, s = divmod(m, 60)
        print "Optimization time: %d hour %d min %f sec" % (int(h),
                                                            int(m), s)

    else:
        raise NotImplementedError
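As the comment in fit() notes, scipy 0.11 unifies fmin_cg, fmin_bfgs, and fmin_l_bfgs_b under scipy.optimize.minimize. A hedged sketch of what that branch could collapse to on the unified interface; minimize_sketch is a hypothetical helper, and train_fn, train_fn_grad, and callback stand for the closures defined in fit() above:

def minimize_sketch(train_fn, train_fn_grad, callback, x0, n_epochs,
                    method_name='L-BFGS-B'):
    import numpy as np
    import scipy.optimize
    res = scipy.optimize.minimize(
        fun=train_fn,            # average training-set cost at theta
        x0=np.asarray(x0),       # initial parameter vector
        jac=train_fn_grad,       # average gradient at theta
        method=method_name,      # or 'CG' / 'BFGS'
        callback=callback,       # per-iteration validation/logging
        options={'maxiter': n_epochs, 'disp': True})
    return res.x                 # best theta found

One result object replaces the three differing return conventions (best_theta vs. the (best_theta, f_best_theta, info) tuple), which is why the scipy project moved this way.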
global IS_GET_OBJECT_SIZE
global CONTROL_TIME

# load the trained network
model = PrepNetwork('NN_D204060_MSPTS_Short_batch100_f')  # 14/10/01 BEST!
# model = PrepNetwork('NN_D204060_MSPTS_batch100f_m500_lr9999')  # No Good
# model = PrepNetwork('NN_D204060_MSPT_Short_batch100_f')

# prepare an instance that communicates with the host
com = HostCommunication()
# start the thread that communicates with the host
com.start()

keyMonitoringThread = Threads.KeyMonitoringThread()
keyMonitoringThread.start()

rtcFlag = False

print "####################################"
print "#             Command              #"
print "####################################"
print "# 'i': Set Initial Grasping Pose   #"
print "# 's': Start Real Time Control     #"
print "# 'p': Pause Real Time Control     #"
print "# 'q': Quit Program                #"
print "####################################"

# loop until communication with the host is lost
while True:
    if com.CompleteFlag == True:
        break
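The excerpt ends before the loop services the menu it just printed. A minimal sketch of how those commands might be dispatched with the same primitives used during training (keyMonitoringThread.GetInput(), rtcFlag, com.CompleteFlag); the handler bodies are hypothetical placeholders, not the original control logic:

def control_loop_sketch(com, keyMonitoringThread):
    rtcFlag = False
    while not com.CompleteFlag:            # until the host disconnects
        var = keyMonitoringThread.GetInput()
        if var == 'i':
            pass                           # hypothetical: command the initial grasping pose
        elif var == 's':
            rtcFlag = True                 # start real-time control
        elif var == 'p':
            rtcFlag = False                # pause real-time control
        elif var == 'q':
            break                          # quit the program
    keyMonitoringThread.Stop()             # stop the command-input thread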