def optimize(self, nnet):
    timer = Stopwatch(verbose=False).start()
    self.total_epochs += self.max_epochs
    for i in xrange(self.max_epochs):
        self.epoch += 1
        if self.verbose:
            print_inline('Epoch {0:>{1}}/{2} '.format(
                self.epoch, len(str(self.total_epochs)), self.total_epochs))
        if self.verbose and self.early_stopping and nnet._X_val is not None:
            print_inline(' early stopping after {0} '.format(self._early_stopping))
        losses = self.train_epoch(nnet)
        self.loss_history.append(losses)
        msg = 'elapsed: {0} sec'.format(
            width_format(timer.elapsed(), default_width=5, max_precision=2))
        msg += ' - loss: {0}'.format(
            width_format(np.mean(losses), default_width=5, max_precision=4))
        score = nnet._metric(nnet._y, nnet.validate())
        self.score_history.append(score)
        # TODO: change acc to metric name
        msg += ' - acc.: {0}'.format(
            width_format(score, default_width=6, max_precision=4))
        if nnet._X_val is not None:
            if self._early_stopping > 0 and self.epoch > 1:
                self._early_stopping -= 1
            val_loss = nnet._loss(nnet._y_val, nnet.validate_proba(nnet._X_val))
            self.val_loss_history.append(val_loss)
            val_score = nnet._metric(nnet._y_val, nnet.validate(nnet._X_val))
            if self.epoch > 1 and val_score < 0.2 * self.val_score_history[-1]:
                return
            self.val_score_history.append(val_score)
            if self.epoch > 1 and val_score > nnet.best_val_score_:
                nnet.best_val_score_ = val_score
                nnet.best_epoch_ = self.epoch  # TODO move to optimizer
                nnet._save_best_weights()
                self._early_stopping = self.early_stopping  # reset counter
            msg += ' - val. loss: {0}'.format(
                width_format(val_loss, default_width=5, max_precision=4))
            # TODO: fix acc.
            msg += ' - val. acc.: {0}'.format(
                width_format(val_score, default_width=6, max_precision=4))
            if self._early_stopping == 0:
                if self.verbose:
                    print msg
                return
        if self.verbose:
            print msg
        if self.epoch > 1 and self.plot:
            if not os.path.exists(self.plot_dirpath):
                os.makedirs(self.plot_dirpath)
            plot_learning_curves(self.loss_history, self.score_history,
                                 self.val_loss_history, self.val_score_history,
                                 dirpath=self.plot_dirpath)
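
# A minimal, self-contained sketch of the patience-based early stopping used
# in `optimize` above; `early_stopping_demo`, `val_scores` and `patience` are
# illustrative names, not part of the original class.
def early_stopping_demo(val_scores, patience=3):
    """Return the 1-based epoch at which training would stop."""
    best, countdown = float('-inf'), patience
    for epoch, score in enumerate(val_scores, start=1):
        if score > best:
            best, countdown = score, patience  # improvement: reset the counter
        else:
            countdown -= 1                     # no improvement: burn one epoch
        if countdown == 0:
            return epoch
    return len(val_scores)

# e.g. early_stopping_demo([.5, .6, .59, .58, .57], patience=3) == 5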
def train():
    """Trains model."""
    # Define path to log dir.
    logdir = CONFIG.LOGDIR
    setup_train_dir(logdir)

    # Common code for multigpu and single gpu.
    strategy = tf.distribute.MirroredStrategy()
    with strategy.scope():
        # Get training algorithm.
        algo = train_algo.Algorithm()

        # Setup summary writer.
        summary_writer = tf.summary.create_file_writer(
            os.path.join(logdir, 'train_logs'), flush_millis=10000)

        # Setup learning rate schedule, optimizer and global step.
        learning_rate, optimizer, global_step = get_lr_opt_global_step()
        ckpt_manager, status, _ = restore_ckpt(
            logdir=logdir, optimizer=optimizer, **algo.model)

        global_step_value = global_step.numpy()
        lr_fn = get_lr_fn(CONFIG.OPTIMIZER)

        # Setup dataset iterators.
        batch_size_per_replica = CONFIG.TRAIN.BATCH_SIZE
        total_batch_size = batch_size_per_replica * strategy.num_replicas_in_sync
        # Setup train iterator.
        train_ds = create_dataset(split='train', mode=CONFIG.MODE,
                                  batch_size=total_batch_size)
        train_iterator = strategy.make_dataset_iterator(train_ds)

        # Define one training step.
        def train_step(data):
            loss = algo.train_one_iter(data, global_step, optimizer)
            return loss

        # Gather loss across different GPUs.
        def dist_train(it):
            total_loss = strategy.reduce(
                tf.distribute.ReduceOp.SUM,
                strategy.experimental_run(train_step, it), axis=None)
            return total_loss

        dist_train = tf.function(dist_train)

        stopwatch = Stopwatch()

        try:
            while global_step_value < CONFIG.TRAIN.MAX_ITERS:
                with summary_writer.as_default():
                    with tf.summary.record_if(
                            global_step_value % CONFIG.LOGGING.REPORT_INTERVAL == 0):
                        # Training loss.
                        loss = dist_train(train_iterator)

                        # Update learning rate based on lr_fn.
                        learning_rate.assign(lr_fn(learning_rate, global_step))

                        tf.summary.scalar('loss', loss, step=global_step)
                        tf.summary.scalar('learning_rate', learning_rate,
                                          step=global_step)

                        # Save checkpoint.
                        if global_step_value % CONFIG.CHECKPOINT.SAVE_INTERVAL == 0:
                            ckpt_manager.save()
                            logging.info('Checkpoint saved at iter %d.',
                                         global_step_value)

                        # Update global step.
                        global_step_value = global_step.numpy()

                        time_per_iter = stopwatch.elapsed()
                        tf.summary.scalar('timing/time_per_iter', time_per_iter,
                                          step=global_step)

                        logging.info(
                            'Iter[{}/{}], {:.1f}s/iter, Loss: {:.3f}'.format(
                                global_step_value, CONFIG.TRAIN.MAX_ITERS,
                                time_per_iter, loss.numpy()))
                        # Reset stopwatch after iter is complete.
                        stopwatch.reset()

        except KeyboardInterrupt:
            logging.info('Caught keyboard interrupt. Saving model before quitting.')

        finally:
            # Save the final checkpoint.
            ckpt_manager.save()
            logging.info('Checkpoint saved at iter %d.', global_step_value)
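
# A minimal sketch of a schedule compatible with the
# `lr_fn(learning_rate, global_step)` call in the loop above; the
# exponential-decay form and its constants are illustrative assumptions,
# not the actual schedule behind `get_lr_fn(CONFIG.OPTIMIZER)`.
import tensorflow as tf

def example_lr_fn(learning_rate, global_step,
                  base_lr=1e-3, decay_rate=0.97, decay_steps=1000):
    # `learning_rate` is unused here but kept for signature compatibility.
    # Decay the rate smoothly as a function of the current global step.
    step = tf.cast(global_step, tf.float32)
    return base_lr * decay_rate ** (step / decay_steps)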
def fit(self, X, y):
    timer = Stopwatch(verbose=False).start()

    X, y = self._check_X_y(X, y)
    unique_params = self.unique_params()
    tts = TrainTestSplitter(**self.train_test_splitter_params)
    number_of_combinations = self.number_of_combinations()
    total_iter = self.n_splits * number_of_combinations
    current_iter_width = len(str(total_iter))

    if self.verbose:
        print "Training {0} on {1} samples x {2} features.".format(
            self.model.model_name(), *X.shape)
        print "{0}-fold CV for each of {1} params combinations == {2} fits ...\n"\
            .format(self.n_splits, number_of_combinations, total_iter)

    # initialize `cv_results_`
    self.cv_results_['mean_score'] = []
    self.cv_results_['std_score'] = []
    self.cv_results_['params'] = []
    for k in xrange(self.n_splits):
        self.cv_results_['split{0}_score'.format(k)] = []
        self.cv_results_['split{0}_train_time'.format(k)] = []
        self.cv_results_['split{0}_test_time'.format(k)] = []
    for param_name in unique_params:
        self.cv_results_['param_{0}'.format(param_name)] = ma.array([])

    current_iter = 0
    if self.refit:
        # for each param combination, fit consecutively on each fold
        # to obtain the mean score across splits as soon as possible
        for params_index, params in enumerate(self.gen_params()):
            # set params and add to `cv_results_`
            self.model.reset_params().set_params(**params)
            self.cv_results_['params'].append(params)
            for param_name in unique_params:
                cv_key = 'param_{0}'.format(param_name)
                mask = [int(not param_name in params)]
                to_concat = ma.array([params.get(param_name, None)], mask=mask)
                self.cv_results_[cv_key] = ma.concatenate(
                    (self.cv_results_[cv_key], to_concat))

            splits_scores = []
            for split_index, (train, test) in enumerate(
                    tts.k_fold_split(y, n_splits=self.n_splits, stratify=True)):
                # verbosing
                if self.verbose:
                    current_iter += 1
                    t = "iter: {0:{1}}/{2} ".format(
                        current_iter, current_iter_width, total_iter)
                    t += '+' * (split_index + 1) \
                         + '-' * (self.n_splits - split_index - 1)
                    print_inline(t)

                # fit and evaluate
                with Stopwatch(verbose=False) as s:
                    self.model.fit(X[train], y[train])
                self.cv_results_['split{0}_train_time'.format(split_index)]\
                    .append(s.elapsed())
                with Stopwatch(verbose=False) as s:
                    score = self.model.evaluate(X[test], y[test])
                self.cv_results_['split{0}_test_time'.format(split_index)]\
                    .append(s.elapsed())
                # score = self.scoring(y[test], y_pred)
                splits_scores.append(score)

                # add score to `cv_results_`
                self.cv_results_['split{0}_score'.format(split_index)]\
                    .append(score)

                # verbosing
                if self.verbose:
                    print_inline(" elapsed: {0} sec".format(
                        width_format(timer.elapsed(), default_width=7)))
                    if split_index < self.n_splits - 1:
                        t = ""
                        if self.best_score_ > -np.inf:
                            t += " - best acc.: {0:.4f} at {1}"\
                                .format(self.best_score_, self.best_params_)
                        else:
                            t += " ..."
                        print t

            # compute mean and std score
            mean_score = np.mean(splits_scores)
            std_score = np.std(splits_scores)
            self.cv_results_['mean_score'].append(mean_score)
            self.cv_results_['std_score'].append(std_score)

            # update 'best' attributes
            if mean_score > self.best_score_:
                self.best_index_ = params_index
                self.best_score_ = mean_score
                self.best_std_ = std_score
                self.best_params_ = params
                self.best_model_ = self.model
                if self.save_models:
                    self.best_model_.save(
                        filepath=os.path.join(self.dirpath,
                                              self._best_model_name()),
                        **self.save_params)

            # verbosing
            if self.verbose:
                print_inline(" - mean acc.: {0:.4f} +/- 2 * {1:.3f}\n".format(
                    mean_score, std_score))

    else:  # if self.refit == False
        # fit once per fold, then evaluate each combination of params on it
        for split_index, (train, test) in enumerate(
                tts.k_fold_split(y, n_splits=self.n_splits, stratify=True)):
            current_best_score = -np.inf
            current_best_params = None
            for params_index, params in enumerate(self.gen_params()):
                # set params
                self.model.reset_params().set_params(**params)

                # fit model (only once per split)
                if params_index == 0:
                    with Stopwatch(verbose=False) as s:
                        self.model.fit(X[train], y[train])

                # on first split add params to `cv_results_`
                if split_index == 0:
                    # store params' values
                    self.cv_results_['params'].append(params)
                    for param_name in unique_params:
                        cv_key = 'param_{0}'.format(param_name)
                        mask = [int(not param_name in params)]
                        to_concat = ma.array([params.get(param_name, None)],
                                             mask=mask)
                        self.cv_results_[cv_key] = ma.concatenate(
                            (self.cv_results_[cv_key], to_concat))

                # write training time
                self.cv_results_['split{0}_train_time'.format(split_index)]\
                    .append(s.elapsed() if params_index == 0 else 0.)

                # evaluate
                with Stopwatch(verbose=False) as s:
                    score = self.model.evaluate(X[test], y[test])
                self.cv_results_['split{0}_test_time'.format(split_index)]\
                    .append(s.elapsed())
                # score = self.scoring(y[test], y_pred)

                # add score to `cv_results_`
                cv_key = 'split{0}_score'.format(split_index)
                self.cv_results_[cv_key].append(score)

                # update "current" best score and params
                current_mean_score = np.mean([
                    self.cv_results_['split{0}_score'.format(k)][params_index]
                    for k in xrange(split_index + 1)
                ])
                if current_mean_score > current_best_score:
                    current_best_score = current_mean_score
                    current_best_params = params

                # verbosing
                if self.verbose:
                    current_iter += 1
                    t = "iter: {0:{1}}/{2} ".format(
                        current_iter, current_iter_width, total_iter)
                    t += '+' * (split_index + 1) \
                         + '-' * (self.n_splits - split_index - 1)
                    t += " elapsed: {0} sec".format(
                        width_format(timer.elapsed(), default_width=7))
                    if split_index < self.n_splits - 1:
                        t += " - best acc.: {0:.4f} [{1}/{2} splits] at {3}"\
                            .format(current_best_score, split_index + 1,
                                    self.n_splits, current_best_params)
                    print_inline(t)
                    if split_index < self.n_splits - 1:
                        print

                # after last split ...
                if split_index == self.n_splits - 1:
                    # ... compute means, stds
                    splits_scores = [
                        self.cv_results_['split{0}_score'.format(k)][params_index]
                        for k in xrange(self.n_splits)
                    ]
                    mean_score = np.mean(splits_scores)
                    std_score = np.std(splits_scores)
                    self.cv_results_['mean_score'].append(mean_score)
                    self.cv_results_['std_score'].append(std_score)

                    # ... and update 'best' attributes
                    if mean_score > self.best_score_:
                        self.best_index_ = params_index
                        self.best_score_ = mean_score
                        self.best_std_ = std_score
                        self.best_params_ = params
                        self.best_model_ = self.model
                        if self.save_models:
                            self.best_model_.save(
                                filepath=os.path.join(self.dirpath,
                                                      self._best_model_name()),
                                **self.save_params)

                    # verbosing
                    if self.verbose:
                        print_inline(
                            " - best acc.: {0:.4f} +/- 2 * {1:.3f} at {2}\n"
                            .format(self.best_score_, self.best_std_,
                                    self.best_params_))

    # convert lists to np.ndarray
    for key in (['mean_score', 'std_score', 'params'] +
                ['split{0}_{1}'.format(k, s) for k in xrange(self.n_splits)
                 for s in ('score', 'train_time', 'test_time')]):
        self.cv_results_[key] = np.asarray(self.cv_results_[key])

    return self
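
# A minimal demo of how the masked `param_*` columns in `cv_results_` are
# built above: a parameter absent from a combination is masked out, so mixed
# param grids still line up column-wise. Standalone sketch; the three dicts
# are made up.
import numpy.ma as ma

col = ma.array([])
for params in ({'C': 0.1}, {'C': 1.0}, {'gamma': 0.5}):
    mask = [int(not 'C' in params)]
    col = ma.concatenate((col, ma.array([params.get('C', None)], mask=mask)))
print col  # [0.1 1.0 --]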
def _fit(self, X):
    if not self._initialized:
        layer = FullyConnected(self.n_hidden, bias=0.,
                               random_seed=self.random_seed)
        layer.setup_weights(X.shape)
        self.W = layer.W
        self.vb = np.zeros(X.shape[1])
        self.hb = layer.b
        self._dW = np.zeros_like(self.W)
        self._dvb = np.zeros_like(self.vb)
        self._dhb = np.zeros_like(self.hb)
        self._rng = RNG(self.random_seed)
    self._rng.reseed()
    timer = Stopwatch(verbose=False).start()
    for _ in xrange(self.n_epochs):
        self.epoch += 1
        if self.verbose:
            print_inline('Epoch {0:>{1}}/{2} '.format(
                self.epoch, len(str(self.n_epochs)), self.n_epochs))

        if isinstance(self.learning_rate, str):
            S, F = map(float, self.learning_rate.split('->'))
            self._learning_rate = S + (F - S) * (
                1. - np.exp(-(self.epoch - 1.) / 8.)) / (
                1. - np.exp(-(self.n_epochs - 1.) / 8.))
        else:
            self._learning_rate = self.learning_rate

        if isinstance(self.momentum, str):
            S, F = map(float, self.momentum.split('->'))
            self._momentum = S + (F - S) * (
                1. - np.exp(-(self.epoch - 1) / 4.)) / (
                1. - np.exp(-(self.n_epochs - 1) / 4.))
        else:
            self._momentum = self.momentum

        mean_recon = self.train_epoch(X)
        if mean_recon < self.best_recon:
            self.best_recon = mean_recon
            self.best_epoch = self.epoch
            self.best_W = self.W.copy()
            self.best_vb = self.vb.copy()
            self.best_hb = self.hb.copy()
            self._early_stopping = self.early_stopping

        msg = 'elapsed: {0} sec'.format(
            width_format(timer.elapsed(), default_width=5, max_precision=2))
        msg += ' - recon. mse: {0}'.format(
            width_format(mean_recon, default_width=6, max_precision=4))
        msg += ' - best r-mse: {0}'.format(
            width_format(self.best_recon, default_width=6, max_precision=4))
        if self.early_stopping:
            msg += ' {0}*'.format(self._early_stopping)
        if self.verbose:
            print msg
        if self._early_stopping == 0:
            return
        if self.early_stopping:
            self._early_stopping -= 1
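
# A standalone sketch of the 'S->F' annealing used above for the learning
# rate and momentum: the value moves from S to F along a saturating
# exponential over the epochs. `anneal` and `tau` are illustrative names
# (tau=8. mirrors the learning-rate branch, tau=4. the momentum one).
import numpy as np

def anneal(spec, epoch, n_epochs, tau=8.):
    S, F = map(float, spec.split('->'))
    return S + (F - S) * (1. - np.exp(-(epoch - 1.) / tau)) \
                       / (1. - np.exp(-(n_epochs - 1.) / tau))

# anneal('0.05->0.005', epoch=1,  n_epochs=10) == 0.05   (starts at S)
# anneal('0.05->0.005', epoch=10, n_epochs=10) == 0.005  (ends at F)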