def train(self):
    """Run the training loop for iterations ``self.start_itr`` .. ``self.n_itr - 1``.

    Each iteration, under a logger prefix naming the iteration:

    1. obtains samples via ``self.obtain_samples(itr)``;
    2. if ``self.alter_sd_fn`` is set, calls it on the samples with
       ``self.alter_sd_args`` (its return value is ignored);
    3. processes the samples, logs ``samples['stats']``, and optimizes
       the policy;
    4. records total and per-iteration wall-clock time and dumps the
       tabular log;
    5. optionally runs a plotted rollout and saves a snapshot.
    """
    # NOTE(review): block nesting was reconstructed from a flattened
    # source; confirm the plot/snapshot steps belong inside the prefix
    # context.
    run_began = time.time()
    for itr in range(self.start_itr, self.n_itr):
        itr_began = time.time()
        with logger.prefix('itr #%d | ' % itr):
            logger.log("Obtaining samples...")
            sample_data = self.obtain_samples(itr)
            alter_fn = self.alter_sd_fn
            if alter_fn is not None:
                alter_fn(sample_data, *self.alter_sd_args)

            logger.log("Processing samples...")
            self.process_samples(itr, sample_data)

            logger.log("Logging diagnostics...")
            self.log_diagnostics(sample_data['stats'])

            logger.log("Optimizing policy...")
            self.optimize_policy(itr, sample_data)

            # Wall-clock bookkeeping: elapsed since training began, then
            # elapsed since this iteration began.
            logger.record_tabular('Time', time.time() - run_began)
            logger.record_tabular('ItrTime', time.time() - itr_began)
            logger.dump_tabular(with_prefix=False)

            # Plot only on multiples of plot_every, when plotting is
            # enabled, and once past the iteration threshold.
            if itr % self.plot_every == 0:
                if self.plot and itr > self.plot_itr_threshold:
                    rollout(
                        self.policy,
                        self.env_obj,
                        self.max_path_length,
                        plot=True,
                    )

            # Snapshot on multiples of save_step, but only when the
            # logger reports a snapshot directory.
            if itr % self.save_step == 0:
                if logger.get_snapshot_dir() is not None:
                    self.save(logger.get_snapshot_dir() + '/snapshots', itr)
def log_diagnostics(self, stats):
    """Record every entry of ``stats`` in the tabular log, in sorted key order.

    Args:
        stats: mapping from a diagnostic name to its value. Keys must be
            mutually comparable, since they are sorted before logging.
    """
    ordered_names = list(stats.keys())
    ordered_names.sort()
    for name in ordered_names:
        logger.record_tabular(name, stats[name])
def log_diagnostics(self, stats):
    """Record every entry of ``stats`` in the tabular log.

    Entries are emitted in the order ``stats.items()`` yields them; no
    sorting is applied.

    Args:
        stats: mapping from a diagnostic name to its value.
    """
    # NOTE(review): another ``log_diagnostics`` in this file logs keys in
    # sorted order; if both live in the same class, this later definition
    # is the one that takes effect -- confirm which ordering is intended.
    for entry in stats.items():
        name, value = entry
        logger.record_tabular(name, value)