Exemple #1
0
 def train(self):
     """Run the training loop over ``range(self.start_itr, self.n_itr)``.

     Each iteration, inside a logger prefix naming the iteration:

     1. obtain samples via ``self.obtain_samples(itr)``;
     2. if ``self.alter_sd_fn`` is set, call it with the samples and
        ``self.alter_sd_args`` (its return value is ignored);
     3. process the samples, log ``samples['stats']``, optimize the policy;
     4. record cumulative and per-iteration wall-clock time and dump the
        tabular log.

     After the prefixed section, optionally run a plotted rollout and save
     a snapshot, depending on the iteration number and configuration.
     """
     train_began = time.time()
     for itr in range(self.start_itr, self.n_itr):
         itr_began = time.time()
         prefix_text = 'itr #%d | ' % itr
         with logger.prefix(prefix_text):
             logger.log("Obtaining samples...")
             samples = self.obtain_samples(itr)
             alter_fn = self.alter_sd_fn
             if alter_fn is not None:
                 # Optional hook; called for its effect on the samples,
                 # the return value is not used.
                 alter_fn(samples, *self.alter_sd_args)

             logger.log("Processing samples...")
             self.process_samples(itr, samples)

             logger.log("Logging diagnostics...")
             self.log_diagnostics(samples['stats'])

             logger.log("Optimizing policy...")
             self.optimize_policy(itr, samples)

             # 'Time' is measured from the start of train(), 'ItrTime'
             # from the start of this iteration.
             total_elapsed = time.time() - train_began
             logger.record_tabular('Time', total_elapsed)
             itr_elapsed = time.time() - itr_began
             logger.record_tabular('ItrTime', itr_elapsed)
             logger.dump_tabular(with_prefix=False)

         # Plot only on multiples of plot_every, when plotting is enabled,
         # and strictly after the plot threshold iteration.
         plot_due = itr % self.plot_every == 0
         if plot_due and self.plot and itr > self.plot_itr_threshold:
             rollout(
                 self.policy,
                 self.env_obj,
                 self.max_path_length,
                 plot=True,
             )

         # Save on multiples of save_step, but only when the logger has a
         # snapshot directory configured.
         save_due = itr % self.save_step == 0
         if save_due and logger.get_snapshot_dir() is not None:
             snapshot_path = logger.get_snapshot_dir() + '/snapshots'
             self.save(snapshot_path, itr)
Exemple #2
0
 def log_diagnostics(self, stats):
     """Record every entry of ``stats`` in the tabular log, in sorted key order."""
     ordered_keys = list(stats.keys())
     ordered_keys.sort()
     for name in ordered_keys:
         value = stats[name]
         logger.record_tabular(name, value)
Exemple #3
0
 def log_diagnostics(self, stats):
     """Record every ``(key, value)`` pair of ``stats`` in the tabular log.

     Pairs are recorded in the order ``stats.items()`` yields them.
     """
     for entry in stats.items():
         # entry is a (key, value) pair; unpack it into the two arguments.
         logger.record_tabular(*entry)