def _end_epoch(self, epoch):
    """Finalize one training epoch: log stats, roll per-epoch state, save a snapshot.

    Args:
        epoch: Zero-based index of the epoch that just finished.
    """
    self._log_stats(epoch)

    # Roll the internal per-epoch state of every stateful component.
    # The eval data collector lives in a Ray actor, so its end_epoch call is
    # remote; ray.get blocks until it has completed.
    self.expl_data_collector.end_epoch(epoch)
    ray.get([self.remote_eval_data_collector.end_epoch.remote(epoch)])
    self.replay_buffer.end_epoch(epoch)
    self.trainer.end_epoch(epoch)

    # We can only save the state of the program
    # after we call end epoch on all objects with internal state.
    # This is so that restoring from the saved state will
    # lead to identical result as if the program was left running.
    snapshot = self._get_snapshot(epoch)
    logger.save_itr_params(epoch + 1, snapshot)
    gt.stamp('saving')

    logger.record_dict(_get_epoch_timings())
    logger.record_tabular('Epoch', epoch)
    # Only emit the tabular (CSV) header once, on the very first epoch.
    write_header = epoch == 0
    logger.dump_tabular(with_prefix=False, with_timestamp=False,
                        write_header=write_header)
def _end_epoch(self, epoch):
    """Finalize one training epoch: log stats, optionally snapshot, and dump the log.

    Unlike the collector-based variant elsewhere in this file, this version
    only rolls the trainer's per-epoch state and skips snapshotting on the
    very first epoch.

    Args:
        epoch: Zero-based index of the epoch that just finished.
    """
    self._log_stats(epoch)

    # NOTE(review): the original source had its line breaks stripped; the
    # guarded span is assumed to cover the snapshot/save/stamp trio — confirm
    # against upstream history.
    if epoch > 0:
        snapshot = self._get_snapshot(epoch)
        logger.save_itr_params(epoch + 1, snapshot)
        gt.stamp('saving', unique=False)

    self.trainer.end_epoch(epoch)

    logger.record_dict(_get_epoch_timings())
    logger.record_tabular('Epoch', epoch)
    # Only emit the tabular (CSV) header once, on the very first epoch.
    write_header = epoch == 0
    logger.dump_tabular(with_prefix=False, with_timestamp=False,
                        write_header=write_header)
# NOTE(review): this span is a mangled fragment of a larger evaluation
# routine — the enclosing function header and all statement newlines /
# indentation were lost, so the loop and dedent boundaries cannot be
# recovered from this view alone. The code is left byte-identical below;
# reconstruct the layout against the original file before editing.
#
# What the statements visibly do, in order:
#   - step the environment with action `a`, keeping only the reward `r`;
#   - append the squared prediction error (pred_r - r)**2 to `mse_loss`
#     (presumably accumulated per step inside an unseen loop — TODO confirm);
#   - append the per-axis mean and std of the stacked errors to
#     `reward_loss_other_tasks` / `reward_loss_other_tasks_std`;
#   - record those lists plus their axis-0 (assumed: across tasks) and
#     axis-1 (assumed: across ensemble members) means/stds into
#     `eval_statistics` — axis semantics inferred from key names only,
#     verify against the caller;
#   - record `num_selected_trans_other_tasks`, log epoch completion, dump
#     `eval_statistics` under the 'trainer/' prefix, and write the tabular
#     header only on epoch 0.
_, r, _, _ = env.step(a) mse_loss.append((pred_r - r)**2) reward_loss_other_tasks.append( np.mean(np.stack(mse_loss), axis=0).tolist()) reward_loss_other_tasks_std.append( np.std(np.stack(mse_loss), axis=0).tolist()) eval_statistics['reward_loss_other_tasks'] = reward_loss_other_tasks eval_statistics[ 'reward_loss_other_tasks_std'] = reward_loss_other_tasks_std eval_statistics['average_ensemble_reward_loss_other_tasks_mean'] = np.mean( reward_loss_other_tasks, axis=0) eval_statistics['average_ensemble_reward_loss_other_tasks_std'] = np.std( reward_loss_other_tasks, axis=0) eval_statistics['average_task_reward_loss_other_tasks_mean'] = np.mean( reward_loss_other_tasks, axis=1) eval_statistics['average_task_reward_loss_other_tasks_std'] = np.std( reward_loss_other_tasks, axis=1) eval_statistics[ 'num_selected_trans_other_tasks'] = num_selected_trans_other_tasks logger.log("Epoch {} finished".format(epoch), with_timestamp=True) logger.record_dict(eval_statistics, prefix='trainer/') write_header = True if epoch == 0 else False logger.dump_tabular(with_prefix=False, with_timestamp=False, write_header=write_header)
def finish_iteration(self, iter):
    """Flush the accumulated tabular log entries at the end of an iteration.

    Args:
        iter: Iteration index; accepted for interface compatibility but not
            used by this implementation. (The name shadows the builtin
            ``iter`` — kept as-is to preserve the public signature.)
    """
    logger.dump_tabular()