Example #1
    def _end_epoch(self, epoch):
        self._log_stats(epoch)

        self.expl_data_collector.end_epoch(epoch)
        ray.get([self.remote_eval_data_collector.end_epoch.remote(epoch)])

        self.replay_buffer.end_epoch(epoch)
        self.trainer.end_epoch(epoch)

        # We can only save the state of the program
        # after we call end epoch on all objects with internal state.
        # This is so that restoring from the saved state will
        # lead to identical result as if the program was left running.

        snapshot = self._get_snapshot(epoch)
        logger.save_itr_params(epoch + 1, snapshot)
        gt.stamp('saving')

        logger.record_dict(_get_epoch_timings())
        logger.record_tabular('Epoch', epoch)

        write_header = True if epoch == 0 else False
        logger.dump_tabular(with_prefix=False,
                            with_timestamp=False,
                            write_header=write_header)
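
The `_get_snapshot` helper called above is not part of this listing. A common pattern is for it to merge per-component snapshots into a single dict keyed by a component prefix; the sketch below assumes each component exposes a `get_snapshot()` method and that the Ray-hosted evaluator is queried through a future, mirroring the calls in Example #1. It is an illustration, not the original implementation.

    # Hypothetical sketch of _get_snapshot: it assumes every component
    # exposes get_snapshot() and that the remote evaluator is a Ray actor.
    # `epoch` is accepted only to match the call site above.
    def _get_snapshot(self, epoch):
        snapshot = {}
        for prefix, component in [
                ('trainer/', self.trainer),
                ('exploration/', self.expl_data_collector),
                ('replay_buffer/', self.replay_buffer),
        ]:
            for key, value in component.get_snapshot().items():
                snapshot[prefix + key] = value
        # The remote collector's snapshot arrives as a Ray future.
        remote_snapshot = ray.get(
            self.remote_eval_data_collector.get_snapshot.remote())
        for key, value in remote_snapshot.items():
            snapshot['remote_evaluation/' + key] = value
        return snapshot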
Example #2
    def _log_stats(self, epoch):
        logger.log("Epoch {} finished".format(epoch), with_timestamp=True)
        """
        Trainer
        """
        logger.record_dict(self.trainer.get_diagnostics(), prefix='trainer/')
        """
        Misc
        """
        gt.stamp('logging')
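
The listing does not show what `self.trainer.get_diagnostics()` returns. A typical arrangement is for the trainer to accumulate scalar statistics during its update steps and hand them back as an ordered dict that `logger.record_dict` can log under the `trainer/` prefix; the standalone sketch below assumes that pattern (the `eval_statistics` attribute and the loss names are made up for illustration).

from collections import OrderedDict

class SketchTrainer:
    """Hypothetical trainer-side counterpart to the logging code above."""

    def __init__(self):
        self.eval_statistics = OrderedDict()
        self._num_train_steps = 0

    def train_step(self, batch):
        self._num_train_steps += 1
        # ... compute losses here, then stash them for the epoch's diagnostics
        self.eval_statistics['qf_loss'] = 0.0      # placeholder value
        self.eval_statistics['policy_loss'] = 0.0  # placeholder value

    def get_diagnostics(self):
        stats = OrderedDict(num_train_steps=self._num_train_steps)
        stats.update(self.eval_statistics)
        return stats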
Example #3
    def _log_stats(self, epoch):
        logger.log("Epoch {} finished".format(epoch), with_timestamp=True)
        """
        Policy
        """
        logger.record_dict(self.policy.get_diagnostics(), prefix='policy/')
        """
        Evaluation
        """
        logger.record_dict(self.get_evaluation_diagnostics(), prefix='eval/')
        """
        Misc
        """
        gt.stamp('logging')
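
`self.get_evaluation_diagnostics()` is likewise not shown. Assuming it summarizes the paths gathered by an evaluation data collector (an attribute that does not appear in this example), a minimal sketch could reduce them to return statistics like this:

    # Hypothetical sketch of get_evaluation_diagnostics; the
    # self.eval_data_collector attribute and the path format are assumptions,
    # and np / OrderedDict are the usual numpy / collections imports.
    def get_evaluation_diagnostics(self):
        paths = self.eval_data_collector.get_epoch_paths()
        returns = [float(np.sum(path['rewards'])) for path in paths]
        return OrderedDict([
            ('num_paths', len(returns)),
            ('return_mean', float(np.mean(returns)) if returns else 0.0),
            ('return_std', float(np.std(returns)) if returns else 0.0),
        ])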
Example #4
    def _end_epoch(self, epoch):

        self._log_stats(epoch)
        if epoch > 0:
            snapshot = self._get_snapshot(epoch)
            logger.save_itr_params(epoch + 1, snapshot)
        gt.stamp('saving', unique=False)

        self.trainer.end_epoch(epoch)

        logger.record_dict(_get_epoch_timings())
        logger.record_tabular('Epoch', epoch)

        write_header = True if epoch == 0 else False
        logger.dump_tabular(with_prefix=False, with_timestamp=False,
                            write_header=write_header)
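
Both `_end_epoch` variants above log `_get_epoch_timings()`, which is not included in the listing. Given the `gt.stamp(...)` calls, it almost certainly reads back the gtimer stamps; the following is a sketch of what it might look like, with the 'time/...' key names being assumptions.

from collections import OrderedDict
import gtimer as gt

# Hypothetical sketch: turn the most recent gtimer stamps into scalar
# entries that logger.record_dict() can log.
def _get_epoch_timings():
    times_itrs = gt.get_times().stamps.itrs
    times = OrderedDict()
    epoch_time = 0.0
    for key in sorted(times_itrs):
        time = times_itrs[key][-1]  # duration of this stamp in the last epoch
        epoch_time += time
        times['time/{} (s)'.format(key)] = time
    times['time/epoch (s)'] = epoch_time
    times['time/total (s)'] = gt.get_times().total
    return times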
Example #5
    def _log_stats(self, epoch):
        logger.log("Epoch {} finished".format(epoch), with_timestamp=True)
        """
        Replay Buffer
        """
        logger.record_dict(self.replay_buffer.get_diagnostics(),
                           prefix='replay_buffer/')
        """
        Trainer
        """
        logger.record_dict(self.trainer.get_diagnostics(), prefix='trainer/')
        """
        Exploration
        """
        logger.record_dict(self.expl_data_collector.get_diagnostics(),
                           prefix='exploration/')
        # expl_paths = self.expl_data_collector.get_epoch_paths()
        # logger.record_dict(
        #     eval_util.get_generic_path_information(expl_paths),
        #     prefix="exploration/",
        # )
        """
        Remote Evaluation
        """
        logger.record_dict(
            ray.get(self.remote_eval_data_collector.get_diagnostics.remote()),
            prefix='remote_evaluation/',
        )
        # remote_eval_paths = ray.get(
        #     self.remote_eval_data_collector.get_epoch_paths.remote())
        # logger.record_dict(
        #     eval_util.get_generic_path_information(remote_eval_paths),
        #     prefix="remote_evaluation/",
        # )
        """
        Misc
        """
        gt.stamp('logging')
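
Examples #1 and #5 drive the evaluation collector through Ray: every call goes through `.remote(...)`, which returns a future, and `ray.get(...)` blocks until the actor has finished, so evaluation can run in a separate process from training. The standalone sketch below shows that pattern with a made-up actor; only the Ray calls themselves are the real API.

import ray

ray.init(ignore_reinit_error=True)

@ray.remote
class RemoteEvalDataCollector:
    """Hypothetical evaluation collector running in its own actor process."""

    def __init__(self):
        self._diagnostics = {}

    def collect_new_paths(self, num_steps):
        # ... run evaluation rollouts here, then record summary statistics
        self._diagnostics['num_steps_collected'] = num_steps

    def get_diagnostics(self):
        return self._diagnostics

    def end_epoch(self, epoch):
        self._diagnostics = {}

# Usage mirrors the listings: .remote(...) returns a future, ray.get() blocks.
collector = RemoteEvalDataCollector.remote()
ray.get(collector.collect_new_paths.remote(1000))
print(ray.get(collector.get_diagnostics.remote()))
ray.get(collector.end_epoch.remote(0))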
Example #6
            _, r, _, _ = env.step(a)
            mse_loss.append((pred_r - r)**2)
        reward_loss_other_tasks.append(
            np.mean(np.stack(mse_loss), axis=0).tolist())
        reward_loss_other_tasks_std.append(
            np.std(np.stack(mse_loss), axis=0).tolist())

    eval_statistics['reward_loss_other_tasks'] = reward_loss_other_tasks
    eval_statistics[
        'reward_loss_other_tasks_std'] = reward_loss_other_tasks_std
    eval_statistics['average_ensemble_reward_loss_other_tasks_mean'] = np.mean(
        reward_loss_other_tasks, axis=0)
    eval_statistics['average_ensemble_reward_loss_other_tasks_std'] = np.std(
        reward_loss_other_tasks, axis=0)

    eval_statistics['average_task_reward_loss_other_tasks_mean'] = np.mean(
        reward_loss_other_tasks, axis=1)
    eval_statistics['average_task_reward_loss_other_tasks_std'] = np.std(
        reward_loss_other_tasks, axis=1)

    eval_statistics[
        'num_selected_trans_other_tasks'] = num_selected_trans_other_tasks

    logger.log("Epoch {} finished".format(epoch), with_timestamp=True)
    logger.record_dict(eval_statistics, prefix='trainer/')

    write_header = True if epoch == 0 else False
    logger.dump_tabular(with_prefix=False,
                        with_timestamp=False,
                        write_header=write_header)
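
In Example #6, `reward_loss_other_tasks` is a nested list whose outer dimension indexes the held-out tasks and whose inner dimension indexes the ensemble members (assuming `pred_r` is a per-member prediction vector). Under that reading, the `axis=0` reduction averages over tasks and `axis=1` averages over ensemble members, as the toy example below illustrates; the shape and values are made up.

import numpy as np

# Toy stand-in for reward_loss_other_tasks: 3 tasks x 2 ensemble members.
reward_loss_other_tasks = [[0.10, 0.30],
                           [0.20, 0.40],
                           [0.60, 0.80]]

# axis=0 collapses the task dimension -> one mean per ensemble member
print(np.mean(reward_loss_other_tasks, axis=0))  # [0.3 0.5]

# axis=1 collapses the ensemble dimension -> one mean per task
print(np.mean(reward_loss_other_tasks, axis=1))  # [0.2 0.3 0.7]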