Example #1
    def _end_epoch(self, epoch):
        self._log_stats(epoch)

        self.expl_data_collector.end_epoch(epoch)
        ray.get([self.remote_eval_data_collector.end_epoch.remote(epoch)])

        self.replay_buffer.end_epoch(epoch)
        self.trainer.end_epoch(epoch)

        # We can only save the state of the program
        # after we call end_epoch on all objects with internal state.
        # This is so that restoring from the saved state will
        # lead to the same results as if the program had kept running.

        snapshot = self._get_snapshot(epoch)
        logger.save_itr_params(epoch + 1, snapshot)
        gt.stamp('saving')

        logger.record_dict(_get_epoch_timings())
        logger.record_tabular('Epoch', epoch)

        write_header = True if epoch == 0 else False
        logger.dump_tabular(with_prefix=False,
                            with_timestamp=False,
                            write_header=write_header)
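
The comment in this example fixes the ordering: every object with internal state is advanced with end_epoch before the snapshot is taken, so restoring from the snapshot behaves like a run that was never interrupted. Below is a minimal, self-contained sketch of that ordering; Component, get_snapshot, and the driver function are hypothetical stand-ins, not the library's actual classes.

class Component:
    """Hypothetical stand-in for a stateful object (replay buffer, trainer, ...)
    that exposes the end_epoch() hook used above."""
    def __init__(self, name):
        self.name = name
        self.epochs_seen = 0

    def end_epoch(self, epoch):
        self.epochs_seen = epoch + 1

    def get_snapshot(self):
        return {'name': self.name, 'epochs_seen': self.epochs_seen}


def end_epoch(components, epoch):
    # Advance every stateful component first ...
    for c in components:
        c.end_epoch(epoch)
    # ... and only then take the snapshot, so restoring from it matches
    # a run that kept going.
    return {c.name: c.get_snapshot() for c in components}


components = [Component('replay_buffer'), Component('trainer')]
snapshot = end_epoch(components, epoch=0)
print(snapshot)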
Example #2
    def _end_epoch(self, epoch):

        self._log_stats(epoch)
        if epoch > 0:
            snapshot = self._get_snapshot(epoch)
            logger.save_itr_params(epoch + 1, snapshot)
        gt.stamp('saving', unique=False)

        self.trainer.end_epoch(epoch)

        logger.record_dict(_get_epoch_timings())
        logger.record_tabular('Epoch', epoch)

        write_header = True if epoch == 0 else False
        logger.dump_tabular(with_prefix=False, with_timestamp=False,
                            write_header=write_header)
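
Both examples compute write_header the same way: the header row is written only on the first epoch, so repeated dumps append rows to a single well-formed table. Here is a small self-contained sketch of that behaviour; dump_tabular below is a hypothetical stand-in, not the library's logger.

import csv
import sys

def dump_tabular(keys, values, write_header, out=sys.stdout):
    """Hypothetical stand-in for logger.dump_tabular(): appends one row of
    tabular stats, writing the column header only when asked to."""
    writer = csv.writer(out)
    if write_header:
        writer.writerow(keys)
    writer.writerow(values)

for epoch in range(3):
    # Same pattern as the examples above: the header goes out once, on epoch 0.
    dump_tabular(['Epoch', 'loss'], [epoch, 0.1 * epoch],
                 write_header=(epoch == 0))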
Example #3
            _, r, _, _ = env.step(a)
            # Squared error between the predicted reward and the environment reward.
            mse_loss.append((pred_r - r)**2)
        reward_loss_other_tasks.append(
            np.mean(np.stack(mse_loss), axis=0).tolist())
        reward_loss_other_tasks_std.append(
            np.std(np.stack(mse_loss), axis=0).tolist())

    eval_statistics['reward_loss_other_tasks'] = reward_loss_other_tasks
    eval_statistics[
        'reward_loss_other_tasks_std'] = reward_loss_other_tasks_std
    eval_statistics['average_ensemble_reward_loss_other_tasks_mean'] = np.mean(
        reward_loss_other_tasks, axis=0)
    eval_statistics['average_ensemble_reward_loss_other_tasks_std'] = np.std(
        reward_loss_other_tasks, axis=0)

    eval_statistics['average_task_reward_loss_other_tasks_mean'] = np.mean(
        reward_loss_other_tasks, axis=1)
    eval_statistics['average_task_reward_loss_other_tasks_std'] = np.std(
        reward_loss_other_tasks, axis=1)

    eval_statistics[
        'num_selected_trans_other_tasks'] = num_selected_trans_other_tasks

    logger.log("Epoch {} finished".format(epoch), with_timestamp=True)
    logger.record_dict(eval_statistics, prefix='trainer/')

    write_header = True if epoch == 0 else False
    logger.dump_tabular(with_prefix=False,
                        with_timestamp=False,
                        write_header=write_header)
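
The aggregation at the end of this example reduces the same per-task losses along two axes. A toy sketch with made-up numbers, assuming (as the key names suggest) an outer task dimension and an inner ensemble dimension:

import numpy as np

# Hypothetical values: one reward MSE per (task, ensemble member) pair.
reward_loss_other_tasks = np.array([
    [0.10, 0.20, 0.15],   # task 0
    [0.30, 0.25, 0.35],   # task 1
])

# axis=0 reduces over tasks (one value per ensemble member),
# axis=1 reduces within each task (one value per task).
print(np.mean(reward_loss_other_tasks, axis=0))  # [0.2   0.225 0.25 ]
print(np.mean(reward_loss_other_tasks, axis=1))  # [0.15 0.3 ]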
Example #4
    def finish_iteration(self, iter):
        logger.dump_tabular()