Ejemplo n.º 1
0
    def log_diagnostics(self, paths, **kwargs):
        """Log summary statistics for ``paths`` and return the summed rewards.

        ``paths`` is decomposed with ``split_paths``. Statistics produced by
        ``create_stats_ordered_dict`` for the returns, rewards and actions,
        plus the mean of element 1 of every observation, are written to the
        tabular logger.

        :param paths: Paths in the format accepted by ``split_paths``.
        :param kwargs: Accepted but not used by this method.
        :return: List holding ``np.sum`` of each item obtained by iterating
            over the rewards value returned by ``split_paths``.
        """
        list_of_rewards, _terminals, obs, actions, _next_obs = split_paths(
            paths)

        returns = [np.sum(path_rewards) for path_rewards in list_of_rewards]

        # Entries are added in a fixed order so the logged columns keep a
        # stable ordering.
        stats = OrderedDict()
        for label, data in (
                ('Undiscounted Returns', returns),
                ('Rewards', list_of_rewards),
                ('Actions', actions),
        ):
            stats.update(create_stats_ordered_dict(label, data))

        # Element 1 of each observation is what gets averaged here.
        # NOTE(review): presumably an "on platform" indicator -- confirm
        # against the environment's observation layout.
        on_platform = [observation[1] for observation in obs]
        stats['Fraction of time on platform'] = np.mean(on_platform)

        for name, stat in stats.items():
            logger.record_tabular(name, stat)
        return returns
Ejemplo n.º 2
0
    def log_diagnostics(self, paths, **kwargs):
        """Log summary statistics for ``paths`` and return the summed rewards.

        ``paths`` is decomposed with ``split_paths``. Statistics produced by
        ``create_stats_ordered_dict`` for the returns, rewards and actions
        are written to the tabular logger.

        :param paths: Paths in the format accepted by ``split_paths``.
        :param kwargs: Accepted but not used by this method.
        :return: List holding ``np.sum`` of each item obtained by iterating
            over the rewards value returned by ``split_paths``.
        """
        list_of_rewards, _terminals, _obs, actions, _next_obs = split_paths(
            paths)

        returns = [np.sum(path_rewards) for path_rewards in list_of_rewards]

        # Entries are added in a fixed order so the logged columns keep a
        # stable ordering.
        stats = OrderedDict()
        for label, data in (
                ('UndiscountedReturns', returns),
                ('Rewards', list_of_rewards),
                ('Actions', actions),
        ):
            stats.update(create_stats_ordered_dict(label, data))

        for name, stat in stats.items():
            logger.record_tabular(name, stat)
        return returns
Ejemplo n.º 3
0
 def _statistics_from_paths(self, paths, stat_prefix):
     """Compute statistics for ``paths`` and add a 'Num Paths' entry.

     The paths are decomposed with ``split_paths``, packed into a dict,
     converted with ``np_to_pytorch_batch`` and handed to
     ``self._statistics_from_batch``. The result is then extended with the
     stats that ``create_stats_ordered_dict`` builds from ``len(paths)``.

     :param paths: Paths in the format accepted by ``split_paths``.
     :param stat_prefix: Prefix forwarded to both statistics helpers.
     :return: The statistics mapping returned by
         ``self._statistics_from_batch``, updated in place.
     """
     rewards, terminals, obs, actions, next_obs = split_paths(paths)
     converted_batch = np_to_pytorch_batch({
         'rewards': rewards,
         'terminals': terminals,
         'observations': obs,
         'actions': actions,
         'next_observations': next_obs,
     })
     stats = self._statistics_from_batch(converted_batch, stat_prefix)
     num_paths_stats = create_stats_ordered_dict(
         'Num Paths',
         len(paths),
         stat_prefix=stat_prefix,
     )
     stats.update(num_paths_stats)
     return stats
Ejemplo n.º 4
0
 def test_split_paths(self):
     """Check ``split_paths`` on the paths ``create_path(0)`` and ``create_path(1)``.

     Each of the five returned arrays is compared against a hard-coded
     column of six values.
     """
     paths = [create_path(index) for index in range(2)]
     rewards, terminals, obs, actions, next_obs = split_paths(paths)

     # Expected values are written directly as (6, 1) column literals.
     expected_rewards = np.array([[-1], [0], [1], [0], [1], [2]])
     expected_terminals = np.array([[0], [0], [1], [0], [0], [1]])
     expected_obs = np.array([[2], [4], [8], [3], [5], [9]])
     expected_actions = np.array([[5], [7], [9], [6], [8], [10]])
     expected_next_obs = np.array([[4], [5], [9], [5], [6], [10]])

     # Compared in the same order as the values are returned.
     comparisons = (
         (rewards, expected_rewards),
         (terminals, expected_terminals),
         (obs, expected_obs),
         (actions, expected_actions),
         (next_obs, expected_next_obs),
     )
     for actual, expected in comparisons:
         self.assertNpEqual(actual, expected)