Example #1
 def on_episode_start(self, *, worker: RolloutWorker, base_env: BaseEnv,
                      policies: Dict[str, Policy],
                      episode: MultiAgentEpisode, env_index: int, **kwargs):
     print("episode {} (env-idx={}) started.".format(
         episode.episode_id, env_index))
     episode.user_data["pole_angles"] = []
     episode.hist_data["pole_angles"] = []
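These callbacks only take effect if the class that defines them is passed to the trainer. A minimal wiring sketch, assuming the methods above live in a DefaultCallbacks subclass called MyCallbacks and using the older ray.rllib.agents API that these snippets are written against (algorithm, environment and stopping criterion are illustrative):

import ray
from ray import tune
from ray.rllib.agents.callbacks import DefaultCallbacks

class MyCallbacks(DefaultCallbacks):
    # on_episode_start / on_episode_step / on_episode_end defined as in the
    # examples in this section.
    ...

ray.init()
tune.run(
    "PPO",
    config={
        "env": "CartPole-v0",
        # Pass the callbacks class itself (not an instance).
        "callbacks": MyCallbacks,
    },
    stop={"training_iteration": 2},
)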
Example #2
 def on_episode_start(self, worker: RolloutWorker, base_env: BaseEnv,
                      policies: Dict[str, Policy],
                      episode: MultiAgentEpisode, **kwargs):
     episode.hist_data["actions"] = []
     # Action counters per episode
     for i in range(worker.env.nA):
         episode.user_data["actions/action_" + str(i)] = []
Example #3
 def on_episode_end(self, worker: RolloutWorker, base_env: BaseEnv,
                    policies: Dict[str, Policy], episode: MultiAgentEpisode,
                    **kwargs):
     self.player_scores.append(episode.last_info_for(f'agent_0_high'))
     for i in range(1, 4):
         self.opponent_scores.append(
             episode.last_info_for(f'agent_{i}_high'))
Example #4
    def on_episode_end(self,
                       *,
                       worker: "RolloutWorker",
                       base_env: BaseEnv,
                       policies: Dict[PolicyID, Policy],
                       episode: MultiAgentEpisode,
                       env_index: Optional[int] = None,
                       **kwargs) -> None:
        snapshot = tracemalloc.take_snapshot()
        top_stats = snapshot.statistics("lineno")

        for stat in top_stats[:10]:
            count = stat.count
            size = stat.size

            trace = str(stat.traceback)

            episode.custom_metrics[f"tracemalloc/{trace}/size"] = size
            episode.custom_metrics[f"tracemalloc/{trace}/count"] = count

        process = psutil.Process(os.getpid())
        worker_rss = process.memory_info().rss
        worker_data = process.memory_info().data
        worker_vms = process.memory_info().vms
        episode.custom_metrics["tracemalloc/worker/rss"] = worker_rss
        episode.custom_metrics["tracemalloc/worker/data"] = worker_data
        episode.custom_metrics["tracemalloc/worker/vms"] = worker_vms
Example #5
 def on_episode_start(self, *, worker: RolloutWorker, base_env: BaseEnv,
                      policies: Dict[str, Policy],
                      episode: MultiAgentEpisode, env_index: int, **kwargs):
     episode.user_data["velocity"] = []
     episode.user_data["steering"] = []
     episode.user_data["step_reward"] = []
     episode.user_data["acceleration"] = []
Example #6
    def on_episode_start(self, *, worker: RolloutWorker, base_env: BaseEnv,
                         policies: Dict[str, Policy],
                         episode: MultiAgentEpisode, env_index: int, **kwargs):

        episode.user_data['op'] = {}
        for op in METRIC_OPS:
            episode.user_data['op'][op] = defaultdict(list)
Example #7
    def on_episode_step(self, worker: RolloutWorker, base_env: BaseEnv,
                        episode: MultiAgentEpisode, **kwargs):
        """
        pole_angle = abs(episode.last_observation_for()[2])
        raw_angle = abs(episode.last_raw_obs_for()[2])
        assert pole_angle == raw_angle
        episode.user_data["pole_angles"].append(pole_angle)
        """

        prefix = "agt_"
        for i in range(num_agents):
            obs = episode.last_raw_obs_for(i)
            #obs = episode.last_observation_for(i)
            act = episode.last_action_for(i)
            reward = episode.last_info_for(i).get("reward")
            NAV = episode.last_info_for(i).get("NAV")
            NAV = None if NAV is None else float(NAV)
            num_trades = episode.last_info_for(i).get("num_trades")

            if reward is None:  # goto next agent.
                continue

            episode.user_data[prefix + str(i) + "_obs"].append(obs)
            episode.user_data[prefix + str(i) + "_act"].append(act)
            episode.user_data[prefix + str(i) + "_reward"].append(reward)
            episode.user_data[prefix + str(i) + "_NAV"].append(NAV)
            episode.user_data[prefix + str(i) +
                              "_num_trades"].append(num_trades)
Example #8
    def on_episode_end(
        self,
        worker: RolloutWorker,
        base_env: BaseEnv,
        policies: Dict[str, Policy],
        episode: MultiAgentEpisode,
        **kwargs,
    ):
        ego_speed = episode.user_data["ego_speed"]
        mean_ego_speed = {
            _id: np.mean(speed_hist) for _id, speed_hist in ego_speed.items()
        }

        distance_travelled = {
            _id: np.mean(info["score"])
            for _id, info in episode._agent_to_last_info.items()
        }

        speed_list = list(map(lambda x: round(x, 3), mean_ego_speed.values()))
        dist_list = list(map(lambda x: round(x, 3), distance_travelled.values()))
        reward_list = list(map(lambda x: round(x, 3), episode.agent_rewards.values()))

        for _id, speed in mean_ego_speed.items():
            episode.custom_metrics[f"mean_ego_speed_{_id}"] = speed
        for _id, distance in distance_travelled.items():
            episode.custom_metrics[f"distance_travelled_{_id}"] = distance

        print(
            f"episode {episode.episode_id} ended with {episode.length} steps: [mean_speed]: {speed_list} [distance_travelled]: {dist_list} [reward]: {reward_list}"
        )
Example #9
    def on_episode_step(self, worker: RolloutWorker, base_env: BaseEnv,
                        episode: MultiAgentEpisode, **kwargs):
        if episode.last_info_for() is not None:
            profit = episode.last_info_for()['profit']

            episode.user_data["profit"].append(profit)
            episode.user_data["actions"][episode.last_action_for()] += 1
Example #10
    def on_episode_end(
        self,
        worker: RolloutWorker,
        base_env: BaseEnv,
        policies: Dict[str, Policy],
        episode: MultiAgentEpisode,
        **kwargs,
    ):
        ego_speed = episode.user_data["ego_speed"]
        mean_ego_speed = {
            agent_id: np.mean(speed_hist) for agent_id, speed_hist in ego_speed.items()
        }

        distance_travelled = dict()
        for _id, info in episode._agent_to_last_info.items():
            if info.get("_group_info"):
                for i, _info in enumerate(info["_group_info"]):
                    distance_travelled[f"{_id}:AGENT-{i}"] = np.mean(_info["score"])
            else:
                distance_travelled[_id] = np.mean(info["score"])

        speed_list = list(map(lambda x: round(x, 3), mean_ego_speed.values()))
        dist_list = list(map(lambda x: round(x, 3), distance_travelled.values()))
        reward_list = list(map(lambda x: round(x, 3), episode.agent_rewards.values()))

        episode.custom_metrics[f"mean_ego_speed"] = sum(speed_list) / max(
            1, len(speed_list)
        )
        episode.custom_metrics[f"distance_travelled"] = sum(dist_list) / max(
            1, len(dist_list)
        )

        logger.info(f"episode {episode.episode_id} ended with {episode.length} steps")
Example #11
 def on_episode_end(self, worker: RolloutWorker, base_env: BaseEnv,
                    policies: Dict[str, Policy], episode: MultiAgentEpisode,
                    **kwargs):
     pole_angle = np.mean(episode.user_data["pole_angles"])
     print("episode {} ended with length {} and pole angles {}".format(
         episode.episode_id, episode.length, pole_angle))
     episode.custom_metrics["pole_angle"] = pole_angle
     episode.hist_data["pole_angles"] = episode.user_data["pole_angles"]
Example #12
 def on_episode_start(self, worker: RolloutWorker, base_env: BaseEnv,
                      policies: Dict[str, Policy],
                      episode: MultiAgentEpisode, **kwargs):
     # print("episode {} started".format(episode.episode_id))
     episode.user_data["reward_score"] = []
     episode.user_data["reward_target_bias"] = []
     episode.user_data["reward_ap"] = []
     episode.user_data["ep_target_bias"] = []
     episode.user_data["num_no_action"] = 0
Example #13
 def on_episode_end(self, worker: RolloutWorker, base_env: BaseEnv,
                    policies: Dict[str, Policy], episode: MultiAgentEpisode,
                    **kwargs):
     episode.custom_metrics['nr_ac_crashes'] = sum(
         value == -1 for value in episode.agent_rewards.values())
     episode.custom_metrics['nr_ac_landed'] = sum(
         value == 1 for value in episode.agent_rewards.values())
     episode.custom_metrics['nr_ac_out_of_bounds'] = sum(
         value == -0.5 for value in episode.agent_rewards.values())
Example #14
 def on_episode_step(self, *, worker: RolloutWorker, base_env: BaseEnv,
                     episode: MultiAgentEpisode, env_index: int, **kwargs):
     # Make sure this episode is ongoing.
     assert episode.length > 0, \
         "ERROR: `on_episode_step()` callback should not be called right " \
         "after env reset!"
     pole_angle = abs(episode.last_observation_for()[2])
     raw_angle = abs(episode.last_raw_obs_for()[2])
     assert pole_angle == raw_angle
     episode.user_data["pole_angles"].append(pole_angle)
Example #15
    def on_episode_step(
            self, episode: MultiAgentEpisode = None, step_data: dict = None):
        if not self._log_current_full_episode:
            return None

        assert episode is not None or step_data is not None
        assert episode is None or step_data is None

        if step_data is None:
            step_data = {}
            for agent_id, policy in episode._policies.items():

                if self._first_fake_step_done:
                    if agent_id in self._log_full_epi_tmp_data.keys():
                        obs_before_act = self._log_full_epi_tmp_data[agent_id]
                    else:
                        obs_before_act = None
                    action = episode.last_action_for(agent_id).tolist()
                    epi = episode.episode_id
                    rewards = episode._agent_reward_history[agent_id]
                    reward = rewards[-1] if len(rewards) > 0 else None
                    info = episode.last_info_for(agent_id)
                    if hasattr(policy, "to_log"):
                        info.update(policy.to_log)
                    else:
                        logger.info(f"policy {policy} doesn't have attrib "
                                    "to_log. hasattr(policy, 'to_log'): "
                                    f"{hasattr(policy, 'to_log')}")
                # The episode provides the last action together with the
                # observation that this action produced, but we need the
                # observation that caused the agent to select that action,
                # i.e. the observation from step n-1.
                obs_after_act = episode.last_observation_for(agent_id)
                self._log_full_epi_tmp_data[agent_id] = obs_after_act

                if self._first_fake_step_done:
                    if self.log_ful_epi_one_hot_obs:
                        obs_before_act = np.argwhere(obs_before_act)
                        obs_after_act = np.argwhere(obs_after_act)

                    step_data[agent_id] = {
                        "obs_before_act": obs_before_act,
                        "obs_after_act": obs_after_act,
                        "action": action,
                        "reward": reward,
                        "info": info,
                        "epi": epi}

        if self._first_fake_step_done:
            self.json_logger.write_json(step_data)
            self.json_logger.write("\n")
            self.step_counter += 1
        else:
            logger.info("FullEpisodeLogger: don't log first fake step")
            self._first_fake_step_done = True
Example #16
 def on_episode_start(
     self,
     worker: RolloutWorker,
     base_env: BaseEnv,
     policies: Dict[str, Policy],
     episode: MultiAgentEpisode,
     **kwargs,
 ):
     print("episode {} started".format(episode.episode_id))
     episode.user_data["ego_speed"] = dict()
     episode.user_data["step_heading_error"] = dict()
Example #17
 def on_episode_start(self, *, worker: RolloutWorker, base_env: BaseEnv,
                      policies: Dict[str, Policy],
                      episode: MultiAgentEpisode, env_index: int, **kwargs):
     # Make sure this episode has just been started (only initial obs
     # logged so far).
     assert episode.length == 0, \
         "ERROR: `on_episode_start()` callback should be called right " \
         "after env reset!"
     print("episode {} (env-idx={}) started.".format(
         episode.episode_id, env_index))
     episode.user_data["pole_angles"] = []
     episode.hist_data["pole_angles"] = []
Example #18
    def on_episode_end(self, worker: RolloutWorker, base_env: BaseEnv,
                       policies: Dict[str, Policy], episode: MultiAgentEpisode,
                       **kwargs):
        # https://docs.ray.io/en/master/rllib-package-ref.html?highlight=MultiAgentEpisode#ray.rllib.evaluation.MultiAgentEpisode
        if worker.env.mode == 'test':
            # Test episode ended, save metrics
            # I want this value (or a mean of several of them) to be used as metric for the checkpoints
            episode.custom_metrics["test_return"] = episode.agent_rewards[(
                'agent0', 'default_policy')]

        for i in range(worker.env.nA):
            episode.custom_metrics["actions/action_" + str(i)] = sum(
                episode.user_data["actions/action_" + str(i)])
Example #19
    def on_episode_step(self, *, worker, base_env: BaseEnv,
                        episode: MultiAgentEpisode, env_index: int, **kwargs):
        info = self.get_info(base_env, episode)
        # add all custom scalar metrics in the info dict
        if info is not None and 'scalar_metrics' in info:
            for metric_name, metric_value in info['scalar_metrics'].items():
                episode.custom_metrics[metric_name] = metric_value

                # increment (or init) the sum over all time steps inside the episode
                eps_metric_name = f'eps_{metric_name}'
                if eps_metric_name in episode.user_data:
                    episode.user_data[eps_metric_name] += metric_value
                else:
                    episode.user_data[eps_metric_name] = metric_value
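The per-episode sums accumulated under the eps_ keys are never reported by this snippet; a typical companion on_episode_end (a sketch, not part of the original source) would copy them into custom_metrics so they are aggregated with the other episode metrics:

    def on_episode_end(self, *, worker, base_env: BaseEnv,
                       policies: Dict[str, Policy], episode: MultiAgentEpisode,
                       env_index: int, **kwargs):
        # Flush the per-episode sums accumulated in on_episode_step().
        for key, value in episode.user_data.items():
            if key.startswith('eps_'):
                episode.custom_metrics[key] = value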
Example #20
 def on_episode_end(self, *, worker: RolloutWorker, base_env: BaseEnv,
                    policies: Dict[str, Policy], episode: MultiAgentEpisode,
                    env_index: int, **kwargs):
     # Make sure this episode is really done.
     assert episode.batch_builder.policy_collectors[
         "default_policy"].buffers["dones"][-1], \
         "ERROR: `on_episode_end()` should only be called " \
         "after episode is done!"
     pole_angle = np.mean(episode.user_data["pole_angles"])
     print("episode {} (env-idx={}) ended with length {} and pole "
           "angles {}".format(episode.episode_id, env_index, episode.length,
                              pole_angle))
     episode.custom_metrics["pole_angle"] = pole_angle
     episode.hist_data["pole_angles"] = episode.user_data["pole_angles"]
Example #21
 def on_episode_end(self, *, worker: RolloutWorker, base_env: BaseEnv,
                    policies: Dict[str, Policy], episode: MultiAgentEpisode,
                    env_index: int, **kwargs):
     # Make sure this episode is really done.
     assert episode.batch_builder.policy_collectors[
         "default_policy"].buffers["dones"][-1], \
         "ERROR: `on_episode_end()` should only be called " \
         "after episode is done!"
     print(
         "episode {} (env-idx={}) ended with length {} , agent rewards {} and total reward {}"
         .format(episode.episode_id, env_index, episode.length,
                 episode.agent_rewards, episode.total_reward))
     episode.custom_metrics["agent_rewards"] = episode.agent_rewards
     episode.custom_metrics["total_reward"] = episode.total_reward
Example #22
 def on_episode_end(  # type: ignore
         self, *_,
         episode: MultiAgentEpisode,
         **__) -> None:
     key = list(episode._agent_to_last_info.keys())[0]
     ep_info = episode.last_info_for(key).copy()
     episode.custom_metrics.update(ray.tune.utils.flatten_dict(ep_info))
Example #23
 def on_episode_start(self, worker: RolloutWorker, base_env: BaseEnv,
                      policies: Dict[str, Policy],
                      episode: MultiAgentEpisode, **kwargs):
     episode.custom_metrics = {
         'episode_average_departure': [],
         'episode_average_arrival': [],
         'episode_average_wait': [],
         'episode_missing_agents': [],
         'episode_on_time_agents': [],
         'episode_total_reward': [],
     }
     episode.hist_data = {
         'info_by_agent': [],
         'rewards_by_agent': [],
         'last_action_by_agent': [],
     }
Example #24
 def on_episode_step(self, worker: RolloutWorker, base_env: BaseEnv,
                     episode: MultiAgentEpisode, **kwargs):
     for agent_name in range(4):
         action = episode.last_action_for(agent_name)
         if action == constants.Action.Bomb.value:
             episode.custom_metrics["agent_{}/num_bombs".format(
                 agent_name)] += 1
Example #25
 def on_postprocess_trajectory(self, *, worker: "RolloutWorker",
                               episode: MultiAgentEpisode,
                               agent_id: AgentID, policy_id: PolicyID,
                               policies: Dict[PolicyID, Policy],
                               postprocessed_batch: SampleBatch,
                               original_batches: Dict[AgentID,
                                                      SampleBatch],
                               **kwargs) -> None:
     if args.store_network_data:
         network_data_list = []
         for i in range(len(postprocessed_batch['obs'])):
             network_data_step = {}
             for key in postprocessed_batch:
                 network_key = None
                 if key == 'actions':
                     network_key = 'action'
                 elif key == 'obs':
                     network_key = 'observation'
                 elif key in [
                         'action_prob', 'action_logp',
                         'action_dist_inputs', 'vf_preds', 'fc_1',
                         'fc_2', 'fc_value_1', 'fc_value_2', 'logits'
                 ]:
                     network_key = key
                 if network_key is not None:
                     network_data_step[
                         network_key] = postprocessed_batch[key][i]
             network_data_list.append(network_data_step)
         episode.user_data['network_data_list'] = network_data_list
Example #26
 def on_episode_end(self, worker: RolloutWorker, base_env: BaseEnv,
                    policies: Dict[str, Policy],
                    episode: MultiAgentEpisode, **kwargs):
     ensemble_rewards = episode.last_info_for()["ensemble_rewards"]
     episode.custom_metrics[f"max_reward"].append(
         np.max(ensemble_rewards))
     for i, ri in enumerate(ensemble_rewards):
         episode.custom_metrics[f"reward_{i}"].append(ri)
Example #27
 def on_postprocess_trajectory(
         self, worker: RolloutWorker, episode: MultiAgentEpisode,
         agent_id: str, policy_id: str, policies: Dict[str, Policy],
         postprocessed_batch: SampleBatch,
         original_batches: Dict[str, SampleBatch], **kwargs):
     if "num_batches" not in episode.custom_metrics:
         episode.custom_metrics["num_batches"] = 0
     episode.custom_metrics["num_batches"] += 1
Example #28
    def on_episode_step(self, *, worker: RolloutWorker, base_env: BaseEnv,
                        episode: MultiAgentEpisode, env_index: int, **kwargs):

        episode_info = episode.last_info_for()
        if episode_info:
            for op in list(episode_info.keys() & METRIC_OPS):
                for k, v in episode_info[op].items():
                    episode.user_data['op'][op][k].append(v)
Example #29
 def on_episode_step(self, *, worker: RolloutWorker, base_env: BaseEnv,
                     episode: MultiAgentEpisode, env_index: int, **kwargs):
     info = episode.last_info_for()
     if info is not None:
         episode.user_data["velocity"].append(info["velocity"])
         episode.user_data["steering"].append(info["steering"])
         episode.user_data["step_reward"].append(info["step_reward"])
         episode.user_data["acceleration"].append(info["acceleration"])
Example #30
    def on_episode_step(self, *, worker: RolloutWorker, base_env: BaseEnv,
                        episode: MultiAgentEpisode, env_index: int, **kwargs):

        info = episode.last_info_for()
        if isinstance(info, dict):
            # extract the timestep of the current step from the info dict
            timestep = info['timestep']
            episode.user_data["timestep"].append(timestep)
            # extract the num_consolidated value from the info dict
            num_consolidated = info['num_consolidated']
            episode.user_data["num_consolidated"].append(num_consolidated)
            # extract the num_overloaded value from the info dict
            num_overloaded = info['num_overloaded']
            episode.user_data["num_overloaded"].append(num_overloaded)
            # extract the num_moves value from the info dict
            num_moves = info['num_moves']
            episode.user_data["num_moves"].append(num_moves)
            # extract the greedy_num_consolidated value from the info dict
            greedy_num_consolidated = info['greedy_num_consolidated']
            episode.user_data["greedy_num_consolidated"].append(
                greedy_num_consolidated)

            # rewards
            rewards = info['rewards']
            episode.user_data["rewards"].append(rewards)