Ejemplo n.º 1
0
 def get_data_dictionary(self):
     """
     Build the DataFeed of this component's data to push to the GPU (device).

     Registers, in order: the action cooldown period, the per-agent
     "in cooldown until" array (flagged with save_copy_and_apply_at_reset),
     the number of stringency levels, and the default / no-op agent action
     masks.

     Returns:
         The populated DataFeed.
     """
     feed = DataFeed()
     feed.add_data(
         name="action_cooldown_period", data=self.action_cooldown_period
     )
     feed.add_data(
         name="action_in_cooldown_until",
         data=self.action_in_cooldown_until,
         save_copy_and_apply_at_reset=True,
     )
     # Registered under a different name than the attribute it is read from.
     feed.add_data(name="num_stringency_levels", data=self.n_stringency_levels)
     # Both masks are registered under their attribute name, as `[1] + mask`
     # (presumably prepending an always-enabled entry to a list mask --
     # confirm against where the masks are built).
     for mask_name in ("default_agent_action_mask", "no_op_agent_action_mask"):
         feed.add_data(name=mask_name, data=[1] + getattr(self, mask_name))
     return feed
Ejemplo n.º 2
0
 def get_data_dictionary(self):
     """
     Build the DataFeed of this component's data to push to the device.

     Every entry is registered under the same name as the attribute it is
     read from: first the three subsidy settings, then the two planner
     action masks.

     Returns:
         The populated DataFeed.
     """
     feed = DataFeed()

     # Subsidy settings are passed through unchanged.
     for field in (
         "subsidy_interval",
         "num_subsidy_levels",
         "max_daily_subsidy_per_state",
     ):
         feed.add_data(name=field, data=getattr(self, field))

     # Masks are registered as `[1] + mask` (presumably prepending an
     # always-enabled entry to a list mask -- confirm against where the
     # masks are built).
     for mask_name in (
         "default_planner_action_mask",
         "no_op_planner_action_mask",
     ):
         feed.add_data(name=mask_name, data=[1] + getattr(self, mask_name))

     return feed
Ejemplo n.º 3
0
 def get_data_dictionary(self):
     """
     Build the DataFeed of this component's data to push to the device.

     Registers the three vaccine-delivery settings under their attribute
     names, followed by a zero-initialised `num_vaccines_available_t` array
     of length `self.n_agents` (flagged with save_copy_and_apply_at_reset).

     Returns:
         The populated DataFeed.
     """
     feed = DataFeed()

     # Delivery settings are passed through unchanged.
     for field in (
         "num_vaccines_per_delivery",
         "delivery_interval",
         "time_when_vaccine_delivery_begins",
     ):
         feed.add_data(name=field, data=getattr(self, field))

     # One zero entry per agent.
     initial_vaccines = np.zeros(self.n_agents)
     feed.add_data(
         name="num_vaccines_available_t",
         data=initial_vaccines,
         save_copy_and_apply_at_reset=True,
     )
     return feed
Ejemplo n.º 4
0
 def get_tensor_dictionary(self):
     """
     Build the DataFeed of (PyTorch-accessible) data to push to the device.

     Nothing is registered here, so the returned DataFeed is empty.
     """
     return DataFeed()
    def test_env_reset_and_step(self):
        """
        Perform consistency checks for the reset() and step() functions.

        For every scenario in `self.env_configs` this:

        1. Creates `self.num_envs` CPU environments (with `use_cuda` forced to
           False), resets each one and stacks their observations along a new
           leading axis.
        2. Creates one GPU environment wrapper (`use_cuda=True`) and resets
           all of its environments.
        3. Pushes observation and reward placeholders to the device and
           compares CPU vs. GPU observations after the first reset.
        4. For `self.num_episodes * env_config["episode_length"]` steps, feeds
           the same random actions to both versions and compares
           observations, rewards and the "__all__" done flags.
        5. After each step, resets the done environments on the GPU and on
           the CPU and re-checks the observations of every CPU environment
           that was reset.

        Failures surface through the `assert` statements and through
        `self.run_consistency_checks`.
        """

        for scenario in self.env_configs:
            env_config = self.env_configs[scenario]

            print(f"Running {scenario}...")

            # Env Reset
            # CPU version of env
            env_cpu = {}
            # Maps "<_OBSERVATIONS>_<key>_<subkey>" -> list with one entry per env
            obs_dict_of_lists = defaultdict(list)
            obs_cpu = {}

            for env_id in range(self.num_envs):
                env_cpu[env_id] = self.env_class(**env_config)
                # Set use_cuda to False for the CPU envs
                env_cpu[env_id].use_cuda = False
                env_cpu[env_id].world.use_cuda = False

                # obs will be a nested dict of
                # {policy_id: combined_agent_obs for each subkey}
                obs = env_cpu[env_id].reset()

                for key in obs:
                    assert isinstance(obs[key], dict)
                    for subkey in obs[key]:
                        obs_dict_of_lists[_OBSERVATIONS + "_" + key + "_" +
                                          subkey] += [obs[key][subkey]]

            # Stack the per-env observations along a new leading (env) axis
            for key in obs_dict_of_lists:
                obs_cpu[key] = np.stack((obs_dict_of_lists[key]), axis=0)

            # GPU version of env
            env_gpu = FoundationEnvWrapper(
                self.env_class(**env_config),
                num_envs=self.num_envs,
                use_cuda=True,
                testing_mode=self.use_gpu_testing_mode,
                customized_env_registrar=self.customized_env_registrar,
            )
            env_gpu.reset_all_envs()

            # Observations placeholders
            # -------------------------
            # Seeded with the CPU observations gathered at the first reset
            data_feed = DataFeed()
            for key in obs_cpu:
                data_feed.add_data(
                    name=key,
                    data=obs_cpu[key],
                    save_copy_and_apply_at_reset=True,
                )

            # Define a mapping function: policy -> agent_ids
            # ----------------------------------------------
            # "a" covers the string ids "0" .. str(n_agents - 1); "p" covers
            # the single id "p"
            policy_to_agent_ids_mapping = {
                "a":
                [str(agent_id) for agent_id in range(env_config["n_agents"])],
                "p": ["p"],
            }

            # Rewards placeholders: shape (num_envs,) for a single-agent
            # policy, (num_envs, num_agents) otherwise
            for key in policy_to_agent_ids_mapping:
                if len(policy_to_agent_ids_mapping[key]) == 1:
                    rewards_data = np.zeros(
                        (self.num_envs, ),
                        dtype=np.float32,
                    )
                else:
                    rewards_data = np.zeros(
                        (self.num_envs, len(policy_to_agent_ids_mapping[key])),
                        dtype=np.float32,
                    )
                data_feed.add_data(
                    name=_REWARDS + "_" + key,
                    data=rewards_data,
                    save_copy_and_apply_at_reset=True,
                )

            env_gpu.cuda_data_manager.push_data_to_device(
                data_feed, torch_accessible=True)

            # Consistency checks at the first reset
            # -------------------------------------
            obs_gpu = {}
            print("Running obs consistency check after the first reset.")
            for key in obs_cpu:
                obs_gpu[key] = env_gpu.cuda_data_manager.pull_data_from_device(
                    key)
                self.run_consistency_checks(obs_cpu[key], obs_gpu[key])

            # Consistency checks during step
            # ------------------------------
            print(
                "Running obs/rew/done consistency check during env resets and steps"
            )

            # Test across multiple episodes
            for _ in range(self.num_episodes * env_config["episode_length"]):
                # One action dict per env; the same actions drive both the
                # CPU and the GPU versions
                actions_list_of_dicts = generate_random_actions(
                    env_gpu.env, self.num_envs)

                for policy in policy_to_agent_ids_mapping:
                    actions_list = []
                    for action_dict in actions_list_of_dicts:
                        # Stack the actions of all agents under this policy
                        combined_actions = np.stack(
                            [
                                action_dict[key]
                                for key in policy_to_agent_ids_mapping[policy]
                            ],
                            axis=0,
                        )
                        actions_list += [combined_actions]

                    # Stack across envs along a new leading axis
                    actions = np.stack((actions_list), axis=0)
                    name = _ACTIONS + "_" + policy
                    actions_data = DataFeed()
                    actions_data.add_data(name=name, data=actions)

                    # Push the actions array the first time; afterwards
                    # overwrite the existing device tensor in place
                    if not env_gpu.cuda_data_manager.is_data_on_device_via_torch(
                            name):
                        env_gpu.cuda_data_manager.push_data_to_device(
                            actions_data, torch_accessible=True)
                    else:
                        env_gpu.cuda_data_manager.data_on_device_via_torch(
                            name)[:] = torch.from_numpy(actions)

                # Fresh accumulators for this step's CPU results
                obs_cpu = {}
                obs_dict_of_lists = defaultdict(list)
                rew_dict_of_lists = defaultdict(list)
                rew_cpu = {}
                done_list = []

                # Step every CPU env with its own action dict
                for env_id in range(self.num_envs):
                    obs, rew, done, _ = env_cpu[env_id].step(
                        actions_list_of_dicts[env_id])

                    for key in obs:
                        assert isinstance(obs[key], dict)
                        for subkey in obs[key]:
                            obs_dict_of_lists[_OBSERVATIONS + "_" + key + "_" +
                                              subkey] += [obs[key][subkey]]

                    for key in rew:
                        rew_dict_of_lists[_REWARDS + "_" + key] += [rew[key]]

                    done_list += [done]

                for key in obs_dict_of_lists:
                    obs_cpu[key] = np.stack((obs_dict_of_lists[key]), axis=0)

                for key in rew_dict_of_lists:
                    rew_cpu[key] = np.stack((rew_dict_of_lists[key]), axis=0)

                # Only the "__all__" done flag of each env is compared
                done_cpu = {
                    "__all__":
                    np.array([done["__all__"] for done in done_list])
                }

                # Step the GPU env (the actions were already written to the
                # device above) and bring its done flags back as numpy
                _, _, done_gpu, _ = env_gpu.step()
                done_gpu["__all__"] = done_gpu["__all__"].cpu().numpy()

                obs_gpu = {}
                for key in obs_cpu:
                    obs_gpu[
                        key] = env_gpu.cuda_data_manager.pull_data_from_device(
                            key)
                    self.run_consistency_checks(obs_cpu[key], obs_gpu[key])

                rew_gpu = {}
                for key in rew_cpu:
                    rew_gpu[
                        key] = env_gpu.cuda_data_manager.pull_data_from_device(
                            key)
                    self.run_consistency_checks(rew_cpu[key], rew_gpu[key])

                assert all(done_cpu["__all__"] == done_gpu["__all__"])

                # GPU reset
                env_gpu.reset_only_done_envs()

                # Now, pull done flags and they should be set to 0 (False) again
                done_gpu = env_gpu.cuda_data_manager.pull_data_from_device(
                    "_done_")
                assert done_gpu.sum() == 0

                # CPU reset
                for env_id in range(self.num_envs):
                    if done_cpu["__all__"][env_id]:
                        # Reset the CPU for this env_id
                        obs = env_cpu[env_id].reset()

                        # Rebuild obs_cpu from this single env's reset obs
                        obs_cpu = {}
                        obs_dict_of_lists = defaultdict(list)
                        for key in obs:
                            assert isinstance(obs[key], dict)
                            for subkey in obs[key]:
                                obs_dict_of_lists[_OBSERVATIONS + "_" + key +
                                                  "_" + subkey] += [
                                                      obs[key][subkey]
                                                  ]

                        for key in obs_dict_of_lists:
                            obs_cpu[key] = np.stack((obs_dict_of_lists[key]),
                                                    axis=0)

                        # Compare against this env's slice of the GPU data.
                        # NOTE(review): obs_cpu[key] is a stack of one env's
                        # obs (leading axis of length 1) while the GPU side is
                        # indexed by env_id -- presumably
                        # run_consistency_checks tolerates this; confirm.
                        obs_gpu = {}
                        for key in obs_cpu:
                            obs_gpu[
                                key] = env_gpu.cuda_data_manager.pull_data_from_device(
                                    key)
                            self.run_consistency_checks(
                                obs_cpu[key], obs_gpu[key][env_id])