def get_data_dictionary(self):
    """
    Build the DataFeed of vaccine-delivery data to push to the device.
    """
    data_dict = DataFeed()
    # Scalar configuration values, registered in a fixed order.
    for feed_name, feed_value in (
        ("num_vaccines_per_delivery", self.num_vaccines_per_delivery),
        ("delivery_interval", self.delivery_interval),
        ("time_when_vaccine_delivery_begins", self.time_when_vaccine_delivery_begins),
    ):
        data_dict.add_data(name=feed_name, data=feed_value)
    # Per-agent vaccine counter; a copy is saved and re-applied at env reset.
    data_dict.add_data(
        name="num_vaccines_available_t",
        data=np.zeros(self.n_agents),
        save_copy_and_apply_at_reset=True,
    )
    return data_dict
def get_tensor_dictionary(self):
    """
    Create a dictionary of (PyTorch-accessible) data to push to the device.

    This component registers no torch-accessible tensors, so the feed is empty.
    """
    return DataFeed()
def get_data_dictionary(self):
    """
    Build the DataFeed of stringency-policy data to push to the GPU (device).
    """
    data_dict = DataFeed()
    data_dict.add_data(
        name="action_cooldown_period",
        data=self.action_cooldown_period,
    )
    # Mutable per-agent state: a copy is saved and re-applied at env reset.
    data_dict.add_data(
        name="action_in_cooldown_until",
        data=self.action_in_cooldown_until,
        save_copy_and_apply_at_reset=True,
    )
    data_dict.add_data(
        name="num_stringency_levels",
        data=self.n_stringency_levels,
    )
    # Each agent action mask is pushed with a leading 1 prepended.
    for mask_name, mask in (
        ("default_agent_action_mask", self.default_agent_action_mask),
        ("no_op_agent_action_mask", self.no_op_agent_action_mask),
    ):
        data_dict.add_data(name=mask_name, data=[1] + mask)
    return data_dict
def get_data_dictionary(self):
    """
    Build the DataFeed of subsidy data to push to the device.
    """
    data_dict = DataFeed()
    # Planner action masks are pushed with a leading 1 prepended;
    # the remaining entries are scalar configuration values.
    for feed_name, feed_value in (
        ("subsidy_interval", self.subsidy_interval),
        ("num_subsidy_levels", self.num_subsidy_levels),
        ("max_daily_subsidy_per_state", self.max_daily_subsidy_per_state),
        ("default_planner_action_mask", [1] + self.default_planner_action_mask),
        ("no_op_planner_action_mask", [1] + self.no_op_planner_action_mask),
    ):
        data_dict.add_data(name=feed_name, data=feed_value)
    return data_dict
def test_env_reset_and_step(self):
    """
    Perform consistency checks for the reset() and step() functions.

    For every configured scenario, runs `self.num_envs` CPU copies of the
    environment side-by-side with the batched GPU (CUDA) version and asserts
    that observations, rewards and done flags agree after the first reset,
    after every step, and after per-env resets of done environments.
    """
    for scenario in self.env_configs:
        env_config = self.env_configs[scenario]
        print(f"Running {scenario}...")

        # Env Reset
        # CPU version of env
        env_cpu = {}
        obs_dict_of_lists = defaultdict(list)
        obs_cpu = {}
        for env_id in range(self.num_envs):
            env_cpu[env_id] = self.env_class(**env_config)

            # Set use_cuda to False for the CPU envs
            env_cpu[env_id].use_cuda = False
            env_cpu[env_id].world.use_cuda = False

            # obs will be a nested dict of
            # {policy_id: combined_agent_obs for each subkey}
            obs = env_cpu[env_id].reset()
            for key in obs:
                assert isinstance(obs[key], dict)
                for subkey in obs[key]:
                    # Flatten the nested dict into flat keys of the form
                    # "<_OBSERVATIONS>_<policy>_<subkey>", one entry per env.
                    obs_dict_of_lists[_OBSERVATIONS + "_" + key + "_" +
                                      subkey] += [obs[key][subkey]]
        # Stack the per-env observations along a leading env dimension.
        for key in obs_dict_of_lists:
            obs_cpu[key] = np.stack((obs_dict_of_lists[key]), axis=0)

        # GPU version of env
        env_gpu = FoundationEnvWrapper(
            self.env_class(**env_config),
            num_envs=self.num_envs,
            use_cuda=True,
            testing_mode=self.use_gpu_testing_mode,
            customized_env_registrar=self.customized_env_registrar,
        )
        env_gpu.reset_all_envs()

        # Observations placeholders
        # -------------------------
        # Push the CPU reset observations to the device so they can be
        # compared against, and restored at, subsequent GPU resets.
        data_feed = DataFeed()
        for key in obs_cpu:
            data_feed.add_data(
                name=key,
                data=obs_cpu[key],
                save_copy_and_apply_at_reset=True,
            )

        # Define a mapping function: policy -> agent_ids
        # ----------------------------------------------
        policy_to_agent_ids_mapping = {
            "a": [str(agent_id) for agent_id in range(env_config["n_agents"])],
            "p": ["p"],
        }
        for key in policy_to_agent_ids_mapping:
            # Single-agent policies get a 1-D (num_envs,) reward buffer;
            # multi-agent policies get (num_envs, num_agents).
            if len(policy_to_agent_ids_mapping[key]) == 1:
                rewards_data = np.zeros(
                    (self.num_envs, ),
                    dtype=np.float32,
                )
            else:
                rewards_data = np.zeros(
                    (self.num_envs, len(policy_to_agent_ids_mapping[key])),
                    dtype=np.float32,
                )
            data_feed.add_data(
                name=_REWARDS + "_" + key,
                data=rewards_data,
                save_copy_and_apply_at_reset=True,
            )
        env_gpu.cuda_data_manager.push_data_to_device(
            data_feed, torch_accessible=True)

        # Consistency checks at the first reset
        # -------------------------------------
        obs_gpu = {}
        print("Running obs consistency check after the first reset.")
        for key in obs_cpu:
            obs_gpu[key] = env_gpu.cuda_data_manager.pull_data_from_device(
                key)
            self.run_consistency_checks(obs_cpu[key], obs_gpu[key])

        # Consistency checks during step
        # ------------------------------
        print(
            "Running obs/rew/done consistency check during env resets and steps"
        )

        # Test across multiple episodes
        for _ in range(self.num_episodes * env_config["episode_length"]):
            actions_list_of_dicts = generate_random_actions(
                env_gpu.env, self.num_envs)
            for policy in policy_to_agent_ids_mapping:
                # Combine each env's per-agent actions into one array, then
                # stack across envs -> (num_envs, num_agents, ...).
                actions_list = []
                for action_dict in actions_list_of_dicts:
                    combined_actions = np.stack(
                        [
                            action_dict[key]
                            for key in policy_to_agent_ids_mapping[policy]
                        ],
                        axis=0,
                    )
                    actions_list += [combined_actions]
                actions = np.stack((actions_list), axis=0)
                name = _ACTIONS + "_" + policy
                actions_data = DataFeed()
                actions_data.add_data(name=name, data=actions)
                # Push the actions on first use; afterwards, write in place
                # into the existing torch tensor on the device.
                if not env_gpu.cuda_data_manager.is_data_on_device_via_torch(
                        name):
                    env_gpu.cuda_data_manager.push_data_to_device(
                        actions_data, torch_accessible=True)
                else:
                    env_gpu.cuda_data_manager.data_on_device_via_torch(
                        name)[:] = torch.from_numpy(actions)

            # Step every CPU env with the same sampled actions and collect
            # obs / rewards / dones across envs.
            obs_cpu = {}
            obs_dict_of_lists = defaultdict(list)
            rew_dict_of_lists = defaultdict(list)
            rew_cpu = {}
            done_list = []
            for env_id in range(self.num_envs):
                obs, rew, done, _ = env_cpu[env_id].step(
                    actions_list_of_dicts[env_id])
                for key in obs:
                    assert isinstance(obs[key], dict)
                    for subkey in obs[key]:
                        obs_dict_of_lists[_OBSERVATIONS + "_" + key + "_" +
                                          subkey] += [obs[key][subkey]]
                for key in rew:
                    rew_dict_of_lists[_REWARDS + "_" + key] += [rew[key]]
                done_list += [done]
            for key in obs_dict_of_lists:
                obs_cpu[key] = np.stack((obs_dict_of_lists[key]), axis=0)
            for key in rew_dict_of_lists:
                rew_cpu[key] = np.stack((rew_dict_of_lists[key]), axis=0)
            done_cpu = {
                "__all__": np.array([done["__all__"] for done in done_list])
            }

            # Update actions tensor on the gpu
            _, _, done_gpu, _ = env_gpu.step()
            done_gpu["__all__"] = done_gpu["__all__"].cpu().numpy()

            # Compare per-key observations, rewards and the done flags
            # between the CPU envs and the GPU batch.
            obs_gpu = {}
            for key in obs_cpu:
                obs_gpu[
                    key] = env_gpu.cuda_data_manager.pull_data_from_device(
                        key)
                self.run_consistency_checks(obs_cpu[key], obs_gpu[key])
            rew_gpu = {}
            for key in rew_cpu:
                rew_gpu[
                    key] = env_gpu.cuda_data_manager.pull_data_from_device(
                        key)
                self.run_consistency_checks(rew_cpu[key], rew_gpu[key])
            assert all(done_cpu["__all__"] == done_gpu["__all__"])

            # GPU reset
            env_gpu.reset_only_done_envs()

            # Now, pull done flags and they should be set to 0 (False) again
            done_gpu = env_gpu.cuda_data_manager.pull_data_from_device(
                "_done_")
            assert done_gpu.sum() == 0

            # CPU reset
            for env_id in range(self.num_envs):
                if done_cpu["__all__"][env_id]:
                    # Reset the CPU for this env_id
                    obs = env_cpu[env_id].reset()
                    obs_cpu = {}
                    obs_dict_of_lists = defaultdict(list)
                    for key in obs:
                        assert isinstance(obs[key], dict)
                        for subkey in obs[key]:
                            obs_dict_of_lists[_OBSERVATIONS + "_" + key +
                                              "_" + subkey] += [
                                                  obs[key][subkey]
                                              ]
                    for key in obs_dict_of_lists:
                        obs_cpu[key] = np.stack((obs_dict_of_lists[key]),
                                                axis=0)
                    # The GPU env was already reset above; the freshly reset
                    # CPU obs must match the GPU data for this env_id slice.
                    obs_gpu = {}
                    for key in obs_cpu:
                        obs_gpu[
                            key] = env_gpu.cuda_data_manager.pull_data_from_device(
                                key)
                        self.run_consistency_checks(
                            obs_cpu[key], obs_gpu[key][env_id])