def test_composition_success(
    self,
    forward_mock,
    array_dict_get,
    module_dict_get,
    tensor_dict_get,
    tensor_dict_set,
):
    """Run two tensor inserters combined with ``+`` against mocked stores.

    A ``TensorInserterTensorize`` and a ``TensorInserterForward`` are added
    together and the combined inserter is executed once. The ``get``/``set``
    methods of the dictionaries and the network's ``forward`` are replaced by
    the mock arguments (presumably injected by patch decorators that are not
    visible in this excerpt).
    """
    # mock
    array_dict = ArrayDict(N_EXAMPLES)
    array_dict_get.return_value = self.dummy_states
    array_dict.get = array_dict_get

    tensor_dict = TensorDict()
    tensor_dict_get.return_value = self.dummy_states_tensor
    tensor_dict_set.return_value = None
    tensor_dict.get = tensor_dict_get
    tensor_dict.set = tensor_dict_set

    net: nn.Module = DummyNet()
    forward_mock.return_value = self.dummy_states_tensor
    net.forward = forward_mock

    module_dict = ModuleDict()
    module_dict_get.return_value = net
    module_dict.get = module_dict_get

    # run
    tensor_inserter1 = TensorInserterTensorize(ArrayKey.states, TensorKey.states_tensor)
    tensor_inserter2 = TensorInserterForward(
        TensorKey.states_tensor, ModuleKey.scaler, TensorKey.states_tensor
    )
    tensor_inserter = tensor_inserter1 + tensor_inserter2
    tensor_inserter.insert_tensor(tensor_dict, array_dict, module_dict, np.arange(N_EXAMPLES))

    # assert
    # ``called_once_with`` is not an assertion: on a Mock it merely creates a
    # child mock and always "passes" (newer Python versions reject it with an
    # AttributeError). Use the real assertion so the check is enforced.
    array_dict_get.assert_called_once_with(ArrayKey.states)
    self.assertEqual(tensor_dict_set.call_count, 2)
    self.assertEqual(tensor_dict_get.call_count, 1)
    tensor_dict_get.assert_called_once_with(TensorKey.states_tensor)
    module_dict_get.assert_called_once_with(ModuleKey.scaler)
def test_forward_success(self, forward_mock, module_dict_get, tensor_dict_get, tensor_dict_set):
    """Exercise ``TensorInserterForward`` on its own with mocked stores.

    Verifies that the inserter reads the input tensor key, looks up the
    scaler module, calls the module's ``forward`` with the fetched tensor and
    writes to the tensor dictionary exactly once.
    """
    # mock: configure return values first, then wire the mocks in
    states_tensor = self.dummy_states_tensor
    forward_mock.return_value = states_tensor
    tensor_dict_get.return_value = states_tensor
    tensor_dict_set.return_value = None

    scaler_net: nn.Module = DummyNet()
    scaler_net.forward = forward_mock

    modules = ModuleDict()
    module_dict_get.return_value = scaler_net
    modules.get = module_dict_get

    tensors = TensorDict()
    tensors.get = tensor_dict_get
    tensors.set = tensor_dict_set

    arrays = ArrayDict(0)

    # run
    indices = np.arange(N_EXAMPLES)
    inserter = TensorInserterForward(
        TensorKey.states_tensor,
        ModuleKey.scaler,
        TensorKey.next_states_tensor,
    )
    inserter.insert_tensor(tensors, arrays, modules, indices)

    # assert
    tensor_dict_get.assert_called_once_with(TensorKey.states_tensor)
    module_dict_get.assert_called_once_with(ModuleKey.scaler)
    forward_mock.assert_called_once_with(self.dummy_states_tensor)
    tensor_dict_set.assert_called_once()
def test_sample_collector_by_number_success(
    self, mock_env_reset, mock_env_step, mock_envs_reset, mock_envs_step
) -> None:
    """Collect ``N_ENVS * 10`` samples through ``SampleCollectorV0``.

    Both the single environment and the vectorized environment have their
    ``reset``/``step`` methods replaced by the mock arguments. The test then
    checks how often the vectorized environment was reset and stepped and the
    shape of the collected states.
    """
    # single environment backed by mocks
    dummy_env = Env()
    dummy_env.observation_space = Box(-1, 1, [STATE_DIM])
    dummy_env.action_space = Box(-1, 1, [ACTION_DIM])
    mock_env_reset.return_value = self.dummy_state
    mock_env_step.return_value = (
        self.dummy_state,
        self.dummy_reward,
        self.dummy_done,
        self.dummy_info,
    )
    dummy_env.reset = mock_env_reset
    dummy_env.step = mock_env_step

    # vectorized environment backed by mocks
    dummy_envs = DummyVectorEnv(N_ENVS, STATE_DIM, ACTION_DIM)
    mock_envs_reset.return_value = self.dummy_states
    mock_envs_step.return_value = (
        self.dummy_next_states,
        self.dummy_rewards,
        self.dummy_dones,
        {},
    )
    dummy_envs.reset = mock_envs_reset
    dummy_envs.step = mock_envs_step

    dummy_env_container = EnvContainer(dummy_env, dummy_envs)
    mock_envs_reset.assert_called_once_with()  # __init__ of EnvContainer calls reset

    actor: nn.Module = ProbMLPConstantLogStd(
        STATE_DIM, ACTION_DIM, HIDDEN_DIMS, ACTIVATION, FINAL_LAYER_ACTIVATION, LOG_STD
    )
    scaler: nn.Module = DummyNet()
    action_getter: ActionGetter = ActionGetterModule(actor, scaler)
    sample_collector: SampleCollector = SampleCollectorV0(
        dummy_env_container, action_getter, N_ENVS * 10, 1
    )

    array_dict: ArrayDict = sample_collector.collect_samples_by_number()

    # one reset from EnvContainer construction (asserted above) plus one more
    # by the time collection has finished
    self.assertEqual(mock_envs_reset.call_count, 2)
    self.assertEqual(mock_envs_step.call_count, 10)
    collected_states = array_dict.get(ArrayKey.states)
    self.assertTupleEqual(collected_states.shape, (N_ENVS * 10, STATE_DIM))
def test_module_action_getter_1d_success(self):
    """A single (1-D) state passed to ``get_action`` must yield a 1-D action."""
    actor: nn.Module = ProbMLPConstantLogStd(
        STATE_DIM, ACTION_DIM, HIDDEN_DIMS, ACTIVATION, FINAL_LAYER_ACTIVATION, LOG_STD
    )
    scaler: nn.Module = DummyNet()
    action_getter: ActionGetter = ActionGetterModule(actor, scaler)

    # take the first row of the batch fixture as a single state
    dummy_state = self.dummy_states[0, :]
    output_action = action_getter.get_action(dummy_state)

    self.assertEqual(len(output_action.shape), 1, "1D case output shape is not 1D")
def test_module_action_getter_sample_success(self, actor_forward, scaler_forward):
    """Check ``sample_action`` with mocked actor and scaler ``forward`` calls.

    The scaler mock returns the scaled-states fixture and the actor mock
    returns the ``(actions, log_std)`` fixtures. The test asserts the shape
    and values of the returned actions and that each mocked ``forward``
    received the expected first positional argument.
    """
    policy: nn.Module = ProbMLPConstantLogStd(
        STATE_DIM,
        ACTION_DIM,
        HIDDEN_DIMS,
        ACTIVATION,
        FINAL_LAYER_ACTIVATION,
        LOG_STD,
    )
    actor_forward.return_value = (self.dummy_actions, self.dummy_log_std)
    policy.forward = actor_forward

    state_scaler: nn.Module = DummyNet()
    scaler_forward.return_value = self.dummy_states_scaled
    state_scaler.forward = scaler_forward

    getter: ActionGetter = ActionGetterModule(policy, state_scaler)
    sampled = getter.sample_action(self.dummy_states)
    actions, _log_prob = sampled

    self.assertEqual(len(actions.shape), 2, "2D case output shape is not 2D")
    self.assertTupleEqual(
        actions.shape,
        (N_EXAMPLES, ACTION_DIM),
        "2D case output shape is inconsistent",
    )
    np.testing.assert_array_equal(actions, self.dummy_actions)

    # first positional argument of the most recent call to each mock
    scaler_input = scaler_forward.call_args[0][0]
    actor_input = actor_forward.call_args[0][0]
    np.testing.assert_array_equal(scaler_input, self.dummy_states)
    np.testing.assert_array_equal(actor_input, self.dummy_states_scaled)
def test_module_updater_optimizer_change_action_success(self) -> None:
    """An optimizer update through ``ModuleUpdaterOptimizer`` must change actions.

    Two ``get_action`` calls before the update are expected to be equal; the
    call after ``update_module`` is expected to differ from them.
    """
    actor = ProbMLPConstantLogStd(
        STATE_DIM, ACTION_DIM, HIDDEN_DIMS, ACTIVATION, FINAL_LAYER_ACTIVATION, LOG_STD
    )
    scaler = DummyNet()
    action_getter = ActionGetterModule(actor, scaler)
    optimizer = RAdam(actor.parameters(), lr=3e-4)
    module_updater: ModuleUpdater = ModuleUpdaterOptimizer(optimizer)

    # actions before the update
    action1 = action_getter.get_action(self.dummy_states)
    action2 = action_getter.get_action(self.dummy_states)

    # only the first element of the actor output feeds the loss; the second
    # (named log_std in the original) is not needed here
    output, _ = actor.forward(self.dummy_states_tensor)
    mse_loss = nn.MSELoss()
    loss = mse_loss.forward(output, self.dummy_target_tensor)
    module_updater.update_module(loss)

    # action after the update
    action3 = action_getter.get_action(self.dummy_states)

    np.testing.assert_array_equal(action1, action2)
    self.assertFalse(np.array_equal(action1, action3))
def test_dummy_success(self) -> None:
    """``DummyNet.forward`` output must be almost equal to its input features."""
    identity_net: nn.Module = DummyNet()
    features = self.dummy_features
    np.testing.assert_array_almost_equal(identity_net.forward(features), features)