Example 1
    def test_composition_success(self, forward_mock, array_dict_get, module_dict_get, tensor_dict_get, tensor_dict_set):
        """Composing two inserters with `+` runs both in order: tensorize, then forward.

        All collaborators (ArrayDict/TensorDict/ModuleDict/DummyNet) have their
        accessors replaced by the injected mocks so only call routing is tested.
        """
        # mock: array dict returns the raw states for the tensorize step
        array_dict = ArrayDict(N_EXAMPLES)
        array_dict_get.return_value = self.dummy_states
        array_dict.get = array_dict_get

        # mock: tensor dict serves/receives the tensorized states
        tensor_dict = TensorDict()
        tensor_dict_get.return_value = self.dummy_states_tensor
        tensor_dict_set.return_value = None
        tensor_dict.get = tensor_dict_get
        tensor_dict.set = tensor_dict_set

        # mock: the scaler network's forward pass
        net: nn.Module = DummyNet()
        forward_mock.return_value = self.dummy_states_tensor
        net.forward = forward_mock

        module_dict = ModuleDict()
        module_dict_get.return_value = net
        module_dict.get = module_dict_get

        # run
        tensor_inserter1 = TensorInserterTensorize(ArrayKey.states, TensorKey.states_tensor)
        tensor_inserter2 = TensorInserterForward(TensorKey.states_tensor, ModuleKey.scaler, TensorKey.states_tensor)
        tensor_inserter = tensor_inserter1 + tensor_inserter2
        tensor_inserter.insert_tensor(tensor_dict, array_dict, module_dict, np.arange(N_EXAMPLES))

        # assert
        # BUG FIX: the original called `array_dict_get.called_once_with(...)`, which
        # is not a Mock assertion -- it auto-creates a child mock and always "passes".
        # NOTE(review): assumes the tensorize inserter calls get() with only the key;
        # if it also passes the index array, relax to assert_called_once().
        array_dict_get.assert_called_once_with(ArrayKey.states)
        self.assertEqual(tensor_dict_set.call_count, 2)  # one set per composed inserter
        self.assertEqual(tensor_dict_get.call_count, 1)  # only the forward step reads back
        tensor_dict_get.assert_called_once_with(TensorKey.states_tensor)
        module_dict_get.assert_called_once_with(ModuleKey.scaler)
Example 2
    def test_forward_success(self, forward_mock, module_dict_get, tensor_dict_get, tensor_dict_set):
        """A forward inserter reads a tensor, runs it through the module, and stores the result."""
        # --- arrange ---
        # tensor dict: serves the input tensor and swallows the output
        tensor_dict = TensorDict()
        tensor_dict.get = tensor_dict_get
        tensor_dict.set = tensor_dict_set
        tensor_dict_get.return_value = self.dummy_states_tensor
        tensor_dict_set.return_value = None

        # module dict: hands back a network whose forward pass is mocked
        scaler_net: nn.Module = DummyNet()
        scaler_net.forward = forward_mock
        forward_mock.return_value = self.dummy_states_tensor

        module_dict = ModuleDict()
        module_dict.get = module_dict_get
        module_dict_get.return_value = scaler_net

        # empty array dict: the forward inserter should never touch it
        array_dict = ArrayDict(0)

        # --- act ---
        inserter = TensorInserterForward(TensorKey.states_tensor, ModuleKey.scaler, TensorKey.next_states_tensor)
        inserter.insert_tensor(tensor_dict, array_dict, module_dict, np.arange(N_EXAMPLES))

        # --- assert ---
        tensor_dict_get.assert_called_once_with(TensorKey.states_tensor)
        module_dict_get.assert_called_once_with(ModuleKey.scaler)
        forward_mock.assert_called_once_with(self.dummy_states_tensor)
        tensor_dict_set.assert_called_once()
Example 3
    def test_sample_collector_by_number_success(self, mock_env_reset, mock_env_step, mock_envs_reset,
                                                mock_envs_step) -> None:
        """Collecting N_ENVS * 10 samples should step the vector env exactly 10 times
        and yield a states array of shape (N_ENVS * 10, STATE_DIM)."""
        # single env: reset/step stubbed to constant transitions
        dummy_env = Env()
        dummy_env.observation_space = Box(-1, 1, [STATE_DIM])
        dummy_env.action_space = Box(-1, 1, [ACTION_DIM])
        mock_env_reset.return_value = self.dummy_state
        mock_env_step.return_value = (self.dummy_state, self.dummy_reward, self.dummy_done, self.dummy_info)
        dummy_env.reset = mock_env_reset
        dummy_env.step = mock_env_step

        # vector env: same stubbing, batched
        dummy_envs = DummyVectorEnv(N_ENVS, STATE_DIM, ACTION_DIM)
        mock_envs_reset.return_value = self.dummy_states
        mock_envs_step.return_value = (self.dummy_next_states, self.dummy_rewards, self.dummy_dones, {})
        dummy_envs.reset = mock_envs_reset
        dummy_envs.step = mock_envs_step

        dummy_env_container = EnvContainer(dummy_env, dummy_envs)
        mock_envs_reset.assert_called_once_with()  # __init__ of EnvContainer calls reset

        actor: nn.Module = ProbMLPConstantLogStd(STATE_DIM, ACTION_DIM, HIDDEN_DIMS, ACTIVATION, FINAL_LAYER_ACTIVATION, LOG_STD)
        scaler: nn.Module = DummyNet()
        # (removed an unused `tanh = nn.Tanh()` local that was never referenced)
        action_getter: ActionGetter = ActionGetterModule(actor, scaler)
        sample_collector: SampleCollector = SampleCollectorV0(dummy_env_container, action_getter, N_ENVS * 10, 1)

        array_dict: ArrayDict = sample_collector.collect_samples_by_number()
        self.assertEqual(mock_envs_reset.call_count, 2)  # container init + collection start
        self.assertEqual(mock_envs_step.call_count, 10)  # N_ENVS * 10 samples / N_ENVS per step

        collected_states = array_dict.get(ArrayKey.states)
        self.assertTupleEqual(collected_states.shape, (N_ENVS * 10, STATE_DIM))
Example 4
    def test_module_action_getter_1d_success(self):
        """A single (1D) state in should produce a single (1D) action out."""
        actor: nn.Module = ProbMLPConstantLogStd(STATE_DIM, ACTION_DIM, HIDDEN_DIMS, ACTIVATION, FINAL_LAYER_ACTIVATION, LOG_STD)
        scaler: nn.Module = DummyNet()
        # (removed an unused `activation = nn.Tanh()` local that was never referenced)

        action_getter: ActionGetter = ActionGetterModule(actor, scaler)
        dummy_state = self.dummy_states[0, :]  # take one row -> 1D state vector
        output_action = action_getter.get_action(dummy_state)

        self.assertEqual(len(output_action.shape), 1, "1D case output shape is not 1D")
Example 5
    def test_module_action_getter_sample_success(self, actor_forward, scaler_forward):
        """sample_action should scale the states, feed them to the actor, and
        return the actor's actions with a 2D (batch, action_dim) shape."""
        # --- arrange: stub both forward passes ---
        scaler_forward.return_value = self.dummy_states_scaled
        actor_forward.return_value = (self.dummy_actions, self.dummy_log_std)

        policy_net: nn.Module = ProbMLPConstantLogStd(STATE_DIM, ACTION_DIM, HIDDEN_DIMS, ACTIVATION, FINAL_LAYER_ACTIVATION, LOG_STD)
        policy_net.forward = actor_forward
        scaling_net: nn.Module = DummyNet()
        scaling_net.forward = scaler_forward

        getter: ActionGetter = ActionGetterModule(policy_net, scaling_net)

        # --- act ---
        actions, log_prob = getter.sample_action(self.dummy_states)

        # --- assert: output shape and values come straight from the actor ---
        self.assertEqual(len(actions.shape), 2, "2D case output shape is not 2D")
        self.assertTupleEqual(actions.shape, (N_EXAMPLES, ACTION_DIM), "2D case output shape is inconsistent")
        np.testing.assert_array_equal(actions, self.dummy_actions)

        # pipeline order: raw states -> scaler -> actor
        np.testing.assert_array_equal(scaler_forward.call_args[0][0], self.dummy_states)
        np.testing.assert_array_equal(actor_forward.call_args[0][0], self.dummy_states_scaled)
Example 6
    def test_module_updater_optimizer_change_action_success(self) -> None:
        """An optimizer step through ModuleUpdaterOptimizer must change the actor's
        parameters: actions are identical before the update and different after."""
        # (removed unused `relu = nn.ReLU()` and `tanh = nn.Tanh()` locals)
        actor = ProbMLPConstantLogStd(STATE_DIM, ACTION_DIM, HIDDEN_DIMS, ACTIVATION, FINAL_LAYER_ACTIVATION, LOG_STD)
        scaler = DummyNet()
        action_getter = ActionGetterModule(actor, scaler)
        optimizer = RAdam(actor.parameters(), lr=3e-4)
        module_updater: ModuleUpdater = ModuleUpdaterOptimizer(optimizer)

        # same weights -> same (deterministic) action twice
        action1 = action_getter.get_action(self.dummy_states)
        action2 = action_getter.get_action(self.dummy_states)

        # build a loss against a target so the update has a nonzero gradient
        output, _log_std = actor.forward(self.dummy_states_tensor)  # log-std unused here
        mse_loss = nn.MSELoss()
        loss = mse_loss.forward(output, self.dummy_target_tensor)

        module_updater.update_module(loss)

        action3 = action_getter.get_action(self.dummy_states)

        np.testing.assert_array_equal(action1, action2)
        self.assertFalse(np.array_equal(action1, action3))
Example 7
 def test_dummy_success(self) -> None:
     """DummyNet is an identity module: its forward pass returns the input unchanged."""
     identity_net: nn.Module = DummyNet()
     result = identity_net.forward(self.dummy_features)
     np.testing.assert_array_almost_equal(result, self.dummy_features)