Ejemplo n.º 1
0
    def test_composition_success(self, forward_mock, array_dict_get, module_dict_get, tensor_dict_get, tensor_dict_set):
        """Check that two inserters combined with ``+`` run against the same dictionaries.

        A ``TensorInserterTensorize`` is chained with a ``TensorInserterForward``.
        Every dictionary accessor and the network's ``forward`` are replaced by
        the injected mocks, so only the recorded calls are verified.
        """
        # mock
        array_dict = ArrayDict(N_EXAMPLES)
        array_dict_get.return_value = self.dummy_states
        array_dict.get = array_dict_get

        tensor_dict = TensorDict()
        tensor_dict_get.return_value = self.dummy_states_tensor
        tensor_dict_set.return_value = None
        tensor_dict.get = tensor_dict_get
        tensor_dict.set = tensor_dict_set

        net: nn.Module = DummyNet()
        forward_mock.return_value = self.dummy_states_tensor
        net.forward = forward_mock

        module_dict = ModuleDict()
        module_dict_get.return_value = net
        module_dict.get = module_dict_get

        # run
        tensor_inserter1 = TensorInserterTensorize(ArrayKey.states, TensorKey.states_tensor)
        tensor_inserter2 = TensorInserterForward(TensorKey.states_tensor, ModuleKey.scaler, TensorKey.states_tensor)
        tensor_inserter = tensor_inserter1 + tensor_inserter2
        tensor_inserter.insert_tensor(tensor_dict, array_dict, module_dict, np.arange(N_EXAMPLES))

        # assert
        # ``called_once_with`` is not an assertion method: depending on the Python
        # version it either silently creates a child mock or raises
        # AttributeError. ``assert_called_once_with`` performs the real check.
        array_dict_get.assert_called_once_with(ArrayKey.states)
        self.assertEqual(tensor_dict_set.call_count, 2)
        self.assertEqual(tensor_dict_get.call_count, 1)
        tensor_dict_get.assert_called_once_with(TensorKey.states_tensor)
        module_dict_get.assert_called_once_with(ModuleKey.scaler)
Ejemplo n.º 2
0
    def test_forward_success(self, forward_mock, module_dict_get, tensor_dict_get, tensor_dict_set):
        """Verify the calls issued by ``TensorInserterForward.insert_tensor``.

        The source tensor lookup, the module lookup, the network's ``forward``
        and the final ``set`` are all mocks; the test checks that each one is
        invoked exactly once with the expected arguments.
        """
        # mock: canned return values first, then wire the mocks into the objects
        forward_mock.return_value = self.dummy_states_tensor
        tensor_dict_get.return_value = self.dummy_states_tensor
        tensor_dict_set.return_value = None

        net: nn.Module = DummyNet()
        net.forward = forward_mock

        module_dict = ModuleDict()
        module_dict_get.return_value = net
        module_dict.get = module_dict_get

        tensor_dict = TensorDict()
        tensor_dict.get = tensor_dict_get
        tensor_dict.set = tensor_dict_set

        array_dict = ArrayDict(0)

        # run
        batch_idx = np.arange(N_EXAMPLES)
        tensor_inserter = TensorInserterForward(
            TensorKey.states_tensor,
            ModuleKey.scaler,
            TensorKey.next_states_tensor,
        )
        tensor_inserter.insert_tensor(tensor_dict, array_dict, module_dict, batch_idx)

        # assert
        tensor_dict_get.assert_called_once_with(TensorKey.states_tensor)
        module_dict_get.assert_called_once_with(ModuleKey.scaler)
        forward_mock.assert_called_once_with(self.dummy_states_tensor)
        tensor_dict_set.assert_called_once()
Ejemplo n.º 3
0
    def test_array_dicts_set_success(self) -> None:
        """Store an array under ``ArrayKey.states`` and read it back unchanged."""
        array_dict: ArrayDict = ArrayDict(N_EXAMPLES)
        dummy_states = np.random.random((N_EXAMPLES, STATE_DIM))
        array_dict.set(ArrayKey.states, dummy_states)

        # assertIn reports the container contents on failure, unlike
        # assertTrue(x in ...), which only says "False is not true".
        self.assertIn(ArrayKey.states, array_dict.dict.keys())
        np.testing.assert_array_equal(array_dict.get(ArrayKey.states), dummy_states)
Ejemplo n.º 4
0
    def collect_samples_by_horizon(self) -> ArrayDict:
        """Step the environments ``self.horizon`` times and gather the transitions.

        On every step the current ``envs_states`` are read from the environment
        container, an action and its log-probability are sampled from
        ``self.action_getter``, and the environments are stepped with that action.

        Returns:
            An ``ArrayDict`` sized ``n_envs * horizon`` holding states, actions,
            log-probs, next states, rewards and dones, each concatenated over
            the steps along axis 0.
        """
        states_seq = []
        actions_seq = []
        log_probs_seq = []
        next_states_seq = []
        rewards_seq = []
        dones_seq = []
        for _ in range(self.horizon):
            states = self.env_container.envs_states
            actions, log_probs = self.action_getter.sample_action(states)
            next_states, rewards, dones, _ = self.env_container.envs_step(actions)

            states_seq.append(states)
            actions_seq.append(actions)
            log_probs_seq.append(log_probs)
            next_states_seq.append(next_states)
            rewards_seq.append(rewards)
            dones_seq.append(dones)

        array_dict: ArrayDict = ArrayDict(self.env_container.n_envs * self.horizon)
        # Each key is written next to its own sequence. The earlier parallel
        # lists had a stray second ArrayKey.actions, which shifted every later
        # pairing by one (next_states stored as actions, rewards as
        # next_states, dones as rewards) and left ArrayKey.dones unset.
        keyed_sequences = (
            (ArrayKey.states, states_seq),
            (ArrayKey.actions, actions_seq),
            (ArrayKey.log_probs, log_probs_seq),
            (ArrayKey.next_states, next_states_seq),
            (ArrayKey.rewards, rewards_seq),
            (ArrayKey.dones, dones_seq),
        )
        for key, sequence in keyed_sequences:
            array_dict.set(key, np.concatenate(sequence, axis=0))
        return array_dict
 def insert_tensor(self, tensor_dict: TensorDict, array_dict: ArrayDict,
                   module_dict: ModuleDict, batch_idx: np.ndarray):
     """Convert the selected rows of an array into a tensor and store it.

     The array registered under ``self.array_key`` is indexed with
     ``batch_idx``, converted to a tensor of ``self.dtype``, moved to the
     module-level ``device`` and written to ``tensor_dict`` under
     ``self.tensor_key``. One-dimensional results are reshaped into a single
     column. ``module_dict`` is not used here.

     Returns:
         The same ``tensor_dict`` that was passed in.
     """
     batch = array_dict.get(self.array_key)[batch_idx]
     batch_tensor = torch.as_tensor(batch, dtype=self.dtype)
     batch_tensor = batch_tensor.to(device)
     # Rank-1 data becomes an (n, 1) column
     if batch_tensor.dim() == 1:
         batch_tensor = batch_tensor.reshape(-1, 1)
     tensor_dict.set(self.tensor_key, batch_tensor)
     return tensor_dict
Ejemplo n.º 6
0
    def collect_samples_by_number(self, gamma: float = 0.99):
        """
        Collect experience until at least ``self.n_samples`` tuples are gathered.

        The environments are reset once, then stepped together until the
        running count (incremented by ``n_envs`` per step) reaches
        ``self.n_samples``. The final count can therefore exceed
        ``self.n_samples``.

        Args:
            gamma: Third argument forwarded to ``compute_cumulative_rewards_mat``
                (presumably the discount factor -- confirm against that helper).
                Defaults to the previously hard-coded ``0.99``.

        Returns:
            An ``ArrayDict`` sized to the number of collected tuples, holding
            states, actions, log-probs, rewards, dones, next states and
            cumulative rewards, flattened from per-environment matrices.
        """
        state_dim, = self.env_container.env.observation_space.shape
        action_dim, = self.env_container.env.action_space.shape

        states_seq = []
        actions_seq = []
        log_probs_seq = []
        next_states_seq = []
        rewards_seq = []
        dones_seq = []
        n_samples_collected = 0
        self.env_container.envs_reset()
        # A single loop is enough: the former inner ``for _ in range(self.horizon)``
        # did nothing between passes, so it only re-chunked the same iteration
        # (and spun forever when the horizon was not positive).
        while n_samples_collected < self.n_samples:
            states = self.env_container.envs_states
            actions, log_probs = self.action_getter.sample_action(states)
            next_states, rewards, dones, _ = self.env_container.envs_step(actions)

            states_seq.append(states)
            actions_seq.append(actions)
            log_probs_seq.append(log_probs)
            next_states_seq.append(next_states)
            rewards_seq.append(rewards)
            dones_seq.append(dones)

            n_samples_collected += self.env_container.n_envs

        # Stack the per-step arrays along a new axis 1 (the step axis).
        states_matrix = np.stack(states_seq, axis=1)
        actions_matrix = np.stack(actions_seq, axis=1)
        log_probs_matrix = np.stack(log_probs_seq, axis=1)
        rewards_matrix = np.stack(rewards_seq, axis=1)
        next_states_matrix = np.stack(next_states_seq, axis=1)
        dones_matrix = np.stack(dones_seq, axis=1)

        # cumulative rewards
        cumulative_rewards_matrix = compute_cumulative_rewards_mat(rewards_matrix, dones_matrix, gamma)

        array_dict: ArrayDict = ArrayDict(n_samples_collected)
        array_dict.set(ArrayKey.states, states_matrix.reshape(-1, state_dim))
        array_dict.set(ArrayKey.actions, actions_matrix.reshape(-1, action_dim))
        array_dict.set(ArrayKey.log_probs, log_probs_matrix.reshape(-1, action_dim))
        array_dict.set(ArrayKey.rewards, rewards_matrix.reshape(-1))
        array_dict.set(ArrayKey.dones, dones_matrix.reshape(-1))
        array_dict.set(ArrayKey.next_states, next_states_matrix.reshape(-1, state_dim))
        array_dict.set(ArrayKey.cumulative_rewards, cumulative_rewards_matrix.reshape(-1))

        return array_dict
Ejemplo n.º 7
0
    def test_lambda_success(self, get_mock, set_mock):
        """Verify ``TensorInserterLambda`` reads both inputs and stores the lambda's result.

        The mocked ``get`` returns ``1`` for every key, so summing the two
        inputs must lead to ``set`` being called with ``2``.
        """
        # mock
        array_dict = ArrayDict(N_EXAMPLES)
        module_dict = ModuleDict()
        tensor_dict = TensorDict()
        get_mock.return_value = 1
        tensor_dict.get = get_mock
        tensor_dict.set = set_mock

        # run
        input_keys = [TensorKey.states_tensor, TensorKey.actions_tensor]
        tensor_inserter: TensorInserter = TensorInserterLambda(
            input_keys,
            lambda x, y: x + y,
            TensorKey.dones_tensor,
        )
        batch_idx = np.arange(N_EXAMPLES)
        tensor_inserter.insert_tensor(tensor_dict, array_dict, module_dict, batch_idx)

        # assert
        get_mock.assert_any_call(TensorKey.states_tensor)
        get_mock.assert_any_call(TensorKey.actions_tensor)
        set_mock.assert_called_with(TensorKey.dones_tensor, 2)
Ejemplo n.º 8
0
    def test_tensorize_success(self, array_dict_get, tensor_dict_get, tensor_dict_set):
        """Verify ``TensorInserterTensorize`` reads the array and stores its tensor form.

        ``array_dict.get`` is mocked to return ``self.dummy_states``; the test
        checks the lookup key and the key/value handed to the mocked
        ``tensor_dict.set``.
        """
        # mock
        array_dict: ArrayDict = ArrayDict(N_EXAMPLES)
        array_dict_get.return_value = self.dummy_states
        array_dict.get = array_dict_get
        tensor_dict: TensorDict = TensorDict()
        tensor_dict_get.return_value = self.dummy_states_tensor
        tensor_dict_set.return_value = None
        tensor_dict.get = tensor_dict_get
        tensor_dict.set = tensor_dict_set

        # run
        tensor_inserter: TensorInserter = TensorInserterTensorize(ArrayKey.states, TensorKey.states_tensor, torch.float)
        tensor_dict = tensor_inserter.insert_tensor(tensor_dict, array_dict, self.dummy_module_dict,
                                                    np.arange(N_EXAMPLES))

        # assert
        array_dict_get.assert_called_once_with(ArrayKey.states)
        tensor_dict_set.assert_called_once()

        # ``tensor_dict.get`` is a mock, so reading the value back would only
        # echo its canned return value. Inspect what was passed to ``set``.
        stored_key, stored_tensor = tensor_dict_set.call_args[0]
        self.assertEqual(stored_key, TensorKey.states_tensor)
        # Move to host memory first so the comparison also works when the
        # inserter placed the tensor on a non-CPU device.
        np.testing.assert_array_almost_equal(stored_tensor.detach().cpu().numpy(), self.dummy_states)
Ejemplo n.º 9
0
 def test_array_dicts_set_fail_key_check(self) -> None:
     """``ArrayDict.set`` must raise ``RuntimeError`` when given a ``TensorKey``."""
     target: ArrayDict = ArrayDict(N_EXAMPLES)
     payload = np.random.random((N_EXAMPLES, STATE_DIM))
     # Callable form of assertRaises: invokes target.set(key, payload)
     self.assertRaises(RuntimeError, target.set, TensorKey.states_tensor, payload)
Ejemplo n.º 10
0
 def test_array_dicts_set_fail_n_examples_check(self) -> None:
     """``ArrayDict.set`` must raise ``RuntimeError`` for an array with one row too many."""
     target: ArrayDict = ArrayDict(N_EXAMPLES)
     oversized = np.random.random((N_EXAMPLES + 1, STATE_DIM))
     # Callable form of assertRaises: invokes target.set(key, oversized)
     self.assertRaises(RuntimeError, target.set, ArrayKey.states, oversized)