Example #1
0
 def _sample_batch(self, batch_size):
     """Draw a transition batch from the replay buffer and preprocess it.

     Observations are min-max scaled; done flags and rewards are cast to
     float32 so they can be consumed directly by the training step.

     :param batch_size: number of transitions to sample
     :return: dict with keys "obs", "actions", "rewards", "next_obs",
         "is_done", "next_actions"
     """
     sampled = self.exp_buffer.sample(batch_size)
     obs, actions, rewards, next_obs, done, next_actions = sampled
     return {
         "obs": min_max_scaling(obs),
         "actions": actions,
         "rewards": rewards.astype("float32"),
         "next_obs": min_max_scaling(next_obs),
         "is_done": done.astype("float32"),
         "next_actions": next_actions,
     }
Example #2
0
 def _sample_batch(self, batch_size):
     """Draw a transition batch, flatten 2-D observations, and preprocess.

     Each observation of shape (H, W) is reshaped to (1, H*W) — i.e. the
     batch becomes (N, 1, H*W) — then min-max scaled. Done flags and
     rewards are cast to float32 for the training step.

     :param batch_size: number of transitions to sample
     :return: dict with keys "obs", "actions", "rewards", "next_obs",
         "is_done"
     """
     obs, actions, rewards, next_obs, done = self.exp_buffer.sample(batch_size)
     # Flatten the spatial dims into one feature axis (keep a length-1 middle axis).
     obs = obs.reshape(obs.shape[0], 1, obs.shape[1] * obs.shape[2])
     next_obs = next_obs.reshape(
         next_obs.shape[0], 1, next_obs.shape[1] * next_obs.shape[2])
     return {
         "obs": min_max_scaling(obs),
         "actions": actions,
         "rewards": rewards.astype("float32"),
         "next_obs": min_max_scaling(next_obs),
         "is_done": done.astype("float32"),
     }
Example #3
0
 def decision(self, state_space: np.ndarray, action_space: ActionSpace):
     """Choose an action for the current state.

     Triggered by the get-play-turn method of the superclass. The state is
     min-max scaled to [0, 1], flattened to a single row vector, run through
     the Q-network, and an action is sampled from the resulting Q-values.

     :param state_space: observation array for the current turn
     :param action_space: set of actions currently available
     :return: the sampled action decision
     """
     # Normalize, then flatten the whole state into shape (1, prod(shape)).
     scaled = min_max_scaling(state_space)
     flat = scaled.reshape(1, multiply(*scaled.shape))
     qvalues = self._get_qvalues([flat])
     return self._sample_actions(qvalues, action_space)