def _sample_batch(self, batch_size):
    """Sample transitions from the experience buffer and prepare them.

    Both observation arrays are passed through ``min_max_scaling``; the
    done flags and the rewards are cast to ``float32``. Actions and next
    actions are forwarded exactly as the buffer returned them.

    :param batch_size: forwarded unchanged to ``self.exp_buffer.sample``
    :return: dict with the keys ``obs``, ``actions``, ``rewards``,
        ``next_obs``, ``is_done`` and ``next_actions``
    """
    # The buffer is expected to yield exactly six parallel fields.
    sampled = self.exp_buffer.sample(batch_size)
    states, actions, rewards, next_states, dones, next_actions = sampled

    # Scale current and next observations with the same helper.
    states = min_max_scaling(states)
    next_states = min_max_scaling(next_states)

    # Cast flags and rewards to float32 (presumably for the downstream
    # loss arithmetic -- confirm against the training step).
    dones = dones.astype("float32")
    rewards = rewards.astype("float32")

    return {
        "obs": states,
        "actions": actions,
        "rewards": rewards,
        "next_obs": next_states,
        "is_done": dones,
        "next_actions": next_actions,
    }
def _sample_batch(self, batch_size):
    """Sample transitions from the experience buffer and prepare them.

    Each observation array is reshaped to ``(shape[0], 1, shape[1] * shape[2])``
    (i.e. axes 1 and 2 are merged into one) and then passed through
    ``min_max_scaling``. Done flags and rewards are cast to ``float32``;
    actions are forwarded as sampled.

    :param batch_size: forwarded unchanged to ``self.exp_buffer.sample``
    :return: dict with the keys ``obs``, ``actions``, ``rewards``,
        ``next_obs`` and ``is_done``
    """
    # The buffer is expected to yield exactly five parallel fields.
    sampled = self.exp_buffer.sample(batch_size)
    states, actions, rewards, next_states, dones = sampled

    # Merge axes 1 and 2 of every sample into a single flat axis, keeping a
    # singleton middle axis. Assumes the observations are 3-D -- TODO confirm.
    flat_width = states.shape[1] * states.shape[2]
    states = states.reshape(states.shape[0], 1, flat_width)
    next_flat_width = next_states.shape[1] * next_states.shape[2]
    next_states = next_states.reshape(
        next_states.shape[0], 1, next_flat_width)

    # Scale after flattening, current observations first.
    states = min_max_scaling(states)
    next_states = min_max_scaling(next_states)

    dones = dones.astype("float32")
    rewards = rewards.astype("float32")

    return {
        "obs": states,
        "actions": actions,
        "rewards": rewards,
        "next_obs": next_states,
        "is_done": dones,
    }
def decision(self, state_space: np.ndarray, action_space: ActionSpace):
    """Choose an action for the given state.

    Per the original note, this is invoked from the super class's
    "get play turn" method and is where the action selection happens.
    The state is scaled with ``min_max_scaling``, flattened into a single
    row, evaluated with ``self._get_qvalues`` and the result is handed to
    ``self._sample_actions`` together with ``action_space``.

    :param state_space: current state as an array
    :param action_space: passed through to ``self._sample_actions``
    :return: whatever ``self._sample_actions`` returns for the q-values
    """
    # Pre-processing: min-max scaling (into [0, 1] according to the
    # original comment), then flatten to shape (1, product of dims).
    scaled = min_max_scaling(state_space)
    flat_len = multiply(*scaled.shape)
    flat_state = scaled.reshape(1, flat_len)

    # The q-value helper receives the state wrapped in a one-element list.
    q_values = self._get_qvalues([flat_state])
    return self._sample_actions(q_values, action_space)