예제 #1
0
    def _get_rollout_in_batches(self, states, actions, rewards, dones):

        if self.rollout_batch_size is None:
            return self.algorithm.get_rollout(states, actions, rewards, dones)

        indices = np.arange(
            0, len(states) + self.rollout_batch_size - 1,
            self.rollout_batch_size
        )
        rollout = None
        for i in range(len(indices) - 1):
            states_batch = states[indices[i]:indices[i+1]+1]
            actions_batch = actions[indices[i]:indices[i+1]+1]
            rewards_batch = rewards[indices[i]:indices[i+1]+1]
            dones_batch = dones[indices[i]:indices[i+1]+1]
            rollout_batch = self.algorithm.get_rollout(
                states_batch, actions_batch, rewards_batch, dones_batch
            )
            if rollout is not None:
                rollout = utils.append_dict(rollout, rollout_batch)
            else:
                rollout = rollout_batch
        return rollout
예제 #2
0
    def _get_rollout_in_batches(self, states, actions, rewards, dones):
        """Compute a rollout, optionally feeding the algorithm in batches.

        With ``self.rollout_batch_size`` set to ``None`` this is a plain
        pass-through to ``self.algorithm.get_rollout``.

        Otherwise boundaries are generated every ``rollout_batch_size``
        positions along ``states`` and walked in pairs via
        ``utils.pairwise``. Every pair ``(lower, upper)`` selects the window
        ``[lower, upper]`` (upper bound included) from each of the four
        sequences. The window is passed to ``self.algorithm.get_rollout``
        and the results are accumulated with ``utils.append_dict``.

        Args:
            states: sliceable sequence; its length drives the batching.
            actions: sliceable sequence, windowed identically to states.
            rewards: sliceable sequence, windowed identically to states.
            dones: sliceable sequence, windowed identically to states.

        Returns:
            The result of ``self.algorithm.get_rollout`` (accumulated over
            all windows when batching is active), or ``None`` if batching is
            active and ``utils.pairwise`` yields no pairs.
        """
        step = self.rollout_batch_size
        if step is None:
            return self.algorithm.get_rollout(states, actions, rewards, dones)

        # Padding the stop value by ``step - 1`` guarantees a closing edge
        # for a trailing window that is shorter than ``step``.
        edges = np.arange(0, len(states) + step - 1, step)

        accumulated = None
        # NOTE(review): utils.pairwise presumably yields consecutive
        # (edges[k], edges[k + 1]) pairs -- confirm against utils.
        for lower, upper in utils.pairwise(edges):
            # One shared slice keeps the four sequences aligned; ``+ 1``
            # makes the upper edge inclusive, so neighbouring windows
            # overlap by one element.
            window = slice(lower, upper + 1)
            part = self.algorithm.get_rollout(
                states[window],
                actions[window],
                rewards[window],
                dones[window],
            )
            if accumulated is None:
                accumulated = part
            else:
                accumulated = utils.append_dict(accumulated, part)

        return accumulated