Python flatten Examples

Programming Language: Python

Namespace/Package Name: ray.rllib.ppo.utils

Method/Function: flatten

Examples at hotexamples.com: 2

Python flatten - 2 examples found. These are the top-rated real-world Python examples of ray.rllib.ppo.utils.flatten extracted from open source projects. You can rate examples to help us improve the quality of examples.

Example #1

Show file

File: runner.py Project: reazrrr/ray

    def compute_steps(
            self, gamma, lam, horizon, min_steps_per_task,
            observation_filter, reward_filter):
        """Gather rollouts until enough steps are collected, then merge them.

        At least one rollout is always computed; sampling stops as soon as
        the number of retained (not-done) steps reaches
        ``min_steps_per_task``.

        Args:
            gamma: MDP discount factor, forwarded to compute_trajectory.
            lam: GAE(lambda) parameter, forwarded to compute_trajectory.
            horizon: Number of steps after which a rollout gets cut,
                forwarded to compute_trajectory.
            min_steps_per_task: Lower bound on the number of states to be
                collected.
            observation_filter: Filter applied to each of the observations.
                A copy of it is stored on ``self.observation_filter``.
            reward_filter: Filter applied to each of the rewards. A copy of
                it is stored on ``self.reward_filter``.

        Returns:
            A 5-tuple of:
              * the result of ``concatenate`` over the per-rollout dicts
                (each flattened, with done entries removed),
              * total_rewards: one value per rollout, the raw rewards summed
                over axis 0 and then averaged,
              * trajectory_lengths: one value per rollout, the count of
                not-done entries over axis 0, averaged,
              * ``self.observation_filter`` as it stands after the rollouts,
              * ``self.reward_filter`` as it stands after the rollouts.
        """

        # Replace the local filters with copies of the ones passed in.
        self.observation_filter = observation_filter.copy()
        self.reward_filter = reward_filter.copy()

        total_rewards = []
        trajectory_lengths = []
        collected = []
        steps_collected = 0
        need_more = True
        while need_more:
            rollout = self.compute_trajectory(gamma, lam, horizon)

            # Per-rollout statistics are taken before flattening.
            reward_per_env = rollout["raw_rewards"].sum(axis=0)
            total_rewards.append(reward_per_env.mean())
            alive_per_env = np.logical_not(rollout["dones"]).sum(axis=0)
            trajectory_lengths.append(alive_per_env.mean())

            flat = flatten(rollout)
            keep = np.logical_not(flat["dones"])
            # Rollouts are batched and a batch is only cut once every
            # trajectory in it has terminated, so some entries belong to
            # already-finished trajectories; drop those here.
            kept = {}
            for name, values in flat.items():
                kept[name] = values[keep]

            steps_collected += kept["raw_rewards"].shape[0]
            collected.append(kept)
            need_more = not (steps_collected >= min_steps_per_task)

        merged = concatenate(collected)
        return (
            merged,
            total_rewards,
            trajectory_lengths,
            self.observation_filter,
            self.reward_filter)

Example #2

Show file

File: test.py Project: qyccc/rllibddpg

 def testFlatten(self):
     """Check flatten(start=0, stop=2) on a dict of 2x2xK arrays.

     For every key, entry ``[i][j]`` of the input array must equal entry
     ``[2 * i + j]`` of the flattened array.
     """
     nested = {
         "s": np.array([[[1, -1], [2, -2]], [[3, -3], [4, -4]]]),
         "a": np.array([[[5], [-5]], [[6], [-6]]])
     }
     # A shallow copy is passed so `nested` itself stays available for the
     # comparisons below.
     flat = flatten(nested.copy(), start=0, stop=2)
     for key in ("s", "a"):
         for outer in range(2):
             for inner in range(2):
                 expected = nested[key][outer][inner]
                 actual = flat[key][2 * outer + inner]
                 assert_allclose(expected, actual)