def rollout_policy(filename, traj_len, seed, env_name, n_trajs=1):
    """Roll out a saved SAC policy and collect fixed-length trajectories.

    Args:
        filename: path handed to `SAC.load` to restore the policy.
        traj_len: number of environment steps taken per trajectory.
        seed: value passed to `env.seed` once, before the first reset.
        env_name: environment id handed to `gym.make`.
        n_trajs: number of trajectories to collect (coerced with `int`).

    Returns:
        A list of `n_trajs` `types.TrajectoryWithRew` objects. Each one holds
        `traj_len + 1` observations (the reset observation plus one per step),
        `traj_len` actions and rewards, and `traj_len` empty info dicts; the
        info dicts returned by the environment are discarded.
    """
    model = SAC.load(filename)
    env = gym.make(env_name)
    try:
        env.seed(seed)

        trajs = []
        for _ in range(int(n_trajs)):
            obs = env.reset()
            obs_list, acts_list, rews_list = [obs], [], []
            for _ in range(traj_len):
                act = model.predict(obs, deterministic=True)[0]
                # The `done` flag is ignored so that every trajectory has
                # exactly `traj_len` steps.
                # NOTE(review): this steps past episode termination if the
                # environment ends early -- confirm `traj_len` never exceeds
                # the episode horizon.
                obs, rew, _done, _info = env.step(act)
                acts_list.append(act)
                obs_list.append(obs)
                rews_list.append(rew)

            trajs.append(
                types.TrajectoryWithRew(
                    obs=np.array(obs_list),
                    acts=np.array(acts_list),
                    infos=[{} for _ in range(traj_len)],
                    rews=np.array(rews_list),
                )
            )
    finally:
        # Release the environment's resources (simulator handles, viewers)
        # even when loading, stepping or trajectory construction fails.
        env.close()

    return trajs
def convert_trajs(filename, traj_len):
    """Build one fixed-length trajectory from a pickled demonstration file.

    The pickle is indexed as `data["observations"][0]` and
    `data["actions"][0]`; only those first entries are used.

    Args:
        filename: path of the pickle file to read.
        traj_len: number of steps to keep; must be strictly smaller than the
            number of stored observations.

    Returns:
        A one-element list holding a `types.TrajectoryWithRew` with
        `traj_len + 1` observations, `traj_len` actions, all-zero rewards
        and empty info dicts.
    """
    # NOTE: unpickling can execute arbitrary code -- only load trusted files.
    with open(filename, "rb") as handle:
        payload = pickle.load(handle)

    demo_obs = payload["observations"][0]
    assert traj_len < len(demo_obs)

    observations = np.array(demo_obs[: traj_len + 1])
    actions = np.array(payload["actions"][0][:traj_len])
    # The file's rewards are not read; zeros are used as placeholders.
    zero_rewards = np.array([0] * traj_len)
    empty_infos = [dict() for _ in range(traj_len)]

    return [
        types.TrajectoryWithRew(
            obs=observations,
            acts=actions,
            infos=empty_infos,
            rews=zero_rewards,
        )
    ]
# Example no. 3
# 0
    def finish_trajectory(self, key: Hashable = None) -> types.TrajectoryWithRew:
        """Assemble and return the in-progress trajectory stored under `key`.

        The entry for `key` is removed from `self.partial_trajectories`. Its
        parts (dicts mapping field name to array) are grouped by field name
        and stacked along a new leading axis.

        Args:
            key: key uniquely identifying which in-progress trajectory to
                remove.

        Returns:
            The single completed trajectory built from the stored parts.
        """
        fragments = self.partial_trajectories[key]
        del self.partial_trajectories[key]

        # Collect every field's pieces in the order the parts were stored.
        pieces_by_field = {}
        for fragment in fragments:
            for field, piece in fragment.items():
                pieces_by_field.setdefault(field, []).append(piece)

        stacked_fields = {}
        for field, pieces in pieces_by_field.items():
            stacked_fields[field] = np.stack(pieces, axis=0)

        finished = types.TrajectoryWithRew(**stacked_fields)
        # Rewards and actions must align, with one more observation than
        # actions.
        assert (
            finished.rews.shape[0]
            == finished.acts.shape[0]
            == finished.obs.shape[0] - 1
        )
        return finished
# Example no. 4
# 0
def trajectory_rew(trajectory: types.Trajectory) -> types.TrajectoryWithRew:
    """Return a copy of `trajectory` with standard-normal random rewards.

    One reward is drawn per `len(trajectory)` from NumPy's global random
    state; the remaining fields are taken from `dataclasses.asdict`.
    """
    n_rewards = len(trajectory)
    sampled_rews = np.random.randn(n_rewards)
    base_fields = dataclasses.asdict(trajectory)
    return types.TrajectoryWithRew(**base_fields, rews=sampled_rews)