Esempi in Python per TrajectoryWithRew

Linguaggio di programmazione: Python

Spazio dei nomi/nome del pacchetto: imitation.data.types

Metodo/funzione: TrajectoryWithRew

Esempi su hotexamples.com: 4

TrajectoryWithRew in Python: 4 esempi trovati. Questi sono i migliori esempi reali in Python di imitation.data.types.TrajectoryWithRew, estratti da progetti open source. Puoi valutarli per aiutarci a migliorare la qualità dei nostri esempi.

Esempio n. 1

Mostra file

File: create_demonstrations.py Progetto: HumanCompatibleAI/deep-rlsp

def rollout_policy(filename, traj_len, seed, env_name, n_trajs=1):
    """Roll out a saved SAC policy and package the result as trajectories.

    The policy is loaded with ``SAC.load(filename)`` and run in the
    environment created by ``gym.make(env_name)``, which is seeded once
    with ``seed`` before the first rollout.

    Args:
        filename: Path handed to ``SAC.load``.
        traj_len: Number of environment steps taken in every rollout.
        seed: Seed passed to ``env.seed``.
        env_name: Environment id passed to ``gym.make``.
        n_trajs: Number of rollouts to collect; coerced with ``int``.

    Returns:
        A list with ``n_trajs`` ``types.TrajectoryWithRew`` objects. Each
        one holds ``traj_len + 1`` observations (the reset observation
        comes first), ``traj_len`` actions, ``traj_len`` rewards and
        ``traj_len`` empty info dicts.
    """
    model = SAC.load(filename)
    env = gym.make(env_name)
    env.seed(seed)

    def _collect_one():
        # The observation returned by reset() opens the trajectory, so the
        # observation list always ends up one element longer than the others.
        current_obs = env.reset()
        observations = [current_obs]
        actions = []
        rewards = []

        for _step in range(traj_len):
            # predict() returns a tuple; only its first element (the action)
            # is used here.
            action = model.predict(current_obs, deterministic=True)[0]
            # The episode-termination flag and the env's info dict are
            # intentionally discarded: every rollout runs for exactly
            # traj_len steps even if the environment reports `done`.
            current_obs, reward, _done, _info = env.step(action)
            actions.append(action)
            observations.append(current_obs)
            rewards.append(reward)

        # One fresh (unshared) empty dict per step stands in for the infos.
        empty_infos = [{} for _step in range(traj_len)]
        return types.TrajectoryWithRew(
            obs=np.array(observations),
            acts=np.array(actions),
            infos=empty_infos,
            rews=np.array(rewards),
        )

    return [_collect_one() for _rollout in range(int(n_trajs))]

Esempio n. 2

Mostra file

File: create_demonstrations.py Progetto: HumanCompatibleAI/deep-rlsp

def convert_trajs(filename, traj_len):
    """Build a single zero-reward trajectory from a pickled recording.

    The pickle is read as a mapping with ``"observations"`` and
    ``"actions"`` entries; only element ``0`` of each entry is used.

    Args:
        filename: Path of the pickle file to read.
        traj_len: Number of actions to keep. The first recorded
            observation sequence must be strictly longer than this
            (checked with an ``assert``).

    Returns:
        A one-element list containing a ``types.TrajectoryWithRew`` made of
        the first ``traj_len + 1`` observations, the first ``traj_len``
        actions, ``traj_len`` zero rewards and ``traj_len`` empty info
        dicts.
    """
    with open(filename, "rb") as pickle_file:
        # NOTE(review): pickle.load can execute arbitrary code embedded in
        # the file -- only point this at recordings from a trusted source.
        recording = pickle.load(pickle_file)

    first_observations = recording["observations"][0]
    assert traj_len < len(first_observations)

    # There is one more observation than actions: the extra one is the
    # observation that follows the last kept action.
    n_obs = traj_len + 1
    obs = np.array(first_observations[:n_obs])
    acts = np.array(recording["actions"][0][:traj_len])

    # Rewards are not read from the recording; they are filled with zeros.
    zero_rewards = [0 for _step in range(traj_len)]
    empty_infos = [{} for _step in range(traj_len)]

    return [
        types.TrajectoryWithRew(
            obs=obs,
            acts=acts,
            infos=empty_infos,
            rews=np.array(zero_rewards),
        )
    ]

Esempio n. 3

Mostra file

    def finish_trajectory(self, key: Hashable = None) -> types.TrajectoryWithRew:
        """Assemble and remove the in-progress trajectory stored under `key`.

        The partial dictionaries accumulated under `key` are taken out of
        `self.partial_trajectories`, their per-field arrays are stacked
        along a new leading axis, and the stacked fields are passed as
        keyword arguments to `types.TrajectoryWithRew`.

        Args:
            key: key uniquely identifying which in-progress trajectory to
                remove.

        Returns:
            traj: the single completed trajectory built from the entries
                that were stored under `key`.
        """
        # Look up first, then delete: kept as two steps on purpose.
        # NOTE(review): if `partial_trajectories` is a defaultdict, a
        # missing key yields an empty entry here instead of a KeyError --
        # confirm against the constructor.
        part_dicts = self.partial_trajectories[key]
        del self.partial_trajectories[key]

        # Group the per-step arrays by field name, preserving step order.
        pieces_by_field = {}
        for part in part_dicts:
            for field_name, piece in part.items():
                pieces_by_field.setdefault(field_name, []).append(piece)

        stacked_fields = {}
        for field_name, pieces in pieces_by_field.items():
            stacked_fields[field_name] = np.stack(pieces, axis=0)

        traj = types.TrajectoryWithRew(**stacked_fields)

        # Sanity check: one reward per action, and one more observation
        # than actions.
        n_rews = traj.rews.shape[0]
        n_acts = traj.acts.shape[0]
        assert n_rews == n_acts == traj.obs.shape[0] - 1
        return traj

Esempio n. 4

Mostra file

File: test_data.py Progetto: whoiszyc/imitation-1

def trajectory_rew(trajectory: types.Trajectory) -> types.TrajectoryWithRew:
    """Return a copy of `trajectory` extended with random Gaussian rewards.

    Args:
        trajectory: dataclass instance whose fields are copied via
            `dataclasses.asdict`; `len(trajectory)` sets how many rewards
            are drawn.

    Returns:
        A `types.TrajectoryWithRew` carrying the fields of `trajectory`
        plus a `rews` array drawn from `np.random.randn`.
    """
    n_rewards = len(trajectory)
    # Uses NumPy's global random state, so results depend on np.random.seed.
    sampled_rews = np.random.randn(n_rewards)
    base_fields = dataclasses.asdict(trajectory)
    return types.TrajectoryWithRew(rews=sampled_rews, **base_fields)