Python Trajectory.add Examples

Programming Language: Python

Namespace/Package Name: easyrl.utils.data

Class/Type: Trajectory

Method/Function: add

Examples at hotexamples.com: 4

Python Trajectory.add - 4 examples found. These are the top-rated real-world Python examples of easyrl.utils.data.Trajectory.add, extracted from open source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

Trajectory(5)

add(4)

add_extra(3)

Frequently Used Methods

Trajectory (5)

add (4)

add_extra (3)

Example #1

Show file

    def __call__(self,
                 time_steps,
                 sample=True,
                 evaluation=False,
                 return_on_done=False,
                 render=False,
                 render_image=False,
                 sleep_time=0,
                 reset_first=False,
                 reset_kwargs=None,
                 action_kwargs=None,
                 get_last_val=False):
        """Step the agent in the train or eval env and collect a trajectory.

        The agent's hidden state is threaded through every
        ``self.agent.get_action`` call and passed through
        ``self.check_hidden_state`` after each step.

        Args:
            time_steps: Maximum number of ``env.step`` calls to make.
            sample: Forwarded to ``self.agent.get_action``.
            evaluation: If True, use ``self.eval_env``, always reset before
                the rollout, skip the ``last_val`` computation, and clear
                ``self.obs`` / ``self.hidden_states`` afterwards. Otherwise
                use ``self.train_env`` and store the final observation and
                the detached hidden state on ``self``.
            return_on_done: If True, stop early once ``np.all(all_dones)``
                holds for the flags returned by
                ``self.get_true_done_next_ob``.
            render: If True, call ``env.render()`` before every step.
            render_image: If True, grab ``env.get_images()`` before every
                step and store each image under ``'render_image'`` in the
                matching ``info`` entry.
            sleep_time: Seconds to sleep after ``env.render()``; only used
                when ``render`` is True and the value is positive.
            reset_first: If True, call ``self.reset`` even when ``self.obs``
                is already set.
            reset_kwargs: Keyword arguments forwarded to ``self.reset``.
            action_kwargs: Extra keyword arguments forwarded to
                ``self.agent.get_action``.
            get_last_val: If True (and not ``evaluation``), on the final
                loop iteration (``t == time_steps - 1``) store the result of
                ``self.agent.get_val`` under the ``'last_val'`` extra. It is
                not stored when the loop breaks early via ``return_on_done``,
                because the ``break`` comes first.

        Returns:
            The ``Trajectory`` holding one ``StepData`` per executed step.
        """
        traj = Trajectory()
        if reset_kwargs is None:
            reset_kwargs = {}
        if action_kwargs is None:
            action_kwargs = {}
        if evaluation:
            env = self.eval_env
        else:
            env = self.train_env
        # In RL^2, we should always reset at the beginning of a rollout
        # NOTE(review): reset is called without `env`; confirm it resets the
        # env selected above (unless `reset_kwargs` carries it).
        if self.obs is None or reset_first or evaluation:
            self.reset(**reset_kwargs)
        ob = self.obs
        hidden_state = self.hidden_states
        # this is critical for some environments depending
        # on the returned ob data. use deepcopy() to avoid
        # adding the same ob to the traj

        # only add deepcopy() when a new ob is generated
        # so that traj[t].next_ob is still the same instance as traj[t+1].ob
        ob = deepcopy(ob)
        # all_dones is only tracked when early return was requested.
        if return_on_done:
            all_dones = np.zeros(env.num_envs, dtype=bool)
        else:
            all_dones = None
        done = None
        for t in range(time_steps):
            if render:
                env.render()
                if sleep_time > 0:
                    time.sleep(sleep_time)
            if render_image:
                # get render images at the same time step as ob
                imgs = deepcopy(env.get_images())

            action, action_info, hidden_state = self.agent.get_action(
                ob, sample=sample, hidden_state=hidden_state, **action_kwargs)
            #print('action_info', action_info)
            # Record the hidden-state shape the first time one is produced.
            if self.hidden_state_shape is None:
                self.hidden_state_shape = hidden_state.shape
            next_ob, reward, done, info = env.step(action)

            if render_image:
                # Attach the pre-step image to the matching info entry.
                for img, inf in zip(imgs, info):
                    inf['render_image'] = deepcopy(img)

            true_next_ob, true_done, all_dones = self.get_true_done_next_ob(
                next_ob, done, reward, info, all_dones)

            sd = StepData(
                ob=ob,
                action=action,
                action_info=action_info,
                next_ob=true_next_ob,
                reward=reward,
                done=true_done,
                info=info,
                extra=
                done,  # this is a flag that can tell whether the environment
                # is reset or not so that we know whether we need to
                # reset the hidden state or not. We save it in "extra"
            )
            # The next step observes the env's raw next_ob, while the stored
            # step above keeps true_next_ob.
            ob = next_ob
            traj.add(sd)
            if return_on_done and np.all(all_dones):
                break

            # the order of next few lines matter, do not exchange
            # (get_val receives hidden_state before check_hidden_state
            # processes it with the raw `done` flags)
            if get_last_val and not evaluation and t == time_steps - 1:
                last_val, _ = self.agent.get_val(traj[-1].next_ob_raw,
                                                 hidden_state=hidden_state)
                if last_val is not None:
                    traj.add_extra('last_val', torch_to_np(last_val))
                else:
                    traj.add_extra('last_val', None)
            hidden_state = self.check_hidden_state(hidden_state, done=done)
        self.obs = ob if not evaluation else None
        # NOTE(review): assumes hidden_state is not None here; with
        # time_steps == 0 it is still the value read from self.hidden_states.
        self.hidden_states = hidden_state.detach() if not evaluation else None
        return traj

Example #2

Show file

File: single_env_nstep_runner.py Project: taochenshh/easyrl

 def __call__(self, time_steps, sample=True, evaluation=False,
              return_on_done=False, render=False, render_image=False,
              sleep_time=0, reset_first=False,
              env_reset_kwargs=None, agent_reset_kwargs=None,
              action_kwargs=None, random_action=False):
     """Step a single environment for up to ``time_steps`` steps.

     Args:
         time_steps: Maximum number of ``env.step`` calls to make.
         sample: Forwarded to ``self.agent.get_action``.
         evaluation: If True, use ``self.eval_env``, always reset before
             the rollout and leave ``self.obs`` as None afterwards;
             otherwise use ``self.train_env`` and keep a deep copy of the
             last observation in ``self.obs``. The flag is also passed to
             the agent as the ``eval`` keyword argument.
         return_on_done: If True, stop at the first step whose ``done``
             is truthy.
         render: If True, call ``env.render()`` before every step.
         render_image: If True, call ``get_render_images(env)`` before
             every step and store each image under ``'render_image'`` in
             the matching ``info`` entry.
         sleep_time: Seconds to sleep after ``env.render()``; only used
             when ``render`` is True and the value is positive.
         reset_first: If True, reset even when ``self.obs`` is already set.
         env_reset_kwargs: Passed to ``self.reset`` as ``env_reset_kwargs``.
         agent_reset_kwargs: Passed to ``self.reset`` as
             ``agent_reset_kwargs``.
         action_kwargs: Extra keyword arguments for
             ``self.agent.get_action``. The mapping is copied, so the
             caller's object is never modified.
         random_action: If True, sample actions from ``env.action_space``
             instead of querying the agent.

     Returns:
         The ``Trajectory`` holding one ``StepData`` per executed step.
     """
     traj = Trajectory()
     if env_reset_kwargs is None:
         env_reset_kwargs = {}
     if agent_reset_kwargs is None:
         agent_reset_kwargs = {}
     # Work on a private copy: writing 'eval' straight into the caller's
     # dict would leak this call's flag into whatever the caller does with
     # that dict afterwards.
     action_kwargs = dict(action_kwargs) if action_kwargs is not None else {}
     action_kwargs['eval'] = evaluation
     if evaluation:
         env = self.eval_env
     else:
         env = self.train_env
     if self.obs is None or reset_first or evaluation:
         self.reset(env=env,
                    env_reset_kwargs=env_reset_kwargs,
                    agent_reset_kwargs=agent_reset_kwargs)
     ob = self.obs
     # Copy so the stored observation is not aliased with self.obs.
     ob = deepcopy(ob)
     for _ in range(time_steps):
         if render:
             env.render()
             if sleep_time > 0:
                 time.sleep(sleep_time)
         if render_image:
             # get render images at the same time step as ob
             imgs = get_render_images(env)
         if random_action:
             action = env.action_space.sample()
             if len(action.shape) == 1:
                 # the first dim is num_envs
                 action = list_to_numpy(action, expand_dims=0)
             action_info = dict()
         else:
             action, action_info = self.agent.get_action(ob,
                                                         sample=sample,
                                                         **action_kwargs)
         next_ob, reward, done, info = env.step(action)
         if render_image:
             for img, inf in zip(imgs, info):
                 inf['render_image'] = deepcopy(img)
         # A step flagged 'TimeLimit.truncated' in its info entry is stored
         # with done=False, while the raw `done` still drives the reset
         # logic below.
         true_done = deepcopy(done)
         for iidx, inf in enumerate(info):
             true_done[iidx] = true_done[iidx] and not inf.get('TimeLimit.truncated',
                                                               False)
         sd = StepData(ob=ob,
                       action=action,
                       action_info=action_info,
                       next_ob=next_ob,
                       reward=reward,
                       done=true_done,
                       info=info)
         ob = next_ob
         traj.add(sd)
         if return_on_done and done:
             break
         if done:
             # NOTE(review): relies on self.reset returning the fresh
             # observation; the initial reset above reads it from self.obs
             # instead -- confirm both paths agree.
             ob = self.reset(env, env_reset_kwargs, agent_reset_kwargs)
     self.obs = None if evaluation else deepcopy(ob)
     return traj

Example #3

Show file

    def __call__(self, time_steps, sample=True, evaluation=False,
                 return_on_done=False, render=False, render_image=False,
                 sleep_time=0, reset_first=False, reset_kwargs=None,
                 action_kwargs=None, random_action=False,
                 get_last_val=False):
        """Collect up to ``time_steps`` steps of experience into a trajectory.

        Args:
            time_steps: Maximum number of ``env.step`` calls to make.
            sample: Forwarded to ``self.agent.get_action``.
            evaluation: Selects ``self.eval_env`` (else ``self.train_env``),
                forces a reset, is passed as ``skip_record`` to
                ``self.get_true_done_next_ob``, disables ``get_last_val``
                and leaves ``self.obs`` as None afterwards.
            return_on_done: Stop early once ``np.all(all_dones)`` holds.
            render: Call ``env.render()`` before every step.
            render_image: Store the images from ``get_render_images(env)``
                under ``'render_image'`` in the matching ``info`` entries.
            sleep_time: Seconds to sleep after rendering when positive.
            reset_first: Reset even when ``self.obs`` is already set.
            reset_kwargs: Extra keyword arguments for ``self.reset``.
            action_kwargs: Extra keyword arguments for
                ``self.agent.get_action``.
            random_action: Use ``env.random_actions()`` instead of the agent.
            get_last_val: When not evaluating, store
                ``self.agent.get_val`` of the last step's ``next_ob`` under
                the ``'last_val'`` extra.

        Returns:
            The ``Trajectory`` holding one ``StepData`` per executed step.
        """
        rollout = Trajectory()
        reset_kwargs = {} if reset_kwargs is None else reset_kwargs
        action_kwargs = {} if action_kwargs is None else action_kwargs
        env = self.eval_env if evaluation else self.train_env

        must_reset = self.obs is None or reset_first or evaluation
        if must_reset:
            self.reset(env=env, **reset_kwargs)

        # Copy the starting observation once; afterwards a step's next
        # observation is reused as-is, so traj[t].next_ob and traj[t+1].ob
        # can stay the same instance.
        current_ob = deepcopy(self.obs)
        all_dones = (np.zeros(env.num_envs, dtype=bool)
                     if return_on_done else None)

        for _ in range(time_steps):
            if render:
                env.render()
                if sleep_time > 0:
                    time.sleep(sleep_time)
            if render_image:
                # Capture frames that correspond to the current observation.
                frames = get_render_images(env)

            if not random_action:
                action, action_info = self.agent.get_action(
                    current_ob, sample=sample, **action_kwargs)
            else:
                action = env.random_actions()
                action_info = {}

            next_ob, reward, done, info = env.step(action)

            if render_image:
                for frame, env_info in zip(frames, info):
                    env_info['render_image'] = deepcopy(frame)

            true_next_ob, true_done, all_dones = self.get_true_done_next_ob(
                next_ob, done, reward, info, all_dones,
                skip_record=evaluation)
            rollout.add(StepData(ob=current_ob,
                                 action=action,
                                 action_info=action_info,
                                 next_ob=true_next_ob,
                                 reward=reward,
                                 done=true_done,
                                 info=info))
            current_ob = next_ob

            if return_on_done and np.all(all_dones):
                break

        if get_last_val and not evaluation:
            last_val = self.agent.get_val(rollout[-1].next_ob)
            rollout.add_extra('last_val', torch_to_np(last_val))
        self.obs = None if evaluation else current_ob
        return rollout

Example #4

Show file

    def __call__(self, time_steps, sample=True, evaluation=False,
                 return_on_done=False, render=False, render_image=False,
                 sleep_time=0, reset_kwargs=None, action_kwargs=None):
        """Reset the env and collect up to ``time_steps`` steps of experience.

        The env is reset on every call. Unless ``evaluation`` is True, the
        result of ``self.agent.get_val`` on the last step's ``next_ob_raw``
        is stored under the ``'last_val'`` extra.

        Args:
            time_steps: Maximum number of ``env.step`` calls to make.
            sample: Forwarded to ``self.agent.get_action``.
            evaluation: Selects ``self.eval_env`` (else ``self.train_env``)
                and skips the ``'last_val'`` computation.
            return_on_done: Stop early once every entry of the per-env
                done tracker has been set.
            render: Call ``env.render()`` before every step.
            render_image: Store the images from ``env.get_images()`` under
                ``'render_image'`` in the matching ``info`` entries.
            sleep_time: Seconds to sleep after rendering when positive.
            reset_kwargs: Keyword arguments for ``env.reset``.
            action_kwargs: Extra keyword arguments for
                ``self.agent.get_action``.

        Returns:
            The ``Trajectory`` holding one ``StepData`` per executed step.
        """
        rollout = Trajectory()
        reset_kwargs = {} if reset_kwargs is None else reset_kwargs
        action_kwargs = {} if action_kwargs is None else action_kwargs
        env = self.eval_env if evaluation else self.train_env

        # Deep-copy every freshly produced observation exactly once, so the
        # trajectory never aliases env-owned data while traj[t].next_ob and
        # traj[t+1].ob remain the same instance.
        current_ob = deepcopy(env.reset(**reset_kwargs))
        all_dones = (np.zeros(env.num_envs, dtype=bool)
                     if return_on_done else None)

        for _ in range(time_steps):
            if render:
                env.render()
                if sleep_time > 0:
                    time.sleep(sleep_time)
            if render_image:
                # Capture frames that correspond to the current observation.
                frames = deepcopy(env.get_images())

            action, action_info = self.agent.get_action(
                current_ob, sample=sample, **action_kwargs)
            stepped_ob, reward, done, info = env.step(action)
            next_ob = deepcopy(stepped_ob)

            if render_image:
                for frame, env_info in zip(frames, info):
                    env_info['render_image'] = deepcopy(frame)

            finished = np.argwhere(done).flatten()
            if return_on_done and finished.size > 0:
                # The vec env resets a finished env automatically, so the
                # returned next_ob is not actually its next observation.
                all_dones[finished] = True

            rollout.add(StepData(ob=current_ob,
                                 action=deepcopy(action),
                                 action_info=deepcopy(action_info),
                                 next_ob=next_ob,
                                 reward=deepcopy(reward),
                                 done=deepcopy(done),
                                 info=deepcopy(info)))
            current_ob = next_ob

            if return_on_done and np.all(all_dones):
                break

        if not evaluation:
            last_val = self.agent.get_val(rollout[-1].next_ob_raw)
            rollout.add_extra('last_val', torch_to_np(last_val))
        return rollout