Esempio n. 1
0
def gen_user_traj(max_steps=100, path=None, record=False):
    """Roll out one user-controlled episode and return its transitions.

    The rollout runs on the module-level ``env``. Every raw observation is
    converted with ``get_pong_symbolic`` before it is handed to
    ``get_user_action`` or stored in the trajectory.

    Args:
        max_steps: Upper bound on the number of environment steps to take.
        path: Output path handed to ``VideoRecorder``. When it is ``None``
            the recorder is created with ``enabled=False``.
        record: When true, a ``VideoRecorder`` is created and one frame is
            captured after every step.

    Returns:
        A list with one ``[obs, action, reward, terminate]`` entry per step,
        where ``obs`` is the converted observation the action was chosen
        from (i.e. the observation *before* the step).
    """
    video_recorder = None
    if record:
        video_recorder = VideoRecorder(env, path, enabled=path is not None)

    traj = []
    try:
        obs = get_pong_symbolic(env.reset())

        for _step in range(max_steps):
            prev_obs = obs
            action = get_user_action(prev_obs)
            raw_obs, reward, terminate, _info = env.step(action)
            obs = get_pong_symbolic(raw_obs)
            traj.append([prev_obs, action, reward, terminate])

            env.render()  # Note: rendering increases step time.

            if video_recorder is not None:
                video_recorder.capture_frame()

            if terminate:
                # Report the number of steps actually taken, not the
                # zero-based loop index.
                print('Total Steps:', len(traj))
                break
    finally:
        # Close the recorder even if the environment or the user-input
        # helper raises, so the recorder is not left open.
        if video_recorder is not None:
            video_recorder.close()

    return traj
    def sample(self, horizon, policy, record_fname=None):
        """Samples a rollout from the agent.

        Arguments:
            horizon: (int) The maximum length of the rollout to generate. The
                rollout stops earlier if the environment reports done.
            policy: (policy) The policy that the agent will use for actions.
                It is reset once, then queried as ``policy.act(obs, t)``.
            record_fname: (str/None) The name of the file to which a recording of the rollout
                will be saved. If None, the rollout will not be recorded.

        Returns: (dict) A dictionary containing data from the rollout.
            The keys of the dictionary are 'obs', 'ac', 'reward_sum' and
            'rewards'. 'obs' starts with the observation returned by
            ``env.reset()`` and therefore holds one more entry than 'ac'.
        """
        recorder = None
        if record_fname is not None:
            recorder = VideoRecorder(self.env, record_fname)

        times, rewards = [], []
        O, A, reward_sum = [], [], 0

        try:
            O.append(self.env.reset())
            policy.reset()

            for t in range(horizon):
                if recorder is not None:
                    recorder.capture_frame()

                # perf_counter is monotonic, so the measured interval cannot
                # be skewed by wall-clock adjustments.
                start = time.perf_counter()
                A.append(policy.act(O[t], t))
                times.append(time.perf_counter() - start)

                obs, reward, done, _info = self.env.step(A[t])

                O.append(obs)
                reward_sum += reward
                rewards.append(reward)
                if done:
                    break

            # Capture the frame that follows the last step.
            if recorder is not None:
                recorder.capture_frame()
        finally:
            # Always release the recorder, even when the policy or the
            # environment raises part-way through the rollout.
            if recorder is not None:
                recorder.close()

        # np.mean([]) warns and yields nan; report nan directly when no
        # action was selected (horizon == 0).
        avg_time = np.mean(times) if times else float("nan")
        print("Average action selection time: ", avg_time)
        print("Rollout length: ", len(A))

        return {
            "obs": np.array(O),
            "ac": np.array(A),
            "reward_sum": reward_sum,
            "rewards": np.array(rewards),
        }