def keep_shortest_trajectories(self,
                                demos_file,
                                num_to_keep,
                                episode_lengths=None):
     # Keep num_to_keep shortest trajectories in the dataset at demos_file.
     if episode_lengths is None:
         episode_lengths = []
         with gfile.GFile(demos_file, 'rb') as f:
             while True:
                 try:
                     demo = pickle.load(f)
                     episode_lengths.append(len(demo['actions']))
                 except EOFError:
                     break
     sorted_lengths = sorted(enumerate(episode_lengths),
                             key=operator.itemgetter(1))
     included_trajectories = set(
         [e[0] for e in sorted_lengths[:num_to_keep]])
     print('Keeping', len(included_trajectories), 'trajectories')
     all_demos_file = (demos_file.replace(f'e{num_to_keep}',
                                          '').replace('.pkl', 'all.pkl'))
     gfile.Rename(demos_file, all_demos_file)
     new_demo_writer = pickle_dataset.DemoWriter(demos_file)
     i = 0
     with gfile.GFile(all_demos_file, 'rb') as f:
         while True:
             try:
                 demo = pickle.load(f)
                 if i in included_trajectories:
                     new_demo_writer.write_episode(demo['observations'],
                                                   demo['actions'])
                 i += 1
             except EOFError:
                 break
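# The length scan in keep_shortest_trajectories assumes the demos file holds
# one pickled dict per episode, appended back to back and read with repeated
# pickle.load calls until EOFError. A minimal standalone sketch of that read
# pattern, using only the standard library and a hypothetical local path in
# place of gfile:
import pickle


def episode_lengths_from_pickle_stream(path):
    """Returns the number of actions in each episode of a pickle-stream file."""
    lengths = []
    with open(path, 'rb') as f:
        while True:
            try:
                episode = pickle.load(f)
            except EOFError:
                break
            lengths.append(len(episode['actions']))
    return lengths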
 def keep_latest_trajectories(self, demos_file, num_to_keep):
     # Keep num_to_keep latest trajectories in the dataset at demos_file.
     print(demos_file)
     all_demos_file = (demos_file.replace(f'e{num_to_keep}',
                                          '').replace('.pkl', 'all.pkl'))
     print(all_demos_file)
     gfile.Rename(demos_file, all_demos_file)
     last_demos = []
     with gfile.GFile(all_demos_file, 'rb') as f:
         while True:
             try:
                 demo = pickle.load(f)
                 last_demos.append(demo)
                 # Keep only the num_to_keep most recently written episodes.
                 last_demos = last_demos[-num_to_keep:]
             except EOFError:
                 break
     new_demo_writer = pickle_dataset.DemoWriter(demos_file)
     for demo in last_demos:
         new_demo_writer.write_episode(demo['observations'],
                                       demo['actions'])
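# To keep only the newest episodes without holding the whole file in memory,
# a bounded deque can replace the append-and-slice pattern above. A small
# sketch under the same one-pickled-dict-per-episode assumption (the path is
# hypothetical):
import collections
import pickle


def load_latest_episodes(path, num_to_keep):
    """Returns the last num_to_keep episodes from a pickle-stream file."""
    latest = collections.deque(maxlen=num_to_keep)
    with open(path, 'rb') as f:
        while True:
            try:
                latest.append(pickle.load(f))
            except EOFError:
                break
    return list(latest)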
def main(_):
    task = FLAGS.task
    env = gym.make(f'visual-{task}-v0')

    topdir = FLAGS.top_dir
    in_path = os.path.join(topdir, FLAGS.in_dir, f'{task}-v0_demos.pickle')
    with gfile.GFile(in_path, 'rb') as f:
        dataset = pickle.load(f)

    out_path = os.path.join(topdir, FLAGS.out_dir, f'{task}-v0_demos.pickle')
    writer = pickle_dataset.DemoWriter(out_path,
                                       compress=FLAGS.compress_images)

    old_time = time.time()

    num_demos = FLAGS.max_demos_to_include or len(dataset)
    for d in range(num_demos):
        env.reset()
        demo = dataset[d]
        observations, actions = get_observations_for_demo(env, demo)
        writer.write_episode(observations, actions)
        new_time = time.time()
        print(f'{d + 1} / {num_demos}', new_time - old_time, 's')
        old_time = new_time
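# main above reads several absl-style flags (task, top_dir, in_dir, out_dir,
# compress_images, max_demos_to_include) whose definitions are not part of
# this excerpt. The sketch below shows a plausible set of definitions; the
# defaults and help strings are illustrative assumptions, not the original
# values.
from absl import app
from absl import flags

FLAGS = flags.FLAGS

flags.DEFINE_string('task', 'relocate', 'Task name used in the env id.')
flags.DEFINE_string('top_dir', '/tmp/demos', 'Root directory for datasets.')
flags.DEFINE_string('in_dir', 'raw', 'Subdirectory holding the input pickle.')
flags.DEFINE_string('out_dir', 'rendered', 'Subdirectory for the output file.')
flags.DEFINE_boolean('compress_images', True,
                     'Whether to compress image observations when writing.')
flags.DEFINE_integer('max_demos_to_include', None,
                     'If set, convert only this many demonstrations.')

if __name__ == '__main__':
    app.run(main)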
def env_loop(env,
             agent,
             num_episodes,
             log_path,
             record_failed,
             seed,
             increment_seed,
             compress_images=True):
    """Loop for collecting demonstrations with an agent in a Gym environment."""
    if log_path is None:
        log_f = None
        success_f = None
        demo_writer = None
    else:
        log_f = gfile.GFile(log_path + '_log.txt', 'w')
        success_f = gfile.GFile(log_path + '_success.txt', 'w')
        demo_writer = pickle_dataset.DemoWriter(log_path + '.pkl',
                                                compress_images)
        print('Writing demos to', log_path + '.pkl')
    e = 0
    # Counter to keep track of seed offset, if not recording failed episodes.
    skipped_seeds = 0
    num_successes = 0
    num_attempts = 0
    min_reward, max_reward = np.inf, -np.inf
    while e < num_episodes:
        if e % 10 == 0 and e > 0:
            print(f'Episode {e} / {num_episodes}; '
                  f'Success rate {num_successes} / {num_attempts}')
        if increment_seed:
            env.seed(seed + skipped_seeds + e)
        obs = env.reset()

        done = False
        _, agent_info = agent.get_action(obs['original_obs'])
        action = agent_info['evaluation']
        observations = []
        actions = []
        rewards = []
        # For envs with non-Markovian success criteria, track required fields.
        goals_achieved = []

        while not done:
            observations.append(obs)
            actions.append(action)
            obs, reward, done, info = env.step(action)
            rewards.append(reward)
            min_reward = min(min_reward, reward)
            max_reward = max(max_reward, reward)
            _, agent_info = agent.get_action(obs['original_obs'])
            action = agent_info['evaluation']
            if 'goal_achieved' in info:
                goals_achieved.append(info['goal_achieved'])

        # Environment defines success criteria based on full episode.
        success_percentage = env.evaluate_success([{
            'env_infos': {
                'goal_achieved': goals_achieved
            }
        }])
        success = bool(success_percentage)

        num_successes += int(success)
        num_attempts += 1
        if success:
            print(f'{e}: success')
            if log_f is not None:
                log_f.write(f'{e}: success\n')
                log_f.flush()
            if success_f is not None:
                success_f.write('success\n')
                success_f.flush()
        else:
            time_limit_reached = info.get('TimeLimit.truncated', False)
            if time_limit_reached:
                print(f'{e}: failure: time limit')
            else:
                print(f'{e}: failure')
            if log_f is not None:
                if time_limit_reached:
                    log_f.write(f'{e}: failure: time limit\n')
                else:
                    log_f.write(f'{e}: failure\n')
                log_f.flush()
            if success_f is not None:
                success_f.write('failure\n')
                success_f.flush()

        if success or record_failed:
            e += 1
            if demo_writer is not None:
                demo_writer.write_episode(observations, actions, rewards)
        elif not record_failed:
            skipped_seeds += 1

    print(f'Done; Success rate {num_successes} / {num_attempts}')
    print('min reward', min_reward)
    print('max reward', max_reward)
    if log_f is not None:
        log_f.write(f'Done; Success rate {num_successes} / {num_attempts}\n')
        log_f.write(f'min reward {min_reward}\n')
        log_f.write(f'max reward {max_reward}\n')
        log_f.close()
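# env_loop above assumes an agent whose get_action(observation) returns a pair
# (sampled_action, info) with info['evaluation'] holding the action to execute,
# and an env whose observations carry an 'original_obs' entry and which exposes
# evaluate_success. The stub below is a purely illustrative stand-in for that
# agent interface, not part of the original code:
class RandomEvalAgent:
    """Illustrative agent matching the interface env_loop relies on."""

    def __init__(self, action_space):
        self._action_space = action_space

    def get_action(self, observation):
        del observation  # A random agent ignores the observation.
        action = self._action_space.sample()
        return action, {'evaluation': action}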
def compress_dataset(demos_file, new_demos_file):
  dataset = pickle_dataset.DemoReader(path=demos_file)
  writer = pickle_dataset.DemoWriter(path=new_demos_file)

  for obs, act in zip(dataset.observations, dataset.actions):
    writer.write_episode(obs, act)
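# compress_dataset re-serializes every stored episode through a fresh
# DemoWriter, so whatever compression DemoWriter applies by default takes
# effect on the copy; only observations and actions are carried over. A usage
# sketch with hypothetical paths:
#
#   compress_dataset(demos_file='demos/pick-v0_demos.pkl',
#                    new_demos_file='demos/pick-v0_demos_compressed.pkl')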
    def eval_policy(self,
                    num_episodes,
                    trained_steps=None,
                    collapse_policy=True,
                    eval_path=None,
                    num_videos_to_save=0,
                    max_num_steps=None,
                    seed=None,
                    increment_seed=False,
                    stop_if_stuck=False):
        """Evaluate policy on env for num_episodes episodes."""
        if FLAGS.domain == 'mime':
            self._eval_environment.create_env()
        if not increment_seed and seed is not None:
            self._eval_environment.env.seed(seed)
            if self._cam_eval_environment is not None:
                self._cam_eval_environment.env.seed(seed)
        num_successes = 0
        action_logger = ActionLogger(self._environment.action_spec())
        if max_num_steps is None:
            max_num_steps = self._eval_environment.default_max_episode_steps
        if eval_path is None:
            log_f = None
            success_f = None
            episode_length_f = None
            eval_writer = None
        else:
            if not gfile.exists(os.path.dirname(eval_path)):
                gfile.makedirs(os.path.dirname(eval_path))
            collapse_str = 'c' if collapse_policy else ''
            stuck_str = 's' if stop_if_stuck else ''
            eval_summary_path = eval_path + f'_all{collapse_str}{stuck_str}'
            eval_path = eval_path + f'_{trained_steps}{collapse_str}{stuck_str}'
            log_f = gfile.GFile(eval_path + '_log.txt', 'w')
            success_f = gfile.GFile(eval_path + '_success.txt', 'w')
            episode_length_f = gfile.GFile(eval_path + '_lengths.txt', 'w')
            eval_writer = pickle_dataset.DemoWriter(eval_path + '.pkl')
            actions_path = eval_path + '_actions.pkl'
            if gfile.exists(actions_path):
                gfile.Remove(actions_path)
        for e in range(num_episodes):
            rewards = []
            if increment_seed and seed is not None:
                self._eval_environment.env.seed(seed + e)
                if self._cam_eval_environment is not None:
                    self._cam_eval_environment.env.seed(seed + e)
            if e % 10 == 0 and e > 0:
                success_rate = num_successes / e * 100
                print(
                    f'Episode {e} / {num_episodes}; Success rate {num_successes} / '
                    f'{e} ({success_rate:.4f}%)')
            if (e < num_videos_to_save and eval_writer is not None
                    and self._cam_eval_environment is not None):
                environment = self._cam_eval_environment
                self._eval_environment.reset(
                )  # Keep both environments in same state.
                print(f'eval episode {e}: using cam env')
            else:
                environment = self._eval_environment
                if self._cam_eval_environment is not None:
                    # Keep both environments in same state.
                    self._cam_eval_environment.reset()
                    print(f'eval episode {e}: using non-cam env')

            timestep = environment.reset()
            observations = []
            actions = []
            step_count = 0
            if FLAGS.base_controller is not None:
                # Reset script for each episode.
                self._actor.base_controller = ScriptAgent(
                    environment.env, FLAGS.base_controller)

            while not timestep.last():
                acme_obs, _, norm_base_act = self._actor.get_acme_observation(
                    timestep.observation)
                action, base_action, residual_action, _, _, _, _ = (
                    self._actor.select_action(acme_obs,
                                              norm_base_act,
                                              timestep.observation,
                                              add_exploration=False,
                                              collapse=collapse_policy))
                observations.append(timestep.observation)
                actions.append(self._actor.flat_action_to_dict(action))

                self._log_action(action_logger,
                                 (action, base_action, residual_action))
                next_timestep = environment.step(action)
                info = environment.info_from_observation(
                    next_timestep.observation)

                timestep = next_timestep
                rewards.append(timestep.reward)
                step_count += 1

            discounted_returns = [rewards[-1]]
            for r in reversed(rewards[:-1]):
                discounted_returns.append(r + FLAGS.discount *
                                          discounted_returns[-1])
            self.min_discounted = min(self.min_discounted,
                                      np.min(discounted_returns))
            self.max_discounted = max(self.max_discounted,
                                      np.max(discounted_returns))
            print('discounted episode return range: '
                  f'[{self.min_discounted}, {self.max_discounted}]')

            if info['success']:
                print(f'{e}: success')
                if log_f is not None:
                    log_f.write(f'{e}: success\n')
                    log_f.flush()
                if success_f is not None:
                    success_f.write('success\n')
                    success_f.flush()
                num_successes += 1
            else:
                if 'failure_message' in info:
                    print(f'{e}: failure:', info['failure_message'])
                elif step_count >= max_num_steps or timestep.last():
                    print(f'{e}: failure: time limit')
                else:
                    print(f'{e}: failure')
                if log_f is not None:
                    if 'failure_message' in info:
                        log_f.write(f'{e}: failure: ' +
                                    info['failure_message'] + '\n')
                    elif step_count >= max_num_steps or timestep.last():
                        log_f.write(f'{e}: failure: time limit\n')
                    else:
                        log_f.write(f'{e}: failure\n')
                    log_f.flush()
                if success_f is not None:
                    success_f.write('failure\n')
                    success_f.flush()
            if episode_length_f is not None:
                episode_length_f.write(str(step_count) + '\n')
                episode_length_f.flush()
            if e < num_videos_to_save and eval_writer is not None:
                eval_writer.write_episode(observations, actions)
                action_logger.append_to_pickle(actions_path)

        success_rate = num_successes / num_episodes * 100
        print(f'Done; Success rate {num_successes} / {num_episodes} '
              f'({success_rate:.4f}%)')
        if log_f is not None:
            log_f.write(f'Done; Success rate {num_successes} / {num_episodes} '
                        f'({success_rate:.4f}%)\n')
            log_f.close()
        if eval_path is not None:
            # eval_summary_path is only defined when an eval_path is given.
            csv_writer = csv.writer(
                gfile.GFile(eval_summary_path + '_success_rates.csv', 'a'))
            csv_writer.writerow([trained_steps, num_successes / num_episodes])
        return num_successes / num_episodes, True
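# The reversed accumulation in eval_policy builds suffix returns
# G_t = r_t + discount * G_{t+1}, collected from the episode's end backwards,
# and only their min/max are tracked. A standalone check of that recursion
# with made-up rewards and an assumed discount of 0.99:
import numpy as np

rewards = [0.0, 0.0, 1.0, 1.0]  # Illustrative per-step rewards.
discount = 0.99

discounted_returns = [rewards[-1]]
for r in reversed(rewards[:-1]):
    discounted_returns.append(r + discount * discounted_returns[-1])

# Entry k holds the return of the suffix starting k steps before the end.
expected = [sum(discount**i * r for i, r in enumerate(rewards[t:]))
            for t in reversed(range(len(rewards)))]
assert np.allclose(discounted_returns, expected)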
 record_count = i % log_frames_freq
 while num_episodes is None or i < num_episodes:
     rewards = []
     episode_steps = 0
     episode_return = 0
     prev_raw_residual = None
     prev_residual_exploration = False
     # For envs with non-Markovian success criteria, track required fields.
     if i % log_frames_freq == 0:
         record_count = 0
         first_to_record = i
         last_to_record = i + num_episodes_to_log - 1
         if out_dir is not None:
             demo_writer = pickle_dataset.DemoWriter(
                 os.path.join(
                     out_dir, 'episodes',
                     f'episodes_{first_to_record}-{last_to_record}.pkl')
             )
     if record_count < num_episodes_to_log:  # Log frames for current episode.
         if self._cam_environment is None:
             environment = self._environment
         else:
             environment = self._cam_environment
             self._environment.reset(
             )  # Keep both environments in same state.
             print(f'episode {i}: using cam env')
     else:  # Do not log frames for current episode.
         environment = self._environment
         if self._cam_environment is not None:
             self._cam_environment.reset(
             )  # Keep both environments in same state.
def eval_policy(env,
                seed,
                increment_seed,
                agent,
                num_episodes,
                eval_path=None,
                num_videos_to_save=0,
                summary_writer=None,
                summary_key='',
                stop_if_stuck=False,
                verbose=False):
    """Evaluate policy on env for num_episodes episodes."""
    num_successes = 0
    success_rates = {}
    if eval_path is None:
        log_f = None
        success_f = None
        episode_length_f = None
        eval_writer = None
    else:
        log_f = gfile.GFile(eval_path + '_log.txt', 'w')
        success_f = gfile.GFile(eval_path + '_success.txt', 'w')
        episode_length_f = gfile.GFile(eval_path + '_lengths.txt', 'w')
        eval_writer = pickle_dataset.DemoWriter(eval_path + '.pkl')
    if not increment_seed:
        env.seed(seed)

    hand_vil_episodes = None
    if FLAGS.hand_vil_episodes_path is not None:
        with gfile.GFile(FLAGS.hand_vil_episodes_path, 'rb') as f:
            hand_vil_episodes = pickle.load(f)
        hand_vil_actions = hand_vil_episodes['actions']
        hand_vil_images = hand_vil_episodes['rgb']
        hand_vil_robot_info = [
            separate_episode_robot_info(e_infos, agent)
            for e_infos in hand_vil_episodes['env_infos']
        ]

    for e in range(num_episodes):
        if e % 10 == 0 and e > 0:
            success_rate = num_successes / e
            if verbose:
                print(
                    f'Episode {e} / {num_episodes}; Success rate {num_successes} / '
                    f'{e} ({success_rate * 100:.4f}%)')
            if (e % 100 == 0 and e > 0) or e == num_episodes - 1:
                success_rates[e] = success_rate
                log_success_rate(e, success_rate, summary_writer, summary_key)

        if increment_seed:
            env.seed(seed)
            seed += 1
        obs = env.reset()

        done = False
        observations = []
        actions = []
        step_count = 0
        prev_stacked_obs = None
        # For envs with non-Markovian success criteria, track required fields.
        goals_achieved = []

        while not done:
            if hand_vil_episodes is not None:
                obs = hand_vil_robot_info[e][step_count]
                obs['rgb'] = hand_vil_images[e][step_count]
            action, stacked_obs = agent.get_action(obs,
                                                   observations,
                                                   env,
                                                   return_stacked_obs=True)
            if hand_vil_episodes is not None:
                if not np.allclose(
                        action, hand_vil_actions[e][step_count], atol=5e-6):
                    raise ValueError(
                        'Actions from agent and from trajectory diverge: '
                        f'{action} vs {hand_vil_actions[e][step_count]}')
            if prev_stacked_obs is not None and stop_if_stuck:
                prev_img, prev_signals = prev_stacked_obs  # pylint: disable=unpacking-non-sequence
                img, signals = stacked_obs
                obs_stuck = np.all(np.equal(img, prev_img))
                # Note: target position has even higher noise.
                signals_stuck = np.all(np.isclose(signals, prev_signals))
                act_stuck = equal_actions(action, actions[-1])
                if obs_stuck and signals_stuck and act_stuck:
                    # Image, signals and action have all stopped changing.
                    info['failure_message'] = 'Stuck'
                    break
            prev_stacked_obs = stacked_obs
            observations.append(obs)
            actions.append(action)
            obs, unused_reward, done, info = env.step(action)
            step_count += 1
            if (hand_vil_episodes is not None
                    and step_count >= len(hand_vil_robot_info[e])):
                print('episode ends at', step_count, 'done =', done)
            if 'goal_achieved' in info:
                # Environment defines success criteria based on full episode.
                goals_achieved.append(info['goal_achieved'])
                success_percentage = env.evaluate_success([{
                    'env_infos': {
                        'goal_achieved': goals_achieved
                    }
                }])
                success = bool(success_percentage)
                done = done or success
            else:
                success = False

        if verbose:
            print(step_count, info)
        # Success is directly exposed in environment info.
        success = success or ('success' in info and info['success'])

        num_successes += int(success)
        if success:
            if verbose:
                print(f'{e}: success')
            if log_f is not None:
                log_f.write(f'{e}: success\n')
                log_f.flush()
            if success_f is not None:
                success_f.write('success\n')
                success_f.flush()
        else:
            if verbose:
                if 'failure_message' in info:
                    print(f'{e}: failure:', info['failure_message'])
                elif info.get('TimeLimit.truncated', False):
                    print(f'{e}: failure: time limit')
                else:
                    print(f'{e}: failure')
            if log_f is not None:
                if 'failure_message' in info:
                    log_f.write(f'{e}: failure: ' + info['failure_message'] +
                                '\n')
                elif info.get('TimeLimit.truncated', False):
                    log_f.write(f'{e}: failure: time limit\n')
                else:
                    log_f.write(f'{e}: failure\n')
                log_f.flush()
            if success_f is not None:
                success_f.write('failure\n')
                success_f.flush()
        if episode_length_f is not None:
            # TODO(minttu): Save env infos for later.
            episode_length_f.write(str(step_count) + '\n')
            episode_length_f.flush()
        if e < num_videos_to_save and eval_writer is not None:
            eval_writer.write_episode(observations, actions)

    success_rate = num_successes / num_episodes
    success_rates[num_episodes] = success_rate
    log_success_rate(num_episodes, success_rate, summary_writer, summary_key)
    print(f'Done; Success rate {num_successes} / {num_episodes} '
          f'({success_rate * 100:.4f}%)')
    if log_f is not None:
        log_f.write(f'Done; Success rate {num_successes} / {num_episodes} '
                    f'({success_rate * 100:.4f}%)\n')
        log_f.close()
    return success_rates
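# equal_actions, used in the stuck check above, is project code that is not
# shown in this excerpt. A purely illustrative stand-in for dict- or
# array-valued actions might look like the following; the tolerance value is
# an assumption:
import numpy as np


def equal_actions(action_a, action_b, atol=1e-6):
    """Illustrative: True if two actions match element-wise within atol."""
    if isinstance(action_a, dict) and isinstance(action_b, dict):
        return (action_a.keys() == action_b.keys() and
                all(np.allclose(action_a[k], action_b[k], atol=atol)
                    for k in action_a))
    return np.allclose(action_a, action_b, atol=atol)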
def env_loop(env, add_noise, num_episodes, log_path, record_failed, stop_early,
             seed, increment_seed, compress_images):
  """Loop for collecting demos with a scripted agent in a Mime environment."""
  if log_path is None:
    log_f = None
    success_f = None
    demo_writer = None
  else:
    log_f = gfile.GFile(log_path + '_log.txt', 'w')
    success_f = gfile.GFile(log_path + '_success.txt', 'w')
    demo_writer = pickle_dataset.DemoWriter(log_path + '.pkl', compress_images)
    print('Writing demos to', log_path + '.pkl')
  e = 0
  # Counter to keep track of seed offset, if not recording failed episodes.
  skipped_seeds = 0
  num_successes = 0
  num_attempts = 0
  while e < num_episodes:
    if e % 10 == 0 and e > 0:
      print(f'Episode {e} / {num_episodes}; '
            f'Success rate {num_successes} / {num_attempts}')
    if increment_seed:
      env.seed(seed + skipped_seeds + e)
    obs = env.reset()
    # To define a different script, use forked version of mime.
    # agent = ScriptAgent(env, FLAGS.script_type)
    agent = ScriptAgent(env)

    done = False
    action = agent.get_action()
    if add_noise:
      make_noised(action)
    observations = []
    actions = []

    while (not (stop_early and done)) and action is not None:
      observations.append(obs)
      actions.append(action)
      obs, unused_reward, done, info = env.step(action)
      action = agent.get_action()
      if add_noise and action is not None:
        make_noised(action)

    if info['success']:
      print(f'{num_attempts}: success')
      if log_f is not None:
        log_f.write(f'{num_attempts}: success\n')
        log_f.flush()
      if success_f is not None:
        success_f.write('success\n')
        success_f.flush()
      num_successes += 1
    else:
      if action is None:
        info['failure_message'] = 'End of Script.'
      print(f'{num_attempts}: failure:', info['failure_message'])
      if log_f is not None:
        log_f.write(
            f'{num_attempts}: failure: ' + info['failure_message'] + '\n')
        log_f.flush()
      if success_f is not None:
        success_f.write('failure\n')
        success_f.flush()
    num_attempts += 1

    if info['success'] or record_failed:
      e += 1
      if demo_writer is not None:
        demo_writer.write_episode(observations, actions)
    elif not record_failed:
      skipped_seeds += 1

  print(f'Done; Success rate {num_successes} / {num_attempts}')
  if log_f is not None:
    log_f.write(f'Done; Success rate {num_successes} / {num_attempts}\n')
    log_f.close()
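# make_noised, called above on the scripted actions, is project code that is
# not shown in this excerpt. An illustrative stand-in, assuming dict-valued
# actions with array-like entries and an arbitrary noise scale:
import numpy as np


def make_noised(action, scale=0.01):
    """Illustrative: perturb each entry of a dict-valued action in place."""
    for key, value in action.items():
        value = np.asarray(value, dtype=float)
        action[key] = value + np.random.normal(scale=scale, size=value.shape)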