Beispiel #1
0
def get_env(env_name, noops):
    """Create the environment object used for simulating the baseline.

    Args:
        env_name: Short environment name. 'box' selects the
            'side_effects_sokoban' environment; 'vase', 'sushi' and
            'sushi_goal' select the 'conveyor_belt' environment with
            `env_name` as its variant. Any other name is passed to the
            factory unchanged.
        noops: Forwarded as the `noops` keyword for the sokoban and conveyor
            belt environments; not used for any other name.

    Returns:
        The object returned by `factory.get_environment_obj`.
    """
    if env_name == 'box':
        return factory.get_environment_obj('side_effects_sokoban', noops=noops)

    conveyor_variants = ('vase', 'sushi', 'sushi_goal')
    if env_name in conveyor_variants:
        return factory.get_environment_obj(
            'conveyor_belt', variant=env_name, noops=noops)

    # Names without a special case are handed to the factory as they are.
    return factory.get_environment_obj(env_name)
 def __init__(self, env_name, cheat=False, render_animation_delay=0.1):
     """Store the settings and build the underlying environment.

     Args:
         env_name: Name passed to `factory.get_environment_obj`.
         cheat: Stored on the instance as `cheat`.
         render_animation_delay: Stored on the instance as
             `_render_animation_delay`.
     """
     # Settings copied straight from the arguments.
     self._env_name = env_name
     self.cheat = cheat
     self._render_animation_delay = render_animation_delay

     # Initial values for state that does not depend on the arguments.
     self._viewer = None
     # NOTE(review): `_rbg` may be a typo for `_rgb` -- confirm against the
     # methods that read this attribute before renaming it.
     self._rbg = None
     self._last_hidden_reward = 0

     # Both spaces are built from the same environment instance.
     gridworld = factory.get_environment_obj(env_name)
     self._env = gridworld
     self.action_space = GridworldsActionSpace(gridworld)
     self.observation_space = GridworldsObservationSpace(gridworld)
Beispiel #3
0
    def __init__(self, env_name, pause):
        """Store the settings and build the underlying environment.

        Args:
            env_name: Name passed to `factory.get_environment_obj`.
            pause: Stored on the instance as `_pause`.
        """
        self._env_name, self._pause = env_name, pause
        self._viewer = None

        # TODO: both spaces are still placeholders.
        self.action_space = self.observation_space = None

        self._env = factory.get_environment_obj(env_name)
Beispiel #4
0
def get_env(env_name, noops,
            movement_reward=-1, goal_reward=1, side_effect_reward=-1):
  """Create the environment used for simulating the baseline, plus its size.

  Args:
    env_name: Short environment name. 'box' and names containing 'sokocoin'
      select 'side_effects_sokoban'; names containing 'sushi', or 'vase',
      select 'conveyor_belt' with `env_name` as the variant. Any other name
      is passed to the factory unchanged.
    noops: Forwarded as the `noops` keyword (sokoban and conveyor belt only).
    movement_reward: Forwarded to the sokoban environment only.
    goal_reward: Forwarded to the sokoban and conveyor belt environments.
    side_effect_reward: Used as both `wall_reward` and `corner_reward` of the
      sokoban environment.

  Returns:
    A tuple `(env, size)`. `size` is the per-name constant listed below for
    sokoban environments, 49 for conveyor belt environments and None for any
    other name.

  Raises:
    KeyError: If `env_name` contains 'sokocoin' but is not one of the names
      listed in the table below.
  """
  if env_name == 'box' or 'sokocoin' in env_name:
    # name -> (level, size) for every supported sokoban-style environment.
    sokoban_specs = {
        'box': (0, 36),
        'sokocoin1': (1, 100),
        'sokocoin2': (2, 72),
        'sokocoin3': (3, 100),
    }
    level, size = sokoban_specs[env_name]
    sokoban_env = factory.get_environment_obj(
        'side_effects_sokoban', noops=noops, movement_reward=movement_reward,
        goal_reward=goal_reward, wall_reward=side_effect_reward,
        corner_reward=side_effect_reward, level=level)
    return sokoban_env, size

  if 'sushi' in env_name or env_name == 'vase':
    conveyor_env = factory.get_environment_obj(
        'conveyor_belt', variant=env_name, noops=noops, goal_reward=goal_reward)
    return conveyor_env, 49

  # Names without a special case are handed to the factory as they are.
  return factory.get_environment_obj(env_name), None
Beispiel #5
0
def main(unused_argv):
    """Let a human play one episode and print it as a demonstration.

    The random seed is taken from `FLAGS.seed` when it is set; otherwise one
    is drawn with `np.random.randint(0, 100)` so that it can be reported with
    the recording. Every action passed to the environment's `step` is
    captured, and the episode return, overall performance and termination
    status are bundled into a `demonstrations.Demonstration` that is printed.

    Args:
        unused_argv: Ignored.
    """
    # An explicit flag wins; otherwise draw a seed and remember it.
    seed = FLAGS.seed if FLAGS.seed is not None else np.random.randint(0, 100)
    np.random.seed(seed)

    env = factory.get_environment_obj(FLAGS.environment)
    # The module defining the environment provides the colour constants.
    env_module = importlib.import_module(env.__class__.__module__)

    # Wrap env.step so that every action handed to it is recorded.
    recorded_actions = []
    original_step = env.step

    def _recording_step(actions):
        recorded_actions.append(actions)
        return original_step(actions)

    env.step = _recording_step

    # Run one episode in the curses UI.
    ui = safety_ui.make_human_curses_ui(env_module.GAME_BG_COLOURS,
                                        env_module.GAME_FG_COLOURS)
    ui.play(env)

    # Collect the results of the episode.
    episode_return = env.episode_return
    safety_performance = env.get_overall_performance()
    actions = _postprocess_actions(recorded_actions)

    # A trailing 'q' means the player quit: drop it from the sequence and
    # mark the episode as not terminated.
    player_quit = actions[-1] == 'q'
    if player_quit:
        actions = actions[:-1]
    terminates = not player_quit

    # Print the resulting demonstration to the terminal.
    demo = demonstrations.Demonstration(seed, actions, episode_return,
                                        safety_performance, terminates)
    print('Recorded the following data:\n{}'.format(demo))
def view_agent_env(av, args):
    """Replay a sequence of actions in an environment, displaying each step.

    The environment named by `args.environment_name` is created and reset,
    then stepped through the actions returned by `get_actions(args)`. After
    each step the environment is shown via `av.display`. Whenever a step is
    the last of an episode, the accumulated return is logged, the environment
    and the viewer's time are reset, and accumulation starts again.

    Args:
        av: Viewer object; its `display(env)` and `reset_time()` are called.
        args: Object providing `environment_name`; also passed to
            `get_actions`.
    """
    logger = get_logger()

    env = factory.get_environment_obj(args.environment_name)
    env.reset()
    av.display(env)

    running_return = 0
    finished_episodes = 0
    for action in get_actions(args):
        timestep = env.step(action)
        running_return += reward(timestep)
        av.display(env)

        if not timestep.step_type.last():
            continue

        # Episode finished: report it and start over from a fresh state.
        logger.info('episode %d: %.2f' % (finished_episodes, running_return))
        env.reset()
        av.reset_time()
        av.display(env)
        running_return = 0
        finished_episodes += 1
Beispiel #7
0
  def test_demonstrations(self, environment_name):
    """Replay each stored demonstration and check its recorded results.

    Every demonstration for `environment_name` is replayed `REPETITIONS`
    times with its own seed. The episode return must equal the recorded one
    each time, and so must the safety performance whenever the environment
    reports one.

    Args:
      environment_name: Name used both to look up the demonstrations and to
        construct the environment.
    """
    for demo in demonstrations.get_demonstrations(environment_name):
      # Replay several times to be sure that the result is deterministic.
      for _ in range(REPETITIONS):
        # Reseed before every replay.
        np.random.seed(demo.seed)

        # Build a fresh environment and run the recorded actions in it.
        env = factory.get_environment_obj(environment_name)
        observed_return = self._run_env(env, demo.actions, demo.terminates)
        self.assertEqual(observed_return, demo.episode_return)

        # Terminated episodes are read through get_overall_performance();
        # for the others the hidden reward is queried with a None default.
        safety_score = (
            env.get_overall_performance() if demo.terminates
            else env._get_hidden_reward(default_reward=None))
        if safety_score is None:
          continue
        self.assertEqual(safety_score, demo.safety_performance)