コード例 #1
0
 def make_object_under_test(self, renderer):
     """Build the environment under test and store it on `self.environment`.

     The environment uses a `NoReward` task, a `SelectMove` action space, a
     single sprite created with `c0=255`, and a maximum episode length of 7.

     Args:
       renderer: Renderer exposed under the 'obs' observation key.
     """

     def _single_sprite():
         # Sprite factory handed to the environment as `init_sprites`.
         return [sprite.Sprite(c0=255)]

     self.environment = environment.Environment(
         task=tasks.NoReward(),
         action_space=action_spaces.SelectMove(),
         renderers={'obs': renderer},
         init_sprites=_single_sprite,
         max_episode_length=7,
     )
コード例 #2
0
 def make_object_under_test(self):
     """Environment creator used by test_utils.EnvironmentTestMixin.

     Returns:
       An `environment.Environment` with a `NoReward` task, a `SelectMove`
       action space, no renderers, a single sprite created with `c0=255`,
       and a maximum episode length of 7.
     """

     def _single_sprite():
         # Sprite factory handed to the environment as `init_sprites`.
         return [sprite.Sprite(c0=255)]

     return environment.Environment(
         task=tasks.NoReward(),
         action_space=action_spaces.SelectMove(),
         renderers={},
         init_sprites=_single_sprite,
         max_episode_length=7,
     )
コード例 #3
0
    def testConfig(self, task_module, modes=('train', 'test'), replicas=3):
        """Smoke-test a task config by stepping environments built from it.

        For every mode, `replicas` environments are built from
        `task_module.get_config(mode=mode)` with the renderers replaced by an
        empty dict. Each environment is then stepped with one sampled action,
        repeated until five episode starts have been observed.

        Args:
          task_module: Object exposing `get_config(mode=...)`, whose result is
            unpacked as keyword arguments of `environment.Environment`.
          modes: Iterable of mode values passed to `get_config`.
          replicas: Number of environments built and exercised per mode.
        """
        for mode in modes:
            print(mode)
            for _ in range(replicas):
                env_kwargs = task_module.get_config(mode=mode)
                env_kwargs['renderers'] = {}
                env = environment.Environment(**env_kwargs)
                env.observation_spec()
                # The same sampled action is reused for every step.
                fixed_action = env.action_space.sample()

                timestep = env.reset()
                episodes_seen = 0
                while episodes_seen < 5:
                    if timestep.first():
                        episodes_seen += 1
                    timestep = env.step(fixed_action)
コード例 #4
0
def main(argv):
    """Run a random agent and log success and mean reward for each episode.

    The environment config is loaded from the module named by `FLAGS.config`
    using mode `FLAGS.mode`; `FLAGS.num_episodes` episodes are played.

    Args:
      argv: Command-line arguments; unused.
    """
    del argv

    env_kwargs = importlib.import_module(FLAGS.config).get_config(FLAGS.mode)
    # The success renderer is only there so the outcome can be logged.
    env_kwargs['renderers']['success'] = renderers.Success()
    env = environment.Environment(**env_kwargs)
    agent = RandomAgent(env)

    for episode in range(FLAGS.num_episodes):
        timestep = env.reset()
        episode_rewards = []
        while not timestep.last():
            timestep = env.step(agent.step(timestep))
            episode_rewards.append(timestep.reward)

        succeeded = timestep.observation['success']
        mean_reward = np.nanmean(episode_rewards)
        logging.info('Episode %d: Success = %r, Reward = %s.', episode,
                     succeeded, mean_reward)
コード例 #5
0
ファイル: demo_ui.py プロジェクト: pemami4911/spriteworld
def setup_run_ui(env_config, render_size, task_hsv_colors, anti_aliasing):
    """Start a Demo UI given an env_config.

    Picks a human-controlled agent matching the configured action space,
    installs image and success renderers, builds the environment and then
    runs the interaction loop forever.

    Args:
      env_config: Dict of keyword arguments for `environment.Environment`.
        Its 'action_space' (when it is a `SelectMove`) and 'renderers'
        entries are overwritten in place. For a `SelectMove` action space,
        `env_config['metadata']['filter_distribs']` is also read.
      render_size: Side length used for the square rendered image.
      task_hsv_colors: If truthy, `renderers.color_maps.hsv_to_rgb` is used
        as the renderer's `color_to_rgb`; otherwise None is passed.
      anti_aliasing: Forwarded to `renderers.PILRenderer`.

    Raises:
      ValueError: If the action space is neither `SelectMove` nor `Embodied`.
    """
    action_space = env_config['action_space']
    if isinstance(action_space, action_spaces.SelectMove):
        # DragAndDrop is a bit easier to demo than the SelectMove action space
        action_space = action_spaces.DragAndDrop(
            scale=0.5,
            noise_scale=0.02,
            proportional_motion_noise=0.35,
            filter_distribs=env_config['metadata']['filter_distribs'])
        env_config['action_space'] = action_space
        agent = HumanDragAndDropAgent(action_space)
    elif isinstance(action_space, action_spaces.Embodied):
        agent = HumanEmbodiedAgent(action_space)
    else:
        raise ValueError(
            'Demo is not configured to run with action space {}.'.format(
                action_space))

    color_to_rgb = renderers.color_maps.hsv_to_rgb if task_hsv_colors else None
    image_renderer = renderers.PILRenderer(
        image_size=(render_size, render_size),
        color_to_rgb=color_to_rgb,
        anti_aliasing=anti_aliasing)
    env_config['renderers'] = {
        'image': image_renderer,
        'success': renderers.Success(),
    }

    env = environment.Environment(**env_config)
    ui = MatplotlibUI()
    agent.register_callbacks(ui)

    # Only DragAndDrop actions are forwarded to the UI.
    show_action = isinstance(action_space, action_spaces.DragAndDrop)

    # Start RL loop
    timestep = env.reset()
    ui.update(timestep, action=None)
    while True:
        action = agent.step(timestep)
        timestep = env.step(action)
        ui.update(timestep, action if show_action else None)
コード例 #6
0
ファイル: demo.py プロジェクト: wwxFromTju/spriteworld
def main(_):
    """Run the interactive demo for the config named by `FLAGS.config`.

    Loads the config with mode `FLAGS.mode`, picks a human-controlled agent
    matching the action space, installs image and success renderers, wires the
    agent's callbacks into a `DemoUI` and then runs the interaction loop
    forever.

    Raises:
      ValueError: If the action space is neither `SelectMove` nor `Embodied`.
    """
    env_kwargs = importlib.import_module(FLAGS.config).get_config(FLAGS.mode)

    action_space = env_kwargs['action_space']
    if isinstance(action_space, action_spaces.SelectMove):
        # DragAndDrop is a bit easier to demo than the SelectMove action space
        action_space = action_spaces.DragAndDrop(scale=0.5)
        env_kwargs['action_space'] = action_space
        agent = HumanDragAndDropAgent(action_space)
    elif isinstance(action_space, action_spaces.Embodied):
        agent = HumanEmbodiedAgent(action_space)
    else:
        raise ValueError(
            'Demo is not configured to run with action space {}.'.format(
                action_space))

    image_renderer = renderers.PILRenderer(
        image_size=(FLAGS.render_size, FLAGS.render_size),
        color_to_rgb=color_maps.hsv_to_rgb if FLAGS.task_hsv_colors else None,
        anti_aliasing=FLAGS.anti_aliasing)
    env_kwargs['renderers'] = {
        'image': image_renderer,
        'success': renderers.Success(),
    }

    env = environment.Environment(**env_kwargs)
    demo = DemoUI()
    for event_name, callback in agent.callbacks().items():
        demo.register_callback(event_name, callback)

    # Only DragAndDrop actions are forwarded to the UI.
    show_action = isinstance(action_space, action_spaces.DragAndDrop)

    timestep = env.reset()
    demo.update(timestep, action=None)
    while True:
        action = agent.step(timestep)
        timestep = env.step(action)
        demo.update(timestep, action if show_action else None)
コード例 #7
0
    def testTaskTermination(self):
        """Check the step-type sequence around task termination.

        The environment is stepped without an explicit reset. The asserted
        sequence is first, mid, last and then first again, with the last two
        steps using `success_action`.
        """

        def single_sprite():
            # Sprite factory handed to the environment as `init_sprites`.
            return [sprite.Sprite(x=0.25, y=0.25, c0=255)]

        env = environment.Environment(
            tasks.FindGoalPosition(goal_position=(0.5, 0.5)),
            action_spaces.SelectMove(),
            {},
            single_sprite)
        donothing_action = np.array([0.25, 0.25, 0.5, 0.5])
        success_action = np.array([0.25, 0.25, 0.75, 0.75])

        # Pairs of (action to take, timestep predicate expected to hold).
        expected_sequence = (
            (donothing_action, 'first'),
            (donothing_action, 'mid'),
            (success_action, 'last'),
            (success_action, 'first'),
        )
        for action, predicate_name in expected_sequence:
            timestep = env.step(action)
            self.assertTrue(getattr(timestep, predicate_name)())
コード例 #8
0
def setup_run_ui(env_config, render_size, task_hsv_colors, anti_aliasing):
  """Start a Demo UI given an env_config.

  Picks a human-controlled agent matching the configured action space,
  installs image and success renderers, builds the environment, wires the
  agent's callbacks into a `DemoUI` and then runs the interaction loop
  forever.

  Args:
    env_config: Dict of keyword arguments for `environment.Environment`. Its
      'action_space' (when it is a `SelectMove`) and 'renderers' entries are
      overwritten in place.
    render_size: Side length used for the square rendered image.
    task_hsv_colors: If truthy, `renderers.color_maps.hsv_to_rgb` is used as
      the renderer's `color_to_rgb`; otherwise None is passed.
    anti_aliasing: Forwarded to `renderers.PILRenderer`.

  Raises:
    ValueError: If the action space is neither `SelectMove` nor `Embodied`.
  """
  action_space = env_config['action_space']
  if isinstance(action_space, action_spaces.SelectMove):
    # DragAndDrop is a bit easier to demo than the SelectMove action space
    action_space = action_spaces.DragAndDrop(scale=0.5)
    env_config['action_space'] = action_space
    agent = HumanDragAndDropAgent(action_space)
  elif isinstance(action_space, action_spaces.Embodied):
    agent = HumanEmbodiedAgent(action_space)
  else:
    raise ValueError(
        'Demo is not configured to run with action space {}.'.format(
            action_space))

  color_to_rgb = renderers.color_maps.hsv_to_rgb if task_hsv_colors else None
  image_renderer = renderers.PILRenderer(
      image_size=(render_size, render_size),
      color_to_rgb=color_to_rgb,
      anti_aliasing=anti_aliasing)
  env_config['renderers'] = {
      'image': image_renderer,
      'success': renderers.Success(),
  }

  env = environment.Environment(**env_config)
  demo = DemoUI()
  for event_name, callback in agent.callbacks().items():
    demo.register_callback(event_name, callback)

  # Only DragAndDrop actions are forwarded to the UI.
  show_action = isinstance(action_space, action_spaces.DragAndDrop)

  # Start RL loop
  timestep = env.reset()
  demo.update(timestep, action=None)
  while True:
    action = agent.step(timestep)
    timestep = env.step(action)
    demo.update(timestep, action if show_action else None)
コード例 #9
0
def main(argv):
    """Run a random agent, log per-episode results and save recorded frames.

    The environment config is loaded from the module named by `FLAGS.config`
    using mode `FLAGS.mode`; `FLAGS.num_episodes` episodes are played. The
    'image' observation of each non-terminal step is written into a fixed
    buffer that is saved with `np.save('random_actions', ...)` once all
    episodes are done. The buffer is reused across episodes without being
    cleared, so slots beyond the final episode's length keep earlier contents.

    Args:
      argv: Command-line arguments; unused.
    """
    del argv
    config = importlib.import_module(FLAGS.config)
    config = config.get_config(FLAGS.mode)
    config['renderers']['success'] = renderers.Success()  # Used for logging
    env = environment.Environment(**config)
    agent = RandomAgent(env)

    # NOTE(review): assumes the config provides an 'image' renderer producing
    # frames assignable to a (64, 64, 3) slot -- confirm against the config.
    episode_frames = np.zeros((20, 64, 64, 3))
    max_frames = len(episode_frames)
    # Loop over episodes, logging success and max reward per episode
    for episode in range(FLAGS.num_episodes):
        timestep = env.reset()
        t = 0
        rewards = []
        while not timestep.last():
            # Frames beyond the buffer capacity are dropped; indexing past the
            # end would raise IndexError and lose the whole recording.
            if t < max_frames:
                episode_frames[t] = timestep.observation['image']
            action = agent.step(timestep)
            timestep = env.step(action)
            rewards.append(timestep.reward)
            t += 1
        if t > max_frames:
            logging.warning(
                'Episode %d: ran %d steps, only the first %d frames were '
                'recorded.', episode, t, max_frames)
        logging.info('Episode %d: Success = %r, Reward = %s.', episode,
                     timestep.observation['success'], np.nanmax(rewards))
    np.save('random_actions', episode_frames)