Example 1
    def setUp(self):
        super(EnvTest, self).setUp()
        self.data_dir = FLAGS.test_srcdir + ('valan/r2r/testdata')

        self._env_config = hparam.HParams(
            problem='R2R',
            scan_base_dir=self.data_dir,
            data_base_dir=self.data_dir,
            vocab_dir=self.data_dir,
            vocab_file='vocab.txt',
            images_per_pano=36,
            max_conns=14,
            image_encoding_dim=64,
            direction_encoding_dim=256,
            image_features_dir=os.path.join(self.data_dir, 'image_features'),
            instruction_len=50,
            max_agent_actions=6,
            reward_fn=env_config.RewardFunction.get_reward_fn(
                'distance_to_goal'))
        self._runtime_config = common.RuntimeConfig(task_id=0, num_tasks=1)

        self._env = env.R2REnv(data_sources=['R2R_small_split'],
                               runtime_config=self._runtime_config,
                               env_config=self._env_config)

        # For deterministic behavior in test
        np.random.seed(0)
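
A test method built on this setUp would drive the environment through its reset()/step() cycle, the same pattern Example 5 starts from. The sketch below is a hypothetical test, not part of the original suite; the assumption that step() takes the pano id of the next node, with 0 as the stop action, is inferred from the action list in Example 8:

    def test_reset_and_step(self):
        # reset() returns a common.EnvOutput: (reward, done, observation, info).
        env_output = self._env.reset()
        # constants is the valan.r2r constants module used in Examples 4 and 8.
        self.assertIn(constants.PANO_ID, env_output.observation)
        # Assumed action semantics: the stop action 0 ends the current episode.
        env_output = self._env.step(0)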
Example 2
    def setUp(self):
        super(DiscriminatorProblemTest, self).setUp()
        self.data_dir = FLAGS.test_srcdir + ('valan/r2r/testdata')
        self._env_config = hparam.HParams(
            problem='R2R',
            scan_base_dir=self.data_dir,
            data_base_dir=self.data_dir,
            vocab_dir=self.data_dir,
            vocab_file='vocab.txt',
            images_per_pano=36,
            max_conns=14,
            image_encoding_dim=64,
            direction_encoding_dim=256,
            image_features_dir=os.path.join(self.data_dir, 'image_features'),
            instruction_len=50,
            max_agent_actions=6,
            reward_fn=env_config.RewardFunction.get_reward_fn(
                'distance_to_goal'))

        self._runtime_config = common.RuntimeConfig(task_id=0, num_tasks=100)
        self._env = env.R2REnv(data_sources=['R2R_small_split'],
                               runtime_config=self._runtime_config,
                               env_config=self._env_config)

        self._agent_config = agent_config.get_r2r_agent_config()
        self._agent_config.add_hparam('init_image_enc_with_text_state', True)
        self._agent_config.add_hparam('average_image_states_of_all_steps',
                                      False)
        self._agent_config.embed_action = True
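
Note the two ways the agent configuration is adjusted here: add_hparam() is used for keys that get_r2r_agent_config() does not already define (init_image_enc_with_text_state, average_image_states_of_all_steps), while the existing embed_action field is overridden by plain attribute assignment.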
Example 3
  def get_environment(self):
    if not self._env:
      assert self._data_sources, 'data_sources must be non-empty.'
      self._env = env.R2REnv(
          data_sources=self._data_sources,
          runtime_config=self._runtime_config,
          env_config=self._env_config)
    return self._env
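
Because the constructed R2REnv is cached in self._env, only the first call to get_environment() pays the construction cost; subsequent calls (for example from an actor loop) return the same instance, and the assert guards against building an environment with an empty data_sources list.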
Example 4
    def setUp(self):
        super(DiscriminatorTest, self).setUp()
        self.data_dir = FLAGS.test_srcdir + ('valan/r2r/testdata')
        self._env_config = hparam.HParams(
            problem='R2R',
            scan_base_dir=self.data_dir,
            data_base_dir=self.data_dir,
            vocab_file='vocab.txt',
            images_per_pano=36,
            max_conns=14,
            image_encoding_dim=2052,
            image_features_dir=os.path.join(self.data_dir, 'image_features'),
            instruction_len=50,
            max_agent_actions=6,
            reward_fn=env_config.RewardFunction.get_reward_fn(
                'distance_to_goal'))

        self._runtime_config = common.RuntimeConfig(task_id=0, num_tasks=100)

        self._env = env.R2REnv(data_sources=['R2R_small_split'],
                               runtime_config=self._runtime_config,
                               env_config=self._env_config)
        self.num_panos = 36
        self.image_feature_size = 2052
        self.num_actions = 14
        self.time_step = 3
        self.batch_size = 1
        done = np.array([[True], [False], [True]])
        done = np.reshape(done, [3, 1])
        self._test_environment = common.EnvOutput(
            reward=0,
            done=done,
            observation={
                constants.IS_START:
                np.array([[True], [False], [True]]),
                constants.DISC_MASK:
                np.array([[True], [False], [True]]),
                constants.PANO_ENC:
                tf.random.normal([
                    self.time_step, self.batch_size, self.num_panos,
                    self.image_feature_size
                ]),
                constants.CONN_ENC:
                tf.random.normal([
                    self.time_step, self.batch_size, self.num_actions,
                    self.image_feature_size
                ]),
                constants.INS_TOKEN_IDS:
                np.array([[[3, 6, 1, 0, 0]], [[3, 6, 1, 0, 0]],
                          [[3, 6, 1, 0, 0]]]),
                constants.VALID_CONN_MASK:
                np.array([[[True] * 14], [[True] * 5 + [False] * 9],
                          [[True] * 2 + [False] * 12]])
            },
            info='')
        self._agent = discriminator_agent.DiscriminatorAgent(
            agent_config.get_r2r_agent_config())
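
A test built on this setUp would feed the hand-constructed EnvOutput through the discriminator agent. The sketch below follows the call convention shown in Example 5 (get_initial_state() followed by calling the agent directly); the method name is hypothetical, and slicing the first time step to seed the initial state is an assumption, not a documented requirement:

    def test_call(self):
        # Assumed [time, batch, ...] layout: take the first time step's
        # observation to build the initial agent state.
        first_obs = tf.nest.map_structure(lambda t: t[0],
                                          self._test_environment.observation)
        init_state = self._agent.get_initial_state(
            first_obs, batch_size=self.batch_size)
        agent_output, _ = self._agent(self._test_environment, init_state)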
Example 5
  def test_call_ndh(self):
    self._agent = agent.R2RAgent(agent_config.get_ndh_agent_config())
    self.data_dir = FLAGS.test_srcdir + (
        'valan/r2r/testdata')

    self._env_config = hparam.HParams(
        problem='NDH',
        history='all',
        path_type='trusted_path',
        max_goal_room_panos=4,
        scan_base_dir=self.data_dir,
        data_base_dir=self.data_dir,
        vocab_dir=self.data_dir,
        problem_path=os.path.join(self.data_dir, 'NDH'),
        vocab_file='vocab.txt',
        images_per_pano=36,
        max_conns=14,
        image_encoding_dim=64,
        direction_encoding_dim=256,
        image_features_dir=os.path.join(self.data_dir, 'image_features'),
        instruction_len=50,
        max_agent_actions=6,
        reward_fn=env_config.RewardFunction.get_reward_fn('distance_to_goal'))

    self._runtime_config = common.RuntimeConfig(task_id=0, num_tasks=1)
    self._env = env.R2REnv(
        data_sources=['R2R_small_split'],
        runtime_config=self._runtime_config,
        env_config=self._env_config)

    env_output = self._env.reset()
    observation = tf.nest.map_structure(lambda t: tf.expand_dims(t, 0),
                                        env_output.observation)
    initial_agent_state = self._agent.get_initial_state(
        observation, batch_size=1)
    # Agent always expects time,batch dimensions. First add and then remove.
    env_output = utils.add_time_batch_dim(env_output)
    agent_output, _ = self._agent(env_output, initial_agent_state)

    self.assertEqual(agent_output.policy_logits.shape, [1, 1, 14])
    self.assertEqual(agent_output.baseline.shape, [1, 1])

    initial_agent_state = ([
        (tf.random.normal([self.batch_size,
                           512]), tf.random.normal([self.batch_size, 512])),
        (tf.random.normal([self.batch_size,
                           512]), tf.random.normal([self.batch_size, 512]))
    ], tf.random.normal([self.batch_size, 5, 512]))
    agent_output, _ = self._agent(self._test_environment, initial_agent_state)

    self.assertEqual(agent_output.policy_logits.shape,
                     [self.time_step, self.batch_size, 14])
    self.assertEqual(agent_output.baseline.shape,
                     [self.time_step, self.batch_size])
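
The asserted shapes follow the [time, batch, ...] layout used throughout these tests: policy_logits comes out as [1, 1, 14] because a single time step and batch element are fed and max_conns=14 connections are scored, while baseline drops the action dimension. The second call reuses self._test_environment, self.batch_size, and self.time_step, which are presumably defined in the test's setUp (not shown in this excerpt), to check the same invariants for a multi-step batch.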
Example 6
    def get_environment(self):
        if not self._env:
            assert self._data_sources, 'data_sources must be non-empty.'
            if self._curriculum:
                # See actor_main.py and curriculum_env.py for the argument options.
                self._env = curriculum_env.CurriculumR2REnv(
                    data_sources=self._data_sources,
                    runtime_config=self._runtime_config,
                    curriculum_env_config=curriculum_env_config_lib.
                    get_default_curriculum_env_config(self._curriculum))
            else:
                self._env = env.R2REnv(
                    data_sources=self._data_sources,
                    runtime_config=self._runtime_config,
                    env_config=env_config_lib.get_default_env_config())
        return self._env
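
This variant differs from Example 3 only in how configuration is obtained: rather than receiving an env_config from the caller, it falls back to env_config_lib.get_default_env_config(), and when a curriculum is requested it builds a curriculum_env.CurriculumR2REnv with the default curriculum settings instead (see actor_main.py and curriculum_env.py for the argument options, as the inline comment notes).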
Example 7
  def setUp(self):
    super(DiscriminatorTest, self).setUp()
    self.data_dir = FLAGS.test_srcdir + (
        'valan/r2r/testdata')
    self._env_config = hparam.HParams(
        problem='R2R',
        scan_base_dir=self.data_dir,
        data_base_dir=self.data_dir,
        vocab_dir=self.data_dir,
        vocab_file='vocab.txt',
        images_per_pano=36,
        max_conns=14,
        image_encoding_dim=64,
        direction_encoding_dim=256,
        image_features_dir=os.path.join(self.data_dir, 'image_features'),
        instruction_len=50,
        max_agent_actions=6,
        project_decoder_input_states=True,
        use_all_final_states=False,
        reward_fn=env_config.RewardFunction.get_reward_fn('distance_to_goal'))

    self._runtime_config = common.RuntimeConfig(task_id=0, num_tasks=100)
    self._env = env.R2REnv(
        data_sources=['R2R_small_split'],
        runtime_config=self._runtime_config,
        env_config=self._env_config)
    self.num_panos = 36
    self.image_feature_size = 64
    self.direction_encoding_dim = 256
    self.num_actions = 14
    self.time_step = 3
    self.batch_size = 2
    done = np.array([[False, True], [True, False], [True, False]])
    self._test_environment = common.EnvOutput(
        reward=0,
        done=done,
        observation={
            constants.PATH_ID:  # Shape = [time, batch]
                np.array([[2, 1], [0, 1], [0, 1]]),
            constants.IS_START:  # Shape = [time, batch]
                np.array([[False, True], [True, False], [False, False]]),
            constants.DISC_MASK:  # Shape = [time, batch]
                np.array([[False, True], [True, True], [True, True]]),
            constants.PANO_ENC:  # Shape = [time, batch, num_panos, feature_size]
                tf.random.normal([
                    self.time_step, self.batch_size, self.num_panos,
                    self.image_feature_size + self.direction_encoding_dim
                ]),
            constants.CONN_ENC:
                # Shape = [time, batch, num_actions, feature_size]
                tf.random.normal([
                    self.time_step, self.batch_size, self.num_actions,
                    self.image_feature_size + self.direction_encoding_dim
                ]),
            constants.PREV_ACTION_ENC:
                # Shape = [time, batch, feature_size]
                tf.random.normal([
                    self.time_step, self.batch_size,
                    self.image_feature_size + self.direction_encoding_dim
                ]),
            constants.NEXT_GOLDEN_ACTION_ENC:
                # Shape = [time, batch, feature_size]
                tf.random.normal([
                    self.time_step, self.batch_size,
                    self.image_feature_size + self.direction_encoding_dim
                ]),
            constants.INS_TOKEN_IDS:  # Shape = [time, batch, token_len]
                np.array([[[5, 3, 2, 1, 0], [3, 4, 5, 6, 1]],
                          [[3, 6, 1, 0, 0], [3, 4, 5, 6, 1]],
                          [[3, 6, 1, 0, 0], [3, 4, 5, 6, 1]]]),
            constants.INS_LEN:  # Shape = [time, batch]
                np.tile(np.array([[3]]), [self.time_step, self.batch_size]),
            constants.VALID_CONN_MASK:
                # Shape = [time, batch, num_connections]
                np.tile(
                    np.array([[[True] * 14], [[True] * 5 + [False] * 9],
                              [[True] * 2 + [False] * 12]]),
                    [1, self.batch_size, 1]),
            constants.LABEL:
                # Shape = [time, batch]
                np.array([[False, False], [True, False], [True, False]])
        },
        info='')
    self._agent_config = agent_config.get_r2r_agent_config()
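
Every tensor in this hand-built observation follows the [time, batch, ...] shape convention spelled out in the inline comments; with time_step=3 and batch_size=2, the pano, connection, and action encodings are sized image_encoding_dim + direction_encoding_dim (64 + 256), matching the HParams above, and VALID_CONN_MASK marks how many of the max_conns=14 connections are real at each step.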
Example 8
    def setUp(self):
        super(EvalMetricTest, self).setUp()

        self.data_dir = FLAGS.test_srcdir + ('valan/r2r/testdata')

        self._env_config = hparam.HParams(
            problem='R2R',
            base_path=self.data_dir,
            vocab_file='vocab.txt',
            images_per_pano=36,
            max_conns=14,
            image_encoding_dim=2052,
            image_features_dir=os.path.join(self.data_dir, 'image_features'),
            instruction_len=50,
            max_agent_actions=6,
            reward_fn=env_config.RewardFunction.get_reward_fn(
                'distance_to_goal'))

        self._runtime_config = common.RuntimeConfig(task_id=0, num_tasks=1)
        self._env = env.R2REnv(data_sources=['small_split'],
                               runtime_config=self._runtime_config,
                               env_config=self._env_config)

        # scan: gZ6f7yhEvPG
        # Path: 1, 3, 7, 5, 2
        self._golden_path = [1, 4, 6, 2]
        self._scan_id = 0  # testdata has single scan only 'gZ6f7yhEvPG'
        self._env_list = [
            common.EnvOutput(reward=0,
                             done=None,
                             observation={
                                 constants.PANO_ID:
                                 1,
                                 constants.GOLDEN_PATH:
                                 self._golden_path,
                                 constants.GOAL_PANO_ID:
                                 2,
                                 constants.SCAN_ID:
                                 self._scan_id,
                                 constants.GOAL_ROOM_PANOS:
                                 [6, 2, constants.INVALID_NODE_ID]
                             },
                             info=None),
            common.EnvOutput(reward=1,
                             done=None,
                             observation={
                                 constants.PANO_ID:
                                 3,
                                 constants.GOLDEN_PATH:
                                 self._golden_path,
                                 constants.GOAL_PANO_ID:
                                 2,
                                 constants.SCAN_ID:
                                 self._scan_id,
                                 constants.GOAL_ROOM_PANOS:
                                 [6, 2, constants.INVALID_NODE_ID]
                             },
                             info=None),
            common.EnvOutput(reward=1,
                             done=None,
                             observation={
                                 constants.PANO_ID:
                                 7,
                                 constants.GOLDEN_PATH:
                                 self._golden_path,
                                 constants.GOAL_PANO_ID:
                                 2,
                                 constants.SCAN_ID:
                                 self._scan_id,
                                 constants.GOAL_ROOM_PANOS:
                                 [6, 2, constants.INVALID_NODE_ID]
                             },
                             info=None),
            common.EnvOutput(reward=1,
                             done=None,
                             observation={
                                 constants.PANO_ID:
                                 5,
                                 constants.GOLDEN_PATH:
                                 self._golden_path,
                                 constants.GOAL_PANO_ID:
                                 2,
                                 constants.SCAN_ID:
                                 self._scan_id,
                                 constants.GOAL_ROOM_PANOS:
                                 [6, 2, constants.INVALID_NODE_ID]
                             },
                             info=None),
            common.EnvOutput(reward=1,
                             done=False,
                             observation={
                                 constants.PANO_ID:
                                 2,
                                 constants.GOLDEN_PATH:
                                 self._golden_path,
                                 constants.GOAL_PANO_ID:
                                 2,
                                 constants.SCAN_ID:
                                 self._scan_id,
                                 constants.GOAL_ROOM_PANOS:
                                 [6, 2, constants.INVALID_NODE_ID]
                             },
                             info=None),
            common.EnvOutput(
                reward=4,  # success
                done=True,  # end of episode
                # next episode's observation.
                observation={
                    constants.PANO_ID: 11,
                    constants.GOLDEN_PATH: self._golden_path,
                    constants.GOAL_PANO_ID: 2,
                    constants.SCAN_ID: self._scan_id,
                    constants.GOAL_ROOM_PANOS:
                    [6, 2, constants.INVALID_NODE_ID]
                },
                info=None),
        ]
        self._action_list = [3, 7, 5, 2, 0]
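
Each action in self._action_list = [3, 7, 5, 2, 0] is the pano id the agent moves to from the pano recorded in the matching EnvOutput (0 being the stop action), and the final entry of self._env_list already holds the next episode's observation. As a purely illustrative sanity check, not the eval_metric API this fixture is built for, the end of the episode can be compared against the goal like this:

    def test_final_pano_reaches_goal(self):
        # Hypothetical check: index -2 is the last step of the episode
        # (index -1 already belongs to the next episode).
        last_obs = self._env_list[-2].observation
        self.assertEqual(last_obs[constants.PANO_ID],
                         last_obs[constants.GOAL_PANO_ID])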