Example #1
    def setUp(self):
        super(EnvTest, self).setUp()
        self.data_dir = FLAGS.test_srcdir + 'valan/r2r/testdata'

        self._env_config = hparam.HParams(
            problem='R2R',
            scan_base_dir=self.data_dir,
            data_base_dir=self.data_dir,
            vocab_dir=self.data_dir,
            vocab_file='vocab.txt',
            images_per_pano=36,
            max_conns=14,
            image_encoding_dim=64,
            direction_encoding_dim=256,
            image_features_dir=os.path.join(self.data_dir, 'image_features'),
            instruction_len=50,
            max_agent_actions=6,
            reward_fn=env_config.RewardFunction.get_reward_fn(
                'distance_to_goal'))
        self._runtime_config = common.RuntimeConfig(task_id=0, num_tasks=1)

        self._env = env.R2REnv(data_sources=['R2R_small_split'],
                               runtime_config=self._runtime_config,
                               env_config=self._env_config)

        # For deterministic behavior in test
        np.random.seed(0)
Example #2
    def setUp(self):
        super(DiscriminatorProblemTest, self).setUp()
        self.data_dir = FLAGS.test_srcdir + 'valan/r2r/testdata'
        self._env_config = hparam.HParams(
            problem='R2R',
            scan_base_dir=self.data_dir,
            data_base_dir=self.data_dir,
            vocab_dir=self.data_dir,
            vocab_file='vocab.txt',
            images_per_pano=36,
            max_conns=14,
            image_encoding_dim=64,
            direction_encoding_dim=256,
            image_features_dir=os.path.join(self.data_dir, 'image_features'),
            instruction_len=50,
            max_agent_actions=6,
            reward_fn=env_config.RewardFunction.get_reward_fn(
                'distance_to_goal'))

        self._runtime_config = common.RuntimeConfig(task_id=0, num_tasks=100)
        self._env = env.R2REnv(data_sources=['R2R_small_split'],
                               runtime_config=self._runtime_config,
                               env_config=self._env_config)

        self._agent_config = agent_config.get_r2r_agent_config()
        self._agent_config.add_hparam('init_image_enc_with_text_state', True)
        self._agent_config.add_hparam('average_image_states_of_all_steps',
                                      False)
        self._agent_config.embed_action = True
Example #3
    def setUp(self):
        super(NDHEnvTest, self).setUp()
        self.data_dir = FLAGS.test_srcdir + 'valan/r2r/testdata'

        self.reward_fn_type = 'distance_to_goal'
        self._env_config = hparam.HParams(
            problem='NDH',
            history='all',
            path_type='trusted_path',
            max_goal_room_panos=4,
            base_path=self.data_dir,
            problem_path=os.path.join(self.data_dir, 'NDH'),
            vocab_file='vocab.txt',
            images_per_pano=36,
            max_conns=14,
            image_encoding_dim=2052,
            image_features_dir=os.path.join(self.data_dir, 'image_features'),
            instruction_len=50,
            max_agent_actions=6,
            reward_fn_type=self.reward_fn_type,
            reward_fn=env_config.RewardFunction.get_reward_fn(
                self.reward_fn_type))
        self._runtime_config = common.RuntimeConfig(task_id=0, num_tasks=1)

        self._env = env_ndh.NDHEnv(data_sources=['small_split'],
                                   runtime_config=self._runtime_config,
                                   env_config=self._env_config)

        # For deterministic behavior in test
        np.random.seed(0)
Example #4
def get_default_env_config():
    """Returns default env config."""
    config = hparam.HParams(**DEFAULT_ENV_CONFIG)
    config.vln_reward_fn = env_config.RewardFunction.get_reward_fn(
        config.vln_reward_fn_type)
    config.ndh_reward_fn = env_ndh_config.RewardFunction.get_reward_fn(
        config.ndh_reward_fn_type)
    return config
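A minimal usage sketch, not part of the original snippet: the returned HParams object exposes every key of `DEFAULT_ENV_CONFIG` as an attribute and can be overridden before an environment is built. The `'distance_to_room'` value is taken from the NDH examples further down; any other field name would be an assumption.

# Hypothetical usage of get_default_env_config() above.
config = get_default_env_config()
print(config.vln_reward_fn_type, config.ndh_reward_fn_type)

# HParams values can be overridden attribute-style before building an env.
config.ndh_reward_fn_type = 'distance_to_room'
config.ndh_reward_fn = env_ndh_config.RewardFunction.get_reward_fn(
    config.ndh_reward_fn_type)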
Example #5
    def setUp(self):
        super(DiscriminatorTest, self).setUp()
        self.data_dir = FLAGS.test_srcdir + 'valan/r2r/testdata'
        self._env_config = hparam.HParams(
            problem='R2R',
            scan_base_dir=self.data_dir,
            data_base_dir=self.data_dir,
            vocab_file='vocab.txt',
            images_per_pano=36,
            max_conns=14,
            image_encoding_dim=2052,
            image_features_dir=os.path.join(self.data_dir, 'image_features'),
            instruction_len=50,
            max_agent_actions=6,
            reward_fn=env_config.RewardFunction.get_reward_fn(
                'distance_to_goal'))

        self._runtime_config = common.RuntimeConfig(task_id=0, num_tasks=100)

        self._env = env.R2REnv(data_sources=['R2R_small_split'],
                               runtime_config=self._runtime_config,
                               env_config=self._env_config)
        self.num_panos = 36
        self.image_feature_size = 2052
        self.num_actions = 14
        self.time_step = 3
        self.batch_size = 1
        done = np.array([[True], [False], [True]])
        done = np.reshape(done, [3, 1])
        self._test_environment = common.EnvOutput(
            reward=0,
            done=done,
            observation={
                constants.IS_START:
                np.array([[True], [False], [True]]),
                constants.DISC_MASK:
                np.array([[True], [False], [True]]),
                constants.PANO_ENC:
                tf.random.normal([
                    self.time_step, self.batch_size, self.num_panos,
                    self.image_feature_size
                ]),
                constants.CONN_ENC:
                tf.random.normal([
                    self.time_step, self.batch_size, self.num_actions,
                    self.image_feature_size
                ]),
                constants.INS_TOKEN_IDS:
                np.array([[[3, 6, 1, 0, 0]], [[3, 6, 1, 0, 0]],
                          [[3, 6, 1, 0, 0]]]),
                constants.VALID_CONN_MASK:
                np.array([[[True] * 14], [[True] * 5 + [False] * 9],
                          [[True] * 2 + [False] * 12]])
            },
            info='')
        self._agent = discriminator_agent.DiscriminatorAgent(
            agent_config.get_r2r_agent_config())
Example #6
  def test_call_ndh(self):
    self._agent = agent.R2RAgent(agent_config.get_ndh_agent_config())
    self.data_dir = FLAGS.test_srcdir + (
        'valan/r2r/testdata')

    self._env_config = hparam.HParams(
        problem='NDH',
        history='all',
        path_type='trusted_path',
        max_goal_room_panos=4,
        scan_base_dir=self.data_dir,
        data_base_dir=self.data_dir,
        vocab_dir=self.data_dir,
        problem_path=os.path.join(self.data_dir, 'NDH'),
        vocab_file='vocab.txt',
        images_per_pano=36,
        max_conns=14,
        image_encoding_dim=64,
        direction_encoding_dim=256,
        image_features_dir=os.path.join(self.data_dir, 'image_features'),
        instruction_len=50,
        max_agent_actions=6,
        reward_fn=env_config.RewardFunction.get_reward_fn('distance_to_goal'))

    self._runtime_config = common.RuntimeConfig(task_id=0, num_tasks=1)
    self._env = env.R2REnv(
        data_sources=['R2R_small_split'],
        runtime_config=self._runtime_config,
        env_config=self._env_config)

    env_output = self._env.reset()
    observation = tf.nest.map_structure(lambda t: tf.expand_dims(t, 0),
                                        env_output.observation)
    initial_agent_state = self._agent.get_initial_state(
        observation, batch_size=1)
    # The agent always expects time and batch dimensions. Add them first, then remove them.
    env_output = utils.add_time_batch_dim(env_output)
    agent_output, _ = self._agent(env_output, initial_agent_state)

    self.assertEqual(agent_output.policy_logits.shape, [1, 1, 14])
    self.assertEqual(agent_output.baseline.shape, [1, 1])

    initial_agent_state = ([
        (tf.random.normal([self.batch_size,
                           512]), tf.random.normal([self.batch_size, 512])),
        (tf.random.normal([self.batch_size,
                           512]), tf.random.normal([self.batch_size, 512]))
    ], tf.random.normal([self.batch_size, 5, 512]))
    agent_output, _ = self._agent(self._test_environment, initial_agent_state)

    self.assertEqual(agent_output.policy_logits.shape,
                     [self.time_step, self.batch_size, 14])
    self.assertEqual(agent_output.baseline.shape,
                     [self.time_step, self.batch_size])
Example #7
def get_default_env_config(mode):
    """Returns default config using values from dict `DEFAULT_ENV_CONFIG`."""
    config = hparam.HParams(**DEFAULT_ENV_CONFIG)
    config.mode = mode
    if mode == 'train':
        config.max_agent_actions = config.max_agent_train_actions
    else:
        config.max_agent_actions = config.max_agent_test_actions
    config.image_features_path = (config.pano_image_features_path
                                  if config.panoramic_action_space else
                                  config.legacy_image_features_path)
    return config
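A hedged sketch of how the mode switch above behaves; it relies only on the keys the function itself reads (`max_agent_train_actions`, `max_agent_test_actions`, `panoramic_action_space` and the two feature paths), all of which must already exist in `DEFAULT_ENV_CONFIG`.

# Hypothetical usage of the mode-dependent getter above.
train_config = get_default_env_config('train')
eval_config = get_default_env_config('eval')

assert train_config.max_agent_actions == train_config.max_agent_train_actions
assert eval_config.max_agent_actions == eval_config.max_agent_test_actions
# image_features_path resolves from panoramic_action_space in both modes.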
Example #8
def get_default_env_config():
    """Returns default config using values from dict `DEFAULT_ENV_CONFIG`."""
    config = hparam.HParams(**DEFAULT_ENV_CONFIG)
    config.reward_fn = RewardFunction.get_reward_fn(config.reward_fn_type)

    # Update directories if set in FLAGS.
    if FLAGS.scan_base_dir:
        config.scan_base_dir = FLAGS.scan_base_dir
    if FLAGS.data_base_dir:
        config.data_base_dir = FLAGS.data_base_dir
    if FLAGS.vocab_dir:
        config.vocab_dir = FLAGS.vocab_dir
    if FLAGS.vocab_file:
        config.vocab_file = FLAGS.vocab_file
    if FLAGS.image_features_dir:
        config.image_features_dir = FLAGS.image_features_dir
    return config
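The overrides above assume the corresponding absl flags are declared elsewhere in the module. A minimal sketch of such declarations, with hypothetical defaults and help strings (the real module may define them differently):

from absl import flags

FLAGS = flags.FLAGS

# Empty defaults mean "keep the value from DEFAULT_ENV_CONFIG".
flags.DEFINE_string('scan_base_dir', '', 'Optional override for scan_base_dir.')
flags.DEFINE_string('data_base_dir', '', 'Optional override for data_base_dir.')
flags.DEFINE_string('vocab_dir', '', 'Optional override for vocab_dir.')
flags.DEFINE_string('vocab_file', '', 'Optional override for vocab_file.')
flags.DEFINE_string('image_features_dir', '',
                    'Optional override for image_features_dir.')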
Example #9
def get_default_curriculum_env_config(method, env_config=None):
    """Get default curriculum env config.

  Args:
    method: The method used in curriculum learning.
    env_config: Optional. The env config. If None, the default env config
      is used. Defaults to None.

  Returns:
    A curriculum env config.
  """
    if env_config is None:
        env_config = env_config_lib.get_default_env_config()
    config_updates = dict(env_config.values(), method=method)
    curriculum_env_config = DEFAULT_CURRICULUM_ENV_CONFIG.copy()
    curriculum_env_config.update(config_updates)
    config = hparam.HParams(**curriculum_env_config)
    return config
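A usage sketch under stated assumptions: `'sorted'` is only a placeholder for whatever curriculum methods the calling code accepts, and the `DEFAULT_CURRICULUM_ENV_CONFIG` keys are merged with the env config keys exactly as the function above does.

# Hypothetical usage of get_default_curriculum_env_config() above.
base_config = env_config_lib.get_default_env_config()
curriculum_config = get_default_curriculum_env_config(
    method='sorted', env_config=base_config)

# The env config keys are carried over and `method` reflects the argument.
assert curriculum_config.method == 'sorted'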
Example #10
def get_ndh_env_config():
    """Returns default NDH config using values from dict `NDH_ENV_CONFIG`."""
    # Input settings.
    history = NDH_ENV_CONFIG['history']
    if history == 'none':
        NDH_ENV_CONFIG['instruction_len'] = 1  # [<EOS>] fixed length.
    elif history == 'target':
        NDH_ENV_CONFIG[
            'instruction_len'] = 3  # [<TAR> target <EOS>] fixed length.
    elif history == 'oracle_ans':
        # Oracle answer utterances: 16.16+/-9.67 tokens; 35.5 at 2 std devs.
        # 71 is double that.
        NDH_ENV_CONFIG['instruction_len'] = 50
    elif history == 'nav_q_oracle_ans':
        # Navigator questions: 11.24+/-6.43 tokens (plus the oracle answer
        # average); 24.1 at 2 std devs. 71+48 ~= 120 per Q/A doubles both.
        NDH_ENV_CONFIG['instruction_len'] = 120
    else:  # i.e., 'all'
        # 4.93+/-3.21 turns -> 2.465+/-1.605 Q/A.
        # 5.67 at x2 std. Call it 6 (real max 13).
        NDH_ENV_CONFIG['instruction_len'] = 240

    config = hparam.HParams(**NDH_ENV_CONFIG)
    config.reward_fn = RewardFunction.get_reward_fn(config.reward_fn_type)
    return config
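For reference, the branches above reduce to the following mapping; the short sketch after it assumes `NDH_ENV_CONFIG` is the module-level dict the function mutates.

# history            -> instruction_len
#   'none'              1    ([<EOS>])
#   'target'            3    ([<TAR> target <EOS>])
#   'oracle_ans'        50
#   'nav_q_oracle_ans'  120
#   'all' (default)     240

# Hypothetical usage:
NDH_ENV_CONFIG['history'] = 'target'
config = get_ndh_env_config()
assert config.instruction_len == 3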
Example #11
def get_default_env_config():
  """Returns default config using values from dict `EXAMPLE_ENV_CONFIG`."""
  config = hparam.HParams(**EXAMPLE_ENV_CONFIG)
  return config
Example #12
  def setUp(self):
    super(DiscriminatorTest, self).setUp()
    self.data_dir = FLAGS.test_srcdir + (
        'valan/r2r/testdata')
    self._env_config = hparam.HParams(
        problem='R2R',
        scan_base_dir=self.data_dir,
        data_base_dir=self.data_dir,
        vocab_dir=self.data_dir,
        vocab_file='vocab.txt',
        images_per_pano=36,
        max_conns=14,
        image_encoding_dim=64,
        direction_encoding_dim=256,
        image_features_dir=os.path.join(self.data_dir, 'image_features'),
        instruction_len=50,
        max_agent_actions=6,
        project_decoder_input_states=True,
        use_all_final_states=False,
        reward_fn=env_config.RewardFunction.get_reward_fn('distance_to_goal'))

    self._runtime_config = common.RuntimeConfig(task_id=0, num_tasks=100)
    self._env = env.R2REnv(
        data_sources=['R2R_small_split'],
        runtime_config=self._runtime_config,
        env_config=self._env_config)
    self.num_panos = 36
    self.image_feature_size = 64
    self.direction_encoding_dim = 256
    self.num_actions = 14
    self.time_step = 3
    self.batch_size = 2
    done = np.array([[False, True], [True, False], [True, False]])
    self._test_environment = common.EnvOutput(
        reward=0,
        done=done,
        observation={
            constants.PATH_ID:  # Shape = [time, batch]
                np.array([[2, 1], [0, 1], [0, 1]]),
            constants.IS_START:  # Shape = [time, batch]
                np.array([[False, True], [True, False], [False, False]]),
            constants.DISC_MASK:  # Shape = [time, batch]
                np.array([[False, True], [True, True], [True, True]]),
            constants.PANO_ENC:  # Shape = [time, batch, num_panos, feature_size]
                tf.random.normal([
                    self.time_step, self.batch_size, self.num_panos,
                    self.image_feature_size + self.direction_encoding_dim
                ]),
            constants.CONN_ENC:
                # Shape = [time, batch, num_actions, feature_size]
                tf.random.normal([
                    self.time_step, self.batch_size, self.num_actions,
                    self.image_feature_size + self.direction_encoding_dim
                ]),
            constants.PREV_ACTION_ENC:
                # Shape = [time, batch, feature_size]
                tf.random.normal([
                    self.time_step, self.batch_size,
                    self.image_feature_size + self.direction_encoding_dim
                ]),
            constants.NEXT_GOLDEN_ACTION_ENC:
                # Shape = [time, batch, feature_size]
                tf.random.normal([
                    self.time_step, self.batch_size,
                    self.image_feature_size + self.direction_encoding_dim
                ]),
            constants.INS_TOKEN_IDS:  # Shape = [time, batch, token_len]
                np.array([[[5, 3, 2, 1, 0], [3, 4, 5, 6, 1]],
                          [[3, 6, 1, 0, 0], [3, 4, 5, 6, 1]],
                          [[3, 6, 1, 0, 0], [3, 4, 5, 6, 1]]]),
            constants.INS_LEN:  # Shape = [time, batch]
                np.tile(np.array([[3]]), [self.time_step, self.batch_size]),
            constants.VALID_CONN_MASK:
                # Shape = [time, batch, num_connections]
                np.tile(
                    np.array([[[True] * 14], [[True] * 5 + [False] * 9],
                              [[True] * 2 + [False] * 12]]),
                    [1, self.batch_size, 1]),
            constants.LABEL:
                # Shape = [time, batch]
                np.array([[False, False], [True, False], [True, False]])
        },
        info='')
    self._agent_config = agent_config.get_r2r_agent_config()
Example #13
    def setUp(self):
        super(EvalMetricTest, self).setUp()

        self.data_dir = FLAGS.test_srcdir + 'valan/r2r/testdata'

        self._env_config = hparam.HParams(
            problem='R2R',
            base_path=self.data_dir,
            vocab_file='vocab.txt',
            images_per_pano=36,
            max_conns=14,
            image_encoding_dim=2052,
            image_features_dir=os.path.join(self.data_dir, 'image_features'),
            instruction_len=50,
            max_agent_actions=6,
            reward_fn=env_config.RewardFunction.get_reward_fn(
                'distance_to_goal'))

        self._runtime_config = common.RuntimeConfig(task_id=0, num_tasks=1)
        self._env = env.R2REnv(data_sources=['small_split'],
                               runtime_config=self._runtime_config,
                               env_config=self._env_config)

        # scan: gZ6f7yhEvPG
        # Path: 1, 3, 7, 5, 2
        self._golden_path = [1, 4, 6, 2]
        self._scan_id = 0  # testdata has only a single scan, 'gZ6f7yhEvPG'
        self._env_list = [
            common.EnvOutput(reward=0,
                             done=None,
                             observation={
                                 constants.PANO_ID:
                                 1,
                                 constants.GOLDEN_PATH:
                                 self._golden_path,
                                 constants.GOAL_PANO_ID:
                                 2,
                                 constants.SCAN_ID:
                                 self._scan_id,
                                 constants.GOAL_ROOM_PANOS:
                                 [6, 2, constants.INVALID_NODE_ID]
                             },
                             info=None),
            common.EnvOutput(reward=1,
                             done=None,
                             observation={
                                 constants.PANO_ID:
                                 3,
                                 constants.GOLDEN_PATH:
                                 self._golden_path,
                                 constants.GOAL_PANO_ID:
                                 2,
                                 constants.SCAN_ID:
                                 self._scan_id,
                                 constants.GOAL_ROOM_PANOS:
                                 [6, 2, constants.INVALID_NODE_ID]
                             },
                             info=None),
            common.EnvOutput(reward=1,
                             done=None,
                             observation={
                                 constants.PANO_ID:
                                 7,
                                 constants.GOLDEN_PATH:
                                 self._golden_path,
                                 constants.GOAL_PANO_ID:
                                 2,
                                 constants.SCAN_ID:
                                 self._scan_id,
                                 constants.GOAL_ROOM_PANOS:
                                 [6, 2, constants.INVALID_NODE_ID]
                             },
                             info=None),
            common.EnvOutput(reward=1,
                             done=None,
                             observation={
                                 constants.PANO_ID:
                                 5,
                                 constants.GOLDEN_PATH:
                                 self._golden_path,
                                 constants.GOAL_PANO_ID:
                                 2,
                                 constants.SCAN_ID:
                                 self._scan_id,
                                 constants.GOAL_ROOM_PANOS:
                                 [6, 2, constants.INVALID_NODE_ID]
                             },
                             info=None),
            common.EnvOutput(reward=1,
                             done=False,
                             observation={
                                 constants.PANO_ID:
                                 2,
                                 constants.GOLDEN_PATH:
                                 self._golden_path,
                                 constants.GOAL_PANO_ID:
                                 2,
                                 constants.SCAN_ID:
                                 self._scan_id,
                                 constants.GOAL_ROOM_PANOS:
                                 [6, 2, constants.INVALID_NODE_ID]
                             },
                             info=None),
            common.EnvOutput(
                reward=4,  # success
                done=True,  # end of episode
                # next episode's observation.
                observation={
                    constants.PANO_ID: 11,
                    constants.GOLDEN_PATH: self._golden_path,
                    constants.GOAL_PANO_ID: 2,
                    constants.SCAN_ID: self._scan_id,
                    constants.GOAL_ROOM_PANOS:
                    [6, 2, constants.INVALID_NODE_ID]
                },
                info=None),
        ]
        self._action_list = [3, 7, 5, 2, 0]
Example #14
def get_ndh_agent_config():
    """Returns default config using values from dict `NDH_AGENT_CONFIG`."""
    config = hparam.HParams(**NDH_AGENT_CONFIG)
    return config
Example #15
    def testStepToGoalRoom(self):
        self.reward_fn_type = 'distance_to_room'
        self._env_config = hparam.HParams(
            problem='NDH',
            history='all',
            path_type='trusted_path',
            max_goal_room_panos=4,
            base_path=self.data_dir,
            problem_path=os.path.join(self.data_dir, 'NDH'),
            vocab_file='vocab.txt',
            images_per_pano=36,
            max_conns=14,
            image_encoding_dim=2052,
            image_features_dir=os.path.join(self.data_dir, 'image_features'),
            instruction_len=50,
            max_agent_actions=6,
            reward_fn_type=self.reward_fn_type,
            reward_fn=env_config.RewardFunction.get_reward_fn(
                self.reward_fn_type))
        self._runtime_config = common.RuntimeConfig(task_id=0, num_tasks=1)

        self._env = env_ndh.NDHEnv(data_sources=['small_split'],
                                   runtime_config=self._runtime_config,
                                   env_config=self._env_config)

        scan_id = 0  # testdata has only a single scan, 'gZ6f7yhEvPG'
        _ = self._env.reset()
        golden_path = [
            'ba27da20782d4e1a825f0a133ad84da9',
            '47d8a8282c1c4a7fb3eeeacc45e9d959',  # in the goal room
            '0ee20663dfa34b438d48750ddcd7366c'  # in the goal room
        ]

        # Step through the trajectory and verify the env_output.
        for i, action in enumerate(
            [self._get_pano_id(p, scan_id) for p in golden_path]):
            expected_time_step = i + 1
            expected_heading, expected_pitch = self._env._get_heading_pitch(
                action, scan_id, expected_time_step)
            if i + 1 < len(golden_path):
                expected_oracle_action = self._get_pano_id(
                    golden_path[i + 1], scan_id)
            else:
                expected_oracle_action = constants.STOP_NODE_ID
            expected_reward = 1 if i <= 1 else 0
            env_test.verify_env_output(
                self,
                self._env.step(action),
                expected_reward=expected_reward,  # Moving towards the goal.
                expected_done=False,
                expected_info='',
                expected_time_step=expected_time_step,
                expected_path_id=318,
                expected_pano_name=golden_path[i],
                expected_heading=expected_heading,
                expected_pitch=expected_pitch,
                expected_scan_id=scan_id,
                expected_oracle_action=expected_oracle_action)

        # Stop at the goal pano. Terminating the episode resets the
        # observation to the next episode.
        env_test.verify_env_output(
            self,
            self._env.step(constants.STOP_NODE_ID),
            expected_reward=4,  # reached goal and stopped
            expected_done=True,  # end of episode
            expected_info='',
            # observation for next episode.
            expected_time_step=0,
            expected_path_id=1304,
            expected_pano_name='80929af5cf234ae38ac3a2a4e60e4342',
            expected_heading=6.101,
            expected_pitch=0.,
            expected_scan_id=scan_id,
            expected_oracle_action=self._get_pano_id(
                'ba27da20782d4e1a825f0a133ad84da9', scan_id))
Example #16
def get_default_env_config():
    """Returns default config using values from dict `DEFAULT_ENV_CONFIG`."""
    config = hparam.HParams(**DEFAULT_ENV_CONFIG)
    config.reward_fn = RewardFunction.get_reward_fn(config.reward_fn_type)
    return config