def setUp(self):
        super(DiscriminatorProblemTest, self).setUp()
        self.data_dir = FLAGS.test_srcdir + 'valan/r2r/testdata'
        self._env_config = hparam.HParams(
            problem='R2R',
            scan_base_dir=self.data_dir,
            data_base_dir=self.data_dir,
            vocab_dir=self.data_dir,
            vocab_file='vocab.txt',
            images_per_pano=36,
            max_conns=14,
            image_encoding_dim=64,
            direction_encoding_dim=256,
            image_features_dir=os.path.join(self.data_dir, 'image_features'),
            instruction_len=50,
            max_agent_actions=6,
            reward_fn=env_config.RewardFunction.get_reward_fn(
                'distance_to_goal'))

        self._runtime_config = common.RuntimeConfig(task_id=0, num_tasks=100)
        self._env = env.R2REnv(data_sources=['R2R_small_split'],
                               runtime_config=self._runtime_config,
                               env_config=self._env_config)

        self._agent_config = agent_config.get_r2r_agent_config()
        self._agent_config.add_hparam('init_image_enc_with_text_state', True)
        self._agent_config.add_hparam('average_image_states_of_all_steps',
                                      False)
        self._agent_config.embed_action = True
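
# Illustrative sketch only (not part of the original test): shows how the
# pieces configured above would typically be wired together, reusing the
# DiscriminatorAgent construction and env.reset() calls that appear in the
# other examples on this page. The method name is an assumption.
def test_setup_wiring_sketch(self):
        agent = discriminator_agent.DiscriminatorAgent(self._agent_config)
        env_output = self._env.reset()
        self.assertIsNotNone(agent)
        self.assertIsNotNone(env_output.observation)
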
    def setUp(self):
        super(DiscriminatorTest, self).setUp()
        self.data_dir = FLAGS.test_srcdir + 'valan/r2r/testdata'
        self._env_config = hparam.HParams(
            problem='R2R',
            scan_base_dir=self.data_dir,
            data_base_dir=self.data_dir,
            vocab_file='vocab.txt',
            images_per_pano=36,
            max_conns=14,
            image_encoding_dim=2052,
            image_features_dir=os.path.join(self.data_dir, 'image_features'),
            instruction_len=50,
            max_agent_actions=6,
            reward_fn=env_config.RewardFunction.get_reward_fn(
                'distance_to_goal'))

        self._runtime_config = common.RuntimeConfig(task_id=0, num_tasks=100)

        self._env = env.R2REnv(data_sources=['R2R_small_split'],
                               runtime_config=self._runtime_config,
                               env_config=self._env_config)
        self.num_panos = 36
        self.image_feature_size = 2052
        self.num_actions = 14
        self.time_step = 3
        self.batch_size = 1
        done = np.array([[True], [False], [True]])  # Shape = [time, batch]
        self._test_environment = common.EnvOutput(
            reward=0,
            done=done,
            observation={
                constants.IS_START:  # Shape = [time, batch]
                np.array([[True], [False], [True]]),
                constants.DISC_MASK:  # Shape = [time, batch]
                np.array([[True], [False], [True]]),
                constants.PANO_ENC:
                # Shape = [time, batch, num_panos, feature_size]
                tf.random.normal([
                    self.time_step, self.batch_size, self.num_panos,
                    self.image_feature_size
                ]),
                constants.CONN_ENC:
                # Shape = [time, batch, num_actions, feature_size]
                tf.random.normal([
                    self.time_step, self.batch_size, self.num_actions,
                    self.image_feature_size
                ]),
                constants.INS_TOKEN_IDS:  # Shape = [time, batch, token_len]
                np.array([[[3, 6, 1, 0, 0]], [[3, 6, 1, 0, 0]],
                          [[3, 6, 1, 0, 0]]]),
                constants.VALID_CONN_MASK:
                # Shape = [time, batch, num_connections]
                np.array([[[True] * 14], [[True] * 5 + [False] * 9],
                          [[True] * 2 + [False] * 12]])
            },
            info='')
        self._agent = discriminator_agent.DiscriminatorAgent(
            agent_config.get_r2r_agent_config())
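
# Illustrative sketch only (not part of the original test): a test body that
# exercises the agent built in this setUp, mirroring the call pattern used for
# the other agents on this page. It assumes DiscriminatorAgent shares the
# get_initial_state()/__call__ interface shown for the R2R agent; the method
# name and the final check are assumptions.
def test_call_discriminator_sketch(self):
        init_state = self._agent.get_initial_state(
            self._test_environment.observation, batch_size=self.batch_size)
        agent_output, _ = self._agent(self._test_environment, init_state)
        # The test environment above is laid out as [time, batch, ...], so the
        # output is expected to keep the same leading dimensions.
        self.assertIsNotNone(agent_output)
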
  def __init__(self, runtime_config, mode, data_sources):
    self._runtime_config = runtime_config
    self._mode = mode
    self._data_sources = data_sources

    self._env = None
    self._loss_type = None
    self._eval_dict = self._get_eval_dict()
    self._agent = discriminator_agent.DiscriminatorAgent(
        agent_config.get_r2r_agent_config())
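
  # Illustrative sketch only (not part of the original class): `self._env =
  # None` above suggests the environment is created lazily. Assuming a
  # get_environment() hook in the problem interface, it could look like this;
  # the default-config call mirrors env_config_lib.get_default_env_config()
  # from another example on this page.
  def get_environment(self):
    if self._env is None:
      self._env = env.R2REnv(
          data_sources=self._data_sources,
          runtime_config=self._runtime_config,
          env_config=env_config_lib.get_default_env_config())
    return self._env
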
    def __init__(self, runtime_config, mode, data_sources, curriculum=''):
        self._runtime_config = runtime_config
        self._mode = mode
        self._data_sources = data_sources
        self._curriculum = curriculum

        self._agent = agent.R2RAgent(agent_config.get_r2r_agent_config())
        self._prob_ac = 0.5
        self._env = None
        self._loss_type = None
        self._eval_dict = self._get_eval_dict()
  def test_call_r2r(self):
    self._agent = agent.R2RAgent(agent_config.get_r2r_agent_config())
    self.data_dir = FLAGS.test_srcdir + 'valan/r2r/testdata'

    self._env_config = hparam.HParams(
        problem='R2R',
        scan_base_dir=self.data_dir,
        data_base_dir=self.data_dir,
        vocab_dir=self.data_dir,
        vocab_file='vocab.txt',
        images_per_pano=36,
        max_conns=14,
        image_encoding_dim=64,
        direction_encoding_dim=256,
        image_features_dir=os.path.join(self.data_dir, 'image_features'),
        instruction_len=50,
        max_agent_actions=6,
        reward_fn=env_config.RewardFunction.get_reward_fn('distance_to_goal'))

    self._runtime_config = common.RuntimeConfig(task_id=0, num_tasks=1)
    self._env = env.R2REnv(
        data_sources=['R2R_small_split'],
        runtime_config=self._runtime_config,
        env_config=self._env_config)

    env_output = self._env.reset()
    observation = tf.nest.map_structure(lambda t: tf.expand_dims(t, 0),
                                        env_output.observation)
    initial_agent_state = self._agent.get_initial_state(
        observation, batch_size=1)
    # Agent always expects time,batch dimensions. First add and then remove.
    env_output = utils.add_time_batch_dim(env_output)
    agent_output, _ = self._agent(env_output, initial_agent_state)

    self.assertEqual(agent_output.policy_logits.shape, [1, 1, 14])
    self.assertEqual(agent_output.baseline.shape, [1, 1])

    initial_agent_state = ([
        (tf.random.normal([self.batch_size,
                           512]), tf.random.normal([self.batch_size, 512])),
        (tf.random.normal([self.batch_size,
                           512]), tf.random.normal([self.batch_size, 512]))
    ], tf.random.normal([self.batch_size, 5, 512]))
    agent_output, _ = self._agent(self._test_environment, initial_agent_state)

    self.assertEqual(agent_output.policy_logits.shape,
                     [self.time_step, self.batch_size, 14])
    self.assertEqual(agent_output.baseline.shape,
                     [self.time_step, self.batch_size])
  def __init__(self, runtime_config, mode, data_sources, agent_config=None,
               env_config=None):
    self._runtime_config = runtime_config
    self._mode = mode
    self._data_sources = data_sources

    self._env_config = (
        env_config if env_config else env_config_lib.get_default_env_config())
    self._env = None
    self._loss_type = None
    self._eval_dict = self._get_eval_dict()

    agent_config = (
        agent_config
        if agent_config else agent_config_lib.get_r2r_agent_config())
    agent_type = (
        agent_config.agent_type
        if hasattr(agent_config, 'agent_type') else 'default')
    if agent_type == 'default':
      self._agent = discriminator_agent.DiscriminatorAgent(agent_config)
    elif agent_type == 'v2':
      self._agent = discriminator_agent.DiscriminatorAgentV2(agent_config)
    else:
      raise ValueError('Invalid agent_type: {}'.format(agent_type))
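
  # Illustrative usage sketch (not part of the original file): the choice
  # between the default and V2 discriminator agents is driven entirely by an
  # `agent_type` hparam on the agent config. The class name
  # `DiscriminatorProblem` is assumed here.
  #
  #   config = agent_config_lib.get_r2r_agent_config()
  #   config.add_hparam('agent_type', 'v2')
  #   problem = DiscriminatorProblem(
  #       runtime_config, mode='train', data_sources=['R2R_small_split'],
  #       agent_config=config)
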
  def __init__(self,
               runtime_config,
               mode,
               data_sources,
               curriculum='',
               agent_type='r2r'):
    self._runtime_config = runtime_config
    self._mode = mode
    self._data_sources = data_sources
    self._curriculum = curriculum

    if agent_type.lower() == 'r2r':
      self._agent = agent.R2RAgent(
          agent_config.get_r2r_agent_config(), mode=mode)
    elif agent_type.lower() == 'mt':
      self._agent = mt_agent.MTEnvAgAgent(
          mt_agent_config.get_agent_config(), mode=mode)
    else:
      raise ValueError('Invalid agent_type: {}'.format(agent_type))

    self._prob_ac = 0.5
    self._env = None
    self._loss_type = None
    self._eval_dict = self._get_eval_dict()
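
  # Illustrative usage sketch (not part of the original file): the constructor
  # dispatches on `agent_type`, so callers select the R2R or multi-task agent
  # by name and anything else raises ValueError. The class name `R2RProblem`
  # is assumed here.
  #
  #   problem = R2RProblem(
  #       runtime_config=common.RuntimeConfig(task_id=0, num_tasks=1),
  #       mode='train',
  #       data_sources=['R2R_small_split'],
  #       agent_type='mt')
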
  def setUp(self):
    super(DiscriminatorTest, self).setUp()
    self.data_dir = FLAGS.test_srcdir + 'valan/r2r/testdata'
    self._env_config = hparam.HParams(
        problem='R2R',
        scan_base_dir=self.data_dir,
        data_base_dir=self.data_dir,
        vocab_dir=self.data_dir,
        vocab_file='vocab.txt',
        images_per_pano=36,
        max_conns=14,
        image_encoding_dim=64,
        direction_encoding_dim=256,
        image_features_dir=os.path.join(self.data_dir, 'image_features'),
        instruction_len=50,
        max_agent_actions=6,
        project_decoder_input_states=True,
        use_all_final_states=False,
        reward_fn=env_config.RewardFunction.get_reward_fn('distance_to_goal'))

    self._runtime_config = common.RuntimeConfig(task_id=0, num_tasks=100)
    self._env = env.R2REnv(
        data_sources=['R2R_small_split'],
        runtime_config=self._runtime_config,
        env_config=self._env_config)
    self.num_panos = 36
    self.image_feature_size = 64
    self.direction_encoding_dim = 256
    self.num_actions = 14
    self.time_step = 3
    self.batch_size = 2
    # Shape = [time, batch]
    done = np.array([[False, True], [True, False], [True, False]])
    self._test_environment = common.EnvOutput(
        reward=0,
        done=done,
        observation={
            constants.PATH_ID:  # Shape = [time, batch]
                np.array([[2, 1], [0, 1], [0, 1]]),
            constants.IS_START:  # Shape = [time, batch]
                np.array([[False, True], [True, False], [False, False]]),
            constants.DISC_MASK:  # Shape = [time, batch]
                np.array([[False, True], [True, True], [True, True]]),
            constants.PANO_ENC:  # Shape = [time, batch, num_panos, feature_size]
                tf.random.normal([
                    self.time_step, self.batch_size, self.num_panos,
                    self.image_feature_size + self.direction_encoding_dim
                ]),
            constants.CONN_ENC:
                # Shape = [time, batch, num_actions, feature_size]
                tf.random.normal([
                    self.time_step, self.batch_size, self.num_actions,
                    self.image_feature_size + self.direction_encoding_dim
                ]),
            constants.PREV_ACTION_ENC:
                # Shape = [time, batch, feature_size]
                tf.random.normal([
                    self.time_step, self.batch_size,
                    self.image_feature_size + self.direction_encoding_dim
                ]),
            constants.NEXT_GOLDEN_ACTION_ENC:
                # Shape = [time, batch, feature_size]
                tf.random.normal([
                    self.time_step, self.batch_size,
                    self.image_feature_size + self.direction_encoding_dim
                ]),
            constants.INS_TOKEN_IDS:  # Shape = [time, batch, token_len]
                np.array([[[5, 3, 2, 1, 0], [3, 4, 5, 6, 1]],
                          [[3, 6, 1, 0, 0], [3, 4, 5, 6, 1]],
                          [[3, 6, 1, 0, 0], [3, 4, 5, 6, 1]]]),
            constants.INS_LEN:  # Shape = [time, batch]
                np.tile(np.array([[3]]), [self.time_step, self.batch_size]),
            constants.VALID_CONN_MASK:
                # Shape = [time, batch, num_connections]
                np.tile(
                    np.array([[[True] * 14], [[True] * 5 + [False] * 9],
                              [[True] * 2 + [False] * 12]]),
                    [1, self.batch_size, 1]),
            constants.LABEL:
                # Shape = [time, batch]
                np.array([[False, False], [True, False], [True, False]])
        },
        info='')
    self._agent_config = agent_config.get_r2r_agent_config()