def setUp(self):
    """Create a small R2R environment backed by the bundled test data.

    Populates `data_dir`, `_env_config`, `_runtime_config` and `_env` on the
    test case, then seeds NumPy's global RNG.
    """
    super(EnvTest, self).setUp()
    # Use os.path.join instead of `+` so the two path components are not
    # fused together when `test_srcdir` has no trailing separator.
    self.data_dir = os.path.join(FLAGS.test_srcdir, 'valan/r2r/testdata')
    self._env_config = hparam.HParams(
        problem='R2R',
        scan_base_dir=self.data_dir,
        data_base_dir=self.data_dir,
        vocab_dir=self.data_dir,
        vocab_file='vocab.txt',
        images_per_pano=36,
        max_conns=14,
        image_encoding_dim=64,
        direction_encoding_dim=256,
        image_features_dir=os.path.join(self.data_dir, 'image_features'),
        instruction_len=50,
        max_agent_actions=6,
        reward_fn=env_config.RewardFunction.get_reward_fn('distance_to_goal'))
    self._runtime_config = common.RuntimeConfig(task_id=0, num_tasks=1)
    self._env = env.R2REnv(
        data_sources=['R2R_small_split'],
        runtime_config=self._runtime_config,
        env_config=self._env_config)

    # For deterministic behavior in test
    np.random.seed(0)
def setUp(self):
    """Create the R2R environment and agent config used by the tests.

    Populates `data_dir`, `_env_config`, `_runtime_config`, `_env` and
    `_agent_config` on the test case.
    """
    super(DiscriminatorProblemTest, self).setUp()
    # Use os.path.join instead of `+` so the two path components are not
    # fused together when `test_srcdir` has no trailing separator.
    self.data_dir = os.path.join(FLAGS.test_srcdir, 'valan/r2r/testdata')
    self._env_config = hparam.HParams(
        problem='R2R',
        scan_base_dir=self.data_dir,
        data_base_dir=self.data_dir,
        vocab_dir=self.data_dir,
        vocab_file='vocab.txt',
        images_per_pano=36,
        max_conns=14,
        image_encoding_dim=64,
        direction_encoding_dim=256,
        image_features_dir=os.path.join(self.data_dir, 'image_features'),
        instruction_len=50,
        max_agent_actions=6,
        reward_fn=env_config.RewardFunction.get_reward_fn('distance_to_goal'))
    self._runtime_config = common.RuntimeConfig(task_id=0, num_tasks=100)
    self._env = env.R2REnv(
        data_sources=['R2R_small_split'],
        runtime_config=self._runtime_config,
        env_config=self._env_config)

    # Start from the default R2R agent config and add the extra
    # hyperparameters these tests set explicitly.
    self._agent_config = agent_config.get_r2r_agent_config()
    self._agent_config.add_hparam('init_image_enc_with_text_state', True)
    self._agent_config.add_hparam('average_image_states_of_all_steps', False)
    self._agent_config.embed_action = True
def get_environment(self):
    """Return the R2R environment, constructing it on first use.

    The environment is stored on `self._env` and reused by later calls.
    Construction asserts that `self._data_sources` is non-empty.
    """
    # Fast path: an environment has already been built.
    if self._env:
        return self._env

    assert self._data_sources, 'data_sources must be non-empty.'
    self._env = env.R2REnv(
        data_sources=self._data_sources,
        runtime_config=self._runtime_config,
        env_config=self._env_config,
    )
    return self._env
def setUp(self):
    """Create the environment, a canned EnvOutput batch and the agent.

    Populates `data_dir`, `_env_config`, `_runtime_config`, `_env`, the
    dimension attributes (`num_panos`, `image_feature_size`, `num_actions`,
    `time_step`, `batch_size`), `_test_environment` and `_agent`.
    """
    super(DiscriminatorTest, self).setUp()
    # Use os.path.join instead of `+` so the two path components are not
    # fused together when `test_srcdir` has no trailing separator.
    self.data_dir = os.path.join(FLAGS.test_srcdir, 'valan/r2r/testdata')
    self._env_config = hparam.HParams(
        problem='R2R',
        scan_base_dir=self.data_dir,
        data_base_dir=self.data_dir,
        vocab_file='vocab.txt',
        images_per_pano=36,
        max_conns=14,
        image_encoding_dim=2052,
        image_features_dir=os.path.join(self.data_dir, 'image_features'),
        instruction_len=50,
        max_agent_actions=6,
        reward_fn=env_config.RewardFunction.get_reward_fn('distance_to_goal'))
    self._runtime_config = common.RuntimeConfig(task_id=0, num_tasks=100)
    self._env = env.R2REnv(
        data_sources=['R2R_small_split'],
        runtime_config=self._runtime_config,
        env_config=self._env_config)

    self.num_panos = 36
    self.image_feature_size = 2052
    self.num_actions = 14
    self.time_step = 3
    self.batch_size = 1

    # The nested literal is already 3x1 (time_step x batch_size), so no
    # reshape is needed.
    done = np.array([[True], [False], [True]])
    self._test_environment = common.EnvOutput(
        reward=0,
        done=done,
        observation={
            constants.IS_START:
                np.array([[True], [False], [True]]),
            constants.DISC_MASK:
                np.array([[True], [False], [True]]),
            constants.PANO_ENC:
                tf.random.normal([
                    self.time_step, self.batch_size, self.num_panos,
                    self.image_feature_size
                ]),
            constants.CONN_ENC:
                tf.random.normal([
                    self.time_step, self.batch_size, self.num_actions,
                    self.image_feature_size
                ]),
            constants.INS_TOKEN_IDS:
                np.array([[[3, 6, 1, 0, 0]],
                          [[3, 6, 1, 0, 0]],
                          [[3, 6, 1, 0, 0]]]),
            # One row of 14 connection flags per time step.
            constants.VALID_CONN_MASK:
                np.array([[[True] * 14],
                          [[True] * 5 + [False] * 9],
                          [[True] * 2 + [False] * 12]]),
        },
        info='')
    self._agent = discriminator_agent.DiscriminatorAgent(
        agent_config.get_r2r_agent_config())
def test_call_ndh(self):
    """Check R2RAgent output shapes with the NDH agent config.

    The agent is first called on a single reset observation from an
    NDH-configured environment, then on `self._test_environment`.
    NOTE(review): `self._test_environment`, `self.batch_size` and
    `self.time_step` are not set here; presumably the fixture's setUp
    provides them -- confirm.
    """
    self._agent = agent.R2RAgent(agent_config.get_ndh_agent_config())
    # Use os.path.join instead of `+` so the two path components are not
    # fused together when `test_srcdir` has no trailing separator.
    self.data_dir = os.path.join(FLAGS.test_srcdir, 'valan/r2r/testdata')
    self._env_config = hparam.HParams(
        problem='NDH',
        history='all',
        path_type='trusted_path',
        max_goal_room_panos=4,
        scan_base_dir=self.data_dir,
        data_base_dir=self.data_dir,
        vocab_dir=self.data_dir,
        problem_path=os.path.join(self.data_dir, 'NDH'),
        vocab_file='vocab.txt',
        images_per_pano=36,
        max_conns=14,
        image_encoding_dim=64,
        direction_encoding_dim=256,
        image_features_dir=os.path.join(self.data_dir, 'image_features'),
        instruction_len=50,
        max_agent_actions=6,
        reward_fn=env_config.RewardFunction.get_reward_fn('distance_to_goal'))
    self._runtime_config = common.RuntimeConfig(task_id=0, num_tasks=1)
    self._env = env.R2REnv(
        data_sources=['R2R_small_split'],
        runtime_config=self._runtime_config,
        env_config=self._env_config)

    # Single real step: reset the environment and feed its output to the agent.
    env_output = self._env.reset()
    observation = tf.nest.map_structure(lambda t: tf.expand_dims(t, 0),
                                        env_output.observation)
    initial_agent_state = self._agent.get_initial_state(
        observation, batch_size=1)
    # Agent always expects time,batch dimensions, so add them to the
    # single-step output.
    env_output = utils.add_time_batch_dim(env_output)
    agent_output, _ = self._agent(env_output, initial_agent_state)

    self.assertEqual(agent_output.policy_logits.shape, [1, 1, 14])
    self.assertEqual(agent_output.baseline.shape, [1, 1])

    # Canned multi-step batch with a randomly initialised agent state.
    initial_agent_state = ([
        (tf.random.normal([self.batch_size, 512]),
         tf.random.normal([self.batch_size, 512])),
        (tf.random.normal([self.batch_size, 512]),
         tf.random.normal([self.batch_size, 512]))
    ], tf.random.normal([self.batch_size, 5, 512]))
    agent_output, _ = self._agent(self._test_environment, initial_agent_state)

    self.assertEqual(agent_output.policy_logits.shape,
                     [self.time_step, self.batch_size, 14])
    self.assertEqual(agent_output.baseline.shape,
                     [self.time_step, self.batch_size])
def get_environment(self):
    """Return the environment, constructing it on first use.

    Builds a `CurriculumR2REnv` when `self._curriculum` is truthy and a plain
    `R2REnv` otherwise. The result is stored on `self._env` and reused by
    later calls. Construction asserts that `self._data_sources` is non-empty.
    """
    # Fast path: an environment has already been built.
    if self._env:
        return self._env

    assert self._data_sources, 'data_sources must be non-empty.'
    if not self._curriculum:
        self._env = env.R2REnv(
            data_sources=self._data_sources,
            runtime_config=self._runtime_config,
            env_config=env_config_lib.get_default_env_config(),
        )
    else:
        # See actor_main.py and curriculum_env.py for the argument options.
        self._env = curriculum_env.CurriculumR2REnv(
            data_sources=self._data_sources,
            runtime_config=self._runtime_config,
            curriculum_env_config=(
                curriculum_env_config_lib.get_default_curriculum_env_config(
                    self._curriculum)),
        )
    return self._env
def setUp(self):
    """Create the environment, a canned two-example batch and agent config.

    Populates `data_dir`, `_env_config`, `_runtime_config`, `_env`, the
    dimension attributes (`num_panos`, `image_feature_size`,
    `direction_encoding_dim`, `num_actions`, `time_step`, `batch_size`),
    `_test_environment` and `_agent_config`.
    """
    super(DiscriminatorTest, self).setUp()
    # Use os.path.join instead of `+` so the two path components are not
    # fused together when `test_srcdir` has no trailing separator.
    self.data_dir = os.path.join(FLAGS.test_srcdir, 'valan/r2r/testdata')
    self._env_config = hparam.HParams(
        problem='R2R',
        scan_base_dir=self.data_dir,
        data_base_dir=self.data_dir,
        vocab_dir=self.data_dir,
        vocab_file='vocab.txt',
        images_per_pano=36,
        max_conns=14,
        image_encoding_dim=64,
        direction_encoding_dim=256,
        image_features_dir=os.path.join(self.data_dir, 'image_features'),
        instruction_len=50,
        max_agent_actions=6,
        project_decoder_input_states=True,
        use_all_final_states=False,
        reward_fn=env_config.RewardFunction.get_reward_fn('distance_to_goal'))
    self._runtime_config = common.RuntimeConfig(task_id=0, num_tasks=100)
    self._env = env.R2REnv(
        data_sources=['R2R_small_split'],
        runtime_config=self._runtime_config,
        env_config=self._env_config)

    self.num_panos = 36
    self.image_feature_size = 64
    self.direction_encoding_dim = 256
    self.num_actions = 14
    self.time_step = 3
    self.batch_size = 2

    # Width of every encoded feature vector: image part plus direction part.
    feature_size = self.image_feature_size + self.direction_encoding_dim

    done = np.array([[False, True], [True, False], [True, False]])
    self._test_environment = common.EnvOutput(
        reward=0,
        done=done,
        observation={
            # Shape = [time, batch]
            constants.PATH_ID:
                np.array([[2, 1], [0, 1], [0, 1]]),
            # Shape = [time, batch]
            constants.IS_START:
                np.array([[False, True], [True, False], [False, False]]),
            # Shape = [time, batch]
            constants.DISC_MASK:
                np.array([[False, True], [True, True], [True, True]]),
            # Shape = [time, batch, num_panos, feature_size]
            constants.PANO_ENC:
                tf.random.normal([
                    self.time_step, self.batch_size, self.num_panos,
                    feature_size
                ]),
            # Shape = [time, batch, num_actions, feature_size]
            constants.CONN_ENC:
                tf.random.normal([
                    self.time_step, self.batch_size, self.num_actions,
                    feature_size
                ]),
            # Shape = [time, batch, feature_size]
            constants.PREV_ACTION_ENC:
                tf.random.normal(
                    [self.time_step, self.batch_size, feature_size]),
            # Shape = [time, batch, feature_size]
            constants.NEXT_GOLDEN_ACTION_ENC:
                tf.random.normal(
                    [self.time_step, self.batch_size, feature_size]),
            # Shape = [time, batch, token_len]
            constants.INS_TOKEN_IDS:
                np.array([[[5, 3, 2, 1, 0], [3, 4, 5, 6, 1]],
                          [[3, 6, 1, 0, 0], [3, 4, 5, 6, 1]],
                          [[3, 6, 1, 0, 0], [3, 4, 5, 6, 1]]]),
            # Shape = [time, batch]
            constants.INS_LEN:
                np.tile(np.array([[3]]), [self.time_step, self.batch_size]),
            # Shape = [time, batch, num_connections]
            constants.VALID_CONN_MASK:
                np.tile(
                    np.array([[[True] * 14],
                              [[True] * 5 + [False] * 9],
                              [[True] * 2 + [False] * 12]]),
                    [1, self.batch_size, 1]),
            # Shape = [time, batch]
            constants.LABEL:
                np.array([[False, False], [True, False], [True, False]]),
        },
        info='')
    self._agent_config = agent_config.get_r2r_agent_config()
def setUp(self):
    """Create the environment plus a canned episode for metric tests.

    Populates `data_dir`, `_env_config`, `_runtime_config`, `_env`,
    `_golden_path`, `_scan_id`, `_env_list` (six EnvOutputs) and
    `_action_list`.
    """
    super(EvalMetricTest, self).setUp()
    # Use os.path.join instead of `+` so the two path components are not
    # fused together when `test_srcdir` has no trailing separator.
    self.data_dir = os.path.join(FLAGS.test_srcdir, 'valan/r2r/testdata')
    self._env_config = hparam.HParams(
        problem='R2R',
        base_path=self.data_dir,
        vocab_file='vocab.txt',
        images_per_pano=36,
        max_conns=14,
        image_encoding_dim=2052,
        image_features_dir=os.path.join(self.data_dir, 'image_features'),
        instruction_len=50,
        max_agent_actions=6,
        reward_fn=env_config.RewardFunction.get_reward_fn('distance_to_goal'))
    self._runtime_config = common.RuntimeConfig(task_id=0, num_tasks=1)
    self._env = env.R2REnv(
        data_sources=['small_split'],
        runtime_config=self._runtime_config,
        env_config=self._env_config)

    # scan: gZ6f7yhEvPG
    # Path: 1, 3, 7, 5, 2
    self._golden_path = [1, 4, 6, 2]
    self._scan_id = 0  # testdata has single scan only 'gZ6f7yhEvPG'

    def make_env_output(reward, done, pano_id):
        """Build one EnvOutput; only reward, done and PANO_ID vary per step."""
        return common.EnvOutput(
            reward=reward,
            done=done,
            observation={
                constants.PANO_ID: pano_id,
                constants.GOLDEN_PATH: self._golden_path,
                constants.GOAL_PANO_ID: 2,
                constants.SCAN_ID: self._scan_id,
                constants.GOAL_ROOM_PANOS: [6, 2, constants.INVALID_NODE_ID],
            },
            info=None)

    self._env_list = [
        make_env_output(reward=0, done=None, pano_id=1),
        make_env_output(reward=1, done=None, pano_id=3),
        make_env_output(reward=1, done=None, pano_id=7),
        make_env_output(reward=1, done=None, pano_id=5),
        make_env_output(reward=1, done=False, pano_id=2),
        # reward=4: success; done=True: end of episode. The observation is
        # the next episode's observation.
        make_env_output(reward=4, done=True, pano_id=11),
    ]
    self._action_list = [3, 7, 5, 2, 0]