def setUp(self):
  super(DiscriminatorProblemTest, self).setUp()
  self.data_dir = FLAGS.test_srcdir + 'valan/r2r/testdata'
  self._env_config = hparam.HParams(
      problem='R2R',
      scan_base_dir=self.data_dir,
      data_base_dir=self.data_dir,
      vocab_dir=self.data_dir,
      vocab_file='vocab.txt',
      images_per_pano=36,
      max_conns=14,
      image_encoding_dim=64,
      direction_encoding_dim=256,
      image_features_dir=os.path.join(self.data_dir, 'image_features'),
      instruction_len=50,
      max_agent_actions=6,
      reward_fn=env_config.RewardFunction.get_reward_fn('distance_to_goal'))
  self._runtime_config = common.RuntimeConfig(task_id=0, num_tasks=100)
  self._env = env.R2REnv(
      data_sources=['R2R_small_split'],
      runtime_config=self._runtime_config,
      env_config=self._env_config)
  self._agent_config = agent_config.get_r2r_agent_config()
  self._agent_config.add_hparam('init_image_enc_with_text_state', True)
  self._agent_config.add_hparam('average_image_states_of_all_steps', False)
  self._agent_config.embed_action = True
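# --- Hypothetical usage sketch (not part of the original test) ---
# Shows one way the fixtures built in the setUp above might be consumed:
# constructing the discriminator problem under test. The module/class names
# (discriminator_problem.DiscriminatorProblem), the 'train' mode string, and
# the test method name are assumptions; the constructor arguments mirror the
# DiscriminatorProblem __init__ shown later in this section.
def test_problem_construction_sketch(self):
  problem = discriminator_problem.DiscriminatorProblem(  # names assumed
      runtime_config=self._runtime_config,
      mode='train',
      data_sources=['R2R_small_split'])
  # Per the constructor shown later, the problem builds its own agent.
  self.assertIsNotNone(problem._agent)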
def setUp(self):
  super(DiscriminatorTest, self).setUp()
  self.data_dir = FLAGS.test_srcdir + 'valan/r2r/testdata'
  self._env_config = hparam.HParams(
      problem='R2R',
      scan_base_dir=self.data_dir,
      data_base_dir=self.data_dir,
      vocab_file='vocab.txt',
      images_per_pano=36,
      max_conns=14,
      image_encoding_dim=2052,
      image_features_dir=os.path.join(self.data_dir, 'image_features'),
      instruction_len=50,
      max_agent_actions=6,
      reward_fn=env_config.RewardFunction.get_reward_fn('distance_to_goal'))
  self._runtime_config = common.RuntimeConfig(task_id=0, num_tasks=100)
  self._env = env.R2REnv(
      data_sources=['R2R_small_split'],
      runtime_config=self._runtime_config,
      env_config=self._env_config)
  self.num_panos = 36
  self.image_feature_size = 2052
  self.num_actions = 14
  self.time_step = 3
  self.batch_size = 1
  done = np.array([[True], [False], [True]])
  done = np.reshape(done, [3, 1])
  self._test_environment = common.EnvOutput(
      reward=0,
      done=done,
      observation={
          constants.IS_START:
              np.array([[True], [False], [True]]),
          constants.DISC_MASK:
              np.array([[True], [False], [True]]),
          constants.PANO_ENC:
              tf.random.normal([
                  self.time_step, self.batch_size, self.num_panos,
                  self.image_feature_size
              ]),
          constants.CONN_ENC:
              tf.random.normal([
                  self.time_step, self.batch_size, self.num_actions,
                  self.image_feature_size
              ]),
          constants.INS_TOKEN_IDS:
              np.array([[[3, 6, 1, 0, 0]], [[3, 6, 1, 0, 0]],
                        [[3, 6, 1, 0, 0]]]),
          constants.VALID_CONN_MASK:
              np.array([[[True] * 14], [[True] * 5 + [False] * 9],
                        [[True] * 2 + [False] * 12]])
      },
      info='')
  self._agent = discriminator_agent.DiscriminatorAgent(
      agent_config.get_r2r_agent_config())
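# --- Hypothetical usage sketch (not part of the original test) ---
# One way the fake EnvOutput above might be exercised against self._agent.
# The get_initial_state/(env_output, state) call convention is assumed from
# the R2RAgent test shown later in this section, and the test method name is
# an assumption; the output is only checked for existence.
def test_call_sketch(self):
  # Initial state is built from the first time step, which already carries
  # the batch dimension.
  first_obs = tf.nest.map_structure(lambda t: t[0],
                                    self._test_environment.observation)
  init_state = self._agent.get_initial_state(
      first_obs, batch_size=self.batch_size)
  agent_output, _ = self._agent(self._test_environment, init_state)
  self.assertIsNotNone(agent_output)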
def __init__(self, runtime_config, mode, data_sources):
  self._runtime_config = runtime_config
  self._mode = mode
  self._data_sources = data_sources
  self._env = None
  self._loss_type = None
  self._eval_dict = self._get_eval_dict()
  self._agent = discriminator_agent.DiscriminatorAgent(
      agent_config.get_r2r_agent_config())
def __init__(self, runtime_config, mode, data_sources, curriculum=''):
  self._runtime_config = runtime_config
  self._mode = mode
  self._data_sources = data_sources
  self._curriculum = curriculum
  self._agent = agent.R2RAgent(agent_config.get_r2r_agent_config())
  self._prob_ac = 0.5
  self._env = None
  self._loss_type = None
  self._eval_dict = self._get_eval_dict()
def test_call_r2r(self):
  self._agent = agent.R2RAgent(agent_config.get_r2r_agent_config())
  self.data_dir = FLAGS.test_srcdir + 'valan/r2r/testdata'
  self._env_config = hparam.HParams(
      problem='R2R',
      scan_base_dir=self.data_dir,
      data_base_dir=self.data_dir,
      vocab_dir=self.data_dir,
      vocab_file='vocab.txt',
      images_per_pano=36,
      max_conns=14,
      image_encoding_dim=64,
      direction_encoding_dim=256,
      image_features_dir=os.path.join(self.data_dir, 'image_features'),
      instruction_len=50,
      max_agent_actions=6,
      reward_fn=env_config.RewardFunction.get_reward_fn('distance_to_goal'))
  self._runtime_config = common.RuntimeConfig(task_id=0, num_tasks=1)
  self._env = env.R2REnv(
      data_sources=['R2R_small_split'],
      runtime_config=self._runtime_config,
      env_config=self._env_config)
  env_output = self._env.reset()
  observation = tf.nest.map_structure(lambda t: tf.expand_dims(t, 0),
                                      env_output.observation)
  initial_agent_state = self._agent.get_initial_state(
      observation, batch_size=1)
  # Agent always expects time,batch dimensions. First add and then remove.
  env_output = utils.add_time_batch_dim(env_output)
  agent_output, _ = self._agent(env_output, initial_agent_state)
  self.assertEqual(agent_output.policy_logits.shape, [1, 1, 14])
  self.assertEqual(agent_output.baseline.shape, [1, 1])

  # Second call uses fixtures (self.batch_size, self.time_step,
  # self._test_environment) defined in this test class's setUp, like the
  # setUp snippets shown in this section.
  initial_agent_state = ([
      (tf.random.normal([self.batch_size, 512]),
       tf.random.normal([self.batch_size, 512])),
      (tf.random.normal([self.batch_size, 512]),
       tf.random.normal([self.batch_size, 512]))
  ], tf.random.normal([self.batch_size, 5, 512]))
  agent_output, _ = self._agent(self._test_environment, initial_agent_state)
  self.assertEqual(agent_output.policy_logits.shape,
                   [self.time_step, self.batch_size, 14])
  self.assertEqual(agent_output.baseline.shape,
                   [self.time_step, self.batch_size])
def __init__(self,
             runtime_config,
             mode,
             data_sources,
             agent_config=None,
             env_config=None):
  self._runtime_config = runtime_config
  self._mode = mode
  self._data_sources = data_sources
  self._env_config = (
      env_config if env_config else env_config_lib.get_default_env_config())
  self._env = None
  self._loss_type = None
  self._eval_dict = self._get_eval_dict()
  agent_config = (
      agent_config
      if agent_config else agent_config_lib.get_r2r_agent_config())
  agent_type = (
      agent_config.agent_type
      if hasattr(agent_config, 'agent_type') else 'default')
  if agent_type == 'default':
    self._agent = discriminator_agent.DiscriminatorAgent(agent_config)
  elif agent_type == 'v2':
    self._agent = discriminator_agent.DiscriminatorAgentV2(agent_config)
  else:
    raise ValueError('Invalid agent_type: {}'.format(agent_type))
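# --- Hypothetical usage sketch (not part of the original module) ---
# Selecting the V2 discriminator agent by tagging the agent config with an
# agent_type hparam, as the constructor above checks via hasattr. The
# enclosing class name DiscriminatorProblem, the mode string, and the data
# source name are assumptions; add_hparam on the returned config follows the
# test setUp shown earlier in this section.
config = agent_config_lib.get_r2r_agent_config()
config.add_hparam('agent_type', 'v2')
problem = DiscriminatorProblem(  # class name assumed
    runtime_config=common.RuntimeConfig(task_id=0, num_tasks=1),
    mode='train',
    data_sources=['R2R_small_split'],
    agent_config=config)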
def __init__(self,
             runtime_config,
             mode,
             data_sources,
             curriculum='',
             agent_type='r2r'):
  self._runtime_config = runtime_config
  self._mode = mode
  self._data_sources = data_sources
  self._curriculum = curriculum
  if agent_type.lower() == 'r2r':
    self._agent = agent.R2RAgent(
        agent_config.get_r2r_agent_config(), mode=mode)
  elif agent_type.lower() == 'mt':
    self._agent = mt_agent.MTEnvAgAgent(
        mt_agent_config.get_agent_config(), mode=mode)
  else:
    raise ValueError('Invalid agent_type: {}'.format(agent_type))
  self._prob_ac = 0.5
  self._env = None
  self._loss_type = None
  self._eval_dict = self._get_eval_dict()
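# --- Hypothetical usage sketch (not part of the original module) ---
# The same constructor with the multitask agent selected via agent_type.
# The enclosing class name R2RProblem, the mode string, and the data source
# name are assumptions; curriculum is left at its default (disabled).
problem = R2RProblem(  # class name assumed
    runtime_config=common.RuntimeConfig(task_id=0, num_tasks=1),
    mode='train',
    data_sources=['R2R_small_split'],
    agent_type='mt')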
def setUp(self):
  super(DiscriminatorTest, self).setUp()
  self.data_dir = FLAGS.test_srcdir + 'valan/r2r/testdata'
  self._env_config = hparam.HParams(
      problem='R2R',
      scan_base_dir=self.data_dir,
      data_base_dir=self.data_dir,
      vocab_dir=self.data_dir,
      vocab_file='vocab.txt',
      images_per_pano=36,
      max_conns=14,
      image_encoding_dim=64,
      direction_encoding_dim=256,
      image_features_dir=os.path.join(self.data_dir, 'image_features'),
      instruction_len=50,
      max_agent_actions=6,
      project_decoder_input_states=True,
      use_all_final_states=False,
      reward_fn=env_config.RewardFunction.get_reward_fn('distance_to_goal'))
  self._runtime_config = common.RuntimeConfig(task_id=0, num_tasks=100)
  self._env = env.R2REnv(
      data_sources=['R2R_small_split'],
      runtime_config=self._runtime_config,
      env_config=self._env_config)
  self.num_panos = 36
  self.image_feature_size = 64
  self.direction_encoding_dim = 256
  self.num_actions = 14
  self.time_step = 3
  self.batch_size = 2
  done = np.array([[False, True], [True, False], [True, False]])
  self._test_environment = common.EnvOutput(
      reward=0,
      done=done,
      observation={
          constants.PATH_ID:
              # Shape = [time, batch]
              np.array([[2, 1], [0, 1], [0, 1]]),
          constants.IS_START:
              # Shape = [time, batch]
              np.array([[False, True], [True, False], [False, False]]),
          constants.DISC_MASK:
              # Shape = [time, batch]
              np.array([[False, True], [True, True], [True, True]]),
          constants.PANO_ENC:
              # Shape = [time, batch, num_panos, feature_size]
              tf.random.normal([
                  self.time_step, self.batch_size, self.num_panos,
                  self.image_feature_size + self.direction_encoding_dim
              ]),
          constants.CONN_ENC:
              # Shape = [time, batch, num_actions, feature_size]
              tf.random.normal([
                  self.time_step, self.batch_size, self.num_actions,
                  self.image_feature_size + self.direction_encoding_dim
              ]),
          constants.PREV_ACTION_ENC:
              # Shape = [time, batch, feature_size]
              tf.random.normal([
                  self.time_step, self.batch_size,
                  self.image_feature_size + self.direction_encoding_dim
              ]),
          constants.NEXT_GOLDEN_ACTION_ENC:
              # Shape = [time, batch, feature_size]
              tf.random.normal([
                  self.time_step, self.batch_size,
                  self.image_feature_size + self.direction_encoding_dim
              ]),
          constants.INS_TOKEN_IDS:
              # Shape = [time, batch, token_len]
              np.array([[[5, 3, 2, 1, 0], [3, 4, 5, 6, 1]],
                        [[3, 6, 1, 0, 0], [3, 4, 5, 6, 1]],
                        [[3, 6, 1, 0, 0], [3, 4, 5, 6, 1]]]),
          constants.INS_LEN:
              # Shape = [time, batch]
              np.tile(np.array([[3]]), [self.time_step, self.batch_size]),
          constants.VALID_CONN_MASK:
              # Shape = [time, batch, num_connections]
              np.tile(
                  np.array([[[True] * 14], [[True] * 5 + [False] * 9],
                            [[True] * 2 + [False] * 12]]),
                  [1, self.batch_size, 1]),
          constants.LABEL:
              # Shape = [time, batch]
              np.array([[False, False], [True, False], [True, False]])
      },
      info='')
  self._agent_config = agent_config.get_r2r_agent_config()
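# --- Hypothetical usage sketch (not part of the original test) ---
# One way a test in this class might consume the fixture above: build the V2
# discriminator agent from self._agent_config and run it over the fake batch.
# The get_initial_state/(env_output, state) call convention, the choice of
# DiscriminatorAgentV2, and the test method name are assumptions; the output
# is only checked for existence.
def test_call_v2_sketch(self):
  disc_agent = discriminator_agent.DiscriminatorAgentV2(self._agent_config)
  # Initial state is built from the first time step, which already carries
  # the batch dimension.
  first_obs = tf.nest.map_structure(lambda t: t[0],
                                    self._test_environment.observation)
  init_state = disc_agent.get_initial_state(
      first_obs, batch_size=self.batch_size)
  agent_output, _ = disc_agent(self._test_environment, init_state)
  self.assertIsNotNone(agent_output)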