Example #1
    def testWithMockAgent_DoneAllFalse(self):
        total_timesteps = 3
        batch_size = 4
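        # `done` flags are time-major: shape [total_timesteps, batch_size] = [3, 4].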
        done = np.array([[False, False, False, False],
                         [False, False, False, False],
                         [False, False, False, False]])
        init_state = tf.constant([[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
                                  [2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0],
                                  [3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0],
                                  [4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0]])

        env_output = common.EnvOutput(
            reward=None,
            done=done,
            observation={
                _OBS_KEY_1: tf.ones([total_timesteps, batch_size, 50]),
                _OBS_KEY_0: tf.zeros([total_timesteps, batch_size, 50]),
            },
            info=None)
        agent = MockAgent(3, 4, init_state, done)
        agent.reset_timestep()
        agent_output, final_state = agent(env_output, init_state)
        np.testing.assert_array_almost_equal(
            np.zeros((total_timesteps, batch_size, 4)),
            agent_output.policy_logits)
        np.testing.assert_array_almost_equal(
            np.ones((total_timesteps, batch_size)), agent_output.baseline)
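        # The test expects MockAgent to advance each state element by one per
        # timestep, so the initial states 1..4 become 4..7 after 3 timesteps.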
        expected_final_state = np.array(
            [[4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0],
             [5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0],
             [6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0],
             [7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0]])
        np.testing.assert_array_almost_equal(expected_final_state,
                                             final_state.numpy())
Example #2
    def setUp(self):
        super(DiscriminatorTest, self).setUp()
        self.data_dir = FLAGS.test_srcdir + ('valan/r2r/testdata')
        self._env_config = hparam.HParams(
            problem='R2R',
            scan_base_dir=self.data_dir,
            data_base_dir=self.data_dir,
            vocab_file='vocab.txt',
            images_per_pano=36,
            max_conns=14,
            image_encoding_dim=2052,
            image_features_dir=os.path.join(self.data_dir, 'image_features'),
            instruction_len=50,
            max_agent_actions=6,
            reward_fn=env_config.RewardFunction.get_reward_fn(
                'distance_to_goal'))

        self._runtime_config = common.RuntimeConfig(task_id=0, num_tasks=100)

        self._env = env.R2REnv(data_sources=['R2R_small_split'],
                               runtime_config=self._runtime_config,
                               env_config=self._env_config)
        self.num_panos = 36
        self.image_feature_size = 2052
        self.num_actions = 14
        self.time_step = 3
        self.batch_size = 1
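        # Episode-done flags, time-major: [time_step, batch_size] = [3, 1].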
        done = np.array([[True], [False], [True]])
        done = np.reshape(done, [3, 1])
        self._test_environment = common.EnvOutput(
            reward=0,
            done=done,
            observation={
                constants.IS_START:
                np.array([[True], [False], [True]]),
                constants.DISC_MASK:
                np.array([[True], [False], [True]]),
                constants.PANO_ENC:
                tf.random.normal([
                    self.time_step, self.batch_size, self.num_panos,
                    self.image_feature_size
                ]),
                constants.CONN_ENC:
                tf.random.normal([
                    self.time_step, self.batch_size, self.num_actions,
                    self.image_feature_size
                ]),
                constants.INS_TOKEN_IDS:
                np.array([[[3, 6, 1, 0, 0]], [[3, 6, 1, 0, 0]],
                          [[3, 6, 1, 0, 0]]]),
                constants.VALID_CONN_MASK:
                np.array([[[True] * 14], [[True] * 5 + [False] * 9],
                          [[True] * 2 + [False] * 12]])
            },
            info='')
        self._agent = discriminator_agent.DiscriminatorAgent(
            agent_config.get_r2r_agent_config())
Example #3
    def _reset(self):
        self._frame_count = 0

        # Switch to a new, random region among this worker's regions
        region_idx = np.random.randint(0, len(self._all_regions))
        self._current_region = self._all_regions[region_idx]
        self._graph = self._all_graphs[self._current_region]

        # Switch to a new, random instruction sequence from this region
        self._current_sequence_idx = np.random.randint(
            0, len(self._all_entry_sequences[self._current_region]))
        current_entry_sequence = (self._all_entry_sequences[
            self._current_region][self._current_sequence_idx])

        # Switch to a new, random instruction from this instruction sequence

        self._current_entry_idx = np.random.randint(
            0, len(current_entry_sequence))
        current_entry = current_entry_sequence[self._current_entry_idx]

        # The graph uses different action representations depending on the
        # action space. Convert to integer indices, the unifying
        # representation used here.
        golden_actions, self._golden_path = self._graph.get_golden_actions(
            current_entry)
        self._golden_actions = self._convert_golden_actions(golden_actions)

        self._goal_pano_id = current_entry.route[-1]

        self._graph_state = GraphState(current_entry.route[0],
                                       current_entry.start_heading, 0, 0)
        self._distance_to_goal = self.shortest_path_length(
            self._graph_state.pano_id, self._goal_pano_id)

        if self._run_writer:
            self._run_writer.reset()
            self._run_writer.log_run_data({
                'region':
                self._current_region,
                'route_id':
                current_entry.route_id,
                'segment_idx':
                self._current_entry_idx
            })

        return common.EnvOutput(
            reward=np.float32(0.),
            done=False,
            # We randomly choose prev_action_idx at the beginning of every episode.
            observation=self._get_current_observation(
                prev_action=np.random.choice(
                    (self._panoramic_action_bins + 1)
                    if self._panoramic_actions
                    else streetview_constants.NUM_DISCRETE_ACTIONS)),
            info=self._get_step_info())
Example #4
    def _step(self, action):
        """Updates the state using the provided action.

    Sets `done=True` if either this action corresponds to stop node or the
    budget for the current episode is exhausted.

    Args:
      action: An integer specifying the next pano id.

    Returns:
      A tuple `EnvOutput`.
    """
        # First check this is a valid action.
        assert action >= 0
        current_observations = self.get_current_env_output().observation
        current_pano_id = current_observations[constants.PANO_ID]
        current_scan_id = current_observations[constants.SCAN_ID]
        current_time_step = current_observations[constants.TIME_STEP]
        assert action in self._scan_info[current_scan_id].conn_ids[
            current_pano_id]
        next_pano_id = action
        self._path_history.append(next_pano_id)
        done = False
        if (next_pano_id == constants.STOP_NODE_ID
                or current_time_step == self._max_actions_per_episode):
            done = True

        problem_type = self._paths[self._current_idx]['problem_type']
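        # Reward shaping depends on the problem type; the NDH variant also
        # conditions on the panos of the goal room.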
        if problem_type == constants.PROBLEM_VLN:
            reward = np.float32(
                self._compute_vln_reward(
                    path_history=self._path_history[:-1],
                    next_pano=next_pano_id,
                    golden_path=self._paths[self._current_idx]['path'],
                    end_of_episode=done,
                    scan_info=self._scan_info[current_scan_id]))
        elif problem_type == constants.PROBLEM_NDH:
            reward = np.float32(
                self._compute_ndh_reward(
                    path_history=self._path_history[:-1],
                    next_pano=next_pano_id,
                    golden_path=self._paths[self._current_idx]['path'],
                    end_of_episode=done,
                    scan_info=self._scan_info[current_scan_id],
                    goal_room_panos=self._paths[
                        self._current_idx]['end_panos']))
        else:
            raise ValueError('Invalid problem_type: {}.'.format(problem_type))
        return common.EnvOutput(reward=reward,
                                done=done,
                                observation=self._get_current_observation(
                                    next_pano_id, current_scan_id,
                                    current_time_step + 1),
                                info='')
Example #5
  def _step(self, action):
    assert action in self._action_space
    new_state = self._current_state + action
    done = False
    # Stepping below 0 or past the last state transitions to the STOP state.
    if new_state < 0 or new_state == self._state_space_size:
      new_state = -100  # STOP
      done = True
    self._current_state = new_state
    return common.EnvOutput(
        reward=float(action), done=done,
        observation=self._get_current_observation(), info='')
Example #6
    def _reset(self):
        """Reset the environment with new data.

    Returns:
      A instance of common.EnvOutput, which is the initial Observation.
    """
        self._current_idx = self._get_next_idx(self._current_idx)
        current_scan_id = self._paths[self._current_idx]['scan_id']
        current_pano_id = self._scan_info[current_scan_id].pano_name_to_id[
            self._paths[self._current_idx]['path'][0]]
        self._path_history = [current_pano_id]
        return common.EnvOutput(reward=np.float32(0.0),
                                done=False,
                                observation=self._get_current_observation(
                                    current_pano_id, current_scan_id, 0),
                                info='')
Example #7
  def _step(self, action):
    """Updates the state using the provided action.

    Sets `done=True` if either this action corresponds to the stop node or the
    budget for the current episode is exhausted.

    Args:
      action: An integer specifying the next pano id.

    Returns:
      A tuple `EnvOutput`.
    """
    # First check this is a valid action.
    assert action >= 0
    current_observations = self.get_current_env_output().observation
    current_pano_id = current_observations[constants.PANO_ID]
    current_scan_id = current_observations[constants.SCAN_ID]
    current_time_step = current_observations[constants.TIME_STEP]

    # Sanity check.
    if not (constants.LABEL in current_observations and
            current_observations[constants.LABEL] == 0):
      # Panos must be connected when label = 1 or label does not exist.
      if action not in self._scan_info[current_scan_id].conn_ids[
          current_pano_id]:
        raise ValueError('Current and next panos must be connected.')

    next_pano_id = action
    self._path_history.append(next_pano_id)
    done = False
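    # The episode ends at the stop node or when the step budget is exhausted.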
    if (next_pano_id == constants.STOP_NODE_ID or
        current_time_step == self._max_actions_per_episode):
      done = True
    return common.EnvOutput(
        reward=np.float32(
            self._compute_reward(
                path_history=self._path_history[:-1],
                next_pano=next_pano_id,
                golden_path=self._current_path_dict['path'],
                end_of_episode=done,
                scan_info=self._scan_info[current_scan_id])),
        done=done,
        observation=self._get_current_observation(next_pano_id, current_scan_id,
                                                  current_time_step + 1),
        info='')
Example #8
  def setUp(self):
    super(AgentR2RTest, self).setUp()
    self.num_panos = 36
    self.image_feature_size = 64 + 256
    self.num_actions = 14
    self.time_step = 3
    self.batch_size = 1
    done = np.array([[True], [False], [True]])
    self._test_environment = common.EnvOutput(
        reward=None,
        done=done,
        observation={
            constants.HEADING: np.zeros([self.time_step, self.batch_size, 1]),
            constants.PITCH: np.zeros([self.time_step, self.batch_size, 1]),
            constants.PANO_ENC:
                tf.random.normal([
                    self.time_step, self.batch_size, self.num_panos,
                    self.image_feature_size
                ]),
            constants.CONN_ENC:
                tf.random.normal([
                    self.time_step, self.batch_size, self.num_actions,
                    self.image_feature_size
                ]),
            constants.PREV_ACTION_ENC:
                tf.random.normal([
                    self.time_step, self.batch_size, self.image_feature_size
                ]),
            constants.INS_TOKEN_IDS:
                np.array([
                    [[3, 6, 1, 0, 0]],
                    [[3, 6, 1, 0, 0]],
                    [[3, 6, 1, 0, 0]],
                ]),
            constants.VALID_CONN_MASK:
                np.array([
                    [[1.0] * 14],
                    [[1.0] * 5 + [0.0] * 9],
                    [[1.0] * 2 + [0.0] * 12],
                ])
        },
        info='')
Example #9
  def __init__(self, state_space_size, unroll_length=1):
    self._state_space_size = state_space_size
    # Creates simple dynamics (T stands for transition):
    #   states = [0, 1, ..., state_space_size - 1] + [STOP]
    #   actions = [-1, 1]
    #   T(s, a) = s + a  iff (s + a) is a valid state
    #           = STOP   otherwise
    self._action_space = [-1, 1]
    self._current_state = None
    # Time-major specs covering unroll_length + 1 timesteps.
    self._env_spec = common.EnvOutput(
        reward=tf.TensorSpec(shape=[unroll_length + 1], dtype=tf.float32),
        done=tf.TensorSpec(shape=[unroll_length + 1], dtype=tf.bool),
        observation={
            'f1': tf.TensorSpec(shape=[unroll_length + 1, 4, 10],
                                dtype=tf.float32),
            'f2': tf.TensorSpec(shape=[unroll_length + 1, 7, 10, 2],
                                dtype=tf.float32)
        },
        info=tf.TensorSpec(shape=[unroll_length + 1], dtype=tf.string))
Example #10
  def setUp(self):
    super(DiscriminatorTest, self).setUp()
    self.data_dir = FLAGS.test_srcdir + (
        'valan/r2r/testdata')
    self._env_config = hparam.HParams(
        problem='R2R',
        scan_base_dir=self.data_dir,
        data_base_dir=self.data_dir,
        vocab_dir=self.data_dir,
        vocab_file='vocab.txt',
        images_per_pano=36,
        max_conns=14,
        image_encoding_dim=64,
        direction_encoding_dim=256,
        image_features_dir=os.path.join(self.data_dir, 'image_features'),
        instruction_len=50,
        max_agent_actions=6,
        project_decoder_input_states=True,
        use_all_final_states=False,
        reward_fn=env_config.RewardFunction.get_reward_fn('distance_to_goal'))

    self._runtime_config = common.RuntimeConfig(task_id=0, num_tasks=100)
    self._env = env.R2REnv(
        data_sources=['R2R_small_split'],
        runtime_config=self._runtime_config,
        env_config=self._env_config)
    self.num_panos = 36
    self.image_feature_size = 64
    self.direction_encoding_dim = 256
    self.num_actions = 14
    self.time_step = 3
    self.batch_size = 2
    done = np.array([[False, True], [True, False], [True, False]])
    self._test_environment = common.EnvOutput(
        reward=0,
        done=done,
        observation={
            constants.PATH_ID:  # Shape = [time, batch]
                np.array([[2, 1], [0, 1], [0, 1]]),
            constants.IS_START:  # Shape = [time, batch]
                np.array([[False, True], [True, False], [False, False]]),
            constants.DISC_MASK:  # Shape = [time, batch]
                np.array([[False, True], [True, True], [True, True]]),
            constants.PANO_ENC:  # Shape = [time, batch, num_panos, feature_size]
                tf.random.normal([
                    self.time_step, self.batch_size, self.num_panos,
                    self.image_feature_size + self.direction_encoding_dim
                ]),
            constants.CONN_ENC:
                # Shape = [time, batch, num_actions, feature_size]
                tf.random.normal([
                    self.time_step, self.batch_size, self.num_actions,
                    self.image_feature_size + self.direction_encoding_dim
                ]),
            constants.PREV_ACTION_ENC:
                # Shape = [time, batch, feature_size]
                tf.random.normal([
                    self.time_step, self.batch_size,
                    self.image_feature_size + self.direction_encoding_dim
                ]),
            constants.NEXT_GOLDEN_ACTION_ENC:
                # Shape = [time, batch, feature_size]
                tf.random.normal([
                    self.time_step, self.batch_size,
                    self.image_feature_size + self.direction_encoding_dim
                ]),
            constants.INS_TOKEN_IDS:  # Shape = [time, batch, token_len]
                np.array([[[5, 3, 2, 1, 0], [3, 4, 5, 6, 1]],
                          [[3, 6, 1, 0, 0], [3, 4, 5, 6, 1]],
                          [[3, 6, 1, 0, 0], [3, 4, 5, 6, 1]]]),
            constants.INS_LEN:  # Shape = [time, batch]
                np.tile(np.array([[3]]), [self.time_step, self.batch_size]),
            constants.VALID_CONN_MASK:
                # Shape = [time, batch, num_connections]
                np.tile(
                    np.array([[[True] * 14], [[True] * 5 + [False] * 9],
                              [[True] * 2 + [False] * 12]]),
                    [1, self.batch_size, 1]),
            constants.LABEL:
                # Shape = [time, batch]
                np.array([[False, False], [True, False], [True, False]])
        },
        info='')
    self._agent_config = agent_config.get_r2r_agent_config()
Example #11
    def setUp(self):
        super(EvalMetricTest, self).setUp()

        self.data_dir = FLAGS.test_srcdir + ('valan/r2r/testdata')

        self._env_config = hparam.HParams(
            problem='R2R',
            base_path=self.data_dir,
            vocab_file='vocab.txt',
            images_per_pano=36,
            max_conns=14,
            image_encoding_dim=2052,
            image_features_dir=os.path.join(self.data_dir, 'image_features'),
            instruction_len=50,
            max_agent_actions=6,
            reward_fn=env_config.RewardFunction.get_reward_fn(
                'distance_to_goal'))

        self._runtime_config = common.RuntimeConfig(task_id=0, num_tasks=1)
        self._env = env.R2REnv(data_sources=['small_split'],
                               runtime_config=self._runtime_config,
                               env_config=self._env_config)

        # Scan: gZ6f7yhEvPG
        # Pano path taken by the agent below: 1, 3, 7, 5, 2
        self._golden_path = [1, 4, 6, 2]
        self._scan_id = 0  # testdata has only a single scan, 'gZ6f7yhEvPG'
        self._env_list = [
            common.EnvOutput(reward=0,
                             done=None,
                             observation={
                                 constants.PANO_ID:
                                 1,
                                 constants.GOLDEN_PATH:
                                 self._golden_path,
                                 constants.GOAL_PANO_ID:
                                 2,
                                 constants.SCAN_ID:
                                 self._scan_id,
                                 constants.GOAL_ROOM_PANOS:
                                 [6, 2, constants.INVALID_NODE_ID]
                             },
                             info=None),
            common.EnvOutput(reward=1,
                             done=None,
                             observation={
                                 constants.PANO_ID:
                                 3,
                                 constants.GOLDEN_PATH:
                                 self._golden_path,
                                 constants.GOAL_PANO_ID:
                                 2,
                                 constants.SCAN_ID:
                                 self._scan_id,
                                 constants.GOAL_ROOM_PANOS:
                                 [6, 2, constants.INVALID_NODE_ID]
                             },
                             info=None),
            common.EnvOutput(reward=1,
                             done=None,
                             observation={
                                 constants.PANO_ID:
                                 7,
                                 constants.GOLDEN_PATH:
                                 self._golden_path,
                                 constants.GOAL_PANO_ID:
                                 2,
                                 constants.SCAN_ID:
                                 self._scan_id,
                                 constants.GOAL_ROOM_PANOS:
                                 [6, 2, constants.INVALID_NODE_ID]
                             },
                             info=None),
            common.EnvOutput(reward=1,
                             done=None,
                             observation={
                                 constants.PANO_ID:
                                 5,
                                 constants.GOLDEN_PATH:
                                 self._golden_path,
                                 constants.GOAL_PANO_ID:
                                 2,
                                 constants.SCAN_ID:
                                 self._scan_id,
                                 constants.GOAL_ROOM_PANOS:
                                 [6, 2, constants.INVALID_NODE_ID]
                             },
                             info=None),
            common.EnvOutput(reward=1,
                             done=False,
                             observation={
                                 constants.PANO_ID:
                                 2,
                                 constants.GOLDEN_PATH:
                                 self._golden_path,
                                 constants.GOAL_PANO_ID:
                                 2,
                                 constants.SCAN_ID:
                                 self._scan_id,
                                 constants.GOAL_ROOM_PANOS:
                                 [6, 2, constants.INVALID_NODE_ID]
                             },
                             info=None),
            common.EnvOutput(
                reward=4,  # success
                done=True,  # end of episode
                # next episode's observation.
                observation={
                    constants.PANO_ID: 11,
                    constants.GOLDEN_PATH: self._golden_path,
                    constants.GOAL_PANO_ID: 2,
                    constants.SCAN_ID: self._scan_id,
                    constants.GOAL_ROOM_PANOS:
                    [6, 2, constants.INVALID_NODE_ID]
                },
                info=None),
        ]
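        # Actions paired with self._env_list above: move to panos 3, 7, 5, 2,
        # then 0, which ends the episode.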
        self._action_list = [3, 7, 5, 2, 0]
Example #12
  def __init__(self):
    self._current_env_output = common.EnvOutput(
        reward=None, done=True, observation=None, info=None)
Example #13
    def _step(self, action):
        """Steps the environment.

    Args:
      action:
        If not using panoramic actions, this is an integer index 0 to 3
        If using panoramic actions, this is an index 0 to K, where
          values 0 to K-1 are angle bins from -180 to 180, and K is the stop
          action

    Returns:
      The next environment output.
    """
        self._frame_count += 1
        # Update distance to goal. Note that at every step, self._distance_to_goal
        # is the distance to goal from the pano we reached in the prev step.
        self._distance_to_goal = self.shortest_path_length(
            self._graph_state.pano_id, self._goal_pano_id)

        if not isinstance(action, np.ndarray):
            action = np.array(action, dtype=np.int64)

        # Convert action idx to representation used by the graph
        if self._panoramic_actions:
            # go_towards is 'stop' or heading angle in degrees
            go_towards = ('stop' if action >= self._panoramic_action_bins else
                          self._pano_action_bin_to_heading(action))
        else:
            # go_towards is left, right, forward or stop.
            go_towards = self.ACTION_IDX_TO_STR[int(action)]

        reward = 0.
        done = False
        if go_towards == 'stop':
            # If the action is to stop.

            reward = 1. if self._goal_pano_id == self._graph_state.pano_id else -1.
            done = True
        else:
            # Else, take the step.
            next_graph_state = self._graph.get_next_graph_state(
                self._graph_state, go_towards)

            if len(self._graph.nodes[next_graph_state.pano_id].neighbors) < 2:
                # Stay still and end the episode at the boundary of the graph.
                logging.info(
                    'At the border (number of neighbors < 2). Did not go %s.',
                    str(go_towards))
                done = True
            else:
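                # Potential-based shaping: reward the decrease in
                # shortest-path distance to the goal, plus a constant
                # per-step reward.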
                prev_state_potential = self.shortest_path_length(
                    self._graph_state.pano_id, self._goal_pano_id)
                cur_state_potential = self.shortest_path_length(
                    next_graph_state.pano_id, self._goal_pano_id)

                dist_reward = prev_state_potential - cur_state_potential
                reward = self._DEFAULT_STEP_REWARD + dist_reward

                self._graph_state = next_graph_state

        if self._frame_count > self._max_actions_per_episode:
            done = True

        observation = self._get_current_observation(prev_action=action)

        # Log transition:
        if self._run_writer:
            self._run_writer.log_action(action, go_towards)
            self._run_writer.log_state(self._graph_state)
            self._run_writer.log_observation(observation)
            if done:
                self._run_writer.write()

        return common.EnvOutput(reward=reward,
                                done=done,
                                observation=observation,
                                info=self._get_step_info())
Example #14
  def _reset(self):
    self._current_state = 0  # always start at state=0
    return common.EnvOutput(
        reward=0., done=False,
        observation=self._get_current_observation(), info='')
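
The snippets in Examples #5, #9, #12, and #14 all belong to the same toy chain environment used in these tests. Below is a minimal, self-contained sketch of how those pieces fit together. It is an illustration only: `EnvOutput` is stood in for by a plain namedtuple with the four fields the examples construct (reward, done, observation, info), the observation is simplified to the raw state index, the `ChainEnv` name is made up here, and the private `_reset`/`_step` methods are called directly, whereas the real framework presumably drives them through a wrapper.

import collections

# Stand-in for common.EnvOutput (assumption: a simple namedtuple with the
# four fields used throughout the examples above).
EnvOutput = collections.namedtuple('EnvOutput',
                                   ['reward', 'done', 'observation', 'info'])


class ChainEnv(object):
  """Toy chain MDP mirroring the dynamics commented in Example #9."""

  def __init__(self, state_space_size):
    # states = [0, 1, ..., state_space_size - 1] + [STOP]; actions = [-1, 1].
    self._state_space_size = state_space_size
    self._action_space = [-1, 1]
    self._current_state = None

  def _get_current_observation(self):
    # Simplified observation: just the current state index.
    return {'state': self._current_state}

  def _reset(self):
    self._current_state = 0  # always start at state=0
    return EnvOutput(reward=0., done=False,
                     observation=self._get_current_observation(), info='')

  def _step(self, action):
    assert action in self._action_space
    new_state = self._current_state + action
    done = False
    if new_state < 0 or new_state == self._state_space_size:
      new_state = -100  # STOP
      done = True
    self._current_state = new_state
    return EnvOutput(reward=float(action), done=done,
                     observation=self._get_current_observation(), info='')


env = ChainEnv(state_space_size=5)
output = env._reset()
for action in [1, 1, 1, 1, 1]:  # walk right until stepping off the chain
  output = env._step(action)
  print(output.reward, output.done, output.observation)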