Ejemplo n.º 1
0
 def select_action(self) -> utils.SlotBasedAction:
   """Returns the next scripted action for the current trial.

   `self._actions` is indexed by trial number and holds, per trial, a
   sequence of `(stone_ind, potion_ind)` pairs. `self._action_num` tracks how
   many of those pairs have already been played in each trial. A `potion_ind`
   of -1 means the stone goes into the cauldron.

   Returns:
     The next action of the script, or a no-op when the current trial has no
     script or its script is exhausted.
   """
   trial = self._env.trial_number
   # No script exists for this trial.
   if not 0 <= trial < len(self._actions):
     return utils.SlotBasedAction(no_op=True)

   next_index = self._action_num[trial]
   trial_script = self._actions[trial]
   # Every scripted action of this trial has already been played.
   if next_index >= len(trial_script):
     return utils.SlotBasedAction(no_op=True)

   scripted = trial_script[next_index]
   # Advance the per-trial cursor before decoding the action.
   self._action_num[trial] += 1
   stone_ind, potion_ind = scripted
   if potion_ind != -1:
     return utils.SlotBasedAction(stone_ind=stone_ind, potion_ind=potion_ind)
   return utils.SlotBasedAction(stone_ind=stone_ind, cauldron=True)
Ejemplo n.º 2
0
 def env_mid_trial(self, reset=True, no_op_steps=0):
   """Creates an environment and optionally advances it with no-op steps.

   Args:
     reset: Whether to call `reset()` on the newly created environment.
     no_op_steps: Number of no-op slot based actions to step through.

   Returns:
     The environment created by `make_object_under_test`.
   """
   fresh_env = self.make_object_under_test()
   if reset:
     fresh_env.reset()
   for _ in range(no_op_steps):
     idle = utils.SlotBasedAction(no_op=True)
     fresh_env.step_slot_based_action(idle)
   return fresh_env
Ejemplo n.º 3
0
 def test_no_op(self):
   """A no-op step keeps the symbolic observation and yields zero reward."""
   obs_key = 'symbolic_obs'
   env = self.make_object_under_test()
   before = env.reset()
   # Step once without doing anything.
   idle = utils.SlotBasedAction(no_op=True)
   after = env.step_slot_based_action(idle)
   np.testing.assert_allclose(
       before.observation[obs_key], after.observation[obs_key])
   self.assertEqual(after.reward, 0)
Ejemplo n.º 4
0
def make_random_action_sequence(num_trials, end_trial_action):
  """Builds an integer action sequence of random actions for several trials.

  Each trial consists of 10 random slot based actions (some of which may not
  be possible) followed by whatever finishes the trial: a single end-trial
  action when `end_trial_action` is set, otherwise enough no-ops to use up
  the remaining `_MAX_STEPS_PER_TRIAL` steps.

  Args:
    num_trials: Number of trials to generate actions for.
    end_trial_action: Whether an explicit end-trial action is available; also
      forwarded to `symbolic_alchemy.slot_based_action_to_int`.

  Returns:
    List of the actions converted with `slot_based_action_to_int`.
  """
  random_steps = 10
  assert random_steps <= _MAX_STEPS_PER_TRIAL
  padding_steps = _MAX_STEPS_PER_TRIAL - random_steps

  slot_actions = []
  for _ in range(num_trials):
    # Random part of the trial.
    for _ in range(random_steps):
      slot_actions.append(random_slot_based_action())
    # Finish the trial, explicitly or by idling until the steps run out.
    if end_trial_action:
      slot_actions.append(utils.SlotBasedAction(end_trial=True))
      continue
    slot_actions.extend(
        utils.SlotBasedAction(no_op=True) for _ in range(padding_steps))

  as_ints = []
  for slot_action in slot_actions:
    as_ints.append(
        symbolic_alchemy.slot_based_action_to_int(
            slot_action, end_trial_action))
  return as_ints
Ejemplo n.º 5
0
 def end_trial(self) -> Tuple[float, float]:
     """Steps the environment until the current trial has ended.

     With an end-trial action available a single end-trial step is taken.
     Otherwise no-ops are taken: at least one if a new trial has just begun,
     then more until the episode's last step or the start of a new trial.

     Returns:
       Tuple of the summed reward of the steps taken and the discount, which
       is always 1.0 here.
     """
     # A finished episode has to be reset before any further step is taken.
     if self._is_last_step:
         self.reset()

     def take(**flags) -> float:
         # Performs one step without building an observation.
         return self.step_no_observation(
             type_utils.SlotBasedAction(**flags))

     discount = 1.0
     total = 0.0
     if self._end_trial_action:
         total += take(end_trial=True)
         return total, discount

     # Right at the start of a trial at least one step is needed.
     if self.is_new_trial():
         total += take(no_op=True)
     while not self._is_last_step and not self.is_new_trial():
         total += take(no_op=True)
     return total, discount
Ejemplo n.º 6
0
    def _int_to_slot_based_action(self,
                                  action: int) -> type_utils.SlotBasedAction:
        """Decodes an integer action into a slot based action.

        With an end trial action the integer encoding is as follows; without
        one, subtract 1 from every integer shown:
          0 ends the trial.
          1 does nothing.
          s * (num potion slots + 1) + 2 puts the stone at index s into the
            cauldron.
          s * (num potion slots + 1) + 3 + p puts the stone at index s into
            the potion at index p.

        Args:
          action: Integer representing the action to take.

        Returns:
          SlotBasedAction representing the action to take.
        """
        # Shift so that stone actions start at 0 and the special actions are
        # negative. The copy keeps the in-place subtraction away from the
        # caller's object.
        offset = 2 if self._end_trial_action else 1
        shifted = copy.deepcopy(action)
        shifted -= offset
        if shifted < 0:
            is_end_trial = shifted == END_TRIAL
            is_no_op = shifted == NO_OP
            return type_utils.SlotBasedAction(end_trial=is_end_trial,
                                              no_op=is_no_op)

        # Per stone there is one cauldron slot followed by the potion slots.
        targets_per_stone = MAX_POTIONS + 1
        stone_ind, target = divmod(shifted, targets_per_stone)
        potion_ind = target - 1
        if potion_ind >= 0:
            return type_utils.SlotBasedAction(stone_ind=stone_ind,
                                              potion_ind=potion_ind)
        return type_utils.SlotBasedAction(stone_ind=stone_ind, cauldron=True)
Ejemplo n.º 7
0
 def _type_based_to_slot_based(
         self,
         action: type_utils.TypeBasedAction) -> type_utils.SlotBasedAction:
     """Converts a type based action into the equivalent slot based action.

     The perceived stone is aligned with the chemistry's rotation and mapped
     through the stone map; the perceived potion is mapped through the potion
     map. The game state is then asked for the matching indices.

     Args:
       action: Type based action to convert.

     Returns:
       SlotBasedAction with the same end_trial, no_op and cauldron flags.
       `stone_ind` / `potion_ind` stay None when the action uses no stone /
       no potion.
     """
     chemistry = self._chemistry

     stone_slot = None
     if action.using_stone:
         rotated = stones_and_potions.align(action.perceived_stone,
                                            chemistry.rotation)
         latent = chemistry.stone_map.apply(rotated)
         # The stone is looked up as a graph node built from its latent
         # coordinates, with -1 as the node's first argument.
         node = graphs.Node(-1, latent.latent_coords)
         stone_slot = self.game_state.get_stone_ind(stone=node)

     potion_slot = None
     if action.using_potion:
         potion_slot = self.game_state.get_potion_ind(
             potion=chemistry.potion_map.apply(action.perceived_potion))

     return type_utils.SlotBasedAction(
         end_trial=action.end_trial,
         no_op=action.no_op,
         stone_ind=stone_slot,
         cauldron=action.cauldron,
         potion_ind=potion_slot,
     )
Ejemplo n.º 8
0
    def use_positive_stones(self) -> dm_env.TimeStep:
        """Puts the stones above threshold 0 into the cauldron and ends the trial.

        The stones are those returned by `game_state.get_stones_above_thresh`
        for `self._reward_weights` with a threshold of 0. If the episode or
        the trial ends while stones are being used, the step is returned
        right away; otherwise `end_trial` is called afterwards.

        Returns:
          Step built by `construct_step` from the summed reward and the
          product of the discounts of all steps taken.
        """
        # A finished episode has to be reset before any further step is taken.
        if self._is_last_step:
            self.reset()

        reward_sum = 0
        discount_product = 1.0
        stones_to_use = self.game_state.get_stones_above_thresh(
            self._reward_weights, threshold=0)
        for stone_ind in stones_to_use:
            into_cauldron = type_utils.SlotBasedAction(stone_ind=stone_ind,
                                                       cauldron=True)
            step = self.step_slot_based_action(into_cauldron)
            reward_sum += step.reward
            discount_product *= step.discount
            # Stop early once the trial or the whole episode is over.
            if self._is_last_step or self.is_new_trial():
                return self.construct_step(float(reward_sum),
                                           discount_product)

        trial_reward, trial_discount = self.end_trial()
        # The final step type and observation come from construct_step.
        return self.construct_step(float(reward_sum + trial_reward),
                                   discount_product * trial_discount)
Ejemplo n.º 9
0
 def select_action(self) -> utils.SlotBasedAction:
   """Always chooses to do nothing."""
   idle = utils.SlotBasedAction(no_op=True)
   return idle
Ejemplo n.º 10
0
def random_slot_based_action():
  """Draws a random slot based action that uses a stone.

  The stone index is uniform over the stone slots. The target is uniform over
  the cauldron (drawn as -1) and the potion slots.

  Returns:
    SlotBasedAction putting the stone into the cauldron or into a potion.
  """
  last_stone = symbolic_alchemy.MAX_STONES - 1
  last_potion = symbolic_alchemy.MAX_POTIONS - 1
  stone_ind = random.randint(0, last_stone)
  target = random.randint(-1, last_potion)
  if target >= 0:
    return utils.SlotBasedAction(stone_ind=stone_ind, potion_ind=target)
  return utils.SlotBasedAction(stone_ind=stone_ind, cauldron=True)
Ejemplo n.º 11
0
class SymbolicAlchemySeeChemistryTest(parameterized.TestCase):
  """We don't do the full mixin tests for the chemistry observation."""

  def _make_env(self, see_chemistry, constraint, **kwargs):
    """Builds a fixed chemistry environment exposing a chemistry observation.

    Args:
      see_chemistry: ChemistrySeen config, registered under `_CHEM_NAME`.
      constraint: Constraint forwarded to `make_fixed_chem_env`.
      **kwargs: Extra keyword arguments forwarded to `make_fixed_chem_env`.

    Returns:
      Environment created with `observe_used=True` and
      `end_trial_action=False`.
    """
    return make_fixed_chem_env(
        constraint=constraint,
        see_chemistries={_CHEM_NAME: see_chemistry},
        observe_used=True, end_trial_action=False, **kwargs)

  @parameterized.parameters(
      # In the graph observations edges are in the following order:
      #     _________11__________
      #    /|                  /|
      #  9/ |               10/ |
      #  /  |                /  |
      # /___|_____8_________/   |
      # |   |6              |   |7
      # |   |               |   |
      # |2  |               |4  |
      # |   |_______5_______|___|
      # |   /               |   /
      # |  /1               |  /3
      # | /                 | /
      # |/________0_________|/
      #
      # With coordinate system:
      # |
      # |z  /
      # |  /y
      # | /
      # |/___x___
      #
      {'see_chemistry': utils.ChemistrySeen(
          potion_map=utils.PotionMapElement(present=False),
          stone_map=utils.StoneMapElement(present=False),
          rotation=utils.RotationElement(present=False),
          content=utils.ElementContent.GROUND_TRUTH),
       'constraint': graphs.no_bottleneck_constraints()[0],
       # With no constraints all edges should be present
       'expected_obs': np.ones((12,), np.float32),
       'expected_len': 12},
      {'see_chemistry': utils.ChemistrySeen(
          potion_map=utils.PotionMapElement(present=False),
          stone_map=utils.StoneMapElement(present=False),
          rotation=utils.RotationElement(present=False),
          content=utils.ElementContent.GROUND_TRUTH),
       'constraint': graphs.bottleneck1_constraints()[0],
       # For bottleneck1 constraint the only x direction edge that exists is 8,
       # so 0, 5 and 11 are missing.
       'expected_obs': np.array([0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0,
                                 1.0, 1.0, 0.0], np.float32),
       'expected_len': 12},
      {'see_chemistry': utils.ChemistrySeen(
          stone_map=utils.StoneMapElement(present=False),
          graph=utils.GraphElement(present=False),
          rotation=utils.RotationElement(present=False),
          content=utils.ElementContent.GROUND_TRUTH),
       'constraint': graphs.no_bottleneck_constraints()[0],
       # First 6 entries are a 1-hot for the dimension map, in this case the
       # dimension map used is the first one.
       # The next 3 entries are 0 or 1 for the direction map, in this case all
       # directions are positive.
       'expected_obs': np.array([1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0],
                                np.float32),
       'expected_len': 9},
      {'see_chemistry': utils.ChemistrySeen(
          potion_map=utils.PotionMapElement(present=False),
          graph=utils.GraphElement(present=False),
          rotation=utils.RotationElement(present=False),
          content=utils.ElementContent.GROUND_TRUTH),
       'constraint': graphs.no_bottleneck_constraints()[0],
       # 3 entries are 0 or 1 for the direction map, in this case all directions
       # are positive.
       'expected_obs': np.array([1.0, 1.0, 1.0], np.float32),
       'expected_len': 3},
      {'see_chemistry': utils.ChemistrySeen(
          content=utils.ElementContent.GROUND_TRUTH),
       'constraint': graphs.no_bottleneck_constraints()[0],
       # Observations are from the previous tests concatenated with graph first,
       # then potion map then stone map.
       'expected_obs': np.array([1.0, 1.0, 1.0, 1.0, 1.0, 1.0,  # graph
                                 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,  # graph
                                 1.0, 0.0, 0.0, 0.0, 0.0, 0.0,  # potion dim map
                                 1.0, 1.0, 1.0,  # potion dir map
                                 1.0, 1.0, 1.0,  # stone map
                                 1.0, 0.0, 0.0, 0.0], np.float32),  # rotation
       'expected_len': 28},
      # Tests for the belief state observation.
      {'see_chemistry': utils.ChemistrySeen(
          potion_map=utils.PotionMapElement(present=False),
          stone_map=utils.StoneMapElement(present=False),
          rotation=utils.RotationElement(present=False),
          content=utils.ElementContent.BELIEF_STATE,
          precomputed='perceptual_mapping_randomized_with_random_bottleneck'),
       'constraint': graphs.no_bottleneck_constraints()[0],
       # With no actions the belief state should be unknown for all edges.
       'expected_obs': 0.5 * np.ones((12,), np.float32),
       'expected_len': 12},
      {'see_chemistry': utils.ChemistrySeen(
          potion_map=utils.PotionMapElement(present=False),
          stone_map=utils.StoneMapElement(present=False),
          rotation=utils.RotationElement(present=False),
          content=utils.ElementContent.BELIEF_STATE,
          precomputed='perceptual_mapping_randomized_with_random_bottleneck'),
       'constraint': graphs.bottleneck1_constraints()[0],
       # It shouldn't make a difference whether the underlying chemistry has a
       # constraint or not, everything is unknown.
       'expected_obs': 0.5 * np.ones((12,), np.float32),
       'expected_len': 12},
      {'see_chemistry': utils.ChemistrySeen(
          stone_map=utils.StoneMapElement(present=False),
          graph=utils.GraphElement(present=False),
          rotation=utils.RotationElement(present=False),
          content=utils.ElementContent.BELIEF_STATE,
          precomputed='perceptual_mapping_randomized_with_random_bottleneck'),
       'constraint': graphs.no_bottleneck_constraints()[0],
       # First 6 entries are a 1-hot for the dimension map, with no actions all
       # of the dimension maps are possible so the entries are all unknown.
       # The next 3 entries are 0 or 1 for the direction map, or 0.5 for unknown
       # which is the case if no actions are taken.
       'expected_obs': 0.5 * np.ones((9,), np.float32),
       'expected_len': 9},
      {'see_chemistry': utils.ChemistrySeen(
          potion_map=utils.PotionMapElement(present=False),
          graph=utils.GraphElement(present=False),
          rotation=utils.RotationElement(present=False),
          content=utils.ElementContent.BELIEF_STATE,
          precomputed='perceptual_mapping_randomized_with_random_bottleneck'),
       'constraint': graphs.no_bottleneck_constraints()[0],
       # 3 entries are 0 or 1 for the direction map, in this case all directions
       # are positive, since the test stones include an instance of the best
       # stone, the stone map should be known from the start.
       'expected_obs': np.array([1.0, 1.0, 1.0], np.float32),
       'expected_len': 3},
      {'see_chemistry': utils.ChemistrySeen(
          content=utils.ElementContent.BELIEF_STATE,
          rotation=utils.RotationElement(present=False),
          precomputed='perceptual_mapping_randomized_with_random_bottleneck'),
       'constraint': graphs.no_bottleneck_constraints()[0],
       # Observations are from the previous tests concatenated with graph first,
       # then potion map then stone map.
       'expected_obs': np.array([0.5, 0.5, 0.5, 0.5, 0.5, 0.5,  # graph
                                 0.5, 0.5, 0.5, 0.5, 0.5, 0.5,  # graph
                                 0.5, 0.5, 0.5, 0.5, 0.5, 0.5,  # potion dim map
                                 0.5, 0.5, 0.5,  # potion dir map
                                 1.0, 1.0, 1.0], np.float32),  # stone map
       'expected_len': 24},
      {'see_chemistry': utils.ChemistrySeen(
          content=utils.ElementContent.BELIEF_STATE,
          rotation=utils.RotationElement(present=False),
          precomputed='perceptual_mapping_randomized_with_random_bottleneck'),
       'constraint': graphs.no_bottleneck_constraints()[0],
       'actions': [utils.SlotBasedAction(stone_ind=0, potion_ind=0)],
       # If we put the 0th stone into the 0th potion we will see a change on
       # axis 1, we will become certain that the dim map is either [0, 1, 2] or
       # [2, 1, 0], we will become certain that the edge from (-1, -1, 1) to
       # (-1, 1, 1) exists.
       'expected_obs': np.array([0.5, 0.5, 0.5, 0.5, 0.5, 0.5,  # graph
                                 0.5, 0.5, 0.5, 1.0, 0.5, 0.5,  # graph
                                 0.5, 0.0, 0.0, 0.0, 0.0, 0.5,  # potion dim map
                                 0.5, 1.0, 0.5,  # potion dir map
                                 1.0, 1.0, 1.0], np.float32),  # stone map
       'expected_len': 24},
      # Tests for a combination of content types
      {'see_chemistry': utils.ChemistrySeen(
          potion_map=utils.PotionMapElement(present=False),
          stone_map=utils.StoneMapElement(present=False),
          rotation=utils.RotationElement(present=False),
          groups=[
              utils.GroupInChemistry(
                  {utils.ElementType.GRAPH: {0, 1, 2, 3}}, [1.0, 0.0, 0.0]),
              utils.GroupInChemistry(
                  {utils.ElementType.GRAPH: {4, 5, 6}}, [0.0, 0.0, 1.0]),
              utils.GroupInChemistry(
                  {utils.ElementType.GRAPH: {7, 8, 9, 10, 11}}, [0.0, 1.0, 0.0]),
              ],
          precomputed='perceptual_mapping_randomized_with_random_bottleneck'),
       'constraint': graphs.bottleneck1_constraints()[0],
       'actions': [utils.SlotBasedAction(stone_ind=0, potion_ind=0)],
       # The graph edges are split into three groups, each with different
       # content; see the per-group notes in expected_obs below.
       'expected_obs': np.array(
           [0.0, 1.0, 1.0, 1.0,  # ground truth - 0 missing 1, 2, 3 exist
            0.5, 0.5, 0.5,  # unknown - these are set to 0.5
            # belief state - after the action 9 is known, others are unknown
            0.5, 0.5, 1.0, 0.5, 0.5], np.float32),
       'expected_len': 12},
      # Rotation tests
      {'see_chemistry': utils.ChemistrySeen(
          potion_map=utils.PotionMapElement(present=False),
          stone_map=utils.StoneMapElement(present=False),
          graph=utils.GraphElement(present=False),
          content=utils.ElementContent.GROUND_TRUTH),
       'constraint': graphs.no_bottleneck_constraints()[0],
       'make_env_kwargs': {
           'rotation': stones_and_potions.rotation_from_angles([0, 0, 0])},
       'expected_obs': np.array([1.0, 0.0, 0.0, 0.0], np.float32),
       'expected_len': 4},
      {'see_chemistry': utils.ChemistrySeen(
          potion_map=utils.PotionMapElement(present=False),
          stone_map=utils.StoneMapElement(present=False),
          graph=utils.GraphElement(present=False),
          content=utils.ElementContent.GROUND_TRUTH),
       'constraint': graphs.no_bottleneck_constraints()[0],
       'make_env_kwargs': {
           'rotation': stones_and_potions.rotation_from_angles([0, 0, -45])},
       'expected_obs': np.array([0.0, 1.0, 0.0, 0.0], np.float32),
       'expected_len': 4},
      {'see_chemistry': utils.ChemistrySeen(
          potion_map=utils.PotionMapElement(present=False),
          stone_map=utils.StoneMapElement(present=False),
          graph=utils.GraphElement(present=False),
          content=utils.ElementContent.GROUND_TRUTH),
       'constraint': graphs.no_bottleneck_constraints()[0],
       'make_env_kwargs': {
           'rotation': stones_and_potions.rotation_from_angles([0, -45, 0])},
       'expected_obs': np.array([0.0, 0.0, 1.0, 0.0], np.float32),
       'expected_len': 4},
      {'see_chemistry': utils.ChemistrySeen(
          potion_map=utils.PotionMapElement(present=False),
          stone_map=utils.StoneMapElement(present=False),
          graph=utils.GraphElement(present=False),
          content=utils.ElementContent.GROUND_TRUTH),
       'constraint': graphs.no_bottleneck_constraints()[0],
       'make_env_kwargs': {
           'rotation': stones_and_potions.rotation_from_angles([-45, 0, 0])},
       'expected_obs': np.array([0.0, 0.0, 0.0, 1.0], np.float32),
       'expected_len': 4},
      # In belief state if we have stones which are unique to a particular
      # rotation then the rotation should be known and possibly part of the
      # stone map.
      {'see_chemistry': utils.ChemistrySeen(
          potion_map=utils.PotionMapElement(present=False),
          graph=utils.GraphElement(present=False),
          content=utils.ElementContent.BELIEF_STATE,
          precomputed=('perceptual_mapping_randomized_with_rotation_and_'
                       'random_bottleneck')),
       'constraint': graphs.no_bottleneck_constraints()[0],
       'make_env_kwargs': {
           'rotation': stones_and_potions.rotation_from_angles([-45, 0, 0]),
           'test_stones': [[Stone(0, [1, 1, 1]), Stone(0, [1, 1, -1])]]},
       'expected_obs': np.array(
           [1.0, 1.0, 1.0,  # stone map
            0.0, 0.0, 0.0, 1.0], np.float32),  # rotation
       'expected_len': 7},
      # Otherwise rotation and stone map observations should both be unknown.
      {'see_chemistry': utils.ChemistrySeen(
          potion_map=utils.PotionMapElement(present=False),
          graph=utils.GraphElement(present=False),
          content=utils.ElementContent.BELIEF_STATE,
          precomputed=('perceptual_mapping_randomized_with_rotation_and_'
                       'random_bottleneck')),
       'constraint': graphs.no_bottleneck_constraints()[0],
       'make_env_kwargs': {
           'rotation': stones_and_potions.rotation_from_angles([-45, 0, 0]),
           'test_stones': [[Stone(0, [1, 1, 1])]]},
       'expected_obs': np.array(
           [0.5, 0.5, 0.5,  # stone map
            0.5, 0.5, 0.5, 0.5], np.float32),  # rotation
       'expected_len': 7},
      {'see_chemistry': utils.ChemistrySeen(
          potion_map=utils.PotionMapElement(present=False),
          graph=utils.GraphElement(present=False),
          content=utils.ElementContent.BELIEF_STATE,
          precomputed=('perceptual_mapping_randomized_with_rotation_and_'
                       'random_bottleneck')),
       'constraint': graphs.no_bottleneck_constraints()[0],
       'make_env_kwargs': {
           'rotation': stones_and_potions.rotation_from_angles([-45, 0, 0]),
           'test_stones': [[Stone(0, [1, 1, 1])]]},
       'actions': [utils.SlotBasedAction(stone_ind=0, potion_ind=6)],
       'expected_obs': np.array(
           [1.0, 1.0, 1.0,  # stone map
            0.0, 0.0, 0.0, 1.0], np.float32),  # rotation
       'expected_len': 7},
  )
  def test_see_chemistry(
      self, see_chemistry, constraint, expected_obs, expected_len,
      actions=None, make_env_kwargs=None):
    """Checks the chemistry observation for one parameterized configuration.

    The cases cover ground truth content, belief state content, a mix of
    content types and rotations.

    Args:
      see_chemistry: ChemistrySeen config passed to the environment.
      constraint: Graph constraint passed to the environment.
      expected_obs: Expected values of the `_CHEM_NAME` observation.
      expected_len: Expected length of the `_CHEM_NAME` observation.
      actions: Optional slot based actions to step through after the reset;
        the observation of the last step taken is the one checked.
      make_env_kwargs: Optional extra keyword arguments for `_make_env`.
    """
    env = self._make_env(
        see_chemistry=see_chemistry, constraint=constraint,
        **(make_env_kwargs or {}))
    timestep = env.reset()
    if actions:
      for action in actions:
        timestep = env.step_slot_based_action(action)

    np.testing.assert_allclose(
        timestep.observation[_CHEM_NAME], expected_obs)
    self.assertLen(
        timestep.observation[_CHEM_NAME], expected_len)

  def test_see_chem_before_reset(self):
    """Checks the chemistry observation is all 0.5 only before a reset."""
    env = self._make_env(
        see_chemistry=utils.ChemistrySeen(
            content=utils.ElementContent.GROUND_TRUTH),
        constraint=graphs.no_bottleneck_constraints()[0])
    obs = env.observation()
    # Observation should be all unknown because we have not reset the
    # environment yet.
    np.testing.assert_allclose(obs[_CHEM_NAME], [0.5] * 28)
    # After resetting none of the chem should be unknown, i.e. every entry
    # must differ from 0.5 by more than 0.01.
    env.reset()
    obs = env.observation()
    np.testing.assert_array_less(
        0.01 * np.ones((28,)),
        np.abs(obs[_CHEM_NAME] - np.array([0.5] * 28)))
Ejemplo n.º 12
0
def slot_based_use_potion(
    env, unused_perceived_stone, stone, unused_perceived_potion, potion):
  """Steps `env` by putting `stone` into `potion`, addressed by their `idx`.

  The perceived stone and potion arguments are accepted but ignored.

  Returns:
    Whatever `env.step_slot_based_action` returns.
  """
  del unused_perceived_stone, unused_perceived_potion
  use_potion = utils.SlotBasedAction(
      stone_ind=stone.idx, potion_ind=potion.idx)
  return env.step_slot_based_action(use_potion)
Ejemplo n.º 13
0
def slot_based_use_stone(env, unused_perceived_stone, stone):
  """Steps `env` by putting `stone`, addressed by its `idx`, into the cauldron.

  The perceived stone argument is accepted but ignored.

  Returns:
    Whatever `env.step_slot_based_action` returns.
  """
  del unused_perceived_stone
  into_cauldron = utils.SlotBasedAction(stone_ind=stone.idx, cauldron=True)
  return env.step_slot_based_action(into_cauldron)