Ejemplo n.º 1
0
  def _test_use_potion(self, take_action):
    """Applies the first test potion to the first test stone and checks results.

    Args:
      take_action: Callable invoked as `take_action(env, perceived_stone, stone,
        perceived_potion, potion)`. Its return value is read as a timestep
        exposing `reward` and `observation`.
    """
    env = self.make_object_under_test()
    env.reset()

    # Only the first stone and first potion of the first test trial are used.
    stone, potion = _TEST_STONES[0][0], _TEST_POTIONS[0][0]
    perceived_stone = stones_and_potions.unalign(
        _FIXED_STONE_MAP.apply_inverse(stone.latent_stone()), _FIXED_ROTATION)
    perceived_potion = _FIXED_POTION_MAP.apply_inverse(potion.latent_potion())

    timestep = take_action(
        env, perceived_stone, stone, perceived_potion, potion)
    self.assertEqual(timestep.reward, 0)

    num_stone_features, _ = symbolic_alchemy.slot_based_num_features(
        env.observe_used)
    symbolic_obs = timestep.observation['symbolic_obs']

    # The entry directly after MAX_STONES stone slots is checked as the first
    # potion's feature.
    first_potion_index = num_stone_features * symbolic_alchemy.MAX_STONES
    self.assertAlmostEqual(symbolic_obs[first_potion_index], 1.0, 4)

    first_stone_obs = symbolic_obs[:num_stone_features]
    # Coords change to -1, 1, 1 and reward changes to 1/max reward
    for axis, expected_coord in enumerate((-1.0, 1.0, 1.0)):
      self.assertAlmostEqual(first_stone_obs[axis], expected_coord, 4)
    self.assertAlmostEqual(
        first_stone_obs[3], 1.0 / stones_and_potions.max_reward(), 4)

    # Ending the trial after only using the potion must give zero reward.
    trial_reward, _ = env.end_trial()
    self.assertEqual(trial_reward, 0)
Ejemplo n.º 2
0
  def _test_use_stone(self, take_action):
    """Uses each stone of the first test trial and checks reward and observation.

    Args:
      take_action: Callable invoked as `take_action(env, perceived_stone,
        stone)`. Its return value is read as a timestep exposing `reward` and
        `observation`.
    """
    env = self.make_object_under_test()
    env.reset()
    slot_width, _ = symbolic_alchemy.slot_based_num_features(env.observe_used)
    default_stone_features, _ = env._default_features()

    # Derive the latent and perceived form of every stone up front, before any
    # action is taken in the environment.
    latents = [test_stone.latent_stone() for test_stone in _TEST_STONES[0]]
    aligned = [_FIXED_STONE_MAP.apply_inverse(latent) for latent in latents]
    perceived = [stones_and_potions.unalign(a, _FIXED_ROTATION)
                 for a in aligned]

    for stone, perceived_stone, latent_stone in zip(
        _TEST_STONES[0], perceived, latents):
      timestep = take_action(env, perceived_stone, stone)
      reward_for_stone = reward_fcn()(latent_stone.latent_coords)
      self.assertEqual(timestep.reward, reward_for_stone)

      # The slot belonging to this stone must be back at the default features.
      slot_start = slot_width * stone.idx
      slot_end = slot_width * (stone.idx + 1)
      slot_obs = timestep.observation['symbolic_obs'][slot_start:slot_end]
      for observed, default in zip(slot_obs, default_stone_features[0, :]):
        self.assertAlmostEqual(observed, default, 4)

    # Ending the trial after all stones were used must give zero reward.
    trial_reward, _ = env.end_trial()
    self.assertEqual(trial_reward, 0)
def get_stone_tests():
    """Build test cases for stone <-> unity stone property conversion.

    One case is produced for every combination of possible rotation and
    possible stone map.

    Returns:
        List of 5-tuples: the list of (perceived stone, latent stone) pairs,
        the to-unity conversion function, the from-unity conversion function
        bound to the rotation, an identity function and `_make_tuple`.
    """
    cases = []
    for rotation in stones_and_potions.possible_rotations():
        for stone_map in stones_and_potions.possible_stone_maps():
            # Pair each latent stone with how it is perceived under this
            # stone map and rotation.
            stone_pairs = []
            for latent in stones_and_potions.possible_latent_stones():
                aligned = stone_map.apply_inverse(latent)
                perceived = stones_and_potions.unalign(aligned, rotation)
                stone_pairs.append((perceived, latent))
            from_unity = functools.partial(
                from_stone_unity_properties, rotation=rotation)
            case = (
                stone_pairs,
                unity_python_conversion.to_stone_unity_properties,
                from_unity,
                lambda x: x,
                _make_tuple,
            )
            cases.append(case)
    return cases
Ejemplo n.º 4
0
def to_unity_chemistry(
    chemistry: utils.Chemistry
) -> Tuple[alchemy_pb2.Chemistry, alchemy_pb2.RotationMapping]:
    """Convert a python chemistry into its unity representation.

    Args:
        chemistry: Python chemistry whose stone map, potion map and rotation
            are applied to the possible latent stones and potions.

    Returns:
        Tuple of the unity Chemistry message holding the converted stones and
        potions, and the rotation mapping built from the rotation of the
        python-to-unity dimension map.
    """
    # The dimension swapping map between latent stones in unity and latent
    # stones in python (see from_unity_chemistry for more explanation).
    python_to_unity = PythonToUnityDimMap(chemistry)

    # Latent stones are always in the same places. Each one is dimension
    # mapped, passed through the stone map and rotation to get its perceived
    # form, and then paired with the unmapped latent stone.
    unity_stones = []
    for latent_stone in stones_and_potions.possible_latent_stones():
        mapped_stone = python_to_unity.apply_to_stone(latent_stone)
        aligned_stone = chemistry.stone_map.apply_inverse(mapped_stone)
        perceived_stone = stones_and_potions.unalign(
            aligned_stone, chemistry.rotation)
        unity_stones.append(
            to_stone_unity_properties(perceived_stone, latent_stone))

    # Same for potions, using the potion map to get the perceived potion.
    unity_potions = []
    for latent_potion in stones_and_potions.possible_latent_potions():
        mapped_potion = python_to_unity.apply_to_potion(latent_potion)
        perceived_potion = chemistry.potion_map.apply_inverse(mapped_potion)
        unity_potions.append(
            to_potion_unity_properties(
                perceived_potion, latent_potion, python_to_unity.graph))

    return (
        alchemy_pb2.Chemistry(stones=unity_stones, potions=unity_potions),
        rotation_to_unity(python_to_unity.rotation),
    )
Ejemplo n.º 5
0
    def observation(self):
        """Build the slot based symbolic observation.

        Every stone and potion slot starts from the default features; the slots
        of stones and potions which exist in the game state are then
        overwritten with features of the perceived item.

        Returns:
            Dict with the flattened stone features followed by the flattened
            potion features under 'symbolic_obs', updated with the entries
            returned by `self.chem_observation()`.
        """
        num_axes = stones_and_potions.get_num_axes()
        default_stone, default_potion = self._default_features()
        stone_features = np.concatenate([default_stone] * MAX_STONES, axis=0)
        potion_features = np.concatenate(
            [default_potion] * MAX_POTIONS, axis=0)

        # Without a game state there is nothing to fill in.
        if self.game_state:
            stones = self.game_state.existing_stones()
            potions = self.game_state.existing_potions()
        else:
            stones, potions = [], []

        for stone in stones:
            slot = self.game_state.get_stone_ind(stone_inst=stone.idx)
            assert 0 <= slot < MAX_STONES, 'stone idx out of range'
            perceived_stone = stones_and_potions.unalign(
                self._chemistry.stone_map.apply_inverse(stone.latent_stone()),
                self._chemistry.rotation)
            for axis in range(num_axes):
                stone_features[slot, axis] = (
                    perceived_stone.perceived_coords[axis])
            # This feature is equivalent to the value indicator seen on the
            # stone as it distinguishes different reward values.
            normalised_reward = (
                perceived_stone.reward / stones_and_potions.max_reward())
            stone_features[slot, num_axes] = normalised_reward
            if self.observe_used:
                # The extra column after the reward is zeroed for existing
                # stones.
                stone_features[slot, num_axes + 1] = 0.0

        for potion in potions:
            slot = self.game_state.get_potion_ind(potion_inst=potion.idx)
            assert slot < MAX_POTIONS, 'potion idx out of range'
            perceived_potion = self._chemistry.potion_map.apply_inverse(
                potion.latent_potion())
            scaled_type = perceived_potion.index() / POTION_TYPE_SCALE
            potion_features[slot, 0] = scaled_type - 1.0
            if self.observe_used:
                # The extra column after the type is zeroed for existing
                # potions.
                potion_features[slot, 1] = 0.0

        flat_features = np.concatenate(
            (stone_features.reshape(-1), potion_features.reshape(-1)))
        obs = {'symbolic_obs': flat_features}
        obs.update(self.chem_observation())
        return obs
Ejemplo n.º 6
0
def type_based_action_from_ints(
        aligned_stone_index: stones_and_potions.AlignedStoneIndex,
        perceived_potion_index: stones_and_potions.PerceivedPotionIndex,
        rotation: np.ndarray) -> TypeBasedAction:
    """Build a type based action from its integer specification.

    Args:
        aligned_stone_index: Index of the aligned stone, or
            `helpers.END_TRIAL` to request the end of the trial.
        perceived_potion_index: Index of the perceived potion, or
            `stones_and_potions.CAULDRON` to put the stone in the cauldron.
        rotation: Rotation used to unalign the aligned stone.

    Returns:
        An end-trial action, a stone-into-cauldron action or a
        stone-with-potion action.
    """
    if aligned_stone_index == helpers.END_TRIAL:
        return TypeBasedAction(end_trial=True)

    aligned_stone = stones_and_potions.aligned_stone_from_index(
        aligned_stone_index)
    action_kwargs = {
        'stone': stones_and_potions.unalign(aligned_stone, rotation)
    }
    if perceived_potion_index == stones_and_potions.CAULDRON:
        action_kwargs['cauldron'] = True
    else:
        action_kwargs['potion'] = (
            stones_and_potions.perceived_potion_from_index(
                perceived_potion_index))
    return TypeBasedAction(**action_kwargs)
Ejemplo n.º 7
0
 def potion_used(self, stone_ind: int, potion_ind: int, val: int,
                 start_stone: graphs.Node, stone_inst: int, potion: Potion,
                 end_stone: Optional[graphs.Node]) -> None:
     """Record the type based action, and the outcome, of using a potion.

     Args:
       stone_ind: Not read by this method.
       potion_ind: Index of the potion; `CAULDRON` marks a cauldron action.
       val: Not read by this method.
       start_stone: Graph node of the stone the action was applied to.
       stone_inst: Not read by this method.
       potion: Potion used; its dimension and direction are only read when
         the action is not a cauldron action.
       end_stone: Graph node of the resulting stone. When None,
         `self.outcome` is left unchanged.
     """
     if end_stone is not None:
         end_latent = LatentStone(np.array(end_stone.coords))
         self.outcome = stones_and_potions.unalign(
             self._stone_map.apply_inverse(end_latent), self._rotation)

     stone_before = self._perceived_stone(start_stone)
     if potion_ind == CAULDRON:
         action = utils.TypeBasedAction(stone=stone_before, cauldron=True)
     else:
         latent_potion = LatentPotion(potion.dimension, potion.direction)
         action = utils.TypeBasedAction(
             stone=stone_before,
             potion=self._potion_map.apply_inverse(latent_potion))
     self.type_based_action = action
 def events(self):
     """Return the encoded events for the current step.

     When a new trial has started, the events are emitted in this order:
     chemistry created (first trial) or trial ended (later trials), cauldron
     created, one potion created event per potion, one stone created event
     per stone, and trial started. The events in `self._used_events` always
     follow.

     Returns:
       List holding the result of `encode_event(name, event)` for each event.
     """
     pending = []
     if self._new_trial:
         trial_number = self._trial_number
         if trial_number == 0:
             # The chemistry is only announced at the very first trial.
             unity_chemistry, rotation_mapping = (
                 unity_python_conversion.to_unity_chemistry(self.chemistry))
             chemistry_created = alchemy_pb2.ChemistryCreated(
                 chemistry=unity_chemistry,
                 rotation_mapping=rotation_mapping)
             pending.append(
                 ('DeepMind/Alchemy/ChemistryCreated', chemistry_created))
         else:
             # Any later trial first closes the previous one.
             trial_ended = trial_pb2.TrialEnded(trial_id=trial_number - 1)
             pending.append(('DeepMind/Trial/TrialEnded', trial_ended))

         pending.append(('DeepMind/Alchemy/CauldronCreated',
                         alchemy_pb2.CauldronCreated()))

         trial_items = self.items.trials[trial_number]
         for potion in trial_items.potions:
             latent_potion = potion.latent_potion()
             potion_properties = (
                 unity_python_conversion.to_potion_unity_properties(
                     perceived_potion=self.chemistry.potion_map.apply_inverse(
                         latent_potion),
                     latent_potion=latent_potion,
                     graph=self.chemistry.graph))
             potion_created = alchemy_pb2.PotionCreated(
                 potion_instance_id=potion.idx,
                 potion_properties=potion_properties)
             pending.append(('DeepMind/Alchemy/PotionCreated', potion_created))

         for stone in trial_items.stones:
             latent_stone = stone.latent_stone()
             perceived_stone = stones_and_potions.unalign(
                 self.chemistry.stone_map.apply_inverse(latent_stone),
                 self.chemistry.rotation)
             stone_properties = (
                 unity_python_conversion.to_stone_unity_properties(
                     perceived_stone=perceived_stone,
                     latent_stone=latent_stone))
             stone_created = alchemy_pb2.StoneCreated(
                 stone_instance_id=stone.idx,
                 stone_properties=stone_properties)
             pending.append(('DeepMind/Alchemy/StoneCreated', stone_created))

         trial_started = trial_pb2.TrialStarted(trial_id=trial_number)
         pending.append(('DeepMind/Trial/TrialStarted', trial_started))

     pending.extend(self._used_events)
     return [encode_event(event_name, payload)
             for event_name, payload in pending]
Ejemplo n.º 9
0
def find_dim_map_and_stone_map(
        chemistry: utils.Chemistry) -> Tuple[np.ndarray, StoneMap, np.ndarray]:
    """Find a dimension map and stone map which map latent stones to perceived.

    Every axis permutation, every possible stone map and two rotations (the
    chemistry's rotation and the rotation built from its negated angles) are
    tried in that nesting order. The first combination which reproduces the
    perceived coordinates of all possible latent stones is returned.

    Args:
        chemistry: Chemistry whose `stone_map` and `rotation` define the
            perceived stones which must be reproduced.

    Returns:
        Tuple of the inverse of the matching dimension map, the matching stone
        map and the matching rotation.

    Raises:
        AssertionError: If no combination reproduces the perceived stones.
    """
    latent_stones = stones_and_potions.possible_latent_stones()
    aligned_stones = [
        chemistry.stone_map.apply_inverse(stone) for stone in latent_stones
    ]
    perceived_stones = [
        stones_and_potions.unalign(stone, chemistry.rotation)
        for stone in aligned_stones
    ]

    # The integer coordinates do not depend on the candidate being tried, so
    # cast them once. The builtin `int` replaces the `np.int` alias, which was
    # removed from NumPy.
    stone_coords = [
        (ls.latent_coords.astype(int), ps.perceived_coords.astype(int))
        for ls, ps in zip(latent_stones, perceived_stones)
    ]

    # Since we do rotation before reflection in this case we must allow
    # rotation forwards and backwards to get all cases.
    # Because of the scaling this is not just the inverse matrix.
    inverse_rotation = stones_and_potions.rotation_from_angles([
        -a for a in stones_and_potions.rotation_to_angles(chemistry.rotation)
    ])

    for dim_map in [
            np.eye(3, dtype=int)[p, :]
            for p in itertools.permutations([0, 1, 2])
    ]:
        for stone_map in stones_and_potions.possible_stone_maps():
            sm = np.diag(stone_map.latent_pos_dir.astype(int))
            for rotation in [chemistry.rotation, inverse_rotation]:
                undo_rotation = np.linalg.inv(rotation)
                for latent_coords, perceived_coords in stone_coords:
                    new_ls = np.matmul(dim_map, latent_coords)
                    ps_prime = np.matmul(sm, np.matmul(undo_rotation, new_ls))
                    if not all(
                            abs(a - b) < 0.0001
                            for a, b in zip(ps_prime, perceived_coords)):
                        # This candidate fails; move on to the next one.
                        break
                else:
                    # No stone mismatched, so this candidate is the answer.
                    return np.linalg.inv(dim_map), stone_map, rotation

    # Raised explicitly (rather than via `assert False`) so that the failure is
    # still reported when Python runs with assertions disabled.
    raise AssertionError(
        'No dimension map and stone map takes latent stones to the passed '
        'perceived stones with the passed rotation.')
Ejemplo n.º 10
0
 def _perceived_stone(self, stone: graphs.Node):
     """Return the perceived stone for the passed graph node.

     The node's coordinates are wrapped in a latent stone, passed through the
     inverse of the stone map and then unaligned with the stored rotation.
     """
     latent_stone = LatentStone(np.array(stone.coords))
     aligned_stone = self._stone_map.apply_inverse(latent_stone)
     return stones_and_potions.unalign(aligned_stone, self._rotation)
Ejemplo n.º 11
0
 def perceived_stone(self, stone: Stone) -> PerceivedStone:
     """Return how the passed stone is perceived under the current chemistry.

     The stone's latent form is passed through the inverse of the chemistry's
     stone map and then unaligned with the chemistry's rotation.
     """
     chemistry = self._chemistry
     return stones_and_potions.unalign(
         chemistry.stone_map.apply_inverse(stone.latent_stone()),
         chemistry.rotation)