def _test_use_potion(self, take_action):
    """Checks the timestep produced by using a potion on a stone.

    Args:
      take_action: Callable invoked as
        `take_action(env, perceived_stone, stone, perceived_potion, potion)`
        which returns the resulting timestep.
    """
    env = self.make_object_under_test()
    env.reset()

    # Work with the first entries of `_TEST_STONES[0]` and `_TEST_POTIONS[0]`.
    test_stone = _TEST_STONES[0][0]
    test_potion = _TEST_POTIONS[0][0]
    aligned = _FIXED_STONE_MAP.apply_inverse(test_stone.latent_stone())
    perceived_stone = stones_and_potions.unalign(aligned, _FIXED_ROTATION)
    perceived_potion = _FIXED_POTION_MAP.apply_inverse(
        test_potion.latent_potion())

    timestep = take_action(
        env, perceived_stone, test_stone, perceived_potion, test_potion)
    self.assertEqual(timestep.reward, 0)

    features_per_stone, _ = symbolic_alchemy.slot_based_num_features(
        env.observe_used)
    # The first potion feature sits right after MAX_STONES stone slots.
    first_potion_index = features_per_stone * symbolic_alchemy.MAX_STONES
    first_potion_feature = timestep.observation['symbolic_obs'][
        first_potion_index]
    self.assertAlmostEqual(first_potion_feature, 1.0, 4)

    first_stone_slot = timestep.observation['symbolic_obs'][
        :features_per_stone]
    # Coords change to -1, 1, 1 and reward changes to 1/max reward
    for position, expected_coord in enumerate((-1.0, 1.0, 1.0)):
        self.assertAlmostEqual(first_stone_slot[position], expected_coord, 4)
    self.assertAlmostEqual(
        first_stone_slot[3], 1.0 / stones_and_potions.max_reward(), 4)

    # After using the potion end the trial
    final_reward, _ = env.end_trial()
    self.assertEqual(final_reward, 0)
def _test_use_stone(self, take_action):
    """Checks reward and observation after acting on each test stone.

    Args:
      take_action: Callable invoked as
        `take_action(env, perceived_stone, stone)` which returns the
        resulting timestep.
    """
    env = self.make_object_under_test()
    env.reset()
    features_per_stone, _ = symbolic_alchemy.slot_based_num_features(
        env.observe_used)
    default_stone_features, _ = env._default_features()

    test_stones = _TEST_STONES[0]
    latent_stones = [test_stone.latent_stone() for test_stone in test_stones]
    perceived_stones = [
        stones_and_potions.unalign(
            _FIXED_STONE_MAP.apply_inverse(latent), _FIXED_ROTATION)
        for latent in latent_stones
    ]

    for test_stone, perceived, latent in zip(
            test_stones, perceived_stones, latent_stones):
        timestep = take_action(env, perceived, test_stone)
        self.assertEqual(
            timestep.reward, reward_fcn()(latent.latent_coords))

        # Observation should be set to the default.
        slot_start = features_per_stone * test_stone.idx
        slot_end = features_per_stone * (test_stone.idx + 1)
        stone_slot = timestep.observation['symbolic_obs'][slot_start:slot_end]
        for observed, default in zip(
                stone_slot, default_stone_features[0, :]):
            self.assertAlmostEqual(observed, default, 4)

    # After using the stones end the trial
    final_reward, _ = env.end_trial()
    self.assertEqual(final_reward, 0)
def get_stone_tests():
    """Test cases for converting between stones and unity stone properties.

    Returns:
      A list with one tuple per (rotation, stone map) combination. Each tuple
      holds the (perceived stone, latent stone) pairs for every possible
      latent stone, the conversion to unity properties, the conversion back
      from unity properties bound to that rotation, an identity function and
      `_make_tuple`.
    """
    cases = []
    for rotation in stones_and_potions.possible_rotations():
        for stone_map in stones_and_potions.possible_stone_maps():
            perceived_and_latent = [
                (
                    stones_and_potions.unalign(
                        stone_map.apply_inverse(latent), rotation),
                    latent,
                )
                for latent in stones_and_potions.possible_latent_stones()
            ]
            cases.append((
                perceived_and_latent,
                unity_python_conversion.to_stone_unity_properties,
                functools.partial(
                    from_stone_unity_properties, rotation=rotation),
                lambda x: x,
                _make_tuple,
            ))
    return cases
def to_unity_chemistry(
    chemistry: utils.Chemistry
) -> Tuple[alchemy_pb2.Chemistry, alchemy_pb2.RotationMapping]:
    """Convert from python types to unity Chemistry object.

    Args:
      chemistry: The python chemistry providing the stone map, potion map and
        rotation.

    Returns:
      The unity chemistry built from every possible latent stone and potion,
      and the rotation mapping obtained from the dimension map's rotation.
    """
    # Latent stones and potions are always in the same places.
    unity_latent_stones = stones_and_potions.possible_latent_stones()
    unity_latent_potions = stones_and_potions.possible_latent_potions()

    # The dimension swapping map relates latent stones in unity to latent
    # stones in python (see from_unity_chemistry for more explanation).
    dim_map = PythonToUnityDimMap(chemistry)

    def stone_properties(unity_latent):
        # Swap dimensions, then apply the stone map and rotation to get the
        # perceptual stone; it is paired with the unswapped latent stone.
        python_latent = dim_map.apply_to_stone(unity_latent)
        aligned = chemistry.stone_map.apply_inverse(python_latent)
        perceived = stones_and_potions.unalign(aligned, chemistry.rotation)
        return to_stone_unity_properties(perceived, unity_latent)

    def potion_properties(unity_latent):
        # Swap dimensions, then apply the potion map to get the perceptual
        # potion; it is paired with the unswapped latent potion.
        python_latent = dim_map.apply_to_potion(unity_latent)
        perceived = chemistry.potion_map.apply_inverse(python_latent)
        return to_potion_unity_properties(
            perceived, unity_latent, dim_map.graph)

    unity_stones = [stone_properties(s) for s in unity_latent_stones]
    unity_potions = [potion_properties(p) for p in unity_latent_potions]

    return (
        alchemy_pb2.Chemistry(stones=unity_stones, potions=unity_potions),
        rotation_to_unity(dim_map.rotation),
    )
def observation(self):
    """Builds the symbolic observation for the current game state.

    Every stone and potion slot starts from the default features returned by
    `self._default_features()`. Slots belonging to stones and potions which
    currently exist are then overwritten with their perceived features.

    Returns:
      A dict whose 'symbolic_obs' entry is the flattened stone features
      followed by the flattened potion features, updated with the entries of
      `self.chem_observation()`.

    Raises:
      AssertionError: If the game state reports a stone or potion slot index
        outside `[0, MAX_STONES)` or `[0, MAX_POTIONS)` respectively.
    """
    # If we are using the slot based representation then get features for each
    # stone which is present.
    num_axes = stones_and_potions.get_num_axes()
    default_stone_features, default_potion_features = self._default_features()
    stone_features = np.concatenate(
        [default_stone_features for _ in range(MAX_STONES)], axis=0)
    potion_features = np.concatenate(
        [default_potion_features for _ in range(MAX_POTIONS)], axis=0)

    # Without a game state there is nothing to overwrite the defaults with.
    existing_stones = (
        self.game_state.existing_stones() if self.game_state else [])
    existing_potions = (
        self.game_state.existing_potions() if self.game_state else [])

    for stone in existing_stones:
        stone_ind = self.game_state.get_stone_ind(stone_inst=stone.idx)
        assert 0 <= stone_ind < MAX_STONES, 'stone idx out of range'
        aligned_stone = self._chemistry.stone_map.apply_inverse(
            stone.latent_stone())
        perceived_stone = stones_and_potions.unalign(
            aligned_stone, self._chemistry.rotation)
        for f in range(num_axes):
            stone_features[stone_ind, f] = perceived_stone.perceived_coords[f]
        # This feature is equivalent to the value indicator seen on the stone
        # as it distinguishes different reward values.
        stone_features[stone_ind, num_axes] = (
            perceived_stone.reward / stones_and_potions.max_reward())
        if self.observe_used:
            stone_features[stone_ind, num_axes + 1] = 0.0

    for potion in existing_potions:
        potion_ind = self.game_state.get_potion_ind(potion_inst=potion.idx)
        # The lower bound matters as much as the upper one: a negative index
        # would wrap around under NumPy indexing and silently overwrite the
        # features of a different slot.
        assert 0 <= potion_ind < MAX_POTIONS, 'potion idx out of range'
        latent_potion = potion.latent_potion()
        perceived_potion = self._chemistry.potion_map.apply_inverse(
            latent_potion)
        potion_features[potion_ind, 0] = (
            (perceived_potion.index() / POTION_TYPE_SCALE) - 1.0)
        if self.observe_used:
            potion_features[potion_ind, 1] = 0.0

    concat_obs = {
        'symbolic_obs': np.concatenate(
            (stone_features.reshape((-1,)), potion_features.reshape((-1,))))
    }
    concat_obs.update(self.chem_observation())
    return concat_obs
def type_based_action_from_ints(
    aligned_stone_index: stones_and_potions.AlignedStoneIndex,
    perceived_potion_index: stones_and_potions.PerceivedPotionIndex,
    rotation: np.ndarray) -> TypeBasedAction:
    """Converts from int specification of action to type based.

    Args:
      aligned_stone_index: Index of the aligned stone, or `helpers.END_TRIAL`
        to end the trial.
      perceived_potion_index: Index of the perceived potion, or
        `stones_and_potions.CAULDRON` to use the cauldron.
      rotation: Rotation used to unalign the aligned stone.

    Returns:
      The equivalent type based action.
    """
    if aligned_stone_index == helpers.END_TRIAL:
        return TypeBasedAction(end_trial=True)

    aligned_stone = stones_and_potions.aligned_stone_from_index(
        aligned_stone_index)
    perceived_stone = stones_and_potions.unalign(aligned_stone, rotation)

    # The stone is either put in the cauldron or has a potion applied to it.
    if perceived_potion_index == stones_and_potions.CAULDRON:
        target = {'cauldron': True}
    else:
        target = {
            'potion': stones_and_potions.perceived_potion_from_index(
                perceived_potion_index)
        }
    return TypeBasedAction(stone=perceived_stone, **target)
def potion_used(self, stone_ind: int, potion_ind: int, val: int,
                start_stone: graphs.Node, stone_inst: int, potion: Potion,
                end_stone: Optional[graphs.Node]) -> None:
    """Records the type based action taken and, when given, its outcome.

    Args:
      stone_ind: Unused here.
      potion_ind: Index of the potion used, or `CAULDRON`.
      val: Unused here.
      start_stone: Graph node of the stone the action was applied to.
      stone_inst: Unused here.
      potion: The potion used; only read when `potion_ind` is not `CAULDRON`.
      end_stone: Graph node of the resulting stone. When None, `self.outcome`
        is left untouched.
    """
    if end_stone is not None:
        # Same conversion as for the start stone: stone map inverse followed
        # by unaligning with the rotation.
        self.outcome = self._perceived_stone(end_stone)

    acted_on = self._perceived_stone(start_stone)
    if potion_ind == CAULDRON:
        action = utils.TypeBasedAction(stone=acted_on, cauldron=True)
    else:
        latent_potion = LatentPotion(potion.dimension, potion.direction)
        action = utils.TypeBasedAction(
            stone=acted_on,
            potion=self._potion_map.apply_inverse(latent_potion))
    self.type_based_action = action
def events(self):
    """Returns the encoded events to emit.

    When `self._new_trial` is set the list starts with either the chemistry
    creation event (trial 0) or the previous trial's end event, followed by
    the creation events for the cauldron, each potion and each stone of the
    current trial, and the trial start event. The events in
    `self._used_events` are always appended last.

    Returns:
      A list with `encode_event(name, event)` for every event, in order.
    """
    pending = []
    if self._new_trial:
        if self._trial_number == 0:
            chemistry_proto, rotation_proto = (
                unity_python_conversion.to_unity_chemistry(self.chemistry))
            created = alchemy_pb2.ChemistryCreated(
                chemistry=chemistry_proto, rotation_mapping=rotation_proto)
            pending.append(('DeepMind/Alchemy/ChemistryCreated', created))
        else:
            ended = trial_pb2.TrialEnded(trial_id=self._trial_number - 1)
            pending.append(('DeepMind/Trial/TrialEnded', ended))

        pending.append(
            ('DeepMind/Alchemy/CauldronCreated',
             alchemy_pb2.CauldronCreated()))

        for potion in self.items.trials[self._trial_number].potions:
            latent = potion.latent_potion()
            properties = unity_python_conversion.to_potion_unity_properties(
                perceived_potion=self.chemistry.potion_map.apply_inverse(
                    latent),
                latent_potion=latent,
                graph=self.chemistry.graph)
            pending.append(
                ('DeepMind/Alchemy/PotionCreated',
                 alchemy_pb2.PotionCreated(
                     potion_instance_id=potion.idx,
                     potion_properties=properties)))

        for stone in self.items.trials[self._trial_number].stones:
            latent = stone.latent_stone()
            aligned = self.chemistry.stone_map.apply_inverse(latent)
            perceived = stones_and_potions.unalign(
                aligned, self.chemistry.rotation)
            properties = unity_python_conversion.to_stone_unity_properties(
                perceived_stone=perceived, latent_stone=latent)
            pending.append(
                ('DeepMind/Alchemy/StoneCreated',
                 alchemy_pb2.StoneCreated(
                     stone_instance_id=stone.idx,
                     stone_properties=properties)))

        started = trial_pb2.TrialStarted(trial_id=self._trial_number)
        pending.append(('DeepMind/Trial/TrialStarted', started))

    pending.extend(self._used_events)
    return [encode_event(name, event) for name, event in pending]
def find_dim_map_and_stone_map(
    chemistry: utils.Chemistry) -> Tuple[np.ndarray, StoneMap, np.ndarray]:
    """Find a dimension map and stone map which map latent stones to perceived.

    Searches every permutation of the three axes, every possible stone map and
    the chemistry's rotation applied forwards and backwards for a combination
    which reproduces the perceived coords of all possible latent stones.

    Args:
      chemistry: The chemistry providing the stone map and rotation that
        define the perceived stones to match.

    Returns:
      The inverse of the matching dimension map, the matching stone map and
      the matching rotation.

    Raises:
      AssertionError: If no combination reproduces the perceived stones.
    """
    latent_stones = stones_and_potions.possible_latent_stones()
    aligned_stones = [
        chemistry.stone_map.apply_inverse(stone) for stone in latent_stones
    ]
    perceived_stones = [
        stones_and_potions.unalign(stone, chemistry.rotation)
        for stone in aligned_stones
    ]

    # `np.int` was only an alias of the builtin `int` and has been removed
    # from NumPy, so the builtin is used for every integer cast.
    latent_coords = [stone.latent_coords.astype(int) for stone in latent_stones]
    target_coords = [
        stone.perceived_coords.astype(int) for stone in perceived_stones
    ]

    # Since we do rotation before reflection in this case we must allow
    # rotation forwards and backwards to get all cases.
    # Because of the scaling this is not just the inverse matrix.
    inverse_rotation = stones_and_potions.rotation_from_angles(
        [-a for a in stones_and_potions.rotation_to_angles(chemistry.rotation)])
    candidate_rotations = (chemistry.rotation, inverse_rotation)

    identity = np.eye(3, dtype=int)
    for permutation in itertools.permutations([0, 1, 2]):
        dim_map = identity[permutation, :]
        permuted_latents = [
            np.matmul(dim_map, coords) for coords in latent_coords
        ]
        for stone_map in stones_and_potions.possible_stone_maps():
            reflection = np.diag(stone_map.latent_pos_dir.astype(int))
            for rotation in candidate_rotations:
                rotation_inv = np.linalg.inv(rotation)
                # Stops at the first stone whose coords do not match.
                all_match = all(
                    all(
                        abs(a - b) < 0.0001
                        for a, b in zip(
                            np.matmul(
                                reflection,
                                np.matmul(rotation_inv, permuted)),
                            target))
                    for permuted, target in zip(
                        permuted_latents, target_coords))
                if all_match:
                    return np.linalg.inv(dim_map), stone_map, rotation

    # Raised explicitly so the failure is not stripped when running with -O.
    raise AssertionError(
        'No dimension map and stone map takes latent stones to the passed '
        'perceived stones with the passed rotation.')
def _perceived_stone(self, stone: graphs.Node):
    """Returns the perceived stone for a graph node.

    The node's coords are wrapped in a `LatentStone`, passed through the
    inverse of `self._stone_map` and then unaligned with `self._rotation`.
    """
    latent = LatentStone(np.array(stone.coords))
    return stones_and_potions.unalign(
        self._stone_map.apply_inverse(latent), self._rotation)
def perceived_stone(self, stone: Stone) -> PerceivedStone:
    """Returns the perceived stone for `stone` under this chemistry.

    The stone's latent stone is passed through the inverse of the chemistry's
    stone map and then unaligned with the chemistry's rotation.
    """
    chemistry = self._chemistry
    aligned = chemistry.stone_map.apply_inverse(stone.latent_stone())
    return stones_and_potions.unalign(aligned, chemistry.rotation)