def select_action(self) -> utils.SlotBasedAction:
    """Returns the next scripted action for the current trial.

    The scripted actions are stored per trial as (stone_ind, potion_ind) pairs,
    where a potion_ind of -1 means the stone goes into the cauldron. A per-trial
    cursor records how many of them have already been handed out.

    Returns:
      The next scripted action, or a no-op when the trial number is outside the
      scripted range or the script for this trial has been used up.
    """
    trial = self._env.trial_number
    if not 0 <= trial < len(self._actions):
        return utils.SlotBasedAction(no_op=True)

    cursor = self._action_num[trial]
    scripted = self._actions[trial]
    if cursor >= len(scripted):
        return utils.SlotBasedAction(no_op=True)

    chosen = scripted[cursor]
    # Advance the cursor before unpacking, as the action is consumed either way.
    self._action_num[trial] += 1
    stone_ind, potion_ind = chosen
    target = ({'cauldron': True} if potion_ind == -1
              else {'potion_ind': potion_ind})
    return utils.SlotBasedAction(stone_ind=stone_ind, **target)
def env_mid_trial(self, reset=True, no_op_steps=0):
    """Creates an environment and optionally advances it with no-op steps.

    Args:
      reset: Whether to call `reset` on the freshly made environment.
      no_op_steps: Number of no-op slot based actions to step through.

    Returns:
      The environment made by `make_object_under_test`.
    """
    made_env = self.make_object_under_test()
    if reset:
        made_env.reset()
    for _ in range(no_op_steps):
        idle = utils.SlotBasedAction(no_op=True)
        made_env.step_slot_based_action(idle)
    return made_env
def test_no_op(self):
    """A no-op step keeps the symbolic observation and gives zero reward."""
    env = self.make_object_under_test()
    before = env.reset()

    do_nothing = utils.SlotBasedAction(no_op=True)
    after = env.step_slot_based_action(do_nothing)

    np.testing.assert_allclose(
        before.observation['symbolic_obs'],
        after.observation['symbolic_obs'])
    self.assertEqual(after.reward, 0)
def make_random_action_sequence(num_trials, end_trial_action):
    """Builds an integer action sequence of random actions for each trial.

    Every trial starts with a fixed number of random slot based actions (some
    of which may not be possible) and is then ended, either with an explicit
    end-trial action or by padding with no-ops up to the per-trial step limit.

    Args:
      num_trials: Number of trials to generate actions for.
      end_trial_action: Whether the action space has an end-trial action.

    Returns:
      List of actions converted with `slot_based_action_to_int`.
    """
    random_per_trial = 10
    assert random_per_trial <= _MAX_STEPS_PER_TRIAL
    padding = _MAX_STEPS_PER_TRIAL - random_per_trial

    sequence = []
    for _ in range(num_trials):
        for _ in range(random_per_trial):
            sequence.append(random_slot_based_action())
        if end_trial_action:
            sequence.append(utils.SlotBasedAction(end_trial=True))
        else:
            # Without an end-trial action, run out the remaining steps.
            sequence.extend(
                utils.SlotBasedAction(no_op=True) for _ in range(padding))

    return [
        symbolic_alchemy.slot_based_action_to_int(step, end_trial_action)
        for step in sequence
    ]
def end_trial(self) -> Tuple[float, float]:
    """Steps the environment until the current trial is over.

    With an end-trial action available a single end-trial step is taken.
    Otherwise no-op steps are taken until the episode reaches its last step or
    a new trial begins; if we are already at the start of a trial, at least one
    no-op step is taken first.

    Returns:
      Tuple of the reward summed over the steps taken and the discount, which
      is left at 1.0 by this method.
    """
    # If the previous step ended the episode, start a fresh one first.
    if self._is_last_step:
        self.reset()

    discount = 1.0
    total = 0.0

    if self._end_trial_action:
        total += self.step_no_observation(
            type_utils.SlotBasedAction(end_trial=True))
        return total, discount

    def idle_step():
        return self.step_no_observation(
            type_utils.SlotBasedAction(no_op=True))

    # At the very start of a trial, move at least one step into it.
    if self.is_new_trial():
        total += idle_step()
    while not self._is_last_step and not self.is_new_trial():
        total += idle_step()
    return total, discount
def _int_to_slot_based_action(self, action: int) -> type_utils.SlotBasedAction:
    """Converts integer action to simplified action.

    In the integer representation, if we have an end trial action the mapping is
    as follows, otherwise subtract 1 from the integers shown below:
      0 represents ending the trial
      1 represents doing nothing
      The remaining integers represent putting a stone into a potion or into the
      cauldron, i.e. s * (num potion types + 1) + 2 represents putting stone
      type s into the cauldron (or stone index s in the slot based version) and
      s * (num potion types + 1) + 3 + p represents putting stone type s (or
      again index s) into potion type p (or index p).

    Args:
      action: Integer representing the action to take.

    Returns:
      SlotBasedAction representing the action to take.
    """
    # Shift so that the special actions become negative and stone/potion
    # actions start at 0. Plain arithmetic creates a new value, so the
    # caller's `action` is never modified and no copy is needed.
    offset = 2 if self._end_trial_action else 1
    altered_action = action - offset
    if altered_action < 0:
        # NOTE(review): END_TRIAL and NO_OP are defined elsewhere in the module;
        # presumably the negative codes matching the shift above - confirm.
        return type_utils.SlotBasedAction(
            end_trial=altered_action == END_TRIAL,
            no_op=altered_action == NO_OP)

    # Each stone owns a run of (MAX_POTIONS + 1) consecutive integers: the
    # first one is the cauldron and the rest are the potion slots in order.
    stone_ind, target = divmod(altered_action, MAX_POTIONS + 1)
    if target == 0:
        return type_utils.SlotBasedAction(stone_ind=stone_ind, cauldron=True)
    return type_utils.SlotBasedAction(
        stone_ind=stone_ind, potion_ind=target - 1)
def _type_based_to_slot_based(
        self, action: type_utils.TypeBasedAction) -> type_utils.SlotBasedAction:
    """Converts a type based action into the equivalent slot based action.

    The perceived stone (if the action uses one) is aligned with the chemistry
    rotation and mapped through the stone map, and the perceived potion (if
    used) is mapped through the potion map; the game state is then asked for
    the matching stone and potion indices.

    Args:
      action: The type based action to convert.

    Returns:
      SlotBasedAction with the same end_trial, no_op and cauldron flags and the
      looked-up indices (None for a stone or potion which is not used).
    """
    chemistry = self._chemistry

    stone_ind = None
    if action.using_stone:
        aligned = stones_and_potions.align(
            action.perceived_stone, chemistry.rotation)
        latent_coords = chemistry.stone_map.apply(aligned).latent_coords
        stone_ind = self.game_state.get_stone_ind(
            stone=graphs.Node(-1, latent_coords))

    potion_ind = None
    if action.using_potion:
        potion_ind = self.game_state.get_potion_ind(
            potion=chemistry.potion_map.apply(action.perceived_potion))

    return type_utils.SlotBasedAction(
        end_trial=action.end_trial,
        no_op=action.no_op,
        stone_ind=stone_ind,
        cauldron=action.cauldron,
        potion_ind=potion_ind)
def use_positive_stones(self) -> dm_env.TimeStep:
    """Puts every stone scoring above zero into the cauldron, then ends the trial.

    Returns:
      A step built by `construct_step` from the reward summed and the discount
      multiplied over all steps taken. If the episode reaches its last step or
      a new trial starts while stones are being used, the step is returned
      straight away without calling `end_trial`.
    """
    # If the previous step ended the episode, start a fresh one first.
    if self._is_last_step:
        self.reset()

    reward_sum = 0
    discount_product = 1.0
    positive_stones = self.game_state.get_stones_above_thresh(
        self._reward_weights, threshold=0)

    for stone_ind in positive_stones:
        into_cauldron = type_utils.SlotBasedAction(
            stone_ind=stone_ind, cauldron=True)
        step = self.step_slot_based_action(into_cauldron)
        reward_sum += step.reward
        discount_product *= step.discount
        trial_finished = self._is_last_step or self.is_new_trial()
        if trial_finished:
            return self.construct_step(float(reward_sum), discount_product)

    # All positive stones are used, so run out the rest of the trial.
    final_reward, final_discount = self.end_trial()
    reward_sum += final_reward
    discount_product *= final_discount
    return self.construct_step(float(reward_sum), discount_product)
def select_action(self) -> utils.SlotBasedAction:
    """Always selects the no-op action."""
    do_nothing = utils.SlotBasedAction(no_op=True)
    return do_nothing
def random_slot_based_action():
    """Samples a random slot based action which uses a stone.

    The stone index is drawn from all stone slots. The potion index is drawn
    from -1 up to the last potion slot, with -1 meaning the stone is put into
    the cauldron rather than into a potion.

    Returns:
      SlotBasedAction putting the sampled stone into the cauldron or a potion.
    """
    stone_ind = random.randint(0, symbolic_alchemy.MAX_STONES - 1)
    potion_ind = random.randint(-1, symbolic_alchemy.MAX_POTIONS - 1)
    target = ({'potion_ind': potion_ind} if potion_ind >= 0
              else {'cauldron': True})
    return utils.SlotBasedAction(stone_ind=stone_ind, **target)
class SymbolicAlchemySeeChemistryTest(parameterized.TestCase):
    """Tests for the chemistry observation stored under `_CHEM_NAME`.

    We don't do the full mixin tests for the chemistry observation. Each
    parameterized case builds a fixed chemistry environment, optionally takes
    some slot based actions and compares the chemistry observation against a
    hand-written expected vector.
    """

    def _make_env(self, see_chemistry, constraint, **kwargs):
        """Makes a fixed chemistry environment which observes the chemistry.

        Args:
          see_chemistry: Used as the `_CHEM_NAME` entry of `see_chemistries`.
          constraint: Forwarded to `make_fixed_chem_env`.
          **kwargs: Extra keyword arguments forwarded to `make_fixed_chem_env`.

        Returns:
          The result of `make_fixed_chem_env`, called with `observe_used=True`
          and `end_trial_action=False`.
        """
        return make_fixed_chem_env(
            constraint=constraint,
            see_chemistries={_CHEM_NAME: see_chemistry},
            observe_used=True, end_trial_action=False, **kwargs)

    @parameterized.parameters(
        # In the graph observations edges are in the following order:
        #
        #          _________11__________
        #         /|                  /|
        #       9/ |               10/ |
        #       /  |                /  |
        #      /___|_____8_________/   |
        #      |   |6              |   |7
        #      |   |               |   |
        #      |2  |               |4  |
        #      |   |_______5_______|___|
        #      |   /               |   /
        #      |  /1               |  /3
        #      | /                 | /
        #      |/________0_________|/
        #
        # With coordinate system:
        #
        #      |
        #      |z  /
        #      |  /y
        #      | /
        #      |/___x___
        #
        {'see_chemistry': utils.ChemistrySeen(
            potion_map=utils.PotionMapElement(present=False),
            stone_map=utils.StoneMapElement(present=False),
            rotation=utils.RotationElement(present=False),
            content=utils.ElementContent.GROUND_TRUTH),
         'constraint': graphs.no_bottleneck_constraints()[0],
         # With no constraints all edges should be present.
         'expected_obs': np.ones((12,), np.float32),
         'expected_len': 12},
        {'see_chemistry': utils.ChemistrySeen(
            potion_map=utils.PotionMapElement(present=False),
            stone_map=utils.StoneMapElement(present=False),
            rotation=utils.RotationElement(present=False),
            content=utils.ElementContent.GROUND_TRUTH),
         'constraint': graphs.bottleneck1_constraints()[0],
         # For bottleneck1 constraint the only x direction edge that exists
         # is 8, so 0, 5 and 11 are missing.
         'expected_obs': np.array([0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0,
                                   1.0, 1.0, 0.0], np.float32),
         'expected_len': 12},
        {'see_chemistry': utils.ChemistrySeen(
            stone_map=utils.StoneMapElement(present=False),
            graph=utils.GraphElement(present=False),
            rotation=utils.RotationElement(present=False),
            content=utils.ElementContent.GROUND_TRUTH),
         'constraint': graphs.no_bottleneck_constraints()[0],
         # First 6 entries are a 1-hot for the dimension map, in this case
         # the dimension map used is the first one.
         # The next 3 entries are 0 or 1 for the direction map, in this case
         # all directions are positive.
         'expected_obs': np.array([1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0],
                                  np.float32),
         'expected_len': 9},
        {'see_chemistry': utils.ChemistrySeen(
            potion_map=utils.PotionMapElement(present=False),
            graph=utils.GraphElement(present=False),
            rotation=utils.RotationElement(present=False),
            content=utils.ElementContent.GROUND_TRUTH),
         'constraint': graphs.no_bottleneck_constraints()[0],
         # 3 entries are 0 or 1 for the direction map, in this case all
         # directions are positive.
         'expected_obs': np.array([1.0, 1.0, 1.0], np.float32),
         'expected_len': 3},
        {'see_chemistry': utils.ChemistrySeen(
            content=utils.ElementContent.GROUND_TRUTH),
         'constraint': graphs.no_bottleneck_constraints()[0],
         # Observations are from the previous tests concatenated with graph
         # first, then potion map, then stone map, then rotation.
         'expected_obs': np.array([1.0, 1.0, 1.0, 1.0, 1.0, 1.0,  # graph
                                   1.0, 1.0, 1.0, 1.0, 1.0, 1.0,  # graph
                                   1.0, 0.0, 0.0, 0.0, 0.0, 0.0,  # potion dim map
                                   1.0, 1.0, 1.0,  # potion dir map
                                   1.0, 1.0, 1.0,  # stone map
                                   1.0, 0.0, 0.0, 0.0], np.float32),  # rotation
         'expected_len': 28},
        # Tests for the belief state observation.
        {'see_chemistry': utils.ChemistrySeen(
            potion_map=utils.PotionMapElement(present=False),
            stone_map=utils.StoneMapElement(present=False),
            rotation=utils.RotationElement(present=False),
            content=utils.ElementContent.BELIEF_STATE,
            precomputed='perceptual_mapping_randomized_with_random_bottleneck'),
         'constraint': graphs.no_bottleneck_constraints()[0],
         # With no actions the belief state should be unknown for all edges.
         'expected_obs': 0.5 * np.ones((12,), np.float32),
         'expected_len': 12},
        {'see_chemistry': utils.ChemistrySeen(
            potion_map=utils.PotionMapElement(present=False),
            stone_map=utils.StoneMapElement(present=False),
            rotation=utils.RotationElement(present=False),
            content=utils.ElementContent.BELIEF_STATE,
            precomputed='perceptual_mapping_randomized_with_random_bottleneck'),
         'constraint': graphs.bottleneck1_constraints()[0],
         # It shouldn't make a difference whether the underlying chemistry
         # has a constraint or not, everything is unknown.
         'expected_obs': 0.5 * np.ones((12,), np.float32),
         'expected_len': 12},
        {'see_chemistry': utils.ChemistrySeen(
            stone_map=utils.StoneMapElement(present=False),
            graph=utils.GraphElement(present=False),
            rotation=utils.RotationElement(present=False),
            content=utils.ElementContent.BELIEF_STATE,
            precomputed='perceptual_mapping_randomized_with_random_bottleneck'),
         'constraint': graphs.no_bottleneck_constraints()[0],
         # First 6 entries are a 1-hot for the dimension map, with no actions
         # all of the dimension maps are possible so the entries are all
         # unknown.
         # The next 3 entries are 0 or 1 for the direction map, or 0.5 for
         # unknown which is the case if no actions are taken.
         'expected_obs': 0.5 * np.ones((9,), np.float32),
         'expected_len': 9},
        {'see_chemistry': utils.ChemistrySeen(
            potion_map=utils.PotionMapElement(present=False),
            graph=utils.GraphElement(present=False),
            rotation=utils.RotationElement(present=False),
            content=utils.ElementContent.BELIEF_STATE,
            precomputed='perceptual_mapping_randomized_with_random_bottleneck'),
         'constraint': graphs.no_bottleneck_constraints()[0],
         # 3 entries are 0 or 1 for the direction map, in this case all
         # directions are positive. Since the test stones include an instance
         # of the best stone, the stone map should be known from the start.
         'expected_obs': np.array([1.0, 1.0, 1.0], np.float32),
         'expected_len': 3},
        {'see_chemistry': utils.ChemistrySeen(
            content=utils.ElementContent.BELIEF_STATE,
            rotation=utils.RotationElement(present=False),
            precomputed='perceptual_mapping_randomized_with_random_bottleneck'),
         'constraint': graphs.no_bottleneck_constraints()[0],
         # Observations are from the previous tests concatenated with graph
         # first, then potion map then stone map.
         'expected_obs': np.array([0.5, 0.5, 0.5, 0.5, 0.5, 0.5,  # graph
                                   0.5, 0.5, 0.5, 0.5, 0.5, 0.5,  # graph
                                   0.5, 0.5, 0.5, 0.5, 0.5, 0.5,  # potion dim map
                                   0.5, 0.5, 0.5,  # potion dir map
                                   1.0, 1.0, 1.0], np.float32),  # stone map
         'expected_len': 24},
        {'see_chemistry': utils.ChemistrySeen(
            content=utils.ElementContent.BELIEF_STATE,
            rotation=utils.RotationElement(present=False),
            precomputed='perceptual_mapping_randomized_with_random_bottleneck'),
         'constraint': graphs.no_bottleneck_constraints()[0],
         'actions': [utils.SlotBasedAction(stone_ind=0, potion_ind=0)],
         # If we put the 0th stone into the 0th potion we will see a change
         # on axis 1, we will become certain that the dim map is either
         # [0, 1, 2] or [2, 1, 0], and we will become certain that the edge
         # from (-1, -1, 1) to (-1, 1, 1) exists (edge 9 in the diagram).
         'expected_obs': np.array([0.5, 0.5, 0.5, 0.5, 0.5, 0.5,  # graph
                                   0.5, 0.5, 0.5, 1.0, 0.5, 0.5,  # graph
                                   0.5, 0.0, 0.0, 0.0, 0.0, 0.5,  # potion dim map
                                   0.5, 1.0, 0.5,  # potion dir map
                                   1.0, 1.0, 1.0], np.float32),  # stone map
         'expected_len': 24},
        # Tests for a combination of content types.
        # NOTE(review): judging by the comments on expected_obs below, the
        # 3-element list given to each GroupInChemistry presumably selects
        # between (ground truth, belief state, unknown) - confirm against
        # utils.GroupInChemistry.
        {'see_chemistry': utils.ChemistrySeen(
            potion_map=utils.PotionMapElement(present=False),
            stone_map=utils.StoneMapElement(present=False),
            rotation=utils.RotationElement(present=False),
            groups=[
                utils.GroupInChemistry(
                    {utils.ElementType.GRAPH: {0, 1, 2, 3}}, [1.0, 0.0, 0.0]),
                utils.GroupInChemistry(
                    {utils.ElementType.GRAPH: {4, 5, 6}}, [0.0, 0.0, 1.0]),
                utils.GroupInChemistry(
                    {utils.ElementType.GRAPH: {7, 8, 9, 10, 11}},
                    [0.0, 1.0, 0.0]),
            ],
            precomputed='perceptual_mapping_randomized_with_random_bottleneck'),
         'constraint': graphs.bottleneck1_constraints()[0],
         'actions': [utils.SlotBasedAction(stone_ind=0, potion_ind=0)],
         # One action is taken before the observation is checked, so the
         # edges shown as belief state reflect what that action revealed.
         'expected_obs': np.array(
             [0.0, 1.0, 1.0, 1.0,  # ground truth - 0 missing 1, 2, 3 exist
              0.5, 0.5, 0.5,  # unknown - these are set to 0.5
              # belief state - after the action 9 is known, others are unknown
              0.5, 0.5, 1.0, 0.5, 0.5], np.float32),
         'expected_len': 12},
        # Rotation tests. In each case the expected observation is a 1-hot
        # over 4 entries which moves as the rotation angles change.
        {'see_chemistry': utils.ChemistrySeen(
            potion_map=utils.PotionMapElement(present=False),
            stone_map=utils.StoneMapElement(present=False),
            graph=utils.GraphElement(present=False),
            content=utils.ElementContent.GROUND_TRUTH),
         'constraint': graphs.no_bottleneck_constraints()[0],
         'make_env_kwargs': {
             'rotation': stones_and_potions.rotation_from_angles([0, 0, 0])},
         'expected_obs': np.array([1.0, 0.0, 0.0, 0.0], np.float32),
         'expected_len': 4},
        {'see_chemistry': utils.ChemistrySeen(
            potion_map=utils.PotionMapElement(present=False),
            stone_map=utils.StoneMapElement(present=False),
            graph=utils.GraphElement(present=False),
            content=utils.ElementContent.GROUND_TRUTH),
         'constraint': graphs.no_bottleneck_constraints()[0],
         'make_env_kwargs': {
             'rotation': stones_and_potions.rotation_from_angles([0, 0, -45])},
         'expected_obs': np.array([0.0, 1.0, 0.0, 0.0], np.float32),
         'expected_len': 4},
        {'see_chemistry': utils.ChemistrySeen(
            potion_map=utils.PotionMapElement(present=False),
            stone_map=utils.StoneMapElement(present=False),
            graph=utils.GraphElement(present=False),
            content=utils.ElementContent.GROUND_TRUTH),
         'constraint': graphs.no_bottleneck_constraints()[0],
         'make_env_kwargs': {
             'rotation': stones_and_potions.rotation_from_angles([0, -45, 0])},
         'expected_obs': np.array([0.0, 0.0, 1.0, 0.0], np.float32),
         'expected_len': 4},
        {'see_chemistry': utils.ChemistrySeen(
            potion_map=utils.PotionMapElement(present=False),
            stone_map=utils.StoneMapElement(present=False),
            graph=utils.GraphElement(present=False),
            content=utils.ElementContent.GROUND_TRUTH),
         'constraint': graphs.no_bottleneck_constraints()[0],
         'make_env_kwargs': {
             'rotation': stones_and_potions.rotation_from_angles([-45, 0, 0])},
         'expected_obs': np.array([0.0, 0.0, 0.0, 1.0], np.float32),
         'expected_len': 4},
        # In belief state if we have stones which are unique to a particular
        # rotation then the rotation should be known and possibly part of the
        # stone map.
        {'see_chemistry': utils.ChemistrySeen(
            potion_map=utils.PotionMapElement(present=False),
            graph=utils.GraphElement(present=False),
            content=utils.ElementContent.BELIEF_STATE,
            precomputed=('perceptual_mapping_randomized_with_rotation_and_'
                         'random_bottleneck')),
         'constraint': graphs.no_bottleneck_constraints()[0],
         'make_env_kwargs': {
             'rotation': stones_and_potions.rotation_from_angles([-45, 0, 0]),
             'test_stones': [[Stone(0, [1, 1, 1]), Stone(0, [1, 1, -1])]]},
         'expected_obs': np.array(
             [1.0, 1.0, 1.0,  # stone map
              0.0, 0.0, 0.0, 1.0], np.float32),  # rotation
         'expected_len': 7},
        # Otherwise rotation and stone map observations should both be
        # unknown.
        {'see_chemistry': utils.ChemistrySeen(
            potion_map=utils.PotionMapElement(present=False),
            graph=utils.GraphElement(present=False),
            content=utils.ElementContent.BELIEF_STATE,
            precomputed=('perceptual_mapping_randomized_with_rotation_and_'
                         'random_bottleneck')),
         'constraint': graphs.no_bottleneck_constraints()[0],
         'make_env_kwargs': {
             'rotation': stones_and_potions.rotation_from_angles([-45, 0, 0]),
             'test_stones': [[Stone(0, [1, 1, 1])]]},
         'expected_obs': np.array(
             [0.5, 0.5, 0.5,  # stone map
              0.5, 0.5, 0.5, 0.5], np.float32),  # rotation
         'expected_len': 7},
        # Same setup as the previous case, but a single action is taken
        # first, after which both the stone map and the rotation are expected
        # to be known.
        {'see_chemistry': utils.ChemistrySeen(
            potion_map=utils.PotionMapElement(present=False),
            graph=utils.GraphElement(present=False),
            content=utils.ElementContent.BELIEF_STATE,
            precomputed=('perceptual_mapping_randomized_with_rotation_and_'
                         'random_bottleneck')),
         'constraint': graphs.no_bottleneck_constraints()[0],
         'make_env_kwargs': {
             'rotation': stones_and_potions.rotation_from_angles([-45, 0, 0]),
             'test_stones': [[Stone(0, [1, 1, 1])]]},
         'actions': [utils.SlotBasedAction(stone_ind=0, potion_ind=6)],
         'expected_obs': np.array(
             [1.0, 1.0, 1.0,  # stone map
              0.0, 0.0, 0.0, 1.0], np.float32),  # rotation
         'expected_len': 7},
    )
    def test_see_chemistry(
            self, see_chemistry, constraint, expected_obs, expected_len,
            actions=None, make_env_kwargs=None):
        """Tests the chemistry observation after reset and optional actions.

        Covers the ground truth, belief state and grouped cases listed in the
        parameters above.

        Args:
          see_chemistry: Description of which chemistry elements are observed.
          constraint: Constraint used to make the fixed chemistry environment.
          expected_obs: Expected value of the `_CHEM_NAME` observation.
          expected_len: Expected length of the `_CHEM_NAME` observation.
          actions: Optional slot based actions to take after the reset; the
            observation from the last step taken is the one checked.
          make_env_kwargs: Optional extra keyword arguments for `_make_env`.
        """
        env = self._make_env(
            see_chemistry=see_chemistry, constraint=constraint,
            **(make_env_kwargs or {}))
        timestep = env.reset()
        if actions:
            for action in actions:
                timestep = env.step_slot_based_action(action)
        np.testing.assert_allclose(
            timestep.observation[_CHEM_NAME], expected_obs)
        self.assertLen(
            timestep.observation[_CHEM_NAME], expected_len)

    def test_see_chem_before_reset(self):
        """Tests the chemistry observation before and after the first reset."""
        env = self._make_env(
            see_chemistry=utils.ChemistrySeen(
                content=utils.ElementContent.GROUND_TRUTH),
            constraint=graphs.no_bottleneck_constraints()[0])
        obs = env.observation()
        # Observation should be all unknown because we have not reset the
        # environment yet.
        np.testing.assert_allclose(obs[_CHEM_NAME], [0.5] * 28)
        # After resetting none of the chem should be unknown, i.e. every entry
        # must differ from 0.5 by more than 0.01.
        env.reset()
        obs = env.observation()
        np.testing.assert_array_less(
            0.01 * np.ones((28,)),
            np.abs(obs[_CHEM_NAME] - np.array([0.5] * 28)))
def slot_based_use_potion(
        env, unused_perceived_stone, stone, unused_perceived_potion, potion):
    """Steps `env` with the slot based action putting `stone` into `potion`.

    Args:
      env: Environment to step.
      unused_perceived_stone: Ignored.
      stone: Stone whose `idx` gives the stone slot.
      unused_perceived_potion: Ignored.
      potion: Potion whose `idx` gives the potion slot.

    Returns:
      The result of `env.step_slot_based_action`.
    """
    # Only the slot indices matter here, not the perceived objects.
    del unused_perceived_stone
    del unused_perceived_potion
    dip = utils.SlotBasedAction(stone_ind=stone.idx, potion_ind=potion.idx)
    return env.step_slot_based_action(dip)
def slot_based_use_stone(env, unused_perceived_stone, stone):
    """Steps `env` with the slot based action putting `stone` in the cauldron.

    Args:
      env: Environment to step.
      unused_perceived_stone: Ignored.
      stone: Stone whose `idx` gives the stone slot.

    Returns:
      The result of `env.step_slot_based_action`.
    """
    # Only the slot index matters here, not the perceived stone.
    del unused_perceived_stone
    into_cauldron = utils.SlotBasedAction(stone_ind=stone.idx, cauldron=True)
    return env.step_slot_based_action(into_cauldron)