コード例 #1
0
def make_fixed_chem_env(
    constraint=None, potion_map=_FIXED_POTION_MAP, stone_map=_FIXED_STONE_MAP,
    rotation=_FIXED_ROTATION, test_stones=None, test_potions=None, **kwargs):
  """Creates a symbolic alchemy environment with a fixed chemistry and items.

  Args:
    constraint: Constraint the chemistry graph is created from. When None, the
      first entry of `graphs.no_bottleneck_constraints()` is used.
    potion_map: Potion map placed into the chemistry.
    stone_map: Stone map placed into the chemistry.
    rotation: Rotation placed into the chemistry.
    test_stones: Stones for the episode; any falsy value selects
      `_TEST_STONES`.
    test_potions: Potions for the episode; any falsy value selects
      `_TEST_POTIONS`.
    **kwargs: Forwarded unchanged to
      `symbolic_alchemy.get_symbolic_alchemy_fixed`.

  Returns:
    Whatever `symbolic_alchemy.get_symbolic_alchemy_fixed` returns.
  """
  if constraint is None:
    constraint = graphs.no_bottleneck_constraints()[0]

  # Fall back to the module-level test items when none (or empty) are given.
  episode_items = utils.EpisodeItems(
      potions=test_potions or _TEST_POTIONS,
      stones=test_stones or _TEST_STONES)

  chem_graph = graphs.create_graph_from_constraint(constraint)
  chemistry = utils.Chemistry(
      graph=chem_graph, potion_map=potion_map, stone_map=stone_map,
      rotation=rotation)

  return symbolic_alchemy.get_symbolic_alchemy_fixed(
      episode_items=episode_items,
      chemistry=chemistry,
      reward_weights=reward_fcn(),
      max_steps_per_trial=_MAX_STEPS_PER_TRIAL,
      **kwargs)
コード例 #2
0
 def setUp(self):
     """Prepares a mock 3D environment, a chemistry, items and the wrapper."""
     super().setUp()

     mock_env = Mock3DEnv()
     self.env3d_mock = mock_env

     # Chemistry assembled from the "all fixed" maps, a graph created from
     # the first no-bottleneck constraint and an identity rotation matrix.
     fixed_potion_map = stones_and_potions.all_fixed_potion_map()
     fixed_stone_map = stones_and_potions.all_fixed_stone_map()
     first_constraint = graphs.no_bottleneck_constraints()[0]
     self.chemistry = utils.Chemistry(
         potion_map=fixed_potion_map,
         stone_map=fixed_stone_map,
         graph=graphs.create_graph_from_constraint(first_constraint),
         rotation=np.eye(3))

     # Two inner lists of potions and of stones (presumably one per trial --
     # TODO confirm against utils.EpisodeItems).
     potion_lists = [[Potion(0, 0, 1)], [Potion(1, 2, -1)]]
     stone_lists = [[Stone(2, [-1, -1, -1])], [Stone(3, [1, 1, 1])]]
     self.items = utils.EpisodeItems(potions=potion_lists,
                                     stones=stone_lists)

     # The mock must know the chemistry and items before it is wrapped.
     mock_env.set_chemistry_and_items(self.chemistry, self.items)

     level = 'alchemy/perceptual_mapping_randomized_with_random_bottleneck'
     self.wrapper = symbolic_alchemy_wrapper.SymbolicAlchemyWrapper(
         mock_env, level)
コード例 #3
0
 def episode_returns(self) -> Any:
     """Returns the parent's 'trials' entry repackaged as `EpisodeItems`.

     An empty `utils.EpisodeItems` is created and its `trials` attribute is
     overwritten with the value stored under the 'trials' key of the parent
     class's `episode_returns()` result.
     """
     episode_items = utils.EpisodeItems([], [])
     parent_returns = super().episode_returns()
     episode_items.trials = parent_returns['trials']
     return episode_items
コード例 #4
0
class IdealObserverTest(parameterized.TestCase):
    """Tests for the ideal observer search and the multi-trial bot using it."""

    # Maps level name -> precomputed maps; populated once in setUpClass.
    level_name_to_precomputed = None

    @classmethod
    def setUpClass(cls):
        """Loads the precomputed maps for every level used by the tests."""
        super().setUpClass()
        # Load all of the precomputed maps required for all tests otherwise loading
        # them in the tests can take too long and cause the test to time out.
        levels_used = [
            'all_fixed_bottleneck1', 'perceptual_mapping_randomized',
            'perceptual_mapping_randomized_with_random_bottleneck'
        ]
        cls.level_name_to_precomputed = {
            s: precomputed_maps.load_from_level_name(s)
            for s in levels_used
        }

    @parameterized.parameters(
        # 1 stone worth 3, no potions - reward should be 3
        (Counter({AlignedStone(3, np.array([1, 1, 1])): 1}), Counter(), 3),
        # 1 stone worth 1, no potions - reward should be 1
        (Counter({AlignedStone(1, np.array([1, 1, 1])): 1}), Counter(), 1),
        # 1 stone worth -1, no potions - reward should be 0 (since we can choose
        # not to put the stone in)
        (Counter({AlignedStone(-1, np.array([1, 1, 1])): 1}), Counter(), 0),
        # 1 stone worth 3, any number of potions - reward should be 3
        (Counter({AlignedStone(3, np.array([1, 1, 1])): 1
                  }), Counter({PerceivedPotion(0, 1): 1}), 3),
        (Counter({AlignedStone(3, np.array([1, 1, 1])): 1}),
         Counter({
             PerceivedPotion(0, 1): 1,
             PerceivedPotion(0, -1): 1
         }), 3),
        # If graph has no bottlenecks and we start with a stone worth 1 and
        # potions which go in opposite directions by using a potion we get
        # expected reward of 4/3 since:
        #  1/6 prob of going to 3 then cashing in - contributes 1/2
        #  1/3 prob of going to -1 then we can definitely get back to 1 using the
        #    other potion - contributes 1/3
        #  1/2 prob of staying at 1 then it is best not to use the other potion -
        #    contributes 1/2
        (Counter({AlignedStone(1, np.array([1, 1, 1])): 1}),
         Counter({
             PerceivedPotion(0, 1): 1,
             PerceivedPotion(0, -1): 1
         }), 1.33333333, 0, 'perceptual_mapping_randomized'),
        # If graph has no bottlenecks and we start with a stone worth -1 and
        # potions on different axes then by using a potion we get
        # expected reward of 0.583333 since:
        #  1/6 prob of going to -3 - contributes 0
        #  1/3 prob of going to 1 then if we use the other potion we know it
        #    applies to a different axis so:
        #      1/4 prob of going to 3 - contributes (1/3) * (1/4) * 3 = 1/4
        #      1/4 prob of going to -1 - contributes 0
        #      1/2 prob of staying at 1 - contributes (1/3) * (1/2) * 1 = 1/6
        #  1/2 prob of staying at -1 then the other potion could apply to any axis
        #    so it gives:
        #      1/6 prob of going to -3 - contributes 0
        #      1/3 prob of going to 1 - contributes (1/2) * (1/3) * 1 = 1/6
        #      1/2 prob of staying at -1 - contributes 0
        (Counter({AlignedStone(-1, np.array([1, 1, 1])): 1}),
         Counter({
             PerceivedPotion(0, 1): 1,
             PerceivedPotion(1, 1): 1
         }), 0.583333, 0, 'perceptual_mapping_randomized'))
    def test_expected_reward(
            self,
            aligned_stones,
            perceived_potions,
            expected_expected_reward,
            bonus=0,
            level_name='perceptual_mapping_randomized_with_random_bottleneck'):
        """Checks the expected reward the ideal observer reports for one trial.

        Args:
            aligned_stones: Counter mapping `AlignedStone` to its count.
            perceived_potions: Counter mapping `PerceivedPotion` to its count.
            expected_expected_reward: Expected reward the search should
                report, compared to 4 decimal places.
            bonus: Bonus value forwarded to `ideal_observer.ideal_observer`.
            level_name: Key into `level_name_to_precomputed` selecting the
                precomputed maps to use.
        """
        precomputed = IdealObserverTest.level_name_to_precomputed[level_name]

        # Make an initial game state using first trial information.
        current_game_state = ideal_observer.BeliefState(precomputed)

        # Start with no search results.
        search_results = {}

        # The belief state is fed item indices rather than the item objects.
        aligned_stones_ind = collections.Counter(
            {k.index(): v
             for k, v in aligned_stones.items()})
        perceived_potions_ind = collections.Counter(
            {k.index(): v
             for k, v in perceived_potions.items()})
        current_game_state.new_trial(aligned_stones_ind, perceived_potions_ind)

        # NOTE(review): the trailing positional False is presumably the
        # minimise-world-states flag -- confirm against ideal_observer.
        _, objective, _ = ideal_observer.ideal_observer(
            current_game_state, search_results, bonus, precomputed, False)
        expected_reward, _ = objective
        self.assertAlmostEqual(
            expected_reward, expected_expected_reward, places=4)

    @parameterized.parameters(
        # In the first trial we have 1 stone worth -3 and the 3 potions required
        # to get it to 3. In the second trial we have a stone at 1 and 1 correct
        # potion and some potions which will take it away from the 3. It should
        # take the knowledge gained from trial 1 and apply it to trial 2.
        {
            'perceived_items':
            utils.EpisodeItems(
                stones=[[Stone(0, [-1, -1, -1])], [Stone(0, [1, 1, -1])]],
                potions=[[Potion(0, 0, 1),
                          Potion(1, 1, 1),
                          Potion(2, 2, 1)],
                         [Potion(0, 0, -1),
                          Potion(1, 1, -1),
                          Potion(2, 2, 1)]]),
            'potion_map':
            _ALL_FIXED_POTION_MAP,
            'stone_map':
            _ALL_FIXED_STONE_MAP,
            'graph':
            _ALL_FIXED_GRAPH,
            'expected_rewards': [15, 15]
        },
        {
            'perceived_items':
            utils.EpisodeItems(
                stones=[[Stone(0, [-1, -1, -1])], [Stone(0, [1, -1, 1])]],
                potions=[[Potion(0, 0, 1),
                          Potion(1, 1, 1),
                          Potion(2, 2, 1)],
                         [Potion(0, 0, -1),
                          Potion(1, 1, -1),
                          Potion(2, 2, 1)]]),
            'potion_map':
            _ALL_FIXED_POTION_MAP,
            'stone_map':
            _ALL_FIXED_STONE_MAP,
            'graph':
            _ALL_FIXED_GRAPH,
            'expected_rewards': [15, 1]
        },
        {
            'perceived_items':
            utils.EpisodeItems(
                stones=[[Stone(0, [-1, -1, -1])], [Stone(0, [1, -1, 1])],
                        [Stone(0, [-1, -1, -1])], [Stone(0, [1, 1, 1])],
                        [Stone(0, [1, 1, -1])], [Stone(0, [1, 1, -1])],
                        [Stone(0, [1, -1, -1])], [Stone(0, [1, -1, -1])],
                        [Stone(0, [1, -1, -1])], [Stone(0, [-1, -1, 1])]],
                potions=[[
                    Potion(0, 0, -1),
                    Potion(1, 1, -1),
                    Potion(2, 0, 1),
                    Potion(3, 2, -1),
                    Potion(4, 0, 1),
                    Potion(5, 0, -1)
                ],
                         [
                             Potion(0, 0, 1),
                             Potion(1, 0, -1),
                             Potion(2, 0, -1),
                             Potion(3, 0, -1),
                             Potion(4, 0, 1),
                             Potion(5, 2, -1)
                         ],
                         [
                             Potion(0, 0, -1),
                             Potion(1, 1, 1),
                             Potion(2, 2, -1),
                             Potion(3, 1, -1),
                             Potion(4, 2, -1),
                             Potion(5, 2, -1)
                         ],
                         [
                             Potion(0, 1, -1),
                             Potion(1, 0, -1),
                             Potion(2, 1, -1),
                             Potion(3, 1, -1),
                             Potion(4, 0, -1),
                             Potion(5, 0, -1)
                         ],
                         [
                             Potion(0, 1, 1),
                             Potion(1, 0, -1),
                             Potion(2, 1, -1),
                             Potion(3, 2, 1),
                             Potion(4, 1, 1),
                             Potion(5, 0, -1)
                         ],
                         [
                             Potion(0, 2, -1),
                             Potion(1, 1, 1),
                             Potion(2, 2, 1),
                             Potion(3, 0, -1),
                             Potion(4, 0, -1),
                             Potion(5, 0, 1)
                         ],
                         [
                             Potion(0, 0, 1),
                             Potion(1, 0, 1),
                             Potion(2, 0, -1),
                             Potion(3, 2, -1),
                             Potion(4, 2, -1),
                             Potion(5, 1, 1)
                         ],
                         [
                             Potion(0, 2, -1),
                             Potion(1, 1, 1),
                             Potion(2, 1, -1),
                             Potion(3, 0, 1),
                             Potion(4, 2, -1),
                             Potion(5, 2, -1)
                         ],
                         [
                             Potion(0, 2, 1),
                             Potion(1, 2, -1),
                             Potion(2, 2, -1),
                             Potion(3, 2, 1),
                             Potion(4, 1, -1),
                             Potion(5, 1, -1)
                         ],
                         [
                             Potion(0, 1, -1),
                             Potion(1, 0, -1),
                             Potion(2, 2, 1),
                             Potion(3, 2, 1),
                             Potion(4, 2, -1),
                             Potion(5, 1, 1)
                         ]]),
            'potion_map':
            _ALL_FIXED_POTION_MAP,
            'stone_map':
            _ALL_FIXED_STONE_MAP,
            'graph':
            _ALL_FIXED_GRAPH,
            'expected_rewards': [0, 0, 0, 15, 15, 15, 1, 1, 1, 1]
        },
        # This is a case that the oracle fails because it has to use a long path
        # instead of a short one.
        {
            'perceived_items':
            utils.EpisodeItems(
                stones=[[Stone(0, [-1, -1, -1]),
                         Stone(1, [1, 1, -1])]],
                potions=[[Potion(0, 0, 1),
                          Potion(1, 1, 1),
                          Potion(2, 2, 1)]]),
            'potion_map':
            _ALL_FIXED_POTION_MAP,
            'stone_map':
            _ALL_FIXED_STONE_MAP,
            'graph':
            _BOTTLENECK1_GRAPH,
            'expected_rewards': [16],
            'bonus':
            12,
            # Give it the exact constraint, stone map and potion map so it can run
            # as the oracle.
            'level_name':
            'all_fixed_bottleneck1'
        },
        # This is a case where without sorting on search depth for actions of
        # equal reward, the search_oracle takes the longer path.
        {
            'perceived_items':
            utils.EpisodeItems(
                stones=[[Stone(0, [-1, -1, -1]),
                         Stone(1, [-1, -1, 1])]],
                potions=[[Potion(0, 0, -1),
                          Potion(1, 0, 1),
                          Potion(2, 2, 1)]]),
            'potion_map':
            _ALL_FIXED_POTION_MAP,
            'stone_map':
            _ALL_FIXED_STONE_MAP,
            'graph':
            _BOTTLENECK1_GRAPH,
            'expected_rewards': [1],
            'bonus':
            12,
            # Give it the exact constraint, stone map and potion map so it can run
            # as the oracle.
            'level_name':
            'all_fixed_bottleneck1',
            # We expect the only events to be applying potion 1 to stone 1 and then
            # putting it into the cauldron.
            'expected_events': [
                ev_trk.OrderedEvents([
                    # First put stone 1 into potion 1
                    ev_trk.SingleEvent(1, {1}),
                    # Then put stone 1 into the cauldron
                    ev_trk.SingleEvent(1, {-1})
                ])
            ],
            'non_events': [
                ev_trk.AnyOrderEvents({
                    # Stone 0 should not be put into any potions or the cauldron.
                    ev_trk.SingleEvent(0, {0, 1, 2, -1}),
                    # Stone 1 should not be put into any other potions.
                    ev_trk.SingleEvent(1, {0, 2}),
                })
            ]
        },
        # Tests for the ideal explorer.
        # Ideal observer wouldn't use the potion as it minimises search depth if
        # it cannot get reward but ideal explorer will use it to minimise num
        # world states.
        {
            'minimise_world_states':
            False,
            'perceived_items':
            utils.EpisodeItems(stones=[[Stone(0, [-1, -1, -1])]],
                               potions=[[Potion(0, 0, 1)]]),
            'potion_map':
            _ALL_FIXED_POTION_MAP,
            'stone_map':
            _ALL_FIXED_STONE_MAP,
            'graph':
            _BOTTLENECK1_GRAPH,
            'expected_rewards': [0],
            'bonus':
            12,
            'non_events': [
                # Do not put stone 0 into potion 0 as we are minimising search depth.
                ev_trk.SingleEvent(0, {0}),
            ]
        },
        {
            'minimise_world_states':
            True,
            'perceived_items':
            utils.EpisodeItems(stones=[[Stone(0, [-1, -1, -1])]],
                               potions=[[Potion(0, 0, 1)]]),
            'potion_map':
            _ALL_FIXED_POTION_MAP,
            'stone_map':
            _ALL_FIXED_STONE_MAP,
            'graph':
            _BOTTLENECK1_GRAPH,
            'expected_rewards': [0],
            'bonus':
            12,
            'expected_events': [
                # Put stone 0 into potion 0 to reduce num world states.
                ev_trk.SingleEvent(0, {0}),
            ]
        },
    )
    def test_multiple_trials(
            self,
            perceived_items,
            potion_map,
            stone_map,
            graph,
            expected_rewards,
            bonus=12,
            level_name='perceptual_mapping_randomized_with_random_bottleneck',
            expected_events=None,
            non_events=None,
            minimise_world_states=False):
        """Runs the multi-trial ideal observer bot and checks its results.

        Args:
            perceived_items: `utils.EpisodeItems` the bot is run on.
            potion_map: Potion map used to build the chemistry.
            stone_map: Stone map used to build the chemistry.
            graph: Graph used to build the chemistry.
            expected_rewards: Expected per-trial scores, in trial order.
            bonus: Bonus passed to `stones_and_potions.RewardWeights`.
            level_name: Key into `level_name_to_precomputed` selecting the
                precomputed maps to use.
            expected_events: Optional list, one entry per event tracker,
                of events whose `occurs` must be true.
            non_events: Optional list, one entry per event tracker, of
                events whose `occurs` must be false.
            minimise_world_states: Flag forwarded to
                `get_multi_trial_ideal_observer_reward`.
        """
        precomputed = IdealObserverTest.level_name_to_precomputed[level_name]

        reward_weights = stones_and_potions.RewardWeights([1, 1, 1], 0, bonus)
        symbolic_bot_trackers_from_env = functools.partial(
            add_trackers_to_env,
            reward_weights=reward_weights,
            precomputed=precomputed,
            init_belief_state=None)
        results = symbolic_alchemy_bots.get_multi_trial_ideal_observer_reward(
            perceived_items,
            utils.Chemistry(potion_map, stone_map, graph,
                            np.eye(3)), reward_weights, precomputed,
            minimise_world_states, symbolic_bot_trackers_from_env)
        per_trial = results['score']['per_trial']
        event_trackers = results['matrix_event']['event_tracker']

        self.assertLen(per_trial, len(expected_rewards))
        for trial_reward, expected_trial_reward in zip(per_trial,
                                                       expected_rewards):
            # The third positional argument of assertEqual is the failure
            # message, not a tolerance, so compare to 4 places explicitly.
            self.assertAlmostEqual(
                trial_reward, expected_trial_reward, places=4)

        if expected_events is not None:
            self.assertLen(event_trackers, len(expected_events))
            for event_tracker, expected_event in zip(event_trackers,
                                                     expected_events):
                self.assertTrue(expected_event.occurs(event_tracker.events))
        if non_events is not None:
            self.assertLen(event_trackers, len(non_events))
            for event_tracker, non_event in zip(event_trackers, non_events):
                self.assertFalse(non_event.occurs(event_tracker.events))
コード例 #5
0
def proto_to_episode_items(
        proto: chemistries_pb2.EpisodeItems) -> utils.EpisodeItems:
    """Converts an `EpisodeItems` proto into a `utils.EpisodeItems` object.

    Args:
        proto: Proto whose `trial_items` entries are converted one by one
            with `proto_to_trial_items`.

    Returns:
        A `utils.EpisodeItems` created empty and then given the converted
        trials, in the proto's order, as its `trials` attribute.
    """
    episode_items = utils.EpisodeItems(stones=[], potions=[])
    converted_trials = []
    for trial_proto in proto.trial_items:
        converted_trials.append(proto_to_trial_items(trial_proto))
    episode_items.trials = converted_trials
    return episode_items