    def test_schelling_s(self):
        # Schelling failure scenario
        #
        # X X S P-D X X
        # X     ↓R    X
        # X     X     X
        # O           O
        # X     X     X
        # X     ↓H    X
        # X X D P-S X X
        #
        # The layout is completely symmetric. Each pot already holds two
        # onions and needs one more, and only one delivery is left. The best
        # thing to do is to split up towards the different pots, but the
        # agents must somehow coordinate on the first step. With a
        # human-robot pair (H+R) this coordination fails, while a
        # robot-robot pair (R+R) manages it.
        #
        eva = AgentEvaluator(
            {
                "layout_name": "schelling_s",
                "start_order_list": ["any", "any"],
                "cook_time": 5
            },
            force_compute=force_compute)
        start_state = eva.env.mdp.get_standard_start_state()
        # Pot soups are encoded as (soup type, onions in pot, cooking progress).
        start_state.objects = {
            (2, 0): Obj('soup', (2, 0), ('onion', 2, 5)),
            (2, 4): Obj('soup', (2, 4), ('onion', 2, 5))
        }
        eva.start_state = start_state
        self.compare_times(eva, h_idx=1)
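
    # All three scenario tests in this section follow the same setup
    # pattern: build an AgentEvaluator for a layout, take the standard start
    # state, overwrite the pot contents with partially filled soups, and
    # compare H+R vs R+R completion times. A minimal sketch of that pattern
    # factored into a helper (the helper name and signature are
    # hypothetical, not part of the original suite):
    def _make_scenario_evaluator(self, layout_name, pot_soups):
        """Build an AgentEvaluator whose start state has the given pots.

        pot_soups maps a pot position (x, y) to a soup state tuple of the
        form (soup_type, onions_in_pot, cooking_progress).
        """
        eva = AgentEvaluator(
            {
                "layout_name": layout_name,
                "start_order_list": ["any", "any"],
                "cook_time": 5
            },
            force_compute=force_compute)
        start_state = eva.env.mdp.get_standard_start_state()
        # Replace the default (empty) pots with the requested soup states.
        start_state.objects = {
            pos: Obj('soup', pos, soup) for pos, soup in pot_soups.items()
        }
        eva.start_state = start_state
        return eva
    # Usage sketch: test_schelling_s above would then reduce to
    #   eva = self._make_scenario_evaluator(
    #       "schelling_s",
    #       {(2, 0): ('onion', 2, 5), (2, 4): ('onion', 2, 5)})
    #   self.compare_times(eva, h_idx=1)
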
    def test_unidentifiable_s(self):
        # Same scenario as test_unidentifiable below, but on a smaller
        # layout to facilitate DRL training.

        eva = AgentEvaluator(
            {
                "layout_name": "asymmetric_advantages",
                "start_order_list": ["any", "any"],
                "cook_time": 5
            },
            force_compute=force_compute)
        start_state = eva.env.mdp.get_standard_start_state()
        start_state.objects = {
            (4, 2): Obj('soup', (4, 2), ('onion', 2, 0)),
            (4, 3): Obj('soup', (4, 3), ('onion', 3, 5))
        }
        eva.start_state = start_state
        self.compare_times(eva, h_idx=0)

    def test_unidentifiable(self):
        # Scenario with an unidentifiable human plan (and asymmetric advantages)
        #
        # X O X X X
        # X     ↓RX
        # X X     X
        # X X     X
        # X S     D
        # X X P=P5X
        # X O     D
        # X X     X
        # X X ↑H  X
        # X       X
        # X S X X X
        #
        # The human goes up, towards either the onion or a dish.
        # The robot can't deduce which one the human is going for, but if the
        # human were optimal, it would go for the onion. R therefore assumes
        # that H will take care of the last onion needed and heads to the
        # dish dispenser. However, by the time R gets there it is clear that
        # H has decided to get a dish, so the optimal action becomes going
        # for the onion instead, wasting quite a lot of time.

        eva = AgentEvaluator(
            {
                "layout_name": "unident",
                "start_order_list": ["any", "any"],
                "cook_time": 5
            },
            force_compute=force_compute)
        start_state = eva.env.mdp.get_standard_start_state()
        start_state.objects = {
            (5, 2): Obj('soup', (5, 2), ('onion', 2, 0)),
            (5, 3): Obj('soup', (5, 3), ('onion', 3, 5))
        }
        eva.start_state = start_state
        self.compare_times(eva, h_idx=0)
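
    # compare_times is assumed here to roll out each scenario twice -- once
    # with a human model in seat h_idx next to the planning agent (H+R), and
    # once with two planning agents (R+R) -- and assert that H+R takes
    # longer, as the scenario comments describe. A minimal sketch of that
    # assumed behavior, written under a separate name so it does not collide
    # with the real helper defined elsewhere in the suite; the evaluator
    # method names below are hypothetical:
    def _compare_times_sketch(self, evaluator, h_idx=0):
        # Hypothetical rollout calls; the real AgentEvaluator API may differ.
        hr_trajectory = evaluator.evaluate_one_optimal_one_human(h_idx=h_idx)
        rr_trajectory = evaluator.evaluate_optimal_pair()
        hr_time = hr_trajectory["ep_lengths"][0]
        rr_time = rr_trajectory["ep_lengths"][0]
        # The H+R team should need strictly more timesteps than R+R.
        self.assertGreater(hr_time, rr_time)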