Example #1
    def test_notify_reward(self):
        self.sut = KuhnPoker.NFSP.Agent.NfspAgent(self.mock_q_policy,
                                                  self.mock_supervised_trainer,
                                                  nu=0)
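        # nu=0: always act from the supervised (average-strategy) policy.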
        self.sut.kuhn_supervised_policy.aggressive_action_prob = MagicMock(
            return_value=1)

        infoset = KuhnInfoset(card=1, bet_sequence=(0, ))
        infoset_state = infoset_to_state(infoset)
        self.sut.get_action(infoset)

        self.mock_q_policy.add_sars = MagicMock()

        infoset_next = KuhnInfoset(card=1, bet_sequence=(0, 1))
        infoset_next_state = infoset_to_state(infoset_next)

        self.sut.notify_reward(next_infoset=infoset_next,
                               reward=123,
                               is_terminal=True)

        # call_args[0] holds the positional args; call_args[1] holds the keyword args.
        self.assertEqual(self.mock_q_policy.add_sars.call_args[0], tuple())
        self.assertEqual(
            self.mock_q_policy.add_sars.call_args[1]["state"].tolist(),
            infoset_state.tolist())
        self.assertEqual(self.mock_q_policy.add_sars.call_args[1]["action"], 1)
        self.assertEqual(self.mock_q_policy.add_sars.call_args[1]["reward"],
                         123)
        self.assertEqual(
            self.mock_q_policy.add_sars.call_args[1]["next_state"].tolist(),
            infoset_next_state.tolist())
        self.assertEqual(
            self.mock_q_policy.add_sars.call_args[1]["is_terminal"], True)
Example #2
    def test_check_bet_call_game(self):
        def mock_random_sample(a, b):
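            # Ignore the arguments; deal card 1 (queen) to P0 and card 2 (king) to P1.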
            return [1, 2]

        def get_agent0_action(infoset: KuhnInfoset):
            if infoset.bet_sequence == ():
                return 0
            else:
                return 1

        def get_agent1_action(infoset: KuhnInfoset):
            return 1

        # P0 has queen, P1 has king
        with mock.patch('random.sample', mock_random_sample):
            self.agents[0].get_action = MagicMock(
                side_effect=get_agent0_action)
            self.agents[1].get_action = MagicMock(
                side_effect=get_agent1_action)
            KuhnPoker.NFSP.Agent.collect_trajectories(self.agents, num_games=1)

        self.agents[0].reset.assert_called_once_with()
        self.agents[1].reset.assert_called_once_with()

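        # Each mock_calls entry is a (name, args, kwargs) triple; [2] selects the kwargs.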
        self.assertEqual(self.agents[0].notify_reward.mock_calls[0][2], {
            "next_infoset": KuhnInfoset(1, ()),
            "reward": 0,
            "is_terminal": False
        })
        self.assertEqual(
            self.agents[1].notify_reward.mock_calls[0][2], {
                "next_infoset": KuhnInfoset(2, (0, )),
                "reward": 0,
                "is_terminal": False
            })

        self.assertEqual(
            self.agents[0].notify_reward.mock_calls[1][2], {
                "next_infoset": KuhnInfoset(1, (0, 1)),
                "reward": 0,
                "is_terminal": False
            })

        self.assertEqual(self.agents[1].notify_reward.mock_calls[1][2], {
            "next_infoset": None,
            "reward": 2,
            "is_terminal": True
        })
        self.assertEqual(self.agents[0].notify_reward.mock_calls[2][2], {
            "next_infoset": None,
            "reward": -2,
            "is_terminal": True
        })

        self.assertEqual(3, len(self.agents[0].notify_reward.mock_calls))
        self.assertEqual(2, len(self.agents[1].notify_reward.mock_calls))
Example #3
def log_strategy(writer: SummaryWriter, policy: NnPolicyWrapper,
                 global_step: int):
    infoset = KuhnInfoset(0, ())

    # Each (bet_sequence, label) pair names one decision node in the Kuhn
    # poker game tree.
    nodes = [
        ((), "p0_open"),
        ((0, ), "p0_check/p1"),
        ((0, 1), "p0_check/p1_bet/p0"),
        ((1, ), "p0_bet/p1"),
    ]

    for card in range(3):
        infoset.card = card
        for bet_sequence, label in nodes:
            infoset.bet_sequence = bet_sequence
            aggressive_action_prob = policy.aggressive_action_prob(infoset)
            node_name = "strategy/%s/%s" % (card_to_str(card), label)
            writer.add_scalar(node_name,
                              aggressive_action_prob,
                              global_step=global_step)
Example #4
    def test_aggressive_action_prob_supervised(self):
        self.sut = KuhnPoker.NFSP.Agent.NfspAgent(self.mock_q_policy,
                                                  self.mock_supervised_trainer,
                                                  nu=0)
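        # nu=0: aggressive_action_prob should defer to the supervised policy.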

        self.sut.kuhn_supervised_policy.aggressive_action_prob = MagicMock(
            return_value=1)
        infoset = KuhnInfoset(card=1, bet_sequence=(1, ))

        retval = self.sut.aggressive_action_prob(infoset)

        self.assertEqual(1, retval)
        self.sut.kuhn_supervised_policy.aggressive_action_prob.assert_called_with(
            infoset)
Example #5
def log_qvals(writer: SummaryWriter, policy: QPolicy, global_step: int):
    infoset = KuhnInfoset(0, ())

    # Same four decision nodes as in log_strategy above.
    nodes = [
        ((), "p0_open"),
        ((0, ), "p0_check/p1"),
        ((0, 1), "p0_check/p1_bet/p0"),
        ((1, ), "p0_bet/p1"),
    ]

    for card in range(3):
        infoset.card = card
        for bet_sequence, label in nodes:
            infoset.bet_sequence = bet_sequence
            state = torch.from_numpy(
                infoset_to_state(infoset)).float().unsqueeze(0).to(device)
            # Query the local Q-network without tracking gradients.
            with torch.no_grad():
                q_vals = policy.qnetwork_local(state).cpu().numpy()[0]
            # Log the Q-value gap between the aggressive action (index 1)
            # and the passive action (index 0).
            node_name = "q_vals/%s/%s" % (card_to_str(card), label)
            writer.add_scalar(node_name,
                              q_vals[1] - q_vals[0],
                              global_step=global_step)
Example #6
    def test_aggressive_action_prob_q(self):
        self.sut = KuhnPoker.NFSP.Agent.NfspAgent(self.mock_q_policy,
                                                  self.mock_supervised_trainer,
                                                  nu=1.1)
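        # nu=1.1 (> any sampled probability): always act from the RL best-response policy.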

        self.sut.kuhn_rl_policy.get_action = MagicMock(return_value=1)
        self.sut.supervised_trainer.add_observation = MagicMock()

        infoset = KuhnInfoset(card=1, bet_sequence=(1, ))
        infoset_state = infoset_to_state(infoset)

        retval = self.sut.aggressive_action_prob(infoset)

        self.assertEqual(1, retval)
        self.assertEqual(infoset_state.tolist(), self.sut.last_state.tolist())

        self.sut.kuhn_rl_policy.get_action.assert_called_with(infoset)

        self.assertEqual(
            self.sut.supervised_trainer.add_observation.call_args[0]
            [0].tolist(), infoset_state.tolist())
        self.assertEqual(
            self.sut.supervised_trainer.add_observation.call_args[0][1], 1)
Example #7
    def test_game_start_jack(self):
        infoset = KuhnInfoset(0, ())
        state = infoset_to_state(infoset)
        self.assertEqual([1, 0, 0, 0, 0, 0, 0], state.tolist())

    def test_game_p0_check_p1_bet_queen(self):
        infoset = KuhnInfoset(1, (0, 1))
        state = infoset_to_state(infoset)
        self.assertEqual([0, 1, 0, 1, 0, 0, 1], state.tolist())

    def test_game_p0_check_king(self):
        infoset = KuhnInfoset(2, (0, ))
        state = infoset_to_state(infoset)
        self.assertEqual([0, 0, 1, 1, 0, 0, 0], state.tolist())

    def test_game_start_queen(self):
        infoset = KuhnInfoset(1, ())
        state = infoset_to_state(infoset)
        self.assertEqual([0, 1, 0, 0, 0, 0, 0], state.tolist())
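
These four assertions pin down the 7-dimensional state layout: indices 0-2 hold a one-hot card encoding (0 = jack, 1 = queen, 2 = king), and each betting position gets its own one-hot {check, bet} pair at indices 3-4 and 5-6. A minimal reconstruction consistent with the tests above, assuming infoset_to_state returns a NumPy array (the name infoset_to_state_sketch and its standalone form are ours, not the repo's):

import numpy as np

def infoset_to_state_sketch(infoset) -> np.ndarray:
    # Indices 0-2: one-hot card (0 = jack, 1 = queen, 2 = king).
    # Indices 3-4: one-hot {check=0, bet=1} for the first betting action.
    # Indices 5-6: one-hot {check=0, bet=1} for the second betting action.
    state = np.zeros(7)
    state[infoset.card] = 1
    for i, action in enumerate(infoset.bet_sequence):
        state[3 + 2 * i + action] = 1
    return state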