Example #1
 def test_start_state_when_opponent_is_next_to_move(self):
     game = TicTacToe([['X', 'O', 'O'], ['X', 'O', ' '], [' ', 'X', 'X']])
     ab = AlphaBeta()
     mdp = FixedGameMDP(game, ab, 1)
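     # It is O's turn, and O (the fixed opponent, index 1) wins
     # immediately by taking the bottom-left corner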
     expected = TicTacToe([['X', 'O', 'O'], ['X', 'O', ' '],
                           ['O', 'X', 'X']])
     self.assertEqual(mdp.start_state(), expected)
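Example #1 relies on start_state() applying the fixed opponent's move whenever the opponent is the side to move. The sketch below only illustrates that behaviour; the attribute and helper names (_game, _opp_player, _opp_idx, cur_player(), choose_move()) are assumptions inferred from the calls in these tests, not the project's actual code.

 class FixedGameMDP:

     def __init__(self, game, opp_player, opp_idx):
         self._game = game
         self._opp_player = opp_player
         self._opp_idx = opp_idx

     def start_state(self):
         # If the fixed opponent is the player to move, apply its move
         # first so the agent always acts from the returned state.
         if self._game.cur_player() == self._opp_idx:
             move = self._opp_player.choose_move(self._game)
             return self._game.copy().make_move(move)
         return self._game.copy()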
Example #2
 def test_cur_state_when_opponent_should_move_to_start(self):
     game = TicTacToe([['X', 'O', 'O'], ['X', 'O', ' '], [' ', 'X', 'X']])
     ab = AlphaBeta()
     mdp = FixedGameMDP(game, ab, 1)
     env = Environment(mdp)
     expected = TicTacToe([['X', 'O', 'O'], ['X', 'O', ' '],
                           ['O', 'X', 'X']])
     self.assertEqual(env.cur_state(), expected)
Example #3
 def test_do_action(self):
     # X - O
     # - - X
     # - - O
     game = TicTacToe().make_moves([1, 3, 6, 9])
     mdp = FixedGameMDP(game.copy(), AlphaBeta(), 1)
     env = Environment(mdp)
     env.do_action(7)
     expected = TicTacToe().make_moves([1, 3, 6, 9, 7, 4])
     self.assertEqual(env.cur_state(), expected)
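Examples #2 and #3 treat Environment as a thin, stateful wrapper around the MDP: cur_state() starts at mdp.start_state() and do_action() advances it, opponent reply included. A minimal sketch consistent with those tests, where the internal names and the reliance on transitions() are assumptions:

 class Environment:

     def __init__(self, mdp):
         self._mdp = mdp
         self._state = mdp.start_state()

     def cur_state(self):
         return self._state

     def do_action(self, action):
         # With a fixed opponent the MDP is deterministic, so the single
         # (next_state, probability) pair fully determines the successor.
         next_state, _prob = self._mdp.transitions(self._state, action)[0]
         self._state = next_state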
Example #4
 def test_transitions(self):
     # X - O
     # - - X
     # - - O
     game = TicTacToe().make_moves([1, 3, 6, 9])
     # Create an MDP where AlphaBeta is the
     # second player (the opponent)
     mdp = FixedGameMDP(game, AlphaBeta(), 1)
     # Put X in slot 7; the MDP should return a single
     # transition in which O replies at 4, blocking
     # X's threat down the left column
     transitions = mdp.transitions(game, 7)
     self.assertEqual(len(transitions), 1)
     next_game, prob = transitions[0]
     self.assertEqual(next_game, game.copy().make_moves([7, 4]))
     self.assertEqual(prob, 1.0)
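Example #4 pins down the shape of transitions(): a list containing a single (next_state, probability) pair in which both the agent's move and the fixed opponent's reply have been applied. Continuing the hypothetical FixedGameMDP sketch from above, one way this could look (is_over() and choose_move() are assumed interfaces):

     def transitions(self, state, action):
         next_state = state.copy().make_move(action)
         # Let the fixed opponent reply if the game is still running
         # and it is now the opponent's turn.
         if not next_state.is_over() and next_state.cur_player() == self._opp_idx:
             opp_move = self._opp_player.choose_move(next_state)
             next_state = next_state.make_move(opp_move)
         # A deterministic opponent yields exactly one successor,
         # reached with probability 1.0.
         return [(next_state, 1.0)]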
Example #5
 def test_reward_when_agent_moves_second_and_loses(self):
     ab = AlphaBeta()
     opp_idx = 0
     mdp = FixedGameMDP(self.game, ab, opp_idx)
     cur_state = TicTacToe().make_moves([1, 4, 2, 5])
     # cur_state:
     # X X -
     # O O -
     # - - -
     action = 3
     next_state = cur_state.copy().make_move(action)
     # next_state:
     # X X X
     # O O -
     # - - -
     reward = mdp.reward(cur_state, action, next_state)
     self.assertEqual(reward, -1.0)
Example #6
 def test_reward_when_agent_moves_second_and_wins(self):
     ab = AlphaBeta()
     opp_idx = 0
     mdp = FixedGameMDP(self.game, ab, opp_idx)
     # Opponent moves first, Agent second
     cur_state = TicTacToe().make_moves([1, 4, 2, 5, 7])
     # cur_state:
     # X X -
     # O O -
     # X - -
     action = 6
     next_state = cur_state.copy().make_move(action)
     # next_state:
     # X X -
     # O O O
     # X - -
     reward = mdp.reward(cur_state, action, next_state)
     self.assertEqual(reward, 1.0)
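Examples #5 and #6 fix the sign convention of reward(): -1.0 when the fixed opponent wins in next_state and +1.0 when the agent wins. A sketch continuing the same hypothetical class; winner() and the 0.0 cases for draws and non-terminal states are assumptions:

     def reward(self, state, action, next_state):
         if not next_state.is_over():
             return 0.0
         # winner() is assumed to return the winning player's index,
         # or None when the game is drawn.
         winner = next_state.winner()
         if winner is None:
             return 0.0
         return -1.0 if winner == self._opp_idx else 1.0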
Example #7
 def setUp(self):
     self.ab = AlphaBeta()
Example #8
 def test_cur_state(self):
     game = TicTacToe()
     mdp = FixedGameMDP(game, AlphaBeta(), 1)
     env = Environment(mdp)
     self.assertEqual(env.cur_state(), mdp.start_state())
     self.assertEqual(env.cur_state(), game)
Example #9
 def setUp(self):
     self.game = TicTacToe()
     self.mdp = FixedGameMDP(self.game, AlphaBeta(), 1)
     self.players = [RandPlayer(), RandPlayer()]