def testState(self):
    """Check ``Game.status`` during a played game and on hand-built boards.

    The first part plays moves through ``Game.play`` and checks the
    reported status after each pair of moves. The second part writes
    directly into the private ``_board`` and checks that both
    ``_compute_status()`` and the ``status`` attribute agree.
    """

    def check_both(game, expected):
        # Same order as the checks were originally written: the explicit
        # computation first, then the public attribute.
        self.assertEqual(game._compute_status(), expected)
        self.assertEqual(game.status, expected)

    # First "real life" example
    played = TicTacToe.Game()
    self.assertEqual(played.status, -1)
    for first_move, second_move in (((0, 0), (0, 1)), ((1, 1), (1, 0))):
        played.play(first_move)
        played.play(second_move)
        # NOTE(review): -1 presumably means "no winner yet" -- confirm
        # against TicTacToe.Game.
        self.assertEqual(played.status, -1)
    # Fifth move completes the (0,0)-(1,1)-(2,2) diagonal.
    played.play((2, 2))
    self.assertEqual(played.status, 1)
    # Any further move on this game must be rejected.
    with self.assertRaises(RuntimeError):
        played.play((2, 1))

    # Some more "to the point" tests
    # Three cells along index 0 of the first axis filled with 1.
    first_axis_line = TicTacToe.Game()
    first_axis_line._board[0, 0:3] = 1
    check_both(first_axis_line, 1)

    # Three cells along index 2 of the second axis filled with 1.
    second_axis_line = TicTacToe.Game()
    second_axis_line._board[0:3, 2] = 1
    check_both(second_axis_line, 1)

    # Fill (2,0), (1,1), (0,2) with 2 one cell at a time; only the last
    # cell is expected to change the status.
    diagonal = TicTacToe.Game()
    for cell, expected in (((2, 0), -1), ((1, 1), -1), ((0, 2), 2)):
        diagonal._board[cell] = 2
        check_both(diagonal, expected)
def main():
    """Create a minimax bot and a random bot and run one game between them."""
    import TicTacToe as ttt
    import TicTacToe.bots as bots

    # NOTE(review): the two symbol arguments look like (own mark,
    # opponent mark) followed by a display name -- confirm against the
    # bot constructors.
    contestants = [
        bots.Minimax_Bot("x", "o", "Bob"),
        bots.Random_Bot("o", "x", "Bill"),
    ]
    ttt.Game(contestants).run()
def getWLRs(self, number_of_runs=1000):
    """Run repeated games with the current Q-table and tally final rewards.

    A ``TicTacToe.Game`` is created from ``self.rewards`` and run
    ``number_of_runs`` times with ``self.qtable``. For every run, the
    value at index 3 of the last recorded step is compared against the
    first three entries of ``self.rewards``.

    Args:
        number_of_runs: How many games to run.

    Returns:
        A list of three counters; entry ``i`` is the number of runs
        whose final reward equalled ``self.rewards[i]``.
    """
    game = TicTacToe.Game(self.rewards)
    tally = [0, 0, 0]
    for _ in range(number_of_runs):
        # Start every run from a fresh environment.
        game.reset()
        history = game.run(self.qtable)
        # The reward is stored at index 3 of each step record.
        final_reward = history[-1][3]
        for slot in range(len(tally)):
            if final_reward == self.rewards[slot]:
                tally[slot] += 1
    return tally
def initLearning(self):
    """Create the game environment and store it on ``self.environment``.

    The environment is a ``TicTacToe.Game`` built from ``self.rewards``.
    """
    reward_spec = self.rewards
    self.environment = TicTacToe.Game(reward_spec)
def twoplayer_setup(self):
    """Build a fresh game with two ``Player`` objects attached to it.

    Returns:
        A ``(game, player_x, player_o)`` tuple, where the players were
        created with the marks ``'X'`` and ``'O'`` respectively.
    """
    board_game = TicTacToe.Game()
    x_side = TicTacToe.Player('X', board_game)
    o_side = TicTacToe.Player('O', board_game)
    return board_game, x_side, o_side
def onecompplayer_setup(self):
    """Build a fresh game with one ``Computer`` and one ``Player``.

    Returns:
        A ``(game, computer_x, player_o)`` tuple, where the computer was
        created with the mark ``'X'`` and the player with ``'O'``.
    """
    board_game = TicTacToe.Game()
    x_computer = TicTacToe.Computer('X', board_game)
    o_human = TicTacToe.Player('O', board_game)
    return board_game, x_computer, o_human
import TicTacToe
from utility import loadFromFile

# Reward triple handed to the game constructor.
# NOTE(review): the meaning of each position is defined by
# TicTacToe.Game -- confirm the ordering there.
_REWARDS = (10, -10, 5)

# Load the stored Q-table first, then build the game and showcase it.
qtable = loadFromFile("qtable")
game = TicTacToe.Game(_REWARDS)
game.showcase(qtable)