def test_player_move():
    """With epsilon set to 1, players 1 and 2 both move to index 7.

    Index 7 is the only entry of the board state that holds 0.
    """
    one_gap_state = (2, 1, 2, 1, 2, 1, 2, 0, 2)
    board = g.Board()
    for mark in (1, 2):
        player = g.Player(board, mark)
        player.epsilon = 1
        # Hand the board a fresh list on every pass.
        board.set_state(list(one_gap_state))
        assert player.move() == 7
def test_player_move_greedy():
    """With epsilon at 0, the player moves to the position given a Q value.

    Position 1 is assigned a Q value of 10 via ``set_Q`` and ``move()`` is
    expected to return 1.

    This test has its own name so that it does not rebind another
    ``test_player_move`` defined in the same module; with two identically
    named functions pytest would only collect the later one.
    """
    board = g.Board()
    player = g.Player(board, 1)
    board.set_state([1, 0, 1, 0, 2, 0, 0, 0, 0])
    player.set_Q(1, 10)
    player.epsilon = 0
    assert player.move() == 1
def test_player_rewards():
    """Check ``reward(3)`` for player 1 on a new board and on two set states."""
    board = g.Board()
    player = g.Player(board, 1)

    # Board exactly as constructed, before any state is set.
    assert player.reward(3) == 0

    # First three entries all 1 (this player's mark), then all 2.
    scenarios = (
        ([1, 1, 1, 0, 0, 0, 0, 0, 0], 100),
        ([2, 2, 2, 0, 0, 0, 0, 0, 0], -100),
    )
    for state, expected_reward in scenarios:
        board.set_state(state)
        assert player.reward(3) == expected_reward
def test_player_bestmove():
    """``eval_board`` returns the values stored with ``set_Q`` at their indices."""
    board = g.Board()
    player = g.Player(board, 1)

    # One value stored: only index 7 is non-zero.
    board.set_state([2, 1, 2, 1, 2, 1, 2, 0, 2])
    player.set_Q(7, 10)
    expected = np.zeros(9)
    expected[7] = 10.
    assert np.array_equal(player.eval_board(), expected)

    # Two values stored after the state changes: indices 0 and 7 are non-zero.
    board.set_state([0, 1, 2, 1, 2, 1, 2, 0, 2])
    player.set_Q(0, 20)
    player.set_Q(7, 10)
    expected = np.zeros(9)
    expected[0] = 20.
    expected[7] = 10.
    assert np.array_equal(player.eval_board(), expected)
def test_player_Q():
    """A value written with ``set_Q`` is returned unchanged by ``get_Q``."""
    action, stored_value = 1, 56
    board = g.Board()
    player = g.Player(board, 1)
    player.set_Q(action, stored_value)
    assert player.get_Q(action) == stored_value
import deeptactoe
from deeptactoe import game as g
from pprint import pprint as pp
import numpy as np

# Game setup: player 1 is the computer, player 2 uses the 'human' behaviour.
board = g.Board()
player1 = g.Player(board, 1)
player2 = g.Player(board, 2, beh='human')
player1.epsilon = 1

try:
    player1.load_Q()
except Exception:
    # Loading is best effort, so the script carries on without stored Q values.
    # Catching Exception (not a bare except) lets Ctrl-C and SystemExit
    # still terminate the program.
    print("Not possible to load Q matrix. Computer will play at random")


def sanitise(board, move):
    """Report whether ``move`` has to be rejected.

    Args:
        board: Object whose ``get_empty()`` result is searched for the move.
        move: Candidate move; it is converted with ``int()`` before the
            comparison.

    Returns:
        ``False`` when ``int(move)`` is found in ``board.get_empty()``.
        ``True`` otherwise, after printing "Illegal move!". Input that
        ``int()`` cannot convert (for example free text) is also reported
        as illegal rather than raising.
    """
    try:
        position = int(move)
    except (TypeError, ValueError):
        print("Illegal move!")
        return True

    if np.any(np.equal(board.get_empty(), position)):
        return False

    print("Illegal move!")
    return True


def resolution(board):
    """Print the result message matching ``board.get_condition()``.

    A condition of 1 prints the computer's win (player 1 above), 2 prints the
    human's win (player 2 above), and any other value prints "Tie".
    """
    case = board.get_condition()
    if case == 1:
        print("The Computer wins")
    elif case == 2:
        print("Human wins")
    else:
        print("Tie")
import deeptactoe
from deeptactoe import game as g
from pprint import pprint as pp
import matplotlib
# The backend is selected before pyplot is imported.
matplotlib.use('GTK3Cairo')
import matplotlib.pyplot as plt
import numpy as np

# Training script: player 1 updates its Q values while playing against
# player 2, which is created with the 'fixed' behaviour.
# NOTE(review): statement nesting below was reconstructed from a
# whitespace-collapsed source — confirm against the original layout.
board = g.Board()
player1 = g.Player(board, 1)
player2 = g.Player(board, 2, beh='fixed')
player1.epsilon = 1
board.start_game()
wins = []  # not written to in the visible part of the script
n_ep = 10000
for episode in range(n_ep):
    # Every episode starts from a cleared board.
    board.clear()
    for i in range(6):
        # Player 1 picks an action, updates its Q values and epsilon for this
        # episode, and only then places its mark on the board.
        action = player1.move()
        player1.update_Q(action)
        player1.update_epsilon(episode)
        board.update(action, 1)
        # NOTE(review): presumably a condition below 4 means the game has
        # ended — confirm against Board.get_condition.
        if (board.get_condition() < 4):
            break
        # Player 2 replies; the condition is not re-checked after this move.
        action = player2.move()
        board.update(action, 2)