def test_evaluate_graphic():
    """
    Interactively print random boards, highlighting cells flagged by evaluate.

    Each time the user answers anything other than 'q' at the prompt, a
    random 19x19 grid of values in {0, 1, 2} is drawn and one-hot encoded
    into a (1, 19, 19, 3) integer array (channel 0 for value 1, channel 1
    for value 2). ``evaluate`` is then called for every cell, first with
    player 0 and then with player 1; cells for which it returns a truthy
    value are printed in green (ANSI escape 92), all others plainly.
    """
    side = 19
    while input('press q to quit: ') != 'q':
        stones = np.random.randint(3, size=(side, side))
        board = np.zeros((1, side, side, 3), dtype=int)
        # One-hot encode: value 1 -> channel 0, value 2 -> channel 1.
        board[0, :, :, 0] = stones == 1
        board[0, :, :, 1] = stones == 2

        # Cells flagged by evaluate for either player (player 0 scanned
        # completely before player 1, row by row).
        flagged = set()
        for player in (0, 1):
            for row in range(side):
                for col in range(side):
                    if evaluate(board, player, [row, col]):
                        flagged.add((row, col))

        for row in range(side):
            for col in range(side):
                # Collapse the two channels back to the 0/1/2 cell value.
                cell = board[0, row, col, 0] + board[0, row, col, 1] * 2
                if (row, col) in flagged:
                    print('\033[92m{}\033[0m'.format(cell), end='')
                else:
                    print(cell, end='')
            print()
def test_against_random(model, size=100):
    '''
    Evaluate the model against a randomly moving opponent.

    Plays ``size`` games of 3x3 TicTacToe. Player 1 moves via
    ``random_choice`` on its board view; player 2 moves via
    ``evaluate(model, game, pid, tau=1)``. The starting player is drawn
    uniformly from {1, 2} for every game. A summary line is printed
    where "Wins" counts games won by player 2 (the model), "Ties" counts
    games whose winner is 0 and "Losses" counts games won by player 1.

    Args:
        model: model object forwarded to ``evaluate``.
        size: number of games to play.

    Returns:
        Fraction of games won by the model, or 0.0 when no game was
        played (``size`` <= 0).
    '''
    wins = {0: 0, 1: 0, 2: 0}
    for _game_index in range(size):
        game = TicTacToe(3)
        # randint's upper bound is exclusive, so this draws 1 or 2
        # (replaces the deprecated np.random.random_integers).
        pid = np.random.randint(1, 3)
        winner = None
        while winner is None:
            if pid == 1:
                x, y = random_choice(game.get_board(pid))
            else:
                x, y, _ = evaluate(model, game, pid, tau=1)
            game.place(pid, x, y)
            winner = game.check_win()
            pid = (pid % 2) + 1  # alternate 1 <-> 2
        wins[winner] += 1
    print('Wins: %d Ties: %d Losses: %d' % (wins[2], wins[0], wins[1]))
    total = wins[0] + wins[1] + wins[2]
    if total == 0:
        # No games were played; avoid dividing by zero.
        return 0.0
    return wins[2] / total
def play_user(model):
    '''
    Play one interactive 3x3 TicTacToe game between a human and the model.

    The starting player is drawn uniformly from {1, 2}. Player 2 is the
    model: its move comes from ``evaluate(model, game, pid, tau=.1)`` and
    the returned probabilities plus ``model.evaluate`` on the current
    input are printed. Player 1 is the human, who is prompted for integer
    ``x`` and ``y`` values; non-integer input is rejected and the prompt
    is repeated. The board view of the player to move is printed before
    every move, and ``game.get_input(1)`` is printed once the game ends.

    Args:
        model: model object forwarded to ``evaluate`` and queried via
            ``model.evaluate``.
    '''
    game = TicTacToe(3)
    # randint's upper bound is exclusive, so this draws 1 or 2
    # (replaces the deprecated np.random.random_integers).
    pid = np.random.randint(1, 3)
    winner = None
    while winner is None:
        board = game.get_board(pid)
        print(board)
        if pid == 2:
            x, y, prob = evaluate(model, game, pid, tau=.1)
            print(prob)
            print(model.evaluate(game.get_input(pid)))
        else:
            # Keep asking until both coordinates parse as integers, so a
            # typo does not abort the whole game with a ValueError.
            while True:
                try:
                    x = int(input('x: '))
                    y = int(input('y: '))
                except ValueError:
                    print('Please enter integer coordinates.')
                    continue
                break
        game.place(pid, x, y)
        winner = game.check_win()
        pid = (pid % 2) + 1  # alternate 1 <-> 2
    print(game.get_input(1))
def basic_win():
    """
    Run evaluate on a small hand-built 19x19 position.

    The grid is empty except for four 1-stones at row 0, columns 1-4,
    and four 2-stones at row 1, columns 0-3. It is converted with
    ``conv_map`` and the result of ``evaluate`` for player 0 at
    position (0, 0) is returned.
    """
    layout = np.zeros((19, 19), dtype=int)
    layout[0, 1:5] = 1
    layout[1, 0:4] = 2

    board = conv_map(layout)
    # NOTE(review): these two objects are never used afterwards; they are
    # still constructed in case construction has side effects - confirm
    # and remove if not.
    _root = Node(0)
    _net = Network(-1)
    return evaluate(board, 0, (0, 0))
def test_winning_move_when_one():
    """
    Test whether the network finds a winning move when one exists.

    Random 19x19 boards are generated until one contains a winning move.
    While stones are placed, any stone for which ``evaluate`` returns a
    truthy value is taken off the board again, and the first player for
    whom this happens is remembered as the player to move. A fresh
    ``Network(0)`` / ``Node(0)`` pair is then expanded and asked to play
    one turn for that player.

    Returns:
        True if, after the network's turn, ``evaluate`` is truthy for
        that player at some board cell; False otherwise.
    """
    while True:
        # 0 = no winning move seen yet, otherwise the stone value (1 or 2)
        # of the first player that had one.
        has_winning_move = 0
        tmp = np.random.randint(3, size=(19, 19))
        board = np.zeros((1, 19, 19, 3), dtype=int)
        for y in range(19):
            for x in range(19):
                stone = int(tmp[y, x])
                if stone == 0:
                    continue
                channel = stone - 1  # stone 1 -> channel 0, stone 2 -> channel 1
                board[0, y, x, channel] = 1
                if evaluate(board, channel, [y, x]):
                    # Take the stone back so the cell stays available as
                    # a move for this player.
                    board[0, y, x, channel] = 0
                    # Record only the first player found. The original
                    # tested `if has_winning_move:` here, which could never
                    # be true and made the outer loop spin forever.
                    if not has_winning_move:
                        has_winning_move = stone
        if not has_winning_move:
            continue

        player = has_winning_move - 1
        network = Network(0)
        node = Node(0)
        expand(node, board, player, network)
        turn(board, player, node, network)
        for y in range(19):
            for x in range(19):
                if evaluate(board, player, [y, x]):
                    return True
        return False
def human_turn(board, node, player, net):
    """
    Ask the human for a move, apply it and report the evaluation result.

    The user is prompted for integer ``x`` and ``y`` values. On
    non-integer input an error message is shown and the user may quit
    the program by answering "y"; otherwise the prompt is repeated.

    Args:
        board: board object passed to ``put_on_board``, ``update_turn``
            and ``evaluate``.
        node: current tree node, passed to ``update_turn``.
        player: id of the human player; ``player ^ 1`` is handed to
            ``update_turn``.
        net: network object forwarded to ``update_turn``.

    Returns:
        Tuple ``(node, r)`` where ``node`` is the value returned by
        ``update_turn`` and ``r`` is the second element of the
        ``evaluate`` result for the move just played.

    Raises:
        SystemExit: if the user chooses to quit after invalid input.
    """
    print("Your turn, Human")
    while True:
        try:
            x = int(input("x: "))
            y = int(input("y: "))
            break
        except ValueError:
            # Only malformed numbers are handled here; KeyboardInterrupt
            # and SystemExit must propagate instead of being swallowed.
            print("wrong format, only integers required")
            q = input("quit? (y/n): ")
            if q == "y":
                # Same effect as exit(0) without depending on the
                # site-provided helper.
                raise SystemExit(0)
    # NOTE(review): the position is built as (x, y); confirm this matches
    # the coordinate order put_on_board / evaluate expect.
    pos = (x, y)
    put_on_board(board, pos, player, 1)
    node = update_turn(board, player ^ 1, node, net, pos)
    _, r = evaluate(board, player, pos)
    return node, r
# Self-play data collection: for each of ITER iterations, play EPOCH games
# of 3x3 TicTacToe and record, per player, the inputs seen, the move
# probabilities returned by evaluate, and the final rewards.
for it in range(ITER):
    for i in range(EPOCH):
        print('{}: {}/{}'.format(it, i, EPOCH), end='\r')
        game = TicTacToe(3)
        # randint's upper bound is exclusive, so this draws 1 or 2
        # (replaces the deprecated np.random.random_integers).
        pid = np.random.randint(1, 3)
        winner = None
        inputs = {1: [], 2: []}
        probs = {1: [], 2: []}
        while winner is None:
            board = game.get_board(pid)
            r_board = game.get_board_raw()
            # NOTE(review): evaluate receives the *other* player's id
            # here, while the recorded input and the placed move belong
            # to pid - confirm this is intended.
            x, y, prob = evaluate(model, game, (pid % 2) + 1)
            inputs[pid].append(copy.copy(game.get_input(pid)))
            probs[pid].append(prob)
            game.place(pid, x, y)
            winner = game.check_win()
            pid = (pid % 2) + 1  # alternate 1 <-> 2
        # One reward per recorded move: +1 for the winner's moves, -1 for
        # the loser's, 0 for both players when winner == 0.
        if winner != 0:
            one_reward = [1 if winner == 1 else -1] * len(inputs[1])
            two_reward = [1 if winner == 2 else -1] * len(inputs[2])
        else:
            one_reward = [0] * len(inputs[1])
            two_reward = [0] * len(inputs[2])