def debug_run(model):
    ''' Shows the board and value for each step in a game

    Plays a single ``TicTacToe(3)`` game in which both players pick their
    moves with ``q_select``. Before every move the current player's board
    view and the model's evaluation of that position are printed.

    Args:
        model: object exposing ``evaluate(inputs)`` as well as the ``sess``,
            ``probs`` and ``states`` attributes used for the final query.
    '''
    game = TicTacToe(3)
    # randint's upper bound is exclusive, so high=3 draws the starting player
    # from {1, 2}. Drop-in replacement for the deprecated
    # np.random.random_integers(low=1, high=2, size=1).
    pid = np.random.randint(low=1, high=3, size=1)[0]
    winner = None
    while winner is None:
        board = game.get_board(pid)
        val = model.evaluate(game.get_input(pid))
        print(board)
        print(val)
        x, y = q_select(pid, board, model, game)
        game.place(pid, x, y)
        winner = game.check_win()
        pid = (pid % 2) + 1  # hand the turn to the other player (1 <-> 2)
    # NOTE(review): the original indentation was lost; this print is assumed
    # to run once after the game ends rather than after every move - confirm.
    print(
        model.sess.run(
            model.probs,
            feed_dict={model.states: game.get_input(pid)})[0])
def test_against_random(model, num_games=1000):
    ''' Evaluate the model against random performance

    Plays ``num_games`` games of ``TicTacToe(3)``. Player 1 moves via
    ``random_choice`` and player 2 moves via ``q_select`` with the model;
    the starting player is drawn at random for every game.

    Args:
        model: model handed to ``q_select`` for player 2's moves.
        num_games: number of games to play (defaults to 1000).

    Returns:
        Fraction of games won by player 2 (the model).

    Raises:
        ValueError: if ``num_games`` is not positive.
    '''
    if num_games <= 0:
        raise ValueError('num_games must be positive, got %r' % (num_games,))

    # Keyed by the value of game.check_win(): 0 = tie, 1 = random player
    # won, 2 = model won (see the summary print below).
    wins = {0: 0, 1: 0, 2: 0}
    for _ in range(num_games):
        game = TicTacToe(3)
        # randint's upper bound is exclusive, so high=3 draws from {1, 2}.
        # Drop-in replacement for the deprecated np.random.random_integers.
        pid = np.random.randint(low=1, high=3, size=1)[0]
        winner = None
        while winner is None:
            board = game.get_board(pid)
            if pid == 1:
                x, y = random_choice(board)
            else:
                x, y = q_select(pid, board, model, game)
            game.place(pid, x, y)
            winner = game.check_win()
            pid = (pid % 2) + 1  # switch between players 1 and 2
        wins[winner] += 1

    print('Wins: %d Ties: %d Losses: %d' % (wins[2], wins[0], wins[1]))
    return wins[2] / (wins[0] + wins[1] + wins[2])
def play_user(model):
    ''' Test the model against human skill level

    Runs one interactive ``TicTacToe(3)`` game. Player 2 is the model: its
    move comes from ``evaluate(model, game, pid, tau=.1)`` and the returned
    probabilities plus the model's evaluation of the position are printed.
    Player 1 is the human, who types the ``x`` and ``y`` of each move.

    Args:
        model: model passed to ``evaluate`` and queried via
            ``model.evaluate``.
    '''
    game = TicTacToe(3)
    # randint's upper bound is exclusive, so high=3 draws the starting player
    # from {1, 2}. Drop-in replacement for the deprecated
    # np.random.random_integers(low=1, high=2, size=1).
    pid = np.random.randint(low=1, high=3, size=1)[0]
    winner = None
    while winner is None:
        board = game.get_board(pid)
        print(board)
        if pid == 2:
            x, y, prob = evaluate(model, game, pid, tau=.1)
            print(prob)
            print(model.evaluate(game.get_input(pid)))
        else:
            # Re-prompt instead of aborting the whole game when the user
            # types something that is not an integer.
            x = y = None
            while y is None:
                try:
                    x = int(input('x: '))
                    y = int(input('y: '))
                except ValueError:
                    print('Please enter whole numbers for x and y.')
        game.place(pid, x, y)
        winner = game.check_win()
        pid = (pid % 2) + 1  # hand the turn to the other player (1 <-> 2)
    # NOTE(review): the original indentation was lost; this print is assumed
    # to run once after the game ends rather than after every move - confirm.
    print(game.get_input(1))
def chess_worker(connection, args):
    ''' Self-play worker that generates games and ships them as data

    Loops forever. For every game it plays ``TicTacToe`` to completion,
    keeping a deep copy of the game after each move. Each move is either
    requested over ``connection`` (a ``'board'`` message carrying the last
    8 snapshots, answered by the peer with the move) or picked uniformly
    from ``game.legal_moves``. When the game is over, per-move rewards are
    built and the result of ``build_input`` is sent as a ``'data'`` message.

    Args:
        connection: object with ``send(obj)`` and ``recv()``.
        args: settings object; ``args.gamma`` is the threshold compared
            against ``random.random()`` (a random legal move is played when
            the draw is <= gamma) and ``args.history`` is forwarded to
            ``build_input``.
    '''
    while True:
        game = TicTacToe()
        boards = [copy.deepcopy(game)]  # snapshot of the starting position

        while game.check_win() is None:
            if random.random() > args.gamma:
                # Ask the peer for a move based on the most recent snapshots.
                connection.send({'type': 'board', 'boards': boards[-8:]})
                move = connection.recv()
            else:
                move = random.choice(game.legal_moves)
            game = game.push(move)
            boards.append(copy.deepcopy(game))

        res = game.check_win()
        size = len(boards) - 1  # number of moves played
        if res != 0:
            # Counting back from the final move, every second move gets
            # reward 1 and the moves in between get 0.
            rewards = [
                1 if (size - 1 - step) % 2 == 0 else 0
                for step in range(size)
            ]
        else:
            rewards = [0] * size
        rewards = np.expand_dims(np.array(rewards), 1)

        in_boards, in_targets = build_input(boards, rewards, args.history)
        connection.send({
            'type': 'data',
            'inp': in_boards,
            'rewards': in_targets
        })
    # NOTE(review): the original indentation was lost; this send is assumed
    # to sit after the endless loop, which makes it unreachable until the
    # loop gains an exit condition - confirm the intended placement.
    connection.send({'type': 'end'})
def debug_run(model):
    ''' Shows the board and value for each step in a game

    Plays a single ``TicTacToe()`` game in which both players pick their
    moves with ``q_select``. Before every move the current player's board
    view and the model's evaluation of it are printed.

    Args:
        model: object exposing ``evaluate(inputs)``; it is also passed to
            ``q_select``.
    '''
    game = TicTacToe()
    # randint's upper bound is exclusive, so high=3 draws the starting player
    # from {1, 2}. Drop-in replacement for the deprecated
    # np.random.random_integers(low=1, high=2, size=1).
    pid = np.random.randint(low=1, high=3, size=1)[0]
    winner = None
    while winner is None:
        board = game.get_board(pid)
        # The board is reshaped to a batch of one 3x3 grid for the model.
        val = model.evaluate(board.reshape(1, 3, 3))
        print(board)
        print(val)
        x, y = q_select(board, model)
        game.place(pid, x, y)
        winner = game.check_win()
        pid = (pid % 2) + 1  # hand the turn to the other player (1 <-> 2)
def play_user(model):
    ''' Test the model against human skill level

    Runs one interactive ``TicTacToe()`` game. Player 2 is the model and
    moves via ``q_select``; player 1 is the human, who types the ``x`` and
    ``y`` of each move. The current player's board view is printed before
    every move.

    Args:
        model: model passed to ``q_select`` for player 2's moves.
    '''
    game = TicTacToe()
    # randint's upper bound is exclusive, so high=3 draws the starting player
    # from {1, 2}. Drop-in replacement for the deprecated
    # np.random.random_integers(low=1, high=2, size=1).
    pid = np.random.randint(low=1, high=3, size=1)[0]
    winner = None
    while winner is None:
        board = game.get_board(pid)
        print(board)
        if pid == 2:
            x, y = q_select(pid, board, model, game)
        else:
            # Re-prompt instead of aborting the whole game when the user
            # types something that is not an integer.
            x = y = None
            while y is None:
                try:
                    x = int(input('x: '))
                    y = int(input('y: '))
                except ValueError:
                    print('Please enter whole numbers for x and y.')
        game.place(pid, x, y)
        winner = game.check_win()
        pid = (pid % 2) + 1  # hand the turn to the other player (1 <-> 2)
pid = 1 opp_pid = 2 winner = None while winner is None: board = game.get_board(pid) if random.random() < gamma: x, y = random_choice(board) else: x, y = q_select(board, model) game.place(pid, x, y) board = game.get_board(pid) boards[pid].append(board) winner = game.check_win() pid = (pid % 2) + 1 board = game.get_board(pid) boards[pid].append(board) if winner != 0: loser = (winner % 2) + 1 #winner_rewards = decay_reward(1, len(boards[winner])) #loser_rewards = decay_reward(-1, len(boards[loser])) winner_rewards = [1] * len(boards[winner]) loser_rewards = [-1] * len(boards[loser]) rewards = np.array(winner_rewards + loser_rewards)