def _prompt_int(label):
    ''' Keep asking on stdin until the user types a valid integer '''
    while True:
        try:
            return int(input(label))
        except ValueError:
            # A typo should not abort the whole game; just ask again.
            print('Please enter a whole number.')


def play_user(model):
    ''' Test the model against human skill level

    Plays one interactive game on a TicTacToe(3) board. Player 2 is driven
    by `model` (its move comes from `evaluate`), player 1 types the x and y
    coordinates on stdin. Which player starts is chosen at random.

    Each turn prints the board as returned by `game.get_board(pid)`; on the
    model's turn it also prints the `prob` returned by `evaluate` and the
    result of `model.evaluate` on the current input. The game runs until
    `game.check_win()` returns something other than None.

    model: object exposing `evaluate(...)`; it is also handed to the
        module-level `evaluate` helper.
    '''
    game = TicTacToe(3)
    # randint's upper bound is exclusive, so this draws 1 or 2. It replaces
    # the deprecated np.random.random_integers(low=1, high=2).
    pid = np.random.randint(1, 3)
    winner = None
    while winner is None:
        print(game.get_board(pid))
        if pid == 2:
            # tau is forwarded unchanged to evaluate -- presumably a sampling
            # temperature; confirm against evaluate's definition.
            x, y, prob = evaluate(model, game, pid, tau=.1)
            print(prob)
            print(model.evaluate(game.get_input(pid)))
        else:
            x = _prompt_int('x: ')
            y = _prompt_int('y: ')
        game.place(pid, x, y)
        winner = game.check_win()
        # Alternate between player ids 1 and 2.
        pid = (pid % 2) + 1
    # NOTE(review): the original indentation was lost; this final dump of
    # player 1's input is assumed to run once after the game has ended.
    print(game.get_input(1))
def debug_run(model):
    ''' Shows the board and value for each step in a game

    Plays one game on a TicTacToe(3) board in which every move, for both
    players, is picked by `q_select`. Before each move it prints the board
    from `game.get_board(pid)` and the result of `model.evaluate` on the
    current player's input. The game runs until `game.check_win()` returns
    something other than None.

    model: object exposing `evaluate(...)`, plus `sess`, `probs` and
        `states`, which are used for the final `sess.run` call.
    '''
    game = TicTacToe(3)
    # randint's upper bound is exclusive, so this draws 1 or 2. It replaces
    # the deprecated np.random.random_integers(low=1, high=2).
    pid = np.random.randint(1, 3)
    winner = None
    while winner is None:
        board = game.get_board(pid)
        val = model.evaluate(game.get_input(pid))
        print(board)
        print(val)
        x, y = q_select(pid, board, model, game)
        game.place(pid, x, y)
        winner = game.check_win()
        # Alternate between player ids 1 and 2.
        pid = (pid % 2) + 1
    # NOTE(review): the original indentation was lost; this final print is
    # assumed to run once after the game has ended. pid has already been
    # switched, so the probabilities are for the player who would move next.
    final_probs = model.sess.run(
        model.probs, feed_dict={model.states: game.get_input(pid)})
    print(final_probs[0])
print('{}: {}/{}'.format(it, i, EPOCH), end='\r') game = TicTacToe(3) pid = np.random.random_integers(low=1, high=2, size=1)[0] winner = None inputs = {1: [], 2: []} probs = {1 : [], 2: []} while winner is None: board = game.get_board(pid) r_board = game.get_board_raw() x, y, prob = evaluate(model, game, (pid % 2) + 1) inputs[pid].append(copy.copy(game.get_input(pid))) probs[pid].append(prob) game.place(pid, x, y) winner = game.check_win() pid = (pid % 2) + 1 if winner != 0: one_reward = [1 if winner == 1 else -1] * len(inputs[1]) two_reward = [1 if winner == 2 else -1] * len(inputs[2]) else: one_reward = [0] * len(inputs[1]) two_reward = [0] * len(inputs[2]) total_inputs = build_input(inputs)