async def tic_tac_toe(self, ctx, player2: discord.Member):
    def _confirm_reply(m):
        return (m.channel == ctx.message.channel and m.author == player2
                and m.content.lower() in ['y', 'n'])

    await ctx.send(f"{player2.mention} play a game of tic tac toe with {ctx.message.author}? (y/n)")
    reply_msg = await self.client.wait_for('message', timeout=40.0, check=_confirm_reply)
    if reply_msg.content.lower() == 'n':
        await ctx.send('OK, game cancelled!')
        return

    def _move_reply(m):
        return (m.channel == ctx.message.channel and m.author == players[player]
                and m.content in [str(x) for x in range(1, 10)])

    player = 'O'  # flipped at the top of the game loop, so X moves first
    player1 = ctx.message.author
    players = {'X': player1, 'O': player2}
    game = TicTacToe()
    await ctx.send(game.rules)
    await ctx.send(f"{player1.mention} ~ X\n{player2.mention} ~ O")

    while True:
        player = 'O' if player == 'X' else 'X'
        while True:
            try:
                await ctx.send(f"{players[player].mention}: enter a position")
                reply_msg = await self.client.wait_for('message', timeout=40.0, check=_move_reply)
                position = int(reply_msg.content)
                game.playerMove(player, position)  # expected to raise ValueError on an illegal move
                break
            except ValueError:
                await ctx.send(f"{players[player].mention} Invalid move")
        await ctx.send(game.display())
        if game.checkWin(player):
            await ctx.send(f"{players[player].mention} won!")
            break
        if game.checkDraw():
            await ctx.send(f"{players['X'].mention} {players['O'].mention} Draw!")
            break
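# The command above relies on a small TicTacToe helper exposing `rules`,
# `display()`, `playerMove()`, `checkWin()`, and `checkDraw()` (all named in
# the command itself). A minimal sketch of such a class; the internals here
# are an assumption, not the original implementation:
class TicTacToe:
    rules = "Positions are numbered 1-9, left to right, top to bottom."

    def __init__(self):
        self.board = [' '] * 9

    def playerMove(self, player, position):
        # Raising ValueError on an occupied cell triggers the command's retry loop
        if self.board[position - 1] != ' ':
            raise ValueError("position already taken")
        self.board[position - 1] = player

    def display(self):
        return "\n".join("|".join(self.board[r * 3:r * 3 + 3]) for r in range(3))

    def checkWin(self, player):
        lines = [(0, 1, 2), (3, 4, 5), (6, 7, 8),
                 (0, 3, 6), (1, 4, 7), (2, 5, 8),
                 (0, 4, 8), (2, 4, 6)]
        return any(all(self.board[i] == player for i in line) for line in lines)

    def checkDraw(self):
        return ' ' not in self.board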
async def msg_challenge(message):
    p1 = message.author
    if len(message.mentions) != 1:
        await client.send_message(message.channel, 'Please mention one player.')
        return
    p2 = message.mentions[0]
    for g in games:
        if p1 in g.players or p2 in g.players:
            await client.send_message(message.channel, 'One player is already in a game.')
            return
    if p2 == p1:
        await client.send_message(message.channel, "Can't play against yourself.")
        return
    game = TicTacToe([p1, p2])
    await game.start(client, message)
    print('Created game ' + str(game))
    games.append(game)
import numpy as np

from games import TicTacToe
from players import RandomPlayer, MCTSPlayer
from montecarlo import MCTS

game = TicTacToe(3)
print(game.board())

player_random = RandomPlayer(game)
mcts_play = MCTS(game=game, player=player_random, episodes=800)
player = MCTSPlayer(game=game, mcts=mcts_play)
mcts = MCTS(game=game, player=player_random, episodes=800)

# Self-play loop: search, sample a move from the returned policy, play it,
# and reset the board once a game ends.
for i in range(1000):
    while game.winner() == 0:
        mcts.train()
        p = mcts.get_policy(prop=1)
        a = np.random.choice(mcts.action_space, p=p)
        print(p, a,
              mcts.get_Qsa(game.state(), mcts.action_space),
              sum(mcts.get_Nsa(game.state(), mcts.action_space)))
        game.move(a)
        print(game.board())
        mcts.update()
    game.set_state(game.start())
    print(i)
    print(game.board())
from tkinter import *
import sys
import os.path

sys.path.append(os.path.join(os.path.dirname(__file__), '..'))

from games import minimax_decision, alphabeta_player, random_player, TicTacToe
# "gen_state" can be used to generate a game state to apply the algorithm
from tests.test_games import gen_state

# The module name contains hyphens, so it cannot be loaded with a plain
# `import` statement.
tttgraph = __import__("tic-tac-toe_graph")

ttt = TicTacToe()
root = None
buttons = []
frames = []
x_pos = []
o_pos = []
count = 0
sym = ""
result = None
choices = None
graph_display = None


def create_frames(root):
    """Create the frame structure of the game window."""
    frame1 = Frame(root)
    frame2 = Frame(root)
    frame3 = Frame(root)
    frame4 = Frame(root)
    create_buttons(frame1)
class Canvas_TicTacToe(Canvas):
    """Play a 3x3 TicTacToe game on HTML canvas"""

    def __init__(self, varname, player_1='human', player_2='random',
                 width=300, height=350, cid=None):
        valid_players = ('human', 'random', 'alphabeta')
        if player_1 not in valid_players or player_2 not in valid_players:
            raise TypeError("Players must be one of {}".format(valid_players))
        Canvas.__init__(self, varname, width, height, cid)
        self.ttt = TicTacToe()
        self.state = self.ttt.initial
        self.turn = 0
        self.strokeWidth(5)
        self.players = (player_1, player_2)
        self.font("20px Arial")
        self.draw_board()

    def mouse_click(self, x, y):
        player = self.players[self.turn]
        if self.ttt.terminal_test(self.state):
            if 0.55 <= x/self.width <= 0.95 and 6/7 <= y/self.height <= 6/7 + 1/8:
                self.state = self.ttt.initial
                self.turn = 0
                self.draw_board()
            return

        if player == 'human':
            x, y = int(3*x/self.width) + 1, int(3*y/(self.height*6/7)) + 1
            if (x, y) not in self.ttt.actions(self.state):
                # Invalid move
                return
            move = (x, y)
        elif player == 'alphabeta':
            move = alphabeta_player(self.ttt, self.state)
        else:
            move = random_player(self.ttt, self.state)
        self.state = self.ttt.result(self.state, move)
        self.turn ^= 1
        self.draw_board()

    def draw_board(self):
        self.clear()
        self.stroke(0, 0, 0)
        offset = 1/20
        # Grid lines; the board occupies the top 6/7 of the canvas
        self.line_n(0 + offset, (1/3)*6/7, 1 - offset, (1/3)*6/7)
        self.line_n(0 + offset, (2/3)*6/7, 1 - offset, (2/3)*6/7)
        self.line_n(1/3, (0 + offset)*6/7, 1/3, (1 - offset)*6/7)
        self.line_n(2/3, (0 + offset)*6/7, 2/3, (1 - offset)*6/7)

        board = self.state.board
        for mark in board:
            if board[mark] == 'X':
                self.draw_x(mark)
            elif board[mark] == 'O':
                self.draw_o(mark)

        if self.ttt.terminal_test(self.state):
            # End game message
            utility = self.ttt.utility(self.state, self.ttt.to_move(self.ttt.initial))
            if utility == 0:
                self.text_n('Game Draw!', offset, 6/7 + offset)
            else:
                self.text_n('Player {} wins!'.format("XO"[utility < 0]), offset, 6/7 + offset)
                # Find the 3 and draw a line
                self.stroke([255, 0][self.turn], [0, 255][self.turn], 0)
                for i in range(3):
                    if all([(i + 1, j + 1) in self.state.board for j in range(3)]) and \
                            len({self.state.board[(i + 1, j + 1)] for j in range(3)}) == 1:
                        self.line_n(i/3 + 1/6, offset*6/7, i/3 + 1/6, (1 - offset)*6/7)
                    if all([(j + 1, i + 1) in self.state.board for j in range(3)]) and \
                            len({self.state.board[(j + 1, i + 1)] for j in range(3)}) == 1:
                        self.line_n(offset, (i/3 + 1/6)*6/7, 1 - offset, (i/3 + 1/6)*6/7)
                if all([(i + 1, i + 1) in self.state.board for i in range(3)]) and \
                        len({self.state.board[(i + 1, i + 1)] for i in range(3)}) == 1:
                    self.line_n(offset, offset*6/7, 1 - offset, (1 - offset)*6/7)
                if all([(i + 1, 3 - i) in self.state.board for i in range(3)]) and \
                        len({self.state.board[(i + 1, 3 - i)] for i in range(3)}) == 1:
                    self.line_n(offset, (1 - offset)*6/7, 1 - offset, offset*6/7)
            # restart button
            self.fill(0, 0, 255)
            self.rect_n(0.5 + offset, 6/7, 0.4, 1/8)
            self.fill(0, 0, 0)
            self.text_n('Restart', 0.5 + 2*offset, 13/14)
        else:
            # Print which player's turn it is
            self.text_n("Player {}'s move({})".format("XO"[self.turn], self.players[self.turn]),
                        offset, 6/7 + offset)

        self.update()

    def draw_x(self, position):
        self.stroke(0, 255, 0)
        x, y = [i - 1 for i in position]
        offset = 1/15
        self.line_n(x/3 + offset, (y/3 + offset)*6/7,
                    x/3 + 1/3 - offset, (y/3 + 1/3 - offset)*6/7)
        self.line_n(x/3 + 1/3 - offset, (y/3 + offset)*6/7,
                    x/3 + offset, (y/3 + 1/3 - offset)*6/7)

    def draw_o(self, position):
        self.stroke(255, 0, 0)
        x, y = [i - 1 for i in position]
        self.arc_n(x/3 + 1/6, (y/3 + 1/6)*6/7, 1/9, 0, 360)
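# A usage sketch for the widget above, as it would appear in a notebook cell
# (the string passed as varname must match the variable the canvas is bound to):
bot_play = Canvas_TicTacToe('bot_play', 'random', 'alphabeta')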
from games import TicTacToe
from algorithms import MCTS
import numpy as np
import tqdm

## 10000 searches per move is pretty strong. Vary the quality of
## decision making by changing the number of searches from 10000 to
## something else. Or, try changing the board size and playing a game.

# Create environment
env = TicTacToe(3)

# Reset
obs = env.reset()

# Whose turn? True if you want to play first
human_play = True
player = 'x'
other_player = 'o'

while True:
    # Print board and check if game over
    env.print_board()
    if env.won('x'):
        print('Game over, x wins')
        break
    if env.won('o'):
        print('Game over, o wins')
        break
    if env.draw():
        print('Draw')
        break
    if human_play:
        # ask for action
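        # The snippet stops here in the original; the rest of this loop body is
        # a sketch of the likely continuation. env.step(), the MCTS constructor
        # arguments, and best_action() are assumed names for illustration, not
        # the repository's real API.
        move = int(input('Your move: '))        # assumed: board index typed by the human
        env.step(move, player)                  # assumed API
    else:
        mcts = MCTS(env, searches=10000)        # assumed constructor; 10000 searches per move
        move = mcts.best_action(other_player)   # assumed API
        env.step(move, other_player)            # assumed API
    human_play = not human_play                 # alternate turns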
def game():
    return TicTacToe()
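# The one-liner above reads like a pytest fixture body. A hedged sketch of how
# it is typically declared and consumed; the decorator and the test below are
# assumptions, not part of the original file:
import pytest
from games import TicTacToe


@pytest.fixture
def game():
    return TicTacToe()


def test_initial_board_is_fully_open(game):
    # An AIMA-style TicTacToe exposes `initial` and `actions`; all nine
    # squares should be playable before the first move.
    assert len(game.actions(game.initial)) == 9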
from games import (GameState, Game, Fig52Game, TicTacToe, query_player,
                   random_player, alphabeta_player, minimax_decision,
                   alphabeta_full_search, alphabeta_search)

if __name__ == '__main__':
    ttt = TicTacToe()
    print('Initial game state:')
    ttt.display(ttt.initial)  # display() prints the board itself and returns None
    # play_game returns the utility for the first player (X); since alpha-beta
    # never loses to a random player, each result should be 1 (win) or 0 (draw).
    for _ in range(10):
        print(ttt.play_game(alphabeta_player, random_player))
)

adaptive_trainer_robot = PolicyGradientPlayer(
    learner_brain,
    discount_rate=DISCOUNT_RATE,
    batch_iterations=BATCH_ITERATIONS,
    experience_batch_size=EXPERIENCE_BATCH_SIZE,
    experience_buffer_size=EXPERIENCE_BUFFER_SIZE,
)
static_trainer_robot = PolicyGradientPlayer(
    static_trainer_brain,
    discount_rate=DISCOUNT_RATE,
    batch_iterations=BATCH_ITERATIONS,
    experience_buffer_size=EXPERIENCE_BUFFER_SIZE,
)

adaptive_train_game = TicTacToe((learner_robot, adaptive_trainer_robot))
static_train_game = TicTacToe((learner_robot, static_trainer_robot))

# Initialize plot data
game_counts = []
wins = []
losses = []
scores = []
mean_experience_values = []
mean_confidences = []
brain_costs = []
brain_costs_ema = []
weight_ranges = []
weight_means = []

plot_data = {
    "score": {
    discount_factor=DISCOUNT_FACTOR,
    reward_factor=REWARD_FACTOR,
    batch_iterations=BATCH_ITERATIONS,
    experience_batch_size=EXPERIENCE_BATCH_SIZE,
    experience_buffer_size=EXPERIENCE_BUFFER_SIZE,
)
train_player = PolicyGradientPlayer(
    robot_brain,
    discount_factor=DISCOUNT_FACTOR,
    reward_factor=REWARD_FACTOR,
    batch_iterations=1,
    experience_batch_size=EXPERIENCE_BATCH_SIZE,
    experience_buffer_size=EXPERIENCE_BUFFER_SIZE,
)

human_game = TicTacToe((human, robot))
training = TicTacToe((robot, train_player))
random_training = TicTacToe((robot, RandomPlayer()))

robot.act_greedy = True
robot.show_action_probabilities = True

playing = True
while playing:
    # Gain experience without learning, to keep it fast
    robot.learn_while_playing = False
    random_training.play(32)
    training.play(32)

    # Learn on every move of the human game
    action='store_true',
    default=False)
parser.add_argument(
    "--no-clear",
    help="If set, the screen is not cleared between rounds",
    action='store_true',
    default=False)
parser.add_argument(
    "--cores",
    help="The number of threads that will be run concurrently",
    default=4)
args = parser.parse_args()

# Get game to play
if args.game == 'tictactoe':
    game_rules = TicTacToe()
elif args.game == 'connect4':
    game_rules = Connect4()
elif args.game == 'draughts':
    game_rules = Draughts()

print('Welcome to the Monte-Carlo tree search AI')
print()
print('AI plays first')
print()

while True:
    interface = Interface(game_rules, parallel=not args.no_parallel)
    interface.set_iterations(int(args.iter))
    interface.print_path(args.print_path)
    # Decide whether to show probabilities from argument
    interface.show_probabilities = args.show_probs
from games import TicTacToe

# task 1.2.1
ttt = TicTacToe()

# ttt.run(1000, 'move at random')
# ttt.draw(True)

ttt.run(1000, 'move at probability x_board')
ttt.draw(False)
ttt.run(1000, 'move at probability x_o_board')
ttt.draw(False)
ttt.run(1000, 'move at heuristic')
ttt.draw(False)
ttt.run(1000, 'move at heuristic forward')
ttt.draw(False)
import pickle

from games import TicTacToe
from brain import Brain
from players import PolicyGradientPlayer, HumanPlayer, RandomPlayer
from brain.activation_functions import ReLU, Softmax

human = HumanPlayer()
BRAIN_FILEPATH = "brain/saved/tictactoe-brain.pickle"

playing = True
while playing:
    with open(BRAIN_FILEPATH, "rb") as brain_file:
        robot_brain = pickle.load(brain_file)
    robot = PolicyGradientPlayer(robot_brain)
    robot.act_greedy = True
    robot.show_action_probabilities = 0.3

    human_game = TicTacToe((human, robot))
    # robot_game = TicTacToe((robot, robot_opponent))

    playing = human_game.play(2, render=True, pause=0.5)
    # robot_game.play(1, render=True, pause=0.7)
from games import TicTacToe, minimax_decision, alphabeta_search, GameState, alphabeta_player, random_player

print("Enter the first row.")
row1 = input().split(' ')
print("Enter the second row.")
row2 = input().split(' ')
print("Enter the third row.")
row3 = input().split(' ')

ticTacToe = TicTacToe()
board = {}
moves = []

row1.extend(row2)
row1.extend(row3)
table = row1

rowIndex = 1
columnIndex = 1

# Creating the current state of the game from the user inputs
for element in table:
    if element in ['X', 'O']:
        board[(rowIndex, columnIndex)] = element
    else:
        moves.append((rowIndex, columnIndex))
    columnIndex += 1
    if columnIndex > 3:
        rowIndex += 1
        columnIndex = 1

# Code to find who has to move next, considering X starts the game
nextToMove = 'X'
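# The snippet stops right after initializing nextToMove. Since X always moves
# first, the side to move can be recovered by counting the marks on the board;
# this continuation is a sketch of the likely intent, not the original code:
x_count = sum(1 for mark in board.values() if mark == 'X')
o_count = sum(1 for mark in board.values() if mark == 'O')
nextToMove = 'X' if x_count == o_count else 'O'

# With the state rebuilt, the imported search can suggest the next move;
# utility=0 is a placeholder for a non-terminal state:
state = GameState(to_move=nextToMove, utility=0, board=board, moves=moves)
print(alphabeta_search(state, ticTacToe))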
    (64, ReLU),
    (9, Softmax),
)
robot_brain = Brain(BRAIN_TOPOLOGY, learning_rate=LEARNING_RATE, regularization=REGULARIZATION)
learning_robot = PolicyGradientPlayer(
    robot_brain,
    discount_rate=DISCOUNT_RATE,
    batch_iterations=BATCH_ITERATIONS,
    experience_batch_size=EXPERIENCE_BATCH_SIZE,
    experience_buffer_size=EXPERIENCE_BUFFER_SIZE,
)

random_game = TicTacToe((learning_robot, RandomPlayer()))

# Initialize plot data
game_counts = []
wins = []
losses = []
scores = []
mean_experience_values = []
mean_confidences = []
brain_costs = []
brain_costs_ema = []
weight_ranges = []
weight_means = []

# Create a plot figure
plot_data = {