def run():
    n = 6
    width, height = 9, 9
    model_file = 'best_policy.model'  # load the model
    try:
        board = Board(width=width, height=height, n_in_row=n)
        game = Game(board)
        try:
            policy_param = pickle.load(open(model_file, 'rb'))
        except:
            policy_param = pickle.load(open(model_file, 'rb'),
                                       encoding='bytes')  # to support python3
        best_policy = PolicyValueNet(width, height, policy_param)
        mcts_player = MCTSPlayer(best_policy.policy_value_fn,
                                 c_puct=5,
                                 n_playout=400)  # set larger n_playout for better performance
        # human player, input your move in the format: 2,3
        human = Human()
        # set start_player=0 for human first
        game.start_play(human, mcts_player, start_player=1, is_shown=1)
    except KeyboardInterrupt:
        print('\n\rquit')
def run():
    n = 5
    width, height = 8, 8
    # `model_file` was referenced but never defined in the original; the
    # path below is a placeholder -- point it at your trained model.
    model_file = 'best_policy.model'
    try:
        board = Board(width=width, height=height, n_in_row=n)
        game = Game(board)
        # ############### human VS AI ###################
        # load the trained policy_value_net in either Theano/Lasagne,
        # PyTorch or TensorFlow
        best_policy = PolicyValueNet(width, height, model_file=model_file)
        mcts_player = MCTSPlayer(best_policy.policy_value_fn,
                                 c_puct=5,
                                 n_playout=500)
        # human player, input your move in the format: 2,3
        human1 = Human()
        # set start_player=0 for human first
        game.start_play(human1, mcts_player, start_player=1, is_shown=1)
        # game.start_play(human1, human2, start_player=0, is_shown=1)
    except KeyboardInterrupt:
        print('\n\rquit')
def run():
    n = 4
    width, height = 6, 6
    model_file = 'best_policy.model'
    try:
        board = Board(width=width, height=height, n_in_row=n)
        game = Game(board)
        # ############### human VS AI ###################
        # load the trained policy_value_net in either Theano/Lasagne,
        # PyTorch or TensorFlow
        best_policy = PolicyValueNet(width, height, model_file=model_file)
        mcts_player = MCTSPlayer(best_policy.policy_value_fn,
                                 c_puct=5,
                                 n_playout=400)
        # load the provided model (trained in Theano/Lasagne) into a MCTS
        # player written in pure numpy
        # try:
        #     policy_param = pickle.load(open(model_file, 'rb'))
        # except:
        #     policy_param = pickle.load(open(model_file, 'rb'),
        #                                encoding='bytes')  # To support python3
        # best_policy = PolicyValueNetNumpy(width, height, policy_param)
        # mcts_player = MCTSPlayer(best_policy.policy_value_fn,
        #                          c_puct=5,
        #                          n_playout=400)  # set larger n_playout for better performance

        # uncomment the following line to play with pure MCTS (it's much
        # weaker even with a larger n_playout)
        # mcts_player = MCTS_Pure(c_puct=5, n_playout=1000)

        # human player, input your move in the format: 2,3
        human = Human()
        # set start_player=0 for human first
        game.start_play(human, mcts_player, start_player=0, is_shown=1)
    except KeyboardInterrupt:
        print('\n\rquit')
def __init__(self, init_model=None, is_shown=0):
    self.board_width = 15
    self.board_height = 15
    self.n_in_row = 5
    self.board = Board(width=self.board_width,
                       height=self.board_height,
                       n_in_row=self.n_in_row)
    self.is_shown = is_shown
    self.game = Game_UI(self.board, is_shown)
    self.learn_rate = 2e-3
    self.lr_multiplier = 1.0
    self.temp = 1.0
    self.n_playout = 400
    self.c_puct = 5
    self.buffer_size = 10000
    self.batch_size = 512
    self.data_buffer = deque(maxlen=self.buffer_size)
    self.play_batch_size = 1
    self.epochs = 5
    self.kl_targ = 0.02
    self.check_freq = 50
    self.game_batch_num = 1500
    self.best_win_ratio = 0.0
    self.pure_mcts_playout_num = 1000
    if init_model:
        self.policy_value_net = PolicyValueNet(self.board_width,
                                               self.board_height,
                                               model_file=init_model)
    else:
        self.policy_value_net = PolicyValueNet(self.board_width,
                                               self.board_height)
    self.mcts_player = MCTSPlayer(self.policy_value_net.policy_value_fn,
                                  c_puct=self.c_puct,
                                  n_playout=self.n_playout,
                                  is_selfplay=1)
def run_a2c_vs_sarsa():
    winners = []
    board_length = 8
    action_space = (board_length, board_length, board_length, board_length)
    agent_one = A2C((board_length, board_length), action_space,
                    "a3c", "up", 1.0, 2000, 100000)
    agent_two = SARSAAgent((board_length, board_length), action_space,
                           "sarsa_two", "down", 1.0, 2000, 100000,
                           save_path="../data/modeldata/sarsa_two/model.ckpt")
    iterations = 200000
    for i in range(iterations):
        board = Board(board_length=board_length)
        game = Game(agent_one=agent_one, agent_two=agent_two, board=board)
        game.play(verbose=False)
        winners += [game.winner]
        agent_one.epsilon *= 0.99999
        if (i % 5000 == 0 and i > 0) or (iterations - 1 == i):
            victories_player_two = 0
            victories_player_one = 0
            for winner in winners:
                if winner == "a3c":
                    victories_player_one += 1
                # the original compared against "Two", which never matches
                # the agent's name "sarsa_two" if game.winner holds that name
                if winner == "sarsa_two":
                    victories_player_two += 1
            logging.info("Current epsilon: {}".format(agent_one.epsilon))
            logging.info("Player One: {}".format(victories_player_one))
            logging.info("Player Two: {}".format(victories_player_two))
            logging.info("Mean Rewards Agent One: {}".format(
                agent_one.moving_average_rewards[-1]))
            logging.info("Mean Rewards Agent Two: {}".format(
                agent_two.moving_average_rewards[-1]))
def run_sarsa_lstm_vs_random():
    winners = []
    board_length = 8
    action_space = (board_length, board_length, board_length, board_length)
    agent_one = SARSALSTMAgent((board_length, board_length), action_space,
                               "sarsa_lstm", "up", 1.0, 2000, 100000,
                               caching=False)
    agent_two = RandomAgent((board_length, board_length),
                            (board_length, board_length), "Two", "down")
    iterations = 200000
    for i in range(iterations):
        board = Board(board_length=board_length)
        game = Game(agent_one=agent_one, agent_two=agent_two, board=board)
        game.play(verbose=False)
        winners += [game.winner]
        agent_one.epsilon *= 0.99999
        if (i % 5000 == 0 and i > 0) or iterations - 1 == i:
            victories_player_two = 0
            victories_player_one = 0
            for winner in winners:
                if winner == "sarsa_lstm":
                    victories_player_one += 1
                if winner == "Two":
                    victories_player_two += 1
            logging.info("Current epsilon: {}".format(agent_one.epsilon))
            logging.info("Player One: {}".format(victories_player_one))
            logging.info("Player Two: {}".format(victories_player_two))
            logging.info("Mean Rewards Agent One: {}".format(
                agent_one.moving_average_rewards[-1]))
            logging.info("Mean Rewards Agent Two: {}".format(
                agent_two.moving_average_rewards[-1]))
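# Both loops above decay epsilon by a factor of 0.99999 per game, which
# anneals very slowly. A quick sketch of the decay horizon (the factor is
# taken from the code above; the target values are just illustrative):
import math

decay = 0.99999
for target in (0.5, 0.1, 0.01):
    games = math.log(target) / math.log(decay)
    print("epsilon reaches {} after ~{:,.0f} games".format(target, games))
# epsilon reaches 0.5 after ~69,314 games
# epsilon reaches 0.1 after ~230,257 games
# epsilon reaches 0.01 after ~460,515 games
# At the 200,000 iterations used above, epsilon only falls to about
# exp(-2) ~= 0.135, so the agents still explore heavily at the end.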
def __init__(self, init_model=None):
    # board and game parameters
    self.board_width = 10
    self.board_height = 10
    self.n_in_row = 4
    self.board = Board(width=self.board_width,
                       height=self.board_height,
                       n_in_row=self.n_in_row)
    self.game = Game(self.board)
    # training parameters
    self.learn_rate = 2e-3  # base learning rate
    self.lr_multiplier = 1.0  # adjusted automatically based on KL divergence
    self.temp = 1.0  # temperature parameter
    self.n_playout = 400  # number of simulations per move
    self.c_puct = 5  # trade-off coefficient between exploitation and exploration
    self.buffer_size = 10000
    self.batch_size = 512  # mini-batch size for training
    self.data_buffer = deque(maxlen=self.buffer_size)  # double-ended replay queue
    self.play_batch_size = 1
    self.epochs = 5  # num of train_steps for each update
    self.kl_targ = 0.02  # KL target for early stopping
    self.check_freq = 50  # evaluate the policy-value net every 50 batches
    self.game_batch_num = 500  # number of training batches
    self.best_win_ratio = 0.0  # best win ratio so far, used to decide whether a better model was found
    # playout count of the weak pure-MCTS opponent used to evaluate the trained policy
    self.pure_mcts_playout_num = 1000
    if init_model:
        # initialize the policy network from init_model
        self.policy_value_net = PolicyValueNet(self.board_width,
                                               self.board_height,
                                               model_file=init_model,
                                               use_gpu=True)
    else:
        # train a new policy network from scratch
        self.policy_value_net = PolicyValueNet(self.board_width,
                                               self.board_height,
                                               use_gpu=True)
    # AI player; is_selfplay=1 because we are generating self-play training data
    self.mcts_player = MCTSPlayer(self.policy_value_net.policy_value_fn,
                                  c_puct=self.c_puct,
                                  n_playout=self.n_playout,
                                  is_selfplay=1)
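# Several trainers here pair `lr_multiplier` with `kl_targ` but do not show
# the update step. A minimal sketch of the usual scheme, where the multiplier
# shrinks when the measured KL divergence between the old and new policy
# overshoots the target (the exact thresholds below are an assumption
# modeled on the AlphaZero_Gomoku reference implementation, not taken
# from this file):
def adjust_lr_multiplier(kl, kl_targ, lr_multiplier):
    if kl > kl_targ * 2 and lr_multiplier > 0.1:
        lr_multiplier /= 1.5  # update was too aggressive, slow down
    elif kl < kl_targ / 2 and lr_multiplier < 10:
        lr_multiplier *= 1.5  # update was too timid, speed up
    return lr_multiplier

# the effective optimizer step would then use learn_rate * lr_multiplier
print(adjust_lr_multiplier(kl=0.05, kl_targ=0.02, lr_multiplier=1.0))  # ~0.667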
def run():
    n = 5
    width, height = 9, 9
    model_file = 'output/best_policy.model'
    try:
        board = Board(width=width, height=height, n_in_row=n,
                      forbidden_hands=True)
        game = Game(board)
        # ############### human VS AI ###################
        # load the trained policy_value_net in either Theano/Lasagne,
        # PyTorch or TensorFlow
        # best_policy = PolicyValueNet(width, height, model_file=model_file)
        # mcts_player = MCTSPlayer(best_policy.policy_value_fn, c_puct=5, n_playout=400)

        # load the trained residual-net model into an MCTS player
        best_policy = PolicyValueNetRes30(width, height, 'l+',
                                          model_file=model_file)
        mcts_player = MCTSPlayer(best_policy.policy_value_fn,
                                 c_puct=5,
                                 n_playout=400)  # set larger n_playout for better performance

        # uncomment the following line to play with pure MCTS (it's much
        # weaker even with a larger n_playout)
        # mcts_player = MCTS_Pure(c_puct=5, n_playout=1000)

        # human player, input your move in the format: 2,3
        human = Human()
        # set start_player=0 for human first
        game.start_play(human, mcts_player, start_player=1, is_shown=1)
    except KeyboardInterrupt:
        print('\n\rquit')
def __init__(self, game_batch_num, model_file=None):
    # params of the board and the game
    self.size = BOARD_SIZE
    use_gpu = False
    board = Board(size=self.size, n_in_row=N_IN_ROW)
    self.game = Game(board)
    # training params
    self.learn_rate = 2e-3
    self.lr_multiplier = 1.0  # adaptively adjust the learning rate based on KL
    self.temp = 1.0  # the temperature param
    self.n_playout = 400  # num of simulations for each move
    self.c_puct = 5
    self.batch_size = 512  # mini-batch size for training
    self.data_buffer = deque(maxlen=10000)
    self.play_batch_size = 1
    self.epochs = 5  # num of train_steps for each update
    self.kl_targ = 0.02
    self.check_freq = 50
    self.game_batch_num = game_batch_num
    self.best_win_ratio = 0.0
    # num of simulations used for the pure mcts, which is used as
    # the opponent to evaluate the trained policy
    self.pure_mcts_playout_num = 1000
    if model_file:
        # start training from an initial policy-value net
        self.policy_value_net = PolicyValueNet(size=self.size,
                                               model_file=model_file,
                                               use_gpu=use_gpu)
    else:
        # start training from a new policy-value net
        self.policy_value_net = PolicyValueNet(size=self.size,
                                               use_gpu=use_gpu)
    self.mcts_player = MCTSPlayer(self.policy_value_net.policy_value_fn,
                                  c_puct=self.c_puct,
                                  n_playout=self.n_playout,
                                  is_selfplay=1)
def __init__(self):
    # params of the board and the game
    self.board_width = 6
    self.board_height = 6
    self.n_in_row = 4
    self.board = Board(width=self.board_width,
                       height=self.board_height,
                       n_in_row=self.n_in_row)
    self.game = Game(self.board)
    # training params
    self.learn_rate = 5e-3
    self.lr_multiplier = 1.0  # adaptively adjust the learning rate based on KL
    self.temp = 1.0  # the temperature param
    self.n_playout = 400  # num of simulations for each move
    self.c_puct = 5
    self.buffer_size = 10000
    self.batch_size = 512  # mini-batch size for training
    self.data_buffer = deque(maxlen=self.buffer_size)
    self.play_batch_size = 1
    self.epochs = 5  # num of train_steps for each update
    self.kl_targ = 0.025
    self.check_freq = 50
    self.game_batch_num = 1500
    self.best_win_ratio = 0.0
    # num of simulations used for the pure mcts, which is used as the
    # opponent to evaluate the trained policy
    self.pure_mcts_playout_num = 1000
    # start training from a given policy-value net
    # policy_param = pickle.load(open('current_policy.model', 'rb'))
    # self.policy_value_net = PolicyValueNet(self.board_width,
    #                                        self.board_height,
    #                                        net_params=policy_param)
    # start training from a new policy-value net
    self.policy_value_net = PolicyValueNet(self.board_width,
                                           self.board_height)
    self.mcts_player = MCTSPlayer(self.policy_value_net.policy_value_fn,
                                  c_puct=self.c_puct,
                                  n_playout=self.n_playout,
                                  is_selfplay=1)
def run():
    n = 5
    width, height = 15, 15
    # model_file = 'best_policy_8_8_5.model'
    model_file = 'best_policy.model'
    try:
        board = Board(width=width, height=height, n_in_row=n)
        game = Game(board)
        # ############### human VS AI ###################
        # MCTS player with the policy_value_net trained by the AlphaZero algorithm
        # policy_param = pickle.load(open(model_file, 'rb'))
        # best_policy = PolicyValueNet(width, height, net_params=policy_param)
        # mcts_player = MCTSPlayer(best_policy.policy_value_fn, c_puct=5, n_playout=400)

        # MCTS player with the trained policy_value_net written in pure numpy
        try:
            policy_param = pickle.load(open(model_file, 'rb'))
        except:
            policy_param = pickle.load(open(model_file, 'rb'),
                                       encoding='bytes')  # To support python3
        best_policy = PolicyValueNetNumpy(width, height, policy_param)
        mcts_player = MCTSPlayer(best_policy.policy_value_fn,
                                 c_puct=5,
                                 n_playout=400)  # set larger n_playout for better performance

        # uncomment the following line to play with pure MCTS (it's much
        # weaker even with a larger n_playout)
        # mcts_player = MCTS_Pure(c_puct=5, n_playout=1000)

        # human player, input your move in the format: 2,3
        human = Human()
        # set start_player=0 for human first
        game.start_play(human, mcts_player, start_player=1, is_shown=1)
    except KeyboardInterrupt:
        print('\n\rquit')
def evaluate_player(player1, player2, n_games=10):
    """
    Evaluate a trained policy by playing it against another player.
    Ties are recorded under the key -1 and count as half a win.
    Note: this is only for monitoring the progress of training.
    """
    board = Board(width=9, height=9, n_in_row=5)
    game = Game(board)
    win_cnt = defaultdict(int)
    for i in range(n_games):
        winner = game.start_play(player1, player2,
                                 start_player=i % 2,
                                 is_shown=0)
        print(winner)
        win_cnt[winner] += 1
    win_ratio = 1.0 * (win_cnt[1] + 0.5 * win_cnt[-1]) / n_games
    print("player1: {} vs player2: {}. result: win: {}, lose: {}, tie: {}".format(
        player1, player2, win_cnt[1], win_cnt[2], win_cnt[-1]))
    return win_ratio
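# Worked example of the win-ratio convention in evaluate_player, assuming
# the winner codes 1 (player1), 2 (player2) and -1 (tie) implied above:
from collections import defaultdict

sample_cnt = defaultdict(int, {1: 6, 2: 3, -1: 1})
n_games = 10
win_ratio = 1.0 * (sample_cnt[1] + 0.5 * sample_cnt[-1]) / n_games
print(win_ratio)  # 0.65: six wins plus half credit for one tie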
def run():
    n = 5
    width, height = 10, 10
    model_file = 'best_policy.model'
    try:
        board = Board(width=width, height=height, n_in_row=n)
        game = Game(board)
        graphic = Graphic()
        best_policy = PolicyValueNet(width, height,
                                     model_file='./model/' + model_file)
        mcts_player = MCTSPlayer(best_policy.policy_value_fn,
                                 c_puct=5,
                                 n_playout=1000)
        human = Human(graphic)
        # set start_player=0 for human first
        # game.start_play(human, mcts_player, graphic, start_player=0, is_shown=1)
        # run the game loop in a daemon thread so the GUI can own the main thread
        thread2 = threading.Thread(target=game.start_play,
                                   args=(human, mcts_player, graphic, 1, 1))
        thread2.daemon = True
        thread2.start()
        graphic.run()
    except KeyboardInterrupt:
        print('\n\rquit')
def run():
    n = 5
    width, height = 9, 9
    iteration = 1000
    model_file = './model/current_policy_{}_{}_{}_iteration{}.model'.format(
        height, width, n, iteration)
    # model_file = './model/best_policy_{}_{}_{}.model'.format(height, width, n)
    try:
        board = Board(width=width, height=height, n_in_row=n)
        best_policy = PolicyValueNet(width, height, model_file=model_file)
        AI_player1 = MCTSPlayer(best_policy.policy_value_fn,
                                c_puct=5, n_playout=400)
        AI_player2 = MCTSPlayer(best_policy.policy_value_fn,
                                c_puct=5, n_playout=400)
        human = Human()
        game = Game("AlphaZero Gomoku", board, AI_player1, AI_player2)
        while True:
            game.play()
            pygame.display.update()
            for event in pygame.event.get():
                if event.type == pygame.QUIT:
                    pygame.quit()
                    exit()
                elif event.type == pygame.MOUSEBUTTONDOWN:
                    mouse_x, mouse_y = pygame.mouse.get_pos()
                    game.mouseClick(mouse_x, mouse_y)
                    game.check_buttons(mouse_x, mouse_y)
    except KeyboardInterrupt:
        print('\n\rquit')
def go(self):
    print("One rule:\r\n Move a piece in the form 'x,y' \r\n e.g. 1,3\r\n")
    print("-" * 60)
    print("Ready Go")
    mc = MonteCarloTreeSearch(self.net, 1000)
    node = TreeNode()
    board = Board()
    while True:
        if board.c_player == BLACK:
            action = input("Your piece is 'O' and move: ")
            action = [int(n, 10) for n in action.split(",")]
            action = action[0] * board.size + action[1]
            next_node = TreeNode(action=action)
        else:
            _, next_node = mc.search(board, node)
        board.move(next_node.action)
        board.show()
        next_node.parent = None
        node = next_node
        if board.is_draw():
            print("-" * 28 + "Draw" + "-" * 28)
            return
        if board.is_game_over():
            if board.c_player == BLACK:
                print("-" * 28 + "Win" + "-" * 28)
            else:
                print("-" * 28 + "Loss" + "-" * 28)
            return
        board.trigger()
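# The human move above is flattened with action[0] * board.size + action[1].
# A round-trip sketch of that encoding (the board size of 15 is an
# assumption for illustration; use board.size in practice):
SIZE = 15

def to_action(row, col):
    return row * SIZE + col

def to_coords(action):
    return divmod(action, SIZE)  # (row, col)

assert to_coords(to_action(1, 3)) == (1, 3)
print(to_action(1, 3))  # 18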
def battle(player1, player2, num_games=100000, silent=False):
    draw_count = 0
    oneCount = 0
    twoCount = 0
    medTotal = 0
    medHad = 0
    medPos = set()
    for i in range(num_games):
        board = Board()
        result, total, had = play_game(board, player1, player2)
        medTotal += total
        medHad += had
        # medPos.update(pos)
        if result == 'me':
            oneCount += 1
        elif result == 'op':
            twoCount += 1
        else:
            draw_count += 1
    if not silent:
        p1 = player1.typeRep()
        p2 = player2.typeRep()
        print("After {} games we have draws: {}, {} wins: {}, and {} wins: {}.".format(
            num_games, draw_count, p1, oneCount, p2, twoCount))
        print("Which gives percentages of draws: {:.2%}, {} wins: {:.2%}, and {} wins: {:.2%}".format(
            draw_count / num_games, p1, oneCount / num_games,
            p2, twoCount / num_games))
    return oneCount, twoCount, draw_count, medTotal, medHad
def __init__(self, n: int, init_model=None):
    # params of the board and the game
    self.n = n
    self.board = Board(self.n)
    self.game = Game(self.board)
    # training params
    self.learn_rate = 5e-3
    self.lr_multiplier = 1.0  # adaptively adjust the learning rate based on KL
    self.temp = 1.0  # the temperature param
    self.n_play_out = 400  # number of simulations for each move
    self.c_puct = 5
    self.buffer_size = 10000
    self.batch_size = 512  # mini-batch size for training
    self.data_buffer = deque(maxlen=self.buffer_size)
    self.epochs = 5  # number of train_steps for each update
    self.kl_target = 0.025
    self.check_freq = 50
    self.game_batch_number = 10000
    self.best_win_ratio = 0.0
    self.episode_length = 0
    self.pool = multiprocessing.Pool(processes=multiprocessing.cpu_count())
    self.last_batch_number = 0
    # number of simulations used for the pure mcts, which is used as the
    # opponent to evaluate the trained policy
    self.pure_mcts_play_out_number = 1000
    if init_model:
        # start training from an initial policy-value net
        policy_param = pickle.load(open(init_model, 'rb'))
        self.policy_value_net = PolicyValueNet(self.n, net_params=policy_param)
    else:
        # start training from a new policy-value net
        self.policy_value_net = PolicyValueNet(self.n)
    self.mcts_player = MCTSPlayer(self.policy_value_net.policy_value_func,
                                  c_puct=self.c_puct,
                                  n_play_out=self.n_play_out,
                                  is_self_play=1)
import sys

from game import Board
from lexer import Lexer2048
from parser import Parser2048

board = Board(size=(4, 4))
lexer, parser = Lexer2048(), Parser2048(fmap=board.fmap)

while True:
    try:
        print("\033[32m" + "2048 >>>" + "\033[0m", end=" ")
        inp = input()
        command = lexer.preprocess(inp)
        out = parser.parse(lexer.tokenize(command))
        board.eout()
    except EOFError:
        print()
        exit()
    except Exception as E:
        print(str(E))
        print("-1", file=sys.stderr)
def move(self, board): print("MCTSPlayer thinks...") nodecnt = 0 t0 = time.time() rtnode, alrdy_in = self.board_already_in_gt(board) # print("RTNODE: ", rtnode) # node not in; if node in, no need to do anything if not alrdy_in: # need to check if any parents are in gt. rtnode_would_be_parents = self.gt.get_would_be_parents_to_root( rtnode) # get parent in game tree, not grandparent, etc # print("RTNode parents", rtnode_would_be_parents) last_parent_in_gt = max(list( filter(lambda x: self.gt.node_in_tree(x), rtnode_would_be_parents)), key=lambda x: len(x)) # print("Latest parent: ", last_parent_in_gt) # print("Parent - RTnode", rtnode.replace(last_parent_in_gt, "")) # add all children of that parent last_added_parent = last_parent_in_gt for m in rtnode.replace(last_parent_in_gt, ""): # print("adding: ", m, "with parent: ", last_added_parent) self.gt.add_node(m, [0, 0], last_added_parent) last_added_parent = last_added_parent + m # which player are we? player = board.state[18] while True: nodecnt += 1 # print("RTNODE in WHile LOOP: ", rtnode) chosennode = self.choose_next_node(rtnode, board) if time.time() - t0 > self.movet: break nodeboard = Board(board.state.copy()) for m in chosennode.replace(rtnode, ""): nodeboard.pushMove(int(m)) # Add children of current node for m in nodeboard.legalMoves(): self.gt.add_node(str(m), data=[0, 0], parent=chosennode) score = self.playout(nodeboard, player) # back propagate score & numVisits # print("Chosen Node: '" + chosennode + "'") for node in self.gt.get_parents_to_root(chosennode): data = self.gt.get_data(node) data[0] += score data[1] += 1 self.gt.update_data(node, data) best_move = None best_numVisits = 0 for m in self.gt.get_children(rtnode): data = self.gt.get_data(m) if data[1] > best_numVisits: best_numVisits = data[1] best_move = m # return max(self.gt.get_children(MCTSPlayer.rootnode), key = lambda m: self.gt.get_data(m)[1]) # print("node", rtnode, 'children visits', list(map(lambda x: (x,self.gt.get_data(x)[1]), self.gt.get_children(rtnode)))) print( "MCTSPLayer explored {0} nodes in {1} seconds at {2:.2f} nodes/s". format(nodecnt, self.movet, nodecnt / self.movet)) return int(best_move[-1])
import sys
import pickle

import cv2
import numpy as np

sys.path.append('./build/lib.macosx-10.7-x86_64-3.6/')
sys.path.append('/Users/joshua/Coding/go/JoshieGo')

from game import Board
import gofeat

# round-trip a board matrix through the string interface
mtx = np.ones(shape=(19, 19), dtype=np.int)
string = Board.mtx2str(mtx)
print(string)
print(dir(gofeat))
string = gofeat.random(string)
ret_mtx = np.fromstring(string, sep=' ', dtype=np.int).reshape(19, 19)
print(ret_mtx)

# visualize every 200th position from the test games
games = pickle.load(open('go_test.pkl', 'rb'))
cnt = 0
for board_mtx, move in zip(games[0], games[1]):
    cnt += 1
    if cnt % 200 != 0:
        continue
    board = Board(board_mtx=board_mtx)
    canvas = board.visualize_board(grid_size=35)
    cv2.imshow('board', canvas)
    cv2.waitKey()
def hello():
    b = Board()
    b.board[(1, 1)] = Tile.new('FFFF')
    return display_text(b)
def score_board(board):
    # Score a position by giving each tile credit for every equal
    # orthogonal neighbor. (The function header and loop setup were
    # missing from the original fragment and are reconstructed here.)
    score = 0
    for i in range(len(board)):
        for j in range(len(board)):
            if i + 1 < len(board):
                if board[i][j] == board[i + 1][j]:
                    score += board[i][j]
            if j + 1 < len(board):
                if board[i][j] == board[i][j + 1]:
                    score += board[i][j]
            if i - 1 >= 0:
                if board[i][j] == board[i - 1][j]:
                    score += board[i][j]
            if j - 1 >= 0:
                if board[i][j] == board[i][j - 1]:
                    score += board[i][j]
    return score


x = Board()
x.display_board()


def play_game(x):
    while x.check_alive:
        max = -1
        move = ""
        left_move = x.move_left(active=False)
        right_move = x.move_right(active=False)
        up_move = x.move_up(active=False)
        down_move = x.move_down(active=False)
        if left_move[0] and score_board(left_move[3]) >= max:
            max = score_board(left_move[3])
            move = "left"
        if up_move[0] and score_board(up_move[3]) >= max:
            max = score_board(up_move[3])
            move = "up"
            # (the remaining direction checks are truncated in the source)
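# Worked example of score_board: each tile scores once per equal
# orthogonal neighbor, so an adjacent equal pair contributes twice its
# value. On the small grid below (zeros also pair up, but add 0):
grid = [[4, 4, 0],
        [0, 2, 0],
        [0, 2, 2]]
print(score_board(grid))  # 16: the 4-pair adds 8, the 2-adjacencies add 8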
def setUp(self):
    self.game_board = Board(8, 8)
def __init__(self, boardSize, numMines):
    self.board = Board(boardSize, numMines)
    self.numMines = numMines
    self.boardSize = boardSize
    self.win = False
    self.lose = False
        else:
            pass
        board.tileListUpdate(userInput, currentPlayer)
        player.playerTurnChange(currentPlayer)
        boardCondition = board.boardCheck()
        if boardCondition != 'continue':
            self.boardClear()
            if boardCondition == 'p1 wins':
                if currentPlayer == 'X':
                    player.playerTurnChange(player.players[1])
                self.headVictoryChange(player.players[0])
            elif boardCondition == 'p2 wins':
                self.headVictoryChange(player.players[1])
            elif boardCondition == 'tie':
                self.headVictoryChange()

    def boardClear(self):
        # clear the text of all nine tile buttons
        for button in (self.button0, self.button1, self.button2,
                       self.button3, self.button4, self.button5,
                       self.button6, self.button7, self.button8):
            button['text'] = ''
        board.tileListUpdate(None, None, True)


if __name__ == '__main__':
    root = tk.Tk()
    root.resizable(False, False)
    root.title('Tic Tac Toe')
    root.geometry('600x900+0+0')
    GameGUI = GameGUI(root)
    board = Board()
    player = Player('X', 'O')
    root.mainloop()
def __init__(self):
    self.WIN_SCORE = 10
    self.level = 0
    self.current_player = None
    self.opponent = None
    self.board = Board()
def test_solve(self):
    board = Board()
    self.assertFalse(board.is_solved)
    board.solve()
    self.assertTrue(board.is_solved)
        if move_reward > 0:
            new_q = move_reward
        else:
            # using the Bellman equation
            new_q = (1 - self.LEARNING_RATE) * current_q + self.LEARNING_RATE * (
                move_reward + self.DISCOUNT * max_future_q)
        self.table[obs][action] = new_q  # update the q-table
        return move_reward


if __name__ == "__main__":
    A = Agent("q-tables/qtable-main.pickle")
    startboard = np.array([[4] * 6, [0] * 2, [4] * 6])
    b = Board(startboard)
    print(b)
    c, player = np.random.randint(1, 3), 0
    empty = [0] * 6  # used to check if one side of the board is empty
    while b.board[0] != empty and b.board[2] != empty:
        pos = 0
        if c % 2 == 1:
            player = 1
        elif c % 2 == 0:
            player = 2
        if player == 1:
            try:
                pos = int(
                    input("Player {}, choose your space to move from...\n"
                          .format(player)))
                # (the source fragment is truncated here)
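# Worked instance of the blended Bellman update above. The constants are
# illustrative assumptions, not the agent's actual hyperparameters:
LEARNING_RATE, DISCOUNT = 0.1, 0.95
current_q, move_reward, max_future_q = 0.5, 0.0, 1.0
new_q = (1 - LEARNING_RATE) * current_q + LEARNING_RATE * (
    move_reward + DISCOUNT * max_future_q)
print(new_q)  # 0.545: 90% old estimate + 10% bootstrapped target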
def move(self, board):
    def h(b):
        # static evaluation: terminal positions score 0 (draw) or +/-2,
        # everything else is scored by the learned evaluator
        gameover, winner = b.isGameOver()
        if gameover:
            if winner is None:
                return 0
            else:
                return 2 - 4 * winner
        else:
            return self.evaluator.evaluate(b.state.copy().reshape((1, 19)))

    def minimax(brd, depth, maxplayer):
        self.increment_nodecount()
        MAXVAL = 100000
        gameover, _ = brd.isGameOver()
        if depth == 0 or gameover:
            return h(brd)
        if maxplayer:
            value = -MAXVAL
            for m in brd.legalMoves():
                new_board = Board(brd.state.copy())
                new_board.pushMove(m)
                value = max(value, minimax(new_board, depth - 1, False))
            return value
        else:
            value = MAXVAL
            for m in brd.legalMoves():
                new_board = Board(brd.state.copy())
                new_board.pushMove(m)
                value = min(value, minimax(new_board, depth - 1, True))
            return value

    t0 = time.time()
    print("MinimaxPlayer thinks...")
    # if the move is in the cache, play it
    if tuple(board.state) in self.cache.keys():
        print("MinimaxPlayer depth {0} played from cache in {1:.2f} seconds"
              .format(self.DEPTH, time.time() - t0))
        return self.cache[tuple(board.state)]
    move_scores = {}
    mxPl = board.state[18] == Board.BLACK_MOVE
    for move in board.legalMoves():
        val = minimax(Board(board.tryMove(move)), self.DEPTH, mxPl)
        move_scores[move] = val
    time_elapsed = time.time() - t0
    try:
        ncnt = self.nodecount / time_elapsed
    except ZeroDivisionError:
        ncnt = 1e9
    print("MinimaxPlayer depth {0} explored {1} nodes in {2:.2f} seconds at {3:.2f} nodes/s"
          .format(self.DEPTH, self.nodecount, time_elapsed, ncnt))
    self.reset_nodecount()
    if board.state[18] == Board.WHITE_MOVE:
        bmove = max(move_scores.keys(), key=lambda x: move_scores[x])
    else:
        bmove = min(move_scores.keys(), key=lambda x: move_scores[x])
    # while the cache is smaller than the cache limit, remember the move
    if len(self.cache.keys()) < self.cachelimit:
        self.cache[tuple(board.state)] = bmove
    return bmove
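# The minimax above searches every child exhaustively. Alpha-beta pruning
# is the standard refinement: it skips branches that cannot change the
# result. This is a sketch over the same Board interface used above
# (legalMoves / pushMove / isGameOver), not code from this project:
MAXVAL = 100000

def alphabeta(brd, depth, alpha, beta, maxplayer, h):
    gameover, _ = brd.isGameOver()
    if depth == 0 or gameover:
        return h(brd)
    if maxplayer:
        value = -MAXVAL
        for m in brd.legalMoves():
            child = Board(brd.state.copy())
            child.pushMove(m)
            value = max(value, alphabeta(child, depth - 1, alpha, beta, False, h))
            alpha = max(alpha, value)
            if alpha >= beta:
                break  # beta cutoff: the min player will avoid this line anyway
        return value
    value = MAXVAL
    for m in brd.legalMoves():
        child = Board(brd.state.copy())
        child.pushMove(m)
        value = min(value, alphabeta(child, depth - 1, alpha, beta, True, h))
        beta = min(beta, value)
        if alpha >= beta:
            break  # alpha cutoff
    return value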
import curses  # needed for the COLOR_* constants used below
from curses import wrapper

from game import (
    Board,
    Player,
    Pawn,
    Rook,
    Knight,
    King,
    Bishop,
    Queen,
    Renderer,
    TraditionalStrategy,
)

board = Board(8)

p1 = Player('Chris')
p1.color = curses.COLOR_RED
board.add_player(p1)
p1.setup(TraditionalStrategy)

p2 = Player('Jess')
p2.color = curses.COLOR_BLUE
board.add_player(p2)
p2.setup(TraditionalStrategy)


def main(stdscr):
    stdscr.clear()
    curses.noecho()