def play_round(self):
    logger.info("Starting game round...")
    # Randomly assign the current and best networks to white/black
    if np.random.uniform(0, 1) <= 0.5:
        white = self.current
        black = self.best
        w = "current"
        b = "best"
    else:
        white = self.best
        black = self.current
        w = "best"
        b = "current"
    current_board = cboard()
    checkmate = False
    dataset = []
    value = 0
    t = 0.1
    while not checkmate and current_board.actions() != []:
        dataset.append(copy.deepcopy(ed.encode_board(current_board)))
        print("")
        print(current_board.current_board)
        if current_board.player == 0:
            root = UCT_search(current_board, 777, white, t)
            policy = get_policy(root, t)
            print("Policy: ", policy, "white = %s" % str(w))
        elif current_board.player == 1:
            root = UCT_search(current_board, 777, black, t)
            policy = get_policy(root, t)
            print("Policy: ", policy, "black = %s" % str(b))
        # decode move and move piece(s)
        current_board = do_decode_n_move_pieces(
            current_board,
            np.random.choice(np.array([0, 1, 2, 3, 4, 5, 6]), p=policy))
        if current_board.check_winner():  # someone wins
            if current_board.player == 0:  # black wins
                value = -1
            elif current_board.player == 1:  # white wins
                value = 1
            checkmate = True
    dataset.append(ed.encode_board(current_board))
    if value == -1:
        dataset.append(f"{b} as black wins")
        return b, dataset
    elif value == 1:
        dataset.append(f"{w} as white wins")
        return w, dataset
    else:
        dataset.append("Nobody wins")
        return None, dataset
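# A minimal sketch of how play_round might be driven by the surrounding
# evaluator: play a batch of rounds and promote the current net only if it
# beats the best net often enough. The "evaluate" name and the 55% promotion
# threshold are illustrative assumptions, not part of the code above.
def evaluate(self, num_games=100):
    current_wins = 0
    for _ in range(num_games):
        winner, _ = self.play_round()
        if winner == "current":
            current_wins += 1
    # promote only on a clear margin over the best net (assumed threshold)
    return (current_wins / num_games) >= 0.55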
def MCTS_self_play(connectnet, num_games, start_idx, cpu, args, iteration):
    logger.info("[CPU: %d]: Starting MCTS self-play..." % cpu)
    if not os.path.isdir("./datasets/iter_%d" % iteration):
        if not os.path.isdir("datasets"):
            os.mkdir("datasets")
        os.mkdir("datasets/iter_%d" % iteration)
    for idxx in tqdm(range(start_idx, num_games + start_idx)):
        logger.info("[CPU: %d]: Game %d" % (cpu, idxx))
        current_board = c_board()
        checkmate = False
        dataset = []  # (state, policy) pairs for neural network training
        states = []
        value = 0
        move_count = 0
        while not checkmate and current_board.actions() != []:
            # play the opening moves at a high exploration temperature,
            # then anneal to near-greedy play
            if move_count < 11:
                t = args.temperature_MCTS
            else:
                t = 0.1
            states.append(copy.deepcopy(current_board.current_board))
            board_state = copy.deepcopy(ed.encode_board(current_board))
            root = UCT_search(current_board, 777, connectnet, t)
            policy = get_policy(root, t)
            print("[CPU: %d]: Game %d POLICY:\n " % (cpu, idxx), policy)
            # decode move and move piece(s)
            current_board = do_decode_n_move_pieces(
                current_board,
                np.random.choice(np.array([0, 1, 2, 3, 4, 5, 6]), p=policy))
            dataset.append([board_state, policy])
            print("[Iteration: %d CPU: %d]: Game %d CURRENT BOARD:\n" %
                  (iteration, cpu, idxx), current_board.current_board,
                  current_board.player)
            print(" ")
            if current_board.check_winner():  # somebody won
                if current_board.player == 0:  # black wins
                    value = -1
                elif current_board.player == 1:  # white wins
                    value = 1
                checkmate = True
            move_count += 1
        # attach the game outcome as the value target for every stored state;
        # the initial (empty-board) state is given a neutral value of 0
        dataset_p = []
        for idx, data in enumerate(dataset):
            s, p = data
            if idx == 0:
                dataset_p.append([s, p, 0])
            else:
                dataset_p.append([s, p, value])
        del dataset
        save_as_pickle("iter_%d/" % iteration +
                       "dataset_iter%d_cpu%i_%i_%s" %
                       (iteration, cpu, idxx,
                        datetime.datetime.today().strftime("%Y-%m-%d")),
                       dataset_p)
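# save_as_pickle is not defined in this snippet. A minimal sketch of what it
# plausibly does, assuming the "./datasets/" prefix that matches the
# directories created above (requires: import os, pickle):
def save_as_pickle(filename, data):
    completeName = os.path.join("./datasets/", filename)
    with open(completeName, "wb") as output:
        pickle.dump(data, output)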
def UCT_search(game_state, num_reads, net):
    root = UCTNode(game_state, move=None, parent=DummyNode())
    for i in range(num_reads):
        leaf = root.select_leaf()
        encoded_s = ed.encode_board(leaf.game)
        encoded_s = encoded_s.transpose(2, 0, 1)
        encoded_s = torch.from_numpy(encoded_s).float().cuda()
        child_priors, value_estimate = net(encoded_s)
        child_priors = child_priors.detach().cpu().numpy().reshape(-1)
        value_estimate = value_estimate.item()
        if leaf.game.check_status() and leaf.game.in_check_possible_moves() == []:  # checkmate
            leaf.backup(value_estimate)
            continue
        leaf.expand(child_priors)  # need to make sure valid moves
        leaf.backup(value_estimate)
    return np.argmax(root.child_number_visits), root
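# UCTNode (with select_leaf/expand/backup) and DummyNode are defined
# elsewhere. DummyNode is plausibly just a parentless stand-in for the root,
# giving backup() somewhere to accumulate statistics when it walks up the
# tree. A minimal sketch under that assumption:
import collections

class DummyNode(object):
    def __init__(self):
        self.parent = None
        self.child_total_value = collections.defaultdict(float)
        self.child_number_visits = collections.defaultdict(float)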
def play_round(self):
    # Randomly assign the current and best networks to white/black
    if np.random.uniform(0, 1) <= 0.5:
        white = self.current
        black = self.best
        w = "current"
        b = "best"
    else:
        white = self.best
        black = self.current
        w = "best"
        b = "current"
    current_board = c_board()
    checkmate = False
    states = []
    dataset = []
    value = 0
    while not checkmate and current_board.move_count <= 100:
        # count how many times the current position has occurred before
        draw_counter = 0
        for s in states:
            if np.array_equal(current_board.current_board, s):
                draw_counter += 1
        if draw_counter == 3:  # draw by repetition
            break
        states.append(copy.deepcopy(current_board.current_board))
        board_state = copy.deepcopy(ed.encode_board(current_board))
        dataset.append(board_state)
        if current_board.player == 0:
            best_move, _ = UCT_search(current_board, 777, white)
        elif current_board.player == 1:
            best_move, _ = UCT_search(current_board, 777, black)
        current_board = do_decode_n_move_pieces(
            current_board, best_move)  # decode move and move piece(s)
        print(current_board.current_board, current_board.move_count)
        print(" ")
        if current_board.check_status() and \
                current_board.in_check_possible_moves() == []:  # checkmate
            if current_board.player == 0:  # black wins
                value = -1
            elif current_board.player == 1:  # white wins
                value = 1
            checkmate = True
    dataset.append(value)
    if value == -1:
        return b, dataset
    elif value == 1:
        return w, dataset
    else:
        return None, dataset
def UCT_search(game_state, num_reads, net, temp):
    root = UCTNode(game_state, move=None, parent=DummyNode())
    for i in range(num_reads):
        leaf = root.select_leaf()
        encoded_s = ed.encode_board(leaf.game)
        encoded_s = encoded_s.transpose(2, 0, 1)
        encoded_s = torch.from_numpy(encoded_s).float().cuda()
        child_priors, value_estimate = net(encoded_s)
        child_priors = child_priors.detach().cpu().numpy().reshape(-1)
        value_estimate = value_estimate.item()
        if leaf.game.check_winner() or leaf.game.actions() == []:  # somebody won or draw
            leaf.backup(value_estimate)
            continue
        leaf.expand(child_priors)  # need to make sure valid moves
        leaf.backup(value_estimate)
    return root
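# get_policy is referenced throughout but not defined in this snippet. A
# plausible sketch, assuming root.child_number_visits is a numpy array of
# per-move visit counts: the counts are raised to 1/temp and normalized, so
# a higher temperature flattens the distribution toward exploration while a
# low temperature concentrates it on the most-visited move.
def get_policy(root, temp=1):
    visits = root.child_number_visits ** (1 / temp)
    return visits / visits.sum()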
def MCTS_self_play(chessnet, num_games, cpu):
    for idxx in range(0, num_games):
        current_board = c_board()
        checkmate = False
        dataset = []  # (state, policy) pairs for neural network training
        states = []
        value = 0
        while not checkmate and current_board.move_count <= 100:
            # count how many times the current position has occurred before
            draw_counter = 0
            for s in states:
                if np.array_equal(current_board.current_board, s):
                    draw_counter += 1
            if draw_counter == 3:  # draw by repetition
                break
            states.append(copy.deepcopy(current_board.current_board))
            board_state = copy.deepcopy(ed.encode_board(current_board))
            best_move, root = UCT_search(current_board, 777, chessnet)
            current_board = do_decode_n_move_pieces(
                current_board, best_move)  # decode move and move piece(s)
            policy = get_policy(root)
            dataset.append([board_state, policy])
            print(current_board.current_board, current_board.move_count)
            print(" ")
            if current_board.check_status() and \
                    current_board.in_check_possible_moves() == []:  # checkmate
                if current_board.player == 0:  # black wins
                    value = -1
                elif current_board.player == 1:  # white wins
                    value = 1
                checkmate = True
        # attach the game outcome as the value target for every stored state;
        # the initial state is given a neutral value of 0
        dataset_p = []
        for idx, data in enumerate(dataset):
            s, p = data
            if idx == 0:
                dataset_p.append([s, p, 0])
            else:
                dataset_p.append([s, p, value])
        del dataset
        save_as_pickle(
            "dataset_cpu%i_%i_%s" %
            (cpu, idxx, datetime.datetime.today().strftime("%Y-%m-%d")),
            dataset_p)
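# The cpu argument suggests self-play is run in parallel, one process per
# worker. A minimal sketch of such a driver; the run_self_play name, the
# process count, and the games-per-worker figure are illustrative assumptions:
import torch.multiprocessing as mp

def run_self_play(chessnet, num_games_per_cpu=50, num_processes=6):
    mp.set_start_method("spawn", force=True)  # CUDA requires spawn, not fork
    chessnet.share_memory()  # let worker processes share the model weights
    processes = []
    for cpu in range(num_processes):
        p = mp.Process(target=MCTS_self_play,
                       args=(chessnet, num_games_per_cpu, cpu))
        p.start()
        processes.append(p)
    for p in processes:
        p.join()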
def play_game(net):
    # Ask the human which side to play
    white = None
    black = None
    while True:
        play_as = input(
            "What do you want to play as? (\"O\"/\"X\")? Note: \"O\" moves first, \"X\" moves second\n")
        if play_as == "O":
            black = net
            break
        elif play_as == "X":
            white = net
            break
        else:
            print("I didn't get that.")
    current_board = cboard()
    checkmate = False
    dataset = []
    value = 0
    t = 0.1
    moves_count = 0
    while not checkmate and current_board.actions() != []:
        # play the first few moves at a higher temperature, then near-greedily
        if moves_count <= 5:
            t = 1
        else:
            t = 0.1
        moves_count += 1
        dataset.append(copy.deepcopy(ed.encode_board(current_board)))
        print(current_board.current_board)
        print(" ")
        if current_board.player == 0:
            if white is not None:
                print("AI is thinking........")
                root = UCT_search(current_board, 777, white, t)
                policy = get_policy(root, t)
            else:
                while True:
                    col = input("Which column do you want to drop your piece in? (Enter 1-7)\n")
                    if col in ["1", "2", "3", "4", "5", "6", "7"]:  # reject non-numeric input too
                        policy = np.zeros([7], dtype=np.float32)
                        policy[int(col) - 1] += 1
                        break
        elif current_board.player == 1:
            if black is not None:
                print("AI is thinking.............")
                root = UCT_search(current_board, 777, black, t)
                policy = get_policy(root, t)
            else:
                while True:
                    col = input("Which column do you want to drop your piece in? (Enter 1-7)\n")
                    if col in ["1", "2", "3", "4", "5", "6", "7"]:  # reject non-numeric input too
                        policy = np.zeros([7], dtype=np.float32)
                        policy[int(col) - 1] += 1
                        break
        # decode move and move piece(s)
        current_board = do_decode_n_move_pieces(
            current_board,
            np.random.choice(np.array([0, 1, 2, 3, 4, 5, 6]), p=policy))
        if current_board.check_winner():  # someone wins
            if current_board.player == 0:  # black wins
                value = -1
            elif current_board.player == 1:  # white wins
                value = 1
            checkmate = True
    dataset.append(ed.encode_board(current_board))
    print(current_board.current_board)
    print(" ")
    if value == -1:
        if play_as == "O":
            dataset.append("AI as black wins")
            print("YOU LOSE!!!!!!!")
        else:
            dataset.append("Human as black wins")
            print("YOU WIN!!!!!!!")
        return "black", dataset
    elif value == 1:
        if play_as == "O":
            dataset.append("Human as white wins")
            print("YOU WIN!!!!!!!!!!!")
        else:
            dataset.append("AI as white wins")
            print("YOU LOSE!!!!!!!")
        return "white", dataset
    else:
        dataset.append("Nobody wins")
        print("DRAW!!!!!")
        return None, dataset
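# A minimal sketch of loading a trained network and starting a game against
# it. The ConnectNet class name and the checkpoint path are assumptions for
# illustration; they are not defined in the code above.
if __name__ == "__main__":
    net = ConnectNet()  # assumed network class
    net.cuda()
    checkpoint = torch.load("./model_data/best_net.pth.tar")  # assumed path
    net.load_state_dict(checkpoint["state_dict"])
    net.eval()
    winner, dataset = play_game(net)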