def human_play(net_checkpoint):
    """
    Play one interactive game in the terminal against a trained network.

    The network moves first in every round (it plays as the `first_player`
    returned by `GameState.generate_training_game`); the human answers by
    typing an action index at the ': ' prompt. The board is printed after
    every move and 'Game over' is printed when the game is finished.

    Args:
        net_checkpoint: checkpoint identifier, forwarded unchanged to `NNet`.

    Relies on the project's `globals` module, `np` and `randint` being
    available at module level (they are not imported here).
    """
    # GPU visibility is configured before the project modules are imported
    # (presumably so that the setting is in place when NNet is loaded).
    globals.set_gpu_visible(True)
    from time import sleep
    from MCTS import MCTS
    from NNet import NNet
    from game import GameState

    net = NNet(net_checkpoint)

    # Init environment: a random starting game and a search tree on a copy
    # of it, so the tree's state is independent from the `game` object
    game, first_player = GameState.generate_training_game(randint(0, 10000))
    mcts = MCTS(net, game.copy())
    player = first_player

    # Game loop
    while True:
        # Net turn: play the action with the highest probability in pi
        action = np.argmax(mcts.get_pi(player))
        game.next_state(action, player)
        print('\n\n\nnet MOVED\n' + str(game))
        sleep(0.7)
        if game.is_finished():
            break

        # Human turn: keep asking until the input parses as an integer, so
        # that a typo does not abort the whole game with a ValueError
        while True:
            try:
                action = int(input(': '))
                break
            except ValueError:
                print('Invalid input, please enter the move as an integer')
        game.next_state(action, -player)
        print('\n\n\nyou MOVED\n' + str(game))
        sleep(0.7)
        if game.is_finished():
            break

        # Move the root of the search tree along the human's move, expanding
        # the root first if it has no children yet
        if not mcts.root.has_children():
            mcts.search(mcts.root, -player)
        assert (mcts.root.children[action])
        mcts.root = mcts.root.children[action]

    print('Game over')
def evaluate_net_process(net, net_opponent, chunk_counter, games_per_chunk, results_queue):
    """
    Worker process that lets `net` play evaluation games against `net_opponent`.

    Chunks of `games_per_chunk` games are claimed by decrementing the shared
    `chunk_counter` until it drops below zero. In every game `net` moves
    first, playing as the `first_player` of the generated game.

    The accumulated score (from the point of view of `net`: +1 for a win,
    -1 for a loss, 0 for a tie) is put on `results_queue`; the function
    itself always returns 0.

    Args:
        net: checkpoint identifier passed to `NNet`; when falsy,
            `globals.get_last_iteration() + 1` is used instead.
        net_opponent: checkpoint identifier passed to `NNet` for the opponent.
        chunk_counter: shared counter exposing `get_lock()` and `value`.
        games_per_chunk: number of games contained in each chunk.
        results_queue: queue receiving the score of this process.

    Relies on the project's `globals` module, `np`, `GameState` and
    `train_args` being available at module level (not imported here).
    """
    globals.set_gpu_visible(False)
    from multiprocessing import current_process as cp
    from time import sleep
    from MCTS import MCTS
    from NNet import NNet

    net = NNet(net or (globals.get_last_iteration() + 1))
    net_opponent = NNet(net_opponent)
    score = 0

    def claim_chunk():
        # Decrement the shared counter and read it back under the same lock
        with chunk_counter.get_lock():
            chunk_counter.value -= 1
            return chunk_counter.value

    chunk_index = claim_chunk()
    while chunk_index >= 0:
        print('--', cp().name, '-- Starting chunk', chunk_index)

        first_game = games_per_chunk * chunk_index
        last_game = games_per_chunk * (chunk_index + 1)
        for game_index in range(first_game, last_game):
            # Init environment: both search trees start from their own copy
            # of the same game
            game, first_player = GameState.generate_training_game(game_index)
            mcts = MCTS(net, game.copy())
            mcts_opponent = MCTS(net_opponent, game.copy())
            player = first_player

            # One entry per turn of a round:
            # (tree that moves, tree that follows, banner, id of the mover)
            turns = (
                (mcts, mcts_opponent, '\n\n\nnet MOVED\n', player),
                (mcts_opponent, mcts, '\n\n\nnet_opponent MOVED\n', -player),
            )

            # Game loop: rounds are repeated until one of the moves ends the game
            game_over = False
            while not game_over:
                for mover, follower, banner, mover_id in turns:
                    action = np.argmax(mover.get_pi(mover_id))
                    game.next_state(action, mover_id)
                    if train_args.show_moves:
                        print(banner + str(game.board))
                        sleep(0.7)
                    if game.is_finished():
                        game_over = True
                        break

                    # Advance the other tree along the move just played. If
                    # its root has no children yet, expand it first (the
                    # game is known not to be finished at this point).
                    if not follower.root.has_children():
                        follower.search(follower.root, mover_id)
                    assert (follower.root.children[action])
                    follower.root = follower.root.children[action]

            # Update overall score ('net' was the first to move).
            # NOTE(review): the game counts as a tie whenever no valid moves
            # are left, without consulting get_winner() - confirm that a win
            # on the last available move cannot happen.
            if any(game.valid_moves()):
                score += 1 if game.get_winner() == first_player else -1
            print('--', cp().name, '-- selfplay match ended')

        chunk_index = claim_chunk()

    print('--', cp().name, '-- Finished')
    results_queue.put(score)
    return 0
def selfplay_process(chunk_counter, games_per_chunk, all_moves):
    """
    Worker process that generates training examples through self-play.

    Chunks of `games_per_chunk` games are claimed by decrementing the shared
    `chunk_counter` until it drops below zero. Every game is played by the
    'best' network against itself and each position is recorded as
    [canonical board, pi, winner], where winner is +1 if the player that
    moved from that position won the game, -1 if it lost and 0 for a tie.

    After every chunk the examples collected so far are pickled to
    'tmp/examples_<process name>.pickle' as [chunk_index, examples]. When
    no chunks are left the full list of examples is put on `all_moves`;
    the function itself always returns 0.

    Args:
        chunk_counter: shared counter exposing `get_lock()` and `value`.
        games_per_chunk: number of games contained in each chunk.
        all_moves: queue receiving the examples produced by this process.

    Relies on the project's `globals` module, `np`, `GameState` and
    `train_args` being available at module level (not imported here).
    """
    globals.set_gpu_visible(False)
    from multiprocessing import current_process as cp
    from os import makedirs
    from pickle import dump
    from time import sleep
    from MCTS import MCTS
    from NNet import NNet

    net = NNet('best')
    examples = []  # [board, pi, winner] moves of played games
    tmp_dir = 'tmp/'

    while True:
        # Update chunk counter
        with chunk_counter.get_lock():
            chunk_counter.value -= 1
            chunk_index = chunk_counter.value
        if chunk_index < 0:
            break
        print('--', cp().name, '-- Starting chunk', chunk_index)

        for game_it in range(games_per_chunk * chunk_index, games_per_chunk * (chunk_index + 1)):
            game_moves = []  # [board, pi, winner] moves of single game

            # Init game environment
            game, first_player = GameState.generate_training_game(game_it)
            mcts = MCTS(net, game.copy())

            # Game loop
            player = first_player
            while not game.is_finished():
                pi = mcts.get_pi(player)
                if train_args.show_moves:
                    print(str(game) + '\n\n\n')
                    sleep(1)
                assert (not any(np.isnan(pi)))  # debug only
                # winner is temporarily set to 0, the real value is only
                # known once the game is over
                game_moves.append([game.canon_board(player), pi, 0])
                game.next_state(np.argmax(pi), player)
                player *= -1
            print('--', cp().name, '-- selfplay match ended')

            # Iterate over game_moves to set the right winner after the game.
            # The first recorded move belongs to first_player and the players
            # alternate, so the sign flips at every move.
            winner = game.get_winner()
            if winner:  # winner == 0: game is a tie, every move keeps 0
                winner = 1 if winner == first_player else -1
                for move in game_moves:
                    move[2] = winner
                    winner *= -1

            # Disabled experiment: discard the first moves of the game
            # if len(game_moves) >= 8:
            #     game_moves = game_moves[8:]

            # Disabled experiment: only add the moves of games that are not a tie
            # if not winner==0:
            #     examples += game_moves
            # continue

            # Add all moves from the game to the examples
            examples.extend(game_moves)

        # Save on disk the data of the current iteration. In case of stop in
        # the middle of an iteration, these data are not restored
        # automatically, that has to be done by hand. Every .pickle file is
        # just a python list in the form [[s, pi, v], [s, pi, v], ...], so
        # you only need to append this list to the examples.
        # exist_ok avoids the check-then-create race between the worker
        # processes, which all write into the same directory.
        makedirs(tmp_dir, exist_ok=True)
        with open(tmp_dir + 'examples_{}.pickle'.format(cp().name), 'wb') as f:
            dump([chunk_index, examples], f)

    print('--', cp().name, '-- Finished')
    # Append moves to the queue
    all_moves.put(examples)
    return 0