import os
import pickle

import torch

# Note: MCTS is a project-specific class assumed to be importable from the
# surrounding package.


class MCTSTrainer:
    def __init__(self, gnn, test_graphs, filename):
        self.mcts = MCTS(gnn)
        self.test_graphs = test_graphs
        self.test_result = []
        self.filename = filename

    # rollout until the end
    def train1(self, graph, TAU, batch_size=10, iter_p=2):
        self.mcts.train(graph, TAU, batch_size=batch_size, iter_p=iter_p)

    # rollout only until leaf
    def train2(self, graph, TAU, batch_size=10, iter_p=2):
        self.mcts.train(graph, TAU, batch_size=batch_size, stop_at_leaf=True,
                        iter_p=iter_p)

    def test(self):
        result = [self.mcts.search(graph) for graph in self.test_graphs]
        print(result)
        self.test_result.append(result)

    def save_test_result(self):
        os.makedirs("log", exist_ok=True)
        with open("log/{}.pickle".format(self.filename), mode="wb") as f:
            pickle.dump(self.test_result, f)

    def save_model(self):
        os.makedirs("model", exist_ok=True)
        torch.save(self.mcts.gnn.state_dict(), "model/{}.pth".format(self.filename))
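# Usage sketch (not from the original sources): a plausible training loop over
# MCTSTrainer. The objects `gnn`, `train_graphs` and `test_graphs`, and the
# temperature value TAU, are hypothetical placeholders supplied by the caller.
#
# trainer = MCTSTrainer(gnn, test_graphs, filename="run_01")
# for epoch in range(10):
#     for graph in train_graphs:
#         trainer.train1(graph, TAU=0.5)    # full rollouts to a terminal state
#         # trainer.train2(graph, TAU=0.5)  # or: stop rollouts at the first leaf
#     trainer.test()
# trainer.save_test_result()
# trainer.save_model()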
class AI(Player):
    def __init__(self, name, m):
        Player.__init__(self, name)
        self.brain = MCTS(m)

    def request_input(self, game: Game, state: State):
        self.brain.simulate(game, state)
        best_idx = self.brain.get_best_child(state)
        return game.gen_child_states(state)[best_idx]
def use_model(t):
    gnn, name, graph = t
    np.random.seed()
    mcts = MCTS(gnn, performance=True)
    Timer.start('all')
    result = mcts.search_for_exp(graph, time_limit=10 * 60, min_iter_num=100)
    print("graph: {}, result: {}".format(name, result))
    print("max: ", max(result))
    Timer.end('all')
    Timer.print()
    return max(result)
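# Usage sketch (assumption): use_model unpacks a (gnn, name, graph) tuple and
# reseeds NumPy, which suggests it is meant to be mapped over worker processes.
# The `gnn` and `named_graphs` objects below are hypothetical placeholders.
#
# from multiprocessing import Pool
#
# with Pool(processes=4) as pool:
#     best_results = pool.map(use_model,
#                             [(gnn, name, graph) for name, graph in named_graphs])
# print(best_results)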
def choose_mcts(next_move_types, next_moves, last_move_type, last_move, game,
                action_mcts, simulation):
    # init mcts
    if not simulation:
        # Cannot beat the last move ("yaobuqi"), so MCTS is not needed
        if len(next_moves) == 0:
            print("actions", [430])
            return "yaobuqi", []
        game_copy = copy.deepcopy(game)
        game_copy.players[0].model = "mcts"
        game_copy.players[1].model = "random"
        game_copy.players[2].model = "random"
        mcts = MCTS(tree_policy=UCB1(c=1.41),
                    default_policy=random_terminal_roll_out,
                    backup=monte_carlo,
                    game=game_copy)
        # state
        s = get_state(game_copy.playrecords, player=1)
        # actions
        actions = get_actions(next_moves, game_copy.actions_lookuptable, game_copy)
        # new state
        s = combine(s, actions)
        begin = time.time()
        best_action, win_prob = mcts(s, n=1000)
        game.playrecords.win_prob = round(win_prob, 2)
        duration = time.time() - begin
        print("actions", actions, "best_action", best_action,
              "win_prob", win_prob, "time", duration)
        if best_action == 429:
            return "buyao", []
        elif best_action == 430:
            return "yaobuqi", []
        else:
            best_action_id = actions.index(best_action)
            return next_move_types[best_action_id], next_moves[best_action_id]
    # mcts simulation
    else:
        if action_mcts == 429:
            return "buyao", []
        elif action_mcts == 430:
            return "yaobuqi", []
        else:
            return next_move_types[action_mcts], next_moves[action_mcts]
def uct_play_game():
    game = NimGame(15)
    search_mgr = SearchMgr()
    p1, p2 = MCTS(search_mgr).set_root(game), MCTS(search_mgr).set_root(game)
    while game.get_actions():
        print(str(game))
        # play with values for iter_max and verbose = True
        a1 = p1.uct(game, iters=100)
        a2 = p2.uct(game, iters=1000)
        if game.player_just_moved == 1:
            # Player 2
            a = a2
        else:
            # Player 1
            a = a1
        print('Best Action: ' + str(a) + '\n')
        game.take_action(a)
        p1.update_root(a)
        p2.update_root(a)
    if game.get_result(game.player_just_moved) == 1.0:
        print('Player ' + str(game.player_just_moved) + ' wins!')
    elif game.get_result(game.player_just_moved) == 0.0:
        print('Player ' + str(3 - game.player_just_moved) + ' wins!')
    else:
        print('Nobody wins!')
def play_series(x):
    game = Hex()
    actor = Actor(game, [],
                  replay_file='model/replays_expert.txt',
                  rp_save_interval=replay_save_interval)
    mcts = MCTS(game, simulations=rollouts)
    for i in range(games_per_series):
        print(f'Starting game {i + 1}')
        state = game.get_initial_state()
        mcts.set_state(state)
        while not game.is_finished(state):
            move, probabilities = mcts.select_move(True)
            padded_probs = np.pad(
                probabilities,
                (0, game.num_possible_moves() - len(probabilities)),
                'constant')
            actor.add_to_replay_buffer(state, padded_probs)
            state = game.get_outcome_state(state, move)
            mcts.set_state(state)
params["n_input_features"] = numpy.prod(env.observation_space.shape) params["env"] = env params["gamma"] = 0.99 # Planning/MCTS Hyperparameters params["horizon"] = 10 params["simulations"] = 100 # 1000 # Deep RL Hyperparameters params["alpha"] = 0.0005 # 0.001 params["epsilon"] = 0.1 params["memory_capacity"] = 10000 params["warmup_phase"] = 1000 params["target_update_interval"] = 5000 params["minibatch_size"] = 64 params["epsilon_linear_decay"] = 1.0 / params["memory_capacity"] params["epsilon_min"] = 0.01 training_episodes = 1 # 2000 mcts_agent = MCTS(params["env"], params["gamma"], c=1., n_iter=params["simulations"]) a2c_agent = a2c.A2CLearner(params) lens = [len(episode(env, mcts_agent, a2c_agent, i)) for i in range(500)] actions = episode(env, mcts_agent, a2c_agent, 500) print('-') print(actions) plot.plot(lens) plot.show()
pos = new_chess_game()
for i in range(len(move)):
    movei = move[i]
    if pos[movei[0], movei[1]] != 0:
        pos[movei[2], movei[3]] = pos[movei[0], movei[1]]
        pos[movei[0], movei[1]] = 0
    else:
        raise ValueError("error")
print(np.flipud(pos))

mcts = MCTS(tree_policy=Go(c=5),
            default_policy=RandomKStepRollOut_Value(20, 0.95),
            backup=monte_carlo)
policy_fun = policy_nn()
rollout_fun = rollout_nn()
value_fun = value_nn()
root = StateNode(None, ChessState(pos, 1, policy_fun, rollout_fun, value_fun, False))
best_action = mcts(root, n=500)

pr.disable()
s = io.StringIO()
sortby = 'cumulative'
ps = pstats.Stats(pr, stream=s).sort_stats(sortby)
ps.print_stats()
print(s.getvalue())
from games.hex import Hex
from games.random_player import RandomPlayer
from mcts.mcts import MCTS

game = Hex()
p1 = MCTS(game, simulations=50)
p2 = RandomPlayer(game)
players = (p1, p2)
games = 20
p2_starting = False
wins = 0

for i in range(games):
    state = game.get_initial_state()
    turn = p2_starting
    while not game.is_finished(state):
        for p in players:
            p.set_state(state)
        move = players[int(turn)].select_move()
        state = game.get_outcome_state(state, move)
        turn = not turn
    result = game.evaluate_state(state)
    if (p2_starting and result == -1) or (not p2_starting and result == 1):
        wins += 1
        print(f'Won game {i + 1}')
    else:
        print(f'Lost game {i + 1}')
    p2_starting = not p2_starting
import itertools
import os
import pickle
import random
import time
from collections import deque

import numpy as np

# Note: MCTS and Actor are project-specific classes assumed to be importable
# from the surrounding package.


class ActorTrainer:
    def __init__(self, game, checkpoint_directory, actor=None,
                 network_save_interval=100, rollouts=100, start_game=0,
                 replay_save_interval=250, replay_limit=20000,
                 minibatch_size=50, replay_file=None, test_games=50,
                 nn_steps=1):
        self.game = game
        self.checkpoint_directory = checkpoint_directory
        self.network_save_interval = network_save_interval
        self.mcts = MCTS(game, simulations=rollouts,
                         default_policy=self.create_default_policy())
        self.game_count = start_game
        self.replay_save_interval = replay_save_interval
        self.replay_buffer = deque(maxlen=replay_limit)
        self.rp_count = 0
        self.minibatch_size = minibatch_size
        self.test_games = test_games
        self.nn_steps = nn_steps
        if replay_file == 'auto':
            self.replay_file = f'{checkpoint_directory}/replays.txt'
        else:
            self.replay_file = replay_file
        if not os.path.exists(checkpoint_directory):
            os.makedirs(checkpoint_directory)
        if actor:
            self.actor = actor
            self.save_actor_to_file()
        else:
            self.actor = self.load_actor_from_file()
        if start_game > 0:
            self.actor.load_checkpoint(
                f'{checkpoint_directory}/game_{start_game}')
        if replay_save_interval > replay_limit:
            raise ValueError(
                f'replay_save_interval ({replay_save_interval}) must be smaller '
                f'than replay_limit ({replay_limit})')
        if replay_file is not None and replay_file != 'auto':
            try:
                self.load_replays()
            except FileNotFoundError:
                pass
        if start_game == 0:
            self.actor.save_checkpoint(checkpoint_directory + '/game_0')
            self.actor.save_checkpoint(checkpoint_directory + '/best')
            with open(checkpoint_directory + '/best.txt', 'w') as f:
                f.write(str(0))

    def train(self, num_games):
        for i in range(num_games):
            self.game_count += 1
            game_start_time = time.time()
            print(f'[GAME {self.game_count}] Initializing state')
            state = self.game.get_initial_state()
            self.mcts.set_state(state)
            print(f'[GAME {self.game_count}] Simulating game')
            while not self.game.is_finished(state):
                move, probabilities = self.mcts.select_move(True)
                padded_probs = np.pad(
                    probabilities,
                    (0, self.game.num_possible_moves() - len(probabilities)),
                    'constant')
                self.add_to_replay_buffer(state, padded_probs)
                state = self.game.get_outcome_state(state, move)
                self.mcts.set_state(state)
            print(f'[GAME {self.game_count}] Training neural network')
            for j in range(self.nn_steps):
                self.train_network()
            if self.game_count % self.network_save_interval == 0:
                print(f'[GAME {self.game_count}] Saving neural network checkpoint')
                self.actor.save_checkpoint(
                    f'{self.checkpoint_directory}/game_{self.game_count}')
            if self.test_against_best():
                print(f'[GAME {self.game_count}] New best found - saving checkpoint')
            print(f'[GAME {self.game_count}] Time elapsed: '
                  f'{time.time() - game_start_time:.2f}')
            print()

    def test_against_best(self):
        if self.test_games <= 0:
            return False
        print(f'[GAME {self.game_count}] Testing against best model...', end='')
        best_actor = self.load_actor_from_file()
        best_actor.load_checkpoint(f'{self.checkpoint_directory}/best')
        starting = True
        wins = 0
        for i in range(self.test_games):
            turn = starting
            state = self.game.get_initial_state()
            while not self.game.is_finished(state):
                if turn:
                    move = self.actor.select_move(state)
                else:
                    move = best_actor.select_move(state)
                state = self.game.get_outcome_state(state, move[0])
                turn = not turn
            result = self.game.evaluate_state(state)
            if (result == 1 and starting) or (result == -1 and not starting):
                wins += 1
            starting = not starting
        print(f'won {wins}/{self.test_games}')
        if wins > self.test_games / 2:
            self.actor.save_checkpoint(self.checkpoint_directory + '/best')
            with open(self.checkpoint_directory + '/best.txt', 'w') as f:
                f.write(str(self.game_count))
            return True
        return False

    def train_network(self):
        minibatch = random.sample(
            self.replay_buffer,
            min(self.minibatch_size, len(self.replay_buffer)))
        for i in range(len(minibatch)):
            minibatch[i] = (self.game.format_for_nn(
                minibatch[i][0], format=self.actor.format), minibatch[i][1])
        self.actor.network.train(minibatch=minibatch)

    def create_default_policy(self):
        def actor_default_policy(state, moves):
            move = self.actor.select_move(state, stochastic=True)
            return move

        return actor_default_policy

    def add_to_replay_buffer(self, state, probabilities):
        self.replay_buffer.append((state, probabilities))
        self.rp_count += 1
        if (self.replay_save_interval != -1
                and self.rp_count % self.replay_save_interval == 0
                and self.rp_count != 0):
            replays = len(self.replay_buffer)
            self.save_replays(
                itertools.islice(self.replay_buffer,
                                 replays - self.replay_save_interval, replays))

    def save_replays(self, replays):
        if self.replay_file is None:
            return
        with open(self.replay_file, 'a') as f:
            for replay in replays:
                state_string = ','.join(map(str, replay[0][0])) + ',' + str(replay[0][1])
                probs_string = ','.join(map(str, replay[1]))
                rp_string = state_string + ';' + probs_string
                f.write(rp_string + '\n')

    def load_replays(self):
        with open(self.replay_file, 'r') as f:
            for line in f:
                state, probs = line.split(';')
                state = list(map(int, state.split(',')))
                player = state[-1]
                board = state[:-1]
                probs = list(map(float, probs.split(',')))
                self.replay_buffer.append(((board, player), probs))

    def load_actor_from_file(self):
        with open(f'{self.checkpoint_directory}/actor_params.txt') as f:
            lines = f.read().split('\n')
        format = lines[0]
        optimizer = 'adam'
        if len(lines) > 1:
            optimizer = lines[1]
        with open(f'{self.checkpoint_directory}/actor_layers.bin', 'rb') as f:
            layers = pickle.load(f)
        return Actor(self.game, layers, format=format, optimizer=optimizer)

    def save_actor_to_file(self):
        with open(f'{self.checkpoint_directory}/actor_params.txt', 'w') as f:
            f.write(self.actor.format + '\n')
            f.write(self.actor.optimizer)
        with open(f'{self.checkpoint_directory}/actor_layers.bin', 'wb') as f:
            pickle.dump(self.actor.layers, f)
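# Usage sketch (not from the original sources): wiring ActorTrainer up with the
# Hex game and a fresh Actor, mirroring how both classes appear in the other
# snippets. The layer sizes, checkpoint directory and Actor constructor call
# below are assumptions for illustration only.
#
# from games.hex import Hex
#
# game = Hex()
# actor = Actor(game, [1000, 500, 100])
# trainer = ActorTrainer(game, 'model/1000x500x100-200', actor=actor,
#                        rollouts=100, replay_file='auto')
# trainer.train(num_games=200)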
from mcts.mcts import MCTS
from games.nim import Nim
from games.random_player import RandomPlayer
import random

# Instantiate our game with given parameters
game = Nim(9, 3)
num_games = 50
play_mode = 0

# Create a new MCTS player for player 1
player1 = MCTS(game, simulations=1000)
# Create player 2 - either as the same player as player 1, or as a random player
player2 = (player1, RandomPlayer(game))[0]
players = [player1, player2]


def run_single_game(starting_player=0, verbose=False):
    """
    Runs a simulation of a single game, and returns the winning player.

    :param starting_player: The player that should start the game.
    :param verbose: If True, string representations of all moves will be printed to the console.
    :return: 0 if player 1 is the winner, 1 if player 2 is the winner.
    """
    state = game.get_initial_state(starting_player)
    current_player = starting_player
    for p in players:
        p.set_state(state)
    while not game.is_finished(state):
        move = players[current_player].select_move()
class BasicClientActor(BasicClientActorAbs):
    def __init__(self, ip_address=None, verbose=True, auto_test=False):
        self.series_id = -1
        self.starting_player = -1
        self.game_count = 0
        self.series_count = 0
        self.series_game_count = 0
        BasicClientActorAbs.__init__(self, ip_address, verbose=verbose, auto_test=auto_test)
        trainer = ActorTrainer(self.hex, 'model/1000x500x100-200', start_game=250)
        # self.actor = trainer.actor
        self.actor = MCTS(self.hex, simulations=100)

    def handle_get_action(self, state):
        """
        Here you will use the neural net that you trained using MCTS to select a move for your actor on the
        current board. Remember to use the correct player_number for YOUR actor! The default action is to select
        a random empty cell on the board. This should be modified.

        :param state: The current board in the form (1 or 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
                      where 1 or 2 indicates the number of the current player. If you are player 2 in the current
                      series, for example, then you will see a 2 here throughout the entire series, whereas
                      player 1 will see a 1.
        :return: Your actor's selected action as a tuple (row, column)
        """
        current_player = state[0] - 1
        board = list(state[1:])
        state = (board, current_player)
        # next_move = self.actor.select_move(state)[0][0]
        self.actor.set_state(state)
        next_move = self.actor.select_move()[0]
        return next_move

    def handle_series_start(self, unique_id, series_id, player_map, num_games, game_params):
        """
        Set the player_number of our actor, so that we can tell our MCTS which actor we are.

        :param unique_id: integer identifier for the player within the whole tournament database
        :param series_id: (1 or 2) indicating which player this will be for the ENTIRE series
        :param player_map: a list of tuples: (unique-id series-id) for all players in a series
        :param num_games: number of games to be played in the series
        :param game_params: important game parameters. For Hex = list with one item = board size (e.g. 5)
        :return:
        """
        self.series_id = series_id
        self.series_count += 1
        print(f'Series {self.series_count} starting')
        print(f'Series ID: {series_id}')
        self.series_game_count = 0

        #############################
        #                           #
        # YOUR CODE (if you have anything else) HERE
        #                           #
        #############################

    def handle_game_start(self, start_player):
        """
        :param start_player: The starting player number (1 or 2) for this particular game.
        :return:
        """
        self.starting_player = start_player
        self.game_count += 1
        print(f'Game {self.game_count} starting. (Game {self.series_game_count} in series.)')

        #############################
        #                           #
        # YOUR CODE (if you have anything else) HERE
        #                           #
        #############################

    def handle_game_over(self, winner, end_state):
        """
        Here you can decide how to handle what happens when a game finishes. The default action is to print the
        winner and the end state.

        :param winner: Winner ID (1 or 2)
        :param end_state: Final state of the board.
        :return:
        """
        #############################
        #                           #
        #      YOUR CODE HERE       #
        #                           #
        #############################
        print()
        print("Game over, these are the stats:")
        print('Winner: ' + str(winner))
        print('End state:')
        self.print_state(end_state)

    def handle_series_over(self, stats):
        """
        Here you can handle the series end in any way you want; the initial handling just prints the stats.

        :param stats: The actor statistics for a series = list of tuples [(unique_id, series_id, wins, losses)...]
        :return:
        """
        #############################
        #                           #
        #      YOUR CODE HERE       #
        #                           #
        #############################
        print("Series ended, these are the stats:")
        print(f'Series ID: {self.series_id}')
        for stat in stats:
            if stat[1] == self.series_id:
                # Found my stats
                print(f'Won {stat[2]}/{stat[2] + stat[3]} ({stat[2]/(stat[2]+stat[3]):.0%})')
        print()
        # print(str(stats))

    def handle_tournament_over(self, score):
        """
        Here you can decide to do something when a tournament ends. The default action is to print the
        received score.

        :param score: The actor score for the tournament
        :return:
        """
        #############################
        #                           #
        #      YOUR CODE HERE       #
        #                           #
        #############################
        print("Tournament over. Your score was: " + str(score))

    def handle_illegal_action(self, state, illegal_action):
        """
        Here you can handle what happens if you get an illegal action message. The default is to print the state
        and the illegal action.

        :param state: The state
        :param illegal_action: The illegal action
        :return:
        """
        #############################
        #                           #
        #      YOUR CODE HERE       #
        #                           #
        #############################
        print("An illegal action was attempted:")
        print('State: ' + str(state))
        print('Action: ' + str(illegal_action))
testfile = "trivialExample.in" problem = FileReader(testfile) print("Video Sizes: %r" % (problem.videoSizes,)) print("Endpoints:\n\t%s" % ("\n\t".join([str(e) for e in problem.endpoints]))) print("Requests: %r" % ([r for r in problem.requests])) # Generate initial state initial_contents = list([(0, []) for _ in range(problem.nCaches)]) initial_score = 0 initial_state = TreeState(caches_contents=initial_contents, score=initial_score, problem=problem) # Generate the optimal end state mcts = MCTS(tree_policy=UCB1(c=1.41), default_policy=immediate_reward, backup=monte_carlo) node = StateNode(parent=None, state=initial_state) while True: if node.state.is_terminal(): print("Terminal node reached.") break print("Finding best action") best_action = mcts(node) print("Performing action") node = StateNode(parent=None, state=node.state.perform(best_action)) print("Score now is: %d" % node.state.score) print("Saving output")
def mcts_agent(game: ".game.Game", move_number) -> ".game.Game":
    mcts = MCTS(game, move_number)
    cards_to_choose, cards_attack = mcts.choose_next_move()
    choose_card_from_hand(game, ChooseCard.DEFINED_CARDS, cards_to_choose)
    attack_opponent(game, AttackOpponent.DEFINED_CARDS, cards_attack)
timeout = int(args.timeout) - 5
c1 = Client(args.ip, PORTS[player_arg], player_arg)
c1.send_name(PLAYER_NAMES[player_arg])

board = Board()
game = Game(board)

# Main game loop
try:
    while not game.ended:
        state = None
        while state is None:
            state, turn = c1.receive_state()
        game.board.board = state
        if turn not in ["black", "white"]:
            raise GameEndedException
        game.turn = TURN_MAPPING[turn]
        print(state, turn)
        if game.turn == OUR_PLAYER:
            mcts = MCTS(deepcopy(game), OUR_PLAYER, max_depth=max_depth, C=C)
            start, end = mcts.search(timeout)
            print(start, end)
            c1.send_move(start, end)
except GameEndedException:
    print("Game ended with state {}".format(turn))
    c1.close()