def handle_get_action(self, state):
    """
    Here you will use the neural net that you trained using MCTS to select a
    move for your actor on the current board. Remember to use the correct
    player_number for YOUR actor! The default action is to select a random
    empty cell on the board. This should be modified.

    :param state: The current board in the form
        (1 or 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
        where 1 or 2 indicates the number of the current player. If you are
        player 2 in the current series, for example, then you will see a 2
        here throughout the entire series, whereas player 1 will see a 1.
    :return: Your actor's selected action as a tuple (row, column)
    """
    # This is an example player who picks random moves.
    # REMOVE THIS WHEN YOU ADD YOUR OWN CODE !!
    # next_move = tuple(self.pick_random_free_cell(
    #     state, size=int(math.sqrt(len(state) - 1))))
    player = list(state).pop(0)
    board_state = list(state)[1:]
    size = int(math.sqrt(len(board_state)))

    # [1, 2, 3, 4] --> [[1, 2], [3, 4]]
    board = np.reshape(board_state, (size, size))

    # [[1, 2], [3, 4]] -->
    # [[[p1, p2, free], [p1, p2, free]], [[p1, p2, free], [p1, p2, free]]]
    _board = MCTS.get_feature(board, player == 2)
    next_move = ANET.predict(_board, self.model)

    # Collect all empty cells so an illegal prediction can be corrected.
    legal_moves = []
    for i in range(len(board)):
        for j in range(len(board)):
            if board[i][j] == 0:
                legal_moves.append((i, j))

    # The feature for player 2 is built on a transposed view of the board,
    # so the predicted coordinates are mirrored back before returning.
    next_move = (next_move[1], next_move[0]) if player == 2 else next_move
    if next_move not in legal_moves:
        next_move = choice(legal_moves)
    return next_move
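# A minimal standalone sketch of the parsing done above: the first element of
# the server's state tuple is the player number, the rest is the flat board,
# and a player-2 prediction is mirrored across the diagonal. The 2x2 board
# and the hard-coded prediction are for illustration only.
import math
import numpy as np

state = (2, 0, 1, 0, 0)                  # player 2 to move on a 2x2 board
player, board_flat = state[0], state[1:]
size = int(math.sqrt(len(board_flat)))
board = np.reshape(board_flat, (size, size))

predicted = (0, 1)                       # hypothetical net output (row, col)
move = (predicted[1], predicted[0]) if player == 2 else predicted
assert board[move] == 0                  # (1, 0) is empty, so the move is legal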
def fetch_game_models(self, dir_path="anet"):
    # Load every saved .h5 checkpoint in dir_path, keyed by its training step.
    for filename in os.listdir(dir_path):
        if filename.endswith(".h5"):
            model = ANET.new_from_file(dir_path + "/" + filename)
            key = self.filename_to_step(filename)
            self.models[key] = model
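# filename_to_step is defined elsewhere; a plausible sketch, assuming the
# checkpoints follow the "model_step_<N>.h5" naming used by the training code:
def filename_to_step(filename):
    # "model_step_10.h5" -> 10
    return int(filename.rsplit("_", 1)[-1].split(".")[0])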
def run_tournament(self, path="topp"):
    self.fetch_game_models(path)
    games = self.get_games()
    wrong_moves = 0
    total_moves = 0
    for p1, p2 in games:
        for _ in range(self.num_games):
            game = Hex(5, 1)
            while len(game.get_moves()) > 0:
                if self.random and game.player == Hex.PLAYER_LEFT:
                    next_move = choice(game.get_moves())
                else:
                    model = self.models[p1 if game.player == Hex.PLAYER_TOP else p2]
                    next_move = ANET.predict(
                        get_feature(game.get_state(),
                                    game.player == Hex.PLAYER_LEFT),
                        model=model)
                    # The left player's feature is built on a transposed
                    # board, so its prediction is mirrored back.
                    next_move = ((next_move[1], next_move[0])
                                 if game.player == Hex.PLAYER_LEFT
                                 else next_move)
                    # Fall back to a random legal move if the net picked an
                    # occupied cell, and keep statistics on how often.
                    if next_move not in game.get_moves():
                        wrong_moves += 1
                        next_move = choice(game.get_moves())
                    total_moves += 1
                game.do_move(next_move)

            # Exactly one of the two players must have won.
            res_p1 = game.get_result(Hex.PLAYER_TOP)
            res_p2 = game.get_result(Hex.PLAYER_LEFT)
            if res_p1 == 0 and res_p2 == 0:
                raise Exception("Game ended with no winner")
            if res_p1 == 1 and res_p2 == 1:
                raise Exception("Game ended with two winners")

            winning_model = p1 if res_p1 == 1 else p2
            losing_model = p2 if res_p2 == 0 else p1
            if self.results.get((winning_model, losing_model)) is not None:
                self.results[(winning_model, losing_model)] += 1
            else:
                self.results[(winning_model, losing_model)] = 1

    pp = pprint.PrettyPrinter(indent=4)
    pp.pprint(self.results)
    if total_moves > 0:
        pp.pprint("Wrong moves (%): " + str(100 * wrong_moves / total_moves))
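# get_games is defined elsewhere; a minimal round-robin sketch, assuming
# every loaded checkpoint should meet every other checkpoint exactly once:
import itertools

def get_games(self):
    # e.g. models keyed 0, 10, 20 -> [(0, 10), (0, 20), (10, 20)]
    return list(itertools.combinations(sorted(self.models), 2))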
# from topp import TOPP
# topp = TOPP(verbose=True)
# topp.start()

from anet import ANET
from utils import Timer
import numpy as np

timer = Timer()
nn = ANET(5)
rbuf = np.load('/home/kim/skule/aiprog/project3/rbufs/size_5-ep_180.h5.npy',
               allow_pickle=True)
test_ex = rbuf[0][0]
to_predict = np.array([list(test_ex)])

timer.start("prediction")
prediction = nn.model.predict(to_predict)
timer.stop()

# import tensorflow as tf
# from tensorflow import keras
# import numpy as np
# import random
# import os

# def softmax(x):
#     """Compute softmax values for each set of scores in x."""
#     e_x = np.exp(x - np.max(x))
#     return e_x / e_x.sum()

# model = keras.models.load_model("./models/size_5-ep_180.h5")
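# The commented-out softmax above becomes useful if the net's raw output is
# treated as scores over all cells: masking occupied cells before taking the
# argmax avoids the "wrong move" fallback seen in the tournament loop. A
# sketch under that assumption (`logits` is the net's flat output,
# `board_flat` the flat board with 0 marking empty cells); the names here
# are illustrative:
import numpy as np

def masked_move(logits, board_flat):
    e_x = np.exp(logits - np.max(logits))          # numerically stable softmax
    probs = e_x / e_x.sum()
    probs = probs * (np.asarray(board_flat) == 0)  # zero out occupied cells
    return int(np.argmax(probs))                   # flat index of best legal cell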
def __init__(self, IP_address=None, verbose=True):
    self.series_id = -1
    BasicClientActorAbs.__init__(self, IP_address, verbose=verbose)
    # The trained policy network that handle_get_action queries for moves.
    self.model = ANET.new_from_file("../short_topp/model_step_10.h5")
from state_manager import StateManager
from mcts import MCTS
from tree import Tree
import numpy as np
import collections
import random

from anet import ANET


def train_anet(anet, RBUF):
    # Creates a minibatch of the RBUF and trains the anet on the minibatch
    batch_size = min(len(RBUF), 32)
    minibatch = random.sample(RBUF, batch_size)
    anet.train(minibatch)


"""
Initializations
"""
anet = ANET(size=board_size)
agent = MCTS(exploration_rate=1, anet=anet)
sm = StateManager()
game = sm.create_game()
tree = Tree(game)
win_stats = []
RBUF = collections.deque(maxlen=rbuf_max_size)

for i in range(offset, num_of_games + 1):
    print("Episode: {}/{}".format(i, num_of_games))
    state = tree.root
    while not sm.is_game_over():
        best_child, training_case = agent.uct_search(tree, state, num_search_games)
        RBUF.append(training_case)
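# RBUF is a bounded replay buffer: once maxlen is reached, appending a new
# training case silently evicts the oldest one, so minibatches gradually
# favor positions generated by the most recent policy. A tiny demonstration:
import collections

buf = collections.deque(maxlen=3)
for case in ["c1", "c2", "c3", "c4"]:
    buf.append(case)
print(list(buf))  # ['c2', 'c3', 'c4'] -- 'c1' was evicted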
def main():
    # n, num_games, verbose, starting_player, max_rollouts = setup_game()
    n, num_games, verbose, starting_player, max_rollouts = 5, 200, False, 1, 0.5
    results = []
    game_num = 1
    viewer = None
    run_tournament = True
    with_training = True
    num_games_tournament = 25
    if run_tournament:
        save_path = "short_topp"
    else:
        save_path = "long_topp"

    ##### CONFIG #####
    buffer_size = 40
    train_interval = 40
    saving_interval = 10
    moves_done = 0
    epochs = 300
    ##################

    buffer = ReplayBuffer(vfrac=0.1, tfrac=0.1, size=buffer_size)
    anet = init_anet(n, buffer)
    if with_training:
        anet.save_to_file(save_path + "/model_step_{0}.h5".format(0))

    game = Hex(n, starting_player)
    ROOT_NODE = Node(game=game)

    while with_training and game_num <= num_games:
        game = Hex(n, starting_player)
        next_root = ROOT_NODE
        # viewer = Board(game)
        print("Game number {}".format(game_num))
        while game.get_moves():
            mc = MonteCarlo(game, max_rollouts, next_root)
            mc.run(lambda _input: ANET.predict(_input, model=anet.model))
            case = mc.get_training_case()
            buffer.push(case)
            next_root = mc.get_best_move()
            game.do_move(next_root.move)
            moves_done += 1
            if viewer:
                viewer.do_move(next_root.move, game.player)
            if moves_done % train_interval == 0:
                buffer.update()
                anet.train_model(epochs)
                anet.run_against_random(num_games=50, game_num=game_num)

        if saving_interval > 0 and game_num % saving_interval == 0:
            anet.save_to_file(save_path + "/model_step_{0}.h5".format(game_num))
            buffer.size += 20
            # train_interval += 5
            # anet.optimizer.lr /= 2

        if game.get_result(game.player) == 1:
            results.append(game.player)
        game_num += 1

    if viewer:
        viewer.persist()

    if run_tournament:
        tournament = Tournament(num_games_tournament)
        tournament.run_tournament(save_path)
    else:
        anet.save_to_file("best_topp/model_2.h5")
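# ReplayBuffer is defined elsewhere in the project; this is only a sketch of
# an implementation consistent with how it is used above (push collects
# training cases, size caps the buffer, update re-splits the cases using the
# vfrac/tfrac fractions before each training round). The split itself is a
# guess.
class ReplayBufferSketch:
    def __init__(self, vfrac, tfrac, size):
        self.vfrac, self.tfrac, self.size = vfrac, tfrac, size
        self.cases = []

    def push(self, case):
        # Keep only the `size` most recent cases.
        self.cases.append(case)
        self.cases = self.cases[-self.size:]

    def update(self):
        # Hypothetical re-split into test, validation and training slices.
        n = len(self.cases)
        n_test, n_val = int(n * self.tfrac), int(n * self.vfrac)
        self.test = self.cases[:n_test]
        self.val = self.cases[n_test:n_test + n_val]
        self.train = self.cases[n_test + n_val:]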
def init_anet(size, buffer):
    return ANET(size=size,
                buffer=buffer,
                batch_size=20,
                optimizer=optimizers.Adagrad(0.005))
minibatch = random.sample(RBUF, k=batch_size)
x_train, y_train = zip(*minibatch)
anet.train(np.asarray(x_train), np.asarray(y_train))


def get_distribution(node):
    # Build the target distribution from the MCTS visit counts of the root's
    # children, then normalize it into a probability distribution.
    distribution = np.zeros(node.game.boardsize ** 2)
    for child in node.children:
        distribution[child.move] = child.visits
    print(distribution)
    D = normalize(distribution)
    return D


"""
Initializations
"""
anet = ANET(boardsize)
agent = MCTS(exploration_rate=1, anet=anet)
sm = StateManager(boardsize)
game = sm.create_game()
tree = Tree(game, 1.0)
win_stats = []
# TODO: Save interval for ANET parameters
RBUF = collections.deque(maxlen=500)

for i in range(1, num_of_games + 1):
    progress_bar(i + 1)
    state = tree.root
    while not sm.is_finished():
        player = sm.game.get_current_player()
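# normalize is imported from elsewhere; for visit counts the intended
# behaviour is presumably plain L1 normalization into a probability
# distribution over moves. A minimal sketch under that assumption:
def normalize(x):
    s = x.sum()
    return x / s if s > 0 else x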