import numpy as np

# GoGame, NetTrainer, MCT, and parse_args come from the surrounding project.


class AlphaGoPlayer:
    """Stateful MCTS player: keeps its own board and ignores the state argument."""

    def __init__(self, _, seed, player):
        self.game = GoGame(13, 7.5)  # 13x13 board, komi 7.5; the init_state argument is unused
        self.board = self.game.get_starting_board()
        self.seed = seed
        self.player = -1 if player == 1 else 1  # map the caller's player id onto {-1, 1}
        self.args = parse_args()
        self.nnet = NetTrainer(self.game, self.args)
        self.nnet.load_checkpoint(self.args.best_model_path)
        self.mct = MCT(self.nnet, self.game, self.args, noise=False)  # no exploration noise at play time

    def get_action(self, _, opponent_action):
        # An opponent_action of -1 means we move first, so there is no move to replay.
        if opponent_action != -1:
            self.board = self.game.get_next_state(self.board, -1 * self.player, opponent_action)
        self.board.set_move_num(0)
        action_probs = self.mct.actionProb(self.board, self.player, 0)  # temperature 0: deterministic
        self.board.set_move_num(-1)
        best_action = np.argmax(action_probs)
        self.board = self.game.get_next_state(self.board, self.player, best_action)
        return best_action
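# Usage sketch for the stateful player above: a minimal self-play driver,
# assuming the checkpoint and command-line args it loads are available. The
# -1 sentinel is the "no opponent move yet" convention get_action checks for;
# a real driver would also keep a referee board to detect the game end, here
# the game length is simply capped.
if __name__ == "__main__":
    first = AlphaGoPlayer(None, seed=0, player=1)   # maps to internal color -1, moves first
    second = AlphaGoPlayer(None, seed=1, player=2)  # any id other than 1 maps to color 1
    last_action = -1
    for turn in range(200):
        mover = first if turn % 2 == 0 else second
        last_action = mover.get_action(None, last_action)
        print(turn, last_action)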
class AlphaGoPlayer:
    """Greedy baseline: plays whichever legal move maximizes the immediate score."""

    def __init__(self, init_state, seed, player):
        self.game = GoGame(init_state)
        self.seed = seed
        self.player = player

    def get_action(self, cur_state, opponent_action):
        # Replay the opponent's move onto the state we were handed.
        cur_board = self.game.get_next_state(cur_state, -1 * self.player, opponent_action)

        # get_valid_moves returns a 0/1 indicator vector; collect the legal action indices.
        valids = self.game.get_valid_moves(cur_board, self.player)
        possible_actions = [action for action, indicator in enumerate(valids) if indicator == 1]

        # Score every legal move one ply ahead and keep the best (ties go to the later move).
        high_score = -float('inf')
        greedy_action = None
        for action in possible_actions:
            new_board = self.game.get_next_state(cur_board, self.player, action)
            score = self.game.get_score(new_board, self.player)
            if score >= high_score:
                high_score = score
                greedy_action = action
        # Assumes pass (and resign) are always among the valid moves, so this is never None.
        return greedy_action
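# The scoring loop above can be collapsed into a single max(); a sketch with a
# hypothetical helper name, assuming possible_actions is non-empty. Note the
# tie-breaking differs: max() keeps the first best move, while the >= loop
# above keeps the last one.
def greedy_pick(game, cur_board, player, possible_actions):
    return max(
        possible_actions,
        key=lambda a: game.get_score(game.get_next_state(cur_board, player, a), player),
    )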
import numpy as np

# GoGame, NetTrainer, MCT, and parse_args come from the surrounding project.


class AlphaGoPlayer:
    """Stateless MCTS player: searches from whatever state the referee passes in."""

    def __init__(self, init_state, seed, player):
        self.game = GoGame(init_state)
        self.seed = seed
        self.player = player
        self.args = parse_args()  # hyperparameters are read from the command line
        self.nnet = NetTrainer(self.game, self.args)
        self.nnet.load_checkpoint(self.args.best_model_path + str(self.args.type))
        self.mct = MCT(self.nnet, self.game, self.args)

    def get_action(self, cur_state, opponent_action):
        # Replay the opponent's move, then take the MCTS move at temperature 0.
        cur_board = self.game.get_next_state(cur_state, -1 * self.player, opponent_action)
        action_probs = self.mct.actionProb(cur_board, self.player, 0)
        best_action = np.argmax(action_probs)
        return best_action
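# Both MCTS players call actionProb(board, player, 0) and then argmax, i.e.
# temperature-0 (deterministic) selection. The sketch below shows the usual
# temperature knob for context; select_action is a hypothetical helper, and
# action_probs is assumed to be a normalized distribution over actions.
def select_action(action_probs, temperature, rng=None):
    rng = rng or np.random.default_rng()
    if temperature == 0:
        return int(np.argmax(action_probs))        # play the most-visited move
    probs = np.asarray(action_probs, dtype=float) ** (1.0 / temperature)
    probs /= probs.sum()                           # renormalize after sharpening/flattening
    return int(rng.choice(len(probs), p=probs))    # sample for exploration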
import random

if __name__ == "__main__":
    game = GoGame(13, 5.5)  # 13x13 board, komi 5.5
    board = game.get_starting_board()
    player = -1  # black moves first
    while True:
        if game.get_game_ended(board, player):
            break
        actions = game.get_valid_moves(board, player)
        actions[-1] = 0  # mask the last action (pass) so it is played only when forced
        possible_actions = [action for action, indicator in enumerate(actions) if indicator == 1]
        if len(possible_actions) > 0:
            selected_action = random.choice(possible_actions)
        else:
            selected_action = game.get_action_space_size() - 1  # no legal move left: pass
        board = game.get_next_state(board, player, selected_action)
        print(selected_action)
        board.print_board()
        player = -player
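# The indicator-vector handling above repeats in every player; a small helper
# sketch (hypothetical name) that factors it out, assuming valid-move vectors
# are 0/1 sequences with the pass move in the last slot:
def random_legal_action(valids, allow_pass=False):
    last = len(valids) - 1  # pass move
    choices = [a for a, ok in enumerate(valids) if ok == 1 and (allow_pass or a != last)]
    return random.choice(choices) if choices else last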