Code Example #1
import numpy as np

# GoGame, NetTrainer, MCT, and parse_args are assumed to come from the
# surrounding project.


class AlphaGoPlayer():
    def __init__(self, _, seed, player):
        self.game = GoGame(13, 7.5)  # presumably a 13x13 board with komi 7.5

        # No initial state is passed in, so start from an empty board.
        self.board = self.game.get_starting_board()

        self.seed = seed

        # Map the supplied player index onto the game's +1/-1 convention.
        self.player = -1 if player == 1 else 1

        # Build the network and load the best saved checkpoint.
        self.args = parse_args()
        self.nnet = NetTrainer(self.game, self.args)
        self.nnet.load_checkpoint(self.args.best_model_path)

        # noise=False presumably disables exploration (Dirichlet) noise.
        self.mct = MCT(self.nnet, self.game, self.args, noise=False)

    def get_action(self, _, opponent_action):
        # The state argument is ignored; the board is tracked internally.

        if opponent_action != -1:  # -1 means the opponent has not moved yet
            self.board = self.game.get_next_state(self.board, -1 * self.player,
                                                  opponent_action)

        self.board.set_move_num(0)
        # The last argument is presumably the temperature; 0 plays greedily.
        action_probs = self.mct.actionProb(self.board, self.player, 0)
        self.board.set_move_num(-1)

        best_action = np.argmax(action_probs)
        # Apply our own move so the internal board stays in sync.
        self.board = self.game.get_next_state(self.board, self.player,
                                              best_action)

        return best_action
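
As a rough usage sketch (an assumption: the harness passes the opponent's previous action to get_action, with -1 meaning "no move yet", and player indices 1 and 2 map onto the two colors), two such players could be driven against each other like this:

# Hypothetical driver for the player above; the move limit, player
# indices, and -1 sentinel are assumptions, not part of the original code.
if __name__ == "__main__":
    black = AlphaGoPlayer(None, seed=0, player=1)
    white = AlphaGoPlayer(None, seed=1, player=2)

    last_action = -1  # black moves first, so there is no opponent move yet
    for _ in range(10):  # a short demo game
        black_move = black.get_action(None, last_action)
        last_action = white.get_action(None, black_move)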
Code Example #2
class AlphaGoPlayer():
    def __init__(self, init_state, seed, player):
        self.game = GoGame(init_state)  # The Go game class

        # init_state is only used to construct the game; it is not stored.
        self.seed = seed
        self.player = player

    def get_action(self, cur_state, opponent_action):
        # cur_state is the board.

        # Do we have to play the opponent's action ourselves? Why aren't we
        # handed the already-updated board? Replaying the move wastes time.

        # Apply the opponent's move.
        cur_board = self.game.get_next_state(cur_state, -1 * self.player,
                                             opponent_action)

        # Get the possible actions.
        possible_actions = self.game.get_valid_moves(cur_board, self.player)

        # If get_valid_moves returns a 0/1 indicator vector instead of a list
        # of action indices, convert it as in Code Example #4:
        #     possible_actions = [a for a, ok in enumerate(actions) if ok == 1]

        # Can possible_actions be empty, or is resign always in there?

        high_score = float("-inf")  # scores may be negative
        greedy_action = None

        for action in possible_actions:
            new_board = self.game.get_next_state(cur_board, self.player,
                                                 action)
            score = self.game.get_score(new_board, self.player)

            if score >= high_score:  # ties keep the later action
                high_score = score
                greedy_action = action  # the greedy action so far

        # Assumes greedy_action is not None and that pass/resign are among
        # the possible actions. The resulting board is not needed here, so
        # we skip computing it.

        return greedy_action
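
The heart of Code Example #2 is a one-step greedy lookahead: score every legal move and keep the best. A minimal, self-contained sketch of that selection pattern (with a stand-in scoring function; the real code uses game.get_score) looks like this:

def greedy_action(actions, score_fn):
    """Return the action with the highest score (ties keep the first)."""
    best_action, best_score = None, float("-inf")
    for action in actions:
        score = score_fn(action)
        if score > best_score:  # strict '>' keeps the earliest best action
            best_action, best_score = action, score
    return best_action


# Usage with a dummy scoring function.
print(greedy_action([3, 7, 11], score_fn=lambda a: -abs(a - 7)))  # -> 7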
Code Example #3
import numpy as np


class AlphaGoPlayer():
    def __init__(self, init_state, seed, player):
        self.game = GoGame(init_state)  # The Go game class

        # init_state is only used to construct the game; it is not stored.
        self.seed = seed
        self.player = player

        # Where are the args supposed to come from? For now, parse_args()
        # supplies them.
        self.args = parse_args()
        self.nnet = NetTrainer(self.game, self.args)
        self.nnet.load_checkpoint(self.args.best_model_path +
                                  str(self.args.type))

        self.mct = MCT(self.nnet, self.game, self.args)

    def get_action(self, cur_state, opponent_action):

        # Apply the opponent's move. (Unlike Code Example #1, a -1
        # "no move yet" sentinel is not checked here.)
        cur_board = self.game.get_next_state(cur_state, -1 * self.player,
                                             opponent_action)

        # The last argument is presumably the temperature; 0 plays greedily.
        action_probs = self.mct.actionProb(cur_board, self.player, 0)

        best_action = np.argmax(action_probs)

        return best_action
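
Both MCTS players call actionProb(..., 0) and then take np.argmax. Under the usual AlphaZero convention (an assumption here; the MCT class itself is not shown), that last argument is a temperature: 0 plays the most-visited move deterministically, while a positive value samples for exploration. A sketch of that selection step:

import numpy as np


def select_action(action_probs, temperature, rng):
    """Pick an action index; temperature 0 is greedy, higher explores."""
    if temperature == 0:
        return int(np.argmax(action_probs))
    # Reweight by 1/temperature and renormalize before sampling.
    weighted = np.asarray(action_probs) ** (1.0 / temperature)
    weighted /= weighted.sum()
    return int(rng.choice(len(weighted), p=weighted))


rng = np.random.default_rng(0)
probs = [0.1, 0.6, 0.3]
print(select_action(probs, 0, rng))    # -> 1 (argmax)
print(select_action(probs, 1.0, rng))  # sampled index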
Code Example #4
import random

if __name__ == "__main__":
    game = GoGame(13, 5.5)  # presumably a 13x13 board with komi 5.5

    board = game.get_starting_board()
    player = -1

    while True:
        if game.get_game_ended(board, player):
            break

        actions = game.get_valid_moves(board, player)
        actions[-1] = 0  # never volunteer the last action (presumably pass)
        selected_action = None
        possible_actions = []

        # Collect indices of the legal moves (see the numpy version below).
        for action, indicator in enumerate(actions):
            if indicator == 1:
                possible_actions.append(action)

        if len(possible_actions) > 0:
            selected_action = random.choice(possible_actions)
        else:
            # No legal move left: fall back to the last action
            # (presumably pass), which was masked out above.
            selected_action = game.get_action_space_size() - 1
        board = game.get_next_state(board, player, selected_action)

        print(selected_action)
        board.print_board()

        player = -player
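
As noted in Code Example #2, the indicator-vector loop above can be written more compactly with numpy, assuming get_valid_moves really returns a 0/1 array:

import numpy as np

actions = np.array([0, 1, 0, 1, 1])  # stand-in for get_valid_moves(...)
actions[-1] = 0                      # still exclude the last action
possible_actions = np.flatnonzero(actions)
print(possible_actions)              # -> [1 3]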