Code example #1
 def run(self):
     self.ai = MCTSPlayer(c_puct=5, n_playout=20)
     # self.ai.board = self.board
     move = self.ai.get_action(self.board)
     self.board.do_move(move)
     print(move)
     self.finishSignal.emit(int(move[0]), int(move[1]), int(move[2]),
                            int(move[3]))
Code example #2
class RL_QG_agent(object):
    def __init__(self):
        self.temp = 1e-3  # the temperature param
        self.n_playout = 200  # num of simulations for each move
        self.c_puct = 5
        self.board_width = 8
        self.board_height = 8
        self.model_path = os.path.join("./models/curr_model_100rollout.pt")
        #self.policy_value_net = PolicyValueNet(self.board_width, self.board_height, net_params=None)
        #self.mcts_player = MCTSPlayer(self.policy_value_net.policy_value_fn, c_puct=self.c_puct, n_playout=self.n_playout)
        self.mcts_player = MCTS_Pure(c_puct=5, n_playout=self.n_playout)
        self.env = gym.make("Reversi8x8-v0")
        self.init_model()
        #self.load_model()

    def init_model(self):
        self.board = Board(env=self.env,
                           width=self.board_width,
                           height=self.board_height)
        self.board.init_board()
        self.game = Game(self.board)
        self.have_step = False

    def place(self, state, enables, player=None):
        # enables and player are not used here; only state[2], the env's
        # empty-square plane, is compared against the internal board.
        # bit_to_board() yields the occupancy planes of the two colours,
        # so 1 - (plane0 + plane1) is a mask of the empty squares.
        curr_state = bit_to_board(self.board.black, self.board.white)
        curr_state = 1 - (curr_state[0] + curr_state[1])
        # Squares occupied internally but empty in the env state: more than
        # one such square means the env has started a new game, so reset.
        reverse_change = np.where((curr_state - state[2]) == -1)
        if self.have_step == False:
            pass
        elif reverse_change[0].shape[0] > 1:
            self.board.init_board()
            self.have_step = False
        # Squares empty internally but occupied in the env state: stones the
        # opponent has placed since our last move.
        curr_state = bit_to_board(self.board.black, self.board.white)
        curr_state = 1 - (curr_state[0] + curr_state[1])
        change = np.where((curr_state - state[2]) == 1)
        if change[0].shape[0] == 1:
            # Exactly one new stone: replay the opponent's move locally.
            action = change[0][0] * self.board_width + change[1][0]
            self.board.do_move(action)
        else:
            if self.have_step == False:
                # First call of a game: no opponent move to replay yet.
                pass
            else:
                # Game underway but no new stone: the opponent passed (65).
                action = 65
                self.board.do_move(action)

        # Let pure MCTS choose our move, apply it locally and return it.
        move = self.mcts_player.get_action(self.board)
        self.board.do_move(move)
        self.have_step = True

        return move

    def load_model(self):
        self.policy_value_net.policy_value_net.load_state_dict(
            torch.load(self.model_path))
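
A minimal calling sketch for the place() method above. It assumes the custom Reversi8x8-v0 gym environment used by the agent is registered, and that the observation is a 3x8x8 array whose plane state[2] marks empty squares (1 = empty), which is all the diff logic in place() relies on; the board values below are synthetic, not from the original project.

import numpy as np

agent = RL_QG_agent()
obs = np.zeros((3, 8, 8), dtype=np.int64)
obs[2] = 1                   # empty-square mask: everything empty...
obs[2, 3:5, 3:5] = 0         # ...except the four initial centre stones of Reversi
move = agent.place(obs, enables=None)   # enables and player are unused in this snippet
print("agent plays square", move)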
Code example #3
File: alpha_zeroII.py    Project: smallsuqirrel/Alphgo
def run(states, sensible_moves, currentPlayer, lastMove):
    n = 5
    width, height = 8, 8
    board = Board(width=width, height=height, n_in_row=n)
    board.init_board()

    board.states = states
    board.availables = sensible_moves
    board.current_player = currentPlayer
    board.last_move = lastMove

    #best_policy = PolicyValueNetNumpy(width, height, policy_param)
    #mcts_player = MCTSPlayer(best_policy.policy_value_fn, c_puct=5, n_playout=400)

    # use pure MCTS only (no policy-value network)
    mcts_player = MCTS_Pure(c_puct=5, n_playout=4000)  # n_playout sets the number of search playouts

    nextmove = mcts_player.get_action(board)

    return nextmove
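
For reference, a minimal driver sketch for run() above. The concrete values are assumptions based on the usual Board conventions of this style of codebase (a states dict mapping move index to player id, integer move indices, player ids 1 and 2, and -1 for "no previous move"); they are not part of the original example.

empty_states = {}                    # no stones placed yet
legal_moves = list(range(8 * 8))     # on an empty board every square is available
next_move = run(empty_states, legal_moves, currentPlayer=1, lastMove=-1)
print("pure-MCTS move:", next_move)  # an integer board index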
Code example #4
 def __init__(self):
     self.temp = 1e-3  # the temperature param
     self.n_playout = 200  # num of simulations for each move
     self.c_puct = 5
     self.board_width = 8
     self.board_height = 8
     self.model_path = os.path.join("./models/curr_model_100rollout.pt")
     #self.policy_value_net = PolicyValueNet(self.board_width, self.board_height, net_params=None)
     #self.mcts_player = MCTSPlayer(self.policy_value_net.policy_value_fn, c_puct=self.c_puct, n_playout=self.n_playout)
     self.mcts_player = MCTS_Pure(c_puct=5, n_playout=self.n_playout)
     self.env = gym.make("Reversi8x8-v0")
     self.init_model()
Code example #5
class AI(QtCore.QThread):
    finishSignal = QtCore.pyqtSignal(int, int, int, int)

    # take the board as an extra constructor parameter
    def __init__(self, board, parent=None):
        super(AI, self).__init__(parent)
        self.board = board

    # override the run() method
    def run(self):
        self.ai = MCTSPlayer(c_puct=5, n_playout=20)
        # self.ai.board = self.board
        move = self.ai.get_action(self.board)
        self.board.do_move(move)
        print(move)
        self.finishSignal.emit(int(move[0]), int(move[1]), int(move[2]),
                               int(move[3]))
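
A minimal usage sketch for the worker thread above; board and the on_ai_move slot are hypothetical names supplied by the GUI code, only the AI class itself comes from the example.

ai_thread = AI(board)                       # board: the shared game-state object (assumed)
ai_thread.finishSignal.connect(on_ai_move)  # on_ai_move(x1, y1, x2, y2): a GUI slot (assumed)
ai_thread.start()                           # QThread.start() executes run() in a worker thread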
Code example #6
        opponent.reset_player()
        board = BoardSlim(width=BOARD_SHAPE[0],
                          height=BOARD_SHAPE[1],
                          n_in_row=4)
        game = Game(board)
        winner = game.start_play(player,
                                 opponent,
                                 start_player=i % 2,
                                 is_shown=(i == 0) and show_game)
        print("winner: ", winner)
        if winner == 1:
            stats["wins"] += 1
        elif winner == -1:
            stats["ties"] += 1
        stats["length"].append(BOARD_SHAPE[0] * BOARD_SHAPE[1] -
                               len(board.availables))
    return stats


if __name__ == "__main__":
    player = load_player()
    for playout in playouts:
        mcts_player = MCTSPlayer(n_playout=playout)
        stats = evaluate_player(player, mcts_player, show_game=True)
        print()
        print("win ratio agains %d playouts: " % playout,
              stats["wins"] / num_games)
        print("tie ratio agains %d playouts: " % playout,
              stats["ties"] / num_games)
        print("average game length agains %d playouts: ",
              np.mean(stats["length"]))
Code example #7
try:
    opts, args = getopt.getopt(sys.argv[1:], "c:m:")
except getopt.GetoptError:
    print('TwelveShogi.py -c <startColor(0 or 1)> -m <mode(0 or 1 or 2)>')
    sys.exit()
for opt, arg in opts:
    if opt == '-c':
        startColor = int(arg)
    elif opt == '-m':
        mode = int(arg)
shogiboard = ShogiBoard()
shogiboard.curStepColor = startColor
shogiboard.redrawBoard(window)

if mode == 1:
    # AIPlayer = TSAI()
    AIPlayer = MCTS_Pure()
    # AIPlayer = MCTSPlayer()
elif mode == 2:
    # AIPlayer = TSAI()
    AIPlayer = MCTS_Pure()
    trainCounts = 0
    # AIPlayer = MCTSPlayer(is_selfplay = 1)

top = 80
left = 70
yGap = 125
xGap = 140

curRow = 3
curCol = 0
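
Going by the getopt usage string above, the script would presumably be launched as: python TwelveShogi.py -c 0 -m 1, i.e. starting colour 0 against the pure-MCTS opponent, while mode 2 additionally initialises the trainCounts counter for the (commented-out) self-play player.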