Example no. 1
def run():
    n = 6
    width, height = 9, 9
    model_file = 'best_policy.model'  # load the trained model
    try:
        board = Board(width=width, height=height, n_in_row=n)
        game = Game(board)      
        
        try:
            policy_param = pickle.load(open(model_file, 'rb'))
        except:
            policy_param = pickle.load(open(model_file, 'rb'), encoding='bytes')  # To support python3
        best_policy = PolicyValueNet(width, height, policy_param)
        mcts_player = MCTSPlayer(best_policy.policy_value_fn, c_puct=5, n_playout=400)  # set larger n_playout for better performance
        
        
        # human player, input your move in the format: 2,3
        human = Human()                   
        
        # set start_player=0 for human first
        game.start_play(human, mcts_player, start_player=1, is_shown=1)
    except KeyboardInterrupt:
        print('\n\rquit')
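The try/except around pickle.load here (and again in Example no. 11) exists because the published model files were pickled under Python 2; loading them under Python 3 needs encoding='bytes'. A minimal self-contained sketch of the same fallback pattern, with a hypothetical file name and a narrower except clause:

import pickle

def load_params(path):
    # Try the default first (works for pickles written under Python 3);
    # fall back to encoding='bytes' for Python 2 era pickle files.
    with open(path, 'rb') as f:
        try:
            return pickle.load(f)
        except UnicodeDecodeError:
            f.seek(0)
            return pickle.load(f, encoding='bytes')

# policy_param = load_params('best_policy.model')  # hypothetical usage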
Example no. 2
def run():
    n = 5
    width, height = 8, 8
    model_file = 'best_policy_8_8_5.model'  # assumed path; model_file was not defined in the original snippet
    try:
        board = Board(width=width, height=height, n_in_row=n)
        game = Game(board)

        # ############### human VS AI ###################
        # load the trained policy_value_net in either Theano/Lasagne, PyTorch or TensorFlow

        best_policy = PolicyValueNet(width, height, model_file=model_file)
        mcts_player = MCTSPlayer(best_policy.policy_value_fn,
                                 c_puct=5,
                                 n_playout=500)

        # human player, input your move in the format: 2,3
        human1 = Human()

        # set start_player=0 for human first
        game.start_play(human1, mcts_player, start_player=1, is_shown=1)
        # game.start_play(human1, human2, start_player=0, is_shown=1)
    except KeyboardInterrupt:
        print('\n\rquit')
Example no. 3
def run():
    n = 4
    width, height = 6, 6
    model_file = 'best_policy.model'
    try:
        board = Board(width=width, height=height, n_in_row=n)
        game = Game(board)

        # ############### human VS AI ###################
        # load the trained policy_value_net in either Theano/Lasagne, PyTorch or TensorFlow

        best_policy = PolicyValueNet(width, height, model_file=model_file)
        mcts_player = MCTSPlayer(best_policy.policy_value_fn,
                                 c_puct=5,
                                 n_playout=400)

        # load the provided model (trained in Theano/Lasagne) into a MCTS player written in pure numpy
        # try:
        #     policy_param = pickle.load(open(model_file, 'rb'))
        # except:
        #     policy_param = pickle.load(open(model_file, 'rb'),
        #                                encoding='bytes')  # To support python3
        # best_policy = PolicyValueNetNumpy(width, height, policy_param)
        # mcts_player = MCTSPlayer(best_policy.policy_value_fn,
        #                          c_puct=5,
        #                          n_playout=400)  # set larger n_playout for better performance

        # uncomment the following line to play with pure MCTS (it's much weaker even with a larger n_playout)
        # mcts_player = MCTS_Pure(c_puct=5, n_playout=1000)

        # human player, input your move in the format: 2,3
        human = Human()

        # set start_player=0 for human first
        game.start_play(human, mcts_player, start_player=0, is_shown=1)
    except KeyboardInterrupt:
        print('\n\rquit')
Example no. 4
    def __init__(self, init_model=None, is_shown=0):

        self.board_width = 15
        self.board_height = 15
        self.n_in_row = 5
        self.board = Board(width=self.board_width,
                           height=self.board_height,
                           n_in_row=self.n_in_row)
        self.is_shown = is_shown
        self.game = Game_UI(self.board, is_shown)

        self.learn_rate = 2e-3
        self.lr_multiplier = 1.0  # adaptively adjust the learning rate based on KL
        self.temp = 1.0  # the temperature param
        self.n_playout = 400  # num of simulations for each move
        self.c_puct = 5
        self.buffer_size = 10000
        self.batch_size = 512  # mini-batch size for training
        self.data_buffer = deque(maxlen=self.buffer_size)
        self.play_batch_size = 1
        self.epochs = 5  # num of train_steps for each update
        self.kl_targ = 0.02
        self.check_freq = 50
        self.game_batch_num = 1500
        self.best_win_ratio = 0.0
        self.pure_mcts_playout_num = 1000  # playouts for the pure-MCTS evaluation opponent
        if init_model:
            self.policy_value_net = PolicyValueNet(self.board_width,
                                                   self.board_height,
                                                   model_file=init_model)
        else:
            self.policy_value_net = PolicyValueNet(self.board_width,
                                                   self.board_height)
        self.mcts_player = MCTSPlayer(self.policy_value_net.policy_value_fn,
                                      c_puct=self.c_puct,
                                      n_playout=self.n_playout,
                                      is_selfplay=1)
Example no. 5
def run_a2c_vs_sarsa():
    winners = []
    board_length = 8
    action_space = (board_length, board_length, board_length, board_length)

    agent_one = A2C((board_length, board_length), action_space, "a3c", "up",
                    1.0, 2000, 100000)
    agent_two = SARSAAgent((board_length, board_length),
                           action_space,
                           "sarsa_two",
                           "down",
                           1.0,
                           2000,
                           100000,
                           save_path="../data/modeldata/sarsa_two/model.ckpt")
    iterations = 200000
    for i in range(iterations):
        board = Board(board_length=board_length)
        game = Game(agent_one=agent_one, agent_two=agent_two, board=board)
        game.play(verbose=False)
        winners += [game.winner]
        agent_one.epsilon *= 0.99999
        if (i % 5000 == 0 and i > 0) or (iterations - 1 == i):
            victories_player_two = 0
            victories_player_one = 0
            for winner in winners:
                if winner == "a3c":
                    victories_player_one += 1
                if winner == "Two":
                    victories_player_two += 1
            logging.info("Current epsilon: {}".format(agent_one.epsilon))
            logging.info("Player One: {}".format(str(victories_player_one)))
            logging.info("Player Two: {}".format(str(victories_player_two)))
            logging.info("Mean Rewards Agent One: {}".format(
                agent_one.moving_average_rewards[-1]))
            logging.info("Mean Rewards Agent Two: {}".format(
                agent_two.moving_average_rewards[-1]))
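The loop above decays exploration with agent_one.epsilon *= 0.99999 once per game; over the 200000 games this leaves roughly exp(-2) ≈ 0.135 of the initial epsilon. A quick standalone check of that arithmetic:

import math

epsilon, decay, games = 1.0, 0.99999, 200000
final = epsilon * decay ** games
print(final)                              # ~0.1353
print(math.exp(games * math.log(decay)))  # same value, computed via logs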
Example no. 6
def run_sarsa_lstm_vs_random():
    winners = []
    board_length = 8
    action_space = (board_length, board_length, board_length, board_length)

    agent_one = SARSALSTMAgent((board_length, board_length),
                               action_space,
                               "sarsa_lstm",
                               "up",
                               1.0,
                               2000,
                               100000,
                               caching=False)
    agent_two = RandomAgent((board_length, board_length),
                            (board_length, board_length), "Two", "down")
    iterations = 200000
    for i in range(iterations):
        board = Board(board_length=board_length)
        game = Game(agent_one=agent_one, agent_two=agent_two, board=board)
        game.play(verbose=False)
        winners += [game.winner]
        agent_one.epsilon *= 0.99999
        if (i % 5000 == 0 and i > 0) or iterations - 1 == i:
            victories_player_two = 0
            victories_player_one = 0
            for winner in winners:
                if winner == "sarsa_lstm":
                    victories_player_one += 1
                if winner == "Two":
                    victories_player_two += 1
            logging.info("Current epsilon: {}".format(agent_one.epsilon))
            logging.info("Player One: {}".format(str(victories_player_one)))
            logging.info("Player Two: {}".format(str(victories_player_two)))
            logging.info("Mean Rewards Agent One: {}".format(
                agent_one.moving_average_rewards[-1]))
            logging.info("Mean Rewards Agent Two: {}".format(
                agent_two.moving_average_rewards[-1]))
Example no. 7
 def __init__(self, init_model=None):
     # set the board and game parameters
     self.board_width = 10
     self.board_height = 10
     self.n_in_row = 4
     self.board = Board(width=self.board_width,
                        height=self.board_height,
                        n_in_row=self.n_in_row)
     self.game = Game(self.board)
     # set the training parameters
     self.learn_rate = 2e-3  # base learning rate
     self.lr_multiplier = 1.0  # adaptively adjust the learning rate based on KL
     self.temp = 1.0  # the temperature parameter
     self.n_playout = 400  # number of simulations for each move
     self.c_puct = 5  # trade-off coefficient between exploitation and exploration
     self.buffer_size = 10000
     self.batch_size = 512  # mini-batch size for training
     self.data_buffer = deque(maxlen=self.buffer_size)  # use deque to create a double-ended queue
     self.play_batch_size = 1
     self.epochs = 5  # num of train_steps for each update
     self.kl_targ = 0.02  # KL target used for early stopping
     self.check_freq = 50  # every 50 batches, check whether the policy-value net has improved
     self.game_batch_num = 500  # number of training batches (epochs) to run
     self.best_win_ratio = 0.0  # current best win ratio, used to decide whether a better model was found
     # number of playouts for the weak pure-MCTS AI that serves as the evaluation opponent
     self.pure_mcts_playout_num = 1000
     if init_model:
         # initialize the policy network from init_model
         self.policy_value_net = PolicyValueNet(self.board_width, self.board_height, model_file=init_model, use_gpu=True)
     else:
         # train a new policy network from scratch
         self.policy_value_net = PolicyValueNet(self.board_width, self.board_height, use_gpu=True)
     # AI player; set is_selfplay=1 for self-play, since we are training
     self.mcts_player = MCTSPlayer(self.policy_value_net.policy_value_fn,
                                   c_puct=self.c_puct,
                                   n_playout=self.n_playout,
                                   is_selfplay=1)
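Several of these training __init__ snippets pair kl_targ with lr_multiplier ("adaptively adjust the learning rate based on KL"). The adjustment itself lives in the policy_update step, which is not shown here; below is a hedged sketch of the rule commonly used in AlphaZero-Gomoku-style trainers (the thresholds and the 1.5 factor are assumptions and vary between projects):

def adjust_lr_multiplier(kl, kl_targ, lr_multiplier):
    # Shrink the multiplier when an update moved the policy too far
    # (KL much larger than the target); grow it when the update was too timid.
    if kl > kl_targ * 2 and lr_multiplier > 0.1:
        lr_multiplier /= 1.5
    elif kl < kl_targ / 2 and lr_multiplier < 10:
        lr_multiplier *= 1.5
    return lr_multiplier

print(adjust_lr_multiplier(kl=0.06, kl_targ=0.02, lr_multiplier=1.0))  # 0.666...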
Example no. 8
def run():
    n = 5
    width, height = 9, 9
    model_file = 'output/best_policy.model'
    try:
        board = Board(width=width,
                      height=height,
                      n_in_row=n,
                      forbidden_hands=True)
        game = Game(board)

        # ############### human VS AI ###################
        # load the trained policy_value_net in either Theano/Lasagne, PyTorch or TensorFlow

        # best_policy = PolicyValueNet(width, height, model_file = model_file)
        # mcts_player = MCTSPlayer(best_policy.policy_value_fn, c_puct=5, n_playout=400)

        # load the provided model (trained in Theano/Lasagne) into a MCTS player written in pure numpy
        best_policy = PolicyValueNetRes30(width,
                                          height,
                                          'l+',
                                          model_file=model_file)
        mcts_player = MCTSPlayer(
            best_policy.policy_value_fn, c_puct=5,
            n_playout=400)  # set larger n_playout for better performance

        # uncomment the following line to play with pure MCTS (it's much weaker even with a larger n_playout)
        # mcts_player = MCTS_Pure(c_puct=5, n_playout=1000)

        # human player, input your move in the format: 2,3
        human = Human()

        # set start_player=0 for human first
        game.start_play(human, mcts_player, start_player=1, is_shown=1)
    except KeyboardInterrupt:
        print('\n\rquit')
Example no. 9
 def __init__(self, game_batch_num, model_file=None):
     # params of the board and the game
     self.size = BOARD_SIZE
     use_gpu = False
     board = Board(size=self.size, n_in_row=N_IN_ROW)
     self.game = Game(board)
     # training params
     self.learn_rate = 2e-3
     self.lr_multiplier = 1.0  # adaptively adjust the learning rate based on KL
     self.temp = 1.0  # the temperature param
     self.n_playout = 400  # num of simulations for each move
     self.c_puct = 5
     self.batch_size = 512  # mini-batch size for training
     self.data_buffer = deque(maxlen=10000)
     self.play_batch_size = 1
     self.epochs = 5  # num of train_steps for each update
     self.kl_targ = 0.02
     self.check_freq = 50
     self.game_batch_num = game_batch_num
     self.best_win_ratio = 0.0
     # num of simulations used for the pure mcts, which is used as
     # the opponent to evaluate the trained policy
     self.pure_mcts_playout_num = 1000
     if model_file:
         # start training from an initial policy-value net
         self.policy_value_net = PolicyValueNet(size=self.size,
                                                model_file=model_file,
                                                use_gpu=use_gpu)
     else:
         # start training from a new policy-value net
         self.policy_value_net = PolicyValueNet(size=self.size,
                                                use_gpu=use_gpu)
     self.mcts_player = MCTSPlayer(self.policy_value_net.policy_value_fn,
                                   c_puct=self.c_puct,
                                   n_playout=self.n_playout,
                                   is_selfplay=1)
Example no. 10
 def __init__(self):
     # params of the board and the game
     self.board_width = 6
     self.board_height = 6
     self.n_in_row = 4
     self.board = Board(width=self.board_width,
                        height=self.board_height,
                        n_in_row=self.n_in_row)
     self.game = Game(self.board)
     # training params
     self.learn_rate = 5e-3
     self.lr_multiplier = 1.0  # adaptively adjust the learning rate based on KL
     self.temp = 1.0  # the temperature param
     self.n_playout = 400  # num of simulations for each move
     self.c_puct = 5
     self.buffer_size = 10000
     self.batch_size = 512  # mini-batch size for training
     self.data_buffer = deque(maxlen=self.buffer_size)
     self.play_batch_size = 1
     self.epochs = 5  # num of train_steps for each update
     self.kl_targ = 0.025
     self.check_freq = 50
     self.game_batch_num = 1500
     self.best_win_ratio = 0.0
     # num of simulations used for the pure mcts, which is used as the opponent to evaluate the trained policy
     self.pure_mcts_playout_num = 1000
     # start training from a given policy-value net
     #        policy_param = pickle.load(open('current_policy.model', 'rb'))
     #        self.policy_value_net = PolicyValueNet(self.board_width, self.board_height, net_params = policy_param)
     # start training from a new policy-value net
     self.policy_value_net = PolicyValueNet(self.board_width,
                                            self.board_height)
     self.mcts_player = MCTSPlayer(self.policy_value_net.policy_value_fn,
                                   c_puct=self.c_puct,
                                   n_playout=self.n_playout,
                                   is_selfplay=1)
Example no. 11
def run():
    n = 5
    width, height = 15, 15
    # model_file = 'best_policy_8_8_5.model'
    model_file = 'best_policy.model'
    try:
        board = Board(width=width, height=height, n_in_row=n)
        game = Game(board)

        ################ human VS AI ###################
        # MCTS player with the policy_value_net trained by AlphaZero algorithm
        #        policy_param = pickle.load(open(model_file, 'rb'))
        #        best_policy = PolicyValueNet(width, height, net_params = policy_param)
        #        mcts_player = MCTSPlayer(best_policy.policy_value_fn, c_puct=5, n_playout=400)

        # MCTS player with the trained policy_value_net written in pure numpy
        try:
            policy_param = pickle.load(open(model_file, 'rb'))
        except:
            policy_param = pickle.load(open(model_file, 'rb'),
                                       encoding='bytes')  # To support python3
        best_policy = PolicyValueNetNumpy(width, height, policy_param)
        mcts_player = MCTSPlayer(
            best_policy.policy_value_fn, c_puct=5,
            n_playout=400)  # set larger n_playout for better performance

        # uncomment the following line to play with pure MCTS (it's much weaker even with a larger n_playout)
        #        mcts_player = MCTS_Pure(c_puct=5, n_playout=1000)

        # human player, input your move in the format: 2,3
        human = Human()

        # set start_player=0 for human first
        game.start_play(human, mcts_player, start_player=1, is_shown=1)
    except KeyboardInterrupt:
        print('\n\rquit')
Example no. 12
def evaluate_player(player1, player2, n_games=10):
    """
    Evaluate the trained policy by playing against the pure MCTS player
    Note: this is only for monitoring the progress of training
    current_mcts_player = MCTSPlayer(self.policy_value_net.policy_value_fn,
                                     c_puct=self.c_puct,
                                     n_playout=self.n_playout)
    """

    board = Board(width=9, height=9, n_in_row=5)
    game = Game(board)

    win_cnt = defaultdict(int)
    for i in range(n_games):
        winner = game.start_play(player1,
                                 player2,
                                 start_player=i % 2,
                                 is_shown=0)
        print(winner)
        win_cnt[winner] += 1
    win_ratio = 1.0 * (win_cnt[1] + 0.5 * win_cnt[-1]) / n_games
    print("player1:{}vs player2:{}. result: win: {}, lose: {}, tie:{}".format(
        player1, player2, win_cnt[1], win_cnt[2], win_cnt[-1]))
    return win_ratio
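The win ratio above counts a tie (winner id -1) as half a win for player1, while ids 1 and 2 identify the two players. A tiny standalone check of that convention with made-up counts:

from collections import defaultdict

win_cnt = defaultdict(int, {1: 6, 2: 3, -1: 1})  # hypothetical: 6 wins, 3 losses, 1 tie
n_games = 10
win_ratio = 1.0 * (win_cnt[1] + 0.5 * win_cnt[-1]) / n_games
print(win_ratio)  # 0.65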
Example no. 13
def run():
    n = 5
    width, height = 10, 10
    model_file = 'best_policy.model'
    try:
        board = Board(width=width, height=height, n_in_row=n)
        game = Game(board)
        graphic = Graphic()
        # graphic.run()
        # thread1 = threading.Thread(target=graphic.run, args=())
        best_policy = PolicyValueNet(width,
                                     height,
                                     model_file='./model/' + model_file)
        mcts_player = MCTSPlayer(best_policy.policy_value_fn,
                                 c_puct=5,
                                 n_playout=1000)
        print("hhh")
        human = Human(graphic)
        # set start_player=0 for human first
        thread2 = threading.Thread(target=game.start_play,
                                   args=(human, mcts_player, graphic, 1, 1))
        # game.start_play(human, mcts_player, graphic, start_player=0, is_shown=1)
        # thread1.setDaemon(True)
        # thread1.start()
        thread2.daemon = True  # setDaemon() is deprecated
        thread2.start()
        graphic.run()
        # thread1.join()
        # thread2.join()
        # game.start_play(human, mcts_player, graphic, start_player=0, is_shown=1)

        # thread.start_new_thread(game.start_play, (human, mcts_player, graphic, 0, 1))
        # thread.start_new_thread(graphic.run, ())
    except KeyboardInterrupt:
        print('\n\rquit')
Example no. 14
def run():
    n = 5
    width, height = 9, 9
    iteration = 1000

    model_file = './model/current_policy_{}_{}_{}_iteration{}.model'.format(
        height, width, n, iteration)
    #model_file = './model/best_policy_{}_{}_{}.model'.format(height,width,n)
    try:
        board = Board(width=width, height=height, n_in_row=n)

        best_policy = PolicyValueNet(width, height, model_file=model_file)
        AI_player1 = MCTSPlayer(best_policy.policy_value_fn,
                                c_puct=5,
                                n_playout=400)
        AI_player2 = MCTSPlayer(best_policy.policy_value_fn,
                                c_puct=5,
                                n_playout=400)
        human = Human()

        game = Game("AlphaZero Gomoku", board, AI_player1, AI_player2)
        while True:
            game.play()
            pygame.display.update()

            for event in pygame.event.get():
                if event.type == pygame.QUIT:
                    pygame.quit()
                    exit()
                elif event.type == pygame.MOUSEBUTTONDOWN:
                    mouse_x, mouse_y = pygame.mouse.get_pos()
                    game.mouseClick(mouse_x, mouse_y)
                    game.check_buttons(mouse_x, mouse_y)

    except KeyboardInterrupt:
        print('\n\rquit')
Example no. 15
    def go(self):
        print("One rule:\r\n Move piece form 'x,y' \r\n eg 1,3\r\n")
        print("-" * 60)
        print("Ready Go")

        mc = MonteCarloTreeSearch(self.net, 1000)
        node = TreeNode()
        board = Board()

        while True:
            if board.c_player == BLACK:
                action = input(f"Your piece is 'O' and move: ")
                action = [int(n, 10) for n in action.split(",")]
                action = action[0] * board.size + action[1]
                next_node = TreeNode(action=action)
            else:
                _, next_node = mc.search(board, node)

            board.move(next_node.action)
            board.show()

            next_node.parent = None
            node = next_node

            if board.is_draw():
                print("-" * 28 + "Draw" + "-" * 28)
                return

            if board.is_game_over():
                if board.c_player == BLACK:
                    print("-" * 28 + "Win" + "-" * 28)
                else:
                    print("-" * 28 + "Loss" + "-" * 28)
                return

            board.trigger()
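The input handling above turns a "row,col" string into a flat board index with row * board.size + col. A quick standalone check of that mapping (the board size of 15 is an assumption):

size = 15                      # assumed board size
row, col = 1, 3                # the "1,3" move from the prompt text
action = row * size + col
print(action)                  # 18
assert (action // size, action % size) == (row, col)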
Example no. 16
def battle(player1, player2, num_games=100000, silent=False):
    draw_count = 0
    oneCount = 0
    twoCount = 0
    medTotal = 0
    medHad = 0
    medPos = set()
    for i in range(num_games):
        board = Board()
        result, total, had = play_game(board, player1, player2)
        medTotal += total
        medHad += had
        # medPos.update(pos)
        # print(board)
        if result == 'me':
            # print('nn won')
            oneCount += 1
        elif result == 'op':
            # print('random won')
            twoCount += 1
        else:
            # print('tie')
            draw_count += 1
        # if i % 20 == 0:
        #     print('finished game #' + str(i))
        # input()
    if not silent:
        p1 = player1.typeRep()
        p2 = player2.typeRep()
        print("After {} games we have draws: {}, {} wins: {}, and {} wins: {}.".format(num_games, draw_count, p1, oneCount, p2, twoCount))

        print("Which gives percentages of draws: {:.2%}, {} wins: {:.2%}, and {} wins:  {:.2%}".format(
            draw_count / num_games, p1, oneCount / num_games, p2, twoCount / num_games))

    return oneCount, twoCount, draw_count, medTotal, medHad
Example no. 17
 def __init__(self, n: int, init_model=None):
     # params of the board and the game
     self.n = n
     self.board = Board(self.n)
     self.game = Game(self.board)
     # training params
     self.learn_rate = 5e-3
     self.lr_multiplier = 1.0  # adaptively adjust the learning rate based on KL
     self.temp = 1.0  # the temperature param
     self.n_play_out = 400  # number of simulations for each move
     self.c_puct = 5
     self.buffer_size = 10000
     self.batch_size = 512  # mini-batch size for training
     self.data_buffer = deque(maxlen=self.buffer_size)
     self.epochs = 5  # number of train_steps for each update
     self.kl_target = 0.025
     self.check_freq = 50
     self.game_batch_number = 10000
     self.best_win_ratio = 0.0
     self.episode_length = 0
     self.pool = multiprocessing.Pool(processes=multiprocessing.cpu_count())
     # number of simulations used for the pure mcts, which is used as the opponent to evaluate the trained policy
     self.last_batch_number = 0
     self.pure_mcts_play_out_number = 1000
     if init_model:
         # start training from an initial policy-value net
         policy_param = pickle.load(open(init_model, 'rb'))
         self.policy_value_net = PolicyValueNet(self.n,
                                                net_params=policy_param)
     else:
         # start training from a new policy-value net
         self.policy_value_net = PolicyValueNet(self.n)
     self.mcts_player = MCTSPlayer(self.policy_value_net.policy_value_func,
                                   c_puct=self.c_puct,
                                   n_play_out=self.n_play_out,
                                   is_self_play=1)
Example no. 18
import sys
from game import Board
from lexer import Lexer2048
from parser import Parser2048

board = Board(size=(4, 4))
lexer, parser = Lexer2048(), Parser2048(fmap=board.fmap)

while True:
    try:
        print("\033[32m" + "2048 >>>" + "\033[0m", end=" ")
        inp = input()
        command = lexer.preprocess(inp)
        out = parser.parse(lexer.tokenize(command))
        board.eout()

    except EOFError:
        print()
        exit()

    except Exception as E:
        print(str(E))
        print("-1", file=sys.stderr)
Example no. 19
    def move(self, board):
        print("MCTSPlayer thinks...")
        nodecnt = 0
        t0 = time.time()
        rtnode, alrdy_in = self.board_already_in_gt(board)
        # print("RTNODE: ", rtnode)
        # node not in; if node in, no need to do anything
        if not alrdy_in:
            # need to check if any parents are in gt.
            rtnode_would_be_parents = self.gt.get_would_be_parents_to_root(
                rtnode)

            # get parent in game tree, not grandparent, etc
            # print("RTNode parents", rtnode_would_be_parents)
            last_parent_in_gt = max(list(
                filter(lambda x: self.gt.node_in_tree(x),
                       rtnode_would_be_parents)),
                                    key=lambda x: len(x))
            # print("Latest parent: ", last_parent_in_gt)
            # print("Parent - RTnode", rtnode.replace(last_parent_in_gt, ""))

            # add all children of that parent
            last_added_parent = last_parent_in_gt
            for m in rtnode.replace(last_parent_in_gt, ""):
                # print("adding: ", m, "with parent: ", last_added_parent)
                self.gt.add_node(m, [0, 0], last_added_parent)
                last_added_parent = last_added_parent + m

        # which player are we?
        player = board.state[18]

        while True:
            nodecnt += 1
            # print("RTNODE in WHile LOOP: ", rtnode)
            chosennode = self.choose_next_node(rtnode, board)
            if time.time() - t0 > self.movet:
                break
            nodeboard = Board(board.state.copy())
            for m in chosennode.replace(rtnode, ""):
                nodeboard.pushMove(int(m))

            # Add children of current node
            for m in nodeboard.legalMoves():
                self.gt.add_node(str(m), data=[0, 0], parent=chosennode)

            score = self.playout(nodeboard, player)

            # back propagate score & numVisits
            # print("Chosen Node: '" + chosennode + "'")
            for node in self.gt.get_parents_to_root(chosennode):
                data = self.gt.get_data(node)
                data[0] += score
                data[1] += 1
                self.gt.update_data(node, data)

        best_move = None
        best_numVisits = 0
        for m in self.gt.get_children(rtnode):
            data = self.gt.get_data(m)
            if data[1] > best_numVisits:
                best_numVisits = data[1]
                best_move = m
        # return max(self.gt.get_children(MCTSPlayer.rootnode), key = lambda m: self.gt.get_data(m)[1])
        # print("node", rtnode, 'children visits', list(map(lambda x: (x,self.gt.get_data(x)[1]), self.gt.get_children(rtnode))))
        print(
            "MCTSPlayer explored {0} nodes in {1} seconds at {2:.2f} nodes/s".
            format(nodecnt, self.movet, nodecnt / self.movet))
        return int(best_move[-1])
Example no. 20
import sys
import cv2
sys.path.append('./build/lib.macosx-10.7-x86_64-3.6/')
sys.path.append('/Users/joshua/Coding/go/JoshieGo')
from game import Board
import gofeat
import numpy as np
mtx = np.ones(shape=(19, 19), dtype=int)  # np.int was removed from recent NumPy; use the builtin int
string = Board.mtx2str(mtx)
print(string)
print(dir(gofeat))
string = gofeat.random(string)
ret_mtx = np.fromstring(string, sep=' ', dtype=int).reshape(19, 19)
print(ret_mtx)
import pickle
games = pickle.load(open('go_test.pkl', 'rb'))
cnt = 0
for board_mtx, move in zip(games[0], games[1]):
    cnt += 1
    if cnt % 200 != 0:
        continue
    board = Board(board_mtx=board_mtx)
    canvas = board.visualize_board(grid_size=35)
    cv2.imshow('board', canvas)
    cv2.waitKey()
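The Go example round-trips a 19x19 matrix through a whitespace-separated string before handing it to the gofeat extension. A minimal standalone version of that round trip without the extension (Board.mtx2str is assumed to produce a space-separated dump):

import numpy as np

mtx = np.ones((19, 19), dtype=int)
string = ' '.join(str(v) for v in mtx.flatten())           # analogous to Board.mtx2str
back = np.array(string.split(), dtype=int).reshape(19, 19)
assert (back == mtx).all()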
Example no. 21
def hello():
    b = Board()
    b.board[(1, 1)] = Tile.new('FFFF')
    return display_text(b)
Example no. 22
            if i + 1 < len(board):
                if board[i][j] == board[i + 1][j]:
                    score += board[i][j]
            if j + 1 < len(board):
                if board[i][j] == board[i][j + 1]:
                    score += board[i][j]
            if i - 1 >= 0:
                if board[i][j] == board[i - 1][j]:
                    score += board[i][j]
            if j - 1 >= 0:
                if board[i][j] == board[i][j - 1]:
                    score += board[i][j]
    return score


x = Board()
x.display_board()


def play_game(x):
    while x.check_alive:
        max = -1
        move = ""
        left_move = x.move_left(active=False)
        right_move = x.move_right(active=False)
        up_move = x.move_up(active=False)
        down_move = x.move_down(active=False)
        if left_move[0] and score_board(left_move[3]) >= max:
            max = score_board(left_move[3])
            move = "left"
        if up_move[0] and score_board(up_move[3]) >= max:
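The greedy player in this (truncated) excerpt scores each candidate move by summing the values of equal orthogonal neighbours on the resulting board, then keeps the highest-scoring direction. A self-contained sketch of that scoring heuristic on a plain list-of-lists board, independent of the example's Board class:

def score_adjacent_pairs(board):
    # Sum board[i][j] once for every orthogonal neighbour equal to it,
    # mirroring the score_board logic in the excerpt above.
    score, n = 0, len(board)
    for i in range(n):
        for j in range(n):
            for di, dj in ((1, 0), (-1, 0), (0, 1), (0, -1)):
                ni, nj = i + di, j + dj
                if 0 <= ni < n and 0 <= nj < n and board[i][j] == board[ni][nj]:
                    score += board[i][j]
    return score

print(score_adjacent_pairs([[2, 2], [4, 4]]))  # 12: every tile matches one neighbour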
Example no. 23
 def setUp(self):
     self.game_board = Board(8,8)
Example no. 24
 def __init__(self, boardSize, numMines):
     self.board = Board(boardSize, numMines)
     self.numMines = numMines
     self.boardSize = boardSize
     self.win = False
     self.lose = False
Example no. 25
            else:
                pass
            board.tileListUpdate(userInput, currentPlayer)
            player.playerTurnChange(currentPlayer)
        boardCondition = board.boardCheck()
        if boardCondition != 'continue':
            self.boardClear()
            if boardCondition == 'p1 wins':
                if currentPlayer == 'X':
                    player.playerTurnChange(player.players[1])
                    self.headVictoryChange(player.players[0])
            elif boardCondition == 'p2 wins':
                self.headVictoryChange(player.players[1])
            elif boardCondition == 'tie':
                self.headVictoryChange()


    def boardClear(self):
        # reset all nine tile buttons
        for button in (self.button0, self.button1, self.button2,
                       self.button3, self.button4, self.button5,
                       self.button6, self.button7, self.button8):
            button['text'] = ''
        board.tileListUpdate(None, None, True)


if __name__ == '__main__':
    root = tk.Tk()
    root.resizable(False, False)
    root.title('Tic Tac Toe')
    root.geometry('600x900+0+0')
    GameGUI = GameGUI(root)
    board = Board()
    player = Player('X', 'O')
    root.mainloop()
Example no. 26
 def __init__(self):
     self.WIN_SCORE = 10
     self.level = 0
     self.current_player = None
     self.opponent = None
     self.board = Board()
Example no. 27
 def test_solve(self):
     board = Board()
     self.assertFalse(board.is_solved)
     board.solve()
     self.assertTrue(board.is_solved)
Example no. 28
        if move_reward > 0:
            new_q = move_reward
        else:  # using the Bellman equation
            new_q = ((1 - self.LEARNING_RATE) * current_q +
                     self.LEARNING_RATE * (move_reward + self.DISCOUNT * max_future_q))
        self.table[obs][action] = new_q  # update the q-table

        return move_reward


if __name__ == "__main__":
    A = Agent("q-tables/qtable-main.pickle")
    startboard = np.array([[4] * 6, [0] * 2, [4] * 6])
    b = Board(startboard)
    print(b)

    c, player = np.random.randint(1, 3), 0
    empty = [0] * 6  # used to check if one side of the board is empty
    while b.board[0] != empty and b.board[2] != empty:
        pos = 0
        if c % 2 == 1:
            player = 1
        elif c % 2 == 0:
            player = 2

        if player == 1:
            try:
                pos = int(
                    input(("Player {}, choose your space to move from...\n".
Example no. 29
    def move(self, board):
        def h(b):
            gameover, winner = b.isGameOver()
            # print("eval at ", b.state, "\n", gameover, winner)
            if gameover:
                if winner is None:
                    return 0
                else:
                    return 2 - 4 * winner
            else:
                return self.evaluator.evaluate(b.state.copy().reshape((1, 19)))

        def minimax(brd, depth, maxplayer):
            self.increment_nodecount()
            MAXVAL = 100000
            gameover, _ = brd.isGameOver()
            if depth == 0 or gameover:
                return h(brd)
            if maxplayer:
                value = -MAXVAL
                for m in brd.legalMoves():
                    new_board = Board(brd.state.copy())
                    new_board.pushMove(m)
                    value = max(value, minimax(new_board, depth - 1, False))
                return value
            else:
                value = MAXVAL
                for m in brd.legalMoves():
                    new_board = Board(brd.state.copy())
                    new_board.pushMove(m)
                    value = min(value, minimax(new_board, depth - 1, True))
                return value

        t0 = time.time()

        print("MinimaxPlayer thinks...")
        # if move in cache, play it
        # print("MMP - pre - cache: ", self.cache)
        if tuple(board.state) in self.cache.keys():
            print(
                "MinimaxPlayer depth {0} played from cache in {1:.2f} seconds".
                format(self.DEPTH,
                       time.time() - t0))
            return self.cache[tuple(board.state)]

        move_scores = {}
        mxPl = board.state[18] == Board.BLACK_MOVE
        for move in board.legalMoves():
            val = minimax(Board(board.tryMove(move)), self.DEPTH, mxPl)
            # print(board.state, move, val, self.DEPTH, mxPl)
            move_scores[move] = val
        time_elapsed = time.time() - t0
        try:
            ncnt = self.nodecount / time_elapsed
        except ZeroDivisionError:
            ncnt = 1e9
        print(
            "MinimaxPlayer depth {0} explored {1} nodes in {2:.2f} seconds at {3:.2f} nodes/s"
            .format(self.DEPTH, self.nodecount, time_elapsed, ncnt))
        self.reset_nodecount()
        if board.state[18] == Board.WHITE_MOVE:
            bmove = max(move_scores.keys(), key=lambda x: move_scores[x])
            # while cache is smaller than cache limit, add move to cache
            if len(self.cache.keys()) < self.cachelimit:
                self.cache[tuple(board.state)] = bmove
                # print("MMP - post - cache: ", self.cache)
            return bmove
        else:
            bmove = min(move_scores.keys(), key=lambda x: move_scores[x])
            # while cache is smaller than cache limit, add move to cache
            if len(self.cache.keys()) < self.cachelimit:
                self.cache[tuple(board.state)] = bmove
                # print("MMP - post - cache: ", self.cache)
            return bmove
Example no. 30
import curses
from curses import wrapper

from game import (
    Board,
    Player,
    Pawn,
    Rook,
    Knight,
    King,
    Bishop,
    Queen,
    Renderer,
    TraditionalStrategy,
)

board = Board(8)

p1 = Player('Chris')
p1.color = curses.COLOR_RED
board.add_player(p1)
p1.setup(TraditionalStrategy)

p2 = Player('Jess')
p2.color = curses.COLOR_BLUE
board.add_player(p2)
p2.setup(TraditionalStrategy)


def main(stdscr):
    stdscr.clear()
    curses.noecho()