Example #1
def eval_test():
    zeroNN1 = ZeroNN(verbose=False,
                     path=mkdir(join(FOLDER_ZERO_NNS, 'NNs')),
                     ckpt_idx=-1)
    zeroNN2 = ZeroNN(verbose=False,
                     path=mkdir(join(FOLDER_ZERO_NNS, 'NNs')),
                     ckpt_idx=-1)
    mcts1 = Mcts(0, 0, zeroNN=zeroNN1, max_acts_=100)
    mcts2 = Mcts(0, 0, zeroNN=zeroNN2, max_acts_=100)
    winrate1, winrate2, tie_rate, ai_hists = \
        eval_mcts(5, 5, 4, mcts1, mcts2, True, 1, True)
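A note on eval_mcts, which is used throughout these examples but defined elsewhere in the project: the call sites are consistent with the sketch below. The parameter names after mcts2 are inferred from the keyword calls in examples #4 and #5 and are otherwise assumptions.

def eval_mcts(board_rows, board_cols, n_in_row, mcts1, mcts2,
              verbose=False, sim_times=1, collect_ai_hists=False):
    # Pit mcts1 against mcts2 for sim_times games on a board_rows x
    # board_cols board where n_in_row stones in a line win. Returns
    # (winrate1, winrate2, tie_rate, ai_hists); ai_hists is only
    # populated when collect_ai_hists is True.
    ...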
Example #2
 def optimization(self):
     with self.lock_model_paths:
         zeroNN = ZeroNN(verbose=2,
                         path=self.folder_NNs,
                         ckpt_idx=-1,
                         num_samples=self.train_size,
                         epoch=3,
                         batch_size=self.batch_size,
                         save_epochs=4,
                         logger=self.logger)
         self.unchecked_model_paths = zeroNN.trained_model_paths
     self.logger.log('optimization start!')
     while self.self_play_cnt > 0:
         while self.train_data is None or\
            not self.data_avail or\
            self.train_ratio * len(self.train_data[0]) * 0.8 < self.batch_size:
             time.sleep(10)
         # Wait for the models to be evaluated;
         # better models need to be selected to generate better data.
         # Remove old models to ease the burden on the evaluator.
         while len(self.unchecked_model_paths) > 10 and np.random.rand(
         ) < 0.99:
             time.sleep(20)
             with self.lock_model_paths:
                 self.unchecked_model_paths.remove(
                     self.unchecked_model_paths[round(np.random.rand() *
                                                      8)])
         # give time slices to the other two threads
         with self.lock_train_data:
             train_data = [
                 self.train_data[0].copy(), self.train_data[1].copy(),
                 self.train_data[2].copy()
             ]
         # save the training data in case we need it to resume training
         for i in range(3):
             np.save(self.data_path[i], self.train_data[i])
         print(len(self.train_data[0]), 'saved')
         # select a subset of play histories for training to control overfitting
         nonrep_rand_nums = non_repeated_random_nums(
             len(train_data[0]),
             round(self.train_ratio * len(train_data[0])))
         zeroNN.fit(train_data[0][nonrep_rand_nums],
                    train_data[1][nonrep_rand_nums],
                    train_data[2][nonrep_rand_nums], 0.1)
         self.data_avail = False
         zeroNN.epoch = zeroNN.verbose = zeroNN.save_epochs = 10
         self.model_avail = True
         while not self.data_avail:
             time.sleep(30)
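non_repeated_random_nums is not defined in these excerpts. Given the call above, a minimal stand-in that samples distinct indices without replacement could look like the sketch below; the name and argument order come from the call site, the internals are an assumption.

import numpy as np

def non_repeated_random_nums(n, k):
    # k distinct indices drawn uniformly from range(n); used above to
    # pick a random subset of the replay buffer and limit overfitting.
    return np.random.choice(n, size=k, replace=False)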
Example #3
File: NInRow.py Project: 1069066484/NInRow
def eval_debug():
    # zeroNN1 = ZeroNN(verbose=False,path=join(FOLDER_ZERO_NNS, 'NNs'))
    zeroNN1 = ZeroNN(path=join(FOLDER_ZERO_NNS + '885', 'NNs'),
                     ckpt_idx=join(FOLDER_ZERO_NNS + '885',
                                   'NNs/model.ckpt-1176'))
    # zeroNN2 = ZeroNN(verbose=False,path=join(FOLDER_ZERO_NNS, 'NNs'))
    zeroNN2 = ZeroNN(path=join(FOLDER_ZERO_NNS + '885', 'NNs'),
                     ckpt_idx=join(FOLDER_ZERO_NNS + '885',
                                   'NNs/model.ckpt-1857'))
    mcts1 = Mcts(0, 0, zeroNN=zeroNN1, max_acts_=64)
    mcts2 = Mcts(0, 0, zeroNN=zeroNN2, max_acts_=64)
    winrate1, winrate2, tie_rate, ai_hists = \
        eval_mcts(8, 8, 5, mcts1, mcts2, True, 2, True)
    print(winrate1, winrate2, tie_rate)
Example #4
def eval_debug():
    # zeroNN1 = ZeroNN(verbose=False,path=join(FOLDER_ZERO_NNS, 'NNs'))
    zeroNN1 = ZeroNN(path=join(FOLDER_ZERO_NNS + '115', 'NNs'),
                     ckpt_idx=join(FOLDER_ZERO_NNS + '115',
                                   'NNs/model.ckpt-145981'))
    # zeroNN2 = ZeroNN(verbose=False,path=join(FOLDER_ZERO_NNS, 'NNs'))
    # zeroNN1 = None
    zeroNN2 = ZeroNN(path=join(FOLDER_ZERO_NNS + '115', 'NNs'),
                     ckpt_idx=join(FOLDER_ZERO_NNS + '115',
                                   'NNs/model.ckpt-105633'))
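    # note: the next line overrides zeroNN2, so player 2 runs pure MCTS without a network in this debug run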
    zeroNN2 = None
    mcts1 = Mcts(0, 0, zeroNN=zeroNN1, max_acts_=512)
    mcts2 = Mcts(0, 0, zeroNN=zeroNN2, max_acts_=1024)
    winrate1, winrate2, tie_rate, ai_hists = \
        eval_mcts(11, 11, 5, mcts1, mcts2, sim_times=5, verbose=True)
    print(winrate1, winrate2, tie_rate)
Example #5
File: tests.py Project: 1069066484/NInRow
def op_eval_test():
    # zeroNN1 = ZeroNN(verbose=False,path=join(FOLDER_ZERO_NNS, 'NNs'))
    zeroNN1 = ZeroNN(path=join(FOLDER_ZERO_NNS + '115', 'NNs'), 
                     ckpt_idx=join(FOLDER_ZERO_NNS + '115', 'NNs/model.ckpt-153223'))
    zeroNN2 = zeroNN1
    mcts1 = Mcts(0, 0, zeroNN=zeroNN1, max_acts_=512, op_weight=None)
    mcts2 = Mcts(0, 0, zeroNN=zeroNN2, max_acts_=512, op_weight=1)
    winrate1, winrate2, tie_rate, ai_hists = \
        eval_mcts(11, 11, 5, mcts1, mcts2, sim_times=5, verbose=True)
    print(winrate1, winrate2, tie_rate)
Example #6
def main_debug():
    MctsPuct.CHECK_DETAILS = True
    collect_ai_hists = False
    gb = '115'
    if True:
        path = join(mkdir(join(FOLDER_ZERO_NNS + gb, 'replays')),
                    curr_time_str() + '_AI_test')

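        # gb = '115' encodes the board: int('1') + 10 = 11 rows and 11 columns, with 5 in a row to win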
        game = Game(int(gb[0]) + 10,
                    int(gb[1]) + 10,
                    5,
                    Game.Player.AI,
                    Game.Player.human,
                    collect_ai_hists=collect_ai_hists)
        zeroNN1 = ZeroNN(path=join(FOLDER_ZERO_NNS + gb, 'NNs'),
                         ckpt_idx=join(FOLDER_ZERO_NNS + gb,
                                       'NNs/model.ckpt-316206'))
        # model.ckpt-16533
        # zeroNN1 = None
        noise = 0
        if True:
            game.players[0].mcts.zeroNN = zeroNN1
            game.players[0].mcts.max_acts = 2048
        # game.players[0].mcts.val_mult = 2
        # game.players[0].mcts.noise = noise
        # game.players[0].mcts.do_policy_prune = False
        # game.players[0].mcts.hand_val = 0.5
        # game.players[0].c = 15

        # game.players[0].mcts.red_child = True
        # 10.8 9.5
        # game.players[0].mcts.further_check = False
        if False:
            game.players[1].mcts.zeroNN = zeroNN1
            game.players[1].mcts.max_acts = 1024
            game.players[1].mcts.noise = noise

        game.start(graphics=True)

        pk.dump(game.acts, open(pkfn(path), 'wb'))
    input("over")

    if collect_ai_hists:
        debug = mkdir("debug")
        probs, eval_board, winner = game.ai_hists()
        sp0, sp1, sp2 = ZeroNNTrainer.hists2enhanced_train_data(
            [[probs, eval_board, winner]])
        np.save(join(debug, npfn('sp0')), sp0)
        np.save(join(debug, npfn('sp1')), sp1)
        np.save(join(debug, npfn('sp2')), sp2)
        exit()
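        # unreachable below exit(): leftover debug inspection of the collected histories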
        print(len(probs), len(eval_board), winner)
        print(probs[0].shape, eval_board[0].shape, winner)
        game = Game(11, 11, 5, use_hists=np.array(eval_board))
        game.start()
Example #7
File: NInRow.py Project: 1069066484/NInRow
def main_debug():
    game = Game(8,
                8,
                5,
                Game.Player.AI,
                Game.Player.AI,
                collect_ai_hists=False)
    zeroNN1 = ZeroNN(
        path=join(FOLDER_ZERO_NNS + '885', 'NNs'),
        ckpt_idx=-1)  #join(FOLDER_ZERO_NNS + '885', 'NNs/model.ckpt-10300') )
    zeroNN2 = ZeroNN(path='new_train_zeronn/zeronn8', ckpt_idx=-1)
    zeroNN1 = zeroNN2
    game.players[0].mcts.zeroNN = zeroNN1
    game.players[0].mcts.max_acts = 512
    game.players[0].mcts.hv21 = 0
    game.players[0].mcts.hand_val = 0

    # game.players[0].mcts.red_child = True
    # 10.8 9.5
    # game.players[0].mcts.further_check = False

    game.players[1].mcts.zeroNN = zeroNN1
    game.players[1].mcts.max_acts = 512
    game.players[1].mcts.hv21 = 0
    game.players[1].mcts.hand_val = 0

    # game.players[1].mcts.update_itv = 0

    game.start(graphics=True)
    print("over")
    return None
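    # unreachable below the early return: debug inspection of the AI histories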
    probs, eval_board, winner = game.ai_hists()
    for i in range(3):
        print(probs[i])
        print(eval_board[i][:, :, 0])
        print(eval_board[i][:, :, 1])
        print(eval_board[i][:, :, 2])
        print(eval_board[i][:, :, 3])
        print('\n\n')
    print(np.array(probs).shape)
    print(np.array(eval_board).shape)
    print(winner)
Example #8
    def optimization(self, only_opt=False):
        # print('opt0')
        with self.lock_model_paths:
            zeroNN = ZeroNN(verbose=10,
                            path=self.folder_NNs,
                            ckpt_idx=-1,
                            num_samples=100000,
                            trained_model_paths=self.unchecked_model_paths,
                            epoch=10,
                            batch_size=self.batch_size,
                            save_epochs=10,
                            logger=self.logger)
        self.logger.log('optimization start!')
        while self.shared_vars['self_play_cnt'] > 0 or only_opt:
            # print('opt1')
            while not self.shared_vars['data_avail'] and not only_opt:
                # print('opt2')
                time.sleep(10)
            # train_data = ZeroNNTrainer.manip_train_data(self.shared_constants, self.shared_vars, self.lock_train_data,
            #                                             best_player_path=self.shared_vars['best_player_path'])
            train_data, check_data = self.get_train_data()
            if train_data[0].shape[0] < self.batch_size * 4 and not only_opt:
                time.sleep(10)
                continue
            # Wait for the models to be evaluated;
            # better models need to be selected to generate better data.
            # Remove old models to ease the burden on the evaluator.
            while len(self.unchecked_model_paths) > self.n_eval_processes + 3:
                with self.lock_model_paths:
                    self.unchecked_model_paths.remove(
                        self.unchecked_model_paths[0])

            # select a subset of play histories for training to control overfitting
            # nonrep_rand_nums = non_repeated_random_nums(len(train_data[0]), round(self.train_ratio * len(train_data[0])))
            if zeroNN.predict_avail():
                eval = zeroNN.run_eval(check_data[0], check_data[1],
                                       check_data[2])
                self.logger.log(
                    'Opt check: \n',
                    'sp items:        [loss_policy,  loss_value,   loss_total,    acc_value,   acc_policy]:',
                    '\n   eval:         ', eval)
                # Evaluate how the zeroNN performs on the latest played games.
                # This is real test data, since it has not yet been fed to zeroNN
                # for training, so we save the evaluations.
                self.loss_hists.append([self.shared_vars['curr_generation']] +
                                       eval)
                np.save(self.path_loss_hists, np.array(self.loss_hists))

            print('optimization fit', train_data[0].shape[0], '...')
            zeroNN.fit(train_data[0], train_data[1], train_data[2], 0.1)
            self.shared_vars['model_avail'] = True
            self.shared_vars['data_avail'] = False
        print('optimization over')
Example #9
def test_664():
    winner = 0
    while True:
        MctsPuct.CHECK_DETAILS = True
        game = Game(5,
                    5,
                    4,
                    Game.Player.AI,
                    Game.Player.AI,
                    collect_ai_hists=False)
        zeroNN1 = ZeroNN(
            path=r'F:\Software\vspro\NInRow\NInRow\test554\NNs',
            ckpt_idx=
            r'F:\Software\vspro\NInRow\NInRow\test554\NNs\model.ckpt-257210')
        #216633
        #ckpt_idx=r'F:\Software\vspro\NInRow\NInRow\test554\NNs\model.ckpt-177628')
        """
         [[0.01 0.01 0.   0.   0.  ]
         [0.   0.01 0.01 0.02 0.01]
         [0.01 0.06 0.17 0.24 0.03]
         [0.   0.02 0.03 0.15 0.06]
         [0.   0.02 0.06 0.07 0.03]]
        """
        noise = 0

        game.players[0].mcts.zeroNN = zeroNN1
        game.players[1].mcts.zeroNN = zeroNN1

        game.players[0].mcts.max_acts = 256
        game.players[1].mcts.max_acts = 256

        game.players[0].mcts.noise = noise
        game.players[1].mcts.noise = noise

        game.start(graphics=True)
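        # tally: +1 when player 0 wins, -1 otherwise (ties also count as -1 here)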
        winner += (game.winner == 0) * 2 - 1
        input(str(winner))
Example #10
 def self_play(self):
     while not self.model_avail and self.nozero_mcts is None:
         time.sleep(5)
     time.sleep(round(np.random.rand() * 60 * self.n_play_threads + 1))
     self.logger.log('self_play start!')
     plays = self.plays
     while self.self_play_cnt > 0:
         zeroNN1 = ZeroNN(verbose=False,
                          path=self.folder_NNs,
                          ckpt_idx=self.best_player_path)
         zeroNN2 = ZeroNN(verbose=False,
                          path=self.folder_NNs,
                          ckpt_idx=self.best_player_path)
         best_player_path = self.best_player_path
         # we do not lock for self_play_cnt
         while self.self_play_cnt > 0:
             self.self_play_cnt -= plays
             # decay resign_val:
             # rookies should always play the game to the end, while masters may resign at an earlier stage
             self.resign_val = max(
                 0.75, self.resign_val - self.resign_val * 0.00002 * plays)
             self.logger.log('self_play:', 'self_play_cnt=',
                             self.self_play_cnt, ' self.resign_val=',
                             self.resign_val)
             # Create two identical players to 'self play'
             if self.nozero_mcts is not None:
                 mcts1 = Mcts(0,
                              0,
                              zeroNN=None,
                              max_acts_=self.nozero_mcts_sims,
                              const_temp=1,
                              noise=0.2,
                              resign_val=0.99,
                              temp2zero_moves=3,
                              hand_val=self.hand_val)
                 mcts2 = Mcts(0,
                              0,
                              zeroNN=None,
                              max_acts_=self.nozero_mcts_sims,
                              const_temp=1,
                              noise=0.2,
                              resign_val=0.99,
                              temp2zero_moves=3,
                              hand_val=self.hand_val)
             else:
                 mcts1 = Mcts(0,
                              0,
                              zeroNN=zeroNN1,
                              max_acts_=self.mcts_sims,
                              const_temp=1,
                              temp2zero_moves=3,
                              noise=0.2,
                              resign_val=self.resign_val,
                              hand_val=self.hand_val)
                 mcts2 = Mcts(0,
                              0,
                              zeroNN=zeroNN2,
                              max_acts_=self.mcts_sims,
                              const_temp=1,
                              temp2zero_moves=3,
                              noise=0.2,
                              resign_val=self.resign_val,
                              hand_val=self.hand_val)
             t = time.time()
             winrate1, winrate2, tie_rate, ai_hists = \
                 eval_mcts(self.board_rows, self.board_cols, self.n_in_row, mcts1, mcts2, False, plays//2, True)
             ai_hists = self.hists2enhanced_train_data(ai_hists)
             # Evaluate how the zeroNN performs on the latest played games.
             # This is real test data, since it has not yet been fed to zeroNN
             # for training, so we save the evaluations.
             if self.nozero_mcts is None:
                 eval = zeroNN1.run_eval(ai_hists[0], ai_hists[1],
                                         ai_hists[2])
                 self.logger.log(
                     'self-play in ' + str(time.time() - t) + 's\n',
                     'sp items:        [loss_policy,       loss_value,           loss_total,            acc_value]:',
                     '\n   eval:         ', eval)
                 self.loss_hists.append([self.curr_generation] + eval)
             # Append the latest data to the old.
             with self.lock_train_data:
                 if self.train_data is None or len(
                         self.train_data) == self.batch_size:
                     self.train_data = ai_hists
                 else:
                     self.train_data = [np.vstack([self.train_data[0], ai_hists[0]]).astype(bool),
                                        np.vstack([self.train_data[1], ai_hists[1]]),
                                        np.vstack([self.train_data[2], ai_hists[2]])]\
                                            if self.train_data is not None else ai_hists
                 # save the training data in case we need it to resume training
                 for i in range(3):
                     np.save(self.data_path[i], self.train_data[i])
                 # Discard some old data since our memory is running out
                 if len(self.train_data[0]) > self.train_size + 1:
                     for i in range(3):
                         self.train_data[i] = self.train_data[i][
                             -round(self.train_size * 0.6 + 1):]
             self.logger.log('self_play:', winrate1, winrate2, tie_rate,
                             '  new data size=', ai_hists[0].shape,
                             '   total data:', self.train_data[0].shape)
             self.data_avail = True
             with self.lock_model_best:
                 find_new_best = (self.best_player_path != best_player_path)
             if find_new_best and len(self.train_data[0]) > 10000:
                 # Discard some old data: a new best player has been trained, so we
                 # want to train new models on the games it plays
                 with self.lock_train_data:
                     len_train_data0 = len(self.train_data[0])
                     for i in range(3):
                         self.train_data[i] = self.train_data[i][
                             -round(len_train_data0 * 0.6 + 1):]
                 break
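The resign_val decay in self_play above multiplies the threshold by (1 - 0.00002 * plays) each batch and floors it at 0.75. A quick standalone check of how fast it reaches the floor (plays = 10 is an assumed setting):

resign_val, plays, batches = 0.99, 10, 0
while resign_val > 0.75:
    resign_val = max(0.75, resign_val - resign_val * 0.00002 * plays)
    batches += 1
print(batches)  # roughly 1389 batches from 0.99 down to the 0.75 floor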
Example #11
    def evaluator(self):
        while not self.model_avail:
            time.sleep(5)
        self.lock_model_paths.acquire()
        while len(self.unchecked_model_paths) != 0:
            self.unchecked_model_paths.pop()
        self.lock_model_paths.release()
        # try to test checkpoints as different as possible
        time.sleep(round(np.random.rand() * 30 * self.n_eval_threads + 1))
        self.logger.log('evaluator start!')
        while self.self_play_cnt > 0 or len(self.unchecked_model_paths) > 5:
            # use 'with' to lock to avoid forgetting to release it
            with self.lock_model_paths:
                # wait for latest trained model
                if len(self.unchecked_model_paths) < 2:
                    time.sleep(30)
                    continue
                path_to_check = self.unchecked_model_paths.pop()
                if len(self.unchecked_model_paths) > 5:
                    if np.random.rand() < 0.5:
                        self.unchecked_model_paths.pop()
                    else:
                        path_to_check = self.unchecked_model_paths.pop()
            self.logger.log('evaluator:', self.best_player_path, 'VS',
                            path_to_check, '...')
            if self.nozero_mcts is None:
                best_mcts = Mcts(0,
                                 0,
                                 zeroNN=ZeroNN(verbose=False,
                                               path=self.folder_NNs,
                                               ckpt_idx=self.best_player_path),
                                 max_acts_=self.mcts_sims,
                                 const_temp=0,
                                 noise=0.1,
                                 resign_val=self.resign_val,
                                 hand_val=self.hand_val)
            else:
                # When self.nozero_mcts is not None, the first generation of zeroNN
                # has not been trained yet. We double the number of simulations since
                # MCTS without zeroNN searches faster, which also means any trained
                # model should be able to beat MCTS without zeroNN even at double
                # the simulations
                best_mcts = Mcts(0,
                                 0,
                                 zeroNN=None,
                                 max_acts_=self.nozero_mcts_sims,
                                 const_temp=0.2,
                                 noise=0.1,
                                 hand_val=self.hand_val)
            zeroNN_to_check = ZeroNN(verbose=False,
                                     path=self.folder_NNs,
                                     ckpt_idx=path_to_check)
            mcts2 = Mcts(0,
                         0,
                         zeroNN=zeroNN_to_check,
                         max_acts_=self.mcts_sims,
                         const_temp=0,
                         noise=0.1,
                         resign_val=self.resign_val,
                         hand_val=self.hand_val)

            # the evaluation must be fast so the best model is selected quickly:
            # play only a few games, but require the challenger to hold an
            # overwhelming advantage over the existing best player; if the best
            # player is defeated, the challenger takes its place
            winrate1, winrate2, tie_rate, _ = \
                eval_mcts(self.board_rows, self.board_cols, self.n_in_row, best_mcts, mcts2, False, [3,1], False)
            self.logger.log('evaluator:', self.best_player_path, 'VS',
                            path_to_check, '--', winrate1, '-', winrate2, '-',
                            tie_rate)
            time.sleep(5)
            # promote the challenger only on a decisive result: it must win more than 40% of the games while the incumbent wins none
            if winrate2 > 0.4 and winrate1 < 0.01:
                self.curr_generation += 1
                self.logger.log('evaluator:', path_to_check, 'defeat',
                                self.best_player_path, 'by',
                                winrate2 - winrate1)
                self.logger.log(path_to_check, 'becomes generation',
                                self.curr_generation)
                with self.lock_model_best:
                    self.best_player_path = path_to_check
                self.nozero_mcts = None
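The promotion gate used in this evaluator, spelled out as a standalone check: the challenger must win more than 40% of the games while the incumbent wins essentially none. (The [3,1] games argument is passed straight to eval_mcts; its exact meaning is not shown in these excerpts.)

def promote(winrate1, winrate2):
    # challenger needs a decisive result: more than 40% wins for it,
    # under 1% wins for the current best player
    return winrate2 > 0.4 and winrate1 < 0.01

print(promote(0.0, 0.5))    # True: e.g. challenger wins 2 of 4, incumbent none
print(promote(0.25, 0.75))  # False: the incumbent won a game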
Example #12
    def console(self):
        """
        Use console to help training
        """
        while True:
            cmd = input()
            try:
                if cmd == 'md':
                    folder = input(
                        'Input the folder name where the three data files (np files) exist. ' +
                        'The files should be named [selfplay0.npy], [selfplay1.npy] and [selfplay2.npy]:\n'
                    )
                    try:
                        data_path = [
                            join(folder, npfn('selfplay' + str(i)))
                            for i in range(3)
                        ]
                        train_data = [np.load(p) for p in data_path]
                        with self.lock_train_data:
                            self.train_data = [
                                np.vstack([self.train_data[0],
                                           train_data[0]]).astype(bool),
                                np.vstack([self.train_data[1], train_data[1]]),
                                np.vstack([self.train_data[2], train_data[2]])
                            ]
                        self.data_avail = True
                        print("load new data succeessfully, data size = ",
                              len(self.train_data[0]))
                    except:
                        print(
                            "md error: folder or files do not exist or are invalid"
                        )
                elif cmd == 'pl':
                    plays = input(
                        "Input the number of games for one self-play, it should be a positive even number:\n"
                    )

                    def plerr():
                        print("pl error: ", plays,
                              " is invalid to be a self-play number")

                    try:
                        plays = int(plays)
                        if plays > 0 and plays % 2 == 0:
                            self.plays = plays
                            print("self.plays -> ", self.plays)
                        else:
                            plerr()
                    except:
                        plerr()
                elif cmd == 'bs':
                    batch_size = input(
                        "Input the number of batch size, it should be a positive number:\n"
                    )

                    def bserr():
                        print("bs error")

                    try:
                        batch_size = int(batch_size)
                        if batch_size > 0:
                            self.batch_size = batch_size
                            print("Batch size reset to", batch_size)
                        else:
                            bserr()
                    except:
                        bserr()
                elif cmd == 'bp':
                    best_player_path = input(
                        "Input num best player path, th current is " +
                        str(self.best_player_path) + ":\n")
                    if os.path.exists(best_player_path + '.index'):
                        self.best_player_path = best_player_path
                        print("best player path -> ", self.best_player_path)
                    else:
                        print("bp error,", best_player_path, 'is invalid')
                elif cmd == 'nm':

                    def nmerr():
                        print("nm error")

                    nozero_mcts_sims = input(
                        "Input number of simulations of nozero_mcts's search. A non-positive number means deactivation\n"
                        + " Currently, nozero_mcts is " +
                        str(self.nozero_mcts) + ". And nozero_mcts_sims is " +
                        str(self.nozero_mcts_sims) + ":\n")
                    try:
                        nozero_mcts_sims = int(nozero_mcts_sims)
                        if nozero_mcts_sims > 0:
                            self.nozero_mcts_sims = nozero_mcts_sims
                            self.nozero_mcts = Mcts(
                                0,
                                0,
                                zeroNN=None,
                                max_acts_=self.nozero_mcts_sims,
                                const_temp=0.2,
                                noise=0.1,
                                hand_val=self.hand_val)
                            print(
                                "nozero_mcts activated. And nozero_mcts_sims is ",
                                self.nozero_mcts_sims)
                        else:
                            self.nozero_mcts = None
                            print(
                                "nozero_mcts deactivated. And nozero_mcts_sims is ",
                                self.nozero_mcts_sims)
                    except:
                        nmerr()
                elif cmd == 'op':
                    print("Start an optimization now")
                    self.data_avail = True
                elif cmd == 'pc':
                    self_play_cnt = input("Input number of play count:\n")
                    try:
                        self_play_cnt = int(self_play_cnt)
                        self.self_play_cnt = max(self_play_cnt, 0)
                        print("self.self_play_cnt -> ", self.self_play_cnt)
                    except:
                        print("pc error")
                elif cmd == 'te':
                    # using 'te' will clear the screen
                    game = Game(self.board_rows,
                                self.board_cols,
                                self.n_in_row,
                                Game.Player.AI,
                                Game.Player.human,
                                collect_ai_hists=False)
                    zeroNN1 = ZeroNN(path=join(self.folder_NNs), ckpt_idx=-1)
                    game.players[0].mcts.zeroNN = zeroNN1
                    game.players[0].mcts.max_acts = self.mcts_sims
                    game.start(graphics=True)
                elif cmd == 'di':

                    def dierr():
                        print("di error")

                    discard = input(
                        "Input number of data to discard, the current number is "
                        + str(len(self.train_data[0])) + ":\n")
                    try:
                        discard = int(discard)
                        if discard <= 0 or discard > len(
                                self.train_data[0]) - 1:
                            dierr()
                        else:
                            for i in range(3):
                                self.train_data[i] = self.train_data[i][
                                    discard:]
                            print(
                                "Discarded successfully! The current number is ",
                                len(self.train_data[0]))
                    except:
                        dierr()
                elif cmd == 'hv':

                    def hverr():
                        print('hv error')

                    hand_val = input("Input hand_val, the current number is " +
                                     str(self.hand_val) + ":\n")
                    try:
                        self.hand_val = float(hand_val)
                    except:
                        hverr()
                elif cmd == 'sv':
                    for i in range(3):
                        np.save(self.data_path[i], self.train_data[i])
                elif cmd == 'pt':
                    print('Folder=', self.folder)
                else:
                    print("command error. (cmd=", cmd, ")")
            except:
                print("Unknown console error!")
Example #13
    def self_play(consts, vars, unchecked_model_paths, lock_train_data,
                  lock_model_best, loss_hists, path_loss_hists, logger, idx):
        while not vars['model_avail'] and not vars['nozero_mcts']:
            time.sleep(5)
        logger.log('self_play' + str(idx) + ' start!')
        while vars['self_play_cnt'] > 0:
            zeroNN1 = ZeroNN(verbose=False,
                             path=consts['folder_NNs'],
                             ckpt_idx=vars['best_player_path'])
            zeroNN2 = zeroNN1
            best_player_path = vars['best_player_path']
            # we do not lock for self_play_cnt
            while vars['self_play_cnt'] > 0:
                vars['self_play_cnt'] -= vars['plays']
                # decay resign_val:
                # rookies should always play the game to the end, while masters may resign at an earlier stage
                vars['resign_val'] = max(
                    0.75, vars['resign_val'] -
                    vars['resign_val'] * 0.0001 * vars['plays'])

                # Create two identical players to 'self play'
                if vars['nozero_mcts']:
                    mcts1 = Mcts(0,
                                 0,
                                 zeroNN=None,
                                 max_acts_=vars['nozero_mcts_sims'],
                                 const_temp=1,
                                 noise=0,
                                 temp2zero_moves=3)
                    mcts2 = Mcts(0,
                                 0,
                                 zeroNN=None,
                                 max_acts_=vars['nozero_mcts_sims'],
                                 const_temp=1,
                                 noise=0,
                                 temp2zero_moves=3)
                else:
                    mcts1 = Mcts(0,
                                 0,
                                 zeroNN=zeroNN1,
                                 max_acts_=consts['mcts_sims'],
                                 const_temp=1,
                                 temp2zero_moves=3,
                                 noise=vars['noise'],
                                 resign_val=vars['resign_val'])
                    mcts2 = Mcts(0,
                                 0,
                                 zeroNN=zeroNN2,
                                 max_acts_=consts['mcts_sims'],
                                 const_temp=1,
                                 temp2zero_moves=3,
                                 noise=vars['noise'],
                                 resign_val=vars['resign_val'])
                t = time.time()
                logger.log(
                    'self_play' + str(idx) + ':', 'self_play_cnt=',
                    vars['self_play_cnt'], '  net=', 'nomcts'
                    if vars['nozero_mcts'] else vars['best_player_path'], '',
                    ' self.resign_val=', vars['resign_val'])
                winrate1, winrate2, tie_rate, ai_hists = \
                    eval_mcts(consts['board_rows'], consts['board_cols'], consts['n_in_row'], mcts1, mcts2, False, vars['plays']//2, True)
                ai_hists = ZeroNNTrainer.hists2enhanced_train_data(
                    ai_hists, consts)

                # Append the latest data to the old and save the training data
                # in case we need it to resume training
                train_data = ZeroNNTrainer.manip_train_data(
                    consts,
                    vars,
                    lock_train_data,
                    ai_hists,
                    best_player_path=best_player_path)

                logger.log('self_play' + str(idx) + ':', winrate1, winrate2,
                           tie_rate, '  new data size=', ai_hists[0].shape,
                           '   total data:', train_data[0].shape)
                vars['data_avail'] = True
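                # if a new best player was promoted, break to the outer loop so zeroNN1 is reloaded from the new checkpoint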
                with lock_model_best:
                    if vars['best_player_path'] != best_player_path:
                        break
        print("self play over")
Example #14
    def evaluator(consts, vars, unchecked_model_paths, lock_model_paths,
                  lock_model_best, logger):
        while not vars['model_avail']:
            time.sleep(60)
        with lock_model_paths:
            while len(unchecked_model_paths) >= 2:
                unchecked_model_paths.remove(unchecked_model_paths[0])
        logger.log('evaluator start!')
        while vars['self_play_cnt'] > 0 or len(unchecked_model_paths) > 5:
            while len(unchecked_model_paths) == 0:
                time.sleep(20)
            # use 'with' to lock to avoid forgetting to release it
            with lock_model_paths:
                # wait for latest trained model
                if len(unchecked_model_paths) == 0:
                    continue
                path_to_check = unchecked_model_paths.pop()
            logger.log('evaluator:', vars['best_player_path'], 'VS',
                       path_to_check, '...')
            acts = min(256, consts['mcts_sims'])
            if not vars['nozero_mcts']:
                # join(shared_constants['folder_NNs'], 'model.ckpt-' + str(best_player_path))
                best_mcts = Mcts(0,
                                 0,
                                 zeroNN=ZeroNN(
                                     verbose=False,
                                     path=consts['folder_NNs'],
                                     ckpt_idx=vars['best_player_path']),
                                 max_acts_=acts,
                                 const_temp=0,
                                 noise=0,
                                 resign_val=vars['resign_val'])
            else:
                # When nozero_mcts is set, the first generation of zeroNN has not
                # been trained yet. We double the number of simulations since MCTS
                # without zeroNN searches faster, which also means any trained
                # model should be able to beat MCTS without zeroNN even at double
                # the simulations
                best_mcts = Mcts(0,
                                 0,
                                 zeroNN=None,
                                 max_acts_=acts,
                                 const_temp=0,
                                 noise=0)
            zeroNN_to_check = ZeroNN(verbose=False,
                                     path=consts['folder_NNs'],
                                     ckpt_idx=path_to_check)
            mcts2 = Mcts(0,
                         0,
                         zeroNN=zeroNN_to_check,
                         max_acts_=acts,
                         const_temp=0,
                         noise=0,
                         resign_val=vars['resign_val'])

            # the evaluation must be fast so the best model is selected quickly:
            # play only a few games, but require the challenger to hold an
            # overwhelming advantage over the existing best player; if the best
            # player is defeated, the challenger takes its place
            winrate1, winrate2, tie_rate, _ = \
                eval_mcts(consts['board_rows'], consts['board_cols'], consts['n_in_row'], best_mcts, mcts2, False, 8, False)
            logger.log('evaluator:', vars['best_player_path'], 'VS',
                       path_to_check, '--', winrate1, '-', winrate2, '-',
                       tie_rate)
            # if the new player finishes at least 3 net wins ahead over the evaluation games (threshold 3 / 16), replace the best player with it
            if winrate2 - winrate1 >= 3 / (8 * 2) - 0.0001:
                vars['curr_generation'] += 1
                logger.log('evaluator:', path_to_check, 'defeat',
                           vars['best_player_path'], 'by', winrate2 - winrate1)
                logger.log(path_to_check, 'becomes generation',
                           vars['curr_generation'])
                with lock_model_best:
                    vars['best_player_path'] = path_to_check
                vars['nozero_mcts'] = False
        print("evaluator over")
Example #15
    def console(self):
        """
        Use console to help training
        """
        print("console started")
        while True:
            cmd = input()
            try:
                if cmd == 'md':
                    data_path = input(
                        'Input the name of the data file (np file). ' +
                        'The file should be named [selfplayxxx.npz]:\n')
                    try:
                        # data_path = [join(folder, npfn('selfplay' + str(i))) for i in range(3)]
                        train_data = np.load(npfn(data_path))
                        best_player_path = data_path.split('y')[-1].split(
                            '.')[0]
                        train_data = ZeroNNTrainer.manip_train_data(
                            self.shared_constants,
                            self.shared_vars,
                            self.lock_train_data,
                            train_data,
                            best_player_path=best_player_path)
                        self.shared_vars['data_avail'] = True
                        print("load new data succeessfully, data size = ",
                              len(train_data[0]))
                    except:
                        train_data = None
                        print(
                            "md error: folder or files do not exist or are invalid"
                        )
                    del train_data
                elif cmd == 'pl':
                    plays = input(
                        "Input the number of games for one self-play, it should be a positive even number:\n"
                    )

                    def plerr():
                        print("pl error: ", plays,
                              " is invalid to be a self-play number")

                    try:
                        plays = int(plays)
                        if plays > 0 and plays % 2 == 0:
                            self.shared_vars['plays'] = plays
                            print("self.plays -> ", self.shared_vars['plays'])
                        else:
                            plerr()
                    except:
                        plerr()
                elif cmd == 'bs':
                    batch_size = input(
                        "Input the number of batch size, it should be a positive number:\n"
                    )

                    def bserr():
                        print("bs error")

                    try:
                        batch_size = int(batch_size)
                        if batch_size > 0:
                            self.batch_size = batch_size
                            print("Batch size reset to", batch_size)
                        else:
                            bserr()
                    except:
                        bserr()
                elif cmd == 'bp':
                    best_player_path = input(
                        "Input num best player path, th current is " +
                        str(self.shared_vars['best_player_path']) + ":\n")
                    if str2int(best_player_path) is not None:
                        best_player_path = join(
                            self.shared_constants['folder_NNs'],
                            'model.ckpt-' + str(best_player_path))
                    if os.path.exists(best_player_path + '.index'):
                        self.shared_vars['best_player_path'] = best_player_path
                        print("best player path -> ", best_player_path)
                    else:
                        print("bp error,", best_player_path, 'is invalid')
                elif cmd == 'nm':

                    def nmerr():
                        print("nm error")

                    nozero_mcts_sims = input(
                        "Input number of simulations of nozero_mcts's search. A non-positive number means deactivation\n"
                        + " Currently, nozero_mcts is " +
                        str(self.shared_vars['nozero_mcts']) +
                        ". And nozero_mcts_sims is " +
                        str(self.shared_vars['nozero_mcts_sims']) + ":\n")
                    try:
                        nozero_mcts_sims = int(nozero_mcts_sims)
                        if nozero_mcts_sims > 0:
                            self.shared_vars[
                                'nozero_mcts_sims'] = nozero_mcts_sims
                            self.shared_vars['nozero_mcts'] = True
                            print(
                                "nozero_mcts activated. And nozero_mcts_sims is ",
                                nozero_mcts_sims)
                        else:
                            self.shared_vars['nozero_mcts'] = False
                            print(
                                "nozero_mcts deactivated. And nozero_mcts_sims is ",
                                nozero_mcts_sims)
                    except:
                        nmerr()
                elif cmd == 'op':
                    # print("Error: Op command is unavailable in MP mode")
                    print("Start optimization now")
                    self.shared_vars['data_avail'] = True
                elif cmd == 'pc':
                    self_play_cnt = input("Input number of play count:\n")
                    try:
                        self_play_cnt = int(self_play_cnt)
                        self.shared_vars['self_play_cnt'] = max(
                            self_play_cnt, 0)
                        print("self.self_play_cnt -> ",
                              self.shared_vars['self_play_cnt'])
                    except:
                        print("pc error")
                elif cmd == 'te':
                    # using 'te' will clear the screen
                    game = Game(self.board_rows,
                                self.board_cols,
                                self.n_in_row,
                                Game.Player.AI,
                                Game.Player.human,
                                collect_ai_hists=False)
                    zeroNN1 = ZeroNN(path=join(self.folder_NNs), ckpt_idx=-1)
                    game.players[0].mcts.zeroNN = zeroNN1
                    game.players[0].mcts.max_acts = self.mcts_sims
                    game.start(graphics=True)
                elif cmd == 'ns':
                    noise = input("Current noise is " +
                                  str(self.shared_vars['noise']) +
                                  ", input new noise:")
                    noise = to_type(noise, float)
                    if noise is None:
                        print("Invalid noise value")
                    else:
                        print("Set noise=", noise)
                        self.shared_vars['noise'] = noise
                elif cmd == 'tr':
                    # ratio = input("Train ratio")
                    ratio = input("Current train ratio is " +
                                  str(self.train_ratio) + ", input new ratio:")
                    ratio = to_type(ratio, float)
                    if ratio is None:
                        print("Invalid noise value")
                    else:
                        print("Set ratio=", ratio)
                        self.train_ratio = ratio
                else:
                    print("command error. (cmd=", cmd, ")")
            except:
                print("Unknown console error!")