def __init__(self,
             side,
             network,
             debugging=True,
             n_playout=800,
             search_threads=16,
             virtual_loss=0.02,
             policy_loop_arg=True,
             c_puct=5,
             dnoise=False,
             temp_round=conf.train_temp_round,
             can_surrender=False,
             surrender_threshold=-0.99,
             allow_legacy=False,
             repeat_noise=True):
    """Store the player's configuration and build its MCTS search object.

    Args:
        side: Forwarded unchanged to the base-class initializer.
        network: Kept as ``self.network``.
        debugging: Kept as ``self.debugging``.
        n_playout: Forwarded to ``mcts_async.MCTS`` as ``n_playout``.
        search_threads: Forwarded to ``mcts_async.MCTS`` as
            ``search_threads``.
        virtual_loss: Forwarded to ``mcts_async.MCTS`` as ``virtual_loss``.
        policy_loop_arg: Forwarded to ``mcts_async.MCTS`` as
            ``policy_loop_arg``.
        c_puct: Forwarded to ``mcts_async.MCTS`` as ``c_puct``.
        dnoise: Forwarded to ``mcts_async.MCTS`` as ``dnoise``.
        temp_round: Kept as ``self.temp_round``; the default is read from
            ``conf.train_temp_round`` once, when the method is defined.
        can_surrender: Kept as ``self.can_surrender``.
        surrender_threshold: Kept as ``self.surrender_threshold``.
        allow_legacy: Kept as ``self.allow_legacy``.
        repeat_noise: Kept as ``self.repeat_noise``.
    """
    super(NetworkPlayer, self).__init__(side)

    # Plain settings, stored verbatim on the instance.
    self.network = network
    self.debugging = debugging
    self.temp_round = temp_round
    self.repeat_noise = repeat_noise
    self.allow_legacy = allow_legacy
    self.can_surrender = can_surrender
    self.surrender_threshold = surrender_threshold

    # NOTE(review): 400 is presumably a capacity bound for the queue --
    # confirm against the Queue class this file imports.
    self.queue = Queue(400)

    # Constructed last so every attribute above already exists by the time
    # the search object is handed the bound ``policy_value_fn_queue``.
    search_options = dict(
        n_playout=n_playout,
        search_threads=search_threads,
        virtual_loss=virtual_loss,
        policy_loop_arg=policy_loop_arg,
        c_puct=c_puct,
        dnoise=dnoise,
    )
    self.mcts_policy = mcts_async.MCTS(self.policy_value_fn_queue,
                                       **search_options)
예제 #2
0
            if move in legal_move_b:
                move = board.flipped_uci_labels([move])[0]
                action_probs.append((move,prob))
    else:
        for move,prob in zip(uci_labels,policyout):
            if move in legal_move:
                action_probs.append((move,prob))
    action_probs = sorted(action_probs,key=lambda x:x[1])
    return action_probs, valout
    
for one_play in range(10):
    states = []
    moves = []

    game_states = GameState()
    mcts_policy_w = mcts_async.MCTS(policy_value_fn_async_batch,n_playout=1600,search_threads=32,virtual_loss=0.03)
    mcts_policy_b = mcts_async.MCTS(policy_value_fn_async_batch,n_playout=1600,search_threads=32,virtual_loss=0.03)
    result = 'peace'
    for i in range(150):
        begin = time.time()
        is_end,winner = game_states.game_end()
        if is_end == True:
            result = winner
            break
        start = time.time()
        if i % 2 == 0:
            player = 'w'

            if i < 18:
                temp = 1
            else:
예제 #3
0
        with graph.as_default():
            saver = tf.train.Saver(var_list=tf.global_variables())
            saver.restore(sess, model_dir)
        print('param updated {}'.format(model_dir))
        nm.updated(latest_model_name)

    print("current weight {}".format(nm.netname))

    states = []
    moves = []

    game_states = GameState()
    mcts_policy_w = mcts_async.MCTS(policy_value_fn_queue,
                                    n_playout=800,
                                    search_threads=16,
                                    virtual_loss=0.02,
                                    policy_loop_arg=True,
                                    c_puct=5,
                                    dnoise=False)
    mcts_policy_b = mcts_async.MCTS(policy_value_fn_queue,
                                    n_playout=800,
                                    search_threads=16,
                                    virtual_loss=0.02,
                                    policy_loop_arg=True,
                                    c_puct=5,
                                    dnoise=False)
    result = 'peace'
    peace_round = 0
    remain_piece = countpiece(game_states.statestr)

    can_surrender = random.random() > 0.1