Example #1
def play(network):
    """Plays out a self-play match, returning a MCTSPlayer object containing:
        - the final position
        - the n x 362 tensor of floats representing the mcts search probabilities
        - the n-ary tensor of floats representing the original value-net estimate
          where n is the number of moves in the game
    """
    readouts = FLAGS.num_readouts  # defined in strategies.py
    # Disable resign in a small fraction of games (FLAGS.resign_disable_pct).
    if random.random() < FLAGS.resign_disable_pct:
        resign_threshold = -1.0
    else:
        resign_threshold = None

    player = MCTSPlayer(network, resign_threshold=resign_threshold)

    player.initialize_game()

    # Must run this once at the start to expand the root node.
    first_node = player.root.select_leaf()
    prob, val = network.run(first_node.position)
    first_node.incorporate_results(prob, val, first_node)

    while True:
        start = time.time()
        player.root.inject_noise()
        current_readouts = player.root.N
        # we want to do "X additional readouts", rather than "up to X readouts".
        while player.root.N < current_readouts + readouts:
            player.tree_search()

        if FLAGS.verbose >= 3:
            print(player.root.position)
            print(player.root.describe())

        if player.should_resign():
            player.set_result(-1 * player.root.position.to_play,
                              was_resign=True)
            break
        move = player.pick_move()
        player.play_move(move)
        if player.root.is_done():
            player.set_result(player.root.position.result(), was_resign=False)
            break

        if (FLAGS.verbose >= 2) or (FLAGS.verbose >= 1 and player.root.position.n % 10 == 9):
            print("Q: {:.5f}".format(player.root.Q))
            dur = time.time() - start
            print("%d: %d readouts, %.3f s/100. (%.2f sec)" % (
                player.root.position.n, readouts, dur / readouts * 100.0, dur), flush=True)
        if FLAGS.verbose >= 3:
            print("Played >>",
                  coords.to_gtp(coords.from_flat(player.root.fmove)))

    if FLAGS.verbose >= 2:
        utils.dbg("%s: %.3f" % (player.result_string, player.root.Q))
        utils.dbg(player.root.position, player.root.position.score())

    return player
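
For context, a minimal sketch of how the returned MCTSPlayer might be consumed to build training examples; it assumes extract_data() yields (position, pi, result) tuples per move, as suggested by the test examples further down, and the helper name gather_examples is hypothetical.

def gather_examples(network, num_games=1):
    """Hypothetical helper: run self-play games and collect training tuples."""
    examples = []
    for _ in range(num_games):
        player = play(network)
        # extract_data() is assumed to yield one (position, search_pi, result)
        # tuple per move of the finished game.
        for position, search_pi, result in player.extract_data():
            examples.append((position, search_pi, result))
    return examples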
Example #2
def play(network):
    search_n = 100
    # seconds_per_move and timed_match are assumed to be defined at module
    # level in the original source.
    player = MCTSPlayer(network=network, seconds_per_move=seconds_per_move,
                        timed_match=timed_match, search_n=search_n,
                        player_mode=0)
    player.initialize_game()
    while True:
        start = time.time()
        current_n = player.root.N
        while player.root.N < current_n + search_n:
            player.tree_search()
        move = player.pick_move()
        #print(move, player.root.status.to_play)
        player.play_move(move)
        if player.root.is_done():
            #print('[!] finish')
            break
    #X, p, v = player.generate_data()
    return player
Example #3
    def test_extract_data_normal_end(self):
        player = MCTSPlayer(DummyNet())
        player.initialize_game()
        player.tree_search()
        player.play_move(None)
        player.tree_search()
        player.play_move(None)
        self.assertTrue(player.root.is_done())
        player.set_result(player.root.position.result(), was_resign=False)

        data = list(player.extract_data())
        self.assertEqual(2, len(data))
        position, _, result = data[0]
        # White wins by komi
        self.assertEqual(go.WHITE, result)
        self.assertEqual("W+{}".format(player.root.position.komi),
                         player.result_string)
Example #4
    def test_extract_data_resign_end(self):
        player = MCTSPlayer(DummyNet())
        player.initialize_game()
        player.tree_search()
        player.play_move((0, 0))
        player.tree_search()
        player.play_move(None)
        player.tree_search()
        # Black is winning on the board
        self.assertEqual(go.BLACK, player.root.position.result())
        # But if Black resigns
        player.set_result(go.WHITE, was_resign=True)

        data = list(player.extract_data())
        position, _, result = data[0]
        # Result should say White is the winner
        self.assertEqual(go.WHITE, result)
        self.assertEqual("W+R", player.result_string)
Example #5
def play(network):
    readouts = FLAGS.num_readouts
    player = MCTSPlayer(network)
    player.initialize_game()

    first_node = player.root.select_leaf()
    prob, val = network.predict(first_node.position.state)
    first_node.incorporate_results(prob, val, first_node)

    while True:
        # player.root.inject_noise()
        current_readouts = player.root.N
        while player.root.N < current_readouts + readouts:
            player.tree_search()

        move = player.pick_move()
        player.play_move(move)
        tf.logging.info('playing move: %d hamming distance: %d' %
                        (move, state_diff(player.root.position.state)))
        if player.root.is_done():
            tf.logging.info('done')
            break
Example #6
class AlphaDoge(QThread):
    tuple_signal = pyqtSignal(tuple)

    def __init__(self, ckpt=None, seconds_per_move=5, timed_match=False,
                 search_n=800):
        QThread.__init__(self)
        self.player = MCTSPlayer(PVNet(ckpt), seconds_per_move=seconds_per_move,
                                 timed_match=timed_match, search_n=search_n)

    def set_status(self, status):
        self.player.set_status(status)
    
    def play_move(self, coord):
        self.player.play_move(coord)

    def reset(self):
        self.player.reset()
    
    def __del__(self):
        self.wait()
    
    def run(self):
        coord = self.player.suggest_move()
        if coord is None:
            coord = (-1, -1)
        self.tuple_signal.emit(coord)
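
A minimal usage sketch for the thread above, assuming PyQt5 and a valid PVNet checkpoint; the checkpoint path and the on_move() slot are hypothetical placeholders for whatever GUI code consumes the engine's reply.

from PyQt5.QtWidgets import QApplication

def on_move(coord):
    # (-1, -1) is the sentinel emitted when suggest_move() returns None.
    print("engine suggests", coord)

app = QApplication([])
engine = AlphaDoge(ckpt="model.ckpt", search_n=400)  # checkpoint path is a placeholder
engine.tuple_signal.connect(on_move)  # delivered on the GUI thread via the event loop
engine.play_move((3, 3))              # feed the opponent's move into the player
engine.start()                        # QThread.start() runs run() on a worker thread
app.exec_()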
Example #7
def play(network):
    readouts = FLAGS.num_readouts
    player = MCTSPlayer(network)
    player.initialize_game()

    first_node = player.root.select_leaf()
    prob, val = network.predict(first_node.position.state)
    first_node.incorporate_results(prob, val, first_node)

    lastmove = -1
    hamm_dist = state_diff(player.root.position.state)

    for _ in range(hamm_dist):
        # player.root.inject_noise()
        current_readouts = player.root.N
        start = time.time()
        while (player.root.N < current_readouts + readouts
               and time.time() - start < FLAGS.time_per_move):
            player.tree_search()

        move = player.pick_move()
        if move == lastmove:
            tf.logging.info('lastmove == move')
            return state_diff(player.root.position.state)
        before = state_diff(player.root.position.state)
        player.play_move(move)
        after = state_diff(player.root.position.state)
        if after > before:
            tf.logging.info('move increasing distance')
            return after
        tf.logging.info('playing move: %d hamming distance: %d' %
                        (move, state_diff(player.root.position.state)))
        if player.root.is_done():
            tf.logging.info('done')
            return 0
        lastmove = move
    return state_diff(player.root.position.state)