Example 1
# Assumes sys, numpy as np and the project's Board, MCTS, PolicyValueNetwork,
# ProgressBar, tolist, BLACK and WHITE are already in scope.
def fight(nn_path, game_number, batch_size, rollout_time, max_thread):
    """Pit the raw policy-value network against MCTS for 2*game_number games."""
    pvn = PolicyValueNetwork.load_model(nn_path)
    mcts = MCTS(
        PolicyValueNetwork.load_model(nn_path),
        rollout_time=rollout_time, max_thread=max_thread
    )
    wins = {pvn: 0, mcts: 0}
    progress_bar = ProgressBar(game_number*2)
    finished_game_count = 0
    progress_bar.update(finished_game_count)
    # each side plays game_number games as BLACK and game_number games as WHITE
    for player, opponent in [(pvn, mcts), (mcts, pvn)]:
        player.color = BLACK
        opponent.color = WHITE
        boards = set()
        game_count = game_number
        while len(boards) or game_count > 0:
            for _ in range(min(batch_size-len(boards), game_count)):
                game_count -= 1
                boards.add(Board())
            cache_boards = list(boards)
            positions = player.get_positions(cache_boards, Tau=1.0)
            positions = tolist(positions)
            finished_boards = []
            for board, position in zip(cache_boards, positions):
                board.move(position)
                if board.is_over:
                    winner = board.winner
                    if winner == pvn.color:
                        wins[pvn] += 1
                    elif winner == mcts.color:
                        wins[mcts] += 1
                    finished_boards.append(board)

            # overwrite the status line with the mean move count of the games still running
            sys.stdout.write(
                ' '*70 + '{:d}\r'.format(
                    int(np.mean([len(board.history) for board in boards]))
                )
            )
            sys.stdout.flush()

            for board in finished_boards:
                finished_game_count += 1
                boards.remove(board)

            player, opponent = opponent, player
            progress_bar.update(finished_game_count)
    sys.stdout.write(' '*79 + '\r')
    sys.stdout.flush()

    print('pvn wins: {:.2f}%'.format(100.0 * wins[pvn] / (2 * game_number)))
    print('mcts wins: {:.2f}%'.format(100.0 * wins[mcts] / (2 * game_number)))
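A hypothetical invocation of fight; the config path and sizes below are placeholders borrowed from the later examples, not values prescribed by the snippet itself:

if __name__ == '__main__':
    # placeholder arguments; point nn_path at whatever trained config you actually have
    fight(
        'data/pre_train/yixin_version_nn_config.json',
        game_number=10,
        batch_size=4,
        rollout_time=100,
        max_thread=8
    )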
Example 2
# Assumes os and the project's Player, AIPlayer, TestGame, Game, MCTS and
# PolicyValueNetwork are already in scope.
def get_game(nn_path, mcts_config, time_delay):
    """Build a human-vs-MCTS game, letting the user choose black or white."""
    player = Player()
    mcts = AIPlayer(MCTS(PolicyValueNetwork.load_model(nn_path),
                         **mcts_config))

    while True:
        os.system('cls')  # clear the console (Windows; use 'clear' on POSIX systems)
        color = input('black: 1\nwhite: 2\n')
        try:
            color = int(color)
            if color in [1, 2]:
                break
        except ValueError:
            pass

    if color == 1:
        # game = Game(player, mcts, time_delay=time_delay)
        game = TestGame(player, mcts, time_delay=time_delay)
    else:
        # game = Game(mcts, player, time_delay=time_delay)
        game = TestGame(mcts, player, time_delay=time_delay)

    # _play = game.play
    # def play():
    #     _play(Tau=0.0, verbose=2)
    #
    # game.play = play

    return game
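A hypothetical driver for get_game; the path and mcts_config below mirror Example 4 and are placeholders rather than required values:

nn_path = 'data/pre_train/yixin_version_nn_config.json'  # placeholder path
mcts_config = {'rollout_time': 100, 'max_thread': 1, 'gamma': 0.0, 'max_depth': 4}
game = get_game(nn_path, mcts_config, time_delay=2)
game.play(Tau=0.0, verbose=2)  # same call signature as used in Example 4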
Example 3
from __future__ import print_function
from gomokuZero.utils import check_load_path
from gomokuZero.train.pipeline import Trainer
from gomokuZero.model.neural_network import PolicyValueNetwork

pvn = PolicyValueNetwork(blocks=3, filters=16)
# board_tensors, policy_tensors, value_tensors = get_samples(
#     pvn, 10, 5, game_batch_size=2, max_thread=4, rollout_time=100
# )
# print(board_tensors.shape, policy_tensors.shape, value_tensors.shape)
if check_load_path('test.json') is None:
    trainer = Trainer(
        pvn, **{
            'blocks': 2,
            'kernel_size': (3, 3),
            'filters': 16,
            'game_number': 8,
            'step_to_explore': 3,
            'game_batch_size': 4,
            'pool_size': 40,
            'pool_point': 0,
            'augment': False,
            'rollout_time': 32,
            'max_thread': 16,
            'epochs': 4,
            'train_batch_size': 128,
            'train_epochs': 2,
            'lr': {
                400: 1e-2,
                600: 1e-3,
                float('inf'): 1e-4
            }
        }
    )
    # NOTE: the original snippet is truncated here; any further trainer setup is not shown.
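The lr entry reads like a step-indexed schedule: each key is an upper bound on the training-iteration count and the value is the learning rate to use below it. How Trainer actually consumes it is not shown, so the lookup below is only a guessed illustration of that interpretation:

def lookup_lr(schedule, step):
    # return the rate attached to the smallest threshold that still covers this step
    for bound in sorted(schedule):
        if step <= bound:
            return schedule[bound]
    return schedule[max(schedule)]

lr_schedule = {400: 1e-2, 600: 1e-3, float('inf'): 1e-4}
assert lookup_lr(lr_schedule, 100) == 1e-2
assert lookup_lr(lr_schedule, 500) == 1e-3
assert lookup_lr(lr_schedule, 10**6) == 1e-4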
Example 4
# Assumes the project's MCTS, PolicyValueNetwork, Tournament, AIPlayer and Game
# are already in scope.
# nn_path = '/data/zero/test_version_nn_config.json'
# nn_path = 'data/pre_train/pre_train_version_nn_config.json'
nn_path = 'data/pre_train/yixin_version_nn_config.json'
# nn_path = 'data/pre_train/input_coding_version_nn_config.json'
# nn_path = 'data/pre_train/input_coding_augmentation_version_nn_config.json'
# nn_path = 'data/cache/cache_pre_train_version_nn_config.json'
mcts_config = {
    'rollout_time': 100,
    'max_thread': 1,
    'gamma': 0.0,
    'max_depth': 4
}

case = 2
if case == 1:
    player_1 = MCTS(PolicyValueNetwork.load_model(nn_path), **mcts_config)
    player_2 = PolicyValueNetwork.load_model(nn_path)

    tournament = Tournament(player_1, player_2)
    tournament.play(10, 5)

elif case == 2:
    black_player = AIPlayer(
        MCTS(PolicyValueNetwork.load_model(nn_path), **mcts_config))
    # white_player = AIPlayer(MCTS(
    #     PolicyValueNetwork.load_model(nn_path), **mcts_config
    # ))
    white_player = AIPlayer(PolicyValueNetwork.load_model(nn_path))

    game = Game(black_player, white_player, time_delay=2)
    game.play(Tau=0.0, verbose=2)
Example 5
from __future__ import print_function

from gomokuZero.model.neural_network import PolicyValueNetwork
from gomokuZero.board.board import Board
from gomokuZero.board.play import PlayerBase, Game

class NeuralNetworkPlayer(PlayerBase):
    """Adapter that lets a bare PolicyValueNetwork act as a Game player."""

    def __init__(self, neuralNetwork):
        self.neuralNetwork = neuralNetwork

    def get_position(self, board):
        return self.neuralNetwork.get_positions(board, True)

# player_1 = NeuralNetworkPlayer(PolicyValueNetwork())
# player_2 = NeuralNetworkPlayer(PolicyValueNetwork())
# game = Game(player_1, player_2, time_delay=1)
# game.play()
pvn = PolicyValueNetwork()
pvn_copy = pvn.copy()
print(id(pvn), id(pvn_copy))
board = Board([(8, 8)])
print(pvn.get_values(board), pvn_copy.get_values(board))
# print(pvn.get_policy_values([board, board], True))
# print(pvn.get_position_values([board, board], True))
Example 6
import json

import numpy as np
from keras import backend as K

from gomokuZero.model.neural_network import PolicyValueNetwork
from gomokuZero.utils import check_load_path
# SIZE (the board-size constant) is assumed to be importable from the project as well.

try:
    range = xrange
except NameError:
    pass

if K.backend() == 'theano':
    nn_path = 'data/pre_train/yixin_version_nn_config.json'
    # nn_path = 'data/pre_train/input_coding_version_nn_config.json'
    # nn_path = 'data/pre_train/input_coding_augmentation_version_nn_config.json'
else:
    # nn_path = 'data/pre_train/yixin_version_tf_nn_config.json'
    nn_path = 'data/pre_train/tournament_version_tf_nn_config.json'
pvn = PolicyValueNetwork.load_model(nn_path)
# pvn = PolicyValueNetwork()

# samples = np.load(check_load_path('data/records/yixin_samples.npz'))
samples = np.load(check_load_path('data/records/tournament_samples.npz'))
board_tensors = samples['board_tensors']
if K.backend() == 'tensorflow':
    board_tensors = np.transpose(board_tensors, (0, 2, 3, 1))
policy_tensors = samples['policy_tensors'].reshape((-1, SIZE**2))
value_tensors = samples['value_tensors'][:, :1]

try:
    # trainer_path = 'data/cache/cache_yixin_version_pre_trainer.json'
    trainer_path = 'data/cache/cache_tournament_version_tf_pre_trainer.json'
    with open(check_load_path(trainer_path), 'r') as f:
        trainer_config = json.load(f)
# NOTE: the original snippet is truncated here; the matching except clause is not shown.
Example 7
from __future__ import print_function
from gomokuZero.model.neural_network import PolicyValueNetwork
from gomokuZero.model.mcts import MCTS
from gomokuZero.board.board import Board
import time

pvn = PolicyValueNetwork(blocks=1,
                         filters=32,
                         create_function_name='create_resnet_version_3')
mcts = MCTS(pvn)
start = time.time()
mcts.get_positions(Board(),
                   1.0,
                   100,
                   2,
                   exploration_epsilon=0.25,
                   gamma=0.0,
                   max_depth=4,
                   verbose=2)
end = time.time()
print('time:{:.4f}'.format(end - start))
root = list(mcts.boards2roots.values())[0]

count = 0


def visit(node, depth):
    # report nodes that still carry virtual-loss markers
    if node.is_virtual:
        global count
        count += 1
        print(count, len(node.is_virtual), depth)
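The snippet ends mid-definition, so the rest of the traversal is unknown. Purely as an illustration, a self-contained walk that counts nodes still holding virtual losses could look like the sketch below; the children attribute is an assumption, since the Node class itself is not part of the snippet:

def count_virtual_nodes(node, depth=0):
    # hypothetical helper: preorder walk reporting nodes that still hold virtual losses
    total = 1 if node.is_virtual else 0
    if node.is_virtual:
        print(len(node.is_virtual), depth)
    for child in getattr(node, 'children', {}).values():  # 'children' is an assumption
        total += count_virtual_nodes(child, depth + 1)
    return total

# print('nodes with virtual losses:', count_virtual_nodes(root))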