Esempio n. 1
0
def parse_args():
    """Parse the command line arguments.

    Defines the options ``--mode``, ``--gpu_id``, ``--game_type``,
    ``--games_queue_port``, ``--file_server_port`` and ``--authkey``.

    Returns:
        argparse.Namespace: The parsed arguments. ``mode`` is ``None`` when
        not given; the two ports default to ``-1`` and ``authkey`` to ``''``.
    """
    parser = argparse.ArgumentParser()
    valid_modes = utils.get_valid_game_modes_string()
    parser.add_argument('--mode',
                        help=('A valid game mode name. valid modes are {%s}.' %
                              valid_modes),
                        default=None)
    parser.add_argument('--gpu_id',
                        help=('GPU id to use, or -1 to use the CPU.'),
                        default=0,
                        type=int)
    parser.add_argument(
        '--game_type',
        help=('Type is a more general term which may include many game ' +
              'modes. For example, moku is the type of tictactoe, connect4 ' +
              'and gomoku modes.'),
        default='moku')
    parser.add_argument(
        '--games_queue_port',
        help=('Port opened to receive games from the ' +
              'players\' queue. If negative, defaults to value in config.py'),
        default=-1,
        type=int)
    parser.add_argument('--file_server_port',
                        help=('Port opened to transfer files to the players. ' +
                              'If negative, defaults to value in config.py.'),
                        default=-1,
                        type=int)
    parser.add_argument(
        '--authkey',
        # The first fragment ends with a space so the concatenated help reads
        # "players' queue" instead of "players'queue".
        help=('Authentication key for the communication with players\' ' +
              'queue. If empty, defaults to value in config.py.'),
        default='')
    return parser.parse_args()
Esempio n. 2
0
def parse_args():
    """Build the command line parser and return the parsed options.

    The options are ``--mode``, ``--gpu_id`` (one or more values, left as
    strings), ``--game_type`` and ``-n/--num_player_processes``.

    Returns:
        argparse.Namespace: The parsed arguments.
    """
    arg_parser = argparse.ArgumentParser()
    mode_names = utils.get_valid_game_modes_string()

    # Each entry is (option flags, keyword arguments for add_argument).
    option_table = (
        (('--mode',), {
            'help': 'A valid game mode name. valid modes are {%s}.' %
                    mode_names,
            'default': None,
        }),
        (('--gpu_id',), {
            'nargs': '+',
            'help': 'List (separated by spaces) of GPU ids to use, or -1 to '
                    'use the CPU.',
            'default': ['0'],
        }),
        (('--game_type',), {
            'help': 'Type is a more general term which may include many '
                    'game modes. For example, moku is the type of '
                    'tictactoe, connect4 and gomoku modes.',
            'default': 'moku',
        }),
        (('-n', '--num_player_processes'), {
            'help': 'Number of parallel player processes to run.',
            'default': 3,
            'type': int,
        }),
    )
    for flags, options in option_table:
        arg_parser.add_argument(*flags, **options)

    return arg_parser.parse_args()
Esempio n. 3
0
def parse_args():
    """Parse the command line arguments.

    Defines the options ``--mode``, ``--gpu_id`` (one or more values, left as
    strings), ``--game_type``, ``-n/--num_player_processes``,
    ``--games_queue_port``, ``--file_server_port``, ``--authkey`` and
    ``--server_ip``.

    Returns:
        argparse.Namespace: The parsed arguments. The two ports default to
        ``-1`` and ``authkey``/``server_ip`` default to ``''``.
    """
    parser = argparse.ArgumentParser()
    valid_modes = utils.get_valid_game_modes_string()
    parser.add_argument('--mode',
                        help=('A valid game mode name. valid modes are {%s}.' %
                              valid_modes),
                        default=None)
    parser.add_argument(
        '--gpu_id',
        nargs='+',
        help=('List (separated by spaces) of GPU ids to use, or -1 to use ' +
              'the CPU.'),
        default=['0'])
    parser.add_argument(
        '--game_type',
        help=('Type is a more general term which may include many game ' +
              'modes. For example, moku is the type of tictactoe, connect4 ' +
              'and gomoku modes.'),
        default='moku')
    parser.add_argument('-n',
                        '--num_player_processes',
                        help=('Number of parallel player processes to run.'),
                        default=3,
                        type=int)
    parser.add_argument(
        '--games_queue_port',
        help=('Port opened to receive games from the ' +
              'players\' queue. If negative, defaults to value in config.py'),
        default=-1,
        type=int)
    parser.add_argument('--file_server_port',
                        help=('Port opened to transfer files to the players. ' +
                              'If negative, defaults to value in config.py.'),
                        default=-1,
                        type=int)
    parser.add_argument(
        '--authkey',
        # The first fragment ends with a space so the concatenated help reads
        # "players' queue" instead of "players'queue".
        help=('Authentication key for the communication with players\' ' +
              'queue. If empty, defaults to value in config.py.'),
        default='')
    parser.add_argument(
        '--server_ip',
        help=('IP address of the server machine. If empty, defaults to ' +
              'value in config.py.'),
        default='')
    return parser.parse_args()
Esempio n. 4
0
def main():
    """Play challenge matches between pairs of checkpoints and print scores.

    For every challenge round, each pair of checkpoints listed in
    ``--num_iters_ckpt`` (plus each checkpoint against itself when
    ``--include_self_play`` is set) plays one game. The index of the network
    that moves first (``ip1``) alternates between rounds.

    Results are accumulated in ``results[p, i, j, k]`` where ``p`` is a
    player index (0 or 1), ``i`` and ``j`` are checkpoint indices and ``k``
    is 0 for wins, 1 for losses and 2 for draws. A table of wins is printed
    after every game.

    Exits with status 1 when the game mode is invalid or when the lengths of
    ``--max_simulations_per_move`` and ``--num_iters_ckpt`` do not match.

    Raises:
        NotImplementedError: If ``--game_type`` is not ``'moku'``.
    """
    args = parse_args()

    valid_modes_list = utils.get_valid_game_modes()
    valid_modes_string = utils.get_valid_game_modes_string()
    if args.mode not in valid_modes_list:
        print('Invalid game mode informed. Please inform a mode with ' +
              '--mode=mode_name, where mode_name is one of the following ' +
              '{%s}' % valid_modes_string)
        # Non-zero status so the calling shell can detect the bad argument.
        sys.exit(1)

    gconf = utils.get_game_config(args.mode, 'challenge')

    if args.num_challenges > 0:
        gconf.num_challenges = args.num_challenges

    if args.game_type == 'moku':
        (game_config_string, game_manager_module, game_manager_kwargs,
            game_manager_io_module, game_manager_io_kwargs) = \
                utils.generate_moku_manager_params(
                    gconf.drop_mode, gconf.moku_size, gconf.board_size,
                    args.gpu_id, gconf.num_res_layers, gconf.num_channels)
    else:
        raise NotImplementedError('Game type %s is not supported.' %
                                  args.game_type)

    train_dir = osp.join('train_files', game_config_string)

    # Checkpoint numbers arrive as strings; a negative number selects the
    # last checkpoint found in train_dir.
    requested_ckpts = [int(x) for x in args.num_iters_ckpt]
    args.num_iters_ckpt = [
        utils.get_last_checkpoint_number(train_dir) if x < 0 else x
        for x in requested_ckpts
    ]

    ckpt_paths = [
        utils.get_checkpoint_path(train_dir, x) for x in args.num_iters_ckpt
    ]

    gmio_module = __import__(game_manager_io_module[0])
    gmio_class = getattr(gmio_module, game_manager_io_module[1])
    game_manager_io = gmio_class(**game_manager_io_kwargs)

    gm_module = __import__(game_manager_module[0])
    gm_class = getattr(gm_module, game_manager_module[1])
    # Index of the network that makes the first move in the current round.
    ip1 = 0

    args.max_simulations_per_move = [
        int(x) for x in args.max_simulations_per_move
    ]
    local_max_simulations_per_move = args.max_simulations_per_move
    if len(local_max_simulations_per_move) == 1:
        # A single value applies to every checkpoint.
        local_max_simulations_per_move = \
            local_max_simulations_per_move * len(args.num_iters_ckpt)
    elif len(local_max_simulations_per_move) != len(args.num_iters_ckpt):
        print('Number of arguments in max_simulations_per_move and ' +
              'num_iters_ckpt do not match. See --help for more information.')
        sys.exit(1)

    local_eval_batch_size = [0] * len(local_max_simulations_per_move)
    for i in range(len(local_max_simulations_per_move)):
        if local_max_simulations_per_move[i] < 1:
            # Values below one fall back to the configured defaults.
            local_max_simulations_per_move[i] = gconf.max_simulations_per_move
            local_eval_batch_size[i] = gconf.eval_batch_size
        else:
            local_eval_batch_size[i] = \
                int(local_max_simulations_per_move[i] / 100.0) + 1

    print('Running %d challenges for each pair of checkpoints.' %
          gconf.num_challenges)

    results = np.zeros(
        (2, len(args.num_iters_ckpt), len(args.num_iters_ckpt), 3), np.int32)
    for ichallenge in range(gconf.num_challenges):
        iend_ckpt1 = len(args.num_iters_ckpt) - 1
        if args.include_self_play:
            iend_ckpt1 += 1
        for ickpt1 in range(iend_ckpt1):
            istart_ckpt2 = ickpt1 + 1
            if args.include_self_play:
                istart_ckpt2 -= 1
            for ickpt2 in range(istart_ckpt2, len(args.num_iters_ckpt)):
                chal_ckpt_nums = [
                    args.num_iters_ckpt[ickpt1], args.num_iters_ckpt[ickpt2]
                ]
                chal_ckpt_paths = [ckpt_paths[ickpt1], ckpt_paths[ickpt2]]
                chal_max_simulations_per_move = [
                    local_max_simulations_per_move[ickpt1],
                    local_max_simulations_per_move[ickpt2]
                ]
                chal_eval_batch_size = [
                    local_eval_batch_size[ickpt1],
                    local_eval_batch_size[ickpt2]
                ]

                print('=====================================================')
                print('Checkpoint %d vs. %d' % tuple(chal_ckpt_nums))
                print('=====================================================')

                game_managers = []
                for i, ckpt in enumerate(chal_ckpt_paths):
                    print('Net %d' % (i + 1))
                    game_manager_kwargs['ckpt_path'] = ckpt
                    game_managers.append(gm_class(**game_manager_kwargs))
                print('=====================================================')
                print()

                state = game_managers[0].initial_state()
                mctss = [
                    MCTS(game_managers[i], chal_max_simulations_per_move[i],
                         gconf.cpuct, gconf.virtual_loss, state,
                         gconf.root_noise_weight, gconf.dirichlet_noise_param,
                         chal_eval_batch_size[i],
                         game_manager_kwargs['tf_device'])
                    for i in range(len(game_managers))
                ]

                iplayer = ip1
                iplay = 0
                imove = None
                while not game_managers[iplayer].is_over(
                        state.state[np.newaxis])[0]:
                    if iplay < gconf.num_relaxed_turns:
                        turn_temperature = 1.0
                    else:
                        turn_temperature = gconf.move_temperature
                    imc = iplayer % len(mctss)
                    if args.show_middle_game:
                        game_manager_io.print_board(state, imove)

                    # The search must run on every turn, not only when the
                    # middle game is displayed; otherwise choose_move() is
                    # called without any simulation for this position.
                    stats = mctss[imc].simulate(state,
                                                gconf.max_seconds_per_move)

                    if args.show_middle_game:
                        print('Net %d to play:' % (iplayer + 1))

                        if args.show_mcts:
                            print('MCTS stats')
                            game_manager_io.print_stats(stats)
                            print()

                        if args.show_win_prob:
                            with tf.device(game_manager_kwargs['tf_device']):
                                _, value_prior = \
                                    mctss[imc].game_manager.predict(
                                        tf.constant(state.state[np.newaxis],
                                                    tf.float32))
                                win_prob = (value_prior[0] + 1.0) / 2.0
                                print('Estimated win probability: %.03f\n' %
                                      win_prob)

                        if args.show_move_prob:
                            print('Move probabilities:')
                            game_manager_io.print_stats_on_board(stats, 1)
                            print()

                        if args.show_move_prob_temp:
                            print('Move probabilities with temperature ' +
                                  '%.1e' % turn_temperature)
                            game_manager_io.print_stats_on_board(
                                stats, turn_temperature)
                            print()

                    imove, _ = mctss[imc].choose_move(turn_temperature)
                    state = game_managers[iplayer].update_state(state, imove)
                    iplayer = (iplayer + 1) % 2
                    # Both search trees follow the move that was played.
                    for imc2 in range(len(mctss)):
                        mctss[imc2].update_root(imove, state)
                    iplay += 1
                game_manager_io.print_board(state, imove)
                iwinner = game_managers[iplayer].get_iwinner(
                    state.state[np.newaxis])[0]

                print('Checkpoint %d vs. %d result (match %d):' %
                      tuple(chal_ckpt_nums + [ichallenge + 1]))
                if iwinner < 0:
                    print('DRAW')
                    results[0, ickpt1, ickpt2, 2] += 1
                    results[1, ickpt1, ickpt2, 2] += 1
                elif iwinner == ip1:
                    print('Checkpoint %d won' % args.num_iters_ckpt[ickpt1])
                    results[ip1, ickpt1, ickpt2, 0] += 1
                    results[(ip1 + 1) % 2, ickpt2, ickpt1, 1] += 1
                else:
                    print('Checkpoint %d won' % args.num_iters_ckpt[ickpt2])
                    results[(ip1 + 1) % 2, ickpt2, ickpt1, 0] += 1
                    results[ip1, ickpt1, ickpt2, 1] += 1

                print('\nNumber of wins of the players in the rows vs. the ' +
                      'players in the columns. Missing results are draws.\n')
                print_results(np.sum(results[:, :, :, 0], axis=0),
                              args.num_iters_ckpt)

                if args.show_results_by_player:
                    print('Results when playing as player 1.\n')
                    print_results(results[0, :, :, 0], args.num_iters_ckpt)
                    print('Results when playing as player 2.\n')
                    print_results(results[1, :, :, 0], args.num_iters_ckpt)

        # Alternate which network moves first in the next round.
        ip1 = (ip1 + 1) % 2
Esempio n. 5
0
def parse_args():
    """Parse the command line arguments.

    Besides ``--mode``, ``--gpu_id``, ``--game_type``, ``--iuser``,
    ``--num_challenges``, ``--num_iters_ckpt`` and
    ``--max_simulations_per_move`` (the last two take one or more values and
    are left as strings), this defines a set of boolean switches. A switch
    given without a value is ``True``; a switch given with a value is
    converted by ``str_to_bool`` below, so ``--show_mcts False`` yields
    ``False``. An unrecognized value is reported as an argparse usage error.

    Returns:
        argparse.Namespace: The parsed arguments.
    """

    def str_to_bool(text):
        """Convert a command line word to a bool.

        ``type=bool`` would map every non-empty string (including "False"
        and "0") to True, so the accepted spellings are listed explicitly.
        """
        word = text.strip().lower()
        if word in ('1', 'true', 't', 'yes', 'y', 'on'):
            return True
        if word in ('', '0', 'false', 'f', 'no', 'n', 'off'):
            return False
        raise argparse.ArgumentTypeError(
            'expected a boolean value, got %r' % text)

    parser = argparse.ArgumentParser()

    def add_switch(short_flag, long_flag, help_text):
        """Register an optional boolean switch that defaults to False."""
        parser.add_argument(short_flag,
                            long_flag,
                            help=help_text,
                            nargs='?',
                            const=True,
                            default=False,
                            type=str_to_bool)

    valid_modes = utils.get_valid_game_modes_string()
    parser.add_argument('--mode',
                        help=('A valid game mode name. valid modes are {%s}.' %
                              valid_modes),
                        default=None)
    parser.add_argument('--gpu_id',
                        help=('GPU id to use, or -1 to use the CPU.'),
                        default=0,
                        type=int)
    parser.add_argument(
        '--game_type',
        help=('Type is a more general term which may include many game ' +
              'modes. For example, moku is the type of tictactoe, connect4 ' +
              'and gomoku modes.'),
        default='moku')
    parser.add_argument(
        '--iuser',
        help=('Index of the user, 0 to play first and 1 to play second. ' +
              'Or you can also use -1 to let the computer play as both ' +
              'players or 2 if you want to play as both players.'),
        default=0,
        type=int)
    parser.add_argument(
        '--num_challenges',
        help=('Number of games to play. If less than one, defaults to value ' +
              'in config.py.'),
        default=0,
        type=int)
    parser.add_argument(
        '--num_iters_ckpt',
        nargs='+',
        help=('List (separated by spaces) of number of iterations in the ' +
              'checkpoints to load. e.g. if the file is called ' +
              'moku3_3x3_1000.ckpt, put 1000 in the list. Use -1 to load ' +
              'the latest checkpoint or 0 to use a naive network.'),
        default=['0', '-1'])
    parser.add_argument(
        '--max_simulations_per_move',
        nargs='+',
        help=('Max number of MCTS simulations per move. This must be a ' +
              'list (separated by spaces) of either one single value or a ' +
              'list whose length is the same as the list in num_iters_ckpt. ' +
              'If a value is less than one, then this value is replaced by ' +
              'that defined in config.py.'),
        default=['0'])
    add_switch(
        '-sm', '--show_mcts',
        ('If set, the MCTS stats for the current state will ' +
         'be displayed.'))
    add_switch(
        '-sp', '--show_move_prob',
        ('If set, the probabilities of playing at each position will ' +
         'be displayed.'))
    add_switch(
        '-spt', '--show_move_prob_temp',
        ('If set, the probabilities of playing at each position ' +
         'rebalanced by the temperature will be displayed.'))
    add_switch(
        '-sw', '--show_win_prob',
        ('If set, the winning probability estimated by the network ' +
         'will be displayed.'))
    add_switch(
        '-smid', '--show_middle_game',
        ('If set, shows the board after each move. Otherwise, only the ' +
         'final board of each challenge is shown.'))
    add_switch(
        '-sr', '--show_results_by_player',
        ('If set, also shows the winning results separately when ' +
         'playing as player 1 and 2.'))
    add_switch(
        '-isp', '--include_self_play',
        ('If set, games between the same checkpoint as both players ' +
         'are included in the challenges.'))
    return parser.parse_args()
Esempio n. 6
0
def main():
    """Start the player-side processes and wait for the players to finish.

    After validating the arguments, this assigns a GPU id to every player
    process, connects to the queues exposed by ``init_client_manager`` and
    starts, as daemon processes, one ``file_client``, ``num_player_processes``
    ``player`` processes and one ``player_manager``. It then joins the
    player processes.

    Exits with status 1 when the game mode is invalid or when fewer than one
    player process is requested.

    Raises:
        NotImplementedError: If ``--game_type`` is not ``'moku'``.
    """
    args = parse_args()

    valid_modes_list = utils.get_valid_game_modes()
    valid_modes_string = utils.get_valid_game_modes_string()
    if args.mode not in valid_modes_list:
        print('Invalid game mode informed. Please inform a mode with ' +
              '--mode=mode_name, where mode_name is one of the following ' +
              '{%s}' % valid_modes_string)
        # Non-zero status so the calling shell can detect the bad argument.
        sys.exit(1)

    num_players = args.num_player_processes
    if num_players < 1:
        # Without any player the loop below never runs and the game
        # configuration names it binds would be undefined.
        print('The number of player processes must be at least 1.')
        sys.exit(1)

    gconf = utils.get_game_config(args.mode, 'test')

    max_ckpts_to_keep = 1
    args.gpu_id = [int(x) for x in args.gpu_id]
    if not args.gpu_id:
        player_gpu_ids = [-1] * num_players
    elif len(args.gpu_id) == 1:
        player_gpu_ids = [args.gpu_id[0]] * num_players
    else:
        # Repeat the GPU list often enough to cover every player, then cut
        # it down to exactly one id per player.
        num_repetitions = num_players // len(args.gpu_id) + 1
        player_gpu_ids = (args.gpu_id * num_repetitions)[:num_players]

    print('Player gpu ids', player_gpu_ids)

    players_game_manager_kwargs = []
    for gpu_id in player_gpu_ids:
        if args.game_type == 'moku':
            (game_config_string, game_manager_module, game_manager_kwargs,
                _, _) = \
                    utils.generate_moku_manager_params(
                        gconf.drop_mode, gconf.moku_size, gconf.board_size,
                        gpu_id, gconf.num_res_layers, gconf.num_channels)
        else:
            raise NotImplementedError('Game type %s is not supported.' %
                                      args.game_type)
        players_game_manager_kwargs.append(game_manager_kwargs)

    train_dir = osp.join('train_files', game_config_string)
    # exist_ok avoids the race between checking for and creating the folder.
    os.makedirs(train_dir, exist_ok=True)

    # NOTE(review): this binds the NetworkConfig class itself, so the
    # overrides below are written to class attributes -- confirm intended.
    netconf = NetworkConfig

    # Command line values override the configured ones only when given.
    if args.games_queue_port >= 0:
        netconf.games_queue_port = args.games_queue_port
    if args.file_server_port >= 0:
        netconf.file_server_port = args.file_server_port
    if args.server_ip != '':
        netconf.server_ip = args.server_ip
    if args.authkey != '':
        netconf.authkey = args.authkey.encode('utf-8')

    client_manager = init_client_manager(netconf.server_ip,
                                         netconf.games_queue_port,
                                         netconf.authkey)
    trmanager_plmanager_queue = client_manager.get_trmanager_plmanager_queue()
    player_trmanager_queue = client_manager.get_player_trmanager_queue()
    plmanager_fileclient_queue = mp.Queue(1)
    fileclient_plmanager_queue = mp.Queue(1)

    plmanager_player_queue = mp.Queue(gconf.queue_capacity)

    print('%s: Launching players' %
          datetime.now().strftime('%Y_%m_%d_%H_%M_%S'))

    file_client_p = mp.Process(target=file_client,
                               args=(
                                   netconf.server_ip,
                                   netconf.file_server_port,
                                   plmanager_fileclient_queue,
                                   fileclient_plmanager_queue,
                               ))
    file_client_p.daemon = True
    file_client_p.start()

    players_p = [
        mp.Process(target=player,
                   args=(
                       player_trmanager_queue,
                       plmanager_player_queue,
                       train_dir,
                       gconf.max_simulations_per_move,
                       gconf.max_seconds_per_move,
                       gconf.move_temperature,
                       gconf.num_relaxed_turns,
                       gconf.random_move_probability,
                       gconf.cpuct,
                       gconf.virtual_loss,
                       gconf.root_noise_weight,
                       gconf.dirichlet_noise_param,
                       gconf.eval_batch_size,
                       game_manager_module,
                       players_game_manager_kwargs[i],
                   )) for i in range(num_players)
    ]
    for p in players_p:
        p.daemon = True
        p.start()

    player_manager_p = mp.Process(target=player_manager,
                                  args=(
                                      trmanager_plmanager_queue,
                                      plmanager_player_queue,
                                      plmanager_fileclient_queue,
                                      fileclient_plmanager_queue,
                                      train_dir,
                                      max_ckpts_to_keep,
                                  ))
    player_manager_p.daemon = True
    player_manager_p.start()

    # Only the players are joined; the other processes are daemons.
    for p in players_p:
        p.join()
Esempio n. 7
0
def main():
    """Start the file server and train manager, then run the trainer.

    After validating the arguments, this looks up the checkpoint state
    recorded in the training directory, creates the queues through
    ``init_server_manager``, starts ``file_server`` and ``train_manager`` as
    daemon processes and finally calls ``trainer`` in the current process.

    Exits with status 1 when the game mode is invalid.

    Raises:
        NotImplementedError: If ``--game_type`` is not ``'moku'``.
    """
    args = parse_args()

    valid_modes_list = utils.get_valid_game_modes()
    valid_modes_string = utils.get_valid_game_modes_string()
    if args.mode not in valid_modes_list:
        print('Invalid game mode informed. Please inform a mode with ' +
              '--mode=mode_name, where mode_name is one of the following ' +
              '{%s}' % valid_modes_string)
        # Non-zero status so the calling shell can detect the bad argument.
        sys.exit(1)

    gconf = utils.get_game_config(args.mode, 'test')

    if args.game_type == 'moku':
        (game_config_string, game_manager_module, game_manager_kwargs,
            _, _) = \
                utils.generate_moku_manager_params(
                    gconf.drop_mode, gconf.moku_size, gconf.board_size,
                    args.gpu_id, gconf.num_res_layers, gconf.num_channels)
    else:
        raise NotImplementedError('Game type %s is not supported.' %
                                  args.game_type)

    train_dir = osp.join('train_files', game_config_string)

    # exist_ok avoids the race between checking for and creating the folder.
    os.makedirs(train_dir, exist_ok=True)

    # ckpt_path stays None when no checkpoint state is recorded in train_dir.
    ckpt_path = None
    ckpt = tf.train.get_checkpoint_state(train_dir)
    if ckpt and ckpt.model_checkpoint_path:
        ckpt_path = ckpt.model_checkpoint_path

    game_manager_kwargs['ckpt_path'] = ckpt_path

    netconf = NetworkConfig()

    # Command line values override the configured ones only when given.
    if args.games_queue_port >= 0:
        netconf.games_queue_port = args.games_queue_port
    if args.file_server_port >= 0:
        netconf.file_server_port = args.file_server_port
    if args.authkey != '':
        netconf.authkey = args.authkey.encode('utf-8')

    server_manager = init_server_manager(netconf.games_queue_port,
                                         netconf.authkey, gconf.queue_capacity)
    player_trmanager_queue = server_manager.get_player_trmanager_queue()
    trmanager_plmanager_queue = server_manager.get_trmanager_plmanager_queue()

    trmanager_trainer_queue = mp.Queue(gconf.queue_capacity)
    trainer_trmanager_queue = mp.Queue(1)

    print('%s: Starting trainer' %
          datetime.now().strftime('%Y_%m_%d_%H_%M_%S'))

    file_server_p = mp.Process(target=file_server,
                               args=(train_dir, netconf.file_server_port))
    file_server_p.daemon = True
    file_server_p.start()

    train_manager_p = mp.Process(
        target=train_manager,
        args=(player_trmanager_queue, trmanager_plmanager_queue,
              trainer_trmanager_queue, trmanager_trainer_queue, train_dir,
              gconf.max_samples_per_result_to_train,
              gconf.num_games_per_checkpoint, gconf.train_batch_size,
              gconf.augment_training_samples, gconf.use_relative_value_labels,
              game_config_string, game_manager_module, game_manager_kwargs))
    train_manager_p.daemon = True
    train_manager_p.start()

    # The trainer runs in this process; the helpers above are daemons.
    trainer(trmanager_trainer_queue, trainer_trmanager_queue, train_dir,
            gconf.train_batch_size, gconf.save_ckpt_interval,
            gconf.max_train_iters, gconf.initial_lr, gconf.lr_decay,
            gconf.lr_decay_steps, gconf.log_interval,
            gconf.backpropagate_losing_policies,
            gconf.keep_checkpoint_every_n_hours, game_config_string,
            game_manager_module, game_manager_kwargs)
Esempio n. 8
0
def main():
    """Play one game between the user and/or the network on the console.

    On each turn the board is printed. The move is read from the user when
    ``--iuser`` is 2 or equals the index of the player to move; otherwise it
    is chosen by MCTS. The user may also ask for the computer's statistics
    for the current position (``GameManagerIO.ICOMPUTER_MOVE``), which are
    printed without playing a move, or leave the game
    (``GameManagerIO.IEXIT``). When the game ends the final board and the
    result are printed.

    Exits with status 1 when the game mode is invalid.

    Raises:
        NotImplementedError: If ``--game_type`` is not ``'moku'``.
    """
    args = parse_args()

    valid_modes_list = utils.get_valid_game_modes()
    valid_modes_string = utils.get_valid_game_modes_string()
    if args.mode not in valid_modes_list:
        print('Invalid game mode informed. Please inform a mode with ' +
              '--mode=mode_name, where mode_name is one of the following ' +
              '{%s}' % valid_modes_string)
        # Non-zero status so the calling shell can detect the bad argument.
        sys.exit(1)

    gconf = utils.get_game_config(args.mode, 'test')

    if args.game_type == 'moku':
        (game_config_string, game_manager_module, game_manager_kwargs,
            game_manager_io_module, game_manager_io_kwargs) = \
                utils.generate_moku_manager_params(
                    gconf.drop_mode, gconf.moku_size, gconf.board_size,
                    args.gpu_id, gconf.num_res_layers, gconf.num_channels)
    else:
        raise NotImplementedError('Game type %s is not supported.' %
                                  args.game_type)

    train_dir = osp.join('train_files', game_config_string)

    ckpt_path = utils.get_checkpoint_path(train_dir, args.num_iters_ckpt)

    game_manager_kwargs['ckpt_path'] = ckpt_path

    gm_module = __import__(game_manager_module[0])
    gm_class = getattr(gm_module, game_manager_module[1])
    game_manager = gm_class(**game_manager_kwargs)

    gmio_module = __import__(game_manager_io_module[0])
    gmio_class = getattr(gmio_module, game_manager_io_module[1])
    game_manager_io = gmio_class(**game_manager_io_kwargs)

    state = game_manager.initial_state()

    mctss = [
        MCTS(game_manager, gconf.max_simulations_per_move, gconf.cpuct,
             gconf.virtual_loss, state, gconf.root_noise_weight,
             gconf.dirichlet_noise_param, gconf.eval_batch_size,
             game_manager_kwargs['tf_device'])
    ]

    iplayer = 0
    iplay = 0
    last_played_imove = None
    # Bound before the loop so the checks after it also work when the loop
    # body never runs (initial state already over).
    imove = None
    while not game_manager.is_over(state.state[np.newaxis])[0]:
        imove = None
        if iplay < gconf.num_relaxed_turns:
            turn_temperature = 1.0
        else:
            turn_temperature = gconf.move_temperature
        imc = iplayer % len(mctss)
        print('===== New turn =====')
        game_manager_io.print_board(state, last_played_imove)
        if args.iuser == 2 or iplayer == args.iuser:
            # User types a move
            imove = game_manager_io.get_input(state)
        if imove == GameManagerIO.IEXIT:
            break
        if imove == GameManagerIO.ICOMPUTER_MOVE or \
                (args.iuser != 2 and iplayer != args.iuser):
            # Computer chooses a move
            stats = mctss[imc].simulate(state, gconf.max_seconds_per_move)
            if args.show_mcts:
                print('MCTS stats')
                game_manager_io.print_stats(stats)
                print()

            if args.show_win_prob or imove == GameManagerIO.ICOMPUTER_MOVE:
                with tf.device(game_manager_kwargs['tf_device']):
                    _, value_prior = game_manager.predict(
                        tf.constant(state.state[np.newaxis], tf.float32))
                    win_prob = (value_prior[0] + 1.0) / 2.0
                    print('Estimated win probability: %.03f\n' % win_prob)

            if args.show_move_prob or imove == GameManagerIO.ICOMPUTER_MOVE:
                print('Move probabilities:')
                game_manager_io.print_stats_on_board(stats, 1)
                print()

            if args.show_move_prob_temp:
                print('Move probabilities with temperature ' +
                      '%.1e' % turn_temperature)
                game_manager_io.print_stats_on_board(stats, turn_temperature)
                print()

            if imove == GameManagerIO.ICOMPUTER_MOVE:
                # If user asked for computer prediction,
                # escape before actually choosing a move
                continue

            imove, _ = mctss[imc].choose_move(turn_temperature)
        last_played_imove = imove
        state = game_manager.update_state(state, last_played_imove)
        iplayer = (iplayer + 1) % 2
        for imc2 in range(len(mctss)):
            mctss[imc2].update_root(last_played_imove, state)
        iplay += 1

    if imove == GameManagerIO.IEXIT:
        print('Game unfinished')
    else:
        game_manager_io.print_board(state, imove)
        # The state is passed as a one-element batch, so take the single
        # entry of the result, as done for is_over() above. This gives the
        # comparisons and the %d formatting below a scalar.
        iwinner = game_manager.get_iwinner(state.state[np.newaxis])[0]
        if iwinner < 0:
            print('DRAW')
        else:
            if args.iuser == 2:
                print('Player %d WON.' % (iwinner + 1))
            elif iwinner == args.iuser:
                print('You WON!')
            else:
                print('You LOST!')
Esempio n. 9
0
def parse_args():
    """Parse the command line arguments.

    Besides ``--mode``, ``--gpu_id``, ``--game_type``, ``--iuser`` and
    ``--num_iters_ckpt``, this defines the boolean switches ``--show_mcts``,
    ``--show_move_prob``, ``--show_move_prob_temp`` and ``--show_win_prob``.
    A switch given without a value is ``True``; a switch given with a value
    is converted by ``str_to_bool`` below, so ``--show_mcts False`` yields
    ``False``. An unrecognized value is reported as an argparse usage error.

    Returns:
        argparse.Namespace: The parsed arguments.
    """

    def str_to_bool(text):
        """Convert a command line word to a bool.

        ``type=bool`` would map every non-empty string (including "False"
        and "0") to True, so the accepted spellings are listed explicitly.
        """
        word = text.strip().lower()
        if word in ('1', 'true', 't', 'yes', 'y', 'on'):
            return True
        if word in ('', '0', 'false', 'f', 'no', 'n', 'off'):
            return False
        raise argparse.ArgumentTypeError(
            'expected a boolean value, got %r' % text)

    parser = argparse.ArgumentParser()

    def add_switch(short_flag, long_flag, help_text):
        """Register an optional boolean switch that defaults to False."""
        parser.add_argument(short_flag,
                            long_flag,
                            help=help_text,
                            nargs='?',
                            const=True,
                            default=False,
                            type=str_to_bool)

    valid_modes = utils.get_valid_game_modes_string()
    parser.add_argument('--mode',
                        help=('A valid game mode name. valid modes are {%s}.' %
                              valid_modes),
                        default=None)
    parser.add_argument('--gpu_id',
                        help=('GPU id to use, or -1 to use the CPU.'),
                        default=0,
                        type=int)
    parser.add_argument(
        '--game_type',
        help=('Type is a more general term which may include many game ' +
              'modes. For example, moku is the type of tictactoe, connect4 ' +
              'and gomoku modes.'),
        default='moku')
    parser.add_argument(
        '--iuser',
        help=('Index of the user, 0 to play first and 1 to play second. ' +
              'Or you can also use -1 to let the computer play as both ' +
              'players or 2 if you want to play as both players.'),
        default=0,
        type=int)
    parser.add_argument(
        '--num_iters_ckpt',
        help=('Number of iterations in the checkpoint to load. ' +
              'e.g. if the file is called moku3_3x3_1000.ckpt, type 1000. ' +
              'Use -1 to load the latest checkpoint or 0 to use a naive ' +
              'network.'),
        default=-1,
        type=int)
    add_switch(
        '-sm', '--show_mcts',
        ('If set, the MCTS stats for the current state will ' +
         'be displayed.'))
    add_switch(
        '-sp', '--show_move_prob',
        ('If set, the probabilities of playing at each position will ' +
         'be displayed.'))
    add_switch(
        '-spt', '--show_move_prob_temp',
        ('If set, the probabilities of playing at each position ' +
         'rebalanced by the temperature will be displayed.'))
    add_switch(
        '-sw', '--show_win_prob',
        ('If set, the winning probability estimated by the network ' +
         'will be displayed.'))
    return parser.parse_args()
Esempio n. 10
0
def main():
    """Run the self-play training pipeline from the command line.

    Steps, in order:

    1. Parse the command-line arguments and validate ``--mode``.
    2. Decide which GPU id each player process uses.
    3. Build the game-manager parameters for the trainer and, separately,
       for every player process.
    4. Look up the latest checkpoint in the training directory (if any) and
       hand its path to the trainer-side game-manager kwargs.
    5. Start the train manager, the players and the player manager as daemon
       processes, then run ``trainer`` in the calling process.

    Raises:
        SystemExit: with status 1 when ``--mode`` is not a valid game mode.
        NotImplementedError: when ``--game_type`` is not ``'moku'``.
    """
    args = parse_args()

    valid_modes_list = utils.get_valid_game_modes()
    valid_modes_string = utils.get_valid_game_modes_string()
    if args.mode not in valid_modes_list:
        print('Invalid game mode informed. Please inform a mode with ' +
              '--mode=mode_name, where mode_name is one of the following ' +
              '{%s}' % valid_modes_string)
        # Invalid usage is an error, so report a non-zero exit status.
        sys.exit(1)

    args.gpu_id = [int(x) for x in args.gpu_id]
    gpu_ids = args.gpu_id
    num_players = args.num_player_processes

    gconf = utils.get_game_config(args.mode, 'test')

    if not gpu_ids:
        # No GPU given: every player gets id -1.
        player_gpu_ids = [-1] * num_players
    elif len(gpu_ids) == 1:
        # A single GPU is shared by the trainer and all the players.
        player_gpu_ids = [gpu_ids[0]] * num_players
    else:
        # Leave first GPU for training and the others for playing,
        # assigning the remaining GPUs to the players round-robin.
        gpus_for_players = gpu_ids[1:]
        player_gpu_ids = [
            gpus_for_players[i % len(gpus_for_players)]
            for i in range(num_players)
        ]

    print('Player gpu ids', player_gpu_ids)

    if args.game_type != 'moku':
        raise NotImplementedError('Game type %s is not supported.' %
                                  args.game_type)

    def build_manager_params(gpu_id):
        # Same game configuration for everyone; only the GPU id varies.
        return utils.generate_moku_manager_params(
            gconf.drop_mode, gconf.moku_size, gconf.board_size,
            gpu_id, gconf.num_res_layers, gconf.num_channels)

    # The trainer uses the first GPU, falling back to -1 (the same value
    # the players get) when the GPU list is empty instead of raising
    # IndexError.
    trainer_gpu_id = gpu_ids[0] if gpu_ids else -1
    (game_config_string, game_manager_module, game_manager_kwargs,
     _, _) = build_manager_params(trainer_gpu_id)

    max_ckpts_to_keep = 1

    # Build the players' kwargs under their own names so that the
    # trainer-side values above are not overwritten by the last player's.
    players_game_manager_kwargs = []
    for gpu_id in player_gpu_ids:
        _, _, player_kwargs, _, _ = build_manager_params(gpu_id)
        players_game_manager_kwargs.append(player_kwargs)

    train_dir = osp.join('train_files', game_config_string)

    os.makedirs(train_dir, exist_ok=True)

    # Resume from the latest checkpoint recorded in train_dir, if any.
    ckpt_path = None
    ckpt = tf.train.get_checkpoint_state(train_dir)
    if ckpt and ckpt.model_checkpoint_path:
        ckpt_path = ckpt.model_checkpoint_path

    game_manager_kwargs['ckpt_path'] = ckpt_path

    trmanager_trainer_queue = mp.Queue(gconf.queue_capacity)
    trainer_trmanager_queue = mp.Queue(1)

    print('%s: Starting trainer' %
          datetime.now().strftime('%Y_%m_%d_%H_%M_%S'))

    player_trmanager_queue = mp.Queue(gconf.queue_capacity)
    trmanager_plmanager_queue = mp.Queue(gconf.queue_capacity)

    train_manager_p = mp.Process(
        target=train_manager,
        args=(player_trmanager_queue, trmanager_plmanager_queue,
              trainer_trmanager_queue, trmanager_trainer_queue, train_dir,
              gconf.max_samples_per_result_to_train,
              gconf.num_games_per_checkpoint, gconf.train_batch_size,
              gconf.augment_training_samples, gconf.use_relative_value_labels,
              game_config_string, game_manager_module, game_manager_kwargs))
    train_manager_p.daemon = True
    train_manager_p.start()

    plmanager_player_queue = mp.Queue(gconf.queue_capacity)

    print('%s: Launching players' %
          datetime.now().strftime('%Y_%m_%d_%H_%M_%S'))

    players_p = [
        mp.Process(target=player,
                   args=(
                       player_trmanager_queue,
                       plmanager_player_queue,
                       train_dir,
                       gconf.max_simulations_per_move,
                       gconf.max_seconds_per_move,
                       gconf.move_temperature,
                       gconf.num_relaxed_turns,
                       gconf.random_move_probability,
                       gconf.cpuct,
                       gconf.virtual_loss,
                       gconf.root_noise_weight,
                       gconf.dirichlet_noise_param,
                       gconf.eval_batch_size,
                       game_manager_module,
                       player_kwargs,
                   )) for player_kwargs in players_game_manager_kwargs
    ]
    for p in players_p:
        p.daemon = True
        p.start()

    # No file client is involved here, so both of its queues are None.
    plmanager_fileclient_queue = None
    fileclient_plmanager_queue = None
    player_manager_p = mp.Process(target=player_manager,
                                  args=(
                                      trmanager_plmanager_queue,
                                      plmanager_player_queue,
                                      plmanager_fileclient_queue,
                                      fileclient_plmanager_queue,
                                      train_dir,
                                      max_ckpts_to_keep,
                                  ))
    player_manager_p.daemon = True
    player_manager_p.start()

    # The trainer runs in the calling process; the helper processes above
    # are daemons.
    trainer(trmanager_trainer_queue, trainer_trmanager_queue, train_dir,
            gconf.train_batch_size, gconf.save_ckpt_interval,
            gconf.max_train_iters, gconf.initial_lr, gconf.lr_decay,
            gconf.lr_decay_steps, gconf.log_interval,
            gconf.backpropagate_losing_policies,
            gconf.keep_checkpoint_every_n_hours, game_config_string,
            game_manager_module, game_manager_kwargs)