Example #1
0
def run():
    """Build a small AGZ-style dual-headed network, self-play ten games,
    train the black agent on the combined experience, and append the
    experience to an HDF5 file.
    """
    board_size = 9
    encoder = zero.ZeroEncoder(board_size)
    board_input = Input(shape=encoder.shape(), name='board_input')

    # Shared convolutional trunk (channels-first to match the encoder).
    pb = board_input
    for _ in range(16):  # loop index was unused
        pb = Conv2D(64, (3, 3),
                    padding='same',
                    data_format='channels_first',
                    activation='relu')(pb)

    # Policy head: softmax over every move the encoder can represent.
    policy_conv = Conv2D(2, (1, 1),
                         data_format='channels_first',
                         activation='relu')(pb)
    policy_flat = Flatten()(policy_conv)
    policy_output = Dense(encoder.num_moves(),
                          activation='softmax')(policy_flat)

    # Value head: scalar game-outcome estimate squashed into [-1, 1].
    value_conv = Conv2D(1, (1, 1),
                        data_format='channels_first',
                        activation='relu')(pb)
    value_flat = Flatten()(value_conv)
    value_hidden = Dense(256, activation='relu')(value_flat)
    value_output = Dense(1, activation='tanh')(value_hidden)

    model = Model(inputs=[board_input], outputs=[policy_output, value_output])

    # Both agents share one model; each records into its own collector.
    black_agent = zero.ZeroAgent(model, encoder, rounds_per_move=10, c=2.0)
    white_agent = zero.ZeroAgent(model, encoder, rounds_per_move=10, c=2.0)

    c1 = zero.ZeroExperienceCollector()
    c2 = zero.ZeroExperienceCollector()
    black_agent.set_collector(c1)
    white_agent.set_collector(c2)

    num_games = 10
    for i in range(num_games):
        print(f'Game {i+1}/{num_games}')
        start_time = time.time()
        simulate_game(board_size, black_agent, c1, white_agent, c2)

        elapsed = time.time() - start_time
        print(f'elapsed: {elapsed} s')
        # Estimate uses only the most recent game's duration.
        print(
            f'estimated time remaining this session: {(num_games - (i + 1)) * elapsed} s'
        )

    exp = zero.combine_experience([c1, c2], board_size)
    black_agent.train(exp, 0.01, 1024)

    # Append mode so experience accumulates across sessions.
    with h5py.File('agz_experience.h5', 'a') as expfile:
        exp.serialize(expfile)
Example #2
0
def main():
    """Smoke-test the zero pipeline: build a small dual-headed network,
    play three moves of self-play, and run one training step on the
    recorded experience.
    """
    board_size = 9
    encoder = zero.ZeroEncoder(board_size)

    board_input = Input(shape=encoder.shape(), name='board_input')

    pb = board_input

    # Conv trunk: linear conv -> batch norm (channel axis 1) -> ReLU.
    for _ in range(4):
        pb = Conv2D(64, (3, 3), padding='same',
                    data_format='channels_first')(pb)
        pb = BatchNormalization(axis=1)(pb)
        pb = Activation('relu')(pb)

    # Policy output
    policy_conv = Conv2D(2, (1, 1), data_format='channels_first')(pb)
    policy_batch = BatchNormalization(axis=1)(policy_conv)
    policy_relu = Activation('relu')(policy_batch)
    policy_flat = Flatten()(policy_relu)
    policy_output = Dense(encoder.num_moves(),
                          activation='softmax')(policy_flat)

    # Value output
    value_conv = Conv2D(1, (1, 1), data_format='channels_first')(pb)
    value_batch = BatchNormalization(axis=1)(value_conv)
    value_relu = Activation('relu')(value_batch)
    value_flat = Flatten()(value_relu)
    value_hidden = Dense(256, activation='relu')(value_flat)
    value_output = Dense(1, activation='tanh')(value_hidden)

    model = Model(inputs=[board_input], outputs=[policy_output, value_output])

    c1 = zero.ZeroExperienceCollector()
    c2 = zero.ZeroExperienceCollector()
    black_agent = zero.ZeroAgent(model, encoder, rounds_per_move=10, c=2.0)
    white_agent = zero.ZeroAgent(model, encoder, rounds_per_move=10, c=2.0)
    black_agent.set_collector(c1)
    white_agent.set_collector(c2)

    print('Starting the game!')
    game = GameState.new_game(board_size)

    c1.begin_episode()
    c2.begin_episode()
    black_move = black_agent.select_move(game)
    print('B', black_move)
    game = game.apply_move(black_move)
    white_move = white_agent.select_move(game)
    print('W', white_move)
    # BUG FIX: the white move was never applied, so black's second move
    # was selected from a position that did not include it.
    game = game.apply_move(white_move)
    black_move = black_agent.select_move(game)
    print('B', black_move)

    # Hard-code a black win for this smoke test.
    c1.complete_episode(1)
    c2.complete_episode(-1)
    exp = zero.combine_experience([c1, c2])
    black_agent.train(exp, 0.01, 2048)
def main():
    """Create a fresh ZeroAgent backed by a dual-residual network and
    serialize it to the HDF5 file named on the command line."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--board-size', type=int, default=19)
    parser.add_argument('output_file')
    args = parser.parse_args()

    encoder = encoders.get_encoder_by_name('zero', args.board_size)
    model = networks.dual_residual_network(
        input_shape=encoder.shape(), blocks=8)
    model.summary()

    # Serialize a brand-new (untrained) agent as the starting bot.
    new_agent = zero.ZeroAgent(model, encoder, rounds_per_move=1000, c=2.0)
    with h5py.File(args.output_file, 'w') as outf:
        new_agent.serialize(outf)
Example #4
0
def main():
    """Build a small AGZ-style network, self-play five games, and train
    the black agent on the combined experience.
    """
    board_size = 9
    encoder = zero.ZeroEncoder(board_size)

    board_input = Input(shape=encoder.shape(), name='board_input')
    pb = board_input
    # Conv trunk: linear conv -> BN -> ReLU. FIX: the original passed
    # activation='relu' to Conv2D AND applied Activation('relu') after
    # BatchNormalization, activating twice with BN in between; the
    # matching example elsewhere in this file uses a linear conv here.
    for _ in range(4):
        pb = Conv2D(64, (3, 3),
                    padding='same',
                    data_format='channels_first')(pb)
        pb = BatchNormalization(axis=1)(pb)
        pb = Activation('relu')(pb)

    # Policy head: conv -> BN -> ReLU -> softmax over all encoder moves.
    policy_conv = Conv2D(2, (1, 1),
                         data_format='channels_first')(pb)
    policy_batch = BatchNormalization(axis=1)(policy_conv)
    policy_relu = Activation('relu')(policy_batch)
    policy_flat = Flatten()(policy_relu)
    policy_output = Dense(encoder.num_moves(),
                          activation='softmax')(policy_flat)

    # Value head: conv -> BN -> ReLU -> dense -> tanh scalar in [-1, 1].
    value_conv = Conv2D(1, (1, 1),
                        data_format='channels_first')(pb)
    value_batch = BatchNormalization(axis=1)(value_conv)
    value_relu = Activation('relu')(value_batch)
    value_flat = Flatten()(value_relu)
    value_hidden = Dense(256, activation='relu')(value_flat)
    value_output = Dense(1, activation='tanh')(value_hidden)

    model = Model(inputs=[board_input], outputs=[policy_output, value_output])
    black_agent = zero.ZeroAgent(model, encoder, rounds_per_move=10, c=2.0)
    # BUG FIX: was 'zerp.ZeroAgent' (NameError at runtime).
    white_agent = zero.ZeroAgent(model, encoder, rounds_per_move=10, c=2.0)
    c1 = zero.ZeroExperienceCollector()
    c2 = zero.ZeroExperienceCollector()
    black_agent.set_collector(c1)
    white_agent.set_collector(c2)

    for _ in range(5):
        simulate_game(board_size, black_agent, c1, white_agent, c2)

    exp = zero.combine_experience([c1, c2])
    black_agent.train(exp, 0.01, 2048)
Example #5
0
                      activation='softmax')(policy_flat)

# Value head: 1x1 conv collapses the trunk to a single plane, then a
# dense layer feeds a tanh output — a scalar outcome estimate in [-1, 1].
value_conv = Conv2D(1, (1, 1),
                    data_format='channels_first',
                    activation='relu')(pb)                           # <3>
value_flat = Flatten()(value_conv)                       # <3>
value_hidden = Dense(256, activation='relu')(value_flat) # <3>
value_output = Dense(1, activation='tanh')(value_hidden) # <3>

# One model with two heads: move probabilities and a value estimate.
model = Model(
    inputs=[board_input],
    outputs=[policy_output, value_output])
# end::zero_model[]

# tag::zero_train[]
# Both agents share the same model; each records its own experience.
black_agent = zero.ZeroAgent(
    model, encoder, rounds_per_move=10, c=2.0)  # <4>
white_agent = zero.ZeroAgent(
    model, encoder, rounds_per_move=10, c=2.0)
c1 = zero.ZeroExperienceCollector()
c2 = zero.ZeroExperienceCollector()
black_agent.set_collector(c1)
white_agent.set_collector(c2)

for i in range(5):   # <5>
    simulate_game(board_size, black_agent, c1, white_agent, c2)

# Only black_agent.train is called, but both agents share one model,
# so the update benefits both sides.
exp = zero.combine_experience([c1, c2])
black_agent.train(exp, 0.01, 2048)
# end::zero_train[]
Example #6
0
def generate_game(board_size, game_id_str, rounds_per_move=10, c=2.0):
    """Self-play one game and return (experience, game_id_str, seconds).

    Loads the current best agent from 'agz_bot.h5' when present,
    otherwise builds a default model. Aggressively releases model and
    Keras-session state before returning so the result can pass through
    cPickle in a worker process without dragging graph objects along.
    """
    import gc  # hoisted from below; used for the explicit collect at the end

    start = time.time()
    print(f'Generating {game_id_str}...')

    game = GameState.new_game(board_size)
    encoder = zero.ZeroEncoder(board_size)

    # Load the current best agent, if any. Re-loaded per call (rather
    # than reusing a shared agent) because results must pass through
    # cPickle.
    if os.path.exists('agz_bot.h5'):
        # FIX: open explicitly read-only. The unspecified mode defaulted
        # to append ('a') in older h5py, which can fail on locked or
        # read-only files and is deprecated.
        with h5py.File('agz_bot.h5', 'r') as bot_file:
            black_agent = zero.load_zero_agent(bot_file)
            white_agent = zero.load_zero_agent(bot_file)
    else:
        print(f'WARN: using default model to generate {game_id_str}')

        model = zero_model(board_size)

        black_agent = zero.ZeroAgent(model, encoder, rounds_per_move=rounds_per_move, c=c)
        white_agent = zero.ZeroAgent(model, encoder, rounds_per_move=rounds_per_move, c=c)

    agents = {
        Player.black: black_agent,
        Player.white: white_agent,
    }

    c1 = zero.ZeroExperienceCollector()
    c2 = zero.ZeroExperienceCollector()

    black_agent.set_collector(c1)
    white_agent.set_collector(c2)

    c1.begin_episode()
    c2.begin_episode()

    # Play the game to completion, alternating agents by whose turn it is.
    while not game.is_over():
        next_move = agents[game.next_player].select_move(game)
        game = game.apply_move(next_move)

    game_result = scoring.compute_game_result(game)

    # +1 for the winner, -1 for the loser (a non-black result counts as
    # a white win here).
    if game_result.winner == Player.black:
        c1.complete_episode(1)
        c2.complete_episode(-1)
    else:
        c1.complete_episode(-1)
        c2.complete_episode(1)

    combined = zero.combine_experience([c1, c2], board_size)

    # Drop every reference to large objects and clear the Keras session
    # so graph/tensor state does not accumulate across worker calls.
    c1 = c2 = game_result = None
    model = encoder = None
    game = None

    del black_agent.model
    del white_agent.model

    black_agent = white_agent = None

    K.clear_session()
    gc.collect()

    return combined, game_id_str, time.time() - start