Example #1
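These listings come from an AlphaGo Zero-style Go bot built on the dlgo package (from Deep Learning and the Game of Go) and Keras, and they omit their imports. A plausible preamble is sketched below; the exact module paths (dlgo.goboard_fast, dlgo.gotypes, and so on) are assumptions and may need adjusting to match your copy of the code.

import os
import time

import h5py
from keras import backend as K
from keras.layers import Activation, BatchNormalization, Conv2D, Dense, Flatten, Input
from keras.models import Model

from dlgo import scoring
from dlgo import zero
from dlgo.goboard_fast import GameState  # assumed; the plain goboard module also works
from dlgo.gotypes import Player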
def run():
    board_size = 9
    encoder = zero.ZeroEncoder(board_size)
    board_input = Input(shape=encoder.shape(), name='board_input')
    pb = board_input

    for i in range(16):
        pb = Conv2D(64, (3, 3),
                    padding='same',
                    data_format='channels_first',
                    activation='relu')(pb)

    policy_conv = Conv2D(2, (1, 1),
                         data_format='channels_first',
                         activation='relu')(pb)

    policy_flat = Flatten()(policy_conv)

    policy_output = Dense(encoder.num_moves(),
                          activation='softmax')(policy_flat)

    value_conv = Conv2D(1, (1, 1),
                        data_format='channels_first',
                        activation='relu')(pb)

    value_flat = Flatten()(value_conv)
    value_hidden = Dense(256, activation='relu')(value_flat)
    value_output = Dense(1, activation='tanh')(value_hidden)

    model = Model(inputs=[board_input], outputs=[policy_output, value_output])

    black_agent = zero.ZeroAgent(model, encoder, rounds_per_move=10, c=2.0)
    white_agent = zero.ZeroAgent(model, encoder, rounds_per_move=10, c=2.0)

    c1 = zero.ZeroExperienceCollector()
    c2 = zero.ZeroExperienceCollector()

    black_agent.set_collector(c1)
    white_agent.set_collector(c2)

    num_games = 10

    for i in range(num_games):
        print(f'Game {i+1}/{num_games}')
        start_time = time.time()
        simulate_game(board_size, black_agent, c1, white_agent, c2)

        elapsed = time.time() - start_time
        remaining = (num_games - (i + 1)) * elapsed
        print(f'elapsed: {elapsed:.1f} s')
        print(f'estimated time remaining this session: {remaining:.1f} s')

    exp = zero.combine_experience([c1, c2], board_size)
    black_agent.train(exp, 0.01, 1024)

    with h5py.File('agz_experience.h5', 'a') as expfile:
        exp.serialize(expfile)
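Once run() has written agz_experience.h5, a later session can reload that data and keep training without replaying the games. The sketch below assumes the zero package exposes a load_experience helper mirroring combine_experience; that helper name and the train() arguments (learning rate, batch size) are assumptions, so adjust them to your version of the code.

def train_from_saved_experience(agent, filename='agz_experience.h5'):
    # agent is a zero.ZeroAgent built the same way as in run() above.
    with h5py.File(filename, 'r') as expfile:
        exp = zero.load_experience(expfile)  # assumed helper; see note above
    agent.train(exp, 0.01, 1024)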
Example #2
def main():
    board_size = 9
    encoder = zero.ZeroEncoder(board_size)

    board_input = Input(shape=encoder.shape(), name='board_input')

    pb = board_input

    for i in range(4):
        pb = Conv2D(64, (3, 3), padding='same',
                    data_format='channels_first')(pb)
        pb = BatchNormalization(axis=1)(pb)
        pb = Activation('relu')(pb)

    # Policy output
    policy_conv = Conv2D(2, (1, 1), data_format='channels_first')(pb)
    policy_batch = BatchNormalization(axis=1)(policy_conv)
    policy_relu = Activation('relu')(policy_batch)
    policy_flat = Flatten()(policy_relu)
    policy_output = Dense(encoder.num_moves(),
                          activation='softmax')(policy_flat)

    # Value output
    value_conv = Conv2D(1, (1, 1), data_format='channels_first')(pb)
    value_batch = BatchNormalization(axis=1)(value_conv)
    value_relu = Activation('relu')(value_batch)
    value_flat = Flatten()(value_relu)
    value_hidden = Dense(256, activation='relu')(value_flat)
    value_output = Dense(1, activation='tanh')(value_hidden)

    model = Model(inputs=[board_input], outputs=[policy_output, value_output])

    c1 = zero.ZeroExperienceCollector()
    c2 = zero.ZeroExperienceCollector()
    black_agent = zero.ZeroAgent(model, encoder, rounds_per_move=10, c=2.0)
    white_agent = zero.ZeroAgent(model, encoder, rounds_per_move=10, c=2.0)
    black_agent.set_collector(c1)
    white_agent.set_collector(c2)

    print('Starting the game!')
    game = GameState.new_game(board_size)

    c1.begin_episode()
    c2.begin_episode()
    black_move = black_agent.select_move(game)
    print('B', black_move)
    game = game.apply_move(black_move)
    white_move = white_agent.select_move(game)
    print('W', white_move)
    game = game.apply_move(white_move)
    black_move = black_agent.select_move(game)
    print('B', black_move)

    c1.complete_episode(1)
    c2.complete_episode(-1)
    exp = zero.combine_experience([c1, c2])
    black_agent.train(exp, 0.01, 2048)
Example #3
def main():
    board_size = 9
    encoder = zero.ZeroEncoder(board_size)

    board_input = Input(shape=encoder.shape(), name='board_input')
    pb = board_input
    for i in range(4):
        pb = Conv2D(64, (3, 3),
                    padding='same',
                    data_format='channels_first')(pb)
        pb = BatchNormalization(axis=1)(pb)
        pb = Activation('relu')(pb)

    policy_conv = Conv2D(2, (1, 1),
                         data_format='channels_first',
                         activation='relu')(pb)
    policy_batch = BatchNormalization(axis=1)(policy_conv)
    policy_flat = Flatten()(policy_batch)
    policy_output = Dense(encoder.num_moves(),
                          activation='softmax')(policy_flat)

    value_conv = Conv2D(1, (1, 1),
                        data_format='channels_first',
                        activation='relu')(pb)
    value_batch = BatchNormalization(axis=1)(value_conv)
    value_flat = Flatten()(value_batch)
    value_hidden = Dense(256, activation='relu')(value_flat)
    value_output = Dense(1, activation='tanh')(value_hidden)

    model = Model(inputs=[board_input], outputs=[policy_output, value_output])
    black_agent = zero.ZeroAgent(model, encoder, rounds_per_move=10, c=2.0)
    white_agent = zero.ZeroAgent(model, encoder, rounds_per_move=10, c=2.0)
    c1 = zero.ZeroExperienceCollector()
    c2 = zero.ZeroExperienceCollector()
    black_agent.set_collector(c1)
    white_agent.set_collector(c2)

    for i in range(5):
        simulate_game(board_size, black_agent, c1, white_agent, c2)

    exp = zero.combine_experience([c1, c2])
    black_agent.train(exp, 0.01, 2048)
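Example #3 improves black_agent but never persists it, while Example #6 later expects a saved bot in agz_bot.h5. Bridging the two might look like the sketch below; it assumes ZeroAgent has a serialize(h5file) method that load_zero_agent can read back, which is an assumption about the zero package rather than something shown in these listings.

with h5py.File('agz_bot.h5', 'w') as bot_file:
    black_agent.serialize(bot_file)  # assumed method; counterpart to zero.load_zero_agent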
Example #4
def zero_model(board_size):
    residual_layers = 4

    encoder = zero.ZeroEncoder(board_size)
    board_input = Input(shape=encoder.shape(), name='board_input')
    pb = board_input

    pb = Conv2D(128, (3, 3), padding='same', data_format='channels_first')(pb)
    pb = BatchNormalization(axis=1)(pb)
    pb = Activation('relu')(pb)

    for i in range(residual_layers):
        pb = create_residual_block(pb)

    neck = pb

    policy_head = create_policy_head(neck, board_size)
    value_head = create_value_head(neck, board_size)

    return Model(inputs=[board_input], outputs=[policy_head, value_head])
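Example #4 calls create_residual_block, create_policy_head, and create_value_head, none of which are shown here. A plausible create_residual_block matching the 128-filter channels_first stem is sketched below; the helper name comes from the listing, but the layer layout (two 3x3 convolutions with batch norm and a skip connection, as in the AlphaGo Zero paper) is an assumption. The two heads would follow the same pattern as the policy and value outputs in Examples #2 and #3.

from keras.layers import Add

def create_residual_block(input_tensor, filters=128):
    # Two 3x3 conv + batch-norm stages, then add the block input back in
    # before the final ReLU (a standard residual block).
    pb = Conv2D(filters, (3, 3), padding='same',
                data_format='channels_first')(input_tensor)
    pb = BatchNormalization(axis=1)(pb)
    pb = Activation('relu')(pb)
    pb = Conv2D(filters, (3, 3), padding='same',
                data_format='channels_first')(pb)
    pb = BatchNormalization(axis=1)(pb)
    pb = Add()([input_tensor, pb])
    return Activation('relu')(pb)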
Example #5
# tag::zero_simulate[]
def simulate_game(board_size, black_agent, black_collector,
                  white_agent, white_collector):
    game = GameState.new_game(board_size)
    agents = {
        Player.black: black_agent,
        Player.white: white_agent,
    }
    black_collector.begin_episode()
    white_collector.begin_episode()
    while not game.is_over():
        next_move = agents[game.next_player].select_move(game)
        game = game.apply_move(next_move)

    game_result = scoring.compute_game_result(game)
    if game_result.winner == Player.black:
        black_collector.complete_episode(1)
        white_collector.complete_episode(-1)
    else:
        black_collector.complete_episode(-1)
        white_collector.complete_episode(1)
# end::zero_simulate[]


# tag::zero_model[]
board_size = 9
encoder = zero.ZeroEncoder(board_size)

board_input = Input(shape=encoder.shape(), name='board_input')
pb = board_input
for i in range(4):                     # <1>
    pb = Conv2D(64, (3, 3),            # <1>
        padding='same',                # <1>
        data_format='channels_first',  # <1>
        activation='relu')(pb)         # <1>

policy_conv = Conv2D(2, (1, 1),
                     data_format='channels_first',
                     activation='relu')(pb)
policy_flat = Flatten()(policy_conv)
policy_output = Dense(encoder.num_moves(),
                      activation='softmax')(policy_flat)
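The tagged excerpt stops after the policy output. In the fuller listings (Examples #1 and #3) the value head and model construction follow directly, in the same style:

value_conv = Conv2D(1, (1, 1),
                    data_format='channels_first',
                    activation='relu')(pb)
value_flat = Flatten()(value_conv)
value_hidden = Dense(256, activation='relu')(value_flat)
value_output = Dense(1, activation='tanh')(value_hidden)

model = Model(inputs=[board_input], outputs=[policy_output, value_output])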
Example #6
def generate_game(board_size, game_id_str, rounds_per_move=10, c=2.0):
    start = time.time()
    print(f'Generating {game_id_str}...')

    game = GameState.new_game(board_size)
    encoder = zero.ZeroEncoder(board_size)

    # Load the current best agent, if any.  This worker's inputs and outputs
    # have to pass through cPickle, which is why we reload the agent from
    # disk here instead of reusing a live instance from the parent process.

    if os.path.exists('agz_bot.h5'):
        with h5py.File('agz_bot.h5', 'r') as bot_file:
            black_agent = zero.load_zero_agent(bot_file)
            white_agent = zero.load_zero_agent(bot_file)

    else:
        print(f'WARN: using default model to generate {game_id_str}')

        model = zero_model(board_size)

        black_agent = zero.ZeroAgent(model, encoder, rounds_per_move=rounds_per_move, c=c)
        white_agent = zero.ZeroAgent(model, encoder, rounds_per_move=rounds_per_move, c=c)

    agents = {
        Player.black: black_agent,
        Player.white: white_agent,
    }

    c1 = zero.ZeroExperienceCollector()
    c2 = zero.ZeroExperienceCollector()

    black_agent.set_collector(c1)
    white_agent.set_collector(c2)

    c1.begin_episode()
    c2.begin_episode()

    while not game.is_over():
        next_move = agents[game.next_player].select_move(game)
        game = game.apply_move(next_move)

    game_result = scoring.compute_game_result(game)

    if game_result.winner == Player.black:
        c1.complete_episode(1)
        c2.complete_episode(-1)
    else:
        c1.complete_episode(-1)
        c2.complete_episode(1)

    combined = zero.combine_experience([c1, c2], board_size)

    # Release references to the large objects so this worker's memory can be
    # reclaimed before the next game.
    c1 = c2 = game_result = None
    model = encoder = None
    game = None

    del black_agent.model
    del white_agent.model

    black_agent = white_agent = None

    import gc

    K.clear_session()
    gc.collect()

    return combined, game_id_str, time.time() - start
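generate_game is written as a self-contained worker: it reloads the current best agent from disk, clears the Keras session afterwards, and returns only picklable data, so it can be fanned out across processes. A hypothetical driver (the _worker helper, pool size, and game count are illustrative, not part of the original) might look like this:

from multiprocessing import Pool

def _worker(args):
    board_size, game_id = args
    return generate_game(board_size, game_id)

if __name__ == '__main__':
    jobs = [(9, f'game_{i:04d}') for i in range(8)]
    with Pool(processes=2) as pool:
        for experience, game_id, elapsed in pool.map(_worker, jobs):
            print(f'{game_id} finished in {elapsed:.1f} s')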