Example #1
def run():
    board_size = 9
    encoder = zero.ZeroEncoder(board_size)
    board_input = Input(shape=encoder.shape(), name='board_input')
    pb = board_input

    for i in range(16):
        pb = Conv2D(64, (3, 3),
                    padding='same',
                    data_format='channels_first',
                    activation='relu')(pb)

    policy_conv = Conv2D(2, (1, 1),
                         data_format='channels_first',
                         activation='relu')(pb)

    policy_flat = Flatten()(policy_conv)

    policy_output = Dense(encoder.num_moves(),
                          activation='softmax')(policy_flat)

    value_conv = Conv2D(1, (1, 1),
                        data_format='channels_first',
                        activation='relu')(pb)

    value_flat = Flatten()(value_conv)
    value_hidden = Dense(256, activation='relu')(value_flat)
    value_output = Dense(1, activation='tanh')(value_hidden)

    model = Model(inputs=[board_input], outputs=[policy_output, value_output])

    black_agent = zero.ZeroAgent(model, encoder, rounds_per_move=10, c=2.0)
    white_agent = zero.ZeroAgent(model, encoder, rounds_per_move=10, c=2.0)

    c1 = zero.ZeroExperienceCollector()
    c2 = zero.ZeroExperienceCollector()

    black_agent.set_collector(c1)
    white_agent.set_collector(c2)

    num_games = 10

    for i in range(num_games):
        print(f'Game {i+1}/{num_games}')
        start_time = time.time()
        simulate_game(board_size, black_agent, c1, white_agent, c2)

        elapsed = time.time() - start_time
        print(f'elapsed: {elapsed:.1f} s')
        remaining = (num_games - (i + 1)) * elapsed
        print(f'estimated time remaining this session: {remaining:.1f} s')

    exp = zero.combine_experience([c1, c2], board_size)
    black_agent.train(exp, 0.01, 1024)  # learning rate 0.01, batch size 1024

    with h5py.File('agz_experience.h5', 'a') as expfile:
        exp.serialize(expfile)
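The loop above calls a simulate_game helper that is not shown in this example. A minimal sketch of what it might look like, inferred from the call site above and from the self-play loop in Example #7; treat it as an illustration rather than the original implementation:

def simulate_game(board_size, black_agent, black_collector,
                  white_agent, white_collector):
    # Play one self-play game and record it through the two collectors.
    game = GameState.new_game(board_size)
    agents = {Player.black: black_agent, Player.white: white_agent}
    black_collector.begin_episode()
    white_collector.begin_episode()
    while not game.is_over():
        next_move = agents[game.next_player].select_move(game)
        game = game.apply_move(next_move)
    # Score the finished game and reward the winner's collector.
    game_result = scoring.compute_game_result(game)
    if game_result.winner == Player.black:
        black_collector.complete_episode(reward=1)
        white_collector.complete_episode(reward=-1)
    else:
        black_collector.complete_episode(reward=-1)
        white_collector.complete_episode(reward=1)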
Example #2
def main():
    board_size = 9
    encoder = zero.ZeroEncoder(board_size)

    board_input = Input(shape=encoder.shape(), name='board_input')

    pb = board_input

    for i in range(4):
        pb = Conv2D(64, (3, 3), padding='same',
                    data_format='channels_first')(pb)
        pb = BatchNormalization(axis=1)(pb)
        pb = Activation('relu')(pb)

    # Policy output
    policy_conv = Conv2D(2, (1, 1), data_format='channels_first')(pb)
    policy_batch = BatchNormalization(axis=1)(policy_conv)
    policy_relu = Activation('relu')(policy_batch)
    policy_flat = Flatten()(policy_relu)
    policy_output = Dense(encoder.num_moves(),
                          activation='softmax')(policy_flat)

    # Value output
    value_conv = Conv2D(1, (1, 1), data_format='channels_first')(pb)
    value_batch = BatchNormalization(axis=1)(value_conv)
    value_relu = Activation('relu')(value_batch)
    value_flat = Flatten()(value_relu)
    value_hidden = Dense(256, activation='relu')(value_flat)
    value_output = Dense(1, activation='tanh')(value_hidden)

    model = Model(inputs=[board_input], outputs=[policy_output, value_output])

    c1 = zero.ZeroExperienceCollector()
    c2 = zero.ZeroExperienceCollector()
    black_agent = zero.ZeroAgent(model, encoder, rounds_per_move=10, c=2.0)
    white_agent = zero.ZeroAgent(model, encoder, rounds_per_move=10, c=2.0)
    black_agent.set_collector(c1)
    white_agent.set_collector(c2)

    print('Starting the game!')
    game = GameState.new_game(board_size)

    c1.begin_episode()
    c2.begin_episode()
    black_move = black_agent.select_move(game)
    print('B', black_move)
    game = game.apply_move(black_move)
    white_move = white_agent.select_move(game)
    print('W', white_move)
    game = game.apply_move(white_move)
    black_move = black_agent.select_move(game)
    print('B', black_move)

    c1.complete_episode(1)
    c2.complete_episode(-1)
    exp = zero.combine_experience([c1, c2])
    black_agent.train(exp, 0.01, 2048)
Example #3
def generate_experience(learning_agent, reference_agent, exp_file, num_games,
                        board_size, num_workers):
    experience_files = []
    workers = []
    gpu_frac = 1 / float(num_workers)
    games_per_worker = num_games // num_workers
    for i in range(num_workers):
        filename = get_temp_file()
        experience_files.append(filename)
        worker = multiprocessing.Process(target=do_self_play,
                                         args=(
                                             board_size,
                                             learning_agent,
                                             reference_agent,
                                             games_per_worker,
                                             filename,
                                             gpu_frac,
                                         ))
        worker.start()
        workers.append(worker)

    # Wait for all workers to finish.
    print('Waiting for workers...')
    for worker in workers:
        worker.join()

    # Merge experience buffers.
    print('Merging experience buffers...')
    first_filename = experience_files[0]
    other_filenames = experience_files[1:]
    with h5py.File(first_filename, 'r') as expf:
        combined_buffer = zero.load_experience(expf)
    for filename in other_filenames:
        with h5py.File(filename, 'r') as expf:
            next_buffer = zero.load_experience(expf)
        combined_buffer = zero.combine_experience(
            [combined_buffer, next_buffer])
    print('Saving into %s...' % exp_file)
    with h5py.File(exp_file, 'w') as experience_outf:
        combined_buffer.serialize(experience_outf)

    # Clean up.
    for fname in experience_files:
        os.unlink(fname)
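generate_experience relies on a get_temp_file helper that is not part of the snippet. One plausible standard-library implementation, shown here only as a sketch (the original project may implement it differently):

import tempfile

def get_temp_file():
    # Create a named temporary file and return its path; the caller deletes
    # it in the cleanup loop above.
    fd, fname = tempfile.mkstemp(prefix='dlgo-train')
    os.close(fd)
    return fname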
Example #4
def main():
    board_size = 9
    encoder = zero.ZeroEncoder(board_size)

    board_input = Input(shape=encoder.shape(), name='board_input')
    pb = board_input
    for i in range(4):
        # conv -> batch norm -> ReLU; the activation lives in the explicit
        # Activation layer, so the Conv2D itself stays linear
        pb = Conv2D(64, (3, 3),
                    padding='same',
                    data_format='channels_first')(pb)
        pb = BatchNormalization(axis=1)(pb)
        pb = Activation('relu')(pb)

    policy_conv = Conv2D(2, (1, 1),
                         data_format='channels_first',
                         activation='relu')(pb)
    policy_batch = BatchNormalization(axis=1)(policy_conv)
    policy_flat = Flatten()(policy_batch)
    policy_output = Dense(encoder.num_moves(),
                          activation='softmax')(policy_flat)

    value_conv = Conv2D(1, (1, 1),
                        data_format='channels_first',
                        activation='relu')(pb)
    value_batch = BatchNormalization(axis=1)(value_conv)
    value_flat = Flatten()(value_batch)
    value_hidden = Dense(256, activation='relu')(value_flat)
    value_output = Dense(1, activation='tanh')(value_hidden)

    model = Model(inputs=[board_input], outputs=[policy_output, value_output])
    black_agent = zero.ZeroAgent(model, encoder, rounds_per_move=10, c=2.0)
    white_agent = zero.ZeroAgent(model, encoder, rounds_per_move=10, c=2.0)
    c1 = zero.ZeroExperienceCollector()
    c2 = zero.ZeroExperienceCollector()
    black_agent.set_collector(c1)
    white_agent.set_collector(c2)

    for i in range(5):
        simulate_game(board_size, black_agent, c1, white_agent, c2)

    exp = zero.combine_experience([c1, c2])
    black_agent.train(exp, 0.01, 2048)
Example #5
def do_self_play(board_size, agent1_filename, agent2_filename, num_games,
                 experience_filename, gpu_frac):
    kerasutil.set_gpu_memory_target(gpu_frac)

    random.seed(int(time.time()) + os.getpid())
    np.random.seed(int(time.time()) + os.getpid())

    agent1 = load_agent(agent1_filename)
    agent2 = load_agent(agent2_filename)

    collector1 = zero.ZeroExperienceCollector()
    collector2 = zero.ZeroExperienceCollector()

    color1 = Player.black
    for i in range(num_games):
        print('Simulating game %d/%d...' % (i + 1, num_games))
        collector1.begin_episode()
        collector2.begin_episode()
        agent1.set_collector(collector1)
        agent2.set_collector(collector2)

        if color1 == Player.black:
            black_player, white_player = agent1, agent2
        else:
            white_player, black_player = agent1, agent2

        game_record = simulate_game(black_player, white_player, board_size)

        if game_record.winner == color1:
            print('Agent 1 wins.')
            collector1.complete_episode(reward=1)
            collector2.complete_episode(reward=-1)
        else:
            print('Agent 2 wins.')
            collector1.complete_episode(reward=-1)
            collector2.complete_episode(reward=1)
        color1 = color1.other

    experience = zero.combine_experience([collector1, collector2])
    print('Saving experience buffer to %s\n' % experience_filename)
    with h5py.File(experience_filename, 'w') as experience_outf:
        experience.serialize(experience_outf)
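do_self_play receives its agents as file names and loads them with a load_agent helper that is not shown. A minimal sketch consistent with the zero.load_zero_agent call in Example #7; adjust it to however your agents are actually serialized:

def load_agent(filename):
    # Open the saved agent file and reconstruct a ZeroAgent from it.
    with h5py.File(filename, 'r') as h5file:
        return zero.load_zero_agent(h5file)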
Example #6
# (excerpt starts partway through the model definition; the policy head
# ends with the softmax layer below)
policy_output = Dense(encoder.num_moves(),
                      activation='softmax')(policy_flat)

value_conv = Conv2D(1, (1, 1),
                    data_format='channels_first',
                    activation='relu')(pb)                           # <3>
value_flat = Flatten()(value_conv)                       # <3>
value_hidden = Dense(256, activation='relu')(value_flat) # <3>
value_output = Dense(1, activation='tanh')(value_hidden) # <3>

model = Model(
    inputs=[board_input],
    outputs=[policy_output, value_output])
# end::zero_model[]

# tag::zero_train[]
black_agent = zero.ZeroAgent(
    model, encoder, rounds_per_move=10, c=2.0)  # <4>
white_agent = zero.ZeroAgent(
    model, encoder, rounds_per_move=10, c=2.0)
c1 = zero.ZeroExperienceCollector()
c2 = zero.ZeroExperienceCollector()
black_agent.set_collector(c1)
white_agent.set_collector(c2)

for i in range(5):   # <5>
    simulate_game(board_size, black_agent, c1, white_agent, c2)

exp = zero.combine_experience([c1, c2])
black_agent.train(exp, 0.01, 2048)
# end::zero_train[]
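Example #7 below expects a trained bot saved as agz_bot.h5 and reads it back with zero.load_zero_agent. Saving the agent trained here might look like the sketch below, assuming ZeroAgent exposes a serialize(h5file) method like the other agents in this code base (the method itself is not shown in these examples):

with h5py.File('agz_bot.h5', 'w') as bot_file:
    black_agent.serialize(bot_file)  # assumed ZeroAgent.serialize(h5file) API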
Example #7
def generate_game(board_size, game_id_str, rounds_per_move=10, c=2.0):
    start = time.time()
    print(f'Generating {game_id_str}...')

    game = GameState.new_game(board_size)
    encoder = zero.ZeroEncoder(board_size)

    # Load the current best agent, if one has been saved. The agent has to
    # pass through cPickle (this function runs in a worker process), which is
    # why we don't just reuse a shared instance.

    if os.path.exists('agz_bot.h5'):

        with h5py.File('agz_bot.h5', 'r') as bot_file:
            black_agent = zero.load_zero_agent(bot_file)
            white_agent = zero.load_zero_agent(bot_file)

    else:
        print(f'WARN: using default model to generate {game_id_str}')

        model = zero_model(board_size)

        black_agent = zero.ZeroAgent(model, encoder, rounds_per_move=rounds_per_move, c=c)
        white_agent = zero.ZeroAgent(model, encoder, rounds_per_move=rounds_per_move, c=c)

    agents = {
        Player.black: black_agent,
        Player.white: white_agent,
    }

    c1 = zero.ZeroExperienceCollector()
    c2 = zero.ZeroExperienceCollector()

    black_agent.set_collector(c1)
    white_agent.set_collector(c2)

    c1.begin_episode()
    c2.begin_episode()

    while not game.is_over():
        next_move = agents[game.next_player].select_move(game)
        game = game.apply_move(next_move)

    game_result = scoring.compute_game_result(game)

    if game_result.winner == Player.black:
        c1.complete_episode(1)
        c2.complete_episode(-1)
    else:
        c1.complete_episode(-1)
        c2.complete_episode(1)

    combined = zero.combine_experience([c1, c2], board_size)

    # Drop references so the Keras session and tensors can be garbage
    # collected once this worker returns.
    c1 = c2 = game_result = None
    model = encoder = None
    game = None

    del black_agent.model
    del white_agent.model

    black_agent = white_agent = None

    import gc

    K.clear_session()
    gc.collect()

    return combined, game_id_str, time.time() - start
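Because generate_game returns its experience buffer rather than sharing live agents across processes (see the pickling comment above), it can be driven from a worker pool. A hedged usage sketch; NUM_GAMES, the pool size, and _worker are illustrative and not part of the original code:

import multiprocessing

def _worker(args):
    board_size, game_id = args
    return generate_game(board_size, game_id)

if __name__ == '__main__':
    NUM_GAMES = 4  # illustrative
    jobs = [(9, 'game-%d' % i) for i in range(NUM_GAMES)]
    with multiprocessing.Pool(processes=2) as pool:
        for combined, game_id, elapsed in pool.map(_worker, jobs):
            print('%s finished in %.1f s' % (game_id, elapsed))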