import argparse
import gc
import os
import time

import h5py
from keras import backend as K
from keras.layers import Activation, BatchNormalization, Conv2D, Dense, Flatten, Input
from keras.models import Model

# Project-local modules from the book's dlgo package; the exact import paths
# are an assumption based on the names used below.
from dlgo import encoders, networks, scoring, zero
from dlgo.goboard_fast import GameState
from dlgo.gotypes import Player


def run():
    board_size = 9
    encoder = zero.ZeroEncoder(board_size)

    board_input = Input(shape=encoder.shape(), name='board_input')
    pb = board_input
    for _ in range(16):
        pb = Conv2D(64, (3, 3),
                    padding='same',
                    data_format='channels_first',
                    activation='relu')(pb)

    policy_conv = Conv2D(2, (1, 1),
                         data_format='channels_first',
                         activation='relu')(pb)
    policy_flat = Flatten()(policy_conv)
    policy_output = Dense(encoder.num_moves(), activation='softmax')(policy_flat)

    value_conv = Conv2D(1, (1, 1),
                        data_format='channels_first',
                        activation='relu')(pb)
    value_flat = Flatten()(value_conv)
    value_hidden = Dense(256, activation='relu')(value_flat)
    value_output = Dense(1, activation='tanh')(value_hidden)

    model = Model(inputs=[board_input], outputs=[policy_output, value_output])

    black_agent = zero.ZeroAgent(model, encoder, rounds_per_move=10, c=2.0)
    white_agent = zero.ZeroAgent(model, encoder, rounds_per_move=10, c=2.0)
    c1 = zero.ZeroExperienceCollector()
    c2 = zero.ZeroExperienceCollector()
    black_agent.set_collector(c1)
    white_agent.set_collector(c2)

    num_games = 10
    for i in range(num_games):
        print(f'Game {i + 1}/{num_games}')
        start_time = time.time()
        simulate_game(board_size, black_agent, c1, white_agent, c2)
        elapsed = time.time() - start_time
        print(f'elapsed: {elapsed} s')
        print(
            f'estimated time remaining this session: '
            f'{(num_games - (i + 1)) * elapsed} s'
        )

    exp = zero.combine_experience([c1, c2], board_size)
    black_agent.train(exp, 0.01, 1024)
    with h5py.File('agz_experience.h5', 'a') as expfile:
        exp.serialize(expfile)
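
# run() and the main() functions below call simulate_game, which is not
# defined in this section. A minimal sketch, reconstructed from the identical
# game loop and reward handling in generate_game below; treat the exact
# signature as an assumption.
def simulate_game(board_size, black_agent, black_collector, white_agent, white_collector):
    # Play one full self-play game, recording experience into both collectors.
    game = GameState.new_game(board_size)
    agents = {Player.black: black_agent, Player.white: white_agent}
    black_collector.begin_episode()
    white_collector.begin_episode()
    while not game.is_over():
        next_move = agents[game.next_player].select_move(game)
        game = game.apply_move(next_move)
    # Score the finished game and hand out +1/-1 rewards.
    game_result = scoring.compute_game_result(game)
    if game_result.winner == Player.black:
        black_collector.complete_episode(1)
        white_collector.complete_episode(-1)
    else:
        black_collector.complete_episode(-1)
        white_collector.complete_episode(1)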
def main():
    board_size = 9
    encoder = zero.ZeroEncoder(board_size)

    board_input = Input(shape=encoder.shape(), name='board_input')
    pb = board_input
    for _ in range(4):
        pb = Conv2D(64, (3, 3),
                    padding='same',
                    data_format='channels_first')(pb)
        pb = BatchNormalization(axis=1)(pb)
        pb = Activation('relu')(pb)

    # Policy output
    policy_conv = Conv2D(2, (1, 1), data_format='channels_first')(pb)
    policy_batch = BatchNormalization(axis=1)(policy_conv)
    policy_relu = Activation('relu')(policy_batch)
    policy_flat = Flatten()(policy_relu)
    policy_output = Dense(encoder.num_moves(), activation='softmax')(policy_flat)

    # Value output
    value_conv = Conv2D(1, (1, 1), data_format='channels_first')(pb)
    value_batch = BatchNormalization(axis=1)(value_conv)
    value_relu = Activation('relu')(value_batch)
    value_flat = Flatten()(value_relu)
    value_hidden = Dense(256, activation='relu')(value_flat)
    value_output = Dense(1, activation='tanh')(value_hidden)

    model = Model(inputs=[board_input], outputs=[policy_output, value_output])

    c1 = zero.ZeroExperienceCollector()
    c2 = zero.ZeroExperienceCollector()
    black_agent = zero.ZeroAgent(model, encoder, rounds_per_move=10, c=2.0)
    white_agent = zero.ZeroAgent(model, encoder, rounds_per_move=10, c=2.0)
    black_agent.set_collector(c1)
    white_agent.set_collector(c2)

    print('Starting the game!')
    game = GameState.new_game(board_size)
    c1.begin_episode()
    c2.begin_episode()

    black_move = black_agent.select_move(game)
    print('B', black_move)
    game = game.apply_move(black_move)
    white_move = white_agent.select_move(game)
    print('W', white_move)
    game = game.apply_move(white_move)
    black_move = black_agent.select_move(game)
    print('B', black_move)

    c1.complete_episode(1)
    c2.complete_episode(-1)
    exp = zero.combine_experience([c1, c2])
    black_agent.train(exp, 0.01, 2048)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--board-size', type=int, default=19)
    # parser.add_argument('--network', default='large')
    # parser.add_argument('--hidden-size', type=int, default=512)
    parser.add_argument('output_file')
    args = parser.parse_args()

    encoder = encoders.get_encoder_by_name('zero', args.board_size)
    model = networks.dual_residual_network(input_shape=encoder.shape(), blocks=8)
    model.summary()

    new_agent = zero.ZeroAgent(model, encoder, rounds_per_move=1000, c=2.0)
    with h5py.File(args.output_file, 'w') as outf:
        new_agent.serialize(outf)
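
# A serialized agent can be read back with zero.load_zero_agent, the same call
# generate_game uses below. A hypothetical round-trip helper; the default file
# name is an assumption matching what generate_game expects.
def load_best_agent(path='agz_bot.h5'):
    with h5py.File(path, 'r') as bot_file:
        return zero.load_zero_agent(bot_file)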
def main():
    board_size = 9
    encoder = zero.ZeroEncoder(board_size)

    board_input = Input(shape=encoder.shape(), name='board_input')
    pb = board_input
    for _ in range(4):
        pb = Conv2D(64, (3, 3),
                    padding='same',
                    data_format='channels_first')(pb)
        pb = BatchNormalization(axis=1)(pb)
        pb = Activation('relu')(pb)

    policy_conv = Conv2D(2, (1, 1), data_format='channels_first')(pb)
    policy_batch = BatchNormalization(axis=1)(policy_conv)
    policy_relu = Activation('relu')(policy_batch)
    policy_flat = Flatten()(policy_relu)
    policy_output = Dense(encoder.num_moves(), activation='softmax')(policy_flat)

    value_conv = Conv2D(1, (1, 1), data_format='channels_first')(pb)
    value_batch = BatchNormalization(axis=1)(value_conv)
    value_relu = Activation('relu')(value_batch)
    value_flat = Flatten()(value_relu)
    value_hidden = Dense(256, activation='relu')(value_flat)
    value_output = Dense(1, activation='tanh')(value_hidden)

    model = Model(inputs=[board_input], outputs=[policy_output, value_output])

    black_agent = zero.ZeroAgent(model, encoder, rounds_per_move=10, c=2.0)
    white_agent = zero.ZeroAgent(model, encoder, rounds_per_move=10, c=2.0)
    c1 = zero.ZeroExperienceCollector()
    c2 = zero.ZeroExperienceCollector()
    black_agent.set_collector(c1)
    white_agent.set_collector(c2)

    for i in range(5):
        simulate_game(board_size, black_agent, c1, white_agent, c2)

    exp = zero.combine_experience([c1, c2])
    black_agent.train(exp, 0.01, 2048)
    policy_output = Dense(encoder.num_moves(),
                          activation='softmax')(policy_flat)

    value_conv = Conv2D(1, (1, 1),
                        data_format='channels_first',
                        activation='relu')(pb)  # <3>
    value_flat = Flatten()(value_conv)  # <3>
    value_hidden = Dense(256, activation='relu')(value_flat)  # <3>
    value_output = Dense(1, activation='tanh')(value_hidden)  # <3>

    model = Model(
        inputs=[board_input],
        outputs=[policy_output, value_output])
    # end::zero_model[]

    # tag::zero_train[]
    black_agent = zero.ZeroAgent(
        model, encoder, rounds_per_move=10, c=2.0)  # <4>
    white_agent = zero.ZeroAgent(
        model, encoder, rounds_per_move=10, c=2.0)
    c1 = zero.ZeroExperienceCollector()
    c2 = zero.ZeroExperienceCollector()
    black_agent.set_collector(c1)
    white_agent.set_collector(c2)

    for i in range(5):  # <5>
        simulate_game(board_size, black_agent, c1, white_agent, c2)

    exp = zero.combine_experience([c1, c2])
    black_agent.train(exp, 0.01, 2048)
    # end::zero_train[]
def generate_game(board_size, game_id_str, rounds_per_move=10, c=2.0):
    start = time.time()
    print(f'Generating {game_id_str}...')
    game = GameState.new_game(board_size)
    encoder = zero.ZeroEncoder(board_size)

    # Load the current best agent, if any. Everything here has to pass
    # through cPickle (this runs in a worker process), which is why we
    # rebuild the agents instead of reusing shared instances.
    if os.path.exists('agz_bot.h5'):
        with h5py.File('agz_bot.h5', 'r') as bot_file:
            black_agent = zero.load_zero_agent(bot_file)
            white_agent = zero.load_zero_agent(bot_file)
    else:
        print(f'WARN: using default model to generate {game_id_str}')
        model = zero_model(board_size)
        black_agent = zero.ZeroAgent(model, encoder, rounds_per_move=rounds_per_move, c=c)
        white_agent = zero.ZeroAgent(model, encoder, rounds_per_move=rounds_per_move, c=c)

    agents = {
        Player.black: black_agent,
        Player.white: white_agent,
    }
    c1 = zero.ZeroExperienceCollector()
    c2 = zero.ZeroExperienceCollector()
    black_agent.set_collector(c1)
    white_agent.set_collector(c2)
    c1.begin_episode()
    c2.begin_episode()

    while not game.is_over():
        next_move = agents[game.next_player].select_move(game)
        game = game.apply_move(next_move)

    game_result = scoring.compute_game_result(game)
    if game_result.winner == Player.black:
        c1.complete_episode(1)
        c2.complete_episode(-1)
    else:
        c1.complete_episode(-1)
        c2.complete_episode(1)
    combined = zero.combine_experience([c1, c2], board_size)

    # Drop every reference to the model and agents so Keras/TensorFlow can
    # release memory before this worker starts its next game.
    c1 = c2 = game_result = None
    model = encoder = None
    game = None
    del black_agent.model
    del white_agent.model
    black_agent = white_agent = None
    K.clear_session()
    gc.collect()

    return combined, game_id_str, time.time() - start
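
# generate_game falls back to zero_model when no serialized bot exists, but
# that helper is not defined in this section. A minimal sketch, assuming the
# same trunk-plus-two-heads layout as the networks built in run() and main().
def zero_model(board_size):
    # Shared convolutional trunk feeding separate policy and value heads.
    encoder = zero.ZeroEncoder(board_size)
    board_input = Input(shape=encoder.shape(), name='board_input')
    pb = board_input
    for _ in range(4):
        pb = Conv2D(64, (3, 3), padding='same', data_format='channels_first')(pb)
        pb = BatchNormalization(axis=1)(pb)
        pb = Activation('relu')(pb)
    policy_conv = Conv2D(2, (1, 1), data_format='channels_first', activation='relu')(pb)
    policy_flat = Flatten()(policy_conv)
    policy_output = Dense(encoder.num_moves(), activation='softmax')(policy_flat)
    value_conv = Conv2D(1, (1, 1), data_format='channels_first', activation='relu')(pb)
    value_flat = Flatten()(value_conv)
    value_hidden = Dense(256, activation='relu')(value_flat)
    value_output = Dense(1, activation='tanh')(value_hidden)
    return Model(inputs=[board_input], outputs=[policy_output, value_output])


# The cPickle note in generate_game suggests it is meant to run in worker
# processes. A hypothetical driver; it assumes the combined experience that
# generate_game returns is itself picklable.
import multiprocessing


def generate_games_parallel(board_size, num_games, num_workers=2):
    # Fan self-play games out across worker processes; each worker builds or
    # loads its own agents, so only arguments and results cross the boundary.
    args = [(board_size, f'game_{i:04d}') for i in range(num_games)]
    with multiprocessing.Pool(num_workers) as pool:
        results = pool.starmap(generate_game, args)
    for combined, game_id_str, elapsed in results:
        print(f'{game_id_str}: {elapsed:.1f} s')
    return [combined for combined, _, _ in results]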