def test_config_previous_states(): man = get_manager() for game in games: # create GenerationMetaAttributes generation_descr = templates.default_generation_desc(game) generation_descr.num_previous_states = 2 # lookup game in manager transformer = man.get_transformer(game, generation_descr) print transformer.x_cords # look game from database game_info = lookup.by_name(game) assert game == game_info.game sm = game_info.get_sm() basestate0 = sm.get_initial_state() basestate1 = advance_state(game_info.get_sm(), basestate0) basestate2 = advance_state(game_info.get_sm(), basestate1) print "rows x cols", transformer.num_rows, transformer.num_cols print "num_channels", transformer.num_channels print "basestate0:" print transformer.state_to_channels(basestate0.to_list(), []) print "basestate1:" print transformer.state_to_channels(basestate1.to_list(), [basestate0.to_list()]) print "basestate2:" print transformer.state_to_channels( basestate2.to_list(), [basestate1.to_list(), basestate0.to_list()])
def play_game(generation_name, player_clz, num_previous_states=1):
    """Play one breakthrough game: player_clz as white vs a random black.

    Creates and saves a tiny network under generation_name first, then
    returns the finished GameMaster.
    """
    game = "breakthrough"

    # make sure a network exists and is saved under generation_name
    man = get_manager()
    generation_descr = templates.default_generation_desc(
        game,
        multiple_policy_heads=True,
        num_previous_states=num_previous_states)

    nn = man.create_new_network(game, "tiny", generation_descr)
    man.save_network(nn, generation_name)
    nn.summary()

    conf = get_default_conf(generation_name,
                            max_dump_depth=2,
                            playouts_per_iteration=42)
    attrutil.pprint(conf)

    gm = GameMaster(lookup.by_name(game))
    gm.add_player(player_clz(conf), "white")
    gm.add_player(get.get_player("random"), "black")

    gm.start(meta_time=30, move_time=15)

    last_move = None
    while not gm.finished():
        last_move = gm.play_single_move(last_move=last_move)

    gm.finalise_match(last_move)
    return gm
def load_network_fixme(self, game):
    """One-off migration helper: load every old-style "<game>_<gen>" model
    on disk for `game` and re-save it under the new-style generation name.

    NOTE(review): mutates process state via os.chdir and overwrites the
    training statistics with "unknown" placeholders -- this is a fix-up
    script, not a general loader (hence the _fixme suffix).
    """
    import glob
    import datetime
    p = os.path.join(self.data_path, game, "models")
    # chdir so the glob below matches bare filenames
    os.chdir(p)
    gens = glob.glob("%s_*" % game)
    for g in gens:
        print "doing", game, g
        # strip the extension, then the "<game>_" prefix -> new-style name
        generation = os.path.splitext(g)[0]
        new_style_gen = generation.replace(game + "_", "")
        print generation, new_style_gen

        # dummy generation_descr
        generation_descr = templates.default_generation_desc(game)

        json_str = open(self.model_path(game, generation)).read()
        keras_model = keras_models.model_from_json(json_str)
        keras_model.load_weights(self.weights_path(game, generation))

        transformer = self.get_transformer(game, generation_descr)
        print transformer, keras_model, generation_descr
        nn = NeuralNetwork(transformer, keras_model, generation_descr)

        # rewrite the metadata on the dummy description before saving
        generation_descr.name = new_style_gen
        generation_descr.trained_losses = "unknown"
        generation_descr.trained_validation_losses = "unknown"
        generation_descr.trained_policy_accuracy = "unknown"
        generation_descr.trained_value_accuracy = "unknown"

        # approximate the creation date from the model file's ctime
        ctime = os.stat(self.model_path(game, generation)).st_ctime
        generation_descr.date_created = datetime.datetime.fromtimestamp(
            ctime).strftime("%Y/%m/%d %H:%M")

        print generation_descr
        self.save_network(nn)
def test_tron(): import py.test py.test.skip("WIP") game = "tron_10x10" generation = "test_1" man = get_manager() # create a nn model_conf = templates.nn_model_config_template(game) generation_descr = templates.default_generation_desc( game, generation, multiple_policy_heads=True) nn = man.create_new_network(game, model_conf, generation_descr) game_info = lookup.by_name(game) sm = game_info.get_sm() basestate = sm.get_initial_state() policy, scores = nn.predict_1(basestate.to_list()) print policy, scores res = nn.predict_n([basestate.to_list(), basestate.to_list()]) assert len(res) == 2 and len(res[0]) == 2 and len(res[1]) == 2 print policy, scores
def retrain(args):
    """Retrain a network using an existing generation's samples.

    NOTE(review): the `args` parameter is accepted but never read -- the
    command line is taken straight from sys.argv below.  Confirm the
    callers before changing either side of this.
    """
    game = sys.argv[1]
    gen_prefix = sys.argv[2]
    gen_prefix_next = sys.argv[3]

    configs = Configs()
    # the per-game config factory is looked up by attribute name
    train_config = getattr(configs, game)(gen_prefix)

    generation_descr = templates.default_generation_desc(train_config.game,
                                                         multiple_policy_heads=True,
                                                         num_previous_states=0)

    # create a transformer
    man = get_manager()
    transformer = man.get_transformer(train_config.game, generation_descr)

    # create the manager
    trainer = train.TrainManager(train_config, transformer)
    trainer.update_config(train_config, next_generation_prefix=gen_prefix_next)

    nn_model_config = get_nn_model(train_config.game, transformer)
    #nn_model_config = templates.nn_model_config_template(train_config.game, "small", transformer)
    trainer.get_network(nn_model_config, generation_descr)

    data = trainer.gather_data()

    trainer.do_epochs(data)
    trainer.save()
def test_config_channel_last(): man = get_manager() for game in games: # create GenerationDescription generation_descr = templates.default_generation_desc(game) generation_descr.channel_last = True # lookup game in manager transformer = man.get_transformer(game, generation_descr) print transformer.x_cords # look game from database game_info = lookup.by_name(game) assert game == game_info.game sm = game_info.get_sm() basestate = sm.get_initial_state() print "rows x cols", transformer.num_rows, transformer.num_cols print print transformer.state_to_channels(basestate.to_list()) basestate = advance_state(game_info.get_sm(), basestate) print print transformer.state_to_channels(basestate.to_list())
def do_training(game, gen_prefix, next_step, starting_step,
                num_previous_states, gen_prefix_next,
                do_data_augmentation=False):
    """Run a full training pass for `game` and save the resulting network."""
    man = get_manager()

    # transformer derived from the generation description
    generation_descr = templates.default_generation_desc(
        game,
        multiple_policy_heads=True,
        num_previous_states=num_previous_states)
    transformer = man.get_transformer(game, generation_descr)

    # training configuration and manager
    train_config = get_train_config(game, gen_prefix, next_step, starting_step)
    trainer = train.TrainManager(train_config, transformer,
                                 do_data_augmentation=do_data_augmentation)
    trainer.update_config(train_config,
                          next_generation_prefix=gen_prefix_next)

    # attach the network model to the trainer
    nn_model_config = get_nn_model(train_config.game, transformer)
    trainer.get_network(nn_model_config, generation_descr)

    trainer.do_epochs()
    trainer.save()
def do_transformer(num_previous_states): game = "breakthrough" game_info = lookup.by_name(game) sm = game_info.get_sm() man = get_manager() # only multiple_policy_heads supported in c++ generation_descr = templates.default_generation_desc( game, multiple_policy_heads=True, num_previous_states=num_previous_states) t = man.get_transformer(game, generation_descr) # create transformer wrapper object c_transformer = cppinterface.create_c_transformer(t) nn = man.create_new_network(game, "small", generation_descr) verbose = True total_predictions = 0 total_s0 = 0 total_s1 = 0 total_s2 = 0 for ii in range(10): print ii start = time.time() array = c_transformer.test(cppinterface.sm_to_ptr(sm)) total_s0 += time.time() - start sz = len(array) / (t.num_channels * t.channel_size) total_predictions += sz array = array.reshape(sz, t.num_channels, t.num_cols, t.num_rows) total_s1 += time.time() - start if verbose: np.set_printoptions(threshold=np.inf, formatter={'float_kind': float_formatter0}) print array # test we can actually predict res = nn.get_model().predict(array, batch_size=sz) # print res[0].shape # print res[1].shape total_s2 += time.time() - start if verbose: np.set_printoptions(threshold=np.inf, formatter={'float_kind': float_formatter1}) print res print total_predictions, "time taken", [ s * 1000 for s in (total_s0, total_s1, total_s2) ]
def test_generation_desc():
    """Build a default generation description and pretty-print it."""
    gen_desc = templates.default_generation_desc(
        "breakthrough", "x1",
        multiple_policy_heads=True,
        num_previous_states=1)
    attrutil.pprint(gen_desc)
def speed_test(): ITERATIONS = 3 man = get_manager() # get data train_config = config() # get nn to test speed on transformer = man.get_transformer(train_config.game) trainer = train.TrainManager(train_config, transformer) nn_model_config = templates.nn_model_config_template(train_config.game, "small", transformer) generation_descr = templates.default_generation_desc(train_config.game) trainer.get_network(nn_model_config, generation_descr) data = trainer.gather_data() res = [] batch_size = 4096 sample_count = len(data.inputs) keras_model = trainer.nn.get_model() # warm up for i in range(2): idx, end_idx = i * batch_size, (i + 1) * batch_size print i, idx, end_idx inputs = np.array(data.inputs[idx:end_idx]) res.append(keras_model.predict(inputs, batch_size=batch_size)) print res[0] for _ in range(ITERATIONS): res = [] times = [] gc.collect() print 'Starting speed run' num_batches = sample_count / batch_size + 1 print "batches %s, batch_size %s, inputs: %s" % (num_batches, batch_size, len(data.inputs)) for i in range(num_batches): idx, end_idx = i * batch_size, (i + 1) * batch_size inputs = np.array(data.inputs[idx:end_idx]) print "inputs", len(inputs) s = time.time() Y = keras_model.predict(inputs, batch_size=batch_size) times.append(time.time() - s) print "outputs", len(Y[0]) print "times taken", times print "total_time taken", sum(times) print "predictions per second", sample_count / float(sum(times))
def get_generation_descr():
    """Yield a series of GenerationDescription variants for the module-level
    `game`.

    NOTE(review): the SAME object is yielded every time and mutated in place
    between yields -- consumers must use (or copy) each value before
    advancing the generator.
    """
    generation_descr = templates.default_generation_desc(game)

    # no history planes
    generation_descr.num_previous_states = 0
    yield generation_descr

    # two previous states
    generation_descr.num_previous_states = 2
    yield generation_descr

    # an extreme history depth
    generation_descr.num_previous_states = 30
    yield generation_descr

    # ...and additionally with one policy head per role
    generation_descr.multiple_policy_heads = True
    yield generation_descr
def test_cache():
    """Run the samples-buffer cache test with an explicitly configured
    transformer (no previous states, single policy head)."""
    buf = train.SamplesBuffer()

    # we need the data for this test
    conf = get_conf()

    # create a transformer
    man = get_manager()
    generation_descr = templates.default_generation_desc(conf.game)
    generation_descr.num_previous_states = 0
    generation_descr.multiple_policy_heads = False

    # BUG FIX: generation_descr was built and configured but never passed,
    # so get_transformer() silently fell back to the default description
    # (compare test_trainer_update_config, which passes it).
    transformer = man.get_transformer(conf.game, generation_descr)

    run_with_transformer(buf, conf, transformer)
def test_game_descriptions(): game_descs = gamedesc.Games() names = [name for name in dir(game_descs) if name[0] != "_"] names = ["breakthroughSmall", "breakthrough", "englishDraughts"] names = ["connect6"] for name in names: print print "=" * 80 print name print "=" * 80 meth = getattr(game_descs, name) game_description = meth() print name, game_description.game print game_description print "-" * 80 game_info = lookup.by_name(game_description.game) # create GenerationDescription generation_descr = templates.default_generation_desc( game_description.game) transformer = GdlBasesTransformer(game_info, generation_descr, game_description) transformer = transformer sm = game_info.get_sm() basestate = sm.get_initial_state() from test_model import advance_state for i in range(25): print "move made", i print game_info.model.basestate_to_str(basestate) print transformer.state_to_channels(basestate.to_list()) sm.update_bases(basestate) if sm.is_terminal(): break basestate = advance_state(sm, basestate)
def setup_c4(batch_size=1024):
    """Create a connectFour Supervisor plus the template self-play config."""
    game = "connectFour"
    man = get_manager()

    # only multiple_policy_heads supported in c++
    generation_descr = templates.default_generation_desc(
        game, multiple_policy_heads=True)
    nn = man.create_new_network(game, "small", generation_descr)

    game_info = lookup.by_name(game)
    supervisor = cppinterface.Supervisor(game_info.get_sm(), nn,
                                         batch_size=batch_size)

    return supervisor, templates.selfplay_config_template()
def setup_c4(batch_size=1024):
    """Create a connectFour Supervisor plus a hand-tuned SelfPlayConfig."""
    game = "connectFour"
    man = get_manager()

    # only multiple_policy_heads supported in c++
    generation_descr = templates.default_generation_desc(
        game, multiple_policy_heads=True)
    nn = man.create_new_network(game, "smaller", generation_descr)

    game_info = lookup.by_name(game)
    supervisor = cppinterface.Supervisor(game_info.get_sm(), nn,
                                         batch_size=batch_size)

    conf = confs.SelfPlayConfig()
    conf.max_number_of_samples = 4

    conf.resign_score_probability = 0.1
    conf.resign_false_positive_retry_percentage = 0.5

    conf.select_iterations = 0
    conf.sample_iterations = 50
    conf.score_iterations = 5

    # selection: temperature-based policy puct
    p = conf.select_puct_config = templates.puct_config_template("policy")
    p.temperature = 0.5
    p.depth_temperature_start = 1
    p.depth_temperature_increment = 0.5
    p.depth_temperature_stop = 40
    p.random_scale = 0.85
    p.choose = "choose_temperature"
    p.verbose = False

    # sampling and scoring both use the quiet "test" template
    conf.sample_puct_config = templates.puct_config_template("test")
    conf.sample_puct_config.verbose = False

    conf.score_puct_config = templates.puct_config_template("test")
    conf.score_puct_config.verbose = False

    return supervisor, conf
def go():
    """Gather training data, build a network and warm up the Runner."""
    man = get_manager()

    # get data
    train_config = config()

    # get nn to test speed on
    transformer = man.get_transformer(train_config.game)
    trainer = train.TrainManager(train_config, transformer)

    nn_model_config = templates.nn_model_config_template(train_config.game,
                                                         "small", transformer)
    generation_descr = templates.default_generation_desc(train_config.game)
    trainer.get_network(nn_model_config, generation_descr)

    # BUG FIX: data was gathered twice -- once into an unused local and once
    # again for the Runner.  Gather once and reuse it (the original double
    # call implied gather_data() is repeatable; confirm if it is stateful).
    data = trainer.gather_data()

    r = Runner(data, trainer.nn.get_model())
    r.warmup()
def test_nn_model_config_template(): game = "breakthrough" gen_prefix = "x1" prev_states = 1 gen_desc = templates.default_generation_desc( game, gen_prefix, multiple_policy_heads=True, num_previous_states=prev_states) transformer = man.get_transformer(game, gen_desc) model = templates.nn_model_config_template("breakthrough", "small", transformer) attrutil.pprint(model) keras_model = get_network_model(model, gen_desc) network = NeuralNetwork(transformer, keras_model, gen_desc) print network network.summary()
def test_inline_supervisor_creation():
    """Smoke test: a Supervisor constructs cleanly for several games and
    batch sizes."""
    game_names = "breakthrough reversi breakthroughSmall connectFour".split()
    man = get_manager()

    for game in game_names:
        game_info = lookup.by_name(game)

        # get statemachine
        sm = game_info.get_sm()

        # only multiple_policy_heads supported in c++
        generation_descr = templates.default_generation_desc(
            game, multiple_policy_heads=True)
        nn = man.create_new_network(game, "small", generation_descr)

        for batch_size in (1, 128, 1024):
            # construction itself is the test; the object is discarded
            cppinterface.Supervisor(sm, nn, batch_size=batch_size)
def create_new_network(self, game, nn_model_conf=None, generation_descr=None):
    """Build a fresh NeuralNetwork for `game`.

    nn_model_conf may be an NNModelConfig, a size-hint string ("tiny",
    "small", ...) or None (defaults to "small").  generation_descr defaults
    to the template description for the game.
    """
    if generation_descr is None:
        generation_descr = templates.default_generation_desc(game)

    transformer = self.get_transformer(game, generation_descr)

    # a string is treated as a network size hint; None means "small"
    if isinstance(nn_model_conf, str):
        nn_model_conf = templates.nn_model_config_template(
            game, network_size_hint=nn_model_conf, transformer=transformer)
    elif nn_model_conf is None:
        nn_model_conf = templates.nn_model_config_template(
            game, network_size_hint="small", transformer=transformer)

    assert isinstance(nn_model_conf, confs.NNModelConfig)
    assert isinstance(generation_descr, datadesc.GenerationDescription)

    keras_model = get_network_model(nn_model_conf, generation_descr)
    return NeuralNetwork(transformer, keras_model, generation_descr)
def get_transformer(self, game, generation_descr=None):
    """Return a (cached) GdlBasesTransformer for game/description."""
    from ggpzero.nn.bases import GdlBasesTransformer, GdlBasesTransformer_Draws

    if generation_descr is None:
        generation_descr = templates.default_generation_desc(game)

    assert isinstance(generation_descr, datadesc.GenerationDescription)

    desc = generation_descr
    # the cache key covers every attribute that changes transformer output
    key = (game, desc.channel_last, desc.multiple_policy_heads,
           desc.num_previous_states, desc.draw_head)

    transformer = self.transformers.get(key)
    if transformer is None:
        # looks up the game in the ggplib database
        game_info = lookup.by_name(game)

        if desc.draw_head:
            transformer_clz = GdlBasesTransformer_Draws
        else:
            transformer_clz = GdlBasesTransformer

        transformer = transformer_clz(game_info, generation_descr)
        self.transformers[key] = transformer

    return transformer
def play_game(generation_name, num_previous_states):
    """Play one breakthrough game to the end: CppPUCTPlayer (white) vs
    random (black), after saving a tiny network under generation_name."""
    game = "breakthrough"

    # ensure we have a network
    man = get_manager()
    generation_descr = templates.default_generation_desc(
        game,
        multiple_policy_heads=True,
        num_previous_states=num_previous_states)
    nn = man.create_new_network(game, "tiny", generation_descr)
    man.save_network(nn, generation_name)
    nn.summary()

    conf = templates.puct_config_template(generation_name, "compete")

    gm = GameMaster(get_gdl_for_game(game))
    gm.add_player(CppPUCTPlayer(conf=conf), "white")
    gm.add_player(get.get_player("random"), "black")

    gm.start(meta_time=30, move_time=15)
    gm.play_to_end()
def test_net_sizes_with_l2(): man = get_manager() for game in games: generation_descr = templates.default_generation_desc(game, name="L2_1") transformer = man.get_transformer(game, generation_descr) # create a nn for size in "tiny medium".split(): print print size model_conf = templates.nn_model_config_template( game, size, transformer) model_conf.l2_regularisation = True model_conf.dropout_rate_value = -1 model_conf.dropout_rate_policy = -1 nn = man.create_new_network(game, model_conf, generation_descr) nn.summary() print # print "hit return to compile" # raw_input() nn.compile()
def test_save_load_net():
    """Round-trip a network through save/load and compare the results."""
    man = get_manager()

    game = "breakthrough"
    generation = "gen_1"

    generation_descr = templates.default_generation_desc(game, generation)
    transformer = man.get_transformer(game, generation_descr)
    model_conf = templates.nn_model_config_template(game, "tiny", transformer)

    nn = man.create_new_network(game, model_conf, generation_descr)
    nn.summary()

    man.save_network(nn)
    assert man.can_load(game, generation)

    nn2 = man.load_network(game, generation)
    nn2.summary()

    # a fresh object, same generation name, shared (cached) transformer
    assert nn is not nn2
    assert nn.generation_descr.name == generation
    assert nn.generation_descr.name == nn2.generation_descr.name
    assert nn.gdl_bases_transformer is nn2.gdl_bases_transformer
def test_net_multiple_policies(): man = get_manager() for game in games: # create GenerationDescription generation_descr = templates.default_generation_desc( game, multiple_policy_heads=True) nn = man.create_new_network(game, "tiny", generation_descr) nn.summary() game_info = lookup.by_name(game) sm = game_info.get_sm() basestate0 = sm.get_initial_state() heads = nn.predict_1(basestate0.to_list()) print heads.policies, heads.scores basestate1 = advance_state(game_info.get_sm(), basestate0) heads = nn.predict_n([basestate1.to_list()]) assert len(heads) == 1 and len(heads[0].policies) == 2 and len( heads[0].scores) == 2 print heads[0].policies, heads[0].scores
def test_trainer_update_config(): # we need the data for this test conf = get_conf_reversi() # create a transformer man = get_manager() generation_descr = templates.default_generation_desc(conf.game) generation_descr.num_previous_states = 2 generation_descr.multiple_policy_heads = True transformer = man.get_transformer(conf.game, generation_descr) # create the manager trainer = train.TrainManager(conf, transformer, next_generation_prefix="x2test") nn_model_config = templates.nn_model_config_template(conf.game, "tiny", transformer) trainer.get_network(nn_model_config, generation_descr) data = trainer.gather_data() print data trainer.do_epochs(data) trainer.save()