def rl_loop():
    """Exercise the selfplay -> gather -> train pipeline end to end.

    Intended as an integration smoke test: the network hyperparameters are
    monkeypatched down to a tiny size so every stage finishes quickly.
    """
    # Shrink the network so bootstrap/selfplay/train all execute fast.
    dual_net.get_default_hyperparams = lambda **kwargs: {
        'k': 1,
        'fc_width': 2,
        'num_shared_layers': 1,
        'l2_strength': 2e-4,
        'momentum': 0.9}
    with tempfile.TemporaryDirectory() as base_dir:
        model_save_file = os.path.join(base_dir, 'models', '000000-bootstrap')
        selfplay_dir = os.path.join(base_dir, 'data', 'selfplay')
        model_selfplay_dir = os.path.join(
            selfplay_dir, '000000-bootstrap', 'worker1')
        gather_dir = os.path.join(base_dir, 'data', 'training_chunks')
        sgf_dir = os.path.join(base_dir, 'sgf', '000000-bootstrap')
        os.mkdir(os.path.join(base_dir, 'data'))

        print("Creating random initial weights...")
        dual_net.DualNetworkTrainer(model_save_file).bootstrap()

        print("Playing some games...")
        # Two selfplay runs so gather() has more than one game to merge.
        for _ in range(2):
            main.selfplay(
                load_file=model_save_file,
                output_dir=model_selfplay_dir,
                output_sgf=sgf_dir,
                readouts=10)

        print("Gathering game output...")
        main.gather(input_directory=selfplay_dir, output_directory=gather_dir)

        print("Training on gathered game data... (ctrl+C to quit)")
        main.train(gather_dir, save_file=model_save_file, num_steps=10000)
def test_train(self):
    """Smoke-test a single training step on data built from a sample SGF."""
    with tempfile.TemporaryDirectory() as model_dir, \
            tempfile.NamedTemporaryFile() as tf_record:
        preprocessing.make_dataset_from_sgf(
            'tests/example_game.sgf', tf_record.name)
        save_path = os.path.join(model_dir, 'test_model')
        trainer = dual_net.DualNetworkTrainer(save_path, **fast_hparams)
        trainer.train([tf_record.name], num_steps=1)
def train(chunk_dir, save_file, load_file=None, generation_num=0,
          logdir=None, num_steps=None):
    """Train a model from gathered training chunks.

    Args:
        chunk_dir: Directory containing *.tfrecord.zz training chunks whose
            basenames start with a 6-digit generation number.
        save_file: Path to save the trained model to.
        load_file: Optional checkpoint to initialize weights from.
        generation_num: Current generation; only chunks from the 50
            generations ending at (and including) this one are used.
        logdir: Optional TensorBoard log directory.
        num_steps: Optional cap on the number of training steps.
    """
    # Sort so the training window is deterministic regardless of filesystem
    # glob order (gfile.Glob makes no ordering guarantee); this also matches
    # the sorted behavior of the window-based train() variant.
    tf_records = sorted(gfile.Glob(os.path.join(chunk_dir, '*.tfrecord.zz')))
    # Keep only chunks whose 6-digit generation prefix falls inside the
    # 50-generation moving window ending at generation_num.
    tf_records = [
        f for f in tf_records
        if (generation_num - 50) < int(os.path.basename(f)[:6]) <= generation_num]
    n = dual_net.DualNetworkTrainer(save_file)
    with timer("Training"):
        n.train(tf_records, init_from=load_file,
                logdir=logdir, num_steps=num_steps)
def test_inference(self):
    """Verify that two DualNetwork instances can coexist in one process."""
    with tempfile.TemporaryDirectory() as model_dir:
        save_path = os.path.join(model_dir, 'blah')
        trainer = dual_net.DualNetworkTrainer(save_path, **fast_hparams)
        trainer.bootstrap()

        first = dual_net.DualNetwork(save_path, **fast_hparams)
        first.run(go.Position())
        # In the past we've had issues initializing two separate NNs in the
        # same process... just double check that two DualNetwork instances
        # can live side by side.
        second = dual_net.DualNetwork(save_path, **fast_hparams)
        second.run(go.Position())
def rl_loop():
    """Run the reinforcement learning loop.

    This is meant to be more of an integration test than a realistic way
    to run the reinforcement learning.
    """
    # Monkeypatch the hyperparams so that we get a quickly executing network.
    dual_net.get_default_hyperparams = lambda **kwargs: {
        'k': 8,
        'fc_width': 16,
        'num_shared_layers': 1,
        'l2_strength': 1e-4,
        'momentum': 0.9}
    dual_net.TRAIN_BATCH_SIZE = 16
    # Monkeypatch the shuffle buffer size so we don't spin forever
    # shuffling up positions.
    preprocessing.SHUFFLE_BUFFER_SIZE = 10000

    with tempfile.TemporaryDirectory() as base_dir:
        model_save_file = os.path.join(base_dir, 'models', '000000-bootstrap')
        selfplay_dir = os.path.join(base_dir, 'data', 'selfplay')
        model_selfplay_dir = os.path.join(selfplay_dir, '000000-bootstrap')
        gather_dir = os.path.join(base_dir, 'data', 'training_chunks')
        sgf_dir = os.path.join(base_dir, 'sgf', '000000-bootstrap')
        os.mkdir(os.path.join(base_dir, 'data'))

        print("Creating random initial weights...")
        dual_net.DualNetworkTrainer(model_save_file).bootstrap()

        print("Playing some games...")
        # Two selfplay runs so gather() has more than one game to merge.
        for _ in range(2):
            main.selfplay(load_file=model_save_file,
                          output_dir=model_selfplay_dir,
                          output_sgf=sgf_dir,
                          holdout_pct=0,
                          readouts=10)

        print("Gathering game output...")
        main.gather(input_directory=selfplay_dir, output_directory=gather_dir)

        print("Training on gathered game data... (ctrl+C to quit)")
        main.train(gather_dir, save_file=model_save_file, num_steps=10000,
                   logdir="logs", verbosity=2)
def train(chunk_dir, save_file, load_file=None, generation_num=0,
          logdir=None, num_steps=None, verbosity=1):
    """Train on the most recent window of gathered training chunks.

    Args:
        chunk_dir: Directory containing *.tfrecord.zz training chunks.
        save_file: Path to save the trained model to.
        load_file: Optional checkpoint to initialize weights from.
        generation_num: Current generation number (unused here; kept for
            interface compatibility with the generation-filtered variant).
        logdir: Optional TensorBoard log directory.
        num_steps: Optional cap on the number of training steps.
        verbosity: Training verbosity level passed through to the trainer.

    Raises:
        ValueError: If chunk_dir contains no *.tfrecord.zz files (previously
            this surfaced as an opaque IndexError on tf_records[0]).
    """
    tf_records = sorted(gfile.Glob(os.path.join(chunk_dir, '*.tfrecord.zz')))
    if not tf_records:
        raise ValueError("No .tfrecord.zz files found in %s" % chunk_dir)
    # Keep just the newest records needed to cover the training window.
    tf_records = tf_records[-1 * (WINDOW_SIZE // EXAMPLES_PER_RECORD):]
    print("Training from:", tf_records[0], "to", tf_records[-1])
    n = dual_net.DualNetworkTrainer(save_file, logdir=logdir)
    with timer("Training"):
        n.train(tf_records, init_from=load_file,
                num_steps=num_steps, verbosity=verbosity)
def validate(*tf_record_dirs, load_file=None, logdir=None, num_steps=100):
    """Compute the error terms for a set of holdout data.

    Args:
        *tf_record_dirs: Directories containing *.zz holdout record files.
        load_file: Model checkpoint to evaluate.
        logdir: Directory to log TensorBoard metrics to.
        num_steps: Number of batches of examples to validate on.

    Raises:
        ValueError: If no *.zz files are found in any of the given
            directories (previously this surfaced as an opaque IndexError
            while formatting the timer message).
    """
    n = dual_net.DualNetworkTrainer(logdir=logdir)
    with timer("Building lists of holdout files"):
        tf_records = []
        for record_dir in tf_record_dirs:
            tf_records.extend(gfile.Glob(os.path.join(record_dir, '*.zz')))
    if not tf_records:
        raise ValueError("No .zz holdout files found in %s" % (tf_record_dirs,))
    with timer("Validating from {} to {}".format(
            os.path.basename(tf_records[0]),
            os.path.basename(tf_records[-1]))):
        n.validate(tf_records, batch_size=dual_net.TRAIN_BATCH_SIZE,
                   init_from=load_file, num_steps=num_steps)
def bootstrap(save_file):
    """Create a model with randomly initialized weights at save_file."""
    trainer = dual_net.DualNetworkTrainer(save_file)
    trainer.bootstrap()