Ejemplo n.º 1
0
def rl_loop():
    """Run one bootstrap -> selfplay -> gather -> train cycle end to end.

    Intended as an integration smoke test of the pipeline rather than a
    realistic reinforcement-learning run.
    """
    # Shrink the network so every stage finishes quickly.
    dual_net.get_default_hyperparams = lambda **kwargs: {
        'k': 1,
        'fc_width': 2,
        'num_shared_layers': 1,
        'l2_strength': 2e-4,
        'momentum': 0.9,
    }

    with tempfile.TemporaryDirectory() as base_dir:
        model_file = os.path.join(base_dir, 'models', '000000-bootstrap')
        selfplay_root = os.path.join(base_dir, 'data', 'selfplay')
        worker_dir = os.path.join(selfplay_root, '000000-bootstrap', 'worker1')
        chunk_dir = os.path.join(base_dir, 'data', 'training_chunks')
        sgf_out = os.path.join(base_dir, 'sgf', '000000-bootstrap')
        os.mkdir(os.path.join(base_dir, 'data'))

        print("Creating random initial weights...")
        dual_net.DualNetworkTrainer(model_file).bootstrap()

        print("Playing some games...")
        # Two selfplay runs so that gather has more than one game to merge.
        for _ in range(2):
            main.selfplay(
                load_file=model_file,
                output_dir=worker_dir,
                output_sgf=sgf_out,
                readouts=10)

        print("Gathering game output...")
        main.gather(input_directory=selfplay_root, output_directory=chunk_dir)

        print("Training on gathered game data... (ctrl+C to quit)")
        main.train(chunk_dir, save_file=model_file, num_steps=10000)
Ejemplo n.º 2
0
 def test_train(self):
     """Build a dataset from the example SGF and train for one step."""
     with tempfile.TemporaryDirectory() as model_dir, \
         tempfile.NamedTemporaryFile() as tf_record:
         # Convert the checked-in example game into a tfrecord fixture.
         preprocessing.make_dataset_from_sgf(
             'tests/example_game.sgf', tf_record.name)
         save_path = os.path.join(model_dir, 'test_model')
         trainer = dual_net.DualNetworkTrainer(save_path, **fast_hparams)
         trainer.train([tf_record.name], num_steps=1)
Ejemplo n.º 3
0
def train(chunk_dir, save_file, load_file=None, generation_num=0,
          logdir=None, num_steps=None):
    """Train on the chunks from the last 50 generations up to generation_num."""
    candidates = gfile.Glob(os.path.join(chunk_dir, '*.tfrecord.zz'))

    def _in_window(path):
        # The first six characters of the chunk filename encode its generation.
        gen = int(os.path.basename(path)[:6])
        return (generation_num - 50) < gen <= generation_num

    tf_records = [path for path in candidates if _in_window(path)]

    trainer = dual_net.DualNetworkTrainer(save_file)
    with timer("Training"):
        trainer.train(tf_records, init_from=load_file,
                      logdir=logdir, num_steps=num_steps)
Ejemplo n.º 4
0
    def test_inference(self):
        """Bootstrap a model and run inference from two coexisting networks."""
        with tempfile.TemporaryDirectory() as model_dir:
            model_path = os.path.join(model_dir, 'blah')
            trainer = dual_net.DualNetworkTrainer(model_path, **fast_hparams)
            trainer.bootstrap()

            # In the past we've had issues initializing two separate NNs
            # in the same process... double check that two DualNetwork
            # instances can live side by side.
            for _ in range(2):
                net = dual_net.DualNetwork(model_path, **fast_hparams)
                net.run(go.Position())
Ejemplo n.º 5
0
def rl_loop():
    """Run the reinforcement learning loop

    This is meant to be more of an integration test than a realistic way to run
    the reinforcement learning.
    """
    # Shrink the network hyperparameters so it trains quickly.
    dual_net.get_default_hyperparams = lambda **kwargs: {
        'k': 8,
        'fc_width': 16,
        'num_shared_layers': 1,
        'l2_strength': 1e-4,
        'momentum': 0.9
    }

    dual_net.TRAIN_BATCH_SIZE = 16

    # Shrink the shuffle buffer so we don't spin forever shuffling positions.
    preprocessing.SHUFFLE_BUFFER_SIZE = 10000

    with tempfile.TemporaryDirectory() as base_dir:
        model_file = os.path.join(base_dir, 'models', '000000-bootstrap')
        selfplay_root = os.path.join(base_dir, 'data', 'selfplay')
        model_selfplay_dir = os.path.join(selfplay_root, '000000-bootstrap')
        chunk_dir = os.path.join(base_dir, 'data', 'training_chunks')
        sgf_out = os.path.join(base_dir, 'sgf', '000000-bootstrap')
        os.mkdir(os.path.join(base_dir, 'data'))

        print("Creating random initial weights...")
        dual_net.DualNetworkTrainer(model_file).bootstrap()

        print("Playing some games...")
        # Two selfplay runs so gather has multiple games to work with.
        for _ in range(2):
            main.selfplay(load_file=model_file,
                          output_dir=model_selfplay_dir,
                          output_sgf=sgf_out,
                          holdout_pct=0,
                          readouts=10)

        print("Gathering game output...")
        main.gather(input_directory=selfplay_root, output_directory=chunk_dir)

        print("Training on gathered game data... (ctrl+C to quit)")
        main.train(chunk_dir,
                   save_file=model_file,
                   num_steps=10000,
                   logdir="logs",
                   verbosity=2)
Ejemplo n.º 6
0
def train(chunk_dir,
          save_file,
          load_file=None,
          generation_num=0,
          logdir=None,
          num_steps=None,
          verbosity=1):
    """Train a network on the newest window of gathered training chunks.

    Args:
        chunk_dir: directory containing *.tfrecord.zz training chunks.
        save_file: path the trained model is saved to.
        load_file: optional checkpoint to initialize training from.
        generation_num: accepted for interface compatibility; not used by
            this windowed selection (the newest records are taken instead).
        logdir: optional TensorBoard log directory.
        num_steps: number of training steps, or None for the trainer default.
        verbosity: verbosity level forwarded to the trainer.

    Raises:
        ValueError: if chunk_dir contains no *.tfrecord.zz files.
    """
    tf_records = sorted(gfile.Glob(os.path.join(chunk_dir, '*.tfrecord.zz')))
    if not tf_records:
        # Fail with a clear message instead of the cryptic IndexError that
        # tf_records[0] would otherwise raise below.
        raise ValueError(
            "No .tfrecord.zz files found in {}".format(chunk_dir))
    # Keep only the most recent records that fit in the training window.
    tf_records = tf_records[-1 * (WINDOW_SIZE // EXAMPLES_PER_RECORD):]

    print("Training from:", tf_records[0], "to", tf_records[-1])

    n = dual_net.DualNetworkTrainer(save_file, logdir=logdir)
    with timer("Training"):
        n.train(tf_records,
                init_from=load_file,
                num_steps=num_steps,
                verbosity=verbosity)
Ejemplo n.º 7
0
def validate(*tf_record_dirs, load_file=None, logdir=None, num_steps=100):
    """Computes the error terms for a set of holdout data specified by
    `holdout_dir`, using the model specified at `load_file` and logging TB
    metrics to the dir in `logdir`, using `num_steps` batches of examples

    Raises:
        ValueError: if none of the directories contain any *.zz files.
    """
    n = dual_net.DualNetworkTrainer(logdir=logdir)

    with timer("Building lists of holdout files"):
        # Flatten the per-directory globs into one list of record paths.
        tf_records = []
        for record_dir in tf_record_dirs:
            tf_records.extend(gfile.Glob(os.path.join(record_dir, '*.zz')))

    if not tf_records:
        # Fail with a clear message instead of the cryptic IndexError that
        # tf_records[0] would otherwise raise in the timer label below.
        raise ValueError(
            "No .zz holdout files found under: {}".format(
                ", ".join(tf_record_dirs)))

    with timer("Validating from {} to {}".format(
            os.path.basename(tf_records[0]),
            os.path.basename(tf_records[-1]))):
        n.validate(tf_records,
                   batch_size=dual_net.TRAIN_BATCH_SIZE,
                   init_from=load_file,
                   num_steps=num_steps)
Ejemplo n.º 8
0
def bootstrap(save_file):
    """Create randomly initialized model weights and save them to save_file."""
    trainer = dual_net.DualNetworkTrainer(save_file)
    trainer.bootstrap()