Example #1
0
def bootstrap(
    working_dir: 'tf.estimator working directory.',
    model_save_path: 'Where to export the first bootstrapped generation'):
    """Create randomly initialized weights and export the first generation."""
    # Both directories must exist before dual_net touches them.
    for required_dir in (working_dir, os.path.dirname(model_save_path)):
        _ensure_dir_exists(required_dir)
    dual_net.bootstrap(working_dir)
    dual_net.export_model(working_dir, model_save_path)
Example #2
0
def train(working_dir):
    """Train the next model generation from the matching golden chunk.

    Determines the next generation number from the latest model, waits
    (polling once per minute) for the corresponding golden training chunk
    to appear, trains on it, and exports the new model.

    Args:
        working_dir: tf.estimator working directory.

    Exits the process with status 1 if training or export raises.
    """
    model_num, model_name = fsdb.get_latest_model()

    print("Training on gathered game data, initializing from {}".format(model_name))
    new_model_num = model_num + 1
    new_model_name = shipname.generate(new_model_num)
    print("New model will be {}".format(new_model_name))
    training_file = os.path.join(
        fsdb.golden_chunk_dir(), str(new_model_num) + '.tfrecord.zz')
    # The chunk is produced asynchronously by the selfplay/gather pipeline;
    # poll until it shows up.
    while not gfile.Exists(training_file):
        print("Waiting for", training_file)
        time.sleep(1*60)
    print("Using Golden File:", training_file)

    try:
        save_file = os.path.join(fsdb.models_dir(), new_model_name)
        print("Training model")
        dual_net.train(training_file)
        print("Exporting model to ", save_file)
        dual_net.export_model(working_dir, save_file)
    except Exception:
        # logging.exception records the full traceback itself; the previous
        # code emitted the same traceback three times (logging.error, print,
        # logging.exception) and bound an unused `e`.
        logging.exception("Train error")
        sys.exit(1)
Example #3
0
def bootstrap(
        working_dir: 'tf.estimator working directory.',
        model_save_path: 'Where to export the first bootstrapped generation'):
    """Initialize random weights and export them as the first model."""
    _ensure_dir_exists(working_dir)
    # The export parent directory must also exist before writing the model.
    export_parent = os.path.dirname(model_save_path)
    _ensure_dir_exists(export_parent)
    dual_net.bootstrap(working_dir)
    dual_net.export_model(working_dir, model_save_path)
Example #4
0
def main(argv):
    """Train on examples and export the updated model weights."""
    tf_records = argv[1:]
    logging.info("Training on %s records: %s to %s", len(tf_records),
                 tf_records[0], tf_records[-1])

    if FLAGS.dist_train:
        hvd.init()

    mllogger = mllog.get_mllogger()
    mllog.config(filename="train.log")

    mllog.config(default_namespace="worker1",
                 default_stack_offset=1,
                 default_clear_line=False)

    with utils.logged_timer("Training"):
        train(*tf_records)

    # Only the chief worker (rank 0, or the sole process when not running
    # distributed) exports and freezes the model.
    is_chief = (not FLAGS.dist_train) or hvd.rank() == 0
    if not is_chief:
        return
    if FLAGS.export_path:
        dual_net.export_model(FLAGS.export_path)
        epoch = int(os.path.basename(FLAGS.export_path))
        mllogger.event(key="save_model", value={"Iteration": epoch})
    if FLAGS.freeze:
        dual_net.freeze_graph(FLAGS.export_path, FLAGS.use_trt,
                              FLAGS.trt_max_batch_size,
                              FLAGS.trt_precision,
                              FLAGS.selfplay_precision)
Example #5
0
def train(tf_records: 'list of files of tf_records to train on',
          model_save_path: 'Where to export the completed generation.'):
    """Train on the given records, then export and freeze the model."""
    first_record, last_record = tf_records[0], tf_records[-1]
    print("Training on:", first_record, "to", last_record)
    with utils.logged_timer("Training"):
        dual_net.train(*tf_records)
    print("== Training done.  Exporting model to ", model_save_path)
    dual_net.export_model(flags.FLAGS.model_dir, model_save_path)
    freeze_graph(model_save_path)
Example #6
0
def train(working_dir: 'tf.estimator working directory.',
          tf_records: 'list of files of tf_records to train on',
          model_save_path: 'Where to export the completed generation.'):
    """Run one training pass, then export and freeze the resulting model."""
    print("Training on:", tf_records[0], "to", tf_records[-1])
    # Time just the training step; export/freeze happen outside the timer.
    with utils.logged_timer("Training"):
        dual_net.train(working_dir, tf_records)
    print("== Training done.  Exporting model to ", model_save_path)
    dual_net.export_model(working_dir, model_save_path)
    freeze_graph(model_save_path)
Example #7
0
def main(argv):
    """Train on examples and export the updated model weights."""
    tf_records = argv[1:]
    logging.info("Training on %s records: %s to %s",
                 len(tf_records), tf_records[0], tf_records[-1])
    with utils.logged_timer("Training"):
        train(*tf_records)
    if FLAGS.export_path:
        dual_net.export_model(FLAGS.export_path)
Example #8
0
File: main.py Project: qipa/minigo
def train(
        working_dir: 'tf.estimator working directory.',
        tf_records: 'list of files of tf_records to train on',
        model_save_path: 'Where to export the completed generation.',
        generation_num: 'Which generation you are training.'=0):
    """Train one generation, exporting and freezing inside the timed scope."""
    first_record = tf_records[0]
    last_record = tf_records[-1]
    print("Training on:", first_record, "to", last_record)
    # NOTE: export and freeze are inside the timer block, so "Training" time
    # includes them — preserved from the original.
    with timer("Training"):
        dual_net.train(working_dir, tf_records, generation_num)
        dual_net.export_model(working_dir, model_save_path)
        freeze_graph(model_save_path)
Example #9
0
def train(working_dir: 'tf.estimator working directory.',
          chunk_dir: 'Directory where gathered training chunks are.',
          model_save_path: 'Where to export the completed generation.',
          generation_num: 'Which generation you are training.' = 0):
    """Train on the most recent training chunks and export the model."""
    chunk_pattern = os.path.join(chunk_dir, '*.tfrecord.zz')
    tf_records = sorted(gfile.Glob(chunk_pattern))
    # Keep only the newest records that fit in the training window.
    window_records = WINDOW_SIZE // EXAMPLES_PER_RECORD
    tf_records = tf_records[-1 * window_records:]

    print("Training from:", tf_records[0], "to", tf_records[-1])

    with timer("Training"):
        dual_net.train(working_dir, tf_records, generation_num)
        dual_net.export_model(working_dir, model_save_path)
Example #10
0
def train(
    working_dir: 'tf.estimator working directory.',
    chunk_dir: 'Directory where gathered training chunks are.',
    model_save_path: 'Where to export the completed generation.',
    generation_num: 'Which generation you are training.'=0):
    """Glob the chunk directory, train on the sliding window, export."""
    all_chunks = sorted(gfile.Glob(os.path.join(chunk_dir, '*.tfrecord.zz')))
    # The training window covers the last WINDOW_SIZE examples' worth of files.
    tf_records = all_chunks[-1 * (WINDOW_SIZE // EXAMPLES_PER_RECORD):]

    print("Training from:", tf_records[0], "to", tf_records[-1])

    with timer("Training"):
        dual_net.train(working_dir, tf_records, generation_num)
        dual_net.export_model(working_dir, model_save_path)
Example #11
0
def bootstrap(
        working_dir: 'tf.estimator working directory. If not set, defaults to a random tmp dir'=None,
        model_save_path: 'Where to export the first bootstrapped generation'=None):
    """Create random initial weights and export the first generation.

    Args:
        working_dir: estimator working directory; when None, a temporary
            directory is created and cleaned up automatically.
        model_save_path: destination path for the exported model.
    """
    def _bootstrap_into(wd):
        # Shared body — previously duplicated verbatim in both branches.
        _ensure_dir_exists(wd)
        _ensure_dir_exists(os.path.dirname(model_save_path))
        dual_net.bootstrap(wd)
        dual_net.export_model(wd, model_save_path)

    if working_dir is None:
        with tempfile.TemporaryDirectory() as tmp_dir:
            _bootstrap_into(tmp_dir)
    else:
        _bootstrap_into(working_dir)
Example #12
0
    def test_inference(self):
        """Bootstrap and export a model, then run two networks on it."""
        with tempfile.TemporaryDirectory() as working_dir, \
                tempfile.TemporaryDirectory() as export_dir:
            dual_net.bootstrap(working_dir, **fast_hparams)
            exported_model = os.path.join(export_dir, 'bootstrap-model')
            dual_net.export_model(working_dir, exported_model)

            first_net = dual_net.DualNetwork(exported_model, **fast_hparams)
            first_net.run(go.Position())

            # Historically, initializing a second NN in the same process was
            # problematic — verify two DualNetwork instances can coexist
            # while the first is still alive.
            second_net = dual_net.DualNetwork(exported_model, **fast_hparams)
            second_net.run(go.Position())
Example #13
0
def main(argv):
    """Train on examples and export the updated model weights."""
    tf_records = argv[1:]
    logging.info("Training on %s records: %s to %s",
                 len(tf_records), tf_records[0], tf_records[-1])
    with utils.logged_timer("Training"):
        train(*tf_records)
    if FLAGS.export_path:
        dual_net.export_model(FLAGS.export_path)
    if not FLAGS.freeze:
        return
    # TPU builds use a dedicated freeze path; the GPU path may also apply
    # TensorRT conversion.
    if FLAGS.use_tpu:
        dual_net.freeze_graph_tpu(FLAGS.export_path)
    else:
        dual_net.freeze_graph(FLAGS.export_path, FLAGS.use_trt,
                              FLAGS.trt_max_batch_size, FLAGS.trt_precision)
Example #14
0
    def test_inference(self):
        """Check that an exported bootstrap model supports two live networks."""
        with tempfile.TemporaryDirectory() as working_dir:
            with tempfile.TemporaryDirectory() as export_dir:
                dual_net.bootstrap(working_dir, **fast_hparams)
                exported_model = os.path.join(export_dir, 'bootstrap-model')
                dual_net.export_model(working_dir, exported_model)

                net_a = dual_net.DualNetwork(exported_model, **fast_hparams)
                net_a.run(go.Position())

                # There have been bugs initializing a second NN in the same
                # process; keep the first instance alive and confirm a second
                # one works alongside it.
                net_b = dual_net.DualNetwork(exported_model, **fast_hparams)
                net_b.run(go.Position())
Example #15
0
def bootstrap(
        working_dir: 'tf.estimator working directory. If not set, defaults to a random tmp dir'=None,
        model_save_path: 'Where to export the first bootstrapped generation'=None):
    """Create random initial weights and export the first generation.

    Wraps the work in qmeas 'bootstrap' timing markers.

    Args:
        working_dir: estimator working directory; when None, a temporary
            directory is created and cleaned up automatically.
        model_save_path: destination path for the exported model.
    """
    def _do_bootstrap(wd):
        # Shared body — previously duplicated verbatim in both branches.
        _ensure_dir_exists(wd)
        _ensure_dir_exists(os.path.dirname(model_save_path))
        dual_net.bootstrap(wd)
        dual_net.export_model(wd, model_save_path)

    qmeas.start_time('bootstrap')
    if working_dir is None:
        with tempfile.TemporaryDirectory() as tmp_dir:
            _do_bootstrap(tmp_dir)
    else:
        _do_bootstrap(working_dir)
    qmeas.stop_time('bootstrap')
Example #16
0
def main(argv):
    """Train on examples and export the updated model weights."""
    tf_records = argv[1:]
    logging.info("Training on %s records: %s to %s", len(tf_records),
                 tf_records[0], tf_records[-1])
    with utils.logged_timer("Training"):
        estimator = train(*tf_records)

    # Export a SavedModel next to the checkpoint export when a path is
    # given; otherwise fall back to a local 'saved_model' directory.
    saved_model_dir = FLAGS.export_path if FLAGS.export_path else 'saved_model'
    if FLAGS.export_path:
        dual_net.export_model(FLAGS.export_path)
    estimator.export_saved_model(saved_model_dir, serving_input_receiver_fn())

    if FLAGS.freeze:
        if FLAGS.use_tpu:
            dual_net.freeze_graph_tpu(FLAGS.export_path)
        else:
            dual_net.freeze_graph(FLAGS.export_path)
Example #17
0
def main(argv):
    """Train on examples and export the updated model weights."""
    if FLAGS.dist_train:
        hvd.init()
    # Emit MLPerf-style run metadata before training starts.
    mll.global_batch_size(FLAGS.train_batch_size)
    mll.lr_rates(FLAGS.lr_rates)
    mll.lr_boundaries(FLAGS.lr_boundaries)
    tf_records = argv[1:]
    logging.info("Training on %s records: %s to %s", len(tf_records),
                 tf_records[0], tf_records[-1])
    with utils.logged_timer("Training"):
        train(*tf_records)

    # Export/freeze only on the chief (rank 0, or the sole process).
    if FLAGS.dist_train and hvd.rank() != 0:
        return
    if FLAGS.export_path:
        dual_net.export_model(FLAGS.export_path)
    if FLAGS.freeze:
        if FLAGS.use_tpu:
            dual_net.freeze_graph_tpu(FLAGS.export_path)
        else:
            dual_net.freeze_graph(FLAGS.export_path)
Example #18
0
def main(unused_argv):
    """Bootstrap random weights."""
    export_parent = os.path.dirname(FLAGS.export_path)
    utils.ensure_dir_exists(export_parent)
    # Optionally initialize fresh random weights before exporting.
    if FLAGS.create_bootstrap:
        dual_net.bootstrap()
    dual_net.export_model(FLAGS.export_path)