Example #1
0
def train(working_dir):
    """Train the next model generation from gathered selfplay games.

    Exits early (status 1) if the latest model does not yet have enough
    selfplay games, otherwise blocks until the golden training chunk for
    the next generation exists and then runs training via main.train.

    Args:
        working_dir: directory holding training state/checkpoints.
    """
    model_num, model_name = fsdb.get_latest_model()

    # Require a minimum number of selfplay games before training.
    games = gfile.Glob(os.path.join(fsdb.selfplay_dir(), model_name, '*.zz'))
    if len(games) < MIN_GAMES_PER_GENERATION:
        print("{} doesn't have enough games to train a new model yet ({})".
              format(model_name, len(games)))
        print("Sleeping...")
        time.sleep(10 * 60)
        print("Done...")
        sys.exit(1)

    print("Training on gathered game data, initializing from {}".format(
        model_name))
    new_model_num = model_num + 1
    new_model_name = shipname.generate(new_model_num)
    print("New model will be {}".format(new_model_name))
    training_file = os.path.join(fsdb.golden_chunk_dir(),
                                 str(new_model_num) + '.tfrecord.zz')
    # Block until the golden chunk for this generation has been written.
    while not gfile.Exists(training_file):
        print("Waiting for", training_file)
        time.sleep(1 * 60)
    print("Using Golden File:", training_file)

    save_file = os.path.join(fsdb.models_dir(), new_model_name)
    try:
        main.train(working_dir, [training_file],
                   save_file,
                   # new_model_num == model_num + 1; reuse it for consistency.
                   generation_num=new_model_num)
    except Exception:
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit still
        # propagate; training errors are logged and swallowed as before.
        logging.exception("Train error")
Example #2
0
def fill_and_wait_models(bufsize=dual_net.EXAMPLES_PER_GENERATION,
                         write_dir=None,
                         threads=8,
                         model_window=100,
                         skip_first_rsync=False):
    """Fills a ringbuffer with positions from the most recent games, then
    continually rsync's and updates the buffer until a new model is promoted.
    Once it detects a new model, it then dumps its contents for training to
    immediately begin on the next model.

    Args:
        bufsize: capacity of the example ringbuffer.
        write_dir: where to flush the training chunk; defaults to the
            golden chunk directory.
        threads: parallelism for the initial buffer fill.
        model_window: number of most-recent models to draw games from.
        skip_first_rsync: if True, skip the initial rsync of game files.
    """
    write_dir = write_dir or fsdb.golden_chunk_dir()
    buf = ExampleBuffer(bufsize)
    models = fsdb.get_models()[-model_window:]
    if not skip_first_rsync:
        with timer("Rsync"):
            smart_rsync(models[-1][0] - 6)
    files = tqdm(map(files_for_model, models), total=len(models))
    buf.parallel_fill(list(itertools.chain(*files)), threads=threads)

    print("Filled buffer, watching for new games")
    # Poll until a model newer than the one we filled from is promoted.
    while fsdb.get_latest_model()[0] == models[-1][0]:
        with timer("Rsync"):
            smart_rsync(models[-1][0] - 2)
        # Only the two most recent models receive new games; use their count
        # as the progress-bar total (was incorrectly len(models)).
        recent = models[-2:]
        new_files = tqdm(map(files_for_model, recent), total=len(recent))
        buf.update(list(itertools.chain(*new_files)))
        time.sleep(60)
    latest = fsdb.get_latest_model()

    print("New model!", latest[1], "!=", models[-1][1])
    print(buf)
    buf.flush(os.path.join(write_dir, str(latest[0] + 1) + '.tfrecord.zz'))
Example #3
0
File: rl_loop.py  Project: wtdeng/minigo
def train(working_dir):
    """Train and export the next model generation.

    Blocks until the golden training chunk for the next generation exists,
    then trains via dual_net.train and exports the new model. Exits with
    status 1 on any training/export failure.

    Args:
        working_dir: directory holding training state/checkpoints.
    """
    model_num, model_name = fsdb.get_latest_model()

    print("Training on gathered game data, initializing from {}".format(model_name))
    new_model_num = model_num + 1
    new_model_name = shipname.generate(new_model_num)
    print("New model will be {}".format(new_model_name))
    training_file = os.path.join(
        fsdb.golden_chunk_dir(), str(new_model_num) + '.tfrecord.zz')
    # Block until the golden chunk for this generation has been written.
    while not gfile.Exists(training_file):
        print("Waiting for", training_file)
        time.sleep(1 * 60)
    print("Using Golden File:", training_file)

    try:
        save_file = os.path.join(fsdb.models_dir(), new_model_name)
        print("Training model")
        dual_net.train(training_file)
        print("Exporting model to ", save_file)
        dual_net.export_model(working_dir, save_file)
    except Exception:
        # logging.exception records the full traceback by itself; the
        # previous handler logged the same traceback three times
        # (logging.error, print, logging.exception).
        logging.exception("Train error")
        sys.exit(1)
Example #4
0
def fill_and_wait_time(bufsize=dual_net.EXAMPLES_PER_GENERATION,
                       write_dir=None,
                       threads=32,
                       start_from=None):
    """Fill an example buffer from recent hourly game dirs, then poll for a
    new model (or enough new games) and flush a training chunk.

    Args:
        bufsize: capacity of the example ringbuffer.
        write_dir: where to flush the training chunk; defaults to the
            golden chunk directory.
        threads: parallelism for the initial buffer fill.
        start_from: UTC datetime to rsync from; defaults to now.
    """
    start_from = start_from or dt.datetime.utcnow()
    write_dir = write_dir or fsdb.golden_chunk_dir()
    buf = ExampleBuffer(bufsize)
    chunk_to_make, fast_write = _determine_chunk_to_make(write_dir)

    hours = fsdb.get_hour_dirs()
    with timer("Rsync"):
        # Rsync from the earlier of the newest hour dir and start_from.
        # NOTE(review): hour dirs apparently look like "%Y-%m-%d-%H/"
        # (trailing slash) — confirm against fsdb.get_hour_dirs().
        time_rsync(
            min(dt.datetime.strptime(hours[-1], "%Y-%m-%d-%H/"), start_from))
        start_from = dt.datetime.utcnow()

    # Re-list hour dirs after the rsync, newest first, and take only as
    # many hours as the window for this chunk requires.
    hours = fsdb.get_hour_dirs()
    files = (tf.gfile.Glob(os.path.join(LOCAL_DIR, d, "*.zz"))
             for d in reversed(hours)
             if tf.gfile.Exists(os.path.join(LOCAL_DIR, d)))
    files = itertools.islice(files, get_window_size(chunk_to_make))

    models = fsdb.get_models()
    buf.parallel_fill(list(itertools.chain.from_iterable(files)),
                      threads=threads)
    print("Filled buffer, watching for new games")

    # Keep polling until BOTH a new model has been promoted AND at least
    # MINIMUM_NEW_GAMES new games have landed in the buffer.
    while (fsdb.get_latest_model() == models[-1]
           or buf.total_updates < MINIMUM_NEW_GAMES):
        with timer("Rsync"):
            # Re-sync with a 60-minute overlap to avoid missing games
            # written around the previous sync point.
            time_rsync(start_from - dt.timedelta(minutes=60))
        start_from = dt.datetime.utcnow()
        hours = sorted(fsdb.get_hour_dirs(LOCAL_DIR))
        # Only the two most recent hour dirs can contain new games.
        new_files = list(
            map(lambda d: tf.gfile.Glob(os.path.join(LOCAL_DIR, d, '*.zz')),
                hours[-2:]))
        buf.update(list(itertools.chain.from_iterable(new_files)))
        if fast_write:
            # A chunk is overdue; skip the wait loop and flush immediately.
            break
        time.sleep(30)
        if fsdb.get_latest_model() != models[-1]:
            print("New model!  Waiting for games. Got", buf.total_updates,
                  "new games so far")

    latest = fsdb.get_latest_model()
    print("New model!", latest[1], "!=", models[-1][1])
    print(buf)
    buf.flush(chunk_to_make)