def train(working_dir):
    """Train the next model generation from gathered self-play data.

    If the latest model has not yet produced MIN_GAMES_PER_GENERATION
    games, sleeps 10 minutes and exits with status 1 so the caller can
    retry. Otherwise waits for the golden chunk for the new generation
    to appear, then trains and saves the new model.

    Args:
        working_dir: directory holding training state/checkpoints,
            passed through to main.train.
    """
    model_num, model_name = fsdb.get_latest_model()
    games = gfile.Glob(os.path.join(fsdb.selfplay_dir(), model_name, '*.zz'))
    if len(games) < MIN_GAMES_PER_GENERATION:
        print("{} doesn't have enough games to train a new model yet ({})".
              format(model_name, len(games)))
        print("Sleeping...")
        time.sleep(10 * 60)
        print("Done...")
        sys.exit(1)

    print("Training on gathered game data, initializing from {}".format(
        model_name))
    new_model_num = model_num + 1
    new_model_name = shipname.generate(new_model_num)
    print("New model will be {}".format(new_model_name))
    training_file = os.path.join(
        fsdb.golden_chunk_dir(), str(new_model_num) + '.tfrecord.zz')
    # Block until the golden chunk for this generation has been written
    # (presumably by a separate gather/make_chunk_for process).
    while not gfile.Exists(training_file):
        print("Waiting for", training_file)
        time.sleep(1 * 60)
    print("Using Golden File:", training_file)

    save_file = os.path.join(fsdb.models_dir(), new_model_name)
    try:
        # Consistency fix: use new_model_num instead of recomputing
        # model_num + 1 (same value, single source of truth).
        main.train(working_dir, [training_file], save_file,
                   generation_num=new_model_num)
    except Exception:
        # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt
        # still propagate; a training failure is logged, not fatal here.
        logging.exception("Train error")
def smart_rsync(from_model_num=0, source_dir=None, dest_dir=LOCAL_DIR):
    """Rsync the self-play game dirs of every model >= from_model_num.

    Args:
        from_model_num: skip models numbered below this (clamped to 0).
        source_dir: remote game dir; defaults to fsdb.selfplay_dir().
        dest_dir: local destination root.
    """
    source_dir = source_dir or fsdb.selfplay_dir()
    start_num = max(from_model_num, 0)
    for num, name in fsdb.get_models():
        if num < start_num:
            continue
        _rsync_dir(os.path.join(source_dir, name),
                   os.path.join(dest_dir, name))
def time_rsync(from_date, source_dir=None, dest_dir=LOCAL_DIR):
    """Rsync hourly self-play dirs from `from_date` forward to now (UTC).

    Directory names follow the "%Y-%m-%d-%H" convention; hours with no
    source directory are skipped.

    Args:
        from_date: naive UTC datetime to start from (compared against
            dt.datetime.utcnow(), so it must stay naive).
        source_dir: remote root; defaults to fsdb.selfplay_dir().
        dest_dir: local destination root.
    """
    source_dir = source_dir or fsdb.selfplay_dir()
    one_hour = dt.timedelta(hours=1)
    while from_date < dt.datetime.utcnow():
        stamp = from_date.strftime("%Y-%m-%d-%H")
        src = os.path.join(source_dir, stamp)
        if tf.gfile.Exists(src):
            _rsync_dir(src, os.path.join(dest_dir, stamp))
        from_date += one_hour
def selfplay(verbose=2):
    """Play one self-play game with the newest model.

    If the current generation already has more than
    MAX_GAMES_PER_GENERATION games, sleeps 10 minutes and exits with
    status 1 instead of playing.

    Args:
        verbose: verbosity level forwarded to main.selfplay.
    """
    _, model_name = fsdb.get_latest_model()
    game_glob = os.path.join(fsdb.selfplay_dir(), model_name, '*.zz')
    games = gfile.Glob(game_glob)
    if len(games) > MAX_GAMES_PER_GENERATION:
        print("{} has enough games ({})".format(model_name, len(games)))
        time.sleep(10 * 60)
        sys.exit(1)

    print("Playing a game with model {}".format(model_name))
    main.selfplay(
        load_file=os.path.join(fsdb.models_dir(), model_name),
        output_dir=os.path.join(fsdb.selfplay_dir(), model_name),
        holdout_dir=os.path.join(fsdb.holdout_dir(), model_name),
        output_sgf=os.path.join(fsdb.sgf_dir(), model_name),
        holdout_pct=HOLDOUT_PCT,
        verbose=verbose,
    )
def make_chunk_for(output_dir=LOCAL_DIR, local_dir=LOCAL_DIR, game_dir=None,
                   model_num=1, positions=dual_net.EXAMPLES_PER_GENERATION,
                   threads=8, samples_per_game=4):
    """Explicitly build a golden chunk for model `model_num`.

    Walks the games of models older than `model_num`, newest first,
    rsyncing each model's game dir into `local_dir` if it is not already
    cached, until enough files are collected to expect `positions`
    samples. Then fills an ExampleBuffer and flushes it to
    `<output_dir>/<model_num>.tfrecord.zz`.

    Args:
        output_dir: where the golden chunk is written.
        local_dir: local cache of rsynced game dirs.
        game_dir: remote game root; defaults to fsdb.selfplay_dir().
        model_num: generation to build the chunk for.
        positions: target number of samples (EXAMPLES_PER_GENERATION).
        threads: parallelism for buffer filling.
        samples_per_game: expected samples drawn per game file.
    """
    game_dir = game_dir or fsdb.selfplay_dir()
    ensure_dir_exists(output_dir)
    earlier_models = [(num, name) for num, name in fsdb.get_models()
                      if num < model_num]

    buf = ExampleBuffer(positions)
    files = []
    # Newest models first; stop once we expect enough samples.
    for _, name in sorted(earlier_models, reverse=True):
        cached_dir = os.path.join(local_dir, name)
        if not tf.gfile.Exists(cached_dir):
            print("Rsyncing", name)
            _rsync_dir(os.path.join(game_dir, name), cached_dir)
        files += tf.gfile.Glob(os.path.join(cached_dir, '*.zz'))
        if len(files) * samples_per_game > positions:
            break

    print("Filling from {} files".format(len(files)))
    buf.parallel_fill(files, threads=threads,
                      samples_per_game=samples_per_game)
    print(buf)
    output = os.path.join(output_dir, str(model_num) + '.tfrecord.zz')
    print("Writing to", output)
    buf.flush(output)