def launch_eval(black_num=0, white_num=0):
    """Submit two Kubernetes evaluation jobs matching up two models.

    The job definition is read from ``cluster/evaluator/gpu-evaluator.yaml``,
    environment variables in it are expanded, and the result is rendered as a
    Jinja2 template once per colour assignment: first with the models as
    given, then with black and white swapped. Each rendered job is created in
    the ``default`` namespace.

    Args:
        black_num: model number of the first player; must be > 0.
        white_num: model number of the second player; must be > 0.

    Returns:
        None. If either model number is not positive a message is printed
        and nothing is launched.
    """
    if black_num <= 0 or white_num <= 0:
        print("Need real model numbers")
        return

    b = fsdb.get_model(black_num)
    w = fsdb.get_model(white_num)

    models_dir = fsdb.models_dir()
    b_model_path = os.path.join(models_dir, b)
    w_model_path = os.path.join(models_dir, w)

    kubernetes.config.load_kube_config()
    configuration = kubernetes.client.Configuration()
    api_instance = kubernetes.client.BatchV1Api(
        kubernetes.client.ApiClient(configuration))

    # Read the template through a context manager so the handle is closed.
    with open("cluster/evaluator/gpu-evaluator.yaml") as conf_file:
        raw_job_conf = conf_file.read()
    env_job_conf = os.path.expandvars(raw_job_conf)
    t = jinja2.Template(env_job_conf)

    # One job per colour assignment, so each model plays both sides.
    matchups = (
        {'white': w_model_path, 'black': b_model_path,
         'wnum': white_num, 'bnum': black_num},
        {'white': b_model_path, 'black': w_model_path,
         'wnum': black_num, 'bnum': white_num},
    )
    for template_args in matchups:
        # NOTE(review): yaml.load without an explicit Loader is deprecated in
        # newer PyYAML releases; consider yaml.safe_load once the installed
        # version and the template contents have been confirmed.
        job_conf = yaml.load(t.render(template_args))
        api_instance.create_namespaced_job('default', body=job_conf)
def same_run_eval(black_num=0, white_num=0):
    """Spawn an evaluation job between two models of the same run.

    Both models are identified by their model number. If either number is
    not positive, a message is printed and no job is launched.
    """
    if black_num <= 0 or white_num <= 0:
        print("Need real model numbers")
        return None

    black_name = fsdb.get_model(black_num)
    white_name = fsdb.get_model(white_num)

    # NOTE(review): `fsdb.eval_dir` is passed on without being called, unlike
    # the other fsdb directory helpers used here -- confirm this is what
    # launch_eval_job expects.
    bucket = fsdb.eval_dir

    black_path = os.path.join(fsdb.models_dir(), black_name)
    white_path = os.path.join(fsdb.models_dir(), white_name)
    job_name = "{:d}-{:d}".format(black_num, white_num)

    launch_eval_job(black_path + ".pb", white_path + ".pb", job_name, bucket)
def train(working_dir):
    """Train the next model generation from its golden chunk file.

    If the latest model has fewer than MIN_GAMES_PER_GENERATION selfplay
    files, the function sleeps for ten minutes and exits the process with
    status 1. Otherwise it waits (polling once a minute) until the golden
    chunk for the new generation exists and then calls ``main.train``.

    Args:
        working_dir: working directory handed through to ``main.train``.

    Training failures are logged with their traceback and not re-raised.
    """
    model_num, model_name = fsdb.get_latest_model()

    games = gfile.Glob(os.path.join(fsdb.selfplay_dir(), model_name, '*.zz'))
    if len(games) < MIN_GAMES_PER_GENERATION:
        print("{} doesn't have enough games to train a new model yet ({})".
              format(model_name, len(games)))
        print("Sleeping...")
        time.sleep(10 * 60)
        print("Done...")
        sys.exit(1)

    print("Training on gathered game data, initializing from {}".format(
        model_name))
    new_model_num = model_num + 1
    new_model_name = shipname.generate(new_model_num)
    print("New model will be {}".format(new_model_name))

    training_file = os.path.join(
        fsdb.golden_chunk_dir(), str(new_model_num) + '.tfrecord.zz')
    while not gfile.Exists(training_file):
        print("Waiting for", training_file)
        time.sleep(1 * 60)
    print("Using Golden File:", training_file)

    save_file = os.path.join(fsdb.models_dir(), new_model_name)
    try:
        main.train(working_dir, [training_file], save_file,
                   generation_num=new_model_num)
    except Exception:
        # Catch Exception rather than everything, so KeyboardInterrupt and
        # SystemExit still stop the process instead of being logged away.
        logging.exception("Train error")
def eval_pv(eval_positions):
    """Write the MCTS principal variation of each position for each model.

    Every `FLAGS.eval_every`-th model starting at `FLAGS.idx_start` is
    loaded. For each (name, position) pair the search tree is followed from
    the root through `best_child()` until a node without children is
    reached. The visited "move,visit-count" entries are written to
    ``pv-<name>-<model index>`` inside `FLAGS.data_dir`.
    """
    model_paths = oneoff_utils.get_model_paths(fsdb.models_dir())

    first_idx = FLAGS.idx_start
    stride = FLAGS.eval_every

    print("Evaluating models {}-{}, eval_every={}".format(
        first_idx, len(model_paths), stride))

    for idx in tqdm(range(first_idx, len(model_paths), stride)):
        model_path = model_paths[idx]
        if idx == first_idx:
            # The first model builds the player; later ones reuse it.
            player = oneoff_utils.load_player(model_path)
        else:
            oneoff_utils.restore_params(model_path, player)

        searcher = strategies.MCTSPlayer(player.network, resign_threshold=-1)

        for name, position in eval_positions:
            searcher.initialize_game(position)
            searcher.suggest_move(position)

            # Walk down the tree along the best child at every level.
            variation = []
            current = searcher.root
            while current.children:
                current = current.children.get(current.best_child())
                variation.append(
                    "{},{}".format(current.fmove, int(current.N)))

            out_name = "pv-{}-{}".format(name, idx)
            out_path = os.path.join(FLAGS.data_dir, out_name)
            with open(out_path, "w") as out:
                out.write("{}, {}\n".format(idx, ",".join(variation)))
def validate(working_dir, model_num=None, validate_name=None):
    """Run validation on holdout data of recent generations.

    Uses the holdout directories up to the most recent model, or up to (but
    not including) the model specified by `model_num`. The checkpoint that
    is validated is the model itself (latest, or `model_num`).
    """
    if model_num is not None:
        model_num = int(model_num)
        model_name = fsdb.get_model(model_num)
    else:
        model_num, model_name = fsdb.get_latest_model()

    # Model N was trained on games up through model N-2, so the validation
    # set should only cover models through N-2 as well; hence the
    # (model_num - 1) bound.
    upper_bound = model_num - 1
    eligible = [entry for entry in fsdb.get_models() if entry[0] < upper_bound]

    # Run on the most recent 50 generations.
    # TODO(brianklee): make this hyperparameter dependency explicit/not hardcoded
    recent = eligible[-50:]
    holdout_dirs = []
    for entry in recent:
        holdout_dirs.append(os.path.join(fsdb.holdout_dir(), entry[1]))

    checkpoint = os.path.join(fsdb.models_dir(), model_name)
    main.validate(working_dir, *holdout_dirs,
                  checkpoint_name=checkpoint,
                  validate_name=validate_name)
def train(working_dir):
    """Train on the next golden chunk and export the resulting model.

    Waits, polling once a minute, until the golden chunk file for the next
    model number exists. It then trains with ``dual_net.train`` and exports
    with ``dual_net.export_model``. On any exception the traceback is logged
    and printed and the process exits with status 1.
    """
    model_num, model_name = fsdb.get_latest_model()

    print("Training on gathered game data, initializing from {}".format(model_name))
    next_num = model_num + 1
    next_name = shipname.generate(next_num)
    print("New model will be {}".format(next_name))

    chunk_name = str(next_num) + '.tfrecord.zz'
    training_file = os.path.join(fsdb.golden_chunk_dir(), chunk_name)
    while True:
        if gfile.Exists(training_file):
            break
        print("Waiting for", training_file)
        time.sleep(1 * 60)
    print("Using Golden File:", training_file)

    try:
        save_file = os.path.join(fsdb.models_dir(), next_name)
        print("Training model")
        dual_net.train(training_file)
        print("Exporting model to ", save_file)
        dual_net.export_model(working_dir, save_file)
    except Exception:
        import traceback
        # The same traceback goes to the log and to stdout.
        logging.error(traceback.format_exc())
        print(traceback.format_exc())
        logging.exception("Train error")
        sys.exit(1)
def main(unusedargv):
    """Sample positions from SGF games and plot training-curve data.

    SGF files are found and filtered using the `sgf_dir`, `min_year` and
    `komi` flags. `num_positions` positions are sampled from them, the
    training-curve data is computed over the models in `fsdb.models_dir()`,
    and the plots are saved to `FLAGS.plot_dir`.
    """
    sgf_files = oneoff_utils.find_and_filter_sgf_files(
        FLAGS.sgf_dir, FLAGS.min_year, FLAGS.komi)

    sampled = sample_positions_from_games(
        sgf_files=sgf_files, num_positions=FLAGS.num_positions)
    # The fourth element (move indices) is not needed here.
    pos_data, move_data, result_data, _move_idxs = sampled

    curve_df = get_training_curve_data(
        fsdb.models_dir(), pos_data, move_data, result_data,
        FLAGS.idx_start, FLAGS.eval_every)

    save_plots(FLAGS.plot_dir, curve_df)
def main(unusedargv):
    """Compute, print and plot the L2 cost over a sequence of models."""
    paths = oneoff_utils.get_model_paths(fsdb.models_dir())

    # List vars constructed when using dual_net.
    first_model = paths[0]
    dual_net_list(first_model)

    # Calculate l2 cost over a sequence of our models.
    cost_df = get_l2_cost_data(paths, FLAGS.idx_start, FLAGS.eval_every)
    print(cost_df)

    save_plots(FLAGS.plot_dir, cost_df)
def backfill():
    """Run ``main.convert`` in place on every model that is not an upgrade.

    Models whose name ends in ``-upgrade`` are skipped. Each remaining model
    is converted with the same path as source and destination. A failure on
    one model is reported and the loop moves on to the next one.
    """
    models = [m[1] for m in fsdb.get_models()]

    import dual_net
    import tensorflow as tf
    from tqdm import tqdm

    # Build the inference graph once before converting any model.
    features, labels = dual_net.get_inference_input()
    dual_net.model_fn(features, labels, tf.estimator.ModeKeys.PREDICT,
                      dual_net.get_default_hyperparams())

    for model_name in tqdm(models):
        if model_name.endswith('-upgrade'):
            continue

        # Source and destination are the same path: conversion is in place.
        model_path = os.path.join(fsdb.models_dir(), model_name)
        try:
            main.convert(model_path, model_path)
        except Exception:
            # Catch Exception rather than everything, so Ctrl-C can still
            # interrupt a long backfill instead of counting as a failure.
            print('failed on', model_name)
def main():
    """Sync evaluation games, compute ratings and print a summary.

    Prints the number of win records used, the 20 highest-rated models
    (best first), the number of rows in the ``wins`` table of ``ratings.db``
    and the rating of each of the 10 most recent models.
    """
    root = os.path.abspath(
        os.path.join("sgf", fsdb.FLAGS.bucket_name, "sgf/eval"))
    sync(root, True)
    models = fsdb.get_models()
    data = wins_subset(fsdb.models_dir())
    print(len(data))
    r = compute_ratings(data)

    # Sort ascending by rating value, keep the last 20, show best first.
    ranked = sorted((v, k) for k, v in r.items())
    for v, k in ranked[-20:][::-1]:
        print(models[model_num_for(k)][1], v)

    # The connection is only needed for the count, so close it right away
    # instead of leaving it open until the process ends.
    db = sqlite3.connect("ratings.db")
    try:
        game_count = db.execute("select count(*) from wins").fetchone()[0]
    finally:
        db.close()
    print(game_count, "games")

    for m in models[-10:]:
        m_id = model_id(m[0])
        print(m[1], r.get(m_id, "model id not found({})".format(m_id)))
def eval_policy(eval_positions):
    """Evaluate all positions with all models and save the policy heatmaps.

    One CSV is written per (position, model) pair, named
    "heatmap-<position_name>-<model-index>.csv".
    CSV format is: model number, value network output, policy network outputs

    position_name is taken from the SGF file.
    Policy network outputs (19x19) are saved in flat order (see
    coord.from_flat).
    """
    model_paths = oneoff_utils.get_model_paths(fsdb.models_dir())

    first_idx = FLAGS.idx_start
    stride = FLAGS.eval_every

    print("Evaluating models {}-{}, eval_every={}".format(
        first_idx, len(model_paths), stride))

    player = None
    model_indices = tqdm(range(first_idx, len(model_paths), stride))
    for i, idx in enumerate(model_indices):
        # Every 20th iteration drop the session and graph and start fresh.
        if player and i % 20 == 0:
            player.network.sess.close()
            tf.reset_default_graph()
            player = None

        if player:
            oneoff_utils.restore_params(model_paths[idx], player)
        else:
            player = oneoff_utils.load_player(model_paths[idx])

        pos_names, positions = zip(*eval_positions)

        # This should be batched at some point.
        eval_probs, eval_values = player.network.run_many(positions)

        rows = zip(pos_names, eval_probs, eval_values)
        for pos_name, probs, value in rows:
            csv_name = "heatmap-{}-{}.csv".format(pos_name, idx)
            csv_path = os.path.join(FLAGS.data_dir, csv_name)
            flat_probs = ",".join(str(p) for p in probs)
            with open(csv_path, "w") as out:
                out.write("{}, {}, {}\n".format(idx, value, flat_probs))
def selfplay(verbose=2):
    """Play one selfplay game with the latest model.

    If the latest model already has more than MAX_GAMES_PER_GENERATION
    selfplay files, the function sleeps for ten minutes and exits the
    process with status 1. Otherwise ``main.selfplay`` is run with
    per-model output, holdout and SGF directories.
    """
    _, model_name = fsdb.get_latest_model()

    pattern = os.path.join(fsdb.selfplay_dir(), model_name, '*.zz')
    games = gfile.Glob(pattern)
    if len(games) > MAX_GAMES_PER_GENERATION:
        print("{} has enough games ({})".format(model_name, len(games)))
        time.sleep(10 * 60)
        sys.exit(1)

    print("Playing a game with model {}".format(model_name))

    selfplay_args = {
        'load_file': os.path.join(fsdb.models_dir(), model_name),
        'output_dir': os.path.join(fsdb.selfplay_dir(), model_name),
        'holdout_dir': os.path.join(fsdb.holdout_dir(), model_name),
        'output_sgf': os.path.join(fsdb.sgf_dir(), model_name),
        'holdout_pct': HOLDOUT_PCT,
        'verbose': verbose,
    }
    main.selfplay(**selfplay_args)
def suggest_pairs(top_n=10, per_n=3):
    """Find the maximally interesting pairs of players to match up.

    Ratings are computed from the wins recorded in ``ratings.db`` between
    models of the current bucket (``fsdb.models_dir()``). After sorting by
    model number the first 100 rated models are dropped. Then:

    1. Sort the remaining ratings by uncertainty, highest first, and take
       the `top_n` most uncertain models; call each of them `p1`.
    2. For each `p1`, take up to 19 *other* models whose rating is closest
       to p1's ('candidate_p2s').
    3. Choose `per_n` opponents at random from those candidates (fewer when
       there are not enough candidates).

    Args:
        top_n: how many high-uncertainty models to find opponents for.
        per_n: how many opponents to pick for each of them.

    Returns:
        A list of ``[p1, p2]`` pairs of *model numbers*, not model ids.
    """
    db = sqlite3.connect("ratings.db")
    try:
        data = db.execute(
            "select model_winner, model_loser from wins").fetchall()
        # A set gives O(1) membership tests in the filter below.
        bucket_ids = {
            row[0]
            for row in db.execute(
                "select id from models where bucket = ?",
                (fsdb.models_dir(), )).fetchall()
        }
    finally:
        db.close()

    data = [d for d in data if d[0] in bucket_ids and d[1] in bucket_ids]

    # Each entry is (model_num, rating, uncertainty).
    ratings = [(model_num_for(k), v[0], v[1])
               for k, v in compute_ratings(data).items()]
    ratings.sort()
    ratings = ratings[
        100:]  # Filter off the first 100 models, which improve too fast.

    ratings.sort(key=lambda r: r[2], reverse=True)

    res = []
    for p1 in ratings[:top_n]:
        # Exclude p1 explicitly rather than assuming it sorts first: a model
        # with exactly the same rating could otherwise take its place and
        # leave p1 paired with itself.
        others = [r for r in ratings if r is not p1]
        candidate_p2s = sorted(
            others, key=lambda p2_tup: abs(p1[1] - p2_tup[1]))[:19]
        # random.sample raises if asked for more items than are available.
        choices = random.sample(candidate_p2s,
                                min(per_n, len(candidate_p2s)))
        print("Pairing {}, sigma {:.2f}".format(p1[0], p1[2]))
        for p2 in choices:
            res.append([p1[0], p2[0]])
            print(" {}, ratings delta {:.2f}".format(p2[0],
                                                     abs(p1[1] - p2[1])))
    return res
def bootstrap(working_dir):
    """Create model 0 by calling ``main.bootstrap``.

    The model name comes from ``shipname.generate(0)`` and the model is
    exported under ``fsdb.models_dir()``.
    """
    initial_name = shipname.generate(0)
    export_path = os.path.join(fsdb.models_dir(), initial_name)

    banner = "Bootstrapping with working dir {}\n Model 0 exported to {}".format(
        working_dir, export_path)
    print(banner)

    main.bootstrap(working_dir, export_path)
def swa():
    """Average the weights of several saved models and save the result.

    The first `FLAGS.count` models of a fixed list are restored, each into
    its own session. Every stored ``VariableV2`` is then averaged across the
    models; ``global_step`` takes the maximum instead. The averaged values
    are assigned in the first session and saved as ``swa-<count>`` inside
    `FLAGS.data_dir`.

    NOTE(review): the original loop had a ``continue`` before the
    ``all_assign.append(...)`` line, so the averaged values were never
    assigned and the saved model was not an average. The ``continue`` has
    been removed; confirm it was not deliberate.
    """
    path_base = fsdb.models_dir()
    model_names = [
        "000393-lincoln",
        "000390-indus",
        "000404-hannibal",
        "000447-hawke",
        "000426-grief",
        "000431-lion",
        "000428-invincible",
        "000303-olympus",
        "000291-superb",
        "000454-victorious",
    ]
    model_names = model_names[:FLAGS.count]
    model_paths = [os.path.join(path_base, m) for m in model_names]

    # construct the graph
    features, labels = dual_net.get_inference_input()
    dual_net.model_fn(features, labels, tf.estimator.ModeKeys.PREDICT)

    # restore all saved weights
    meta_graph_def = meta_graph.read_meta_graph_file(model_paths[0] + '.meta')
    stored_var_names = {
        n.name for n in meta_graph_def.graph_def.node if n.op == 'VariableV2'
    }

    var_list = sorted(
        (v for v in tf.global_variables() if v.op.name in stored_var_names),
        key=lambda v: v.op.name)
    print(stored_var_names)
    print(len(stored_var_names), len(var_list))

    sessions = [tf.Session() for _ in model_paths]
    try:
        saver = tf.train.Saver()
        for sess, model_path in zip(sessions, model_paths):
            saver.restore(sess, model_path)

        # Load all VariableV2s for each model.
        values = [sess.run(var_list) for sess in sessions]

        # Iterate over all variables, averaging the values from all models.
        all_assign = []
        for var, vals in zip(var_list, zip(*values)):
            print("{}x {}".format(len(vals), var))
            if var.name == "global_step:0":
                # A step counter is not averaged: keep the largest one.
                avg = vals[0]
                for val in vals:
                    avg = tf.maximum(avg, val)
            else:
                avg = tf.add_n(vals) / len(vals)

            all_assign.append(tf.assign(var, avg))

        # Run all assign ops on an existing model (which has other ops and
        # graph).
        sess = sessions[0]
        sess.run(all_assign)

        # Export a new saved model.
        ensure_dir_exists(FLAGS.data_dir)
        dest_path = os.path.join(FLAGS.data_dir, "swa-" + str(FLAGS.count))
        saver.save(sess, dest_path)
    finally:
        # Release every session, also when restoring or saving fails.
        for sess in sessions:
            sess.close()
def top_n(n=10):
    """Return the `n` highest-rated models of the current bucket.

    Args:
        n: number of models to return. ``n=0`` yields an empty list.

    Returns:
        A list of ``(model_num, rating)`` tuples, best first, where `rating`
        is the value ``compute_ratings`` produced for that model.
    """
    data = wins_subset(fsdb.models_dir())
    r = compute_ratings(data)
    # Sort descending and slice from the front. The previous
    # ``sorted(...)[-n:][::-1]`` returned *every* model for n == 0, because
    # ``[-0:]`` is the whole list.
    ranked = sorted(((v, k) for k, v in r.items()), reverse=True)
    return [(model_num_for(k), v) for v, k in ranked[:n]]
def import_files(files, bucket=None):
    """Import evaluation game files into the ``ratings.db`` database.

    For each file the timestamp is taken from the file name (EVAL_REGEX) and
    the white player, black player and result are searched for in the file
    text. Models are inserted if missing, the game is inserted into
    ``games``, and the per-model counters and the ``wins`` table are
    updated. Files that do not match, lack a field, involve model 000588 or
    are already in the database are skipped.

    Args:
        files: iterable of paths to game files.
        bucket: bucket the models belong to; defaults to
            ``fsdb.models_dir()``.

    Raises:
        Any exception raised while updating the database is re-raised after
        a rollback.
    """
    if bucket is None:
        bucket = fsdb.models_dir()

    print("Importing for bucket:", bucket)
    db = sqlite3.connect("ratings.db")
    new_games = 0
    try:
        with db:
            c = db.cursor()
            for _file in tqdm(files):
                match = re.match(EVAL_REGEX, os.path.basename(_file))
                if not match:
                    print("Bad file: ", _file)
                    continue
                # First capture group of EVAL_REGEX.
                timestamp = match.groups(1)[0]
                with open(_file) as f:
                    text = f.read()
                pw = re.search(PW_REGEX, text)
                pb = re.search(PB_REGEX, text)
                result = re.search(RESULT_REGEX, text)
                if not (pw and pb and result):
                    print("Fields not found: ", _file)
                    # Skip the file: without this the .group() calls below
                    # would be made on None and abort the whole import.
                    continue

                pw = pw.group(1)
                pb = pb.group(1)
                result = result.group(1)

                m_num_w = re.match(MODEL_REGEX, pw).group(1)
                m_num_b = re.match(MODEL_REGEX, pb).group(1)

                # v10 and v9 have the same model and name for 588, so cross
                # eval games played with this model should be ignored.
                if m_num_w == '000588' or m_num_b == '000588':
                    continue

                black_won = result.lower().startswith('b')
                try:
                    # create models or ignore.
                    maybe_insert_model(db, bucket, pb, m_num_b)
                    maybe_insert_model(db, bucket, pw, m_num_w)

                    b_id = rowid_for(db, bucket, pb)
                    w_id = rowid_for(db, bucket, pw)

                    # insert into games or bail
                    # NOTE(review): this inner `with db:` commits or rolls
                    # back on its own, separately from the counter updates
                    # below -- confirm that split is intended.
                    game_id = None
                    try:
                        with db:
                            c = db.cursor()
                            c.execute(
                                """insert into games(timestamp, filename, b_id, w_id, black_won, result)
                                                values(?, ?, ?, ?, ?, ?)
                                """, [
                                    timestamp,
                                    os.path.relpath(_file), b_id, w_id,
                                    black_won, result
                                ])
                            game_id = c.lastrowid
                    except sqlite3.IntegrityError:
                        # Duplicate game: already imported, nothing to do.
                        continue

                    if game_id is None:
                        print("Somehow, game_id was None")

                    # update wins/game counts on model, and wins table.
                    c.execute(
                        "update models set num_games = num_games + 1 where id in (?, ?)",
                        [b_id, w_id])
                    if black_won:
                        c.execute(
                            "update models set black_games = black_games + 1, black_wins = black_wins + 1 where id = ?",
                            (b_id, ))
                        c.execute(
                            "update models set white_games = white_games + 1 where id = ?",
                            (w_id, ))
                        c.execute(
                            "insert into wins(game_id, model_winner, model_loser) values(?, ?, ?)",
                            [game_id, b_id, w_id])
                    elif result.lower().startswith('w'):
                        c.execute(
                            "update models set black_games = black_games + 1 where id = ?",
                            (b_id, ))
                        c.execute(
                            "update models set white_games = white_games + 1, white_wins = white_wins + 1 where id = ?",
                            (w_id, ))
                        c.execute(
                            "insert into wins(game_id, model_winner, model_loser) values(?, ?, ?)",
                            [game_id, w_id, b_id])
                    new_games += 1
                    if new_games % 1000 == 0:
                        print("committing", new_games)
                        db.commit()
                except BaseException:
                    # Roll back on any failure (including Ctrl-C), then let
                    # the error propagate.
                    print("Bailed!")
                    db.rollback()
                    raise
    finally:
        # `with db:` only manages the transaction; close explicitly.
        db.close()
    print("Added {} new games to database".format(new_games))
def model_id(name_or_num):
    """Return the database row id of a model in the current bucket.

    Args:
        name_or_num: a model name (str), or a model number that is resolved
            to a name through ``fsdb.get_model``.

    Returns:
        Whatever ``rowid_for`` returns for that model in ``ratings.db``.
    """
    bucket = fsdb.models_dir()
    if not isinstance(name_or_num, str):
        name_or_num = fsdb.get_model(name_or_num)

    db = sqlite3.connect("ratings.db")
    try:
        return rowid_for(db, bucket, name_or_num)
    finally:
        # A connection is opened on every call, so close it every time.
        db.close()