# Top-level imports for the helpers below. Flag definitions (idx_start,
# eval_every, data_dir, model_dir, results, min_idx, batch_size, top_n, ...)
# live elsewhere in the original scripts.
import itertools
import os

import numpy as np
import pandas as pd
import tensorflow as tf
from absl import flags
from tqdm import tqdm

import coords
import fsdb
import oneoff_utils
import strategies

FLAGS = flags.FLAGS


def eval_pv(eval_positions):
    model_paths = oneoff_utils.get_model_paths(fsdb.models_dir())
    idx_start = FLAGS.idx_start
    eval_every = FLAGS.eval_every
    print("Evaluating models {}-{}, eval_every={}".format(
        idx_start, len(model_paths), eval_every))

    for idx in tqdm(range(idx_start, len(model_paths), eval_every)):
        # Load the first model from scratch, then just swap in new weights.
        if idx == idx_start:
            player = oneoff_utils.load_player(model_paths[idx])
        else:
            oneoff_utils.restore_params(model_paths[idx], player)

        mcts = strategies.MCTSPlayer(player.network, resign_threshold=-1)

        for name, position in eval_positions:
            mcts.initialize_game(position)
            mcts.suggest_move(position)

            # Walk the principal variation: follow the best child to a leaf,
            # recording each move and its visit count.
            path = []
            node = mcts.root
            while node.children:
                node = node.children.get(node.best_child())
                path.append("{},{}".format(node.fmove, int(node.N)))

            save_file = os.path.join(
                FLAGS.data_dir, "pv-{}-{}".format(name, idx))
            with open(save_file, "w") as data:
                data.write("{}, {}\n".format(idx, ",".join(path)))
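
# Hypothetical usage sketch (not part of the original script): eval_pv expects
# a list of (name, position) pairs, such as the final positions that
# positions_from_sgfs (defined later in this file) builds from SGF files.
# The glob pattern here is illustrative only.
def _demo_eval_pv():
    import glob
    eval_positions = positions_from_sgfs(sorted(glob.glob("data/eval/*.sgf")))
    eval_pv(eval_positions)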
def get_training_curve_data(
        model_dir, pos_data, move_data, result_data, idx_start, eval_every):
    model_paths = oneoff_utils.get_model_paths(model_dir)
    rows = []
    player = None
    print("Evaluating models {}-{}, eval_every={}".format(
        idx_start, len(model_paths), eval_every))

    for idx in tqdm(range(idx_start, len(model_paths), eval_every)):
        # Load the first model from scratch, then just swap in new weights.
        if player:
            oneoff_utils.restore_params(model_paths[idx], player)
        else:
            player = oneoff_utils.load_player(model_paths[idx])

        correct, squared_errors = eval_player(
            player=player, positions=pos_data,
            moves=move_data, results=result_data)

        avg_acc = np.mean(correct)
        avg_mse = np.mean(squared_errors)
        print("Model: {}, acc: {:.4f}, mse: {:.4f}".format(
            model_paths[idx], avg_acc, avg_mse))
        rows.append({"num": idx, "acc": avg_acc, "mse": avg_mse})

    # Build the frame once at the end; DataFrame.append was removed in
    # pandas 2.0 and grew quadratically when called in a loop.
    return pd.DataFrame(rows)
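
# eval_player is used above but not defined in this section. A minimal sketch
# of what it plausibly computes, assuming `moves` holds the move played in
# each position and `results` holds each game's outcome in [-1, 1]: policy
# accuracy is "did argmax(policy) match the played move", and value error is
# the squared distance from the outcome, rescaled to [0, 1].
def eval_player(player, positions, moves, results):
    probs, values = player.network.run_many(positions)
    policy_moves = [coords.from_flat(c) for c in np.argmax(probs, axis=1)]
    correct = [move == policy_move
               for move, policy_move in zip(moves, policy_moves)]
    # values and results both lie in [-1, 1], so the squared difference lies
    # in [0, 4]; dividing by 4 normalizes to [0, 1].
    squared_errors = (np.array(values) - np.array(results)) ** 2 / 4
    return correct, squared_errors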
def eval_for_policy(eval_positions, model_dir, data_dir, idx_start, eval_every):
    """Evaluate all positions with all models, saving policy heatmaps as CSVs.

    CSV name is "heatmap-<position_name>-<model-index>.csv"
    CSV format is: model number, value network output, policy network outputs
    position_name is taken from the SGF file
    Policy network outputs (19x19) are saved in flat order (see coords.from_flat)
    """
    model_paths = oneoff_utils.get_model_paths(model_dir)
    print("Evaluating models {}-{}, eval_every={}".format(
        idx_start, len(model_paths), eval_every))

    for idx in tqdm(range(idx_start, len(model_paths), eval_every)):
        # Load the first model from scratch, then just swap in new weights.
        if idx == idx_start:
            player = oneoff_utils.load_player(model_paths[idx])
        else:
            oneoff_utils.restore_params(model_paths[idx], player)

        pos_names, positions = zip(*eval_positions)
        # This should be batched at some point; see the sketch below.
        eval_probs, eval_values = player.network.run_many(positions)

        for pos_name, probs, value in zip(pos_names, eval_probs, eval_values):
            save_file = os.path.join(
                data_dir, "heatmap-{}-{}.csv".format(pos_name, idx))
            with open(save_file, "w") as data:
                data.write("{}, {}, {}\n".format(
                    idx, value, ",".join(map(str, probs))))
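
# A sketch of the batching the comment above asks for. The name
# run_many_batched is ours, and it assumes run_many accepts any list of
# positions: split the evaluation set into fixed-size chunks and concatenate
# the per-chunk outputs, so a large position set never hits the network in
# one oversized batch.
def run_many_batched(network, positions, batch_size=64):
    all_probs, all_values = [], []
    for i in range(0, len(positions), batch_size):
        probs, values = network.run_many(positions[i:i + batch_size])
        all_probs.extend(probs)
        all_values.extend(values)
    return all_probs, all_values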
def eval_policy(eval_positions):
    """Evaluate all positions with all models, saving policy heatmaps as CSVs.

    CSV name is "heatmap-<position_name>-<model-index>.csv"
    CSV format is: model number, value network output, policy network outputs
    position_name is taken from the SGF file
    Policy network outputs (19x19) are saved in flat order (see coords.from_flat)
    """
    model_paths = oneoff_utils.get_model_paths(fsdb.models_dir())
    idx_start = FLAGS.idx_start
    eval_every = FLAGS.eval_every
    print("Evaluating models {}-{}, eval_every={}".format(
        idx_start, len(model_paths), eval_every))

    player = None
    for i, idx in enumerate(
            tqdm(range(idx_start, len(model_paths), eval_every))):
        # Periodically tear down the session and graph, presumably to bound
        # memory growth from restoring many checkpoints into one graph.
        if player and i % 20 == 0:
            player.network.sess.close()
            tf.reset_default_graph()
            player = None

        if not player:
            player = oneoff_utils.load_player(model_paths[idx])
        else:
            oneoff_utils.restore_params(model_paths[idx], player)

        pos_names, positions = zip(*eval_positions)
        # This should be batched at some point.
        eval_probs, eval_values = player.network.run_many(positions)

        for pos_name, probs, value in zip(pos_names, eval_probs, eval_values):
            save_file = os.path.join(
                FLAGS.data_dir, "heatmap-{}-{}.csv".format(pos_name, idx))
            with open(save_file, "w") as data:
                data.write("{}, {}, {}\n".format(
                    idx, value, ",".join(map(str, probs))))
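
# Hypothetical reader for the heatmap CSVs written above (the name and helper
# are ours, added to document the file layout): each file holds one line,
# "model number, value, p_0,p_1,...", with one probability per flat
# coordinate in the order coords.from_flat expects.
def read_heatmap_csv(path):
    with open(path) as f:
        fields = f.read().split(",")
    idx = int(fields[0])
    value = float(fields[1])
    probs = np.array([float(p) for p in fields[2:]])
    # Map the highest-probability flat index back to a board coordinate.
    top_move = coords.from_flat(int(np.argmax(probs)))
    return idx, value, probs, top_move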
def positions_from_sgfs(sgf_files):
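    # The body of this helper was lost in extraction; this is a best-guess
    # reconstruction from how it is used above: it must return a list of
    # (position_name, position) pairs, with names taken from the SGF file
    # names. oneoff_utils.final_position_sgf is an assumption here --
    # substitute whatever helper this repo uses to replay an SGF to its
    # final position.
    data = []
    for sgf_file in sgf_files:
        sgf_name = os.path.basename(sgf_file).replace(".sgf", "")
        data.append((sgf_name, oneoff_utils.final_position_sgf(sgf_file)))
    return data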
def evaluate():
    """Get policy and value for each network, for each position.

    Usage:
        python3 sharp_positions.py evaluate --sgf_dir data/s --model_dir models/
    """
    def short_str(v):
        # np.float32 is not a subclass of Python float, so check np.floating
        # too; otherwise network values would be written at full precision.
        if isinstance(v, (float, np.floating)):
            return "{:.3f}".format(v)
        return str(v)

    # Load positions.
    sgf_names, all_positions = get_final_positions()

    # Run and save some data about each position.
    # Save to CSV because that's easy.
    model_paths = oneoff_utils.get_model_paths(FLAGS.model_dir)
    num_models = len(model_paths)
    print("Evaluating {} models: {} to {}".format(
        num_models, model_paths[0], model_paths[-1]))
    print()

    with open(FLAGS.results, "w") as results:
        results.write(",".join(sgf_names) + "\n")

        player = None
        for idx in tqdm(range(FLAGS.min_idx, num_models), desc="model"):
            model = model_paths[idx]
            # Periodically tear down the session and graph to keep memory
            # bounded across many checkpoint restores.
            if player and idx % 50 == 0:
                player.network.sess.close()
                tf.reset_default_graph()
                player = None

            if player:
                oneoff_utils.restore_params(model, player)
            else:
                player = oneoff_utils.load_player(model)

            row = [model]
            for positions in grouper(FLAGS.batch_size, all_positions):
                probs, values = player.network.run_many(positions)

                # NOTE(sethtroisi): For now we store the top n moves to shrink
                # the size of the recorded data.
                top_n = FLAGS.top_n
                top_policy_move = np.fliplr(np.argsort(probs))[:, :top_n]
                top_policy_value = np.fliplr(np.sort(probs))[:, :top_n]

                # One position at a time: value first, then interleaved
                # (move, probability) pairs for the top n moves.
                for v, m, p in zip(values, top_policy_move, top_policy_value):
                    row.append(v)
                    row.extend(itertools.chain.from_iterable(zip(m, p)))

                # Sanity check: flag batches where the top-n moves cover
                # little probability mass on average.
                if len(positions) > 10:
                    average_seen = top_policy_value.sum() / len(positions)
                    if average_seen < 0.3:
                        print("\t", average_seen, top_policy_value.sum(axis=-1))

            results.write(",".join(map(short_str, row)) + "\n")
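
# grouper is used above but not defined in this section. A sketch matching
# the call grouper(FLAGS.batch_size, all_positions), based on the standard
# itertools chunking recipe: yield successive lists of up to n items (the
# last chunk may be short), so len(positions) works on every chunk.
def grouper(n, iterable):
    it = iter(iterable)
    # iter(callable, sentinel) keeps slicing off n items until the slice
    # comes back empty.
    return iter(lambda: list(itertools.islice(it, n)), [])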