def eval_pv(eval_positions):
    """Write the principal variation of each position for a series of models.

    Models are taken from ``oneoff_utils.get_model_paths(fsdb.models_dir())``,
    starting at ``FLAGS.idx_start`` and stepping by ``FLAGS.eval_every``.
    The first model is loaded with ``load_player``; every later model
    re-uses that player through ``restore_params``.

    For every ``(name, position)`` pair an ``MCTSPlayer`` search is
    initialised and ``suggest_move`` is called, after which the tree is
    walked from the root through ``best_child()`` until a node without
    children is reached.

    Output: one file ``pv-<name>-<model index>`` per position and model in
    ``FLAGS.data_dir`` holding a single line: the model index followed by a
    comma-separated list of ``<node.fmove>,<int(node.N)>`` pairs.

    Args:
        eval_positions: iterable of ``(name, position)`` pairs. It is
            iterated once per evaluated model.
    """
    model_paths = oneoff_utils.get_model_paths(fsdb.models_dir())
    first_idx = FLAGS.idx_start
    stride = FLAGS.eval_every

    print("Evaluating models {}-{}, eval_every={}".format(
        first_idx, len(model_paths), stride))

    for model_idx in tqdm(range(first_idx, len(model_paths), stride)):
        model_path = model_paths[model_idx]
        if model_idx != first_idx:
            # Re-use the already constructed player; only swap the weights.
            oneoff_utils.restore_params(model_path, player)
        else:
            player = oneoff_utils.load_player(model_path)

        searcher = strategies.MCTSPlayer(player.network, resign_threshold=-1)

        for name, position in eval_positions:
            searcher.initialize_game(position)
            searcher.suggest_move(position)

            # Follow the best child from the root down to a leaf.
            variation = []
            current = searcher.root
            while current.children:
                current = current.children.get(current.best_child())
                variation.append(
                    "{},{}".format(current.fmove, int(current.N)))

            out_path = os.path.join(
                FLAGS.data_dir, "pv-{}-{}".format(name, model_idx))
            with open(out_path, "w") as out:
                out.write("{}, {}\n".format(model_idx, ",".join(variation)))
def eval_for_policy(eval_positions, model_dir, data_dir, idx_start,
                    eval_every):
    """Run every selected model on every position and dump heatmap CSVs.

    One file per (position, model) pair is written to ``data_dir``:

      * name:   ``heatmap-<position_name>-<model-index>.csv``
      * format: model number, value network output, policy network outputs

    Per the original notes, position_name is taken from the SGF file and the
    policy network outputs (19x19) are saved in flat order
    (see coord.from_flat).

    Args:
        eval_positions: sequence of ``(position_name, position)`` pairs.
        model_dir: directory passed to ``oneoff_utils.get_model_paths``.
        data_dir: directory the CSV files are written to.
        idx_start: index of the first model to evaluate.
        eval_every: step between evaluated model indices.
    """
    model_paths = oneoff_utils.get_model_paths(model_dir)
    total_models = len(model_paths)

    print("Evaluating models {}-{}, eval_every={}".format(
        idx_start, total_models, eval_every))

    for model_idx in tqdm(range(idx_start, total_models, eval_every)):
        current_path = model_paths[model_idx]
        if model_idx != idx_start:
            # Later models only restore parameters into the existing player.
            oneoff_utils.restore_params(current_path, player)
        else:
            player = oneoff_utils.load_player(current_path)

        names, boards = zip(*eval_positions)

        # This should be batched at some point.
        policy_batch, value_batch = player.network.run_many(boards)

        for name, policy, value in zip(names, policy_batch, value_batch):
            csv_name = "heatmap-{}-{}.csv".format(name, model_idx)
            csv_path = os.path.join(data_dir, csv_name)
            with open(csv_path, "w") as out:
                flat_policy = ",".join(map(str, policy))
                out.write("{}, {}, {}\n".format(
                    model_idx, value, flat_policy))
def get_training_curve_data(model_dir, pos_data, move_data, result_data,
                            idx_start, eval_every):
    """Evaluate a series of models and collect accuracy / MSE per model.

    Every ``eval_every``-th model starting at ``idx_start`` (from the paths
    returned by ``oneoff_utils.get_model_paths(model_dir)``) is scored with
    ``eval_player``; the means of its two outputs are recorded.

    Args:
        model_dir: directory passed to ``oneoff_utils.get_model_paths``.
        pos_data: forwarded to ``eval_player`` as ``positions``.
        move_data: forwarded to ``eval_player`` as ``moves``.
        result_data: forwarded to ``eval_player`` as ``results``.
        idx_start: index of the first model to evaluate.
        eval_every: step between evaluated model indices.

    Returns:
        A ``pandas.DataFrame`` with one row per evaluated model and the
        columns ``num`` (model index), ``acc`` and ``mse``. The frame is
        empty when no model falls in the requested range.
    """
    model_paths = oneoff_utils.get_model_paths(model_dir)

    print("Evaluating models {}-{}, eval_every={}".format(
        idx_start, len(model_paths), eval_every))

    player = None
    # Rows are accumulated in a plain list and turned into a DataFrame once:
    # DataFrame.append copied the whole frame on every call and no longer
    # exists in pandas >= 2.0.
    rows = []
    for idx in tqdm(range(idx_start, len(model_paths), eval_every)):
        # Explicit None check: whether a player has been loaded should not
        # depend on the player object's truthiness.
        if player is None:
            player = oneoff_utils.load_player(model_paths[idx])
        else:
            oneoff_utils.restore_params(model_paths[idx], player)

        correct, squared_errors = eval_player(
            player=player,
            positions=pos_data,
            moves=move_data,
            results=result_data)

        avg_acc = np.mean(correct)
        avg_mse = np.mean(squared_errors)
        print("Model: {}, acc: {:.4f}, mse: {:.4f}".format(
            model_paths[idx], avg_acc, avg_mse))

        rows.append({"num": idx, "acc": avg_acc, "mse": avg_mse})

    return pd.DataFrame(rows)
def main(unusedargv):
    """Compute, print and plot the l2 cost over a sequence of models.

    Args:
        unusedargv: ignored; present only to satisfy the caller's signature.
    """
    all_models = oneoff_utils.get_model_paths(fsdb.models_dir())

    # List vars constructed when using dual_net.
    first_model = all_models[0]
    dual_net_list(first_model)

    # Calculate l2 cost over a sequence of our models.
    l2_costs = get_l2_cost_data(
        all_models, FLAGS.idx_start, FLAGS.eval_every)
    print(l2_costs)
    save_plots(FLAGS.plot_dir, l2_costs)
def eval_policy(eval_positions):
    """Run every selected model on every position and dump heatmap CSVs.

    Models come from ``oneoff_utils.get_model_paths(fsdb.models_dir())``,
    starting at ``FLAGS.idx_start`` and stepping by ``FLAGS.eval_every``.
    One file per (position, model) pair is written to ``FLAGS.data_dir``:

      * name:   ``heatmap-<position_name>-<model-index>.csv``
      * format: model number, value network output, policy network outputs

    Per the original notes, position_name is taken from the SGF file and the
    policy network outputs (19x19) are saved in flat order
    (see coord.from_flat).

    Args:
        eval_positions: sequence of ``(position_name, position)`` pairs.
    """
    model_paths = oneoff_utils.get_model_paths(fsdb.models_dir())
    first_idx = FLAGS.idx_start
    stride = FLAGS.eval_every

    print("Evaluating models {}-{}, eval_every={}".format(
        first_idx, len(model_paths), stride))

    player = None
    model_indices = tqdm(range(first_idx, len(model_paths), stride))
    for step, model_idx in enumerate(model_indices):
        # Every 20th evaluated model the session is closed and the default
        # graph reset, which forces a fresh load_player() below.
        if player and step % 20 == 0:
            player.network.sess.close()
            tf.reset_default_graph()
            player = None

        if player:
            oneoff_utils.restore_params(model_paths[model_idx], player)
        else:
            player = oneoff_utils.load_player(model_paths[model_idx])

        names, boards = zip(*eval_positions)

        # This should be batched at some point.
        policy_batch, value_batch = player.network.run_many(boards)

        for name, policy, value in zip(names, policy_batch, value_batch):
            csv_path = os.path.join(
                FLAGS.data_dir, "heatmap-{}-{}.csv".format(name, model_idx))
            with open(csv_path, "w") as out:
                flat_policy = ",".join(map(str, policy))
                out.write("{}, {}, {}\n".format(
                    model_idx, value, flat_policy))
"Eval every k models to generate the curve") FLAGS = tf.app.flags.FLAGS def eval_policy(eval_positions) """Evaluate all positions with all models save the policy heatmaps as CSVs CSV name is "heatmap-<position_name>-<model-index>.csv" CSV format is: model number, value network output, policy network outputs position_name is taken from the SGF file Policy network outputs (19x19) are saved in flat order (see coord.from_flat) """ model_paths = oneoff_utils.get_model_paths(FLAGS.model_dir) idx_start = FLAGS.idx_start eval_every = FLAGS.eval_every print("Evaluating models {}-{}, eval_every={}".format( idx_start, len(model_paths), eval_every)) for idx in tqdm(range(idx_start, len(model_paths), eval_every)): if idx == idx_start: player = oneoff_utils.load_player(model_paths[idx]) else: oneoff_utils.restore_params(model_paths[idx], player) pos_names, positions = zip(*eval_positions) # This should be batched at somepoint.
def minimize():
    r"""Find a subset of problems that maximally explains rating.

    Usage:
        python3 sharp_positions.py minimize \
            --model_dir models --sgf_dir data/s \
            --rating_json ratings.json --results results.csv

    Steps:
      1. Load the model paths, the ratings JSON (``FLAGS.rating_json``) and
         the per-model results CSV (``FLAGS.results``), skipping models whose
         index is below ``FLAGS.min_idx``.
      2. Build a feature vector per (position, model).
      3. Repeatedly fit a ``svm.LinearSVR`` on a random subset of the
         remaining positions and drop the positions with the smallest
         summed coefficients, until the test R^2 has been below
         ``r2_stopping_percent`` of the best test R^2 for
         ``stopping_iterations`` consecutive iterations.
      4. Dump the positions of the last fit, ordered from largest to
         smallest summed coefficient, to ``FLAGS.SVM_json``.

    Raises:
        AssertionError: if any of the shape / count sanity checks fail.
        KeyError: if a selected model has no entry in the ratings JSON.
    """
    ########################### HYPER PARAMETERS ###############################

    # Stop when r2 is this much worse than full set of positions
    r2_stopping_percent = 0.96
    # for this many iterations
    stopping_iterations = 5

    # Limit SVM to a smaller number of positions to speed up code.
    max_positions_fit = 300
    # Filter any position that "contributes" less than this percent of max.
    filter_contribution_percent = 0.3
    # Never filter more than this many positions in one iteration
    filter_limit = 25

    ########################### HYPER PARAMETERS ###############################

    # Load model paths
    model_paths = oneoff_utils.get_model_paths(FLAGS.model_dir)
    num_models = len(model_paths)
    assert num_models > 0, FLAGS.model_dir

    # Load model ratings
    # wget https://cloudygo.com/v12-19x19/json/ratings.json
    # Context manager so the ratings file handle is closed deterministically.
    with open(FLAGS.rating_json) as rating_file:
        ratings = json.load(rating_file)
    raw_ratings = {int(r[0]): float(r[1]) for r in ratings}

    model_ratings = []
    for model in model_paths:
        model_idx = get_model_idx(model)
        if model_idx < FLAGS.min_idx:
            continue

        model_ratings.append(raw_ratings[model_idx])
    model_ratings = np.array(model_ratings)

    assert 0 < len(model_ratings) <= num_models, len(model_ratings)
    # From here on num_models counts only the models that passed min_idx.
    num_models = len(model_ratings)

    sgf_names, all_positions = get_final_positions()
    # Trim off common path prefix.
    common_path = os.path.commonpath(sgf_names)
    sgf_names = [name[len(common_path) + 1:] for name in sgf_names]

    print("Considering {} positions, {} models".format(
        len(all_positions), num_models))
    print()

    # Load model data
    top_n = FLAGS.top_n
    # position index -> one [value, moves, move_policy] entry per model
    positions = defaultdict(list)
    with open(FLAGS.results) as results:
        headers = results.readline().strip()
        assert headers.count(",") + 1 == len(sgf_names)

        # Row is <model_name> + positions x [value, top_n x [move, move_policy]]
        for row in tqdm(results.readlines(), desc="result line"):
            data = row.split(",")
            model_idx = get_model_idx(data.pop(0))
            if model_idx < FLAGS.min_idx:
                continue

            data_per = 1 + top_n * 2
            assert len(data) % data_per == 0, len(data)

            for position, position_data in enumerate(grouper(data_per, data)):
                value = float(position_data.pop(0))
                # Remaining fields alternate move, policy, move, policy, ...
                moves = list(map(int, position_data[0::2]))
                move_policy = list(map(float, position_data[1::2]))

                positions[position].append([value, moves, move_policy])

    def one_hot(n, i):
        """Return a length-n list of zeros with index i set to 1.

        An out-of-range i yields all zeros.
        """
        encoded = [0] * n
        if 0 <= i < n:
            encoded[i] += 1
        return encoded

    # NOTE: top_n isn't the same semantic value here and can be increased.
    one_hot_moves = top_n
    # 1 scaled value + 5 value buckets + (one_hot_moves + 1) policy weights.
    num_features = 1 + 5 + (one_hot_moves + 1)

    # Features by position
    features = []
    pos_top_moves = []
    for position, data in tqdm(positions.items(), desc="featurize"):
        assert len(data) == num_models, len(data)

        # Most common first-choice moves across all models for this position.
        top_moves = Counter([d[1][0] for d in data])
        top_n_moves = [m for m, _ in top_moves.most_common(one_hot_moves)]
        if len(top_n_moves) < one_hot_moves:
            top_n_moves.extend([-1] * (one_hot_moves - len(top_n_moves)))
        assert len(top_n_moves) == one_hot_moves, "pad with dummy moves"
        pos_top_moves.append(top_n_moves)

        # Eventually we want
        # [model 1 position 1 features, m1 p2 features, m1 p3 features, ... ]
        # [model 2 position 1 features, m2 p2 features, m2 p3 features, ... ]
        # [model 3 position 1 features, m3 p2 features, m3 p3 features, ... ]
        # ...
        # [model m position 1 features, mm p2 features, mm p3 features, ... ]

        # We'll do position selection by joining [model x position_feature]

        feature_columns = []
        for v, m, mv in data:
            # Featurization (for each position):
            #   * Value (-1 to 1), Bucketed value
            #   * Cluster all model by top_n moves (X,Y,Z or other)?
            #     * value of that move for model
            #   * policy value of top move
            model_features = []

            model_features.append(2 * v - 1)
            # NOTE(sethtroisi): Consider bucketize value by value percentiles.
            value_bucket = np.searchsorted((0.2, 0.4, 0.6, 0.8), v)
            model_features.extend(one_hot(5, value_bucket))

            # Policy weight for most common X moves (among all models).
            # The final slot accumulates policy of every other move.
            policy_weights = [0] * (one_hot_moves + 1)
            for move, policy_value in zip(m, mv):
                if move in top_n_moves:
                    policy_weights[top_n_moves.index(move)] = policy_value
                else:
                    policy_weights[-1] += policy_value
            model_features.extend(policy_weights)

            assert len(model_features) == num_features

            feature_columns.append(model_features)
        features.append(feature_columns)

    # Shape: (positions, models, num_features)
    features = np.array(features)
    print("Feature shape", features.shape)
    print()

    # Split the models to test / train
    train_size = int(num_models * 0.9)
    train_models = sorted(np.random.permutation(num_models)[:train_size])
    test_models = sorted(set(range(num_models)) - set(train_models))
    assert set(train_models + test_models) == set(range(num_models))
    features_train = features[:, train_models, :]
    features_test = features[:, test_models, :]

    labels_train = model_ratings[train_models]
    labels_test = model_ratings[test_models]

    # Choose some set of positions and see how well they explain ratings
    positions_to_use = set(positions.keys())
    linearSVM = svm.LinearSVR()
    best_test_r2 = 0
    below_threshold = 0

    for iteration in itertools.count(1):
        iter_positions = np.random.permutation(list(positions_to_use))
        iter_positions = sorted(iter_positions[:max_positions_fit])

        # Take this set of positions and build X
        X = np.concatenate(features_train[iter_positions], axis=1)
        Xtest = np.concatenate(features_test[iter_positions], axis=1)
        assert X.shape == (train_size, num_features * len(iter_positions))

        linearSVM.fit(X, labels_train)

        score_train = linearSVM.score(X, labels_train)
        score_test = linearSVM.score(Xtest, labels_test)
        print("iter {}, {}/{} included, R^2: {:.4f} train, {:.3f} test".format(
            iteration, len(iter_positions), len(positions_to_use),
            score_train, score_test))

        # Determine the most and least useful position:
        # TODO(amj,brilee): Validate this math.
        assert len(linearSVM.coef_) == num_features * len(iter_positions)

        # The intercepts tell us how much this contributes to overall rating
        # but coef tell us how much different answers differentiate rating.
        coef_groups = list(grouper(num_features, linearSVM.coef_))
        position_coefs = [abs(sum(c)) for c in coef_groups]

        # Indices into iter_positions, ordered least to most valuable.
        pos_value_idx = np.argsort(position_coefs)
        max_pos = pos_value_idx[-1]
        most_value = position_coefs[max_pos]

        print("\tMost value {} => {:.1f} {}".format(
            max_pos, most_value, sgf_names[iter_positions[max_pos]]))

        # Drop any positions that aren't very useful
        for dropped, pos_idx in enumerate(pos_value_idx[:filter_limit], 1):
            contribution = position_coefs[pos_idx]
            positions_to_use.remove(iter_positions[pos_idx])
            print("\t\tdropping({}): {:.1f} {}".format(
                dropped, contribution, sgf_names[iter_positions[pos_idx]]))
            # The position that crosses the threshold is still dropped
            # (removal happens above); only the following ones are spared.
            if contribution > filter_contribution_percent * most_value:
                break
        print()

        best_test_r2 = max(best_test_r2, score_test)
        if score_test > r2_stopping_percent * best_test_r2:
            below_threshold = 0
        else:
            below_threshold += 1
            if below_threshold == stopping_iterations:
                print("{}% decrease in R^2, stopping".format(
                    100 - int(100 * r2_stopping_percent)))
                break

    # Write down the differentiating positions and their answers.
    # Uses pos_value_idx / coef_groups / iter_positions of the final fit.
    svm_data = []
    for position_idx in reversed(pos_value_idx):
        coefs = coef_groups[position_idx]

        # Global position index.
        position = iter_positions[position_idx]
        sgf_name = sgf_names[position]
        top_moves = pos_top_moves[position]

        svm_data.append([sgf_name, [top_moves, coefs.tolist()]])

    with open(FLAGS.SVM_json, "w") as svm_json:
        json.dump(svm_data, svm_json)
    print("Dumped data about {} positions to {}".format(
        len(svm_data), FLAGS.SVM_json))
def evaluate():
    """Get Policy and Value for each network, for each position.

    Usage:
        python3 sharp_positions.py evaluate --sgf_dir data/s --model_dir models/

    Writes a CSV to ``FLAGS.results``. The header row is the comma-joined
    SGF names. Each following row is the model path followed, per position,
    by the value output and ``FLAGS.top_n`` interleaved
    ``move, policy`` pairs ordered by descending policy.

    Models with index ``FLAGS.min_idx`` and above are evaluated. Whenever the
    model index is a multiple of 50 the TensorFlow session is closed and the
    default graph reset before the next model is loaded.
    """
    def short_str(v):
        """Render Python floats with 3 decimals; everything else via str()."""
        if isinstance(v, float):
            # "{:.3f}" is the format spec; the previous "{.3f}" was parsed
            # as attribute access and raised AttributeError for any float.
            return "{:.3f}".format(v)
        return str(v)

    # Load positions
    sgf_names, all_positions = get_final_positions()

    # Run and save some data about each position
    # Save to csv because that's easy
    model_paths = oneoff_utils.get_model_paths(FLAGS.model_dir)
    num_models = len(model_paths)
    print("Evaluating {} models: {} to {}".format(
        num_models, model_paths[0], model_paths[-1]))
    print()

    with open(FLAGS.results, "w") as results:
        results.write(",".join(sgf_names) + "\n")

        player = None
        for idx in tqdm(range(FLAGS.min_idx, num_models, 1), desc="model"):
            model = model_paths[idx]

            # Periodically tear down the session and graph, forcing a fresh
            # load_player() below.
            if player and idx % 50 == 0:
                player.network.sess.close()
                tf.reset_default_graph()
                player = None

            if player:
                oneoff_utils.restore_params(model, player)
            else:
                player = oneoff_utils.load_player(model)

            row = [model]
            for positions in grouper(FLAGS.batch_size, all_positions):
                probs, values = player.network.run_many(positions)
                # NOTE(sethtroisi): For now we store the top n moves to shrink
                # the size of the recorded data.

                top_n = FLAGS.top_n
                # argsort/sort are ascending; fliplr puts the largest first.
                top_policy_move = np.fliplr(np.argsort(probs))[:, :top_n]
                top_policy_value = np.fliplr(np.sort(probs))[:, :top_n]

                # One position at a time
                for v, m, p in zip(values, top_policy_move, top_policy_value):
                    row.append(v)
                    row.extend(itertools.chain.from_iterable(zip(m, p)))

                # Diagnostic: report batches where the recorded top-n moves
                # cover little of the total policy on average.
                if len(positions) > 10:
                    average_seen = top_policy_value.sum() / len(positions)
                    if average_seen < 0.3:
                        print("\t", average_seen,
                              top_policy_value.sum(axis=-1))

            results.write(",".join(map(short_str, row)) + "\n")