def create_latex_table(  # noqa: C901
    filepath,
    render_score_fn=latex_render_score_subscript,
):
    """Build a LaTeX table of merged scores over a Fisher-example-count grid.

    Rows are donor_fisher_examples values and columns are
    target_fisher_examples values (both sorted ascending, taken from the data);
    each cell renders the mean (and stddev when more than one run exists) of
    the merged scores for that hyperparameter pair.

    Args:
      filepath: Path of the results JSON to load.
      render_score_fn: Callable (mean, stddev) -> LaTeX cell string.

    Returns:
      The rendered LaTeX table as produced by result_utils.table_to_latex.
    """
    records = result_utils.load_json(filepath)

    # Group runs by their (donor, target) Fisher-example-count pair.
    cells = collections.defaultdict(list)
    for record in records:
        hp = record["hyperparams"]
        key = (hp["donor_fisher_examples"], hp["target_fisher_examples"])
        cells[key].append(record)

    def summarize(group):
        # Stddev is only shown when the mean is taken over more than one run.
        values = np.array([get_single_score(r["merged_score"]) for r in group])
        spread = np.std(values) if len(group) > 1 else None
        return render_score_fn(np.mean(values), spread)

    rendered = {key: summarize(group) for key, group in cells.items()}

    donor_counts = sorted({key[0] for key in cells})
    target_counts = sorted({key[1] for key in cells})

    # One row per donor count, labeled, with one rendered cell per target count.
    body = [
        [str(donor)] + [rendered[(donor, target)] for target in target_counts]
        for donor in donor_counts
    ]

    return result_utils.table_to_latex([
        R"\toprule",
        [R"\textbf{Examples}"] + [str(target) for target in target_counts],
        R"\midrule",
        *body,
        R"\bottomrule",
    ])
def create_latex_table(  # noqa: C901
    jasons,
    render_score_fn=latex_render_score_subscript,
    target_task_order=result_utils.GLUE_TASKS_ORDER,
    donor_task_order=result_utils.GLUE_TASKS_ORDER,
    no_original_scores=False,
):
    """Build a LaTeX table of scores for every (donor task, target task) pair.

    Rows are donor tasks, columns are target tasks. Off-diagonal cells show
    the mean (and stddev when more than one run exists) of the merged scores
    for that pair; diagonal cells show the target task's own original
    (unmerged) score unless ``no_original_scores`` is set. Pairs with no runs
    are left blank.

    Args:
      jasons: Iterable of already-loaded JSON result lists, concatenated.
      render_score_fn: Callable (mean, stddev, is_orig=...) -> LaTeX cell.
      target_task_order: Column order of target tasks.
      donor_task_order: Row order of donor tasks.
      no_original_scores: If True, diagonal cells use merged scores too.

    Returns:
      The rendered LaTeX table as produced by result_utils.table_to_latex.
    """
    items = []
    for jason in jasons:
        items.extend(jason)

    # Group runs by their (target task, donor task) pair.
    row_groups = collections.defaultdict(list)
    for item in items:
        group_key = hashabledict({
            "target_task": item["task"],
            "donor_task": item["intermediate_task"],
        })
        row_groups[group_key].append(item)

    def _groups_for_target(target_task):
        # Map donor_task -> run list, restricted to one target task. Shared by
        # both summaries below (previously this filter was duplicated).
        return {
            k["donor_task"]: v
            for k, v in row_groups.items()
            if k["target_task"] == target_task
        }

    def create_donor_to_merge_summary(target_task):
        # Map donor_task -> (mean, stddev) of merged scores for target_task.
        summary = {}
        for donor_task, group in _groups_for_target(target_task).items():
            scores = np.array([get_single_score(item["score"]) for item in group])
            stddev = np.std(scores) if len(group) > 1 else None
            summary[donor_task] = (np.mean(scores), stddev)
        return summary

    def get_original_task_summary(task):
        # (mean, stddev) of the original (unmerged) scores for ``task``, or
        # (None, None) when there are no runs for it.
        # BUG FIX: this previously ignored its parameter and silently closed
        # over the enclosing loop's ``target_task`` variable; it worked only
        # because the sole call site passed exactly that variable.
        groups = _groups_for_target(task)
        if not groups:
            return None, None
        # Use the donor group with the most runs as the representative sample.
        group = max(groups.values(), key=len)
        scores = np.array(
            [get_single_score(item["original_score"]) for item in group])
        stddev = np.std(scores) if len(group) > 1 else None
        return np.mean(scores), stddev

    rows = [len(target_task_order) * [""] for _ in donor_task_order]
    for col_idx, target_task in enumerate(target_task_order):
        donor_to_merge_summary = create_donor_to_merge_summary(target_task)
        for row_idx, donor_task in enumerate(donor_task_order):
            if donor_task == target_task and not no_original_scores:
                # Diagonal cell: the target task's own unmerged score.
                mean, stddev = get_original_task_summary(target_task)
                rows[row_idx][col_idx] = render_score_fn(
                    mean, stddev, is_orig=True)
                continue
            if donor_task not in donor_to_merge_summary:
                continue  # No runs for this pair; leave the cell blank.
            mean, stddev = donor_to_merge_summary[donor_task]
            rows[row_idx][col_idx] = render_score_fn(mean, stddev)

    for row, task in zip(rows, donor_task_order):
        row.insert(0, result_utils.TASK_NICE_NAMES[task])

    rows = [
        R"\toprule",
        [R"\textbf{Task}"]
        + [result_utils.TASK_NICE_NAMES[t] for t in target_task_order],
        R"\midrule",
        *rows,
        R"\bottomrule",
    ]
    return result_utils.table_to_latex(rows)
def create_latex_table(  # noqa: C901
    filepath,
    render_score_fn=latex_render_score_subscript,
):
    """Build a LaTeX table of merged-score deltas over a donor/target epoch grid.

    Rows are donor epochs 0.5, 1.0, ..., 4.0; columns are integer target
    epochs 1..10. Each grid cell renders the merged score's mean delta over
    the group's mean original score (with stddev when more than one run
    exists). An extra "Unmerged" row shows the raw original scores per target
    epoch.

    Args:
      filepath: Path (or list/tuple of paths) of results JSON file(s) to load.
      render_score_fn: Callable (mean, stddev) -> LaTeX cell string.

    Returns:
      The rendered LaTeX table as produced by result_utils.table_to_latex.
    """
    filepaths = filepath if isinstance(filepath, (list, tuple)) else [filepath]

    items = []
    for fp in filepaths:
        loaded = [_index_to_epoch(it, fp) for it in result_utils.load_json(fp)]
        # Keep only runs whose target epoch lands on a whole epoch.
        items.extend(
            it for it in loaded
            if it["hyperparams"]["target_epoch"].is_integer()
        )

    # Group runs by their (donor epoch, target epoch) pair.
    groups = collections.defaultdict(list)
    for item in items:
        hp = item["hyperparams"]
        groups[(hp["donor_epoch"], hp["target_epoch"])].append(item)

    target_epochs = range(1, 11)

    original_scores = {}
    for te in target_epochs:
        # Representative sample: the largest group sharing this target epoch.
        biggest = max(
            [grp for (_, epoch), grp in groups.items() if epoch == te],
            key=len,
        )
        originals = np.array(
            [get_single_score(it["original_score"]) for it in biggest])
        spread = np.std(originals) if len(biggest) > 1 else None
        original_scores[te] = render_score_fn(np.mean(originals), spread)

    merged_scores = {}
    for key, group in groups.items():
        merged = np.array([get_single_score(it["merged_score"]) for it in group])
        baseline = np.mean(
            [get_single_score(it["original_score"]) for it in group])
        spread = np.std(merged) if len(group) > 1 else None
        # Grid cells show the delta of the merged score over the unmerged one.
        merged_scores[key] = render_score_fn(np.mean(merged) - baseline, spread)

    body = []
    for half_step in range(1, 9):
        donor_epoch = half_step / 2  # 0.5, 1.0, ..., 4.0
        body.append(
            [str(donor_epoch)]
            + [merged_scores[(donor_epoch, float(te))] for te in target_epochs]
        )

    return result_utils.table_to_latex([
        R"\toprule",
        [R"\textbf{Epoch}"] + [str(te) for te in target_epochs],
        R"\midrule",
        [R"\textit{Unmerged}"]
        + [original_scores[te] for te in target_epochs],
        R"\midrule",
        *body,
        R"\bottomrule",
    ])
def create_latex_table(  # noqa: C901
    filepath,
    render_score_fn=latex_render_score_subscript,
    l2_coeffs=(0.0, 1e-6, 3e-4, 0.01, 0.1),
    coeff_to_pretty=None,
):
    """Build a LaTeX table of merged scores over an L2-regularization grid.

    Rows are donor L2 coefficients and columns are target L2 coefficients
    (both taken from ``l2_coeffs``); each cell renders the mean (and stddev
    when more than one run exists) of the merged scores for that pair. An
    extra "Original" row shows each target coefficient's unmerged score.

    Args:
      filepath: Path of the results JSON to load.
      render_score_fn: Callable (mean, stddev) -> LaTeX cell string.
      l2_coeffs: L2 coefficients defining both the rows and the columns.
      coeff_to_pretty: Optional mapping coefficient -> display label. Defaults
        to compact labels for the default ``l2_coeffs``.

    Returns:
      The rendered LaTeX table as produced by result_utils.table_to_latex.
    """
    if coeff_to_pretty is None:
        # Same default labels as before; was a mutable default argument.
        coeff_to_pretty = {
            0.0: "0",
            1e-6: "1e-6",
            3e-4: "3e-4",
            0.01: "1e-2",
            0.1: "1e-1",
        }

    items = result_utils.load_json(filepath)

    # Group runs by their full hyperparameter dict (target + donor strengths).
    row_groups = collections.defaultdict(list)
    for item in items:
        row_groups[hashabledict(item["hyperparams"])].append(item)

    def _summarize(group, score_key):
        # Rendered mean of ``score_key`` over ``group``; stddev only for >1 run.
        scores = np.array([get_single_score(item[score_key]) for item in group])
        stddev = np.std(scores) if len(group) > 1 else None
        return render_score_fn(np.mean(scores), stddev)

    def get_original_score(target_coeff):
        # Rendered unmerged score for ``target_coeff``, or "" if it has no runs.
        candidates = {
            k["donor_reg_strength"]: v
            for k, v in row_groups.items()
            if k["target_reg_strength"] == target_coeff
        }
        if not candidates:
            # BUG FIX: previously returned the tuple (None, None), which would
            # have been emitted verbatim as a table cell. An empty cell matches
            # how missing merged cells are displayed.
            return ""
        # Representative sample: the donor group with the most runs.
        group = max(candidates.values(), key=len)
        return _summarize(group, "original_score")

    rows = [len(l2_coeffs) * [""] for _ in l2_coeffs]
    for col_idx, target_coeff in enumerate(l2_coeffs):
        for row_idx, donor_coeff in enumerate(l2_coeffs):
            key = hashabledict({
                "target_reg_strength": target_coeff,
                "donor_reg_strength": donor_coeff,
            })
            # .get avoids growing the defaultdict; a pair with no runs leaves
            # its cell blank instead of rendering NaN (np.mean of empty array).
            group = row_groups.get(key, [])
            if group:
                rows[row_idx][col_idx] = _summarize(group, "merged_score")

    for row, coeff in zip(rows, l2_coeffs):
        row.insert(0, coeff_to_pretty[coeff])

    rows = [
        R"\toprule",
        # BUG FIX: the header and "Original" rows previously iterated the keys
        # of coeff_to_pretty; iterate l2_coeffs so the columns stay aligned
        # when a caller overrides only one of the two arguments.
        [""] + [coeff_to_pretty[c] for c in l2_coeffs],
        R"\midrule",
        ["Original"] + [get_original_score(c) for c in l2_coeffs],
        R"\midrule",
        *rows,
        R"\bottomrule",
    ]
    return result_utils.table_to_latex(rows)