def _eval_param_to_key(eval_exp, eval_param):
    return hashabledict({
        "pretrained_model": eval_param.pretrained_model,
        "task": eval_param.task,
        "reg_strength": eval_param.reg_strength,
        "reg_type": eval_param.reg_type,
    })

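# The functions in this file key defaultdicts by dictionaries of
# hyperparameters, so they rely on a hashable dict subclass. `hashabledict` is
# defined elsewhere in the repo; the sketch below is an assumption about what
# it likely provides, not a copy of the actual implementation.
class hashabledict(dict):
    """A dict usable as a dictionary key; assumes its values are hashable."""

    def __hash__(self):
        # Keys are strings here, so sorting the items gives a stable order.
        return hash(tuple(sorted(self.items())))
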
def create_csv_table(filepath, round_digits=1):
    items = result_utils.load_json(filepath)
    row_groups = collections.defaultdict(list)
    for item in items:
        group_key = hashabledict({
            "target_task": item["target_task"],
            "donor_task": item["donor_task"],
        })
        row_groups[group_key].append(item)

    header = [
        "task", "donor", "merged score", "stddev", "orig score", "stddev",
        "mean boost", "stddev", "max boost", "min boost", "num trials",
    ]
    body = []
    for hp, row_items in row_groups.items():
        og_scores = np.array(
            [get_single_score(item["original_score"]) for item in row_items])
        merged_scores = np.array(
            [get_single_score(item["merged_score"]) for item in row_items])
        row = [
            hp["target_task"],
            hp["donor_task"],
            round(np.mean(merged_scores), round_digits),
            round(np.std(merged_scores), round_digits),
            #
            round(np.mean(og_scores), round_digits),
            round(np.std(og_scores), round_digits),
            #
            round(np.mean(merged_scores - og_scores), round_digits),
            round(np.std(merged_scores - og_scores), round_digits),
            #
            round(np.max(merged_scores - og_scores), round_digits),
            round(np.min(merged_scores - og_scores), round_digits),
            len(row_items),
        ]
        body.append(row)
    body = sorted(body, key=lambda r: r[:2])
    rows = [header] + body
    return result_utils.csv_to_str(rows)

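# The table builders above lean on a few small utilities defined elsewhere in
# the repo: `result_utils.load_json`, `result_utils.csv_to_str`, and
# `get_single_score`. Their exact behavior is an assumption here; minimal
# sketches (with hypothetical `_sketch` names so they do not shadow the real
# helpers) might look like the following.
import csv
import io
import json


def _load_json_sketch(filepath):
    # Hypothetical stand-in for result_utils.load_json: read a JSON file
    # containing a list of result dicts.
    with open(filepath, "r") as f:
        return json.load(f)


def _csv_to_str_sketch(rows):
    # Hypothetical stand-in for result_utils.csv_to_str: serialize a list of
    # rows (header first) into a CSV-formatted string.
    buf = io.StringIO()
    csv.writer(buf).writerows(rows)
    return buf.getvalue()


def _get_single_score_sketch(score):
    # Hypothetical stand-in for get_single_score: collapse a score that may be
    # a dict of metrics (e.g. {"accuracy": ..., "f1": ...}) into one float.
    if isinstance(score, dict):
        return sum(score.values()) / len(score)
    return float(score)
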
def get_hyperparams_to_original_score(json_file, task):
    items = _load_json(json_file)
    hp_to_score = {}
    for item in items:
        if item["task"] != task:
            continue
        hp = hashabledict(item["hyperparams"])
        hp_to_score[hp] = item["original_score"]
    return hp_to_score

def create_csv_table(filepath, round_digits=1):
    items = result_utils.load_json(filepath)
    row_groups = collections.defaultdict(list)
    for item in items:
        group_key = hashabledict(item["hyperparams"])
        row_groups[group_key].append(item)

    header = [
        "MNLI ckpt", "RTE ckpt", "merged RTE acc", "merged stddev",
        "orig RTE acc", "orig stddev", "MNLI body acc", "MNLI body stddev",
        "num trials",
    ]
    body = []
    for hp, row_items in row_groups.items():
        og_scores = [
            get_single_score(item["original_score"]) for item in row_items
        ]
        merged_scores = [
            get_single_score(item["merged_score"]) for item in row_items
        ]
        donor_body_scores = [
            get_single_score(item["donor_body_score"]) for item in row_items
        ]
        row = [
            hp["donor_ckpt_index"],
            hp["target_ckpt_index"],
            round(np.mean(merged_scores), round_digits),
            round(np.std(merged_scores), round_digits),
            #
            round(np.mean(og_scores), round_digits),
            round(np.std(og_scores), round_digits),
            #
            round(np.mean(donor_body_scores), round_digits),
            round(np.std(donor_body_scores), round_digits),
            len(row_items),
        ]
        body.append(row)
    body = sorted(body, key=lambda r: r[:2])
    rows = [header] + body
    return result_utils.csv_to_str(rows)

def json_to_best_per_target(filepath):
    items = _load_json(filepath)
    hp_task_to_items = collections.defaultdict(list)
    for item in items:
        hp_task = {"task": item["task"]}
        hp_task.update(item["hyperparams"])
        hp_task = hashabledict(hp_task)
        hp_task_to_items[hp_task].append(item)

    ret = []
    for task_items in hp_task_to_items.values():
        best_item = max(task_items,
                        key=lambda r: _get_single_score(r["merged_score"]))
        ret.append(best_item)
    return ret

def get_hyperparams_to_best_merged_score(json_file, task):
    items = _load_json(json_file)
    hp_to_score = collections.defaultdict(list)
    for item in items:
        if item["task"] != task:
            continue
        hp = hashabledict(item["hyperparams"])
        hp_to_score[hp].append(item["merged_score"])
    hp_to_score = {
        hp: max(scores, key=_get_single_score)
        for hp, scores in hp_to_score.items()
    }
    return hp_to_score

def create_csv_table(filepath,
                     round_digits=1,
                     group_by_ckpt_index=True,
                     best_per_finetuned_model=False):
    items = result_utils.load_json(filepath)
    row_groups = collections.defaultdict(list)
    for item in items:
        group_key = hashabledict(item["hyperparams"])
        row_groups[group_key].append(item)

    if best_per_finetuned_model:
        new_row_groups = collections.defaultdict(list)
        for hp, row_items in row_groups.items():
            # TODO: get best original score as well
            best = max(row_items,
                       key=lambda r: get_single_score(r["merged_score"]))
            new_key = dict(hp)
            del new_key["train_run_uuid"]
            new_key = hashabledict(new_key)
            new_row_groups[new_key].append(best)
        row_groups = new_row_groups

    header = [
        "task", "task ckpt", "merged task f1", "stddev", "orig task f1",
        "stddev", "mean boost", "stddev", "max boost", "min boost",
        "num trials",
    ]
    body = []
    for hp, row_items in row_groups.items():
        og_scores = np.array(
            [get_single_score(item["original_score"]) for item in row_items])
        merged_scores = np.array(
            [get_single_score(item["merged_score"]) for item in row_items])
        row = [
            hp["task"],
            hp["target_ckpt_index"] if group_by_ckpt_index else "-",
            round(np.mean(merged_scores), round_digits),
            round(np.std(merged_scores), round_digits),
            #
            round(np.mean(og_scores), round_digits),
            round(np.std(og_scores), round_digits),
            #
            round(np.mean(merged_scores - og_scores), round_digits),
            round(np.std(merged_scores - og_scores), round_digits),
            #
            round(np.max(merged_scores - og_scores), round_digits),
            round(np.min(merged_scores - og_scores), round_digits),
            len(row_items),
        ]
        body.append(row)
    body = sorted(body, key=lambda r: r[:2])
    rows = [header] + body
    return result_utils.csv_to_str(rows)

def _something_joined(train_exp, eval_exp, merge_exp, ckpt_index):
    merge_run_params = _get_finished_run_params(merge_exp)
    eval_run_params = _get_finished_run_params(eval_exp)

    ret = []

    # Index the evaluation results of the unmerged (original) checkpoints by
    # their hyperparameter key.
    eval_key_to_results = {}
    for eval_id, p in eval_run_params:
        key = _eval_param_to_key(eval_exp, p)
        ckpt = p.checkpoints_summary.checkpoint_uuids[ckpt_index]
        eval_results = eval_exp.retrieve_items_by_class(
            eval_execs.CheckpointEvaluationResults, eval_id)
        ckpt_id_to_results = {r.checkpoint_blob_uuid: r for r in eval_results}
        eval_key_to_results[key] = ckpt_id_to_results[ckpt]

    for merge_run_id, p in merge_run_params:
        merge_results = merge_exp.retrieve_items_by_class(
            merging_execs.MergingEvaluationResults, merge_run_id)

        # Look up the original (pre-merge) score of each model in this merge.
        task_to_eval_score = {}
        merge_key = _merge_param_to_key(train_exp, merge_exp, p)
        for m in p.models_to_merge:
            eval_key = {"task": m.task}
            eval_key.update(merge_key)
            eval_key = hashabledict(eval_key)
            task_to_eval_score[m.task] = eval_key_to_results[eval_key].results[
                f"{m.task}_acc"]

        ret_item = {
            "common_parameters": merge_key.copy(),
            "original_scores": task_to_eval_score.copy(),
            "merge_results": [],
        }
        for merge_result in merge_results:
            scores = {}
            for task in merge_result.tasks:
                eval_score = task_to_eval_score[task]
                # Find the merged metric whose name contains the task name.
                # Relies on `merge_score` keeping its value after the break.
                for name, merge_score in merge_result.results.items():
                    if task in name:
                        break
                scores[task] = (merge_score, eval_score)
            # Debugging output.
            print(merge_key)
            print(merge_result.weighting)
            print(scores)
            print()

            merged_scores = _key_results_by_task(merge_result.results,
                                                 merge_result.tasks)
            relative_difference = {
                task: merged_score / task_to_eval_score[task]
                for task, merged_score in merged_scores.items()
            }
            absolute_difference = {
                task: 100 * (merged_score - task_to_eval_score[task])
                for task, merged_score in merged_scores.items()
            }
            weighting = {
                task: weight
                for task, weight in zip(merge_result.tasks, merge_result.weighting)
            }
            merged_ret = {
                "weighting": weighting,
                "scores": merged_scores,
                "relative_difference": relative_difference,
                "absolute_difference": absolute_difference,
            }
            ret_item["merge_results"].append(merged_ret)

        ret.append(ret_item)
        print(2 * "\n")

    return ret

def create_latex_table(  # noqa: C901
    jasons,
    render_score_fn=latex_render_score_subscript,
    target_task_order=result_utils.GLUE_TASKS_ORDER,
    donor_task_order=result_utils.GLUE_TASKS_ORDER,
    no_original_scores=False,
):
    items = []
    for jason in jasons:
        items.extend(jason)

    row_groups = collections.defaultdict(list)
    for item in items:
        group_key = hashabledict({
            "target_task": item["task"],
            "donor_task": item["intermediate_task"],
        })
        row_groups[group_key].append(item)

    def create_donor_to_merge_summary(target_task):
        ret = {}
        for k, v in row_groups.items():
            if k["target_task"] != target_task:
                continue
            ret[k["donor_task"]] = v
        ret2 = {}
        for donor_task, ret_items in ret.items():
            merged_scores = np.array(
                [get_single_score(item["score"]) for item in ret_items])
            mean = np.mean(merged_scores)
            stddev = np.std(merged_scores) if len(ret_items) > 1 else None
            ret2[donor_task] = (mean, stddev)
        return ret2

    def get_original_task_summary(target_task):
        ret = {}
        for k, v in row_groups.items():
            if k["target_task"] != target_task:
                continue
            ret[k["donor_task"]] = v
        if not ret:
            return None, None
        ret_items = max(ret.values(), key=len)
        original_scores = np.array(
            [get_single_score(item["original_score"]) for item in ret_items])
        mean = np.mean(original_scores)
        stddev = np.std(original_scores) if len(ret_items) > 1 else None
        return mean, stddev

    rows = [len(target_task_order) * [""] for _ in donor_task_order]
    for col_idx, target_task in enumerate(target_task_order):
        donor_to_merge_summary = create_donor_to_merge_summary(target_task)
        for row_idx, donor_task in enumerate(donor_task_order):
            if donor_task == target_task and not no_original_scores:
                mean, stddev = get_original_task_summary(target_task)
                rows[row_idx][col_idx] = render_score_fn(mean, stddev, is_orig=True)
                continue
            if donor_task not in donor_to_merge_summary:
                continue
            mean, stddev = donor_to_merge_summary[donor_task]
            rows[row_idx][col_idx] = render_score_fn(mean, stddev)

    for row, task in zip(rows, donor_task_order):
        row.insert(0, result_utils.TASK_NICE_NAMES[task])

    rows = [
        R"\toprule",
        [R"\textbf{Task}"]
        + [result_utils.TASK_NICE_NAMES[t] for t in target_task_order],
        R"\midrule",
        *rows,
        R"\bottomrule",
    ]
    return result_utils.table_to_latex(rows)

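# The LaTeX table builders take a `render_score_fn`; the default,
# `latex_render_score_subscript`, is defined elsewhere in the repo. The sketch
# below is only an assumption about its behavior: render the mean with the
# standard deviation as a subscript and, optionally, mark original-task scores.
def _latex_render_score_subscript_sketch(mean, stddev, is_orig=False):
    if mean is None:
        return ""
    cell = f"{mean:.1f}"
    if stddev is not None:
        cell = f"${cell}_{{{stddev:.1f}}}$"
    else:
        cell = f"${cell}$"
    if is_orig:
        # Hypothetical styling choice to distinguish original-task cells.
        cell = Rf"\textit{{{cell}}}"
    return cell
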
def create_varying_params(exp, train_exp, target_fisher_exp, donor_fisher_exp):
    train_run_uuids = train_exp.retrieve_run_uuids(RunState.FINISHED)
    target_fisher_run_uuids = target_fisher_exp.retrieve_run_uuids(
        RunState.FINISHED)
    donor_fisher_run_uuids = donor_fisher_exp.retrieve_run_uuids(
        RunState.FINISHED)

    train_run_params = {
        rid: train_exp.retrieve_run_params(rid) for rid in train_run_uuids
    }
    target_fisher_run_params = [
        target_fisher_exp.retrieve_run_params(rid)
        for rid in target_fisher_run_uuids
    ]
    donor_fisher_run_params = [
        donor_fisher_exp.retrieve_run_params(rid)
        for rid in donor_fisher_run_uuids
    ]

    fisher_run_uuids = target_fisher_run_uuids + donor_fisher_run_uuids
    fisher_run_params = target_fisher_run_params + donor_fisher_run_params

    grouping_to_params = collections.defaultdict(list)
    for fi_rid, fi_rp in zip(fisher_run_uuids, fisher_run_params):
        assert fi_rp.finetuned_exp_uuid == train_exp.uuid
        tr_rp = train_run_params[fi_rp.finetuned_run_uuid]

        is_target = fi_rid in target_fisher_run_uuids
        fisher_exp = target_fisher_exp if is_target else donor_fisher_exp

        # NOTE: We could do this in fewer db calls and probably be faster.
        summary = fisher_exp.retrieve_single_item_by_class(
            fisher_execs.FisherMatricesSummary, fi_rid)
        for i, sfm_uuid in enumerate(summary.saved_fisher_matrix_uuids):
            saved_fisher_matrix = fisher_exp.get_storage().retrieve_item(sfm_uuid)
            grouping_key = {
                "index": i,
                "variational_fisher_beta": fi_rp.variational_fisher_beta,
                "learning_rate": fi_rp.learning_rate,
                # "num_examples": fi_rp.num_examples,
                # "pretrained_model": fi_rp.pretrained_model,
                "reg_strength": tr_rp.reg_strength,
                "reg_type": tr_rp.reg_type,
            }
            grouping_key = hashabledict(grouping_key)
            model_to_merge = ModelToMerge(
                task=tr_rp.task,
                train_run_uuid=fi_rp.finetuned_run_uuid,
                fisher_run_uuid=fi_rid,
                model_checkpoint_uuid=fi_rp.finetuned_ckpt_uuid,
                fisher_matrix_uuid=saved_fisher_matrix.blob_uuid,
            )
            grouping_to_params[grouping_key].append(model_to_merge)

    varying_params = []
    for grouping_key, models in grouping_to_params.items():
        # Make sure the tasks in this grouping are unique.
        if len(models) == 1:
            logging.warning(
                f"Skipping merge for grouping key {grouping_key} since only 1 "
                "model was found.")
            continue
        assert len(set(p.task for p in models)) == 2

        (target_model,) = [
            m for m in models if m.fisher_run_uuid in target_fisher_run_uuids
        ]
        donor_models = [
            m for m in models if m.fisher_run_uuid in donor_fisher_run_uuids
        ]
        for donor_model in donor_models:
            varying_param = {
                "models_to_merge": [target_model, donor_model],
                "task": target_model.task,
                # "pretrained_model": grouping_key["pretrained_model"],
                "reg_strength": grouping_key["reg_strength"],
                "reg_type": grouping_key["reg_type"],
            }
            varying_params.append(varying_param)

    return varying_params

def _create_varying_pairwise_merge_params(exp, fisher_exp, train_exp):
    train_run_uuids = train_exp.retrieve_run_uuids(RunState.FINISHED)
    fisher_run_uuids = fisher_exp.retrieve_run_uuids(RunState.FINISHED)

    train_run_params = {
        rid: train_exp.retrieve_run_params(rid) for rid in train_run_uuids
    }
    fisher_run_params = [
        fisher_exp.retrieve_run_params(rid) for rid in fisher_run_uuids
    ]

    grouping_to_params = collections.defaultdict(list)
    for fi_rid, fi_rp in zip(fisher_run_uuids, fisher_run_params):
        assert fi_rp.ft_exp_uuid == train_exp.uuid
        tr_rp = train_run_params[fi_rp.ft_run_uuid]

        grouping_key = {}
        grouping_key.update(fisher_exp.create_run_key_values(fi_rp))
        grouping_key.update(train_exp.create_run_key_values(tr_rp))
        del grouping_key["finetuned_run_uuid"]
        del grouping_key["finetuned_ckpt_uuid"]
        del grouping_key["task"]
        grouping_key = hashabledict(grouping_key)

        saved_fisher_matrix = fisher_exp.retrieve_single_item_by_class(
            fisher_execs.SavedFisherMatrix, fi_rid)
        model_to_merge = ModelToMerge(
            task=tr_rp.task,
            train_run_uuid=fi_rp.ft_run_uuid,
            fisher_run_uuid=fi_rid,
            model_checkpoint_uuid=fi_rp.finetuned_ckpt_uuid,
            fisher_matrix_uuid=saved_fisher_matrix.blob_uuid,
        )
        grouping_to_params[grouping_key].append(model_to_merge)

    varying_params = []
    for grouping_key, models in grouping_to_params.items():
        # Make sure the tasks in this grouping are unique.
        assert len(set(p.task for p in models)) == len(models)

        base_param = {
            "pretrained_model": grouping_key["pretrained_model"],
            "fisher_type": grouping_key["fisher_type"],
        }
        if grouping_key["fisher_type"] == "diagonal":
            base_param["fisher_params"] = {
                "y_samples": grouping_key["diagonal_y_samples"],
            }
        else:
            raise ValueError(
                f"Invalid fisher_type {grouping_key['fisher_type']}.")

        for pair in itertools.combinations(models, 2):
            varying_param = base_param.copy()
            varying_param["models_to_merge"] = tuple(pair)
            varying_params.append(varying_param)

    return varying_params

def create_varying_params_merge_informed_pair(exp, fisher_exp, train_exp,
                                              tasks_map):
    train_run_uuids = train_exp.retrieve_run_uuids(RunState.FINISHED)
    fisher_run_uuids = fisher_exp.retrieve_run_uuids(RunState.FINISHED)

    train_run_params = {
        rid: train_exp.retrieve_run_params(rid) for rid in train_run_uuids
    }
    fisher_run_params = [
        fisher_exp.retrieve_run_params(rid) for rid in fisher_run_uuids
    ]

    grouping_to_params = collections.defaultdict(list)
    for fi_rid, fi_rp in zip(fisher_run_uuids, fisher_run_params):
        assert fi_rp.ft_exp_uuid == train_exp.uuid
        tr_rp = train_run_params[fi_rp.ft_run_uuid]

        grouping_key = {}
        grouping_key.update(fisher_exp.create_run_key_values(fi_rp))
        grouping_key.update(train_exp.create_run_key_values(tr_rp))
        del grouping_key["finetuned_run_uuid"]
        del grouping_key["finetuned_ckpt_uuid"]
        del grouping_key["task"]
        grouping_key = hashabledict(grouping_key)

        saved_fisher_matrix = fisher_exp.retrieve_single_item_by_class(
            fisher_execs.SavedFisherMatrix, fi_rid)
        model_to_merge = ModelToMerge(
            task=tr_rp.task,
            train_run_uuid=fi_rp.ft_run_uuid,
            fisher_run_uuid=fi_rid,
            model_checkpoint_uuid=fi_rp.finetuned_ckpt_uuid,
            fisher_matrix_uuid=saved_fisher_matrix.blob_uuid,
        )
        grouping_to_params[grouping_key].append(model_to_merge)

    varying_params = []
    for grouping_key, models in grouping_to_params.items():
        # Make sure the tasks in this grouping are unique.
        assert len(set(p.task for p in models)) == len(models)

        base_param = {
            "pretrained_model": grouping_key["pretrained_model"],
            "reg_type": grouping_key["reg_type"],
            "reg_strength": grouping_key["reg_strength"],
            "fisher_type": grouping_key["fisher_type"],
        }
        if grouping_key["fisher_type"] == "diagonal":
            base_param["fisher_params"] = {
                "y_samples": grouping_key["diagonal_y_samples"],
            }
        else:
            raise ValueError(
                f"Invalid fisher_type {grouping_key['fisher_type']}.")

        for model in models:
            target_task = model.task
            for donor_task in tasks_map[target_task]:
                varying_param = base_param.copy()
                donor_mtm = [mtm for mtm in models if mtm.task == donor_task]
                assert len(donor_mtm) == 1
                varying_param.update({
                    "models_to_merge": [model] + donor_mtm,
                    "task": target_task,
                })
                varying_params.append(varying_param)

    return varying_params

def create_varying_weight_search_phase_i_params(exp,
                                                fisher_exp,
                                                train_exp,
                                                target_task,
                                                max_models=None):
    train_run_uuids = train_exp.retrieve_run_uuids(RunState.FINISHED)
    fisher_run_uuids = fisher_exp.retrieve_run_uuids(RunState.FINISHED)

    train_run_params = {
        rid: train_exp.retrieve_run_params(rid) for rid in train_run_uuids
    }
    fisher_run_params = [
        fisher_exp.retrieve_run_params(rid) for rid in fisher_run_uuids
    ]

    grouping_to_params = collections.defaultdict(list)
    for fi_rid, fi_rp in zip(fisher_run_uuids, fisher_run_params):
        assert fi_rp.ft_exp_uuid == train_exp.uuid
        tr_rp = train_run_params[fi_rp.ft_run_uuid]

        grouping_key = {}
        grouping_key.update(fisher_exp.create_run_key_values(fi_rp))
        grouping_key.update(train_exp.create_run_key_values(tr_rp))
        del grouping_key["finetuned_run_uuid"]
        del grouping_key["finetuned_ckpt_uuid"]
        del grouping_key["task"]
        grouping_key = hashabledict(grouping_key)

        saved_fisher_matrix = fisher_exp.retrieve_single_item_by_class(
            fisher_execs.SavedFisherMatrix, fi_rid)
        model_to_merge = ModelToMerge(
            task=tr_rp.task,
            train_run_uuid=fi_rp.ft_run_uuid,
            fisher_run_uuid=fi_rid,
            model_checkpoint_uuid=fi_rp.finetuned_ckpt_uuid,
            fisher_matrix_uuid=saved_fisher_matrix.blob_uuid,
        )
        grouping_to_params[grouping_key].append(model_to_merge)

    varying_params = []
    for grouping_key, models in grouping_to_params.items():
        # Make sure the tasks in this grouping are unique.
        assert len(set(p.task for p in models)) == len(models)

        varying_param = {
            "pretrained_model": grouping_key["pretrained_model"],
            "reg_type": grouping_key["reg_type"],
            "reg_strength": grouping_key["reg_strength"],
            "fisher_type": grouping_key["fisher_type"],
        }
        if grouping_key["fisher_type"] == "diagonal":
            varying_param["fisher_params"] = {
                "y_samples": grouping_key["diagonal_y_samples"],
            }
        else:
            raise ValueError(f"Invalid fisher_type {grouping_key['fisher_type']}.")

        # Rotate the models so that the target task's model comes first.
        tasks = [p.task for p in models]
        rotation = tasks.index(target_task)
        varying_param["models_to_merge"] = _rotate(models, rotation)
        if max_models:
            varying_param["models_to_merge"] = (
                varying_param["models_to_merge"][:max_models])

        varying_params.append(varying_param)

    return varying_params

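# `create_varying_weight_search_phase_i_params` reorders the models via a
# `_rotate` helper that is not shown here. A minimal sketch of what such a
# helper presumably does (an assumption, not the repo's definition):
def _rotate_sketch(seq, n):
    # Left-rotate a sequence by n positions,
    # e.g. _rotate_sketch([a, b, c], 1) -> [b, c, a].
    seq = list(seq)
    return seq[n:] + seq[:n]
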
def create_latex_table(  # noqa: C901
    filepath,
    render_score_fn=latex_render_score_subscript,
    l2_coeffs=(0.0, 1e-6, 3e-4, 0.01, 0.1),
    coeff_to_pretty={
        0.0: "0",
        1e-6: "1e-6",
        3e-4: "3e-4",
        0.01: "1e-2",
        0.1: "1e-1",
    },
):
    items = result_utils.load_json(filepath)
    row_groups = collections.defaultdict(list)
    for item in items:
        group_key = hashabledict(item["hyperparams"])
        row_groups[group_key].append(item)

    def get_original_score(target_coeff):
        ret = {}
        for k, v in row_groups.items():
            if k["target_reg_strength"] != target_coeff:
                continue
            ret[k["donor_reg_strength"]] = v
        if not ret:
            # No runs for this coefficient; leave the cell empty.
            return ""
        ret_items = max(ret.values(), key=len)
        original_scores = np.array(
            [get_single_score(item["original_score"]) for item in ret_items])
        mean = np.mean(original_scores)
        stddev = np.std(original_scores) if len(ret_items) > 1 else None
        return render_score_fn(mean, stddev)

    rows = [len(l2_coeffs) * [""] for _ in l2_coeffs]
    for col_idx, target_coeff in enumerate(l2_coeffs):
        for row_idx, donor_coeff in enumerate(l2_coeffs):
            key = hashabledict({
                "target_reg_strength": target_coeff,
                "donor_reg_strength": donor_coeff,
            })
            row_items = row_groups[key]
            merged_scores = np.array(
                [get_single_score(item["merged_score"]) for item in row_items])
            mean = np.mean(merged_scores)
            stddev = np.std(merged_scores) if len(row_items) > 1 else None
            rows[row_idx][col_idx] = render_score_fn(mean, stddev)

    for row, coeff in zip(rows, l2_coeffs):
        row.insert(0, coeff_to_pretty[coeff])

    rows = [
        R"\toprule",
        [""] + [coeff_to_pretty[t] for t in l2_coeffs],
        R"\midrule",
        ["Original"] + [get_original_score(t) for t in l2_coeffs],
        R"\midrule",
        *rows,
        R"\bottomrule",
    ]
    return result_utils.table_to_latex(rows)

def _extract_common_params(run_params):
    return hashabledict(
        pretrained_model=run_params.pretrained_model,
        reg_strength=run_params.reg_strength,
        reg_type=run_params.reg_type,
    )

def create_varying_params_all_mnli_to_rte(exp, mnli_fisher_exp, rte_fisher_exp,
                                          train_exp):
    train_run_uuids = train_exp.retrieve_run_uuids(RunState.FINISHED)
    mnli_fisher_run_uuids = mnli_fisher_exp.retrieve_run_uuids(
        RunState.FINISHED)
    rte_fisher_run_uuids = rte_fisher_exp.retrieve_run_uuids(RunState.FINISHED)

    train_run_params = {
        rid: train_exp.retrieve_run_params(rid) for rid in train_run_uuids
    }
    mnli_fisher_run_params = [
        mnli_fisher_exp.retrieve_run_params(rid)
        for rid in mnli_fisher_run_uuids
    ]
    rte_fisher_run_params = [
        rte_fisher_exp.retrieve_run_params(rid) for rid in rte_fisher_run_uuids
    ]

    grouping_to_params = collections.defaultdict(list)
    for fi_rid, fi_rp in zip(
            mnli_fisher_run_uuids + rte_fisher_run_uuids,
            mnli_fisher_run_params + rte_fisher_run_params,
    ):
        assert fi_rp.ft_exp_uuid == train_exp.uuid
        if fi_rp.task not in ["mnli", "rte"]:
            continue
        # NOTE: I forgot to add this before launching the experiment.
        elif fi_rp.task == "mnli" and fi_rid not in mnli_fisher_run_uuids:
            continue
        tr_rp = train_run_params[fi_rp.ft_run_uuid]

        if fi_rid in mnli_fisher_run_uuids:
            fisher_exp = mnli_fisher_exp
        else:
            fisher_exp = rte_fisher_exp

        grouping_key = {}
        grouping_key.update(fisher_exp.create_run_key_values(fi_rp))
        grouping_key.update(train_exp.create_run_key_values(tr_rp))
        del grouping_key["finetuned_run_uuid"]
        del grouping_key["finetuned_ckpt_uuid"]
        del grouping_key["task"]
        grouping_key = hashabledict(grouping_key)

        saved_fisher_matrix = fisher_exp.retrieve_single_item_by_class(
            fisher_execs.SavedFisherMatrix, fi_rid)
        model_to_merge = ModelToMerge(
            task=tr_rp.task,
            train_run_uuid=fi_rp.ft_run_uuid,
            fisher_run_uuid=fi_rid,
            model_checkpoint_uuid=fi_rp.finetuned_ckpt_uuid,
            fisher_matrix_uuid=saved_fisher_matrix.blob_uuid,
        )
        grouping_to_params[grouping_key].append(model_to_merge)

    varying_params = []
    for grouping_key, models in grouping_to_params.items():
        base_param = {
            "pretrained_model": grouping_key["pretrained_model"],
            "reg_type": grouping_key["reg_type"],
            "reg_strength": grouping_key["reg_strength"],
            "fisher_type": grouping_key["fisher_type"],
        }
        if grouping_key["fisher_type"] == "diagonal":
            base_param["fisher_params"] = {
                "y_samples": grouping_key["diagonal_y_samples"],
            }
        else:
            raise ValueError(
                f"Invalid fisher_type {grouping_key['fisher_type']}.")

        rte_models = [mtm for mtm in models if mtm.task == "rte"]
        mnli_models = [mtm for mtm in models if mtm.task == "mnli"]
        for rte_model in rte_models:
            for mnli_model in mnli_models:
                varying_param = base_param.copy()
                varying_param.update({
                    "models_to_merge": [rte_model, mnli_model],
                    "task": "rte",
                })
                varying_params.append(varying_param)

    return varying_params

def create_csv_table(filepath, round_digits=1):
    items = result_utils.load_json(filepath)
    row_groups = collections.defaultdict(list)
    for item in items:
        group_key = hashabledict(item["hyperparams"])
        row_groups[group_key].append(item)

    row_groups2 = collections.defaultdict(list)
    for hp, row_items in row_groups.items():
        best_og = max(
            row_items,
            key=lambda item: get_single_score(item["original_score"]))
        best_merged = max(
            row_items,
            key=lambda item: get_single_score(item["merged_score"]))
        hp = dict(hp)
        del hp["train_run_uuid"]
        group_key = hashabledict(hp)
        row_groups2[group_key].append({
            "original_score": best_og["original_score"],
            "merged_score": best_merged["merged_score"],
        })

    header = [
        "task", "mlm train ex", "mlm reg str", "merged task f1", "stddev",
        "orig task f1", "stddev", "mean boost", "stddev", "max boost",
        "min boost", "num trials",
    ]
    body = []
    for hp, row_items in row_groups2.items():
        og_scores = np.array(
            [get_single_score(item["original_score"]) for item in row_items])
        merged_scores = np.array(
            [get_single_score(item["merged_score"]) for item in row_items])
        row = [
            hp["task"],
            hp["pretrained_examples"],
            hp["pretrained_reg_strength"],
            #
            round(np.mean(merged_scores), round_digits),
            round(np.std(merged_scores), round_digits),
            #
            round(np.mean(og_scores), round_digits),
            round(np.std(og_scores), round_digits),
            #
            round(np.mean(merged_scores - og_scores), round_digits),
            round(np.std(merged_scores - og_scores), round_digits),
            #
            round(np.max(merged_scores - og_scores), round_digits),
            round(np.min(merged_scores - og_scores), round_digits),
            len(row_items),
        ]
        body.append(row)
    body = sorted(body, key=lambda r: r[:3])
    rows = [header] + body
    return result_utils.csv_to_str(rows)