def get_metric_deprecated(re, metric="test_rmse"):
    """Print RMSE figures read from the nested results mapping ``re``.

    Args:
        re: Nested results mapping indexed first by model/task name. Note
            that the parameter name shadows the standard ``re`` module
            inside this function.
        metric: Selects what is printed.
            ``"mean_test_rmse"`` prints, for a fixed list of entries, the
            mean of the values stored under that entry's ``"test_rmse"``
            mapping.
            ``"test_rmse"`` prints, for every task, one ``calculate_rmse``
            value per key that starts with ``"result"``.
            Any other value prints nothing.

    Returns:
        None. All output goes to stdout.
    """
    if metric == "mean_test_rmse":
        # Each row is (printed label, path). The first path element selects
        # the entry in ``re``; any remaining elements are looked up inside
        # that entry's "test_rmse" mapping.
        summary_rows = (
            ("TSF_MT", ("tsfmt",)),
            ("TSF_EL", ("tsfel",)),
            ("TSF_POS", ("tsfpos",)),
            ("TSF_PARSING", ("tsfparsing",)),
            ("MONO_MT", ("monomt",)),
            ("MI", ("mi",)),
            ("SF_Keywords_F1", ("sf", "Keywords_F1")),
            ("SF_Keywords_Precision", ("sf", "Keywords_Precision")),
            ("SF_Keywords_Recall", ("sf", "Keywords_Recall")),
            ("SF_NN_F1", ("sf", "NN_F1")),
            ("SF_NN_Precision", ("sf", "NN_Precision")),
            ("SF_NN_Recall", ("sf", "NN_Recall")),
            ("BLI_MUSE", ("bli", "MUSE")),
            ("BLI_Artetxe17", ("bli", "Artetxe17")),
            ("BLI_Artetxe16", ("bli", "Artetxe16")),
        )
        for label, (entry_name, *sub_keys) in summary_rows:
            scores = re[entry_name]["test_rmse"]
            for sub_key in sub_keys:
                scores = scores[sub_key]
            print(label, np.mean(list(scores.values())))
        return

    if metric != "test_rmse":
        return

    # Bound/interval predictions are stored next to the results but are not
    # scored here.
    unscored = ("result_upper_preds", "result_lower_preds")
    for task in re:
        entries = re[task]
        for key in list(entries.keys()):
            if not key.startswith("result") or key in unscored:
                continue
            if key == "result":
                # A bare "result" entry is compared against the plain
                # "labels" entry.
                print(
                    "{}".format(task.capitalize()),
                    calculate_rmse(entries[key], entries["labels"]))
            else:
                # Everything after the first seven characters (the length
                # of "result_") names the matching "<name>_labels" entry.
                suffix = key[7:]
                print(
                    "{}_{}".format(task.capitalize(), suffix.capitalize()),
                    calculate_rmse(
                        entries[key],
                        entries["{}_labels".format(suffix)]))
def aggregate_k_split_result(re):
    """Store one overall RMSE per model under ``"test_rmse_all"``.

    For every model in ``re`` the entries of ``"test_preds"`` and
    ``"test_labels"`` are paired up, concatenated with ``np.concatenate``
    and scored with ``calculate_rmse``. The result is written back into
    ``re[model]["test_rmse_all"]``.

    Args:
        re: Mapping of model name to a mapping holding ``"test_preds"`` and
            ``"test_labels"`` sequences (presumably one array per split --
            confirm against the caller). Mutated in place.

    Returns:
        None.
    """
    for model in re:
        entry = re[model]
        # zip() stops at the shorter sequence, so any unpaired trailing
        # predictions or labels are ignored.
        paired = list(zip(entry["test_preds"], entry["test_labels"]))
        merged_preds = np.concatenate([preds for preds, _ in paired])
        merged_labels = np.concatenate([labels for _, labels in paired])
        entry["test_rmse_all"] = calculate_rmse(merged_preds, merged_labels)
def get_metric_refactor(re, metric="test_rmse"):
    """Print one RMSE per evaluation entry of every task in ``re``.

    Entries named ``"test_langs"`` and ``"test_lang_pairs"`` are skipped.
    The previous condition joined the two inequality checks with ``or``,
    which is always true, so those entries were never actually skipped.

    Args:
        re: Mapping of task name to a mapping of evaluation entries. The
            parameter name shadows the standard ``re`` module inside this
            function.
        metric: Only ``"test_rmse"`` produces output; any other value makes
            the function return without printing.

    Returns:
        None. All output goes to stdout.
    """
    if metric != "test_rmse":
        return

    # Entries that are stored alongside the predictions but are not scored.
    not_scored = ("test_langs", "test_lang_pairs")
    for task in re:
        for eval_metric in re[task].keys():
            if eval_metric in not_scored:
                continue
            # The labels key is built from the entry name minus its first
            # seven characters.
            # NOTE(review): this presumably assumes a 7-character prefix
            # such as "result_" on every scored key -- confirm against the
            # code that builds ``re``.
            print(
                "{}_{}".format(task.capitalize(), eval_metric.capitalize()),
                calculate_rmse(
                    re[task][eval_metric],
                    re[task]["{}_labels".format(eval_metric[7:])]))
def aggregate_k_split_baseline_result(re):
    """Store one overall RMSE per model and baseline type under ``"rmse"``.

    For every model, ``re[model]["rmse"]`` is reset to an empty dict. Then,
    for each baseline type found in ``re[model]["test_preds"]``, that
    baseline's predictions are paired with ``re[model]["test_labels"]``,
    both sides are concatenated with ``np.concatenate``, and the
    ``calculate_rmse`` value is stored in
    ``re[model]["rmse"][baseline_type]``.

    Args:
        re: Mapping of model name to a mapping holding ``"test_preds"``
            (keyed by baseline type) and ``"test_labels"``. Mutated in
            place.

    Returns:
        None.
    """
    for model in re:
        entry = re[model]
        entry["rmse"] = {}
        for baseline_type in entry["test_preds"]:
            # zip() stops at the shorter sequence, so the labels are
            # re-paired (and possibly truncated) for each baseline type.
            paired = list(
                zip(entry["test_preds"][baseline_type], entry["test_labels"]))
            merged_preds = np.concatenate([preds for preds, _ in paired])
            merged_labels = np.concatenate([labels for _, labels in paired])
            entry["rmse"][baseline_type] = calculate_rmse(
                merged_preds, merged_labels)
def get_baseline(tasks=None):
    """Print the RMSE of a constant mean predictor for each task and metric.

    For every metric of every task, the prediction is the mean of that
    metric's labels repeated once per label. Its ``calculate_rmse`` value
    against the labels is printed, followed by the mean of the task's RMSEs.

    Args:
        tasks: Iterable of task identifiers. When ``None``, the tasks
            returned by ``get_tasks()`` are used.

    Returns:
        None. All output goes to stdout.
    """
    for task in (get_tasks() if tasks is None else tasks):
        org_data = read_data(task, shuffle=False)
        task_rmses = []
        for metric in task_eval_columns(task):
            # ``.values`` presumably yields a NumPy array from a
            # pandas-like column -- confirm against read_data().
            targets = org_data[metric]["labels"].values
            constant_preds = np.repeat(np.mean(targets), len(targets))
            score = calculate_rmse(constant_preds, targets)
            task_rmses.append(score)
            message = "Mean baseline for task {} and metric {} is rmse {:.2f}"
            print(message.format(task, metric, score))
        print(f"Mean: {np.mean(task_rmses)}")
def get_model_baseline(tasks=None):
    """Print the RMSE of predicting each metric from a task's other metrics.

    For every metric of every task, the prediction is the element-wise
    average of the labels of all *other* metrics of that task. Its
    ``calculate_rmse`` value against the metric's own labels is printed,
    followed by the mean of the task's RMSEs.

    A metric with no sibling metrics has nothing to average. Previously this
    raised ``ZeroDivisionError`` (``sum([]) / len([])``); now the metric is
    reported as skipped and the loop continues. The per-task mean is printed
    only when at least one RMSE was computed.

    Args:
        tasks: Iterable of task identifiers. When ``None``, the tasks
            returned by ``get_tasks()`` are used.

    Returns:
        None. All output goes to stdout.
    """
    if tasks is None:
        tasks = get_tasks()

    for task in tasks:
        org_data = read_data(task, shuffle=False)
        metrics = task_eval_columns(task)
        rmses = []
        for metric in metrics:
            # Work on a copy so the task's own metric list is left intact.
            # remove() drops only the first occurrence of the current metric.
            others = list(metrics)
            others.remove(metric)
            if not others:
                print(
                    "model mean baseline for task {} and metric {} skipped: "
                    "no other metrics to average".format(task, metric))
                continue

            labelss = []
            for other in others:
                print(other)
                labelss.append(org_data[other]["labels"].values)

            labels = org_data[metric]["labels"].values
            # Element-wise average across the other metrics' label arrays.
            preds = sum(labelss) / len(labelss)
            rmse = calculate_rmse(preds, labels)
            rmses.append(rmse)
            print(
                "model mean baseline for task {} and metric {} is rmse {:.2f}".
                format(task, metric, rmse))

        if rmses:
            print(f"Mean: {np.mean(rmses)}")