Example #1
def get_metric_deprecated(re, metric="test_rmse"):
    # Print test-RMSE summaries for every task in the nested results dict `re`.
    if metric == "mean_test_rmse":
        print("TSF_MT", np.mean(list(re["tsfmt"]["test_rmse"].values())))
        print("TSF_EL", np.mean(list(re["tsfel"]["test_rmse"].values())))
        print("TSF_POS", np.mean(list(re["tsfpos"]["test_rmse"].values())))
        print("TSF_PARSING",
              np.mean(list(re["tsfparsing"]["test_rmse"].values())))
        print("MONO_MT", np.mean(list(re["monomt"]["test_rmse"].values())))
        print("MI", np.mean(list(re["mi"]["test_rmse"].values())))
        print("SF_Keywords_F1",
              np.mean(list(re["sf"]["test_rmse"]["Keywords_F1"].values())))
        print(
            "SF_Keywords_Precision",
            np.mean(list(
                re["sf"]["test_rmse"]["Keywords_Precision"].values())))
        print("SF_Keywords_Recall",
              np.mean(list(re["sf"]["test_rmse"]["Keywords_Recall"].values())))
        print("SF_NN_F1",
              np.mean(list(re["sf"]["test_rmse"]["NN_F1"].values())))
        print("SF_NN_Precision",
              np.mean(list(re["sf"]["test_rmse"]["NN_Precision"].values())))
        print("SF_NN_Recall",
              np.mean(list(re["sf"]["test_rmse"]["NN_Recall"].values())))
        print("BLI_MUSE",
              np.mean(list(re["bli"]["test_rmse"]["MUSE"].values())))
        print("BLI_Artetxe17",
              np.mean(list(re["bli"]["test_rmse"]["Artetxe17"].values())))
        print("BLI_Artetxe16",
              np.mean(list(re["bli"]["test_rmse"]["Artetxe16"].values())))
    elif metric == "test_rmse":
        for task in re:
            keys = list(re[task].keys())
            for key in keys:
                if key.startswith("result"):
                    if key not in ("result", "result_upper_preds", "result_lower_preds"):
                        print(
                            "{}_{}".format(task.capitalize(),
                                           key[7:].capitalize()),
                            calculate_rmse(
                                re[task][key],
                                re[task]["{}_labels".format(key[7:])]))
                    elif key == "result":
                        print(
                            "{}".format(task.capitalize()),
                            calculate_rmse(re[task][key], re[task]["labels"]))
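
All of these examples call calculate_rmse, which is defined elsewhere in the module. A minimal sketch of an equivalent helper, assuming it simply computes the root-mean-squared error between two NumPy arrays (only the name and call signature come from the examples; the body here is an assumption):

import numpy as np


def calculate_rmse(preds, labels):
    # Assumed helper: root-mean-squared error between prediction and label arrays.
    preds = np.asarray(preds, dtype=float)
    labels = np.asarray(labels, dtype=float)
    return float(np.sqrt(np.mean((preds - labels) ** 2)))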
Example #2
def aggregate_k_split_result(re):
    # Concatenate each model's per-split test predictions and labels and
    # store the overall RMSE under "test_rmse_all".
    for model in re:
        test_preds = []
        test_labels = []
        for test_pred, test_label in zip(re[model]["test_preds"],
                                         re[model]["test_labels"]):
            test_preds.append(test_pred)
            test_labels.append(test_label)
        test_preds = np.concatenate(test_preds)
        test_labels = np.concatenate(test_labels)
        test_rmse = calculate_rmse(test_preds, test_labels)
        re[model]["test_rmse_all"] = test_rmse
Example #3
def get_metric_refactor(re, metric="test_rmse"):
    # Refactored variant: print the RMSE for every evaluation metric of every task.
    if metric == "test_rmse":
        for task in re:
            for eval_metric in re[task].keys():
                # Skip bookkeeping keys that hold language information rather than predictions.
                if eval_metric not in ("test_langs", "test_lang_pairs"):
                    print(
                        "{}_{}".format(task.capitalize(),
                                       eval_metric.capitalize()),
                        calculate_rmse(
                            re[task][eval_metric],
                            re[task]["{}_labels".format(eval_metric[7:])]))
Example #4
def aggregate_k_split_baseline_result(re):
    # Same aggregation as above, but per baseline type: store one overall RMSE
    # per baseline under re[model]["rmse"].
    for model in re:
        re[model]["rmse"] = {}
        for baseline_type in re[model]["test_preds"]:
            test_preds = []
            test_labels = []
            for test_pred, test_label in zip(
                    re[model]["test_preds"][baseline_type],
                    re[model]["test_labels"]):
                test_preds.append(test_pred)
                test_labels.append(test_label)
            test_preds = np.concatenate(test_preds)
            test_labels = np.concatenate(test_labels)
            test_rmse = calculate_rmse(test_preds, test_labels)
            re[model]["rmse"][baseline_type] = test_rmse
Example #5
def get_baseline(tasks=None):
    # Mean baseline: for each task and metric, predict the average label for
    # every instance and report the resulting RMSE.
    if tasks is None:
        tasks = get_tasks()
    for task in tasks:
        org_data = read_data(task, shuffle=False)
        metrics = task_eval_columns(task)
        rmses = []
        for metric in metrics:
            labels = org_data[metric]["labels"].values
            preds = np.mean(labels).repeat(len(labels))
            rmse = calculate_rmse(preds, labels)
            rmses.append(rmse)
            print("Mean baseline for task {} and metric {} is rmse {:.2f}".
                  format(task, metric, rmse))
        print(f"Mean: {np.mean(rmses)}")
Example #6
def get_model_baseline(tasks=None):
    # Model mean baseline: predict each metric as the element-wise mean of the
    # labels of the remaining metrics for the same task.
    from copy import deepcopy
    if tasks is None:
        tasks = get_tasks()
    for task in tasks:
        org_data = read_data(task, shuffle=False)
        metrics = task_eval_columns(task)
        rmses = []
        for metric in metrics:
            others = deepcopy(metrics)
            others.remove(metric)
            labelss = []
            for other in others:
                print(other)
                labels = org_data[other]["labels"].values
                labelss.append(labels)
            labels = org_data[metric]["labels"].values
            preds = sum(labelss) / len(labelss)
            rmse = calculate_rmse(preds, labels)
            rmses.append(rmse)
            print(
                "Model mean baseline for task {} and metric {} is rmse {:.2f}".
                format(task, metric, rmse))
        print(f"Mean: {np.mean(rmses)}")