Example #1
0
def create_csv_table(items, round_digits=1):
    """Build a CSV table of score statistics grouped by (task, intermediate task).

    Args:
        items: iterable of result dicts; each must have "task",
            "intermediate_task", and "score" keys.
        round_digits: decimal places used when rounding the reported
            mean and standard deviation.

    Returns:
        The table rendered to a CSV string via result_utils.csv_to_str.
    """
    # Group items sharing the same (task, intermediate task) pair.
    row_groups = collections.defaultdict(list)
    for item in items:
        group_key = (item["task"], item["intermediate_task"])
        row_groups[group_key].append(item)

    header = [
        "task",
        "int task",
        "score",
        "stddev",
        "num trials",
    ]
    body = []
    for (task, int_task), row_items in row_groups.items():
        # get_single_score collapses each item's score entry to one number.
        scores = np.array(
            [get_single_score(item["score"]) for item in row_items])
        row = [
            task,
            int_task,
            round(np.mean(scores), round_digits),
            round(np.std(scores), round_digits),
            len(scores),
        ]
        body.append(row)

    # Sort by (task, int task) so the output ordering is deterministic.
    body = sorted(body, key=lambda r: r[:2])

    rows = [header] + body

    return result_utils.csv_to_str(rows)
Example #2
0
def create_csv_table(items, round_digits=1):
    """Build a CSV table of task scores grouped by (task, mlm example count).

    Args:
        items: iterable of result dicts; each must have "task",
            "mlm_examples", and "score" keys.
        round_digits: decimal places used when rounding the reported
            mean and standard deviation.

    Returns:
        The table rendered to a CSV string via result_utils.csv_to_str.
    """
    # Group items sharing the same (task, mlm_examples) pair.
    row_groups = collections.defaultdict(list)
    for item in items:
        group_key = (item["task"], item["mlm_examples"])
        row_groups[group_key].append(item)

    header = [
        "task",
        "mlm ex",
        "task f1",
        "stddev",
        "num trials",
    ]
    body = []
    for (task, mlm_examples), row_items in row_groups.items():
        scores = np.array(
            [get_single_score(item["score"]) for item in row_items])
        row = [
            task,
            mlm_examples,
            round(np.mean(scores), round_digits),
            round(np.std(scores), round_digits),
            len(row_items),
        ]
        body.append(row)

    # Sort by the full (task, mlm ex) group key so rows with the same task
    # are ordered deterministically (r[:1] left them in dict insertion order).
    body = sorted(body, key=lambda r: r[:2])

    rows = [header] + body

    return result_utils.csv_to_str(rows)
Example #3
0
def create_csv_table(filepath, round_digits=1):
    """Summarize merged vs. original scores per (target task, donor task) as CSV.

    Args:
        filepath: path to a JSON results file loadable by result_utils.
        round_digits: decimal places for all reported statistics.

    Returns:
        The table rendered to a CSV string via result_utils.csv_to_str.
    """
    items = result_utils.load_json(filepath)

    # Bucket the result items by their (target task, donor task) pair.
    groups = collections.defaultdict(list)
    for item in items:
        key = hashabledict({
            "target_task": item["target_task"],
            "donor_task": item["donor_task"],
        })
        groups[key].append(item)

    header = [
        "task",
        "donor",
        "merged score",
        "stddev",
        "orig score",
        "stddev",
        "mean boost",
        "stddev",
        "max boost",
        "min boost",
        "num trials",
    ]

    def rnd(value):
        # Shared rounding for every statistic in a row.
        return round(value, round_digits)

    body = []
    for key, group in groups.items():
        originals = np.array(
            [get_single_score(it["original_score"]) for it in group])
        merged = np.array(
            [get_single_score(it["merged_score"]) for it in group])
        # Per-trial improvement of the merged model over the original.
        boost = merged - originals
        body.append([
            key["target_task"],
            key["donor_task"],
            rnd(np.mean(merged)),
            rnd(np.std(merged)),
            rnd(np.mean(originals)),
            rnd(np.std(originals)),
            rnd(np.mean(boost)),
            rnd(np.std(boost)),
            rnd(np.max(boost)),
            rnd(np.min(boost)),
            len(group),
        ])

    # Deterministic ordering by (task, donor).
    body.sort(key=lambda row: (row[0], row[1]))

    return result_utils.csv_to_str([header] + body)
Example #4
0
def create_csv_table(filepath, round_digits=1):
    """Render per-hyperparameter MNLI/RTE checkpoint results as a CSV string.

    Args:
        filepath: path to a JSON results file loadable by result_utils.
        round_digits: decimal places for all reported statistics.

    Returns:
        The table rendered to a CSV string via result_utils.csv_to_str.
    """
    items = result_utils.load_json(filepath)

    # Bucket trials by their full hyperparameter dict.
    groups = collections.defaultdict(list)
    for item in items:
        groups[hashabledict(item["hyperparams"])].append(item)

    header = [
        "MNLI ckpt",
        "RTE ckpt",
        "merged RTE acc",
        "merged stddev",
        "orig RTE acc",
        "orig stddev",
        "MNLI body acc",
        "MNLI body stddev",
        "num trials",
    ]

    def summarize(scores):
        # Rounded (mean, stddev) pair for a list of scalar scores.
        return (round(np.mean(scores), round_digits),
                round(np.std(scores), round_digits))

    body = []
    for hp, group in groups.items():
        merged_mean, merged_std = summarize(
            [get_single_score(it["merged_score"]) for it in group])
        og_mean, og_std = summarize(
            [get_single_score(it["original_score"]) for it in group])
        donor_mean, donor_std = summarize(
            [get_single_score(it["donor_body_score"]) for it in group])
        body.append([
            hp["donor_ckpt_index"],
            hp["target_ckpt_index"],
            merged_mean,
            merged_std,
            og_mean,
            og_std,
            donor_mean,
            donor_std,
            len(group),
        ])

    # Deterministic ordering by (MNLI ckpt, RTE ckpt).
    body.sort(key=lambda row: (row[0], row[1]))

    return result_utils.csv_to_str([header] + body)
Example #5
0
def create_csv_table(filepath,
                     round_digits=1,
                     group_by_ckpt_index=True,
                     best_per_finetuned_model=False):
    """Summarize merged vs. original task scores per hyperparameter group as CSV.

    Args:
        filepath: path to a JSON results file loadable by result_utils.
        round_digits: decimal places for all reported statistics.
        group_by_ckpt_index: if False, print "-" instead of the target
            checkpoint index in the second column.
        best_per_finetuned_model: if True, keep only the trial with the
            highest merged score per finetuned model (collapsing over
            the "train_run_uuid" hyperparameter) before summarizing.

    Returns:
        The table rendered to a CSV string via result_utils.csv_to_str.
    """
    items = result_utils.load_json(filepath)

    # Bucket trials by their full hyperparameter dict.
    groups = collections.defaultdict(list)
    for item in items:
        groups[hashabledict(item["hyperparams"])].append(item)

    if best_per_finetuned_model:
        # TODO: get best original score as well
        collapsed = collections.defaultdict(list)
        for hp, group in groups.items():
            winner = max(
                group, key=lambda it: get_single_score(it["merged_score"]))
            reduced_hp = dict(hp)
            del reduced_hp["train_run_uuid"]
            collapsed[hashabledict(reduced_hp)].append(winner)
        groups = collapsed

    header = [
        "task",
        "task ckpt",
        "merged task f1",
        "stddev",
        "orig task f1",
        "stddev",
        "mean boost",
        "stddev",
        "max boost",
        "min boost",
        "num trials",
    ]
    body = []
    for hp, group in groups.items():
        originals = np.array(
            [get_single_score(it["original_score"]) for it in group])
        merged = np.array(
            [get_single_score(it["merged_score"]) for it in group])
        # Per-trial improvement of the merged model over the original.
        boost = merged - originals
        body.append([
            hp["task"],
            hp["target_ckpt_index"] if group_by_ckpt_index else "-",
            round(np.mean(merged), round_digits),
            round(np.std(merged), round_digits),
            round(np.mean(originals), round_digits),
            round(np.std(originals), round_digits),
            round(np.mean(boost), round_digits),
            round(np.std(boost), round_digits),
            round(np.max(boost), round_digits),
            round(np.min(boost), round_digits),
            len(group),
        ])

    # Deterministic ordering by (task, task ckpt).
    body.sort(key=lambda row: (row[0], row[1]))

    return result_utils.csv_to_str([header] + body)
Example #6
0
def create_csv_table(filepath, round_digits=1):
    """Summarize best-per-train-run merged vs. original scores as a CSV string.

    For each full hyperparameter setting, the best original score and the
    best merged score are selected independently (they may come from
    different trials). Results are then re-grouped with the per-trial
    "train_run_uuid" hyperparameter removed and summarized per
    (task, mlm train examples, mlm regularization strength).

    Args:
        filepath: path to a JSON results file loadable by result_utils.
        round_digits: decimal places for all reported statistics.

    Returns:
        The table rendered to a CSV string via result_utils.csv_to_str.
    """
    items = result_utils.load_json(filepath)

    # Bucket trials by their full hyperparameter dict.
    row_groups = collections.defaultdict(list)
    for item in items:
        group_key = hashabledict(item["hyperparams"])
        row_groups[group_key].append(item)

    # Collapse each full-hyperparameter group to its best scores, keyed by
    # the hyperparameters with "train_run_uuid" removed.
    row_groups2 = collections.defaultdict(list)
    for hp, row_items in row_groups.items():
        best_og = max(
            row_items,
            key=lambda item: get_single_score(item["original_score"]))
        best_merged = max(
            row_items, key=lambda item: get_single_score(item["merged_score"]))
        hp = dict(hp)
        del hp["train_run_uuid"]
        group_key = hashabledict(hp)
        row_groups2[group_key].append({
            "original_score": best_og["original_score"],
            "merged_score": best_merged["merged_score"],
        })

    header = [
        "task",
        "mlm train ex",
        "mlm reg str",
        "merged task f1",
        "stddev",
        "orig task f1",
        "stddev",
        "mean boost",
        "stddev",
        "max boost",
        "min boost",
        "num trials",
    ]
    body = []
    for hp, row_items in row_groups2.items():
        og_scores = np.array(
            [get_single_score(item["original_score"]) for item in row_items])
        merged_scores = np.array(
            [get_single_score(item["merged_score"]) for item in row_items])
        row = [
            hp["task"],
            hp["pretrained_examples"],
            hp["pretrained_reg_strength"],
            round(np.mean(merged_scores), round_digits),
            round(np.std(merged_scores), round_digits),
            #
            round(np.mean(og_scores), round_digits),
            round(np.std(og_scores), round_digits),
            #
            round(np.mean(merged_scores - og_scores), round_digits),
            round(np.std(merged_scores - og_scores), round_digits),
            #
            round(np.max(merged_scores - og_scores), round_digits),
            round(np.min(merged_scores - og_scores), round_digits),
            len(row_items),
        ]
        body.append(row)

    # Sort by the (task, mlm train ex, mlm reg str) prefix so the output
    # ordering is deterministic.
    body = sorted(body, key=lambda r: r[:3])

    rows = [header] + body

    return result_utils.csv_to_str(rows)