Beispiel #1
0
def main():
    output_dir = os.path.dirname(__file__)

    experiments = [
        # "A_May24_11_08_ela_skresnext50_32x4d_fold0_fp16",
        # "A_May15_17_03_ela_skresnext50_32x4d_fold1_fp16",
        # "A_May21_13_28_ela_skresnext50_32x4d_fold2_fp16",
        # "A_May26_12_58_ela_skresnext50_32x4d_fold3_fp16",
        #
        # "B_Jun05_08_49_rgb_tf_efficientnet_b6_ns_fold0_local_rank_0_fp16",
        # "B_Jun09_16_38_rgb_tf_efficientnet_b6_ns_fold1_local_rank_0_fp16",
        # "B_Jun11_08_51_rgb_tf_efficientnet_b6_ns_fold2_local_rank_0_fp16",
        # "B_Jun11_18_38_rgb_tf_efficientnet_b6_ns_fold3_local_rank_0_fp16",
        #
        "C_Jun24_22_00_rgb_tf_efficientnet_b2_ns_fold2_local_rank_0_fp16",
        #
        # "D_Jun18_16_07_rgb_tf_efficientnet_b7_ns_fold1_local_rank_0_fp16",
        # "D_Jun20_09_52_rgb_tf_efficientnet_b7_ns_fold2_local_rank_0_fp16",
        #
        # "E_Jun18_19_24_rgb_tf_efficientnet_b6_ns_fold0_local_rank_0_fp16",
        # "E_Jun21_10_48_rgb_tf_efficientnet_b6_ns_fold0_istego100k_local_rank_0_fp16",
        #
        "F_Jun29_19_43_rgb_tf_efficientnet_b3_ns_fold0_local_rank_0_fp16",
        #
        "G_Jul03_21_14_nr_rgb_tf_efficientnet_b6_ns_fold0_local_rank_0_fp16",
        # "G_Jul05_00_24_nr_rgb_tf_efficientnet_b6_ns_fold1_local_rank_0_fp16",
        # "G_Jul06_03_39_nr_rgb_tf_efficientnet_b6_ns_fold2_local_rank_0_fp16",
        "G_Jul07_06_38_nr_rgb_tf_efficientnet_b6_ns_fold3_local_rank_0_fp16",
        #
        "H_Jul11_16_37_nr_rgb_tf_efficientnet_b7_ns_mish_fold2_local_rank_0_fp16",
        "H_Jul12_18_42_nr_rgb_tf_efficientnet_b7_ns_mish_fold1_local_rank_0_fp16",
    ]

    all_predictions = []
    labels = experiments
    scoring_fn = alaska_weighted_auc

    for metric in [
        # "loss",
        # "bauc",
        "cauc"
    ]:
        holdout_predictions_d4 = get_predictions_csv(experiments, metric, "holdout", "d4")
        oof_predictions_d4 = get_predictions_csv(experiments, metric, "oof", "d4")
        test_predictions_d4 = get_predictions_csv(experiments, metric, "test", "d4")

        fnames_for_checksum = [x + f"{metric}" for x in experiments]

        bin_pred_d4 = make_binary_predictions(holdout_predictions_d4)
        y_true = bin_pred_d4[0].y_true_type.values

        bin_pred_d4_score = scoring_fn(y_true, blend_predictions_ranked(bin_pred_d4).Label)

        cls_pred_d4 = make_classifier_predictions(holdout_predictions_d4)
        cls_pred_d4_score = scoring_fn(y_true, blend_predictions_ranked(cls_pred_d4).Label)

        prod_pred_d4_score = scoring_fn(
            y_true, blend_predictions_ranked(cls_pred_d4).Label * blend_predictions_ranked(bin_pred_d4).Label
        )

        if False:
            bin_pred_d4_cal = make_binary_predictions_calibrated(holdout_predictions_d4, oof_predictions_d4)
            bin_pred_d4_cal_score = scoring_fn(y_true, blend_predictions_ranked(bin_pred_d4_cal).Label)

            cls_pred_d4_cal = make_classifier_predictions_calibrated(holdout_predictions_d4, oof_predictions_d4)
            cls_pred_d4_cal_score = scoring_fn(y_true, blend_predictions_ranked(cls_pred_d4_cal).Label)

            prod_pred_d4_cal_score = scoring_fn(
                y_true,
                blend_predictions_ranked(cls_pred_d4_cal).Label * blend_predictions_ranked(bin_pred_d4_cal).Label,
            )
        else:
            bin_pred_d4_cal_score = 0
            cls_pred_d4_cal_score = 0
            prod_pred_d4_cal_score = 0

        print(metric, "Bin  NC", "d4", bin_pred_d4_score)
        print(metric, "Cls  NC", "d4", cls_pred_d4_score)
        print(metric, "Prod NC", "d4", prod_pred_d4_score)
        print(metric, "Bin  CL", "d4", bin_pred_d4_cal_score)
        print(metric, "Cls  CL", "d4", cls_pred_d4_cal_score)
        print(metric, "Prod CL", "d4", prod_pred_d4_cal_score)

        max_score = max(
            bin_pred_d4_score,
            cls_pred_d4_score,
            bin_pred_d4_cal_score,
            cls_pred_d4_cal_score,
            prod_pred_d4_score,
            prod_pred_d4_cal_score,
        )

        if bin_pred_d4_score == max_score:
            predictions = make_binary_predictions(test_predictions_d4)

            predictions = blend_predictions_ranked(predictions)
            predictions.to_csv(
                os.path.join(output_dir, f"rank_{max_score:.4f}_bin_{compute_checksum_v2(fnames_for_checksum)}.csv"),
                index=False,
            )
        if bin_pred_d4_cal_score == max_score:
            predictions = make_binary_predictions_calibrated(test_predictions_d4, oof_predictions_d4)

            predictions = blend_predictions_ranked(predictions)
            predictions.to_csv(
                os.path.join(
                    output_dir, f"rank_{max_score:.4f}_bin_cal_{compute_checksum_v2(fnames_for_checksum)}.csv"
                ),
                index=False,
            )
        if cls_pred_d4_score == max_score:
            predictions = make_classifier_predictions(test_predictions_d4)

            predictions = blend_predictions_ranked(predictions)
            predictions.to_csv(
                os.path.join(output_dir, f"rank_{max_score:.4f}_cls_{compute_checksum_v2(fnames_for_checksum)}.csv"),
                index=False,
            )
        if cls_pred_d4_cal_score == max_score:
            predictions = make_classifier_predictions_calibrated(test_predictions_d4, oof_predictions_d4)

            predictions = blend_predictions_ranked(predictions)
            predictions.to_csv(
                os.path.join(
                    output_dir, f"rank_{max_score:.4f}_cls_cal_{compute_checksum_v2(fnames_for_checksum)}.csv"
                ),
                index=False,
            )
        if prod_pred_d4_score == max_score:
            cls_predictions = make_classifier_predictions(test_predictions_d4)
            bin_predictions = make_binary_predictions(test_predictions_d4)

            predictions1 = blend_predictions_ranked(cls_predictions)
            predictions2 = blend_predictions_ranked(bin_predictions)
            predictions = predictions1.copy()
            predictions.Label = predictions1.Label * predictions2.Label

            predictions.to_csv(
                os.path.join(output_dir, f"rank_{max_score:.4f}_prod_{compute_checksum_v2(fnames_for_checksum)}.csv"),
                index=False,
            )
        if prod_pred_d4_cal_score == max_score:
            cls_predictions = make_classifier_predictions_calibrated(test_predictions_d4, oof_predictions_d4)
            bin_predictions = make_binary_predictions_calibrated(test_predictions_d4, oof_predictions_d4)

            predictions1 = blend_predictions_ranked(cls_predictions)
            predictions2 = blend_predictions_ranked(bin_predictions)
            predictions = predictions1.copy()
            predictions.Label = predictions1.Label * predictions2.Label

            predictions.to_csv(
                os.path.join(
                    output_dir, f"rank_{max_score:.4f}_prod_cal_{compute_checksum_v2(fnames_for_checksum)}.csv"
                ),
                index=False,
            )
def main():
    output_dir = os.path.dirname(__file__)

    experiments = [
        "G_Jul03_21_14_nr_rgb_tf_efficientnet_b6_ns_fold0_local_rank_0_fp16",
        "G_Jul05_00_24_nr_rgb_tf_efficientnet_b6_ns_fold1_local_rank_0_fp16",
        "G_Jul06_03_39_nr_rgb_tf_efficientnet_b6_ns_fold2_local_rank_0_fp16",
        "G_Jul07_06_38_nr_rgb_tf_efficientnet_b6_ns_fold3_local_rank_0_fp16",
    ]

    scoring_fn = alaska_weighted_auc

    for metric in [
        # "loss",
        # "bauc",
        "cauc"
    ]:
        holdout_predictions_d4 = get_predictions_csv(experiments, metric, "holdout", "d4")
        oof_predictions_d4 = get_predictions_csv(experiments, metric, "oof", "d4")
        test_predictions_d4 = get_predictions_csv(experiments, metric, "test", "d4")

        fnames_for_checksum = [x + f"{metric}" for x in experiments]

        bin_pred_d4 = make_binary_predictions(holdout_predictions_d4)
        y_true = bin_pred_d4[0].y_true_type.values

        bin_pred_d4_score = scoring_fn(y_true, blend_predictions_mean(bin_pred_d4).Label)

        cls_pred_d4 = make_classifier_predictions(holdout_predictions_d4)
        cls_pred_d4_score = scoring_fn(y_true, blend_predictions_mean(cls_pred_d4).Label)

        bin_pred_d4_cal = make_binary_predictions_calibrated(holdout_predictions_d4, oof_predictions_d4)
        bin_pred_d4_cal_score = scoring_fn(y_true, blend_predictions_mean(bin_pred_d4_cal).Label)

        cls_pred_d4_cal = make_classifier_predictions_calibrated(holdout_predictions_d4, oof_predictions_d4)
        cls_pred_d4_cal_score = scoring_fn(y_true, blend_predictions_mean(cls_pred_d4_cal).Label)

        prod_pred_d4_cal_score = scoring_fn(
            y_true, blend_predictions_mean(cls_pred_d4_cal).Label * blend_predictions_mean(bin_pred_d4_cal).Label
        )

        print(metric, "Bin NC", "d4", bin_pred_d4_score)
        print(metric, "Bin CL", "d4", cls_pred_d4_score)
        print(metric, "Cls NC", "d4", bin_pred_d4_cal_score)
        print(metric, "Cls CL", "d4", cls_pred_d4_cal_score)
        print(metric, "Prod  ", "d4", prod_pred_d4_cal_score)

        max_score = max(
            bin_pred_d4_score, cls_pred_d4_score, bin_pred_d4_cal_score, cls_pred_d4_cal_score, prod_pred_d4_cal_score
        )

        if bin_pred_d4_score == max_score:
            predictions = make_binary_predictions(test_predictions_d4)

            predictions = blend_predictions_mean(predictions)
            predictions.to_csv(
                os.path.join(output_dir, f"mean_{max_score:.4f}_bin_{compute_checksum_v2(fnames_for_checksum)}.csv"),
                index=False,
            )
        if bin_pred_d4_cal_score == max_score:
            predictions = make_binary_predictions_calibrated(test_predictions_d4, oof_predictions_d4)

            predictions = blend_predictions_mean(predictions)
            predictions.to_csv(
                os.path.join(
                    output_dir, f"mean_{max_score:.4f}_bin_cal_{compute_checksum_v2(fnames_for_checksum)}.csv"
                ),
                index=False,
            )
        if cls_pred_d4_score == max_score:
            predictions = make_classifier_predictions(test_predictions_d4)

            predictions = blend_predictions_mean(predictions)
            predictions.to_csv(
                os.path.join(output_dir, f"mean_{max_score:.4f}_cls_{compute_checksum_v2(fnames_for_checksum)}.csv"),
                index=False,
            )
        if cls_pred_d4_cal_score == max_score:
            predictions = make_classifier_predictions_calibrated(test_predictions_d4, oof_predictions_d4)

            predictions = blend_predictions_mean(predictions)
            predictions.to_csv(
                os.path.join(
                    output_dir, f"mean_{max_score:.4f}_cls_cal_{compute_checksum_v2(fnames_for_checksum)}.csv"
                ),
                index=False,
            )
        if prod_pred_d4_cal_score == max_score:
            cls_predictions = make_classifier_predictions_calibrated(test_predictions_d4, oof_predictions_d4)
            bin_predictions = make_binary_predictions_calibrated(test_predictions_d4, oof_predictions_d4)

            predictions1 = blend_predictions_mean(cls_predictions)
            predictions2 = blend_predictions_mean(bin_predictions)
            predictions = predictions1.copy()
            predictions.Label = predictions1.Label * predictions2.Label

            predictions.to_csv(
                os.path.join(
                    output_dir, f"mean_{max_score:.4f}_prod_cal_{compute_checksum_v2(fnames_for_checksum)}.csv"
                ),
                index=False,
            )
def main():
    output_dir = os.path.dirname(__file__)

    experiments = [
        "B_Jun05_08_49_rgb_tf_efficientnet_b6_ns_fold0_local_rank_0_fp16",
        "B_Jun09_16_38_rgb_tf_efficientnet_b6_ns_fold1_local_rank_0_fp16",
        "B_Jun11_08_51_rgb_tf_efficientnet_b6_ns_fold2_local_rank_0_fp16",
        "B_Jun11_18_38_rgb_tf_efficientnet_b6_ns_fold3_local_rank_0_fp16",
        "G_Jul03_21_14_nr_rgb_tf_efficientnet_b6_ns_fold0_local_rank_0_fp16",
        "G_Jul05_00_24_nr_rgb_tf_efficientnet_b6_ns_fold1_local_rank_0_fp16",
        "G_Jul06_03_39_nr_rgb_tf_efficientnet_b6_ns_fold2_local_rank_0_fp16",
        "G_Jul07_06_38_nr_rgb_tf_efficientnet_b6_ns_fold3_local_rank_0_fp16",
    ]

    for metric in [
        # "loss",
        # "bauc",
        "cauc"
    ]:
        holdout_predictions_d4 = get_predictions_csv(experiments, metric, "holdout", "d4")
        oof_predictions_d4 = get_predictions_csv(experiments, metric, "oof", "d4")
        test_predictions_d4 = get_predictions_csv(experiments, metric, "test", "d4")

        hld_bin_pred_d4 = make_binary_predictions(holdout_predictions_d4)
        hld_y_true = hld_bin_pred_d4[0].y_true_type.values

        oof_bin_pred_d4 = make_binary_predictions(oof_predictions_d4)

        hld_cls_pred_d4 = make_classifier_predictions(holdout_predictions_d4)
        oof_cls_pred_d4 = make_classifier_predictions(oof_predictions_d4)

        bin_pred_d4_cal = make_binary_predictions_calibrated(holdout_predictions_d4, oof_predictions_d4)
        cls_pred_d4_cal = make_classifier_predictions_calibrated(holdout_predictions_d4, oof_predictions_d4)

        print(
            "   ", "      ", "  ", "   OOF", "     OOF 5K", "     OOF 1K", "        HLD", "     HLD 5K", "     HLD 1K"
        )
        print(
            metric,
            "Bin NC",
            "{:.6f}\t{:.6f}\t{:.6f}\t{:.6f}\t{:.6f}\t{:.6f}".format(
                np.mean([alaska_weighted_auc(x.y_true_type, x.Label) for x in oof_bin_pred_d4]),
                np.mean([shaky_wauc(x.y_true_type, x.Label) for x in oof_bin_pred_d4]),
                np.mean([shaky_wauc_public(x.y_true_type, x.Label) for x in oof_bin_pred_d4]),
                alaska_weighted_auc(hld_y_true, blend_predictions_mean(hld_bin_pred_d4).Label),
                shaky_wauc(hld_y_true, blend_predictions_mean(hld_bin_pred_d4).Label),
                shaky_wauc_public(hld_y_true, blend_predictions_mean(hld_bin_pred_d4).Label),
            ),
        )

        print(
            metric,
            "Cls NC",
            "{:.6f}\t{:.6f}\t{:.6f}\t{:.6f}\t{:.6f}\t{:.6f}".format(
                np.mean([alaska_weighted_auc(x.y_true_type, x.Label) for x in oof_cls_pred_d4]),
                np.mean([shaky_wauc(x.y_true_type, x.Label) for x in oof_cls_pred_d4]),
                np.mean([shaky_wauc_public(x.y_true_type, x.Label) for x in oof_cls_pred_d4]),
                alaska_weighted_auc(hld_y_true, blend_predictions_mean(hld_cls_pred_d4).Label),
                shaky_wauc(hld_y_true, blend_predictions_mean(hld_cls_pred_d4).Label),
                shaky_wauc_public(hld_y_true, blend_predictions_mean(hld_cls_pred_d4).Label),
            ),
        )

        print(
            metric,
            "Bin CL",
            "                                    {:.6f}\t{:.6f}\t{:.6f}".format(
                alaska_weighted_auc(hld_y_true, blend_predictions_mean(bin_pred_d4_cal).Label),
                shaky_wauc(hld_y_true, blend_predictions_mean(bin_pred_d4_cal).Label),
                shaky_wauc_public(hld_y_true, blend_predictions_mean(bin_pred_d4_cal).Label),
            ),
        )
        print(
            metric,
            "Cls CL",
            "                                    {:.6f}\t{:.6f}\t{:.6f}".format(
                alaska_weighted_auc(hld_y_true, blend_predictions_mean(cls_pred_d4_cal).Label),
                shaky_wauc(hld_y_true, blend_predictions_mean(cls_pred_d4_cal).Label),
                shaky_wauc_public(hld_y_true, blend_predictions_mean(cls_pred_d4_cal).Label),
            ),
        )
        print(
            metric,
            "Prd NC",
            "{:.6f}\t{:.6f}\t{:.6f}\t{:.6f}\t{:.6f}\t{:.6f}".format(
                np.mean(
                    [
                        alaska_weighted_auc(x.y_true_type, x.Label * y.Label)
                        for (x, y) in zip(oof_bin_pred_d4, oof_cls_pred_d4)
                    ]
                ),
                np.mean(
                    [shaky_wauc(x.y_true_type, x.Label * y.Label) for (x, y) in zip(oof_bin_pred_d4, oof_cls_pred_d4)]
                ),
                np.mean(
                    [
                        shaky_wauc_public(x.y_true_type, x.Label * y.Label)
                        for (x, y) in zip(oof_bin_pred_d4, oof_cls_pred_d4)
                    ]
                ),
                alaska_weighted_auc(
                    hld_y_true,
                    blend_predictions_mean(bin_pred_d4_cal).Label * blend_predictions_mean(cls_pred_d4_cal).Label,
                ),
                shaky_wauc(
                    hld_y_true,
                    blend_predictions_mean(bin_pred_d4_cal).Label * blend_predictions_mean(cls_pred_d4_cal).Label,
                ),
                shaky_wauc_public(
                    hld_y_true,
                    blend_predictions_mean(bin_pred_d4_cal).Label * blend_predictions_mean(cls_pred_d4_cal).Label,
                ),
            ),
        )