Example 1
import numpy as np
from tqdm import tqdm

# `utils.set_seed` and `lwlrap` are project-local helpers used throughout
# these examples.


def search_averaging_weight_for_each_col(predictions: list,
                                         target: np.ndarray,
                                         trials=10000):
    """Random search for per-class averaging weights over 24 classes."""
    best_score = -np.inf
    best_weights = np.zeros((len(predictions), 24))
    utils.set_seed(1213)
    for _ in tqdm(range(trials)):
        # One random weight per (model, class) pair; normalize each class
        # column so the model weights sum to 1.
        dice = np.random.rand(len(predictions), 24)
        weights = dice / dice.sum(axis=0)
        blended = np.zeros_like(predictions[0], dtype=np.float32)
        for weight, pred in zip(weights, predictions):
            blended += weight * pred
        # Overall LWLRAP is the weighted sum of the per-class scores.
        score_class, class_weight = lwlrap(truth=target, scores=blended)
        score = (score_class * class_weight).sum()
        if score > best_score:
            best_score = score
            best_weights = weights
    return {"best_score": best_score, "best_weights": best_weights}
Example 2
                                         ])[[f"s{i}" for i in range(24)
                                             ]].mean().reset_index(drop=False)
        oof_predictions.append(oof_tta_df)

        tta_pred_df = pd.concat(tta_predictions, axis=0).reset_index(drop=True)
        tta_pred_df = tta_pred_df.groupby(["recording_id"])[
            [f"s{i}" for i in range(24)]
        ].mean().reset_index(drop=False)
        fold_predictions.append(tta_pred_df)

    oof_df = pd.concat(oof_predictions, axis=0).reset_index(drop=True)

    oof_indices = oof_df[["index"]]
    ground_truth = oof_indices.merge(ground_truth_df, on="index", how="left")
    columns = [f"s{i}" for i in range(24)]
    score_class, weight = lwlrap(ground_truth[columns].values,
                                 oof_df[columns].values)
    score = (score_class * weight).sum()
    logger.info(f"TTA all LWLRAP: {score:.5f}")

    class_level_score = {config_name: score_class}

    for key in oof_tta_dict:
        tta_df = pd.concat(oof_tta_dict[key], axis=0).reset_index(drop=True)
        oof_indices = tta_df[["index"]]
        ground_truth = oof_indices.merge(ground_truth_df,
                                         on="index",
                                         how="left")
        score_class, weight = lwlrap(ground_truth[columns].values,
                                     tta_df[columns].values)
        score = (score_class * weight).sum()
        logger.info(f"TTA {key} LWLRAP: {score:.5f}")
Example 3
            f"Best score {result_dict['best_score']}, Best Weights{result_dict['best_weights']}"
        )
        for i, class_ in enumerate(classes):
            weights_dict[class_] = result_dict["best_weights"][:, i]
    else:
        for class_ in classes:
            weights_dict[class_] = config["strategy"]["weights"]
    blended = np.zeros((len(oofs[0]), 24))
    class_level_score = {}
    for class_ in weights_dict:
        index = classes.index(class_)
        weights = weights_dict[class_]
        for weight, oof in zip(weights, oofs):
            blended[:, index] += weight * oof[class_].values

    score_class, weight = lwlrap(ground_truth_df[classes].values, blended)
    score = (score_class * weight).sum()
    logger.info(f"Blended LWLRAP: {score:5f}")
    class_level_score["blend_score"] = score
    class_level_score["blend_weight"] = weights_dict
    class_level_score["blend"] = score_class

    for oof, name in zip(oofs, names):
        score_class, weight = lwlrap(ground_truth_df[classes].values,
                                     oof[classes].values)
        score = (score_class * weight).sum()
        logger.info(f"Name: {name} LWLRAP: {score:5f}")
        class_level_score[f"{name}_score"] = score
        class_level_score[name] = score_class

    blended_sub = np.zeros((len(submissions[0]), 24))
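
The per-class blending loop above can equivalently be written as a single einsum over stacked OOF arrays; a sketch, assuming every OOF frame holds the same rows in the same order:

import numpy as np

# W: (n_models, 24) per-class weights, columns in `classes` order.
W = np.stack([weights_dict[c] for c in classes], axis=1)
# P: (n_models, n_samples, 24) stacked OOF predictions.
P = np.stack([oof[classes].values for oof in oofs], axis=0)
# blended[s, c] = sum over models m of W[m, c] * P[m, s, c]
blended = np.einsum("mc,msc->sc", W, P)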
Example 4
def eval_one_epoch(model,
                   loader,
                   criterion,
                   device: torch.device,
                   input_key: str,
                   input_target_key: str,
                   epoch: int,
                   writer: SummaryWriter,
                   strong=False):
    loss_meter = utils.AverageMeter()
    lwlrap_meter = utils.AverageMeter()

    model.eval()

    preds = []
    targs = []
    recording_ids = []
    progress_bar = tqdm(loader, desc="valid")
    for step, batch in enumerate(progress_bar):
        with torch.no_grad():
            recording_ids.extend(batch["recording_id"])
            x = batch[input_key].to(device)
            y = batch[input_target_key]

            for key in y:
                y[key] = y[key].to(device)

            output = model(x)
            loss = criterion(output, y).detach()

        loss_meter.update(loss.item(), n=len(loader))

        if strong:
            # Framewise (strong) outputs: max-pool over the time axis to
            # obtain a clipwise score per class.
            clipwise_output = output["framewise_output"].detach().cpu().numpy().max(axis=1)
        else:
            clipwise_output = output["clipwise_output"].detach().cpu().numpy()
        target = y["weak"].detach().cpu().numpy()

        preds.append(clipwise_output)
        targs.append(target)

        score_class, weight = clb.lwlrap(target, clipwise_output)
        score = (score_class * weight).sum()
        lwlrap_meter.update(score, n=1)

        progress_bar.set_description(
            f"Epoch: {epoch + 1} "
            f"Step: [{step + 1}/{len(loader)}] "
            f"loss: {loss_meter.val:.4f} loss(avg) {loss_meter.avg:.4f} "
            f"lwlrap: {lwlrap_meter.val:.4f} lwlrap(avg) {lwlrap_meter.avg:.4f}")

        global_step = epoch * len(loader) + step + 1
        writer.add_scalar(tag="loss/batch", scalar_value=loss_meter.val, global_step=global_step)
        writer.add_scalar(tag="lwlrap/batch", scalar_value=lwlrap_meter.val, global_step=global_step)

    y_pred = np.concatenate(preds, axis=0)
    y_true = np.concatenate(targs, axis=0)

    oof_pred_df = pd.DataFrame(y_pred, columns=[f"s{i}" for i in range(y_pred.shape[1])])
    oof_pred_df = pd.concat([
        pd.DataFrame({"recording_id": recording_ids}),
        oof_pred_df
    ], axis=1)

    oof_targ_df = pd.DataFrame(y_true, columns=[f"s{i}" for i in range(y_pred.shape[1])])
    oof_targ_df = pd.concat([
        pd.DataFrame({"recording_id": recording_ids}),
        oof_targ_df
    ], axis=1)

    # Each recording may appear several times (one row per crop); aggregate
    # by taking the max over crops of the same recording.
    oof_pred_df = oof_pred_df.groupby("recording_id").max().reset_index(drop=True)
    oof_targ_df = oof_targ_df.groupby("recording_id").max().reset_index(drop=True)

    score_class, weight = clb.lwlrap(oof_targ_df.values, oof_pred_df.values)
    score = (score_class * weight).sum()

    writer.add_scalar(tag="loss/epoch", scalar_value=loss_meter.avg, global_step=epoch + 1)
    writer.add_scalar(tag="lwlrap/epoch", scalar_value=score, global_step=epoch + 1)
    return loss_meter.avg, score
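
`utils.AverageMeter` is not shown in these excerpts; a minimal sketch consistent with how it is used above (update(val, n), then .val and .avg):

class AverageMeter:
    """Tracks the latest value and a running (count-weighted) average."""

    def __init__(self):
        self.val = 0.0
        self.sum = 0.0
        self.count = 0
        self.avg = 0.0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count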
Example 5
def train_one_epoch(model,
                    loader,
                    optimizer,
                    scheduler,
                    criterion,
                    device: torch.device,
                    input_key: str,
                    input_target_key: str,
                    epoch: int,
                    writer: SummaryWriter):
    loss_meter = utils.AverageMeter()
    lwlrap_meter = utils.AverageMeter()

    model.train()

    preds = []
    targs = []

    progress_bar = tqdm(loader, desc="train")
    for step, batch in enumerate(progress_bar):
        x = batch[input_key].to(device)
        y = batch[input_target_key]
        for key in y:
            y[key] = y[key].to(device)

        output = model(x)
        loss = criterion(output, y)

        if hasattr(optimizer, "first_step"):
            # SAM-style optimizer: first_step() perturbs the weights toward
            # the local sharpness maximum; a second forward/backward pass
            # then computes the gradient used for the actual update.
            loss.backward()
            optimizer.first_step(zero_grad=True)

            criterion(model(x), y).backward()
            optimizer.second_step(zero_grad=True)
        else:
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        loss_meter.update(loss.item(), n=len(loader))

        clipwise_output = output["clipwise_output"].detach().cpu().numpy()
        target = y["weak"].detach().cpu().numpy()

        preds.append(clipwise_output)
        targs.append(target)

        score_class, weight = clb.lwlrap(target, clipwise_output)
        score = (score_class * weight).sum()
        lwlrap_meter.update(score, n=1)

        progress_bar.set_description(
            f"Epoch: {epoch + 1} "
            f"Step: [{step + 1}/{len(loader)}] "
            f"loss: {loss_meter.val:.4f} loss(avg) {loss_meter.avg:.4f} "
            f"lwlrap: {lwlrap_meter.val:.4f} lwlrap(avg) {lwlrap_meter.avg:.4f}")

        global_step = epoch * len(loader) + step + 1
        writer.add_scalar(tag="loss/batch", scalar_value=loss_meter.val, global_step=global_step)
        writer.add_scalar(tag="lwlrap/batch", scalar_value=lwlrap_meter.val, global_step=global_step)

    scheduler.step()

    y_pred = np.concatenate(preds, axis=0)
    y_true = np.concatenate(targs, axis=0)

    score_class, weight = clb.lwlrap(y_true, y_pred)
    score = (score_class * weight).sum()

    writer.add_scalar(tag="loss/epoch", scalar_value=loss_meter.avg, global_step=epoch + 1)
    writer.add_scalar(tag="lwlrap/epoch", scalar_value=score, global_step=epoch + 1)
    return loss_meter.avg, score
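
The first_step/second_step branch in train_one_epoch matches the interface of the widely used PyTorch SAM (Sharpness-Aware Minimization) implementation. A hypothetical setup, where the `sam` module is an assumed third-party dependency rather than something shown in these excerpts:

import torch
from sam import SAM  # assumed: the davda54/sam PyTorch implementation

base_optimizer = torch.optim.SGD
optimizer = SAM(model.parameters(), base_optimizer, lr=1e-3, momentum=0.9)
# With such an optimizer, train_one_epoch takes the two-step path:
# first_step() climbs to the perturbed point, second_step() applies the
# real descent update computed there.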
Example 6
            fold_prediction_df = pd.concat([
                pd.DataFrame({"recording_id": recording_ids}),
                fold_prediction_df
            ], axis=1)

            fold_prediction_df = fold_prediction_df.groupby(
                "recording_id").max().reset_index(drop=False)
            fold_predictions.append(fold_prediction_df)
            submission_name = "strong.csv"

    oof_df = pd.concat(oof_predictions, axis=0).reset_index(drop=True)
    oof_target_df = pd.concat(oof_targets_list, axis=0).reset_index(drop=True)

    columns = [f"s{i}" for i in range(24)]
    score_class, weight = lwlrap(oof_target_df[columns].values,
                                 oof_df[columns].values)
    score = (score_class * weight).sum()
    logger.info(f"Valid LWLRAP: {score:.5f}")
    class_level_score = {config_name: score_class}
    utils.save_json(class_level_score,
                    submission_file_dir / "class_level_results.json")

    oof_df.to_csv(submission_file_dir / oof_name, index=False)

    folds_prediction_df = pd.concat(fold_predictions,
                                    axis=0).reset_index(drop=True)
    folds_prediction_df = folds_prediction_df.groupby(
        "recording_id").mean().reset_index(drop=False)

    assert len(folds_prediction_df) == len(submission), \
        "prediction length does not match sample submission length"
Example 7
    assert len(folds_prediction_df) == len(submission), \
        "prediction length does not match sample submission length"
    assert folds_prediction_df.shape[1] == submission.shape[1], \
        "number of classes in prediction does not match that of sample submission"
    assert len(set(folds_prediction_df["recording_id"]) - set(submission["recording_id"])) == 0, \
        "prediction contains recording_ids not in sample submission"
    assert len(set(submission["recording_id"]) - set(folds_prediction_df["recording_id"])) == 0, \
        "prediction is missing recording_ids from sample submission"

    folds_prediction_df.to_csv(submission_file_dir / "submission.csv",
                               index=False)

    summary = {}

    columns = [f"s{i}" for i in range(24)]
    score_class, weight = clb.lwlrap(oof_target_high[columns].values,
                                     oof_df_high[columns].values)
    score_high = (score_class * weight).sum()
    logger.info(f"Valid LWLRAP(high): {score_high:.5f}")

    summary["high"] = {
        "score": score_high,
        "score_class": score_class,
        "weight": weight
    }

    score_class, weight = clb.lwlrap(oof_target_low[columns].values,
                                     oof_df_low[columns].values)
    score_low = (score_class * weight).sum()
    logger.info(f"Valid LWLRAP(low): {score_low:.5f}")

    summary["low"] = {