Example #1
def compute_predictions(model,
                        dataset,
                        batch_size=1,
                        workers=0) -> pd.DataFrame:
    # Run the model over the whole dataset (no shuffling, nothing dropped)
    # and collect one row of predictions per image, keyed by output name.
    df = defaultdict(list)
    for batch in tqdm(
            DataLoader(dataset,
                       batch_size=batch_size,
                       num_workers=workers,
                       shuffle=False,
                       drop_last=False,
                       pin_memory=True)):
        batch = any2device(batch, device="cuda")

        image_ids = batch[INPUT_IMAGE_ID_KEY]
        df[INPUT_IMAGE_ID_KEY].extend(image_ids)

        outputs = model(**batch)

        if OUTPUT_PRED_MODIFICATION_FLAG in outputs:
            df[OUTPUT_PRED_MODIFICATION_FLAG].extend(
                to_numpy(outputs[OUTPUT_PRED_MODIFICATION_FLAG]).flatten())

        if OUTPUT_PRED_MODIFICATION_TYPE in outputs:
            df[OUTPUT_PRED_MODIFICATION_TYPE].extend(
                to_numpy(outputs[OUTPUT_PRED_MODIFICATION_TYPE]).tolist())

    df = pd.DataFrame.from_dict(df)
    return df
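For context, a hedged sketch of how compute_predictions might be driven; load_model, get_test_dataset, the checkpoint path and the output file name are hypothetical stand-ins, not part of the original module:

# Hypothetical usage: run inference on a held-out split and save the frame.
model = load_model("checkpoint.pth").cuda().eval()          # assumed helper
test_ds = get_test_dataset("/path/to/alaska2/Test")          # assumed helper

with torch.no_grad():
    predictions = compute_predictions(model, test_ds, batch_size=32, workers=4)

predictions.to_csv("test_predictions.csv", index=False)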
Example #2
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("-dd",
                        "--data-dir",
                        type=str,
                        default=os.environ.get("KAGGLE_2020_ALASKA2"))

    args = parser.parse_args()
    data_dir = args.data_dir

    cover = os.path.join(data_dir, "Cover")
    JMiPOD = os.path.join(data_dir, "JMiPOD")
    JUNIWARD = os.path.join(data_dir, "JUNIWARD")
    UERD = os.path.join(data_dir, "UERD")

    dataset = (fs.find_images_in_dir(cover) + fs.find_images_in_dir(JMiPOD) +
               fs.find_images_in_dir(JUNIWARD) + fs.find_images_in_dir(UERD))
    # dataset = dataset[:500]

    mean, std = compute_mean_std(tqdm(dataset))
    print(mean.size())
    print(std.size())
    print(
        "Mean",
        np.array2string(to_numpy(mean),
                        precision=2,
                        separator=",",
                        max_line_width=119))
    print(
        "Std ",
        np.array2string(to_numpy(std),
                        precision=2,
                        separator=",",
                        max_line_width=119))
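compute_mean_std itself is not shown here (the size() prints suggest it returns torch tensors); a minimal numpy sketch of the per-channel statistic it presumably computes, using Var[x] = E[x^2] - E[x]^2, purely for illustration:

import cv2
import numpy as np

def naive_mean_std(image_fnames):
    # Accumulate per-channel pixel sums and squared sums over all images,
    # then derive the channel-wise mean and std from them.
    s = np.zeros(3, dtype=np.float64)
    s2 = np.zeros(3, dtype=np.float64)
    n = 0
    for fname in image_fnames:
        img = cv2.imread(fname).astype(np.float64)  # HxWxC, BGR order
        s += img.sum(axis=(0, 1))
        s2 += (img ** 2).sum(axis=(0, 1))
        n += img.shape[0] * img.shape[1]
    mean = s / n
    std = np.sqrt(s2 / n - mean ** 2)
    return mean, std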
Example #3
def compute_trn_predictions(model,
                            dataset,
                            fp16=False,
                            batch_size=1,
                            workers=0) -> pd.DataFrame:
    # Like compute_predictions, but also keeps ground-truth labels,
    # embeddings and TTA outputs so the frame can serve for OOF analysis.
    df = defaultdict(list)
    for batch in tqdm(
            DataLoader(dataset,
                       batch_size=batch_size,
                       num_workers=workers,
                       shuffle=False,
                       drop_last=False,
                       pin_memory=True)):
        batch = any2device(batch, device="cuda")

        if fp16 and INPUT_FEATURES_JPEG_FLOAT in batch:
            batch[INPUT_FEATURES_JPEG_FLOAT] = batch[
                INPUT_FEATURES_JPEG_FLOAT].half()

        if INPUT_TRUE_MODIFICATION_FLAG in batch:
            y_trues = to_numpy(batch[INPUT_TRUE_MODIFICATION_FLAG]).flatten()
            df[INPUT_TRUE_MODIFICATION_FLAG].extend(y_trues)

        if INPUT_TRUE_MODIFICATION_TYPE in batch:
            y_labels = to_numpy(batch[INPUT_TRUE_MODIFICATION_TYPE]).flatten()
            df[INPUT_TRUE_MODIFICATION_TYPE].extend(y_labels)

        image_ids = batch[INPUT_IMAGE_ID_KEY]
        df[INPUT_IMAGE_ID_KEY].extend(image_ids)

        outputs = model(**batch)

        if OUTPUT_PRED_MODIFICATION_FLAG in outputs:
            df[OUTPUT_PRED_MODIFICATION_FLAG].extend(
                to_numpy(outputs[OUTPUT_PRED_MODIFICATION_FLAG]).flatten())

        if OUTPUT_PRED_MODIFICATION_TYPE in outputs:
            df[OUTPUT_PRED_MODIFICATION_TYPE].extend(
                outputs[OUTPUT_PRED_MODIFICATION_TYPE].tolist())

        if OUTPUT_PRED_EMBEDDING in outputs:
            df[OUTPUT_PRED_EMBEDDING].extend(
                outputs[OUTPUT_PRED_EMBEDDING].tolist())

        # Save also TTA predictions for future use
        if OUTPUT_PRED_MODIFICATION_FLAG + "_tta" in outputs:
            df[OUTPUT_PRED_MODIFICATION_FLAG + "_tta"].extend(
                to_numpy(outputs[OUTPUT_PRED_MODIFICATION_FLAG +
                                 "_tta"]).tolist())

        if OUTPUT_PRED_MODIFICATION_TYPE + "_tta" in outputs:
            df[OUTPUT_PRED_MODIFICATION_TYPE + "_tta"].extend(
                to_numpy(outputs[OUTPUT_PRED_MODIFICATION_TYPE +
                                 "_tta"]).tolist())

    df = pd.DataFrame.from_dict(df)
    return df
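A hedged sketch of how the resulting OOF frame might be consumed afterwards, for example blending the single-pass flag prediction with the mean of its TTA predictions; the 50/50 weighting and the output file name are assumptions, and model / valid_ds stand for whatever the caller already has:

oof = compute_trn_predictions(model, valid_ds, batch_size=32, workers=4)

tta_key = OUTPUT_PRED_MODIFICATION_FLAG + "_tta"
if tta_key in oof.columns:
    # Each TTA cell holds a list of per-augmentation scores; average them
    # and blend with the single-pass prediction.
    tta_mean = oof[tta_key].apply(np.mean)
    oof["blend_flag"] = 0.5 * oof[OUTPUT_PRED_MODIFICATION_FLAG] + 0.5 * tta_mean

oof.to_csv("oof_predictions.csv", index=False)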
Example #4
    def on_batch_end(self, runner: IRunner):
        # Accumulate per-image intersection and union so the IoU can be
        # aggregated over the whole loader (and across workers) later.
        image_ids = runner.input[self.image_id_key]
        outputs = to_numpy(runner.output[self.output_key].detach())
        targets = to_numpy(runner.input[self.input_key].detach())

        for img_id, y_true, y_pred in zip(image_ids, targets, outputs):
            if img_id not in self.scores_per_image:
                self.scores_per_image[img_id] = {"intersection": 0, "union": 0}

            y_true_labels = self.inputs_to_labels(y_true)
            y_pred_labels = self.outputs_to_labels(y_pred)
            intersection = (y_true_labels * y_pred_labels).sum()
            union = y_true_labels.sum() + y_pred_labels.sum() - intersection

            self.scores_per_image[img_id]["intersection"] += float(
                intersection)
            self.scores_per_image[img_id]["union"] += float(union)
Example #5
    def valid_fn(epoch, valid_dataloader, criterion, device):
        # Distributed TPU/XLA validation loop: each process evaluates its
        # shard, then scores are all-gathered before computing wAUC.
        model.eval()

        pred_scores = []
        true_scores = []

        for batch_idx, batch_data in enumerate(valid_dataloader):
            batch_data = any2device(batch_data, device)
            outputs = model(**batch_data)

            y_pred = outputs[OUTPUT_PRED_MODIFICATION_TYPE]
            y_true = batch_data[INPUT_TRUE_MODIFICATION_TYPE]

            loss = criterion(y_pred, y_true)

            pred_scores.extend(to_numpy(parse_classifier_probas(y_pred)))
            true_scores.extend(to_numpy(y_true))

            xm.master_print(f"Batch: {batch_idx}, loss: {loss.item()}")

        val_wauc = alaska_weighted_auc(xla_all_gather(true_scores, device),
                                       xla_all_gather(pred_scores, device))
        xm.master_print(f"Valid epoch: {epoch}, wAUC: {val_wauc}")
        return val_wauc
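parse_classifier_probas is imported from elsewhere; one plausible implementation, assuming the modification-type head emits four logits (Cover plus the three stego algorithms) and the binary score is the total probability mass on the stego classes:

def parse_classifier_probas_sketch(logits):
    # logits: [batch, 4] raw scores for Cover / JMiPOD / JUNIWARD / UERD.
    # P(modified) = 1 - P(cover) = sum of the three stego probabilities.
    probas = logits.softmax(dim=1)
    return probas[:, 1:].sum(dim=1)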
Example #6
    def on_loader_end(self, runner: IRunner):
        eps = 1e-7
        ious_per_image = []

        # Gather statistics from all nodes
        all_gathered_scores_per_image = all_gather(self.scores_per_image)

        n = len(self.thresholds)
        all_scores_per_image = defaultdict(lambda: {
            "intersection": np.zeros(n),
            "union": np.zeros(n)
        })
        for scores_per_image in all_gathered_scores_per_image:
            for image_id, values in scores_per_image.items():
                all_scores_per_image[image_id]["intersection"] += values[
                    "intersection"]
                all_scores_per_image[image_id]["union"] += values["union"]

        for image_id, values in all_scores_per_image.items():
            intersection = values["intersection"]
            union = values["union"]
            metric = intersection / (union + eps)
            ious_per_image.append(metric)

        # Average per-image IoU for every candidate threshold, then report
        # the best threshold together with the IoU achieved at it.
        thresholds = to_numpy(self.thresholds)
        iou = np.mean(ious_per_image, axis=0)
        assert len(iou) == len(thresholds)

        threshold_index = np.argmax(iou)
        iou_at_threshold = iou[threshold_index]
        threshold_value = thresholds[threshold_index]

        runner.loader_metrics[self.prefix + "/" +
                              "threshold"] = float(threshold_value)
        runner.loader_metrics[self.prefix] = float(iou_at_threshold)

        if get_rank() in {-1, 0}:
            logger = get_tensorboard_logger(runner)
            logger.add_histogram(self.prefix, iou, global_step=runner.epoch)
Example #7
def test_calibration():
    oof_predictions = pd.read_csv(
        "/old_models/May07_16_48_rgb_resnet34_fold0/oof_predictions.csv")

    print(
        "Uncalibrated",
        alaska_weighted_auc(oof_predictions["y_true"].values,
                            oof_predictions["y_pred"].values))

    # ir = IR(out_of_bounds="clip")
    # ir.fit(oof_predictions["y_pred"].values, oof_predictions["y_true"].values)
    # p_calibrated = ir.transform(oof_predictions["y_pred"].values)
    # print("IR", alaska_weighted_auc(oof_predictions["y_true"].values, p_calibrated))
    #
    # lr = LR()
    # lr.fit(oof_predictions["y_pred"].values.reshape(-1, 1), oof_predictions["y_true"].values)
    # p_calibrated = lr.predict_proba(oof_predictions["y_pred"].values.reshape(-1, 1))
    # print("LR", alaska_weighted_auc(oof_predictions["y_true"].values, p_calibrated[:, 1]))

    # "Temperature" sharpening: push the probabilities towards 0/1 by
    # scaling the logit before mapping back through the sigmoid.
    x = torch.from_numpy(oof_predictions["y_pred"].values)
    x = torch.sigmoid(logit(x) * 100)
    x = to_numpy(x)

    print("Temp", alaska_weighted_auc(oof_predictions["y_true"].values, x))
Example #8
    def on_batch_end(self, runner):
        # Accumulate ground truth and predicted probabilities batch by batch.
        pred_probas = self.outputs_to_probas(runner.output[self.output_key])
        true_labels = runner.input[self.input_key]

        self.y_trues.extend(to_numpy(true_labels))
        self.y_preds.extend(to_numpy(pred_probas))
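This callback only accumulates state; a hedged sketch of the companion on_loader_end that would typically reduce it into a metric (the prefix attribute and the use of alaska_weighted_auc here are assumptions):

    def on_loader_end(self, runner):
        # Reduce everything accumulated during the epoch into one score.
        score = alaska_weighted_auc(np.array(self.y_trues),
                                    np.array(self.y_preds))
        runner.loader_metrics[self.prefix] = float(score)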
Example #9
def parse_and_softmax(x):
    # Scores may arrive as a stringified list (e.g. a list-valued column
    # read back from CSV), such as "[0.1,0.2,0.7]"; parse it first.
    if isinstance(x, str):
        x = np.fromstring(x[1:-1], dtype=np.float32, sep=",")
    x = torch.tensor(x).softmax(dim=0)
    return to_numpy(x)
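Usage sketch: the scores can come in either as a plain list of floats or as the stringified list produced when a list-valued DataFrame column is written to and read back from CSV; both forms go through the same path:

parse_and_softmax([2.0, 0.5, 0.1, 0.1])        # plain list of logits
parse_and_softmax("[2.0,0.5,0.1,0.1]")         # stringified, e.g. from CSV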