Example #1
0
def eda_fold(y_preds,
             y_trues,
             ids,
             mask_thresh=0.5,
             min_size_thresh=1500,
             max_size_thresh=50000,
             dilation=2):
    """Visualise every post-processing stage for samples with a non-empty mask.

    For each (prediction, ground truth, id) triple the prediction is
    thresholded at ``mask_thresh``. Samples whose thresholded mask is empty
    are skipped. For the others a 3x2 figure is shown with: the raw
    prediction, the thresholded mask, the mask after removing connected
    regions smaller than ``min_size_thresh``, the mask after hole filling and
    optional dilation, the ground truth, and the source image read from disk.
    Panel titles carry the Dice score of the corresponding stage.

    Args:
        y_preds: iterable of prediction maps.
        y_trues: iterable of ground-truth masks, aligned with ``y_preds``.
        ids: iterable of sample ids used to locate the source PNG.
        mask_thresh: threshold applied to the prediction map.
        min_size_thresh: regions with a smaller area are erased.
        max_size_thresh: accepted for interface compatibility; not used.
        dilation: radius of the disk used for dilation; skipped when <= 0.

    Returns:
        None. The function only plots and prints region areas.
    """

    def show_panel(position, caption, picture):
        # One cell of the 3x2 grid, always rendered in grayscale.
        plt.subplot(3, 2, position)
        plt.title(caption)
        plt.imshow(picture, cmap="gray")

    for probs, truth, sample_id in zip(y_preds, y_trues, ids):
        mask = probs > mask_thresh
        if not np.sum(mask) > 0:
            continue

        plt.figure(figsize=(15, 25))
        show_panel(1, "original", probs)
        show_panel(2, f"binary, score {dice_coef_metric(mask, truth):0.4f}",
                   mask)

        # Erase connected components below the size threshold.
        components = measure.label(mask)
        for region in measure.regionprops(components):
            if region.area < min_size_thresh:
                mask[components == region.label] = 0
        show_panel(
            3, f"remove small, score {dice_coef_metric(mask, truth):0.4f}",
            mask)

        mask = ndimage.binary_fill_holes(mask)
        if dilation > 0:
            mask = morphology.dilation(mask, morphology.disk(dilation))
        show_panel(4,
                   f"morphology, score {dice_coef_metric(mask, truth):0.4f}",
                   mask)

        # Report the area of every region that survived post-processing.
        for region in measure.regionprops(measure.label(mask)):
            print(region.area)

        score = dice_coef_metric(mask, truth)
        show_panel(5, f"gt score {score:0.4f}", truth)

        image = cv2.imread(
            f"/mnt/ssd2/dataset/pneumo/train_png/{sample_id}.png", 0)
        show_panel(6, "image", image)
        plt.show()
Example #2
0
def val_epoch(model, optimizer, data_loader_valid, device, epoch):
    """Run one validation pass, dump raw predictions as PNGs, return mean Dice.

    Every batch is expected to provide ``(images, targets, image_ids)`` at
    indices 0, 1 and 2. The single-channel images are replicated to three
    channels, passed through ``model`` on the GPU, and channel 0 of every
    output is written as an 8-bit PNG named after its image id. The outputs
    are then binarised at 0.5 and scored against the targets with
    ``dice_coef_metric``; the per-batch scores are averaged.

    Args:
        model: network to evaluate; put into eval mode here.
        optimizer: unused, kept for interface compatibility.
        data_loader_valid: loader yielding the validation batches.
        device: unused, kept for interface compatibility (inputs are moved
            with ``.cuda()``).
        epoch: epoch number, only used in the printed summary.

    Returns:
        Mean of the per-batch Dice scores.

    Raises:
        ZeroDivisionError: if the loader yields no batches.
    """
    print("START validation")
    model.eval()

    threshold = 0.5
    dump_dir = '/mnt/ssd2/dataset/pneumo/predictions/sx101_fold5_val'
    cntr = 0
    valloss = 0

    progress_bar_valid = tqdm(enumerate(data_loader_valid),
                              total=len(data_loader_valid),
                              desc="Predicting",
                              ncols=0,
                              postfix=["dice:"])

    with torch.no_grad():
        for _, valdata in progress_bar_valid:
            cntr += 1
            images, targets, image_ids = valdata[0], valdata[1], valdata[2]

            # Replicate the single input channel three times instead of
            # filling an uninitialised float64 numpy buffer.
            images = images.float().repeat(1, 3, 1, 1).cuda()
            # The targets are only needed on the host for the metric, so they
            # are not sent to the GPU and back.
            targets_np = targets.data.cpu().numpy()

            outputs = model(images)

            # Copy so that thresholding never writes into the model output.
            out_cut = np.copy(outputs.data.cpu().numpy())

            # Dump the raw predictions before they are binarised. A dedicated
            # index name avoids shadowing the batch index of the outer loop.
            for sample_idx in range(outputs.shape[0]):
                image_name = osp.join(dump_dir,
                                      f'{image_ids[sample_idx]}.png')
                cv2.imwrite(image_name,
                            np.uint8(255 * out_cut[sample_idx, 0]))

            out_cut[out_cut < threshold] = 0.0
            out_cut[out_cut >= threshold] = 1.0

            valloss += dice_coef_metric(out_cut, targets_np)

        print(
            "Epoch:  " + str(epoch) + "  Threshold:  " + str(threshold) +
            "  Validation DICE score:", valloss / cntr)

    return valloss / cntr
def check_mean_fold(fold, dilation, min_size_thresh):
    """Score a fold after averaging the TTA predictions of all models.

    Reads the module-level ``model_name`` (several models joined by "+"),
    ``agreement_tta`` and ``mask_thresh``. For every model the stored
    prediction and its mirrored counterpart are scaled by 1/255 and averaged;
    the per-model results are then averaged across models. Each averaged
    sample is binarised with ``binarize_sample``; dilation is disabled for
    samples whose Dice against the ground truth exceeds ``agreement_tta``.

    Args:
        fold: fold identifier used to build the prediction file name.
        dilation: dilation passed to ``binarize_sample`` for samples that do
            not exceed ``agreement_tta``.
        min_size_thresh: minimum region size passed to ``binarize_sample``.

    Returns:
        Result of ``dice_coef_metric_batch`` on the binarised means and the
        ground truths of the last model loaded.
    """
    # str.split already yields a one-element list when there is no "+".
    model_lst = model_name.split("+")

    all_outputs_fold = []
    for model in model_lst:
        filename = (
            f"/mnt/ssd1/dataset/pneumo/sota_predictions/{model}/{fold}_{model}_valid/{fold}_{model}_valid_index.npz"
        )
        # NpzFile keeps the archive open until closed; the context manager
        # releases the file handle once the arrays have been read.
        with np.load(filename) as tfz:
            outputs = tfz["outputs"] / 255.0
            outputs_mirror = tfz["outputs_mirror"] / 255.0
            gts = tfz["gts"]

        # Index 0 of the second axis is averaged with its mirrored twin for
        # all samples at once (presumably arrays are (N,1,H,W) - TODO confirm).
        all_outputs_fold.append((outputs[:, 0] + outputs_mirror[:, 0]) / 2.)

    all_outputs_fold = np.array(all_outputs_fold)  # (models, N, ...)
    fold_mean = np.mean(all_outputs_fold, axis=0)  # mean over models

    fold_mean_bin = []
    for sample, gt in zip(fold_mean, gts):
        agree_dice = dice_coef_metric(sample, gt)
        # Skip dilation when the averaged prediction already scores above
        # the agreement level.
        dilation_c = 0 if agree_dice > agreement_tta else dilation
        fold_mean_bin.append(
            binarize_sample((sample, mask_thresh, min_size_thresh,
                             dilation_c)))
    fold_mean_bin = np.array(fold_mean_bin)

    return dice_coef_metric_batch(fold_mean_bin, gts)
Example #4
0
def compare_dice():
    """Print the Dice agreement between two folders of predicted masks.

    The PNG files of both folders are sorted and paired by position. Each
    image is scaled by 1/255 and thresholded at 0.5 before the pair is scored
    with ``dice_coef_metric``. The mean score and the first ten scores are
    printed; nothing is returned.
    """

    def load_mask(path):
        # Scale the image by 1/255 and binarise it at 0.5.
        return cv2.imread(path) / 255.0 > 0.5

    first_pattern = (
        "/mnt/ssd2/dataset/pneumo/predictions/uint8/se154/debug2/*png")
    second_pattern = (
        "/mnt/ssd2/dataset/pneumo/predictions/uint8/sx101/debug/*png")
    fns1 = sorted(glob(first_pattern))
    fns2 = sorted(glob(second_pattern))

    pairs = tqdm(zip(fns1, fns2), total=len(fns1))
    scores = [
        dice_coef_metric(load_mask(left), load_mask(right))
        for left, right in pairs
    ]

    print(np.mean(scores))
    print(scores[:10])
def check_binary_union_fold(fold, dilation, min_size_thresh):
    """Score a fold using the union of the per-model binarised predictions.

    Reads the module-level ``model_name`` (several models joined by "+"),
    ``agreement_tta`` and ``mask_thresh``. For every model the stored
    prediction and its mirrored counterpart are scaled by 1/255 and averaged.
    Each per-model sample is binarised with ``binarize_sample``; dilation is
    disabled for samples whose Dice against the ground truth exceeds
    ``agreement_tta``. The binary masks are combined across models with an
    element-wise maximum.

    Args:
        fold: fold identifier used to build the prediction file name.
        dilation: dilation passed to ``binarize_sample`` for samples that do
            not exceed ``agreement_tta``.
        min_size_thresh: minimum region size passed to ``binarize_sample``.

    Returns:
        Result of ``dice_coef_metric_batch`` on the union masks and the
        ground truths of the last model loaded.
    """
    # str.split already yields a one-element list when there is no "+".
    model_lst = model_name.split("+")

    models_mean = []
    for model in model_lst:
        filename = (
            f"/mnt/ssd1/dataset/pneumo/sota_predictions/{model}/{fold}_{model}_valid/{fold}_{model}_valid_index.npz"
        )
        # NpzFile keeps the archive open until closed; the context manager
        # releases the file handle once the arrays have been read.
        with np.load(filename) as tfz:
            outputs = tfz["outputs"] / 255.0
            outputs_mirror = tfz["outputs_mirror"] / 255.0
            gts = tfz["gts"]

        # Index 0 of the second axis is averaged with its mirrored twin for
        # all samples at once (presumably arrays are (N,1,H,W) - TODO confirm).
        models_mean.append((outputs[:, 0] + outputs_mirror[:, 0]) / 2.)
    models_mean = np.array(models_mean)  # (models, N, ...)

    models_mean_bin = []
    for model_mean in models_mean:
        model_mean_bin = []
        for sample, gt in zip(model_mean, gts):
            agree_dice = dice_coef_metric(sample, gt)
            # Skip dilation when this model's prediction already scores
            # above the agreement level.
            dilation_c = 0 if agree_dice > agreement_tta else dilation
            model_mean_bin.append(
                binarize_sample((sample, mask_thresh, min_size_thresh,
                                 dilation_c)))
        models_mean_bin.append(model_mean_bin)
    models_mean_bin = np.array(models_mean_bin)

    # Element-wise maximum over the model axis, i.e. the union of the masks;
    # the model axis is reduced away.
    models_union_bin = np.amax(models_mean_bin, axis=0)

    return dice_coef_metric_batch(models_union_bin, gts)
Example #6
0
def score_sample(data):
    """Post-process one prediction and return its Dice score.

    Args:
        data: tuple ``(el, gt, mask_thresh, min_size_thresh, dilation)`` with
            the prediction map, the ground truth, the binarisation threshold,
            the minimum region area to keep and the dilation disk radius.

    Returns:
        ``dice_coef_metric`` of the post-processed mask against ``gt``. An
        empty thresholded mask is scored as is, without post-processing.
    """
    import warnings

    el, gt, mask_thresh, min_size_thresh, dilation = data
    y_pred = el > mask_thresh
    if np.sum(y_pred) > 0:
        # Erase connected components below the size threshold, addressing
        # each region by its own label rather than by enumeration order.
        labels = measure.label(y_pred)
        for region in measure.regionprops(labels):
            if region.area < min_size_thresh:
                y_pred[labels == region.label] = 0

        y_pred = ndimage.binary_fill_holes(y_pred)
        if dilation > 0:
            try:
                y_pred = morphology.dilation(y_pred, morphology.disk(dilation))
            except Exception as err:
                # Dilation stays best-effort: the undilated mask is scored.
                # Unlike a bare ``except`` this lets KeyboardInterrupt and
                # SystemExit through and reports the failure.
                warnings.warn(
                    f"dilation with radius {dilation} failed: {err!r}")

    return dice_coef_metric(y_pred, gt)
def val_epoch(model, data_loader_valid, epoch):
    """Evaluate ``model`` on the validation loader and return the mean Dice.

    Every batch provides the images at index 0 and the targets at index 1.
    The single-channel images are replicated to three channels and passed
    through the model on the GPU. The outputs are binarised at 0.5 and scored
    against the targets with ``dice_coef_metric``; the per-batch scores are
    averaged.

    Args:
        model: network to evaluate; put into eval mode here.
        data_loader_valid: loader yielding the validation batches.
        epoch: epoch number, only used in the printed summary.

    Returns:
        Mean of the per-batch Dice scores.
    """
    print("START validation")
    model.eval()

    threshold = 0.5
    batches_seen = 0
    dice_sum = 0

    progress_bar_valid = tqdm(
        enumerate(data_loader_valid),
        total=len(data_loader_valid),
        desc="Validation",
        ncols=0,
        postfix=["dice:"],
    )

    with torch.set_grad_enabled(False):
        for _, valdata in progress_bar_valid:
            batches_seen += 1
            batch_images, batch_targets = valdata[0], valdata[1]

            # Three identical float copies of the single input channel.
            stacked = batch_images.float().repeat(1, 3, 1, 1)

            net_input = Variable(stacked.cuda())
            gpu_targets = Variable(batch_targets.cuda())

            predictions = model(net_input)

            # Binarise a private copy of the predictions in place.
            binary = np.copy(predictions.data.cpu().numpy())
            binary[binary < threshold] = 0.0
            binary[binary >= threshold] = 1.0

            dice_sum += dice_coef_metric(binary,
                                         gpu_targets.data.cpu().numpy())

        mean_dice = dice_sum / batches_seen
        print("Epoch:  " + str(epoch) + "  Threshold:  " + str(threshold) + "  Validation DICE score:", mean_dice)

    return mean_dice
Example #8
0
def get_data(model_name="sx101", fold=0):
    """Load ground truths and stored predictions of a validation fold.

    The validation dataset of ``fold`` is iterated with batch size 1 to
    collect the binarised ground-truth masks and sample ids. The matching
    prediction PNGs are then read in parallel with ``read_prediction`` and
    every prediction, thresholded at 0.5, is scored against its ground truth.
    The mean score is printed.

    Args:
        model_name: name of the model whose predictions are read.
        fold: fold index.

    Returns:
        Tuple ``(y_preds, y_trues, scores, ids)`` of lists that are aligned
        element by element.
    """
    paths = get_paths()
    dumps_dir = osp.join(paths["dumps"]["path"], paths["dumps"]["predictions"])
    dst = osp.join(dumps_dir, model_name, f"{fold}_{model_name}_valid")

    dataset_valid = SIIMDataset_Unet(mode="valid", fold=fold)
    vloader = torch.utils.data.DataLoader(dataset_valid,
                                          batch_size=1,
                                          shuffle=False,
                                          num_workers=NCORE)

    progress_bar = tqdm(vloader,
                        total=len(vloader),
                        desc=f"generating masks f{fold}")

    y_trues, ids = [], []
    for _, targets, batch_ids in progress_bar:
        # Batch size is 1, so sample 0 is the only sample of the batch.
        y_trues.append(np.array(targets[0, 0] > 0.5))
        ids.append(batch_ids[0])

    filenames = [osp.join(dst, f"{sample_id}.png") for sample_id in ids]
    with Pool(NCORE) as p:
        # imap (not imap_unordered) keeps the results in the order of
        # ``filenames``; otherwise predictions would be paired with the wrong
        # ground truths and ids below.
        y_preds = list(
            tqdm(p.imap(read_prediction, filenames),
                 total=len(filenames),
                 desc="reading predictions"))

    scores = [
        dice_coef_metric(yp > 0.5, yt) for yp, yt in zip(y_preds, y_trues)
    ]

    print(np.mean(scores))
    return y_preds, y_trues, scores, ids
Example #9
0
def predict_fold(model_name,
                 fold=0,
                 mode="valid",
                 out_folder="outs",
                 weights_dir="outs",
                 validate=True):
    """Predict one fold with horizontal-flip TTA and store the results.

    Every batch is predicted together with its mirrored copy. Both
    predictions have values below 0.02 zeroed, the mirrored one is flipped
    back, and the two are averaged. The averaged maps, the Dice agreement
    between the two thresholded predictions, the sample ids and the binarised
    ground truths are written to a compressed ``.npz`` file. Finally the mean
    score from ``calc_score`` over all samples is printed and returned.

    Args:
        model_name: model identifier used for checkpoint and output paths.
        fold: fold index.
        mode: one of "train", "valid", "test".
        out_folder: root directory for the prediction dump.
        weights_dir: root directory containing the checkpoints.
        validate: unused, kept for interface compatibility.

    Returns:
        Mean score over the fold, or -1 when the ``.npz`` file already exists
        and nothing was predicted.
    """
    assert mode in ("train", "valid", "test"), mode

    name_pattern = f"{fold}_{model_name}_{mode}"
    dst = osp.join(out_folder, model_name, name_pattern)
    os.makedirs(dst, exist_ok=True)

    # Bail out before loading the checkpoint when the fold is already done.
    npz_file = osp.join(dst, f"{name_pattern}_fp32_d.npz")
    if osp.exists(npz_file):
        return -1

    checkpoint = select_best_checkpoint(osp.join(weights_dir, model_name),
                                        fold, model_name)
    model_ft = torch.load(checkpoint)
    model_ft.to(DEVICE)
    model_ft.eval()

    dataset_valid = SIIMDataset_Unet(mode=mode, fold=fold)
    vloader = torch.utils.data.DataLoader(dataset_valid,
                                          batch_size=2,
                                          shuffle=False,
                                          num_workers=4)

    progress_bar = tqdm(enumerate(vloader),
                        total=len(vloader),
                        desc=f"Predicting {mode} {fold}")

    outputs, disagreements, gts, all_ids = [], [], [], []
    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        for i, batch in progress_bar:
            images, targets, ids = batch

            # First half of the batch: originals; second half: mirrored
            # along the last axis.
            mirror = torch.flip(images, (3, ))
            batch1ch = torch.cat([images, mirror], dim=0)
            # Replicate the single channel to the three input channels.
            batch3ch = batch1ch.float().repeat(1, 3, 1, 1)
            targets = targets.data.cpu().numpy()

            # Inputs go to the same device the model was moved to.
            predictions = model_ft(batch3ch.to(DEVICE))
            probability = predictions.data.cpu().numpy()

            n_samples = targets.shape[0]
            for j in range(n_samples):
                predict1 = probability[j]
                predict1[predict1 < 0.02] = 0
                # Flip the mirrored prediction back to the original layout.
                predict1_mirror = probability[n_samples + j][:, :, ::-1]
                predict1_mirror[predict1_mirror < 0.02] = 0

                predict = np.mean(np.concatenate([predict1, predict1_mirror],
                                                 axis=0),
                                  axis=0)

                # Dice between the two thresholded TTA predictions.
                disagree = dice_coef_metric(predict1 > 0.5,
                                            predict1_mirror > 0.5)

                outputs.append(predict)
                disagreements.append(disagree)
                all_ids.append(ids[j])
                # The ``np.bool`` alias was removed in NumPy 1.24; the
                # builtin ``bool`` is the same dtype.
                gts.append(np.array(targets[j, 0] > 0.5).astype(bool))

            if i % 50 == 0 and i != 0:
                gc.collect()

    np.savez_compressed(
        npz_file,
        outputs=np.array(outputs),
        disagreements=np.array(disagreements),
        ids=np.array(all_ids),
        gts=np.array(gts),
    )

    # Only the mean is used, so the completion order does not matter. The
    # context manager shuts the pool down on exit.
    with Pool() as p:
        scores = list(
            tqdm(p.imap_unordered(calc_score, zip(gts, outputs)),
                 total=len(outputs),
                 desc="calc score"))

    del outputs, gts
    gc.collect()
    score = np.mean(scores)
    print(f"\n{model_name} fold{fold} {score:0.4f}\n")
    return score
Example #10
0
def calc_score(data):
    """Return the Dice score of a ``(ground truth, prediction)`` pair.

    Both members of the pair are thresholded at 0.5 before being passed to
    ``dice_coef_metric`` (prediction first, ground truth second).
    """
    truth, prediction = data
    predicted_mask = prediction > 0.5
    truth_mask = truth > 0.5
    return dice_coef_metric(predicted_mask, truth_mask)
Example #11
0
def train_one_epoch(model,
                    optimizer,
                    data_loader,
                    device,
                    epoch,
                    print_freq,
                    losstype="bcedice"):
    """Train ``model`` for one epoch on the batches with a non-zero target sum.

    Batches whose target tensor sums to zero are skipped. For the remaining
    ones the single-channel images are replicated to three channels, the loss
    is computed (``dice_coef_loss`` when ``losstype == "dice_only"``,
    otherwise ``bce_dice_loss``) and one optimizer step is taken. During
    epoch 0 a warm-up scheduler from ``warmup_lr_scheduler`` is stepped after
    every optimizer step. Mean loss and mean Dice of the epoch are printed.

    Args:
        model: network to train; put into train mode here.
        optimizer: optimizer updating the model parameters.
        data_loader: loader yielding batches with images at index 0 and
            targets at index 1.
        device: unused, kept for interface compatibility (tensors are moved
            with ``.cuda()``).
        epoch: epoch number; warm-up is active only when it equals 0.
        print_freq: unused, kept for interface compatibility.
        losstype: "dice_only" selects the pure Dice loss.

    Returns:
        None.
    """
    model.train()

    cntr = 0
    losses = []
    accur = []

    lr_scheduler = None
    if epoch == 0:
        warmup_factor = 1.0 / 100
        warmup_iters = min(100, len(data_loader) - 1)

        lr_scheduler = warmup_lr_scheduler(optimizer, warmup_iters,
                                           warmup_factor)

    # The initial labels follow the slots written below: [0] loss, [1] dice.
    progress_bar = tqdm(enumerate(data_loader),
                        total=len(data_loader),
                        desc="Predicting",
                        ncols=0,
                        postfix=["loss:", "dice:"])

    for _, traindata in progress_bar:
        # Skip batches whose targets sum to zero.
        if not traindata[1].sum():
            continue

        images, targets = traindata[0], traindata[1]

        # Host copy of the targets for the metric, taken before the transfer
        # so no GPU -> CPU round-trip is needed.
        targets_np = targets.data.cpu().numpy()

        # Replicate the single input channel three times instead of filling
        # an uninitialised float64 numpy buffer.
        images = images.float().repeat(1, 3, 1, 1).cuda()
        targets = targets.cuda()

        outputs = model(images)

        # Binarise a private copy of the outputs at 0.5 for the Dice metric.
        out_cut = np.copy(outputs.data.cpu().numpy())
        out_cut[out_cut < 0.5] = 0.0
        out_cut[out_cut >= 0.5] = 1.0
        train_dice = dice_coef_metric(out_cut, targets_np)

        if losstype == "dice_only":
            loss = dice_coef_loss(outputs, targets)
        else:
            loss = bce_dice_loss(outputs, targets)

        losses.append(loss.item())
        accur.append(train_dice)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if cntr % 10 == 0:
            progress_bar.postfix[0] = f"loss: {np.mean(np.array(losses)):0.4f}"
            progress_bar.postfix[1] = f"dice: {np.mean(np.array(accur)):0.4f}"
            # Redraw only: iterating over the bar already advances the
            # counter, so update() here would count the batch twice.
            progress_bar.refresh()

        if lr_scheduler is not None:
            lr_scheduler.step()
        cntr += 1

    print("Epoch [%d]" % (epoch))
    print("Mean loss on train:",
          np.array(losses).mean(), "Mean DICE on train:",
          np.array(accur).mean())