Example #1
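These snippets come from a larger module, so their imports are not shown. A minimal, assumed set that makes the names resolve (pandas, NumPy, OpenCV, PyTorch, tqdm, pathlib) might look like the block below; the mask2rle and rle2mask helpers are project-specific, and hedged sketches of them follow Examples 1 and 4.

# Assumed imports for the examples below; the original module may differ.
from pathlib import Path

import cv2
import numpy as np
import pandas as pd
import torch
from torch import nn
from tqdm import tqdm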
def create_submission(learner, path, test_size=256, thr=0.5):
    """
    Create a submission file for Kaggle

    learner: Learner object to get predictions with
    path: path to submission file
    test_size: size of test images
    thr: probability threshold

    return: dataframe corresponding to submission file
    """
    sub = pd.DataFrame(columns=['ImageId', 'EncodedPixels'])
    for x, y in tqdm(learner.data.test_dl):
        preds = learner.pred_batch(batch=(x, y))
        preds = preds[:, 1]  # per-pixel probability of the positive class
        # zero out predictions whose total probability mass is negligible
        preds[preds.view(preds.shape[0], -1).sum(-1) < 5e-3 * test_size**2,
              ...] = 0.0
        idxs = next(learner.data.test_dl.sampler_iter)  # item indices of this batch
        for k, pred in enumerate(preds.squeeze(1)):
            y = (pred > thr).float().numpy()
            # upsample the thresholded mask back to the original 1024x1024 size
            y = cv2.resize(y, (1024, 1024), interpolation=cv2.INTER_CUBIC)
            y = (y > 0.5).astype(np.uint8) * 255
            id = learner.data.test_ds.items[idxs[k]].with_suffix('').name
            rle = mask2rle(y.T, *y.shape[-2:])
            sub.loc[idxs[k]] = [id, rle]
    sub.to_csv(path, index=False)
    return sub
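mask2rle is defined elsewhere in the project. As a rough stand-in, a generic run-length encoder over the flattened array (the callers above pass mask.T, so runs follow the original image in column-major order) could look like the sketch below; the project's real helper may follow a competition-specific convention (e.g. relative starts), so treat this only as an illustration. The width/height arguments are unused here and kept only to match the call sites.

def mask2rle(img, width, height):
    """Hypothetical sketch: encode a 0/255 mask as 1-based 'start length' pairs."""
    pixels = (img.flatten() > 0).astype(np.uint8)
    padded = np.concatenate([[0], pixels, [0]])
    # positions where the value changes mark run starts and run ends
    changes = np.where(padded[1:] != padded[:-1])[0] + 1
    changes[1::2] -= changes[::2]  # turn end positions into run lengths
    return ' '.join(map(str, changes)) if changes.size else '-1'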
Example #2
def create_submission_mtl(learner, path, test_size=256, thr=0.5, thr_clf=0.5):
    """
    Create a submission file for Kaggle for a multi-task learning problem

    learner: Learner object to get predictions with
    path: path to submission file
    test_size: size of test images
    thr: probability threshold for segmentation
    thr_clf: probability threshold for classification

    return: dataframe corresponding to submission file
    """
    sub = pd.DataFrame(columns=['ImageId', 'EncodedPixels'])
    for x, y in tqdm(learner.data.test_dl):
        y_cat, y_mask = learner.pred_batch(batch=(x, y))
        y_cat = nn.Softmax(dim=1)(y_cat)[:, 1]    # per-image probability of the positive class
        y_mask = nn.Softmax(dim=1)(y_mask)[:, 1]  # per-pixel probability of the positive class
        # zero out masks whose total probability mass is negligible
        y_mask[y_mask.view(y_mask.shape[0], -1).sum(-1) < 1e-4 * test_size * 2,
               ...] = 0.0
        idxs = next(learner.data.test_dl.sampler_iter)  # item indices of this batch
        for k, (cat, mask) in enumerate(zip(y_cat, y_mask.squeeze(1))):
            if cat < thr_clf:
                rle = '-1'  # classifier deems the image negative: submit an empty mask
            else:
                mask = (mask > thr).float().numpy()
                mask = cv2.resize(mask, (1024, 1024),
                                  interpolation=cv2.INTER_AREA)
                mask = (mask > 0.5).astype(np.uint8) * 255
                rle = mask2rle(mask.T, *mask.shape[-2:])
            id = learner.data.test_ds.items[idxs[k]].with_suffix('').name
            sub.loc[idxs[k]] = [id, rle]
    sub.to_csv(path, index=False)
    return sub
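A hedged usage sketch, assuming learner is a fastai v1 Learner with a test set attached and a model that returns a (classification logits, mask logits) pair, as pred_batch above expects; the output path and thresholds are placeholders.

# Hypothetical call; file name and thresholds are illustrative only.
sub = create_submission_mtl(learner, 'submission_mtl.csv',
                            test_size=256, thr=0.5, thr_clf=0.6)
print(sub.head())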
Example #3
def create_submission_kfold_mtl(learner,
                                path,
                                pred_path,
                                n_folds=5,
                                test_size=256,
                                thr=0.5,
                                thr_clf=0.5):
    """
    Create a submission file for Kaggle for a multi-task learning problem with
    k-fold cross-validation

    learner: Learner object to get predictions with
    path: path to submission file
    pred_path: path to folder where probability tensors are stored
    n_folds: number of folds for cross-validation
    test_size: size of test images
    thr: probability threshold for segmentation
    thr_clf: probability threshold for classification

    return: dataframe corresponding to submission file
    """
    sub = pd.DataFrame(columns=['ImageId', 'EncodedPixels'])
    for i in tqdm(range(len(learner.data.test_dl))):
        # average the stored per-fold probabilities for batch i
        y_cat, y_mask = 0, 0
        for f in range(n_folds):
            y_cat += torch.load(pred_path / str(f) / f'cat_{i}.t')
            y_mask += torch.load(pred_path / str(f) / f'mask_{i}.t')
        y_cat /= n_folds
        y_mask /= n_folds
        # zero out masks whose total probability mass is negligible
        y_mask[y_mask.view(y_mask.shape[0], -1).sum(-1) < 1e-4 * test_size * 2,
               ...] = 0.0
        idxs = next(learner.data.test_dl.sampler_iter)  # item indices of this batch
        for k, (cat, mask) in enumerate(zip(y_cat, y_mask.squeeze(1))):
            if cat < thr_clf:
                rle = '-1'
            else:
                mask = (mask > thr).float().numpy()
                mask = cv2.resize(mask, (1024, 1024),
                                  interpolation=cv2.INTER_AREA)
                mask = (mask > 0.5).astype(np.uint8) * 255
                rle = mask2rle(mask.T, *mask.shape[-2:])
            id = learner.data.test_ds.items[idxs[k]].with_suffix('').name
            sub.loc[idxs[k]] = [id, rle]
    sub.to_csv(path, index=False)
    return sub
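The k-fold variants read probability tensors named cat_{i}.t and mask_{i}.t from one sub-folder per fold under pred_path. How those tensors are produced is not shown here; a hypothetical sketch that would be consistent with how they are consumed above (positive-class softmax probabilities, one file per test batch, the mask keeping its channel dimension) is:

def save_fold_preds(learner, pred_path, fold):
    """Hypothetical helper: cache per-batch test probabilities for one fold."""
    out = Path(pred_path) / str(fold)
    out.mkdir(parents=True, exist_ok=True)
    for i, (x, y) in enumerate(learner.data.test_dl):
        y_cat, y_mask = learner.pred_batch(batch=(x, y))
        # store positive-class probabilities, keeping the channel dim on the mask
        torch.save(nn.Softmax(dim=1)(y_cat)[:, 1], out / f'cat_{i}.t')
        torch.save(nn.Softmax(dim=1)(y_mask)[:, 1:2], out / f'mask_{i}.t')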
Example #4
def change_csv(old, new, path, size=256):
    """
    Convert the mask CSV to match the resized images

    old: path to base csv for full size images
    new: path to new csv for resized images
    path: path to train folder
    size: size to which masks are to be resized
    """
    # the source csv separates its two columns with ', ' (comma + space)
    df = pd.read_csv(old, sep=', ')
    new_df = pd.DataFrame(columns=['ImageId', 'EncodedPixels'])
    for row in tqdm(df.itertuples(), total=df.shape[0]):
        image_id = row.ImageId
        label = row.EncodedPixels
        image_id = Path(path.name) / (image_id + '.dcm')
        # decode the full-resolution mask, downscale it, then re-encode it
        mask = rle2mask(label, 1024, 1024)
        mask = cv2.resize(mask, (size, size), interpolation=cv2.INTER_AREA)
        mask = (mask > 127).astype(np.uint8) * 255
        label = mask2rle(mask.T, size, size)
        new_df.loc[row.Index] = [image_id, label]
    new_df.to_csv(new, index=False)
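rle2mask is the inverse of mask2rle and is likewise project-specific. A sketch matching the encoder sketched after Example 1 (1-based 'start length' pairs, '-1' meaning an empty mask) could be the following; the project's real decoder may instead use a relative-start convention.

def rle2mask(rle, width, height):
    """Hypothetical sketch: decode 'start length' pairs back to a 0/255 mask."""
    mask = np.zeros(width * height, dtype=np.uint8)
    if not rle or rle.strip() == '-1':
        return mask.reshape(width, height)
    runs = np.asarray(rle.split(), dtype=int)
    starts, lengths = runs[0::2] - 1, runs[1::2]
    for start, length in zip(starts, lengths):
        mask[start:start + length] = 255
    return mask.reshape(width, height)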
Example #5
def create_submission_kfold(learner,
                            path,
                            pred_path,
                            n_folds=5,
                            test_size=256,
                            thr=0.5):
    """
    Create a submission file for Kaggle from stored k-fold predictions

    learner: Learner object to get predictions with
    path: path to submission file
    pred_path: path to folder where probability tensors are stored
    n_folds: number of folds for cross-validation
    test_size: size of test images
    thr: probability threshold

    return: dataframe corresponding to submission file
    """
    sub = pd.DataFrame(columns=['ImageId', 'EncodedPixels'])
    n = len(learner.data.test_dl)         # number of test batches
    bs = learner.data.test_dl.batch_size  # items per batch
    for i in tqdm(range(n)):
        # average the stored per-fold probabilities for batch i
        preds = 0
        for f in range(n_folds):
            preds += torch.load(pred_path / str(f) / f'mask_{i}.t')
        preds /= n_folds
        # zero out predictions whose total probability mass is negligible
        preds[preds.view(preds.shape[0], -1).sum(-1) < 1e-3 * test_size**2,
              ...] = 0.0
        idxs = np.arange(i * bs, (i + 1) * bs)  # item indices covered by batch i
        for k, pred in enumerate(preds.squeeze(1)):
            y = (pred > thr).float().numpy()
            y = cv2.resize(y, (1024, 1024), interpolation=cv2.INTER_AREA)
            y = (y > 0.5).astype(np.uint8) * 255
            id = learner.data.test_ds.items[idxs[k]].with_suffix('').name
            rle = mask2rle(y.T, *y.shape[-2:])
            sub.loc[idxs[k]] = [id, rle]
    sub.to_csv(path, index=False)
    return sub
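A hypothetical end-to-end use of the single-task k-fold variant, assuming one trained Learner per fold (fold_learners is an assumed list) and a cache layout matching what create_submission_kfold expects; it mirrors Example 1 in using pred_batch output directly as probabilities.

# Hypothetical pipeline; names and paths are placeholders.
pred_path = Path('preds')
for fold, fold_learner in enumerate(fold_learners):
    out = pred_path / str(fold)
    out.mkdir(parents=True, exist_ok=True)
    for i, (x, y) in enumerate(fold_learner.data.test_dl):
        preds = fold_learner.pred_batch(batch=(x, y))[:, 1:2]  # keep the channel dim
        torch.save(preds, out / f'mask_{i}.t')

sub = create_submission_kfold(fold_learners[0], 'submission.csv', pred_path,
                              n_folds=len(fold_learners))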