예제 #1
0
    def __init__(self, fold=0, mode="train", image_size=1024, normalized=False):
        """Select the dataframe split and resolve image/mask directories.

        fold: fold id used to carve out the validation split.
        mode: one of "train" / "valid" / "test".
        image_size: square side length stored as both height and width.
        normalized: whether samples should be ImageNet-normalized.
        """
        assert mode in ("train", "valid", "test"), mode

        self.df = pd.read_csv("tables/folds_v6_st2.csv")
        if mode == "test":
            # Stage-2 submission table; every row starts as an empty mask.
            self.df = pd.read_csv("tables/stage_2_sample_submission.csv")
            self.df[" EncodedPixels"] = ["-1"] * self.df.shape[0]
        elif mode == "train":
            self.df = self.df[self.df["fold_id"] != fold]
        else:  # valid
            self.df = self.df[self.df["fold_id"] == fold]

        print(self.df.head())
        print(f"{mode} {self.df.shape[0]}")

        self.gb = self.df.groupby("ImageId")
        self.fnames = list(self.gb.groups.keys())

        cfg = get_paths()
        self.paths = cfg
        self.height = self.width = image_size

        ds_cfg = cfg["dataset"]
        # Test images live in a separate directory from train/valid images.
        images_sub = ds_cfg["test_dir"] if mode == "test" else ds_cfg["images_dir"]
        self.image_dir = os.path.join(ds_cfg["path"], images_sub)
        self.mask_dir = os.path.join(ds_cfg["path"], ds_cfg["masks_dir"])

        # Augmentations are enabled only while training.
        self.augs = mode == "train"
        self.transform = strong_aug()
        self.cache = False
        self.norm_transform = Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD)
        self.normalized = normalized
예제 #2
0
def check_iter():
    """Eyeball a few batches from the test split: de-normalize and plot them."""
    batch_size = 8
    dataset_paths = get_paths()['dataset']
    params = get_params()['data_params']

    df = pd.read_csv('tables/folds_n01.csv')
    print(df.head())

    dataset = CifarDataset('test', params, dataset_paths, df, valid_transform())
    loader = data.DataLoader(dataset, batch_size=batch_size,
                             shuffle=False, num_workers=12, drop_last=False)

    for batch in loader:
        labels = batch['y'].numpy()
        # CHW -> HWC so matplotlib can display the images.
        images = np.transpose(batch['image'].numpy(), (0, 2, 3, 1))

        plt.figure(figsize=(25, 35))
        for idx in range(batch_size):
            plt.subplot(2, batch_size // 2, idx + 1)
            plt.title(idx2klass.get(labels[idx]))
            # Undo the ImageNet normalization before display.
            plt.imshow(IMAGENET_STD * images[idx] + IMAGENET_MEAN)

        plt.show()
예제 #3
0
def main():
    """Run test-mode prediction for every requested model and fold."""
    args = parse_args()
    print(args)
    paths = get_paths()
    weights_dir = osp.join(paths["dumps"]["path"], paths["dumps"]["weights"])
    dumps_dir = osp.join(paths["dumps"]["path"], paths["dumps"]["predictions"])

    # "a+b+c" selects several models at once; otherwise a single model.
    model_names = args.models.split('+') if "+" in args.models else [args.models]

    scores = []
    for model in model_names:
        for mode in ["test"]:  # 'test'
            # A non-negative --fold restricts the run to that one fold.
            folds = [args.fold] if args.fold >= 0 else list(range(8))

            for fold in folds:
                scores.append(
                    predict_fold(model,
                                 fold=fold,
                                 mode=mode,
                                 out_folder=dumps_dir,
                                 weights_dir=weights_dir))
        print(scores[:10])
예제 #4
0
def get_loaders(batch_size=8):
    """Build train / valid / test DataLoaders over the CIFAR folds table.

    Returns the three loaders in that order.
    """
    dataset_paths = get_paths()['dataset']
    params = get_params()['data_params']

    def _make_loader(mode, frame, transform, shuffle):
        # One-liner to keep the three loader definitions consistent.
        ds = CifarDataset(mode, params, dataset_paths, frame, transform)
        return data.DataLoader(ds,
                               batch_size=batch_size,
                               shuffle=shuffle,
                               num_workers=32,
                               drop_last=False)

    folds_df = pd.read_csv('tables/folds_n01.csv')
    train_loader = _make_loader('train', folds_df, train_transform(), True)
    valid_loader = _make_loader('valid', folds_df, valid_transform(), False)

    # Test split comes from the sample-submission table, not the folds table.
    sample_df = pd.read_csv(
        os.path.join(dataset_paths['path'], dataset_paths['sample']))
    test_loader = _make_loader('test', sample_df, valid_transform(), False)

    return train_loader, valid_loader, test_loader
예제 #5
0
def main():
    """Build a submission CSV by RLE-converting the sota predictions in parallel."""
    global model_name, n_fold, mask_thresh, min_size_thresh, dilation, dumps_dir
    # NOTE(review): these globals are presumably read by convert_one inside the
    # worker processes — confirm against its definition.
    model_name = "se154+sx50+sx101"
    n_fold = 8
    mask_thresh = 0.5
    min_size_thresh = 1000
    dilation = 0

    paths = get_paths()  # kept for its config-loading side effect
    dumps_dir = "/mnt/ssd2/dataset/pneumo/predictions/old/sota_predictions"

    df = pd.read_csv("tables/sample_submission.csv")

    # Pool.map preserves input order, so rles stays aligned with ImageId.
    with Pool() as p:
        rles = p.map(convert_one, df["ImageId"])

    df["EncodedPixels"] = rles
    os.makedirs("subm", exist_ok=True)
    out_name = (f"subm/{model_name}_nf{n_fold}_{mask_thresh}"
                f"_{min_size_thresh}_{dilation}.csv")
    df.to_csv(out_name, index=False)

    empty_rows = df[df["EncodedPixels"] == " -1"]
    print(f"empty {empty_rows.shape[0] / df.shape[0]}")
예제 #6
0
def main_bad():
    """Generate a submission from raw se154 fold predictions.

    Loads averaged per-image probability maps, dumps debug PNGs, converts each
    mask to RLE in parallel, and writes the submission CSV.
    """
    global model_name, n_fold, mask_thresh, min_size_thresh, dilation, dumps_dir
    # NOTE(review): these globals are presumably read by convert_mask inside
    # the worker processes — confirm against its definition.
    model_name = "se154"
    n_fold = 2
    mask_thresh = 0.4892385788890003
    min_size_thresh = 1598
    dilation = 1

    paths = get_paths()
    dumps_dir = osp.join(paths["dumps"]["path"], paths["dumps"]["predictions"])

    y_preds, ids = get_all_data(model_name, n_fold)
    print(ids[:10])
    print(y_preds.shape)

    df = pd.DataFrame()
    df["ImageId"] = ids

    y_preds_lst = list(y_preds)

    # Debug dump of the raw probability maps.
    for n, sample_id in enumerate(ids):
        cv2.imwrite(
            f"/mnt/ssd2/dataset/pneumo/predictions/uint8/se154/debug{sample_id}.png",
            np.uint8(255 * y_preds_lst[n]))

    # BUG FIX: imap_unordered yields results in completion order, so the RLE
    # strings were NOT aligned with df["ImageId"]. imap preserves input order.
    with Pool() as p:
        rles = list(
            tqdm(p.imap(convert_mask, y_preds_lst),
                 total=len(y_preds_lst),
                 desc="converting"))

    df["EncodedPixels"] = rles
    df.drop_duplicates("ImageId", inplace=True)
    os.makedirs("subm", exist_ok=True)

    # Restrict the output to ids expected by the official sample submission.
    sample = pd.read_csv("tables/sample_submission.csv")
    df = df[df["ImageId"].isin(sample["ImageId"])]

    df.to_csv(
        f"subm/{model_name}_nf{n_fold}_{mask_thresh}_{min_size_thresh}_{dilation}.csv",
        index=False)

    empty = df[df["EncodedPixels"] == " -1"]
    print(f"empty {empty.shape[0] / df.shape[0]}")
예제 #7
0
def get_data(model_name="sx101", fold=0):
    """Load validation ground truth and saved predictions for one fold.

    Iterates the validation loader to collect ground-truth masks and ids,
    reads the per-image prediction PNGs in parallel, prints the mean dice,
    and returns (y_preds, y_trues, scores, ids) — all aligned by image id.
    """
    paths = get_paths()
    dumps_dir = osp.join(paths["dumps"]["path"], paths["dumps"]["predictions"])
    dst = osp.join(dumps_dir, model_name, f"{fold}_{model_name}_valid")

    dataset_valid = SIIMDataset_Unet(mode="valid", fold=fold)
    vloader = torch.utils.data.DataLoader(dataset_valid,
                                          batch_size=1,
                                          shuffle=False,
                                          num_workers=NCORE)

    progress_bar = tqdm(enumerate(vloader),
                        total=len(vloader),
                        desc=f"generating masks f{fold}")

    y_trues, ids = [], []
    for i, batch in progress_bar:
        images, targets, batch_ids = batch
        # Binarize the target mask of the single-sample batch.
        y_trues.append(np.array(targets[0, 0] > 0.5))
        ids.append(batch_ids[0])

    filenames = [osp.join(dst, f"{sample_id}.png") for sample_id in ids]
    # BUG FIX: imap_unordered yields in completion order, which broke the
    # pairing between y_preds and y_trues below (dice was computed against
    # the wrong ground truth). imap preserves input order.
    with Pool(NCORE) as p:
        y_preds = list(
            tqdm(p.imap(read_prediction, filenames),
                 total=len(filenames),
                 desc="reading predictions"))

    scores = [dice_coef_metric(yp > 0.5, yt)
              for yp, yt in zip(y_preds, y_trues)]

    print(np.mean(scores))
    return y_preds, y_trues, scores, ids
예제 #8
0
import os

import cv2
import matplotlib.pyplot as plt
import pandas as pd

from n01_config import get_paths

# Dataset section of the project path configuration.
DATA = get_paths()['dataset']
# CIFAR-10 class names; order matters for the per-class loops below.
LABELS = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']


def main():
    """Explore the CIFAR labels table: per-class counts and a sample grid.

    NOTE(review): this snippet appears truncated — `filename` is assigned on
    the last visible line but never used; the plotting tail is missing.
    """
    # 'lables_csv' is the (misspelled) key used in the project config.
    df = pd.read_csv(os.path.join(DATA['path'], DATA['lables_csv']))
    print(df.head())

    # Distinct labels actually present in the table, for a visual check
    # against the hard-coded LABELS list.
    labels = sorted(set(df['label'].tolist()))
    print(labels)

    # Row count per known class name.
    for label in LABELS:
        tdf = df[df['label'] == label]
        print(f'{label}: {tdf.shape[0]}')

    # Build a 3x3 grid of samples from the first class.
    label = LABELS[0]
    tdf = df[df['label'] == label]
    tdf.reset_index(inplace=True, drop=True)

    plt.figure()
    for i in range(9):
        plt.subplot(3,3,1+i)
        filename = f'{tdf.loc[i, "id"]}.png'
예제 #9
0
import numpy as np
from scipy import ndimage
from skimage import measure, morphology
from tqdm import tqdm

from n01_config import get_paths
from n03_loss_metric import dice_coef_metric, dice_coef_metric_batch

# DEVICE = torch.device("cuda:0")

# Cap the BLAS/OpenMP thread pools at one thread each so multiprocessing
# workers do not oversubscribe the CPU.
# NOTE(review): `os` is used here but not imported in the visible lines —
# presumably imported earlier in the full file; confirm.
os.environ["MKL_NUM_THREADS"] = "1"
os.environ["NUMEXPR_NUM_THREADS"] = "1"
os.environ["OMP_NUM_THREADS"] = "1"

# Prediction dump location resolved from the project path configuration.
PATHS = get_paths()
PREDICTS = PATHS["dumps"]["predictions"]


def get_data_npz(model_name="sx101", fold=0, mode="valid"):
    if "+" not in model_name:
        models = [model_name]
    else:
        models = model_name.split("+")

    name_pattern = f"{fold}_{models[0]}_{mode}"
    filename = osp.join(PREDICTS, models[0], name_pattern,
                        f"{name_pattern}_fp32_d.npz")
    tfz = np.load(filename)
    y_preds, ids, gts, disagreements = tfz["outputs"], tfz["ids"], tfz[
        "gts"], tfz['disagreements']
                    rle.append(str(runStart))
                    rle.append(str(runLength))
                    runStart = -1
                    runLength = 0
                    currentPixel = 0
            elif runStart > -1:
                runLength += 1
            lastColor = currentColor
            currentPixel += 1
    return " " + " ".join(rle)


if __name__ == "__main__":
    fold = 5
    model_name = 'UnetSEResNext101'
    paths = get_paths()

    test_dir = os.path.join(paths["dataset"]["path"],
                            paths["dataset"]["test_dir"])

    IMG_SIZE = 1024  # 448
    SMALL_OBJ_THRESHOLD = 2000
    device = torch.device("cuda:0")

    model_ft = torch.load(f"outs/{model_name}_fold{fold}_best.pth")

    dst = 'outs/tmp'
    dst_dir = osp.join(dst, f'{model_name}_fold{fold}')
    os.makedirs(dst_dir, exist_ok=True)

    model_ft.to(device)