def main():
    """Predict on the test set and write ``../output/{EXP_ID}_sub_st2.csv``.

    The function runs five timed stages: derive the unique image IDs from the
    ``ID`` column of ``TEST_PATH``, build an un-shuffled test loader, restore
    the checkpoint at ``model_path``, run ``predict`` and finally reshape the
    prediction matrix into one ``ID``/``Label`` row per (image, target) pair.
    """
    with timer('load data'):
        frame = pd.read_csv(TEST_PATH)
        # Each raw ID is split on '_' into exactly three parts; the second is
        # stored as ``Image`` and the third as ``Diagnosis``.
        frame[['ID', 'Image', 'Diagnosis']] = frame['ID'].str.split(
            '_', expand=True)
        # Pivoting on ``Image`` leaves one row per distinct image.
        frame = (
            frame[['Image', 'Diagnosis', 'Label']]
            .drop_duplicates()
            .pivot(index='Image', columns='Diagnosis', values='Label')
            .reset_index()
        )
        frame['Image'] = 'ID_' + frame['Image']
        frame = frame[["Image"]]
        ids = frame["Image"].values
        gc.collect()

    with timer('preprocessing'):
        crop_size = 512 - 50
        test_augmentation = Compose([
            CenterCrop(crop_size, crop_size, p=1.0),
            Resize(img_size, img_size, p=1),
        ])

        dataset_options = dict(
            id_colname=ID_COLUMNS,
            transforms=test_augmentation,
            black_crop=False,
            subdural_window=True,
            conc_type="concat_all",
            conc_type2="concat_prepost",
            n_tta=N_TTA,
        )
        test_dataset = RSNADatasetTest(frame, img_size, IMAGE_PATH,
                                       **dataset_options)
        # shuffle=False keeps the prediction rows aligned with ``ids``.
        test_loader = DataLoader(test_dataset, batch_size=batch_size,
                                 shuffle=False, num_workers=16,
                                 pin_memory=True)
        del frame, test_dataset
        gc.collect()

    with timer('create model'):
        net = CnnModel(num_classes=N_CLASSES,
                       encoder="se_resnext50_32x4d",
                       pretrained="imagenet",
                       pool_type="avg")
        # Weights are restored into the bare model before it is wrapped.
        net.load_state_dict(torch.load(model_path))
        net.to(device)
        model = torch.nn.DataParallel(net)

    with timer('predict'):
        eps = 1e-6
        pred = np.clip(predict(model, test_loader, device, n_tta=N_TTA),
                       eps, 1 - eps)

    with timer('sub'):
        wide = pd.DataFrame(pred, columns=TARGET_COLUMNS)
        wide["ID"] = ids
        # unstack() turns the (image x target) table into a long series;
        # after reset_index() the target name lives in ``level_0``.
        sub = wide.set_index("ID").unstack().reset_index()
        sub["ID"] = sub["ID"] + "_" + sub["level_0"]
        sub = sub.rename(columns={0: "Label"}).drop(columns="level_0")
        LOGGER.info(sub.head())
        sub.to_csv("../output/{}_sub_st2.csv".format(EXP_ID), index=False)
Ejemplo n.º 2
0
def main():
    """Predict on rows of the training CSV and write ``../output/{EXP_ID}_train.csv``.

    Reads ``TRAIN_PATH``, removes the row for image ``ID_6431af929`` and
    re-points neighbours that referenced it, then runs the checkpoint at
    ``model_path`` over the remaining rows and stores one ``ID``/``Label``
    row per (image, target) pair.
    """
    excluded_image = "ID_6431af929"

    with timer('load data'):
        df = pd.read_csv(TRAIN_PATH)
        # NOTE(review): only the first 10 rows are kept — this looks like a
        # debugging leftover; confirm before relying on the output.
        df = df[:10]
        df = df[df["Image"] != excluded_image].reset_index(drop=True)

        # Rows whose previous/next slice was the excluded image get their own
        # image ID written into the pre1/post1 column instead.
        references_pre = df["pre_SOPInstanceUID"] == excluded_image
        df.loc[references_pre, "pre1_SOPInstanceUID"] = df.loc[
            references_pre, "Image"]
        references_post = df["post_SOPInstanceUID"] == excluded_image
        df.loc[references_post, "post1_SOPInstanceUID"] = df.loc[
            references_post, "Image"]

        df = df[["Image", "pre1_SOPInstanceUID", "post1_SOPInstanceUID"]]
        ids = df["Image"].values
        gc.collect()

    with timer('preprocessing'):
        crop_size = 512 - 50
        test_augmentation = Compose([
            CenterCrop(crop_size, crop_size, p=1.0),
            Resize(img_size, img_size, p=1),
        ])

        dataset_options = dict(
            id_colname=ID_COLUMNS,
            transforms=test_augmentation,
            black_crop=False,
            three_window=True,
            rescaling=False,
            n_tta=N_TTA,
        )
        test_dataset = RSNADatasetTest(df, img_size, IMAGE_PATH,
                                       **dataset_options)
        # shuffle=False keeps the prediction rows aligned with ``ids``.
        test_loader = DataLoader(test_dataset, batch_size=batch_size,
                                 shuffle=False, num_workers=16,
                                 pin_memory=True)
        del df, test_dataset
        gc.collect()

    with timer('create model'):
        net = CnnModel(num_classes=N_CLASSES,
                       encoder="se_resnext50_32x4d",
                       pretrained="imagenet",
                       pool_type="avg")
        # Weights are restored into the bare model before it is wrapped.
        net.load_state_dict(torch.load(model_path))
        net.to(device)
        model = torch.nn.DataParallel(net)

    with timer('predict'):
        eps = 1e-6
        pred = np.clip(predict(model, test_loader, device, n_tta=N_TTA),
                       eps, 1 - eps)

    with timer('sub'):
        wide = pd.DataFrame(pred, columns=TARGET_COLUMNS)
        wide["ID"] = ids
        # unstack() turns the (image x target) table into a long series;
        # after reset_index() the target name lives in ``level_0``.
        sub = wide.set_index("ID").unstack().reset_index()
        sub["ID"] = sub["ID"] + "_" + sub["level_0"]
        sub = sub.rename(columns={0: "Label"}).drop(columns="level_0")
        LOGGER.info(sub.head())
        sub.to_csv("../output/{}_train.csv".format(EXP_ID), index=False)
Ejemplo n.º 3
0
def main():
    """Train ``CnnModel`` on the whole training CSV, checkpointing every epoch.

    Stages (each wrapped in ``timer``):

    1. Load ``TRAIN_PATH``, drop the row for image ``ID_6431af929`` and
       re-point the neighbour columns of rows that referenced it.
    2. Build the augmented, shuffled training loader.
    3. Create the model (optionally resuming from ``model_path``), the
       class-weighted BCE-with-logits loss and the Adam optimizer.
    4. Train for ``epochs`` epochs, multiplying the learning rate by 0.1 when
       epoch 5 starts, and save ``models/{EXP_ID}_ep{epoch}.pth`` after each
       epoch.
    """
    # Function-scope import so this block does not rely on a file-level one.
    import os

    excluded_image = "ID_6431af929"

    with timer('load data'):
        df = pd.read_csv(TRAIN_PATH)
        df = df[df.Image != excluded_image].reset_index(drop=True)

        # (column that may reference the excluded image,
        #  column to overwrite, column to copy the replacement from).
        # Order matters: the pre2/post2 fixes read the pre1/post1 values
        # written by the first two entries.
        neighbour_fixes = [
            ("pre_SOPInstanceUID", "pre1_SOPInstanceUID", "Image"),
            ("post_SOPInstanceUID", "post1_SOPInstanceUID", "Image"),
            ("prepre_SOPInstanceUID", "pre2_SOPInstanceUID",
             "pre1_SOPInstanceUID"),
            ("postpost_SOPInstanceUID", "post2_SOPInstanceUID",
             "post1_SOPInstanceUID"),
        ]
        for neighbour_col, target_col, source_col in neighbour_fixes:
            hits = df[neighbour_col] == excluded_image
            df.loc[hits, target_col] = df.loc[hits, source_col]

        y = df[TARGET_COLUMNS].values
        df = df[["Image", "pre1_SOPInstanceUID", "post1_SOPInstanceUID",
                 "pre2_SOPInstanceUID", "post2_SOPInstanceUID"]]
        gc.collect()

    with timer('preprocessing'):
        train_augmentation = Compose([
            CenterCrop(512 - 50, 512 - 50, p=1.0),
            HorizontalFlip(p=0.5),
            OneOf([
                ElasticTransform(p=0.5, alpha=120, sigma=120 * 0.05,
                                 alpha_affine=120 * 0.03),
                GridDistortion(p=0.5),
                OpticalDistortion(p=1, distort_limit=2, shift_limit=0.5)
            ], p=0.5),
            Rotate(limit=30, border_mode=0, p=0.7),
            Resize(img_size, img_size, p=1)
        ])

        train_dataset = RSNADataset(df, y, img_size, IMAGE_PATH,
                                    id_colname=ID_COLUMNS,
                                    transforms=train_augmentation,
                                    black_crop=False, subdural_window=True,
                                    user_window=2)
        train_loader = DataLoader(train_dataset, batch_size=batch_size,
                                  shuffle=True, num_workers=8,
                                  pin_memory=True)
        del df, train_dataset
        gc.collect()

    with timer('create model'):
        model = CnnModel(num_classes=N_CLASSES, encoder="se_resnext50_32x4d",
                         pretrained="imagenet", pool_type="avg")
        if model_path is not None:
            model.load_state_dict(torch.load(model_path))
        model.to(device)

        # Per-class loss weights: the first target counts double. The tensor
        # is created on the configured ``device`` (previously a hard-coded
        # ``.cuda()``), so it always lives where the model was moved.
        class_weights = torch.tensor([2, 1, 1, 1, 1, 1],
                                     dtype=torch.float32, device=device)
        criterion = torch.nn.BCEWithLogitsLoss(weight=class_weights)
        # The optimizer is built from the bare model's parameters before the
        # DataParallel wrapper is applied.
        optimizer = torch.optim.Adam(model.parameters(), lr=1e-4, eps=1e-4)
        model = torch.nn.DataParallel(model)

    with timer('train'):
        # Make sure the checkpoint directory exists up front; otherwise the
        # first torch.save would fail only after a full epoch of training.
        os.makedirs('models', exist_ok=True)

        for epoch in range(1, epochs + 1):
            if epoch == 5:
                # One-off step decay of the learning rate.
                for param_group in optimizer.param_groups:
                    param_group['lr'] = param_group['lr'] * 0.1
            seed_torch(SEED + epoch)

            LOGGER.info("Starting {} epoch...".format(epoch))
            tr_loss = train_one_epoch(model, train_loader, criterion,
                                      optimizer, device)
            LOGGER.info('Mean train loss: {}'.format(round(tr_loss, 5)))

            # Save the unwrapped module so the checkpoint keys carry no
            # DataParallel "module." prefix.
            torch.save(model.module.state_dict(),
                       'models/{}_ep{}.pth'.format(EXP_ID, epoch))
Ejemplo n.º 4
0
def main():
    """Predict on the test set, post-process, and write ``../output/{EXP_ID}_sub_st2.csv``.

    Reads ``TEST_PATH``, fills missing previous/next slice IDs with the row's
    own ``SOPInstanceUID``, runs the checkpoint at ``model_path`` over the
    test images and hands the predictions, together with the image and
    neighbour IDs, to ``postprocess_multitarget`` before saving.
    """
    with timer('load data'):
        df = pd.read_csv(TEST_PATH)
        # A missing neighbour falls back to the slice itself.
        for neighbour_col in ("post_SOPInstanceUID", "pre_SOPInstanceUID"):
            df[neighbour_col] = df[neighbour_col].fillna(df["SOPInstanceUID"])
        df = df[["Image", "pre_SOPInstanceUID", "post_SOPInstanceUID"]]
        ids, pre_ids, pos_ids = (
            df[col].values
            for col in ("Image", "pre_SOPInstanceUID", "post_SOPInstanceUID")
        )
        gc.collect()

    with timer('preprocessing'):
        crop_size = 512 - 50
        test_augmentation = Compose([
            CenterCrop(crop_size, crop_size, p=1.0),
            Resize(img_size, img_size, p=1),
        ])

        dataset_options = dict(
            id_colname=ID_COLUMNS,
            transforms=test_augmentation,
            black_crop=False,
            subdural_window=True,
            n_tta=N_TTA,
        )
        test_dataset = RSNADatasetTest(df, img_size, IMAGE_PATH,
                                       **dataset_options)
        # shuffle=False keeps the prediction rows aligned with the ID arrays.
        test_loader = DataLoader(test_dataset, batch_size=batch_size,
                                 shuffle=False, num_workers=16,
                                 pin_memory=True)
        del df, test_dataset
        gc.collect()

    with timer('create model'):
        net = CnnModel(num_classes=N_CLASSES,
                       encoder="se_resnext50_32x4d",
                       pretrained="imagenet",
                       pool_type="avg")
        # Weights are restored into the bare model before it is wrapped.
        net.load_state_dict(torch.load(model_path))
        net.to(device)
        model = torch.nn.DataParallel(net)

    with timer('predict'):
        eps = 1e-6
        pred = np.clip(predict(model, test_loader, device, n_tta=N_TTA),
                       eps, 1 - eps)

    with timer('sub'):
        sub = pd.DataFrame(pred, columns=TARGET_COLUMNS)
        # Attach the identifiers consumed by postprocess_multitarget.
        for column, values in (("ID", ids),
                               ("PRE_ID", pre_ids),
                               ("POST_ID", pos_ids)):
            sub[column] = values
        sub = postprocess_multitarget(sub)
        LOGGER.info(sub.head())
        sub.to_csv("../output/{}_sub_st2.csv".format(EXP_ID), index=False)
def main():
    """Predict on external DICOM files and write ``../input_ext/{EXP_ID}_externalv2.csv``.

    Collects every ``*.dcm`` file three directory levels below
    ``../input_ext``, runs the checkpoint at ``model_path`` over them with
    ``predict_external`` and stores the clipped predictions together with the
    ``is_dicom`` values it returns and the file paths.
    """
    with timer('load data'):
        dicom_paths = glob.glob("../input_ext/*/*/*/*.dcm")
        df = pd.DataFrame({"Image": dicom_paths})
        ids = df["Image"].values
        gc.collect()

    with timer('preprocessing'):
        crop_size = 512 - 50
        test_augmentation = Compose([
            CenterCrop(crop_size, crop_size, p=1.0),
            Resize(img_size, img_size, p=1),
        ])

        dataset_options = dict(
            id_colname=ID_COLUMNS,
            transforms=test_augmentation,
            black_crop=False,
            subdural_window=True,
            n_tta=N_TTA,
            img_type="",
            external=True,
        )
        test_dataset = RSNADatasetTest(df, img_size, IMAGE_PATH,
                                       **dataset_options)
        # shuffle=False keeps the prediction rows aligned with ``ids``;
        # num_workers=0 loads the data in the main process.
        test_loader = DataLoader(test_dataset, batch_size=batch_size,
                                 shuffle=False, num_workers=0,
                                 pin_memory=True)
        del df, test_dataset
        gc.collect()

    with timer('create model'):
        net = CnnModel(num_classes=N_CLASSES,
                       encoder="se_resnext50_32x4d",
                       pretrained="imagenet",
                       pool_type="avg")
        # Weights are restored into the bare model before it is wrapped.
        net.load_state_dict(torch.load(model_path))
        net.to(device)
        model = torch.nn.DataParallel(net)

    with timer('predict'):
        pred, is_dicoms = predict_external(model, test_loader, device,
                                           n_tta=N_TTA)
        eps = 1e-6
        pred = np.clip(pred, eps, 1 - eps)

    with timer('sub'):
        sub = pd.DataFrame(pred, columns=TARGET_COLUMNS)
        # Both side arrays are flattened to one value per prediction row.
        sub["is_dicom"] = is_dicoms.reshape(-1)
        sub["Image"] = ids.reshape(-1)
        LOGGER.info(sub.head())
        output_file = "../input_ext/{}_externalv2.csv".format(EXP_ID)
        sub.to_csv(output_file, index=False)
Ejemplo n.º 6
0
def main():
    """Train one cross-validation fold, keep the best epoch, and predict the test set.

    Stages (each wrapped in ``timer``):

    1. Load ``TRAIN_PATH``; targets come from ``TARGET_COLUMNS`` and only the
       ``ID_COLUMNS`` column is kept as model input metadata.
    2. Pick fold ``fold_id`` of a seeded 5-fold stratified split.
    3. Build augmented train and plain validation loaders.
    4. Train for ``epochs`` epochs; whenever the validation ROC-AUC improves,
       save the predictions to ``y_pred.npy`` and the weights to
       ``save_path``. The validation targets are written to ``target.npy``.
    5. Reload the best weights, predict ``TEST_PATH`` with 2x TTA and write
       ``results.csv``.
    """
    with timer('load data'):
        df = pd.read_csv(TRAIN_PATH)
        df["loc_x"] = df["loc_x"] / 100
        df["loc_y"] = df["loc_y"] / 100
        y = df[TARGET_COLUMNS].values
        df = df[[ID_COLUMNS]]
        gc.collect()

    with timer("split data"):
        folds = StratifiedKFold(n_splits=5, shuffle=True,
                                random_state=0).split(df, y)
        # NOTE(review): if fold_id matches none of 0..4 the loop runs to the
        # end and the last fold is used silently.
        for n_fold, (train_index, val_index) in enumerate(folds):
            if n_fold == fold_id:
                break
        # split() yields positional indices, so select rows by position
        # (.iloc) rather than by label (.loc); the two only coincide while
        # the frame keeps its default RangeIndex.
        train_df = df.iloc[train_index]
        val_df = df.iloc[val_index]
        y_train = y[train_index]
        y_val = y[val_index]

    with timer('preprocessing'):
        train_augmentation = Compose([
            HorizontalFlip(p=0.5),
            OneOf([
                ElasticTransform(p=0.5, alpha=120, sigma=120 * 0.05,
                                 alpha_affine=120 * 0.03),
                GridDistortion(p=0.5),
                OpticalDistortion(p=1, distort_limit=2, shift_limit=0.5)
            ], p=0.5),
            RandomBrightnessContrast(p=0.5),
            ShiftScaleRotate(rotate_limit=20, p=0.5),
            Resize(img_size, img_size, p=1)
        ])
        val_augmentation = Compose([
            Resize(img_size, img_size, p=1)
        ])

        train_dataset = KDDataset(train_df, y_train, img_size, IMAGE_PATH,
                                  id_colname=ID_COLUMNS,
                                  transforms=train_augmentation)
        train_loader = DataLoader(train_dataset, batch_size=batch_size,
                                  shuffle=True, num_workers=2,
                                  pin_memory=True)

        val_dataset = KDDataset(val_df, y_val, img_size, IMAGE_PATH,
                                id_colname=ID_COLUMNS,
                                transforms=val_augmentation)
        val_loader = DataLoader(val_dataset, batch_size=batch_size,
                                shuffle=False, num_workers=2,
                                pin_memory=True)
        del df, train_dataset, val_dataset
        gc.collect()

    with timer('create model'):
        model = CnnModel(num_classes=N_CLASSES, encoder="se_resnext50_32x4d",
                         pretrained="../input/pytorch-pretrained-models/se_resnext50_32x4d-a260b3a4.pth",
                         pool_type="avg")
        if model_path is not None:
            model.load_state_dict(torch.load(model_path))
        model.to(device)

        criterion = torch.nn.BCEWithLogitsLoss()
        optimizer = torch.optim.Adam(model.parameters(), lr=1e-4, eps=1e-4)

    with timer('train'):
        best_score = 0
        # Stays None when the loop body never runs (epochs < 1), so the save
        # below cannot hit an undefined name.
        target = None
        for epoch in range(1, epochs + 1):
            seed_torch(SEED + epoch)

            if epoch == epochs - 3:
                # One-off step decay for the final epochs.
                for param_group in optimizer.param_groups:
                    param_group['lr'] = param_group['lr'] * 0.1

            LOGGER.info("Starting {} epoch...".format(epoch))
            tr_loss = train_one_epoch(model, train_loader, criterion,
                                      optimizer, device, N_CLASSES)
            LOGGER.info('Mean train loss: {}'.format(round(tr_loss, 5)))

            y_pred, target, val_loss = validate(model, val_loader, criterion,
                                                device, N_CLASSES)
            score = roc_auc_score(target, y_pred)
            LOGGER.info('Mean val loss: {}'.format(round(val_loss, 5)))
            LOGGER.info('val score: {}'.format(round(score, 5)))

            if score > best_score:
                # Keep only the best epoch's predictions and weights.
                best_score = score
                np.save("y_pred.npy", y_pred)
                torch.save(model.state_dict(), save_path)

        if target is not None:
            np.save("target.npy", target)

    with timer('predict'):
        n_tta = 2  # used by both the test dataset and predict()

        test_df = pd.read_csv(TEST_PATH)
        # NOTE(review): the output IDs are read from the literal "id" column
        # while the dataset is given ID_COLUMNS — confirm they agree.
        test_ids = test_df["id"].values

        test_augmentation = Compose([
            Resize(img_size, img_size, p=1)
        ])
        test_dataset = KDDatasetTest(test_df, img_size, TEST_IMAGE_PATH,
                                     id_colname=ID_COLUMNS,
                                     transforms=test_augmentation,
                                     n_tta=n_tta)
        test_loader = DataLoader(test_dataset, batch_size=batch_size,
                                 shuffle=False, num_workers=2,
                                 pin_memory=True)

        # Predict with the best checkpoint, not the last epoch's weights.
        model.load_state_dict(torch.load(save_path))

        pred = predict(model, test_loader, device, N_CLASSES, n_tta=n_tta)
        # Logged rather than printed, matching the rest of this function.
        LOGGER.info(pred.shape)
        results = pd.DataFrame({"id": test_ids,
                                "is_star": pred.reshape(-1)})

        results.to_csv("results.csv", index=False)