def main():
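    """Stage-2 test inference: rebuild image IDs from the stacked label file,
    predict with the SE-ResNeXt model under TTA, and write a long-format submission."""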
    with timer('load data'):
        df = pd.read_csv(TEST_PATH)
        df[['ID', 'Image', 'Diagnosis']] = df['ID'].str.split('_', expand=True)
        df = df[['Image', 'Diagnosis', 'Label']]
        df.drop_duplicates(inplace=True)
        df = df.pivot(index='Image', columns='Diagnosis',
                      values='Label').reset_index()
        df['Image'] = 'ID_' + df['Image']
        df = df[["Image"]]
        ids = df["Image"].values
        gc.collect()

    with timer('preprocessing'):
        test_augmentation = Compose([
            CenterCrop(512 - 50, 512 - 50, p=1.0),
            Resize(img_size, img_size, p=1)
        ])

        test_dataset = RSNADatasetTest(df,
                                       img_size,
                                       IMAGE_PATH,
                                       id_colname=ID_COLUMNS,
                                       transforms=test_augmentation,
                                       black_crop=False,
                                       subdural_window=True,
                                       conc_type="concat_all",
                                       conc_type2="concat_prepost",
                                       n_tta=N_TTA)
        test_loader = DataLoader(test_dataset,
                                 batch_size=batch_size,
                                 shuffle=False,
                                 num_workers=16,
                                 pin_memory=True)
        del df, test_dataset
        gc.collect()

    with timer('create model'):
        model = CnnModel(num_classes=N_CLASSES,
                         encoder="se_resnext50_32x4d",
                         pretrained="imagenet",
                         pool_type="avg")
        model.load_state_dict(torch.load(model_path))
        model.to(device)
        model = torch.nn.DataParallel(model)

    with timer('predict'):
        pred = predict(model, test_loader, device, n_tta=N_TTA)
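        # clip probabilities away from exact 0 and 1 so the log loss stays finite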
        pred = np.clip(pred, 1e-6, 1 - 1e-6)

    with timer('sub'):
        sub = pd.DataFrame(pred, columns=TARGET_COLUMNS)
        sub["ID"] = ids
        sub = sub.set_index("ID")
        sub = sub.unstack().reset_index()
        sub["ID"] = sub["ID"] + "_" + sub["level_0"]
        sub = sub.rename(columns={0: "Label"})
        sub = sub.drop("level_0", axis=1)
        LOGGER.info(sub.head())
        sub.to_csv("../output/{}_sub_st2.csv".format(EXP_ID), index=False)
Example #2
def main():
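    """Inference over the training metadata (only the first 10 rows here); rows whose
    pre/post slice pointers reference the corrupted image ID_6431af929 are repaired
    to point back at the row's own image."""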
    with timer('load data'):
        df = pd.read_csv(TRAIN_PATH)[:10]
        df = df[df.Image != "ID_6431af929"].reset_index(drop=True)
        df.loc[df.pre_SOPInstanceUID == "ID_6431af929",
               "pre1_SOPInstanceUID"] = df.loc[df.pre_SOPInstanceUID ==
                                               "ID_6431af929", "Image"]
        df.loc[df.post_SOPInstanceUID == "ID_6431af929",
               "post1_SOPInstanceUID"] = df.loc[df.post_SOPInstanceUID ==
                                                "ID_6431af929", "Image"]
        df = df[["Image", "pre1_SOPInstanceUID", "post1_SOPInstanceUID"]]
        ids = df["Image"].values
        gc.collect()

    with timer('preprocessing'):
        test_augmentation = Compose([
            CenterCrop(512 - 50, 512 - 50, p=1.0),
            Resize(img_size, img_size, p=1)
        ])

        test_dataset = RSNADatasetTest(df,
                                       img_size,
                                       IMAGE_PATH,
                                       id_colname=ID_COLUMNS,
                                       transforms=test_augmentation,
                                       black_crop=False,
                                       three_window=True,
                                       rescaling=False,
                                       n_tta=N_TTA)
        test_loader = DataLoader(test_dataset,
                                 batch_size=batch_size,
                                 shuffle=False,
                                 num_workers=16,
                                 pin_memory=True)
        del df, test_dataset
        gc.collect()

    with timer('create model'):
        model = CnnModel(num_classes=N_CLASSES,
                         encoder="se_resnext50_32x4d",
                         pretrained="imagenet",
                         pool_type="avg")
        model.load_state_dict(torch.load(model_path))
        model.to(device)
        model = torch.nn.DataParallel(model)

    with timer('predict'):
        pred = predict(model, test_loader, device, n_tta=N_TTA)
        pred = np.clip(pred, 1e-6, 1 - 1e-6)

    with timer('sub'):
        sub = pd.DataFrame(pred, columns=TARGET_COLUMNS)
        sub["ID"] = ids
        sub = sub.set_index("ID")
        sub = sub.unstack().reset_index()
        sub["ID"] = sub["ID"] + "_" + sub["level_0"]
        sub = sub.rename(columns={0: "Label"})
        sub = sub.drop("level_0", axis=1)
        LOGGER.info(sub.head())
        sub.to_csv("../output/{}_train.csv".format(EXP_ID), index=False)
def predict_loss(prev, paths, split_df):
    # prev: true value
    # paths: paths to the model weights
    t_preds = []
    for tm in range(3):
        tf.reset_default_graph()
        t_preds.append(
            predict(paths[-1:],
                    build_hparams(hparams.params_s32),
                    back_offset=0,
                    predict_window=288,
                    n_models=3,
                    target_model=tm,
                    seed=2,
                    batch_size=50,
                    asgd=True,
                    split_df=split_df))
    preds = sum(t_preds) / 3
    preds.index = [idx.decode('ascii') for idx in preds.index]
    # mean mae
    res = 0
    for idx in preds.index:
        res += np.abs(preds.loc[idx, :] - prev.loc[idx, -288:]).sum()
    res /= len(preds.index) * 288
    return preds, res
Example #4
def predict_loss(paths):
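    # Unlike the variants above, this returns only the averaged predictions
    # of the three target models (no error metric).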
    # paths: paths to the model weights
    t_preds = []
    for tm in range(3):
        tf.reset_default_graph()
        t_preds.append(predict(paths[-1:], build_hparams(hparams.params_s32),
                               n_models=3, target_model=tm, seed=5,
                               batch_size=50, asgd=True))
    preds = sum(t_preds) / 3
    return preds
Example #5
def main():
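    """Inference for the pre/post-slice model: missing neighbour-slice IDs fall back
    to the image's own SOPInstanceUID before prediction."""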
    with timer('load data'):
        df = pd.read_csv(TEST_PATH)
        df["post_SOPInstanceUID"] = df["post_SOPInstanceUID"].fillna(
            df["SOPInstanceUID"])
        df["pre_SOPInstanceUID"] = df["pre_SOPInstanceUID"].fillna(
            df["SOPInstanceUID"])
        df = df[["Image", "pre_SOPInstanceUID", "post_SOPInstanceUID"]]
        ids = df["Image"].values
        pre_ids = df["pre_SOPInstanceUID"].values
        pos_ids = df["post_SOPInstanceUID"].values
        gc.collect()

    with timer('preprocessing'):
        test_augmentation = Compose([
            CenterCrop(512 - 50, 512 - 50, p=1.0),
            Resize(img_size, img_size, p=1)
        ])

        test_dataset = RSNADatasetTest(df,
                                       img_size,
                                       IMAGE_PATH,
                                       id_colname=ID_COLUMNS,
                                       transforms=test_augmentation,
                                       black_crop=False,
                                       subdural_window=True,
                                       n_tta=N_TTA)
        test_loader = DataLoader(test_dataset,
                                 batch_size=batch_size,
                                 shuffle=False,
                                 num_workers=16,
                                 pin_memory=True)
        del df, test_dataset
        gc.collect()

    with timer('create model'):
        model = CnnModel(num_classes=N_CLASSES,
                         encoder="se_resnext50_32x4d",
                         pretrained="imagenet",
                         pool_type="avg")
        model.load_state_dict(torch.load(model_path))
        model.to(device)
        model = torch.nn.DataParallel(model)

    with timer('predict'):
        pred = predict(model, test_loader, device, n_tta=N_TTA)
        pred = np.clip(pred, 1e-6, 1 - 1e-6)

    with timer('sub'):
        sub = pd.DataFrame(pred, columns=TARGET_COLUMNS)
        sub["ID"] = ids
        sub["PRE_ID"] = pre_ids
        sub["POST_ID"] = pos_ids
        sub = postprocess_multitarget(sub)
        LOGGER.info(sub.head())
        sub.to_csv("../output/{}_sub_st2.csv".format(EXP_ID), index=False)
def predict(self, text_x):
    if not hasattr(self, 'dataset'):
        raise ValueError(
            'Cannot call predict or evaluate on an untrained model. Train first!'
        )
    self.dataset.clear_test_set()
    for _x in text_x:
        self.dataset.add_data_entry(_x.tolist(), 0, part='test')
    return predict(
        model=self.model,
        iterator_function=self.dataset.get_next_test_batch,
        _batch_count=self.dataset.initialize_test_batches(),
        cuda_device=0 if self.cuda else -1,
    )
Example #7
def index():
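    """Render the landing page with a sentiment prediction for the submitted text."""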
    learning_gif_url = get_gif_url("learning")

    # Get search query
    if request.method == "POST":
        user_input = request.form.get("user_input")
    # Without a query, fall back to a friendly default message
    else:
        user_input = "You're awesome!"
    print("user_input = ", user_input)

    # Predict and get sentiment text
    sentiment = predict(user_input, cv, log_model)
    return render_template("index.html",
                           user_input=user_input,
                           sentiment=sentiment,
                           learning_gif_url=learning_gif_url)
Example #8
def predict_loss(prev, paths):
    # prev: true value
    # paths: paths to the model weights
    t_preds = []
    for tm in range(3):
        tf.reset_default_graph()
        t_preds.append(
            predict(paths[-1:],
                    build_hparams(hparams.params_s32),
                    n_models=3,
                    target_model=tm,
                    seed=5,
                    batch_size=50,
                    asgd=True))
    preds = sum(t_preds) / 3
    # mean absolute percentage error (averaged over 72 predictions)
    res = 0
    for idx in preds.index:
        res += np.abs(preds[idx] - prev[idx]) / prev[idx]
    res /= 72
    return preds, res
Example #9
def main():
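    """Train one stratified fold of the SE-ResNeXt50 classifier, keep the best
    checkpoint by validation ROC-AUC, then predict the test set with TTA."""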
    with timer('load data'):
        df = pd.read_csv(TRAIN_PATH)
        df["loc_x"] = df["loc_x"] / 100
        df["loc_y"] = df["loc_y"] / 100
        y = df[TARGET_COLUMNS].values
        df = df[[ID_COLUMNS]]
        gc.collect()

    with timer("split data"):
        folds = StratifiedKFold(n_splits=5, shuffle=True, random_state=0).split(df, y)
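        # advance through the folds until the requested fold_id is reached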
        for n_fold, (train_index, val_index) in enumerate(folds):
            train_df = df.loc[train_index]
            val_df = df.loc[val_index]
            y_train = y[train_index]
            y_val = y[val_index]
            if n_fold == fold_id:
                break

    with timer('preprocessing'):
        train_augmentation = Compose([
            HorizontalFlip(p=0.5),
            OneOf([
                ElasticTransform(p=0.5, alpha=120, sigma=120 * 0.05, alpha_affine=120 * 0.03),
                GridDistortion(p=0.5),
                OpticalDistortion(p=1, distort_limit=2, shift_limit=0.5)
            ], p=0.5),
            RandomBrightnessContrast(p=0.5),
            ShiftScaleRotate(rotate_limit=20, p=0.5),
            Resize(img_size, img_size, p=1)
        ])
        val_augmentation = Compose([
            Resize(img_size, img_size, p=1)
        ])

        train_dataset = KDDataset(train_df, y_train, img_size, IMAGE_PATH, id_colname=ID_COLUMNS,
                                  transforms=train_augmentation)
        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=2, pin_memory=True)

        val_dataset = KDDataset(val_df, y_val, img_size, IMAGE_PATH, id_colname=ID_COLUMNS,
                                transforms=val_augmentation)
        val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=2, pin_memory=True)
        del df, train_dataset, val_dataset
        gc.collect()

    with timer('create model'):
        model = CnnModel(num_classes=N_CLASSES, encoder="se_resnext50_32x4d",
                         pretrained="../input/pytorch-pretrained-models/se_resnext50_32x4d-a260b3a4.pth",
                         pool_type="avg")
        if model_path is not None:
            model.load_state_dict(torch.load(model_path))
        model.to(device)

        criterion = torch.nn.BCEWithLogitsLoss()
        optimizer = torch.optim.Adam(model.parameters(), lr=1e-4, eps=1e-4)

        # model, optimizer = amp.initialize(model, optimizer, opt_level="O1", verbosity=0)

    with timer('train'):
        best_score = 0
        for epoch in range(1, epochs + 1):
            seed_torch(SEED + epoch)

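            # three epochs before the end, decay the learning rate by 10x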
            if epoch == epochs - 3:
                for param_group in optimizer.param_groups:
                    param_group['lr'] = param_group['lr'] * 0.1

            LOGGER.info("Starting {} epoch...".format(epoch))
            tr_loss = train_one_epoch(model, train_loader, criterion, optimizer, device, N_CLASSES)
            LOGGER.info('Mean train loss: {}'.format(round(tr_loss, 5)))

            y_pred, target, val_loss = validate(model, val_loader, criterion, device, N_CLASSES)
            score = roc_auc_score(target, y_pred)
            LOGGER.info('Mean val loss: {}'.format(round(val_loss, 5)))
            LOGGER.info('val score: {}'.format(round(score, 5)))

            if score > best_score:
                best_score = score
                np.save("y_pred.npy", y_pred)
                torch.save(model.state_dict(), save_path)

        np.save("target.npy", target)

    with timer('predict'):
        test_df = pd.read_csv(TEST_PATH)
        test_ids = test_df["id"].values

        test_augmentation = Compose([
            Resize(img_size, img_size, p=1)
        ])
        test_dataset = KDDatasetTest(test_df, img_size, TEST_IMAGE_PATH, id_colname=ID_COLUMNS,
                                     transforms=test_augmentation, n_tta=2)
        test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=2, pin_memory=True)

        model.load_state_dict(torch.load(save_path))

        pred = predict(model, test_loader, device, N_CLASSES, n_tta=2)
        print(pred.shape)
        results = pd.DataFrame({"id": test_ids,
                                "is_star": pred.reshape(-1)})

        results.to_csv("results.csv", index=False)
Example #10
def main(seed):
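    """Holdout threshold search: for each class, tune the classifier gate that
    zeroes out predicted masks and report the resulting dice."""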
    with timer('load data'):
        df = pd.read_csv(FOLD_PATH)

    with timer('preprocessing'):
        val_df = df[df.fold_id == FOLD_ID]

        val_augmentation = None
        val_dataset = SeverDataset(val_df,
                                   IMG_DIR,
                                   IMG_SIZE,
                                   N_CLASSES,
                                   id_colname=ID_COLUMNS,
                                   transforms=val_augmentation)
        val_loader = DataLoader(val_dataset,
                                batch_size=BATCH_SIZE,
                                shuffle=False,
                                num_workers=8)

        del val_df, df, val_dataset
        gc.collect()

    with timer('create model'):
        model = smp.Unet('resnet34',
                         encoder_weights="imagenet",
                         classes=N_CLASSES,
                         encoder_se_module=True,
                         decoder_semodule=True,
                         h_columns=False,
                         skip=True,
                         act="swish",
                         freeze_bn=True,
                         classification=CLASSIFICATION,
                         attention_type="cbam")
        model.load_state_dict(torch.load(base_model))
        model.to(device)

        criterion = torch.nn.BCEWithLogitsLoss()

    with timer('predict'):
        valid_loss, y_pred, y_true, cls = predict(
            model,
            val_loader,
            criterion,
            device,
            classification=CLASSIFICATION)
        LOGGER.info('Mean valid loss: {}'.format(round(valid_loss, 5)))

        scores = []
        for i, (th, remove_mask_pixel) in enumerate(zip(ths, remove_pixels)):
            sum_val_preds = np.sum(
                y_pred[:, i, :, :].reshape(len(y_pred), -1) > th, axis=1)
            cls_ = cls[:, i]

            best = 0
            for th_cls in np.linspace(0, 1, 101):
                val_preds_ = copy.deepcopy(y_pred[:, i, :, :])
                val_preds_[sum_val_preds < remove_mask_pixel] = 0
                val_preds_[cls_ <= th_cls] = 0
                scores_ = []  # per-sample dice at this candidate threshold
                for y_val_, y_pred_ in zip(y_true[:, i, :, :], val_preds_):
                    score = dice(y_val_, y_pred_ > 0.5)
                    if np.isnan(score):
                        scores_.append(1)
                    else:
                        scores_.append(score)
                if np.mean(scores_) >= best:
                    best = np.mean(scores_)
                    best_th = th_cls
                else:
                    break
            LOGGER.info('dice={} on {}'.format(best, best_th))
            scores.append(best)

        LOGGER.info('holdout dice={}'.format(np.mean(scores)))
Example #11
def main(seed):
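    """Sweep the post_process binarisation threshold per class on the holdout
    fold and report the best dice (stopping after 3 non-improvements)."""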
    with timer('load data'):
        df = pd.read_csv(FOLD_PATH)
        if N_CLASSES == 3:
            df.drop("EncodedPixels_2", axis=1, inplace=True)
            df = df.rename(columns={"EncodedPixels_3": "EncodedPixels_2"})
            df = df.rename(columns={"EncodedPixels_4": "EncodedPixels_3"})

    with timer('preprocessing'):
        val_df = df[df.fold_id == FOLD_ID]

        val_augmentation = None
        val_dataset = SeverDataset(val_df, IMG_DIR, IMG_SIZE, N_CLASSES, id_colname=ID_COLUMNS,
                                  transforms=val_augmentation)
        val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=8)

        del val_df, df, val_dataset
        gc.collect()

    with timer('create model'):
        model = smp_old.Unet('resnet34', encoder_weights="imagenet", classes=N_CLASSES, encoder_se_module=True,
                         decoder_semodule=True, h_columns=False, skip=True, act="swish", freeze_bn=True,
                         classification=CLASSIFICATION)
        model.load_state_dict(torch.load(base_model))
        model.to(device)
        model.eval()

        criterion = torch.nn.BCEWithLogitsLoss()

    with timer('predict'):
        valid_loss, y_pred, y_true, cls = predict(model, val_loader, criterion, device, classification=CLASSIFICATION)
        LOGGER.info('Mean valid loss: {}'.format(round(valid_loss, 5)))

        scores = []
        all_scores = []
        min_sizes = [300, 0, 600, 1600]
        for i in range(N_CLASSES):
            if i == 1:
                continue
            best = 0
            count = 0
            min_size = min_sizes[i]
            all_scores_ = []  # mean dice per candidate threshold for this class
            for th in [0.7 + j * 0.01 for j in range(30)]:
                val_preds_ = copy.deepcopy(y_pred[:, i, :, :])
                scores_ = []
                for y_val_, y_pred_ in zip(y_true[:, i, :, :], val_preds_):
                    y_pred_ = post_process(y_pred_ > 0.5, y_pred_, min_size, th)
                    score = dice(y_val_, y_pred_)
                    if np.isnan(score):
                        scores_.append(1)
                    else:
                        scores_.append(score)
                LOGGER.info('dice={} on {}'.format(np.mean(scores_), th))
                all_scores_.append(np.mean(scores_))
                if np.mean(scores_) >= best:
                    best = np.mean(scores_)
                    count = 0
                else:
                    count += 1
                if count == 3:
                    break
            scores.append(best)
            all_scores.append(all_scores_)

        LOGGER.info('holdout dice={}'.format(np.mean(scores)))
Example #12
def main(seed):
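    """Two-stage holdout evaluation: classification models score each image first,
    and their probabilities gate the segmentation predictions."""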
    with timer('load data'):
        df = pd.read_csv(FOLD_PATH)
        y1 = (df.EncodedPixels_1 != "-1").astype("float32").values.reshape(-1, 1)
        y2 = (df.EncodedPixels_2 != "-1").astype("float32").values.reshape(-1, 1)
        y3 = (df.EncodedPixels_3 != "-1").astype("float32").values.reshape(-1, 1)
        y4 = (df.EncodedPixels_4 != "-1").astype("float32").values.reshape(-1, 1)
        y = np.concatenate([y1, y2, y3, y4], axis=1)

    with timer('preprocessing'):
        val_df = df[df.fold_id == FOLD_ID]
        y_val = y[df.fold_id == FOLD_ID]

        val_dataset = SeverCLSDataset(val_df, IMG_DIR, IMG_SIZE, N_CLASSES, y_val, id_colname=ID_COLUMNS,
                                      transforms=None)
        val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=8, pin_memory=True)

        models = []

        model = cls_models.SEResNext(num_classes=N_CLASSES)
        model.load_state_dict(torch.load(base_model_cls))
        model.to(device)
        model.eval()
        models.append(model)

        """model = cls_models.ResNet(num_classes=N_CLASSES, pretrained=None)
        model.load_state_dict(torch.load(base_model_res))
        model.to(device)
        model.eval()
        models.append(model)"""

    with timer('predict cls'):
        criterion = torch.nn.BCEWithLogitsLoss()

        valid_loss, y_val, y_true = predict_cls(models, val_loader, criterion, device)
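        # y_val now holds the classifier's predicted probabilities; it is passed
        # to SeverDataset below as class_y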
        #y_val = np.load("../exp_cls/y_pred_ema_ckpt8.npy")
        LOGGER.info("val loss={}".format(valid_loss))

    with timer('preprocessing seg'):
        val_augmentation = None
        val_dataset = SeverDataset(val_df, IMG_DIR, IMG_SIZE, N_CLASSES, id_colname=ID_COLUMNS,
                                  transforms=val_augmentation, class_y=y_val)
        val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=8)

        del val_df, df, val_dataset
        gc.collect()

    with timer('create model'):
        model = smp.Unet('resnet34', encoder_weights="imagenet", classes=N_CLASSES, encoder_se_module=True,
                         decoder_semodule=True, h_columns=False, skip=True, act="swish", freeze_bn=True,
                         classification=CLASSIFICATION, attention_type="cbam", center=True)
        model.load_state_dict(torch.load(base_model))
        model.to(device)

        criterion = torch.nn.BCEWithLogitsLoss()

    with timer('predict'):
        valid_loss, y_pred, y_true, cls = predict(model, val_loader, criterion, device, classification=CLASSIFICATION)
        LOGGER.info('Mean valid loss: {}'.format(round(valid_loss, 5)))

        scores = []
        for i, (th, remove_mask_pixel) in enumerate(zip(ths, remove_pixels)):
            if i <= 1:
                continue
            sum_val_preds = np.sum(y_pred[:, i, :, :].reshape(len(y_pred), -1) > th, axis=1)
            cls_ = cls[:, i]

            best = 0
            for th_cls in np.linspace(0, 1, 101):
                val_preds_ = copy.deepcopy(y_pred[:, i, :, :])
                val_preds_[sum_val_preds < remove_mask_pixel] = 0
                val_preds_[cls_ <= th_cls] = 0
                scores_ = []  # per-sample dice at this candidate threshold
                for y_val_, y_pred_ in zip(y_true[:, i, :, :], val_preds_):
                    score = dice(y_val_, y_pred_ > 0.5)
                    if np.isnan(score):
                        scores_.append(1)
                    else:
                        scores_.append(score)
                if np.mean(scores_) >= best:
                    best = np.mean(scores_)
                    best_th = th_cls
                #else:
                #    break
            LOGGER.info('dice={} on {}'.format(best, best_th))
            scores.append(best)

        LOGGER.info('holdout dice={}'.format(np.mean(scores)))
Example #13
    #                  embed_size=len(dct),
    #                  device=device)

    # model = Bi_RNN_ATTN(class_num=len(CATEGIRY_LIST),
    #                     embed_size=len(dct),
    #                     embed_dim=64,
    #                     device=device)

    lr = 0.001
    optimizer = optim.Adam(model.parameters(), lr=lr)

    # train
    logger.info('training...')
    history = trainer.train(model,
                            optimizer,
                            train_dl,
                            valid_dl,
                            device=device,
                            epochs=5)

    # evaluate
    loss, acc = trainer.evaluate(model, valid_dl, device=device)

    # predict
    logger.info('predicting...')
    y_pred = trainer.predict(model, test_dl, device=device)

    y_true = test_ds.labels
    test_acc = (y_true == y_pred).sum() / y_pred.shape[0]
    logger.info('test - acc: {}'.format(test_acc))
Example #14
    raw_smape = smape(true, pred)
    masked_smape = np.ma.array(raw_smape, mask=np.isnan(raw_smape))
    return masked_smape.mean()

from make_features import read_all
df_all = read_all()
df_all.columns

prev = df_all

paths = [p for p in tf.train.get_checkpoint_state('data/cpt/s32').all_model_checkpoint_paths]
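# collect every checkpoint path saved for the s32 model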

t_preds = []
for tm in range(3):
    tf.reset_default_graph()
    t_preds.append(predict(paths, build_hparams(hparams.params_s32),
                           back_offset=0, predict_window=63,
                           n_models=3, target_model=tm, seed=2,
                           batch_size=2048, asgd=True))

preds = sum(t_preds) / 3

missing_pages = prev.index.difference(preds.index)
# Use zeros for missing pages
rmdf = pd.DataFrame(index=missing_pages,
                    data=np.tile(0, (len(preds.columns),len(missing_pages))).T, columns=preds.columns)
f_preds = preds.append(rmdf).sort_index()

# Zero out negative and sub-0.5 predictions
f_preds[f_preds < 0.5] = 0
# Round predictions to the nearest integer
f_preds = np.round(f_preds).astype(np.int64)

Example #15
import argparse
from trainer import train, evaluate, predict

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Run the trainer in train, eval or predict mode.')
    parser.add_argument('mode', help='train, eval or predict')
    args = parser.parse_args()

    if args.mode == "train":
        train()
    if args.mode == "eval":
        evaluate()
    if args.mode == "predict":
        predict()
Example #16
def main(seed):
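    """Evaluate an ensemble of U-Net variants on the holdout fold, sweeping the
    minimum-mask-pixel filter per class."""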
    with timer('load data'):
        df = pd.read_csv(FOLD_PATH)

    with timer('preprocessing'):
        val_df = df[df.fold_id == FOLD_ID]

        val_augmentation = None
        val_dataset = SeverDataset(val_df,
                                   IMG_DIR,
                                   IMG_SIZE,
                                   N_CLASSES,
                                   id_colname=ID_COLUMNS,
                                   transforms=val_augmentation)
        val_loader = DataLoader(val_dataset,
                                batch_size=BATCH_SIZE,
                                shuffle=False,
                                num_workers=8)

        del val_df, df, val_dataset
        gc.collect()

    with timer('create model'):
        models = []
        for p in base_model_res:
            model = smp.Unet('resnet34',
                             encoder_weights="imagenet",
                             classes=N_CLASSES,
                             encoder_se_module=True,
                             decoder_semodule=True,
                             h_columns=False,
                             skip=True,
                             act="swish",
                             freeze_bn=True,
                             classification=CLASSIFICATION,
                             attention_type="cbam",
                             center=True)
            model.load_state_dict(torch.load(p))
            model.to(device)
            model.eval()
            models.append(model)

        model = smp_old.Unet('resnet34',
                             encoder_weights="imagenet",
                             classes=N_CLASSES,
                             encoder_se_module=True,
                             decoder_semodule=True,
                             h_columns=False,
                             skip=True,
                             act="swish",
                             freeze_bn=True,
                             classification=CLASSIFICATION)
        model.load_state_dict(torch.load(base_model_res_old))
        model.to(device)
        model.eval()
        models.append(model)

        criterion = torch.nn.BCEWithLogitsLoss()

    with timer('predict'):
        valid_loss, y_pred, y_true, cls = predict(
            models,
            val_loader,
            criterion,
            device,
            classification=CLASSIFICATION)
        LOGGER.info('Mean valid loss: {}'.format(round(valid_loss, 5)))

        scores = []
        all_scores = []
        for i, th in enumerate(ths):
            sum_val_preds = np.sum(
                y_pred[:, i, :, :].reshape(len(y_pred), -1) > th, axis=1)

            best = 0
            all_scores_ = []  # mean dice per candidate pixel threshold
            for n_th, remove_mask_pixel in enumerate(
                    [200, 400, 600, 800, 1000, 1200, 1400, 1600, 1800]):
                val_preds_ = copy.deepcopy(y_pred[:, i, :, :])
                val_preds_[sum_val_preds < remove_mask_pixel] = 0
                scores_ = []
                for y_val_, y_pred_ in zip(y_true[:, i, :, :], val_preds_):
                    score = dice(y_val_, y_pred_ > 0.5)
                    if np.isnan(score):
                        scores_.append(1)
                    else:
                        scores_.append(score)
                LOGGER.info('dice={} on {}'.format(np.mean(scores_),
                                                   remove_mask_pixel))
                if np.mean(scores_) >= best:
                    best = np.mean(scores_)
                all_scores_.append(np.mean(scores_))
            scores.append(best)
            all_scores.append(all_scores_)

        LOGGER.info('holdout dice={}'.format(np.mean(scores)))
        np.save("all_scores_fold{}.npy".format(FOLD_ID), np.array(all_scores))