Example #1
def run(config_file):
    config = load_config(config_file)

    os.makedirs(config.work_dir, exist_ok=True)
    save_config(config, config.work_dir + '/config.yml')

    os.environ['CUDA_VISIBLE_DEVICES'] = '0'

    all_transforms = {}
    all_transforms['train'] = get_transforms(config.transforms.train)
    all_transforms['valid'] = get_transforms(config.transforms.test)

    dataloaders = {
        phase: make_loader(
            data_folder=config.data.train_dir,
            df_path=config.data.train_df_path,
            phase=phase,
            batch_size=config.train.batch_size,
            num_workers=config.num_workers,
            idx_fold=config.data.params.idx_fold,
            transforms=all_transforms[phase],
            num_classes=config.data.num_classes,
            pseudo_label_path=config.train.pseudo_label_path,
            task='cls'
        )
        for phase in ['train', 'valid']
    }

    # create model
    model = CustomNet(config.model.encoder, config.data.num_classes)

    # train setting
    criterion = get_loss(config)
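    # separate learning rates: pretrained base parameters vs. the freshly initialized head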
    params = [
        {'params': model.base_params(), 'lr': config.optimizer.params.encoder_lr},
        {'params': model.fresh_params(), 'lr': config.optimizer.params.decoder_lr}
    ]
    optimizer = get_optimizer(params, config)
    scheduler = get_scheduler(optimizer, config)

    # model runner
    runner = SupervisedRunner(model=model)

    callbacks = [MultiClassAccuracyCallback(threshold=0.5), F1ScoreCallback()]
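    # resume from the full checkpoint if a previous run already saved one in work_dir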
    if os.path.exists(config.work_dir + '/checkpoints/best.pth'):
        callbacks.append(CheckpointCallback(resume=config.work_dir + '/checkpoints/best_full.pth'))

    # model training
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        loaders=dataloaders,
        logdir=config.work_dir,
        num_epochs=config.train.num_epochs,
        callbacks=callbacks,
        verbose=True,
        fp16=True,
    )
Example #2
    def predict_loader(
        self,
        loader: DataLoader,
        resume: str = None,
        verbose: bool = False,
        state_kwargs: Dict = None,
        fp16: Union[Dict, bool] = None,
        check: bool = False,
    ):
        loaders = OrderedDict([("infer", loader)])

        callbacks = OrderedDict([("inference", InferCallback())])
        if resume is not None:
            callbacks["loader"] = CheckpointCallback(resume=resume)

        self.infer(model=self.model,
                   loaders=loaders,
                   callbacks=callbacks,
                   verbose=verbose,
                   state_kwargs=state_kwargs,
                   fp16=fp16,
                   check=check)

        output = callbacks["inference"].predictions
        if isinstance(self.output_key, str):
            output = output[self.output_key]

        return output
Example #3
def valid_model(runner, model, valid_loader, valid_dataset, log_dir):
    encoded_pixels = []
    loaders = {"infer": valid_loader}
    runner.infer(
        model=model,
        loaders=loaders,
        callbacks=[CheckpointCallback(resume=log_dir+'/checkpoints/best.pth'),
                   InferCallback()
                   ],
    )
    valid_masks = []
    probabilities = np.zeros((2220, HEIGHT_TRAIN, WIDTH_TRAIN))
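    # one (H, W) probability map per (image, class) pair: i indexes validation images, j the 4 class channels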
    for i, (batch, output) in enumerate(tqdm(zip(valid_dataset, runner.callbacks[0].predictions["logits"]))):
        image, mask = batch
        for m in mask:
            if m.shape != (HEIGHT_TRAIN, WIDTH_TRAIN):
                m = cv2.resize(m, dsize=(WIDTH_TRAIN, HEIGHT_TRAIN),
                               interpolation=cv2.INTER_LINEAR)
            valid_masks.append(m)

        for j, probability in enumerate(output):
            if probability.shape != (HEIGHT_TRAIN, WIDTH_TRAIN):
                probability = cv2.resize(probability, dsize=(
                    WIDTH_TRAIN, HEIGHT_TRAIN), interpolation=cv2.INTER_LINEAR)
            probabilities[i * 4 + j, :, :] = probability
    return probabilities, valid_masks
Example #4
def get_callbacks(config: Dict):
    return [
        CriterionCallback(**config["criterion_callback_params"]),
        OptimizerCallback(**config["optimizer_callback_params"]),
        CheckpointCallback(save_n_best=3),
        EarlyStoppingCallback(**config["early_stopping"]),
    ]
Example #5
 def set_callbacks(self):
     callbacks = [
         LossCallback(),
         CheckpointCallback(save_n_best=2),
         OptimizerCallback(),
         ConsoleLogger(),
         TensorboardLogger(),
     ]
     return callbacks
Example #6
def generate_test_preds(class_params):

    preprocessing_fn = smp.encoders.get_preprocessing_fn(ENCODER, ENCODER_WEIGHTS)
    dummy_dataset = CloudDataset(df=sub, datatype='test', img_ids=test_ids[:1],  transforms=get_validation_augmentation(),
                                preprocessing=get_preprocessing(preprocessing_fn))
    dummy_loader = DataLoader(dummy_dataset, batch_size=1, shuffle=False, num_workers=0)

    model = smp.FPN(
        encoder_name=ENCODER,
        encoder_weights=ENCODER_WEIGHTS,
        classes=4,
        activation=ACTIVATION,
    )
    runner = SupervisedRunner(model)

    # HACK: We are loading a few examples from our dummy loader so catalyst will properly load the weights
    # from our checkpoint
    loaders = {"test": dummy_loader}
    runner.infer(
        model=model,
        loaders=loaders,
        callbacks=[
            CheckpointCallback(
                resume=f"{logdir}/checkpoints/best.pth"),
            InferCallback()
        ],
    )

    # Now we do real inference on the full dataset
    test_dataset = CloudDataset(df=sub, datatype='test', img_ids=test_ids,  transforms=get_validation_augmentation(),
                                preprocessing=get_preprocessing(preprocessing_fn))
    test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False, num_workers=0)

    encoded_pixels = []
    image_id = 0
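    # the four class channels of each image are processed consecutively, so per-class threshold/min-size are looked up via image_id % 4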
    for i, test_batch in enumerate(tqdm.tqdm(test_loader)):
        runner_out = runner.predict_batch({"features": test_batch[0].cuda()})['logits'].cpu().detach().numpy()
        for batch in runner_out:
            for probability in batch:

                # probability = probability.cpu().detach().numpy()
                if probability.shape != (350, 525):
                    probability = cv2.resize(probability, dsize=(525, 350), interpolation=cv2.INTER_LINEAR)
                predict, num_predict = post_process(sigmoid(probability), class_params[image_id % 4][0],
                                                    class_params[image_id % 4][1])
                if num_predict == 0:
                    encoded_pixels.append('')
                else:
                    r = mask2rle(predict)
                    encoded_pixels.append(r)
                image_id += 1

    print("Saving submission...")
    sub['EncodedPixels'] = encoded_pixels
    sub.to_csv('submission.csv', columns=['Image_Label', 'EncodedPixels'], index=False)
    print("Saved.")
Example #7
 def set_callbacks(self):
     callbacks = [
         # IouCallback(activation = "Softmax2d"),
         LossCallback(),
         CheckpointCallback(save_n_best=2),
         OptimizerCallback(),
         ConsoleLogger(),
         TensorboardLogger(),
     ]
     return callbacks
Example #8
    def prepare_callbacks(*, args, mode, stage=None, **kwargs):
        callbacks = collections.OrderedDict()

        if mode == "train":
            if stage == "debug":
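                # debug stage: run the LR range finder on top of the basic loss/optimizer/metric callbacks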
                callbacks["stage"] = StageCallback()
                callbacks["loss"] = LossCallback(
                    emb_l2_reg=kwargs.get("emb_l2_reg", -1))
                callbacks["optimizer"] = OptimizerCallback(
                    grad_clip=kwargs.get("grad_clip", None))
                callbacks["metrics"] = BaseMetrics()
                callbacks["lr-finder"] = LRFinder(
                    final_lr=kwargs.get("final_lr", 0.1),
                    n_steps=kwargs.get("n_steps", None))
                callbacks["logger"] = Logger()
                callbacks["tflogger"] = TensorboardLogger()
            else:
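                # full training stage: add the MAP@k metric, checkpoint saving and LR scheduling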
                callbacks["stage"] = StageCallback()
                callbacks["loss"] = LossCallback(
                    emb_l2_reg=kwargs.get("emb_l2_reg", -1))
                callbacks["optimizer"] = OptimizerCallback(
                    grad_clip=kwargs.get("grad_clip", None))
                callbacks["metrics"] = BaseMetrics()
                callbacks["map"] = MapKCallback(
                    map_args=kwargs.get("map_args", [3]))
                callbacks["saver"] = CheckpointCallback(save_n_best=getattr(
                    args, "save_n_best", 7),
                                                        resume=args.resume)

                # Pytorch scheduler callback
                callbacks["scheduler"] = SchedulerCallback(
                    reduce_metric="map03")

                callbacks["logger"] = Logger()
                callbacks["tflogger"] = TensorboardLogger()
        elif mode == "infer":
            callbacks["saver"] = CheckpointCallback(resume=args.resume)
            callbacks["infer"] = InferCallback(out_prefix=args.out_prefix)
        else:
            raise NotImplementedError

        return callbacks
Example #9
def train(args):
    set_random_seed(42)
    model = get_model(args.network)
    print('Loading model')
    model.encoder.conv1 = nn.Conv2d(
        count_channels(args.channels), 64, kernel_size=(7, 7),
        stride=(2, 2), padding=(3, 3), bias=False)
    model, device = UtilsFactory.prepare_model(model)

    train_df = pd.read_csv(args.train_df).to_dict('records')
    val_df = pd.read_csv(args.val_df).to_dict('records')

    ds = Dataset(args.channels, args.dataset_path, args.image_size, args.batch_size, args.num_workers)
    loaders = ds.create_loaders(train_df, val_df)
    print(loaders['train'].dataset.data)

    criterion = BCE_Dice_Loss(bce_weight=0.2)
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer, milestones=[10, 20, 40], gamma=0.3
    )

    save_path = os.path.join(
        args.logdir,
        '_'.join([args.network, *args.channels])
    )

    # model runner
    runner = SupervisedRunner()

    # model training
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        loaders=loaders,
        callbacks=[
            DiceCallback()
        ],
        logdir=save_path,
        num_epochs=args.epochs,
        verbose=True
    )

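    # reload the best checkpoint from this run and collect predictions over the validation loader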
    infer_loader = collections.OrderedDict([('infer', loaders['valid'])])
    runner.infer(
        model=model,
        loaders=infer_loader,
        callbacks=[
            CheckpointCallback(resume=f'{save_path}/checkpoints/best.pth'),
            InferCallback()
        ],
    )
Example #10
def generate_valid_preds(args):

    train_ids, valid_ids, logdir = args
    preprocessing_fn = smp.encoders.get_preprocessing_fn(ENCODER, 'imagenet')
    valid_dataset = CloudDataset(
        df=train,
        datatype='valid',
        img_ids=valid_ids,
        transforms=get_validation_augmentation(),
        preprocessing=get_preprocessing(preprocessing_fn))
    valid_loader = DataLoader(valid_dataset,
                              batch_size=1,
                              shuffle=False,
                              num_workers=0)

    model = smp.Unet(
        encoder_name=ENCODER,
        encoder_weights=ENCODER_WEIGHTS,
        classes=4,
        activation=ACTIVATION,
    )

    runner = SupervisedRunner()
    # Generate validation predictions
    loaders = {"infer": valid_loader}
    runner.infer(
        model=model,
        loaders=loaders,
        callbacks=[
            CheckpointCallback(resume=f"{logdir}/checkpoints/best.pth"),
            InferCallback()
        ],
    )

    valid_preds = np.load('data/valid_preds.npy')

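    # logits are CHW: transpose to HWC for cv2.resize (dsize is (width, height)), then back to CHW,
    # and write the 4 class maps into the valid_preds rows belonging to this im_id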
    for im_id, preds in zip(valid_ids,
                            runner.callbacks[0].predictions["logits"]):

        preds = preds.transpose((1, 2, 0))
        preds = cv2.resize(preds, (525, 350))
        preds = preds.transpose((2, 0, 1))

        indexes = train.index[train['im_id'] == im_id]
        valid_preds[indexes[0]] = preds[0]  # fish
        valid_preds[indexes[1]] = preds[1]  # flower
        valid_preds[indexes[2]] = preds[2]  # gravel
        valid_preds[indexes[3]] = preds[3]  # sugar

    np.save('data/valid_preds.npy', valid_preds)

    return True
Example #11
def train(args):
    set_random_seed(42)
    for fold in range(args.folds):
        model = get_model(args.network)

        print("Loading model")
        model, device = UtilsFactory.prepare_model(model)
        train_df = pd.read_csv(
            os.path.join(args.dataset_path,
                         f'train{fold}.csv')).to_dict('records')
        val_df = pd.read_csv(os.path.join(args.dataset_path,
                                          f'val{fold}.csv')).to_dict('records')

        ds = Dataset(args.channels, args.dataset_path, args.image_size,
                     args.batch_size, args.num_workers)
        loaders = ds.create_loaders(train_df, val_df)

        criterion = BCE_Dice_Loss(bce_weight=0.2)

        optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
        scheduler = torch.optim.lr_scheduler.MultiStepLR(
            optimizer, milestones=[10, 20, 40], gamma=0.3)

        # model runner
        runner = SupervisedRunner()

        save_path = os.path.join(args.logdir, f'fold{fold}')

        # model training
        runner.train(model=model,
                     criterion=criterion,
                     optimizer=optimizer,
                     scheduler=scheduler,
                     loaders=loaders,
                     callbacks=[DiceCallback()],
                     logdir=save_path,
                     num_epochs=args.epochs,
                     verbose=True)

        infer_loader = collections.OrderedDict([("infer", loaders["valid"])])
        runner.infer(
            model=model,
            loaders=infer_loader,
            callbacks=[
                CheckpointCallback(resume=f'{save_path}/checkpoints/best.pth'),
                InferCallback()
            ],
        )

        print(f'Fold {fold} ended')
Example #12
    def predict_loader(
        self,
        model: Model,
        loader: DataLoader,
        resume: str = None,
        verbose: bool = False,
        state_kwargs: Dict = None,
        fp16: Union[Dict, bool] = None,
        check: bool = False,
    ) -> Any:
        """
        Makes a prediction on the whole loader with the specified model.

        Args:
            model (Model): model to infer
            loader (DataLoader): ``torch.utils.data.DataLoader``
                to run inference on
            resume (str): path to checkpoint for model
            verbose (bool): if True, it displays the status of the inference
                to the console.
            state_kwargs (dict): additional state params to ``RunnerState``
            fp16 (Union[Dict, bool]): If not None, then sets inference to FP16.
                See https://nvidia.github.io/apex/amp.html#properties
                if fp16=True, params by default will be ``{"opt_level": "O1"}``
            check (bool): if True, then only checks that pipeline is working
                (3 epochs only)
        """
        loaders = OrderedDict([("infer", loader)])

        callbacks = OrderedDict([("inference", InferCallback())])
        if resume is not None:
            callbacks["loader"] = CheckpointCallback(resume=resume)

        self.infer(
            model=model,
            loaders=loaders,
            callbacks=callbacks,
            verbose=verbose,
            state_kwargs=state_kwargs,
            fp16=fp16,
            check=check
        )

        output = callbacks["inference"].predictions
        if isinstance(self.output_key, str):
            output = output[self.output_key]

        return output
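
A minimal usage sketch for the API above, assuming the pre-1.0 catalyst releases these snippets target; the model, dataset and checkpoint path are placeholders rather than objects taken from the examples:

from torch.utils.data import DataLoader
from catalyst.dl import SupervisedRunner

# model is any trained nn.Module and test_dataset any torch Dataset,
# built e.g. as in the smp.Unet examples elsewhere on this page
runner = SupervisedRunner(model=model)
test_loader = DataLoader(test_dataset, batch_size=8, shuffle=False, num_workers=0)
logits = runner.predict_loader(
    model=model,
    loader=test_loader,
    resume="logs/checkpoints/best.pth",  # placeholder checkpoint path
    verbose=True,
)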
Example #13
def make_predictions(runner, model, loader, y_true):
    runner.infer(
        model=model,
        loaders=loader,
        callbacks=[
            CheckpointCallback(resume=f"{LOG_DIR}/checkpoints/best.pth"),
            InferCallback(),
        ],
        verbose=True)

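    # logits gathered by the InferCallback; argmax over the class dimension gives the predicted labels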
    y_preds = runner.callbacks[0].predictions['logits'].argmax(1)

    acc = calc_accuracy(y_preds, y_true)
    acc1, acc2 = calc_accuracy_per_cls(y_preds, y_true)
    f1 = calc_f1_score(y_preds, y_true)

    return {'acc': acc, 'acc1': acc1, 'acc2': acc2, 'f1': f1}
Example #14
def train(args):
    model = Autoencoder_Unet(encoder_name='resnet50')

    print("Loading model")
    model, device = UtilsFactory.prepare_model(model)

    train_df = pd.read_csv(args.train_df).to_dict('records')
    val_df = pd.read_csv(args.val_df).to_dict('records')
    ds = AutoencoderDataset(args.channels, args.dataset_path, args.image_size,
                            args.batch_size, args.num_workers)
    loaders = ds.create_loaders(train_df, val_df)

    criterion = MSELoss()

    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=1e-3,
                                 weight_decay=1e-5)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                     milestones=[10, 20, 40],
                                                     gamma=0.3)

    # model runner
    runner = SupervisedRunner()

    # model training
    runner.train(model=model,
                 criterion=criterion,
                 optimizer=optimizer,
                 callbacks=[],
                 scheduler=scheduler,
                 loaders=loaders,
                 logdir=args.logdir,
                 num_epochs=args.epochs,
                 verbose=True)

    infer_loader = collections.OrderedDict([("infer", loaders["valid"])])
    runner.infer(
        model=model,
        loaders=infer_loader,
        callbacks=[
            CheckpointCallback(resume=f"{args.logdir}/checkpoints/best.pth"),
            InferCallback()
        ],
    )
Example #15
 def inference(self, model):
     if not os.path.exists((os.path.join(self.__rez_dir, "mask"))):
         os.mkdir(os.path.join(self.__rez_dir, "mask"))
     if not os.path.exists((os.path.join(self.__rez_dir, "overlay"))):
         os.mkdir(os.path.join(self.__rez_dir, "overlay"))
     model = model
     loaders = self.__get_data()
     runner = SupervisedRunner()
     runner.infer(
         model=model,
         loaders=loaders,
         verbose=True,
         callbacks=[
             CheckpointCallback(resume=os.path.join(
                 self.__logs_dir, "checkpoints/best.pth")),
             InferCallback(),
         ],
     )
     sigmoid = lambda x: 1 / (1 + np.exp(-x))
     for i, (input, output) in enumerate(
             zip(self.loader.image_list,
                 runner.callbacks[1].predictions["logits"])):
         threshold = self.threshold
         output = sigmoid(output)
         image_path = input
         file_name = image_path[0].split("/")[-1]
         image = cv2.imread(image_path[0])
         canvas = (output[0] > threshold).astype(np.uint8) * 255
         canvas = np.squeeze(canvas)
         original_height, original_width = image.shape[:2]
         canvas = CenterCrop(p=1,
                             height=original_height,
                             width=original_width)(image=canvas)["image"]
         canvas = np.reshape(canvas, list(canvas.shape) + [1])
         overlay = make_overlay(image, canvas)
         cv2.imwrite(os.path.join(self.__rez_dir, "mask", file_name),
                     canvas)
         cv2.imwrite(os.path.join(self.__rez_dir, "overlay", file_name),
                     overlay)
Example #16
 def inference(self, model):
     if not os.path.exists((os.path.join(self.__rez_dir, "mask"))):
         os.mkdir(os.path.join(self.__rez_dir, "mask"))
     if not os.path.exists((os.path.join(self.__rez_dir, "overlay"))):
         os.mkdir(os.path.join(self.__rez_dir, "overlay"))
     model = model
     loaders = self.__get_data()
     runner = SupervisedRunner()
     runner.infer(
         model=model,
         loaders=loaders,
         verbose=True,
         callbacks=[
             CheckpointCallback(resume=os.path.join(
                 self.__logs_dir, "checkpoints/best.pth")),
             InferCallback(),
         ],
     )
     sigmoid = lambda x: 1 / (1 + np.exp(-x))
     for i, (input, output) in enumerate(
             zip(self.loader.image_list,
                 runner.callbacks[1].predictions["logits"])):
         threshold = self.threshold
         classes = np.argmax(output, axis=0)
         image_path = input
         file_name = image_path[0].split("/")[-1]
         image = cv2.imread(image_path[0])
         original_height, original_width = image.shape[:2]
         classes_cropped = CenterCrop(
             p=1, height=original_height,
             width=original_width)(image=classes)["image"]
         overlay = output2final(classes_cropped)
         plt.imsave(os.path.join(self.__rez_dir, "mask", file_name),
                    classes_cropped)
         plt.imsave(os.path.join(self.__rez_dir, "overlay", file_name),
                    overlay)
Example #17
    def get_callbacks(self):
        from catalyst.dl.callbacks import CriterionAggregatorCallback, \
                                          CriterionCallback
        seg_loss_name = self.criterion_params["seg_loss"].lower()
        clf_loss_name = self.criterion_params["clf_loss"].lower()
        callbacks_list = [
                          CriterionCallback(prefix="seg_loss",
                                            input_key="seg_targets",
                                            output_key="seg_logits",
                                            criterion_key=seg_loss_name),
                          CriterionCallback(prefix="clf_loss",
                                            input_key="clf_targets",
                                            output_key="clf_logits",
                                            criterion_key=clf_loss_name),
                          CriterionAggregatorCallback(prefix="loss",
                                                      loss_keys=\
                                                      ["seg_loss", "clf_loss"]),
                          EarlyStoppingCallback(**self.cb_params["earlystop"]),
                          ]

        ckpoint_params = self.cb_params["checkpoint_params"]
        if ckpoint_params["checkpoint_path"] is not None:  # hacky way to say no checkpoint callback but eh what the heck
            mode = ckpoint_params["mode"].lower()
            if mode == "full":
                print("Stateful loading...")
                ckpoint_p = Path(ckpoint_params["checkpoint_path"])
                fname = ckpoint_p.name
                # everything in the path besides the base file name
                resume_dir = str(ckpoint_p.parents[0])
                print(f"Loading {fname} from {resume_dir}. \
                      \nCheckpoints will also be saved in {resume_dir}.")
                # adding the checkpoint callback
                callbacks_list = callbacks_list + [CheckpointCallback(resume=fname,
                                                                      resume_dir=resume_dir),]
            elif mode == "model_only":
                print("Loading weights into model...")
                self.model = load_weights_train(ckpoint_params["checkpoint_path"], self.model)
        print(f"Callbacks: {callbacks_list}")
        return callbacks_list
Example #18
 def get_callbacks(self):
     callbacks_list = [PrecisionRecallF1ScoreCallback(num_classes=4),#DiceCallback(),
                       EarlyStoppingCallback(**self.cb_params["earlystop"]),
                       AccuracyCallback(**self.cb_params["accuracy"]),
                       ]
     ckpoint_params = self.cb_params["checkpoint_params"]
      if ckpoint_params["checkpoint_path"] is not None:  # hacky way to say no checkpoint callback but eh what the heck
         mode = ckpoint_params["mode"].lower()
         if mode == "full":
             print("Stateful loading...")
             ckpoint_p = Path(ckpoint_params["checkpoint_path"])
             fname = ckpoint_p.name
             # everything in the path besides the base file name
             resume_dir = str(ckpoint_p.parents[0])
             print(f"Loading {fname} from {resume_dir}. \
                   \nCheckpoints will also be saved in {resume_dir}.")
             # adding the checkpoint callback
             callbacks_list = callbacks_list + [CheckpointCallback(resume=fname,
                                                                   resume_dir=resume_dir),]
         elif mode == "model_only":
             print("Loading weights into model...")
             self.model = load_weights_train(ckpoint_params["checkpoint_path"], self.model)
     return callbacks_list
Example #19
        # We can specify the callbacks list for the experiment;
        # For this task, we will check accuracy, AUC and F1 metrics
        callbacks=[
            AccuracyCallback(num_classes=config.num_classes),
            AUCCallback(
                num_classes=config.num_classes,
                input_key="targets_one_hot",
                class_names=config.class_names
            ),
            F1ScoreCallback(
                input_key="targets_one_hot",
                activation="Softmax"
            ),
            CheckpointCallback(
                save_n_best=1,
                #             resume_dir="./models/classification",
                metrics_filename="metrics.json"
            ),
            EarlyStoppingCallback(
                patience=config.patience,
                metric="auc/_mean",
                minimize=False
            )
        ],
        # path to save logs
        logdir=config.logdir,

        num_epochs=config.num_epochs,

        # save our best checkpoint by AUC metric
        main_metric="auc/_mean",
Example #20
def main():
    args = get_args()

    config = py2cfg(args.config_path)

    train_batch_size = config.train_parameters.train_batch_size
    val_batch_size = config.train_parameters.val_batch_size
    model = config.model

    train_samples = get_samples("train", config)
    val_samples = get_samples("val", config)

    train_aug = config.train_augmentations

    val_aug = config.val_augmentations

    if config.train_parameters.tta == "lr":
        model = TTAWrapper(model, fliplr_image2mask)
    elif config.train_parameters.tta == "d4":
        model = TTAWrapper(model, d4_image2mask)

    if config.train_parameters.sync_bn:
        model = apex.parallel.convert_syncbn_model(model)

    train_loader = DataLoader(
        SegmentationDataset(
            train_samples,
            train_aug,
            num_samples=config.num_samples,
            downsample_mask_factor=config.train_parameters.
            downsample_mask_factor,
        ),
        batch_size=train_batch_size,
        num_workers=args.num_workers,
        pin_memory=True,
        drop_last=True,
    )

    valid_loader = DataLoader(
        SegmentationDataset(val_samples, val_aug),
        batch_size=val_batch_size,
        num_workers=args.num_workers,
        pin_memory=True,
        drop_last=False,
    )

    data_loaders = OrderedDict()
    data_loaders["train"] = train_loader
    data_loaders["valid"] = valid_loader

    callbacks = config.callbacks
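    # warm-start from an existing checkpoint when a path is supplied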
    if args.checkpoint_path is not None:
        callbacks += [CheckpointCallback(resume=args.checkpoint_path)]

    # model training
    runner = SupervisedRunner()
    runner.train(
        model=model,
        criterion=config.loss,
        optimizer=config.optimizer,
        callbacks=callbacks,
        logdir=config.logdir,
        loaders=data_loaders,
        num_epochs=config.train_parameters.num_epochs,
        scheduler=config.scheduler,
        verbose=True,
        minimize_metric=True,
        fp16=config.train_parameters.fp16,
    )
Example #21
def find_class_params(args):
    runner = SupervisedRunner()
    model = create_model(args.encoder_type)
    valid_loader = get_train_val_loaders(args.encoder_type,
                                         batch_size=args.batch_size)['valid']

    encoded_pixels = []
    loaders = {"infer": valid_loader}
    runner.infer(
        model=model,
        loaders=loaders,
        callbacks=[CheckpointCallback(resume=args.ckp),
                   InferCallback()],
    )
    print(runner.callbacks)
    valid_masks = []
    probabilities = np.zeros((2220, 350, 525))
    for i, (batch, output) in enumerate(
            tqdm(
                zip(valid_loader.dataset,
                    runner.callbacks[0].predictions["logits"]))):
        image, mask = batch
        for m in mask:
            if m.shape != (350, 525):
                m = cv2.resize(m,
                               dsize=(525, 350),
                               interpolation=cv2.INTER_LINEAR)
            valid_masks.append(m)

        for j, probability in enumerate(output):
            if probability.shape != (350, 525):
                probability = cv2.resize(probability,
                                         dsize=(525, 350),
                                         interpolation=cv2.INTER_LINEAR)
            probabilities[i * 4 + j, :, :] = probability

    class_params = {}
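    # grid-search a sigmoid threshold and a minimum component size per class,
    # keeping the pair with the best mean Dice against the validation masks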
    for class_id in range(4):
        print(class_id)
        attempts = []
        for t in range(0, 100, 5):
            t /= 100
            #for ms in [0, 100, 1200, 5000, 10000]:
            for ms in [5000, 10000, 15000, 20000, 22500, 25000, 30000]:

                masks = []
                for i in range(class_id, len(probabilities), 4):
                    probability = probabilities[i]
                    predict, num_predict = post_process(
                        sigmoid(probability), t, ms)
                    masks.append(predict)

                d = []
                for i, j in zip(masks, valid_masks[class_id::4]):
                    if (i.sum() == 0) & (j.sum() == 0):
                        d.append(1)
                    else:
                        d.append(dice(i, j))

                attempts.append((t, ms, np.mean(d)))

        attempts_df = pd.DataFrame(attempts,
                                   columns=['threshold', 'size', 'dice'])

        attempts_df = attempts_df.sort_values('dice', ascending=False)
        print(attempts_df.head())
        best_threshold = attempts_df['threshold'].values[0]
        best_size = attempts_df['size'].values[0]

        class_params[class_id] = (best_threshold, best_size)
    print(class_params)
    return class_params, runner
Example #22
# arch = 'linknet'
model, preprocessing_fn = get_model(encoder, type=arch)

valid_dataset, loaders = get_loaders(bs, num_workers, preprocessing_fn)

train_loader = loaders['train']
valid_loader = loaders['valid']

print("Loading model")
runner = SupervisedRunner()
encoded_pixels = []
loaders = {"infer": valid_loader}
runner.infer(
    model=model,
    loaders=loaders,
    callbacks=[CheckpointCallback(resume=model_path),
               InferCallback()],
)
loaders['train'] = train_loader
loaders['valid'] = valid_loader

size = (320, 480)
if load_params:
    print(">>>> Loading params")
    with open(output_name + "_params.pkl", 'rb') as handle:
        class_params = pickle.load(handle)
else:
    print("Learning threshold and min area")
    valid_masks = []
    LIMIT = 800
    probabilities = np.zeros((int(LIMIT * 4), 320, 480))  #HARDCODED FOR NOW
Example #23
                                          'in_channels': 3
                                          })}
model = models[args.model.lower()][0](**models[args.model.lower()][1])
encoded_pixels = []
loaders = {"infer": valid_loader}
logdir = f'./logs/{args.model}/fold_{args.fold}'
gc.collect()
runner = SupervisedRunner(model=model,
                          device='cuda',
                          input_key='image',
                          input_target_key='mask')
runner.infer(
    model=model,
    loaders=loaders,
    callbacks=[
        CheckpointCallback(
            resume=f"{logdir}/checkpoints/best.pth"),
        InferCallback()
    ],
    # fp16={"opt_level": "O1"},
)
valid_masks = []
probabilities = np.zeros((2220, 350, 525))
for i, (batch, output) in enumerate(tqdm.tqdm(zip(
        valid_dataset, runner.callbacks[0].predictions["logits"]))):
    gc.collect()
    image, mask = batch
    for m in mask:
        if m.shape != (350, 525):
            m = cv2.resize(m, dsize=(525, 350), interpolation=cv2.INTER_LINEAR)
        valid_masks.append(m)
Example #24
def generate_class_params(i_dont_know_how_to_return_values_without_map):

    preprocessing_fn = smp.encoders.get_preprocessing_fn(
        ENCODER, ENCODER_WEIGHTS)
    valid_dataset = CloudDataset(
        df=train,
        datatype='valid',
        img_ids=valid_ids,
        transforms=get_validation_augmentation(),
        preprocessing=get_preprocessing(preprocessing_fn))
    valid_loader = DataLoader(valid_dataset,
                              batch_size=1,
                              shuffle=False,
                              num_workers=0)

    model = smp.Unet(
        encoder_name=ENCODER,
        encoder_weights=ENCODER_WEIGHTS,
        classes=4,
        activation=ACTIVATION,
    )

    runner = SupervisedRunner()
    # Generate validation predictions
    encoded_pixels = []
    loaders = {"infer": valid_loader}
    runner.infer(
        model=model,
        loaders=loaders,
        callbacks=[
            CheckpointCallback(resume=f"{logdir}/checkpoints/best.pth"),
            InferCallback()
        ],
    )

    valid_masks = []
    probabilities = np.zeros((2220, 350, 525))
    for i, (batch, output) in enumerate(
            tqdm.tqdm(
                zip(valid_dataset,
                    runner.callbacks[0].predictions["logits"]))):
        image, mask = batch
        for m in mask:
            if m.shape != (350, 525):
                m = cv2.resize(m,
                               dsize=(525, 350),
                               interpolation=cv2.INTER_LINEAR)
            valid_masks.append(m)

        for j, probability in enumerate(output):
            if probability.shape != (350, 525):
                probability = cv2.resize(probability,
                                         dsize=(525, 350),
                                         interpolation=cv2.INTER_LINEAR)
            probabilities[i * 4 + j, :, :] = probability

    class_params = {}
    for class_id in range(4):
        print(class_id)
        attempts = []
        for t in range(30, 100, 5):
            t /= 100
            for ms in [1200, 5000, 10000]:
                masks = []
                for i in range(class_id, len(probabilities), 4):
                    probability = probabilities[i]
                    predict, num_predict = post_process(
                        sigmoid(probability), t, ms)
                    masks.append(predict)

                d = []
                for i, j in zip(masks, valid_masks[class_id::4]):
                    if (i.sum() == 0) & (j.sum() == 0):
                        d.append(1)
                    else:
                        d.append(dice(i, j))

                attempts.append((t, ms, np.mean(d)))

        attempts_df = pd.DataFrame(attempts,
                                   columns=['threshold', 'size', 'dice'])

        attempts_df = attempts_df.sort_values('dice', ascending=False)
        print(attempts_df.head())
        best_threshold = attempts_df['threshold'].values[0]
        best_size = attempts_df['size'].values[0]

        class_params[class_id] = (best_threshold, best_size)

    return class_params
Example #25
                                                           patience=10,
                                                           verbose=True)

    # the only tricky part
    n_epochs = 120
    # logdir = "/tmp/runs/"
    logdir = "/tmp/runs_se_resnext50/"

    callbacks = collections.OrderedDict()

    callbacks['f1_score'] = F1ScoreCallback()
    callbacks["loss"] = ClassificationLossCallback()
    callbacks["optimizer"] = OptimizerCallback()

    callbacks["scheduler"] = SchedulerCallback(reduce_metric="f1_score")

    callbacks["saver"] = CheckpointCallback()
    callbacks["logger"] = Logger()
    callbacks["tflogger"] = TensorboardLogger()

    runner = ClassificationRunner(model=model,
                                  criterion=FocalLoss(),
                                  optimizer=optimizer,
                                  scheduler=scheduler)

    runner.train(loaders=loaders,
                 callbacks=callbacks,
                 logdir=logdir,
                 epochs=n_epochs,
                 verbose=True)
Example #26
def main(args):
    """
    Main code for training a U-Net with a user-defined encoder.
    Args:
        args (instance of argparse.ArgumentParser): arguments must be compiled with parse_args
    Returns:
        None
    """
    # setting up the train/val split with filenames
    train, sub, id_mask_count = setup_train_and_sub_df(args.dset_path)
    # seeding, then a stratified train/val split over image ids
    seed_everything(args.split_seed)
    train_ids, valid_ids = train_test_split(id_mask_count["im_id"].values,
                                            random_state=args.split_seed,
                                            stratify=id_mask_count["count"],
                                            test_size=args.test_size)
    # setting up model (U-Net with ImageNet Encoders)
    ENCODER_WEIGHTS = "imagenet"
    DEVICE = "cuda"

    attention_type = None if args.attention_type == "None" else args.attention_type
    model = smp.Unet(encoder_name=args.encoder,
                     encoder_weights=ENCODER_WEIGHTS,
                     classes=4,
                     activation=None,
                     attention_type=attention_type)
    preprocessing_fn = smp.encoders.get_preprocessing_fn(
        args.encoder, ENCODER_WEIGHTS)

    # Setting up the I/O
    train_dataset = SteelDataset(
        args.dset_path,
        df=train,
        datatype="train",
        im_ids=train_ids,
        transforms=get_training_augmentation(),
        preprocessing=get_preprocessing(preprocessing_fn),
        use_resized_dataset=args.use_resized_dataset)
    valid_dataset = SteelDataset(
        args.dset_path,
        df=train,
        datatype="valid",
        im_ids=valid_ids,
        transforms=get_validation_augmentation(),
        preprocessing=get_preprocessing(preprocessing_fn),
        use_resized_dataset=args.use_resized_dataset)

    train_loader = DataLoader(train_dataset,
                              batch_size=args.batch_size,
                              shuffle=True,
                              num_workers=args.num_workers)
    valid_loader = DataLoader(valid_dataset,
                              batch_size=args.batch_size,
                              shuffle=False,
                              num_workers=args.num_workers)

    loaders = {"train": train_loader, "valid": valid_loader}
    # everything is saved here (i.e. weights + stats)
    logdir = "./logs/segmentation"

    # model, criterion, optimizer
    optimizer = torch.optim.Adam([
        {
            "params": model.decoder.parameters(),
            "lr": args.decoder_lr
        },
        {
            "params": model.encoder.parameters(),
            "lr": args.encoder_lr
        },
    ])
    scheduler = ReduceLROnPlateau(optimizer, factor=0.15, patience=2)
    criterion = smp.utils.losses.BCEDiceLoss(eps=1.)
    runner = SupervisedRunner()

    callbacks_list = [
        DiceCallback(),
        EarlyStoppingCallback(patience=5, min_delta=0.001),
    ]
    if args.checkpoint_path != "None":  # hacky way to say no checkpoint callback but eh what the heck
        ckpoint_p = Path(args.checkpoint_path)
        fname = ckpoint_p.name
        resume_dir = str(ckpoint_p.parents[0]
                         )  # everything in the path besides the base file name
        print(
            f"Loading {fname} from {resume_dir}. Checkpoints will also be saved in {resume_dir}."
        )
        callbacks_list = callbacks_list + [
            CheckpointCallback(resume=fname, resume_dir=resume_dir),
        ]

    runner.train(model=model,
                 criterion=criterion,
                 optimizer=optimizer,
                 scheduler=scheduler,
                 loaders=loaders,
                 callbacks=callbacks_list,
                 logdir=logdir,
                 num_epochs=args.num_epochs,
                 verbose=True)
Example #27
def training(train_ids, valid_ids, num_split, encoder, decoder):
    """
    Model training.
    """
    train = "./data/Clouds_Classify/train.csv"

    # Data overview
    train = pd.read_csv(open(train))
    train.head()

    train['label'] = train['Image_Label'].apply(lambda x: x.split('_')[1])
    train['im_id'] = train['Image_Label'].apply(lambda x: x.split('_')[0])

    ENCODER = encoder
    ENCODER_WEIGHTS = 'imagenet'

    if decoder == 'unet':
        model = smp.Unet(
            encoder_name=ENCODER,
            encoder_weights=ENCODER_WEIGHTS,
            classes=4,
            activation=None,
        )
    else:
        model = smp.FPN(
            encoder_name=ENCODER,
            encoder_weights=ENCODER_WEIGHTS,
            classes=4,
            activation=None,
        )
    preprocessing_fn = smp.encoders.get_preprocessing_fn(
        ENCODER, ENCODER_WEIGHTS)

    num_workers = 4
    bs = 12
    train_dataset = CloudDataset(
        df=train,
        transforms=get_training_augmentation(),
        datatype='train',
        img_ids=train_ids,
        preprocessing=get_preprocessing(preprocessing_fn))
    valid_dataset = CloudDataset(
        df=train,
        transforms=get_validation_augmentation(),
        datatype='valid',
        img_ids=valid_ids,
        preprocessing=get_preprocessing(preprocessing_fn))

    train_loader = DataLoader(train_dataset,
                              batch_size=bs,
                              shuffle=True,
                              num_workers=num_workers)
    valid_loader = DataLoader(valid_dataset,
                              batch_size=bs,
                              shuffle=False,
                              num_workers=num_workers)

    loaders = {"train": train_loader, "valid": valid_loader}

    num_epochs = 50
    logdir = "./logs/log_{}_{}/log_{}".format(encoder, decoder, num_split)

    # model, criterion, optimizer
    optimizer = torch.optim.Adam([
        {
            'params': model.decoder.parameters(),
            'lr': 1e-2
        },
        {
            'params': model.encoder.parameters(),
            'lr': 1e-3
        },
    ])
    scheduler = ReduceLROnPlateau(optimizer, factor=0.35, patience=4)
    criterion = smp.utils.losses.BCEDiceLoss(eps=1.)
    runner = SupervisedRunner()

    runner.train(model=model,
                 criterion=criterion,
                 optimizer=optimizer,
                 scheduler=scheduler,
                 loaders=loaders,
                 callbacks=[DiceCallback()],
                 logdir=logdir,
                 num_epochs=num_epochs,
                 verbose=True)

    # Exploring predictions
    loaders = {"infer": valid_loader}
    runner.infer(
        model=model,
        loaders=loaders,
        callbacks=[
            CheckpointCallback(resume=f"{logdir}/checkpoints/best.pth"),
            InferCallback()
        ],
    )
Example #28
def generate_test_preds(args):

    valid_dice, class_params = args

    test_preds = np.zeros((len(sub), 350, 525), dtype=np.float32)

    for i in range(NFOLDS):
        logdir = LOG_DIR_BASE + str(i)

        preprocessing_fn = smp.encoders.get_preprocessing_fn(
            ENCODER, ENCODER_WEIGHTS)
        dummy_dataset = CloudDataset(
            df=sub,
            datatype='test',
            img_ids=test_ids[:1],
            transforms=get_validation_augmentation(),
            preprocessing=get_preprocessing(preprocessing_fn))
        dummy_loader = DataLoader(dummy_dataset,
                                  batch_size=1,
                                  shuffle=False,
                                  num_workers=0)

        model = smp.Unet(
            encoder_name=ENCODER,
            encoder_weights=ENCODER_WEIGHTS,
            classes=4,
            activation=ACTIVATION,
        )
        runner = SupervisedRunner(model)

        # HACK: We are loading a few examples from our dummy loader so catalyst will properly load the weights
        # from our checkpoint
        loaders = {"test": dummy_loader}
        runner.infer(
            model=model,
            loaders=loaders,
            callbacks=[
                CheckpointCallback(resume=f"{logdir}/checkpoints/best.pth"),
                InferCallback()
            ],
        )

        # Now we do real inference on the full dataset
        test_dataset = CloudDataset(
            df=sub,
            datatype='test',
            img_ids=test_ids,
            transforms=get_validation_augmentation(),
            preprocessing=get_preprocessing(preprocessing_fn))
        test_loader = DataLoader(test_dataset,
                                 batch_size=1,
                                 shuffle=False,
                                 num_workers=0)

        image_id = 0
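        # each image occupies 4 consecutive slots in test_preds (fish, flower, gravel, sugar);
        # sigmoid probabilities are averaged across the NFOLDS checkpoints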
        for batch_index, test_batch in enumerate(tqdm.tqdm(test_loader)):
            runner_out = runner.predict_batch(
                {"features":
                 test_batch[0].cuda()})['logits'].cpu().detach().numpy()
            for preds in runner_out:

                preds = preds.transpose((1, 2, 0))
                preds = cv2.resize(
                    preds,
                    (525, 350))  # height and width are backward in cv2...
                preds = preds.transpose((2, 0, 1))

                idx = batch_index * 4
                test_preds[idx + 0] += sigmoid(preds[0]) / NFOLDS  # fish
                test_preds[idx + 1] += sigmoid(preds[1]) / NFOLDS  # flower
                test_preds[idx + 2] += sigmoid(preds[2]) / NFOLDS  # gravel
                test_preds[idx + 3] += sigmoid(preds[3]) / NFOLDS  # sugar

    # Convert ensembled predictions to RLE predictions
    encoded_pixels = []
    for image_id, preds in enumerate(test_preds):

        predict, num_predict = post_process(preds,
                                            class_params[image_id % 4][0],
                                            class_params[image_id % 4][1])
        if num_predict == 0:
            encoded_pixels.append('')
        else:
            r = mask2rle(predict)
            encoded_pixels.append(r)

    print("Saving submission...")
    sub['EncodedPixels'] = encoded_pixels
    sub.to_csv('unet_submission_{}.csv'.format(valid_dice),
               columns=['Image_Label', 'EncodedPixels'],
               index=False)
    print("Saved.")
Example #29
def main(config):
    opts = config()
    path = opts.path
    train = pd.read_csv(f'{path}/train.csv')
    sub = pd.read_csv(f'{path}/sample_submission.csv')

    n_train = len(os.listdir(f'{path}/train_images'))
    n_test = len(os.listdir(f'{path}/test_images'))

    sub['label'] = sub['Image_Label'].apply(lambda x: x.split('_')[1])
    sub['im_id'] = sub['Image_Label'].apply(lambda x: x.split('_')[0])
    train.loc[train['EncodedPixels'].isnull() == False,
              'Image_Label'].apply(lambda x: x.split('_')[1]).value_counts()
    train.loc[train['EncodedPixels'].isnull() == False, 'Image_Label'].apply(
        lambda x: x.split('_')[0]).value_counts().value_counts()

    train['label'] = train['Image_Label'].apply(lambda x: x.split('_')[1])
    train['im_id'] = train['Image_Label'].apply(lambda x: x.split('_')[0])

    valid_ids = pd.read_csv("csvs/valid_threshold.csv")["img_id"].values
    test_ids = sub['Image_Label'].apply(
        lambda x: x.split('_')[0]).drop_duplicates().values
    #     print(valid_ids)
    ENCODER = opts.backborn
    ENCODER_WEIGHTS = opts.encoder_weights
    DEVICE = 'cuda'

    ACTIVATION = None
    model = get_model(model_type=opts.model_type,
                      encoder=ENCODER,
                      encoder_weights=ENCODER_WEIGHTS,
                      activation=ACTIVATION,
                      n_classes=opts.class_num,
                      task=opts.task,
                      attention_type=opts.attention_type,
                      head='simple',
                      center=opts.center,
                      tta=opts.tta)
    if opts.refine:
        model = get_ref_model(infer_model=model,
                              encoder=opts.ref_backborn,
                              encoder_weights=ENCODER_WEIGHTS,
                              activation=ACTIVATION,
                              n_classes=opts.class_num,
                              preprocess=opts.preprocess,
                              tta=opts.tta)
    model = convert_model(model)
    preprocessing_fn = encoders.get_preprocessing_fn(ENCODER, ENCODER_WEIGHTS)

    encoded_pixels = []
    runner = SupervisedRunner()
    probabilities = np.zeros((2220, 350, 525))

    for i in range(opts.fold_max):
        if opts.refine:
            logdir = f"{opts.logdir}_refine/fold{i}"
        else:
            logdir = f"{opts.logdir}/fold{i}"
        valid_dataset = CloudDataset(
            df=train,
            datatype='valid',
            img_ids=valid_ids,
            transforms=get_validation_augmentation(opts.img_size),
            preprocessing=get_preprocessing(preprocessing_fn))
        valid_loader = DataLoader(valid_dataset,
                                  batch_size=opts.batchsize,
                                  shuffle=False,
                                  num_workers=opts.num_workers)
        loaders = {"infer": valid_loader}
        runner.infer(
            model=model,
            loaders=loaders,
            callbacks=[
                CheckpointCallback(resume=f"{logdir}/checkpoints/best.pth"),
                InferCallback()
            ],
        )
        valid_masks = []
        for i, (batch, output) in enumerate(
                tqdm.tqdm(
                    zip(valid_dataset,
                        runner.callbacks[0].predictions["logits"]))):
            image, mask = batch
            for m in mask:
                if m.shape != (350, 525):
                    m = cv2.resize(m,
                                   dsize=(525, 350),
                                   interpolation=cv2.INTER_LINEAR)
                valid_masks.append(m)

            for j, probability in enumerate(output):
                if probability.shape != (350, 525):
                    probability = cv2.resize(probability,
                                             dsize=(525, 350),
                                             interpolation=cv2.INTER_LINEAR)
                probabilities[i * 4 + j, :, :] += sigmoid(probability)

    probabilities /= opts.fold_max
    if opts.tta:
        np.save(
            f'probabilities/{opts.logdir.split("/")[-1]}_{opts.img_size[0]}x{opts.img_size[1]}_tta_valid.npy',
            probabilities)
    else:
        np.save(
            f'probabilities/{opts.logdir.split("/")[-1]}_{opts.img_size[0]}x{opts.img_size[1]}_valid.npy',
            probabilities)

    torch.cuda.empty_cache()
    gc.collect()

    class_params = {}
    cv_d = []
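    # per-class grid search over top threshold, bottom threshold and minimum size, scored by mean Dice on the validation masks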
    for class_id in tqdm.trange(opts.class_num, desc='class_id', leave=False):
        #         print(class_id)
        attempts = []
        for tt in tqdm.trange(0, 100, 10, desc='top_threshold', leave=False):
            tt /= 100
            for bt in tqdm.trange(0,
                                  100,
                                  10,
                                  desc='bot_threshold',
                                  leave=False):
                bt /= 100
                for ms in tqdm.tqdm([
                        0, 100, 1000, 5000, 10000, 11000, 14000, 15000, 16000,
                        18000, 19000, 20000, 21000, 23000, 25000, 27000, 30000,
                        50000
                ],
                                    desc='min_size',
                                    leave=False):
                    masks = []
                    for i in range(class_id, len(probabilities), 4):
                        probability = probabilities[i]
                        predict, num_predict = post_process(
                            probability, tt, ms, bt)

                        masks.append(predict)

                    d = []
                    for i, j in zip(masks, valid_masks[class_id::4]):
                        #                     print(i.shape, j.shape)
                        if (i.sum() == 0) & (j.sum() == 0):
                            d.append(1)
                        else:
                            d.append(dice(i, j))
                    attempts.append((tt, ms, bt, np.mean(d)))

        attempts_df = pd.DataFrame(
            attempts,
            columns=['top_threshold', 'size', 'bottom_threshold', 'dice'])

        attempts_df = attempts_df.sort_values('dice', ascending=False)
        print(attempts_df.head())
        cv_d.append(attempts_df['dice'].values[0])
        best_top_threshold = attempts_df['top_threshold'].values[0]
        best_size = attempts_df['size'].values[0]
        best_bottom_threshold = attempts_df['bottom_threshold'].values[0]

        class_params[class_id] = (best_top_threshold, best_size,
                                  best_bottom_threshold)
    cv_d = np.array(cv_d)
    print("CV Dice:", np.mean(cv_d))
    pathlist = [
        "../input/test_images/" + i.split("_")[0] for i in sub['Image_Label']
    ]

    del masks
    del valid_masks
    del probabilities
    gc.collect()

    ############# predict ###################
    probabilities = np.zeros((n_test, 4, 350, 525))
    for fold in tqdm.trange(opts.fold_max, desc='fold loop'):
        if opts.refine:
            logdir = f"{opts.logdir}_refine/fold{fold}"
        else:
            logdir = f"{opts.logdir}/fold{fold}"


#         loaders = {"test": test_loader}
        test_dataset = CloudDataset(
            df=sub,
            datatype='test',
            img_ids=test_ids,
            transforms=get_validation_augmentation(opts.img_size),
            preprocessing=get_preprocessing(preprocessing_fn))
        test_loader = DataLoader(test_dataset,
                                 batch_size=opts.batchsize,
                                 shuffle=False,
                                 num_workers=opts.num_workers)
        runner_out = runner.predict_loader(
            model,
            test_loader,
            resume=f"{logdir}/checkpoints/best.pth",
            verbose=True)
        for i, batch in enumerate(
                tqdm.tqdm(runner_out, desc='probability loop')):
            for j, probability in enumerate(batch):
                if probability.shape != (350, 525):
                    probability = cv2.resize(probability,
                                             dsize=(525, 350),
                                             interpolation=cv2.INTER_LINEAR)
                probabilities[i, j, :, :] += sigmoid(probability)
        gc.collect()
    probabilities /= opts.fold_max
    if opts.tta:
        np.save(
            f'probabilities/{opts.logdir.split("/")[-1]}_{opts.img_size[0]}x{opts.img_size[1]}_tta_test.npy',
            probabilities)
    else:
        np.save(
            f'probabilities/{opts.logdir.split("/")[-1]}_{opts.img_size[0]}x{opts.img_size[1]}_test.npy',
            probabilities)
    image_id = 0
    print("##################### start post_process #####################")
    for i in tqdm.trange(n_test, desc='post process loop'):
        for probability in probabilities[i]:
            predict, num_predict = post_process(probability,
                                                class_params[image_id % 4][0],
                                                class_params[image_id % 4][1],
                                                class_params[image_id % 4][2])
            if num_predict == 0:
                encoded_pixels.append('')
            else:
                black_mask = get_black_mask(pathlist[image_id])
                predict = np.multiply(predict, black_mask)
                r = mask2rle(predict)
                encoded_pixels.append(r)
            image_id += 1
        gc.collect()
    print("##################### Finish post_process #####################")
    #######################################
    sub['EncodedPixels'] = encoded_pixels
    sub.to_csv(
        f'submissions/submission_{opts.logdir.split("/")[-1]}_{opts.img_size[0]}x{opts.img_size[1]}.csv',
        columns=['Image_Label', 'EncodedPixels'],
        index=False)
Example #30
def get_optimal_postprocess(loaders=None, runner=None, logdir: str = ''):
    """
    Calculate optimal thresholds for validation data.

    Args:
        loaders: loaders with necessary datasets
        runner: runner
        logdir: directory with model checkpoints

    Returns:

    """
    loaders['infer'] = loaders['valid']

    runner.infer(
        model=runner.model,
        loaders=loaders,
        callbacks=[
            CheckpointCallback(resume=f"{logdir}/checkpoints/best.pth"),
            InferCallback()
        ],
    )
    valid_masks = []
    probabilities = np.zeros((2220, 350, 525))
    for i, (batch, output) in enumerate(
            zip(loaders['infer'].dataset,
                runner.callbacks[0].predictions["logits"])):
        image, mask = batch
        for m in mask:
            if m.shape != (350, 525):
                m = cv2.resize(m,
                               dsize=(525, 350),
                               interpolation=cv2.INTER_LINEAR)
            valid_masks.append(m)

        for j, probability in enumerate(output):
            if probability.shape != (350, 525):
                probability = cv2.resize(probability,
                                         dsize=(525, 350),
                                         interpolation=cv2.INTER_LINEAR)
            probabilities[i * 4 + j, :, :] = probability

    class_params = {}
    for class_id in range(4):
        print(class_id)
        attempts = []
        for t in range(0, 100, 10):
            t /= 100
            for ms in [
                    0, 100, 1000, 5000, 10000, 11000, 14000, 15000, 16000,
                    18000, 19000, 20000, 21000, 23000, 25000, 27000, 30000,
                    50000
            ]:
                masks = []
                for i in range(class_id, len(probabilities), 4):
                    probability = probabilities[i]
                    predict, num_predict = post_process(
                        sigmoid(probability), t, ms)
                    masks.append(predict)

                d = []
                for i, j in zip(masks, valid_masks[class_id::4]):
                    if (i.sum() == 0) & (j.sum() == 0):
                        d.append(1)
                    else:
                        d.append(dice(i, j))

                attempts.append((t, ms, np.mean(d)))

        attempts_df = pd.DataFrame(attempts,
                                   columns=['threshold', 'size', 'dice'])

        attempts_df = attempts_df.sort_values('dice', ascending=False)
        print(attempts_df.head())
        best_threshold = attempts_df['threshold'].values[0]
        best_size = attempts_df['size'].values[0]

        class_params[class_id] = (best_threshold, best_size)

    print(class_params)
    return class_params