def train_model(epoch, train_loader, valid_loader, valid_dataset, log_dir):
    # create segmentation model with pretrained encoder

    if not os.path.exists(log_dir):
        os.mkdir(log_dir)

    model = smp.FPN(
        encoder_name=ENCODER,
        encoder_weights=ENCODER_WEIGHTS,
        classes=len(CLASSES),
        activation=ACTIVATION,
    )

    loss = smp.utils.losses.BCEDiceLoss()

    optimizer = Nadam(model.parameters(), lr=1e-5)
    model = nn.DataParallel(model)
    # optimizer = torch.optim.Adam([{'params': model.module.decoder.parameters(), 'lr': 1e-4},
    #                               # decrease lr for encoder in order not to permute
    #                               # pre-trained weights with large gradients on training start
    #                               {'params': model.module.encoder.parameters(), 'lr': 1e-6}, ])

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, T_max=(epoch // 9) + 1)

    runner = SupervisedRunner()

    loaders = {
        "train": train_loader,
        "valid": valid_loader
    }

    runner.train(
        model=model,
        criterion=loss,
        optimizer=optimizer,
        scheduler=scheduler,
        loaders=loaders,
        callbacks=[DiceCallback(), IouCallback(), EarlyStoppingCallback(
            patience=6, min_delta=0.001)],
        logdir=log_dir,
        num_epochs=epoch,
        verbose=True
    )

    probabilities, valid_masks = valid_model(
        runner, model, valid_loader, valid_dataset,  log_dir)

    get_optimal_thres(probabilities, valid_masks)
Example #2
0
    "valid": valid_dl
}
criterion = smp.utils.losses.BCEDiceLoss(eps=1e-7)
optimizer = torch.optim.SGD([
    {'params': model.encoder.parameters(), 'lr': LR},  
    {'params': model.decoder.parameters(), 'lr': LR},
], lr=LR)
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[20, 30, 35])

callbacks = [
    DiceCallback(
        threshold=0.5,
        activation=ACTIVATION.capitalize(),
    ),
    IouCallback(
        threshold=0.5,
        activation=ACTIVATION.capitalize(),
    ),
]
callbacks[0].metric_fn = dice_wo_back
runner = SupervisedRunner()

## Step 1.

runner.train(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    callbacks=callbacks,
    loaders=loaders,
    logdir=logdir,
    num_epochs=num_epochs,
        # And only then we aggregate everything into one loss.
        CriterionAggregatorCallback(
            prefix="loss",
            loss_aggregate_fn=
            "weighted_sum",  # can be "sum", "weighted_sum" or "mean"
            # because we want weighted sum, we need to add scale for each loss
            loss_keys={
                "loss_dice": 0.5,
                "loss_iou": 0.5,
                "loss_bce": 1.0
            },
        ),

        # metrics
        DiceCallback(input_key="mask"),
        IouCallback(input_key="mask"),
    ],
    # path to save logs
    logdir=logdir,
    num_epochs=num_epochs,

    # save our best checkpoint by CE metric
    main_metric="ce",
    # CE needs to be minimised.
    minimize_metric=True,

    # for FP16. It uses the variable from the very first cell
    fp16=fp16_params,

    # prints train logs
    verbose=True,
Example #4
0
def run(config_file):
    config = load_config(config_file)

    if not os.path.exists(config.work_dir):
        os.makedirs(config.work_dir, exist_ok=True)
    save_config(config, config.work_dir + '/config.yml')

    os.environ['CUDA_VISIBLE_DEVICES'] = '0'

    all_transforms = {}
    all_transforms['train'] = get_transforms(config.transforms.train)
    all_transforms['valid'] = get_transforms(config.transforms.test)

    dataloaders = {
        phase: make_loader(data_folder=config.data.train_dir,
                           df_path=config.data.train_df_path,
                           phase=phase,
                           batch_size=config.train.batch_size,
                           num_workers=config.num_workers,
                           idx_fold=config.data.params.idx_fold,
                           transforms=all_transforms[phase],
                           num_classes=config.data.num_classes,
                           pseudo_label_path=config.train.pseudo_label_path,
                           debug=config.debug)
        for phase in ['train', 'valid']
    }

    # create segmentation model with pre trained encoder
    model = getattr(smp, config.model.arch)(
        encoder_name=config.model.encoder,
        encoder_weights=config.model.pretrained,
        classes=config.data.num_classes,
        activation=None,
    )

    # train setting
    criterion = get_loss(config)
    params = [
        {
            'params': model.decoder.parameters(),
            'lr': config.optimizer.params.decoder_lr
        },
        {
            'params': model.encoder.parameters(),
            'lr': config.optimizer.params.encoder_lr
        },
    ]
    optimizer = get_optimizer(params, config)
    scheduler = get_scheduler(optimizer, config)

    # model runner
    runner = SupervisedRunner(model=model)

    callbacks = [DiceCallback(), IouCallback()]

    # to resume from check points if exists
    if os.path.exists(config.work_dir + '/checkpoints/best.pth'):
        callbacks.append(
            CheckpointCallback(resume=config.work_dir +
                               '/checkpoints/best_full.pth'))

    if config.train.mixup:
        callbacks.append(MixupCallback())

    if config.train.cutmix:
        callbacks.append(CutMixCallback())

    # model training
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        loaders=dataloaders,
        logdir=config.work_dir,
        num_epochs=config.train.num_epochs,
        callbacks=callbacks,
        verbose=True,
        fp16=True,
    )
def main():

    train_image_list = sorted(
        glob.glob(
            pathname=
            '../input/uavid-semantic-segmentation-dataset/train/train/*/Images/*.png',
            recursive=True))
    train_mask_list = sorted(
        glob.glob(pathname='./trainlabels/*/TrainId/*.png', recursive=True))
    valid_image_list = sorted(
        glob.glob(
            pathname=
            '../input/uavid-semantic-segmentation-dataset/valid/valid/*/Images/*.png',
            recursive=True))
    valid_mask_list = sorted(
        glob.glob(pathname='./validlabels/*/TrainId/*.png', recursive=True))

    preprocessing_fn = smp.encoders.get_preprocessing_fn(
        config.ENCODER, config.ENCODER_WEIGHTS)

    train_dataset = Dataset(
        train_image_list,
        train_mask_list,
        augmentation=augmentations.get_training_augmentation(),
        preprocessing=augmentations.get_preprocessing(preprocessing_fn),
        classes=config.CLASSES,
    )

    valid_dataset = Dataset(
        valid_image_list,
        valid_mask_list,
        augmentation=augmentations.get_validation_augmentation(),
        preprocessing=augmentations.get_preprocessing(preprocessing_fn),
        classes=config.CLASSES,
    )

    train_loader = DataLoader(train_dataset,
                              batch_size=config.BATCH_SIZE,
                              shuffle=True,
                              num_workers=2,
                              pin_memory=True,
                              drop_last=True)
    valid_loader = DataLoader(valid_dataset,
                              batch_size=config.BATCH_SIZE,
                              shuffle=False,
                              num_workers=2,
                              pin_memory=True,
                              drop_last=False)

    loaders = {"train": train_loader, "valid": valid_loader}

    base_optimizer = RAdam([
        {
            'params': model.MODEL.decoder.parameters(),
            'lr': config.LEARNING_RATE
        },
        {
            'params': model.MODEL.encoder.parameters(),
            'lr': 1e-4
        },
        {
            'params': model.MODEL.segmentation_head.parameters(),
            'lr': config.LEARNING_RATE
        },
    ])
    optimizer = Lookahead(base_optimizer)
    criterion = BCEDiceLoss(activation=None)
    runner = SupervisedRunner()
    scheduler = OneCycleLRWithWarmup(optimizer,
                                     num_steps=config.NUM_EPOCHS,
                                     lr_range=(0.0016, 0.0000001),
                                     init_lr=config.LEARNING_RATE,
                                     warmup_steps=2)

    callbacks = [
        IouCallback(activation='none'),
        ClasswiseIouCallback(classes=config.CLASSES, activation='none'),
        EarlyStoppingCallback(patience=config.ES_PATIENCE,
                              metric='iou',
                              minimize=False),
    ]
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        loaders=loaders,
        callbacks=callbacks,
        logdir=config.LOGDIR,
        num_epochs=config.NUM_EPOCHS,
        # save our best checkpoint by IoU metric
        main_metric="iou",
        # IoU needs to be maximized.
        minimize_metric=False,
        # for FP16. It uses the variable from the very first cell
        fp16=config.FP16_PARAMS,
        # prints train logs
        verbose=True,
    )
Example #6
0
def run(config_file):
    config = load_config(config_file)
    #set up the environment flags for working with the KAGGLE GPU OR COLAB_GPU
    if 'COLAB_GPU' in os.environ:
        config.work_dir = '/content/drive/My Drive/kaggle_cloud/' + config.work_dir
    elif 'KAGGLE_WORKING_DIR' in os.environ:
        config.work_dir = '/kaggle/working/' + config.work_dir
    print('working directory:', config.work_dir)

    #save the configuration to the working dir
    if not os.path.exists(config.work_dir):
        os.makedirs(config.work_dir, exist_ok=True)
    save_config(config, config.work_dir + '/config.yml')

    #Enter the GPUS you have,
    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'

    all_transforms = {}
    all_transforms['train'] = get_transforms(config.transforms.train)
    #our dataset has an explicit validation folder, use that later.
    all_transforms['valid'] = get_transforms(config.transforms.test)

    print("before rajat config", config.data.height, config.data.width)
    #fetch the dataloaders we need
    dataloaders = {
        phase: make_loader(data_folder=config.data.train_dir,
                           df_path=config.data.train_df_path,
                           phase=phase,
                           img_size=(config.data.height, config.data.width),
                           batch_size=config.train.batch_size,
                           num_workers=config.num_workers,
                           idx_fold=config.data.params.idx_fold,
                           transforms=all_transforms[phase],
                           num_classes=config.data.num_classes,
                           pseudo_label_path=config.train.pseudo_label_path,
                           debug=config.debug)
        for phase in ['train', 'valid']
    }

    #creating the segmentation model with pre-trained encoder
    '''
    dumping the parameters for smp library
    encoder_name: str = "resnet34",
    encoder_depth: int = 5,
    encoder_weights: str = "imagenet",
    decoder_use_batchnorm: bool = True,
    decoder_channels: List[int] = (256, 128, 64, 32, 16),
    decoder_attention_type: Optional[str] = None,
    in_channels: int = 3,
    classes: int = 1,
    activation: Optional[Union[str, callable]] = None,
    aux_params: Optional[dict] = None,
    '''
    model = getattr(smp, config.model.arch)(
        encoder_name=config.model.encoder,
        encoder_weights=config.model.pretrained,
        classes=config.data.num_classes,
        activation=None,
    )

    #fetch the loss
    criterion = get_loss(config)
    params = [
        {
            'params': model.decoder.parameters(),
            'lr': config.optimizer.params.decoder_lr
        },
        {
            'params': model.encoder.parameters(),
            'lr': config.optimizer.params.encoder_lr
        },
    ]
    optimizer = get_optimizer(params, config)
    scheduler = get_scheduler(optimizer, config)
    '''
    dumping the catalyst supervised runner
    https://github.com/catalyst-team/catalyst/blob/master/catalyst/dl/runner/supervised.py

    model (Model): Torch model object
    device (Device): Torch device
    input_key (str): Key in batch dict mapping for model input
    output_key (str): Key in output dict model output
        will be stored under
    input_target_key (str): Key in batch dict mapping for target
    '''

    runner = SupervisedRunner(model=model, device=get_device())

    #@pavel,srk,rajat,vladimir,pudae check the IOU and the Dice Callbacks

    callbacks = [DiceCallback(), IouCallback()]

    #adding patience
    if config.train.early_stop_patience > 0:
        callbacks.append(
            EarlyStoppingCallback(patience=config.train.early_stop_patience))

    #thanks for handling the distributed training
    '''
    we are gonna take zero_grad after accumulation accumulation_steps
    '''
    if config.train.accumulation_size > 0:
        accumulation_steps = config.train.accumulation_size // config.train.batch_size
        callbacks.extend([
            CriterionCallback(),
            OptimizerCallback(accumulation_steps=accumulation_steps)
        ])

    # to resume from check points if exists
    if os.path.exists(config.work_dir + '/checkpoints/best.pth'):
        callbacks.append(
            CheckpointCallback(resume=config.work_dir +
                               '/checkpoints/last_full.pth'))
    '''
    pudae добавь пожалуйста обратный вызов
    https://arxiv.org/pdf/1710.09412.pdf
    **srk adding the mixup callback
    '''
    if config.train.mixup:
        callbacks.append(MixupCallback())
    if config.train.cutmix:
        callbacks.append(CutMixCallback())
    '''@rajat implemented cutmix, a wieghed combination of cutout and mixup '''
    callbacks.append(MixupCallback())
    callbacks.append(CutMixCallback())
    '''
    rajat introducing training loop
    https://github.com/catalyst-team/catalyst/blob/master/catalyst/dl/runner/supervised.py
    take care of the nvidias fp16 precision
    '''
    print(config.work_dir)
    print(config.train.minimize_metric)
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        loaders=dataloaders,
        logdir=config.work_dir,
        num_epochs=config.train.num_epochs,
        main_metric=config.train.main_metric,
        minimize_metric=config.train.minimize_metric,
        callbacks=callbacks,
        verbose=True,
        fp16=False,
    )
Example #7
0
def run(config_file):
    config = load_config(config_file)
    if 'COLAB_GPU' in os.environ:
        config.work_dir = '/content/drive/My Drive/kaggle_cloud/' + config.work_dir
    elif 'KAGGLE_WORKING_DIR' in os.environ:
        config.work_dir = '/kaggle/working/' + config.work_dir
    print('working directory:', config.work_dir)

    if not os.path.exists(config.work_dir):
        os.makedirs(config.work_dir, exist_ok=True)
    save_config(config, config.work_dir + '/config.yml')

    os.environ['CUDA_VISIBLE_DEVICES'] = '0'

    all_transforms = {}
    all_transforms['train'] = get_transforms(config.transforms.train)
    all_transforms['valid'] = get_transforms(config.transforms.test)

    dataloaders = {
        phase: make_loader(
            data_folder=config.data.train_dir,
            df_path=config.data.train_df_path,
            phase=phase,
            img_size=(config.data.height, config.data.width),
            batch_size=config.train.batch_size,
            num_workers=config.num_workers,
            idx_fold=config.data.params.idx_fold,
            transforms=all_transforms[phase],
            num_classes=config.data.num_classes,
            pseudo_label_path=config.train.pseudo_label_path,
            debug=config.debug
        )
        for phase in ['train', 'valid']
    }

    # create segmentation model with pre trained encoder
    model = getattr(smp, config.model.arch)(
        encoder_name=config.model.encoder,
        encoder_weights=config.model.pretrained,
        classes=config.data.num_classes,
        activation=None,
    )

    # train setting
    criterion = get_loss(config)
    params = [
        {'params': model.decoder.parameters(), 'lr': config.optimizer.params.decoder_lr},
        {'params': model.encoder.parameters(), 'lr': config.optimizer.params.encoder_lr},
    ]
    optimizer = get_optimizer(params, config)
    scheduler = get_scheduler(optimizer, config)

    # model runner
    runner = SupervisedRunner(model=model, device=get_device())

    callbacks = [DiceCallback(), IouCallback()]

    if config.train.early_stop_patience > 0:
        callbacks.append(EarlyStoppingCallback(
            patience=config.train.early_stop_patience))

    if config.train.accumulation_size > 0:
        accumulation_steps = config.train.accumulation_size // config.train.batch_size
        callbacks.extend(
            [CriterionCallback(),
             OptimizerCallback(accumulation_steps=accumulation_steps)]
        )

    # to resume from check points if exists
    if os.path.exists(config.work_dir + '/checkpoints/best.pth'):
        callbacks.append(CheckpointCallback(
            resume=config.work_dir + '/checkpoints/last_full.pth'))

    if config.train.mixup:
        callbacks.append(MixupCallback())

    if config.train.cutmix:
        callbacks.append(CutMixCallback())

    # model training
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        loaders=dataloaders,
        logdir=config.work_dir,
        num_epochs=config.train.num_epochs,
        main_metric=config.train.main_metric,
        minimize_metric=config.train.minimize_metric,
        callbacks=callbacks,
        verbose=True,
        fp16=True,
    )