Ejemplo n.º 1
0
def get_callbacks(config: Dict):
    return [
        CriterionCallback(**config["criterion_callback_params"]),
        OptimizerCallback(**config["optimizer_callback_params"]),
        CheckpointCallback(save_n_best=3),
        EarlyStoppingCallback(**config["early_stopping"]),
    ]
Ejemplo n.º 2
0
 def set_callbacks(self):
     callbacks = [
         LossCallback(),
         CheckpointCallback(save_n_best=2),
         OptimizerCallback(),
         ConsoleLogger(),
         TensorboardLogger(),
     ]
     return callbacks
Ejemplo n.º 3
0
 def set_callbacks(self):
     callbacks = [
         # IouCallback(activation = "Softmax2d"),
         LossCallback(),
         CheckpointCallback(save_n_best=2),
         OptimizerCallback(),
         ConsoleLogger(),
         TensorboardLogger(),
     ]
     return callbacks
Ejemplo n.º 4
0
    def prepare_callbacks(*, args, mode, stage=None, **kwargs):
        callbacks = collections.OrderedDict()

        if mode == "train":
            if stage == "debug":
                callbacks["stage"] = StageCallback()
                callbacks["loss"] = LossCallback(
                    emb_l2_reg=kwargs.get("emb_l2_reg", -1))
                callbacks["optimizer"] = OptimizerCallback(
                    grad_clip=kwargs.get("grad_clip", None))
                callbacks["metrics"] = BaseMetrics()
                callbacks["lr-finder"] = LRFinder(
                    final_lr=kwargs.get("final_lr", 0.1),
                    n_steps=kwargs.get("n_steps", None))
                callbacks["logger"] = Logger()
                callbacks["tflogger"] = TensorboardLogger()
            else:
                callbacks["stage"] = StageCallback()
                callbacks["loss"] = LossCallback(
                    emb_l2_reg=kwargs.get("emb_l2_reg", -1))
                callbacks["optimizer"] = OptimizerCallback(
                    grad_clip=kwargs.get("grad_clip", None))
                callbacks["metrics"] = BaseMetrics()
                callbacks["map"] = MapKCallback(
                    map_args=kwargs.get("map_args", [3]))
                callbacks["saver"] = CheckpointCallback(save_n_best=getattr(
                    args, "save_n_best", 7),
                                                        resume=args.resume)

                # Pytorch scheduler callback
                callbacks["scheduler"] = SchedulerCallback(
                    reduce_metric="map03")

                callbacks["logger"] = Logger()
                callbacks["tflogger"] = TensorboardLogger()
        elif mode == "infer":
            callbacks["saver"] = CheckpointCallback(resume=args.resume)
            callbacks["infer"] = InferCallback(out_prefix=args.out_prefix)
        else:
            raise NotImplementedError

        return callbacks
Ejemplo n.º 5
0
    optimizer = torch.optim.SGD(model.parameters(), lr=1e-2, momentum=0.9)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           factor=0.7,
                                                           patience=10,
                                                           verbose=True)

    # the only tricky part
    n_epochs = 120
    # logdir = "/tmp/runs/"
    logdir = "/tmp/runs_se_resnext50/"

    callbacks = collections.OrderedDict()

    callbacks['f1_score'] = F1ScoreCallback()
    callbacks["loss"] = ClassificationLossCallback()
    callbacks["optimizer"] = OptimizerCallback()

    callbacks["scheduler"] = SchedulerCallback(reduce_metric="f1_score")

    callbacks["saver"] = CheckpointCallback()
    callbacks["logger"] = Logger()
    callbacks["tflogger"] = TensorboardLogger()

    runner = ClassificationRunner(model=model,
                                  criterion=FocalLoss(),
                                  optimizer=optimizer,
                                  scheduler=scheduler)

    runner.train(loaders=loaders,
                 callbacks=callbacks,
                 logdir=logdir,
Ejemplo n.º 6
0
    loaders["valid"] = val_loader

    runner = dl.SupervisedRunner(device=tu.device,
                                 input_key="image",
                                 input_target_key="label",
                                 output_key="logits")

    callbacks = [
        CriterionCallback(input_key="label",
                          output_key="logits",
                          prefix="loss"),
        AccuracyCallback(input_key="label",
                         output_key="logits",
                         prefix="acc",
                         activation="Sigmoid"),
        OptimizerCallback(accumulation_steps=2),
        #MixupCallback(alpha=0.3, input_key="label", output_key="logits", fields=("image", ))
    ]
    if TRAINING:
        runner.train(model=model,
                     criterion=nn.CrossEntropyLoss(),
                     optimizer=optimizer,
                     scheduler=scheduler,
                     loaders=loaders,
                     logdir=LOGDIR,
                     num_epochs=EPOCHS,
                     fp16=tu.fp16_params,
                     callbacks=callbacks,
                     verbose=True,
                     load_best_on_end=False,
                     resume=RESUME)
Ejemplo n.º 7
0
    elif args.loss == 'BCE':
        criterion = nn.BCEWithLogitsLoss()
    else:
        criterion = smp.utils.losses.BCEDiceLoss(eps=1.)

    if args.multigpu:
        model = nn.DataParallel(model)

    if args.task == 'segmentation':
        callbacks = [DiceCallback(), EarlyStoppingCallback(patience=10, min_delta=0.001), CriterionCallback()]
    elif args.task == 'classification':
        callbacks = [AUCCallback(class_names=['Fish', 'Flower', 'Gravel', 'Sugar'], num_classes=4),
                     EarlyStoppingCallback(patience=10, min_delta=0.001), CriterionCallback()]

    if args.gradient_accumulation:
        callbacks.append(OptimizerCallback(accumulation_steps=args.gradient_accumulation))

    checkpoint = utils.load_checkpoint(f'{logdir}/checkpoints/best.pth')
    model.cuda()
    utils.unpack_checkpoint(checkpoint, model=model)
    #
    #
    runner = SupervisedRunner()
    if args.train:
        print('Training')
        runner.train(
            model=model,
            criterion=criterion,
            optimizer=optimizer,
            main_metric='dice',
            minimize_metric=False,
def main():
    train_dataset = dataset.SentimentDataset(
        texts=df_train['sentences'].values.tolist(),
        labels=df_train['labels'].values,
        max_seq_length=config.MAX_SEQ_LENGTH,
        model_name=config.MODEL_NAME)

    valid_dataset = dataset.SentimentDataset(
        texts=df_valid['sentences'].values.tolist(),
        labels=df_valid['labels'].values,
        max_seq_length=config.MAX_SEQ_LENGTH,
        model_name=config.MODEL_NAME)

    train_val_loaders = {
        "train":
        DataLoader(dataset=train_dataset,
                   batch_size=config.BATCH_SIZE,
                   shuffle=True,
                   num_workers=2,
                   pin_memory=True),
        "valid":
        DataLoader(dataset=valid_dataset,
                   batch_size=config.BATCH_SIZE,
                   shuffle=False,
                   num_workers=2,
                   pin_memory=True)
    }

    dBert = model.DistilBert()

    param_optim = list(dBert.named_parameters())
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']

    criterion = nn.CrossEntropyLoss()

    base_optimizer = RAdam([{
        'params':
        [p for n, p in param_optim if not any(nd in n for nd in no_decay)],
        'weight_decay':
        config.WEIGHT_DECAY
    }, {
        'params':
        [p for n, p in param_optim if any(nd in n for nd in no_decay)],
        'weight_decay':
        0.0
    }])
    optimizer = Lookahead(base_optimizer)
    scheduler = OneCycleLRWithWarmup(
        optimizer,
        num_steps=config.NUM_EPOCHS,
        lr_range=(config.LEARNING_RATE, 1e-8),
        init_lr=config.LEARNING_RATE,
        warmup_steps=0,
    )
    runner = SupervisedRunner(input_key=("input_ids", "attention_mask"))
    # model training
    runner.train(model=dBert,
                 criterion=criterion,
                 optimizer=optimizer,
                 scheduler=scheduler,
                 loaders=train_val_loaders,
                 callbacks=[
                     AccuracyCallback(num_classes=2),
                     OptimizerCallback(accumulation_steps=config.ACCUM_STEPS),
                 ],
                 fp16=config.FP_16,
                 logdir=config.LOG_DIR,
                 num_epochs=config.NUM_EPOCHS,
                 verbose=True)
Ejemplo n.º 9
0
def run(config_file):
    config = load_config(config_file)
    #set up the environment flags for working with the KAGGLE GPU OR COLAB_GPU
    if 'COLAB_GPU' in os.environ:
        config.work_dir = '/content/drive/My Drive/kaggle_cloud/' + config.work_dir
    elif 'KAGGLE_WORKING_DIR' in os.environ:
        config.work_dir = '/kaggle/working/' + config.work_dir
    print('working directory:', config.work_dir)

    #save the configuration to the working dir
    if not os.path.exists(config.work_dir):
        os.makedirs(config.work_dir, exist_ok=True)
    save_config(config, config.work_dir + '/config.yml')

    #Enter the GPUS you have,
    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'

    all_transforms = {}
    all_transforms['train'] = get_transforms(config.transforms.train)
    #our dataset has an explicit validation folder, use that later.
    all_transforms['valid'] = get_transforms(config.transforms.test)

    print("before rajat config", config.data.height, config.data.width)
    #fetch the dataloaders we need
    dataloaders = {
        phase: make_loader(data_folder=config.data.train_dir,
                           df_path=config.data.train_df_path,
                           phase=phase,
                           img_size=(config.data.height, config.data.width),
                           batch_size=config.train.batch_size,
                           num_workers=config.num_workers,
                           idx_fold=config.data.params.idx_fold,
                           transforms=all_transforms[phase],
                           num_classes=config.data.num_classes,
                           pseudo_label_path=config.train.pseudo_label_path,
                           debug=config.debug)
        for phase in ['train', 'valid']
    }

    #creating the segmentation model with pre-trained encoder
    '''
    dumping the parameters for smp library
    encoder_name: str = "resnet34",
    encoder_depth: int = 5,
    encoder_weights: str = "imagenet",
    decoder_use_batchnorm: bool = True,
    decoder_channels: List[int] = (256, 128, 64, 32, 16),
    decoder_attention_type: Optional[str] = None,
    in_channels: int = 3,
    classes: int = 1,
    activation: Optional[Union[str, callable]] = None,
    aux_params: Optional[dict] = None,
    '''
    model = getattr(smp, config.model.arch)(
        encoder_name=config.model.encoder,
        encoder_weights=config.model.pretrained,
        classes=config.data.num_classes,
        activation=None,
    )

    #fetch the loss
    criterion = get_loss(config)
    params = [
        {
            'params': model.decoder.parameters(),
            'lr': config.optimizer.params.decoder_lr
        },
        {
            'params': model.encoder.parameters(),
            'lr': config.optimizer.params.encoder_lr
        },
    ]
    optimizer = get_optimizer(params, config)
    scheduler = get_scheduler(optimizer, config)
    '''
    dumping the catalyst supervised runner
    https://github.com/catalyst-team/catalyst/blob/master/catalyst/dl/runner/supervised.py

    model (Model): Torch model object
    device (Device): Torch device
    input_key (str): Key in batch dict mapping for model input
    output_key (str): Key in output dict model output
        will be stored under
    input_target_key (str): Key in batch dict mapping for target
    '''

    runner = SupervisedRunner(model=model, device=get_device())

    #@pavel,srk,rajat,vladimir,pudae check the IOU and the Dice Callbacks

    callbacks = [DiceCallback(), IouCallback()]

    #adding patience
    if config.train.early_stop_patience > 0:
        callbacks.append(
            EarlyStoppingCallback(patience=config.train.early_stop_patience))

    #thanks for handling the distributed training
    '''
    we are gonna take zero_grad after accumulation accumulation_steps
    '''
    if config.train.accumulation_size > 0:
        accumulation_steps = config.train.accumulation_size // config.train.batch_size
        callbacks.extend([
            CriterionCallback(),
            OptimizerCallback(accumulation_steps=accumulation_steps)
        ])

    # to resume from check points if exists
    if os.path.exists(config.work_dir + '/checkpoints/best.pth'):
        callbacks.append(
            CheckpointCallback(resume=config.work_dir +
                               '/checkpoints/last_full.pth'))
    '''
    pudae добавь пожалуйста обратный вызов
    https://arxiv.org/pdf/1710.09412.pdf
    **srk adding the mixup callback
    '''
    if config.train.mixup:
        callbacks.append(MixupCallback())
    if config.train.cutmix:
        callbacks.append(CutMixCallback())
    '''@rajat implemented cutmix, a wieghed combination of cutout and mixup '''
    callbacks.append(MixupCallback())
    callbacks.append(CutMixCallback())
    '''
    rajat introducing training loop
    https://github.com/catalyst-team/catalyst/blob/master/catalyst/dl/runner/supervised.py
    take care of the nvidias fp16 precision
    '''
    print(config.work_dir)
    print(config.train.minimize_metric)
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        loaders=dataloaders,
        logdir=config.work_dir,
        num_epochs=config.train.num_epochs,
        main_metric=config.train.main_metric,
        minimize_metric=config.train.minimize_metric,
        callbacks=callbacks,
        verbose=True,
        fp16=False,
    )
Ejemplo n.º 10
0
prepare_cudnn(deterministic=True)

# here we specify that we pass masks to the runner. So model's forward method will be called with
# these arguments passed to it.
runner = SupervisedRunner(input_key=("features", "attention_mask"))

# finally, training the model with Catalyst
runner.train(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    loaders=train_val_loaders,
    callbacks=[
        AccuracyCallback(num_classes=int(params["model"]["num_classes"])),
        OptimizerCallback(
            accumulation_steps=int(params["training"]["accum_steps"])),
    ],
    logdir=params["training"]["log_dir"],
    num_epochs=int(params["training"]["num_epochs"]),
    verbose=True,
)

# and running inference
torch.cuda.empty_cache()
runner.infer(
    model=model,
    loaders=test_loaders,
    callbacks=[
        CheckpointCallback(
            resume=f"{params['training']['log_dir']}/checkpoints/best.pth"),
        InferCallback(),
Ejemplo n.º 11
0
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--encoder', type=str, default='efficientnet-b0')
    parser.add_argument('--model', type=str, default='unet')
    parser.add_argument('--pretrained', type=str, default='imagenet')
    parser.add_argument('--logdir', type=str, default='../logs/')
    parser.add_argument('--exp_name', type=str)
    parser.add_argument('--data_folder', type=str, default='../input/')
    parser.add_argument('--height', type=int, default=320)
    parser.add_argument('--width', type=int, default=640)
    parser.add_argument('--batch_size', type=int, default=2)
    parser.add_argument('--accumulate', type=int, default=8)
    parser.add_argument('--epochs', type=int, default=20)
    parser.add_argument('--enc_lr', type=float, default=1e-2)
    parser.add_argument('--dec_lr', type=float, default=1e-3)
    parser.add_argument('--optim', type=str, default="radam")
    parser.add_argument('--loss', type=str, default="bcedice")
    parser.add_argument('--schedule', type=str, default="rlop")
    parser.add_argument('--early_stopping', type=bool, default=True)

    args = parser.parse_args()

    encoder = args.encoder
    model = args.model
    pretrained = args.pretrained
    logdir = args.logdir
    name = args.exp_name
    data_folder = args.data_folder
    height = args.height
    width = args.width
    bs = args.batch_size
    accumulate = args.accumulate
    epochs = args.epochs
    enc_lr = args.enc_lr
    dec_lr = args.dec_lr
    optim = args.optim
    loss = args.loss
    schedule = args.schedule
    early_stopping = args.early_stopping

    if model == 'unet':
        model = smp.Unet(encoder_name=encoder,
                         encoder_weights=pretrained,
                         classes=4,
                         activation=None)
    if model == 'fpn':
        model = smp.FPN(
            encoder_name=encoder,
            encoder_weights=pretrained,
            classes=4,
            activation=None,
        )
    if model == 'pspnet':
        model = smp.PSPNet(
            encoder_name=encoder,
            encoder_weights=pretrained,
            classes=4,
            activation=None,
        )
    if model == 'linknet':
        model = smp.Linknet(
            encoder_name=encoder,
            encoder_weights=pretrained,
            classes=4,
            activation=None,
        )
    if model == 'aspp':
        print('aspp can only be used with resnet34')
        model = aspp(num_class=4)

    preprocessing_fn = smp.encoders.get_preprocessing_fn(encoder, pretrained)
    log = os.path.join(logdir, name)

    ds = get_dataset(path=data_folder)
    prepared_ds = prepare_dataset(ds)

    train_set, valid_set = get_train_test(ds)

    train_ds = CloudDataset(df=prepared_ds,
                            datatype='train',
                            img_ids=train_set,
                            transforms=training1(h=height, w=width),
                            preprocessing=get_preprocessing(preprocessing_fn),
                            folder=data_folder)
    valid_ds = CloudDataset(df=prepared_ds,
                            datatype='train',
                            img_ids=valid_set,
                            transforms=valid1(h=height, w=width),
                            preprocessing=get_preprocessing(preprocessing_fn),
                            folder=data_folder)

    train_loader = DataLoader(train_ds,
                              batch_size=bs,
                              shuffle=True,
                              num_workers=multiprocessing.cpu_count())
    valid_loader = DataLoader(valid_ds,
                              batch_size=bs,
                              shuffle=False,
                              num_workers=multiprocessing.cpu_count())

    loaders = {
        'train': train_loader,
        'valid': valid_loader,
    }

    num_epochs = epochs

    if args.model != "aspp":
        if optim == "radam":
            optimizer = RAdam([
                {
                    'params': model.encoder.parameters(),
                    'lr': enc_lr
                },
                {
                    'params': model.decoder.parameters(),
                    'lr': dec_lr
                },
            ])
        if optim == "adam":
            optimizer = Adam([
                {
                    'params': model.encoder.parameters(),
                    'lr': enc_lr
                },
                {
                    'params': model.decoder.parameters(),
                    'lr': dec_lr
                },
            ])
        if optim == "adamw":
            optimizer = AdamW([
                {
                    'params': model.encoder.parameters(),
                    'lr': enc_lr
                },
                {
                    'params': model.decoder.parameters(),
                    'lr': dec_lr
                },
            ])
        if optim == "sgd":
            optimizer = SGD([
                {
                    'params': model.encoder.parameters(),
                    'lr': enc_lr
                },
                {
                    'params': model.decoder.parameters(),
                    'lr': dec_lr
                },
            ])
    elif args.model == 'aspp':
        if optim == "radam":
            optimizer = RAdam([
                {
                    'params': model.parameters(),
                    'lr': enc_lr
                },
            ])
        if optim == "adam":
            optimizer = Adam([
                {
                    'params': model.parameters(),
                    'lr': enc_lr
                },
            ])
        if optim == "adamw":
            optimizer = AdamW([
                {
                    'params': model.parameters(),
                    'lr': enc_lr
                },
            ])
        if optim == "sgd":
            optimizer = SGD([
                {
                    'params': model.parameters(),
                    'lr': enc_lr
                },
            ])

    scheduler = ReduceLROnPlateau(optimizer, factor=0.1, patience=5)
    if schedule == "rlop":
        scheduler = ReduceLROnPlateau(optimizer, factor=0.15, patience=3)
    if schedule == "noam":
        scheduler = NoamLR(optimizer, 10)

    if loss == "bcedice":
        criterion = smp.utils.losses.BCEDiceLoss(eps=1.)
    if loss == "dice":
        criterion = smp.utils.losses.DiceLoss(eps=1.)
    if loss == "bcejaccard":
        criterion = smp.utils.losses.BCEJaccardLoss(eps=1.)
    if loss == "jaccard":
        criterion == smp.utils.losses.JaccardLoss(eps=1.)
    if loss == 'bce':
        criterion = NewBCELoss()

    callbacks = [NewDiceCallback(), CriterionCallback()]

    callbacks.append(OptimizerCallback(accumulation_steps=accumulate))
    if early_stopping:
        callbacks.append(EarlyStoppingCallback(patience=5, min_delta=0.001))

    runner = SupervisedRunner()
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        loaders=loaders,
        callbacks=callbacks,
        logdir=log,
        num_epochs=num_epochs,
        verbose=True,
    )
Ejemplo n.º 12
0
def run(config_file, device_id, idx_fold):
    os.environ['CUDA_VISIBLE_DEVICES'] = str(device_id)
    print('info: use gpu No.{}'.format(device_id))

    config = load_config(config_file)

    # for n-folds loop
    if config.data.params.idx_fold == -1:
        config.data.params.idx_fold = idx_fold
        config.work_dir = config.work_dir + '_fold{}'.format(idx_fold)
    elif config.data.params.idx_fold == 0:
        original_fold = int(config.work_dir.split('_fold')[1])
        if original_fold == idx_fold:
            raise Exception(
                'if you specify fold 0, you should use train.py or resume from fold 1.'
            )
        config.data.params.idx_fold = idx_fold
        config.work_dir = config.work_dir.split('_fold')[0] + '_fold{}'.format(
            idx_fold)
    else:
        raise Exception('you should use train.py if idx_fold is specified.')
    print('info: training for fold {}'.format(idx_fold))

    if not os.path.exists(config.work_dir):
        os.makedirs(config.work_dir, exist_ok=True)

    all_transforms = {}
    all_transforms['train'] = get_transforms(config.transforms.train)
    all_transforms['valid'] = get_transforms(config.transforms.test)

    dataloaders = {
        phase: make_loader(
            df_path=config.data.train_df_path,
            data_dir=config.data.train_dir,
            features=config.data.features,
            phase=phase,
            img_size=(config.data.height, config.data.width),
            batch_size=config.train.batch_size,
            num_workers=config.num_workers,
            idx_fold=config.data.params.idx_fold,
            transforms=all_transforms[phase],
            horizontal_flip=config.train.horizontal_flip,
            model_scale=config.data.model_scale,
            debug=config.debug,
            pseudo_path=config.data.pseudo_path,
        )
        for phase in ['train', 'valid']
    }

    # create segmentation model with pre trained encoder
    num_features = len(config.data.features)
    print('info: num_features =', num_features)
    model = CenterNetFPN(
        slug=config.model.encoder,
        num_classes=num_features,
    )

    optimizer = get_optimizer(model, config)
    scheduler = get_scheduler(optimizer, config)

    # model runner
    runner = SupervisedRunner(model=model, device=get_device())

    # train setting
    criterion, callbacks = get_criterion_and_callback(config)

    if config.train.early_stop_patience > 0:
        callbacks.append(
            EarlyStoppingCallback(patience=config.train.early_stop_patience))

    if config.train.accumulation_size > 0:
        accumulation_steps = config.train.accumulation_size // config.train.batch_size
        callbacks.extend(
            [OptimizerCallback(accumulation_steps=accumulation_steps)])

    # to resume from check points if exists
    if os.path.exists(config.work_dir + '/checkpoints/last_full.pth'):
        callbacks.append(
            CheckpointCallback(resume=config.work_dir +
                               '/checkpoints/last_full.pth'))

    # model training
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        loaders=dataloaders,
        logdir=config.work_dir,
        num_epochs=config.train.num_epochs,
        main_metric=config.train.main_metric,
        minimize_metric=config.train.minimize_metric,
        callbacks=callbacks,
        verbose=True,
        fp16=config.train.fp16,
    )
Ejemplo n.º 13
0
def main(config):
    opts = config()
    path = opts.path
    train = pd.read_csv(f'{path}/train.csv')
    pseudo_label = pd.read_csv(
        './submissions/submission_segmentation_and_classifier.csv')

    n_train = len(os.listdir(f'{path}/train_images'))
    n_test = len(os.listdir(f'{path}/test_images'))
    print(f'There are {n_train} images in train dataset')
    print(f'There are {n_test} images in test dataset')

    train.loc[train['EncodedPixels'].isnull() == False,
              'Image_Label'].apply(lambda x: x.split('_')[1]).value_counts()
    train.loc[train['EncodedPixels'].isnull() == False, 'Image_Label'].apply(
        lambda x: x.split('_')[0]).value_counts().value_counts()

    train['label'] = train['Image_Label'].apply(lambda x: x.split('_')[1])
    train['im_id'] = train['Image_Label'].apply(lambda x: x.split('_')[0])
    id_mask_count = train.loc[train['EncodedPixels'].isnull() == False,
                              'Image_Label'].apply(lambda x: x.split('_')[
                                  0]).value_counts().reset_index().rename(
                                      columns={
                                          'index': 'img_id',
                                          'Image_Label': 'count'
                                      })
    print(id_mask_count.head())

    pseudo_label.loc[pseudo_label['EncodedPixels'].isnull() == False,
                     'Image_Label'].apply(
                         lambda x: x.split('_')[1]).value_counts()
    pseudo_label.loc[pseudo_label['EncodedPixels'].isnull() == False,
                     'Image_Label'].apply(lambda x: x.split('_')[0]
                                          ).value_counts().value_counts()

    pseudo_label['label'] = pseudo_label['Image_Label'].apply(
        lambda x: x.split('_')[1])
    pseudo_label['im_id'] = pseudo_label['Image_Label'].apply(
        lambda x: x.split('_')[0])
    pseudo_label_ids = pseudo_label.loc[
        pseudo_label['EncodedPixels'].isnull() == False, 'Image_Label'].apply(
            lambda x: x.split('_')[0]).value_counts().reset_index().rename(
                columns={
                    'index': 'img_id',
                    'Image_Label': 'count'
                })
    print(pseudo_label_ids.head())

    if not os.path.exists("csvs/train_all.csv"):
        train_ids, valid_ids = train_test_split(
            id_mask_count,
            random_state=39,
            stratify=id_mask_count['count'],
            test_size=0.1)
        valid_ids.to_csv("csvs/valid_threshold.csv", index=False)
        train_ids.to_csv("csvs/train_all.csv", index=False)
    else:
        train_ids = pd.read_csv("csvs/train_all.csv")
        valid_ids = pd.read_csv("csvs/valid_threshold.csv")

    for fold, ((train_ids_new, valid_ids_new),
               (train_ids_pl, valid_ids_pl)) in enumerate(
                   zip(
                       stratified_groups_kfold(train_ids,
                                               target='count',
                                               n_splits=opts.fold_max,
                                               random_state=0),
                       stratified_groups_kfold(pseudo_label_ids,
                                               target='count',
                                               n_splits=opts.fold_max,
                                               random_state=0))):

        train_ids_new.to_csv(f'csvs/train_fold{fold}.csv')
        valid_ids_new.to_csv(f'csvs/valid_fold{fold}.csv')
        train_ids_new = train_ids_new['img_id'].values
        valid_ids_new = valid_ids_new['img_id'].values

        train_ids_pl = train_ids_pl['img_id'].values
        valid_ids_pl = valid_ids_pl['img_id'].values

        ENCODER = opts.backborn
        ENCODER_WEIGHTS = opts.encoder_weights
        DEVICE = 'cuda'

        ACTIVATION = None
        model = get_model(
            model_type=opts.model_type,
            encoder=ENCODER,
            encoder_weights=ENCODER_WEIGHTS,
            activation=ACTIVATION,
            n_classes=opts.class_num,
            task=opts.task,
            center=opts.center,
            attention_type=opts.attention_type,
            head='simple',
            classification=opts.classification,
        )
        model = convert_model(model)
        preprocessing_fn = encoders.get_preprocessing_fn(
            ENCODER, ENCODER_WEIGHTS)

        num_workers = opts.num_workers
        bs = opts.batchsize

        train_dataset = CloudDataset(
            df=train,
            label_smoothing_eps=opts.label_smoothing_eps,
            datatype='train',
            img_ids=train_ids_new,
            transforms=get_training_augmentation(opts.img_size),
            preprocessing=get_preprocessing(preprocessing_fn))
        valid_dataset = CloudDataset(
            df=train,
            datatype='valid',
            img_ids=valid_ids_new,
            transforms=get_validation_augmentation(opts.img_size),
            preprocessing=get_preprocessing(preprocessing_fn))

        ################# make pseudo label dataset #######################
        train_dataset_pl = CloudPseudoLabelDataset(
            df=pseudo_label,
            datatype='train',
            img_ids=train_ids_pl,
            transforms=get_training_augmentation(opts.img_size),
            preprocessing=get_preprocessing(preprocessing_fn))
        valid_dataset_pl = CloudPseudoLabelDataset(
            df=pseudo_label,
            datatype='train',
            img_ids=valid_ids_pl,
            transforms=get_validation_augmentation(opts.img_size),
            preprocessing=get_preprocessing(preprocessing_fn))

        #         train_dataset = ConcatDataset([train_dataset, train_dataset_pl])
        #         valid_dataset = ConcatDataset([valid_dataset, valid_dataset_pl])
        train_dataset = ConcatDataset([train_dataset, valid_dataset_pl])
        ################# make pseudo label dataset #######################
        train_loader = DataLoader(train_dataset,
                                  batch_size=bs,
                                  shuffle=True,
                                  num_workers=num_workers,
                                  drop_last=True)
        valid_loader = DataLoader(valid_dataset,
                                  batch_size=bs,
                                  shuffle=False,
                                  num_workers=num_workers,
                                  drop_last=True)

        loaders = {"train": train_loader, "valid": valid_loader}
        num_epochs = opts.max_epoch
        logdir = f"{opts.logdir}/fold{fold}"
        optimizer = get_optimizer(optimizer=opts.optimizer,
                                  lookahead=opts.lookahead,
                                  model=model,
                                  separate_decoder=True,
                                  lr=opts.lr,
                                  lr_e=opts.lr_e)
        opt_level = 'O1'
        model.cuda()
        model, optimizer = amp.initialize(model,
                                          optimizer,
                                          opt_level=opt_level)
        scheduler = opts.scheduler(optimizer)
        criterion = opts.criterion
        runner = SupervisedRunner()
        if opts.task == "segmentation":
            callbacks = [DiceCallback()]
        else:
            callbacks = []
        if opts.early_stop:
            callbacks.append(
                EarlyStoppingCallback(patience=10, min_delta=0.001))
        if opts.mixup:
            callbacks.append(MixupCallback(alpha=0.25))
        if opts.accumeration is not None:
            callbacks.append(CriterionCallback())
            callbacks.append(
                OptimizerCallback(accumulation_steps=opts.accumeration))
        print(
            f"############################## Start training of fold{fold}! ##############################"
        )
        runner.train(model=model,
                     criterion=criterion,
                     optimizer=optimizer,
                     scheduler=scheduler,
                     loaders=loaders,
                     callbacks=callbacks,
                     logdir=logdir,
                     num_epochs=num_epochs,
                     verbose=True)
        print(
            f"############################## Finish training of fold{fold}! ##############################"
        )
        del model
        del loaders
        del runner
        torch.cuda.empty_cache()
        gc.collect()
Ejemplo n.º 14
0
def train_model():

    model = smp.Unet(
        encoder_name=ENCODER,
        encoder_weights=ENCODER_WEIGHTS,
        classes=4,
        activation=ACTIVATION,
    )

    preprocessing_fn = smp.encoders.get_preprocessing_fn(
        ENCODER, ENCODER_WEIGHTS)

    num_workers = 0
    bs = 5
    train_dataset = CloudDataset(
        df=train,
        datatype='train',
        img_ids=train_ids,
        transforms=get_training_augmentation(),
        preprocessing=get_preprocessing(preprocessing_fn))
    valid_dataset = CloudDataset(
        df=train,
        datatype='valid',
        img_ids=valid_ids,
        transforms=get_validation_augmentation(),
        preprocessing=get_preprocessing(preprocessing_fn))

    train_loader = DataLoader(train_dataset,
                              batch_size=bs,
                              shuffle=True,
                              num_workers=num_workers)
    valid_loader = DataLoader(valid_dataset,
                              batch_size=1,
                              shuffle=False,
                              num_workers=num_workers)

    loaders = {"train": train_loader, "valid": valid_loader}

    num_epochs = 40

    # model, criterion, optimizer
    optimizer = RAdam([
        {
            'params': model.decoder.parameters(),
            'lr': 1e-2
        },
        {
            'params': model.encoder.parameters(),
            'lr': 1e-3
        },
    ])
    scheduler = ReduceLROnPlateau(optimizer,
                                  factor=0.15,
                                  patience=2,
                                  threshold=0.001)
    criterion = smp.utils.losses.BCEDiceLoss(eps=1.)

    runner = SupervisedRunner()

    runner.train(model=model,
                 criterion=criterion,
                 optimizer=optimizer,
                 scheduler=scheduler,
                 loaders=loaders,
                 callbacks=[
                     DiceCallback(),
                     EarlyStoppingCallback(patience=5, min_delta=0.001),
                     CriterionCallback(),
                     OptimizerCallback(accumulation_steps=2)
                 ],
                 logdir=logdir,
                 num_epochs=num_epochs,
                 verbose=True)

    return True
Ejemplo n.º 15
0
def run(config_file):
    config = load_config(config_file)

    config.work_dir = 'result/' + config.work_dir
    print('working directory:', config.work_dir)

    all_transforms = {}
    all_transforms['train'] = Transform(size=config.data.image_size,
                                        threshold=20.,
                                        sigma=-1.,
                                        blur_ratio=0.2,
                                        noise_ratio=0.2,
                                        cutout_ratio=0.2,
                                        grid_distortion_ratio=0.2,
                                        random_brightness_ratio=0.2,
                                        piece_affine_ratio=0.2,
                                        ssr_ratio=0.2)
    all_transforms['valid'] = Transform(size=config.data.image_size)

    dataloaders = {
        phase: make_loader(
            phase=phase,
            batch_size=config.train.batch_size,
            num_workers=config.num_workers,
            idx_fold=config.data.params.idx,
            fold_csv=config.data.params.fold_csv,
            transforms=all_transforms[phase],
            # debug=config.debug
        )
        for phase in ['train', 'valid']
    }
    model = get_model(config)
    model = model.to(device)
    # we have multiple criterions
    criterion = {
        "ce": nn.CrossEntropyLoss(),
        # Define your awesome losses in here. Ex: Focal, lovasz, etc
    }
    optimizer = RAdam(model.parameters(), lr=config.optimizer.params.lr)
    if config.optimizer.lookahead.apply:
        optimizer = Lookahead(optimizer)

    scheduler = get_scheduler(optimizer, config)

    # model runner
    runner = SupervisedRunner(
        device=device,
        input_key="images",
        output_key=("logit_grapheme_root", "logit_vowel_diacritic",
                    "logit_consonant_diacritic"),
        input_target_key=("grapheme_roots", "vowel_diacritics",
                          "consonant_diacritics"),
    )

    callbacks = []

    if config.train.early_stop_patience > 0:
        callbacks.append(
            EarlyStoppingCallback(patience=config.train.early_stop_patience))

    if config.train.accumulation_size > 0:
        accumulation_steps = config.train.accumulation_size // config.train.batch_size
        callbacks.extend(
            [OptimizerCallback(accumulation_steps=accumulation_steps)])

    # to resume from check points if exists
    if os.path.exists(config.work_dir +
                      '/checkpoints/best.pth') and config.train.resume:
        callbacks.append(
            CheckpointCallback(resume=config.work_dir +
                               '/checkpoints/last_full.pth'))
    if config.train.mixup:
        CC = MixupCallback
    else:
        CC = CriterionCallback

    callbacks.extend([
        CC(
            input_key="grapheme_roots",
            output_key="logit_grapheme_root",
            criterion_key='ce',
            prefix='loss_gr',
        ),
        CC(
            input_key="vowel_diacritics",
            output_key="logit_vowel_diacritic",
            criterion_key='ce',
            prefix='loss_wd',
        ),
        CC(
            input_key="consonant_diacritics",
            output_key="logit_consonant_diacritic",
            criterion_key='ce',
            prefix='loss_cd',
        ),
        CriterionAggregatorCallback(
            prefix="loss",
            loss_aggregate_fn="weighted_sum",
            loss_keys={
                "loss_gr": 2.0,
                "loss_wd": 1.0,
                "loss_cd": 1.0
            },
        ),

        # metrics
        HMacroAveragedRecall(),
    ])

    # model training
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        loaders=dataloaders,
        logdir=config.work_dir,
        num_epochs=config.train.num_epochs,
        main_metric="hmar",
        minimize_metric=False,
        monitoring_params=None,
        callbacks=callbacks,
        verbose=True,
        fp16=False,
    )
Ejemplo n.º 16
0
fp16_params = None  # dict(opt_level="O1")
runner = SupervisedRunner(device='cuda')


runner.train(
    model=model,
    criterion=criterion,
    scheduler=scheduler,
    optimizer=optimizer,
    loaders=loaders,
    callbacks=[
        # wAUC(),
        F1ScoreCallback(),
        AUCCallback(num_classes=4),
        AccuracyCallback(prefix='ACC'),
        OptimizerCallback(accumulation_steps=args.acc)],
    logdir=logdir,
    num_epochs=num_epochs,
    fp16=fp16_params,
    verbose=True
)
if args.test > 0:
    test_preds_proba: Union[List, Iterable, np.ndarray] = []
    model.eval()
    progress_bar_test = tqdm(test_dataset)
    with torch.no_grad():
        for i, im in enumerate(progress_bar_test):
            inputs = im.to('cuda')
            # flip horizontal
            im = kornia.augmentation.F.hflip(inputs)
            outputs = model(im)
Ejemplo n.º 17
0
                          prefix="loss_h2",
                          criterion_key="h2"),
        CriterionCallback(input_key="h3_targets",
                          output_key="h3_logits",
                          prefix="loss_h3",
                          criterion_key="h3"),
        crit_agg,
    ])

callbacks.extend([
    score_callback,
    EarlyStoppingCallback(metric='weight_recall',
                          patience=early_stop_epochs,
                          min_delta=0.001)
])

callbacks.append(OptimizerCallback(grad_clip_params={'params': 1.0}), )

runner.train(
    fp16=args.fp16,
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    loaders=loaders,
    callbacks=callbacks,
    logdir=logdir,
    num_epochs=num_epochs,
    verbose=True,
)
Ejemplo n.º 18
0
def run(config_file):
    config = load_config(config_file)
    if 'COLAB_GPU' in os.environ:
        config.work_dir = '/content/drive/My Drive/kaggle_cloud/' + config.work_dir
    elif 'KAGGLE_WORKING_DIR' in os.environ:
        config.work_dir = '/kaggle/working/' + config.work_dir
    print('working directory:', config.work_dir)

    if not os.path.exists(config.work_dir):
        os.makedirs(config.work_dir, exist_ok=True)
    save_config(config, config.work_dir + '/config.yml')

    os.environ['CUDA_VISIBLE_DEVICES'] = '0'

    all_transforms = {}
    all_transforms['train'] = get_transforms(config.transforms.train)
    all_transforms['valid'] = get_transforms(config.transforms.test)

    dataloaders = {
        phase: make_loader(
            data_folder=config.data.train_dir,
            df_path=config.data.train_df_path,
            phase=phase,
            img_size=(config.data.height, config.data.width),
            batch_size=config.train.batch_size,
            num_workers=config.num_workers,
            idx_fold=config.data.params.idx_fold,
            transforms=all_transforms[phase],
            num_classes=config.data.num_classes,
            pseudo_label_path=config.train.pseudo_label_path,
            debug=config.debug
        )
        for phase in ['train', 'valid']
    }

    # create segmentation model with pre trained encoder
    model = getattr(smp, config.model.arch)(
        encoder_name=config.model.encoder,
        encoder_weights=config.model.pretrained,
        classes=config.data.num_classes,
        activation=None,
    )

    # train setting
    criterion = get_loss(config)
    params = [
        {'params': model.decoder.parameters(), 'lr': config.optimizer.params.decoder_lr},
        {'params': model.encoder.parameters(), 'lr': config.optimizer.params.encoder_lr},
    ]
    optimizer = get_optimizer(params, config)
    scheduler = get_scheduler(optimizer, config)

    # model runner
    runner = SupervisedRunner(model=model, device=get_device())

    callbacks = [DiceCallback(), IouCallback()]

    if config.train.early_stop_patience > 0:
        callbacks.append(EarlyStoppingCallback(
            patience=config.train.early_stop_patience))

    if config.train.accumulation_size > 0:
        accumulation_steps = config.train.accumulation_size // config.train.batch_size
        callbacks.extend(
            [CriterionCallback(),
             OptimizerCallback(accumulation_steps=accumulation_steps)]
        )

    # to resume from check points if exists
    if os.path.exists(config.work_dir + '/checkpoints/best.pth'):
        callbacks.append(CheckpointCallback(
            resume=config.work_dir + '/checkpoints/last_full.pth'))

    if config.train.mixup:
        callbacks.append(MixupCallback())

    if config.train.cutmix:
        callbacks.append(CutMixCallback())

    # model training
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        loaders=dataloaders,
        logdir=config.work_dir,
        num_epochs=config.train.num_epochs,
        main_metric=config.train.main_metric,
        minimize_metric=config.train.minimize_metric,
        callbacks=callbacks,
        verbose=True,
        fp16=True,
    )