def get_optimizer(optimizer_name: str, parameters, learning_rate: float, weight_decay=0.0, **kwargs):
    """Create an optimizer by (case-insensitive) name; raise ValueError for unknown names.

    SGD/Adam/RMSprop/AdamW are the torch.optim classes; RAdam, Ranger, Lamb,
    DiffGrad and Novograd are expected to come from third-party packages
    (e.g. torch-optimizer / pytorch-ranger), and the fused variants from NVIDIA apex.
    """
    optimizer_name = optimizer_name.lower()

    if optimizer_name == "sgd":
        return SGD(parameters, learning_rate, momentum=0.9, weight_decay=weight_decay, **kwargs)

    if optimizer_name == "adam":
        return Adam(parameters, learning_rate, weight_decay=weight_decay, eps=1e-5, **kwargs)  # As Jeremy suggests

    if optimizer_name == "rms":
        return RMSprop(parameters, learning_rate, weight_decay=weight_decay, **kwargs)

    if optimizer_name == "adamw":
        return AdamW(parameters, learning_rate, weight_decay=weight_decay, eps=1e-5, **kwargs)

    if optimizer_name == "radam":
        return RAdam(parameters, learning_rate, weight_decay=weight_decay, eps=1e-5, **kwargs)  # As Jeremy suggests

    if optimizer_name == "ranger":
        return Ranger(parameters, learning_rate, weight_decay=weight_decay, **kwargs)

    # if optimizer_name == "qhadamw":
    #     return QHAdamW(parameters, learning_rate, weight_decay=weight_decay,
    #                    **kwargs)
    #
    if optimizer_name == "lamb":
        return Lamb(parameters, learning_rate, weight_decay=weight_decay, **kwargs)

    if optimizer_name == "fused_lamb":
        from apex.optimizers import FusedLAMB

        return FusedLAMB(parameters, learning_rate, weight_decay=weight_decay, **kwargs)

    if optimizer_name == "fused_adam":
        from apex.optimizers import FusedAdam

        return FusedAdam(parameters, learning_rate, eps=1e-5, weight_decay=weight_decay, adam_w_mode=True, **kwargs)

    if optimizer_name == "fused_sgd":
        from apex.optimizers import FusedSGD

        return FusedSGD(parameters, learning_rate, weight_decay=weight_decay, momentum=0.9, **kwargs)

    if optimizer_name == "diffgrad":
        return DiffGrad(parameters, learning_rate, eps=1e-5, weight_decay=weight_decay, **kwargs)

    if optimizer_name == "novograd":
        return Novograd(parameters, learning_rate, eps=1e-5, weight_decay=weight_decay, **kwargs)

    raise ValueError(f"Unsupported optimizer name: {optimizer_name}")
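
# Usage sketch (an addition, not part of the original snippet): ask the factory
# for AdamW by name. torch.optim.AdamW is assumed to be the AdamW referenced above.
from torch.optim import AdamW
import torch.nn as nn

toy_model = nn.Linear(16, 4)  # stand-in model, just for the demonstration
toy_optimizer = get_optimizer("adamw", toy_model.parameters(), learning_rate=3e-4, weight_decay=1e-2)
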
# In[24]:

learning_rate = 0.001
encoder_learning_rate = 0.0005

# Since we use a pre-trained encoder, we will reduce the learning rate on it.
layerwise_params = {
    "encoder*": dict(lr=encoder_learning_rate, weight_decay=0.00003)
}

# This function removes weight_decay for biases and applies our layerwise_params
model_params = utils.process_model_params(model,
                                          layerwise_params=layerwise_params)

# Catalyst has new SOTA optimizers out of box
base_optimizer = RAdam(model_params, lr=learning_rate, weight_decay=0.0003)
optimizer = Lookahead(base_optimizer)
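# Note (added): RAdam wrapped in Lookahead is the same combination that the
# Ranger optimizer packages into a single class.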

scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                 factor=0.25,
                                                 patience=2)
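
# Note (added): ReduceLROnPlateau multiplies the learning rate by `factor` once the
# monitored metric has not improved for `patience` epochs. Stepped by hand it needs
# that metric, e.g.:
#
#     scheduler.step(valid_loss)
#
# Catalyst's runner performs the equivalent step when the scheduler is passed to
# runner.train().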

# In[25]:

num_epochs = 3
logdir = "./logs/segmentation"

device = utils.get_device()
print(f"device: {device}")

if is_fp16_used:
    ...  # the body of this branch is missing from this excerpt

# A separate example begins here: the data loaders are built from command-line
# arguments. The line naming the helper was lost, so the name below is hypothetical.
loaders = build_loaders(  # hypothetical name; only the call's arguments survived
    args.data_folder,
    args.meta_info_file,
    one_hot_encoding=args.one_hot_encoding,
    bs=args.batch_size,
    num_classes=args.num_classes,
    num_workers=args.num_workers,
    augmenters=augmenters,
)

if args.optimizer == 'Adam':
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.max_lr,
                                 weight_decay=1e-4)
elif args.optimizer == 'RAdam':
    base_optimizer = RAdam(model.parameters(),
                           lr=args.max_lr,
                           weight_decay=1e-4)
    optimizer = Lookahead(base_optimizer)
elif args.optimizer == 'SGD':
    optimizer = torch.optim.SGD(model.parameters(),
                                momentum=0.95,
                                lr=args.max_lr,
                                weight_decay=1e-4)
else:
    print('You have chosen the default optimizer: Adam')
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.max_lr,
                                 weight_decay=1e-4)

criterion = OrderedDict({
    "ce": CustomCrossEntropyLoss(),
})  # any further entries were truncated in the source excerpt

Example #4

models = {
    # only this entry survives in the excerpt; earlier entries were cut off
    'attn_linknet': (LinkNetGated, {
        'num_classes': 4,
        'in_channels': 3
    })
}
preprocessing_fn = smp.encoders.get_preprocessing_fn(encoder_name=args.encoder,
                                                     pretrained='imagenet')

model = models[args.model.lower()][0](**models[args.model.lower()][1]).cuda()

layerwise_params = {"enc*": dict(lr=args.lr_e, weight_decay=0.00001)}
model_params = utils.process_model_params(model,
                                          layerwise_params=layerwise_params)

base_optimizer = RAdam(model_params, lr=args.lr_d, weight_decay=1e-6)
optimizer = Lookahead(base_optimizer)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                 patience=5,
                                                 factor=0.15)
criteria = {'dice': DiceLoss(), 'bce': torch.nn.BCEWithLogitsLoss()}
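
# Sketch (an addition, not from the original example): one way to combine the two
# criteria above into a single training loss; the weights are illustrative only.
def combined_loss(logits, targets, w_dice=1.0, w_bce=1.0):
    return w_dice * criteria['dice'](logits, targets) + w_bce * criteria['bce'](logits, targets)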

train = pd.read_csv('train_preprocessed.csv')
train_ids = pd.read_csv(f'./folds/fold_{args.fold}_train.csv').values.ravel()
valid_ids = pd.read_csv(f'./folds/fold_{args.fold}_val.csv').values.ravel()
num_workers = 4
bs = args.bs
train_dataset = CloudDataset(df=train,
                             image_size=(args.size, args.size * 2),
                             path=path,
                             datatype='train')  # remaining arguments truncated in the source excerpt

Example #5

def run(config_file):
    config = load_config(config_file)

    config.work_dir = 'result/' + config.work_dir
    print('working directory:', config.work_dir)

    all_transforms = {}
    all_transforms['train'] = Transform(size=config.data.image_size,
                                        threshold=20.,
                                        sigma=-1.,
                                        blur_ratio=0.2,
                                        noise_ratio=0.2,
                                        cutout_ratio=0.2,
                                        grid_distortion_ratio=0.2,
                                        random_brightness_ratio=0.2,
                                        piece_affine_ratio=0.2,
                                        ssr_ratio=0.2)
    all_transforms['valid'] = Transform(size=config.data.image_size)

    dataloaders = {
        phase: make_loader(
            phase=phase,
            batch_size=config.train.batch_size,
            num_workers=config.num_workers,
            idx_fold=config.data.params.idx,
            fold_csv=config.data.params.fold_csv,
            transforms=all_transforms[phase],
            # debug=config.debug
        )
        for phase in ['train', 'valid']
    }
    model = get_model(config)
    model = model.to(device)
    # we have multiple criteria
    criterion = {
        "ce": nn.CrossEntropyLoss(),
        # Define additional losses here, e.g. Focal, Lovasz, etc.
    }
    optimizer = RAdam(model.parameters(), lr=config.optimizer.params.lr)
    if config.optimizer.lookahead.apply:
        optimizer = Lookahead(optimizer)

    scheduler = get_scheduler(optimizer, config)

    # model runner
    runner = SupervisedRunner(
        device=device,
        input_key="images",
        output_key=("logit_grapheme_root", "logit_vowel_diacritic",
                    "logit_consonant_diacritic"),
        input_target_key=("grapheme_roots", "vowel_diacritics",
                          "consonant_diacritics"),
    )
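
    # Note (added): with these keys the runner feeds batch["images"] to the model,
    # expects forward() to return three logit tensors mapped onto output_key, and
    # reads the three targets from the batch under the input_target_key names.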

    callbacks = []

    if config.train.early_stop_patience > 0:
        callbacks.append(
            EarlyStoppingCallback(patience=config.train.early_stop_patience))

    if config.train.accumulation_size > 0:
        accumulation_steps = config.train.accumulation_size // config.train.batch_size
        callbacks.extend(
            [OptimizerCallback(accumulation_steps=accumulation_steps)])

    # resume from a checkpoint if one exists
    if os.path.exists(config.work_dir +
                      '/checkpoints/best.pth') and config.train.resume:
        callbacks.append(
            CheckpointCallback(resume=config.work_dir +
                               '/checkpoints/last_full.pth'))
    if config.train.mixup:
        CC = MixupCallback
    else:
        CC = CriterionCallback

    callbacks.extend([
        CC(
            input_key="grapheme_roots",
            output_key="logit_grapheme_root",
            criterion_key='ce',
            prefix='loss_gr',
        ),
        CC(
            input_key="vowel_diacritics",
            output_key="logit_vowel_diacritic",
            criterion_key='ce',
            prefix='loss_wd',
        ),
        CC(
            input_key="consonant_diacritics",
            output_key="logit_consonant_diacritic",
            criterion_key='ce',
            prefix='loss_cd',
        ),
        CriterionAggregatorCallback(
            prefix="loss",
            loss_aggregate_fn="weighted_sum",
            loss_keys={
                "loss_gr": 2.0,
                "loss_wd": 1.0,
                "loss_cd": 1.0
            },
        ),

        # metrics
        HMacroAveragedRecall(),
    ])
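
    # Note (added): the aggregator above produces the total training loss as
    #   loss = 2.0 * loss_gr + 1.0 * loss_wd + 1.0 * loss_cd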

    # model training
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        loaders=dataloaders,
        logdir=config.work_dir,
        num_epochs=config.train.num_epochs,
        main_metric="hmar",
        minimize_metric=False,
        monitoring_params=None,
        callbacks=callbacks,
        verbose=True,
        fp16=False,
    )
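
# Usage sketch (an addition; the config path is hypothetical):
#
#     if __name__ == '__main__':
#         run('configs/train.yml')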