Code Example #1
from typing import Iterable, Tuple

# `RAdam` and `Lookahead` are assumed to be importable from the surrounding package.
def Ranger(
    params: Iterable,
    betas: Tuple[float, float] = (0.95, 0.999),
    eps: float = 1e-5,
    k: int = 6,
    alpha: float = 0.5,
    **kwargs
):
    "Convenience method for `Lookahead` with `RAdam`"
    return Lookahead(RAdam(params, betas=betas, eps=eps, **kwargs), alpha=alpha, k=k)
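A minimal usage sketch for the wrapper above, assuming `RAdam` and `Lookahead` resolve as in the snippet; the toy `nn.Linear` model and the learning rate are purely illustrative:

import torch.nn as nn

model = nn.Linear(10, 2)                          # hypothetical toy model
optimizer = Ranger(model.parameters(), lr=1e-3)   # lr is forwarded to RAdam via **kwargs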
Code Example #2
def create_optimizer(args,
                     model,
                     filter_bias_and_bn=True,
                     classification_layer_name=None):
    opt_lower = args.opt.lower()
    weight_decay = args.weight_decay
    if 'adamw' in opt_lower or 'radam' in opt_lower:
        # Compensate for the way current AdamW and RAdam optimizers apply LR to the weight-decay
        # I don't believe they follow the paper or original Torch7 impl which schedules weight
        # decay based on the ratio of current_lr/initial_lr
        weight_decay /= args.lr

    if weight_decay and filter_bias_and_bn:  # batch norm and bias params
        if classification_layer_name is not None:
            parameters = set_lr_per_params(args, model,
                                           classification_layer_name,
                                           weight_decay)
        else:
            parameters = add_weight_decay(model, weight_decay)
        weight_decay = 0.  # reset to 0
    else:
        if classification_layer_name is not None:
            parameters = set_lr_per_params(args,
                                           model,
                                           classification_layer_name,
                                           weight_decay=0)
        else:
            parameters = model.parameters()

    if 'fused' in opt_lower:
        assert has_apex and torch.cuda.is_available(), 'APEX and CUDA required for fused optimizers'

    opt_split = opt_lower.split('_')
    opt_lower = opt_split[-1]
    if opt_lower == 'sgd' or opt_lower == 'nesterov':
        optimizer = optim.SGD(parameters,
                              lr=args.lr,
                              momentum=args.momentum,
                              weight_decay=weight_decay,
                              nesterov=True)
    elif opt_lower == 'momentum':
        optimizer = optim.SGD(parameters,
                              lr=args.lr,
                              momentum=args.momentum,
                              weight_decay=weight_decay,
                              nesterov=False)
    elif opt_lower == 'adam':
        optimizer = optim.Adam(parameters,
                               lr=args.lr,
                               weight_decay=weight_decay,
                               eps=args.opt_eps)
    elif opt_lower == 'adamw':
        optimizer = AdamW(parameters,
                          lr=args.lr,
                          weight_decay=weight_decay,
                          eps=args.opt_eps)
    elif opt_lower == 'nadam':
        optimizer = Nadam(parameters,
                          lr=args.lr,
                          weight_decay=weight_decay,
                          eps=args.opt_eps)
    elif opt_lower == 'radam':
        optimizer = RAdam(parameters,
                          lr=args.lr,
                          weight_decay=weight_decay,
                          eps=args.opt_eps)
    elif opt_lower == 'adamp':
        optimizer = AdamP(parameters,
                          lr=args.lr,
                          weight_decay=weight_decay,
                          eps=args.opt_eps,
                          delta=0.1,
                          wd_ratio=0.01,
                          nesterov=True)
    elif opt_lower == 'sgdp':
        optimizer = SGDP(parameters,
                         lr=args.lr,
                         momentum=args.momentum,
                         weight_decay=weight_decay,
                         eps=args.opt_eps,
                         nesterov=True)
    elif opt_lower == 'adadelta':
        optimizer = optim.Adadelta(parameters,
                                   lr=args.lr,
                                   weight_decay=weight_decay,
                                   eps=args.opt_eps)
    elif opt_lower == 'rmsprop':
        optimizer = optim.RMSprop(parameters,
                                  lr=args.lr,
                                  alpha=0.9,
                                  eps=args.opt_eps,
                                  momentum=args.momentum,
                                  weight_decay=weight_decay)
    elif opt_lower == 'rmsproptf':
        optimizer = RMSpropTF(parameters,
                              lr=args.lr,
                              alpha=0.9,
                              eps=args.opt_eps,
                              momentum=args.momentum,
                              weight_decay=weight_decay)
    elif opt_lower == 'novograd':
        optimizer = NovoGrad(parameters,
                             lr=args.lr,
                             weight_decay=weight_decay,
                             eps=args.opt_eps)
    elif opt_lower == 'nvnovograd':
        optimizer = NvNovoGrad(parameters,
                               lr=args.lr,
                               weight_decay=weight_decay,
                               eps=args.opt_eps)
    elif opt_lower == 'fusedsgd':
        optimizer = FusedSGD(parameters,
                             lr=args.lr,
                             momentum=args.momentum,
                             weight_decay=weight_decay,
                             nesterov=True)
    elif opt_lower == 'fusedmomentum':
        optimizer = FusedSGD(parameters,
                             lr=args.lr,
                             momentum=args.momentum,
                             weight_decay=weight_decay,
                             nesterov=False)
    elif opt_lower == 'fusedadam':
        optimizer = FusedAdam(parameters,
                              lr=args.lr,
                              adam_w_mode=False,
                              weight_decay=weight_decay,
                              eps=args.opt_eps)
    elif opt_lower == 'fusedadamw':
        optimizer = FusedAdam(parameters,
                              lr=args.lr,
                              adam_w_mode=True,
                              weight_decay=weight_decay,
                              eps=args.opt_eps)
    elif opt_lower == 'fusedlamb':
        optimizer = FusedLAMB(parameters,
                              lr=args.lr,
                              weight_decay=weight_decay,
                              eps=args.opt_eps)
    elif opt_lower == 'fusednovograd':
        optimizer = FusedNovoGrad(parameters,
                                  lr=args.lr,
                                  betas=(0.95, 0.98),
                                  weight_decay=weight_decay,
                                  eps=args.opt_eps)
    else:
        raise ValueError(f"Invalid optimizer: {opt_lower}")

    if len(opt_split) > 1:
        if opt_split[0] == 'lookahead':
            optimizer = Lookahead(optimizer)

    return optimizer
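The `args` object only needs the attributes the factory reads above (`opt`, `lr`, `weight_decay`, `momentum`, `opt_eps`). A hedged call sketch, with a hypothetical `SimpleNamespace` standing in for parsed command-line arguments and `model` being any `torch.nn.Module`:

from types import SimpleNamespace

args = SimpleNamespace(opt='lookahead_radam', lr=1e-3, weight_decay=1e-2,
                       momentum=0.9, opt_eps=1e-8)
optimizer = create_optimizer(args, model)  # returns RAdam wrapped in Lookahead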
Code Example #3
def create_optimizer(args, model, filter_bias_and_bn=True):
    opt_lower = args.opt.lower()
    weight_decay = args.weight_decay
    if opt_lower == 'adamw' or opt_lower == 'radam':
        # compensate for the way current AdamW and RAdam optimizers
        # apply the weight-decay
        weight_decay /= args.lr
    if weight_decay and filter_bias_and_bn:
        parameters = add_weight_decay(model, weight_decay)
        weight_decay = 0.
    else:
        parameters = model.parameters()

    opt_split = opt_lower.split('_')
    opt_lower = opt_split[-1]
    if opt_lower == 'sgd':
        optimizer = optim.SGD(parameters,
                              lr=args.lr,
                              momentum=args.momentum,
                              weight_decay=weight_decay,
                              nesterov=True)
    elif opt_lower == 'adam':
        optimizer = optim.Adam(parameters,
                               lr=args.lr,
                               weight_decay=weight_decay,
                               eps=args.opt_eps)
    elif opt_lower == 'adamw':
        optimizer = AdamW(parameters,
                          lr=args.lr,
                          weight_decay=weight_decay,
                          eps=args.opt_eps)
    elif opt_lower == 'nadam':
        optimizer = Nadam(parameters,
                          lr=args.lr,
                          weight_decay=weight_decay,
                          eps=args.opt_eps)
    elif opt_lower == 'radam':
        optimizer = RAdam(parameters,
                          lr=args.lr,
                          weight_decay=weight_decay,
                          eps=args.opt_eps)
    elif opt_lower == 'adadelta':
        optimizer = optim.Adadelta(parameters,
                                   lr=args.lr,
                                   weight_decay=weight_decay,
                                   eps=args.opt_eps)
    elif opt_lower == 'rmsprop':
        optimizer = optim.RMSprop(parameters,
                                  lr=args.lr,
                                  alpha=0.9,
                                  eps=args.opt_eps,
                                  momentum=args.momentum,
                                  weight_decay=weight_decay)
    elif opt_lower == 'rmsproptf':
        optimizer = RMSpropTF(parameters,
                              lr=args.lr,
                              alpha=0.9,
                              eps=args.opt_eps,
                              momentum=args.momentum,
                              weight_decay=weight_decay)
    elif opt_lower == 'novograd':
        optimizer = NovoGrad(parameters,
                             lr=args.lr,
                             weight_decay=weight_decay,
                             eps=args.opt_eps)
    else:
        raise ValueError(f"Invalid optimizer: {opt_lower}")

    if len(opt_split) > 1:
        if opt_split[0] == 'lookahead':
            optimizer = Lookahead(optimizer)

    return optimizer
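The factories in Code Examples #2 and #3 (and #4 below) delegate the bias/BatchNorm filtering to an `add_weight_decay` helper that is not shown here. A common implementation (timm follows essentially this pattern) splits the parameters into a no-decay group (biases and 1-D tensors such as norm scales) and a decay group; a sketch:

def add_weight_decay(model, weight_decay=1e-5, skip_list=()):
    decay, no_decay = [], []
    for name, param in model.named_parameters():
        if not param.requires_grad:
            continue  # skip frozen parameters
        # biases and 1-D tensors (e.g. BatchNorm weights) are exempt from weight decay
        if param.ndim <= 1 or name.endswith(".bias") or name in skip_list:
            no_decay.append(param)
        else:
            decay.append(param)
    return [{'params': no_decay, 'weight_decay': 0.},
            {'params': decay, 'weight_decay': weight_decay}]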
Code Example #4
def create_optimizer(args, model, filter_bias_and_bn=True):
    opt_lower = args.opt.lower()
    weight_decay = args.weight_decay
    if 'adamw' in opt_lower or 'radam' in opt_lower:
        # Compensate for the way current AdamW and RAdam optimizers apply LR to the weight-decay
        # I don't believe they follow the paper or original Torch7 impl which schedules weight
        # decay based on the ratio of current_lr/initial_lr
        weight_decay /= args.lr
    if weight_decay and filter_bias_and_bn:
        print("has weight decay and filter bias")
        parameters = add_weight_decay(model, weight_decay)
        weight_decay = 0.
    else:
        print("Comes here to unfrozen params inside optim")

        parameters = unfrozen_params(model)

    if 'fused' in opt_lower:
        assert has_apex and torch.cuda.is_available(), 'APEX and CUDA required for fused optimizers'

    opt_split = opt_lower.split('_')
    opt_lower = opt_split[-1]
    if opt_lower == 'sgd' or opt_lower == 'nesterov':
        optimizer = optim.SGD(parameters,
                              lr=args.lr,
                              momentum=args.momentum,
                              weight_decay=weight_decay,
                              nesterov=True)
    elif opt_lower == 'momentum':
        optimizer = optim.SGD(parameters,
                              lr=args.lr,
                              momentum=args.momentum,
                              weight_decay=weight_decay,
                              nesterov=False)
    elif opt_lower == 'adam':
        optimizer = optim.Adam(parameters,
                               lr=args.lr,
                               weight_decay=weight_decay,
                               eps=args.opt_eps)
    elif opt_lower == 'adamw':
        optimizer = AdamW(parameters,
                          lr=args.lr,
                          weight_decay=weight_decay,
                          eps=args.opt_eps)
    elif opt_lower == 'nadam':
        optimizer = Nadam(parameters,
                          lr=args.lr,
                          weight_decay=weight_decay,
                          eps=args.opt_eps)
    elif opt_lower == 'radam':
        optimizer = RAdam(parameters,
                          lr=args.lr,
                          weight_decay=weight_decay,
                          eps=args.opt_eps)
    elif opt_lower == 'adadelta':
        optimizer = optim.Adadelta(parameters,
                                   lr=args.lr,
                                   weight_decay=weight_decay,
                                   eps=args.opt_eps)
    elif opt_lower == 'rmsprop':
        optimizer = optim.RMSprop(parameters,
                                  lr=args.lr,
                                  alpha=0.9,
                                  eps=args.opt_eps,
                                  momentum=args.momentum,
                                  weight_decay=weight_decay)
    elif opt_lower == 'rmsproptf':
        optimizer = RMSpropTF(parameters,
                              lr=args.lr,
                              alpha=0.9,
                              eps=args.opt_eps,
                              momentum=args.momentum,
                              weight_decay=weight_decay)
    elif opt_lower == 'novograd':
        optimizer = NovoGrad(parameters,
                             lr=args.lr,
                             weight_decay=weight_decay,
                             eps=args.opt_eps)
    elif opt_lower == 'nvnovograd':
        optimizer = NvNovoGrad(parameters,
                               lr=args.lr,
                               weight_decay=weight_decay,
                               eps=args.opt_eps)
    elif opt_lower == 'fusedsgd':
        optimizer = FusedSGD(parameters,
                             lr=args.lr,
                             momentum=args.momentum,
                             weight_decay=weight_decay,
                             nesterov=True)
    elif opt_lower == 'fusedmomentum':
        print("my optimizer")
        optimizer = FusedSGD(parameters,
                             lr=args.lr,
                             momentum=args.momentum,
                             weight_decay=weight_decay,
                             nesterov=False)
    elif opt_lower == 'fusedadam':
        optimizer = FusedAdam(parameters,
                              lr=args.lr,
                              adam_w_mode=False,
                              weight_decay=weight_decay,
                              eps=args.opt_eps)
    elif opt_lower == 'fusedadamw':
        optimizer = FusedAdam(parameters,
                              lr=args.lr,
                              adam_w_mode=True,
                              weight_decay=weight_decay,
                              eps=args.opt_eps)
    elif opt_lower == 'fusedlamb':
        optimizer = FusedLAMB(parameters,
                              lr=args.lr,
                              weight_decay=weight_decay,
                              eps=args.opt_eps)
    elif opt_lower == 'fusednovograd':
        optimizer = FusedNovoGrad(parameters,
                                  lr=args.lr,
                                  betas=(0.95, 0.98),
                                  weight_decay=weight_decay,
                                  eps=args.opt_eps)
    else:
        raise ValueError(f"Invalid optimizer: {opt_lower}")

    if len(opt_split) > 1:
        if opt_split[0] == 'lookahead':
            optimizer = Lookahead(optimizer)

    return optimizer
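Code Example #4 additionally calls an `unfrozen_params` helper that is not shown; presumably it just collects the parameters that still require gradients. A hedged sketch:

def unfrozen_params(model):
    # hypothetical helper: keep only parameters that are not frozen
    return [p for p in model.parameters() if p.requires_grad]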
Code Example #5
# In the original script this entry point is presumably decorated with
# @hydra.main(...); the config path/name are not shown here.
def main(cfg: DictConfig):
    print('Cassava Leaf Disease Classification')
    cur_dir = hydra.utils.get_original_cwd()
    os.chdir(cur_dir)
    # Config  -------------------------------------------------------------------
    data_dir = './input'
    seed_everything(cfg.data.seed)

    # Comet_ml
    experiment = Experiment(api_key=cfg.comet_ml.api_key,
                            project_name=cfg.comet_ml.project_name,
                            auto_param_logging=False,
                            auto_metric_logging=False)

    # Log Parameters
    experiment.log_parameters(dict(cfg.data))
    experiment.log_parameters(dict(cfg.train))

    # Data Module  ---------------------------------------------------------------
    transform = get_transforms(transform_name=cfg.data.transform,
                               img_size=cfg.data.img_size)
    cv = StratifiedKFold(n_splits=cfg.data.n_splits,
                         shuffle=True,
                         random_state=cfg.data.seed)
    dm = CassavaDataModule(data_dir,
                           cfg,
                           transform,
                           cv,
                           use_merge=True,
                           sample=DEBUG)

    # Model  ----------------------------------------------------------------------
    net = Timm_model(cfg.train.model_type, pretrained=True)

    # Log Model Graph
    experiment.set_model_graph(str(net))

    # Loss fn  ---------------------------------------------------------------------
    df = pd.read_csv('./input/merged.csv')
    weight = df['label'].value_counts().sort_index().tolist()
    weight = [w / len(df) for w in weight]
    weight = torch.tensor(weight).cuda()
    del df

    criterion = get_loss_fn(cfg.train.loss_fn, weight=weight, smoothing=0.05)

    # Optimizer, Scheduler  --------------------------------------------------------
    if cfg.train.use_sam:
        base_optimizer = RAdam
        optimizer = SAM(net.parameters(),
                        base_optimizer,
                        lr=cfg.train.lr,
                        weight_decay=cfg.train.weight_decay)
    else:
        optimizer = RAdam(net.parameters(),
                          lr=cfg.train.lr,
                          weight_decay=cfg.train.weight_decay)

    scheduler = lr_scheduler.CosineAnnealingLR(optimizer,
                                               T_max=cfg.train.epoch,
                                               eta_min=0)

    # Lightning Module  -------------------------------------------------------------
    model = CassavaLightningSystem(net,
                                   cfg,
                                   criterion=criterion,
                                   optimizer=optimizer,
                                   scheduler=scheduler,
                                   experiment=experiment)

    # Trainer  -------------------------------------------------------------------------
    trainer = Trainer(
        logger=False,
        max_epochs=cfg.train.epoch,
        gpus=-1,
        amp_backend='apex',
        amp_level='O2',
        num_sanity_val_steps=0,  # Skip Sanity Check
        automatic_optimization=False if cfg.train.use_sam else True,
        # resume_from_checkpoint='./checkpoints/epoch=3-step=14047.ckpt'
    )

    # Train
    trainer.fit(model, datamodule=dm)
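`automatic_optimization` is turned off when `cfg.train.use_sam` is set because SAM needs two forward/backward passes per optimizer step. For reference, a hedged sketch of that two-pass update in plain PyTorch, assuming the widely used SAM implementation that exposes `first_step`/`second_step` (the names `net`, `criterion`, and `loader` are illustrative):

for x, y in loader:
    loss = criterion(net(x), y)            # 1st pass: gradient at the current weights
    loss.backward()
    optimizer.first_step(zero_grad=True)   # perturb weights toward the local worst case

    criterion(net(x), y).backward()        # 2nd pass: gradient at the perturbed weights
    optimizer.second_step(zero_grad=True)  # restore weights and apply the SAM update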
Code Example #6
def get_optimizer(model, optimizer_name, optimizer_params, scheduler_name,
                  scheduler_params, n_epochs):
    opt_lower = optimizer_name.lower()

    opt_look_ahead = optimizer_params["lookahead"]
    if opt_lower == 'sgd':
        optimizer = optim.SGD(model.parameters(),
                              lr=optimizer_params["lr"],
                              momentum=optimizer_params["momentum"],
                              weight_decay=optimizer_params["weight_decay"],
                              nesterov=True)
    elif opt_lower == 'adam':
        optimizer = optim.Adam(model.parameters(),
                               lr=optimizer_params["lr"],
                               betas=(0.9, 0.999),
                               eps=1e-08,
                               weight_decay=0)
    elif opt_lower == 'adamw':
        optimizer = torch.optim.AdamW(
            model.parameters(),
            lr=optimizer_params["lr"],
            weight_decay=optimizer_params["weight_decay"],
            eps=optimizer_params["opt_eps"])
    elif opt_lower == 'nadam':
        optimizer = torch.optim.NAdam(
            model.parameters(),
            lr=optimizer_params["lr"],
            weight_decay=optimizer_params["weight_decay"],
            eps=optimizer_params["opt_eps"])
    elif opt_lower == 'radam':
        optimizer = RAdam(model.parameters(),
                          lr=optimizer_params["lr"],
                          weight_decay=optimizer_params["weight_decay"],
                          eps=optimizer_params["opt_eps"])
    elif opt_lower == "adabelief":
        optimizer = AdaBelief(model.parameters(),
                              lr=optimizer_params["lr"],
                              eps=1e-8,
                              weight_decay=optimizer_params["weight_decay"])

    elif opt_lower == "adamp":
        optimizer = AdamP(model.parameters(),
                          lr=optimizer_params["lr"],
                          weight_decay=optimizer_params["weight_decay"])
    else:
        raise ValueError(f"Invalid optimizer: {opt_lower}")

    if opt_look_ahead:
        optimizer = Lookahead(optimizer, alpha=0.5, k=5)

    if scheduler_name == "CosineAnnealingWarmRestarts":
        scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(
            optimizer,
            eta_min=scheduler_params["eta_min"],
            T_0=scheduler_params["T_0"],
            T_mult=scheduler_params["T_multi"],
        )
    elif scheduler_name == "WarmRestart":
        scheduler = WarmRestart(optimizer,
                                T_max=scheduler_params["T_max"],
                                T_mult=scheduler_params["T_mul"],
                                eta_min=scheduler_params["eta_min"])
    elif scheduler_name == "MultiStepLR":
        scheduler = torch.optim.lr_scheduler.MultiStepLR(
            optimizer,
            milestones=scheduler_params["schedule"],
            gamma=scheduler_params["gamma"])
    else:
        # Guard: an unknown scheduler name would otherwise leave `scheduler` undefined.
        scheduler = None

    if scheduler_params["warmup_factor"] > 0:
        scheduler = GradualWarmupSchedulerV2(
            optimizer,
            multiplier=scheduler_params["warmup_factor"],
            total_epoch=1,
            after_scheduler=scheduler)

    return optimizer, scheduler
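A hedged call sketch with hypothetical parameter dictionaries; `model` is any `torch.nn.Module`, and only the keys actually read above for the chosen optimizer and scheduler are required:

optimizer_params = {"lr": 3e-4, "weight_decay": 1e-2, "opt_eps": 1e-8,
                    "momentum": 0.9, "lookahead": True}
scheduler_params = {"eta_min": 1e-6, "T_0": 10, "T_multi": 1, "warmup_factor": 0}
optimizer, scheduler = get_optimizer(model, "radam", optimizer_params,
                                     "CosineAnnealingWarmRestarts",
                                     scheduler_params, n_epochs=30)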