def configure_scheduler(self, optimizer):
    """Initialize cyclic scheduler for lr modification."""
    return OneCycleLRWithWarmup(optimizer,
                                num_steps=NUM_EPOCHS,
                                lr_range=LR_RANGE,
                                warmup_steps=WARMUP_STEPS,
                                momentum_range=(0.9, 0.92))
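# A minimal sketch of the module-level constants that configure_scheduler
# above relies on; these exact values are illustrative assumptions, not
# taken from the original source.
NUM_EPOCHS = 50
LR_RANGE = (1e-2, 1e-6)    # (max_lr, min_lr) swept over the cycle
WARMUP_STEPS = 2           # steps spent ramping the lr up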
def get_scheduler(scheduler_name: str, optimizer, lr, num_epochs, batches_in_epoch=None):
    if scheduler_name is None or scheduler_name.lower() == "none":
        return None

    if scheduler_name.lower() == "cos":
        return CosineAnnealingLR(optimizer, num_epochs, eta_min=1e-6)

    if scheduler_name.lower() == "cos2":
        return CosineAnnealingLR(optimizer, num_epochs, eta_min=float(lr * 0.1))

    if scheduler_name.lower() == "cosr":
        return CosineAnnealingWarmRestarts(optimizer,
                                           T_0=max(2, num_epochs // 4),
                                           eta_min=1e-6)

    if scheduler_name.lower() == "cosrd":
        return CosineAnnealingWarmRestartsWithDecay(optimizer,
                                                    T_0=max(2, num_epochs // 6),
                                                    gamma=0.96,
                                                    eta_min=1e-6)

    if scheduler_name.lower() in {"1cycle", "one_cycle"}:
        return OneCycleLRWithWarmup(
            optimizer,
            lr_range=(lr, 1e-6),
            num_steps=batches_in_epoch * num_epochs,
            warmup_fraction=0.05,
            decay_fraction=0.1,
        )

    if scheduler_name.lower() == "exp":
        return ExponentialLR(optimizer, gamma=0.95)

    if scheduler_name.lower() == "clr":
        return CyclicLR(
            optimizer,
            base_lr=1e-6,
            max_lr=lr,
            step_size_up=batches_in_epoch // 4,
            # mode='exp_range',
            gamma=0.99,
        )

    if scheduler_name.lower() == "multistep":
        return MultiStepLR(optimizer,
                           milestones=[
                               int(num_epochs * 0.5),
                               int(num_epochs * 0.7),
                               int(num_epochs * 0.9)
                           ],
                           gamma=0.3)

    if scheduler_name.lower() == "simple":
        return MultiStepLR(
            optimizer,
            milestones=[int(num_epochs * 0.4), int(num_epochs * 0.7)],
            gamma=0.1)

    raise KeyError(scheduler_name)
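# A hedged usage sketch for the factory above; `model` and `train_loader`
# are assumed to exist elsewhere. The "1cycle" branch is the only one that
# needs batches_in_epoch, so it is taken from the DataLoader length.
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
scheduler = get_scheduler("1cycle", optimizer, lr=1e-3, num_epochs=50,
                          batches_in_epoch=len(train_loader))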
                          shuffle=False,
                          num_workers=num_workers)

loaders = {"train": train_loader, "valid": valid_loader}

num_epochs = 50
logdir = "/var/data/bengali" + str(args.fold) + '_config_' + str(
    args.config) + '_comment_' + args.comment

lr = 1e-2
optimizer = Over9000(params=model.parameters(), lr=lr)
scheduler = OneCycleLRWithWarmup(optimizer,
                                 num_steps=num_epochs,
                                 lr_range=(0.2e-2, 1e-2),
                                 warmup_steps=2,
                                 momentum_range=(1e-3, 0.1e-1))

criterion = {
    "h1": torch.nn.CrossEntropyLoss(),
    "h2": torch.nn.CrossEntropyLoss(),
    "h3": torch.nn.CrossEntropyLoss(),
}

runner = SupervisedRunner(input_key='features',
                          output_key=["h1_logits", "h2_logits", 'h3_logits'])

early_stop_epochs = get_dict_value_or_default(dict_=config,
                                              key='early_stop_epochs',
                                              default_value=30)
                         batch_size=BATCH_SIZE,
                         num_workers=1,
                         shuffle=False)

kf = StratifiedKFold(shuffle=True, random_state=42)
labels_blend = None

for i, (train_inds, test_inds) in enumerate(kf.split(image_files, image_labels)):
    torch.cuda.empty_cache()

    model = make_model()
    optimizer = Lookahead(RAdam(model.parameters(), lr=1e-3))
    # scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.33, patience=2, verbose=True)
    scheduler = OneCycleLRWithWarmup(optimizer,
                                     num_steps=EPOCHS,
                                     lr_range=(0.003, 0.0001),
                                     warmup_steps=1)

    LOGDIR = os.path.join(BASE_LOGDIR, f"fold{i}")

    train_files = image_files[train_inds]
    valid_files = image_files[test_inds]

    train_ds = tu.ITSDataset(train_files,
                             train_transforms=[
                                 albu.VerticalFlip(),
                                 albu.HorizontalFlip(),
                                 albu.ShiftScaleRotate(),
                                 albu.RandomGamma(),
                                 albu.RandomGridShuffle(),
                                 albu.HueSaturationValue()
                             ])
    val_ds = tu.ITSDataset(valid_files)
output_key="logits") mask_blend = None iou_values = [] for i, (train_inds, test_inds) in enumerate(kf.split(image_files, image_labels)): if i < RESUME_FOLD: continue LOGDIR = os.path.join(BASE_LOGDIR, f"fold{i}") gc.collect() torch.cuda.empty_cache() model = make_model() optimizer = Lookahead(RAdam(model.parameters(), lr=1e-3)) scheduler = OneCycleLRWithWarmup(optimizer, num_steps=EPOCHS, lr_range=(1e-2, 1e-5), warmup_steps=1) train_files = image_files[train_inds] valid_files = image_files[test_inds] train_labels = [getLabel(f) for f in train_files] #train_ds = tu.ITSDatasetWithPL(train_files, df_pl, train_transforms=[albu.HorizontalFlip(), albu.VerticalFlip(), albu.ShiftScaleRotate()], blur_mask=False) train_ds = tu.ITSDataset(train_files, train_transforms=[albu.HorizontalFlip(), albu.VerticalFlip(), albu.ShiftScaleRotate()], blur_mask=False) val_ds = tu.ITSDataset(valid_files) train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, num_workers=6, shuffle=True) val_loader = DataLoader(val_ds, batch_size=BATCH_SIZE, num_workers=6, shuffle=False) loaders = OrderedDict() loaders["train"] = train_loader loaders["valid"] = val_loader
def main():
    train_dataset = dataset.SentimentDataset(
        texts=df_train['sentences'].values.tolist(),
        labels=df_train['labels'].values,
        max_seq_length=config.MAX_SEQ_LENGTH,
        model_name=config.MODEL_NAME)
    valid_dataset = dataset.SentimentDataset(
        texts=df_valid['sentences'].values.tolist(),
        labels=df_valid['labels'].values,
        max_seq_length=config.MAX_SEQ_LENGTH,
        model_name=config.MODEL_NAME)

    train_val_loaders = {
        "train": DataLoader(dataset=train_dataset,
                            batch_size=config.BATCH_SIZE,
                            shuffle=True,
                            num_workers=2,
                            pin_memory=True),
        "valid": DataLoader(dataset=valid_dataset,
                            batch_size=config.BATCH_SIZE,
                            shuffle=False,
                            num_workers=2,
                            pin_memory=True)
    }

    dBert = model.DistilBert()

    param_optim = list(dBert.named_parameters())
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']

    criterion = nn.CrossEntropyLoss()
    base_optimizer = RAdam([
        {
            'params': [p for n, p in param_optim if not any(nd in n for nd in no_decay)],
            'weight_decay': config.WEIGHT_DECAY
        },
        {
            'params': [p for n, p in param_optim if any(nd in n for nd in no_decay)],
            'weight_decay': 0.0
        },
    ])
    optimizer = Lookahead(base_optimizer)
    scheduler = OneCycleLRWithWarmup(
        optimizer,
        num_steps=config.NUM_EPOCHS,
        lr_range=(config.LEARNING_RATE, 1e-8),
        init_lr=config.LEARNING_RATE,
        warmup_steps=0,
    )

    runner = SupervisedRunner(input_key=("input_ids", "attention_mask"))

    # model training
    runner.train(model=dBert,
                 criterion=criterion,
                 optimizer=optimizer,
                 scheduler=scheduler,
                 loaders=train_val_loaders,
                 callbacks=[
                     AccuracyCallback(num_classes=2),
                     OptimizerCallback(accumulation_steps=config.ACCUM_STEPS),
                 ],
                 fp16=config.FP_16,
                 logdir=config.LOG_DIR,
                 num_epochs=config.NUM_EPOCHS,
                 verbose=True)
def get_scheduler(scheduler_name: str, optimizer, learning_rate: float,
                  num_epochs: int, batches_in_epoch=None):
    if scheduler_name is None:
        name = ""
    else:
        name = scheduler_name.lower()

    need_warmup = "warmup_" in name
    name = name.replace("warmup_", "")

    scheduler = None
    if name == "cos":
        scheduler = CosineAnnealingLR(optimizer, num_epochs, eta_min=1e-6)
    elif name == "cos2":
        scheduler = CosineAnnealingLR(optimizer, num_epochs,
                                      eta_min=float(learning_rate * 0.5))
    elif name == "cos10":
        scheduler = CosineAnnealingLR(optimizer, num_epochs,
                                      eta_min=float(learning_rate * 0.1))
    elif name == "cosr":
        scheduler = CosineAnnealingWarmRestarts(optimizer,
                                                T_0=max(2, num_epochs // 4),
                                                eta_min=1e-6)
    elif name == "cosrd":
        scheduler = CosineAnnealingWarmRestartsWithDecay(optimizer,
                                                         T_0=max(2, num_epochs // 6),
                                                         gamma=0.96,
                                                         eta_min=1e-6)
    elif name in {"1cycle", "one_cycle"}:
        scheduler = OneCycleLRWithWarmup(
            optimizer,
            lr_range=(learning_rate, 1e-6),
            num_steps=batches_in_epoch * num_epochs,
            warmup_fraction=0.05,
            decay_fraction=0.1,
        )
    elif name == "exp":
        scheduler = ExponentialLR(optimizer, gamma=0.95)
    elif name == "clr":
        scheduler = CyclicLR(
            optimizer,
            base_lr=1e-6,
            max_lr=learning_rate,
            step_size_up=batches_in_epoch // 4,
            # mode='exp_range',
            gamma=0.99,
        )
    elif name == "multistep":
        scheduler = MultiStepLR(optimizer,
                                milestones=[
                                    int(num_epochs * 0.5),
                                    int(num_epochs * 0.7),
                                    int(num_epochs * 0.9)
                                ],
                                gamma=0.3)
    elif name == "simple":
        scheduler = MultiStepLR(
            optimizer,
            milestones=[int(num_epochs * 0.4), int(num_epochs * 0.7)],
            gamma=0.1)
    else:
        raise KeyError(f"Unsupported scheduler name {name}")

    if need_warmup:
        scheduler = GradualWarmupScheduler(optimizer, 1.0, 5, after_scheduler=scheduler)
        print("Adding warmup")

    return scheduler
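# A hedged sketch of the "warmup_" prefix handled by the variant above:
# the chosen base scheduler is wrapped in GradualWarmupScheduler (multiplier
# 1.0, 5 warmup epochs). `model` is assumed to be defined elsewhere.
optimizer = torch.optim.SGD(model.parameters(), lr=1e-2, momentum=0.9)
scheduler = get_scheduler("warmup_cos", optimizer, learning_rate=1e-2, num_epochs=50)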
def main():
    train_image_list = sorted(
        glob.glob(
            pathname='../input/uavid-semantic-segmentation-dataset/train/train/*/Images/*.png',
            recursive=True))
    train_mask_list = sorted(
        glob.glob(pathname='./trainlabels/*/TrainId/*.png', recursive=True))
    valid_image_list = sorted(
        glob.glob(
            pathname='../input/uavid-semantic-segmentation-dataset/valid/valid/*/Images/*.png',
            recursive=True))
    valid_mask_list = sorted(
        glob.glob(pathname='./validlabels/*/TrainId/*.png', recursive=True))

    preprocessing_fn = smp.encoders.get_preprocessing_fn(
        config.ENCODER, config.ENCODER_WEIGHTS)

    train_dataset = Dataset(
        train_image_list,
        train_mask_list,
        augmentation=augmentations.get_training_augmentation(),
        preprocessing=augmentations.get_preprocessing(preprocessing_fn),
        classes=config.CLASSES,
    )
    valid_dataset = Dataset(
        valid_image_list,
        valid_mask_list,
        augmentation=augmentations.get_validation_augmentation(),
        preprocessing=augmentations.get_preprocessing(preprocessing_fn),
        classes=config.CLASSES,
    )

    train_loader = DataLoader(train_dataset,
                              batch_size=config.BATCH_SIZE,
                              shuffle=True,
                              num_workers=2,
                              pin_memory=True,
                              drop_last=True)
    valid_loader = DataLoader(valid_dataset,
                              batch_size=config.BATCH_SIZE,
                              shuffle=False,
                              num_workers=2,
                              pin_memory=True,
                              drop_last=False)
    loaders = {"train": train_loader, "valid": valid_loader}

    base_optimizer = RAdam([
        {
            'params': model.MODEL.decoder.parameters(),
            'lr': config.LEARNING_RATE
        },
        {
            'params': model.MODEL.encoder.parameters(),
            'lr': 1e-4
        },
        {
            'params': model.MODEL.segmentation_head.parameters(),
            'lr': config.LEARNING_RATE
        },
    ])
    optimizer = Lookahead(base_optimizer)
    criterion = BCEDiceLoss(activation=None)
    runner = SupervisedRunner()
    scheduler = OneCycleLRWithWarmup(optimizer,
                                     num_steps=config.NUM_EPOCHS,
                                     lr_range=(0.0016, 0.0000001),
                                     init_lr=config.LEARNING_RATE,
                                     warmup_steps=2)

    callbacks = [
        IouCallback(activation='none'),
        ClasswiseIouCallback(classes=config.CLASSES, activation='none'),
        EarlyStoppingCallback(patience=config.ES_PATIENCE,
                              metric='iou',
                              minimize=False),
    ]

    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        loaders=loaders,
        callbacks=callbacks,
        logdir=config.LOGDIR,
        num_epochs=config.NUM_EPOCHS,
        # save our best checkpoint by IoU metric
        main_metric="iou",
        # IoU needs to be maximized.
        minimize_metric=False,
        # for FP16. It uses the variable from the very first cell
        fp16=config.FP16_PARAMS,
        # prints train logs
        verbose=True,
    )
def main():
    # set your params
    DATA_PATH = '/content/drive/My Drive/kaggle/bengaliai-cv19/dataset'
    # MODEL_PATH = '/content/drive/My Drive/kaggle/bengaliai-cv19/model/se_resnext50_32x4d-a260b3a4.pth'
    # MODEL_PATH = '/content/drive/My Drive/kaggle/bengaliai-cv19/model/efficientnet-b3-5fb5a3c3.pth'
    BASE_LOGDIR = '/content/drive/My Drive/kaggle/bengaliai-cv19/logs'
    NUM_FOLDS = 5
    BATCH_SIZE = 64
    EPOCHS = 20
    SEED = 1234
    SIZE = 224
    LR = 0.003
    HOLD_OUT = False

    # fix seed
    set_global_seed(SEED)

    # read dataset
    train, _, _ = read_data(DATA_PATH)
    train_all_images = prepare_image(DATA_PATH, data_type='train', submission=False)

    # init
    target_col = ['grapheme_root', 'consonant_diacritic', 'vowel_diacritic']
    device = get_device()
    train_data_transforms = albu.Compose([
        albu.ShiftScaleRotate(rotate_limit=10, scale_limit=.1),
        albu.Cutout(p=0.5),
    ])
    test_data_transforms = None

    # cross validation
    kf = MultilabelStratifiedKFold(n_splits=NUM_FOLDS, random_state=SEED)
    ids = kf.split(X=train_all_images, y=train[target_col].values)

    # fold_scores = []
    for fold, (train_idx, valid_idx) in enumerate(ids):
        print("Current Fold: ", fold + 1)
        logdir = os.path.join(BASE_LOGDIR, 'fold_{}'.format(fold + 1))
        os.makedirs(logdir, exist_ok=True)

        train_df, valid_df = train.iloc[train_idx], train.iloc[valid_idx]
        print("Train and Valid Shapes are", train_df.shape, valid_df.shape)

        print("Preparing train datasets....")
        train_dataset = BengaliAIDataset(images=train_all_images[train_idx],
                                         labels=train_df[target_col].values,
                                         size=SIZE,
                                         transforms=train_data_transforms)

        print("Preparing valid datasets....")
        valid_dataset = BengaliAIDataset(images=train_all_images[valid_idx],
                                         labels=valid_df[target_col].values,
                                         size=SIZE,
                                         transforms=test_data_transforms)

        print("Preparing dataloaders....")
        train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
        valid_loader = DataLoader(valid_dataset, batch_size=BATCH_SIZE, shuffle=False)
        loaders = {'train': train_loader, 'valid': valid_loader}

        # release memory
        del train_df, valid_df, train_dataset, valid_dataset
        gc.collect()
        torch.cuda.empty_cache()

        # init models
        resnet34 = pretrainedmodels.__dict__["resnet34"](pretrained="imagenet")
        model = BengaliBaselineClassifier(pretrainedmodels=resnet34, hdim=512)
        # model = BengaliBaselineClassifier(pretrainedmodels=se_resnext50_32x4d(model_path=MODEL_PATH))
        # model = CustomEfficientNet.from_pretrained('efficientnet-b3', MODEL_PATH)
        model = model.to(device)

        criterions = {'train': BaselineLoss(), 'valid': BaselineLoss()}
        optimizer = AdamW(model.parameters(), lr=LR)
        scheduler = OneCycleLRWithWarmup(optimizer,
                                         num_steps=EPOCHS,
                                         lr_range=(0.001, 0.0001),
                                         warmup_steps=1)

        # catalyst trainer
        runner = BengaliRunner(device=device)

        # model training
        runner.train(model=model,
                     criterions=criterions,
                     optimizer=optimizer,
                     scheduler=scheduler,
                     loaders=loaders,
                     logdir=logdir,
                     num_epochs=EPOCHS,
                     score_func=macro_recall)

        # release memory
        del model, runner, train_loader, valid_loader, loaders
        gc.collect()
        torch.cuda.empty_cache()

        if HOLD_OUT is True:
            break

    return True
        'lr': learning_rate
    },
], weight_decay=0.0003)

optimizer = Lookahead(base_optimizer)

criterion = {
    "dice": DiceLoss(),
    "iou": IoULoss(),
    "focal": FocalLossBinary()
}

runner = SupervisedRunner(device='cuda',
                          input_key="image",
                          input_target_key="mask")

scheduler = OneCycleLRWithWarmup(optimizer,
                                 num_steps=num_epochs,
                                 lr_range=(0.0016, 0.0000001),
                                 init_lr=learning_rate,
                                 warmup_steps=15)

loaders = get_loaders(preprocessing_fn, batch_size=12)

from catalyst.dl import DiceCallback, IouCallback, CriterionCallback, MetricAggregationCallback, ClasswiseIouCallback

callbacks = [
    # Each criterion is calculated separately.
    CriterionCallback(input_key="mask",
                      prefix="loss_dice",
                      criterion_key="dice"),
    CriterionCallback(input_key="mask",
                      prefix="loss_iou",
                      criterion_key="iou"),
    CriterionCallback(input_key="mask",
                      prefix="loss_focal",