# %% # %autoreload 2 # %% num_epochs = 1 logdir = "./logs/segmentation" # model, criterion, optimizer optimizer = torch.optim.Adam([ {'params': model.decoder.parameters(), 'lr': 1e-2}, {'params': model.encoder.parameters(), 'lr': 1e-3}, ]) scheduler = ReduceLROnPlateau(optimizer, factor=0.15, patience=2) criterion = smp.utils.losses.BCEDiceLoss(eps=1.) runner = SupervisedRunner() # %% [markdown] # ## Model training # %% {"_kg_hide-output": true} ###### Test Only ##### loaders = { "train": train_loader, "valid": valid_loader } ###################### runner.train( model=model, criterion=criterion,
def train_model(): model = smp.Unet( encoder_name=ENCODER, encoder_weights=ENCODER_WEIGHTS, classes=4, activation=ACTIVATION, ) preprocessing_fn = smp.encoders.get_preprocessing_fn( ENCODER, ENCODER_WEIGHTS) num_workers = 0 bs = 5 train_dataset = CloudDataset( df=train, datatype='train', img_ids=train_ids, transforms=get_training_augmentation(), preprocessing=get_preprocessing(preprocessing_fn)) valid_dataset = CloudDataset( df=train, datatype='valid', img_ids=valid_ids, transforms=get_validation_augmentation(), preprocessing=get_preprocessing(preprocessing_fn)) train_loader = DataLoader(train_dataset, batch_size=bs, shuffle=True, num_workers=num_workers) valid_loader = DataLoader(valid_dataset, batch_size=1, shuffle=False, num_workers=num_workers) loaders = {"train": train_loader, "valid": valid_loader} num_epochs = 40 # model, criterion, optimizer optimizer = RAdam([ { 'params': model.decoder.parameters(), 'lr': 1e-2 }, { 'params': model.encoder.parameters(), 'lr': 1e-3 }, ]) scheduler = ReduceLROnPlateau(optimizer, factor=0.15, patience=2, threshold=0.001) criterion = smp.utils.losses.BCEDiceLoss(eps=1.) runner = SupervisedRunner() runner.train(model=model, criterion=criterion, optimizer=optimizer, scheduler=scheduler, loaders=loaders, callbacks=[ DiceCallback(), EarlyStoppingCallback(patience=5, min_delta=0.001), CriterionCallback(), OptimizerCallback(accumulation_steps=2) ], logdir=logdir, num_epochs=num_epochs, verbose=True) return True
def generate_test_preds(class_params): preprocessing_fn = smp.encoders.get_preprocessing_fn( ENCODER, ENCODER_WEIGHTS) dummy_dataset = CloudDataset( df=sub, datatype='test', img_ids=test_ids[:1], transforms=get_validation_augmentation(), preprocessing=get_preprocessing(preprocessing_fn)) dummy_loader = DataLoader(dummy_dataset, batch_size=1, shuffle=False, num_workers=0) model = smp.Unet( encoder_name=ENCODER, encoder_weights=ENCODER_WEIGHTS, classes=4, activation=ACTIVATION, ) runner = SupervisedRunner(model) # HACK: We are loading a few examples from our dummy loader so catalyst will properly load the weights # from our checkpoint loaders = {"test": dummy_loader} runner.infer( model=model, loaders=loaders, callbacks=[ CheckpointCallback(resume=f"{logdir}/checkpoints/best.pth"), InferCallback() ], ) # Now we do real inference on the full dataset test_dataset = CloudDataset( df=sub, datatype='test', img_ids=test_ids, transforms=get_validation_augmentation(), preprocessing=get_preprocessing(preprocessing_fn)) test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False, num_workers=0) encoded_pixels = [] image_id = 0 for i, test_batch in enumerate(tqdm.tqdm(test_loader)): runner_out = runner.predict_batch({"features": test_batch[0].cuda() })['logits'].cpu().detach().numpy() for i, batch in enumerate(runner_out): for probability in batch: # probability = probability.cpu().detach().numpy() if probability.shape != (350, 525): probability = cv2.resize(probability, dsize=(525, 350), interpolation=cv2.INTER_LINEAR) predict, num_predict = post_process( sigmoid(probability), class_params[image_id % 4][0], class_params[image_id % 4][1]) if num_predict == 0: encoded_pixels.append('') else: r = mask2rle(predict) encoded_pixels.append(r) image_id += 1 print("Saving submission...") sub['EncodedPixels'] = encoded_pixels sub.to_csv('submission.csv', columns=['Image_Label', 'EncodedPixels'], index=False) print("Saved.")
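# Several snippets in this collection call sigmoid(), post_process() and mask2rle() without
# defining them. The sketch below shows what these helpers are assumed to do in the cloud
# segmentation kernels (logit squashing, thresholding plus connected-component filtering, and
# run-length encoding of the submission masks); it is an illustrative assumption, not the
# authors' original code.
import cv2
import numpy as np


def sigmoid(x):
    # Map raw logits to probabilities in [0, 1].
    return 1 / (1 + np.exp(-x))


def post_process(probability, threshold, min_size):
    # Binarize the probability map and keep only connected components with more than
    # `min_size` pixels; return the cleaned mask and the number of kept components.
    mask = (probability > threshold).astype(np.uint8)
    num_component, component = cv2.connectedComponents(mask)
    predictions = np.zeros((350, 525), np.float32)
    num = 0
    for c in range(1, num_component):
        p = component == c
        if p.sum() > min_size:
            predictions[p] = 1
            num += 1
    return predictions, num


def mask2rle(img):
    # Run-length encode a binary mask in column-major order, as expected by the submission format.
    pixels = img.T.flatten()
    pixels = np.concatenate([[0], pixels, [0]])
    runs = np.where(pixels[1:] != pixels[:-1])[0] + 1
    runs[1::2] -= runs[::2]
    return ' '.join(str(x) for x in runs)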
criterion = nn.BCEWithLogitsLoss() else: criterion = smp.utils.losses.BCEDiceLoss(eps=1.) if args.multigpu: model = nn.DataParallel(model) if args.task == 'segmentation': callbacks = [DiceCallback(), EarlyStoppingCallback(patience=5, min_delta=0.001), CriterionCallback()] elif args.task == 'classification': callbacks = [AUCCallback(class_names=['Fish', 'Flower', 'Gravel', 'Sugar'], num_classes=4), EarlyStoppingCallback(patience=5, min_delta=0.001), CriterionCallback()] if args.gradient_accumulation: callbacks.append(OptimizerCallback(accumulation_steps=args.gradient_accumulation)) runner = SupervisedRunner() if args.train: runner.train( model=model, criterion=criterion, optimizer=optimizer, scheduler=scheduler, loaders=loaders, callbacks=callbacks, logdir=logdir, num_epochs=args.num_epochs, verbose=True ) with open(f'{logdir}/args.txt', 'w') as f: for k, v in args.__dict__.items():
AUCCallback(class_names=["Fish", "Flower", "Gravel", "Sugar"], num_classes=4), EarlyStoppingCallback(patience=10, min_delta=0.001), CriterionCallback(), ] if args.gradient_accumulation: callbacks.append( OptimizerCallback(accumulation_steps=args.gradient_accumulation)) checkpoint = utils.load_checkpoint(f"{logdir}/checkpoints/best.pth") model.cuda() utils.unpack_checkpoint(checkpoint, model=model) # # runner = SupervisedRunner() if args.train: print("Training") runner.train( model=model, criterion=criterion, optimizer=optimizer, main_metric="dice", minimize_metric=False, scheduler=scheduler, loaders=loaders, callbacks=callbacks, logdir=logdir, num_epochs=args.num_epochs, verbose=True, )
def generate_test_preds(args): valid_dice, class_params, = args test_preds = np.zeros((len(sub), 350, 525), dtype=np.float32) for i in range(NFOLDS): logdir = LOG_DIR_BASE + str(i) preprocessing_fn = smp.encoders.get_preprocessing_fn( ENCODER, ENCODER_WEIGHTS) dummy_dataset = CloudDataset( df=sub, datatype='test', img_ids=test_ids[:1], transforms=get_validation_augmentation(), preprocessing=get_preprocessing(preprocessing_fn)) dummy_loader = DataLoader(dummy_dataset, batch_size=1, shuffle=False, num_workers=0) model = smp.FPN( encoder_name=ENCODER, encoder_weights=ENCODER_WEIGHTS, classes=4, activation=ACTIVATION, ) runner = SupervisedRunner(model) # HACK: We are loading a few examples from our dummy loader so catalyst will properly load the weights # from our checkpoint loaders = {"test": dummy_loader} runner.infer( model=model, loaders=loaders, callbacks=[ CheckpointCallback(resume=f"{logdir}/checkpoints/best.pth"), InferCallback() ], ) # Now we do real inference on the full dataset test_dataset = CloudDataset( df=sub, datatype='test', img_ids=test_ids, transforms=get_validation_augmentation(), preprocessing=get_preprocessing(preprocessing_fn)) test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False, num_workers=0) image_id = 0 for batch_index, test_batch in enumerate(tqdm.tqdm(test_loader)): runner_out = runner.predict_batch( {"features": test_batch[0].cuda()})['logits'].cpu().detach().numpy() for preds in runner_out: preds = preds.transpose((1, 2, 0)) preds = cv2.resize( preds, (525, 350)) # height and width are backward in cv2... preds = preds.transpose((2, 0, 1)) idx = batch_index * 4 test_preds[idx + 0] += sigmoid(preds[0]) / NFOLDS # fish test_preds[idx + 1] += sigmoid(preds[1]) / NFOLDS # flower test_preds[idx + 2] += sigmoid(preds[2]) / NFOLDS # gravel test_preds[idx + 3] += sigmoid(preds[3]) / NFOLDS # sugar # Convert ensembled predictions to RLE predictions encoded_pixels = [] for image_id, preds in enumerate(test_preds): predict, num_predict = post_process(preds, class_params[image_id % 4][0], class_params[image_id % 4][1]) if num_predict == 0: encoded_pixels.append('') else: r = mask2rle(predict) encoded_pixels.append(r) print("Saving submission...") sub['EncodedPixels'] = encoded_pixels sub.to_csv('fpn_submission_{}.csv'.format(valid_dice), columns=['Image_Label', 'EncodedPixels'], index=False) print("Saved.")
# In[ ]: from catalyst.dl.runner import SupervisedRunner # experiment setup num_epochs = NUM_EPOCHS logdir = "./logs/cifar_simple_notebook_1" # model, criterion, optimizer model = Net() criterion = nn.CrossEntropyLoss() optimizer = torch.optim.Adam(model.parameters()) # model runner runner = SupervisedRunner() # model training runner.train( model=model, criterion=criterion, optimizer=optimizer, loaders=loaders, logdir=logdir, num_epochs=num_epochs, check=True, ) # In[ ]: # you can use plotly and tensorboard to plot metrics inside jupyter
optimizer = torch.optim.Adam(model.parameters(), lr=config.lr) scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau( optimizer, patience=config.patience, verbose=True, mode="min", factor=0.3) device = utils.get_device() if config.is_fp16_used: fp16_params = dict(opt_level="O1") # params for FP16 else: fp16_params = None runner = SupervisedRunner(device=device) runner.train( model=model, criterion=criterion, optimizer=optimizer, scheduler=scheduler, loaders=loaders, # We can specify the callbacks list for the experiment; # For this task, we will check accuracy, AUC and F1 metrics callbacks=[ AccuracyCallback(num_classes=config.num_classes), AUCCallback(num_classes=config.num_classes, input_key="targets_one_hot", class_names=config.class_names), F1ScoreCallback(input_key="targets_one_hot", activation="Softmax"),
num_workers=num_workers, pin_memory=False, shuffle=False, ) model = ClassificationModel(backbone=args.backbone, n_output=[11, 168, 7, 1295], input_channels=1) model.cuda() loaders = collections.OrderedDict() loaders["train"] = train_loader loaders["valid"] = val_loader runner = SupervisedRunner(input_key="image", output_key=None, input_target_key=None) optimizer = RAdam(model.parameters(), lr=args.lr, weight_decay=0.001) scheduler = ReduceLROnPlateau(optimizer=optimizer, factor=0.75, patience=3, mode="max") criterions_dict = { "vowel_diacritic_loss": torch.nn.CrossEntropyLoss(weight=get_w(ny1)), "grapheme_root_loss": torch.nn.CrossEntropyLoss(weight=get_w(ny2)), "consonant_diacritic_loss": torch.nn.CrossEntropyLoss(weight=get_w(ny3)), "grapheme_loss": torch.nn.CrossEntropyLoss(), }
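# get_w() and ny1/ny2/ny3 are not defined in the snippet above. A plausible reading is that ny*
# hold per-class sample counts and get_w() converts them into inverse-frequency class weights for
# CrossEntropyLoss; the sketch below is purely an assumption for illustration, not the original helper.
import numpy as np
import torch


def get_w(class_counts):
    # Inverse-frequency weights, normalized so they average to 1.
    # Move the tensor to the criterion's device if it is not relocated automatically.
    counts = np.asarray(class_counts, dtype=np.float32)
    weights = counts.sum() / (len(counts) * counts)
    return torch.tensor(weights, dtype=torch.float32)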
checkpoint = torch.load( f'./logs/{args.model}/checkpoints/best.pth') model.load_state_dict(checkpoint['model_state_dict']) del checkpoint gc.collect() if args.tta: tta_model = tta.SegmentationTTAWrapper( model, tta.aliases.d4_transform(), merge_mode="sum") else: tta_model = model # tta.SegmentationTTAWrapper( # model, tta.aliases.flip_transform(), merge_mode="mean") runner = SupervisedRunner( model=tta_model, device=get_device()) for i, test_batch in enumerate(tqdm.tqdm(loaders['test'])): test_batch = test_batch[0].cuda() runner_out = runner.predict_batch( {"features": test_batch})['logits'] gc.collect() for i, batch in enumerate(runner_out): for probability in batch: probability = probability.cpu().detach().numpy() if probability.shape != (350, 525): probability = cv2.resize(probability, dsize=( 525, 350), interpolation=cv2.INTER_LINEAR) predict, num_predict = post_process( sigmoid(probability), class_params[f"{image_id % 4}"][0],
def main(config_path, gpu='0'): os.environ["CUDA_VISIBLE_DEVICES"] = gpu config = get_config(config_path) model_name = config['model_name'] val_fold = config['val_fold'] folds_to_use = config['folds_to_use'] alias = config['alias'] log_path = osp.join(config['logs_path'], alias + str(val_fold) + '_' + model_name) device = torch.device(config['device']) weights = config['weights'] loss_name = config['loss'] optimizer_name = config['optimizer'] lr = config['lr'] decay = config['decay'] momentum = config['momentum'] epochs = config['epochs'] fp16 = config['fp16'] n_classes = config['n_classes'] input_channels = config['input_channels'] main_metric = config['main_metric'] best_models_count = config['best_models_count'] minimize_metric = config['minimize_metric'] folds_file = config['folds_file'] train_augs = config['train_augs'] preprocessing_fn = config['preprocessing_fn'] limit_files = config['limit_files'] batch_size = config['batch_size'] shuffle = config['shuffle'] num_workers = config['num_workers'] valid_augs = config['valid_augs'] val_batch_size = config['val_batch_size'] multiplier = config['multiplier'] train_dataset = SemSegDataset(mode='train', n_classes=n_classes, folds_file=folds_file, val_fold=val_fold, folds_to_use=folds_to_use, augmentation=train_augs, preprocessing=preprocessing_fn, limit_files=limit_files, multiplier=multiplier) train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=shuffle, num_workers=num_workers) valid_dataset = SemSegDataset(mode='valid', folds_file=folds_file, n_classes=n_classes, val_fold=val_fold, folds_to_use=folds_to_use, augmentation=valid_augs, preprocessing=preprocessing_fn, limit_files=limit_files) valid_loader = DataLoader(dataset=valid_dataset, batch_size=val_batch_size, shuffle=False, num_workers=num_workers) model = make_model(model_name=model_name).to(device) loss = get_loss(loss_name=loss_name) optimizer = get_optimizer(optimizer_name=optimizer_name, model=model, lr=lr, momentum=momentum, decay=decay) if config['scheduler'] == 'steps': print('steps lr') steps = config['steps'] step_gamma = config['step_gamma'] scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer=optimizer, milestones=steps, gamma=step_gamma) callbacks = [] dice_callback = DiceCallback() callbacks.append(dice_callback) callbacks.append(CheckpointCallback(save_n_best=best_models_count)) runner = SupervisedRunner(device=device) loaders = {'train': train_loader, 'valid': valid_loader} runner.train(model=model, criterion=loss, optimizer=optimizer, loaders=loaders, scheduler=scheduler, callbacks=callbacks, logdir=log_path, num_epochs=epochs, verbose=True, main_metric=main_metric, minimize_metric=minimize_metric, fp16=fp16)
elif config['opt'] == 'rmsprop': optimizer = torch.optim.RMSprop(params=model.parameters(), lr=lr) elif config['opt'] == 'radam': optimizer = RAdam(params=model.parameters(), lr=lr) else: raise Exception(config['opt'] + ' is not supported') scheduler = make_scheduler_from_config(optimizer=optimizer, config=config) criterion = { "h1": torch.nn.CrossEntropyLoss(), "h2": torch.nn.CrossEntropyLoss(), "h3": torch.nn.CrossEntropyLoss(), } runner = SupervisedRunner(input_key='features', output_key=["h1_logits", "h2_logits", 'h3_logits']) early_stop_epochs = get_dict_value_or_default(dict_=config, key='early_stop_epochs', default_value=30) loss_agg_fn = get_dict_value_or_default(config, 'loss_aggregate_fn', 'mean') if loss_agg_fn == 'mean' or loss_agg_fn == 'sum': crit_agg = CriterionAggregatorCallback( prefix="loss", loss_keys=["loss_h1", "loss_h2", 'loss_h3'], loss_aggregate_fn=config['loss_aggregate_fn']) elif loss_agg_fn == 'weighted_sum': weights = get_dict_value_or_default(config, 'weights', [0.3, 0.3, 0.3]) crit_agg = CriterionAggregatorCallback( prefix="loss",
from catalyst.dl.runner import SupervisedRunner # experiment setup num_epochs = 2 logdir = "./logs/segmentation_notebook" # model, criterion, optimizer model = Unet(num_classes=1, in_channels=1, num_channels=32, num_blocks=2) criterion = nn.BCEWithLogitsLoss() optimizer = torch.optim.Adam(model.parameters(), lr=1e-3) scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[10, 20, 40], gamma=0.3) # model runner runner = SupervisedRunner() # model training runner.train(model=model, criterion=criterion, optimizer=optimizer, loaders=loaders, logdir=logdir, num_epochs=num_epochs, check=True) # # Inference # In[ ]: runner_out = runner.predict_loader(loaders["valid"],
def optimal_valid(k, net, config, loader_fold, ENCODER, ENCODER_WEIGHTS, ACTIVATION): runner = SupervisedRunner() model = load_model(net, ENCODER, ENCODER_WEIGHTS, ACTIVATION) logdir = "./logs/segmentation_{}_{}Fold".format(net, k) loaders = {"infer": loader_fold[k]['valid']} runner.infer( model=model, loaders=loaders, callbacks=[ CheckpointCallback(resume=f"{logdir}/checkpoints/{config}.pth"), InferCallback() ], ) ###################### dummy test ###################### if 1: label_list = ["Fish", "Flower", "Gravel", "Sugar"] valid_masks = [] probabilities = np.zeros( (len(loader_fold[k]['valid'].dataset) * 4, 350, 525)) for i, (batch, output) in tqdm.tqdm_notebook( enumerate( zip(loaders['infer'].dataset, runner.callbacks[0].predictions["logits"]))): image, mask = batch for m in mask: if m.shape != (350, 525): m = cv2.resize(m, dsize=(525, 350), interpolation=cv2.INTER_LINEAR) valid_masks.append(m) for j, probability in enumerate(output): if probability.shape != (350, 525): probability = cv2.resize(probability, dsize=(525, 350), interpolation=cv2.INTER_LINEAR) probabilities[i * 4 + j, :, :] = probability # Find optimal values # First of all, my thanks to @samusram for finding a mistake in my validation # https://www.kaggle.com/c/understanding_cloud_organization/discussion/107711#622412 # And now I find optimal values separately for each class. class_params = {} for class_id in range(4): print(label_list[class_id]) attempts = [] for t in range(0, 100, 5): t /= 100 for ms in [0, 100, 1200, 5000, 10000]: masks = [] for i in range(class_id, len(probabilities), 4): probability = probabilities[i] predict, num_predict = post_process( sigmoid(probability), t, ms) masks.append(predict) d = [] for i, j in zip(masks, valid_masks[class_id::4]): if (i.sum() == 0) & (j.sum() == 0): d.append(1) else: d.append(dice(i, j)) attempts.append((t, ms, np.mean(d))) attempts_df = pd.DataFrame(attempts, columns=['threshold', 'size', 'dice']) attempts_df = attempts_df.sort_values('dice', ascending=False) print(attempts_df.head()) best_threshold = attempts_df['threshold'].values[0] best_size = attempts_df['size'].values[0] best_dice = attempts_df['dice'].values[0] class_params[class_id] = (best_threshold, best_size, best_dice) print("Best Threshold", class_params) print() print("Avg Valid Dice", (class_params[0][2] + class_params[1][2] + class_params[2][2] + class_params[3][2]) / 4) else: class_params = { 0: (0.6, 10000, 0.614792005689229), 1: (0.7, 10000, 0.7479094686835059), 2: (0.55, 10000, 0.6083618093569516), 3: (0.45, 10000, 0.5766765025111799) } ###################### dummy test ###################### # print("Classification Report") del loaders torch.cuda.empty_cache() gc.collect() return class_params, runner
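# dice() is used in the threshold search above but never defined in these snippets. A minimal
# sketch under the usual definition (2 * intersection / sum of mask areas) for binary masks;
# the empty-vs-empty case is already special-cased in the search loop above. This is an
# assumption, not the original helper.
import numpy as np


def dice(pred, truth, eps=1e-9):
    # Dice coefficient between two binary masks of the same shape.
    pred = np.asarray(pred).astype(np.float32)
    truth = np.asarray(truth).astype(np.float32)
    intersection = (pred * truth).sum()
    return (2.0 * intersection) / (pred.sum() + truth.sum() + eps)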
def main(): args = get_args() os.environ["CUDA_VISIBLE_DEVICES"] = args.gpus SEED = 42 utils.set_global_seed(SEED) utils.prepare_cudnn(deterministic=True) num_classes = 14 #define datasets train_dataset = ChestXrayDataSet( data_dir=args.path_to_images, image_list_file=args.train_list, transform=transforms_train, ) val_dataset = ChestXrayDataSet( data_dir=args.path_to_images, image_list_file=args.val_list, transform=transforms_val, ) loaders = { 'train': DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True, num_workers=args.num_workers), 'valid': DataLoader(val_dataset, batch_size=2, shuffle=False, num_workers=args.num_workers) } logdir = args.log_dir #where model weights and logs are stored #define model model = DenseNet121(num_classes) if len(args.gpus) > 1: model = nn.DataParallel(model) device = utils.get_device() runner = SupervisedRunner(device=device) optimizer = RAdam(model.parameters(), lr=args.lr, weight_decay=0.0003) scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.25, patience=2) weights = torch.Tensor( [10, 100, 30, 8, 40, 40, 330, 140, 35, 155, 110, 250, 155, 200]).to(device) criterion = BCEWithLogitsLoss(pos_weight=weights) class_names = [ 'Atelectasis', 'Cardiomegaly', 'Effusion', 'Infiltration', 'Mass', 'Nodule', 'Pneumonia', 'Pneumothorax', 'Consolidation', 'Edema', 'Emphysema', 'Fibrosis', 'Pleural_Thickening', 'Hernia' ] runner.train( model=model, logdir=logdir, criterion=criterion, optimizer=optimizer, scheduler=scheduler, loaders=loaders, num_epochs=args.epochs, # We can specify the callbacks list for the experiment; # For this task, we will check AUC and accuracy callbacks=[ AUCCallback( input_key="targets", output_key='logits', prefix='auc', class_names=class_names, num_classes=num_classes, activation='Sigmoid', ), AccuracyCallback( input_key="targets", output_key="logits", prefix="accuracy", accuracy_args=[1], num_classes=14, threshold=0.5, activation='Sigmoid', ), ], main_metric='auc/_mean', minimize_metric=False, verbose=True, )
print(e) if use_gradient_accumulating: state = { "status": 'not used', "epoch": 10, "arch": 'Unet', "model_state_dict": torch.load(f"{output_logdir}/checkpoints/best.pth") } torch.save(state, 'best.pth') resume_path = 'best.pth' else: resume_path = f"{output_logdir}/checkpoints/best.pth" loaders = {"infer": valid_loader} runner = SupervisedRunner() runner.infer( model=model, loaders=loaders, callbacks=[CheckpointCallback(resume=resume_path), InferCallback()]) valid_masks = [] probabilities = np.zeros((2220, 350, 525)) for i, (batch, output) in enumerate( tqdm.tqdm(zip(valid_dataset, runner.callbacks[0].predictions["logits"]))): image, mask = batch for m in mask: if m.shape != (350, 525): m = cv2.resize(m, dsize=(525, 350), interpolation=cv2.INTER_LINEAR) valid_masks.append(m)
'params': model.decoder.parameters(), 'lr': lrd }, { 'params': model.encoder.parameters(), 'lr': lre }, ]) model.to(device) scheduler = ReduceLROnPlateau(optimizer, factor=0.6, patience=s_patience) # criterion = smp.utils.losses.BCEDiceLoss(eps=1.) # scheduler = StepLR(optimizer, step_size=10, gamma=0.5) criterion = BCEDiceLoss(eps=1.) # criterion = DiceLoss(eps=1.) #Try this too runner = SupervisedRunner() # Train runner.train(model=model, criterion=criterion, optimizer=optimizer, scheduler=scheduler, loaders=loaders, callbacks=[ DiceCallback(), EarlyStoppingCallback(patience=train_patience, min_delta=0.001) ], logdir=logdir, num_epochs=epochs, verbose=True)
from catalyst.dl.runner import SupervisedRunner # experiment setup num_epochs = 2 logdir = "./logs/segmentation_notebook" # model, criterion, optimizer model = Unet(num_classes=1, in_channels=1, num_channels=32, num_blocks=2) criterion = nn.BCEWithLogitsLoss() optimizer = torch.optim.Adam(model.parameters(), lr=1e-3) scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[10, 20, 40], gamma=0.3) # model runner runner = SupervisedRunner() # model training runner.train(model=model, criterion=criterion, optimizer=optimizer, loaders=loaders, logdir=logdir, num_epochs=num_epochs, check=True) # # Inference # In[ ]: from catalyst.dl.callbacks import InferCallback, CheckpointCallback
batch_size=batch_size, num_workers=num_workers) dataloader_val = provider(data_folder=data_folder, df_path=train_df_path, phase='val', transforms=aug_val, batch_size=batch_size, num_workers=num_workers) model = models.resnet34(pretrained=True) model.fc = nn.Linear(512, 1) loaders = collections.OrderedDict() loaders["train"] = dataloader_train loaders["valid"] = dataloader_val runner = SupervisedRunner() optimizer = torch.optim.Adam(model.parameters(), lr=lr) criterion = nn.BCEWithLogitsLoss() scheduler = ReduceLROnPlateau(optimizer=optimizer, factor=0.5, patience=5, min_lr=1e-7) runner.train(model=model, criterion=criterion, optimizer=optimizer, loaders=loaders, logdir=logdir, scheduler=scheduler, callbacks=[F1ScoreCallback()], num_epochs=num_epochs,
# In[ ]: from catalyst.dl.runner import SupervisedRunner # experiment setup num_epochs = 2 logdir = "./logs/cifar_simple_notebook_1" # model, criterion, optimizer model = Net() criterion = nn.CrossEntropyLoss() optimizer = torch.optim.Adam(model.parameters()) # model runner runner = SupervisedRunner() # model training runner.train( model=model, criterion=criterion, optimizer=optimizer, loaders=loaders, logdir=logdir, num_epochs=num_epochs, check=True ) # In[ ]: # you can use plotly and tensorboard to plot metrics inside jupyter
def main(config): opts = config() path = opts.path train = pd.read_csv(f'{path}/train.csv') n_train = len(os.listdir(f'{path}/train_images')) n_test = len(os.listdir(f'{path}/test_images')) print(f'There are {n_train} images in train dataset') print(f'There are {n_test} images in test dataset') train.loc[train['EncodedPixels'].isnull() == False, 'Image_Label'].apply(lambda x: x.split('_')[1]).value_counts() train.loc[train['EncodedPixels'].isnull() == False, 'Image_Label'].apply( lambda x: x.split('_')[0]).value_counts().value_counts() train['label'] = train['Image_Label'].apply(lambda x: x.split('_')[1]) train['im_id'] = train['Image_Label'].apply(lambda x: x.split('_')[0]) id_mask_count = train.loc[train['EncodedPixels'].isnull() == False, 'Image_Label'].apply(lambda x: x.split('_')[ 0]).value_counts().reset_index().rename( columns={ 'index': 'img_id', 'Image_Label': 'count' }) print(id_mask_count.head()) if not os.path.exists("csvs/train_all.csv"): train_ids, valid_ids = train_test_split( id_mask_count, random_state=39, stratify=id_mask_count['count'], test_size=0.1) valid_ids.to_csv("csvs/valid_threshold.csv", index=False) train_ids.to_csv("csvs/train_all.csv", index=False) else: train_ids = pd.read_csv("csvs/train_all.csv") valid_ids = pd.read_csv("csvs/valid_threshold.csv") for fold, (train_ids_new, valid_ids_new) in enumerate( stratified_groups_kfold(train_ids, target='count', n_splits=opts.fold_max, random_state=0)): train_ids_new.to_csv(f'csvs/train_fold{fold}.csv') valid_ids_new.to_csv(f'csvs/valid_fold{fold}.csv') train_ids_new = train_ids_new['img_id'].values valid_ids_new = valid_ids_new['img_id'].values ENCODER = opts.backborn ENCODER_WEIGHTS = opts.encoder_weights DEVICE = 'cuda' ACTIVATION = None model = get_model( model_type=opts.model_type, encoder=ENCODER, encoder_weights=ENCODER_WEIGHTS, activation=ACTIVATION, n_classes=opts.class_num, task=opts.task, center=opts.center, attention_type=opts.attention_type, head='simple', classification=opts.classification, ) model = convert_model(model) preprocessing_fn = encoders.get_preprocessing_fn( ENCODER, ENCODER_WEIGHTS) num_workers = opts.num_workers bs = opts.batchsize train_dataset = CloudDataset( df=train, label_smoothing_eps=opts.label_smoothing_eps, datatype='train', img_ids=train_ids_new, transforms=get_training_augmentation(opts.img_size), preprocessing=get_preprocessing(preprocessing_fn)) valid_dataset = CloudDataset( df=train, datatype='valid', img_ids=valid_ids_new, transforms=get_validation_augmentation(opts.img_size), preprocessing=get_preprocessing(preprocessing_fn)) train_loader = DataLoader(train_dataset, batch_size=bs, shuffle=True, num_workers=num_workers, drop_last=True) valid_loader = DataLoader(valid_dataset, batch_size=bs, shuffle=False, num_workers=num_workers, drop_last=True) loaders = {"train": train_loader, "valid": valid_loader} num_epochs = opts.max_epoch logdir = f"{opts.logdir}/fold{fold}" optimizer = get_optimizer(optimizer=opts.optimizer, lookahead=opts.lookahead, model=model, separate_decoder=True, lr=opts.lr, lr_e=opts.lr_e) opt_level = 'O1' model.cuda() model, optimizer = amp.initialize(model, optimizer, opt_level=opt_level) scheduler = opts.scheduler(optimizer) criterion = opts.criterion runner = SupervisedRunner() if opts.task == "segmentation": callbacks = [DiceCallback()] else: callbacks = [] if opts.early_stop: callbacks.append( EarlyStoppingCallback(patience=10, min_delta=0.001)) if opts.mixup: callbacks.append(MixupCallback(alpha=0.25)) if opts.accumeration is not None: 
callbacks.append(CriterionCallback()) callbacks.append( OptimizerCallback(accumulation_steps=opts.accumeration)) print( f"############################## Start training of fold{fold}! ##############################" ) runner.train(model=model, criterion=criterion, optimizer=optimizer, scheduler=scheduler, loaders=loaders, callbacks=callbacks, logdir=logdir, num_epochs=num_epochs, verbose=True) print( f"############################## Finish training of fold{fold}! ##############################" ) del model del loaders del runner torch.cuda.empty_cache() gc.collect()
from catalyst.dl.runner import SupervisedRunner # experiment setup num_epochs = 2 logdir = "./logs/segmentation_notebook" # model, criterion, optimizer model = Unet(num_classes=1, in_channels=1, num_channels=32, num_blocks=2) criterion = nn.BCEWithLogitsLoss() optimizer = torch.optim.Adam(model.parameters(), lr=1e-3) scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[10, 20, 40], gamma=0.3) # model runner runner = SupervisedRunner() # model training runner.train( model=model, criterion=criterion, optimizer=optimizer, loaders=loaders, logdir=logdir, num_epochs=num_epochs, check=True, load_best_on_end=True, ) # # Inference
def main() -> None: config = load_config(CONFIG_FILE) train_config = config["train"] num_epochs = config.get("num epochs", 2) random_state = config.get("random state", 2019) num_workers = config.get("num workers", 6) batch_size = config["batch size"] train_dataset = get_dataset(**config["train"]) valid_dataset = get_dataset(**config["validation"]) data_loaders = OrderedDict() data_loaders["train"] = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers) data_loaders["valid"] = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers) set_global_seed(random_state) model = get_model(**config["model"]) if CHECKPOINT != "" and os.path.exists(CHECKPOINT): checkpoint_state = torch.load(CHECKPOINT)["model_state_dict"] model.load_state_dict(checkpoint_state) print(f"Using {CHECKPOINT} checkpoint", flush=True) model = model.to(DEVICE) model_optimizer = get_optimizer(model.parameters(), **config["optimizer"]) loss_function = get_loss(**config["loss"]) metric = config.get("metric", "loss") is_metric_minimization = config.get("minimize metric", True) scheduler = optim.lr_scheduler.ReduceLROnPlateau( model_optimizer, mode="min" if is_metric_minimization else "max", patience=3, factor=0.2, verbose=True, ) runner = SupervisedRunner(device=DEVICE) runner.train( model=model, criterion=loss_function, optimizer=model_optimizer, loaders=data_loaders, logdir=LOGDIR, callbacks=[ cbks.AUCCallback(), cbks.F1ScoreCallback(), AccuracyCallback(), cbks.CriterionCallback(), CheckpointCallback(save_n_best=3), ], scheduler=scheduler, verbose=True, minimize_metric=is_metric_minimization, num_epochs=num_epochs, main_metric=metric, )
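# load_config() and the factory helpers (get_dataset, get_model, get_optimizer, get_loss) are not
# shown above. Judging only from the keys that main() reads, the parsed config is assumed to look
# roughly like the dict below; the nested values are hypothetical placeholders, not the project's
# real settings.
example_config = {
    "num epochs": 30,
    "random state": 2019,
    "num workers": 6,
    "batch size": 32,
    "metric": "auc",
    "minimize metric": False,
    "train": {"images": "data/train", "labels": "data/train.csv"},       # passed to get_dataset(**...)
    "validation": {"images": "data/valid", "labels": "data/valid.csv"},  # passed to get_dataset(**...)
    "model": {"arch": "resnet34", "num_classes": 2},                     # passed to get_model(**...)
    "optimizer": {"name": "adam", "lr": 1e-3},                           # passed to get_optimizer(...)
    "loss": {"name": "bce"},                                             # passed to get_loss(**...)
}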
from catalyst.dl.core.state import RunnerState from catalyst.dl.core import MetricCallback from catalyst.dl.callbacks import CriterionCallback from efficientnet_pytorch import EfficientNet from utils import * from metrics import * if __name__ == '__main__': splits = pickle.load(open('cv_split.pickle', 'rb')) data = pd.read_csv('./data/splited_train.csv') labels = ['N','D','G','C','A','H','M','O'] n_classes = len(labels) fold_idx, batch_size, model_name, image_size, head_n_epochs, head_lr, full_n_epochs, full_lr, exp_name = fire.Fire(arguments) os.environ["CUDA_VISIBLE_DEVICES"] = "0" num_classes = len(labels) seed_everything(1234) runner = SupervisedRunner() model = prepare_model(model_name, n_classes) train_path = './ODIR-5K_Training_Dataset/' valid_path = './ODIR-5K_Training_Dataset/' #print('X size {}, y size {}'.format(data.loc[:,'id'].shape[1], data.loc[:,labels].shape[1]) train_dataset = EyeDataset(dataset_path = train_path, labels=data.loc[splits['train_idx'][fold_idx],labels].values, ids=data.loc[splits['train_idx'][fold_idx],'id'].values, albumentations_tr=aug_train_heavy(image_size)) val_dataset = EyeDataset(dataset_path=valid_path, labels=data.loc[splits['test_idx'][fold_idx],labels].values, ids=data.loc[splits['test_idx'][fold_idx],'id'].values, albumentations_tr=aug_val(image_size)) train_loader = DataLoader(train_dataset, num_workers=8, pin_memory=False,
logdir = "./logs/segmentation" # model, criterion, optimizer optimizer = torch.optim.Adam([ { 'params': model.decoder.parameters(), 'lr': 3e-3 }, { 'params': model.encoder.parameters(), 'lr': 7e-5 }, ]) scheduler = ReduceLROnPlateau(optimizer, factor=0.15, patience=2) criterion = smp.utils.losses.BCEDiceLoss(eps=1.) runner = SupervisedRunner(model=model) loaders = {"train": train_loader, "valid": valid_loader} runner.train(model=model, criterion=criterion, optimizer=optimizer, scheduler=scheduler, loaders=loaders, callbacks=[ DiceCallback(), EarlyStoppingCallback(patience=5, min_delta=0.001) ], logdir=logdir, num_epochs=num_epochs, verbose=True) encoded_pixels = [] loaders = {"infer": valid_loader}
logdir = "./logs/segmentation" # model, criterion, optimizer optimizer = torch.optim.Adam([ { 'params': model.decoder.parameters(), 'lr': 1e-2 }, { 'params': model.encoder.parameters(), 'lr': 1e-3 }, ]) scheduler = ReduceLROnPlateau(optimizer, factor=0.15, patience=2) criterion = smp.utils.losses.BCEDiceLoss(eps=1.) runner = SupervisedRunner() # In[66]: runner = SupervisedRunner() runner.train(model=model, criterion=criterion, optimizer=optimizer, scheduler=scheduler, loaders=loaders, callbacks=[ DiceCallback(), EarlyStoppingCallback(patience=5, min_delta=0.001) ], logdir=logdir, num_epochs=num_epochs,
def generate_class_params(i_dont_know_how_to_return_values_without_map): preprocessing_fn = smp.encoders.get_preprocessing_fn( ENCODER, ENCODER_WEIGHTS) valid_dataset = CloudDataset( df=train, datatype='valid', img_ids=valid_ids, transforms=get_validation_augmentation(), preprocessing=get_preprocessing(preprocessing_fn)) valid_loader = DataLoader(valid_dataset, batch_size=1, shuffle=False, num_workers=0) model = smp.Unet( encoder_name=ENCODER, encoder_weights=ENCODER_WEIGHTS, classes=4, activation=ACTIVATION, ) runner = SupervisedRunner() # Generate validation predictions encoded_pixels = [] loaders = {"infer": valid_loader} runner.infer( model=model, loaders=loaders, callbacks=[ CheckpointCallback(resume=f"{logdir}/checkpoints/best.pth"), InferCallback() ], ) valid_masks = [] probabilities = np.zeros((2220, 350, 525)) for i, (batch, output) in enumerate( tqdm.tqdm( zip(valid_dataset, runner.callbacks[0].predictions["logits"]))): image, mask = batch for m in mask: if m.shape != (350, 525): m = cv2.resize(m, dsize=(525, 350), interpolation=cv2.INTER_LINEAR) valid_masks.append(m) for j, probability in enumerate(output): if probability.shape != (350, 525): probability = cv2.resize(probability, dsize=(525, 350), interpolation=cv2.INTER_LINEAR) probabilities[i * 4 + j, :, :] = probability class_params = {} for class_id in range(4): print(class_id) attempts = [] for t in range(30, 100, 5): t /= 100 for ms in [1200, 5000, 10000]: masks = [] for i in range(class_id, len(probabilities), 4): probability = probabilities[i] predict, num_predict = post_process( sigmoid(probability), t, ms) masks.append(predict) d = [] for i, j in zip(masks, valid_masks[class_id::4]): if (i.sum() == 0) & (j.sum() == 0): d.append(1) else: d.append(dice(i, j)) attempts.append((t, ms, np.mean(d))) attempts_df = pd.DataFrame(attempts, columns=['threshold', 'size', 'dice']) attempts_df = attempts_df.sort_values('dice', ascending=False) print(attempts_df.head()) best_threshold = attempts_df['threshold'].values[0] best_size = attempts_df['size'].values[0] class_params[class_id] = (best_threshold, best_size) return class_params
def main_kaggle_smp(path_dataset='/dataset/kaggle/understanding_cloud_organization', ENCODER='resnet50', ENCODER_WEIGHTS='imagenet', num_workers=0, batch_size=8, epochs=19, debug=False, exec_catalyst=True, logdir="/src/logs/segmentation", pretrained=True ): # below line is potential input args # (name_dataset='eurosat', lr=0.0001, wd=0, ratio=0.9, batch_size=32, workers=4, epochs=15, num_gpus=1, # resume=None, dir_weights='./weights'): torch.backends.cudnn.benchmark = True # Dataset train, sub = get_meta_info_table(path_dataset) train_ids, valid_ids, test_ids = prepare_dataset(train, sub) preprocessing_fn = smp.encoders.get_preprocessing_fn(ENCODER, ENCODER_WEIGHTS) train_dataset = CloudDataset(df=train, datatype='train', img_ids=train_ids, transforms=get_training_augmentation(), preprocessing=get_preprocessing(preprocessing_fn), path=path_dataset) valid_dataset = CloudDataset(df=train, datatype='valid', img_ids=valid_ids, transforms=get_validation_augmentation(), preprocessing=get_preprocessing(preprocessing_fn), path=path_dataset) # DataLoader train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers) valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers) loaders = { "train": train_loader, "valid": valid_loader } # todo: check how to used device in this case DEVICE = 'cuda' if debug: device = 'cpu' else: device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") ACTIVATION = None model = smp.Unet( encoder_name=ENCODER, encoder_weights=ENCODER_WEIGHTS, classes=4, activation=ACTIVATION, ) images, labels = next(iter(train_loader)) model.to(device) print(model) print(summary(model, input_size=tuple(images.shape[1:]))) # use smp epoch # num_epochs = 19 # model, criterion, optimizer optimizer = torch.optim.Adam([ {'params': model.decoder.parameters(), 'lr': 1e-2}, {'params': model.encoder.parameters(), 'lr': 1e-3}, ]) scheduler = ReduceLROnPlateau(optimizer, factor=0.15, patience=2) criterion = smp.utils.losses.DiceLoss(eps=1.) # smp.utils.losses.BCEDiceLoss(eps=1.) if not pretrained: # catalyst if exec_catalyst: device = utils.get_device() runner = SupervisedRunner(device=device) # train model runner.train( model=model, criterion=criterion, optimizer=optimizer, scheduler=scheduler, loaders=loaders, callbacks=[DiceCallback(), EarlyStoppingCallback(patience=5, min_delta=0.001)], logdir=logdir, num_epochs=epochs, verbose=True ) # # prediction # encoded_pixels = [] # loaders = {"infer": valid_loader} # runner.infer( # model=model, # loaders=loaders, # callbacks=[ # CheckpointCallback( # resume=f"{logdir}/checkpoints/best.pth"), # InferCallback() # ], # ) # valid_masks = [] # # # todo: where .pth? 
# # todo: from here # valid_num = valid_dataset.__len__() # probabilities = np.zeros((valid_num * 4, 350, 525)) # for i, (batch, output) in enumerate(tqdm(zip( # valid_dataset, runner.callbacks[0].predictions["logits"]))): # image, mask = batch # for m in mask: # if m.shape != (350, 525): # m = cv2.resize(m, dsize=(525, 350), interpolation=cv2.INTER_LINEAR) # valid_masks.append(m) # # for j, probability in enumerate(output): # if probability.shape != (350, 525): # probability = cv2.resize(probability, dsize=(525, 350), interpolation=cv2.INTER_LINEAR) # probabilities[i * 4 + j, :, :] = probability # # # todo: from here # class_params = {} # for class_id in range(4): # print(class_id) # attempts = [] # for t in range(0, 100, 5): # t /= 100 # for ms in [0, 100, 1200, 5000, 10000]: # masks = [] # for i in range(class_id, len(probabilities), 4): # probability = probabilities[i] # predict, num_predict = post_process(sigmoid(probability), t, ms) # masks.append(predict) # # d = [] # for i, j in zip(masks, valid_masks[class_id::4]): # if (i.sum() == 0) & (j.sum() == 0): # d.append(1) # else: # d.append(dice(i, j)) # # attempts.append((t, ms, np.mean(d))) # # attempts_df = pd.DataFrame(attempts, columns=['threshold', 'size', 'dice']) # # attempts_df = attempts_df.sort_values('dice', ascending=False) # print(attempts_df.head()) # best_threshold = attempts_df['threshold'].values[0] # best_size = attempts_df['size'].values[0] # # class_params[class_id] = (best_threshold, best_size) else: for epoch in trange(epochs, desc="Epochs"): metrics_train = train_epoch(model, train_loader, criterion, optimizer, device) metrics_eval = eval_epoch(model, valid_loader, criterion, device) scheduler.step(metrics_eval['valid_loss']) print(f'epoch: {epoch} ', metrics_train, metrics_eval) else: if exec_catalyst: device = utils.get_device() checkpoint = utils.load_checkpoint(f'{logdir}/checkpoints/best_full.pth') utils.unpack_checkpoint(checkpoint, model=model) runner = SupervisedRunner(model=model) # prediction with infer encoded_pixels = [] loaders = {"infer": valid_loader} runner.infer( model=model, loaders=loaders, callbacks=[ CheckpointCallback( resume=f"{logdir}/checkpoints/best.pth"), InferCallback() ], ) # todo: still checking this in Jupyter valid_masks = [] valid_num = valid_dataset.__len__() probabilities = np.zeros((valid_num * 4, 350, 525)) for i, (batch, output) in enumerate(tqdm(zip( valid_dataset, runner.callbacks[0].predictions["logits"]))): image, mask = batch for m in mask: if m.shape != (350, 525): m = cv2.resize(m, dsize=(525, 350), interpolation=cv2.INTER_LINEAR) valid_masks.append(m) for j, probability in enumerate(output): if probability.shape != (350, 525): probability = cv2.resize(probability, dsize=(525, 350), interpolation=cv2.INTER_LINEAR) probabilities[i * 4 + j, :, :] = probability class_params = {} for class_id in range(4): print(class_id) attempts = [] for t in range(0, 100, 5): t /= 100 for ms in [0, 100, 1200, 5000, 10000]: masks = [] for i in range(class_id, len(probabilities), 4): probability = probabilities[i] predict, num_predict = post_process(sigmoid(probability), t, ms) masks.append(predict) d = [] for i, j in zip(masks, valid_masks[class_id::4]): if (i.sum() == 0) & (j.sum() == 0): d.append(1) else: d.append(dice(i, j)) attempts.append((t, ms, np.mean(d))) attempts_df = pd.DataFrame(attempts, columns=['threshold', 'size', 'dice']) attempts_df = attempts_df.sort_values('dice', ascending=False) print(attempts_df.head()) best_threshold = attempts_df['threshold'].values[0] best_size = 
attempts_df['size'].values[0] class_params[class_id] = (best_threshold, best_size) # predictions torch.cuda.empty_cache() gc.collect() test_dataset = CloudDataset(df=sub, datatype='test', img_ids=test_ids, transforms=get_validation_augmentation(), preprocessing=get_preprocessing(preprocessing_fn)) test_loader = DataLoader(test_dataset, batch_size=8, shuffle=False, num_workers=0) loaders = {"test": test_loader} encoded_pixels = [] image_id = 0 for i, test_batch in enumerate(tqdm(loaders['test'])): runner_out = runner.predict_batch({"features": test_batch[0].cuda()})['logits'] for i, batch in enumerate(runner_out): for probability in batch: probability = probability.cpu().detach().numpy() if probability.shape != (350, 525): probability = cv2.resize(probability, dsize=(525, 350), interpolation=cv2.INTER_LINEAR) predict, num_predict = post_process(sigmoid(probability), class_params[image_id % 4][0], class_params[image_id % 4][1]) if num_predict == 0: encoded_pixels.append('') else: r = mask2rle(predict) encoded_pixels.append(r) image_id += 1 sub['EncodedPixels'] = encoded_pixels sub.to_csv('data/kaggle_cloud_org/submission.csv', columns=['Image_Label', 'EncodedPixels'], index=False)
loaders = collections.OrderedDict() loaders["valid"] = test_loader probabilities_list = [] ttatype='d4' for fold_idx in range(len(splits['test_idx'])): print('Getting predictions from fold {}'.format(fold_idx)) logdir = 'logs/{}_fold{}/'.format(exp_name, fold_idx) model = prepare_model(model_name, n_classes) model.cuda() model.load_state_dict(torch.load(os.path.join(logdir,'checkpoints/best.pth'))['model_state_dict']) model.eval() if ttatype=='d4': model = tta.TTAWrapper(model, tta.d4_image2label) elif ttatype=='fliplr_image2label': model = tta.TTAWrapper(model, tta.fliplr_image2label) runner = SupervisedRunner(model=model) #predictions = runner.predict_loader(loaders["valid"], resume=f"{logdir}/checkpoints/best.pth") runner.infer(model=model,loaders=loaders,callbacks=[InferCallback()]) predictions = runner.callbacks[0].predictions['logits'] probabilities = softmax(torch.from_numpy(predictions),dim=1).numpy() for idx in range(probabilities.shape[0]): if all(probabilities[idx,:]<0.5): probabilities[idx,0] = 1.0 probabilities_list.append(probabilities) probabilities_combined = np.stack(probabilities_list,axis=0).mean(axis=0) predicted_labels = pd.DataFrame(probabilities_combined, columns=labels) predicted_labels['id'] = test_data.loc[:,'id'].values predicted_labels.loc[:,'ID'] = predicted_labels.id.apply(lambda x: x.split('_')[0]) predicted_labels_groupped = predicted_labels.groupby(['ID']).aggregate(dict(zip(labels,['max']*(len(labels))))) predicted_labels_groupped['ID'] = predicted_labels_groupped.index.values.astype(int) predicted_labels_groupped.reset_index(drop=True, inplace=True)
def train( model: torch.nn.Module, dataset: torch.utils.data.Dataset, optimizer: torch.optim.Optimizer, criterion: torch.nn.Module, config: ParamConfig, val_dataset: torch.utils.data.Dataset = None, logdir: str = "./logdir", resume: Union[str, None] = "logdir/checkpoints/best_full.pth", ) -> None: """ train the model with specified paremeters Args: model: neural network model dataset: training dataset optimizer: optimizer criterion: loss function val_dataset: validation dataset logdir: logdir location to save checkpoints resume: path where the partially trained model is stored """ loaders = collections.OrderedDict() train_loader = utils.get_loader( dataset, open_fn=lambda x: { "input_audio": x[-1], "input_video": x[1], "targets": x[0] }, batch_size=config.batch_size, num_workers=config.workers, shuffle=True, ) val_loader = utils.get_loader( val_dataset, open_fn=lambda x: { "input_audio": x[-1], "input_video": x[1], "targets": x[0] }, batch_size=config.batch_size, num_workers=config.workers, shuffle=True, ) loaders = {"train": train_loader, "valid": val_loader} scheduler = torch.optim.lr_scheduler.CyclicLR( optimizer, base_lr=config.learning_rate, max_lr=config.learning_rate * 10, step_size_up=4 * len(train_loader), mode="triangular", cycle_momentum=False, ) runner = SupervisedRunner(input_key=["input_audio", "input_video"]) runner.train( model=model, criterion=criterion, optimizer=optimizer, scheduler=scheduler, loaders=loaders, logdir=logdir, verbose=True, num_epochs=config.epochs, resume=resume, callbacks=collections.OrderedDict({ "iteration_checkpoint": IterationCheckpointCallback(save_n_last=1, num_iters=10_000), "snr_callback": SNRCallback(), "sched_callback": SchedulerCallback(mode="batch"), }), )