def main(): train_image_list = sorted( glob.glob( pathname= '../input/uavid-semantic-segmentation-dataset/train/train/*/Images/*.png', recursive=True)) train_mask_list = sorted( glob.glob(pathname='./trainlabels/*/TrainId/*.png', recursive=True)) valid_image_list = sorted( glob.glob( pathname= '../input/uavid-semantic-segmentation-dataset/valid/valid/*/Images/*.png', recursive=True)) valid_mask_list = sorted( glob.glob(pathname='./validlabels/*/TrainId/*.png', recursive=True)) preprocessing_fn = smp.encoders.get_preprocessing_fn( config.ENCODER, config.ENCODER_WEIGHTS) train_dataset = Dataset( train_image_list, train_mask_list, augmentation=augmentations.get_training_augmentation(), preprocessing=augmentations.get_preprocessing(preprocessing_fn), classes=config.CLASSES, ) valid_dataset = Dataset( valid_image_list, valid_mask_list, augmentation=augmentations.get_validation_augmentation(), preprocessing=augmentations.get_preprocessing(preprocessing_fn), classes=config.CLASSES, ) train_loader = DataLoader(train_dataset, batch_size=config.BATCH_SIZE, shuffle=True, num_workers=2, pin_memory=True, drop_last=True) valid_loader = DataLoader(valid_dataset, batch_size=config.BATCH_SIZE, shuffle=False, num_workers=2, pin_memory=True, drop_last=False) loaders = {"train": train_loader, "valid": valid_loader} base_optimizer = RAdam([ { 'params': model.MODEL.decoder.parameters(), 'lr': config.LEARNING_RATE }, { 'params': model.MODEL.encoder.parameters(), 'lr': 1e-4 }, { 'params': model.MODEL.segmentation_head.parameters(), 'lr': config.LEARNING_RATE }, ]) optimizer = Lookahead(base_optimizer) criterion = BCEDiceLoss(activation=None) runner = SupervisedRunner() scheduler = OneCycleLRWithWarmup(optimizer, num_steps=config.NUM_EPOCHS, lr_range=(0.0016, 0.0000001), init_lr=config.LEARNING_RATE, warmup_steps=2) callbacks = [ IouCallback(activation='none'), ClasswiseIouCallback(classes=config.CLASSES, activation='none'), EarlyStoppingCallback(patience=config.ES_PATIENCE, metric='iou', minimize=False), ] runner.train( model=model, criterion=criterion, optimizer=optimizer, scheduler=scheduler, loaders=loaders, callbacks=callbacks, logdir=config.LOGDIR, num_epochs=config.NUM_EPOCHS, # save our best checkpoint by IoU metric main_metric="iou", # IoU needs to be maximized. minimize_metric=False, # for FP16. It uses the variable from the very first cell fp16=config.FP16_PARAMS, # prints train logs verbose=True, )
def train_model(): model = smp.Unet( encoder_name=ENCODER, encoder_weights=ENCODER_WEIGHTS, classes=4, activation=ACTIVATION, ) preprocessing_fn = smp.encoders.get_preprocessing_fn( ENCODER, ENCODER_WEIGHTS) num_workers = 0 bs = 16 train_dataset = CloudDataset( df=train, datatype='train', img_ids=train_ids, transforms=get_training_augmentation(), preprocessing=get_preprocessing(preprocessing_fn)) valid_dataset = CloudDataset( df=train, datatype='valid', img_ids=valid_ids, transforms=get_validation_augmentation(), preprocessing=get_preprocessing(preprocessing_fn)) train_loader = DataLoader(train_dataset, batch_size=bs, shuffle=True, num_workers=num_workers) valid_loader = DataLoader(valid_dataset, batch_size=1, shuffle=False, num_workers=num_workers) loaders = {"train": train_loader, "valid": valid_loader} num_epochs = 40 # model, criterion, optimizer optimizer = RAdam([ { 'params': model.decoder.parameters(), 'lr': 1e-2 }, { 'params': model.encoder.parameters(), 'lr': 1e-3 }, ]) scheduler = ReduceLROnPlateau(optimizer, factor=0.15, patience=2, threshold=0.001) criterion = smp.utils.losses.BCEDiceLoss(eps=1.) runner = SupervisedRunner() runner.train(model=model, criterion=criterion, optimizer=optimizer, scheduler=scheduler, loaders=loaders, callbacks=[ DiceCallback(), EarlyStoppingCallback(patience=5, min_delta=0.001) ], logdir=logdir, num_epochs=num_epochs, verbose=True) return True
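# The snippets in this file are fragments with their imports stripped. A minimal sketch of the
# library imports that train_model() above assumes (project-local helpers such as CloudDataset and
# the augmentation/preprocessing functions are not library imports; exact Catalyst paths vary by
# release, e.g. RAdam lived in catalyst.contrib.optimizers in older versions and in
# catalyst.contrib.nn in newer ones):
import segmentation_models_pytorch as smp
from torch.utils.data import DataLoader
from torch.optim.lr_scheduler import ReduceLROnPlateau
from catalyst.contrib.optimizers import RAdam
from catalyst.dl.runner import SupervisedRunner
from catalyst.dl.callbacks import DiceCallback, EarlyStoppingCallback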
num_classes=config.num_classes, input_key="targets_one_hot", class_names=config.class_names ), F1ScoreCallback( input_key="targets_one_hot", activation="Softmax" ), CheckpointCallback( save_n_best=1, # resume_dir="./models/classification", metrics_filename="metrics.json" ), EarlyStoppingCallback( patience=config.patience, metric="auc/_mean", minimize=False ) ], # path to save logs logdir=config.logdir, num_epochs=config.num_epochs, # save our best checkpoint by AUC metric main_metric="auc/_mean", # AUC needs to be maximized. minimize_metric=False, # for FP16. It uses the variable from the very first cell fp16=fp16_params,
# elif args.loss == 'lovasz_softmax': # criterion = lovasz_softmax() elif args.loss == 'BCEMulticlassDiceLoss': criterion = BCEMulticlassDiceLoss() elif args.loss == 'MulticlassDiceMetricCallback': criterion = MulticlassDiceMetricCallback() elif args.loss == 'BCE': criterion = nn.BCEWithLogitsLoss() else: criterion = smp.utils.losses.BCEDiceLoss(eps=1.) if args.multigpu: model = nn.DataParallel(model) if args.task == 'segmentation': callbacks = [DiceCallback(), EarlyStoppingCallback(patience=10, min_delta=0.001), CriterionCallback()] elif args.task == 'classification': callbacks = [AUCCallback(class_names=['Fish', 'Flower', 'Gravel', 'Sugar'], num_classes=4), EarlyStoppingCallback(patience=10, min_delta=0.001), CriterionCallback()] if args.gradient_accumulation: callbacks.append(OptimizerCallback(accumulation_steps=args.gradient_accumulation)) checkpoint = utils.load_checkpoint(f'{logdir}/checkpoints/best.pth') model.cuda() utils.unpack_checkpoint(checkpoint, model=model) # # runner = SupervisedRunner() if args.train: print('Training')
def main(args): """ Main code for training a classification model. Args: args (instance of argparse.ArgumentParser): arguments must be compiled with parse_args Returns: None """ # Reading the in the .csvs train = pd.read_csv(os.path.join(args.dset_path, "train.csv")) sub = pd.read_csv(os.path.join(args.dset_path, "sample_submission.csv")) # setting up the train/val split with filenames train, sub, id_mask_count = setup_train_and_sub_df(args.dset_path) # setting up the train/val split with filenames seed_everything(args.split_seed) train_ids, valid_ids = train_test_split(id_mask_count["im_id"].values, random_state=args.split_seed, stratify=id_mask_count["count"], test_size=args.test_size) # setting up the classification model ENCODER_WEIGHTS = "imagenet" DEVICE = "cuda" model = ResNet34(pre=ENCODER_WEIGHTS, num_classes=4, use_simple_head=True) preprocessing_fn = smp.encoders.get_preprocessing_fn( "resnet34", ENCODER_WEIGHTS) # Setting up the I/O train_dataset = ClassificationSteelDataset( args.dset_path, df=train, datatype="train", im_ids=train_ids, transforms=get_training_augmentation(), preprocessing=get_preprocessing(preprocessing_fn), ) valid_dataset = ClassificationSteelDataset( args.dset_path, df=train, datatype="valid", im_ids=valid_ids, transforms=get_validation_augmentation(), preprocessing=get_preprocessing(preprocessing_fn), ) train_loader = DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True, num_workers=args.num_workers) valid_loader = DataLoader(valid_dataset, batch_size=args.batch_size, shuffle=False, num_workers=args.num_workers) loaders = {"train": train_loader, "valid": valid_loader} # everything is saved here (i.e. weights + stats) logdir = "./logs/segmentation" # model, criterion, optimizer optimizer = torch.optim.Adam(model.parameters(), lr=3e-4) scheduler = ReduceLROnPlateau(optimizer, factor=0.15, patience=2) criterion = smp.utils.losses.BCEDiceLoss(eps=1.) runner = SupervisedRunner() runner.train(model=model, criterion=criterion, optimizer=optimizer, scheduler=scheduler, loaders=loaders, callbacks=[ DiceCallback(), EarlyStoppingCallback(patience=5, min_delta=0.001) ], logdir=logdir, num_epochs=args.num_epochs, verbose=True) utils.plot_metrics( logdir=logdir, # specify which metrics we want to plot metrics=["loss", "dice", "lr", "_base/lr"])
valid_loader = DataLoader(valid_dataset, batch_size=hyper_params['batch_size'], shuffle=False) loaders = {"train": train_loader, "valid": valid_loader} optimizer = torch.optim.Adam(model.parameters(), hyper_params['learning_rate']) scheduler = ReduceLROnPlateau(optimizer, factor=0.15, patience=2) criterion = WeightedBCEDiceLoss( lambda_dice=hyper_params['lambda_dice'], lambda_bce=hyper_params['lambda_bceWithLogits'] ) runner = SupervisedRunner(device=device) logdir = hyper_params['logdir'] runner.train( model=model, criterion=criterion, optimizer=optimizer, scheduler=scheduler, loaders=loaders, callbacks=[DiceCallback(), CometCallback(experiment), EarlyStoppingCallback(patience=5, min_delta=0.001)], logdir=logdir, #resume=f"{logdir}/checkpoints/last_full.pth", num_epochs=hyper_params['num_epochs'], verbose=True )
import numpy as np from sklearn.metrics import roc_auc_score def calc_roc_auc(pred, gt, *args, **kwargs): pred = torch.sigmoid(pred).detach().cpu().numpy() gt = gt.detach().cpu().numpy().astype(np.uint8) pred = np.concatenate([pred.reshape(-1), np.array([0, 0])]) gt = np.concatenate([gt.reshape(-1), np.array([1, 0])]) return [roc_auc_score(gt.reshape(-1), pred.reshape(-1))] runner.train(model=model, scheduler=scheduler, criterion=criterion, optimizer=optimizer, loaders=loaders, logdir=logdir, num_epochs=num_epochs, callbacks=[ MultiMetricCallback(metric_fn=calc_roc_auc, prefix='rocauc', input_key="targets", output_key="logits", list_args=['_']), EarlyStoppingCallback(patience=10, min_delta=0.01) ], verbose=True)
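# Note on calc_roc_auc above: the padded elements ([0, 0] appended to pred and [1, 0] appended to
# gt) guarantee that both classes are present in gt, so roc_auc_score cannot raise on a batch that
# contains only one class, at the cost of a tiny bias in the reported value.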
def run(config_file, device_id, idx_fold): os.environ['CUDA_VISIBLE_DEVICES'] = str(device_id) print('info: use gpu No.{}'.format(device_id)) config = load_config(config_file) # for n-folds loop if config.data.params.idx_fold == -1: config.data.params.idx_fold = idx_fold config.work_dir = config.work_dir + '_fold{}'.format(idx_fold) elif config.data.params.idx_fold == 0: original_fold = int(config.work_dir.split('_fold')[1]) if original_fold == idx_fold: raise Exception( 'if you specify fold 0, you should use train.py or resume from fold 1.' ) config.data.params.idx_fold = idx_fold config.work_dir = config.work_dir.split('_fold')[0] + '_fold{}'.format( idx_fold) else: raise Exception('you should use train.py if idx_fold is specified.') print('info: training for fold {}'.format(idx_fold)) if not os.path.exists(config.work_dir): os.makedirs(config.work_dir, exist_ok=True) all_transforms = {} all_transforms['train'] = get_transforms(config.transforms.train) all_transforms['valid'] = get_transforms(config.transforms.test) dataloaders = { phase: make_loader( df_path=config.data.train_df_path, data_dir=config.data.train_dir, features=config.data.features, phase=phase, img_size=(config.data.height, config.data.width), batch_size=config.train.batch_size, num_workers=config.num_workers, idx_fold=config.data.params.idx_fold, transforms=all_transforms[phase], horizontal_flip=config.train.horizontal_flip, model_scale=config.data.model_scale, debug=config.debug, pseudo_path=config.data.pseudo_path, ) for phase in ['train', 'valid'] } # create segmentation model with pre trained encoder num_features = len(config.data.features) print('info: num_features =', num_features) model = CenterNetFPN( slug=config.model.encoder, num_classes=num_features, ) optimizer = get_optimizer(model, config) scheduler = get_scheduler(optimizer, config) # model runner runner = SupervisedRunner(model=model, device=get_device()) # train setting criterion, callbacks = get_criterion_and_callback(config) if config.train.early_stop_patience > 0: callbacks.append( EarlyStoppingCallback(patience=config.train.early_stop_patience)) if config.train.accumulation_size > 0: accumulation_steps = config.train.accumulation_size // config.train.batch_size callbacks.extend( [OptimizerCallback(accumulation_steps=accumulation_steps)]) # to resume from check points if exists if os.path.exists(config.work_dir + '/checkpoints/last_full.pth'): callbacks.append( CheckpointCallback(resume=config.work_dir + '/checkpoints/last_full.pth')) # model training runner.train( model=model, criterion=criterion, optimizer=optimizer, scheduler=scheduler, loaders=dataloaders, logdir=config.work_dir, num_epochs=config.train.num_epochs, main_metric=config.train.main_metric, minimize_metric=config.train.minimize_metric, callbacks=callbacks, verbose=True, fp16=config.train.fp16, )
def main(): parser = argparse.ArgumentParser() parser.add_argument('--encoder', type=str, default='efficientnet-b0') parser.add_argument('--model', type=str, default='unet') parser.add_argument('--pretrained', type=str, default='imagenet') parser.add_argument('--logdir', type=str, default='../logs/') parser.add_argument('--exp_name', type=str) parser.add_argument('--data_folder', type=str, default='../input/') parser.add_argument('--height', type=int, default=320) parser.add_argument('--width', type=int, default=640) parser.add_argument('--batch_size', type=int, default=2) parser.add_argument('--accumulate', type=int, default=8) parser.add_argument('--epochs', type=int, default=20) parser.add_argument('--enc_lr', type=float, default=1e-2) parser.add_argument('--dec_lr', type=float, default=1e-3) parser.add_argument('--optim', type=str, default="radam") parser.add_argument('--loss', type=str, default="bcedice") parser.add_argument('--schedule', type=str, default="rlop") parser.add_argument('--early_stopping', type=bool, default=True) args = parser.parse_args() encoder = args.encoder model = args.model pretrained = args.pretrained logdir = args.logdir name = args.exp_name data_folder = args.data_folder height = args.height width = args.width bs = args.batch_size accumulate = args.accumulate epochs = args.epochs enc_lr = args.enc_lr dec_lr = args.dec_lr optim = args.optim loss = args.loss schedule = args.schedule early_stopping = args.early_stopping if model == 'unet': model = smp.Unet(encoder_name=encoder, encoder_weights=pretrained, classes=4, activation=None) if model == 'fpn': model = smp.FPN( encoder_name=encoder, encoder_weights=pretrained, classes=4, activation=None, ) if model == 'pspnet': model = smp.PSPNet( encoder_name=encoder, encoder_weights=pretrained, classes=4, activation=None, ) if model == 'linknet': model = smp.Linknet( encoder_name=encoder, encoder_weights=pretrained, classes=4, activation=None, ) if model == 'aspp': print('aspp can only be used with resnet34') model = aspp(num_class=4) preprocessing_fn = smp.encoders.get_preprocessing_fn(encoder, pretrained) log = os.path.join(logdir, name) ds = get_dataset(path=data_folder) prepared_ds = prepare_dataset(ds) train_set, valid_set = get_train_test(ds) train_ds = CloudDataset(df=prepared_ds, datatype='train', img_ids=train_set, transforms=training1(h=height, w=width), preprocessing=get_preprocessing(preprocessing_fn), folder=data_folder) valid_ds = CloudDataset(df=prepared_ds, datatype='train', img_ids=valid_set, transforms=valid1(h=height, w=width), preprocessing=get_preprocessing(preprocessing_fn), folder=data_folder) train_loader = DataLoader(train_ds, batch_size=bs, shuffle=True, num_workers=multiprocessing.cpu_count()) valid_loader = DataLoader(valid_ds, batch_size=bs, shuffle=False, num_workers=multiprocessing.cpu_count()) loaders = { 'train': train_loader, 'valid': valid_loader, } num_epochs = epochs if args.model != "aspp": if optim == "radam": optimizer = RAdam([ { 'params': model.encoder.parameters(), 'lr': enc_lr }, { 'params': model.decoder.parameters(), 'lr': dec_lr }, ]) if optim == "adam": optimizer = Adam([ { 'params': model.encoder.parameters(), 'lr': enc_lr }, { 'params': model.decoder.parameters(), 'lr': dec_lr }, ]) if optim == "adamw": optimizer = AdamW([ { 'params': model.encoder.parameters(), 'lr': enc_lr }, { 'params': model.decoder.parameters(), 'lr': dec_lr }, ]) if optim == "sgd": optimizer = SGD([ { 'params': model.encoder.parameters(), 'lr': enc_lr }, { 'params': model.decoder.parameters(), 'lr': 
dec_lr }, ]) elif args.model == 'aspp': if optim == "radam": optimizer = RAdam([ { 'params': model.parameters(), 'lr': enc_lr }, ]) if optim == "adam": optimizer = Adam([ { 'params': model.parameters(), 'lr': enc_lr }, ]) if optim == "adamw": optimizer = AdamW([ { 'params': model.parameters(), 'lr': enc_lr }, ]) if optim == "sgd": optimizer = SGD([ { 'params': model.parameters(), 'lr': enc_lr }, ]) scheduler = ReduceLROnPlateau(optimizer, factor=0.1, patience=5) if schedule == "rlop": scheduler = ReduceLROnPlateau(optimizer, factor=0.15, patience=3) if schedule == "noam": scheduler = NoamLR(optimizer, 10) if loss == "bcedice": criterion = smp.utils.losses.BCEDiceLoss(eps=1.) if loss == "dice": criterion = smp.utils.losses.DiceLoss(eps=1.) if loss == "bcejaccard": criterion = smp.utils.losses.BCEJaccardLoss(eps=1.) if loss == "jaccard": criterion = smp.utils.losses.JaccardLoss(eps=1.) if loss == 'bce': criterion = NewBCELoss() callbacks = [NewDiceCallback(), CriterionCallback()] callbacks.append(OptimizerCallback(accumulation_steps=accumulate)) if early_stopping: callbacks.append(EarlyStoppingCallback(patience=5, min_delta=0.001)) runner = SupervisedRunner() runner.train( model=model, criterion=criterion, optimizer=optimizer, scheduler=scheduler, loaders=loaders, callbacks=callbacks, logdir=log, num_epochs=num_epochs, verbose=True, )
def main(data_path='/data/SN6_buildings/train/AOI_11_Rotterdam/', config_path='/project/configs/senet154_gcc_fold1.py', gpu='0'): os.environ["CUDA_VISIBLE_DEVICES"] = gpu config = get_config(config_path) model_name = config['model_name'] fold_number = config['fold_number'] alias = config['alias'] log_path = osp.join(config['logs_path'], alias + str(fold_number) + '_' + model_name) device = torch.device(config['device']) weights = config['weights'] loss_name = config['loss'] optimizer_name = config['optimizer'] lr = config['lr'] decay = config['decay'] momentum = config['momentum'] epochs = config['epochs'] fp16 = config['fp16'] n_classes = config['n_classes'] input_channels = config['input_channels'] main_metric = config['main_metric'] best_models_count = config['best_models_count'] minimize_metric = config['minimize_metric'] min_delta = config['min_delta'] train_images = data_path data_type = config['data_type'] masks_data_path = config['masks_data_path'] folds_file = config['folds_file'] train_augs = config['train_augs'] preprocessing_fn = config['preprocessing_fn'] limit_files = config['limit_files'] batch_size = config['batch_size'] shuffle = config['shuffle'] num_workers = config['num_workers'] valid_augs = config['valid_augs'] val_batch_size = config['val_batch_size'] multiplier = config['multiplier'] train_dataset = SemSegDataset(images_dir=train_images, data_type=data_type, masks_dir=masks_data_path, mode='train', n_classes=n_classes, folds_file=folds_file, fold_number=fold_number, augmentation=train_augs, preprocessing=preprocessing_fn, limit_files=limit_files, multiplier=multiplier) train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=shuffle, num_workers=num_workers) valid_dataset = SemSegDataset(images_dir=train_images, data_type=data_type, mode='valid', folds_file=folds_file, n_classes=n_classes, fold_number=fold_number, augmentation=valid_augs, preprocessing=preprocessing_fn, limit_files=limit_files) valid_loader = DataLoader(dataset=valid_dataset, batch_size=val_batch_size, shuffle=False, num_workers=num_workers) model = make_model(model_name=model_name, weights=weights, n_classes=n_classes, input_channels=input_channels).to(device) loss = get_loss(loss_name=loss_name) optimizer = get_optimizer(optimizer_name=optimizer_name, model=model, lr=lr, momentum=momentum, decay=decay) if config['scheduler'] == 'reduce_on_plateau': print('reduce lr') alpha = config['alpha'] patience = config['patience'] threshold = config['thershold'] min_lr = config['min_lr'] mode = config['scheduler_mode'] scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau( optimizer=optimizer, factor=alpha, verbose=True, patience=patience, mode=mode, threshold=threshold, min_lr=min_lr) elif config['scheduler'] == 'steps': print('steps lr') steps = config['steps'] step_gamma = config['step_gamma'] scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer=optimizer, milestones=steps, gamma=step_gamma) else: scheduler = None callbacks = [] dice_callback = DiceCallback() callbacks.append(dice_callback) callbacks.append(CheckpointCallback(save_n_best=best_models_count)) callbacks.append( EarlyStoppingCallback(patience=config['early_stopping'], metric=main_metric, minimize=minimize_metric, min_delta=min_delta)) runner = SupervisedRunner(device=device) loaders = {'train': train_loader, 'valid': valid_loader} runner.train(model=model, criterion=loss, optimizer=optimizer, loaders=loaders, scheduler=scheduler, callbacks=callbacks, logdir=log_path, num_epochs=epochs, verbose=True, 
main_metric=main_metric, minimize_metric=minimize_metric, fp16=fp16)
'valid': dataloader_val } #collections.OrderedDict({'train': dataloader_train, 'valid': dataloader_val}) model = ReverseModel() optimizer = Lookahead(RAdam(params=model.parameters(), lr=1e-3)) criterion = {"bce": nn.BCEWithLogitsLoss()} scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.25, patience=2) callbacks = [ CriterionCallback(input_key='start', prefix="loss", criterion_key="bce"), EarlyStoppingCallback(patience=5), ] runner.train( model=model, criterion=criterion, optimizer=optimizer, scheduler=scheduler, loaders=loaders, callbacks=callbacks, logdir="./logs", num_epochs=5, #TODO main_metric="loss", minimize_metric=True, verbose=True, )
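# Note on the snippet above: because `criterion` is a dict, each CriterionCallback selects its loss
# via criterion_key ("bce" here) and writes it under `prefix` ("loss"), which is also the
# main_metric being minimized, so the BCE-with-logits value drives optimization, early stopping
# and checkpoint selection.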
def main(config): opts = config() path = opts.path train = pd.read_csv(f'{path}/train.csv') pseudo_label = pd.read_csv( './submissions/submission_segmentation_and_classifier.csv') n_train = len(os.listdir(f'{path}/train_images')) n_test = len(os.listdir(f'{path}/test_images')) print(f'There are {n_train} images in train dataset') print(f'There are {n_test} images in test dataset') train.loc[train['EncodedPixels'].isnull() == False, 'Image_Label'].apply(lambda x: x.split('_')[1]).value_counts() train.loc[train['EncodedPixels'].isnull() == False, 'Image_Label'].apply( lambda x: x.split('_')[0]).value_counts().value_counts() train['label'] = train['Image_Label'].apply(lambda x: x.split('_')[1]) train['im_id'] = train['Image_Label'].apply(lambda x: x.split('_')[0]) id_mask_count = train.loc[train['EncodedPixels'].isnull() == False, 'Image_Label'].apply(lambda x: x.split('_')[ 0]).value_counts().reset_index().rename( columns={ 'index': 'img_id', 'Image_Label': 'count' }) print(id_mask_count.head()) pseudo_label.loc[pseudo_label['EncodedPixels'].isnull() == False, 'Image_Label'].apply( lambda x: x.split('_')[1]).value_counts() pseudo_label.loc[pseudo_label['EncodedPixels'].isnull() == False, 'Image_Label'].apply(lambda x: x.split('_')[0] ).value_counts().value_counts() pseudo_label['label'] = pseudo_label['Image_Label'].apply( lambda x: x.split('_')[1]) pseudo_label['im_id'] = pseudo_label['Image_Label'].apply( lambda x: x.split('_')[0]) pseudo_label_ids = pseudo_label.loc[ pseudo_label['EncodedPixels'].isnull() == False, 'Image_Label'].apply( lambda x: x.split('_')[0]).value_counts().reset_index().rename( columns={ 'index': 'img_id', 'Image_Label': 'count' }) print(pseudo_label_ids.head()) if not os.path.exists("csvs/train_all.csv"): train_ids, valid_ids = train_test_split( id_mask_count, random_state=39, stratify=id_mask_count['count'], test_size=0.1) valid_ids.to_csv("csvs/valid_threshold.csv", index=False) train_ids.to_csv("csvs/train_all.csv", index=False) else: train_ids = pd.read_csv("csvs/train_all.csv") valid_ids = pd.read_csv("csvs/valid_threshold.csv") for fold, ((train_ids_new, valid_ids_new), (train_ids_pl, valid_ids_pl)) in enumerate( zip( stratified_groups_kfold(train_ids, target='count', n_splits=opts.fold_max, random_state=0), stratified_groups_kfold(pseudo_label_ids, target='count', n_splits=opts.fold_max, random_state=0))): train_ids_new.to_csv(f'csvs/train_fold{fold}.csv') valid_ids_new.to_csv(f'csvs/valid_fold{fold}.csv') train_ids_new = train_ids_new['img_id'].values valid_ids_new = valid_ids_new['img_id'].values train_ids_pl = train_ids_pl['img_id'].values valid_ids_pl = valid_ids_pl['img_id'].values ENCODER = opts.backborn ENCODER_WEIGHTS = opts.encoder_weights DEVICE = 'cuda' ACTIVATION = None model = get_model( model_type=opts.model_type, encoder=ENCODER, encoder_weights=ENCODER_WEIGHTS, activation=ACTIVATION, n_classes=opts.class_num, task=opts.task, center=opts.center, attention_type=opts.attention_type, head='simple', classification=opts.classification, ) model = convert_model(model) preprocessing_fn = encoders.get_preprocessing_fn( ENCODER, ENCODER_WEIGHTS) num_workers = opts.num_workers bs = opts.batchsize train_dataset = CloudDataset( df=train, label_smoothing_eps=opts.label_smoothing_eps, datatype='train', img_ids=train_ids_new, transforms=get_training_augmentation(opts.img_size), preprocessing=get_preprocessing(preprocessing_fn)) valid_dataset = CloudDataset( df=train, datatype='valid', img_ids=valid_ids_new, 
transforms=get_validation_augmentation(opts.img_size), preprocessing=get_preprocessing(preprocessing_fn)) ################# make pseudo label dataset ####################### train_dataset_pl = CloudPseudoLabelDataset( df=pseudo_label, datatype='train', img_ids=train_ids_pl, transforms=get_training_augmentation(opts.img_size), preprocessing=get_preprocessing(preprocessing_fn)) valid_dataset_pl = CloudPseudoLabelDataset( df=pseudo_label, datatype='train', img_ids=valid_ids_pl, transforms=get_validation_augmentation(opts.img_size), preprocessing=get_preprocessing(preprocessing_fn)) # train_dataset = ConcatDataset([train_dataset, train_dataset_pl]) # valid_dataset = ConcatDataset([valid_dataset, valid_dataset_pl]) train_dataset = ConcatDataset([train_dataset, valid_dataset_pl]) ################# make pseudo label dataset ####################### train_loader = DataLoader(train_dataset, batch_size=bs, shuffle=True, num_workers=num_workers, drop_last=True) valid_loader = DataLoader(valid_dataset, batch_size=bs, shuffle=False, num_workers=num_workers, drop_last=True) loaders = {"train": train_loader, "valid": valid_loader} num_epochs = opts.max_epoch logdir = f"{opts.logdir}/fold{fold}" optimizer = get_optimizer(optimizer=opts.optimizer, lookahead=opts.lookahead, model=model, separate_decoder=True, lr=opts.lr, lr_e=opts.lr_e) opt_level = 'O1' model.cuda() model, optimizer = amp.initialize(model, optimizer, opt_level=opt_level) scheduler = opts.scheduler(optimizer) criterion = opts.criterion runner = SupervisedRunner() if opts.task == "segmentation": callbacks = [DiceCallback()] else: callbacks = [] if opts.early_stop: callbacks.append( EarlyStoppingCallback(patience=10, min_delta=0.001)) if opts.mixup: callbacks.append(MixupCallback(alpha=0.25)) if opts.accumeration is not None: callbacks.append(CriterionCallback()) callbacks.append( OptimizerCallback(accumulation_steps=opts.accumeration)) print( f"############################## Start training of fold{fold}! ##############################" ) runner.train(model=model, criterion=criterion, optimizer=optimizer, scheduler=scheduler, loaders=loaders, callbacks=callbacks, logdir=logdir, num_epochs=num_epochs, verbose=True) print( f"############################## Finish training of fold{fold}! ##############################" ) del model del loaders del runner torch.cuda.empty_cache() gc.collect()
# elif args.loss == 'lovasz_softmax': # criterion = lovasz_softmax() elif args.loss == 'BCEMulticlassDiceLoss': criterion = BCEMulticlassDiceLoss() elif args.loss == 'MulticlassDiceMetricCallback': criterion = MulticlassDiceMetricCallback() elif args.loss == 'BCE': criterion = nn.BCEWithLogitsLoss() else: criterion = smp.utils.losses.BCEDiceLoss(eps=1.) if args.multigpu: model = nn.DataParallel(model) if args.task == 'segmentation': callbacks = [DiceCallback(), EarlyStoppingCallback(patience=5, min_delta=0.001), CriterionCallback()] elif args.task == 'classification': callbacks = [AUCCallback(class_names=['Fish', 'Flower', 'Gravel', 'Sugar'], num_classes=4), EarlyStoppingCallback(patience=5, min_delta=0.001), CriterionCallback()] if args.gradient_accumulation: callbacks.append(OptimizerCallback(accumulation_steps=args.gradient_accumulation)) runner = SupervisedRunner() if args.train: runner.train( model=model, criterion=criterion, optimizer=optimizer, scheduler=scheduler, loaders=loaders, callbacks=callbacks,
def train_model( df_train, df_valid, model_class, model_params, vectorizer, general_params, ): vectorizer = copy.deepcopy(vectorizer) vectorizer.fit(df_train["text"]) df_train = make_df(df_train, vectorizer) train_ds = GeneralDataset( df_train["tokens"].values, labels=df_train["label"].values, max_sentence_len=general_params["max_sentence_len"], ) trainloader = DataLoader( dataset=train_ds, batch_size=general_params["batch_size"], shuffle=True, num_workers=general_params["num_workers"], ) df_valid = make_df(df_valid, vectorizer) valid_ds = GeneralDataset( df_valid["tokens"].values, labels=df_valid["label"].values, max_sentence_len=general_params["max_sentence_len"], ) validloader = DataLoader( dataset=valid_ds, batch_size=general_params["batch_size"], shuffle=False, num_workers=general_params["num_workers"], ) loaders = collections.OrderedDict() loaders["train"] = trainloader loaders["valid"] = validloader model_params = copy.deepcopy(model_params) model_params.update({"vocab_size": len(vectorizer.vocabulary_)}) model = model_class(**model_params).float() criterion = nn.CrossEntropyLoss() optimizer = torch.optim.Adam(model.parameters(), general_params["lr"]) runner = SupervisedRunner() runner.train( model=model, criterion=criterion, optimizer=optimizer, loaders=loaders, callbacks=[ AccuracyCallback(), EarlyStoppingCallback(patience=general_params["patience"], metric="accuracy01", minimize=False), ], logdir=general_params["logdir"], num_epochs=general_params["num_epochs"], main_metric="accuracy01", minimize_metric=False, load_best_on_end=True, verbose=False, ) with open(os.path.join(general_params["logdir"], "vectorizer.pickle"), "wb") as output_file: pickle.dump(vectorizer, output_file)
num_epochs = args.epochs callbacks = [ CriterionCallback(input_key='mask', multiplier=1., prefix='loss_dice', criterion_key='dice'), CriterionCallback(input_key='mask', prefix='loss_bce', multiplier=0.8, criterion_key='bce'), CriterionAggregatorCallback(prefix='loss', loss_keys=["loss_dice", "loss_bce"], loss_aggregate_fn="sum"), DiceCallback(input_key='mask'), OptimizerCallback(accumulation_steps=32), EarlyStoppingCallback(patience=8, min_delta=0.001), ] if args.checkpoint: callbacks.append( CheckpointCallback(resume=f'{logdir}/checkpoints/best_full.pth')) runner.train( model=model, criterion=criteria, optimizer=optimizer, scheduler=scheduler, loaders=loaders, callbacks=callbacks, main_metric='dice', minimize_metric=False, logdir=logdir, # fp16={"opt_level": "O1"},
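# Note on the callbacks above: each CriterionCallback scales its value by `multiplier` before the
# aggregator sums them, so the optimized objective is effectively 1.0 * dice_loss + 0.8 * bce_loss,
# while DiceCallback(input_key='mask') provides the 'dice' score that main_metric='dice'
# (maximized) uses for checkpoint selection.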
def main(): parser = argparse.ArgumentParser() arg = parser.add_argument arg('--seed', type=int, default=1234, help='Random seed') arg('--model-name', type=str, default=Path('seresnext101'), help='String model name used for saving') arg('--run-root', type=Path, default=Path('../results'), help='Directory for saving model') arg('--data-root', type=Path, default=Path('../data')) arg('--image-size', type=int, default=224, help='Image size for training') arg('--batch-size', type=int, default=16, help='Batch size during training') arg('--fold', type=int, default=0, help='Validation fold') arg('--n-epochs', type=int, default=10, help='Epoch to run') arg('--learning-rate', type=float, default=1e-3, help='Initial learning rate') arg('--step', type=int, default=1, help='Current training step') arg('--patience', type=int, default=4) arg('--criterion', type=str, default='bce', help='Criterion') arg('--optimizer', default='Adam', help='Name of the optimizer') arg('--continue_train', type=bool, default=False) arg('--checkpoint', type=str, default=Path('../results'), help='Checkpoint file path') arg('--workers', type=int, default=2) arg('--debug', type=bool, default=True) args = parser.parse_args() set_seed(args.seed) """ SET PARAMS """ args.debug = True ON_KAGGLE = configs.ON_KAGGLE N_CLASSES = configs.NUM_CLASSES args.image_size = configs.SIZE args.data_root = configs.DATA_ROOT use_cuda = cuda.is_available() fold = args.fold num_workers = args.workers num_epochs = args.n_epochs batch_size = args.batch_size learning_rate = args.learning_rate """ LOAD DATA """ print(os.listdir(args.data_root)) folds = pd.read_csv(args.data_root / 'folds.csv') train_root = args.data_root / 'train' if args.debug: folds = folds.head(50) train_fold = folds[folds['fold'] != fold] valid_fold = folds[folds['fold'] == fold] check_fold(train_fold, valid_fold) def get_dataloader(df: pd.DataFrame, image_transform) -> DataLoader: """ Calls dataloader to load Imet Dataset """ return DataLoader( ImetDataset(train_root, df, image_transform), shuffle=True, batch_size=batch_size, num_workers=num_workers, ) train_loader = get_dataloader(train_fold, image_transform=albu_transform) valid_loader = get_dataloader(valid_fold, image_transform=valid_transform) print('{} items in train, {} in valid'.format(len(train_loader.dataset), len(valid_loader.dataset))) loaders = OrderedDict() loaders["train"] = train_loader loaders["valid"] = valid_loader """ MODEL """ model = seresnext101(num_classes=N_CLASSES) if use_cuda: model = model.cuda() criterion = nn.BCEWithLogitsLoss() optimizer = Adam(model.parameters(), lr=learning_rate) scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.5, patience=args.patience) """ MODEL RUNNER """ # call an instance of the model runner runner = SupervisedRunner() # logs folder current_time = datetime.now().strftime('%b%d_%H_%M') prefix = f'{current_time}_{args.model_name}' logdir = os.path.join(args.run_root, prefix) os.makedirs(logdir, exist_ok=False) print('\tTrain session :', prefix) print('\tOn KAGGLE :', ON_KAGGLE) print('\tDebug :', args.debug) print('\tClasses number :', N_CLASSES) print('\tModel :', args.model_name) print('\tParameters :', model.parameters()) print('\tImage size :', args.image_size) print('\tEpochs :', num_epochs) print('\tWorkers :', num_workers) print('\tLog dir :', logdir) print('\tLearning rate :', learning_rate) print('\tBatch size :', batch_size) print('\tPatience :', args.patience) if args.continue_train: state = load_model(model, args.checkpoint) epoch = state['epoch'] step = 
state['step'] print('Loaded model weights from {}, epoch {}, step {}'.format( args.checkpoint, epoch, step)) # model training runner.train( model=model, criterion=criterion, optimizer=optimizer, scheduler=scheduler, loaders=loaders, callbacks=[ F1ScoreCallback(threshold=0.5), #F2ScoreCallback(num_classes=N_CLASSES), EarlyStoppingCallback(patience=args.patience, min_delta=0.01) ], logdir=logdir, num_epochs=num_epochs, verbose=True) # by default it only plots loss, works in IPython Notebooks #utils.plot_metrics(logdir=logdir, metrics=["loss", "_base/lr"]) """ INFERENCE TEST """ loaders = OrderedDict([("infer", loaders["train"])]) runner.infer( model=model, loaders=loaders, callbacks=[ CheckpointCallback(resume=f"{logdir}/checkpoints/best.pth"), InferCallback() ], ) print(runner.callbacks[1].predictions["logits"])
def train(self): # TODO: Make it work for all modes, right now only it defaults to pcl. callbacks = [ EarlyStoppingCallback(patience=15, metric="loss", minimize=True, min_delta=0), ] scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(self.optimizer, mode="min") train_dataset = TensorDataset(self.tr_eps, torch.arange(self.tr_eps.shape[0])) val_dataset = TensorDataset(self.val_eps, torch.arange(self.val_eps.shape[0])) runner = CustomRunner() v_bs = self.val_eps.shape[0] loaders = { "train": DataLoader( train_dataset, batch_size=self.batch_size, num_workers=1, shuffle=True, ), "valid": DataLoader( val_dataset, batch_size=self.batch_size, num_workers=1, shuffle=True, ), } model = self.model num_features = 2 # model training train_loader_param = { "batch_size": 64, "shuffle": True, } val_loader_param = { "batch_size": 32, "shuffle": True, } loaders_params = { "train": train_loader_param, "valid": val_loader_param, } # datasets = { # "batch_size": 64, # "num_workers": 1, # "loaders_params": loaders_params, # "get_datasets_fn": self.datasets_fn, # "num_features": num_features, # }, runner.train( model=model, optimizer=self.optimizer, scheduler=scheduler, loaders=loaders, callbacks=callbacks, logdir="./logs", num_epochs=self.epochs, verbose=True, distributed=False, load_best_on_end=True, main_metric="loss", )
def main(): fold_path = args.fold_path fold_num = args.fold_num model_name = args.model_name train_csv = args.train_csv sub_csv = args.sub_csv encoder = args.encoder num_workers = args.num_workers batch_size = args.batch_size num_epochs = args.num_epochs learn_late = args.learn_late attention_type = args.attention_type train = pd.read_csv(train_csv) sub = pd.read_csv(sub_csv) train['label'] = train['Image_Label'].apply(lambda x: x.split('_')[-1]) train['im_id'] = train['Image_Label'].apply( lambda x: x.replace('_' + x.split('_')[-1], '')) sub['label'] = sub['Image_Label'].apply(lambda x: x.split('_')[-1]) sub['im_id'] = sub['Image_Label'].apply( lambda x: x.replace('_' + x.split('_')[-1], '')) train_fold = pd.read_csv(f'{fold_path}/train_file_fold_{fold_num}.csv') val_fold = pd.read_csv(f'{fold_path}/val_file_fold_{fold_num}.csv') train_ids = np.array(train_fold.file_name) valid_ids = np.array(val_fold.file_name) encoder_weights = 'imagenet' if model_name == 'ORG_Link18': model = Linknet_resnet18_Classifer() preprocessing_fn = smp.encoders.get_preprocessing_fn( encoder, encoder_weights) train_dataset = CloudDataset_Multi( df=train, datatype='train', img_ids=train_ids, transforms=get_training_augmentation(), preprocessing=get_preprocessing(preprocessing_fn)) valid_dataset = CloudDataset_Multi( df=train, datatype='valid', img_ids=valid_ids, transforms=get_validation_augmentation(), preprocessing=get_preprocessing(preprocessing_fn)) train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True) valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False) loaders = {"train": train_loader, "valid": valid_loader} logdir = f"./log/logs_{model_name}_fold_{fold_num}_{encoder}/segmentation" print(logdir) if model_name == 'ORG_Link18': optimizer = Nadam([ { 'params': model.parameters(), 'lr': learn_late }, ]) else: optimizer = Nadam([ { 'params': model.decoder.parameters(), 'lr': learn_late }, { 'params': model.encoder.parameters(), 'lr': learn_late }, ]) scheduler = ReduceLROnPlateau(optimizer, factor=0.5, patience=0) criterion = Multi_Loss() runner = SupervisedRunner() runner.train(model=model, criterion=criterion, optimizer=optimizer, scheduler=scheduler, loaders=loaders, callbacks=[EarlyStoppingCallback(patience=5, min_delta=1e-7)], logdir=logdir, num_epochs=num_epochs, verbose=1)
def train_model(train_parameters): k = train_parameters["k"] loaders = train_parameters["loaders"] num_epochs = train_parameters["num_epochs"] net = train_parameters["net"] ENCODER = train_parameters["ENCODER"] ENCODER_WEIGHTS = train_parameters["ENCODER_WEIGHTS"] ACTIVATION = train_parameters["ACTIVATION"] model = load_model(net, ENCODER, ENCODER_WEIGHTS, ACTIVATION) """ multi-gpu """ if torch.cuda.device_count() > 1: print("Let's use", torch.cuda.device_count(), "GPUs!") model = nn.DataParallel(model) model.to("cuda") # if k==0: # summary(model.module.encoder,(3,384,576)) logdir = "./logs/segmentation_{}_{}Fold".format(net, k) # model, criterion, optimizer optimizer = RAdam([ { 'params': model.module.decoder.parameters(), 'lr': 1e-2 }, { 'params': model.module.encoder.parameters(), 'lr': 1e-3 }, # {'params': model.decoder.parameters(), 'lr': 1e-2}, # {'params': model.encoder.parameters(), 'lr': 1e-3}, ]) criterion = smp.utils.losses.BCEDiceLoss(eps=1.) # criterion = FocalLoss() # criterion = FocalDiceLoss() # criterion = smp.utils.losses.DiceLoss(eps=1.) scheduler = ReduceLROnPlateau(optimizer, factor=0.15, patience=2) runner = SupervisedRunner() runner.train( model=model, criterion=criterion, optimizer=optimizer, scheduler=scheduler, loaders=loaders, callbacks=[ EarlyStoppingCallback(patience=10, min_delta=0.001), DiceCallback() ], # AUCCallback(), # IouCallback()], logdir=logdir, num_epochs=num_epochs, verbose=True) del loaders, optimizer, scheduler, model, runner torch.cuda.empty_cache() gc.collect() print("Collect GPU cache")
# loss.backward() # optimizer.step() # model training runner = CustomRunner() logdir = "./logdir" runner.train( model=model, optimizer=optimizer, scheduler=scheduler, num_epochs=EPOCHS, loaders=loaders, logdir=logdir, verbose=True, timeit=True, callbacks=[EarlyStoppingCallback(patience=10)] ) # # model training # runner = SupervisedRunner() # logdir = "./logdir" # runner.train( # model=model, # criterion=criterion, # optimizer=optimizer, # scheduler=scheduler, # verbose=True, # timeit=True, # loaders=loaders, # logdir=logdir, # num_epochs=EPOCHS,
def main(): fold_path = args.fold_path fold_num = args.fold_num model_name = args.model_name train_csv = args.train_csv sub_csv = args.sub_csv encoder = args.encoder num_workers = args.num_workers batch_size = args.batch_size num_epochs = args.num_epochs learn_late = args.learn_late attention_type = args.attention_type train = pd.read_csv(train_csv) sub = pd.read_csv(sub_csv) train['label'] = train['Image_Label'].apply(lambda x: x.split('_')[-1]) train['im_id'] = train['Image_Label'].apply( lambda x: x.replace('_' + x.split('_')[-1], '')) sub['label'] = sub['Image_Label'].apply(lambda x: x.split('_')[-1]) sub['im_id'] = sub['Image_Label'].apply( lambda x: x.replace('_' + x.split('_')[-1], '')) train_fold = pd.read_csv(f'{fold_path}/train_file_fold_{fold_num}.csv') val_fold = pd.read_csv(f'{fold_path}/valid_file_fold_{fold_num}.csv') train_ids = np.array(train_fold.file_name) valid_ids = np.array(val_fold.file_name) encoder_weights = 'imagenet' attention_type = None if attention_type == 'None' else attention_type if model_name == 'Unet': model = smp.Unet( encoder_name=encoder, encoder_weights=encoder_weights, classes=4, activation='softmax', attention_type=attention_type, ) if model_name == 'Linknet': model = smp.Linknet( encoder_name=encoder, encoder_weights=encoder_weights, classes=4, activation='softmax', ) if model_name == 'FPN': model = smp.FPN( encoder_name=encoder, encoder_weights=encoder_weights, classes=4, activation='softmax', ) if model_name == 'ORG': model = Linknet_resnet18_ASPP() preprocessing_fn = smp.encoders.get_preprocessing_fn( encoder, encoder_weights) train_dataset = CloudDataset( df=train, datatype='train', img_ids=train_ids, transforms=get_training_augmentation(), preprocessing=get_preprocessing(preprocessing_fn)) valid_dataset = CloudDataset( df=train, datatype='valid', img_ids=valid_ids, transforms=get_validation_augmentation(), preprocessing=get_preprocessing(preprocessing_fn)) train_loader = DataLoader( train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers, drop_last=True, pin_memory=True, ) valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers) loaders = {"train": train_loader, "valid": valid_loader} logdir = f"./log/logs_{model_name}_fold_{fold_num}_{encoder}/segmentation" #for batch_idx, (data, target) in enumerate(loaders['train']): # print(batch_idx) print(logdir) if model_name == 'ORG': optimizer = NAdam([ { 'params': model.parameters(), 'lr': learn_late }, ]) else: optimizer = NAdam([ { 'params': model.decoder.parameters(), 'lr': learn_late }, { 'params': model.encoder.parameters(), 'lr': learn_late }, ]) scheduler = ReduceLROnPlateau(optimizer, factor=0.5, patience=0) criterion = smp.utils.losses.BCEDiceLoss() runner = SupervisedRunner() runner.train(model=model, criterion=criterion, optimizer=optimizer, scheduler=scheduler, loaders=loaders, callbacks=[ DiceCallback(), EarlyStoppingCallback(patience=5, min_delta=1e-7) ], logdir=logdir, num_epochs=num_epochs, verbose=1)
]) model.to(device) scheduler = ReduceLROnPlateau(optimizer, factor=0.6, patience=s_patience) # criterion = smp.utils.losses.BCEDiceLoss(eps=1.) # scheduler = StepLR(optimizer, step_size=10, gamma=0.5) criterion = BCEDiceLoss(eps=1.) # criterion = DiceLoss(eps=1.) #Try this too runner = SupervisedRunner() # Train runner.train(model=model, criterion=criterion, optimizer=optimizer, scheduler=scheduler, loaders=loaders, callbacks=[ DiceCallback(), EarlyStoppingCallback(patience=train_patience, min_delta=0.001) ], logdir=logdir, num_epochs=epochs, verbose=True) secs = time.time() - start print(f"Done in {secs:.2f} seconds ({secs/3600:.2f} hours)") # git fetch --all && git reset --hard origin/master
def main(args): """ Main code for training for training a U-Net with some user-defined encoder. Args: args (instance of argparse.ArgumentParser): arguments must be compiled with parse_args Returns: None """ # setting up the train/val split with filenames train, sub, id_mask_count = setup_train_and_sub_df(args.dset_path) # setting up the train/val split with filenames seed_everything(args.split_seed) train_ids, valid_ids = train_test_split(id_mask_count["im_id"].values, random_state=args.split_seed, stratify=id_mask_count["count"], test_size=args.test_size) # setting up model (U-Net with ImageNet Encoders) ENCODER_WEIGHTS = "imagenet" DEVICE = "cuda" attention_type = None if args.attention_type == "None" else args.attention_type model = smp.Unet(encoder_name=args.encoder, encoder_weights=ENCODER_WEIGHTS, classes=4, activation=None, attention_type=attention_type) preprocessing_fn = smp.encoders.get_preprocessing_fn( args.encoder, ENCODER_WEIGHTS) # Setting up the I/O train_dataset = SteelDataset( args.dset_path, df=train, datatype="train", im_ids=train_ids, transforms=get_training_augmentation(), preprocessing=get_preprocessing(preprocessing_fn), use_resized_dataset=args.use_resized_dataset) valid_dataset = SteelDataset( args.dset_path, df=train, datatype="valid", im_ids=valid_ids, transforms=get_validation_augmentation(), preprocessing=get_preprocessing(preprocessing_fn), use_resized_dataset=args.use_resized_dataset) train_loader = DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True, num_workers=args.num_workers) valid_loader = DataLoader(valid_dataset, batch_size=args.batch_size, shuffle=False, num_workers=args.num_workers) loaders = {"train": train_loader, "valid": valid_loader} # everything is saved here (i.e. weights + stats) logdir = "./logs/segmentation" # model, criterion, optimizer optimizer = torch.optim.Adam([ { "params": model.decoder.parameters(), "lr": args.encoder_lr }, { "params": model.encoder.parameters(), "lr": args.decoder_lr }, ]) scheduler = ReduceLROnPlateau(optimizer, factor=0.15, patience=2) criterion = smp.utils.losses.BCEDiceLoss(eps=1.) runner = SupervisedRunner() callbacks_list = [ DiceCallback(), EarlyStoppingCallback(patience=5, min_delta=0.001), ] if args.checkpoint_path != "None": # hacky way to say no checkpoint callback but eh what the heck ckpoint_p = Path(args.checkpoint_path) fname = ckpoint_p.name resume_dir = str(ckpoint_p.parents[0] ) # everything in the path besides the base file name print( f"Loading {fname} from {resume_dir}. Checkpoints will also be saved in {resume_dir}." ) callbacks_list = callbacks_list + [ CheckpointCallback(resume=fname, resume_dir=resume_dir), ] runner.train(model=model, criterion=criterion, optimizer=optimizer, scheduler=scheduler, loaders=loaders, callbacks=callbacks_list, logdir=logdir, num_epochs=args.num_epochs, verbose=True)
loaders = OrderedDict() loaders["train"] = train_dl loaders["valid"] = valid_dl # model model = AttentionModel(INPUT_DIM, HID_DIM, OUTPUT_DIM, RECURRENT_Layers, DROPOUT).to(device) optimizer = torch.optim.Adam(model.parameters(), lr=lr) scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, [20, 60]) criterion = torch.nn.CrossEntropyLoss() # model training runner = SupervisedRunner() runner.train( model=model, criterion=criterion, optimizer=optimizer, scheduler=scheduler, loaders=loaders, logdir=logdir, num_epochs=EPOCHS, verbose=True, callbacks=[ AccuracyCallback(num_classes=5, topk_args=[1, 2]), EarlyStoppingCallback(metric='accuracy01', minimize=False, patience=10) ], )
def main(train, test, features, target): # get args args = parse_arguments() params = yaml_to_json(args.yaml_path) # hyper params num_folds = params.fold seed = params.seed base_path = params.base_path target_cols = params.target features_cols = params.features preprocessed_data_path = params.preprocessed_data batch_size = params.batch_size num_epochs = params.epochs # ex) '/hoge/logs' base_logdir = params.base_logdir # fix seed set_global_seed(seed) device = get_device() # set up logdir now = datetime.now() base_logdir = os.path.join(base_logdir, now.strftime("%Y%m%d%H%M%S")) os.makedirs(base_logdir, exist_ok=True) # dump yaml contents with open(os.path.join(base_logdir, 'params.json'), mode="w") as f: json.dump(params, f, indent=4) # dump this script (shutil.copy accepts a directory destination) my_file_path = os.path.abspath(__file__) shutil.copy(my_file_path, base_logdir) # load dataset if preprocessed_data_path == '': train, test, sample_submission = read_data(base_path) # noqa # TODO: You should implement these functions!! train, test = preprocess(train, test) # noqa train, test = build_feature(train, test) # noqa else: train = pd.read_csv(preprocessed_data_path + 'train.csv') test = pd.read_csv(preprocessed_data_path + 'test.csv') sample_submission = pd.read_csv(preprocessed_data_path + 'sample_submission.csv') # execute CV # TODO: set your CV method kf = KFold(n_splits=num_folds, shuffle=True, random_state=seed) ids = kf.split(train) fold_scores = [] test_preds = [] for fold, (train_idx, valid_idx) in enumerate(ids): print('Fold {}'.format(fold + 1)) logdir = os.path.join(base_logdir, 'fold_{}'.format(fold + 1)) os.makedirs(logdir, exist_ok=True) # data X_train = train[features_cols] # should the target variable be normalized here...? Y_train = train[target_cols] X_test = test[features_cols] # create dataloaders train_dls, test_dl = create_data_loader( X_train.iloc[train_idx].to_numpy(), Y_train.iloc[train_idx].to_numpy(), X_train.iloc[valid_idx].to_numpy(), Y_train.iloc[valid_idx].to_numpy(), X_test.to_numpy(), batch_size=batch_size) # init models # TODO: set your model and learning condition # this becomes more reusable if wrapped in a factory function that selects the model by keyword model = SampleNN(input_dim=1000, out_dim=1) criterion = nn.BCELoss() optimizer = torch.optim.AdamW(model.parameters()) scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer) # init catalyst runner runner = SupervisedRunner(device=device) # model training runner.train( model=model, criterion=criterion, optimizer=optimizer, scheduler=scheduler, loaders=train_dls, logdir=logdir, num_epochs=num_epochs, callbacks=[EarlyStoppingCallback(patience=15, min_delta=0)], verbose=False) # calculate valid score best_model_path = logdir + '/checkpoints/best.pth' val_preds = runner.predict_loader(model, train_dls['valid'], resume=best_model_path, verbose=False) val_truth = Y_train.iloc[valid_idx].values # TODO: set your score function cv_score = mean_spearmanr_correlation_score(val_truth, val_preds) print('Fold {} CV score : {}'.format(fold + 1, cv_score)) fold_scores.append(cv_score) # test prediction (fold predictions are averaged below, so no per-fold division here) test_pred = runner.predict_loader( model, test_dl, resume=best_model_path, verbose=False) test_preds.append(test_pred) # submit # TODO: set your submit process sample_submission[target_cols] = np.mean(test_preds, axis=0) sample_submission.to_csv('submission.csv') return True
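# mean_spearmanr_correlation_score is called above but not defined anywhere in these fragments; a
# minimal sketch of one plausible implementation (column-wise Spearman correlation averaged over
# target columns — an assumption, not necessarily the original author's version):
import numpy as np
from scipy.stats import spearmanr

def mean_spearmanr_correlation_score(y_true, y_pred):
    """Average Spearman rho over target columns (1-D inputs are scored directly)."""
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    if y_true.ndim == 1:
        return spearmanr(y_true, y_pred).correlation
    return float(np.mean([
        spearmanr(y_true[:, i], y_pred[:, i]).correlation
        for i in range(y_true.shape[1])
    ]))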
def main(): train = pd.read_csv('./data_process/data/train_flip_aug_resize.csv') train['label'] = train['Image_Label'].apply(lambda x: x.split('_')[-1]) train['im_id'] = train['Image_Label'].apply(lambda x: x.replace('_' + x.split('_')[-1], '')) train['img_label'] = train.EncodedPixels.apply(lambda x: 0 if x is np.nan else 1) img_label = train.groupby('im_id')['img_label'].agg(list).reset_index() kf = KFold(n_splits=5, shuffle=True, random_state=777) fold = 0 for train_idx, valid_idx in kf.split(img_label): train_df = img_label.iloc[train_idx] image_train = np.array(train_df.im_id) label_train = np.array(train_df.img_label) val_df = img_label.iloc[valid_idx] image_val = np.array(val_df.im_id) label_val = np.array(val_df.img_label) train_dataset = CloudClassDataset( datatype='train', img_ids=image_train, img_labels=label_train, transforms=get_training_augmentation(), preprocessing=ort_get_preprocessing() ) valid_dataset = CloudClassDataset( datatype='train', img_ids=image_val, img_labels=label_val, transforms=get_validation_augmentation(), preprocessing=ort_get_preprocessing() ) train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True, num_workers=8) valid_loader = DataLoader(valid_dataset, batch_size=16, shuffle=False, num_workers=8) resnet_model = ResNet() loaders = { "train": train_loader, "valid": valid_loader } logdir = f"./class/segmentation/fold_{fold}/" print(logdir) optimizer = Nadam([ {'params': resnet_model.parameters(), 'lr': 1e-3}, ]) scheduler = ReduceLROnPlateau(optimizer, factor=0.5, patience=0) criterion = nn.BCEWithLogitsLoss() runner = SupervisedRunner() runner.train( model=resnet_model, criterion=criterion, optimizer=optimizer, scheduler=scheduler, loaders=loaders, callbacks=[EarlyStoppingCallback(patience=5, min_delta=1e-7)], logdir=logdir, num_epochs=15, verbose=1 ) fold += 1
criterion_key="h1"), CriterionCallback(input_key="h2_targets", output_key="h2_logits", prefix="loss_h2", criterion_key="h2"), CriterionCallback(input_key="h3_targets", output_key="h3_logits", prefix="loss_h3", criterion_key="h3"), crit_agg, ]) callbacks.extend([ score_callback, EarlyStoppingCallback(metric='weight_recall', patience=early_stop_epochs, min_delta=0.001) ]) callbacks.append(OptimizerCallback(grad_clip_params={'params': 1.0}), ) runner.train( fp16=args.fp16, model=model, criterion=criterion, optimizer=optimizer, scheduler=scheduler, loaders=loaders, callbacks=callbacks, logdir=logdir, num_epochs=num_epochs,
scheduler = ReduceLROnPlateau(optimizer, factor=0.15, patience=2) # loss function criterion = smp.utils.losses.BCEDiceLoss(eps=1.) # from catalyst.dl.runner import SupervisedRunner runner = SupervisedRunner() ''' Training section ''' runner.train( model=model, criterion=criterion, optimizer=optimizer, scheduler=scheduler, loaders=loaders, callbacks=[DiceCallback(), EarlyStoppingCallback(patience=5, min_delta=0.001)], logdir=logdir, num_epochs=num_epochs, verbose=True ) # plot the loss curves utils.plot_metrics( logdir=logdir, # specify which metrics we want to plot metrics=["loss", "dice", 'lr', '_base/lr'] ) # run inference on the validation set encoded_pixels = [] loaders = {"infer": valid_loader} runner.infer(
optimizer = torch.optim.Adam(model.parameters()) scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[3, 8], gamma=0.3) # model runner runner = SupervisedRunner() # model training runner.train( model=model, criterion=criterion, optimizer=optimizer, scheduler=scheduler, loaders=loaders, callbacks=[EarlyStoppingCallback(patience=2, min_delta=0.01)], logdir=logdir, num_epochs=num_epochs, check=True, ) # In[ ]: # utils.plot_metrics(logdir=logdir, metrics=["loss", "_base/lr"]) # # Setup 4 - training with additional metrics # In[ ]: from catalyst.dl.runner import SupervisedRunner from catalyst.dl.callbacks import EarlyStoppingCallback, AccuracyCallback
def run(config_file): config = load_config(config_file) config.work_dir = 'result/' + config.work_dir print('working directory:', config.work_dir) all_transforms = {} all_transforms['train'] = Transform(size=config.data.image_size, threshold=20., sigma=-1., blur_ratio=0.2, noise_ratio=0.2, cutout_ratio=0.2, grid_distortion_ratio=0.2, random_brightness_ratio=0.2, piece_affine_ratio=0.2, ssr_ratio=0.2) all_transforms['valid'] = Transform(size=config.data.image_size) dataloaders = { phase: make_loader( phase=phase, batch_size=config.train.batch_size, num_workers=config.num_workers, idx_fold=config.data.params.idx, fold_csv=config.data.params.fold_csv, transforms=all_transforms[phase], # debug=config.debug ) for phase in ['train', 'valid'] } model = get_model(config) model = model.to(device) # we have multiple criterions criterion = { "ce": nn.CrossEntropyLoss(), # Define your awesome losses in here. Ex: Focal, lovasz, etc } optimizer = RAdam(model.parameters(), lr=config.optimizer.params.lr) if config.optimizer.lookahead.apply: optimizer = Lookahead(optimizer) scheduler = get_scheduler(optimizer, config) # model runner runner = SupervisedRunner( device=device, input_key="images", output_key=("logit_grapheme_root", "logit_vowel_diacritic", "logit_consonant_diacritic"), input_target_key=("grapheme_roots", "vowel_diacritics", "consonant_diacritics"), ) callbacks = [] if config.train.early_stop_patience > 0: callbacks.append( EarlyStoppingCallback(patience=config.train.early_stop_patience)) if config.train.accumulation_size > 0: accumulation_steps = config.train.accumulation_size // config.train.batch_size callbacks.extend( [OptimizerCallback(accumulation_steps=accumulation_steps)]) # to resume from check points if exists if os.path.exists(config.work_dir + '/checkpoints/best.pth') and config.train.resume: callbacks.append( CheckpointCallback(resume=config.work_dir + '/checkpoints/last_full.pth')) if config.train.mixup: CC = MixupCallback else: CC = CriterionCallback callbacks.extend([ CC( input_key="grapheme_roots", output_key="logit_grapheme_root", criterion_key='ce', prefix='loss_gr', ), CC( input_key="vowel_diacritics", output_key="logit_vowel_diacritic", criterion_key='ce', prefix='loss_wd', ), CC( input_key="consonant_diacritics", output_key="logit_consonant_diacritic", criterion_key='ce', prefix='loss_cd', ), CriterionAggregatorCallback( prefix="loss", loss_aggregate_fn="weighted_sum", loss_keys={ "loss_gr": 2.0, "loss_wd": 1.0, "loss_cd": 1.0 }, ), # metrics HMacroAveragedRecall(), ]) # model training runner.train( model=model, criterion=criterion, optimizer=optimizer, scheduler=scheduler, loaders=dataloaders, logdir=config.work_dir, num_epochs=config.train.num_epochs, main_metric="hmar", minimize_metric=False, monitoring_params=None, callbacks=callbacks, verbose=True, fp16=False, )
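# HMacroAveragedRecall is referenced above (main_metric="hmar") but not defined in these fragments.
# It presumably implements a hierarchical macro-averaged recall over the three heads, i.e. macro
# recall per component with a double weight on the grapheme root — a hedged sketch of that
# computation, not the callback's actual code:
import numpy as np
from sklearn.metrics import recall_score

def hierarchical_macro_averaged_recall(y_true_root, y_pred_root,
                                       y_true_vowel, y_pred_vowel,
                                       y_true_consonant, y_pred_consonant):
    # macro recall for each classification head
    r_root = recall_score(y_true_root, y_pred_root, average='macro')
    r_vowel = recall_score(y_true_vowel, y_pred_vowel, average='macro')
    r_consonant = recall_score(y_true_consonant, y_pred_consonant, average='macro')
    # the grapheme root counts twice, mirroring the 2/1/1 loss weights above
    return np.average([r_root, r_vowel, r_consonant], weights=[2, 1, 1])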