def objective(trial):
    logdir = "./logdir"
    num_epochs = 10

    model = Net(trial)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.02)
    criterion = torch.nn.CrossEntropyLoss()

    # model training
    runner = SupervisedRunner()
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        loaders=loaders,
        logdir=logdir,
        num_epochs=num_epochs,
        verbose=True,
        callbacks=[
            AccuracyCallback(),
            # top-1 accuracy as metric for pruning
            CatalystPruningCallback(trial, metric="accuracy01"),
        ],
    )
    return runner.state.valid_metrics["accuracy01"]
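A minimal sketch of how an objective like the one above is typically consumed by Optuna (assuming the standard optuna API; `Net` and `loaders` come from the surrounding script):

import optuna
# CatalystPruningCallback is typically imported from optuna.integration

# maximize top-1 accuracy; the pruner stops unpromising trials early
study = optuna.create_study(
    direction="maximize", pruner=optuna.pruners.MedianPruner()
)
study.optimize(objective, n_trials=20)
print(study.best_trial.params)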
def test_evaluation_loader_metrics() -> None:
    """Test that metrics in evaluate_loader work properly."""
    dataset = DummyDataset()
    model = nn.Linear(in_features=dataset.features_dim, out_features=dataset.out_dim)
    loader = DataLoader(dataset=dataset, batch_size=1)
    callbacks = [
        dl.AccuracyCallback(input_key="logits", target_key="targets", topk=(1,))
    ]
    runner = SupervisedRunner()
    runner.train(
        loaders={"train": loader, "valid": loader},
        model=model,
        num_epochs=1,
        criterion=nn.BCEWithLogitsLoss(),
        callbacks=callbacks,
    )
    runner_internal_metrics = runner.loader_metrics
    evaluate_loader_metrics = runner.evaluate_loader(loader=loader, callbacks=callbacks)
    assert runner_internal_metrics["accuracy01"] == evaluate_loader_metrics["accuracy01"]
def run(config_file):
    config = load_config(config_file)
    os.makedirs(config.work_dir, exist_ok=True)
    save_config(config, config.work_dir + '/config.yml')
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'

    all_transforms = {}
    all_transforms['train'] = get_transforms(config.transforms.train)
    all_transforms['valid'] = get_transforms(config.transforms.test)

    dataloaders = {
        phase: make_loader(
            data_folder=config.data.train_dir,
            df_path=config.data.train_df_path,
            phase=phase,
            batch_size=config.train.batch_size,
            num_workers=config.num_workers,
            idx_fold=config.data.params.idx_fold,
            transforms=all_transforms[phase],
            num_classes=config.data.num_classes,
            pseudo_label_path=config.train.pseudo_label_path,
            task='cls',
        )
        for phase in ['train', 'valid']
    }

    # create model
    model = CustomNet(config.model.encoder, config.data.num_classes)

    # train setting
    criterion = get_loss(config)
    params = [
        {'params': model.base_params(), 'lr': config.optimizer.params.encoder_lr},
        {'params': model.fresh_params(), 'lr': config.optimizer.params.decoder_lr},
    ]
    optimizer = get_optimizer(params, config)
    scheduler = get_scheduler(optimizer, config)

    # model runner
    runner = SupervisedRunner(model=model)
    callbacks = [MultiClassAccuracyCallback(threshold=0.5), F1ScoreCallback()]
    if os.path.exists(config.work_dir + '/checkpoints/best.pth'):
        callbacks.append(
            CheckpointCallback(resume=config.work_dir + '/checkpoints/best_full.pth')
        )

    # model training
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        loaders=dataloaders,
        logdir=config.work_dir,
        num_epochs=config.train.num_epochs,
        callbacks=callbacks,
        verbose=True,
        fp16=True,
    )
def test_loading_best_state_at_end():
    old_stdout = sys.stdout
    sys.stdout = str_stdout = StringIO()

    # experiment setup
    logdir = "./logs/periodic_loader"
    checkpoint = logdir + "/checkpoints"
    logfile = checkpoint + "/_metrics.json"

    # data
    num_samples, num_features = int(1e4), int(1e1)
    X = torch.rand(num_samples, num_features)
    y = torch.randint(0, 5, size=[num_samples])
    dataset = TensorDataset(X, y)
    loader = DataLoader(dataset, batch_size=32, num_workers=1)
    loaders = {
        "train": loader,
        "valid": loader,
    }

    # model, criterion, optimizer, scheduler
    model = torch.nn.Linear(num_features, 5)
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters())
    runner = SupervisedRunner()

    # first stage
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        loaders=loaders,
        logdir=logdir,
        num_epochs=5,
        verbose=False,
        callbacks=[
            PeriodicLoaderCallback(valid=3),
            CheckRunCallback(num_epoch_steps=5),
        ],
        load_best_on_end=True,
    )

    sys.stdout = old_stdout
    exp_output = str_stdout.getvalue()

    assert len(re.findall(r"\(train\)", exp_output)) == 5
    assert len(re.findall(r"\(valid\)", exp_output)) == 1
    assert len(re.findall(r"\(global epoch 3, epoch 3, stage train\)", exp_output)) == 1
    assert len(re.findall(r".*/train\.\d\.pth", exp_output)) == 1

    assert os.path.isfile(logfile)
    assert os.path.isfile(checkpoint + "/train.3.pth")
    assert os.path.isfile(checkpoint + "/best.pth")
    assert os.path.isfile(checkpoint + "/best_full.pth")
    assert os.path.isfile(checkpoint + "/last.pth")
    assert os.path.isfile(checkpoint + "/last_full.pth")

    shutil.rmtree(logdir, ignore_errors=True)
def find_lr_range(final_lr: float = 1.0, num_steps: int = 1413):
    config = copy.deepcopy(experiment_config)
    del config["stages"]["scheduler_params"]
    config["stages"]["stage1"]["optimizer_params"]["lr"] = 1e-6
    config["stages"]["callbacks_params"]["lr_finder"] = {
        "callback": "LRFinderLogger",
        "final_lr": final_lr,
        "num_steps": num_steps,
        "scale": "log",
    }
    experiment = Experiment(config)
    runner = SupervisedRunner(
        input_key="images",
        output_key=["logit_" + c for c in output_classes.keys()],
        input_target_key=list(output_classes.keys()),
    )
    try:
        runner.run_experiment(experiment)
    except NotImplementedError:
        pass

    import matplotlib.pyplot as plt
    plt.plot(
        runner.callbacks["lr_finder"].lr_history,
        runner.callbacks["lr_finder"].loss_history,
    )
    plt.xscale("log")
    plt.show()
    return experiment, runner
def train():
    """Docs."""
    num_features = int(1e1)

    # model, criterion, optimizer, scheduler
    model = torch.nn.Linear(num_features, 1)
    criterion = torch.nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters())
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, [3, 6])

    runner = SupervisedRunner()
    runner.train(
        model=model,
        datasets={
            "batch_size": 32,
            "num_workers": 1,
            "get_datasets_fn": datasets_fn,
            "num_features": num_features,
        },
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        logdir="./logs/example_3",
        num_epochs=8,
        verbose=True,
        distributed=False,
        check=True,
    )
def main():
    epochs = 5
    num_class = 10
    output_path = './output/catalyst'

    # Use if you want to fix the seed
    # catalyst.utils.set_global_seed(42)
    # catalyst.utils.prepare_cudnn(deterministic=True)

    model = get_model()
    train_loader, val_loader = get_loaders()
    loaders = {"train": train_loader, "valid": val_loader}
    optimizer, lr_scheduler = get_optimizer(model=model)
    criterion = get_criterion()

    runner = SupervisedRunner(device=catalyst.utils.get_device())
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=lr_scheduler,
        loaders=loaders,
        logdir=output_path,
        callbacks=[AccuracyCallback(num_classes=num_class, accuracy_args=[1])],
        num_epochs=epochs,
        main_metric="accuracy01",
        minimize_metric=False,
        fp16=None,
        verbose=True,
    )
def test_mnist(self):
    utils.set_global_seed(42)
    x_train = np.random.random((100, 1, 28, 28)).astype(np.float32)
    y_train = _to_categorical(
        np.random.randint(10, size=(100, 1)), num_classes=10
    ).astype(np.float32)
    x_valid = np.random.random((20, 1, 28, 28)).astype(np.float32)
    y_valid = _to_categorical(
        np.random.randint(10, size=(20, 1)), num_classes=10
    ).astype(np.float32)

    x_train, y_train, x_valid, y_valid = \
        list(map(torch.tensor, [x_train, y_train, x_valid, y_valid]))

    bs = 32
    num_workers = 4
    data_transform = transforms.ToTensor()

    loaders = collections.OrderedDict()
    trainset = torch.utils.data.TensorDataset(x_train, y_train)
    trainloader = torch.utils.data.DataLoader(
        trainset, batch_size=bs, shuffle=True, num_workers=num_workers
    )
    validset = torch.utils.data.TensorDataset(x_valid, y_valid)
    validloader = torch.utils.data.DataLoader(
        validset, batch_size=bs, shuffle=False, num_workers=num_workers
    )
    loaders["train"] = trainloader
    loaders["valid"] = validloader

    # experiment setup
    num_epochs = 3
    logdir = "./logs"

    # model, criterion, optimizer
    model = Net()
    criterion = nn.BCEWithLogitsLoss()
    optimizer = torch.optim.Adam(model.parameters())

    # model runner
    runner = SupervisedRunner()

    # model training
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        loaders=loaders,
        logdir=logdir,
        num_epochs=num_epochs,
        verbose=False,
        callbacks=[CheckpointCallback(save_n_best=3)],
    )

    with open('./logs/checkpoints/_metrics.json') as f:
        metrics = json.load(f)
    self.assertTrue(metrics['train.3']['loss'] < metrics['train.1']['loss'])
    self.assertTrue(metrics['best']['loss'] < 0.35)
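A minimal sketch of the `_to_categorical` helper the test above presumes; this is a hypothetical reconstruction inferred from its call sites (Keras-style one-hot encoding of integer labels):

import numpy as np

def _to_categorical(y, num_classes):
    # (N, 1) or (N,) integer labels -> (N, num_classes) one-hot matrix
    return np.eye(num_classes)[np.asarray(y).reshape(-1)]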
def main(config):
    """
    Main code for training a classification/seg/classification+seg model.

    Args:
        config (dict): dictionary read from a yaml file,
            e.g. script/configs/train.yml

    Returns:
        None
    """
    # setting up the train/val split with filenames
    seed = config["io_params"]["split_seed"]
    seed_everything(seed)

    exp = TrainSegExperiment2D(config)
    output_key = "logits"
    print(f"Seed: {seed}")

    runner = SupervisedRunner(output_key=output_key)
    runner.train(
        model=exp.model,
        criterion=exp.criterion,
        optimizer=exp.opt,
        scheduler=exp.lr_scheduler,
        loaders=exp.loaders,
        callbacks=exp.cb_list,
        **config["runner_params"]
    )

    # not saving plots if plot_params is not specified in the config
    if config.get("plot_params"):
        figs = plot_metrics(
            logdir=config["runner_params"]["logdir"],
            metrics=config["plot_params"]["metrics"],
        )
        save_figs(figs, save_dir=config["plot_params"]["save_dir"])
def test_passenger_example(interior_car_task):
    model, task_flow = interior_car_task

    dataset = task_flow.get_dataset()
    train_dataset, val_dataset = torch_split_dataset(dataset, random_state=42)
    train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)
    nested_loaders = OrderedDict({'train': train_loader, 'valid': val_loader})

    print(model)
    runner = SupervisedRunner()
    criterion = task_flow.get_loss()
    callbacks = criterion.catalyst_callbacks()

    with tempfile.TemporaryDirectory() as tmp_dir:
        print(tmp_dir)
        runner.train(
            model=model,
            criterion=criterion,
            optimizer=optim.Adam(model.parameters(), lr=1e-3),
            loaders=nested_loaders,
            callbacks=callbacks,
            logdir=tmp_dir,
            num_epochs=20,
        )
        print_any_prediction(criterion, model, nested_loaders, runner)
def main():
    args = get_parse()
    catalyst.utils.set_global_seed(args.seed)
    catalyst.utils.prepare_cudnn(deterministic=True)

    print('Make dataset DataFrame')
    df, class_names = make_df(data_root=args.data_rootdir)
    num_class = len(class_names)

    print('Get data loaders')
    loaders = get_train_valid_loaders(
        df=df,
        test_size=0.2,
        random_state=args.seed,
        data_root=args.data_rootdir,
        num_class=num_class,
        batch_size=args.batch_size,
        num_workers=args.num_workers,
        img_size=args.img_size,
    )

    print('Make model')
    if args.frn:
        model = se_resnext50_32x4d_frn(pretrained=None)
    else:
        model = se_resnext50_32x4d()
    model.last_linear = nn.Linear(512 * 16, num_class)

    print('Get optimizer and scheduler')
    # the learning rate for FRN is very sensitive!
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=3e-5 if args.frn else 3e-4)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer=optimizer,
        T_max=args.num_epochs,
        eta_min=1e-6 if args.frn else 1e-5,
        last_epoch=-1,
    )

    log_base = './output/cls'
    dir_name = f'seresnext50{"_frn" if args.frn else ""}_bs_{args.batch_size}_fp16_{args.fp16}'

    print('Start training...')
    runner = SupervisedRunner(device=catalyst.utils.get_device())
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        loaders=loaders,
        logdir=os.path.join(log_base, dir_name),
        callbacks=get_callbacks(num_classes=num_class),
        num_epochs=args.num_epochs,
        main_metric="accuracy01",
        minimize_metric=False,
        fp16=dict(opt_level="O1") if args.fp16 else None,
        verbose=False,
    )
def __init__(self, config: EstimatorConfig, model):
    super().__init__(config)
    self.runner = SupervisedRunner()
    self.model_metrics = dict()
    self.model = model
    self.ddp = False
    self.set_device()
def main(args=None):
    if args is None:
        args = argument_paser()

    # Set experiment id
    exp_id = str(uuid.uuid4())[:8] if args.exp_id is None else args.exp_id
    print(f'Experiment Id: {exp_id}', flush=True)

    # Fix seed
    torch.manual_seed(args.seed)

    # Configure GPU
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    # Prepare data
    dataset = MovingMnistDataset()
    train_index, valid_index = train_test_split(range(len(dataset)), test_size=0.3)
    train_loader = DataLoader(
        Subset(dataset, train_index), batch_size=args.batch_size, shuffle=True
    )
    valid_loader = DataLoader(
        Subset(dataset, valid_index), batch_size=args.test_batch_size, shuffle=False
    )
    loaders = {"train": train_loader, "valid": valid_loader}

    model = ConvLSTMEncoderPredictor(image_size=(64, 64)).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, betas=(0.9, 0.999))
    criterion = nn.MSELoss()

    runner = SupervisedRunner(device=catalyst.utils.get_device())
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=None,
        loaders=loaders,
        # model will be saved to {logdir}/checkpoints
        logdir=os.path.join(args.log_dir, exp_id),
        callbacks=[
            CheckpointCallback(save_n_best=args.n_saved),
            EarlyStoppingCallback(
                patience=args.es_patience,
                metric="loss",
                minimize=True,
            ),
        ],
        num_epochs=args.epochs,
        main_metric="loss",
        minimize_metric=True,
        fp16=None,
        verbose=True,
    )
    return exp_id, model
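A hypothetical sketch of the `argument_paser()` helper this script assumes, reconstructed from the `args.*` attributes used above; the flag names and defaults are guesses:

import argparse

def argument_paser():
    parser = argparse.ArgumentParser(description="ConvLSTM on Moving MNIST")
    parser.add_argument("--exp-id", dest="exp_id", default=None)
    parser.add_argument("--seed", type=int, default=42)
    parser.add_argument("--batch-size", type=int, default=8)
    parser.add_argument("--test-batch-size", type=int, default=8)
    parser.add_argument("--lr", type=float, default=1e-3)
    parser.add_argument("--epochs", type=int, default=10)
    parser.add_argument("--es-patience", type=int, default=3)
    parser.add_argument("--n-saved", type=int, default=3)
    parser.add_argument("--log-dir", default="./logs")
    return parser.parse_args()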
def test_zero_period_validation_exception():
    old_stdout = sys.stdout
    sys.stdout = str_stdout = StringIO()

    # experiment setup
    logdir = "./logs/periodic_loader"
    checkpoint = logdir + "/checkpoints"
    logfile = checkpoint + "/_metrics.json"

    # data
    num_samples, num_features = int(1e4), int(1e1)
    X = torch.rand(num_samples, num_features)
    y = torch.randint(0, 5, size=[num_samples])
    dataset = TensorDataset(X, y)
    loader = DataLoader(dataset, batch_size=32, num_workers=1)
    loaders = {
        "train": loader,
        "train_additional": loader,
        "valid": loader,
        "valid_additional": loader,
    }

    # model, criterion, optimizer, scheduler
    model = torch.nn.Linear(num_features, 5)
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters())
    runner = SupervisedRunner()

    with pytest.raises(ValueError):
        runner.train(
            model=model,
            criterion=criterion,
            optimizer=optimizer,
            loaders=loaders,
            logdir=logdir,
            num_epochs=10,
            verbose=False,
            valid_loader="valid",
            valid_metric="loss",
            minimize_valid_metric=True,
            callbacks=[
                PeriodicLoaderCallback(
                    valid_loader_key="valid",
                    valid_metric_key="loss",
                    minimize=True,
                    train_additional=1,
                    train_not_exists=3,
                    valid=0,
                    valid_additional=2,
                    valid_not_exist=1,
                )
            ],
        )

    sys.stdout = old_stdout
    exp_output = str_stdout.getvalue()

    shutil.rmtree(logdir, ignore_errors=True)
def main():
    args = get_parse()
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
    catalyst.utils.set_global_seed(args.seed)
    catalyst.utils.prepare_cudnn(deterministic=True)

    print('Make dataset DataFrame')
    df, class_names = make_df(data_root=args.data_rootdir)
    num_class = len(class_names)

    print('Get data loaders')
    loaders = get_train_valid_loaders(
        df=df,
        test_size=0.2,
        random_state=args.seed,
        data_root=args.data_rootdir,
        num_class=num_class,
        batch_size=args.batch_size,
        num_workers=args.num_workers,
        img_size=args.img_size,
    )

    print('Make model')
    model = make_model(
        model_name=args.model,
        num_classes=num_class,
        pretrained=args.use_pretrain,
        input_size=(args.img_size, args.img_size),
        dropout_p=0.2,
    )
    if args.frn:
        print('Use FRN + TLU instead of BN2d + ReLU')
        model = bnrelu_to_frn(model)

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4 if args.frn else 3e-4)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer=optimizer,
        T_max=args.num_epochs,
        eta_min=3e-6 if args.frn else 1e-5,
        last_epoch=-1,
    )

    log_base = './output/cls'
    dir_name = (
        f'{args.model}_frn_{args.frn}_bs_{args.batch_size}'
        f'_fp16_{args.fp16}_pretrain_{args.use_pretrain}'
    )

    print('Start training...')
    runner = SupervisedRunner(device=catalyst.utils.get_device())
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        loaders=loaders,
        logdir=os.path.join(log_base, dir_name),
        callbacks=get_callbacks(num_classes=num_class),
        num_epochs=args.num_epochs,
        main_metric="accuracy01",
        minimize_metric=False,
        fp16=dict(opt_level="O1") if args.fp16 else None,
        verbose=False,
    )
def test_epoch_increasing():
    class IncreaseCheckerCallback(Callback):
        def __init__(self, attribute: str, start_value: int = None):
            super().__init__(CallbackOrder.Internal)
            self.attr = attribute
            self.prev = start_value

        def on_epoch_start(self, runner):
            if not hasattr(runner, self.attr):
                raise ValueError(f"There is no {self.attr} in runner!")
            value = getattr(runner, self.attr)
            if self.prev is not None:
                # print(
                #     f">>> '{self.attr}': "
                #     f"previous - {self.prev}, "
                #     f"current - {value}"
                # )
                assert self.prev < value
            self.prev = value

    # experiment setup
    logdir = "./logs/core_runner"

    # data
    num_samples, num_features = int(1e4), int(1e1)
    X = torch.rand(num_samples, num_features)
    y = torch.randint(0, 5, size=[num_samples])
    dataset = TensorDataset(X, y)
    loader = DataLoader(dataset, batch_size=32, num_workers=1)
    loaders = {"train": loader, "valid": loader}

    # model, criterion, optimizer, scheduler
    model = torch.nn.Linear(num_features, 5)
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters())
    runner = SupervisedRunner()

    callbacks = [
        IncreaseCheckerCallback("epoch_step"),
        IncreaseCheckerCallback("batch_step"),
        IncreaseCheckerCallback("sample_step"),
    ]

    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        loaders=loaders,
        logdir=logdir,
        num_epochs=2,
        verbose=False,
        callbacks=callbacks,
    )

    shutil.rmtree(logdir, ignore_errors=True)
def run_train_with_empty_loader() -> None:
    """
    Force the loader to be empty by using batch_size > len(dataset)
    together with drop_last=True.
    """
    dataset = DummyDataset()
    model = nn.Linear(in_features=dataset.features_dim, out_features=dataset.out_dim)
    loader = DataLoader(dataset=dataset, batch_size=len(dataset) + 1, drop_last=True)
    runner = SupervisedRunner()
    runner.train(
        loaders={"train": loader},
        model=model,
        num_epochs=1,
        criterion=nn.BCEWithLogitsLoss(),
    )
def train(
    in_csv: str,
    in_dir: str,
    model: str = 'resnet18',
    fold: int = None,
    n_epochs: int = 30,
    image_size: int = 224,
    augmentation: str = 'medium',
    learning_rate: float = 3e-3,
    n_milestones: int = 5,
    batch_size: int = 256,
    n_workers: int = 4,
    fast: bool = False,
    logdir: str = '.',
    verbose: bool = False,
):
    model = get_model(model=model)
    loss = criterion.FocalLossMultiClass()  # CrossEntropyLoss
    lr_scaled = learning_rate * (batch_size / 256)  # lr linear scaling
    optimizer = torch.optim.Adam(model.parameters(), lr=lr_scaled)
    scheduler = schedulers.MultiStepLR(
        optimizer, milestones=[5, 10, 20, 30, 40], gamma=0.3
    )

    runner = SupervisedRunner()
    runner.train(
        model=model,
        criterion=loss,
        optimizer=optimizer,
        scheduler=scheduler,
        loaders=get_dataloaders(
            in_csv=in_csv,
            in_dir=in_dir,
            stages=['train', 'valid'],
            fold=fold,
            batch_size=batch_size,
            n_workers=n_workers,
            image_size=(image_size, image_size),
            augmentation=augmentation,
            fast=fast,
        ),
        callbacks=[
            AccuracyCallback(accuracy_args=[1]),
            BinaryAUCCallback(),
        ],
        logdir=logdir,
        num_epochs=n_epochs,
        verbose=verbose,
    )
def test_onecyle():
    # experiment setup
    logdir = "./logs/core_runner"

    # data
    num_samples, num_features = int(1e4), int(1e1)
    X = torch.rand(num_samples, num_features)
    y = torch.randint(0, 5, size=[num_samples])
    dataset = TensorDataset(X, y)
    loader = DataLoader(dataset, batch_size=32, num_workers=1)
    loaders = {
        "train": loader,
        "valid": loader,
    }

    # number of steps, epochs, LR range, initial LR and warmup_fraction
    num_steps = 6
    epochs = 8
    min_lr = 1e-4
    max_lr = 2e-3
    init_lr = 1e-3
    warmup_fraction = 0.5

    # model, criterion, optimizer, scheduler
    model = torch.nn.Linear(num_features, 5)
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters())
    scheduler = OneCycleLRWithWarmup(
        optimizer,
        num_steps=num_steps,
        lr_range=(max_lr, min_lr),
        init_lr=init_lr,
        warmup_fraction=warmup_fraction,
    )
    runner = SupervisedRunner()
    callbacks = [LRCheckerCallback(init_lr, min_lr)]

    # single stage
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        loaders=loaders,
        logdir=logdir,
        num_epochs=epochs,
        verbose=False,
        callbacks=callbacks,
    )
def infer(config_path, log_dir):
    """
    Inference:
    1. loaders
    2. model
    """
    # querying params from the experiment config
    batch_size = 116
    test_dataset = LipreadingDataset("test")
    loaders = {
        "infer": DataLoader(
            test_dataset,
            batch_size=batch_size,
            shuffle=True,
            num_workers=0,
            drop_last=False,
        )
    }
    model = LipNext()
    device = "cuda" if torch.cuda.is_available() else "cpu"

    runner = SupervisedRunner(device=device)
    runner.infer(
        model=model,
        loaders=loaders,
        callbacks=[
            AccuracyCallback(accuracy_args=[1, 3]),
            InferenceCallback(),
            CheckpointCallbackV2(
                config_path=config_path,
                resume=(
                    "/home/dmitry.klimenkov/Documents/projects/visper_pytorch/logdir"
                    "/Mobi-VSR-5W-mixed_aligned_patience5_sometests/checkpoints/train.0.35.8553.pth"
                ),
            ),
            # NegativeMiningCallback()
        ],
        state_kwargs={"log_dir": log_dir},
        check=True,
    )
def test_evaluation_loader_custom_model() -> None:
    """Test that evaluate_loader works with a custom model."""
    dataset = DummyDataset()
    model = nn.Linear(in_features=dataset.features_dim, out_features=dataset.out_dim)
    loader = DataLoader(dataset=dataset, batch_size=1)
    callbacks = [
        dl.AccuracyCallback(input_key="logits", target_key="targets", topk=(1,))
    ]
    runner = SupervisedRunner()
    runner.evaluate_loader(loader=loader, callbacks=callbacks, model=model)
def run(name: str = None,
        config: dict = None,
        device: str = None,
        check: bool = False) -> dict:
    config = config or experiment_config
    device = device or utils.get_device()
    print(f"device: {device}")
    utils.set_global_seed(SEED)

    # initialize Weights & Biases
    name = name or '_'.join(
        filter(None, [experiment_name, f"{datetime.datetime.now():%Y-%m-%d-%S}"]))

    # convert parquet to zip
    parquet_to_images(TRAIN, ZIP_TRAIN_FILE, SIZE)
    parquet_to_images(TEST, ZIP_TEST_FILE, SIZE)

    # run experiment
    runner = SupervisedRunner(
        device=device,
        input_key="images",
        output_key=["logit_" + c for c in output_classes.keys()],
        input_target_key=list(output_classes.keys()),
    )
    experiment = Experiment(config)
    runner.run_experiment(experiment, check=check)

    return {
        'runner': runner,
        'experiment': experiment,
        'config': config,
    }
def test_evaluation_loader_empty_model() -> None:
    """Test that an assertion is raised when no model is given."""
    with pytest.raises(AssertionError) as record:
        dataset = DummyDataset()
        loader = DataLoader(dataset=dataset, batch_size=1)
        callbacks = [
            dl.AccuracyCallback(input_key="logits", target_key="targets", topk=(1,))
        ]
        runner = SupervisedRunner()
        runner.evaluate_loader(loader=loader, callbacks=callbacks, model=None)

        if not record:
            pytest.fail("Expected an assertion because the model is empty!")
def run(name: str = None,
        config: dict = None,
        device: str = None,
        check: bool = False) -> dict:
    config = config or experiment_config
    device = device or utils.get_device()
    print(f"device: {device}")
    utils.set_global_seed(SEED)

    config['monitoring_params']['name'] = EXPERIMENT_NAME

    # convert parquet to zip
    parquet_to_images(TRAIN, ZIP_TRAIN_FILE, SIZE)
    parquet_to_images(TEST, ZIP_TEST_FILE, SIZE)

    # run experiment
    runner = SupervisedRunner(
        device=device,
        input_key="images",
        output_key=["logit_" + c for c in output_classes.keys()],
        input_target_key=list(output_classes.keys()),
    )
    experiment = Experiment(config)
    runner.run_experiment(experiment, check=check)

    return {
        'runner': runner,
        'experiment': experiment,
        'config': config,
    }
def do_train(data, log, log_dir):
    model = Net(num_features=2)
    runner = SupervisedRunner()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-2)
    criterion = MyLoss()

    log_batch(model, data, log, "init")
    log.debug("Starting training")
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        loaders=data,
        logdir=f"{log_dir}/run",
        load_best_on_end=True,
        num_epochs=1,
    )
    log_batch(model, data, log, "exit")
def make_runner():
    runner = SupervisedRunner(
        input_key=(
            'input_ids',
            'attention_mask',
            # 'token_type_ids',
        ),
        device=device,
    )
    return runner
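With a tuple `input_key` like this, Catalyst (20.x) pulls those keys out of each batch dict and passes them to the model as keyword arguments, which matches transformer-style forward signatures. A sketch of the batch shape the loader is therefore expected to yield (`targets` is Catalyst's default target key; the tensor values are made up):

batch = {
    "input_ids": torch.tensor([[101, 2023, 102]]),
    "attention_mask": torch.tensor([[1, 1, 1]]),
    "targets": torch.tensor([0]),
}
# inside the runner this roughly becomes:
# model(input_ids=batch["input_ids"], attention_mask=batch["attention_mask"])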
def test_save_model_grads():
    """
    Tests a feature of `OptimizerCallback` for saving model gradients.
    """
    logdir = "./logs"
    dataset_root = "./dataset"
    loaders = _get_loaders(root=dataset_root, batch_size=4, num_workers=1)
    images, _ = next(iter(loaders["train"]))
    _, c, h, w = images.shape
    input_shape = (c, h, w)

    model = _SimpleNet(input_shape)
    criterion = nn.CrossEntropyLoss()
    optimizer = Adam(model.parameters())

    criterion_callback = CriterionCallback()
    optimizer_callback = OptimizerCallback()
    save_model_grads_callback = SaveModelGradsCallback()
    prefix = save_model_grads_callback.grad_norm_prefix
    test_callback = _OnBatchEndCheckGradsCallback(prefix)

    callbacks = collections.OrderedDict(
        loss=criterion_callback,
        optimizer=optimizer_callback,
        grad_norm=save_model_grads_callback,
        test_callback=test_callback,
    )

    runner = SupervisedRunner()
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        loaders=loaders,
        logdir=logdir,
        callbacks=callbacks,
        check=True,
        verbose=True,
    )
    shutil.rmtree(logdir)
    shutil.rmtree(dataset_root)
def main(config):
    """
    Main code for training a classification/segmentation/classification+segmentation
    model.

    Args:
        config (dict): dictionary read from a yaml file,
            e.g. experiments/finetune_classification.yml

    Returns:
        None
    """
    # setting up the train/val split with filenames
    seed = config["io_params"]["split_seed"]
    seed_everything(seed)

    mode = config["mode"].lower()
    assert mode in ["classification", "segmentation", "both"], \
        "The `mode` must be one of ['classification', 'segmentation', 'both']."
    if mode == "classification":
        raise NotImplementedError
    elif mode == "segmentation":
        if config["dim"] == 2:
            exp = TrainSegExperiment2D(config)
        elif config["dim"] == 3:
            exp = TrainSegExperiment(config)
        output_key = "logits"
    elif mode == "both":
        if config["dim"] == 2:
            exp = TrainClfSegExperiment2D(config)
        elif config["dim"] == 3:
            exp = TrainClfSegExperiment3D(config)
        output_key = ["seg_logits", "clf_logits"]
    print(f"Seed: {seed}\nMode: {mode}")

    runner = SupervisedRunner(output_key=output_key)
    runner.train(
        model=exp.model,
        criterion=exp.criterion,
        optimizer=exp.opt,
        scheduler=exp.lr_scheduler,
        loaders=exp.loaders,
        callbacks=exp.cb_list,
        **config["runner_params"]
    )
def get_runner(config: dict, device: torch.device):
    if config.get("runner") is not None:
        if config["runner"] == "SAMRunner":
            return SAMRunner(device=device)
        else:
            raise NotImplementedError
    else:
        return SupervisedRunner(
            device=device,
            input_key=config["globals"]["input_key"],
            input_target_key=config["globals"]["input_target_key"],
        )
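A hypothetical config fragment illustrating how `get_runner` above dispatches; the key names follow the function body, while the values are made-up placeholders:

config = {
    # add "runner": "SAMRunner" here to select the SAM-based runner instead
    "globals": {"input_key": "waveform", "input_target_key": "targets"},
}
runner = get_runner(
    config, torch.device("cuda" if torch.cuda.is_available() else "cpu")
)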
def train(num_epochs, model, loaders, logdir):
    criterion = torch.nn.BCELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=2)
    callbacks = [F1ScoreCallback()]

    # model runner
    runner = SupervisedRunner()

    # model training
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        loaders=loaders,
        logdir=logdir,
        num_epochs=num_epochs,
        callbacks=callbacks,
        verbose=True,
    )