def train_experiment(device, engine=None):
    with TemporaryDirectory() as logdir:
        teacher = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10))
        student = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10))
        model = {"teacher": teacher, "student": student}
        criterion = {"cls": nn.CrossEntropyLoss(), "kl": nn.KLDivLoss(reduction="batchmean")}
        optimizer = optim.Adam(student.parameters(), lr=0.02)
        loaders = {
            "train": DataLoader(
                MNIST(os.getcwd(), train=True, download=True, transform=ToTensor()),
                batch_size=32,
            ),
            "valid": DataLoader(
                MNIST(os.getcwd(), train=False, download=True, transform=ToTensor()),
                batch_size=32,
            ),
        }

        runner = DistilRunner()
        # model training
        runner.train(
            engine=engine or dl.DeviceEngine(device),
            model=model,
            criterion=criterion,
            optimizer=optimizer,
            loaders=loaders,
            num_epochs=1,
            logdir=logdir,
            verbose=False,
            callbacks=[
                dl.AccuracyCallback(
                    input_key="t_logits", target_key="targets", num_classes=2, prefix="teacher_"
                ),
                dl.AccuracyCallback(
                    input_key="s_logits", target_key="targets", num_classes=2, prefix="student_"
                ),
                dl.CriterionCallback(
                    input_key="s_logits",
                    target_key="targets",
                    metric_key="cls_loss",
                    criterion_key="cls",
                ),
                dl.CriterionCallback(
                    input_key="s_logprobs",
                    target_key="t_probs",
                    metric_key="kl_div_loss",
                    criterion_key="kl",
                ),
                dl.MetricAggregationCallback(
                    metric_key="loss", metrics=["kl_div_loss", "cls_loss"], mode="mean"
                ),
                dl.OptimizerCallback(metric_key="loss", model_key="student"),
                dl.CheckpointCallback(
                    logdir=logdir,
                    loader_key="valid",
                    metric_key="loss",
                    minimize=True,
                    save_n_best=3,
                ),
            ],
        )

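# NOTE: the experiment above assumes a `DistilRunner` in scope; a minimal
# sketch, mirroring the `DistilRunner` defined near the end of this collection,
# which produces the t_logits/s_logits/s_logprobs/t_probs keys the callbacks read:
class DistilRunner(dl.Runner):
    def handle_batch(self, batch):
        x, y = batch
        # freeze the teacher for distillation: eval mode + no gradient tracking
        self.model["teacher"].eval()
        with torch.no_grad():
            t_logits = self.model["teacher"](x)
        # the student forward runs outside `no_grad`, so it keeps gradients
        s_logits = self.model["student"](x)
        self.batch = {
            "t_logits": t_logits,
            "s_logits": s_logits,
            "targets": y,
            # KLDivLoss expects log-probabilities as input and probabilities as target
            "s_logprobs": F.log_softmax(s_logits, dim=-1),
            "t_probs": F.softmax(t_logits, dim=-1),
        }
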
def get_callbacks(self, stage: str): return { "criterion": dl.CriterionCallback(input_key="logits", target_key="labels", metric_key="loss"), "optimizer": dl.OptimizerCallback(metric_key="loss"), "scheduler": dl.SchedulerCallback(loader_key="valid", metric_key="loss", mode="batch"), "accuracy": dl.AccuracyCallback(input_key="logits", target_key="labels", topk_args=(1, )), "checkpoint": dl.CheckpointCallback( self._logdir, loader_key="valid", metric_key="accuracy", minimize=False, save_n_best=1, ), # "tqdm": dl.TqdmCallback(), }
def get_callbacks(self, stage: str): return { "criterion": dl.CriterionCallback(metric_key="loss", input_key="logits", target_key="targets"), "optimizer": dl.OptimizerCallback(metric_key="loss"), # "scheduler": dl.SchedulerCallback(loader_key="valid", metric_key="loss"), "accuracy": dl.AccuracyCallback(input_key="logits", target_key="targets", topk_args=(1, 3, 5)), "classification": dl.PrecisionRecallF1SupportCallback(input_key="logits", target_key="targets", num_classes=10), "confusion_matrix": dl.ConfusionMatrixCallback(input_key="logits", target_key="targets", num_classes=10), "checkpoint": dl.CheckpointCallback(self._logdir, loader_key="valid", metric_key="loss", minimize=True, save_n_best=3), }
def get_callbacks(self):
    return {
        "criterion": dl.CriterionCallback(
            metric_key="loss", input_key="logits", target_key="targets"
        ),
        "backward": dl.BackwardCallback(metric_key="loss"),
        "optimizer": dl.OptimizerCallback(metric_key="loss"),
        "scheduler": dl.SchedulerCallback(loader_key="valid", metric_key="loss"),
        "accuracy": dl.AccuracyCallback(
            input_key="logits", target_key="targets", topk=(1, 3, 5)
        ),
        "checkpoint": dl.CheckpointCallback(
            self._logdir,
            loader_key="valid",
            metric_key="accuracy01",
            minimize=False,
            topk=1,
        ),
        "tqdm": dl.TqdmCallback(),
    }

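# NOTE: the `get_callbacks` overrides in this collection belong to custom
# `dl.IRunner` subclasses. A minimal sketch of the surrounding class -- the
# `_logdir` attribute comes from the snippets above, the other hooks are assumptions:
class CustomIRunner(dl.IRunner):
    def __init__(self, logdir: str):
        super().__init__()
        self._logdir = logdir

    def get_loggers(self):
        return {"console": dl.ConsoleLogger(), "csv": dl.CSVLogger(logdir=self._logdir)}

    # get_engine / get_loaders / get_model / get_criterion / get_optimizer are
    # defined analogously; `get_callbacks` then wires the pieces together.
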
def get_callbacks(self, stage: str):
    callbacks = {
        "criterion": dl.CriterionCallback(
            metric_key="loss", input_key="logits", target_key="targets"
        ),
        "optimizer": dl.OptimizerCallback(
            metric_key="loss",
            grad_clip_fn=nn.utils.clip_grad_norm_,
            grad_clip_params={"max_norm": 1.0},
        ),
        # "scheduler": dl.SchedulerCallback(loader_key="valid", metric_key="loss"),
        "accuracy": dl.AccuracyCallback(
            input_key="logits", target_key="targets", topk_args=(1, 3, 5)
        ),
        "classification": dl.PrecisionRecallF1SupportCallback(
            input_key="logits", target_key="targets", num_classes=10
        ),
        "checkpoint": dl.CheckpointCallback(
            self._logdir, loader_key="valid", metric_key="loss", minimize=True, save_n_best=3
        ),
    }
    if SETTINGS.ml_required:
        callbacks["confusion_matrix"] = dl.ConfusionMatrixCallback(
            input_key="logits", target_key="targets", num_classes=10
        )
    return callbacks

def test_disabling_loss_for_validation():
    # the criterion callback is wrapped to skip the valid loader, so the loss
    # is computed (and logged) only on train -- see the assertions below
    old_stdout = sys.stdout
    sys.stdout = str_stdout = StringIO()

    # experiment setup
    logdir = "./logs/control_flow"
    checkpoint = logdir + "/checkpoints"
    logfile = checkpoint + "/_metrics.json"

    # data
    num_samples, num_features = int(1e4), int(1e1)
    X = torch.rand(num_samples, num_features)
    y = torch.randint(0, 5, size=[num_samples])
    dataset = TensorDataset(X, y)
    loader = DataLoader(dataset, batch_size=32, num_workers=1)
    loaders = {"train": loader, "valid": loader}

    # model, criterion, optimizer, scheduler
    model = torch.nn.Linear(num_features, 5)
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters())

    runner = dl.SupervisedRunner()
    n_epochs = 5
    # first stage
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        loaders=loaders,
        logdir=logdir,
        num_epochs=n_epochs,
        verbose=False,
        main_metric="accuracy01",
        callbacks=[
            dl.ControlFlowCallback(dl.CriterionCallback(), ignore_loaders=["valid"]),
            dl.AccuracyCallback(accuracy_args=[1, 3, 5]),
            dl.CheckRunCallback(num_epoch_steps=n_epochs),
        ],
    )

    sys.stdout = old_stdout
    exp_output = str_stdout.getvalue()

    assert len(re.findall(r"\(train\): loss", exp_output)) == 5
    assert len(re.findall(r"\(valid\): loss", exp_output)) == 0
    assert len(re.findall(r".*/train\.\d\.pth", exp_output)) == 1

    assert os.path.isfile(logfile)
    assert os.path.isfile(checkpoint + "/best.pth")
    assert os.path.isfile(checkpoint + "/best_full.pth")
    assert os.path.isfile(checkpoint + "/last.pth")
    assert os.path.isfile(checkpoint + "/last_full.pth")
    pth_files = [file for file in os.listdir(checkpoint) if file.endswith(".pth")]
    assert len(pth_files) == 6

    shutil.rmtree(logdir, ignore_errors=True)

def get_callbacks(self, stage: str): callbacks = { "scores": dl.BatchTransformCallback( input_key="logits", output_key="scores", transform=partial(torch.softmax, dim=1), scope="on_batch_end", ), "labels": dl.BatchTransformCallback( input_key="scores", output_key="labels", transform=partial(torch.argmax, dim=1), scope="on_batch_end", ), "criterion": dl.CriterionCallback(metric_key="loss", input_key="logits", target_key="targets"), "optimizer": dl.OptimizerCallback( metric_key="loss", grad_clip_fn=nn.utils.clip_grad_norm_, grad_clip_params={"max_norm": 1.0}, ), # "scheduler": dl.SchedulerCallback(loader_key="valid", metric_key="loss"), "accuracy": dl.AccuracyCallback(input_key="logits", target_key="targets", topk_args=(1, 3, 5)), "classification": dl.PrecisionRecallF1SupportCallback(input_key="logits", target_key="targets", num_classes=10), "checkpoint": dl.CheckpointCallback(self._logdir, loader_key="valid", metric_key="loss", minimize=True, save_n_best=3), } if SETTINGS.ml_required: callbacks["confusion_matrix"] = dl.ConfusionMatrixCallback( input_key="logits", target_key="targets", num_classes=10) callbacks["f1_score"] = dl.SklearnBatchCallback( keys={ "y_pred": "labels", "y_true": "targets" }, metric_fn="f1_score", metric_key="sk_f1", average="macro", zero_division=1, ) return callbacks
def test_aggregation_2():
    """Aggregation with a custom function."""
    loaders, model, criterion, optimizer = prepare_experiment()
    runner = dl.SupervisedRunner()

    def aggregation_function(metrics, runner):
        # epoch-dependent weighting: at epoch 1 only the focal term contributes,
        # at epoch 3 only the bce term is left
        epoch = runner.stage_epoch_step
        loss = (3 / 2 - epoch / 2) * metrics["loss_focal"] + (
            1 / 2 * epoch - 1 / 2
        ) * metrics["loss_bce"]
        return loss

    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        loaders=loaders,
        logdir="./logs/aggregation_2/",
        num_epochs=3,
        callbacks=[
            dl.CriterionCallback(
                input_key="logits",
                target_key="targets",
                metric_key="loss_bce",
                criterion_key="bce",
            ),
            dl.CriterionCallback(
                input_key="logits",
                target_key="targets",
                metric_key="loss_focal",
                criterion_key="focal",
            ),
            # loss aggregation
            dl.MetricAggregationCallback(metric_key="loss", mode=aggregation_function),
        ],
    )

    # after the last epoch (epoch 3) the focal weight is 3/2 - 3/2 = 0 and the
    # bce weight is 3/2 - 1/2 = 1, so the aggregated loss must equal the bce loss
    for loader in ["train", "valid"]:
        metrics = runner.epoch_metrics[loader]
        loss_1 = metrics["loss_bce"]
        loss_2 = metrics["loss"]
        assert np.abs(loss_1 - loss_2) < 1e-5

def test_aggregation_1():
    """Aggregation as weighted_sum."""
    loaders, model, criterion, optimizer = prepare_experiment()
    runner = dl.SupervisedRunner()
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        loaders=loaders,
        logdir="./logs/aggregation_1/",
        num_epochs=3,
        callbacks=[
            dl.CriterionCallback(
                input_key="logits",
                target_key="targets",
                metric_key="loss_bce",
                criterion_key="bce",
            ),
            dl.CriterionCallback(
                input_key="logits",
                target_key="targets",
                metric_key="loss_focal",
                criterion_key="focal",
            ),
            # loss aggregation
            dl.MetricAggregationCallback(
                metric_key="loss",
                metrics={"loss_focal": 0.6, "loss_bce": 0.4},
                mode="weighted_sum",
            ),
        ],
    )

    for loader in ["train", "valid"]:
        metrics = runner.epoch_metrics[loader]
        loss_1 = metrics["loss_bce"] * 0.4 + metrics["loss_focal"] * 0.6
        loss_2 = metrics["loss"]
        assert np.abs(loss_1 - loss_2) < 1e-5

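# NOTE: `prepare_experiment` is not defined in this collection; a minimal
# sketch of what the two aggregation tests above rely on (binary targets and a
# criterion dict with "bce" and "focal" keys) -- the data shapes and the focal
# loss class are assumptions:
def prepare_experiment():
    num_samples, num_features = int(1e4), int(1e1)
    X = torch.rand(num_samples, num_features)
    y = (torch.rand(num_samples, 1) > 0.5).to(torch.float32)
    loader = DataLoader(TensorDataset(X, y), batch_size=32)
    loaders = {"train": loader, "valid": loader}
    model = torch.nn.Linear(num_features, 1)
    criterion = {
        "bce": torch.nn.BCEWithLogitsLoss(),
        "focal": FocalLossBinary(),  # e.g. from catalyst.contrib.nn; assumption
    }
    optimizer = torch.optim.Adam(model.parameters())
    return loaders, model, criterion, optimizer
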
def train_experiment(device):
    with TemporaryDirectory() as logdir:
        # sample data
        num_users, num_features, num_items = int(1e4), int(1e1), 10
        X = torch.rand(num_users, num_features)
        y = (torch.rand(num_users, num_items) > 0.5).to(torch.float32)

        # pytorch loaders
        dataset = TensorDataset(X, y)
        loader = DataLoader(dataset, batch_size=32, num_workers=1)
        loaders = {"train": loader, "valid": loader}

        # model, criterion, optimizer, scheduler
        model = torch.nn.Linear(num_features, num_items)
        criterion = torch.nn.BCEWithLogitsLoss()
        optimizer = torch.optim.Adam(model.parameters())
        scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, [2])

        class CustomRunner(dl.Runner):
            def handle_batch(self, batch):
                x, y = batch
                logits = self.model(x)
                self.batch = {
                    "features": x,
                    "logits": logits,
                    "scores": torch.sigmoid(logits),
                    "targets": y,
                }

        # model training
        runner = CustomRunner()
        runner.train(
            engine=dl.DeviceEngine(device),
            model=model,
            criterion=criterion,
            optimizer=optimizer,
            scheduler=scheduler,
            loaders=loaders,
            num_epochs=1,
            verbose=False,
            callbacks=[
                dl.CriterionCallback(input_key="logits", target_key="targets", metric_key="loss"),
                dl.AUCCallback(input_key="scores", target_key="targets"),
                dl.HitrateCallback(input_key="scores", target_key="targets", topk_args=(1, 3, 5)),
                dl.MRRCallback(input_key="scores", target_key="targets", topk_args=(1, 3, 5)),
                dl.MAPCallback(input_key="scores", target_key="targets", topk_args=(1, 3, 5)),
                dl.NDCGCallback(input_key="scores", target_key="targets", topk_args=(1, 3, 5)),
                dl.OptimizerCallback(metric_key="loss"),
                dl.SchedulerCallback(),
                dl.CheckpointCallback(
                    logdir=logdir, loader_key="valid", metric_key="map01", minimize=False
                ),
            ],
        )

def get_callbacks(self, stage: str) -> Dict[str, dl.Callback]:
    return {
        "criterion": dl.CriterionCallback(
            metric_key="loss", input_key="logits", target_key="targets"
        ),
        "optimizer": dl.OptimizerCallback(metric_key="loss"),
        # "scheduler": dl.SchedulerCallback(loader_key="valid", metric_key="loss"),
        "checkpoint": dl.CheckpointCallback(
            self._logdir, loader_key="valid", metric_key="loss", minimize=True, save_n_best=3
        ),
        "check_freezed": CheckRequiresGrad("layer1", "train_freezed", False),
        "check_unfreezed": CheckRequiresGrad("layer1", "train_unfreezed", True),
    }

def train_experiment(device, engine=None):
    with TemporaryDirectory() as logdir:
        # sample data
        num_users, num_features, num_items = int(1e4), int(1e1), 10
        X = torch.rand(num_users, num_features)
        y = (torch.rand(num_users, num_items) > 0.5).to(torch.float32)

        # pytorch loaders
        dataset = TensorDataset(X, y)
        loader = DataLoader(dataset, batch_size=32, num_workers=1)
        loaders = {"train": loader, "valid": loader}

        # model, criterion, optimizer, scheduler
        model = torch.nn.Linear(num_features, num_items)
        criterion = torch.nn.BCEWithLogitsLoss()
        optimizer = torch.optim.Adam(model.parameters())
        scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, [2])

        callbacks = [
            dl.CriterionCallback(input_key="logits", target_key="targets", metric_key="loss"),
            dl.AUCCallback(input_key="scores", target_key="targets"),
            dl.HitrateCallback(input_key="scores", target_key="targets", topk_args=(1, 3, 5)),
            dl.MRRCallback(input_key="scores", target_key="targets", topk_args=(1, 3, 5)),
            dl.MAPCallback(input_key="scores", target_key="targets", topk_args=(1, 3, 5)),
            dl.NDCGCallback(input_key="scores", target_key="targets", topk_args=(1, 3, 5)),
            dl.OptimizerCallback(metric_key="loss"),
            dl.SchedulerCallback(),
            dl.CheckpointCallback(
                logdir=logdir, loader_key="valid", metric_key="map01", minimize=False
            ),
        ]
        if engine is None or not isinstance(
            engine,
            (dl.AMPEngine, dl.DataParallelAMPEngine, dl.DistributedDataParallelAMPEngine),
        ):
            callbacks.append(dl.AUCCallback(input_key="logits", target_key="targets"))

        # model training
        runner = CustomRunner()
        runner.train(
            engine=engine or dl.DeviceEngine(device),
            model=model,
            criterion=criterion,
            optimizer=optimizer,
            scheduler=scheduler,
            loaders=loaders,
            num_epochs=1,
            verbose=False,
            callbacks=callbacks,
        )

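# NOTE: `CustomRunner` above is assumed to be the same runner as in the
# previous RecSys experiment -- it exposes sigmoid `scores` for the ranking
# callbacks; a minimal sketch:
class CustomRunner(dl.Runner):
    def handle_batch(self, batch):
        x, y = batch
        logits = self.model(x)
        self.batch = {
            "features": x,
            "logits": logits,
            "scores": torch.sigmoid(logits),  # consumed by the ranking callbacks
            "targets": y,
        }
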
def get_callbacks(self, stage: str): return { "criterion": dl.CriterionCallback( metric_key="loss", input_key="logits", target_key="targets" ), "optimizer": dl.OptimizerCallback(metric_key="loss"), "checkpoint": dl.CheckpointCallback( self._logdir, loader_key="valid", metric_key="loss", minimize=True, save_n_best=3, load_on_stage_start="best", ), "test_model_load": CheckModelStateLoadAfterStages("second", self._logdir, "best.pth"), }
def get_callbacks(self, stage: str): return { "criterion": dl.CriterionCallback( metric_key="loss", input_key="logits", target_key="targets" ), "optimizer": dl.OptimizerCallback(metric_key="loss"), "profiler": ProfilerCallback( loader_key="train", epoch=1, profiler_kwargs=dict( activities=[ torch.profiler.ProfilerActivity.CPU, torch.profiler.ProfilerActivity.CUDA, ], with_stack=True, with_flops=True, ), tensorboard_path=self.profiler_tb_logs, export_chrome_trace_path=self.chrome_trace_logs, export_stacks_kwargs=self._export_stacks_kwargs, ), }
        pair_transform=False),
    download=True,
)
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True, pin_memory=True)
valid_loader = DataLoader(test_data, batch_size=batch_size, pin_memory=True)

callbacks = [
    dl.ControlFlowCallback(
        dl.CriterionCallback(input_key="out_1", target_key="out_2", metric_key="loss"),
        loaders="train",
    ),
    dl.SklearnModelCallback(
        feature_key="embeddings",
        target_key="targets",
        train_loader="train",
        valid_loaders="valid",
        model_fn=LogisticRegression,
        predict_key="sklearn_predict",
        predict_method="predict_proba",
    ),
    dl.OptimizerCallback(metric_key="loss"),
    dl.ControlFlowCallback(
        dl.AccuracyCallback(
            target_key="targets",

def main(args):
    wandb.init(project="teacher-pruning", config=vars(args))
    set_global_seed(42)

    # dataloader initialization
    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])
    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])
    train_dataset = Wrp(
        datasets.CIFAR10(root=os.getcwd(), train=True, transform=transform_train, download=True)
    )
    valid_dataset = Wrp(
        datasets.CIFAR10(root=os.getcwd(), train=False, transform=transform_test)
    )
    train_dataloader = DataLoader(
        dataset=train_dataset, batch_size=args.batch_size, shuffle=True, num_workers=2
    )
    valid_dataloader = DataLoader(dataset=valid_dataset, batch_size=128, num_workers=2)
    loaders = {
        "train": train_dataloader,
        "valid": valid_dataloader,
    }

    # model initialization
    model = PreActResNet18()
    model.fc = nn.Linear(512, 10)
    if args.teacher_model is not None:
        is_kd = True
        teacher_model = NAME2MODEL[args.teacher_model]()
        load_model_from_path(model=teacher_model, path=args.teacher_path)
        model = {
            "student": model,
            "teacher": teacher_model,
        }
        output_hiddens = args.beta is None
        is_kd_on_hiddens = output_hiddens
        runner = KDRunner(device=args.device, output_hiddens=output_hiddens)
        parameters = model["student"].parameters()
    else:
        is_kd = False
        runner = dl.SupervisedRunner(device=args.device)
        parameters = model.parameters()

    # optimizer
    optimizer_cls = NAME2OPTIM[args.optimizer]
    optimizer_kwargs = {"params": parameters, "lr": args.lr}
    if args.optimizer == "sgd":
        optimizer_kwargs["momentum"] = args.momentum
    else:
        optimizer_kwargs["betas"] = (args.beta1, args.beta2)
    optimizer = optimizer_cls(**optimizer_kwargs)
    scheduler = MultiStepLR(optimizer, milestones=[80, 120], gamma=args.gamma)
    logdir = f"logs/{wandb.run.name}"

    # callbacks
    callbacks = [dl.AccuracyCallback(num_classes=10), WandbCallback()]
    if is_kd:
        metrics = {}
        callbacks.append(dl.CriterionCallback(output_key="cls_loss"))
        callbacks.append(DiffOutputCallback())
        coefs = get_loss_coefs(args.alpha, args.beta)
        metrics["cls_loss"] = coefs[0]
        metrics["diff_output_loss"] = coefs[1]
        if is_kd_on_hiddens:
            callbacks.append(DiffHiddenCallback())
            metrics["diff_hidden_loss"] = coefs[2]
        aggregator_callback = dl.MetricAggregationCallback(
            prefix="loss", metrics=metrics, mode="weighted_sum"
        )
        wrapped_agg_callback = dl.ControlFlowCallback(aggregator_callback, loaders=["train"])
        callbacks.append(wrapped_agg_callback)

    runner.train(
        model=model,
        optimizer=optimizer,
        scheduler=scheduler,
        criterion=nn.CrossEntropyLoss(),
        loaders=loaders,
        callbacks=callbacks,
        num_epochs=args.epoch,
        logdir=logdir,
        verbose=True,
    )

def run_ml_pipeline(sampler_inbatch: data.IInbatchTripletSampler) -> float:
    """
    Full metric learning pipeline, including train and val.

    This function is also used as the minimal example in README.md,
    section name: 'CV - MNIST with Metric Learning'.

    Args:
        sampler_inbatch: sampler for forming triplets

    Returns:
        best metric value
    """
    # 1. train and valid datasets
    dataset_root = "./data"
    transforms = t.Compose([t.ToTensor(), t.Normalize((0.1307,), (0.3081,))])

    dataset_train = datasets.MnistMLDataset(
        root=dataset_root,
        train=True,
        download=True,
        transform=transforms,
    )
    sampler = data.BalanceBatchSampler(labels=dataset_train.get_labels(), p=5, k=10)
    train_loader = DataLoader(
        dataset=dataset_train, sampler=sampler, batch_size=sampler.batch_size
    )

    dataset_val = datasets.MnistQGDataset(
        root=dataset_root, transform=transforms, gallery_fraq=0.2
    )
    val_loader = DataLoader(dataset=dataset_val, batch_size=1024)

    # 2. model and optimizer
    model = models.SimpleConv(features_dim=16)
    optimizer = Adam(model.parameters(), lr=0.0005)

    # 3. criterion with triplets sampling
    criterion = nn.TripletMarginLossWithSampler(
        margin=0.5, sampler_inbatch=sampler_inbatch
    )

    # 4. training with catalyst Runner
    callbacks = [
        dl.ControlFlowCallback(dl.CriterionCallback(), loaders="train"),
        dl.ControlFlowCallback(dl.CMCScoreCallback(topk_args=[1]), loaders="valid"),
        dl.PeriodicLoaderCallback(valid=100),
    ]

    runner = dl.SupervisedRunner(device=utils.get_device())
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        callbacks=callbacks,
        loaders={"train": train_loader, "valid": val_loader},
        minimize_metric=False,
        verbose=True,
        valid_loader="valid",
        num_epochs=100,
        main_metric="cmc01",
    )
    return runner.best_valid_metrics["cmc01"]

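# NOTE: usage sketch for `run_ml_pipeline` -- the pipeline is parametrized by
# the in-batch triplet sampler; the hard-triplets choice here is an assumption
# (any `data.IInbatchTripletSampler` works):
sampler_inbatch = data.HardTripletsSampler(norm_required=False)
best_cmc = run_ml_pipeline(sampler_inbatch)
print(f"best cmc01: {best_cmc:.3f}")
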
# create model and optimizer
model = get_contrastive_model(
    in_size=DATASETS[args.dataset]["in_size"],
    in_channels=DATASETS[args.dataset]["in_channels"],
    feature_dim=args.feature_dim,
)
optimizer = optim.Adam(model.parameters(), lr=1e-2, weight_decay=1e-6)

# define criterion
criterion = BarlowTwinsLoss(offdiag_lambda=args.offdig_lambda)

# and callbacks
callbacks = [
    dl.CriterionCallback(
        input_key="projection_left", target_key="projection_right", metric_key="loss"
    ),
    dl.BackwardCallback(metric_key="loss"),
    dl.OptimizerCallback(metric_key="loss"),
    dl.SklearnModelCallback(
        feature_key="embedding_origin",
        target_key="target",
        train_loader="train",
        valid_loaders="valid",
        model_fn=LogisticRegression,
        predict_key="sklearn_predict",
        predict_method="predict_proba",
        C=0.1,
        solver="saga",
        max_iter=200,
    ),

def train_experiment(device, engine=None):
    with TemporaryDirectory() as logdir:
        # latent_dim = 128
        # generator = nn.Sequential(
        #     # We want to generate 128 coefficients to reshape into a 7x7x128 map
        #     nn.Linear(128, 128 * 7 * 7),
        #     nn.LeakyReLU(0.2, inplace=True),
        #     Lambda(lambda x: x.view(x.size(0), 128, 7, 7)),
        #     nn.ConvTranspose2d(128, 128, (4, 4), stride=(2, 2), padding=1),
        #     nn.LeakyReLU(0.2, inplace=True),
        #     nn.ConvTranspose2d(128, 128, (4, 4), stride=(2, 2), padding=1),
        #     nn.LeakyReLU(0.2, inplace=True),
        #     nn.Conv2d(128, 1, (7, 7), padding=3),
        #     nn.Sigmoid(),
        # )
        # discriminator = nn.Sequential(
        #     nn.Conv2d(1, 64, (3, 3), stride=(2, 2), padding=1),
        #     nn.LeakyReLU(0.2, inplace=True),
        #     nn.Conv2d(64, 128, (3, 3), stride=(2, 2), padding=1),
        #     nn.LeakyReLU(0.2, inplace=True),
        #     GlobalMaxPool2d(),
        #     Flatten(),
        #     nn.Linear(128, 1),
        # )
        latent_dim = 32
        generator = nn.Sequential(
            nn.Linear(latent_dim, 28 * 28),
            Lambda(_ddp_hack),
            nn.Sigmoid(),
        )
        discriminator = nn.Sequential(Flatten(), nn.Linear(28 * 28, 1))
        model = {"generator": generator, "discriminator": discriminator}
        criterion = {
            "generator": nn.BCEWithLogitsLoss(),
            "discriminator": nn.BCEWithLogitsLoss(),
        }
        optimizer = {
            "generator": torch.optim.Adam(generator.parameters(), lr=0.0003, betas=(0.5, 0.999)),
            "discriminator": torch.optim.Adam(
                discriminator.parameters(), lr=0.0003, betas=(0.5, 0.999)
            ),
        }
        loaders = {
            "train": DataLoader(
                MNIST(os.getcwd(), train=False, download=True, transform=ToTensor()),
                batch_size=32,
            ),
        }

        runner = CustomRunner(latent_dim)
        runner.train(
            engine=engine or dl.DeviceEngine(device),
            model=model,
            criterion=criterion,
            optimizer=optimizer,
            loaders=loaders,
            callbacks=[
                dl.CriterionCallback(
                    input_key="combined_predictions",
                    target_key="labels",
                    metric_key="loss_discriminator",
                    criterion_key="discriminator",
                ),
                dl.CriterionCallback(
                    input_key="generated_predictions",
                    target_key="misleading_labels",
                    metric_key="loss_generator",
                    criterion_key="generator",
                ),
                dl.OptimizerCallback(
                    model_key="generator",
                    optimizer_key="generator",
                    metric_key="loss_generator",
                ),
                dl.OptimizerCallback(
                    model_key="discriminator",
                    optimizer_key="discriminator",
                    metric_key="loss_discriminator",
                ),
            ],
            valid_loader="train",
            valid_metric="loss_generator",
            minimize_valid_metric=True,
            num_epochs=1,
            verbose=False,
            logdir=logdir,
        )
        if not isinstance(engine, dl.DistributedDataParallelEngine):
            runner.predict_batch(None)[0, 0].cpu().numpy()

runner = MonaiSupervisedRunner(
    input_key="img", input_target_key="seg", output_key="logits"
)  # you can also specify `device` here
runner.train(
    loaders={"train": train_loader, "valid": val_loader},
    model=model,
    criterion=loss_function,
    optimizer=optimizer,
    num_epochs=6,
    logdir="./logs",
    main_metric="dice_metric",
    minimize_metric=False,
    verbose=False,
    timeit=True,  # let's use minimal logs, but with time checkers
    callbacks={
        "loss": dl.CriterionCallback(input_key="seg", output_key="logits"),
        "periodic_valid": dl.PeriodicLoaderCallback(valid=2),
        "dice_metric": dl.MetricCallback(
            prefix="dice_metric",
            metric_fn=dice_metric,
            input_key="seg",
            output_key="logits",
        ),
    },
    load_best_on_end=True,  # user-friendly API :)
)

            'x': sents,
            'x_char': chars,
            'y': tags,
            'total_tags': total_tags,
        }  # 'mask': mask,
        self.output = {'preds': total_preds}

    callbacks = {
        "optimizer": dl.OptimizerCallback(
            metric_key="loss", accumulation_steps=1, grad_clip_params=None
        ),
        "criterion": dl.CriterionCallback(
            input_key=['x', 'x_char', 'y'],  # 'mask': mask,
            output_key=[],
        ),
        "metric": dl.MetricCallback(
            input_key='total_tags',
            output_key='preds',
            prefix='F1_token',
            metric_fn=ner_token_f1,
        ),
        "checkpoints": CheckpointCallback(save_n_best=3),
    }
    """

callbacks = [
    dl.OptimizerCallback(
        metric_key="loss", accumulation_steps=1, grad_clip_params=None

def train_experiment(engine=None):
    with TemporaryDirectory() as logdir:
        # sample data
        num_samples, num_features, num_classes1, num_classes2 = int(1e4), int(1e1), 4, 10
        X = torch.rand(num_samples, num_features)
        y1 = (torch.rand(num_samples) * num_classes1).to(torch.int64)
        y2 = (torch.rand(num_samples) * num_classes2).to(torch.int64)

        # pytorch loaders
        dataset = TensorDataset(X, y1, y2)
        loader = DataLoader(dataset, batch_size=32, num_workers=1)
        loaders = {"train": loader, "valid": loader}

        # model, criterion, optimizer, scheduler
        model = CustomModule(num_features, num_classes1, num_classes2)
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters())
        scheduler = optim.lr_scheduler.MultiStepLR(optimizer, [2])

        callbacks = [
            dl.CriterionCallback(metric_key="loss1", input_key="logits1", target_key="targets1"),
            dl.CriterionCallback(metric_key="loss2", input_key="logits2", target_key="targets2"),
            dl.MetricAggregationCallback(
                metric_key="loss", metrics=["loss1", "loss2"], mode="mean"
            ),
            dl.BackwardCallback(metric_key="loss"),
            dl.OptimizerCallback(metric_key="loss"),
            dl.SchedulerCallback(),
            dl.AccuracyCallback(
                input_key="logits1",
                target_key="targets1",
                num_classes=num_classes1,
                prefix="one_",
            ),
            dl.AccuracyCallback(
                input_key="logits2",
                target_key="targets2",
                num_classes=num_classes2,
                prefix="two_",
            ),
            dl.CheckpointCallback(
                "./logs/one",
                loader_key="valid",
                metric_key="one_accuracy01",
                minimize=False,
                topk=1,
            ),
            dl.CheckpointCallback(
                "./logs/two",
                loader_key="valid",
                metric_key="two_accuracy03",
                minimize=False,
                topk=3,
            ),
        ]
        if SETTINGS.ml_required:
            # catalyst[ml] required
            callbacks.append(
                dl.ConfusionMatrixCallback(
                    input_key="logits1",
                    target_key="targets1",
                    num_classes=num_classes1,
                    prefix="one_cm",
                )
            )
            # catalyst[ml] required
            callbacks.append(
                dl.ConfusionMatrixCallback(
                    input_key="logits2",
                    target_key="targets2",
                    num_classes=num_classes2,
                    prefix="two_cm",
                )
            )

        # model training
        runner = CustomRunner()
        runner.train(
            engine=engine,
            model=model,
            criterion=criterion,
            optimizer=optimizer,
            scheduler=scheduler,
            loaders=loaders,
            num_epochs=1,
            verbose=False,
            callbacks=callbacks,
            loggers={
                "console": dl.ConsoleLogger(),
                "tb": dl.TensorboardLogger("./logs/tb"),
            },
        )

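# NOTE: `CustomModule` and `CustomRunner` above are assumed to match the
# inline definitions in the final multi-head experiment of this collection;
# minimal sketches (shared trunk, two heads, logits1/logits2 batch keys):
class CustomModule(nn.Module):
    def __init__(self, in_features: int, out_features1: int, out_features2: int):
        super().__init__()
        self.shared = nn.Linear(in_features, 128)
        self.head1 = nn.Linear(128, out_features1)
        self.head2 = nn.Linear(128, out_features2)

    def forward(self, x):
        x = self.shared(x)  # shared trunk feeding both classification heads
        return self.head1(x), self.head2(x)


class CustomRunner(dl.Runner):
    def handle_batch(self, batch):
        x, y1, y2 = batch
        y1_hat, y2_hat = self.model(x)
        self.batch = {
            "features": x,
            "logits1": y1_hat,
            "logits2": y2_hat,
            "targets1": y1,
            "targets2": y2,
        }
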
def test_reid_pipeline():
    """This test checks that the reid pipeline runs and computes metrics
    with ReidCMCScoreCallback."""
    with TemporaryDirectory() as logdir:
        # 1. train and valid loaders
        train_dataset = MnistMLDataset(root=DATA_ROOT)
        sampler = BatchBalanceClassSampler(
            labels=train_dataset.get_labels(),
            num_classes=3,
            num_samples=10,
            num_batches=20,
        )
        train_loader = DataLoader(dataset=train_dataset, batch_sampler=sampler, num_workers=0)

        valid_dataset = MnistReIDQGDataset(root=DATA_ROOT, gallery_fraq=0.2)
        valid_loader = DataLoader(dataset=valid_dataset, batch_size=1024)

        # 2. model and optimizer
        model = MnistSimpleNet(out_features=16)
        optimizer = Adam(model.parameters(), lr=0.001)

        # 3. criterion with triplets sampling
        sampler_inbatch = AllTripletsSampler(max_output_triplets=1000)
        criterion = TripletMarginLossWithSampler(margin=0.5, sampler_inbatch=sampler_inbatch)

        # 4. training with catalyst Runner
        callbacks = [
            dl.ControlFlowCallbackWrapper(
                dl.CriterionCallback(
                    input_key="embeddings", target_key="targets", metric_key="loss"
                ),
                loaders="train",
            ),
            dl.ControlFlowCallbackWrapper(
                dl.ReidCMCScoreCallback(
                    embeddings_key="embeddings",
                    pids_key="targets",
                    cids_key="cids",
                    is_query_key="is_query",
                    topk=[1],
                ),
                loaders="valid",
            ),
            dl.PeriodicLoaderCallback(
                valid_loader_key="valid",
                valid_metric_key="cmc01",
                minimize=False,
                valid=2,
            ),
        ]

        runner = ReIDCustomRunner()
        runner.train(
            model=model,
            criterion=criterion,
            optimizer=optimizer,
            callbacks=callbacks,
            loaders=OrderedDict({"train": train_loader, "valid": valid_loader}),
            verbose=False,
            logdir=logdir,
            valid_loader="valid",
            valid_metric="cmc01",
            minimize_valid_metric=False,
            num_epochs=10,
        )
        assert "cmc01" in runner.loader_metrics
        assert runner.loader_metrics["cmc01"] > 0.65

def train_experiment(device, engine=None):
    with TemporaryDirectory() as logdir:
        from catalyst import utils

        utils.set_global_seed(RANDOM_STATE)

        # 1. train, valid and test loaders
        transforms = Compose([ToTensor(), Normalize((0.1307,), (0.3081,))])
        train_data = MNIST(os.getcwd(), train=True, download=True, transform=transforms)
        train_labels = train_data.targets.cpu().numpy().tolist()
        train_sampler = data.BatchBalanceClassSampler(
            train_labels, num_classes=10, num_samples=4
        )
        train_loader = DataLoader(train_data, batch_sampler=train_sampler)

        valid_dataset = MNIST(root=os.getcwd(), transform=transforms, train=False, download=True)
        valid_loader = DataLoader(dataset=valid_dataset, batch_size=32)

        test_dataset = MNIST(root=os.getcwd(), transform=transforms, train=False, download=True)
        test_loader = DataLoader(dataset=test_dataset, batch_size=32)

        # 2. model and optimizer
        model = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 16), nn.LeakyReLU(inplace=True))
        optimizer = Adam(model.parameters(), lr=LR)
        scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, [2])

        # 3. criterion with triplets sampling
        sampler_inbatch = data.HardTripletsSampler(norm_required=False)
        criterion = nn.TripletMarginLossWithSampler(
            margin=0.5, sampler_inbatch=sampler_inbatch
        )

        # 4. training with catalyst Runner
        class CustomRunner(dl.SupervisedRunner):
            def handle_batch(self, batch) -> None:
                images, targets = batch["features"].float(), batch["targets"].long()
                features = self.model(images)
                self.batch = {"embeddings": features, "targets": targets}

        callbacks = [
            dl.ControlFlowCallback(
                dl.CriterionCallback(
                    input_key="embeddings", target_key="targets", metric_key="loss"
                ),
                loaders="train",
            ),
            dl.SklearnModelCallback(
                feature_key="embeddings",
                target_key="targets",
                train_loader="train",
                valid_loaders=["valid", "infer"],
                model_fn=RandomForestClassifier,
                predict_method="predict_proba",
                predict_key="sklearn_predict",
                random_state=RANDOM_STATE,
                n_estimators=50,
            ),
            dl.ControlFlowCallback(
                dl.AccuracyCallback(
                    target_key="targets", input_key="sklearn_predict", topk_args=(1, 3)
                ),
                loaders=["valid", "infer"],
            ),
        ]

        runner = CustomRunner(input_key="features", output_key="embeddings")
        runner.train(
            engine=engine or dl.DeviceEngine(device),
            model=model,
            criterion=criterion,
            optimizer=optimizer,
            scheduler=scheduler,
            callbacks=callbacks,
            loaders={"train": train_loader, "valid": valid_loader, "infer": test_loader},
            verbose=False,
            valid_loader="valid",
            valid_metric="accuracy",
            minimize_valid_metric=False,
            num_epochs=TRAIN_EPOCH,
            logdir=logdir,
        )

        valid_path = Path(logdir) / "logs/infer.csv"
        best_accuracy = max(float(row["accuracy"]) for row in read_csv(valid_path))
        assert best_accuracy > 0.8

def train_experiment(device):
    with TemporaryDirectory() as logdir:
        latent_dim = 128
        generator = nn.Sequential(
            # We want to generate 128 coefficients to reshape into a 7x7x128 map
            nn.Linear(128, 128 * 7 * 7),
            nn.LeakyReLU(0.2, inplace=True),
            Lambda(lambda x: x.view(x.size(0), 128, 7, 7)),
            nn.ConvTranspose2d(128, 128, (4, 4), stride=(2, 2), padding=1),
            nn.LeakyReLU(0.2, inplace=True),
            nn.ConvTranspose2d(128, 128, (4, 4), stride=(2, 2), padding=1),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(128, 1, (7, 7), padding=3),
            nn.Sigmoid(),
        )
        discriminator = nn.Sequential(
            nn.Conv2d(1, 64, (3, 3), stride=(2, 2), padding=1),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(64, 128, (3, 3), stride=(2, 2), padding=1),
            nn.LeakyReLU(0.2, inplace=True),
            GlobalMaxPool2d(),
            Flatten(),
            nn.Linear(128, 1),
        )
        model = {"generator": generator, "discriminator": discriminator}
        criterion = {
            "generator": nn.BCEWithLogitsLoss(),
            "discriminator": nn.BCEWithLogitsLoss(),
        }
        optimizer = {
            "generator": torch.optim.Adam(generator.parameters(), lr=0.0003, betas=(0.5, 0.999)),
            "discriminator": torch.optim.Adam(
                discriminator.parameters(), lr=0.0003, betas=(0.5, 0.999)
            ),
        }
        loaders = {
            "train": DataLoader(
                MNIST(os.getcwd(), train=False, download=True, transform=ToTensor()),
                batch_size=32,
            ),
        }

        class CustomRunner(dl.Runner):
            def predict_batch(self, batch):
                batch_size = 1
                # Sample random points in the latent space
                random_latent_vectors = torch.randn(batch_size, latent_dim).to(self.device)
                # Decode them to fake images
                generated_images = self.model["generator"](random_latent_vectors).detach()
                return generated_images

            def handle_batch(self, batch):
                real_images, _ = batch
                batch_size = real_images.shape[0]

                # Sample random points in the latent space
                random_latent_vectors = torch.randn(batch_size, latent_dim).to(self.device)
                # Decode them to fake images
                generated_images = self.model["generator"](random_latent_vectors).detach()
                # Combine them with real images
                combined_images = torch.cat([generated_images, real_images])

                # Assemble labels discriminating real from fake images
                labels = torch.cat(
                    [torch.ones((batch_size, 1)), torch.zeros((batch_size, 1))]
                ).to(self.device)
                # Add random noise to the labels - important trick!
                labels += 0.05 * torch.rand(labels.shape).to(self.device)

                # Discriminator forward
                combined_predictions = self.model["discriminator"](combined_images)

                # Sample random points in the latent space
                random_latent_vectors = torch.randn(batch_size, latent_dim).to(self.device)
                # Assemble labels that say "all real images"
                misleading_labels = torch.zeros((batch_size, 1)).to(self.device)

                # Generator forward
                generated_images = self.model["generator"](random_latent_vectors)
                generated_predictions = self.model["discriminator"](generated_images)

                self.batch = {
                    "combined_predictions": combined_predictions,
                    "labels": labels,
                    "generated_predictions": generated_predictions,
                    "misleading_labels": misleading_labels,
                }

        runner = CustomRunner()
        runner.train(
            engine=dl.DeviceEngine(device),
            model=model,
            criterion=criterion,
            optimizer=optimizer,
            loaders=loaders,
            callbacks=[
                dl.CriterionCallback(
                    input_key="combined_predictions",
                    target_key="labels",
                    metric_key="loss_discriminator",
                    criterion_key="discriminator",
                ),
                dl.CriterionCallback(
                    input_key="generated_predictions",
                    target_key="misleading_labels",
                    metric_key="loss_generator",
                    criterion_key="generator",
                ),
                dl.OptimizerCallback(
                    model_key="generator",
                    optimizer_key="generator",
                    metric_key="loss_generator",
                ),
                dl.OptimizerCallback(
                    model_key="discriminator",
                    optimizer_key="discriminator",
                    metric_key="loss_discriminator",
                ),
            ],
            valid_loader="train",
            valid_metric="loss_generator",
            minimize_valid_metric=True,
            num_epochs=1,
            verbose=False,
            logdir=logdir,
        )
        runner.predict_batch(None)[0, 0].cpu().numpy()

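# NOTE: usage sketch -- inside `train_experiment`, after `runner.train(...)`,
# the trailing `predict_batch` call returns one generated digit; it could be
# visualized like this (the matplotlib part is an assumption, not part of the
# original example):
#
#     import matplotlib.pyplot as plt
#     image = runner.predict_batch(None)[0, 0].cpu().numpy()  # (28, 28), in [0, 1]
#     plt.imshow(image, cmap="gray")
#     plt.show()
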
            in_channels=DATASETS[args.dataset]["in_channels"],
            feature_dim=args.feature_dim,
        ),
    }
)
utils.set_requires_grad(model["target"], False)
optimizer = optim.Adam(model["online"].parameters(), lr=args.learning_rate)

# define criterion
criterion = NTXentLoss(tau=args.temperature)

# and callbacks
callbacks = [
    dl.CriterionCallback(
        input_key="online_projection_left",
        target_key="target_projection_right",
        metric_key="loss",
    ),
    dl.BackwardCallback(metric_key="loss"),
    dl.OptimizerCallback(metric_key="loss"),
    dl.ControlFlowCallbackWrapper(
        dl.SoftUpdateCallaback(
            target_model="target",
            source_model="online",
            tau=0.1,
            scope="on_batch_end",
        ),
        loaders="train",
    ),
    dl.SklearnModelCallback(
        feature_key="online_embedding_origin",

def train_experiment(engine=None):
    with TemporaryDirectory() as logdir:
        # 1. train and valid loaders
        train_dataset = MnistMLDataset(root=DATA_ROOT)
        sampler = BatchBalanceClassSampler(
            labels=train_dataset.get_labels(),
            num_classes=5,
            num_samples=10,
            num_batches=10,
        )
        train_loader = DataLoader(dataset=train_dataset, batch_sampler=sampler)

        valid_dataset = MnistQGDataset(root=DATA_ROOT, gallery_fraq=0.2)
        valid_loader = DataLoader(dataset=valid_dataset, batch_size=1024)

        # 2. model and optimizer
        model = MnistSimpleNet(out_features=16)
        optimizer = Adam(model.parameters(), lr=0.001)

        # 3. criterion with triplets sampling
        sampler_inbatch = HardTripletsSampler(norm_required=False)
        criterion = TripletMarginLossWithSampler(margin=0.5, sampler_inbatch=sampler_inbatch)

        # 4. training with catalyst Runner
        callbacks = [
            dl.ControlFlowCallbackWrapper(
                dl.CriterionCallback(
                    input_key="embeddings", target_key="targets", metric_key="loss"
                ),
                loaders="train",
            ),
            dl.ControlFlowCallbackWrapper(
                dl.CMCScoreCallback(
                    embeddings_key="embeddings",
                    labels_key="targets",
                    is_query_key="is_query",
                    topk=[1],
                ),
                loaders="valid",
            ),
            dl.PeriodicLoaderCallback(
                valid_loader_key="valid",
                valid_metric_key="cmc01",
                minimize=False,
                valid=2,
            ),
        ]

        runner = CustomRunner(input_key="features", output_key="embeddings")
        runner.train(
            engine=engine,
            model=model,
            criterion=criterion,
            optimizer=optimizer,
            callbacks=callbacks,
            loaders={"train": train_loader, "valid": valid_loader},
            verbose=False,
            logdir=logdir,
            valid_loader="valid",
            valid_metric="cmc01",
            minimize_valid_metric=False,
            num_epochs=2,
        )

def train_experiment(device):
    with TemporaryDirectory() as logdir:
        teacher = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10))
        student = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10))
        criterion = {"cls": nn.CrossEntropyLoss(), "kl": nn.KLDivLoss(reduction="batchmean")}
        optimizer = optim.Adam(student.parameters(), lr=0.02)
        loaders = {
            "train": DataLoader(
                MNIST(os.getcwd(), train=True, download=True, transform=ToTensor()),
                batch_size=32,
            ),
            "valid": DataLoader(
                MNIST(os.getcwd(), train=False, download=True, transform=ToTensor()),
                batch_size=32,
            ),
        }

        class DistilRunner(dl.Runner):
            def handle_batch(self, batch):
                x, y = batch

                teacher.eval()  # let's manually set teacher model to eval mode
                with torch.no_grad():
                    t_logits = self.model["teacher"](x)

                # the student forward stays outside `no_grad`, so it keeps gradients
                s_logits = self.model["student"](x)
                self.batch = {
                    "t_logits": t_logits,
                    "s_logits": s_logits,
                    "targets": y,
                    "s_logprobs": F.log_softmax(s_logits, dim=-1),
                    "t_probs": F.softmax(t_logits, dim=-1),
                }

        runner = DistilRunner()
        # model training
        runner.train(
            engine=dl.DeviceEngine(device),
            model={"teacher": teacher, "student": student},
            criterion=criterion,
            optimizer=optimizer,
            loaders=loaders,
            num_epochs=1,
            logdir=logdir,
            verbose=True,
            callbacks=[
                dl.AccuracyCallback(
                    input_key="t_logits", target_key="targets", num_classes=2, prefix="teacher_"
                ),
                dl.AccuracyCallback(
                    input_key="s_logits", target_key="targets", num_classes=2, prefix="student_"
                ),
                dl.CriterionCallback(
                    input_key="s_logits",
                    target_key="targets",
                    metric_key="cls_loss",
                    criterion_key="cls",
                ),
                dl.CriterionCallback(
                    input_key="s_logprobs",
                    target_key="t_probs",
                    metric_key="kl_div_loss",
                    criterion_key="kl",
                ),
                dl.MetricAggregationCallback(
                    prefix="loss", metrics=["kl_div_loss", "cls_loss"], mode="mean"
                ),
                dl.OptimizerCallback(metric_key="loss", model_key="student"),
                dl.CheckpointCallback(
                    logdir=logdir,
                    loader_key="valid",
                    metric_key="loss",
                    minimize=True,
                    save_n_best=3,
                ),
            ],
        )

callbacks = [
    dl.BatchTransformCallback(
        input_key=["projection_left", "projection_right"],
        output_key="full_projection",
        scope="on_batch_end",
        transform=concat,
    ),
    dl.BatchTransformCallback(
        input_key=["target", "target"],
        output_key="full_targets",
        scope="on_batch_end",
        transform=concat,
    ),
    dl.CriterionCallback(
        input_key="full_projection", target_key="full_targets", metric_key="loss"
    ),
]

runner = dl.SelfSupervisedRunner()
runner.train(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    callbacks=callbacks,
    loaders=get_loaders(args.dataset, args.batch_size, args.num_workers),
    verbose=True,
    logdir=args.logdir,
    valid_loader="train",
    valid_metric="loss",

def train_experiment(device):
    with TemporaryDirectory() as logdir:
        # sample data
        num_samples, num_features, num_classes1, num_classes2 = int(1e4), int(1e1), 4, 10
        X = torch.rand(num_samples, num_features)
        y1 = (torch.rand(num_samples) * num_classes1).to(torch.int64)
        y2 = (torch.rand(num_samples) * num_classes2).to(torch.int64)

        # pytorch loaders
        dataset = TensorDataset(X, y1, y2)
        loader = DataLoader(dataset, batch_size=32, num_workers=1)
        loaders = {"train": loader, "valid": loader}

        class CustomModule(nn.Module):
            def __init__(self, in_features: int, out_features1: int, out_features2: int):
                super().__init__()
                self.shared = nn.Linear(in_features, 128)
                self.head1 = nn.Linear(128, out_features1)
                self.head2 = nn.Linear(128, out_features2)

            def forward(self, x):
                x = self.shared(x)
                y1 = self.head1(x)
                y2 = self.head2(x)
                return y1, y2

        # model, criterion, optimizer, scheduler
        model = CustomModule(num_features, num_classes1, num_classes2)
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters())
        scheduler = optim.lr_scheduler.MultiStepLR(optimizer, [2])

        class CustomRunner(dl.Runner):
            def handle_batch(self, batch):
                x, y1, y2 = batch
                y1_hat, y2_hat = self.model(x)
                self.batch = {
                    "features": x,
                    "logits1": y1_hat,
                    "logits2": y2_hat,
                    "targets1": y1,
                    "targets2": y2,
                }

        # model training
        runner = CustomRunner()
        runner.train(
            model=model,
            criterion=criterion,
            optimizer=optimizer,
            scheduler=scheduler,
            loaders=loaders,
            num_epochs=1,
            verbose=False,
            callbacks=[
                dl.CriterionCallback(
                    metric_key="loss1", input_key="logits1", target_key="targets1"
                ),
                dl.CriterionCallback(
                    metric_key="loss2", input_key="logits2", target_key="targets2"
                ),
                dl.MetricAggregationCallback(
                    prefix="loss", metrics=["loss1", "loss2"], mode="mean"
                ),
                dl.OptimizerCallback(metric_key="loss"),
                dl.SchedulerCallback(),
                dl.AccuracyCallback(
                    input_key="logits1",
                    target_key="targets1",
                    num_classes=num_classes1,
                    prefix="one_",
                ),
                dl.AccuracyCallback(
                    input_key="logits2",
                    target_key="targets2",
                    num_classes=num_classes2,
                    prefix="two_",
                ),
                dl.ConfusionMatrixCallback(
                    input_key="logits1",
                    target_key="targets1",
                    num_classes=num_classes1,
                    prefix="one_cm",
                ),  # catalyst[ml] required
                dl.ConfusionMatrixCallback(
                    input_key="logits2",
                    target_key="targets2",
                    num_classes=num_classes2,
                    prefix="two_cm",
                ),  # catalyst[ml] required
                dl.CheckpointCallback(
                    "./logs/one",
                    loader_key="valid",
                    metric_key="one_accuracy",
                    minimize=False,
                    save_n_best=1,
                ),
                dl.CheckpointCallback(
                    "./logs/two",
                    loader_key="valid",
                    metric_key="two_accuracy03",
                    minimize=False,
                    save_n_best=3,
                ),
            ],
            loggers={
                "console": dl.ConsoleLogger(),
                "tb": dl.TensorboardLogger("./logs/tb"),
            },
        )