def train_experiment(device, engine=None):
    with TemporaryDirectory() as logdir:
        model = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10))
        optimizer = optim.Adam(model.parameters(), lr=0.02)
        loaders = {
            "train": DataLoader(
                MNIST(os.getcwd(), train=True, download=True, transform=ToTensor()),
                batch_size=32,
            ),
            "valid": DataLoader(
                MNIST(os.getcwd(), train=False, download=True, transform=ToTensor()),
                batch_size=32,
            ),
        }
        runner = CustomRunner()  # `CustomRunner` is assumed to be defined elsewhere in this module
        # model training
        runner.train(
            engine=engine or dl.DeviceEngine(device),
            model=model,
            optimizer=optimizer,
            loaders=loaders,
            logdir=logdir,
            num_epochs=1,
            verbose=False,
            valid_loader="valid",
            valid_metric="loss",
            minimize_valid_metric=True,
        )
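# A minimal usage sketch (assumption: in the repository this helper is driven
# by a test harness); the device string is illustrative -- any torch device id
# such as "cuda:0" would work, and the default DeviceEngine is used:
train_experiment("cpu")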
def __init__(self):
    super().__init__()
    self.trainset = MNIST("./data", train=True, download=True, transform=ToTensor())
    self.testset = MNIST("./data", train=False, download=True, transform=ToTensor())
def get_loaders(self, stage: str):
    # note: both loaders use the test split (train=False), presumably to keep the test fast
    loaders = {
        "train": DataLoader(
            MNIST(os.getcwd(), train=False, download=True, transform=ToTensor()),
            batch_size=32,
        ),
        "valid": DataLoader(
            MNIST(os.getcwd(), train=False, download=True, transform=ToTensor()),
            batch_size=32,
        ),
    }
    return loaders
def test_transform_kornia():
    """Run a few epochs to check the ``BatchTransformCallback`` callback."""
    model = torch.nn.Linear(28 * 28, 10)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.02)
    loaders = {
        "train": DataLoader(
            MnistDataset(MNIST(os.getcwd(), train=False, download=True, transform=ToTensor())),
            batch_size=32,
        ),
        "valid": DataLoader(
            MnistDataset(MNIST(os.getcwd(), train=False, download=True, transform=ToTensor())),
            batch_size=32,
        ),
    }
    transforms = [
        augmentation.RandomAffine(degrees=(-15, 20), scale=(0.75, 1.25)),
    ]
    runner = CustomRunner()
    # model training
    runner.train(
        model=model,
        optimizer=optimizer,
        loaders=loaders,
        logdir="./logs",
        num_epochs=5,
        verbose=False,
        load_best_on_end=True,
        check=True,
        callbacks=[
            BatchTransformCallback(
                transform=transforms, scope="on_batch_start", input_key="features"
            )
        ],
    )
    # model inference
    for prediction in runner.predict_loader(loader=loaders["train"]):
        assert prediction.detach().cpu().numpy().shape[-1] == 10
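# A minimal sketch of the `MnistDataset` wrapper assumed above: it adapts the
# (image, target) tuples from torchvision's MNIST into the dict format that
# `input_key="features"` expects. The exact field names are an assumption.
from torch.utils.data import Dataset


class MnistDataset(Dataset):
    def __init__(self, dataset):
        self.dataset = dataset

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        image, target = self.dataset[idx]
        # keep the 1x28x28 image shape: kornia's RandomAffine expects image
        # tensors; flattening for the linear layer presumably happens in the runner
        return {"features": image, "targets": target}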
def objective(trial):
    lr = trial.suggest_loguniform("lr", 1e-3, 1e-1)
    num_hidden = int(trial.suggest_loguniform("num_hidden", 32, 128))
    loaders = {
        "train": DataLoader(
            MNIST(os.getcwd(), train=False, download=True, transform=ToTensor()),
            batch_size=32,
        ),
        "valid": DataLoader(
            MNIST(os.getcwd(), train=False, download=True, transform=ToTensor()),
            batch_size=32,
        ),
    }
    model = nn.Sequential(
        nn.Flatten(), nn.Linear(784, num_hidden), nn.ReLU(), nn.Linear(num_hidden, 10)
    )
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()
    runner = dl.SupervisedRunner(input_key="features", output_key="logits", target_key="targets")
    runner.train(
        # `engine` and `device` are expected to be available from the enclosing scope
        engine=engine or dl.DeviceEngine(device),
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        loaders=loaders,
        callbacks={
            "optuna": dl.OptunaPruningCallback(
                loader_key="valid", metric_key="accuracy01", minimize=False, trial=trial
            ),
            "accuracy": dl.AccuracyCallback(
                input_key="logits", target_key="targets", num_classes=10
            ),
        },
        num_epochs=2,
    )
    score = runner.callbacks["optuna"].best_score
    return score
def test_optuna():
    trainset = MNIST("./data", train=False, download=True, transform=ToTensor())
    testset = MNIST("./data", train=False, download=True, transform=ToTensor())
    loaders = {
        "train": DataLoader(trainset, batch_size=32),
        "valid": DataLoader(testset, batch_size=64),
    }
    model = nn.Sequential(Flatten(), nn.Linear(784, 128), nn.ReLU(), nn.Linear(128, 10))

    def objective(trial):
        lr = trial.suggest_loguniform("lr", 1e-3, 1e-1)
        optimizer = torch.optim.Adam(model.parameters(), lr=lr)
        criterion = nn.CrossEntropyLoss()
        runner = dl.SupervisedRunner()
        runner.train(
            model=model,
            loaders=loaders,
            criterion=criterion,
            optimizer=optimizer,
            callbacks={
                "optuna": OptunaPruningCallback(
                    loader_key="valid", metric_key="loss", minimize=True, trial=trial
                ),
                "accuracy": AccuracyCallback(
                    num_classes=10, input_key="logits", target_key="targets"
                ),
            },
            num_epochs=2,
            valid_metric="accuracy01",
            minimize_valid_metric=False,
        )
        return runner.callbacks["optuna"].best_score

    study = optuna.create_study(
        direction="maximize",
        pruner=optuna.pruners.MedianPruner(
            n_startup_trials=1, n_warmup_steps=0, interval_steps=1
        ),
    )
    study.optimize(objective, n_trials=2, timeout=300)
    # smoke test: reaching this point without an exception is the success criterion
    assert True
def train_experiment(device, engine=None):
    with TemporaryDirectory() as logdir:
        # <--- multi-model/optimizer setup --->
        encoder = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 128))
        head = nn.Linear(128, 10)
        model = {"encoder": encoder, "head": head}
        optimizer = {
            "encoder": optim.Adam(encoder.parameters(), lr=0.02),
            "head": optim.Adam(head.parameters(), lr=0.001),
        }
        # <--- multi-model/optimizer setup --->
        criterion = nn.CrossEntropyLoss()
        loaders = {
            "train": DataLoader(
                MNIST(os.getcwd(), train=True, download=True, transform=ToTensor()),
                batch_size=32,
            ),
            "valid": DataLoader(
                MNIST(os.getcwd(), train=False, download=True, transform=ToTensor()),
                batch_size=32,
            ),
        }
        runner = CustomRunner()
        # model training
        runner.train(
            engine=engine or dl.DeviceEngine(device),
            model=model,
            criterion=criterion,
            optimizer=optimizer,
            loaders=loaders,
            logdir=logdir,
            num_epochs=1,
            verbose=False,
            valid_loader="valid",
            valid_metric="loss",
            minimize_valid_metric=True,
        )
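# A minimal sketch of the `CustomRunner` this experiment assumes: the forward
# pass is routed through both models and each optimizer is stepped separately
# on the train loader. The details are illustrative, not the repository's
# actual implementation.
class CustomRunner(dl.Runner):
    def handle_batch(self, batch):
        x, y = batch
        # chain the two models: encoder -> head
        logits = self.model["head"](self.model["encoder"](x))
        loss = self.criterion(logits, y)
        self.batch_metrics.update({"loss": loss})
        if self.is_train_loader:
            loss.backward()
            # step both optimizers from the optimizer dict
            for key in ["encoder", "head"]:
                self.optimizer[key].step()
                self.optimizer[key].zero_grad()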
def test_quantization_callback() -> None:
    """Quantize a model with ``QuantizationCallback``."""
    loaders = {
        "train": DataLoader(
            MNIST(os.getcwd(), train=False, download=True, transform=ToTensor()),
            batch_size=32,
        ),
        "valid": DataLoader(
            MNIST(os.getcwd(), train=False, download=True, transform=ToTensor()),
            batch_size=32,
        ),
    }
    model = nn.Sequential(Flatten(), nn.Linear(784, 512), nn.ReLU(), nn.Linear(512, 10))
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-2)
    runner = dl.SupervisedRunner()
    runner.train(
        model=model,
        callbacks=[dl.QuantizationCallback(logdir="./logs")],
        loaders=loaders,
        criterion=criterion,
        optimizer=optimizer,
        num_epochs=1,
        logdir="./logs",
        check=True,
    )
    assert os.path.isfile("./logs/quantized.pth")
def test_accuracy():
    """Test that accuracy does not drop too much after quantization."""
    model = torch.nn.Sequential(
        Flatten(),
        torch.nn.Linear(28 * 28, 128),
        torch.nn.ReLU(),
        torch.nn.Linear(128, 64),
        torch.nn.Linear(64, 10),
    )
    datasets = {
        "train": MNIST("./data", transform=ToTensor(), download=True),
        "valid": MNIST("./data", transform=ToTensor(), train=False),
    }
    dataloaders = {
        k: torch.utils.data.DataLoader(d, batch_size=32) for k, d in datasets.items()
    }
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-2)
    runner = SupervisedRunner()
    runner.train(
        model=model,
        optimizer=optimizer,
        loaders=dataloaders,
        callbacks=[AccuracyCallback(target_key="targets", input_key="logits")],
        num_epochs=1,
        criterion=torch.nn.CrossEntropyLoss(),
        valid_loader="valid",
        valid_metric="accuracy01",
        minimize_valid_metric=False,
    )
    accuracy_before = _evaluate_loader_accuracy(runner, dataloaders["valid"])
    q_model = quantize_model(model)
    runner.model = q_model
    accuracy_after = _evaluate_loader_accuracy(runner, dataloaders["valid"])
    assert abs(accuracy_before - accuracy_after) < 0.01
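# A plausible sketch of the `_evaluate_loader_accuracy` helper used above (the
# real helper lives elsewhere in the test module): run the model over a loader
# and return the fraction of correct top-1 predictions.
@torch.no_grad()
def _evaluate_loader_accuracy(runner, loader):
    runner.model.eval()
    correct, total = 0, 0
    for features, targets in loader:
        logits = runner.model(features)
        correct += (logits.argmax(dim=-1) == targets).sum().item()
        total += targets.size(0)
    return correct / total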
def train_experiment(device, engine=None):
    with TemporaryDirectory() as logdir:
        model = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10))
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=0.02)
        loaders = {
            "train": DataLoader(
                MNIST(os.getcwd(), train=False, download=True, transform=ToTensor()),
                batch_size=32,
            ),
            "valid": DataLoader(
                MNIST(os.getcwd(), train=False, download=True, transform=ToTensor()),
                batch_size=32,
            ),
        }
        runner = dl.SupervisedRunner(
            input_key="features", output_key="logits", target_key="targets", loss_key="loss"
        )
        callbacks = [
            dl.AccuracyCallback(input_key="logits", target_key="targets", topk_args=(1, 3, 5)),
            dl.PrecisionRecallF1SupportCallback(
                input_key="logits", target_key="targets", num_classes=10
            ),
        ]
        if SETTINGS.ml_required:
            callbacks.append(
                dl.ConfusionMatrixCallback(
                    input_key="logits", target_key="targets", num_classes=10
                )
            )
        # AUC computation is skipped for AMP engines
        if SETTINGS.amp_required and (
            engine is None
            or not isinstance(
                engine,
                (dl.AMPEngine, dl.DataParallelAMPEngine, dl.DistributedDataParallelAMPEngine),
            )
        ):
            callbacks.append(dl.AUCCallback(input_key="logits", target_key="targets"))
        if SETTINGS.onnx_required:
            callbacks.append(dl.OnnxCallback(logdir=logdir, input_key="features"))
        if SETTINGS.pruning_required:
            callbacks.append(dl.PruningCallback(pruning_fn="l1_unstructured", amount=0.5))
        if SETTINGS.quantization_required:
            callbacks.append(dl.QuantizationCallback(logdir=logdir))
        if engine is None or not isinstance(engine, dl.DistributedDataParallelEngine):
            callbacks.append(dl.TracingCallback(logdir=logdir, input_key="features"))
        # model training
        runner.train(
            engine=engine or dl.DeviceEngine(device),
            model=model,
            criterion=criterion,
            optimizer=optimizer,
            loaders=loaders,
            num_epochs=1,
            callbacks=callbacks,
            logdir=logdir,
            valid_loader="valid",
            valid_metric="loss",
            minimize_valid_metric=True,
            verbose=False,
            load_best_on_end=True,
            timeit=False,
            check=False,
            overfit=False,
            fp16=False,
            ddp=False,
        )
        # model inference
        for prediction in runner.predict_loader(loader=loaders["valid"]):
            assert prediction["logits"].detach().cpu().numpy().shape[-1] == 10
        # model post-processing
        features_batch = next(iter(loaders["valid"]))[0]
        # model stochastic weight averaging
        model.load_state_dict(
            utils.get_averaged_weights_by_path_mask(logdir=logdir, path_mask="*.pth")
        )
        # model onnx export
        if SETTINGS.onnx_required:
            utils.onnx_export(
                model=runner.model,
                batch=runner.engine.sync_device(features_batch),
                file="./mnist.onnx",
                verbose=False,
            )
        # model quantization
        if SETTINGS.quantization_required:
            utils.quantize_model(model=runner.model)
        # model pruning
        if SETTINGS.pruning_required:
            utils.prune_model(model=runner.model, pruning_fn="l1_unstructured", amount=0.8)
        # model tracing
        utils.trace_model(model=runner.model, batch=features_batch)
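# A hedged follow-up sketch: sanity-checking the exported ONNX file with
# onnxruntime (onnxruntime is not used by the experiment itself; the file name
# matches the `utils.onnx_export` call above, the input shape is MNIST's).
def check_onnx_export(path="./mnist.onnx"):
    import numpy as np
    import onnxruntime as ort

    session = ort.InferenceSession(path, providers=["CPUExecutionProvider"])
    input_name = session.get_inputs()[0].name
    dummy = np.random.rand(1, 1, 28, 28).astype(np.float32)
    (logits,) = session.run(None, {input_name: dummy})
    assert logits.shape[-1] == 10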
def train_experiment(device):
    with TemporaryDirectory() as logdir:
        latent_dim = 128
        generator = nn.Sequential(
            # We want to generate 128 coefficients to reshape into a 7x7x128 map
            nn.Linear(128, 128 * 7 * 7),
            nn.LeakyReLU(0.2, inplace=True),
            Lambda(lambda x: x.view(x.size(0), 128, 7, 7)),
            nn.ConvTranspose2d(128, 128, (4, 4), stride=(2, 2), padding=1),
            nn.LeakyReLU(0.2, inplace=True),
            nn.ConvTranspose2d(128, 128, (4, 4), stride=(2, 2), padding=1),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(128, 1, (7, 7), padding=3),
            nn.Sigmoid(),
        )
        discriminator = nn.Sequential(
            nn.Conv2d(1, 64, (3, 3), stride=(2, 2), padding=1),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(64, 128, (3, 3), stride=(2, 2), padding=1),
            nn.LeakyReLU(0.2, inplace=True),
            GlobalMaxPool2d(),
            Flatten(),
            nn.Linear(128, 1),
        )
        model = {"generator": generator, "discriminator": discriminator}
        criterion = {
            "generator": nn.BCEWithLogitsLoss(),
            "discriminator": nn.BCEWithLogitsLoss(),
        }
        optimizer = {
            "generator": torch.optim.Adam(generator.parameters(), lr=0.0003, betas=(0.5, 0.999)),
            "discriminator": torch.optim.Adam(
                discriminator.parameters(), lr=0.0003, betas=(0.5, 0.999)
            ),
        }
        loaders = {
            "train": DataLoader(
                MNIST(os.getcwd(), train=False, download=True, transform=ToTensor()),
                batch_size=32,
            ),
        }

        class CustomRunner(dl.Runner):
            def predict_batch(self, batch):
                batch_size = 1
                # Sample random points in the latent space
                random_latent_vectors = torch.randn(batch_size, latent_dim).to(self.device)
                # Decode them to fake images
                generated_images = self.model["generator"](random_latent_vectors).detach()
                return generated_images

            def handle_batch(self, batch):
                real_images, _ = batch
                batch_size = real_images.shape[0]
                # Sample random points in the latent space
                random_latent_vectors = torch.randn(batch_size, latent_dim).to(self.device)
                # Decode them to fake images
                generated_images = self.model["generator"](random_latent_vectors).detach()
                # Combine them with real images
                combined_images = torch.cat([generated_images, real_images])
                # Assemble labels discriminating real from fake images
                labels = torch.cat(
                    [torch.ones((batch_size, 1)), torch.zeros((batch_size, 1))]
                ).to(self.device)
                # Add random noise to the labels - important trick!
                labels += 0.05 * torch.rand(labels.shape).to(self.device)
                # Discriminator forward
                combined_predictions = self.model["discriminator"](combined_images)
                # Sample random points in the latent space
                random_latent_vectors = torch.randn(batch_size, latent_dim).to(self.device)
                # Assemble labels that say "all real images"
                misleading_labels = torch.zeros((batch_size, 1)).to(self.device)
                # Generator forward
                generated_images = self.model["generator"](random_latent_vectors)
                generated_predictions = self.model["discriminator"](generated_images)
                self.batch = {
                    "combined_predictions": combined_predictions,
                    "labels": labels,
                    "generated_predictions": generated_predictions,
                    "misleading_labels": misleading_labels,
                }

        runner = CustomRunner()
        runner.train(
            engine=dl.DeviceEngine(device),
            model=model,
            criterion=criterion,
            optimizer=optimizer,
            loaders=loaders,
            callbacks=[
                dl.CriterionCallback(
                    input_key="combined_predictions",
                    target_key="labels",
                    metric_key="loss_discriminator",
                    criterion_key="discriminator",
                ),
                dl.CriterionCallback(
                    input_key="generated_predictions",
                    target_key="misleading_labels",
                    metric_key="loss_generator",
                    criterion_key="generator",
                ),
                dl.OptimizerCallback(
                    model_key="generator",
                    optimizer_key="generator",
                    metric_key="loss_generator",
                ),
                dl.OptimizerCallback(
                    model_key="discriminator",
                    optimizer_key="discriminator",
                    metric_key="loss_discriminator",
                ),
            ],
            valid_loader="train",
            valid_metric="loss_generator",
            minimize_valid_metric=True,
            num_epochs=1,
            verbose=False,
            logdir=logdir,
        )
        runner.predict_batch(None)[0, 0].cpu().numpy()
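# Minimal sketches of the small helper modules the GAN example relies on
# (assumed to be defined alongside it; these are illustrative versions --
# `Flatten` can simply be `nn.Flatten` in modern PyTorch):
class Lambda(nn.Module):
    """Wrap an arbitrary function as an nn.Module."""

    def __init__(self, fn):
        super().__init__()
        self.fn = fn

    def forward(self, x):
        return self.fn(x)


class GlobalMaxPool2d(nn.Module):
    """Max-pool each channel over its full spatial extent."""

    def forward(self, x):
        return torch.nn.functional.adaptive_max_pool2d(x, output_size=1)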
def train_experiment(device, engine=None):
    with TemporaryDirectory() as logdir:
        from catalyst import utils

        utils.set_global_seed(RANDOM_STATE)
        # 1. train, valid and test loaders
        transforms = Compose([ToTensor(), Normalize((0.1307,), (0.3081,))])
        train_data = MNIST(os.getcwd(), train=True, download=True, transform=transforms)
        train_labels = train_data.targets.cpu().numpy().tolist()
        train_sampler = data.BatchBalanceClassSampler(train_labels, num_classes=10, num_samples=4)
        train_loader = DataLoader(train_data, batch_sampler=train_sampler)
        valid_dataset = MNIST(root=os.getcwd(), transform=transforms, train=False, download=True)
        valid_loader = DataLoader(dataset=valid_dataset, batch_size=32)
        test_dataset = MNIST(root=os.getcwd(), transform=transforms, train=False, download=True)
        test_loader = DataLoader(dataset=test_dataset, batch_size=32)
        # 2. model and optimizer
        model = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 16), nn.LeakyReLU(inplace=True))
        optimizer = Adam(model.parameters(), lr=LR)
        scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, [2])
        # 3. criterion with triplets sampling
        sampler_inbatch = data.HardTripletsSampler(norm_required=False)
        criterion = nn.TripletMarginLossWithSampler(margin=0.5, sampler_inbatch=sampler_inbatch)

        # 4. training with catalyst Runner
        class CustomRunner(dl.SupervisedRunner):
            def handle_batch(self, batch) -> None:
                images, targets = batch["features"].float(), batch["targets"].long()
                features = self.model(images)
                self.batch = {"embeddings": features, "targets": targets}

        callbacks = [
            dl.ControlFlowCallback(
                dl.CriterionCallback(
                    input_key="embeddings", target_key="targets", metric_key="loss"
                ),
                loaders="train",
            ),
            dl.SklearnModelCallback(
                feature_key="embeddings",
                target_key="targets",
                train_loader="train",
                valid_loaders=["valid", "infer"],
                model_fn=RandomForestClassifier,
                predict_method="predict_proba",
                predict_key="sklearn_predict",
                random_state=RANDOM_STATE,
                n_estimators=50,
            ),
            dl.ControlFlowCallback(
                dl.AccuracyCallback(
                    target_key="targets", input_key="sklearn_predict", topk_args=(1, 3)
                ),
                loaders=["valid", "infer"],
            ),
        ]
        runner = CustomRunner(input_key="features", output_key="embeddings")
        runner.train(
            engine=engine or dl.DeviceEngine(device),
            model=model,
            criterion=criterion,
            optimizer=optimizer,
            scheduler=scheduler,
            callbacks=callbacks,
            loaders={"train": train_loader, "valid": valid_loader, "infer": test_loader},
            verbose=False,
            valid_loader="valid",
            valid_metric="accuracy",
            minimize_valid_metric=False,
            num_epochs=TRAIN_EPOCH,
            logdir=logdir,
        )
        valid_path = Path(logdir) / "logs/infer.csv"
        best_accuracy = max(float(row["accuracy"]) for row in read_csv(valid_path))
        assert best_accuracy > 0.8
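# A plausible sketch of the `read_csv` helper assumed above: yield each row of
# a CSV metrics log as a dict keyed by the header row (the real helper may differ).
import csv


def read_csv(csv_path):
    with open(csv_path, "r") as f:
        for row in csv.DictReader(f):
            yield row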
def train_experiment(device):
    with TemporaryDirectory() as logdir:
        # <--- multi-model setup --->
        encoder = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 128))
        head = nn.Linear(128, 10)
        model = {"encoder": encoder, "head": head}
        optimizer = optim.Adam(
            [{"params": encoder.parameters()}, {"params": head.parameters()}], lr=0.02
        )
        # <--- multi-model setup --->
        criterion = nn.CrossEntropyLoss()
        loaders = {
            "train": DataLoader(
                MNIST(os.getcwd(), train=True, download=True, transform=ToTensor()),
                batch_size=32,
            ),
            "valid": DataLoader(
                MNIST(os.getcwd(), train=False, download=True, transform=ToTensor()),
                batch_size=32,
            ),
        }

        class CustomRunner(dl.Runner):
            def predict_batch(self, batch):
                # model inference step; `self.model` is a dict, so chain both parts
                x = batch[0].to(self.device)
                return self.model["head"](self.model["encoder"](x))

            def on_loader_start(self, runner):
                super().on_loader_start(runner)
                self.meters = {
                    key: metrics.AdditiveValueMetric(compute_on_call=False)
                    for key in ["loss", "accuracy01", "accuracy03"]
                }

            def handle_batch(self, batch):
                # model train/valid step
                # unpack the batch
                x, y = batch
                # <--- multi-model usage --->
                # run model forward pass
                x_ = self.model["encoder"](x)
                logits = self.model["head"](x_)
                # <--- multi-model usage --->
                # compute the loss
                loss = self.criterion(logits, y)
                # compute other metrics of interest
                accuracy01, accuracy03 = metrics.accuracy(logits, y, topk=(1, 3))
                # log metrics
                self.batch_metrics.update(
                    {"loss": loss, "accuracy01": accuracy01, "accuracy03": accuracy03}
                )
                for key in ["loss", "accuracy01", "accuracy03"]:
                    self.meters[key].update(self.batch_metrics[key].item(), self.batch_size)
                # run model backward pass
                if self.is_train_loader:
                    loss.backward()
                    self.optimizer.step()
                    self.optimizer.zero_grad()

            def on_loader_end(self, runner):
                for key in ["loss", "accuracy01", "accuracy03"]:
                    self.loader_metrics[key] = self.meters[key].compute()[0]
                super().on_loader_end(runner)

        runner = CustomRunner()
        # model training
        runner.train(
            engine=dl.DeviceEngine(device),
            model=model,
            criterion=criterion,
            optimizer=optimizer,
            loaders=loaders,
            logdir=logdir,
            num_epochs=1,
            verbose=True,
            valid_loader="valid",
            valid_metric="loss",
            minimize_valid_metric=True,
        )
def get_transform(self, stage: str = None, mode: str = None):
    return ToTensor()
def train_experiment(device):
    with TemporaryDirectory() as logdir:
        teacher = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10))
        student = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10))
        criterion = {"cls": nn.CrossEntropyLoss(), "kl": nn.KLDivLoss(reduction="batchmean")}
        optimizer = optim.Adam(student.parameters(), lr=0.02)
        loaders = {
            "train": DataLoader(
                MNIST(os.getcwd(), train=True, download=True, transform=ToTensor()),
                batch_size=32,
            ),
            "valid": DataLoader(
                MNIST(os.getcwd(), train=False, download=True, transform=ToTensor()),
                batch_size=32,
            ),
        }

        class DistilRunner(dl.Runner):
            def handle_batch(self, batch):
                x, y = batch
                teacher.eval()  # let's manually set teacher model to eval mode
                with torch.no_grad():
                    t_logits = self.model["teacher"](x)
                s_logits = self.model["student"](x)
                self.batch = {
                    "t_logits": t_logits,
                    "s_logits": s_logits,
                    "targets": y,
                    "s_logprobs": F.log_softmax(s_logits, dim=-1),
                    "t_probs": F.softmax(t_logits, dim=-1),
                }

        runner = DistilRunner()
        # model training
        runner.train(
            engine=dl.DeviceEngine(device),
            model={"teacher": teacher, "student": student},
            criterion=criterion,
            optimizer=optimizer,
            loaders=loaders,
            num_epochs=1,
            logdir=logdir,
            verbose=True,
            callbacks=[
                # MNIST has 10 classes
                dl.AccuracyCallback(
                    input_key="t_logits", target_key="targets", num_classes=10, prefix="teacher_"
                ),
                dl.AccuracyCallback(
                    input_key="s_logits", target_key="targets", num_classes=10, prefix="student_"
                ),
                dl.CriterionCallback(
                    input_key="s_logits",
                    target_key="targets",
                    metric_key="cls_loss",
                    criterion_key="cls",
                ),
                dl.CriterionCallback(
                    input_key="s_logprobs",
                    target_key="t_probs",
                    metric_key="kl_div_loss",
                    criterion_key="kl",
                ),
                dl.MetricAggregationCallback(
                    prefix="loss", metrics=["kl_div_loss", "cls_loss"], mode="mean"
                ),
                dl.OptimizerCallback(metric_key="loss", model_key="student"),
                dl.CheckpointCallback(
                    logdir=logdir,
                    loader_key="valid",
                    metric_key="loss",
                    minimize=True,
                    save_n_best=3,
                ),
            ],
        )
def train_experiment(device, engine=None):
    with TemporaryDirectory() as logdir:
        # the full convolutional generator/discriminator pair from the previous
        # GAN example is replaced with a tiny linear GAN so the test runs under DDP
        latent_dim = 32
        generator = nn.Sequential(
            nn.Linear(latent_dim, 28 * 28),
            Lambda(_ddp_hack),
            nn.Sigmoid(),
        )
        discriminator = nn.Sequential(Flatten(), nn.Linear(28 * 28, 1))
        model = {"generator": generator, "discriminator": discriminator}
        criterion = {
            "generator": nn.BCEWithLogitsLoss(),
            "discriminator": nn.BCEWithLogitsLoss(),
        }
        optimizer = {
            "generator": torch.optim.Adam(generator.parameters(), lr=0.0003, betas=(0.5, 0.999)),
            "discriminator": torch.optim.Adam(
                discriminator.parameters(), lr=0.0003, betas=(0.5, 0.999)
            ),
        }
        loaders = {
            "train": DataLoader(
                MNIST(os.getcwd(), train=False, download=True, transform=ToTensor()),
                batch_size=32,
            ),
        }
        runner = CustomRunner(latent_dim)
        runner.train(
            engine=engine or dl.DeviceEngine(device),
            model=model,
            criterion=criterion,
            optimizer=optimizer,
            loaders=loaders,
            callbacks=[
                dl.CriterionCallback(
                    input_key="combined_predictions",
                    target_key="labels",
                    metric_key="loss_discriminator",
                    criterion_key="discriminator",
                ),
                dl.CriterionCallback(
                    input_key="generated_predictions",
                    target_key="misleading_labels",
                    metric_key="loss_generator",
                    criterion_key="generator",
                ),
                dl.OptimizerCallback(
                    model_key="generator",
                    optimizer_key="generator",
                    metric_key="loss_generator",
                ),
                dl.OptimizerCallback(
                    model_key="discriminator",
                    optimizer_key="discriminator",
                    metric_key="loss_discriminator",
                ),
            ],
            valid_loader="train",
            valid_metric="loss_generator",
            minimize_valid_metric=True,
            num_epochs=1,
            verbose=False,
            logdir=logdir,
        )
        if not isinstance(engine, dl.DistributedDataParallelEngine):
            runner.predict_batch(None)[0, 0].cpu().numpy()
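# A plausible definition of the `_ddp_hack` reshape helper used above: a plain
# module-level function (picklable, unlike a lambda, which matters when DDP
# spawns workers) that folds the flat generator output back into image shape.
def _ddp_hack(x):
    return x.view(x.size(0), 1, 28, 28)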
def test_reid_pipeline():
    """Check that the ReID pipeline runs and computes metrics with ``ReidCMCScoreCallback``."""
    with TemporaryDirectory() as logdir:
        # 1. train and valid loaders
        transforms = Compose([ToTensor(), Normalize((0.1307,), (0.3081,))])
        train_dataset = MnistMLDataset(root=os.getcwd(), download=True, transform=transforms)
        sampler = data.BatchBalanceClassSampler(
            labels=train_dataset.get_labels(), num_classes=3, num_samples=10, num_batches=20
        )
        train_loader = DataLoader(dataset=train_dataset, batch_sampler=sampler, num_workers=0)
        valid_dataset = MnistReIDQGDataset(
            root=os.getcwd(), transform=transforms, gallery_fraq=0.2
        )
        valid_loader = DataLoader(dataset=valid_dataset, batch_size=1024)
        # 2. model and optimizer
        model = models.MnistSimpleNet(out_features=16)
        optimizer = Adam(model.parameters(), lr=0.001)
        # 3. criterion with triplets sampling
        sampler_inbatch = data.AllTripletsSampler(max_output_triplets=1000)
        criterion = nn.TripletMarginLossWithSampler(margin=0.5, sampler_inbatch=sampler_inbatch)
        # 4. training with catalyst Runner
        callbacks = [
            dl.ControlFlowCallback(
                dl.CriterionCallback(
                    input_key="embeddings", target_key="targets", metric_key="loss"
                ),
                loaders="train",
            ),
            dl.ControlFlowCallback(
                dl.ReidCMCScoreCallback(
                    embeddings_key="embeddings",
                    pids_key="targets",
                    cids_key="cids",
                    is_query_key="is_query",
                    topk_args=[1],
                ),
                loaders="valid",
            ),
            dl.PeriodicLoaderCallback(
                valid_loader_key="valid", valid_metric_key="cmc01", minimize=False, valid=2
            ),
        ]
        runner = ReIDCustomRunner()
        runner.train(
            model=model,
            criterion=criterion,
            optimizer=optimizer,
            callbacks=callbacks,
            loaders=OrderedDict({"train": train_loader, "valid": valid_loader}),
            verbose=False,
            logdir=logdir,
            valid_loader="valid",
            valid_metric="cmc01",
            minimize_valid_metric=False,
            num_epochs=10,
        )
        assert "cmc01" in runner.loader_metrics
        assert runner.loader_metrics["cmc01"] > 0.7
def train_experiment(device, engine=None):
    with TemporaryDirectory() as logdir:
        teacher = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10))
        student = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10))
        model = {"teacher": teacher, "student": student}
        criterion = {"cls": nn.CrossEntropyLoss(), "kl": nn.KLDivLoss(reduction="batchmean")}
        optimizer = optim.Adam(student.parameters(), lr=0.02)
        loaders = {
            "train": DataLoader(
                MNIST(os.getcwd(), train=True, download=True, transform=ToTensor()),
                batch_size=32,
            ),
            "valid": DataLoader(
                MNIST(os.getcwd(), train=False, download=True, transform=ToTensor()),
                batch_size=32,
            ),
        }
        runner = DistilRunner()  # the same DistilRunner as in the distillation example above
        # model training
        runner.train(
            engine=engine or dl.DeviceEngine(device),
            model=model,
            criterion=criterion,
            optimizer=optimizer,
            loaders=loaders,
            num_epochs=1,
            logdir=logdir,
            verbose=False,
            callbacks=[
                # MNIST has 10 classes
                dl.AccuracyCallback(
                    input_key="t_logits", target_key="targets", num_classes=10, prefix="teacher_"
                ),
                dl.AccuracyCallback(
                    input_key="s_logits", target_key="targets", num_classes=10, prefix="student_"
                ),
                dl.CriterionCallback(
                    input_key="s_logits",
                    target_key="targets",
                    metric_key="cls_loss",
                    criterion_key="cls",
                ),
                dl.CriterionCallback(
                    input_key="s_logprobs",
                    target_key="t_probs",
                    metric_key="kl_div_loss",
                    criterion_key="kl",
                ),
                dl.MetricAggregationCallback(
                    prefix="loss", metrics=["kl_div_loss", "cls_loss"], mode="mean"
                ),
                dl.OptimizerCallback(metric_key="loss", model_key="student"),
                dl.CheckpointCallback(
                    logdir=logdir,
                    loader_key="valid",
                    metric_key="loss",
                    minimize=True,
                    save_n_best=3,
                ),
            ],
        )
def train_experiment(device, engine=None):
    with TemporaryDirectory() as logdir:
        # 1. train and valid loaders
        transforms = Compose([ToTensor(), Normalize((0.1307,), (0.3081,))])
        train_dataset = datasets.MnistMLDataset(
            root=os.getcwd(), download=True, transform=transforms
        )
        sampler = data.BatchBalanceClassSampler(
            labels=train_dataset.get_labels(), num_classes=5, num_samples=10, num_batches=10
        )
        train_loader = DataLoader(dataset=train_dataset, batch_sampler=sampler)
        valid_dataset = datasets.MnistQGDataset(
            root=os.getcwd(), transform=transforms, gallery_fraq=0.2
        )
        valid_loader = DataLoader(dataset=valid_dataset, batch_size=1024)
        # 2. model and optimizer
        model = models.MnistSimpleNet(out_features=16)
        optimizer = Adam(model.parameters(), lr=0.001)
        # 3. criterion with triplets sampling
        sampler_inbatch = data.HardTripletsSampler(norm_required=False)
        criterion = nn.TripletMarginLossWithSampler(margin=0.5, sampler_inbatch=sampler_inbatch)
        # 4. training with catalyst Runner
        callbacks = [
            dl.ControlFlowCallback(
                dl.CriterionCallback(
                    input_key="embeddings", target_key="targets", metric_key="loss"
                ),
                loaders="train",
            ),
            dl.ControlFlowCallback(
                dl.CMCScoreCallback(
                    embeddings_key="embeddings",
                    labels_key="targets",
                    is_query_key="is_query",
                    topk_args=[1],
                ),
                loaders="valid",
            ),
            dl.PeriodicLoaderCallback(
                valid_loader_key="valid", valid_metric_key="cmc01", minimize=False, valid=2
            ),
        ]
        runner = CustomRunner(input_key="features", output_key="embeddings")
        runner.train(
            engine=engine or dl.DeviceEngine(device),
            model=model,
            criterion=criterion,
            optimizer=optimizer,
            callbacks=callbacks,
            loaders={"train": train_loader, "valid": valid_loader},
            verbose=False,
            logdir=logdir,
            valid_loader="valid",
            valid_metric="cmc01",
            minimize_valid_metric=False,
            num_epochs=2,
        )
def main(args):
    train_dataset = TorchvisionDatasetWrapper(
        MNIST(root="./", download=True, train=True, transform=ToTensor())
    )
    val_dataset = TorchvisionDatasetWrapper(
        MNIST(root="./", download=True, train=False, transform=ToTensor())
    )
    train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=32, shuffle=True)
    val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_size=64)
    loaders = {"train": train_dataloader, "valid": val_dataloader}
    utils.set_global_seed(args.seed)
    net = nn.Sequential(
        Flatten(),
        nn.Linear(28 * 28, 300),
        nn.ReLU(),
        nn.Linear(300, 100),
        nn.ReLU(),
        nn.Linear(100, 10),
    )
    initial_state_dict = net.state_dict()
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(net.parameters())
    if args.device is not None:
        engine = dl.DeviceEngine(args.device)
    else:
        engine = None
    if args.vanilla_pruning:
        runner = dl.SupervisedRunner(engine=engine)
        runner.train(
            model=net,
            criterion=criterion,
            optimizer=optimizer,
            loaders=loaders,
            callbacks=[
                dl.AccuracyCallback(input_key="logits", target_key="targets", num_classes=10),
            ],
            logdir="./logdir",
            num_epochs=args.num_epochs,
            load_best_on_end=True,
            valid_metric="accuracy01",
            minimize_valid_metric=False,
            valid_loader="valid",
        )
        pruning_fn = partial(
            utils.pruning.prune_model,
            pruning_fn=args.pruning_method,
            amount=args.amount,
            keys_to_prune=["weight"],  # the parameter name is "weight", matching the callback below
            dim=args.dim,
            l_norm=args.n,
        )
        acc, amount = validate_model(
            runner, pruning_fn=pruning_fn, loader=loaders["valid"], num_sessions=args.num_sessions
        )
        torch.save(acc, "accuracy.pth")
        torch.save(amount, "amount.pth")
    else:
        runner = PruneRunner(num_sessions=args.num_sessions, engine=engine)
        callbacks = [
            dl.AccuracyCallback(input_key="logits", target_key="targets", num_classes=10),
            dl.PruningCallback(
                args.pruning_method,
                keys_to_prune=["weight"],
                amount=args.amount,
                remove_reparametrization_on_stage_end=False,
            ),
            dl.CriterionCallback(input_key="logits", target_key="targets", metric_key="loss"),
            dl.OptimizerCallback(metric_key="loss"),
        ]
        if args.lottery_ticket:
            callbacks.append(LotteryTicketCallback(initial_state_dict=initial_state_dict))
        if args.kd:
            net.load_state_dict(torch.load(args.state_dict))
            callbacks.append(
                PrepareForFinePruningCallback(probability_shift=args.probability_shift)
            )
            callbacks.append(KLDivCallback(temperature=4, student_logits_key="logits"))
            callbacks.append(
                MetricAggregationCallback(
                    prefix="loss", metrics={"loss": 0.1, "kl_div_loss": 0.9}, mode="weighted_sum"
                )
            )
        runner.train(
            model=net,
            criterion=criterion,
            optimizer=optimizer,
            loaders=loaders,
            callbacks=callbacks,
            logdir=args.logdir,
            num_epochs=args.num_epochs,
            load_best_on_end=True,
            valid_metric="accuracy01",
            minimize_valid_metric=False,
            valid_loader="valid",
        )
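# A hedged sketch of the CLI wiring `main(args)` expects; the flag names mirror
# the attributes accessed above, while the defaults are illustrative assumptions.
import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="MNIST pruning experiments")
    parser.add_argument("--seed", type=int, default=42)
    parser.add_argument("--device", type=str, default=None)
    parser.add_argument("--vanilla-pruning", dest="vanilla_pruning", action="store_true")
    parser.add_argument("--pruning-method", dest="pruning_method", default="l1_unstructured")
    parser.add_argument("--amount", type=float, default=0.5)
    parser.add_argument("--dim", type=int, default=None)
    parser.add_argument("--n", type=int, default=None)
    parser.add_argument("--num-sessions", dest="num_sessions", type=int, default=5)
    parser.add_argument("--num-epochs", dest="num_epochs", type=int, default=3)
    parser.add_argument("--logdir", type=str, default="./logdir")
    parser.add_argument("--lottery-ticket", dest="lottery_ticket", action="store_true")
    parser.add_argument("--kd", action="store_true")
    parser.add_argument("--state-dict", dest="state_dict", type=str, default=None)
    parser.add_argument("--probability-shift", dest="probability_shift", action="store_true")
    main(parser.parse_args())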