def train_experiment(device, engine=None):
    with TemporaryDirectory() as logdir:
        teacher = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10))
        student = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10))
        model = {"teacher": teacher, "student": student}
        criterion = {"cls": nn.CrossEntropyLoss(), "kl": nn.KLDivLoss(reduction="batchmean")}
        optimizer = optim.Adam(student.parameters(), lr=0.02)
        loaders = {
            "train": DataLoader(
                MNIST(os.getcwd(), train=True, download=True, transform=ToTensor()),
                batch_size=32,
            ),
            "valid": DataLoader(
                MNIST(os.getcwd(), train=False, download=True, transform=ToTensor()),
                batch_size=32,
            ),
        }

        runner = DistilRunner()
        # model training
        runner.train(
            engine=engine or dl.DeviceEngine(device),
            model=model,
            criterion=criterion,
            optimizer=optimizer,
            loaders=loaders,
            num_epochs=1,
            logdir=logdir,
            verbose=False,
            callbacks=[
                dl.AccuracyCallback(
                    input_key="t_logits", target_key="targets", num_classes=2, prefix="teacher_"
                ),
                dl.AccuracyCallback(
                    input_key="s_logits", target_key="targets", num_classes=2, prefix="student_"
                ),
                dl.CriterionCallback(
                    input_key="s_logits",
                    target_key="targets",
                    metric_key="cls_loss",
                    criterion_key="cls",
                ),
                dl.CriterionCallback(
                    input_key="s_logprobs",
                    target_key="t_probs",
                    metric_key="kl_div_loss",
                    criterion_key="kl",
                ),
                dl.MetricAggregationCallback(
                    metric_key="loss", metrics=["kl_div_loss", "cls_loss"], mode="mean"
                ),
                dl.OptimizerCallback(metric_key="loss", model_key="student"),
                dl.CheckpointCallback(
                    logdir=logdir,
                    loader_key="valid",
                    metric_key="loss",
                    minimize=True,
                    save_n_best=3,
                ),
            ],
        )

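# `DistilRunner` is not defined in this snippet; a minimal sketch matching the
# inline definition from the distillation example later in this section
# (adapted to reference the teacher through `self.model`):
class DistilRunner(dl.Runner):
    def handle_batch(self, batch):
        x, y = batch

        self.model["teacher"].eval()  # keep the teacher frozen in eval mode
        with torch.no_grad():
            t_logits = self.model["teacher"](x)

        s_logits = self.model["student"](x)
        self.batch = {
            "t_logits": t_logits,
            "s_logits": s_logits,
            "targets": y,
            "s_logprobs": F.log_softmax(s_logits, dim=-1),
            "t_probs": F.softmax(t_logits, dim=-1),
        }
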
def run_catalyst(irunner: dl.IRunner, idx: int, device: str = "cuda", num_epochs: int = 10):
    utils.set_global_seed(idx)
    loader = irunner.get_loaders()["train"]
    model = irunner.get_model().to(device)
    criterion = irunner.get_criterion()
    optimizer = irunner.get_optimizer(model)

    runner = dl.SupervisedRunner()
    runner.train(
        engine=dl.GPUEngine() if device == "cuda" else dl.CPUEngine(),
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        loaders={"train": loader},
        num_epochs=num_epochs,
        verbose=False,
        callbacks=[
            dl.AccuracyCallback(
                input_key=runner._output_key,
                target_key=runner._target_key,
                topk=(1,),
            )
        ],
    )
    return (
        runner.epoch_metrics["train"]["accuracy01"],
        runner.epoch_metrics["train"]["loss"],
        _get_used_memory(),
    )

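# Sketch of the `_get_used_memory` helper referenced above. The helper is not
# part of the snippet and its real definition is unknown; the implementation
# below is an assumption (peak CUDA memory via the standard torch API).
import torch

def _get_used_memory() -> int:
    """Return peak allocated CUDA memory in bytes (0 when CUDA is unavailable)."""
    if torch.cuda.is_available():
        return torch.cuda.max_memory_allocated()
    return 0
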
def get_callbacks(self):
    return {
        "criterion": dl.CriterionCallback(
            metric_key="loss", input_key="logits", target_key="targets"
        ),
        "backward": dl.BackwardCallback(metric_key="loss"),
        "optimizer": dl.OptimizerCallback(metric_key="loss"),
        "scheduler": dl.SchedulerCallback(loader_key="valid", metric_key="loss"),
        "accuracy": dl.AccuracyCallback(
            input_key="logits", target_key="targets", topk=(1, 3, 5)
        ),
        "checkpoint": dl.CheckpointCallback(
            self._logdir,
            loader_key="valid",
            metric_key="accuracy01",
            minimize=False,
            topk=1,
        ),
        "tqdm": dl.TqdmCallback(),
    }

def test_evaluation_loader_metrics() -> None:
    """Test that metrics computed by ``evaluate_loader`` match the runner's internal metrics."""
    dataset = DummyDataset()
    model = nn.Linear(in_features=dataset.features_dim, out_features=dataset.out_dim)
    loader = DataLoader(dataset=dataset, batch_size=1)
    callbacks = [
        dl.AccuracyCallback(input_key="logits", target_key="targets", topk=(1,))
    ]
    runner = SupervisedRunner()
    runner.train(
        loaders={"train": loader, "valid": loader},
        model=model,
        num_epochs=1,
        criterion=nn.BCEWithLogitsLoss(),
        callbacks=callbacks,
    )
    runner_internal_metrics = runner.loader_metrics
    evaluate_loader_metrics = runner.evaluate_loader(loader=loader, callbacks=callbacks)

    assert runner_internal_metrics["accuracy01"] == evaluate_loader_metrics["accuracy01"]

def get_callbacks(self, stage: str):
    callbacks = {
        "criterion": dl.CriterionCallback(
            metric_key="loss", input_key="logits", target_key="targets"
        ),
        "optimizer": dl.OptimizerCallback(
            metric_key="loss",
            grad_clip_fn=nn.utils.clip_grad_norm_,
            grad_clip_params={"max_norm": 1.0},
        ),
        # "scheduler": dl.SchedulerCallback(loader_key="valid", metric_key="loss"),
        "accuracy": dl.AccuracyCallback(
            input_key="logits", target_key="targets", topk_args=(1, 3, 5)
        ),
        "classification": dl.PrecisionRecallF1SupportCallback(
            input_key="logits", target_key="targets", num_classes=10
        ),
        "checkpoint": dl.CheckpointCallback(
            self._logdir, loader_key="valid", metric_key="loss", minimize=True, save_n_best=3
        ),
    }
    if SETTINGS.ml_required:
        callbacks["confusion_matrix"] = dl.ConfusionMatrixCallback(
            input_key="logits", target_key="targets", num_classes=10
        )
    return callbacks

def test_catalyst_callback(tmp_dir, runner, loaders):
    model = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10))
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.02)

    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        loaders=loaders,
        num_epochs=2,
        callbacks=[
            dl.AccuracyCallback(input_key="logits", target_key="targets"),
            DvcLiveCallback(),
        ],
        logdir="./logs",
        valid_loader="valid",
        valid_metric="loss",
        minimize_valid_metric=True,
        verbose=True,
        load_best_on_end=True,
    )

    assert os.path.exists("dvclive")
    train_path = tmp_dir / "dvclive/train"
    valid_path = tmp_dir / "dvclive/valid"
    assert train_path.is_dir()
    assert valid_path.is_dir()
    assert any("accuracy" in x.name for x in train_path.iterdir())
    assert any("accuracy" in x.name for x in valid_path.iterdir())

def get_callbacks(self, stage: str):
    return {
        "criterion": dl.CriterionCallback(
            metric_key="loss", input_key="logits", target_key="targets"
        ),
        "optimizer": dl.OptimizerCallback(metric_key="loss"),
        # "scheduler": dl.SchedulerCallback(loader_key="valid", metric_key="loss"),
        "accuracy": dl.AccuracyCallback(
            input_key="logits", target_key="targets", topk_args=(1, 3, 5)
        ),
        "classification": dl.PrecisionRecallF1SupportCallback(
            input_key="logits", target_key="targets", num_classes=10
        ),
        "confusion_matrix": dl.ConfusionMatrixCallback(
            input_key="logits", target_key="targets", num_classes=10
        ),
        "checkpoint": dl.CheckpointCallback(
            self._logdir, loader_key="valid", metric_key="loss", minimize=True, save_n_best=3
        ),
    }

def get_callbacks(self, stage: str):
    return {
        "criterion": dl.CriterionCallback(
            input_key="logits", target_key="labels", metric_key="loss"
        ),
        "optimizer": dl.OptimizerCallback(metric_key="loss"),
        "scheduler": dl.SchedulerCallback(loader_key="valid", metric_key="loss", mode="batch"),
        "accuracy": dl.AccuracyCallback(
            input_key="logits", target_key="labels", topk_args=(1,)
        ),
        "checkpoint": dl.CheckpointCallback(
            self._logdir,
            loader_key="valid",
            metric_key="accuracy",
            minimize=False,
            save_n_best=1,
        ),
        # "tqdm": dl.TqdmCallback(),
    }

def objective(trial):
    lr = trial.suggest_loguniform("lr", 1e-3, 1e-1)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()

    runner = dl.SupervisedRunner()
    runner.train(
        model=model,
        loaders=loaders,
        criterion=criterion,
        optimizer=optimizer,
        callbacks={
            "optuna": dl.OptunaPruningCallback(
                loader_key="valid", metric_key="loss", minimize=True, trial=trial
            ),
            "accuracy": dl.AccuracyCallback(
                num_classes=10, input_key="logits", target_key="targets"
            ),
        },
        num_epochs=2,
        valid_metric="accuracy01",
        minimize_valid_metric=False,
    )
    return trial.best_score

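# A minimal Optuna driver sketch for the `objective` above. The study settings
# (direction, pruner, trial count) are illustrative assumptions; only
# `create_study`/`optimize` are standard Optuna API.
import optuna

study = optuna.create_study(
    direction="maximize",  # `objective` reports a score while maximizing accuracy01
    pruner=optuna.pruners.MedianPruner(),  # cooperates with dl.OptunaPruningCallback
)
study.optimize(objective, n_trials=10)
print(study.best_value, study.best_params)
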
def test_disabling_loss_for_train():
    old_stdout = sys.stdout
    sys.stdout = str_stdout = StringIO()

    # experiment setup
    logdir = "./logs/control_flow"
    checkpoint = logdir + "/checkpoints"
    logfile = checkpoint + "/_metrics.json"

    # data
    num_samples, num_features = int(1e4), int(1e1)
    X = torch.rand(num_samples, num_features)
    y = torch.randint(0, 5, size=[num_samples])
    dataset = TensorDataset(X, y)
    loader = DataLoader(dataset, batch_size=32, num_workers=1)
    loaders = {"train": loader, "valid": loader}

    # model, criterion, optimizer, scheduler
    model = torch.nn.Linear(num_features, 5)
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters())
    runner = dl.SupervisedRunner()

    n_epochs = 5
    # first stage
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        loaders=loaders,
        logdir=logdir,
        num_epochs=n_epochs,
        verbose=False,
        main_metric="accuracy01",
        callbacks=[
            dl.ControlFlowCallback(dl.CriterionCallback(), ignore_loaders=["train"]),
            dl.AccuracyCallback(accuracy_args=[1, 3, 5]),
            dl.CheckRunCallback(num_epoch_steps=n_epochs),
        ],
    )

    sys.stdout = old_stdout
    exp_output = str_stdout.getvalue()

    assert len(re.findall(r"\(train\): loss", exp_output)) == 5
    assert len(re.findall(r"\(valid\): loss", exp_output)) == 0
    assert len(re.findall(r".*/train\.\d\.pth", exp_output)) == 1

    assert os.path.isfile(logfile)
    assert os.path.isfile(checkpoint + "/best.pth")
    assert os.path.isfile(checkpoint + "/best_full.pth")
    assert os.path.isfile(checkpoint + "/last.pth")
    assert os.path.isfile(checkpoint + "/last_full.pth")
    pth_files = [file for file in os.listdir(checkpoint) if file.endswith(".pth")]
    assert len(pth_files) == 6

    shutil.rmtree(logdir, ignore_errors=True)

def train_experiment(device, engine=None):
    with TemporaryDirectory() as logdir:
        # sample data
        num_samples, num_features, num_classes = int(1e4), int(1e1), 4
        X = torch.rand(num_samples, num_features)
        y = (torch.rand(num_samples) * num_classes).to(torch.int64)

        # pytorch loaders
        dataset = TensorDataset(X, y)
        loader = DataLoader(dataset, batch_size=32, num_workers=1)
        loaders = {"train": loader, "valid": loader}

        # model, criterion, optimizer, scheduler
        model = torch.nn.Linear(num_features, num_classes)
        criterion = torch.nn.CrossEntropyLoss()
        optimizer = torch.optim.Adam(model.parameters())
        scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, [2])

        # model training
        runner = dl.SupervisedRunner(
            input_key="features", output_key="logits", target_key="targets", loss_key="loss"
        )
        callbacks = [
            dl.AccuracyCallback(
                input_key="logits", target_key="targets", num_classes=num_classes
            ),
            dl.PrecisionRecallF1SupportCallback(
                input_key="logits", target_key="targets", num_classes=4
            ),
        ]
        if SETTINGS.ml_required:
            callbacks.append(
                dl.ConfusionMatrixCallback(
                    input_key="logits", target_key="targets", num_classes=4
                )
            )
        if SETTINGS.amp_required and (
            engine is None
            or not isinstance(
                engine,
                (dl.AMPEngine, dl.DataParallelAMPEngine, dl.DistributedDataParallelAMPEngine),
            )
        ):
            callbacks.append(dl.AUCCallback(input_key="logits", target_key="targets"))

        runner.train(
            engine=engine or dl.DeviceEngine(device),
            model=model,
            criterion=criterion,
            optimizer=optimizer,
            scheduler=scheduler,
            loaders=loaders,
            logdir=logdir,
            num_epochs=1,
            valid_loader="valid",
            valid_metric="accuracy03",
            minimize_valid_metric=False,
            verbose=False,
            callbacks=callbacks,
        )

def get_callbacks(self, stage: str):
    callbacks = {
        "scores": dl.BatchTransformCallback(
            input_key="logits",
            output_key="scores",
            transform=partial(torch.softmax, dim=1),
            scope="on_batch_end",
        ),
        "labels": dl.BatchTransformCallback(
            input_key="scores",
            output_key="labels",
            transform=partial(torch.argmax, dim=1),
            scope="on_batch_end",
        ),
        "criterion": dl.CriterionCallback(
            metric_key="loss", input_key="logits", target_key="targets"
        ),
        "optimizer": dl.OptimizerCallback(
            metric_key="loss",
            grad_clip_fn=nn.utils.clip_grad_norm_,
            grad_clip_params={"max_norm": 1.0},
        ),
        # "scheduler": dl.SchedulerCallback(loader_key="valid", metric_key="loss"),
        "accuracy": dl.AccuracyCallback(
            input_key="logits", target_key="targets", topk_args=(1, 3, 5)
        ),
        "classification": dl.PrecisionRecallF1SupportCallback(
            input_key="logits", target_key="targets", num_classes=10
        ),
        "checkpoint": dl.CheckpointCallback(
            self._logdir, loader_key="valid", metric_key="loss", minimize=True, save_n_best=3
        ),
    }
    if SETTINGS.ml_required:
        callbacks["confusion_matrix"] = dl.ConfusionMatrixCallback(
            input_key="logits", target_key="targets", num_classes=10
        )
        callbacks["f1_score"] = dl.SklearnBatchCallback(
            keys={"y_pred": "labels", "y_true": "targets"},
            metric_fn="f1_score",
            metric_key="sk_f1",
            average="macro",
            zero_division=1,
        )
    return callbacks

def objective(trial):
    lr = trial.suggest_loguniform("lr", 1e-3, 1e-1)
    num_hidden = int(trial.suggest_loguniform("num_hidden", 32, 128))

    # note: both loaders use the small MNIST test split here
    loaders = {
        "train": DataLoader(
            MNIST(os.getcwd(), train=False, download=True, transform=ToTensor()),
            batch_size=32,
        ),
        "valid": DataLoader(
            MNIST(os.getcwd(), train=False, download=True, transform=ToTensor()),
            batch_size=32,
        ),
    }
    model = nn.Sequential(
        nn.Flatten(), nn.Linear(784, num_hidden), nn.ReLU(), nn.Linear(num_hidden, 10)
    )
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()

    runner = dl.SupervisedRunner(
        input_key="features", output_key="logits", target_key="targets"
    )
    runner.train(
        engine=engine or dl.DeviceEngine(device),
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        loaders=loaders,
        callbacks={
            "optuna": dl.OptunaPruningCallback(
                loader_key="valid", metric_key="accuracy01", minimize=False, trial=trial
            ),
            "accuracy": dl.AccuracyCallback(
                input_key="logits", target_key="targets", num_classes=10
            ),
        },
        num_epochs=2,
    )
    score = runner.callbacks["optuna"].best_score
    return score

def test_evaluation_loader_empty_model() -> None:
    """Test that an assertion is raised when no model is given."""
    with pytest.raises(AssertionError) as record:
        dataset = DummyDataset()
        loader = DataLoader(dataset=dataset, batch_size=1)
        callbacks = [
            dl.AccuracyCallback(input_key="logits", target_key="targets", topk=(1,))
        ]
        runner = SupervisedRunner()
        runner.evaluate_loader(loader=loader, callbacks=callbacks, model=None)

        if not record:
            pytest.fail("Expected an assertion because the model is empty!")

def test_evaluation_loader_custom_model() -> None:
    """Test that ``evaluate_loader`` works with a custom model."""
    dataset = DummyDataset()
    model = nn.Linear(in_features=dataset.features_dim, out_features=dataset.out_dim)
    loader = DataLoader(dataset=dataset, batch_size=1)
    callbacks = [
        dl.AccuracyCallback(input_key="logits", target_key="targets", topk=(1,))
    ]
    runner = SupervisedRunner()

    runner.evaluate_loader(loader=loader, callbacks=callbacks, model=model)

def test_model(self, config, testset):  # pylint: disable=unused-argument
    """A custom testing loop."""
    test_loader = torch.utils.data.DataLoader(
        testset, batch_size=config["batch_size"], shuffle=False
    )

    # use Catalyst's SupervisedRunner and AccuracyCallback to compute accuracy
    runner = dl.SupervisedRunner()
    runner.train(
        model=self.model,
        num_epochs=1,
        loaders={"valid": test_loader},
        logdir="./logs",
        verbose=True,
        callbacks=[
            dl.AccuracyCallback(input_key="logits", target_key="targets", num_classes=10)
        ],
    )

    # retrieve the top-1 accuracy from the SupervisedRunner
    accuracy = runner.epoch_metrics["valid"]["accuracy"]
    return accuracy

def test_catalyst_model_file(tmp_dir, runner, loaders):
    model = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10))
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.02)

    runner.train(
        model=model,
        engine=runner.engine,
        criterion=criterion,
        optimizer=optimizer,
        loaders=loaders,
        num_epochs=2,
        callbacks=[
            dl.AccuracyCallback(input_key="logits", target_key="targets"),
            DvcLiveCallback("model.pth"),
        ],
        logdir="./logs",
        valid_loader="valid",
        valid_metric="loss",
        minimize_valid_metric=True,
        verbose=True,
        load_best_on_end=True,
    )

    assert (tmp_dir / "model.pth").is_file()

def train_experiment(device):
    with TemporaryDirectory() as logdir:
        # sample data
        num_samples, num_features, num_classes1, num_classes2 = int(1e4), int(1e1), 4, 10
        X = torch.rand(num_samples, num_features)
        y1 = (torch.rand(num_samples) * num_classes1).to(torch.int64)
        y2 = (torch.rand(num_samples) * num_classes2).to(torch.int64)

        # pytorch loaders
        dataset = TensorDataset(X, y1, y2)
        loader = DataLoader(dataset, batch_size=32, num_workers=1)
        loaders = {"train": loader, "valid": loader}

        class CustomModule(nn.Module):
            def __init__(self, in_features: int, out_features1: int, out_features2: int):
                super().__init__()
                self.shared = nn.Linear(in_features, 128)
                self.head1 = nn.Linear(128, out_features1)
                self.head2 = nn.Linear(128, out_features2)

            def forward(self, x):
                x = self.shared(x)
                y1 = self.head1(x)
                y2 = self.head2(x)
                return y1, y2

        # model, criterion, optimizer, scheduler
        model = CustomModule(num_features, num_classes1, num_classes2)
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters())
        scheduler = optim.lr_scheduler.MultiStepLR(optimizer, [2])

        class CustomRunner(dl.Runner):
            def handle_batch(self, batch):
                x, y1, y2 = batch
                y1_hat, y2_hat = self.model(x)
                self.batch = {
                    "features": x,
                    "logits1": y1_hat,
                    "logits2": y2_hat,
                    "targets1": y1,
                    "targets2": y2,
                }

        # model training
        runner = CustomRunner()
        runner.train(
            model=model,
            criterion=criterion,
            optimizer=optimizer,
            scheduler=scheduler,
            loaders=loaders,
            num_epochs=1,
            verbose=False,
            callbacks=[
                dl.CriterionCallback(
                    metric_key="loss1", input_key="logits1", target_key="targets1"
                ),
                dl.CriterionCallback(
                    metric_key="loss2", input_key="logits2", target_key="targets2"
                ),
                dl.MetricAggregationCallback(
                    prefix="loss", metrics=["loss1", "loss2"], mode="mean"
                ),
                dl.OptimizerCallback(metric_key="loss"),
                dl.SchedulerCallback(),
                dl.AccuracyCallback(
                    input_key="logits1",
                    target_key="targets1",
                    num_classes=num_classes1,
                    prefix="one_",
                ),
                dl.AccuracyCallback(
                    input_key="logits2",
                    target_key="targets2",
                    num_classes=num_classes2,
                    prefix="two_",
                ),
                dl.ConfusionMatrixCallback(
                    input_key="logits1",
                    target_key="targets1",
                    num_classes=num_classes1,
                    prefix="one_cm",
                ),  # catalyst[ml] required
                dl.ConfusionMatrixCallback(
                    input_key="logits2",
                    target_key="targets2",
                    num_classes=num_classes2,
                    prefix="two_cm",
                ),  # catalyst[ml] required
                dl.CheckpointCallback(
                    "./logs/one",
                    loader_key="valid",
                    metric_key="one_accuracy",
                    minimize=False,
                    save_n_best=1,
                ),
                dl.CheckpointCallback(
                    "./logs/two",
                    loader_key="valid",
                    metric_key="two_accuracy03",
                    minimize=False,
                    save_n_best=3,
                ),
            ],
            loggers={"console": dl.ConsoleLogger(), "tb": dl.TensorboardLogger("./logs/tb")},
        )

def train_experiment(device, engine=None):
    with TemporaryDirectory() as logdir:
        # 1. data and transforms
        transforms = Compose([
            torchvision.transforms.ToPILImage(),
            torchvision.transforms.RandomCrop((28, 28)),
            torchvision.transforms.RandomVerticalFlip(),
            torchvision.transforms.RandomHorizontalFlip(),
            torchvision.transforms.ToTensor(),
            Normalize((0.1307,), (0.3081,)),
        ])
        transform_original = Compose([
            ToTensor(),
            Normalize((0.1307,), (0.3081,)),
        ])

        mnist = MNIST("./logdir", train=True, download=True, transform=None)
        contrastive_mnist = SelfSupervisedDatasetWrapper(
            mnist, transforms=transforms, transform_original=transform_original
        )
        train_loader = torch.utils.data.DataLoader(contrastive_mnist, batch_size=BATCH_SIZE)

        mnist_valid = MNIST("./logdir", train=False, download=True, transform=None)
        contrastive_valid = SelfSupervisedDatasetWrapper(
            mnist_valid, transforms=transforms, transform_original=transform_original
        )
        valid_loader = torch.utils.data.DataLoader(contrastive_valid, batch_size=BATCH_SIZE)

        # 2. model and optimizer
        encoder = nn.Sequential(
            nn.Flatten(), nn.Linear(28 * 28, 16), nn.LeakyReLU(inplace=True)
        )
        projection_head = nn.Sequential(
            nn.Linear(16, 16, bias=False),
            nn.ReLU(inplace=True),
            nn.Linear(16, 16, bias=True),
        )

        class ContrastiveModel(torch.nn.Module):
            def __init__(self, model, encoder):
                super(ContrastiveModel, self).__init__()
                self.model = model
                self.encoder = encoder

            def forward(self, x):
                emb = self.encoder(x)
                projection = self.model(emb)
                return emb, projection

        model = ContrastiveModel(model=projection_head, encoder=encoder)
        optimizer = Adam(model.parameters(), lr=LR)

        # 3. contrastive criterion
        criterion = NTXentLoss(tau=0.1)

        callbacks = [
            dl.ControlFlowCallback(
                dl.CriterionCallback(
                    input_key="projection_left",
                    target_key="projection_right",
                    metric_key="loss",
                ),
                loaders="train",
            ),
            dl.SklearnModelCallback(
                feature_key="embedding_left",
                target_key="target",
                train_loader="train",
                valid_loaders="valid",
                model_fn=RandomForestClassifier,
                predict_method="predict_proba",
                predict_key="sklearn_predict",
                random_state=RANDOM_STATE,
                n_estimators=50,
            ),
            dl.ControlFlowCallback(
                dl.AccuracyCallback(
                    target_key="target", input_key="sklearn_predict", topk_args=(1, 3)
                ),
                loaders="valid",
            ),
        ]

        runner = dl.SelfSupervisedRunner()
        logdir = "./logdir"
        runner.train(
            model=model,
            engine=engine or dl.DeviceEngine(device),
            criterion=criterion,
            optimizer=optimizer,
            callbacks=callbacks,
            loaders={"train": train_loader, "valid": valid_loader},
            verbose=False,
            logdir=logdir,
            valid_loader="train",
            valid_metric="loss",
            minimize_valid_metric=True,
            num_epochs=TRAIN_EPOCH,
        )

        valid_path = Path(logdir) / "logs/valid.csv"
        best_accuracy = max(
            float(row["accuracy"])
            for row in read_csv(valid_path)
            if row["accuracy"] != "accuracy"
        )
        assert best_accuracy > 0.6

def main(args):
    train_dataset = TorchvisionDatasetWrapper(
        MNIST(root="./", download=True, train=True, transform=ToTensor())
    )
    val_dataset = TorchvisionDatasetWrapper(
        MNIST(root="./", download=True, train=False, transform=ToTensor())
    )
    train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=32, shuffle=True)
    val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_size=64)
    loaders = {"train": train_dataloader, "valid": val_dataloader}
    utils.set_global_seed(args.seed)

    net = nn.Sequential(
        Flatten(),
        nn.Linear(28 * 28, 300),
        nn.ReLU(),
        nn.Linear(300, 100),
        nn.ReLU(),
        nn.Linear(100, 10),
    )
    initial_state_dict = net.state_dict()

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(net.parameters())
    if args.device is not None:
        engine = dl.DeviceEngine(args.device)
    else:
        engine = None

    if args.vanilla_pruning:
        runner = dl.SupervisedRunner(engine=engine)
        runner.train(
            model=net,
            criterion=criterion,
            optimizer=optimizer,
            loaders=loaders,
            callbacks=[
                dl.AccuracyCallback(input_key="logits", target_key="targets", num_classes=10),
            ],
            logdir="./logdir",
            num_epochs=args.num_epochs,
            load_best_on_end=True,
            valid_metric="accuracy01",
            minimize_valid_metric=False,
            valid_loader="valid",
        )
        pruning_fn = partial(
            utils.pruning.prune_model,
            pruning_fn=args.pruning_method,
            amount=args.amount,
            keys_to_prune=["weight"],  # the parameter name in nn.Linear is "weight"
            dim=args.dim,
            l_norm=args.n,
        )
        acc, amount = validate_model(
            runner,
            pruning_fn=pruning_fn,
            loader=loaders["valid"],
            num_sessions=args.num_sessions,
        )
        torch.save(acc, "accuracy.pth")
        torch.save(amount, "amount.pth")
    else:
        runner = PruneRunner(num_sessions=args.num_sessions, engine=engine)
        callbacks = [
            dl.AccuracyCallback(input_key="logits", target_key="targets", num_classes=10),
            dl.PruningCallback(
                args.pruning_method,
                keys_to_prune=["weight"],
                amount=args.amount,
                remove_reparametrization_on_stage_end=False,
            ),
            dl.CriterionCallback(input_key="logits", target_key="targets", metric_key="loss"),
            dl.OptimizerCallback(metric_key="loss"),
        ]
        if args.lottery_ticket:
            callbacks.append(LotteryTicketCallback(initial_state_dict=initial_state_dict))
        if args.kd:
            net.load_state_dict(torch.load(args.state_dict))
            callbacks.append(
                PrepareForFinePruningCallback(probability_shift=args.probability_shift)
            )
            callbacks.append(KLDivCallback(temperature=4, student_logits_key="logits"))
            callbacks.append(
                MetricAggregationCallback(
                    prefix="loss",
                    metrics={"loss": 0.1, "kl_div_loss": 0.9},
                    mode="weighted_sum",
                )
            )
        runner.train(
            model=net,
            criterion=criterion,
            optimizer=optimizer,
            loaders=loaders,
            callbacks=callbacks,
            logdir=args.logdir,
            num_epochs=args.num_epochs,
            load_best_on_end=True,
            valid_metric="accuracy01",
            minimize_valid_metric=False,
            valid_loader="valid",
        )

# sample data
num_samples, num_features, num_classes = int(1e4), int(1e1), 4
X = torch.rand(num_samples, num_features)
y = (torch.rand(num_samples) * num_classes).to(torch.int64)

# pytorch loaders
dataset = TensorDataset(X, y)
loader = DataLoader(dataset, batch_size=32, num_workers=1)
loaders = {"train": loader, "valid": loader}

# model, criterion, optimizer, scheduler
model = torch.nn.Linear(num_features, num_classes)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters())
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, [2])

# model training
runner = dl.SupervisedRunner()
runner.train(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    loaders=loaders,
    logdir="./logdir",
    num_epochs=3,
    check=True,
    callbacks=[dl.AccuracyCallback(num_classes=num_classes)],
)

def train_experiment(device, engine=None):
    with TemporaryDirectory() as logdir:
        model = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10))
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=0.02)
        loaders = {
            "train": DataLoader(
                MNIST(os.getcwd(), train=False, download=True, transform=ToTensor()),
                batch_size=32,
            ),
            "valid": DataLoader(
                MNIST(os.getcwd(), train=False, download=True, transform=ToTensor()),
                batch_size=32,
            ),
        }

        runner = dl.SupervisedRunner(
            input_key="features", output_key="logits", target_key="targets", loss_key="loss"
        )
        callbacks = [
            dl.AccuracyCallback(input_key="logits", target_key="targets", topk_args=(1, 3, 5)),
            dl.PrecisionRecallF1SupportCallback(
                input_key="logits", target_key="targets", num_classes=10
            ),
        ]
        if SETTINGS.ml_required:
            callbacks.append(
                dl.ConfusionMatrixCallback(
                    input_key="logits", target_key="targets", num_classes=10
                )
            )
        if SETTINGS.amp_required and (
            engine is None
            or not isinstance(
                engine,
                (dl.AMPEngine, dl.DataParallelAMPEngine, dl.DistributedDataParallelAMPEngine),
            )
        ):
            callbacks.append(dl.AUCCallback(input_key="logits", target_key="targets"))
        if SETTINGS.onnx_required:
            callbacks.append(dl.OnnxCallback(logdir=logdir, input_key="features"))
        if SETTINGS.pruning_required:
            callbacks.append(dl.PruningCallback(pruning_fn="l1_unstructured", amount=0.5))
        if SETTINGS.quantization_required:
            callbacks.append(dl.QuantizationCallback(logdir=logdir))
        if engine is None or not isinstance(engine, dl.DistributedDataParallelEngine):
            callbacks.append(dl.TracingCallback(logdir=logdir, input_key="features"))

        # model training
        runner.train(
            engine=engine or dl.DeviceEngine(device),
            model=model,
            criterion=criterion,
            optimizer=optimizer,
            loaders=loaders,
            num_epochs=1,
            callbacks=callbacks,
            logdir=logdir,
            valid_loader="valid",
            valid_metric="loss",
            minimize_valid_metric=True,
            verbose=False,
            load_best_on_end=True,
            timeit=False,
            check=False,
            overfit=False,
            fp16=False,
            ddp=False,
        )

        # model inference
        for prediction in runner.predict_loader(loader=loaders["valid"]):
            assert prediction["logits"].detach().cpu().numpy().shape[-1] == 10

        # model post-processing
        features_batch = next(iter(loaders["valid"]))[0]
        # model stochastic weight averaging
        model.load_state_dict(
            utils.get_averaged_weights_by_path_mask(logdir=logdir, path_mask="*.pth")
        )
        # model onnx export
        if SETTINGS.onnx_required:
            utils.onnx_export(
                model=runner.model,
                batch=runner.engine.sync_device(features_batch),
                file="./mnist.onnx",
                verbose=False,
            )
        # model quantization
        if SETTINGS.quantization_required:
            utils.quantize_model(model=runner.model)
        # model pruning
        if SETTINGS.pruning_required:
            utils.prune_model(model=runner.model, pruning_fn="l1_unstructured", amount=0.8)
        # model tracing
        utils.trace_model(model=runner.model, batch=features_batch)

def train_experiment(engine=None):
    with TemporaryDirectory() as logdir:
        utils.set_global_seed(RANDOM_STATE)

        # 1. generate data
        num_samples, num_features, num_classes = int(1e4), int(30), 3
        X, y = make_classification(
            n_samples=num_samples,
            n_features=num_features,
            n_informative=num_features,
            n_repeated=0,
            n_redundant=0,
            n_classes=num_classes,
            n_clusters_per_class=1,
        )
        X, y = torch.tensor(X), torch.tensor(y)
        dataset = TensorDataset(X, y)
        loader = DataLoader(dataset, batch_size=64, num_workers=1, shuffle=True)

        # 2. model, optimizer and scheduler
        hidden_size, out_features = 20, 16
        model = nn.Sequential(
            nn.Linear(num_features, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, out_features),
        )
        optimizer = Adam(model.parameters(), lr=LR)
        scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, [2])

        # 3. criterion with triplets sampling
        sampler_inbatch = HardTripletsSampler(norm_required=False)
        criterion = TripletMarginLossWithSampler(margin=0.5, sampler_inbatch=sampler_inbatch)

        # 4. training with catalyst Runner
        class CustomRunner(dl.SupervisedRunner):
            def handle_batch(self, batch) -> None:
                features, targets = batch["features"].float(), batch["targets"].long()
                embeddings = self.model(features)
                self.batch = {"embeddings": embeddings, "targets": targets}

        callbacks = [
            dl.SklearnModelCallback(
                feature_key="embeddings",
                target_key="targets",
                train_loader="train",
                valid_loaders="valid",
                model_fn=RandomForestClassifier,
                predict_method="predict_proba",
                predict_key="sklearn_predict",
                random_state=RANDOM_STATE,
                n_estimators=100,
            ),
            dl.ControlFlowCallbackWrapper(
                dl.AccuracyCallback(
                    target_key="targets", input_key="sklearn_predict", topk=(1, 3)
                ),
                loaders="valid",
            ),
        ]

        runner = CustomRunner(input_key="features", output_key="embeddings")
        runner.train(
            engine=engine,
            model=model,
            criterion=criterion,
            optimizer=optimizer,
            callbacks=callbacks,
            scheduler=scheduler,
            loaders={"train": loader, "valid": loader},
            verbose=False,
            valid_loader="valid",
            valid_metric="accuracy01",
            minimize_valid_metric=False,
            num_epochs=TRAIN_EPOCH,
            logdir=logdir,
        )

        best_accuracy = max(
            epoch_metrics["valid"]["accuracy01"]
            for epoch_metrics in runner.experiment_metrics.values()
        )
        assert best_accuracy > 0.9

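# Imports assumed by the snippet above (a sketch, not from the source; the
# catalyst module paths follow the 22.x layout and may differ across versions).
from tempfile import TemporaryDirectory

import torch
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from torch import nn
from torch.optim import Adam
from torch.utils.data import DataLoader, TensorDataset

from catalyst import dl, utils
from catalyst.contrib.losses import TripletMarginLossWithSampler
from catalyst.data import HardTripletsSampler

# illustrative constants, not from the source
RANDOM_STATE, LR, TRAIN_EPOCH = 42, 1e-3, 10
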
runner.train(
    loaders=dataloaders,
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    logdir="./catalyst_logs",
    num_epochs=EPOCHS,
    # valid_loader="valid",
    # valid_metric="accuracy03",
    # minimize_valid_metric=False,
    verbose=True,
    # uncomment for extra metrics:
    callbacks=[
        dl.AccuracyCallback(
            input_key="logits", target_key="mask_class", num_classes=NUM_CLASES
        ),
        # dl.PrecisionRecallF1SupportCallback(
        #     input_key="logits", target_key="mask_class", num_classes=NUM_CLASES
        # ),
        # dl.AUCCallback(input_key="logits", target_key="mask_class"),
        dl.ConfusionMatrixCallback(
            input_key="logits", target_key="mask_class", num_classes=NUM_CLASES
        ),
    ],
)

# reference: built-in torch LR scheduler signatures
"""
torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda, last_epoch=-1, verbose=False)
torch.optim.lr_scheduler.MultiplicativeLR(optimizer, lr_lambda, last_epoch=-1, verbose=False)
torch.optim.lr_scheduler.StepLR(optimizer, step_size, gamma=0.1, last_epoch=-1, verbose=False)
torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones, gamma=0.1, last_epoch=-1, verbose=False)
torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma, last_epoch=-1, verbose=False)
torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max, eta_min=0, last_epoch=-1, verbose=False)
"""

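# Pairing sketch for the scheduler reference above (assumes `optimizer`,
# `dataloaders`, and EPOCHS from the surrounding script; OneCycleLR is an
# illustrative choice): schedulers that step per batch need
# dl.SchedulerCallback(mode="batch"), while per-epoch schedulers use mode="epoch".
scheduler = torch.optim.lr_scheduler.OneCycleLR(
    optimizer,
    max_lr=1e-2,
    epochs=EPOCHS,
    steps_per_epoch=len(dataloaders["train"]),
)
scheduler_callback = dl.SchedulerCallback(mode="batch")
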
    dl.OptimizerCallback(metric_key="loss"),
    dl.SklearnModelCallback(
        feature_key="embedding_origin",
        target_key="target",
        train_loader="train",
        valid_loaders="valid",
        model_fn=LogisticRegression,
        predict_key="sklearn_predict",
        predict_method="predict_proba",
        C=0.1,
        solver="saga",
        max_iter=200,
    ),
    dl.ControlFlowCallbackWrapper(
        dl.AccuracyCallback(
            target_key="target", input_key="sklearn_predict", topk=(1, 3)
        ),
        loaders="valid",
    ),
]

# train model
runner = SelfSupervisedRunner()
runner.train(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    callbacks=callbacks,
    loaders=get_loaders(args.dataset, args.batch_size, args.num_workers),
    num_epochs=args.epochs,
    logdir=args.logdir,

def train_experiment(device, engine=None):
    with TemporaryDirectory() as logdir:
        from catalyst import utils

        utils.set_global_seed(RANDOM_STATE)

        # 1. train, valid and test loaders
        transforms = Compose([ToTensor(), Normalize((0.1307,), (0.3081,))])

        train_data = MNIST(os.getcwd(), train=True, download=True, transform=transforms)
        train_labels = train_data.targets.cpu().numpy().tolist()
        train_sampler = data.BatchBalanceClassSampler(
            train_labels, num_classes=10, num_samples=4
        )
        train_loader = DataLoader(train_data, batch_sampler=train_sampler)

        valid_dataset = MNIST(
            root=os.getcwd(), transform=transforms, train=False, download=True
        )
        valid_loader = DataLoader(dataset=valid_dataset, batch_size=32)

        test_dataset = MNIST(
            root=os.getcwd(), transform=transforms, train=False, download=True
        )
        test_loader = DataLoader(dataset=test_dataset, batch_size=32)

        # 2. model and optimizer
        model = nn.Sequential(
            nn.Flatten(), nn.Linear(28 * 28, 16), nn.LeakyReLU(inplace=True)
        )
        optimizer = Adam(model.parameters(), lr=LR)
        scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, [2])

        # 3. criterion with triplets sampling
        sampler_inbatch = data.HardTripletsSampler(norm_required=False)
        criterion = nn.TripletMarginLossWithSampler(
            margin=0.5, sampler_inbatch=sampler_inbatch
        )

        # 4. training with catalyst Runner
        class CustomRunner(dl.SupervisedRunner):
            def handle_batch(self, batch) -> None:
                images, targets = batch["features"].float(), batch["targets"].long()
                features = self.model(images)
                self.batch = {"embeddings": features, "targets": targets}

        callbacks = [
            dl.ControlFlowCallback(
                dl.CriterionCallback(
                    input_key="embeddings", target_key="targets", metric_key="loss"
                ),
                loaders="train",
            ),
            dl.SklearnModelCallback(
                feature_key="embeddings",
                target_key="targets",
                train_loader="train",
                valid_loaders=["valid", "infer"],
                model_fn=RandomForestClassifier,
                predict_method="predict_proba",
                predict_key="sklearn_predict",
                random_state=RANDOM_STATE,
                n_estimators=50,
            ),
            dl.ControlFlowCallback(
                dl.AccuracyCallback(
                    target_key="targets", input_key="sklearn_predict", topk_args=(1, 3)
                ),
                loaders=["valid", "infer"],
            ),
        ]

        runner = CustomRunner(input_key="features", output_key="embeddings")
        runner.train(
            engine=engine or dl.DeviceEngine(device),
            model=model,
            criterion=criterion,
            optimizer=optimizer,
            scheduler=scheduler,
            callbacks=callbacks,
            loaders={"train": train_loader, "valid": valid_loader, "infer": test_loader},
            verbose=False,
            valid_loader="valid",
            valid_metric="accuracy",
            minimize_valid_metric=False,
            num_epochs=TRAIN_EPOCH,
            logdir=logdir,
        )

        valid_path = Path(logdir) / "logs/infer.csv"
        best_accuracy = max(float(row["accuracy"]) for row in read_csv(valid_path))
        assert best_accuracy > 0.8

def main(args):
    wandb.init(project="teacher-pruning", config=vars(args))
    set_global_seed(42)

    # dataloader initialization
    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])
    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])
    train_dataset = Wrp(
        datasets.CIFAR10(
            root=os.getcwd(), train=True, transform=transform_train, download=True
        )
    )
    valid_dataset = Wrp(
        datasets.CIFAR10(root=os.getcwd(), train=False, transform=transform_test)
    )
    train_dataloader = DataLoader(
        dataset=train_dataset, batch_size=args.batch_size, shuffle=True, num_workers=2
    )
    valid_dataloader = DataLoader(dataset=valid_dataset, batch_size=128, num_workers=2)
    loaders = {"train": train_dataloader, "valid": valid_dataloader}

    # model initialization
    model = PreActResNet18()
    model.fc = nn.Linear(512, 10)
    if args.teacher_model is not None:
        is_kd = True
        teacher_model = NAME2MODEL[args.teacher_model]()
        load_model_from_path(model=teacher_model, path=args.teacher_path)
        model = {"student": model, "teacher": teacher_model}
        output_hiddens = args.beta is None
        is_kd_on_hiddens = output_hiddens
        runner = KDRunner(device=args.device, output_hiddens=output_hiddens)
        parameters = model["student"].parameters()
    else:
        is_kd = False
        runner = dl.SupervisedRunner(device=args.device)
        parameters = model.parameters()

    # optimizer
    optimizer_cls = NAME2OPTIM[args.optimizer]
    optimizer_kwargs = {"params": parameters, "lr": args.lr}
    if args.optimizer == "sgd":
        optimizer_kwargs["momentum"] = args.momentum
    else:
        optimizer_kwargs["betas"] = (args.beta1, args.beta2)
    optimizer = optimizer_cls(**optimizer_kwargs)
    scheduler = MultiStepLR(optimizer, milestones=[80, 120], gamma=args.gamma)
    logdir = f"logs/{wandb.run.name}"

    # callbacks
    callbacks = [dl.AccuracyCallback(num_classes=10), WandbCallback()]
    if is_kd:
        metrics = {}
        callbacks.append(dl.CriterionCallback(output_key="cls_loss"))
        callbacks.append(DiffOutputCallback())
        coefs = get_loss_coefs(args.alpha, args.beta)
        metrics["cls_loss"] = coefs[0]
        metrics["diff_output_loss"] = coefs[1]
        if is_kd_on_hiddens:
            callbacks.append(DiffHiddenCallback())
            metrics["diff_hidden_loss"] = coefs[2]

        aggregator_callback = dl.MetricAggregationCallback(
            prefix="loss", metrics=metrics, mode="weighted_sum"
        )
        wrapped_agg_callback = dl.ControlFlowCallback(aggregator_callback, loaders=["train"])
        callbacks.append(wrapped_agg_callback)

    runner.train(
        model=model,
        optimizer=optimizer,
        scheduler=scheduler,
        criterion=nn.CrossEntropyLoss(),
        loaders=loaders,
        callbacks=callbacks,
        num_epochs=args.epoch,
        logdir=logdir,
        verbose=True,
    )

def train_experiment(device):
    with TemporaryDirectory() as logdir:
        teacher = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10))
        student = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10))
        criterion = {"cls": nn.CrossEntropyLoss(), "kl": nn.KLDivLoss(reduction="batchmean")}
        optimizer = optim.Adam(student.parameters(), lr=0.02)
        loaders = {
            "train": DataLoader(
                MNIST(os.getcwd(), train=True, download=True, transform=ToTensor()),
                batch_size=32,
            ),
            "valid": DataLoader(
                MNIST(os.getcwd(), train=False, download=True, transform=ToTensor()),
                batch_size=32,
            ),
        }

        class DistilRunner(dl.Runner):
            def handle_batch(self, batch):
                x, y = batch

                teacher.eval()  # let's manually set teacher model to eval mode
                with torch.no_grad():
                    t_logits = self.model["teacher"](x)

                s_logits = self.model["student"](x)
                self.batch = {
                    "t_logits": t_logits,
                    "s_logits": s_logits,
                    "targets": y,
                    "s_logprobs": F.log_softmax(s_logits, dim=-1),
                    "t_probs": F.softmax(t_logits, dim=-1),
                }

        runner = DistilRunner()
        # model training
        runner.train(
            engine=dl.DeviceEngine(device),
            model={"teacher": teacher, "student": student},
            criterion=criterion,
            optimizer=optimizer,
            loaders=loaders,
            num_epochs=1,
            logdir=logdir,
            verbose=True,
            callbacks=[
                dl.AccuracyCallback(
                    input_key="t_logits", target_key="targets", num_classes=2, prefix="teacher_"
                ),
                dl.AccuracyCallback(
                    input_key="s_logits", target_key="targets", num_classes=2, prefix="student_"
                ),
                dl.CriterionCallback(
                    input_key="s_logits",
                    target_key="targets",
                    metric_key="cls_loss",
                    criterion_key="cls",
                ),
                dl.CriterionCallback(
                    input_key="s_logprobs",
                    target_key="t_probs",
                    metric_key="kl_div_loss",
                    criterion_key="kl",
                ),
                dl.MetricAggregationCallback(
                    prefix="loss", metrics=["kl_div_loss", "cls_loss"], mode="mean"
                ),
                dl.OptimizerCallback(metric_key="loss", model_key="student"),
                dl.CheckpointCallback(
                    logdir=logdir,
                    loader_key="valid",
                    metric_key="loss",
                    minimize=True,
                    save_n_best=3,
                ),
            ],
        )

def train(dev_dir, logdir, device):
    train = pd.read_csv(f'{dev_dir}/train.csv', index_col=0)
    train['all_utils'] = train['cmd_cleaned'].apply(select_utils)
    train = train.loc[train.all_utils.apply(str.strip).apply(len) > 0]
    train['util'] = train['all_utils'].apply(lambda x: x.split()[0])
    train = train.dropna().reset_index(drop=True)

    spm.SentencePieceTrainer.train(
        input=f'{dev_dir}/text',
        model_prefix=f'{dev_dir}/txt_bpe_clf',
        model_type='bpe',
        vocab_size=config.src_vocab_size,
    )
    text_tokenizer = spm.SentencePieceProcessor(f'{dev_dir}/txt_bpe_clf.model')

    cmd_le = LabelEncoder()
    train['text_enc'] = train.text_cleaned.progress_apply(text_tokenizer.encode)
    train['y'] = cmd_le.fit_transform(train['util'].values)

    tdf = train[train.origin == 'original']
    tdf2 = train[train.origin != 'original']
    train, valid = train_test_split(tdf, test_size=500, random_state=SEED)
    train = pd.concat([train, tdf2]).reset_index(drop=True)

    train_ds = UtilDataset(train.text_enc, train.y, config, bos_id, eos_id, pad_id)
    valid_ds = UtilDataset(valid.text_enc, valid.y, config, bos_id, eos_id, pad_id)

    model = BertClassifier(config, pad_id, len(cmd_le.classes_))
    print('# params', sum(p.numel() for p in model.parameters() if p.requires_grad))

    loaders = {
        'train': data.DataLoader(train_ds, batch_size=config.batch_size, shuffle=True),
        'valid': data.DataLoader(valid_ds, batch_size=config.batch_size),
    }
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(
        model.parameters(),
        lr=config.optimizer_lr,
        weight_decay=config.weight_decay,
        amsgrad=True,
    )
    callbacks = [
        dl.CheckpointCallback(config.num_epochs),
        dl.AccuracyCallback(num_classes=len(cmd_le.classes_), topk_args=[1, 5]),
    ]

    if config.schedule == 'OneCycleLR':
        scheduler = torch.optim.lr_scheduler.OneCycleLR(
            optimizer,
            max_lr=config.optimizer_lr,
            epochs=config.num_epochs,
            steps_per_epoch=len(loaders['train']),
        )
        callbacks.append(dl.SchedulerCallback(mode="batch"))
    elif config.schedule == 'ReduceLROnPlateau':
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer,
            factor=config.plateau_factor,
            patience=5,
            cooldown=3,
            threshold=1e-3,
            min_lr=1e-6,
        )
        callbacks.append(dl.SchedulerCallback(mode="epoch"))

    shutil.rmtree(logdir, ignore_errors=True)
    os.makedirs(logdir, exist_ok=True)

    runner = dl.SupervisedRunner(device=device)
    runner.train(
        model=model,
        loaders=loaders,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler if config.schedule else None,
        num_epochs=config.num_epochs,
        verbose=True,
        logdir=logdir,
        callbacks=callbacks,
    )
    joblib.dump(cmd_le, f'{dev_dir}/cmd_le')

def train_experiment(engine=None):
    with TemporaryDirectory() as logdir:
        # sample data
        num_samples, num_features, num_classes1, num_classes2 = int(1e4), int(1e1), 4, 10
        X = torch.rand(num_samples, num_features)
        y1 = (torch.rand(num_samples) * num_classes1).to(torch.int64)
        y2 = (torch.rand(num_samples) * num_classes2).to(torch.int64)

        # pytorch loaders
        dataset = TensorDataset(X, y1, y2)
        loader = DataLoader(dataset, batch_size=32, num_workers=1)
        loaders = {"train": loader, "valid": loader}

        # model, criterion, optimizer, scheduler
        model = CustomModule(num_features, num_classes1, num_classes2)
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters())
        scheduler = optim.lr_scheduler.MultiStepLR(optimizer, [2])

        callbacks = [
            dl.CriterionCallback(
                metric_key="loss1", input_key="logits1", target_key="targets1"
            ),
            dl.CriterionCallback(
                metric_key="loss2", input_key="logits2", target_key="targets2"
            ),
            dl.MetricAggregationCallback(
                metric_key="loss", metrics=["loss1", "loss2"], mode="mean"
            ),
            dl.BackwardCallback(metric_key="loss"),
            dl.OptimizerCallback(metric_key="loss"),
            dl.SchedulerCallback(),
            dl.AccuracyCallback(
                input_key="logits1",
                target_key="targets1",
                num_classes=num_classes1,
                prefix="one_",
            ),
            dl.AccuracyCallback(
                input_key="logits2",
                target_key="targets2",
                num_classes=num_classes2,
                prefix="two_",
            ),
            dl.CheckpointCallback(
                "./logs/one",
                loader_key="valid",
                metric_key="one_accuracy01",
                minimize=False,
                topk=1,
            ),
            dl.CheckpointCallback(
                "./logs/two",
                loader_key="valid",
                metric_key="two_accuracy03",
                minimize=False,
                topk=3,
            ),
        ]
        if SETTINGS.ml_required:
            # catalyst[ml] required
            callbacks.append(
                dl.ConfusionMatrixCallback(
                    input_key="logits1",
                    target_key="targets1",
                    num_classes=num_classes1,
                    prefix="one_cm",
                )
            )
            # catalyst[ml] required
            callbacks.append(
                dl.ConfusionMatrixCallback(
                    input_key="logits2",
                    target_key="targets2",
                    num_classes=num_classes2,
                    prefix="two_cm",
                )
            )

        # model training
        runner = CustomRunner()
        runner.train(
            engine=engine,
            model=model,
            criterion=criterion,
            optimizer=optimizer,
            scheduler=scheduler,
            loaders=loaders,
            num_epochs=1,
            verbose=False,
            callbacks=callbacks,
            loggers={
                "console": dl.ConsoleLogger(),
                "tb": dl.TensorboardLogger("./logs/tb"),
            },
        )

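# `CustomModule` and `CustomRunner` are not defined in this snippet; minimal
# sketches matching the inline definitions from the earlier two-head example
# in this section:
class CustomModule(nn.Module):
    def __init__(self, in_features: int, out_features1: int, out_features2: int):
        super().__init__()
        self.shared = nn.Linear(in_features, 128)
        self.head1 = nn.Linear(128, out_features1)
        self.head2 = nn.Linear(128, out_features2)

    def forward(self, x):
        x = self.shared(x)
        return self.head1(x), self.head2(x)


class CustomRunner(dl.Runner):
    def handle_batch(self, batch):
        x, y1, y2 = batch
        y1_hat, y2_hat = self.model(x)
        self.batch = {
            "features": x,
            "logits1": y1_hat,
            "logits2": y2_hat,
            "targets1": y1,
            "targets2": y2,
        }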