Example #1
def train_experiment(device, engine=None):
    with TemporaryDirectory() as logdir:
        teacher = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10))
        student = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10))
        model = {"teacher": teacher, "student": student}
        criterion = {"cls": nn.CrossEntropyLoss(), "kl": nn.KLDivLoss(reduction="batchmean")}
        optimizer = optim.Adam(student.parameters(), lr=0.02)

        loaders = {
            "train": DataLoader(
                MNIST(os.getcwd(), train=True, download=True, transform=ToTensor()), batch_size=32
            ),
            "valid": DataLoader(
                MNIST(os.getcwd(), train=False, download=True, transform=ToTensor()), batch_size=32
            ),
        }

        runner = DistilRunner()
        # model training
        runner.train(
            engine=engine or dl.DeviceEngine(device),
            model=model,
            criterion=criterion,
            optimizer=optimizer,
            loaders=loaders,
            num_epochs=1,
            logdir=logdir,
            verbose=False,
            callbacks=[
                dl.AccuracyCallback(
                    input_key="t_logits", target_key="targets", num_classes=2, prefix="teacher_"
                ),
                dl.AccuracyCallback(
                    input_key="s_logits", target_key="targets", num_classes=2, prefix="student_"
                ),
                dl.CriterionCallback(
                    input_key="s_logits",
                    target_key="targets",
                    metric_key="cls_loss",
                    criterion_key="cls",
                ),
                dl.CriterionCallback(
                    input_key="s_logprobs",
                    target_key="t_probs",
                    metric_key="kl_div_loss",
                    criterion_key="kl",
                ),
                dl.MetricAggregationCallback(
                    metric_key="loss", metrics=["kl_div_loss", "cls_loss"], mode="mean"
                ),
                dl.OptimizerCallback(metric_key="loss", model_key="student"),
                dl.CheckpointCallback(
                    logdir=logdir,
                    loader_key="valid",
                    metric_key="loss",
                    minimize=True,
                    save_n_best=3,
                ),
            ],
        )
Example #2
def run_catalyst(irunner: dl.IRunner,
                 idx: int,
                 device: str = "cuda",
                 num_epochs: int = 10):
    utils.set_global_seed(idx)
    loader = irunner.get_loaders()["train"]
    model = irunner.get_model().to(device)
    criterion = irunner.get_criterion()
    optimizer = irunner.get_optimizer(model)

    runner = dl.SupervisedRunner()
    runner.train(
        engine=dl.GPUEngine() if device == "cuda" else dl.CPUEngine(),
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        loaders={"train": loader},
        num_epochs=num_epochs,
        verbose=False,
        callbacks=[
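            # note: _output_key/_target_key are SupervisedRunner's private key attributes,
            # reused here so the callback reads the same batch keys the runner writes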
            dl.AccuracyCallback(
                input_key=runner._output_key,
                target_key=runner._target_key,
                topk=(1, ),
            )
        ],
    )

    return (
        runner.epoch_metrics["train"]["accuracy01"],
        runner.epoch_metrics["train"]["loss"],
        _get_used_memory(),
    )
Example #3
def get_callbacks(self):
    return {
        "criterion":
        dl.CriterionCallback(metric_key="loss",
                             input_key="logits",
                             target_key="targets"),
        "backward":
        dl.BackwardCallback(metric_key="loss"),
        "optimizer":
        dl.OptimizerCallback(metric_key="loss"),
        "scheduler":
        dl.SchedulerCallback(loader_key="valid", metric_key="loss"),
        "accuracy":
        dl.AccuracyCallback(input_key="logits",
                            target_key="targets",
                            topk=(1, 3, 5)),
        "checkpoint":
        dl.CheckpointCallback(
            self._logdir,
            loader_key="valid",
            metric_key="accuracy01",
            minimize=False,
            topk=1,
        ),
        "tqdm":
        dl.TqdmCallback(),
    }
Example #4
def test_evaluation_loader_metrics() -> None:
    """
    Test that metrics computed by evaluate_loader match the runner's internal loader metrics.
    """
    dataset = DummyDataset()
    model = nn.Linear(in_features=dataset.features_dim,
                      out_features=dataset.out_dim)
    loader = DataLoader(dataset=dataset, batch_size=1)
    callbacks = [
        dl.AccuracyCallback(input_key="logits",
                            target_key="targets",
                            topk=(1, ))
    ]
    runner = SupervisedRunner()
    runner.train(
        loaders={
            "train": loader,
            "valid": loader
        },
        model=model,
        num_epochs=1,
        criterion=nn.BCEWithLogitsLoss(),
        callbacks=callbacks,
    )
    runner_internal_metrics = runner.loader_metrics
    evaluate_loader_metrics = runner.evaluate_loader(loader=loader,
                                                     callbacks=callbacks)
    assert runner_internal_metrics["accuracy01"] == evaluate_loader_metrics[
        "accuracy01"]
Example #5
def get_callbacks(self, stage: str):
    callbacks = {
        "criterion":
        dl.CriterionCallback(metric_key="loss",
                             input_key="logits",
                             target_key="targets"),
        "optimizer":
        dl.OptimizerCallback(
            metric_key="loss",
            grad_clip_fn=nn.utils.clip_grad_norm_,
            grad_clip_params={"max_norm": 1.0},
        ),
        # "scheduler": dl.SchedulerCallback(loader_key="valid", metric_key="loss"),
        "accuracy":
        dl.AccuracyCallback(input_key="logits",
                            target_key="targets",
                            topk_args=(1, 3, 5)),
        "classification":
        dl.PrecisionRecallF1SupportCallback(input_key="logits",
                                            target_key="targets",
                                            num_classes=10),
        "checkpoint":
        dl.CheckpointCallback(self._logdir,
                              loader_key="valid",
                              metric_key="loss",
                              minimize=True,
                              save_n_best=3),
    }
    if SETTINGS.ml_required:
        callbacks["confusion_matrix"] = dl.ConfusionMatrixCallback(
            input_key="logits", target_key="targets", num_classes=10)
    return callbacks
Example #6
def test_catalyst_callback(tmp_dir, runner, loaders):
    model = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10))
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.02)

    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        loaders=loaders,
        num_epochs=2,
        callbacks=[
            dl.AccuracyCallback(input_key="logits", target_key="targets"),
            DvcLiveCallback(),
        ],
        logdir="./logs",
        valid_loader="valid",
        valid_metric="loss",
        minimize_valid_metric=True,
        verbose=True,
        load_best_on_end=True,
    )

    assert os.path.exists("dvclive")

    train_path = tmp_dir / "dvclive/train"
    valid_path = tmp_dir / "dvclive/valid"

    assert train_path.is_dir()
    assert valid_path.is_dir()
    assert any("accuracy" in x.name for x in train_path.iterdir())
    assert any("accuracy" in x.name for x in valid_path.iterdir())
Example #7
def get_callbacks(self, stage: str):
    return {
        "criterion":
        dl.CriterionCallback(metric_key="loss",
                             input_key="logits",
                             target_key="targets"),
        "optimizer":
        dl.OptimizerCallback(metric_key="loss"),
        # "scheduler": dl.SchedulerCallback(loader_key="valid", metric_key="loss"),
        "accuracy":
        dl.AccuracyCallback(input_key="logits",
                            target_key="targets",
                            topk_args=(1, 3, 5)),
        "classification":
        dl.PrecisionRecallF1SupportCallback(input_key="logits",
                                            target_key="targets",
                                            num_classes=10),
        "confusion_matrix":
        dl.ConfusionMatrixCallback(input_key="logits",
                                   target_key="targets",
                                   num_classes=10),
        "checkpoint":
        dl.CheckpointCallback(self._logdir,
                              loader_key="valid",
                              metric_key="loss",
                              minimize=True,
                              save_n_best=3),
    }
Example #8
def get_callbacks(self, stage: str):
    return {
        "criterion":
        dl.CriterionCallback(input_key="logits",
                             target_key="labels",
                             metric_key="loss"),
        "optimizer":
        dl.OptimizerCallback(metric_key="loss"),
        "scheduler":
        dl.SchedulerCallback(loader_key="valid",
                             metric_key="loss",
                             mode="batch"),
        "accuracy":
        dl.AccuracyCallback(input_key="logits",
                            target_key="labels",
                            topk_args=(1, )),
        "checkpoint":
        dl.CheckpointCallback(
            self._logdir,
            loader_key="valid",
            metric_key="accuracy",
            minimize=False,
            save_n_best=1,
        ),
        # "tqdm": dl.TqdmCallback(),
    }
Example #9
def objective(trial):
    lr = trial.suggest_loguniform("lr", 1e-3, 1e-1)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()
    runner = dl.SupervisedRunner()
    runner.train(
        model=model,
        loaders=loaders,
        criterion=criterion,
        optimizer=optimizer,
        callbacks={
            "optuna":
            dl.OptunaPruningCallback(loader_key="valid",
                                     metric_key="loss",
                                     minimize=True,
                                     trial=trial),
            "accuracy":
            dl.AccuracyCallback(num_classes=10,
                                input_key="logits",
                                target_key="targets"),
        },
        num_epochs=2,
        valid_metric="accuracy01",
        minimize_valid_metric=False,
    )
    return trial.best_score
Example #10
def test_disabling_loss_for_train():
    old_stdout = sys.stdout
    sys.stdout = str_stdout = StringIO()

    # experiment_setup
    logdir = "./logs/control_flow"
    checkpoint = logdir + "/checkpoints"
    logfile = checkpoint + "/_metrics.json"

    # data
    num_samples, num_features = int(1e4), int(1e1)
    X = torch.rand(num_samples, num_features)
    y = torch.randint(0, 5, size=[num_samples])
    dataset = TensorDataset(X, y)
    loader = DataLoader(dataset, batch_size=32, num_workers=1)
    loaders = {"train": loader, "valid": loader}

    # model, criterion, optimizer, scheduler
    model = torch.nn.Linear(num_features, 5)
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters())
    runner = dl.SupervisedRunner()

    n_epochs = 5
    # first stage
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        loaders=loaders,
        logdir=logdir,
        num_epochs=n_epochs,
        verbose=False,
        main_metric="accuracy01",
        callbacks=[
            dl.ControlFlowCallback(dl.CriterionCallback(),
                                   ignore_loaders=["train"]),
            dl.AccuracyCallback(accuracy_args=[1, 3, 5]),
            dl.CheckRunCallback(num_epoch_steps=n_epochs),
        ],
    )

    sys.stdout = old_stdout
    exp_output = str_stdout.getvalue()

    assert len(re.findall(r"\(train\): loss", exp_output)) == 5
    assert len(re.findall(r"\(valid\): loss", exp_output)) == 0
    assert len(re.findall(r".*/train\.\d\.pth", exp_output)) == 1

    assert os.path.isfile(logfile)
    assert os.path.isfile(checkpoint + "/best.pth")
    assert os.path.isfile(checkpoint + "/best_full.pth")
    assert os.path.isfile(checkpoint + "/last.pth")
    assert os.path.isfile(checkpoint + "/last_full.pth")
    pth_files = [
        file for file in os.listdir(checkpoint) if file.endswith(".pth")
    ]
    assert len(pth_files) == 6

    shutil.rmtree(logdir, ignore_errors=True)
Example #11
def train_experiment(device, engine=None):
    with TemporaryDirectory() as logdir:
        # sample data
        num_samples, num_features, num_classes = int(1e4), int(1e1), 4
        X = torch.rand(num_samples, num_features)
        y = (torch.rand(num_samples, ) * num_classes).to(torch.int64)

        # pytorch loaders
        dataset = TensorDataset(X, y)
        loader = DataLoader(dataset, batch_size=32, num_workers=1)
        loaders = {"train": loader, "valid": loader}

        # model, criterion, optimizer, scheduler
        model = torch.nn.Linear(num_features, num_classes)
        criterion = torch.nn.CrossEntropyLoss()
        optimizer = torch.optim.Adam(model.parameters())
        scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, [2])

        # model training
        runner = dl.SupervisedRunner(input_key="features",
                                     output_key="logits",
                                     target_key="targets",
                                     loss_key="loss")
        callbacks = [
            dl.AccuracyCallback(input_key="logits",
                                target_key="targets",
                                num_classes=num_classes),
            dl.PrecisionRecallF1SupportCallback(input_key="logits",
                                                target_key="targets",
                                                num_classes=4),
        ]
        if SETTINGS.ml_required:
            callbacks.append(
                dl.ConfusionMatrixCallback(input_key="logits",
                                           target_key="targets",
                                           num_classes=4))
        if SETTINGS.amp_required and (engine is None or not isinstance(
                engine,
            (dl.AMPEngine, dl.DataParallelAMPEngine,
             dl.DistributedDataParallelAMPEngine),
        )):
            callbacks.append(
                dl.AUCCallback(input_key="logits", target_key="targets"))

        runner.train(
            engine=engine or dl.DeviceEngine(device),
            model=model,
            criterion=criterion,
            optimizer=optimizer,
            scheduler=scheduler,
            loaders=loaders,
            logdir=logdir,
            num_epochs=1,
            valid_loader="valid",
            valid_metric="accuracy03",
            minimize_valid_metric=False,
            verbose=False,
            callbacks=callbacks,
        )
Example #12
def get_callbacks(self, stage: str):
    callbacks = {
        "scores":
        dl.BatchTransformCallback(
            input_key="logits",
            output_key="scores",
            transform=partial(torch.softmax, dim=1),
            scope="on_batch_end",
        ),
        "labels":
        dl.BatchTransformCallback(
            input_key="scores",
            output_key="labels",
            transform=partial(torch.argmax, dim=1),
            scope="on_batch_end",
        ),
        "criterion":
        dl.CriterionCallback(metric_key="loss",
                             input_key="logits",
                             target_key="targets"),
        "optimizer":
        dl.OptimizerCallback(
            metric_key="loss",
            grad_clip_fn=nn.utils.clip_grad_norm_,
            grad_clip_params={"max_norm": 1.0},
        ),
        # "scheduler": dl.SchedulerCallback(loader_key="valid", metric_key="loss"),
        "accuracy":
        dl.AccuracyCallback(input_key="logits",
                            target_key="targets",
                            topk_args=(1, 3, 5)),
        "classification":
        dl.PrecisionRecallF1SupportCallback(input_key="logits",
                                            target_key="targets",
                                            num_classes=10),
        "checkpoint":
        dl.CheckpointCallback(self._logdir,
                              loader_key="valid",
                              metric_key="loss",
                              minimize=True,
                              save_n_best=3),
    }
    if SETTINGS.ml_required:
        callbacks["confusion_matrix"] = dl.ConfusionMatrixCallback(
            input_key="logits", target_key="targets", num_classes=10)
        callbacks["f1_score"] = dl.SklearnBatchCallback(
            keys={
                "y_pred": "labels",
                "y_true": "targets"
            },
            metric_fn="f1_score",
            metric_key="sk_f1",
            average="macro",
            zero_division=1,
        )
    return callbacks
Example #13
        def objective(trial):
            lr = trial.suggest_loguniform("lr", 1e-3, 1e-1)
            num_hidden = int(trial.suggest_loguniform("num_hidden", 32, 128))

            loaders = {
                "train":
                DataLoader(
                    MNIST(os.getcwd(),
                          train=False,
                          download=True,
                          transform=ToTensor()),
                    batch_size=32,
                ),
                "valid":
                DataLoader(
                    MNIST(os.getcwd(),
                          train=False,
                          download=True,
                          transform=ToTensor()),
                    batch_size=32,
                ),
            }
            model = nn.Sequential(nn.Flatten(), nn.Linear(784, num_hidden),
                                  nn.ReLU(), nn.Linear(num_hidden, 10))
            optimizer = torch.optim.Adam(model.parameters(), lr=lr)
            criterion = nn.CrossEntropyLoss()

            runner = dl.SupervisedRunner(input_key="features",
                                         output_key="logits",
                                         target_key="targets")
            runner.train(
                engine=engine or dl.DeviceEngine(device),
                model=model,
                criterion=criterion,
                optimizer=optimizer,
                loaders=loaders,
                callbacks={
                    "optuna":
                    dl.OptunaPruningCallback(loader_key="valid",
                                             metric_key="accuracy01",
                                             minimize=False,
                                             trial=trial),
                    "accuracy":
                    dl.AccuracyCallback(input_key="logits",
                                        target_key="targets",
                                        num_classes=10),
                },
                num_epochs=2,
            )
            score = runner.callbacks["optuna"].best_score
            return score
Example #14
def test_evaluation_loader_empty_model() -> None:
    """
    Test that an assertion is raised when no model is given.
    """
    with pytest.raises(AssertionError) as record:
        dataset = DummyDataset()
        loader = DataLoader(dataset=dataset, batch_size=1)
        callbacks = [
            dl.AccuracyCallback(input_key="logits",
                                target_key="targets",
                                topk=(1, ))
        ]
        runner = SupervisedRunner()
        runner.evaluate_loader(loader=loader, callbacks=callbacks, model=None)
    if not record:
        pytest.fail("Expected an assertion because no model was given!")
Example #15
def test_evaluation_loader_custom_model() -> None:
    """
    Test that evaluate_loader works with a custom model.
    """
    dataset = DummyDataset()
    model = nn.Linear(in_features=dataset.features_dim,
                      out_features=dataset.out_dim)
    loader = DataLoader(dataset=dataset, batch_size=1)
    callbacks = [
        dl.AccuracyCallback(input_key="logits",
                            target_key="targets",
                            topk=(1, ))
    ]
    runner = SupervisedRunner()

    runner.evaluate_loader(loader=loader, callbacks=callbacks, model=model)
Example #16
    def test_model(self, config, testset):  # pylint: disable=unused-argument
        """A custom testing loop. """
        test_loader = torch.utils.data.DataLoader(
            testset, batch_size=config['batch_size'], shuffle=False)

        # Using Catalyst's SupervisedRunner and AccuracyCallback to compute accuracies
        runner = dl.SupervisedRunner()
        runner.train(model=self.model,
                     num_epochs=1,
                     loaders={"valid": test_loader},
                     logdir="./logs",
                     verbose=True,
                     callbacks=[
                         dl.AccuracyCallback(input_key="logits",
                                             target_key="targets",
                                             num_classes=10)
                     ])

        # Retrieving the top-1 accuracy from SupervisedRunner
        accuracy = runner.epoch_metrics["valid"]["accuracy"]
        return accuracy
Example #17
def test_catalyst_model_file(tmp_dir, runner, loaders):
    model = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10))
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.02)

    runner.train(
        model=model,
        engine=runner.engine,
        criterion=criterion,
        optimizer=optimizer,
        loaders=loaders,
        num_epochs=2,
        callbacks=[
            dl.AccuracyCallback(input_key="logits", target_key="targets"),
            DvcLiveCallback("model.pth"),
        ],
        logdir="./logs",
        valid_loader="valid",
        valid_metric="loss",
        minimize_valid_metric=True,
        verbose=True,
        load_best_on_end=True,
    )
    assert (tmp_dir / "model.pth").is_file()
Example #18
def train_experiment(device):
    with TemporaryDirectory() as logdir:
        # sample data
        num_samples, num_features, num_classes1, num_classes2 = int(1e4), int(
            1e1), 4, 10
        X = torch.rand(num_samples, num_features)
        y1 = (torch.rand(num_samples, ) * num_classes1).to(torch.int64)
        y2 = (torch.rand(num_samples, ) * num_classes2).to(torch.int64)

        # pytorch loaders
        dataset = TensorDataset(X, y1, y2)
        loader = DataLoader(dataset, batch_size=32, num_workers=1)
        loaders = {"train": loader, "valid": loader}

        class CustomModule(nn.Module):
            def __init__(self, in_features: int, out_features1: int,
                         out_features2: int):
                super().__init__()
                self.shared = nn.Linear(in_features, 128)
                self.head1 = nn.Linear(128, out_features1)
                self.head2 = nn.Linear(128, out_features2)

            def forward(self, x):
                x = self.shared(x)
                y1 = self.head1(x)
                y2 = self.head2(x)
                return y1, y2

        # model, criterion, optimizer, scheduler
        model = CustomModule(num_features, num_classes1, num_classes2)
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters())
        scheduler = optim.lr_scheduler.MultiStepLR(optimizer, [2])

        class CustomRunner(dl.Runner):
            def handle_batch(self, batch):
                x, y1, y2 = batch
                y1_hat, y2_hat = self.model(x)
                self.batch = {
                    "features": x,
                    "logits1": y1_hat,
                    "logits2": y2_hat,
                    "targets1": y1,
                    "targets2": y2,
                }

        # model training
        runner = CustomRunner()
        runner.train(
            model=model,
            criterion=criterion,
            optimizer=optimizer,
            scheduler=scheduler,
            loaders=loaders,
            num_epochs=1,
            verbose=False,
            callbacks=[
                dl.CriterionCallback(metric_key="loss1",
                                     input_key="logits1",
                                     target_key="targets1"),
                dl.CriterionCallback(metric_key="loss2",
                                     input_key="logits2",
                                     target_key="targets2"),
                dl.MetricAggregationCallback(prefix="loss",
                                             metrics=["loss1", "loss2"],
                                             mode="mean"),
                dl.OptimizerCallback(metric_key="loss"),
                dl.SchedulerCallback(),
                dl.AccuracyCallback(
                    input_key="logits1",
                    target_key="targets1",
                    num_classes=num_classes1,
                    prefix="one_",
                ),
                dl.AccuracyCallback(
                    input_key="logits2",
                    target_key="targets2",
                    num_classes=num_classes2,
                    prefix="two_",
                ),
                dl.ConfusionMatrixCallback(
                    input_key="logits1",
                    target_key="targets1",
                    num_classes=num_classes1,
                    prefix="one_cm",
                ),
                # catalyst[ml] required
                dl.ConfusionMatrixCallback(
                    input_key="logits2",
                    target_key="targets2",
                    num_classes=num_classes2,
                    prefix="two_cm",
                ),
                # catalyst[ml] required
                dl.CheckpointCallback(
                    "./logs/one",
                    loader_key="valid",
                    metric_key="one_accuracy",
                    minimize=False,
                    save_n_best=1,
                ),
                dl.CheckpointCallback(
                    "./logs/two",
                    loader_key="valid",
                    metric_key="two_accuracy03",
                    minimize=False,
                    save_n_best=3,
                ),
            ],
            loggers={
                "console": dl.ConsoleLogger(),
                "tb": dl.TensorboardLogger("./logs/tb")
            },
        )
Example #19
def train_experiment(device, engine=None):

    with TemporaryDirectory() as logdir:

        # 1. data and transforms

        transforms = Compose([
            torchvision.transforms.ToPILImage(),
            torchvision.transforms.RandomCrop((28, 28)),
            torchvision.transforms.RandomVerticalFlip(),
            torchvision.transforms.RandomHorizontalFlip(),
            torchvision.transforms.ToTensor(),
            Normalize((0.1307, ), (0.3081, )),
        ])

        transform_original = Compose([
            ToTensor(),
            Normalize((0.1307, ), (0.3081, )),
        ])

        mnist = MNIST("./logdir", train=True, download=True, transform=None)
        contrastive_mnist = SelfSupervisedDatasetWrapper(
            mnist,
            transforms=transforms,
            transform_original=transform_original)
        train_loader = torch.utils.data.DataLoader(contrastive_mnist,
                                                   batch_size=BATCH_SIZE)

        mnist_valid = MNIST("./logdir",
                            train=False,
                            download=True,
                            transform=None)
        contrastive_valid = SelfSupervisedDatasetWrapper(
            mnist_valid,
            transforms=transforms,
            transform_original=transform_original)
        valid_loader = torch.utils.data.DataLoader(contrastive_valid,
                                                   batch_size=BATCH_SIZE)

        # 2. model and optimizer
        encoder = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 16),
                                nn.LeakyReLU(inplace=True))
        projection_head = nn.Sequential(
            nn.Linear(16, 16, bias=False),
            nn.ReLU(inplace=True),
            nn.Linear(16, 16, bias=True),
        )

        class ContrastiveModel(torch.nn.Module):
            def __init__(self, model, encoder):
                super(ContrastiveModel, self).__init__()
                self.model = model
                self.encoder = encoder

            def forward(self, x):
                emb = self.encoder(x)
                projection = self.model(emb)
                return emb, projection

        model = ContrastiveModel(model=projection_head, encoder=encoder)

        optimizer = Adam(model.parameters(), lr=LR)

        # 3. criterion with triplets sampling
        criterion = NTXentLoss(tau=0.1)

        callbacks = [
            dl.ControlFlowCallback(
                dl.CriterionCallback(input_key="projection_left",
                                     target_key="projection_right",
                                     metric_key="loss"),
                loaders="train",
            ),
            dl.SklearnModelCallback(
                feature_key="embedding_left",
                target_key="target",
                train_loader="train",
                valid_loaders="valid",
                model_fn=RandomForestClassifier,
                predict_method="predict_proba",
                predict_key="sklearn_predict",
                random_state=RANDOM_STATE,
                n_estimators=50,
            ),
            dl.ControlFlowCallback(
                dl.AccuracyCallback(target_key="target",
                                    input_key="sklearn_predict",
                                    topk_args=(1, 3)),
                loaders="valid",
            ),
        ]

        runner = dl.SelfSupervisedRunner()

        logdir = "./logdir"
        runner.train(
            model=model,
            engine=engine or dl.DeviceEngine(device),
            criterion=criterion,
            optimizer=optimizer,
            callbacks=callbacks,
            loaders={
                "train": train_loader,
                "valid": valid_loader
            },
            verbose=False,
            logdir=logdir,
            valid_loader="train",
            valid_metric="loss",
            minimize_valid_metric=True,
            num_epochs=TRAIN_EPOCH,
        )

        valid_path = Path(logdir) / "logs/valid.csv"
        best_accuracy = max(
            float(row["accuracy"]) for row in read_csv(valid_path)
            if row["accuracy"] != "accuracy")

        assert best_accuracy > 0.6
Example #20
def main(args):
    train_dataset = TorchvisionDatasetWrapper(
        MNIST(root="./", download=True, train=True, transform=ToTensor())
    )
    val_dataset = TorchvisionDatasetWrapper(
        MNIST(root="./", download=True, train=False, transform=ToTensor())
    )

    train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=32, shuffle=True)
    val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_size=64)
    loaders = {"train": train_dataloader, "valid": val_dataloader}
    utils.set_global_seed(args.seed)
    net = nn.Sequential(
        Flatten(),
        nn.Linear(28 * 28, 300),
        nn.ReLU(),
        nn.Linear(300, 100),
        nn.ReLU(),
        nn.Linear(100, 10),
    )
    initial_state_dict = net.state_dict()

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(net.parameters())
    if args.device is not None:
        engine = dl.DeviceEngine(args.device)
    else:
        engine = None
    if args.vanilla_pruning:
        runner = dl.SupervisedRunner(engine=engine)

        runner.train(
            model=net,
            criterion=criterion,
            optimizer=optimizer,
            loaders=loaders,
            callbacks=[
                dl.AccuracyCallback(input_key="logits", target_key="targets", num_classes=10),
            ],
            logdir="./logdir",
            num_epochs=args.num_epochs,
            load_best_on_end=True,
            valid_metric="accuracy01",
            minimize_valid_metric=False,
            valid_loader="valid",
        )
        pruning_fn = partial(
            utils.pruning.prune_model,
            pruning_fn=args.pruning_method,
            amount=args.amount,
            keys_to_prune=["weights"],
            dim=args.dim,
            l_norm=args.n,
        )
        acc, amount = validate_model(
            runner, pruning_fn=pruning_fn, loader=loaders["valid"], num_sessions=args.num_sessions
        )
        torch.save(acc, "accuracy.pth")
        torch.save(amount, "amount.pth")

    else:
        runner = PruneRunner(num_sessions=args.num_sessions, engine=engine)
        callbacks = [
            dl.AccuracyCallback(input_key="logits", target_key="targets", num_classes=10),
            dl.PruningCallback(
                args.pruning_method,
                keys_to_prune=["weight"],
                amount=args.amount,
                remove_reparametrization_on_stage_end=False,
            ),
            dl.CriterionCallback(input_key="logits", target_key="targets", metric_key="loss"),
            dl.OptimizerCallback(metric_key="loss"),
        ]
        if args.lottery_ticket:
            callbacks.append(LotteryTicketCallback(initial_state_dict=initial_state_dict))
        if args.kd:
            net.load_state_dict(torch.load(args.state_dict))
            callbacks.append(
                PrepareForFinePruningCallback(probability_shift=args.probability_shift)
            )
            callbacks.append(KLDivCallback(temperature=4, student_logits_key="logits"))
            callbacks.append(
                MetricAggregationCallback(
                    prefix="loss", metrics={"loss": 0.1, "kl_div_loss": 0.9}, mode="weighted_sum"
                )
            )

        runner.train(
            model=net,
            criterion=criterion,
            optimizer=optimizer,
            loaders=loaders,
            callbacks=callbacks,
            logdir=args.logdir,
            num_epochs=args.num_epochs,
            load_best_on_end=True,
            valid_metric="accuracy01",
            minimize_valid_metric=False,
            valid_loader="valid",
        )
Example #21
# sample data
num_samples, num_features, num_classes = int(1e4), int(1e1), 4
X = torch.rand(num_samples, num_features)
y = (torch.rand(num_samples, ) * num_classes).to(torch.int64)

# pytorch loaders
dataset = TensorDataset(X, y)
loader = DataLoader(dataset, batch_size=32, num_workers=1)
loaders = {"train": loader, "valid": loader}

# model, criterion, optimizer, scheduler
model = torch.nn.Linear(num_features, num_classes)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters())
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, [2])

# model training
runner = dl.SupervisedRunner()
runner.train(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    loaders=loaders,
    logdir="./logdir",
    num_epochs=3,
    check=True,
    callbacks=[dl.AccuracyCallback(num_classes=num_classes)],
)
Example #22
def train_experiment(device, engine=None):
    with TemporaryDirectory() as logdir:
        model = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10))
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=0.02)

        loaders = {
            "train":
            DataLoader(MNIST(os.getcwd(),
                             train=False,
                             download=True,
                             transform=ToTensor()),
                       batch_size=32),
            "valid":
            DataLoader(MNIST(os.getcwd(),
                             train=False,
                             download=True,
                             transform=ToTensor()),
                       batch_size=32),
        }

        runner = dl.SupervisedRunner(input_key="features",
                                     output_key="logits",
                                     target_key="targets",
                                     loss_key="loss")
        callbacks = [
            dl.AccuracyCallback(input_key="logits",
                                target_key="targets",
                                topk_args=(1, 3, 5)),
            dl.PrecisionRecallF1SupportCallback(input_key="logits",
                                                target_key="targets",
                                                num_classes=10),
        ]
        if SETTINGS.ml_required:
            callbacks.append(
                dl.ConfusionMatrixCallback(input_key="logits",
                                           target_key="targets",
                                           num_classes=10))
        if SETTINGS.amp_required and (engine is None or not isinstance(
                engine,
            (dl.AMPEngine, dl.DataParallelAMPEngine,
             dl.DistributedDataParallelAMPEngine),
        )):
            callbacks.append(
                dl.AUCCallback(input_key="logits", target_key="targets"))
        if SETTINGS.onnx_required:
            callbacks.append(
                dl.OnnxCallback(logdir=logdir, input_key="features"))
        if SETTINGS.pruning_required:
            callbacks.append(
                dl.PruningCallback(pruning_fn="l1_unstructured", amount=0.5))
        if SETTINGS.quantization_required:
            callbacks.append(dl.QuantizationCallback(logdir=logdir))
        if engine is None or not isinstance(engine,
                                            dl.DistributedDataParallelEngine):
            callbacks.append(
                dl.TracingCallback(logdir=logdir, input_key="features"))
        # model training
        runner.train(
            engine=engine or dl.DeviceEngine(device),
            model=model,
            criterion=criterion,
            optimizer=optimizer,
            loaders=loaders,
            num_epochs=1,
            callbacks=callbacks,
            logdir=logdir,
            valid_loader="valid",
            valid_metric="loss",
            minimize_valid_metric=True,
            verbose=False,
            load_best_on_end=True,
            timeit=False,
            check=False,
            overfit=False,
            fp16=False,
            ddp=False,
        )
        # model inference
        for prediction in runner.predict_loader(loader=loaders["valid"]):
            assert prediction["logits"].detach().cpu().numpy().shape[-1] == 10
        # model post-processing
        features_batch = next(iter(loaders["valid"]))[0]
        # model stochastic weight averaging
        model.load_state_dict(
            utils.get_averaged_weights_by_path_mask(logdir=logdir,
                                                    path_mask="*.pth"))
        # model onnx export
        if SETTINGS.onnx_required:
            utils.onnx_export(
                model=runner.model,
                batch=runner.engine.sync_device(features_batch),
                file="./mnist.onnx",
                verbose=False,
            )
        # model quantization
        if SETTINGS.quantization_required:
            utils.quantize_model(model=runner.model)
        # model pruning
        if SETTINGS.pruning_required:
            utils.prune_model(model=runner.model,
                              pruning_fn="l1_unstructured",
                              amount=0.8)
        # model tracing
        utils.trace_model(model=runner.model, batch=features_batch)
Example #23
def train_experiment(engine=None):
    with TemporaryDirectory() as logdir:
        utils.set_global_seed(RANDOM_STATE)
        # 1. generate data
        num_samples, num_features, num_classes = int(1e4), int(30), 3
        X, y = make_classification(
            n_samples=num_samples,
            n_features=num_features,
            n_informative=num_features,
            n_repeated=0,
            n_redundant=0,
            n_classes=num_classes,
            n_clusters_per_class=1,
        )
        X, y = torch.tensor(X), torch.tensor(y)
        dataset = TensorDataset(X, y)
        loader = DataLoader(dataset,
                            batch_size=64,
                            num_workers=1,
                            shuffle=True)

        # 2. model, optimizer and scheduler
        hidden_size, out_features = 20, 16
        model = nn.Sequential(
            nn.Linear(num_features, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, out_features),
        )
        optimizer = Adam(model.parameters(), lr=LR)
        scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, [2])

        # 3. criterion with triplets sampling
        sampler_inbatch = HardTripletsSampler(norm_required=False)
        criterion = TripletMarginLossWithSampler(
            margin=0.5, sampler_inbatch=sampler_inbatch)

        # 4. training with catalyst Runner
        class CustomRunner(dl.SupervisedRunner):
            def handle_batch(self, batch) -> None:
                features, targets = batch["features"].float(
                ), batch["targets"].long()
                embeddings = self.model(features)
                self.batch = {
                    "embeddings": embeddings,
                    "targets": targets,
                }

        callbacks = [
            dl.SklearnModelCallback(
                feature_key="embeddings",
                target_key="targets",
                train_loader="train",
                valid_loaders="valid",
                model_fn=RandomForestClassifier,
                predict_method="predict_proba",
                predict_key="sklearn_predict",
                random_state=RANDOM_STATE,
                n_estimators=100,
            ),
            dl.ControlFlowCallbackWrapper(
                dl.AccuracyCallback(target_key="targets",
                                    input_key="sklearn_predict",
                                    topk=(1, 3)),
                loaders="valid",
            ),
        ]

        runner = CustomRunner(input_key="features", output_key="embeddings")
        runner.train(
            engine=engine,
            model=model,
            criterion=criterion,
            optimizer=optimizer,
            callbacks=callbacks,
            scheduler=scheduler,
            loaders={
                "train": loader,
                "valid": loader
            },
            verbose=False,
            valid_loader="valid",
            valid_metric="accuracy01",
            minimize_valid_metric=False,
            num_epochs=TRAIN_EPOCH,
            logdir=logdir,
        )

        best_accuracy = max(
            epoch_metrics["valid"]["accuracy01"]
            for epoch_metrics in runner.experiment_metrics.values())

        assert best_accuracy > 0.9
Example #24
runner.train(
    loaders=dataloaders,
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    logdir="./catalyst_logs",
    num_epochs=EPOCHS,
    #    valid_loader="valid",
    #    valid_metric="accuracy03",
    #    minimize_valid_metric=False,
    verbose=True,
    # uncomment for extra metrics:
    callbacks=[
        dl.AccuracyCallback(input_key="logits",
                            target_key="mask_class",
                            num_classes=NUM_CLASES),
        #         dl.PrecisionRecallF1SupportCallback(input_key="logits", target_key="mask_class", num_classes=NUM_CLASES),
        #         dl.AUCCallback(input_key="logits", target_key="mask_class"),
        dl.ConfusionMatrixCallback(input_key="logits",
                                   target_key="mask_class",
                                   num_classes=NUM_CLASES),
    ],
)
"""
torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda, last_epoch=-1, verbose=False)
torch.optim.lr_scheduler.MultiplicativeLR(optimizer, lr_lambda, last_epoch=-1, verbose=False)
torch.optim.lr_scheduler.StepLR(optimizer, step_size, gamma=0.1, last_epoch=-1, verbose=False)
torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones, gamma=0.1, last_epoch=-1, verbose=False)
torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma, last_epoch=-1, verbose=False)
torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max, eta_min=0, last_epoch=-1, verbose=False)
"""
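
As a minimal sketch (reusing the model, optimizer, criterion and dataloaders from the snippet above; CosineAnnealingLR and the EPOCHS constant are illustrative assumptions), any scheduler from this list plugs into runner.train, with dl.SchedulerCallback controlling when step() fires:

# Hedged sketch: wiring one of the schedulers listed above into runner.train.
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=EPOCHS)
runner.train(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    loaders=dataloaders,
    num_epochs=EPOCHS,
    logdir="./catalyst_logs",
    callbacks=[dl.SchedulerCallback(mode="epoch")],  # step the scheduler once per epoch
)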
Example #25
        dl.OptimizerCallback(metric_key="loss"),
        dl.SklearnModelCallback(
            feature_key="embedding_origin",
            target_key="target",
            train_loader="train",
            valid_loaders="valid",
            model_fn=LogisticRegression,
            predict_key="sklearn_predict",
            predict_method="predict_proba",
            C=0.1,
            solver="saga",
            max_iter=200,
        ),
        dl.ControlFlowCallbackWrapper(
            dl.AccuracyCallback(
                target_key="target", input_key="sklearn_predict", topk=(1, 3)
            ),
            loaders="valid",
        ),
    ]

    # train model
    runner = SelfSupervisedRunner()
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        callbacks=callbacks,
        loaders=get_loaders(args.dataset, args.batch_size, args.num_workers),
        num_epochs=args.epochs,
        logdir=args.logdir,
    )
Example #26
def train_experiment(device, engine=None):
    with TemporaryDirectory() as logdir:
        from catalyst import utils

        utils.set_global_seed(RANDOM_STATE)
        # 1. train, valid and test loaders
        transforms = Compose([ToTensor(), Normalize((0.1307, ), (0.3081, ))])

        train_data = MNIST(os.getcwd(),
                           train=True,
                           download=True,
                           transform=transforms)
        train_labels = train_data.targets.cpu().numpy().tolist()
        train_sampler = data.BatchBalanceClassSampler(train_labels,
                                                      num_classes=10,
                                                      num_samples=4)
        train_loader = DataLoader(train_data, batch_sampler=train_sampler)

        valid_dataset = MNIST(root=os.getcwd(),
                              transform=transforms,
                              train=False,
                              download=True)
        valid_loader = DataLoader(dataset=valid_dataset, batch_size=32)

        test_dataset = MNIST(root=os.getcwd(),
                             transform=transforms,
                             train=False,
                             download=True)
        test_loader = DataLoader(dataset=test_dataset, batch_size=32)

        # 2. model and optimizer
        model = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 16),
                              nn.LeakyReLU(inplace=True))
        optimizer = Adam(model.parameters(), lr=LR)
        scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, [2])

        # 3. criterion with triplets sampling
        sampler_inbatch = data.HardTripletsSampler(norm_required=False)
        criterion = nn.TripletMarginLossWithSampler(
            margin=0.5, sampler_inbatch=sampler_inbatch)

        # 4. training with catalyst Runner
        class CustomRunner(dl.SupervisedRunner):
            def handle_batch(self, batch) -> None:
                images, targets = batch["features"].float(
                ), batch["targets"].long()
                features = self.model(images)
                self.batch = {
                    "embeddings": features,
                    "targets": targets,
                }

        callbacks = [
            dl.ControlFlowCallback(
                dl.CriterionCallback(input_key="embeddings",
                                     target_key="targets",
                                     metric_key="loss"),
                loaders="train",
            ),
            dl.SklearnModelCallback(
                feature_key="embeddings",
                target_key="targets",
                train_loader="train",
                valid_loaders=["valid", "infer"],
                model_fn=RandomForestClassifier,
                predict_method="predict_proba",
                predict_key="sklearn_predict",
                random_state=RANDOM_STATE,
                n_estimators=50,
            ),
            dl.ControlFlowCallback(
                dl.AccuracyCallback(target_key="targets",
                                    input_key="sklearn_predict",
                                    topk_args=(1, 3)),
                loaders=["valid", "infer"],
            ),
        ]

        runner = CustomRunner(input_key="features", output_key="embeddings")
        runner.train(
            engine=engine or dl.DeviceEngine(device),
            model=model,
            criterion=criterion,
            optimizer=optimizer,
            scheduler=scheduler,
            callbacks=callbacks,
            loaders={
                "train": train_loader,
                "valid": valid_loader,
                "infer": test_loader
            },
            verbose=False,
            valid_loader="valid",
            valid_metric="accuracy",
            minimize_valid_metric=False,
            num_epochs=TRAIN_EPOCH,
            logdir=logdir,
        )

        valid_path = Path(logdir) / "logs/infer.csv"
        best_accuracy = max(
            float(row["accuracy"]) for row in read_csv(valid_path))

        assert best_accuracy > 0.8
Example #27
def main(args):
    wandb.init(project="teacher-pruning", config=vars(args))
    set_global_seed(42)
    # dataloader initialization
    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465),
                             (0.2023, 0.1994, 0.2010)),
    ])

    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465),
                             (0.2023, 0.1994, 0.2010)),
    ])

    train_dataset = Wrp(
        datasets.CIFAR10(root=os.getcwd(),
                         train=True,
                         transform=transform_train,
                         download=True))
    valid_dataset = Wrp(
        datasets.CIFAR10(root=os.getcwd(),
                         train=False,
                         transform=transform_test))
    train_dataloader = DataLoader(dataset=train_dataset,
                                  batch_size=args.batch_size,
                                  shuffle=True,
                                  num_workers=2)
    valid_dataloader = DataLoader(dataset=valid_dataset,
                                  batch_size=128,
                                  num_workers=2)
    loaders = {
        "train": train_dataloader,
        "valid": valid_dataloader,
    }
    # model initialization
    model = PreActResNet18()
    model.fc = nn.Linear(512, 10)
    if args.teacher_model is not None:
        is_kd = True
        teacher_model = NAME2MODEL[args.teacher_model]()
        load_model_from_path(model=teacher_model, path=args.teacher_path)
        model = {
            "student": model,
            "teacher": teacher_model,
        }
        output_hiddens = args.beta is None
        is_kd_on_hiddens = output_hiddens
        runner = KDRunner(device=args.device, output_hiddens=output_hiddens)
        parameters = model["student"].parameters()
    else:
        is_kd = False
        runner = dl.SupervisedRunner(device=args.device)
        parameters = model.parameters()
    # optimizer
    optimizer_cls = NAME2OPTIM[args.optimizer]
    optimizer_kwargs = {"params": parameters, "lr": args.lr}
    if args.optimizer == "sgd":
        optimizer_kwargs["momentum"] = args.momentum
    else:
        optimizer_kwargs["betas"] = (args.beta1, args.beta2)
    optimizer = optimizer_cls(**optimizer_kwargs)
    scheduler = MultiStepLR(optimizer, milestones=[80, 120], gamma=args.gamma)
    logdir = f"logs/{wandb.run.name}"
    # callbacks
    callbacks = [dl.AccuracyCallback(num_classes=10), WandbCallback()]
    if is_kd:
        metrics = {}
        callbacks.append(dl.CriterionCallback(output_key="cls_loss"))
        callbacks.append(DiffOutputCallback())
        coefs = get_loss_coefs(args.alpha, args.beta)
        metrics["cls_loss"] = coefs[0]
        metrics["diff_output_loss"] = coefs[1]
        if is_kd_on_hiddens:
            callbacks.append(DiffHiddenCallback())
            metrics["diff_hidden_loss"] = coefs[2]

        aggregator_callback = dl.MetricAggregationCallback(prefix="loss",
                                                           metrics=metrics,
                                                           mode="weighted_sum")
        wrapped_agg_callback = dl.ControlFlowCallback(aggregator_callback,
                                                      loaders=["train"])
        callbacks.append(wrapped_agg_callback)

    runner.train(
        model=model,
        optimizer=optimizer,
        scheduler=scheduler,
        criterion=nn.CrossEntropyLoss(),
        loaders=loaders,
        callbacks=callbacks,
        num_epochs=args.epoch,
        logdir=logdir,
        verbose=True,
    )
Example #28
def train_experiment(device):
    with TemporaryDirectory() as logdir:
        teacher = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10))
        student = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10))
        criterion = {
            "cls": nn.CrossEntropyLoss(),
            "kl": nn.KLDivLoss(reduction="batchmean")
        }
        optimizer = optim.Adam(student.parameters(), lr=0.02)

        loaders = {
            "train":
            DataLoader(MNIST(os.getcwd(),
                             train=True,
                             download=True,
                             transform=ToTensor()),
                       batch_size=32),
            "valid":
            DataLoader(MNIST(os.getcwd(),
                             train=False,
                             download=True,
                             transform=ToTensor()),
                       batch_size=32),
        }

        class DistilRunner(dl.Runner):
            def handle_batch(self, batch):
                x, y = batch

                teacher.eval()  # let's manually set teacher model to eval mode
                with torch.no_grad():
                    t_logits = self.model["teacher"](x)

                s_logits = self.model["student"](x)
                self.batch = {
                    "t_logits": t_logits,
                    "s_logits": s_logits,
                    "targets": y,
                    "s_logprobs": F.log_softmax(s_logits, dim=-1),
                    "t_probs": F.softmax(t_logits, dim=-1),
                }

        runner = DistilRunner()
        # model training
        runner.train(
            engine=dl.DeviceEngine(device),
            model={
                "teacher": teacher,
                "student": student
            },
            criterion=criterion,
            optimizer=optimizer,
            loaders=loaders,
            num_epochs=1,
            logdir=logdir,
            verbose=True,
            callbacks=[
                dl.AccuracyCallback(input_key="t_logits",
                                    target_key="targets",
                                    num_classes=2,
                                    prefix="teacher_"),
                dl.AccuracyCallback(input_key="s_logits",
                                    target_key="targets",
                                    num_classes=2,
                                    prefix="student_"),
                dl.CriterionCallback(
                    input_key="s_logits",
                    target_key="targets",
                    metric_key="cls_loss",
                    criterion_key="cls",
                ),
                dl.CriterionCallback(
                    input_key="s_logprobs",
                    target_key="t_probs",
                    metric_key="kl_div_loss",
                    criterion_key="kl",
                ),
                dl.MetricAggregationCallback(
                    prefix="loss",
                    metrics=["kl_div_loss", "cls_loss"],
                    mode="mean"),
                dl.OptimizerCallback(metric_key="loss", model_key="student"),
                dl.CheckpointCallback(
                    logdir=logdir,
                    loader_key="valid",
                    metric_key="loss",
                    minimize=True,
                    save_n_best=3,
                ),
            ],
        )
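Per batch, the distillation objective assembled by the callbacks above reduces to a cross-entropy term plus a KL term averaged together. A standalone sketch with random tensors (no temperature scaling, mirroring the callbacks exactly):

# Minimal sketch of the distillation loss computed by the callbacks above;
# the tensors are random stand-ins, not part of the original example.
import torch
import torch.nn.functional as F

s_logits = torch.randn(32, 10)           # student outputs
t_logits = torch.randn(32, 10)           # teacher outputs (computed under no_grad)
targets = torch.randint(0, 10, (32,))

cls_loss = F.cross_entropy(s_logits, targets)
kl_div_loss = F.kl_div(
    F.log_softmax(s_logits, dim=-1),     # student log-probabilities
    F.softmax(t_logits, dim=-1),         # teacher probabilities
    reduction="batchmean",
)
# MetricAggregationCallback with mode="mean" averages the two terms
loss = (cls_loss + kl_div_loss) / 2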
Example #29
def train(dev_dir, logdir, device):
    train = pd.read_csv(f'{dev_dir}/train.csv', index_col=0)
    train['all_utils'] = train['cmd_cleaned'].apply(select_utils)
    train = train.loc[train.all_utils.apply(str.strip).apply(len) > 0]
    train['util'] = train['all_utils'].apply(lambda x: x.split()[0])
    train = train.dropna().reset_index(drop=True)

    spm.SentencePieceTrainer.train(input=f'{dev_dir}/text',
                                   model_prefix=f'{dev_dir}/txt_bpe_clf',
                                   model_type='bpe',
                                   vocab_size=config.src_vocab_size)
    text_tokenizer = spm.SentencePieceProcessor(f'{dev_dir}/txt_bpe_clf.model')

    cmd_le = LabelEncoder()

    # progress_apply assumes tqdm's pandas integration was registered
    # upstream via tqdm.pandas()
    train['text_enc'] = train.text_cleaned.progress_apply(
        text_tokenizer.encode)
    train['y'] = cmd_le.fit_transform(train['util'].values)

    # hold the validation split out of the original rows only;
    # non-original (augmented) rows all go back into train
    tdf = train[train.origin == 'original']
    tdf2 = train[train.origin != 'original']
    train, valid = train_test_split(tdf, test_size=500, random_state=SEED)
    train = pd.concat([train, tdf2]).reset_index(drop=True)

    train_ds = UtilDataset(train.text_enc, train.y, config, bos_id, eos_id,
                           pad_id)
    valid_ds = UtilDataset(valid.text_enc, valid.y, config, bos_id, eos_id,
                           pad_id)

    model = BertClassifier(config, pad_id, len(cmd_le.classes_))
    print('# params',
          sum(p.numel() for p in model.parameters() if p.requires_grad))

    loaders = {
        'train':
        data.DataLoader(train_ds, batch_size=config.batch_size, shuffle=True),
        'valid':
        data.DataLoader(valid_ds, batch_size=config.batch_size),
    }

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=config.optimizer_lr,
                                 weight_decay=config.weight_decay,
                                 amsgrad=True)
    callbacks = [
        dl.CheckpointCallback(save_n_best=config.num_epochs),
        dl.AccuracyCallback(num_classes=len(cmd_le.classes_), topk_args=[1, 5])
    ]

    scheduler = None  # fall back to no scheduler for unrecognized config.schedule values
    if config.schedule == 'OneCycleLR':
        scheduler = torch.optim.lr_scheduler.OneCycleLR(
            optimizer,
            max_lr=config.optimizer_lr,
            epochs=config.num_epochs,
            steps_per_epoch=len(loaders['train']))
        callbacks.append(dl.SchedulerCallback(mode="batch"))

    elif config.schedule == 'ReduceLROnPlateau':
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer,
            factor=config.plateau_factor,
            patience=5,
            cooldown=3,
            threshold=1e-3,
            min_lr=1e-6)
        callbacks.append(dl.SchedulerCallback(mode="epoch"))

    shutil.rmtree(logdir, ignore_errors=True)
    os.makedirs(logdir, exist_ok=True)

    runner = dl.SupervisedRunner(device=device)
    runner.train(
        model=model,
        loaders=loaders,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        num_epochs=config.num_epochs,
        verbose=True,
        logdir=logdir,
        callbacks=callbacks,
    )
    joblib.dump(cmd_le, f'{dev_dir}/cmd_le')
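The classifier above depends on the BPE tokenizer trained at the top of the function. For reference, the standard sentencepiece round-trip that train() relies on looks like this; the model file name follows the example and the input sentence is made up.

# Sketch of the sentencepiece encode/decode round-trip (standard API;
# assumes the trained model file from the example exists on disk).
import sentencepiece as spm

sp = spm.SentencePieceProcessor(model_file='txt_bpe_clf.model')
ids = sp.encode('list all files recursively')                  # subword ids
pieces = sp.encode('list all files recursively', out_type=str) # subword strings
text = sp.decode(ids)                                          # back to the input text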
Example #30
def train_experiment(engine=None):
    with TemporaryDirectory() as logdir:
        # sample data
        num_samples, num_features = int(1e4), int(1e1)
        num_classes1, num_classes2 = 4, 10
        X = torch.rand(num_samples, num_features)
        y1 = (torch.rand(num_samples) * num_classes1).to(torch.int64)
        y2 = (torch.rand(num_samples) * num_classes2).to(torch.int64)

        # pytorch loaders
        dataset = TensorDataset(X, y1, y2)
        loader = DataLoader(dataset, batch_size=32, num_workers=1)
        loaders = {"train": loader, "valid": loader}

        # model, criterion, optimizer, scheduler
        model = CustomModule(num_features, num_classes1, num_classes2)
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters())
        scheduler = optim.lr_scheduler.MultiStepLR(optimizer, [2])

        callbacks = [
            dl.CriterionCallback(metric_key="loss1",
                                 input_key="logits1",
                                 target_key="targets1"),
            dl.CriterionCallback(metric_key="loss2",
                                 input_key="logits2",
                                 target_key="targets2"),
            dl.MetricAggregationCallback(metric_key="loss",
                                         metrics=["loss1", "loss2"],
                                         mode="mean"),
            dl.BackwardCallback(metric_key="loss"),
            dl.OptimizerCallback(metric_key="loss"),
            dl.SchedulerCallback(),
            dl.AccuracyCallback(
                input_key="logits1",
                target_key="targets1",
                num_classes=num_classes1,
                prefix="one_",
            ),
            dl.AccuracyCallback(
                input_key="logits2",
                target_key="targets2",
                num_classes=num_classes2,
                prefix="two_",
            ),
            dl.CheckpointCallback(
                "./logs/one",
                loader_key="valid",
                metric_key="one_accuracy01",
                minimize=False,
                topk=1,
            ),
            dl.CheckpointCallback(
                "./logs/two",
                loader_key="valid",
                metric_key="two_accuracy03",
                minimize=False,
                topk=3,
            ),
        ]
        if SETTINGS.ml_required:
            # confusion-matrix callbacks need the catalyst[ml] extras
            callbacks.append(
                dl.ConfusionMatrixCallback(
                    input_key="logits1",
                    target_key="targets1",
                    num_classes=num_classes1,
                    prefix="one_cm",
                ))
            callbacks.append(
                dl.ConfusionMatrixCallback(
                    input_key="logits2",
                    target_key="targets2",
                    num_classes=num_classes2,
                    prefix="two_cm",
                ))

        # model training
        runner = CustomRunner()
        runner.train(
            engine=engine,
            model=model,
            criterion=criterion,
            optimizer=optimizer,
            scheduler=scheduler,
            loaders=loaders,
            num_epochs=1,
            verbose=False,
            callbacks=callbacks,
            loggers={
                "console": dl.ConsoleLogger(),
                "tb": dl.TensorboardLogger("./logs/tb"),
            },
        )
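CustomModule and CustomRunner are referenced but not defined in this example. A minimal sketch consistent with the batch keys the callbacks expect (logits1/logits2, targets1/targets2) might look like the following; the hidden size and encoder layout are assumptions.

# Hypothetical definitions matching the batch keys used above.
import torch.nn as nn
from catalyst import dl

class CustomModule(nn.Module):
    """Two-head classifier: shared encoder, one linear head per target."""
    def __init__(self, num_features, num_classes1, num_classes2, hidden=128):
        super().__init__()
        self.encoder = nn.Sequential(nn.Linear(num_features, hidden), nn.ReLU())
        self.head1 = nn.Linear(hidden, num_classes1)
        self.head2 = nn.Linear(hidden, num_classes2)

    def forward(self, x):
        z = self.encoder(x)
        return self.head1(z), self.head2(z)

class CustomRunner(dl.Runner):
    def handle_batch(self, batch):
        # the loaders yield (X, y1, y2) triples, see TensorDataset above
        x, y1, y2 = batch
        logits1, logits2 = self.model(x)
        self.batch = {
            "logits1": logits1,
            "targets1": y1,
            "logits2": logits2,
            "targets2": y2,
        }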