def train_experiment(device, engine=None):
    with TemporaryDirectory() as logdir:
        # sample data
        num_samples, num_features, num_classes = int(1e4), int(1e1), 4
        X = torch.rand(num_samples, num_features)
        y = (torch.rand(num_samples, ) * num_classes).to(torch.int64)

        # pytorch loaders
        dataset = TensorDataset(X, y)
        loader = DataLoader(dataset, batch_size=32, num_workers=1)
        loaders = {"train": loader, "valid": loader}

        # model, criterion, optimizer, scheduler
        model = torch.nn.Linear(num_features, num_classes)
        criterion = torch.nn.CrossEntropyLoss()
        optimizer = torch.optim.Adam(model.parameters())
        scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, [2])

        # model training
        runner = dl.SupervisedRunner(input_key="features",
                                     output_key="logits",
                                     target_key="targets",
                                     loss_key="loss")
        callbacks = [
            dl.AccuracyCallback(input_key="logits",
                                target_key="targets",
                                num_classes=num_classes),
            dl.PrecisionRecallF1SupportCallback(input_key="logits",
                                                target_key="targets",
                                                num_classes=num_classes),
            dl.ConfusionMatrixCallback(input_key="logits",
                                       target_key="targets",
                                       num_classes=num_classes),
        ]
        if engine is None or not isinstance(
                engine, (dl.AMPEngine, dl.DataParallelAMPEngine,
                         dl.DistributedDataParallelAMPEngine)):
            callbacks.append(
                dl.AUCCallback(input_key="logits", target_key="targets"))

        runner.train(
            engine=engine or dl.DeviceEngine(device),
            model=model,
            criterion=criterion,
            optimizer=optimizer,
            scheduler=scheduler,
            loaders=loaders,
            logdir=logdir,
            num_epochs=1,
            valid_loader="valid",
            valid_metric="accuracy03",
            minimize_valid_metric=False,
            verbose=False,
            callbacks=callbacks,
        )
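
# Typical invocation of the helper above (assumed; not part of the original listing):
#   train_experiment("cpu")
#   train_experiment(None, engine=dl.AMPEngine())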
Example #2
def test_resume_with_missing_file():
    old_stdout = sys.stdout
    sys.stdout = str_stdout = StringIO()

    # experiment_setup
    logdir = "./logs/checkpoint_callback"
    checkpoint = logdir + "/checkpoints"
    logfile = checkpoint + "/_metrics.json"
    num_epochs = 5

    # data
    num_samples, num_features = int(1e4), int(1e1)
    X = torch.rand(num_samples, num_features)
    y = torch.randint(0, 5, size=[num_samples])
    dataset = TensorDataset(X, y)
    loader = DataLoader(dataset, batch_size=32, num_workers=1)
    loaders = {"train": loader, "valid": loader}

    # model, criterion, optimizer, scheduler
    model = torch.nn.Linear(num_features, 5)
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters())
    runner = dl.SupervisedRunner()

    with pytest.raises(FileNotFoundError):
        runner.train(
            model=model,
            criterion=criterion,
            optimizer=optimizer,
            loaders=loaders,
            logdir=logdir,
            num_epochs=num_epochs,
            verbose=False,
            valid_loader="valid",
            valid_metric="loss",
            minimize_valid_metric=True,
            callbacks=[
                dl.CheckpointCallback(
                    logdir=logdir,
                    loader_key="valid",
                    metric_key="loss",
                    minimize=True,
                    save_n_best=2,
                    load_on_stage_end={"model": "best", "criterion": "best", "optimizer": "last"},
                    resume="not_existing_file.pth",
                ),
                dl.CheckRunCallback(num_epoch_steps=num_epochs),
            ],
        )

    sys.stdout = old_stdout
    exp_output = str_stdout.getvalue()

    shutil.rmtree(logdir, ignore_errors=True)
Example #3
        def objective(trial):
            lr = trial.suggest_loguniform("lr", 1e-3, 1e-1)
            num_hidden = int(trial.suggest_loguniform("num_hidden", 32, 128))

            loaders = {
                "train":
                DataLoader(
                    MNIST(os.getcwd(),
                          train=False,
                          download=True,
                          transform=ToTensor()),
                    batch_size=32,
                ),
                "valid":
                DataLoader(
                    MNIST(os.getcwd(),
                          train=False,
                          download=True,
                          transform=ToTensor()),
                    batch_size=32,
                ),
            }
            model = nn.Sequential(nn.Flatten(), nn.Linear(784, num_hidden),
                                  nn.ReLU(), nn.Linear(num_hidden, 10))
            optimizer = torch.optim.Adam(model.parameters(), lr=lr)
            criterion = nn.CrossEntropyLoss()

            runner = dl.SupervisedRunner(input_key="features",
                                         output_key="logits",
                                         target_key="targets")
            runner.train(
                engine=engine or dl.DeviceEngine(device),
                model=model,
                criterion=criterion,
                optimizer=optimizer,
                loaders=loaders,
                callbacks={
                    "optuna":
                    dl.OptunaPruningCallback(loader_key="valid",
                                             metric_key="accuracy01",
                                             minimize=False,
                                             trial=trial),
                    "accuracy":
                    dl.AccuracyCallback(input_key="logits",
                                        target_key="targets",
                                        num_classes=10),
                },
                num_epochs=2,
            )
            score = runner.callbacks["optuna"].best_score
            return score
Example #4
def test_pruning_str_structured():
    dataloader = prepare_experiment()
    model = nn.Linear(100, 10, bias=False)
    runner = dl.SupervisedRunner()
    criterion = nn.CrossEntropyLoss()
    runner.train(
        model=model,
        optimizer=torch.optim.Adam(model.parameters()),
        criterion=criterion,
        loaders={"train": dataloader},
        callbacks=[PruningCallback("ln_structured", dim=1, l_norm=2)],
        num_epochs=1,
    )
    assert np.isclose(pruning_factor(model), 0.5)
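
# `pruning_factor` is a helper defined elsewhere in the original test module;
# a plausible minimal sketch (assumption, not the original code) that reports
# the share of weights zeroed out by the pruning mask:
def pruning_factor(model: nn.Module) -> float:
    weight = model.weight.detach()
    return float((weight == 0).sum().item()) / weight.numel()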
Example #5
def test_batch_overfit():
    loaders, model, criterion, optimizer, scheduler = _prepare_experiment()
    runner = dl.SupervisedRunner()
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        loaders=loaders,
        logdir="./logs/batch_overfit",
        num_epochs=1,
        verbose=False,
        callbacks=[dl.BatchOverfitCallback(train=1, valid=0.1)],
    )
    assert runner.epoch_metrics["train"]["loss"] < 1.4
    assert runner.epoch_metrics["valid"]["loss"] < 1.3
Example #6
    def train_model(self, config, trainset, sampler, cut_layer=None):  # pylint: disable=unused-argument
        """A custom training loop. """
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(self.model.parameters(), lr=0.02)
        train_loader = DataLoader(dataset=trainset,
                                  batch_size=config['batch_size'],
                                  sampler=sampler)

        # Training the model using Catalyst's SupervisedRunner
        runner = dl.SupervisedRunner()

        runner.train(model=self.model,
                     criterion=criterion,
                     optimizer=optimizer,
                     loaders={"train": train_loader},
                     num_epochs=1,
                     logdir="./logs",
                     verbose=True)
Example #7
def test_classification_pipeline():
    """
    Test if classification pipeline can run and compute metrics.
    In this test we check that BatchMetricCallback works with
    AccuracyMetric (ICallbackBatchMetric).
    """
    x = torch.rand(NUM_SAMPLES, NUM_FEATURES)
    y = (torch.rand(NUM_SAMPLES) * NUM_CLASSES).long()
    dataset = TensorDataset(x, y)
    loader = DataLoader(dataset, batch_size=64, num_workers=1)

    model = DummyModel(num_features=NUM_FEATURES, num_classes=NUM_CLASSES)
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters())

    runner = dl.SupervisedRunner(input_key="features",
                                 output_key="logits",
                                 target_key="targets")
    with TemporaryDirectory() as logdir:
        runner.train(
            model=model,
            criterion=criterion,
            optimizer=optimizer,
            loaders=OrderedDict({
                "train": loader,
                "valid": loader
            }),
            logdir=logdir,
            num_epochs=3,
            verbose=False,
            valid_loader="valid",
            valid_metric="loss",
            minimize_valid_metric=True,
            callbacks=OrderedDict({
                "classification":
                dl.BatchMetricCallback(
                    metric=AccuracyMetric(num_classes=NUM_CLASSES),
                    input_key="logits",
                    target_key="targets",
                ),
            }),
        )
        assert "accuracy01" in runner.batch_metrics
        assert "accuracy01" in runner.loader_metrics
def train_experiment(device):
    with TemporaryDirectory() as logdir:
        # sample data
        num_samples, num_features, num_classes = int(1e4), int(1e1), 4
        X = torch.rand(num_samples, num_features)
        y = (torch.rand(num_samples, num_classes) > 0.5).to(torch.float32)

        # pytorch loaders
        dataset = TensorDataset(X, y)
        loader = DataLoader(dataset, batch_size=32, num_workers=1)
        loaders = {"train": loader, "valid": loader}

        # model, criterion, optimizer, scheduler
        model = torch.nn.Linear(num_features, num_classes)
        criterion = torch.nn.BCEWithLogitsLoss()
        optimizer = torch.optim.Adam(model.parameters())
        scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, [2])

        # model training
        runner = dl.SupervisedRunner(input_key="features",
                                     output_key="logits",
                                     target_key="targets",
                                     loss_key="loss")
        runner.train(
            engine=dl.DeviceEngine(device),
            model=model,
            criterion=criterion,
            optimizer=optimizer,
            scheduler=scheduler,
            loaders=loaders,
            logdir=logdir,
            num_epochs=1,
            valid_loader="valid",
            valid_metric="accuracy",
            minimize_valid_metric=False,
            verbose=False,
            callbacks=[
                dl.AUCCallback(input_key="logits", target_key="targets"),
                dl.MultilabelAccuracyCallback(input_key="logits",
                                              target_key="targets",
                                              threshold=0.5),
            ],
        )
def objective(trial):
    lr = trial.suggest_loguniform("lr", 1e-3, 1e-1)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()
    runner = dl.SupervisedRunner()
    runner.train(
        model=model,
        loaders=loaders,
        criterion=criterion,
        optimizer=optimizer,
        callbacks=[
            OptunaCallback(trial),
            AccuracyCallback(num_classes=10),
        ],
        num_epochs=10,
        main_metric="accuracy01",
        minimize_metric=False,
    )
    return runner.best_valid_metrics[runner.main_metric]
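
# An objective like the one above is typically driven by an Optuna study;
# a minimal usage sketch (assumed, not part of the original snippet):
import optuna

study = optuna.create_study(direction="maximize",
                            pruner=optuna.pruners.MedianPruner())
study.optimize(objective, n_trials=10, timeout=300)
print(study.best_value, study.best_params)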
Example #10
def test_aggregation_2():
    """
    Aggregation with custom function
    """
    loaders, model, criterion, optimizer = prepare_experiment()
    runner = dl.SupervisedRunner()

    def aggregation_function(metrics, runner):
        epoch = runner.stage_epoch_step
        loss = (3 / 2 - epoch / 2) * metrics["loss_focal"] + (1 / 2 * epoch - 1 / 2) * metrics[
            "loss_bce"
        ]
        return loss
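    # Note (added for clarity, not in the original): assuming `stage_epoch_step`
    # counts from 1, this schedule shifts the weight from the focal loss to the
    # BCE loss over the 3 epochs:
    #   epoch 1 -> 1.0 * loss_focal + 0.0 * loss_bce
    #   epoch 2 -> 0.5 * loss_focal + 0.5 * loss_bce
    #   epoch 3 -> 0.0 * loss_focal + 1.0 * loss_bce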

    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        loaders=loaders,
        logdir="./logs/aggregation_2/",
        num_epochs=3,
        callbacks=[
            dl.CriterionCallback(
                input_key="logits",
                target_key="targets",
                metric_key="loss_bce",
                criterion_key="bce",
            ),
            dl.CriterionCallback(
                input_key="logits",
                target_key="targets",
                metric_key="loss_focal",
                criterion_key="focal",
            ),
            # loss aggregation
            dl.MetricAggregationCallback(metric_key="loss", mode=aggregation_function),
        ],
    )
    for loader in ["train", "valid"]:
        metrics = runner.epoch_metrics[loader]
        loss_1 = metrics["loss_bce"]
        loss_2 = metrics["loss"]
        assert np.abs(loss_1 - loss_2) < 1e-5
Example #11
def test_aggregation_1():
    """
    Aggregation as weighted_sum
    """
    loaders, model, criterion, optimizer = prepare_experiment()
    runner = dl.SupervisedRunner()
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        loaders=loaders,
        logdir="./logs/aggregation_1/",
        num_epochs=3,
        callbacks=[
            dl.CriterionCallback(
                input_key="logits",
                target_key="targets",
                metric_key="loss_bce",
                criterion_key="bce",
            ),
            dl.CriterionCallback(
                input_key="logits",
                target_key="targets",
                metric_key="loss_focal",
                criterion_key="focal",
            ),
            # loss aggregation
            dl.MetricAggregationCallback(
                metric_key="loss",
                metrics={
                    "loss_focal": 0.6,
                    "loss_bce": 0.4
                },
                mode="weighted_sum",
            ),
        ],
    )
    for loader in ["train", "valid"]:
        metrics = runner.epoch_metrics[loader]
        loss_1 = metrics["loss_bce"] * 0.4 + metrics["loss_focal"] * 0.6
        loss_2 = metrics["loss"]
        assert np.abs(loss_1 - loss_2) < 1e-5
Example #12
    def test_model(self, config, testset):  # pylint: disable=unused-argument
        """A custom testing loop. """
        test_loader = torch.utils.data.DataLoader(
            testset, batch_size=config['batch_size'], shuffle=False)

        # Using Catalyst's SupervisedRunner and AccuracyCallback to compute accuracies
        runner = dl.SupervisedRunner()
        runner.train(model=self.model,
                     num_epochs=1,
                     loaders={"valid": test_loader},
                     logdir="./logs",
                     verbose=True,
                     callbacks=[
                         dl.AccuracyCallback(input_key="logits",
                                             target_key="targets",
                                             num_classes=10)
                     ])

        # Retrieving the top-1 accuracy from SupervisedRunner
        accuracy = runner.epoch_metrics["valid"]["accuracy"]
        return accuracy
Example #13
def test_parametrization():
    dataloader = prepare_experiment()
    model = nn.Linear(100, 10, bias=False)
    runner = dl.SupervisedRunner()
    criterion = nn.CrossEntropyLoss()
    runner.train(
        model=model,
        optimizer=torch.optim.Adam(model.parameters()),
        criterion=criterion,
        loaders={"train": dataloader},
        callbacks=[
            PruningCallback(l1_unstructured,
                            remove_reparametrization_on_stage_end=False)
        ],
        num_epochs=1,
    )
    assert np.isclose(pruning_factor(model), 0.5)
    try:
        _mask = model.weight_mask
        mask_applied = True
    except AttributeError:
        mask_applied = False
    assert mask_applied
Example #14
def objective(trial):
    lr = trial.suggest_loguniform("lr", 1e-3, 1e-1)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()
    runner = dl.SupervisedRunner()
    runner.train(
        model=model,
        loaders=loaders,
        criterion=criterion,
        optimizer=optimizer,
        callbacks={
            "optuna": OptunaPruningCallback(
                loader_key="valid", metric_key="loss", minimize=True, trial=trial
            ),
            "accuracy": AccuracyCallback(
                num_classes=10, input_key="logits", target_key="targets"
            ),
        },
        num_epochs=2,
        valid_metric="accuracy01",
        minimize_valid_metric=False,
    )
    return trial.best_score
def train():
    num_features = 10
    model = Projector(num_features)

    runner = dl.SupervisedRunner()
    runner.train(
        model=model,
        # loaders={"train": loader, "valid": loader},
        datasets={
            "batch_size": 32,
            "num_workers": 1,
            "get_datasets_fn": datasets_fn,
            "num_features": num_features,
        },
        criterion=nn.MSELoss(),
        optimizer=optim.Adam(model.parameters()),
        logdir="logs/log_example_14",
        num_epochs=10,
        verbose=True,
        check=True,
        fp16=False,
        distributed=False,
    )
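
# The snippet above relies on `Projector` and `datasets_fn`, which the original
# example defines elsewhere; a minimal sketch of what they might look like
# (assumption, for illustration only; assumes the usual torch/nn/TensorDataset imports):
class Projector(nn.Module):
    def __init__(self, num_features: int):
        super().__init__()
        self.linear = nn.Linear(num_features, 1)

    def forward(self, x):
        return self.linear(x).squeeze(-1)


def datasets_fn(num_features: int):
    # builds the datasets that runner.train turns into loaders
    X = torch.rand(int(1e4), num_features)
    y = torch.rand(X.shape[0])
    dataset = TensorDataset(X, y)
    return {"train": dataset, "valid": dataset}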
Example #16
def test_batch_balance_class_sampler_with_prefetch():
    train_data = MNIST(os.getcwd(),
                       train=True,
                       download=True,
                       transform=ToTensor())
    train_labels = train_data.targets.cpu().numpy().tolist()
    train_sampler = BatchBalanceClassSampler(train_labels,
                                             num_classes=10,
                                             num_samples=4)
    valid_data = MNIST(os.getcwd(),
                       train=False,
                       download=True,
                       transform=ToTensor())

    loaders = {
        "train": DataLoader(train_data, batch_sampler=train_sampler),
        "valid": DataLoader(valid_data, batch_size=32),
    }
    loaders = {k: BatchPrefetchLoaderWrapper(v) for k, v in loaders.items()}

    model = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10))
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.02)

    runner = dl.SupervisedRunner()
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        loaders=loaders,
        num_epochs=1,
        logdir="./logs",
        valid_loader="valid",
        valid_metric="loss",
        minimize_valid_metric=True,
        verbose=True,
    )
Example #17
def test_quantization_callback() -> None:
    """Check that QuantizationCallback quantizes the model and saves it to disk."""
    loaders = {
        "train":
        DataLoader(
            MNIST(os.getcwd(),
                  train=False,
                  download=True,
                  transform=ToTensor()),
            batch_size=32,
        ),
        "valid":
        DataLoader(
            MNIST(os.getcwd(),
                  train=False,
                  download=True,
                  transform=ToTensor()),
            batch_size=32,
        ),
    }
    model = nn.Sequential(Flatten(), nn.Linear(784, 512), nn.ReLU(),
                          nn.Linear(512, 10))
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-2)
    runner = dl.SupervisedRunner()
    runner.train(
        model=model,
        callbacks=[dl.QuantizationCallback(logdir="./logs")],
        loaders=loaders,
        criterion=criterion,
        optimizer=optimizer,
        num_epochs=1,
        logdir="./logs",
        check=True,
    )
    assert os.path.isfile("./logs/quantized.pth")
Example #18
def train_experiment(device, engine=None):
    with TemporaryDirectory() as logdir:
        model = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10))
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=0.02)

        loaders = {
            "train":
            DataLoader(MNIST(os.getcwd(),
                             train=False,
                             download=True,
                             transform=ToTensor()),
                       batch_size=32),
            "valid":
            DataLoader(MNIST(os.getcwd(),
                             train=False,
                             download=True,
                             transform=ToTensor()),
                       batch_size=32),
        }

        runner = dl.SupervisedRunner(input_key="features",
                                     output_key="logits",
                                     target_key="targets",
                                     loss_key="loss")
        callbacks = [
            dl.AccuracyCallback(input_key="logits",
                                target_key="targets",
                                topk_args=(1, 3, 5)),
            dl.PrecisionRecallF1SupportCallback(input_key="logits",
                                                target_key="targets",
                                                num_classes=10),
        ]
        if SETTINGS.ml_required:
            callbacks.append(
                dl.ConfusionMatrixCallback(input_key="logits",
                                           target_key="targets",
                                           num_classes=10))
        if SETTINGS.amp_required and (engine is None or not isinstance(
                engine,
            (dl.AMPEngine, dl.DataParallelAMPEngine,
             dl.DistributedDataParallelAMPEngine),
        )):
            callbacks.append(
                dl.AUCCallback(input_key="logits", target_key="targets"))
        if SETTINGS.onnx_required:
            callbacks.append(
                dl.OnnxCallback(logdir=logdir, input_key="features"))
        if SETTINGS.pruning_required:
            callbacks.append(
                dl.PruningCallback(pruning_fn="l1_unstructured", amount=0.5))
        if SETTINGS.quantization_required:
            callbacks.append(dl.QuantizationCallback(logdir=logdir))
        if engine is None or not isinstance(engine,
                                            dl.DistributedDataParallelEngine):
            callbacks.append(
                dl.TracingCallback(logdir=logdir, input_key="features"))
        # model training
        runner.train(
            engine=engine or dl.DeviceEngine(device),
            model=model,
            criterion=criterion,
            optimizer=optimizer,
            loaders=loaders,
            num_epochs=1,
            callbacks=callbacks,
            logdir=logdir,
            valid_loader="valid",
            valid_metric="loss",
            minimize_valid_metric=True,
            verbose=False,
            load_best_on_end=True,
            timeit=False,
            check=False,
            overfit=False,
            fp16=False,
            ddp=False,
        )
        # model inference
        for prediction in runner.predict_loader(loader=loaders["valid"]):
            assert prediction["logits"].detach().cpu().numpy().shape[-1] == 10
        # model post-processing
        features_batch = next(iter(loaders["valid"]))[0]
        # model stochastic weight averaging
        model.load_state_dict(
            utils.get_averaged_weights_by_path_mask(logdir=logdir,
                                                    path_mask="*.pth"))
        # model onnx export
        if SETTINGS.onnx_required:
            utils.onnx_export(
                model=runner.model,
                batch=runner.engine.sync_device(features_batch),
                file="./mnist.onnx",
                verbose=False,
            )
        # model quantization
        if SETTINGS.quantization_required:
            utils.quantize_model(model=runner.model)
        # model pruning
        if SETTINGS.pruning_required:
            utils.prune_model(model=runner.model,
                              pruning_fn="l1_unstructured",
                              amount=0.8)
        # model tracing
        utils.trace_model(model=runner.model, batch=features_batch)
Example #19
    train_loader = DataLoader(train_ds,
                              batch_size=BATCH_SIZE,
                              num_workers=6,
                              shuffle=True)
    val_loader = DataLoader(val_ds,
                            batch_size=BATCH_SIZE,
                            num_workers=6,
                            shuffle=False)

    loaders = OrderedDict()
    loaders["train"] = train_loader
    loaders["valid"] = val_loader

    runner = dl.SupervisedRunner(device=tu.device,
                                 input_key="image",
                                 input_target_key="label",
                                 output_key="logits")

    callbacks = [
        CriterionCallback(input_key="label",
                          output_key="logits",
                          prefix="loss"),
        AccuracyCallback(input_key="label",
                         output_key="logits",
                         prefix="acc",
                         activation="Sigmoid"),
        OptimizerCallback(accumulation_steps=2),
        #MixupCallback(alpha=0.3, input_key="label", output_key="logits", fields=("image", ))
    ]
    if TRAINING:
        runner.train(model=model,
Example #20
def main(args):
    wandb.init(project="teacher-pruning", config=vars(args))
    set_global_seed(42)
    # dataloader initialization
    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465),
                             (0.2023, 0.1994, 0.2010)),
    ])

    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465),
                             (0.2023, 0.1994, 0.2010)),
    ])

    train_dataset = Wrp(
        datasets.CIFAR10(root=os.getcwd(),
                         train=True,
                         transform=transform_train,
                         download=True))
    valid_dataset = Wrp(
        datasets.CIFAR10(root=os.getcwd(),
                         train=False,
                         transform=transform_test))
    train_dataloader = DataLoader(dataset=train_dataset,
                                  batch_size=args.batch_size,
                                  shuffle=True,
                                  num_workers=2)
    valid_dataloader = DataLoader(dataset=valid_dataset,
                                  batch_size=128,
                                  num_workers=2)
    loaders = {
        "train": train_dataloader,
        "valid": valid_dataloader,
    }
    # model initialization
    model = PreActResNet18()
    model.fc = nn.Linear(512, 10)
    if args.teacher_model is not None:
        is_kd = True
        teacher_model = NAME2MODEL[args.teacher_model]()
        load_model_from_path(model=teacher_model, path=args.teacher_path)
        model = {
            "student": model,
            "teacher": teacher_model,
        }
        output_hiddens = args.beta is None
        is_kd_on_hiddens = output_hiddens
        runner = KDRunner(device=args.device, output_hiddens=output_hiddens)
        parameters = model["student"].parameters()
    else:
        is_kd = False
        runner = dl.SupervisedRunner(device=args.device)
        parameters = model.parameters()
    # optimizer
    optimizer_cls = NAME2OPTIM[args.optimizer]
    optimizer_kwargs = {"params": parameters, "lr": args.lr}
    if args.optimizer == "sgd":
        optimizer_kwargs["momentum"] = args.momentum
    else:
        optimizer_kwargs["betas"] = (args.beta1, args.beta2)
    optimizer = optimizer_cls(**optimizer_kwargs)
    scheduler = MultiStepLR(optimizer, milestones=[80, 120], gamma=args.gamma)
    logdir = f"logs/{wandb.run.name}"
    # callbacks
    callbacks = [dl.AccuracyCallback(num_classes=10), WandbCallback()]
    if is_kd:
        metrics = {}
        callbacks.append(dl.CriterionCallback(output_key="cls_loss"))
        callbacks.append(DiffOutputCallback())
        coefs = get_loss_coefs(args.alpha, args.beta)
        metrics["cls_loss"] = coefs[0]
        metrics["diff_output_loss"] = coefs[1]
        if is_kd_on_hiddens:
            callbacks.append(DiffHiddenCallback())
            metrics["diff_hidden_loss"] = coefs[2]

        aggregator_callback = dl.MetricAggregationCallback(prefix="loss",
                                                           metrics=metrics,
                                                           mode="weighted_sum")
        wrapped_agg_callback = dl.ControlFlowCallback(aggregator_callback,
                                                      loaders=["train"])
        callbacks.append(wrapped_agg_callback)

    runner.train(
        model=model,
        optimizer=optimizer,
        scheduler=scheduler,
        criterion=nn.CrossEntropyLoss(),
        loaders=loaders,
        callbacks=callbacks,
        num_epochs=args.epoch,
        logdir=logdir,
        verbose=True,
    )
def run_ml_pipeline(sampler_inbatch: data.IInbatchTripletSampler) -> float:
    """
    Full metric learning pipeline, including train and val.

    This function is also used as minimal example in README.md, section name:
    'CV - MNIST with Metric Learning'.

    Args:
        sampler_inbatch: sampler to forming triplets

    Returns:
        best metric value
    """
    # 1. train and valid datasets
    dataset_root = "./data"
    transforms = t.Compose([t.ToTensor(), t.Normalize((0.1307, ), (0.3081, ))])

    dataset_train = datasets.MnistMLDataset(
        root=dataset_root,
        train=True,
        download=True,
        transform=transforms,
    )
    sampler = data.BalanceBatchSampler(labels=dataset_train.get_labels(),
                                       p=5,
                                       k=10)
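    # with p=5 classes and k=10 samples per class, the sampler yields batches of
    # p * k == 50 samples (exposed as sampler.batch_size, used below)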
    train_loader = DataLoader(dataset=dataset_train,
                              sampler=sampler,
                              batch_size=sampler.batch_size)

    dataset_val = datasets.MnistQGDataset(root=dataset_root,
                                          transform=transforms,
                                          gallery_fraq=0.2)
    val_loader = DataLoader(dataset=dataset_val, batch_size=1024)

    # 2. model and optimizer
    model = models.SimpleConv(features_dim=16)
    optimizer = Adam(model.parameters(), lr=0.0005)

    # 3. criterion with triplets sampling
    criterion = nn.TripletMarginLossWithSampler(
        margin=0.5, sampler_inbatch=sampler_inbatch)

    # 4. training with catalyst Runner
    callbacks = [
        dl.ControlFlowCallback(dl.CriterionCallback(), loaders="train"),
        dl.ControlFlowCallback(dl.CMCScoreCallback(topk_args=[1]),
                               loaders="valid"),
        dl.PeriodicLoaderCallback(valid=100),
    ]

    runner = dl.SupervisedRunner(device=utils.get_device())
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        callbacks=callbacks,
        loaders={
            "train": train_loader,
            "valid": val_loader
        },
        minimize_metric=False,
        verbose=True,
        valid_loader="valid",
        num_epochs=100,
        main_metric="cmc01",
    )
    return runner.best_valid_metrics["cmc01"]
Example #22
    item_num = len(train_dataset[0])
    model = MultiDAE([200, 600, item_num], dropout=0.5)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    engine = dl.Engine()
    callbacks = [
        dl.NDCGCallback("logits", "targets", [20, 50, 100]),
        dl.MAPCallback("logits", "targets", [20, 50, 100]),
        dl.MRRCallback("logits", "targets", [20, 50, 100]),
        dl.HitrateCallback("logits", "targets", [20, 50, 100]),
        dl.BackwardCallback("loss"),
        dl.OptimizerCallback("loss", accumulation_steps=1),
    ]

    runner = dl.SupervisedRunner(input_key="inputs",
                                 output_key="logits",
                                 target_key="targets",
                                 loss_key="loss")
    runner.train(
        model=model,
        optimizer=optimizer,
        criterion=criterion,
        engine=engine,
        loaders=loaders,
        num_epochs=100,
        verbose=True,
        timeit=False,
        callbacks=callbacks,
        logdir="./logs_multidae",
    )
Example #23
def test_load_best_on_stage_end():
    old_stdout = sys.stdout
    sys.stdout = str_stdout = StringIO()

    # experiment_setup
    logdir = "./logs/checkpoint_callback"
    checkpoint = logdir  # + "/checkpoints"
    logfile = checkpoint + "/_metrics.json"

    # data
    num_samples, num_features = int(1e4), int(1e1)
    X = torch.rand(num_samples, num_features)
    y = torch.randint(0, 5, size=[num_samples])
    dataset = TensorDataset(X, y)
    loader = DataLoader(dataset, batch_size=32, num_workers=1)
    loaders = {"train": loader, "valid": loader}

    # model, criterion, optimizer, scheduler
    model = torch.nn.Linear(num_features, 5)
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters())
    runner = dl.SupervisedRunner()

    n_epochs = 5
    # first stage
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        loaders=loaders,
        logdir=logdir,
        num_epochs=n_epochs,
        verbose=False,
        valid_loader="valid",
        valid_metric="loss",
        minimize_valid_metric=True,
        callbacks=[
            dl.CheckpointCallback(
                logdir=logdir,
                loader_key="valid",
                metric_key="loss",
                minimize=True,
                save_n_best=2,
                load_on_stage_end="best",
            ),
            dl.CheckRunCallback(num_epoch_steps=n_epochs),
        ],
    )

    sys.stdout = old_stdout
    exp_output = str_stdout.getvalue()

    assert len(re.findall(r"=> Loading", exp_output)) == 1
    assert len(re.findall(r"=> Loading .*best\.pth", exp_output)) == 1

    assert os.path.isfile(logfile)
    assert os.path.isfile(checkpoint + "/train.4.pth")
    assert os.path.isfile(checkpoint + "/train.4_full.pth")
    assert os.path.isfile(checkpoint + "/train.5.pth")
    assert os.path.isfile(checkpoint + "/train.5_full.pth")
    assert os.path.isfile(checkpoint + "/best.pth")
    assert os.path.isfile(checkpoint + "/best_full.pth")
    assert os.path.isfile(checkpoint + "/last.pth")
    assert os.path.isfile(checkpoint + "/last_full.pth")

    shutil.rmtree(logdir, ignore_errors=True)
Example #24
callbacks = {
    "optimizer":
    dl.OptimizerCallback(metric_key="loss",
                         accumulation_steps=1,
                         grad_clip_params=None),
    "metric":
    dl.MetricCallback(scope='batch',
                      input_key='y',
                      output_key='preds',
                      prefix='F1_token',
                      metric_fn=ner_token_f1)
}

runner = dl.SupervisedRunner(
    input_key='features',
    output_key='preds',
    input_target_key="y",
)

runner.train(model=model,
             criterion=loss,
             optimizer=optimizer,
             scheduler=scheduler,
             loaders=dataloaders,
             callbacks=callbacks,
             logdir='./checkpoints',
             num_epochs=100,
             main_metric='F1_token',
             minimize_metric=False,
             verbose=True)
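
# `ner_token_f1` above comes from the original project; a rough sketch of a
# token-level macro-F1 with the (outputs, targets) signature that MetricCallback
# passes (an assumption; a real NER metric would also mask padding labels):
def ner_token_f1(outputs, targets):
    preds = outputs.argmax(dim=-1).view(-1)
    targets = targets.view(-1)
    f1_scores = []
    for label in targets.unique():
        tp = ((preds == label) & (targets == label)).sum().item()
        fp = ((preds == label) & (targets != label)).sum().item()
        fn = ((preds != label) & (targets == label)).sum().item()
        f1_scores.append(2 * tp / max(2 * tp + fp + fn, 1))
    return sum(f1_scores) / len(f1_scores)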
Example #25
def main():
    cifar_train = CIFAR10('.',
                          train=True,
                          transform=transforms.Compose([
                              transforms.Resize((224, 224)),
                              transforms.ToTensor()
                          ]),
                          download=True)
    cifar_test = CIFAR10('.',
                         train=False,
                         transform=transforms.Compose([
                             transforms.Resize((224, 224)),
                             transforms.ToTensor()
                         ]),
                         download=True)

    dl_train = DataLoader(cifar_train, batch_size=16)
    dl_test = DataLoader(cifar_test, batch_size=16)

    logdir = "./logdir/Adam"
    num_epochs = 10

    loaders = {'train': dl_train, 'valid': dl_test}

    model = resnet34()
    for name, param in model.named_parameters():
        param.requires_grad = True

    model.train()
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters())
    runner = dl.SupervisedRunner()

    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        loaders=loaders,
        num_epochs=num_epochs,
        verbose=True,
        logdir=logdir,
        callbacks=[
            logger.TensorboardLogger(),
            AccuracyCallback(num_classes=10)
        ],
    )

    logdir = "./logdir/AdamW"

    model.apply(init_weights)
    optimizer = AdamW(model.parameters())
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        loaders=loaders,
        num_epochs=num_epochs,
        verbose=True,
        logdir=logdir,
        callbacks=[
            logger.TensorboardLogger(),
            AccuracyCallback(num_classes=10)
        ],
    )

    logdir = "./logdir/RAdam"

    model.apply(init_weights)
    optimizer = RAdam(model.parameters())
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        loaders=loaders,
        num_epochs=num_epochs,
        verbose=True,
        logdir=logdir,
        callbacks=[
            logger.TensorboardLogger(),
            AccuracyCallback(num_classes=10)
        ],
    )
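
# `init_weights`, applied via `model.apply(init_weights)` above, is defined in
# the original script; a typical re-initialization helper might look like this
# (assumption, not the original code):
def init_weights(m):
    if isinstance(m, (nn.Linear, nn.Conv2d)):
        nn.init.kaiming_normal_(m.weight)
        if m.bias is not None:
            nn.init.zeros_(m.bias)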
Example #26
def train(dev_dir, logdir, device):
    if not config.joined_vocab:
        spm.SentencePieceTrainer.train(input=f'{dev_dir}/text',
                                       model_prefix=f'{dev_dir}/txt_bpe_ctx',
                                       model_type='bpe',
                                       vocab_size=config.src_vocab_size)
        spm.SentencePieceTrainer.train(
            input=f'{dev_dir}/cmd',
            model_prefix=f'{dev_dir}/cmd_bpe_ctx',
            model_type='bpe',
            vocab_size=config.tgt_vocab_size,
        )
        text_tokenizer = spm.SentencePieceProcessor(
            f'{dev_dir}/txt_bpe_ctx.model')
        cmd_tokenizer = spm.SentencePieceProcessor(
            f'{dev_dir}/cmd_bpe_ctx.model')

    else:
        spm.SentencePieceTrainer.train(
            input=f'{dev_dir}/all',
            model_prefix=f'{dev_dir}/all_bpe_ctx',
            model_type='bpe',
            vocab_size=config.src_vocab_size,
        )
        text_tokenizer = spm.SentencePieceProcessor(
            f'{dev_dir}/all_bpe_ctx.model')
        cmd_tokenizer = text_tokenizer

    train = pd.read_csv(f'{dev_dir}/train.csv', index_col=0)
    train = train.dropna()
    train['cmd_cleaned'] = train['cmd_cleaned'].apply(
        lambda cmd: cmd.replace('|', ' |'))
    train['util'] = train.cmd_cleaned.apply(
        lambda x: x.strip(' $()').split()[0])
    train = train[train.util != ']']
    train = train.reset_index(drop=True)

    mandf = pd.read_csv(f'{dev_dir}/man.csv', index_col=0)
    mandf['ctx'] = mandf.apply(make_ctx, axis=1)
    mandf = mandf.drop_duplicates(subset=('cmd'))
    mandf = mandf.set_index('cmd')

    train['ctx'] = train['util'].map(mandf.ctx)
    train.text_cleaned = train.text_cleaned + ' ' + train.ctx.fillna('')

    train['text_enc'] = train.text_cleaned.progress_apply(
        text_tokenizer.encode)
    train['cmd_enc'] = train.cmd_cleaned.progress_apply(cmd_tokenizer.encode)

    tdf = train[train.origin == 'original']
    tdf2 = train[train.origin != 'original']
    train, valid = train_test_split(tdf, test_size=500, random_state=SEED)
    train = pd.concat([train, tdf2]).reset_index(drop=True)

    train_ds = MtDataset(train.text_enc, train.cmd_enc, config, bos_id, eos_id,
                         pad_id)
    valid_ds = MtDataset(valid.text_enc, valid.cmd_enc, config, bos_id, eos_id,
                         pad_id)

    model = Transformer(config, pad_id)
    print('# params',
          sum(p.numel() for p in model.parameters() if p.requires_grad))

    loaders = {
        'train':
        data.DataLoader(train_ds, batch_size=config.batch_size, shuffle=True),
        'valid':
        data.DataLoader(valid_ds, batch_size=config.batch_size),
    }

    criterion = nn.CrossEntropyLoss(ignore_index=pad_id)
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=config.optimizer_lr,
                                 weight_decay=config.weight_decay,
                                 amsgrad=True)
    callbacks = [
        dl.CheckpointCallback(config.num_epochs),
    ]

    callbacks.append(dl.SchedulerCallback(mode="epoch"))
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer,
        factor=config.plateau_factor,
        patience=3,
        cooldown=2,
        threshold=1e-3,
        min_lr=1e-6)

    shutil.rmtree(logdir, ignore_errors=True)
    os.makedirs(logdir, exist_ok=True)

    runner = dl.SupervisedRunner(device=device)
    runner.train(
        model=model,
        loaders=loaders,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler if config.schedule else None,
        num_epochs=config.num_epochs,
        verbose=True,
        logdir=logdir,
        callbacks=callbacks,
        #     check=True
    )
Example #27
def train_experiment(device, engine=None):
    with TemporaryDirectory() as logdir:
        # sample data
        num_users, num_features, num_items = int(1e4), int(1e1), 10
        X = torch.rand(num_users, num_features)
        y = (torch.rand(num_users, num_items) > 0.5).to(torch.float32)

        # pytorch loaders
        dataset = TensorDataset(X, y)
        loader = DataLoader(dataset, batch_size=32, num_workers=1)
        loaders = {"train": loader, "valid": loader}

        # model, criterion, optimizer, scheduler
        model = torch.nn.Linear(num_features, num_items)
        criterion = torch.nn.BCEWithLogitsLoss()
        optimizer = torch.optim.Adam(model.parameters())
        scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, [2])

        callbacks = [
            dl.BatchTransformCallback(
                input_key="logits",
                output_key="scores",
                transform=torch.sigmoid,
                scope="on_batch_end",
            ),
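            # the callback above applies sigmoid to the logits at the end of each
            # batch and stores the result under "scores" for the ranking metrics below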
            dl.CriterionCallback(input_key="logits",
                                 target_key="targets",
                                 metric_key="loss"),
            dl.AUCCallback(input_key="scores", target_key="targets"),
            dl.HitrateCallback(input_key="scores",
                               target_key="targets",
                               topk_args=(1, 3, 5)),
            dl.MRRCallback(input_key="scores",
                           target_key="targets",
                           topk_args=(1, 3, 5)),
            dl.MAPCallback(input_key="scores",
                           target_key="targets",
                           topk_args=(1, 3, 5)),
            dl.NDCGCallback(input_key="scores",
                            target_key="targets",
                            topk_args=(1, 3, 5)),
            dl.OptimizerCallback(metric_key="loss"),
            dl.SchedulerCallback(),
            dl.CheckpointCallback(logdir=logdir,
                                  loader_key="valid",
                                  metric_key="map01",
                                  minimize=False),
        ]
        if SETTINGS.amp_required and (engine is None or not isinstance(
                engine,
            (dl.AMPEngine, dl.DataParallelAMPEngine,
             dl.DistributedDataParallelAMPEngine),
        )):
            callbacks.append(
                dl.AUCCallback(input_key="logits", target_key="targets"))

        # model training
        runner = dl.SupervisedRunner(input_key="features",
                                     output_key="logits",
                                     target_key="targets",
                                     loss_key="loss")
        runner.train(
            engine=engine or dl.DeviceEngine(device),
            model=model,
            criterion=criterion,
            optimizer=optimizer,
            scheduler=scheduler,
            loaders=loaders,
            num_epochs=1,
            verbose=False,
            callbacks=callbacks,
        )
def train():
    load_to_mem_train = True

    features_stats = {"mean": 86, "std": 22}
    batch_size = 512
    train_batch_size = batch_size
    validation_batch_size = train_batch_size

    path_to_train_data = "./data/external/train/"
    path_to_targets_train = "./data/external/train.csv"
    path_to_val_data = "./data/external/val/"
    path_to_targets_val = "./data/external/val.csv"
    path_to_save_model = "./models/model.pt"

    n_epochs = 2
    es_rounds = 35
    lr = 0.001

    backbone_output_dim = 1024
    backbone = VGGNet()
    model = Supervised1dModel(backbone=backbone,
                              backbone_output_dim=backbone_output_dim,
                              num_classes=3)

    n_workers = os.cpu_count()

    # define train, val and test datasets
    train_dataset = CustomDataset(path_to_data=path_to_train_data,
                                  files_to_use=None,
                                  load_data_to_mem=load_to_mem_train,
                                  path_to_targets=path_to_targets_train,
                                  features_stats=features_stats,
                                  mode="train")

    val_dataset = CustomDataset(path_to_data=path_to_val_data,
                                files_to_use=None,
                                load_data_to_mem=load_to_mem_train,
                                path_to_targets=path_to_targets_val,
                                features_stats=features_stats,
                                mode="val")

    # define train, val and test loaders
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=train_batch_size,
                                               collate_fn=collate_fn,
                                               num_workers=n_workers,
                                               shuffle=True)
    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=validation_batch_size,
                                             collate_fn=collate_fn,
                                             num_workers=n_workers,
                                             shuffle=False)

    # train
    runner = dl.SupervisedRunner()
    criterion = torch.nn.CrossEntropyLoss()
    callbacks = [
        dl.F1ScoreCallback(),
        dl.EarlyStoppingCallback(patience=es_rounds, minimize=True)
    ]

    print("\n\n")
    print("Main training")
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(
        optimizer, T_0=25, verbose=True)
    runner.train(model=model,
                 criterion=criterion,
                 optimizer=optimizer,
                 scheduler=scheduler,
                 loaders={
                     "train": train_loader,
                     "valid": val_loader
                 },
                 num_epochs=n_epochs,
                 callbacks=callbacks,
                 logdir="./logdir/",
                 load_best_on_end=True,
                 main_metric="f1_score",
                 minimize_metric=False,
                 fp16=True,
                 verbose=True)

    # save trained model
    torch.save(model, path_to_save_model)
# sample data
num_samples, num_features, num_classes = int(1e4), int(1e1), 4
X = torch.rand(num_samples, num_features)
y = (torch.rand(num_samples, ) * num_classes).to(torch.int64)

# pytorch loaders
dataset = TensorDataset(X, y)
loader = DataLoader(dataset, batch_size=32, num_workers=1)
loaders = {"train": loader, "valid": loader}

# model, criterion, optimizer, scheduler
model = torch.nn.Linear(num_features, num_classes)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters())
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, [2])

# model training
runner = dl.SupervisedRunner()
runner.train(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    loaders=loaders,
    logdir="./logdir",
    num_epochs=3,
    check=True,
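    # check=True runs a short sanity pass (a few batches/epochs) instead of full training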
    callbacks=[dl.AccuracyCallback(num_classes=num_classes)],
)
Example #30
def train(dev_dir, logdir, device):
    train = pd.read_csv(f'{dev_dir}/train.csv', index_col=0)
    train['all_utils'] = train['cmd_cleaned'].apply(select_utils)
    train = train.loc[train.all_utils.apply(str.strip).apply(len) > 0]
    train['util'] = train['all_utils'].apply(lambda x: x.split()[0])
    train = train.dropna().reset_index(drop=True)

    spm.SentencePieceTrainer.train(input=f'{dev_dir}/text',
                                   model_prefix=f'{dev_dir}/txt_bpe_clf',
                                   model_type='bpe',
                                   vocab_size=config.src_vocab_size)
    text_tokenizer = spm.SentencePieceProcessor(f'{dev_dir}/txt_bpe_clf.model')

    cmd_le = LabelEncoder()

    train['text_enc'] = train.text_cleaned.progress_apply(
        text_tokenizer.encode)
    train['y'] = cmd_le.fit_transform(train['util'].values)

    tdf = train[train.origin == 'original']
    tdf2 = train[train.origin != 'original']
    train, valid = train_test_split(tdf, test_size=500, random_state=SEED)
    train = pd.concat([train, tdf2]).reset_index(drop=True)

    train_ds = UtilDataset(train.text_enc, train.y, config, bos_id, eos_id,
                           pad_id)
    valid_ds = UtilDataset(valid.text_enc, valid.y, config, bos_id, eos_id,
                           pad_id)

    model = BertClassifier(config, pad_id, len(cmd_le.classes_))
    print('# params',
          sum(p.numel() for p in model.parameters() if p.requires_grad))

    loaders = {
        'train':
        data.DataLoader(train_ds, batch_size=config.batch_size, shuffle=True),
        'valid':
        data.DataLoader(valid_ds, batch_size=config.batch_size),
    }

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=config.optimizer_lr,
                                 weight_decay=config.weight_decay,
                                 amsgrad=True)
    callbacks = [
        dl.CheckpointCallback(config.num_epochs),
        dl.AccuracyCallback(num_classes=len(cmd_le.classes_), topk_args=[1, 5])
    ]

    if config.schedule == 'OneCycleLR':
        scheduler = torch.optim.lr_scheduler.OneCycleLR(
            optimizer,
            max_lr=config.optimizer_lr,
            epochs=config.num_epochs,
            steps_per_epoch=len(loaders['train']))
        callbacks.append(dl.SchedulerCallback(mode="batch"))

    elif config.schedule == 'ReduceLROnPlateau':
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer,
            factor=config.plateau_factor,
            patience=5,
            cooldown=3,
            threshold=1e-3,
            min_lr=1e-6)
        callbacks.append(dl.SchedulerCallback(mode="epoch"))

    shutil.rmtree(logdir, ignore_errors=True)
    os.makedirs(logdir, exist_ok=True)

    runner = dl.SupervisedRunner(device=device)
    runner.train(
        model=model,
        loaders=loaders,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler if config.schedule else None,
        num_epochs=config.num_epochs,
        verbose=True,
        logdir=logdir,
        callbacks=callbacks,
    )
    joblib.dump(cmd_le, f'{dev_dir}/cmd_le')