Example #1
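These snippets omit their imports; a likely set covering both examples (assumed here, not shown in the original listing) is:

import os
import re
import shutil
import sys
from io import StringIO

import pytest
import torch
from torch.utils.data import DataLoader, TensorDataset

from catalyst import dl
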
def test_disabling_loss_for_train():
    old_stdout = sys.stdout
    sys.stdout = str_stdout = StringIO()

    # experiment_setup
    logdir = "./logs/control_flow"
    checkpoint = logdir + "/checkpoints"
    logfile = checkpoint + "/_metrics.json"

    # data
    num_samples, num_features = int(1e4), int(1e1)
    X = torch.rand(num_samples, num_features)
    y = torch.randint(0, 5, size=[num_samples])
    dataset = TensorDataset(X, y)
    loader = DataLoader(dataset, batch_size=32, num_workers=1)
    loaders = {"train": loader, "valid": loader}

    # model, criterion, optimizer, scheduler
    model = torch.nn.Linear(num_features, 5)
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters())
    runner = dl.SupervisedRunner()

    n_epochs = 5
    # first stage
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        loaders=loaders,
        logdir=logdir,
        num_epochs=n_epochs,
        verbose=False,
        main_metric="accuracy01",
        callbacks=[
            dl.ControlFlowCallback(dl.CriterionCallback(),
                                   ignore_loaders=["train"]),
            dl.AccuracyCallback(accuracy_args=[1, 3, 5]),
            dl.CheckRunCallback(num_epoch_steps=n_epochs),
        ],
    )

    sys.stdout = old_stdout
    exp_output = str_stdout.getvalue()

    assert len(re.findall(r"\(train\): loss", exp_output)) == 5
    assert len(re.findall(r"\(valid\): loss", exp_output)) == 0
    assert len(re.findall(r".*/train\.\d\.pth", exp_output)) == 1

    assert os.path.isfile(logfile)
    assert os.path.isfile(checkpoint + "/best.pth")
    assert os.path.isfile(checkpoint + "/best_full.pth")
    assert os.path.isfile(checkpoint + "/last.pth")
    assert os.path.isfile(checkpoint + "/last_full.pth")
    pth_files = [
        file for file in os.listdir(checkpoint) if file.endswith(".pth")
    ]
    assert len(pth_files) == 6

    shutil.rmtree(logdir, ignore_errors=True)
Example #2
def test_resume_with_missing_file():
    old_stdout = sys.stdout
    sys.stdout = str_stdout = StringIO()

    # experiment_setup
    logdir = "./logs/checkpoint_callback"
    checkpoint = logdir + "/checkpoints"
    logfile = checkpoint + "/_metrics.json"
    num_epochs = 5

    # data
    num_samples, num_features = int(1e4), int(1e1)
    X = torch.rand(num_samples, num_features)
    y = torch.randint(0, 5, size=[num_samples])
    dataset = TensorDataset(X, y)
    loader = DataLoader(dataset, batch_size=32, num_workers=1)
    loaders = {"train": loader, "valid": loader}

    # model, criterion, optimizer, scheduler
    model = torch.nn.Linear(num_features, 5)
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters())
    runner = dl.SupervisedRunner()

    with pytest.raises(FileNotFoundError):
        runner.train(
            model=model,
            criterion=criterion,
            optimizer=optimizer,
            loaders=loaders,
            logdir=logdir,
            num_epochs=num_epochs,
            verbose=False,
            valid_loader="valid",
            valid_metric="loss",
            minimize_valid_metric=True,
            callbacks=[
                dl.CheckpointCallback(
                    logdir=logdir,
                    loader_key="valid",
                    metric_key="loss",
                    minimize=True,
                    save_n_best=2,
                    load_on_stage_end={
                        "model": "best",
                        "criterion": "best",
                        "optimizer": "last"
                    },
                    resume="not_existing_file.pth",
                ),
                dl.CheckRunCallback(num_epoch_steps=num_epochs),
            ],
        )

    sys.stdout = old_stdout
    exp_output = str_stdout.getvalue()

    shutil.rmtree(logdir, ignore_errors=True)


def test_load_best_on_stage_end():
    old_stdout = sys.stdout
    sys.stdout = str_stdout = StringIO()

    # experiment_setup
    logdir = "./logs/checkpoint_callback"
    checkpoint = logdir + "/checkpoints"
    logfile = checkpoint + "/_metrics.json"

    # data
    num_samples, num_features = int(1e4), int(1e1)
    X = torch.rand(num_samples, num_features)
    y = torch.randint(0, 5, size=[num_samples])
    dataset = TensorDataset(X, y)
    loader = DataLoader(dataset, batch_size=32, num_workers=1)
    loaders = {"train": loader, "valid": loader}

    # model, criterion, optimizer, scheduler
    model = torch.nn.Linear(num_features, 5)
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters())
    runner = dl.SupervisedRunner()

    n_epochs = 5
    # first stage
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        loaders=loaders,
        logdir=logdir,
        num_epochs=n_epochs,
        verbose=False,
        callbacks=[
            dl.CheckpointCallback(save_n_best=2, load_on_stage_end="best"),
            dl.CheckRunCallback(num_epoch_steps=n_epochs),
        ],
    )

    sys.stdout = old_stdout
    exp_output = str_stdout.getvalue()

    assert len(re.findall(r"=> Loading", exp_output)) == 1
    assert len(re.findall(r"=> Loading .*best\.pth", exp_output)) == 1

    assert os.path.isfile(logfile)
    assert os.path.isfile(checkpoint + "/train.4.pth")
    assert os.path.isfile(checkpoint + "/train.4_full.pth")
    assert os.path.isfile(checkpoint + "/train.5.pth")
    assert os.path.isfile(checkpoint + "/train.5_full.pth")
    assert os.path.isfile(checkpoint + "/best.pth")
    assert os.path.isfile(checkpoint + "/best_full.pth")
    assert os.path.isfile(checkpoint + "/last.pth")
    assert os.path.isfile(checkpoint + "/last_full.pth")

    shutil.rmtree(logdir, ignore_errors=True)


def test_files_existence(tmpdir):
    logfile = tmpdir + "/model.storage.json"
    n_epochs = 5
    callbacks = [
        dl.CheckpointCallback(
            logdir=tmpdir,
            loader_key="valid",
            metric_key="loss",
            minimize=True,
            topk=2,
        ),
        dl.CheckRunCallback(num_epoch_steps=n_epochs),
    ]
    train_runner(tmpdir, n_epochs, callbacks)

    assert os.path.isfile(logfile)
    assert os.path.isfile(tmpdir + "/model.0004.pth")
    # assert os.path.isfile(tmpdir + "/train.4_full.pth")
    assert os.path.isfile(tmpdir + "/model.0005.pth")
    # assert os.path.isfile(tmpdir + "/train.5_full.pth")
    assert os.path.isfile(tmpdir + "/model.best.pth")
    # assert os.path.isfile(tmpdir + "/best_full.pth")
    assert os.path.isfile(tmpdir + "/model.last.pth")
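
The train_runner helper called above is not part of the listing; judging from the other examples it simply wraps the usual data/model/optimizer setup and the runner.train(...) call. A minimal sketch under that assumption (the signature and defaults are guesses, not the original helper):

def train_runner(logdir, n_epochs, callbacks):
    # synthetic classification data, as in the other tests
    num_samples, num_features = int(1e4), int(1e1)
    X = torch.rand(num_samples, num_features)
    y = torch.randint(0, 5, size=[num_samples])
    loader = DataLoader(TensorDataset(X, y), batch_size=32, num_workers=1)
    loaders = {"train": loader, "valid": loader}

    # simple linear model, criterion, optimizer
    model = torch.nn.Linear(num_features, 5)
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters())
    runner = dl.SupervisedRunner()

    # train for n_epochs, selecting on validation loss, with the callbacks
    # supplied by the calling test
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        loaders=loaders,
        logdir=logdir,
        num_epochs=n_epochs,
        verbose=False,
        valid_loader="valid",
        valid_metric="loss",
        minimize_valid_metric=True,
        callbacks=callbacks,
    )
    return runner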