def test_disabling_loss_for_train():
    old_stdout = sys.stdout
    sys.stdout = str_stdout = StringIO()

    # experiment_setup
    logdir = "./logs/control_flow"
    checkpoint = logdir + "/checkpoints"
    logfile = checkpoint + "/_metrics.json"

    # data
    num_samples, num_features = int(1e4), int(1e1)
    X = torch.rand(num_samples, num_features)
    y = torch.randint(0, 5, size=[num_samples])
    dataset = TensorDataset(X, y)
    loader = DataLoader(dataset, batch_size=32, num_workers=1)
    loaders = {"train": loader, "valid": loader}

    # model, criterion, optimizer, scheduler
    model = torch.nn.Linear(num_features, 5)
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters())

    runner = dl.SupervisedRunner()
    n_epochs = 5

    # first stage
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        loaders=loaders,
        logdir=logdir,
        num_epochs=n_epochs,
        verbose=False,
        main_metric="accuracy01",
        callbacks=[
            dl.ControlFlowCallback(dl.CriterionCallback(), ignore_loaders=["train"]),
            dl.AccuracyCallback(accuracy_args=[1, 3, 5]),
            dl.CheckRunCallback(num_epoch_steps=n_epochs),
        ],
    )

    sys.stdout = old_stdout
    exp_output = str_stdout.getvalue()

    # loss is disabled for the "train" loader,
    # so it should be reported only for the "valid" loader
    assert len(re.findall(r"\(train\): loss", exp_output)) == 0
    assert len(re.findall(r"\(valid\): loss", exp_output)) == 5
    assert len(re.findall(r".*/train\.\d\.pth", exp_output)) == 1

    assert os.path.isfile(logfile)
    assert os.path.isfile(checkpoint + "/best.pth")
    assert os.path.isfile(checkpoint + "/best_full.pth")
    assert os.path.isfile(checkpoint + "/last.pth")
    assert os.path.isfile(checkpoint + "/last_full.pth")
    pth_files = [file for file in os.listdir(checkpoint) if file.endswith(".pth")]
    assert len(pth_files) == 6

    shutil.rmtree(logdir, ignore_errors=True)
def test_resume_with_missing_file():
    old_stdout = sys.stdout
    sys.stdout = str_stdout = StringIO()

    # experiment_setup
    logdir = "./logs/checkpoint_callback"
    checkpoint = logdir + "/checkpoints"
    logfile = checkpoint + "/_metrics.json"
    num_epochs = 5

    # data
    num_samples, num_features = int(1e4), int(1e1)
    X = torch.rand(num_samples, num_features)
    y = torch.randint(0, 5, size=[num_samples])
    dataset = TensorDataset(X, y)
    loader = DataLoader(dataset, batch_size=32, num_workers=1)
    loaders = {"train": loader, "valid": loader}

    # model, criterion, optimizer, scheduler
    model = torch.nn.Linear(num_features, 5)
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters())
    runner = dl.SupervisedRunner()

    with pytest.raises(FileNotFoundError):
        runner.train(
            model=model,
            criterion=criterion,
            optimizer=optimizer,
            loaders=loaders,
            logdir=logdir,
            num_epochs=num_epochs,
            verbose=False,
            valid_loader="valid",
            valid_metric="loss",
            minimize_valid_metric=True,
            callbacks=[
                dl.CheckpointCallback(
                    logdir=logdir,
                    loader_key="valid",
                    metric_key="loss",
                    minimize=True,
                    save_n_best=2,
                    load_on_stage_end={
                        "model": "best",
                        "criterion": "best",
                        "optimizer": "last",
                    },
                    resume="not_existing_file.pth",
                ),
                dl.CheckRunCallback(num_epoch_steps=num_epochs),
            ],
        )

    sys.stdout = old_stdout
    exp_output = str_stdout.getvalue()

    shutil.rmtree(logdir, ignore_errors=True)
def test_load_best_on_stage_end():
    old_stdout = sys.stdout
    sys.stdout = str_stdout = StringIO()

    # experiment_setup
    logdir = "./logs/checkpoint_callback"
    checkpoint = logdir + "/checkpoints"
    logfile = checkpoint + "/_metrics.json"

    # data
    num_samples, num_features = int(1e4), int(1e1)
    X = torch.rand(num_samples, num_features)
    y = torch.randint(0, 5, size=[num_samples])
    dataset = TensorDataset(X, y)
    loader = DataLoader(dataset, batch_size=32, num_workers=1)
    loaders = {"train": loader, "valid": loader}

    # model, criterion, optimizer, scheduler
    model = torch.nn.Linear(num_features, 5)
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters())

    runner = dl.SupervisedRunner()
    n_epochs = 5

    # first stage
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        loaders=loaders,
        logdir=logdir,
        num_epochs=n_epochs,
        verbose=False,
        callbacks=[
            dl.CheckpointCallback(save_n_best=2, load_on_stage_end="best"),
            dl.CheckRunCallback(num_epoch_steps=n_epochs),
        ],
    )

    sys.stdout = old_stdout
    exp_output = str_stdout.getvalue()

    assert len(re.findall(r"=> Loading", exp_output)) == 1
    assert len(re.findall(r"=> Loading .*best\.pth", exp_output)) == 1

    assert os.path.isfile(logfile)
    assert os.path.isfile(checkpoint + "/train.4.pth")
    assert os.path.isfile(checkpoint + "/train.4_full.pth")
    assert os.path.isfile(checkpoint + "/train.5.pth")
    assert os.path.isfile(checkpoint + "/train.5_full.pth")
    assert os.path.isfile(checkpoint + "/best.pth")
    assert os.path.isfile(checkpoint + "/best_full.pth")
    assert os.path.isfile(checkpoint + "/last.pth")
    assert os.path.isfile(checkpoint + "/last_full.pth")

    shutil.rmtree(logdir, ignore_errors=True)
def test_files_existence(tmpdir):
    logfile = tmpdir + "/model.storage.json"
    n_epochs = 5
    callbacks = [
        dl.CheckpointCallback(
            logdir=tmpdir,
            loader_key="valid",
            metric_key="loss",
            minimize=True,
            topk=2,
        ),
        dl.CheckRunCallback(num_epoch_steps=n_epochs),
    ]
    train_runner(tmpdir, n_epochs, callbacks)

    assert os.path.isfile(logfile)
    assert os.path.isfile(tmpdir + "/model.0004.pth")
    # assert os.path.isfile(tmpdir + "/train.4_full.pth")
    assert os.path.isfile(tmpdir + "/model.0005.pth")
    # assert os.path.isfile(tmpdir + "/train.5_full.pth")
    assert os.path.isfile(tmpdir + "/model.best.pth")
    # assert os.path.isfile(tmpdir + "/best_full.pth")
    assert os.path.isfile(tmpdir + "/model.last.pth")
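

# NOTE: `test_files_existence` relies on a `train_runner` helper that is not shown in
# this section and is assumed to be defined elsewhere in the test module. The function
# below is only a minimal sketch of such a helper, reusing the same toy data/model setup
# and the same `runner.train(...)` arguments that appear in the tests above; it is not
# the canonical implementation.
def train_runner(logdir, n_epochs, callbacks):
    # data
    num_samples, num_features = int(1e4), int(1e1)
    X = torch.rand(num_samples, num_features)
    y = torch.randint(0, 5, size=[num_samples])
    dataset = TensorDataset(X, y)
    loader = DataLoader(dataset, batch_size=32, num_workers=1)
    loaders = {"train": loader, "valid": loader}

    # model, criterion, optimizer
    model = torch.nn.Linear(num_features, 5)
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters())

    # run training with the callbacks provided by the test
    runner = dl.SupervisedRunner()
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        loaders=loaders,
        logdir=logdir,
        num_epochs=n_epochs,
        verbose=False,
        valid_loader="valid",
        valid_metric="loss",
        minimize_valid_metric=True,
        callbacks=callbacks,
    )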