# Imports these tests rely on; IrisClassification, IrisDataModule, and the
# NUM_EPOCHS constant are defined elsewhere in this suite and are not shown here.
import pytest
import pytorch_lightning as pl
from packaging.version import Version
from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint

import mlflow
import mlflow.pytorch
from mlflow.exceptions import MlflowException
from mlflow.tracking import MlflowClient
from mlflow.utils.file_utils import TempDir


def test_pytorch_with_early_stopping_autolog_log_models_configuration_with(log_models, patience):
    mlflow.pytorch.autolog(log_models=log_models)
    model = IrisClassification()
    dm = IrisDataModule()
    dm.setup(stage="fit")
    early_stopping = EarlyStopping(monitor="val_loss", mode="min", patience=patience, verbose=True)
    with TempDir() as tmp:
        # PyTorch-Lightning >= 1.2.0 renamed ModelCheckpoint's `filepath` argument to `dirpath`
        keyword = "dirpath" if Version(pl.__version__) >= Version("1.2.0") else "filepath"
        checkpoint_callback = ModelCheckpoint(
            **{keyword: tmp.path()},
            save_top_k=1,
            verbose=True,
            monitor="val_loss",
            mode="min",
        )
        trainer = pl.Trainer(
            max_epochs=NUM_EPOCHS * 2,
            callbacks=[early_stopping, checkpoint_callback],
        )
        trainer.fit(model, dm)
        client = mlflow.tracking.MlflowClient()
        run = client.get_run(client.list_run_infos(experiment_id="0")[0].run_id)
    run_id = run.info.run_id
    artifacts = [f.path for f in client.list_artifacts(run_id)]
    # The restored checkpoint artifact should be logged if and only if log_models is enabled
    assert ("restored_model_checkpoint" in artifacts) == log_models
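# The tests in this module that accept `log_models` / `patience` arguments are
# presumably driven by pytest parametrization, which this excerpt omits. A
# minimal sketch; the value lists below are illustrative, not the suite's actual ones:
#
#     @pytest.mark.parametrize("log_models", [True, False])
#     @pytest.mark.parametrize("patience", [3])
#     def test_pytorch_with_early_stopping_autolog_log_models_configuration_with(
#         log_models, patience
#     ):
#         ...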
def test_pytorch_autologging_supports_data_parallel_execution():
    mlflow.pytorch.autolog()
    model = IrisClassification()
    dm = IrisDataModule()
    dm.setup(stage="fit")
    trainer = pl.Trainer(max_epochs=NUM_EPOCHS, accelerator="ddp_cpu", num_processes=4)

    with mlflow.start_run() as run:
        trainer.fit(model, datamodule=dm)
        trainer.test(datamodule=dm)

    client = mlflow.tracking.MlflowClient()
    run = client.get_run(run.info.run_id)

    # Check that training and validation metrics are logged
    for metric_key in ["loss", "train_acc", "val_loss", "val_acc"]:
        assert metric_key in run.data.metrics

    data = run.data
    assert "test_loss" in data.metrics
    assert "test_acc" in data.metrics

    # Check that optimizer parameters are logged
    assert "optimizer_name" in data.params
    assert data.params["optimizer_name"] == "Adam"

    # Check that the model and model_summary.txt are saved as artifacts
    artifacts = client.list_artifacts(run.info.run_id)
    artifacts = [f.path for f in artifacts]
    assert "model" in artifacts
    assert "model_summary.txt" in artifacts
def pytorch_model_with_callback(patience):
    mlflow.pytorch.autolog()
    model = IrisClassification()
    dm = IrisDataModule()
    dm.setup(stage="fit")
    early_stopping = EarlyStopping(
        monitor="val_loss",
        mode="min",
        min_delta=99999999,  # forces early stopping
        patience=patience,
        verbose=True,
    )
    with TempDir() as tmp:
        # PyTorch-Lightning >= 1.2.0 renamed ModelCheckpoint's `filepath` argument to `dirpath`
        keyword = "dirpath" if Version(pl.__version__) >= Version("1.2.0") else "filepath"
        checkpoint_callback = ModelCheckpoint(
            **{keyword: tmp.path()},
            save_top_k=1,
            verbose=True,
            monitor="val_loss",
            mode="min",
        )
        trainer = pl.Trainer(
            max_epochs=NUM_EPOCHS * 2,
            callbacks=[early_stopping, checkpoint_callback],
        )
        trainer.fit(model, dm)
        client = mlflow.tracking.MlflowClient()
        run = client.get_run(client.list_run_infos(experiment_id="0")[0].run_id)
    return trainer, run
def pytorch_model():
    mlflow.pytorch.autolog()
    model = IrisClassification()
    dm = IrisDataModule()
    dm.setup(stage="fit")
    trainer = pl.Trainer(max_epochs=NUM_EPOCHS)
    trainer.fit(model, dm)
    client = mlflow.tracking.MlflowClient()
    run = client.get_run(client.list_run_infos(experiment_id="0")[0].run_id)
    return trainer, run
def train_evaluate(params, max_epochs=100):
    model = IrisClassification(**params)
    dm = IrisDataModule()
    dm.setup(stage="fit")
    trainer = pl.Trainer(max_epochs=max_epochs)
    mlflow.pytorch.autolog()
    trainer.fit(model, dm)
    trainer.test(datamodule=dm)
    test_accuracy = trainer.callback_metrics.get("test_acc")
    return test_accuracy
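# A minimal usage sketch for train_evaluate, assuming IrisClassification accepts
# a `learning_rate` constructor argument (a hypothetical parameter name): run a
# small grid over candidate learning rates and keep the best-scoring one.
def sweep_learning_rates():
    best_params, best_accuracy = None, 0.0
    for lr in [1e-3, 1e-2, 1e-1]:
        # Each call trains, tests, and autologs one run, then returns test accuracy
        accuracy = train_evaluate({"learning_rate": lr}, max_epochs=10)
        if accuracy is not None and accuracy > best_accuracy:
            best_params, best_accuracy = {"learning_rate": lr}, accuracy
    return best_params, best_accuracy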
def test_pytorch_autolog_persists_manually_created_run():
    with mlflow.start_run() as manual_run:
        mlflow.pytorch.autolog()
        model = IrisClassification()
        dm = IrisDataModule()
        dm.setup(stage="fit")
        trainer = pl.Trainer(max_epochs=NUM_EPOCHS)
        trainer.fit(model, dm)
        trainer.test(datamodule=dm)
        # Autologging should reuse the manually created run rather than replacing it
        assert mlflow.active_run() is not None
        assert mlflow.active_run().info.run_id == manual_run.info.run_id
def pytorch_model_tests():
    mlflow.pytorch.autolog()
    model = IrisClassification()
    dm = IrisDataModule()
    dm.setup(stage="fit")
    trainer = pl.Trainer(max_epochs=NUM_EPOCHS)
    with mlflow.start_run() as run:
        trainer.fit(model, datamodule=dm)
        trainer.test(datamodule=dm)
    client = mlflow.tracking.MlflowClient()
    run = client.get_run(run.info.run_id)
    return trainer, run
def pytorch_model_with_steps_logged(request):
    log_every_n_epoch, log_every_n_step = request.param
    mlflow.pytorch.autolog(
        log_every_n_epoch=log_every_n_epoch, log_every_n_step=log_every_n_step
    )
    model = IrisClassification()
    dm = IrisDataModule()
    dm.setup(stage="fit")
    trainer = pl.Trainer(max_epochs=NUM_EPOCHS)
    trainer.fit(model, dm)
    client = mlflow.tracking.MlflowClient()
    run = client.get_run(client.list_run_infos(experiment_id="0")[0].run_id)
    return trainer, run, log_every_n_epoch, log_every_n_step
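# A sketch of how the fixture above might be consumed, assuming it is registered
# with @pytest.fixture (the decorator is not shown in this excerpt) and that the
# (log_every_n_epoch, log_every_n_step) pairs below are illustrative values only.
@pytest.mark.parametrize(
    "pytorch_model_with_steps_logged", [(1, None), (1, 1)], indirect=True
)
def test_metrics_logged_at_configured_cadence(pytorch_model_with_steps_logged):
    trainer, run, log_every_n_epoch, log_every_n_step = pytorch_model_with_steps_logged
    # At minimum, some metrics should have been autologged to the run
    assert run.data.metrics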
def test_pytorch_autolog_log_models_configuration(log_models):
    mlflow.pytorch.autolog(log_models=log_models)
    model = IrisClassification()
    dm = IrisDataModule()
    dm.setup(stage="fit")
    trainer = pl.Trainer(max_epochs=NUM_EPOCHS)
    trainer.fit(model, dm)
    client = mlflow.tracking.MlflowClient()
    run = client.get_run(client.list_run_infos(experiment_id="0")[0].run_id)
    run_id = run.info.run_id
    artifacts = [f.path for f in client.list_artifacts(run_id)]
    # The model artifact should be logged if and only if log_models is enabled
    assert ("model" in artifacts) == log_models
def test_autolog_registering_model():
    registered_model_name = "test_autolog_registered_model"
    mlflow.pytorch.autolog(registered_model_name=registered_model_name)
    model = IrisClassification()
    dm = IrisDataModule()
    dm.setup(stage="fit")
    trainer = pl.Trainer(max_epochs=NUM_EPOCHS)
    with mlflow.start_run():
        trainer.fit(model, dm)
    # Autologging should have registered the model under the configured name
    registered_model = MlflowClient().get_registered_model(registered_model_name)
    assert registered_model.name == registered_model_name
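# A minimal sketch of consuming the model registered above via a models:/ URI,
# assuming autologging produced version 1 (the version number is illustrative):
def load_registered_model_sketch():
    return mlflow.pytorch.load_model("models:/test_autolog_registered_model/1")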
def test_pytorch_autolog_raises_error_when_step_logging_unsupported():
    mlflow.pytorch.autolog(log_every_n_step=1)
    model = IrisClassification()
    dm = IrisDataModule()
    trainer = pl.Trainer(max_epochs=NUM_EPOCHS)
    with pytest.raises(
        MlflowException,
        match="log_every_n_step is only supported for PyTorch-Lightning >= 1.1.0",
    ):
        trainer.fit(model, dm)
def pytorch_model_with_callback(patience):
    # Legacy variant of the fixture above, written against an older
    # PyTorch-Lightning API: ModelCheckpoint's `filepath`/`prefix` arguments
    # and the `checkpoint_callback` Trainer keyword.
    mlflow.pytorch.autolog()
    model = IrisClassification()
    dm = IrisDataModule()
    dm.prepare_data()
    dm.setup(stage="fit")
    early_stopping = EarlyStopping(
        monitor="val_loss",
        mode="min",
        min_delta=99999999,  # forces early stopping
        patience=patience,
        verbose=True,
    )
    with TempDir() as tmp:
        checkpoint_callback = ModelCheckpoint(
            filepath=tmp.path(),
            save_top_k=1,
            verbose=True,
            monitor="val_loss",
            mode="min",
            prefix="",
        )
        trainer = pl.Trainer(
            max_epochs=NUM_EPOCHS * 2,
            callbacks=[early_stopping],
            checkpoint_callback=checkpoint_callback,
        )
        trainer.fit(model, dm)
        client = mlflow.tracking.MlflowClient()
        run = client.get_run(client.list_run_infos(experiment_id="0")[0].run_id)
    return trainer, run
"--save-model", type=bool, default=True, help="For Saving the current Model", ) parser.add_argument( "--accelerator", type=lambda x: None if x == "None" else x, default=None, help="Accelerator - (default: None)", ) from iris_data_module import IrisDataModule parser = IrisClassification.add_model_specific_args(parent_parser=parser) parser = IrisDataModule.add_model_specific_args(parent_parser=parser) args = parser.parse_args() dict_args = vars(args) dm = IrisDataModule(**dict_args) dm.prepare_data() dm.setup(stage="fit") model = IrisClassification(**dict_args) trainer = pl.Trainer.from_argparse_args(args) trainer.fit(model, dm) trainer.test() torch.save(model.state_dict(), "iris.pt")