Beispiel #1
0
def pytorch_model_with_callback(patience):
    """Fit the Iris classifier with early-stopping and checkpoint callbacks.

    Autologging is switched on before training. After the fit, the first run
    listed for experiment "0" is fetched from the tracking client.

    Args:
        patience: Forwarded to ``EarlyStopping`` as its ``patience`` value.

    Returns:
        A ``(trainer, run)`` tuple: the fitted ``pl.Trainer`` and the run
        object returned by ``MlflowClient.get_run``.
    """
    mlflow.pytorch.autolog()
    classifier = IrisClassification()
    data_module = IrisDataModule()
    data_module.setup(stage="fit")

    stopper_options = {
        "monitor": "val_loss",
        "mode": "min",
        "min_delta": 99999999,  # forces early stopping
        "patience": patience,
        "verbose": True,
    }
    stopper = EarlyStopping(**stopper_options)

    with TempDir() as tmp:
        # The keyword that receives the checkpoint directory is chosen from
        # the installed Lightning version.
        if Version(pl.__version__) >= Version("1.2.0"):
            path_kwarg = "dirpath"
        else:
            path_kwarg = "filepath"

        checkpoint_options = {
            path_kwarg: tmp.path(),
            "save_top_k": 1,
            "verbose": True,
            "monitor": "val_loss",
            "mode": "min",
        }
        checkpointer = ModelCheckpoint(**checkpoint_options)

        trainer = pl.Trainer(
            max_epochs=NUM_EPOCHS * 2,
            callbacks=[stopper, checkpointer],
        )
        trainer.fit(classifier, dm=data_module) if False else trainer.fit(
            classifier, data_module)

        tracking = mlflow.tracking.MlflowClient()
        # Presumably the first listed run is the one just produced -- confirm
        # the ordering guaranteed by ``list_run_infos``.
        first_info = tracking.list_run_infos(experiment_id="0")[0]
        run = tracking.get_run(first_info.run_id)

    return trainer, run
Beispiel #2
0
def test_pytorch_with_early_stopping_autolog_log_models_configuration_with(
        log_models, patience):
    """Check the "restored_model_checkpoint" artifact follows ``log_models``.

    Trains with early-stopping and checkpoint callbacks while autologging is
    enabled with the given ``log_models`` flag, then asserts that the
    "restored_model_checkpoint" artifact is present exactly when
    ``log_models`` is truthy.

    Args:
        log_models: Value passed to ``mlflow.pytorch.autolog(log_models=...)``
            and the expected presence of the artifact.
        patience: Forwarded to ``EarlyStopping`` as its ``patience`` value.
    """
    mlflow.pytorch.autolog(log_models=log_models)
    model = IrisClassification()
    dm = IrisDataModule()
    dm.setup(stage="fit")
    early_stopping = EarlyStopping(
        monitor="val_loss",
        mode="min",
        patience=patience,
        verbose=True,
    )

    # A single client serves both the run lookup and the artifact listing.
    client = mlflow.tracking.MlflowClient()

    with TempDir() as tmp:
        # The keyword that receives the checkpoint directory is chosen from
        # the installed Lightning version.
        keyword = "dirpath" if Version(
            pl.__version__) >= Version("1.2.0") else "filepath"
        checkpoint_callback = ModelCheckpoint(
            **{keyword: tmp.path()},
            save_top_k=1,
            verbose=True,
            monitor="val_loss",
            mode="min",
        )

        trainer = pl.Trainer(
            max_epochs=NUM_EPOCHS * 2,
            callbacks=[early_stopping, checkpoint_callback],
        )
        trainer.fit(model, dm)

        # The listed run info already carries the run id, so there is no need
        # to fetch the full run just to read the id back.
        run_id = client.list_run_infos(experiment_id="0")[0].run_id

    artifacts = [f.path for f in client.list_artifacts(run_id)]
    assert ("restored_model_checkpoint" in artifacts) == log_models
Beispiel #3
0
def test_pytorch_autologging_supports_data_parallel_execution():
    """Check autologging output when training with ``accelerator="ddp_cpu"``.

    Fits and tests the Iris classifier inside an explicit MLflow run using a
    trainer configured with ``num_processes=4``, then asserts that the
    expected metrics, optimizer parameter and artifacts were recorded on
    that run.
    """
    mlflow.pytorch.autolog()
    model = IrisClassification()
    dm = IrisDataModule()
    dm.setup(stage="fit")

    trainer = pl.Trainer(max_epochs=NUM_EPOCHS, accelerator="ddp_cpu", num_processes=4)

    with mlflow.start_run() as active_run:
        trainer.fit(model, datamodule=dm)
        trainer.test(datamodule=dm)

    # One client is enough for both the run lookup and the artifact listing.
    client = mlflow.tracking.MlflowClient()
    # Re-fetch the run so the assertions see what was stored by tracking.
    run = client.get_run(active_run.info.run_id)
    data = run.data

    # Checking if metrics are logged
    for metric_key in ["loss", "train_acc", "val_loss", "val_acc"]:
        assert metric_key in data.metrics

    assert "test_loss" in data.metrics
    assert "test_acc" in data.metrics

    # Testing optimizer parameters are logged
    assert "optimizer_name" in data.params
    assert data.params["optimizer_name"] == "Adam"

    # Testing model_summary.txt is saved
    artifacts = [f.path for f in client.list_artifacts(run.info.run_id)]
    assert "model" in artifacts
    assert "model_summary.txt" in artifacts
Beispiel #4
0
def pytorch_model():
    """Fit the Iris classifier with autologging and return trainer and run.

    Returns:
        A ``(trainer, run)`` tuple: the fitted ``pl.Trainer`` and the run
        fetched for the first run info listed under experiment "0".
    """
    mlflow.pytorch.autolog()

    classifier = IrisClassification()
    data_module = IrisDataModule()
    data_module.setup(stage="fit")

    trainer = pl.Trainer(max_epochs=NUM_EPOCHS)
    trainer.fit(classifier, data_module)

    tracking = mlflow.tracking.MlflowClient()
    # Presumably the first listed run is the one just produced -- confirm
    # the ordering guaranteed by ``list_run_infos``.
    first_info = tracking.list_run_infos(experiment_id="0")[0]
    run = tracking.get_run(first_info.run_id)
    return trainer, run
Beispiel #5
0
def train_evaluate(params, max_epochs=100):
    """Train and test an Iris classifier, returning its "test_acc" metric.

    Args:
        params: Mapping unpacked as keyword arguments into
            ``IrisClassification``.
        max_epochs: Epoch limit handed to ``pl.Trainer``.

    Returns:
        The value stored under "test_acc" in ``trainer.callback_metrics``,
        looked up with ``.get`` (so a missing key does not raise here).
    """
    classifier = IrisClassification(**params)

    data_module = IrisDataModule()
    data_module.setup(stage="fit")

    trainer = pl.Trainer(max_epochs=max_epochs)

    # Autologging is enabled after the trainer is built, before fitting.
    mlflow.pytorch.autolog()

    trainer.fit(classifier, data_module)
    trainer.test(datamodule=data_module)

    return trainer.callback_metrics.get("test_acc")
Beispiel #6
0
def test_pytorch_autolog_persists_manually_created_run():
    """Check that a manually started run is still active after fit and test.

    Training and testing happen inside ``mlflow.start_run()`` with
    autologging enabled; afterwards the active run must exist and carry the
    same run id as the manually created one.
    """
    with mlflow.start_run() as manual_run:
        mlflow.pytorch.autolog()

        classifier = IrisClassification()
        data_module = IrisDataModule()
        data_module.setup(stage="fit")

        trainer = pl.Trainer(max_epochs=NUM_EPOCHS)
        trainer.fit(classifier, data_module)
        trainer.test(datamodule=data_module)

        current = mlflow.active_run()
        assert current is not None
        assert current.info.run_id == manual_run.info.run_id
Beispiel #7
0
def pytorch_model_tests():
    """Fit and test the Iris classifier inside an explicit MLflow run.

    Returns:
        A ``(trainer, run)`` tuple: the ``pl.Trainer`` that was used and the
        run re-fetched from the tracking client by the id of the run opened
        for training.
    """
    mlflow.pytorch.autolog()

    classifier = IrisClassification()
    data_module = IrisDataModule()
    data_module.setup(stage="fit")
    trainer = pl.Trainer(max_epochs=NUM_EPOCHS)

    with mlflow.start_run() as active_run:
        trainer.fit(classifier, datamodule=data_module)
        trainer.test(datamodule=data_module)

    # Re-fetch the run through the client once the context has exited.
    tracking = mlflow.tracking.MlflowClient()
    finished_run = tracking.get_run(active_run.info.run_id)
    return trainer, finished_run
def pytorch_model_with_steps_logged(request):
    """Fit the Iris classifier with configurable autolog frequencies.

    Args:
        request: Object whose ``param`` attribute unpacks into
            ``(log_every_n_epoch, log_every_n_step)``. Presumably a pytest
            fixture request -- the decorator is not visible here.

    Returns:
        A ``(trainer, run, log_every_n_epoch, log_every_n_step)`` tuple,
        where ``run`` is fetched for the first run info listed under
        experiment "0".
    """
    log_every_n_epoch, log_every_n_step = request.param
    autolog_options = {
        "log_every_n_epoch": log_every_n_epoch,
        "log_every_n_step": log_every_n_step,
    }
    mlflow.pytorch.autolog(**autolog_options)

    classifier = IrisClassification()
    data_module = IrisDataModule()
    data_module.setup(stage="fit")

    trainer = pl.Trainer(max_epochs=NUM_EPOCHS)
    trainer.fit(classifier, data_module)

    tracking = mlflow.tracking.MlflowClient()
    # Presumably the first listed run is the one just produced -- confirm
    # the ordering guaranteed by ``list_run_infos``.
    first_info = tracking.list_run_infos(experiment_id="0")[0]
    run = tracking.get_run(first_info.run_id)
    return trainer, run, log_every_n_epoch, log_every_n_step
Beispiel #9
0
def test_pytorch_autolog_log_models_configuration(log_models):
    """Check that the "model" artifact is logged exactly when requested.

    Args:
        log_models: Value passed to ``mlflow.pytorch.autolog(log_models=...)``
            and the expected presence of the "model" artifact.
    """
    mlflow.pytorch.autolog(log_models=log_models)
    model = IrisClassification()
    dm = IrisDataModule()
    dm.setup(stage="fit")
    trainer = pl.Trainer(max_epochs=NUM_EPOCHS)
    trainer.fit(model, dm)

    # A single client serves both the run lookup and the artifact listing;
    # the listed run info already carries the run id, so the full run does
    # not need to be fetched just to read the id back.
    client = mlflow.tracking.MlflowClient()
    run_id = client.list_run_infos(experiment_id="0")[0].run_id
    artifacts = [f.path for f in client.list_artifacts(run_id)]
    assert ("model" in artifacts) == log_models
Beispiel #10
0
def test_autolog_registering_model():
    """Check that autologging registers the model under the requested name.

    Autologging is configured with ``registered_model_name``; after fitting
    inside an MLflow run, the registry entry is looked up by that name and
    its ``name`` attribute is compared against it.
    """
    expected_name = "test_autolog_registered_model"
    mlflow.pytorch.autolog(registered_model_name=expected_name)

    classifier = IrisClassification()
    data_module = IrisDataModule()
    data_module.setup(stage="fit")
    trainer = pl.Trainer(max_epochs=NUM_EPOCHS)

    with mlflow.start_run():
        trainer.fit(classifier, data_module)

        # The lookup happens while the run is still open.
        registry_entry = MlflowClient().get_registered_model(expected_name)
        assert registry_entry.name == expected_name
def pytorch_model_with_callback(patience):
    """Fit the Iris classifier with early stopping and a checkpoint callback.

    The data module is prepared and set up for the "fit" stage, autologging
    is enabled, and after training the first run listed for experiment "0"
    is fetched from the tracking client.

    NOTE(review): ``filepath=``/``prefix=`` on ``ModelCheckpoint`` and passing
    the callback through ``checkpoint_callback=`` look tied to an older
    Lightning API -- confirm against the Lightning version in use.

    Args:
        patience: Forwarded to ``EarlyStopping`` as its ``patience`` value.

    Returns:
        A ``(trainer, run)`` tuple: the fitted ``pl.Trainer`` and the run
        object returned by ``MlflowClient.get_run``.
    """
    mlflow.pytorch.autolog()
    classifier = IrisClassification()
    data_module = IrisDataModule()
    data_module.prepare_data()
    data_module.setup(stage="fit")

    stopper_options = {
        "monitor": "val_loss",
        "mode": "min",
        "min_delta": 99999999,  # forces early stopping
        "patience": patience,
        "verbose": True,
    }
    stopper = EarlyStopping(**stopper_options)

    with TempDir() as tmp:
        checkpoint_options = {
            "filepath": tmp.path(),
            "save_top_k": 1,
            "verbose": True,
            "monitor": "val_loss",
            "mode": "min",
            "prefix": "",
        }
        checkpointer = ModelCheckpoint(**checkpoint_options)

        trainer = pl.Trainer(
            max_epochs=NUM_EPOCHS * 2,
            callbacks=[stopper],
            checkpoint_callback=checkpointer,
        )
        trainer.fit(classifier, data_module)

        tracking = mlflow.tracking.MlflowClient()
        # Presumably the first listed run is the one just produced -- confirm
        # the ordering guaranteed by ``list_run_infos``.
        first_info = tracking.list_run_infos(experiment_id="0")[0]
        run = tracking.get_run(first_info.run_id)

    return trainer, run
Beispiel #12
0
        "--save-model",
        type=bool,
        default=True,
        help="For Saving the current Model",
    )
    parser.add_argument(
        "--accelerator",
        type=lambda x: None if x == "None" else x,
        default=None,
        help="Accelerator - (default: None)",
    )

    from iris_data_module import IrisDataModule

    parser = IrisClassification.add_model_specific_args(parent_parser=parser)
    parser = IrisDataModule.add_model_specific_args(parent_parser=parser)

    args = parser.parse_args()
    dict_args = vars(args)

    dm = IrisDataModule(**dict_args)
    dm.prepare_data()
    dm.setup(stage="fit")

    model = IrisClassification(**dict_args)
    trainer = pl.Trainer.from_argparse_args(args)
    trainer.fit(model, dm)
    trainer.test()

    torch.save(model.state_dict(), "iris.pt")