예제 #1
0
    def testMlFlowLoggerCallbackConfig(self):
        """Check experiment selection from constructor args and env vars.

        The cases run one after another. Every env-var driven case first
        calls ``clear_env_vars()`` and then sets only the variables it needs.
        """

        def use_env(**variables):
            # Call clear_env_vars() before setting this case's variables.
            clear_env_vars()
            os.environ.update(variables)

        def expect_experiment(callback, names, experiment_id):
            # Compare the client's known experiments and the selected id.
            self.assertListEqual(callback.client.experiment_names, names)
            self.assertEqual(callback.experiment_id, experiment_id)

        # Every setting supplied explicitly through the constructor.
        callback = MLflowLoggerCallback(
            tracking_uri="test1",
            registry_uri="test2",
            experiment_name="test_exp")
        self.assertEqual(callback.client.tracking_uri, "test1")
        self.assertEqual(callback.client.registry_uri, "test2")
        expect_experiment(callback, ["existing_experiment", "test_exp"], 1)

        # An experiment name the client already knows must be reused.
        callback = MLflowLoggerCallback(experiment_name="existing_experiment")
        expect_experiment(callback, ["existing_experiment"], 0)

        # Experiment name taken from the environment (not yet existing).
        use_env(MLFLOW_EXPERIMENT_NAME="test_exp")
        callback = MLflowLoggerCallback()
        expect_experiment(callback, ["existing_experiment", "test_exp"], 1)

        # Experiment name taken from the environment (already existing).
        use_env(MLFLOW_EXPERIMENT_NAME="existing_experiment")
        callback = MLflowLoggerCallback()
        expect_experiment(callback, ["existing_experiment"], 0)

        # Existing experiment id taken from the environment. The id is
        # compared against the string "0" here, not the integer 0.
        use_env(MLFLOW_EXPERIMENT_ID="0")
        callback = MLflowLoggerCallback()
        expect_experiment(callback, ["existing_experiment"], "0")

        # An experiment id that does not exist must be rejected.
        use_env(MLFLOW_EXPERIMENT_ID="500")
        with self.assertRaises(ValueError):
            MLflowLoggerCallback()

        # With both variables set, the experiment name wins over the id.
        use_env(MLFLOW_EXPERIMENT_NAME="test_exp", MLFLOW_EXPERIMENT_ID="0")
        callback = MLflowLoggerCallback()
        expect_experiment(callback, ["existing_experiment", "test_exp"], 1)
예제 #2
0
    def testMlFlowLoggerLogging(self):
        """Drive one trial through start, result and completion callbacks.

        After each callback the first run recorded for experiment 1 is
        inspected for tags, params, metrics, artifacts and final status.
        """
        clear_env_vars()
        params = {"par1": 4, "par2": 9.}
        trial = MockTrial(params, "trial1", 0, "artifact")
        callback = MLflowLoggerCallback(
            experiment_name="test1", save_artifact=True)

        def first_run():
            # Look the run up again each time it is inspected.
            return callback.client.runs[1][0]

        # Starting the trial must create a run tagged with the trial name.
        callback.on_trial_start(iteration=0, trials=[], trial=trial)
        recorded = first_run()
        self.assertDictEqual(recorded.tags, {"trial_name": "trial1"})
        self.assertTupleEqual(recorded.run_id, (1, 0))
        self.assertTupleEqual(callback._trial_runs[trial], recorded.run_id)
        # The trial config must have been logged as params.
        self.assertListEqual(recorded.params, [{"par1": 4}, {"par2": 9}])

        # Starting the same trial a second time must not add another run.
        callback.on_trial_start(iteration=0, trials=[], trial=trial)
        self.assertEqual(len(callback.client.runs[1]), 1)

        # Report a result and check which metrics were logged.
        reported = {"metric1": 0.8, "metric2": 1, "metric3": None}
        callback.on_trial_result(0, [], trial, reported)
        recorded = first_run()
        # metric3 is not logged since it cannot be converted to float.
        expected_metrics = [{"metric1": 0.8}, {"metric2": 1.0}]
        self.assertListEqual(recorded.metrics, expected_metrics)

        # Completing the trial must log the artifact and finish the run.
        callback.on_trial_complete(0, [], trial)
        recorded = first_run()
        self.assertListEqual(recorded.artifacts, ["artifact"])
        self.assertTrue(recorded.terminated)
        self.assertEqual(recorded.status, "FINISHED")
예제 #3
0
def tune_function(mlflow_tracking_uri, finish_fast=False):
    """Run the ``easy_objective`` Tune experiment with MLflow logging.

    Args:
        mlflow_tracking_uri: Passed to ``MLflowLoggerCallback`` as its
            ``tracking_uri``.
        finish_fast: When true, the ``steps`` config value is 5 instead
            of 100.
    """
    mlflow_callback = MLflowLoggerCallback(
        tracking_uri=mlflow_tracking_uri,
        experiment_name="example",
        save_artifact=True)

    search_space = {
        "width": tune.randint(10, 100),
        "height": tune.randint(0, 100),
        "steps": 5 if finish_fast else 100,
    }

    tune.run(
        easy_objective,
        name="mlflow",
        num_samples=5,
        callbacks=[mlflow_callback],
        config=search_space)
예제 #4
0
def main(num_workers=2, use_gpu=False):
    """Fit a ``TorchTrainer`` on ``train_func`` and print its final metrics.

    Args:
        num_workers: Forwarded to ``ScalingConfig(num_workers=...)``.
        use_gpu: Forwarded to ``ScalingConfig(use_gpu=...)``.
    """
    hyperparameters = {"lr": 1e-3, "batch_size": 64, "epochs": 4}
    scaling = ScalingConfig(num_workers=num_workers, use_gpu=use_gpu)
    # Results are logged to the MLflow experiment "train_fashion_mnist".
    mlflow_callback = MLflowLoggerCallback(
        experiment_name="train_fashion_mnist")
    run_settings = RunConfig(callbacks=[mlflow_callback])

    trainer = TorchTrainer(
        train_func,
        train_loop_config=hyperparameters,
        scaling_config=scaling,
        run_config=run_settings,
    )
    outcome = trainer.fit()

    print("Final metrics: ", outcome.metrics)
예제 #5
0
from ray.train.torch import TorchTrainer
from ray.tune.integration.mlflow import MLflowLoggerCallback
from ray.tune.logger import TBXLoggerCallback


def train_func():
    """Report three results, one for each epoch index 0, 1 and 2."""
    for epoch in range(3):
        session.report({"epoch": epoch})


# Build a trainer that runs `train_func` with two workers and attaches two
# logger callbacks through the run configuration.
trainer = TorchTrainer(
    train_func,
    scaling_config=ScalingConfig(num_workers=2),
    run_config=RunConfig(
        callbacks=[
            # Logs results to MLflow under the "train_experiment" experiment.
            MLflowLoggerCallback(experiment_name="train_experiment"),
            # Logs results to TensorBoard.
            TBXLoggerCallback(),
        ],
    ),
)

# Run the training function, logging all the intermediate results
# to MLflow and TensorBoard.
result = trainer.fit()

# For MLflow logs:

# MLflow logs will by default be saved in an `mlflow` directory
# in the current working directory.

# $ cd mlflow
예제 #6
0
    def testMlFlowLoggerCallbackConfig(self):
        """Check experiment selection from constructor args and env vars.

        Every callback is created against ``self.tracking_uri`` and
        ``self.registry_uri`` and has ``setup()`` called before it is
        inspected. Env-var driven cases call ``clear_env_vars()`` first.
        """

        def ready_callback(**extra):
            # Construct against the test URIs and run setup() right away.
            callback = MLflowLoggerCallback(
                tracking_uri=self.tracking_uri,
                registry_uri=self.registry_uri,
                **extra)
            callback.setup()
            return callback

        def use_env(**variables):
            # Call clear_env_vars() before setting this case's variables.
            clear_env_vars()
            os.environ.update(variables)

        # Every setting supplied explicitly through the constructor.
        callback = ready_callback(experiment_name="test_exp")
        mlflow_api = callback.mlflow_util._mlflow
        self.assertEqual(mlflow_api.get_tracking_uri(), self.tracking_uri)
        self.assertEqual(mlflow_api.get_registry_uri(), self.registry_uri)
        known_names = [exp.name for exp in mlflow_api.list_experiments()]
        self.assertListEqual(known_names,
                             ["existing_experiment", "test_exp"])
        self.assertEqual(callback.mlflow_util.experiment_id, "1")

        # An experiment name that already exists must map to its id.
        callback = ready_callback(experiment_name="existing_experiment")
        self.assertEqual(callback.mlflow_util.experiment_id, "0")

        # Experiment name taken from the environment.
        use_env(MLFLOW_EXPERIMENT_NAME="test_exp")
        callback = ready_callback()
        self.assertEqual(callback.mlflow_util.experiment_id, "1")

        # Existing experiment name taken from the environment.
        use_env(MLFLOW_EXPERIMENT_NAME="existing_experiment")
        callback = ready_callback()
        self.assertEqual(callback.mlflow_util.experiment_id, "0")

        # Existing experiment id taken from the environment.
        use_env(MLFLOW_EXPERIMENT_ID="0")
        callback = ready_callback()
        self.assertEqual(callback.mlflow_util.experiment_id, "0")

        # An experiment id that does not exist must be rejected with a
        # ValueError, raised either while constructing or during setup().
        use_env(MLFLOW_EXPERIMENT_ID="500")
        with self.assertRaises(ValueError):
            ready_callback()

        # With both variables set, the experiment id wins over the name.
        use_env(MLFLOW_EXPERIMENT_NAME="test_exp", MLFLOW_EXPERIMENT_ID="0")
        callback = ready_callback()
        self.assertEqual(callback.mlflow_util.experiment_id, "0")

        # Tags given to the constructor must be kept on the callback.
        tags = {"user_name": "John", "git_commit_hash": "abc123"}
        use_env(MLFLOW_EXPERIMENT_NAME="test_tags", MLFLOW_EXPERIMENT_ID="0")
        callback = ready_callback(tags=tags)
        self.assertEqual(callback.tags, tags)
예제 #7
0
    def testMlFlowLoggerLogging(self):
        """Drive one trial through start, result and completion callbacks.

        Runs are looked up through the callback's mlflow handle after each
        step to check tags, params, metrics and the saved artifact info.
        """
        clear_env_vars()
        params = {"par1": "a", "par2": "b"}
        trial = MockTrial(params, "trial1", 0, "artifact")

        callback = MLflowLoggerCallback(
            tracking_uri=self.tracking_uri,
            registry_uri=self.registry_uri,
            experiment_name="test1",
            save_artifact=True,
            tags={"hello": "world"})
        callback.setup()

        def runs_in_experiment():
            # Search the runs recorded under experiment id "1".
            return callback.mlflow_util._mlflow.search_runs(
                experiment_ids=["1"])

        # Starting the trial must create exactly one run with proper tags.
        callback.on_trial_start(iteration=0, trials=[], trial=trial)
        found = runs_in_experiment()
        self.assertEqual(len(found), 1)
        # search_runs returns a pandas dataframe; convert it to records.
        records = found.to_dict(orient="records")
        run = callback.mlflow_util._mlflow.get_run(records[0]["run_id"])
        expected_tags = {
            "hello": "world",
            "trial_name": "trial1",
            "mlflow.runName": "trial1"
        }
        self.assertDictEqual(run.data.tags, expected_tags)
        self.assertEqual(callback._trial_runs[trial], run.info.run_id)
        # The trial config must have been logged as params.
        self.assertDictEqual(run.data.params, params)

        # Starting the same trial a second time must not add another run.
        callback.on_trial_start(iteration=0, trials=[], trial=trial)
        self.assertEqual(len(runs_in_experiment()), 1)

        # Report a result and check which metrics were logged.
        reported = {
            "metric1": 0.8,
            "metric2": 1,
            "metric3": None,
            "training_iteration": 0
        }
        callback.on_trial_result(0, [], trial, reported)
        run = callback.mlflow_util._mlflow.get_run(run_id=run.info.run_id)
        # metric3 is not logged since it cannot be converted to float.
        expected_metrics = {
            "metric1": 0.8,
            "metric2": 1.0,
            "training_iteration": 0
        }
        self.assertDictEqual(run.data.metrics, expected_metrics)

        # Completing the trial must save the artifact for this run.
        callback.on_trial_complete(0, [], trial)
        self.assertTrue(callback.mlflow_util.artifact_saved)
        expected_artifact = {"dir": "artifact", "run_id": run.info.run_id}
        self.assertDictEqual(callback.mlflow_util.artifact_info,
                             expected_artifact)