def testMlFlowLoggerCallbackConfig(self):
    # Explicitly pass in all args.
    logger = MLflowLoggerCallback(
        tracking_uri="test1", registry_uri="test2", experiment_name="test_exp"
    )
    self.assertEqual(logger.client.tracking_uri, "test1")
    self.assertEqual(logger.client.registry_uri, "test2")
    self.assertListEqual(
        logger.client.experiment_names, ["existing_experiment", "test_exp"]
    )
    self.assertEqual(logger.experiment_id, 1)

    # Check if client recognizes already existing experiment.
    logger = MLflowLoggerCallback(experiment_name="existing_experiment")
    self.assertListEqual(logger.client.experiment_names, ["existing_experiment"])
    self.assertEqual(logger.experiment_id, 0)

    # Pass in experiment name as env var.
    clear_env_vars()
    os.environ["MLFLOW_EXPERIMENT_NAME"] = "test_exp"
    logger = MLflowLoggerCallback()
    self.assertListEqual(
        logger.client.experiment_names, ["existing_experiment", "test_exp"]
    )
    self.assertEqual(logger.experiment_id, 1)

    # Pass in existing experiment name as env var.
    clear_env_vars()
    os.environ["MLFLOW_EXPERIMENT_NAME"] = "existing_experiment"
    logger = MLflowLoggerCallback()
    self.assertListEqual(logger.client.experiment_names, ["existing_experiment"])
    self.assertEqual(logger.experiment_id, 0)

    # Pass in existing experiment id as env var.
    clear_env_vars()
    os.environ["MLFLOW_EXPERIMENT_ID"] = "0"
    logger = MLflowLoggerCallback()
    self.assertListEqual(logger.client.experiment_names, ["existing_experiment"])
    self.assertEqual(logger.experiment_id, "0")

    # Pass in non existing experiment id as env var.
    clear_env_vars()
    os.environ["MLFLOW_EXPERIMENT_ID"] = "500"
    with self.assertRaises(ValueError):
        logger = MLflowLoggerCallback()

    # Experiment name env var should take precedence over id env var.
    clear_env_vars()
    os.environ["MLFLOW_EXPERIMENT_NAME"] = "test_exp"
    os.environ["MLFLOW_EXPERIMENT_ID"] = "0"
    logger = MLflowLoggerCallback()
    self.assertListEqual(
        logger.client.experiment_names, ["existing_experiment", "test_exp"]
    )
    self.assertEqual(logger.experiment_id, 1)
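The tests in this excerpt call a `clear_env_vars` helper that is not shown here. A minimal sketch of what it is assumed to do, based on the two environment variables the tests set:

import os


def clear_env_vars():
    # Drop any MLflow experiment settings left over from a previous case so
    # each scenario starts from a clean environment.
    for var in ("MLFLOW_EXPERIMENT_NAME", "MLFLOW_EXPERIMENT_ID"):
        os.environ.pop(var, None)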
def testMlFlowLoggerLogging(self):
    clear_env_vars()
    trial_config = {"par1": 4, "par2": 9.0}
    trial = MockTrial(trial_config, "trial1", 0, "artifact")

    logger = MLflowLoggerCallback(experiment_name="test1", save_artifact=True)

    # Check if run is created.
    logger.on_trial_start(iteration=0, trials=[], trial=trial)

    # New run should be created for this trial with correct tag.
    mock_run = logger.client.runs[1][0]
    self.assertDictEqual(mock_run.tags, {"trial_name": "trial1"})
    self.assertTupleEqual(mock_run.run_id, (1, 0))
    self.assertTupleEqual(logger._trial_runs[trial], mock_run.run_id)

    # Params should be logged.
    self.assertListEqual(mock_run.params, [{"par1": 4}, {"par2": 9}])

    # When same trial is started again, new run should not be created.
    logger.on_trial_start(iteration=0, trials=[], trial=trial)
    self.assertEqual(len(logger.client.runs[1]), 1)

    # Check metrics are logged properly.
    result = {"metric1": 0.8, "metric2": 1, "metric3": None}
    logger.on_trial_result(0, [], trial, result)
    mock_run = logger.client.runs[1][0]
    # metric3 is not logged since it cannot be converted to float.
    self.assertListEqual(mock_run.metrics, [{"metric1": 0.8}, {"metric2": 1.0}])

    # Check that artifact is logged on termination.
    logger.on_trial_complete(0, [], trial)
    mock_run = logger.client.runs[1][0]
    self.assertListEqual(mock_run.artifacts, ["artifact"])
    self.assertTrue(mock_run.terminated)
    self.assertEqual(mock_run.status, "FINISHED")
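`MockTrial(trial_config, "trial1", 0, "artifact")` refers to a test fixture that is also not part of this excerpt. Judging only from how the tests use it, it carries a config dict, a trial name, a trial id, and a logdir whose contents are uploaded as artifacts; a minimal sketch under those assumptions:

from collections import namedtuple

_TrialFields = namedtuple("MockTrial", ["config", "trial_name", "trial_id", "logdir"])


class MockTrial(_TrialFields):
    # The callback logs the config as params, tags the run with the trial
    # name, and uses the trial as a dictionary key in logger._trial_runs,
    # so expose a stable hash and a readable string form.
    def __hash__(self):
        return hash(self.trial_id)

    def __str__(self):
        return self.trial_name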
def tune_function(mlflow_tracking_uri, finish_fast=False):
    tune.run(
        easy_objective,
        name="mlflow",
        num_samples=5,
        callbacks=[
            MLflowLoggerCallback(
                tracking_uri=mlflow_tracking_uri,
                experiment_name="example",
                save_artifact=True,
            )
        ],
        config={
            "width": tune.randint(10, 100),
            "height": tune.randint(0, 100),
            "steps": 5 if finish_fast else 100,
        },
    )
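This snippet assumes `easy_objective` and a tracking URI are defined elsewhere in the example. One illustrative way to drive it locally, with a throwaway directory-backed tracking store (an assumption, not part of the original example):

import os
import tempfile

# Point the callback at a local, file-based MLflow tracking store.
mlflow_tracking_uri = os.path.join(tempfile.gettempdir(), "mlruns")

# Run a quick sweep that finishes after a few steps per trial.
tune_function(mlflow_tracking_uri, finish_fast=True)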
def main(num_workers=2, use_gpu=False):
    trainer = TorchTrainer(
        train_func,
        train_loop_config={"lr": 1e-3, "batch_size": 64, "epochs": 4},
        scaling_config=ScalingConfig(num_workers=num_workers, use_gpu=use_gpu),
        run_config=RunConfig(
            callbacks=[MLflowLoggerCallback(experiment_name="train_fashion_mnist")]
        ),
    )
    final_results = trainer.fit()

    print("Final metrics: ", final_results.metrics)
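A hypothetical entry point for running this example directly (not part of the original excerpt):

if __name__ == "__main__":
    # Train on two CPU workers; flip use_gpu to True on a GPU cluster.
    main(num_workers=2, use_gpu=False)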
from ray.air import session
from ray.air.config import RunConfig, ScalingConfig
from ray.train.torch import TorchTrainer
from ray.tune.integration.mlflow import MLflowLoggerCallback
from ray.tune.logger import TBXLoggerCallback


def train_func():
    for i in range(3):
        session.report(dict(epoch=i))


trainer = TorchTrainer(
    train_func,
    scaling_config=ScalingConfig(num_workers=2),
    run_config=RunConfig(
        callbacks=[
            MLflowLoggerCallback(experiment_name="train_experiment"),
            TBXLoggerCallback(),
        ],
    ),
)

# Run the training function, logging all the intermediate results
# to MLflow and TensorBoard.
result = trainer.fit()

# For MLflow logs:

# MLflow logs will by default be saved in an `mlflow` directory
# in the current working directory.

# $ cd mlflow
def testMlFlowLoggerCallbackConfig(self):
    # Explicitly pass in all args.
    logger = MLflowLoggerCallback(
        tracking_uri=self.tracking_uri,
        registry_uri=self.registry_uri,
        experiment_name="test_exp",
    )
    logger.setup()
    self.assertEqual(
        logger.mlflow_util._mlflow.get_tracking_uri(), self.tracking_uri
    )
    self.assertEqual(
        logger.mlflow_util._mlflow.get_registry_uri(), self.registry_uri
    )
    self.assertListEqual(
        [e.name for e in logger.mlflow_util._mlflow.list_experiments()],
        ["existing_experiment", "test_exp"],
    )
    self.assertEqual(logger.mlflow_util.experiment_id, "1")

    # Check if client recognizes already existing experiment.
    logger = MLflowLoggerCallback(
        experiment_name="existing_experiment",
        tracking_uri=self.tracking_uri,
        registry_uri=self.registry_uri,
    )
    logger.setup()
    self.assertEqual(logger.mlflow_util.experiment_id, "0")

    # Pass in experiment name as env var.
    clear_env_vars()
    os.environ["MLFLOW_EXPERIMENT_NAME"] = "test_exp"
    logger = MLflowLoggerCallback(
        tracking_uri=self.tracking_uri, registry_uri=self.registry_uri
    )
    logger.setup()
    self.assertEqual(logger.mlflow_util.experiment_id, "1")

    # Pass in existing experiment name as env var.
    clear_env_vars()
    os.environ["MLFLOW_EXPERIMENT_NAME"] = "existing_experiment"
    logger = MLflowLoggerCallback(
        tracking_uri=self.tracking_uri, registry_uri=self.registry_uri
    )
    logger.setup()
    self.assertEqual(logger.mlflow_util.experiment_id, "0")

    # Pass in existing experiment id as env var.
    clear_env_vars()
    os.environ["MLFLOW_EXPERIMENT_ID"] = "0"
    logger = MLflowLoggerCallback(
        tracking_uri=self.tracking_uri, registry_uri=self.registry_uri
    )
    logger.setup()
    self.assertEqual(logger.mlflow_util.experiment_id, "0")

    # Pass in non existing experiment id as env var.
    # This should raise an error.
    clear_env_vars()
    os.environ["MLFLOW_EXPERIMENT_ID"] = "500"
    with self.assertRaises(ValueError):
        logger = MLflowLoggerCallback(
            tracking_uri=self.tracking_uri, registry_uri=self.registry_uri
        )
        logger.setup()

    # Experiment id env var should take precedence over name env var.
    clear_env_vars()
    os.environ["MLFLOW_EXPERIMENT_NAME"] = "test_exp"
    os.environ["MLFLOW_EXPERIMENT_ID"] = "0"
    logger = MLflowLoggerCallback(
        tracking_uri=self.tracking_uri, registry_uri=self.registry_uri
    )
    logger.setup()
    self.assertEqual(logger.mlflow_util.experiment_id, "0")

    # Using tags
    tags = {"user_name": "John", "git_commit_hash": "abc123"}
    clear_env_vars()
    os.environ["MLFLOW_EXPERIMENT_NAME"] = "test_tags"
    os.environ["MLFLOW_EXPERIMENT_ID"] = "0"
    logger = MLflowLoggerCallback(
        tracking_uri=self.tracking_uri, registry_uri=self.registry_uri, tags=tags
    )
    logger.setup()
    self.assertEqual(logger.tags, tags)
def testMlFlowLoggerLogging(self):
    clear_env_vars()
    trial_config = {"par1": "a", "par2": "b"}
    trial = MockTrial(trial_config, "trial1", 0, "artifact")

    logger = MLflowLoggerCallback(
        tracking_uri=self.tracking_uri,
        registry_uri=self.registry_uri,
        experiment_name="test1",
        save_artifact=True,
        tags={"hello": "world"},
    )
    logger.setup()

    # Check if run is created with proper tags.
    logger.on_trial_start(iteration=0, trials=[], trial=trial)
    all_runs = logger.mlflow_util._mlflow.search_runs(experiment_ids=["1"])
    self.assertEqual(len(all_runs), 1)
    # all_runs is a pandas dataframe.
    all_runs = all_runs.to_dict(orient="records")
    run = logger.mlflow_util._mlflow.get_run(all_runs[0]["run_id"])
    self.assertDictEqual(
        run.data.tags,
        {"hello": "world", "trial_name": "trial1", "mlflow.runName": "trial1"},
    )
    self.assertEqual(logger._trial_runs[trial], run.info.run_id)
    # Params should be logged.
    self.assertDictEqual(run.data.params, trial_config)

    # When same trial is started again, new run should not be created.
    logger.on_trial_start(iteration=0, trials=[], trial=trial)
    all_runs = logger.mlflow_util._mlflow.search_runs(experiment_ids=["1"])
    self.assertEqual(len(all_runs), 1)

    # Check metrics are logged properly.
    result = {
        "metric1": 0.8,
        "metric2": 1,
        "metric3": None,
        "training_iteration": 0,
    }
    logger.on_trial_result(0, [], trial, result)
    run = logger.mlflow_util._mlflow.get_run(run_id=run.info.run_id)
    # metric3 is not logged since it cannot be converted to float.
    self.assertDictEqual(
        run.data.metrics,
        {"metric1": 0.8, "metric2": 1.0, "training_iteration": 0},
    )

    # Check that artifact is logged on termination.
    logger.on_trial_complete(0, [], trial)
    self.assertTrue(logger.mlflow_util.artifact_saved)
    self.assertDictEqual(
        logger.mlflow_util.artifact_info,
        {"dir": "artifact", "run_id": run.info.run_id},
    )