def _timed_log_batch(self):
    start = time.time()
    metrics_slices = [
        self.data[i:i + MAX_METRICS_PER_BATCH]
        for i in range(0, len(self.data), MAX_METRICS_PER_BATCH)
    ]
    for metrics_slice in metrics_slices:
        try_mlflow_log(MlflowClient().log_batch, run_id=self.run_id, metrics=metrics_slice)
    end = time.time()
    self.total_log_batch_time += end - start

def set_tags(tags):
    """
    Log a batch of tags for the current run. If no run is active, this method will create a
    new active run.

    :param tags: Dictionary of tag_name: String -> value: (String, but will be string-ified if
                 not)
    :returns: None
    """
    run_id = _get_or_start_run().info.run_id
    tags_arr = [RunTag(key, str(value)) for key, value in tags.items()]
    MlflowClient().log_batch(run_id=run_id, metrics=[], params=[], tags=tags_arr)

def log_params(params):
    """
    Log a batch of params for the current run. If no run is active, this method will create a
    new active run.

    :param params: Dictionary of param_name: String -> value: (String, but will be string-ified
                   if not)
    :returns: None
    """
    run_id = _get_or_start_run().info.run_id
    params_arr = [Param(key, str(value)) for key, value in params.items()]
    MlflowClient().log_batch(run_id=run_id, metrics=[], params=params_arr, tags=[])

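# A minimal usage sketch for the fluent `log_params` / `set_tags` APIs above, not part of the
# original module (assumes an MLflow tracking backend is configured; the parameter and tag
# names are illustrative only):
import mlflow

with mlflow.start_run():
    mlflow.log_params({"learning_rate": 0.01, "n_estimators": 100})
    mlflow.set_tags({"team": "platform", "release.candidate": "RC1"})
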
def test_with_managed_run_with_non_throwing_class_exhibits_expected_behavior():
    client = MlflowClient()

    @with_managed_run
    class TestPatch(PatchFunction):
        def _patch_implementation(self, original, *args, **kwargs):
            return mlflow.active_run()

        def _on_exception(self, exception):
            pass

    run1 = TestPatch.call(lambda: "foo")
    run1_status = client.get_run(run1.info.run_id).info.status
    assert RunStatus.from_string(run1_status) == RunStatus.FINISHED

    with mlflow.start_run() as active_run:
        run2 = TestPatch.call(lambda: "foo")
        assert run2 == active_run

    run2_status = client.get_run(run2.info.run_id).info.status
    assert RunStatus.from_string(run2_status) == RunStatus.FINISHED

def _get_paginated_runs(experiment_ids, filter_string, run_view_type, max_results, order_by):
    all_runs = []
    next_page_token = None
    while len(all_runs) < max_results:
        runs_to_get = max_results - len(all_runs)
        if runs_to_get < NUM_RUNS_PER_PAGE_PANDAS:
            runs = MlflowClient().search_runs(experiment_ids, filter_string, run_view_type,
                                              runs_to_get, order_by, next_page_token)
        else:
            runs = MlflowClient().search_runs(experiment_ids, filter_string, run_view_type,
                                              NUM_RUNS_PER_PAGE_PANDAS, order_by, next_page_token)
        all_runs.extend(runs)
        if hasattr(runs, 'token') and runs.token != '':
            next_page_token = runs.token
        else:
            break
    return all_runs

def set_experiment(experiment_name):
    """
    Set given experiment as active experiment. If experiment does not exist, create an
    experiment with provided name.

    :param experiment_name: Name of experiment to be activated.
    """
    client = MlflowClient()
    experiment = client.get_experiment_by_name(experiment_name)
    exp_id = experiment.experiment_id if experiment else None
    if exp_id is None:  # id can be 0
        print("INFO: '{}' does not exist. Creating a new experiment".format(experiment_name))
        exp_id = client.create_experiment(experiment_name)
    elif experiment.lifecycle_stage == LifecycleStage.DELETED:
        raise MlflowException(
            "Cannot set a deleted experiment '%s' as the active experiment."
            " You can restore the experiment, or permanently delete the"
            " experiment to create a new one." % experiment.name)
    global _active_experiment_id
    _active_experiment_id = exp_id

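# Usage sketch for `set_experiment`, not from the source (assumes a local `mlruns` store or a
# tracking server; the experiment name is illustrative). Runs started afterwards are recorded
# under the activated experiment:
import mlflow

mlflow.set_experiment("demo-experiment")
with mlflow.start_run():
    mlflow.log_param("alpha", 0.5)
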
def log_metric(key, value, step=None):
    """
    Log a metric under the current run, creating a run if necessary.

    :param key: Metric name (string).
    :param value: Metric value (float). Note that some special values such as +/- Infinity may
                  be replaced by other values depending on the store. For example, the
                  SQLAlchemy store replaces +/- Inf with max / min float values.
    :param step: Metric step (int). Defaults to zero if unspecified.
    """
    run_id = _get_or_start_run().info.run_id
    MlflowClient().log_metric(run_id, key, value, int(time.time() * 1000), step or 0)

def test_client_logs_metric_steps_correctly():
    client = MlflowAutologgingQueueingClient()

    with mlflow.start_run() as run:
        for step in range(3):
            client.log_metrics(
                run_id=run.info.run_id,
                metrics={"a": 1},
                step=step,
            )
        client.flush()

    metric_history = MlflowClient().get_metric_history(run_id=run.info.run_id, key="a")
    assert len(metric_history) == 3
    assert [metric.step for metric in metric_history] == list(range(3))

def test_sklearn_api_autolog_registering_model():
    registered_model_name = "test_autolog_registered_model"
    mlflow.lightgbm.autolog(registered_model_name=registered_model_name)

    X, y = datasets.load_iris(return_X_y=True)
    params = {"n_estimators": 10, "reg_lambda": 1}
    model = lgb.LGBMClassifier(**params)

    with mlflow.start_run():
        model.fit(X, y)

        registered_model = MlflowClient().get_registered_model(registered_model_name)
        assert registered_model.name == registered_model_name

def log_metrics(metrics, step=None):
    """
    Log multiple metrics for the current run, starting a run if no runs are active.

    :param metrics: Dictionary of metric_name: String -> value: Float
    :param step: A single integer step at which to log the specified
                 Metrics. If unspecified, each metric is logged at step zero.
    :returns: None
    """
    run_id = _get_or_start_run().info.run_id
    timestamp = int(time.time() * 1000)
    metrics_arr = [Metric(key, value, timestamp, step or 0) for key, value in metrics.items()]
    MlflowClient().log_batch(run_id=run_id, metrics=metrics_arr, params=[], tags=[])

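# Usage sketch for the stepped `log_metrics` API above (illustrative metric names and values;
# assumes a configured tracking backend). Each call logs a batch of metrics at the given step:
import mlflow

with mlflow.start_run():
    for epoch in range(3):
        mlflow.log_metrics({"loss": 1.0 / (epoch + 1), "accuracy": 0.80 + 0.05 * epoch}, step=epoch)
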
def test_autolog_registering_model(random_train_data, random_one_hot_labels):
    registered_model_name = "test_autolog_registered_model"
    mlflow.keras.autolog(registered_model_name=registered_model_name)

    data = random_train_data
    labels = random_one_hot_labels
    model = create_model()

    with mlflow.start_run():
        model.fit(data, labels, epochs=10)

        registered_model = MlflowClient().get_registered_model(registered_model_name)
        assert registered_model.name == registered_model_name

def log_metric(key, value):
    """
    Log a metric under the current run, creating a run if necessary.

    :param key: Metric name (string).
    :param value: Metric value (float).
    """
    if not isinstance(value, numbers.Number):
        _logger.warning(
            "The metric %s=%s was not logged because the value is not a number.", key, value)
        return
    run_id = _get_or_start_run().info.run_uuid
    MlflowClient().log_metric(run_id, key, value, int(time.time()))

def test_run_local_experiment_specification(experiment_name, tracking_uri_mock):
    # pylint: disable=unused-argument
    invoke_cli_runner(
        cli.run,
        [
            TEST_PROJECT_DIR,
            "-e", "greeter",
            "-P", "name=test",
            "--experiment-name", experiment_name,
        ])

    client = MlflowClient()
    experiment_id = client.get_experiment_by_name(experiment_name).experiment_id

    invoke_cli_runner(
        cli.run,
        [
            TEST_PROJECT_DIR,
            "-e", "greeter",
            "-P", "name=test",
            "--experiment-id", experiment_id,
        ])

def _create_child_runs_for_parameter_search(parent_estimator, parent_model, parent_run, child_tags):
    from itertools import zip_longest

    client = MlflowClient()
    # Use the start time of the parent parameter search run as a rough estimate for the
    # start time of child runs, since we cannot precisely determine when each point
    # in the parameter search space was explored
    child_run_start_time = parent_run.info.start_time
    child_run_end_time = int(time.time() * 1000)

    estimator_param_maps = parent_estimator.getEstimatorParamMaps()
    tuned_estimator = parent_estimator.getEstimator()

    metrics_dict, _ = _get_param_search_metrics_and_best_index(parent_estimator, parent_model)
    for i in range(len(estimator_param_maps)):
        child_estimator = tuned_estimator.copy(estimator_param_maps[i])
        tags_to_log = dict(child_tags) if child_tags else {}
        tags_to_log.update({MLFLOW_PARENT_RUN_ID: parent_run.info.run_id})
        tags_to_log.update(_get_estimator_info_tags(child_estimator))

        child_run = client.create_run(
            experiment_id=parent_run.info.experiment_id,
            start_time=child_run_start_time,
            tags=tags_to_log,
        )

        params_to_log = _get_instance_param_map(
            child_estimator, parent_estimator._autologging_metadata.uid_to_indexed_name_map
        )
        param_batches_to_log = _chunk_dict(params_to_log, chunk_size=MAX_PARAMS_TAGS_PER_BATCH)
        metrics_to_log = {k: v[i] for k, v in metrics_dict.items()}
        for params_batch, metrics_batch in zip_longest(
            param_batches_to_log, [metrics_to_log], fillvalue={}
        ):
            # Trim any parameter keys / values and metric keys that exceed the limits
            # imposed by corresponding MLflow Tracking APIs (e.g., LogParam, LogMetric)
            truncated_params_batch = _truncate_dict(
                params_batch, MAX_ENTITY_KEY_LENGTH, MAX_PARAM_VAL_LENGTH
            )
            truncated_metrics_batch = _truncate_dict(
                metrics_batch, max_key_length=MAX_ENTITY_KEY_LENGTH
            )
            client.log_batch(
                run_id=child_run.info.run_id,
                params=[
                    Param(str(key), str(value))
                    for key, value in truncated_params_batch.items()
                ],
                metrics=[
                    Metric(key=str(key), value=value, timestamp=child_run_end_time, step=0)
                    for key, value in truncated_metrics_batch.items()
                ],
            )

        client.set_terminated(run_id=child_run.info.run_id, end_time=child_run_end_time)

def evaluate_model(self, item, model, run_hash):
    from mlflow.tracking.client import MlflowClient
    from mlflow.entities import ViewType

    exp_name = self.runner.analysis_name + ':' + self.runner.current_pipeline_name + ':'
    exp_name += str(item.get('base', 'None')) + ':' + str(item['func']) + ':' + item['hash']

    client = MlflowClient()
    experiments = [exp for exp in client.list_experiments() if exp.name == exp_name]
    if len(experiments) == 0 or len(experiments) > 1:
        raise ValueError('Unable to find the experiment.')
    experiment = experiments[0]

    run = client.search_runs(
        experiment_ids=experiment.experiment_id,
        filter_string='tags."mlflow.runName" = ' + "'" + run_hash + "'",
        run_view_type=ViewType.ACTIVE_ONLY,
        max_results=1,
    )[0]
    run_id = run.info.run_id
    Tracker.resume_run(run_id)

    process = self.get_process(item)
    for source in model.sources:
        source.load_files()
        source.load()
    model.load()

    print('Evaluating run ' + run_hash + '...')
    process.run_id = run_hash
    process.evaluate(model)
    Tracker.end_run()

def test_client_run_creation_and_termination_are_successful():
    experiment_name = "test_run_creation_termination"
    MlflowClient().create_experiment(experiment_name)
    experiment_id = MlflowClient().get_experiment_by_name(experiment_name).experiment_id

    client = MlflowAutologgingQueueingClient()
    pending_run_id = client.create_run(
        experiment_id=experiment_id,
        start_time=5,
        tags={"a": "b"},
    )
    client.set_terminated(run_id=pending_run_id, status="FINISHED", end_time=6)
    client.flush()

    runs = mlflow.search_runs(experiment_ids=[experiment_id], output_format="list")
    assert len(runs) == 1

    run = runs[0]
    assert run.info.start_time == 5
    assert run.info.end_time == 6
    assert run.info.status == "FINISHED"
    assert {"a": "b"}.items() <= run.data.tags.items()

def test_with_managed_run_sets_specified_run_tags():
    client = MlflowClient()
    tags_to_set = {
        "foo": "bar",
        "num_layers": "7",
    }

    patch_function_1 = with_managed_run(
        "test_integration",
        lambda original, *args, **kwargs: mlflow.active_run(),
        tags=tags_to_set,
    )
    run1 = patch_function_1(lambda: "foo")
    assert tags_to_set.items() <= client.get_run(run1.info.run_id).data.tags.items()

    class PatchFunction2(PatchFunction):
        def _patch_implementation(self, original, *args, **kwargs):
            return mlflow.active_run()

        def _on_exception(self, exception):
            pass

    patch_function_2 = with_managed_run("test_integration", PatchFunction2, tags=tags_to_set)
    run2 = patch_function_2.call(lambda: "foo")
    assert tags_to_set.items() <= client.get_run(run2.info.run_id).data.tags.items()

def run(details):
    params = json.loads(details["params"])
    params["server_path"] = os.getcwd()
    project_uri = details["project_uri"]
    if not (project_uri.startswith("http://") or project_uri.startswith("https://")):
        assert project_uri.replace(".", "").replace("/", "").startswith("modules"), \
            "Only support modules dir"
        project_uri = os.path.join(os.path.dirname(__file__), project_uri)
    submitted_run = mlflow.projects.run(project_uri, parameters=params, use_conda=False)
    out = MlflowClient().get_run(submitted_run.run_id).to_dictionary()
    return {"result": json.dumps(out)}

def load_azure_workspace():
    """
    Load existing Azure Workspace from Tracking Store.

    :rtype: AzureML Workspace object
    """
    from .store import AzureMLRestStore
    from mlflow.exceptions import ExecutionException
    from mlflow.tracking.client import MlflowClient

    try:
        def_store = MlflowClient()._tracking_client.store
    except ExecutionException:
        logger.warning(VERSION_WARNING.format("MlflowClient()._tracking_client.store"))
        def_store = MlflowClient().store
    if isinstance(def_store, AzureMLRestStore):
        workspace = Workspace._from_service_context(def_store.service_context, _location=None)
        return workspace
    else:
        raise ExecutionException(
            "Workspace not found, please set the tracking URI in your script to AzureML.")

def test_autolog_registering_model():
    registered_model_name = "test_autolog_registered_model"
    mlflow.pytorch.autolog(registered_model_name=registered_model_name)

    model = IrisClassification()
    dm = IrisDataModule()
    dm.setup(stage="fit")
    trainer = pl.Trainer(max_epochs=NUM_EPOCHS)

    with mlflow.start_run():
        trainer.fit(model, dm)

        registered_model = MlflowClient().get_registered_model(registered_model_name)
        assert registered_model.name == registered_model_name

def search(data, max_runs, metric, algo):
    tracking_client = mlflow.tracking.MlflowClient()
    _inf = np.finfo(np.float64).max
    space = [
        hp.quniform('max_depth', 2, 12, 1),
        hp.quniform('min_samples_leaf', 2, 20, 1),
    ]
    with mlflow.start_run() as run:
        exp_id = run.info.experiment_id
        best = fmin(
            fn=train_fn(exp_id, _inf, _inf),
            space=space,
            algo=tpe.suggest if algo == "tpe.suggest" else rand.suggest,
            max_evals=max_runs,
        )
        mlflow.set_tag("best params", str(best))

        # find all runs generated by this search
        client = MlflowClient()
        query = "tags.mlflow.parentRunId = '{run_id}' ".format(run_id=run.info.run_id)
        runs = client.search_runs([exp_id], query)

        # iterate over all runs to find the best one
        best_train, best_valid = _inf, _inf
        best_run = None
        for r in runs:
            if r.data.metrics["val_auc"] < best_valid:
                best_run = r
                best_train = r.data.metrics["train_auc"]
                best_valid = r.data.metrics["val_auc"]

        # log best run metrics as the final metrics of this run.
        mlflow.set_tag("best_run", best_run.info.run_id)
        mlflow.log_metrics({
            "train_{}".format(metric): best_train,
            "val_{}".format(metric): best_valid,
        })

def test_with_managed_run_with_throwing_function_exhibits_expected_behavior():
    client = MlflowClient()
    patch_function_active_run = None

    @with_managed_run
    def patch_function(original, *args, **kwargs):
        nonlocal patch_function_active_run
        patch_function_active_run = mlflow.active_run()
        raise Exception("bad implementation")

    with pytest.raises(Exception):
        patch_function(lambda: "foo")

    assert patch_function_active_run is not None
    status1 = client.get_run(patch_function_active_run.info.run_id).info.status
    assert RunStatus.from_string(status1) == RunStatus.FAILED

    with mlflow.start_run() as active_run, pytest.raises(Exception):
        patch_function(lambda: "foo")
        assert patch_function_active_run == active_run

    # `with_managed_run` should not terminate a preexisting MLflow run,
    # even if the patch function throws
    status2 = client.get_run(active_run.info.run_id).info.status
    assert RunStatus.from_string(status2) == RunStatus.FINISHED

def log_metric(key, value):
    """
    Log a metric under the current run, creating a run if necessary.

    :param key: Metric name (string).
    :param value: Metric value (float).
    """
    if not isinstance(value, numbers.Number):
        print(
            "WARNING: The metric {}={} was not logged because the value is not a number."
            .format(key, value),
            file=sys.stderr)
        return
    run_id = _get_or_start_run().info.run_uuid
    MlflowClient().log_metric(run_id, key, value, int(time.time()))

def get_run(run_id):
    """
    Fetch the run from backend store. The resulting :py:class:`Run <mlflow.entities.Run>`
    contains a collection of run metadata -- :py:class:`RunInfo <mlflow.entities.RunInfo>`,
    as well as a collection of run parameters, tags, and metrics --
    :py:class:`RunData <mlflow.entities.RunData>`. In the case where multiple metrics with the
    same key are logged for the run, the :py:class:`RunData <mlflow.entities.RunData>` contains
    the most recently logged value at the largest step for each metric.

    :param run_id: Unique identifier for the run.

    :return: A single :py:class:`mlflow.entities.Run` object, if the run exists. Otherwise,
             raises an exception.
    """
    return MlflowClient().get_run(run_id)

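# Usage sketch for `get_run`, not part of the source (assumes a configured tracking backend;
# the logged parameter is illustrative). Note that params come back string-ified in `RunData`:
import mlflow

with mlflow.start_run() as run:
    mlflow.log_param("alpha", 0.5)

fetched = mlflow.get_run(run.info.run_id)
print(fetched.data.params)    # {'alpha': '0.5'}
print(fetched.info.status)
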
def check_finish(hparams, experiment_name):
    # NOTE: This logic is not perfect. For example, an aborted run is also counted as
    # completed for now.
    logging.info("checking status")
    query = ' and '.join(
        ['params.{}="{}"'.format(k, str(v)) for k, v in vars(hparams).items()])
    experiment = mlflow.get_experiment_by_name(experiment_name)
    if experiment is None:
        return False
    finished_runs = MlflowClient().search_runs(
        experiment_ids=[experiment.experiment_id],
        filter_string=query,
        run_view_type=ViewType.ALL)
    logging.info("done")
    return len(finished_runs) > 0

def log_metrics(metrics):
    """
    Log multiple metrics for the current run, starting a run if no runs are active.

    :param metrics: Dictionary of metric_name: String -> value: Float
    :returns: None
    """
    run_id = _get_or_start_run().info.run_uuid
    timestamp = int(time.time())
    metrics_arr = [Metric(key, value, timestamp) for key, value in metrics.items()]
    MlflowClient().log_batch(run_id=run_id, metrics=metrics_arr, params=[], tags=[])

def log_metrics(metrics, step=None):
    """
    Log multiple metrics for the current run, starting a run if no runs are active.

    :param metrics: Dictionary of metric_name: String -> value: Float. Note that some special
                    values such as +/- Infinity may be replaced by other values depending on
                    the store. For example, SQL-based stores may replace +/- Inf with
                    max / min float values.
    :param step: A single integer step at which to log the specified
                 Metrics. If unspecified, each metric is logged at step zero.
    :returns: None
    """
    run_id = _get_or_start_run().info.run_id
    timestamp = int(time.time() * 1000)
    metrics_arr = [Metric(key, value, timestamp, step or 0) for key, value in metrics.items()]
    MlflowClient().log_batch(run_id=run_id, metrics=metrics_arr, params=[], tags=[])

def _log_parameter_search_results_as_artifact(cv_results_df, run_id):
    """
    Records a collection of parameter search results as an MLflow artifact
    for the specified run.

    :param cv_results_df: A Pandas DataFrame containing the results of a parameter search
                          training session, which may be obtained by parsing the `cv_results_`
                          attribute of a trained parameter search estimator such as
                          `GridSearchCV`.
    :param run_id: The ID of the MLflow Run to which the artifact should be recorded.
    """
    with TempDir() as t:
        results_path = t.path("cv_results.csv")
        cv_results_df.to_csv(results_path, index=False)
        try_mlflow_log(MlflowClient().log_artifact, run_id, results_path)

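# A hedged sketch of the same write-to-temp-file-then-log-artifact pattern using only public
# MLflow APIs (the DataFrame contents and file name are illustrative, not from the source):
import os
import tempfile

import mlflow
import pandas as pd

results_df = pd.DataFrame({"param_max_depth": [2, 4], "mean_test_score": [0.81, 0.86]})
with mlflow.start_run(), tempfile.TemporaryDirectory() as tmp_dir:
    csv_path = os.path.join(tmp_dir, "cv_results.csv")
    results_df.to_csv(csv_path, index=False)
    mlflow.log_artifact(csv_path)
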
def test_with_managed_run_ends_run_on_keyboard_interrupt():
    client = MlflowClient()
    run = None

    def original():
        nonlocal run
        run = mlflow.active_run()
        raise KeyboardInterrupt

    patch_function_1 = with_managed_run(
        "test_integration", lambda original, *args, **kwargs: original(*args, **kwargs)
    )

    with pytest.raises(KeyboardInterrupt):
        patch_function_1(original)

    assert not mlflow.active_run()
    run_status_1 = client.get_run(run.info.run_id).info.status
    assert RunStatus.from_string(run_status_1) == RunStatus.FAILED

    class PatchFunction2(PatchFunction):
        def _patch_implementation(self, original, *args, **kwargs):
            return original(*args, **kwargs)

        def _on_exception(self, exception):
            pass

    patch_function_2 = with_managed_run("test_integration", PatchFunction2)

    with pytest.raises(KeyboardInterrupt):
        patch_function_2.call(original)

    assert not mlflow.active_run()
    run_status_2 = client.get_run(run.info.run_id).info.status
    assert RunStatus.from_string(run_status_2) == RunStatus.FAILED

def test_safe_patch_manages_run_if_specified_and_sets_expected_run_tags(
    patch_destination, test_autologging_integration
):
    client = MlflowClient()
    active_run = None

    def patch_impl(original, *args, **kwargs):
        nonlocal active_run
        active_run = mlflow.active_run()
        return original(*args, **kwargs)

    with mock.patch(
        "mlflow.utils.autologging_utils.with_managed_run", wraps=with_managed_run
    ) as managed_run_mock:
        safe_patch(
            test_autologging_integration, patch_destination, "fn", patch_impl, manage_run=True)
        patch_destination.fn()
        assert managed_run_mock.call_count == 1

    assert active_run is not None
    assert active_run.info.run_id is not None
    assert client.get_run(active_run.info.run_id).data.tags[MLFLOW_AUTOLOGGING] == "test_integration"