def test_get_experiment_id():
    """_get_experiment_id falls back to the default experiment, then tracks the active one."""
    # When no experiment is active should return default
    assert _get_experiment_id() == Experiment.DEFAULT_EXPERIMENT_ID
    # Create a new experiment and set that as active experiment
    with TempDir(chdr=True):
        # Use an int upper bound: random.randint rejects float arguments such as
        # 1e6 on Python >= 3.12 (non-integer args to randrange were removed).
        name = "Random experiment %d" % random.randint(1, 10**6)
        exp_id = mlflow.create_experiment(name)
        assert exp_id is not None
        mlflow.set_experiment(name)
        assert _get_experiment_id() == exp_id
def test_get_experiment_id_in_databricks_with_active_experiment_returns_active_experiment_id():
    """An explicitly activated experiment wins over the Databricks notebook id."""
    with TempDir(chdr=True):
        # Int upper bound: random.randint rejects floats (1e6) on Python >= 3.12.
        exp_name = "random experiment %d" % random.randint(1, 10**6)
        exp_id = mlflow.create_experiment(exp_name)
        mlflow.set_experiment(exp_name)
        # A notebook id that is guaranteed to differ from the experiment id.
        notebook_id = str(int(exp_id) + 73)

        with mock.patch("mlflow.tracking.fluent.is_in_databricks_notebook") as notebook_detection_mock, \
                mock.patch("mlflow.tracking.fluent.get_notebook_id") as notebook_id_mock:
            notebook_detection_mock.return_value = True
            notebook_id_mock.return_value = notebook_id
            assert _get_experiment_id() != notebook_id
            assert _get_experiment_id() == exp_id
def test_get_experiment_id_in_databricks_with_experiment_defined_in_env_returns_env_experiment_id():
    """An experiment id set via the environment wins over the Databricks notebook id."""
    with TempDir(chdr=True):
        # Int upper bound: random.randint rejects floats (1e6) on Python >= 3.12.
        exp_name = "random experiment %d" % random.randint(1, 10**6)
        exp_id = mlflow.create_experiment(exp_name)
        notebook_id = str(int(exp_id) + 73)
        HelperEnv.set_values(id=exp_id)

        with mock.patch("mlflow.tracking.fluent.is_in_databricks_notebook") as notebook_detection_mock, \
                mock.patch("mlflow.tracking.fluent.get_notebook_id") as notebook_id_mock:
            # return_value behaves identically to the original argument-ignoring
            # side_effect lambdas and is the idiomatic form.
            notebook_detection_mock.return_value = True
            notebook_id_mock.return_value = notebook_id
            assert _get_experiment_id() != notebook_id
            assert _get_experiment_id() == exp_id
def test_get_experiment_id_in_databricks_with_experiment_defined_in_env_returns_env_experiment_id():
    """An experiment id set via the environment wins over the registry-provided id."""
    with TempDir(chdr=True):
        # Int upper bound: random.randint rejects floats (1e6) on Python >= 3.12.
        exp_name = "random experiment %d" % random.randint(1, 10**6)
        exp_id = mlflow.create_experiment(exp_name)
        notebook_id = str(int(exp_id) + 73)
        HelperEnv.set_values(experiment_id=exp_id)

        with mock.patch(
                "mlflow.tracking.fluent.default_experiment_registry.get_experiment_id"
        ) as notebook_id_mock:
            notebook_id_mock.return_value = notebook_id
            assert _get_experiment_id() != notebook_id
            assert _get_experiment_id() == exp_id
def test_get_experiment_by_id_with_is_in_databricks_job():
    """When the default-experiment registry resolves an id, _get_experiment_id returns it."""
    expected_id = 123
    target = "mlflow.tracking.fluent.default_experiment_registry.get_experiment_id"
    with mock.patch(target, return_value=expected_id):
        assert _get_experiment_id() == expected_id
def test_get_experiment_by_id_with_is_in_databricks_job():
    """In a Databricks job, _get_experiment_id creates an experiment named after the
    job and tags it with the job's type info, source type, and source id."""
    experiment_id = 768
    job_id = 123
    experiment_name = "jobs:/" + str(job_id)
    job_type = "NORMAL"

    with mock.patch("mlflow.tracking.fluent.is_in_databricks_job") as in_job_mock, \
            mock.patch("mlflow.tracking.fluent.get_job_type_info") as type_info_mock, \
            mock.patch("mlflow.tracking.fluent.get_job_id") as job_id_mock, \
            mock.patch("mlflow.tracking.fluent.get_experiment_name_from_job_id") as exp_name_mock, \
            mock.patch.object(MlflowClient, "create_experiment", return_value=experiment_id):
        in_job_mock.return_value = True
        type_info_mock.return_value = job_type
        job_id_mock.return_value = job_id
        exp_name_mock.return_value = experiment_name

        # Tags the implementation is expected to attach to the new experiment.
        expected_tags = {
            MLFLOW_DATABRICKS_JOB_TYPE_INFO: job_type,
            MLFLOW_EXPERIMENT_SOURCE_TYPE: SourceType.to_string(SourceType.JOB),
            MLFLOW_EXPERIMENT_SOURCE_ID: job_id,
        }

        assert _get_experiment_id() == experiment_id
        MlflowClient.create_experiment.assert_called_with(experiment_name, None, expected_tags)
def _already_ran(entry_point_name, parameters, git_commit, experiment_id=None):
    """Best-effort detection of whether a run with the given entry point name,
    parameters, and git commit already completed successfully.

    :param entry_point_name: Entry point name the candidate run must have.
    :param parameters: Dict of parameters the run must match exactly.
    :param git_commit: Git commit the matching run must have been launched from.
    :param experiment_id: Experiment to search; defaults to the current experiment.
    :return: The matching ``Run``, or ``None`` when no finished run matches.
    """
    experiment_id = experiment_id if experiment_id is not None else _get_experiment_id()
    client = mlflow.tracking.MlflowClient()
    # Iterate from most recent to oldest so the newest matching run wins.
    all_run_infos = reversed(client.list_run_infos(experiment_id))
    for run_info in all_run_infos:
        full_run = client.get_run(run_info.run_id)
        tags = full_run.data.tags
        if tags.get(mlflow_tags.MLFLOW_PROJECT_ENTRY_POINT, None) != entry_point_name:
            continue
        match_failed = False
        # dict.items() replaces the dated six.iteritems -- this file targets Python 3
        # (other blocks already use f-strings).
        for param_key, param_value in parameters.items():
            run_value = full_run.data.params.get(param_key)
            if run_value != param_value:
                match_failed = True
                break
        if match_failed:
            continue
        if run_info.to_proto().status != RunStatus.FINISHED:
            eprint(("Run matched, but is not FINISHED, so skipping "
                    "(run_id=%s, status=%s)") % (run_info.run_id, run_info.status))
            continue
        previous_version = tags.get(mlflow_tags.MLFLOW_GIT_COMMIT, None)
        if git_commit != previous_version:
            eprint(("Run matched, but has a different source version, so skipping "
                    "(found=%s, expected=%s)") % (previous_version, git_commit))
            continue
        return client.get_run(run_info.run_id)
    eprint("No matching run has been found.")
    return None
def _already_ran(entry_point_name, parameters, experiment_id=None):
    """Best-effort detection of if a run with the given entrypoint name,
    parameters, and experiment id already ran. The run must have completed
    successfully and have at least the parameters provided.

    :param entry_point_name: Entry point name the candidate run must have.
    :param parameters: Dict of parameters the run must match exactly.
    :param experiment_id: Experiment to search; defaults to the current experiment.
    :return: The matching ``Run``, or ``None`` when no finished run matches.
    """
    if experiment_id is None:
        experiment_id = _get_experiment_id()
    client = mlflow.tracking.MlflowClient()
    # Newest runs first so the most recent match is returned.
    all_run_infos = reversed(client.list_run_infos(experiment_id))
    for run_info in all_run_infos:
        full_run = client.get_run(run_info.run_id)
        tags = full_run.data.tags
        if tags.get(mlflow_tags.MLFLOW_PROJECT_ENTRY_POINT, None) != entry_point_name:
            continue
        match_failed = False
        # dict.items() replaces the dated six.iteritems -- Python 3 only.
        for param_key, param_value in parameters.items():
            run_value = full_run.data.params.get(param_key)
            if run_value != param_value:
                match_failed = True
                break
        if match_failed:
            continue
        if run_info.to_proto().status != RunStatus.FINISHED:
            eprint(("Run matched, but is not FINISHED, so skipping "
                    "(run_id=%s, status=%s)") % (run_info.run_id, run_info.status))
            continue
        return client.get_run(run_info.run_id)
    # BUGFIX: this diagnostic previously used print(), sending it to stdout while
    # every other diagnostic in this function goes to stderr via eprint().
    eprint("No matching run has been found.")
    return None
def _already_ran(entry_point_name, parameters, source_version, experiment_id=None):
    """Best-effort detection of if a run with the given entrypoint name,
    parameters, and experiment id already ran. The run must have completed
    successfully and have at least the parameters provided.

    :param entry_point_name: Entry point name the candidate run must have.
    :param parameters: Dict of parameters the run must match exactly.
    :param source_version: Source version the run must have been launched from.
    :param experiment_id: Experiment to search; defaults to the current experiment.
    :return: The matching ``Run``, or ``None`` when no finished run matches.
    """
    experiment_id = experiment_id if experiment_id is not None else _get_experiment_id()
    client = mlflow.tracking.MlflowClient()
    # Newest runs first so the most recent match is returned.
    all_run_infos = reversed(client.list_run_infos(experiment_id))
    for run_info in all_run_infos:
        if run_info.entry_point_name != entry_point_name:
            continue
        full_run = client.get_run(run_info.run_uuid)
        run_params = _get_params(full_run)
        match_failed = False
        # dict.items() replaces the dated six.iteritems -- Python 3 only.
        for param_key, param_value in parameters.items():
            run_value = run_params.get(param_key)
            if run_value != param_value:
                match_failed = True
                break
        if match_failed:
            continue
        if run_info.status != RunStatus.FINISHED:
            eprint(("Run matched, but is not FINISHED, so skipping "
                    "(run_id=%s, status=%s)") % (run_info.run_uuid, run_info.status))
            continue
        if run_info.source_version != source_version:
            eprint(("Run matched, but has a different source version, so skipping "
                    "(found=%s, expected=%s)") % (run_info.source_version, source_version))
            continue
        return client.get_run(run_info.run_uuid)
    return None
def _resolve_experiment_id(experiment_name=None, experiment_id=None):
    """
    Resolve the experiment to use for a run.

    At most one of ``experiment_name`` / ``experiment_id`` may be given. If
    ``experiment_name`` refers to a nonexistent experiment, an experiment of
    that name is created and its id is returned. With neither argument, the
    currently active experiment id is returned.

    :param experiment_name: Name of experiment under which to launch the run.
    :param experiment_id: ID of experiment under which to launch the run.
    :return: str
    """
    if experiment_name and experiment_id:
        raise MlflowException("Specify only one of 'experiment_name' or 'experiment_id'.")

    if experiment_id:
        return str(experiment_id)

    if not experiment_name:
        return _get_experiment_id()

    client = tracking.MlflowClient()
    existing = client.get_experiment_by_name(experiment_name)
    if existing is not None:
        return existing.experiment_id
    print("INFO: '{}' does not exist. Creating a new experiment".format(experiment_name))
    return client.create_experiment(experiment_name)
def test_get_experiment_id_with_active_experiment_returns_active_experiment_id():
    """The active experiment's id is what _get_experiment_id resolves to."""
    # Create a new experiment and set that as active experiment
    with TempDir(chdr=True):
        # Use an int upper bound: random.randint rejects float arguments such as
        # 1e6 on Python >= 3.12.
        name = "Random experiment %d" % random.randint(1, 10**6)
        exp_id = mlflow.create_experiment(name)
        assert exp_id is not None
        mlflow.set_experiment(name)
        assert _get_experiment_id() == exp_id
def test_get_experiment_id_in_databricks_detects_notebook_id_by_default():
    """Inside a Databricks notebook, the notebook id is used as the experiment id."""
    expected_notebook_id = 768
    with mock.patch("mlflow.tracking.fluent.is_in_databricks_notebook",
                    return_value=True), \
            mock.patch("mlflow.tracking.fluent.get_notebook_id",
                       return_value=expected_notebook_id):
        assert _get_experiment_id() == expected_notebook_id
def _run(uri, entry_point="main", version=None, parameters=None, experiment_id=None,
         mode=None, cluster_spec=None, git_username=None, git_password=None,
         use_conda=True, storage_dir=None, block=True, run_id=None):
    """
    Helper that delegates to the project-running method corresponding to the passed-in mode.
    Returns a ``SubmittedRun`` corresponding to the project run.
    """
    # Databricks mode needs up-front validation (tracking URI, cluster spec)
    # before any other work is done.
    if mode == "databricks":
        mlflow.projects.databricks.before_run_validations(mlflow.get_tracking_uri(), cluster_spec)
    # Fall back to the currently active experiment when none is given.
    exp_id = experiment_id or _get_experiment_id()
    parameters = parameters or {}
    work_dir = _fetch_project(uri=uri, force_tempdir=False, version=version,
                              git_username=git_username, git_password=git_password)
    project = _project_spec.load_project(work_dir)
    # Fail fast on invalid entry-point parameters before creating a run.
    project.get_entry_point(entry_point)._validate_parameters(parameters)
    if run_id:
        # Reuse an existing run rather than creating a new one.
        active_run = tracking.MlflowClient().get_run(run_id)
    else:
        active_run = _create_run(uri, exp_id, work_dir, entry_point)
    # Consolidate parameters for logging.
    # `storage_dir` is `None` since we want to log actual path not downloaded local path
    entry_point_obj = project.get_entry_point(entry_point)
    final_params, extra_params = entry_point_obj.compute_parameters(parameters, storage_dir=None)
    for key, value in (list(final_params.items()) + list(extra_params.items())):
        tracking.MlflowClient().log_param(active_run.info.run_uuid, key, value)
    # Add branch name tag if a branch is specified through -version
    if _is_valid_branch_name(work_dir, version):
        tracking.MlflowClient().set_tag(active_run.info.run_uuid, MLFLOW_GIT_BRANCH_NAME, version)
    if mode == "databricks":
        # Imported lazily so local runs don't require the databricks module.
        from mlflow.projects.databricks import run_databricks
        return run_databricks(
            remote_run=active_run, uri=uri, entry_point=entry_point, work_dir=work_dir,
            parameters=parameters, experiment_id=exp_id, cluster_spec=cluster_spec)
    elif mode == "local" or mode is None:
        # Synchronously create a conda environment (even though this may take some time) to avoid
        # failures due to multiple concurrent attempts to create the same conda env.
        conda_env_name = _get_or_create_conda_env(project.conda_env_path) if use_conda else None
        # In blocking mode, run the entry point command in blocking fashion, sending status updates
        # to the tracking server when finished. Note that the run state may not be persisted to the
        # tracking server if interrupted
        if block:
            command = _get_entry_point_command(
                project, entry_point, parameters, conda_env_name, storage_dir)
            return _run_entry_point(command, work_dir, exp_id, run_id=active_run.info.run_uuid)
        # Otherwise, invoke `mlflow run` in a subprocess
        return _invoke_mlflow_run_subprocess(
            work_dir=work_dir, entry_point=entry_point, parameters=parameters,
            experiment_id=exp_id, use_conda=use_conda, storage_dir=storage_dir,
            run_id=active_run.info.run_uuid)
    supported_modes = ["local", "databricks"]
    raise ExecutionException("Got unsupported execution mode %s. Supported "
                             "values: %s" % (mode, supported_modes))
def _run(uri, entry_point="main", version=None, parameters=None, experiment_id=None,
         mode=None, cluster_spec=None, git_username=None, git_password=None,
         use_conda=True, storage_dir=None, block=True, run_id=None):
    """
    Helper that delegates to the project-running method corresponding to the passed-in mode.
    Returns a ``SubmittedRun`` corresponding to the project run.
    """
    # Fall back to the currently active experiment when none is given.
    exp_id = experiment_id or _get_experiment_id()
    parameters = parameters or {}
    work_dir = _fetch_project(uri=uri, force_tempdir=False, version=version,
                              git_username=git_username, git_password=git_password)
    project = _project_spec.load_project(work_dir)
    # Fail fast on invalid entry-point parameters before creating a run.
    project.get_entry_point(entry_point)._validate_parameters(parameters)
    if run_id:
        # Reuse an existing run rather than creating a new one.
        active_run = tracking.get_service().get_run(run_id)
    else:
        active_run = _create_run(uri, exp_id, work_dir, entry_point, parameters)
    if mode == "databricks":
        # Imported lazily so local runs don't require the databricks module.
        from mlflow.projects.databricks import run_databricks
        return run_databricks(
            remote_run=active_run, uri=uri, entry_point=entry_point, work_dir=work_dir,
            parameters=parameters, experiment_id=exp_id, cluster_spec=cluster_spec)
    elif mode == "local" or mode is None:
        # Synchronously create a conda environment (even though this may take some time) to avoid
        # failures due to multiple concurrent attempts to create the same conda env.
        conda_env_name = _get_or_create_conda_env(project.conda_env_path) if use_conda else None
        # In blocking mode, run the entry point command in blocking fashion, sending status updates
        # to the tracking server when finished. Note that the run state may not be persisted to the
        # tracking server if interrupted
        if block:
            command = _get_entry_point_command(
                project, entry_point, parameters, conda_env_name, storage_dir)
            return _run_entry_point(command, work_dir, exp_id, run_id=active_run.info.run_uuid)
        # Otherwise, invoke `mlflow run` in a subprocess
        return _invoke_mlflow_run_subprocess(
            work_dir=work_dir, entry_point=entry_point, parameters=parameters,
            experiment_id=exp_id, use_conda=use_conda, storage_dir=storage_dir,
            run_id=active_run.info.run_uuid)
    supported_modes = ["local", "databricks"]
    raise ExecutionException("Got unsupported execution mode %s. Supported "
                             "values: %s" % (mode, supported_modes))
def _resolve_experiment_id(experiment_name=None, experiment_id=None):
    """
    Resolve experiment. Verifies either one or other is specified - cannot be both selected.

    :param experiment_name: Name of experiment under which to launch the run.
    :param experiment_id: ID of experiment under which to launch the run.
    :return: int
    :raises MlflowException: If both arguments are given, or if
        ``experiment_name`` does not name an existing experiment.
    """
    if experiment_name and experiment_id:
        raise MlflowException("Specify only one of 'experiment_name' or 'experiment_id'.")
    exp_id = experiment_id
    if experiment_name:
        client = tracking.MlflowClient()
        experiment = client.get_experiment_by_name(experiment_name)
        # BUGFIX: get_experiment_by_name returns None for a missing experiment;
        # the previous code dereferenced it and crashed with AttributeError.
        if experiment is None:
            raise MlflowException(
                "Could not find experiment with name '%s'." % experiment_name)
        exp_id = experiment.experiment_id
    # Fall back to the currently active experiment when neither argument is given.
    exp_id = exp_id or _get_experiment_id()
    return exp_id
def _already_ran(entry_point_name, parameters, git_commit, experiment_id=None):
    """Best-effort detection of if a run with the given parameters already ran.
    The run must have completed successfully and have at least the parameters
    provided.
    """
    if experiment_id is None:
        experiment_id = _get_experiment_id()
    client = mlflow.tracking.MlflowClient()
    # Walk runs from newest to oldest so the most recent match wins.
    for run_info in reversed(client.list_run_infos(experiment_id)):
        candidate = client.get_run(run_info.run_id)
        run_tags = candidate.data.tags
        if run_tags.get(mlflow_tags.MLFLOW_PROJECT_ENTRY_POINT, None) != entry_point_name:
            continue
        # Every requested parameter must be present with exactly the same value.
        if any(candidate.data.params.get(key) != value
               for key, value in parameters.items()):
            continue
        if run_info.status != RunStatus.FINISHED:
            eprint(
                f"Run matched, but is not FINISHED. Skipping. Run_id={run_info.run_id}, status={run_info.status}"
            )
            continue
        previous_version = run_tags.get(mlflow_tags.MLFLOW_GIT_COMMIT, None)
        if git_commit != previous_version:
            eprint(
                f"Run matched, but has a different source version. Skipping. "
                f"Found={previous_version}, expected={git_commit}")
            continue
        return client.get_run(run_info.run_id)
    eprint("No matching run has been found.")
    return None
def __init__(self, name, verbose=False, artifacts_folder='.'):
    """Initialize the logging helper.

    :param name: Display name for this logging session.
    :param verbose: When True, emit an info message after construction.
    :param artifacts_folder: Root folder in which artifact subfolders are created.
    """
    self._name = name
    self._verbose = verbose
    # Collected run data, filled in later by the logging methods.
    self._params = {}
    self._metrics = {}
    self._artifacts = []
    self._tags = {}
    # Lazily-populated bookkeeping attributes.
    self._n_partitions = None
    self._n_samples = None
    self._imgs_path = None
    self._model_path = None
    self._artifacts_folders = None
    self._client = MlflowClient()
    self.create_artifacts_folders(artifacts_folder)
    # Resolve the experiment currently active in MLflow.
    self._actual_experiment_id = _get_experiment_id()
    # NOTE(review): basicConfig here configures the root logger for the whole
    # process at DEBUG level -- presumably intentional; confirm.
    logging.basicConfig(
        format='Date-Time : %(asctime)s : Line No. : %(lineno)d - %(message)s',
        level=logging.DEBUG)
    self.logger = logging.getLogger(__name__)
    if self._verbose:
        self.logger.info('New MlLogs object create.')
def test_get_experiment_id_with_no_active_experiments_returns_default_experiment_id():
    """Without any active experiment, the default experiment id is returned."""
    default_id = Experiment.DEFAULT_EXPERIMENT_ID
    assert _get_experiment_id() == default_id
def test_get_experiment_id_with_no_active_experiments_returns_zero():
    """Without any active experiment, the experiment id resolves to the string "0"."""
    resolved = _get_experiment_id()
    assert resolved == "0"
def _run(uri, entry_point="main", version=None, parameters=None, experiment_name=None,
         experiment_id=None, mode=None, cluster_spec=None, git_username=None,
         git_password=None, use_conda=True, storage_dir=None, block=True, run_id=None):
    """
    Helper that delegates to the project-running method corresponding to the passed-in mode.
    Returns a ``SubmittedRun`` corresponding to the project run.

    :param uri: URI of the project to run.
    :param entry_point: Entry point within the project (default ``"main"``).
    :param version: Git version (commit or branch) of the project to fetch.
    :param parameters: Dict of entry-point parameters.
    :param experiment_name: Name of the experiment to run under.
    :param experiment_id: ID of the experiment to run under.
    :param mode: Execution mode -- ``"local"``, ``"databricks"``, or ``None`` (local).
    :raises ExecutionException: For an unsupported ``mode`` or an unknown
        ``experiment_name``.
    """
    if mode == "databricks":
        mlflow.projects.databricks.before_run_validations(
            mlflow.get_tracking_uri(), cluster_spec)
    if experiment_name:
        # BUGFIX: get_experiment_by_name returns an Experiment object (or None),
        # not an id -- the previous code assigned the object itself to exp_id,
        # which is then used everywhere as an experiment id.
        experiment = tracking.MlflowClient().get_experiment_by_name(experiment_name)
        if experiment is None:
            raise ExecutionException(
                "Could not find experiment with name '%s'" % experiment_name)
        exp_id = experiment.experiment_id
    else:
        exp_id = experiment_id or _get_experiment_id()
    parameters = parameters or {}
    work_dir = _fetch_project(uri=uri, force_tempdir=False, version=version,
                              git_username=git_username, git_password=git_password)
    project = _project_spec.load_project(work_dir)
    _validate_execution_environment(project, mode)
    # Fail fast on invalid entry-point parameters before creating a run.
    project.get_entry_point(entry_point)._validate_parameters(parameters)
    if run_id:
        active_run = tracking.MlflowClient().get_run(run_id)
    else:
        active_run = _create_run(uri, exp_id, work_dir, entry_point)

    # Consolidate parameters for logging.
    # `storage_dir` is `None` since we want to log actual path not downloaded local path
    entry_point_obj = project.get_entry_point(entry_point)
    final_params, extra_params = entry_point_obj.compute_parameters(
        parameters, storage_dir=None)
    for key, value in (list(final_params.items()) + list(extra_params.items())):
        tracking.MlflowClient().log_param(active_run.info.run_uuid, key, value)

    # Record the git repo URL under both the current and legacy tag names.
    repo_url = _get_git_repo_url(work_dir)
    if repo_url is not None:
        for tag in [MLFLOW_GIT_REPO_URL, LEGACY_MLFLOW_GIT_REPO_URL]:
            tracking.MlflowClient().set_tag(active_run.info.run_uuid, tag, repo_url)

    # Add branch name tag if a branch is specified through -version
    if _is_valid_branch_name(work_dir, version):
        for tag in [MLFLOW_GIT_BRANCH, LEGACY_MLFLOW_GIT_BRANCH_NAME]:
            tracking.MlflowClient().set_tag(active_run.info.run_uuid, tag, version)

    if mode == "databricks":
        # Imported lazily so local runs don't require the databricks module.
        from mlflow.projects.databricks import run_databricks
        return run_databricks(remote_run=active_run, uri=uri, entry_point=entry_point,
                              work_dir=work_dir, parameters=parameters,
                              experiment_id=exp_id, cluster_spec=cluster_spec)
    elif mode == "local" or mode is None:
        command = []
        command_separator = " "
        # If a docker_env attribute is defined in MLProject then it takes precedence over conda yaml
        # environments, so the project will be executed inside a docker container.
        if project.docker_env:
            tracking.MlflowClient().set_tag(active_run.info.run_uuid,
                                            MLFLOW_PROJECT_ENV, "docker")
            _validate_docker_env(project.docker_env)
            _validate_docker_installation()
            image = _build_docker_image(work_dir=work_dir, project=project,
                                        active_run=active_run)
            command += _get_docker_command(image=image, active_run=active_run)
        # Synchronously create a conda environment (even though this may take some time)
        # to avoid failures due to multiple concurrent attempts to create the same conda env.
        elif use_conda:
            tracking.MlflowClient().set_tag(active_run.info.run_uuid,
                                            MLFLOW_PROJECT_ENV, "conda")
            command_separator = " && "
            conda_env_name = _get_or_create_conda_env(project.conda_env_path)
            command += _get_conda_command(conda_env_name)
        # In blocking mode, run the entry point command in blocking fashion, sending status updates
        # to the tracking server when finished. Note that the run state may not be persisted to the
        # tracking server if interrupted
        if block:
            command += _get_entry_point_command(project, entry_point, parameters, storage_dir)
            command = command_separator.join(command)
            return _run_entry_point(command, work_dir, exp_id,
                                    run_id=active_run.info.run_uuid)
        # Otherwise, invoke `mlflow run` in a subprocess
        return _invoke_mlflow_run_subprocess(work_dir=work_dir, entry_point=entry_point,
                                             parameters=parameters, experiment_id=exp_id,
                                             use_conda=use_conda, storage_dir=storage_dir,
                                             run_id=active_run.info.run_uuid)
    supported_modes = ["local", "databricks"]
    raise ExecutionException("Got unsupported execution mode %s. Supported "
                             "values: %s" % (mode, supported_modes))
def _already_ran(entry_point_name,  # entry point name of the run
                 parameters,  # parameters of the run
                 git_commit,  # git version of the code run
                 config_sha,  # sha256 of config file
                 ignore_git=False,  # whether to ignore git version or not (default: False)
                 experiment_id=None,  # experiment id (default: None)
                 resume=False):  # whether to resume a failed/killed previous run or not (default: False)
    """ Best-effort detection of if a run with the given entrypoint name, parameters, and experiment id
    already ran. The run must have completed successfully and have at least the parameters provided.

    Args:
        entry_point_name: Entry point name of the run
        parameters: Parameters of the run
        git_commit: Git version of the code run
        config_sha: Sha256 of config file
        ignore_git: Whether to ignore git version or not (default: False)
        experiment_id: Experiment id (default: None)
        resume: Whether to resume a failed/killed previous run (only for training) or not (default: False)
    Returns:
        Previously executed run if found, None otherwise.
    """
    # if experiment ID is not provided retrieve current experiment ID
    experiment_id = experiment_id if experiment_id is not None else _get_experiment_id()
    # instantiate MLflowClient (creates and manages experiments and runs)
    client = mlflow.tracking.MlflowClient()
    # get reversed list of run information (from last to first)
    all_run_infos = reversed(client.list_run_infos(experiment_id))
    # id of an interrupted run that may be resumed if no finished match is found
    run_to_resume_id = None
    # for all runs info
    for run_info in all_run_infos:
        # fetch run from backend store
        full_run = client.get_run(run_info.run_id)
        # get run dictionary of tags
        tags = full_run.data.tags
        # if there is no entry point, or the entry point for the run is different from
        # 'entry_point_name', continue
        if tags.get(mlflow_tags.MLFLOW_PROJECT_ENTRY_POINT, None) != entry_point_name:
            continue
        # initialize 'match_failed' bool to false
        match_failed = False
        # for each parameter in the provided run parameters
        for param_key, param_value in parameters.items():
            # get run param value from the run dictionary of parameters
            run_value = full_run.data.params.get(param_key)
            # compare as strings: stored MLflow params are strings while the
            # provided values may be ints/floats
            if str(run_value) != str(param_value):
                match_failed = True
                break
        # if the current run is not the one we are searching for go to the next one
        if match_failed:
            continue
        # get previous run git commit version
        previous_version = tags.get(mlflow_tags.MLFLOW_GIT_COMMIT, None)
        # if the previous version is different from the current one, go to the next one
        if not ignore_git and git_commit != previous_version:
            logger.warning("Run matched, but has a different source version, so skipping (found={}, expected={})"
                           .format(previous_version, git_commit))
            continue
        # get config file sha256 from the run
        run_config_sha = full_run.data.params.get('config_sha')
        # if the config file sha256 for the run is different from the current sha, go to the next one
        if str(run_config_sha) != str(config_sha):
            logger.warning("Run matched, but config is different.")
            continue
        # if the run is not finished
        if run_info.to_proto().status != RunStatus.FINISHED:
            if resume:
                # if resume is enabled, set current run to resume id -> if no newer completed run is found,
                # this stopped run will be resumed
                run_to_resume_id = run_info.run_id
                continue
            else:
                # otherwise skip it and try with the next one
                logger.warning("Run matched, but is not FINISHED, so skipping "
                               "(run_id={}, status={})".format(run_info.run_id, run_info.status))
                continue
        # otherwise (if the run was found and it is exactly the same), return the found run
        return client.get_run(run_info.run_id)
    # if no previously executed (and finished) run was found but a stopped run was found, resume such run
    if run_to_resume_id is not None:
        logger.info("Resuming run with entrypoint=%s and parameters=%s" % (entry_point_name, parameters))
        # NOTE(review): this mutates the caller's `parameters` dict in place -- confirm intended
        # update new run parameters with the stopped run id
        parameters.update({
            'run_id': run_to_resume_id
        })
        # submit new run that will resume the previously interrupted one
        submitted_run = mlflow.run(".", entry_point_name, parameters=parameters)
        # log config file sha256 as parameter in the submitted run
        client.log_param(submitted_run.run_id, 'config_sha', config_sha)
        # return submitted (new) run
        return mlflow.tracking.MlflowClient().get_run(submitted_run.run_id)
    # if the searched run was not found return 'None'
    logger.warning("No matching run has been found.")
    return None