def _create_run(uri, experiment_id, work_dir, entry_point):
    """
    Create a ``Run`` against the current MLflow tracking server, logging metadata (e.g. the
    URI, entry point, and parameters of the project) about the run.

    Return an ``ActiveRun`` that can be used to report additional data about the run
    (metrics/params) to the tracking server.
    """
    # For local project URIs, prefer the git remote URL (when one exists) as the source name.
    source_name = _expand_uri(uri)
    if _is_local_uri(uri):
        source_name = tracking.utils._get_git_url_if_present(source_name)

    tags = {
        MLFLOW_USER: _get_user(),
        MLFLOW_SOURCE_NAME: source_name,
        MLFLOW_SOURCE_TYPE: SourceType.to_string(SourceType.PROJECT),
        MLFLOW_PROJECT_ENTRY_POINT: entry_point,
    }

    commit = _get_git_commit(work_dir)
    if commit is not None:
        tags[MLFLOW_GIT_COMMIT] = commit

    # If a run is already active, record it as the parent of the new run.
    parent_run = fluent.active_run()
    if parent_run:
        tags[MLFLOW_PARENT_RUN_ID] = parent_run.info.run_id

    return tracking.MlflowClient().create_run(experiment_id=experiment_id, tags=tags)
def _create_run(uri, experiment_id, work_dir, version, entry_point, parameters):
    """
    Create a ``Run`` against the current MLflow tracking server, logging metadata (e.g. the
    URI, entry point, and parameters of the project) about the run.

    :param uri: URI of the project being run (local path or remote URI).
    :param experiment_id: ID of the experiment under which to create the run.
    :param work_dir: Local directory containing the project files.
    :param version: Branch name supplied via ``--version``, if any; tagged on the run when it
                    names a valid branch of the project's git repo.
    :param entry_point: Name of the project entry point being executed.
    :param parameters: Dict of user-supplied parameters for the entry point.
    :return: An ``ActiveRun`` that can be used to report additional data about the run
             (metrics/params) to the tracking server.
    """
    # For local project URIs, prefer the git remote URL (when one exists) as the source name.
    if _is_local_uri(uri):
        source_name = tracking._tracking_service.utils._get_git_url_if_present(
            _expand_uri(uri))
    else:
        source_name = _expand_uri(uri)
    source_version = _get_git_commit(work_dir)
    existing_run = fluent.active_run()
    # If a run is already active, record it as the parent of the new run.
    parent_run_id = existing_run.info.run_id if existing_run else None
    tags = {
        MLFLOW_USER: _get_user(),
        MLFLOW_SOURCE_NAME: source_name,
        MLFLOW_SOURCE_TYPE: SourceType.to_string(SourceType.PROJECT),
        MLFLOW_PROJECT_ENTRY_POINT: entry_point,
    }
    if source_version is not None:
        tags[MLFLOW_GIT_COMMIT] = source_version
    if parent_run_id is not None:
        tags[MLFLOW_PARENT_RUN_ID] = parent_run_id

    repo_url = _get_git_repo_url(work_dir)
    if repo_url is not None:
        tags[MLFLOW_GIT_REPO_URL] = repo_url
        tags[LEGACY_MLFLOW_GIT_REPO_URL] = repo_url

    # Add branch name tag if a branch is specified through -version
    if _is_valid_branch_name(work_dir, version):
        tags[MLFLOW_GIT_BRANCH] = version
        tags[LEGACY_MLFLOW_GIT_BRANCH_NAME] = version

    # Reuse a single client for run creation and parameter logging rather than
    # constructing a second MlflowClient for the log_batch call.
    client = tracking.MlflowClient()
    active_run = client.create_run(experiment_id=experiment_id, tags=tags)

    project = _project_spec.load_project(work_dir)
    # Consolidate parameters for logging.
    # `storage_dir` is `None` since we want to log actual path not downloaded local path
    entry_point_obj = project.get_entry_point(entry_point)
    final_params, extra_params = entry_point_obj.compute_parameters(
        parameters, storage_dir=None)
    params_list = [
        Param(key, value)
        for key, value in list(final_params.items()) + list(extra_params.items())
    ]
    client.log_batch(active_run.info.run_id, params=params_list)
    return active_run
def log_model(artifact_path, **kwargs):
    """
    Export a model in Python-function form and log it with the current MLflow tracking
    service. The model is exported via ``save_model`` into a temporary local directory and
    the resulting files are logged as artifacts of the active run under ``artifact_path``.

    :param artifact_path: Run-relative artifact path under which to log the model.
    :param kwargs: Additional keyword arguments forwarded to ``save_model``. Must not
                   include ``model``: ``log_model`` always constructs its own ``Model``
                   metadata object, so a caller-supplied one would be silently ignored.
    :raises TypeError: If a ``model`` keyword argument is supplied.
    """
    # Validate before doing any work so we fail fast without creating a temp
    # directory or requiring an active run. TypeError (a subclass of Exception,
    # so existing handlers still match) is the conventional error for a bad
    # keyword argument.
    if 'model' in kwargs:
        raise TypeError("Unused argument 'model'. log_model creates a new model object")
    with TempDir() as tmp:
        local_path = tmp.path(artifact_path)
        run_id = active_run().info.run_uuid
        save_model(dst_path=local_path,
                   model=Model(artifact_path=artifact_path, run_id=run_id),
                   **kwargs)
        log_artifacts(local_path, artifact_path)
def _create_run(uri, experiment_id, work_dir, entry_point):
    """
    Create a ``Run`` against the current MLflow tracking server, logging metadata (e.g. the
    URI, entry point, and parameters of the project) about the run.

    Return an ``ActiveRun`` that can be used to report additional data about the run
    (metrics/params) to the tracking server.
    """
    # For local project URIs, prefer the git remote URL (when one exists) as the source name.
    source_name = _expand_uri(uri)
    if _is_local_uri(uri):
        source_name = tracking.utils._get_git_url_if_present(source_name)

    # If a run is already active, record it as the parent of the new run.
    parent = fluent.active_run()
    return tracking.MlflowClient().create_run(
        experiment_id=experiment_id,
        source_name=source_name,
        source_version=_get_git_commit(work_dir),
        entry_point_name=entry_point,
        source_type=SourceType.PROJECT,
        parent_run_id=parent.info.run_uuid if parent else None)
def log_model(artifact_path, loader_module=None, data_path=None, code_path=None,
              conda_env=None, python_model=None, artifacts=None):
    """
    Create a custom Pyfunc model, incorporating custom inference logic and data
    dependencies, and log it as an artifact of the current run.

    For information about the workflows that this method supports, see
    :ref:`Workflows for creating custom pyfunc models <pyfunc-create-custom-workflows>`
    and :ref:`Which workflow is right for my use case?
    <pyfunc-create-custom-selecting-workflow>`. The parameters of the first workflow
    (``loader_module``, ``data_path``) cannot be combined with the parameters of the
    second workflow (``python_model``, ``artifacts``).

    :param artifact_path: The run-relative artifact path to which to log the Python model.
    :param loader_module: Name of the Python module used to load the model from
                          ``data_path``. The module must define a method with the prototype
                          ``_load_pyfunc(data_path)``. If not *None*, the module and its
                          dependencies must be available from one of the following
                          locations:

                          - The MLflow library.
                          - Package(s) listed in the model's Conda environment, specified
                            by the ``conda_env`` parameter.
                          - One or more of the files specified by the ``code_path``
                            parameter.

    :param data_path: Path to a file or directory containing model data.
    :param code_path: A list of local filesystem paths to Python file dependencies (or
                      directories containing file dependencies). These files are
                      *prepended* to the system path before the model is loaded.
    :param conda_env: Either a dictionary representation of a Conda environment or the
                      path to a Conda environment yaml file; this describes the
                      environment the model should be run in. If ``python_model`` is not
                      *None*, the Conda environment must at least specify the dependencies
                      contained in :data:`mlflow.pyfunc.DEFAULT_CONDA_ENV`. If `None`, the
                      default :data:`mlflow.pyfunc.DEFAULT_CONDA_ENV` environment is added
                      to the model. An example dictionary representation of a Conda
                      environment::

                          {
                              'name': 'mlflow-env',
                              'channels': ['defaults'],
                              'dependencies': [
                                  'python=3.7.0',
                                  'cloudpickle==0.5.8'
                              ]
                          }

    :param python_model: An instance of a subclass of :class:`~PythonModel`, serialized
                         using the CloudPickle library. Any dependencies of the class must
                         be available from one of the following locations:

                         - The MLflow library.
                         - Package(s) listed in the model's Conda environment, specified
                           by the ``conda_env`` parameter.
                         - One or more of the files specified by the ``code_path``
                           parameter.

                         Note: If the class is imported from another module, as opposed to
                         being defined in the ``__main__`` scope, the defining module must
                         also be included in one of the listed locations.
    :param artifacts: A dictionary containing ``<name, artifact_uri>`` entries. Remote
                      artifact URIs are resolved to absolute filesystem paths, producing a
                      dictionary of ``<name, absolute_path>`` entries that
                      ``python_model`` can reference as the ``artifacts`` property of the
                      ``context`` parameter in :func:`PythonModel.load_context()
                      <mlflow.pyfunc.PythonModel.load_context>` and
                      :func:`PythonModel.predict() <mlflow.pyfunc.PythonModel.predict>`.
                      For example, given the ``artifacts`` dictionary::

                          {
                              "my_file": "s3://my-bucket/path/to/my/file"
                          }

                      the ``"my_file"`` artifact is downloaded from S3, and
                      ``python_model`` can refer to ``"my_file"`` as an absolute
                      filesystem path via ``context.artifacts["my_file"]``. If *None*,
                      no artifacts are added to the model.
    """
    run_uuid = active_run().info.run_uuid
    with TempDir() as scratch:
        # Save the model into a scratch directory first, then publish those files
        # as artifacts of the active run.
        model_dst = scratch.path(artifact_path)
        save_model(dst_path=model_dst,
                   model=Model(artifact_path=artifact_path, run_id=run_uuid),
                   loader_module=loader_module, data_path=data_path,
                   code_path=code_path, conda_env=conda_env,
                   python_model=python_model, artifacts=artifacts)
        log_artifacts(model_dst, artifact_path)
def get_current_run_id():
    """Return the run UUID of the currently active run."""
    current = active_run()
    return current.info.run_uuid