Example #1
def run(uri, entry_point="main", version=None, parameters=None, experiment_id=None,
        mode=None, cluster_spec=None, git_username=None, git_password=None,
        use_conda=True, use_temp_cwd=False, storage_dir=None, block=True):
    """
    Run an MLflow project from the given URI.

    Supports downloading projects from Git URIs with a specified version, or copying them from
    the file system. For Git-based projects, a commit can be specified as the `version`.

    Raises:
      `mlflow.projects.ExecutionException` if a run launched in blocking mode is unsuccessful.

    :param uri: URI of project to run. Expected to be either a relative/absolute local filesystem
                path or a git repository URI (e.g. https://github.com/databricks/mlflow-example)
                pointing to a project directory containing an MLproject file.
    :param entry_point: Entry point to run within the project. If no entry point with the specified
                        name is found, attempts to run the project file `entry_point` as a script,
                        using "python" to run .py files and the default shell (specified by
                        environment variable $SHELL) to run .sh files.
    :param experiment_id: ID of experiment under which to launch the run.
    :param mode: Execution mode for the run. Can be set to "local" or "databricks".
    :param cluster_spec: Path to JSON file describing the cluster to use when launching a run on
                         Databricks.
    :param git_username: Username for HTTP(S) authentication with Git.
    :param git_password: Password for HTTP(S) authentication with Git.
    :param use_conda: If True (the default), creates a new Conda environment for the run and
                      installs project dependencies within that environment. Otherwise, runs the
                      project in the current environment without installing any project
                      dependencies.
    :param use_temp_cwd: Only used if `mode` is "local" and `uri` is a local directory.
                         If True, copies project to a temporary working directory before running it.
                         Otherwise (the default), runs project using `uri` (the project's path) as
                         the working directory.
    :param storage_dir: Only used if `mode` is local. MLflow will download artifacts from
                        distributed URIs passed to parameters of type 'path' to subdirectories of
                        storage_dir.
    :param block: Whether or not to block while waiting for a run to complete. Defaults to True.
                  Note that if `block` is False and mode is "local", this method will return, but
                  the current process will block when exiting until the local run completes.
                  If the current process is interrupted, any asynchronous runs launched via this
                  method will be terminated.
    :return: A `SubmittedRun` exposing information (e.g. run ID) about the launched run.
    """
    submitted_run_obj = _run(uri=uri, entry_point=entry_point, version=version,
                             parameters=parameters,
                             experiment_id=experiment_id,
                             mode=mode, cluster_spec=cluster_spec, git_username=git_username,
                             git_password=git_password, use_conda=use_conda,
                             use_temp_cwd=use_temp_cwd, storage_dir=storage_dir, block=block)
    if block:
        submitted_run_obj.wait()
        run_status = submitted_run_obj.get_status()
        if run_status and RunStatus.from_string(run_status) != RunStatus.FINISHED:
            raise ExecutionException("=== Run %s was unsuccessful, status: '%s' ===" %
                                     (submitted_run_obj.run_id, run_status))
    return submitted_run_obj
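A minimal usage sketch for the run() API above, assuming the older mlflow.projects signature shown in this snippet (a SubmittedRun whose get_status() returns a status string) and that RunStatus is importable from mlflow.entities; the project URI and the alpha parameter are illustrative:

from mlflow.projects import run
from mlflow.entities import RunStatus

# Launch the project asynchronously, then wait for it and check the final status.
submitted = run(
    uri="https://github.com/databricks/mlflow-example",  # directory containing an MLproject file
    entry_point="main",
    parameters={"alpha": 0.5},  # hypothetical entry-point parameter
    use_conda=True,
    block=False,                # return immediately; we call wait() ourselves
)
submitted.wait()
status = submitted.get_status()
if RunStatus.from_string(status) != RunStatus.FINISHED:
    raise RuntimeError("Run %s was unsuccessful, status: '%s'" % (submitted.run_id, status))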
Example #2
def to_proto(self):
    proto = ProtoRunInfo()
    proto.run_uuid = self.run_uuid
    proto.run_id = self.run_id
    proto.experiment_id = self.experiment_id
    proto.user_id = self.user_id
    proto.status = RunStatus.from_string(self.status)
    proto.start_time = self.start_time
    if self.end_time:
        proto.end_time = self.end_time
    if self.artifact_uri:
        proto.artifact_uri = self.artifact_uri
    proto.lifecycle_stage = self.lifecycle_stage
    return proto
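The status handling in to_proto() is simply the string-to-enum mapping provided by RunStatus. A quick round-trip sketch, assuming the RunStatus helpers from mlflow.entities:

from mlflow.entities import RunStatus

enum_value = RunStatus.from_string("FINISHED")  # integer enum value stored on the proto
assert RunStatus.to_string(enum_value) == "FINISHED"
assert RunStatus.is_terminated(enum_value)      # FINISHED is a terminal status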
Example #3
def _validate_autologging_run(autologging_integration, run_id):
    """
    For testing purposes, verifies that an MLflow run produced by an `autologging_integration`
    satisfies the following properties:

        - The run has an autologging tag whose value is the name of the autologging integration
        - The run has a terminal status (e.g., KILLED, FAILED, FINISHED)
    """
    client = MlflowClient()
    run = client.get_run(run_id)
    autologging_tag_value = run.data.tags.get(MLFLOW_AUTOLOGGING)
    assert autologging_tag_value == autologging_integration, (
        "Autologging run with id {} failed to set autologging tag with expected value. Expected: "
        "'{}', Actual: '{}'".format(run_id, autologging_integration, autologging_tag_value)
    )
    assert RunStatus.is_terminated(
        RunStatus.from_string(run.info.status)
    ), "Autologging run with id {} has a non-terminal status '{}'".format(run_id, run.info.status)
Example #4
def set_terminated(self, status):
    self.run_info = self.store.update_run_info(
        self.run_info.run_uuid,
        run_status=RunStatus.from_string(status),
        end_time=_get_unix_timestamp())
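The snippet above is an internal helper; roughly the same effect is available through the public client API. A sketch, with the run ID left as a placeholder:

from mlflow.tracking import MlflowClient

client = MlflowClient()
# Mark an existing run as finished; status must be a RunStatus string such as
# "FINISHED", "FAILED", or "KILLED".
client.set_terminated(run_id="<run-id>", status="FINISHED")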
Example #5
def validate_exit_status(status_str, expected):
    assert RunStatus.from_string(status_str) == expected
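For example, the assertion helper above could be exercised like this (illustrative status strings):

from mlflow.entities import RunStatus

validate_exit_status("FINISHED", RunStatus.FINISHED)
validate_exit_status("FAILED", RunStatus.FAILED)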