Example #1
def test_build_image_includes_default_metadata_in_azure_image_and_model_tags(
        sklearn_model):
    artifact_path = "model"
    with mlflow.start_run():
        mlflow.sklearn.log_model(sk_model=sklearn_model,
                                 artifact_path=artifact_path)
        run_id = mlflow.active_run().info.run_uuid
    model_config = Model.load(
        os.path.join(_get_model_log_dir(artifact_path, run_id), "MLmodel"))

    with AzureMLMocks() as aml_mocks:
        workspace = get_azure_workspace()
        mlflow.azureml.build_image(model_path=artifact_path,
                                   run_id=run_id,
                                   workspace=workspace)

        register_model_call_args = aml_mocks["register_model"].call_args_list
        assert len(register_model_call_args) == 1
        _, register_model_call_kwargs = register_model_call_args[0]
        called_tags = register_model_call_kwargs["tags"]
        assert called_tags["run_id"] == run_id
        assert called_tags["model_path"] == artifact_path
        assert called_tags["python_version"] ==\
            model_config.flavors[pyfunc.FLAVOR_NAME][pyfunc.PY_VERSION]

        create_image_call_args = aml_mocks["create_image"].call_args_list
        assert len(create_image_call_args) == 1
        _, create_image_call_kwargs = create_image_call_args[0]
        image_config = create_image_call_kwargs["image_config"]
        assert image_config.tags["run_id"] == run_id
        assert image_config.tags["model_path"] == artifact_path
        assert image_config.tags["python_version"] ==\
            model_config.flavors[pyfunc.FLAVOR_NAME][pyfunc.PY_VERSION]
Example #2
def test_log_model_persists_specified_conda_env_in_mlflow_model_directory(
        sklearn_knn_model, main_scoped_model_class, pyfunc_custom_env):
    sklearn_artifact_path = "sk_model"
    with mlflow.start_run():
        mlflow.sklearn.log_model(sk_model=sklearn_knn_model,
                                 artifact_path=sklearn_artifact_path)
        sklearn_run_id = mlflow.active_run().info.run_uuid

    pyfunc_artifact_path = "pyfunc_model"
    with mlflow.start_run():
        mlflow.pyfunc.log_model(
            artifact_path=pyfunc_artifact_path,
            artifacts={
                "sk_model":
                utils_get_artifact_uri(artifact_path=sklearn_artifact_path,
                                       run_id=sklearn_run_id)
            },
            python_model=main_scoped_model_class(predict_fn=None),
            conda_env=pyfunc_custom_env)
        pyfunc_run_id = mlflow.active_run().info.run_uuid

    pyfunc_model_path = _get_model_log_dir(pyfunc_artifact_path, pyfunc_run_id)
    pyfunc_conf = _get_flavor_configuration(
        model_path=pyfunc_model_path, flavor_name=mlflow.pyfunc.FLAVOR_NAME)
    saved_conda_env_path = os.path.join(pyfunc_model_path,
                                        pyfunc_conf[mlflow.pyfunc.ENV])
    assert os.path.exists(saved_conda_env_path)
    assert saved_conda_env_path != pyfunc_custom_env

    with open(pyfunc_custom_env, "r") as f:
        pyfunc_custom_env_parsed = yaml.safe_load(f)
    with open(saved_conda_env_path, "r") as f:
        saved_conda_env_parsed = yaml.safe_load(f)
    assert saved_conda_env_parsed == pyfunc_custom_env_parsed
Example #3
def load_model(path, tf_sess, run_id=None):
    """
    Load an MLflow model that contains the TensorFlow flavor from the specified path.

    **This method must be called within a TensorFlow graph context.**

    :param path: The local filesystem path or run-relative artifact path to the model.
    :param tf_sess: The TensorFlow session in which to load the model.
    :param run_id: The ID of the MLflow run containing the model, if the model was logged as a run
                   artifact. If ``None``, ``path`` is treated as a local filesystem path.
    :return: A TensorFlow signature definition of type:
             ``tensorflow.core.protobuf.meta_graph_pb2.SignatureDef``. This defines the input and
             output tensors for model inference.

    >>> import mlflow.tensorflow
    >>> import tensorflow as tf
    >>> tf_graph = tf.Graph()
    >>> tf_sess = tf.Session(graph=tf_graph)
    >>> with tf_graph.as_default():
    >>>     signature_def = mlflow.tensorflow.load_model(path="model_path", tf_sess=tf_sess)
    >>>     input_tensors = [tf_graph.get_tensor_by_name(input_signature.name)
    >>>                      for _, input_signature in signature_def.inputs.items()]
    >>>     output_tensors = [tf_graph.get_tensor_by_name(output_signature.name)
    >>>                       for _, output_signature in signature_def.outputs.items()]
    """
    if run_id is not None:
        path = _get_model_log_dir(model_name=path, run_id=run_id)
    path = os.path.abspath(path)
    flavor_conf = _get_flavor_configuration(model_path=path,
                                            flavor_name=FLAVOR_NAME)
    tf_saved_model_dir = os.path.join(path, flavor_conf['saved_model_dir'])
    return _load_model(tf_saved_model_dir=tf_saved_model_dir,
                       tf_sess=tf_sess,
                       tf_meta_graph_tags=flavor_conf['meta_graph_tags'],
                       tf_signature_def_key=flavor_conf['signature_def_key'])
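Building on the docstring example above, a minimal follow-on sketch (not from the original source; the input shape is hypothetical) of running inference with the resolved tensors:

import numpy as np
# `tf_sess`, `input_tensors`, and `output_tensors` are the objects produced by the
# docstring example above; the 5x4 batch of random values is a hypothetical input.
feed_dict = {input_tensors[0]: np.random.rand(5, 4)}
predictions = tf_sess.run(output_tensors, feed_dict=feed_dict)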
Example #4
def test_log_model_without_specified_conda_env_uses_default_env_with_expected_dependencies(
        sklearn_knn_model, main_scoped_model_class):
    sklearn_artifact_path = "sk_model"
    with mlflow.start_run():
        mlflow.sklearn.log_model(sk_model=sklearn_knn_model,
                                 artifact_path=sklearn_artifact_path)
        sklearn_run_id = mlflow.active_run().info.run_uuid

    pyfunc_artifact_path = "pyfunc_model"
    with mlflow.start_run():
        mlflow.pyfunc.log_model(
            artifact_path=pyfunc_artifact_path,
            artifacts={
                "sk_model":
                utils_get_artifact_uri(artifact_path=sklearn_artifact_path,
                                       run_id=sklearn_run_id)
            },
            python_model=main_scoped_model_class(predict_fn=None))
        pyfunc_run_id = mlflow.active_run().info.run_uuid

    pyfunc_model_path = _get_model_log_dir(pyfunc_artifact_path, pyfunc_run_id)
    pyfunc_conf = _get_flavor_configuration(
        model_path=pyfunc_model_path, flavor_name=mlflow.pyfunc.FLAVOR_NAME)
    conda_env_path = os.path.join(pyfunc_model_path,
                                  pyfunc_conf[mlflow.pyfunc.ENV])
    with open(conda_env_path, "r") as f:
        conda_env = yaml.safe_load(f)

    assert conda_env == mlflow.pyfunc.model.DEFAULT_CONDA_ENV
Example #5
def test_validate_deployment_flavor_validates_python_function_flavor_successfully(
        pretrained_model):
    model_config_path = os.path.join(
        _get_model_log_dir(pretrained_model.model_path,
                           pretrained_model.run_id), "MLmodel")
    model_config = Model.load(model_config_path)
    mfs._validate_deployment_flavor(model_config=model_config,
                                    flavor=mlflow.pyfunc.FLAVOR_NAME)
Example #6
def test_get_preferred_deployment_flavor_obtains_valid_flavor_from_model(
        pretrained_model):
    model_config_path = os.path.join(
        _get_model_log_dir(pretrained_model.model_path,
                           pretrained_model.run_id), "MLmodel")
    model_config = Model.load(model_config_path)

    selected_flavor = mfs._get_preferred_deployment_flavor(
        model_config=model_config)

    assert selected_flavor in mfs.SUPPORTED_DEPLOYMENT_FLAVORS
    assert selected_flavor in model_config.flavors
Example #7
def test_model_log_uses_cloudpickle_serialization_format_by_default(sklearn_knn_model):
    artifact_path = "model"
    with mlflow.start_run():
        mlflow.sklearn.log_model(
                sk_model=sklearn_knn_model.model, artifact_path=artifact_path, conda_env=None)
        run_id = mlflow.active_run().info.run_uuid
    model_path = _get_model_log_dir(artifact_path, run_id)

    sklearn_conf = _get_flavor_configuration(
            model_path=model_path, flavor_name=mlflow.sklearn.FLAVOR_NAME)
    assert "serialization_format" in sklearn_conf
    assert sklearn_conf["serialization_format"] == mlflow.sklearn.SERIALIZATION_FORMAT_CLOUDPICKLE
Example #8
def test_mleap_model_log(spark_model_iris):
    artifact_path = "model"
    with mlflow.start_run():
        rid = active_run().info.run_id
        sparkm.log_model(spark_model=spark_model_iris.model,
                         sample_input=spark_model_iris.spark_df,
                         artifact_path=artifact_path)
    model_path = _get_model_log_dir(model_name=artifact_path, run_id=rid)
    config_path = os.path.join(model_path, "MLmodel")
    mlflow_model = Model.load(config_path)
    assert sparkm.FLAVOR_NAME in mlflow_model.flavors
    assert mleap.FLAVOR_NAME in mlflow_model.flavors
Example #9
def serve(model_path, run_id, port):
    """
    Serve an RFunction model saved with MLflow.

    If a ``run_id`` is specified, ``model-path`` is treated as an artifact path within that run;
    otherwise it is treated as a local path.
    """
    if run_id:
        model_path = _get_model_log_dir(model_path, run_id)

    command = "mlflow::mlflow_rfunc_serve('{0}', port = {1})".format(model_path, port)
    execute(command)
Example #10
def test_model_log_without_specified_conda_env_uses_default_env_with_expected_dependencies(
        h2o_iris_model):
    artifact_path = "model"
    with mlflow.start_run():
        mlflow.h2o.log_model(h2o_model=h2o_iris_model.model, artifact_path=artifact_path)
        run_id = mlflow.active_run().info.run_uuid
    model_path = _get_model_log_dir(artifact_path, run_id)

    pyfunc_conf = _get_flavor_configuration(model_path=model_path, flavor_name=pyfunc.FLAVOR_NAME)
    conda_env_path = os.path.join(model_path, pyfunc_conf[pyfunc.ENV])
    with open(conda_env_path, "r") as f:
        conda_env = yaml.safe_load(f)

    assert conda_env == mlflow.h2o.DEFAULT_CONDA_ENV
Example #11
def predict(model_path, run_id, input_path, output_path):
    """
    Generate predictions from an RFunction model saved with MLflow.
    Return the prediction results as a JSON DataFrame.

    If a ``run-id`` is specified, ``model-path`` is treated as an artifact path within that run;
    otherwise it is treated as a local path.
    """
    if run_id:
        model_path = _get_model_log_dir(model_path, run_id)

    str_cmd = "mlflow::mlflow_rfunc_predict('{0}', '{1}', '{2}')"
    command = str_cmd.format(model_path, input_path, str_optional(output_path))

    execute(command)
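For illustration, a small sketch (the paths are hypothetical, not from the source) of the R command string this CLI assembles before handing it to ``execute``:

str_cmd = "mlflow::mlflow_rfunc_predict('{0}', '{1}', '{2}')"
# Hypothetical local model directory and input/output files.
command = str_cmd.format("/tmp/rfunc_model", "input.csv", "predictions.json")
# -> mlflow::mlflow_rfunc_predict('/tmp/rfunc_model', 'input.csv', 'predictions.json')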
Example #12
def test_deployment_of_model_with_no_supported_flavors_raises_exception(
        pretrained_model):
    logged_model_path = _get_model_log_dir(pretrained_model.model_path,
                                           pretrained_model.run_id)
    model_config_path = os.path.join(logged_model_path, "MLmodel")
    model_config = Model.load(model_config_path)
    del model_config.flavors[mlflow.pyfunc.FLAVOR_NAME]
    model_config.save(path=model_config_path)

    with pytest.raises(MlflowException) as exc:
        mfs.deploy(app_name="missing-flavor",
                   model_path=logged_model_path,
                   flavor=None)

    assert exc.value.error_code == ErrorCode.Name(RESOURCE_DOES_NOT_EXIST)
Example #13
def test_model_log_without_specified_conda_env_uses_default_env_with_expected_dependencies(
        sklearn_knn_model):
    artifact_path = "model"
    knn_model = sklearn_knn_model.model
    with mlflow.start_run():
        mlflow.sklearn.log_model(sk_model=knn_model, artifact_path=artifact_path, conda_env=None,
                                 serialization_format=mlflow.sklearn.SERIALIZATION_FORMAT_PICKLE)
        run_id = mlflow.active_run().info.run_uuid
    model_path = _get_model_log_dir(artifact_path, run_id)

    pyfunc_conf = _get_flavor_configuration(model_path=model_path, flavor_name=pyfunc.FLAVOR_NAME)
    conda_env_path = os.path.join(model_path, pyfunc_conf[pyfunc.ENV])
    with open(conda_env_path, "r") as f:
        conda_env = yaml.safe_load(f)

    assert conda_env == mlflow.sklearn.DEFAULT_CONDA_ENV
Example #14
def test_log_model_without_specified_conda_env_uses_default_env_with_expected_dependencies(
        saved_tf_iris_model, model_path):
    artifact_path = "model"
    with mlflow.start_run():
        mlflow.tensorflow.log_model(tf_saved_model_dir=saved_tf_iris_model.path,
                                    tf_meta_graph_tags=saved_tf_iris_model.meta_graph_tags,
                                    tf_signature_def_key=saved_tf_iris_model.signature_def_key,
                                    artifact_path=artifact_path,
                                    conda_env=None)
        run_id = mlflow.active_run().info.run_uuid
    model_path = _get_model_log_dir(artifact_path, run_id)

    pyfunc_conf = _get_flavor_configuration(model_path=model_path, flavor_name=pyfunc.FLAVOR_NAME)
    conda_env_path = os.path.join(model_path, pyfunc_conf[pyfunc.ENV])
    with open(conda_env_path, "r") as f:
        conda_env = yaml.safe_load(f)

    assert conda_env == mlflow.tensorflow.DEFAULT_CONDA_ENV
Example #15
def run_local(model_path, run_id=None, port=5000, image=DEFAULT_IMAGE_NAME, flavor=None):
    """
    Serve model locally in a SageMaker compatible Docker container.

    :param model_path: Path to the model. Either a local path if no ``run_id`` is specified, or
                       a run-relative artifact path if ``run_id`` is specified.
    :param run_id: MLflow run ID.
    :param port: Local port.
    :param image: Name of the Docker image to be used.
    :param flavor: The name of the flavor of the model to use for local serving. If ``None``,
                   a flavor is automatically selected from the model's available flavors. If the
                   specified flavor is not present or not supported for deployment, an exception
                   is thrown.
    """
    if run_id:
        model_path = _get_model_log_dir(model_path, run_id)
    model_path = os.path.abspath(model_path)
    model_config_path = os.path.join(model_path, "MLmodel")
    model_config = Model.load(model_config_path)

    if flavor is None:
        flavor = _get_preferred_deployment_flavor(model_config)
    else:
        _validate_deployment_flavor(model_config, flavor)
    print("Using the {selected_flavor} flavor for local serving!".format(selected_flavor=flavor))

    deployment_config = _get_deployment_config(flavor_name=flavor)

    _logger.info("launching docker image with path %s", model_path)
    cmd = ["docker", "run", "-v", "{}:/opt/ml/model/".format(model_path), "-p", "%d:8080" % port]
    for key, value in deployment_config.items():
        cmd += ["-e", "{key}={value}".format(key=key, value=value)]
    cmd += ["--rm", image, "serve"]
    _logger.info('executing: %s', ' '.join(cmd))
    proc = Popen(cmd, stdout=PIPE, stderr=STDOUT, universal_newlines=True)

    import signal

    def _sigterm_handler(*_):
        _logger.info("received termination signal => killing docker process")
        proc.send_signal(signal.SIGINT)

    signal.signal(signal.SIGTERM, _sigterm_handler)
    for x in iter(proc.stdout.readline, ""):
        eprint(x, end='')
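A hedged usage sketch (the run ID and port are placeholders) showing how a logged model might be served locally with this function:

import mlflow.sagemaker as mfs
# Hypothetical run ID of a run that logged an artifact under "model"; the container
# listens on local port 5001 and the serving flavor is selected automatically.
mfs.run_local(model_path="model", run_id="<run-id>", port=5001, flavor=None)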
Example #16
def test_model_log_persists_specified_conda_env_in_mlflow_model_directory(
        h2o_iris_model, h2o_custom_env):
    artifact_path = "model"
    with mlflow.start_run():
        mlflow.h2o.log_model(h2o_model=h2o_iris_model.model,
                             artifact_path=artifact_path,
                             conda_env=h2o_custom_env)
        run_id = mlflow.active_run().info.run_uuid
    model_path = _get_model_log_dir(artifact_path, run_id)

    pyfunc_conf = _get_flavor_configuration(model_path=model_path, flavor_name=pyfunc.FLAVOR_NAME)
    saved_conda_env_path = os.path.join(model_path, pyfunc_conf[pyfunc.ENV])
    assert os.path.exists(saved_conda_env_path)
    assert saved_conda_env_path != h2o_custom_env

    with open(h2o_custom_env, "r") as f:
        h2o_custom_env_text = f.read()
    with open(saved_conda_env_path, "r") as f:
        saved_conda_env_text = f.read()
    assert saved_conda_env_text == h2o_custom_env_text
Example #17
def test_model_log_persists_specified_conda_env_in_mlflow_model_directory(
        sklearn_knn_model, sklearn_custom_env):
    artifact_path = "model"
    with mlflow.start_run():
        mlflow.sklearn.log_model(sk_model=sklearn_knn_model.model,
                                 artifact_path=artifact_path,
                                 conda_env=sklearn_custom_env)
        run_id = mlflow.active_run().info.run_uuid
    model_path = _get_model_log_dir(artifact_path, run_id)

    pyfunc_conf = _get_flavor_configuration(model_path=model_path, flavor_name=pyfunc.FLAVOR_NAME)
    saved_conda_env_path = os.path.join(model_path, pyfunc_conf[pyfunc.ENV])
    assert os.path.exists(saved_conda_env_path)
    assert saved_conda_env_path != sklearn_custom_env

    with open(sklearn_custom_env, "r") as f:
        sklearn_custom_env_parsed = yaml.safe_load(f)
    with open(saved_conda_env_path, "r") as f:
        saved_conda_env_parsed = yaml.safe_load(f)
    assert saved_conda_env_parsed == sklearn_custom_env_parsed
Example #18
def serve(model_path, run_id, port, host, no_conda):
    """
    Serve a pyfunc model saved with MLflow by launching a webserver on the specified
    host and port. For information about the input data formats accepted by the webserver,
    see the following documentation:
    https://www.mlflow.org/docs/latest/models.html#pyfunc-deployment.

    If a ``run_id`` is specified, ``model-path`` is treated as an artifact path within that run;
    otherwise it is treated as a local path.
    """
    if run_id:
        model_path = _get_model_log_dir(model_path, run_id)

    model_env_file = _load_model_env(model_path)
    if not no_conda and model_env_file is not None:
        conda_env_path = os.path.join(model_path, model_env_file)
        return _rerun_in_conda(conda_env_path)

    app = scoring_server.init(load_pyfunc(model_path))
    app.run(port=port, host=host)
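Once the server is up, it can be queried over HTTP. A rough sketch follows (the endpoint, port, and payload orientation are assumptions; consult the linked pyfunc deployment documentation for the formats your MLflow version accepts):

import pandas as pd
import requests

# Hypothetical input frame; "/invocations" and the JSON orientation are assumptions
# based on the pyfunc deployment documentation referenced in the docstring.
input_df = pd.DataFrame({"x": [1.0, 2.0, 3.0]})
response = requests.post("http://127.0.0.1:5000/invocations",
                         data=input_df.to_json(orient="split"),
                         headers={"Content-Type": "application/json"})
print(response.text)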
Example #19
def test_model_log_load(sklearn_knn_model, iris_data, tmpdir):
    sk_model_path = os.path.join(str(tmpdir), "knn.pkl")
    with open(sk_model_path, "wb") as f:
        pickle.dump(sklearn_knn_model, f)

    pyfunc_artifact_path = "pyfunc_model"
    with mlflow.start_run():
        mlflow.pyfunc.log_model(artifact_path=pyfunc_artifact_path,
                                data_path=sk_model_path,
                                loader_module=os.path.basename(__file__)[:-3],
                                code_path=[__file__])
        pyfunc_run_id = mlflow.active_run().info.run_uuid

    pyfunc_model_path = _get_model_log_dir(pyfunc_artifact_path, pyfunc_run_id)
    model_config = Model.load(os.path.join(pyfunc_model_path, "MLmodel"))
    assert mlflow.pyfunc.FLAVOR_NAME in model_config.flavors
    assert mlflow.pyfunc.PY_VERSION in model_config.flavors[
        mlflow.pyfunc.FLAVOR_NAME]
    reloaded_model = mlflow.pyfunc.load_pyfunc(pyfunc_model_path)
    np.testing.assert_array_equal(sklearn_knn_model.predict(iris_data[0]),
                                  reloaded_model.predict(iris_data[0]))
Example #20
def test_log_model_persists_specified_conda_env_in_mlflow_model_directory(
        saved_tf_iris_model, tf_custom_env):
    artifact_path = "model"
    with mlflow.start_run():
        mlflow.tensorflow.log_model(tf_saved_model_dir=saved_tf_iris_model.path,
                                    tf_meta_graph_tags=saved_tf_iris_model.meta_graph_tags,
                                    tf_signature_def_key=saved_tf_iris_model.signature_def_key,
                                    artifact_path=artifact_path,
                                    conda_env=tf_custom_env)
        run_id = mlflow.active_run().info.run_uuid
    model_path = _get_model_log_dir(artifact_path, run_id)

    pyfunc_conf = _get_flavor_configuration(model_path=model_path, flavor_name=pyfunc.FLAVOR_NAME)
    saved_conda_env_path = os.path.join(model_path, pyfunc_conf[pyfunc.ENV])
    assert os.path.exists(saved_conda_env_path)
    assert saved_conda_env_path != tf_custom_env

    with open(tf_custom_env, "r") as f:
        tf_custom_env_text = f.read()
    with open(saved_conda_env_path, "r") as f:
        saved_conda_env_text = f.read()
    assert saved_conda_env_text == tf_custom_env_text
Example #21
def predict(model_path, run_id, input_path, output_path, no_conda):
    """
    Load a pandas DataFrame and run a python_function model saved with MLflow against it.
    Return the prediction results as a CSV-formatted pandas DataFrame.

    If a ``run-id`` is specified, ``model-path`` is treated as an artifact path within that run;
    otherwise it is treated as a local path.
    """
    if run_id:
        model_path = _get_model_log_dir(model_path, run_id)

    model_env_file = _load_model_env(model_path)
    if not no_conda and model_env_file is not None:
        conda_env_path = os.path.join(model_path, model_env_file)
        return _rerun_in_conda(conda_env_path)

    model = load_pyfunc(model_path)
    df = pandas.read_csv(input_path)
    result = model.predict(df)
    out_stream = sys.stdout
    if output_path:
        out_stream = open(output_path, 'w')
    pandas.DataFrame(data=result).to_csv(out_stream, header=False, index=False)
Example #22
def test_model_log(sklearn_logreg_model, model_path):
    old_uri = mlflow.get_tracking_uri()
    with TempDir(chdr=True, remove_on_exit=True) as tmp:
        for should_start_run in [False, True]:
            try:
                mlflow.set_tracking_uri("test")
                if should_start_run:
                    mlflow.start_run()

                artifact_path = "linear"
                conda_env = os.path.join(tmp.path(), "conda_env.yaml")
                _mlflow_conda_env(conda_env,
                                  additional_pip_deps=["scikit-learn"])

                mlflow.sklearn.log_model(sk_model=sklearn_logreg_model.model,
                                         artifact_path=artifact_path,
                                         conda_env=conda_env)
                run_id = mlflow.active_run().info.run_uuid

                reloaded_logreg_model = mlflow.sklearn.load_model(
                    artifact_path, run_id)
                np.testing.assert_array_equal(
                    sklearn_logreg_model.model.predict(
                        sklearn_logreg_model.inference_data),
                    reloaded_logreg_model.predict(
                        sklearn_logreg_model.inference_data))

                model_path = _get_model_log_dir(artifact_path, run_id=run_id)
                model_config = Model.load(os.path.join(model_path, "MLmodel"))
                assert pyfunc.FLAVOR_NAME in model_config.flavors
                assert pyfunc.ENV in model_config.flavors[pyfunc.FLAVOR_NAME]
                env_path = model_config.flavors[pyfunc.FLAVOR_NAME][pyfunc.ENV]
                assert os.path.exists(os.path.join(model_path, env_path))

            finally:
                mlflow.end_run()
                mlflow.set_tracking_uri(old_uri)
Example #23
def deploy(app_name, model_path, execution_role_arn=None, bucket=None, run_id=None,
           image_url=None, region_name="us-west-2", mode=DEPLOYMENT_MODE_CREATE, archive=False,
           instance_type=DEFAULT_SAGEMAKER_INSTANCE_TYPE,
           instance_count=DEFAULT_SAGEMAKER_INSTANCE_COUNT, vpc_config=None, flavor=None,
           synchronous=True, timeout_seconds=1200):
    """
    Deploy an MLflow model on AWS SageMaker.
    The currently active AWS account must have correct permissions set up.

    This function creates a SageMaker endpoint. For more information about the input data
    formats accepted by this endpoint, see the
    :ref:`MLflow deployment tools documentation <sagemaker_deployment>`.

    :param app_name: Name of the deployed application.
    :param model_path: Path to the model. Either a local path if no ``run_id`` is specified, or an
                       MLflow run-relative artifact path if ``run_id`` is specified.
    :param execution_role_arn: The name of an IAM role granting the SageMaker service permissions to
                               access the specified Docker image and S3 bucket containing MLflow
                               model artifacts. If unspecified, the currently-assumed role will be
                               used. This execution role is passed to the SageMaker service when
                               creating a SageMaker model from the specified MLflow model. It is
                               passed as the ``ExecutionRoleArn`` parameter of the `SageMaker
                               CreateModel API call <https://docs.aws.amazon.com/sagemaker/latest/
                               dg/API_CreateModel.html>`_. This role is *not* assumed for any other
                               call. For more information about SageMaker execution roles for model
                               creation, see
                               https://docs.aws.amazon.com/sagemaker/latest/dg/sagemaker-roles.html.
    :param bucket: S3 bucket where model artifacts will be stored. Defaults to a
                   SageMaker-compatible bucket name.
    :param run_id: MLflow run ID.
    :param image_url: URL of the Docker image to be used. If not specified, a publicly-available
                      pre-built image is used.
    :param region_name: Name of the AWS region to which to deploy the application.
    :param mode: The mode in which to deploy the application. Must be one of the following:

                 ``mlflow.sagemaker.DEPLOYMENT_MODE_CREATE``
                     Create an application with the specified name and model. This fails if an
                     application of the same name already exists.

                 ``mlflow.sagemaker.DEPLOYMENT_MODE_REPLACE``
                     If an application of the specified name exists, its model(s) is replaced with
                     the specified model. If no such application exists, it is created with the
                     specified name and model.

                 ``mlflow.sagemaker.DEPLOYMENT_MODE_ADD``
                     Add the specified model to a pre-existing application with the specified name,
                     if one exists. If the application does not exist, a new application is created
                     with the specified name and model. NOTE: If the application **already exists**,
                     the specified model is added to the application's corresponding SageMaker
                     endpoint with an initial weight of zero (0). To route traffic to the model,
                     update the application's associated endpoint configuration using either the
                     AWS console or the ``UpdateEndpointWeightsAndCapacities`` function defined in
                     https://docs.aws.amazon.com/sagemaker/latest/dg/API_UpdateEndpointWeightsAndCapacities.html.

    :param archive: If ``True``, any pre-existing SageMaker application resources that become
                    inactive (e.g., as a result of deploying in
                    ``mlflow.sagemaker.DEPLOYMENT_MODE_REPLACE`` mode) are preserved.
                    These resources may include unused SageMaker models and endpoint configurations
                    that were associated with a prior version of the application endpoint. If
                    ``False``, these resources are deleted. In order to use ``archive=False``,
                    ``deploy()`` must be executed synchronously with ``synchronous=True``.
    :param instance_type: The type of SageMaker ML instance on which to deploy the model. For a list
                          of supported instance types, see
                          https://aws.amazon.com/sagemaker/pricing/instance-types/.
    :param instance_count: The number of SageMaker ML instances on which to deploy the model.
    :param vpc_config: A dictionary specifying the VPC configuration to use when creating the
                       new SageMaker model associated with this application. The acceptable values
                       for this parameter are identical to those of the ``VpcConfig`` parameter in
                       the SageMaker boto3 client (https://boto3.readthedocs.io/en/latest/reference/
                       services/sagemaker.html#SageMaker.Client.create_model). For more information,
                       see https://docs.aws.amazon.com/sagemaker/latest/dg/API_VpcConfig.html.

                       Example:

                       >>> import mlflow.sagemaker as mfs
                       >>> vpc_config = {
                       ...                  'SecurityGroupIds': [
                       ...                      'sg-123456abc',
                       ...                  ],
                       ...                  'Subnets': [
                       ...                      'subnet-123456abc',
                       ...                  ]
                       ...              }
                       >>> mfs.deploy(..., vpc_config=vpc_config)

    :param flavor: The name of the flavor of the model to use for deployment. Must be either
                   ``None`` or one of mlflow.sagemaker.SUPPORTED_DEPLOYMENT_FLAVORS. If ``None``,
                   a flavor is automatically selected from the model's available flavors. If the
                   specified flavor is not present or not supported for deployment, an exception
                   will be thrown.
    :param synchronous: If `True`, this function will block until the deployment process succeeds
                        or encounters an irrecoverable failure. If `False`, this function will
                        return immediately after starting the deployment process. It will not wait
                        for the deployment process to complete; in this case, the caller is
                        responsible for monitoring the health and status of the pending deployment
                        via native SageMaker APIs or the AWS console.
    :param timeout_seconds: If `synchronous` is `True`, the deployment process will return after the
                            specified number of seconds if no definitive result (success or failure)
                            is achieved. Once the function returns, the caller is responsible
                            for monitoring the health and status of the pending deployment via
                            native SageMaker APIs or the AWS console. If `synchronous` is False,
                            this parameter is ignored.
    """
    if (not archive) and (not synchronous):
        raise MlflowException(
            message=(
                "Resources must be archived when `deploy()` is executed in non-synchronous mode."
                " Either set `synchronous=True` or `archive=True`."),
            error_code=INVALID_PARAMETER_VALUE)

    if mode not in DEPLOYMENT_MODES:
        raise MlflowException(
                message="`mode` must be one of: {deployment_modes}".format(
                    deployment_modes=",".join(DEPLOYMENT_MODES)),
                error_code=INVALID_PARAMETER_VALUE)

    s3_bucket_prefix = model_path
    if run_id:
        model_path = _get_model_log_dir(model_path, run_id)
        s3_bucket_prefix = os.path.join(run_id, s3_bucket_prefix)

    model_config_path = os.path.join(model_path, "MLmodel")
    if not os.path.exists(model_config_path):
        raise MlflowException(
            message=(
                "Failed to find MLmodel configuration within the specified model's"
                " root directory."),
            error_code=INVALID_PARAMETER_VALUE)
    model_config = Model.load(model_config_path)

    if flavor is None:
        flavor = _get_preferred_deployment_flavor(model_config)
    else:
        _validate_deployment_flavor(model_config, flavor)
    _logger.info("Using the %s flavor for deployment!", flavor)

    sage_client = boto3.client('sagemaker', region_name=region_name)
    s3_client = boto3.client('s3', region_name=region_name)

    endpoint_exists = _find_endpoint(endpoint_name=app_name, sage_client=sage_client) is not None
    if endpoint_exists and mode == DEPLOYMENT_MODE_CREATE:
        raise MlflowException(
                message=(
                    "You are attempting to deploy an application with name: {application_name} in"
                    " '{mode_create}' mode. However, an application with the same name already"
                    " exists. If you want to update this application, deploy in '{mode_add}' or"
                    " '{mode_replace}' mode.".format(
                        application_name=app_name,
                        mode_create=DEPLOYMENT_MODE_CREATE,
                        mode_add=DEPLOYMENT_MODE_ADD,
                        mode_replace=DEPLOYMENT_MODE_REPLACE)),
                error_code=INVALID_PARAMETER_VALUE)

    if not image_url:
        image_url = _get_default_image_url(region_name=region_name)
    if not execution_role_arn:
        execution_role_arn = _get_assumed_role_arn()
    if not bucket:
        _logger.info("No model data bucket specified, using the default bucket")
        bucket = _get_default_s3_bucket(region_name)

    model_s3_path = _upload_s3(local_model_path=model_path,
                               bucket=bucket,
                               prefix=s3_bucket_prefix,
                               region_name=region_name,
                               s3_client=s3_client)
    if endpoint_exists:
        deployment_operation = _update_sagemaker_endpoint(
                endpoint_name=app_name, image_url=image_url, model_s3_path=model_s3_path,
                run_id=run_id, flavor=flavor, instance_type=instance_type,
                instance_count=instance_count, vpc_config=vpc_config, mode=mode,
                role=execution_role_arn, sage_client=sage_client, s3_client=s3_client)
    else:
        deployment_operation = _create_sagemaker_endpoint(
                endpoint_name=app_name, image_url=image_url, model_s3_path=model_s3_path,
                run_id=run_id, flavor=flavor, instance_type=instance_type,
                instance_count=instance_count, vpc_config=vpc_config, role=execution_role_arn,
                sage_client=sage_client)

    if synchronous:
        _logger.info("Waiting for the deployment operation to complete...")
        operation_status = deployment_operation.await_completion(timeout_seconds=timeout_seconds)
        if operation_status.state == _SageMakerOperationStatus.STATE_SUCCEEDED:
            _logger.info("The deployment operation completed successfully with message: \"%s\"",
                         operation_status.message)
        else:
            raise MlflowException(
                "The deployment operation failed with the following error message:"
                " \"{error_message}\"".format(error_message=operation_status.message))
        if not archive:
            deployment_operation.clean_up()
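A hedged end-to-end usage sketch (the application name, run ID, and region are placeholders) of calling ``deploy`` for a previously logged model:

import mlflow.sagemaker as mfs
# Hypothetical values: the model was logged under the artifact path "model" in the given
# run; deployment happens synchronously in create mode.
mfs.deploy(app_name="my-application",
           model_path="model",
           run_id="<run-id>",
           region_name="us-west-2",
           mode=mfs.DEPLOYMENT_MODE_CREATE,
           synchronous=True)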
Example #24
def build_image(model_path, workspace, run_id=None, image_name=None, model_name=None,
                mlflow_home=None, description=None, tags=None, synchronous=True):
    """
    Register an MLflow model with Azure ML and build an Azure ML ContainerImage for deployment.
    The resulting image can be deployed as a web service to Azure Container Instances (ACI) or
    Azure Kubernetes Service (AKS).

    The resulting Azure ML ContainerImage will contain a webserver that processes model queries.
    For information about the input data formats accepted by this webserver, see the
    :ref:`MLflow deployment tools documentation <azureml_deployment>`.

    :param model_path: The path to the MLflow model for which the image will be built. If a run ID
                       is specified, this should be a run-relative artifact path. Otherwise, it
                       should be a local path.
    :param run_id: MLflow run ID.
    :param image_name: The name to assign the Azure Container Image that will be created. If
                       unspecified, a unique image name will be generated.
    :param model_name: The name to assign the Azure Model that will be created. If unspecified,
                       a unique model name will be generated.
    :param workspace: The AzureML workspace in which to build the image. This is a
                      `azureml.core.Workspace` object.
    :param mlflow_home: Path to a local copy of the MLflow GitHub repository. If specified, the
                        image will install MLflow from this directory. Otherwise, it will install
                        MLflow from pip.
    :param description: A string description to associate with the Azure Container Image and the
                        Azure Model that will be created. For more information, see
                        `<https://docs.microsoft.com/en-us/python/api/azureml-core/
                        azureml.core.image.container.containerimageconfig>`_ and
                        `<https://docs.microsoft.com/en-us/python/api/azureml-core/
                        azureml.core.model.model?view=azure-ml-py#register>`_.
    :param tags: A collection of tags, represented as a dictionary of string key-value pairs, to
                 associate with the Azure Container Image and the Azure Model that will be created.
                 These tags will be added to a set of default tags that include the model path,
                 the model run id (if specified), and more. For more information, see
                 `<https://docs.microsoft.com/en-us/python/api/azureml-core/
                 azureml.core.image.container.containerimageconfig>`_ and
                 `<https://docs.microsoft.com/en-us/python/api/azureml-core/
                 azureml.core.model.model?view=azure-ml-py#register>`_.
    :param synchronous: If `True`, this method will block until the image creation procedure
                        terminates before returning. If `False`, the method will return immediately,
                        but the returned image will not be available until the asynchronous
                        creation process completes. The `azureml.core.Image.wait_for_creation()`
                        function can be used to wait for the creation process to complete.
    :return: A tuple containing the following elements in order:
             - An `azureml.core.image.ContainerImage` object containing metadata for the new image.
             - An `azureml.core.model.Model` object containing metadata for the new model.

    >>> import mlflow.azureml
    >>> from azureml.core import Workspace
    >>> from azureml.core.webservice import AciWebservice, Webservice
    >>>
    >>> # Load or create an Azure ML Workspace
    >>> workspace_name = "<Name of your Azure ML workspace>"
    >>> subscription_id = "<Your Azure subscription ID>"
    >>> resource_group = "<Name of the Azure resource group in which to create Azure ML resources>"
    >>> location = "<Name of the Azure location (region) in which to create Azure ML resources>"
    >>> azure_workspace = Workspace.create(name=workspace_name,
    >>>                                    subscription_id=subscription_id,
    >>>                                    resource_group=resource_group,
    >>>                                    location=location,
    >>>                                    create_resource_group=True,
    >>>                                    exist_ok=True)
    >>>
    >>> # Build an Azure ML Container Image for an MLflow model
    >>> azure_image, azure_model = mlflow.azureml.build_image(
    >>>                                 model_path="<model_path>",
    >>>                                 workspace=azure_workspace,
    >>>                                 synchronous=True)
    >>> # If your image build failed, you can access build logs at the following URI:
    >>> print("Access the following URI for build logs: {}".format(azure_image.image_build_log_uri))
    >>>
    >>> # Deploy the image to Azure Container Instances (ACI) for real-time serving
    >>> webservice_deployment_config = AciWebservice.deploy_configuration()
    >>> webservice = Webservice.deploy_from_image(
    >>>                    image=azure_image, workspace=azure_workspace, name="<deployment-name>")
    >>> webservice.wait_for_deployment()
    """
    # The Azure ML SDK is only compatible with Python 3. However, the `mlflow.azureml` module should
    # still be accessible for import from Python 2. Therefore, we will only import from the SDK
    # upon method invocation.
    # pylint: disable=import-error
    from azureml.core.image import ContainerImage
    from azureml.core.model import Model as AzureModel

    if run_id is not None:
        absolute_model_path = _get_model_log_dir(model_name=model_path, run_id=run_id)
    else:
        absolute_model_path = os.path.abspath(model_path)

    model_pyfunc_conf = _load_pyfunc_conf(model_path=absolute_model_path)
    model_python_version = model_pyfunc_conf.get(pyfunc.PY_VERSION, None)
    if model_python_version is not None and\
            StrictVersion(model_python_version) < StrictVersion("3.0.0"):
        raise MlflowException(
                message=("Azure ML can only deploy models trained in Python 3 or above! Please see"
                         " the following MLflow GitHub issue for a thorough explanation of this"
                         " limitation and a workaround to enable support for deploying models"
                         " trained in Python 2: https://github.com/mlflow/mlflow/issues/668"),
                error_code=INVALID_PARAMETER_VALUE)

    tags = _build_tags(relative_model_path=model_path, run_id=run_id,
                       model_python_version=model_python_version, user_tags=tags)

    if image_name is None:
        image_name = _get_mlflow_azure_resource_name()
    if model_name is None:
        model_name = _get_mlflow_azure_resource_name()

    with TempDir(chdr=True) as tmp:
        model_directory_path = tmp.path("model")
        tmp_model_path = os.path.join(
            model_directory_path,
            _copy_file_or_tree(src=absolute_model_path, dst=model_directory_path))

        registered_model = AzureModel.register(workspace=workspace, model_path=tmp_model_path,
                                               model_name=model_name, tags=tags,
                                               description=description)
        _logger.info("Registered an Azure Model with name: `%s` and version: `%s`",
                     registered_model.name, registered_model.version)

        # Create an execution script (entry point) for the image's model server. Azure ML requires
        # the container's execution script to be located in the current working directory during
        # image creation, so we create the execution script as a temporary file in the current
        # working directory.
        execution_script_path = tmp.path("execution_script.py")
        _create_execution_script(output_path=execution_script_path, azure_model=registered_model)
        # Azure ML copies the execution script into the image's application root directory by
        # prepending "/var/azureml-app" to the specified script path. The script is then executed
        # by referencing its path relative to the "/var/azureml-app" directory. Unfortunately,
        # if the script path is an absolute path, Azure ML attempts to reference it directly,
        # resulting in a failure. To circumvent this problem, we provide Azure ML with the relative
        # script path. Because the execution script was created in the current working directory,
        # this relative path is the script path's base name.
        execution_script_path = os.path.basename(execution_script_path)

        if mlflow_home is not None:
            _logger.info(
                "Copying the specified mlflow_home directory: `%s` to a temporary location for"
                " container creation",
                mlflow_home)
            mlflow_home = os.path.join(tmp.path(),
                                       _copy_project(src_path=mlflow_home, dst_path=tmp.path()))
            image_file_dependencies = [mlflow_home]
        else:
            image_file_dependencies = None
        dockerfile_path = tmp.path("Dockerfile")
        _create_dockerfile(output_path=dockerfile_path, mlflow_path=mlflow_home)

        conda_env_path = None
        if pyfunc.ENV in model_pyfunc_conf:
            conda_env_path = os.path.join(tmp_model_path, model_pyfunc_conf[pyfunc.ENV])

        image_configuration = ContainerImage.image_configuration(
                execution_script=execution_script_path,
                runtime="python",
                docker_file=dockerfile_path,
                dependencies=image_file_dependencies,
                conda_file=conda_env_path,
                description=description,
                tags=tags,
        )
        image = ContainerImage.create(workspace=workspace,
                                      name=image_name,
                                      image_config=image_configuration,
                                      models=[registered_model])
        _logger.info("Building an Azure Container Image with name: `%s` and version: `%s`",
                     image.name, image.version)
        if synchronous:
            image.wait_for_creation(show_output=True)
        return image, registered_model