Beispiel #1
0
    def create_model_version(
        self,
        name,
        source,
        run_id,
        tags=None,
        run_link=None,
        description=None,
        await_creation_for=DEFAULT_AWAIT_MAX_SLEEP_SECONDS,
    ):
        """
        Register a new model version whose files live at ``source`` (an artifact URI).

        When the registry URI is a Databricks URI that differs from the tracking URI, the
        model files are first copied via ``_upload_artifacts_to_databricks`` and the copied
        location is registered instead of ``source``.

        :param name: Name for the containing registered model.
        :param source: Source path where the MLflow model is stored.
        :param run_id: Run ID from MLflow tracking server that generated the model
        :param tags: A dictionary of key-value pairs that are converted into
                     :py:class:`mlflow.entities.model_registry.ModelVersionTag` objects.
        :param run_link: Link to the run from an MLflow tracking server that generated this model.
                         If falsy, and the tracking URI is a Databricks URI different from the
                         registry URI, it is filled in from ``self._get_run_link``.
        :param description: Description of the version.
        :param await_creation_for: Number of seconds to wait for the model version to finish being
                                    created and is in ``READY`` status. Defaults to
                                    ``DEFAULT_AWAIT_MAX_SLEEP_SECONDS``. Specify 0 or None to skip
                                    waiting.
        :return: Single :py:class:`mlflow.entities.model_registry.ModelVersion` object created by
                 backend.
        """
        tracking_uri = self._tracking_client.tracking_uri
        registry_uri = self._registry_uri

        # Only derive a run link when the caller did not supply one and the run lives in a
        # Databricks workspace other than the registry's.
        if not run_link and is_databricks_uri(tracking_uri) and tracking_uri != registry_uri:
            run_link = self._get_run_link(tracking_uri, run_id)

        model_source = source
        if is_databricks_uri(registry_uri) and tracking_uri != registry_uri:
            # Tell the user up front: copying a large model can take a while.
            eprint(
                "=== Copying model files from the source location to the model"
                " registry workspace ===")
            model_source = _upload_artifacts_to_databricks(
                source, run_id, tracking_uri, registry_uri)
            # NOTE: the temporary target location is not cleaned up here because model
            # version creation is asynchronous - the user is told where the files are instead.
            copied_notice = "=== Source model files were copied to %s" % model_source
            copied_notice += (
                " in the model registry workspace. You may want to delete the files once the"
                " model version is in 'READY' status. You can also find this location in the"
                " `source` field of the created model version. ===")
            eprint(copied_notice)

        registry_client = self._get_registry_client()
        return registry_client.create_model_version(
            name=name,
            source=model_source,
            run_id=run_id,
            tags=tags,
            run_link=run_link,
            description=description,
            await_creation_for=await_creation_for,
        )
Beispiel #2
0
def test_upload_artifacts_to_databricks():
    """
    Check ``_upload_artifacts_to_databricks`` when a run id is supplied.

    The download helper and the DBFS REST repository are patched out, so the test only
    pins the URIs handed to them and the returned destination path: the source is
    downloaded with the tracking profile embedded in the DBFS URI, the repository is
    rooted at the temporary external-source directory with the registry profile embedded,
    and the returned path is ``<tmp root>/<run id>/<source dir name>``.
    """
    import_root = 'mlflow.tracking.artifact_utils'
    with mock.patch(import_root + "._download_artifact_from_uri") as download_mock, \
            mock.patch(import_root + ".DbfsRestArtifactRepository") as repo_mock:
        new_source = _upload_artifacts_to_databricks('dbfs:/original/sourcedir/', 'runid12345',
                                                     'databricks://tracking',
                                                     'databricks://registry/ws')
        # Second positional argument is the local download directory, which is not pinned.
        download_mock.assert_called_once_with('dbfs://tracking@databricks/original/sourcedir/',
                                              ANY)
        # NOTE(review): the profile part of this expected URI had been replaced by a
        # credential-scrubbing placeholder ("*****:*****"), which can never match a value
        # derived from the registry URI above. Restored as "registry:ws" - confirm against
        # the helper's URI-building logic.
        repo_mock.assert_called_once_with(
            'dbfs://registry:ws@databricks/databricks/mlflow/tmp-external-source/')
        assert new_source == 'dbfs:/databricks/mlflow/tmp-external-source/runid12345/sourcedir'
Beispiel #3
0
def test_upload_artifacts_to_databricks_no_run_id():
    """
    Check ``_upload_artifacts_to_databricks`` when no run id is supplied.

    ``uuid.uuid4`` is patched to a fixed value so the destination directory is
    predictable: the expected path uses that UUID's hex form in place of the run id.
    The download helper and the DBFS REST repository are patched out, so only the URIs
    handed to them and the returned path are pinned.
    """
    from uuid import UUID
    import_root = 'mlflow.tracking.artifact_utils'
    with mock.patch(import_root + "._download_artifact_from_uri") as download_mock, \
            mock.patch(import_root + ".DbfsRestArtifactRepository") as repo_mock, \
            mock.patch("uuid.uuid4", return_value=UUID("4f746cdcc0374da2808917e81bb53323")):
        new_source = _upload_artifacts_to_databricks('dbfs:/original/sourcedir/', None,
                                                     'databricks://tracking/ws',
                                                     'databricks://registry')
        # NOTE(review): the profile part of this expected URI had been replaced by a
        # credential-scrubbing placeholder ("*****:*****"), which can never match a value
        # derived from the tracking URI above. Restored as "tracking:ws" - confirm against
        # the helper's URI-building logic.
        download_mock.assert_called_once_with('dbfs://tracking:ws@databricks/original/sourcedir/',
                                              ANY)
        repo_mock.assert_called_once_with(
            'dbfs://registry@databricks/databricks/mlflow/tmp-external-source/')
        assert new_source == 'dbfs:/databricks/mlflow/tmp-external-source/' \
            '4f746cdcc0374da2808917e81bb53323/sourcedir'
Beispiel #4
0
    def create_model_version(self,
                             name,
                             source,
                             run_id,
                             tags=None,
                             run_link=None):
        """
        Register a new model version whose files live at ``source`` (an artifact URI).

        When the registry URI is a Databricks URI that differs from the tracking URI, the
        model files are first copied via ``_upload_artifacts_to_databricks`` and the copied
        location is registered instead of ``source``.

        :param name: Name for the containing registered model.
        :param source: Source path where the MLflow model is stored.
        :param run_id: Run ID from MLflow tracking server that generated the model
        :param tags: A dictionary of key-value pairs that are converted into
                     :py:class:`mlflow.entities.model_registry.ModelVersionTag` objects.
        :param run_link: Link to the run from an MLflow tracking server that generated this model.
                         If falsy, and the tracking URI is a Databricks URI different from the
                         registry URI, it is filled in from ``self._get_run_link``.
        :return: Single :py:class:`mlflow.entities.model_registry.ModelVersion` object created by
                 backend.
        """
        tracking_uri = self._tracking_client.tracking_uri
        registry_uri = self._registry_uri

        # Only derive a run link when the caller did not supply one and the run lives in a
        # Databricks workspace other than the registry's.
        if not run_link and is_databricks_uri(tracking_uri) and tracking_uri != registry_uri:
            run_link = self._get_run_link(tracking_uri, run_id)

        model_source = source
        if is_databricks_uri(registry_uri) and tracking_uri != registry_uri:
            # Tell the user up front: copying a large model can take a while.
            _logger.info(
                "=== Copying model files from the source location to the model "
                " registry workspace ===")
            model_source = _upload_artifacts_to_databricks(
                source, run_id, tracking_uri, registry_uri)
            # NOTE: the temporary target location is not cleaned up here because model
            # version creation is asynchronous - the user is told where the files are instead.
            _logger.info(
                """
                === Source model files were copied to %s
                    in the model registry workspace. You may want to delete the files once the
                    model version is in 'READY' status. You can also find this location in the
                    `source` field of the created model version. ===
                """,
                model_source,
            )

        registry_client = self._get_registry_client()
        return registry_client.create_model_version(
            name=name,
            source=model_source,
            run_id=run_id,
            tags=tags,
            run_link=run_link,
        )
Beispiel #5
0
def test_upload_artifacts_to_databricks():
    """
    Check ``_upload_artifacts_to_databricks`` when a run id is supplied.

    The download helper and the DBFS REST repository are patched out, so the test only
    pins the URIs handed to them and the returned destination path: the source is
    downloaded with the tracking profile embedded in the DBFS URI, the repository is
    rooted at the temporary external-source directory with the registry profile embedded,
    and the returned path is ``<tmp root>/<run id>/<source dir name>``.
    """
    import_root = "mlflow.tracking.artifact_utils"
    with mock.patch(
            import_root +
            "._download_artifact_from_uri") as download_mock, mock.patch(
                import_root + ".DbfsRestArtifactRepository") as repo_mock:
        # NOTE(review): a credential-scrubbing pass had fused the registry URI argument
        # with the following download assertion, leaving a five-argument helper call and
        # no check on download_mock. The call and the assertion are separated again here;
        # the masked profile ("*****:*****") is restored as "registry:ws" - confirm
        # against the upstream test.
        new_source = _upload_artifacts_to_databricks(
            "dbfs:/original/sourcedir/",
            "runid12345",
            "databricks://tracking",
            "databricks://registry:ws",
        )
        # Second positional argument is the local download directory, which is not pinned.
        download_mock.assert_called_once_with(
            "dbfs://tracking@databricks/original/sourcedir/", ANY)
        repo_mock.assert_called_once_with(
            "dbfs://registry:ws@databricks/databricks/mlflow/tmp-external-source/"
        )
        assert new_source == "dbfs:/databricks/mlflow/tmp-external-source/runid12345/sourcedir"