コード例 #1
0
class ModelExporter():
    """
    Exports a registered model to a directory: a model.json file describing
    the model and all of its versions, plus one exported run per version.
    """

    def __init__(self, export_metadata_tags=False, notebook_formats=["SOURCE"], filesystem=None):
        """
        :param export_metadata_tags: Forwarded to RunExporter.
        :param notebook_formats: Forwarded to RunExporter.
        :param filesystem: Filesystem used to write model.json. If falsy, the
                           result of _filesystem.get_filesystem() is used.
        """
        self.fs = filesystem or _filesystem.get_filesystem()
        self.client = mlflow.tracking.MlflowClient()
        self.client2 = HttpClient("api/2.0/preview/mlflow")
        # NOTE(review): the raw `filesystem` argument (possibly None) is forwarded here,
        # not self.fs — kept as in the original; confirm RunExporter resolves its own default.
        self.run_exporter = RunExporter(self.client, export_metadata_tags=export_metadata_tags, notebook_formats=notebook_formats, filesystem=filesystem)

    def _get_all_model_versions(self, model_name):
        """
        Fetch every version of a registered model, following pagination.

        :param model_name: Registered model name.
        :return: List of model version dicts; empty if the model has no versions.
        """
        from urllib.parse import quote

        # Percent-encode the whole filter so names containing spaces, '&', '+', '#'
        # etc. cannot corrupt the query string.
        url = "model-versions/search?filter=" + quote(f"name='{model_name}'", safe="")
        model_versions = []
        page_token = None
        while True:
            page_url = url
            if page_token:
                # Tokens may contain '+' or '=' which must not reach the server raw.
                page_url += "&page_token=" + quote(page_token, safe="")
            response = self.client2.get(page_url)
            # The key is absent (not an empty list) when a page carries no versions.
            model_versions.extend(response.get("model_versions", []))
            page_token = response.get("next_page_token")
            if not page_token:
                return model_versions

    def export_model(self, output_dir, model_name):
        """
        Export the registered model and the run behind each of its versions.

        Writes <output_dir>/model.json where "latest_versions" is replaced by
        "all_versions", each annotated with its run's "artifact_uri", and
        exports every run into <output_dir>/<run_id>.

        :param output_dir: Output directory.
        :param model_name: Registered model name.
        """
        from urllib.parse import quote

        path = os.path.join(output_dir, "model.json")
        model = self.client2.get("registered-models/get?name=" + quote(model_name, safe=""))
        registered_model = model["registered_model"]
        # A model without versions has no "latest_versions" key, so do not use `del`.
        registered_model.pop("latest_versions", None)
        registered_model["all_versions"] = self._get_all_model_versions(model_name)
        for v in registered_model["all_versions"]:
            run_id = v["run_id"]
            self.run_exporter.export_run(run_id, os.path.join(output_dir, run_id))
            run = self.client.get_run(run_id)
            v["artifact_uri"] = run.info.artifact_uri
        utils.write_json_file(self.fs, path, model)
コード例 #2
0
 def __init__(self,
              mlflow_client=None,
              export_metadata_tags=False,
              notebook_formats=[],
              export_notebook_revision=False,
              stages=None,
              export_run=True):
     """
     Set up the clients and the run exporter used for model export.

     :param mlflow_client: MLflow client; a default MlflowClient is created when falsy.
     :param export_metadata_tags: Export source run metadata tags.
     :param notebook_formats: List of notebook formats to export. Values are SOURCE, HTML, JUPYTER or DBC.
     :param export_notebook_revision: Export the run's notebook revision. Experimental not yet publicly available.
     :param stages: Stages to export. Default is all stages. Values are Production, Staging, Archived and None.
     :param export_run: Export the run that generated a registered model's version.
     """
     if not mlflow_client:
         mlflow_client = mlflow.tracking.MlflowClient()
     self.mlflow_client = mlflow_client
     print(">> client:", self.mlflow_client)
     self.http_client = MlflowHttpClient()
     # Options handed through unchanged to the run exporter.
     run_exporter_options = {
         "export_metadata_tags": export_metadata_tags,
         "notebook_formats": notebook_formats,
         "export_notebook_revision": export_notebook_revision,
     }
     self.run_exporter = RunExporter(self.mlflow_client, **run_exporter_options)
     self.stages = self._normalize_stages(stages)
     self.export_run = export_run
コード例 #3
0
 def __init__(self,
              client=None,
              export_metadata_tags=False,
              notebook_formats=["SOURCE"],
              filesystem=None):
     """
     Resolve the MLflow client and filesystem, then build the run exporter.

     :param client: MLflow client; a default MlflowClient is created when falsy.
     :param export_metadata_tags: Passed positionally to RunExporter.
     :param notebook_formats: Passed positionally to RunExporter.
     :param filesystem: Filesystem to use; _filesystem.get_filesystem() when falsy.
     """
     self.client = client if client else mlflow.tracking.MlflowClient()
     self.fs = filesystem if filesystem else _filesystem.get_filesystem()
     fs_type_name = type(self.fs).__name__
     print("Filesystem:", fs_type_name)
     self.run_exporter = RunExporter(
         self.client,
         export_metadata_tags,
         notebook_formats,
         self.fs,
     )
コード例 #4
0
 def __init__(self,
              export_metadata_tags=False,
              notebook_formats=["SOURCE"],
              filesystem=None):
     """
     Create the filesystem handle, both MLflow clients and the run exporter.

     :param export_metadata_tags: Forwarded to RunExporter.
     :param notebook_formats: Forwarded to RunExporter.
     :param filesystem: Filesystem to use; _filesystem.get_filesystem() when falsy.
     """
     self.fs = filesystem if filesystem else _filesystem.get_filesystem()
     tracking_client = mlflow.tracking.MlflowClient()
     self.client = tracking_client
     self.client2 = HttpClient("api/2.0/preview/mlflow")
     # The caller's original `filesystem` value (possibly None) is forwarded, not self.fs.
     run_exporter_kwargs = dict(
         export_metadata_tags=export_metadata_tags,
         notebook_formats=notebook_formats,
         filesystem=filesystem,
     )
     self.run_exporter = RunExporter(tracking_client, **run_exporter_kwargs)
コード例 #5
0
 def __init__(self,
              mlflow_client=None,
              export_metadata_tags=False,
              notebook_formats=[],
              export_notebook_revision=False):
     """
     Resolve the MLflow client and build the run exporter that uses it.

     :param mlflow_client: MLflow client; a default MlflowClient is created when falsy.
     :param export_metadata_tags: Export source run metadata tags.
     :param notebook_formats: List of notebook formats to export. Values are SOURCE, HTML, JUPYTER or DBC.
     :param export_notebook_revision: Export the run's notebook revision. Experimental not yet publicly available.
     """
     if not mlflow_client:
         mlflow_client = mlflow.tracking.MlflowClient()
     self.mlflow_client = mlflow_client
     # RunExporter receives these options positionally, in this order.
     exporter_args = (export_metadata_tags, notebook_formats,
                      export_notebook_revision)
     self.run_exporter = RunExporter(self.mlflow_client, *exporter_args)
コード例 #6
0
def test_run_import_metadata_tags():
    """Run the shared export/import helper with metadata tags enabled on both sides and compare the two runs."""
    exporter = RunExporter(export_metadata_tags=True)
    importer = RunImporter(
        mlmodel_fix=mlmodel_fix,
        import_metadata_tags=True,
        import_mlflow_tags=True,
    )
    source_run, imported_run = init_run_test(exporter, importer, verbose=False)
    compare_run_import_metadata_tags(client, output, source_run, imported_run)
コード例 #7
0
class ExperimentExporter():
    """Exports an experiment and its runs either to a directory or to a zip file."""

    def __init__(self,
                 client=None,
                 export_metadata_tags=False,
                 notebook_formats=["SOURCE"],
                 filesystem=None):
        """
        :param client: MLflow client; a default MlflowClient is created when falsy.
        :param export_metadata_tags: Passed positionally to RunExporter.
        :param notebook_formats: Passed positionally to RunExporter.
        :param filesystem: Filesystem to use; _filesystem.get_filesystem() when falsy.
        """
        if not client:
            client = mlflow.tracking.MlflowClient()
        self.client = client
        if not filesystem:
            filesystem = _filesystem.get_filesystem()
        self.fs = filesystem
        print("Filesystem:", type(self.fs).__name__)
        self.run_exporter = RunExporter(self.client, export_metadata_tags,
                                        notebook_formats, self.fs)

    def export_experiment(self, exp_id_or_name, output):
        """
        Export an experiment to `output`.

        :param exp_id_or_name: Experiment ID or name, resolved via mlflow_utils.get_experiment.
        :param output: Destination; a path ending in ".zip" produces a zip file,
                       anything else is created as a directory and filled.
        """
        experiment = mlflow_utils.get_experiment(self.client, exp_id_or_name)
        experiment_id = experiment.experiment_id
        print(
            f"Exporting experiment '{experiment.name}' (ID {experiment.experiment_id}) to '{output}'"
        )
        if not output.endswith(".zip"):
            self.fs.mkdirs(output)
            self.export_experiment_to_dir(experiment_id, output)
            return
        self.export_experiment_to_zip(experiment_id, output)

    def export_experiment_to_dir(self, exp_id, exp_dir):
        """
        Export every run of the experiment into `exp_dir/<run_id>` and write
        `exp_dir/manifest.json` listing the exported and failed run IDs.

        :param exp_id: Experiment ID.
        :param exp_dir: Target directory.
        """
        experiment = self.client.get_experiment(exp_id)
        manifest = {"experiment": utils.strip_underscores(experiment)}
        run_infos = self.client.list_run_infos(exp_id)
        total = len(run_infos)
        manifest["export_info"] = {
            "export_time": utils.get_now_nice(),
            "num_runs": total
        }
        exported_ids, failed_ids = [], []
        for position, run_info in enumerate(run_infos, start=1):
            target_dir = os.path.join(exp_dir, run_info.run_id)
            print(f"Exporting run {position}/{total}: {run_info.run_id}")
            succeeded = self.run_exporter.export_run(run_info.run_id, target_dir)
            # A falsy result from export_run marks the run as failed.
            bucket = exported_ids if succeeded else failed_ids
            bucket.append(run_info.run_id)
        manifest["run_ids"] = exported_ids
        manifest["failed_run_ids"] = failed_ids
        manifest_path = os.path.join(exp_dir, "manifest.json")
        utils.write_json_file(self.fs, manifest_path, manifest)
        if failed_ids:
            print(f"{len(exported_ids)}/{total} runs succesfully exported")
            print(f"{len(failed_ids)}/{total} runs failed")
        else:
            print(f"All {len(exported_ids)} runs succesfully exported")

    def export_experiment_to_zip(self, exp_id, zip_file):
        """
        Export the experiment into a temporary directory, zip it to `zip_file`,
        and always remove the temporary directory afterwards.

        :param exp_id: Experiment ID.
        :param zip_file: Path of the zip file to create.
        """
        staging_dir = tempfile.mkdtemp()
        try:
            self.export_experiment_to_dir(exp_id, staging_dir)
            utils.zip_directory(zip_file, staging_dir)
        finally:
            shutil.rmtree(staging_dir)
コード例 #8
0
class ModelExporter():
    """
    Exports a registered model to a directory: a model.json file describing
    the model and its latest versions, plus one exported run per latest version.
    """

    def __init__(self, export_metadata_tags=False, notebook_formats=["SOURCE"], filesystem=None):
        """
        :param export_metadata_tags: Forwarded to RunExporter.
        :param notebook_formats: Forwarded to RunExporter.
        :param filesystem: Filesystem used to write model.json. If falsy, the
                           result of _filesystem.get_filesystem() is used.
        """
        self.fs = filesystem or _filesystem.get_filesystem()
        self.client = mlflow.tracking.MlflowClient()
        self.client2 = HttpClient("api/2.0/preview/mlflow")
        # NOTE(review): the raw `filesystem` argument (possibly None) is forwarded here,
        # not self.fs — kept as in the original; confirm RunExporter resolves its own default.
        self.run_exporter = RunExporter(self.client, export_metadata_tags=export_metadata_tags, notebook_formats=notebook_formats, filesystem=filesystem)

    def export_model(self, output_dir, model_name):
        """
        Export the registered model and the run behind each of its latest versions.

        Writes <output_dir>/model.json with every latest version annotated with
        its run's "artifact_uri", and exports each run into <output_dir>/<run_id>.

        :param output_dir: Output directory.
        :param model_name: Registered model name.
        """
        from urllib.parse import quote

        path = os.path.join(output_dir, "model.json")
        # Percent-encode the name so spaces, '&', '#', '+' etc. cannot corrupt the query string.
        model = self.client2.get("registered-models/get?name=" + quote(model_name, safe=""))
        # A model without versions has no "latest_versions" key; treat that as "nothing to export".
        for v in model["registered_model"].get("latest_versions", []):
            run_id = v["run_id"]
            self.run_exporter.export_run(run_id, os.path.join(output_dir, run_id))
            run = self.client.get_run(run_id)
            v["artifact_uri"] = run.info.artifact_uri
        utils.write_json_file(self.fs, path, model)
コード例 #9
0
class ModelExporter():
    """
    Exports a registered model to a directory: a model.json file describing
    the model and all of its versions, plus one exported run per version.
    """

    def __init__(self,
                 export_metadata_tags=False,
                 notebook_formats=["SOURCE"],
                 filesystem=None):
        """
        :param export_metadata_tags: Forwarded to RunExporter.
        :param notebook_formats: Forwarded to RunExporter.
        :param filesystem: Filesystem used to write model.json. If falsy, the
                           result of _filesystem.get_filesystem() is used.
        """
        self.fs = filesystem or _filesystem.get_filesystem()
        self.client = mlflow.tracking.MlflowClient()
        self.client2 = HttpClient("api/2.0/preview/mlflow")
        # NOTE(review): the raw `filesystem` argument (possibly None) is forwarded here,
        # not self.fs — kept as in the original; confirm RunExporter resolves its own default.
        self.run_exporter = RunExporter(
            self.client,
            export_metadata_tags=export_metadata_tags,
            notebook_formats=notebook_formats,
            filesystem=filesystem)

    def export_model(self, output_dir, model_name):
        """
        Export the registered model and the run behind each of its versions.

        Every version returned by search_model_versions is exported exactly once
        into <output_dir>/<run_id>. The "latest_versions" entry of model.json is
        replaced by the list of exported versions, each annotated with its run's
        "artifact_uri". A version whose run no longer exists is skipped with a
        warning instead of aborting the remaining versions.

        :param output_dir: Output directory.
        :param model_name: Registered model name.
        :raises mlflow.exceptions.RestException: For any REST error other than a missing run.
        """
        from urllib.parse import quote

        path = os.path.join(output_dir, "model.json")
        # Percent-encode the name so spaces, '&', '#', '+' etc. cannot corrupt the query string.
        model = self.client2.get("registered-models/get?name=" + quote(model_name, safe=""))
        exported_versions = []
        for mv in self.client.search_model_versions(f"name='{model_name}'"):
            run_id = mv.run_id
            opath = os.path.join(output_dir, run_id)
            try:
                self.run_exporter.export_run(run_id, opath)
                run = self.client.get_run(run_id)
            except mlflow.exceptions.RestException as e:
                if "RESOURCE_DOES_NOT_EXIST: Run" not in str(e):
                    # Bare raise keeps the original traceback.
                    raise
                print(
                    "WARNING: Run backing the registered model does not exist.",
                    e)
                continue
            dmv = dict(mv)
            dmv["artifact_uri"] = run.info.artifact_uri
            exported_versions.append(dmv)
        model["registered_model"]["latest_versions"] = exported_versions
        utils.write_json_file(self.fs, path, model)
コード例 #10
0
class ModelExporter():
    """
    Exports a registered model, its versions (optionally filtered by stage)
    and, optionally, the run behind each version.
    """

    def __init__(self,
                 mlflow_client=None,
                 export_metadata_tags=False,
                 notebook_formats=[],
                 export_notebook_revision=False,
                 stages=None,
                 export_run=True):
        """
        :param mlflow_client: MLflow client or if None create default client.
        :param export_metadata_tags: Export source run metadata tags.
        :param notebook_formats: List of notebook formats to export. Values are SOURCE, HTML, JUPYTER or DBC.
        :param export_notebook_revision: Export the run's notebook revision. Experimental not yet publicly available.
        :param stages: Stages to export. Default is all stages. Values are Production, Staging, Archived and None.
        :param export_run: Export the run that generated a registered model's version.
        """
        self.mlflow_client = mlflow_client or mlflow.tracking.MlflowClient()
        print(">> client:", self.mlflow_client)
        self.http_client = MlflowHttpClient()
        self.run_exporter = RunExporter(
            self.mlflow_client,
            export_metadata_tags=export_metadata_tags,
            notebook_formats=notebook_formats,
            export_notebook_revision=export_notebook_revision)
        self.stages = self._normalize_stages(stages)
        self.export_run = export_run

    def export_model(self, model_name, output_dir):
        """
        Export a registered model, reporting failure instead of raising.

        :param model_name: Registered model name.
        :param output_dir: Output directory.
        :return: Returns bool if export succeeded and the model name.
        """
        try:
            self._export_model(model_name, output_dir)
            return True, model_name
        except Exception as e:
            # Deliberate best-effort boundary: any failure is printed and reported via the bool.
            print("ERROR:", e)
            return False, model_name

    def _export_model(self, model_name, output_dir):
        """
        Export the model's versions and write <output_dir>/model.json.

        "latest_versions" in model.json is replaced by the versions that were
        exported, each annotated with "_run_artifact_uri" and "_experiment_name".

        :param model_name: Registered model name.
        :param output_dir: Output directory.
        :return: List of {"version", "stage", "run_id"} dicts, one per version attempted.
        """
        fs = _filesystem.get_filesystem(output_dir)
        model = self.http_client.get("registered-models/get",
                                     {"name": model_name})
        fs.mkdirs(output_dir)
        model["registered_model"]["latest_versions"] = []
        versions = self.mlflow_client.search_model_versions(
            f"name='{model_name}'")
        print(f"Found {len(versions)} versions for model {model_name}")
        manifest = []
        exported_versions = 0
        for vr in versions:
            # An empty stage filter means "export every stage".
            if self.stages and vr.current_stage.lower() not in self.stages:
                continue
            run_id = vr.run_id
            opath = os.path.join(output_dir, run_id)
            opath = opath.replace("dbfs:", "/dbfs")
            dct = {
                "version": vr.version,
                "stage": vr.current_stage,
                "run_id": run_id
            }
            print(f"Exporting: {dct}")
            manifest.append(dct)
            try:
                if self.export_run:
                    self.run_exporter.export_run(run_id, opath)
                run = self.mlflow_client.get_run(run_id)
                dct = dict(vr)
                dct["_run_artifact_uri"] = run.info.artifact_uri
                # Use the injected client rather than the global fluent API, so a client
                # pointing at a non-default tracking server resolves the experiment there.
                experiment = self.mlflow_client.get_experiment(
                    run.info.experiment_id)
                dct["_experiment_name"] = experiment.name
                model["registered_model"]["latest_versions"].append(dct)
                exported_versions += 1
            except mlflow.exceptions.RestException as e:
                if "RESOURCE_DOES_NOT_EXIST: Run" in str(e):
                    print(
                        f"WARNING: Run for version {vr.version} does not exist. {e}"
                    )
                else:
                    # Other REST errors are logged and the remaining versions still export.
                    import traceback
                    traceback.print_exc()
        print(
            f"Exported {exported_versions}/{len(versions)} versions for model {model_name}"
        )
        path = os.path.join(output_dir, "model.json")
        utils.write_json_file(fs, path, model)
        return manifest

    def _normalize_stages(self, stages):
        """
        Convert the `stages` argument into a list of lower-case stage names.

        :param stages: None, a comma-separated string, or an iterable of stage names.
        :return: List of lower-cased, whitespace-stripped stage names; empty when `stages` is None.
                 Unknown names only trigger a warning and are kept in the list.
        """
        if stages is None:
            return []
        if isinstance(stages, str):
            stages = stages.split(",")
        # Strip so that "Production, Staging" matches the same as "Production,Staging".
        stages = [stage.strip().lower() for stage in stages]
        # Imported only once there is something to validate.
        from mlflow.entities.model_registry import model_version_stages
        for stage in stages:
            if stage not in model_version_stages._CANONICAL_MAPPING:
                print(
                    f"WARNING: stage '{stage}' must be one of: {model_version_stages.ALL_STAGES}"
                )
        return stages
コード例 #11
0
def test_run_no_import_mlflow_tags():
    """Run the shared export/import helper with import_mlflow_tags disabled and compare the two runs."""
    exporter = RunExporter()
    importer = RunImporter(mlmodel_fix=mlmodel_fix, import_mlflow_tags=False)
    source_run, imported_run = init_run_test(exporter, importer)
    compare_run_no_import_mlflow_tags(source_run, imported_run)
コード例 #12
0
def test_run_basic():
    """Run the shared export/import helper with default exporter options and compare the two runs."""
    exporter = RunExporter()
    importer = RunImporter(mlmodel_fix=mlmodel_fix)
    source_run, imported_run = init_run_test(exporter, importer)
    compare_runs(source_run, imported_run)
コード例 #13
0
# MAGIC %md ### Remove any previous exported run data

# COMMAND ----------

# output_dir is not defined in this excerpt — presumably set by an earlier cell.
# The second argument (True) is presumably the "recurse" flag of dbutils.fs.rm — confirm.
dbutils.fs.rm(output_dir, True)

# COMMAND ----------

# MAGIC %md ### Export the run

# COMMAND ----------

from mlflow_export_import.run.export_run import RunExporter

# formats, export_metadata_tags and run_id are defined outside this excerpt
# (presumably by earlier cells).
exporter = RunExporter(notebook_formats=formats,
                       export_metadata_tags=export_metadata_tags)
exporter.export_run(run_id, output_dir)

# COMMAND ----------

# MAGIC %md ### Display  exported run files

# COMMAND ----------

import os

# Rewrite any "dbfs:" occurrence to "/dbfs" — presumably the local mount path of
# DBFS, so the exported files can be reached with ordinary file tools.
output_dir = output_dir.replace("dbfs:", "/dbfs")
# Publish the rewritten path as an environment variable, presumably for a later shell cell.
os.environ['OUTPUT_DIR'] = output_dir

# COMMAND ----------
コード例 #14
0
class ExperimentExporter():
    """Exports an experiment's runs to a directory together with a manifest.json summary."""

    def __init__(self,
                 mlflow_client=None,
                 export_metadata_tags=False,
                 notebook_formats=[],
                 export_notebook_revision=False):
        """
        :param mlflow_client: MLflow client or if None create default client.
        :param export_metadata_tags: Export source run metadata tags.
        :param notebook_formats: List of notebook formats to export. Values are SOURCE, HTML, JUPYTER or DBC.
        :param export_notebook_revision: Export the run's notebook revision. Experimental not yet publicly available.
        """
        self.mlflow_client = mlflow_client or mlflow.tracking.MlflowClient()
        self.run_exporter = RunExporter(self.mlflow_client,
                                        export_metadata_tags, notebook_formats,
                                        export_notebook_revision)

    def export_experiment(self, exp_id_or_name, output_dir, run_ids=None):
        """
        Export an experiment's runs and write <output_dir>/manifest.json.

        :param exp_id_or_name: Experiment ID or name.
        :param output_dir: Output directory.
        :param run_ids: List of run IDs to export. If None export all run IDs.
        :return: Number of successful and number of failed runs.
        """
        exp = mlflow_utils.get_experiment(self.mlflow_client, exp_id_or_name)
        exp_id = exp.experiment_id
        print(
            f"Exporting experiment '{exp.name}' (ID {exp.experiment_id}) to '{output_dir}'"
        )
        fs = _filesystem.get_filesystem(output_dir)
        print("Filesystem:", type(fs).__name__)
        fs.mkdirs(output_dir)
        exp = self.mlflow_client.get_experiment(exp_id)
        dct = {"experiment": utils.strip_underscores(exp)}
        ok_run_ids = []
        failed_run_ids = []
        # j stays at -1 when there is nothing to iterate, so j + 1 is the number of runs seen.
        j = -1
        if run_ids:
            for j, run_id in enumerate(run_ids):
                run = self.mlflow_client.get_run(run_id)
                self._export_run(j, run, output_dir, ok_run_ids,
                                 failed_run_ids)
        else:
            for j, run in enumerate(
                    SearchRunsIterator(self.mlflow_client, exp_id)):
                self._export_run(j, run, output_dir, ok_run_ids,
                                 failed_run_ids)
        num_total_runs = j + 1
        dct["export_info"] = {
            "export_time": utils.get_now_nice(),
            "num_total_runs": num_total_runs,
            "num_ok_runs": len(ok_run_ids),
            "ok_runs": ok_run_ids,
            "num_failed_runs": len(failed_run_ids),
            "failed_runs": failed_run_ids
        }

        path = os.path.join(output_dir, "manifest.json")
        utils.write_json_file(fs, path, dct)
        msg = f"for experiment '{exp.name}' (ID: {exp.experiment_id})"
        if not failed_run_ids:
            print(f"All {len(ok_run_ids)} runs succesfully exported {msg}")
        else:
            # Report "count/total" text. The previous code divided by the last index,
            # which printed a float and raised ZeroDivisionError when the only run failed.
            print(
                f"{len(ok_run_ids)}/{num_total_runs} runs succesfully exported {msg}"
            )
            print(f"{len(failed_run_ids)}/{num_total_runs} runs failed {msg}")
        return len(ok_run_ids), len(failed_run_ids)

    def _export_run(self, idx, run, output_dir, ok_run_ids, failed_run_ids):
        """
        Export one run into <output_dir>/<run_id> and record its ID in the matching list.

        :param idx: Zero-based position of the run, used only for the progress message.
        :param run: Run object; only run.info.run_id is read here.
        :param output_dir: Output directory of the experiment.
        :param ok_run_ids: List that receives the run ID when export_run returns a truthy value.
        :param failed_run_ids: List that receives the run ID otherwise.
        """
        run_dir = os.path.join(output_dir, run.info.run_id)
        print(f"Exporting run {idx+1}: {run.info.run_id}")
        res = self.run_exporter.export_run(run.info.run_id, run_dir)
        if res:
            ok_run_ids.append(run.info.run_id)
        else:
            failed_run_ids.append(run.info.run_id)
コード例 #15
0
def test_run_basic():
    """Run the shared export/import helper with MLflow tags imported and compare the two runs."""
    exporter = RunExporter()
    importer = RunImporter(mlmodel_fix=mlmodel_fix, import_mlflow_tags=True)
    source_run, imported_run = init_run_test(exporter, importer)
    compare_runs(client, output, source_run, imported_run)