class ModelExporter():
    """Exports a registered model, all of its versions (paginated), and their backing runs."""

    def __init__(self, export_metadata_tags=False, notebook_formats=None, filesystem=None):
        """
        :param export_metadata_tags: Export source run metadata tags.
        :param notebook_formats: List of notebook formats to export. Default is ["SOURCE"].
        :param filesystem: Filesystem object or if None use the default filesystem.
        """
        # None default avoids the shared-mutable-default-argument pitfall.
        notebook_formats = ["SOURCE"] if notebook_formats is None else notebook_formats
        self.fs = filesystem or _filesystem.get_filesystem()
        self.client = mlflow.tracking.MlflowClient()
        self.client2 = HttpClient("api/2.0/preview/mlflow")
        self.run_exporter = RunExporter(
            self.client,
            export_metadata_tags=export_metadata_tags,
            notebook_formats=notebook_formats,
            filesystem=filesystem)

    def _get_all_model_versions(self, model_name):
        """Return every version of `model_name`, following `next_page_token` pagination."""
        url = f"model-versions/search?filter=name=%27{model_name}%27"
        response = self.client2.get(url)
        model_versions = response["model_versions"]
        page_token = response.get("next_page_token")
        while page_token:
            response = self.client2.get(url + f"&page_token={page_token}")
            model_versions += response["model_versions"]
            page_token = response.get("next_page_token")
        return model_versions

    def export_model(self, output_dir, model_name):
        """
        Export model metadata to `output_dir`/model.json and each version's run
        to a per-run subdirectory.

        :param output_dir: Output directory.
        :param model_name: Registered model name.
        """
        path = os.path.join(output_dir, "model.json")
        model = self.client2.get(f"registered-models/get?name={model_name}")
        # Replace the truncated "latest_versions" field with the full paginated list.
        del model["registered_model"]["latest_versions"]
        model["registered_model"]["all_versions"] = self._get_all_model_versions(model_name)
        for v in model["registered_model"]["all_versions"]:
            run_id = v["run_id"]
            opath = os.path.join(output_dir, run_id)
            self.run_exporter.export_run(run_id, opath)
            # NOTE(review): this rebinding is never used afterwards — possibly it was
            # meant to be applied before export_run; confirm intent.
            opath = opath.replace("dbfs:", "/dbfs")
            run = self.client.get_run(run_id)
            v["artifact_uri"] = run.info.artifact_uri
        utils.write_json_file(self.fs, path, model)
def __init__(self, mlflow_client=None, export_metadata_tags=False, notebook_formats=None, export_notebook_revision=False, stages=None, export_run=True):
    """
    :param mlflow_client: MLflow client or if None create default client.
    :param export_metadata_tags: Export source run metadata tags.
    :param notebook_formats: List of notebook formats to export. Values are SOURCE, HTML, JUPYTER or DBC. Default is no formats.
    :param export_notebook_revision: Export the run's notebook revision. Experimental not yet publicly available.
    :param stages: Stages to export. Default is all stages. Values are Production, Staging, Archived and None.
    :param export_run: Export the run that generated a registered model's version.
    """
    # None default avoids the shared-mutable-default-argument pitfall.
    notebook_formats = [] if notebook_formats is None else notebook_formats
    self.mlflow_client = mlflow_client or mlflow.tracking.MlflowClient()
    print(">> client:", self.mlflow_client)  # NOTE(review): debug print — consider logging instead
    self.http_client = MlflowHttpClient()
    self.run_exporter = RunExporter(
        self.mlflow_client,
        export_metadata_tags=export_metadata_tags,
        notebook_formats=notebook_formats,
        export_notebook_revision=export_notebook_revision)
    self.stages = self._normalize_stages(stages)
    self.export_run = export_run
def __init__(self, client=None, export_metadata_tags=False, notebook_formats=None, filesystem=None):
    """
    :param client: MLflow client or if None create default client.
    :param export_metadata_tags: Export source run metadata tags.
    :param notebook_formats: List of notebook formats to export. Default is ["SOURCE"].
    :param filesystem: Filesystem object or if None use the default filesystem.
    """
    # None default avoids the shared-mutable-default-argument pitfall.
    notebook_formats = ["SOURCE"] if notebook_formats is None else notebook_formats
    self.client = client or mlflow.tracking.MlflowClient()
    self.fs = filesystem or _filesystem.get_filesystem()
    print("Filesystem:", type(self.fs).__name__)
    self.run_exporter = RunExporter(self.client, export_metadata_tags, notebook_formats, self.fs)
def __init__(self, export_metadata_tags=False, notebook_formats=None, filesystem=None):
    """
    :param export_metadata_tags: Export source run metadata tags.
    :param notebook_formats: List of notebook formats to export. Default is ["SOURCE"].
    :param filesystem: Filesystem object or if None use the default filesystem.
    """
    # None default avoids the shared-mutable-default-argument pitfall.
    notebook_formats = ["SOURCE"] if notebook_formats is None else notebook_formats
    self.fs = filesystem or _filesystem.get_filesystem()
    self.client = mlflow.tracking.MlflowClient()
    self.client2 = HttpClient("api/2.0/preview/mlflow")
    self.run_exporter = RunExporter(
        self.client,
        export_metadata_tags=export_metadata_tags,
        notebook_formats=notebook_formats,
        filesystem=filesystem)
def __init__(self, mlflow_client=None, export_metadata_tags=False, notebook_formats=None, export_notebook_revision=False):
    """
    :param mlflow_client: MLflow client or if None create default client.
    :param export_metadata_tags: Export source run metadata tags.
    :param notebook_formats: List of notebook formats to export. Values are SOURCE, HTML, JUPYTER or DBC. Default is no formats.
    :param export_notebook_revision: Export the run's notebook revision. Experimental not yet publicly available.
    """
    # None default avoids the shared-mutable-default-argument pitfall.
    notebook_formats = [] if notebook_formats is None else notebook_formats
    self.mlflow_client = mlflow_client or mlflow.tracking.MlflowClient()
    self.run_exporter = RunExporter(self.mlflow_client, export_metadata_tags, notebook_formats, export_notebook_revision)
def test_run_import_metadata_tags():
    """Round-trip a run with metadata-tag export/import enabled and compare the result."""
    exporter = RunExporter(export_metadata_tags=True)
    importer = RunImporter(
        mlmodel_fix=mlmodel_fix,
        import_metadata_tags=True,
        import_mlflow_tags=True)
    src_run, dst_run = init_run_test(exporter, importer, verbose=False)
    compare_run_import_metadata_tags(client, output, src_run, dst_run)
class ExperimentExporter():
    """Exports an MLflow experiment and all of its runs to a directory or zip file."""

    def __init__(self, client=None, export_metadata_tags=False, notebook_formats=None, filesystem=None):
        """
        :param client: MLflow client or if None create default client.
        :param export_metadata_tags: Export source run metadata tags.
        :param notebook_formats: List of notebook formats to export. Default is ["SOURCE"].
        :param filesystem: Filesystem object or if None use the default filesystem.
        """
        # None default avoids the shared-mutable-default-argument pitfall.
        notebook_formats = ["SOURCE"] if notebook_formats is None else notebook_formats
        self.client = client or mlflow.tracking.MlflowClient()
        self.fs = filesystem or _filesystem.get_filesystem()
        print("Filesystem:", type(self.fs).__name__)
        self.run_exporter = RunExporter(self.client, export_metadata_tags, notebook_formats, self.fs)

    def export_experiment(self, exp_id_or_name, output):
        """
        Export an experiment to `output`: into a zip file if it ends with ".zip",
        otherwise into a directory (created if needed).
        """
        exp = mlflow_utils.get_experiment(self.client, exp_id_or_name)
        exp_id = exp.experiment_id
        print(f"Exporting experiment '{exp.name}' (ID {exp.experiment_id}) to '{output}'")
        if output.endswith(".zip"):
            self.export_experiment_to_zip(exp_id, output)
        else:
            self.fs.mkdirs(output)
            self.export_experiment_to_dir(exp_id, output)

    def export_experiment_to_dir(self, exp_id, exp_dir):
        """Export every run of the experiment into `exp_dir` and write a manifest.json."""
        exp = self.client.get_experiment(exp_id)
        dct = {"experiment": utils.strip_underscores(exp)}
        infos = self.client.list_run_infos(exp_id)
        dct["export_info"] = {
            "export_time": utils.get_now_nice(),
            "num_runs": len(infos)
        }
        run_ids = []
        failed_run_ids = []
        for j, info in enumerate(infos):
            run_dir = os.path.join(exp_dir, info.run_id)
            print(f"Exporting run {j+1}/{len(infos)}: {info.run_id}")
            res = self.run_exporter.export_run(info.run_id, run_dir)
            if res:
                run_ids.append(info.run_id)
            else:
                failed_run_ids.append(info.run_id)
        dct["run_ids"] = run_ids
        dct["failed_run_ids"] = failed_run_ids
        path = os.path.join(exp_dir, "manifest.json")
        utils.write_json_file(self.fs, path, dct)
        # Fixed typo "succesfully" in the summary messages.
        if len(failed_run_ids) == 0:
            print(f"All {len(run_ids)} runs successfully exported")
        else:
            print(f"{len(run_ids)}/{len(infos)} runs successfully exported")
            print(f"{len(failed_run_ids)}/{len(infos)} runs failed")

    def export_experiment_to_zip(self, exp_id, zip_file):
        """Export to a temp directory, zip it, and always clean up the temp directory."""
        temp_dir = tempfile.mkdtemp()
        try:
            self.export_experiment_to_dir(exp_id, temp_dir)
            utils.zip_directory(zip_file, temp_dir)
        finally:
            shutil.rmtree(temp_dir)
class ModelExporter():
    """Exports a registered model's latest versions and their backing runs."""

    def __init__(self, export_metadata_tags=False, notebook_formats=None, filesystem=None):
        """
        :param export_metadata_tags: Export source run metadata tags.
        :param notebook_formats: List of notebook formats to export. Default is ["SOURCE"].
        :param filesystem: Filesystem object or if None use the default filesystem.
        """
        # None default avoids the shared-mutable-default-argument pitfall.
        notebook_formats = ["SOURCE"] if notebook_formats is None else notebook_formats
        self.fs = filesystem or _filesystem.get_filesystem()
        self.client = mlflow.tracking.MlflowClient()
        self.client2 = HttpClient("api/2.0/preview/mlflow")
        self.run_exporter = RunExporter(
            self.client,
            export_metadata_tags=export_metadata_tags,
            notebook_formats=notebook_formats,
            filesystem=filesystem)

    def export_model(self, output_dir, model_name):
        """
        Write `output_dir`/model.json and export each latest version's run into
        its own subdirectory named after the run ID.

        :param output_dir: Output directory.
        :param model_name: Registered model name.
        """
        path = os.path.join(output_dir, "model.json")
        model = self.client2.get(f"registered-models/get?name={model_name}")
        for v in model["registered_model"]["latest_versions"]:
            run_id = v["run_id"]
            opath = os.path.join(output_dir, run_id)
            self.run_exporter.export_run(run_id, opath)
            # NOTE(review): this rebinding is never used afterwards — possibly it was
            # meant to be applied before export_run; confirm intent.
            opath = opath.replace("dbfs:", "/dbfs")
            run = self.client.get_run(run_id)
            v["artifact_uri"] = run.info.artifact_uri
        utils.write_json_file(self.fs, path, model)
class ModelExporter():
    """Exports a registered model, its versions, and their backing runs."""

    def __init__(self, export_metadata_tags=False, notebook_formats=None, filesystem=None):
        """
        :param export_metadata_tags: Export source run metadata tags.
        :param notebook_formats: List of notebook formats to export. Default is ["SOURCE"].
        :param filesystem: Filesystem object or if None use the default filesystem.
        """
        # None default avoids the shared-mutable-default-argument pitfall.
        notebook_formats = ["SOURCE"] if notebook_formats is None else notebook_formats
        self.fs = filesystem or _filesystem.get_filesystem()
        self.client = mlflow.tracking.MlflowClient()
        self.client2 = HttpClient("api/2.0/preview/mlflow")
        self.run_exporter = RunExporter(
            self.client,
            export_metadata_tags=export_metadata_tags,
            notebook_formats=notebook_formats,
            filesystem=filesystem)

    def export_model(self, output_dir, model_name):
        """
        Write `output_dir`/model.json and export the run backing each model
        version into its own subdirectory.

        :param output_dir: Output directory.
        :param model_name: Registered model name.
        """
        path = os.path.join(output_dir, "model.json")
        model = self.client2.get(f"registered-models/get?name={model_name}")
        # Rebuild "latest_versions" from the full version search in a single pass,
        # exporting each backing run exactly once. The previous code re-exported
        # every searched version once per latest version and rebuilt the list it
        # was iterating over on each pass.
        model["registered_model"]["latest_versions"] = []
        for mv in self.client.search_model_versions(f"name='{model_name}'"):
            run_id = mv.run_id
            opath = os.path.join(output_dir, run_id)
            try:
                self.run_exporter.export_run(run_id, opath)
                run = self.client.get_run(run_id)
                dmv = dict(mv)
                dmv["artifact_uri"] = run.info.artifact_uri
                model["registered_model"]["latest_versions"].append(dmv)
            except mlflow.exceptions.RestException as e:
                if "RESOURCE_DOES_NOT_EXIST: Run" in str(e):
                    print("WARNING: Run backing the registered model does not exist.", e)
                else:
                    # Bare raise preserves the original traceback (was: raise (e)).
                    raise
        utils.write_json_file(self.fs, path, model)
class ModelExporter():
    """Exports a registered model, its versions (optionally stage-filtered), and their runs."""

    def __init__(self, mlflow_client=None, export_metadata_tags=False, notebook_formats=None, export_notebook_revision=False, stages=None, export_run=True):
        """
        :param mlflow_client: MLflow client or if None create default client.
        :param export_metadata_tags: Export source run metadata tags.
        :param notebook_formats: List of notebook formats to export. Values are SOURCE, HTML, JUPYTER or DBC. Default is no formats.
        :param export_notebook_revision: Export the run's notebook revision. Experimental not yet publicly available.
        :param stages: Stages to export. Default is all stages. Values are Production, Staging, Archived and None.
        :param export_run: Export the run that generated a registered model's version.
        """
        # None default avoids the shared-mutable-default-argument pitfall.
        notebook_formats = [] if notebook_formats is None else notebook_formats
        self.mlflow_client = mlflow_client or mlflow.tracking.MlflowClient()
        print(">> client:", self.mlflow_client)  # NOTE(review): debug print — consider logging instead
        self.http_client = MlflowHttpClient()
        self.run_exporter = RunExporter(
            self.mlflow_client,
            export_metadata_tags=export_metadata_tags,
            notebook_formats=notebook_formats,
            export_notebook_revision=export_notebook_revision)
        self.stages = self._normalize_stages(stages)
        self.export_run = export_run

    def export_model(self, model_name, output_dir):
        """
        :param model_name: Registered model name.
        :param output_dir: Output directory.
        :return: Returns bool if export succeeded and the model name.
        """
        try:
            self._export_model(model_name, output_dir)
            return True, model_name
        except Exception as e:
            print("ERROR:", e)
            return False, model_name

    def _export_model(self, model_name, output_dir):
        """Write model.json plus one run directory per exported version; return the version manifest."""
        fs = _filesystem.get_filesystem(output_dir)
        # Plain string — the original used an f-string with no placeholders.
        model = self.http_client.get("registered-models/get", {"name": model_name})
        fs.mkdirs(output_dir)
        # Rebuild "latest_versions" from the full (optionally stage-filtered) version list.
        model["registered_model"]["latest_versions"] = []
        versions = self.mlflow_client.search_model_versions(f"name='{model_name}'")
        print(f"Found {len(versions)} versions for model {model_name}")
        manifest = []
        exported_versions = 0
        for vr in versions:
            # Skip versions whose stage is not in the requested set (empty set = all stages).
            if self.stages and vr.current_stage.lower() not in self.stages:
                continue
            run_id = vr.run_id
            opath = os.path.join(output_dir, run_id)
            opath = opath.replace("dbfs:", "/dbfs")
            dct = {
                "version": vr.version,
                "stage": vr.current_stage,
                "run_id": run_id
            }
            print(f"Exporting: {dct}")
            manifest.append(dct)
            try:
                if self.export_run:
                    self.run_exporter.export_run(run_id, opath)
                run = self.mlflow_client.get_run(run_id)
                dct = dict(vr)
                dct["_run_artifact_uri"] = run.info.artifact_uri
                experiment = mlflow.get_experiment(run.info.experiment_id)
                dct["_experiment_name"] = experiment.name
                model["registered_model"]["latest_versions"].append(dct)
                exported_versions += 1
            except mlflow.exceptions.RestException as e:
                if "RESOURCE_DOES_NOT_EXIST: Run" in str(e):
                    print(f"WARNING: Run for version {vr.version} does not exist. {e}")
                else:
                    # NOTE(review): the error is printed but swallowed — the version is
                    # silently skipped rather than failing the export; confirm intent.
                    import traceback
                    traceback.print_exc()
        print(f"Exported {exported_versions}/{len(versions)} versions for model {model_name}")
        path = os.path.join(output_dir, "model.json")
        utils.write_json_file(fs, path, model)
        return manifest

    def _normalize_stages(self, stages):
        """Normalize `stages` (None, comma-separated string, or list) to a lower-cased list."""
        from mlflow.entities.model_registry import model_version_stages
        if stages is None:
            return []
        if isinstance(stages, str):
            stages = stages.split(",")
        stages = [stage.lower() for stage in stages]
        for stage in stages:
            if stage not in model_version_stages._CANONICAL_MAPPING:
                print(f"WARNING: stage '{stage}' must be one of: {model_version_stages.ALL_STAGES}")
        return stages
def test_run_no_import_mlflow_tags():
    """Round-trip a run with MLflow tag import disabled and compare the result."""
    exporter = RunExporter()
    importer = RunImporter(mlmodel_fix=mlmodel_fix, import_mlflow_tags=False)
    src_run, dst_run = init_run_test(exporter, importer)
    compare_run_no_import_mlflow_tags(src_run, dst_run)
def test_run_basic():
    """Basic run export/import round trip with default options."""
    src_run, dst_run = init_run_test(
        RunExporter(),
        RunImporter(mlmodel_fix=mlmodel_fix))
    compare_runs(src_run, dst_run)
# MAGIC %md ### Remove any previous exported run data # COMMAND ---------- dbutils.fs.rm(output_dir, True) # COMMAND ---------- # MAGIC %md ### Export the run # COMMAND ---------- from mlflow_export_import.run.export_run import RunExporter exporter = RunExporter(notebook_formats=formats, export_metadata_tags=export_metadata_tags) exporter.export_run(run_id, output_dir) # COMMAND ---------- # MAGIC %md ### Display exported run files # COMMAND ---------- import os output_dir = output_dir.replace("dbfs:", "/dbfs") os.environ['OUTPUT_DIR'] = output_dir # COMMAND ----------
class ExperimentExporter():
    """Exports an MLflow experiment and its runs (all, or a chosen subset) to a directory."""

    def __init__(self, mlflow_client=None, export_metadata_tags=False, notebook_formats=None, export_notebook_revision=False):
        """
        :param mlflow_client: MLflow client or if None create default client.
        :param export_metadata_tags: Export source run metadata tags.
        :param notebook_formats: List of notebook formats to export. Values are SOURCE, HTML, JUPYTER or DBC. Default is no formats.
        :param export_notebook_revision: Export the run's notebook revision. Experimental not yet publicly available.
        """
        # None default avoids the shared-mutable-default-argument pitfall.
        notebook_formats = [] if notebook_formats is None else notebook_formats
        self.mlflow_client = mlflow_client or mlflow.tracking.MlflowClient()
        self.run_exporter = RunExporter(self.mlflow_client, export_metadata_tags, notebook_formats, export_notebook_revision)

    def export_experiment(self, exp_id_or_name, output_dir, run_ids=None):
        """
        :param exp_id_or_name: Experiment ID or name.
        :param output_dir: Output directory.
        :param run_ids: List of run IDs to export. If None export all run IDs.
        :return: Number of successful and number of failed runs.
        """
        exp = mlflow_utils.get_experiment(self.mlflow_client, exp_id_or_name)
        exp_id = exp.experiment_id
        print(f"Exporting experiment '{exp.name}' (ID {exp.experiment_id}) to '{output_dir}'")
        fs = _filesystem.get_filesystem(output_dir)
        print("Filesystem:", type(fs).__name__)
        fs.mkdirs(output_dir)
        exp = self.mlflow_client.get_experiment(exp_id)
        dct = {"experiment": utils.strip_underscores(exp)}
        ok_run_ids = []
        failed_run_ids = []
        j = -1  # so num_total_runs is 0 when there are no runs
        if run_ids:
            for j, run_id in enumerate(run_ids):
                run = self.mlflow_client.get_run(run_id)
                self._export_run(j, run, output_dir, ok_run_ids, failed_run_ids)
        else:
            for j, run in enumerate(SearchRunsIterator(self.mlflow_client, exp_id)):
                self._export_run(j, run, output_dir, ok_run_ids, failed_run_ids)
        num_runs = j + 1
        dct["export_info"] = {
            "export_time": utils.get_now_nice(),
            "num_total_runs": num_runs,
            "num_ok_runs": len(ok_run_ids),
            "ok_runs": ok_run_ids,
            "num_failed_runs": len(failed_run_ids),
            "failed_runs": failed_run_ids
        }
        path = os.path.join(output_dir, "manifest.json")
        utils.write_json_file(fs, path, dct)
        msg = f"for experiment '{exp.name}' (ID: {exp.experiment_id})"
        if len(failed_run_ids) == 0:
            print(f"All {len(ok_run_ids)} runs successfully exported {msg}")
        else:
            # BUG FIX: the original wrote {len(ok_run_ids)/j}, performing an
            # arithmetic division instead of displaying "ok/total" (and divided
            # by j rather than the run count j+1). Same for the failed line.
            print(f"{len(ok_run_ids)}/{num_runs} runs successfully exported {msg}")
            print(f"{len(failed_run_ids)}/{num_runs} runs failed {msg}")
        return len(ok_run_ids), len(failed_run_ids)

    def _export_run(self, idx, run, output_dir, ok_run_ids, failed_run_ids):
        """Export one run into its own subdirectory and record it as ok or failed."""
        run_dir = os.path.join(output_dir, run.info.run_id)
        print(f"Exporting run {idx+1}: {run.info.run_id}")
        res = self.run_exporter.export_run(run.info.run_id, run_dir)
        if res:
            ok_run_ids.append(run.info.run_id)
        else:
            failed_run_ids.append(run.info.run_id)
def test_run_basic():
    """Basic run export/import round trip with MLflow tag import enabled."""
    exporter = RunExporter()
    importer = RunImporter(mlmodel_fix=mlmodel_fix, import_mlflow_tags=True)
    src_run, dst_run = init_run_test(exporter, importer)
    compare_runs(client, output, src_run, dst_run)