def __init__(self, client=None, export_metadata_tags=False, notebook_formats=None, filesystem=None):
    """
    :param client: MLflow client or if None create default client.
    :param export_metadata_tags: Export source run metadata tags.
    :param notebook_formats: List of notebook formats to export. Defaults to ["SOURCE"].
    :param filesystem: Filesystem object or if None use default filesystem.
    """
    self.client = client or mlflow.tracking.MlflowClient()
    self.dbx_client = DatabricksHttpClient()
    print("Databricks REST client:", self.dbx_client)
    self.fs = filesystem or _filesystem.get_filesystem()
    print("Filesystem:", type(self.fs).__name__)
    self.export_metadata_tags = export_metadata_tags
    # BUGFIX: the default was the mutable literal ["SOURCE"], shared across all
    # calls. Build a fresh list per call instead; behavior is otherwise identical.
    self.notebook_formats = notebook_formats if notebook_formats is not None else ["SOURCE"]
def __init__(self, mlflow_client=None, export_metadata_tags=False, notebook_formats=None, export_notebook_revision=False):
    """
    :param mlflow_client: MLflow client or if None create default client.
    :param export_metadata_tags: Export source run metadata tags.
    :param notebook_formats: List of notebook formats to export. Values are SOURCE, HTML, JUPYTER or DBC. Defaults to [].
    :param export_notebook_revision: Export the run's notebook revision. Experimental not yet publicly available.
    """
    self.mlflow_client = mlflow_client or mlflow.tracking.MlflowClient()
    self.dbx_client = DatabricksHttpClient()
    print("Databricks REST client:", self.dbx_client)
    self.export_metadata_tags = export_metadata_tags
    # BUGFIX: the default was the mutable literal [], shared across all calls.
    # Build a fresh list per call instead; behavior is otherwise identical.
    self.notebook_formats = notebook_formats if notebook_formats is not None else []
    self.export_notebook_revision = export_notebook_revision
def __init__(self, mlflow_client=None, mlmodel_fix=True, use_src_user_id=False, import_mlflow_tags=False, import_metadata_tags=False):
    """
    :param mlflow_client: MLflow client or if None create default client.
    :param mlmodel_fix: Add correct run ID in destination MLmodel artifact. Can be expensive for deeply nested artifacts.
    :param use_src_user_id: Set the destination user ID to the source user ID. Source user ID is ignored when importing into Databricks since setting it is not allowed.
    :param import_mlflow_tags: Import mlflow tags.
    :param import_metadata_tags: Import mlflow_export_import tags.
    """
    self.mlflow_client = mlflow_client or mlflow.tracking.MlflowClient()
    # Plain option flags are stored verbatim on the instance.
    options = {
        "mlmodel_fix": mlmodel_fix,
        "use_src_user_id": use_src_user_id,
        "import_mlflow_tags": import_mlflow_tags,
        "import_metadata_tags": import_metadata_tags,
    }
    for attr_name, attr_value in options.items():
        setattr(self, attr_name, attr_value)
    self.in_databricks = "DATABRICKS_RUNTIME_VERSION" in os.environ
    self.dbx_client = DatabricksHttpClient()
    print(f"in_databricks: {self.in_databricks}")
    print(f"importing_into_databricks: {utils.importing_into_databricks()}")
def __init__(self, mlflow_client=None, mlmodel_fix=True, use_src_user_id=False, import_mlflow_tags=True, import_metadata_tags=False):
    """
    :param mlflow_client: MLflow client or if None create default client.
    :param mlmodel_fix: Add correct run ID in destination MLmodel artifact. Can be expensive for deeply nested artifacts.
    :param use_src_user_id: Set the destination user ID to the source user ID. Source user ID is ignored when importing into Databricks since setting it is not allowed.
    :param import_mlflow_tags: Import mlflow tags.
    :param import_metadata_tags: Import mlflow_export_import tags.
    """
    self.mlflow_client = mlflow_client or mlflow.tracking.MlflowClient()
    # All per-run import options are delegated to a single RunImporter instance.
    self.run_importer = RunImporter(self.mlflow_client, mlmodel_fix=mlmodel_fix, use_src_user_id=use_src_user_id, import_mlflow_tags=import_mlflow_tags, import_metadata_tags=import_metadata_tags)
    print("MLflowClient:", self.mlflow_client)
    self.dbx_client = DatabricksHttpClient()
class RunExporter():
    """Export an MLflow run (metadata, params, metrics, tags, artifacts and
    optionally its Databricks notebook) to a directory or zip file."""

    def __init__(self, client=None, export_metadata_tags=False, notebook_formats=None, filesystem=None):
        """
        :param client: MLflow client or if None create default client.
        :param export_metadata_tags: Export source run metadata tags.
        :param notebook_formats: List of notebook formats to export. Defaults to ["SOURCE"].
        :param filesystem: Filesystem object or if None use default filesystem.
        """
        self.client = client or mlflow.tracking.MlflowClient()
        self.dbx_client = DatabricksHttpClient()
        print("Databricks REST client:", self.dbx_client)
        self.fs = filesystem or _filesystem.get_filesystem()
        print("Filesystem:", type(self.fs).__name__)
        self.export_metadata_tags = export_metadata_tags
        # BUGFIX: default was the mutable literal ["SOURCE"] shared across calls.
        self.notebook_formats = notebook_formats if notebook_formats is not None else ["SOURCE"]

    def export_run(self, run_id, output):
        """
        :param run_id: Run ID to export.
        :param output: Destination - a directory, or a zip file if it ends in ".zip".
        :return: whether export succeeded.
        """
        run = self.client.get_run(run_id)
        if output.endswith(".zip"):
            return self.export_run_to_zip(run, output)
        else:
            self.fs.mkdirs(output)
            return self.export_run_to_dir(run, output)

    def export_run_to_zip(self, run, zip_file):
        """Export the run into a temporary directory and zip it up."""
        temp_dir = tempfile.mkdtemp()
        try:
            # BUGFIX: propagate the success flag from export_run_to_dir;
            # previously this returned None so export_run reported falsy on success.
            ok = self.export_run_to_dir(run, temp_dir)
            utils.zip_directory(zip_file, temp_dir)
            return ok
        finally:
            shutil.rmtree(temp_dir) #fs.rm(temp_dir,True) # TODO

    def export_run_to_dir(self, run, run_dir):
        """Write run.json, copy artifacts, and export the notebook (if any) into run_dir."""
        tags = utils.create_tags_for_metadata(self.client, run, self.export_metadata_tags)
        dct = { "info": utils.strip_underscores(run.info),
                "params": run.data.params,
                "metrics": run.data.metrics,
                "tags": tags,
              }
        path = os.path.join(run_dir, "run.json")
        utils.write_json_file(self.fs, path, dct)
        # copy artifacts
        dst_path = os.path.join(run_dir, "artifacts")
        try:
            artifacts = self.client.list_artifacts(run.info.run_id)
            if len(artifacts) > 0: # Because of https://github.com/mlflow/mlflow/issues/2839
                self.fs.mkdirs(dst_path)
                self.client.download_artifacts(run.info.run_id, "", dst_path=mk_local_path(dst_path))
            notebook = tags.get("mlflow.databricks.notebookPath", None)
            if notebook is not None:
                self.export_notebook(run_dir, notebook)
            return True
        except Exception as e:
            # Best-effort export: report and return False rather than raising.
            print("ERROR: run_id:",run.info.run_id,"Exception:",e)
            traceback.print_exc()
            return False

    def export_notebook(self, run_dir, notebook):
        """Export the notebook in every configured format."""
        # 'fmt' avoids shadowing the builtin 'format'.
        for fmt in self.notebook_formats:
            self.export_notebook_format(run_dir, notebook, fmt, fmt.lower())

    def export_notebook_format(self, run_dir, notebook, format, extension):
        """Download one notebook export format via the Databricks workspace API."""
        resource = f"workspace/export?path={notebook}&direct_download=true&format={format}"
        try:
            rsp = self.dbx_client._get(resource)
            nb_name = "notebook."+extension
            nb_path = os.path.join(run_dir, nb_name)
            utils.write_file(nb_path, rsp.content) #self.fs.write(nb_path, rsp.content) # Bombs for DBC because dbutils.fs.put only writes strings!
        except MlflowToolsException as e:
            print(f"WARNING: Cannot save notebook '{notebook}'. {e}")
class RunExporter():
    """Export an MLflow run (metadata, params, metrics, tags, artifacts and
    optionally its Databricks notebook revision) to a directory."""

    def __init__(self, mlflow_client=None, export_metadata_tags=False, notebook_formats=None, export_notebook_revision=False):
        """
        :param mlflow_client: MLflow client or if None create default client.
        :param export_metadata_tags: Export source run metadata tags.
        :param notebook_formats: List of notebook formats to export. Values are SOURCE, HTML, JUPYTER or DBC. Defaults to [].
        :param export_notebook_revision: Export the run's notebook revision. Experimental not yet publicly available.
        """
        self.mlflow_client = mlflow_client or mlflow.tracking.MlflowClient()
        self.dbx_client = DatabricksHttpClient()
        print("Databricks REST client:", self.dbx_client)
        self.export_metadata_tags = export_metadata_tags
        # BUGFIX: default was the mutable literal [] shared across calls.
        self.notebook_formats = notebook_formats if notebook_formats is not None else []
        self.export_notebook_revision = export_notebook_revision

    def export_run(self, run_id, output_dir):
        """
        :param run_id: Run ID.
        :param output_dir: Output directory.
        :return: whether export succeeded.
        """
        fs = _filesystem.get_filesystem(output_dir)
        run = self.mlflow_client.get_run(run_id)
        fs.mkdirs(output_dir)
        tags = utils.create_tags_for_metadata(self.mlflow_client, run, self.export_metadata_tags)
        dct = { "info": utils.strip_underscores(run.info),
                "params": run.data.params,
                "metrics": run.data.metrics,
                "tags": tags,
              }
        path = os.path.join(output_dir, "run.json")
        utils.write_json_file(fs, path, dct)
        # copy artifacts
        dst_path = os.path.join(output_dir, "artifacts")
        try:
            TAG_NOTEBOOK_PATH = "mlflow.databricks.notebookPath"
            artifacts = self.mlflow_client.list_artifacts(run.info.run_id)
            if len(artifacts) > 0: # Because of https://github.com/mlflow/mlflow/issues/2839
                fs.mkdirs(dst_path)
                self.mlflow_client.download_artifacts(run.info.run_id, "", dst_path=mk_local_path(dst_path))
            notebook = tags.get(TAG_NOTEBOOK_PATH, None)
            if notebook is not None:
                if len(self.notebook_formats) > 0:
                    self.export_notebook(output_dir, notebook, run.data.tags, fs)
            elif len(self.notebook_formats) > 0:
                print(f"WARNING: Cannot export notebook since tag '{TAG_NOTEBOOK_PATH}' is not set.")
            return True
        except Exception as e:
            # Best-effort export: report and return False rather than raising.
            print("ERROR: run_id:",run.info.run_id,"Exception:",e)
            traceback.print_exc()
            return False

    def export_notebook(self, output_dir, notebook, tags, fs):
        """Write the notebook manifest and export the notebook in every configured format."""
        notebook_dir = os.path.join(output_dir, "artifacts", "notebooks")
        fs.mkdirs(notebook_dir)
        # NOTE(review): raises KeyError if the revision tag is missing —
        # confirm callers only reach here when both Databricks tags are set.
        revision_id = tags["mlflow.databricks.notebookRevisionID"]
        notebook_path = tags["mlflow.databricks.notebookPath"]
        notebook_name = os.path.basename(notebook_path)
        dct = {
            "mlflow.databricks.notebookRevisionID": revision_id,
            "mlflow.databricks.notebookPath": notebook_path,
            "mlflow.databricks.export-notebook-revision": self.export_notebook_revision
        }
        path = os.path.join(notebook_dir, "manifest.json")
        with open(path, "w") as f:
            f.write(json.dumps(dct, indent=2)+"\n")
        # 'fmt' avoids shadowing the builtin 'format'.
        for fmt in self.notebook_formats:
            self.export_notebook_format(notebook_dir, notebook, fmt, fmt.lower(), notebook_name, revision_id)

    def export_notebook_format(self, notebook_dir, notebook, format, extension, notebook_name, revision_id):
        """Download one notebook export format via the Databricks workspace API."""
        params = {
            "path": notebook,
            "direct_download": True,
            "format": format,
        }
        # Revision pinning is opt-in (experimental).
        if self.export_notebook_revision:
            params["revision"] = { "revision_timestamp": revision_id }
        try:
            rsp = self.dbx_client._get("workspace/export", params)
            notebook_path = os.path.join(notebook_dir, f"{notebook_name}.{extension}")
            utils.write_file(notebook_path, rsp.content)
        except MlflowExportImportException as e:
            print(f"WARNING: Cannot save notebook '{notebook}'. {e}")