예제 #1
0
 def __init__(self, client=None, export_metadata_tags=False, notebook_formats=None, filesystem=None):
     """
     :param client: MLflow client or if None create default client.
     :param export_metadata_tags: Export source run metadata tags.
     :param notebook_formats: List of notebook formats to export. Defaults to ["SOURCE"].
     :param filesystem: Filesystem object or if None use default filesystem.
     """
     self.client = client or mlflow.tracking.MlflowClient()
     self.dbx_client = DatabricksHttpClient()
     print("Databricks REST client:",self.dbx_client)
     self.fs = filesystem or _filesystem.get_filesystem()
     print("Filesystem:",type(self.fs).__name__)
     self.export_metadata_tags = export_metadata_tags
     # Fix: avoid a mutable default argument — the previous default
     # (["SOURCE"]) was a single list object shared by every instance
     # that did not pass notebook_formats explicitly.
     self.notebook_formats = ["SOURCE"] if notebook_formats is None else notebook_formats
예제 #2
0
 def __init__(self, mlflow_client=None, export_metadata_tags=False, notebook_formats=None, export_notebook_revision=False):
     """
     :param mlflow_client: MLflow client or if None create default client.
     :param export_metadata_tags: Export source run metadata tags.
     :param notebook_formats: List of notebook formats to export. Values are SOURCE, HTML, JUPYTER or DBC. Defaults to [].
     :param export_notebook_revision: Export the run's notebook revision. Experimental not yet publicly available.
     """
     self.mlflow_client = mlflow_client or mlflow.tracking.MlflowClient()
     self.dbx_client = DatabricksHttpClient()
     print("Databricks REST client:",self.dbx_client)
     self.export_metadata_tags = export_metadata_tags
     # Fix: avoid a mutable default argument — the previous default ([])
     # was a single list object shared by every instance that did not
     # pass notebook_formats explicitly.
     self.notebook_formats = [] if notebook_formats is None else notebook_formats
     self.export_notebook_revision = export_notebook_revision
예제 #3
0
 def __init__(self,
              mlflow_client=None,
              mlmodel_fix=True,
              use_src_user_id=False,
              import_mlflow_tags=False,
              import_metadata_tags=False):
     """
     :param mlflow_client: MLflow client or if None create default client.
     :param mlmodel_fix: Add correct run ID in destination MLmodel artifact.
                         Can be expensive for deeply nested artifacts.
     :param use_src_user_id: Set the destination user ID to the source user ID.
                             Source user ID is ignored when importing into
                             Databricks since setting it is not allowed.
     :param import_mlflow_tags: Import mlflow tags.
     :param import_metadata_tags: Import mlflow_export_import tags.
     """
     # Fall back to a default client when none is supplied.
     client = mlflow_client or mlflow.tracking.MlflowClient()
     self.mlflow_client = client
     # Keep the import options on the instance for later use.
     self.mlmodel_fix = mlmodel_fix
     self.use_src_user_id = use_src_user_id
     self.import_mlflow_tags = import_mlflow_tags
     self.import_metadata_tags = import_metadata_tags
     # Detect whether we are running inside a Databricks runtime.
     self.in_databricks = "DATABRICKS_RUNTIME_VERSION" in os.environ
     self.dbx_client = DatabricksHttpClient()
     print(f"in_databricks: {self.in_databricks}")
     print(f"importing_into_databricks: {utils.importing_into_databricks()}")
 def __init__(self,
              mlflow_client=None,
              mlmodel_fix=True,
              use_src_user_id=False,
              import_mlflow_tags=True,
              import_metadata_tags=False):
     """
     :param mlflow_client: MLflow client or if None create default client.
     :param mlmodel_fix: Add correct run ID in destination MLmodel artifact.
                         Can be expensive for deeply nested artifacts.
     :param use_src_user_id: Set the destination user ID to the source user ID.
                             Source user ID is ignored when importing into
                             Databricks since setting it is not allowed.
     :param import_mlflow_tags: Import mlflow tags.
     :param import_metadata_tags: Import mlflow_export_import tags.
     """
     self.mlflow_client = mlflow_client or mlflow.tracking.MlflowClient()
     # Delegate the per-run import work to RunImporter, forwarding all options.
     self.run_importer = RunImporter(
         self.mlflow_client,
         mlmodel_fix=mlmodel_fix,
         use_src_user_id=use_src_user_id,
         import_mlflow_tags=import_mlflow_tags,
         import_metadata_tags=import_metadata_tags)
     print("MLflowClient:", self.mlflow_client)
     self.dbx_client = DatabricksHttpClient()
예제 #5
0
class RunExporter():
    """Exports an MLflow run (run.json metadata, artifacts and, when a Databricks
    notebook is attached, the notebook itself) to a directory or zip file."""

    def __init__(self, client=None, export_metadata_tags=False, notebook_formats=None, filesystem=None):
        """
        :param client: MLflow client or if None create default client.
        :param export_metadata_tags: Export source run metadata tags.
        :param notebook_formats: List of notebook formats to export. Defaults to ["SOURCE"].
        :param filesystem: Filesystem object or if None use default filesystem.
        """
        self.client = client or mlflow.tracking.MlflowClient()
        self.dbx_client = DatabricksHttpClient()
        print("Databricks REST client:",self.dbx_client)
        self.fs = filesystem or _filesystem.get_filesystem()
        print("Filesystem:",type(self.fs).__name__)
        self.export_metadata_tags = export_metadata_tags
        # Fix: avoid a mutable default argument — the previous default
        # (["SOURCE"]) was a single list shared across instances.
        self.notebook_formats = ["SOURCE"] if notebook_formats is None else notebook_formats

    def export_run(self, run_id, output):
        """
        Export a run to 'output': a zip file if the path ends with '.zip',
        otherwise a directory (created if needed).

        :param run_id: Run ID to export.
        :param output: Output zip file or directory path.
        :return: True on success, False on failure.
        """
        run = self.client.get_run(run_id)
        if output.endswith(".zip"):
            return self.export_run_to_zip(run, output)
        else:
            self.fs.mkdirs(output)
            return self.export_run_to_dir(run, output)

    def export_run_to_zip(self, run, zip_file):
        """
        Export a run into a zip file, staging through a temporary directory
        that is always removed afterwards.

        :return: True on success, False on failure.
        """
        temp_dir = tempfile.mkdtemp()
        try:
            # Fix: propagate the success flag — this method previously
            # returned None, so export_run's zip path always looked falsy
            # to callers even when the export succeeded.
            ok = self.export_run_to_dir(run, temp_dir)
            utils.zip_directory(zip_file, temp_dir)
            return ok
        finally:
            shutil.rmtree(temp_dir)

    def export_run_to_dir(self, run, run_dir):
        """
        Write run.json (info, params, metrics, tags) and copy the run's
        artifacts (and notebook, if any) into run_dir.

        :return: True on success, False on failure.
        """
        tags = utils.create_tags_for_metadata(self.client, run, self.export_metadata_tags)
        dct = { "info": utils.strip_underscores(run.info),
                "params": run.data.params,
                "metrics": run.data.metrics,
                "tags": tags,
              }
        path = os.path.join(run_dir,"run.json")
        utils.write_json_file(self.fs, path, dct)

        # copy artifacts
        dst_path = os.path.join(run_dir,"artifacts")
        try:
            artifacts = self.client.list_artifacts(run.info.run_id)
            if len(artifacts) > 0: # Because of https://github.com/mlflow/mlflow/issues/2839
                self.fs.mkdirs(dst_path)
                self.client.download_artifacts(run.info.run_id,"", dst_path=mk_local_path(dst_path))
            notebook = tags.get("mlflow.databricks.notebookPath", None)
            if notebook is not None:
                self.export_notebook(run_dir, notebook)
            return True
        except Exception as e:
            print("ERROR: run_id:",run.info.run_id,"Exception:",e)
            traceback.print_exc()
            return False

    def export_notebook(self, run_dir, notebook):
        """Export the run's notebook once per configured format."""
        # 'fmt' instead of 'format' to avoid shadowing the builtin.
        for fmt in self.notebook_formats:
            self.export_notebook_format(run_dir, notebook, fmt, fmt.lower())

    def export_notebook_format(self, run_dir, notebook, format, extension):
        """
        Export one notebook format via the Databricks workspace/export API.

        :param run_dir: Directory to write the notebook file into.
        :param notebook: Workspace path of the notebook.
        :param format: Databricks export format (SOURCE, HTML, JUPYTER or DBC).
                       Name kept (despite shadowing the builtin) for keyword-call compatibility.
        :param extension: File extension for the saved notebook file.
        """
        resource = f"workspace/export?path={notebook}&direct_download=true&format={format}"
        try:
            rsp = self.dbx_client._get(resource)
            nb_name = "notebook."+extension
            nb_path = os.path.join(run_dir,nb_name)
            utils.write_file(nb_path, rsp.content)
        except MlflowToolsException as e:
            print(f"WARNING: Cannot save notebook '{notebook}'. {e}")
예제 #6
0
class RunExporter():
    """Exports an MLflow run (run.json metadata, artifacts and, optionally, the
    attached Databricks notebook with a manifest) to an output directory."""

    def __init__(self, mlflow_client=None, export_metadata_tags=False, notebook_formats=None, export_notebook_revision=False):
        """
        :param mlflow_client: MLflow client or if None create default client.
        :param export_metadata_tags: Export source run metadata tags.
        :param notebook_formats: List of notebook formats to export. Values are SOURCE, HTML, JUPYTER or DBC. Defaults to [].
        :param export_notebook_revision: Export the run's notebook revision. Experimental not yet publicly available.
        """
        self.mlflow_client = mlflow_client or mlflow.tracking.MlflowClient()
        self.dbx_client = DatabricksHttpClient()
        print("Databricks REST client:",self.dbx_client)
        self.export_metadata_tags = export_metadata_tags
        # Fix: avoid a mutable default argument — the previous default ([])
        # was a single list shared across instances.
        self.notebook_formats = [] if notebook_formats is None else notebook_formats
        self.export_notebook_revision = export_notebook_revision

    def export_run(self, run_id, output_dir):
        """
        :param run_id: Run ID.
        :param output_dir: Output directory.
        :return: whether export succeeded.
        """
        fs = _filesystem.get_filesystem(output_dir)
        run = self.mlflow_client.get_run(run_id)
        fs.mkdirs(output_dir)
        tags = utils.create_tags_for_metadata(self.mlflow_client, run, self.export_metadata_tags)
        dct = { "info": utils.strip_underscores(run.info),
                "params": run.data.params,
                "metrics": run.data.metrics,
                "tags": tags,
              }
        path = os.path.join(output_dir,"run.json")
        utils.write_json_file(fs, path, dct)

        # copy artifacts
        dst_path = os.path.join(output_dir,"artifacts")
        try:
            TAG_NOTEBOOK_PATH = "mlflow.databricks.notebookPath"
            artifacts = self.mlflow_client.list_artifacts(run.info.run_id)
            if len(artifacts) > 0: # Because of https://github.com/mlflow/mlflow/issues/2839
                fs.mkdirs(dst_path)
                self.mlflow_client.download_artifacts(run.info.run_id,"", dst_path=mk_local_path(dst_path))
            notebook = tags.get(TAG_NOTEBOOK_PATH, None)
            if notebook is not None:
                if len(self.notebook_formats) > 0:
                    self.export_notebook(output_dir, notebook, run.data.tags, fs)
            elif len(self.notebook_formats) > 0:
                print(f"WARNING: Cannot export notebook since tag '{TAG_NOTEBOOK_PATH}' is not set.")
            return True
        except Exception as e:
            print("ERROR: run_id:",run.info.run_id,"Exception:",e)
            traceback.print_exc()
            return False

    def export_notebook(self, output_dir, notebook, tags, fs):
        """
        Write a manifest.json under artifacts/notebooks and export the notebook
        in every configured format.

        :param tags: Source run tags (run.data.tags) containing the notebook tags.
        """
        notebook_dir = os.path.join(output_dir,"artifacts","notebooks")
        fs.mkdirs(notebook_dir)
        # NOTE(review): assumes notebookRevisionID is always present when
        # notebookPath is — a KeyError here is caught by export_run's broad
        # except and reported as a failed export. TODO confirm the tag is
        # guaranteed by the tracking server.
        revision_id = tags["mlflow.databricks.notebookRevisionID"]
        notebook_path = tags["mlflow.databricks.notebookPath"]
        notebook_name = os.path.basename(notebook_path)
        dct = { 
           "mlflow.databricks.notebookRevisionID": revision_id, 
           "mlflow.databricks.notebookPath": notebook_path,
           "mlflow.databricks.export-notebook-revision": self.export_notebook_revision }
        path = os.path.join(notebook_dir, "manifest.json")
        with open(path, "w") as f:
            f.write(json.dumps(dct,indent=2)+"\n")
        # 'fmt' instead of 'format' to avoid shadowing the builtin.
        for fmt in self.notebook_formats:
            self.export_notebook_format(notebook_dir, notebook, fmt, fmt.lower(), notebook_name, revision_id)

    def export_notebook_format(self, notebook_dir, notebook, format, extension, notebook_name, revision_id):
        """
        Export one notebook format via the Databricks workspace/export API.

        :param format: Databricks export format (SOURCE, HTML, JUPYTER or DBC).
                       Name kept (despite shadowing the builtin) for keyword-call compatibility.
        :param extension: File extension for the saved notebook file.
        :param revision_id: Notebook revision timestamp; only sent when
                            export_notebook_revision is enabled.
        """
        params = { 
            "path": notebook, 
            "direct_download": True,
            "format": format,
        }
        if self.export_notebook_revision:
            params["revision"] = { "revision_timestamp": revision_id }
        try:
            rsp = self.dbx_client._get("workspace/export", params)
            notebook_path = os.path.join(notebook_dir, f"{notebook_name}.{extension}")
            utils.write_file(notebook_path, rsp.content)
        except MlflowExportImportException as e:
            print(f"WARNING: Cannot save notebook '{notebook}'. {e}")