class ExperimentImporter():
    """Imports the runs listed in an exported experiment's manifest."""

    def __init__(self,
                 mlflow_client=None,
                 mlmodel_fix=True,
                 use_src_user_id=False,
                 import_mlflow_tags=True,
                 import_metadata_tags=False):
        """
        Create the importer together with the RunImporter it delegates to.

        :param mlflow_client: MLflow client; a default MlflowClient is created
                              when this is None (or otherwise falsy).
        :param mlmodel_fix: Forwarded unchanged to RunImporter.
        :param use_src_user_id: Set the destination user ID to the source user ID.
                                NOTE(review): the previous docstring went on to say
                                the source user ID is ignored when importing into
                                some target, but the sentence was cut off - confirm.
        :param import_mlflow_tags: Import mlflow tags.
        :param import_metadata_tags: Import mlflow_export_import tags.
        """
        client = mlflow_client if mlflow_client else mlflow.tracking.MlflowClient()
        run_importer_options = {
            "mlmodel_fix": mlmodel_fix,
            "use_src_user_id": use_src_user_id,
            "import_mlflow_tags": import_mlflow_tags,
            "import_metadata_tags": import_metadata_tags,
        }
        self.mlflow_client = client
        self.run_importer = RunImporter(client, **run_importer_options)
        print("MLflowClient:", client)
        self.dbx_client = DatabricksHttpClient()

    def import_experiment(self, exp_name, input_dir):
        """
        Import every run recorded as "ok" in the export manifest.

        The manifest is read from <input_dir>/manifest.json and each run is
        imported from <input_dir>/<source run ID>.

        :param exp_name: Destination experiment name.
        :param input_dir: Source experiment directory.
        :return: Dict mapping each source run ID to the destination run.info.
        """
        mlflow_utils.set_experiment(self.dbx_client, exp_name)
        # TODO: the destination experiment ID is looked up but not used below.
        dst_exp_id = self.mlflow_client.get_experiment_by_name(
            exp_name).experiment_id

        manifest_path = os.path.join(input_dir, "manifest.json")
        manifest = utils.read_json_file(manifest_path)
        run_ids = manifest["export_info"]["ok_runs"]
        failed_run_ids = manifest["export_info"]["failed_runs"]

        print(
            f"Importing {len(run_ids)} runs into experiment '{exp_name}' from {input_dir}"
        )
        # parent_links feeds utils.nested_tags; dst_infos is what the caller gets.
        parent_links = {}
        dst_infos = {}
        for src_run_id in run_ids:
            run_dir = os.path.join(input_dir, src_run_id)
            dst_run, src_parent_run_id = self.run_importer.import_run(
                exp_name, run_dir)
            parent_links[src_run_id] = {
                "dst_run_id": dst_run.info.run_id,
                "src_parent_run_id": src_parent_run_id,
            }
            dst_infos[src_run_id] = dst_run.info
        print(
            f"Imported {len(run_ids)} runs into experiment '{exp_name}' from {input_dir}"
        )

        num_failed = len(failed_run_ids)
        if num_failed > 0:
            print(
                f"Warning: {num_failed} failed runs were not imported - see {manifest_path}"
            )
        utils.nested_tags(self.mlflow_client, parent_links)
        return dst_infos
Esempio n. 2
0
class ModelImporter():
    """Recreates a registered model and its latest versions from an export directory."""

    def __init__(self, filesystem=None):
        """
        :param filesystem: Filesystem object to keep on the instance; when falsy,
                           the result of _filesystem.get_filesystem() is used.
        """
        self.fs = filesystem if filesystem else _filesystem.get_filesystem()
        client = mlflow.tracking.MlflowClient()
        self.client = client
        self.run_importer = RunImporter(client, mlmodel_fix=True)

    def import_model(self,
                     input_dir,
                     model_name,
                     experiment_name,
                     delete_model=False):
        """
        Import the registered model described by <input_dir>/model.json.

        Creates the registered model under model_name, then for every entry in
        "latest_versions" imports the run from <input_dir>/<run_id>, registers
        a model version pointing at the imported run's artifacts, waits for it
        to become ready and moves it to the exported stage.

        :param input_dir: Directory holding model.json and the exported runs.
        :param model_name: Name of the registered model to create.
        :param experiment_name: Experiment the runs are imported into.
        :param delete_model: If true, call model_utils.delete_model for
                             model_name before creating it.
        """
        model_json = utils.read_json_file(os.path.join(input_dir, "model.json"))
        model_dct = model_json["registered_model"]

        print("Model to import:")
        print(f"  Name: {model_dct['name']}")
        print(f"  Description: {model_dct.get('description','')}")
        print(f"  Tags: {model_dct.get('tags','')}")
        print(f"  {len(model_dct['latest_versions'])} latest versions")

        if delete_model:
            model_utils.delete_model(self.client, model_name)

        # Exported tags are a sequence of {"key": ..., "value": ...} entries.
        tags = {}
        for entry in model_dct.get("tags", {}):
            tags[entry["key"]] = entry["value"]
        description = model_dct.get("description")
        self.client.create_registered_model(model_name, tags, description)
        mlflow.set_experiment(experiment_name)

        print("Importing latest versions:")
        for vr in model_dct["latest_versions"]:
            src_run_id = vr["run_id"]
            src_source = vr["source"]
            stage = vr["current_stage"]
            src_artifact_uri = vr["artifact_uri"]
            run_dir = os.path.join(input_dir, src_run_id)
            print(f"  Version {vr['version']}:")
            print(f"    current_stage: {vr['current_stage']}:")
            print("    Run to import:")
            print("      run_id:", src_run_id)
            print("      artifact_uri:", src_artifact_uri)
            print("      source:      ", src_source)
            # Remove the source run's artifact URI (plus "/") from the source
            # so that only the model's path below the artifact root remains.
            model_path = src_source.replace(src_artifact_uri + "/", "")
            print("      model_path:", model_path)

            # The first element returned by import_run is used as the run ID.
            dst_run_id, _ = self.run_importer.import_run(experiment_name,
                                                         run_dir)
            dst_run = self.client.get_run(dst_run_id)
            print("    Imported run:")
            print("      run_id:", dst_run_id)
            print("      artifact_uri:", dst_run.info.artifact_uri)
            dst_source = os.path.join(dst_run.info.artifact_uri, model_path)
            print("      source:      ", dst_source)

            version = self.client.create_model_version(
                model_name, dst_source, dst_run_id)
            model_utils.wait_until_version_is_ready(
                self.client, model_name, version, sleep_time=2)
            self.client.transition_model_version_stage(
                model_name, version.version, stage)
Esempio n. 3
0
def test_run_import_metadata_tags():
    """Run init_run_test with metadata tags enabled on both sides and compare the two runs."""
    exporter = RunExporter(export_metadata_tags=True)
    importer = RunImporter(mlmodel_fix=mlmodel_fix,
                           import_metadata_tags=True,
                           import_mlflow_tags=True)
    first_run, second_run = init_run_test(exporter, importer, verbose=False)
    compare_run_import_metadata_tags(client, output, first_run, second_run)
Esempio n. 4
0
class ExperimentImporter():
    """Imports an exported experiment, given either as a directory or a .zip file."""

    def __init__(self, mlflow_client=None, use_src_user_id=False, import_mlflow_tags=True, import_metadata_tags=False):
        """
        :param mlflow_client: MLflow client; a default MlflowClient is created when falsy.
        :param use_src_user_id: Forwarded unchanged to RunImporter.
        :param import_mlflow_tags: Forwarded unchanged to RunImporter.
        :param import_metadata_tags: Forwarded unchanged to RunImporter.
        """
        self.client = mlflow_client if mlflow_client else mlflow.tracking.MlflowClient()
        run_importer_options = {
            "use_src_user_id": use_src_user_id,
            "import_mlflow_tags": import_mlflow_tags,
            "import_metadata_tags": import_metadata_tags,
        }
        self.run_importer = RunImporter(self.client, **run_importer_options)
        print("MLflowClient:", self.client)

    def import_experiment(self, exp_name, input):
        """
        Import an experiment, choosing the zip or directory path by file suffix.

        :param exp_name: Destination experiment name.
        :param input: Path of the export; treated as a zip file when it ends with ".zip".
        """
        if input.endswith(".zip"):
            handler = self.import_experiment_from_zip
        else:
            handler = self.import_experiment_from_dir
        handler(exp_name, input)

    def import_experiment_from_dir(self, exp_name, exp_dir):
        """
        Import every run listed under "run_ids" in <exp_dir>/manifest.json.

        :param exp_name: Destination experiment name.
        :param exp_dir: Export directory; each run is read from <exp_dir>/<run ID>.
        """
        mlflow.set_experiment(exp_name)
        manifest_path = os.path.join(exp_dir, "manifest.json")
        manifest = utils.read_json_file(manifest_path)
        run_ids = manifest["run_ids"]
        failed_run_ids = manifest['failed_run_ids']

        print(f"Importing {len(run_ids)} runs into experiment '{exp_name}' from {exp_dir}")
        # Source run ID -> (destination run ID, source parent run ID), consumed
        # by utils.nested_tags once every run has been imported.
        run_ids_mapping = {}
        for src_run_id in run_ids:
            run_dir = os.path.join(exp_dir, src_run_id)
            dst_run_id, src_parent_run_id = self.run_importer.import_run(exp_name, run_dir)
            run_ids_mapping[src_run_id] = (dst_run_id, src_parent_run_id)
        print(f"Imported {len(run_ids)} runs into experiment '{exp_name}' from {exp_dir}")

        num_failed = len(failed_run_ids)
        if num_failed > 0:
            print(f"Warning: {num_failed} failed runs were not imported - see {manifest_path}")
        utils.nested_tags(self.client, run_ids_mapping)

    def import_experiment_from_zip(self, exp_name, zip_file):
        """
        Import an experiment from a zip archive via utils.unzip_directory.

        :param exp_name: Destination experiment name.
        :param zip_file: Path of the zip archive.
        """
        # import_experiment_from_dir is handed over as the callback; presumably
        # unzip_directory extracts the archive and calls it - confirm in utils.
        on_extracted = self.import_experiment_from_dir
        utils.unzip_directory(zip_file, exp_name, on_extracted)
 def __init__(self,
              mlflow_client=None,
              mlmodel_fix=True,
              use_src_user_id=False,
              import_mlflow_tags=True,
              import_metadata_tags=False):
     """
     Create the importer together with the RunImporter it delegates to.

     :param mlflow_client: MLflow client; a default MlflowClient is created
                           when this is None (or otherwise falsy).
     :param mlmodel_fix: Forwarded unchanged to RunImporter.
     :param use_src_user_id: Set the destination user ID to the source user ID.
                             NOTE(review): the previous docstring went on to say
                             the source user ID is ignored when importing into
                             some target, but the sentence was cut off - confirm.
     :param import_mlflow_tags: Import mlflow tags.
     :param import_metadata_tags: Import mlflow_export_import tags.
     """
     client = mlflow_client if mlflow_client else mlflow.tracking.MlflowClient()
     run_importer_options = {
         "mlmodel_fix": mlmodel_fix,
         "use_src_user_id": use_src_user_id,
         "import_mlflow_tags": import_mlflow_tags,
         "import_metadata_tags": import_metadata_tags,
     }
     self.mlflow_client = client
     self.run_importer = RunImporter(client, **run_importer_options)
     print("MLflowClient:", client)
     self.dbx_client = DatabricksHttpClient()
 def __init__(self,
              mlflow_client=None,
              run_importer=None,
              await_creation_for=None):
     """
     :param mlflow_client: MLflow client or if None create default client.
     :param run_importer: RunImporter instance. When falsy, a RunImporter bound
                          to the MLflow client is created with mlmodel_fix=True
                          and import_mlflow_tags=False.
     :param await_creation_for: Seconds to wait for model version creation.
     """
     # Honor the caller-supplied client. The argument used to be ignored and a
     # fresh default client was always created, contradicting the docstring.
     self.mlflow_client = mlflow_client or mlflow.tracking.MlflowClient()
     # NOTE(review): these two prints look like leftover debug output; kept so
     # the constructor's stdout is unchanged.
     print(">> run_importer:", run_importer)
     print(">> run_importer.type:", type(run_importer))
     self.run_importer = run_importer if run_importer else RunImporter(
         self.mlflow_client, mlmodel_fix=True, import_mlflow_tags=False)
     self.await_creation_for = await_creation_for
Esempio n. 7
0
# Notebook parameters. The destination experiment widget is required because
# experiment_name is used below; it was previously referenced without ever
# being defined in this notebook, which raised NameError.
dbutils.widgets.text("Destination experiment name", "")
experiment_name = dbutils.widgets.get("Destination experiment name")

dbutils.widgets.text("Input folder", "")
input_dir = dbutils.widgets.get("Input folder")

if not input_dir:
    raise Exception("ERROR: Input is required")
if not experiment_name:
    raise Exception("ERROR: Destination experiment name is required")
input_dir, experiment_name

print("input_dir:", input_dir)
print("experiment_name:", experiment_name)

# COMMAND ----------

# MAGIC %md ### Import Run

# COMMAND ----------

# Import the run found in input_dir into the destination experiment.
from mlflow_export_import.run.import_run import RunImporter
importer = RunImporter()
run_id, parent_run_id = importer.import_run(experiment_name, input_dir)
run_id, parent_run_id

# COMMAND ----------

# MAGIC %md ### Display MLflow UI URIs

# COMMAND ----------

# MAGIC %run ./Common

# COMMAND ----------

# display_run_uri is not defined in this notebook; presumably it comes from
# the ./Common notebook run above - confirm.
display_run_uri(run_id)
Esempio n. 8
0
 def __init__(self, filesystem=None):
     """
     :param filesystem: Filesystem object to keep on the instance; when falsy,
                        the result of _filesystem.get_filesystem() is used.
     """
     self.fs = filesystem if filesystem else _filesystem.get_filesystem()
     client = mlflow.tracking.MlflowClient()
     self.client = client
     self.run_importer = RunImporter(client, mlmodel_fix=True)
def test_run_no_import_mlflow_tags():
    """Run init_run_test with import_mlflow_tags disabled and compare the two runs."""
    exporter = RunExporter()
    importer = RunImporter(mlmodel_fix=mlmodel_fix, import_mlflow_tags=False)
    first_run, second_run = init_run_test(exporter, importer)
    compare_run_no_import_mlflow_tags(first_run, second_run)
def test_run_basic():
    """Run init_run_test with a default exporter and importer and compare the two runs."""
    exporter = RunExporter()
    importer = RunImporter(mlmodel_fix=mlmodel_fix)
    first_run, second_run = init_run_test(exporter, importer)
    compare_runs(first_run, second_run)
Esempio n. 11
0
 def __init__(self, filesystem=None, run_importer=None):
     """
     :param filesystem: Filesystem object to keep on the instance; when falsy,
                        the result of _filesystem.get_filesystem() is used.
     :param run_importer: RunImporter instance; when falsy, one is created on
                          the default MLflow client with mlmodel_fix=True and
                          import_mlflow_tags=True.
     """
     self.fs = filesystem if filesystem else _filesystem.get_filesystem()
     self.client = mlflow.tracking.MlflowClient()
     if run_importer:
         self.run_importer = run_importer
     else:
         self.run_importer = RunImporter(
             self.client, mlmodel_fix=True, import_mlflow_tags=True)
Esempio n. 12
0
def test_run_basic():
    """Run init_run_test with mlflow tags imported and compare the two runs."""
    exporter = RunExporter()
    importer = RunImporter(mlmodel_fix=mlmodel_fix, import_mlflow_tags=True)
    first_run, second_run = init_run_test(exporter, importer)
    compare_runs(client, output, first_run, second_run)
Esempio n. 13
0
 def __init__(self, mlflow_client=None, use_src_user_id=False, import_mlflow_tags=True, import_metadata_tags=False):
     """
     :param mlflow_client: MLflow client; a default MlflowClient is created when falsy.
     :param use_src_user_id: Forwarded unchanged to RunImporter.
     :param import_mlflow_tags: Forwarded unchanged to RunImporter.
     :param import_metadata_tags: Forwarded unchanged to RunImporter.
     """
     self.client = mlflow_client if mlflow_client else mlflow.tracking.MlflowClient()
     run_importer_options = {
         "use_src_user_id": use_src_user_id,
         "import_mlflow_tags": import_mlflow_tags,
         "import_metadata_tags": import_metadata_tags,
     }
     self.run_importer = RunImporter(self.client, **run_importer_options)
     print("MLflowClient:", self.client)