def import_experiments(input_dir, use_src_user_id, import_mlflow_tags,
                       import_metadata_tags):
    """
    Import all experiments listed in the experiments manifest under ``input_dir``.

    Failures are best-effort: an exception while importing one experiment is
    recorded and printed, and the remaining experiments are still imported.

    :param input_dir: Root directory containing ``experiments/manifest.json``
        and one ``experiments/<id>`` directory per exported experiment.
    :param use_src_user_id: Passed through to ``ExperimentImporter``.
    :param import_mlflow_tags: Passed through to ``ExperimentImporter``.
    :param import_metadata_tags: Passed through to ``ExperimentImporter``.
    :return: Tuple of (map of source experiment ID to that experiment's
        run-info map, summary dict with keys "experiments", "exceptions"
        and "duration").
    """
    import traceback  # hoisted: the original re-imported this on every failure
    start_time = time.time()
    manifest_path = os.path.join(input_dir, "experiments", "manifest.json")
    manifest = utils.read_json_file(manifest_path)
    exps = manifest["experiments"]
    importer = ExperimentImporter(None, use_src_user_id, import_mlflow_tags,
                                  import_metadata_tags)
    print("Experiments:")
    for exp in exps:
        print(" ", exp)

    run_info_map = {}
    exceptions = []
    for exp in exps:
        exp_input_dir = os.path.join(input_dir, "experiments", exp["id"])
        try:
            run_info_map[exp["id"]] = importer.import_experiment(
                exp["name"], exp_input_dir)
        except Exception as e:
            # Best-effort: record the failure and keep importing the rest.
            exceptions.append(e)
            traceback.print_exc()

    duration = round(time.time() - start_time, 1)
    if exceptions:
        print(f"Errors: {len(exceptions)}")
    print(f"Duration: {duration} seconds")

    return run_info_map, {
        "experiments": len(exps),
        "exceptions": exceptions,
        "duration": duration
    }
    def _import_run(self, dst_exp_name, src_run_id):
        """Import one exported run directory into experiment ``dst_exp_name``.

        Creates a fresh run in the destination experiment, copies run data
        and artifacts into it, and marks it FINISHED; on any failure the run
        is marked FAILED and an MlflowExportImportException is raised.

        :param dst_exp_name: Destination experiment name.
        :param src_run_id: Source run directory (contains ``run.json``).
        :return: Tuple of (created run, source parent-run-ID tag or None).
        """
        mlflow_utils.set_experiment(self.dbx_client, dst_exp_name)
        exp = self.mlflow_client.get_experiment_by_name(dst_exp_name)
        src_run_dct = utils.read_json_file(
            os.path.join(src_run_id, "run.json"))

        run = self.mlflow_client.create_run(exp.experiment_id)
        run_id = run.info.run_id
        try:
            src_user_id = src_run_dct["info"]["user_id"]
            self._import_run_data(src_run_dct, run_id, src_user_id)
            artifacts_path = os.path.join(src_run_id, "artifacts")
            # Runs exported without artifacts have no artifacts directory.
            if os.path.exists(_filesystem.mk_local_path(artifacts_path)):
                self.mlflow_client.log_artifacts(run_id,
                                                 mk_local_path(artifacts_path))
            if self.mlmodel_fix:
                self._update_mlmodel_run_id(run_id)
            self.mlflow_client.set_terminated(
                run_id, RunStatus.to_string(RunStatus.FINISHED))
        except Exception as e:
            # Mark the half-imported run FAILED before surfacing the error.
            self.mlflow_client.set_terminated(
                run_id, RunStatus.to_string(RunStatus.FAILED))
            import traceback
            from mlflow_export_import.common import MlflowExportImportException
            traceback.print_exc()
            raise MlflowExportImportException from e

        parent_run_id = src_run_dct["tags"].get(utils.TAG_PARENT_ID, None)
        return (run, parent_run_id)
# Example #3
    def import_model(self,
                     input_dir,
                     model_name,
                     experiment_name,
                     delete_model=False):
        """Import a registered model and all its versions from ``input_dir``.

        For each exported version the source run is imported into
        ``experiment_name``, a new model version is registered against the
        imported run's artifacts, and the original stage is restored.

        :param input_dir: Directory containing ``model.json`` and one
            subdirectory per exported run.
        :param model_name: Name for the destination registered model.
        :param experiment_name: Destination experiment for the imported runs.
        :param delete_model: Delete any existing model of that name first.
        """
        path = os.path.join(input_dir, "model.json")
        dct = utils.read_json_file(path)["registered_model"]

        print("Model to import:")
        print(f"  Name: {dct['name']}")
        print(f"  Description: {dct.get('description','')}")
        print(f"  Tags: {dct.get('tags','')}")
        print(f"  Total {len(dct['all_versions'])} versions")

        if delete_model:
            model_utils.delete_model(self.client, model_name)

        tags = {t["key"]: t["value"] for t in dct.get("tags", {})}
        self.client.create_registered_model(model_name, tags,
                                            dct.get("description"))
        mlflow.set_experiment(experiment_name)

        print("Importing all versions ...")
        # Import in version order so destination version numbers line up.
        for vr in sorted(dct["all_versions"], key=lambda v: v["version"]):
            src_run_id = vr["run_id"]
            source = vr["source"]
            current_stage = vr["current_stage"]
            artifact_uri = vr["artifact_uri"]
            run_dir = os.path.join(input_dir, src_run_id)
            print(f"  Version {vr['version']}:")
            print(f"    current_stage: {vr['current_stage']}:")
            print("    Run to import:")
            print("      run_id:", src_run_id)
            print("      artifact_uri:", artifact_uri)
            print("      source:      ", source)
            # Model path relative to the source run's artifact root.
            model_path = source.replace(artifact_uri + "/", "")
            print("      model_path:", model_path)
            dst_run_id, _ = self.run_importer.import_run(experiment_name,
                                                         run_dir)
            run = self.client.get_run(dst_run_id)
            print("    Imported run:")
            print("      run_id:", dst_run_id)
            print("      artifact_uri:", run.info.artifact_uri)
            source = os.path.join(run.info.artifact_uri, model_path)
            print("      source:      ", source)
            version = self.client.create_model_version(model_name, source,
                                                       dst_run_id)
            model_utils.wait_until_version_is_ready(self.client,
                                                    model_name,
                                                    version,
                                                    sleep_time=2)
            if current_stage != "None":
                self.client.transition_model_version_stage(
                    model_name, version.version, current_stage)
            assert vr["version"] == version.version
# Example #4
 def import_experiment_from_dir(self, exp_name, exp_dir):
     """Import every run listed in ``exp_dir``'s manifest into ``exp_name``.

     :param exp_name: Destination experiment name.
     :param exp_dir: Source experiment directory containing ``manifest.json``
         and one subdirectory per exported run.
     """
     mlflow.set_experiment(exp_name)
     manifest_path = os.path.join(exp_dir, "manifest.json")
     manifest = utils.read_json_file(manifest_path)
     run_ids = manifest["run_ids"]
     failed_run_ids = manifest["failed_run_ids"]
     print(f"Importing {len(run_ids)} runs into experiment '{exp_name}' from {exp_dir}")
     run_ids_mapping = {}
     for src_run_id in run_ids:
         src_run_dir = os.path.join(exp_dir, src_run_id)
         dst_run_id, src_parent_run_id = self.run_importer.import_run(
             exp_name, src_run_dir)
         run_ids_mapping[src_run_id] = (dst_run_id, src_parent_run_id)
     print(f"Imported {len(run_ids)} runs into experiment '{exp_name}' from {exp_dir}")
     if failed_run_ids:
         print(f"Warning: {len(failed_run_ids)} failed runs were not imported - see {manifest_path}")
     # Restore parent/child (nested-run) tags on the newly created runs.
     utils.nested_tags(self.client, run_ids_mapping)
 def import_run_from_dir(self, dst_exp_name, src_run_id):
     """Import one exported run directory into experiment ``dst_exp_name``.

     :param dst_exp_name: Destination experiment name.
     :param src_run_id: Source run directory (contains ``run.json``).
     :return: Tuple of (new run ID, source parent-run-ID tag or None).
     """
     mlflow.set_experiment(dst_exp_name)
     # Dropped: an unused get_experiment_by_name() fetch and the
     # commented-out debug prints that were its only consumers.
     src_run_path = os.path.join(src_run_id, "run.json")
     src_run_dct = utils.read_json_file(src_run_path)
     with mlflow.start_run() as run:
         run_id = run.info.run_id
         self.import_run_data(src_run_dct, run_id,
                              src_run_dct["info"]["user_id"])
         path = os.path.join(src_run_id, "artifacts")
         # Guard against runs exported without artifacts (matches the
         # existence check done by _import_run).
         if os.path.exists(mk_local_path(path)):
             mlflow.log_artifacts(mk_local_path(path))
     if self.mlmodel_fix:
         self.update_mlmodel_run_id(run_id)
     return (run_id, src_run_dct["tags"].get(utils.TAG_PARENT_ID, None))
def import_models(input_dir, run_info_map, delete_model, verbose, use_threads):
    """
    Import all models listed in the models manifest under ``input_dir``.

    :param input_dir: Root directory containing ``models/manifest.json``
        and one ``models/<name>`` directory per exported model.
    :param run_info_map: Map of source run IDs to destination run info,
        passed to ``AllModelImporter``.
    :param delete_model: Delete an existing model before importing it.
    :param verbose: Verbose flag passed through to the importer.
    :param use_threads: Import models concurrently with a thread pool.
    :return: Summary dict with "models" and "duration" keys.
    """
    import traceback
    # Parentheses make the (already correct) precedence explicit:
    # threaded -> one worker per CPU (fallback 4), otherwise sequential.
    max_workers = (os.cpu_count() or 4) if use_threads else 1
    start_time = time.time()
    models_dir = os.path.join(input_dir, "models")
    manifest_path = os.path.join(models_dir, "manifest.json")
    manifest = utils.read_json_file(manifest_path)
    models = manifest["ok_models"]
    importer = AllModelImporter(run_info_map)

    futures = {}
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        for model in models:
            model_dir = os.path.join(models_dir, model)
            futures[model] = executor.submit(importer.import_model, model_dir,
                                             model, delete_model, verbose)
    # Surface worker failures instead of letting submit() swallow them
    # silently; stay best-effort by reporting and continuing.
    for model, future in futures.items():
        try:
            future.result()
        except Exception:
            print(f"ERROR: Failed to import model '{model}'")
            traceback.print_exc()

    duration = round(time.time() - start_time, 1)
    return {"models": len(models), "duration": duration}
    def _import_model(self,
                      model_name,
                      input_dir,
                      delete_model=False,
                      verbose=False,
                      sleep_time=30):
        """
        Read the exported model metadata and create the registered model.

        An already-existing registered model is tolerated; any other registry
        error is re-raised.

        :param model_name: Model name. Falls back to the exported model's
            name when empty/None.
        :param input_dir: Input directory containing ``model.json``.
        :param delete_model: Delete current model before importing versions.
        :param verbose: Verbose.
        :param sleep_time: Seconds to wait for model version creation.
        :return: Model import manifest (the exported registered-model dict).
        """
        path = os.path.join(input_dir, "model.json")
        model_dct = utils.read_json_file(path)["registered_model"]

        print("Model to import:")
        print(f"  Name: {model_dct['name']}")
        print(f"  Description: {model_dct.get('description','')}")
        print(f"  Tags: {model_dct.get('tags','')}")
        print(f"  {len(model_dct['latest_versions'])} latest versions")
        print(f"  path: {path}")

        if not model_name:
            model_name = model_dct["name"]
        if delete_model:
            model_utils.delete_model(self.mlflow_client, model_name)

        try:
            tags = {e["key"]: e["value"] for e in model_dct.get("tags", {})}
            self.mlflow_client.create_registered_model(
                model_name, tags, model_dct.get("description"))
            print(f"Created new registered model '{model_name}'")
        except RestException as e:
            # Tolerate a pre-existing model; bare `raise` preserves the
            # original traceback for anything else.
            if "RESOURCE_ALREADY_EXISTS: Registered Model" not in str(e):
                raise
            print(f"Registered model '{model_name}' already exists")
        return model_dct
 def import_experiment(self, exp_name, input_dir):
     """
     Import every run listed in the source experiment's manifest.

     :param: exp_name: Destination experiment name.
     :param: input_dir: Source experiment directory.
     :return: A map of source run IDs and destination run.info.
     """
     mlflow_utils.set_experiment(self.dbx_client, exp_name)
     # The attribute access doubles as an existence check; the ID itself
     # is unused.  # TODO
     dst_exp_id = self.mlflow_client.get_experiment_by_name(
         exp_name).experiment_id
     manifest_path = os.path.join(input_dir, "manifest.json")
     export_info = utils.read_json_file(manifest_path)["export_info"]
     run_ids = export_info["ok_runs"]
     failed_run_ids = export_info["failed_runs"]
     print(f"Importing {len(run_ids)} runs into experiment '{exp_name}' from {input_dir}")
     run_ids_map = {}
     run_info_map = {}
     for src_run_id in run_ids:
         src_run_dir = os.path.join(input_dir, src_run_id)
         dst_run, src_parent_run_id = self.run_importer.import_run(
             exp_name, src_run_dir)
         run_ids_map[src_run_id] = {
             "dst_run_id": dst_run.info.run_id,
             "src_parent_run_id": src_parent_run_id
         }
         run_info_map[src_run_id] = dst_run.info
     print(f"Imported {len(run_ids)} runs into experiment '{exp_name}' from {input_dir}")
     if failed_run_ids:
         print(f"Warning: {len(failed_run_ids)} failed runs were not imported - see {manifest_path}")
     # Restore parent/child (nested-run) tags on the destination runs.
     utils.nested_tags(self.mlflow_client, run_ids_map)
     return run_info_map