def main(input_path, output_path):
    """Grid-search a PCA + SGD classifier on the iris CSV and track it in MLflow.

    The input CSV and the fitted best estimator are logged as artifacts of a
    single run in the "iris" experiment, together with the estimator defaults,
    the searched grid, the best parameters and the best CV score.

    Args:
        input_path: Path of the CSV file; it must contain a "Species" column,
            which is used as the target.
        output_path: Path the best estimator is dumped to before it is logged.
    """
    client = MlflowClient()
    experiment = client.get_experiment_by_name("iris")
    # get_experiment_by_name returns None for an unknown name, so create the
    # experiment on first use instead of failing on `.experiment_id`.
    if experiment is None:
        experiment_id = client.create_experiment("iris")
    else:
        experiment_id = experiment.experiment_id

    with mlflow.start_run(experiment_id=experiment_id):
        # Import dataset
        logging.info(f"reading {input_path}")
        mlflow.log_artifact(input_path)
        iris = pd.read_csv(input_path)
        X = iris.drop("Species", axis=1)
        y = iris.Species

        # Instantiate PCA
        pca = PCA()

        # Instantiate logistic regression
        logistic = SGDClassifier(loss='log', penalty='l2', max_iter=100,
                                 tol=1e-3, random_state=0)
        mlflow.log_params(logistic.get_params())

        # Parameters grid to try
        param_grid = {
            'pca__n_components': [2, 3],
            'logistic__alpha': np.logspace(-4, 4, 5),
        }
        mlflow.log_params(param_grid)

        # Define training pipeline
        pipe = Pipeline(steps=[('pca', pca), ('logistic', logistic)])

        # Training
        logging.info("beginning training")
        search = GridSearchCV(pipe, param_grid, iid=False, cv=3,
                              return_train_score=False)
        search.fit(X, y)
        print(f"Best parameter (CV score={search.best_score_}):")
        print(search.best_params_)

        # The grid was already logged under these exact keys and MLflow does
        # not allow a param value to change within a run, so the winning
        # values are logged under a "best_" prefix.
        mlflow.log_params({
            f"best_{name}": value
            for name, value in search.best_params_.items()
        })
        mlflow.log_metric("best_score", search.best_score_)

        # Save best model
        logging.info("saving best model")
        dump(search.best_estimator_, output_path)
        mlflow.log_artifact(output_path)
def get_experiment_id(client: MlflowClient, experiment_name: str) -> int:
    """Return the id of the named experiment, creating the experiment if absent.

    Args:
        client: Client used both for the lookup and for the creation.
        experiment_name: Name of the experiment to resolve.

    Returns:
        The `experiment_id` of the existing experiment, or the value returned
        by `client.create_experiment` when no experiment has that name.
    """
    existing = client.get_experiment_by_name(experiment_name)
    if existing is not None:
        return existing.experiment_id
    return client.create_experiment(experiment_name)
def test_hyperopt_ray_mlflow(csv_filename, tmpdir, ray_cluster_4cpu):
    """Hyperopt on Ray must leave runs with config and model artifacts in MLflow."""
    mlflow_uri = f"file://{tmpdir}/mlruns"
    mlflow.set_tracking_uri(mlflow_uri)
    client = MlflowClient(tracking_uri=mlflow_uri)

    num_samples = 2
    search_alg = {"type": "variant_generator"}
    executor = {"type": "ray", "num_samples": num_samples}
    config = _get_config(search_alg, executor)

    rel_path = generate_data(
        config["input_features"], config["output_features"], csv_filename
    )

    exp_name = "mlflow_test"
    run_hyperopt(
        config,
        rel_path,
        tmpdir,
        experiment_name=exp_name,
        callbacks=[MlflowCallback(mlflow_uri)],
    )

    experiment = client.get_experiment_by_name(exp_name)
    assert experiment is not None

    runs = client.search_runs([experiment.experiment_id])
    assert len(runs) > 0

    # Every run must carry both the resolved config and the trained model.
    for run in runs:
        artifact_paths = {
            entry.path for entry in client.list_artifacts(run.info.run_id, "")
        }
        assert "config.yaml" in artifact_paths
        assert "model" in artifact_paths
def get_exp_id(name, registry_uri, artifact_location):
    """Look up an experiment id by name, creating the experiment when missing.

    Args:
        name: Experiment name.
        registry_uri: Passed to `MlflowClient` as `registry_uri`.
        artifact_location: Artifact location used only if the experiment has
            to be created.

    Returns:
        The id of the existing experiment, or the id returned by
        `create_experiment` for a new one.
    """
    client = MlflowClient(registry_uri=registry_uri)
    found = client.get_experiment_by_name(name)
    if found:
        return found.experiment_id
    return client.create_experiment(name, artifact_location=artifact_location)
def test_hyperopt_ray_mlflow(csv_filename, ray_start_4_cpus, tmpdir):
    """Hyperopt on Ray must leave runs with config and model artifacts in MLflow."""
    mlflow_uri = f'file://{tmpdir}/mlruns'
    mlflow.set_tracking_uri(mlflow_uri)
    client = MlflowClient(tracking_uri=mlflow_uri)

    num_samples = 2
    first_section = {"type": "ray", "num_samples": num_samples}
    second_section = {"type": "ray"}
    config = _get_config(first_section, second_section)

    rel_path = generate_data(
        config['input_features'], config['output_features'], csv_filename
    )

    exp_name = 'mlflow_test'
    run_hyperopt(
        config,
        rel_path,
        experiment_name=exp_name,
        callbacks=[MlflowCallback(mlflow_uri)],
    )

    experiment = client.get_experiment_by_name(exp_name)
    assert experiment is not None

    runs = client.search_runs([experiment.experiment_id])
    assert len(runs) > 0

    # Every run must carry both the resolved config and the trained model.
    for run in runs:
        artifact_paths = {
            entry.path for entry in client.list_artifacts(run.info.run_id, "")
        }
        assert 'config.yaml' in artifact_paths
        assert 'model' in artifact_paths
def test_kedro_mlflow_config_experiment_was_deleted(kedro_project_with_mlflow_conf):
    """Setting up the config must make a previously deleted experiment listed again."""
    project_path = kedro_project_with_mlflow_conf

    # create an experiment with the same name and then delete it
    mlflow_tracking_uri = (project_path / "mlruns").as_uri()
    mlflow_client = MlflowClient(mlflow_tracking_uri)
    mlflow_client.create_experiment("exp1")
    deleted_id = mlflow_client.get_experiment_by_name("exp1").experiment_id
    mlflow_client.delete_experiment(deleted_id)

    # the config must restore properly the experiment
    config = KedroMlflowConfig(
        project_path=project_path,
        mlflow_tracking_uri="mlruns",
        experiment_opts=dict(name="exp1"),
    )

    project_metadata = _get_project_metadata(project_path)
    _add_src_to_path(project_metadata.source_dir, project_path)
    configure_project(project_metadata.package_name)
    with KedroSession.create("fake_project", project_path=project_path):
        config.setup()
        listed_names = [
            exp.name for exp in config.mlflow_client.list_experiments()
        ]
        assert "exp1" in listed_names
def get_run_id(client: MlflowClient, experiment_name: str, model_key: str) -> str:
    """
    Create a run tagged with ``model_key`` in the named experiment.

    The experiment is created first when no experiment with that name exists.
    The run is created through the client, so it must be stopped manually
    with `mlflow.tracking.MlflowClient.set_terminated`.

    Parameters
    ----------
    client: mlflow.tracking.MlflowClient
        Client used for the experiment lookup/creation and the run creation.
    experiment_name: str
        Name of experiment to log to.
    model_key: str
        Unique ID of model configuration; stored as the ``model_key`` tag.

    Returns
    -------
    run_id: str
        Unique ID of the MLflow run that was created.
    """
    experiment = client.get_experiment_by_name(experiment_name)
    if experiment:
        experiment_id = experiment.experiment_id
    else:
        experiment_id = client.create_experiment(experiment_name)

    run = client.create_run(experiment_id, tags={"model_key": model_key})
    return run.info.run_id
class MlflowWriter:
    """Thin wrapper that logs to one MLflow run through an `MlflowClient`."""

    def __init__(self, experiment_name, **kwargs):
        """Create (or reuse) the experiment and open a new run in it.

        Args:
            experiment_name: Name of the experiment to log to.
            **kwargs: Forwarded unchanged to `MlflowClient`.
        """
        self.client = MlflowClient(**kwargs)
        try:
            self.experiment_id = self.client.create_experiment(experiment_name)
        except Exception:
            # Creation failed (typically because the experiment already
            # exists), so fall back to looking it up by name.
            self.experiment_id = self.client.get_experiment_by_name(
                experiment_name).experiment_id

        self.run_id = self.client.create_run(self.experiment_id).info.run_id

    def log_params_from_omegaconf_dict(self, params):
        """Log every leaf of an OmegaConf mapping as a dot-separated param."""
        for param_name, element in params.items():
            self._explore_recursive(param_name, element)

    def _explore_recursive(self, parent_name, element):
        """Walk `element` and log each leaf value under its dotted path.

        Mappings contribute their keys and lists their indices to the path.
        Any other value is a leaf and is logged as-is, so top-level scalars
        and containers nested inside lists are no longer lost or stringified.
        """
        if isinstance(element, DictConfig):
            for k, v in element.items():
                self._explore_recursive(f"{parent_name}.{k}", v)
        elif isinstance(element, ListConfig):
            for i, v in enumerate(element):
                self._explore_recursive(f"{parent_name}.{i}", v)
        else:
            self.client.log_param(self.run_id, parent_name, element)

    def log_torch_model(self, model):
        """Log a PyTorch model under the "models" artifact path of this run."""
        with mlflow.start_run(self.run_id):
            pytorch.log_model(model, "models")

    def log_param(self, key, value):
        """Log a single param on this run."""
        self.client.log_param(self.run_id, key, value)

    def log_metric(self, key, value, timestamp=None, step=None):
        """Log a single metric value on this run."""
        self.client.log_metric(self.run_id, key, value, timestamp, step)

    def log_artifact(self, local_path, artifact_path=None):
        """Upload a local file as an artifact of this run."""
        self.client.log_artifact(self.run_id, local_path, artifact_path)

    def set_terminated(self):
        """Mark this run as terminated."""
        self.client.set_terminated(self.run_id)

    def move_mlruns(self):
        """Copy the run written under the Hydra working dir to the original cwd.

        NOTE(review): both source and destination hard-code the experiment
        directory "1" and only the first run directory found is copied;
        confirm this matches how the runs are laid out.
        """
        # Copy the run.
        hydra_cwd = os.getcwd()
        src_mlrun_dir = os.path.join(hydra_cwd, "mlruns", "1")
        src_mlrun_path = [
            file_folder for file_folder in glob.glob(f"{src_mlrun_dir}/*")
            if os.path.isdir(file_folder)
        ][0]
        run_hash = os.path.basename(src_mlrun_path)
        dst_mlrun_path = os.path.join(hydra.utils.get_original_cwd(),
                                      "mlruns", "1", run_hash)
        shutil.copytree(src_mlrun_path, dst_mlrun_path)
        overwrite_meta_yaml(dst_mlrun_path, run_hash)

        # Copy the experiment metadata.
        dst_exp_path = os.path.join(hydra.utils.get_original_cwd(),
                                    "mlruns", "1")
        copy_exp_meta_yaml(src_mlrun_dir, dst_exp_path)
def setUp(self):
    """Create temporary tracking/registry locations seeded with one experiment."""
    self.tracking_uri = tempfile.mkdtemp()
    self.registry_uri = tempfile.mkdtemp()

    client = MlflowClient(
        tracking_uri=self.tracking_uri,
        registry_uri=self.registry_uri,
    )
    client.create_experiment(name="existing_experiment")

    # Sanity check on the id the store assigned to the seeded experiment.
    seeded = client.get_experiment_by_name("existing_experiment")
    assert seeded.experiment_id == "0"
def get_most_recent_run(experiment_name: str):
    """Return the most recently started run of an experiment as a one-row frame.

    Args:
        experiment_name: Name of the experiment whose runs are searched.

    Returns:
        A DataFrame holding the row of `mlflow.search_runs` with the largest
        `start_time`, plus an `experiment_name` column.

    Raises:
        AttributeError: If no experiment with that name exists (the lookup
            returns None).
    """
    client = MlflowClient()
    experiment_id = client.get_experiment_by_name(
        experiment_name).experiment_id
    df_experiment_runs = mlflow.search_runs(experiment_ids=[experiment_id])
    latest = df_experiment_runs.sort_values(
        "start_time", ascending=False).head(1)
    # `head` returns a slice of the sorted frame; `assign` builds a new frame
    # rather than writing a column into that slice (chained assignment).
    return latest.assign(experiment_name=experiment_name)
def _get_exp_id_from_name(name):
    """Return the id of the experiment `name`, creating it if it does not exist.

    The tracking URI is read from the MLFLOW_TRACKING_URI environment
    variable.

    Args:
        name: Experiment name.

    Returns:
        The id of the newly created or already existing experiment.

    Raises:
        KeyError: If MLFLOW_TRACKING_URI is not set.
        MlflowException: If creation fails for any reason other than the
            experiment already existing.
    """
    client = MlflowClient(tracking_uri=os.environ['MLFLOW_TRACKING_URI'])
    try:
        return client.create_experiment(name)
    except MlflowException as exc:
        if exc.error_code != 'RESOURCE_ALREADY_EXISTS':
            # Previously this fell through to an UnboundLocalError that hid
            # the real failure; surface the original error instead.
            raise
    return client.get_experiment_by_name(name).experiment_id
def preprocess_data(catalog, ml_stage, parameters='default', config_path=None,
                    requires=None, provides=None, load_run_id=None,
                    enable_tracking=None, experiment=None, tracking_uri=None,
                    save_dir=None, tags=None):
    """Load the configuration, optionally open an MLflow run, and run preprocessing.

    Args:
        catalog, parameters, config_path, tracking_uri, save_dir: Forwarded
            to `load_configuration`.
        ml_stage: Stage passed to the data catalog and the preprocess step.
        requires, provides: Optional mappings forwarded to the step; each is
            copied into a new dict (None becomes an empty dict).
        load_run_id: Passed to the data catalog as `load_versions`.
        enable_tracking: When truthy, a run is created in `experiment` and
            its id is used as the catalog's save version.
        experiment: Experiment name, created if it cannot be created anew.
        tags: Forwarded to the preprocess step.
    """
    cfg = load_configuration(catalog=catalog,
                             parameters=parameters,
                             config_path=config_path,
                             tracking_uri=tracking_uri,
                             save_dir=save_dir)

    run_id = None
    if enable_tracking:
        client = MlflowClient(cfg.tracking_uri)
        try:
            experiment_id = client.create_experiment(experiment)
        except Exception:
            # Was a bare `except:`, which also trapped KeyboardInterrupt and
            # SystemExit. Creation failed (typically: already exists), so
            # look the experiment up by name instead.
            experiment_id = client.get_experiment_by_name(
                experiment).experiment_id
        mlrun = client.create_run(experiment_id=experiment_id)
        run_id = mlrun.info.run_id

    data_catalog = DataCatalog.from_config(catalog=cfg.catalog,
                                           data_dir=cfg.data_dir,
                                           load_versions=load_run_id,
                                           save_version=run_id,
                                           ml_stages=[ml_stage])

    parameters = cfg.parameters.preprocess
    rebind = cfg.catalog.rebind_names
    requires = dict() if requires is None else dict(requires)
    provides = dict() if provides is None else dict(provides)

    preprocess = PreprocessStep(data_catalog,
                                parameters,
                                ml_stage=ml_stage,
                                rebind=rebind,
                                requires=requires,
                                provides=provides,
                                run_id=run_id,
                                tracking_uri=cfg.tracking_uri,
                                experiment_name=experiment,
                                tags=tags)
    preprocess()
def register_best_model(model_name, experiment_name, parent_run_name, metric,
                        order_by="ASC", model_artifact_name="model"):
    """
    Register the best child run's model of a parent run in the Model Registry.

    The parent run is the first run named `parent_run_name` in the experiment
    (ordered by ``metrics.loss``); the best child is the first run whose
    ``mlflow.parentRunId`` tag matches it, ordered by `metric`.

    :param model_name: model name in the Model registry
    :type model_name: str
    :param experiment_name: name of the experiment
    :type experiment_name: str
    :param parent_run_name: name of the parent run whose child runs are ranked
    :type parent_run_name: str
    :param metric: name of the metric used to rank the child runs
    :type metric: str
    :param order_by: "ASC" to order metric values by ascending order, "DESC"
        for descending
    :type order_by: str
    :param model_artifact_name: artifact path of the model inside the run
    :type model_artifact_name: str
    :return: whatever `mlflow.register_model` returns for the registered model
    """
    client = MlflowClient()
    experiment = client.get_experiment_by_name(experiment_name)
    experiment_id = experiment.experiment_id

    parent_candidates = client.search_runs(
        experiment_id,
        filter_string=f"tags.mlflow.runName = '{parent_run_name}'",
        order_by=[f"metrics.loss {order_by}"],
    )
    parent_run_id = parent_candidates[0].info.run_id

    child_candidates = client.search_runs(
        experiment_id,
        filter_string=f"tags.mlflow.parentRunId = '{parent_run_id}'",
        order_by=[f"metrics.{metric} {order_by}"],
    )
    best_child = child_candidates[0]

    best_model_uri = f"runs:/{best_child.info.run_id}/{model_artifact_name}"
    return mlflow.register_model(model_uri=best_model_uri, name=model_name)
def set_experiment(self, experiment_name: str = "") -> None:
    """Select the active experiment and cache its id.

    If the requested name is not among `self.list_experiments()`, the session
    name falls back to the empty string before the lookup.
    NOTE(review): presumably "" is meant to resolve to the default
    experiment; confirm the backend returns one for that name.

    Args:
        experiment_name (str): Experiment name.
    """
    self._session.experiment_name = experiment_name
    if self._session.experiment_name not in self.list_experiments():
        self._session.experiment_name = ""

    uri = self.__get_uri()
    experiment = MlflowClient(uri).get_experiment_by_name(
        self._session.experiment_name)
    self.__experiment_id = experiment.experiment_id
def mlflow(mlflow_uri=None, tags=None, show_args=None):
    """Open an MLflow run and record git/host tags and run arguments on it.

    Args:
        mlflow_uri: Tracking URI. When None, tracking is disabled and nothing
            is imported, tagged or logged.
        tags: Optional dict of tags. It is extended in place with git branch,
            git hash, a timestamp and, if the MY_HOSTIP / MY_HOSTNAME
            environment variables are set, host info. The "exp_name" tag,
            when present, selects the experiment (default "Ocr-PSEOnly").
        show_args: Optional dict of arguments logged as params. It is
            extended in place with one "tags-<key>" entry per tag.

    Returns:
        `(client, run_id)` for the created run, or `(None, None)` when
        tracking is disabled or the mlflow package cannot be imported.
    """
    if mlflow_uri is None:
        return None, None

    try:
        from mlflow.tracking import MlflowClient
    except ImportError:
        # The old fallback bound MlflowClient to NotImplemented and crashed
        # when calling it; report the problem and disable tracking instead.
        logger.error('ERROR: Failed importing mlflow')
        return None, None

    # Fresh dicts per call: mutable defaults would leak tags between calls.
    tags = {} if tags is None else tags
    show_args = {} if show_args is None else show_args

    tags["git_branch"] = subprocess.check_output(
        ["git", "describe", "--all"]).decode("utf8").strip()
    tags["git_hash"] = subprocess.check_output(
        ["git", "describe", "--always"]).decode("utf8").strip()
    tags["timestamp"] = datetime.now().strftime("_%Y-%m-%d-%H-%M-%S")
    if "MY_HOSTIP" in os.environ:
        tags["my_host_ip"] = os.environ["MY_HOSTIP"]
    if "MY_HOSTNAME" in os.environ:
        tags["my_host_name"] = os.environ["MY_HOSTNAME"]

    for key, value in tags.items():
        show_args[f'tags-{key}'] = value

    exp_name = tags.get("exp_name", "Ocr-PSEOnly")

    ml_client = MlflowClient(mlflow_uri)
    ml_exp = ml_client.get_experiment_by_name(exp_name)
    if ml_exp is None:
        ml_exp_id = ml_client.create_experiment(exp_name)
    else:
        ml_exp_id = ml_exp.experiment_id

    ml_run = ml_client.create_run(ml_exp_id)
    logger.info("ml_run: {}", ml_run)
    ml_run_id = ml_run.info.run_id

    for key, value in tags.items():
        logger.info(f'tag --- {key}: {value}')
        ml_client.set_tag(ml_run_id, key, value)
    logger.info("------------")
    for key, value in show_args.items():
        logger.info(f'{key}: {value}')
        ml_client.log_param(ml_run_id, key, value)

    return ml_client, ml_run_id
def test_mlflow(ray_start_4_cpus, tmp_path):
    """A training run with MLflowLoggerCallback must yield one complete MLflow run."""
    config = TestConfig()
    params = {"p1": "p1"}
    temp_dir = tmp_path
    num_workers = 4

    def train_func(config):
        # Three reports, so the metric history below has three steps.
        for reward in (4, 5, 6):
            train.report(episode_reward_mean=reward)
        return 1

    callback = MLflowLoggerCallback(experiment_name="test_exp",
                                    logdir=temp_dir)
    trainer = Trainer(config, num_workers=num_workers)
    trainer.start()
    trainer.run(train_func, config=params, callbacks=[callback])

    from mlflow.tracking import MlflowClient

    fluent = callback.mlflow_util._mlflow
    client = MlflowClient(tracking_uri=fluent.get_tracking_uri())

    experiment_id = client.get_experiment_by_name("test_exp").experiment_id
    all_runs = callback.mlflow_util._mlflow.search_runs(
        experiment_ids=[experiment_id])
    assert len(all_runs) == 1

    # all_runs is a pandas dataframe.
    run_records = all_runs.to_dict(orient="records")
    run_id = run_records[0]["run_id"]
    run = client.get_run(run_id)

    assert run.data.params == params

    metrics = run.data.metrics
    assert "episode_reward_mean" in metrics
    assert metrics["episode_reward_mean"] == 6.0
    assert TRAINING_ITERATION in metrics
    assert metrics[TRAINING_ITERATION] == 3.0

    metric_history = client.get_metric_history(run_id=run_id,
                                               key="episode_reward_mean")
    assert len(metric_history) == 3
    assert [entry.step for entry in metric_history] == [1, 2, 3]
    assert [entry.value for entry in metric_history] == [4, 5, 6]
class MlflowLogger:
    """Thin wrapper that logs to one MLflow run through an `MlflowClient`."""

    def __init__(self, experiment_name, tracking_uri=None, registry_uri=None,
                 **kwargs):
        """Create (or reuse) the experiment and open a new run in it.

        Args:
            experiment_name: Name of the experiment to log to.
            tracking_uri: Passed positionally to `MlflowClient`.
            registry_uri: Passed positionally to `MlflowClient`.
            **kwargs: Forwarded to the next `__init__` in the MRO.
        """
        super().__init__(**kwargs)
        self.client = MlflowClient(tracking_uri, registry_uri)
        try:
            self.experiment_id = self.client.create_experiment(experiment_name)
        except MlflowException:
            # Creation failed (typically because the experiment already
            # exists), so fall back to looking it up by name.
            self.experiment_id = self.client.get_experiment_by_name(
                experiment_name).experiment_id

        self.run_id = self.client.create_run(self.experiment_id).info.run_id

    def log_params_from_omegaconf_dict(self, params):
        """Log every leaf of an OmegaConf mapping as a dot-separated param."""
        for param_name, element in params.items():
            self._explore_recursive(param_name, element)

    def _explore_recursive(self, parent_name, element):
        """Walk `element` and log each leaf value under its dotted path.

        Mappings contribute their keys and lists their indices to the path.
        Any other value is a leaf and is logged as-is, so top-level scalars
        and containers nested inside lists are no longer lost or stringified.
        """
        if isinstance(element, DictConfig):
            for k, v in element.items():
                self._explore_recursive(f'{parent_name}.{k}', v)
        elif isinstance(element, ListConfig):
            for i, v in enumerate(element):
                self._explore_recursive(f'{parent_name}.{i}', v)
        else:
            self.client.log_param(self.run_id, parent_name, element)

    def log_metric(self, key, value):
        """Log a single metric value on this run."""
        self.client.log_metric(self.run_id, key, value)

    def log_torch_model(self, model, model_name):
        """Log a PyTorch model under the `model_name` artifact path."""
        with mlflow.start_run(self.run_id):
            pytorch.log_model(model, model_name)

    def log_param(self, key, value):
        """Log a single param on this run."""
        self.client.log_param(self.run_id, key, value)

    def log_artifact(self, local_path):
        """Upload a local file as an artifact of this run."""
        self.client.log_artifact(self.run_id, local_path)

    def set_terminated(self):
        """Mark this run as terminated."""
        self.client.set_terminated(self.run_id)
class MlflowWriter():
    """Thin wrapper that logs to one named MLflow run through an `MlflowClient`."""

    def __init__(self, experiment_name, task_name, **kwargs):
        """Create (or reuse) the experiment and open a run named `task_name`.

        Args:
            experiment_name: Name of the experiment to log to.
            task_name: Stored as the run-name tag of the created run.
            **kwargs: Forwarded unchanged to `MlflowClient`.
        """
        self.client = MlflowClient(**kwargs)
        self.experiment_name = experiment_name
        self.task_name = task_name
        try:
            self.experiment_id = self.client.create_experiment(experiment_name)
        except Exception:
            # Was a bare `except:`. Creation failed (typically because the
            # experiment already exists), so look it up by name instead.
            self.experiment_id = self.client.get_experiment_by_name(
                experiment_name).experiment_id

        self.run_id = self.client.create_run(self.experiment_id).info.run_id
        # Tag the run created above. The fluent `mlflow.set_tag` acts on the
        # fluent API's active run, which is not this client-created run.
        self.client.set_tag(self.run_id, MLFLOW_RUN_NAME, task_name)

        print(f"Exp ID:{self.experiment_id}, Run ID:{self.run_id}")

    def log_params_from_omegaconf_dict(self, params):
        """Log every leaf of an OmegaConf mapping as a dot-separated param."""
        for param_name, element in params.items():
            self._explore_recursive(param_name, element)

    def _explore_recursive(self, parent_name, element):
        """Walk `element` and log each leaf value under its dotted path.

        Mappings contribute their keys and lists their indices to the path.
        Any other value is a leaf and is logged as-is, so top-level scalars
        and containers nested inside lists are no longer lost or stringified.
        """
        if isinstance(element, DictConfig):
            for k, v in element.items():
                self._explore_recursive(f'{parent_name}.{k}', v)
        elif isinstance(element, ListConfig):
            for i, v in enumerate(element):
                self._explore_recursive(f'{parent_name}.{i}', v)
        else:
            self.client.log_param(self.run_id, parent_name, element)

    def log_torch_model(self, model):
        """Log a PyTorch model under the 'models' artifact path of this run."""
        with mlflow.start_run(self.run_id):
            pytorch.log_model(model, 'models')

    def log_param(self, key, value):
        """Log a single param on this run."""
        self.client.log_param(self.run_id, key, value)

    def log_metric(self, key, value):
        """Log a single metric value on this run."""
        self.client.log_metric(self.run_id, key, value)

    def log_artifact(self, local_path):
        """Upload a local file as an artifact of this run."""
        self.client.log_artifact(self.run_id, local_path)

    def set_terminated(self):
        """Mark this run as terminated."""
        self.client.set_terminated(self.run_id)
def get_or_create_experiment(experiment_name) -> Experiment:
    """
    Fetch an MLflow experiment by name, creating it when needed.

    :param experiment_name: str. The name of the experiment in MLflow.
    :return: the existing experiment when one is found and it is not in the
        'deleted' lifecycle stage; otherwise the newly created experiment.
        None is returned (after logging the error) if anything fails.
    """
    try:
        client = MlflowClient()
        found = client.get_experiment_by_name(name=experiment_name)
        if not found or found.lifecycle_stage == 'deleted':
            new_id = client.create_experiment(name=experiment_name)
            return client.get_experiment(experiment_id=new_id)
        return found
    except Exception as e:
        logger.error(
            f'Unable to get or create experiment {experiment_name}: {e}')
        return None
def testLogArtifact(self):
    """Post three local files to the artifact endpoint for a freshly created run."""
    with open('artifacts/foo.txt', 'rb') as text_file, \
            open('artifacts/image.png', 'rb') as image_file, \
            open('artifacts/animation.gif', 'rb') as gif_file:
        client = MlflowClient(tracking_uri="http://localhost:5000")

        # Reuse the "foo" experiment when creating it is rejected.
        try:
            experiment_id = client.create_experiment("foo")
        except RestException:
            experiment_id = client.get_experiment_by_name("foo").experiment_id

        run_id = client.create_run(experiment_id).info.run_id
        print(experiment_id + ":" + run_id)

        payload = {'file1': text_file, 'file2': image_file, 'file3': gif_file}
        form = {'run_id': run_id}
        response = requests.post('http://localhost:5001/log_artifact',
                                 files=payload, data=form)
        print(response.text)
def _init(self):
    """Open an MLflow run for this trial in a file store next to the log dir.

    The store lives in an 'mlruns' directory beside `self.logdir`. The
    experiment name comes from the "mlflow_experiment" config entry
    (default "test") and the run is named after the trial id.
    """
    from mlflow.tracking import MlflowClient

    uri = osp.join(osp.dirname(self.logdir), 'mlruns')
    client = MlflowClient(tracking_uri=uri)

    exp_name = self.config.get("mlflow_experiment", "test")
    experiment = client.get_experiment_by_name(exp_name)
    # get_experiment_by_name yields an Experiment (or None) while
    # create_experiment yields the id itself; normalise both to the id.
    # Previously `.experiment_id` was read off the string id of a new
    # experiment, which raised AttributeError.
    if experiment is not None:
        experiment_id = experiment.experiment_id
    else:
        experiment_id = client.create_experiment(exp_name)

    run = client.create_run(experiment_id,
                            tags={'mlflow.runName': self.trial.trial_id})
    self._run_id = run.info.run_id
    self.client = client
    self._log_hparams()
def test_on_pipeline_error(kedro_project_with_mlflow_conf):
    """A failing pipeline must close the MLflow run and mark it as FAILED."""
    project_path = kedro_project_with_mlflow_conf
    tracking_uri = (project_path / "mlruns").as_uri()

    bootstrap_project(project_path)
    with KedroSession.create(project_path=project_path) as session:
        context = session.load_context()
        with pytest.raises(ValueError):
            session.run()

        # the run we want is the last one in the configuration experiment
        mlflow_client = MlflowClient(tracking_uri)
        experiment_name = context.mlflow.tracking.experiment.name
        experiment = mlflow_client.get_experiment_by_name(experiment_name)
        run_infos = MlflowClient(tracking_uri).list_run_infos(
            experiment.experiment_id)
        failing_run_info = run_infos[0]

        # the run must have been closed
        assert mlflow.active_run() is None
        # it must be marked as failed
        expected_status = RunStatus.to_string(RunStatus.FAILED)
        assert failing_run_info.status == expected_status
def test_kedro_mlflow_config_experiment_was_deleted(mocker, tmp_path):
    """Building the config must make a previously deleted experiment listed again."""
    # make "tmp_path" be treated as the root of a kedro project
    mocker.patch("kedro_mlflow.utils._is_kedro_project", lambda x: True)

    # create an experiment with the same name and then delete it
    mlflow_tracking_uri = (tmp_path / "mlruns").as_uri()
    mlflow_client = MlflowClient(mlflow_tracking_uri)
    mlflow_client.create_experiment("exp1")
    deleted_id = mlflow_client.get_experiment_by_name("exp1").experiment_id
    mlflow_client.delete_experiment(deleted_id)

    # the config must restore properly the experiment
    config = KedroMlflowConfig(
        project_path=tmp_path,
        mlflow_tracking_uri="mlruns",
        experiment_opts=dict(name="exp1"),
    )

    listed_names = [
        exp.name for exp in config.mlflow_client.list_experiments()
    ]
    assert "exp1" in listed_names
class Experiment:
    """Base class describing a grid of runs tracked in one MLflow experiment.

    Subclasses override the class attributes below.
    """

    # Name of the MLflow experiment whose runs `get_runs` returns.
    EXPERIMENT_NAME = 'None'
    NUM_RUNS = 1
    # Parameters applied on top of the base config for every combination.
    FIXED_PARAMS = {}
    # Must provide 'source_language' and 'target_language' iterables.
    LANGUAGE_PARAMS = {}
    # Maps a parameter name to the values to sweep over.
    CHANGING_PARAMS = {}

    def __init__(self, base_config):
        """Store the base config and connect to its 'mlflow_output_uri'."""
        self.base_config = base_config
        self.mlflow_client = MlflowClient(
            tracking_uri=base_config['mlflow_output_uri'])

    def get_runs(self):
        """Return the runs of the experiment named `EXPERIMENT_NAME`."""
        mlflow_experiment = self.mlflow_client.get_experiment_by_name(
            self.EXPERIMENT_NAME)
        return self.mlflow_client.search_runs(
            experiment_ids=[mlflow_experiment.experiment_id])

    def get_parameters_combinations(self):
        """Yield one parameter dict per language pair and swept combination.

        Pairs whose source and target language are equal are skipped. Each
        yielded dict is an independent copy built from the base config, the
        fixed params, the language pair and (if any) one combination of the
        changing params. Previously a single shared dict (the base config
        itself) was mutated and re-yielded, so collecting the results gave
        N references to the last combination.
        """
        base_params = {**self.base_config, **self.FIXED_PARAMS}
        changing_names = list(self.CHANGING_PARAMS)

        for source_language in self.LANGUAGE_PARAMS['source_language']:
            for target_language in self.LANGUAGE_PARAMS['target_language']:
                if source_language == target_language:
                    continue

                pair_params = {
                    **base_params,
                    'source_language': source_language,
                    'target_language': target_language,
                }
                if not self.CHANGING_PARAMS:
                    yield pair_params
                    continue

                # Cartesian product over the swept parameter values.
                for values in product(*self.CHANGING_PARAMS.values()):
                    yield {**pair_params, **dict(zip(changing_names, values))}
class MlflowWriter(BaseMlWriter):
    """Writer that logs to one MLflow run stored under `<log_dir>/mlflow/mlruns`."""

    def __init__(self, log_dir, experiment_name):
        """Create (or reuse) the experiment and open a new run in it.

        Args:
            log_dir: Base directory; must support the `/` path operator.
            experiment_name: Name of the experiment to log to.
        """
        super().__init__()
        mlflow_dir = log_dir / "mlflow" / "mlruns"
        self.client = MlflowClient(tracking_uri=str(mlflow_dir))
        try:
            self.experiment_id = self.client.create_experiment(experiment_name)
        except Exception:
            # Was a bare `except:`. Creation failed (typically because the
            # experiment already exists), so look it up by name instead.
            self.experiment_id = self.client.get_experiment_by_name(
                experiment_name).experiment_id

        self.run_id = self.client.create_run(self.experiment_id).info.run_id

    def __del__(self):
        """Terminate the run when the writer is garbage-collected."""
        # `run_id` is missing if __init__ failed part-way; in that case there
        # is no run to terminate and raising from __del__ is just noise.
        run_id = getattr(self, "run_id", None)
        if run_id is not None:
            self.client.set_terminated(run_id)

    def log_params(self, params: Dict):
        """Flatten a nested dict with `flatten` and log each entry as a param."""
        flatten_params = flatten(params)
        for key, value in flatten_params.items():
            self.log_param(key, value)

    def log_artifact(self, local_path: Path):
        """Upload `local_path` as an artifact, or log a notice if it is missing."""
        if local_path.exists():
            self.client.log_artifact(self.run_id, str(local_path))
        else:
            logger.info(f"NOT Exists: {local_path}")

    def log_torch_model(self, model):
        """Log a PyTorch model under the 'models' artifact path of this run."""
        with mlflow.start_run(self.run_id):
            mlflow.pytorch.log_model(model, 'models')

    def log_param(self, key, value):
        """Log a single param on this run."""
        self.client.log_param(self.run_id, key, value)

    def log_metric(self, key, value):
        """Log a single metric value on this run."""
        self.client.log_metric(self.run_id, key, value)
class Tracker():
    """Logs params, metrics and artifacts to a single MLflow run."""

    def __init__(self, experiment: str, tracking_uri: str):
        """Remember the experiment name and connect to `tracking_uri`.

        No experiment or run is touched until `start_run` is called.
        """
        self.client = MlflowClient(tracking_uri=tracking_uri)
        self.experiment_name = experiment
        self.experiment_id = None
        self.run_id = None

    def log_param(self, key: str, value: Any):
        """Log one param on the current run."""
        self.client.log_param(self.run_id, key, value)

    def log_params(self, params: Dict):
        """Log every entry of `params` as a param."""
        for name, value in params.items():
            self.log_param(name, value)

    def log_artifacts(self, artifacts: List[str]):
        """Upload each listed local path as an artifact of the current run."""
        for path in artifacts:
            self.client.log_artifact(self.run_id, path)

    def log_metrics(self, metrics: Dict[str, Any]):
        """Log metrics; a list value is logged as a series, one step per item."""
        for name, value in metrics.items():
            if not isinstance(value, list):
                self.client.log_metric(self.run_id, name, value)
                continue
            for step, item in enumerate(value):
                self.client.log_metric(self.run_id, name, item, step=step)

    def start_run(self):
        """Create the experiment (or look it up if creation fails) and open a run."""
        try:
            self.experiment_id = self.client.create_experiment(
                self.experiment_name)
        except Exception:
            existing = self.client.get_experiment_by_name(
                self.experiment_name)
            self.experiment_id = existing.experiment_id

        self.run_id = self.client.create_run(self.experiment_id).info.run_id

    def end_run(self):
        """Mark the current run as terminated."""
        self.client.set_terminated(self.run_id)
class RemoteTracking:
    """Convenience wrapper around an `MlflowClient` kept in `self.server`."""

    def __init__(self, tracking_uri=None, registry_uri=None):
        """Connect to the given tracking and registry URIs."""
        self.server = MlflowClient(tracking_uri, registry_uri)

    def get_experiment_id(self, name, artifact_location=None):
        """Return the id of experiment `name`, creating it when it is missing.

        `artifact_location` is only used when the experiment is created.
        """
        experiment = self.server.get_experiment_by_name(name)
        if not experiment:
            print("Experiment not exist")
            print("Creating new experiment on tracking")
            return self.server.create_experiment(name, artifact_location)
        return experiment.experiment_id

    def get_run_id(self, experiment_id):
        """Create a new run in the experiment and return its id."""
        return self.server.create_run(experiment_id).info.run_id

    def log_params(self, run_id, params):
        """Log each entry of `params` as a param of the run."""
        for name, value in params.items():
            self.server.log_param(run_id, name, value)
        print("Parameters successful logged")

    def set_tags(self, run_id, params):
        """Set each entry of `params` as a tag of the run."""
        for name, value in params.items():
            self.server.set_tag(run_id, name, value)
        print("Tags successful logged")

    def log_metrics(self, run_id, params):
        """Log each entry of `params` as a metric of the run."""
        for name, value in params.items():
            self.server.log_metric(run_id, name, value)
        print("Metrics successful logged")

    def log_artifacts(self, run_id, local_dir, artifact_path=None):
        """Upload the contents of `local_dir` as artifacts of the run."""
        self.server.log_artifacts(run_id, local_dir, artifact_path)
def test_kedro_mlflow_config_experiment_was_deleted(kedro_project_with_mlflow_conf):
    """Setting up the config must make a previously deleted experiment listed again."""
    project_path = kedro_project_with_mlflow_conf

    # create an experiment with the same name and then delete it
    mlflow_tracking_uri = (project_path / "mlruns").as_uri()
    mlflow_client = MlflowClient(mlflow_tracking_uri)
    mlflow_client.create_experiment("exp1")
    deleted_id = mlflow_client.get_experiment_by_name("exp1").experiment_id
    mlflow_client.delete_experiment(deleted_id)

    # the config must restore properly the experiment
    config = KedroMlflowConfig(
        server=dict(mlflow_tracking_uri="mlruns"),
        tracking=dict(experiment=dict(name="exp1")),
    )

    bootstrap_project(project_path)
    with KedroSession.create(project_path=project_path) as session:
        context = session.load_context()  # setup config
        config.setup(context)
        listed_names = [
            exp.name
            for exp in config.server._mlflow_client.list_experiments()
        ]
        assert "exp1" in listed_names
class MLFlowLoggerCallback(LoggerCallback):
    """MLFlow Logger to automatically log Tune results and config to MLFlow.

    MLFlow (https://mlflow.org) Tracking is an open source library for
    recording and querying experiments. This Ray Tune ``LoggerCallback``
    sends information (config parameters, training results & metrics,
    and artifacts) to MLFlow for automatic experiment tracking.

    Args:
        tracking_uri (str): The tracking URI for where to manage experiments
            and runs. This can either be a local file path or a remote server.
            This arg gets passed directly to mlflow.tracking.MlflowClient
            initialization. When using Tune in a multi-node setting, make sure
            to set this to a remote server and not a local file path.
        registry_uri (str): The registry URI that gets passed directly to
            mlflow.tracking.MlflowClient initialization.
        experiment_name (str): The experiment name to use for this Tune run.
            If None is passed in here, the MLFLOW_EXPERIMENT_NAME and then
            the MLFLOW_EXPERIMENT_ID environment variables are consulted.
            An existing experiment with that name is reused; otherwise a new
            experiment is created with that name.
        save_artifact (bool): If set to True, the trial's log directory is
            uploaded as artifacts of the corresponding MLflow run when the
            trial ends.

    Example:

    .. code-block:: python

        from ray.tune.integration.mlflow import MLFlowLoggerCallback
        tune.run(
            train_fn,
            config={
                # define search space here
                "parameter_1": tune.choice([1, 2, 3]),
                "parameter_2": tune.choice([4, 5, 6]),
            },
            callbacks=[MLFlowLoggerCallback(
                experiment_name="experiment1",
                save_artifact=True)])

    """

    def __init__(self,
                 tracking_uri: Optional[str] = None,
                 registry_uri: Optional[str] = None,
                 experiment_name: Optional[str] = None,
                 save_artifact: bool = False):
        """Connect to MLflow and resolve the experiment all trials log to.

        Raises:
            RuntimeError: If mlflow is not installed.
            ValueError: If no experiment can be resolved from the argument
                or the environment variables.
        """
        mlflow = _import_mlflow()
        if mlflow is None:
            raise RuntimeError("MLFlow has not been installed. Please `pip "
                               "install mlflow` to use the MLFlowLogger.")

        from mlflow.tracking import MlflowClient
        self.client = MlflowClient(tracking_uri=tracking_uri,
                                   registry_uri=registry_uri)

        # An explicit name wins; otherwise fall back to the name env var.
        if experiment_name is None:
            experiment_name = os.environ.get("MLFLOW_EXPERIMENT_NAME")

        if experiment_name is None:
            # No name anywhere: the id env var must point at a real
            # experiment (the lookup is skipped when the var is unset).
            experiment_id = os.environ.get("MLFLOW_EXPERIMENT_ID")
            id_is_usable = (
                experiment_id is not None
                and self.client.get_experiment(experiment_id) is not None)
            if not id_is_usable:
                raise ValueError("No experiment_name passed, "
                                 "MLFLOW_EXPERIMENT_NAME env var is not "
                                 "set, and MLFLOW_EXPERIMENT_ID either "
                                 "is not set or does not exist. Please "
                                 "set one of these to use the "
                                 "MLFlowLoggerCallback.")
        else:
            # Reuse the experiment with this name, or create it.
            experiment = self.client.get_experiment_by_name(experiment_name)
            if experiment is None:
                experiment_id = self.client.create_experiment(
                    name=experiment_name)
            else:
                experiment_id = experiment.experiment_id

        # At this point, experiment_id is set.
        self.experiment_id = experiment_id
        self.save_artifact = save_artifact
        # Maps each trial to the id of its MLflow run.
        self._trial_runs = {}

    def log_trial_start(self, trial: "Trial"):
        """Create the trial's run on first sight and log its config as params."""
        if trial not in self._trial_runs:
            new_run = self.client.create_run(
                experiment_id=self.experiment_id,
                tags={"trial_name": str(trial)})
            self._trial_runs[trial] = new_run.info.run_id

        run_id = self._trial_runs[trial]

        # Log the config parameters.
        for key, value in trial.config.items():
            self.client.log_param(run_id=run_id, key=key, value=value)

    def log_trial_result(self, iteration: int, trial: "Trial", result: Dict):
        """Log every float-convertible result value as a metric at `iteration`."""
        run_id = self._trial_runs[trial]
        for key, raw_value in result.items():
            try:
                metric_value = float(raw_value)
            except (ValueError, TypeError):
                logger.debug("Cannot log key {} with value {} since the "
                             "value cannot be converted to float.".format(
                                 key, raw_value))
                continue
            self.client.log_metric(run_id=run_id,
                                   key=key,
                                   value=metric_value,
                                   step=iteration)

    def log_trial_end(self, trial: "Trial", failed: bool = False):
        """Optionally upload the trial log dir, then terminate the run."""
        run_id = self._trial_runs[trial]

        # Log the artifacts if save_artifact is set to True.
        if self.save_artifact:
            self.client.log_artifacts(run_id, local_dir=trial.logdir)

        # Stop the run once trial finishes.
        status = "FAILED" if failed else "FINISHED"
        self.client.set_terminated(run_id=run_id, status=status)
class MLFlowLogger(LightningLoggerBase):
    """
    Log using `MLflow <https://mlflow.org>`_.

    Install it with pip:

    .. code-block:: bash

        pip install mlflow

    .. code-block:: python

        from pytorch_lightning import Trainer
        from pytorch_lightning.loggers import MLFlowLogger
        mlf_logger = MLFlowLogger(
            experiment_name="default",
            tracking_uri="file:./ml-runs"
        )
        trainer = Trainer(logger=mlf_logger)

    Use the logger anywhere in your :class:`~pytorch_lightning.core.lightning.LightningModule` as follows:

    .. code-block:: python

        from pytorch_lightning import LightningModule
        class LitModel(LightningModule):
            def training_step(self, batch, batch_idx):
                # example
                self.logger.experiment.whatever_ml_flow_supports(...)

            def any_lightning_module_function_or_hook(self):
                self.logger.experiment.whatever_ml_flow_supports(...)

    Args:
        experiment_name: The name of the experiment
        run_name: Name of the new run. The `run_name` is internally stored as a ``mlflow.runName`` tag.
            If the ``mlflow.runName`` tag has already been set in `tags`, the value is overridden by the `run_name`.
        tracking_uri: Address of local or remote tracking server.
            If not provided, defaults to the value the `MLFLOW_TRACKING_URI` environment variable had
            when this class was defined (the default is evaluated once, not on every call);
            if that is unset or empty it falls back to `file:<save_dir>`.
        tags: A dictionary tags for the experiment.
        save_dir: A path to a local directory where the MLflow runs get saved.
            Defaults to `./mlruns` if `tracking_uri` is not provided.
            Has no effect if `tracking_uri` is provided.
        prefix: A string to put at the beginning of metric keys.
        artifact_location: The location to store run artifacts. If not provided, the server picks an appropriate
            default.

    Raises:
        ImportError:
            If required MLFlow package is not installed on the device.
    """

    LOGGER_JOIN_CHAR = "-"

    def __init__(
        self,
        experiment_name: str = "default",
        run_name: Optional[str] = None,
        # NOTE: this default is evaluated once, when the class body is executed,
        # so later changes to the environment variable are not picked up.
        tracking_uri: Optional[str] = os.getenv("MLFLOW_TRACKING_URI"),
        tags: Optional[Dict[str, Any]] = None,
        save_dir: Optional[str] = "./mlruns",
        prefix: str = "",
        artifact_location: Optional[str] = None,
    ):
        if mlflow is None:
            raise ImportError(
                "You want to use `mlflow` logger which is not installed yet, install it with `pip install mlflow`."
            )
        super().__init__()
        # No (or empty) tracking URI: store runs locally under ``save_dir``.
        if not tracking_uri:
            tracking_uri = f"{LOCAL_FILE_URI_PREFIX}{save_dir}"

        self._experiment_name = experiment_name
        # Experiment and run ids are resolved lazily by the ``experiment`` property.
        self._experiment_id = None
        self._tracking_uri = tracking_uri
        self._run_name = run_name
        self._run_id = None
        self.tags = tags
        self._prefix = prefix
        self._artifact_location = artifact_location

        self._mlflow_client = MlflowClient(tracking_uri)

    @property
    @rank_zero_experiment
    def experiment(self) -> MlflowClient:
        r"""
        Actual MLflow object. To use MLflow features in your
        :class:`~pytorch_lightning.core.lightning.LightningModule` do the following.

        On first access this looks up the experiment by name (creating it if it
        does not exist) and creates a run; both ids are cached on the logger.

        Example::

            self.logger.experiment.some_mlflow_function()

        """
        if self._experiment_id is None:
            expt = self._mlflow_client.get_experiment_by_name(self._experiment_name)
            if expt is not None:
                self._experiment_id = expt.experiment_id
            else:
                log.warning(f"Experiment with name {self._experiment_name} not found. Creating it.")
                self._experiment_id = self._mlflow_client.create_experiment(
                    name=self._experiment_name,
                    artifact_location=self._artifact_location,
                )

        if self._run_id is None:
            if self._run_name is not None:
                self.tags = self.tags or {}
                if MLFLOW_RUN_NAME in self.tags:
                    log.warning(
                        f"The tag {MLFLOW_RUN_NAME} is found in tags. The value will be overridden by {self._run_name}."
                    )
                # The explicit run name always wins over a pre-existing tag.
                self.tags[MLFLOW_RUN_NAME] = self._run_name
            run = self._mlflow_client.create_run(
                experiment_id=self._experiment_id,
                tags=resolve_tags(self.tags),
            )
            self._run_id = run.info.run_id
        return self._mlflow_client

    @property
    def run_id(self) -> str:
        """
        Create the experiment if it does not exist to get the run id.

        Returns:
            The run id.
        """
        # Accessing ``experiment`` populates ``_run_id`` as a side effect.
        _ = self.experiment
        return self._run_id

    @property
    def experiment_id(self) -> str:
        """
        Create the experiment if it does not exist to get the experiment id.

        Returns:
            The experiment id.
        """
        # Accessing ``experiment`` populates ``_experiment_id`` as a side effect.
        _ = self.experiment
        return self._experiment_id

    @rank_zero_only
    def log_hyperparams(self, params: Union[Dict[str, Any], Namespace]) -> None:
        """
        Log each hyperparameter of the flattened ``params`` as an MLflow param.

        Values whose string form is longer than 250 characters are skipped
        with a warning instead of being logged.
        """
        params = self._convert_params(params)
        params = self._flatten_dict(params)
        for k, v in params.items():
            if len(str(v)) > 250:
                rank_zero_warn(
                    f"Mlflow only allows parameters with up to 250 characters. Discard {k}={v}",
                    RuntimeWarning,
                )
                continue

            self.experiment.log_param(self.run_id, k, v)

    @rank_zero_only
    def log_metrics(self, metrics: Dict[str, float], step: Optional[int] = None) -> None:
        """
        Log ``metrics`` to the current run, all with the same timestamp.

        String-valued metrics are discarded with a warning, and characters
        outside ``[a-zA-Z0-9_/. -]`` are removed from metric names.
        """
        assert rank_zero_only.rank == 0, "experiment tried to log from global_rank != 0"

        metrics = self._add_prefix(metrics)

        # One timestamp for the whole batch so the metrics line up in MLflow.
        timestamp_ms = int(time() * 1000)
        for k, v in metrics.items():
            if isinstance(v, str):
                log.warning(f"Discarding metric with string value {k}={v}.")
                continue

            new_k = re.sub("[^a-zA-Z0-9_/. -]+", "", k)
            if k != new_k:
                rank_zero_warn(
                    "MLFlow only allows '_', '/', '.' and ' ' special characters in metric name."
                    f" Replacing {k} with {new_k}.",
                    RuntimeWarning,
                )
                k = new_k

            self.experiment.log_metric(self.run_id, k, v, timestamp_ms, step)

    @rank_zero_only
    def finalize(self, status: str = "FINISHED") -> None:
        """
        Mark the current run as terminated.

        A ``status`` of ``"success"`` is translated to ``"FINISHED"``; any other
        value is passed through unchanged.
        """
        super().finalize(status)
        status = "FINISHED" if status == "success" else status
        if self.experiment.get_run(self.run_id):
            self.experiment.set_terminated(self.run_id, status)

    @property
    def save_dir(self) -> Optional[str]:
        """
        The root file directory in which MLflow experiments are saved.

        Return:
            Local path to the root experiment directory if the tracking uri is local.
            Otherwise returns `None`.
        """
        if self._tracking_uri.startswith(LOCAL_FILE_URI_PREFIX):
            # Slice the prefix off rather than using ``str.lstrip``: ``lstrip``
            # treats its argument as a *set of characters*, so it would also eat
            # leading characters of the path that happen to occur in the prefix
            # (e.g. with a ``file:`` prefix, ``file:logs`` would become ``ogs``).
            return self._tracking_uri[len(LOCAL_FILE_URI_PREFIX):]
        return None

    @property
    def name(self) -> str:
        """
        Get the experiment id.

        Returns:
            The experiment id.
        """
        return self.experiment_id

    @property
    def version(self) -> str:
        """
        Get the run id.

        Returns:
            The run id.
        """
        return self.run_id