def test_get_experiment_id_with_active_experiment_returns_active_experiment_id():
    # Create a new experiment and set it as the active experiment
    with TempDir(chdr=True):
        name = "Random experiment %d" % random.randint(1, int(1e6))
        exp_id = mlflow.create_experiment(name)
        assert exp_id is not None
        mlflow.set_experiment(name)
        assert _get_experiment_id() == exp_id
def __init__(self, x_train, x_test, y_train, y_test, experiment_name):
    self.x_train = x_train
    self.x_test = x_test
    self.y_train = y_train
    self.y_test = y_test
    # Create an MLflow experiment with a timestamp suffix so each run gets a fresh one
    self.mlflow_exp_id = mlflow.create_experiment(
        name=experiment_name + '_' +
        datetime.datetime.now().strftime('%Y_%m_%d_%H_%M_%S')
    )
def setOrCreateMLFlowExperiment(experimentPath):
    from mlflow.exceptions import MlflowException
    try:
        experiment_id = mlflow.create_experiment(experimentPath)
    except MlflowException:
        # The experiment already exists; look up its id by name
        experiment_id = mlflow.get_experiment_by_name(
            experimentPath).experiment_id
    mlflow.set_experiment(experimentPath)
    return experiment_id
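# Usage sketch: a minimal, hypothetical invocation of the helper above. The
# tracking URI and experiment path are illustrative values, not part of the
# original snippet.
import mlflow

mlflow.set_tracking_uri("file:./mlruns")               # hypothetical local store
exp_id = setOrCreateMLFlowExperiment("/Shared/demo")   # illustrative path
with mlflow.start_run(experiment_id=exp_id):
    mlflow.log_param("alpha", 0.5)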
def prepare_environment(environment: str):
    environment_data = InfoFile.get("environments").get(environment)
    if not environment_data:
        raise Exception(
            f"No environment {environment} provided in the project file")

    config = EnvironmentVariableConfigProvider().get_config()
    if config:
        config_type = "ENV"
        dbx_echo("Using configuration from the environment variables")
    else:
        dbx_echo("No environment variables provided, using the ~/.databrickscfg")
        config = ProfileConfigProvider(environment_data["profile"]).get_config()
        config_type = "PROFILE"

    if not config:
        raise Exception(
            f"""Couldn't get profile with name: {environment_data["profile"]}. Please check the config settings"""
        )

    api_client = _get_api_client(config, command_name="cicdtemplates-")
    _prepare_workspace_dir(api_client, environment_data["workspace_dir"])

    if config_type == "ENV":
        mlflow.set_tracking_uri(DATABRICKS_MLFLOW_URI)
    elif config_type == "PROFILE":
        mlflow.set_tracking_uri(
            f'{DATABRICKS_MLFLOW_URI}://{environment_data["profile"]}')
    else:
        raise NotImplementedError(
            f"Config type: {config_type} is not implemented")

    experiment = mlflow.get_experiment_by_name(environment_data["workspace_dir"])
    if not experiment:
        mlflow.create_experiment(environment_data["workspace_dir"],
                                 environment_data["artifact_location"])
    mlflow.set_experiment(environment_data["workspace_dir"])

    return api_client
def _register_model(model, tracking_uri, artifact_path, model_name):
    mlflow.set_tracking_uri(tracking_uri)
    experiment_id = mlflow.create_experiment(artifact_location=artifact_path,
                                             name='test')
    with mlflow.start_run(experiment_id=experiment_id):
        mlflow.keras.log_model(model, "model",
                               registered_model_name=model_name)
def _get_experiment_id(self):
    experiment_name = os.environ.get('MLFLOW_EXPERIMENT_NAME', 'Dev')
    experiment = mlflow.get_experiment_by_name(experiment_name)
    if not experiment:
        experiment_id = mlflow.create_experiment(name=experiment_name)
    else:
        experiment_id = experiment.experiment_id
    return experiment_id
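# Usage sketch: driving the helper above via its environment variable. `Tracker`
# is a hypothetical stand-in for whatever class defines this method.
import os

os.environ["MLFLOW_EXPERIMENT_NAME"] = "staging"   # falls back to 'Dev' if unset
experiment_id = Tracker()._get_experiment_id()     # creates 'staging' on first use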
def get_experiment_id(experiment_name):
    try:
        exp_id = mlflow.create_experiment(experiment_name)
        eprint(f"Created new experiment {bold(experiment_name)}")
    except mlflow.exceptions.MlflowException:
        # The experiment already exists; activate it and look up its id
        mlflow.set_experiment(experiment_name)
        exp = mlflow.get_experiment_by_name(experiment_name)
        exp_id = exp.experiment_id
    os.environ["EXPERIMENT_ID"] = exp_id
    return exp_id
def find_best_parameters(data, ax_search_domain, control_arm_params,
                         fixed_gbm_params, n_sobol=5, n_ei=5):
    ax_experiment = setup_ax_experiment_optimizer(data, ax_search_domain,
                                                  control_arm_params,
                                                  fixed_gbm_params)
    sobol = Models.SOBOL(search_space=ax_experiment.search_space, device=DEVICE)

    dateTimeObj = datetime.now()
    timestampStr = dateTimeObj.strftime("%d-%b-%Y (%H:%M:%S.%f)")
    mlflow_experiment_id = mlflow.create_experiment(name=timestampStr)
    mlflow.set_experiment(timestampStr)

    # Sobol (quasi-random) exploration runs
    for run_id in range(0, n_sobol):
        with mlflow.start_run(run_name="sobol_" + str(run_id),
                              experiment_id=mlflow_experiment_id):
            generator_run = sobol.gen(1)
            ax_experiment.new_batch_trial(generator_run=generator_run)
            ax_experiment.trials[len(ax_experiment.trials) - 1].run()
            log_parameters(ax_experiment)
            log_metric(ax_experiment)

    # BoTorch runs using the noisy expected improvement acquisition function
    for run_id in range(0, n_ei):
        with mlflow.start_run(run_name="gp_nei_" + str(run_id),
                              experiment_id=mlflow_experiment_id):
            data = ax_experiment.fetch_data()
            botorch_model = Models.BOTORCH(
                experiment=ax_experiment,
                data=data,
                acqf_constructor=get_NEI,
                device=DEVICE,
            )
            generator_run = botorch_model.gen(1)
            ax_experiment.new_batch_trial(generator_run=generator_run)
            ax_experiment.trials[len(ax_experiment.trials) - 1].run()
            log_parameters(ax_experiment)
            log_metric(ax_experiment)

    # Lower mean is better: pick the arm with the minimum score
    ax_experiment_data = ax_experiment.fetch_data().df
    best_score = ax_experiment_data.min()['mean']
    best_arm_name = ax_experiment_data.loc[
        ax_experiment_data['mean'] == best_score].arm_name.item()
    best_arm = ax_experiment.arms_by_name[best_arm_name]
    best_params = best_arm.parameters
    return best_params, best_score
def start_run(param_prefix: Optional[str] = None,
              experiment_name: Optional[str] = None,
              run_name: Optional[str] = None,
              artifact_location: Optional[str] = None,
              **args) -> mlf.ActiveRun:
    """Close alias of mlflow.start_run. The only difference is that
    uv.start_run attempts to extract parameters from the environment and log
    those to the bound UV reporter using `report_params`.

    Note that the returned value can be used as a context manager:
    https://www.mlflow.org/docs/latest/python_api/mlflow.html#mlflow.start_run

    """
    if experiment_name is None:
        experiment_name = os.environ.get("MLFLOW_EXPERIMENT_NAME")
    if run_name is None:
        run_name = os.environ.get("MLFLOW_RUN_NAME")
    if artifact_location is None:
        artifact_location = os.environ.get("MLFLOW_ARTIFACT_ROOT")

    # Make sure the experiment exists before the run starts.
    if experiment_name is not None:
        if mlf.get_experiment_by_name(experiment_name) is None:
            mlf.create_experiment(experiment_name, artifact_location)
        mlf.set_experiment(experiment_name)

    ret = mlf.start_run(run_name=run_name, **args)
    env_params = ue.extract_params(prefix=param_prefix)
    mlf.set_tags(env_params)

    # For CAIP jobs, we add the job id as a tag, along with a link to the
    # console page.
    cloud_ml_job_id = os.environ.get('CLOUD_ML_JOB_ID')
    if cloud_ml_job_id is not None:
        mlf.set_tag(
            'cloud_ml_job_details',
            f'https://console.cloud.google.com/ai-platform/jobs/{cloud_ml_job_id}'
        )
        mlf.set_tag('cloud_ml_job_id', cloud_ml_job_id)

    return ret
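# Usage sketch: the wrapper as a context manager, assuming it is in scope and
# the MLflow environment variables above are honored. Names are illustrative.
import os

os.environ["MLFLOW_EXPERIMENT_NAME"] = "uv-demo"   # picked up by the wrapper
with start_run(run_name="smoke-test") as active_run:
    print(active_run.info.run_id)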
def get_or_create_experiment(project_path, experiment_name):
    experiment_path = os.path.join(project_path, experiment_name)
    try:
        experiment_id = mlflow.create_experiment(experiment_path)
    except mlflow.exceptions.MlflowException:
        # The experiment already exists; look it up by name instead
        experiment_id = mlflow.tracking.MlflowClient().get_experiment_by_name(
            experiment_path).experiment_id
    out = {'experiment_path': experiment_path,
           'experiment_id': experiment_id}
    return out
def run(self):
    """Complete pipeline execution for model training and persistence"""
    logger = lg.getLogger(self.run.__name__)
    logger.info('Starting pipeline execution')
    self.load_data()
    self.preprocessing()
    self.qa_data()
    self.split_data()

    try:
        mlflow.create_experiment(self.mlflow_experiment_name)
        logger.info('The MLflow experiment %s has been created',
                    self.mlflow_experiment_name)
    except MlflowException:
        logger.info('The MLflow experiment %s already exists and will be reused',
                    self.mlflow_experiment_name)
    mlflow.set_experiment(self.mlflow_experiment_name)

    with mlflow.start_run():
        self.train_model()
        self.evaluate_model()
        self.persist_artifacts()
def test_log_metrics_and_params():
    mlflow_root = tempfile.mkdtemp()
    mlflow.set_tracking_uri(mlflow_root)
    mlflow.create_experiment("xp")

    with mlflow.start_run(nested=False) as active_run:
        model = tf.keras.models.Sequential([tf.keras.layers.Dense(1)])
        model.compile(optimizer="sgd", loss=tf.keras.losses.mean_squared_error)
        history = model.fit(x=[1, 1, 1], y=[2, 2, 2], verbose=1)
        log_metrics_and_params(history=history, config=None,
                               mlflow_active_run=active_run)
        tag_model(model)

    print(glob.glob(f"{mlflow_root}/*"))
    shutil.rmtree(mlflow_root)
def evaluate_forecast(evaluation_pd):
    mlflow.set_tracking_uri('http://mlflow_server:5000')
    try:
        experiment_id = mlflow.create_experiment(
            name="ev_modelosv4",
            artifact_location='http://mlflow_server:5000/mlflow/')
    except mlflow.exceptions.MlflowException:
        # The experiment already exists; reuse it
        experiment_id = mlflow.get_experiment_by_name(
            name="ev_modelosv4").experiment_id

    with mlflow.start_run(experiment_id=experiment_id,
                          run_name="store={0}_item{1}".format(
                              evaluation_pd['store'].iloc[0],
                              evaluation_pd['item'].iloc[0])):
        # get store & item from the incoming data set
        training_date = evaluation_pd['training_date'].iloc[0]
        store = evaluation_pd['store'].iloc[0]
        item = evaluation_pd['item'].iloc[0]
        print("Evaluacion_modelo_{0}_{1}".format(store, item))

        # calculate evaluation metrics
        mae = mean_absolute_error(evaluation_pd['y'], evaluation_pd['yhat'])
        mse = mean_squared_error(evaluation_pd['y'], evaluation_pd['yhat'])
        rmse = sqrt(mse)

        mlflow.log_metric('mae', mae)
        mlflow.log_metric('mse', mse)
        mlflow.log_metric('rmse', rmse)

        mlflow.log_param('store', store)
        mlflow.log_param('item', item)
        mlflow.log_param('interval_width',
                         evaluation_pd['interval_width'].iloc[0])
        mlflow.log_param('growth', evaluation_pd['growth'].iloc[0])
        mlflow.log_param('daily_seasonality',
                         evaluation_pd['daily_seasonality'].iloc[0])
        mlflow.log_param('weekly_seasonality',
                         evaluation_pd['weekly_seasonality'].iloc[0])
        mlflow.log_param('yearly_seasonality',
                         evaluation_pd['yearly_seasonality'].iloc[0])
        mlflow.log_param('seasonality_mode',
                         evaluation_pd['seasonality_mode'].iloc[0])

        # assemble result set
        results = {
            'training_date': [training_date],
            'store': [store],
            'item': [item],
            'mae': [mae],
            'mse': [mse],
            'rmse': [rmse]
        }

    return pd.DataFrame.from_dict(results)
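# Usage sketch: a toy evaluation frame for the function above, assuming the
# MLflow server at http://mlflow_server:5000 is reachable. All values are
# illustrative.
import pandas as pd

toy_pd = pd.DataFrame({
    'training_date': ['2021-01-01'] * 3,
    'store': [1] * 3,
    'item': [7] * 3,
    'y': [10.0, 12.0, 11.0],
    'yhat': [9.5, 12.5, 10.5],
    'interval_width': [0.8] * 3,
    'growth': ['linear'] * 3,
    'daily_seasonality': [False] * 3,
    'weekly_seasonality': [True] * 3,
    'yearly_seasonality': [True] * 3,
    'seasonality_mode': ['additive'] * 3,
})
results_pd = evaluate_forecast(toy_pd)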
def _log_model_with_signature_and_example(tmp_path, sig, input_example):
    experiment_id = mlflow.create_experiment("test")

    with mlflow.start_run(experiment_id=experiment_id) as run:
        Model.log("some/path", TestFlavor, signature=sig,
                  input_example=input_example)

    local_path = _download_artifact_from_uri(
        "runs:/{}/some/path".format(run.info.run_id),
        output_path=tmp_path.path("")
    )
    return local_path, run
def test_get_experiment_id():
    # When no experiment is active, the default experiment id should be returned
    assert _get_experiment_id() == Experiment.DEFAULT_EXPERIMENT_ID

    # Create a new experiment and set it as the active experiment
    with TempDir(chdr=True):
        name = "Random experiment %d" % random.randint(1, int(1e6))
        exp_id = mlflow.create_experiment(name)
        assert exp_id is not None
        mlflow.set_experiment(name)
        assert _get_experiment_id() == exp_id
def prepare_environment(environment: str) -> ApiClient:
    environment_data = get_environment_data(environment)
    config_type, config = pick_config(environment_data)

    api_client = _get_api_client(config, command_name="cicdtemplates-")
    _prepare_workspace_dir(api_client, environment_data["workspace_dir"])

    if config_type == "ENV":
        mlflow.set_tracking_uri(DATABRICKS_MLFLOW_URI)
    elif config_type == "PROFILE":
        mlflow.set_tracking_uri(
            f'{DATABRICKS_MLFLOW_URI}://{environment_data["profile"]}')
    else:
        raise NotImplementedError(
            f"Config type: {config_type} is not implemented")

    experiment: Optional[mlflow.entities.Experiment] = \
        mlflow.get_experiment_by_name(environment_data["workspace_dir"])

    if not experiment:
        # if there is no experiment, create it with the project-defined location
        mlflow.create_experiment(environment_data["workspace_dir"],
                                 environment_data["artifact_location"])
    else:
        # verify the experiment location
        if experiment.artifact_location != environment_data["artifact_location"]:
            raise Exception(
                f"Required location of experiment {environment_data['workspace_dir']} "
                f"doesn't match the project-defined one: \n"
                f"\t experiment artifact location: {experiment.artifact_location} \n"
                f"\t project artifact location   : {environment_data['artifact_location']} \n"
                f"Changing the location of an existing experiment is currently not supported in MLflow. "
                f"Please change the experiment name to create a new experiment."
            )

    mlflow.set_experiment(environment_data["workspace_dir"])

    return api_client
def test_runs_artifact_repo_init():
    artifact_location = "s3://blah_bucket/"
    experiment_id = mlflow.create_experiment("expr_abc", artifact_location)
    with mlflow.start_run(experiment_id=experiment_id):
        run_id = mlflow.active_run().info.run_id

    runs_uri = "runs:/%s/path/to/model" % run_id
    runs_repo = RunsArtifactRepository(runs_uri)

    assert runs_repo.artifact_uri == runs_uri
    assert isinstance(runs_repo.repo, S3ArtifactRepository)
    expected_absolute_uri = "%s%s/artifacts/path/to/model" % (artifact_location,
                                                              run_id)
    assert runs_repo.repo.artifact_uri == expected_absolute_uri
def __post_init__(self):
    cfg_exp_id = self.cfg.expm.exp_name
    task_type = self.cfg.task_type
    exp_name = cfg_exp_id + "_" + task_type
    cfg_uid = self.cfg.uid

    # Get or create the experiment, then resolve its id
    exp_exist = mlflow.get_experiment_by_name(exp_name)
    if not exp_exist:
        mlflow.create_experiment(exp_name)
        exp_exist = mlflow.get_experiment_by_name(exp_name)
    exp_id = exp_exist.experiment_id

    self.active_run = mlflow.start_run(experiment_id=exp_id, run_name=cfg_uid)

    self.cfg.defrost()
    run_id = self.active_run.info.run_id
    # start_time is a millisecond epoch; keep the first 10 digits to get seconds
    st_time = datetime.datetime.fromtimestamp(
        int(str(self.active_run.info.start_time)[:10]))
    st_time_str = st_time.strftime("%Y-%m-%d %H:%M:%S")
    self.cfg.expm.run_id = run_id
    self.cfg.expm.st_time = st_time_str
    self.cfg.freeze()
def before_pipeline_run(self, run_params: Dict[str, Any], pipeline: Pipeline,
                        catalog: DataCatalog):
    parameters = catalog._data_sets["parameters"].load()
    mlflow_logging_params = parameters.get(self.mlflow_logging_config_key)
    if mlflow_logging_params:
        self.uri = mlflow_logging_params.get("uri")
        self.experiment_name = mlflow_logging_params.get("experiment_name")
        self.artifact_location = mlflow_logging_params.get("artifact_location")
        self.offset_hours = mlflow_logging_params.get("offset_hours") or 0
        self.logging_artifacts = (
            mlflow_logging_params.get("logging_artifacts") or [])

        from mlflow import (
            create_experiment,
            set_experiment,
            start_run,
            end_run,
            set_tracking_uri,
            log_artifact,
            log_metric,
            log_param,
        )
        from mlflow.exceptions import MlflowException

        if self.uri:
            set_tracking_uri(self.uri)

        if self.experiment_name:
            try:
                experiment_id = create_experiment(
                    self.experiment_name,
                    artifact_location=self.artifact_location,
                )
                start_run(experiment_id=experiment_id)
            except MlflowException:
                # The experiment already exists; attach to it by name instead
                set_experiment(self.experiment_name)

        for path in self.initial_logging_artifact_paths:
            log_artifact(path)

        log_metric("__time_begin",
                   get_timestamp_int(offset_hours=self.offset_hours))
        log_param("__time_begin",
                  get_timestamp(offset_hours=self.offset_hours))

    self.time_begin = time.time()
    self.mlflow_logging_params = mlflow_logging_params
def get_or_create_experiment_id(experiment_name):
    tracking_uri = mlflow.tracking.get_tracking_uri()
    client = mlflow.tracking.MlflowClient(tracking_uri)
    exp = client.get_experiment_by_name(experiment_name)
    if exp is None:
        experiment_id = mlflow.create_experiment(experiment_name)
    else:
        experiment_id = exp.experiment_id
    print("experiment_name={} experiment_id={}".format(experiment_name,
                                                       experiment_id))
    return experiment_id
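# Usage sketch: the get-or-create helper with a SQLite tracking store. The URI
# and experiment name are illustrative assumptions.
import mlflow

mlflow.set_tracking_uri("sqlite:///mlruns.db")         # hypothetical store
exp_id = get_or_create_experiment_id("churn-model")    # illustrative name
with mlflow.start_run(experiment_id=exp_id):
    mlflow.log_metric("auc", 0.91)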
def test_set_experiment_by_name():
    name = "random_exp"
    exp_id = mlflow.create_experiment(name)
    exp1 = mlflow.set_experiment(name)
    assert exp1.experiment_id == exp_id
    with start_run() as run:
        assert run.info.experiment_id == exp_id

    another_name = "another_experiment"
    exp2 = mlflow.set_experiment(another_name)
    with start_run() as another_run:
        assert another_run.info.experiment_id == exp2.experiment_id
def test_search_runs_multiple_experiments():
    experiment_ids = [mlflow.create_experiment("exp__{}".format(exp_id))
                      for exp_id in range(1, 4)]
    for eid in experiment_ids:
        with mlflow.start_run(experiment_id=eid):
            mlflow.log_metric("m0", 1)
            mlflow.log_metric("m_{}".format(eid), 2)

    assert len(MlflowClient().search_runs(experiment_ids, "metrics.m0 > 0",
                                          ViewType.ALL)) == 3
    assert len(MlflowClient().search_runs(experiment_ids, "metrics.m_1 > 0",
                                          ViewType.ALL)) == 1
    assert len(MlflowClient().search_runs(experiment_ids, "metrics.m_2 = 2",
                                          ViewType.ALL)) == 1
    assert len(MlflowClient().search_runs(experiment_ids, "metrics.m_3 < 4",
                                          ViewType.ALL)) == 1
def test_runs_artifact_repo_uses_repo_download_artifacts():
    """
    The RunsArtifactRepo should delegate `download_artifacts` to its
    self.repo.download_artifacts function.
    """
    artifact_location = "s3://blah_bucket/"
    experiment_id = mlflow.create_experiment("expr_abcd", artifact_location)
    with mlflow.start_run(experiment_id=experiment_id):
        run_id = mlflow.active_run().info.run_id

    runs_repo = RunsArtifactRepository('runs:/{}'.format(run_id))
    runs_repo.repo = Mock()
    runs_repo.download_artifacts('artifact_path', 'dst_path')
    runs_repo.repo.download_artifacts.assert_called_once()
def mlflow_client(tmp_path_factory, resnet_model_uri: str,
                  spark: SparkSession) -> MlflowClient:
    tmp_path = tmp_path_factory.mktemp("mlflow")
    tmp_path.mkdir(parents=True, exist_ok=True)
    tracking_uri = "sqlite:///" + str(tmp_path / "tracking.db")
    mlflow.set_tracking_uri(tracking_uri)
    experiment_id = mlflow.create_experiment("rikai-test", str(tmp_path))

    # simplest case
    with mlflow.start_run(experiment_id=experiment_id):
        mlflow.log_param("optimizer", "Adam")
        # Fake training loop
        model = torch.load(resnet_model_uri)
        artifact_path = "model"
        schema = ("STRUCT<boxes:ARRAY<ARRAY<float>>,"
                  "scores:ARRAY<float>,labels:ARRAY<int>>")
        pre_processing = ("rikai.contrib.torch.transforms."
                          "fasterrcnn_resnet50_fpn.pre_processing")
        post_processing = ("rikai.contrib.torch.transforms."
                           "fasterrcnn_resnet50_fpn.post_processing")
        rikai.mlflow.pytorch.log_model(
            model,           # same as vanilla mlflow
            artifact_path,   # same as vanilla mlflow
            schema,
            pre_processing,
            post_processing,
            registered_model_name="rikai-test",  # same as vanilla mlflow
        )

    # vanilla mlflow
    with mlflow.start_run():
        mlflow.pytorch.log_model(model, artifact_path,
                                 registered_model_name="vanilla-mlflow")
        mlflow.set_tags({
            "rikai.model.flavor": "pytorch",
            "rikai.output.schema": schema,
            "rikai.transforms.pre": pre_processing,
            "rikai.transforms.post": post_processing,
        })

    # vanilla mlflow, no tags
    with mlflow.start_run():
        mlflow.pytorch.log_model(
            model,
            artifact_path,
            registered_model_name="vanilla-mlflow-no-tags",
        )

    spark.conf.set("rikai.sql.ml.registry.mlflow.tracking_uri", tracking_uri)
    return mlflow.tracking.MlflowClient(tracking_uri)
def test_create_experiment():
    with pytest.raises(TypeError):
        mlflow.create_experiment()  # pylint: disable=no-value-for-parameter

    with pytest.raises(Exception):
        mlflow.create_experiment(None)

    with pytest.raises(Exception):
        mlflow.create_experiment("")

    exp_id = mlflow.create_experiment(
        "Some random experiment name %d" % random.randint(1, int(1e6)))
    assert exp_id is not None
def test_get_experiment_id_in_databricks_with_experiment_defined_in_env_returns_env_experiment_id():
    with TempDir(chdr=True):
        exp_name = "random experiment %d" % random.randint(1, int(1e6))
        exp_id = mlflow.create_experiment(exp_name)
        notebook_id = str(int(exp_id) + 73)
        HelperEnv.set_values(id=exp_id)

        with mock.patch("mlflow.tracking.fluent.is_in_databricks_notebook") as notebook_detection_mock, \
                mock.patch("mlflow.tracking.fluent.get_notebook_id") as notebook_id_mock:
            notebook_detection_mock.side_effect = lambda *args, **kwargs: True
            notebook_id_mock.side_effect = lambda *args, **kwargs: notebook_id

            assert _get_experiment_id() != notebook_id
            assert _get_experiment_id() == exp_id
def test_get_experiment_id_in_databricks_with_active_experiment_returns_active_experiment_id():
    with TempDir(chdr=True):
        exp_name = "random experiment %d" % random.randint(1, int(1e6))
        exp_id = mlflow.create_experiment(exp_name)
        mlflow.set_experiment(exp_name)
        notebook_id = str(int(exp_id) + 73)

        with mock.patch("mlflow.tracking.fluent.is_in_databricks_notebook") as notebook_detection_mock, \
                mock.patch("mlflow.tracking.fluent.get_notebook_id") as notebook_id_mock:
            notebook_detection_mock.return_value = True
            notebook_id_mock.return_value = notebook_id

            assert _get_experiment_id() != notebook_id
            assert _get_experiment_id() == exp_id
def test_set_experiment_by_id():
    name = "random_exp"
    exp_id = mlflow.create_experiment(name)
    active_exp = mlflow.set_experiment(experiment_id=exp_id)
    assert active_exp.experiment_id == exp_id
    with start_run() as run:
        assert run.info.experiment_id == exp_id

    nonexistent_id = "-1337"
    with pytest.raises(MlflowException) as exc:
        mlflow.set_experiment(experiment_id=nonexistent_id)
    assert exc.value.error_code == ErrorCode.Name(RESOURCE_DOES_NOT_EXIST)
    with start_run() as run:
        assert run.info.experiment_id == exp_id
def _get_or_create_experiment(self, experiment_name):
    """
    Return the id of the experiment with the given name, creating it first
    if it does not exist yet.

    :return: Experiment id
    """
    try:
        experiment = mlflow.get_experiment_by_name(experiment_name)
        if experiment is None:  # not found
            raise MlflowException("No valid experiment has been found")
        return experiment.experiment_id  # found
    except MlflowException:
        experiment_id = mlflow.create_experiment(name=experiment_name)
        return experiment_id
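# Note: mlflow.get_experiment_by_name returns None instead of raising when
# nothing matches, so the explicit raise above funnels the not-found case into
# the same MlflowException handler that creates the experiment. A minimal
# calling sketch, where `optimizer` is a hypothetical instance of the defining
# class and the experiment name is illustrative:
import mlflow

experiment_id = optimizer._get_or_create_experiment("hpo-study")
with mlflow.start_run(experiment_id=experiment_id):
    mlflow.log_param("n_trials", 20)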
def log_experiment(
    params={},
    metrics={},
    artifacts={},
    experiment_name="my_experiment",
    mlflow_tracking_uri="./experiments",
    mlflow_artifact_location=None,
):
    """
    Evaluate the model and log it with mlflow

    Args:
        params (dict): dictionary of parameters to log
        metrics (dict): dictionary of metrics to log
        artifacts (dict): dictionary of artifacts (path) to log
        experiment_name (str): experiment name
        mlflow_tracking_uri (str): path or sql url for mlflow logging
        mlflow_artifact_location (str): path or s3bucket url for artifact
            logging. If None, it will default to a standard.

    Returns:
        None
    """
    # Set the tracking URI first so the experiment is created in the right store
    mlflow.set_tracking_uri(mlflow_tracking_uri)

    # Try to create an experiment if it doesn't exist
    try:
        exp_0 = mlflow.create_experiment(
            experiment_name, artifact_location=mlflow_artifact_location
        )
        logger.info(f"Created new experiment id: {exp_0}")
    except Exception as E:
        logger.info(f"{E}. Writing to same URI/artifact store")

    # Always set the experiment
    mlflow.set_experiment(experiment_name)
    logger.info(f"Running experiment {experiment_name}")

    with mlflow.start_run():
        # param logging
        for key, val in params.items():
            logger.info(f"Logging param {key}")
            mlflow.log_param(key, val)
        # metric logging
        for key, val in metrics.items():
            logger.info(f"Logging metric {key}")
            mlflow.log_metric(key, val)
        # artifact logging
        logger.debug(f"Artifacts to log: {artifacts}")
        for key, val in artifacts.items():
            logger.info(f"Logging artifact {key}")
            mlflow.log_artifact(val)
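# Usage sketch: an end-to-end call of log_experiment. All values are
# illustrative, and the artifact path must point to a file that exists.
log_experiment(
    params={"lr": 0.01, "epochs": 10},
    metrics={"rmse": 0.42},
    artifacts={"summary": "outputs/summary.txt"},   # hypothetical existing file
    experiment_name="demo_experiment",
    mlflow_tracking_uri="./experiments",
)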