def run_databricks(uri, entry_point, version, parameters, experiment_id, cluster_spec,
                   git_username, git_password):
    """
    Runs the project at the specified URI on Databricks, returning a `SubmittedRun` that can be
    used to query the run's status or wait for the resulting Databricks Job run to terminate.

    :param uri: Project URI; forwarded to ``_fetch_project``.
    :param entry_point: Name of the entry point to run. Its parameters are validated before the
                        project is uploaded.
    :param version: Project version; forwarded to ``_fetch_project``.
    :param parameters: Parameters for the entry point.
    :param experiment_id: Experiment ID; exported to the remote job when it is not None.
    :param cluster_spec: Path to a file holding the JSON cluster specification.
    :param git_username: Forwarded to ``_fetch_project``.
    :param git_password: Forwarded to ``_fetch_project``.
    :return: A ``DatabricksSubmittedRun`` built from the Databricks job run ID and the run ID
             (None when no remote run object was created).
    :raises ExecutionException: if ``cluster_spec`` is None.
    :raises ValueError: re-raised when the cluster spec file cannot be parsed as JSON.
    """
    _check_databricks_auth_available()
    if cluster_spec is None:
        raise ExecutionException(
            "Cluster spec must be provided when launching MLflow project runs "
            "on Databricks.")

    # Fetch the project into work_dir & validate parameters before doing any remote work.
    work_dir = _fetch_project(uri=uri, use_temp_cwd=True, version=version,
                              git_username=git_username, git_password=git_password)
    project = _load_project(work_dir)
    project.get_entry_point(entry_point)._validate_parameters(parameters)

    # Upload the project to DBFS, get the URI of the project.
    dbfs_project_uri = _upload_project_to_dbfs(work_dir, experiment_id)

    # Create the run object against the tracking server, recording the git commit of work_dir.
    tracking_uri = tracking.get_tracking_uri()
    remote_run = _create_databricks_run(
        tracking_uri=tracking_uri, experiment_id=experiment_id, source_name=_expand_uri(uri),
        source_version=tracking._get_git_commit(work_dir), entry_point_name=entry_point)
    # Single source of truth for the run ID (previously computed twice under two names).
    run_id = remote_run.run_info.run_uuid if remote_run else None

    # Set up environment variables for remote execution.
    env_vars = {}
    if experiment_id is not None:
        eprint("=== Using experiment ID %s ===" % experiment_id)
        env_vars[tracking._EXPERIMENT_ID_ENV_VAR] = experiment_id
    if remote_run is not None:
        env_vars[tracking._TRACKING_URI_ENV_VAR] = tracking_uri
        env_vars[tracking._RUN_ID_ENV_VAR] = remote_run.run_info.run_uuid

    eprint("=== Running entry point %s of project %s on Databricks. ===" % (entry_point, uri))

    # Parse the cluster spec into its own local so that `cluster_spec` keeps meaning "the path"
    # (the error message below relies on that).
    with open(cluster_spec, 'r') as handle:
        try:
            cluster_spec_json = json.load(handle)
        except ValueError:
            eprint("Error when attempting to load and parse JSON cluster spec from file "
                   "%s. " % cluster_spec)
            raise

    # Launch run on Databricks, pointing the command at the DBFS FUSE mount of the upload.
    fuse_dst_dir = os.path.join("/dbfs/", _parse_dbfs_uri_path(dbfs_project_uri).lstrip("/"))
    command = _get_databricks_run_cmd(fuse_dst_dir, run_id, entry_point, parameters)
    db_run_id = _run_shell_command_job(uri, command, env_vars, cluster_spec_json)
    return DatabricksSubmittedRun(db_run_id, run_id)
def run_databricks(uri, entry_point, version, parameters, experiment_id, cluster_spec,
                   git_username, git_password):
    """
    Runs a project on Databricks, returning a `SubmittedRun` that can be used to query the run's
    status or wait for the resulting Databricks Job run to terminate.

    :param uri: Project URI; exported to the job as ``MLFLOW_GIT_URI``.
    :param entry_point: Name of the entry point to run.
    :param version: Project version, recorded as the run's source version.
    :param parameters: Parameters for the entry point.
    :param experiment_id: Experiment ID; exported to the job when not None.
    :param cluster_spec: Path to a file holding the JSON cluster specification.
    :param git_username: Exported as ``MLFLOW_GIT_USERNAME`` when not None.
    :param git_password: Exported as ``MLFLOW_GIT_PASSWORD`` when not None.
    :return: A ``SubmittedRun`` wrapping the remote run and a ``DatabricksPollableRun``.
    """
    # Register the run with the tracking server first.
    tracking_uri = tracking.get_tracking_uri()
    remote_run = _create_databricks_run(
        tracking_uri=tracking_uri,
        experiment_id=experiment_id,
        source_name=uri,
        source_version=version,
        entry_point_name=entry_point)

    # Environment for the remote job: the URI is always present, the rest only when provided.
    env_vars = {"MLFLOW_GIT_URI": uri}
    optional_vars = (
        ("MLFLOW_GIT_USERNAME", git_username),
        ("MLFLOW_GIT_PASSWORD", git_password),
        (tracking._EXPERIMENT_ID_ENV_VAR, experiment_id),
    )
    for var_name, var_value in optional_vars:
        if var_value is not None:
            env_vars[var_name] = var_value
    if remote_run is not None:
        env_vars[tracking._TRACKING_URI_ENV_VAR] = tracking.get_tracking_uri()
        env_vars[tracking._RUN_ID_ENV_VAR] = remote_run.run_info.run_uuid

    eprint("=== Running entry point %s of project %s on Databricks. ===" % (entry_point, uri))

    # Read the cluster spec and submit the job.
    with open(cluster_spec, 'r') as spec_file:
        cluster_spec = json.load(spec_file)
    run_command = _get_databricks_run_cmd(uri, entry_point, version, parameters)
    db_run_id = _run_shell_command_job(uri, run_command, env_vars, cluster_spec)

    from mlflow.projects.submitted_run import SubmittedRun
    pollable_run = DatabricksPollableRun(db_run_id)
    return SubmittedRun(remote_run, pollable_run)
def test_log_model(sequential_model, data, sequential_predicted):
    """
    Log a PyTorch model, reload it through its ``runs:/`` URI and check that the reloaded model
    reproduces ``sequential_predicted`` on ``data``.
    """
    original_uri = tracking.get_tracking_uri()
    # Exercise log_model() both without and with an explicitly started run.
    for should_start_run in (False, True):
        with TempDir(chdr=True, remove_on_exit=True) as tmp:
            try:
                tracking.set_tracking_uri(tmp.path("test"))
                if should_start_run:
                    mlflow.start_run()

                artifact_path = "pytorch"
                mlflow.pytorch.log_model(sequential_model, artifact_path=artifact_path)
                logged_run_id = mlflow.active_run().info.run_id
                model_uri = "runs:/{run_id}/{artifact_path}".format(
                    run_id=logged_run_id, artifact_path=artifact_path)

                # Round-trip: load the logged model and compare predictions.
                reloaded_model = mlflow.pytorch.load_model(model_uri=model_uri)
                np.testing.assert_array_equal(_predict(reloaded_model, data),
                                              sequential_predicted)
            finally:
                # Always close the run and restore the global tracking URI.
                mlflow.end_run()
                tracking.set_tracking_uri(original_uri)
def run_databricks(uri, entry_point, version, parameters, experiment_id, cluster_spec,
                   git_username, git_password):
    """
    Runs the project at the specified URI on Databricks, returning a `SubmittedRun` that can be
    used to query the run's status or wait for the resulting Databricks Job run to terminate.

    :param uri: Project URI.
    :param entry_point: Name of the entry point to run; its parameters are validated up front.
    :param version: Project version used when fetching the project.
    :param parameters: Parameters for the entry point.
    :param experiment_id: Experiment ID used for the upload, the run and the job environment.
    :param cluster_spec: Path to a file holding the JSON cluster specification.
    :param git_username: Forwarded to ``_fetch_and_clean_project``.
    :param git_password: Forwarded to ``_fetch_and_clean_project``.
    :return: A ``DatabricksSubmittedRun`` for the launched job run.
    :raises ValueError: re-raised when the cluster spec file cannot be parsed as JSON.
    """
    tracking_uri = tracking.get_tracking_uri()
    _before_run_validations(tracking_uri, cluster_spec)

    # Fetch the project locally, validate the entry point parameters, then push to DBFS.
    work_dir = _fetch_and_clean_project(
        uri=uri, version=version, git_username=git_username, git_password=git_password)
    entry = _load_project(work_dir).get_entry_point(entry_point)
    entry._validate_parameters(parameters)
    dbfs_project_uri = _upload_project_to_dbfs(work_dir, experiment_id)

    remote_run = tracking._create_run(
        experiment_id=experiment_id,
        source_name=_expand_uri(uri),
        source_version=tracking._get_git_commit(work_dir),
        entry_point_name=entry_point,
        source_type=SourceType.PROJECT)

    env_vars = {}
    env_vars[tracking._TRACKING_URI_ENV_VAR] = tracking_uri
    env_vars[tracking._EXPERIMENT_ID_ENV_VAR] = experiment_id
    run_id = remote_run.run_info.run_uuid

    eprint("=== Running entry point %s of project %s on Databricks. ===" % (entry_point, uri))

    # Launch run on Databricks
    with open(cluster_spec, 'r') as spec_file:
        try:
            cluster_spec_json = json.load(spec_file)
        except ValueError:
            # `cluster_spec` is still the file path here.
            eprint("Error when attempting to load and parse JSON cluster spec from file "
                   "%s. " % cluster_spec)
            raise

    relative_dbfs_path = _parse_dbfs_uri_path(dbfs_project_uri).lstrip("/")
    fuse_dst_dir = os.path.join("/dbfs/", relative_dbfs_path)
    command = _get_databricks_run_cmd(fuse_dst_dir, run_id, entry_point, parameters)
    db_run_id = _run_shell_command_job(uri, command, env_vars, cluster_spec_json)
    return DatabricksSubmittedRun(db_run_id, run_id)
def run_databricks(remote_run, uri, entry_point, work_dir, parameters, experiment_id,
                   cluster_spec):
    """
    Runs the project at the specified URI on Databricks, returning a `SubmittedRun` that can be
    used to query the run's status or wait for the resulting Databricks Job run to terminate.

    :param remote_run: Already-created run whose ``info.run_uuid`` identifies this execution.
    :param uri: Project URI (used for logging and job submission).
    :param entry_point: Name of the entry point to run.
    :param work_dir: Local directory holding the project; uploaded to DBFS.
    :param parameters: Parameters for the entry point.
    :param experiment_id: Experiment ID used for the upload and the job environment.
    :param cluster_spec: Path to a file holding the JSON cluster specification.
    :return: A ``DatabricksSubmittedRun`` for the launched job run.
    :raises ValueError: re-raised when the cluster spec file cannot be parsed as JSON.
    """
    tracking_uri = tracking.get_tracking_uri()
    _before_run_validations(tracking_uri, cluster_spec)

    dbfs_fuse_uri = _upload_project_to_dbfs(work_dir, experiment_id)

    env_vars = {}
    env_vars[tracking._TRACKING_URI_ENV_VAR] = tracking_uri
    env_vars[tracking._EXPERIMENT_ID_ENV_VAR] = experiment_id
    run_id = remote_run.info.run_uuid

    eprint("=== Running entry point %s of project %s on Databricks. ===" % (entry_point, uri))

    # Launch run on Databricks
    with open(cluster_spec, 'r') as spec_file:
        try:
            cluster_spec_json = json.load(spec_file)
        except ValueError:
            # `cluster_spec` is still the file path here.
            eprint("Error when attempting to load and parse JSON cluster spec from file "
                   "%s. " % cluster_spec)
            raise

    command = _get_databricks_run_cmd(dbfs_fuse_uri, run_id, entry_point, parameters)
    db_run_id = _run_shell_command_job(uri, command, env_vars, cluster_spec_json)
    return DatabricksSubmittedRun(db_run_id, run_id)
def _get_docker_command(image, active_run, docker_args=None, volumes=None, user_env_vars=None):
    """
    Build the ``docker run`` argument list for executing a run inside ``image``.

    :param image: Image object; its first tag (``image.tags[0]``) is the image to run.
    :param active_run: Run whose ``info`` supplies the run ID, experiment ID and artifact URI.
    :param docker_args: Optional dict of extra ``docker run`` options. One-character names
                        become ``-x`` options, longer names ``--name``. A ``True`` value emits
                        the bare flag, a ``False`` value omits the option entirely, and any
                        other value is emitted as ``flag value``.
    :param volumes: Optional iterable of volume specs, each passed via ``-v``.
    :param user_env_vars: Optional list whose entries are either ``[name, value]`` lists
                          (define a variable) or a plain name (copy that variable from the
                          current process environment).
    :return: List of command-line arguments.
    :raises MlflowException: if a variable to be copied is not set in ``os.environ``.
    """
    from mlflow.projects.docker import get_docker_tracking_cmd_and_envs
    docker_path = "docker"
    cmd = [docker_path, "run", "--rm"]

    if docker_args:
        for name, value in docker_args.items():
            flag = ("-" if len(name) == 1 else "--") + name
            if isinstance(value, bool):
                # Boolean options are bare flags. Previously a False value fell through to the
                # "name value" branch and put a non-string `False` into the command.
                if value:
                    cmd.append(flag)
            else:
                # Stringify so the command holds only strings, like the "-e" entries below.
                cmd += [flag, str(value)]

    env_vars = get_run_env_vars(run_id=active_run.info.run_id,
                                experiment_id=active_run.info.experiment_id)
    tracking_uri = tracking.get_tracking_uri()
    tracking_cmds, tracking_envs = get_docker_tracking_cmd_and_envs(tracking_uri)
    artifact_cmds, artifact_envs = _get_docker_artifact_storage_cmd_and_envs(
        active_run.info.artifact_uri)

    cmd += tracking_cmds + artifact_cmds
    env_vars.update(tracking_envs)
    env_vars.update(artifact_envs)

    if user_env_vars is not None:
        for user_entry in user_env_vars:
            if isinstance(user_entry, list):
                # User has defined a new environment variable for the docker environment
                env_vars[user_entry[0]] = user_entry[1]
            else:
                # User wants to copy an environment variable from system environment
                system_var = os.environ.get(user_entry)
                if system_var is None:
                    # Only plain names are expected on the host. Joining the raw list would
                    # raise TypeError whenever it also contains [name, value] entries.
                    expected_names = ", ".join(
                        entry for entry in user_env_vars if not isinstance(entry, list))
                    raise MlflowException(
                        "This project expects the %s environment variables to "
                        "be set on the machine running the project, but %s was "
                        "not set. Please ensure all expected environment variables "
                        "are set" % (expected_names, user_entry))
                env_vars[user_entry] = system_var

    if volumes is not None:
        for v in volumes:
            cmd += ["-v", v]

    for key, value in env_vars.items():
        cmd += ["-e", "{key}={value}".format(key=key, value=value)]
    cmd += [image.tags[0]]
    return cmd
def _get_tracking_uri_for_run():
    """
    Return the tracking URI to use for a run: the configured URI when one is set and it does
    not start with ``"databricks"``, otherwise the literal ``"databricks"``.
    """
    if tracking.utils.is_tracking_uri_set():
        configured_uri = tracking.get_tracking_uri()
        if not configured_uri.startswith("databricks"):
            return configured_uri
    # Either nothing is configured or the URI is a "databricks..." one.
    return "databricks"
def _get_docker_command(image, active_run):
    """
    Build the ``docker run`` argument list for executing a run inside ``image``.

    :param image: Image reference appended as the last argument.
    :param active_run: Run whose ``info`` supplies the run UUID and experiment ID.
    :return: List of command-line arguments.
    """
    docker_path = "docker"
    cmd = [docker_path, "run", "--rm"]
    env_vars = _get_run_env_vars(run_id=active_run.info.run_uuid,
                                 experiment_id=active_run.info.experiment_id)
    tracking_uri = tracking.get_tracking_uri()

    if tracking.utils._is_local_uri(tracking_uri):
        # Mount the local tracking location and point the container at the mount path.
        mount_spec = "%s:%s" % (tracking_uri, _MLFLOW_DOCKER_TRACKING_DIR_PATH)
        cmd.extend(["-v", mount_spec])
        env_vars[tracking._TRACKING_URI_ENV_VAR] = _MLFLOW_DOCKER_TRACKING_DIR_PATH

    if tracking.utils._is_databricks_uri(tracking_uri):
        profile = mlflow.tracking.utils.get_db_profile_from_uri(tracking_uri)
        creds = databricks_utils.get_databricks_host_creds(profile)
        # Credentials travel as environment variables so that only the current profile is
        # exposed, rather than every profile in ~/.databrickscfg; mounting the relevant part
        # of ~/.databrickscfg into the container might be a better option.
        env_vars[tracking._TRACKING_URI_ENV_VAR] = 'databricks'
        env_vars['DATABRICKS_HOST'] = creds.host
        optional_creds = (
            ('DATABRICKS_USERNAME', creds.username),
            ('DATABRICKS_PASSWORD', creds.password),
            ('DATABRICKS_TOKEN', creds.token),
            ('DATABRICKS_INSECURE', creds.ignore_tls_verification),
        )
        for var_name, var_value in optional_creds:
            if var_value:
                env_vars[var_name] = var_value

    for var_name, var_value in env_vars.items():
        cmd.extend(["-e", "{key}={value}".format(key=var_name, value=var_value)])
    cmd.append(image)
    return cmd
def test_dnn():
    """
    Run the dataset-download project and then the DNN regression app through ``run``, load the
    saved model as a pyfunc and check the type of its predictions.
    """
    previous_uri = tracking.get_tracking_uri()
    # Arguments shared by both `run` invocations.
    local_run_kwargs = dict(mode="local", cluster_spec=None, git_username=None,
                            git_password=None, use_conda=True, storage_dir=None)
    try:
        with TempDir(chdr=False, remove_on_exit=True) as tmp:
            diamonds = tmp.path("diamonds")
            estimator = tmp.path("estimator")
            artifacts = tmp.path("artifacts")
            for directory in (diamonds, estimator, artifacts):
                os.mkdir(directory)
            tracking.set_tracking_uri(artifacts)

            # Download the diamonds dataset via mlflow run
            run(".", entry_point="main", version=None,
                parameters={"dest-dir": diamonds},
                experiment_id=tracking._get_experiment_id(),
                **local_run_kwargs)

            # Run the main dnn app via mlflow
            test_parquet = os.path.join(diamonds, "test_diamonds.parquet")
            dnn_parameters = {
                "model-dir": estimator,
                "train": os.path.join(diamonds, "train_diamonds.parquet"),
                "test": test_parquet,
                "hidden-units": "30,30",
                "label-col": "price",
                "steps": 5000,
                "batch-size": 128,
            }
            run("apps/dnn-regression", entry_point="main", version=None,
                parameters=dnn_parameters,
                experiment_id=tracking._get_experiment_id(),
                **local_run_kwargs)

            # Loading the saved model as a pyfunc (first entry found in the model directory).
            exported_model_dir = os.path.join(estimator, os.listdir(estimator)[0])
            pyfunc = tensorflow.load_pyfunc(exported_model_dir)

            df = pandas.read_parquet(os.path.join(diamonds, "test_diamonds.parquet"))
            predict_df = pyfunc.predict(df)
            assert 'predictions' in predict_df
            assert isinstance(predict_df['predictions'][0][0], numpy.float32)
    finally:
        tracking.set_tracking_uri(previous_uri)
def test_log_saved_model(self):
    """
    Train a small DNNRegressor on two iris features and check that ``self.helper`` returns
    the same predictions as the estimator itself, with and without an explicitly started run.
    """
    iris = datasets.load_iris()
    X = iris.data[:, :2]  # we only take the first two features.
    y = iris.target

    trainingFeatures = {}
    for i in range(2):
        # TensorFlow is fickle about feature names, so we remove offending characters
        cleaned_name = iris.feature_names[i]
        for offending_char in (" ", "(", ")"):
            cleaned_name = cleaned_name.replace(offending_char, "")
        iris.feature_names[i] = cleaned_name
        trainingFeatures[cleaned_name] = iris.data[:, i:i + 1]

    feature_names = iris.feature_names[:2]
    # Creating TensorFlow-specific numeric columns for input.
    tf_feat_cols = [tf.feature_column.numeric_column(col) for col in iris.feature_names[:2]]
    # Creating input training function.
    input_train = tf.estimator.inputs.numpy_input_fn(trainingFeatures, y, shuffle=False,
                                                     batch_size=1)
    # Creating Deep Neural Network Regressor.
    estimator = tf.estimator.DNNRegressor(feature_columns=tf_feat_cols, hidden_units=[1])
    # Training and creating expected predictions on training dataset.
    estimator.train(input_train, steps=10)

    # Saving the estimator's prediction on the training data; assume the DNNRegressor
    # produces a single output column named 'predictions'
    pred_col = "predictions"
    estimator_preds = [sample[pred_col] for sample in estimator.predict(input_train)]
    estimator_preds_df = pd.DataFrame({pred_col: estimator_preds})

    old_tracking_uri = tracking.get_tracking_uri()
    # should_start_run tests whether or not calling log_model() automatically starts a run.
    for should_start_run in (False, True):
        with TempDir(chdr=True, remove_on_exit=True) as tmp:
            try:
                # Creating dict of features names (str) to placeholders (tensors)
                feature_spec = {
                    name: tf.placeholder("float", name=name, shape=[150])
                    for name in feature_names
                }
                tracking.set_tracking_uri("test")
                if should_start_run:
                    tracking.start_run()
                input_df = pandas.DataFrame(data=X, columns=feature_names)
                pyfunc_preds_df = self.helper(feature_spec, tmp, estimator, input_df)
                # Asserting that the loaded model predictions are as expected.
                assert estimator_preds_df.equals(pyfunc_preds_df)
            finally:
                # Restoring the old logging location.
                tracking.end_run()
                tracking.set_tracking_uri(old_tracking_uri)
def _get_run_env_vars(run_id, experiment_id):
    """
    Returns a dictionary of environment variable key-value pairs to set in subprocess launched
    to run MLflow projects.

    :param run_id: Run ID, stored as-is.
    :param experiment_id: Experiment ID, stored as a string.
    """
    env_vars = {}
    env_vars[tracking._RUN_ID_ENV_VAR] = run_id
    env_vars[tracking._TRACKING_URI_ENV_VAR] = tracking.get_tracking_uri()
    env_vars[tracking._EXPERIMENT_ID_ENV_VAR] = str(experiment_id)
    return env_vars
def test_log_saved_model(self):
    """
    Train a small DNNRegressor on two iris features and check that ``self.helper`` returns
    the same predictions as the estimator itself.

    Tracking is redirected to a directory inside a temporary folder for the duration of the
    test and restored afterwards.
    """
    with TempDir(chdr=False, remove_on_exit=True) as tmp:
        iris = datasets.load_iris()
        X = iris.data[:, :2]  # we only take the first two features.
        y = iris.target
        trainingFeatures = {}
        for i in range(0, 2):
            # TensorFlow is fickle about feature names, so we remove offending characters
            iris.feature_names[i] = iris.feature_names[i].replace(" ", "")
            iris.feature_names[i] = iris.feature_names[i].replace("(", "")
            iris.feature_names[i] = iris.feature_names[i].replace(")", "")
            trainingFeatures[iris.feature_names[i]] = iris.data[:, i:i + 1]
        # Taken after the clean-up loop so the names match the keys of trainingFeatures.
        # (An earlier copy made before the loop was never used and has been dropped.)
        feature_names = iris.feature_names[:2]
        # Creating TensorFlow-specific numeric columns for input.
        tf_feat_cols = [tf.feature_column.numeric_column(col) for col in feature_names]
        # Creating input training function.
        input_train = tf.estimator.inputs.numpy_input_fn(trainingFeatures, y, shuffle=False,
                                                         batch_size=1)
        # Creating Deep Neural Network Regressor.
        estimator = tf.estimator.DNNRegressor(feature_columns=tf_feat_cols, hidden_units=[1])
        # Training and creating expected predictions on training dataset.
        estimator.train(input_train, steps=100)
        estimator_preds = estimator.predict(input_train)

        # Setting the logging such that it is in the temp folder and deleted after the test.
        old_tracking_dir = tracking.get_tracking_uri()
        tracking_dir = os.path.abspath(tmp.path("mlruns"))
        try:
            # The URI switch and run start live inside the try so that a failure in either
            # still restores the global tracking URI in the finally block.
            tracking.set_tracking_uri("file://%s" % tracking_dir)
            tracking.start_run()
            # Creating dict of features names (str) to placeholders (tensors)
            feature_spec = {}
            for name in feature_names:
                feature_spec[name] = tf.placeholder("float", name=name, shape=[150])
            saved = [s['predictions'] for s in estimator_preds]
            results = self.helper(feature_spec, tmp, estimator,
                                  pandas.DataFrame(data=X, columns=feature_names))
            # Asserting that the loaded model predictions are as expected.
            np.testing.assert_array_equal(saved, results)
        finally:
            # Restoring the old logging location.
            tracking.end_run()
            tracking.set_tracking_uri(old_tracking_dir)
def _run_databricks(uri, entry_point, version, parameters, experiment_id, cluster_spec,
                    git_username, git_password):
    """
    Submit the project at ``uri`` as a Databricks job run and print the URL of its status page.

    :param uri: Project URI; exported to the job as ``MLFLOW_GIT_URI``.
    :param entry_point: Name of the entry point to run.
    :param version: Project version, passed to ``_get_databricks_run_cmd``.
    :param parameters: Parameters for the entry point.
    :param experiment_id: Experiment ID; exported to the job when not None.
    :param cluster_spec: Path to a file holding the JSON cluster specification.
    :param git_username: Exported as ``MLFLOW_GIT_USERNAME`` when not None.
    :param git_password: Exported as ``MLFLOW_GIT_PASSWORD`` when not None.
    """
    hostname, token, username, password, = _get_db_hostname_and_auth()
    # Basic auth is only used when both a username and a password are available.
    auth = (username, password) if username is not None and password is not None else None

    # Read cluster spec from file
    with open(cluster_spec, 'r') as handle:
        cluster_spec = json.load(handle)

    # Environment for the shell job on Databricks.
    env_vars = {
        "MLFLOW_GIT_URI": uri,
        tracking._TRACKING_URI_ENV_VAR: tracking.get_tracking_uri(),
    }
    if git_username is not None:
        env_vars["MLFLOW_GIT_USERNAME"] = git_username
    if git_password is not None:
        env_vars["MLFLOW_GIT_PASSWORD"] = git_password
    # Pass experiment ID to shell job on Databricks as an environment variable.
    if experiment_id is not None:
        eprint("=== Using experiment ID %s ===" % experiment_id)
        env_vars[tracking._EXPERIMENT_ID_ENV_VAR] = experiment_id

    # Make jobs API request to launch run.
    req_body_json = {
        'run_name': 'MLflow Job Run for %s' % uri,
        'new_cluster': cluster_spec,
        'shell_command_task': {
            'command': _get_databricks_run_cmd(uri, entry_point, version, parameters),
            "env_vars": env_vars,
        },
    }
    eprint("=== Running entry point %s of project %s on Databricks. ===" % (entry_point, uri))
    run_submit_res = rest_utils.databricks_api_request(
        hostname=hostname, endpoint="jobs/runs/submit", token=token, auth=auth, method="POST",
        req_body_json=req_body_json)
    run_id = run_submit_res["run_id"]
    # The run ID was previously never interpolated, so a literal "%s" was printed.
    eprint("=== Launched MLflow run as Databricks job run with ID %s. Getting run status "
           "page URL... ===" % run_id)
    run_info = rest_utils.databricks_api_request(
        hostname=hostname, endpoint="jobs/runs/get", token=token, auth=auth, method="GET",
        params={"run_id": run_id})
    jobs_page_url = run_info["run_page_url"]
    eprint("=== Check the run's status at %s ===" % jobs_page_url)
def run_databricks(remote_run, uri, entry_point, work_dir, parameters, experiment_id,
                   cluster_spec):
    """
    Runs the project at the specified URI on Databricks, returning a `SubmittedRun` that can be
    used to query the run's status or wait for the resulting Databricks Job run to terminate.

    The Databricks profile is derived from the current tracking URI and the actual submission
    is delegated to a ``DatabricksJobRunner``.
    """
    current_tracking_uri = tracking.get_tracking_uri()
    profile = tracking.utils.get_db_profile_from_uri(current_tracking_uri)
    run_id = remote_run.info.run_uuid

    db_job_runner = DatabricksJobRunner(databricks_profile=profile)
    db_run_id = db_job_runner.run_databricks(
        uri, entry_point, work_dir, parameters, experiment_id, cluster_spec, run_id)

    submitted_run = DatabricksSubmittedRun(db_run_id, run_id, db_job_runner)
    submitted_run._print_description_and_log_tags()
    return submitted_run
def test_gbt():
    """
    Run the dataset-download project and then the GBT regression app through ``run``, load the
    model pickled by the new run as a pyfunc and check the type of its predictions.
    """
    previous_uri = tracking.get_tracking_uri()
    # Arguments shared by both `run` invocations.
    local_run_kwargs = dict(experiment_id=0, mode="local", cluster_spec=None,
                            git_username=None, git_password=None, use_conda=True,
                            storage_dir=None)
    with TempDir(chdr=False, remove_on_exit=True) as tmp:
        try:
            diamonds = tmp.path("diamonds")
            artifacts = tmp.path("artifacts")
            for directory in (diamonds, artifacts):
                os.mkdir(directory)
            tracking.set_tracking_uri(artifacts)

            # Download the diamonds dataset via mlflow run
            run(".", entry_point="main", version=None,
                parameters={"dest-dir": diamonds}, **local_run_kwargs)

            # Snapshot the experiment folder so the next run's folder can be told apart.
            initial = os.path.join(artifacts, "0")
            dir_list = os.listdir(initial)

            # Run the main gbt app via mlflow
            test_parquet = os.path.join(diamonds, "test_diamonds.parquet")
            gbt_parameters = {
                "train": os.path.join(diamonds, "train_diamonds.parquet"),
                "test": test_parquet,
                "n-trees": 10,
                "m-depth": 3,
                "learning-rate": .1,
                "loss": "rmse",
                "label-col": "price",
            }
            run("apps/gbt-regression", entry_point="main", version=None,
                parameters=gbt_parameters, **local_run_kwargs)

            # Identifying the new run's folder (the last previously unseen entry, if any).
            new_entries = [item for item in os.listdir(initial) if item not in dir_list]
            main = new_entries[-1] if new_entries else None

            pyfunc = load_pyfunc(os.path.join(initial, main, "artifacts/model/model.pkl"))
            df = pandas.read_parquet(os.path.join(diamonds, "test_diamonds.parquet"))
            # Removing the price column from the DataFrame so we can use the features to predict
            df = df.drop(columns="price")
            # Predicting from the saved pyfunc
            predict = pyfunc.predict(df)
            # Make sure the data is of the right type
            assert isinstance(predict[0], numpy.float32)
        finally:
            tracking.set_tracking_uri(previous_uri)
def _run_project(project, entry_point, work_dir, parameters, use_conda, storage_dir,
                 experiment_id):
    """
    Locally run a project that has been checked out in `work_dir`.

    A run is started and the given parameters are logged to it. The entry point command is
    then executed in a shell, preceded by a conda activation when ``use_conda`` is set, and
    the run is ended as finished or ``"FAILED"`` depending on the outcome.
    """
    storage_dir_for_run = _get_storage_dir(storage_dir)
    eprint("=== Created directory %s for downloading remote URIs passed to arguments of "
           "type 'path' ===" % storage_dir_for_run)

    # Try to build the command first in case the user mis-specified parameters
    entry = project.get_entry_point(entry_point)
    run_project_command = entry.compute_command(parameters, storage_dir_for_run)

    steps = []
    if use_conda:
        env_file = os.path.abspath(os.path.join(work_dir, project.conda_env))
        _maybe_create_conda_env(env_file)
        steps.append("source activate %s" % _get_conda_env_name(env_file))

    # Create a new run and log every provided parameter into it.
    active_run = tracking.start_run(
        experiment_id=experiment_id,
        source_name=project.uri,
        source_version=tracking._get_git_commit(work_dir),
        entry_point_name=entry_point,
        source_type=SourceType.PROJECT)
    provided_params = parameters.items() if parameters is not None else ()
    for key, value in provided_params:
        active_run.log_param(Param(key, value))

    # Add the run id into a magic environment variable that the subprocess will read,
    # causing it to reuse the run.
    exp_id = experiment_id or tracking._get_experiment_id()
    env_map = {}
    env_map[tracking._RUN_NAME_ENV_VAR] = active_run.run_info.run_uuid
    env_map[tracking._TRACKING_URI_ENV_VAR] = tracking.get_tracking_uri()
    env_map[tracking._EXPERIMENT_ID_ENV_VAR] = str(exp_id)

    steps.append(run_project_command)
    command = " && ".join(steps)
    eprint("=== Running command: %s ===" % command)
    try:
        shell = os.environ.get("SHELL", "bash")
        process.exec_cmd([shell, "-c", command], cwd=work_dir, stream_output=True, env=env_map)
        tracking.end_run()
        eprint("=== Run succeeded ===")
    except process.ShellCommandException:
        tracking.end_run("FAILED")
        eprint("=== Run failed ===")
def _run_project(project, entry_point, work_dir, parameters, use_conda, storage_dir,
                 experiment_id):
    """
    Locally run a project that has been checked out in `work_dir`.

    A run is started, the given parameters (if any) are logged to it, a command is executed
    in ``work_dir`` and the run is ended as finished or ``"FAILED"`` depending on the outcome.

    NOTE(review): this variant carries local debugging changes. The tracking URI and the
    executed command are hard-coded (see the inline notes), and ``use_conda`` is accepted but
    never used.
    """
    # NOTE(review): hard-coded, Windows-style relative tracking URI ("added by cliicy"); it
    # overrides whatever tracking URI the caller configured. Confirm before relying on it.
    mlflow.set_tracking_uri('..\\')
    if storage_dir is not None and not os.path.exists(storage_dir):
        os.makedirs(storage_dir)
    storage_dir_for_run = tempfile.mkdtemp(dir=storage_dir)
    print("=== Created directory %s for downloading remote URIs passed to arguments of "
          "type 'path' ===" % storage_dir_for_run)

    # Try to build the command first in case the user mis-specified parameters
    run_project_command = project.get_entry_point(entry_point).compute_command(
        parameters, storage_dir_for_run)
    commands = []

    # Create a new run and log every provided parameter into it.
    active_run = tracking.start_run(
        experiment_id=experiment_id,
        source_name=project.uri,
        source_version=tracking._get_git_commit(work_dir),
        entry_point_name=entry_point,
        source_type=SourceType.PROJECT)
    # Guard against parameters=None instead of failing on `.items()`.
    if parameters is not None:
        for key, value in parameters.items():
            active_run.log_param(Param(key, value))

    # Add the run id into a magic environment variable that the subprocess will read,
    # causing it to reuse the run.
    exp_id = experiment_id or tracking._get_experiment_id()
    env_map = {
        tracking._RUN_NAME_ENV_VAR: active_run.run_info.run_uuid,
        tracking._TRACKING_URI_ENV_VAR: tracking.get_tracking_uri(),
        tracking._EXPERIMENT_ID_ENV_VAR: str(exp_id),
    }
    commands.append(run_project_command)
    command = " && ".join(commands)
    print("=== Running command: %s ===" % command)
    try:
        # NOTE(review): the command computed from the entry point (and printed above) is
        # discarded here in favour of a hard-coded script invocation. The original
        # `[$SHELL, "-c", command]` invocation had been commented out. Left as found because
        # the intended replacement is not evident from this file.
        command = "python my_train.py 0.4 0.1"
        print("will run command aaaaa " + command + " " + work_dir + " aaaaa ")
        process.exec_cmd(command, cwd=work_dir, stream_output=True, env=env_map)
        tracking.end_run()
        print("=== Run succeeded ===")
    except process.ShellCommandException:
        tracking.end_run("FAILED")
        print("=== Run failed ===")
def setup_mlflow_tracking(self, URI, experiment_name, run_name):
    """
    Point MLflow at ``URI``, get or create the named experiment and start a run in it.

    Sets ``self.client`` (an ``MlflowClient`` for the tracking URI) and ``self.run_id`` (the
    ID of the run that was started).

    :param URI: Tracking server URI passed to ``set_tracking_uri``.
    :param experiment_name: Experiment to use; created when no experiment has that name.
    :param run_name: Name given to the new run.
    """
    # select URI for server tracking
    set_tracking_uri(uri=URI)
    if is_tracking_uri_set():
        logging.debug('MLFlow URI: %s', get_tracking_uri())

    # CRUD interface
    self.client = MlflowClient(tracking_uri=get_tracking_uri())

    # Experiment setup: look the experiment up once and reuse the result, rather than
    # querying by name a second time in the "already exists" branch.
    experiment = self.client.get_experiment_by_name(name=experiment_name)
    if experiment is None:
        exp_id = self.client.create_experiment(name=experiment_name)
    else:
        exp_id = experiment.experiment_id

    # Run setup
    mlflow.start_run(experiment_id=exp_id, run_name=run_name)
    self.run_id = mlflow.active_run().info.run_id
    data = self.client.get_run(self.run_id).data
    # NOTE(review): reads the "mlflow.runName" tag back from the server; presumably always
    # present once run_name is given. Confirm behaviour for run_name=None.
    logging.info('MLFlow tracking started - Experiment: %s - Run: %s',
                 experiment_name, data.tags["mlflow.runName"])
def test_log_param(tracking_uri_mock):
    """
    Log four parameters (one key twice) inside a run and check that the finished run reports
    three parameters with the expected values.
    """
    print(tracking.get_tracking_uri())
    active_run = start_run()
    run_uuid = active_run.info.run_uuid
    logged_params = [
        ("name_1", "a"),
        ("name_2", "b"),
        ("name_1", "c"),
        ("nested/nested/name", 5),
    ]
    with active_run:
        for param_key, param_value in logged_params:
            mlflow.log_param(param_key, param_value)

    finished_run = tracking.MlflowClient().get_run(run_uuid)
    stored_params = finished_run.data.params
    # Validate params
    assert len(stored_params) == 3
    expected_pairs = {"name_1": "c", "name_2": "b", "nested/nested/name": "5"}
    for stored in stored_params:
        assert expected_pairs[stored.key] == stored.value
def test_model_log(tmpdir):
    """
    Fit a Spark ML pipeline on the iris dataset, log it with ``sparkm.log_model`` and check
    that the model gives the same predictions when reloaded as a pyfunc and as a Spark model.

    :param tmpdir: Temporary directory object providing ``mkdir`` (used for the conda env
                   file, a model directory and the tracking directory).
    """
    conda_env = os.path.join(str(tmpdir), "conda_env.yml")
    _mlflow_conda_env(conda_env, additional_pip_deps=["pyspark=={}".format(pyspark_version)])
    iris = datasets.load_iris()
    X = iris.data  # all features are used
    y = iris.target
    pandas_df = pd.DataFrame(X, columns=iris.feature_names)
    pandas_df['label'] = pd.Series(y)
    # The config key was "spark_session.python.worker.reuse", which is not a Spark property
    # name; it looks like a rename of the session variable leaked into the string.
    spark_session = pyspark.sql.SparkSession.builder \
        .config(key="spark.python.worker.reuse", value=True) \
        .master("local-cluster[2, 1, 1024]") \
        .getOrCreate()
    spark_df = spark_session.createDataFrame(pandas_df)
    # NOTE(review): model_path is not used below; kept because mkdir creates the directory.
    model_path = tmpdir.mkdir("model")
    assembler = VectorAssembler(inputCols=iris.feature_names, outputCol="features")
    lr = LogisticRegression(maxIter=50, regParam=0.1, elasticNetParam=0.8)
    pipeline = Pipeline(stages=[assembler, lr])
    # Fit the model
    model = pipeline.fit(spark_df)
    # Reference predictions from the in-memory model
    preds_df = model.transform(spark_df)
    preds1 = [row.prediction for row in preds_df.select("prediction").collect()]

    old_tracking_uri = tracking.get_tracking_uri()
    # should_start_run tests whether or not calling log_model() automatically starts a run.
    for should_start_run in [False, True]:
        # Created before the try so the finally block can always refer to tracking_dir.
        tracking_dir = os.path.abspath(str(tmpdir.mkdir("mlruns")))
        try:
            tracking.set_tracking_uri("file://%s" % tracking_dir)
            if should_start_run:
                tracking.start_run()
            sparkm.log_model(artifact_path="model", spark_model=model)
            run_id = tracking.active_run().info.run_uuid

            # Reload as a generic pyfunc and compare.
            x = pyfunc.load_pyfunc("model", run_id=run_id)
            preds2 = x.predict(pandas_df)
            assert preds1 == preds2

            # Reload as a native Spark model and compare.
            reloaded_model = sparkm.load_model("model", run_id=run_id)
            preds_df_1 = reloaded_model.transform(spark_df)
            preds3 = [row.prediction for row in preds_df_1.select("prediction").collect()]
            assert preds1 == preds3
        finally:
            tracking.end_run()
            tracking.set_tracking_uri(old_tracking_uri)
            # Removed so the next iteration can create a fresh "mlruns" directory.
            shutil.rmtree(tracking_dir)
def test_model_log(self):
    """
    Log ``self._linear_lr`` with ``sklearn.log_model``, reload it from the active run and
    check that its predictions on ``self._X`` equal ``self._linear_lr_predict``.
    """
    original_uri = tracking.get_tracking_uri()
    # Exercise log_model() both without and with an explicitly started run.
    for should_start_run in (False, True):
        with TempDir(chdr=True, remove_on_exit=True) as tmp:
            try:
                tracking.set_tracking_uri("test")
                if should_start_run:
                    tracking.start_run()
                sklearn.log_model(sk_model=self._linear_lr, artifact_path="linear")

                logged_run_id = tracking.active_run().info.run_uuid
                reloaded = sklearn.load_model("linear", run_id=logged_run_id)
                np.testing.assert_array_equal(self._linear_lr_predict,
                                              reloaded.predict(self._X))
            finally:
                # Always close the run and restore the global tracking URI.
                tracking.end_run()
                tracking.set_tracking_uri(original_uri)
def _run_project(project, entry_point, work_dir, parameters, use_conda, storage_dir,
                 experiment_id, block):
    """
    Locally run a project that has been checked out in `work_dir`.

    A run is created and the given parameters are logged to it. The shell command is built
    (with a conda activation first when ``use_conda`` is set) and handed to
    ``_launch_local_run``, whose result is returned. ``block`` is forwarded as that call's
    ``stream_output`` argument.
    """
    storage_dir_for_run = _get_storage_dir(storage_dir)
    eprint("=== Created directory %s for downloading remote URIs passed to arguments of "
           "type 'path' ===" % storage_dir_for_run)

    # Try to build the command first in case the user mis-specified parameters
    entry = project.get_entry_point(entry_point)
    run_project_command = entry.compute_command(parameters, storage_dir_for_run)

    steps = []
    if use_conda:
        env_file = os.path.abspath(os.path.join(work_dir, project.conda_env))
        _maybe_create_conda_env(env_file)
        steps.append("source activate %s" % _get_conda_env_name(env_file))

    # Create a new run and log every provided parameter into it.
    active_run = tracking._create_run(
        experiment_id=experiment_id,
        source_name=project.uri,
        source_version=tracking._get_git_commit(work_dir),
        entry_point_name=entry_point,
        source_type=SourceType.PROJECT)
    provided_params = parameters.items() if parameters is not None else ()
    for key, value in provided_params:
        active_run.log_param(Param(key, value))

    # Add the run id into a magic environment variable that the subprocess will read,
    # causing it to reuse the run.
    env_map = {}
    env_map[tracking._RUN_ID_ENV_VAR] = active_run.run_info.run_uuid
    env_map[tracking._TRACKING_URI_ENV_VAR] = tracking.get_tracking_uri()
    env_map[tracking._EXPERIMENT_ID_ENV_VAR] = str(experiment_id)

    steps.append(run_project_command)
    command = " && ".join(steps)
    eprint("=== Running command '%s' in run with ID '%s' === "
           % (command, active_run.run_info.run_uuid))
    return _launch_local_run(active_run, command, work_dir, env_map, stream_output=block)
def test_log_saved_model(self):
    """
    Export ``self._dnn`` as a TensorFlow SavedModel, log it, load the export as a pyfunc and
    check that its predictions on ``self._X`` match ``self._dnn_predict``.

    Tracking is redirected to a directory inside a temporary folder for the duration of the
    test and restored afterwards.
    """
    with TempDir(chdr=False, remove_on_exit=True) as tmp:
        # Setting the logging such that it is in the temp folder and deleted after the test.
        old_tracking_dir = tracking.get_tracking_uri()
        tracking_dir = os.path.abspath(tmp.path("mlruns"))
        try:
            # The URI switch and run start live inside the try so that a failure in either
            # still restores the global tracking URI in the finally block.
            tracking.set_tracking_uri("file://%s" % tracking_dir)
            tracking.start_run()
            # Creating dict of features names (str) to placeholders (tensors)
            feature_spec = {}
            for name in self._feature_names:
                feature_spec[name] = tf.placeholder("float", name=name, shape=[150])
            # Creating receiver function for model saving.
            receiver_fn = tf.estimator.export.build_raw_serving_input_receiver_fn(feature_spec)
            saved_model_path = tmp.path("model")
            os.makedirs(saved_model_path)
            os.makedirs(tmp.path("hello"))
            # Saving Tensorflow model.
            saved_model_path = self._dnn.export_savedmodel(saved_model_path,
                                                           receiver_fn).decode("utf-8")
            # Logging the Tensorflow model just saved.
            tensorflow.log_saved_model(saved_model_dir=saved_model_path,
                                       signature_def_key="predict",
                                       artifact_path=tmp.path("hello"))
            # Loading the saved Tensorflow model as a pyfunc.
            # NOTE(review): this loads the local export, not the logged artifact. Confirm
            # that is the intent.
            x = pyfunc.load_pyfunc(saved_model_path)
            # Predicting on the iris dataset using the pyfunc.
            xpred = x.predict(pandas.DataFrame(data=self._X, columns=self._feature_names))
            saved = [s['predictions'] for s in self._dnn_predict]
            loaded = [row for _, row in xpred.iterrows()]
            # Asserting that the loaded model predictions are as expected.
            np.testing.assert_array_equal(saved, loaded)
        finally:
            # Restoring the old logging location.
            tracking.end_run()
            tracking.set_tracking_uri(old_tracking_dir)
def test_model_log(self):
    """
    Log the H2O model with ``mlflow.h2o.log_model``, load it back from the active run and
    check that its predictions on ``self.test`` equal ``self.predicted``.

    The scenario is executed twice: once without and once with an explicitly started run,
    to test whether or not calling ``log_model()`` automatically starts a run.
    """
    old_uri = tracking.get_tracking_uri()
    for should_start_run in [False, True]:
        # chdr=True: the relative tracking URI "test" is used while the temp directory is
        # the working directory (assumes TempDir changes into it - TODO confirm).
        with TempDir(chdr=True, remove_on_exit=True):
            try:
                tracking.set_tracking_uri("test")
                if should_start_run:
                    tracking.start_run()
                mlflow.h2o.log_model(self.gbm, artifact_path="gbm")

                # Load the model back from the run that now holds the artifact.
                gbm_loaded = mlflow.h2o.load_model(
                    "gbm", run_id=tracking.active_run().info.run_uuid)

                # NOTE: the builtin all() applied to a DataFrame iterates over its column
                # labels, which made the previous assertion pass regardless of the
                # predicted values. Reduce over the boolean cell values instead.
                # Assumes the comparison yields a pandas object (as_data_frame() with
                # pandas available) - verify against the fixture.
                matches = gbm_loaded.predict(self.test).as_data_frame() == self.predicted
                assert matches.values.all()
            finally:
                tracking.end_run()
                tracking.set_tracking_uri(old_uri)
def _get_docker_command(image, active_run):
    """
    Assemble the ``docker run`` argument list used to execute ``image`` for ``active_run``.

    The list starts with ``docker run --rm``, followed by the tracking and artifact
    storage arguments, one ``-e KEY=VALUE`` pair per environment variable, and finally
    the first tag of ``image``.

    :param image: Image object; only ``image.tags[0]`` is used.
    :param active_run: Run whose ``info`` supplies the run ID, experiment ID and
                       artifact URI.
    :return: The command as a list of strings.
    """
    run_info = active_run.info
    env_vars = _get_run_env_vars(run_id=run_info.run_id,
                                 experiment_id=run_info.experiment_id)

    tracking_cmds, tracking_envs = _get_docker_tracking_cmd_and_envs(
        tracking.get_tracking_uri())
    artifact_cmds, artifact_envs = _get_docker_artifact_storage_cmd_and_envs(
        run_info.artifact_uri)

    # Later updates win on key collisions: tracking first, then artifact storage.
    env_vars.update(tracking_envs)
    env_vars.update(artifact_envs)

    command = ["docker", "run", "--rm"]
    command.extend(tracking_cmds + artifact_cmds)
    for name, val in env_vars.items():
        command.extend(["-e", "{key}={value}".format(key=name, value=val)])
    command.append(image.tags[0])
    return command
def test_log_model(model, data, predicted):
    """
    Log ``model`` with ``mlflow.pytorch.log_model``, load it back from the active run and
    check that its predictions on ``data`` equal ``predicted``.

    Runs once without and once with an explicitly started run, which tests whether or
    not calling ``log_model()`` automatically starts a run.
    """
    previous_uri = tracking.get_tracking_uri()
    for should_start_run in (False, True):
        with TempDir(chdr=True, remove_on_exit=True) as tmp:
            try:
                # Track into a location inside the temp directory.
                tracking.set_tracking_uri(tmp.path("test"))
                if should_start_run:
                    mlflow.start_run()
                mlflow.pytorch.log_model(model, artifact_path="pytorch")

                # Reload the model from the run that now holds the artifact.
                active_run_id = mlflow.active_run().info.run_uuid
                reloaded = mlflow.pytorch.load_model("pytorch", run_id=active_run_id)

                assert np.all(_predict(reloaded, data) == predicted)
            finally:
                mlflow.end_run()
                tracking.set_tracking_uri(previous_uri)
def _invoke_mlflow_run_subprocess(work_dir, entry_point, parameters, experiment_id,
                                  use_conda, storage_dir, run_id):
    """
    Launch an MLflow project asynchronously by running ``mlflow run`` in a subprocess.

    :return: A ``LocalSubmittedRun`` wrapping ``run_id`` and the subprocess, which can be
             used to query run status.
    """
    eprint("=== Asynchronously launching MLflow run with ID %s ===" % run_id)

    # The subprocess reads these environment variables; the run ID variable causes it to
    # reuse the existing run.
    subprocess_env = {}
    subprocess_env[tracking._RUN_ID_ENV_VAR] = run_id
    subprocess_env[tracking._TRACKING_URI_ENV_VAR] = tracking.get_tracking_uri()
    subprocess_env[tracking._EXPERIMENT_ID_ENV_VAR] = str(experiment_id)

    # The working directory itself is passed as the project URI.
    run_cmd = _build_mlflow_run_cmd(
        uri=work_dir,
        entry_point=entry_point,
        storage_dir=storage_dir,
        use_conda=use_conda,
        run_id=run_id,
        parameters=parameters)
    return LocalSubmittedRun(run_id, _run_mlflow_run_cmd(run_cmd, subprocess_env))
def test_model_log(model, data, predicted):
    """
    Log ``model`` with ``mlflow.keras.log_model`` and check that both the Keras flavor
    and the pyfunc flavor, loaded back from the active run, reproduce ``predicted``.

    Runs once without and once with an explicitly started run, which tests whether or
    not calling ``log_model()`` automatically starts a run.
    """
    # Only the first element of ``data`` is used as model input here.
    x, _ = data
    previous_uri = tracking.get_tracking_uri()
    for should_start_run in (False, True):
        with TempDir(chdr=True, remove_on_exit=True):
            try:
                tracking.set_tracking_uri("test")
                if should_start_run:
                    tracking.start_run()
                mlflow.keras.log_model(model, artifact_path="keras_model")

                # Native Keras flavor.
                keras_run_id = tracking.active_run().info.run_uuid
                model_loaded = mlflow.keras.load_model("keras_model", run_id=keras_run_id)
                assert all(model_loaded.predict(x) == predicted)

                # Generic pyfunc flavor.
                pyfunc_run_id = tracking.active_run().info.run_uuid
                pyfunc_loaded = mlflow.pyfunc.load_pyfunc("keras_model",
                                                          run_id=pyfunc_run_id)
                assert all(pyfunc_loaded.predict(x).values == predicted)
            finally:
                tracking.end_run()
                tracking.set_tracking_uri(previous_uri)
def _get_tracking_uri_for_run():
    """
    Return the tracking URI to use for a run.

    Any current tracking URI that starts with ``"databricks"`` is collapsed to the plain
    string ``"databricks"``; every other URI is returned unchanged.
    """
    current_uri = tracking.get_tracking_uri()
    return "databricks" if current_uri.startswith("databricks") else current_uri
def test_categorical_columns(self):
    """
    Check logging capabilities on a dataset that contains a categorical column.

    Reference code:
    https://github.com/tensorflow/tensorflow/blob/master/tensorflow/examples/get_started/regression/imports85.py
    """
    with TempDir(chdr=False, remove_on_exit=True) as tmp:
        # Download the data and read it into a pandas DataFrame.
        data_url = "https://archive.ics.uci.edu/ml/machine-learning-databases/autos/imports-85.data"
        file_name = data_url.rsplit("/", 1)[-1]
        path = tf.contrib.keras.utils.get_file(file_name, data_url)

        # Column order matters for the csv reader, hence the OrderedDicts.
        defaults = collections.OrderedDict([
            ("body-style", [""]),
            ("curb-weight", [0.0]),
            ("highway-mpg", [0.0]),
            ("price", [0.0]),
        ])
        types = collections.OrderedDict()
        for column, default in defaults.items():
            types[column] = type(default[0])

        df = pandas.read_csv(path, names=types.keys(), dtype=types, na_values="?")
        df = df.dropna()

        # Split the label column off the feature frame.
        y_train = df.pop("price")

        # Input function used for both training and prediction.
        training_features = {column: df[column].values for column in df}
        input_train = tf.estimator.inputs.numpy_input_fn(
            training_features, y_train.values, shuffle=False, batch_size=1)

        # Feature columns required by the DNNRegressor.
        body_style = tf.feature_column.categorical_column_with_vocabulary_list(
            key="body-style",
            vocabulary_list=["hardtop", "wagon", "sedan", "hatchback", "convertible"])
        feature_columns = [
            tf.feature_column.numeric_column(key="curb-weight"),
            tf.feature_column.numeric_column(key="highway-mpg"),
            # A DNN needs dense input: wrapping the sparse categorical column in an
            # `indicator_column` turns it into a one-hot vector.
            tf.feature_column.indicator_column(body_style),
        ]

        # DNNRegressor with two hidden layers of 20 units each.
        estimator = tf.estimator.DNNRegressor(
            hidden_units=[20, 20], feature_columns=feature_columns)
        estimator.train(input_fn=input_train, steps=100)

        # Predictions of the estimator on its own training data; this result is only
        # iterated later, inside the try block below.
        estimator_preds = estimator.predict(input_train)

        # Point tracking at a directory inside the temp folder so that everything logged
        # by this test is removed afterwards.
        old_tracking_dir = tracking.get_tracking_uri()
        tracking_dir = os.path.abspath(tmp.path("mlruns"))
        tracking.set_tracking_uri("file://%s" % tracking_dir)
        tracking.start_run()
        try:
            # Map every feature name (str) to a placeholder tensor.
            feature_spec = {
                "body-style": tf.placeholder("string", name="body-style", shape=[None]),
                "curb-weight": tf.placeholder("float", name="curb-weight", shape=[None]),
                "highway-mpg": tf.placeholder("float", name="highway-mpg", shape=[None]),
            }

            saved = [prediction['predictions'] for prediction in estimator_preds]
            results = self.helper(feature_spec, tmp, estimator, df)

            # TensorFlow is known to have precision errors, so compare approximately
            # rather than exactly.
            np.testing.assert_array_almost_equal(saved, results, decimal=2)
        finally:
            # Restore the previous tracking location.
            tracking.end_run()
            tracking.set_tracking_uri(old_tracking_dir)