def _run_databricks(uri, entry_point, version, parameters, experiment_id, cluster_spec,
                    git_username, git_password):
    hostname, token, username, password = _get_db_hostname_and_auth()
    auth = (username, password) if username is not None and password is not None else None
    # Read cluster spec from file
    with open(cluster_spec, 'r') as handle:
        cluster_spec = json.load(handle)
    # Make jobs API request to launch run.
    env_vars = {"MLFLOW_GIT_URI": uri}
    if git_username is not None:
        env_vars["MLFLOW_GIT_USERNAME"] = git_username
    if git_password is not None:
        env_vars["MLFLOW_GIT_PASSWORD"] = git_password
    # Pass experiment ID to shell job on Databricks as an environment variable.
    if experiment_id is not None:
        eprint("=== Using experiment ID %s ===" % experiment_id)
        env_vars[tracking._EXPERIMENT_ID_ENV_VAR] = experiment_id
    req_body_json = {
        'run_name': 'MLflow Job Run for %s' % uri,
        'new_cluster': cluster_spec,
        'shell_command_task': {
            'command': _get_databricks_run_cmd(uri, entry_point, version, parameters),
            "env_vars": env_vars
        },
        "libraries": [{"pypi": {"package": "mlflow==%s" % VERSION}}],
    }
    eprint("=== Running entry point %s of project %s on Databricks. ===" % (entry_point, uri))
    run_submit_res = rest_utils.databricks_api_request(
        hostname=hostname, endpoint="jobs/runs/submit", token=token, auth=auth, method="POST",
        req_body_json=req_body_json)
    run_id = run_submit_res["run_id"]
    eprint("=== Launched MLflow run as Databricks job run with ID %s. Getting run status "
           "page URL... ===" % run_id)
    run_info = rest_utils.databricks_api_request(
        hostname=hostname, endpoint="jobs/runs/get", token=token, auth=auth, method="GET",
        params={"run_id": run_id})
    jobs_page_url = run_info["run_page_url"]
    eprint("=== Check the run's status at %s ===" % jobs_page_url)
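# Illustrative sketch, not part of this module: _run_databricks reads `cluster_spec` from a JSON
# file and forwards its contents verbatim as the `new_cluster` field of the jobs/runs/submit
# request body. The Spark version, node type, and worker count below are placeholder assumptions;
# substitute values valid for your workspace.
#
# Example contents for the file passed as `cluster_spec`:
#
#     {
#       "spark_version": "4.0.x-scala2.11",
#       "node_type_id": "i3.xlarge",
#       "num_workers": 2
#     }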
def test_databricks_http_request_integration(get_config_for_profile, request):
    """Confirms that the Databricks HTTP request params can in fact be used as an HTTP request."""
    def confirm_request_params(**kwargs):
        assert kwargs == {
            'method': 'PUT',
            'url': 'host/api/2.0/clusters/list',
            'headers': {
                'Authorization': 'Basic dXNlcjpwYXNz'
            },
            'verify': True,
            'json': {'a': 'b'},
        }
        http_response = mock.MagicMock()
        http_response.status_code = 200
        http_response.text = '{"OK": "woo"}'
        return http_response
    request.side_effect = confirm_request_params
    get_config_for_profile.return_value = \
        DatabricksConfig("host", "user", "pass", None, insecure=False)
    response = rest_utils.databricks_api_request('clusters/list', 'PUT', json={'a': 'b'})
    assert response == {'OK': 'woo'}
def _dbfs_path_exists(dbfs_uri):
    """
    Returns True if the passed-in path exists in DBFS for the workspace corresponding to the
    default Databricks CLI profile.
    """
    dbfs_path = _parse_dbfs_uri_path(dbfs_uri)
    json_response_obj = rest_utils.databricks_api_request(
        endpoint="dbfs/get-status", method="GET", json={"path": dbfs_path})
    # If the request fails with a RESOURCE_DOES_NOT_EXIST error, the file does not exist on DBFS.
    error_code_field = "error_code"
    if error_code_field in json_response_obj:
        if json_response_obj[error_code_field] == "RESOURCE_DOES_NOT_EXIST":
            return False
        raise ExecutionException("Got unexpected error response when checking whether file %s "
                                 "exists in DBFS: %s" % (dbfs_path, json_response_obj))
    return True
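# Illustrative usage (the URI below is hypothetical): callers can use this helper to skip
# re-uploading a project archive that is already present in DBFS, e.g.
#
#     if not _dbfs_path_exists("dbfs:/mlflow-projects/my-project.tar.gz"):
#         ...upload the archive...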
def _jobs_runs_submit(req_body_json):
    return rest_utils.databricks_api_request(
        endpoint="jobs/runs/submit", method="POST", req_body_json=req_body_json)
def _jobs_runs_cancel(databricks_run_id):
    return rest_utils.databricks_api_request(
        endpoint="jobs/runs/cancel", method="POST", req_body_json={"run_id": databricks_run_id})
def _jobs_runs_get(databricks_run_id):
    return rest_utils.databricks_api_request(
        endpoint="jobs/runs/get", method="GET", params={"run_id": databricks_run_id})
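# Illustrative sketch only: shows how the three thin wrappers above compose into a simple
# blocking poll loop. It assumes the standard Databricks jobs/runs/get response shape (a "state"
# object with a "life_cycle_state" field); the field names and terminal states are assumptions
# taken from the Databricks Jobs API docs, not from this module.
import time


def _wait_for_run_completion_example(req_body_json, poll_interval_seconds=30):
    run_id = _jobs_runs_submit(req_body_json)["run_id"]
    try:
        while True:
            run_state = _jobs_runs_get(run_id).get("state", {})
            # Terminal lifecycle states per the Databricks Jobs API.
            if run_state.get("life_cycle_state") in ("TERMINATED", "SKIPPED", "INTERNAL_ERROR"):
                return run_state
            time.sleep(poll_interval_seconds)
    except KeyboardInterrupt:
        # Best-effort cancellation if the caller interrupts the wait.
        _jobs_runs_cancel(run_id)
        raise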