Ejemplo n.º 1
0
def _run_databricks(uri, entry_point, version, parameters, experiment_id,
                    cluster_spec, git_username, git_password):
    """
    Submit an MLflow project run as a Databricks job run and print the URL of its status page.

    :param uri: Project URI; exported to the job as ``MLFLOW_GIT_URI`` and used in the run name.
    :param entry_point: Entry point name, forwarded to ``_get_databricks_run_cmd``.
    :param version: Project version, forwarded to ``_get_databricks_run_cmd``.
    :param parameters: Entry point parameters, forwarded to ``_get_databricks_run_cmd``.
    :param experiment_id: If not None, exported to the job through the environment variable
                          named by ``tracking._EXPERIMENT_ID_ENV_VAR``.
    :param cluster_spec: Path to a JSON file whose parsed content is sent as ``new_cluster``.
    :param git_username: If not None, exported to the job as ``MLFLOW_GIT_USERNAME``.
    :param git_password: If not None, exported to the job as ``MLFLOW_GIT_PASSWORD``.
    """
    hostname, token, username, password = _get_db_hostname_and_auth()
    # Basic-auth credentials are only forwarded when both halves are available.
    if username is None or password is None:
        auth = None
    else:
        auth = (username, password)

    # Load the cluster specification from the JSON file at the given path.
    with open(cluster_spec, 'r') as spec_file:
        new_cluster = json.load(spec_file)

    # Environment variables handed to the shell command task on Databricks.
    env_vars = {"MLFLOW_GIT_URI": uri}
    optional_git_vars = (
        ("MLFLOW_GIT_USERNAME", git_username),
        ("MLFLOW_GIT_PASSWORD", git_password),
    )
    for var_name, var_value in optional_git_vars:
        if var_value is not None:
            env_vars[var_name] = var_value
    # The experiment ID travels to the remote job as an environment variable too.
    if experiment_id is not None:
        eprint("=== Using experiment ID %s ===" % experiment_id)
        env_vars[tracking._EXPERIMENT_ID_ENV_VAR] = experiment_id

    # Assemble the body of the jobs API request that launches the run.
    run_name = 'MLflow Job Run for %s' % uri
    shell_command_task = {
        'command': _get_databricks_run_cmd(uri, entry_point, version, parameters),
        "env_vars": env_vars,
    }
    libraries = [{"pypi": {"package": "mlflow==%s" % VERSION}}]
    req_body_json = {
        'run_name': run_name,
        'new_cluster': new_cluster,
        'shell_command_task': shell_command_task,
        "libraries": libraries,
    }

    eprint("=== Running entry point %s of project %s on Databricks. ===" %
           (entry_point, uri))
    submit_response = rest_utils.databricks_api_request(
        hostname=hostname, endpoint="jobs/runs/submit", token=token, auth=auth,
        method="POST", req_body_json=req_body_json)
    run_id = submit_response["run_id"]
    eprint(
        "=== Launched MLflow run as Databricks job run with ID %s. Getting run status "
        "page URL... ===" % run_id)

    # Look the run up again to learn the URL of its status page.
    run_details = rest_utils.databricks_api_request(
        hostname=hostname, endpoint="jobs/runs/get", token=token, auth=auth,
        method="GET", params={"run_id": run_id})
    eprint("=== Check the run's status at %s ===" % run_details["run_page_url"])
Ejemplo n.º 2
0
def test_databricks_http_request_integration(get_config_for_profile, request):
    """
    Check that the keyword arguments ``databricks_api_request`` hands to the HTTP layer form a
    usable HTTP request, and that the JSON response text comes back parsed.

    NOTE(review): ``get_config_for_profile`` and ``request`` are presumably mocks injected by
    patch decorators that are not visible here -- confirm.
    """
    expected_request_kwargs = {
        'method': 'PUT',
        'url': 'host/api/2.0/clusters/list',
        'headers': {'Authorization': 'Basic dXNlcjpwYXNz'},
        'verify': True,
        'json': {'a': 'b'},
    }

    def confirm_request_params(**kwargs):
        # Stand-in for the HTTP call: verify the outgoing kwargs, then fake a 200 response.
        assert kwargs == expected_request_kwargs
        return mock.MagicMock(status_code=200, text='{"OK": "woo"}')

    request.side_effect = confirm_request_params
    get_config_for_profile.return_value = DatabricksConfig(
        "host", "user", "pass", None, insecure=False)

    parsed = rest_utils.databricks_api_request('clusters/list', 'PUT', json={'a': 'b'})
    assert parsed == {'OK': 'woo'}
Ejemplo n.º 3
0
def _dbfs_path_exists(dbfs_uri):
    """
    Returns True if the passed-in path exists in DBFS for the workspace corresponding to the
    default Databricks CLI profile.

    :param dbfs_uri: DBFS URI; its path component is extracted with ``_parse_dbfs_uri_path``
                     and sent to the ``dbfs/get-status`` endpoint.
    :return: False if the response carries the error code ``RESOURCE_DOES_NOT_EXIST``, True if
             the response carries no ``error_code`` field at all.
    :raises ExecutionException: if the response carries any other error code.
    """
    dbfs_path = _parse_dbfs_uri_path(dbfs_uri)
    json_response_obj = rest_utils.databricks_api_request(
        endpoint="dbfs/get-status", method="GET", json={"path": dbfs_path})
    # If request fails with a RESOURCE_DOES_NOT_EXIST error, the file does not exist on DBFS
    error_code_field = "error_code"
    if error_code_field in json_response_obj:
        if json_response_obj[error_code_field] == "RESOURCE_DOES_NOT_EXIST":
            return False
        # The message has two placeholders (the file and the response), so both values must be
        # supplied as a tuple; formatting with the response dict alone raises TypeError and
        # hides the real error.
        raise ExecutionException("Got unexpected error response when checking whether file %s "
                                 "exists in DBFS: %s" % (dbfs_uri, json_response_obj))
    return True
Ejemplo n.º 4
0
def _jobs_runs_submit(req_body_json):
    """
    POST ``req_body_json`` to the ``jobs/runs/submit`` endpoint and return whatever
    ``rest_utils.databricks_api_request`` returns.
    """
    submit_response = rest_utils.databricks_api_request(
        endpoint="jobs/runs/submit", method="POST", req_body_json=req_body_json)
    return submit_response
Ejemplo n.º 5
0
def _jobs_runs_cancel(databricks_run_id):
    """
    POST a request to the ``jobs/runs/cancel`` endpoint for the given run ID and return whatever
    ``rest_utils.databricks_api_request`` returns.
    """
    cancel_body = {"run_id": databricks_run_id}
    return rest_utils.databricks_api_request(endpoint="jobs/runs/cancel",
                                             method="POST",
                                             req_body_json=cancel_body)
Ejemplo n.º 6
0
def _jobs_runs_get(databricks_run_id):
    """
    GET the ``jobs/runs/get`` endpoint with the given run ID as a query parameter and return
    whatever ``rest_utils.databricks_api_request`` returns.
    """
    query_params = {"run_id": databricks_run_id}
    return rest_utils.databricks_api_request(endpoint="jobs/runs/get",
                                             method="GET",
                                             params=query_params)