예제 #1
0
    def create(self, group_name):
        """
        Create a new group.

        Parameters
        ----------
        group_name : str
            Name of the group to create

        Returns
        -------
        dict : The decoded JSON body returned by the API

        Raises
        ------
        AuthorizationError
            If the service answers with a 403 status code
        APIError
            If the service answers with any other non-200 status code
        """
        response = self._rest_call['POST']('/groups/create',
                                           data={'group_name': group_name})
        status = response.status_code

        if status == 200:
            return response.json()

        if status == 403:
            raise AuthorizationError(
                "User is not authorized or token is incorrect.")

        # Any other status: surface the error details from the JSON body.
        body = response.json()
        raise APIError("Response code {0}: {1} {2}".format(
            status, body.get('error_code'), body.get('message')))
예제 #2
0
    def list_node_types(self):
        """
        List details on all possible node types for Databricks.

        Not all node types will be available for the given subscription.

        Returns
        -------
        The value stored under 'node_types' in the JSON response

        Raises
        ------
        AuthorizationError
            If the service answers with a 403 status code
        APIError
            If the status code is not 200 and the error code is not in ERROR_CODES;
            otherwise the exception class mapped in ERROR_CODES is raised
        """
        response = self._rest_call['GET']('clusters/list-node-types')
        status = response.status_code

        if status == 200:
            return response.json()['node_types']

        if status == 403:
            raise AuthorizationError(
                "User is not authorized or token is incorrect.")

        body = response.json()
        code = body.get("error_code")
        if code in ERROR_CODES:
            # A dedicated exception class is registered for this error code.
            raise ERROR_CODES[code](body.get('message'))

        raise APIError("Response code {0}: {1} {2}".format(
            status, code, body.get('message')))
예제 #3
0
    def list(self):
        """
        List all groups in the workspace.

        Returns
        -------
        list : The value stored under 'group_names' in the JSON response
            (None when the key is absent)

        Raises
        ------
        AuthorizationError
            If the service answers with a 403 status code
        APIError
            If the service answers with any other non-200 status code
        """
        response = self._rest_call['GET']('/groups/list')
        status = response.status_code

        if status == 200:
            return response.json().get('group_names')

        if status == 403:
            raise AuthorizationError(
                "User is not authorized or token is incorrect.")

        body = response.json()
        raise APIError("Response code {0}: {1} {2}".format(
            status, body.get('error_code'), body.get('message')))
예제 #4
0
    def list_members(self, group_name):
        """
        List the members of a given group.

        Parameters
        ----------
        group_name : str
            The group whose members should be listed

        Returns
        -------
        list : The value stored under 'members' in the JSON response
            (None when the key is absent)

        Raises
        ------
        AuthorizationError
            If the service answers with a 403 status code
        APIError
            If the service answers with any other non-200 status code
        """
        response = self._rest_call['GET']('/groups/list-members',
                                          data={'group_name': group_name})
        status = response.status_code

        if status == 200:
            return response.json().get('members')

        if status == 403:
            raise AuthorizationError(
                "User is not authorized or token is incorrect.")

        body = response.json()
        raise APIError("Response code {0}: {1} {2}".format(
            status, body.get('error_code'), body.get('message')))
예제 #5
0
    def delete(self, group_name):
        """
        Delete a group.

        Parameters
        ----------
        group_name : str
            Name of the group to delete

        Returns
        -------
        str : The name of the removed group (the ``group_name`` argument)

        Raises
        ------
        AuthorizationError
            If the service answers with a 403 status code
        APIError
            If the service answers with any other non-200 status code
        """
        response = self._rest_call['POST']('/groups/delete',
                                           data={'group_name': group_name})
        status = response.status_code

        if status == 200:
            return group_name

        if status == 403:
            raise AuthorizationError(
                "User is not authorized or token is incorrect.")

        body = response.json()
        raise APIError("Response code {0}: {1} {2}".format(
            status, body.get('error_code'), body.get('message')))
예제 #6
0
    def all_cluster_statuses(self):
        """
        Return the library status for all clusters.

        Returns
        -------
            The decoded JSON body describing the libraries on all clusters

            Format here : https://docs.azuredatabricks.net/dev-tools/api/latest/libraries.html#all-cluster-statuses

        Raises
        ------
        AuthorizationError
            If the service answers with a 403 status code
        APIError
            If the status code is not 200 and the error code is not in ERROR_CODES;
            otherwise the exception class mapped in ERROR_CODES is raised
        """
        response = self._rest_call['GET']('/libraries/all-cluster-statuses')
        status = response.status_code

        if status == 200:
            return response.json()

        if status == 403:
            raise AuthorizationError(
                "User is not authorized or token is incorrect.")

        body = response.json()
        code = body.get("error_code")
        if code in ERROR_CODES:
            raise ERROR_CODES[code](body.get('message'))

        raise APIError("Response code {0}: {1} {2}".format(
            status, code, body.get('message')))
    def export(self, dbx_path, file_path, file_format='DBC'):
        """ Exports the Databricks path to a file on the local PC.

        Parameters
        ----------
        dbx_path : str
            The path, in the Databricks workspace, to export

        file_path : str
            The path, on the local PC, where the file should be created

        file_format: str, optional
            The format of the file to be saved. Defaults to DBC. Matched
            case-insensitively against EXPORT_FORMATS (DBC, SOURCE, HTML or JUPYTER).

        Returns
        -------
        file_path if successful

        Raises
        ------
        UnknownFormat:
            If file_format is not one of the supported export formats

        MaxNotebookSizeExceeded:
            If the service reports the MAX_NOTEBOOK_SIZE_EXCEEDED error code

        ResourceDoesNotExist:
            If the given Databricks path does not exist

        AuthorizationError:
            If the services returns a 403 status code

        APIError:
            If the status code returned by the service is anything except 200 and is not captured above
        """
        METHOD = 'GET'
        API_PATH = '/workspace/export'

        # Validate and send the same, normalized spelling of the format.
        export_format = file_format.upper()
        if export_format not in EXPORT_FORMATS:
            raise UnknownFormat(
                '{0} is not a supported format type. Please use DBC, SOURCE, HTML, or JUPYTER'.format(file_format))

        data = {'path': dbx_path,
                'format': export_format,
                'direct_download': True}

        resp = self._rest_call[METHOD](API_PATH, data=data)

        if resp.status_code == 200:
            # NOTE(review): assumes the REST helper returns a requests.Response,
            # whose raw body bytes are exposed as `.content` (it has no `.get`).
            with open(file_path, 'wb') as fo:
                fo.write(resp.content)

            return file_path

        if resp.status_code == 403:
            raise AuthorizationError("User is not authorized or token is incorrect.")

        body = resp.json()
        error_code = body.get("error_code")

        if error_code == "MAX_NOTEBOOK_SIZE_EXCEEDED":
            raise MaxNotebookSizeExceeded(body.get('message'))

        if error_code == "RESOURCE_DOES_NOT_EXIST":
            raise ResourceDoesNotExist(body.get('message'))

        raise APIError("Response code {0}: {1} {2}".format(resp.status_code,
                                                           error_code,
                                                           body.get('message')))
예제 #8
0
def choose_exception(response: requests.Response) -> Exception:
    """ Build the exception matching a non-200 API response.

    Parameters
    ----------
        response: The requests.Response object returned from the API call

    Returns
    -------
        Exception: AuthorizationError for a 403 status; otherwise the class
        mapped in ERROR_CODES for the body's error code, or APIError when the
        error code is not mapped. The exception is returned, not raised.
    """
    if response.status_code != 403:  # pragma: no cover
        body = response.json()
        code = body.get("error_code")
        message = body.get('message')

        if code in ERROR_CODES:
            return ERROR_CODES[code](message)

        return APIError("Response code {0}: {1} {2}".format(
            response.status_code, code, message))

    return AuthorizationError(  # pragma: no cover
        "User is not authorized or token is incorrect.")
예제 #9
0
    def delete(self, path, recursive=False, not_exists_ok=False):
        """
        Delete the path in the given workspace.

        Parameters
        ----------
        path : str
            The path, in the Databricks workspace, to delete

        recursive : bool, optional
            Recursively delete the given path

        not_exists_ok : bool, optional
            If the given path is not found, return the path instead of raising

        Returns
        -------
        path if successfully deleted (or if it was missing and not_exists_ok is set)

        Raises
        ------
        ResourceDoesNotExist:
            If not_exists_ok is set to False and the given path does not exist

        AuthorizationError:
            If the services returns a 403 status code

        APIError:
            If the status code returned by the service is anything except 200 and is not captured above
        """
        response = self._rest_call['POST'](
            '/workspace/delete', data={'path': path, 'recursive': recursive})
        status = response.status_code

        if status == 200:
            return path

        if status == 403:
            raise AuthorizationError(
                "User is not authorized or token is incorrect.")

        body = response.json()
        code = body.get("error_code")

        if code not in ERROR_CODES:
            raise APIError("Response code {0}: {1} {2}".format(
                status, code, body.get('message')))

        # A missing path is tolerated only when the caller opted in.
        if not_exists_ok and code == "RESOURCE_DOES_NOT_EXIST":
            return path

        raise ERROR_CODES[code](body.get('message'))
예제 #10
0
    def mkdirs(self, path, exists_ok=False):
        """
        Create the given directory and any necessary parent directories.

        If there exists an object (not a directory) at any prefix of the input path, this call raises an
        error RESOURCE_ALREADY_EXISTS. Note that if this operation fails it may have succeeded in creating
        some of the necessary parent directories.

        Parameters
        ----------
        path : str
            The path, in the Databricks workspace, where a directory should be made

        exists_ok : bool, optional
            Suppress the error raised when a resource already exists at the given path

        Returns
        -------
        path : str
            The path that was created

        Raises
        ------
        ResourceAlreadyExists
            If you are trying to create a path that already exists and the exists_ok flag is false.
        AuthorizationError
            If the service returns a 403 status code
        APIError
            If the Databricks API returned an error
        """
        response = self._rest_call['POST']('/workspace/mkdirs',
                                           data={'path': path})
        status = response.status_code

        if status == 200:
            return path

        if status == 403:
            raise AuthorizationError(
                "User is not authorized or token is incorrect.")

        body = response.json()
        code = body.get("error_code")

        if code not in ERROR_CODES:
            raise APIError("Response code {0}: {1} {2}".format(
                status, code, body.get('message')))

        # An already-existing resource is tolerated only when the caller opted in.
        if exists_ok and code == "RESOURCE_ALREADY_EXISTS":
            return path

        raise ERROR_CODES[code](body.get('message'))
예제 #11
0
    def __send_cluster_id_to_endpoint(self, method, api_path, cluster_name, cluster_id):
        """
        Private method to send only a cluster id to a given endpoint

        Parameters
        ----------
        method : str
            HTTP method ('POST' or 'GET'); used as the key into ``self._rest_call``
        api_path : str
            API path that the request is sent to

        cluster_name : str, optional
            The name of the cluster. Only resolved to an id (via ``get_cluster_id``)
            when ``cluster_id`` is not given.

        cluster_id : str, optional
            The id of the cluster to send to the endpoint.

        Returns
        -------
            The decoded JSON body when ``method`` is 'GET'; otherwise the
            cluster id that was sent

        Raises
        ------
        ValueError
            When neither cluster_name or cluster_id are passed
        AuthorizationError
            When the service returns a 403 status code
        ResourceDoesNotExist
            When the service returns a 400 status code with the message
            "Cluster <cluster_id> does not exist"
        APIError
            When the service returns any other non-200 status code
        """
        if not (cluster_name or cluster_id):
            raise ValueError("Either cluster_id or cluster_name must be specified")

        if cluster_name and not cluster_id:
            cluster_id = self.get_cluster_id(cluster_name)

        data = {"cluster_id": cluster_id}

        resp = self._rest_call[method](api_path, data=data)

        if resp.status_code == 200:
            return resp.json() if method == 'GET' else cluster_id

        if resp.status_code == 403:
            raise AuthorizationError("User is not authorized or token is incorrect.")

        body = resp.json()
        message = body.get('message')

        # The placeholder must be filled with the actual id; comparing against
        # the raw template would never match a real service message.
        not_found_message = "Cluster {id} does not exist".format(id=cluster_id)
        if resp.status_code == 400 and message == not_found_message:
            raise ResourceDoesNotExist(message)

        raise APIError("Response code {0}: {1} {2}".format(resp.status_code,
                                                           body.get('error_code'),
                                                           message))
예제 #12
0
    def add_member(self, parent_group, group_name=None, user_name=None):
        """
        Add a new member (either user or group) to a given parent group.

        Parameters
        ----------
        parent_group : str
            The group to which the new user or group should be added
        group_name : str, optional
            A group to be added to parent group
        user_name : str, optional
            A user to be added to parent group

        Returns
        -------
        str : The group name or user name added

        Raises
        ------
        ValueError
            If both group_name and user_name are defined or if neither group_name or user_name are defined
        AuthorizationError
            If the service answers with a 403 status code
        APIError
            If the status code is not 200 and the error code is not in ERROR_CODES;
            otherwise the exception class mapped in ERROR_CODES is raised
        """
        # The helper yields the request payload plus the name being added.
        payload, added_name = self.__prep_group_or_user(group_name=group_name,
                                                        user_name=user_name)
        payload['parent_name'] = parent_group

        response = self._rest_call['POST']('/groups/add-member', data=payload)
        status = response.status_code

        if status == 200:
            return added_name

        if status == 403:
            raise AuthorizationError(
                "User is not authorized or token is incorrect.")

        body = response.json()
        code = body.get("error_code")
        if code in ERROR_CODES:
            raise ERROR_CODES[code](body.get('message'))

        raise APIError("Response code {0}: {1} {2}".format(
            status, code, body.get('message')))
예제 #13
0
    def list(self, path):
        """List the contents of the given directory

        Parameters
        ----------
        path : str
            The path, in the Databricks workspace, of which, the contents should be listed

        Returns
        -------
        List of WorkspaceObjectInfo, one per entry under 'objects' in the
        response; an empty list when 'objects' is missing or empty

        Raises
        ------
        AuthorizationError:
            If the services returns a 403 status code

        APIError:
            If the status code returned by the service is anything except 200 and is not captured above
        """
        response = self._rest_call['GET']('/workspace/list',
                                          data={'path': path})
        status = response.status_code

        if status == 200:
            entries = response.json().get('objects')
            if not entries:
                return []
            return [WorkspaceObjectInfo(**entry) for entry in entries]

        if status == 403:
            raise AuthorizationError(
                "User is not authorized or token is incorrect.")

        body = response.json()
        code = body.get("error_code")
        if code in ERROR_CODES:
            raise ERROR_CODES[code](body.get('message'))

        raise APIError("Response code {0}: {1} {2}".format(
            status, code, body.get('message')))
예제 #14
0
    def spark_versions(self):
        """
        List the Spark versions reported by the service.

        Returns
        -------
        dict : Maps each entry's 'key' to its 'name', for every entry under
            'versions' in the JSON response

        Raises
        ------
        AuthorizationError
            If the service answers with a 403 status code
        APIError
            If the service answers with any other non-200 status code
        """
        response = self._rest_call['GET']('clusters/spark-versions')
        status = response.status_code

        if status == 200:
            versions = {}
            for entry in response.json()['versions']:
                versions[entry['key']] = entry['name']
            return versions

        if status == 403:
            raise AuthorizationError("User is not authorized or token is incorrect.")

        body = response.json()
        raise APIError("Response code {0}: {1} {2}".format(status,
                                                           body.get('error_code'),
                                                           body.get('message')))
예제 #15
0
    def install(self,
                cluster_id,
                libraries,
                wait_for_completion=False,
                timeout=120):
        """
        Install new libraries on the cluster.

        This is an async call. You can check the status of library installation using the 'cluster_status' method.

        Parameters
        ----------
        cluster_id : str
            The id of the cluster on which to install libraries (sent as 'cluster_id')

        libraries : array of libraries
            see https://docs.azuredatabricks.net/dev-tools/api/latest/libraries.html#install

        wait_for_completion : bool, optional
            Accepted but not used by this method

        timeout : int, optional
            Accepted but not used by this method

        Returns
        -------
        Cluster library status for given cluster (result of ``cluster_status(cluster_id)``)

        Raises
        ------
        AuthorizationError
            If the service answers with a 403 status code
        APIError
            If the status code is not 200 and the error code is not in ERROR_CODES;
            otherwise the exception class mapped in ERROR_CODES is raised
        """
        payload = {'cluster_id': cluster_id, 'libraries': libraries}
        response = self._rest_call['POST']('/libraries/install', data=payload)
        status = response.status_code

        if status == 200:
            # The install endpoint result is not returned; report current status instead.
            return self.cluster_status(cluster_id)

        if status == 403:
            raise AuthorizationError(
                "User is not authorized or token is incorrect.")

        body = response.json()
        code = body.get("error_code")
        if code in ERROR_CODES:
            raise ERROR_CODES[code](body.get('message'))

        raise APIError("Response code {0}: {1} {2}".format(
            status, code, body.get('message')))
예제 #16
0
    def get_status(self, path):
        """ Get the status of a given Databricks path

        Parameters
        ----------
        path : str
            The path, in the Databricks workspace, to get the status of

        Returns
        -------
            WorkspaceObjectInfo built from the fields of the JSON response

        Raises
        ------
        AuthorizationError:
            If the services returns a 403 status code

        APIError:
            If the status code returned by the service is anything except 200 and is not captured above
        """
        response = self._rest_call['GET']('/workspace/get-status',
                                          data={'path': path})
        status = response.status_code

        if status == 200:
            return WorkspaceObjectInfo(**response.json())

        if status == 403:
            raise AuthorizationError(
                "User is not authorized or token is incorrect.")

        body = response.json()
        code = body.get("error_code")
        if code in ERROR_CODES:
            raise ERROR_CODES[code](body.get('message'))

        raise APIError("Response code {0}: {1} {2}".format(
            status, code, body.get('message')))
예제 #17
0
    def list_parents(self, group_name=None, user_name=None):
        """
        List all groups that a given user or group belongs to.

        Parameters
        ----------
        group_name : str, optional
            The name of a group

        user_name : str, optional
            The name of a user

        Returns
        -------
        list : The value stored under 'group_names' in the JSON response
            (None when the key is absent)

        Raises
        ------
        AuthorizationError
            If the service answers with a 403 status code
        APIError
            If the status code is not 200 and the error code is not in ERROR_CODES;
            otherwise the exception class mapped in ERROR_CODES is raised
        """
        # Only the request payload is needed here; the resolved name is ignored.
        payload, _ = self.__prep_group_or_user(group_name=group_name,
                                               user_name=user_name)

        response = self._rest_call['GET']('/groups/list-parents', data=payload)
        status = response.status_code

        if status == 200:
            return response.json().get('group_names')

        if status == 403:
            raise AuthorizationError(
                "User is not authorized or token is incorrect.")

        body = response.json()
        code = body.get("error_code")
        if code in ERROR_CODES:
            raise ERROR_CODES[code](body.get('message'))

        raise APIError("Response code {0}: {1} {2}".format(
            status, code, body.get('message')))
예제 #18
0
    def list(self):
        """
        List the clusters reported by the service.

        Returns
        -------
        The value stored under 'clusters' in the JSON response
        (None when the key is absent)

        Raises
        ------
        AuthorizationError
            If the service answers with a 403 status code
        APIError
            If the status code is not 200 and the error code is not in ERROR_CODES;
            otherwise the exception class mapped in ERROR_CODES is raised
        """
        response = self._rest_call['GET']('clusters/list')
        status = response.status_code

        if status == 200:
            return response.json().get('clusters')

        if status == 403:
            raise AuthorizationError(
                "User is not authorized or token is incorrect.")

        body = response.json()
        code = body.get("error_code")
        if code in ERROR_CODES:
            raise ERROR_CODES[code](body.get('message'))

        raise APIError("Response code {0}: {1} {2}".format(
            status, code, body.get('message')))
예제 #19
0
    def cluster_status(self, cluster_id):
        """
        Return the library status for a specific cluster.

        Parameters
        ----------
        cluster_id : str
            The cluster ID to query

        Returns
        -------
            The decoded JSON body describing the libraries on this cluster_id
            Format here : https://docs.azuredatabricks.net/dev-tools/api/latest/libraries.html#cluster-status

        Raises
        ------
        AuthorizationError
            If the service answers with a 403 status code
        APIError
            If the status code is not 200 and the error code is not in ERROR_CODES;
            otherwise the exception class mapped in ERROR_CODES is raised
        """
        response = self._rest_call['GET']('/libraries/cluster-status',
                                          data={'cluster_id': cluster_id})
        status = response.status_code

        if status == 200:
            return response.json()

        if status == 403:
            raise AuthorizationError(
                "User is not authorized or token is incorrect.")

        body = response.json()
        code = body.get("error_code")
        if code in ERROR_CODES:
            raise ERROR_CODES[code](body.get('message'))

        raise APIError("Response code {0}: {1} {2}".format(
            status, code, body.get('message')))
예제 #20
0
    def import_file(self,
                    dbx_path,
                    file_format,
                    language="",
                    overwrite=False,
                    url=None,
                    filepath=None):
        """ Imports a file to the Databricks workspace from a given URL or file path

        Parameters
        ----------
        dbx_path : str
            The path, in the Databricks workspace, where the object should be created

        file_format : str
            The format of the file imported. Options are SOURCE, HTML, JUPYTER, DBC
            (matched case-insensitively)

        language : str, optional
            Required if file_format is set to SOURCE

            The computer language that the source code is written in. Options are SCALA, PYTHON, SQL or R

        overwrite : bool, optional
            Overwrite the Databricks path (not currently supported for DBC)

        url : str, optional
            The url for the file to be imported. Often this is a Github raw URL.
            Takes precedence over ``filepath`` when both are given.

        filepath : str, optional
            The path on the local PC of the file to be uploaded

        Returns
        -------
        dbx_path if successful

        Raises
        ------
        AttributeError:
            If the requirements for attributes are not met

        MaxNotebookSizeExceeded:
            If imported file size is greater than 10 MB.

        ResourceAlreadyExists:
            If overwrite is set to false and there is already an object at the given dbx_path

        AuthorizationError:
            If the services returns a 403 status code

        APIError:
            If the status code returned by the service is anything except 200 and is not captured above
        """
        METHOD = 'POST'
        API_PATH = '/workspace/import'

        # At least one of url / filepath is required (url wins when both are given).
        if not (url or filepath):
            raise AttributeError(
                "Must pass either URL or filepath to Workspace Import")

        upload_format = file_format.upper()

        if upload_format == 'SOURCE' and language.upper() not in LANGUAGES:
            # NOTE(review): the language list below follows this method's
            # docstring; confirm it matches the LANGUAGES constant.
            raise AttributeError(
                "If file_format=SOURCE, language must be Scala, Python, SQL or R"
            )
        elif upload_format not in EXPORT_FORMATS:
            raise AttributeError(
                "File format must be SOURCE, DBC, JUPYTER or HTML")

        if url:
            content = url_content_to_b64(url)
        else:
            content = file_content_to_b64(filepath)

        data = {
            "content": content.decode('utf-8'),
            "format": upload_format,
            "overwrite": overwrite,
            "path": dbx_path
        }

        # The language is only sent for SOURCE imports.
        if upload_format == 'SOURCE':
            data['language'] = language.upper()

        resp = self._rest_call[METHOD](API_PATH, data=data)

        if resp.status_code == 200:
            return dbx_path

        if resp.status_code == 403:
            raise AuthorizationError(
                "User is not authorized or token is incorrect.")

        body = resp.json()
        error_code = body.get("error_code")
        if error_code in ERROR_CODES:
            raise ERROR_CODES[error_code](body.get('message'))

        raise APIError("Response code {0}: {1} {2}".format(
            resp.status_code, error_code, body.get('message')))
예제 #21
0
    def create(self, cluster_name, num_workers, spark_version, node_type_id,
               python_version=3, autotermination_minutes=60, **kwargs):
        """
        Creates a new cluster

        Parameters
        ----------
        cluster_name : str
            Sent to the API as 'cluster_name'
        num_workers : int or dict
            A dict is sent to the API as the 'autoscale' setting; any other
            value is converted with int() and sent as 'num_workers'
        spark_version : str
            Must be one of the keys returned by spark_versions()
        node_type_id : str
            Must be one of the names returned by list_available_node_type_names()

        python_version : int, optional, default=3
            When 3, the PYSPARK_PYTHON entry of 'spark_env_vars' is set to the
            Python 3 interpreter path

        autotermination_minutes : int, optional, default=60
            Sent to the API as 'autotermination_minutes' when truthy

        kwargs
            Additional settings merged into the request payload. A
            'driver_node_type_id' entry is validated like node_type_id.

        Returns
        -------
        The 'cluster_id' value of the JSON response

        Raises
        ------
        ValueError
            If spark_version, node_type_id or driver_node_type_id is not available
        AuthorizationError
            If the service returns a 403 status code
        APIError
            If the service returns any other non-200 status code
        """
        METHOD = 'POST'
        API_PATH = 'clusters/create'

        # Check if spark_version supported:
        if spark_version not in self.spark_versions():
            raise ValueError("'{0}' is not a recognized spark_version. Please see the ".format(spark_version) +
                             "spark_versions() method for available Spark Versions. ")

        available_vms = self.list_available_node_type_names()
        driver_vm_id = kwargs.get('driver_node_type_id')

        # Check the worker type, and the driver type when one was supplied,
        # naming the offending VM type in the error.
        vm_ids_to_check = [node_type_id]
        if driver_vm_id:
            vm_ids_to_check.append(driver_vm_id)
        for vm_id in vm_ids_to_check:
            if vm_id not in available_vms:
                raise ValueError("'{0}' is not an available VM type. Please see the ".format(vm_id) +
                                 "list_available_node_type_names() method for available node types")

        cluster_config = {'cluster_name': cluster_name,
                          'spark_version': spark_version,
                          'node_type_id': node_type_id}

        # If python_version is set to Python 3, then overwrite the PYSPARK_PYTHON environment variable.
        # Work on a copy so the dict passed in by the caller is not modified.
        if python_version == 3:
            env_vars = dict(kwargs.get('spark_env_vars') or {})
            env_vars['PYSPARK_PYTHON'] = '/databricks/python3/bin/python3'
            kwargs['spark_env_vars'] = env_vars

        # Set default value of autotermination minutes - this defaults to 60 minutes.
        if autotermination_minutes:
            kwargs['autotermination_minutes'] = autotermination_minutes

        # Specify the size of the cluster: a dict describes autoscaling,
        # anything else is a fixed worker count.
        if isinstance(num_workers, dict):
            cluster_config['autoscale'] = num_workers
        else:
            cluster_config['num_workers'] = int(num_workers)

        # Merge kwargs and cluster_config
        cluster_config = dict_update(kwargs, cluster_config)

        resp = self._rest_call[METHOD](API_PATH, data=cluster_config)

        if resp.status_code == 200:
            return resp.json()['cluster_id']

        if resp.status_code == 403:
            raise AuthorizationError("User is not authorized or token is incorrect.")

        body = resp.json()
        raise APIError("Response code {0}: {1} {2}".format(resp.status_code,
                                                           body.get('error_code'),
                                                           body.get('message')))
예제 #22
0
    def create(self,
               cluster_name,
               num_workers,
               spark_version,
               node_type_id,
               python_version=3,
               autotermination_minutes=60,
               custom_spark_version=False,
               **kwargs):
        """
        Creates a new cluster

        Parameters
        ----------
        cluster_name : str
            The display name of the cluster being created

        num_workers : int or dict
            The number of worker nodes in the cluster. A dict is sent to the
            API as the 'autoscale' setting; any other value is converted with
            int() and sent as 'num_workers'.

        spark_version : str
            Must be one of the keys returned by spark_versions() unless
            custom_spark_version is set

        node_type_id : str
            Must be one of the names returned by list_available_node_type_names()

        python_version : int, optional, default=3
            When 3, the PYSPARK_PYTHON entry of 'spark_env_vars' is set to the
            Python 3 interpreter path

        autotermination_minutes : int, optional, default=60
            Automatically terminates the cluster after it is inactive for this time in minutes.
            If not set, this cluster will not be automatically terminated. If specified, the threshold
            must be between 10 and 10000 minutes. A falsy value (0 or None) is not sent to the API.

        custom_spark_version : bool, optional, default=False
            If a custom Spark version is passed - then this prevents error checking for supported Spark versions

        kwargs : optional
            Other keyword arguments are passed to the API in the JSON payload. See supported arguments here:
            https://docs.azuredatabricks.net/api/latest/clusters.html#create

        Returns
        -------
        The 'cluster_id' value of the JSON response

        Raises
        ------
        ValueError
            If spark_version, node_type_id or driver_node_type_id is not available
        AuthorizationError
            If the service returns a 403 status code
        APIError
            If the service returns any other non-200 status code
        """
        METHOD = 'POST'
        API_PATH = 'clusters/create'

        # Check if spark_version supported:
        if spark_version not in self.spark_versions(
        ) and not custom_spark_version:
            raise ValueError(
                "'{0}' is not a recognized spark_version. Please see the ".
                format(spark_version) +
                "spark_versions() method for available Spark Versions. ")

        available_vms = self.list_available_node_type_names()
        driver_vm_id = kwargs.get('driver_node_type_id')

        # Check the worker type, and the driver type when one was supplied,
        # naming the offending VM type in the error.
        vm_ids_to_check = [node_type_id]
        if driver_vm_id:
            vm_ids_to_check.append(driver_vm_id)
        for vm_id in vm_ids_to_check:
            if vm_id not in available_vms:
                raise ValueError(
                    "'{0}' is not an available VM type. Please see the ".format(
                        vm_id) +
                    "list_available_node_type_names() method for available node types"
                )

        cluster_config = {
            'cluster_name': cluster_name,
            'spark_version': spark_version,
            'node_type_id': node_type_id
        }

        # If python_version is set to Python 3, then overwrite the PYSPARK_PYTHON environment variable.
        # Work on a copy so the dict passed in by the caller is not modified.
        if python_version == 3:
            env_vars = dict(kwargs.get('spark_env_vars') or {})
            env_vars['PYSPARK_PYTHON'] = '/databricks/python3/bin/python3'
            kwargs['spark_env_vars'] = env_vars

        # Set default value of autotermination minutes - this defaults to 60 minutes.
        if autotermination_minutes:
            kwargs['autotermination_minutes'] = autotermination_minutes

        # Specify the size of the cluster: a dict describes autoscaling,
        # anything else is a fixed worker count.
        if isinstance(num_workers, dict):
            cluster_config['autoscale'] = num_workers
        else:
            cluster_config['num_workers'] = int(num_workers)

        # Merge kwargs and cluster_config
        cluster_config = dict_update(kwargs, cluster_config)

        resp = self._rest_call[METHOD](API_PATH, data=cluster_config)

        if resp.status_code == 200:
            return resp.json()['cluster_id']

        if resp.status_code == 403:
            raise AuthorizationError(
                "User is not authorized or token is incorrect.")

        body = resp.json()
        raise APIError("Response code {0}: {1} {2}".format(
            resp.status_code,
            body.get('error_code'),
            body.get('message')))