Esempio n. 1
0
    def create(self, comment, lifetime_seconds=7776000):
        """
        Creates a new personal access token.

        Parameters
        ----------
        comment : str
            The comment to be added for the token to be created
        lifetime_seconds : int, optional, default=7776000 (90 days)
            The lifetime of the token, in seconds

        Returns
        -------
        Token Value and Info : dict
            Dictionary with token value as 'token_value' key and TokenInfo object as 'token_info'
        """

        METHOD = 'POST'
        API_PATH = '/token/create'

        data = {'lifetime_seconds': lifetime_seconds, 'comment': comment}

        resp = self._rest_call[METHOD](API_PATH, data=data)

        if resp.status_code == 200:
            # Decode the body only on success: error responses may not carry
            # a JSON payload, and decoding them unconditionally would raise a
            # JSON error that masks the real API error from choose_exception.
            resp_json = resp.json()
            return {
                'token_value': resp_json.get('token_value'),
                'token_info': TokenInfo(**resp_json.get('token_info'))
            }
        else:
            exception = choose_exception(resp)
            raise exception
Esempio n. 2
0
    def revoke(self, token_id):
        """
        Deletes (revokes) a personal access token.

        Parameters
        ----------
        token_id : str
            The ID of the token to be deleted.

        Returns
        -------
        token_id : str
            The same ID, if the token was deleted successfully.
        """
        response = self._rest_call['POST']('/token/delete',
                                           data={'token_id': token_id})

        # Any non-200 status is translated into the matching API exception.
        if response.status_code != 200:
            raise choose_exception(response)

        return token_id
    def __send_cluster_id_to_endpoint(self, method, api_path, cluster_name,
                                      cluster_id):
        """
        Private method to send a cluster id to a given endpoint.

        Exactly one of ``cluster_name`` / ``cluster_id`` must be supplied;
        when only the name is given, the id is resolved via
        ``get_cluster_id`` first.

        Parameters
        ----------
        method : str
            HTTP method, 'POST' or 'GET'.
        api_path : str
            API path that the request is sent to.

        cluster_name : str, optional
            The name of the cluster.

        cluster_id : str, optional
            The id of the cluster.

        Returns
        -------
        dict or str
            The decoded JSON response body for GET requests, otherwise the
            cluster id that was sent.

        Raises
        ------
        ValueError
            When neither cluster_name nor cluster_id is passed
        ResourceDoesNotExist
            When a cluster with the given name or id isn't found
        """
        if not (cluster_name or cluster_id):
            raise ValueError(
                "Either cluster_id or cluster_name must be specified")

        # Resolve the id from the name; re-raise with a clearer message.
        if cluster_name and not cluster_id:
            try:
                cluster_id = self.get_cluster_id(cluster_name)
            except ResourceDoesNotExist:
                raise ResourceDoesNotExist(
                    "No cluster named '{0}' was found".format(cluster_name))

        data = {"cluster_id": cluster_id}

        resp = self._rest_call[method](api_path, data=data)

        # GET callers want the response payload; POST callers just get the id
        # back as a success marker.
        if resp.status_code == 200 and method == 'GET':
            return resp.json()

        elif resp.status_code == 200:
            return cluster_id

        else:
            exception = choose_exception(resp)
            raise exception
    def list(self):
        """
        Lists all clusters in the workspace.

        Returns
        -------
        list of dict
            One dictionary per cluster as returned by the REST API, or an
            empty list when the response contains no 'clusters' key.
        """
        response = self._rest_call['GET']('clusters/list')

        if response.status_code != 200:
            raise choose_exception(response)

        return response.json().get('clusters', [])
Esempio n. 5
0
    def __read(self, path, offset, length=MB_BYTES):
        """
        Reads a chunk of a DBFS file (private helper).

        Parameters
        ----------
        path : str
            Absolute DBFS path of the file to read.
        offset : int
            Byte offset at which to start reading.
        length : int, optional, default=MB_BYTES
            Maximum number of bytes to read in this call.

        Returns
        -------
        FileReadInfo
            Structure built directly from the service's JSON response.
        """
        params = {"path": path, "offset": offset, "length": length}

        response = self._rest_call['GET']('/dbfs/read', data=params)

        if response.status_code != 200:
            raise choose_exception(response)

        return FileReadInfo(**response.json())
    def spark_versions(self):
        """
        Maps the available Spark version keys to their display names.

        Returns
        -------
        dict
            ``{version_key: version_name}`` for every version reported by
            the service.
        """
        response = self._rest_call['GET']('clusters/spark-versions')

        if response.status_code != 200:
            raise choose_exception(response)

        versions = response.json()['versions']
        return {entry['key']: entry['name'] for entry in versions}
Esempio n. 7
0
    def move(self, source_path, destination_path):
        """
        Moves a file or directory from one DBFS location to another. If the
        given source path is a directory, this always recursively moves all
        files.

        Parameters
        ----------
        source_path : str
            The source path of the file or directory. The path should be the
            absolute DBFS path (e.g. "/mnt/foo/"). This field is required.
        destination_path : str
            The destination path of the file or directory. The path should be
            the absolute DBFS path (e.g. "/mnt/bar/"). This field is required.

        Returns
        -------
        destination_path if successful

        Raises
        ------
        ResourceDoesNotExist:
            If the source file does not exist

        ResourceAlreadyExists:
            If there already exists a file in the destination path

        AuthorizationError:
            If the service returns a 403 status code

        APIError:
            If the status code returned by the service is anything except 200
            and is not captured above
        """
        payload = {
            "source_path": source_path,
            "destination_path": destination_path
        }

        response = self._rest_call['POST']('/dbfs/move', data=payload)

        if response.status_code != 200:
            raise choose_exception(response)

        return destination_path
Esempio n. 8
0
    def __put(self, path, data, overwrite=False):
        """
        Uploads data to a DBFS path in a single request (private helper).

        Parameters
        ----------
        path : str
            Absolute DBFS path of the target file.
        data : bytes
            Contents to upload.
            NOTE(review): assumes the bytes are UTF-8 decodable -- confirm
            against callers (the API itself expects base64 text here).
        overwrite : bool, optional, default=False
            Whether an existing file at ``path`` may be replaced.

        Returns
        -------
        str
            The ``path``, if the upload succeeded.
        """
        body = {
            "path": path,
            "contents": data.decode('utf-8'),
            "overwrite": overwrite
        }

        response = self._rest_call['POST']('/dbfs/put', data=body)

        if response.status_code != 200:
            raise choose_exception(response)

        return path
Esempio n. 9
0
    def add_block(self, handle, data_block):
        """
        Appends a block of data to the stream behind the specified handle.

        Parameters
        ----------
        handle : int
            The handle on an open stream. This field is required.

        data_block : bytes
            The base64-encoded data to append to the stream. This has a limit
            of 1 MB. This field is required.

        Returns
        -------
        handle if successful

        Raises
        ------
        MaxBlockSizeExceeded:
            If the block of data sent is greater than 1 MB

        ResourceDoesNotExist:
            If the handle does not exist

        AuthorizationError:
            If the service returns a 403 status code

        APIError:
            If the status code returned by the service is anything except 200
            and is not captured above
        """
        payload = {"handle": handle, "data": data_block.decode('utf-8')}

        response = self._rest_call['POST']('/dbfs/add-block', data=payload)

        if response.status_code != 200:
            raise choose_exception(response)

        return handle
Esempio n. 10
0
    def delete(self, path, recursive=False, not_exists_ok=False):
        """
        Deletes a file or directory from DBFS.

        Parameters
        ----------
        path : str
            The path of the file or directory to delete. The path should be
            the absolute DBFS path (e.g. "/mnt/foo/"). This field is required.
        recursive : bool
            Whether or not to recursively delete the directory's contents.
            Deleting empty directories can be done without providing the
            recursive flag.
        not_exists_ok : bool
            Suppress any exceptions caused by trying to delete a file that
            does not exist.

        Returns
        -------
        path if successful

        Raises
        ------
        IOError:
            If the path is a non-empty directory and recursive is set to
            false, or on other similar errors

        AuthorizationError:
            If the service returns a 403 status code

        APIError:
            If the status code returned by the service is anything except 200
            and is not captured above
        """
        response = self._rest_call['POST'](
            '/dbfs/delete', data={"path": path, "recursive": recursive})

        if response.status_code == 200:
            return path

        error = choose_exception(response)
        # Optionally treat "already gone" as success instead of raising.
        if not_exists_ok and isinstance(error, ResourceDoesNotExist):
            return path
        raise error
Esempio n. 11
0
    def create(self, path, overwrite=False):
        """
        Opens a new DBFS write handle.

        Parameters
        ----------
        path : str
            The path of the new file. The path should be the absolute DBFS
            path (e.g. "/mnt/foo.txt"). This field is required.

        overwrite : bool optional
            The flag that specifies whether to overwrite existing file/files.

        Returns
        -------
        handle if successful

        Raises
        ------
        MaxBlockSizeExceeded:
            If blocksize sent is greater than 1 MB

        ResourceDoesNotExist:
            If the handle does not exist

        AuthorizationError:
            If the service returns a 403 status code

        APIError:
            If the status code returned by the service is anything except 200
            and is not captured above
        """
        payload = {"path": path, "overwrite": overwrite}

        response = self._rest_call['POST']('/dbfs/create', data=payload)

        if response.status_code != 200:
            raise choose_exception(response)

        return response.json().get('handle')
Esempio n. 12
0
    def list_node_types(self):
        """
        Lists details on all possible node types for Databricks.

        Not all node types will be available for the given subscription.

        Returns
        -------
        list of dict
            Information on every possible node type, straight from the
            'node_types' field of the API response.
        """
        response = self._rest_call['GET']('clusters/list-node-types')

        if response.status_code != 200:
            raise choose_exception(response)

        return response.json()['node_types']
Esempio n. 13
0
    def get_status(self, path):
        """
        Gets the file information of a file or directory.

        Parameters
        ----------
        path : str
            The path of the file or directory. The path should be the
            absolute DBFS path (e.g. "/mnt/foo/"). This field is required.

        Returns
        -------
        FileInfo named tuple with path, is_dir and file_size

        Raises
        ------
        ResourceDoesNotExist:
            If the file or directory does not exist

        AuthorizationError:
            If the service returns a 403 status code

        APIError:
            If the status code returned by the service is anything except 200
            and is not captured above
        """
        response = self._rest_call['GET']('/dbfs/get-status',
                                          data={"path": path})

        if response.status_code != 200:
            raise choose_exception(response)

        return FileInfo(**response.json())
Esempio n. 14
0
    def mkdirs(self, path):
        """
        Creates the given directory and necessary parent directories if they
        do not exist.

        Note: if this operation fails it may have succeeded in creating some
        of the necessary parent directories.

        Parameters
        ----------
        path : str
            The path of the new directory. The path should be the absolute
            DBFS path (e.g. "/mnt/foo/"). This field is required.

        Returns
        -------
        path if successful

        Raises
        ------
        ResourceAlreadyExists:
            If there exists a file (not a directory) at any prefix of the
            input path

        AuthorizationError:
            If the service returns a 403 status code

        APIError:
            If the status code returned by the service is anything except 200
            and is not captured above
        """
        response = self._rest_call['POST']('/dbfs/mkdirs',
                                           data={"path": path})

        if response.status_code != 200:
            raise choose_exception(response)

        return path
Esempio n. 15
0
    def close(self, handle):
        """
        Closes the stream behind the specified handle.

        Parameters
        ----------
        handle : int
            The handle on an open stream. This field is required.

        Returns
        -------
        handle if successful

        Raises
        ------
        ResourceDoesNotExist:
            If the handle does not exist

        AuthorizationError:
            If the service returns a 403 status code

        APIError:
            If the status code returned by the service is anything except 200
            and is not captured above
        """
        response = self._rest_call['POST']('/dbfs/close',
                                           data={"handle": handle})

        if response.status_code != 200:
            raise choose_exception(response)

        return handle
Esempio n. 16
0
    def list(self, path):
        """
        Lists the contents of a directory, or details of the file.

        Parameters
        ----------
        path : str
            The path of the file or directory. The path should be the
            absolute DBFS path (e.g. "/mnt/foo/"). This field is required.

        Returns
        -------
        Array of FileInfo named tuples (with path, is_dir and file_size);
        an empty list when the response carries no 'files' entry.

        Raises
        ------
        ResourceDoesNotExist:
            If the file or directory does not exist

        AuthorizationError:
            If the service returns a 403 status code

        APIError:
            If the status code returned by the service is anything except 200
            and is not captured above
        """
        METHOD = 'GET'
        API_PATH = '/dbfs/list'

        data = {"path": path}

        resp = self._rest_call[METHOD](API_PATH, data=data)

        if resp.status_code == 200:
            # Default to [] so a response without a 'files' key (e.g. an
            # empty directory) yields an empty list instead of raising a
            # TypeError when iterating None.
            return [FileInfo(**file) for file in resp.json().get('files', [])]
        else:
            exception = choose_exception(resp)
            raise exception
Esempio n. 17
0
    def create(self,
               cluster_name,
               num_workers,
               spark_version,
               node_type_id,
               python_version=3,
               autotermination_minutes=60,
               custom_spark_version=False,
               **kwargs):
        """
        Creates a new cluster in the given workspace.

        Parameters
        ----------
        cluster_name : str
            The display name of the cluster being created

        num_workers : int or dict
            The number of worker nodes in the cluster, or an autoscale
            configuration dict (passed through as the 'autoscale' field)

        spark_version : str
            Spark version key of the cluster (see spark_versions())

        node_type_id : str
            VM node type for the workers (see
            list_available_node_type_names())

        python_version : int, optional, default=3
            When set to 3, PYSPARK_PYTHON is pointed at the cluster's
            Python 3 interpreter via the spark_env_vars settings

        autotermination_minutes : int, optional, default=60
            Automatically terminates the cluster after it is inactive for this time in minutes.
            If not set, this cluster will not be automatically terminated. If specified, the threshold
            must be between 10 and 10000 minutes. You can also set this value to 0 to explicitly disable
            automatic termination.

        custom_spark_version : bool, optional, default=False
            If a custom Spark version is passed - then this prevents error checking for supported Spark versions

        kwargs : optional
            Other keyword arguments are passed to the API in the JSON payload. See supported arguments here:
            https://docs.azuredatabricks.net/api/latest/clusters.html#create

        Returns
        -------
        str
            The cluster_id of the newly created cluster

        Raises
        ------
        ValueError
            If the Spark version or the node type is not recognized
        """
        METHOD = 'POST'
        API_PATH = 'clusters/create'

        # Check if spark_version supported:
        if spark_version not in self.spark_versions(
        ) and not custom_spark_version:
            raise ValueError(
                "'{0}' is not a recognized spark_version. Please see the ".
                format(spark_version) +
                "spark_versions() method for available Spark Versions. ")

        available_vms = self.list_available_node_type_names()
        driver_vm_id = kwargs.get('driver_node_type_id')

        # Check if node_type available supported:
        if node_type_id not in available_vms or (driver_vm_id and driver_vm_id
                                                 not in available_vms):
            raise ValueError(
                "'{0}' is not an available VM type. Please see the ".format(
                    node_type_id) +
                "list_available_node_type_names() method for available node types"
            )

        cluster_config = {
            'cluster_name': cluster_name,
            'spark_version': spark_version,
            'node_type_id': node_type_id
        }

        # If python_version is set to Python 3, then overwrite the PYSPARK_PYTHON environment variable
        if python_version == 3:
            if kwargs.get('spark_env_vars'):
                kwargs['spark_env_vars'][
                    'PYSPARK_PYTHON'] = '/databricks/python3/bin/python3'
            else:
                kwargs['spark_env_vars'] = {
                    'PYSPARK_PYTHON': '/databricks/python3/bin/python3'
                }

        # Forward autotermination_minutes unless the caller passed None.
        # An identity check (rather than truthiness) is used so that an
        # explicit 0 -- documented above as "disable auto-termination" --
        # is still sent to the API instead of being silently dropped.
        if autotermination_minutes is not None:
            kwargs['autotermination_minutes'] = autotermination_minutes

        # Specify the size of the cluster: a dict selects autoscaling,
        # anything else is coerced to a fixed worker count.
        if isinstance(num_workers, dict):
            cluster_config['autoscale'] = num_workers
        else:
            cluster_config['num_workers'] = int(num_workers)

        # Merge kwargs and cluster_config
        cluster_config = dict_update(kwargs, cluster_config)

        resp = self._rest_call[METHOD](API_PATH, data=cluster_config)

        if resp.status_code == 200:
            return resp.json()['cluster_id']
        else:
            exception = choose_exception(resp)
            raise exception