Ejemplo n.º 1
0
    def append_file(self, path, file_data, **kwargs):
        """
        Appends to an existing file on HDFS

        WebHDFS REST call:
        POST http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=APPEND
        [&buffersize=<INT>]

        Note: This function does not follow automatic redirects but
        instead uses a two step call to the API as required in the
        WebHDFS documentation

        :param path: path of the target file, handed to the URI builder
        :param file_data: payload to append; sent only to the datanode
        :param kwargs: optional query arguments forwarded to the URI builder
        :returns: True when the datanode answers 200 OK
        :raises errors.PyWebHdfsException: if the namenode does not answer
            with a 307 redirect, or the datanode does not answer 200 OK;
            the exception carries the response text
        """

        # Step 1: ask the namenode where to write. The payload is
        # deliberately not sent here: the namenode only issues a
        # redirect, and sending the data would upload it twice (and
        # would exhaust a file-like or iterator payload before step 2).
        uri = self._create_uri(path, operations.APPEND, **kwargs)
        init_response = requests.post(uri, allow_redirects=False)

        if init_response.status_code != httplib.TEMPORARY_REDIRECT:
            raise errors.PyWebHdfsException(init_response.text)

        # Step 2: send the payload to the datanode address given in
        # the location header of the namenode's redirect.
        datanode_uri = init_response.headers['location']
        response = requests.post(datanode_uri, data=file_data)

        if response.status_code != httplib.OK:
            raise errors.PyWebHdfsException(response.text)

        return True
Ejemplo n.º 2
0
    def create_file(self, path, file_data, **kwargs):
        """
        Creates a new file on HDFS

        WebHDFS REST call:
        PUT http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=CREATE
        [&overwrite=<true|false>][&blocksize=<LONG>][&replication=<SHORT>]
        [&permission=<OCTAL>][&buffersize=<INT>]

        Note: This function does not follow automatic redirects but
        instead uses a two step call to the API as required in the
        WebHDFS documentation

        :param path: path of the file to create, handed to the URI builder
        :param file_data: content of the new file; sent only to the datanode
        :param kwargs: optional query arguments forwarded to the URI builder
        :returns: value of the location header of the datanode's
            201 Created response
        :raises errors.PyWebHdfsException: if the namenode does not answer
            with a 307 redirect, or the datanode does not answer
            201 Created; the exception carries the response text
        """

        # Step 1: ask the namenode where to write. The payload is
        # deliberately not sent here: the namenode only issues a
        # redirect, and sending the data would upload it twice (and
        # would exhaust a file-like or iterator payload before step 2).
        uri = self._create_uri(path, operations.CREATE, **kwargs)
        init_response = requests.put(uri, allow_redirects=False)

        if init_response.status_code != httplib.TEMPORARY_REDIRECT:
            raise errors.PyWebHdfsException(init_response.text)

        # Step 2: send the payload to the datanode address given in
        # the location header of the namenode's redirect.
        datanode_uri = init_response.headers['location']
        response = requests.put(datanode_uri, data=file_data)

        if response.status_code != httplib.CREATED:
            raise errors.PyWebHdfsException(response.text)

        # The response attribute is `headers` (plural); `header` does not
        # exist and raised AttributeError on every successful create.
        return response.headers['location']
Ejemplo n.º 3
0
    def temp_create_file(self, path, file_data, **kwargs):
        """
        Creates a new file on HDFS with a single redirect-following call

        WebHDFS REST call:
        PUT http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=CREATE

        Note: unlike create_file, this function lets the HTTP library
        follow the redirect automatically and sends the payload with
        the very first request.

        :param path: path of the file to create, handed to the URI builder
        :param file_data: content of the new file
        :param kwargs: optional query arguments forwarded to the URI builder
        :returns: value of the location header of the final
            201 Created response
        :raises errors.PyWebHdfsException: if the final response is not
            201 Created; the exception carries the response text
        """

        uri = self._create_uri(path, operations.CREATE, **kwargs)
        response = requests.put(uri, data=file_data, allow_redirects=True)

        if response.status_code != httplib.CREATED:
            raise errors.PyWebHdfsException(response.text)

        # The response attribute is `headers` (plural); `header` does not
        # exist and raised AttributeError on every successful create.
        return response.headers['location']
Ejemplo n.º 4
0
def _raise_pywebhdfs_exception(resp_code, message=None):
    """
    Raise the errors.* exception that corresponds to an HTTP status code

    400, 401, 404 and 405 map to BadRequest, Unauthorized, FileNotFound
    and MethodNotAllowed respectively; every other code raises the
    generic PyWebHdfsException. The message is passed to the exception
    as the ``msg`` keyword. This function never returns normally.
    """
    # Pick the exception class first, then raise from a single place.
    if resp_code == http_client.BAD_REQUEST:
        exc_type = errors.BadRequest
    elif resp_code == http_client.UNAUTHORIZED:
        exc_type = errors.Unauthorized
    elif resp_code == http_client.NOT_FOUND:
        exc_type = errors.FileNotFound
    elif resp_code == http_client.METHOD_NOT_ALLOWED:
        exc_type = errors.MethodNotAllowed
    else:
        exc_type = errors.PyWebHdfsException

    raise exc_type(msg=message)
Ejemplo n.º 5
0
    def get_file_dir_status(self, path):
        """
        Fetch the file_status of one file or directory on HDFS

        WebHDFS REST call:
        GET http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=GETFILESTATUS

        Returns the decoded JSON body of the response. Any status code
        other than 200 OK raises errors.PyWebHdfsException carrying the
        response text.
        """

        status_uri = self._create_uri(path, operations.GETFILESTATUS)
        reply = requests.get(status_uri)

        if reply.status_code == httplib.OK:
            return reply.json()

        raise errors.PyWebHdfsException(reply.text)
Ejemplo n.º 6
0
    def delete_file_dir(self, path, recursive='false'):
        """
        Delete an existing file or directory from HDFS

        WebHDFS REST call:
        DELETE http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=DELETE
        [&recursive=<true|false>]

        :param path: path of the file or directory to delete, handed to
            the URI builder
        :param recursive: value of the ``recursive`` query argument;
            defaults to the string 'false'
        :returns: True whenever the response status is 200 OK (the
            response body is not inspected)
        :raises errors.PyWebHdfsException: for any other status code;
            the exception carries the response text
        """

        uri = self._create_uri(path, operations.DELETE, recursive=recursive)
        response = requests.delete(uri, allow_redirects=True)

        if response.status_code != httplib.OK:
            raise errors.PyWebHdfsException(response.text)

        return True
Ejemplo n.º 7
0
    def list_dir(self, path):
        """
        Fetch the file_status entries for everything inside an HDFS
        directory

        WebHDFS REST call:
        GET http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=LISTSTATUS

        Returns the decoded JSON body of the response. Any status code
        other than 200 OK raises errors.PyWebHdfsException carrying the
        response text.
        """

        listing_uri = self._create_uri(path, operations.LISTSTATUS)
        reply = requests.get(listing_uri, allow_redirects=True)

        if reply.status_code == httplib.OK:
            return reply.json()

        raise errors.PyWebHdfsException(reply.text)
Ejemplo n.º 8
0
    def rename_file_dir(self, path, destination_path):
        """
        Rename an existing file or directory on HDFS

        WebHDFS REST call:
        PUT <HOST>:<PORT>/webhdfs/v1/<PATH>?op=RENAME&destination=<PATH>

        Returns True whenever the response status is 200 OK (the
        response body is not inspected). Any other status code raises
        errors.PyWebHdfsException carrying the response text.
        """

        rename_uri = self._create_uri(
            path, operations.RENAME, destination=destination_path)
        reply = requests.put(rename_uri, allow_redirects=True)

        if reply.status_code == httplib.OK:
            return True

        raise errors.PyWebHdfsException(reply.text)
Ejemplo n.º 9
0
    def make_dir(self, path, **kwargs):
        """
        Create a new directory on HDFS

        WebHDFS REST call:
        PUT http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=MKDIRS
        [&permission=<OCTAL>]

        Keyword arguments are forwarded to the URI builder as optional
        query arguments. Returns True whenever the response status is
        200 OK (the response body is not inspected). Any other status
        code raises errors.PyWebHdfsException carrying the response text.
        """
        mkdirs_uri = self._create_uri(path, operations.MKDIRS, **kwargs)
        reply = requests.put(mkdirs_uri, allow_redirects=True)

        if reply.status_code == httplib.OK:
            return True

        raise errors.PyWebHdfsException(reply.text)
Ejemplo n.º 10
0
    def read_file(self, path, **kwargs):
        """
        Read a file on HDFS and return its content

        WebHDFS REST call:
        GET http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=OPEN
        [&offset=<LONG>][&length=<LONG>][&buffersize=<INT>]

        Note: this function follows automatic redirects

        Keyword arguments are forwarded to the URI builder as optional
        query arguments. Returns the response body as text. Any status
        code other than 200 OK raises errors.PyWebHdfsException carrying
        the response text.
        """

        open_uri = self._create_uri(path, operations.OPEN, **kwargs)
        reply = requests.get(open_uri, allow_redirects=True)

        if reply.status_code == httplib.OK:
            return reply.text

        raise errors.PyWebHdfsException(reply.text)