class WhenTestingRenameOperation(unittest.TestCase):

    def setUp(self):

        self.host = 'hostname'
        self.port = '00000'
        self.user_name = 'username'
        self.webhdfs = PyWebHdfsClient(host=self.host, port=self.port,
                                       user_name=self.user_name)
        self.response = MagicMock()
        self.requests = MagicMock(return_value=self.response)
        self.path = 'user/hdfs/old_dir'
        self.new_path = '/user/hdfs/new_dir'
        self.response = MagicMock()

    def test_rename_throws_exception_for_not_ok(self):

        self.response.status_code = httplib.BAD_REQUEST
        self.requests.put.return_value = self.response
        with patch('pywebhdfs.webhdfs.requests', self.requests):
            with self.assertRaises(errors.PyWebHdfsException):
                self.webhdfs.rename_file_dir(self.path, self.new_path)

    def test_rename_returns_true(self):

        self.response.status_code = httplib.OK
        self.requests.put.return_value = self.response
        with patch('pywebhdfs.webhdfs.requests', self.requests):
            result = self.webhdfs.rename_file_dir(self.path, self.new_path)
        self.assertTrue(result)
Exemple #2
0
class WhenTestingRenameOperation(unittest.TestCase):
    def setUp(self):

        self.host = 'hostname'
        self.port = '00000'
        self.user_name = 'username'
        self.webhdfs = PyWebHdfsClient(host=self.host,
                                       port=self.port,
                                       user_name=self.user_name)
        self.response = MagicMock()
        self.requests = MagicMock(return_value=self.response)
        self.path = 'user/hdfs/old_dir'
        self.new_path = '/user/hdfs/new_dir'
        self.response = MagicMock()

    def test_rename_throws_exception_for_not_ok(self):

        self.response.status_code = http_client.BAD_REQUEST
        self.requests.put.return_value = self.response
        with patch('pywebhdfs.webhdfs.requests', self.requests):
            with self.assertRaises(errors.PyWebHdfsException):
                self.webhdfs.rename_file_dir(self.path, self.new_path)

    def test_rename_returns_true(self):

        self.response.status_code = http_client.OK
        self.requests.put.return_value = self.response
        with patch('pywebhdfs.webhdfs.requests', self.requests):
            result = self.webhdfs.rename_file_dir(self.path, self.new_path)
        self.assertTrue(result)
Exemple #3
0
class WhenTestingRenameOperation(unittest.TestCase):
    def setUp(self):
        self.host = 'hostname'
        self.port = '00000'
        self.user_name = 'username'
        self.webhdfs = PyWebHdfsClient(host=self.host, port=self.port,
                                       user_name=self.user_name)
        self.response = MagicMock()
        self.path = 'user/hdfs/old_dir'
        self.new_path = '/user/hdfs/new_dir'
        self.response = MagicMock()
        self.rename = {"boolean": True}
        self.response.json = MagicMock(return_value=self.rename)

    @patch.object(Session, 'put')
    def test_rename_throws_exception_for_not_ok(self, mock_put):
        self.response.status_code = http_client.BAD_REQUEST
        mock_put.return_value = self.response
        with self.assertRaises(errors.PyWebHdfsException):
            self.webhdfs.rename_file_dir(self.path, self.new_path)

    @patch.object(Session, 'put')
    def test_rename_returns_true(self, mock_put):
        self.response.status_code = http_client.OK
        mock_put.return_value = self.response
        result = self.webhdfs.rename_file_dir(self.path, self.new_path)
        self.assertEqual(result, {"boolean": True})
Exemple #4
0
# append to the file created in previous step
print('appending to file at: {0}\n'.format(example_file))
hdfs.append_file(example_file, example_data)

file_status = hdfs.get_file_dir_status(example_file)
print file_status

# read in the data for the file
print('reading data from file at: {0}\n'.format(example_file))
file_data = hdfs.read_file(example_file)
print file_data

# rename the example_dir
print('renaming directory from {0} to {1}\n').format(example_dir, rename_dir)
hdfs.rename_file_dir(example_dir, '/{0}'.format(rename_dir))

# list the contents of the new directory
listdir_stats = hdfs.list_dir(rename_dir)
print listdir_stats

example_file = '{dir}/example.txt'.format(dir=rename_dir)

# delete the example file
print('deleting example file at: {0}'.format(example_file))
hdfs.delete_file_dir(example_file)

# list the contents of the directory
listdir_stats = hdfs.list_dir(rename_dir)
print listdir_stats
Exemple #5
0
file_status = hdfs.get_file_dir_status(example_file)
print(file_status)

# checksum reflects file changes
file_checksum = hdfs.get_file_checksum(example_file)
print(file_checksum)

# read in the data for the file
print('reading data from file at: {0}\n'.format(example_file))
file_data = hdfs.read_file(example_file)
print(file_data)

# rename the example_dir
print('renaming directory from {0} to {1}\n').format(example_dir, rename_dir)
hdfs.rename_file_dir(example_dir, '/{0}'.format(rename_dir))

# list the contents of the new directory
listdir_stats = hdfs.list_dir(rename_dir)
print(listdir_stats)

example_file = '{dir}/example.txt'.format(dir=rename_dir)

# delete the example file
print('deleting example file at: {0}'.format(example_file))
hdfs.delete_file_dir(example_file)

# list the contents of the directory
listdir_stats = hdfs.list_dir(rename_dir)
print(listdir_stats)
Exemple #6
0
class HadoopFileSystem(BaseFs.FileSystem):
    def __init__(self,
                 vcPath,
                 simulateOnly=False,
                 isVerbose=False,
                 logger=None,
                 user=None,
                 host=None,
                 port=None):
        BaseFs.FileSystem.__init__(self, simulateOnly, isVerbose, logger)
        config = Config.Config()
        hdfsUser, hdfsHost, hdfsPort = config.getHadoopConfig(user, host, port)
        self.hdfs = PyWebHdfsClient(host=hdfsHost,
                                    port=hdfsPort,
                                    user_name=hdfsUser)
        self.vcPath = vcPath

    def make_fd(self, path, isSrc, dstDirMustExist):
        fd = None
        try:
            fd = HadoopFileDescriptor(self, path, isSrc, dstDirMustExist)
        except pywebhdfs.errors.FileNotFound:
            self.logger.info("DESC: does not exist: " + path)
            raise Errors.FileNotFound("Path {0} does not exist".format(path))
        except pywebhdfs.errors.Unauthorized as e:
            self.logger.info("Unauthorized for path {0}: {1}".format(path, e))
            raise Errors.Unauthorized(
                "Unauthorized access to the path {0}: {1}".format(path, e))
        except requests.exceptions.RequestException as e:
            self.logger.info("ConnectionError for path {0}: {1}".format(
                path, e))
            raise Errors.BadConnection(
                "Connection error while looking for path: {0}, exc={1}".format(
                    path, e))
        except pywebhdfs.errors.PyWebHdfsException as e:
            self.logger.info("PyWebHdfsException for path {0}: {1}".format(
                path, e))
            raise Errors.FsException(
                "An exception happened while looking for path: {0}, exc={1}".
                format(path, e))
        return fd

    def exists_file_dir(self, fd):
        try:
            return self.hdfs.exists_file_dir(fd.abspath)
        except pywebhdfs.errors.Unauthorized as e:
            self.logger.info("Unauthorized for path {0}: {1}".format(
                fd.abspath, e))
            raise Errors.Unauthorized(
                "Unauthorized access to the path {0}: {1}".format(
                    fd.abspath, e))
        except requests.exceptions.RequestException as e:
            self.logger.info("ConnectionError for path {0}: {1}".format(
                fd.abspath, e))
            raise Errors.BadConnection(
                "Connection error during HDFS exists test: {0}, exc={1}".
                format(fd.abspath, e))
        except pywebhdfs.errors.PyWebHdfsException as e:
            self.logger.info("PyWebHdfsException for path {0}: {1}".format(
                fd.abspath, e))
            raise Errors.FsException(
                "An exception happened during HDFS exists test: {0}, exc={1}".
                format(fd.abspath, e))

    def delete_file_dir(self, fd, recursive=False, force=False):
        if self.simulateOnly:
            print("SIMULATE -> remove file/dir: {0}, recursive={1}".format(
                fd.abspath, recursive))
        else:
            try:
                if not recursive or force or \
                        query_yes_no(question="Are you sure you want to delete folder recursively?", default="no"):
                    status = self.hdfs.delete_file_dir(fd.abspath,
                                                       recursive=recursive)
            except pywebhdfs.errors.Unauthorized as e:
                self.logger.info("Unauthorized for path {0}: {1}".format(
                    fd.abspath, e))
                raise Errors.Unauthorized(
                    "Unauthorized access to the path {0}: {1}".format(
                        fd.abspath, e))
            except requests.exceptions.RequestException as e:
                self.logger.info("ConnectionError for path {0}: {1}".format(
                    fd.abspath, e))
                raise Errors.BadConnection(
                    "Connection error during HDFS delete directory: {0}, exc={1}"
                    .format(fd.abspath, e))
            except pywebhdfs.errors.PyWebHdfsException as e:
                self.logger.info("PyWebHdfsException for path {0}: {1}".format(
                    fd.abspath, e))
                raise Errors.FsException(
                    "An exception happened during HDFS delete directory: {0}, exc={1}"
                    .format(fd.abspath, e))

    def list_dir(self, fd):
        try:
            status = self.hdfs.list_dir(fd.abspath)
        except pywebhdfs.errors.Unauthorized as e:
            self.logger.info("Unauthorized for path {0}: {1}".format(
                fd.abspath, e))
            raise Errors.Unauthorized(
                "Unauthorized access to the path {0}: {1}".format(
                    fd.abspath, e))
        except requests.exceptions.RequestException as e:
            self.logger.info("ConnectionError for path {0}: {1}".format(
                fd.abspath, e))
            raise Errors.BadConnection(
                "Connection error while looking for path: {0}, exc={1}".format(
                    fd.abspath, e))
        except pywebhdfs.errors.PyWebHdfsException as e:
            self.logger.info("PyWebHdfsException for path {0}: {1}".format(
                fd.abspath, e))
            raise Errors.FsException(
                "An exception happened while looking for path: {0}, exc={1}".
                format(fd.abspath, e))
        currentDir = status["FileStatuses"]["FileStatus"]
        for item in currentDir:
            yield HadoopFileDescriptor(self,
                                       fd.abspath,
                                       isSrc=True,
                                       needsDstDirCheck=False,
                                       fileJson=item)

    def make_dir(self, path):
        if self.simulateOnly:
            print("SIMULATE -> make dir: " + path)
        else:
            try:
                self.hdfs.make_dir(path)
            except pywebhdfs.errors.Unauthorized as e:
                self.logger.info("Unauthorized for path {0}: {1}".format(
                    path, e))
                raise Errors.Unauthorized(
                    "Unauthorized access to the path {0}: {1}".format(path, e))
            except requests.exceptions.RequestException as e:
                self.logger.info("ConnectionError for path {0}: {1}".format(
                    path, e))
                raise Errors.BadConnection(
                    "Connection error during HDFS create directory: {0}, exc={1}"
                    .format(path, e))
            except pywebhdfs.errors.PyWebHdfsException as e:
                self.logger.info("PyWebHdfsException for path {0}: {1}".format(
                    path, e))
                raise Errors.FsException(
                    "An exception happened during HDFS create directory: {0}, exc={1}"
                    .format(path, e))

    def open_file(self, fd, rwMode):
        return fd

    def close_file(self, fd):
        pass

    def touch_file(self, fd):
        if self.simulateOnly:
            print("SIMULATE -> touch file: " + fd.abspath)
        else:
            try:
                self.hdfs.create_file(fd.abspath, 0, overwrite=True)
            except pywebhdfs.errors.Unauthorized as e:
                self.logger.info("Unauthorized for path {0}: {1}".format(
                    fd.abspath, e))
                raise Errors.Unauthorized(
                    "Unauthorized access to the path {0}: {1}".format(
                        fd.abspath, e))
            except requests.exceptions.RequestException as e:
                self.logger.info("ConnectionError for path {0}: {1}".format(
                    fd.abspath, e))
                raise Errors.BadConnection(
                    "Connection error during HDFS create file: {0}, exc={1}".
                    format(fd.abspath, e))
            except pywebhdfs.errors.PyWebHdfsException as e:
                self.logger.info("PyWebHdfsException for path {0}: {1}".format(
                    fd.abspath, e))
                raise Errors.FsException(
                    "An exception happened during HDFS create file: {0}, exc={1}"
                    .format(fd.abspath, e))

    def truncate_file(self, fd, size):
        if self.simulateOnly:
            print("SIMULATE -> truncate file: {0}, size={1}".format(
                fd.abspath, size))
        else:
            try:
                self.hdfs.truncate_file(fd.abspath, size)
            except pywebhdfs.errors.Unauthorized as e:
                self.logger.info("Unauthorized for path {0}: {1}".format(
                    fd.abspath, e))
                raise Errors.Unauthorized(
                    "Unauthorized access to the path {0}: {1}".format(
                        fd.abspath, e))
            except requests.exceptions.RequestException as e:
                self.logger.info("ConnectionError for path {0}: {1}".format(
                    fd.abspath, e))
                raise Errors.BadConnection(
                    "Connection error during HDFS truncate file: {0}, exc={1}".
                    format(fd.abspath, e))
            except pywebhdfs.errors.PyWebHdfsException as e:
                self.logger.info("PyWebHdfsException for path {0}: {1}".format(
                    fd.abspath, e))
                raise Errors.FsException(
                    "An exception happened during HDFS truncate file: {0}, exc={1}"
                    .format(fd.abspath, e))

    def try_concat_files(self, fd, chunkFdList):
        # Workaround for unordered concat bug in Hadoop 2.7.1 is to use one source at the time
        # https://issues.apache.org/jira/browse/HDFS-8891
        currIndex = 0
        concatStep = 20
        chunkedList = [
            chunkFdList[pos:pos + concatStep]
            for pos in range(0, len(chunkFdList), concatStep)
        ]
        for sourceChunk in chunkedList:
            try:
                self.concat_files(fd, sourceChunk)
                currIndex += len(sourceChunk)
            except Errors.FsException as e:
                break

        return currIndex

    def concat_files(self, fd, chunkFdList):
        strList = list()
        for chunkFd in chunkFdList:
            strList.append(chunkFd.abspath)

        if self.simulateOnly:
            print("SIMULATE -> concat file: {0}, sources={1}".format(
                fd.abspath, ",".join(strList)))
        else:
            try:
                self.hdfs.concat_files(fd.abspath, strList)
            except pywebhdfs.errors.Unauthorized as e:
                self.logger.info("Unauthorized for path {0}: {1}".format(
                    fd.abspath, e))
                raise Errors.Unauthorized(
                    "Unauthorized access to the path {0}: {1}".format(
                        fd.abspath, e))
            except requests.exceptions.RequestException as e:
                self.logger.info("ConnectionError for path {0}: {1}".format(
                    fd.abspath, e))
                raise Errors.BadConnection(
                    "Connection error during HDFS concat file: {0}, exc={1}".
                    format(fd.abspath, e))
            except pywebhdfs.errors.PyWebHdfsException as e:
                self.logger.info("PyWebHdfsException for path {0}: {1}".format(
                    fd.abspath, e))
                raise Errors.FsException(
                    "An exception happened during HDFS concat file: {0}, exc={1}"
                    .format(fd.abspath, e))

    def read_data(self, fd, offset, size):
        if offset >= fd.size:
            return ""
        else:
            try:
                contents = self.hdfs.read_file(fd.abspath,
                                               offset=offset,
                                               length=size)
            except pywebhdfs.errors.Unauthorized as e:
                self.logger.info("Unauthorized for path {0}: {1}".format(
                    fd.abspath, e))
                raise Errors.Unauthorized(
                    "Unauthorized access to the path {0}: {1}".format(
                        fd.abspath, e))
            except requests.exceptions.RequestException as e:
                self.logger.info("ConnectionError for path {0}: {1}".format(
                    fd.abspath, e))
                raise Errors.BadConnection(
                    "Connection error during HDFS read file: {0}, exc={1}".
                    format(fd.abspath, e))
            except pywebhdfs.errors.PyWebHdfsException as e:
                self.logger.info("PyWebHdfsException for path {0}: {1}".format(
                    fd.abspath, e))
                raise Errors.FsException(
                    "An exception happened during HDFS read file: {0}, exc={1}"
                    .format(fd.abspath, e))
            return contents

    def append_data(self, fd, data):
        if self.simulateOnly:
            print("SIMULATE -> write file data: " + fd.abspath)
        else:
            try:
                self.hdfs.append_file(fd.abspath, data)
            except pywebhdfs.errors.Unauthorized as e:
                self.logger.info("Unauthorized for path {0}: {1}".format(
                    fd.abspath, e))
                raise Errors.Unauthorized(
                    "Unauthorized access to the path {0}: {1}".format(
                        fd.abspath, e))
            except requests.exceptions.RequestException as e:
                self.logger.info("ConnectionError for path {0}: {1}".format(
                    fd.abspath, e))
                raise Errors.BadConnection(
                    "Connection error during HDFS append file: {0}, exc={1}".
                    format(fd.abspath, e))
            except pywebhdfs.errors.PyWebHdfsException as e:
                self.logger.info("PyWebHdfsException for path {0}: {1}".format(
                    fd.abspath, e))
                raise Errors.FsException(
                    "An exception happened during HDFS append file: {0}, exc={1}"
                    .format(fd.abspath, e))

    def local_mv_file(self, src, dst):
        if self.simulateOnly:
            print("SIMULATE -> local move file: {0} -> {1} ".format(
                src.abspath, dst.abspath))
        else:
            try:
                self.hdfs.rename_file_dir(src.abspath, dst.abspath)
            except pywebhdfs.errors.Unauthorized as e:
                self.logger.info("Unauthorized for path {0}: {1}".format(
                    src.abspath, e))
                raise Errors.Unauthorized(
                    "Unauthorized access to the path {0}: {1}".format(
                        src.abspath, e))
            except requests.exceptions.RequestException as e:
                self.logger.info("ConnectionError for path {0}: {1}".format(
                    src.abspath, e))
                raise Errors.BadConnection(
                    "Connection error during HDFS rename file: {0}, exc={1}".
                    format(src.abspath, e))
            except pywebhdfs.errors.PyWebHdfsException as e:
                self.logger.info("PyWebHdfsException for path {0}: {1}".format(
                    src.abspath, e))
                raise Errors.FsException(
                    "An exception happened during HDFS rename file: {0}, exc={1}"
                    .format(src.abspath, e))

    def local_cp_file(self, src, dst):
        # This is an open issue in Hadoop community: https://issues.apache.org/jira/browse/HDFS-3370
        # Instead, we can do a symbolic link
        if self.simulateOnly:
            print("SIMULATE -> local copy file: {0} -> {1} ".format(
                src.abspath, dst.abspath))
        else:
            print(
                "Copy within HDFS is not supported due to lack of Hadoop support"
            )
            print(
                "Once symbolic links are enabled, this feature will be enabled"
            )
            sys.exit(1)
            # self.hdfs.create_sym_link(src.abspath, dst.abspath, createParent=True)

    def get_hdfs_file_dir_json(self, path):
        try:
            status = self.hdfs.get_file_dir_status(path)
            return status["FileStatus"]
        except pywebhdfs.errors.FileNotFound:
            return None

    def validate_hdfs_arg(self, arg):
        if not arg.startswith(self.vcPath):
            print("Error: You don't have permissions to the path: %s" % arg)
            print("Your path must be rooted under: %s" % self.vcPath)
            sys.exit(1)