예제 #1
0
class HdfsFileManager(FileManagerBase):
    """File manager for ``hdfs://`` paths, backed by a snakebite client."""

    def __init__(self):
        self._client = AutoConfigClient()

    def can_handle(self, path):
        """Return True when `path` begins with the ``hdfs://`` scheme."""
        return path.startswith('hdfs://')

    def ls(self, path: str, recursive=False) -> List[File]:
        """List the regular files under `path`.

        Only entries whose ``file_type`` is ``'f'`` are returned; each one is
        wrapped in a `File` carrying its path and length.
        """
        entries = self._client.ls([path], recurse=recursive)
        return [
            File(path=entry['path'], size=entry['length'])
            for entry in entries
            if entry['file_type'] == 'f'
        ]

    def move(self, source: str, destination: str) -> bool:
        """Rename `source` to `destination`.

        Returns True when the client yielded at least one result.
        """
        outcomes = list(self._client.rename([source], destination))
        return bool(outcomes)

    def remove(self, path: str) -> bool:
        """Delete `path`.

        Returns True when the client yielded at least one result.
        """
        outcomes = list(self._client.delete([path]))
        return bool(outcomes)

    def copy(self, source: str, destination: str) -> bool:
        """Copy is not supported yet; always raises NotImplementedError."""
        # TODO
        raise NotImplementedError()

    def mkdir(self, path: str) -> bool:
        """Create `path`, asking the client to create parent directories.

        Returns the ``'result'`` field of the first item the client yields.
        """
        first_outcome = next(self._client.mkdir([path], create_parent=True))
        return first_outcome.get('result')
def rm(hdfs_path, recurse=False, force=False):
    """Delete one or more HDFS paths.

    Args:
        hdfs_path (str or list of str): HDFS path(s) to delete. A single
            path is wrapped into a one-element list; a list or tuple of
            paths is passed to the client as given.
        recurse (bool): recursively delete directories.
        force (bool): accepted for interface compatibility; this function
            does not use it.

    Returns:
        list: every result yielded by the client's ``delete`` call.
    """
    # Previously a list argument was wrapped again ([[...]]), contradicting
    # the documented "str or list of strings" contract.
    if isinstance(hdfs_path, (list, tuple)):
        paths = list(hdfs_path)
    else:
        paths = [hdfs_path]

    client = AutoConfigClient()

    return list(client.delete(paths, recurse))
예제 #3
0
class HdfsFileManager(FileManagerBase):
    """HDFS flavour of the file manager; delegates to a snakebite client."""

    def __init__(self):
        self._client = AutoConfigClient()

    def can_handle(self, path):
        """Tell whether `path` starts with the ``hdfs://`` prefix."""
        return path.startswith('hdfs://')

    def ls(self, path: str, recursive=False) -> List[str]:
        """Return the paths of all entries under `path` with file_type 'f'."""
        listing = self._client.ls([path], recurse=recursive)
        return [item['path'] for item in listing if item['file_type'] == 'f']

    def move(self, source: str, destination: str) -> bool:
        """Rename `source` to `destination`; True if any result came back."""
        renamed = list(self._client.rename([source], destination))
        return len(renamed) != 0

    def remove(self, path: str) -> bool:
        """Delete `path`; True if any result came back."""
        deleted = list(self._client.delete([path]))
        return len(deleted) != 0
예제 #4
0
    # don't nuke this; hbase uses it for bulk loading.
    re.compile("^/tmp/hbase-staging/?"),

    # let's try to make sure we're not matching against a top-level path
    re.compile("^/[-_.a-zA-Z0-9]+/?$"),

    re.compile("cloudera_health_monitoring_canary_files"),

    # let's bail out explicitly on anything in our data path
    re.compile("^/data/production/?"),
]


# Walk the entries under args.path and delete those older than `older_than`,
# skipping anything that matches a pattern in `donotdelete_whitelist`.
# Nothing is deleted unless args.actually_delete is set (dry run otherwise).
if client.test(args.path, exists=True):
    for x in client.ls([args.path], recurse=args.recurse_filesystem):
        path = x['path']

        if any(regex.search(path) for regex in donotdelete_whitelist):
            logger.info("Matched banned thing, not attempting to delete it: %s", path)
            continue

        # NOTE(review): the /1000 implies modification_time is in
        # milliseconds since the epoch -- confirm against the client docs.
        f_timestamp = datetime.datetime.fromtimestamp(x['modification_time']/1000)
        if f_timestamp >= older_than:
            continue

        logger.info("I might delete this: %s %s", path, f_timestamp)
        if not args.actually_delete:
            logger.info( "I would have deleted this: %s ", path)
            continue

        # Consume the generator so the delete is actually carried out
        # before its results are logged.
        delete_results = list(client.delete([path], recurse=True))
        logger.info("Issuing delete of %s", delete_results)

        # The path must be GONE for the removal to count as successful; the
        # original check was inverted and reported "Removed" when the path
        # still existed.
        if not client.test(path, exists=True):
            logger.info("Removed %s", path)
        else:
            logger.warning("Delete issued but %s still exists", path)
else:
    # Logger.warn is a deprecated alias (removed in Python 3.13).
    logger.warning("%s is not found on hdfs", args.path)