class HdfsFileManager(FileManagerBase):
    """A wrapper of snakebite client."""

    def can_handle(self, path):
        """Return True if ``path`` uses the hdfs:// scheme."""
        return path.startswith('hdfs://')

    def __init__(self):
        self._client = AutoConfigClient()

    def ls(self, path: str, recursive=False) -> List[File]:
        """List regular files under ``path``.

        Args:
            path: HDFS path to list.
            recursive: if True, descend into subdirectories.

        Returns:
            A list of File(path, size) for entries whose file_type is 'f'
            (directories are skipped).
        """
        files = []
        # 'entry' (not 'file') to avoid shadowing the builtin.
        for entry in self._client.ls([path], recurse=recursive):
            if entry['file_type'] == 'f':
                files.append(File(path=entry['path'], size=entry['length']))
        return files

    def move(self, source: str, destination: str) -> bool:
        """Rename ``source`` to ``destination``.

        snakebite yields one result dict per affected path; an entry with
        'result': False signals failure, so check the flag rather than
        merely counting entries (the old check reported success for
        failed renames).
        """
        results = list(self._client.rename([source], destination))
        return bool(results) and all(r.get('result') for r in results)

    def remove(self, path: str) -> bool:
        """Delete ``path``; True only if every yielded result succeeded."""
        results = list(self._client.delete([path]))
        return bool(results) and all(r.get('result') for r in results)

    def copy(self, source: str, destination: str) -> bool:
        # TODO
        raise NotImplementedError()

    def mkdir(self, path: str) -> bool:
        """Create ``path`` (with parents); returns snakebite's 'result' flag."""
        return next(self._client.mkdir([path], create_parent=True))\
            .get('result')
def rm(hdfs_path, recurse=False, force=False):
    """Delete files or directories on HDFS.

    Args:
        hdfs_path (str or list of strings): hdfs files to delete
        recurse (boolean): recursively delete the folder
        force (boolean): force deletion (non-interactive). Currently
            unused — snakebite's delete is always non-interactive; kept
            for interface compatibility.

    Returns:
        list: one result dict per deleted path, as yielded by
        snakebite's ``delete`` (the old docstring's "String mkdir
        result as json" was wrong).
    """
    client = AutoConfigClient()
    return list(client.delete([hdfs_path], recurse))
class HdfsFileManager(FileManagerBase):
    """A wrapper of snakebite client."""

    def can_handle(self, path):
        """Return True for paths under the hdfs:// scheme."""
        return path.startswith('hdfs://')

    def __init__(self):
        self._client = AutoConfigClient()

    def ls(self, path: str, recursive=False) -> List[str]:
        """Return the paths of regular files (file_type 'f') under ``path``."""
        return [
            entry['path']
            for entry in self._client.ls([path], recurse=recursive)
            if entry['file_type'] == 'f'
        ]

    def move(self, source: str, destination: str) -> bool:
        """Rename ``source`` to ``destination``; True if any result was yielded."""
        renamed = list(self._client.rename([source], destination))
        return len(renamed) > 0

    def remove(self, path: str) -> bool:
        """Delete ``path``; True if any result was yielded."""
        deleted = list(self._client.delete([path]))
        return len(deleted) > 0
# don't nuke this; hbase uses it for bulk loading. re.compile("^/tmp/hbase-staging/?"), # let's try to make sure we're not matching against a top-level path re.compile("^/[-_.a-zA-Z0-9]+/?$"), re.compile("cloudera_health_monitoring_canary_files"), # let's bail out explicitly on anything in our data path re.compile("^/data/production/?"), ] if client.test(args.path, exists=True): for x in client.ls([args.path], recurse=args.recurse_filesystem): if any(regex.search(x['path']) for regex in donotdelete_whitelist): logger.info("Matched banned thing, not attempting to delete it: %s", x['path']) else: f_timestamp = datetime.datetime.fromtimestamp(x['modification_time']/1000) if f_timestamp < older_than: logger.info("I might delete this: %s %s", x['path'], f_timestamp) if args.actually_delete: logger.info("Issuing delete of %s", list(client.delete([x['path']], recurse=True))) if client.test(x['path'], exists=True): logger.info("Removed %s", x['path']) else: logger.info( "I would have deleted this: %s ", x['path']) else: logger.warn("%s is not found on hdfs", args.path)