def _get_status(self, path):
    """Return the status record for ``path``, using the stats cache when fresh.

    A cached entry is a ``(timestamp, status)`` tuple stored in
    ``self._stats_cache``. It is reused only while its age in seconds is
    below ``CACHE_MAX_SECONDS``; otherwise the status is fetched again through
    ``self.client.get_file_dir_status`` and the cache entry is replaced.

    :param path: path whose status is requested; also used as the cache key.
    :return: the value produced by ``webhdfs.webhdfs_entry_to_dict`` for the
        ``"FileStatus"`` element of the client response (or the cached copy).
    """
    logger.debug("_get_dir_status %s", path)

    entry = self._stats_cache[path] if path in self._stats_cache else None
    if entry is not None:
        age_seconds = (datetime.now() - entry[0]).total_seconds()
        if age_seconds < CACHE_MAX_SECONDS:
            status = entry[1]
            logger.debug("_get_status: path %s --> cached status %s", path, status)
            return status

    # Cache miss or stale entry: query the server and refresh the cache.
    raw_status = self.client.get_file_dir_status(path)["FileStatus"]
    status = webhdfs.webhdfs_entry_to_dict(raw_status)
    logger.debug("_get_status: path %s --> new status %s", path, status)
    self._stats_cache[path] = (datetime.now(), status)
    return status
def _get_listdir(self, path):
    """Return the names of the entries in directory ``path``.

    Listings are cached in ``self._listdir_cache`` as ``(timestamp, names)``
    tuples and reused while their age in seconds is below
    ``CACHE_MAX_SECONDS``. On a miss (or a stale entry) the directory is
    listed through ``self.client.list_dir``; every returned entry is also
    stored in ``self._stats_cache`` under the child's full path.

    :param path: directory to list; also used as the listing cache key.
    :return: list of entry names. This is the same list object that is kept
        in the cache, so callers should treat it as read-only.
    """
    logger.info("List dir %s", path)

    if path in self._listdir_cache:
        cached = self._listdir_cache[path]
        age_seconds = (datetime.now() - cached[0]).total_seconds()
        if age_seconds < CACHE_MAX_SECONDS:
            entries = cached[1]
            logger.debug("_get_listdir %s: cached value %s", path, entries)
            return entries

    # Drop trailing slashes before joining so that children of "/" (or of
    # "/dir/") are keyed as "/name" ("/dir/name") rather than "//name"
    # ("/dir//name"). The old key never matched a lookup for "/name", and it
    # differed from the key reported in the debug log.
    parent = path.rstrip('/')

    entries = []
    for raw_status in self.client.list_dir(path)["FileStatuses"]["FileStatus"]:
        status = webhdfs.webhdfs_entry_to_dict(raw_status)
        child_path = parent + '/' + status['name']
        logger.debug("Updating self._stats_cache[%s]", child_path)
        self._stats_cache[child_path] = (datetime.now(), status)
        entries.append(status['name'])

    self._listdir_cache[path] = (datetime.now(), entries)
    logger.debug("_get_listdir %s: new value %s", path, entries)
    return entries