def _dir(self, path):
    """Yield a `FileSystemFileDesc` for each entry listed under `path`.

    Iterates over every response page produced by the directory paginator.
    Entries under the 'CommonPrefixes' key of a page are reported as
    directories; entries under 'Contents' are reported as files.

    When `self.skip_hadoop_artifacts` is truthy, keys ending in '_$folder$'
    are omitted from the results.

    Args:
        path: The location to list; passed unchanged to the paginator.

    Yields:
        FileSystemFileDesc: One descriptor per directory or file entry. File
        descriptors also carry `bytes`, `owner` and `last_modified`; `owner`
        is None when the entry carries no owner display name.
    """
    for response_data in self.__dir_paginator(path):
        for common_prefix in response_data.get('CommonPrefixes', []):
            directory_path = common_prefix['Prefix'][:-1]  # Remove trailing slash
            yield FileSystemFileDesc(
                fs=self,
                path=directory_path,
                name=directory_path.split(self.path_separator)[-1],
                type='directory',
            )

        for entry in response_data.get('Contents', []):
            key = entry['Key']
            if self.skip_hadoop_artifacts and key.endswith('_$folder$'):
                continue
            # 'Owner' may be absent, and even when present it is not
            # guaranteed to include 'DisplayName'; fall back to None rather
            # than raising KeyError in the middle of a listing.
            owner_info = entry.get('Owner') or {}
            yield FileSystemFileDesc(
                fs=self,
                path=key,
                name=key.split(self.path_separator)[-1],
                type='file',
                bytes=entry['Size'],
                owner=owner_info.get('DisplayName'),
                last_modified=entry['LastModified'],
            )
def _dir(self, path):
    """Yield a `FileSystemFileDesc` for each entry found in `path`.

    Entries come from `listdir_attr(path)` on the SFTP client. Each one is
    classified as a 'directory' when its mode bits say so, and as a 'file'
    otherwise. Size, uid, gid and mtime are passed through exactly as
    reported in the entry's attributes.

    Args:
        path: Remote directory to list.

    Yields:
        FileSystemFileDesc: One descriptor per directory entry.
    """
    remote_entries = self.__client_sftp.listdir_attr(path)
    for entry in remote_entries:
        filename = entry.filename
        full_path = posixpath.join(path, filename)

        # TODO: What about links, which are of form: lrwxrwxrwx?
        if stat.S_ISDIR(entry.st_mode):
            entry_type = 'directory'
        else:
            entry_type = 'file'

        yield FileSystemFileDesc(
            fs=self,
            path=full_path,
            name=filename,
            type=entry_type,
            bytes=entry.st_size,
            owner=entry.st_uid,
            group=entry.st_gid,
            last_modified=entry.st_mtime,
        )
def _dir(self, path):
    """Yield a `FileSystemFileDesc` for each entry `ls -Al` reports for `path`.

    Runs `ls -Al <path>` through `self.execute`, parses the long-format
    output, and yields the entries sorted with directories first and then by
    name.

    Parsing notes:
      * The first output line (the "total N" summary) is skipped.
      * Each line is split into at most nine fields, so a file name that
        contains whitespace is kept intact in the final field.
      * For symbolic links the trailing " -> target" annotation is dropped
        and only the link's own name is reported. Links are typed as 'file'.
      * The eighth field is either "HH:MM" or a year. A time of day is
        assumed to belong to the current year, unless that would place the
        modification in the future, in which case the previous year is used.

    Args:
        path: Directory to list on the target system.

    Yields:
        FileSystemFileDesc: One descriptor per entry. `bytes`, `owner` and
        `group` are passed through as the strings printed by `ls`.
    """
    # NOTE(review): `path` is interpolated into the shell command unquoted, so
    # paths containing spaces or shell metacharacters will not list correctly.
    # Quoting it (e.g. with shlex.quote) would also disable `~` and glob
    # expansion, so confirm what callers rely on before changing this.
    output = self.execute('ls -Al {}'.format(path)).stdout.decode().strip()

    def parse_line(line):
        # maxsplit=8 keeps whitespace inside the file name (the ninth field).
        fields = line.split(None, 8)
        # TODO: Link targets are discarded here. Should we capture them?
        if len(fields) == 9 and fields[0].startswith('l'):
            fields[8] = fields[8].split(' -> ', 1)[0]
        return fields

    # The first line of output is the "total N" summary, not an entry.
    rows = sorted(parse_line(line) for line in output.split('\n')[1:])
    if not rows:
        # Directory is empty
        return

    listing = pd.DataFrame(rows, columns=[
        'file_mode', 'link_count', 'owner', 'group', 'bytes', 'month',
        'day', 'time', 'path'
    ])

    months = [
        'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct',
        'Nov', 'Dec'
    ]
    now = datetime.datetime.now()
    # Tolerate clock and time-zone differences between this host and the
    # listed system before deciding that a timestamp lies in the future.
    skew_allowance = datetime.timedelta(days=1)

    def convert_to_datetime(row):
        month = months.index(row['month']) + 1
        day = int(row['day'])
        stamp = row['time']
        if ':' not in stamp:
            # Older entries: the year is printed in place of the time of day.
            return datetime.datetime(year=int(stamp), month=month, day=day)

        clock = stamp.split(':')
        modified = datetime.datetime(year=now.year,
                                     month=month,
                                     day=day,
                                     hour=int(clock[0]),
                                     minute=int(clock[1]))
        if modified > now + skew_allowance:
            # A recent entry cannot be this far in the future, so it must
            # date from the previous year (e.g. a December file listed in
            # January).
            try:
                modified = modified.replace(year=now.year - 1)
            except ValueError:
                # Feb 29 has no counterpart in the previous year; keep the
                # current-year value rather than failing the whole listing.
                pass
        return modified

    listing = listing.assign(
        last_modified=lambda df: df.apply(convert_to_datetime, axis=1),
        type=lambda df: df['file_mode'].map(
            lambda mode: 'directory' if mode.startswith('d') else 'file'),
    ).drop(['month', 'day', 'time'], axis=1).sort_values(
        ['type', 'path']).reset_index(drop=True)

    for _, row in listing.iterrows():
        yield FileSystemFileDesc(
            fs=self,
            path=posixpath.join(path, row['path']),
            name=row['path'],
            # TODO: What about links, which are of form: lrwxrwxrwx?
            type=row['type'],
            bytes=row['bytes'],
            owner=row['owner'],
            group=row['group'],
            last_modified=row['last_modified'],
        )