Example #1
0
    def _dir(self, path):
        """
        Yield a `FileSystemFileDesc` for every entry listed under `path`.

        Each page produced by the directory paginator is expected to be a
        dict. Entries under its 'CommonPrefixes' key are reported as
        directories (with the final character of the prefix removed), and
        entries under its 'Contents' key are reported as files.

        Keys ending in '_$folder$' are skipped when
        `self.skip_hadoop_artifacts` is truthy.

        Args:
            path: The path handed to the directory paginator.

        Yields:
            FileSystemFileDesc: One descriptor per directory or file entry.
        """
        for page in self.__dir_paginator(path):
            for common_prefix in page.get('CommonPrefixes', []):
                dir_path = common_prefix['Prefix'][:-1]  # Remove trailing slash
                yield FileSystemFileDesc(
                    fs=self,
                    path=dir_path,
                    name=dir_path.split(self.path_separator)[-1],
                    type='directory',
                )

            for entry in page.get('Contents', []):
                key = entry['Key']
                if self.skip_hadoop_artifacts and key.endswith('_$folder$'):
                    continue

                # An entry may omit 'Owner' altogether, or carry an owner
                # record without a 'DisplayName'; report None in both cases
                # instead of raising KeyError.
                owner = (entry.get('Owner') or {}).get('DisplayName')

                yield FileSystemFileDesc(
                    fs=self,
                    path=key,
                    name=key.split(self.path_separator)[-1],
                    type='file',
                    bytes=entry['Size'],
                    owner=owner,
                    last_modified=entry['LastModified'],
                )
Example #2
0
 def _dir(self, path):
     """
     Yield a `FileSystemFileDesc` for each entry the SFTP client lists in `path`.

     Every attribute record returned by `listdir_attr(path)` becomes one
     descriptor: its path is `path` joined with the entry's file name, and
     size, uid, gid and mtime are passed through unchanged from the record.
     """
     entries = self.__client_sftp.listdir_attr(path)
     for entry in entries:
         filename = entry.filename
         full_path = posixpath.join(path, filename)

         # TODO: What about links, which are of form: lrwxrwxrwx?
         if stat.S_ISDIR(entry.st_mode):
             kind = 'directory'
         else:
             kind = 'file'

         yield FileSystemFileDesc(
             fs=self,
             path=full_path,
             name=filename,
             type=kind,
             bytes=entry.st_size,
             owner=entry.st_uid,
             group=entry.st_gid,
             last_modified=entry.st_mtime,
         )
Example #3
0
    def _dir(self, path):
        """
        Yield a `FileSystemFileDesc` for each entry `ls -Al` reports for `path`.

        The listing is obtained by running `ls -Al <path>` through
        `self.execute` and parsing its long-format text output. Entries are
        yielded with directories first, sorted by name within each type.

        Args:
            path (str): The directory to list. It is interpolated into the
                shell command as-is.

        Yields:
            FileSystemFileDesc: One descriptor per listed entry. `bytes`,
                `owner` and `group` are the strings printed by `ls`;
                `last_modified` has minute resolution at best.
        """
        columns = [
            'file_mode', 'link_count', 'owner', 'group', 'bytes', 'month',
            'day', 'time', 'path'
        ]
        months = [
            'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep',
            'Oct', 'Nov', 'Dec'
        ]

        def parse_line(line):
            # Split on at most eight runs of whitespace so that a file name
            # containing spaces is kept whole in the ninth field.
            fields = line.split(None, 8)
            if len(fields) == 9 and fields[0].startswith('l'):
                # TODO: Link targets are discarded here. Should we capture them?
                fields[8] = fields[8].split(' -> ', 1)[0]
            return fields

        # NOTE(review): `path` is not shell-quoted, so a path containing
        # whitespace or shell metacharacters will not be listed correctly.
        # Confirm whether any caller relies on shell expansion before quoting.
        output = self.execute(
            'ls -Al {}'.format(path)).stdout.decode().strip()

        # The first output line is skipped (presumably the `total N` summary).
        rows = [parse_line(line) for line in output.split('\n')[1:]]
        if not rows:  # Directory is empty
            return

        now = datetime.datetime.now()
        clock_slack = datetime.timedelta(days=1)

        def to_datetime(row):
            month = months.index(row.month) + 1
            day = int(row.day)
            if ':' not in row.time:
                # The eighth column holds a year rather than a time of day.
                return datetime.datetime(year=int(row.time),
                                         month=month,
                                         day=day)

            hour, minute = [int(part) for part in row.time.split(':')[:2]]
            stamp = datetime.datetime(year=now.year,
                                      month=month,
                                      day=day,
                                      hour=hour,
                                      minute=minute)
            # The year is not printed for these entries. A date that lands
            # more than a day in the future must belong to the previous year
            # (e.g. a December file listed in January).
            if stamp - now > clock_slack:
                try:
                    stamp = stamp.replace(year=now.year - 1)
                except ValueError:
                    # 29 Feb has no counterpart in the previous year.
                    pass
            return stamp

        listing = pd.DataFrame(rows, columns=columns)
        listing = listing.assign(
            last_modified=listing.apply(to_datetime, axis=1),
            # TODO: What about links, which are of form: lrwxrwxrwx?
            type=listing.file_mode.map(
                lambda mode: 'directory' if mode.startswith('d') else 'file'),
        ).sort_values(['type', 'path'])

        for row in listing.itertuples(index=False):
            yield FileSystemFileDesc(
                fs=self,
                path=posixpath.join(path, row.path),
                name=row.path,
                type=row.type,
                bytes=row.bytes,
                owner=row.owner,
                group=row.group,
                last_modified=row.last_modified,
            )