Example #1
0
def _get_from_s3(filepaths: List[str]) -> List[str]:
    """Download S3-hosted tar archives from *filepaths* into S3_FILES_DIR.

    Non-S3 entries are skipped. Files already present locally are not
    re-downloaded.

    Args:
        filepaths: paths and/or S3 URLs; only S3 URLs are processed.

    Returns:
        Local paths of the (downloaded or cached) archives; empty list if
        no S3 paths were given.

    Raises:
        FilepathInvalidException: if a downloaded file is not a tar archive.
    """
    new_paths = []
    if not any(is_s3(x) for x in filepaths):
        return new_paths

    conn = S3Connection(CONFIG['s3']['access_key'], CONFIG['s3']['secret_key'])
    bucket = conn.get_bucket(BUCKET)
    # The target directory is loop-invariant — create it once up front.
    if not opexists(S3_FILES_DIR):
        makedirs(S3_FILES_DIR)

    for path in filepaths:
        if not is_s3(path):
            continue
        # The S3 key is everything after "<BUCKET>/" in the URL.
        start = path.index(BUCKET)
        filename = path[start + len(BUCKET) + 1:]
        local_path = opjoin(S3_FILES_DIR, basename(filename))
        if not opexists(local_path):
            s3key = bucket.get_key(filename, validate=False)
            with open(local_path, 'wb') as f:
                s3key.get_contents_to_file(f)
            if not is_tarfile(local_path):
                # Remove the corrupt download before raising; otherwise the
                # next call would find the file cached at local_path, skip
                # re-fetching, and hand back the broken archive.
                import os
                os.remove(local_path)
                raise FilepathInvalidException('%s is not tar. Local copy %s' %
                                               (path, local_path))
        new_paths.append(local_path)
    return new_paths
Example #2
0
    def __init__(self, paths: Iterable, is_recursive=False, is_clear_downloaded: bool = False):
        """Classify *paths* into directories, tar archives, and S3 URLs.

        S3 URLs are downloaded (via _get_from_s3) and appended to the
        archive list; EXTRACT_PATH is always registered as a directory.

        Args:
            paths: iterable of local paths and/or S3 URLs.
            is_recursive: stored as self._is_recursive for later traversal.
            is_clear_downloaded: stored as self._is_clear_downloaded.
        """
        self._is_clear_downloaded = is_clear_downloaded
        self._last_tar = None
        self._is_recursive = is_recursive
        self._archives = []
        self._dirs = []
        s3paths = []

        # Materialize once: *paths* may be a one-shot iterator (the hint is
        # only Iterable), and it is consumed both by the classification loop
        # below and again by _check(). Without this, _check would receive an
        # exhausted generator and validate against nothing.
        paths = list(paths)

        for path in paths:
            if is_s3(path):
                s3paths.append(path)
            elif os.path.isdir(path):
                self._dirs.append(path)
            elif is_tarfile(path):
                self._archives.append(path)

        # _check reports any entry that was not classified above.
        _check(paths, self._dirs + self._archives + s3paths)
        self._archives += _get_from_s3(s3paths)
        self._dirs.append(EXTRACT_PATH)