import os
from os import makedirs
from os.path import basename
from os.path import exists as opexists
from os.path import join as opjoin
from tarfile import is_tarfile
from typing import Iterable, List

from boto.s3.connection import S3Connection  # boto 2 S3 API

# CONFIG, BUCKET, S3_FILES_DIR, EXTRACT_PATH, is_s3, _check and
# FilepathInvalidException are assumed to be defined elsewhere in this module.


def _get_from_s3(filepaths: List[str]) -> List[str]:
    """Download any S3-hosted tar archives among ``filepaths`` and return
    the list of their local copies."""
    new_paths = []
    if any(is_s3(x) for x in filepaths):
        conn = S3Connection(CONFIG['s3']['access_key'],
                            CONFIG['s3']['secret_key'])
        bucket = conn.get_bucket(BUCKET)
        for path in filepaths:
            if not is_s3(path):
                continue
            # The S3 key is everything after the bucket name in the URL.
            start = path.index(BUCKET)
            filename = path[start + len(BUCKET) + 1:]
            if not opexists(S3_FILES_DIR):
                makedirs(S3_FILES_DIR)
            local_path = opjoin(S3_FILES_DIR, basename(filename))
            # Download only if there is no cached local copy yet.
            if not opexists(local_path):
                s3key = bucket.get_key(filename, validate=False)
                with open(local_path, 'wb') as f:
                    s3key.get_contents_to_file(f)
            if not is_tarfile(local_path):
                raise FilepathInvalidException(
                    '%s is not tar. Local copy %s' % (path, local_path))
            new_paths.append(local_path)
    return new_paths
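
# Usage sketch for _get_from_s3 (the key below is illustrative, not from the
# real config): the first call downloads the archive into S3_FILES_DIR; a
# repeated call finds the cached copy and skips the download. Non-S3 paths
# are passed over and do not appear in the result.
#
#     local_copies = _get_from_s3(['s3://%s/archives/run-1.tar' % BUCKET,
#                                  '/already/local/archive.tar'])
#     # -> [opjoin(S3_FILES_DIR, 'run-1.tar')]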

def __init__(self, paths: Iterable, is_recursive: bool = False,
             is_clear_downloaded: bool = False):
    self._is_clear_downloaded = is_clear_downloaded
    self._last_tar = None
    self._is_recursive = is_recursive
    self._archives = []
    self._dirs = []
    s3paths = []
    # Sort the incoming paths into S3 URLs, local directories and local
    # tar archives.
    for path in paths:
        if is_s3(path):
            s3paths.append(path)
        elif os.path.isdir(path):
            self._dirs.append(path)
        elif is_tarfile(path):
            self._archives.append(path)
    # _check presumably raises if some input path did not fall into any of
    # the three categories above.
    _check(paths, self._dirs + self._archives + s3paths)
    # Downloaded S3 archives are handled like local ones from here on.
    self._archives += _get_from_s3(s3paths)
    # EXTRACT_PATH (presumably where archives get unpacked) is always
    # searched as well.
    self._dirs.append(EXTRACT_PATH)
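
# Usage sketch: the enclosing class is not visible in this excerpt, so
# "FilepathGen" below is a hypothetical stand-in for its name. Local
# directories, local tar archives and S3 URLs can be mixed in one call:
#
#     gen = FilepathGen(
#         ['/var/data/results',                     # local directory
#          '/var/data/archive.tar',                 # local tar archive
#          's3://%s/archives/run-1.tar' % BUCKET],  # downloaded on init
#         is_recursive=True,
#         is_clear_downloaded=True)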