def _s3_open_file_with_retries(fs: s3fs.S3FileSystem, path: str, retries: int) -> Any:
    """Open ``path`` on S3, retrying to ride out S3 eventual consistency.

    Args:
        fs: the s3fs filesystem to open the file through.
        path: S3 path/key of the file to open.
        retries: maximum number of open attempts.

    Returns:
        The open file object returned by ``fs.open``.

    Raises:
        Exception: the last open failure, re-raised once all retries are
            exhausted (previously the function silently returned ``None``).
        FileNotFoundError: if ``retries`` is not positive, so no attempt
            was ever made.
    """
    last_error = None
    for _ in range(retries):
        try:
            logger.info("opening %s", path)
            return fs.open(path)
        # Broad catch is deliberate: s3fs surfaces several exception types
        # for a key that is not yet visible.
        except Exception as ex:
            last_error = ex
            logger.warning("could not open %s: %s", path, ex)
            # If the file has just been uploaded, it might not be visible
            # immediately, but the failed open has been cached by s3fs —
            # so we invalidate the cache
            fs.invalidate_cache(path)
            # and give S3 some time to settle the file status.
            sleep(1)
    # All attempts failed: surface the failure instead of returning None.
    if last_error is not None:
        raise last_error
    raise FileNotFoundError(path)
def open_file(fs: s3fs.S3FileSystem, **kwargs) -> Any:
    """Open an s3fs file, retrying to work around S3 eventual consistency.

    Any keyword arguments are forwarded to the ``try_it`` retry helper
    (and ultimately to ``fs.open``).
    """
    # Drop any stale s3fs state so the retried opens see fresh metadata.
    fs.invalidate_cache()
    fs.clear_instance_cache()
    # Delegate the retry policy to try_it, retrying only on FileNotFoundError.
    attempt_open = fs.open
    return try_it(f=attempt_open, ex=FileNotFoundError, **kwargs)