def test_s3open(s3_tempfile):
    """Exercise s3.s3open: bad-path error, iteration, .read(), and context manager.

    :param s3_tempfile: fixture providing an existing S3 object path to read.
    """
    # Reading a path that doesn't exist must raise.
    # NOTE(review): s3open signals bad paths with ValueError, not
    # FileNotFoundError as the original comment claimed — confirm against s3.py.
    try:
        s3.s3open("bad path")
    except ValueError:
        pass
    else:
        raise RuntimeError("should have gotten exception for bad path")

    # Make sure s3open works in a variety of approaches.
    # Reading s3open as an iterator:
    val1 = ""
    for line in s3.s3open(s3_tempfile, "r"):
        val1 += line

    # Reading s3open with .read():
    f = s3.s3open(s3_tempfile, "r")
    val2 = f.read()

    # Reading s3open with a context manager:
    with s3.s3open(s3_tempfile, "r") as f:
        val3 = f.read()

    # All three read styles must see identical content.
    assert val1 == val2 == val3
def test_s3open_iter():
    """Write ten numbered lines to a temp S3 object and read them back,
    both with and without a context manager.

    Skips (with a warning) when not on EC2 or when TEST_S3ROOT is unset.
    """
    if "EC2_HOME" not in os.environ:
        warnings.warn("s3open only runs on AWS EC2 computers")
        return
    if TEST_S3ROOT is None:
        warnings.warn("no TEST_S3ROOT is defined.")
        return
    path = os.path.join(TEST_S3ROOT, f"tmp/tmp.{os.getpid()}")
    try:
        with s3.s3open(path, "w", fsync=True) as f:
            for i in range(10):
                f.write(TEST_STRING[:-1] + str(i) + "\n")
        # Read back through a context-managed handle.
        with s3.s3open(path, "r") as f:
            for i, line in enumerate(f):
                assert line == TEST_STRING[:-1] + str(i) + "\n"
        # Read back through a bare (non-context-managed) handle.
        f = s3.s3open(path, "r")
        for i, line in enumerate(f):
            assert line == TEST_STRING[:-1] + str(i) + "\n"
    finally:
        # Always remove the temp object, even when an assertion fails,
        # so failing runs do not leak S3 objects (the original only
        # deleted on success).
        s3.s3rm(path)
def test_s3open_write_fsync(s3_tempfile):
    """Verify that a write performed through s3open(..., fsync=True) is
    immediately visible to a subsequent read of the same object."""
    with s3.s3open(s3_tempfile, "w", fsync=True) as writer:
        writer.write(TEST_STRING)
    with s3.s3open(s3_tempfile, "r") as reader:
        contents = reader.read()
    assert contents == TEST_STRING
def test_s3open_iter(s3_tempfile):
    """Write ten numbered lines to the temp S3 object, then verify that
    iterating the object yields them back — both via a context manager
    and via a bare handle."""
    expected = [TEST_STRING[:-1] + str(n) + "\n" for n in range(10)]

    with s3.s3open(s3_tempfile, "w", fsync=True) as out:
        for line in expected:
            out.write(line)

    # Iterate through a context-managed handle.
    with s3.s3open(s3_tempfile, "r") as handle:
        assert list(handle) == expected

    # Iterate through a bare handle as well.
    handle = s3.s3open(s3_tempfile, "r")
    assert list(handle) == expected
def dopen(path, mode='r', encoding='utf-8'):
    """Open data relative to ROOT. Allows opening UFS files or S3 files.

    If the target does not exist locally (read mode) and the directory
    contains exactly one ZIP file, the named member is opened from that
    archive instead.

    :param path: file path; expanded with dpath_expand(). s3:// URLs are
                 delegated to s3open().
    :param mode: open() mode string; any 'b' disables text decoding.
    :param encoding: text encoding; forced to None for binary modes.
    """
    logging.info("dopen: path:{} mode:{} encoding:{}".format(path, mode, encoding))
    path = dpath_expand(path)

    if path.startswith('s3://'):
        return s3open(path, mode=mode, encoding=encoding)

    if 'b' in mode:
        encoding = None

    # Check for full path name
    logging.info("=>open(path={},mode={},encoding={})".format(path, mode, encoding))

    # If opening mode==r and the file does not exist, see if it is present
    # in a ZIP file: when the directory holds exactly one ZIP, try to open
    # the requested member from it.
    if "r" in mode and not os.path.exists(path):
        (dirname, filename) = os.path.split(path)
        zipnames = glob.glob(os.path.join(dirname, "*.zip"))
        if len(zipnames) == 1:
            zip_file = zipfile.ZipFile(zipnames[0])
            zf = zip_file.open(filename, 'r')
            logging.info(" ({} found in {})".format(filename, zipnames[0]))
            if 'b' in mode:
                # Binary request: return the raw member stream. (The original
                # wrapped even binary opens in TextIOWrapper with encoding=None,
                # which yields str instead of bytes.)
                return zf
            # Text request: default to utf-8 if the caller passed encoding=None.
            return io.TextIOWrapper(zf, encoding=encoding if encoding is not None else 'utf-8')

    if encoding is None:
        return open(path, mode=mode)
    else:
        return open(path, mode=mode, encoding=encoding)
def test_s3zipfile_support(s3_tempfile):
    """See if we can read hello.txt from a zipfile we upload.
    This is a pretty good test."""
    with s3.s3open(s3_tempfile, 'wb', fsync=True) as f:
        # Use a context manager so the local zipfile handle is closed
        # promptly (the original leaked it).
        with open(TEST_ZIPFILE, 'rb') as local:
            f.write(local.read())
    zf = zipfile.ZipFile(s3.S3File(s3_tempfile))
    # Close the member stream deterministically as well.
    with zf.open('hello.txt') as hf:
        hello = hf.read()
    assert hello == b'Hello World!\n'
def test_s3open():
    """Exercise s3.s3open against TEST_S3ROOT: bad-path error, iteration,
    .read(), and context manager. Skips (with a warning) when not on EC2
    or when TEST_S3ROOT is unset."""
    if "EC2_HOME" not in os.environ:
        warnings.warn("test_s3open only runs on AWS EC2 computers")
        return
    if TEST_S3ROOT is None:
        warnings.warn("no TEST_S3ROOT is defined.")
        return

    # Reading a path that doesn't exist must raise.
    # NOTE(review): s3open signals bad paths with ValueError, not
    # FileNotFoundError as the original comment claimed — confirm against s3.py.
    try:
        s3.s3open("bad path")
    except ValueError:
        pass
    else:
        raise RuntimeError("should have gotten exception for bad path")

    path = os.path.join(TEST_S3ROOT, TEST_S3_FILE)
    print("path:", path)

    # Make sure s3open works in a variety of approaches.
    # Reading s3open as an iterator:
    val1 = ""
    for line in s3.s3open(path, "r"):
        val1 += line

    # Reading s3open with .read():
    f = s3.s3open(path, "r")
    val2 = f.read()

    # Reading s3open with a context manager:
    with s3.s3open(path, "r") as f:
        val3 = f.read()

    # All three read styles must see identical content.
    assert val1 == val2 == val3
def test_s3open_write_fsync():
    """See if we s3open with the fsync option works.

    Skips (with a warning) when not on EC2 or when TEST_S3ROOT is unset.
    """
    if "EC2_HOME" not in os.environ:
        warnings.warn("s3open only runs on AWS EC2 computers")
        return
    if TEST_S3ROOT is None:
        warnings.warn("no TEST_S3ROOT is defined.")
        return
    path = os.path.join(TEST_S3ROOT, f"tmp/tmp.{os.getpid()}")
    with s3.s3open(path, "w", fsync=True) as f:
        f.write(TEST_STRING)
    with s3.s3open(path, "r") as f:
        buf = f.read()
    print("Wanted: ", TEST_STRING)
    print("Got:: ", buf)
    assert buf == TEST_STRING
    try:
        s3.s3rm(path)
    except RuntimeError:
        # Bug fix: the original printed a bare "path:" label without the
        # actual path value, making the diagnostic useless.
        print("path:", path, file=sys.stderr)
        # Bare raise preserves the original traceback (raise e would reset it).
        raise