def test_fsspec_versions_multiple(oss):
    """Verify fsspec.core.get_fs_token_paths resolves versionId URLs.

    Writes twenty versions of one object, then opens each version through a
    ``?versionId=`` URL and checks the payload round-trips.
    """
    oss = OSSFileSystem(key=key, secret=secret, endpoint=endpoint, version_aware=True)
    versioned_file = versioned_bucket_name + "/versioned_file3-" + str(uuid.uuid4())
    version_lookup = {}
    for counter in range(20):
        payload = str(counter).encode()
        with oss.open(versioned_file, "wb") as fo:
            fo.write(payload)
        # version_id is only assigned once the upload is committed on close.
        version_lookup[fo.version_id] = payload
    urls = [
        "oss://{}?versionId={}".format(versioned_file, version)
        for version in version_lookup
    ]
    fs, token, paths = fsspec.core.get_fs_token_paths(
        urls, storage_options=dict(key=key, secret=secret, endpoint=endpoint)
    )
    assert isinstance(fs, OSSFileSystem)
    assert fs.version_aware
    for path in paths:
        with fs.open(path, "rb") as fo:
            assert fo.read() == version_lookup[fo.version_id]
def test_anonymous_access(oss):
    """An anonymous client sees an empty listing and cannot create buckets."""
    oss = OSSFileSystem(anon=True)
    assert oss.ls("") == []
    # TODO: public bucket doesn't work through moto
    with pytest.raises(PermissionError):
        oss.mkdir("newbucket")
def test_public_file(oss):
    """ACL round-trip: anonymous reads succeed only on public buckets/objects.

    Works on real OSS, not on moto.
    """
    test_bucket_name = "ossfs_public_test"
    other_bucket_name = "ossfs_private_test"
    oss.touch(test_bucket_name)
    oss.touch(test_bucket_name + "/afile")
    oss.touch(other_bucket_name, acl="public-read")
    oss.touch(other_bucket_name + "/afile", acl="public-read")
    anon_fs = OSSFileSystem(anon=True)
    # Private bucket is opaque to the anonymous client; public one is not.
    with pytest.raises(PermissionError):
        anon_fs.ls(test_bucket_name)
    anon_fs.ls(other_bucket_name)
    # Swap the ACLs and re-check (refresh bypasses the listing cache).
    oss.chmod(test_bucket_name, acl="public-read")
    oss.chmod(other_bucket_name, acl="private")
    with pytest.raises(PermissionError):
        anon_fs.ls(other_bucket_name, refresh=True)
    assert anon_fs.ls(test_bucket_name, refresh=True)
    # A public object inside a private bucket remains readable.
    with oss.open(other_bucket_name + "/see_me", "wb", acl="public-read") as f:
        f.write(b"hello")
    assert anon_fs.cat(other_bucket_name + "/see_me") == b"hello"
def test_change_defaults_only_subsequent(oss):
    """Regression test for Issue #135.

    Changing ``OSSFileSystem.default_block_size`` must only affect instances
    created after the change, never instances that already exist.
    """
    try:
        OSSFileSystem.cachable = False  # don't reuse instances with same pars
        default_fs = OSSFileSystem(key=key, secret=secret, endpoint=endpoint)
        assert default_fs.default_block_size == 5 * (1024**2)
        overridden_fs = OSSFileSystem(
            default_block_size=64 * (1024**2),
            key=key,
            secret=secret,
            endpoint=endpoint,
        )
        assert overridden_fs.default_block_size == 64 * (1024**2)
        # Bump the class-level default to 1 GiB for all later instances.
        OSSFileSystem.default_block_size = 1024**3
        big_fs = OSSFileSystem(key=key, secret=secret, endpoint=endpoint)
        assert big_fs.default_block_size == 1024**3
        # Pre-existing instances must keep their original block sizes.
        assert overridden_fs.default_block_size == 64 * (1024**2)
        assert default_fs.default_block_size == 5 * (1024**2)
    finally:
        OSSFileSystem.default_block_size = 5 * (1024**2)
        OSSFileSystem.cachable = True
def test_strip_protocol():
    """_strip_protocol drops both http endpoint and oss scheme prefixes."""
    cases = [
        ("http://oss-cn-hangzhou.aliyuncs.com/mybucket/myobject", "/mybucket/myobject"),
        ("oss://mybucket/myobject", "/mybucket/myobject"),
    ]
    for address, expected in cases:
        assert OSSFileSystem._strip_protocol(address) == expected
def test_list_versions_many(oss):
    """Each rewrite of a versioned object yields a distinct listed version."""
    oss = OSSFileSystem(key=key, secret=secret, endpoint=endpoint, version_aware=True)
    versioned_file = versioned_bucket_name + "/versioned_file-" + str(uuid.uuid4())
    for _ in range(1200):
        with oss.open(versioned_file, "wb") as fo:
            fo.write(b"1")
    assert len(oss.object_version_info(versioned_file)) == 1200
def test_config_kwargs_class_attributes_override():
    """A connect_timeout passed at construction reaches the bucket client."""
    fs = OSSFileSystem(
        key=key,
        secret=secret,
        endpoint=endpoint,
        connect_timeout=120,
    )
    assert fs._make_bucket(test_bucket_name).timeout == 120
def test_sts_login(endpoint, test_bucket_name):
    """Temporary STS credentials (key/secret/token) allow bucket listing."""
    key, secret, token = fetch_sts_token(STSAccessKeyId, STSAccessKeySecret, STSArn)
    sts_fs = OSSFileSystem(
        key=key,
        secret=secret,
        token=token,
        endpoint=endpoint,
    )
    sts_fs.ls(test_bucket_name)
def test_default_cache_type(init_config, default_cache_type, test_path):
    """Files opened for reading use the filesystem-wide default cache type."""
    payload = b"a" * (10 * 2**20)
    target = test_path + "/test_default_cache_type/file"
    init_config["default_cache_type"] = default_cache_type
    ossfs = OSSFileSystem(**init_config)
    with ossfs.open(target, "wb") as f:
        f.write(payload)
    with ossfs.open(target, "rb") as f:
        assert isinstance(f.cache, fsspec.core.caches[default_cache_type])
        readback = f.read(len(payload))
        assert len(readback) == len(payload)
        assert readback == payload
def test_default_pars(oss):
    """Constructor defaults for block size / fill_cache apply unless
    overridden per-open."""
    oss = OSSFileSystem(
        key=key,
        secret=secret,
        endpoint=endpoint,
        default_block_size=20,
        default_fill_cache=False,
    )
    path = test_bucket_name + "/" + list(files)[0]
    # Defaults from the constructor.
    with oss.open(path) as f:
        assert f.blocksize == 20
        assert f.fill_cache is False
    # Per-call overrides win over the constructor defaults.
    with oss.open(path, block_size=40, fill_cache=True) as f:
        assert f.blocksize == 40
        assert f.fill_cache is True
def test_set_endpoint(endpoint, test_bucket_name, monkeypatch):
    """Without an endpoint (arg or env), operations fail until
    set_endpoint() is called."""
    key, secret, token = fetch_sts_token(STSAccessKeyId, STSAccessKeySecret, STSArn)
    monkeypatch.delenv("OSS_ENDPOINT")
    fs = OSSFileSystem(key=key, secret=secret, token=token, endpoint=None)
    with pytest.raises(ValueError):
        fs.ls(test_bucket_name)
    fs.set_endpoint(endpoint)
    fs.ls(test_bucket_name)
def test_autocommit(oss):
    """Files written with autocommit=False only appear after commit();
    discard() drops them."""
    auto_file = test_bucket_name + "/auto_file"
    committed_file = test_bucket_name + "/commit_file"
    aborted_file = test_bucket_name + "/aborted_file"
    oss = OSSFileSystem(key=key, secret=secret, endpoint=endpoint, version_aware=True)

    def write_and_flush(path, autocommit):
        # Write a single byte and hand back the (closed) file object.
        with oss.open(path, "wb", autocommit=autocommit) as fo:
            fo.write(b"1")
        return fo

    # Autocommit: object is visible immediately after close.
    handle = write_and_flush(auto_file, autocommit=True)
    assert handle.autocommit
    assert oss.exists(auto_file)

    # Deferred: invisible until commit().
    handle = write_and_flush(committed_file, autocommit=False)
    assert not handle.autocommit
    assert not oss.exists(committed_file)
    handle.commit()
    assert oss.exists(committed_file)

    # Deferred then discarded: never visible.
    handle = write_and_flush(aborted_file, autocommit=False)
    assert not oss.exists(aborted_file)
    handle.discard()
    assert not oss.exists(aborted_file)
    # Cannot commit a file that was discarded.
    with pytest.raises(Exception):
        handle.commit()
def test_default_cache_type(oss, default_cache_type):
    """default_cache_type passed to the constructor is used on read.

    ``a`` is a module-level test path — presumably defined alongside the
    other fixtures in this module.
    """
    payload = b"a" * (10 * 2**20)
    oss = OSSFileSystem(
        anon=False,
        key=key,
        secret=secret,
        endpoint=endpoint,
        default_cache_type=default_cache_type,
    )
    with oss.open(a, "wb") as f:
        f.write(payload)
    with oss.open(a, "rb") as f:
        assert isinstance(f.cache, fsspec.core.caches[default_cache_type])
        readback = f.read(len(payload))
        assert len(readback) == len(payload)
        assert readback == payload
def test_requester_pays(oss):
    """requester_pays=True sets the OSS request-payer header on the
    filesystem and its files."""
    target = test_bucket_name + "/myfile"
    oss = OSSFileSystem(key=key, secret=secret, endpoint=endpoint, requester_pays=True)
    assert oss.additional_header[oss2.headers.OSS_REQUEST_PAYER] == "requester"
    oss.mkdir(test_bucket_name)
    oss.touch(target)
    with oss.open(target, "rb") as f:
        # The open file's filesystem carries the same header.
        assert f.oss.additional_header[oss2.headers.OSS_REQUEST_PAYER] == "requester"
def test_version_sizes(oss):
    """Guard against caching of incorrect per-version object details.

    Writes gzip payloads of different sizes as successive versions, then
    decompresses each version to ensure the right bytes come back.
    """
    oss = OSSFileSystem(key=key, secret=secret, endpoint=endpoint, version_aware=True)
    import gzip

    path = f"oss://{versioned_bucket_name}/test.txt.gz"
    payloads = (
        b"good morning!",
        b"hello!",
        b"hi!",
        b"hello!",
    )
    versions = [oss.pipe_file(path, gzip.compress(text)) for text in payloads]
    for version in versions:
        with oss.open(path, version_id=version.versionid) as f:
            with gzip.open(f) as zfp:
                zfp.read()
def test_versions(oss):
    """Opening without a version returns the latest; version_id selects an
    older write."""
    versioned_file = versioned_bucket_name + "/versioned_file_" + str(uuid.uuid4())
    oss = OSSFileSystem(key=key, secret=secret, endpoint=endpoint, version_aware=True)
    for payload in (b"1", b"2"):
        with oss.open(versioned_file, "wb") as fo:
            fo.write(payload)
    assert oss.isfile(versioned_file)
    versions = oss.object_version_info(versioned_file)
    # Reverse so version_ids is ordered oldest-first.
    version_ids = [version.versionid for version in reversed(versions)]
    assert len(version_ids) == 2
    with oss.open(versioned_file) as fo:
        assert fo.version_id == version_ids[1]
        assert fo.read() == b"2"
    with oss.open(versioned_file, version_id=version_ids[0]) as fo:
        assert fo.version_id == version_ids[0]
        assert fo.read() == b"1"
def test_versions_unaware(oss):
    """A version-unaware filesystem exposes no version_id and rejects
    version_id requests.

    Bug fix: the original `pytest.raises` block opened the file without an
    ``as`` binding and then called ``fo.read()`` on the stale, already-closed
    handle from the previous ``with`` block. The read is now performed on the
    newly opened file so the ValueError is exercised on the intended path.
    """
    versioned_file = versioned_bucket_name + "/versioned_file3"
    oss = OSSFileSystem(key=key, secret=secret, endpoint=endpoint, version_aware=False)
    with oss.open(versioned_file, "wb") as fo:
        fo.write(b"1")
    with oss.open(versioned_file, "wb") as fo:
        fo.write(b"2")
    with oss.open(versioned_file) as fo:
        # Without version awareness there is no version metadata.
        assert fo.version_id is None
        assert fo.read() == b"2"
    # Asking for a specific version must raise on a version-unaware filesystem.
    with pytest.raises(ValueError):
        with oss.open(versioned_file, version_id="0") as fo:
            fo.read()
def test_exists_versioned(oss, version_aware):
    """A prefix reports existence once a file is created under it, even in a
    versioned bucket."""
    depth = 2
    oss = OSSFileSystem(
        key=key,
        secret=secret,
        endpoint=endpoint,
        version_aware=version_aware,
    )
    segments = [versioned_bucket_name] + [str(uuid.uuid4()) for _ in range(depth)]
    path = "/".join(segments)
    # No intermediate prefix exists before the object is written...
    for i in range(2, depth + 1):
        assert not oss.exists("/".join(segments[:i]))
    oss.touch(path)
    # ...and every intermediate prefix exists afterwards.
    for i in range(2, depth + 1):
        assert oss.exists("/".join(segments[:i]))
def test_versioned_file_fullpath(oss):
    """A ``path?versionId=...`` URL opens exactly that version."""
    versioned_file = (
        versioned_bucket_name + "/versioned_file_fullpath-" + str(uuid.uuid4())
    )
    oss = OSSFileSystem(key=key, secret=secret, endpoint=endpoint, version_aware=True)
    with oss.open(versioned_file, "wb") as fo:
        fo.write(b"1")
    # moto doesn't correctly return a versionId for a multipart upload,
    # so look the version up from the listing instead of fo.version_id.
    versions = oss.object_version_info(versioned_file)
    version_ids = [version.versionid for version in reversed(versions)]
    first_version = version_ids[0]
    with oss.open(versioned_file, "wb") as fo:
        fo.write(b"2")
    file_with_version = "{}?versionId={}".format(versioned_file, first_version)
    with oss.open(file_with_version, "rb") as fo:
        assert fo.version_id == first_version
        assert fo.read() == b"1"
def test_current(ossfs, init_config):
    """current() returns the most recently instantiated filesystem."""
    ossfs._cache.clear()  # pylint: disable=protected-access
    fresh = OSSFileSystem(**init_config)
    assert fresh.current() is fresh
    assert OSSFileSystem.current() is fresh
def test_multiple_objects(oss):
    """Two independent filesystem instances see identical listings."""
    second_fs = OSSFileSystem(key=key, secret=secret, endpoint=endpoint)
    listing = oss.ls(f"{test_bucket_name}/test")
    assert listing == second_fs.ls(f"{test_bucket_name}/test")
def test_additional_params():
    """Constructor-supplied additional_params are stored on the instance."""
    fs = OSSFileSystem(anon=True, additional_params={"foo": "bar"})
    assert fs.additional_params.get("foo") == "bar"
def test_config_kwargs_class_attributes_default():
    """Without an explicit connect_timeout the bucket uses the 60s default."""
    fs = OSSFileSystem(key=key, secret=secret, endpoint=endpoint)
    assert fs._make_bucket(test_bucket_name).timeout == 60
def test_additional_header():
    """Constructor-supplied additional_header entries are stored on the
    instance."""
    fs = OSSFileSystem(anon=True, additional_header={"foo": "bar"})
    assert fs.additional_header.get("foo") == "bar"
def task(num):  # pylint: disable=unused-argument
    """Worker used by concurrency tests: list the bucket, wait, list again."""
    worker_fs = OSSFileSystem(**init_config)
    worker_fs.ls(test_bucket_name)
    time.sleep(5)
    worker_fs.ls(test_bucket_name)
    return True
def test_env_endpoint(endpoint, test_bucket_name, monkeypatch):
    """When endpoint=None, the OSS_ENDPOINT environment variable is used."""
    key, secret, token = fetch_sts_token(STSAccessKeyId, STSAccessKeySecret, STSArn)
    monkeypatch.setenv("OSS_ENDPOINT", endpoint)
    env_fs = OSSFileSystem(key=key, secret=secret, token=token, endpoint=None)
    env_fs.ls(test_bucket_name)
def task(i):
    """Worker used by concurrency tests: build a filesystem and list the
    root."""
    fs = OSSFileSystem(key=key, secret=secret, endpoint=endpoint)
    fs.ls("")
    return True
def test_anonymous_login(file_in_anonymous, endpoint):
    """A filesystem built without credentials can read a public object."""
    anon_fs = OSSFileSystem(endpoint=endpoint)
    anon_fs.cat(f"{file_in_anonymous}")
def oss():
    """Pytest fixture: provision the test and versioned buckets with sample
    data, yield a fresh filesystem, then tear everything down.

    Fixes: the teardown's bare ``except:`` (which would swallow
    ``KeyboardInterrupt``/``SystemExit``) is narrowed to ``except Exception``;
    dead commented-out bucket-creation code is removed.
    """
    auth = oss2.Auth(key, secret)
    test_bucket = oss2.Bucket(auth, endpoint, test_bucket_name)
    test_bucket.create_bucket(oss2.BUCKET_ACL_PUBLIC_READ)
    versioned_bucket = oss2.Bucket(auth, endpoint, versioned_bucket_name)
    versioned_bucket.create_bucket(oss2.BUCKET_ACL_PUBLIC_READ)
    # Enable versioning on the second bucket for the version-aware tests.
    config = oss2.models.BucketVersioningConfig()
    config.status = oss2.BUCKET_VERSIONING_ENABLE
    versioned_bucket.put_bucket_versioning(config)
    # Seed the test bucket with every sample-data mapping.
    for flist in [files, csv_files, text_files, glob_files]:
        for f, data in flist.items():
            test_bucket.put_object(f, data)
    OSSFileSystem.clear_instance_cache()
    oss = OSSFileSystem(key=key, secret=secret, endpoint=endpoint)
    oss.invalidate_cache()
    yield oss
    # Teardown: abort stray multipart uploads, then remove everything created.
    oss._clear_multipart_uploads(test_bucket_name)
    oss.rm(test_bucket_name, recursive=True)
    oss.rm(versioned_bucket_name, recursive=True)
    try:
        oss.rm(tmp_bucket_name, recursive=True)
    except Exception:  # best-effort cleanup; the tmp bucket may not exist
        pass
def ossfs(init_config):
    """Pytest fixture: a filesystem built from the shared init_config."""
    fs = OSSFileSystem(**init_config)
    return fs