def test_default_cache_type(init_config, default_cache_type, test_path):
    """Round-trip 10 MiB through a filesystem built with ``default_cache_type``.

    The filesystem is constructed from ``init_config`` after injecting the
    requested ``default_cache_type``.  The test then checks that a file opened
    for reading uses the matching class from ``fsspec.core.caches`` and that
    the bytes read back equal the bytes written.
    """
    # NOTE(review): another test in this file is also named
    # ``test_default_cache_type``; if both live in the same module, the later
    # definition shadows this one and pytest never collects it -- confirm.
    init_config["default_cache_type"] = default_cache_type
    ossfs = OSSFileSystem(**init_config)

    target = test_path + "/test_default_cache_type/file"
    payload = b"a" * (10 * 2**20)

    with ossfs.open(target, "wb") as writer:
        writer.write(payload)

    with ossfs.open(target, "rb") as reader:
        active_cache = reader.cache
        assert isinstance(active_cache, fsspec.core.caches[default_cache_type])
        out = reader.read(len(payload))
        assert len(payload) == len(out)
        assert out == payload
def test_versions_unaware(oss):
    """Check how a filesystem created with ``version_aware=False`` treats versions.

    The same key is written twice.  A plain read must then report
    ``version_id is None`` and return the most recent content, and asking for
    an explicit ``version_id`` must raise ``ValueError``.
    """
    versioned_file = versioned_bucket_name + "/versioned_file3"
    oss = OSSFileSystem(key=key, secret=secret, endpoint=endpoint, version_aware=False)

    with oss.open(versioned_file, "wb") as fo:
        fo.write(b"1")
    with oss.open(versioned_file, "wb") as fo:
        fo.write(b"2")

    with oss.open(versioned_file) as fo:
        assert fo.version_id is None
        assert fo.read() == b"2"

    with pytest.raises(ValueError):
        # Bind the freshly opened handle.  Without ``as fo`` the read below
        # would hit the already-closed handle from the previous ``with``;
        # reading a closed file raises ValueError on its own, so the test
        # could pass even if ``open`` wrongly accepted ``version_id``.
        with oss.open(versioned_file, version_id="0") as fo:
            fo.read()
def test_default_pars(oss):
    """Check filesystem-level defaults and per-open overrides of read settings.

    A filesystem is created with ``default_block_size=20`` and
    ``default_fill_cache=False``.  A file opened without extra arguments must
    expose those values as ``blocksize`` / ``fill_cache``; a file opened with
    explicit ``block_size`` / ``fill_cache`` must expose the explicit values.
    """
    oss = OSSFileSystem(
        key=key,
        secret=secret,
        endpoint=endpoint,
        default_block_size=20,
        default_fill_cache=False,
    )
    first_name = list(files)[0]
    fn = "/".join((test_bucket_name, first_name))

    # (open kwargs, expected blocksize, expected fill_cache)
    scenarios = (
        ({}, 20, False),
        ({"block_size": 40, "fill_cache": True}, 40, True),
    )
    for open_kwargs, want_blocksize, want_fill_cache in scenarios:
        with oss.open(fn, **open_kwargs) as f:
            assert f.blocksize == want_blocksize
            assert f.fill_cache is want_fill_cache
def test_fsspec_versions_multiple(oss):
    """Check ``fsspec.core.get_fs_token_paths`` with ``versionId`` URLs.

    Twenty versions of one key are written, each with distinct content.  Every
    version is then addressed through an ``oss://...?versionId=...`` URL, and
    the filesystem returned by fsspec must be a version-aware
    ``OSSFileSystem`` that yields the content recorded for each version.
    """
    oss = OSSFileSystem(key=key, secret=secret, endpoint=endpoint, version_aware=True)
    versioned_file = f"{versioned_bucket_name}/versioned_file3-{uuid.uuid4()}"

    expected_by_version = {}
    for index in range(20):
        payload = str(index).encode()
        with oss.open(versioned_file, "wb") as writer:
            writer.write(payload)
        # The version id is read from the handle once the write block has exited.
        expected_by_version[writer.version_id] = payload

    urls = [
        f"oss://{versioned_file}?versionId={version}"
        for version in expected_by_version
    ]
    storage_options = {"key": key, "secret": secret, "endpoint": endpoint}
    fs, _token, paths = fsspec.core.get_fs_token_paths(
        urls, storage_options=storage_options
    )

    assert isinstance(fs, OSSFileSystem)
    assert fs.version_aware
    for path in paths:
        with fs.open(path, "rb") as reader:
            body = reader.read()
            assert body == expected_by_version[reader.version_id]
def test_requester_pays(oss):
    """Check that ``requester_pays=True`` sets the request-payer header.

    The header keyed by ``oss2.headers.OSS_REQUEST_PAYER`` must equal
    ``"requester"`` both on the filesystem and on the ``oss`` attribute of a
    file opened from it.
    """
    oss = OSSFileSystem(key=key, secret=secret, endpoint=endpoint, requester_pays=True)
    payer_header = oss2.headers.OSS_REQUEST_PAYER
    assert oss.additional_header[payer_header] == "requester"

    fn = test_bucket_name + "/myfile"
    oss.mkdir(test_bucket_name)
    oss.touch(fn)

    with oss.open(fn, "rb") as f:
        assert f.oss.additional_header[payer_header] == "requester"
def test_list_versions_many(oss):
    """Check that ``object_version_info`` reports all 1200 versions of a key.

    The same key is overwritten 1200 times on a version-aware filesystem and
    the version listing must contain exactly that many entries.
    """
    write_count = 1200
    oss = OSSFileSystem(key=key, secret=secret, endpoint=endpoint, version_aware=True)
    versioned_file = f"{versioned_bucket_name}/versioned_file-{uuid.uuid4()}"

    for _ in range(write_count):
        with oss.open(versioned_file, "wb") as writer:
            writer.write(b"1")

    listed = oss.object_version_info(versioned_file)
    assert len(listed) == write_count
def test_default_cache_type(oss, default_cache_type):
    """Round-trip 10 MiB through a filesystem built with ``default_cache_type``.

    A file opened for reading must use the matching class from
    ``fsspec.core.caches``, and the bytes read back must equal the bytes
    written.
    """
    # NOTE(review): another test in this file is also named
    # ``test_default_cache_type``; if both live in the same module, only the
    # later definition is collected by pytest -- confirm.
    oss = OSSFileSystem(
        anon=False,
        key=key,
        secret=secret,
        endpoint=endpoint,
        default_cache_type=default_cache_type,
    )
    payload = b"a" * (10 * 2**20)

    with oss.open(a, "wb") as writer:
        writer.write(payload)

    with oss.open(a, "rb") as reader:
        active_cache = reader.cache
        assert isinstance(active_cache, fsspec.core.caches[default_cache_type])
        out = reader.read(len(payload))
        assert len(payload) == len(out)
        assert out == payload
def test_versioned_file_fullpath(oss):
    """Check that a ``path?versionId=...`` string opens that specific version.

    The key is written once, the id of that version is captured, and the key
    is then overwritten.  Opening the path with the captured id embedded must
    report that id on the handle and return the first content.
    """
    oss = OSSFileSystem(key=key, secret=secret, endpoint=endpoint, version_aware=True)
    versioned_file = f"{versioned_bucket_name}/versioned_file_fullpath-{uuid.uuid4()}"

    with oss.open(versioned_file, "wb") as writer:
        writer.write(b"1")

    # The handle's ``version_id`` is deliberately not used here (the original
    # note says moto doesn't correctly return a versionId for a multipart
    # upload), so the id is taken from the version listing instead.
    listed = oss.object_version_info(versioned_file)
    ids_in_reverse = [entry.versionid for entry in reversed(listed)]
    version_id = ids_in_reverse[0]

    with oss.open(versioned_file, "wb") as writer:
        writer.write(b"2")

    file_with_version = f"{versioned_file}?versionId={version_id}"
    with oss.open(file_with_version, "rb") as reader:
        assert reader.version_id == version_id
        assert reader.read() == b"1"
def test_versions(oss):
    """Check reading the latest and an explicit version of a twice-written key.

    After writing ``b"1"`` and then ``b"2"`` the key must be a file with two
    listed versions.  A plain open must yield the second id and ``b"2"``; an
    open with the first id must yield that id and ``b"1"``.
    """
    oss = OSSFileSystem(key=key, secret=secret, endpoint=endpoint, version_aware=True)
    versioned_file = f"{versioned_bucket_name}/versioned_file_{uuid.uuid4()}"

    for payload in (b"1", b"2"):
        with oss.open(versioned_file, "wb") as writer:
            writer.write(payload)

    assert oss.isfile(versioned_file)

    listed = oss.object_version_info(versioned_file)
    # The assertions below expect the reversed listing to be oldest-first.
    version_ids = [entry.versionid for entry in reversed(listed)]
    assert len(version_ids) == 2
    first_id, latest_id = version_ids[0], version_ids[1]

    with oss.open(versioned_file) as reader:
        assert reader.version_id == latest_id
        assert reader.read() == b"2"

    with oss.open(versioned_file, version_id=first_id) as reader:
        assert reader.version_id == first_id
        assert reader.read() == b"1"
def test_version_sizes(oss):
    """Guard against caching of incorrect version details.

    Four gzip payloads of differing sizes are written to the same key.  Each
    resulting version is then opened by id and fully decompressed; the test
    passes when none of those reads raises.
    """
    import gzip

    oss = OSSFileSystem(key=key, secret=secret, endpoint=endpoint, version_aware=True)
    path = f"oss://{versioned_bucket_name}/test.txt.gz"
    texts = (b"good morning!", b"hello!", b"hi!", b"hello!")

    write_results = []
    for text in texts:
        write_results.append(oss.pipe_file(path, gzip.compress(text)))

    for result in write_results:
        with oss.open(path, version_id=result.versionid) as raw, gzip.open(raw) as zfp:
            zfp.read()