def main():
    read_only = False
    data_dir = os.path.expanduser('~/output/tmp/redis-test')
    os.makedirs(data_dir, exist_ok=True)
    key = 'train.what'
    bs = 10
    sts = 500 * 20
    ns = 200
    n_arrays = 100
    # serialization = Serialization.PYARROW
    serialization = Serialization.PICKLE

    if not read_only:
        arrays = np.random.rand(n_arrays, bs, sts, ns)
        if os.path.exists(data_dir):
            print("Removing existing dir")
            shutil.rmtree(data_dir)
        os.makedirs(data_dir, exist_ok=True)

        with RedisServer(data_directory=data_dir, serialization=serialization):
            ## WRITE
            redis_datastore = RedisDataStore(server_host='localhost')
            recorder = Recorder(redis_datastore)
            with Timer() as wt:
                write_times = []
                for i in range(n_arrays):
                    array = arrays[i]
                    with Timer() as st:
                        recorder.record(key, array)
                    print("%d: Storing took %.2fs" % (i, st.difftime))
                    write_times.append(st.difftime)
                print("Mean write time was %.4fs (+/- %.4f)" % (np.mean(write_times), np.std(write_times)))
                recorder.close()
            print("Total write time was %.2fs" % wt.difftime)
            ## END WRITE

    print("Dir size after write is %d MiB" % (int(get_size(data_dir)) / 1024 / 1024))

    ## READ
    with RedisServer(data_directory=data_dir, serialization=serialization):
        redis_datastore = RedisDataStore(server_host='localhost', data_directory=data_dir,
                                         serialization=serialization)
        recorder = Recorder(redis_datastore)
        with Timer() as rt:
            l = recorder.get_all(key)
        print("Reading took %.2fs" % rt.difftime)
        with Timer() as rrt:
            l = np.array(l)
        print("Into array took %.2fs" % rrt.difftime)
        print("Mean is", np.mean(l), l.shape)
        recorder.close()
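# The benchmark above relies on a `Timer` context manager and a `get_size`
# helper whose definitions are not shown in this file. Below is a minimal
# sketch of what they could look like, inferred purely from their usage here
# (`with Timer() as t: ...; t.difftime` and `get_size(data_dir)` in bytes);
# the implementations are assumptions, not the project's actual code.
import os
import time


class Timer:
    """Context manager measuring wall-clock time; exposes `difftime` in seconds."""

    def __enter__(self):
        self.start = time.time()
        return self

    def __exit__(self, *exc):
        self.difftime = time.time() - self.start


def get_size(path):
    # Recursively sum file sizes (in bytes) under `path`; a single file
    # simply reports its own size.
    if os.path.isfile(path):
        return os.path.getsize(path)
    total = 0
    for root, _, files in os.walk(path):
        total += sum(os.path.getsize(os.path.join(root, f)) for f in files)
    return total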
def test_inmemorydatastore_single_value(self):
    ## WRITE
    inmem_datastore = InMemoryDataStore()
    recorder = Recorder(inmem_datastore)
    recorder.set(self.key, self.val1)
    recorder.set(self.key, self.val)
    recorder.close()
    ## END WRITE

    ## READ
    # Re-initializing the in-memory datastore would reset all of its data,
    # so the same instance is reused for reading.
    recorder = Recorder(inmem_datastore)
    l = recorder.get(self.key)
    l = np.array(l)
    self.assertTrue((self.val == l).all())
    recorder.close()
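# Why the test reuses `inmem_datastore` for the READ phase: an in-memory store
# keeps its data in an ordinary Python dict, so constructing a fresh instance
# starts from an empty dict and the read would find nothing. A minimal sketch
# of such a store, assuming the set/get/record/get_all interface used in these
# tests; `DictDataStore` is a hypothetical name, and the real InMemoryDataStore
# may differ internally.
class DictDataStore:
    def __init__(self):
        self._data = {}          # re-initializing drops everything held here

    def set(self, key, value):
        self._data[key] = value  # overwrite a single value

    def get(self, key):
        return self._data[key]

    def record(self, key, value):
        self._data.setdefault(key, []).append(value)  # append to a per-key list

    def get_all(self, key):
        return self._data[key]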
def main():
    read_only = False
    data_dir = os.path.expanduser('~/output/tmp/zarr-test')
    file_pth = os.path.join(data_dir, 'data.mdb')
    key = 'train/what'
    bs = 10
    sts = 500 * 20
    ns = 200
    n_arrays = 100
    chunk_size_mb = 0.1

    if not read_only:
        arrays = np.random.rand(n_arrays, bs, sts, ns)
        os.makedirs(data_dir, exist_ok=True)
        if os.path.exists(file_pth):
            print("Removing existing dir")
            shutil.rmtree(file_pth)

        ## WRITE
        zarr_datastore = ZarrDataStore(file_pth, desired_chunk_size_bytes=chunk_size_mb * 1024 ** 2)
        recorder = Recorder(zarr_datastore)
        with Timer() as wt:
            write_times = []
            for i in range(n_arrays):
                array = arrays[i]
                with Timer() as st:
                    recorder.record(key, array)
                print("%d: Storing took %.2fs" % (i, st.difftime))
                write_times.append(st.difftime)
            print("Mean write time was %.4fs (+/- %.4f)" % (np.mean(write_times), np.std(write_times)))
            recorder.close()
        print("Total write time was %.2fs" % wt.difftime)
        ## END WRITE

    print("Dir size after write is %d MiB" % (int(get_size(file_pth)) / 1024 / 1024))

    ## READ
    zarr_datastore = ZarrDataStore(file_pth, desired_chunk_size_bytes=chunk_size_mb * 1024 ** 2)
    recorder = Recorder(zarr_datastore)
    with Timer() as rt:
        l = recorder.get_all(key)
    print("Reading took %.2fs" % rt.difftime)

    read_times = []
    for i in range(20):
        b = np.random.randint(bs)
        st = np.random.randint(sts)
        with Timer() as rrt:
            ll = np.array(l[:(n_arrays // 2), b, st, :])
        print("Into sub-array took %.4fs" % rrt.difftime)
        read_times.append(rrt.difftime)
    print("Into sub-array mean read time was %.4fs (+/- %.4f)" % (np.mean(read_times), np.std(read_times)))

    with Timer() as rrt:
        l = np.array(l)
    print("Into array (total read time) was %.2fs" % rrt.difftime)
    print("Data mean is", np.mean(l), l.shape)
    recorder.close()
def test_hdf5datastore_single_value(self):
    ## WRITE
    self.file_pth = os.path.join(self.data_dir, 'data.h5')
    hdf5_datastore = HDF5DataStore(self.file_pth)
    recorder = Recorder(hdf5_datastore)
    recorder.set(self.key, self.val1)
    recorder.set(self.key, self.val)
    recorder.close()
    ## END WRITE

    ## READ
    hdf5_datastore = HDF5DataStore(self.file_pth)
    recorder = Recorder(hdf5_datastore)
    l = recorder.get(self.key)
    l = np.array(l)
    self.assertTrue((self.val == l).all())
    recorder.close()
def test_inmemorydatastore_list(self):
    ## WRITE
    inmem_datastore = InMemoryDataStore()
    recorder = Recorder(inmem_datastore)
    for i in range(self.n_arrays):
        array = self.arrays[i]
        recorder.record(self.key, array)
    recorder.close()
    ## END WRITE

    ## READ
    # Re-initializing the in-memory datastore would reset all of its data,
    # so the same instance is reused for reading.
    recorder = Recorder(inmem_datastore)
    l = recorder.get_all(self.key)
    l = np.array(l)
    self.assertTrue((self.arrays == l).all())
    recorder.close()
def test_zarrdatastore_single_value(self):
    ## WRITE
    assert not os.path.exists(os.path.join(self.data_dir, 'test.mdb'))
    zarr_datastore = ZarrDataStore(os.path.join(self.data_dir, 'test.mdb'),
                                   datastore_type=DatastoreType.DIRECTORY,
                                   compression_type=CompressionType.LZMA)
    recorder = Recorder(zarr_datastore)
    recorder.set(self.key, self.val1)
    recorder.set(self.key, self.val)
    recorder.close()
    ## END WRITE

    ## READ
    zarr_datastore = ZarrDataStore(os.path.join(self.data_dir, 'test.mdb'),
                                   datastore_type=DatastoreType.DIRECTORY,
                                   compression_type=CompressionType.LZMA)
    recorder = Recorder(zarr_datastore)
    l = recorder.get(self.key)
    l = np.array(l)
    self.assertTrue((self.val == l).all())
    recorder.close()
def test_hdf5datastore_list(self):
    ## WRITE
    self.file_pth = os.path.join(self.data_dir, 'data.h5')
    hdf5_datastore = HDF5DataStore(self.file_pth)
    recorder = Recorder(hdf5_datastore)
    for i in range(self.n_arrays):
        array = self.arrays[i]
        recorder.record(self.key, array)
        # if i == 0:
        #     hdf5_datastore.enable_swmr()
    recorder.close()
    ## END WRITE

    ## READ
    hdf5_datastore = HDF5DataStore(self.file_pth)
    recorder = Recorder(hdf5_datastore)
    l = recorder.get_all(self.key)
    l = np.array(l)
    self.assertTrue((self.arrays == l).all())
    recorder.close()
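# How appending to HDF5 typically works under the hood: h5py datasets created
# with maxshape=(None, ...) can be resized later, so each record() call can
# grow the first axis by one and write into the new slot. A minimal sketch of
# this pattern using h5py directly; it illustrates the technique only and is
# not HDF5DataStore's actual implementation, which is not shown here.
import h5py
import numpy as np


def append_array(h5file_path, key, array):
    with h5py.File(h5file_path, 'a') as f:
        if key not in f:
            # Unlimited first axis so the dataset can grow with every append.
            f.create_dataset(key, shape=(0,) + array.shape,
                             maxshape=(None,) + array.shape, dtype=array.dtype)
        dset = f[key]
        dset.resize(dset.shape[0] + 1, axis=0)
        dset[-1] = array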
def test_zarrdatastore_list_scalar(self):
    ## WRITE
    assert not os.path.exists(os.path.join(self.data_dir, 'test.mdb'))
    zarr_datastore = ZarrDataStore(os.path.join(self.data_dir, 'test.mdb'),
                                   datastore_type=DatastoreType.DIRECTORY,
                                   compression_type=CompressionType.LZMA)
    recorder = Recorder(zarr_datastore)
    recorder.record(self.key, 10.)
    recorder.record(self.key, 20.)
    recorder.close()
    ## END WRITE

    ## READ
    zarr_datastore = ZarrDataStore(os.path.join(self.data_dir, 'test.mdb'),
                                   datastore_type=DatastoreType.DIRECTORY,
                                   compression_type=CompressionType.LZMA)
    recorder = Recorder(zarr_datastore)
    l = recorder.get_all(self.key)
    l = np.array(l)
    self.assertTrue((np.array([10., 20.]) == l).all())
    recorder.close()
def test_zarrdatastore_list(self):
    ## WRITE
    assert not os.path.exists(os.path.join(self.data_dir, 'test.mdb'))
    zarr_datastore = ZarrDataStore(os.path.join(self.data_dir, 'test.mdb'),
                                   datastore_type=DatastoreType.DIRECTORY,
                                   compression_type=CompressionType.LZMA)
    recorder = Recorder(zarr_datastore)
    for i in range(self.n_arrays):
        array = self.arrays[i]
        recorder.record(self.key, array)
    recorder.close()
    ## END WRITE

    ## READ
    zarr_datastore = ZarrDataStore(os.path.join(self.data_dir, 'test.mdb'),
                                   datastore_type=DatastoreType.DIRECTORY,
                                   compression_type=CompressionType.LZMA)
    recorder = Recorder(zarr_datastore)
    l = recorder.get_all(self.key)
    l = np.array(l)
    self.assertTrue((self.arrays == l).all())
    recorder.close()
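# Zarr supports the same append-along-axis-0 pattern natively: arrays expose
# .append(), and new data is written chunk by chunk. A minimal sketch with the
# zarr package directly (zarr v2 API), using numcodecs' LZMA codec to mirror
# CompressionType.LZMA above; this is an illustration under those assumptions,
# not ZarrDataStore's actual code.
import numpy as np
import zarr
from numcodecs import LZMA


def append_array(store_path, key, array):
    root = zarr.open_group(store_path, mode='a')
    if key not in root:
        # First record creates the array with one leading "list" axis.
        root.create_dataset(key, data=array[np.newaxis, ...],
                            chunks=(1,) + array.shape, compressor=LZMA())
    else:
        root[key].append(array[np.newaxis, ...])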
def test_redisdatastore_single_value(self):
    with RedisServer(data_directory=self.data_dir):
        ## WRITE
        redis_datastore = RedisDataStore(server_host='localhost')
        recorder = Recorder(redis_datastore)
        recorder.set(self.key, self.val1)
        recorder.set(self.key, self.val)
        recorder.close()
        ## END WRITE

    with RedisServer(data_directory=self.data_dir):
        ## READ
        redis_datastore = RedisDataStore(server_host='localhost')
        recorder = Recorder(redis_datastore)
        l = recorder.get(self.key)
        l = np.array(l)
        self.assertTrue((self.val == l).all())
        recorder.close()
def test_redisdatastore_list(self):
    with RedisServer(data_directory=self.data_dir):
        ## WRITE
        redis_datastore = RedisDataStore(server_host='localhost')
        recorder = Recorder(redis_datastore)
        for i in range(self.n_arrays):
            array = self.arrays[i]
            recorder.record(self.key, array)
        recorder.close()
        ## END WRITE

    with RedisServer(data_directory=self.data_dir):
        ## READ
        redis_datastore = RedisDataStore(server_host='localhost')
        recorder = Recorder(redis_datastore)
        l = recorder.get_all(self.key)
        l = np.array(l)
        self.assertTrue((self.arrays == l).all())
        recorder.close()
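# The Redis-backed list pattern can be reproduced with redis-py directly:
# RPUSH appends one serialized array to a Redis list, and LRANGE reads the
# whole list back. A minimal sketch assuming pickle serialization, as in the
# Redis benchmark above; this illustrates the underlying commands and is not
# the project's RedisDataStore.
import pickle

import numpy as np
import redis


def record(r, key, array):
    r.rpush(key, pickle.dumps(array))     # append one serialized array


def get_all(r, key):
    return [pickle.loads(b) for b in r.lrange(key, 0, -1)]


# Usage against a locally running server:
# r = redis.Redis(host='localhost')
# record(r, 'train.what', np.random.rand(2, 3))
# arrays = np.array(get_all(r, 'train.what'))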
def test_zarrdatastore_object(self):
    ## WRITE
    assert not os.path.exists(os.path.join(self.data_dir, 'test.mdb'))
    zarr_datastore = ZarrDataStore(os.path.join(self.data_dir, 'test.mdb'),
                                   datastore_type=DatastoreType.DIRECTORY,
                                   compression_type=CompressionType.LZMA)
    recorder = Recorder(zarr_datastore)
    test_list_1 = [np.random.rand(3, 1), np.random.rand(4, 1), np.array([])]
    test_list_2 = [np.random.rand(3, 1), np.random.rand(4, 1), np.array([])]
    test_list = [test_list_1, test_list_2]
    recorder.record(self.key, test_list_1)
    recorder.record(self.key, test_list_2)
    recorder.close()
    ## END WRITE

    ## READ
    zarr_datastore = ZarrDataStore(os.path.join(self.data_dir, 'test.mdb'),
                                   datastore_type=DatastoreType.DIRECTORY,
                                   compression_type=CompressionType.LZMA)
    recorder = Recorder(zarr_datastore)
    l = recorder.get_all(self.key)
    for j in range(len(l)):
        for i in range(3):
            self.assertTrue((test_list[j][i] == l[j][i]).all())
    recorder.close()
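# Ragged data like the lists above cannot be stored in a regular rectangular
# zarr array; zarr's own mechanism for this is an object dtype combined with a
# variable-length codec. A stand-alone illustration with 1-D ragged arrays
# (zarr v2 API); how ZarrDataStore actually handles the nested lists in this
# test is not shown here, so this is only one plausible mechanism.
import numpy as np
import zarr
from numcodecs import VLenArray

z = zarr.empty(3, dtype=object, object_codec=VLenArray('f8'))
z[0] = np.random.rand(3)
z[1] = np.random.rand(4)
z[2] = np.array([])    # empty entries are fine; each cell keeps its own length
assert z[1].shape == (4,)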