def test_device_host_file_short(
    tmp_path, num_device_arrays, num_host_arrays, array_size_range
):
    tmpdir = tmp_path / "storage"
    tmpdir.mkdir()
    dhf = DeviceHostFile(
        device_memory_limit=1024 * 16, memory_limit=1024 * 16, local_directory=tmpdir
    )

    host = [
        ("x-%d" % i, np.random.random(randint(*array_size_range)))
        for i in range(num_host_arrays)
    ]
    device = [
        ("dx-%d" % i, cupy.random.random(randint(*array_size_range)))
        for i in range(num_device_arrays)
    ]

    import random

    full = host + device
    random.shuffle(full)

    # Insert host and device arrays in random order.
    for k, v in full:
        dhf[k] = v

    random.shuffle(full)

    # Retrieve each array in a new random order, check it round-trips
    # unchanged, then delete it.
    for k, original in full:
        acquired = dhf[k]
        da.assert_eq(original, acquired)
        del dhf[k]

    assert set(dhf.device.keys()) == set()
    assert set(dhf.host.keys()) == set()
    assert set(dhf.disk.keys()) == set()

def assert_apply_xarray(self, expr, expected, skip_dask=False, skip_no_index=False):
    import xarray as xr

    if np.isscalar(expected):
        # Scalar expected value
        self.assertEqual(expr.apply(self.dataset_xarray, keep_index=False), expected)
        self.assertEqual(expr.apply(self.dataset_xarray, keep_index=True), expected)
        return

    # Make sure expected is an xarray DataArray
    self.assertIsInstance(expected, xr.DataArray)

    # Check using dataset backed by an xarray DataArray
    # keep_index=False
    if not skip_no_index:
        np.testing.assert_equal(expr.apply(self.dataset_xarray), expected.values)

    # keep_index=True
    xr.testing.assert_equal(
        expr.apply(self.dataset_xarray, keep_index=True), expected
    )

    if skip_dask or da is None:
        return

    # Check using dataset backed by a Dask array
    expected_da = da.from_array(expected.values)
    expected_dask = expected.copy()
    expected_dask.data = expected_da

    # keep_index=False, compute=False
    if not skip_no_index:
        da.assert_eq(
            expr.apply(self.dataset_xarray_dask, compute=False), expected_dask.data
        )

    # keep_index=True, compute=False
    xr.testing.assert_equal(
        expr.apply(self.dataset_xarray_dask, keep_index=True, compute=False),
        expected_dask,
    )

    # keep_index=False, compute=True
    if not skip_no_index:
        np.testing.assert_equal(
            expr.apply(self.dataset_xarray_dask, compute=True),
            expected_dask.data.compute(),
        )

    # keep_index=True, compute=True
    xr.testing.assert_equal(
        expr.apply(self.dataset_xarray_dask, keep_index=True, compute=True),
        expected_dask.compute(),
    )

def check_apply(self, expr, expected, skip_dask=False):
    if np.isscalar(expected):
        # Pandas input
        self.assertEqual(expr.apply(self.dataset, keep_index=False), expected)
        self.assertEqual(expr.apply(self.dataset, keep_index=True), expected)

        if dd is None:
            return

        # Dask input
        self.assertEqual(expr.apply(self.dataset_dask, keep_index=False), expected)
        self.assertEqual(expr.apply(self.dataset_dask, keep_index=True), expected)
        return

    # Make sure expected is a pandas Series
    self.assertIsInstance(expected, pd.Series)

    # Check using dataset backed by pandas DataFrame
    # keep_index=False
    np.testing.assert_equal(expr.apply(self.dataset), expected.values)

    # keep_index=True
    pd.testing.assert_series_equal(
        expr.apply(self.dataset, keep_index=True), expected, check_names=False
    )

    if skip_dask or dd is None:
        return

    # Check using dataset backed by Dask DataFrame
    expected_dask = dd.from_pandas(expected, npartitions=2)

    # keep_index=False, compute=False
    da.assert_eq(expr.apply(self.dataset_dask, compute=False), expected_dask.values)

    # keep_index=True, compute=False
    dd.assert_eq(
        expr.apply(self.dataset_dask, keep_index=True, compute=False),
        expected_dask,
        check_names=False,
    )

    # keep_index=False, compute=True
    np.testing.assert_equal(
        expr.apply(self.dataset_dask, compute=True), expected_dask.values.compute()
    )

    # keep_index=True, compute=True
    pd.testing.assert_series_equal(
        expr.apply(self.dataset_dask, keep_index=True, compute=True),
        expected_dask.compute(),
        check_names=False,
    )

def test_latrd_data_default_keys(dummy_data):
    """
    Test that the latrd_data context manager correctly reconstructs the dummy data.

    Construct a LATRD data dictionary from the dummy data files and check that
    there is an entry for each of the default keys, and that each entry has the
    expected values.
    """
    with latrd_data(sorted(dummy_data.iterdir())) as data:
        assert set(data.keys()) == set(cue_keys + event_keys)

        # By using the same seed as when generating the test data,
        # we should be able to reproduce them.
        np.random.seed(0)
        for key in cue_keys + event_keys:
            da.assert_eq(data[key][:10], np.random.randint(10, size=10))

def test_device_host_file_step_by_step(tmp_path):
    tmpdir = tmp_path / "storage"
    tmpdir.mkdir()
    dhf = DeviceHostFile(
        device_memory_limit=1024 * 16, memory_limit=1024 * 16, local_directory=tmpdir
    )

    a = np.random.random(1000)
    b = cupy.random.random(1000)

    dhf["a1"] = a
    assert set(dhf.device.keys()) == set()
    assert set(dhf.host.keys()) == set(["a1"])
    assert set(dhf.disk.keys()) == set()

    dhf["b1"] = b
    assert set(dhf.device.keys()) == set(["b1"])
    assert set(dhf.host.keys()) == set(["a1"])
    assert set(dhf.disk.keys()) == set()

    dhf["b2"] = b
    assert set(dhf.device.keys()) == set(["b1", "b2"])
    assert set(dhf.host.keys()) == set(["a1"])
    assert set(dhf.disk.keys()) == set()

    dhf["b3"] = b
    assert set(dhf.device.keys()) == set(["b2", "b3"])
    assert set(dhf.host.keys()) == set(["a1", "b1"])
    assert set(dhf.disk.keys()) == set()

    dhf["a2"] = a
    assert set(dhf.device.keys()) == set(["b2", "b3"])
    assert set(dhf.host.keys()) == set(["a2", "b1"])
    assert set(dhf.disk.keys()) == set(["a1"])

    dhf["b4"] = b
    assert set(dhf.device.keys()) == set(["b3", "b4"])
    assert set(dhf.host.keys()) == set(["a2", "b2"])
    assert set(dhf.disk.keys()) == set(["a1", "b1"])

    dhf["b4"] = b
    assert set(dhf.device.keys()) == set(["b3", "b4"])
    assert set(dhf.host.keys()) == set(["a2", "b2"])
    assert set(dhf.disk.keys()) == set(["a1", "b1"])

    da.assert_eq(dhf["a1"], a)
    del dhf["a1"]
    da.assert_eq(dhf["a2"], a)
    del dhf["a2"]
    da.assert_eq(dhf["b1"], b)
    del dhf["b1"]
    da.assert_eq(dhf["b2"], b)
    del dhf["b2"]
    da.assert_eq(dhf["b3"], b)
    del dhf["b3"]
    da.assert_eq(dhf["b4"], b)
    del dhf["b4"]

    assert set(dhf.device.keys()) == set()
    assert set(dhf.host.keys()) == set()
    assert set(dhf.disk.keys()) == set()