def test_fixed_datasets(vars_to_dims, sizes, data):
    """Check datasets drawn from ``hxr.fixed_datasets`` against the request.

    Args:
        vars_to_dims: Mapping from variable name to an iterable of its
            dimension names.
        sizes: Pair ``(min_side, max_side)`` bounding each dimension length;
            ``max_side`` may be ``None``, in which case no upper bound is
            asserted.
        data: Object exposing ``draw`` (presumably Hypothesis' ``data()``
            fixture -- confirm against the test decorator).
    """
    elements = None
    coords_elements = None
    min_side, max_side = sizes

    # Every dimension mentioned by any variable, de-duplicated and ordered.
    all_dims = sorted({dd for dims in vars_to_dims.values() for dd in dims})

    # Special dims get coords that are just the dim name repeated, as a test
    # case for user-supplied coordinate strategies.
    special_dims = data.draw(hxr.subset_lists(all_dims))
    # Size bounds are passed by keyword so they cannot bind to a different
    # positional parameter of ``lists``.
    coords_st = {
        dd: lists(just(dd), min_size=min_side, max_size=max_side)
        for dd in special_dims
    }
    dtype_d = data.draw(
        fixed_dictionaries({vv: dtypes() for vv in vars_to_dims}))

    S = hxr.fixed_datasets(vars_to_dims, dtype_d, elements, coords_elements,
                           min_side, max_side, coords_st)
    ds = data.draw(S)

    # Variables come out in the requested order with the requested dims/dtype.
    assert list(ds) == list(vars_to_dims.keys())
    assert all(ds[vv].dims == tuple(vars_to_dims[vv]) for vv in vars_to_dims)
    assert all(ds[vv].dtype == np.dtype(dtype_d[vv]) for vv in vars_to_dims)

    # Special dims must carry only their own name as coordinate values.
    assert all(
        all(ss == dd for ss in ds.coords[dd].values.tolist())
        for dd in special_dims)

    for dd in all_dims:
        L = ds.coords[dd].values.tolist()
        assert len(L) >= min_side
        assert (max_side is None) or (len(L) <= max_side)
        if dd in special_dims:
            continue
        # Non-special coords are expected to be unique integers.
        assert all(isinstance(ss, int) for ss in L)
        assert len(set(L)) == len(L)
def build_it(vars_to_dims_):
    """Combine a dataset strategy with variable- and dimension-subset strategies.

    Args:
        vars_to_dims_: Mapping from variable name to an iterable of its
            dimension names.

    Returns:
        ``tuples(ds, vars_, dims)`` where ``ds`` is
        ``fixed_datasets(vars_to_dims_)``, ``vars_`` is ``subset_lists`` over
        the variable names and ``dims`` is ``subset_lists`` over the
        de-duplicated dimension names.
    """
    # Flatten every variable's dims into one list, then drop duplicates.
    flat_dims = []
    for var_dims in vars_to_dims_.values():
        flat_dims = flat_dims + list(var_dims)
    unique_dims = list(set(flat_dims))

    var_names = list(vars_to_dims_.keys())

    dataset_st = fixed_datasets(vars_to_dims_)
    var_subset_st = subset_lists(var_names)
    dim_subset_st = subset_lists(unique_dims)
    return tuples(dataset_st, var_subset_st, dim_subset_st)
def build_it(vars_to_dims_):
    """Combine a dataset strategy with a strategy for (variable, dims) pairs.

    Args:
        vars_to_dims_: Mapping from variable name to an iterable of its
            dimension names.

    Returns:
        ``tuples(ds, pairs, just(all_dims))`` where ``ds`` is
        ``fixed_datasets(vars_to_dims_)``, ``pairs`` is a strategy built from
        ``dictionaries`` (variable name -> subset of dims, stored in an
        ``OrderedDict``) and mapped to a list of its items, and ``all_dims``
        is the de-duplicated list of dimension names.
    """
    # Flatten every variable's dims into one list, then drop duplicates.
    flat_dims = []
    for var_dims in vars_to_dims_.values():
        flat_dims = flat_dims + list(var_dims)
    unique_dims = list(set(flat_dims))

    dataset_st = fixed_datasets(vars_to_dims_)
    dim_subset_st = subset_lists(unique_dims)
    var_name_st = sampled_from(list(vars_to_dims_.keys()))

    # Draw an OrderedDict, then expose it as a list of (name, dims) items.
    as_dict_st = dictionaries(var_name_st, dim_subset_st,
                              dict_class=OrderedDict)
    as_items_st = as_dict_st.map(OrderedDict.items)
    as_list_st = as_items_st.map(list)

    return tuples(dataset_st, as_list_st, just(unique_dims))
def data_to_concat():
    """Build a strategy yielding per-(test case, method, trial) dataset slices.

    Returns:
        ``fixed_datasets(...).map(separate)``: each drawn value is a list of
        ``(meta_data, data)`` pairs, where ``meta_data`` is
        ``(test_case, method, trial)`` and ``data`` is
        ``(perf_da, time_ds, sig)``.
    """

    def separate(ds):
        """Split ``ds`` into one entry per (test case, method, trial) combo."""
        G = product(
            ds.coords[TEST_CASE].values.tolist(),
            ds.coords[METHOD].values.tolist(),
            ds.coords[TRIAL].values.tolist(),
        )

        L = []
        for test_case, method, trial in G:
            # Could swap out trial for UUID here
            meta_data = (test_case, method, trial)

            # drop=True removes the selected coords from the sub-dataset.
            ds_sub = ds.sel(
                {TEST_CASE: test_case, METHOD: method, TRIAL: trial},
                drop=True)

            perf_da = ds_sub["perf"]
            time_ds = ds_sub[[SUGGEST_PHASE, EVAL_PHASE, OBS_PHASE]]
            sig = ds_sub["sig"].values.tolist()
            data = (perf_da, time_ds, sig)
            L.append((meta_data, data))

            # The element strategy excludes NaN; verify none slipped in.
            assert not np.any(np.isnan(perf_da.values))
            assert not any(
                np.any(np.isnan(time_ds[kk].values)) for kk in time_ds)
            assert not np.any(np.isnan(sig))
        return L

    vars_to_dims = {
        "perf": (ITER, SUGGEST, TEST_CASE, METHOD, TRIAL),
        "sig": (SIG_POINT, TEST_CASE, METHOD, TRIAL),
        SUGGEST_PHASE: (ITER, TEST_CASE, METHOD, TRIAL),
        EVAL_PHASE: (ITER, SUGGEST, TEST_CASE, METHOD, TRIAL),
        OBS_PHASE: (ITER, TEST_CASE, METHOD, TRIAL),
    }

    float_no_nan = floats(allow_nan=False, min_value=-10, max_value=10)

    # Every variable is float64. ``np.float_`` was an alias of ``np.float64``
    # that NumPy 2.0 removed, so the explicit name is used.
    dtype = {
        SUGGEST_PHASE: np.float64,
        EVAL_PHASE: np.float64,
        OBS_PHASE: np.float64,
        "perf": np.float64,
        "sig": np.float64,
    }

    # TEST_CASE and METHOD coords draw their elements from xr_dims(); the
    # remaining dims use simple_coords. SIG_POINT is pinned to length N_SIG.
    coords_st = {
        ITER: simple_coords(min_side=1),
        SUGGEST: simple_coords(min_side=1),
        TEST_CASE: xr_coords(elements=xr_dims(), min_side=1),
        METHOD: xr_coords(elements=xr_dims(), min_side=1),
        TRIAL: simple_coords(min_side=1),
        SIG_POINT: simple_coords(min_side=N_SIG, max_side=N_SIG),
    }

    S = fixed_datasets(
        vars_to_dims,
        dtype=dtype,
        elements=float_no_nan,
        coords_st=coords_st,
        min_side=1,
    ).map(separate)
    return S
def data_to_concat():
    """Build a strategy yielding per-(test case, method, trial) dataset slices.

    Returns:
        ``fixed_datasets(...).map(separate)``: each drawn value is a list of
        ``(meta_data, data)`` pairs, where ``meta_data`` is
        ``(test_case, method, trial)`` and ``data`` is
        ``(perf_ds, time_ds, suggest_ds, sig)``.
    """

    def separate(ds):
        """Split ``ds`` into one entry per (test case, method, trial) combo."""
        G = product(
            ds.coords[TEST_CASE].values.tolist(),
            ds.coords[METHOD].values.tolist(),
            ds.coords[TRIAL].values.tolist(),
        )

        L = []
        for test_case, method, trial in G:
            # Could swap out trial for UUID here
            meta_data = (test_case, method, trial)

            # drop=True removes the selected coords from the sub-dataset.
            ds_sub = ds.sel(
                {TEST_CASE: test_case, METHOD: method, TRIAL: trial},
                drop=True)

            perf_ds = ds_sub[list(OBJECTIVE_NAMES)]
            time_ds = ds_sub[[SUGGEST_PHASE, EVAL_PHASE, OBS_PHASE]]
            suggest_ds = ds_sub[["foo", "bar", "baz"]]
            sig = ds_sub["sig"].values.tolist()
            data = (perf_ds, time_ds, suggest_ds, sig)
            L.append((meta_data, data))

            # The element strategy excludes NaN; verify none slipped in.
            assert not any(
                np.any(np.isnan(perf_ds[kk].values)) for kk in perf_ds)
            assert not any(
                np.any(np.isnan(time_ds[kk].values)) for kk in time_ds)
            assert not any(
                np.any(np.isnan(suggest_ds[kk].values)) for kk in suggest_ds)
            assert not np.any(np.isnan(sig))
        return L

    vars_to_dims = {
        "sig": (SIG_POINT, TEST_CASE, METHOD, TRIAL),
        SUGGEST_PHASE: (ITER, TEST_CASE, METHOD, TRIAL),
        EVAL_PHASE: (ITER, SUGGEST, TEST_CASE, METHOD, TRIAL),
        OBS_PHASE: (ITER, TEST_CASE, METHOD, TRIAL),
    }
    # ``np.float_`` was an alias of ``np.float64`` that NumPy 2.0 removed, so
    # the explicit name is used throughout.
    dtype = {
        SUGGEST_PHASE: np.float64,
        EVAL_PHASE: np.float64,
        OBS_PHASE: np.float64,
        "sig": np.float64,
    }

    # One float variable per objective, all sharing the same dims.
    for obj in OBJECTIVE_NAMES:
        vars_to_dims[obj] = (ITER, SUGGEST, TEST_CASE, METHOD, TRIAL)
        dtype[obj] = np.float64

    # We should also generate this using the space strategy, but hard coding
    # this test case is good enough for now.
    input_vars = {"foo": np.float64, "bar": np.float64, "baz": np.int_}
    for vv, dd in input_vars.items():
        vars_to_dims[vv] = (ITER, SUGGEST, TEST_CASE, METHOD, TRIAL)
        dtype[vv] = dd

    float_no_nan = floats(allow_nan=False, min_value=-10, max_value=10)

    # TEST_CASE and METHOD coords draw their elements from xr_dims(); the
    # remaining dims use simple_coords. SIG_POINT is pinned to length N_SIG.
    coords_st = {
        ITER: simple_coords(min_side=1),
        SUGGEST: simple_coords(min_side=1),
        TEST_CASE: xr_coords(elements=xr_dims(), min_side=1),
        METHOD: xr_coords(elements=xr_dims(), min_side=1),
        TRIAL: simple_coords(min_side=1),
        SIG_POINT: simple_coords(min_side=N_SIG, max_side=N_SIG),
    }

    S = fixed_datasets(
        vars_to_dims,
        dtype=dtype,
        elements=float_no_nan,
        coords_st=coords_st,
        min_side=1,
    ).map(separate)
    return S