Beispiel #1
0
def test_fixed_datasets(vars_to_dims, sizes, data):
    """Check that ``hxr.fixed_datasets`` honors its variable, dtype and size spec.

    A dataset is drawn from ``hxr.fixed_datasets`` and verified to have the
    requested variables (in order), dimensions, dtypes and coordinate lengths.
    A randomly drawn subset of the dimensions ("special" dims) is given a
    custom coords strategy that fills the coordinate with the dimension name
    itself, so the test can confirm custom coords strategies are used.

    Parameters
    ----------
    vars_to_dims : mapping
        Maps each variable name to the sequence of dimension names it uses.
    sizes : tuple
        ``(min_side, max_side)`` bounds on each coordinate length. ``max_side``
        may be ``None``, in which case no upper bound is asserted.
    data : object
        Source of draws exposing ``draw(strategy)`` (presumably the Hypothesis
        ``data()`` object -- confirm against the test decorator).
    """
    elements = None
    coords_elements = None
    min_side, max_side = sizes

    # special dims just filled with dim name on coords as test case
    all_dims = sorted({dd for dims in vars_to_dims.values() for dd in dims})
    special_dims = data.draw(hxr.subset_lists(all_dims))
    # Size bounds are passed by keyword: they are keyword-only in current
    # Hypothesis, and the positional order differed in old releases.
    coords_st = {
        dd: lists(just(dd), min_size=min_side, max_size=max_side)
        for dd in special_dims
    }

    dtype_d = data.draw(
        fixed_dictionaries({vv: dtypes()
                            for vv in vars_to_dims}))

    S = hxr.fixed_datasets(vars_to_dims, dtype_d, elements, coords_elements,
                           min_side, max_side, coords_st)

    ds = data.draw(S)

    # Variables must appear exactly as requested, with matching dims/dtypes.
    assert list(ds) == list(vars_to_dims.keys())
    assert all(ds[vv].dims == tuple(vars_to_dims[vv]) for vv in vars_to_dims)
    assert all(ds[vv].dtype == np.dtype(dtype_d[vv]) for vv in vars_to_dims)
    # Special dims must carry only their own name as coordinate values.
    assert all(
        all(ss == dd for ss in ds.coords[dd].values.tolist())
        for dd in special_dims)
    for dd in all_dims:
        L = ds.coords[dd].values.tolist()
        assert len(L) >= min_side
        assert (max_side is None) or (len(L) <= max_side)
        # Non-special dims are expected to hold unique integer coords.
        assert (dd in special_dims) or all(isinstance(ss, int) for ss in L)
        assert (dd in special_dims) or len(set(L)) == len(L)
    def build_it(vars_to_dims_):
        """Combine a dataset strategy with variable and dimension subsets.

        ``vars_to_dims_`` maps each variable name to the dimension names it
        uses. The result is ``tuples(ds, vars_, dims)``, where ``ds`` comes
        from ``fixed_datasets``, ``vars_`` from ``subset_lists`` over the
        variable names, and ``dims`` from ``subset_lists`` over every
        dimension name that appears in the mapping.
        """
        # Sorted rather than raw set order: set iteration order for strings
        # varies with hash randomization, which would make the strategy differ
        # between interpreter runs. Assumes dimension names are mutually
        # orderable (e.g. all str) -- TODO confirm.
        all_dims = sorted(
            {dd for dims_ in vars_to_dims_.values() for dd in dims_})

        ds = fixed_datasets(vars_to_dims_)
        vars_ = subset_lists(list(vars_to_dims_.keys()))
        dims = subset_lists(all_dims)
        return tuples(ds, vars_, dims)
    def build_it(vars_to_dims_):
        """Combine a dataset strategy with a variable->dims list and all dims.

        ``vars_to_dims_`` maps each variable name to the dimension names it
        uses. The result is ``tuples(ds, vars_dict, just(all_dims))`` where
        ``ds`` comes from ``fixed_datasets``, ``vars_dict`` yields a list of
        ``(variable, dims subset)`` pairs built from an ``OrderedDict``, and
        the last component is the constant list of every dimension name.
        """
        # Sorted rather than raw set order: set iteration order for strings
        # varies with hash randomization, and this list is handed to the test
        # via ``just``. Assumes dimension names are mutually orderable
        # (e.g. all str) -- TODO confirm.
        all_dims = sorted(
            {dd for dims_ in vars_to_dims_.values() for dd in dims_})

        ds = fixed_datasets(vars_to_dims_)

        dims = subset_lists(all_dims)

        vars_ = sampled_from(list(vars_to_dims_.keys()))
        vars_dict = dictionaries(vars_, dims, dict_class=OrderedDict)
        # Convert the drawn OrderedDict into a list of (key, value) pairs.
        vars_dict = vars_dict.map(OrderedDict.items).map(list)

        return tuples(ds, vars_dict, just(all_dims))
Beispiel #4
0
def data_to_concat():
    """Build a strategy producing per-(test case, method, trial) data records.

    A dataset with the variables ``"perf"``, ``"sig"`` and the three phase
    variables is drawn from ``fixed_datasets`` and then mapped through
    ``separate``, which slices it into one record per combination of
    test case, method and trial.

    Returns
    -------
    S
        The ``fixed_datasets(...)`` strategy mapped through ``separate``; each
        drawn value is a list of ``(meta_data, data)`` pairs with
        ``meta_data = (test_case, method, trial)`` and
        ``data = (perf_da, time_ds, sig)``.
    """

    def separate(ds):
        """Split ``ds`` into one record per (test case, method, trial)."""
        G = product(
            ds.coords[TEST_CASE].values.tolist(),
            ds.coords[METHOD].values.tolist(),
            ds.coords[TRIAL].values.tolist(),
        )

        L = []
        for test_case, method, trial in G:
            # Could swap out trial for UUID here
            meta_data = (test_case, method, trial)

            # drop=True removes the selected (now scalar) coords from the result.
            ds_sub = ds.sel({TEST_CASE: test_case, METHOD: method, TRIAL: trial}, drop=True)

            perf_da = ds_sub["perf"]
            time_ds = ds_sub[[SUGGEST_PHASE, EVAL_PHASE, OBS_PHASE]]
            sig = ds_sub["sig"].values.tolist()
            data = (perf_da, time_ds, sig)
            L.append((meta_data, data))
            # Sanity checks: the element strategy disallows NaN.
            assert not np.any(np.isnan(perf_da.values))
            assert not any(np.any(np.isnan(time_ds[kk].values)) for kk in time_ds)
            assert not np.any(np.isnan(sig))
        return L

    vars_to_dims = {
        "perf": (ITER, SUGGEST, TEST_CASE, METHOD, TRIAL),
        "sig": (SIG_POINT, TEST_CASE, METHOD, TRIAL),
        SUGGEST_PHASE: (ITER, TEST_CASE, METHOD, TRIAL),
        EVAL_PHASE: (ITER, SUGGEST, TEST_CASE, METHOD, TRIAL),
        OBS_PHASE: (ITER, TEST_CASE, METHOD, TRIAL),
    }

    float_no_nan = floats(allow_nan=False, min_value=-10, max_value=10)
    # np.float64 instead of the np.float_ alias, which NumPy 2.0 removed.
    dtype = {
        SUGGEST_PHASE: np.float64,
        EVAL_PHASE: np.float64,
        OBS_PHASE: np.float64,
        "perf": np.float64,
        "sig": np.float64,
    }
    # TEST_CASE and METHOD coords draw their elements from xr_dims()
    # (presumably str, following the dim naming conventions).
    coords_st = {
        ITER: simple_coords(min_side=1),
        SUGGEST: simple_coords(min_side=1),
        TEST_CASE: xr_coords(elements=xr_dims(), min_side=1),
        METHOD: xr_coords(elements=xr_dims(), min_side=1),
        TRIAL: simple_coords(min_side=1),
        # min_side == max_side pins this axis to exactly N_SIG entries.
        SIG_POINT: simple_coords(min_side=N_SIG, max_side=N_SIG),
    }
    S = fixed_datasets(vars_to_dims, dtype=dtype, elements=float_no_nan, coords_st=coords_st, min_side=1).map(separate)
    return S
def data_to_concat():
    """Build a strategy producing per-(test case, method, trial) data records.

    A dataset containing ``"sig"``, the three phase variables, one variable
    per entry of ``OBJECTIVE_NAMES`` and the hard-coded input variables
    ``"foo"``, ``"bar"`` and ``"baz"`` is drawn from ``fixed_datasets`` and
    then mapped through ``separate``, which slices it into one record per
    combination of test case, method and trial.

    Returns
    -------
    S
        The ``fixed_datasets(...)`` strategy mapped through ``separate``; each
        drawn value is a list of ``(meta_data, data)`` pairs with
        ``meta_data = (test_case, method, trial)`` and
        ``data = (perf_ds, time_ds, suggest_ds, sig)``.
    """

    def separate(ds):
        """Split ``ds`` into one record per (test case, method, trial)."""
        G = product(ds.coords[TEST_CASE].values.tolist(),
                    ds.coords[METHOD].values.tolist(),
                    ds.coords[TRIAL].values.tolist())

        L = []
        for test_case, method, trial in G:
            # Could swap out trial for UUID here
            meta_data = (test_case, method, trial)

            # drop=True removes the selected (now scalar) coords from the result.
            ds_sub = ds.sel(
                {
                    TEST_CASE: test_case,
                    METHOD: method,
                    TRIAL: trial
                },
                drop=True)

            perf_ds = ds_sub[list(OBJECTIVE_NAMES)]
            time_ds = ds_sub[[SUGGEST_PHASE, EVAL_PHASE, OBS_PHASE]]
            suggest_ds = ds_sub[["foo", "bar", "baz"]]
            sig = ds_sub["sig"].values.tolist()
            data = (perf_ds, time_ds, suggest_ds, sig)
            L.append((meta_data, data))
            # Sanity checks: no NaN may appear in any extracted piece.
            assert not any(
                np.any(np.isnan(perf_ds[kk].values)) for kk in perf_ds)
            assert not any(
                np.any(np.isnan(time_ds[kk].values)) for kk in time_ds)
            assert not any(
                np.any(np.isnan(suggest_ds[kk].values)) for kk in suggest_ds)
            assert not np.any(np.isnan(sig))
        return L

    vars_to_dims = {
        "sig": (SIG_POINT, TEST_CASE, METHOD, TRIAL),
        SUGGEST_PHASE: (ITER, TEST_CASE, METHOD, TRIAL),
        EVAL_PHASE: (ITER, SUGGEST, TEST_CASE, METHOD, TRIAL),
        OBS_PHASE: (ITER, TEST_CASE, METHOD, TRIAL),
    }
    # np.float64 instead of the np.float_ alias, which NumPy 2.0 removed.
    dtype = {
        SUGGEST_PHASE: np.float64,
        EVAL_PHASE: np.float64,
        OBS_PHASE: np.float64,
        "sig": np.float64
    }

    # Every objective shares the same dims and is stored as float64.
    for obj in OBJECTIVE_NAMES:
        vars_to_dims[obj] = (ITER, SUGGEST, TEST_CASE, METHOD, TRIAL)
        dtype[obj] = np.float64

    # We should also generate this using the space strategy, but hard coding
    # this test case is good enough for now.
    input_vars = {"foo": np.float64, "bar": np.float64, "baz": np.int_}
    for vv, dd in input_vars.items():
        vars_to_dims[vv] = (ITER, SUGGEST, TEST_CASE, METHOD, TRIAL)
        dtype[vv] = dd

    float_no_nan = floats(allow_nan=False, min_value=-10, max_value=10)
    # TEST_CASE and METHOD coords draw their elements from xr_dims()
    # (presumably str, following the dim naming conventions).
    coords_st = {
        ITER: simple_coords(min_side=1),
        SUGGEST: simple_coords(min_side=1),
        TEST_CASE: xr_coords(elements=xr_dims(), min_side=1),
        METHOD: xr_coords(elements=xr_dims(), min_side=1),
        TRIAL: simple_coords(min_side=1),
        # min_side == max_side pins this axis to exactly N_SIG entries.
        SIG_POINT: simple_coords(min_side=N_SIG, max_side=N_SIG),
    }
    S = fixed_datasets(vars_to_dims,
                       dtype=dtype,
                       elements=float_no_nan,
                       coords_st=coords_st,
                       min_side=1).map(separate)
    return S