def test_log_int_values_warp_unwarp(args):
    """Round-trip a log-warped ``sp.Integer`` space defined by explicit values.

    ``args`` unpacks to ``(x, values)``. Each entry of ``x`` is first snapped
    to the nearest entry of ``values`` so only listed values are warped.
    """
    x, values = args

    grid = np.unique(values)  # Also sort
    assert len(grid) >= 2
    snap = interp1d(grid, grid, kind="nearest", fill_value="extrapolate")
    x = snap(x).astype(values.dtype)
    assert x.ndim == 1  # make sure interp1d did not mess it up

    space = sp.Integer(warp="log", values=values)

    warped = space.warp(x)
    assert warped.shape == x.shape + (1,)
    assert warped.dtype == sp.WARPED_DTYPE

    # Test bounds
    lower, upper = space.get_bounds().T
    assert np.all(lower <= warped)
    assert np.all(warped <= upper)

    # Validation of an already valid warped array should not move it
    warped_checked = space.validate_warped(warped)
    assert close_enough(warped, warped_checked)

    recovered = space.unwarp(warped)
    assert recovered.shape == x.shape
    recovered_checked = space.validate(recovered)
    assert close_enough(recovered, recovered_checked)

    # Full round trip must give back the input
    assert close_enough(x, recovered)
def test_cat_warp_unwarp(args):
    """Round-trip categorical values through ``sp.Categorical`` warp/unwarp.

    ``args`` unpacks to ``(x, values)``; ``x`` is used as an index (modulo the
    number of values) to pick the categories that get warped.
    """
    x, values = args
    assert len(set(values)) >= 2

    n_values = len(values)
    x = values[x % n_values]
    assert x.ndim == 1

    space = sp.Categorical(values=values)

    warped = space.warp(x)
    # One warped column per category
    assert warped.shape == x.shape + (n_values,)
    assert warped.dtype == sp.WARPED_DTYPE

    # Test bounds
    lower, upper = space.get_bounds().T
    assert np.all(lower <= warped)
    assert np.all(warped <= upper)

    warped_checked = space.validate_warped(warped)
    assert close_enough(warped, warped_checked)

    recovered = space.unwarp(warped)
    assert recovered.shape == x.shape
    recovered_checked = space.validate(recovered)
    assert close_enough(recovered, recovered_checked)

    assert close_enough(x, recovered)
def test_real_range_unwarp_warp(warp, args):
    """Round-trip starting in warped space: unwarp, then warp again.

    ``args`` unpacks to ``(x_w, range_)`` where ``range_`` holds the candidate
    end points of a ``sp.Real`` space using the given ``warp``.
    """
    x_w, range_ = args

    # Drop end points outside (0, inf) for log and outside (0, 1) for logit
    if warp == "log":
        range_ = range_[range_ > 0]
    elif warp == "logit":
        range_ = range_[(0 < range_) & (range_ < 1)]

    range_ = np.sort(range_)
    assume(len(range_) == 2 and range_[0] < range_[1])

    warp_f = sp.WARP_DICT[warp]
    range_warped = warp_f(range_)
    x_w = np.clip(x_w, range_warped[0], range_warped[1])

    space = sp.Real(warp=warp, range_=range_)

    # Test bounds
    lower, upper = space.get_bounds().T
    x_w = linear_rescale(x_w, lb0=-1000, ub0=1000, lb1=lower, ub1=upper)

    x = space.unwarp(x_w)
    # Unwarping removes the trailing length-1 axis
    assert x_w.shape == x.shape + (1,)
    assert x.dtype == range_.dtype
    assert x.dtype == space.dtype

    x_checked = space.validate(x)
    assert close_enough(x, x_checked)

    x_w_again = space.warp(x)
    assert x_w_again.shape == x_w.shape
    x_w_checked = space.validate_warped(x_w_again)
    assert close_enough(x_w_again, x_w_checked)

    assert close_enough(x_w, x_w_again)
def test_joint_space_warp_missing(args):
    """Warp a partially specified point and check the blocks of the result.

    ``args`` unpacks to ``(meta, X, _, fixed_vars)``. Only ``fixed_vars`` is
    warped; blocks of parameters absent from it are expected to be all NaN.
    """
    meta, X, _, fixed_vars = args

    joint = sp.JointSpace(meta)

    X_w = joint.warp([fixed_vars])
    assert X_w.dtype == sp.WARPED_DTYPE

    # Test bounds
    lower, upper = joint.get_bounds().T
    assert np.all((lower <= X_w) | np.isnan(X_w))
    assert np.all((X_w <= upper) | np.isnan(X_w))

    # Split the warped row into one block per parameter
    blocks = np.hsplit(X_w, joint.blocks[:-1])
    for param, block in zip(joint.param_list, blocks):
        (block,) = block  # a single point was warped, so one row per block

        if param not in fixed_vars:
            assert np.all(np.isnan(block))
            continue

        x_orig = joint.spaces[param].unwarp(block).item()
        joint.spaces[param].validate(x_orig)
        assert close_enough(x_orig, fixed_vars[param])

        # check other direction
        block_again = joint.spaces[param].warp(fixed_vars[param])
        assert close_enough(block, block_again)
def test_real_range_warp_unwarp(warp, args):
    """Round-trip real values through a range-defined ``sp.Real`` space.

    ``args`` unpacks to ``(x, range_)``; ``x`` is clipped into the range
    before being warped.
    """
    x, range_ = args

    # Drop end points outside (0, inf) for log and outside (0, 1) for logit
    if warp == "log":
        range_ = range_[range_ > 0]
    elif warp == "logit":
        range_ = range_[(0 < range_) & (range_ < 1)]

    range_ = np.sort(range_)
    assume(len(range_) == 2 and range_[0] < range_[1])

    x = np.clip(x, range_[0], range_[1])

    space = sp.Real(warp=warp, range_=range_)

    warped = space.warp(x)
    assert warped.shape == x.shape + (1,)
    assert warped.dtype == sp.WARPED_DTYPE

    # Test bounds
    lower, upper = space.get_bounds().T
    assert np.all(lower <= warped)
    assert np.all(warped <= upper)

    warped_checked = space.validate_warped(warped)
    assert close_enough(warped, warped_checked)

    recovered = space.unwarp(warped)
    assert recovered.shape == x.shape
    recovered_checked = space.validate(recovered)
    assert close_enough(recovered, recovered_checked)

    assert close_enough(x, recovered)
def test_linear_rescale_bounds(args):
    """Check that ``linear_rescale`` maps the source end points to the targets.

    ``args`` unpacks to ``(lb0, ub0, lb1, ub1)``: the source interval followed
    by the target interval.
    """
    lb0, ub0, lb1, ub1 = args

    # Use sorted because hypothesis doesn't like using assume too often
    lb0, ub0 = sorted([lb0, ub0])
    lb1, ub1 = sorted([lb1, ub1])

    assume(lb0 < ub0)
    assume(lb1 <= ub1)

    mapped_lower = np_util.linear_rescale(lb0, lb0, ub0, lb1, ub1)
    assert close_enough(lb1, mapped_lower)

    mapped_upper = np_util.linear_rescale(ub0, lb0, ub0, lb1, ub1)
    assert close_enough(ub1, mapped_upper)
def test_linear_rescale_inverse(args):
    """Check that rescaling forward and then backward recovers the input.

    ``args`` unpacks to ``(X, lb0, ub0, lb1, ub1, enforce_bounds)``;
    ``enforce_bounds`` arrives as a number and is turned into a flag by its
    sign.
    """
    X, lb0, ub0, lb1, ub1, enforce_bounds = args
    enforce_bounds = enforce_bounds >= 0

    # Use sorted because hypothesis doesn't like using assume too often
    lb0, ub0 = sorted([lb0, ub0])
    lb1, ub1 = sorted([lb1, ub1])

    assume(lb0 < ub0)
    assume(lb1 < ub1)
    # Can't expect numerics to work well in these extreme cases:
    assume((ub0 - lb0) < 1e3 * (ub1 - lb1))

    if enforce_bounds:
        X = np.clip(X, lb0, ub0)

    forward = np_util.linear_rescale(X, lb0, ub0, lb1, ub1, enforce_bounds=enforce_bounds)
    backward = np_util.linear_rescale(forward, lb1, ub1, lb0, ub0, enforce_bounds=enforce_bounds)

    assert close_enough(backward, X)
def test_robust_standardize_to_sklearn(args):
    """Compare ``stats.robust_standardize`` with sklearn's ``robust_scale``.

    ``args`` unpacks to ``(X, q_level)``. The sklearn result is multiplied by
    the spread of the matching standard normal quantiles before comparing.
    """
    X, q_level = args

    # Symmetric quantile pair whose gap is q_level
    q0, q1 = 0.5 * (1.0 - q_level), 0.5 * (1.0 + q_level)
    assert close_enough(q1 - q0, q_level)

    X_bo = stats.robust_standardize(X, q_level=q_level)

    # sklearn is given a single-column 2d array and percentages, not fractions
    X = X[:, None]
    X_skl = robust_scale(
        X,
        axis=0,
        with_centering=True,
        with_scaling=True,
        quantile_range=[100.0 * q0, 100.0 * q1],
    )
    X_skl = X_skl[:, 0] * (sst.norm.ppf(q1) - sst.norm.ppf(q0))

    assert close_enough(X_bo, X_skl, equal_nan=True)
def test_f(args):
    """Compare ``stats.robust_standardize`` with ``f_vec`` on transposed input.

    ``args`` unpacks to ``(X, q_level)``. Both results must be ``float64`` and
    agree, with NaNs treated as equal.
    """
    X, q_level = args

    direct = stats.robust_standardize(X, q_level)
    via_f_vec = f_vec(X.T, q_level).T

    assert direct.dtype == "float64"
    assert via_f_vec.dtype == "float64"
    assert close_enough(direct, via_f_vec, equal_nan=True)
def test_bilog_props(args):
    """Check ``sp.bilog``: zero at zero, odd symmetry, finiteness preserved."""
    (x,) = args

    y = sp.bilog(x)

    assert sp.bilog(0) == 0  # This could be its own test
    # Odd symmetry: bilog(x) == -bilog(-x)
    assert close_enough(y, -sp.bilog(-x), equal_nan=True)
    assert np.isfinite(y) == np.isfinite(x)
def test_encoder_to_sklearn(args):
    """Compare ``sp.encode`` against sklearn's ``LabelBinarizer``.

    ``args`` unpacks to ``(X, labels, assume_sorted, dtype, assume_valid)``.
    """
    # sklearn cannot handle this correctly unless n >= 3
    X, labels, assume_sorted, dtype, assume_valid = args

    encoded = sp.encode(X, labels, assume_sorted=assume_sorted, dtype=dtype, assume_valid=assume_valid)

    binarizer = LabelBinarizer()
    binarizer.fit(labels)
    expected = binarizer.transform(X)

    assert close_enough(encoded, expected.astype(dtype))
def test_encode_decode(args):
    """Check that ``sp.decode`` inverts ``sp.encode``.

    ``args`` unpacks to ``(X, labels, assume_sorted, dtype, assume_valid)``.
    """
    X, labels, assume_sorted, dtype, assume_valid = args

    encoded = sp.encode(X, labels, assume_sorted=assume_sorted, dtype=dtype, assume_valid=assume_valid)

    if assume_sorted:  # otherwise labels will be re-arranged
        # Exactly one positive entry is expected; the unpacking enforces that
        ((idx,),) = np.where(encoded > 0)
        assert np.asarray(labels[idx]) == X
    assert encoded.dtype == dtype

    decoded = sp.decode(encoded, labels, assume_sorted=assume_sorted)

    assert close_enough(X, decoded)
def test_int_range_warp_unwarp(warp, args):
    """Round-trip integers through a range-defined ``sp.Integer`` space.

    Warning: this explicitly ignores issues with min max if going to int
    limit, since

    >>> np.array(INT_MAX).astype(np.float32).astype(np.int32)
    array(-2147483648, dtype=int32)

    Without any warning from numpy.
    """
    x, range_ = args

    # We could split out log into diff function without this pruning if we
    # start failing hypothesis health check.
    if warp == "log":
        range_ = range_[range_ > 0]

    range_ = np.sort(range_)
    assume(len(range_) == 2 and range_[0] < range_[1])

    x = np.clip(x, range_[0], range_[1]).astype(range_.dtype)

    space = sp.Integer(warp=warp, range_=range_)

    warped = space.warp(x)
    assert warped.shape == x.shape + (1,)
    assert warped.dtype == sp.WARPED_DTYPE

    # Test bounds
    lower, upper = space.get_bounds().T
    assert np.all(lower <= warped)
    assert np.all(warped <= upper)

    warped_checked = space.validate_warped(warped)
    assert close_enough(warped, warped_checked)

    recovered = space.unwarp(warped)
    assert recovered.shape == x.shape
    recovered_checked = space.validate(recovered)
    assert close_enough(recovered, recovered_checked)

    assert x.dtype == recovered.dtype
    # Close enough when evaluated as floats
    assert close_enough(x.astype("f"), recovered.astype("f"))
def test_joint_grid(args, max_interp):
    """Sanity-check the per-variable grids produced by ``JointSpace.grid``.

    ``args`` unpacks to ``(meta, _, _, _)``. For each variable the joint grid
    must equal the grid of its own space, hold native Python types of a single
    kind, and be sorted and free of duplicates.
    """
    meta, _, _, _ = args

    native_types = (bool, int, float, CAT_NATIVE_DTYPE)

    joint = sp.JointSpace(meta)
    lower, upper = joint.get_bounds().T

    grids = joint.grid(max_interp=max_interp)
    assert sorted(grids.keys()) == sorted(meta.keys())

    for var, grid in grids.items():
        space = joint.spaces[var]

        # Make sure same as calling direct
        direct_grid = space.grid(max_interp)
        assert grid == direct_grid

        if len(grid) == 0:
            assert grid == []
            # Empty grid: no interpolation points requested or no values given
            assert max_interp == 0 if space.values is None else len(space.values) == 0
            continue

        # Make sure native type
        assert all(type(xx) in native_types for xx in grid)
        first_type = type(grid[0])
        assert all(type(xx) == first_type for xx in grid)

        # np.unique sorts and de-duplicates, so equality means grid did too
        assert np.all(np.array(grid) == np.unique(grid))

        if max_interp >= 2:
            assert space.lower is None or close_enough(space.lower, grid[0])
            assert space.upper is None or close_enough(space.upper, grid[-1])

        if space.values is not None:
            assert np.all(space.values == grid)
        else:
            assert len(grid) <= max_interp
def test_bool_warp_unwarp(args):
    """Round-trip values through ``sp.Boolean`` warp and unwarp."""
    (x,) = args

    space = sp.Boolean()

    warped = space.warp(x)
    assert warped.shape == x.shape + (1,)
    assert warped.dtype == sp.WARPED_DTYPE

    # Test bounds
    lower, upper = space.get_bounds().T
    assert np.all(lower <= warped)
    assert np.all(warped <= upper)

    warped_checked = space.validate_warped(warped)
    assert close_enough(warped, warped_checked)

    recovered = space.unwarp(warped)
    assert recovered.shape == x.shape
    recovered_checked = space.validate(recovered)
    assert close_enough(recovered, recovered_checked)

    assert close_enough(x, recovered)
def test_decode_to_sklearn(args):
    """Compare ``sp.decode`` with ``LabelBinarizer.inverse_transform``.

    ``args`` unpacks to ``(Y, labels, assume_sorted, dtype, assume_valid)``;
    only ``Y``, ``labels`` and ``assume_sorted`` are used here.
    """
    Y, labels, assume_sorted, dtype, assume_valid = args

    # Last axis of the encoded input must have one column per label
    assert Y.ndim >= 1 and Y.shape[-1] == len(labels)

    decoded = sp.decode(Y, labels, assume_sorted=assume_sorted)

    binarizer = LabelBinarizer()
    binarizer.fit(labels)
    expected = binarizer.inverse_transform(Y)

    assert decoded.dtype.kind == CAT_KIND
    assert close_enough(decoded, expected.astype(decoded.dtype))
def test_real_values_warp_unwarp(warp, args):
    """Round-trip reals through a ``sp.Real`` space defined by explicit values.

    ``args`` unpacks to ``(x, values)``. Each entry of ``x`` is snapped to the
    nearest entry of ``values`` before being warped.
    """
    x, values = args

    # Drop values outside (0, inf) for log and outside (0, 1) for logit
    if warp == "log":
        values = values[values > 0]
    elif warp == "logit":
        values = values[(0 < values) & (values < 1)]

    # We could eliminate need for this if we split out test for log and logit
    # cases and specify unique flag, but works as is
    grid = np.unique(values)
    assume(len(grid) >= 2)

    snap = interp1d(grid, grid, kind="nearest", fill_value="extrapolate")
    x = snap(x)
    assert x.ndim == 1  # make sure interp1d did not mess it up

    space = sp.Real(warp=warp, values=values)

    warped = space.warp(x)
    assert warped.shape == x.shape + (1,)
    assert warped.dtype == sp.WARPED_DTYPE

    # Test bounds
    lower, upper = space.get_bounds().T
    assert np.all(lower <= warped)
    assert np.all(warped <= upper)

    warped_checked = space.validate_warped(warped)
    assert close_enough(warped, warped_checked)

    recovered = space.unwarp(warped)
    assert recovered.shape == x.shape
    recovered_checked = space.validate(recovered)
    assert close_enough(recovered, recovered_checked)

    assert close_enough(x, recovered)
def test_linear_rescale_bound_modes(args):
    """Check that ``enforce_bounds`` is a no-op for inputs already in range.

    ``args`` unpacks to ``(X, lb0, ub0, lb1, ub1)``; ``X`` is clipped into the
    source interval before both rescale modes are compared.
    """
    X, lb0, ub0, lb1, ub1 = args

    # Use sorted because hypothesis doesn't like using assume too often
    lb0, ub0 = sorted([lb0, ub0])
    lb1, ub1 = sorted([lb1, ub1])

    assume(lb0 < ub0)
    assume(lb1 <= ub1)

    X = np.clip(X, lb0, ub0)

    unbounded = np_util.linear_rescale(X, lb0, ub0, lb1, ub1, enforce_bounds=False)
    bounded = np_util.linear_rescale(X, lb0, ub0, lb1, ub1, enforce_bounds=True)

    assert close_enough(unbounded, bounded)
def test_joint_space_unwarp_warp(args):
    """Round-trip a list of points through ``JointSpace`` warp and unwarp.

    ``args`` unpacks to ``(meta, X, _, _)``. ``X`` is indexed first by
    position and then by variable name.
    """
    meta, X, _, _ = args

    joint = sp.JointSpace(meta)
    joint.validate(X)

    warped = joint.warp(X)
    assert warped.dtype == sp.WARPED_DTYPE

    # Test bounds
    lower, upper = joint.get_bounds().T
    assert np.all(lower <= warped)
    assert np.all(warped <= upper)

    recovered = joint.unwarp(warped)

    # Every variable of every point must survive the round trip
    assert all(
        all(close_enough(point[name], recovered[idx][name]) for name in point)
        for idx, point in enumerate(X)
    )
    joint.validate(recovered)
def test_random_search_suggest_sanity(api_args, n_suggest, seed):
    """Check that random-search suggestions ignore history and are well formed.

    ``api_args`` unpacks to ``(meta, X, y, _)``. The history is split in two
    and ``suggest_dict`` is called once per half with identically seeded
    random states; both calls must return the same suggestions. The
    suggestions must then validate against the joint space and warp to a
    float array of shape ``(n_suggest, D)`` lying inside the space bounds.
    """
    meta, X, y, _ = api_args

    # Get the unwarped X
    S = sp.JointSpace(meta)
    lower, upper = S.get_bounds().T
    S.validate(X)

    N = len(X)
    # Split history and call twice with diff histories but same seed
    M = N // 2
    X1, X2 = X[:M], X[M:]
    y1, y2 = y[:M], y[M:]

    x_guess = suggest_dict(X1, y1, meta, n_suggest, random=np.random.RandomState(seed))
    x_guess2 = suggest_dict(X2, y2, meta, n_suggest, random=np.random.RandomState(seed))

    # Check types too
    assert len(x_guess) == n_suggest
    # Check the second call's length as well, so a short list fails here
    # instead of raising IndexError in the comparison below.
    assert len(x_guess2) == n_suggest
    assert all(
        all(close_enough(x_guess[nn][k], x_guess2[nn][k]) for k in x_guess[nn])
        for nn in range(len(x_guess))
    )
    assert np.all(x_guess == x_guess2)

    # Make sure validated
    S.validate(x_guess)
    S.validate(x_guess2)

    # Test sanity of output
    (D,) = lower.shape

    x_guess_w = S.warp(x_guess)
    assert type(x_guess_w) == np.ndarray
    assert x_guess_w.dtype.kind == "f"
    assert x_guess_w.shape == (n_suggest, D)
    # The original repeated the shape assert and never checked the lower
    # bound; check both sides of the box.
    assert np.all(lower <= x_guess_w)
    assert np.all(x_guess_w <= upper)
def test_bilog_biexp(args):
    """Check that ``sp.biexp`` inverts ``sp.bilog`` (NaNs compare equal)."""
    (x,) = args

    round_trip = sp.biexp(sp.bilog(x))

    assert close_enough(round_trip, x, equal_nan=True)