def test_broadcast_shapes_gufunc_args(parsed_sig_and_size, max_dims_extra, dtype, unique, data):
    """Check shapes and elements of `gu.gufunc_args` output when broadcasting.

    A random subset of argument positions is drawn as `excluded`, arrays are
    generated for the signature, and the resulting shapes and elements are
    handed to `validate_bcast_shapes` and `validate_elements`.
    """
    full_sig, min_side, max_side = parsed_sig_and_size
    sig_text = unparse(full_sig)
    # Only the first half of the parsed pair is needed from here on.
    input_sig, _ = full_sig

    last_position = len(input_sig) - 1
    excluded = data.draw(sets(integers(0, last_position)).map(tuple))

    strategy = gu.gufunc_args(
        sig_text,
        excluded=excluded,
        min_side=min_side,
        max_side=max_side,
        max_dims_extra=max_dims_extra,
        dtype=dtype,
        elements=from_dtype(np.dtype(dtype)),
        unique=unique,
    )
    arrays = data.draw(strategy)

    observed_shapes = list(map(np.shape, arrays))
    validate_bcast_shapes(observed_shapes, input_sig, excluded, min_side, max_side, max_dims_extra)
    validate_elements(arrays, dtype=dtype, unique=unique)
def test_broadcast_elements_gufunc_args(parsed_sig, min_side, max_side, max_dims_extra, dtype, data):
    """Check that broadcast arrays from `gu.gufunc_args` only use allowed values.

    The element strategy is `sampled_from` over a pool drawn with
    `real_from_dtype(dtype)`; the generated arrays are validated against that
    same pool and `dtype`.
    """
    sig_text = unparse(parsed_sig)
    input_sig, _ = parsed_sig

    last_position = len(input_sig) - 1
    excluded = data.draw(sets(integers(0, last_position)).map(tuple))

    # The two side bounds may arrive in either order; sort them into (low, high).
    lower, upper = sorted([min_side, max_side])

    allowed = data.draw(real_from_dtype(dtype))
    strategy = gu.gufunc_args(
        sig_text,
        excluded=excluded,
        min_side=lower,
        max_side=upper,
        max_dims_extra=max_dims_extra,
        dtype=dtype,
        elements=sampled_from(allowed),
    )
    arrays = data.draw(strategy)

    validate_elements(arrays, choices=allowed, dtype=dtype)
def test_robust_standardize_broadcast():
    """Compare `stats.robust_standardize` with a `np.vectorize` reference.

    Need to do things different here since standardize broadcasts over the
    wrong dimension (0 instead of -1). The reference is therefore applied to
    the transposed input and its result is transposed back before comparing.
    """
    # Build vectorize version, this is just loop inside.
    f_vec = np.vectorize(stats.robust_standardize, signature="(n),()->(n)", otypes=["float64"])

    # `np.float64` is used instead of the `np.float_` alias, which was removed
    # in NumPy 2.0; both name the same dtype.
    @given(
        gufunc_args(
            "(n,m),()->(n,m)",
            dtype=np.float64,
            min_side={"n": 2},
            elements=[mfloats(), probs()],
        )
    )
    def test_f(args):
        X, q_level = args

        R1 = stats.robust_standardize(X, q_level)
        R2 = f_vec(X.T, q_level).T

        assert R1.dtype == "float64"
        assert R2.dtype == "float64"
        assert close_enough(R1, R2, equal_nan=True)

    # Call the test
    test_f()
def gufunc_floats(signature, min_side=0, max_side=5, **kwargs):
    """Build a `gufunc_args` strategy whose arrays are float64.

    Parameters
    ----------
    signature : str
        Signature forwarded unchanged to `gufunc_args`.
    min_side : int or dict
        Forwarded to `gufunc_args` as `min_side`.
    max_side : int or dict
        Forwarded to `gufunc_args` as `max_side`.
    **kwargs
        Forwarded to `floats` to build the element strategy.

    Returns
    -------
    S : strategy
        The result of `gufunc_args` with ``dtype=np.float64`` and
        ``unique=False``.
    """
    elements = floats(**kwargs)
    # `np.float64` replaces the `np.float_` alias, which was removed in
    # NumPy 2.0; both name the same dtype.
    S = gufunc_args(
        signature,
        dtype=np.float64,
        elements=elements,
        unique=False,
        min_side=min_side,
        max_side=max_side,
    )
    return S
def test_elements_gufunc_args(parsed_sig, min_side, max_side, dtype, data):
    """Check that arrays from `gu.gufunc_args` only use allowed values.

    A pool of values is drawn with `real_from_dtype(dtype)` and wrapped in
    `sampled_from`; the generated arrays are validated against that same pool
    and `dtype`.
    """
    allowed = data.draw(real_from_dtype(dtype))
    pick_allowed = sampled_from(allowed)

    sig_text = unparse(parsed_sig)
    # The two side bounds may arrive in either order; sort them into (low, high).
    lower, upper = sorted([min_side, max_side])

    arrays = data.draw(
        gu.gufunc_args(sig_text, min_side=lower, max_side=upper, dtype=dtype, elements=pick_allowed)
    )
    validate_elements(arrays, choices=allowed, dtype=dtype)
def test_shapes_gufunc_args(parsed_sig_and_size, dtype, unique, data):
    """Check shapes and elements of `gu.gufunc_args` output without broadcasting.

    Arrays are generated for the signature and their shapes are handed to
    `validate_shapes`, their contents to `validate_elements`.
    """
    full_sig, min_side, max_side = parsed_sig_and_size
    sig_text = unparse(full_sig)

    # We could also test using elements strategy that then requires casting,
    # but that would be kind of complicated to come up with compatible combos
    # Assumes zero broadcast dims by default
    strategy = gu.gufunc_args(
        sig_text,
        min_side=min_side,
        max_side=max_side,
        dtype=dtype,
        elements=from_dtype(np.dtype(dtype)),
        unique=unique,
    )
    arrays = data.draw(strategy)

    validate_shapes(list(map(np.shape, arrays)), full_sig[0], min_side, max_side)
    validate_elements(arrays, dtype=dtype, unique=unique)
def test_elements_tuple_of_arrays(shapes, dtype, data):
    """Check that arrays from `gu._tuple_of_arrays` only use allowed values.

    A pool of values is drawn with `real_from_dtype(dtype)` and wrapped in
    `sampled_from`; the generated arrays are validated against that same pool
    and `dtype`.
    """
    allowed = data.draw(real_from_dtype(dtype))
    strategy = gu._tuple_of_arrays(shapes, dtype, elements=sampled_from(allowed))
    arrays = data.draw(strategy)
    validate_elements(arrays, choices=allowed, dtype=dtype)


@given(
    gu.gufunc_args(
        "(1),(1),(1),()->()",
        dtype=["object", "object", "object", "bool"],
        elements=[_st_shape, scalar_dtypes(), just(None), booleans()],
        min_side=1,
        max_dims_extra=1,
    ),
    data(),
)
def test_bcast_tuple_of_arrays(args, data):
    """Now testing broadcasting of tuple_of_arrays, kind of crazy since it uses
    gufuncs to test itself. Some awkwardness here since there are a lot of
    corner cases when dealing with object types in the numpy extension.

    For completeness, should probably right a function like this for the other
    functions, but there always just pass dtype, elements, unique to
    `_tuple_of_arrays` anyway, so this should be pretty good.
    """
def broadcasted(f, signature, itypes, otypes, elements, unique=False, excluded=(), min_side=0, max_side=5, max_dims_extra=2):
    """Strategy that makes it easy to test the broadcasting semantics of a
    function against the 'ground-truth' broadcasting convention provided by
    :obj:`numpy.vectorize`.

    Parameters
    ----------
    f : callable
        The original function, which handles broadcasting itself. It must
        return an `ndarray`, or multiple `ndarray` (which Python treats as a
        `tuple`) if returning two or more output arguments.
    signature : str
        Signature for shapes to be compatible with. Expects a string in the
        format of a numpy generalized universal function signature, e.g.,
        `'(m,n),(n)->(m)'` for vectorized matrix-vector multiplication.
        Officially, only ascii characters are supported.
    itypes : list-like of dtype
        List of numpy `dtype` for each argument. These can be either strings
        (``'int64'``), type (``np.int64``), or numpy `dtype`
        (``np.dtype('int64')``). A single `dtype` can be supplied for all
        arguments.
    otypes : list of dtype
        The dtype for the outputs of `f`. It must be a list with one dtype for
        each output argument of `f`. It must be a singleton list if `f` only
        returns a single output. It can also be set to `None` to leave it to
        be inferred, but this can create issues with empty arrays, so it is
        not officially supported here.
    elements : list-like of strategy
        Strategies to fill in array elements on a per argument basis. One can
        also specify a single strategy
        (e.g., :func:`hypothesis.strategies.floats`) and have it applied to
        all arguments.
    unique : list-like of bool
        Boolean flag to specify if all elements in an array must be unique.
        One can also specify a single boolean to apply it to all arguments.
    excluded : list-like of integers
        Set of integers representing the positional arguments for which the
        function will not be vectorized. Uses the same format as
        :obj:`numpy.vectorize`.
    min_side : int or dict
        Minimum size of any side of the arrays. It is good to test the corner
        cases of 0 or 1 sized dimensions when applicable, but if not, a min
        size can be supplied here. Minimums can be provided on a per-dimension
        basis using a dict, e.g. ``min_side={'n': 2}``. One can use, e.g.,
        ``min_side={hypothesis.extra.gufunc.BCAST_DIM: 2}`` to limit the size
        of the broadcasted dimensions.
    max_side : int or dict
        Maximum size of any side of the arrays. This can usually be kept small
        and still find most corner cases in testing. Dictionaries can be
        supplied as with `min_side`.
    max_dims_extra : int
        Maximum number of extra dimensions that can be added on the left of
        arrays for broadcasting. This should be kept small as the memory used
        grows exponentially with extra dimensions.

    Returns
    -------
    funcs_and_args : strategy
        Strategy built with `tuples` that generates ``(f, f_vec, res)``:

        f : callable
            The original function, which handles broadcasting itself.
        f_vec : callable
            Function that should be functionally equivalent to `f` but with
            broadcasting handled by :obj:`numpy.vectorize`.
        res : tuple of ndarrays
            Resulting ndarrays with shapes consistent with `signature`. Extra
            dimensions for broadcasting will be present.

    Examples
    --------

    .. code-block:: pycon

      >>> import numpy as np
      >>> from hypothesis.strategies import integers, booleans
      >>> broadcasted(np.add, '(),()->()', ['int64', 'bool'], ['int64'],
                      elements=[integers(0,9), booleans()],
                      unique=[True, False]).example()
      (<ufunc 'add'>,
       <numpy.lib.function_base.vectorize at 0x11a777690>,
       (array([5, 6]), array([ True], dtype=bool)))
      >>> broadcasted(np.add, '(),()->()', ['int64', 'bool'], ['int64'],
                      elements=[integers(0,9), booleans()],
                      excluded=(1,)).example()
      (<ufunc 'add'>,
       <numpy.lib.function_base.vectorize at 0x11a715b10>,
       (array([9]), array(True, dtype=bool)))
      >>> f, fv, args = broadcasted(np.add, '(),()->()', ['int64', 'bool'],
                                    ['int64'],
                                    elements=[integers(0,9), booleans()],
                                    min_side=1, max_side=3,
                                    max_dims_extra=1).example()
      >>> f is np.add
      True
      >>> f(*args)
      7
      >>> fv(*args)
      array(7)
    """
    # cache and doc not needed for property testing, excluded not actually
    # needed here because we don't generate extra dims for the excluded args.
    # Using the excluded argument in np.vectorize only seems to confuse it in
    # corner cases.
    reference = np.vectorize(f, signature=signature, otypes=otypes)

    size_options = {
        "min_side": min_side,
        "max_side": max_side,
        "max_dims_extra": max_dims_extra,
    }
    arg_strategy = gufunc_args(signature, itypes, elements, unique=unique, excluded=excluded, **size_options)

    return tuples(just(f), just(reference), arg_strategy)
return 1.0 # Can't say anything about scale => p=1 _, pval = sst.ttest_1samp(x, 0.0) if np.isnan(pval): # Should only be possible if scale underflowed to zero: assert np.var(x, ddof=1) <= 1e-100 # It is debatable if the condition should be ``np.mean(x) == 0.0`` or # ``np.all(x == 0.0)``. Should not matter in practice. pval = np.float(np.mean(x) == 0.0) assert 0.0 <= pval and pval <= 1.0 return pval @given( gufunc_args("(n),()->(n)", dtype=np.float_, elements=[mfloats(), probs()], min_side=2)) def test_robust_standardize_to_sklearn(args): X, q_level = args q0, q1 = 0.5 * (1.0 - q_level), 0.5 * (1.0 + q_level) assert close_enough(q1 - q0, q_level) X_bo = stats.robust_standardize(X, q_level=q_level) X = X[:, None] X_skl = robust_scale(X, axis=0, with_centering=True, with_scaling=True, quantile_range=[100.0 * q0, 100.0 * q1])
exp.run_sklearn_study(opt_class, opt_kwargs, model_name, dataset, scorer, n_calls, n_suggestions) @given(sampled_from(MODEL_NAMES), sampled_from(DATA_LOADER_NAMES), sampled_from(METRICS)) @settings(deadline=None) def test_get_objective_signature(model_name, dataset, scorer): prob_type = data.get_problem_type(dataset) assume(scorer in data.METRICS_LOOKUP[prob_type]) exp.get_objective_signature(model_name, dataset, scorer) @given(gufunc_args("(n,m)->()", dtype=np.float_, elements=floats())) def test_build_eval_ds(args): function_evals, = args exp.build_eval_ds(function_evals) @given( gufunc_args("(n),(n,m),(n)->()", dtype=np.float_, elements=floats(min_value=0, max_value=1e6))) def test_build_timing_ds(args): suggest_time, eval_time, observe_time = args exp.build_timing_ds(suggest_time, eval_time, observe_time) def test_get_opt_class_module():
scorer, n_calls, n_suggestions) @given(sampled_from(MODEL_NAMES), sampled_from(DATA_LOADER_NAMES), sampled_from(METRICS)) @settings(deadline=None) def test_get_objective_signature(model_name, dataset, scorer): prob_type = data.get_problem_type(dataset) assume(scorer in data.METRICS_LOOKUP[prob_type]) exp.get_objective_signature(model_name, dataset, scorer) @given( gufunc_args("(n,m,k),(k)->()", dtype=[np.float_, str], elements=[floats(), text()], unique=[False, True])) def test_build_eval_ds(args): function_evals, objective_names = args exp.build_eval_ds(function_evals, objective_names) @given( gufunc_args("(n),(n,m),(n)->()", dtype=np.float_, elements=floats(min_value=0, max_value=1e6))) def test_build_timing_ds(args): suggest_time, eval_time, observe_time = args exp.build_timing_ds(suggest_time, eval_time, observe_time)