def test_quantile(args):
    X, = args

    ll = qt.quantile(X, np.nextafter(0, 1))
    assert ll == -np.inf if len(X) == 0 else ll == np.min(X)

    uu = qt.quantile(X, np.nextafter(1, 0))
    assert uu == -np.inf if len(X) == 0 else uu == np.max(X)

    if len(X) % 2 == 1:
        mm = qt.quantile(X, 0.5)
        assert mm == np.median(X)
def test_quantile_to_np(args):
    X, q = args

    estimate = qt.quantile(X, q)
    # Correct the off-by-1 error in numpy percentile. This might still have
    # issues due to round off error by multiplying by 100 since powers of 10
    # are not very fp friendly.
    estimate_np = np.percentile(np.concatenate(([-np.inf], X)), 100 * q, interpolation="higher")
    assert estimate == estimate_np
def test_quantile_and_CI(args):
    X, q, alpha = args

    estimate0, LB0, UB0 = qt.quantile_and_CI(X, q, alpha)
    assert LB0 <= estimate0
    assert estimate0 <= UB0

    # Recompute without using _ internal funcs
    LB, UB = qt.quantile_CI(X, q, alpha=alpha)
    estimate = qt.quantile(X, q)
    assert estimate0 == estimate
    assert LB0 == LB
    assert UB0 == UB
def test_min_quantile_CI(args):
    X, q, m, alpha = args

    estimate0, LB0, UB0 = qt.min_quantile_CI(X, q, m, alpha)
    assert LB0 <= estimate0
    assert estimate0 <= UB0

    # Recompute without using _ internal funcs
    q = 1.0 - (1.0 - q) ** (1.0 / m)
    LB, UB = qt.quantile_CI(X, q, alpha=alpha)
    estimate = qt.quantile(X, q)
    assert estimate0 == estimate
    assert LB0 == LB
    assert UB0 == UB
def test_quantile_CI(args):
    X, q, alpha = args

    idx_q = qt._quantile(len(X), q)
    idx_l, idx_u = qt._quantile_CI(len(X), q, alpha)
    assert idx_l <= idx_q
    assert idx_q <= idx_u

    # Lots of checks already inside quantile_CI
    LB, UB = qt.quantile_CI(X, q, alpha)
    assert LB <= UB

    estimate = qt.quantile(X, q)
    assert LB <= estimate
    assert estimate <= UB
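
# Illustrative sketch, not part of the test suite: `_demo_quantile_api` is a hypothetical
# helper showing the public quantile API that the tests above exercise, on synthetic data.
# It assumes `qt` and `np` are imported as in these tests; the seed, sample size, and alpha
# below are arbitrary.
def _demo_quantile_api():
    rng = np.random.RandomState(0)
    X = rng.randn(101)

    # Point estimate of the median together with a 95% confidence interval around it.
    estimate, LB, UB = qt.quantile_and_CI(X, 0.5, 0.05)
    assert LB <= estimate <= UB

    # Estimate (with CI) of the median of the minimum over m iid draws. Per
    # test_min_quantile_CI, this equals the 1 - (1 - q) ** (1 / m) quantile of a single draw.
    m = 10
    est_min, LB_min, UB_min = qt.min_quantile_CI(X, 0.5, m, 0.05)
    assert est_min == qt.quantile(X, 1.0 - (1.0 - 0.5) ** (1.0 / m))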
def compute_baseline(perf_da):
    """Compute a performance baseline of base and best performance from the aggregate experimental results.

    Parameters
    ----------
    perf_da : :class:`xarray:xarray.DataArray`
        Aggregate experimental results with each function evaluation in the experiments. `perf_da` has dimensions
        ``(ITER, SUGGEST, TEST_CASE, METHOD, TRIAL)`` and is assumed to have no nan values.

    Returns
    -------
    baseline_ds : :class:`xarray:xarray.Dataset`
        Dataset with baseline performance. It has variables ``(PERF_MED, PERF_MEAN, PERF_CLIP, PERF_BEST)`` with
        dimensions ``(ITER, TEST_CASE)``, ``(ITER, TEST_CASE)``, ``(TEST_CASE,)``, and ``(TEST_CASE,)``, respectively.
        `PERF_MED` is a baseline of performance based on random search when using medians to summarize performance.
        Likewise, `PERF_MEAN` is for means. `PERF_CLIP` is an upper bound to clip poor performance when using the
        mean. `PERF_BEST` is an estimate of the global minimum.
    """
    validate_agg_perf(perf_da)

    ref_prefix = str_join_safe(ARG_DELIM, (cc.RANDOM_SEARCH, ""))
    ref_random = [kk for kk in perf_da.coords[METHOD].values if kk.startswith(ref_prefix)]
    assert len(ref_random) > 0, "Did not find any random search in methods."

    # How many points we will have after each batch
    trials_grid = perf_da.sizes[SUGGEST] * (1 + np.arange(perf_da.sizes[ITER]))

    # Now iterate over problems and get baseline performance
    baseline_ds = ds_like_mixed(
        perf_da,
        [
            (PERF_MED, [ITER, TEST_CASE]),
            (PERF_MEAN, [ITER, TEST_CASE]),
            (PERF_CLIP, [TEST_CASE]),
            (PERF_BEST, [TEST_CASE]),
        ],
        (ITER, TEST_CASE),
    )
    for func_name in perf_da.coords[TEST_CASE].values:
        random_evals = np.ravel(perf_da.sel({METHOD: ref_random, TEST_CASE: func_name}, drop=True).values)
        assert random_evals.size > 0

        # We will likely change this to a min mean (instead of median) using a different util in near future:
        assert np.all(trials_grid == perf_da.sizes[SUGGEST] * (1 + baseline_ds.coords[ITER].values))
        rand_perf, _, _ = qt.min_quantile_CI(random_evals, EVAL_Q, trials_grid, alpha=ALPHA)
        baseline_ds[PERF_MED].loc[{TEST_CASE: func_name}] = rand_perf

        # Decide on a level to clip when computing the mean
        base_clip_val = qt.quantile(random_evals, EVAL_Q)
        assert np.isfinite(base_clip_val), "Median random search performance is not even finite."
        assert (perf_da.sizes[SUGGEST] > 1) or np.isclose(base_clip_val, rand_perf[0])
        baseline_ds[PERF_CLIP].loc[{TEST_CASE: func_name}] = base_clip_val

        # Estimate the global min via best of any method
        best_opt = np.min(perf_da.sel({TEST_CASE: func_name}, drop=True).values)
        if np.any(rand_perf <= best_opt):
            warnings.warn(
                "Random search is also the best search on %s, the normalized score may be meaningless." % func_name,
                RuntimeWarning,
            )
        assert np.isfinite(best_opt), "Best performance found is not even finite."
        logger.info("best %s %f" % (func_name, best_opt))

        # Now make sure strictly less than to avoid assert error in linear_rescale. This will likely give normalized
        # scores of +inf or -inf, but with median summary that is ok. When everything goes to mean, we will need to
        # change this:
        pad = PAD_FACTOR * np.spacing(-np.maximum(MIN_POS, np.abs(best_opt)))
        assert pad < 0
        best_opt = best_opt + pad
        assert np.isfinite(best_opt), "Best performance too close to limit of float range."
        assert np.all(rand_perf > best_opt)
        baseline_ds[PERF_BEST].loc[{TEST_CASE: func_name}] = best_opt

        random_evals = np.minimum(base_clip_val, random_evals)
        assert np.all(np.isfinite(random_evals))
        assert np.all(best_opt <= random_evals)
        rand_perf = em.expected_min(random_evals, trials_grid)
        rand_perf_fixed = np.minimum(base_clip_val, rand_perf)
        assert np.allclose(rand_perf, rand_perf_fixed)
        rand_perf_fixed = np.minimum.accumulate(rand_perf_fixed)
        assert np.allclose(rand_perf, rand_perf_fixed)
        baseline_ds[PERF_MEAN].loc[{TEST_CASE: func_name}] = rand_perf_fixed

    assert not any(np.any(np.isnan(baseline_ds[kk].values)) for kk in baseline_ds)
    validate(baseline_ds)
    return baseline_ds
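
# Minimal sketch (hypothetical helper `_demo_random_search_baseline`, synthetic data) of the
# random-search baseline logic in compute_baseline: after each batch, the median-summary
# baseline is the EVAL_Q quantile of the minimum over m = n_suggest * batch iid draws from
# the pooled random-search evaluations, estimated with qt.min_quantile_CI. The constants
# 0.5 and 0.05 below stand in for EVAL_Q and ALPHA.
def _demo_random_search_baseline(n_iter=5, n_suggest=4):
    rng = np.random.RandomState(0)
    random_evals = rng.rand(1000)  # stand-in for the pooled random-search evaluations

    # Number of function evaluations available after each batch, as in compute_baseline.
    trials_grid = n_suggest * (1 + np.arange(n_iter))
    rand_perf, _, _ = qt.min_quantile_CI(random_evals, 0.5, trials_grid, alpha=0.05)

    # The baseline weakly improves (decreases) as the number of trials grows, and it never
    # beats the best random evaluation actually observed.
    assert np.all(np.diff(rand_perf) <= 0)
    assert np.all(rand_perf >= np.min(random_evals))
    return rand_perf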