def test_min_quantile_CI(args):
    X, q, m, alpha = args

    estimate0, LB0, UB0 = qt.min_quantile_CI(X, q, m, alpha)
    assert LB0 <= estimate0
    assert estimate0 <= UB0

    # Recompute without using _ internal funcs
    q = 1.0 - (1.0 - q) ** (1.0 / m)
    LB, UB = qt.quantile_CI(X, q, alpha=alpha)
    estimate = qt.quantile(X, q)

    assert estimate0 == estimate
    assert LB0 == LB
    assert UB0 == UB
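
# --- Illustrative sketch (not part of the original test suite) ---
# The test above relies on the identity that the q-quantile of the minimum of
# m iid draws of X equals the q' = 1 - (1 - q)**(1/m) quantile of X itself,
# since P(min <= t) = 1 - (1 - P(X <= t))**m. A quick numpy-only Monte Carlo
# check of that identity; the function name and parameter defaults here are
# illustrative, not part of the package:
def _demo_min_quantile_identity(q=0.25, m=10, n=100000, seed=0):
    rng = np.random.RandomState(seed)
    X = rng.randn(n)
    # Empirical q-quantile of the min over m fresh draws
    mins = rng.randn(n, m).min(axis=1)
    direct = np.quantile(mins, q)
    # Same target via the transformed level applied to X itself
    q_prime = 1.0 - (1.0 - q) ** (1.0 / m)
    via_identity = np.quantile(X, q_prime)
    # The two estimates agree up to Monte Carlo error
    return direct, via_identity
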
def mc_test_min_quantile_CI(mc_runs=1000, n=2000, q=0.5, m=100, alpha=0.05, random=np.random):
    qq_level = 1.0 - (1.0 - q) ** (1.0 / m)
    q0 = ss.norm.ppf(qq_level)

    X = random.randn(mc_runs, n)
    R = np.array([qt.min_quantile_CI(xx, q, m, alpha) for xx in X])
    LB, UB = R[:, 1], R[:, 2]

    n_pass = np.sum((LB <= q0) & (q0 <= UB))
    # This is only a one-sided test
    pval = ss.binom.cdf(n_pass, mc_runs, 1 - alpha)
    return pval
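
# --- Illustrative usage sketch (not part of the original test suite) ---
# mc_test_min_quantile_CI checks CI coverage: q0 is the true min-quantile
# under a standard normal, and pval is the one-sided binomial probability of
# seeing at most n_pass covering intervals if the true coverage were
# 1 - alpha. A small p-value therefore flags possible under-coverage. The
# driver below, including the threshold and reduced run sizes, is an
# illustrative assumption, not part of the package:
def _check_min_quantile_coverage(threshold=0.01):
    pval = mc_test_min_quantile_CI(mc_runs=200, n=500, random=np.random.RandomState(0))
    assert pval > threshold, "min_quantile_CI appears to under-cover"
    return pval
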
def test_min_quantile_CI_to_max(args):
    X, q, m, alpha = args
    epsilon = 1e-8  # Small allowance for numerics

    estimate0, LB0, UB0 = qt.min_quantile_CI(X, q, m, alpha)

    # Try just above and below to allow for numerics error in case we are
    # just on the boundary.
    estimate1, LB1, UB1 = qt.max_quantile_CI(-X, (1.0 - q) - epsilon, m, alpha)
    estimate2, LB2, UB2 = qt.max_quantile_CI(-X, 1.0 - q, m, alpha)
    estimate3, LB3, UB3 = qt.max_quantile_CI(-X, (1.0 - q) + epsilon, m, alpha)

    if len(X) == 0:
        assert estimate0 == -np.inf  # quantile spec rounds down if n=0
    else:
        assert -estimate0 in (estimate1, estimate2, estimate3)
    assert -LB0 in (UB1, UB2, UB3)
    assert -UB0 in (LB1, LB2, LB3)
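
# --- Illustrative sketch (not part of the original test suite) ---
# The duality being tested: min(X) == -max(-X), so the q-quantile of the
# minimum of m draws equals minus the (1 - q)-quantile of the maximum of m
# negated draws. A numpy-only check of the underlying identity on a shared
# sample; the function name and defaults are illustrative:
def _demo_min_max_duality(q=0.3, m=5, n=10000, seed=0):
    rng = np.random.RandomState(seed)
    draws = rng.randn(n, m)
    min_side = np.quantile(draws.min(axis=1), q)
    max_side = -np.quantile((-draws).max(axis=1), 1.0 - q)
    # Computed on the same draws, so these agree up to floating point
    return min_side, max_side
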
def compute_baseline(perf_da):
    """Compute a performance baseline of base and best performance from the aggregate experimental results.

    Parameters
    ----------
    perf_da : :class:`xarray:xarray.DataArray`
        Aggregate experimental results with each function evaluation in the experiments. `perf_da` has dimensions
        ``(ITER, SUGGEST, TEST_CASE, METHOD, TRIAL)`` and is assumed to have no nan values.

    Returns
    -------
    baseline_ds : :class:`xarray:xarray.Dataset`
        Dataset with baseline performance. It has variables ``(PERF_MED, PERF_MEAN, PERF_CLIP, PERF_BEST)`` with
        dimensions ``(ITER, TEST_CASE)``, ``(ITER, TEST_CASE)``, ``(TEST_CASE,)``, and ``(TEST_CASE,)``, respectively.
        `PERF_MED` is a baseline of performance based on random search when using medians to summarize performance.
        Likewise, `PERF_MEAN` is for means. `PERF_CLIP` is an upper bound to clip poor performance when using the
        mean. `PERF_BEST` is an estimate of the global minimum.
    """
    validate_agg_perf(perf_da)

    ref_prefix = str_join_safe(ARG_DELIM, (cc.RANDOM_SEARCH, ""))
    ref_random = [kk for kk in perf_da.coords[METHOD].values if kk.startswith(ref_prefix)]
    assert len(ref_random) > 0, "Did not find any random search in methods."

    # How many points we will have after each batch
    trials_grid = perf_da.sizes[SUGGEST] * (1 + np.arange(perf_da.sizes[ITER]))

    # Now iterate over problems and get baseline performance
    baseline_ds = ds_like_mixed(
        perf_da,
        [
            (PERF_MED, [ITER, TEST_CASE]),
            (PERF_MEAN, [ITER, TEST_CASE]),
            (PERF_CLIP, [TEST_CASE]),
            (PERF_BEST, [TEST_CASE]),
        ],
        (ITER, TEST_CASE),
    )
    for func_name in perf_da.coords[TEST_CASE].values:
        random_evals = np.ravel(perf_da.sel({METHOD: ref_random, TEST_CASE: func_name}, drop=True).values)
        assert random_evals.size > 0

        # We will likely change this to a min mean (instead of median) using a different util in the near future:
        assert np.all(trials_grid == perf_da.sizes[SUGGEST] * (1 + baseline_ds.coords[ITER].values))
        rand_perf, _, _ = qt.min_quantile_CI(random_evals, EVAL_Q, trials_grid, alpha=ALPHA)
        baseline_ds[PERF_MED].loc[{TEST_CASE: func_name}] = rand_perf

        # Decide on a level to clip when computing the mean
        base_clip_val = qt.quantile(random_evals, EVAL_Q)
        assert np.isfinite(base_clip_val), "Median random search performance is not even finite."
        assert (perf_da.sizes[SUGGEST] > 1) or np.isclose(base_clip_val, rand_perf[0])
        baseline_ds[PERF_CLIP].loc[{TEST_CASE: func_name}] = base_clip_val

        # Estimate the global min via best of any method
        best_opt = np.min(perf_da.sel({TEST_CASE: func_name}, drop=True).values)
        if np.any(rand_perf <= best_opt):
            warnings.warn(
                "Random search is also the best search on %s, the normalized score may be meaningless." % func_name,
                RuntimeWarning,
            )
        assert np.isfinite(best_opt), "Best performance found is not even finite."
        logger.info("best %s %f" % (func_name, best_opt))

        # Now make sure strictly less than to avoid assert error in linear_rescale. This will likely give normalized
        # scores of +inf or -inf, but with median summary that is ok. When everything goes to mean, we will need to
        # change this:
        pad = PAD_FACTOR * np.spacing(-np.maximum(MIN_POS, np.abs(best_opt)))
        assert pad < 0
        best_opt = best_opt + pad
        assert np.isfinite(best_opt), "Best performance too close to limit of float range."
        assert np.all(rand_perf > best_opt)
        baseline_ds[PERF_BEST].loc[{TEST_CASE: func_name}] = best_opt

        random_evals = np.minimum(base_clip_val, random_evals)
        assert np.all(np.isfinite(random_evals))
        assert np.all(best_opt <= random_evals)

        rand_perf = em.expected_min(random_evals, trials_grid)
        rand_perf_fixed = np.minimum(base_clip_val, rand_perf)
        assert np.allclose(rand_perf, rand_perf_fixed)
        rand_perf_fixed = np.minimum.accumulate(rand_perf_fixed)
        assert np.allclose(rand_perf, rand_perf_fixed)
        baseline_ds[PERF_MEAN].loc[{TEST_CASE: func_name}] = rand_perf_fixed

    assert not any(np.any(np.isnan(baseline_ds[kk].values)) for kk in baseline_ds)
    validate(baseline_ds)
    return baseline_ds
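
# --- Illustrative sketch (not part of the original module) ---
# For one test case, the PERF_MED baseline above is the EVAL_Q-quantile of the
# best (minimum) loss random search reaches after k = SUGGEST * iteration
# evaluations, and PERF_CLIP is the plain EVAL_Q-quantile of single
# evaluations. A numpy-only approximation of that calculation for a toy array
# of random-search losses; the defaults below stand in for the package
# constants, and np.quantile's interpolation differs slightly from the
# package's order-statistic quantile:
def _demo_median_baseline(random_evals, n_suggest=2, n_iter=8, eval_q=0.5):
    trials_grid = n_suggest * (1 + np.arange(n_iter))
    # q-quantile of the min of k draws == (1 - (1 - q)**(1/k))-quantile of one draw
    levels = 1.0 - (1.0 - eval_q) ** (1.0 / trials_grid)
    perf_med = np.quantile(random_evals, levels)
    perf_clip = np.quantile(random_evals, eval_q)
    return perf_med, perf_clip
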