Example #1
def test_min_quantile_CI(args):
    X, q, m, alpha = args

    estimate0, LB0, UB0 = qt.min_quantile_CI(X, q, m, alpha)
    assert LB0 <= estimate0
    assert estimate0 <= UB0

    # Recompute without using _ internal funcs
    q = 1.0 - (1.0 - q)**(1.0 / m)
    LB, UB = qt.quantile_CI(X, q, alpha=alpha)
    estimate = qt.quantile(X, q)

    assert estimate0 == estimate
    assert LB0 == LB
    assert UB0 == UB
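The level adjustment in this test follows from the order-statistics identity P(min(X_1, ..., X_m) <= t) = 1 - (1 - F(t))**m: setting this equal to q and solving gives F(t) = 1 - (1 - q)**(1/m). A minimal numeric check of that identity (standard-normal data, independent of the project's `qt` module):

import numpy as np
import scipy.stats as ss

q, m = 0.5, 100
adj = 1.0 - (1.0 - q) ** (1.0 / m)  # single-draw level matching the min over m draws
t = ss.norm.ppf(adj)  # the adj-quantile of a single draw
# The CDF of the minimum of m iid draws, evaluated at t, recovers the original q
assert np.isclose(1.0 - (1.0 - ss.norm.cdf(t)) ** m, q)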
Example #2
def mc_test_min_quantile_CI(mc_runs=1000,
                            n=2000,
                            q=0.5,
                            m=100,
                            alpha=0.05,
                            random=np.random):
    qq_level = 1.0 - (1.0 - q)**(1.0 / m)
    q0 = ss.norm.ppf(qq_level)

    X = random.randn(mc_runs, n)
    R = np.array([qt.min_quantile_CI(xx, q, m, alpha) for xx in X])
    LB, UB = R[:, 1], R[:, 2]

    n_pass = np.sum((LB <= q0) & (q0 <= UB))
    # This is only a one-sided test
    pval = ss.binom.cdf(n_pass, mc_runs, 1 - alpha)
    return pval
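A sketch of how the returned p-value might be used: if the interval really has at least 1 - alpha coverage, the number of runs in which q0 falls inside the CI is Binomial(mc_runs, p) with p >= 1 - alpha, so only a very small lower-tail p-value flags under-coverage. The numbers below are illustrative, not from a real run:

import scipy.stats as ss

mc_runs, alpha = 1000, 0.05
n_pass = 945  # hypothetical count of runs whose CI covered the true quantile
pval = ss.binom.cdf(n_pass, mc_runs, 1 - alpha)  # lower-tail (one-sided) p-value
suspect_undercoverage = pval < 0.01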
Example #3
def test_min_quantile_CI_to_max(args):
    X, q, m, alpha = args

    epsilon = 1e-8  # Small allowance for numerics

    estimate0, LB0, UB0 = qt.min_quantile_CI(X, q, m, alpha)

    # Try just above and below to allow for numerical error in case we are
    # just on the boundary.
    estimate1, LB1, UB1 = qt.max_quantile_CI(-X, (1.0 - q) - epsilon, m, alpha)
    estimate2, LB2, UB2 = qt.max_quantile_CI(-X, 1.0 - q, m, alpha)
    estimate3, LB3, UB3 = qt.max_quantile_CI(-X, (1.0 - q) + epsilon, m, alpha)

    if len(X) == 0:
        assert estimate0 == -np.inf  # quantile spec rounds down if n=0
    else:
        assert -estimate0 in (estimate1, estimate2, estimate3)

    assert -LB0 in (UB1, UB2, UB3)
    assert -UB0 in (LB1, LB2, LB3)
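The symmetry exercised here: negating the data swaps minima and maxima and maps the q-quantile to the (1 - q)-quantile, so the lower and upper CI bounds trade places with a sign flip. A minimal illustration of the underlying quantile identity, using NumPy's linear-interpolation quantile as a stand-in for the `qt` quantile spec:

import numpy as np

X = np.array([0.3, -1.2, 2.5, 0.0, -0.7])
q = 0.25
# The q-quantile of X equals minus the (1 - q)-quantile of -X
assert np.isclose(np.quantile(X, q), -np.quantile(-X, 1.0 - q))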
Example #4
def compute_baseline(perf_da):
    """Compute a performance baseline of base and best performance from the aggregate experimental results.

    Parameters
    ----------
    perf_da : :class:`xarray:xarray.DataArray`
        Aggregate experimental results with each function evaluation in the experiments. `perf_da` has dimensions
        ``(ITER, SUGGEST, TEST_CASE, METHOD, TRIAL)`` and is assumed to have no nan values.

    Returns
    -------
    baseline_ds : :class:`xarray:xarray.Dataset`
        Dataset with baseline performance. It has variables ``(PERF_MED, PERF_MEAN, PERF_CLIP, PERF_BEST)`` with
        dimensions ``(ITER, TEST_CASE)``, ``(ITER, TEST_CASE)``, ``(TEST_CASE,)``, and ``(TEST_CASE,)``, respectively.
        `PERF_MED` is a baseline of performance based on random search when using medians to summarize performance.
        Likewise, `PERF_MEAN` is for means. `PERF_CLIP` is an upper bound to clip poor performance when using the mean.
        `PERF_BEST` is an estimate of the global minimum.
    """
    validate_agg_perf(perf_da)

    ref_prefix = str_join_safe(ARG_DELIM, (cc.RANDOM_SEARCH, ""))
    ref_random = [kk for kk in perf_da.coords[METHOD].values if kk.startswith(ref_prefix)]
    assert len(ref_random) > 0, "Did not find any random search in methods."

    # How many points we will have after each batch
    trials_grid = perf_da.sizes[SUGGEST] * (1 + np.arange(perf_da.sizes[ITER]))

    # Now iterate over problems and get baseline performance
    baseline_ds = ds_like_mixed(
        perf_da,
        [
            (PERF_MED, [ITER, TEST_CASE]),
            (PERF_MEAN, [ITER, TEST_CASE]),
            (PERF_CLIP, [TEST_CASE]),
            (PERF_BEST, [TEST_CASE]),
        ],
        (ITER, TEST_CASE),
    )
    for func_name in perf_da.coords[TEST_CASE].values:
        random_evals = np.ravel(perf_da.sel({METHOD: ref_random, TEST_CASE: func_name}, drop=True).values)
        assert random_evals.size > 0

        # We will likely change this to a min mean (instead of median) using a different util in the near future:
        assert np.all(trials_grid == perf_da.sizes[SUGGEST] * (1 + baseline_ds.coords[ITER].values))
        rand_perf, _, _ = qt.min_quantile_CI(random_evals, EVAL_Q, trials_grid, alpha=ALPHA)
        baseline_ds[PERF_MED].loc[{TEST_CASE: func_name}] = rand_perf

        # Decide on a level to clip when computing the mean
        base_clip_val = qt.quantile(random_evals, EVAL_Q)
        assert np.isfinite(base_clip_val), "Median random search performance is not even finite."
        assert (perf_da.sizes[SUGGEST] > 1) or np.isclose(base_clip_val, rand_perf[0])
        baseline_ds[PERF_CLIP].loc[{TEST_CASE: func_name}] = base_clip_val

        # Estimate the global min via best of any method
        best_opt = np.min(perf_da.sel({TEST_CASE: func_name}, drop=True).values)
        if np.any(rand_perf <= best_opt):
            warnings.warn(
                "Random search is also the best search on %s, the normalized score may be meaningless." % func_name,
                RuntimeWarning,
            )
        assert np.isfinite(best_opt), "Best performance found is not even finite."
        logger.info("best %s %f" % (func_name, best_opt))

        # Now make sure strictly less than to avoid assert error in linear_rescale. This will likely give normalized
        # scores of +inf or -inf, but with median summary that is ok. When everything goes to mean, we will need to
        # change this:
        pad = PAD_FACTOR * np.spacing(-np.maximum(MIN_POS, np.abs(best_opt)))
        assert pad < 0
        best_opt = best_opt + pad
        assert np.isfinite(best_opt), "Best performance too close to limit of float range."
        assert np.all(rand_perf > best_opt)
        baseline_ds[PERF_BEST].loc[{TEST_CASE: func_name}] = best_opt

        random_evals = np.minimum(base_clip_val, random_evals)
        assert np.all(np.isfinite(random_evals))
        assert np.all(best_opt <= random_evals)

        rand_perf = em.expected_min(random_evals, trials_grid)
        rand_perf_fixed = np.minimum(base_clip_val, rand_perf)
        assert np.allclose(rand_perf, rand_perf_fixed)
        rand_perf_fixed = np.minimum.accumulate(rand_perf_fixed)
        assert np.allclose(rand_perf, rand_perf_fixed)
        baseline_ds[PERF_MEAN].loc[{TEST_CASE: func_name}] = rand_perf_fixed
    assert not any(np.any(np.isnan(baseline_ds[kk].values)) for kk in baseline_ds)
    validate(baseline_ds)
    return baseline_ds
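The padding step near the end of `compute_baseline` nudges `best_opt` strictly below the observed minimum by a few ULPs so that later strict inequalities (and `linear_rescale`) do not fail on exact equality. A standalone sketch of that trick, with illustrative stand-ins for the module constants `PAD_FACTOR` and `MIN_POS`:

import numpy as np

PAD_FACTOR, MIN_POS = 10, 1e-6  # stand-in values, not the module's actual constants
best_opt = 0.123456
# np.spacing of a negative number is negative, so pad is a tiny negative amount
pad = PAD_FACTOR * np.spacing(-np.maximum(MIN_POS, np.abs(best_opt)))
assert pad < 0
padded = best_opt + pad
assert padded < best_opt  # strictly below the observed minimum
assert np.isfinite(padded)  # but still within float range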