Ejemplo n.º 1
0
def test_linear_rescale_inverse(args):
    """Rescaling to [lb1, ub1] and back to [lb0, ub0] must recover the input."""
    X, lb0, ub0, lb1, ub1, enforce_bounds = args
    enforce_bounds = enforce_bounds >= 0

    # sorted() keeps hypothesis happy without burning extra assume calls
    lb0, ub0 = sorted([lb0, ub0])
    lb1, ub1 = sorted([lb1, ub1])

    assume(lb0 < ub0)
    assume(lb1 < ub1)
    # Can't expect numerics to work well in these extreme cases:
    assume((ub0 - lb0) < 1e3 * (ub1 - lb1))

    if enforce_bounds:
        X = np.clip(X, lb0, ub0)

    # Round trip: forward map then inverse map
    forward = np_util.linear_rescale(X, lb0, ub0, lb1, ub1,
                                     enforce_bounds=enforce_bounds)
    round_trip = np_util.linear_rescale(forward, lb1, ub1, lb0, ub0,
                                        enforce_bounds=enforce_bounds)

    assert close_enough(round_trip, X)
Ejemplo n.º 2
0
def test_linear_rescale_bounds(args):
    """Endpoints of the source range must map exactly onto the target endpoints."""
    lb0, ub0, lb1, ub1 = args

    # sorted() keeps hypothesis happy without burning extra assume calls
    lb0, ub0 = sorted([lb0, ub0])
    lb1, ub1 = sorted([lb1, ub1])

    assume(lb0 < ub0)
    assume(lb1 <= ub1)

    # Each source endpoint should land on the corresponding target endpoint
    for src, expected in ((lb0, lb1), (ub0, ub1)):
        mapped = np_util.linear_rescale(src, lb0, ub0, lb1, ub1)
        assert close_enough(expected, mapped)
Ejemplo n.º 3
0
def test_real_range_unwarp_warp(warp, args):
    """unwarp followed by warp on a Real space must round-trip the input."""
    x_w, range_ = args

    # Restrict the candidate range to the warp's domain
    if warp == "log":
        range_ = range_[range_ > 0]
    if warp == "logit":
        range_ = range_[(0 < range_) & (range_ < 1)]

    range_ = np.sort(range_)
    assume(len(range_) == 2 and range_[0] < range_[1])

    range_warped = sp.WARP_DICT[warp](range_)
    x_w = np.clip(x_w, range_warped[0], range_warped[1])

    S = sp.Real(warp=warp, range_=range_)

    # Test bounds: map test points into the space's warped bounds
    lower, upper = S.get_bounds().T
    x_w = linear_rescale(x_w, lb0=-1000, ub0=1000, lb1=lower, ub1=upper)

    x = S.unwarp(x_w)
    assert x_w.shape == x.shape + (1,)
    assert x.dtype == range_.dtype
    assert x.dtype == S.dtype

    # Unwarped values must pass validation unchanged
    assert close_enough(x, S.validate(x))

    x_w2 = S.warp(x)
    assert x_w2.shape == x_w.shape
    # Re-warped values must pass warped-space validation unchanged
    assert close_enough(x_w2, S.validate_warped(x_w2))

    # warp(unwarp(x_w)) should reproduce the warped input
    assert close_enough(x_w, x_w2)
Ejemplo n.º 4
0
def test_linear_rescale_bound_modes(args):
    """When X is already inside [lb0, ub0], enforce_bounds must not change the result."""
    X, lb0, ub0, lb1, ub1 = args

    # sorted() keeps hypothesis happy without burning extra assume calls
    lb0, ub0 = sorted([lb0, ub0])
    lb1, ub1 = sorted([lb1, ub1])

    assume(lb0 < ub0)
    assume(lb1 <= ub1)

    X = np.clip(X, lb0, ub0)

    # Run both bound modes over identical in-bounds input
    results = [
        np_util.linear_rescale(X, lb0, ub0, lb1, ub1, enforce_bounds=flag)
        for flag in (False, True)
    ]

    assert close_enough(results[0], results[1])
def test_random_search_suggest_diff(api_args, n_suggest, seed):
    """Different random seeds must produce different random-search suggestions.

    Hard to know how many iters needed for arbitrary space that we need to
    run so that we don't get dupes by chance. So, for now, let's just stick
    with this simple space.
    """
    dim = {"space": "linear", "type": "real", "range": [1.0, 5.0]}

    # Use at least 10 n_suggest to make sure don't get same answer by chance
    X_w, y = api_args

    D = X_w.shape[1]
    # FIX: was range(5); meta must have exactly one entry per column of X_w.
    # With range(5), zip() silently truncated when D < 5 and dropped columns
    # when D > 5.
    param_names = ["x%d" % ii for ii in range(D)]
    meta = dict(zip(param_names, [dim] * D))

    # Get the unwarped X
    S = sp.JointSpace(meta)
    lower, upper = S.get_bounds().T
    X_w = linear_rescale(X_w, lb0=0.0, ub0=1.0, lb1=lower, ub1=upper)
    X = S.unwarp(X_w)
    S.validate(X)

    seed = seed // 2  # Keep in bounds even after add 7

    x_guess = suggest_dict(X,
                           y,
                           meta,
                           n_suggest,
                           random=np.random.RandomState(seed))
    # Use diff seed to intentionally get diff result
    x_guess2 = suggest_dict(X,
                            y,
                            meta,
                            n_suggest,
                            random=np.random.RandomState(seed + 7))

    # Check types too
    assert len(x_guess) == n_suggest
    assert len(x_guess2) == n_suggest
    assert not np.all(x_guess == x_guess2)
    # Make sure validated
    S.validate(x_guess)
    S.validate(x_guess2)

    # Test sanity of output
    D, = lower.shape

    # FIX: the original duplicated the shape assertion and only checked the
    # upper bound; the copy-paste clearly intended a lower-bound check too.
    for guess in (x_guess, x_guess2):
        guess_w = S.warp(guess)
        assert type(guess_w) == np.ndarray
        assert guess_w.dtype.kind == "f"
        assert guess_w.shape == (n_suggest, D)
        assert np.all(lower <= guess_w)
        assert np.all(guess_w <= upper)
Ejemplo n.º 6
0
def test_linear_rescale_inner(args):
    """Rescaling clipped input must produce values inside [lb1, ub1]."""
    X, lb0, ub0, lb1, ub1 = args

    # sorted() keeps hypothesis happy without burning extra assume calls
    lb0, ub0 = sorted([lb0, ub0])
    lb1, ub1 = sorted([lb1, ub1])

    assume(lb0 < ub0)
    assume(lb1 <= ub1)

    clipped = np.clip(X, lb0, ub0)
    rescaled = np_util.linear_rescale(clipped, lb0, ub0, lb1, ub1)

    # Output must land inside the target interval on both sides
    assert np.all(lb1 <= rescaled) and np.all(rescaled <= ub1)
Ejemplo n.º 7
0
    def postwarp(self, xxw):
        """Extra work needed to undo the Gaussian space representation."""
        xx = {}
        for arg_name, gauss_val in xxw.items():
            assert np.isscalar(gauss_val)
            space = self.space[arg_name]

            if space is None:
                val = gauss_val
            else:
                # Std Gaussian -> uniform on [0, 1]
                unit_val = norm.cdf(gauss_val)

                # Uniform on [0, 1] -> uniform on the warped bounds
                (lb, ub), = space.get_bounds()
                warped_val = linear_rescale(unit_val, 0, 1, lb, ub)

                # Undo the warp so the value is apriori uniform in [a, b]
                val = space.unwarp([warped_val])
            assert np.isscalar(val)
            xx[arg_name] = val
        return xx
Ejemplo n.º 8
0
        def log_mean_score_json(evals, iters):
            """Print one JSON log line with the rescaled score per objective."""
            assert evals.shape == (len(OBJECTIVE_NAMES), )
            assert not np.any(np.isnan(evals))

            log_msg = {
                cc.TEST_CASE: test_case_str,
                cc.METHOD: optimizer_str,
                cc.TRIAL: args[CmdArgs.uuid],
                cc.ITER: iters,
            }

            for idx, obj in enumerate(OBJECTIVE_NAMES):
                assert OBJECTIVE_NAMES[idx] == obj

                # Pull the rescaling reference points for this objective
                slice_ = {cc.TEST_CASE: test_case_str, OBJECTIVE: obj}
                best_opt = baseline_ds[cc.PERF_BEST].sel(slice_, drop=True).values.item()
                base_clip_val = baseline_ds[cc.PERF_CLIP].sel(slice_, drop=True).values.item()

                # Same rescaling as found in experiment_analysis.compute_aggregates()
                score = linear_rescale(evals[idx],
                                       best_opt,
                                       base_clip_val,
                                       0.0,
                                       1.0,
                                       enforce_bounds=False)
                # Clip from below at -1 to limit max influence of a single run
                # on the final average; item() gives a plain float for JSON.
                score = np.clip(score, -1.0, 1.0).item()
                assert isinstance(score, float)

                # Note: This is not the raw score but the rescaled one!
                log_msg[obj] = score
            print(json.dumps(log_msg), flush=True)
            # One second safety delay to protect against subprocess stdout getting lost
            sleep(1)
Ejemplo n.º 9
0
    def prewarp(self, xx):
        """Extra work needed to get variables into the Gaussian space
        representation."""
        xxw = {}
        for arg_name, raw_val in xx.items():
            assert np.isscalar(raw_val)
            space = self.space[arg_name]

            if space is None:
                gauss_val = raw_val
            else:
                # Warp so we think it is apriori uniform in [a, b]
                warped = space.warp(raw_val)
                assert warped.size == 1

                # Uniform on warped bounds -> uniform on [0, 1]; .item()
                # unpacks the warped array down to a scalar.
                (lb, ub), = space.get_bounds()
                unit_val = linear_rescale(warped.item(), lb, ub, 0, 1)

                # Uniform on [0, 1] -> std Gaussian apriori
                gauss_val = norm.ppf(unit_val)
            assert np.isscalar(gauss_val)
            xxw[arg_name] = gauss_val
        return xxw
Ejemplo n.º 10
0
def compute_aggregates(perf_da, baseline_ds):
    """Aggregate function evaluations in the experiments to get performance summaries of each method.

    Parameters
    ----------
    perf_da : :class:`xarray:xarray.DataArray`
        Aggregate experimental results with each function evaluation in the experiments. `all_perf` has dimensions
        ``(ITER, SUGGEST, TEST_CASE, METHOD, TRIAL)`` as is assumed to have no nan values.
    baseline_ds : :class:`xarray:xarray.Dataset`
        Dataset with baseline performance. It has variables ``(PERF_MED, PERF_MEAN, PERF_CLIP, PERF_BEST)`` with
        dimensions ``(ITER, TEST_CASE)``, ``(ITER, TEST_CASE)``, ``(TEST_CASE,)``, and ``(TEST_CASE,)``, respectively.
        `PERF_MED` is a baseline of performance based on random search when using medians to summarize performance.
        Likewise, `PERF_MEAN` is for means. `PERF_CLIP` is an upperbound to clip poor performance when using the mean.
        `PERF_BEST` is an estimate on the global minimum.

    Returns
    -------
    agg_result : :class:`xarray:xarray.Dataset`
        Dataset with summary of performance for each method and test case combination. Contains variables:
        ``(PERF_MED, LB_MED, UB_MED, NORMED_MED, PERF_MEAN, LB_MEAN, UB_MEAN, NORMED_MEAN)``
        each with dimensions ``(ITER, METHOD, TEST_CASE)``. `PERF_MED` is a median summary of performance with `LB_MED`
        and `UB_MED` as error bars. `NORMED_MED` is a rescaled `PERF_MED` so we expect the optimal performance is 0,
        and random search gives 1 at all `ITER`. Likewise, `PERF_MEAN`, `LB_MEAN`, `UB_MEAN`, `NORMED_MEAN` are for
        mean performance.
    summary : :class:`xarray:xarray.Dataset`
        Dataset with overall summary of performance of each method. Contains variables
        ``(PERF_MED, LB_MED, UB_MED, PERF_MEAN, LB_MEAN, UB_MEAN)``
        each with dimensions ``(ITER, METHOD)``.
    """
    validate_agg_perf(perf_da, min_trial=1)

    # Sanity-check the baseline dataset's structure before using it
    assert isinstance(baseline_ds, xr.Dataset)
    assert tuple(baseline_ds[PERF_BEST].dims) == (TEST_CASE,)
    assert tuple(baseline_ds[PERF_CLIP].dims) == (TEST_CASE,)
    assert tuple(baseline_ds[PERF_MED].dims) == (ITER, TEST_CASE)
    assert tuple(baseline_ds[PERF_MEAN].dims) == (ITER, TEST_CASE)
    assert xru.coord_compat((perf_da, baseline_ds), (ITER, TEST_CASE))
    assert not any(np.any(np.isnan(baseline_ds[kk].values)) for kk in baseline_ds)

    # Now actually get the aggregate performance numbers per test case
    agg_result = xru.ds_like(
        perf_da,
        (PERF_MED, LB_MED, UB_MED, NORMED_MED, PERF_MEAN, LB_MEAN, UB_MEAN, NORMED_MEAN),
        (ITER, METHOD, TEST_CASE),
    )
    # Rescaled random-search baseline per (ITER, TEST_CASE); used later to
    # normalize the overall mean summary across test cases.
    baseline_mean_da = xru.only_dataarray(xru.ds_like(perf_da, ["ref"], (ITER, TEST_CASE)))
    # Using values here since just clearer to get raw items than xr object for func_name
    for func_name in perf_da.coords[TEST_CASE].values:
        # Per-test-case reference points: random-search baselines, estimated
        # global best, and the clip value that caps poor mean performance.
        rand_perf_med = baseline_ds[PERF_MED].sel({TEST_CASE: func_name}, drop=True).values
        rand_perf_mean = baseline_ds[PERF_MEAN].sel({TEST_CASE: func_name}, drop=True).values
        best_opt = baseline_ds[PERF_BEST].sel({TEST_CASE: func_name}, drop=True).values
        base_clip_val = baseline_ds[PERF_CLIP].sel({TEST_CASE: func_name}, drop=True).values

        assert np.all(np.diff(rand_perf_med) <= 0), "Baseline should be decreasing with iteration"
        assert np.all(np.diff(rand_perf_mean) <= 0), "Baseline should be decreasing with iteration"
        assert np.all(rand_perf_med > best_opt)
        assert np.all(rand_perf_mean > best_opt)
        assert np.all(rand_perf_mean <= base_clip_val)

        # Rescale baseline so 0 = estimated optimum, 1 = the clip value
        baseline_mean_da.loc[{TEST_CASE: func_name}] = linear_rescale(
            rand_perf_mean, best_opt, base_clip_val, 0.0, 1.0, enforce_bounds=False
        )
        for method_name in perf_da.coords[METHOD].values:
            # Take the minimum over all suggestion at given iter + sanity check perf_da
            curr_da = perf_da.sel({METHOD: method_name, TEST_CASE: func_name}, drop=True).min(dim=SUGGEST)
            assert curr_da.dims == (ITER, TRIAL)

            # Want to evaluate minimum so far during optimization
            perf_array = np.minimum.accumulate(curr_da.values, axis=0)

            # Compute median perf and CI on it
            med_perf, LB, UB = qt.quantile_and_CI(perf_array, EVAL_Q, alpha=ALPHA)
            assert med_perf.shape == rand_perf_med.shape
            agg_result[PERF_MED].loc[{TEST_CASE: func_name, METHOD: method_name}] = med_perf
            agg_result[LB_MED].loc[{TEST_CASE: func_name, METHOD: method_name}] = LB
            agg_result[UB_MED].loc[{TEST_CASE: func_name, METHOD: method_name}] = UB

            # Now store normed version, which is better for aggregation:
            # 0 = estimated optimum, 1 = random-search median at each ITER
            normed = linear_rescale(med_perf, best_opt, rand_perf_med, 0.0, 1.0, enforce_bounds=False)
            agg_result[NORMED_MED].loc[{TEST_CASE: func_name, METHOD: method_name}] = normed

            # Compute mean perf and CI on it; clip at base_clip_val first so a
            # single terrible run cannot dominate the mean
            perf_array = np.minimum(base_clip_val, perf_array)
            mean_perf = np.mean(perf_array, axis=1)
            assert mean_perf.shape == rand_perf_mean.shape
            EB = t_EB(perf_array, alpha=ALPHA, axis=1)
            assert EB.shape == rand_perf_mean.shape
            agg_result[PERF_MEAN].loc[{TEST_CASE: func_name, METHOD: method_name}] = mean_perf
            agg_result[LB_MEAN].loc[{TEST_CASE: func_name, METHOD: method_name}] = mean_perf - EB
            agg_result[UB_MEAN].loc[{TEST_CASE: func_name, METHOD: method_name}] = mean_perf + EB

            # Now store normed version, which is better for aggregation:
            # 0 = estimated optimum, 1 = the clip value
            normed = linear_rescale(mean_perf, best_opt, base_clip_val, 0.0, 1.0, enforce_bounds=False)
            agg_result[NORMED_MEAN].loc[{TEST_CASE: func_name, METHOD: method_name}] = normed
    assert not any(np.any(np.isnan(agg_result[kk].values)) for kk in agg_result)

    # Compute summary score over all test cases, summarize performance of each method
    summary = xru.ds_like(
        perf_da,
        (PERF_MED, LB_MED, UB_MED, PERF_MEAN, LB_MEAN, UB_MEAN, NORMED_MEAN, LB_NORMED_MEAN, UB_NORMED_MEAN),
        (ITER, METHOD),
    )
    # Median summary: quantile + CI of the normed medians across test cases
    summary[PERF_MED], summary[LB_MED], summary[UB_MED] = xr.apply_ufunc(
        qt.quantile_and_CI,
        agg_result[NORMED_MED],
        input_core_dims=[[TEST_CASE]],
        kwargs={"q": EVAL_Q, "alpha": ALPHA},
        output_core_dims=[[], [], []],
    )

    # Mean summary: average of normed means across test cases, with t-based error bars
    summary[PERF_MEAN] = agg_result[NORMED_MEAN].mean(dim=TEST_CASE)
    EB = xr.apply_ufunc(t_EB, agg_result[NORMED_MEAN], input_core_dims=[[TEST_CASE]])
    summary[LB_MEAN] = summary[PERF_MEAN] - EB
    summary[UB_MEAN] = summary[PERF_MEAN] + EB

    # Normalize mean summary by the rescaled random-search baseline so random
    # search scores ~1 at every ITER
    normalizer = baseline_mean_da.mean(dim=TEST_CASE)
    summary[NORMED_MEAN] = summary[PERF_MEAN] / normalizer
    summary[LB_NORMED_MEAN] = summary[LB_MEAN] / normalizer
    summary[UB_NORMED_MEAN] = summary[UB_MEAN] / normalizer

    assert all(tuple(summary[kk].dims) == (ITER, METHOD) for kk in summary)
    return agg_result, summary