Example 1
def test_coord_compat_false(ds):
    all_dims = [ds[kk].dims for kk in ds]
    common_dims = sorted(intersect_seq(all_dims))
    da_seq = [ds[kk] for kk in ds]

    assume(len(da_seq) > 0)
    assume(len(da_seq[0].dims) > 0)

    da = da_seq[0]
    kk = da.dims[0]
    da_seq[0] = da.assign_coords(**{kk: range(da.sizes[kk])})

    xru.coord_compat(da_seq, common_dims)
Example 2
def test_coord_compat(ds):
    all_dims = [ds[kk].dims for kk in ds]
    common_dims = sorted(intersect_seq(all_dims))
    da_seq = [ds[kk] for kk in ds]

    compat = xru.coord_compat(da_seq, common_dims)
    assert compat
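A minimal usage sketch of what these two tests exercise. It assumes `xru` is bayesmark's xarray utility module (the import path below is an assumption) and that `coord_compat` returns a bool indicating whether the listed dims carry identical coordinates across the arrays, as the asserts above suggest; this is a sketch, not the documented contract.
import numpy as np
import xarray as xr

from bayesmark import xr_util as xru  # assumed import path for the xru module used above

da_a = xr.DataArray(np.zeros((3, 2)), dims=("x", "y"), coords={"x": [0, 1, 2]})
da_b = xr.DataArray(np.ones(3), dims=("x",), coords={"x": [0, 1, 2]})

# Shared dim "x" carries identical coordinates, so compatibility is expected to hold:
assert xru.coord_compat((da_a, da_b), ("x",))

# Re-assigning the coordinate on one array (as in Example 1) is expected to break it:
da_b2 = da_b.assign_coords(x=[10, 11, 12])
assert not xru.coord_compat((da_a, da_b2), ("x",))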
Example 3
def compute_aggregates(perf_da, baseline_ds):
    """Aggregate function evaluations in the experiments to get performance summaries of each method.

    Parameters
    ----------
    perf_da : :class:`xarray:xarray.DataArray`
        Aggregate experimental results with each function evaluation in the experiments. `perf_da` has dimensions
        ``(ITER, SUGGEST, TEST_CASE, METHOD, TRIAL)`` and is assumed to have no nan values.
    baseline_ds : :class:`xarray:xarray.Dataset`
        Dataset with baseline performance. It has variables ``(PERF_MED, PERF_MEAN, PERF_CLIP, PERF_BEST)`` with
        dimensions ``(ITER, TEST_CASE)``, ``(ITER, TEST_CASE)``, ``(TEST_CASE,)``, and ``(TEST_CASE,)``, respectively.
        `PERF_MED` is a baseline of performance based on random search when using medians to summarize performance.
        Likewise, `PERF_MEAN` is for means. `PERF_CLIP` is an upper bound used to clip poor performance when using the
        mean. `PERF_BEST` is an estimate of the global minimum.

    Returns
    -------
    agg_result : :class:`xarray:xarray.Dataset`
        Dataset with summary of performance for each method and test case combination. Contains variables:
        ``(PERF_MED, LB_MED, UB_MED, NORMED_MED, PERF_MEAN, LB_MEAN, UB_MEAN, NORMED_MEAN)``
        each with dimensions ``(ITER, METHOD, TEST_CASE)``. `PERF_MED` is a median summary of performance with `LB_MED`
        and `UB_MED` as error bars. `NORMED_MED` is a rescaled `PERF_MED` such that optimal performance is expected to
        be 0 and random search to be 1 at all `ITER`. Likewise, `PERF_MEAN`, `LB_MEAN`, `UB_MEAN`, `NORMED_MEAN` are for
        mean performance.
    summary : :class:`xarray:xarray.Dataset`
        Dataset with overall summary of performance of each method. Contains variables
        ``(PERF_MED, LB_MED, UB_MED, PERF_MEAN, LB_MEAN, UB_MEAN, NORMED_MEAN, LB_NORMED_MEAN, UB_NORMED_MEAN)``
        each with dimensions ``(ITER, METHOD)``.
    """
    validate_agg_perf(perf_da, min_trial=1)

    assert isinstance(baseline_ds, xr.Dataset)
    assert tuple(baseline_ds[PERF_BEST].dims) == (TEST_CASE,)
    assert tuple(baseline_ds[PERF_CLIP].dims) == (TEST_CASE,)
    assert tuple(baseline_ds[PERF_MED].dims) == (ITER, TEST_CASE)
    assert tuple(baseline_ds[PERF_MEAN].dims) == (ITER, TEST_CASE)
    assert xru.coord_compat((perf_da, baseline_ds), (ITER, TEST_CASE))
    assert not any(np.any(np.isnan(baseline_ds[kk].values)) for kk in baseline_ds)

    # Now actually get the aggregate performance numbers per test case
    agg_result = xru.ds_like(
        perf_da,
        (PERF_MED, LB_MED, UB_MED, NORMED_MED, PERF_MEAN, LB_MEAN, UB_MEAN, NORMED_MEAN),
        (ITER, METHOD, TEST_CASE),
    )
    baseline_mean_da = xru.only_dataarray(xru.ds_like(perf_da, ["ref"], (ITER, TEST_CASE)))
    # Using .values here since it is clearer to work with raw items than xr objects for func_name
    for func_name in perf_da.coords[TEST_CASE].values:
        rand_perf_med = baseline_ds[PERF_MED].sel({TEST_CASE: func_name}, drop=True).values
        rand_perf_mean = baseline_ds[PERF_MEAN].sel({TEST_CASE: func_name}, drop=True).values
        best_opt = baseline_ds[PERF_BEST].sel({TEST_CASE: func_name}, drop=True).values
        base_clip_val = baseline_ds[PERF_CLIP].sel({TEST_CASE: func_name}, drop=True).values

        assert np.all(np.diff(rand_perf_med) <= 0), "Baseline should be decreasing with iteration"
        assert np.all(np.diff(rand_perf_mean) <= 0), "Baseline should be decreasing with iteration"
        assert np.all(rand_perf_med > best_opt)
        assert np.all(rand_perf_mean > best_opt)
        assert np.all(rand_perf_mean <= base_clip_val)

        baseline_mean_da.loc[{TEST_CASE: func_name}] = linear_rescale(
            rand_perf_mean, best_opt, base_clip_val, 0.0, 1.0, enforce_bounds=False
        )
        for method_name in perf_da.coords[METHOD].values:
            # Take the minimum over all suggestions at a given iter + sanity check perf_da
            curr_da = perf_da.sel({METHOD: method_name, TEST_CASE: func_name}, drop=True).min(dim=SUGGEST)
            assert curr_da.dims == (ITER, TRIAL)

            # Want to evaluate minimum so far during optimization
            perf_array = np.minimum.accumulate(curr_da.values, axis=0)

            # Compute median perf and CI on it
            med_perf, LB, UB = qt.quantile_and_CI(perf_array, EVAL_Q, alpha=ALPHA)
            assert med_perf.shape == rand_perf_med.shape
            agg_result[PERF_MED].loc[{TEST_CASE: func_name, METHOD: method_name}] = med_perf
            agg_result[LB_MED].loc[{TEST_CASE: func_name, METHOD: method_name}] = LB
            agg_result[UB_MED].loc[{TEST_CASE: func_name, METHOD: method_name}] = UB

            # Now store normed version, which is better for aggregation
            normed = linear_rescale(med_perf, best_opt, rand_perf_med, 0.0, 1.0, enforce_bounds=False)
            agg_result[NORMED_MED].loc[{TEST_CASE: func_name, METHOD: method_name}] = normed

            # Compute mean perf and CI on it
            perf_array = np.minimum(base_clip_val, perf_array)
            mean_perf = np.mean(perf_array, axis=1)
            assert mean_perf.shape == rand_perf_mean.shape
            EB = t_EB(perf_array, alpha=ALPHA, axis=1)
            assert EB.shape == rand_perf_mean.shape
            agg_result[PERF_MEAN].loc[{TEST_CASE: func_name, METHOD: method_name}] = mean_perf
            agg_result[LB_MEAN].loc[{TEST_CASE: func_name, METHOD: method_name}] = mean_perf - EB
            agg_result[UB_MEAN].loc[{TEST_CASE: func_name, METHOD: method_name}] = mean_perf + EB

            # Now store normed version, which is better for aggregation
            normed = linear_rescale(mean_perf, best_opt, base_clip_val, 0.0, 1.0, enforce_bounds=False)
            agg_result[NORMED_MEAN].loc[{TEST_CASE: func_name, METHOD: method_name}] = normed
    assert not any(np.any(np.isnan(agg_result[kk].values)) for kk in agg_result)

    # Compute summary score over all test cases, summarize performance of each method
    summary = xru.ds_like(
        perf_da,
        (PERF_MED, LB_MED, UB_MED, PERF_MEAN, LB_MEAN, UB_MEAN, NORMED_MEAN, LB_NORMED_MEAN, UB_NORMED_MEAN),
        (ITER, METHOD),
    )
    summary[PERF_MED], summary[LB_MED], summary[UB_MED] = xr.apply_ufunc(
        qt.quantile_and_CI,
        agg_result[NORMED_MED],
        input_core_dims=[[TEST_CASE]],
        kwargs={"q": EVAL_Q, "alpha": ALPHA},
        output_core_dims=[[], [], []],
    )

    summary[PERF_MEAN] = agg_result[NORMED_MEAN].mean(dim=TEST_CASE)
    EB = xr.apply_ufunc(t_EB, agg_result[NORMED_MEAN], input_core_dims=[[TEST_CASE]])
    summary[LB_MEAN] = summary[PERF_MEAN] - EB
    summary[UB_MEAN] = summary[PERF_MEAN] + EB

    normalizer = baseline_mean_da.mean(dim=TEST_CASE)
    summary[NORMED_MEAN] = summary[PERF_MEAN] / normalizer
    summary[LB_NORMED_MEAN] = summary[LB_MEAN] / normalizer
    summary[UB_NORMED_MEAN] = summary[UB_MEAN] / normalizer

    assert all(tuple(summary[kk].dims) == (ITER, METHOD) for kk in summary)
    return agg_result, summary
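A small worked sketch (hypothetical numbers) of the two transforms inside the per-method loop above: the best-so-far cumulative minimum and the `NORMED_*` rescaling. Here `linear_rescale_sketch` is a stand-in that assumes `linear_rescale(x, lb0, ub0, lb1, ub1, ...)` is the usual affine map sending lb0 -> lb1 and ub0 -> ub1.
import numpy as np

# Best-so-far over iterations (axis 0 is ITER), as done with np.minimum.accumulate above:
curr = np.array([[3.0, 5.0], [4.0, 2.0], [1.0, 6.0]])  # (ITER, TRIAL), hypothetical values
best_so_far = np.minimum.accumulate(curr, axis=0)
assert np.array_equal(best_so_far, np.array([[3.0, 5.0], [3.0, 2.0], [1.0, 2.0]]))


def linear_rescale_sketch(x, lb0, ub0, lb1=0.0, ub1=1.0):
    """Assumed behavior of linear_rescale: affine map with lb0 -> lb1 and ub0 -> ub1."""
    return lb1 + (x - lb0) * (ub1 - lb1) / (ub0 - lb0)


best_opt, rand_perf_med, med_perf = 1.0, 4.0, 2.5  # hypothetical PERF_BEST, random baseline, method median
normed = linear_rescale_sketch(med_perf, best_opt, rand_perf_med)
assert np.isclose(normed, 0.5)  # 0 = estimated optimum, 1 = random-search baseline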
Example 4
def concat_experiments(all_experiments, ravel=False):
    """Aggregate the Datasets from a series of experiments into combined Dataset.

    Parameters
    ----------
    all_experiments : typing.Iterable
        Iterable (possibly from a generator) with the Datasets from each experiment. Each item in `all_experiments` is
        a pair containing ``(meta_data, data)``. See `load_experiments` for details on these variables.
    ravel : bool
        If true, ravel all studies to store batch suggestions as if they were serial. This option is deprecated and
        raises `NotImplementedError`.

    Returns
    -------
    all_perf : :class:`xarray:xarray.Dataset`
        Dataset containing all of the `perf_ds` from the experiments. The meta-data from the experiments are included
        as extra dimensions. `all_perf` has dimensions ``(ITER, SUGGEST, TEST_CASE, METHOD, TRIAL)``. To convert the
        `uuid` to a trial, there must be an equal number of repetitions in the experiments for each `TEST_CASE`,
        `METHOD` combination. Likewise, all of the experiments need an equal number of `ITER` and `SUGGEST`. If `ravel`
        is true, then the `SUGGEST` is singleton.
    all_time : :class:`xarray:xarray.Dataset`
        Dataset containing all of the `time_ds` from the experiments. The new dimensions are
        ``(ITER, TEST_CASE, METHOD, TRIAL)``. It has the same variables as `time_ds`.
    all_suggest : dict(str, :class:`xarray:xarray.Dataset`)
        Dictionary mapping each test case to a Dataset containing its `suggest_ds` from the experiments. Each Dataset
        has dimensions ``(ITER, SUGGEST, TEST_CASE, METHOD, TRIAL)`` with a singleton `TEST_CASE`.
    all_sigs : dict(str, list(list(float)))
        Aggregate of all experiment signatures.
    """
    all_perf = {}
    all_time = {}
    all_suggest = {}
    all_sigs = {}
    trial_counter = Counter()
    for (test_case, optimizer, uuid), (perf_ds, time_ds, suggest_ds, sig) in all_experiments:
        if ravel:
            raise NotImplementedError("ravel is deprecated. Just reshape in analysis steps instead.")

        case_key = (test_case, optimizer, trial_counter[(test_case, optimizer)])
        trial_counter[(test_case, optimizer)] += 1

        # Process perf data
        assert all(perf_ds[kk].dims == (ITER, SUGGEST) for kk in perf_ds)
        all_perf[case_key] = perf_ds

        # Process time data
        all_time[case_key] = summarize_time(time_ds)

        # Process suggestion data
        all_suggest_curr = all_suggest.setdefault(test_case, {})
        all_suggest_curr[case_key] = suggest_ds

        # Handle the signatures
        all_sigs.setdefault(test_case, []).append(sig)
    assert min(trial_counter.values()) == max(trial_counter.values()), "Uneven number of trials per test case"

    # Now need to concat dict of datasets into single dataset
    all_perf = xru.ds_concat(all_perf, dims=(TEST_CASE, METHOD, TRIAL))
    assert all(all_perf[kk].dims == (ITER, SUGGEST, TEST_CASE, METHOD, TRIAL) for kk in all_perf)
    assert not any(
        np.any(np.isnan(all_perf[kk].values)) for kk in all_perf
    ), "Missing combinations of method and test case"

    all_time = xru.ds_concat(all_time, dims=(TEST_CASE, METHOD, TRIAL))
    assert all(all_time[kk].dims == (ITER, TEST_CASE, METHOD, TRIAL) for kk in all_time)
    assert not any(np.any(np.isnan(all_time[kk].values)) for kk in all_time)
    assert xru.coord_compat((all_perf, all_time), (ITER, TEST_CASE, METHOD, TRIAL))

    for test_case in all_suggest:
        all_suggest[test_case] = xru.ds_concat(all_suggest[test_case], dims=(TEST_CASE, METHOD, TRIAL))
        assert all(
            all_suggest[test_case][kk].dims == (ITER, SUGGEST, TEST_CASE, METHOD, TRIAL)
            for kk in all_suggest[test_case]
        )
        assert not any(np.any(np.isnan(all_suggest[test_case][kk].values)) for kk in all_suggest[test_case])
        assert xru.coord_compat((all_perf, all_suggest[test_case]), (ITER, METHOD, TRIAL))
        assert all_suggest[test_case].coords[TEST_CASE].shape == (1,), "test case should be singleton"

    return all_perf, all_time, all_suggest, all_sigs
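The conversion from experiment `uuid` to a dense `TRIAL` index is done purely by the `Counter` keying above; here is a standalone sketch of that keying with hypothetical test-case and optimizer names:
from collections import Counter

runs = [  # (test_case, optimizer, uuid) triples, all hypothetical
    ("branin", "rs", "uuid-a"),
    ("branin", "rs", "uuid-b"),
    ("branin", "opt", "uuid-c"),
    ("branin", "opt", "uuid-d"),
]
trial_counter = Counter()
keys = []
for test_case, optimizer, uuid in runs:
    # The uuid itself is discarded; the running count becomes the TRIAL index
    keys.append((test_case, optimizer, trial_counter[(test_case, optimizer)]))
    trial_counter[(test_case, optimizer)] += 1
assert keys == [("branin", "rs", 0), ("branin", "rs", 1), ("branin", "opt", 0), ("branin", "opt", 1)]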
Example 5
def concat_experiments(all_experiments, ravel=False):
    """Aggregate the Datasets from a series of experiments into combined Dataset.

    Parameters
    ----------
    all_experiments : typing.Iterable
        Iterable (possibly from a generator) with the Datasets from each experiment. Each item in `all_experiments` is
        a pair containing ``(meta_data, data)``. The `meta_data` contains a `tuple` of `str` with
        ``test_case, optimizer, uuid``. The `data` contains a tuple of ``(perf_da, time_ds, sig)``. The `perf_da` is an
        :class:`xarray:xarray.DataArray` containing the evaluation results with dimensions ``(ITER, SUGGEST)``. The
        `time_ds` is an :class:`xarray:xarray.Dataset` containing the timing results of the form accepted by
        `summarize_time`. The coordinates must be compatible with `perf_da`. Finally, `sig` contains the `test_case`
        signature and must be `list(float)`.
    ravel : bool
        If true, ravel all studies to store batch suggestions as if they were serial.

    Returns
    -------
    all_perf : :class:`xarray:xarray.DataArray`
        DataArray containing all of the `perf_da` from the experiments. The meta-data from the experiments are included
        as extra dimensions. `all_perf` has dimensions ``(ITER, SUGGEST, TEST_CASE, METHOD, TRIAL)``. To convert the
        `uuid` to a trial, there must be an equal number of repetitions in the experiments for each `TEST_CASE`,
        `METHOD` combination. Likewise, all of the experiments need an equal number of `ITER` and `SUGGEST`. If `ravel`
        is true, then the `SUGGEST` is singleton.
    all_time : :class:`xarray:xarray.Dataset`
        Dataset containing all of the `time_ds` from the experiments. The new dimensions are
        ``(ITER, TEST_CASE, METHOD, TRIAL)``. It has the same variables as `time_ds`.
    all_sigs : dict(str, list(list(float)))
        Aggregate of all experiment signatures.
    """
    all_perf = {}
    all_time = {}
    all_sigs = {}
    trial_counter = Counter()
    for (test_case, optimizer, uuid), (perf_da, time_ds, sig) in all_experiments:
        if ravel:
            n_suggest = perf_da.sizes[SUGGEST]
            perf_da = _ravel_perf(perf_da)
            time_ds = _ravel_time(time_ds)
            optimizer = str_join_safe(ARG_DELIM, (optimizer, "p%d" % n_suggest), append=True)

        case_key = (test_case, optimizer, trial_counter[(test_case, optimizer)])
        trial_counter[(test_case, optimizer)] += 1

        # Process perf data
        assert perf_da.dims == (ITER, SUGGEST)
        all_perf[case_key] = perf_da

        # Process time data
        all_time[case_key] = summarize_time(time_ds)

        # Handle the signatures
        all_sigs.setdefault(test_case, []).append(sig)
    assert min(trial_counter.values()) == max(trial_counter.values()), "Uneven number of trials per test case"

    # Now need to concat dict of datasets into single dataset
    all_perf = xru.da_concat(all_perf, dims=(TEST_CASE, METHOD, TRIAL))
    assert all_perf.dims == (ITER, SUGGEST, TEST_CASE, METHOD, TRIAL)
    assert not np.any(np.isnan(all_perf.values)), "Missing combinations of method and test case"

    all_time = xru.ds_concat(all_time, dims=(TEST_CASE, METHOD, TRIAL))
    assert all(all_time[kk].dims == (ITER, TEST_CASE, METHOD, TRIAL) for kk in all_time)
    assert not any(np.any(np.isnan(all_time[kk].values)) for kk in all_time)
    assert xru.coord_compat((all_perf, all_time), (ITER, TEST_CASE, METHOD, TRIAL))

    return all_perf, all_time, all_sigs
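A rough sketch of the shape change implied by the `ravel` branch above, where SUGGEST becomes singleton and the batch is absorbed into the serial axis. The private helpers `_ravel_perf`/`_ravel_time` are not reproduced, and the exact element ordering they use is not confirmed here.
import numpy as np

n_iter, n_suggest = 4, 3
perf = np.arange(n_iter * n_suggest, dtype=float).reshape(n_iter, n_suggest)  # (ITER, SUGGEST)
# Treat the batch of suggestions as if they had been made serially:
perf_serial = perf.reshape(n_iter * n_suggest, 1)
assert perf_serial.shape == (12, 1)  # SUGGEST is now singleton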