def _convert_to_hparams_domain(domain: Domain) -> Dict[str, hp.HParam]:
    """Translate a :class:`Domain` into a mapping of HParams definitions.

    Every flattened subdomain becomes one ``hp.HParam`` whose name is the
    "/"-joined path of the keys leading to that subdomain.

    Args:
        domain: :class:`Domain`. The domain to convert.

    Returns:
        A dict from the joined subdomain name to the ``hp.HParam`` built
        for that subdomain.

    Raises:
        :obj:`TypeError`: if the type of a subdomain is none of continuous,
            discrete or categorical.
    """
    converted = {}
    for path, subdomain in domain.flatten().items():
        dim_type = Domain.get_type(subdomain)
        name = utils.join_strings(path, join_char="/")
        if dim_type == Domain.Continuous:
            # the interval bounds are passed as separate float arguments
            hp_domain = hp.RealInterval(*map(float, subdomain))
        elif dim_type in (Domain.Discrete, Domain.Categorical):
            # the whole collection of options is passed as one argument
            hp_domain = hp.Discrete(subdomain)
        else:
            raise TypeError(f"Cannot map subdomain of type {dim_type} "
                            f"to a known HParams domain.")
        converted[name] = hp.HParam(name, hp_domain)
    return converted
def test_bo_gp_mcmc_model():
    """Run the 1D continuous evaluation with the "GP_MCMC" model."""
    search_space = Domain({"x": [-1., 6.]})
    optimiser = bo.BayesianOptimization(domain=search_space, seed=7)
    test_utils.evaluate_continuous_1d(
        optimiser,
        batch_size=1,
        n_steps=7,
        model="GP_MCMC",
        evaluator_type="sequential",
    )
def generated_history():
    """Build a history of ten evaluated samples over a mixed domain.

    Returns:
        A tuple ``(history, domain)`` where `history` is a list of
        :class:`HistoryPoint` objects, each holding a sample drawn from
        `domain` and two metrics derived from the sample's index.
    """
    domain = Domain(
        {
            "x": [-5., 6.],
            "y": {"sin", "sqr"},
            "z": set(range(4)),
        },
        seed=7,
    )
    n_samples = 10
    history = []
    for i in range(n_samples):
        point = HistoryPoint(
            sample=domain.sample(),
            metrics={
                "metric_1": EvaluationScore(float(i)),
                "metric_2": EvaluationScore(i * 2.),
            },
        )
        history.append(point)
    # A single-point history is unwrapped into the bare point
    # (not triggered while n_samples is 10).
    if len(history) == 1:
        history = history[0]
    return history, domain
def test_from_list():
    """Check `Domain.from_list` against a nested spec and `flatten` back."""
    lst = [
        (("a", "b"), {2, 3, 4}),
        (("c", ), {0, 0.1}),
        (("d", "e", "f"), {0, 1}),
        (("d", "g"), {2, 3}),
    ]
    nested_spec = {
        "a": {"b": {2, 3, 4}},
        "c": {0, 0.1},
        "d": {
            "e": {"f": {0, 1}},
            "g": {2, 3},
        },
    }
    expected = Domain(nested_spec)
    built = Domain.from_list(lst)
    # list -> domain
    assert expected == built
    # domain -> list, in the same order as the input
    flattened = list(expected.flatten().items())
    assert lst == flattened
def _convert_to_gpyopt_domain(
        orig_domain: Domain
) -> Tuple[GPyOptDomain, GPyOptCategoricalValueMapper, GPyOptDiscreteTypeMapper]:
    """Convert a :class:`Domain` type object to :obj:`GPyOptDomain`.

    Args:
        orig_domain: :class:`Domain` to convert.

    Returns:
        A tuple of three items:

        * the converted :obj:`GPyOptDomain`: a list with one dict per
          flattened subdomain, holding its "name", "type" and "domain";
        * a value mapper which, for every categorical subdomain, assigns
          each categorical value to an integer (0, 1, 2, 3 ...). This is
          done to abstract away the type of the categorical domain from the
          `GPyOpt` internals and thus arbitrary types are supported;
        * a type mapper which, for every discrete subdomain, assigns each
          value to its Python type.

    Raises:
        :obj:`ValueError`: if the type of a subdomain is none of continuous,
            discrete or categorical.

    Notes:
        The categorical options must be hashable. This behaviour may change
        in the future.
    """
    converted = []
    categorical_to_index = {}
    discrete_to_type = {}
    for names, vals in orig_domain.flatten().items():
        dim_name = utils.join_strings(names)
        domain_type = Domain.get_type(vals)
        if domain_type == Domain.Continuous:
            dim_type = BayesianOptimisation.CONTINUOUS_TYPE
        elif domain_type == Domain.Discrete:
            dim_type = BayesianOptimisation.DISCRETE_TYPE
            discrete_to_type[dim_name] = dict((v, type(v)) for v in vals)
        elif domain_type == Domain.Categorical:
            dim_type = BayesianOptimisation.CATEGORICAL_TYPE
            n_options = len(vals)
            categorical_to_index[dim_name] = dict(zip(vals, range(n_options)))
            # from here on the options are represented by their indices
            vals = tuple(range(n_options))
        else:
            raise ValueError(
                f"Badly specified subdomain {names} with values {vals}.")
        entry = {"name": dim_name, "type": dim_type, "domain": tuple(vals)}
        converted.append(entry)
    assert len(converted) == len(
        orig_domain), "Mismatching dimensionality after domain conversion."
    return converted, categorical_to_index, discrete_to_type
def test_local_from_script_and_cmdline_named_args():
    """Run a script with named arguments and compare with direct calls.

    Every job's result data must equal the value of `script.main` called
    with the same arguments, the leading dashes stripped from their names.
    """
    domain = Domain(
        {
            "--x": {0, 1, 2, 3},
            "--y": [-1., 1.],
            "--z": {"acb123", "abc"},
        },
        seed=7,
    )
    jobs = []
    for _ in range(10):
        jobs.append(
            Job(task="hypertunity/scheduling/tests/script.py",
                args=domain.sample().as_dict(),
                meta={"binary": "python"}))
    results = run_jobs(jobs)
    matches = []
    for result, job in zip(results, jobs):
        matches.append(
            result.data == script.main(
                **{k.lstrip("-"): v for k, v in job.args.items()}))
    assert all(matches)
def __init__(self, domain, seed=None):
    """Initialise the optimiser's domain.

    The global NumPy random generator is seeded with `seed`, and the domain
    is rebuilt from its dict form with the same seed before being converted
    to the GPyOpt representation.

    Args:
        domain: :class:`Domain`. The domain of the objective function.
        seed: (optional) :obj:`int`. The seed of the optimiser.
            Used for reproducibility purposes.
    """
    np.random.seed(seed)
    seeded_domain = Domain(domain.as_dict(), seed=seed)
    super(BayesianOptimisation, self).__init__(seeded_domain)

    (self.gpyopt_domain,
     self._categorical_value_mapper,
     self._discrete_type_mapper) = self._convert_to_gpyopt_domain(self.domain)

    # index -> categorical value, per categorical subdomain
    inverse_mapper = {}
    for name, mapping in self._categorical_value_mapper.items():
        inverse_mapper[name] = {
            index: value for value, index in mapping.items()
        }
    self._inv_categorical_value_mapper = inverse_mapper

    # no observations yet
    self._data_x = np.array([[]])
    self._data_fx = np.array([[]])
    self.__is_empty_data = True
def test_grid_simple_discrete():
    """Exhaust a 4 x 3 x 2 discrete grid, then reset and sample again."""
    spec = {
        "x": {1, 2, 3, 4},
        "y": {-3, 2, 5},
        "z": {"small", "large"},
    }
    grid = exhaustive.GridSearch(domain=Domain(spec))
    # 6 steps of 4 samples: presumably this walks all 24 grid points
    test_utils.evaluate_discrete_3d(grid, batch_size=4, n_steps=3 * 2)
    with pytest.raises(exhaustive.ExhaustedSearchSpaceError):
        grid.run_step(batch_size=4)
    grid.reset()
    batch_after_reset = grid.run_step(batch_size=4)
    assert len(batch_after_reset) == 4
def __init__(self,
             domain: Domain,
             sample_continuous: bool = False,
             seed: int = None):
    """Initialise the :class:`GridSearch` optimiser from a discrete domain.

    If the domain contains continuous subspaces, then they could be sampled
    if `sample_continuous` is enabled.

    Args:
        domain: :class:`Domain`. The domain to iterate over.
        sample_continuous: (optional) :obj:`bool`. Whether to sample the
            continuous subspaces of the domain.
        seed: (optional) :obj:`int`. Seed for the sampling of the continuous
            subspace if necessary.

    Raises:
        :class:`DomainNotIterableError`: if the domain is (partially)
            continuous and `sample_continuous` is not enabled.
    """
    if domain.is_continuous and not sample_continuous:
        raise DomainNotIterableError(
            "Cannot perform grid search on (partially) continuous domain. "
            "To enable grid search in this case, set the argument "
            "'sample_continuous' to True.")
    super(GridSearch, self).__init__(domain)

    discrete_part, categorical_part, continuous_part = domain.split_by_type()
    # the discrete and the categorical parts can both be iterated,
    # hence they are merged into one
    self.discrete_domain = discrete_part + categorical_part
    # the continuous part is rebuilt only when a seed is supplied
    self.continuous_domain = (
        continuous_part if seed is None
        else Domain(continuous_part.as_dict(), seed=seed))

    self._discrete_domain_iter = iter(self.discrete_domain)
    self._is_exhausted = len(self.discrete_domain) == 0
    self.__exhausted_err = ExhaustedSearchSpaceError(
        "The domain has been exhausted. Reset the optimiser to start again."
    )
def test_bo_update_and_reset():
    """Check the stored data after updates and the history after a reset."""
    spec = {"a": {"b": [2, 3], "d": {"f": [3, 4]}}, "c": [0, 0.1]}
    bayes_opt = bo.BayesianOptimisation(Domain(spec), seed=7)
    n_reps = 3
    samples = []
    for i in range(n_reps):
        batch = bayes_opt.run_step(batch_size=1, minimise=False)
        samples.extend(batch)
        bayes_opt.update(samples[-1], base.EvaluationScore(2. * i))

    # one row of inputs and one row of scores per update
    assert len(bayes_opt._data_x) == n_reps
    assert len(bayes_opt._data_fx) == n_reps

    expected_x = np.array(
        [bayes_opt._convert_to_gpyopt_sample(s) for s in samples])
    assert np.all(bayes_opt._data_x == expected_x)
    expected_fx = 2. * np.arange(n_reps).reshape(n_reps, 1)
    assert np.all(bayes_opt._data_fx == expected_fx)

    bayes_opt.reset()
    assert len(bayes_opt.history) == 0
def test_valid():
    """Check which specifications :class:`Domain` rejects and accepts."""
    with pytest.raises(TypeError):
        Domain({{"b": lambda x: x}, [0, 0.1]})

    malformed_specs = [
        {1: {"b": [2, 3]}, "c": [0, 0.1]},
        {"a": {"b": (1, 2, 3, 4)}, "c": [0, 0.1]},
        {"a": {"b": lambda x: x}, "c": [0, 0.1]},
    ]
    for spec in malformed_specs:
        with pytest.raises(DomainSpecificationError):
            Domain(spec)

    with pytest.raises(ValueError):
        # this one should fail from the ast.literal_eval parsing
        Domain('{"a": {"b": lambda x: x}, "c": [0, 0.1]}')

    # well-formed specifications, given as a dict and as a string
    well_formed_specs = (
        {"a": {"b": {0, 1}}, "c": [0, 0.1]},
        '{"a": {"b": {0, 1}}, "c": [0, 0.1]}',
    )
    for spec in well_formed_specs:
        Domain(spec)
def test_valid_domain(domain):
    """Construct a :class:`Domain` from `domain`; passes if nothing raises."""
    _ = Domain(domain)
def test_iter():
    """Check iteration over a domain.

    A domain with a continuous subdomain must refuse to be iterated, while
    a fully discrete one must yield every combination of its options.
    """
    partially_continuous = {"a": {"b": {2, 3, 4}}, "c": [0, 0.1]}
    with pytest.raises(DomainNotIterableError):
        list(iter(Domain(partially_continuous)))

    discrete_domain = Domain({
        "a": {
            "b": {2, 3, 4},
            "j": {
                "d": {5, 6},
                "f": {"g": {7}},
            },
        },
        "c": {"op1", 0.1},
    })
    all_samples = set(iter(discrete_domain))
    # 2 x 2 x 3 = 12 combinations of the options for "c", "d" and "b"
    expected_samples = {
        Sample({'a': {'b': b, 'j': {'d': d, 'f': {'g': 7}}}, 'c': c})
        for c in ('op1', 0.1)
        for d in (5, 6)
        for b in (2, 3, 4)
    }
    assert all_samples == expected_samples
def test_grid_simple_mixed():
    """Grid search on a mixed domain needs `sample_continuous` enabled."""
    mixed_domain = Domain({
        "x": [-5., 6.],
        "y": {"sin", "sqr"},
        "z": set(range(4)),
    })
    with pytest.raises(exhaustive.DomainNotIterableError):
        _ = exhaustive.GridSearch(mixed_domain)
    grid = exhaustive.GridSearch(mixed_domain, sample_continuous=True, seed=93)
    batch = grid.run_step(batch_size=8)
    assert len(batch) == 8
def test_as_dict():
    """`as_dict` must give back the dict the domain was built from."""
    dict_domain = {"a": {"b": [2, 3]}, "c": [0, 0.1]}
    round_tripped = Domain(dict_domain).as_dict()
    assert round_tripped == dict_domain
def test_as_namedtuple():
    """Check attribute access on the namedtuple view of a domain."""
    domain = Domain({"a": {"b": {2, 3, 4}}, "c": [0, 0.1]})
    as_tuple = domain.as_namedtuple()
    inner_type = namedtuple("_", "b")
    assert as_tuple.a == inner_type({2, 3, 4})
    assert as_tuple.a.b == {2, 3, 4}
    assert as_tuple.c == [0, 0.1]
def test_fail_iter_cont_domain():
    """Iterating a domain with a continuous subdomain must raise."""
    partially_continuous = Domain({"a": {"b": {2, 3, 4}}, "c": [0, 0.1]})
    with pytest.raises(DomainNotIterableError):
        list(iter(partially_continuous))
def test_bo_simple_mixed():
    """Run the heterogeneous 3D evaluation with Bayesian optimisation."""
    mixed_domain = Domain({
        "x": [-5., 6.],
        "y": {"sin", "sqr"},
        "z": set(range(4)),
    })
    optimiser = bo.BayesianOptimization(domain=mixed_domain, seed=7)
    test_utils.evaluate_heterogeneous_3d(optimiser, batch_size=7, n_steps=3)
def test_iter():
    """Iterating a discrete domain must yield every option combination."""
    discrete_domain = Domain({
        "a": {
            "b": {2, 3, 4},
            "j": {
                "d": {5, 6},
                "f": {"g": {7}},
            },
        },
        "c": {"op1", 0.1},
    })
    all_samples = set(iter(discrete_domain))
    # 2 x 2 x 3 = 12 combinations of the options for "c", "d" and "b"
    expected_samples = {
        Sample({'a': {'b': b, 'j': {'d': d, 'f': {'g': 7}}}, 'c': c})
        for c in ('op1', 0.1)
        for d in (5, 6)
        for b in (2, 3, 4)
    }
    assert all_samples == expected_samples
def test_invalid_domain(domain, expectation):
    """Construct a :class:`Domain` from `domain` inside `expectation`.

    `expectation` is used as a context manager around the construction.
    """
    with expectation:
        _ = Domain(domain)
def test_random_simple_mixed():
    """Run the heterogeneous 3D evaluation with random search."""
    mixed_domain = Domain({
        "x": [-5., 6.],
        "y": {"sin", "sqr"},
        "z": set(range(4)),
    })
    searcher = random.RandomSearch(domain=mixed_domain, seed=1)
    test_utils.evaluate_heterogeneous_3d(searcher, batch_size=50, n_steps=25)
def test_random_simple_continuous():
    """Run the continuous 1D evaluation with random search."""
    interval = Domain({"x": [-1., 6.]})
    searcher = random.RandomSearch(domain=interval, seed=7)
    test_utils.evaluate_continuous_1d(searcher, batch_size=50, n_steps=2)
def test_eq():
    """Two domains built from equal specifications must compare equal."""
    left = Domain({"a": {"b": [2, 3]}, "c": [0, 0.1]})
    right = Domain({"a": {"b": [2, 3]}, "c": [0, 0.1]})
    assert left == right
def test_local_from_fn():
    """Run a function as jobs and compare the results with direct calls."""
    domain = Domain({"x": [0., 1.]}, seed=7)
    jobs = []
    for _ in range(10):
        jobs.append(Job(task=square, args=(domain.sample(), )))
    results = run_jobs(jobs)
    matches = []
    for result, job in zip(results, jobs):
        matches.append(result.data.value == square(*job.args).value)
    assert all(matches)
def test_sampling():
    """Every drawn sample must lie within the bounds of its subdomains."""
    domain = Domain({"a": {"b": {2, 3, 4}}, "c": [0, 0.1]})
    for _ in range(10):
        drawn = domain.sample()
        assert drawn["a"]["b"] in {2, 3, 4}
        assert 0. <= drawn["c"] <= 0.1
def test_bo_simple_continuous():
    """Run the continuous 1D evaluation with Bayesian optimisation."""
    interval = Domain({"x": [-1., 6.]})
    optimiser = bo.BayesianOptimization(domain=interval, seed=7)
    test_utils.evaluate_continuous_1d(optimiser, batch_size=2, n_steps=7)
def test_flatten():
    """`flatten` must key every subdomain by the tuple of its path."""
    nested = Domain({"a": {"b": [0, 1]}, "c": [0, 0.1]})
    flat = nested.flatten()
    assert flat == {
        ("a", "b"): [0, 1],
        ("c", ): [0, 0.1],
    }
def test_serialisation():
    """A serialise/deserialise round trip must give an equal domain."""
    domain = Domain({
        "a": [1, 2],
        "b": {
            "c": {1, 2, 3},
            "d": {"o1", "o2"},
        },
    })
    restored = Domain.deserialise(domain.serialise())
    assert restored == domain
class GridSearch(Optimiser):
    """Grid search pseudo-optimiser."""

    # Message of the error raised when no more samples can be generated.
    _EXHAUSTED_MSG = (
        "The domain has been exhausted. Reset the optimiser to start again."
    )

    def __init__(self,
                 domain: Domain,
                 sample_continuous: bool = False,
                 seed: int = None):
        """Initialise the :class:`GridSearch` optimiser from a discrete domain.

        If the domain contains continuous subspaces, then they could be
        sampled if `sample_continuous` is enabled.

        Args:
            domain: :class:`Domain`. The domain to iterate over.
            sample_continuous: (optional) :obj:`bool`. Whether to sample the
                continuous subspaces of the domain.
            seed: (optional) :obj:`int`. Seed for the sampling of the
                continuous subspace if necessary.

        Raises:
            :class:`DomainNotIterableError`: if the domain is (partially)
                continuous and `sample_continuous` is not enabled.
        """
        if domain.is_continuous and not sample_continuous:
            raise DomainNotIterableError(
                "Cannot perform grid search on (partially) continuous domain. "
                "To enable grid search in this case, set the argument "
                "'sample_continuous' to True.")
        super(GridSearch, self).__init__(domain)
        (discrete_domain,
         categorical_domain,
         continuous_domain) = domain.split_by_type()
        # unify the discrete and the categorical into one,
        # as they can be iterated:
        self.discrete_domain = discrete_domain + categorical_domain
        if seed is not None:
            self.continuous_domain = Domain(continuous_domain.as_dict(),
                                            seed=seed)
        else:
            self.continuous_domain = continuous_domain
        self._discrete_domain_iter = iter(self.discrete_domain)
        # an empty discrete part marks the search as exhausted from the start
        self._is_exhausted = len(self.discrete_domain) == 0

    def _exhausted_error(self):
        """Create a fresh :class:`ExhaustedSearchSpaceError` to raise."""
        # A new instance is built for every raise: re-raising one stored
        # instance would keep extending its traceback with each raise.
        return ExhaustedSearchSpaceError(self._EXHAUSTED_MSG)

    def run_step(self, batch_size: int = 1, **kwargs) -> List[Sample]:
        """Get the next `batch_size` samples from the Cartesian-product walk
        over the domain.

        Args:
            batch_size: (optional) :obj:`int`. The number of samples to
                suggest at once.

        Returns:
            A list of :class:`Sample` instances from the domain. The list is
            empty if `batch_size` is not positive and the domain is not yet
            exhausted.

        Raises:
            :class:`ExhaustedSearchSpaceError`: if the (discrete part of the)
                domain is fully exhausted and no samples can be generated.

        Notes:
            This method does not guarantee that the returned list of
            :class:`Samples` will be of length `batch_size`.
            This is due to the size of the domain and the fact that samples
            will not be repeated.
        """
        if self._is_exhausted:
            raise self._exhausted_error()
        samples = []
        for _ in range(batch_size):
            try:
                discrete = next(self._discrete_domain_iter)
            except StopIteration:
                self._is_exhausted = True
                break
            if self.continuous_domain:
                # complete the grid point with a draw from the continuous part
                continuous = self.continuous_domain.sample()
                samples.append(discrete + continuous)
            else:
                samples.append(discrete)
        # Raise only if the walk actually ran out; a non-positive
        # `batch_size` must not be reported as an exhausted domain.
        if not samples and self._is_exhausted:
            raise self._exhausted_error()
        return samples

    def reset(self):
        """Reset the optimiser to the beginning of the Cartesian-product
        walk."""
        super(GridSearch, self).reset()
        self._discrete_domain_iter = iter(self.discrete_domain)
        self._is_exhausted = len(self.discrete_domain) == 0