def test_bound_scaler() -> None:
    """Check ``utils.BoundScaler.transform`` on an all-ones and an all-zeros input.

    Each transformed vector is fed back through ``set_standardized_data`` and the
    resulting parameter values are compared with the expected values.
    """
    reference = p.Instrumentation(
        p.Array(shape=(1, 2)).set_bounds(-12, 12, method="arctan"),
        p.Array(shape=(2, )).set_bounds(-12, 12, full_range_sampling=False),
        lr=p.Log(lower=0.001, upper=1000),
        stuff=p.Scalar(lower=-1, upper=2),
        unbounded=p.Scalar(lower=-1, init=0.0),
        value=p.Scalar(),
        letter=p.Choice("abc"),
    )
    child = reference.spawn_child()
    bound_scaler = utils.BoundScaler(child)

    # First pass: every coordinate of the input is 1.
    ones = bound_scaler.transform([1.0] * child.dimension, lambda x: x)
    child.set_standardized_data(ones)
    positional, named = child.value
    first_array, second_array = positional
    np.testing.assert_array_almost_equal(first_array, [[12, 12]])
    np.testing.assert_array_almost_equal(second_array, [1, 1])
    assert named["stuff"] == 2
    assert named["unbounded"] == 1
    assert named["value"] == 1
    np.testing.assert_almost_equal(named["lr"], 1000)

    # Second pass: the middle point (every coordinate is 0).
    zeros = bound_scaler.transform([0] * child.dimension, lambda x: x)
    child.set_standardized_data(zeros)
    np.testing.assert_almost_equal(child.value[1]["lr"], 1.0)
    np.testing.assert_almost_equal(child.value[1]["stuff"], 0.5)
def test_bound_scaler() -> None:
    """Check data-parameter ordering, then ``utils.BoundScaler.transform`` outputs.

    The ordering returned by ``p.helpers.list_data`` is compared with the one from
    ``split_as_data_parameters``; the scaler is then exercised on an all-ones and
    an all-zeros input.
    """
    reference = p.Instrumentation(
        p.Array(shape=(1, 2)).set_bounds(-12, 12, method="arctan"),
        p.Array(shape=(2, )).set_bounds(-12, 12, full_range_sampling=False),
        lr=p.Log(lower=0.001, upper=1000),
        stuff=p.Scalar(lower=-1, upper=2),
        unbounded=p.Scalar(lower=-1, init=0.0),
        value=p.Scalar(),
        letter=p.Choice("abc"),
    )

    # The legacy split method must yield the same order as list_data.
    legacy_order = [entry[1] for entry in split_as_data_parameters(reference)]
    assert p.helpers.list_data(reference) == legacy_order

    # Bounds check: every coordinate of the input is 1.
    child = reference.spawn_child()
    bound_scaler = utils.BoundScaler(child)
    ones = bound_scaler.transform([1.0] * child.dimension, lambda x: x)
    child.set_standardized_data(ones)
    positional, named = child.value
    first_array, second_array = positional
    np.testing.assert_array_almost_equal(first_array, [[12, 12]])
    np.testing.assert_array_almost_equal(second_array, [1, 1])
    assert named["stuff"] == 2
    assert named["unbounded"] == 1
    assert named["value"] == 1
    assert named["lr"] == pytest.approx(1000)

    # Same again on the middle point (every coordinate is 0).
    zeros = bound_scaler.transform([0] * child.dimension, lambda x: x)
    child.set_standardized_data(zeros)
    assert child.value[1]["lr"] == pytest.approx(1.0)
    assert child.value[1]["stuff"] == pytest.approx(0.5)
def test_callable_parametrization() -> None:
    """Check the ``"name"`` descriptor for a lambda and for a ``_Callable`` instance."""
    # The lambda must stay an inline lambda: the expected name is "<lambda>".
    from_lambda = base.ExperimentFunction(lambda x: x**2, p.Scalar(2).set_mutation(2))  # type: ignore
    np.testing.assert_equal(from_lambda.descriptors["name"], "<lambda>")

    from_instance = base.ExperimentFunction(_Callable(), p.Scalar(2).set_mutation(sigma=2))
    np.testing.assert_equal(from_instance.descriptors["name"], "_Callable")
def __init__(self, transform: tp.Optional[str] = None) -> None:
    """Build the function on the "Landscape" dataset with the requested parametrization.

    Parameters
    ----------
    transform: str or None
        - None: keep the default two-scalar parametrization named "standard".
        - "gaussian": one ``p.TransitionChoice`` per image axis, over
          ``range(axis_size)``.
        - "square": one ``p.Scalar`` per image axis, with both ``init`` and the
          mutation ``sigma`` set to ``(axis_size - 1) / 2``.

    Raises
    ------
    ValueError
        if ``transform`` is any other value.
    """
    pixel_getter = self._get_pixel_value
    default_parametrization = p.Instrumentation(p.Scalar(), p.Scalar()).set_name("standard")
    super().__init__(pixel_getter, default_parametrization)
    self.register_initialization(transform=transform)
    self._image = datasets.get_data("Landscape")

    if transform == "gaussian":
        axis_choices = [p.TransitionChoice(list(range(size))) for size in self._image.shape]
        self.parametrization = p.Instrumentation(*axis_choices).set_name("gaussian")
    elif transform == "square":
        half_sizes = (np.array(self._image.shape) - 1.) / 2.
        axis_scalars = [p.Scalar(init=half).set_mutation(sigma=half) for half in half_sizes]
        # NOTE: the original author flagged this branch as "maybe buggy, try again?".
        self.parametrization = p.Instrumentation(*axis_scalars).set_name("square")
    elif transform is not None:
        raise ValueError(f"Unknown transform {transform}")

    self._max = float(self._image.max())
def _make_pyomo_range_set_to_parametrization(
    domain: pyomo.RangeSet, params: ParamDict, params_name: str
) -> ParamDict:
    """Convert a Pyomo ``RangeSet`` into a parameter stored under ``params[params_name]``.

    Parameters
    ----------
    domain: pyomo.RangeSet
        the range set to convert.
    params: ParamDict
        dictionary updated in place with the new parameter.
    params_name: str
        key under which the new parameter is stored.

    Returns
    -------
    ParamDict
        the same ``params`` dictionary, for convenience.

    Raises
    ------
    NotImplementedError
        if the single range is not a ``NumericRange``, or if the domain is neither a
        single range with step in {-1, 0, 1} nor a ``FiniteSimpleRangeSet``.
    """
    # https://pyomo.readthedocs.io/en/stable/pyomo_modeling_components/Sets.html
    # Refer to the implementation in pyomo/core/base/set.py
    ranges = list(domain.ranges())
    if len(ranges) == 1 and ranges[0].step in (-1, 0, 1):
        single = ranges[0]
        if not isinstance(single, pyomo.base.range.NumericRange):
            raise NotImplementedError(f"Cannot handle range type {type(single)}")
        lb, ub = single.start, single.end
        if single.step < 0:
            # A negative step means the range runs from high to low.
            lb, ub = ub, lb
        # NOTE(review): `closed` is indexed as (lower, upper) even after the swap above;
        # confirm whether Pyomo allows open ends on a range with a negative step.
        # An open bound is replaced by the nearest representable float strictly inside
        # the interval. The step must go toward +/-inf: stepping toward a finite value
        # such as 1 moves the wrong way whenever the bound lies beyond that value.
        if (lb is not None) and (not single.closed[0]):
            lb = float(np.nextafter(lb, np.inf))
        if (ub is not None) and (not single.closed[1]):
            ub = float(np.nextafter(ub, -np.inf))
        params[params_name] = p.Scalar(lower=lb, upper=ub)
        if single.step in (-1, 1):
            # A unit step means integer values. May consider using nested param.
            params[params_name].set_integer_casting()  # type: ignore
    elif isinstance(domain, pyomo.FiniteSimpleRangeSet):
        # Need to handle step size. Assumes the ranges do not overlap.
        # NOTE(review): this builds a Choice over `range` objects, not over the
        # individual values, and relies on each range being unpackable; verify.
        params[params_name] = p.Choice([range(*r) for r in domain.ranges()])
    else:
        raise NotImplementedError(f"Cannot handle domain type {type(domain)}")
    return params
def test_packed_function() -> None:
    """Check ``base.MultiExperiment`` construction, naming and call result."""
    ifunc = base.ExperimentFunction(_Callable(), p.Scalar(1).set_name(""))

    # Passing the very same instance twice is expected to raise.
    with pytest.raises(AssertionError):
        base.MultiExperiment([ifunc, ifunc], [100, 100])

    # An instance together with a copy of it is accepted.
    experiments = [ifunc, ifunc.copy()]
    packed = base.MultiExperiment(experiments, [100, 100])
    np.testing.assert_equal(packed.descriptors["name"], "_Callable,_Callable")
    np.testing.assert_array_equal(packed(-3), [3, 3])
def test_instrumented_function_kwarg_order() -> None:
    """Check how a standardized vector is distributed over keyword parameters.

    The keywords are deliberately declared out of alphabetical order; the expected
    dictionary at the end pins which data entries end up in which keyword.
    """
    instrumentation = p.Instrumentation(
        kw4=p.Choice([1, 0]),
        kw2="constant",
        kw3=p.Array(shape=(2, 2)),
        kw1=p.Scalar(2.0).set_mutation(sigma=2.0),
    )
    ifunc = base.ExperimentFunction(_arg_return, instrumentation)  # type: ignore
    np.testing.assert_equal(ifunc.dimension, 7)

    standardized = np.array([-1, 1, 2, 3, 4, 100, -100])
    child = ifunc.parametrization.spawn_child().set_standardized_data(standardized)
    args0, kwargs0 = child.value
    # Original author's note: this round trip through the function "is very stupid
    # and should be removed when Parameter is in use".
    returned = ifunc(*args0, **kwargs0)  # type: ignore
    kwargs: tp.Any = returned[1]
    expected = {"kw1": 0, "kw2": "constant", "kw3": [[1, 2], [3, 4]], "kw4": 1}
    testing.printed_assert_equal(kwargs, expected)
# NOTE(review): the statements down to the two `assert ..._sum != 0` lines are the tail
# of a definition whose `def` line is outside this view; `ifunc` and `data` are defined
# there. They are reproduced unchanged.
# Deterministic decoding of `data` is expected to give args == [0] and kwargs == {"y": 0}.
args, kwargs = ifunc.parametrization.spawn_child(
).set_standardized_data(data, deterministic=True).value
testing.printed_assert_equal(args, [0])
testing.printed_assert_equal(kwargs, {"y": 0})
# Non-deterministic decoding: accumulate over 24 draws and require non-zero sums.
arg_sum, kwarg_sum = 0, 0
for _ in range(24):
    args, kwargs = ifunc.parametrization.spawn_child(
    ).set_standardized_data(data, deterministic=False).value
    arg_sum += args[0]
    kwarg_sum += kwargs["y"]
assert arg_sum != 0
assert kwarg_sum != 0


@testing.parametrized(
    floats=((p.Scalar(), p.Scalar(init=12.0)), True, False),
    array_int=((p.Scalar(), p.Array(shape=(1, )).set_integer_casting()), False, False),
    softmax_noisy=((p.Choice(["blue", "red"]), p.Array(shape=(1, ))), True, True),
    softmax_deterministic=((p.Choice(["blue", "red"], deterministic=True), p.Array(shape=(1, ))), False, False),
    ordered_discrete=((p.TransitionChoice([True, False]), p.Array(shape=(1, ))), False, False),
)
def test_parametrization_continuous_noisy(variables: tp.Tuple[p.Parameter, ...], continuous: bool, noisy: bool) -> None:
    """Check the ``continuous`` descriptor of an instrumentation built from ``variables``.

    Each case provides the parameters to instrument, the expected ``continuous`` flag
    and an expected ``noisy`` flag.
    """
    instru = p.Instrumentation(*variables)
    assert instru.descriptors.continuous == continuous
    # NOTE(review): `noisy` is parametrized but never asserted in the visible code;
    # the definition may continue beyond this view.
def __init__(
    self, regressor: str, data_dimension: tp.Optional[int] = None, dataset: str = "artificial", overfitter: bool = False
) -> None:
    """Configure the tuning problem for the requested regressor.

    Parameters
    ----------
    regressor: str
        one of "decision_tree_depth", "any", "decision_tree" or "mlp". It selects the
        parametrization and which keyword arguments of ``_ml_parametrization`` are frozen.
    data_dimension: int or None
        must be provided if and only if ``dataset`` contains "artificial".
    dataset: str
        name of the dataset.
    overfitter: bool
        the evaluation parameters use ``noise_free = not overfitter``.

    Raises
    ------
    AssertionError
        if ``data_dimension`` is inconsistent with ``dataset`` or if ``regressor`` is unknown.
    """
    self.regressor = regressor
    self.data_dimension = data_dimension
    self.dataset = dataset
    self.overfitter = overfitter
    self._descriptors: tp.Dict[str, tp.Any] = {}
    self.add_descriptors(regressor=regressor, data_dimension=data_dimension, dataset=dataset, overfitter=overfitter)
    self.name = regressor + f"Dim{data_dimension}"
    self.num_data = 120  # default for artificial function
    self._cross_val_num = 10  # number of cross validation
    # Dimension does not make sense if we use a real world dataset.
    # Raised explicitly (same exception type as the former `assert`) so that the check
    # is not stripped when Python runs with -O.
    if ("artificial" in dataset) != (data_dimension is not None):
        raise AssertionError(
            "data_dimension must be provided if and only if the dataset is artificial "
            f"(dataset={dataset!r}, data_dimension={data_dimension!r})"
        )

    # Variables for storing the training set and the test set.
    self.X: np.ndarray = np.array([])
    self.y: np.ndarray

    # Variables for storing the cross-validation splits.
    self.X_train_cv: tp.List[tp.Any] = []  # This will be the list of training subsets.
    self.X_valid_cv: tp.List[tp.Any] = []  # This will be the list of validation subsets.
    self.y_train_cv: tp.List[tp.Any] = []
    self.y_valid_cv: tp.List[tp.Any] = []
    self.X_train: np.ndarray
    self.y_train: np.ndarray
    self.X_test: np.ndarray
    self.y_test: np.ndarray

    # `params` holds the keyword arguments frozen during optimization; `evalparams`
    # holds those used for evaluation (it defaults to a copy of `params`, see below).
    params: tp.Dict[str, tp.Any]
    evalparams: tp.Dict[str, tp.Any] = {}
    if regressor == "decision_tree_depth":
        # Only the depth, as an evaluation.
        parametrization = p.Instrumentation(depth=p.Scalar(lower=1, upper=1200).set_integer_casting())
        # We optimize only the depth, so we fix all other parameters than the depth
        params = dict(
            noise_free=False,
            criterion="mse",
            min_samples_split=0.00001,
            regressor="decision_tree",
            alpha=1.0,
            learning_rate="no",
            activation="no",
            solver="no",
        )
    elif regressor == "any":
        # First we define the list of parameters in the optimization
        parametrization = p.Instrumentation(
            depth=p.Scalar(lower=1, upper=1200).set_integer_casting(),  # Depth, in case we use a decision tree.
            criterion=p.Choice(["mse", "friedman_mse", "mae"]),  # Criterion for building the decision tree.
            min_samples_split=p.Log(lower=0.0000001, upper=1),  # Min ratio of samples in a node for splitting.
            regressor=p.Choice(["mlp", "decision_tree"]),  # Type of regressor.
            activation=p.Choice(["identity", "logistic", "tanh", "relu"]),  # Activation function, in case we use a net.
            solver=p.Choice(["lbfgs", "sgd", "adam"]),  # Numerical optimizer.
            learning_rate=p.Choice(["constant", "invscaling", "adaptive"]),  # Learning rate schedule.
            alpha=p.Log(lower=0.0000001, upper=1.),  # Complexity penalization.
        )
        # noise_free is False (meaning that we consider the cross-validation loss) during the optimization.
        params = dict(noise_free=False)
    elif regressor == "decision_tree":
        # We specify below the list of hyperparameters for the decision trees.
        parametrization = p.Instrumentation(
            depth=p.Scalar(lower=1, upper=1200).set_integer_casting(),
            criterion=p.Choice(["mse", "friedman_mse", "mae"]),
            min_samples_split=p.Log(lower=0.0000001, upper=1),
            regressor="decision_tree",
        )
        params = dict(
            noise_free=False,
            alpha=1.0,
            learning_rate="no",
            regressor="decision_tree",
            activation="no",
            solver="no",
        )
        # The evaluation additionally freezes the criterion and the split ratio.
        evalparams = dict(params, criterion="mse", min_samples_split=0.00001)
    elif regressor == "mlp":
        # Let us define the parameters of the neural network.
        parametrization = p.Instrumentation(
            activation=p.Choice(["identity", "logistic", "tanh", "relu"]),
            solver=p.Choice(["lbfgs", "sgd", "adam"]),
            regressor="mlp",
            learning_rate=p.Choice(["constant", "invscaling", "adaptive"]),
            alpha=p.Log(lower=0.0000001, upper=1.),
        )
        params = dict(noise_free=False, regressor="mlp", depth=-3, criterion="no", min_samples_split=0.1)
    else:
        # Explicit raise instead of `assert False`: under -O the assert would vanish and
        # the code below would fail on unbound `params` / `parametrization`.
        raise AssertionError(f"Problem type {regressor} undefined!")
    # build eval params if not specified
    if not evalparams:
        evalparams = dict(params)
    # For the evaluation we remove the noise (unless overfitter)
    evalparams["noise_free"] = not overfitter
    super().__init__(partial(self._ml_parametrization, **params), parametrization.set_name(""))
    self._evalparams = evalparams
    self.register_initialization(regressor=regressor, data_dimension=data_dimension, dataset=dataset, overfitter=overfitter)
def __init__(self, regressor: str, data_dimension: tp.Optional[int] = None, dataset: str = "artificial", overfitter: bool = False) -> None:
    """Configure the tuning problem for the requested regressor.

    Parameters
    ----------
    regressor: str
        one of "decision_tree_depth", "any", "decision_tree" or "mlp". It selects the
        parametrization and which keyword arguments of ``_ml_parametrization`` are
        frozen with ``partial``.
    data_dimension: int or None
        must be provided if and only if ``dataset`` contains "artificial".
    dataset: str
        name of the dataset.
    overfitter: bool
        ``evaluation_function`` is built with ``noise_free = not overfitter``.

    Raises
    ------
    AssertionError
        if ``data_dimension`` is inconsistent with ``dataset`` or if ``regressor`` is unknown.
    """
    self.regressor = regressor
    self.data_dimension = data_dimension
    self.dataset = dataset
    self.overfitter = overfitter
    self._descriptors: tp.Dict[str, tp.Any] = {}
    self.add_descriptors(regressor=regressor, data_dimension=data_dimension, dataset=dataset, overfitter=overfitter)
    self.name = regressor + f"Dim{data_dimension}"
    self.num_data: int = 0
    # Dimension does not make sense if we use a real world dataset.
    # Raised explicitly (same exception type as the former `assert`) so that the check
    # is not stripped when Python runs with -O.
    if ("artificial" in dataset) != (data_dimension is not None):
        raise AssertionError(
            "data_dimension must be provided if and only if the dataset is artificial "
            f"(dataset={dataset!r}, data_dimension={data_dimension!r})"
        )

    # Variables for storing the training set and the test set.
    self.X: np.ndarray = np.array([])
    self.y: np.ndarray

    # Variables for storing the cross-validation splits.
    self.X_train: tp.List[tp.Any] = []  # This will be the list of training subsets.
    self.X_valid: tp.List[tp.Any] = []  # This will be the list of validation subsets.
    self.y_train: tp.List[tp.Any] = []
    self.y_valid: tp.List[tp.Any] = []
    self.X_test: np.ndarray
    self.y_test: np.ndarray

    # Keyword arguments of `_ml_parametrization` frozen with "partial": `fixed` during
    # the optimization, `eval_fixed` for the evaluation. `noise_free` is added below.
    fixed: tp.Dict[str, tp.Any]
    eval_fixed: tp.Dict[str, tp.Any]
    if regressor == "decision_tree_depth":
        # Only the depth, as an evaluation.
        parametrization = p.Instrumentation(depth=p.Scalar(lower=1, upper=1200).set_integer_casting())
        # We optimize only the depth, so we fix all other parameters than the depth.
        fixed = dict(
            criterion="mse",
            min_samples_split=0.00001,
            regressor="decision_tree",
            alpha=1.0,
            learning_rate="no",
            activation="no",
            solver="no",
        )
        eval_fixed = dict(fixed)
    elif regressor == "any":
        # First we define the list of parameters in the optimization
        parametrization = p.Instrumentation(
            depth=p.Scalar(lower=1, upper=1200).set_integer_casting(),  # Depth, in case we use a decision tree.
            criterion=p.Choice(["mse", "friedman_mse", "mae"]),  # Criterion for building the decision tree.
            min_samples_split=p.Log(lower=0.0000001, upper=1),  # Min ratio of samples in a node for splitting.
            regressor=p.Choice(["mlp", "decision_tree"]),  # Type of regressor.
            activation=p.Choice(["identity", "logistic", "tanh", "relu"]),  # Activation function, in case we use a net.
            solver=p.Choice(["lbfgs", "sgd", "adam"]),  # Numerical optimizer.
            learning_rate=p.Choice(["constant", "invscaling", "adaptive"]),  # Learning rate schedule.
            alpha=p.Log(lower=0.0000001, upper=1.),  # Complexity penalization.
        )
        # Nothing but noise_free is frozen in this case.
        fixed = {}
        eval_fixed = {}
    elif regressor == "decision_tree":
        # We specify below the list of hyperparameters for the decision trees.
        parametrization = p.Instrumentation(
            depth=p.Scalar(lower=1, upper=1200).set_integer_casting(),
            criterion=p.Choice(["mse", "friedman_mse", "mae"]),
            min_samples_split=p.Log(lower=0.0000001, upper=1),
            regressor="decision_tree",
        )
        # We fix the parameters of the neural network, given that we work on the decision tree only.
        fixed = dict(alpha=1.0, learning_rate="no", regressor="decision_tree", activation="no", solver="no")
        # The evaluation additionally freezes the criterion and the split ratio.
        eval_fixed = dict(fixed, criterion="mse", min_samples_split=0.00001)
    elif regressor == "mlp":
        # Let us define the parameters of the neural network.
        parametrization = p.Instrumentation(
            activation=p.Choice(["identity", "logistic", "tanh", "relu"]),
            solver=p.Choice(["lbfgs", "sgd", "adam"]),
            regressor="mlp",
            learning_rate=p.Choice(["constant", "invscaling", "adaptive"]),
            alpha=p.Log(lower=0.0000001, upper=1.),
        )
        # We get rid of the parameters of the decision tree (we work on the neural net, not
        # on the decision tree).
        fixed = dict(regressor="mlp", depth=-3, criterion="no", min_samples_split=0.1)
        eval_fixed = dict(fixed)
    else:
        # Explicit raise instead of `assert False`: under -O the assert would vanish and
        # the object would be left without its parent initialization.
        raise AssertionError(f"Problem type {regressor} undefined!")

    # noise_free is False (meaning that we consider the cross-validation loss) during the optimization.
    super().__init__(partial(self._ml_parametrization, noise_free=False, **fixed), parametrization)
    # For the evaluation, we remove the noise (unless overfitter).
    self.evaluation_function = partial(  # type: ignore
        self._ml_parametrization, noise_free=not overfitter, **eval_fixed
    )
    # NOTE: the two statements below were already disabled in the original code.
    # assert data_dimension is not None or dataset[:10] != "artificial"
    # self.get_dataset(data_dimension, dataset)
    self.register_initialization(regressor=regressor, data_dimension=data_dimension, dataset=dataset, overfitter=overfitter)