Example #1
def test_experiment_function() -> None:
    ifunc = base.ExperimentFunction(
        _arg_return,
        p.Instrumentation(  # type: ignore
            p.Choice([1, 12]),
            "constant",
            p.Array(shape=(2, 2)),
            constkwarg="blublu",
            plop=p.Choice([3, 4]),
        ))
    np.testing.assert_equal(ifunc.dimension, 8)
    data = [-100.0, 100, 1, 2, 3, 4, 100, -100]
    args0, kwargs0 = ifunc.parametrization.spawn_child().set_standardized_data(data).value
    output = ifunc(*args0, **kwargs0)  # this is very stupid and should be removed when Parameter is in use
    args: tp.Any = output[0]  # type: ignore
    kwargs: tp.Any = output[1]  # type: ignore
    testing.printed_assert_equal(args, [12, "constant", [[1, 2], [3, 4]]])
    testing.printed_assert_equal(kwargs, {"constkwarg": "blublu", "plop": 3})
    instru_str = ("Instrumentation(Tuple(Choice(choices=Tuple(1,12),"
                  "weights=Array{(1,2)}),constant,"
                  "Array{(2,2)}),"
                  "Dict(constkwarg=blublu,plop=Choice(choices=Tuple(3,4),"
                  "weights=Array{(1,2)})))")
    testing.printed_assert_equal(
        ifunc.descriptors,
        {
            "dimension": 8,
            "name": "_arg_return",
            "function_class": "ExperimentFunction",
            "parametrization": instru_str,
        },
    )
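The test above only checks dimensions, argument reconstruction and descriptors. As a minimal, self-contained sketch of how an Instrumentation-based function is typically optimized (the loss, budget and optimizer below are illustrative assumptions, not part of the test):

import nevergrad as ng

def illustrative_loss(x: float, y: float = 0.0) -> float:
    # simple quadratic used only for this sketch
    return (x - 1.0) ** 2 + (y + 2.0) ** 2

param = ng.p.Instrumentation(ng.p.Scalar(), y=ng.p.Scalar())
optimizer = ng.optimizers.OnePlusOne(parametrization=param, budget=200)
recommendation = optimizer.minimize(illustrative_loss)
print(recommendation.value)  # ((x,), {"y": y}) with x near 1.0 and y near -2.0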
Example #2
def _make_pyomo_variable_to_parametrization(model_component: pyomo.Var, params: ParamDict) -> ParamDict:
    # https://pyomo.readthedocs.io/en/stable/pyomo_modeling_components/Sets.html
    # Refer to the implementation in pyomo/core/base/var.py
    # To further improve the readability of this function, we should find out how to represent {None: ng.p.Scalar(), 1: ng.p.Scalar()} in ng.p.Dict
    # We do not adopt nested parametrization, which would require keeping type information to convert between string and int keys.
    # Such conversion would have to be done in _pyomo_obj_function_wrapper and _pyomo_constraint_wrapper, which slows down the optimization.
    if not isinstance(model_component, (pyomo.base.var.IndexedVar, pyomo.base.var.SimpleVar)):
        raise NotImplementedError  # Normally, Pyomo will create a set for the indices used by a variable
    for k, v in model_component._data.items():
        if isinstance(v, pyomo.base.var._GeneralVarData):
            if v.is_fixed():
                raise NotImplementedError
            if k is None:
                params_name = str(model_component.name)
            else:
                params_name = f"{model_component.name}[{_convert_to_ng_name(k)}]"
            if isinstance(v.domain, pyomo.RangeSet):
                params = _make_pyomo_range_set_to_parametrization(v.domain, params, params_name)
            elif isinstance(v.domain, pyomo.Set) and v.domain.isfinite():
                if v.domain.isordered():
                    params[params_name] = p.Choice(list(v.domain.ordered_data()))
                else:
                    params[params_name] = p.Choice(list(v.domain.data()))
            else:
                raise NotImplementedError(f"Cannot handle domain type {type(v.domain)}")
        else:
            raise NotImplementedError(f"Cannot handle variable type {type(v)}")
    return params
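A hedged follow-up sketch of what is usually done with the collected ParamDict: wrapping it in an ng.p.Dict so that an optimizer can treat all Pyomo variables as one parametrization. The variable names and bounds below are illustrative, not taken from a real model.

params: ParamDict = {}
params["x"] = p.Scalar(lower=0.0, upper=1.0)   # stand-in for a continuous Pyomo variable
params["mode"] = p.Choice(["a", "b", "c"])     # stand-in for a finite-domain variable
instrumentation = p.Dict(**params)             # p.Dict accepts named child parameters
print(instrumentation.value)                   # e.g. {'x': 0.5, 'mode': 'a'} (bound midpoint / sampled choice)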
Example #3
def test_experiment_function() -> None:
    param = p.Instrumentation(
        p.Choice([1, 12]),
        "constant",
        p.Array(shape=(2, 2)),
        constkwarg="blublu",
        plop=p.Choice([3, 4]),
    )
    with pytest.raises(RuntimeError):
        base.ExperimentFunction(_arg_return, param)
    param.set_name("myparam")
    ifunc = base.ExperimentFunction(_arg_return, param)
    np.testing.assert_equal(ifunc.dimension, 8)
    data = [-100.0, 100, 1, 2, 3, 4, 100, -100]
    args0, kwargs0 = ifunc.parametrization.spawn_child().set_standardized_data(data).value
    output: tp.Any = ifunc(*args0, **kwargs0)
    args: tp.Any = output[0]
    kwargs: tp.Any = output[1]
    testing.printed_assert_equal(args, [12, "constant", [[1, 2], [3, 4]]])
    testing.printed_assert_equal(kwargs, {"constkwarg": "blublu", "plop": 3})
    testing.printed_assert_equal(
        ifunc.descriptors,
        {
            "dimension": 8,
            "name": "_arg_return",
            "function_class": "ExperimentFunction",
            "parametrization": "myparam"
        },
    )
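The RuntimeError above is raised because the experiment rejects an auto-generated parametrization name; a short sketch of the naming behaviour this relies on (values shown are illustrative):

param = p.Instrumentation(p.Scalar(), plop=p.Choice([3, 4]))
print(param.name)          # long auto-generated descriptor such as "Instrumentation(Tuple(...),Dict(...))"
param.set_name("myparam")  # set_name returns the parameter itself, so it can be chained
print(param.name)          # "myparam"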
Example #4
def test_deterministic_data_setter() -> None:
    instru = p.Instrumentation(p.Choice([0, 1, 2, 3]), y=p.Choice([0, 1, 2, 3]))
    ifunc = base.ExperimentFunction(_Callable(), instru)
    data = [0.01, 0, 0, 0, 0.01, 0, 0, 0]
    for _ in range(20):
        args, kwargs = ifunc.parametrization.spawn_child().set_standardized_data(data, deterministic=True).value
        testing.printed_assert_equal(args, [0])
        testing.printed_assert_equal(kwargs, {"y": 0})
    arg_sum, kwarg_sum = 0, 0
    for _ in range(24):
        args, kwargs = ifunc.parametrization.spawn_child().set_standardized_data(data, deterministic=False).value
        arg_sum += args[0]
        kwarg_sum += kwargs["y"]
    assert arg_sum != 0
    assert kwarg_sum != 0
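A stripped-down sketch of the deterministic flag exercised above, on a bare Choice (the data vector is an illustrative assumption):

choice = p.Choice([0, 1, 2, 3])
child = choice.spawn_child().set_standardized_data([0.01, 0.0, 0.0, 0.0], deterministic=True)
print(child.value)  # always 0: with deterministic=True the highest-weight option is selected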
Example #5
def _make_pyomo_range_set_to_parametrization(
    domain: pyomo.RangeSet, params: ParamDict, params_name: str
) -> ParamDict:
    # https://pyomo.readthedocs.io/en/stable/pyomo_modeling_components/Sets.html
    # Refer to the implementation in pyomo/core/base/set.py
    ranges = list(domain.ranges())
    num_ranges = len(ranges)
    if num_ranges == 1 and (ranges[0].step in [-1, 0, 1]):
        if isinstance(ranges[0], pyomo.base.range.NumericRange):
            lb, ub = ranges[0].start, ranges[0].end
            if ranges[0].step < 0:
                lb, ub = ub, lb
            if (lb is not None) and (not ranges[0].closed[0]):
                lb = float(np.nextafter(lb, 1))
            if (ub is not None) and (not ranges[0].closed[1]):
                ub = float(np.nextafter(ub, -1))
            params[params_name] = p.Scalar(lower=lb, upper=ub)
            if ranges[0].step in [-1, 1]:
                # May consider using nested param
                params[params_name].set_integer_casting()  # type: ignore
        else:
            raise NotImplementedError(f"Cannot handle range type {type(ranges[0])}")
    elif isinstance(domain, pyomo.FiniteSimpleRangeSet):
        # Need to handle step size
        params[params_name] = p.Choice([range(*r) for r in domain.ranges()])  # Assume the ranges do not overlap
    else:
        raise NotImplementedError(f"Cannot handle domain type {type(domain)}")
    return params
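The open-bound handling above leans on np.nextafter to nudge an open bound strictly inside the interval; a quick standalone illustration:

import numpy as np

lb = float(np.nextafter(0.0, 1))   # smallest float strictly greater than 0.0
print(0.0 < lb)                    # True: the bound is now usable as a closed lower bound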
Example #6
def test_bound_scaler() -> None:
    ref = p.Instrumentation(
        p.Array(shape=(1, 2)).set_bounds(-12, 12, method="arctan"),
        p.Array(shape=(2, )).set_bounds(-12, 12, full_range_sampling=False),
        lr=p.Log(lower=0.001, upper=1000),
        stuff=p.Scalar(lower=-1, upper=2),
        unbounded=p.Scalar(lower=-1, init=0.0),
        value=p.Scalar(),
        letter=p.Choice("abc"),
    )
    param = ref.spawn_child()
    scaler = utils.BoundScaler(param)
    output = scaler.transform([1.0] * param.dimension, lambda x: x)
    param.set_standardized_data(output)
    (array1, array2), values = param.value
    np.testing.assert_array_almost_equal(array1, [[12, 12]])
    np.testing.assert_array_almost_equal(array2, [1, 1])
    assert values["stuff"] == 2
    assert values["unbounded"] == 1
    assert values["value"] == 1
    np.testing.assert_almost_equal(values["lr"], 1000)
    # again, on the middle point
    output = scaler.transform([0] * param.dimension, lambda x: x)
    param.set_standardized_data(output)
    np.testing.assert_almost_equal(param.value[1]["lr"], 1.0)
    np.testing.assert_almost_equal(param.value[1]["stuff"], 0.5)
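Why the middle of the standardized range maps lr back to 1.0: p.Log samples log-uniformly between its bounds, so its default value sits at the geometric mean of the bounds (a small check, assuming default initialization):

lr = p.Log(lower=0.001, upper=1000)
print(lr.value)  # 1.0: the geometric mean of the bounds, sqrt(0.001 * 1000)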
Example #7
def test_bound_scaler() -> None:
    ref = p.Instrumentation(
        p.Array(shape=(1, 2)).set_bounds(-12, 12, method="arctan"),
        p.Array(shape=(2, )).set_bounds(-12, 12, full_range_sampling=False),
        lr=p.Log(lower=0.001, upper=1000),
        stuff=p.Scalar(lower=-1, upper=2),
        unbounded=p.Scalar(lower=-1, init=0.0),
        value=p.Scalar(),
        letter=p.Choice("abc"),
    )
    # make sure the order is preserved using legacy split method
    expected = [x[1] for x in split_as_data_parameters(ref)]
    assert p.helpers.list_data(ref) == expected
    # check the bounds
    param = ref.spawn_child()
    scaler = utils.BoundScaler(param)
    output = scaler.transform([1.0] * param.dimension, lambda x: x)
    param.set_standardized_data(output)
    (array1, array2), values = param.value
    np.testing.assert_array_almost_equal(array1, [[12, 12]])
    np.testing.assert_array_almost_equal(array2, [1, 1])
    assert values["stuff"] == 2
    assert values["unbounded"] == 1
    assert values["value"] == 1
    assert values["lr"] == pytest.approx(1000)
    # again, on the middle point
    output = scaler.transform([0] * param.dimension, lambda x: x)
    param.set_standardized_data(output)
    assert param.value[1]["lr"] == pytest.approx(1.0)
    assert param.value[1]["stuff"] == pytest.approx(0.5)
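A small sketch of the arctan bounding checked above: large standardized values saturate close to the bound rather than being clipped exactly onto it (the data values are illustrative):

arr = p.Array(shape=(2,)).set_bounds(-12, 12, method="arctan")
arr.set_standardized_data([100.0, -100.0])
print(arr.value)  # approximately [ 12., -12.]: arctan squashes large values toward the bounds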
Example #8
def _make_parametrization(name: str,
                          dimension: int,
                          bounding_method: str = "bouncing",
                          rolling: bool = False) -> p.Array:
    """Creates appropriate parametrization for a Photonics problem

    Parameters
    ----------
    name: str
        problem name, among "bragg", "chirped" and "morpho"
    dimension: int
        size of the problem, among 16, 40 and 60 (morpho) or 80 (bragg and chirped)
    bounding_method: str
        transform type for the bounding ("arctan", "tanh", "bouncing" or "clipping", see `Array.bounded`)
    rolling: bool
        whether to add a rolling (translation) mutation to the custom mutation choice

    Returns
    -------
    Array
        the parametrization for the problem
    """
    if name == "bragg":
        shape = (2, dimension // 2)
        bounds = [(2, 3), (30, 180)]
    elif name == "chirped":
        shape = (1, dimension)
        bounds = [(30, 180)]
    elif name == "morpho":
        shape = (4, dimension // 4)
        bounds = [(0, 300), (0, 600), (30, 600), (0, 300)]
    else:
        raise NotImplementedError(f"Transform for {name} is not implemented")
    divisor = max(2, len(bounds))
    assert not dimension % divisor, f"points length should be a multiple of {divisor}, got {dimension}"
    assert shape[0] * shape[1] == dimension, f"Cannot work with dimension {dimension} for {name}: not divisible by {shape[0]}."
    b_array = np.array(bounds)
    assert b_array.shape[0] == shape[0]  # pylint: disable=unsubscriptable-object
    init = np.sum(b_array, axis=1, keepdims=True).dot(np.ones((1, shape[1]))) / 2
    array = p.Array(init=init)
    if bounding_method not in ("arctan", "tanh"):
        # sigma must be adapted for clipping and constraint methods
        sigma = p.Array(init=[[10.0]] if name != "bragg" else [[0.03], [10.0]]).set_mutation(exponent=2.0)  # type: ignore
        array.set_mutation(sigma=sigma)
    if rolling:
        array.set_mutation(custom=p.Choice(["gaussian", "cauchy", p.mutation.Translation(axis=1)]))
    array.set_bounds(b_array[:, [0]], b_array[:, [1]], method=bounding_method, full_range_sampling=True)
    array.set_recombination(p.mutation.Crossover(axis=1)).set_name("")
    assert array.dimension == dimension, f"Unexpected {array} for dimension {dimension}"
    return array
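An illustrative call of the helper above, following its "bragg" branch (dimension must be a multiple of 2, and the result is a (2, dimension // 2) Array with per-row bounds):

array = _make_parametrization("bragg", 16)
print(array.value.shape)  # (2, 8)
print(array.dimension)    # 16, as asserted at the end of the helper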
Example #9
def test_instrumented_function_kwarg_order() -> None:
    ifunc = base.ExperimentFunction(_arg_return, p.Instrumentation(  # type: ignore
        kw4=p.Choice([1, 0]), kw2="constant", kw3=p.Array(shape=(2, 2)), kw1=p.Scalar(2.0).set_mutation(sigma=2.0)
    ))
    np.testing.assert_equal(ifunc.dimension, 7)
    data = np.array([-1, 1, 2, 3, 4, 100, -100])
    args0, kwargs0 = ifunc.parametrization.spawn_child().set_standardized_data(data).value
    # this is very stupid and should be removed when Parameter is in use
    kwargs: tp.Any = ifunc(*args0, **kwargs0)[1]   # type: ignore
    testing.printed_assert_equal(kwargs, {"kw1": 0, "kw2": "constant", "kw3": [[1, 2], [3, 4]], "kw4": 1})
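The expected dimension of 7 above decomposes per parameter; a quick check of each contribution (a constant such as kw2 adds no dimensions):

print(p.Choice([1, 0]).dimension)                       # 2: one softmax weight per option
print(p.Array(shape=(2, 2)).dimension)                  # 4
print(p.Scalar(2.0).set_mutation(sigma=2.0).dimension)  # 1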
Example #10
def test_deterministic_data_setter() -> None:
    instru = p.Instrumentation(p.Choice([0, 1, 2, 3]), y=p.Choice([0, 1, 2, 3]))
    ifunc = base.ExperimentFunction(_Callable(), instru)
    data = [0.01, 0, 0, 0, 0.01, 0, 0, 0]
    for _ in range(20):
        args, kwargs = ifunc.parametrization.spawn_child().set_standardized_data(data, deterministic=True).value
        testing.printed_assert_equal(args, [0])
        testing.printed_assert_equal(kwargs, {"y": 0})
    arg_sum, kwarg_sum = 0, 0
    for _ in range(24):
        args, kwargs = ifunc.parametrization.spawn_child().set_standardized_data(data, deterministic=False).value
        arg_sum += args[0]
        kwarg_sum += kwargs["y"]
    assert arg_sum != 0
    assert kwarg_sum != 0


@testing.parametrized(
    floats=((p.Scalar(), p.Scalar(init=12.0)), True, False),
    array_int=((p.Scalar(), p.Array(shape=(1,)).set_integer_casting()), False, False),
    softmax_noisy=((p.Choice(["blue", "red"]), p.Array(shape=(1,))), True, True),
    softmax_deterministic=((p.Choice(["blue", "red"], deterministic=True), p.Array(shape=(1,))), False, False),
    ordered_discrete=((p.TransitionChoice([True, False]), p.Array(shape=(1,))), False, False),
)
def test_parametrization_continuous_noisy(
    variables: tp.Tuple[p.Parameter, ...], continuous: bool, noisy: bool
) -> None:
    instru = p.Instrumentation(*variables)
    assert instru.descriptors.continuous == continuous
    assert instru.descriptors.deterministic != noisy
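A standalone illustration of the descriptor flags asserted above, using the "softmax_noisy" case (a stochastic Choice keeps the space continuous but makes it non-deterministic):

instru = p.Instrumentation(p.Choice(["blue", "red"]), p.Array(shape=(1,)))
print(instru.descriptors.continuous)     # True
print(instru.descriptors.deterministic)  # False: the Choice is sampled stochastically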
Example #11
    def __init__(
        self,
        regressor: str,
        data_dimension: tp.Optional[int] = None,
        dataset: str = "artificial",
        overfitter: bool = False
    ) -> None:
        self.regressor = regressor
        self.data_dimension = data_dimension
        self.dataset = dataset
        self.overfitter = overfitter
        self._descriptors: tp.Dict[str, tp.Any] = {}
        self.add_descriptors(regressor=regressor, data_dimension=data_dimension, dataset=dataset, overfitter=overfitter)
        self.name = regressor + f"Dim{data_dimension}"
        self.num_data = 120  # default for artificial function
        self._cross_val_num = 10  # number of cross validation
        # Dimension does not make sense if we use a real world dataset.
        assert bool("artificial" in dataset) == bool(data_dimension is not None)

        # Variables for storing the training set and the test set.
        self.X: np.ndarray = np.array([])
        self.y: np.ndarray

        # Variables for storing the cross-validation splits.
        self.X_train_cv: tp.List[tp.Any] = []  # This will be the list of training subsets.
        self.X_valid_cv: tp.List[tp.Any] = []  # This will be the list of validation subsets.
        self.y_train_cv: tp.List[tp.Any] = []
        self.y_valid_cv: tp.List[tp.Any] = []
        self.X_train: np.ndarray
        self.y_train: np.ndarray
        self.X_test: np.ndarray
        self.y_test: np.ndarray

        evalparams: tp.Dict[str, tp.Any] = {}
        if regressor == "decision_tree_depth":
            # Only the depth, as an evaluation.
            parametrization = p.Instrumentation(depth=p.Scalar(lower=1, upper=1200).set_integer_casting())
            # We optimize only the depth, so we fix all other parameters than the depth
            params = dict(noise_free=False, criterion="mse",
                          min_samples_split=0.00001,
                          regressor="decision_tree",
                          alpha=1.0, learning_rate="no",
                          activation="no", solver="no")
        elif regressor == "any":
            # First we define the list of parameters in the optimization
            parametrization = p.Instrumentation(
                depth=p.Scalar(lower=1, upper=1200).set_integer_casting(),  # Depth, in case we use a decision tree.
                criterion=p.Choice(["mse", "friedman_mse", "mae"]),  # Criterion for building the decision tree.
                min_samples_split=p.Log(lower=0.0000001, upper=1),  # Min ratio of samples in a node for splitting.
                regressor=p.Choice(["mlp", "decision_tree"]),  # Type of regressor.
                activation=p.Choice(["identity", "logistic", "tanh", "relu"]),  # Activation function, in case we use a net.
                solver=p.Choice(["lbfgs", "sgd", "adam"]),  # Numerical optimizer.
                learning_rate=p.Choice(["constant", "invscaling", "adaptive"]),  # Learning rate schedule.
                alpha=p.Log(lower=0.0000001, upper=1.),  # Complexity penalization.
            )
            # noise_free is False (meaning that we consider the cross-validation loss) during the optimization.
            params = dict(noise_free=False)
        elif regressor == "decision_tree":
            # We specify below the list of hyperparameters for the decision trees.
            parametrization = p.Instrumentation(
                depth=p.Scalar(lower=1, upper=1200).set_integer_casting(),
                criterion=p.Choice(["mse", "friedman_mse", "mae"]),
                min_samples_split=p.Log(lower=0.0000001, upper=1),
                regressor="decision_tree",
            )
            params = dict(noise_free=False,
                          alpha=1.0, learning_rate="no", regressor="decision_tree",
                          activation="no", solver="no")
            evalparams = dict(params, criterion="mse", min_samples_split=0.00001)
        elif regressor == "mlp":
            # Let us define the parameters of the neural network.
            parametrization = p.Instrumentation(
                activation=p.Choice(["identity", "logistic", "tanh", "relu"]),
                solver=p.Choice(["lbfgs", "sgd", "adam"]),
                regressor="mlp",
                learning_rate=p.Choice(["constant", "invscaling", "adaptive"]),
                alpha=p.Log(lower=0.0000001, upper=1.),
            )
            params = dict(noise_free=False, regressor="mlp", depth=-3, criterion="no", min_samples_split=0.1)
        else:
            assert False, f"Problem type {regressor} undefined!"
        # build eval params if not specified
        if not evalparams:
            evalparams = dict(params)
        # For the evaluation we remove the noise (unless overfitter)
        evalparams["noise_free"] = not overfitter
        super().__init__(partial(self._ml_parametrization, **params), parametrization.set_name(""))
        self._evalparams = evalparams
        self.register_initialization(regressor=regressor, data_dimension=data_dimension, dataset=dataset,
                                     overfitter=overfitter)
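The core pattern above — freeze most keyword arguments with functools.partial for the optimization, and keep a separate evalparams dict with noise_free flipped for the final evaluation — in a minimal, self-contained sketch (the loss below is an illustrative stand-in for _ml_parametrization):

from functools import partial

def toy_loss(depth: int, noise_free: bool = False, criterion: str = "mse") -> float:
    base_value = float(depth)
    return base_value if noise_free else base_value + 0.1  # pretend the cross-validation loss is noisier

params = dict(noise_free=False, criterion="mse")   # what the optimizer sees
evalparams = dict(params, noise_free=True)         # what the final evaluation uses
train_loss = partial(toy_loss, **params)
eval_loss = partial(toy_loss, **evalparams)
print(train_loss(depth=3), eval_loss(depth=3))     # 3.1 3.0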
Example #12
    def __init__(self,
                 regressor: str,
                 data_dimension: tp.Optional[int] = None,
                 dataset: str = "artificial",
                 overfitter: bool = False) -> None:
        self.regressor = regressor
        self.data_dimension = data_dimension
        self.dataset = dataset
        self.overfitter = overfitter
        self._descriptors: tp.Dict[str, tp.Any] = {}
        self.add_descriptors(regressor=regressor,
                             data_dimension=data_dimension,
                             dataset=dataset,
                             overfitter=overfitter)
        self.name = regressor + f"Dim{data_dimension}"
        self.num_data: int = 0
        # Dimension does not make sense if we use a real world dataset.
        assert bool("artificial" in dataset) == bool(data_dimension is not None)

        # Variables for storing the training set and the test set.
        self.X: np.ndarray = np.array([])
        self.y: np.ndarray

        # Variables for storing the cross-validation splits.
        self.X_train: tp.List[tp.Any] = []  # This will be the list of training subsets.
        self.X_valid: tp.List[tp.Any] = []  # This will be the list of validation subsets.
        self.y_train: tp.List[tp.Any] = []
        self.y_valid: tp.List[tp.Any] = []
        self.X_test: np.ndarray
        self.y_test: np.ndarray

        if regressor == "decision_tree_depth":
            # Only the depth, as an evaluation.
            parametrization = p.Instrumentation(
                depth=p.Scalar(lower=1, upper=1200).set_integer_casting())
            # We optimize only the depth, so we fix all other parameters than the depth, using "partial".
            super().__init__(
                partial(self._ml_parametrization,
                        noise_free=False,
                        criterion="mse",
                        min_samples_split=0.00001,
                        regressor="decision_tree",
                        alpha=1.0,
                        learning_rate="no",
                        activation="no",
                        solver="no"), parametrization)
            # For the evaluation, we remove the noise.
            self.evaluation_function = partial(
                self._ml_parametrization,  # type: ignore
                noise_free=not overfitter,
                criterion="mse",
                min_samples_split=0.00001,
                regressor="decision_tree",
                alpha=1.0,
                learning_rate="no",
                activation="no",
                solver="no")
        elif regressor == "any":
            # First we define the list of parameters in the optimization
            parametrization = p.Instrumentation(
                depth=p.Scalar(lower=1, upper=1200).set_integer_casting(),  # Depth, in case we use a decision tree.
                criterion=p.Choice(["mse", "friedman_mse", "mae"]),  # Criterion for building the decision tree.
                min_samples_split=p.Log(lower=0.0000001, upper=1),  # Min ratio of samples in a node for splitting.
                regressor=p.Choice(["mlp", "decision_tree"]),  # Type of regressor.
                activation=p.Choice(["identity", "logistic", "tanh", "relu"]),  # Activation function, in case we use a net.
                solver=p.Choice(["lbfgs", "sgd", "adam"]),  # Numerical optimizer.
                learning_rate=p.Choice(["constant", "invscaling", "adaptive"]),  # Learning rate schedule.
                alpha=p.Log(lower=0.0000001, upper=1.),  # Complexity penalization.
            )
            # Only the dimension is fixed, so "partial" is just used for fixing the dimension.
            # noise_free is False (meaning that we consider the cross-validation loss) during the optimization.
            super().__init__(
                partial(self._ml_parametrization, noise_free=False),
                parametrization)
            # For the evaluation we use the test set, which is big, so noise_free = True.
            self.evaluation_function = partial(
                self._ml_parametrization,  # type: ignore
                noise_free=not overfitter)
        elif regressor == "decision_tree":
            # We specify below the list of hyperparameters for the decision trees.
            parametrization = p.Instrumentation(
                depth=p.Scalar(lower=1, upper=1200).set_integer_casting(),
                criterion=p.Choice(["mse", "friedman_mse", "mae"]),
                min_samples_split=p.Log(lower=0.0000001, upper=1),
                regressor="decision_tree",
            )
            # We use "partial" for fixing the parameters of the neural network, given that we work on the decision tree only.
            super().__init__(
                partial(self._ml_parametrization,
                        noise_free=False,
                        alpha=1.0,
                        learning_rate="no",
                        regressor="decision_tree",
                        activation="no",
                        solver="no"), parametrization)
            # For the test we just switch noise_free to True.
            self.evaluation_function = partial(
                self._ml_parametrization,
                criterion="mse",  # type: ignore
                min_samples_split=0.00001,
                regressor="decision_tree",
                noise_free=not overfitter,
                alpha=1.0,
                learning_rate="no",
                activation="no",
                solver="no")
        elif regressor == "mlp":
            # Let us define the parameters of the neural network.
            parametrization = p.Instrumentation(
                activation=p.Choice(["identity", "logistic", "tanh", "relu"]),
                solver=p.Choice(["lbfgs", "sgd", "adam"]),
                regressor="mlp",
                learning_rate=p.Choice(["constant", "invscaling", "adaptive"]),
                alpha=p.Log(lower=0.0000001, upper=1.),
            )
            # And, using partial, we get rid of the parameters of the decision tree (we work on the neural net, not
            # on the decision tree).
            super().__init__(
                partial(self._ml_parametrization,
                        noise_free=False,
                        regressor="mlp",
                        depth=-3,
                        criterion="no",
                        min_samples_split=0.1), parametrization)
            self.evaluation_function = partial(
                self._ml_parametrization,  # type: ignore
                regressor="mlp",
                noise_free=not overfitter,
                depth=-3,
                criterion="no",
                min_samples_split=0.1)
        else:
            assert False, f"Problem type {regressor} undefined!"

        # assert data_dimension is not None or dataset[:10] != "artificial"
        # self.get_dataset(data_dimension, dataset)
        self.register_initialization(regressor=regressor,
                                     data_dimension=data_dimension,
                                     dataset=dataset,
                                     overfitter=overfitter)