def __init__(self, transform: tp.Optional[str] = None) -> None:
    """Set up the pixel-value function on the "Landscape" image.

    Parameters
    ----------
    transform: optional str
        None keeps the default two-scalar parametrization ("standard"),
        "gaussian" uses one TransitionChoice per image axis,
        "square" uses one Scalar per image axis, with init and sigma equal to half the axis extent.

    Raises
    ------
    ValueError
        if transform is neither None, "gaussian" nor "square"
    """
    default_param = p.Instrumentation(p.Scalar(), p.Scalar())
    super().__init__(self._get_pixel_value, default_param.set_name("standard"))
    self.register_initialization(transform=transform)
    self._image = datasets.get_data("Landscape")
    image_shape = self._image.shape
    if transform == "gaussian":
        # one discrete index variable per axis of the image
        index_choices = [p.TransitionChoice(list(range(size))) for size in image_shape]
        self.parametrization = p.Instrumentation(*index_choices).set_name("gaussian")
    elif transform == "square":
        half_extents = (np.array(image_shape) - 1.0) / 2.0
        scalars = [p.Scalar(init=half).set_mutation(sigma=half) for half in half_extents]
        # NOTE(review): the original author flagged this branch as "maybe buggy, try again?"
        self.parametrization = p.Instrumentation(*scalars).set_name("square")
    elif transform is not None:
        raise ValueError(f"Unknown transform {transform}")
    self._max = float(self._image.max())
def test_softmax_categorical() -> None:
    """Check sampling and deterministic round-trip of a SoftmaxCategorical variable."""
    np.random.seed(12)  # sampling below is random, fix the seed
    choices = ["blu", "blublu", "blublublu"]
    token = p.Instrumentation(variables.SoftmaxCategorical(choices))
    sampled = token.data_to_arguments([0.5, 1.0, 1.5])
    assert sampled == wrap_arg("blublu")
    # converting an argument to data and back must be stable in deterministic mode
    blu_data = token.arguments_to_data("blu")
    recovered = token.data_to_arguments(blu_data, deterministic=True)
    assert recovered == wrap_arg("blu")
def test_ordered_discrete() -> None:
    """Check the data -> argument mapping of an OrderedDiscrete variable."""
    choices = ["blu", "blublu", "blublublu"]
    token = p.Instrumentation(variables.OrderedDiscrete(choices))
    high = token.data_to_arguments([5])
    assert high == wrap_arg("blublublu")
    middle = token.data_to_arguments([0])
    assert middle == wrap_arg("blublu")
    # converting an argument to data and back must be stable in deterministic mode
    blu_data = token.arguments_to_data("blu")
    recovered = token.data_to_arguments(blu_data, deterministic=True)
    assert recovered == wrap_arg("blu")
def test_bound_scaler() -> None:
    """Check the values produced by BoundScaler at the upper end and at the middle point."""

    def _identity(x: tp.Any) -> tp.Any:
        return x

    positional = (
        p.Array(shape=(1, 2)).set_bounds(-12, 12, method="arctan"),
        p.Array(shape=(2,)).set_bounds(-12, 12, full_range_sampling=False),
    )
    named = dict(
        lr=p.Log(lower=0.001, upper=1000),
        stuff=p.Scalar(lower=-1, upper=2),
        unbounded=p.Scalar(lower=-1, init=0.0),
        value=p.Scalar(),
        letter=p.Choice("abc"),
    )
    ref = p.Instrumentation(*positional, **named)
    param = ref.spawn_child()
    scaler = utils.BoundScaler(param)
    # all ones as input
    scaled = scaler.transform([1.0] * param.dimension, _identity)
    param.set_standardized_data(scaled)
    (array1, array2), values = param.value
    np.testing.assert_array_almost_equal(array1, [[12, 12]])
    np.testing.assert_array_almost_equal(array2, [1, 1])
    assert values["stuff"] == 2
    assert values["unbounded"] == 1
    assert values["value"] == 1
    np.testing.assert_almost_equal(values["lr"], 1000)
    # again, on the middle point
    scaled = scaler.transform([0] * param.dimension, _identity)
    param.set_standardized_data(scaled)
    np.testing.assert_almost_equal(param.value[1]["lr"], 1.0)
    np.testing.assert_almost_equal(param.value[1]["stuff"], 0.5)
def test_experiment_function() -> None:
    """Check dimension, call forwarding and descriptors of an ExperimentFunction."""
    instru = p.Instrumentation(  # type: ignore
        p.Choice([1, 12]),
        "constant",
        p.Array(shape=(2, 2)),
        constkwarg="blublu",
        plop=p.Choice([3, 4]),
    )
    ifunc = base.ExperimentFunction(_arg_return, instru)
    np.testing.assert_equal(ifunc.dimension, 8)
    data = [-100.0, 100, 1, 2, 3, 4, 100, -100]
    child = ifunc.parametrization.spawn_child().set_standardized_data(data)
    args0, kwargs0 = child.value
    # this is very stupid and should be removed when Parameter is in use
    output = ifunc(*args0, **kwargs0)
    args: tp.Any = output[0]  # type: ignore
    kwargs: tp.Any = output[1]  # type: ignore
    testing.printed_assert_equal(args, [12, "constant", [[1, 2], [3, 4]]])
    testing.printed_assert_equal(kwargs, {"constkwarg": "blublu", "plop": 3})
    # expected string representation of the parametrization, as exposed in the descriptors
    instru_str = (
        "Instrumentation(Tuple(Choice(choices=Tuple(1,12),"
        "weights=Array{(1,2)}),constant,"
        "Array{(2,2)}),"
        "Dict(constkwarg=blublu,plop=Choice(choices=Tuple(3,4),"
        "weights=Array{(1,2)})))"
    )
    expected_descriptors = {
        "dimension": 8,
        "name": "_arg_return",
        "function_class": "ExperimentFunction",
        "parametrization": instru_str,
    }
    testing.printed_assert_equal(ifunc.descriptors, expected_descriptors)
def test_experiment_function() -> None:
    """Check that ExperimentFunction refuses this parametrization until it is named, then works."""
    positional = (p.Choice([1, 12]), "constant", p.Array(shape=(2, 2)))
    named = dict(constkwarg="blublu", plop=p.Choice([3, 4]))
    param = p.Instrumentation(*positional, **named)
    # before set_name is called, building the function is expected to fail
    with pytest.raises(RuntimeError):
        base.ExperimentFunction(_arg_return, param)
    param.set_name("myparam")
    ifunc = base.ExperimentFunction(_arg_return, param)
    np.testing.assert_equal(ifunc.dimension, 8)
    data = [-100.0, 100, 1, 2, 3, 4, 100, -100]
    child = ifunc.parametrization.spawn_child().set_standardized_data(data)
    args0, kwargs0 = child.value
    output: tp.Any = ifunc(*args0, **kwargs0)
    args: tp.Any = output[0]
    kwargs: tp.Any = output[1]
    testing.printed_assert_equal(args, [12, "constant", [[1, 2], [3, 4]]])
    testing.printed_assert_equal(kwargs, {"constkwarg": "blublu", "plop": 3})
    expected_descriptors = {
        "dimension": 8,
        "name": "_arg_return",
        "function_class": "ExperimentFunction",
        "parametrization": "myparam",
    }
    testing.printed_assert_equal(ifunc.descriptors, expected_descriptors)
def __init__(self, function: tp.Callable[..., float], parametrization: p.Parameter) -> None:
    """Bind a callable and the parametrization of its inputs to this instance.

    Parameters
    ----------
    function: callable
        the callable to wrap
    parametrization: p.Parameter
        parametrization of the inputs of the callable; anything which is not a
        `Parameter` instance is wrapped into a `p.Instrumentation`

    Raises
    ------
    RuntimeError
        if the instance still defines one of the removed hooks
        "get_postponing_delay" or "noisefree_function"

    Note
    ----
    Subclasses must call this initializer before "register_initialization"
    (checked through an assertion).
    """
    assert callable(function)
    assert not hasattr(
        self, "_initialization_kwargs"
    ), '"register_initialization" was called before super().__init__'
    self._initialization_kwargs: tp.Optional[tp.Dict[str, tp.Any]] = None
    self._descriptors: tp.Dict[str, tp.Any] = {"function_class": self.__class__.__name__}
    self._parametrization = Parameter()
    self.parametrization = (
        parametrization if isinstance(parametrization, Parameter) else p.Instrumentation(parametrization)
    )
    self._function = function
    # if this is not a function bound to this very instance, add the function/callable name to the descriptors
    if not hasattr(function, "__self__") or function.__self__ != self:  # type: ignore
        name = getattr(function, "__name__", function.__class__.__name__)
        self._descriptors.update(name=name)
    # removed hooks: fail loudly so that subclasses get migrated
    if hasattr(self, "get_postponing_delay"):
        raise RuntimeError(
            '"get_postponing_delay" has been replaced by "compute_pseudotime" and has been aggressively deprecated'
        )
    if hasattr(self, "noisefree_function"):
        raise RuntimeError(
            '"noisefree_function" has been replaced by "evaluation_function" and has been aggressively deprecated'
        )
def test_parametrization_continuous_noisy(
    variables: tp.Tuple[p.Parameter, ...],
    continuous: bool,
    noisy: bool,
) -> None:
    """Check the "continuous" and "deterministic" descriptors of an Instrumentation.

    The instrumentation is built from `variables`; `noisy` is the expected
    opposite of the "deterministic" descriptor.
    """
    combined = p.Instrumentation(*variables)
    assert combined.descriptors.continuous == continuous
    assert combined.descriptors.deterministic != noisy
def test_bound_scaler() -> None:
    """Check data ordering and the values produced by BoundScaler (upper end and middle point)."""

    def _identity(x: tp.Any) -> tp.Any:
        return x

    positional = (
        p.Array(shape=(1, 2)).set_bounds(-12, 12, method="arctan"),
        p.Array(shape=(2,)).set_bounds(-12, 12, full_range_sampling=False),
    )
    named = dict(
        lr=p.Log(lower=0.001, upper=1000),
        stuff=p.Scalar(lower=-1, upper=2),
        unbounded=p.Scalar(lower=-1, init=0.0),
        value=p.Scalar(),
        letter=p.Choice("abc"),
    )
    ref = p.Instrumentation(*positional, **named)
    # make sure the order is preserved using legacy split method
    legacy_order = [item[1] for item in split_as_data_parameters(ref)]
    assert p.helpers.list_data(ref) == legacy_order
    # check the bounds
    param = ref.spawn_child()
    scaler = utils.BoundScaler(param)
    scaled = scaler.transform([1.0] * param.dimension, _identity)
    param.set_standardized_data(scaled)
    (array1, array2), values = param.value
    np.testing.assert_array_almost_equal(array1, [[12, 12]])
    np.testing.assert_array_almost_equal(array2, [1, 1])
    assert values["stuff"] == 2
    assert values["unbounded"] == 1
    assert values["value"] == 1
    assert values["lr"] == pytest.approx(1000)
    # again, on the middle point
    scaled = scaler.transform([0] * param.dimension, _identity)
    param.set_standardized_data(scaled)
    assert param.value[1]["lr"] == pytest.approx(1.0)
    assert param.value[1]["stuff"] == pytest.approx(0.5)
def __init__(self, model: pyomo.Model) -> None:
    """Wrap a concrete Pyomo model as a single-objective function.

    Parameters
    ----------
    model: pyomo.Model
        must be a `pyomo.ConcreteModel`; it is cloned and the clone is used

    Raises
    ------
    NotImplementedError
        if the model is not a ConcreteModel, or if it has no active objective
        or more than one active objective
    """
    if not isinstance(model, pyomo.ConcreteModel):
        raise NotImplementedError(
            "AbstractModel is not supported. Please use create_instance() in Pyomo to create a model instance."
        )
    # To enable the objective function to run in parallel
    self._model_instance = model.clone()
    instance = self._model_instance

    instru_params: ParamDict = {}
    self.all_vars: tp.List[pyomo.Var] = []
    self.all_params: tp.List[pyomo.Param] = []
    self.all_constraints: tp.List[pyomo.Constraint] = []
    self.all_objectives: tp.List[pyomo.Objective] = []
    # Relevant document: https://pyomo.readthedocs.io/en/stable/working_models.html
    for variable in instance.component_objects(pyomo.Var, active=True):
        self.all_vars.append(variable)
        _make_pyomo_variable_to_parametrization(variable, instru_params)
    self.all_params.extend(instance.component_objects(pyomo.Param, active=True))
    self.all_constraints.extend(instance.component_objects(pyomo.Constraint, active=True))
    for objective in instance.component_objects(pyomo.Objective, active=True):
        if objective.sense == -1:
            print(
                f"Only minimization problem is supported. The value of the objective function {objective.name} will be multiplied by -1."
            )
        self.all_objectives.append(objective)

    if not self.all_objectives:
        raise NotImplementedError("Cannot find objective function")
    if len(self.all_objectives) > 1:
        raise NotImplementedError("Multi-objective function is not supported yet.")

    self._value_assignment_code_obj = ""
    instru = p.Instrumentation(**instru_params)
    # one cheap constraint per active Pyomo constraint, identified by its index
    for c_idx, _ in enumerate(self.all_constraints):
        instru.register_cheap_constraint(partial(self._pyomo_constraint_wrapper, c_idx))
    objective_function = partial(self._pyomo_obj_function_wrapper, 0)  # Single objective
    super().__init__(function=objective_function, parametrization=instru)

    # tag made of objective names | variable names | constraint names
    groups = (self.all_objectives, self.all_vars, self.all_constraints)
    exp_tag = "|".join(",".join([component.name for component in group]) for group in groups)
    self.register_initialization(name=exp_tag, model=self._model_instance)
    self._descriptors.update(name=exp_tag)
def test_instrumented_function_kwarg_order() -> None:
    """Check how standardized data is dispatched among keyword arguments declared out of order."""
    instru = p.Instrumentation(  # type: ignore
        kw4=p.Choice([1, 0]),
        kw2="constant",
        kw3=p.Array(shape=(2, 2)),
        kw1=p.Scalar(2.0).set_mutation(sigma=2.0),
    )
    ifunc = base.ExperimentFunction(_arg_return, instru)
    np.testing.assert_equal(ifunc.dimension, 7)
    data = np.array([-1, 1, 2, 3, 4, 100, -100])
    child = ifunc.parametrization.spawn_child().set_standardized_data(data)
    args0, kwargs0 = child.value
    # this is very stupid and should be removed when Parameter is in use
    kwargs: tp.Any = ifunc(*args0, **kwargs0)[1]  # type: ignore
    expected = {"kw1": 0, "kw2": "constant", "kw3": [[1, 2], [3, 4]], "kw4": 1}
    testing.printed_assert_equal(kwargs, expected)
def __init__(self, module: nn.Module, deterministic: bool = True, instrumentation_std: float = 0.1) -> None:
    """Wrap a torch module and build one bounded Array per entry of its state dict.

    Parameters
    ----------
    module: nn.Module
        the module whose state dict entries are instrumented
    deterministic: bool
        stored as-is on the instance
    instrumentation_std: float
        mutation sigma of each Array
    """
    super().__init__()
    self.deterministic = deterministic
    self.module = module
    kwargs = {}
    for name, value in module.state_dict().items():  # type: ignore
        array = p.Array(shape=value.shape).set_mutation(sigma=instrumentation_std)
        # bounded to avoid overflows
        kwargs[name] = array.set_bounds(-10, 10, method="arctan")
    self.instrumentation = p.Instrumentation(**kwargs)
def _make_instrumentation(name: str, dimension: int, transform: str = "tanh") -> p.Instrumentation:
    """Build the instrumentation of a Photonics problem as four bounded arrays

    Parameters
    ----------
    name: str
        problem name, among bragg, chirped and morpho
    dimension: int
        total number of variables, must be a multiple of 4
        (each of the four arrays holds a quarter of them)
    transform: str
        bounding method forwarded to `set_bounds` ("arctan", "tanh" or "clipping")

    Returns
    -------
    Instrumentation
        the instrumentation for the problem

    Raises
    ------
    NotImplementedError
        if the problem name is unknown
    """
    assert not dimension % 4, f"points length should be a multiple of 4, got {dimension}"
    quarter_ones = np.ones((dimension // 4,), dtype=float)
    # each entry is (initial value, lower bound, upper bound) for one quarter of the variables
    narrow = (2.5, 2, 3)
    wide = (150, 0, 300)
    specs_by_name = {
        "bragg": [narrow, narrow, wide, wide],
        "chirped": [wide, wide, wide, wide],
        "morpho": [wide, (300, 0, 600), (315, 30, 600), wide],
    }
    if name not in specs_by_name:
        raise NotImplementedError(f"Transform for {name} is not implemented")
    arrays: tp.List[p.Array] = [
        p.Array(init=center * quarter_ones).set_bounds(low, high, method=transform)
        for center, low, high in specs_by_name[name]
    ]
    instrumentation = p.Instrumentation(*arrays)
    assert instrumentation.dimension == dimension
    return instrumentation
def __init__(
    self,
    num_dams: int = 13,
    depth: int = 3,
    width: int = 3,
    year_to_day_ratio: float = 2.,
    constant_to_year_ratio: float = 1.,
    back_to_normal: float = 0.5,
    consumption_noise: float = 0.1,
    num_thermal_plants: int = 7,
    num_years: int = 1,
    failure_cost: float = 500.,
) -> None:
    """Set up the power system problem: random thermal plants and one agent per dam.

    All arguments are stored on the instance (num_years as "number_of_years")
    and recorded through "register_initialization"; num_dams, depth and width
    are also added to the descriptors. The parametrization holds one Array per
    dam agent, sized by the agent dimension.
    """
    # for copying: snapshot of the arguments, must stay the first statement
    # (any local bound before it would end up in the snapshot)
    params = {key: val for key, val in locals().items() if key not in ("self", "__class__")}
    self.num_dams = num_dams
    self.losses: tp.List[float] = []
    self.marginal_costs: tp.List[float] = []
    # Parameters describing the problem.
    self.year_to_day_ratio = year_to_day_ratio
    self.constant_to_year_ratio = constant_to_year_ratio
    self.back_to_normal = back_to_normal
    self.consumption_noise = consumption_noise
    self.num_thermal_plants = num_thermal_plants
    self.number_of_years = num_years
    self.failure_cost = failure_cost
    self.hydro_prod_per_time_step: tp.List[tp.Any] = []  # TODO @oteytaud initial values?
    self.consumption_per_time_step: tp.List[tp.Any] = []
    self.average_consumption = self.constant_to_year_ratio * self.year_to_day_ratio
    random_capacity = np.random.rand(self.num_thermal_plants)
    self.thermal_power_capacity = self.average_consumption * random_capacity
    self.thermal_power_prices = np.random.rand(num_thermal_plants)
    agent_input_size = 10 + num_dams + 2 * self.num_thermal_plants
    dam_agents: tp.List[tp.Any] = [Agent(agent_input_size, depth, width) for _ in range(num_dams)]
    agent_arrays = [p.Array(shape=(int(agent.dimension),)) for agent in dam_agents]
    parameter = p.Instrumentation(*agent_arrays).set_name("")
    super().__init__(self._simulate_power_system, parameter)
    self.parametrization.descriptors.deterministic_function = False
    self.register_initialization(**params)
    self.dam_agents = dam_agents
    self._descriptors.update(num_dams=num_dams, depth=depth, width=width)
def test_deterministic_data_setter() -> None:
    """Check that deterministic decoding is stable while random decoding varies."""
    choices = [0, 1, 2, 3]
    instru = p.Instrumentation(p.Choice(choices), y=p.Choice(list(choices)))
    ifunc = base.ExperimentFunction(_Callable(), instru)
    data = [0.01, 0, 0, 0, 0.01, 0, 0, 0]

    def _decode(deterministic: bool) -> tp.Any:
        child = ifunc.parametrization.spawn_child()
        return child.set_standardized_data(data, deterministic=deterministic).value

    # deterministic mode: always the first choice
    for _ in range(20):
        args, kwargs = _decode(True)
        testing.printed_assert_equal(args, [0])
        testing.printed_assert_equal(kwargs, {"y": 0})
    # random mode: over 24 draws, some draw must differ from the first choice
    arg_sum = 0
    kwarg_sum = 0
    for _ in range(24):
        args, kwargs = _decode(False)
        arg_sum += args[0]
        kwarg_sum += kwargs["y"]
    assert arg_sum != 0
    assert kwarg_sum != 0
def __init__(
    self,
    num_dams: int = 13,
    depth: int = 3,
    width: int = 3,
    year_to_day_ratio: float = 2.0,
    constant_to_year_ratio: float = 1.0,
    back_to_normal: float = 0.5,
    consumption_noise: float = 0.1,
    num_thermal_plants: int = 7,
    num_years: float = 1.0,
    failure_cost: float = 500.0,
) -> None:
    """Set up the power system problem: random thermal plants and one agent per dam.

    The arguments describing the problem are stored on the instance
    (num_years as "number_of_years"). The parametrization holds one Array per
    dam agent, sized by the agent dimension, and is flagged as non-deterministic.
    """
    self.num_dams = num_dams
    self.losses: tp.List[float] = []
    self.marginal_costs: tp.List[float] = []
    # Parameters describing the problem.
    self.year_to_day_ratio = year_to_day_ratio
    self.constant_to_year_ratio = constant_to_year_ratio
    self.back_to_normal = back_to_normal
    self.consumption_noise = consumption_noise
    self.num_thermal_plants = num_thermal_plants
    self.number_of_years = num_years
    self.failure_cost = failure_cost
    self.hydro_prod_per_time_step: tp.List[tp.Any] = []  # TODO @oteytaud initial values?
    self.consumption_per_time_step: tp.List[tp.Any] = []
    self.average_consumption = self.constant_to_year_ratio * self.year_to_day_ratio
    random_capacity = np.random.rand(self.num_thermal_plants)
    self.thermal_power_capacity = self.average_consumption * random_capacity
    self.thermal_power_prices = np.random.rand(num_thermal_plants)
    agent_input_size = 10 + num_dams + 2 * self.num_thermal_plants
    self.dam_agents = [Agent(agent_input_size, depth, width) for _ in range(num_dams)]
    agent_arrays = [p.Array(shape=(int(agent.dimension),)) for agent in self.dam_agents]
    parameter = p.Instrumentation(*agent_arrays).set_name("")
    super().__init__(self._simulate_power_system, parameter)
    self.parametrization.descriptors.deterministic_function = False
def __init__(
    self, regressor: str, data_dimension: tp.Optional[int] = None, dataset: str = "artificial", overfitter: bool = False
) -> None:
    """Set up a hyperparameter tuning problem for the requested regressor.

    Parameters
    ----------
    regressor: str
        one of "decision_tree_depth", "any", "decision_tree" or "mlp";
        selects which hyperparameters are optimized and which are fixed
    data_dimension: optional int
        must be provided if and only if the dataset name contains "artificial"
    dataset: str
        name of the dataset
    overfitter: bool
        the evaluation parameters use noise_free = not overfitter

    Note
    ----
    Any other regressor name fails through an assertion.
    """
    self.regressor = regressor
    self.data_dimension = data_dimension
    self.dataset = dataset
    self.overfitter = overfitter
    # descriptors are filled before the base class initializer is called
    self._descriptors: tp.Dict[str, tp.Any] = {}
    self.add_descriptors(regressor=regressor, data_dimension=data_dimension, dataset=dataset, overfitter=overfitter)
    self.name = regressor + f"Dim{data_dimension}"
    self.num_data = 120  # default for artificial function
    self._cross_val_num = 10  # number of cross validation
    # Dimension does not make sense if we use a real world dataset.
    assert bool("artificial" in dataset) == bool(data_dimension is not None)

    # Variables for storing the training set and the test set.
    self.X: np.ndarray = np.array([])
    self.y: np.ndarray

    # Variables for storing the cross-validation splits.
    self.X_train_cv: tp.List[tp.Any] = []  # This will be the list of training subsets.
    self.X_valid_cv: tp.List[tp.Any] = []  # This will be the list of validation subsets.
    self.y_train_cv: tp.List[tp.Any] = []
    self.y_valid_cv: tp.List[tp.Any] = []
    self.X_train: np.ndarray
    self.y_train: np.ndarray
    self.X_test: np.ndarray
    self.y_test: np.ndarray

    # evaluation parameters; left empty unless a branch below needs them to differ from "params"
    evalparams: tp.Dict[str, tp.Any] = {}
    if regressor == "decision_tree_depth":
        # Only the depth, as an evaluation.
        parametrization = p.Instrumentation(depth=p.Scalar(lower=1, upper=1200).set_integer_casting())
        # We optimize only the depth, so we fix all other parameters than the depth
        params = dict(noise_free=False, criterion="mse", min_samples_split=0.00001, regressor="decision_tree", alpha=1.0, learning_rate="no", activation="no", solver="no")
    elif regressor == "any":
        # First we define the list of parameters in the optimization
        parametrization = p.Instrumentation(
            depth=p.Scalar(lower=1, upper=1200).set_integer_casting(),  # Depth, in case we use a decision tree.
            criterion=p.Choice(["mse", "friedman_mse", "mae"]),  # Criterion for building the decision tree.
            min_samples_split=p.Log(lower=0.0000001, upper=1),  # Min ratio of samples in a node for splitting.
            regressor=p.Choice(["mlp", "decision_tree"]),  # Type of regressor.
            activation=p.Choice(["identity", "logistic", "tanh", "relu"]),  # Activation function, in case we use a net.
            solver=p.Choice(["lbfgs", "sgd", "adam"]),  # Numerical optimizer.
            learning_rate=p.Choice(["constant", "invscaling", "adaptive"]),  # Learning rate schedule.
            alpha=p.Log(lower=0.0000001, upper=1.),  # Complexity penalization.
        )
        # noise_free is False (meaning that we consider the cross-validation loss) during the optimization.
        params = dict(noise_free=False)
    elif regressor == "decision_tree":
        # We specify below the list of hyperparameters for the decision trees.
        parametrization = p.Instrumentation(
            depth=p.Scalar(lower=1, upper=1200).set_integer_casting(),
            criterion=p.Choice(["mse", "friedman_mse", "mae"]),
            min_samples_split=p.Log(lower=0.0000001, upper=1),
            regressor="decision_tree",
        )
        params = dict(noise_free=False, alpha=1.0, learning_rate="no", regressor="decision_tree", activation="no", solver="no")
        # only this branch provides dedicated evaluation parameters (fixed criterion and split ratio)
        evalparams = dict(params, criterion="mse", min_samples_split=0.00001)
    elif regressor == "mlp":
        # Let us define the parameters of the neural network.
        parametrization = p.Instrumentation(
            activation=p.Choice(["identity", "logistic", "tanh", "relu"]),
            solver=p.Choice(["lbfgs", "sgd", "adam"]),
            regressor="mlp",
            learning_rate=p.Choice(["constant", "invscaling", "adaptive"]),
            alpha=p.Log(lower=0.0000001, upper=1.),
        )
        params = dict(noise_free=False, regressor="mlp", depth=-3, criterion="no", min_samples_split=0.1)
    else:
        # NOTE(review): assertions are stripped under "python -O"; "params" would then be unbound below
        assert False, f"Problem type {regressor} undefined!"
    # build eval params if not specified
    if not evalparams:
        evalparams = dict(params)
    # For the evaluation we remove the noise (unless overfitter)
    evalparams["noise_free"] = not overfitter
    # the fixed parameters are bound to the function, the others come from the parametrization
    super().__init__(partial(self._ml_parametrization, **params), parametrization.set_name(""))
    self._evalparams = evalparams
    self.register_initialization(regressor=regressor, data_dimension=data_dimension, dataset=dataset, overfitter=overfitter)
# NOTE(review): the three statements below look like the tail of a preceding
# definition whose header is not visible here ("value" and "expected" are not
# defined in this span); they are kept unchanged.
var = variables.Log(0.9, 0.999)
out = var.data_to_arguments(np.array([value]))
np.testing.assert_approx_equal(out[0][0], expected, significant=4)


@pytest.mark.parametrize(  # type: ignore
    "var,data,expected",
    [
        (variables.Log(0.9, 0.999), [0], 0.9482),
        (variables.Array(2).affined(10, 100), [0, 3], [100, 130]),
        (variables.Scalar().affined(10, 100).bounded(-200, 200), [0], 198.7269),
        (variables.Scalar(int).affined(10, 100).bounded(-200, 200), [0], 199),
        (variables.Scalar().exponentiated(10, -1), [1], 0.1),
        (variables.Scalar().exponentiated(2, 3), [4], 4096),
        (variables.Scalar().affined(10, 100).bounded(-200, 200), [-10], 0),
        (variables.Scalar().affined(10, 100).bounded(-200, 200, transform="clipping"), [1], 110),
        (variables.Gaussian(3, 5, shape=(2, )), [-2, 1], [-7, 8]),
        (variables.Gaussian(3, 5), [-2], -7),
        (p.Instrumentation(variables.OrderedDiscrete(list(range(100)))), [1.4], 91),
    ])
def test_expected_value(var: variables.Variable, data: tp.List[float], expected: tp.Any) -> None:
    """Check the first positional argument decoded from `data` against `expected`.

    Array outputs are compared element-wise, other outputs to 4 significant digits.
    """
    check_parameter_features(var)
    # data_to_arguments output is indexed as [0][0]: first element of its first item
    out = var.data_to_arguments(np.array(data))[0][0]
    if isinstance(out, np.ndarray):
        np.testing.assert_array_almost_equal(out, expected)
    else:
        np.testing.assert_approx_equal(out, expected, significant=4)
def __init__(
    self,
    name: str = "gym_anm:ANM6Easy-v0",
    control: str = "conformant",
    neural_factor: tp.Optional[int] = 1,
    randomized: bool = True,
    compiler_gym_pb_index: tp.Optional[int] = None,
    limited_compiler_gym: tp.Optional[bool] = None,
    optimization_scale: int = 0,
    greedy_bias: bool = False,
) -> None:
    """Create the environment, infer its input/output sizes and build the policy parametrization.

    Parameters
    ----------
    name: str
        environment name; names containing "compiler" select the Compiler Gym path,
        names containing "stoc" the stochastic problem, names containing "LANM" use
        the "gym_anm:ANM6Easy-v0" environment with a dedicated horizon and gamma
    control: str
        type of controller, must be "conformant" or belong to CONTROLLERS
    neural_factor: optional int
        multiplier used for the memory length and the number of neurons;
        must be None when control is "linear" or contains "conformant"
    randomized: bool
        stored on the instance; appends "_unseeded" to the name when True
    compiler_gym_pb_index: optional int
        index of the benchmark, required for non-stochastic Compiler Gym problems
        and forbidden otherwise
    limited_compiler_gym: optional bool
        whether we work with the limited version of Compiler Gym;
        must be None outside of Compiler Gym
    optimization_scale: int
        stored on the instance
    greedy_bias: bool
        adds one extra parameter to the neural policy size

    Raises
    ------
    ng.errors.UnsupportedExperiment
        on Windows, or for Compiler Gym when the CIRCLECI environment variable is set
    """
    # NOTE(review): with the default arguments (control="conformant", neural_factor=1)
    # the "neural_factor is None" assertion below fails -- confirm callers always override one of them.
    # limited_compiler_gym: bool or None.
    # whether we work with the limited version
    self.limited_compiler_gym = limited_compiler_gym
    self.optimization_scale = optimization_scale
    self.num_training_codes = 100 if limited_compiler_gym else 5000
    self.uses_compiler_gym = "compiler" in name
    self.stochastic_problem = "stoc" in name
    self.greedy_bias = greedy_bias
    if "conformant" in control or control == "linear":
        assert neural_factor is None
    if os.name == "nt":
        raise ng.errors.UnsupportedExperiment("Windows is not supported")
    if self.uses_compiler_gym:  # Long special case for Compiler Gym.
        # CompilerGym sends http requests that CircleCI does not like.
        if os.environ.get("CIRCLECI", False):
            raise ng.errors.UnsupportedExperiment("No HTTP request in CircleCI")
        assert limited_compiler_gym is not None
        self.num_episode_steps = 45 if limited_compiler_gym else 50
        # not referenced by name afterwards; presumably imported for its side effects (TODO confirm)
        import compiler_gym
        env = gym.make("llvm-v0", observation_space="Autophase", reward_space="IrInstructionCountOz")
        env = self.observation_wrap(self.wrap_env(env))
        self.uris = list(env.datasets["benchmark://cbench-v1"].benchmark_uris())
        # For training, in the "stochastic" case, we use Csmith.
        from itertools import islice
        self.csmith = list(islice(env.datasets["generator://csmith-v0"].benchmark_uris(), self.num_training_codes))
        if self.stochastic_problem:
            assert (
                compiler_gym_pb_index is None
            ), "compiler_gym_pb_index should not be defined in the stochastic case."
            self.compilergym_index = None
            # In training, we randomly draw in csmith (but we are allowed to use 100x more budget :-) ).
            o = env.reset(benchmark=np.random.choice(self.csmith))
        else:
            assert compiler_gym_pb_index is not None
            self.compilergym_index = compiler_gym_pb_index
            o = env.reset(benchmark=self.uris[self.compilergym_index])
            # env.require_dataset("cBench-v1")
            # env.unwrapped.benchmark = "benchmark://cBench-v1/qsort"
    else:  # Here we are not in CompilerGym anymore.
        assert limited_compiler_gym is None
        assert (
            compiler_gym_pb_index is None
        ), "compiler_gym_pb_index should not be defined if not CompilerGym."
        env = gym.make(name if "LANM" not in name else "gym_anm:ANM6Easy-v0")
        o = env.reset()
    self.env = env

    # Build various attributes.
    self.name = (
        (name if not self.uses_compiler_gym else name + str(env)) + "__" + control + "__" + str(neural_factor))
    if randomized:
        self.name += "_unseeded"
    self.randomized = randomized
    try:
        self.num_time_steps = env._max_episode_steps  # I know! This is a private variable.
    except AttributeError:  # Not all environments have a max number of episodes!
        assert any(x in name for x in NO_LENGTH), name
        if (self.uses_compiler_gym and not self.limited_compiler_gym
            ):  # The unlimited Gym uses 50 time steps.
            self.num_time_steps = 50
        elif self.uses_compiler_gym and self.limited_compiler_gym:  # Other Compiler Gym: 45 time steps.
            self.num_time_steps = 45
        elif "LANM" not in name:  # Most cases: let's say 100 time steps.
            self.num_time_steps = 100
        else:  # LANM is a special case with 3000 time steps.
            self.num_time_steps = 3000
    self.gamma = 0.995 if "LANM" in name else 1.0
    self.neural_factor = neural_factor

    # Infer the action space.
    if isinstance(env.action_space, gym.spaces.Discrete):
        output_dim = env.action_space.n
        output_shape = (output_dim, )
        discrete = True
        assert output_dim is not None, env.action_space.n
    else:  # Continuous action space
        output_shape = env.action_space.shape
        if output_shape is None:
            # fall back on the shape of a sampled action
            output_shape = tuple(
                np.asarray(env.action_space.sample()).shape)
        # When the shape is not available we might do:
        # output_shape = tuple(np.asarray(env.action_space.sample()).shape)
        discrete = False
        output_dim = np.prod(output_shape)
    self.discrete = discrete

    # Infer the observation space.
    assert (env.observation_space is not None or self.uses_compiler_gym
            or "llvm" in name), "An observation space should be defined."
    if self.uses_compiler_gym:
        input_dim = 98 if self.limited_compiler_gym else 179
        self.discrete_input = False
    elif env.observation_space is not None and env.observation_space.dtype == int:
        # Direct inference for corner cases:
        # if "int" in str(type(o)):
        input_dim = env.observation_space.n
        assert input_dim is not None, env.observation_space.n
        self.discrete_input = True
    else:
        input_dim = np.prod(env.observation_space.shape
                            ) if env.observation_space is not None else 0
        if input_dim is None:
            # fall back on the shape of the initial observation
            input_dim = np.prod(np.asarray(o).shape)
        self.discrete_input = False

    # Infer the action type.
    a = env.action_space.sample()
    self.action_type = type(a)
    self.subaction_type = None
    if hasattr(a, "__iter__"):
        self.subaction_type = type(a[0])

    # Prepare the policy shape.
    if neural_factor is None:
        assert (control == "linear" or "conformant"
                in control), f"{control} has neural_factor {neural_factor}"
        neural_factor = 1
    self.output_shape = output_shape
    self.num_stacking = 1
    # "memory" controllers feed extra outputs back as inputs; "stacking" ones extend the input
    self.memory_len = neural_factor * input_dim if "memory" in control else 0
    self.extended_input_len = (
        input_dim +
        output_dim) * self.num_stacking if "stacking" in control else 0
    input_dim = input_dim + self.memory_len + self.extended_input_len
    self.extended_input = np.zeros(self.extended_input_len)
    output_dim = output_dim + self.memory_len
    self.input_dim = input_dim
    self.output_dim = output_dim
    # NOTE(review): this first value is immediately overwritten by the next assignment
    self.num_neurons = 1 + ((neural_factor *
                             (input_dim - self.extended_input_len)) // 7)
    self.num_neurons = neural_factor * (input_dim -
                                        self.extended_input_len)
    self.num_internal_layers = 1 if "semi" in control else 3
    internal = self.num_internal_layers * (self.num_neurons**
                                           2) if "deep" in control else 0
    # flat size of the neural policy: output layer + input layer (with one extra input row) + internal layers
    unstructured_neural_size = (output_dim * self.num_neurons +
                                self.num_neurons * (input_dim + 1) +
                                internal, )
    neural_size = unstructured_neural_size
    if self.greedy_bias:
        # one additional parameter when the greedy bias is activated
        neural_size = (unstructured_neural_size[0] + 1, )
        assert "multi" not in control
        assert "structured" not in control
    assert control in CONTROLLERS or control == "conformant", f"{control} not known as a form of control"
    self.control = control
    if "neural" in control:
        self.first_size = self.num_neurons * (self.input_dim + 1)
        self.second_size = self.num_neurons * self.output_dim
        self.first_layer_shape = (self.input_dim + 1, self.num_neurons)
        self.second_layer_shape = (self.num_neurons, self.output_dim)
    # shape of the parametrization for each type of control
    shape_dict = {
        "conformant": (self.num_time_steps, ) + output_shape,
        "stochastic_conformant": (self.num_time_steps, ) + output_shape,
        "linear": (input_dim + 1, output_dim),
        "memory_neural": neural_size,
        "neural": neural_size,
        "deep_neural": neural_size,
        "semideep_neural": neural_size,
        "deep_memory_neural": neural_size,
        "semideep_memory_neural": neural_size,
        "deep_stackingmemory_neural": neural_size,
        "stackingmemory_neural": neural_size,
        "semideep_stackingmemory_neural": neural_size,
        "deep_extrapolatestackingmemory_neural": neural_size,
        "extrapolatestackingmemory_neural": neural_size,
        "semideep_extrapolatestackingmemory_neural": neural_size,
        "structured_neural": neural_size,
        "multi_neural":
        (min(self.num_time_steps, 50), ) + unstructured_neural_size,
        "noisy_neural": neural_size,
        "noisy_scrambled_neural": neural_size,
        "scrambled_neural": neural_size,
    }
    shape = shape_dict[control]
    assert all(c in shape_dict for c in self.controllers
               ), f"{self.controllers} subset of {shape_dict.keys()}"
    shape = tuple(map(int, shape))
    self.policy_shape = shape if "structured" not in control else None

    # Create the parametrization.
    parametrization = parameter.Array(shape=shape).set_name("ng_default")
    if "structured" in control and "neural" in control and "multi" not in control:
        parametrization = parameter.Instrumentation(  # type: ignore
            parameter.Array(shape=tuple(map(int, self.first_layer_shape))),
            parameter.Array(
                shape=tuple(map(int, self.second_layer_shape))),
        ).set_name("ng_struct")
    elif "conformant" in control:
        try:
            if env.action_space.low is not None and env.action_space.high is not None:
                # repeat the action bounds for each time step, and start from their middle
                low = np.repeat(np.expand_dims(env.action_space.low, 0),
                                self.num_time_steps,
                                axis=0)
                high = np.repeat(np.expand_dims(env.action_space.high, 0),
                                 self.num_time_steps,
                                 axis=0)
                init = 0.5 * (low + high)
                parametrization = parameter.Array(init=init)
                parametrization.set_bounds(low, high)
        except AttributeError:  # Not all env.action_space have a low and a high.
            pass
        if self.subaction_type == int:
            parametrization.set_integer_casting()
        parametrization.set_name("conformant")

    # Now initializing.
    super().__init__(self.gym_multi_function,
                     parametrization=parametrization)
    self.greedy_coefficient = 0.0
    self.parametrization.function.deterministic = not self.uses_compiler_gym
    self.archive: tp.List[tp.Any] = []
    self.mean_loss = 0.0
    self.num_losses = 0
def __init__(  # pylint: disable=too-many-arguments
    self,
    name: str,
    block_dimension: int,
    num_blocks: int = 1,
    useless_variables: int = 0,
    noise_level: float = 0,
    noise_dissymmetry: bool = False,
    rotation: bool = False,
    translation_factor: float = 1.0,
    hashing: bool = False,
    aggregator: str = "max",
    split: bool = False,
) -> None:
    """Set up an artificial function built from a core function of the registry.

    Parameters
    ----------
    name: str
        name of the core function, must be in `corefuncs.registry`
    block_dimension: int
        dimension of one block (at least 1)
    num_blocks: int
        number of blocks (at least 1)
    useless_variables: int
        number of additional variables (at least 0); the total dimension is
        block_dimension * num_blocks + useless_variables
    noise_level: float
        must be non-negative; a positive value flags the function as non-deterministic
    noise_dissymmetry: bool
        recorded with the other arguments in the parameters of the instance
    rotation: bool
        forwarded to the input transform
    translation_factor: float
        forwarded to the input transform
    hashing: bool
        forwarded to the input transform; the parametrization is then an array of shape (1,)
    aggregator: str
        "max", "mean" or "sum", selects the corresponding numpy function
    split: bool
        if True, the parametrization is an Instrumentation with one array per block
        (incompatible with hashing and with useless variables)

    Raises
    ------
    TypeError
        if a boolean, integer or float argument has the wrong type
    ValueError
        if an integer argument is below its minimum, or if the core function is unknown
    KeyError
        if the aggregator is unknown
    """
    # pylint: disable=too-many-locals
    self.name = name
    # snapshot of the arguments: must run before any other local variable is bound
    self._parameters = {x: y for x, y in locals().items() if x not in ["__class__", "self"]}
    # basic checks
    assert noise_level >= 0, "Noise level must be greater or equal to 0"
    if not all(isinstance(x, bool) for x in [noise_dissymmetry, hashing, rotation]):
        raise TypeError("hashing and rotation should be bools")
    for param, mini in [("block_dimension", 1), ("num_blocks", 1), ("useless_variables", 0)]:
        value = self._parameters[param]
        if not isinstance(value, int):
            raise TypeError(f'"{param}" must be an int')
        if value < mini:
            raise ValueError(f'"{param}" must be greater or equal to {mini}')
    if not isinstance(translation_factor, (float, int)):
        raise TypeError(f"Got non-float value {translation_factor}")
    if name not in corefuncs.registry:
        available = ", ".join(self.list_sorted_function_names())
        raise ValueError(f'Unknown core function "{name}". Available names are:\n-----\n{available}')
    # record necessary info and prepare transforms
    self._dimension = block_dimension * num_blocks + useless_variables
    self._func = corefuncs.registry[name]
    # special case
    info = corefuncs.registry.get_info(name)
    only_index_transform = info.get("no_transform", False)

    assert not (split and hashing)
    assert not (split and useless_variables > 0)
    if split:
        blocks = [p.Array(shape=(block_dimension,)) for _ in range(num_blocks)]
        parametrization = p.Instrumentation(*blocks).set_name("split")
    else:
        parametrization = p.Array(shape=(1,) if hashing else (self._dimension,)).set_name("")
    if noise_level > 0:
        parametrization.descriptors.deterministic_function = False
    super().__init__(self.noisy_function, parametrization)
    # variable, must come after super().__init__(...) to bind the random_state
    # may consider having its a local random_state instead but less reproducible
    self.transform_var = ArtificialVariable(
        dimension=self._dimension,
        num_blocks=num_blocks,
        block_dimension=block_dimension,
        translation_factor=translation_factor,
        rotation=rotation,
        hashing=hashing,
        only_index_transform=only_index_transform,
        random_state=self._parametrization.random_state,
    )
    self._aggregator = {"max": np.max, "mean": np.mean, "sum": np.sum}[aggregator]
    # add descriptors
    self.add_descriptors(
        useful_dimensions=block_dimension * num_blocks,
        discrete=any(x in name for x in ["onemax", "leadingones", "jump"]),
    )
def test_softmax_categorical_deterministic() -> None:
    """A SoftmaxCategorical built with deterministic=True must return the choice with the
    largest weight, even when the call itself asks for deterministic=False.
    """
    choices = ["blu", "blublu", "blublublu"]
    instrum = p.Instrumentation(variables.SoftmaxCategorical(choices, deterministic=True))
    # the last weight is only marginally larger than the two others
    weights = [1, 1, 1.01]
    observed = instrum.data_to_arguments(weights, deterministic=False)
    assert observed == wrap_arg("blublublu")
def _make_parametrization(
    name: str,
    dimension: int,
    bounding_method: str = "bouncing",
    rolling: bool = False,
    as_tuple: bool = False,
) -> p.Parameter:
    """Builds the parametrization matching a Photonics problem

    Parameters
    ----------
    name: str
        problem name, among bragg, chirped, cf_photosic_realistic, cf_photosic_reference and morpho
    dimension: int
        size of the problem among 16, 40 and 60 (morpho) or 80 (bragg and chirped)
    bounding_method: str
        transform type for the bounding ("arctan", "tanh", "bouncing" or "clipping"),
        passed as `method` to `set_bounds`
    rolling: bool
        whether the mutation of the 2D-array is a Choice between "gaussian", "cauchy"
        and a Translation along axis 1 (unused when as_tuple is True)
    as_tuple: bool
        whether we should use a Tuple of Array instead of a 2D-array.

    Returns
    -------
    Parameter
        an Instrumentation of 1D-arrays named "as_tuple" if as_tuple is True,
        a bounded 2D-array otherwise
    """
    # one (lower, upper) pair per row of the 2D layout
    bounds_by_problem = {
        "bragg": [(2, 3), (30, 180)],
        "cf_photosic_realistic": [(1, 9), (30, 180)],
        "cf_photosic_reference": [(30, 180)],
        "chirped": [(30, 180)],
        "morpho": [(0, 300), (0, 600), (30, 600), (0, 300)],
    }
    if name not in bounds_by_problem:
        raise NotImplementedError(f"Transform for {name} is not implemented")
    bounds = bounds_by_problem[name]
    num_rows = len(bounds)
    shape = (num_rows, dimension // num_rows)
    divisor = max(2, num_rows)
    assert not dimension % divisor, f"points length should be a multiple of {divisor}, got {dimension}"
    assert (
        shape[0] * shape[1] == dimension
    ), f"Cannot work with dimension {dimension} for {name}: not divisible by {shape[0]}."
    b_array = np.array(bounds)
    assert b_array.shape[0] == shape[0]  # pylint: disable=unsubscriptable-object
    # every column starts at the middle of the bounds of its row
    row_sums = np.sum(b_array, axis=1, keepdims=True)
    init = row_sums.dot(np.ones((1, shape[1]))) / 2
    if as_tuple:
        columns = []
        for i in range(init.shape[1]):
            columns.append(
                p.Array(init=init[:, i]).set_bounds(
                    b_array[:, 0], b_array[:, 1], method=bounding_method, full_range_sampling=True
                )
            )
        instrum = p.Instrumentation(*columns).set_name("as_tuple")
        assert instrum.dimension == dimension, instrum
        return instrum
    array = p.Array(init=init)
    if bounding_method not in ("arctan", "tanh"):
        # sigma must be adapted for clipping and constraint methods
        sigma_init = [[0.03], [10.0]] if name == "bragg" else [[10.0]]
        sigma = p.Array(init=sigma_init).set_mutation(exponent=2.0)  # type: ignore
        array.set_mutation(sigma=sigma)
    if rolling:
        mutation_choice = p.Choice(["gaussian", "cauchy", p.mutation.Translation(axis=1)])
        array.set_mutation(custom=mutation_choice)
    array.set_bounds(b_array[:, [0]], b_array[:, [1]], method=bounding_method, full_range_sampling=True)
    array.set_recombination(p.mutation.Crossover(axis=1)).set_name("")
    assert array.dimension == dimension, f"Unexpected {array} for dimension {dimension}"
    return array
def __init__(
    self,
    regressor: str,
    data_dimension: tp.Optional[int] = None,
    dataset: str = "artificial",
    overfitter: bool = False,
) -> None:
    """Sets up the hyperparameter search space for the requested regressor.

    Parameters
    ----------
    regressor: str
        one of "decision_tree_depth", "any", "decision_tree" or "mlp"
    data_dimension: int or None
        must be provided if and only if the dataset name contains "artificial"
    dataset: str
        name of the dataset
    overfitter: bool
        the evaluation function is built with `noise_free=not overfitter`
    """
    self.regressor = regressor
    self.data_dimension = data_dimension
    self.dataset = dataset
    self.overfitter = overfitter
    # descriptors are filled in before the parent constructor is called
    self._descriptors: tp.Dict[str, tp.Any] = {}
    self.add_descriptors(
        regressor=regressor, data_dimension=data_dimension, dataset=dataset, overfitter=overfitter
    )
    self.name = regressor + f"Dim{data_dimension}"
    self.num_data: int = 0

    # A dimension only makes sense for artificial data (not for a real world dataset).
    uses_artificial_data = "artificial" in dataset
    has_dimension = data_dimension is not None
    assert uses_artificial_data == has_dimension

    # Storage for the training set and the test set.
    self.X: np.ndarray = np.array([])
    self.y: np.ndarray

    # Storage for the cross-validation splits.
    self.X_train: tp.List[tp.Any] = []  # list of training subsets
    self.X_valid: tp.List[tp.Any] = []  # list of validation subsets
    self.y_train: tp.List[tp.Any] = []
    self.y_valid: tp.List[tp.Any] = []
    self.X_test: np.ndarray
    self.y_test: np.ndarray

    # The optimized function always uses noise_free=False,
    # while the evaluation function uses this flag instead.
    eval_noise_free = not overfitter

    if regressor == "decision_tree_depth":
        # Only the depth is optimized: everything else is frozen through "partial".
        instrum = p.Instrumentation(depth=p.Scalar(lower=1, upper=1200).set_integer_casting())
        frozen = dict(
            criterion="mse",
            min_samples_split=1e-05,
            regressor="decision_tree",
            alpha=1.0,
            learning_rate="no",
            activation="no",
            solver="no",
        )
        super().__init__(partial(self._ml_parametrization, noise_free=False, **frozen), instrum)
        self.evaluation_function = partial(  # type: ignore
            self._ml_parametrization, noise_free=eval_noise_free, **frozen
        )
    elif regressor == "any":
        # Full search space: both the regressor type and all of its hyperparameters.
        instrum = p.Instrumentation(
            # depth, in case we use a decision tree
            depth=p.Scalar(lower=1, upper=1200).set_integer_casting(),
            # criterion for building the decision tree
            criterion=p.Choice(["mse", "friedman_mse", "mae"]),
            # min ratio of samples in a node for splitting
            min_samples_split=p.Log(lower=1e-07, upper=1),
            # type of regressor
            regressor=p.Choice(["mlp", "decision_tree"]),
            # activation function, in case we use a net
            activation=p.Choice(["identity", "logistic", "tanh", "relu"]),
            # numerical optimizer
            solver=p.Choice(["lbfgs", "sgd", "adam"]),
            # learning rate schedule
            learning_rate=p.Choice(["constant", "invscaling", "adaptive"]),
            # complexity penalization
            alpha=p.Log(lower=1e-07, upper=1.0),
        )
        # Nothing but noise_free is frozen here.
        super().__init__(partial(self._ml_parametrization, noise_free=False), instrum)
        self.evaluation_function = partial(  # type: ignore
            self._ml_parametrization, noise_free=eval_noise_free
        )
    elif regressor == "decision_tree":
        # Hyperparameters of the decision tree.
        instrum = p.Instrumentation(
            depth=p.Scalar(lower=1, upper=1200).set_integer_casting(),
            criterion=p.Choice(["mse", "friedman_mse", "mae"]),
            min_samples_split=p.Log(lower=1e-07, upper=1),
            regressor="decision_tree",
        )
        # The neural network parameters are frozen, since only the decision tree is tuned.
        super().__init__(
            partial(
                self._ml_parametrization,
                noise_free=False,
                alpha=1.0,
                learning_rate="no",
                regressor="decision_tree",
                activation="no",
                solver="no",
            ),
            instrum,
        )
        self.evaluation_function = partial(  # type: ignore
            self._ml_parametrization,
            criterion="mse",
            min_samples_split=1e-05,
            regressor="decision_tree",
            noise_free=eval_noise_free,
            alpha=1.0,
            learning_rate="no",
            activation="no",
            solver="no",
        )
    elif regressor == "mlp":
        # Hyperparameters of the neural network.
        instrum = p.Instrumentation(
            activation=p.Choice(["identity", "logistic", "tanh", "relu"]),
            solver=p.Choice(["lbfgs", "sgd", "adam"]),
            regressor="mlp",
            learning_rate=p.Choice(["constant", "invscaling", "adaptive"]),
            alpha=p.Log(lower=1e-07, upper=1.0),
        )
        # The decision tree parameters are frozen, since only the neural net is tuned.
        tree_placeholders = dict(depth=-3, criterion="no", min_samples_split=0.1)
        super().__init__(
            partial(self._ml_parametrization, noise_free=False, regressor="mlp", **tree_placeholders),
            instrum,
        )
        self.evaluation_function = partial(  # type: ignore
            self._ml_parametrization, regressor="mlp", noise_free=eval_noise_free, **tree_placeholders
        )
    else:
        assert False, f"Problem type {regressor} undefined!"

    # No dataset is fetched in this constructor (self.X starts as an empty array).
    self.register_initialization(
        regressor=regressor, data_dimension=data_dimension, dataset=dataset, overfitter=overfitter
    )