Example #1
def get_mixed_gp(cat_dims, cont_dims, rs, noise=1e-3, normalize_y=True):
    from smac.epm.gp_kernels import ConstantKernel, Matern, WhiteKernel, HammingKernel
    # The imports below are module-level in the original test file:
    import numpy as np
    from smac.epm.gaussian_process import GaussianProcess
    from smac.epm.gp_base_prior import HorseshoePrior, LognormalPrior
    from ConfigSpace import ConfigurationSpace
    from ConfigSpace.hyperparameters import (CategoricalHyperparameter,
                                             UniformFloatHyperparameter)

    cat_dims = np.array(cat_dims, dtype=int)
    cont_dims = np.array(cont_dims, dtype=int)
    n_dimensions = len(cat_dims) + len(cont_dims)
    cov_amp = ConstantKernel(
        2.0,
        constant_value_bounds=(1e-10, 2),
        prior=LognormalPrior(mean=0.0, sigma=1.0, rng=rs),
    )

    exp_kernel = Matern(
        np.ones([len(cont_dims)]),
        [(np.exp(-10), np.exp(2)) for _ in range(len(cont_dims))],
        nu=2.5,
        operate_on=cont_dims,
    )

    ham_kernel = HammingKernel(
        np.ones([len(cat_dims)]),
        [(np.exp(-10), np.exp(2)) for _ in range(len(cat_dims))],
        operate_on=cat_dims,
    )
    noise_kernel = WhiteKernel(
        noise_level=noise,
        noise_level_bounds=(1e-10, 2),
        prior=HorseshoePrior(scale=0.1, rng=rs),
    )
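    # Combined kernel: amplitude * (Matern over the continuous dimensions times
    # a Hamming kernel over the categorical ones) + white observation noise.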
    kernel = cov_amp * (exp_kernel * ham_kernel) + noise_kernel

    bounds = [0] * n_dimensions
    types = np.zeros(n_dimensions)
    for c in cont_dims:
        bounds[c] = (0., 1.)
    for c in cat_dims:
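        # For a categorical dimension, types[c] stores its number of categories
        # (three here, matching the choices added to the ConfigurationSpace below).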
        types[c] = 3
        bounds[c] = (3, np.nan)

    cs = ConfigurationSpace()
    for c in cont_dims:
        cs.add_hyperparameter(UniformFloatHyperparameter('X%d' % c, 0, 1))
    for c in cat_dims:
        cs.add_hyperparameter(
            CategoricalHyperparameter('X%d' % c, [0, 1, 2]))

    model = GaussianProcess(
        configspace=cs,
        bounds=bounds,
        types=types,
        kernel=kernel,
        seed=rs.randint(low=1, high=10000),
        normalize_y=normalize_y,
    )
    return model
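
A minimal usage sketch for the helper above; the dimension split and the random training data are illustrative assumptions, not part of the original example:

import numpy as np

rs = np.random.RandomState(1)
# dimensions 0 and 1 are continuous, dimension 2 is categorical with three choices
model = get_mixed_gp(cat_dims=[2], cont_dims=[0, 1], rs=rs)
X = np.hstack([rs.rand(10, 2), rs.randint(0, 3, size=(10, 1))])
y = rs.rand(10, 1)
model.train(X, y)
mu, var = model.predict(X)  # posterior mean and variance, each of shape (10, 1)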
Example #2
import random

import numpy as np

# The remaining imports are assumed to be module-level in the original snippet:
from smac.epm.gaussian_process import GaussianProcess
from smac.epm.gp_base_prior import HorseshoePrior, LognormalPrior
from smac.epm.gp_kernels import ConstantKernel, Matern, WhiteKernel, HammingKernel


def _construct_model(configspace, rng):
    # _configspace_to_types_and_bounds and _impute_conditional_data are helper
    # functions defined elsewhere in the original module.
    types, bounds = _configspace_to_types_and_bounds(configspace)
    cont_dims = np.nonzero(types == 0)[0]
    cat_dims = np.nonzero(types != 0)[0]

    cov_amp = ConstantKernel(
        2.0,
        constant_value_bounds=(np.exp(-10), np.exp(2)),
        prior=LognormalPrior(mean=0.0, sigma=1.0, rng=rng),
    )
    if len(cont_dims) > 0:
        exp_kernel = Matern(
            np.ones([len(cont_dims)]),
            [(np.exp(-6.754111155189306), np.exp(0.0858637988771976)) for _ in
             range(len(cont_dims))],
            nu=2.5,
            operate_on=cont_dims,
        )
    if len(cat_dims) > 0:
        ham_kernel = HammingKernel(
            np.ones([len(cat_dims)]),
            [(np.exp(-6.754111155189306), np.exp(0.0858637988771976)) for _ in
             range(len(cat_dims))],
            operate_on=cat_dims,
        )
    noise_kernel = WhiteKernel(
        noise_level=1e-8,
        noise_level_bounds=(np.exp(-25), np.exp(2)),
        prior=HorseshoePrior(scale=0.1, rng=rng),
    )

    if len(cont_dims) > 0 and len(cat_dims) > 0:
        # both
        kernel = cov_amp * (exp_kernel * ham_kernel) + noise_kernel
    elif len(cont_dims) > 0 and len(cat_dims) == 0:
        # only cont
        kernel = cov_amp * exp_kernel + noise_kernel
    elif len(cont_dims) == 0 and len(cat_dims) > 0:
        # only cat
        kernel = cov_amp * ham_kernel + noise_kernel
    else:
        raise ValueError()

    def _impute_inactive(self, X):
        X = X.copy()
        return _impute_conditional_data(X, self.configspace)

    seed = random.randint(0, 100)
    GaussianProcess._impute_inactive = _impute_inactive
    return GaussianProcess(
        configspace=configspace, types=types, bounds=bounds, seed=seed, kernel=kernel
    )
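
A hedged usage sketch; the one-dimensional configuration space is an illustrative assumption:

import numpy as np
from ConfigSpace import ConfigurationSpace
from ConfigSpace.hyperparameters import UniformFloatHyperparameter

cs = ConfigurationSpace()
cs.add_hyperparameter(UniformFloatHyperparameter('x0', 0.0, 1.0))
model = _construct_model(cs, rng=np.random.RandomState(42))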
Example #3
def get_gp(n_dimensions, rs, noise=1e-3):
    # Assumed module-level imports in the original: numpy as np, george, and
    # the project's george-based GaussianProcess and DefaultPrior.
    cov_amp = 2

    initial_ls = np.ones([n_dimensions])
    exp_kernel = george.kernels.Matern52Kernel(initial_ls, ndim=n_dimensions)
    kernel = cov_amp * exp_kernel

    prior = DefaultPrior(len(kernel) + 1, rng=rs)

    n_hypers = 3 * len(kernel)
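    # emcee's EnsembleSampler requires an even number of walkers, so round up.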
    if n_hypers % 2 == 1:
        n_hypers += 1

    bounds = [(0., 1.) for _ in range(n_dimensions)]
    types = np.zeros(n_dimensions)

    model = GaussianProcess(
        bounds=bounds, types=types, kernel=kernel,
        prior=prior, rng=rs, noise=noise,
        normalize_output=False, normalize_input=True,
    )
    return model
Example #4
def get_gp(n_dimensions, rs, noise=1e-3, normalize_y=True) -> GaussianProcess:
    from smac.epm.gp_kernels import ConstantKernel, Matern, WhiteKernel
    # Assumed module-level imports in the original: numpy as np,
    # smac.epm.gaussian_process.GaussianProcess, smac.epm.gp_base_prior's
    # HorseshoePrior and LognormalPrior, and ConfigSpace's ConfigurationSpace
    # and UniformFloatHyperparameter (GaussianProcess must be in scope for the
    # return annotation).

    cov_amp = ConstantKernel(
        2.0,
        constant_value_bounds=(1e-10, 2),
        prior=LognormalPrior(mean=0.0, sigma=1.0, rng=rs),
    )
    exp_kernel = Matern(
        np.ones([n_dimensions]),
        [(np.exp(-10), np.exp(2)) for _ in range(n_dimensions)],
        nu=2.5,
    )
    noise_kernel = WhiteKernel(
        noise_level=noise,
        noise_level_bounds=(1e-10, 2),
        prior=HorseshoePrior(scale=0.1, rng=rs),
    )
    kernel = cov_amp * exp_kernel + noise_kernel

    bounds = [(0., 1.) for _ in range(n_dimensions)]
    types = np.zeros(n_dimensions)

    configspace = ConfigurationSpace()
    for i in range(n_dimensions):
        configspace.add_hyperparameter(
            UniformFloatHyperparameter('x%d' % i, 0, 1))

    model = GaussianProcess(
        configspace=configspace,
        bounds=bounds,
        types=types,
        kernel=kernel,
        seed=rs.randint(low=1, high=10000),
        normalize_y=normalize_y,
        n_opt_restarts=2,
    )
    return model
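
A short usage sketch under the same assumptions; the training data are random and purely illustrative:

import numpy as np

rs = np.random.RandomState(1)
gp = get_gp(n_dimensions=3, rs=rs)
X = rs.rand(20, 3)
y = np.sin(5 * X).sum(axis=1, keepdims=True)
gp.train(X, y)
mu, var = gp.predict(rs.rand(5, 3))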
Example #5
    def _train(self, X: np.ndarray, y: np.ndarray):
        """Trains the random forest on X and y.

        Parameters
        ----------
        X : np.ndarray [n_samples, n_features (config + instance features)]
            Input data points.
        y : np.ndarray [n_samples, ]
            The corresponding target values.

        Returns
        -------
        self
        """

        self.X = X
        self.y = y.flatten()

        from smac.epm.gp_kernels import ConstantKernel, Matern, WhiteKernel, HammingKernel
        from smac.epm.gp_base_prior import HorseshoePrior, LognormalPrior
        import copy
        # Assumed to be module-level in the original: numpy as np,
        # sklearn.ensemble, smac.epm.gaussian_process.GaussianProcess, and the
        # constant N_EST (the number of trees).

        self.rf = sklearn.ensemble.RandomForestRegressor(
            max_features=0.5,
            bootstrap=True,
            max_depth=3,
            min_samples_leaf=10,
            n_estimators=N_EST,
        )
        # self.rf.fit(X, np.log(y - np.min(y) + 1e-7).ravel())
        self.rf.fit(X, y.ravel())
        indicators = np.array(self.rf.apply(X))
        all_datasets = []
        all_targets = []
        all_mappings = []
        for est in range(N_EST):
            unique = np.unique(indicators[:, est])
            mapping = {j: i for i, j in enumerate(unique)}
            datasets = [[] for _ in unique]
            targets = [[] for _ in unique]
            for indicator, x, y_ in zip(indicators[:, est], X, y):
                index = mapping[indicator]
                datasets[index].append(x)
                targets[index].append(y_)
            all_mappings.append(mapping)
            all_datasets.append(datasets)
            all_targets.append(targets)
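        # At this point each tree's training data is partitioned by leaf:
        # all_datasets[est][i] holds the inputs that land in leaf i of tree est,
        # and all_targets[est][i] the matching targets. The loop below walks
        # each tree's structure to recover the axis-aligned box of every leaf,
        # then pads each leaf's dataset with the nearest points just outside
        # that box so the per-leaf GPs see some boundary context.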

        # print('Before')
        # for est in range(N_EST):
        #     for dataset in all_datasets[est]:
        #         print(len(dataset))

        for est in range(N_EST):
            n_nodes = self.rf.estimators_[est].tree_.node_count
            children_left = self.rf.estimators_[est].tree_.children_left
            children_right = self.rf.estimators_[est].tree_.children_right
            feature = self.rf.estimators_[est].tree_.feature
            threshold = self.rf.estimators_[est].tree_.threshold

            # The tree structure can be traversed to compute various properties such
            # as the depth of each node and whether or not it is a leaf.
            node_depth = np.zeros(shape=n_nodes, dtype=np.int64)
            is_leaves = np.zeros(shape=n_nodes, dtype=bool)
            stack = [(0, -1)]  # seed is the root node id and its parent depth
            while len(stack) > 0:
                node_id, parent_depth = stack.pop()
                node_depth[node_id] = parent_depth + 1

                # If we have a test node
                if (children_left[node_id] != children_right[node_id]):
                    stack.append((children_left[node_id], parent_depth + 1))
                    stack.append((children_right[node_id], parent_depth + 1))
                else:
                    is_leaves[node_id] = True

            rules = {}

            def extend(rule, idx):
                if is_leaves[idx]:
                    rules[idx] = rule
                else:
                    rule_left = copy.deepcopy(rule)
                    rule_left.append((threshold[idx], '<=', feature[idx]))
                    extend(rule_left, children_left[idx])
                    rule_right = copy.deepcopy(rule)
                    rule_right.append((threshold[idx], '>', feature[idx]))
                    extend(rule_right, children_right[idx])

            extend([], 0)
            #print(rules)

            for key, rule in rules.items():
                lower = -np.ones((X.shape[1], )) * np.inf
                upper = np.ones((X.shape[1], )) * np.inf
                for element in rule:
                    if element[1] == '<=':
                        if element[0] < upper[element[2]]:
                            upper[element[2]] = element[0]
                    else:
                        if element[0] > lower[element[2]]:
                            lower[element[2]] = element[0]

                for feature_idx in range(X.shape[1]):
                    closest_lower = -np.inf
                    closest_lower_idx = None
                    closest_upper = np.inf
                    closest_upper_idx = None
                    # Track the sample index of the nearest point lying outside
                    # the leaf's box along this feature.
                    for sample_idx, x in enumerate(X):
                        if lower[feature_idx] < x[feature_idx] < upper[feature_idx]:
                            continue
                        if x[feature_idx] <= lower[feature_idx]:
                            if x[feature_idx] > closest_lower:
                                closest_lower = x[feature_idx]
                                closest_lower_idx = sample_idx
                        if x[feature_idx] >= upper[feature_idx]:
                            if x[feature_idx] < closest_upper:
                                closest_upper = x[feature_idx]
                                closest_upper_idx = sample_idx

                    if closest_upper_idx is not None:
                        all_datasets[est][all_mappings[est][key]].append(
                            X[closest_upper_idx])
                        all_targets[est][all_mappings[est][key]].append(
                            y[closest_upper_idx])
                    if closest_lower_idx is not None:
                        all_datasets[est][all_mappings[est][key]].append(
                            X[closest_lower_idx])
                        all_targets[est][all_mappings[est][key]].append(
                            y[closest_lower_idx])

        # print('After')
        # for est in range(N_EST):
        #     for dataset in all_datasets[est]:
        #         print(len(dataset))

        self.all_mappings = all_mappings
        self.models = []
        for est in range(N_EST):
            models = []
            for dataset, targets_ in zip(all_datasets[est], all_targets[est]):

                cov_amp = ConstantKernel(
                    2.0,
                    constant_value_bounds=(np.exp(-10), np.exp(2)),
                    prior=LognormalPrior(mean=0.0, sigma=1.0, rng=self.rng),
                )

                cont_dims = np.nonzero(self.types == 0)[0]
                cat_dims = np.nonzero(self.types != 0)[0]

                if len(cont_dims) > 0:
                    exp_kernel = Matern(
                        np.ones([len(cont_dims)]),
                        [(np.exp(-10), np.exp(2))
                         for _ in range(len(cont_dims))],
                        nu=2.5,
                        operate_on=cont_dims,
                    )

                if len(cat_dims) > 0:
                    ham_kernel = HammingKernel(
                        np.ones([len(cat_dims)]),
                        [(np.exp(-10), np.exp(2))
                         for _ in range(len(cat_dims))],
                        operate_on=cat_dims,
                    )

                noise_kernel = WhiteKernel(
                    noise_level=1e-8,
                    noise_level_bounds=(np.exp(-25), np.exp(2)),
                    prior=HorseshoePrior(scale=0.1, rng=self.rng),
                )

                if len(cont_dims) > 0 and len(cat_dims) > 0:
                    # both
                    kernel = cov_amp * (exp_kernel * ham_kernel) + noise_kernel
                elif len(cont_dims) > 0 and len(cat_dims) == 0:
                    # only cont
                    kernel = cov_amp * exp_kernel + noise_kernel
                elif len(cont_dims) == 0 and len(cat_dims) > 0:
                    # only cat
                    kernel = cov_amp * ham_kernel + noise_kernel
                else:
                    raise ValueError()

                gp = GaussianProcess(
                    configspace=self.configspace,
                    types=self.types,
                    bounds=self.bounds,
                    kernel=kernel,
                    normalize_y=True,
                    seed=self.rng.randint(low=0, high=10000),
                )
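                # Optimize the kernel hyperparameters on this leaf's subset,
                # then refit on the full data with those hyperparameters fixed.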
                gp.train(np.array(dataset), np.array(targets_))
                gp._train(X, y, do_optimize=False)
                models.append(gp)
            self.models.append(models)
        return self
Example #6
    def _train(self, X: np.ndarray, y: np.ndarray, do_optimize: bool = True) -> 'GaussianProcessMCMC':
        """
        Performs MCMC sampling to sample hyperparameter configurations from the
        likelihood, and trains a GP on X and y for each sample.

        Parameters
        ----------
        X: np.ndarray (N, D)
            Input data points. The dimensionality of X is (N, D),
            with N as the number of points and D is the number of features.
        y: np.ndarray (N,)
            The corresponding target values.
        do_optimize: boolean
            If set to True, MCMC sampling is performed; otherwise the
            hyperparameters currently set in the kernel are used.
        """
        X = self._impute_inactive(X)
        if self.normalize_y:
            # A note on normalization for the Gaussian process with MCMC:
            # Scikit-learn uses a different "normalization" than we use in SMAC3. Scikit-learn normalizes the data to
            # have zero mean, while we normalize it to have zero mean unit variance. To make sure the scikit-learn GP
            # behaves the same when we use it directly or indirectly (through the gaussian_process.py file), we
            # normalize the data here. Then, after the individual GPs are fit, we inject the statistics into them so
            # they unnormalize the data at prediction time.
            y = self._normalize_y(y)

        self.gp = self._get_gp()

        if do_optimize:
            self.gp.fit(X, y)
            self._all_priors = self._get_all_priors(
                add_bound_priors=True,
                add_soft_bounds=(self.mcmc_sampler == 'nuts'),
            )

            if self.mcmc_sampler == 'emcee':
                sampler = emcee.EnsembleSampler(self.n_mcmc_walkers,
                                                len(self.kernel.theta),
                                                self._ll)
                sampler.random_state = self.rng.get_state()
                # Do a burn-in in the first iteration
                if not self.burned:
                    # Initialize the walkers by sampling from the prior
                    dim_samples = []

                    prior = None  # type: typing.Optional[typing.Union[typing.List[Prior], Prior]]
                    for dim, prior in enumerate(self._all_priors):
                        # Always sample from the first prior
                        if isinstance(prior, list):
                            if len(prior) == 0:
                                prior = None
                            else:
                                prior = prior[0]
                        prior = typing.cast(typing.Optional[Prior], prior)
                        if prior is None:
                            raise NotImplementedError()
                        else:
                            dim_samples.append(prior.sample_from_prior(self.n_mcmc_walkers).flatten())
                    self.p0 = np.vstack(dim_samples).transpose()

                    # Run MCMC sampling
                    with warnings.catch_warnings():
                        warnings.filterwarnings('ignore', r'invalid value encountered in double_scalars.*')
                        self.p0, _, _ = sampler.run_mcmc(self.p0,
                                                         self.burnin_steps)

                    self.burned = True

                # Start sampling & save the current position, it will be the start point in the next iteration
                with warnings.catch_warnings():
                    warnings.filterwarnings('ignore', r'invalid value encountered in double_scalars.*')
                    self.p0, _, _ = sampler.run_mcmc(self.p0, self.chain_length)

                # Take the last samples from each walker
                self.hypers = sampler.get_chain()[:, -1]
            elif self.mcmc_sampler == 'nuts':
                # Originally published as:
                # http://www.stat.columbia.edu/~gelman/research/published/nuts.pdf
                # A good explanation of HMC:
                # https://theclevermachine.wordpress.com/2012/11/18/mcmc-hamiltonian-monte-carlo-a-k-a-hybrid-monte-carlo/
                # A good explanation of HMC and NUTS can be found in:
                # https://besjournals.onlinelibrary.wiley.com/doi/full/10.1111/2041-210X.12681

                # Do not require the installation of NUTS for SMAC
                # This requires NUTS from https://github.com/mfeurer/NUTS
                import nuts.nuts

                # Perform initial fit to the data to obtain theta0
                if not self.burned:
                    theta0 = self.gp.kernel.theta
                    self.burned = True
                else:
                    theta0 = self.p0
                samples, _, _ = nuts.nuts.nuts6(
                    f=self._ll_w_grad,
                    Madapt=self.burnin_steps,
                    M=self.chain_length,
                    theta0=theta0,
                    # Increasing this value results in longer running times
                    delta=0.5,
                    adapt_mass=False,
                    # Rather low max depth to keep the number of required gradient steps low
                    max_depth=10,
                    rng=self.rng,
                )
                indices = [int(np.rint(ind)) for ind in np.linspace(start=0, stop=len(samples) - 1, num=10)]
                self.hypers = samples[indices]
                self.p0 = self.hypers.mean(axis=0)
            else:
                raise ValueError(self.mcmc_sampler)

            if self.average_samples:
                self.hypers = [self.hypers.mean(axis=0)]

        else:
            self.hypers = self.gp.kernel.theta
            self.hypers = [self.hypers]

        self.models = []
        for sample in self.hypers:

            # Clip extreme log-scale hyperparameter samples to a safe range
            np.clip(sample, -50, 50, out=sample)

            # Instantiate a GP for each hyperparameter configuration
            kernel = deepcopy(self.kernel)
            kernel.theta = sample
            model = GaussianProcess(
                configspace=self.configspace,
                types=self.types,
                bounds=self.bounds,
                kernel=kernel,
                normalize_y=False,
                seed=self.rng.randint(low=0, high=10000),
            )
            try:
                model._train(X, y, do_optimize=False)
                self.models.append(model)
            except np.linalg.LinAlgError:
                pass

        if len(self.models) == 0:
            kernel = deepcopy(self.kernel)
            kernel.theta = self.p0
            model = GaussianProcess(
                configspace=self.configspace,
                types=self.types,
                bounds=self.bounds,
                kernel=kernel,
                normalize_y=False,
                seed=self.rng.randint(low=0, high=10000),
            )
            model._train(X, y, do_optimize=False)
            self.models.append(model)

        if self.normalize_y:
            # Inject the normalization statistics into the individual models. Setting normalize_y to True makes the
            # individual GPs unnormalize the data at predict time.
            for model in self.models:
                model.normalize_y = True
                model.mean_y_ = self.mean_y_
                model.std_y_ = self.std_y_

        self.is_trained = True
        return self
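
For context, a hedged sketch of how an ensemble of per-sample GPs such as self.models is typically marginalized at prediction time; this is illustrative only (mcmc_gp stands for a trained instance of the class above), not the class's actual _predict code:

means, variances = [], []
for gp in mcmc_gp.models:
    m, v = gp.predict(X_test)
    means.append(m)
    variances.append(v)
# Law of total variance across the hyperparameter samples:
mu = np.mean(means, axis=0)
var = np.mean(variances, axis=0) + np.var(means, axis=0)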
Example #7
    def __init__(self, api_config, config_space, parallel_setting="LS"):
        super(SMAC4EPMOpimizer, self).__init__(api_config)
        self.cs = config_space
        self.num_hps = len(self.cs.get_hyperparameters())

        if parallel_setting not in ["CL_min", "CL_max", "CL_mean", "KB", "LS"]:
            raise ValueError(
                "parallel_setting can only be one of the following: "
                "CL_min, CL_max, CL_mean, KB, LS")
        self.parallel_setting = parallel_setting

        rng = np.random.RandomState(seed=0)
        scenario = Scenario({
            "run_obj": "quality",  # we optimize quality (alt. to runtime)
            "runcount-limit": 128,
            "cs": self.cs,  # configuration space
            "deterministic": True,
            "limit_resources": False,
        })

        self.stats = Stats(scenario)
        # traj = TrajLogger(output_dir=None, stats=self.stats)

        self.runhistory = RunHistory()

        r2e_def_kwargs = {
            "scenario": scenario,
            "num_params": self.num_hps,
            "success_states": [
                StatusType.SUCCESS,
            ],
            "impute_censored_data": False,
            "scale_perc": 5,
        }

        self.random_chooser = ChooserProb(rng=rng, prob=0.0)

        types, bounds = get_types(self.cs, instance_features=None)
        model_kwargs = {
            "configspace": self.cs,
            "types": types,
            "bounds": bounds,
            "seed": rng.randint(MAXINT),
        }

        models = []

        cov_amp = ConstantKernel(
            2.0,
            constant_value_bounds=(np.exp(-10), np.exp(2)),
            prior=LognormalPrior(mean=0.0, sigma=1.0, rng=rng),
        )

        cont_dims = np.array(np.where(np.array(types) == 0)[0], dtype=int)
        cat_dims = np.where(np.array(types) != 0)[0]

        if len(cont_dims) > 0:
            exp_kernel = Matern(
                np.ones([len(cont_dims)]),
                [(np.exp(-6.754111155189306), np.exp(0.0858637988771976))
                 for _ in range(len(cont_dims))],
                nu=2.5,
                operate_on=cont_dims,
            )

        if len(cat_dims) > 0:
            ham_kernel = HammingKernel(
                np.ones([len(cat_dims)]),
                [(np.exp(-6.754111155189306), np.exp(0.0858637988771976))
                 for _ in range(len(cat_dims))],
                operate_on=cat_dims,
            )
        assert len(cont_dims) + len(cat_dims) == len(
            scenario.cs.get_hyperparameters())

        noise_kernel = WhiteKernel(
            noise_level=1e-8,
            noise_level_bounds=(np.exp(-25), np.exp(2)),
            prior=HorseshoePrior(scale=0.1, rng=rng),
        )

        if len(cont_dims) > 0 and len(cat_dims) > 0:
            # both
            kernel = cov_amp * (exp_kernel * ham_kernel) + noise_kernel
        elif len(cont_dims) > 0 and len(cat_dims) == 0:
            # only cont
            kernel = cov_amp * exp_kernel + noise_kernel
        elif len(cont_dims) == 0 and len(cat_dims) > 0:
            # only cat
            kernel = cov_amp * ham_kernel + noise_kernel
        else:
            raise ValueError()
        gp_kwargs = {"kernel": kernel}

        rf_kwargs = {}
        rf_kwargs["num_trees"] = model_kwargs.get("num_trees", 10)
        rf_kwargs["do_bootstrapping"] = model_kwargs.get(
            "do_bootstrapping", True)
        rf_kwargs["ratio_features"] = model_kwargs.get("ratio_features", 1.0)
        rf_kwargs["min_samples_split"] = model_kwargs.get(
            "min_samples_split", 2)
        rf_kwargs["min_samples_leaf"] = model_kwargs.get("min_samples_leaf", 1)
        rf_kwargs["log_y"] = model_kwargs.get("log_y", True)

        rf_log = RandomForestWithInstances(**model_kwargs, **rf_kwargs)

        rf_kwargs = copy.deepcopy(rf_kwargs)
        rf_kwargs["log_y"] = False
        rf_no_log = RandomForestWithInstances(**model_kwargs, **rf_kwargs)

        rh2epm_cost = RunHistory2EPM4Cost(**r2e_def_kwargs)
        rh2epm_log_cost = RunHistory2EPM4LogScaledCost(**r2e_def_kwargs)
        rh2epm_copula = RunHistory2EPM4GaussianCopulaCorrect(**r2e_def_kwargs)

        self.combinations = []

        # Combinations of surrogate model, acquisition function, and
        # runhistory-to-cost transformation (eight are assembled below)
        acq_funcs = [EI, PI, LogEI, LCB]
        acq_func_instances = []
        # acq_func_maximizer_instances = []

        n_sls_iterations = {
            1: 10,
            2: 10,
            3: 10,
            4: 10,
            5: 10,
            6: 10,
            7: 8,
            8: 6,
        }.get(len(self.cs.get_hyperparameters()), 5)
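        # Fewer local-search iterations for higher-dimensional spaces; the
        # default of 5 applies to spaces with more than eight hyperparameters.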

        acq_func_maximizer_kwargs = {
            "config_space": self.cs,
            "rng": rng,
            "max_steps": 5,
            "n_steps_plateau_walk": 5,
            "n_sls_iterations": n_sls_iterations,
        }
        self.idx_ei = 0

        self.num_models = len(models)
        self.num_acq_funcs = len(acq_funcs)

        no_transform_gp = GaussianProcess(**copy.deepcopy(model_kwargs),
                                          **copy.deepcopy(gp_kwargs))
        ei = EI(model=no_transform_gp)
        acq_func_maximizer_kwargs["acquisition_function"] = ei
        ei_opt = LocalAndSortedRandomSearch(**acq_func_maximizer_kwargs)
        self.combinations.append((no_transform_gp, ei, ei_opt, rh2epm_cost))

        pi = PI(model=no_transform_gp)
        acq_func_maximizer_kwargs["acquisition_function"] = pi
        pi_opt = LocalAndSortedRandomSearch(**acq_func_maximizer_kwargs)
        self.combinations.append((no_transform_gp, pi, pi_opt, rh2epm_cost))

        lcb = LCB(model=no_transform_gp)
        acq_func_maximizer_kwargs["acquisition_function"] = lcb
        lcb_opt = LocalAndSortedRandomSearch(**acq_func_maximizer_kwargs)
        self.combinations.append((no_transform_gp, lcb, lcb_opt, rh2epm_cost))

        gp = GaussianProcess(**copy.deepcopy(model_kwargs),
                             **copy.deepcopy(gp_kwargs))
        ei = EI(model=gp)
        acq_func_maximizer_kwargs["acquisition_function"] = ei
        ei_opt = LocalAndSortedRandomSearch(**acq_func_maximizer_kwargs)
        self.combinations.append((gp, ei, ei_opt, rh2epm_copula))

        gp = GaussianProcess(**copy.deepcopy(model_kwargs),
                             **copy.deepcopy(gp_kwargs))
        ei = LogEI(model=gp)
        acq_func_maximizer_kwargs["acquisition_function"] = ei
        ei_opt = LocalAndSortedRandomSearch(**acq_func_maximizer_kwargs)
        self.combinations.append((gp, ei, ei_opt, rh2epm_log_cost))

        ei = EI(model=rf_no_log)
        acq_func_maximizer_kwargs["acquisition_function"] = ei
        ei_opt = LocalAndSortedRandomSearch(**acq_func_maximizer_kwargs)
        self.combinations.append((rf_no_log, ei, ei_opt, rh2epm_cost))

        ei = LogEI(model=rf_log)
        acq_func_maximizer_kwargs["acquisition_function"] = ei
        ei_opt = LocalAndSortedRandomSearch(**acq_func_maximizer_kwargs)
        self.combinations.append((rf_log, ei, ei_opt, rh2epm_log_cost))

        ei = EI(model=rf_no_log)
        acq_func_maximizer_kwargs["acquisition_function"] = ei
        ei_opt = LocalAndSortedRandomSearch(**acq_func_maximizer_kwargs)
        self.combinations.append((rf_no_log, ei, ei_opt, rh2epm_copula))

        self.num_acq_instances = len(acq_func_instances)
        self.best_observation = np.inf

        self.next_evaluations = []
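
A hedged sketch (not part of the original constructor) of how one entry of self.combinations might drive a single suggestion step; the exact call signatures are assumptions based on SMAC's usual interfaces:

model, acq_func, acq_opt, rh2epm = self.combinations[self.idx_ei]
X, Y = rh2epm.transform(self.runhistory)      # runhistory -> training data
model.train(X, Y)
acq_func.update(model=model, eta=np.min(Y))   # eta: best observed cost
challengers = acq_opt.maximize(runhistory=self.runhistory,
                               stats=self.stats,
                               num_points=1024)
next_config = next(iter(challengers))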
Example #8
    def _train(self, X: np.ndarray, y: np.ndarray, do_optimize: bool = True):
        """
        Performs MCMC sampling to sample hyperparameter configurations from the
        likelihood, and trains a GP on X and y for each sample.

        Parameters
        ----------
        X: np.ndarray (N, D)
            Input data points. The dimensionality of X is (N, D),
            with N as the number of points and D is the number of features.
        y: np.ndarray (N,)
            The corresponding target values.
        do_optimize: boolean
            If set to True, MCMC sampling is performed; otherwise the
            hyperparameters currently set in the kernel are used.
        """

        if self.normalize_input:
            # Normalize input to be in [0, 1]
            self.X, self.lower, self.upper = normalization.zero_one_normalization(
                X, self.lower, self.upper)
        else:
            self.X = X

        if len(y.shape) > 1:
            y = y.flatten()
            if len(y) != len(X):
                raise ValueError('Shape mismatch: %s vs %s' %
                                 (y.shape, X.shape))

        if self.normalize_output:
            # Normalize output to have zero mean and unit standard deviation
            self.y, self.y_mean, self.y_std = normalization.zero_mean_unit_var_normalization(
                y)
            if self.y_std == 0:
                raise ValueError(
                    "Cannot normalize output. All targets have the same value")
        else:
            self.y = y

        # Use the mean of the data as mean for the GP
        self.mean = np.mean(self.y, axis=0)
        self.gp = george.GP(self.kernel, mean=self.mean)

        if do_optimize:
            # We have one walker for each hyperparameter configuration
            sampler = emcee.EnsembleSampler(self.n_hypers,
                                            len(self.kernel) + 1,
                                            self._loglikelihood)
            sampler.random_state = self.rng.get_state()
            # Do a burn-in in the first iteration
            if not self.burned:
                # Initialize the walkers by sampling from the prior
                if self.prior is None:
                    self.p0 = self.rng.rand(self.n_hypers,
                                            len(self.kernel) + 1)
                else:
                    self.p0 = self.prior.sample_from_prior(self.n_hypers)
                # Run MCMC sampling
                self.p0, _, _ = sampler.run_mcmc(self.p0,
                                                 self.burnin_steps,
                                                 rstate0=self.rng)

                self.burned = True

            # Start sampling
            pos, _, _ = sampler.run_mcmc(self.p0,
                                         self.chain_length,
                                         rstate0=self.rng)

            # Save the current position, it will be the start point in
            # the next iteration
            self.p0 = pos

            # Take the last samples from each walker
            self.hypers = sampler.chain[:, -1]

        else:
            self.hypers = self.gp.kernel.get_parameter_vector().tolist()
            self.hypers.append(self.noise)
            self.hypers = [self.hypers]

        self.models = []
        for sample in self.hypers:
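            # Each sample holds the kernel hyperparameters followed by the log
            # noise level, hence the [:-1] / [-1] split below.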

            # Instantiate a GP for each hyperparameter configuration
            kernel = deepcopy(self.kernel)
            kernel.set_parameter_vector(sample[:-1])
            noise = np.exp(sample[-1])
            model = GaussianProcess(
                types=self.types,
                bounds=self.bounds,
                kernel=kernel,
                normalize_output=self.normalize_output,
                normalize_input=self.normalize_input,
                noise=noise,
                rng=self.rng,
            )
            model._train(X, y, do_optimize=False)
            self.models.append(model)

        self.is_trained = True