예제 #1
0
def test_acquisition():
    """Check the batches returned by LCBSC and UniformAcquisition.

    A two-parameter GP surrogate is fitted to random data, after which the
    acquisition methods are queried with no noise, scalar noise variance and
    per-dimension noise variances.  A full covariance matrix passed as
    ``noise_var`` to LCBSC is expected to raise ``ValueError``.
    """
    names = ['a', 'b']
    bounds = {'a': [-2, 3], 'b': [5, 6]}
    n_params = 2
    n_fit = 10
    batch = 5
    t = 1

    def check_batch(points):
        # Every acquired batch must have one row per requested point and
        # every coordinate must lie inside the bounds of its parameter.
        assert points.shape == (batch, n_params)
        for col, name in enumerate(names):
            lower, upper = bounds[name]
            assert np.all((points[:, col] >= lower) & (points[:, col] <= upper))

    # Fit the surrogate to random evidence drawn inside the bounds.
    target_model = GPyRegression(names, bounds=bounds)
    coords = np.column_stack(
        [np.random.uniform(*bounds[name], n_fit) for name in names])
    values = np.random.rand(n_fit)
    target_model.update(coords, values)

    # Without acquisition noise all points of the batch coincide.
    method = acquisition.LCBSC(target_model, noise_var=0)
    acquired = method.acquire(batch, t=t)
    for col in range(n_params):
        assert np.allclose(acquired[1:, col], acquired[0, col])

    # Scalar noise variance.
    method = acquisition.LCBSC(target_model, noise_var=2)
    check_batch(method.acquire(batch, t=t))

    # One noise variance per dimension.
    per_dim_var = np.random.uniform(0, 5, size=2)
    method = acquisition.LCBSC(target_model, noise_var=per_dim_var)
    check_batch(method.acquire(batch, t=t))

    # An arbitrary (symmetric, diagonally boosted) covariance matrix is rejected.
    half = np.random.rand(n_params, n_params) * 0.5
    noise_cov = half + half.T
    noise_cov = noise_cov + n_params * np.eye(n_params)
    with pytest.raises(ValueError):
        acquisition.LCBSC(target_model, noise_var=noise_cov)

    # Uniform acquisition, reusing the per-dimension variances from above.
    method = acquisition.UniformAcquisition(target_model, noise_var=per_dim_var)
    check_batch(method.acquire(batch, t=t))
예제 #2
0
 def _gp(self):
     """Construct the GPyRegression surrogate described by ``self.params``.

     The kernel is created from ``kernel_class`` with one input dimension per
     entry of ``bounds``.  When ``kernel_prior`` is given, Gamma priors built
     from the expectation/variance pairs (``scale_*``, ``var_*``, ``noise_*``)
     are attached to the kernel lengthscale and variance, and a Gamma prior is
     passed on as the noise prior; otherwise no noise prior is used.
     """
     cfg = self.params
     n_dims = len(cfg.bounds)
     kernel = cfg.kernel_class(
         input_dim=n_dims,
         variance=cfg.kernel_var,
         lengthscale=cfg.kernel_scale,
         ARD=cfg.ARD)

     if cfg.kernel_prior is None:
         noise_prior = None
     else:
         lengthscale_prior = GPy.priors.Gamma.from_EV(
             cfg.kernel_prior["scale_E"], cfg.kernel_prior["scale_V"])
         kernel.lengthscale.set_prior(lengthscale_prior, warning=False)

         variance_prior = GPy.priors.Gamma.from_EV(
             cfg.kernel_prior["var_E"], cfg.kernel_prior["var_V"])
         kernel.variance.set_prior(variance_prior, warning=False)

         noise_prior = GPy.priors.Gamma.from_EV(
             cfg.kernel_prior["noise_E"], cfg.kernel_prior["noise_V"])

     return GPyRegression(
         parameter_names=self.model.parameter_names,
         bounds=cfg.bounds,
         optimizer=cfg.gp_params_optimizer,
         max_opt_iters=cfg.gp_params_max_opt_iters,
         kernel=kernel,
         noise_var=cfg.noise_var,
         noise_prior=noise_prior)
예제 #3
0
 def _resolve_target_model(self, target_model):
     """Return the surrogate model to use.

     A given ``GPyRegression`` instance is returned unchanged, ``None`` yields
     a new ``GPyRegression`` over the model parameters and ``self.bounds``,
     and anything else raises ``TypeError``.
     """
     if isinstance(target_model, GPyRegression):
         return target_model
     if target_model is not None:
         raise TypeError('target_model must be an instance of GPyRegression.')
     return GPyRegression(self.model.parameter_names, self.bounds)
    def build(self,
              model,
              pattern,
              prior_pos,
              prior_cov=64,
              r_bound=47.9,
              pmt_mask=np.ones(127)):
        """Assemble the ELFI graph and return a configured BOLFI instance.

        Parameters
        ----------
        model : callable
            Simulator; it is wrapped with ``elfi.tools.vectorize`` before use.
        pattern
            Observed data; passed to the simulator node as
            ``np.array([pattern])``.
        prior_pos, prior_cov
            Forwarded to the ``BoundedNormal_x`` / ``BoundedNormal_y`` priors.
        r_bound : float
            Forwarded to both priors and used as the symmetric bound
            ``(-r_bound, r_bound)`` of 'px' and 'py' for the GP surrogate.
        pmt_mask
            Currently unused in this method (see the TODO below).  The default
            array is created once, when the function is defined.

        Returns
        -------
        The ``elfi.BOLFI`` object built on the log Bray-Curtis distance.

        Notes
        -----
        Stores the ELFI model of the prior nodes in ``self.model``.
        """
        # Build priors.
        # NOTE(review): no explicit name= is passed to these nodes, yet the
        # bounds dict below is keyed by 'px' and 'py' -- presumably ELFI derives
        # node names from the assigned variable names; confirm before renaming
        # or reformatting these assignments.
        px = elfi.Prior(BoundedNormal_x, r_bound, prior_pos, prior_cov)
        py = elfi.Prior(BoundedNormal_y, px, r_bound, prior_pos, prior_cov)

        # Build the simulator node from the vectorized model.
        model = elfi.tools.vectorize(model)
        Y = elfi.Simulator(model, px, py, observed=np.array([pattern]))

        # TODO implement PMT mask here
        # Earlier per-key variant, kept for reference:
        #def summarize(data, key):
        #    # Select either energy or time for model output.
        #    return np.array([v[key] for v in data])
        def summarize(data):
            # One row per element of data: its 'energy' values followed by its
            # 'time' values.
            return np.array(
                [list(v['energy']) + list(v['time']) for v in data])

        # Single summary statistic combining energy and time.
        # Earlier per-key variant, kept for reference:
        #S1 = elfi.Summary(summarize, Y, 'energy')
        #S2 = elfi.Summary(summarize, Y, 'time')
        S1 = elfi.Summary(summarize, Y)

        # The BOLFI target is the log of the Bray-Curtis distance.
        d = elfi.Distance('braycurtis', S1)
        log_d = elfi.Operation(np.log, d)

        # set the ELFI model so we can remove it later
        self.model = px.model

        # Set up BOLFI: GP surrogate over the bounded parameters.
        bounds = {'px': (-r_bound, r_bound), 'py': (-r_bound, r_bound)}

        target_model = GPyRegression(log_d.model.parameter_names,
                                     bounds=bounds)

        acquisition_method = ConstraintLCBSC(target_model,
                                             prior=ModelPrior(log_d.model),
                                             noise_var=[1, 1],
                                             exploration_rate=10)

        bolfi = elfi.BOLFI(
            log_d,
            batch_size=1,
            initial_evidence=50,
            update_interval=1,
            # bounds=bounds,  # Not used when using target_model
            target_model=target_model,
            # acq_noise_var=[0.1, 0.1],  # Not used when using acq method
            acquisition_method=acquisition_method,
        )
        return bolfi
예제 #5
0
def _get_dependencies_acq_fn():
    """Create a fitted surrogate and a prior for acquisition-function tests.

    Returns
    -------
    tuple
        ``(gp, prior)`` where ``gp`` is a ``GPyRegression`` updated with ten
        random points inside the parameter bounds and ``prior`` is the
        ``ModelPrior`` of the 2-d Gaussian example model.

    """
    centre = [4, 4]
    param_names = ['mu_0', 'mu_1']
    half_width = 5  # Half-width of the bounds around each mean component.
    bounds = {
        name: (mu - half_width, mu + half_width)
        for name, mu in zip(param_names, centre)
    }

    # Prior taken from the 2-d Gaussian example model.
    gauss_model = elfi.examples.gauss.get_model(
        true_params=centre,
        nd_mean=True,
        cov_matrix=[[1, .5], [.5, 1]])
    prior = ModelPrior(gauss_model)

    # Random fitting data: coordinates inside the bounds, values in [0, 1).
    n_fit = 10
    coords = np.column_stack(
        [np.random.uniform(*bounds[name], n_fit) for name in param_names])
    values = np.random.rand(n_fit)

    # Fit the surrogate to the generated points.
    surrogate = GPyRegression(param_names, bounds=bounds)
    surrogate.update(coords, values)

    return surrogate, prior
예제 #6
0
    def build(self,
              model,
              pattern,
              prior_pos,
              prior_cov=25,
              r_bound=47.9,
              pmt_mask=np.ones(127)):
        """Assemble the ELFI graph and return a configured BOLFI instance.

        Parameters
        ----------
        model : callable
            Simulator; it is wrapped with ``elfi.tools.vectorize`` before use.
        pattern
            Observed data; passed to the simulator node as
            ``np.array([pattern])``.
        prior_pos, prior_cov
            Forwarded to the ``BoundedNormal_x`` / ``BoundedNormal_y`` priors.
        r_bound : float
            Forwarded to both priors and used as the symmetric bound
            ``(-r_bound, r_bound)`` of 'px' and 'py' for the GP surrogate.
        pmt_mask
            Currently unused in this method (see the TODO below).  The default
            array is created once, when the function is defined.

        Returns
        -------
        The ``elfi.BOLFI`` object built on the log Bray-Curtis distance
        computed directly on the simulator output.

        Notes
        -----
        Stores the ELFI model of the prior nodes in ``self.model``.
        """
        # Build priors.
        # NOTE(review): no explicit name= is passed to these nodes, yet the
        # bounds dict below is keyed by 'px' and 'py' -- presumably ELFI derives
        # node names from the assigned variable names; confirm before renaming
        # or reformatting these assignments.
        px = elfi.Prior(BoundedNormal_x, r_bound, prior_pos, prior_cov)
        py = elfi.Prior(BoundedNormal_y, px, r_bound, prior_pos, prior_cov)

        # Build the simulator node from the vectorized model.
        model = elfi.tools.vectorize(model)
        Y = elfi.Simulator(model, px, py, observed=np.array([pattern]))

        # TODO implement PMT mask here
        # The BOLFI target is the log of the Bray-Curtis distance.
        d = elfi.Distance('braycurtis', Y)
        log_d = elfi.Operation(np.log, d)

        # set the ELFI model so we can remove it later
        self.model = px.model

        # Set up BOLFI: GP surrogate over the bounded parameters.
        bounds = {'px': (-r_bound, r_bound), 'py': (-r_bound, r_bound)}

        target_model = GPyRegression(log_d.model.parameter_names,
                                     bounds=bounds)

        acquisition_method = ConstraintLCBSC(target_model,
                                             prior=ModelPrior(log_d.model),
                                             noise_var=[5, 5],
                                             exploration_rate=10)

        bolfi = elfi.BOLFI(
            log_d,
            batch_size=1,
            initial_evidence=50,
            update_interval=1,
            # bounds=bounds,  # Not used when using target_model
            target_model=target_model,
            # acq_noise_var=[0.1, 0.1],  # Not used when using acq method
            acquisition_method=acquisition_method,
        )
        return bolfi
예제 #7
0
class BayesianOptimization(ParameterInference):
    """Bayesian Optimization of an unknown target function."""

    def __init__(self,
                 model,
                 target_name=None,
                 bounds=None,
                 initial_evidence=None,
                 update_interval=10,
                 target_model=None,
                 acquisition_method=None,
                 acq_noise_var=0,
                 exploration_rate=10,
                 batch_size=1,
                 batches_per_acquisition=None,
                 async_=False,
                 **kwargs):
        """Initialize Bayesian optimization.

        Parameters
        ----------
        model : ElfiModel or NodeReference
        target_name : str or NodeReference
            Only needed if model is an ElfiModel
        bounds : dict, optional
            The region where to estimate the posterior for each parameter in
            model.parameters: `dict('parameter_name':(lower, upper), ... )`. Not used if
            custom target_model is given.
        initial_evidence : int, dict, optional
            Number of initial evidence or a precomputed batch dict containing parameter
            and discrepancy values. Default value depends on the dimensionality.
        update_interval : int, optional
            How often to update the GP hyperparameters of the target_model
        target_model : GPyRegression, optional
        acquisition_method : Acquisition, optional
            Method of acquiring evidence points. Defaults to LCBSC.
        acq_noise_var : float or np.array, optional
            Variance(s) of the noise added in the default LCBSC acquisition method.
            If an array, should be 1d specifying the variance for each dimension.
        exploration_rate : float, optional
            Exploration rate of the acquisition method
        batch_size : int, optional
            Elfi batch size. Defaults to 1.
        batches_per_acquisition : int, optional
            How many batches will be requested from the acquisition function at one go.
            Defaults to max_parallel_batches.
        async_ : bool, optional
            Allow acquisitions to be made asynchronously, i.e. do not wait for all the
            results from the previous acquisition before making the next. This can be more
            efficient with a large amount of workers (e.g. in cluster environments) but
            forgoes the guarantee for the exactly same result with the same initial
            conditions (e.g. the seed). Default False.
            This parameter was previously spelled ``async``, which is a reserved
            keyword since Python 3.7; a legacy ``async`` entry arriving through
            ``**kwargs`` is still honoured and takes precedence.
        **kwargs
            Forwarded to the parent class initializer.

        """
        # Accept the legacy spelling without forwarding it to the parent class.
        if 'async' in kwargs:
            async_ = kwargs.pop('async')

        model, target_name = self._resolve_model(model, target_name)
        output_names = [target_name] + model.parameter_names
        super(BayesianOptimization, self).__init__(
            model, output_names, batch_size=batch_size, **kwargs)

        target_model = target_model or GPyRegression(self.model.parameter_names, bounds=bounds)

        self.target_name = target_name
        self.target_model = target_model

        # Feed any precomputed evidence to the surrogate straight away.
        n_precomputed = 0
        n_initial, precomputed = self._resolve_initial_evidence(initial_evidence)
        if precomputed is not None:
            params = batch_to_arr2d(precomputed, self.parameter_names)
            n_precomputed = len(params)
            self.target_model.update(params, precomputed[target_name])

        self.batches_per_acquisition = batches_per_acquisition or self.max_parallel_batches
        self.acquisition_method = acquisition_method or LCBSC(self.target_model,
                                                              prior=ModelPrior(self.model),
                                                              noise_var=acq_noise_var,
                                                              exploration_rate=exploration_rate,
                                                              seed=self.seed)

        self.n_initial_evidence = n_initial
        self.n_precomputed_evidence = n_precomputed
        self.update_interval = update_interval
        self.async_ = async_
        # Keep the attribute under its legacy name too; it can only be reached
        # with getattr()/setattr() on Python >= 3.7.
        setattr(self, 'async', async_)
예제 #8
0
    def build(self,
              model,
              pattern,
              prior_pos,
              prior_cov=25,
              r_bound=47.9,
              pmt_mask=np.ones(127),
              pax_e=25):
        """Assemble the three-parameter ELFI graph and return a BOLFI instance.

        Parameters
        ----------
        model : callable
            Simulator; it is wrapped with ``elfi.tools.vectorize`` before use.
        pattern
            Observed data; passed to the simulator node as
            ``np.array([pattern])``.
        prior_pos, prior_cov
            Forwarded to the ``BoundedNormal_x`` / ``BoundedNormal_y`` priors.
        r_bound : float
            Forwarded to both position priors and used as the symmetric bound
            ``(-r_bound, r_bound)`` of 'px' and 'py' for the GP surrogate.
        pmt_mask
            Currently unused in this method (see the TODO below).  The default
            array is created once, when the function is defined.
        pax_e
            Used as ``mu_e`` and, through its square root, as ``std_e`` when
            computing the two truncation arguments of the 'pe' prior.

        Returns
        -------
        The ``elfi.BOLFI`` object built on the log of the summed 'energy' and
        'time' Bray-Curtis distances.

        Notes
        -----
        Stores the ELFI model in ``self.model``, the positions of 'px', 'py'
        and 'pe' within its parameter names in ``self.d0``, ``self.d1`` and
        ``self.d2``, and prints the parameter names.
        """
        # Build priors.
        mu_e = pax_e
        std_e = pax_e**0.5
        # NOTE(review): no explicit name= is passed to px/py, yet they are
        # looked up as 'px' and 'py' below -- presumably ELFI derives node
        # names from the assigned variable names; confirm before renaming or
        # reformatting these assignments.
        px = elfi.Prior(BoundedNormal_x, r_bound, prior_pos, prior_cov)
        py = elfi.Prior(BoundedNormal_y, px, r_bound, prior_pos, prior_cov)
        # NOTE(review): the truncation limits are standardised with mu_e/std_e
        # while the next two arguments are hard-coded to 25 and 3 -- confirm
        # this mismatch is intended.
        pe = elfi.Prior(
            'truncnorm',
            (10 - mu_e) / std_e,
            (90 - mu_e) / std_e,
            25,  # mu_e,
            3,  # std_e,
            name='pe')

        # Build the simulator node from the vectorized model.
        model = elfi.tools.vectorize(model)
        Y = elfi.Simulator(model, px, py, pe, observed=np.array([pattern]))

        def summarize(x, k):
            # Pick entry k from every element of x.
            return np.array([e[k] for e in x])

        S1 = elfi.Summary(summarize, Y, 'energy')
        S2 = elfi.Summary(summarize, Y, 'time')

        # Total discrepancy: sum of the energy and time Bray-Curtis distances.
        de = elfi.Distance('braycurtis', S1)
        dt = elfi.Distance('braycurtis', S2)
        d = elfi.Operation(lambda a, b: a + b, de, dt)

        # TODO implement PMT mask here
        #d = elfi.Distance('braycurtis', Y)
        log_d = elfi.Operation(np.log, d)

        # set the ELFI model so we can remove it later
        self.model = px.model
        print(self.model.parameter_names)

        # Positions of each parameter within the model's parameter ordering.
        self.d0 = self.model.parameter_names.index('px')
        self.d1 = self.model.parameter_names.index('py')
        self.d2 = self.model.parameter_names.index('pe')

        # Set up BOLFI: GP surrogate over the bounded parameters.
        bounds = {
            'px': (-r_bound, r_bound),
            'py': (-r_bound, r_bound),
            'pe': (10, 90)
        }
        noise_vars = [5, 5, 5]
        #noise_vars[self.d2] = 10  # energy noise variance

        target_model = GPyRegression(self.model.parameter_names, bounds=bounds)

        acquisition_method = ConstraintLCBSC(target_model,
                                             prior=ModelPrior(self.model),
                                             noise_var=noise_vars,
                                             exploration_rate=10)
        # Hand the 'px'/'py' column indices to the acquisition method.
        acquisition_method.d0 = self.d0
        acquisition_method.d1 = self.d1

        bolfi = elfi.BOLFI(
            log_d,
            batch_size=1,
            initial_evidence=50,
            update_interval=1,
            # bounds=bounds,  # Not used when using target_model
            target_model=target_model,
            # acq_noise_var=[0.1, 0.1],  # Not used when using acq method
            acquisition_method=acquisition_method,
        )
        return bolfi
예제 #9
0
        true_samples_dict = {
            key: true_samples_dict[key].flatten()
            for key in par_names
        }
        true_samples_df = pd.DataFrame(true_samples_dict)

    # ==========================
    # DO INFERENCE:
    # ==========================
    evidence = int(args.evidence)
    if meth == 'BO':
        surrogate = str(args.surrogate)
        q = float(args.q)
        init_ev = int(evidence / 2)
        if surrogate == 'GP':
            target_model = GPyRegression(parameter_names=par_names,
                                         bounds=bounds)
            meth += '-' + surrogate
        elif surrogate == 'DGP':
            # set the DGP architecture
            LVlayer = eval(args.lv)
            GPlayers = int(args.gplayers) - 1 if LVlayer is True else int(
                args.gplayers) - 2
            surrogate = 'LV-' if LVlayer else ''
            surrogate += str(GPlayers + 1) + '*GP'
            target_model = DGPRegression(parameter_names=par_names,
                                         bounds=bounds,
                                         GPlayers=GPlayers,
                                         LVlayer=LVlayer,
                                         Ms=50,
                                         IW_samples=5,
                                         pred_samples=20,
예제 #10
0
def run_BOLFI_single(index, true_x, true_y, folder):
    """Run one BOLFI position reconstruction and collect point estimates.

    A pattern is simulated at ``(true_x, true_y)``, a BOLFI surrogate is fitted
    to the log Euclidean distance between simulated and observed patterns, and
    the posterior is sampled.

    Parameters
    ----------
    index : int
        Used only to number the saved discrepancy plot.
    true_x, true_y
        Position passed to the simulator to create the observed pattern.
    folder : str
        Prefix of the plot path; the file name is appended by plain string
        concatenation, so it should end with a path separator.

    Returns
    -------
    The mapping returned by ``model.get_latest_pax_position()``, extended with
    the entries 'truth', 'BOLFI_mean', 'BOLFI_mode' and 'BOLFI_median', each a
    dict with keys 'x' and 'y'.
    """
    ### Setup

    model = Model('XENON1T_ABC_all_pmts_on.ini')
    model.change_defaults(s2_electrons = 25)

    prior_mean = PriorPosition()

    pattern = model(true_x, true_y)
    pax_pos = model.get_latest_pax_position()
    prior_pos = prior_mean(pattern)

    r_bound = 47.8
    pmt_mask = model.pmt_mask[:127].astype(int)

    ### Build Priors
    # NOTE(review): no explicit name= is passed to these nodes, yet the bounds
    # dict and the sample means below are keyed by 'px' and 'py' -- presumably
    # ELFI derives node names from the assigned variable names; confirm before
    # renaming or reformatting these assignments.
    px = elfi.Prior(BoundedNormal_x, r_bound, prior_pos, 64)
    py = elfi.Prior(BoundedNormal_y, px, r_bound, prior_pos, 64)

    ### Build Model
    model=elfi.tools.vectorize(model)
    Y = elfi.Simulator(model, px, py, observed=pattern)

    # The helpers below are alternative discrepancies.  They are not wired into
    # the graph at the moment; the commented-out lines further down show how
    # each one would be used.

    def likelihood_chisquare(y, n, w=None):
        # Row-wise log of 2 * sum(y - n + n * log(n / y)); w optionally selects
        # columns.  Both inputs are clipped to >= 1e-10 before the logarithm.
        if w is not None:
            y = y[:,w.astype(bool)]
            n = n[:,w.astype(bool)]

        n = np.clip(n, 1e-10, None)
        y = np.clip(y, 1e-10, None)
        res = 2 * np.sum(y - n  + n * np.log(n/y), axis=1)
        lres = np.log(res)
        #if lres > 10:
        #    lres = np.ones(lres.shape) * 9
        return lres

    def chisquare(y, n, w=None):
        # Row-wise log of the sps.chisquare statistic of n against y, with y
        # clipped to >= 0.1; w optionally selects columns.
        if w is not None:
            y = y[:,w.astype(bool)]
            n = n[:,w.astype(bool)]
        y = np.clip(y, 1e-1, None)
        #print('y shape', y.shape)
        #print('n shape', n.shape)
        chisq, p = sps.chisquare(n, y, axis=1)
        return np.array(np.log(chisq))

    def k2_test(y, n, w=None):
        # Two-sample KS statistic between the first row of n and every row of y.
        if w is not None:
            y = y[:,w.astype(bool)]
            n = n[:,w.astype(bool)]

        #d, p = sps.ks_2samp(n, y)  # , axis=1)
        # ks_2samp does not have axis arg
        ds = [sps.ks_2samp(n[0], y[i])[0] for i in range(y.shape[0])]
        return np.array(ds)

    def sqrt_euclid(y, n, w=None):
        # Row-wise sum of sqrt(|y - n|); w optionally selects columns.
        if w is not None:
            y = y[:,w.astype(bool)]
            n = n[:,w.astype(bool)]

        d = np.sum(np.sqrt(np.abs(y - n)), axis=1)
        return d

    #likelihood_chisquare_masked = partial(likelihood_chisquare, w=pmt_mask)
    #log_d = elfi.Distance(likelihood_chisquare_masked, Y)

    #chisquare_masked = partial(chisquare, w=pmt_mask)
    #log_d = elfi.Distance(chisquare_masked, Y)

    #k2_test_masked = partial(k2_test, w=pmt_mask)
    #d = elfi.Distance(k2_test_masked, Y)
    #log_d = elfi.Operation(np.log, d)

    #sqrt_euclid_masked = partial(sqrt_euclid, w=pmt_mask)
    #d = elfi.Distance(sqrt_euclid_masked, Y)
    #log_d = elfi.Operation(np.log, d)

    # Active discrepancy: log of the Euclidean distance, with the PMT mask
    # passed as the w keyword.
    d = elfi.Distance('euclidean', Y, w=pmt_mask)
    log_d = elfi.Operation(np.log, d)

    ### Setup BOLFI
    bounds = {'px':(-r_bound, r_bound), 'py':(-r_bound, r_bound)}

    target_model = GPyRegression(log_d.model.parameter_names,
                                 bounds=bounds)

    acquisition_method = ConstraintLCBSC(target_model,
                                         prior=ModelPrior(log_d.model),
                                         noise_var=[0.1, 0.1],
                                         exploration_rate=10)

    bolfi = elfi.BOLFI(log_d, batch_size=1, initial_evidence=20, update_interval=1,
                       # bounds=bounds,  # Not used when using target_model
                       target_model=target_model,
                       # acq_noise_var=[0.1, 0.1],  # Not used when using acq method
                       acquisition_method=acquisition_method,
                       )

    ### Run BOLFI
    bolfi.fit(n_evidence=200)

    bolfi.plot_discrepancy()
    plt.savefig(folder + 'bolfi_disc_%d.png' % index, dpi = 150)
    plt.close()

    result_BOLFI = bolfi.sample(1000, info_freq=1000)
    samples = result_BOLFI.samples_array

    means = result_BOLFI.sample_means
    # Per-column mode.  Depending on the SciPy version, mode() either keeps the
    # reduced axis (shape (1, k)) or drops it (shape (k,)); atleast_2d makes
    # both layouts yield the same length-k vector.
    modes = np.atleast_2d(sps.mode(samples, axis=0).mode)[0]
    medians = np.median(samples, axis=0)

    pax_pos['truth'] = {'x': true_x, 'y': true_y}
    pax_pos['BOLFI_mean'] = {'x': means['px'], 'y': means['py']}
    pax_pos['BOLFI_mode'] = {'x': modes[0], 'y': modes[1]}
    pax_pos['BOLFI_median'] = {'x': medians[0], 'y': medians[1]}
    return pax_pos