Esempio n. 1
0
    def test_basics(self, ma2, distribution_test):
        """Run the shared `distribution_test` checks on a 1D and a 2D ModelPrior."""
        # One-dimensional case: a single 'normal' prior node in a fresh model.
        normal = elfi.Prior('normal', 5, model=elfi.ElfiModel())
        distribution_test(ModelPrior(normal.model))

        # Two-dimensional case: the prior of the `ma2` fixture model.
        distribution_test(ModelPrior(ma2))
Esempio n. 2
0
    def __init__(self, model, discrepancy_name=None, output_names=None, **kwargs):
        """Set up the SMC-ABC sampler.

        Parameters
        ----------
        model : ElfiModel or NodeReference
        discrepancy_name : str, NodeReference, optional
            Required only when `model` is an ElfiModel.
        output_names : list, optional
            Extra model outputs to include in the inference result, for example the
            summaries that correspond to the acquired samples.
        kwargs:
            Forwarded to the parent initializer; see InferenceMethod.

        """
        resolved = self._resolve_model(model, discrepancy_name)
        model, discrepancy_name = resolved

        super(SMC, self).__init__(model, output_names, **kwargs)

        self.discrepancy_name = discrepancy_name
        self._prior = ModelPrior(self.model)

        # Bookkeeping that starts out empty and is filled in while sampling.
        self._round_random_state = None
        self._rejection = None
        self._populations = []
        self.state['round'] = 0
Esempio n. 3
0
    def __init__(self,
                 model,
                 params_grid,
                 marginal=None,
                 logreg_config=None,
                 output_names=None,
                 parallel_cv=True,
                 seed_marginal=None,
                 **kwargs):
        """Set up the LFIRE inference method.

        Parameters
        ----------
        model: ElfiModel
            The elfi graph used by the algorithm.
        params_grid: np.ndarray
            Grid of parameter values at which the posterior is evaluated.
        marginal: np.ndarray, optional
            Marginal data.
        logreg_config: dict, optional
            Configuration dictionary for the logistic regression.
        output_names: list, optional
            Names of the nodes whose outputs are included in the batches.
        parallel_cv: bool, optional
            Either cross-validation or elfi can be run in parallel.
        seed_marginal: int, optional
            Seed for marginal data generation.
        kwargs:
            Forwarded to the parent initializer; see InferenceMethod. There is no
            explicit `batch_size` parameter, so a `batch_size` (size of the training
            data) can only be supplied through here.

        Raises
        ------
        NotImplementedError
            If the model has no Summary node.

        """
        super(LFIRE, self).__init__(model, output_names, **kwargs)

        self.summary_names = self._get_summary_names()
        if len(self.summary_names) < 1:
            raise NotImplementedError('Your model must have at least one Summary node.')

        # The resolver calls are kept in this order; later ones may rely on earlier results.
        self.params_grid = self._resolve_params_grid(params_grid)
        self.marginal = self._resolve_marginal(marginal, seed_marginal)
        self.observed = self._get_observed_summary_values()
        self.joint_prior = ModelPrior(self.model)
        self.logreg_config = self._resolve_logreg_config(logreg_config, parallel_cv)

        self._resolve_elfi_client(parallel_cv)

        # One slot per grid point in every result container.
        grid_size = self.params_grid.shape[0]
        for key in ('posterior', 'lambda'):
            self.state[key] = np.empty(grid_size)
        self.state['coef'] = np.empty((grid_size, self.observed.shape[1]))
        self.state['intercept'] = np.empty(grid_size)
        self.state['infinity'] = dict((name, []) for name in self.parameter_names)
        for name in self.parameter_names:
            self.state[name] = np.empty(grid_size)
Esempio n. 4
0
 def test_numerical_grad_logpdf(self):
     """Compare `ModelPrior.gradient_logpdf` for a normal prior with the analytical gradient."""
     mean, std = 2.2, 1.1
     point = np.random.rand()
     # d/dx log N(x | mean, std) = -(x - mean) / std**2
     expected = -(point - mean) / std**2

     prior_node = elfi.Prior('normal', mean, std, model=elfi.ElfiModel())
     model_prior = ModelPrior(prior_node.model)
     estimate = model_prior.gradient_logpdf(point)
     assert np.isclose(estimate, expected, atol=0.01)
    def build(self,
              model,
              pattern,
              prior_pos,
              prior_cov=64,
              r_bound=47.9,
              pmt_mask=np.ones(127)):
        """Build the ELFI graph for one observed pattern and return a BOLFI object.

        The graph is: two position priors -> vectorized simulator -> combined
        energy/time summary -> Bray-Curtis distance -> log. As a side effect
        `self.model` is set to the ELFI model that owns the nodes.

        Parameters
        ----------
        model : callable
            Simulator; it is wrapped with `elfi.tools.vectorize` before use.
        pattern
            Observed data, passed to the simulator node as `np.array([pattern])`.
        prior_pos
            Forwarded to both `BoundedNormal_x` and `BoundedNormal_y` priors.
        prior_cov : optional
            Forwarded to both priors.
        r_bound : float, optional
            Forwarded to both priors and used as the symmetric search bound for
            'px' and 'py'.
        pmt_mask : np.ndarray, optional
            Not used in this method yet (see the TODO below).

        Returns
        -------
        elfi.BOLFI

        """
        ### Build Priors
        # NOTE(review): ELFI appears to derive node names from the assignment
        # target; the 'px'/'py' keys of `bounds` below rely on that - confirm
        # before renaming these variables.
        px = elfi.Prior(BoundedNormal_x, r_bound, prior_pos, prior_cov)
        py = elfi.Prior(BoundedNormal_y, px, r_bound, prior_pos, prior_cov)

        ### Build Model
        model = elfi.tools.vectorize(model)
        Y = elfi.Simulator(model, px, py, observed=np.array([pattern]))

        # TODO implement PMT mask here
        #def summarize(data, key):
        #    # Select either energy or time for model output.
        #    return np.array([v[key] for v in data])
        def summarize(data):
            """Concatenate the 'energy' and 'time' entries of every item into one row."""
            return np.array(
                [list(v['energy']) + list(v['time']) for v in data])

        # Build summary stat for energy and time
        #S1 = elfi.Summary(summarize, Y, 'energy')
        #S2 = elfi.Summary(summarize, Y, 'time')
        S1 = elfi.Summary(summarize, Y)

        # BOLFI is run on the log of the Bray-Curtis distance.
        d = elfi.Distance('braycurtis', S1)
        log_d = elfi.Operation(np.log, d)

        # set the ELFI model so we can remove it later
        self.model = px.model

        ### Setup BOLFI
        bounds = {'px': (-r_bound, r_bound), 'py': (-r_bound, r_bound)}

        target_model = GPyRegression(log_d.model.parameter_names,
                                     bounds=bounds)

        acquisition_method = ConstraintLCBSC(target_model,
                                             prior=ModelPrior(log_d.model),
                                             noise_var=[1, 1],
                                             exploration_rate=10)

        bolfi = elfi.BOLFI(
            log_d,
            batch_size=1,
            initial_evidence=50,
            update_interval=1,
            # bounds=bounds,  # Not used when using target_model
            target_model=target_model,
            # acq_noise_var=[0.1, 0.1],  # Not used when using acq method
            acquisition_method=acquisition_method,
        )
        return bolfi
Esempio n. 6
0
    def extract_posterior(self, threshold=None):
        """Build the approximate posterior from the surrogate model regression.

        Parameters
        ----------
        threshold: float
            Discrepancy threshold for creating the posterior (log with log discrepancy).

        Returns
        -------
        posterior : elfi.methods.posteriors.BolfiPosterior

        Raises
        ------
        ValueError
            If no batches have been recorded in `self.state` yet.

        """
        nothing_fitted = self.state['n_batches'] == 0
        if nothing_fitted:
            raise ValueError('Model is not fitted yet, please see the `fit` method.')

        surrogate = self.target_model
        prior = ModelPrior(self.model)
        return BolfiPosterior(surrogate, threshold=threshold, prior=prior)
Esempio n. 7
0
    def build(self,
              model,
              pattern,
              prior_pos,
              prior_cov=25,
              r_bound=47.9,
              pmt_mask=np.ones(127)):
        """Build the ELFI graph for one observed pattern and return a BOLFI object.

        The graph is: two position priors -> vectorized simulator -> Bray-Curtis
        distance on the raw simulator output -> log. As a side effect
        `self.model` is set to the ELFI model that owns the nodes.

        Parameters
        ----------
        model : callable
            Simulator; it is wrapped with `elfi.tools.vectorize` before use.
        pattern
            Observed data, passed to the simulator node as `np.array([pattern])`.
        prior_pos
            Forwarded to both `BoundedNormal_x` and `BoundedNormal_y` priors.
        prior_cov : optional
            Forwarded to both priors.
        r_bound : float, optional
            Forwarded to both priors and used as the symmetric search bound for
            'px' and 'py'.
        pmt_mask : np.ndarray, optional
            Not used in this method yet (see the TODO below).

        Returns
        -------
        elfi.BOLFI

        """
        ### Build Priors
        # NOTE(review): ELFI appears to derive node names from the assignment
        # target; the 'px'/'py' keys of `bounds` below rely on that - confirm
        # before renaming these variables.
        px = elfi.Prior(BoundedNormal_x, r_bound, prior_pos, prior_cov)
        py = elfi.Prior(BoundedNormal_y, px, r_bound, prior_pos, prior_cov)

        ### Build Model
        model = elfi.tools.vectorize(model)
        Y = elfi.Simulator(model, px, py, observed=np.array([pattern]))

        # TODO implement PMT mask here
        # BOLFI is run on the log of the Bray-Curtis distance.
        d = elfi.Distance('braycurtis', Y)
        log_d = elfi.Operation(np.log, d)

        # set the ELFI model so we can remove it later
        self.model = px.model

        ### Setup BOLFI
        bounds = {'px': (-r_bound, r_bound), 'py': (-r_bound, r_bound)}

        target_model = GPyRegression(log_d.model.parameter_names,
                                     bounds=bounds)

        acquisition_method = ConstraintLCBSC(target_model,
                                             prior=ModelPrior(log_d.model),
                                             noise_var=[5, 5],
                                             exploration_rate=10)

        bolfi = elfi.BOLFI(
            log_d,
            batch_size=1,
            initial_evidence=50,
            update_interval=1,
            # bounds=bounds,  # Not used when using target_model
            target_model=target_model,
            # acq_noise_var=[0.1, 0.1],  # Not used when using acq method
            acquisition_method=acquisition_method,
        )
        return bolfi
Esempio n. 8
0
def _get_dependencies_acq_fn():
    """Create the objects needed to initialise a MaxVar-based acquisition function.

    Returns
    -------
    (GPyRegression, ModelPrior)
        A GP regression model updated with randomly generated data, and the
        prior of a 2D Gaussian example model.

    """
    true_mean = [4, 4]
    covariance = [[1, .5], [.5, 1]]
    param_names = ['mu_0', 'mu_1']
    half_width = 5  # The prior's range indicator used in the Gaussian noise model.
    param_bounds = {
        name: (centre - half_width, centre + half_width)
        for name, centre in zip(param_names, true_mean)
    }

    # Prior taken from the 2D Gaussian example model.
    gauss_model = elfi.examples.gauss.get_model(
        true_params=true_mean, nd_mean=True, cov_matrix=covariance)
    prior = ModelPrior(gauss_model)

    # Random training inputs inside the bounds (mu_0 drawn first, then mu_1),
    # followed by random training targets.
    n_train = 10
    low_0, high_0 = param_bounds['mu_0']
    low_1, high_1 = param_bounds['mu_1']
    coords = np.column_stack((
        np.random.uniform(low_0, high_0, n_train),
        np.random.uniform(low_1, high_1, n_train),
    ))
    targets = np.random.rand(n_train)

    # Fit the GP on the generated points.
    surrogate = GPyRegression(param_names, bounds=param_bounds)
    surrogate.update(coords, targets)

    return surrogate, prior
Esempio n. 9
0
class BayesianOptimization(ParameterInference):
    """Bayesian Optimization of an unknown target function."""

    def __init__(self,
                 model,
                 target_name=None,
                 bounds=None,
                 initial_evidence=None,
                 update_interval=10,
                 target_model=None,
                 acquisition_method=None,
                 acq_noise_var=0,
                 exploration_rate=10,
                 batch_size=1,
                 batches_per_acquisition=None,
                 async_acq=False,
                 **kwargs):
        """Initialize Bayesian optimization.

        Parameters
        ----------
        model : ElfiModel or NodeReference
        target_name : str or NodeReference
            Only needed if model is an ElfiModel
        bounds : dict, optional
            The region where to estimate the posterior for each parameter in
            model.parameters: dict('parameter_name':(lower, upper), ... )`. Not used if
            custom target_model is given.
        initial_evidence : int, dict, optional
            Number of initial evidence or a precomputed batch dict containing parameter
            and discrepancy values. Default value depends on the dimensionality.
        update_interval : int, optional
            How often to update the GP hyperparameters of the target_model
        target_model : GPyRegression, optional
        acquisition_method : Acquisition, optional
            Method of acquiring evidence points. Defaults to LCBSC.
        acq_noise_var : float or np.array, optional
            Variance(s) of the noise added in the default LCBSC acquisition method.
            If an array, should be 1d specifying the variance for each dimension.
        exploration_rate : float, optional
            Exploration rate of the acquisition method
        batch_size : int, optional
            Elfi batch size. Defaults to 1.
        batches_per_acquisition : int, optional
            How many batches will be requested from the acquisition function at one go.
            Defaults to max_parallel_batches.
        async_acq : bool, optional
            Allow acquisitions to be made asynchronously, i.e. do not wait for all the
            results from the previous acquisition before making the next. This can be more
            efficient with a large amount of workers (e.g. in cluster environments) but
            forgoes the guarantee for the exactly same result with the same initial
            conditions (e.g. the seed). Default False.
            This parameter used to be called ``async``; that spelling is still
            accepted as a keyword argument on interpreters where it is legal.
        **kwargs
            Forwarded to the parent initializer.

        """
        # `async` is a reserved keyword from Python 3.7 on, so it can be neither a
        # parameter nor an attribute name there. Callers on older interpreters may
        # still pass `async=...`; it then arrives in kwargs and must be removed
        # before kwargs are forwarded to the parent.
        if 'async' in kwargs:
            async_acq = kwargs.pop('async')

        model, target_name = self._resolve_model(model, target_name)
        output_names = [target_name] + model.parameter_names
        super(BayesianOptimization, self).__init__(
            model, output_names, batch_size=batch_size, **kwargs)

        target_model = target_model or GPyRegression(self.model.parameter_names, bounds=bounds)

        self.target_name = target_name
        self.target_model = target_model

        # Feed any precomputed evidence to the surrogate right away.
        n_precomputed = 0
        n_initial, precomputed = self._resolve_initial_evidence(initial_evidence)
        if precomputed is not None:
            params = batch_to_arr2d(precomputed, self.parameter_names)
            n_precomputed = len(params)
            self.target_model.update(params, precomputed[target_name])

        self.batches_per_acquisition = batches_per_acquisition or self.max_parallel_batches
        self.acquisition_method = acquisition_method or LCBSC(self.target_model,
                                                              prior=ModelPrior(self.model),
                                                              noise_var=acq_noise_var,
                                                              exploration_rate=exploration_rate,
                                                              seed=self.seed)

        self.n_initial_evidence = n_initial
        self.n_precomputed_evidence = n_precomputed
        self.update_interval = update_interval
        self.async_acq = async_acq
        # Keep the legacy attribute name available (set via setattr because
        # `self.async = ...` does not parse on Python 3.7+).
        setattr(self, 'async', async_acq)
Esempio n. 10
0
 def test_gradient_logpdf(self, ma2):
     """The log-pdf gradient must have the shape of the samples and be close to zero."""
     model_prior = ModelPrior(ma2)
     samples = model_prior.rvs(size=10)
     gradient = model_prior.gradient_logpdf(samples)

     assert samples.shape == gradient.shape
     assert np.allclose(gradient, 0)
Esempio n. 11
0
 def test_pdf(self, ma2):
     """`pdf` must agree with the exponential of `logpdf` on sampled points."""
     model_prior = ModelPrior(ma2)
     samples = model_prior.rvs(size=10)

     density = model_prior.pdf(samples)
     log_density = model_prior.logpdf(samples)
     assert np.allclose(density, np.exp(log_density))
Esempio n. 12
0
    def build(self,
              model,
              pattern,
              prior_pos,
              prior_cov=25,
              r_bound=47.9,
              pmt_mask=np.ones(127),
              pax_e=25):
        """Build the ELFI graph with position and energy priors and return a BOLFI object.

        The graph is: priors 'px', 'py', 'pe' -> vectorized simulator -> separate
        'energy' and 'time' summaries -> one Bray-Curtis distance each -> sum -> log.

        Side effects: sets `self.model` to the ELFI model that owns the nodes,
        stores the indices of 'px', 'py', 'pe' within the model's parameter names
        in `self.d0`, `self.d1`, `self.d2`, and prints the parameter names.

        Parameters
        ----------
        model : callable
            Simulator; it is wrapped with `elfi.tools.vectorize` before use.
        pattern
            Observed data, passed to the simulator node as `np.array([pattern])`.
        prior_pos
            Forwarded to both `BoundedNormal_x` and `BoundedNormal_y` priors.
        prior_cov : optional
            Forwarded to both position priors.
        r_bound : float, optional
            Forwarded to both position priors and used as the symmetric search
            bound for 'px' and 'py'.
        pmt_mask : np.ndarray, optional
            Not used in this method yet (see the TODO below).
        pax_e : optional
            Accepted for interface compatibility. The energy prior uses a fixed
            location and scale, so this value currently has no effect.

        Returns
        -------
        elfi.BOLFI

        """
        ### Build Priors
        # NOTE(review): ELFI appears to derive node names from the assignment
        # target; the 'px'/'py' keys of `bounds` below rely on that - confirm
        # before renaming these variables.
        px = elfi.Prior(BoundedNormal_x, r_bound, prior_pos, prior_cov)
        py = elfi.Prior(BoundedNormal_y, px, r_bound, prior_pos, prior_cov)

        # scipy's truncnorm takes its clip points in standardised units, i.e.
        # relative to the SAME loc/scale that are handed to the distribution.
        # Standardising with a different mean/std silently moves the support away
        # from the [e_min, e_max] range that the GP bounds below assume.
        e_min, e_max = 10, 90
        e_loc, e_scale = 25.0, 3.0
        pe = elfi.Prior(
            'truncnorm',
            (e_min - e_loc) / e_scale,
            (e_max - e_loc) / e_scale,
            e_loc,
            e_scale,
            name='pe')

        ### Build Model
        model = elfi.tools.vectorize(model)
        Y = elfi.Simulator(model, px, py, pe, observed=np.array([pattern]))

        def summarize(x, k):
            """Collect entry `k` of every item in `x` into one array."""
            return np.array([e[k] for e in x])

        S1 = elfi.Summary(summarize, Y, 'energy')
        S2 = elfi.Summary(summarize, Y, 'time')

        # Total discrepancy is the sum of the energy and time distances.
        de = elfi.Distance('braycurtis', S1)
        dt = elfi.Distance('braycurtis', S2)
        d = elfi.Operation(lambda a, b: a + b, de, dt)

        # TODO implement PMT mask here
        log_d = elfi.Operation(np.log, d)

        # set the ELFI model so we can remove it later
        self.model = px.model
        print(self.model.parameter_names)

        # Positions of each parameter in the model's parameter ordering.
        self.d0 = self.model.parameter_names.index('px')
        self.d1 = self.model.parameter_names.index('py')
        self.d2 = self.model.parameter_names.index('pe')

        ### Setup BOLFI
        bounds = {
            'px': (-r_bound, r_bound),
            'py': (-r_bound, r_bound),
            'pe': (e_min, e_max)
        }
        noise_vars = [5, 5, 5]

        target_model = GPyRegression(self.model.parameter_names, bounds=bounds)

        acquisition_method = ConstraintLCBSC(target_model,
                                             prior=ModelPrior(self.model),
                                             noise_var=noise_vars,
                                             exploration_rate=10)
        # The acquisition method is told which columns hold px and py.
        acquisition_method.d0 = self.d0
        acquisition_method.d1 = self.d1

        bolfi = elfi.BOLFI(
            log_d,
            batch_size=1,
            initial_evidence=50,
            update_interval=1,
            # bounds=bounds,  # Not used when using target_model
            target_model=target_model,
            # acq_noise_var=[0.1, 0.1],  # Not used when using acq method
            acquisition_method=acquisition_method,
        )
        return bolfi
Esempio n. 13
0
    def __init__(self,
                 model,
                 params_grid,
                 marginal=None,
                 classifier=None,
                 output_names=None,
                 seed_marginal=None,
                 precomputed_models=None,
                 **kwargs):
        """Set up the LFIRE inference method.

        Parameters
        ----------
        model: ElfiModel
            The elfi graph used by the algorithm.
        params_grid: np.ndarray
            Grid of parameter values at which the posterior is evaluated.
        marginal: np.ndarray, optional
            Marginal data. Only used when no precomputed models are given.
        classifier: str, optional
            Classifier to be used. Default LogisticRegression.
        output_names: list, optional
            Names of the nodes whose outputs are included in the batches.
        seed_marginal: int, optional
            Seed for marginal data generation.
        precomputed_models: file or str, optional
            Precomputed classifier parameters file.
        kwargs:
            Forwarded to the parent initializer; see InferenceMethod. There is no
            explicit `batch_size` parameter, so a `batch_size` (size of the training
            data) can only be supplied through here.

        Raises
        ------
        NotImplementedError
            If the model has no Summary node.

        """
        super(LFIRE, self).__init__(model, output_names, **kwargs)

        # Step 1 - inspect the model.
        self.summary_names = self._get_summary_names()
        if len(self.summary_names) < 1:
            raise NotImplementedError('Your model must have at least one Summary node.')
        self.joint_prior = ModelPrior(self.model)

        # Step 2 - configure LFIRE itself.
        self.params_grid = self._resolve_params_grid(params_grid)
        self.classifier = self._resolve_classifier(classifier)
        self._resolve_elfi_client(self.classifier.parallel_cv)
        grid_size = self.params_grid.shape[0]

        # Step 3 - allocate the result containers.
        self.state['posterior'] = np.empty(grid_size)
        self.state['infinity'] = dict((name, []) for name in self.parameter_names)

        # Step 4 - either load likelihood-ratio models or prepare to fit them.
        if precomputed_models is not None:
            self.load_models(precomputed_models)
        else:
            self.marginal = self._resolve_marginal(marginal, seed_marginal)
            for name in self.parameter_names:
                self.state[name] = np.empty(grid_size)

        # Step 5 - prior density at every grid point.
        self.state['prior'] = self.joint_prior.pdf(params_grid)
Esempio n. 14
0
def run_BOLFI_single(index, true_x, true_y, folder):
    """Simulate one event at a known position and reconstruct it with BOLFI.

    A pattern is generated by the model at (`true_x`, `true_y`), a BOLFI run is
    fitted to it, the discrepancy plot is saved, and point estimates of the
    sampled posterior are added to the position dict returned by the model.

    Parameters
    ----------
    index : int
        Run number; used in the file name of the saved discrepancy plot.
    true_x, true_y
        True position handed to the model to generate the observed pattern.
    folder : str
        Prefix for the output plot. It is concatenated directly with the file
        name, so a directory must end with a path separator.

    Returns
    -------
    dict
        The dict from `model.get_latest_pax_position()` extended with the keys
        'truth', 'BOLFI_mean', 'BOLFI_mode' and 'BOLFI_median', each holding an
        {'x': ..., 'y': ...} dict.

    """
    ### Setup

    model = Model('XENON1T_ABC_all_pmts_on.ini')
    model.change_defaults(s2_electrons = 25)

    prior_mean = PriorPosition()

    pattern = model(true_x, true_y)
    pax_pos = model.get_latest_pax_position()
    prior_pos = prior_mean(pattern)

    r_bound = 47.8
    pmt_mask = model.pmt_mask[:127].astype(int)

    ### Build Priors
    # NOTE(review): ELFI appears to derive node names from the assignment
    # target; the 'px'/'py' keys used below rely on that - confirm before
    # renaming these variables.
    px = elfi.Prior(BoundedNormal_x, r_bound, prior_pos, 64)
    py = elfi.Prior(BoundedNormal_y, px, r_bound, prior_pos, 64)

    ### Build Model
    model=elfi.tools.vectorize(model)
    Y = elfi.Simulator(model, px, py, observed=pattern)

    # The four helpers below are alternative discrepancies. They are only
    # referenced by the commented-out variants further down; the active
    # discrepancy is the weighted Euclidean distance.

    def likelihood_chisquare(y, n, w=None):
        """Log of 2 * sum(y - n + n * log(n / y)) per row, optionally masked by `w`."""
        if w is not None:
            y = y[:,w.astype(bool)]
            n = n[:,w.astype(bool)]

        # Clip away zeros so the ratio and the log stay finite.
        n = np.clip(n, 1e-10, None)
        y = np.clip(y, 1e-10, None)
        res = 2 * np.sum(y - n  + n * np.log(n/y), axis=1)
        lres = np.log(res)
        return lres

    def chisquare(y, n, w=None):
        """Log of the row-wise chi-square statistic of `n` against `y`, optionally masked."""
        if w is not None:
            y = y[:,w.astype(bool)]
            n = n[:,w.astype(bool)]
        y = np.clip(y, 1e-1, None)
        chisq, p = sps.chisquare(n, y, axis=1)
        return np.array(np.log(chisq))

    def k2_test(y, n, w=None):
        """Two-sample KS statistic of the first row of `n` against each row of `y`."""
        if w is not None:
            y = y[:,w.astype(bool)]
            n = n[:,w.astype(bool)]

        # ks_2samp does not have axis arg
        ds = [sps.ks_2samp(n[0], y[i])[0] for i in range(y.shape[0])]
        return np.array(ds)

    def sqrt_euclid(y, n, w=None):
        """Row-wise sum of sqrt(|y - n|), optionally masked by `w`."""
        if w is not None:
            y = y[:,w.astype(bool)]
            n = n[:,w.astype(bool)]

        d = np.sum(np.sqrt(np.abs(y - n)), axis=1)
        return d

    #likelihood_chisquare_masked = partial(likelihood_chisquare, w=pmt_mask)
    #log_d = elfi.Distance(likelihood_chisquare_masked, Y)

    #chisquare_masked = partial(chisquare, w=pmt_mask)
    #log_d = elfi.Distance(chisquare_masked, Y)

    #k2_test_masked = partial(k2_test, w=pmt_mask)
    #d = elfi.Distance(k2_test_masked, Y)
    #log_d = elfi.Operation(np.log, d)

    #sqrt_euclid_masked = partial(sqrt_euclid, w=pmt_mask)
    #d = elfi.Distance(sqrt_euclid_masked, Y)
    #log_d = elfi.Operation(np.log, d)

    d = elfi.Distance('euclidean', Y, w=pmt_mask)
    log_d = elfi.Operation(np.log, d)

    ### Setup BOLFI
    bounds = {'px':(-r_bound, r_bound), 'py':(-r_bound, r_bound)}

    target_model = GPyRegression(log_d.model.parameter_names,
                                 bounds=bounds)

    acquisition_method = ConstraintLCBSC(target_model,
                                         prior=ModelPrior(log_d.model),
                                         noise_var=[0.1, 0.1],
                                         exploration_rate=10)

    bolfi = elfi.BOLFI(log_d, batch_size=1, initial_evidence=20, update_interval=1,
                       # bounds=bounds,  # Not used when using target_model
                       target_model=target_model,
                       # acq_noise_var=[0.1, 0.1],  # Not used when using acq method
                       acquisition_method=acquisition_method,
                       )

    ### Run BOLFI
    # The returned posterior object is not needed here; only the fitted state is.
    bolfi.fit(n_evidence=200)

    bolfi.plot_discrepancy()
    plt.savefig(folder + 'bolfi_disc_%d.png' % index, dpi = 150)
    plt.close()

    result_BOLFI = bolfi.sample(1000, info_freq=1000)
    samples = result_BOLFI.samples_array

    means = result_BOLFI.sample_means
    # Older SciPy returns the column-wise mode with shape (1, n_params), newer
    # SciPy (keepdims=False) with shape (n_params,). Flattening yields one value
    # per parameter in both cases, whereas `.mode[0]` is a scalar on new SciPy.
    modes = np.ravel(sps.mode(samples, axis=0).mode)
    medians = np.median(samples, axis=0)

    pax_pos['truth'] = {'x': true_x, 'y': true_y}
    pax_pos['BOLFI_mean'] = {'x': means['px'], 'y': means['py']}
    pax_pos['BOLFI_mode'] = {'x': modes[0], 'y': modes[1]}
    pax_pos['BOLFI_median'] = {'x': medians[0], 'y': medians[1]}
    return pax_pos