예제 #1
0
    def create_model(self, prior: R.SdPrior, data: pd.Series):
        """
        Build the boom state space model for 'data' and attach its samplers.

        Args:
          prior: an R.SdPrior object describing the prior distribution on the
            residual variance parameter.  If None, a default is built from
            the standard deviation of 'data' (sigma_guess = sd, upper_limit =
            1.2 * sd).
          data:  The time series of observations as a Pandas Series.  Entries
            flagged by data.isna() are passed to the model as unobserved.

        Returns:
          The boom.StateSpaceModel object, which is also kept in self._model.
        """
        observations = boom.Vector(data.values)
        observed_mask = ~data.isna()
        self._model = boom.StateSpaceModel(observations, observed_mask)

        if prior is None:
            data_sd = np.std(data)
            prior = R.SdPrior(sigma_guess=data_sd, upper_limit=data_sd * 1.2)

        # Sampler for the observation (residual) model.
        residual_prior = boom.ChisqModel(prior.sample_size, prior.sigma_guess)
        observation_model = self._model.observation_model
        residual_sampler = boom.ZeroMeanGaussianConjSampler(
            observation_model, residual_prior)
        residual_sampler.set_sigma_upper_limit(prior.upper_limit)
        observation_model.set_method(residual_sampler)

        # Sampler for the state space model as a whole.
        self._model.set_method(
            boom.StateSpacePosteriorSampler(self._model, boom.GlobalRng.rng))

        self._original_series = data
        return self._model
예제 #2
0
    def __init__(self, y, sigma_prior=None, initial_state_prior=None,
                 sdy=None, initial_y=None):
        """
        Build a local level state model and attach its posterior sampler.

        Args:
          y: The data to be modeled.  Only used to derive 'sdy' and
            'initial_y' when those are needed and were not supplied.
          sigma_prior: An R.SdPrior describing the prior on the standard
            deviation of the innovation terms.  If None then 'sdy' will be
            used to choose a default.
          initial_state_prior: An object of class boom.GaussianModel serving as
            the prior distribution on the value of the state at time 0 (the
            time of the first observation).  If None then initial_y and sdy
            will be used to choose a default.
          sdy: The standard deviation of y.  If None then this will be computed
            from y.  This argument is primarily intended to handle unusual
            cases where 'y' is unavailable.
          initial_y: The first element of y.  If None then this will be
            computed from y.  This argument is primarily intended to handle
            unusual cases where 'y' is unavailable.

        Raises:
          Exception: if sigma_prior is not an R.SdPrior, or if
            initial_state_prior is not a boom.GaussianModel.
        """
        if sigma_prior is None:
            if sdy is None:
                sdy = np.std(y)
            sigma_prior = R.SdPrior(sigma_guess=.01 * sdy,
                                    sample_size=.01,
                                    upper_limit=sdy)
        # Validate outside the default branch so that a caller-supplied prior
        # is actually checked (the attributes read below are SdPrior fields).
        if not isinstance(sigma_prior, R.SdPrior):
            raise Exception("sigma_prior should be an R.SdPrior.")

        if initial_state_prior is None:
            if initial_y is None:
                initial_y = y[0]
            if sdy is None:
                sdy = np.std(y)
            initial_y = float(initial_y)
            sdy = float(sdy)
            initial_state_prior = boom.GaussianModel(initial_y, sdy**2)
        if not isinstance(initial_state_prior, boom.GaussianModel):
            raise Exception(
                "initial_state_prior should be a boom.GaussianModel.")

        self._state_model = boom.LocalLevelStateModel()
        self._state_model.set_initial_state_mean(initial_state_prior.mu)
        self._state_model.set_initial_state_variance(
            initial_state_prior.sigsq)

        # boom.ChisqModel takes (df, sigma_estimate): the prior sample size
        # comes first and the guess at sigma second.
        innovation_precision_prior = boom.ChisqModel(
            sigma_prior.sample_size,
            sigma_prior.sigma_guess)
        state_model_sampler = self._state_model.set_posterior_sampler(
            innovation_precision_prior)
        state_model_sampler.set_sigma_upper_limit(sigma_prior.upper_limit)
        self._state_contribution = None
예제 #3
0
 def _assign_posterior_sampler(self, innovation_sd_prior: R.SdPrior):
     """
     Create a conjugate posterior sampler for self._state_model and assign
     it with set_method.

     Args:
       innovation_sd_prior: An R.SdPrior supplying the prior sample size,
         the guess at the innovation standard deviation, and the upper
         limit on that standard deviation.
     """
     # boom.ChisqModel takes (df, sigma_estimate): the prior sample size
     # comes first and the guess at sigma second.
     innovation_precision_prior = boom.ChisqModel(
         innovation_sd_prior.sample_size,
         innovation_sd_prior.sigma_guess)
     state_model_sampler = boom.ZeroMeanGaussianConjSampler(
         self._state_model,
         innovation_precision_prior,
         seeding_rng=boom.GlobalRng.rng)
     state_model_sampler.set_sigma_upper_limit(
         innovation_sd_prior.upper_limit)
     self._state_model.set_method(state_model_sampler)
예제 #4
0
 def _build_state_model(self):
     """
     Create self._state_model as a boom.LocalLevelStateModel configured from
     self._initial_state_prior and self._sigma_prior, and attach its
     posterior sampler.
     """
     self._state_model = boom.LocalLevelStateModel()
     self._state_model.set_initial_state_mean(
         self._initial_state_prior.mean)
     self._state_model.set_initial_state_variance(
         self._initial_state_prior.variance)
     # boom.ChisqModel takes (df, sigma_estimate): the prior sample size
     # comes first and the guess at sigma second.
     innovation_precision_prior = boom.ChisqModel(
         self._sigma_prior.sample_size, self._sigma_prior.sigma_guess)
     state_model_sampler = self._state_model.set_posterior_sampler(
         innovation_precision_prior)
     state_model_sampler.set_sigma_upper_limit(
         self._sigma_prior.upper_limit)
 def test_mcmc(self):
     """
     Attach a conjugate sampler to self.model, draw from the posterior 1000
     times, and check that the first and last draws of sigma differ.
     """
     sigma_truth = 2.3
     num_draws = 1000
     observations = np.random.randn(100) * sigma_truth
     chisq_prior = boom.ChisqModel(1.0, 1.0)
     self.model.set_data(boom.Vector(observations))
     self.model.set_method(
         boom.ZeroMeanGaussianConjSampler(self.model, chisq_prior))

     def next_sigma():
         # Advance the chain one step and report the current sigma.
         self.model.sample_posterior()
         return self.model.sigma

     sigma_draws = np.array(
         [next_sigma() for _ in range(num_draws)], dtype=float)
     self.assertNotAlmostEqual(sigma_draws[0], sigma_draws[-1])
    def test_mcmc():
        """
        Smoke test: fit a boom.GaussianModel to simulated Gaussian data with
        a conjugate sampler and take 100 posterior draws.  Nothing is
        asserted; the test passes if no call raises.
        """
        mu = -16
        sigma = 7
        simulated = np.random.randn(10000) * sigma + mu

        model = boom.GaussianModel()
        model.set_data(boom.Vector(simulated))

        sampler = boom.GaussianConjugateSampler(
            model,
            boom.GaussianModelGivenSigma(model.sigsq_parameter, mu, 1.0),
            boom.ChisqModel(1.0, sigma))
        model.set_method(sampler)

        draws_left = 100
        while draws_left > 0:
            model.sample_posterior()
            draws_left -= 1
예제 #7
0
    def __init__(self,
                 x,
                 y=None,
                 expected_r2=.5,
                 prior_df=.01,
                 expected_model_size=1,
                 prior_information_weight=.01,
                 diagonal_shrinkage=.5,
                 optional_coefficient_estimate=None,
                 max_flips=-1,
                 mean_y=None,
                 sdy=None,
                 prior_inclusion_probabilities=None,
                 sigma_upper_limit=np.inf):
        """
        Computes information that is shared by the different implementation of
        spike and slab priors.  Currently, the only difference between the
        different priors is the prior variance on the regression coefficients.
        When that changes, change this class accordingly, and change all the
        classes that inherit from it.

        Args:
          x: The design matrix for the regression being modeled, as either a
            2-dimensional np.ndarray or a boom.Matrix.  Its number of columns
            is the maximum size of the coefficient vector.

          y: The response variable, as a np.ndarray or boom.Vector.  Only
            used to compute 'mean_y' and 'sdy' when those are not supplied.

          expected_r2: The R^2 statistic that the model is expected
            to achieve.  Used along with 'sdy' to derive a prior distribution
            for the residual variance.

          prior_df: The number of observations worth of weight to give to the
            guess at the residual variance.

          expected_model_size: The expected number of nonzero coefficients in
            the model.  Used to set the prior inclusion probabilities to
            expected_model_size / (number of columns of x) when
            'prior_inclusion_probabilities' is None.  The resulting
            probability is clamped to the interval [0, 1].

          prior_information_weight: Multiplier applied to x'x / (number of
            rows of x) when forming the unscaled prior precision.

          diagonal_shrinkage: The off-diagonal elements of the unscaled prior
            precision are multiplied by (1 - diagonal_shrinkage); the
            diagonal elements are not shrunk.

          optional_coefficient_estimate: A vector with one element per column
            of x to use as the prior mean of the regression coefficients.
            This can also be None, in which case the prior mean for the
            first coefficient (assumed to be the intercept) will be set to
            mean_y, and the prior mean for all other coefficients will be 0.

          max_flips: Currently not read by this constructor.

          mean_y: The mean of the response variable.  Used to create a
            sensible default prior mean for the regression coefficients when
            optional_coefficient_estimate is None.  Computed from 'y' if None.

          sdy: Used along with expected_r2 to create a prior guess at the
            residual variance.  Computed from 'y' if None.

          prior_inclusion_probabilities: A vector with one element per column
            of x giving the prior inclusion probability of each coefficient.
            Each element must be between 0 and 1, inclusive.  If left as None
            then a default value will be created with all elements set to
            expected_model_size / (number of columns of x).

          sigma_upper_limit: The largest acceptable value for the residual
            standard deviation.  Currently not read by this constructor.

        Raises:
          Exception: if x is neither a np.ndarray nor a boom.Matrix, or if
            'y' is None when it is needed to compute 'mean_y' or 'sdy'.
        """
        if isinstance(x, np.ndarray):
            x = boom.Matrix(x)
        if not isinstance(x, boom.Matrix):
            raise Exception(
                "x should either be a 2-dimensional np.array or a boom.Matrix."
            )

        # Convert y once, up front, so that both the mean and the standard
        # deviation below are computed from a boom.Vector.
        if isinstance(y, np.ndarray):
            y = boom.Vector(y)

        if mean_y is None:
            if y is None:
                raise Exception("Either 'y' or 'mean_y' must be specified.")
            mean_y = boom.mean(y)
        if optional_coefficient_estimate is None:
            optional_coefficient_estimate = np.zeros(x.ncol)
            optional_coefficient_estimate[0] = mean_y
        self._mean = boom.Vector(optional_coefficient_estimate)

        sample_size = x.nrow
        ods = 1. - diagonal_shrinkage
        scale_factor = prior_information_weight * ods / sample_size
        self._unscaled_prior_precision = x.inner() * scale_factor
        # Undo the shrinkage on the diagonal only.  This relies on diag()
        # returning a writable view into the matrix -- TODO confirm.
        # NOTE(review): diagonal_shrinkage == 1 makes ods zero, so this
        # becomes a division of zero by zero.
        diag_view = self._unscaled_prior_precision.diag()
        diag_view /= ods

        if prior_inclusion_probabilities is None:
            potential_nvars = x.ncol
            prob = expected_model_size / potential_nvars
            if prob > 1:
                prob = 1
            if prob < 0:
                prob = 0
            self._prior_inclusion_probabilities = boom.Vector(
                potential_nvars, prob)
        else:
            self._prior_inclusion_probabilities = boom.Vector(
                prior_inclusion_probabilities)

        if sdy is None:
            if y is None:
                raise Exception("Either 'y' or 'sdy' must be specified.")
            sdy = boom.sd(y)
        sample_variance = sdy**2
        expected_residual_variance = (1 - expected_r2) * sample_variance
        self._residual_precision_prior = boom.ChisqModel(
            prior_df, np.sqrt(expected_residual_variance))
예제 #8
0
파일: bayes.py 프로젝트: autumnjolitz/BOOM
 def boom(self):
     """
     Build the boom.ChisqModel described by this object's sample_size and
     sigma_guess attributes.
     """
     # Imported locally rather than at module level, as in the original.
     import BayesBoom.boom as boom_module
     df = self.sample_size
     sigma_estimate = self.sigma_guess
     return boom_module.ChisqModel(df, sigma_estimate)
예제 #9
0
파일: bayes.py 프로젝트: autumnjolitz/BOOM
 def create_chisq_model(self):
     """
     Build a boom.ChisqModel from this object's sample_size and sigma_guess
     attributes.
     """
     # Imported locally rather than at module level, as in the original.
     import BayesBoom.boom as boom_module
     df = self.sample_size
     sigma_estimate = self.sigma_guess
     return boom_module.ChisqModel(df, sigma_estimate)
예제 #10
0
 def __setstate__(self, payload):
     """
     Restore the instance from 'payload' and rebuild the residual precision
     prior.

     The payload is expected to carry 'prior_df' and 'prior_ss' entries.
     They are used to reconstruct the boom.ChisqModel and are then removed
     from the instance.
     """
     vars(self).update(payload)
     df = self.prior_df
     sigma_estimate = np.sqrt(self.prior_ss / df)
     self._residual_precision_prior = boom.ChisqModel(df, sigma_estimate)
     del self.prior_df, self.prior_ss
예제 #11
0
    def __init__(self,
                 y,
                 nseasons: int,
                 season_duration: int = 1,
                 initial_state_prior: boom.MvnModel = None,
                 innovation_sd_prior: R.SdPrior = None,
                 sdy: float = None):
        """
        Build a seasonal state model and attach its posterior sampler.

        Args:
          y: The time series being modeled.  Only used to compute 'sdy' when
            sdy is None and at least one of the two priors is None.  May be
            None if sdy is supplied, or if both initial_state_prior and
            innovation_sd_prior are supplied.
          nseasons: The number of seasons in a cycle.
          season_duration:  The number of time periods each season.
          initial_state_prior: A multivariate normal distribution of dimension
            nseasons - 1.  This is a distribution on the seasonal value at time
            0 and on the nseasons-2 previous values.  If None is passed then a
            default prior will be assumed.
          innovation_sd_prior: Prior distribution on the standard deviation of
            the innovation terms.  If None, then a default prior will be
            assumed.
          sdy: The standard deviation of the time series being modeled.  If
            None it is computed from y (ignoring NaN) when a default prior
            has to be built.

        Raises:
          Exception: if a default prior is needed but neither 'y' nor 'sdy'
            was supplied.
        """
        self._state_model = boom.SeasonalStateModel(
            nseasons=nseasons, season_duration=season_duration)

        if initial_state_prior is None:
            if sdy is None:
                if y is None:
                    raise Exception("One of 'y', 'sdy', or "
                                    "'initial_state_prior' must be supplied.")
                sdy = np.nanstd(y)
            initial_state_prior = self._default_initial_state_prior(sdy)
        if innovation_sd_prior is None:
            if sdy is None:
                if y is None:
                    raise Exception("One of 'y', 'sdy', or "
                                    "'innovation_sd_prior' must be supplied.")
                sdy = np.nanstd(y)
            innovation_sd_prior = self._default_sigma_prior(sdy)

        self._state_model.set_initial_state_mean(
            initial_state_prior.mu)
        self._state_model.set_initial_state_variance(
            initial_state_prior.Sigma)

        # boom.ChisqModel takes (df, sigma_estimate): the prior sample size
        # comes first and the guess at sigma second.
        innovation_precision_prior = boom.ChisqModel(
            innovation_sd_prior.sample_size,
            innovation_sd_prior.sigma_guess)
        state_model_sampler = boom.ZeroMeanGaussianConjSampler(
            self._state_model,
            innovation_precision_prior,
            seeding_rng=boom.GlobalRng.rng)

        state_model_sampler.set_sigma_upper_limit(
            innovation_sd_prior.upper_limit)
        self._state_model.set_method(state_model_sampler)

        self._state_contribution = None