def _validate_priors(self, level_sigma_prior, level_nu_prior,
                     slope_sigma_prior, slope_nu_prior, y, sdy):
    """
    Supply defaults for any missing priors, type check all four priors,
    and store them on the object.

    Args:
      level_sigma_prior: An R.SdPrior, or None to request a default.
      level_nu_prior: An R.DoubleModel, or None to request the default
        R.UniformPrior(0.1, 100).
      slope_sigma_prior: An R.SdPrior, or None to request a default.
      slope_nu_prior: An R.DoubleModel, or None to request the default
        R.UniformPrior(0.1, 100).
      y: Forwarded to self._compute_sdy when a default sigma prior is
        needed.
      sdy: Forwarded to self._compute_sdy when a default sigma prior is
        needed.  The value returned by that call sets the scale of the
        default prior.

    Raises:
      Exception: If any prior has an unexpected type.

    Effects:
      Sets self._level_sigma_prior, self._slope_sigma_prior,
      self._level_nu_prior and self._slope_nu_prior.
    """
    # Innovation standard deviation priors.
    if level_sigma_prior is None:
        sdy = self._compute_sdy(sdy, y, "level_sigma_prior")
        level_sigma_prior = R.SdPrior(sigma_guess=.01 * sdy,
                                      upper_limit=sdy)
    elif not isinstance(level_sigma_prior, R.SdPrior):
        raise Exception("Unexpected type for level_sigma_prior.")

    if slope_sigma_prior is None:
        sdy = self._compute_sdy(sdy, y, "slope_sigma_prior")
        slope_sigma_prior = R.SdPrior(sigma_guess=.01 * sdy,
                                      upper_limit=sdy)
    elif not isinstance(slope_sigma_prior, R.SdPrior):
        raise Exception("Unexpected type for slope_sigma_prior.")

    # Tail thickness ("nu") priors.  The type check also covers the
    # default object, exactly as it does for a user supplied prior.
    level_nu_prior = (
        R.UniformPrior(0.1, 100) if level_nu_prior is None
        else level_nu_prior
    )
    if not isinstance(level_nu_prior, R.DoubleModel):
        raise Exception("Unexpected type for level_nu_prior.")

    slope_nu_prior = (
        R.UniformPrior(0.1, 100) if slope_nu_prior is None
        else slope_nu_prior
    )
    if not isinstance(slope_nu_prior, R.DoubleModel):
        raise Exception("Unexpected type for slope_nu_prior.")

    self._level_sigma_prior = level_sigma_prior
    self._slope_sigma_prior = slope_sigma_prior
    self._level_nu_prior = level_nu_prior
    self._slope_nu_prior = slope_nu_prior
def _set_posterior_sampler(self, y, level_sigma_prior, slope_sigma_prior,
                           sdy):
    """
    A utility called by the constructor.  See the __init__ method for
    argument documentation.

    Missing priors are replaced by defaults scaled by the value returned
    from self._compute_sdy.  The priors are then handed to the underlying
    state model's posterior sampler.

    Raises:
      Exception: If either prior is not an R.SdPrior.
    """
    if level_sigma_prior is None:
        sdy = self._compute_sdy(sdy, y, "level_sigma_prior")
        level_sigma_prior = R.SdPrior(sigma_guess=.01 * sdy,
                                      upper_limit=sdy)
    elif not isinstance(level_sigma_prior, R.SdPrior):
        raise Exception("Unexpected type for level_sigma_prior.")

    if slope_sigma_prior is None:
        sdy = self._compute_sdy(sdy, y, "slope_sigma_prior")
        # NOTE(review): the slope default uses 0.1 * sdy while the level
        # default uses .01 * sdy -- confirm the difference is intended.
        slope_sigma_prior = R.SdPrior(sigma_guess=0.1 * sdy,
                                      upper_limit=sdy)
    elif not isinstance(slope_sigma_prior, R.SdPrior):
        raise Exception("Unexpected type for slope_sigma_prior.")

    level_chisq = level_sigma_prior.create_chisq_model()
    level_limit = level_sigma_prior.upper_limit
    slope_chisq = slope_sigma_prior.create_chisq_model()
    slope_limit = slope_sigma_prior.upper_limit
    self._state_model.set_posterior_sampler(
        level_chisq, level_limit, slope_chisq, slope_limit,
        boom.GlobalRng.rng)
def __setstate__(self, payload):
    """
    Restore the object from a dictionary of serialized state.

    Args:
      payload: A dict.  The keys "level_sigma_prior", "slope_sigma_prior",
        "slope_ar1_prior", "slope_mean_prior", "initial_state_mean" and
        "initial_state_variance" are required.  The keys "level_sigma",
        "slope_sigma", "slope_ar1", "slope_mean" and "state_contribution"
        are optional and default to None.
    """
    # Build placeholder priors, then overwrite their contents from the
    # payload through each prior's own __setstate__.
    self.level_sigma_prior = R.SdPrior(1, 1)
    self.slope_sigma_prior = R.SdPrior(1, 1)
    self.slope_ar1_prior = R.Ar1CoefficientPrior()
    self.slope_mean_prior = R.NormalPrior(0, 1)

    self.level_sigma_prior.__setstate__(payload["level_sigma_prior"])
    self.slope_sigma_prior.__setstate__(payload["slope_sigma_prior"])
    self.slope_ar1_prior.__setstate__(payload["slope_ar1_prior"])
    self.slope_mean_prior.__setstate__(payload["slope_mean_prior"])

    self._create_model()

    initial_state_mean = payload["initial_state_mean"]
    initial_state_variance = payload["initial_state_variance"]
    # The slope prior must be rebuilt from the slope entries.  The
    # previous code reused index 0 (the level) for both priors, so a
    # round trip silently replaced the slope prior with the level prior.
    # NOTE(review): assumes the state vector is ordered (level, slope,
    # ...) so the slope sits at index 1 -- confirm against the state model.
    self._set_initial_state_distribution(
        initial_level_prior=R.NormalPrior(
            initial_state_mean[0],
            np.sqrt(initial_state_variance[0, 0])),
        initial_slope_prior=R.NormalPrior(
            initial_state_mean[1],
            np.sqrt(initial_state_variance[1, 1]))
    )

    # MCMC draws and cached results are optional in the payload.
    self.level_sigma = payload.get("level_sigma", None)
    self.slope_sigma = payload.get("slope_sigma", None)
    self.slope_ar1 = payload.get("slope_ar1", None)
    self.slope_mean = payload.get("slope_mean", None)
    self._state_contribution = payload.get("state_contribution", None)
def _validate_coefficient_innovation_priors(self):
    """
    Ensure that self._coefficient_innovation_priors are a list of
    SdPriors.

    A list made up entirely of R.SdPrior objects is accepted unchanged.
    A single R.SdPrior is repeated self.xdim times.  None is replaced by
    one default prior per predictor, scaled by the response and predictor
    sample standard deviations.

    Raises:
      Exception: If the attribute is anything else.
    """
    priors = self._coefficient_innovation_priors

    already_valid = isinstance(priors, list) and all(
        isinstance(entry, R.SdPrior) for entry in priors)
    if already_valid:
        return

    if isinstance(priors, R.SdPrior):
        self._coefficient_innovation_priors = [priors] * self.xdim
        return

    if priors is not None:
        raise Exception("coefficient_innovation_priors must either be an "
                        "R.SdPrior or a list of such priors.")

    sdy = self._response_suf.sample_sd
    defaults = []
    for i in range(self.xdim):
        sigma_guess = .01 * sdy / self._predictor_suf[i].sample_sd
        defaults.append(R.SdPrior(sigma_guess, 1))
    self._coefficient_innovation_priors = defaults
def _validate_slope_sigma_prior(slope_sigma_prior, sdy):
    """
    Return a usable prior for the slope innovation standard deviation.

    Args:
      slope_sigma_prior: An R.SdPrior, or None to request the default
        R.SdPrior(.01 * sdy, upper_limit=sdy).
      sdy: The scale used to build the default prior.

    Returns:
      The supplied prior, or the default if None was supplied.

    Raises:
      Exception: If slope_sigma_prior is neither None nor an R.SdPrior.
    """
    if slope_sigma_prior is None:
        return R.SdPrior(.01 * sdy, upper_limit=sdy)
    if isinstance(slope_sigma_prior, R.SdPrior):
        return slope_sigma_prior
    raise Exception("Wrong type passed for slope_sigma_prior.  "
                    "Expected an R.SdPrior")
def create_model(self, prior: R.SdPrior, data: pd.Series):
    """
    Build the boom state space model and attach its posterior samplers.

    Args:
      prior: an R.SdPrior object describing the prior distribution on
        the residual variance parameter.  If None, a default is built
        from the standard deviation of 'data'.
      data: The time series of observations as a Pandas Series.  Missing
        values (NaN) are flagged as unobserved.

    Returns:
      A boom.StateSpaceModel object.  The same object is stored in
      self._model, and 'data' is stored in self._original_series.
    """
    series_values = boom.Vector(data.values)
    observed_mask = ~data.isna()
    self._model = boom.StateSpaceModel(series_values, observed_mask)

    if prior is None:
        series_sd = np.std(data)
        prior = R.SdPrior(sigma_guess=series_sd,
                          upper_limit=series_sd * 1.2)

    # Sampler for the observation (residual) variance.
    residual_prior = boom.ChisqModel(prior.sample_size, prior.sigma_guess)
    residual_sampler = boom.ZeroMeanGaussianConjSampler(
        self._model.observation_model, residual_prior)
    residual_sampler.set_sigma_upper_limit(prior.upper_limit)
    self._model.observation_model.set_method(residual_sampler)

    # Sampler for the model as a whole.
    self._model.set_method(
        boom.StateSpacePosteriorSampler(self._model, boom.GlobalRng.rng))

    self._original_series = data
    return self._model
def __init__(self, y, sigma_prior=None, initial_state_prior=None,
             sdy=None, initial_y=None):
    """
    Build a local level state model.

    Args:
      y: The data to be modeled.  If sdy and initial_y are supplied this
        is not used.
      sigma_prior: An R.SdPrior serving as the prior on the standard
        deviation of the innovation terms.  If None then 'sdy' will be
        used to choose a default.
      initial_state_prior: An object of class boom.GaussianModel serving
        as the prior distribution on the value of the state at time 0
        (the time of the first observation).  If None then initial_y and
        sdy will be used to choose a default.
      sdy: The standard deviation of y.  If None then this will be
        computed from y.  This argument is primarily intended to handle
        unusual cases where 'y' is unavailable.
      initial_y: The first element of y.  If None then this will be
        computed from y.  This argument is primarily intended to handle
        unusual cases where 'y' is unavailable.

    Raises:
      Exception: If sigma_prior is not an R.SdPrior, or if
        initial_state_prior is not a boom.GaussianModel.
    """
    if sigma_prior is None:
        if sdy is None:
            sdy = np.std(y)
        sigma_prior = R.SdPrior(sigma_guess=.01 * sdy,
                                sample_size=.01,
                                upper_limit=sdy)
    if not isinstance(sigma_prior, R.SdPrior):
        raise Exception("sigma_prior should be an R.SdPrior.")

    if initial_state_prior is None:
        if initial_y is None:
            initial_y = y[0]
        if sdy is None:
            sdy = np.std(y)
        initial_y = float(initial_y)
        sdy = float(sdy)
        initial_state_prior = boom.GaussianModel(initial_y, sdy**2)
    if not isinstance(initial_state_prior, boom.GaussianModel):
        raise Exception(
            "initial_state_prior should be a boom.GaussianModel.")

    self._state_model = boom.LocalLevelStateModel()
    self._state_model.set_initial_state_mean(initial_state_prior.mu)
    self._state_model.set_initial_state_variance(
        initial_state_prior.sigsq)

    # ChisqModel takes (sample size, sigma guess), the order used when
    # the observation model prior is built in create_model.  The two
    # arguments were previously passed the other way around.
    innovation_precision_prior = boom.ChisqModel(
        sigma_prior.sample_size,
        sigma_prior.sigma_guess)
    state_model_sampler = self._state_model.set_posterior_sampler(
        innovation_precision_prior)
    state_model_sampler.set_sigma_upper_limit(sigma_prior.upper_limit)

    self._state_contribution = None
def _validate_sigma_prior(self, sigma_prior, y, sdy):
    """
    Store the innovation standard deviation prior in self._sigma_prior,
    building a default when none was supplied.

    Args:
      sigma_prior: An R.SdPrior, or None to request a default.
      y: The data.  Only used to compute sdy when both sigma_prior and
        sdy are None.
      sdy: The scale for the default prior.  If None it is computed from
        y as the NaN-ignoring sample standard deviation (ddof=1).

    Raises:
      Exception: If sigma_prior is neither None nor an R.SdPrior.
    """
    if sigma_prior is None:
        scale = np.nanstd(y, ddof=1) if sdy is None else sdy
        sigma_prior = R.SdPrior(sigma_guess=.01 * scale,
                                sample_size=.01,
                                upper_limit=scale)
    elif not isinstance(sigma_prior, R.SdPrior):
        raise Exception("sigma_prior should be an R.SdPrior.")
    self._sigma_prior = sigma_prior
def _ensure_priors(self):
    """
    Create the default residual SD prior and the posterior sampler if
    they have not already been set.

    Effects:
      If self._residual_sd_prior is None it is replaced by an R.SdPrior
      whose guess is the square root of (1 - expected R^2) times the
      sample variance of the response, with self._prior_sample_size as
      its sample size.

      If self._sampler is None a boom.BigAssSpikeSlabSampler is built
      from the spike, slab and residual SD priors, and assigned to the
      model.
    """
    if self._residual_sd_prior is None:
        sample_var = self._response_suf.sample_var
        residual_var = (1 - self._expected_Rsqure) * sample_var
        # The sample size belongs to SdPrior, not to np.sqrt.  With the
        # parenthesis misplaced, numpy received it as the 'out' argument
        # and raised a TypeError.
        self._residual_sd_prior = R.SdPrior(
            np.sqrt(residual_var), self._prior_sample_size)

    if self._sampler is None:
        self._sampler = boom.BigAssSpikeSlabSampler(
            self._model,
            boom.VariableSelectionPrior(self._spike),
            self._slab.boom(self._model.Sigsq_prm),
            self._residual_sd_prior.boom())
        self._model.set_method(self._sampler)
def _validate_residual_precision_prior(self):
    """
    Ensure the self._residual_precision_prior is the correct class.

    If the attribute is not already an R.SdPrior it is replaced by a
    default prior with a sample size of 1.0, whose guess corresponds to
    half the sample variance of the response.

    Preconditions:
      self._set_data must have been run.
    """
    if not isinstance(self._residual_precision_prior, R.SdPrior):
        # Assume an expected R^2 of 50%.
        half_variance = self._response_suf.sample_variance / 2.0
        self._residual_precision_prior = R.SdPrior(
            sigma_guess=np.sqrt(half_variance),
            sample_size=1.0)
def _verify_prior(self, sigma_prior, sdy, sdx):
    """
    Store a list-like of R.SdPrior objects in self._sigma_prior.

    Args:
      sigma_prior: One of
        * None: a default prior is built for each element of sdx.
        * An R.SdPrior: the same prior is repeated len(sdx) times.
        * A list-like of R.SdPrior objects: stored as supplied.
      sdy: The scale of the response, used for the default priors.
      sdx: A sequence with one scale value per predictor.

    Returns:
      The validated self._sigma_prior.

    Raises:
      Exception: If the result is not a list-like of R.SdPrior objects.
    """
    if sigma_prior is None:
        self._sigma_prior = [
            R.SdPrior(.01 * sdy / sdxi, 1) for sdxi in sdx
        ]
    elif isinstance(sigma_prior, R.SdPrior):
        self._sigma_prior = [sigma_prior] * len(sdx)
    else:
        # A user supplied collection was previously never stored, so the
        # check below looked at whatever self._sigma_prior already held.
        self._sigma_prior = sigma_prior

    # Both conditions must hold.  Without the parentheses 'not' applied
    # only to the iterable test, so lists with bad elements were accepted
    # and non-iterables raised a TypeError from all().
    if not (R.is_iterable(self._sigma_prior)
            and all(isinstance(x, R.SdPrior) for x in self._sigma_prior)):
        raise Exception(
            "sigma_prior must be a list-like of R.SdPrior objects.")
    return self._sigma_prior
def __init__(self, lags, sdy=None, prior_inclusion_probabilities=None,
             prior_mean=None, prior_sd=None, prior_df=1, expected_r2=0.5,
             sigma_upper_limit=np.inf, max_flips=np.inf):
    """
    Args:
      lags: The number of lags.  Converted to int; must be non-negative.
      sdy: A number giving the scale of the series.  Required: it is
        used to build the residual prior.
      prior_inclusion_probabilities: One value per lag.  If None, a
        geometric sequence from .8 to .8**lags is used.
      prior_mean: One value per lag.  If None, a vector of zeros.
      prior_sd: Either a single number, which is repeated for each lag,
        or one value per lag.  If None, a geometric sequence starting at
        .5 with ratio .8 is used.
      prior_df: The sample size of the residual R.SdPrior.
      expected_r2: Combined with sdy to form the guess for the residual
        prior.
      sigma_upper_limit: The upper limit of the residual R.SdPrior.
      max_flips: Stored in self._max_flips.

    Raises:
      Exception: If lags is negative, or prior_mean has the wrong length.
      TypeError: If sdy is None.
    """
    self._lags = int(lags)
    if self._lags < 0:
        raise Exception("'lags' argument must be non-negative.")
    self._max_flips = max_flips

    if prior_inclusion_probabilities is None:
        prior_inclusion_probabilities = np.geomspace(
            .8, .8**self._lags, num=self._lags)
    self._prior_inclusion_probabilities = np.array(
        prior_inclusion_probabilities)

    if prior_mean is None:
        self._prior_mean = np.zeros(self._lags)
    else:
        self._prior_mean = np.array(prior_mean, dtype=float)
    if len(self._prior_mean) != self._lags:
        raise Exception("prior_mean argument must have length 'lags'.")

    if prior_sd is None:
        self._prior_sd = np.geomspace(
            .5, .5 * .8**(self._lags - 1), num=self._lags)
    elif isinstance(prior_sd, Number):
        # Test the argument itself.  The previous code looked at
        # self._prior, which is never set, and raised AttributeError.
        self._prior_sd = np.array([prior_sd] * self._lags)
    else:
        self._prior_sd = np.array(prior_sd, dtype=float)

    if sdy is None:
        # Fail with a clear message rather than "NoneType * float".
        raise TypeError(
            "'sdy' must be supplied in order to build the residual prior.")
    # NOTE(review): a residual variance would normally be
    # (1 - expected_r2) * sdy**2.  The formula is left unchanged here --
    # confirm against the reference implementation.
    sigsq_guess = sdy * expected_r2
    self._residual_precision = R.SdPrior(
        sigma_guess=np.sqrt(sigsq_guess),
        sample_size=prior_df,
        upper_limit=sigma_upper_limit)
def test_mcmc(self):
    """
    Fit a sparse dynamic regression to simulated data.  Check that the
    coefficient draws cover the true coefficient paths and that the
    posterior mean residual SD is within .02 of the truth, then exercise
    the plotting methods.
    """
    xdim = 4
    true_residual_sd = .25
    simulated = self.simulate_data_from_model(
        time_dimension=200,
        typical_sample_size=5000,
        xdim=xdim,
        residual_sd=true_residual_sd,
        unscaled_innovation_sd=np.array([.01] * xdim),
        p00=np.array([.95] * xdim),
        p11=np.array([.99] * xdim),
    )
    data, coefficients, inclusion = simulated

    formula = "y ~ " + ss.dot(data, ["y", "timestamp", "(Intercept)"])
    model = dynreg.SparseDynamicRegressionModel(
        formula,
        data=data,
        timestamps="timestamp",
        niter=100,
        residual_precision_prior=R.SdPrior(true_residual_sd, 1),
        seed=8675309)
    model.plot()

    for i in range(xdim):
        status = boom.check_stochastic_process(
            boom.Matrix(model._beta_draws[:, i, :]),
            boom.Vector(coefficients[i, :]),
            confidence=.95,
            sd_ratio_threshold=10000,  # Turn off the sd_ratio check.
        )
        self.assertEqual("", status)

    # Skip the first 10 draws when averaging.
    posterior_mean_residual_sd = np.mean(model._residual_sd_draws[10:])
    tolerance = .02
    self.assertGreater(posterior_mean_residual_sd,
                       true_residual_sd - tolerance)
    self.assertLess(posterior_mean_residual_sd,
                    true_residual_sd + tolerance)

    sd_fig, sd_ax = plt.subplots(1, 2)
    model.plot_residual_sd(ax=sd_ax[0])
    model.plot_residual_sd(ax=sd_ax[1], type="ts")

    size_fig, size_ax = plt.subplots(1, 1)
    model.plot_size(ax=size_ax)
def _validate_prior(self, prior, sdy):
    """
    Store the innovation standard deviation prior in self._sigma_prior.

    Args:
      prior: An R.SdPrior, or None to request the default
        R.SdPrior(sdy * .01, upper_limit=sdy).
      sdy: The scale used to build the default prior.

    Raises:
      Exception: If prior is neither None nor an R.SdPrior.
    """
    if prior is None:
        self._sigma_prior = R.SdPrior(sdy * .01, upper_limit=sdy)
        return
    if isinstance(prior, R.SdPrior):
        self._sigma_prior = prior
        return
    raise Exception("Wrong type for prior.")
def __init__(self, y, nseasons: int, initial_state_prior=None,
             level_precision_priors=None, slope_precision_priors=None,
             sdy: float = None):
    """
    Args:
      y: The time series to be modeled.  This can be "None" if 'sdy' is
        supplied.
      nseasons: The number of seasons in a cycle.  Must be at least 2.
      initial_state_prior: An R.NormalPrior object describing the initial
        distribution of the state at time 0.  If None then a default
        prior will be assumed.  An R.NormalPrior is expanded to an
        R.MvnPrior of dimension 2 * nseasons with zero mean and a
        diagonal variance matrix.
      level_precision_priors: A list of R.SdPrior objects describing the
        prior distribution on the innovation standard deviations for the
        level portion of the model.  There is one such prior for each
        season in the cycle.
      slope_precision_priors: A list of R.SdPrior objects describing the
        prior distribution on the innovation standard deviations for the
        slope portion of the model.  There is one such prior for each
        season in the cycle.
      sdy: The standard deviation of the time series to be modeled.  This
        is not needed if 'y' is supplied, or if all the prior
        distributions are explicitly supplied.

    Raises:
      Exception: If nseasons <= 1, or if either collection of precision
        priors is not a list of R.SdPrior objects.
    """
    self._nseasons = int(nseasons)
    if nseasons <= 1:
        raise Exception("Seasonal models require at least 2 seasons.")

    # ---- Initial state distribution ----
    if initial_state_prior is None:
        if sdy is None:
            sdy = self._compute_sdy(y, "initial_state_prior")
        initial_state_prior = self._default_initial_state_prior(sdy)

    if not isinstance(initial_state_prior, R.NormalPrior):
        self._initial_state_prior = initial_state_prior
    else:
        state_dim = 2 * self._nseasons
        variances = np.ones(state_dim) * initial_state_prior.sd**2
        self._initial_state_prior = R.MvnPrior(
            np.zeros(state_dim), np.diag(variances))
    assert isinstance(self._initial_state_prior, R.MvnPrior)

    # ---- Level innovation priors ----
    if level_precision_priors is not None:
        self._level_precision_priors = level_precision_priors
    else:
        if sdy is None:
            sdy = self._compute_sdy(y, "level_precision_priors")
        self._level_precision_priors = [
            R.SdPrior(sdy / 100, .1, upper_limit=sdy)
            for _ in range(self.nseasons)
        ]
    msg = "level_precision_priors must be a list of R.SdPrior objects"
    if not isinstance(self._level_precision_priors, list):
        raise Exception(msg)
    if not all(isinstance(entry, R.SdPrior)
               for entry in self._level_precision_priors):
        raise Exception(msg)

    # ---- Slope innovation priors ----
    if slope_precision_priors is not None:
        self._slope_precision_priors = slope_precision_priors
    else:
        if sdy is None:
            sdy = self._compute_sdy(y, "slope_precision_priors")
        self._slope_precision_priors = [
            R.SdPrior(sdy / 100, .1, upper_limit=sdy)
            for _ in range(self.nseasons)
        ]
    msg = "slope_precision_priors must be a list of R.SdPrior objects"
    if not isinstance(self._slope_precision_priors, list):
        raise Exception(msg)
    if not all(isinstance(entry, R.SdPrior)
               for entry in self._slope_precision_priors):
        raise Exception(msg)

    self._build_model()
    self._state_contribution = None
def _default_sigma_prior(sdy):
    """
    Build the default prior for the innovation standard deviation.

    Args:
      sdy: The scale of the series.  The prior guess is 1% of this value
        and the upper limit is the value itself.

    Returns:
      An R.SdPrior.
    """
    sigma_guess = .01 * sdy
    return R.SdPrior(sigma_guess, upper_limit=sdy)