# module-level imports used throughout this excerpt
import torch

import pyro
import pyro.distributions as dist


def __call__(self):
    response = self.response
    num_of_obs = self.num_of_obs
    extra_out = {}

    # smoothing params
    if self.lev_sm_input < 0:
        lev_sm = pyro.sample("lev_sm", dist.Uniform(0, 1))
    else:
        lev_sm = torch.tensor(self.lev_sm_input, dtype=torch.double)
        extra_out['lev_sm'] = lev_sm
    if self.slp_sm_input < 0:
        slp_sm = pyro.sample("slp_sm", dist.Uniform(0, 1))
    else:
        slp_sm = torch.tensor(self.slp_sm_input, dtype=torch.double)
        extra_out['slp_sm'] = slp_sm

    # residual tuning parameters
    nu = pyro.sample("nu", dist.Uniform(self.min_nu, self.max_nu))

    # prior for residuals
    obs_sigma = pyro.sample("obs_sigma", dist.HalfCauchy(self.cauchy_sd))

    # regression parameters
    if self.num_of_pr == 0:
        pr = torch.zeros(num_of_obs)
        pr_beta = pyro.deterministic("pr_beta", torch.zeros(0))
    else:
        with pyro.plate("pr", self.num_of_pr):
            # fixed-scale ridge
            if self.reg_penalty_type == 0:
                pr_sigma = self.pr_sigma_prior
            # auto-scale ridge
            elif self.reg_penalty_type == 2:
                # weak prior for sigma
                pr_sigma = pyro.sample(
                    "pr_sigma", dist.HalfCauchy(self.auto_ridge_scale))
            # case when it is not lasso
            if self.reg_penalty_type != 1:
                # weak prior for betas
                pr_beta = pyro.sample(
                    "pr_beta",
                    dist.FoldedDistribution(
                        dist.Normal(self.pr_beta_prior, pr_sigma)))
            else:
                pr_beta = pyro.sample(
                    "pr_beta",
                    dist.FoldedDistribution(
                        dist.Laplace(self.pr_beta_prior, self.lasso_scale)))
        pr = pr_beta @ self.pr_mat.transpose(-1, -2)

    if self.num_of_nr == 0:
        nr = torch.zeros(num_of_obs)
        nr_beta = pyro.deterministic("nr_beta", torch.zeros(0))
    else:
        with pyro.plate("nr", self.num_of_nr):
            # fixed-scale ridge
            if self.reg_penalty_type == 0:
                nr_sigma = self.nr_sigma_prior
            # auto-scale ridge
            elif self.reg_penalty_type == 2:
                # weak prior for sigma
                nr_sigma = pyro.sample(
                    "nr_sigma", dist.HalfCauchy(self.auto_ridge_scale))
            # case when it is not lasso
            if self.reg_penalty_type != 1:
                # weak prior for betas
                nr_beta = pyro.sample(
                    "nr_beta",
                    dist.FoldedDistribution(
                        dist.Normal(self.nr_beta_prior, nr_sigma)))
            else:
                nr_beta = pyro.sample(
                    "nr_beta",
                    dist.FoldedDistribution(
                        dist.Laplace(self.nr_beta_prior, self.lasso_scale)))
        nr = nr_beta @ self.nr_mat.transpose(-1, -2)

    if self.num_of_rr == 0:
        rr = torch.zeros(num_of_obs)
        rr_beta = pyro.deterministic("rr_beta", torch.zeros(0))
    else:
        with pyro.plate("rr", self.num_of_rr):
            # fixed-scale ridge
            if self.reg_penalty_type == 0:
                rr_sigma = self.rr_sigma_prior
            # auto-scale ridge
            elif self.reg_penalty_type == 2:
                # weak prior for sigma
                rr_sigma = pyro.sample(
                    "rr_sigma", dist.HalfCauchy(self.auto_ridge_scale))
            # case when it is not lasso
            if self.reg_penalty_type != 1:
                # weak prior for betas
                rr_beta = pyro.sample(
                    "rr_beta", dist.Normal(self.rr_beta_prior, rr_sigma))
            else:
                rr_beta = pyro.sample(
                    "rr_beta", dist.Laplace(self.rr_beta_prior, self.lasso_scale))
        rr = rr_beta @ self.rr_mat.transpose(-1, -2)

    # a hack to make sure we don't pick up an extra dimension of size 1
    # introduced by the rr_beta and pr_beta sampling
    r = pr + nr + rr
    if r.dim() > 1:
        r = r.unsqueeze(-2)

    # trend parameters
    # local trend proportion
    lt_coef = pyro.sample("lt_coef", dist.Uniform(0, 1))
    # global trend proportion
    gt_coef = pyro.sample("gt_coef", dist.Uniform(-0.5, 0.5))
    # global trend parameter
    gt_pow = pyro.sample("gt_pow", dist.Uniform(0, 1))

    # seasonal parameters
    if self.is_seasonal:
        # seasonality smoothing parameter
        if self.sea_sm_input < 0:
            sea_sm = pyro.sample("sea_sm", dist.Uniform(0, 1))
        else:
            sea_sm = torch.tensor(self.sea_sm_input, dtype=torch.double)
            extra_out['sea_sm'] = sea_sm

        # initial seasonality; a 33% lift corresponds to roughly one standard deviation
        init_sea = pyro.sample(
            "init_sea",
            dist.Normal(0, 0.33).expand([self.seasonality]).to_event(1))
        init_sea = init_sea - init_sea.mean(-1, keepdim=True)

    b = [None] * num_of_obs  # slope
    l = [None] * num_of_obs  # level
    if self.is_seasonal:
        s = [None] * (self.num_of_obs + self.seasonality)
        for t in range(self.seasonality):
            s[t] = init_sea[..., t]
        s[self.seasonality] = init_sea[..., 0]
    else:
        s = [torch.tensor(0.)] * num_of_obs

    # states initial condition
    b[0] = torch.zeros_like(slp_sm)
    if self.is_seasonal:
        l[0] = response[0] - r[..., 0] - s[0]
    else:
        l[0] = response[0] - r[..., 0]

    # update process
    for t in range(1, num_of_obs):
        # this update equation uses l[t - 1] ONLY; it is intentionally different
        # from the Holt-Winters form, a change suggested by Slawek's original SLGT model
        l[t] = lev_sm * (response[t] - s[t] - r[..., t]) + (1 - lev_sm) * l[t - 1]
        b[t] = slp_sm * (l[t] - l[t - 1]) + (1 - slp_sm) * b[t - 1]
        if self.is_seasonal:
            s[t + self.seasonality] = \
                sea_sm * (response[t] - l[t] - r[..., t]) + (1 - sea_sm) * s[t]

    # evaluation process
    # vectorize as much math as possible
    for lst in [b, l, s]:
        # torch.stack requires all items to have the same shape, but the
        # initial items of our lists may not have batch_shape, so we expand.
        lst[0] = lst[0].expand_as(lst[-1])
    b = torch.stack(b, dim=-1).reshape(b[0].shape[:-1] + (-1,))
    l = torch.stack(l, dim=-1).reshape(l[0].shape[:-1] + (-1,))
    s = torch.stack(s, dim=-1).reshape(s[0].shape[:-1] + (-1,))

    lgt_sum = l + gt_coef * l.abs() ** gt_pow + lt_coef * b
    lgt_sum = torch.cat([l[..., :1], lgt_sum[..., :-1]], dim=-1)  # shift by 1
    # a hack here as well to get rid of the extra "1" in r.shape
    if r.dim() >= 2:
        r = r.squeeze(-2)
    yhat = lgt_sum + s[..., :num_of_obs] + r

    with pyro.plate("response_plate", num_of_obs - 1):
        pyro.sample("response",
                    dist.StudentT(nu, yhat[..., 1:], obs_sigma),
                    obs=response[1:])

    # we care about the combined beta, not pr_beta, nr_beta, ... individually
    extra_out['beta'] = torch.cat([pr_beta, nr_beta, rr_beta], dim=-1)

    extra_out.update({'b': b, 'l': l, 's': s, 'lgt_sum': lgt_sum})
    return extra_out
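# A minimal inference sketch, assuming the __call__ above is a method of a model
# object constructed elsewhere in the package (the "Model(...)" constructor and
# its arguments are hypothetical, not part of this excerpt). Because the callable
# takes no arguments and observes "response" via obs=..., it can be handed
# directly to Pyro's MCMC machinery:
#
#     from pyro.infer import MCMC, NUTS
#
#     model = Model(...)  # hypothetical: wraps response, regressor matrices, priors, etc.
#     mcmc = MCMC(NUTS(model), num_samples=500, warmup_steps=500)
#     mcmc.run()
#     samples = mcmc.get_samples()  # e.g. samples["lev_sm"], samples["nu"], samples["obs_sigma"]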
# reshape_batch support for FoldedDistribution: reshape the underlying base
# distribution, then re-wrap it in a FoldedDistribution
def _(d, batch_shape):
    base_dist = reshape_batch(d.base_dist, batch_shape)
    return dist.FoldedDistribution(base_dist)
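# A minimal sketch of how a helper like the one above is typically wired up,
# assuming it is a singledispatch registration against Pyro's forecasting
# utilities; the import path and decorator below are assumptions not shown in
# this excerpt (the prefix_condition helper later in this excerpt would follow
# the same pattern):
#
#     from pyro.contrib.forecast.util import reshape_batch
#
#     @reshape_batch.register(dist.FoldedDistribution)
#     def _(d, batch_shape):
#         # dispatch on the distribution type and rebuild the folded wrapper
#         # around the reshaped base distribution
#         return dist.FoldedDistribution(reshape_batch(d.base_dist, batch_shape))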
def __call__(self): """ Notes ----- Labeling system: 1. for kernel level of parameters such as rho, span, nkots, kerenel etc., use suffix _lev and _coef for levels and regression to partition 2. for knots level of parameters such as coef, loc and scale priors, use prefix _lev and _rr _pr for levels, regular and positive regressors to partition 3. reduce ambigious by replacing all greeks by labels more intuitive use _coef, _weight etc. instead of _beta, use _scale instead of _sigma """ response = self.response which_valid = self.which_valid_res n_obs = self.n_obs # n_valid = self.n_valid_res sdy = self.sdy meany = self.mean_y dof = self.dof lev_knot_loc = self.lev_knot_loc seas_term = self.seas_term pr = self.pr rr = self.rr n_pr = self.n_pr n_rr = self.n_rr k_lev = self.k_lev k_coef = self.k_coef n_knots_lev = self.n_knots_lev n_knots_coef = self.n_knots_coef lev_knot_scale = self.lev_knot_scale # mult var norm stuff mvn = self.mvn geometric_walk = self.geometric_walk min_residuals_sd = self.min_residuals_sd if min_residuals_sd > 1.0: min_residuals_sd = torch.tensor(1.0) if min_residuals_sd < 0: min_residuals_sd = torch.tensor(0.0) # expand dim to n_rr x n_knots_coef rr_init_knot_loc = self.rr_init_knot_loc rr_init_knot_scale = self.rr_init_knot_scale rr_knot_scale = self.rr_knot_scale # this does not need to expand dim since it is used as latent grand mean pr_init_knot_loc = self.pr_init_knot_loc pr_init_knot_scale = self.pr_init_knot_scale pr_knot_scale = self.pr_knot_scale # transformation of data regressors = torch.zeros(n_obs) if n_pr > 0 and n_rr > 0: regressors = torch.cat([rr, pr], dim=-1) elif n_pr > 0: regressors = pr elif n_rr > 0: regressors = rr response_tran = response - meany - seas_term # sampling begins here extra_out = {} # levels sampling lev_knot_tran = pyro.sample( "lev_knot_tran", dist.Normal(lev_knot_loc - meany, lev_knot_scale).expand([n_knots_lev]).to_event(1)) lev = (lev_knot_tran @ k_lev.transpose(-2, -1)) # using hierarchical priors vs. 
multivariate priors if mvn == 0: # regular regressor sampling if n_rr > 0: # pooling latent variables rr_init_knot = pyro.sample( "rr_init_knot", dist.Normal(rr_init_knot_loc, rr_init_knot_scale).to_event(1)) rr_knot = pyro.sample( "rr_knot", dist.Normal( rr_init_knot.unsqueeze(-1) * torch.ones(n_rr, n_knots_coef), rr_knot_scale).to_event(2)) rr_coef = (rr_knot @ k_coef.transpose(-2, -1)).transpose( -2, -1) # positive regressor sampling if n_pr > 0: if geometric_walk: # TODO: development method pr_init_knot = pyro.sample( "pr_init_knot", dist.FoldedDistribution( dist.Normal(pr_init_knot_loc, pr_init_knot_scale)).to_event(1)) pr_knot_step = pyro.sample( "pr_knot_step", # note that unlike rr_knot, the first one is ignored as we use the initial scale # to sample the first knot dist.Normal(torch.zeros(n_pr, n_knots_coef), pr_knot_scale).to_event(2)) pr_knot = pr_init_knot.unsqueeze(-1) * pr_knot_step.cumsum( -1).exp() pr_coef = (pr_knot @ k_coef.transpose(-2, -1)).transpose( -2, -1) else: # TODO: original method # pooling latent variables pr_init_knot = pyro.sample( "pr_knot_loc", dist.FoldedDistribution( dist.Normal(pr_init_knot_loc, pr_init_knot_scale)).to_event(1)) pr_knot = pyro.sample( "pr_knot", dist.FoldedDistribution( dist.Normal( pr_init_knot.unsqueeze(-1) * torch.ones(n_pr, n_knots_coef), pr_knot_scale)).to_event(2)) pr_coef = (pr_knot @ k_coef.transpose(-2, -1)).transpose( -2, -1) else: # regular regressor sampling if n_rr > 0: rr_init_knot = pyro.deterministic( "rr_init_knot", torch.zeros(rr_init_knot_loc.shape)) # updated mod loc_temp = rr_init_knot_loc.unsqueeze(-1) * torch.ones( n_rr, n_knots_coef) scale_temp = torch.diag_embed( rr_init_knot_scale.unsqueeze(-1) * torch.ones(n_rr, n_knots_coef)) # the sampling rr_knot = pyro.sample( "rr_knot", dist.MultivariateNormal( loc=loc_temp, covariance_matrix=scale_temp).to_event(1)) rr_coef = (rr_knot @ k_coef.transpose(-2, -1)).transpose( -2, -1) # positive regressor sampling if n_pr > 0: # this part is junk just so that the pr_init_knot has a prior; but it does not connect to anything else # pooling latent variables pr_init_knot = pyro.sample( "pr_init_knot", dist.FoldedDistribution( dist.Normal(pr_init_knot_loc, pr_init_knot_scale)).to_event(1)) # updated mod loc_temp = pr_init_knot_loc.unsqueeze(-1) * torch.ones( n_pr, n_knots_coef) scale_temp = torch.diag_embed( pr_init_knot_scale.unsqueeze(-1) * torch.ones(n_pr, n_knots_coef)) pr_knot = pyro.sample( "pr_knot", dist.MultivariateNormal( loc=loc_temp, covariance_matrix=scale_temp).to_event(1)) pr_knot = torch.exp(pr_knot) pr_coef = (pr_knot @ k_coef.transpose(-2, -1)).transpose( -2, -1) # concatenating all latent variables coef_init_knot = torch.zeros(n_rr + n_pr) coef_knot = torch.zeros((n_rr + n_pr, n_knots_coef)) coef = torch.zeros(n_obs) if n_pr > 0 and n_rr > 0: coef_knot = torch.cat([rr_knot, pr_knot], dim=-2) coef_init_knot = torch.cat([rr_init_knot, pr_init_knot], dim=-1) coef = torch.cat([rr_coef, pr_coef], dim=-1) elif n_pr > 0: coef_knot = pr_knot coef_init_knot = pr_init_knot coef = pr_coef elif n_rr > 0: coef_knot = rr_knot coef_init_knot = rr_init_knot coef = rr_coef # coefficients likelihood/priors coef_prior_list = self.coef_prior_list if coef_prior_list: for x in coef_prior_list: name = x['name'] # TODO: we can move torch conversion to init to enhance speed m = torch.tensor(x['prior_mean']) sd = torch.tensor(x['prior_sd']) # tp = torch.tensor(x['prior_tp_idx']) # idx = torch.tensor(x['prior_regressor_col_idx']) start_tp_idx = x['prior_start_tp_idx'] end_tp_idx = 
x['prior_end_tp_idx'] idx = x['prior_regressor_col_idx'] pyro.sample("prior_{}".format(name), dist.Normal(m, sd).to_event(2), obs=coef[..., start_tp_idx:end_tp_idx, idx]) # observation likelihood yhat = lev + (regressors * coef).sum(-1) obs_scale_base = pyro.sample("obs_scale_base", dist.Beta(2, 2)).unsqueeze(-1) # from 0.5 * sdy to sdy obs_scale = ((obs_scale_base * (1.0 - min_residuals_sd)) + min_residuals_sd) * sdy # with pyro.plate("response_plate", n_valid): # pyro.sample("response", # dist.StudentT(dof, yhat[..., which_valid], obs_scale), # obs=response_tran[which_valid]) pyro.sample("response", dist.StudentT(dof, yhat[..., which_valid], obs_scale).to_event(1), obs=response_tran[which_valid]) lev_knot = lev_knot_tran + meany extra_out.update({ 'yhat': yhat + seas_term + meany, 'lev': lev + meany, 'lev_knot': lev_knot, 'coef': coef, 'coef_knot': coef_knot, 'coef_init_knot': coef_init_knot, 'obs_scale': obs_scale, }) return extra_out
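# A minimal SVI sketch for the model above, again assuming the surrounding class
# provides the constructor (the "Model(...)" call is hypothetical); everything
# else uses standard Pyro APIs:
#
#     import pyro.optim
#     from pyro.infer import SVI, Trace_ELBO, Predictive
#     from pyro.infer.autoguide import AutoNormal
#
#     model = Model(...)                      # hypothetical constructor
#     guide = AutoNormal(model)
#     svi = SVI(model, guide, pyro.optim.ClippedAdam({"lr": 0.01}), loss=Trace_ELBO())
#     for step in range(1000):
#         svi.step()                          # the model callable takes no arguments
#     # posterior draws for sites such as "lev_knot_tran", "rr_knot", "pr_knot"
#     posterior = Predictive(model, guide=guide, num_samples=100)()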
# prefix_condition support for FoldedDistribution: condition the underlying base
# distribution on the observed prefix, then re-wrap it in a FoldedDistribution
def _(d, data):
    base_dist = prefix_condition(d.base_dist, data)
    return dist.FoldedDistribution(base_dist)