Example #1
    def __call__(self):
        response = self.response
        num_of_obs = self.num_of_obs
        extra_out = {}

        # smoothing params
        if self.lev_sm_input < 0:
            lev_sm = pyro.sample("lev_sm", dist.Uniform(0, 1))
        else:
            lev_sm = torch.tensor(self.lev_sm_input, dtype=torch.double)
            extra_out['lev_sm'] = lev_sm
        if self.slp_sm_input < 0:
            slp_sm = pyro.sample("slp_sm", dist.Uniform(0, 1))
        else:
            slp_sm = torch.tensor(self.slp_sm_input, dtype=torch.double)
            extra_out['slp_sm'] = slp_sm

        # residual tuning parameters
        nu = pyro.sample("nu", dist.Uniform(self.min_nu, self.max_nu))

        # prior for residuals
        obs_sigma = pyro.sample("obs_sigma", dist.HalfCauchy(self.cauchy_sd))

        # regression parameters
        if self.num_of_pr == 0:
            pr = torch.zeros(num_of_obs)
            pr_beta = pyro.deterministic("pr_beta", torch.zeros(0))
        else:
            with pyro.plate("pr", self.num_of_pr):
                # fixed scale ridge
                if self.reg_penalty_type == 0:
                    pr_sigma = self.pr_sigma_prior
                # auto scale ridge
                elif self.reg_penalty_type == 2:
                    # weak prior for sigma
                    pr_sigma = pyro.sample(
                        "pr_sigma", dist.HalfCauchy(self.auto_ridge_scale))
                # case when it is not lasso
                if self.reg_penalty_type != 1:
                    # weak prior for betas
                    pr_beta = pyro.sample(
                        "pr_beta",
                        dist.FoldedDistribution(
                            dist.Normal(self.pr_beta_prior, pr_sigma)))
                else:
                    pr_beta = pyro.sample(
                        "pr_beta",
                        dist.FoldedDistribution(
                            dist.Laplace(self.pr_beta_prior,
                                         self.lasso_scale)))
            pr = pr_beta @ self.pr_mat.transpose(-1, -2)

        if self.num_of_nr == 0:
            nr = torch.zeros(num_of_obs)
            nr_beta = pyro.deterministic("nr_beta", torch.zeros(0))
        else:
            with pyro.plate("nr", self.num_of_nr):
                # fixed scale ridge
                if self.reg_penalty_type == 0:
                    nr_sigma = self.nr_sigma_prior
                # auto scale ridge
                elif self.reg_penalty_type == 2:
                    # weak prior for sigma
                    nr_sigma = pyro.sample(
                        "nr_sigma", dist.HalfCauchy(self.auto_ridge_scale))
                # case when it is not lasso
                if self.reg_penalty_type != 1:
                    # weak prior for betas
                    nr_beta = pyro.sample(
                        "nr_beta",
                        dist.FoldedDistribution(
                            dist.Normal(self.nr_beta_prior, nr_sigma)))
                else:
                    nr_beta = pyro.sample(
                        "nr_beta",
                        dist.FoldedDistribution(
                            dist.Laplace(self.nr_beta_prior,
                                         self.lasso_scale)))
            nr = nr_beta @ self.nr_mat.transpose(-1, -2)

        if self.num_of_rr == 0:
            rr = torch.zeros(num_of_obs)
            rr_beta = pyro.deterministic("rr_beta", torch.zeros(0))
        else:
            with pyro.plate("rr", self.num_of_rr):
                # fixed scale ridge
                if self.reg_penalty_type == 0:
                    rr_sigma = self.rr_sigma_prior
                # auto scale ridge
                elif self.reg_penalty_type == 2:
                    # weak prior for sigma
                    rr_sigma = pyro.sample(
                        "rr_sigma", dist.HalfCauchy(self.auto_ridge_scale))
                # case when it is not lasso
                if self.reg_penalty_type != 1:
                    # weak prior for betas
                    rr_beta = pyro.sample(
                        "rr_beta", dist.Normal(self.rr_beta_prior, rr_sigma))
                else:
                    rr_beta = pyro.sample(
                        "rr_beta",
                        dist.Laplace(self.rr_beta_prior, self.lasso_scale))
            rr = rr_beta @ self.rr_mat.transpose(-1, -2)

        # a hack to make sure we don't use a dimension "1" due to rr_beta and pr_beta sampling
        r = pr + nr + rr
        if r.dim() > 1:
            r = r.unsqueeze(-2)

        # trend parameters
        # local trend proportion
        lt_coef = pyro.sample("lt_coef", dist.Uniform(0, 1))
        # global trend proportion
        gt_coef = pyro.sample("gt_coef", dist.Uniform(-0.5, 0.5))
        # global trend parameter
        gt_pow = pyro.sample("gt_pow", dist.Uniform(0, 1))

        # seasonal parameters
        if self.is_seasonal:
            # seasonality smoothing parameter
            if self.sea_sm_input < 0:
                sea_sm = pyro.sample("sea_sm", dist.Uniform(0, 1))
            else:
                sea_sm = torch.tensor(self.sea_sm_input, dtype=torch.double)
                extra_out['sea_sm'] = sea_sm

            # initial seasonality
            # a 33% lift is within one standard deviation
            init_sea = pyro.sample(
                "init_sea",
                dist.Normal(0, 0.33).expand([self.seasonality]).to_event(1))
            init_sea = init_sea - init_sea.mean(-1, keepdim=True)

        b = [None] * num_of_obs  # slope
        l = [None] * num_of_obs  # level
        if self.is_seasonal:
            s = [None] * (self.num_of_obs + self.seasonality)
            for t in range(self.seasonality):
                s[t] = init_sea[..., t]
            s[self.seasonality] = init_sea[..., 0]
        else:
            s = [torch.tensor(0.)] * num_of_obs

        # states initial condition
        b[0] = torch.zeros_like(slp_sm)
        if self.is_seasonal:
            l[0] = response[0] - r[..., 0] - s[0]
        else:
            l[0] = response[0] - r[..., 0]

        # update process
        for t in range(1, num_of_obs):
            # this update equation uses l[t-1] ONLY;
            # it is intentionally different from the Holt-Winters form.
            # the change follows Slawek's original SLGT model
            l[t] = lev_sm * (response[t] - s[t] -
                             r[..., t]) + (1 - lev_sm) * l[t - 1]
            b[t] = slp_sm * (l[t] - l[t - 1]) + (1 - slp_sm) * b[t - 1]
            if self.is_seasonal:
                s[t + self.seasonality] = \
                    sea_sm * (response[t] - l[t] - r[..., t]) + (1 - sea_sm) * s[t]

        # evaluation process
        # vectorize as much math as possible
        for lst in [b, l, s]:
            # torch.stack requires all items to have the same shape, but the
            # initial items of our lists may not have batch_shape, so we expand.
            lst[0] = lst[0].expand_as(lst[-1])
        b = torch.stack(b, dim=-1).reshape(b[0].shape[:-1] + (-1, ))
        l = torch.stack(l, dim=-1).reshape(l[0].shape[:-1] + (-1, ))
        s = torch.stack(s, dim=-1).reshape(s[0].shape[:-1] + (-1, ))

        lgt_sum = l + gt_coef * l.abs()**gt_pow + lt_coef * b
        lgt_sum = torch.cat([l[..., :1], lgt_sum[..., :-1]],
                            dim=-1)  # shift by 1
        # a hack here as well to get rid of the extra "1" in r.shape
        if r.dim() >= 2:
            r = r.squeeze(-2)
        yhat = lgt_sum + s[..., :num_of_obs] + r

        with pyro.plate("response_plate", num_of_obs - 1):
            pyro.sample("response",
                        dist.StudentT(nu, yhat[..., 1:], obs_sigma),
                        obs=response[1:])

        # we care about the concatenated beta, not pr_beta, nr_beta, ... individually
        extra_out['beta'] = torch.cat([pr_beta, nr_beta, rr_beta], dim=-1)

        extra_out.update({'b': b, 'l': l, 's': s, 'lgt_sum': lgt_sum})
        return extra_out
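
The pr_beta and nr_beta coefficients above are wrapped in dist.FoldedDistribution, which constrains the sampled values to be non-negative (|X| for X drawn from the base distribution). A minimal standalone sketch of that behavior, assuming only torch and pyro are installed; the loc, scale and x values below are illustrative, not taken from the example:

# sketch: a folded Normal has support [0, inf) and a density equal to the
# base density summed over +x and -x
import torch
import pyro.distributions as dist

loc, scale = torch.tensor(0.5), torch.tensor(1.0)
folded = dist.FoldedDistribution(dist.Normal(loc, scale))
draws = folded.sample((1000,))
assert (draws >= 0).all()  # every draw is non-negative

x = torch.tensor(0.7)
manual = torch.logsumexp(
    torch.stack([dist.Normal(loc, scale).log_prob(x),
                 dist.Normal(loc, scale).log_prob(-x)]), dim=0)
assert torch.allclose(folded.log_prob(x), manual)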
Example #2
def _(d, batch_shape):
    # handler for FoldedDistribution: reshape the base distribution's batch
    # dimensions, then re-wrap it so the fold is preserved
    base_dist = reshape_batch(d.base_dist, batch_shape)
    return dist.FoldedDistribution(base_dist)
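
Example #2 reads like a handler for a generic reshape_batch function registered on dist.FoldedDistribution (the registration decorator is not shown in the excerpt). A self-contained sketch of that dispatch pattern using functools.singledispatch; the generic reshape_batch and the Normal handler below are illustrative stand-ins, not the library's actual implementation. The same registration pattern would cover a handler like the one in Example #4:

from functools import singledispatch
import torch
import pyro.distributions as dist

@singledispatch
def reshape_batch(d, batch_shape):
    raise NotImplementedError(type(d).__name__)

@reshape_batch.register(dist.Normal)
def _(d, batch_shape):
    # reshape loc/scale so the distribution picks up the new batch_shape
    return dist.Normal(d.loc.reshape(batch_shape), d.scale.reshape(batch_shape))

@reshape_batch.register(dist.FoldedDistribution)
def _(d, batch_shape):
    # same pattern as Example #2: reshape the base, then re-apply the fold
    return dist.FoldedDistribution(reshape_batch(d.base_dist, batch_shape))

folded = dist.FoldedDistribution(dist.Normal(torch.zeros(2, 3), torch.ones(2, 3)))
reshaped = reshape_batch(folded, (6,))
assert reshaped.batch_shape == torch.Size([6])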
Example #3
    def __call__(self):
        """
        Notes
        -----
        Labeling system:
        1. for kernel-level parameters such as rho, span, n_knots, kernel etc.,
        use the suffixes _lev and _coef to partition level and regression parameters
        2. for knot-level parameters such as coef, loc and scale priors,
        use the prefixes _lev, _rr and _pr to partition level, regular-regressor and positive-regressor parameters
        3. reduce ambiguity by replacing Greek letters with more intuitive labels:
        use _coef, _weight etc. instead of _beta, and _scale instead of _sigma
        """

        response = self.response
        which_valid = self.which_valid_res

        n_obs = self.n_obs
        # n_valid = self.n_valid_res
        sdy = self.sdy
        meany = self.mean_y
        dof = self.dof
        lev_knot_loc = self.lev_knot_loc
        seas_term = self.seas_term

        pr = self.pr
        rr = self.rr
        n_pr = self.n_pr
        n_rr = self.n_rr

        k_lev = self.k_lev
        k_coef = self.k_coef
        n_knots_lev = self.n_knots_lev
        n_knots_coef = self.n_knots_coef

        lev_knot_scale = self.lev_knot_scale
        # mult var norm stuff
        mvn = self.mvn
        geometric_walk = self.geometric_walk
        min_residuals_sd = self.min_residuals_sd
        if min_residuals_sd > 1.0:
            min_residuals_sd = torch.tensor(1.0)
        if min_residuals_sd < 0:
            min_residuals_sd = torch.tensor(0.0)
        # expand dim to n_rr x n_knots_coef
        rr_init_knot_loc = self.rr_init_knot_loc
        rr_init_knot_scale = self.rr_init_knot_scale
        rr_knot_scale = self.rr_knot_scale

        # this does not need to expand dim since it is used as latent grand mean
        pr_init_knot_loc = self.pr_init_knot_loc
        pr_init_knot_scale = self.pr_init_knot_scale
        pr_knot_scale = self.pr_knot_scale

        # transformation of data
        regressors = torch.zeros(n_obs)
        if n_pr > 0 and n_rr > 0:
            regressors = torch.cat([rr, pr], dim=-1)
        elif n_pr > 0:
            regressors = pr
        elif n_rr > 0:
            regressors = rr

        response_tran = response - meany - seas_term

        # sampling begins here
        extra_out = {}

        # levels sampling
        lev_knot_tran = pyro.sample(
            "lev_knot_tran",
            dist.Normal(lev_knot_loc - meany,
                        lev_knot_scale).expand([n_knots_lev]).to_event(1))
        lev = (lev_knot_tran @ k_lev.transpose(-2, -1))

        # using hierarchical priors vs. multivariate priors
        if mvn == 0:
            # regular regressor sampling
            if n_rr > 0:
                # pooling latent variables
                rr_init_knot = pyro.sample(
                    "rr_init_knot",
                    dist.Normal(rr_init_knot_loc,
                                rr_init_knot_scale).to_event(1))
                rr_knot = pyro.sample(
                    "rr_knot",
                    dist.Normal(
                        rr_init_knot.unsqueeze(-1) *
                        torch.ones(n_rr, n_knots_coef),
                        rr_knot_scale).to_event(2))
                rr_coef = (rr_knot @ k_coef.transpose(-2, -1)).transpose(
                    -2, -1)

            # positive regressor sampling
            if n_pr > 0:
                if geometric_walk:
                    # TODO: development method
                    pr_init_knot = pyro.sample(
                        "pr_init_knot",
                        dist.FoldedDistribution(
                            dist.Normal(pr_init_knot_loc,
                                        pr_init_knot_scale)).to_event(1))
                    pr_knot_step = pyro.sample(
                        "pr_knot_step",
                        # note that unlike rr_knot, the first one is ignored as we use the initial scale
                        # to sample the first knot
                        dist.Normal(torch.zeros(n_pr, n_knots_coef),
                                    pr_knot_scale).to_event(2))
                    pr_knot = pr_init_knot.unsqueeze(-1) * pr_knot_step.cumsum(
                        -1).exp()
                    pr_coef = (pr_knot @ k_coef.transpose(-2, -1)).transpose(
                        -2, -1)
                else:
                    # TODO: original method
                    # pooling latent variables
                    pr_init_knot = pyro.sample(
                        "pr_knot_loc",
                        dist.FoldedDistribution(
                            dist.Normal(pr_init_knot_loc,
                                        pr_init_knot_scale)).to_event(1))

                    pr_knot = pyro.sample(
                        "pr_knot",
                        dist.FoldedDistribution(
                            dist.Normal(
                                pr_init_knot.unsqueeze(-1) *
                                torch.ones(n_pr, n_knots_coef),
                                pr_knot_scale)).to_event(2))
                    pr_coef = (pr_knot @ k_coef.transpose(-2, -1)).transpose(
                        -2, -1)
        else:
            # regular regressor sampling
            if n_rr > 0:
                rr_init_knot = pyro.deterministic(
                    "rr_init_knot", torch.zeros(rr_init_knot_loc.shape))

                # updated mod
                loc_temp = rr_init_knot_loc.unsqueeze(-1) * torch.ones(
                    n_rr, n_knots_coef)
                scale_temp = torch.diag_embed(
                    rr_init_knot_scale.unsqueeze(-1) *
                    torch.ones(n_rr, n_knots_coef))

                # the sampling
                rr_knot = pyro.sample(
                    "rr_knot",
                    dist.MultivariateNormal(
                        loc=loc_temp,
                        covariance_matrix=scale_temp).to_event(1))
                rr_coef = (rr_knot @ k_coef.transpose(-2, -1)).transpose(
                    -2, -1)

            # positive regressor sampling
            if n_pr > 0:
                # this part exists only so that pr_init_knot has a prior; it does not connect to anything else
                # pooling latent variables
                pr_init_knot = pyro.sample(
                    "pr_init_knot",
                    dist.FoldedDistribution(
                        dist.Normal(pr_init_knot_loc,
                                    pr_init_knot_scale)).to_event(1))
                # updated mod
                loc_temp = pr_init_knot_loc.unsqueeze(-1) * torch.ones(
                    n_pr, n_knots_coef)
                scale_temp = torch.diag_embed(
                    pr_init_knot_scale.unsqueeze(-1) *
                    torch.ones(n_pr, n_knots_coef))

                pr_knot = pyro.sample(
                    "pr_knot",
                    dist.MultivariateNormal(
                        loc=loc_temp,
                        covariance_matrix=scale_temp).to_event(1))
                pr_knot = torch.exp(pr_knot)
                pr_coef = (pr_knot @ k_coef.transpose(-2, -1)).transpose(
                    -2, -1)

        # concatenating all latent variables
        coef_init_knot = torch.zeros(n_rr + n_pr)
        coef_knot = torch.zeros((n_rr + n_pr, n_knots_coef))

        coef = torch.zeros(n_obs)
        if n_pr > 0 and n_rr > 0:
            coef_knot = torch.cat([rr_knot, pr_knot], dim=-2)
            coef_init_knot = torch.cat([rr_init_knot, pr_init_knot], dim=-1)
            coef = torch.cat([rr_coef, pr_coef], dim=-1)
        elif n_pr > 0:
            coef_knot = pr_knot
            coef_init_knot = pr_init_knot
            coef = pr_coef
        elif n_rr > 0:
            coef_knot = rr_knot
            coef_init_knot = rr_init_knot
            coef = rr_coef

        # coefficients likelihood/priors
        coef_prior_list = self.coef_prior_list
        if coef_prior_list:
            for x in coef_prior_list:
                name = x['name']
                # TODO: we can move torch conversion to init to enhance speed
                m = torch.tensor(x['prior_mean'])
                sd = torch.tensor(x['prior_sd'])
                # tp = torch.tensor(x['prior_tp_idx'])
                # idx = torch.tensor(x['prior_regressor_col_idx'])
                start_tp_idx = x['prior_start_tp_idx']
                end_tp_idx = x['prior_end_tp_idx']
                idx = x['prior_regressor_col_idx']
                pyro.sample("prior_{}".format(name),
                            dist.Normal(m, sd).to_event(2),
                            obs=coef[..., start_tp_idx:end_tp_idx, idx])

        # observation likelihood
        yhat = lev + (regressors * coef).sum(-1)
        obs_scale_base = pyro.sample("obs_scale_base",
                                     dist.Beta(2, 2)).unsqueeze(-1)
        # obs_scale ranges from min_residuals_sd * sdy up to sdy
        obs_scale = ((obs_scale_base *
                      (1.0 - min_residuals_sd)) + min_residuals_sd) * sdy

        # with pyro.plate("response_plate", n_valid):
        #     pyro.sample("response",
        #                 dist.StudentT(dof, yhat[..., which_valid], obs_scale),
        #                 obs=response_tran[which_valid])

        pyro.sample("response",
                    dist.StudentT(dof, yhat[..., which_valid],
                                  obs_scale).to_event(1),
                    obs=response_tran[which_valid])

        lev_knot = lev_knot_tran + meany

        extra_out.update({
            'yhat': yhat + seas_term + meany,
            'lev': lev + meany,
            'lev_knot': lev_knot,
            'coef': coef,
            'coef_knot': coef_knot,
            'coef_init_knot': coef_init_knot,
            'obs_scale': obs_scale,
        })
        return extra_out
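
The geometric_walk branch above builds the positive-regressor knots as a folded-Normal initial knot scaled by the exponential of a cumulative sum of Normal steps. A minimal sketch outside any pyro model, with illustrative shapes and scales, showing that this construction keeps every knot non-negative while letting the knots drift smoothly:

# sketch: geometric-walk construction of positive knot coefficients
import torch
import pyro.distributions as dist

n_pr, n_knots_coef = 2, 5  # illustrative sizes
pr_init_knot = dist.FoldedDistribution(dist.Normal(0.0, 1.0)).sample((n_pr,))
pr_knot_step = dist.Normal(torch.zeros(n_pr, n_knots_coef), 0.1).sample()
pr_knot = pr_init_knot.unsqueeze(-1) * pr_knot_step.cumsum(-1).exp()
assert (pr_knot >= 0).all()  # non-negative (strictly positive almost surely)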
Example #4
def _(d, data):
    # handler for FoldedDistribution: condition the base distribution on the
    # observed prefix, then re-wrap it so the fold is preserved
    base_dist = prefix_condition(d.base_dist, data)
    return dist.FoldedDistribution(base_dist)