예제 #1
0
class KernelSampler(object):
    """Draw outcome samples from a kernel conditional density estimate.

    A ``KDEMultivariateConditional`` model of the outcome columns given the
    treatment and backdoor columns is fitted at construction time.
    ``sample_point`` then inverts the estimated conditional CDF on a grid to
    draw one outcome value for a given conditioning vector.
    """

    # Upper bound on redraws when the interpolated inverse CDF rejects a
    # uniform variate with ``ValueError``.
    _MAX_SAMPLE_ATTEMPTS = 100

    def __init__(self, outcome_upper_support, outcome_lower_support,
                 outcome_names, treatment_names, backdoor_variables, data,
                 dep_type, indep_type, bw, defaults):
        """Store the configuration and fit the conditional density.

        Parameters
        ----------
        outcome_upper_support, outcome_lower_support : array-like
            Per-outcome upper and lower ends of the evaluation grid, in the
            same order as ``outcome_names``.
        outcome_names : list of str
            Columns of ``data`` used as the dependent (endog) variables.
        treatment_names, backdoor_variables : list of str
            Columns of ``data`` that are concatenated, in this order, to form
            the conditioning (exog) variables.
        data : DataFrame-like
            Object indexable by a list of column names.
        dep_type, indep_type : iterable of str
            Joined into the ``dep_type`` / ``indep_type`` strings passed to
            ``KDEMultivariateConditional``.
        bw, defaults
            Forwarded unchanged to ``KDEMultivariateConditional``.
        """
        self._data = data
        self._outcome_names = outcome_names
        self._treatment_names = treatment_names
        self._backdoor_variables = backdoor_variables
        self.dep_type = dep_type
        self.indep_type = indep_type
        self.bw = bw
        self.defaults = defaults
        self.outcome_lower_support = outcome_lower_support
        self.outcome_upper_support = outcome_upper_support
        self.conditional_density = KDEMultivariateConditional(
            endog=self._data[self._outcome_names],
            exog=self._data[self._treatment_names + self._backdoor_variables],
            dep_type=''.join(self.dep_type),
            indep_type=''.join(self.indep_type),
            bw=self.bw,
            defaults=self.defaults)

    def sample_point(self, x_z):
        """Draw one outcome value conditional on ``x_z``.

        The conditional CDF is evaluated on a regular grid spanning the
        outcome support, padded with the anchor points (0, lower - 3*bw) and
        (1, upper + 3*bw), and inverted by linear interpolation at a uniform
        random number.

        Parameters
        ----------
        x_z : array-like
            One conditioning vector (treatment values followed by backdoor
            values).

        Returns
        -------
        The interpolated outcome value as returned by ``interp1d``.

        Raises
        ------
        ValueError
            If the interpolator cannot be built or keeps rejecting the random
            draw after ``_MAX_SAMPLE_ATTEMPTS`` attempts.

        Notes
        -----
        The grid is flattened before interpolation, so its length only
        matches the CDF vector when there is a single outcome variable.
        """
        outcomes = self._data[self._outcome_names]
        # Rule-of-thumb bandwidth 1.06 * std * n**(-1/5), one per outcome.
        # Converted to a plain float array so later indexing is positional
        # rather than by pandas label.
        y_bw = np.atleast_1d(np.asarray(
            1.06 * outcomes.std() * outcomes.count() ** (-1. / 5.),
            dtype=float))
        lower = np.atleast_1d(
            np.asarray(self.outcome_lower_support, dtype=float))
        upper = np.atleast_1d(
            np.asarray(self.outcome_upper_support, dtype=float))

        # Five grid points per bandwidth; np.linspace requires an integer
        # count, so cast explicitly.
        n = (5 * np.ceil((upper - lower) / y_bw)).astype(int)
        cum_ranges = [
            np.linspace(low, high, num)
            for low, high, num in zip(lower, upper, n)
        ]

        res = np.meshgrid(*cum_ranges)
        points = np.array(res).reshape(len(self._outcome_names),
                                       int(n.prod())).T

        # One copy of the conditioning vector per grid point. np.tile keeps
        # each row equal to x_z; np.repeat followed by reshape would
        # interleave the components across rows.
        x_z_repeated = np.tile(np.ravel(x_z), (len(points), 1))
        cdf_vals = self._evaluate_cdf(points, x_z_repeated)
        cdf_vals = np.hstack([[0.], cdf_vals, [1.]])
        points = np.vstack([[lower - 3. * y_bw], points,
                            [upper + 3. * y_bw]])
        inv_cdf = interp1d(cdf_vals.flatten(),
                           points.flatten(),
                           fill_value=0.,
                           axis=0)

        # The grid and CDF do not depend on the random draw, so a retry only
        # needs a fresh uniform variate; the attempt count is bounded instead
        # of recursing without limit.
        for attempt in range(self._MAX_SAMPLE_ATTEMPTS):
            try:
                return inv_cdf(np.random.rand())
            except ValueError:
                if attempt == self._MAX_SAMPLE_ATTEMPTS - 1:
                    raise

    def _evaluate_cdf(self, y, x_z):
        """Return the fitted conditional CDF at outcomes ``y`` given ``x_z``.

        ``y`` is passed wrapped in a one-element list as ``endog_predict``
        and ``x_z`` is passed unchanged as ``exog_predict``.
        """
        return self.conditional_density.cdf(endog_predict=[y],
                                            exog_predict=x_z)