예제 #1
0
 def __init__(self,
              y,
              x,
              z,
              X,
              alpha,
              variable_types=None,
              burn=1000,
              thin=10,
              bins=None):
     """Fit densities, draw a null sample and compute chi-squared values.

     Parameters
     ----------
     y, x, z : list
         Column names; they are concatenated with ``+`` and used to index
         ``X``, so each must be a list.
     X : table of observations
         Indexed by column-name lists and reduced with ``.median().values``
         (presumably a pandas DataFrame -- confirm against callers).
     alpha
         Stored unchanged on the instance (presumably the significance
         level of the test -- confirm against callers).
     variable_types : dict, optional
         Stored unchanged on the instance.  Defaults to a new empty dict.
     burn, thin : int
         Stored unchanged on the instance (presumably burn-in and thinning
         for the sampler behind ``generate_ci_sample`` -- TODO confirm).
     bins : dict, optional
         Stored unchanged on the instance.  Defaults to a new empty dict.
     """
     # A ``None`` sentinel replaces the former ``{}`` defaults: those dicts
     # were stored on ``self`` and therefore shared by every instance that
     # relied on the default.  Dicts passed explicitly are still stored by
     # reference, exactly as before.
     self.variable_types = {} if variable_types is None else variable_types
     self.bins = {} if bins is None else bins
     self.alpha = alpha
     self.x = x
     self.y = y
     self.z = z
     # Larger samples or three-plus variables on either side switch to the
     # "efficient" estimator settings with a fixed number of jobs.
     if len(X) > 300 or max(len(x + z), len(y + z)) >= 3:
         self.defaults = EstimatorSettings(n_jobs=4, efficient=True)
     else:
         self.defaults = EstimatorSettings(n_jobs=-1, efficient=False)
     self.densities = self.estimate_densities(x, y, z, X)
     self.N = len(X)
     # Per-column medians of the involved variables, in x + y + z order.
     self.mcmc_initialization = X[x + y + z].median().values
     self.burn = burn
     self.thin = thin
     # generate_ci_sample() runs only after N, mcmc_initialization, burn and
     # thin are assigned; keep this ordering.
     self.null_df = self.generate_ci_sample()
     # Third return value for the generated sample, second for the data.
     _, _, self.chi2_bound = self.discretize_and_get_chi2(self.null_df)
     self.chi2 = self.discretize_and_get_chi2(X)[1]
예제 #2
0
    def __init__(self, *args, **kwargs):
        """Initialise the parent, pick KDE settings and build the sampler.

        All positional and keyword arguments are forwarded unchanged to the
        parent constructor.  Afterwards this sets ``self.defaults`` (the
        estimator settings), ``self.bw`` (the bandwidth selection method)
        and ``self.sampler``.
        """
        super().__init__(*args, **kwargs)

        self.logger.info("Using KernelDensitySampler for do sampling.")

        # The variable counts are inspected only when the data set is small.
        if len(self._data) > 300:
            heavy_fit = True
        else:
            treatment_side = self._treatment_names + self._target_estimand.backdoor_variables
            outcome_side = self._outcome_names + self._target_estimand.backdoor_variables
            heavy_fit = max(len(treatment_side), len(outcome_side)) >= 3

        self.defaults = EstimatorSettings(n_jobs=4 if heavy_fit else -1,
                                          efficient=heavy_fit)

        # 'normal_reference' as soon as any variable is typed 'c',
        # otherwise 'cv_ml'.
        has_continuous = 'c' in self._variable_types.values()
        self.bw = 'normal_reference' if has_continuous else 'cv_ml'

        self.sampler = self._construct_sampler()
예제 #3
0
def continuous_treatment_model(data, covariates, treatment, variable_types):
    """Score each row with the conditional density of treatment given covariates.

    Parameters
    ----------
    data : table of observations
        Indexed by column name(s); first passed through
        ``binarize_discrete`` together with ``covariates``.
    covariates : list
        Names of the conditioning columns.
    treatment
        Name of the single treatment column (it is wrapped in a list before
        its type string is looked up).
    variable_types : dict
        Maps column names to type codes; the presence of ``'c'`` among the
        values selects the bandwidth method.

    Returns
    -------
    The fitted conditional density evaluated at the observed
    ``data[treatment]`` and ``data[covariates]``.
    """
    data, covariates = binarize_discrete(data, covariates, variable_types)

    # Count the treatment as one variable.  The previous expression,
    # ``len(treatment + covariates)``, tried to concatenate a column name
    # with a list and raised TypeError whenever len(data) <= 300.
    n_variables = 1 + len(covariates)
    if len(data) > 300 or n_variables >= 3:
        defaults = EstimatorSettings(n_jobs=4, efficient=True)
    else:
        defaults = EstimatorSettings(n_jobs=-1, efficient=False)

    if 'c' not in variable_types.values():
        bw = 'cv_ml'
    else:
        bw = 'normal_reference'

    indep_type = get_type_string(covariates, variable_types)
    dep_type = get_type_string([treatment], variable_types)

    model = KDEMultivariateConditional(endog=data[treatment],
                                       exog=data[covariates],
                                       dep_type=''.join(dep_type),
                                       indep_type=''.join(indep_type),
                                       bw=bw,
                                       defaults=defaults)
    scores = model.pdf(endog_predict=data[treatment], exog_predict=data[covariates])
    return scores
예제 #4
0
파일: sampler.py 프로젝트: q1park/spacetime
 def _compute_joint_kde(self, *nodes, normref=True):
     """Fit a joint KDE over ``nodes`` and cache it in ``self.kdes_joint``.

     Every node is declared continuous (``'c'``).  With ``normref`` true
     the bandwidth method is ``'normal_reference'``; otherwise it is
     ``'cv_ml'`` with ``EstimatorSettings(efficient=True)``.  The result is
     stored under the ``nodes`` tuple and a timing line is printed.
     """
     samples = [self.node_data.info[name]['data'] for name in nodes]
     started = time.time()

     # Both variants share the data and type string; only the bandwidth
     # method (and, for 'cv_ml', the estimator settings) differ.
     kde_kwargs = {'data': samples, 'var_type': 'c' * len(nodes)}
     if normref:
         kde_kwargs['bw'] = 'normal_reference'
     else:
         kde_kwargs['bw'] = 'cv_ml'
         kde_kwargs['defaults'] = EstimatorSettings(efficient=True)
     joint = KDEMultivariate(**kde_kwargs)

     elapsed = time.time() - started
     print("Fit joint KDE for %s in %s seconds" % (nodes, elapsed))
     self.kdes_joint[nodes] = joint
예제 #5
0
파일: sampler.py 프로젝트: q1park/spacetime
 def _compute_conditional_kde(self, dep, inds, normref=True):
     """Fit a KDE of ``dep`` conditional on ``inds`` and cache it.

     The dependent node and every conditioning node are declared
     continuous (``'c'``).  With ``normref`` true the bandwidth method is
     ``'normal_reference'``; otherwise it is ``'cv_ml'`` with
     ``EstimatorSettings(efficient=True)``.  The result is stored at
     ``self.kdes_conditional[dep][inds]`` and a timing line is printed.
     """
     target = self.node_data.info[dep]['data']
     conditioning = [self.node_data.info[name]['data'] for name in inds]
     started = time.time()

     # Both variants share everything except the bandwidth method (and,
     # for 'cv_ml', the estimator settings).
     kde_kwargs = {
         'endog': target,
         'exog': conditioning,
         'dep_type': 'c',
         'indep_type': 'c' * len(conditioning),
     }
     if normref:
         kde_kwargs['bw'] = 'normal_reference'
     else:
         kde_kwargs['bw'] = 'cv_ml'
         kde_kwargs['defaults'] = EstimatorSettings(efficient=True)
     conditional = KDEMultivariateConditional(**kde_kwargs)

     elapsed = time.time() - started
     print("Fit conditional KDE for %s wrt %s in %s seconds" %
           (dep, inds, elapsed))
     self.kdes_conditional[dep][inds] = conditional
예제 #6
0
    def __init__(
        self,
        X,
        causes,
        effects,
        admissable_set=[],
        variable_types=None,
        expectation=False,
        density=True,
    ):
        """
        Fit the densities needed to compute the causal effect of X on Y
        through back-door adjustment, P(Y|do(X)) = Sum( P(Y|X,Z)P(Z), Z ),
        for some admissible set of control variables, Z.

        First the conditional density P(Y|X,Z) is fitted, then (when Z is
        non-empty) the density P(Z).  The support of the data is recorded
        so it can be summed over later.

        Parameters
        ----------
        X : table of observations
            Indexed by lists of column names.
        causes, effects : list
            Column names of the cause and effect variables.
        admissable_set : list or set
            Column names of the control variables Z.  It is copied into a
            list before use, so any iterable of names is accepted.
        variable_types : dict, optional
            Maps each column name to one of 'o', 'u' or 'c' for 'ordered',
            'unordered discrete' or 'continuous'.  When missing or empty
            the types are inferred from ``X``.
        expectation : bool
            When true, a kernel regression of the effects on the causes and
            controls is fitted as well.
        density : bool
            Not used by this constructor.
        """
        # Normalise to a list before anything else: the concatenations and
        # column selections below need a list, and callers may hand in a
        # set.  The copy also keeps the shared default list from leaking.
        admissable_set = list(admissable_set)
        conditional_density_vars = causes + admissable_set
        self.causes = causes
        self.effects = effects
        self.admissable_set = list(admissable_set)
        self.conditional_density_vars = conditional_density_vars

        if (
            len(X) > 300
            or max(len(causes + admissable_set), len(effects + admissable_set)) >= 3
        ):
            self.defaults = EstimatorSettings(n_jobs=4, efficient=True)
        else:
            self.defaults = EstimatorSettings(n_jobs=-1, efficient=False)

        # Resolve the type mapping first, then derive the type lists from
        # it in both cases.  Previously the inference path left the lists
        # undefined and went on to call ``.values()`` on the missing
        # argument.
        if variable_types:
            self.variable_types = variable_types
        else:
            self.variable_types = self.__infer_variable_types(X)
            variable_types = self.variable_types
        dep_type = [variable_types[var] for var in effects]
        indep_type = [variable_types[var] for var in conditional_density_vars]
        density_types = [variable_types[var] for var in admissable_set]

        if "c" not in variable_types.values():
            bw = "cv_ml"
        else:
            bw = "normal_reference"

        # self.density is only created when there are control variables.
        if admissable_set:
            self.density = KDEMultivariate(
                X[admissable_set],
                var_type="".join(density_types),
                bw=bw,
                defaults=self.defaults,
            )

        self.conditional_density = KDEMultivariateConditional(
            endog=X[effects],
            exog=X[conditional_density_vars],
            dep_type="".join(dep_type),
            indep_type="".join(indep_type),
            bw=bw,
            defaults=self.defaults,
        )
        if expectation:
            self.conditional_expectation = KernelReg(
                X[effects].values,
                X[conditional_density_vars].values,
                "".join(indep_type),
                bw="cv_ls",
            )

        self.support = self.__get_support(X)

        # Split the known variables by type and note which control
        # variables fall into each group.
        self.discrete_variables = [
            variable
            for variable, var_type in self.variable_types.items()
            if var_type in ["o", "u"]
        ]
        self.discrete_Z = list(
            set(self.discrete_variables).intersection(set(admissable_set))
        )
        self.continuous_variables = [
            variable
            for variable, var_type in self.variable_types.items()
            if var_type == "c"
        ]
        self.continuous_Z = list(
            set(self.continuous_variables).intersection(set(admissable_set))
        )