コード例 #1
0
ファイル: AIPW.py プロジェクト: takkyi83/zEpid
    def exposure_model(self, model, custom_model=None, bound=False, print_results=True):
        r"""Specify the propensity score / inverse probability weight model. By default the exposure is predicted
        with a logistic regression model, which estimates

        .. math::

            \widehat{\Pr}(A=1|L) = logit^{-1}(\widehat{\beta_0} + \widehat{\beta} L)

        The predicted probability of exposure is stored in ``self.df['_g1_']`` and its complement in
        ``self.df['_g0_']``. ``self._fit_exposure_`` is set to True once both columns are written.

        Parameters
        ----------
        model : str
            Independent variables to predict the exposure. For example, 'var1 + var2 + var3'
        custom_model : optional
            Input for a custom model that is used in place of the logit model (default). The model must have the
            "fit()" and "predict()" attributes. SciKit-Learn style models are supported as custom models. A deep copy
            of the supplied model is fit on the design matrix built from ``model`` (without an intercept column) and
            used to generate the predicted probabilities, so the object passed in is not modified by this method
        bound : float, list, optional
            Value between 0,1 to truncate predicted probabilities. Helps to avoid near positivity violations.
            Specifying this argument can improve finite sample performance for random positivity violations. However,
            truncating weights leads to additional confounding. Default is False, meaning no truncation of
            predicted probabilities occurs (any falsy value disables truncation). Providing a single float assumes
            symmetric truncation, where values below or above the threshold are set to the threshold value.
            Alternatively a list of floats can be provided for asymmetric truncation, with the first value being the
            lower bound and the second being the upper bound
        print_results : bool, optional
            Whether to print the fitted model results. Default is True (prints results)
        """
        self.__mweight = model
        self._exp_model = self.exposure + ' ~ ' + model

        # Record whether THIS call used a custom learner. Previously the flag was only ever set to True, so refitting
        # with the default logit model after a custom model left a stale True behind.
        self._exp_model_custom = custom_model is not None

        if custom_model is None:
            # Only the denominator probabilities are needed here; bounding is deliberately disabled in the
            # calculator (bound=None) and applied below to g1 and g0 separately.
            pr_exposed, _numer, _weights = iptw_calculator(df=self.df, treatment=self.exposure, model_denom=model,
                                                           model_numer='1', weight=self._weight_, stabilized=False,
                                                           standardize='population', bound=None,
                                                           print_results=print_results)
        else:
            # ' - 1' drops the patsy intercept column before handing the matrix to the custom learner
            design = patsy.dmatrix(model + ' - 1', self.df)
            pr_exposed = exposure_machine_learner(xdata=np.asarray(design),
                                                  ydata=np.asarray(self.df[self.exposure]),
                                                  ml_model=copy.deepcopy(custom_model),
                                                  print_results=print_results)

        g1w = pr_exposed
        g0w = 1 - pr_exposed

        # Applying bounds AFTER extracting g1 and g0
        if bound:
            g1w = probability_bounds(g1w, bounds=bound)
            g0w = probability_bounds(g0w, bounds=bound)

        self.df['_g1_'] = g1w
        self.df['_g0_'] = g0w
        self._fit_exposure_ = True
コード例 #2
0
ファイル: IPTW.py プロジェクト: waternk/zEpid
    def treatment_model(self, model_denominator, model_numerator='1', stabilized=True, bound=False, print_results=True):
        """Fit the propensity score model(s) and compute inverse probability of treatment weights.

        The denominator model is always required, for stabilized and unstabilized weights alike. The numerator
        model is the stabilization factor. The denominator probabilities, numerator probabilities and weights
        produced by ``iptw_calculator`` are stored in ``self.df['__denom__']``, ``self.df['__numer__']`` and
        ``self.iptw`` respectively. Model results are printed by default.

        Parameters
        ------------
        model_denominator : str
            Variables used to predict the exposure, written in `patsy` syntax. For example, `'var1 + var2 + var3'`.
            These generate the predicted probabilities for the weight denominator
        model_numerator : str, optional
            Variables used to predict the exposure for the weight numerator, separated by +. Default ('1')
            calculates the overall probability of exposure, which is generally recommended. If confounding variables
            are included in the numerator, they would later need to be adjusted for in the marginal structural
            model. Also used for assessment of effect measure modification. Only relevant when stabilized weights
            are requested
        stabilized : bool, optional
            Whether to calculate stabilized (True, the default) or unstabilized (False) weights
        bound : float, list, optional
            Value between 0,1 to truncate predicted probabilities. Helps to avoid near positivity violations.
            Specifying this argument can improve finite sample performance for random positivity violations. However,
            inference becomes limited to the restricted population. Default is False, meaning no truncation of
            predicted probabilities occurs. A single float gives symmetric truncation; a collection of floats gives
            asymmetric truncation
        print_results : bool, optional
            Whether to print the model results from the regression models. Default is True
        """
        # Remember the denominator specification on the instance
        self.__mdenom = model_denominator

        # Estimate the denominator / numerator probabilities and the resulting weights in one call
        denominator, numerator, weights = iptw_calculator(df=self.df, treatment=self.treatment,
                                                          model_denom=model_denominator,
                                                          model_numer=model_numerator,
                                                          weight=self._weight_, stabilized=stabilized,
                                                          standardize=self.standardize, bound=bound,
                                                          print_results=print_results)

        self.df['__denom__'] = denominator
        self.df['__numer__'] = numerator
        self.iptw = weights
コード例 #3
0
ファイル: AIPW.py プロジェクト: waternk/zEpid
    def exposure_model(self, model, bound=False, print_results=True):
        r"""Specify the propensity score / inverse probability weight model. The exposure is predicted with a
        logistic regression model, which estimates

        .. math::

            \widehat{\Pr}(A=1|L) = logit^{-1}(\widehat{\beta_0} + \widehat{\beta} L)

        The predicted probability of exposure is written to ``self.df['_g1_']`` and its complement to
        ``self.df['_g0_']``; ``self._fit_exposure_`` is then set to True.

        Parameters
        ----------
        model : str
            Independent variables to predict the exposure. For example, 'var1 + var2 + var3'
        bound : float, list, optional
            Value between 0,1 to truncate predicted probabilities. Helps to avoid near positivity violations.
            Specifying this argument can improve finite sample performance for random positivity violations. However,
            truncating weights leads to additional confounding. Default is False, meaning no truncation of
            predicted probabilities occurs. Providing a single float assumes symmetric truncation, where values below
            or above the threshold are set to the threshold value. Alternatively a list of floats can be provided for
            asymmetric truncation, with the first value being the lower bound and the second being the upper bound
        print_results : bool, optional
            Whether to print the fitted model results. Default is True (prints results)
        """
        self.__mweight = model
        self._exp_model = self.exposure + ' ~ ' + model

        # Only the denominator probabilities are used; the calculator's own bounding is switched off (bound=None)
        # so that truncation can be applied to g1 and g0 separately below.
        pr_exposed, _numer, _weights = iptw_calculator(df=self.df, treatment=self.exposure, model_denom=model,
                                                       model_numer='1', weight=self._weight_, stabilized=False,
                                                       standardize='population', bound=None,
                                                       print_results=print_results)

        self.df['_g1_'] = pr_exposed
        self.df['_g0_'] = 1 - pr_exposed

        # Applying bounds AFTER extracting g1 and g0
        if bound:
            for column in ('_g1_', '_g0_'):
                self.df[column] = _bounding_(self.df[column], bounds=bound)

        self._fit_exposure_ = True
コード例 #4
0
ファイル: estimators.py プロジェクト: takkyi83/zEpid
    def treatment_model(self, model_denominator, model_numerator='1', bound=None, stabilized=True, print_results=False):
        """Fit the logistic regression model(s) used to estimate inverse probability of treatment weights (IPTW).

        The denominator model must be specified for both stabilized and unstabilized weights. The optional
        'model_numerator' argument specifies the stabilization factor for the weight numerator. The weights
        returned by ``iptw_calculator`` are stored in ``self.iptw``. Model results are not printed unless
        ``print_results=True``.

        Parameters
        ----------
        model_denominator : str
            String listing variables to predict the exposure, separated by +. For example, 'var1 + var2 + var3'. This
            is for the predicted probabilities of the denominator
        model_numerator : str, optional
            Optional string listing variables to predict the exposure, separated by +. Only used to calculate the
            numerator. Default ('1') calculates the overall probability of exposure. In general, this is
            recommended. Argument is only used when calculating stabilized weights
        bound : float, list, optional
            Value between 0,1 to truncate predicted probabilities. Helps to avoid near positivity violations.
            Specifying this argument can improve finite sample performance for random positivity violations. However,
            inference becomes limited to the restricted population. Default is None, which is passed through to
            ``iptw_calculator`` unchanged. Providing a single float assumes symmetric truncation. A collection of
            floats can be provided for asymmetric truncation
        stabilized : bool, optional
            Whether to generate stabilized IPTW. Default is True, which returns the stabilized IPTW
        print_results : bool, optional
            Whether to print the model results from the regression models. Default is False
        """
        # The denominator and numerator probabilities are not kept by this estimator; only the weights are stored
        _denominator, _numerator, weights = iptw_calculator(df=self.df, treatment=self.exposure,
                                                            model_denom=model_denominator,
                                                            model_numer=model_numerator,
                                                            weight=self.weight, stabilized=stabilized,
                                                            standardize='population', bound=bound,
                                                            print_results=print_results)
        self.iptw = weights