Esempio n. 1
0
    def run_diagnostics(self, decimal=3):
        """Print and plot every currently implemented diagnostic for the exposure and outcome models.

        This is a convenience wrapper: it prints the positivity summary, the table of standardized mean
        differences, and the outcome accuracy summary, then displays a single figure holding a Love plot, the
        kernel density of the propensity scores, and the kernel density of the outcome prediction error. For
        publication-quality output, call each diagnostic function individually and use its optional parameters.

        Note
        ----
        The figure is shown immediately and is not returned, so it cannot be edited afterwards. To customize a
        plot, call `plot_kde` or `plot_love` directly; those functions return an axes object.

        Parameters
        ----------
        decimal : int, optional
            Number of decimal places to display. Default is 3

        Returns
        -------
        None

        Raises
        ------
        ValueError
            If either the exposure model or the outcome model has not been fit yet
        """
        if not (self._fit_outcome_ and self._fit_exposure_):
            raise ValueError("The exposure_model and outcome_model function must be ran before any diagnostics")

        rule = '======================================================================'

        # Exposure (weight) model diagnostics
        print('\tExposure Model Diagnostics')
        self.positivity(decimal=decimal)

        print('\n' + rule)
        print('                  Standardized Mean Differences')
        print(rule)
        smd_table = self.standardized_mean_differences().set_index(keys='labels')
        print(smd_table)
        print(rule + '\n')

        # Outcome model diagnostics: predicted minus observed outcome
        print('\tOutcome Model Diagnostics')
        observed = self.df[self.outcome]
        residuals = self._predicted_y_ - observed
        outcome_accuracy(true=observed, predicted=self._predicted_y_, decimal=decimal)

        # Work on a copy so the temporary weight column never lands on self.df
        plot_data = self.df.copy()
        g1 = plot_data['_g1_']
        plot_data['_ipw_'] = np.where(plot_data[self.exposure] == 1, 1 / g1, 1 / (1 - g1))

        # (subplot position, drawing callable, title), in the order the panels are drawn
        panels = (
            (221,
             lambda: plot_love(df=plot_data, treatment=self.exposure, weight='_ipw_', formula=self.__mweight),
             "Love Plot"),
            (223,
             lambda: plot_kde(df=plot_data, treatment=self.exposure, probability='_g1_'),
             "Kernel Density of Propensity Scores"),
            (222,
             lambda: plot_kde_accuracy(values=residuals.dropna(), color='green'),
             "Kernel Density of Accuracy"),
        )

        plt.figure(figsize=[8, 6])
        for position, draw, title in panels:
            plt.subplot(position)
            draw()
            plt.title(title)
        plt.tight_layout()
        plt.show()
Esempio n. 2
0
    def plot_kde(self, bw_method='scott', fill=True, color='b'):
        """Plot the kernel density of the outcome model's prediction error.

        The plotted values are the model-predicted outcomes minus the observed outcomes, with missing
        differences dropped. This can be used as a diagnostic for the g-formula.

        Parameters
        ----------
        bw_method : str, optional
            Method used to estimate the bandwidth. Following SciPy, either 'scott' or 'silverman' are valid options
        fill : bool, optional
            Whether to color the area under the density curve. Default is true
        color : str, optional
            Color of the line/area. Default is blue

        Returns
        -------
        matplotlib axes

        Raises
        ------
        ValueError
            If no predicted outcomes are available because the outcome model has not been fit
        """
        predictions = self._predicted_y_
        if predictions is None:
            raise ValueError("The outcome_model function must be ran before any diagnostics")

        # Predicted minus observed; rows where the difference is missing are excluded from the density
        residuals = predictions - self.gf[self.outcome]
        density_options = {'bw_method': bw_method, 'fill': fill, 'color': color}
        return plot_kde_accuracy(values=residuals.dropna(), **density_options)
Esempio n. 3
0
    def plot_kde(self,
                 to_plot,
                 bw_method='scott',
                 fill=True,
                 color='g',
                 color_e='b',
                 color_u='r'):
        """Generates density plots that can be used to check predictions qualitatively. Density plots can be
        generated to assess either positivity violations of the exposure model or the accuracy in predicting the
        outcome for the outcome model. The kernel density used is SciPy's Gaussian kernel. Either Scott's Rule or
        Silverman's Rule can be implemented.

        Parameters
        ------------
        to_plot : str
            The plot to generate. Specifying 'exposure' returns only the density plot for treatment probabilities,
            and 'outcome' returns only the density plot for the outcome accuracy
        bw_method : str, optional
            Method used to estimate the bandwidth. Following SciPy, either 'scott' or 'silverman' are valid options
        fill : bool, optional
            Whether to color the area under the density curves. Default is true
        color : str, optional
            Color of the line/area for predicted outcomes minus observed outcomes. Default is Green
        color_e : str, optional
            Color of the line/area for the treated group. Default is Blue
        color_u : str, optional
            Color of the line/area for the untreated group. Default is Red

        Returns
        ---------------
        matplotlib axes

        Raises
        ------
        ValueError
            If `to_plot` is neither 'exposure' nor 'outcome'
        """
        if to_plot == 'exposure':
            # Density of the predicted treatment probabilities stored in the '_g1_' column
            ax = plot_kde(df=self.df,
                          treatment=self.exposure,
                          probability='_g1_',
                          bw_method=bw_method,
                          fill=fill,
                          color_e=color_e,
                          color_u=color_u)
            ax.set_title("Kernel Density of Propensity Scores")

        elif to_plot == 'outcome':
            # Predicted minus observed outcome; missing differences are dropped before plotting
            v = self._predicted_y_ - self.df[self.outcome]
            ax = plot_kde_accuracy(values=v.dropna(),
                                   bw_method=bw_method,
                                   fill=fill,
                                   color=color)
            ax.set_title("Kernel Density of Accuracy")

        else:
            # The message must list the values the branches above actually accept
            raise ValueError(
                "Please use one of the following options for `to_plot`; 'exposure', 'outcome'"
            )

        return ax