def run_diagnostics(self, decimal=3):
    """Run every currently implemented diagnostic for the exposure and outcome models.

    Convenience entry point that, in order: calls `positivity`, prints the table returned by
    `standardized_mean_differences` (indexed by its 'labels' column), calls `outcome_accuracy` on the
    observed and predicted outcomes, and finally shows one figure containing a Love plot, the kernel
    density of the propensity scores, and the kernel density of the prediction error (predicted minus
    observed outcome). For publication quality presentations, calling each diagnostic function
    individually and using its optional parameters is recommended.

    Note
    ----
    The figure shown by this method cannot be edited. To edit the plots, call `plot_kde` or `plot_love`
    directly. Those functions return an axes object

    Parameters
    ----------
    decimal : int, optional
        Number of decimal places to display. Default is 3

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If the exposure model or the outcome model has not been fit yet
    """
    # Both nuisance models are required; the diagnostics below read their stored predictions
    if not (self._fit_outcome_ and self._fit_exposure_):
        raise ValueError("The exposure_model and outcome_model function must be ran before any diagnostics")

    # Weight diagnostics
    print('\tExposure Model Diagnostics')
    self.positivity(decimal=decimal)

    print('\n======================================================================')
    print(' Standardized Mean Differences')
    print('======================================================================')
    smd_table = self.standardized_mean_differences()
    print(smd_table.set_index(keys='labels'))
    print('======================================================================\n')

    # Outcome accuracy diagnostics
    print('\tOutcome Model Diagnostics')
    observed_y = self.df[self.outcome]
    prediction_error = self._predicted_y_ - observed_y
    outcome_accuracy(true=observed_y, predicted=self._predicted_y_, decimal=decimal)

    # Inverse probability weights are added to a copy so that self.df is left untouched
    weighted = self.df.copy()
    is_exposed = weighted[self.exposure] == 1
    propensity = weighted['_g1_']
    weighted['_ipw_'] = np.where(is_exposed, 1 / propensity, 1 / (1 - propensity))

    # Three panels on a 2x2 grid: top-left, bottom-left, then top-right
    plt.figure(figsize=[8, 6])

    plt.subplot(2, 2, 1)
    plot_love(df=weighted, treatment=self.exposure, weight='_ipw_', formula=self.__mweight)
    plt.title("Love Plot")

    plt.subplot(2, 2, 3)
    plot_kde(df=weighted, treatment=self.exposure, probability='_g1_')
    plt.title("Kernel Density of Propensity Scores")

    plt.subplot(2, 2, 2)
    plot_kde_accuracy(values=prediction_error.dropna(), color='green')
    plt.title("Kernel Density of Accuracy")

    plt.tight_layout()
    plt.show()
def plot_kde(self, to_plot, bw_method='scott', fill=True, color='g', color_e='b', color_u='r'):
    """Generates density plots that can be used to check predictions qualitatively.

    Density plots can be generated to assess either positivity violations of the exposure model or the
    accuracy in predicting the outcome for the outcome model. The kernel density used is SciPy's Gaussian
    kernel. Either Scott's Rule or Silverman's Rule can be implemented.

    Parameters
    ------------
    to_plot : str
        The plot to generate. Specifying 'exposure' returns only the density plot for treatment
        probabilities, and 'outcome' returns only the density plot for the outcome accuracy
        (predicted outcome minus observed outcome, with missing values dropped)
    bw_method : str, optional
        Method used to estimate the bandwidth. Following SciPy, either 'scott' or 'silverman' are valid
        options
    fill : bool, optional
        Whether to color the area under the density curves. Default is true
    color : str, optional
        Color of the line/area for predicted outcomes minus observed outcomes. Only used when
        to_plot='outcome'. Default is Green
    color_e : str, optional
        Color of the line/area for the treated group. Only used when to_plot='exposure'. Default is Blue
    color_u : str, optional
        Color of the line/area for the untreated group. Only used when to_plot='exposure'. Default is Red

    Returns
    ---------------
    matplotlib axes

    Raises
    ------
    ValueError
        If `to_plot` is neither 'exposure' nor 'outcome'
    """
    if to_plot == 'exposure':
        # `plot_kde` here is the module-level helper, not this method
        ax = plot_kde(df=self.df, treatment=self.exposure, probability='_g1_',
                      bw_method=bw_method, fill=fill, color_e=color_e, color_u=color_u)
        ax.set_title("Kernel Density of Propensity Scores")
    elif to_plot == 'outcome':
        v = self._predicted_y_ - self.df[self.outcome]
        ax = plot_kde_accuracy(values=v.dropna(), bw_method=bw_method, fill=fill, color=color)
        ax.set_title("Kernel Density of Accuracy")
    else:
        # The message must list the values actually accepted above; it previously advertised
        # 'treatment', which is not a valid option and would only raise this error again
        raise ValueError("Please use one of the following options for `to_plot`; 'exposure', 'outcome'")
    return ax
def plot_kde(self, measure='probability', bw_method='scott', fill=True, color_e='b', color_u='r'):
    """Draw a density plot for qualitatively checking whether positivity may be violated.

    Thin wrapper that forwards this object's data (`self.df`), its treatment column name
    (`self.treatment`) and the '__denom__' column to the module-level `plot_kde` helper. The kernel
    density used is SciPy's Gaussian kernel. Either Scott's Rule or Silverman's Rule can be implemented.
    Alternative option to the boxplot of probabilities

    Parameters
    ------------
    measure : str, optional
        Measure to plot. Options include either the probabilities or log-odds stratified by treatment
        received. Default is probabilities (measure='probability'). Log-odds can be requested via
        measure='logit'
    bw_method : str, optional
        Method used to estimate the bandwidth. Following SciPy, either 'scott' or 'silverman' are valid
        options
    fill : bool, optional
        Whether to color the area under the density curves. Default is true
    color_e : str, optional
        Color of the line/area for the treated group. Default is Blue
    color_u : str, optional
        Color of the line/area for the untreated group. Default is Red

    Returns
    ---------------
    matplotlib axes
    """
    # Appearance options are passed through unchanged to the helper
    appearance = {'bw_method': bw_method, 'fill': fill, 'color_e': color_e, 'color_u': color_u}
    return plot_kde(df=self.df, treatment=self.treatment, probability='__denom__',
                    measure=measure, **appearance)