Esempio n. 1
0
def slm_plot_scaleloc(fitted_model,
                      ax=None,
                      scolor='C0',
                      lcolor='C1',
                      lw=2,
                      lowess=True,
                      annotations=3):
    """Produce a scale-location plot for a fitted linear model.

    Plots the square root of the absolute internally studentized residuals
    against the fitted values.

    Parameters
    ----------
    fitted_model
        Fitted results object providing ``get_influence()`` and
        ``fittedvalues``.
    ax
        Axes to draw on; forwarded to ``sns.scatterplot``.
    scolor
        Colour of the scatter points.
    lcolor
        Colour of the lowess curve.
    lw
        Line width of the lowess curve.
    lowess
        If true, overlay the curve returned by ``utils.lowess``.
    annotations
        Number of points with the largest plotted residuals to label.
        A falsy value disables labelling.

    Returns
    -------
    The axes object the plot was drawn on.
    """
    influence = fitted_model.get_influence()
    resids = np.sqrt(np.abs(influence.resid_studentized_internal))
    values = fitted_model.fittedvalues

    # x and y are passed by keyword: seaborn >= 0.12 only accepts ``data``
    # positionally.
    ax = sns.scatterplot(x=values, y=resids, ax=ax, color=scolor)

    if lowess:
        ax.plot(*utils.lowess(values, resids), color=lcolor, lw=lw)

    if annotations:
        # Work on plain arrays so that every lookup below is positional,
        # whatever index the fitted values carry.
        xs = np.asarray(values)
        ys = np.asarray(resids)
        labels = getattr(values, 'index', None)
        for pos in pd.Series(ys).nlargest(annotations).index:
            # Fall back to the position when there is no index to label with.
            label = pos if labels is None else labels[pos]
            ax.annotate(label, (xs[pos], ys[pos]))

    ax.set_title('Scale-Location')
    ax.set_xlabel('Fitted Values')
    # Raw string: the backslashes belong to the mathtext markup.
    ax.set_ylabel(r'$\sqrt{|\mathrm{Standardised}\; \mathrm{Residuals}|}$')

    return ax
Esempio n. 2
0
def glm_plot_scaleloc(fitted_model,
                      ax=None,
                      scolor='C0',
                      lcolor='C1',
                      lw=2,
                      lowess=True,
                      annotations=3):
    """Produce a scale-location plot for a fitted generalised linear model.

    Plots the square root of the absolute deviance residuals against the
    fitted values.

    Parameters
    ----------
    fitted_model
        Fitted results object providing ``resid_deviance`` and
        ``fittedvalues``.
    ax
        Axes to draw on; forwarded to ``sns.scatterplot``.
    scolor
        Colour of the scatter points.
    lcolor
        Colour of the lowess curve.
    lw
        Line width of the lowess curve.
    lowess
        If true, overlay the curve returned by ``utils.lowess``.
    annotations
        Number of points with the largest plotted residuals to label.
        A falsy value disables labelling.

    Returns
    -------
    The axes object the plot was drawn on.
    """
    resids = np.sqrt(np.abs(fitted_model.resid_deviance))
    values = fitted_model.fittedvalues

    # x and y are passed by keyword: seaborn >= 0.12 only accepts ``data``
    # positionally.
    ax = sns.scatterplot(x=values, y=resids, ax=ax, color=scolor)

    if lowess:
        ax.plot(*utils.lowess(values, resids), color=lcolor, lw=lw)

    if annotations:
        # The residuals may arrive as an array or as an indexed Series.
        # Converting to plain arrays keeps every lookup below positional, so
        # labels and positions are never mixed up.
        xs = np.asarray(values)
        ys = np.asarray(resids)
        labels = getattr(values, 'index', None)
        for pos in pd.Series(ys).nlargest(annotations).index:
            # Fall back to the position when there is no index to label with.
            label = pos if labels is None else labels[pos]
            ax.annotate(label, (xs[pos], ys[pos]))

    ax.set_title('Scale-Location')
    ax.set_xlabel('Predicted Values')
    # Raw string: the backslashes belong to the mathtext markup.
    ax.set_ylabel(
        r'$\sqrt{|\mathrm{Std.}\; \mathrm{Deviance}\; \mathrm{Resid.}|}$')

    return ax
Esempio n. 3
0
def slm_plot_resid(fitted_model,
                   ax=None,
                   scolor='C0',
                   lcolor='C1',
                   lw=2,
                   lowess=True,
                   annotations=3):
    """Plot residuals versus fitted values.

    Parameters
    ----------
    fitted_model
        Fitted results object providing ``fittedvalues`` and ``resid``.
        The annotation step calls ``.abs()`` on ``resid`` and looks points
        up by index label.
    ax
        Axes to draw on; forwarded to ``sns.scatterplot``.
    scolor
        Colour of the scatter points and of the horizontal zero line.
    lcolor
        Colour of the lowess curve.
    lw
        Line width of the lowess curve.
    lowess
        If true, overlay the curve returned by ``utils.lowess``.
    annotations
        Number of points with the largest absolute residuals to label.
        A falsy value disables labelling.

    Returns
    -------
    The axes object the plot was drawn on.
    """
    values = fitted_model.fittedvalues
    resids = fitted_model.resid

    # x and y are passed by keyword: seaborn >= 0.12 only accepts ``data``
    # positionally.
    ax = sns.scatterplot(x=values, y=resids, color=scolor, ax=ax)
    ax.axhline(0, color=scolor, alpha=0.5)

    if lowess:
        ax.plot(*utils.lowess(values, resids), color=lcolor, lw=lw)

    if annotations:
        # ``nlargest`` returns index labels, so the lookups are label-based.
        for label in resids.abs().nlargest(annotations).index:
            # ``.max()`` collapses the result to a scalar when a label
            # occurs more than once in the index.
            value = values.loc[label].max()
            resid = resids.loc[label].max()
            ax.annotate(label, (value, resid))

    ax.set_title('Residuals vs Fitted')
    ax.set_xlabel('Fitted Values')
    ax.set_ylabel('Residuals')

    return ax
Esempio n. 4
0
def glm_plot_leverage(fitted_model,
                      ax=None,
                      scolor='C0',
                      lcolor='C1',
                      ccolor='C2',
                      lw=2,
                      lowess=True,
                      cook=True,
                      legend=True,
                      annotations=3):
    """Produce a residuals-versus-leverage plot.

    Plots the Pearson residuals against the diagonal of the hat matrix.

    Parameters
    ----------
    fitted_model
        Fitted results object providing ``get_influence()``,
        ``resid_pearson``, ``fittedvalues`` and ``params``.
    ax
        Axes to draw on; forwarded to ``sns.scatterplot``.
    scolor
        Colour of the scatter points and of the horizontal zero line.
    lcolor
        Colour of the lowess curve.
    ccolor
        Colour of the Cook's distance contour.
    lw
        Line width of the lowess curve and of the contour.
    lowess
        If true, overlay the curve returned by ``utils.lowess``.
    cook
        If true, draw the contour ``sqrt(0.5 * p * (1 - h) / h)``, where
        ``p`` is the number of model parameters and ``h`` the leverage.
    legend
        If true (and ``cook`` is true), add a legend to the axes.
    annotations
        Number of points with the largest Cook's distance to label.
        A falsy value disables labelling.

    Returns
    -------
    The axes object the plot was drawn on.
    """
    influence = fitted_model.get_influence()
    resids = fitted_model.resid_pearson
    values = influence.hat_matrix_diag
    cooks = influence.cooks_distance[0]

    # x and y are passed by keyword: seaborn >= 0.12 only accepts ``data``
    # positionally.
    ax = sns.scatterplot(x=values, y=resids, ax=ax, color=scolor)
    ax.axhline(0, color=scolor, alpha=0.5)

    if lowess:
        ax.plot(*utils.lowess(values, resids), color=lcolor, lw=lw)

    if annotations:
        # Convert everything to plain arrays so the ranking and the lookups
        # are purely positional, whatever index the inputs carry.
        xs = np.asarray(values)
        ys = np.asarray(resids)
        labels = getattr(fitted_model.fittedvalues, 'index', None)
        ranking = pd.Series(np.asarray(cooks)).nlargest(annotations).index
        for pos in ranking:
            # Fall back to the position when there is no index to label with.
            label = pos if labels is None else labels[pos]
            ax.annotate(label, (xs[pos], ys[pos]))

    if cook:
        # Remember the data-driven limits; the contour must not rescale them.
        xlim = ax.get_xlim()
        ylim = ax.get_ylim()
        p = fitted_model.params.size
        x = np.linspace(*xlim, 100)
        # The contour is only real-valued and finite for 0 < h <= 1.
        # Dropping the other grid points avoids divide-by-zero and
        # invalid-sqrt warnings; those points were never drawn anyway.
        x = x[(x > 0) & (x <= 1)]
        y = np.sqrt((0.5 * p * (1 - x)) / x)
        ax.plot(x, y, color=ccolor, lw=lw, alpha=0.8, label="Cook's Distance")
        ax.set_xlim(xlim)
        ax.set_ylim(ylim)
        if legend:
            ax.legend()

    ax.set_title('Residuals vs Leverage')
    ax.set_xlabel('Leverage')
    ax.set_ylabel('Std. Pearson Resid.')

    return ax
Esempio n. 5
0
def plot_fit(fitted_model,
             column,
             data=None,
             ax=None,
             points=100,
             scolor='C0',
             fcolor='C1',
             pcolor='C2',
             lcolor='C7',
             cialpha=0.3,
             pialpha=0.2,
             lalpha=0.8,
             lw=2,
             show_ci=True,
             show_pi=False,
             lowess=False,
             legend=False):
    """Make a scatter plot and overlay the fit result.

    Make a scatter plot of the response versus a specified predictor and
    overlay the fit result.  The distributions of the other predictors are
    marginalised out, i.e. they are set to the mean values of their
    respective distributions.

    Parameters
    ----------
    fitted_model
        Fitted results object providing ``model`` and ``get_prediction()``.
    column
        Name of the predictor to plot on the x axis.
    data
        Data frame holding the predictors.  When ``None`` it is rebuilt from
        ``model.exog`` and ``model.exog_names``.
    ax
        Axes to draw on; forwarded to ``sns.scatterplot``.
    points
        Forwarded to ``utils.marginalised_range`` as ``points``.
    scolor, fcolor, pcolor, lcolor
        Colours of the scatter points, the fit line and its confidence
        band, the prediction band, and the lowess curve respectively.
    cialpha, pialpha, lalpha
        Opacity of the confidence band, the prediction band, and the lowess
        curve respectively.
    lw
        Line width of the fit line and of the lowess curve.
    show_ci
        If true, shade the band between ``mean_ci_lower`` and
        ``mean_ci_upper``.
    show_pi
        If true, shade the band between ``obs_ci_lower`` and
        ``obs_ci_upper`` when the prediction summary provides them.
    lowess
        If true, overlay the curve returned by ``utils.lowess``.
    legend
        If true, label the scatter points and add a legend.

    Returns
    -------
    The matplotlib axes object the plot was drawn on.
    """
    model = fitted_model.model

    if data is None:
        data = pd.DataFrame(model.exog, columns=model.exog_names)

    xs = utils.marginalised_range(column, data, points=points)

    pred = fitted_model.get_prediction(xs).summary_frame()
    x = xs[column]
    y = pred['mean']
    cil = pred['mean_ci_lower']
    ciu = pred['mean_ci_upper']

    # The scatter points only get a legend entry when a legend is requested.
    slabel = 'data' if legend else None
    ax = sns.scatterplot(x=column,
                         y=model.endog,
                         data=data,
                         ax=ax,
                         color=scolor,
                         label=slabel)
    ax.set_title(f'Fit vs {column}')
    ax.set_ylabel(model.endog_names)
    ax.plot(x, y, color=fcolor, lw=lw, label='fit')

    if show_ci:
        ax.fill_between(x,
                        cil,
                        ciu,
                        color=fcolor,
                        alpha=cialpha,
                        label='conf. int.')

    if show_pi:
        # Best effort: if the summary frame has no ``obs_ci_*`` columns the
        # band is skipped.  Only the column lookup is guarded, so a KeyError
        # raised while drawing is not silently hidden.
        try:
            pil = pred['obs_ci_lower']
            piu = pred['obs_ci_upper']
        except KeyError:
            pass
        else:
            ax.fill_between(x,
                            pil,
                            piu,
                            color=pcolor,
                            alpha=pialpha,
                            label='pred. int.')

    if lowess:
        ax.plot(*utils.lowess(data[column], model.endog),
                color=lcolor,
                lw=lw,
                alpha=lalpha,
                label='lowess')

    if legend:
        ax.legend()

    return ax