Esempio n. 1
0
	def model_fit_plot(self, y, yhat):
		"""Scatter observed values against fitted values with an OLS line.

		Args:
			y: observed values, drawn on the vertical axis.
			yhat: fitted values, drawn on the horizontal axis.

		The overlaid line is the OLS regression of ``y`` on ``yhat`` (with an
		intercept). Nothing is returned; the plot lives on a new figure.
		"""
		fig, ax = plt.subplots()
		ax.scatter(yhat, y)
		line_fit = sm.OLS(y, sm.add_constant(yhat, prepend=True)).fit()
		abline_plot(model_results=line_fit, ax=ax)
		# The title used to be set twice; only the last call took effect, so
		# that one is kept.
		ax.set_title('Model fit plot')
		ax.set_ylabel('Observed values')
		ax.set_xlabel('Fitted values')
Esempio n. 2
0
def plot_observed_predicted(y_data, y_predict, ols_line=False, model_fit=None, figsize=(15, 10), save=False, end_name_fig='', folder='Charts'):
    """
    Plot observed against predicted values, optionally with GLM diagnostics.

    Arguments:
        y_data: observed target values; must provide ``.min()`` / ``.max()``.
        y_predict: predictions for the same observations.
        ols_line: when true, overlay the OLS fit of ``y_data`` on
            ``y_predict``; otherwise draw the dashed identity line.
        model_fit: optional fitted model exposing ``resid_pearson`` and
            ``resid_deviance``. When given, three more figures are drawn:
            a residual dependence plot, a histogram of standardized deviance
            residuals and a QQ plot of the deviance residuals.
        figsize: size of the figures created with ``plt.subplots``.
        save: when true, each figure is written as a PNG into ``folder``.
        end_name_fig: tag placed in the saved file names (an underscore is
            appended to it); ``None`` is treated as an empty tag.
        folder: directory receiving the PNG files; it is not created here.
    """

    end_name_fig = end_name_fig + '_' if end_name_fig is not None else ''

    def _save_current(stem):
        # plt.savefig writes the current figure, i.e. the one created last.
        if save:
            plt.savefig(folder + '/' + stem + '_' + end_name_fig + '.png')

    fig, ax = plt.subplots(figsize=figsize)
    # Predictions on x, observations on y: this matches the axis labels below
    # and the OLS line, which regresses y_data on y_predict.
    ax.scatter(y_predict, y_data)

    if ols_line:
        line_fit = sm.OLS(y_data, sm.add_constant(y_predict, prepend=True)).fit()
        abline_plot(model_results=line_fit, ax=ax)
    else:
        lower, upper = y_data.min(), y_data.max()
        ax.plot([lower, upper], [lower, upper], 'k--', lw=4)

    ax.set_title('Predicted vs Observed')
    ax.set_ylabel('Observed values')
    ax.set_xlabel('Predicted values')
    _save_current('predict_observed')

    if model_fit is None:
        return

    # Pearson residuals against the fitted values.
    # NOTE(review): the zero line and x-limits are fixed to [0, 1], which only
    # suits predictions on that scale -- confirm against callers.
    fig, ax = plt.subplots(figsize=figsize)
    ax.scatter(y_predict, model_fit.resid_pearson)
    ax.hlines(0, 0, 1)
    ax.set_xlim(0, 1)
    ax.set_title('Residual Dependence Plot')
    ax.set_ylabel('Pearson Residuals')
    ax.set_xlabel('Fitted values')
    _save_current('pearson_residuals')

    # Histogram of the z-scored deviance residuals.
    fig, ax = plt.subplots(figsize=figsize)
    res_dev_residuals = model_fit.resid_deviance.copy()
    res_dev_residuals_std = stats.zscore(res_dev_residuals)
    ax.hist(res_dev_residuals_std, bins=25)
    ax.set_title('Histogram of standardized deviance residuals')
    _save_current('standard_deviance_residuals')

    # QQ plot of the raw deviance residuals; qqplot creates its own figure.
    graphics.gofplots.qqplot(res_dev_residuals, line='r')
    _save_current('gofplot')
Esempio n. 3
0
def run_model(offer_name: str, norm_func=None):
    """Train, evaluate and chart the model for a single offer.

    Args:
        offer_name: offer passed to ``filter_by_offer`` to select the data;
            also used in the chart title and the saved file name.
        norm_func: forwarded to ``build_model`` as ``func_name``; also used
            in the chart title and the saved file name.

    The data is split 75/25 into train and test sets (no fixed seed, so runs
    are not reproducible). The fitted model must expose ``best_params_`` and
    ``best_score_``. A truth-vs-prediction scatter plot is shown and then
    saved under ``./docs/assets/``. Nothing is returned.
    """
    X, y = filter_by_offer(offer_name)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)
    categorical_cols = ['gender']
    continuous_cols = [
        'age', 'became_member_on', 'income', 'total_spending', 'total_offers'
    ]

    print('Building model...')
    model = build_model(categorical_cols, continuous_cols, func_name=norm_func)

    print('Training model...')
    model.fit(X_train, y_train)
    pprint.pprint(model.best_params_)
    print(f'\nBest Score:{model.best_score_:.2%}', '\n')

    print('Evaluating model...')
    r2, Mean_Abs_Perc_Err, Mean_Abs_Err, RMS_Err, y_pred = evaluate_model(
        model, X_test, y_test)
    print(
        f'\nThis model explains {r2:.2%} of the variance of the amount_viewed')
    print(f'Our predictions are wrong by {Mean_Abs_Perc_Err:.2%}')
    print(f'Which represent on average ${Mean_Abs_Err:.2f}')

    # The backslash keeps the dollar sign literal for matplotlib's mathtext;
    # it is doubled so Python does not see an invalid "\$" escape sequence.
    res_text = f'$r^{{2}}$: {r2:.2%}\nRMSE: \\${RMS_Err:.2f}'

    f, ax = plt.subplots(figsize=[8, 8])
    plt.scatter(y_test, y_pred)
    # Dashed red identity line: points on it are perfect predictions.
    abline_plot(intercept=0, slope=1, color="red", ax=ax, ls='--', lw=2)

    anchored_text = AnchoredText(res_text,
                                 loc='lower right',
                                 frameon=False,
                                 prop={
                                     'weight': 'light',
                                     'size': 16
                                 })
    ax.add_artist(anchored_text)

    ax.set_xlabel('Truth')
    ax.set_ylabel('Prediction')
    ax.set_title(f'Offer: {offer_name} - Transformation: {norm_func}',
                 weight='bold',
                 fontsize=16)
    plt.show()

    # Saved through the figure object, so it does not depend on which figure
    # is current after plt.show().
    path_fig = './docs/assets/'
    fig_name = 'truth_vs_preds' + f'_{offer_name}' + f'_{norm_func}' + '.png'
    f.savefig(path_fig + fig_name, transparent=True, dpi=150)
Esempio n. 4
0
def plot_corr(pred, obs):
    ''' Plot predicted value vs. observed value.

    Args:
        pred - Predicted values (x axis)
        obs - Observed values (y axis)
    Returns:
        (fig, ax) of the new plot. The title shows the Pearson correlation
        and both axes share the same range, so the identity line runs
        corner to corner.
    '''
    rval = sp.stats.pearsonr(pred, obs)[0]
    fig, ax = plt.subplots()
    ax.scatter(pred, obs)
    abline_plot(ax=ax, slope=1, intercept=0)
    ax.set_title('R = {}'.format(rval))
    ax.set_ylabel('Observed values')
    ax.set_xlabel('Predicted values')
    # Common bounds for both axes, computed once.
    lower = min(min(pred), min(obs))
    upper = max(max(pred), max(obs))
    ax.set_xlim([lower, upper])
    ax.set_ylim([lower, upper])
    return fig, ax
Esempio n. 5
0
 def plot(self, y, x, title, ylabel, xlabel, type, xmin=0, xmax=1):
     """Show a scatter plot with either a fitted line or a zero line.

     ``y`` is drawn on the horizontal axis and ``x`` on the vertical one.
     With ``type == "fit"`` the OLS regression of ``x`` on ``y`` is overlaid;
     with ``type == "residual"`` the x-range is clamped to ``[xmin, xmax]``
     and a horizontal line at zero is drawn. Any other ``type`` gives a bare
     scatter plot. The figure is displayed with ``plt.show()``.
     """
     fig, ax = plt.subplots()
     ax.scatter(y, x)

     if type == "residual":
         ax.set_xlim(xmin, xmax)
         ax.hlines(0, xmin, xmax)
     elif type == "fit":
         design = sm.add_constant(y, prepend=True)
         fitted_line = sm.OLS(endog=x, exog=design).fit()
         abline_plot(model_results=fitted_line, ax=ax)

     ax.set_xlabel(xlabel)
     ax.set_ylabel(ylabel)
     ax.set_title(title)
     plt.show()
Esempio n. 6
0
	def histogram_of_std_deviance_residuals(self, x, y, fit_model):
		"""Draw four diagnostic figures for a fitted GLM.

		Despite the name, this creates, in order: a model fit plot (observed
		proportions against fitted values with an OLS line), a residual
		dependence plot (Pearson residuals against fitted values), a
		histogram of standardized deviance residuals, and a QQ plot of the
		deviance residuals.

		Args:
			x: unused.
			y: 2-D response; each first-axis entry is divided by the sum
				taken over axis 1 before plotting.
			fit_model: fitted model exposing ``mu``, ``resid_pearson`` and
				``resid_deviance``.

		TODO: untested for all exponential family functions.
		"""
		from scipy import stats
		from statsmodels import graphics
		from statsmodels.graphics.api import abline_plot

		y = y / y.sum(1)
		yhat = fit_model.mu

		# Observed against fitted values, with the OLS line of y on yhat.
		fig, ax = plt.subplots()
		ax.scatter(yhat, y)
		line_fit = sm.OLS(y, sm.add_constant(yhat, prepend=True)).fit()
		abline_plot(model_results=line_fit, ax=ax)
		ax.set_title('Model Fit Plot')
		ax.set_ylabel('Observed values')
		ax.set_xlabel('Fitted values')

		# Pearson residuals against fitted values; the zero line and x-range
		# are fixed to [0, 1].
		fig, ax = plt.subplots()
		ax.scatter(yhat, fit_model.resid_pearson)
		ax.hlines(0, 0, 1)
		ax.set_xlim([0, 1])
		ax.set_title('Residual Dependence Plot')
		ax.set_ylabel('Pearson Residuals')
		ax.set_xlabel('Fitted values')

		# Histogram of the z-scored deviance residuals.
		fig, ax = plt.subplots()
		resid = fit_model.resid_deviance.copy()
		resid_std = stats.zscore(resid)
		ax.hist(resid_std, bins=25)
		ax.set_title('Histogram of standardized deviance residuals')

		# QQ plot of the raw deviance residuals.
		fig = graphics.gofplots.qqplot(resid, line='r')
Esempio n. 7
0
def pval_qqplot(pvals, labels=None, ax=None):
    ''' Plot a pval qqplot and label top points when provided.
    Args:
        pvals - List or np.array of pvals
        labels - List or np.array of top point labels. Default None.
        ax - Axes to draw on. A new figure is created when None.
    Returns:
        The axes the plot was drawn on.
    '''
    obs_pval = -np.log10(np.sort(pvals))
    # p = 0 gives -log10(p) = inf: place those points 5 units above the
    # largest finite value (or at 5 when every p-value is zero).
    infinite = np.isinf(obs_pval)
    if infinite.any():
        finite = obs_pval[~infinite]
        ceiling = finite.max() if finite.size else 0.0
        obs_pval[infinite] = ceiling + 5
    # Expected quantiles i / (n + 1). The "+ 1" used to sit inside len(),
    # where it had no effect on the denominator.
    n_pvals = len(obs_pval)
    exp_pval = -np.log10(np.arange(1, n_pvals + 1) / float(n_pvals + 1))
    if ax is None:
        fig, ax = plt.subplots()
    ax.scatter(exp_pval, obs_pval)
    abline_plot(ax=ax, slope=1, intercept=0)
    ax.set_ylabel('Observed p-values')
    ax.set_xlabel('Expected p-values')
    # Explicit None/length test: truth-testing a non-empty np.array raises.
    if labels is not None and len(labels) > 0:
        # Points are sorted with the smallest p-value first, so the labels
        # attach to the most significant points; zip stops at the last label.
        for g, x, y in zip(labels, exp_pval, obs_pval):
            ax.annotate(g, xy=(x + 0.1, y + 0.1))
    return ax
    plt.scatter(group['TEST'], group['JPERF'], color=colors[factor],
                marker=markers[factor], s=12**2)
plt.xlabel('TEST')
plt.ylabel('JPERF')

# Single regression line: JPERF on TEST only.
min_lm = ols('JPERF ~ TEST', data=jobtest_table).fit()
print(min_lm.summary())

# Redraw the grouped scatter on a new figure and overlay the fitted line.
plt.figure(figsize=(6, 6))
for factor, group in factor_group:
    plt.scatter(
        group['TEST'],
        group['JPERF'],
        color=colors[factor],
        marker=markers[factor],
        s=12 ** 2,
    )

plt.xlabel('TEST')
plt.ylabel('JPERF')
abline_plot(model_results=min_lm, ax=plt.gca())

# Same intercept, slope allowed to differ through the TEST:ETHN interaction.
min_lm2 = ols('JPERF ~ TEST + TEST:ETHN', data=jobtest_table).fit()

print(min_lm2.summary())

plt.figure(figsize=(6, 6))
for factor, group in factor_group:
    plt.scatter(
        group['TEST'],
        group['JPERF'],
        color=colors[factor],
        marker=markers[factor],
        s=12 ** 2,
    )

# Line using the TEST coefficient alone as slope.
abline_plot(
    intercept=min_lm2.params['Intercept'],
    slope=min_lm2.params['TEST'],
    ax=plt.gca(),
    color='purple',
)
abline_plot(intercept = min_lm2.params['Intercept'],
        slope = min_lm2.params['TEST'] + min_lm2.params['TEST:ETHN'],
Esempio n. 9
0
            fontsize=16,
     )
# annotate these with their index
# (.loc replaces the .ix indexer, which no longer exists in pandas)
for i, row in dta.loc[dta['log.Te'] < 3.8].iterrows():
    ax.annotate(i, row, row + .01, fontsize=14)
xlim, ylim = ax.get_xlim(), ax.get_ylim()


from IPython.display import Image
Image(filename='star_diagram.png')


# OLS fit of log.light on log.Te (with intercept), drawn on the existing axes.
y = dta['log.light']
X = sm.add_constant(dta['log.Te'], prepend=True)
ols_model = sm.OLS(y, X).fit()
abline_plot(model_results=ols_model, ax=ax)


# Robust fit with a trimmed-mean norm, drawn in red for comparison.
rlm_mod = sm.RLM(y, X, sm.robust.norms.TrimmedMean(.5)).fit()
abline_plot(model_results=rlm_mod, ax=ax, color='red')


# * Why? Because M-estimators are not robust to leverage points.

infl = ols_model.get_influence()


# Hat-matrix diagonal entries above the 2 * (df_model + 1) / nobs cut-off.
h_bar = 2 * (ols_model.df_model + 1) / ols_model.nobs
hat_diag = infl.summary_frame()['hat_diag']
hat_diag.loc[hat_diag > h_bar]
    verticalalignment='bottom',
    clip_on=True,  # clip to the axes bounding box
    fontsize=16,
)
# Label every point whose log.Te is below 3.8 with its index.
low_te_mask = dta['log.Te'] < 3.8
for i, row in dta.loc[low_te_mask].iterrows():
    ax.annotate(i, row, row + 0.01, fontsize=14)
xlim = ax.get_xlim()
ylim = ax.get_ylim()

from IPython.display import Image
Image(filename='star_diagram.png')

# Ordinary least squares of log.light on log.Te, intercept first.
y = dta['log.light']
X = sm.add_constant(dta['log.Te'], prepend=True)
ols_model = sm.OLS(y, X).fit()
abline_plot(model_results=ols_model, ax=ax)

# Robust regression with a trimmed-mean norm, shown in red.
trimmed_norm = sm.robust.norms.TrimmedMean(0.5)
rlm_mod = sm.RLM(y, X, trimmed_norm).fit()
abline_plot(model_results=rlm_mod, ax=ax, color='red')

# * Why? Because M-estimators are not robust to leverage points.

infl = ols_model.get_influence()

# Observations whose hat-matrix diagonal exceeds 2 * (df_model + 1) / nobs.
h_bar = 2 * (ols_model.df_model + 1) / ols_model.nobs
hat_diag = infl.summary_frame()['hat_diag']
hat_diag.loc[hat_diag > h_bar]

# Outlier test with the 'sidak' method, ordered by unadjusted p-value.
sidak2 = ols_model.outlier_test('sidak')
sidak2.sort_values('unadj_p', inplace=True)
print(sidak2)
Esempio n. 11
0
# 75th percentile of the first exog column, stored in slot 0 of means75.
lowinc_75per = stats.scoreatpercentile(data.exog[:, 0], 75)
means75[0] = lowinc_75per
resp_25 = res.predict(means25)
resp_75 = res.predict(means75)
diff = resp_75 - resp_25

# Interquartile first difference for percentage of low income households
print("%2.4f%%" % (diff * 100))

# Model fit plot: observed proportions against fitted values.
nobs = res.nobs
y = data.endog[:, 0] / data.endog.sum(axis=1)
yhat = res.mu
fig, ax = plt.subplots()
ax.scatter(yhat, y)
fit_design = sm.add_constant(yhat, prepend=True)
line_fit = sm.OLS(y, fit_design).fit()
abline_plot(model_results=line_fit, ax=ax)
ax.set_xlabel("Fitted values")
ax.set_ylabel("Observed values")
ax.set_title("Model Fit Plot")

# Pearson residuals against fitted values, with a zero line over [0, 1].
fig, ax = plt.subplots()
ax.scatter(yhat, res.resid_pearson)
ax.hlines(0, 0, 1)
ax.set_xlim(0, 1)
ax.set_xlabel("Fitted values")
ax.set_ylabel("Pearson Residuals")
ax.set_title("Residual Dependence Plot")

# New axes for the next plot.
fig, ax = plt.subplots()
    plt.scatter(group["TEST"], group["JPERF"], color=colors[factor], marker=markers[factor], s=12 ** 2)
plt.xlabel("TEST")
# @savefig group_test.png align=center
plt.ylabel("JPERF")

# The formula API takes the frame as `data=`; summaries are printed with the
# print() function so the snippet runs on Python 3.
min_lm = ols("JPERF ~ TEST", data=minority_table).fit()
print(min_lm.summary())

plt.figure(figsize=(6, 6))
for factor, group in factor_group:
    plt.scatter(group["TEST"], group["JPERF"], color=colors[factor], marker=markers[factor], s=12 ** 2)

plt.xlabel("TEST")
plt.ylabel("JPERF")
# @savefig abline.png align=center
abline_plot(model_results=min_lm, ax=plt.gca())

# Same intercept, slope allowed to differ through the TEST:ETHN interaction.
min_lm2 = ols("JPERF ~ TEST + TEST:ETHN", data=minority_table).fit()

print(min_lm2.summary())

plt.figure(figsize=(6, 6))
for factor, group in factor_group:
    plt.scatter(group["TEST"], group["JPERF"], color=colors[factor], marker=markers[factor], s=12 ** 2)

# Line using the TEST coefficient alone as slope.
abline_plot(intercept=min_lm2.params["Intercept"], slope=min_lm2.params["TEST"], ax=plt.gca(), color="purple")
# @savefig abline2.png align=center
abline_plot(
    intercept=min_lm2.params["Intercept"],
    slope=min_lm2.params["TEST"] + min_lm2.params["TEST:ETHN"],
    ax=plt.gca(),
Esempio n. 13
0
# Single regression line: JPERF on TEST only.
min_lm = ols('JPERF ~ TEST', data=jobtest_table).fit()
print(min_lm.summary())

# Grouped scatter with the fitted line on top.
fig, ax = plt.subplots(figsize=(6, 6))
for factor, group in factor_group:
    ax.scatter(group['TEST'], group['JPERF'],
               color=colors[factor], marker=markers[factor], s=12 ** 2)

ax.set_ylabel('JPERF')
ax.set_xlabel('TEST')
fig = abline_plot(model_results=min_lm, ax=ax)

# Same intercept, slope allowed to differ through TEST:MINORITY.
min_lm2 = ols('JPERF ~ TEST + TEST:MINORITY', data=jobtest_table).fit()

print(min_lm2.summary())

# New figure with the same grouped scatter for the second model.
fig, ax = plt.subplots(figsize=(6, 6))
for factor, group in factor_group:
    ax.scatter(group['TEST'], group['JPERF'],
               color=colors[factor], marker=markers[factor], s=12 ** 2)
fig = abline_plot(
Esempio n. 14
0
ax.set_ylabel('JPERF')

# Pooled model: JPERF explained by TEST alone.
min_lm = ols('JPERF ~ TEST', data=jobtest_table).fit()
print(min_lm.summary())

# One scatter series per group, then the pooled regression line.
fig, ax = plt.subplots(figsize=(6, 6))
for factor, group in factor_group:
    ax.scatter(
        group['TEST'],
        group['JPERF'],
        color=colors[factor],
        marker=markers[factor],
        s=12 ** 2,
    )

ax.set_ylabel('JPERF')
ax.set_xlabel('TEST')
fig = abline_plot(model_results=min_lm, ax=ax)

# Common intercept, group-specific slope through the TEST:MINORITY term.
min_lm2 = ols('JPERF ~ TEST + TEST:MINORITY', data=jobtest_table).fit()

print(min_lm2.summary())

# Fresh figure carrying the same scatter for the second model's lines.
fig, ax = plt.subplots(figsize=(6, 6))
for factor, group in factor_group:
    ax.scatter(
        group['TEST'],
        group['JPERF'],
        color=colors[factor],
        marker=markers[factor],
        s=12 ** 2,
    )
fig = abline_plot(intercept=min_lm2.params['Intercept'],
                  slope=min_lm2.params['TEST'],
Esempio n. 15
0
    plt.scatter(group['TEST'], group['JPERF'], color=colors[factor],
                marker=markers[factor], s=12**2)
plt.xlabel('TEST')
plt.ylabel('JPERF')

# Summaries are printed with the print() function so the snippet runs on
# Python 3.
min_lm = ols('JPERF ~ TEST', data=minority_table).fit()
print(min_lm.summary())

plt.figure(figsize=(6, 6))
for factor, group in factor_group:
    plt.scatter(group['TEST'], group['JPERF'], color=colors[factor],
                marker=markers[factor], s=12**2)

plt.xlabel('TEST')
plt.ylabel('JPERF')
abline_plot(model_results=min_lm, ax=plt.gca())

# Same intercept, slope allowed to differ through the TEST:ETHN interaction.
min_lm2 = ols('JPERF ~ TEST + TEST:ETHN',
              data=minority_table).fit()

print(min_lm2.summary())

plt.figure(figsize=(6, 6))
for factor, group in factor_group:
    plt.scatter(group['TEST'], group['JPERF'], color=colors[factor],
                marker=markers[factor], s=12**2)

# Line using the TEST coefficient alone as slope.
abline_plot(intercept=min_lm2.params['Intercept'],
            slope=min_lm2.params['TEST'], ax=plt.gca(), color='purple')
abline_plot(intercept = min_lm2.params['Intercept'],
        slope = min_lm2.params['TEST'] + min_lm2.params['TEST:ETHN'],
Esempio n. 16
0
# Pooled model: JPERF explained by TEST alone.
min_lm = ols("JPERF ~ TEST", data=jobtest_table).fit()
print(min_lm.summary())

# Grouped scatter followed by the pooled regression line.
fig, ax = plt.subplots(figsize=(6, 6))
for factor, group in factor_group:
    ax.scatter(group["TEST"], group["JPERF"],
               color=colors[factor], marker=markers[factor], s=12 ** 2)

ax.set_ylabel("JPERF")
ax.set_xlabel("TEST")
fig = abline_plot(model_results=min_lm, ax=ax)

# Common intercept, group-specific slope through the TEST:MINORITY term.
min_lm2 = ols("JPERF ~ TEST + TEST:MINORITY", data=jobtest_table).fit()

print(min_lm2.summary())

# Fresh figure with the same grouped scatter for the second model.
fig, ax = plt.subplots(figsize=(6, 6))
for factor, group in factor_group:
    ax.scatter(group["TEST"], group["JPERF"],
               color=colors[factor], marker=markers[factor], s=12 ** 2)
def glmControlPlots(fitted_model, y, yAll, yhat, yhatAll, line_fit,
                    meanRespAll, rr, rp, iv_u, iv_l, iv_uAll, iv_lAll, rrivU,
                    rrivL):
    """Draw a 2x2 grid of control plots for a fitted GLM and display it.

    Panels:
        top-left: observed (``yAll``, ``y``) against fitted (``yhatAll``,
            ``yhat``) values, the ``line_fit`` regression line, and the
            upper/lower intervals (dashed grey for the ``*All`` series,
            red for the subset).
        top-right: ``rp`` against ``yhat`` with a zero line, labelled as
            Pearson residuals.
        bottom-left: ``yAll``, ``yhatAll`` and ``meanRespAll`` against the
            observation rank after sorting by ``yhatAll``.
        bottom-right: bars of ``yAll``, overlaid bars of ``-yhatAll``
            hanging from ``yAll``, and ``rr`` as black dots.

    Args:
        fitted_model: fitted model; its family, link, formula, deviance,
            ``params`` and ``pvalues`` are written above the panels.
        y, yhat, iv_u, iv_l: observed values, fitted values and upper/lower
            interval bounds for the subset of observations.
        yAll, yhatAll, iv_uAll, iv_lAll: the same series for all
            observations.
        line_fit: fitted results passed to ``abline_plot``.
        meanRespAll: series drawn as the black line in the bottom-left panel.
        rr: values drawn as black dots in the bottom-right panel.
        rp: values drawn in the top-right panel.
        rrivU, rrivL: stacked with the sorted series but not drawn here.

    The call blocks in ``plt.show(block=True)`` and returns ``None``.
    """
    # Stack the series and order the rows by fitted value, so the interval
    # curves run left to right instead of zig-zagging.
    ivs_sorted = np.column_stack(
        [yhatAll.T, iv_uAll.T, iv_lAll.T, yAll.T, meanRespAll.T, rrivU,
         rrivL])
    ivs_sorted = ivs_sorted[ivs_sorted[:, 0].argsort()]
    # Same treatment for the subset, shown for comparison.
    ivs_sorteds = np.column_stack([yhat.T, iv_u.T, iv_l.T])
    ivs_sorteds = ivs_sorteds[ivs_sorteds[:, 0].argsort()]

    # 1-based observation positions shared by the two bottom panels.
    obs_ids = np.arange(1, len(yAll) + 1, 1)

    fig, ax = plt.subplots(2, 2, figsize=[20, 20])
    fit_ax, resid_ax = ax[0, 0], ax[0, 1]
    series_ax, bars_ax = ax[1, 0], ax[1, 1]

    # --- top-left: linearized model with intervals ---
    fit_ax.plot(ivs_sorted[:, 0], ivs_sorted[:, 1],
                color='lightgray', linestyle='dashed')  # upper, all
    fit_ax.plot(ivs_sorteds[:, 0], ivs_sorteds[:, 1], color='red')
    fit_ax.plot(ivs_sorted[:, 0], ivs_sorted[:, 2],
                color='lightgray', linestyle='dashed')  # lower, all
    fit_ax.plot(ivs_sorteds[:, 0], ivs_sorteds[:, 2], color='red')
    abline_plot(model_results=line_fit, ax=fit_ax, color='black')
    fit_ax.scatter(yhatAll, yAll, s=50)  # all observations
    fit_ax.scatter(yhat, y, s=10)  # subset, drawn on top in another color
    fit_ax.set_xlabel(
        'fitted relative changes (attributed to n and image quality)')
    fit_ax.set_ylabel(
        'observed relative changes (blue dots, water with orange kernel)')
    fit_ax.set_title('Linearized model')
    # Pad both axes by 10% of the data range.
    y_pad = (yAll.max() - yAll.min()) * 0.1
    x_pad = (yhatAll.max() - yhatAll.min()) * 0.1
    fit_ax.set_ylim(yAll.min() - y_pad, yAll.max() + y_pad)
    fit_ax.set_xlim(yhatAll.min() - x_pad, yhatAll.max() + x_pad)

    # --- top-right: residual dependence ---
    resid_ax.scatter(yhat, rp, s=50, color='C2')
    resid_ax.hlines(0, min(yhat), max(yhat))
    resid_ax.set_xlabel('fitted values')
    resid_ax.set_ylabel('Pearson residuals')
    resid_ax.set_title('Residual Dependence Plot')

    # --- bottom-left: observed, fitted and mean series by sorted rank ---
    series_ax.scatter(obs_ids, ivs_sorted[:, 3])
    series_ax.scatter(obs_ids, ivs_sorted[:, 0], s=20, color='grey')
    series_ax.plot(obs_ids, ivs_sorted[:, 4], color='black')
    series_ax.axes.xaxis.set_visible(False)
    series_ax.set_ylabel('observed and fitted relative changes')
    series_ax.set_title("\n".join(
        wrap(
            'Original relative change (blue) and change attributed to n and image quality (i.e. fitted values, grey), Black line shows the fitted mean',
            80)))

    # --- bottom-right: decomposition bars ---
    bars_ax.bar(obs_ids, yAll)
    bars_ax.scatter(obs_ids, rr, color='black', zorder=4)
    # Negative heights starting at yAll: these bars hang down from the top
    # of the original bars.
    bars_ax.bar(obs_ids, -yhatAll, bottom=yAll, color='C1', alpha=0.5)
    bars_ax.set_ylim(0, np.max([1, max(yAll)]))
    bars_ax.axes.xaxis.set_visible(False)
    bars_ax.set_ylabel('relative changes')
    bars_ax.set_title("\n".join(
        wrap(
            'Relative changes: original (tops), attributed to n and image quality (upper bar) and attributed to treatment effects (bottom bar)',
            80)))

    # Model summary text placed above the panels.
    textstr1 = f'Family: {type(fitted_model.family).__name__},    Link: {type(fitted_model.family.link).__name__}  \nFormula: {fitted_model.model.formula} \nDeviance: {fitted_model.deviance.round(2)}'
    textstr2 = f'                       Coefficients \n{fitted_model.params.round(4).to_string()}'
    textstr3 = f'                       p-Values \n{fitted_model.pvalues.round(3).to_string()}'
    plt.gcf().text(0.05, 0.92, textstr1, fontsize=16)
    plt.gcf().text(0.6, 0.92, textstr2, fontsize=14)
    plt.gcf().text(0.8, 0.92, textstr3, fontsize=14)
    fig.show()
    plt.show(block=True)
# Interquartile first difference for the percentage of low income
# households in a school district:

print("%2.4f%%" % (diff[0] * 100))

# Quantities reused by the plots below.
nobs = glm_mod.nobs
y = glm_mod.model.endog
yhat = glm_mod.mu

from statsmodels.graphics.api import abline_plot
# Observed against fitted values, with the OLS line of y on yhat.
fig = plt.figure(figsize=(12, 8))
ax = fig.add_subplot(1, 1, 1, xlabel='Fitted Values', ylabel='Observed Values')
ax.scatter(yhat, y)
fit_design = sm.add_constant(yhat, prepend=True)
y_vs_yhat = sm.OLS(y, fit_design).fit()
fig = abline_plot(model_results=y_vs_yhat, ax=ax)

# #### Plot fitted values vs Pearson residuals

# Pearson residuals are defined to be
#
# $$\frac{(y - \mu)}{\sqrt{(var(\mu))}}$$
#
# where var is typically determined by the family. E.g., binomial variance
# is $np(1 - p)$

fig = plt.figure(figsize=(12, 8))
ax = fig.add_subplot(
    1, 1, 1,
    xlabel='Fitted Values',
    ylabel='Pearson Residuals',
    title='Residual Dependence Plot',
)
# Interquartile first difference for the percentage of low income
# households in a school district:

print("%2.4f%%" % (diff[0] * 100))

# Observed response and fitted means taken from the fitted GLM.
nobs = glm_mod.nobs
y = glm_mod.model.endog
yhat = glm_mod.mu

from statsmodels.graphics.api import abline_plot
# Scatter of observed against fitted values plus the OLS line of y on yhat.
fig = plt.figure(figsize=(12, 8))
ax = fig.add_subplot(
    1, 1, 1,
    xlabel='Fitted Values',
    ylabel='Observed Values',
)
ax.scatter(yhat, y)
y_vs_yhat = sm.OLS(y, sm.add_constant(yhat, prepend=True)).fit()
fig = abline_plot(ax=ax, model_results=y_vs_yhat)

# #### Plot fitted values vs Pearson residuals

# Pearson residuals are defined to be
#
# $$\frac{(y - \mu)}{\sqrt{(var(\mu))}}$$
#
# where var is typically determined by the family. E.g., binomial variance
# is $np(1 - p)$

fig = plt.figure(figsize=(12, 8))
ax = fig.add_subplot(
    111,
    title='Residual Dependence Plot',
    xlabel='Fitted Values',
Esempio n. 20
0
#
# Quantities extracted from the fit and reused by the plots below:

nobs = res.nobs
y = data.endog[:, 0] / data.endog.sum(axis=1)
yhat = res.mu

# Observed proportions against fitted values, with the OLS line of y on yhat:

from statsmodels.graphics.api import abline_plot

fig, ax = plt.subplots()
ax.scatter(yhat, y)
fit_design = sm.add_constant(yhat, prepend=True)
line_fit = sm.OLS(y, fit_design).fit()
abline_plot(model_results=line_fit, ax=ax)

ax.set_xlabel('Fitted values')
ax.set_ylabel('Observed values')
ax.set_title('Model Fit Plot')

# Pearson residuals against fitted values, with a zero line over [0, 1]:

fig, ax = plt.subplots()

ax.scatter(yhat, res.resid_pearson)
ax.hlines(0, 0, 1)
ax.set_xlim(0, 1)
ax.set_xlabel('Fitted values')
ax.set_ylabel('Pearson Residuals')
ax.set_title('Residual Dependence Plot')
Esempio n. 21
0
print("Mean Absolute Error (Σ|y-pred|/n):", "{:,.0f}".format(metrics.mean_absolute_error(y_test, predicted)))
print("Root Mean Squared Error (sqrt(Σ(y-pred)^2/n)):", "{:,.0f}".format(np.sqrt(metrics.mean_squared_error(y_test, predicted))))
## Residuals: pick the signed error with the largest magnitude. On a tie in
## magnitude the smallest (most negative) residual is chosen.
residuals = y_test - predicted
largest, smallest = max(residuals), min(residuals)
max_error = largest if abs(largest) > abs(smallest) else smallest
max_idx = list(residuals).index(max_error)
max_true = y_test[max_idx]
max_pred = predicted[max_idx]
print("Max Error:", "{:,.0f}".format(max_error))


## Plot predicted vs true value
fig = plt.figure()
ax = plt.axes()
from statsmodels.graphics.api import abline_plot
ax.scatter(predicted, y_test, color="black")
abline_plot(intercept=0, slope=1, color=colours[1], ax=ax)
# Dashed segment from the true value to true - error at the worst prediction.
ax.vlines(x=max_pred, ymin=max_true, ymax=max_true - max_error,
          color=colours[1], linestyle='--', alpha=0.7, label="max error")
ax.grid(True)
ax.set_xlabel("Predicted")
ax.set_ylabel("True Value")
ax.set_title("Predicted vs True")
ax.legend()
    
## Plot predicted vs residuals
#ax[1].scatter(predicted, residuals, color="red")
#ax[1].vlines(x=max_pred, ymin=0, ymax=max_error, color=colours[1], linestyle='--', alpha=0.7, label="max error")
#ax[1].grid(True)
#ax[1].set(xlabel="Predicted", ylabel="Residuals", title="Predicted vs Residuals")
#ax[1].hlines(y=0, xmin=np.min(predicted), xmax=np.max(predicted))
#ax[1].legend()