# Build the response vector and design matrix from the formula.
eqn = """happiness ~ -1 + freedom + family + year + economy + health + trust"""
y, x = pt.dmatrices(eqn, data=data)

# Initialize the model with one spline term per design-matrix column, then
# fit it with a grid search.
gam = LinearGAM(s(0) + s(1) + s(2) + s(3) + s(4) + s(5))
gam = gam.gridsearch(np.asarray(x), y)

# Specify plot shape: a 2x3 grid, one panel per predictor.
titles = ['freedom', 'family', 'year', 'economy', 'health', 'trust']
fig = tools.make_subplots(rows=2, cols=3, subplot_titles=titles)
fig['layout'].update(height=800, width=1200, title='pyGAM', showlegend=False)

for i, _title in enumerate(titles):
    XX = gam.generate_X_grid(term=i)
    pdep, confi = gam.partial_dependence(term=i, width=.95)
    feature_values = XX[:, i]

    # Effect curve first, then the two interval bounds (same order as before).
    panel_traces = [
        go.Scatter(x=feature_values, y=pdep, mode='lines', name='Effect'),
    ]
    panel_traces.extend(
        go.Scatter(x=feature_values, y=confi[:, bound],
                   line=dict(dash='dash', color='grey'), name='95% CI')
        for bound in (0, 1)
    )

    # Panels fill row by row: terms 0-2 on row 1, terms 3-5 on row 2.
    row, col = divmod(i, 3)
    for panel_trace in panel_traces:
        fig.append_trace(panel_trace, row + 1, col + 1)

py.plot(fig)
# Fit a GAM for the sale price: a spline on living area and linear terms for
# the remaining predictors.
predictors = ['SqFtTotLiving', 'SqFtLot', 'Bathrooms', 'Bedrooms', 'BldgGrade']
outcome = 'AdjSalePrice'
X = house_98105[predictors].values
y = house_98105[outcome]

## model
gam = LinearGAM(s(0, n_splines=12) + l(1) + l(2) + l(3) + l(4))
gam.gridsearch(X, y)
# summary() prints the report itself and returns None, so wrapping it in
# print() only appended a stray "None" line to the output.
gam.summary()

# One panel per predictor on a 3x2 grid; the sixth panel stays unused.
fig, axes = plt.subplots(figsize=(8, 8), ncols=2, nrows=3)
titles = list(predictors)
for i, title in enumerate(titles):
    ax = axes[i // 2, i % 2]
    XX = gam.generate_X_grid(term=i)
    # A single call yields both the partial dependence and its 95% interval.
    pdep, confi = gam.partial_dependence(term=i, X=XX, width=.95)
    ax.plot(XX[:, i], pdep)
    ax.plot(XX[:, i], confi, c='r', ls='--')
    ax.set_title(title)

# Hide the empty sixth panel.
axes[2][1].set_visible(False)

plt.tight_layout()
plt.show()

## Additional material - not in book
## Regularization
### Lasso
# Snippet collapsed onto a single line: the original line breaks are lost and the
# final `gam2 = LinearGAM(...) +` statement is cut off, so the code text below is
# left untouched.
# What it does: fits `gam1`, a LinearGAM without intercept (spline terms on
# features 0 and 1, factor term on feature 2) via gridsearch(X, y); draws each
# term's partial dependence and the width=.95 interval on a 1x3 subplot grid;
# then prints the prediction and the per-term partial dependence for the single
# sample [[2021, 29, 4]].
# NOTE(review): plt.figure(figsize=...) is immediately followed by plt.subplots(1, 3),
# which opens its own figure -- the figsize presumably never reaches the plotted
# figure; confirm.
# NOTE(review): the sample uses education level 4 while the comment lists levels 0-3
# -- confirm the factor term has seen that level.
# NOTE(review): whether the plt.rcParams assignment sits inside the plotting loop
# cannot be recovered from the collapsed line.
#X[0] is the year; X[0] = 0 is 2000?... #X[1] is the person's age #X[2] is their education level, 0 = basic, 1=upper secondary, 2 = university, 3= postgraduate #y income $$ ## model gam1 = LinearGAM(s(0) + s(1) + f(2), fit_intercept=False) gam1.gridsearch(X, y) ## plotting plt.figure(figsize=(10, 7.5)) fig, axs = plt.subplots(1, 3) titles = ['year', 'age', 'education'] for i, ax in enumerate(axs): XX = gam1.generate_X_grid(term=i) ax.plot(XX[:, i], gam1.partial_dependence(term=i, X=XX)) ax.plot(XX[:, i], gam1.partial_dependence(term=i, X=XX, width=.95)[1], c='r', ls='--') ax.set_title(titles[i]) plt.rcParams['figure.figsize'] = [10, 7.5] XX = [[2021, 29, 4]] print(gam1.predict(XX)) for i in range(3): print(gam1.partial_dependence(term=i, X=XX)) ## model gam2 = LinearGAM(s(0, constraints='monotonic_inc') +
def get_graph_figure(data, atlas, spl):
    """Build a Plotly figure of GAM-smoothed ``atlas`` values against age.

    Three curves are fitted, each with its own single-spline ``LinearGAM``:
    one on every row of ``data``, one on the rows where ``Sex == 'M'`` and
    one on the rows where ``Sex == 'F'``. Every curve is drawn together with
    its ``width=0.95`` interval band. The two sex-specific groups are added
    with ``visible='legendonly'``; the overall group is added without a
    ``visible`` setting.

    Args:
        data: Table exposing ``Age``, ``Sex`` and the ``atlas`` column
            (read through ``data.Age.array`` / ``data[atlas].array``;
            presumably a pandas DataFrame -- confirm against the caller).
        atlas: Name of the column to plot. Its first character selects the
            y-axis title: ``'B'`` gives ``'Brain Volume'``, ``'R'`` gives
            ``'Right ' + atlas[2:-5]``, anything else ``'Left ' + atlas[2:-5]``.
        spl: Number of splines, forwarded to ``s(0, n_splines=spl)``.

    Returns:
        The populated ``go.Figure``.
    """
    fig = go.Figure()

    _add_gam_band(
        fig, data.Age.array, data[atlas].array, spl,
        label='Total', group='total',
        fill_color='rgba(123,0,123,0.2)', line_color='rgb(123,50,123)',
        hidden=False,
    )

    males = data[data['Sex'] == 'M']
    _add_gam_band(
        fig, males.Age.array, males[atlas].array, spl,
        label='Male', group='male',
        fill_color='rgba(0,0,255,0.1)', line_color='rgb(0,100,255)',
        hidden=True,
    )

    females = data[data['Sex'] == 'F']
    _add_gam_band(
        fig, females.Age.array, females[atlas].array, spl,
        label='Female', group='female',
        fill_color='rgba(255,0,0,0.1)', line_color='rgb(255,100,0)',
        hidden=True,
    )

    if atlas[0] == 'B':
        ytitle = 'Brain Volume'
    elif atlas[0] == 'R':
        ytitle = 'Right ' + atlas[2:-5]
    else:
        ytitle = 'Left ' + atlas[2:-5]

    fig.update_layout(
        xaxis_title="Age",
        yaxis_title=ytitle,
    )
    return fig


def _add_gam_band(fig, x, y, spl, *, label, group, fill_color, line_color,
                  hidden):
    """Fit a one-spline LinearGAM to (x, y) and add its curve and band to fig.

    Three traces are added per non-intercept term, in this order: upper
    bound, fitted curve, lower bound. The order matters because
    ``fill='tonexty'`` shades towards the trace added just before.

    Args:
        fig: Figure that receives the traces.
        x: Predictor values (ages).
        y: Response values.
        spl: Number of splines for the single spline term.
        label: Legend name of the fitted curve; the bounds are named
            ``'UPR of <label>'`` and ``'LWR of <label>'``.
        group: Plotly ``legendgroup`` shared by the three traces.
        fill_color: Fill colour of the band.
        line_color: Colour of the dashed bound lines.
        hidden: When true the traces are added with ``visible='legendonly'``.
    """
    gam = LinearGAM(s(0, n_splines=spl)).fit(x, y)

    shared = dict(legendgroup=group,
                  hovertemplate="Age: %{x} <br>Volume: %{y}")
    if hidden:
        # Set only for hidden groups so that visible traces carry no explicit
        # `visible` property, exactly as before.
        shared['visible'] = 'legendonly'
    band_fill = dict(fill='tonexty', fillcolor=fill_color)

    for i, term in enumerate(gam.terms):
        if term.isintercept:
            continue
        XX = gam.generate_X_grid(term=i)
        pdep, confi = gam.partial_dependence(term=i, X=XX, width=0.95)
        grid = XX[:, term.feature]
        fig.add_traces([
            go.Scatter(x=grid, y=confi.T[1], name='UPR of ' + label,
                       line=dict(dash='dash', color=line_color),
                       showlegend=False, **shared),
            go.Scatter(x=grid, y=pdep, name=label,
                       line=dict(color='black', width=3),
                       **band_fill, **shared),
            go.Scatter(x=grid, y=confi.T[0], name='LWR of ' + label,
                       line=dict(dash='dash', color=line_color),
                       showlegend=False, **band_fill, **shared),
        ])
# Fragment collapsed onto a single line; it starts mid-context (y, z, c_user and the
# data_item_* / data_t_ lists are defined elsewhere) and its statement nesting is
# lost, so the code text below is left untouched.
# What it does: collects [z, z - y] pairs into data_item_2 (rows with y >= 0 and
# c_user >= 3) and data_t_, takes th_90 as the 0.9 quantile of the first column of
# data_t_, then collects the pairs with y >= 0 and z <= th_90 into data_item_3.
# Four 4-spline LinearGAMs are fitted on data_item_0..3 (second column against the
# first). On figure 1 it plots a dashed black line of zeros, the data_item_0 fit in
# red ('X=DeepCoNN') and the data_item_2 fit in blue ('X=MF_re').
# NOTE(review): it cannot be told from here whether data_t_.append belongs inside the
# `if`; that decides which rows feed th_90 -- confirm against the original file.
# NOTE(review): gam_90 / gam_mf_90 and XX_90 are only used by the commented-out plot
# within this fragment.
# NOTE(review): `diag` is plotted against `zeros`, i.e. a horizontal baseline rather
# than a diagonal -- confirm the name is just a leftover.
for m in range(len(y)): if y[m] >= 0 and c_user[m] >= 3: data_item_2.append([z[m], z[m]-y[m]]) data_t_.append([z[m], z[m]-y[m]]) th_90 = np.quantile(np.array(data_t_)[:, 0], 0.9) for m in range(len(y)): if y[m] >= 0 and z[m] <= th_90: data_item_3.append([z[m], z[m]-y[m]]) gam = LinearGAM(n_splines=4).fit(np.array(data_item_0)[:, 0], np.array(data_item_0)[:, 1]) XX = gam.generate_X_grid(term=0) gam_90 = LinearGAM(n_splines=4).fit(np.array(data_item_1)[:, 0], np.array(data_item_1)[:, 1]) XX_90 = gam_90.generate_X_grid(term=0) u_lim = max(max(np.array(data_item_0)[:, 0]), max(np.array(data_item_0)[:, 1])) diag = np.linspace(0, u_lim) zeros = np.linspace(0, 0) plt.figure(1); plt.plot(diag, zeros, color='black', ls='--', lw=3) plt.plot(XX, gam.predict(XX), color='r', label='X=DeepCoNN', lw=2) # plt.plot(XX_90, gam_90.predict(XX_90), color='r', ls='--', lw=3, label='Top 90% uRMSE, X=DeepCoNN') gam_mf = LinearGAM(n_splines=4).fit(np.array(data_item_2)[:, 0], np.array(data_item_2)[:, 1]) XX = gam_mf.generate_X_grid(term=0) gam_mf_90 = LinearGAM(n_splines=4).fit(np.array(data_item_3)[:, 0], np.array(data_item_3)[:, 1]) XX_90 = gam_mf_90.generate_X_grid(term=0) plt.plot(XX, gam_mf.predict(XX), color='b', label='X=MF_re', lw=2)
# Fragment collapsed onto a single line; it opens with commented-out code, relies on
# names defined elsewhere (x, y, b, c_user, df_mf, k, i, axes and the data_* lists)
# and is cut off inside the last axes[i].scatter(...) call, so the code text below
# is left untouched.
# What it does: reads column k of df_mf into z; for rows with x >= 0 and
# c_user >= 3 appends [1 + z, z - x] to data_dc, and likewise builds data_best
# (z - b) and data_mf (z - y); fits a 4-spline LinearGAM on data_dc (second column
# against the first); scatters the data_dc points in red on axes[i] and begins a
# second scatter of the data_mf points.
# NOTE(review): which of the append statements sit inside the `if` cannot be
# recovered from the collapsed line -- confirm against the original file.
# NOTE(review): u_lim, diag and zeros are computed here but not used within the
# visible part of the fragment.
# # if i==0 and j==0: # # y = dfs_test[-1].iloc[:, k].tolist() # # else: # # y = dfs_test[i*4+j].iloc[:, k].tolist() z = df_mf.iloc[:, k].tolist() for m in range(len(x)): if x[m] >= 0 and c_user[m] >= 3: data_dc.append([1 + z[m], z[m] - x[m]]) data_best.append([1 + z[m], z[m] - b[m]]) # if j == 0: data_mf.append([1 + z[m], z[m] - y[m]]) gam = LinearGAM(n_splines=4).fit( np.array(data_dc)[:, 0], np.array(data_dc)[:, 1]) XX = gam.generate_X_grid(term=0) # # # gam_90 = LinearGAM(n_splines=4).fit(np.array(data_item_1)[:, 0], np.array(data_item_1)[:, 1]) # # # XX_90 = gam_90.generate_X_grid(term=0) u_lim = max(max(np.array(data_dc)[:, 0]), max(np.array(data_dc)[:, 1])) diag = np.linspace(1, 1 + u_lim) zeros = np.linspace(0, 0) axes[i].scatter(np.array(data_dc)[:, 0], np.array(data_dc)[:, 1], s=4, alpha=.03, color='r') # axes[i].scatter(np.array(data_best)[:, 0], np.array(data_best)[:, 1], s=4, alpha=.03, color='black') axes[i].scatter(np.array(data_mf)[:, 0], np.array(data_mf)[:, 1],
# Cell-style script collapsed onto a single line and cut off inside the last
# go.Scatter(...) call, so the code text below is left untouched.
# What it does: scatter-plots df columns "t" / "Exp_1" with an OLS trendline and
# restyles the markers; fits a LinearGAM with a monotonically increasing spline on
# feature 0 via gridsearch on X_train / y_train reshaped to column vectors;
# evaluates the fit on a 500-point grid (`y`), predicts on X_test (`y_pred`) and
# computes width=.95 prediction intervals on the grid (`y_CI`); then starts a new
# figure with the prediction line and the first interval bound.
# NOTE(review): n_splines=25 is passed to LinearGAM rather than to s(0, ...) --
# confirm that it actually reaches the spline term.
# NOTE(review): the trace named "95% Confidence" plots the output of
# prediction_intervals(), not confidence_intervals() -- confirm the intended label.
# NOTE(review): X_train is reshaped to a column before fitting, X_test is passed to
# predict() as-is -- confirm its shape.
# scatter raw data fig = px.scatter(df, x="t", y="Exp_1", trendline="ols") # update the markers fig.update_traces(marker=dict(size=2, line=dict(width=2, color='DarkSlateGrey')), selector=dict(mode='markers')) fig.show() #%% # pyGAM # train gam = LinearGAM(s(0, constraints="monotonic_inc"), n_splines=25).gridsearch(X_train.reshape((-1, 1)), y_train.reshape((-1, 1))) # predict XX = gam.generate_X_grid(term=0, n=500) y = gam.predict(XX) y_pred = gam.predict(X_test) y_CI = gam.prediction_intervals(XX, width=.95) #%% # plot prediction and confidence intervals fig = go.Figure() fig.add_trace( go.Scatter(x=XX.reshape((-1, )), y=y, name="Prediction", line=dict(color="firebrick", width=1))) fig.add_trace( go.Scatter(x=XX.reshape((-1, )), y=y_CI[:, 0], name="95% Confidence",