def wage_data_linear():
    """Fit a LinearGAM to the wage data and save its partial-dependence plots.

    The smoothing penalty is picked by ``gridsearch`` over 50 log-spaced
    candidates between 1e-5 and 1e3.  A 1x3 grid of axes is drawn, one panel
    per feature column (titled 'year', 'age', 'education'), each showing the
    partial dependence plus the bounds returned for ``width=.95`` as dashed
    red lines.  The figure is written to
    ``imgs/pygam_wage_data_linear.png`` at 300 dpi.  Nothing is returned.
    """
    X, y = wage()

    gam = LinearGAM(n_splines=10)
    gam.gridsearch(X, y, lam=np.logspace(-5, 3, 50))

    XX = gam.generate_X_grid()

    # plt.subplots() creates its own figure; an extra bare plt.figure() before
    # it would only leave an empty, never-closed figure behind.
    fig, axs = plt.subplots(1, 3)

    titles = ['year', 'age', 'education']
    for i, ax in enumerate(axs):
        ax.plot(XX[:, i], gam.partial_dependence(XX, feature=i))
        ax.plot(XX[:, i],
                *gam.partial_dependence(XX, feature=i, width=.95)[1],
                c='r', ls='--')
        # Only the first panel gets a fixed y-range; every panel gets a title.
        if i == 0:
            ax.set_ylim(-30, 30)
        ax.set_title(titles[i])

    fig.tight_layout()
    # Save through the figure object so the file always holds the figure
    # drawn above, independent of pyplot's "current figure" state.
    fig.savefig('imgs/pygam_wage_data_linear.png', dpi=300)
def wage_X_y():
    """Return whatever ``wage(return_X_y=True)`` produces.

    Author's notes carried over from the original inline comments:
    y is real; LinearGAM is the recommended model.
    """
    dataset = wage(return_X_y=True)
    return dataset
gam1.summary() gam2 = LinearGAM(te(0, 1)).fit(X, y) gam2.summary() import pandas as pd pd.DataFrame(X).corr() ###################################################### # regression from pygam import LinearGAM, s, f from pygam.datasets import wage X, y = wage(return_X_y=True) ## model gam = LinearGAM(s(0) + s(1) + f(2)) gam.gridsearch(X, y) gam.summary() ## plotting plt.figure() fig, axs = plt.subplots(1, 3) titles = ['year', 'age', 'education'] for i, ax in enumerate(axs): XX = gam.generate_X_grid(term=i) ax.plot(XX[:, i], gam.partial_dependence(term=i, X=XX)) ax.plot(XX[:, i],
from pygam.datasets import wage
from pygam import LinearGAM, s, f
import numpy as np
import matplotlib.pyplot as plt

X, y = wage()

# Initial fit, before any search over the smoothing penalties.
gam = LinearGAM(s(0, n_splines=5) + s(1) + f(2)).fit(X, y)
gam.summary()

# Grid search: the same 5 log-spaced candidates (1e-3 .. 1e5) for each of
# the 3 terms.
lam = np.logspace(-3, 5, 5)
lams = [lam] * 3

gam.gridsearch(X, y, lam=lams)
gam.summary()

# Random search: 100 candidate penalty triples drawn log-uniformly over the
# same decades as the grid above, so the GCV comparison below is like for like.
lams = np.random.rand(100, 3)  # random points on [0, 1], with shape (100, 3)
lams = lams * 8 - 3  # shift values to [-3, 5]
lams = 10 ** lams  # transform values to [1e-3, 1e5]

random_gam = LinearGAM(s(0) + s(1) + f(2)).gridsearch(X, y, lam=lams)
random_gam.summary()

# True when the grid-searched model has the lower GCV score.
print(gam.statistics_['GCV'] < random_gam.statistics_['GCV'])

for i, term in enumerate(gam.terms):
    # The intercept term has no feature to build a grid for.
    if term.isintercept:
        continue
    XX = gam.generate_X_grid(term=i)
def crude_strCat_to_int(ar, findex):
    '''
    Replace a string-valued column of ``ar`` with integer codes, in place.

    If the first entry of column ``findex`` is a ``str``, every value in that
    column is replaced with the position of that value among the column's
    sorted unique values (i.e. an integer corresponding to lexicographical
    order).  Otherwise ``ar`` is left untouched.  Nothing is returned.

    ar     : 2-D array indexed as ``ar[row, column]``; modified in place
    findex : index of the column to check and convert

    NOTE(review): the codes are assigned back into ``ar``, so they take on
    ``ar``'s dtype -- presumably ``ar`` is an object array (as produced by
    ``DataFrame.values`` on mixed columns); confirm for other callers.
    '''
    # An array without rows has no first entry to inspect and nothing to
    # convert; return instead of raising IndexError on ar[0, findex].
    if ar.shape[0] == 0:
        return
    if isinstance(ar[0, findex], str):
        # return_inverse gives, for each row, the index of its value in the
        # sorted array of unique values.
        ar[:, findex] = np.unique(ar[:, findex], return_inverse=True)[1]


# load dataset (as pd.DataFrame) => describe features
df = wage(return_X_y=False)
df.describe(include='all')

'''
type of terms:
1) int/category ['year']
2) int ['age']
3) continous ['logwage']
4) category ['sex','maritl','race','education','religion','jobclass','health','health_ins']
'''

#prep X and y
features = ['year', 'age', 'education']
X = df[features].values
# column 2 ('education') holds category labels; turn them into integer codes
crude_strCat_to_int(X, 2)
df = pd.DataFrame(dataset, columns=column_name)
# df1 = pd.DataFrame(dataset1, columns=column_name)
db.connect.commit()

# Keep only the rows whose 'date' compares below the 2020-09-01 cutoff.
before_cutoff = '2020-09-01' > df['date']
train_value = df[before_cutoff]

# Every column after the first, cast to float64.
x_train1 = train_value.iloc[:, 1:].astype('float64')
# Target: the 'value' column as a float64 NumPy array.
y_train1 = train_value['value'].astype('float64').to_numpy()
# Single-feature input: the 'rain' column as float64.
x_train2 = train_value['rain'].astype('float64')

from pygam import LinearGAM, s, f
from pygam.datasets import wage

# NOTE: x_train2 / y_train1 are rebound here to the pyGAM wage dataset, so the
# model below is fitted on wage data rather than on the frame prepared above.
x_train2, y_train1 = wage()

model_terms = s(0) + s(1) + f(2)
gam = LinearGAM(model_terms).fit(x_train2, y_train1)
gam.summary()

import matplotlib.pyplot as plt

# One figure per non-intercept term, plotting its partial dependence.
for i, term in enumerate(gam.terms):
    if not term.isintercept:
        XX = gam.generate_X_grid(term=i)
        pdep, confi = gam.partial_dependence(term=i, X=XX, width=0.95)

        plt.figure()
        plt.plot(XX[:, term.feature], pdep)