def residuals(samples, i, j, cond_set):
    """Return GAM deviance residuals of columns ``i`` and ``j`` given ``cond_set``.

    One ``pygam.GAM`` instance is fitted with the ``cond_set`` columns of
    ``samples`` as predictors, first against column ``i`` and then re-fitted
    against column ``j``; the deviance residuals are collected after each fit.

    Args:
        samples: Array indexed as ``samples[:, columns]``.
        i: Column index of the first response.
        j: Column index of the second response.
        cond_set: Iterable of column indices used as predictors.

    Returns:
        Tuple ``(residuals_i, residuals_j)``.
    """
    predictor_cols = list(cond_set)
    # The same model object is deliberately reused for both responses, in
    # the order i then j, exactly as a sequential fit/refit would do.
    model = pygam.GAM()
    collected = []
    for response_col in (i, j):
        predictors = samples[:, predictor_cols]
        response = samples[:, response_col]
        model.fit(predictors, response)
        collected.append(model.deviance_residuals(predictors, response))
    return collected[0], collected[1]
def gammer(self, n_splines=21):
    """Fit a GAM on the training split and print train/test MSE and R2.

    The fitted model is stored on ``self.model`` and its predictions on
    ``self.train_preds`` / ``self.test_preds``.

    Args:
        n_splines: Value forwarded to ``pygam.GAM(n_splines=...)``.
    """
    estimator = pygam.GAM(n_splines=n_splines)
    self.model = estimator.fit(self.x_train, self.y_train)

    self.train_preds = self.model.predict(self.x_train)
    self.test_preds = self.model.predict(self.x_test)

    # Both errors go out on a single line, train first.
    print(
        mse(self.y_train, self.train_preds),
        mse(self.y_test, self.test_preds),
    )

    r2_report = (
        ("Train R2", self.y_train, self.train_preds),
        ("Test R2", self.y_test, self.test_preds),
    )
    for label, truth, predicted in r2_report:
        print(label, r2_score(truth, predicted))
def gamSplineSens(preinterpsurfaces):
    """Plot how the GAM fit error on ``"pres"`` changes with the spline count.

    For each spline count in ``range(4, 16)`` a tensor-product GAM
    (``pygam.te`` over the longitude and latitude columns) is fitted to the
    ``"pres"`` field of every surface whose key satisfies ``int(key) > 2000``
    and that has more than 10 non-NaN values. The ``log10`` absolute errors
    of all such surfaces are pooled and drawn as one distribution per spline
    count, labelled with that count.

    Args:
        preinterpsurfaces: Mapping from a key convertible with ``int`` to a
            surface dict holding ``"lons"``, ``"lats"`` and ``"data"``.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    quantity = "pres"

    # Coordinates, values and masks do not depend on the spline count, so
    # they are prepared once instead of once per spline count.
    fit_inputs = []
    for key, surface in preinterpsurfaces.items():
        if int(key) <= 2000:
            continue
        coords = np.zeros((len(surface["lons"]), 2))
        coords[:, 0] = surface["lons"]
        coords[:, 1] = surface["lats"]
        values = np.asarray(surface["data"][quantity])
        valid = ~np.isnan(values)
        if np.count_nonzero(valid) > 10:
            fit_inputs.append((coords, values, valid))

    for splines in range(4, 16):
        errors = []
        for coords, values, valid in fit_inputs:
            gam = pygam.GAM(
                pygam.te(0, 1, n_splines=[splines, splines])
            ).fit(coords[valid], values[valid])
            # The error is evaluated at every point of the surface, so
            # entries whose data value is NaN contribute NaN here.
            errors.extend(np.log10(np.abs(values - gam.predict(coords))))
        # NOTE(review): sns.distplot is deprecated in recent seaborn
        # releases; kept because the replacement draws a different figure.
        sns.distplot(errors, kde_kws={"fill": False, "label": str(splines)})

    plt.legend()
    plt.show()
def gammerMulti(self):
    """Sweep ``n_splines`` over ``range(5, 50)`` and plot train/test MSE.

    A fresh ``pygam.GAM`` is fitted on the training split for each spline
    count; the resulting train and test MSE lists are printed and then
    plotted side by side (test set on the left, train set on the right).
    """
    spline_counts = range(5, 50)
    train_errors, test_errors = [], []

    for count in spline_counts:
        print("Iter ", count)
        fitted = pygam.GAM(n_splines=count).fit(self.x_train, self.y_train)
        train_errors.append(mse(self.y_train, fitted.predict(self.x_train)))
        test_errors.append(mse(self.y_test, fitted.predict(self.x_test)))

    print(train_errors)
    print(test_errors)

    fig, axes = plt.subplots(1, 2, figsize=(12, 4))
    fig.suptitle("Train Error vs. Test Error", fontsize=14, y=1)
    plt.subplots_adjust(top=0.93, wspace=0)

    # (title, series, y-axis label) for the left and right panel in turn.
    panels = (
        ("Test Set", test_errors, "MSE"),
        ("Train Set", train_errors, ""),
    )
    for axis, (title, series, y_label) in zip(axes, panels):
        axis.plot(spline_counts, series, linestyle='-', marker='o')
        axis.set_title(title)
        axis.set_xlabel("N_Splines")
        axis.set_ylabel(y_label)

    plt.show()
# NOTE(review): the five statements below look like the tail of a
# cross-validation loop over the linear model `lm` whose `for` header lies
# outside this excerpt -- re-indent them under that loop when merging.
mse_train = mean_squared_error(y[train], lm.predict(X[train]))
mse_test = mean_squared_error(y[test], lm.predict(X[test]))
print('MSE train: %.4f, MSE test: %.4f' % (mse_train, mse_test))
mse_train_lm.append(mse_train)
mse_test_lm.append(mse_test)

# Summary of the linear model over all folds: mean (standard deviation).
print('Mean MSE train: %.4f (%.4f), Mean MSE test: %.4f (%.4f)' % (np.mean(mse_train_lm), np.std(mse_train_lm), np.mean(mse_test_lm), np.std(mse_test_lm)))

#%% GAM
# Modules
import pygam

# The model is only constructed here; it is neither stored nor fitted.
pygam.GAM(pygam.s(0), distribution='normal', link='identity')

#%% Ridge
# Per-fold train/test MSE of the ridge regression.
mse_train_ridge = []
mse_test_ridge = []
for train, test in kf_cv.split(X, y):
    # Bind the estimator to `ridge`, the name the predictions below use.
    # It was previously assigned to `lasso`, so `ridge.predict` hit an
    # undefined (or stale) object instead of the model fitted on this fold.
    ridge = Ridge()
    ridge.fit(X[train], y[train])
    mse_train = mean_squared_error(y[train], ridge.predict(X[train]))
    mse_test = mean_squared_error(y[test], ridge.predict(X[test]))
    print('MSE train: %.4f, MSE test: %.4f' % (mse_train, mse_test))
    mse_train_ridge.append(mse_train)
    mse_test_ridge.append(mse_test)

#%% Network
def gam_reg(target_samples, cond_samples):
    """Regress ``target_samples`` on ``cond_samples`` with a default GAM.

    The shape of the fitted coefficient vector is printed as a side effect.

    Args:
        target_samples: Response values passed as ``y`` to the GAM.
        cond_samples: Predictor values passed as ``X`` to the GAM.

    Returns:
        The deviance residuals of the fitted model on the same data.
    """
    model = pygam.GAM()
    model.fit(cond_samples, target_samples)
    print(model.coef_.shape)
    return model.deviance_residuals(cond_samples, target_samples)
@author: Christian Winkler
"""
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pygam import LinearGAM
import pygam

# Load the example table; `head` is the response and `age` the predictor.
example_data = pd.read_csv("example_data.csv")
y = example_data['head'].values
X = example_data['age'].values

# Earlier LinearGAM variants, kept for reference:
#gam = LinearGAM()
#gam = LinearGAM(n_splines=10).gridsearch(X, y)
gam = pygam.GAM(n_splines=10).gridsearch(X, y)
# NOTE(review): gridsearch presumably already returns a fitted model, which
# would make this extra fit a refit -- confirm whether it is intended.
gam.fit(X, y)
# `gam` is now the fitted model.

# Draw 500 samples of the response at the observed X values.
samples = gam.sample(X, y, quantity='y', n_draws=500, sample_at_X=X)
# samples is shape (500, len(y))

# 2.5th and 97.5th percentiles across the draws (axis 0) give a 95% band.
percentiles = np.percentile(samples, q=[2.5, 97.5], axis=0)
# percentiles is now shape (2, len(y))

# Scatter the raw data and overlay the lower and upper percentile curves.
# NOTE(review): the curves are drawn in the row order of X; if `age` is not
# sorted in the CSV the lines will zigzag -- verify against the data.
plt.figure(figsize=(10, 8))
plt.scatter(X, y)
plt.plot(X, percentiles[0])
plt.plot(X, percentiles[1])

# Write the figure to disk.
plt.savefig("pygam_example.png")
def interpolateSurface(surface,region,coord="xy",debug=True,interpmethod="gam",smart=False,splines=10):
    """Interpolate every field of ``surface`` onto a mesh built for ``region``.

    The mesh comes from ``smartMesh`` when ``smart`` is true and from
    ``generateMaskedMesh`` otherwise. Each entry of ``surface["data"]`` is
    then evaluated at the mesh points with the selected method:

    * ``"gam"``: an mgcv tensor-product smooth ``d~te(lon,lat,bs="tp")``
      fitted with ``mgcv.gamm`` and evaluated with ``mgcv.predict_gam``.
    * ``"linear"`` / ``"nearest"``: ``nstools.griddata`` with that method.
    * anything else: ``surfaceSnap(surface, xi, yi)`` supplies all fields.

    For the first two methods a field with 10 or fewer usable values is
    filled with NaN at every mesh point.

    Args:
        surface: Dict with ``"lons"``, ``"lats"`` and ``"data"`` entries
            (and ``"x"`` / ``"y"`` when ``smart`` is true).
        region: Forwarded to the mesh generator.
        coord: Forwarded to the mesh generator.
        debug: Unused; kept so existing callers keep working.
        interpmethod: ``"gam"``, ``"linear"``, ``"nearest"`` or any other
            value to fall back to ``surfaceSnap``.
        smart: Select ``smartMesh`` instead of ``generateMaskedMesh``.
        splines: Unused; kept so existing callers keep working.

    Returns:
        Tuple ``(interpsurf, neighbors)``: the interpolated surface after
        ``addLatLonToSurface`` and the neighbor structure from the mesh
        generator.
    """
    # Column 0 holds longitudes, column 1 latitudes of the input points.
    X = np.zeros((len(surface["lons"]),2))
    X[:,0] = surface["lons"]
    X[:,1] = surface["lats"]

    if smart:
        xi,yi,neighbors,finalids = smartMesh(surface["x"],surface["y"],region,coord)
    else:
        xi,yi,neighbors,finalids = generateMaskedMesh(surface["lons"],surface["lats"],region,coord)

    interpsurf = {"x": xi, "y": yi, "ids": finalids}
    if len(xi) != len(finalids):
        print("OH NOOOOOO")

    interpdata = {}
    if interpmethod == "gam":
        for d in surface["data"].keys():
            notnan = ~np.isnan(surface["data"][d])
            if np.count_nonzero(notnan) > 10:
                # Hand the valid points to R and fit the smooth there.
                df = pd.DataFrame({'lon': X[:,0][notnan], 'lat': X[:,1][notnan], 'd':(np.asarray(surface["data"][d])[notnan])})
                r_dataframe = pandas2ri.py2rpy(df)
                tps = mgcv.gamm(ro.Formula('d~te(lon,lat,bs=\"tp\" )'),data=r_dataframe)
                sgrid = pd.DataFrame({'lon': xi.T, 'lat': yi.T})
                griddata = mgcv.predict_gam(dollar(tps,'gam'),sgrid,se="TRUE")
                # Only the first element of the prediction result is kept.
                interpdata[d] = griddata[0]
            else:
                interpdata[d] = np.asarray([np.nan]*len(xi))
    elif interpmethod in ["linear","nearest"]:
        for d in Bar("Interpolating: ").iter(surface["data"].keys()):
            # Here points with a NaN longitude are excluded as well.
            notnan = np.logical_and(~np.isnan(X[:,0]),~np.isnan(surface["data"][d]))
            if np.count_nonzero(notnan) > 10:
                Xgrid = np.zeros((yi.shape[0],2))
                Xgrid[:,0] = xi
                Xgrid[:,1] = yi
                f = nstools.griddata(X[notnan],np.asarray(surface["data"][d])[notnan],Xgrid,method=interpmethod)
                if np.isinf(f).any():
                    print("oh no!")
                interpdata[d] = f
            else:
                interpdata[d] = np.asarray([np.nan]*len(xi))
    else:
        interpdata = surfaceSnap(surface,xi,yi)

    interpsurf["data"] = interpdata
    # The mesh ids are also stored alongside the interpolated fields.
    interpsurf["data"]["ids"] = finalids
    interpsurf = addLatLonToSurface(interpsurf)
    return interpsurf,neighbors