Ejemplo n.º 1
0
def residuals(samples, i, j, cond_set):
    """Return GAM deviance residuals of columns ``i`` and ``j``.

    Each of the two columns is regressed, in turn, on the columns of
    ``samples`` listed in ``cond_set`` using one default ``pygam.GAM``
    instance that is re-fitted for the second column.

    Args:
        samples: 2-D array indexed as ``samples[:, columns]``.
        i: Column index of the first response.
        j: Column index of the second response.
        cond_set: Iterable of column indices used as predictors.

    Returns:
        Tuple ``(residuals_i, residuals_j)`` of deviance residuals.
    """
    predictors = samples[:, list(cond_set)]
    model = pygam.GAM()

    def _deviance_residuals(column):
        # Re-fit the shared model on this response before reading residuals.
        response = samples[:, column]
        model.fit(predictors, response)
        return model.deviance_residuals(predictors, response)

    # Tuple elements are evaluated left to right: column i first, then j.
    return _deviance_residuals(i), _deviance_residuals(j)
Ejemplo n.º 2
0
    def gammer(self, n_splines=21):
        """Fit a GAM on the training split and print train/test metrics.

        The fitted model is stored on ``self.model`` and its predictions on
        ``self.train_preds`` and ``self.test_preds``. The two values returned
        by ``mse`` are printed on one line, followed by the train and test
        ``r2_score`` lines.

        Args:
            n_splines: Forwarded to ``pygam.GAM`` as ``n_splines``.
        """
        gam = pygam.GAM(n_splines=n_splines)
        self.model = gam.fit(self.x_train, self.y_train)
        self.train_preds = self.model.predict(self.x_train)
        self.test_preds = self.model.predict(self.x_test)

        # (actual, predicted) pairs, train first, so every metric is reported
        # in the same order.
        splits = (
            (self.y_train, self.train_preds),
            (self.y_test, self.test_preds),
        )
        print(*(mse(actual, predicted) for actual, predicted in splits))
        for label, (actual, predicted) in zip(("Train R2", "Test R2"), splits):
            print(label, r2_score(actual, predicted))
def gamSplineSens(preinterpsurfaces):
    """Plot how the GAM fit error of the "pres" field varies with spline count.

    For every spline count in ``range(4, 16)`` a tensor-product GAM over
    (lon, lat) is fitted to the "pres" field of each surface whose key,
    converted with ``int``, is greater than 2000. The log10 absolute
    residuals of all those surfaces are pooled and drawn as one
    ``sns.distplot`` curve labelled with the spline count; the figure is
    shown once all counts have been plotted.

    Args:
        preinterpsurfaces: Mapping of key -> surface. Each surface is read
            through ``surface["lons"]``, ``surface["lats"]`` and
            ``surface["data"]["pres"]``.
    """
    d = "pres"
    for splines in range(4, 16):
        error = []
        for k, surface in preinterpsurfaces.items():
            if int(k) <= 2000:
                continue
            # Predictor matrix: column 0 = longitude, column 1 = latitude.
            X = np.zeros((len(surface["lons"]), 2))
            X[:, 0] = surface["lons"]
            X[:, 1] = surface["lats"]
            values = np.asarray(surface["data"][d])
            notnan = ~np.isnan(values)
            # Too few valid points to fit a surface: skip this one.
            if np.count_nonzero(notnan) <= 10:
                continue
            gam = pygam.GAM(
                pygam.te(0, 1, n_splines=[splines, splines])
            ).fit(X[notnan], values[notnan])
            # Residuals are taken at every point, so entries where the data
            # is NaN stay NaN in the pooled list.
            error += list(np.log10(np.abs(values - gam.predict(X))))
        # NOTE(review): sns.distplot is deprecated in recent seaborn releases;
        # kept so the produced figure does not change.
        sns.distplot(error, kde_kws={"fill": False, "label": str(splines)})
    plt.legend()
    plt.show()
Ejemplo n.º 4
0
    def gammerMulti(self):
        """Sweep ``n_splines`` from 5 to 49 and plot train/test error curves.

        For each spline count a ``pygam.GAM`` is fitted on the training split
        and the value returned by ``mse`` is recorded for both splits. The two
        lists are printed, then drawn side by side (test on the left, train on
        the right) and the figure is shown.
        """
        spline_counts = range(5, 50)
        train_errors = []
        test_errors = []
        for n_splines in spline_counts:
            print("Iter ", n_splines)
            fitted = pygam.GAM(n_splines=n_splines).fit(
                self.x_train, self.y_train)
            train_errors.append(
                mse(self.y_train, fitted.predict(self.x_train)))
            test_errors.append(
                mse(self.y_test, fitted.predict(self.x_test)))
        print(train_errors)
        print(test_errors)

        fig, axes = plt.subplots(1, 2, figsize=(12, 4))
        fig.suptitle("Train Error vs. Test Error", fontsize=14, y=1)
        plt.subplots_adjust(top=0.93, wspace=0)

        # (title, series, y-label) per panel, left to right. Only the left
        # panel carries a y-label because the panels touch (wspace=0).
        panels = (
            ("Test Set", test_errors, "MSE"),
            ("Train Set", train_errors, ""),
        )
        for ax, (title, errors, ylabel) in zip(axes, panels):
            ax.plot(spline_counts, errors, linestyle='-', marker='o')
            ax.set_title(title)
            ax.set_xlabel("N_Splines")
            ax.set_ylabel(ylabel)

        plt.show()
Ejemplo n.º 5
0
    mse_train = mean_squared_error(y[train], lm.predict(X[train]))
    mse_test = mean_squared_error(y[test], lm.predict(X[test]))
    print('MSE train: %.4f, MSE test: %.4f' % (mse_train, mse_test))
    mse_train_lm.append(mse_train)
    mse_test_lm.append(mse_test)

# Summarise the linear-model cross-validation: mean and (in parentheses)
# standard deviation of the per-fold train and test MSE values collected in
# mse_train_lm / mse_test_lm.
print('Mean MSE train: %.4f (%.4f), Mean MSE test: %.4f (%.4f)' %
      (np.mean(mse_train_lm), np.std(mse_train_lm), np.mean(mse_test_lm),
       np.std(mse_test_lm)))

#%% GAM

# Modules
import pygam

# NOTE(review): the GAM constructed here is neither assigned nor fitted, so
# this statement has no lasting effect — presumably a placeholder for a GAM
# cross-validation cell; confirm before relying on it.
pygam.GAM(pygam.s(0), distribution='normal', link='identity')

#%% Ridge

# Per-fold train/test MSE of a default Ridge regression over the kf_cv splits.
mse_train_ridge = []
mse_test_ridge = []
for train, test in kf_cv.split(X, y):
    # The estimator must be bound to the same name the predict calls use;
    # binding it under another name leaves `ridge` undefined (or stale).
    ridge = Ridge()
    ridge.fit(X[train], y[train])
    mse_train = mean_squared_error(y[train], ridge.predict(X[train]))
    mse_test = mean_squared_error(y[test], ridge.predict(X[test]))
    print('MSE train: %.4f, MSE test: %.4f' % (mse_train, mse_test))
    mse_train_ridge.append(mse_train)
    mse_test_ridge.append(mse_test)

#%% Network
Ejemplo n.º 6
0
 def gam_reg(target_samples, cond_samples):
     """Return the deviance residuals of a GAM fit of target on cond samples.

     A default ``pygam.GAM`` is fitted with ``cond_samples`` as predictors and
     ``target_samples`` as response. The shape of the fitted coefficient
     vector is printed before the residuals are returned.
     """
     model = pygam.GAM().fit(cond_samples, target_samples)
     print(model.coef_.shape)
     return model.deviance_residuals(cond_samples, target_samples)
Ejemplo n.º 7
0
@author: Christian Winkler
"""
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from pygam import LinearGAM
import pygam

# Load the example measurements: 'age' is the predictor, 'head' the response.
example_data = pd.read_csv("example_data.csv")
X = example_data['age'].values
y = example_data['head'].values

# Run pyGAM's grid search on a 10-spline GAM, then fit the returned model on
# the same data.
gam = pygam.GAM(n_splines=10).gridsearch(X, y)
gam.fit(X, y)  # your fitted model

# Draw 500 simulated responses at the observed X values.
# samples is shape (500, len(y))
samples = gam.sample(X, y, quantity='y', n_draws=500, sample_at_X=X)

# 2.5th and 97.5th percentiles across the draws.
# percentiles is now shape (2, len(y))
percentiles = np.percentile(samples, q=[2.5, 97.5], axis=0)

# Plot the data together with the lower and upper percentile curves.
plt.figure(figsize=(10, 8))
plt.scatter(X, y)
for bound in percentiles:
    plt.plot(X, bound)

plt.savefig("pygam_example.png")
Ejemplo n.º 8
0
def interpolateSurface(surface,region,coord="xy",debug=True,interpmethod="gam",smart=False,splines=10):
    """Interpolate every data field of a surface onto a masked mesh.

    The target mesh comes from ``smartMesh`` (when ``smart`` is true) or from
    ``generateMaskedMesh``. Each entry of ``surface["data"]`` is then
    interpolated onto that mesh according to ``interpmethod``:

    * ``"gam"``: an mgcv ``gamm`` fit of ``d ~ te(lon, lat, bs="tp")``
      evaluated on the mesh with ``predict_gam`` (called through rpy2).
    * ``"linear"`` / ``"nearest"``: ``nstools.griddata`` with that method.
    * anything else: delegated to ``surfaceSnap``.

    In the first two modes a field with 10 or fewer usable points is filled
    with NaN instead of being interpolated.

    Args:
        surface: Dict read through ``"lons"``, ``"lats"`` and ``"data"``
            (plus ``"x"`` and ``"y"`` when ``smart`` is true).
        region: Forwarded unchanged to the mesh generator.
        coord: Forwarded unchanged to the mesh generator.
        debug: Currently unused; kept so existing callers keep working.
        interpmethod: Interpolation mode, see above.
        smart: Selects ``smartMesh`` over ``generateMaskedMesh``.
        splines: Currently unused; kept so existing callers keep working.

    Returns:
        Tuple ``(interpsurf, neighbors)``: the interpolated surface after
        ``addLatLonToSurface`` and the neighbor structure returned by the
        mesh generator.
    """
    # Source coordinates: column 0 = longitude, column 1 = latitude.
    X = np.zeros((len(surface["lons"]),2))
    X[:,0]=surface["lons"]
    X[:,1]=surface["lats"]
    if smart:
        xi,yi,neighbors,finalids = smartMesh(surface["x"],surface["y"],region,coord)
    else:
        xi,yi,neighbors,finalids = generateMaskedMesh(surface["lons"],surface["lats"],region,coord)

    interpsurf = {"x": xi, "y": yi, "ids": finalids}
    # Sanity check only: a length mismatch is reported but not treated as fatal.
    if len(xi) != len(finalids):
        print("OH NOOOOOO")

    interpdata = {}
    if interpmethod=="gam":
        for d in surface["data"].keys():
            values = np.asarray(surface["data"][d])
            notnan = ~np.isnan(values)
            if np.count_nonzero(notnan)>10:
                # Fit in R: hand the valid points to mgcv as a data frame.
                df = pd.DataFrame({'lon': X[:,0][notnan], 'lat': X[:,1][notnan], 'd': values[notnan]})
                r_dataframe = pandas2ri.py2rpy(df)
                tps=mgcv.gamm(ro.Formula('d~te(lon,lat,bs=\"tp\" )'),data=r_dataframe)
                sgrid = pd.DataFrame({'lon': xi.T, 'lat': yi.T})
                griddata = mgcv.predict_gam(dollar(tps,'gam'),sgrid,se="TRUE")
                # Only the first element of the prediction result is kept
                # (presumably the fitted values; the rest is discarded).
                interpdata[d] = griddata[0]
            else:
                interpdata[d] = np.full(len(xi), np.nan)
    elif interpmethod in ["linear","nearest"] :
        # The target grid is the same for every field, so build it once.
        Xgrid = np.zeros((yi.shape[0],2))
        Xgrid[:,0] = xi
        Xgrid[:,1] = yi
        for d in Bar("Interpolating: ").iter(surface["data"].keys()):
            values = np.asarray(surface["data"][d])
            notnan = np.logical_and(~np.isnan(X[:,0]),~np.isnan(values))
            if np.count_nonzero(notnan)>10:
                f = nstools.griddata(X[notnan],values[notnan],Xgrid,method=interpmethod)
                if np.isinf(f).any():
                    print("oh no!")
                interpdata[d] = f
            else:
                interpdata[d] = np.full(len(xi), np.nan)
    else:
        interpdata = surfaceSnap(surface,xi,yi)

    interpsurf["data"] = interpdata
    interpsurf["data"]["ids"] = finalids
    interpsurf = addLatLonToSurface(interpsurf)
    return interpsurf,neighbors