Example #1
    def _build_ensemble_feature(self, X, base_pred):
        """Builds featurre array and corresponding GAM TermList.

        Terms corresponding to X will be summation of
            dimension-wise splines, plus a tensor-product term across all dimension.

        """
        ensemble_term_func = s if self.nonlinear_ensemble else l

        ens_feature = np.asarray(list(base_pred.values())).T
        term_list = [ensemble_term_func(dim_index) for dim_index in range(ens_feature.shape[1])]

        # optionally, add residual process
        if self.model_residual:
            # build gam terms
            term_list += [s(dim_index) for dim_index in
                          range(ens_feature.shape[1],
                                ens_feature.shape[1] + X.shape[1])]
            if X.shape[1] > 1:
                term_list += [te(*list(ens_feature.shape[1] +
                                       np.array(range(X.shape[1]))))]

            # update features
            ens_feature = np.concatenate([ens_feature, X], axis=1)

        gam_feature_terms = TermList(*term_list)

        return ens_feature, gam_feature_terms
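A minimal standalone sketch of the same term-building pattern; the base_pred dict, the shapes, and the choice of linear ensemble terms are illustrative assumptions, not part of the original class:

import numpy as np
from pygam import s, l, te
from pygam.terms import TermList

# hypothetical base-model predictions and residual features
base_pred = {'model_a': np.random.rand(100), 'model_b': np.random.rand(100)}
X = np.random.rand(100, 2)

ens_feature = np.asarray(list(base_pred.values())).T        # shape (100, 2)
term_list = [l(i) for i in range(ens_feature.shape[1])]     # one term per ensemble member
term_list += [s(ens_feature.shape[1] + j) for j in range(X.shape[1])]       # residual splines
term_list += [te(*[ens_feature.shape[1] + j for j in range(X.shape[1])])]   # tensor across X dims
gam_feature_terms = TermList(*term_list)
ens_feature = np.concatenate([ens_feature, X], axis=1)      # shape (100, 4)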
Example #2
def BAM(X, y):
    # model implemented with pyGAM
    gam = LinearGAM(s(0, spline_order=3) + s(1, spline_order=3) + te(0, 1))
    gam.gridsearch(X, y)
    # print(gam.gridsearch(X, y).summary())

    return gam
Example #3
def interp_gam(data):
    valid = np.isfinite(data.stream_dist.values[:, 0])
    if np.sum(valid) == 0:
        return np.nan
    sample_xy = data.sample_xy.values[valid]
    sample_st = data.stream_dist.values[valid]
    sample_z = data.sample_z.values[valid]

    gam = LinearGAM(
        s(0, n_splines=4) + s(1, n_splines=5) +
        te(0, 1, n_splines=4)).gridsearch(sample_st, sample_z)
    # evaluate the fitted surface at the origin of the stream-distance coordinates
    z_pred = gam.predict(np.array([[0, 0]]))[0]
    return z_pred
Example #4
def get_GAM_predictions(Xtrain, Ytrain, Xtest):
    """
    Perform a grid search, train a LinearGAM model, and return predictions for the test set.
    :param Xtrain: X values for training.
    :param Ytrain: Y values for training.
    :param Xtest:  X values for validation.
    :return: Predictions from the LinearGAM model for the test dataset.
    """
    # Create an array of lambda values to search
    lams = np.logspace(-3, 20, 35)
    # GAM search requires numpy arrays
    Xtrain_np = np.array(Xtrain, dtype=np.float64)
    Ytrain_np = np.array(Ytrain, dtype=np.float64)

    # Linear Generalised Additive Model
    model = LinearGAM(
        s(99) + s(100) + l(3) + l(6) + l(8) + l(11) + l(7) + l(9) + l(12) +
        l(10) + l(14) + l(29) + l(15) + l(71) + l(17) + l(21) + l(107) +
        l(16) + l(68) + l(78) + l(61) + l(55) + l(31) + l(13) + l(37) + l(4) +
        l(5) + l(2) + te(4, 5) + te(68, 78)).gridsearch(Xtrain_np,
                                                        Ytrain_np,
                                                        lam=lams)
    return model.predict(Xtest)
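Hypothetical usage with synthetic stand-ins; the real feature layout is not shown here, and the term indices above merely require at least 108 columns:

import numpy as np

rng = np.random.default_rng(0)
Xtrain = rng.normal(size=(200, 108))   # >= 108 columns so s(100) and l(107) are valid
Ytrain = rng.normal(size=200)
Xtest = rng.normal(size=(50, 108))

preds = get_GAM_predictions(Xtrain, Ytrain, Xtest)
print(preds[:5])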
Example #5
def AAM():

    gam = LinearGAM(s(0, n_splines=25, spline_order=3, constraints='concave', penalties='auto', basis='cp', edge_knots=[147, 147])
                        + l(3)  # the last travel time
                        + te(0, 1)  # distance and departure_time
                        + te(2, 0)  # distance and isWeekend
                        + l(2),  # isWeekend
                    fit_intercept=True)

    gam.gridsearch(X1, y1).summary()  # summary() prints directly and returns None
    # print(gam.gridsearch(X1,y1).get_params(deep=True))
    '''plt.scatter(X1[:,0][0:56], y1[0:56], s=3, linewidth=1, label = 'data')
    plt.plot(X1[:,0][0:56], gam.predict(X1[0:56]), color = 'red', linewidth = 1, label = 'prediction')
    plt.legend()
    plt.title('Extended Additive Model')
    plt.show()'''
    # error calculation
    rmse_val = rmse(np.array(y1), np.array(gam.predict(X1)))
    print("RMSE is: "+str(rmse_val))
    mae = mean_absolute_error(y1, gam.predict(X1))
    print("MAE is: "+str(mae))
    mape = mean_absolute_percentage_error(np.array(y1), np.array(gam.predict(X1)))
    print("MAPE is: "+ str(mape))
Example #6
def BAM():

    gam = GAM(s(0, n_splines=25, spline_order=3, constraints='concave', penalties='auto', basis='cp', edge_knots=[147, 147])
                    + s(1, n_splines=25, spline_order=3, constraints='concave', penalties='auto', basis='cp', edge_knots=[147, 147])
                    + te(0, 1, dtype=['numerical', 'numerical']), distribution='normal', link='identity', fit_intercept=True)
    gam.gridsearch(X, y, n_splines=np.arange(50)).summary()  # summary() prints directly and returns None
    plt.scatter(X[:, 0][0:56], y[0:56], s=3, linewidths=0.0001, label='data')
    plt.plot(X[:, 0][0:56], gam.predict(X[0:56]), color='red', linewidth=1, label='prediction')
    plt.legend()
    plt.title('Basic Additive Model')
    plt.show()

    # error calculation
    rmse_val = rmse(np.array(y), np.array(gam.predict(X)))
    print("RMSE is: " + str(rmse_val))
    mae = mean_absolute_error(y, gam.predict(X))
    print("MAE is: " + str(mae))
    mape = mean_absolute_percentage_error(np.array(y), np.array(gam.predict(X)))
    print("MAPE is: " + str(mape))
Example #7
def gamSplineSens(preinterpsurfaces):
    sumerrors = []
    for splines in range(4, 16):
        error = []
        for k in preinterpsurfaces.keys():
            if int(k) > 2000:
                surface = preinterpsurfaces[k]
                X = np.zeros((len(surface["lons"]), 2))
                X[:, 0] = surface["lons"]
                X[:, 1] = surface["lats"]
                # for d in Bar("Interpolating: ").iter(surface["data"].keys()):
                d = "pres"
                notnan = ~np.isnan(surface["data"][d])
                if np.count_nonzero(notnan) > 10:
                    gam = pygam.GAM(pygam.te(0, 1, n_splines=[splines, splines])).fit(
                        X[notnan], np.asarray(surface["data"][d])[notnan])
                    # random_gam = pygam.LinearGAM(pygam.s(0) + pygam.s(1)).gridsearch(X, surface["data"][d])
                    error += list(np.log10(np.abs(surface["data"][d] - gam.predict(X))))
        sns.distplot(error, kde_kws={"fill": False, "label": str(splines)})
    # plt.plot(range(4, 16), sumerrors)
    plt.legend()
    plt.show()
Example #8
############################################################
# https://pygam.readthedocs.io/en/latest/notebooks/tour_of_pygam.html

# Fitting and plotting interactions with te()

from pygam import PoissonGAM, s, te
from pygam.datasets import chicago

X, y = chicago(return_X_y=True)
print(X.shape)

gam = PoissonGAM(s(0, n_splines=200) + te(3, 1) + s(2)).fit(X, y)

import matplotlib.pyplot as plt
from mpl_toolkits import mplot3d

plt.ion()
plt.rcParams['figure.figsize'] = (12, 8)

XX = gam.generate_X_grid(term=1, meshgrid=True)
Z = gam.partial_dependence(term=1, X=XX, meshgrid=True)

ax = plt.axes(projection='3d')
ax.plot_surface(XX[0], XX[1], Z, cmap='viridis')

# Simple interactions, compare with te()

from pygam import LinearGAM, s
from pygam.datasets import toy_interaction

X, y = toy_interaction(return_X_y=True)
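# The linked tour continues by fitting this dataset two ways; a hedged
# continuation sketch (these exact lines are not part of the snippet above):
gam_by = LinearGAM(s(0, by=1)).fit(X, y)  # linear interaction via a by-variable
gam_te = LinearGAM(te(0, 1)).fit(X, y)    # full tensor-product interaction
gam_by.summary()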
Example #9
    def covariance(self, mean=None, smooth=None, **kwargs):
        """Compute an estimate of the covariance.

        Parameters
        ----------
        mean: DenseFunctionalData, default=None
            An estimate of the mean of self. If None, an estimate is computed.
        smooth: str, default=None
            Name of the smoothing method to use ('LocalLinear' or 'GAM').
            If None, no smoothing is performed.

        Returns
        -------
        obj: DenseFunctionalData object
            An estimate of the covariance as a two-dimensional
            DenseFunctionalData object with same argvals as `self`.

        Keyword Args
        ------------
        kernel_name: str, default='epanechnikov'
            Name of the kernel used for local polynomial smoothing.
        degree: int, default=1
            Degree used for local polynomial smoothing.
        bandwidth: float, default=1
            Bandwidth used for local polynomial smoothing.
        n_basis: int, default=10
            Number of splines basis used for GAM smoothing.

        References
        ----------
        * Yao, Müller and Wang (2005), Functional Data Analysis for Sparse
        Longitudinal Data,
        Journal of the American Statistical Association, Vol. 100, No. 470
        * Staniswalis, J. G., and Lee, J. J. (1998), “Nonparametric Regression
        Analysis of Longitudinal Data,” Journal of the American Statistical
        Association, 93, 1403–1418.

        """
        if self.n_dim > 1:
            raise ValueError('Only one-dimensional functional data are'
                             ' supported.')

        p = self.n_points['input_dim_0']
        argvals = self.argvals['input_dim_0']
        if mean is None:
            mean = self.mean(smooth)
        data = self.values - mean.values
        cov = np.dot(data.T, data) / (self.n_obs - 1)
        cov_diag = np.copy(np.diag(cov))

        if smooth is not None:
            # Remove the covariance diagonal because of measurement errors.
            np.fill_diagonal(cov, None)
            cov = cov[~np.isnan(cov)]

            # Define train vector
            train_ = np.vstack((np.repeat(argvals, repeats=len(argvals)),
                                np.tile(argvals, reps=len(argvals))))

            train = train_[:, train_[0, :] != train_[1, :]]

            if smooth == 'LocalLinear':
                points = kwargs.get('points', 0.5)
            neigh = kwargs.get('neighborhood',
                               int(p * np.exp(-(np.log(np.log(p)))**2)))
                data_smooth = self.smooth(points=points, neighborhood=neigh)
                data = data_smooth.values - mean.values
                cov = np.dot(data.T, data) / (self.n_obs - 1)
            elif smooth == 'GAM':
                n_basis = kwargs.get('n_basis', 10)

                cov = pygam.LinearGAM(pygam.te(0, 1, n_splines=n_basis)).\
                    fit(np.transpose(train), cov).\
                    predict(np.transpose(train_)).\
                    reshape((len(argvals), len(argvals)))
            else:
                raise NotImplementedError('Smoothing method not implemented.')

        # Ensure the covariance is symmetric.
        cov = (cov + cov.T) / 2

        # Smoothing the diagonal of the covariance (Yao, Müller and Wang, 2005)
        lp = LocalPolynomial(kernel_name=kwargs.get('kernel_name', 'gaussian'),
                             bandwidth=kwargs.get('bandwidth', 1),
                             degree=kwargs.get('degree', 1))
        var_hat = lp.fit_predict(argvals, cov_diag, argvals)
        # Estimate noise variance (Staniswalis and Lee, 1998)
        ll = argvals[len(argvals) - 1] - argvals[0]
        lower = np.sum(~(argvals >= (argvals[0] + 0.25 * ll)))
        upper = np.sum((argvals <= (argvals[len(argvals) - 1] - 0.25 * ll)))
        weights = integration_weights_(argvals[lower:upper], method='trapz')
        nume = np.dot(weights, (var_hat - cov_diag)[lower:upper])
        self.var_noise = np.maximum(nume / (argvals[upper] - argvals[lower]), 0)

        new_argvals = {'input_dim_0': argvals, 'input_dim_1': argvals}
        return DenseFunctionalData(new_argvals, cov[np.newaxis])
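The 'GAM' branch above is worth isolating. A minimal runnable sketch of the same tensor-product smoothing applied to a synthetic covariance surface (standalone, not this package's API):

import numpy as np
import pygam

# synthetic grid and a noisy rank-one covariance surface
argvals = np.linspace(0.0, 1.0, 20)
true_cov = np.outer(np.sin(argvals), np.sin(argvals))
noisy = true_cov + 0.05 * np.random.default_rng(0).normal(size=true_cov.shape)

# train on all off-diagonal (s, t) pairs, mirroring the code above
train_ = np.vstack((np.repeat(argvals, len(argvals)),
                    np.tile(argvals, len(argvals))))
off_diag = train_[0, :] != train_[1, :]
target = noisy.ravel()[off_diag]

# fit a tensor-product GAM and predict back on the full grid
gam = pygam.LinearGAM(pygam.te(0, 1, n_splines=10)).fit(
    train_[:, off_diag].T, target)
cov_smooth = gam.predict(train_.T).reshape(len(argvals), len(argvals))
cov_smooth = (cov_smooth + cov_smooth.T) / 2  # enforce symmetry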
Example #10
    'pi', 'beta', 'rho', 'u', 'delta00', 'delta01', 'delta10', 'delta11',
    'mu1', 'mu2', 'mu3'
]
texnamesx = [
    '$\\pi$', '$\\beta$', '$\\rho$', '$u$', '$\\delta_{00}$', '$\\delta_{01}$',
    '$\\delta_{10}$', '$\\delta_{11}$', '$\\mu_1$', '$\\mu_2$', '$\\mu_3$'
]
texnamesy = [
    '$f_1(\\pi)$', '$f_2(\\beta)$', '$f_3(\\rho)$', '$f_4(u)$',
    '$f_5(\\delta_{00})$', '$f_6(\\delta_{01})$', '$f_7(\\delta_{10})$',
    '$f_8(\\delta_{11})$', '$f_9(\\mu_1)$', '$f_{10}(\\mu_2)$', '$f_{11}(\\mu_3)$'
]

fit5 = GammaGAM().fit(res[features], res.sigma5)
fit30 = GammaGAM(terms=s(0) + s(1) + s(2) + s(3) + s(4) + s(5) + s(6) + s(7) +
                 s(8) + s(9) + s(10) + te(9, 10) + te(1, 3)).fit(
                     res[features], res.sigma30)
fit30 = fit30.gridsearch(res[features],
                         res.sigma30,
                         lam=np.logspace(3, 4, 100))
fit60 = GammaGAM().fit(res[features], res.sigma60)
fitSpread = GammaGAM().fit(res[features], res.meanSpread)
# %% Volatility 30 Plots
x_grid = np.array([
    np.linspace(min(res[feature]),
                max(res[feature]) * 1, 1000) for feature in features
])

for k in range(len(features)):
    pdeps, cof = fit30.partial_dependence(k, width=0.95, X=x_grid.T)
Example #11
def EAM():
    X = np.load('EAM_factors.npy')
    y = np.load('EAM_time.npy')
    gam = LinearGAM(
        s(0, spline_order=3) + s(1, spline_order=3) + te(0, 1) + te(0, 2))
    gam.gridsearch(X, y)
    return gam
Example #12
    # (38 previous)
    # 27,29121,0,22838721,264,11.9,0,69,9.6,4,0.0,60000,-11,1474355121,38,40

    tester = [29121, 0, 264, 11.9, 0, 69, 9.6, 4]

    y_pred = gam.predict([tester])

    print(y_pred)


my_data = pd.read_csv("result_Charlemont Street.csv")

attributes = ['time_of_day', 'type_of_day', 'day_of_year', 'temperature', 'rain', 'relative_humidity',
              'vapour_pressure', 'wind_speed']

X = my_data[attributes].values
# Xother = my_data[['time_of_day', 'type_of_day', 'day_of_year', 'temperature']].values

y = my_data['available_bike_stands'].values

gam = LinearGAM(te(0, 1) + s(2) + s(3) + s(4) + s(5) + s(6) + s(7), n_splines=[25, 10, 10, 10, 10, 10, 10, 10],
                dtype=['numerical', 'categorical', 'numerical', 'numerical', 'numerical', 'numerical', 'numerical',
                       'numerical'])

gam.gridsearch(X, y)

gam.summary()

display_breakdown()

test_model(gam)
Example #13
#%%
# plotting
fig = plt.figure()
ax = plt.axes(projection='3d')
nr = 2
ax.scatter3D(X[:, 1][::nr], X[:, 0][::nr], y[::nr], c=y[::nr], cmap='Spectral')
plt.show()
#%%
# pyGAM
from pygam import LinearGAM, s, te, PoissonGAM, f, GAM

gam = GAM(
    s(0, constraints="monotonic_inc", n_splines=15) +
    s(1) +  #, constraints="concave", n_splines=100) +
    te(1, 0))
gam.fit(X_train, y_train)

titles = ['QDot[l/min*m]', 'TemperaturStart']
fig, axs = plt.subplots(1, len(titles), figsize=(13, 9))

# plot partial dependences
for i, ax in enumerate(axs):
    print("i = ", i)
    XX = gam.generate_X_grid(term=i)
    ax.plot(XX[:, i], gam.partial_dependence(term=i, X=XX))
    ax.plot(XX[:, i],
            gam.partial_dependence(term=i, X=XX, width=.95)[1],
            c='r')
    ax.set_title(titles[i])
    ax.grid()
Example #14
def train(X, y):
    gam = LinearGAM(s(0) + s(1) + te(0, 1)).fit(X, y)
    # gam.summary()
    return gam
Example #15
# AIC=2005
# gam=LinearGAM( s(0,n_splines=4) + s(1,n_splines=4) + te(0,1,n_splines=4) ).gridsearch(sample_st,sample_z)
# AIC: 2760, but arguably the best looking.
# gam=LinearGAM( s(0,n_splines=4) + s(1,n_splines=5) + te(0,1,n_splines=4) ).gridsearch(sample_st,sample_z)
# AIC=1345
# gam=LinearGAM( s(0,n_splines=4) + s(1,n_splines=4)  ).gridsearch(sample_st,sample_z)
# AIC=1500 -- looks terrible
# gam=LinearGAM( s(0,n_splines=4) ).gridsearch(sample_st,sample_z)
# AIC=1250 -- meh.
# gam=LinearGAM( s(1,n_splines=4) ).gridsearch(sample_st,sample_z)
# AIC 2001
# gam=LinearGAM( te(0,1,n_splines=4) ).gridsearch(sample_st,sample_z)
# AIC 2900
# gam=LinearGAM( te(0,1,n_splines=5) ).gridsearch(sample_st,sample_z)
# AIC 6900, but looks okay
gam = LinearGAM(s(1, n_splines=5) + te(0, 1, n_splines=6)).gridsearch(
    sample_st, sample_z)

print("AIC: ", gam.statistics_['AIC'])
# gam.summary()
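# The commented-out alternatives above can be scored in one pass. A small
# sketch (assuming the same sample_st / sample_z arrays) that prints each AIC:
candidates = {
    's0+s1+te4': s(0, n_splines=4) + s(1, n_splines=4) + te(0, 1, n_splines=4),
    's0+s1': s(0, n_splines=4) + s(1, n_splines=4),
    's1+te6': s(1, n_splines=5) + te(0, 1, n_splines=6),
}
for name, terms in candidates.items():
    m = LinearGAM(terms).gridsearch(sample_st, sample_z)
    print(name, m.statistics_['AIC'])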

#--
plt.figure(2).clf()
nterms = len(gam.terms) - 1  # omit intercept
fig, term_axs = plt.subplots(1, nterms, num=2)
if nterms == 1:
    term_axs = [term_axs]
titles = [repr(t) for t in gam.terms]

for i, ax in enumerate(term_axs):
    XX = gam.generate_X_grid(term=i)