def get_gam_model(self, features: [Field], model_type=TYPE_LINEAR):
    """Build an unfitted GAM with one term per entry of ``features``.

    The feature at position ``i`` contributes a factor term ``f(i)`` when
    its ``is_factor()`` returns a true value, and otherwise a spline term
    ``s(i)`` configured with ``self.num_splines`` splines.

    Args:
        features: Non-empty sequence of objects exposing ``is_factor()``.
        model_type: ``TYPE_LINEAR`` (default) or ``TYPE_LOGISTIC``.

    Returns:
        ``LinearGAM(model_spec)`` or ``LogisticGAM(model_spec)``, not fitted.

    Raises:
        IndexError: If ``features`` is an empty list.
        ValueError: If ``model_type`` is neither supported constant.
    """

    def build_term(index):
        # Factor term for categorical features, spline term for the rest.
        if features[index].is_factor():
            return f(index)
        return s(index, n_splines=self.num_splines)

    model_spec = build_term(0)
    for i in range(1, len(features)):
        model_spec += build_term(i)

    if model_type == TYPE_LINEAR:
        return LinearGAM(model_spec)

    if model_type == TYPE_LOGISTIC:
        return LogisticGAM(model_spec)

    # The original fell off the end here and returned None, which only
    # surfaced later as an AttributeError at the caller. Fail loudly instead.
    raise ValueError('Unsupported model_type: %r' % (model_type,))
Exemple #2
0
def GAM_linear(X, y):
    """Fit a LinearGAM by grid search and report in-sample residuals.

    The model uses spline terms on columns 0 and 1 and a factor term on
    column 2 of ``X``.

    Args:
        X: Object with ``to_numpy()`` holding the features.
        y: Object with ``to_numpy()`` holding the target.

    Returns:
        A tuple ``(gam, summary, frame)``: the fitted model, whatever
        ``gam.summary()`` returns, and a DataFrame with the predictions in
        column ``0``, the observed values in ``actual`` and their difference
        (``actual - prediction``) in ``residual``.
    """
    features = X.to_numpy()
    target = y.to_numpy()
    from pygam import LinearGAM, s, f, te

    gam = LinearGAM(s(0) + s(1) + f(2))
    gam.gridsearch(features, target)

    # Predictions are made on the training data itself.
    frame = pd.DataFrame(gam.predict(features))
    frame['actual'] = target
    frame['residual'] = frame.actual - frame[0]
    return gam, gam.summary(), frame
Exemple #3
0
    def _fit(self, X, y, mylam=None, **kwargs):
        """Create ``self.model`` and train it, optionally via grid search.

        When ``self.fit_binary_feat_as_factor_term`` is set, an explicit
        term list is built: features with exactly two distinct values (per
        ``self.X_values_counts``) get a factor term, features with more get
        a spline term, and features with fewer than two are left out.
        Otherwise the model class is instantiated without explicit terms.

        Args:
            X: Feature matrix; a DataFrame is reduced to ``X.values``.
            y: Target values.
            mylam: Candidate ``lam`` values for the grid search; defaults
                to ``self.search_lam``.
            **kwargs: Forwarded to ``fit`` / ``gridsearch``.

        Returns:
            The result of ``self.model.fit`` when ``self.search`` is false,
            otherwise ``self``.

        Raises:
            Exception: If the model has no ``statistics_`` after searching.
        """
        if isinstance(X, pd.DataFrame):
            X = X.values

        if self.fit_binary_feat_as_factor_term:
            terms = []
            for position, name in enumerate(self.feature_names):
                n_distinct = len(self.X_values_counts[name])
                if n_distinct < 2:
                    # Constant feature: give it no term at all.
                    continue
                make_term = f if n_distinct == 2 else s
                terms.append(make_term(position))

            combined = terms[0]
            for extra in terms[1:]:
                combined += extra

            self.model = self.model_cls(combined,
                                        max_iter=self.max_iter,
                                        n_splines=self.n_splines,
                                        **self.kwargs)
        else:
            self.model = self.model_cls(max_iter=self.max_iter,
                                        n_splines=self.n_splines,
                                        **self.kwargs)

        if not self.search:
            # No search requested: a single plain fit.
            return self.model.fit(X, y, **kwargs)

        lam_grid = self.search_lam if mylam is None else mylam

        try:
            print('search range from %f to %f' % (lam_grid[0], lam_grid[-1]))
            self.model.gridsearch(X, y, lam=lam_grid, **kwargs)
        except (np.linalg.LinAlgError, pygam.utils.OptimizationError) as e:
            print('Get the following error:', str(e),
                  '\nRetry the grid search')
            if hasattr(self.model, 'coef_'):
                del self.model.coef_

            # Retry recursively with the first candidate dropped.
            self._fit(X, y, mylam=lam_grid[1:], **kwargs)

        # A model without statistics_ never completed training.
        if not hasattr(self.model, 'statistics_'):
            raise Exception('Training fails.')

        return self
    def fit(self):
        """Fit a GAM on the training data and store it as ``self.explainer``.

        Features named in ``self.numerical_features`` get a spline term and
        all others a factor term, indexed by their position in
        ``self.feature_names``. ``self.mode`` picks ``LinearGAM``
        ('regression') or ``LogisticGAM`` ('classification'); the model is
        trained with ``gridsearch`` and ``self._is_fitted`` is set.

        Raises:
            NameError: If ``self.mode`` is neither supported value.
        """
        terms = s(0) if self.feature_names[0] in self.numerical_features else f(0)
        for idx in range(1, len(self.feature_names)):
            is_numeric = self.feature_names[idx] in self.numerical_features
            terms += s(idx) if is_numeric else f(idx)

        if self.mode == 'regression':
            gam = LinearGAM(terms)
            # Regression passes the training features through unchanged.
            train_features = self.X_train
        elif self.mode == 'classification':
            gam = LogisticGAM(terms)
            # Classification converts the training features to an array.
            train_features = np.array(self.X_train)
        else:
            raise NameError(
                'ERROR: mode should be regression or classification')

        gam.gridsearch(train_features, self.y_train)
        self._is_fitted = True
        self.explainer = gam
Exemple #5
0
def lingam(term='spline'):
    """Return an unfitted ``LinearGAM`` with a single term on feature 0.

    The model's parameters are also passed to
    ``utils.display_get_params`` under the title
    'LinearGAM Model Description'.

    Args:
        term: Kind of term to use: 'linear', 'spline' (default) or
            'factor'.

    Returns:
        The unfitted ``LinearGAM`` instance.

    Raises:
        ValueError: If ``term`` is not one of the three supported names.
    """
    # Compare by value: the original used `is`, which tests object identity
    # and rejects equal strings that are not the same interned object.
    if term == 'linear':
        regmod = LinearGAM(l(0))
    elif term == 'spline':
        regmod = LinearGAM(s(0))
    elif term == 'factor':
        regmod = LinearGAM(f(0))
    else:
        raise ValueError('Given Gam term unknown')

    utils.display_get_params('LinearGAM Model Description',
                             regmod.get_params())
    return regmod
Exemple #6
0
# Print the summary of `gam2`, a model created before this excerpt.
gam2.summary()

import pandas as pd

# Pairwise correlation between the columns of X (X is defined earlier in the
# file). As a bare expression the result is discarded unless this runs in an
# interactive session.
pd.DataFrame(X).corr()

######################################################
# regression

from pygam import LinearGAM, s, f
from pygam.datasets import wage

# Load the wage dataset as a feature matrix X and a target vector y.
X, y = wage(return_X_y=True)

## model
# Spline terms on columns 0 and 1, a factor term on column 2.
gam = LinearGAM(s(0) + s(1) + f(2))
# Train through a grid search with its default settings.
gam.gridsearch(X, y)
gam.summary()

## plotting
# NOTE(review): `plt` is not imported in this excerpt -- presumably
# matplotlib.pyplot is imported elsewhere; confirm.
# NOTE(review): plt.figure() is called right before plt.subplots(), which
# builds its own figure; the first call looks redundant -- confirm.
plt.figure()
fig, axs = plt.subplots(1, 3)

# NOTE(review): `titles` is never read in the visible part of the loop; the
# snippet may be truncated before a set_title call.
titles = ['year', 'age', 'education']
for i, ax in enumerate(axs):
    # One panel per term: evaluation grid for term i, then its partial
    # dependence plotted against feature column i.
    XX = gam.generate_X_grid(term=i)
    ax.plot(XX[:, i], gam.partial_dependence(term=i, X=XX))
    # With `width` given, element [1] of the result is drawn as the dashed
    # red 95% band.
    ax.plot(XX[:, i],
            gam.partial_dependence(term=i, X=XX, width=.95)[1],
            c='r',
            ls='--')
Exemple #7
0
from pygam.datasets import wage
from pygam import LinearGAM, s, f
import numpy as np
import matplotlib.pyplot as plt

# Load the wage dataset.
X, y = wage()

# Baseline fit: 5 splines for column 0, default spline on column 1, factor
# term on column 2.
gam = LinearGAM(s(0, n_splines=5) + s(1) + f(2)).fit(X, y)

gam.summary()

# Regular grid: the same 5 log-spaced candidates (1e-3 .. 1e5) for each of
# the 3 terms.
lam = np.logspace(-3, 5, 5)
lams = [lam] * 3

gam.gridsearch(X, y, lam=lams)
gam.summary()

# Random search: 100 candidate lam triples, one value per term.
lams = np.random.rand(100, 3)  # random points on [0, 1], with shape (100, 3)
lams = lams * 8 - 3  # rescale values to the range [-3, 5]
lams = np.exp(lams)  # exponentiate: values now lie between e**-3 and e**5

random_gam = LinearGAM(s(0) + s(1) + f(2)).gridsearch(X, y, lam=lams)
random_gam.summary()

# True when the regular grid reached a lower GCV score than random search.
print(gam.statistics_['GCV'] < random_gam.statistics_['GCV'])

# Walk over the model terms, skipping the intercept.
# NOTE(review): XX is computed but never used below -- the snippet appears to
# be truncated before the plotting code.
for i, term in enumerate(gam.terms):
    if term.isintercept:
        continue

    XX = gam.generate_X_grid(term=i)
#https://pygam.readthedocs.io/en/latest/notebooks/quick_start.html#
import pygam

from pygam.datasets import wage

# Load the wage dataset.
X, y = wage()

from pygam import LinearGAM, s, f

# Fit a spline term to the first two features and a factor term to the
# third feature.

gam = LinearGAM(s(0) + s(1) + f(2)).fit(X, y)

# Print the fitted model's summary.
gam.summary()
Exemple #9
0
# https://pygam.readthedocs.io/en/latest/notebooks/quick_start.html

from pygam.datasets import wage

# Load the wage dataset.
X, y = wage()

# Bare expression left over from a notebook: it shows the shapes only in an
# interactive session and has no effect in a script.
X.shape, y.shape

from pygam import LinearGAM, s, f

# Baseline model: splines on columns 0 and 1, factor term on column 2.
gam = LinearGAM(s(0) + s(1) + f(2)).fit(X, y)
gam.summary()

# By default s() uses 20 basis functions; here column 0 is limited to 5.
gam2 = LinearGAM(s(0, n_splines=5) + s(1) + f(2)).fit(X, y)
gam2.summary()

# By default every term has a lambda penalty of 0.6.
# Run a grid search to optimise lambda,
# scored by GCV (generalized cross-validation).

import numpy as np

# The same 5 log-spaced candidates (1e-3 .. 1e5) for each of the 3 terms.
lam = np.logspace(-3, 5, 5)
lams = [lam] * 3
# Bare expression (notebook display only).
lams

gam3 = LinearGAM(s(0) + s(1) + f(2))
gam3.gridsearch(X, y, lam=lams)
gam3.summary()
Exemple #10
0
def gam_wave_1(df, df_train: pd.DataFrame):
    """Fill missing target values in ``df`` and ``df_train`` with GAM predictions.

    For every column named in the module-level ``wave_1_gam`` a LinearGAM is
    fitted on the ``predictors_wave_1`` columns of ``df_train`` and used to
    predict that column; the predictions only replace values that are
    missing (via ``fillna``), aligned on ('galaxy', 'galactic year').

    Args:
        df: Frame to complete. It must contain the ``predictors_wave_1``
            columns, including 'galaxy' and 'galactic year'.
        df_train: Training frame. NOTE: its 'galaxy' column is overwritten
            in place with integer codes, so the caller's object is modified.

    Returns:
        Tuple ``(df, df_train_return)``: the completed ``df`` and a completed
        copy of ``df_train`` taken before the galaxy column was encoded.

    Raises:
        KeyError: If ``df`` contains a galaxy that is absent from
            ``df_train`` (the lookup in ``galaxy_to_int`` fails).
    """

    # Galaxy label -> integer code, in order of first appearance in df_train.
    galaxy_to_int = dict(
        (i, g) for g, i in enumerate(df_train.galaxy.unique()))
    # Reverse mapping, used to restore the labels after predicting.
    int_to_galaxy = {v: k for k, v in galaxy_to_int.items()}

    # Only rows with every predictor present can be predicted.
    tmp_df_to_predict = df.loc[:, predictors_wave_1].dropna()

    tmp_df_to_change_train = df_train.loc[:, predictors_wave_1].dropna()

    # Copy taken before the encoding below so it keeps the galaxy labels.
    df_train_return = df_train.copy()

    # In-place encoding of the galaxy column on the caller's frame.
    df_train.loc[:, ['galaxy']] = [galaxy_to_int[i] for i in df_train.galaxy]

    for TARGET in wave_1_gam:
        tmp_df_to_train_on = df_train.loc[:, predictors_wave_1 + [TARGET]]

        # Train only on rows where the predictors and the target are present.
        tmp_df_to_train_on = tmp_df_to_train_on.dropna()

        X = tmp_df_to_train_on.loc[:, tmp_df_to_train_on.columns != TARGET]
        y = tmp_df_to_train_on.loc[:, TARGET]

        # 500 random lam candidates, one value per term (9 terms below),
        # each between e**-3 and e**3.
        lams = np.exp(np.random.random(size=(500, 9)) * 6 - 3)

        # presumably column 0 is the encoded galaxy and column 1 the galactic
        # year (depends on the order of predictors_wave_1) -- TODO confirm.
        gam = LinearGAM(
            s(0, dtype='categorical', by=1) + f(0) + s(1) + s(2) + s(3) +
            s(4) + s(5) + s(6) + s(7)).gridsearch(np.array(X),
                                                  np.array(y),
                                                  lam=lams)

        # Encode the galaxy labels so the frames match the training input.
        tmp_df_to_predict.loc[:, ['galaxy']] = [
            galaxy_to_int[i] for i in tmp_df_to_predict.galaxy
        ]
        tmp_df_to_change_train.loc[:, ['galaxy']] = [
            galaxy_to_int[i] for i in tmp_df_to_change_train.galaxy
        ]

        # Predict before adding the TARGET column, so only predictors are fed.
        tmp_df_to_predict[TARGET] = gam.predict(tmp_df_to_predict)
        tmp_df_to_change_train[TARGET] = gam.predict(tmp_df_to_change_train)

        # Restore the labels so the index built below matches df's galaxies.
        tmp_df_to_predict.loc[:, ['galaxy']] = [
            int_to_galaxy[i] for i in tmp_df_to_predict.galaxy
        ]
        tmp_df_to_change_train.loc[:, ['galaxy']] = [
            int_to_galaxy[i] for i in tmp_df_to_change_train.galaxy
        ]

        # Fill only the missing cells, aligned on (galaxy, galactic year).
        df = df.set_index(['galaxy', 'galactic year']).fillna(
            tmp_df_to_predict.set_index(['galaxy',
                                         'galactic year'])).reset_index()

        df_train_return = df_train_return.set_index([
            'galaxy', 'galactic year'
        ]).fillna(tmp_df_to_change_train.set_index(['galaxy', 'galactic year'
                                                    ])).reset_index()

        # Drop the prediction column so the next target starts from the
        # predictors only.
        tmp_df_to_predict = tmp_df_to_predict.drop(columns=[TARGET], axis=1)
        tmp_df_to_change_train = tmp_df_to_change_train.drop(columns=[TARGET],
                                                             axis=1)

    return df, df_train_return
#'inverse'
#'log'
#'inverse-squared'

from pygam.datasets import wage
import matplotlib.pyplot as plt

# Load the wage dataset as a feature matrix X and a target vector y.
X, y = wage(return_X_y=True)

# X[0] is the year (does X[0] = 0 mean the year 2000? ...)
# X[1] is the person's age
# X[2] is their education level: 0 = basic, 1 = upper secondary, 2 = university, 3 = postgraduate
# y is income ($)

## model
# NOTE(review): LinearGAM, s and f are not imported in the visible part of
# this snippet -- presumably imported earlier in the file; confirm.
# Splines on columns 0 and 1, factor term on column 2, no intercept.
gam1 = LinearGAM(s(0) + s(1) + f(2), fit_intercept=False)
gam1.gridsearch(X, y)

## plotting
# NOTE(review): this figure is opened before plt.subplots() builds its own
# figure, so the figsize given here may not apply to the panels -- confirm.
plt.figure(figsize=(10, 7.5))
fig, axs = plt.subplots(1, 3)
titles = ['year', 'age', 'education']
for i, ax in enumerate(axs):
    # One panel per term: partial dependence against feature column i.
    XX = gam1.generate_X_grid(term=i)
    ax.plot(XX[:, i], gam1.partial_dependence(term=i, X=XX))
    # With `width` given, element [1] of the result is drawn as the dashed
    # red 95% band.
    ax.plot(XX[:, i],
            gam1.partial_dependence(term=i, X=XX, width=.95)[1],
            c='r',
            ls='--')
    ax.set_title(titles[i])
# Set after the plotting code above has already run.
plt.rcParams['figure.figsize'] = [10, 7.5]
Exemple #12
0
# Build a frame from `dataset` / `column_name`, both defined before this
# excerpt.
df = pd.DataFrame(dataset, columns=column_name)
# df1 = pd.DataFrame(dataset1, columns=column_name)

db.connect.commit()

# Keep the rows whose 'date' compares below '2020-09-01'.
train_value = df['2020-09-01' > df['date']]

# Every column from the second one onward as float features; 'value' is the
# target.
x_train1 = train_value.iloc[:, 1:].astype('float64')
y_train1 = train_value['value'].astype('float64').to_numpy()

x_train2 = train_value['rain'].astype('float64')

from pygam import LinearGAM, s, f
from pygam.datasets import wage
# NOTE(review): this overwrites x_train2 and y_train1 with the wage dataset,
# so the model below is fitted on wage data rather than the frame built
# above. It looks like a debugging leftover -- confirm.
x_train2, y_train1 = wage()
gam = LinearGAM(s(0) + s(1) + f(2)).fit(x_train2,
                                        y_train1)  # s(0) + s(1) + f(2)
gam.summary()

import matplotlib.pyplot as plt

# One figure per non-intercept term: partial dependence plus its 95% band.
for i, term in enumerate(gam.terms):
    if term.isintercept:
        continue

    XX = gam.generate_X_grid(term=i)
    pdep, confi = gam.partial_dependence(term=i, X=XX, width=0.95)

    plt.figure()
    # The x axis is the column of the feature this term is built on.
    plt.plot(XX[:, term.feature], pdep)
    plt.plot(XX[:, term.feature], confi, c='r', ls='--')
Exemple #13
0
import pygam as pg
import pandas as pd
import matplotlib.pyplot as plt

# Load the data from 'full.csv' in the working directory.
filtered = pd.read_csv('full.csv')

# Drop rows with score == 0.5 and rows with time_awake of 24 or more.
filtered = filtered.query('score != 0.5').query('time_awake<24')
# Logistic GAM on 'win'. Term indexes follow the column order of the list
# below: 'hour' (0) uses the 'cp' basis, 'day' (1) is a factor term, and
# the remaining four columns get default spline terms.
gam = pg.LogisticGAM(
    pg.s(0, basis='cp') + pg.f(1) + pg.s(2) + pg.s(3) + pg.s(4) +
    pg.s(5)).fit(X=filtered[[
        'hour', 'day', 'within_day', 'last_3_days', 'rating_diff', 'time_awake'
    ]],
                 y=filtered['win'])

gam.summary()


##hour has to be a cyclical smoother
def draw_terms(gam, plt=None):
    """Plot the partial dependence of every non-intercept term of ``gam``.

    Each term gets its own figure showing the partial dependence curve and
    its 95% band (dashed red), titled with ``repr(term)``. Every figure is
    shown as soon as it is drawn.

    Args:
        gam: Fitted model exposing ``terms``, ``generate_X_grid`` and
            ``partial_dependence``.
        plt: pyplot-like module used for drawing. Defaults to
            ``matplotlib.pyplot`` when omitted.
    """
    if plt is None:
        # The declared default used to be dereferenced directly, so calling
        # draw_terms(gam) failed with AttributeError on the first term.
        import matplotlib.pyplot as plt

    for i, term in enumerate(gam.terms):
        if term.isintercept:
            continue

        XX = gam.generate_X_grid(term=i)
        pdep, confi = gam.partial_dependence(term=i, X=XX, width=0.95)

        plt.figure()
        # The x axis is the column of the feature this term is built on.
        plt.plot(XX[:, term.feature], pdep)
        plt.plot(XX[:, term.feature], confi, c='r', ls='--')
        plt.title(repr(term))
        plt.show()
Exemple #14
0
    elif row['maritl'] == '2. Married':
        val = 1
    elif row['maritl'] == '3. Widowed':
        val = 2
    elif row['maritl'] == '4. Divorced':
        val = 3
    else:
        val = 4
    return val


# Encode marital status row by row with marital_d, and job class as 0 for
# '1. Industrial' and 1 for anything else.
Wage['mar_d'] = Wage.apply(marital_d, axis=1)
Wage['job_d'] = np.where(Wage['jobclass'] == '1. Industrial', 0, 1)

# Feature frame in term order: age (0), marital code (1), job class code (2).
x3 = pd.concat([Wage['age'], Wage['mar_d'], Wage['job_d']], axis=1)
# 4-spline smooth on age, factor terms on the two encoded columns.
gam = LinearGAM(s(0, n_splines=4) + f(1) + f(2)).fit(x3, Wage['wage'])

# One figure per non-intercept term: partial dependence plus its 95% band.
for i, term in enumerate(gam.terms):
    if term.isintercept:
        continue

    XX = gam.generate_X_grid(term=i)
    pdep, confi = gam.partial_dependence(term=i, X=XX, width=0.95)

    plt.figure()
    # The x axis is the column of the feature this term is built on.
    plt.plot(XX[:, term.feature], pdep)
    plt.plot(XX[:, term.feature], confi, c='r', ls='--')
    plt.title(repr(term))
    plt.show()

# ----------------------------------------------------------------------------