Python LinearGAM.gridsearchの例、pygam.LinearGAM.gridsearch Pythonの例

コード例 #1

1

ファイルを表示

ファイル: GAM_playing.py プロジェクト: IanEisenberg/Self_Regulation_Ontology

def run_GAM(X, Y, get_importance=False, n_splines=20, folds=10):
    """Cross-validate a spline GAM for every target column of ``Y``.

    One spline term is created per column of ``X``. For each column of ``Y``
    a fresh ``LinearGAM`` is grid-searched on every training fold yielded by
    ``BalancedKFold(folds)``, out-of-fold predictions are collected, and the
    last model is grid-searched once more on the full data to obtain
    in-sample scores.

    Args:
        X: predictor DataFrame (accessed through ``.iloc`` and ``.columns``).
        Y: DataFrame of targets; every column is modelled separately.
        get_importance: if true, ``get_importances`` is called on each fold
            and its per-predictor values are accumulated.
        n_splines: second positional argument given to every ``s`` term.
        folds: argument forwarded to ``BalancedKFold``.

    Returns:
        dict mapping each target name to a dict with the keys 'scores_cv',
        'scores_insample', 'pred_vars', 'importances' and 'model'.
    """
    # set up GAM: one spline term per predictor column
    formula = s(0, n_splines)
    for i in range(1, X.shape[1]):
        formula = formula + s(i, n_splines)
    gam = LinearGAM(formula)
    # NOTE(review): this fit targets the first predictor column and the model
    # is replaced inside the loop below; it may exist only to initialise
    # `formula` against X -- confirm before removing.
    gam.fit(X, X.iloc[:, 0])

    # run full model
    GAM_results = {}
    # DataFrame.items() replaces iteritems(), which pandas 2.0 removed.
    for name, y in Y.items():
        print("\nFitting for %s\n" % name)
        CV = BalancedKFold(folds)
        importances = {k: [] for k in X.columns}
        pred = np.zeros(y.shape[0])
        for train, test in CV.split(X, y):
            Xtrain = X.iloc[train, :]
            ytrain = y.iloc[train]
            Xtest = X.iloc[test, :]
            ytest = y.iloc[test]
            gam = LinearGAM(formula)
            gam.gridsearch(Xtrain, ytrain)

            # out of fold
            p = gam.predict(Xtest)
            if len(p.shape) > 1:
                p = p[:, 0]
            pred[test] = p

            if get_importance:
                # get importances, defined as the predictive ability of each
                # variable on its own
                importance_out = get_importances(Xtrain, ytrain, Xtest, ytest)
                for k, v in importance_out.items():
                    importances[k].append(v)

        r_cv = np.corrcoef(y, pred)[0, 1]
        cv_scores = [{'r': r_cv,
                      'R2': r_cv**2,
                      'MAE': mean_absolute_error(y, pred)}]

        # insample: the model left over from the last fold is re-searched on
        # all of the data
        gam.gridsearch(X, y)
        in_pred = gam.predict(X)
        r_in = np.corrcoef(y, in_pred)[0, 1]
        in_scores = [{'r': r_in,
                      'R2': r_in**2,
                      'MAE': mean_absolute_error(y, in_pred)}]
        GAM_results[name] = {'scores_cv': cv_scores,
                             'scores_insample': in_scores,
                             'pred_vars': X.columns,
                             'importances': importances,
                             'model': gam}
    return GAM_results

コード例 #2

0

ファイルを表示

def BAM(X, y):
    """Grid-search a LinearGAM with two cubic-order splines and their tensor term.

    Returns the model object on which ``gridsearch(X, y)`` was called.
    """
    # model implementation by PYGAM
    terms = s(0, spline_order=3) + s(1, spline_order=3) + te(0, 1)
    model = LinearGAM(terms)
    model.gridsearch(X, y)
    return model

コード例 #3

0

ファイルを表示

def run_GAM(X, Y, get_importance=False, n_splines=20, folds=10):
    """Fit and cross-validate one spline GAM per target column of ``Y``.

    A spline term is built for each column of ``X``. Every target is scored
    out-of-fold using the splits from ``BalancedKFold(folds)`` (a new
    ``LinearGAM`` is grid-searched per fold) and in-sample after a final
    grid search on all rows.

    Args:
        X: predictor DataFrame (accessed through ``.iloc`` and ``.columns``).
        Y: DataFrame of targets; each column is handled independently.
        get_importance: if true, collect ``get_importances`` output per fold.
        n_splines: second positional argument given to every ``s`` term.
        folds: argument forwarded to ``BalancedKFold``.

    Returns:
        dict keyed by target name; each value holds 'scores_cv',
        'scores_insample', 'pred_vars', 'importances' and 'model'.
    """
    # set up GAM: one spline term per predictor column
    formula = s(0, n_splines)
    for i in range(1, X.shape[1]):
        formula = formula + s(i, n_splines)
    gam = LinearGAM(formula)
    # NOTE(review): fits against the first predictor column and the result is
    # discarded inside the loop; possibly only there to initialise `formula`
    # against X -- confirm before removing.
    gam.fit(X, X.iloc[:, 0])

    # run full model
    GAM_results = {}
    # DataFrame.items() replaces iteritems(), which pandas 2.0 removed.
    for name, y in Y.items():
        print("\nFitting for %s\n" % name)
        CV = BalancedKFold(folds)
        importances = {k: [] for k in X.columns}
        pred = np.zeros(y.shape[0])
        for train, test in CV.split(X, y):
            Xtrain = X.iloc[train, :]
            ytrain = y.iloc[train]
            Xtest = X.iloc[test, :]
            ytest = y.iloc[test]
            gam = LinearGAM(formula)
            gam.gridsearch(Xtrain, ytrain)

            # out of fold
            p = gam.predict(Xtest)
            if len(p.shape) > 1:
                p = p[:, 0]
            pred[test] = p

            if get_importance:
                # get importances, defined as the predictive ability of each
                # variable on its own
                importance_out = get_importances(Xtrain, ytrain, Xtest, ytest)
                for k, v in importance_out.items():
                    importances[k].append(v)

        # correlation between observed values and out-of-fold predictions
        r_cv = np.corrcoef(y, pred)[0, 1]
        cv_scores = [{
            'r': r_cv,
            'R2': r_cv**2,
            'MAE': mean_absolute_error(y, pred)
        }]

        # insample: re-search the last fold's model on the full data
        gam.gridsearch(X, y)
        in_pred = gam.predict(X)
        r_in = np.corrcoef(y, in_pred)[0, 1]
        in_scores = [{
            'r': r_in,
            'R2': r_in**2,
            'MAE': mean_absolute_error(y, in_pred)
        }]
        GAM_results[name] = {
            'scores_cv': cv_scores,
            'scores_insample': in_scores,
            'pred_vars': X.columns,
            'importances': importances,
            'model': gam
        }
    return GAM_results

コード例 #4

0

ファイルを表示

ファイル: pspline.py プロジェクト: mycarta/wotan

def pspline(time, flux, edge_cutoff, max_splines, return_nsplines, verbose):
    """Estimate a trend with an iteratively sigma-clipped penalized spline.

    In each iteration the points whose detrended flux deviates from 1 by less
    than ``constants.PSPLINES_STDEV_CUT`` standard deviations are kept, a
    single-term spline ``LinearGAM`` is grid-searched on them, and the number
    of remaining outliers is counted. Iteration stops when no outliers remain
    or after ``constants.PSPLINES_MAXITER`` rounds. A final model is then fit
    on the clipped data and evaluated at every value of ``time``.

    Args:
        time: array of time stamps (must support ``.copy()`` and indexing).
        flux: array of flux values matching ``time``.
        edge_cutoff: if > 0, trend values within this distance of the first
            and last time stamps are set to NaN.
        max_splines: ``n_splines`` passed to the spline term.
        return_nsplines: accepted for interface compatibility; not read in
            this function (both values are always returned).
        verbose: print per-iteration progress when true.

    Returns:
        tuple ``(trend, nsplines)`` where ``nsplines`` is the ceiling of the
        final model's ``statistics_['edof']``.

    Raises:
        ImportError: if pygam cannot be imported.
    """
    try:
        from pygam import LinearGAM, s
    except ImportError as err:
        # Only import failures are translated; other errors propagate as-is.
        raise ImportError('Could not import pygam') from err

    newflux = flux.copy()
    newtime = time.copy()
    detrended_flux = flux.copy() / np.nanmedian(newflux)

    for i in range(constants.PSPLINES_MAXITER):
        mask_outliers = np.ma.where(
            np.abs(1 - detrended_flux) < constants.PSPLINES_STDEV_CUT *
            np.std(detrended_flux))
        newtime, newflux = cleaned_array(newtime[mask_outliers],
                                         newflux[mask_outliers])
        gam = LinearGAM(s(0, n_splines=max_splines))
        search_gam = gam.gridsearch(newtime[:, np.newaxis],
                                    newflux,
                                    progress=False)
        trend = search_gam.predict(newtime)
        detrended_flux = newflux / trend
        stdev = np.std(detrended_flux)
        mask_outliers = np.ma.where(
            np.abs(1 - detrended_flux) > constants.PSPLINES_STDEV_CUT * stdev)
        n_rejected = len(mask_outliers[0])
        if verbose:
            print('Iteration:', i + 1, 'Rejected outliers:', n_rejected)
        # Check convergence regardless of verbosity: previously this test was
        # nested under `if verbose`, so quiet runs always used every iteration.
        if n_rejected == 0:
            if verbose:
                print('Converged.')
            break

    # Final iteration, applied to unclipped time series (interpolated over clipped values)
    mask_outliers = np.ma.where(
        np.abs(1 - detrended_flux) < constants.PSPLINES_STDEV_CUT * stdev)
    newtime, newflux = cleaned_array(newtime[mask_outliers],
                                     newflux[mask_outliers])
    gam = LinearGAM(s(0, n_splines=max_splines))
    search_gam = gam.gridsearch(newtime[:, np.newaxis],
                                newflux,
                                progress=False)
    trend = search_gam.predict(time)

    # Cut off edges
    if edge_cutoff > 0:
        # argmax on a boolean array gives the first index where it is True
        low_index = np.argmax(time > (min(time) + edge_cutoff))
        hi_index = np.argmax(time > (max(time) - edge_cutoff))
        trend[:low_index] = np.nan
        trend[hi_index:] = np.nan

    nsplines = np.ceil(gam.statistics_['edof'])
    return trend, nsplines

コード例 #5

0

ファイルを表示

def GAM_linear(X, y):
    """Grid-search a LinearGAM (two spline terms, one factor term) and tabulate residuals.

    ``X`` and ``y`` are converted with ``.to_numpy()`` before fitting.

    Returns:
        tuple of (model, value returned by ``model.summary()``, DataFrame with
        the predictions in column ``0`` plus 'actual' and 'residual' columns).
    """
    features = X.to_numpy()
    target = y.to_numpy()
    from pygam import LinearGAM, s, f, te
    model = LinearGAM(s(0) + s(1) + f(2))
    model.gridsearch(features, target)
    # in-sample predictions land in the column labelled 0
    table = pd.DataFrame(model.predict(features))
    table['actual'] = target
    table['residual'] = table['actual'] - table[0]
    return model, model.summary(), table

コード例 #6

0

ファイルを表示

ファイル: GAM_playing.py プロジェクト: IanEisenberg/Self_Regulation_Ontology

def get_importances(X, y, Xtest, ytest):
    """Score each predictor by how well it predicts the target on its own.

    For every column of ``X`` a single-spline ``LinearGAM`` without intercept
    is fit and grid-searched on that column, then used to predict from the
    same-named column of ``Xtest``.

    Args:
        X: training predictors (DataFrame; iterated column by column).
        y: training target.
        Xtest: test predictors, indexed by the column labels of ``X``.
        ytest: test target.

    Returns:
        dict mapping column label to the squared correlation between
        ``ytest`` and that column's predictions. Empty if ``X`` has no columns.
    """
    importances = {}
    # DataFrame.items() replaces iteritems(), which pandas 2.0 removed.
    for predictor, vals in X.items():
        gam = LinearGAM(s(0), fit_intercept=False)
        gam.fit(vals, y)
        gam.gridsearch(vals, y)
        pred = gam.predict(Xtest[predictor])
        # define importances as the R2 for that factor alone
        R2 = np.corrcoef(ytest, pred)[0, 1]**2
        importances[predictor] = R2
    return importances

コード例 #7

0

ファイルを表示

def get_importances(X, y, Xtest, ytest):
    """Return a per-predictor importance: the R2 of a one-variable GAM.

    Each column of ``X`` gets its own intercept-free, single-spline
    ``LinearGAM`` that is fit, grid-searched, and evaluated on the matching
    column of ``Xtest``.

    Args:
        X: training predictors (DataFrame; iterated column by column).
        y: training target.
        Xtest: test predictors, indexed by the column labels of ``X``.
        ytest: test target.

    Returns:
        dict of column label -> squared correlation between ``ytest`` and the
        predictions from that column alone. Empty if ``X`` has no columns.
    """
    importances = {}
    # DataFrame.items() replaces iteritems(), which pandas 2.0 removed.
    for predictor, vals in X.items():
        gam = LinearGAM(s(0), fit_intercept=False)
        gam.fit(vals, y)
        gam.gridsearch(vals, y)
        pred = gam.predict(Xtest[predictor])
        # define importances as the R2 for that factor alone
        R2 = np.corrcoef(ytest, pred)[0, 1]**2
        importances[predictor] = R2
    return importances

コード例 #8

0

ファイルを表示

    def GAM1(self):
        """Compare a purely linear GAM with an 'auto'-term GAM on test MSE.

        Both models are built without intercept, grid-searched over
        ``lam=np.logspace(-3, 3, 11)`` on ``self.Xtrain`` / ``self.ytrain``
        and scored on ``self.Xtest`` / ``self.ytest``. If ``self.plot`` is
        truthy, predictions are plotted against the test data. The first
        element of each MSE is stored in ``self.GAM1E1P5`` and
        ``self.GAM2E1P5`` (kept for Exercise 7). Always returns 1.
        """
        from pygam import LinearGAM, s, l, f

        def _search_and_score(model, plot_title):
            # Grid-search, print the summary, and compute the test-set MSE.
            fitted = model.gridsearch(self.Xtrain.values, self.ytrain.values,
                                      lam=np.logspace(-3, 3, 11))
            fitted.summary()  # Pseudo-R2: 0.6449
            predicted = fitted.predict(self.Xtest)
            mse = np.mean((self.ytest - predicted.reshape(-1, 1))**2).values

            if self.plot:
                plt.plot(predicted.reshape(-1, 1), label='GAM model')
                plt.plot(self.ytest, label='Testing Data')
                plt.legend()
                plt.title(plot_title)
                plt.ylabel("FFVC score")
                plt.xlabel("Sample no.")
                plt.show()
            return mse

        # First model: one linear term for each of the feature indices 0..23.
        linear_terms = l(0)
        for feature_idx in range(1, 24):
            linear_terms = linear_terms + l(feature_idx)
        mse_linear = _search_and_score(
            LinearGAM(terms=linear_terms, fit_intercept=False),
            "GAM model with linear terms")

        # Second model: repeat the study with terms='auto'.
        mse_auto = _search_and_score(
            LinearGAM(terms='auto', fit_intercept=False),
            "GAM model with spline terms")

        print(f"Linear GAM produced MSE={mse_linear}," + "\n"
              f"Spline addition produced MSE={mse_auto}")
        # Save these values for Exercise 7.
        self.GAM1E1P5 = mse_linear[0]
        self.GAM2E1P5 = mse_auto[0]

        return 1

コード例 #9

0

ファイルを表示

def pspline(time, flux):
    """Estimate a trend with an iteratively clipped penalized spline.

    Each iteration keeps the points where ``1 - detrended_flux`` is below
    ``constants.PSPLINES_STDEV_CUT`` standard deviations, grid-searches a
    single-term spline ``LinearGAM`` on them, and counts the points above the
    threshold. The loop ends when none remain or after
    ``constants.PSPLINES_MAXITER`` rounds. A final model is fit on the clipped
    data and evaluated at every value of ``time``.

    Args:
        time: array of time stamps (must support ``.copy()`` and indexing).
        flux: array of flux values matching ``time``.

    Returns:
        The trend predicted at ``time``.

    Raises:
        ImportError: if pygam cannot be imported.
    """
    try:
        from pygam import LinearGAM, s
    except ImportError as err:
        # Only import failures are translated; other errors propagate as-is.
        raise ImportError('Could not import pygam') from err

    newflux = flux.copy()
    newtime = time.copy()
    detrended_flux = flux.copy()

    for i in range(constants.PSPLINES_MAXITER):
        mask_outliers = numpy.ma.where(
            1 - detrended_flux < constants.PSPLINES_STDEV_CUT *
            numpy.std(detrended_flux))
        newtime, newflux = cleaned_array(newtime[mask_outliers],
                                         newflux[mask_outliers])
        gam = LinearGAM(s(0, n_splines=constants.PSPLINES_MAX_SPLINES))
        search_gam = gam.gridsearch(newtime[:, numpy.newaxis],
                                    newflux,
                                    progress=False)
        trend = search_gam.predict(newtime)
        detrended_flux = newflux / trend
        stdev = numpy.std(detrended_flux)
        mask_outliers = numpy.ma.where(
            1 - detrended_flux > constants.PSPLINES_STDEV_CUT * stdev)
        print('Iteration:', i + 1, 'Rejected outliers:', len(mask_outliers[0]))

        # Check convergence
        if len(mask_outliers[0]) == 0:
            print('Converged.')
            break

    # Final iteration, applied to unclipped time series (interpolated over clipped values)
    mask_outliers = numpy.ma.where(
        1 - detrended_flux < constants.PSPLINES_STDEV_CUT * stdev)
    newtime, newflux = cleaned_array(newtime[mask_outliers],
                                     newflux[mask_outliers])
    gam = LinearGAM(s(0, n_splines=constants.PSPLINES_MAX_SPLINES))
    search_gam = gam.gridsearch(newtime[:, numpy.newaxis],
                                newflux,
                                progress=False)
    trend = search_gam.predict(time)

    return trend

コード例 #10

0

ファイルを表示

def GAM_model(df, feature_list):
    """Fit a six-spline LinearGAM on min-max scaled features and report in-sample fit.

    Args:
        df: DataFrame containing the columns in ``feature_list`` and 'logerror'.
        feature_list: column labels used as predictors (terms 0..5 are used).

    Returns:
        tuple ``(RMSE, R2, gam)`` with both scores rounded via 3-decimal
        formatting.
    """
    features = df[feature_list]
    target = df[['logerror']]

    # Scale every feature into [0, 1], keeping the original labels and index.
    scaler = MinMaxScaler(copy=True, feature_range=(0, 1)).fit(features)
    scaled = pd.DataFrame(scaler.transform(features),
                          columns=features.columns.values)
    scaled = scaled.set_index([features.index.values])

    scaled = scaled.to_numpy()
    target = target.to_numpy()

    from pygam import LinearGAM, s, f, te
    gam = LinearGAM(s(0) + s(1) + s(2) + s(3) + s(4) + s(5))
    gam.gridsearch(scaled, target)

    # Side-by-side table of in-sample predictions and observed values.
    table = pd.DataFrame(gam.predict(scaled))
    table['actual'] = target
    table.columns = ['predicted', 'actual']

    rmse_value = sqrt(mean_squared_error(table.actual, table.predicted))
    r2_value = r2_score(table.actual, table.predicted)
    RMSE = float(format(rmse_value, '.3f'))
    R2 = float(format(r2_value, '.3f'))
    return RMSE, R2, gam

コード例 #11

0

ファイルを表示

ファイル: gam_explainer.py プロジェクト: achraf-azize/Stage2020-CA-MAP594

    def fit(self):
        """Build one GAM term per feature and grid-search the surrogate model.

        Features listed in ``self.numerical_features`` get a spline term, all
        others a factor term. ``self.mode`` selects ``LinearGAM``
        ('regression') or ``LogisticGAM`` ('classification'); on success the
        model is stored in ``self.explainer`` and ``self._is_fitted`` is set.

        Raises:
            NameError: if ``self.mode`` is neither of the two supported values.
        """
        def _term_for(position):
            # spline for numerical features, factor term otherwise
            if self.feature_names[position] in self.numerical_features:
                return s(position)
            return f(position)

        terms = _term_for(0)
        for position in range(1, len(self.feature_names)):
            terms += _term_for(position)

        if self.mode == 'regression':
            model = LinearGAM(terms)
            model.gridsearch(self.X_train, self.y_train)
        elif self.mode == 'classification':
            model = LogisticGAM(terms)
            model.gridsearch(np.array(self.X_train), self.y_train)
        else:
            raise NameError(
                'ERROR: mode should be regression or classification')

        self._is_fitted = True
        self.explainer = model

コード例 #12

0

ファイルを表示

def find_parameters_evaluation(index_set, gene_expression, cell_count_aa):
    """Pick GAM hyper-parameters by 10-fold grid search, then score with leave-one-out.

    Stage 1 grid-searches ``n_splines`` (15..34) and ``lam`` (0.5, 0.6, 0.7)
    on each KFold training split and records the selected values. Stage 2
    builds a ``LinearGAM`` from the mean of those values, refits it on every
    leave-one-out split and collects the held-out predictions.

    Args:
        index_set: positions of the ``gene_expression`` columns to use.
        gene_expression: DataFrame of predictors.
        cell_count_aa: target values, indexable by integer index arrays.

    Returns:
        tuple ``(lams_mean, n_splines_mean, rho, pval)`` where ``rho`` and
        ``pval`` come from ``spearmanr`` on observed vs. predicted values.
    """
    predicted = []
    observed = []
    picked_n_splines = []
    picked_lams = []

    target = cell_count_aa
    features = gene_expression[gene_expression.columns[index_set]]

    # Stage 1: hyper-parameter search on each of the 10 training folds.
    gam = LinearGAM()
    splitter = KFold(n_splits=10)
    for fit_rows, held_rows in splitter.split(features):
        X_train, X_test = features.iloc[fit_rows], features.iloc[held_rows]
        y_train, y_test = target[fit_rows], target[held_rows]
        gam = gam.gridsearch(X_train,
                             y_train,
                             n_splines=np.arange(15, 35),
                             lam=[0.5, 0.6, 0.7])
        picked_n_splines.append(gam.n_splines)
        picked_lams.append(gam.lam)

    lams_mean = np.array(picked_lams).mean()
    n_splines_mean = np.array(picked_n_splines).mean()

    # Stage 2: leave-one-out evaluation with the averaged hyper-parameters.
    gam = LinearGAM(n_splines=n_splines_mean, lam=lams_mean)
    for fit_rows, held_rows in LeaveOneOut().split(features):
        X_train, X_test = features.iloc[fit_rows], features.iloc[held_rows]
        y_train, y_test = target[fit_rows], target[held_rows]
        fitted = gam.fit(X_train, y_train)
        prediction_val = fitted.predict(X_test)[0]
        predicted.append(prediction_val)
        observed.append(y_test[0])
        print(held_rows)
        print(str(prediction_val), " ", str(y_test[0]))

    # Spearman correlation over all held-out predictions
    rho, pval = spearmanr(observed, predicted)

    return lams_mean, n_splines_mean, rho, pval

コード例 #13

0

ファイルを表示

ファイル: GAM_ccle.py プロジェクト: mslovett21/GAM

def find_parameters_evaluation(index_set, gene_expression, cell_count_aa):
    """Grid-search a LinearGAM once on all data, then evaluate it leave-one-out.

    ``n_splines`` (10..49) and ``lam`` (0.4..0.8) are searched on the full
    data set. The resulting model is refit on every leave-one-out training
    split; the held-out prediction and the model's ``n_splines`` / ``lam``
    are recorded each time.

    Args:
        index_set: positions of the ``gene_expression`` columns to use.
        gene_expression: DataFrame of predictors.
        cell_count_aa: target values, indexable by integer index arrays.

    Returns:
        tuple ``(lams_mean, n_splines_mean, rho, pval)`` where ``rho`` and
        ``pval`` come from ``spearmanr`` on observed vs. predicted values.
    """
    predicted, observed = [], []
    seen_n_splines, seen_lams = [], []

    target = cell_count_aa
    features = gene_expression[gene_expression.columns[index_set]]

    splitter = LeaveOneOut()
    splitter.get_n_splits(features)

    # Hyper-parameter search on the complete data set.
    gam = LinearGAM()
    gam = gam.gridsearch(features,
                         target,
                         n_splines=np.arange(10, 50),
                         lam=[0.4, 0.5, 0.6, 0.7, 0.8])

    for fit_rows, held_rows in splitter.split(features):
        X_train, X_test = features.iloc[fit_rows], features.iloc[held_rows]
        y_train, y_test = target[fit_rows], target[held_rows]
        # refit the searched model on this split and predict the held-out row
        fitted = gam.fit(X_train, y_train)
        prediction_val = fitted.predict(X_test)[0]
        predicted.append(prediction_val)
        observed.append(y_test[0])
        # record the hyper-parameters carried by the refit model
        seen_n_splines.append(fitted.n_splines)
        seen_lams.append(fitted.lam)
        print(held_rows)
        print(str(prediction_val), " ", str(y_test[0]))

    # Spearman correlation over all held-out predictions
    rho, pval = spearmanr(observed, predicted)
    lams_mean = np.array(seen_lams).mean()
    n_splines_mean = np.array(seen_n_splines).mean()
    return lams_mean, n_splines_mean, rho, pval

コード例 #14

0

ファイルを表示

def AAM():
    """Grid-search an extended additive model on the globals ``X1`` / ``y1`` and print errors.

    Prints the grid-searched model's summary followed by in-sample RMSE, MAE
    and MAPE. Returns nothing.
    """
    concave_spline = s(0, n_splines=25, spline_order=3, constraints='concave',
                       penalties='auto', basis='cp', edge_knots=[147, 147])
    terms = (concave_spline
             + l(3)  # the last travel time
             + te(0, 1)  # distance and departure_time
             + te(2, 0)  # distance and isWeekend
             + l(2))  # isWeekend
    gam = LinearGAM(terms, fit_intercept=True)

    print(gam.gridsearch(X1, y1).summary())
    # (A commented-out scatter/line plot of the first 56 samples, titled
    # 'Extended Additive Model', used to live here.)

    # error calculation on the in-sample predictions
    fitted_values = gam.predict(X1)
    rmse_val = rmse(np.array(y1), np.array(fitted_values))
    print("RMSE is: "+str(rmse_val))
    mae = mean_absolute_error(y1, fitted_values)
    print("MAE is: "+str(mae))
    mape = mean_absolute_percentage_error(np.array(y1), np.array(fitted_values))
    print("MAPE is: "+ str(mape))

コード例 #15

0

ファイルを表示

ファイル: 05.GAM.py プロジェクト: Alejandrocunef/proyectomachine

# ![Captura%20de%20pantalla%202021-01-11%20a%20las%2017.05.01.png](attachment:Captura%20de%20pantalla%202021-01-11%20a%20las%2017.05.01.png)

# #### Build the model:

# In[8]:


model = LinearGAM(n_splines=10)


# * Fit the model to our training data set:

# In[9]:


# gridsearch is called without explicit grid arguments
model.gridsearch(X_train, y_train)


# #### Prediction

# In[10]:


# Model prediction on the validation set

y_pred_validation = model.predict(X_validation)
y_pred_validation


# #### Evaluation of our model:

コード例 #16

0

ファイルを表示

# Target column taken from the loan3000 frame
y = loan3000[outcome]

# Fit three classifiers on the same X / y
loan_tree = DecisionTreeClassifier(random_state=1, criterion='entropy', 
                                   min_impurity_decrease=0.003)
loan_tree.fit(X, y)

loan_lda = LinearDiscriminantAnalysis()
loan_lda.fit(X, y)

logit_reg = LogisticRegression(penalty="l2", solver='liblinear')
logit_reg.fit(X, y)


## model
# GAM with one spline term per predictor column (two columns are used).
# The target is recoded to 1 for 'default' and 0 for anything else.
# NOTE(review): a LinearGAM is fit to this 0/1 target -- confirm that a
# linear (rather than logistic) GAM is intended.
gam = LinearGAM(s(0) + s(1))
print(gam.gridsearch(X.values, [1 if yi == 'default' else 0 for yi in y]))

# Display name -> fitted model
models = {
    'Decision Tree': loan_tree,
    'Linear Discriminant Analysis': loan_lda,
    'Logistic Regression': logit_reg,
    'Generalized Additive Model': gam,
}

# 2x2 grid of axes (one per entry in `models`)
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(5, 5))

# Regular grid over the two predictor ranges, flattened to (n_points, 2).
# Note that X is rebound here: from this point on it holds the grid points,
# not the training data the models above were fit on.
xvalues = np.arange(0.25, 0.73, 0.005)
yvalues = np.arange(-0.1, 20.1, 0.1)
xx, yy = np.meshgrid(xvalues, yvalues)
X = np.c_[xx.ravel(), yy.ravel()]

コード例 #17

0

ファイルを表示

from pygam.datasets import wage
from pygam import LinearGAM, s, f
import numpy as np
import matplotlib.pyplot as plt

X, y = wage()

# Baseline model: spline on column 0 (5 splines), spline on column 1,
# factor term on column 2
gam = LinearGAM(s(0, n_splines=5) + s(1) + f(2)).fit(X, y)

gam.summary()

# Grid search: the same 5 candidate lam values (1e-3 .. 1e5, log-spaced)
# for each of the 3 terms
lam = np.logspace(-3, 5, 5)
lams = [lam] * 3

gam.gridsearch(X, y, lam=lams)
gam.summary()

# Randomized search: 100 candidate points, one lam per term
lams = np.random.rand(100, 3)  # random points on [0, 1), with shape (100, 3)
lams = lams * 8 - 3  # shift values to [-3, 5)
lams = np.exp(lams)  # exponentiate: values now lie in [e^-3, e^5)

random_gam = LinearGAM(s(0) + s(1) + f(2)).gridsearch(X, y, lam=lams)
random_gam.summary()

# True when the grid-searched model has the lower GCV score
print(gam.statistics_['GCV'] < random_gam.statistics_['GCV'])

# Build an evaluation grid for every non-intercept term
for i, term in enumerate(gam.terms):
    if term.isintercept:
        continue

    XX = gam.generate_X_grid(term=i)

コード例 #18

0

ファイルを表示

class GAMEnsemble(EnsembleModel):
    """Implements GAM ensemble in [1].

    Base-model predictions enter a ``LinearGAM`` as linear or spline terms.
    Optionally the raw features are appended as a residual process:
    one spline per feature dimension plus, when there is more than one
    dimension, a tensor-product term across all of them.
    """

    def __init__(self, nonlinear_ensemble=False, residual_process=True):
        """
        Initializer.

        Args:
            nonlinear_ensemble: (bool) Whether use nonlinear term to transform base model.
            residual_process: (bool) Whether model residual process.
        """
        model_name = (
            "Generalized Additive Ensemble" if residual_process
            else "{} Stacking".format("Nonlinear" if nonlinear_ensemble else "Linear"))

        super().__init__(model_name)
        self.gam_model = None
        self.nonlinear_ensemble = nonlinear_ensemble
        self.model_residual = residual_process

    def train(self, X, y, base_pred):
        """Trains ensemble model based on data and base predictions.

        Sets the class attribute "gam_model" to a grid-searched LinearGAM.

        Args:
            X: (np.ndarray) Training features, shape (N, D)
            y: (np.ndarray)  Training labels, shape (N, 1)
            base_pred: (dict of np.ndarray) Dictionary of base model predictions
                With keys (str) being model name, and values (np.ndarray) being
                predictions corresponds to X and y.
        """
        # build feature and  gam terms
        ens_feature, feature_terms = self._build_ensemble_feature(X, base_pred)

        # define model
        self.gam_model = LinearGAM(feature_terms)

        # additional fine-tuning: random search over 100 lambda candidates
        lam_grid = self._build_lambda_grid(n_grid=100)
        self.gam_model.gridsearch(X=ens_feature, y=y, lam=lam_grid,
                                  progress=False)

    def predict(self, X, base_pred):
        """Predicts label based on feature and base model.

        Args:
            X: (np.ndarray) Features, shape (N, D)
            base_pred: (dict of np.ndarray) Dictionary of base model predictions
                With keys (str) being model name, and values (np.ndarray) being
                predictions corresponds to X.

        Returns:
            (tuple of np.ndarray) ensemble prediction and variance

        Raises:
            (ValueError) If self.gam_model has not been set by train().
        """
        if self.gam_model is None:
            raise ValueError("Attribute gam_model empty. "
                             "Model was not trained properly.")

        # build feature and  gam terms
        ens_feature, _ = self._build_ensemble_feature(X, base_pred)

        # prediction
        prediction = self.gam_model.predict(ens_feature)
        # Variance estimate: distance from the prediction to column 1 of the
        # 95% prediction interval (presumably the upper bound), halved to
        # approximate one standard deviation, then squared.
        prediction_var = ((self.gam_model.prediction_intervals(
            ens_feature, width=.95)[:, 1] - prediction) / 2) ** 2

        return prediction, prediction_var

    def _build_ensemble_feature(self, X, base_pred):
        """Builds feature array and corresponding GAM TermList.

        Base predictions are stacked column-wise and each gets a spline term
        (nonlinear ensemble) or a linear term. If the residual process is
        enabled, X is appended to the feature array and its terms are a
        summation of dimension-wise splines, plus a tensor-product term
        across all dimensions when X has more than one column.

        Returns:
            tuple of (feature array, TermList)
        """
        ensemble_term_func = s if self.nonlinear_ensemble else l

        ens_feature = np.asarray(list(base_pred.values())).T
        n_base = ens_feature.shape[1]
        term_list = [ensemble_term_func(dim_index) for dim_index in range(n_base)]

        # optionally, add residual process
        if self.model_residual:
            # build gam terms: columns of X follow the base-prediction columns
            term_list += [s(dim_index) for dim_index in
                          range(n_base, n_base + X.shape[1])]
            if X.shape[1] > 1:
                term_list += [te(*list(n_base +
                                       np.array(range(X.shape[1]))))]

            # update features
            ens_feature = np.concatenate([ens_feature, X], axis=1)

        gam_feature_terms = TermList(*term_list)

        return ens_feature, gam_feature_terms

    def _build_lambda_grid(self, n_grid=100):
        """Draws a random grid of smoothing parameters for gridsearch.

        Args:
            n_grid: (int) number of candidate points.

        Returns:
            (np.ndarray) shape (n_grid, n_terms), values within
            [exp(-3), exp(3)].
        """
        # count actual number of terms in each nonlinear term
        # (e.g. te(0, 1) will actually have two terms)
        n_terms = np.sum([len(model_term._terms) if model_term.istensor else 1
                          for model_term in self.gam_model.terms])
        lam = np.random.rand(n_grid, n_terms)
        # rescale so the smallest draw maps to 0 and the largest to 1
        # (the min/max are taken over the whole array)
        lam_norm = (lam - np.min(lam)) / (np.max(lam) - np.min(lam))

        # map [0, 1] onto exponents in [-3, 3]
        return np.exp((lam_norm - 0.5) * 6)

コード例 #19

0

ファイルを表示

ファイル: codePyGAM.py プロジェクト: dustywhite7/Econ8310

import numpy as np
from plotly import tools
import plotly.offline as py
import plotly.graph_objs as go

# Prep the dataset
# NOTE(review): `pd` and `pt` are not among the imports visible above --
# presumably pandas and patsy; confirm they are imported elsewhere.
# NOTE(review): the path below is absolute and machine-specific.
data = pd.read_csv(
    "/home/dusty/Econ8310/DataSets/HappinessWorld.csv")

# Generate x and y matrices
# (the "-1" in the formula presumably drops the intercept column -- verify)
eqn = """happiness ~ -1 + freedom + family + year + economy + health + trust"""
y,x = pt.dmatrices(eqn, data=data)

# Initialize and fit the model: one spline term per predictor in `eqn`
gam = LinearGAM(s(0) + s(1) + s(2) + s(3) + s(4) + s(5))
gam = gam.gridsearch(np.asarray(x), y)

# Specify plot shape
# Titles are listed in the same order as the predictors in `eqn`
# (assumes the design matrix keeps that column order -- TODO confirm)
titles = ['freedom', 'family', 'year', 'economy',
          'health', 'trust']

# 2x3 grid of subplots, one per predictor
fig = tools.make_subplots(rows=2, cols=3, subplot_titles=titles)
fig['layout'].update(height=800, width=1200, title='pyGAM', showlegend=False)

for i, title in enumerate(titles):
  XX = gam.generate_X_grid(term=i)
  pdep, confi = gam.partial_dependence(term=i, width=.95)
  trace = go.Scatter(x=XX[:,i], y=pdep, mode='lines', name='Effect')
  ci1 = go.Scatter(x = XX[:,i], y=confi[:,0], line=dict(dash='dash', color='grey'), name='95% CI')
  ci2 = go.Scatter(x = XX[:,i], y=confi[:,1], line=dict(dash='dash', color='grey'), name='95% CI')
  if i<3:

コード例 #20

0

ファイルを表示

ファイル: Chapter 4 - Regression and Prediction.py プロジェクト: leader2345/practical-statistics-for-data-scientists-master_practical_notebooks

# Partial residual plot for SqFtTotLiving from the spline model
# (result_spline, house_98105 and ax are defined earlier, outside this excerpt)
partialResidualPlot(result_spline, house_98105, 'AdjSalePrice',
                    'SqFtTotLiving', ax)

plt.tight_layout()
plt.show()

### Generalized Additive Models

predictors = ['SqFtTotLiving', 'SqFtLot', 'Bathrooms', 'Bedrooms', 'BldgGrade']
outcome = 'AdjSalePrice'
X = house_98105[predictors].values
y = house_98105[outcome]

## model
# Spline (12 splines) on the first predictor, linear terms on the other four
gam = LinearGAM(s(0, n_splines=12) + l(1) + l(2) + l(3) + l(4))
gam.gridsearch(X, y)
print(gam.summary())

# 3x2 grid of axes; only the first five are used below
fig, axes = plt.subplots(figsize=(8, 8), ncols=2, nrows=3)

titles = ['SqFtTotLiving', 'SqFtLot', 'Bathrooms', 'Bedrooms', 'BldgGrade']
for i, title in enumerate(titles):
    # fill the grid row by row
    ax = axes[i // 2, i % 2]
    XX = gam.generate_X_grid(term=i)
    # partial dependence of term i
    ax.plot(XX[:, i], gam.partial_dependence(term=i, X=XX))
    # with `width` given, element [1] of the result is plotted as dashed red
    # lines (presumably the 95% interval bounds)
    ax.plot(XX[:, i],
            gam.partial_dependence(term=i, X=XX, width=.95)[1],
            c='r',
            ls='--')
    ax.set_title(titles[i])

コード例 #21

0

ファイルを表示

import pickle

# Timestamp used in the name of the serialized model file
timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

# NOTE(review): read_pickle unpickles the files -- only use trusted inputs.
X = pd.read_pickle("data/processed/X.pickle")
y = pd.read_pickle("data/processed/y.pickle")
print('Read data.')

# 150000 random candidates with one lam per spline term (4 columns):
# exponents are uniform on [-3, 5), so lam lies in [e^-3, e^5)
lams = np.random.rand(150000, 4) * 8 - 3
lams = np.exp(lams)

# randomized grid search
print('Initialized Linear GAM.')
gam_grid = LinearGAM(s(0) + s(1) + s(2) + s(3))
print("Grid searching Linear GAM's lambdas.")
gam_grid.gridsearch(X, y, lam=lams)

# File name combines the timestamp with the first command-line argument
with open(f"models/{timestamp} {sys.argv[1]}.pickle", "wb") as handle:
    pickle.dump(gam_grid, handle)
print('Serialized GAM as pickle.')

print(gam_grid.summary())

# plotting
# NOTE(review): plt.figure() and plt.subplots() each create a figure, so the
# figsize given here likely does not apply to the axes used below -- confirm.
plt.figure(figsize=(16, 16 / 1.618))
fig, axs = plt.subplots(1, 3)

# Three titles / axes are defined although the model has four spline terms
titles = ["pm10median", "time", "tmpd"]
for i, ax in enumerate(axs):
    XX = gam_grid.generate_X_grid(term=i)
    ax.plot(XX[:, i], gam_grid.partial_dependence(term=i, X=XX))

コード例 #22

0

ファイルを表示

ファイル: 15_gam_example_wage.py プロジェクト: IrvingGomez/academic-hugo

#'log'
#'inverse-squared'

from pygam.datasets import wage
import matplotlib.pyplot as plt

X, y = wage(return_X_y=True)

# X[0] is the year; does X[0] = 0 correspond to 2000?...
# X[1] is the person's age
# X[2] is the education level: 0 = basic, 1 = upper secondary, 2 = university, 3 = postgraduate
# y is income ($$)

## model
# Splines on columns 0 and 1, factor term on column 2, no intercept
gam1 = LinearGAM(s(0) + s(1) + f(2), fit_intercept=False)
gam1.gridsearch(X, y)

## plotting
# NOTE(review): plt.figure() and plt.subplots() each create a figure, so the
# figsize given here likely does not apply to the axes used below -- confirm.
plt.figure(figsize=(10, 7.5))
fig, axs = plt.subplots(1, 3)
titles = ['year', 'age', 'education']
for i, ax in enumerate(axs):
    XX = gam1.generate_X_grid(term=i)
    # partial dependence of term i
    ax.plot(XX[:, i], gam1.partial_dependence(term=i, X=XX))
    # with `width` given, element [1] of the result is plotted as dashed red
    # lines (presumably the 95% interval bounds)
    ax.plot(XX[:, i],
            gam1.partial_dependence(term=i, X=XX, width=.95)[1],
            c='r',
            ls='--')
    ax.set_title(titles[i])
# Set after the figures above were created
plt.rcParams['figure.figsize'] = [10, 7.5]

コード例 #23

0

ファイルを表示

ファイル: DeepModels.py プロジェクト: aaronsossin/DengAI_2020

class DeepModels:
    """Common train / predict / cross-validate API over several regressors.

    The constructor selects a model by integer id and stores it in
    ``self.model`` together with a ``self.type`` code that tells the other
    methods how to drive it:

    * ``1`` -- recurrent Keras model; 2-D inputs are reshaped to
      ``(samples, 1, features)`` before being passed on.
    * ``2`` -- feed-forward Keras model.
    * ``3`` -- pyGAM ``LinearGAM``, trained through ``gridsearch``.
    * ``0`` -- never assigned by the constructor; the cross-validation
      methods treat it as a Keras model whose ``evaluate`` returns a
      two-element ``(loss, metric)`` result.
    """

    def __init__(self, m, idim=20):
        """Create the model selected by ``m``.

        Parameters
        ----------
        m : int
            Model id: 0 ``base``, 1 ``base2``, 2 ``returnSequential4``,
            3 ``returnSequential8``, 4 ``returnSequential15_regularized``,
            5 ``multi_RNN``, 6 ``multi_lstm``, 7 ``LinearGAM()``, 8 ``RNN``,
            9 ``lstm``.
        idim : int
            Number of input features (ignored by the GAM).

        Raises
        ------
        ValueError
            If ``m`` is not one of the ids above.  Previously the instance
            was silently left without ``model`` and ``type``.
        """
        builders = {
            0: (self.base, 2),
            1: (self.base2, 2),
            2: (self.returnSequential4, 2),
            3: (self.returnSequential8, 2),
            4: (self.returnSequential15_regularized, 2),
            5: (self.multi_RNN, 1),
            6: (self.multi_lstm, 1),
            7: (lambda _idim: LinearGAM(), 3),
            8: (self.RNN, 1),
            9: (self.lstm, 1),
        }
        try:
            build, kind = builders[m]
        except (KeyError, TypeError):
            raise ValueError(
                'Unknown model id {!r}; expected an integer from 0 to 9'.format(m))
        self.model = build(idim)
        self.type = kind

    # ------------------------------------------------------------------
    # Private construction helpers
    # ------------------------------------------------------------------
    def _finish(self, model, hidden_units, regularize_output=False,
                optimizer='Adam'):
        """Add relu Dense layers plus one linear output unit; compile with MAE loss."""
        for units in hidden_units:
            model.add(Dense(units, activation='relu'))
        output_kwargs = {}
        if regularize_output:
            output_kwargs['kernel_regularizer'] = regularizers.l1_l2(l1=0.01, l2=0.01)
        model.add(Dense(1, activation='linear', **output_kwargs))
        model.compile(optimizer=optimizer, loss='mean_absolute_error')
        return model

    def _dense_stack(self, idim, widths, regularize_output=False):
        """Build a feed-forward regressor with one relu Dense layer per width."""
        model = Sequential()
        model.add(Dense(widths[0], input_dim=idim, activation='relu'))
        return self._finish(model, widths[1:], regularize_output)

    def _recurrent_stack(self, first_layer, hidden_units, optimizer='Adam'):
        """Build a regressor whose first layer is the given recurrent layer."""
        model = Sequential()
        model.add(first_layer)
        return self._finish(model, hidden_units, optimizer=optimizer)

    # ------------------------------------------------------------------
    # Feed-forward architectures
    # ------------------------------------------------------------------
    def returnSequential6(self, idim=20):
        """Six Dense layers: 50-40-30-20-10 relu units and a linear output."""
        return self._dense_stack(idim, (50, 40, 30, 20, 10))

    def returnSequential6_regularized(self, idim=20):
        """Same as ``returnSequential6`` with an L1/L2 penalty on the output layer."""
        return self._dense_stack(idim, (50, 40, 30, 20, 10), regularize_output=True)

    def returnSequential9(self, idim=20):
        """Nine Dense layers: 80 down to 10 relu units in steps of 10, linear output."""
        return self._dense_stack(idim, tuple(range(80, 0, -10)))

    def returnSequential15(self, idim=20):
        """Fifteen Dense layers: 140 down to 10 relu units in steps of 10, linear output."""
        return self._dense_stack(idim, tuple(range(140, 0, -10)))

    def returnSequential15_regularized(self, idim=20):
        """Same as ``returnSequential15`` with an L1/L2 penalty on the output layer.

        The original body was a copy of ``returnSequential15`` without any
        regularizer; the penalty now matches the other regularized builders.
        """
        return self._dense_stack(idim, tuple(range(140, 0, -10)),
                                 regularize_output=True)

    def returnSequential21(self, idim=20):
        """Twenty-one Dense layers: 200 down to 10 relu units in steps of 10, linear output."""
        return self._dense_stack(idim, tuple(range(200, 0, -10)))

    def returnSequential4(self, idim=20):
        """Five Dense layers: 20-15-10-5 relu units and a linear output."""
        return self._dense_stack(idim, (20, 15, 10, 5))

    def returnSequential8(self, idim=20):
        """Eight Dense layers: 70 down to 10 relu units, L1/L2-penalised linear output."""
        return self._dense_stack(idim, tuple(range(70, 0, -10)),
                                 regularize_output=True)

    def base(self, idim=20):
        """One hidden relu layer of 10 units and a linear output."""
        return self._dense_stack(idim, (10,))

    def base2(self, idim=20):
        """Hidden relu layers of 14 and 7 units and a linear output.

        ``input_dim`` is now given to the first layer only; it has no
        meaning on a layer that follows another one.
        """
        return self._dense_stack(idim, (14, 7))

    def baseline(self, idim=20):
        """Sigmoid-output network compiled with binary cross-entropy and an MAE metric."""
        model = Sequential()
        model.add(Dense(20, input_dim=idim, activation='relu'))
        model.add(Dense(1, activation='sigmoid'))
        model.compile(loss='binary_crossentropy', optimizer='adam',
                      metrics=['mean_absolute_error'])
        return model

    # ------------------------------------------------------------------
    # Recurrent architectures
    # ------------------------------------------------------------------
    def RNN(self, idim=20):
        """SimpleRNN(10) followed by a linear output."""
        return self._recurrent_stack(SimpleRNN(10, input_dim=idim), ())

    def multi_RNN(self, idim=20):
        """SimpleRNN(14, relu), Dense(7, relu), linear output."""
        return self._recurrent_stack(
            SimpleRNN(14, input_dim=idim, activation='relu'), (7,))

    def multi_RNN2(self, idim=20):
        """SimpleRNN(40), Dense 30-20-10 relu, linear output."""
        return self._recurrent_stack(SimpleRNN(40, input_dim=idim), (30, 20, 10))

    def lstm(self, idim=20):
        """LSTM(20), Dense(10, relu), linear output."""
        return self._recurrent_stack(LSTM(20, input_dim=idim), (10,),
                                     optimizer='adam')

    def multi_lstm(self, idim=20):
        """LSTM(14, relu), Dense(7, relu), linear output.

        The stray ``input_dim`` on the second layer has been dropped.
        """
        return self._recurrent_stack(
            LSTM(14, input_dim=idim, activation='relu'), (7,), optimizer='adam')

    # ------------------------------------------------------------------
    # Training and prediction
    # ------------------------------------------------------------------
    def returnModel(self):
        """Return the wrapped model object."""
        return self.model

    def train(self, X, y, bs=10, epochs=100):
        """Fit the wrapped model on ``X`` / ``y``.

        ``bs`` and ``epochs`` are forwarded to Keras ``fit``; the GAM
        (type 3) is trained with ``gridsearch`` and ignores them.
        """
        if self.type == 1:
            # recurrent layers receive a single time step per sample
            X = np.reshape(X, (X.shape[0], 1, X.shape[1]))
        if self.type == 3:
            self.model.gridsearch(X, y)
        else:
            self.model.fit(X, y, batch_size=bs, epochs=epochs, shuffle=True,
                           verbose=0)

    def prediction(self, X):
        """Return the wrapped model's ``predict`` output for ``X``."""
        if self.type == 1:
            X = np.reshape(X, (X.shape[0], 1, X.shape[1]))
        return self.model.predict(X)

    # ------------------------------------------------------------------
    # Cross-validation
    # ------------------------------------------------------------------
    def _kfold(self, k, shuffle):
        """Return a ``KFold`` splitter, seeded only when it shuffles.

        scikit-learn rejects ``random_state`` combined with ``shuffle=False``
        (``ValueError`` from version 0.24 on), which the original calls did.
        """
        if shuffle:
            return KFold(n_splits=k, shuffle=True, random_state=0)
        return KFold(n_splits=k, shuffle=False)

    def _run_fold(self, X_train, y_train, X_test, y_test, bs, ep,
                  want_predictions):
        """Fit on one fold and return ``(score, predictions)``.

        ``predictions`` is ``None`` for Keras models unless
        ``want_predictions`` is true.
        NOTE(review): the same model object is fitted on every fold without
        being re-initialised, so later folds start from earlier folds' state.
        """
        if self.type == 3:
            self.model.gridsearch(X_train, y_train)
            predictions = self.model.predict(X_test)
            return mean_absolute_error(predictions, y_test), predictions

        if self.type == 1:
            X_train = np.reshape(X_train, (X_train.shape[0], 1, X_train.shape[1]))
            X_test = np.reshape(X_test, (X_test.shape[0], 1, X_test.shape[1]))
        elif self.type == 2:
            # Kept from the original: feed-forward models always train with
            # batch size 10 for 300 epochs, regardless of ``bs`` / ``ep``.
            bs, ep = 10, 300

        self.model.fit(X_train, y_train, batch_size=bs, epochs=ep, verbose=0)
        score = self.model.evaluate(X_test, y_test, verbose=0)
        if self.type == 0:
            # evaluate() yields (loss, metric); the metric is the score
            _, score = score
        predictions = self.model.predict(X_test) if want_predictions else None
        return score, predictions

    def cross_eval_with_plotting(self, city, X, y, bs=10, ep=100, k=3):
        """Run unshuffled k-fold CV, plot truth vs. prediction, return the mean score.

        Parameters
        ----------
        city : label inserted into the plot title.
        X, y : arrays indexable by integer index arrays.
        bs, ep : batch size and epochs for Keras models (see ``_run_fold``).
        k : number of folds.

        Returns
        -------
        Mean of the per-fold scores, or ``None`` when ``self.type`` is not
        one of 0-3.
        """
        if self.type not in (0, 1, 2, 3):
            return None

        # exactly one figure; the recurrent branch used to open a second one
        plt.subplots()
        scores = []
        for train_index, test_index in self._kfold(k, shuffle=False).split(X):
            y_test = y[test_index]
            score, predictions = self._run_fold(
                X[train_index], y[train_index], X[test_index], y_test,
                bs, ep, want_predictions=True)
            # Unshuffled folds are contiguous, so the test indices place each
            # fold at its own position on the x axis.  The GAM branch used to
            # draw every fold over the same range.
            plt.plot(test_index, y_test, 'm', alpha=0.4)
            plt.plot(test_index, predictions, 'g')
            scores.append(score)

        plt.title('True vs. Predicted Cases in {}'.format(city))
        plt.xlabel('Week')
        plt.ylabel('Cases of Dengue')
        plt.legend(['True', 'Predicted'])
        plt.show()
        return sum(scores) / len(scores)

    def cross_eval(self, X, y, bs=10, ep=100, k=3):
        """Run k-fold CV without plotting and return the mean score.

        Folds are shuffled (seed 0) for types 0 and 2 and taken in order for
        types 1 and 3.  Returns ``None`` when ``self.type`` is not one of 0-3.
        """
        if self.type not in (0, 1, 2, 3):
            return None

        scores = []
        splitter = self._kfold(k, shuffle=self.type in (0, 2))
        for train_index, test_index in splitter.split(X):
            score, _ = self._run_fold(
                X[train_index], y[train_index], X[test_index], y[test_index],
                bs, ep, want_predictions=False)
            scores.append(score)
        return sum(scores) / len(scores)

コード例 #24

0

ファイルを表示

def EAM():
    """Grid-search a LinearGAM on the arrays stored in the EAM ``.npy`` files.

    Loads ``EAM_factors.npy`` as the feature matrix and ``EAM_time.npy`` as
    the response, then runs ``gridsearch`` on a model with two order-3 spline
    terms (features 0 and 1) and two tensor terms (0 x 1 and 0 x 2).
    Nothing is returned.
    """
    factors = np.load('EAM_factors.npy')
    elapsed = np.load('EAM_time.npy')

    cubic = {'spline_order': 3}
    main_effects = s(0, **cubic) + s(1, **cubic)
    interactions = te(0, 1) + te(0, 2)

    model = LinearGAM(main_effects + interactions)
    model.gridsearch(factors, elapsed)

コード例 #25

0

ファイルを表示

ファイル: GAM.py プロジェクト: lindenmp/python_cookbook

# Notebook export: fit a one-term spline GAM of a system-level metric on age
# and plot the fit with its confidence and prediction intervals.
# df_system and df_pheno are created by cells that are not shown here.

# Preview of the table; the result is discarded when this runs as a script.
df_system.head()

# Get some data

# In[4]:

# Column of df_system used as the response.
metric = 'jd'
# Predictor: the age column of df_pheno.  Response: the chosen metric column.
X = df_pheno.loc[:, 'ageAtScan1_Years']
Y = df_system.loc[:, metric]

# Estimate GAM with spline

# In[5]:

# One spline term on the single predictor; fit once, then run gridsearch.
# NOTE(review): gridsearch fits the model itself, so the preceding fit() may
# be redundant -- confirm against pyGAM before removing it.
gam = LinearGAM(s(0)).fit(X, Y)
gam.gridsearch(X, Y)

# Plot

# In[6]:

# Evaluation grid for term 0, and the partial dependence with its 95% band.
XX = gam.generate_X_grid(term=0)
pdep, confi = gam.partial_dependence(term=0, X=XX, width=0.95)

plt.figure()
plt.plot(XX, pdep)  # fit
plt.plot(XX, confi, c='r', ls='--')  # confidence interval
plt.plot(XX, gam.prediction_intervals(XX, width=.95), color='b',
         ls='--')  # 95% prediction interval
plt.scatter(X, Y, facecolor='gray', edgecolors='none', alpha=0.5)  # data
plt.xlabel('Age')

コード例 #26

0

ファイルを表示

ファイル: pseudotime_functions.py プロジェクト: Cai-Lab-at-University-of-Michigan/MiCV

def calculate_gene_trends(session_ID, list_of_genes, branch_ID):
    """Fit one LinearGAM per gene over pseudotime and tabulate the trends.

    Parameters
    ----------
    session_ID : passed through to ``cache_adata``, ``cache_progress`` and
        ``get_obs_vector``.
    list_of_genes : iterable of gene names; each becomes a column prefix.
    branch_ID : ``-1`` to use every cell with unit weights; otherwise the
        suffix of the ``pseudotime_branch_<ID>`` column of ``obs``, whose
        values are used as sample weights.

    Returns
    -------
    pandas.DataFrame with a ``pseudotime`` column (125 evenly spaced points
    spanning the branch's pseudotime range) and, per gene, the columns
    ``<gene>``, ``<gene>_ci_upper`` and ``<gene>_ci_lower``.  All values are
    clipped below at 0.
    """
    total_steps = len(list_of_genes) + 2

    def report(step):
        # progress is published as an integer percentage
        cache_progress(session_ID, progress=int(step / total_steps * 100))

    obs = cache_adata(session_ID, group="obs")
    report(1)

    branch_column = "pseudotime_branch_" + str(branch_ID)
    branch_probs = None if branch_ID == -1 else obs[branch_column]

    pseudotime = obs["pseudotime"]
    report(2)

    # Cells counted as part of the branch: everything, or those whose branch
    # probability exceeds 0.2.
    if branch_probs is None:
        cells_in_branch = obs.index
    else:
        cells_in_branch = obs[obs[branch_column] > 0.2].index
    print("[DEBUG] branch: " + str(branch_ID))

    pseudotime_values = pseudotime.to_numpy()

    # reduce the number of data points we fit to save computation time
    max_samples_to_fit = 5000
    if len(pseudotime_values) > max_samples_to_fit:
        # flag max_samples_to_fit entries, then scatter the flags at random
        keep = np.zeros_like(pseudotime_values)
        keep[:max_samples_to_fit] = 1
        np.random.shuffle(keep)
    else:
        keep = np.ones_like(pseudotime_values)
    keep = keep.astype(bool)

    if branch_probs is None:
        sample_weights = np.ones_like(pseudotime_values)
    else:
        sample_weights = branch_probs.to_numpy()

    # column vectors restricted to the retained cells
    X_train = pseudotime_values[keep].reshape(-1, 1)
    weights = sample_weights[keep].reshape(-1, 1)

    branch_pseudotime = obs["pseudotime"][cells_in_branch]
    X_plot = np.linspace(np.min(branch_pseudotime),
                         np.max(branch_pseudotime), 125)

    gene_trends = pd.DataFrame({"pseudotime": X_plot})

    for step, gene in enumerate(list_of_genes, start=3):
        fetch_started = datetime.now()
        expression = get_obs_vector(session_ID, gene, layer="imputed")
        print("[BENCH] time for get_obs_vector: " +
              str(datetime.now() - fetch_started))
        Y_train = expression[keep]

        gam = LinearGAM(n_splines=5, spline_order=3)

        fit_started = datetime.now()
        gam.gridsearch(X_train, Y_train, weights=weights, progress=False)
        print("[BENCH] time for gam fit: " + str(datetime.now() - fit_started))

        gene_trends[gene] = gam.predict(X_plot)

        # column 0 holds the lower bound, column 1 the upper bound
        bounds = gam.confidence_intervals(X_plot, width=.95)
        gene_trends[gene + "_ci_upper"] = bounds[:, 1]
        gene_trends[gene + "_ci_lower"] = bounds[:, 0]
        report(step)

    return gene_trends.clip(lower=0)