Python LinearGAM.prediction_intervals Beispiele, pygam.LinearGAM.prediction_intervals Python Beispiele

Beispiel #1

0

Datei anzeigen

def gam_results(x, y, df, param, infection_time, horizon=10):
    """Fit a one-term spline GAM and compound its predictions into level series.

    The GAM output is used as a per-step relative change: every reconstructed
    or forecast value equals the previous value plus the previous value times
    the corresponding GAM prediction.

    Parameters
    ----------
    x : array-like with ``.shape``
        Feature values the GAM is fitted on. ``x.shape[0]`` is the length of
        the in-sample reconstruction and ``np.max(x)`` is the first forecast
        grid point.
    y : array-like
        Targets the GAM is fitted to.
    df : pandas.DataFrame
        Frame holding the observed series in column ``param``. The mean of its
        first three rows seeds the reconstruction; its last row seeds the
        forecast.
    param : str
        Column of ``df`` to use. When equal to ``'Positive'`` a copy of the
        reconstruction delayed by ``infection_time`` rows is added to it.
    infection_time : int
        Delay (in rows) of the copy added for ``param == 'Positive'``.
    horizon : int, optional
        Number of forecast grid points (default 10).

    Returns
    -------
    list
        ``[pred, forcast, forcast_L, forcast_U, y_new, confi1]``: the
        in-sample reconstruction, the forecast with its lower/upper variants
        (compounded from the two columns of the 95% prediction interval), the
        in-sample GAM predictions and the in-sample 95% prediction intervals.

    Raises
    ------
    ValueError
        If ``param == 'Positive'`` and ``infection_time`` is negative.
    """
    gam = LinearGAM(s(0), lam=.5).fit(x, y)
    y_new = gam.predict(x)
    confi1 = gam.prediction_intervals(x, width=.95)

    # In-sample reconstruction. The original code branched on
    # ``i < infection_time`` but both branches were identical, so a single
    # recurrence is used.
    n_obs = x.shape[0]
    pred = np.zeros(n_obs)
    if n_obs:
        pred[0] = np.mean(df[param].iloc[0:3])
    for i in range(1, n_obs):
        pred[i] = pred[i - 1] * y_new[i] + pred[i - 1]

    if param == 'Positive':
        if infection_time < 0:
            raise ValueError("infection_time must be non-negative")
        # Add a copy of the series delayed by ``infection_time`` rows. A delay
        # longer than the series contributes nothing instead of failing on a
        # shape mismatch.
        lagged = np.zeros_like(pred)
        if infection_time < n_obs:
            lagged[infection_time:] = pred[:n_obs - infection_time]
        pred = pred + lagged

    # Forecast grid starts at the last fitted x value.
    x_forcast = np.arange(np.max(x), np.max(x) + horizon)
    y_forcast = gam.predict(x_forcast)
    confi = gam.prediction_intervals(x_forcast, width=.95)

    n_fc = x_forcast.shape[0]
    forcast = np.zeros(n_fc)
    forcast_L = np.zeros(n_fc)
    forcast_U = np.zeros(n_fc)
    if n_fc:
        # All three series start from the last observed value.
        forcast[0] = forcast_L[0] = forcast_U[0] = df[param].iloc[-1]
    for i in range(1, n_fc):
        # Step i uses the rate predicted at grid point i - 1.
        forcast[i] = forcast[i - 1] * y_forcast[i - 1] + forcast[i - 1]
        forcast_L[i] = forcast_L[i - 1] * confi[i - 1, 0] + forcast_L[i - 1]
        forcast_U[i] = forcast_U[i - 1] * confi[i - 1, 1] + forcast_U[i - 1]

    return [pred, forcast, forcast_L, forcast_U, y_new, confi1]

Beispiel #2

0

Datei anzeigen

Datei: GAM_PD.py Projekt: Csogeza/O-C_extract

def GAMfitter(indir, dat_st, T0=None):
    """Phase-fold a light curve, fit a GAM and drop points outside its band.

    Parameters
    ----------
    indir : str
        Directory that is searched for the data file.
    dat_st : str
        Substring identifying the data file; the first directory entry whose
        name contains it is loaded with ``np.loadtxt``.
    T0 : float, optional
        Reference epoch used for phase folding. When ``None`` it is set to the
        time of the observation whose magnitude is closest to the maximum of
        a preliminary GAM fit (i.e. minimal brightness).

    Returns
    -------
    tuple
        ``(filtered_data, gam_f, best_frequency, T0)`` where ``filtered_data``
        holds the rows lying strictly inside the 85% prediction interval of
        the first fit, ``gam_f`` is the GAM refitted to those rows and
        ``best_frequency`` is the periodogram peak of the filtered data.

    Notes
    -----
    Column 0 of the file is passed to ``LombScargle`` as the time axis and
    column 1 as the measured value (magnitudes, per the original comments).
    An ``IndexError`` is raised when no file name contains ``dat_st``.
    """
    fname = [i for i in os.listdir(indir) if dat_st in i]
    # os.path.join works whether or not ``indir`` ends with a separator.
    data = np.loadtxt(os.path.join(indir, fname[0]))
    frequency = np.linspace(
        1e-3, 0.5, int(1e6))  # A range for frequencies (2 to 1000 day periods)
    power = LombScargle(data[:, 0],
                        data[:, 1]).power(frequency=frequency)  # Get spectrum
    ind = get_index_of_max(power)  # Best frequency

    if T0 is None:  # If we have no preset T0, try to get a minimum
        phs = get_phase_curve(data[:, 0], data[0, 0], 1 / frequency[ind])
        ext_phs, ext_mags = phase_curve_extender(phs, data[:, 1])
        gam = LinearGAM(n_splines=30).gridsearch(ext_phs,
                                                 ext_mags)  # Fit a GAM

        XX = gam.generate_X_grid(term=0, n=500)
        fit = gam.predict(XX)  # This is the fit on the grid
        minimal_val = max(fit)  # Maximum magnitude (minimal brightness)
        min_ind = get_index_of_min(abs(data[:, 1] - minimal_val))
        T0 = data[min_ind, 0]

    # Fold with the final T0 and fit the GAM used for outlier rejection.
    phs = get_phase_curve(data[:, 0], T0, 1 / frequency[ind])
    ext_phs, ext_mags = phase_curve_extender(phs, data[:, 1])
    gam = LinearGAM(n_splines=30).gridsearch(ext_phs, ext_mags)

    # Keep only observations strictly inside the 85% prediction interval.
    pred_int_vls = gam.prediction_intervals(phs, width=.85)
    cond = ((data[:, 1] > pred_int_vls[:, 0])
            & (data[:, 1] < pred_int_vls[:, 1]))

    filtered_data = data[cond]

    # Re-estimate the frequency from the filtered observations. The original
    # code recomputed the periodogram from the unfiltered ``data``, which
    # always reproduced ``ind``.
    power_f = LombScargle(filtered_data[:, 0],
                          filtered_data[:, 1]).power(frequency=frequency)
    ind_f = get_index_of_max(power_f)
    phs_f = get_phase_curve(filtered_data[:, 0], T0, 1 / frequency[ind_f])
    ext_phs, ext_mags = phase_curve_extender(phs_f, filtered_data[:, 1])
    gam_f = LinearGAM(n_splines=30).gridsearch(ext_phs, ext_mags)

    return filtered_data, gam_f, frequency[ind_f], T0

Beispiel #3

0

Datei anzeigen

Datei: filtermod.py Projekt: bohlinger/wavy

def cleaner_linearGAM(x, y, **kwargs):
    """Return the indices of samples outside a GAM's 95% prediction interval.

    A ``LinearGAM`` with a single penalised-spline term (``basis='ps'``) is
    grid-searched on ``(x, y)`` and every sample whose ``y`` lies strictly
    above the upper or strictly below the lower bound of the 95% prediction
    interval at its own ``x`` is reported.

    Parameters
    ----------
    x : array-like
        One-dimensional predictor values.
    y : array-like
        Response values, one per entry of ``x``.
    **kwargs
        Accepted for call compatibility and currently ignored (an earlier,
        disabled variant read ``n_splines`` from here).

    Returns
    -------
    list of int
        Positions of the flagged samples, in ascending order.
    """
    from pygam import LinearGAM, s

    # asarray accepts lists, tuples, Series and arrays alike.
    x = np.asarray(x)
    y = np.asarray(y)
    X = x.reshape(len(x), 1)

    gam = LinearGAM(terms=s(0, basis='ps')).gridsearch(X, y)

    # Evaluate the interval on the input samples themselves.
    bounds = gam.prediction_intervals(X, width=.95)
    y_flat = y.reshape(-1)
    outside = (y_flat > bounds[:, 1]) | (y_flat < bounds[:, 0])
    return np.flatnonzero(outside).tolist()

Beispiel #4

0

Datei anzeigen

Datei: GAM.py Projekt: lindenmp/python_cookbook

# In[5]:

# Fit a single-spline GAM on feature 0, then run a grid search on the same
# data. The return value of gridsearch is discarded, so the call is made for
# its effect on `gam`.
gam = LinearGAM(s(0)).fit(X, Y)
gam.gridsearch(X, Y)

# Plot the fitted term together with its intervals and the raw data.

# In[6]:

# Evaluation grid for term 0; with `width` given, partial_dependence is
# unpacked into the partial dependence and its interval bounds.
XX = gam.generate_X_grid(term=0)
pdep, confi = gam.partial_dependence(term=0, X=XX, width=0.95)

plt.figure()
plt.plot(XX, pdep)  # fit
plt.plot(XX, confi, c='r', ls='--')  # confidence interval
plt.plot(XX, gam.prediction_intervals(XX, width=.95), color='b',
         ls='--')  # 95% prediction interval
plt.scatter(X, Y, facecolor='gray', edgecolors='none', alpha=0.5)  # data
plt.xlabel('Age')
plt.ylabel('Brain feature')
plt.show()

# In[7]:

# Rebind X and Y for the next model: two predictor columns from df_pheno and
# the column named by `metric` from df_system. X and Y no longer refer to the
# data fitted above after this cell.
metric = 'jd'
X = df_pheno.loc[:, ['ageAtScan1_Years', 'mprage_antsCT_vol_TBV']]
Y = df_system.loc[:, metric]

# Estimate GAM with spline

# In[8]:

Beispiel #5

0

Datei anzeigen

'''

# GAMs
# https://github.com/dswah/pyGAM
# https://codeburst.io/pygam-getting-started-with-generalized-additive-models-in-python-457df5b4705f
from pygam import LinearGAM, LogisticGAM

# Fit a default LinearGAM of 'mpg' on 'disp' and 'wt' and report the
# in-sample mean squared error.
gam_model = LinearGAM().fit(d[['disp', 'wt']], d['mpg'])
print(gam_model.summary())
gam_predictions = gam_model.predict(d[['disp', 'wt']])
gam_mse = np.mean((gam_predictions - d['mpg'])**2)
print('MSE:', gam_mse)

# Plot the predictions with their 95% prediction intervals (the two interval
# columns are drawn as two dashed blue lines) over the observed values.
plt.plot(list(d.index), gam_predictions, 'r--')
plt.plot(list(d.index),
         gam_model.prediction_intervals(d[['disp', 'wt']], width=.95),
         color='b',
         ls='--')
plt.scatter(list(d.index), d['mpg'], facecolor='gray', edgecolors='none')
plt.xlabel('Row Index')
plt.ylabel('mpg')
# The band comes from prediction_intervals, so it is labelled as a prediction
# interval (the original title read "Condidence Interval").
plt.title('GAM Prediction with 95% Prediction Interval')
plt.show()

# Plot with simulated posterior: overlay 50 simulated response draws,
# evaluated at the training inputs, as faint points.
for response in gam_model.sample(d[['disp', 'wt']],
                                 d['mpg'],
                                 quantity='y',
                                 n_draws=50,
                                 sample_at_X=d[['disp', 'wt']]):
    plt.scatter(list(d.index), response, alpha=0.03, color='k')

Beispiel #6

0

Datei anzeigen

def GAMf(df,
         in_var,
         ex_vars,
         city,
         cut,
         pred_end='one_month',
         train_duration='all'):
    """Fit a LinearGAM to the log of ``in_var`` for one city and score it.

    Parameters
    ----------
    df:
        dataframe containing all variables of interest for the whole time of
        measurement; it needs a ``'city'`` column and an index that can be
        compared with ``datetime`` objects
    in_var:
        name of the target (dependent) variable; its natural logarithm is
        modelled
    ex_vars:
        list of explanatory variables (a single column name is also accepted)
    city:
        name of specific city
    cut:
        string of the format '%m/%d/%Y' indicating the date where training
        set ends & test set starts
    pred_end:
        end of the prediction period as a '%m/%d/%Y' string;
        if 'one_month' pred_end is set to one month after the cut
    train_duration:
        int, indicating the number of months that should be used for training
        defaults to 'all' -> all available data before the cut date will be
        used as training data

    Returns
    -------
    gam:
        fitted gam model instance (NaN if training or test set is empty)

    model_statistics:
        list ``[rmse, r_squared, fac2, test_len, train_len, ratio, avg_err]``

        rmse:
            RMSE for test set, on the original (exponentiated) scale
        r_squared:
            pseudo R-squared for the fitted GAM model
        fac2:
            fraction of test points with ``test_y / y_pred < 2``, computed on
            the log scale
        test_len:
            number of observations in the test set
        train_len:
            number of observations in the training set
        ratio:
            mean ratio of prediction to true values for test set (log scale)
        avg_err:
            mean of true minus predicted value on the original scale

    preds:
        a dataframe containing all explanatory variables, the true and the
        predicted values, a scaled absolute error and the lower/upper bounds
        of the prediction interval (NaN if training or test set is empty)

    Raises
    ------
    ValueError
        If ``ex_vars`` is empty while both data sets are non-empty.
    """
    # A bare column name would otherwise be indexed character by character
    # when the GAM terms are built.
    if isinstance(ex_vars, str):
        ex_vars = [ex_vars]

    # drop rows with NAN values for explanatory variables
    df = df.dropna(subset=ex_vars)

    # subset dataset to given city
    df = df[df['city'] == city]

    # convert cut variable to datetime object
    cut = datetime.strptime(cut, '%m/%d/%Y')

    # if pred_end has the default value add one month to cut date to
    # calculate end of the test dataset, else convert given string to datetime
    if pred_end == 'one_month':
        pred_end = cut + relativedelta(months=+1)
    else:
        pred_end = datetime.strptime(pred_end, '%m/%d/%Y')

    # training data: everything strictly before the cut, optionally limited
    # to the last `train_duration` months
    df_train = df[df.index < cut]
    if train_duration != 'all':
        train_start = cut - relativedelta(months=+train_duration)
        df_train = df_train[df_train.index > train_start]
    df_train = df_train.replace([np.inf, -np.inf], np.nan)
    df_train = df_train.dropna(subset=ex_vars)

    # test data: strictly between cut and pred_end (rows stamped exactly at
    # the cut fall in neither set)
    df_test = df[df.index > cut]
    df_test = df_test[df_test.index < pred_end]

    # extract explanatory values and the log of the target variable
    train_X = df_train[ex_vars].values
    train_y = np.log(df_train[in_var].values)
    test_X = df_test[ex_vars].values
    test_y = np.log(df_test[in_var].values)

    # check if test and training set contain sufficient observations
    if len(test_y) != 0 and len(train_y) != 0:
        # Local import under explicit names; the terms are built directly
        # instead of assembling a string and passing it to eval().
        from pygam import f as factor_term, l as linear_term, s as spline_term

        categorical = ('weekday', 'month', 'season', 'hour', 'new_year',
                       'daytime')
        # factor terms are only used when more than 300 training rows exist
        enough_for_factors = len(train_y) > 300

        terms = None
        for i, name in enumerate(ex_vars):
            if name in categorical and enough_for_factors:
                term = factor_term(i)
            elif 'ws' in name:
                term = linear_term(i)
            else:
                term = spline_term(i, lam=0.6, basis='ps')
            terms = term if terms is None else terms + term
        if terms is None:
            raise ValueError('ex_vars must contain at least one variable')

        # specify and fit GAM model
        gam = LinearGAM(terms)
        gam.fit(train_X, train_y)
        y_pred = gam.predict(test_X)

        # cap predictions at the maximum (log) value seen in training
        max_value = train_y.max()
        y_pred[y_pred > max_value] = max_value

        # calculate model statistics
        ratio = np.mean(y_pred / test_y)
        rmse = np.sqrt(
            metrics.mean_squared_error(np.exp(test_y), np.exp(y_pred)))
        avg_err = np.mean(np.exp(test_y) - np.exp(y_pred))
        r_squared = list(gam.statistics_['pseudo_r2'].items())[0][1]
        # NOTE(review): only the upper bound is checked and the ratio is
        # taken on log values; a conventional FAC2 would test
        # 0.5 <= pred/obs <= 2 on the original scale - confirm intent.
        fac2 = np.mean(test_y / y_pred < 2)

        # dataframe with explanatory variables, truth, prediction and error
        preds = df_test[ex_vars].copy()
        preds['true'] = np.exp(test_y)
        preds['y_pred'] = np.exp(y_pred)
        # NOTE(review): the divisor is the mean of the *log* training target,
        # not of the raw values - confirm intent.
        preds['err'] = abs(preds['true'] -
                           preds['y_pred']) / (np.mean(train_y))

        # prediction interval at the library's default width, mapped back to
        # the original scale
        confidence = gam.prediction_intervals(test_X)

        preds['lower'] = np.exp(confidence[:, 0])
        preds['upper'] = np.exp(confidence[:, 1])
    else:
        # report the problem and return NaN placeholders when either set is
        # empty
        print(
            'Problem with test and/or training data length for the station ' +
            city + ' in the month of ' + str(cut.month))
        print('Training Length: ' + str(len(train_y)) + ' Test Length: ' +
              str(len(test_y)))
        rmse = gam = ratio = preds = avg_err = r_squared = fac2 = float("NaN")

    # calculate length of test & training set
    test_len = len(test_X)
    train_len = len(train_X)
    model_statistics = [
        rmse, r_squared, fac2, test_len, train_len, ratio, avg_err
    ]

    return (gam, model_statistics, preds)

Beispiel #7

0

Datei anzeigen

class GAMEnsemble(EnsembleModel):
    """Implements GAM ensemble in [1].

    Base-model predictions (and optionally the raw features, as a "residual
    process") are combined by a ``LinearGAM`` whose smoothing parameters are
    chosen by a randomised grid search.
    """

    def __init__(self, nonlinear_ensemble=False, residual_process=True):
        """
        Initializer.

        Args:
            nonlinear_ensemble: (bool) Whether use nonlinear term to transform base model.
            residual_process: (bool) Whether model residual process.
        """
        model_name = (
            "Generalized Additive Ensemble" if residual_process
            else "{} Stacking".format("Nonlinear" if nonlinear_ensemble else "Linear"))

        super().__init__(model_name)
        # Set by train(); None marks an untrained ensemble.
        self.gam_model = None
        self.nonlinear_ensemble = nonlinear_ensemble
        self.model_residual = residual_process

    def train(self, X, y, base_pred):
        """Trains ensemble model based on data and base predictions.

        Stores the fitted model in the attribute "gam_model".

        Args:
            X: (np.ndarray) Training features, shape (N, D)
            y: (np.ndarray)  Training labels, shape (N, 1)
            base_pred: (dict of np.ndarray) Dictionary of base model predictions
                With keys (str) being model name, and values (np.ndarray) being
                predictions corresponds to X and y.
        """
        # build feature and  gam terms
        ens_feature, feature_terms = self._build_ensemble_feature(X, base_pred)

        # define model
        self.gam_model = LinearGAM(feature_terms)

        # additional fine-tuning: the lambda grid is drawn at random, so
        # repeated training runs may select different smoothing parameters
        lam_grid = self._build_lambda_grid(n_grid=100)
        self.gam_model.gridsearch(X=ens_feature, y=y, lam=lam_grid,
                                  progress=False)

    def predict(self, X, base_pred):
        """Predicts label based on feature and base model.

        Args:
            X: (np.ndarray) Features, shape (N, D)
            base_pred: (dict of np.ndarray) Dictionary of base model predictions
                With keys (str) being model name, and values (np.ndarray) being
                predictions corresponds to X.

        Returns:
            (np.ndarray) ensemble prediction and variance

        Raises:
            (ValueError) If self.gam_model is empty, i.e. train() was not run.
        """
        if self.gam_model is None:
            raise ValueError("Attribute gam_model empty. "
                             "Model was not trained properly.")

        # build feature and  gam terms
        ens_feature, _ = self._build_ensemble_feature(X, base_pred)

        # prediction
        prediction = self.gam_model.predict(ens_feature)
        # Half of the upper half-width of the 95% prediction interval is used
        # as the standard deviation (presumably approximating 1.96 by 2),
        # then squared to give a variance.
        prediction_var = ((self.gam_model.prediction_intervals(
            ens_feature, width=.95)[:, 1] - prediction) / 2) ** 2

        return prediction, prediction_var

    def _build_ensemble_feature(self, X, base_pred):
        """Builds feature array and corresponding GAM TermList.

        Each base prediction gets one term (spline if nonlinear_ensemble,
        otherwise linear). When the residual process is modelled, terms
        corresponding to X will be summation of dimension-wise splines, plus
        a tensor-product term across all dimension (only if X has more than
        one column), and X is appended to the feature array.

        Returns:
            (np.ndarray, TermList) feature array and matching GAM terms.
        """
        ensemble_term_func = s if self.nonlinear_ensemble else l

        # one column per base model, in dictionary order
        ens_feature = np.asarray(list(base_pred.values())).T
        term_list = [ensemble_term_func(dim_index) for dim_index in range(ens_feature.shape[1])]

        # optionally, add residual process
        if self.model_residual:
            # build gam terms; the X columns follow the base-model columns
            term_list += [s(dim_index) for dim_index in
                          range(ens_feature.shape[1],
                                ens_feature.shape[1] + X.shape[1])]
            if X.shape[1] > 1:
                term_list += [te(*list(ens_feature.shape[1] +
                                       np.array(range(X.shape[1]))))]

            # update features
            ens_feature = np.concatenate([ens_feature, X], axis=1)

        gam_feature_terms = TermList(*term_list)

        return ens_feature, gam_feature_terms

    def _build_lambda_grid(self, n_grid=100):
        """Draws a random grid of smoothing parameters for gridsearch.

        Args:
            n_grid: (int) Number of candidate lambda vectors.

        Returns:
            (np.ndarray) shape (n_grid, n_terms), values within
                [exp(-3), exp(3)].
        """
        # count actual number of terms in each nonlinear term
        # (e.g. te(0, 1) will actually have two terms)
        n_terms = sum(len(model_term._terms) if model_term.istensor else 1
                      for model_term in self.gam_model.terms)
        lam = np.random.rand(n_grid, n_terms)
        # rescale so the draws span exactly [0, 1]
        lam_norm = (lam - np.min(lam)) / (np.max(lam) - np.min(lam))

        # map [0, 1] onto a log-uniform range exp(-3) .. exp(3)
        return np.exp((lam_norm - 0.5) * 6)

Beispiel #8

0

Datei anzeigen

# Grid-search the smoothing parameter over 10 evenly spaced candidates.
# NOTE(review): the grid starts at -4, i.e. includes a negative value;
# confirm this is intended and not meant to be a log-spaced grid.
lams = np.linspace(-4, 100, 10)
gam = LinearGAM(n_splines=20).gridsearch(X_train, y_train, lam=lams)
gam.summary()

# Running the test: although 20% of the data was set aside earlier, for the
# purposes of this example and to keep the plot readable, only 100 rows of
# X_test are predicted.

# In[16]:
"""Tomo 100 valores para predecir"""
predictions = gam.predict(X_test[:100])
xsa = range(len(predictions))

IC = 0.95  # Interval width passed to prediction_intervals below

# Predictions (red) against the true values (dashed black), plus the bounds
# returned by prediction_intervals (grey dash-dot).
plt.plot(xsa, predictions, 'r', xsa, y_test[:100], '--k', lw=1.5)
plt.plot(xsa,
         gam.prediction_intervals(X_test[0:100], width=IC),
         color='gray',
         ls='-.',
         lw=1)
plt.legend(('Prediction', 'Real', f'Intervalos de confianza {IC*100:.2f} %'))
plt.title(f"Intervalo de confianza del {IC*100:.2f} %")

# Preparing results: collect true and predicted values side by side in a
# DataFrame (the original note also mentions an error column, which is not
# added in the code visible here).

# In[17]:

# NOTE(review): y_test is used in full while `predictions` covers only the
# first 100 rows, so the concatenated table presumably has missing values in
# the prediction column beyond row 99 - confirm.
y_test = pd.DataFrame(y_test).reset_index(drop=True)
y_predictions = pd.DataFrame(predictions).reset_index(drop=True)

table_predict = pd.concat([y_test, y_predictions], axis=1)
table_predict.columns = ['Calidad_real', 'Calidad_predecida']

Beispiel #9

0

Datei anzeigen

Datei: split_data.py Projekt: shaoxiuma/heatwave_coupling

def _plot_gam_fit(ax, x, y, color, label=None):
    """Grid-search a 20-spline LinearGAM of y on x and draw it on ax.

    The fitted curve is drawn as a solid line (optionally labelled) and the
    95% prediction interval as dashed lines of the same colour.
    """
    gam = LinearGAM(n_splines=20).gridsearch(x, y)
    XX = generate_X_grid(gam)
    ax.plot(XX, gam.predict(XX), color=color, ls='-', lw=2.0, label=label)
    ax.plot(XX, gam.prediction_intervals(XX, width=.95), color=color, ls='--')


def make_plot(plot_dir, site, df_flx, df_met, pft, fp):
    """Plot midday Qle/Qh against SWdown and Tair for one site and dump rows.

    Parameters
    ----------
    plot_dir : str
        Directory the figure ``<site>.png`` is written to.
    site : str
        Site name; used for the console message and the file name.
    df_flx : pandas.DataFrame
        Flux data with columns ``Qle``, ``Qh``, ``Qle_qc`` and ``Qh_qc``.
        It is masked and has NaN rows dropped **in place**.
    df_met : pandas.DataFrame
        Meteorological data with columns ``SWdown`` and ``Tair``. It is
        masked and has NaN rows dropped **in place**.
    pft : str
        Label written as the last field of every output row.
    fp : file-like
        Open text stream receiving one ``SWdown,Qle,Qh,Tair,pft`` row per
        retained midday record.

    Notes
    -----
    Nothing is plotted or written when no data survives the masking. Global
    ``plt.rcParams`` are modified. Both frames need an index supporting
    ``between_time``.
    """
    K_TO_C = 273.15  # subtracted from Tair before plotting/writing

    fig = plt.figure(figsize=(14, 4))
    fig.subplots_adjust(hspace=0.1, wspace=0.1)

    almost_black = '#262626'
    plt.rcParams.update({
        'text.usetex': False,
        'font.family': "sans-serif",
        'font.sans-serif': "Helvetica",
        'axes.labelsize': 14,
        'font.size': 14,
        'legend.fontsize': 14,
        'xtick.labelsize': 14,
        'ytick.labelsize': 14,
        # ticks, text and axes in a slightly lighter black
        'ytick.color': almost_black,
        'xtick.color': almost_black,
        'text.color': almost_black,
        'axes.edgecolor': almost_black,
        'axes.labelcolor': almost_black,
    })

    ax1 = fig.add_subplot(121)
    ax2 = fig.add_subplot(122)

    # Keep only records where both flux quality flags equal 1.
    df_met.where(df_flx.Qle_qc == 1, inplace=True)
    df_met.where(df_flx.Qh_qc == 1, inplace=True)

    df_flx.where(df_flx.Qle_qc == 1, inplace=True)
    df_flx.where(df_flx.Qh_qc == 1, inplace=True)

    # Mask dew (non-positive latent heat flux)
    df_met.where(df_flx.Qle > 0., inplace=True)
    df_flx.where(df_flx.Qle > 0., inplace=True)

    # NOTE(review): the frames are cleaned independently, so they presumably
    # stay row-aligned only if their NaN patterns match - confirm.
    df_flx.dropna(inplace=True)
    df_met.dropna(inplace=True)

    if len(df_flx) > 0 and len(df_met) > 0:
        print(site, len(df_flx), len(df_met))

        alpha = 0.07

        # < "Midday" data
        df_flx = df_flx.between_time("09:00", "13:00")
        df_met = df_met.between_time("09:00", "13:00")

        # Left panel: fluxes against incoming shortwave radiation.
        ax1.plot(df_met.SWdown, df_flx.Qle, ls=" ", marker="o",
                 color="salmon", alpha=alpha)
        ax1.plot(df_met.SWdown, df_flx.Qh, ls=" ", marker="o",
                 color="royalblue", alpha=alpha)

        _plot_gam_fit(ax1, df_met.SWdown, df_flx.Qle, "salmon", label="Qle")

        # Positional access via .iloc: plain integer indexing on a
        # time-indexed Series relies on a deprecated fallback.
        for ii in range(len(df_met)):
            print("%f,%f,%f,%f,%s" % (df_met.SWdown.iloc[ii],
                                      df_flx.Qle.iloc[ii],
                                      df_flx.Qh.iloc[ii],
                                      df_met.Tair.iloc[ii] - K_TO_C,
                                      pft), file=fp)

        _plot_gam_fit(ax1, df_met.SWdown, df_flx.Qh, "royalblue", label="Qh")

        # Right panel: fluxes against air temperature.
        ax2.plot(df_met.Tair - K_TO_C, df_flx.Qle, ls=" ", marker="o",
                 color="salmon", alpha=alpha, label="Qle")
        ax2.plot(df_met.Tair - K_TO_C, df_flx.Qh, ls=" ", marker="o",
                 color="royalblue", alpha=alpha, label="Qh")

        _plot_gam_fit(ax2, df_met.Tair - K_TO_C, df_flx.Qle, "salmon")
        _plot_gam_fit(ax2, df_met.Tair - K_TO_C, df_flx.Qh, "royalblue")
        plt.setp(ax2.get_yticklabels(), visible=False)

        ax1.set_xlim(0, 1300)
        ax1.set_ylim(0, 1000)
        ax2.set_xlim(0, 45)
        ax2.set_ylim(0, 1000)
        ax1.set_xlabel("SW down (W m$^{-2}$)")
        ax2.set_xlabel("Tair (deg C)")
        ax1.set_ylabel("Daytime flux (W m$^{-2}$)")
        ax1.legend(numpoints=1, loc="best")

        fig.savefig(os.path.join(plot_dir, "%s.png" % (site)),
                    bbox_inches='tight', pad_inches=0.1, dpi=100)

    # Release the figure so repeated per-site calls do not accumulate open
    # figures.
    plt.close(fig)

Beispiel #10

0

Datei anzeigen

# Restyle the marker traces of the figure built earlier, then display it.
fig.update_traces(marker=dict(size=2,
                              line=dict(width=2, color='DarkSlateGrey')),
                  selector=dict(mode='markers'))
fig.show()

#%%
# pyGAM
# train: one spline term constrained to be monotonically increasing; inputs
# are reshaped to column vectors before the grid search
gam = LinearGAM(s(0, constraints="monotonic_inc"),
                n_splines=25).gridsearch(X_train.reshape((-1, 1)),
                                         y_train.reshape((-1, 1)))
# predict: `y` is the fit on a 500-point grid, `y_pred` the fit on the test
# inputs, `y_CI` the 95% prediction interval on the grid
XX = gam.generate_X_grid(term=0, n=500)
y = gam.predict(XX)
y_pred = gam.predict(X_test)
y_CI = gam.prediction_intervals(XX, width=.95)
#%%
# plot prediction and interval bounds
# NOTE(review): the trace below is named "95% Confidence" but shows column 0
# of the prediction interval - confirm the label.
fig = go.Figure()
fig.add_trace(
    go.Scatter(x=XX.reshape((-1, )),
               y=y,
               name="Prediction",
               line=dict(color="firebrick", width=1)))
fig.add_trace(
    go.Scatter(x=XX.reshape((-1, )),
               y=y_CI[:, 0],
               name="95% Confidence",
               line=dict(color="green", width=1, dash="dash")))
fig.add_trace(
    go.Scatter(x=XX.reshape((-1, )),

Beispiel #11

0

Datei anzeigen

# -*- coding: utf-8 -*-
"""
@author: Christian Winkler
"""
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from pygam import LinearGAM
import pygam

from pygam.utils import generate_X_grid

# Load the example table: 'age' is the predictor, 'head' the response.
example_data = pd.read_csv("example_data.csv")
X = example_data['age'].values
y = example_data['head'].values

# Fit a LinearGAM with four splines (fit() works on the instance itself).
gam = LinearGAM(n_splines=4)
gam.fit(X, y)

# Coarse 20-point evaluation grid over the fitted feature range.
XX = generate_X_grid(gam, n=20)

# 2.5%, 50% and 97.5% quantile curves evaluated on the grid.
quantile_curves = gam.prediction_intervals(XX, quantiles=[.025, .5, .975])

# Scatter the raw data, overlay the quantile curves in black, save and show.
plt.figure(figsize=(10, 8))
plt.scatter(X, y)
plt.plot(XX, quantile_curves, color="k")
plt.savefig("pygam_example_2.png")
plt.show()