Example #1
    def _fit(self):
        """
        makes model fit. the model is fitted according to the outcome of levene
        test. GLS is used dependent on the outcome of the test weight matrix is
        calculated.


        returns statsmodels gls object. For further manipulation with object
        see statsmodels documentation.

        """
        p = self._levene_test()
        if p > 0.05:
            self.weights = np.repeat(1, len(self.data[self.x]))
            print('Variance is homogeneous, assuming OLS')
            self.c, self.d = 1, 1
            self.model_type = 'OLS'
        else:
            print('Variance is not homogeneous, assuming WLS')
            self.model_type = 'WLS'
            self.c, self.d = self._get_weights()
            self.weights = (self.c + self.d * self.data[self.x]) ** 2
        model = gls(f'{self.y}~{self.x}', data=self.data, sigma=self.weights).fit()

        return model
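For context, here is a minimal self-contained sketch of the same Levene-test-then-GLS idea on synthetic data; the weight form is illustrative and stands in for the class's _get_weights.

# Minimal sketch: test for heteroscedasticity, then weight the GLS fit.
import numpy as np
import pandas as pd
from scipy.stats import levene
from statsmodels.formula.api import gls

rng = np.random.default_rng(0)
df = pd.DataFrame({"x": np.linspace(0, 10, 100)})
df["y"] = 2.0 * df["x"] + rng.normal(scale=1 + 0.5 * df["x"])  # spread grows with x

# Levene test between the low-x and high-x halves of the sample
_, p = levene(df["y"][df["x"] < 5], df["y"][df["x"] >= 5])
weights = np.ones(len(df)) if p > 0.05 else np.asarray((1 + 0.5 * df["x"]) ** 2)
model = gls("y ~ x", data=df, sigma=weights).fit()
print(model.summary())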
Example #2
def best_formula(dataframe, response):
    remaining = set(dataframe.columns)
    remaining.remove(response)
    selected, results = [], []
    while remaining:
        scores_with_candidates = []
        for candidate in remaining:
            formula = '{} ~ {}'.format(response,
                                       ' + '.join(selected + [candidate]))
            lm = smf.gls(formula, sm.add_constant(dataframe)).fit()
            score = lm.rsquared_adj
            scores_with_candidates.append(
                (score, candidate, [formula, lm.rsquared_adj, lm.ssr]))
        scores_with_candidates.sort()
        best_score, best_candidate, best_metrics = scores_with_candidates.pop()
        results.append(best_metrics)
        remaining.remove(best_candidate)
        selected.append(best_candidate)
    dataframe = pd.DataFrame(results)
    dataframe.columns = ['formula', 'adjr2', 'ssr']
    dataframe = dataframe.sort_values('adjr2', axis=0,
                                      ascending=False)[:1].reset_index()
    formula = dataframe['formula'].iloc[0]

    return formula, dataframe
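A hypothetical run on synthetic data (best_formula assumes the pandas/statsmodels imports below):

# Hypothetical usage on made-up columns.
import numpy as np
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf

rng = np.random.default_rng(1)
df = pd.DataFrame(rng.normal(size=(200, 3)), columns=["a", "b", "c"])
df["y"] = 1.5 * df["a"] - 0.5 * df["b"] + rng.normal(size=200)

formula, best = best_formula(df, "y")
print(formula)  # e.g. 'y ~ a + b'
print(best)     # one-row frame with formula, adjr2, ssr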
Example #3
def adl_regression(dataframe,
                   target_variable,
                   lags_dict=None,
                   cov_type='nonrobust',
                   model_type='gls'):
    """
    param dataframe: df содержащий необходимые переменные
    param target_variable: (str) название зависимой переменной
    param lags_dict: (dict) словарь: ключи - названия (str) колонок в dataframe, которые будут 
    использоваться для регрессии, значения (list) - списки лагов для каждой из переменных
    param cov_type: ['nonrobust', 'HC0', 'HC1', 'HC2', 'HC3']
    param model_type: ['ols', 'gls']
    """
    if lags_dict is not None:
        dat_dict = {}
        max_lag = 0
        for varname in lags_dict:
            m_l = np.max(lags_dict[varname])
            if m_l > max_lag:
                max_lag = m_l
        target_variable_array = dataframe[target_variable].values[max_lag:]
        dat_dict.update({'target': target_variable_array})
        target_len = len(target_variable_array)
        colnames_list = ['target']
        for varname in lags_dict:
            variable_len = len(dataframe[varname])
            for lag in lags_dict[varname]:
                dat_dict.update({
                    varname + '_{}'.format(lag):
                    dataframe[varname].values[max_lag - lag:variable_len - lag]
                })
                colnames_list.append('{}_{}'.format(varname, lag))
        data_for_regression = pd.DataFrame(dat_dict, columns=colnames_list)
    else:
        data_for_regression = dataframe
    # Build the right-hand side from every column except the target itself
    formula = ' + '.join(c for c in data_for_regression.columns if c != 'target')
    if model_type == 'ols':
        model = smf.ols('target ~ ' + formula,
                        data=data_for_regression).fit(cov_type=cov_type)
    else:
        model = smf.gls('target ~ ' + formula, data=data_for_regression).fit()
    return model
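A hypothetical call, regressing y on its own first two lags and on x lagged once (the function assumes numpy as np, pandas as pd and statsmodels.formula.api as smf):

import numpy as np
import pandas as pd
import statsmodels.formula.api as smf

rng = np.random.default_rng(2)
df = pd.DataFrame({"y": rng.normal(size=100), "x": rng.normal(size=100)})
model = adl_regression(df, target_variable="y",
                       lags_dict={"y": [1, 2], "x": [1]},
                       cov_type="HC1", model_type="ols")
print(model.summary())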
Example #4
def runModel(experiment,
             data,
             dependentVariable,
             independentVariables,
             regressionType='ols'):
    import statsmodels.formula.api as smf
    modelStr = modelString(experiment, dependentVariable, independentVariables)
    if regressionType == 'ols':
        model = smf.ols(modelStr, data=data)
    elif regressionType == 'gls':
        model = smf.gls(modelStr, data=data)
    elif regressionType == 'rlm':
        model = smf.rlm(modelStr, data=data)
    else:
        raise ValueError('Unknown regression type {}'.format(regressionType))
    return model.fit()
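modelString is not shown; a minimal hypothetical stand-in consistent with how it is called might be:

# Hypothetical stand-in for the unshown modelString helper; the real version
# presumably also uses `experiment`, which is ignored here.
def modelString(experiment, dependentVariable, independentVariables):
    return '{} ~ {}'.format(dependentVariable, ' + '.join(independentVariables))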
Example #5
def cpgls(responseList,
          intercept,
          phyCovMatrix,
          subMatrix,
          colVector,
          subWeight=1,
          colnum=0):
    # colnum is dummy for now

    exog = np.array([
        aa2vec(aa, aalphabet) for aa in colVector.flatten().tolist()
    ])  # exogenous "STATE matrix": n rows x 20 cols
    covMatrix = adjustCovMatrix(phyCovMatrix, subMatrix, exog, subWeight)

    # using the formula API
    data = pd.DataFrame()
    #data["response"] = responseList # forces intercept thru 0
    # fix the intercept at the passed value
    data["response"] = [response - intercept for response in responseList]
    data["aa"] = colVector.flatten()
    return smf.gls(formula=("response ~ aa + 0"), data=data, sigma=covMatrix)
Example #6
def gls_formula(data, xseq, **params):
    """
    Fit GLS using a formula
    """
    eval_env = params['enviroment']
    formula = params['formula']
    init_kwargs, fit_kwargs = separate_method_kwargs(params['method_args'],
                                                     sm.GLS, sm.GLS.fit)
    model = smf.gls(formula, data, eval_env=eval_env, **init_kwargs)
    results = model.fit(**fit_kwargs)
    data = pd.DataFrame({'x': xseq})
    data['y'] = results.predict(data)

    if params['se']:
        _, predictors = dmatrices(formula, data, eval_env=eval_env)
        alpha = 1 - params['level']
        prstd, iv_l, iv_u = wls_prediction_std(results,
                                               predictors,
                                               alpha=alpha)
        data['se'] = prstd
        data['ymin'] = iv_l
        data['ymax'] = iv_u
    return data
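This appears to be an excerpt of a plotnine-style smoother; it depends on separate_method_kwargs (plotnine), dmatrices (patsy) and wls_prediction_std (statsmodels sandbox). A hedged sketch of a call, with a params dict whose keys are inferred from the body above:

# Hypothetical driver; the synthetic data and params values are illustrative.
import numpy as np
import pandas as pd

rng = np.random.default_rng(3)
df = pd.DataFrame({'x': np.linspace(0, 1, 50)})
df['y'] = 3 * df['x'] + rng.normal(scale=0.1, size=50)

params = {
    'enviroment': 0,    # patsy evaluation environment (key spelled as in the code)
    'formula': 'y ~ x',
    'method_args': {},  # split between sm.GLS(...) and GLS.fit(...)
    'se': True,
    'level': 0.95,
}
pred = gls_formula(df, xseq=np.linspace(0, 1, 80), **params)
print(pred[['x', 'y', 'ymin', 'ymax']].head())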
Example #7
Virat = df[df["Player"] == "V Kohli"]

Vir_runs = Virat["Runs"].astype(int)
Vir_min = Virat["Minutes"].astype(int)
Vir_balls = Virat["Balls Faced"].astype(int)
Vir_fours = Virat["Fours"].astype(int)
Vir_sixes = Virat["Sixes"].astype(int)
Vir_sr = Virat["Strike Rate"].astype(float)

print(np.mean(Vir_min))
print(np.mean(Vir_balls))
print(np.mean(Vir_fours))
print(np.mean(Vir_sixes))
print(np.mean(Vir_sr))
print(np.mean(Vir_runs))

#Some cool Visualizations
plt.scatter(Vir_runs, Vir_balls,  color='red', alpha=0.5, s= Vir_fours*100, facecolor = "white")
plt.scatter(Vir_runs, Vir_min,  color='red', alpha=0.5, s= Vir_balls*10, facecolor = "white")
plt.scatter(Vir_runs, Vir_balls,  color='red', alpha=0.5, s= Vir_sr*10, facecolor = "white")

#Dependency of runs on some factors
import statsmodels.formula.api as smf
est = smf.gls(formula='Vir_runs ~ Vir_balls + Vir_fours + Vir_sixes + Vir_sr', data=Virat).fit()
est.summary()

est = smf.gls(formula='Vir_runs ~ Vir_balls + Vir_fours + Vir_sixes + Opposition', data=Virat).fit()
est.summary()

Example #8
    def add_covariate_model(self,
                            label,
                            covariate,
                            model,
                            restriction=None,
                            recode=None,
                            var_type='binary',
                            print_results=True):
        """Add a specified regression model for time-varying confounders. Unlike the exposure and outcome models, a
        covariate model does NOT have to be specified. Additionally, *n* covariate models can be specified for *n*
        time-varying covariates. Additional models are added by repeated calls for this function with the corresponding
        covariates and predictive regression equations

        Parameters
        ----------
        label : int
            Integer label for the covariate model. Covariate models are fit in ascending order within
             TimeVaryGFormula
        covariate : str
            Column label for time-varying confounder to be predicted
        model : str
            Variables to include in the model for predicting the outcome. Must be contained within the input
            pandas dataframe when initialized. Format follows patsy
            For example) 'var1 + var2 + var3 + var4'
        restriction : str, optional
            Used to restrict the population to fit the logistic regression model to. Useful for Intent-to-Treat
            model fitting. The pandas dataframe must be referred to as 'g'. For example) "g['art']==1"
        recode : str, optional
            This variable is vitally important for various functional forms implemented later in models. This
            is used to run some background code to recreate functional forms as the g-formula is estimated via fit()
            For an example, let's say we have age but we want the functional form to be quadratic. For this, we
            would set the recode="g['age_sq'] = g['age']**2;" Similar to TimeFixedGFormula, 'g' must be specified as the
            DataFrame object with the corresponding indexes. Also lines of executable code should end with ';', so
            Python knows that the line ends there. My apologies for this poor solution... I am working on a better way.
            In the background, Python executes the code input into recode
        var_type : str, optional
            Type of variable that the covariate is. Current options include 'binary' or 'continuous'
        print_results : bool, optional
            Whether to print the logistic regression model results to the terminal. Default is True
        """
        if type(label) is not int:
            raise ValueError('Label must be an integer')

        # Building predictive model
        g = self.gf.copy()
        if restriction is not None:
            g = g.loc[eval(restriction)].copy()

        if self._weights is None:  # Unweighted g-formula
            if var_type == 'binary':
                linkdist = sm.families.family.Binomial()
                m = smf.glm(covariate + ' ~ ' + model, g, family=linkdist)
            elif var_type == 'continuous':
                m = smf.gls(covariate + ' ~ ' + model, g)
            else:
                raise ValueError(
                    'Only binary or continuous covariates are currently supported'
                )
        else:  # Weighted g-formula
            if var_type == 'binary':
                linkdist = sm.families.family.Binomial()
                m = smf.glm(covariate + ' ~ ' + model,
                            g,
                            freq_weights=g[self._weights],
                            family=linkdist)
            elif var_type == 'continuous':
                m = smf.wls(covariate + ' ~ ' + model,
                            g,
                            weights=g[self._weights])
            else:
                raise ValueError(
                    'Only binary or continuous covariates are currently supported'
                )

        f = m.fit()
        if print_results:
            print(
                '=============================================================================='
            )
            print('Covariate (' + str(covariate) + ') Model')
            print(f.summary())
            print(
                '=============================================================================='
            )

        # Add the fitted model to the lists used later to predict the time-varying covariates
        self._covariate_models.append(f)
        self._covariate_model_index.append(label)
        self._covariate.append(covariate)
        self._covariate_type.append(var_type)
        if recode is None:
            self._covariate_recode.append(
                'None')  # Must be string for exec() to use later
        else:
            self._covariate_recode.append(recode)
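This method appears to belong to a TimeVaryGFormula-style estimator; a hedged example of a call, assuming an already-initialized instance tvg and made-up column names:

# Hypothetical: 'cd4', 'art' and 'age' are illustrative column names.
tvg.add_covariate_model(label=1,
                        covariate='cd4',
                        model='art + age + age_sq',
                        recode="g['age_sq'] = g['age']**2;",
                        var_type='continuous')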
Example #9
def linear_new(types, intput):
    np.random.seed(9876789)

    df = pd.read_csv(intput, index_col=False)
    print(df)
    print(df.columns[:-1])
    feature = df.columns[:-1]
    s1 = ' + '.join(feature)
    s2 = df.columns[-1]
    s = s2 + " ~ " + s1

    if types == "ols":
        results = smf.ols(s, data=df).fit(use_t=True)
    elif types == "gls":
        results = smf.gls(s, data=df).fit(use_t=True)
    elif types == "glsar":
        results = smf.glsar(s, data=df).fit(use_t=True)
    elif types == "wls":
        results = smf.wls(s, data=df).fit(use_t=True)
    else:
        raise ValueError("Unknown model type: " + types)

    print(
        "**********************************************************************************\n"
    )
    alpha = 0.05
    print(results.summary())

    data_t = {
        "coef": results.params,
        "std err": results.bse,
        "t": results.tvalues,
        "P>|t|": results.pvalues,
        "[" + str(alpha / 2.0): results.conf_int(alpha)[0],
        str(1 - alpha / 2.0) + "]": results.conf_int(alpha)[1]
    }

    sdata_df = pd.DataFrame(data_t)
    print(sdata_df)
    sdata_df.to_csv("out/data1.csv")

    from statsmodels.stats.stattools import (jarque_bera, omni_normtest,
                                             durbin_watson)

    jb, jbpv, skew, kurtosis = jarque_bera(results.wresid)
    omni, omnipv = omni_normtest(results.wresid)

    title = [
        "Model", "R-squared", "Adj. R-squared", "F-statistic",
        "Prob (F-statistic)", "Log-Likelihood", "AIC", "BIC", "Omnibus",
        "Prob(Omnibus)", "Skew", "Kurtosis", "Durbin-Watson",
        "Jarque-Bera (JB)", "Prob(JB)", "Cond. No."
    ]

    value = [
        results.model.__class__.__name__, results.rsquared,
        results.rsquared_adj, results.fvalue, results.f_pvalue, results.llf,
        results.aic, results.bic, omni, omnipv, skew, kurtosis,
        durbin_watson(results.wresid), jb, jbpv, results.diagn['condno']
    ]

    datadf = {"title": np.array(title), "value": np.array(value)}

    select_df = pd.DataFrame(datadf)
    print(select_df)
    select_df.to_csv("out/data2.csv")

    # Plot a 1D or a 3D figure
    predicted = results.predict(df)
    import matplotlib.pyplot as plt
    if len(feature) == 1:
        x = np.array(df[feature]).reshape(-1, 1)
        y = np.array(df[s2]).reshape(-1, 1)
        plt.figure(facecolor='white', figsize=(10, 5))
        plt.scatter(x, y, marker='x')
        plt.plot(x, predicted, c='r')

        title = 'The Linear Graph of One Dimension'
        # Label the x and y axes
        plt.xlabel(feature[0])
        plt.ylabel(s2)
        plt.title(title)
        plt.grid()
        plt.savefig("out/plot_out.png", format='png')

    elif len(feature) == 2:
        from mpl_toolkits.mplot3d import Axes3D
        ax1 = plt.axes(projection='3d')

        x = np.array(df[feature[0]]).reshape(-1, 1)
        y = np.array(df[feature[1]]).reshape(-1, 1)
        z = np.array(df[s2]).reshape(-1, 1)
        ax1.scatter3D(x, y, z, cmap='Blues')  # scatter plot of the data
        ax1.plot3D(x.ravel(), y.ravel(), predicted, 'gray')  # line through the fitted values
        ax1.set_xlabel(feature[0])
        ax1.set_ylabel(feature[1])
        ax1.set_zlabel(s2)
        plt.savefig("out/plot_out.png", format='png')
    else:
        print("The number of feature is big than 2 ,no plot!")

    return
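A hypothetical invocation, assuming input.csv holds the feature columns first and the response last, and that an out/ directory exists for the CSV and plot outputs:

linear_new("ols", "input.csv")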
Example #10
import pandas as pd
from statsmodels.formula.api import gls
import seaborn as sns
import matplotlib.pyplot as plt

data = pd.read_csv("optimal_data_frame.csv")
print(data.head())

group = data.groupby(["block", "condition"])

model = gls("step ~ timestep + block",
            data[data["condition"] == "random"]).fit()
print(model.summary())

model = gls("reaction_time ~ timestep + block",
            data[data["condition"] == "random"]).fit()
print(model.summary())

model = gls("normalized_reaction_time ~ timestep + block",
            data[data["condition"] == "random"]).fit()
print(model.summary())
# model = gls("step ~ timestep + block", data[data["condition"] == "block"]).fit()
# print(model.summary())
#
# model = gls("reaction_time ~ timestep + block", data[data["condition"] == "block"]).fit()
# print(model.summary())
#
# model = gls("normalized_reaction_time ~ timestep + block", data[data["condition"] == "block"]).fit()
# print(model.summary())
#
model = gls("optimal_p ~ timestep + block",
Example #11
    def fit(self, df, formula):
        return smf.gls(formula=formula, data=df).fit()
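A hypothetical usage of this thin wrapper, assuming an instance of the (unshown) enclosing class:

# Hypothetical: `estimator` is an instance of the enclosing class, df a DataFrame.
results = estimator.fit(df, 'y ~ x1 + x2')
print(results.params)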
Example #12
def main(filename):
    results = pd.read_csv(
        f"{params.ROOT}/../data/{filename}.csv",
        dtype={
            "Place (Overall)": "Int64",
            "Place (Gender)": "Int64",
            "Place (Category)": "Int64",
            "Name": str,
            "Sex": str,
            "Club": str,
            "Running Number": object,
            "Category": "category",
            "Year": "Int64",
            "Country": str,
            "FirstName": str,
            "LastName": str,
            "DSQ": bool,
            "Finish (Total Seconds)": "float64",
        },
        parse_dates=["Finish"],
    )

    results["Finish"] = pd.to_timedelta(results["Finish"])

    # Basic plotting
    sns.violinplot(data=results, x="Sex", y="Finish (Total Seconds)")
    plt.savefig(f"{params.ROOT}/../plots/london_violin.png")

    # Try explanatory linear regression with statsmodels
    mod = smf.gls(formula='Q("Finish (Total Seconds)") ~ Sex + Category',
                  data=results)

    res = mod.fit()

    print(res.summary())

    # Try sklearn linear regression

    # Get label and value arrays of interest, get rid of NaNs
    X = results[["Sex", "Category"]]
    X = X.fillna(X.mode())
    y = results["Finish (Total Seconds)"]
    y = y.fillna(y.mean())

    # One-hot encode the categorical variables to 1/0 columns
    enc = preprocessing.OneHotEncoder()
    X_transform = enc.fit_transform(X)

    # Sample data
    X_train, X_test, y_train, y_test = train_test_split(X_transform,
                                                        y,
                                                        test_size=0.2)

    regressor = LinearRegression()
    regressor.fit(X_train, y_train)

    y_pred = regressor.predict(X_test)

    df = pd.DataFrame({"Actual": y_test, "Predicted": y_pred})
    print(df.head())
    # Evaluate algorithm

    print("Mean Absolute Error:", metrics.mean_absolute_error(y_test, y_pred))
    print("Mean Squared Error:", metrics.mean_squared_error(y_test, y_pred))
    print("Root Mean Squared Error:",
          np.sqrt(metrics.mean_squared_error(y_test, y_pred)))
Example #13
from sklearn.datasets import load_boston
import pandas as pd
import statsmodels.formula.api as sm

boston_data = load_boston()

boston = pd.DataFrame(data=boston_data.data, columns=boston_data.feature_names)
boston['target'] = boston_data.target

train = boston.sample(frac=0.8, random_state=200)
test = boston.drop(train.index)

result = sm.gls(formula= 'target ~ CRIM + ZN +CHAS + NOX + RM + DIS + RAD + TAX + PTRATIO + B + LSTAT',
                data=train).fit()

print(result.summary())

# Find the optimal variable values by checking which minimize the summed error
sum_difference = 0
for i, row in test.iterrows():
    params = result.params
    r_estimate = row['PTRATIO']*params['PTRATIO'] + row['NOX']*params['NOX'] + row['B']*params['B'] + \
                 row['CHAS']*params['CHAS'] + row['RAD']*params['RAD'] + row['TAX']*params['TAX'] + row['ZN']*params['ZN'] + \
                 row['DIS']*params['DIS'] + row['CRIM']*params['CRIM'] + row['RM']*params['RM'] + \
                 row['LSTAT']*params['LSTAT'] + params['Intercept']
    difference = abs(row['target'] - r_estimate)
    sum_difference += difference
print(sum_difference)
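The per-row sum above can also be computed with the results object's own predict; an equivalent sketch:

# Equivalent evaluation using the fitted results object directly
predictions = result.predict(test)
print(abs(test['target'] - predictions).sum())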
Example #14
#Some cool Visualizations
plt.scatter(Vir_runs,
            Vir_balls,
            color='red',
            alpha=0.5,
            s=Vir_fours * 100,
            facecolor="white")
plt.scatter(Vir_runs,
            Vir_min,
            color='red',
            alpha=0.5,
            s=Vir_balls * 10,
            facecolor="white")
plt.scatter(Vir_runs,
            Vir_balls,
            color='red',
            alpha=0.5,
            s=Vir_sr * 10,
            facecolor="white")

#Dependency of runs on some factors
import statsmodels.formula.api as smf
est = smf.gls(formula='Vir_runs ~ Vir_balls + Vir_fours + Vir_sixes + Vir_sr',
              data=Virat).fit()
est.summary()

est = smf.gls(
    formula='Vir_runs ~ Vir_balls + Vir_fours + Vir_sixes + Opposition',
    data=Virat).fit()
est.summary()
Example #15
test_variance7 = round(
    np.power(test7_df['salary'].corr(test7_df['predicted_salary']), 2), 3)
print('Test Set Variance Accounted for: ', test_variance7)

fit7 = statsform.wls(model7, data=train7_df, weights=1. / (w**2)).fit()
print(fit7.summary())

## Model 8
## Model 6 using GLS
test8_df = test_df_nooutlines.copy()
train8_df = train_df_nooutlines.copy()

model8 = str('salary ~ conference + wl_ratio + capacity')

train8_fit = statsform.gls(model8, data=train8_df).fit()
train8_df['predicted_salary'] = train8_fit.fittedvalues
test8_df['predicted_salary'] = train8_fit.predict(test8_df)

test_variance8 = round(
    np.power(test8_df['salary'].corr(test8_df['predicted_salary']), 2), 3)
print('Test Set Variance Accounted for: ', test_variance8)

fit8 = statsform.gls(model8, data=train8_df).fit()
print(fit8.summary())

## Setting some base variables so I can easily change my inputs from prediction to prediction
year = '2017'
school = 'Syracuse'
coach = 'Dino Babers'
conference = 'ACC'
Example #16
    "max_depth": [25, 50, 100, None],
    "n_estimators": [100, 500, 1000],
    "criterion": ["mse"]
}

from sklearn.model_selection import GridSearchCV

# random_search = GridSearchCV(model, param_grid =param_dist, cv=2)
# print('start')
# random_search.fit(X=temp[pred_id],y=temp['actual'])
# random_search.cv_results_
# random_search.best_score_
# random_search.best_params_
# end hyper

gls(data=temp,
    formula='target~consensus_std+actual_L1+quarterly_ret').fit().summary()

model.fit(X=temp[pred_id], y=temp['target'])
feat_imp = pd.DataFrame(data=model.feature_importances_, index=pred_id)
print(feat_imp.sort_values(0, ascending=False))

test_s['over_hat'] = model.predict(X=test_s[pred_id])
test_s['brut'] = 0
t = (test_s['brut'] == test_s['target'])
sum(t) / len(t)

test_s['model'] = test_s['consensus_mean']
test_s.loc[test_s.over_hat == 1,
           'model'] = test_s.loc[test_s.over_hat == 1, 'model'] * 1.1
# test_s.loc[test_s.over_hat==0,'model']  = test_s.loc[test_s.over_hat==0,'model'] *0.9
Example #17
    def add_covariate_model(self,
                            label,
                            covariate,
                            model,
                            restriction=None,
                            recode=None,
                            var_type='binary',
                            print_results=True):
        """
        Build the model for the specified covariate. This is to deal with time-varying confounders.
        Does NOT have to be specified, unlike the exposure and outcome models. The order in which these
        models are fit is based on the provided integer labels
        
        Input:
        
        label:
            -integer label for the covariate model. Covariate models are fit in ascending order within 
             TimeVaryGFormula
        covariate:
            -variable to be predicted
        model:
            -variables to include in the model for predicting the outcome. Must be contained within the input
             pandas dataframe when initialized. Format is the same as the functional form,
             i.e. 'var1 + var2 + var3 + var4'
        restriction:
            -used to restrict the population to fit the logistic regression model to. Useful for Intent-to-Treat
             model fitting. The pandas dataframe must be referred to as 'g'
             Example) "g['art']==1"
        recode:
            -This variable is vitally important for various functional forms implemented later in models. This
             is used to run some background code to recreate functional forms as the g-formula is fit via fit()
             For example, let's say we have age but we want the functional form to be cubic. For this, we
             would set recode="g['age_cubed'] = g['age']**3;". Similar to TimeFixedGFormula, 'g' must be specified as the data frame
             object with the corresponding indexes. Also lines of executable code should end with ';', so Python
             knows that the line ends there. My apologies for this poor solution... I am working on a better way
        var_type:
            -type of variable that the covariate is. Current options include 'binary' or 'continuous'
        print_results:
            -whether to print the logistic regression results to the terminal. Default is True
        """
        if type(label) is not int:
            raise ValueError('Label must be an integer')

        # Building predictive model
        g = self.gf.copy()
        if restriction is not None:
            g = g.loc[eval(restriction)].copy()

        if self._weights is None:  # Unweighted g-formula
            if var_type == 'binary':
                linkdist = sm.families.family.Binomial(sm.families.links.logit)
                m = smf.glm(covariate + ' ~ ' + model, g, family=linkdist)
            elif var_type == 'continuous':
                linkdist = sm.families.family.Gaussian(
                    sm.families.links.identity)
                m = smf.gls(covariate + ' ~ ' + model, g)
            else:
                raise ValueError(
                    'Only binary or continuous covariates are currently supported'
                )
        else:  # Weighted g-formula
            if var_type == 'binary':
                linkdist = sm.families.family.Binomial(sm.families.links.logit)
                m = smf.gee(covariate + ' ~ ' + model,
                            self.idvar,
                            g,
                            weights=g[self._weights],
                            family=linkdist)
            elif var_type == 'continuous':
                linkdist = sm.families.family.Gaussian(
                    sm.families.links.identity)
                m = smf.gee(covariate + ' ~ ' + model,
                            self.idvar,
                            g,
                            weights=g[self._weights],
                            family=linkdist)
            else:
                raise ValueError(
                    'Only binary or continuous covariates are currently supported'
                )

        f = m.fit()
        if print_results:
            print(f.summary())

        # Add the fitted model to the lists used later to predict the time-varying covariates
        self._covariate_models.append(f)
        self._covariate_model_index.append(label)
        self._covariate.append(covariate)
        self._covariate_type.append(var_type)
        if recode is None:
            self._covariate_recode.append(
                'None')  # Must be string for exec() to use later
        else:
            self._covariate_recode.append(recode)