Example #1
    def forward_selected(self):
        """Multinomial logit model built by forward selection.

        Uses instance attributes:
        -------------------------
        self.data : pandas DataFrame with all candidate predictors and the response

        self.col : string, name of the response column in self.data

        Returns:
        --------
        model: an "optimal" fitted statsmodels MNLogit model
               selected by forward selection
               evaluated by AIC
        """
        response = self.col
        data = self.data
        remaining = set(data.columns)
        remaining.remove(response)
        selected = []
        current_score, best_new_score = float('Inf'), float('Inf')
        while remaining and current_score == best_new_score:
            scores_with_candidates = []
            for candidate in remaining:
                # fit with the current selection plus one candidate and
                # score the result by AIC
                res = sm.MNLogit(
                    data[response].astype(int),
                    data[selected + [candidate]].astype(int)
                ).fit_regularized(penalty='l2')
                score = res.aic
                scores_with_candidates.append((score, candidate))
            # after the descending sort, the last entry has the lowest AIC
            scores_with_candidates.sort(reverse=True)
            best_new_score, best_candidate = scores_with_candidates.pop()
            # accept the candidate only if it lowers the AIC; otherwise the
            # while-loop condition fails on the next pass and selection stops
            if current_score > best_new_score:
                remaining.remove(best_candidate)
                selected.append(best_candidate)
                current_score = best_new_score
        model = sm.MNLogit(
            data[response].astype(int),
            data[selected].astype(int)).fit_regularized(penalty='l2')
        return model
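For context, a minimal, self-contained sketch of the scoring step used above: fit sm.MNLogit on a small synthetic frame and read off the AIC that the forward-selection loop compares. The data, column names, and the plain fit() call are illustrative assumptions, not part of the original class.

import numpy as np
import pandas as pd
import statsmodels.api as sm

# purely synthetic data for illustration
rng = np.random.default_rng(0)
df = pd.DataFrame({
    'x1': rng.normal(size=200),
    'x2': rng.normal(size=200),
})
# three-class response, loosely driven by x1
df['y'] = pd.cut(df['x1'] + 0.5 * rng.normal(size=200), 3,
                 labels=[0, 1, 2]).astype(int)

# fit the multinomial logit and read the information criterion used above
res = sm.MNLogit(df['y'], sm.add_constant(df[['x1', 'x2']])).fit(disp=False)
print(res.aic)
print(res.params)   # one coefficient column per non-reference class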
Example #2
    def FitModel(self, max_iter=1000):
        'Fits the autoregressive logistic (ARL) model using statsmodels or sklearn'

        # get fitting data
        X = np.concatenate(list(self.terms_fit.values()), axis=1)
        y = self.xds_bmus_fit.bmus.values

        # fit model
        print("\nFitting autoregressive logistic model ...")
        start_time = time.time()

        if self.model_library == 'statsmodels':

            # mount data with pandas
            X = pd.DataFrame(X, columns=self.terms_fit_names)
            y = pd.DataFrame(y, columns=['bmus'])

            # TODO: capture the evolution of L (maximum likelihood)
            self.model = sm.MNLogit(y, X).fit(
                method='lbfgs',
                maxiter=max_iter,
                retall=True,
                full_output=True,
                disp=True,
                warn_convergence=True,
                missing='raise',
            )

        elif self.model_library == 'sklearn':

            # use sklearn logistic regression (large C => very weak L2 penalty)
            self.model = linear_model.LogisticRegression(penalty='l2',
                                                         C=1e5,
                                                         fit_intercept=False,
                                                         solver='lbfgs')
            self.model.fit(X, y)

        else:
            print('wrong config: model_library "{0}" is not supported'.format(
                self.model_library))
            sys.exit()

        elapsed_time = time.time() - start_time
        print("Optimization done in {0:.2f} seconds\n".format(elapsed_time))

        # save fitted model
        self.SaveModel()
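As a follow-up, a hedged sketch (not part of the original class) of how class predictions could be read from either backend; model, model_library and X_new are assumed to be set up as in FitModel above, and the bmus labels are assumed to be coded 0..K-1 so that the probability column index equals the label.

import numpy as np

if model_library == 'statsmodels':
    # an MNLogit results object returns an (n_obs, n_classes) array of
    # class probabilities
    probs = model.predict(X_new)
    predicted_bmus = np.argmax(probs, axis=1)
else:
    # sklearn's LogisticRegression returns class labels directly
    predicted_bmus = model.predict(X_new)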
Example #3
def run_logistic_regression(df, patsy_model, dependent_variable, estimator,
                            weights):
    y, X = dmatrices(patsy_model, df, return_type='dataframe')

    model_result = discrete_model.MNLogit(y, X).fit(maxiter=100,
                                                    disp=False,
                                                    method="nm")

    # MNLogit fits one equation per non-reference outcome; column 0 picks the
    # first of these equations
    p_values = model_result.pvalues[0].to_dict()
    t_values = model_result.tvalues[0].to_dict()
    params = model_result.params[0].to_dict()
    ste = model_result.bse[0].to_dict()

    constants = {
        'p_value': p_values.get('Intercept'),
        't_value': t_values.get('Intercept'),
        'coefficient': params.get('Intercept'),
        'standard_error': ste.get('Intercept')
    }

    regression_field_properties = {
        'p_value': p_values,
        't_value': t_values,
        'coefficient': params,
        'standard_error': ste
    }

    total_regression_properties = {
        'aic': model_result.aic,
        'bic': model_result.bic,
        # MNLogit only reports McFadden's pseudo R-squared, so the same value
        # fills both fields below
        'r_squared': model_result.prsquared,
        'r_squared_adj': model_result.prsquared,
        'llf': model_result.llf,
        'llnull': model_result.llnull,
        'llr_pvalue': model_result.llr_pvalue
        # 'f_test': model_result.f_test
    }

    regression_results = restructure_field_properties_dict(
        constants, regression_field_properties, total_regression_properties)

    return regression_results
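For reference, a small sketch of what patsy's dmatrices does with the formula string passed in as patsy_model: the left-hand side becomes y, and the right-hand side plus an automatically added Intercept column becomes X, which is why the constants dict above looks up 'Intercept'. The dataframe and formula are made up for illustration.

import pandas as pd
from patsy import dmatrices

df = pd.DataFrame({'outcome': [0, 1, 2, 1], 'x1': [0.1, 0.4, 0.9, 0.5]})
y, X = dmatrices('outcome ~ x1', df, return_type='dataframe')
print(X.columns.tolist())   # ['Intercept', 'x1']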
Example #4
#Main function
print('train_model script started')

#Training data files
abs_file_path = f_getFilePath('data\\processed\\iris_train_x.csv')
train_x = pd.read_csv(abs_file_path, header='infer', index_col=None)
abs_file_path = f_getFilePath('data\\processed\\iris_train_y.csv')
train_y = pd.read_csv(abs_file_path, header='infer', index_col=None)

#Build training models
reg_model = []
model_name = []

#Model 1: Multinomial Logit function
model_name.append('MNLogit')
mnlr_func = dm.MNLogit(train_y, add_constant(train_x))
mnlr_model = mnlr_func.fit(method='powell', maxiter=200)
##Build logistic function (scikit) --Not Working
#reg_func = lm.LogisticRegression(solver='newton-cg', multi_class='multinomial')
#reg_model = reg_func.fit(train_x, train_y.values.ravel())
reg_model.append(mnlr_model)

#Model 2: K-Nearest Neighbours
#Model 2a: K = 3
model_name.append('KNN 3')
knn3_func = KNeighborsClassifier(n_neighbors=3)
knn3_model = knn3_func.fit(train_x, train_y)
reg_model.append(knn3_model)

#Model 2b: K = 5
model_name.append('KNN 5')
knn5_func = KNeighborsClassifier(n_neighbors=5)
knn5_model = knn5_func.fit(train_x, train_y)
reg_model.append(knn5_model)
Example #5
from sklearn import preprocessing

#Encode the categorical match outcome as integers before fitting
enc = preprocessing.LabelEncoder()
label_encoder = enc.fit(df_win_lose.home_result)
y = label_encoder.transform(df_win_lose.home_result)
df_win_lose["home_result"] = y

scaler = preprocessing.StandardScaler(with_mean=False, with_std=False)
scaler.fit(df_win_lose)
X_train = scaler.transform(df_win_lose)

df_win_lose = df_win_lose.astype(float)
df_constant = add_constant(df_win_lose)
#Restore st.chisqprob, which newer scipy versions removed but older
#statsmodels summaries still call (st is assumed to be scipy.stats)
st.chisqprob = lambda chisq, df: st.chi2.sf(chisq, df)
cols = df_win_lose.columns[:-1]
df_win_lose["home_result"] = y
#Fit the multinomial logit (ds is assumed to be statsmodels' discrete_model)
model = ds.MNLogit(df_win_lose.home_result, df_constant[cols])
result = model.fit()
result.summary()

from sklearn.linear_model import LogisticRegression

df_train = df_played_matches[[
    'home_pos', 'visitor_pos', 'spi1', 'spi2', 'win%', 'loss%', 'importance1',
    'importance2', 'xG1', 'xG2', 'home_result'
]]

df_test = df.iloc[1320:][[
    'home_pos', 'visitor_pos', 'spi1', 'spi2', 'win%', 'loss%', 'importance1',
    'importance2', 'xG1', 'xG2', 'home_result'
]]
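A hedged sketch of how the train/test split above might be used: fit MNLogit on df_train and score the most probable class on df_test. The helper names (feature_cols, X_tr, y_tr) are illustrative, ds is assumed to be statsmodels' discrete_model as in the fit earlier, and home_result is assumed to be label-encoded 0..K-1 in both frames.

import numpy as np
import statsmodels.discrete.discrete_model as ds
from statsmodels.tools import add_constant

feature_cols = [c for c in df_train.columns if c != 'home_result']

X_tr = add_constant(df_train[feature_cols].astype(float))
y_tr = df_train['home_result']
X_te = add_constant(df_test[feature_cols].astype(float))

fit = ds.MNLogit(y_tr, X_tr).fit(disp=False)
probs = np.asarray(fit.predict(X_te))    #(n_obs, n_classes) probabilities
pred = probs.argmax(axis=1)              #most probable class per match
accuracy = (pred == df_test['home_result'].values).mean()
print(accuracy)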