def logistic_regression():
    '''Logistic regression example

    chapter 7.3, p 130
    [tbd]: the cloglog values are inconsistent with those mentioned in the
    book. This is probably due to the specific definitions of "loglog" and
    "cloglog" in the respective languages.
    '''

    inFile = r'GLM_data/Table 7.2 Beetle mortality.xls'
    df = get_data(inFile)

    # adjust the unusual column names in the Excel file
    colNames = [name.split(',')[1].lstrip() for name in df.columns.values]
    df.columns = colNames

    # fit the model
    df['tested'] = df['n']
    df['killed'] = df['y']
    df['survived'] = df['tested'] - df['killed']
    model = glm('survived + killed ~ x', data=df, family=Binomial()).fit()
    print(model.summary())

    print('-' * 65)
    print('Equivalent solution:')

    model = glm('I(n - y) + y ~ x', data=df, family=Binomial()).fit()
    print(model.summary())

    # The fitted number of survivors can be obtained by
    fits = df['n'] * (1 - model.fittedvalues)
    print('Fits Logit:')
    print(fits)

    # The fits for other link functions are:
    model_probit = glm('I(n - y) + y ~ x', data=df,
                       family=Binomial(links.probit())).fit()
    print(model_probit.summary())

    fits_probit = df['n'] * (1 - model_probit.fittedvalues)
    print('Fits Probit:')
    print(fits_probit)

    model_cll = glm('I(n - y) + y ~ x', data=df,
                    family=Binomial(links.cloglog())).fit()
    print(model_cll.summary())

    fits_cll = df['n'] * (1 - model_cll.fittedvalues)
    print('Fits Extreme Value:')
    print(fits_cll)
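# A minimal sketch of the link-definition issue flagged in the docstring
# above (assumption: statsmodels defines cloglog as g(p) = log(-log(1 - p)),
# while some texts use the log-log link g(p) = -log(-log(p)); applying the
# same transform to the complementary probability gives the other form,
# which would explain the differing fitted values).
import numpy as np
from statsmodels.genmod.families import links

p = np.array([0.1, 0.5, 0.9])
g = links.cloglog()
np.testing.assert_allclose(g(p), np.log(-np.log(1 - p)))
# the same transform applied to the complementary probability:
np.testing.assert_allclose(g(1 - p), np.log(-np.log(p)))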
def test_compare_logit(self):

    vs = Independence()
    family = Binomial()

    Y = 1 * (np.random.normal(size=100) < 0)
    X1 = np.random.normal(size=100)
    X2 = np.random.normal(size=100)
    X3 = np.random.normal(size=100)
    groups = np.random.randint(0, 4, size=100)

    D = pd.DataFrame({"Y": Y, "X1": X1, "X2": X2, "X3": X3})

    mod1 = GEE.from_formula("Y ~ X1 + X2 + X3", groups, D,
                            family=family, cov_struct=vs)
    rslt1 = mod1.fit()

    mod2 = sm.logit("Y ~ X1 + X2 + X3", data=D)
    rslt2 = mod2.fit(disp=False)

    assert_almost_equal(rslt1.params.values, rslt2.params.values,
                        decimal=10)
def test_ordinal(self):

    family = Binomial()

    endog, exog, groups = load_data("gee_ordinal_1.csv", icept=False)

    va = GlobalOddsRatio("ordinal")

    mod = OrdinalGEE(endog, exog, groups, None, family, va)
    rslt = mod.fit()

    # Regression test
    cf = np.r_[1.09250002, 0.0217443, -0.39851092, -0.01812116,
               0.03023969, 1.18258516, 0.01803453, -1.10203381]
    assert_almost_equal(rslt.params, cf, decimal=5)

    # Regression test
    se = np.r_[0.10883461, 0.10330197, 0.11177088, 0.05486569,
               0.05997153, 0.09168148, 0.05953324, 0.0853862]
    assert_almost_equal(rslt.bse, se, decimal=5)

    # Check that we get the correct results type
    assert_equal(type(rslt), OrdinalGEEResultsWrapper)
    assert_equal(type(rslt._results), OrdinalGEEResults)
def _regression(self, in_vars):
    X = self.X[in_vars]

    # collect the optional GLM keyword arguments
    kwargs = {}
    if self.kw_algorithm_class_args is not None:
        kwargs.update(self.kw_algorithm_class_args)
    if self.fit_weight is not None:
        kwargs['freq_weights'] = self.fit_weight

    glm = GLM(self.y, sm.add_constant(X),
              family=Binomial(link=logit), **kwargs)
    clf = glm.fit()

    # expose sklearn-style attributes on the fitted results
    clf.intercept_ = [clf.params.const]
    clf.coef_ = [clf.params[1:]]
    return clf
def test_default_time(self):
    """
    Check that the time defaults work correctly.
    """

    endog, exog, group = load_data("gee_logistic_1.csv")

    # Time values for the autoregressive model
    T = np.zeros(len(endog))
    idx = set(group)
    for ii in idx:
        jj = np.flatnonzero(group == ii)
        T[jj] = lrange(len(jj))

    family = Binomial()
    va = Autoregressive()

    md1 = GEE(endog, exog, group, family=family, cov_struct=va)
    mdf1 = md1.fit()

    md2 = GEE(endog, exog, group, time=T, family=family, cov_struct=va)
    mdf2 = md2.fit()

    assert_almost_equal(mdf1.params, mdf2.params, decimal=6)
    assert_almost_equal(mdf1.standard_errors(),
                        mdf2.standard_errors(), decimal=6)
def gendat_ordinal():

    os = ordinal_simulator()
    os.params = np.r_[0., 1]
    os.ngroups = 200
    os.thresholds = [1, 0, -1]
    os.dparams = [1., ]
    os.simulate()

    data = np.concatenate((os.endog[:, None], os.exog,
                           os.group[:, None]), axis=1)

    os.endog_ex, os.exog_ex, os.intercepts, os.nthresh = \
        gee_setup_ordinal(data, 0)

    os.group_ex = os.exog_ex[:, -1]
    os.exog_ex = os.exog_ex[:, 0:-1]
    os.exog_ex = np.concatenate((os.intercepts, os.exog_ex), axis=1)

    va = GlobalOddsRatio(4, "ordinal")

    lhs = np.array([[0., 0., 0, 1., 0.], [0., 0, 0, 0, 1]])
    rhs = np.r_[0., 1]

    return os, va, Binomial(), (lhs, rhs)
def test_ordinal_pandas(self):

    family = Binomial()

    endog_orig, exog_orig, groups = load_data("gee_ordinal_1.csv",
                                              icept=False)

    data = np.concatenate((endog_orig[:, None], exog_orig,
                           groups[:, None]), axis=1)
    data = pd.DataFrame(data)
    data.columns = ["endog", "x1", "x2", "x3", "x4", "x5", "group"]

    # Recode as cumulative indicators
    endog, exog, intercepts, nlevel = \
        gee_setup_ordinal(data, "endog")

    exog1 = np.concatenate((intercepts, exog), axis=1)
    groups = exog1[:, -1]
    exog1 = exog1[:, 0:-1]

    v = GlobalOddsRatio(nlevel, "ordinal")

    beta = gee_ordinal_starting_values(endog_orig, exog_orig.shape[1])

    md = GEE(endog, exog1, groups, None, family, v)
    mdf = md.fit(start_params=beta)

    cf = np.r_[1.09238131, 0.02148193, -0.39879146, -0.01855666,
               0.02983409, 1.18123172, 0.01845318, -1.10233886]
    se = np.r_[0.10878752, 0.10326078, 0.11171241, 0.05488705,
               0.05995019, 0.0916574, 0.05951445, 0.08539281]

    assert_almost_equal(mdf.params, cf, decimal=2)
    assert_almost_equal(mdf.bse, se, decimal=2)
def test_calc_wdesign_mat():
    # separately tests that _calc_wdesign_mat
    # returns sensible results
    #
    # regression test

    np.random.seed(435265)
    X = np.random.normal(size=(3, 3))
    y = np.random.randint(0, 2, size=3)
    beta = np.random.normal(size=3)

    mod = OLS(y, X)
    dmat = _calc_wdesign_mat(mod, beta, {})
    assert_allclose(dmat, np.array([[1.306314, -0.024897, 1.326498],
                                    [-0.539219, -0.483028, -0.703503],
                                    [-3.327987, 0.524541, -0.139761]]),
                    atol=1e-6, rtol=0)

    mod = GLM(y, X, family=Binomial())
    dmat = _calc_wdesign_mat(mod, beta, {})
    assert_allclose(dmat, np.array([[0.408616, -0.007788, 0.41493],
                                    [-0.263292, -0.235854, -0.343509],
                                    [-0.11241, 0.017718, -0.004721]]),
                    atol=1e-6, rtol=0)
def fit(self, start_params=None, maxiter=100000, maxfun=5000,
        disp=False, method='bfgs', **kwds):
    """
    Fit the model.

    Parameters
    ----------
    start_params : array-like
        A vector of starting values for the regression
        coefficients.  If None, a default is chosen.
    maxiter : integer
        The maximum number of iterations.
    maxfun : integer
        The maximum number of function evaluations.
    disp : bool
        Show convergence stats.
    method : str
        The optimization method to use.
    """

    if start_params is None:
        start_params = sm.GLM(self.endog, self.exog,
                              family=Binomial()).fit(disp=False).params
        start_params = np.append(start_params, [0.5] * self.Z.shape[1])

    return super(Beta, self).fit(start_params=start_params,
                                 maxiter=maxiter, maxfun=maxfun,
                                 method=method, disp=disp, **kwds)
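# A self-contained sketch of the starting-value construction above: the
# mean coefficients come from a quasi-binomial GLM fit and each precision
# coefficient starts at 0.5 (the data here are synthetic stand-ins for
# self.endog, self.exog, and self.Z).
import numpy as np
import statsmodels.api as sm
from statsmodels.genmod.families import Binomial

rng = np.random.default_rng(0)
exog = sm.add_constant(rng.normal(size=(100, 2)))  # mean design matrix
Z = np.ones((100, 1))                              # precision design matrix
endog = rng.uniform(0.01, 0.99, size=100)          # response in (0, 1)

start_mean = sm.GLM(endog, exog, family=Binomial()).fit().params
start_params = np.append(start_mean, [0.5] * Z.shape[1])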
def test_join_naive():
    # tests that the results of all the intermediate steps
    # remain correct for naive join, does this for OLS and GLM
    #
    # regression test

    np.random.seed(435265)
    X = np.random.normal(size=(50, 3))
    y = np.random.randint(0, 2, size=50)

    mod = OLS(y, X)
    res_l = []
    for i in range(2):
        res = _est_regularized_naive(mod, i, 2, fit_kwds={"alpha": 0.1})
        res_l.append(res)
    joined = _join_naive(res_l)
    assert_allclose(joined, np.array([-0.020757, 0., 0.]),
                    atol=1e-6, rtol=0)

    mod = GLM(y, X, family=Binomial())
    res_l = []
    for i in range(2):
        res = _est_regularized_naive(mod, i, 2, fit_kwds={"alpha": 0.1})
        res_l.append(res)
    joined = _join_naive(res_l)
    assert_allclose(joined, np.array([0., 0., 0.]),
                    atol=1e-6, rtol=0)
def test_join_debiased():
    # tests that the results of all the intermediate steps
    # remain correct for debiased join, does this for OLS and GLM
    #
    # regression test

    np.random.seed(435265)
    X = np.random.normal(size=(50, 3))
    y = np.random.randint(0, 2, size=50)

    mod = OLS(y, X)
    res_l = []
    for i in range(2):
        res = _est_regularized_debiased(mod, i, 2, fit_kwds={"alpha": 0.1})
        res_l.append(res)
    joined = _join_debiased(res_l)
    assert_allclose(joined, np.array([-0.167548, -0.016567, -0.34414]),
                    atol=1e-6, rtol=0)

    mod = GLM(y, X, family=Binomial())
    res_l = []
    for i in range(2):
        res = _est_regularized_debiased(mod, i, 2, fit_kwds={"alpha": 0.1})
        res_l.append(res)
    joined = _join_debiased(res_l)
    assert_allclose(joined, np.array([-0.164515, -0.412854, -0.223955]),
                    atol=1e-6, rtol=0)
def initial_guess_mean(endog, exog_mean, bounded_reg_link, method="Default"):
    """
    Obtain an initial guess for the regression parameters related to
    the mean in a bounded data regression model.  The initial guess is
    obtained from a quasi-likelihood regression.

    :param endog (array_like): 1d array of endogenous response variable.
    :param exog_mean (array_like): A nobs x k array where nobs is the
        number of observations and k is the number of mean regressors.
        An intercept is not included by default and should be added by
        the user.
    :param bounded_reg_link: An instance of BoundedRegLink.  Recall that
        the default precision link is None.
    :param method (str): The method used to obtain the initial guesses.
        The options are: 'Default' (estimate the mean by
        quasi-likelihood) and 'R' (use the same strategy as R's
        version).
    """
    if method == "Default":
        initial_guess_mean_param = (sm.GLM(
            endog,
            exog_mean,
            family=Binomial(link=bounded_reg_link.get_link_mean()),
        ).fit(disp=False).params)
    elif method == "R":
        endog_mod = bounded_reg_link.link_mean(endog)
        initial_guess_mean_param = (sm.OLS(endog_mod,
                                           exog_mean).fit(disp=False).params)
    else:
        raise ValueError(
            "Please enter a valid method for the initial guess; "
            "the options are 'Default' and 'R'.")

    return correct_dimension(initial_guess_mean_param)
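# A self-contained sketch of the 'R' strategy above, assuming a logit mean
# link: regress the link-transformed response on exog_mean by ordinary
# least squares (the data here are synthetic; BoundedRegLink itself is
# not needed for the illustration).
import numpy as np
import statsmodels.api as sm

rng = np.random.default_rng(1)
exog_mean = sm.add_constant(rng.normal(size=(50, 1)))
endog = rng.uniform(0.05, 0.95, size=50)

endog_mod = np.log(endog / (1 - endog))  # logit-transformed response
guess = sm.OLS(endog_mod, exog_mean).fit().params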
def test_fit_joblib():
    # tests that the results of all the intermediate steps
    # remain correct for joblib fit, does this for OLS and GLM
    # and a variety of model sizes
    #
    # regression test

    np.random.seed(435265)
    X = np.random.normal(size=(50, 3))
    y = np.random.randint(0, 2, size=50)

    mod = DistributedModel(1, model_class=OLS)
    fit = mod.fit(_data_gen(y, X, 1), parallel_method="joblib",
                  fit_kwds={"alpha": 0.5})
    assert_allclose(fit.params, np.array([-0.191606, -0.012565, -0.351398]),
                    atol=1e-6, rtol=0)

    mod = DistributedModel(2, model_class=OLS)
    fit = mod.fit(_data_gen(y, X, 2), parallel_method="joblib",
                  fit_kwds={"alpha": 0.5})
    assert_allclose(fit.params, np.array([-0.157416, -0.029643, -0.471653]),
                    atol=1e-6, rtol=0)

    mod = DistributedModel(3, model_class=OLS)
    fit = mod.fit(_data_gen(y, X, 3), parallel_method="joblib",
                  fit_kwds={"alpha": 0.5})
    assert_allclose(fit.params, np.array([-0.124891, -0.050934, -0.403354]),
                    atol=1e-6, rtol=0)

    mod = DistributedModel(1, model_class=GLM,
                           init_kwds={"family": Binomial()})
    fit = mod.fit(_data_gen(y, X, 1), parallel_method="joblib",
                  fit_kwds={"alpha": 0.5})
    assert_allclose(fit.params, np.array([-0.164515, -0.412854, -0.223955]),
                    atol=1e-6, rtol=0)

    mod = DistributedModel(2, model_class=GLM,
                           init_kwds={"family": Binomial()})
    fit = mod.fit(_data_gen(y, X, 2), parallel_method="joblib",
                  fit_kwds={"alpha": 0.5})
    assert_allclose(fit.params, np.array([-0.142513, -0.360324, -0.295485]),
                    atol=1e-6, rtol=0)

    mod = DistributedModel(3, model_class=GLM,
                           init_kwds={"family": Binomial()})
    fit = mod.fit(_data_gen(y, X, 3), parallel_method="joblib",
                  fit_kwds={"alpha": 0.5})
    assert_allclose(fit.params, np.array([-0.110487, -0.306431, -0.243921]),
                    atol=1e-6, rtol=0)
def test_debiased_v_average():
    # tests that the debiased method performs better than the standard
    # average.  Does this for both OLS and GLM.

    np.random.seed(435265)
    N = 200
    p = 10
    m = 4

    beta = np.random.normal(size=p)
    beta = beta * np.random.randint(0, 2, p)
    X = np.random.normal(size=(N, p))
    y = X.dot(beta) + np.random.normal(size=N)

    db_mod = DistributedModel(m)
    fitOLSdb = db_mod.fit(_data_gen(y, X, m), fit_kwds={"alpha": 0.2})
    olsdb = np.linalg.norm(fitOLSdb.params - beta)

    n_mod = DistributedModel(m, estimation_method=_est_regularized_naive,
                             join_method=_join_naive)
    fitOLSn = n_mod.fit(_data_gen(y, X, m), fit_kwds={"alpha": 0.2})
    olsn = np.linalg.norm(fitOLSn.params - beta)

    assert_(olsdb < olsn)

    prob = 1 / (1 + np.exp(-X.dot(beta) + np.random.normal(size=N)))
    y = 1. * (prob > 0.5)

    db_mod = DistributedModel(m, model_class=GLM,
                              init_kwds={"family": Binomial()})
    fitGLMdb = db_mod.fit(_data_gen(y, X, m), fit_kwds={"alpha": 0.2})
    glmdb = np.linalg.norm(fitGLMdb.params - beta)

    n_mod = DistributedModel(m, model_class=GLM,
                             init_kwds={"family": Binomial()},
                             estimation_method=_est_regularized_naive,
                             join_method=_join_naive)
    fitGLMn = n_mod.fit(_data_gen(y, X, m), fit_kwds={"alpha": 0.2})
    glmn = np.linalg.norm(fitGLMn.params - beta)

    assert_(glmdb < glmn)
def test_logit(self):
    from statsmodels.formula.api import glm
    from statsmodels.genmod.families import Binomial

    inData = logit.getData()
    dfFit = logit.prepareForFit(inData)
    model = glm('ok + failed ~ temp', data=dfFit,
                family=Binomial()).fit()
    logit.showResults(inData, model)

    self.assertAlmostEqual(model.params.Intercept, -15.042902, places=5)
def senility_and_WAIS():
    '''Another example of logistic regression.

    chapter 7.8, p 143
    [tbd]: I don't understand how the "Binomial model" (grouped
    response) is supposed to work, in either language'''

    inFile = r'GLM_data/Table 7.8 Senility and WAIS.xls'
    df = get_data(inFile)

    # ungrouped
    model = glm('s ~ x', data=df, family=Binomial()).fit()
    print(model.summary())
def setup_class(cls):

    family = Binomial()

    endog, exog, groups = load_data("gee_ordinal_1.csv", icept=False)

    va = GlobalOddsRatio("ordinal")

    cls.mod = OrdinalGEE(endog, exog, groups, None, family, va)

    cls.start_params = np.array([1.09250002, 0.0217443, -0.39851092,
                                 -0.01812116, 0.03023969, 1.18258516,
                                 0.01803453, -1.10203381])
def test_wrapper(self):

    endog, exog, groups = load_data("gee_ordinal_1.csv", icept=False)
    endog = pd.Series(endog, name='yendog')
    exog = pd.DataFrame(exog)
    groups = pd.Series(groups, name='the_group')

    family = Binomial()
    va = GlobalOddsRatio("ordinal")
    mod = OrdinalGEE(endog, exog, groups, None, family, va)
    rslt2 = mod.fit()

    check_wrapper(rslt2)
def general_logistic_regression():
    '''Example General Logistic Regression, Example 7.4.1, p. 135'''

    # Get the data
    inFile = r'GLM_data/Table 7.5 Embryogenic anthers.xls'
    df = get_data(inFile)

    # Define the variables so that they match Dobson
    df['n_y'] = df['n'] - df['y']
    df['newstor'] = df['storage'] - 1
    df['x'] = np.log(df['centrifuge'])

    # Model 1
    model1 = glm('n_y + y ~ newstor*x', data=df, family=Binomial()).fit()
    print(model1.summary())

    # Model 2
    model2 = glm('n_y + y ~ newstor+x', data=df, family=Binomial()).fit()
    print(model2.summary())

    # Model 3
    model3 = glm('n_y + y ~ x', data=df, family=Binomial()).fit()
    print(model3.summary())
def __init__(self, formula=None, data=None, link=logit, **kwargs):
    if formula:
        y, X = patsy.dmatrices(formula, data, 1)
        self._y_design_info = y.design_info
        self._X_design_info = X.design_info
        self._model = GLM(y, X, family=Binomial(link), **kwargs)
        self._fit = self._model.fit()
        self._betas = self._fit.params
        self._link = link
    else:
        self._y_design_info = None
        self._X_design_info = None
        self._model = None
        self._fit = None
        self._betas = None
        self._link = link
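# A possible predict() counterpart for the wrapper above (a sketch, not
# the class's actual API): rebuild the design matrix for new data from
# the stored patsy design info, then apply the fitted GLM.
import numpy as np
import patsy

def predict(self, new_data):
    """Predicted values for new_data (hypothetical method)."""
    (X,) = patsy.build_design_matrices([self._X_design_info], new_data)
    return self._fit.predict(np.asarray(X))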
def test_est_unregularized_naive():
    # tests that the shape of all the intermediate steps
    # remains correct for unregularized naive estimation,
    # does this for OLS and GLM

    np.random.seed(435265)
    X = np.random.normal(size=(50, 3))
    y = np.random.randint(0, 2, size=50)
    beta = np.random.normal(size=3)

    mod = OLS(y, X)
    res = _est_unregularized_naive(mod, 0, 2, fit_kwds={"alpha": 0.5})
    assert_equal(res.shape, beta.shape)

    mod = GLM(y, X, family=Binomial())
    res = _est_unregularized_naive(mod, 0, 2, fit_kwds={"alpha": 0.5})
    assert_equal(res.shape, beta.shape)
def test_ordinal(self):

    family = Binomial()

    endog, exog, groups = load_data("gee_ordinal_1.csv", icept=False)

    v = GlobalOddsRatio("ordinal")

    md = GEE(endog, exog, groups, None, family, v)
    md.setup_ordinal()
    mdf = md.fit()

    cf = np.r_[1.09238131, 0.02148193, -0.39879146, -0.01855666,
               0.02983409, 1.18123172, 0.01845318, -1.10233886]
    se = np.r_[0.10878752, 0.10326078, 0.11171241, 0.05488705,
               0.05995019, 0.0916574, 0.05951445, 0.08539281]

    assert_almost_equal(mdf.params, cf, decimal=5)
    assert_almost_equal(mdf.bse, se, decimal=5)
def test_ordinal(self):

    family = Binomial()

    endog, exog, groups = load_data("gee_ordinal_1.csv", icept=False)

    v = GlobalOddsRatio("ordinal")

    md = OrdinalGEE(endog, exog, groups, None, family, v)
    mdf = md.fit()

    cf = np.r_[1.09250002, 0.0217443, -0.39851092, -0.01812116,
               0.03023969, 1.18258516, 0.01803453, -1.10203381]
    se = np.r_[0.10883461, 0.10330197, 0.11177088, 0.05486569,
               0.05997153, 0.09168148, 0.05953324, 0.0853862]

    assert_almost_equal(mdf.params, cf, decimal=5)
    assert_almost_equal(mdf.bse, se, decimal=5)
def test_est_regularized_debiased():
    # tests that the shape of all the intermediate steps
    # remains correct for regularized debiased estimation,
    # does this for OLS and GLM

    np.random.seed(435265)
    X = np.random.normal(size=(50, 3))
    y = np.random.randint(0, 2, size=50)
    beta = np.random.normal(size=3)

    mod = OLS(y, X)
    res = _est_regularized_debiased(mod, 0, 2, fit_kwds={"alpha": 0.5})
    bhat = res[0]
    grad = res[1]
    ghat_l = res[2]
    that_l = res[3]

    assert_(isinstance(res, tuple))
    assert_equal(bhat.shape, beta.shape)
    assert_equal(grad.shape, beta.shape)
    assert_(isinstance(ghat_l, list))
    assert_(isinstance(that_l, list))
    assert_equal(len(ghat_l), len(that_l))
    assert_equal(ghat_l[0].shape, (2,))
    assert_(isinstance(that_l[0], float))

    mod = GLM(y, X, family=Binomial())
    res = _est_regularized_debiased(mod, 0, 2, fit_kwds={"alpha": 0.5})
    bhat = res[0]
    grad = res[1]
    ghat_l = res[2]
    that_l = res[3]

    assert_(isinstance(res, tuple))
    assert_equal(bhat.shape, beta.shape)
    assert_equal(grad.shape, beta.shape)
    assert_(isinstance(ghat_l, list))
    assert_(isinstance(that_l, list))
    assert_equal(len(ghat_l), len(that_l))
    assert_equal(ghat_l[0].shape, (2,))
    assert_(isinstance(that_l[0], float))
def fit_regression(self, ax=None, x_range=None, grid=None):
    """Fit the regression model."""
    # Create the grid for the regression
    if grid is None:
        if self.truncate:
            x_min, x_max = self.x_range
        else:
            if ax is None:
                x_min, x_max = x_range
            else:
                x_min, x_max = ax.get_xlim()
        grid = np.linspace(x_min, x_max, 100)
    ci = self.ci

    # Fit the regression
    if self.order > 1:
        yhat, yhat_boots = self.fit_poly(grid, self.order)
    elif self.logistic:
        from statsmodels.genmod.generalized_linear_model import GLM
        from statsmodels.genmod.families import Binomial
        yhat, yhat_boots = self.fit_statsmodels(grid, GLM,
                                                family=Binomial())
    elif self.lowess:
        ci = None
        grid, yhat = self.fit_lowess()
    elif self.robust:
        from statsmodels.robust.robust_linear_model import RLM
        yhat, yhat_boots = self.fit_statsmodels(grid, RLM)
    elif self.logx:
        yhat, yhat_boots = self.fit_logx(grid)
    else:
        yhat, yhat_boots = self.fit_fast(grid)

    # Compute the confidence interval at each grid point
    if ci is None:
        err_bands = None
    else:
        err_bands = utils.ci(yhat_boots, ci, axis=0)

    return grid, yhat, err_bands
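# utils.ci above is assumed to compute percentile bootstrap bands over
# the yhat_boots draws; a minimal equivalent, following seaborn's
# utils.ci, would be:
import numpy as np

def ci(a, which=95, axis=None):
    """Return the width-`which` percentile interval of `a`."""
    p = 50 - which / 2, 50 + which / 2
    return np.nanpercentile(a, p, axis=axis)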
def test_compare_logit(self):

    vs = Independence()
    family = Binomial()

    Y = 1 * (np.random.normal(size=100) < 0)
    X1 = np.random.normal(size=100)
    X2 = np.random.normal(size=100)
    X3 = np.random.normal(size=100)
    groups = np.random.randint(0, 4, size=100)

    D = pd.DataFrame({"Y": Y, "X1": X1, "X2": X2, "X3": X3})

    md = GEE.from_formula("Y ~ X1 + X2 + X3", D, None, groups=groups,
                          family=family, covstruct=vs).fit()

    sml = sm.logit("Y ~ X1 + X2 + X3", data=D).fit()

    assert_almost_equal(sml.params.values, md.params, decimal=10)
def test_margins(self):

    n = 300
    exog = np.random.normal(size=(n, 4))
    exog[:, 0] = 1
    exog[:, 1] = 1 * (exog[:, 2] < 0)

    group = np.kron(np.arange(n / 4), np.ones(4))
    time = np.zeros((n, 1))

    beta = np.r_[0, 1, -1, 0.5]
    lpr = np.dot(exog, beta)
    prob = 1 / (1 + np.exp(-lpr))

    endog = 1 * (np.random.uniform(size=n) < prob)

    fa = Binomial()
    ex = Exchangeable()

    md = GEE(endog, exog, group, time, fa, ex)
    mdf = md.fit()

    marg = GEEMargins(mdf, ())
    marg.summary()
plt.xlabel("Outside Temperature [F]") plt.title("Defects of the Space Shuttle O-Rings vs temperature") plt.tight_layout # Plot the fit x = np.arange(50, 85) alpha = model.params[0] beta = model.params[1] y = logistic(x, beta, alpha) plt.hold(True) plt.plot(x,y,'r') plt.xlim([50, 85]) outFile = 'ChallengerPlain.png' showData(outFile) if __name__ == '__main__': inData = getData() dfFit = prepareForFit(inData) # fit the model # --- >>> START stats <<< --- model = glm('ok + failed ~ temp', data=dfFit, family=Binomial()).fit() # --- >>> STOP stats <<< --- print(model.summary()) showResults(inData, model)
'''
Prediction(80)
probability prediction: 0.872046286637
Prediction(100)
probability prediction: 0.970179520648
'''

# get the raw data
inData = getData()

# compute the per-score frequencies used for the fit
dfFit = prepareForFit(inData)

# fit a binomial Generalized Linear Model
# (column names: 同盾多头借贷未命中 = "Tongdun multi-loan not hit",
#  同盾多头借贷命中 = "Tongdun multi-loan hit", 同盾分数 = "Tongdun score")
model = glm('同盾多头借贷未命中 + 同盾多头借贷命中 ~ 同盾分数',
            data=dfFit, family=Binomial()).fit()
print(model.summary())

chi2 = model.pearson_chi2
'''Out: 46.893438309853522
The smaller the chi2 statistic, the larger the p-value; H0 holds and
the model fits better.'''
print("the chi2 is smaller,the model is better")

alpha = model.params[0]
beta = model.params[1]

Plot(inData, alpha, beta, "logistic regression")

# test predictions
Prediction(20)
Prediction(60)
Prediction(80)
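# Prediction() above is likewise not shown; a plausible sketch consistent
# with the printed output (probability increasing with the score; the
# sign convention and message format are assumptions):
import numpy as np

def Prediction(x):
    """Print the predicted probability at score x (hypothetical)."""
    p = 1.0 / (1.0 + np.exp(-(alpha + beta * x)))
    print('Prediction(%s)' % x)
    print('probability prediction: %s' % p)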
# -*- coding: utf-8 -*-

# import libraries
import pandas as pd
from statsmodels.formula.api import glm
from statsmodels.genmod.families import Binomial

# load the data
crabs = pd.read_csv("horseshoe.csv")

# logistic regression
model = glm("satellite_1 ~ width + spine", data=crabs,
            family=Binomial()).fit()
print(model.summary())

"""
                 Generalized Linear Model Regression Results
==============================================================================
Dep. Variable:            satellite_1   No. Observations:                  173
Model:                            GLM   Df Residuals:                      170
Model Family:                Binomial   Df Model:                            2
Link Function:                  logit   Scale:                          1.0000
Method:                          IRLS   Log-Likelihood:                -97.218
Date:                Wed, 06 May 2020   Deviance:                       194.44
Time:                        17:13:38   Pearson chi2:                     165.
No. Iterations:                     4
Covariance Type:            nonrobust
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept    -12.4410      2.723     -4.568      0.000     -17.779      -7.103
width          0.4980      0.102      4.887      0.000       0.298       0.698
spine          0.0282      0.220      0.128      0.898      -0.402       0.458
"""