Example 1
def deviance_test(x_int, y_cat, debug='N'):
    y = y_cat.astype('category')

    # Model 0 is yCat = Intercept
    x = numpy.where(y_cat.notnull(), 1, 0)
    obj_logit = stats.MNLogit(y, x)
    this_fit = obj_logit.fit(method='newton', full_output=True, maxiter=100, tol=1e-8)
    this_parameter = this_fit.params
    llk0 = obj_logit.loglike(this_parameter.values)

    if (debug == 'Y'):
        print(this_fit.summary())
        print("Model Log-Likelihood Value =", llk0)
        print('\n')

    # Model 1 is yCat = Intercept + xInt
    x = stats.add_constant(x_int, prepend=True)
    obj_logit = stats.MNLogit(y, x)
    this_fit = obj_logit.fit(method='newton', full_output=True, maxiter=100, tol=1e-8)
    this_parameter = this_fit.params
    llk1 = obj_logit.loglike(this_parameter.values)

    if (debug == 'Y'):
        print(this_fit.summary())
        print("Model Log-Likelihood Value =", llk1)

    # Calculate the deviance
    deviance_stat = 2.0 * (llk1 - llk0)
    deviance_df = (len(y.cat.categories) - 1.0)
    deviance_sig = scipy.stats.chi2.sf(deviance_stat, deviance_df)

    mc_fadden_r_sq = 1.0 - (llk1 / llk0)

    return (deviance_stat, deviance_df, deviance_sig, mc_fadden_r_sq)
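A minimal usage sketch for deviance_test, assuming the import aliases the function itself relies on (numpy, pandas, scipy.stats, and statsmodels.api imported as stats). The data here are synthetic and unrelated, so the test should find little association:

import numpy
import pandas
import scipy.stats
import statsmodels.api as stats

# Synthetic interval feature and three-level categorical target.
rng = numpy.random.default_rng(20210101)
x_int = pandas.Series(rng.normal(size=300), name='x')
y_cat = pandas.Series(rng.choice(['low', 'mid', 'high'], size=300))

dev_stat, dev_df, dev_sig, r_sq = deviance_test(x_int, y_cat)
print('Deviance =', dev_stat, 'df =', dev_df,
      'Significance =', dev_sig, 'McFadden R-Squared =', r_sq)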
Example 2
def runAllVarsModel(target):
    exog = testData[['OpenDummy', 'awardAmount', 'awardeeType', 'techCategory', 'partners', 'startingYear']]
    mdl6 = sm.MNLogit(target, exog)
    mdl6_fit = mdl6.fit()
    print(mdl6_fit.summary())

    exog = testData[['OpenDummy', 'awardAmount', 'awardeeType', 'techCategory', 'partners', 'early', 'middle', 'latest']]
    mdl6 = sm.MNLogit(target, exog)
    mdl6_fit = mdl6.fit()
    print(mdl6_fit.summary())
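runAllVarsModel passes columns such as awardeeType and techCategory straight into MNLogit, which treats every exog column as numeric. If those columns hold nominal codes, a dummy encoding is usually wanted first; a sketch under the same assumed testData frame and target variable:

import pandas as pd
import statsmodels.api as sm

# Dummy-encode the nominal regressors; drop_first avoids perfect
# collinearity once a constant is added.
exog = pd.get_dummies(
    testData[['OpenDummy', 'awardAmount', 'awardeeType',
              'techCategory', 'partners', 'startingYear']],
    columns=['awardeeType', 'techCategory'],
    drop_first=True).astype(float)
exog = sm.add_constant(exog, prepend=True)
mdl = sm.MNLogit(target, exog).fit()
print(mdl.summary())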
Example 3
def runTechCatModel():
    exog = testData[['OpenDummy', 'techCategory', 'startingYear']]
    mdl4 = sm.MNLogit(target, exog)
    mdl4_fit = mdl4.fit()
    print(mdl4_fit.summary())

    exog = testData[['OpenDummy', 'techCategory', 'early', 'middle', 'latest']]
    mdl4 = sm.MNLogit(target, exog)
    mdl4_fit = mdl4.fit()
    print(mdl4_fit.summary())
Example 4
def check_parallel_lines_assumption(self):
    myModel = sm.MNLogit(self.endog, sm.add_constant(self.exog))
    myModel = myModel.fit()
    self.llmulti = myModel.llf
    self.evidence_against = stats.distributions.chi2.sf(
        df=max([1, self.df_model - 2]),
        x=(-2 * self.llf) - (-2 * myModel.llf))
    self.evidence_against_multinomial_for_ordinal = stats.distributions.chi2.sf(
        df=max([1, self.df_model - 2]),
        x=(-2 * myModel.llf) - (-2 * self.llf))
    self.evidence_against_multinomial = myModel.llr_pvalue
    print('Log-Likelihood of null model = {}'.format(myModel.llnull))
    print('Log-Likelihood of full logistic regression model = {}'.format(
        myModel.llf))
    print('Log-Likelihood of full ordinal logistic regression model = {}'.
          format(self.llf))
    print(
        'Evidence against null (intercept only) model in favour of multinomial model = {}'
        .format(self.evidence_against_multinomial))
    print(
        'Evidence against null (intercept only) model in favour of ordinal model = {}'
        .format(self.llr_pvalue))
    print(
        'Evidence against Multinomial model in favour of proportional odds = {}'
        .format(self.evidence_against_multinomial_for_ordinal))
    print(
        'Evidence against proportional odds model in favour of Multinomial = {}'
        .format(self.evidence_against))
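check_parallel_lines_assumption compares an ordinal (proportional-odds) fit stored on self against an unconstrained MNLogit. A self-contained sketch of the same likelihood-ratio idea on synthetic data, assuming statsmodels' OrderedModel is available (statsmodels >= 0.12); the degrees of freedom are counted explicitly rather than via df_model:

import numpy as np
import pandas as pd
import statsmodels.api as sm
from scipy import stats
from statsmodels.miscmodels.ordinal_model import OrderedModel

rng = np.random.default_rng(0)
x = rng.normal(size=(500, 2))
# Latent ordinal response with a common slope, so proportional odds holds.
latent = x @ np.array([1.0, -0.5]) + rng.logistic(size=500)
y = pd.cut(latent, bins=[-np.inf, -1.0, 1.0, np.inf], labels=False)

ordinal_fit = OrderedModel(y, x, distr='logit').fit(method='bfgs', disp=0)
multi_fit = sm.MNLogit(y, sm.add_constant(x)).fit(disp=0)

# The multinomial model nests the proportional-odds model, so compare via
# a likelihood-ratio chi-square test on the extra parameters.
J, k = 3, 2                                   # response categories, slopes
extra_df = (J - 1) * (k + 1) - (k + (J - 1))  # 6 - 4 = 2 extra parameters
lr_stat = 2.0 * (multi_fit.llf - ordinal_fit.llf)
print('Evidence against proportional odds =', stats.chi2.sf(lr_stat, extra_df))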
Example 5
def step(feature_list, enog_name, df, threshold):
    select_feature = []
    while (len(feature_list) > 0):
        max_value, max_feature = 0, None
        for feature in list(feature_list):  # iterate a copy: the elif branch mutates feature_list
            print(feature)
            mlogit_mod = sm.MNLogit(df[enog_name],
                                    df[select_feature + [feature]])
            try:
                mlogit_res = mlogit_mod.fit()
            except Exception:
                print('singular matrix')
                continue
            if threshold is None:
                try:
                    value = np.sum(np.abs(mlogit_res._results.tvalues[-1]))
                except Exception:
                    print('bug:%s' % feature)
                    continue
                if value > max_value:
                    max_value = value
                    max_feature = feature
            elif np.sum(mlogit_res._results.tvalues[-1]) > 2 * threshold:
                print('\n%s\n' % feature)
                feature_list.remove(feature)
                select_feature.append(feature)
        if max_feature is not None:
            print('\n%s\n' % max_feature)
            feature_list.remove(max_feature)
            select_feature.append(max_feature)
        else:
            break
    print(select_feature)
    return select_feature
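A usage sketch for step on synthetic data (the column names x1..x4 and the constant column are illustrative); with threshold=None it greedily adds the feature with the largest summed |t| each round:

import numpy as np
import pandas as pd
import statsmodels.api as sm

rng = np.random.default_rng(1)
df = pd.DataFrame(rng.normal(size=(400, 4)),
                  columns=['x1', 'x2', 'x3', 'x4'])
df['const'] = 1.0
# Three-class target driven by x1 and x2 only.
logits = np.column_stack(
    [np.zeros(400), 2.0 * df['x1'], -2.0 * df['x2']])
probs = np.exp(logits) / np.exp(logits).sum(axis=1, keepdims=True)
df['y'] = [rng.choice(3, p=p) for p in probs]

selected = step(['const', 'x1', 'x2', 'x3', 'x4'], 'y', df, threshold=None)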
Example 6
def nominal_logistic_regression():
    '''Nominal Logistic Regression
    chapter 8.3,  p. 155 
    
    At this point, nominal logistic regression cannot be done with the formula approach.
    
    Regarding the output, note that both R and statsmodels use the first category
    as the reference, producing log(pi2/pi1) and log(pi3/pi1).
    '''

    # Get the data
    inFile = r'GLM_data/Table 8.1 Car preferences.xls'
    df = get_data(inFile)

    # to make sure that "women" and "no/little" are the reference,
    # adjust them such that they come first alphabetically
    df.loc[df['response'] == 'no/little', 'response'] = '_no/little'
    df.loc[df['sex'] == 'women', 'sex'] = '_women'
    print(df)

    # Generate the design matrices using patsy
    pm = patsy.dmatrices('response~sex+age', data=df)

    # Generate the endog and exog matrices
    endog = np.repeat(np.array(df['response']),
                      df['frequency'].values.astype(int),
                      axis=0)
    exog = np.array(
        np.repeat(pm[1], df['frequency'].values.astype(int), axis=0))
    exog = pd.DataFrame(exog, columns=pm[1].design_info.column_names)

    # Fit the model, and print the summary
    model = sm.MNLogit(endog, exog).fit(method='nm')  # method belongs to fit, not the constructor
    print(model.summary())
Example 7
def bootstrap_MNLogit(x_train, y_train, nB):
    x_index = x_train.index
    nT = len(y_train)
    outProb = numpy.zeros((nT, 5))
    #outThreshold = numpy.zeros((nB, 1))
    #classTree = tree.DecisionTreeClassifier(criterion='entropy', max_depth=2, random_state=60616)

    # Initialize internal state of the random number generator.
    random.seed(20190430)

    for iB in range(nB):
        bootIndex = sample_wr(x_index)
        x_train_boot = x_train.loc[bootIndex[:, 0]]
        y_train_boot = y_train.loc[bootIndex[:, 0]]
        #outThreshold[iB] = len(y_train_boot[y_train_boot['BAD'] == 1]) / len(y_train_boot)
        y_train_boot = y_train_boot['ring'].astype('category')

        logit = stats.MNLogit(y_train_boot, x_train_boot)
        thisFit = logit.fit(method='newton', full_output=True, maxiter=1000, tol=1e-8)
        #treeFit = classTree.fit(x_train_boot, y_train_boot['BAD'])
        outProb = outProb + thisFit.predict(x_train)
    outProb = outProb / nB
    #print('Mean Threshold: {:.7f}' .format(outThreshold.mean()))
    #print('  SD Threshold: {:.7f}' .format(outThreshold.std()))
    return outProb
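sample_wr is an external helper that is not shown here; given that the loop indexes its result as bootIndex[:, 0], it presumably returns an (n, 1) array of index labels drawn with replacement. A minimal sketch under that assumption, using the same random module the example seeds:

import numpy
import random

def sample_wr(index_values):
    # Draw len(index_values) labels with replacement, returned as an
    # (n, 1) array so callers can take column 0 as the bootstrap index.
    labels = list(index_values)
    draws = [random.choice(labels) for _ in range(len(labels))]
    return numpy.array(draws).reshape(-1, 1)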
Example 8
def model_Visualize(All, new):

    y_vis = All['Visualize']

    # Logit Model
    interaction = "Visualize ~   customer_age + totalBilled+clusters"
    y, XX = patsy.dmatrices(interaction, All, return_type="dataframe")
    X_new = new[['customer_age', 'totalBilled', 'clusters']].copy()  # copy to avoid SettingWithCopyWarning

    # Separate training and testing datasets
    X_train, X_test, y_train, y_test = train_test_split(XX,
                                                        y_vis,
                                                        test_size=0.30,
                                                        random_state=9)
    num_col_names = ['totalBilled',
                     'customer_age']  ## scale only numeric variable
    scaler = StandardScaler().fit(X_train[num_col_names].values)
    X_train[num_col_names] = scaler.transform(X_train[num_col_names].values)
    X_new[num_col_names] = scaler.transform(X_new[num_col_names].values)

    # Run the model
    Logit = sm.MNLogit(y_train, X_train).fit_regularized()
    X_new["Intercept"] = 1
    # statsmodels matches exog by position, not by name, so align the new
    # data's columns with the training design matrix before predicting.
    X_new = X_new[X_train.columns]

    y_pred_prob = Logit.predict(X_new)
    y_pred = y_vis.astype('category').cat.categories[y_pred_prob.idxmax(
        axis=1)]

    return (y_pred)
Example 9
def simple_model(X_train, y_train, tpot=False):
    """
    Obtiene variable objetivo, decide si es de clasificación o regresión
    y regresa un modelo simple
    Args:
        X_train (Array): conjunto de datos de entrenamiento (regresores)
        y_train (Array): conjunto de datos de entrenamiento (objetivo)
        tpot (boolean): si queremos generar modelo con tpot
    returns:
        model (modelo): Regresión Logística o Lineal dependiendo de la variable
                         objetivo
        tpotmod (modelo): Modelo de Regresión o Clasificación generado con TPOT
    """
    tpotm = None
    # Check whether this is a binary classification problem
    if len(np.unique(y_train)) == 2:
        model = logreg(X_train, y_train)
        if tpot:
            tpotm = tpotclass(X_train, y_train)
    elif 2 < len(np.unique(y_train)) < 10:
        multilog = sm.MNLogit(y_train, X_train)
        model = multilog.fit()
        if tpot:
            tpotm = tpotclass(X_train, y_train)
    else:
        model = linreg(X_train, y_train)
        if tpot:
            tpotm = tpotreg(X_train, y_train)

    return model, tpotm
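A sketch exercising only the multinomial branch of simple_model (logreg, linreg, tpotclass, and tpotreg are external helpers that the other branches require):

import numpy as np
import statsmodels.api as sm

rng = np.random.default_rng(7)
X_train = sm.add_constant(rng.normal(size=(300, 2)))
y_train = rng.integers(0, 3, size=300)  # three classes -> MNLogit branch

model, tpotm = simple_model(X_train, y_train, tpot=False)
print(model.summary())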
Example 10
def build_mnlogit(fullX, y):

    # Find the non-redundant columns in the design matrix fullX
    nFullParam = fullX.shape[1]
    XtX = numpy.transpose(fullX).dot(fullX)
    invXtX, aliasParam, nonAliasParam = SWEEPOperator(pDim=nFullParam,
                                                      inputM=XtX,
                                                      tol=1e-8)

    # Build a multinomial logistic model
    X = fullX.iloc[:, list(nonAliasParam)]
    logit = stats.MNLogit(y, X)
    thisFit = logit.fit(method='newton',
                        maxiter=100,
                        tol=1e-8,
                        full_output=True,
                        disp=True)
    thisParameter = thisFit.params
    thisLLK = logit.loglike(thisParameter.values)

    # The number of free parameters
    nYCat = thisFit.J
    thisDF = len(nonAliasParam) * (nYCat - 1)

    # Return model statistics
    return (thisLLK, thisDF, thisParameter, thisFit, aliasParam)
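SWEEPOperator is an external helper that is not shown; several other examples on this page detect the same aliased columns with sympy's reduced row-echelon form. A sketch of that alternative (the rref pivot columns are the non-redundant ones):

import sympy

def find_alias_columns(fullX):
    # Pivot columns of the reduced row-echelon form are linearly
    # independent; every other column is aliased (redundant).
    _, pivot_cols = sympy.Matrix(fullX.values).rref()
    non_alias = list(pivot_cols)
    alias = [j for j in range(fullX.shape[1]) if j not in non_alias]
    return alias, non_alias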
Example 11
def question_20():
    # MVR_PTS, BLUEBOOK_1000, TRAVTIME
    data = pd.read_csv("policy_2001.csv")
    data_train, data_test = train_test_split(data, test_size = 0.33, random_state = 20201014, stratify = data['CLAIM_FLAG'])
    
    y = data_train["CLAIM_FLAG"].astype('category')
    designX = data_train[["MVR_PTS"]]
    designX = designX.join(data_train[["BLUEBOOK_1000"]])
    designX = designX.join(data_train[["TRAVTIME"]])
    designX = stats.add_constant(designX, prepend=True)
    # Find the non-redundant columns in the design matrix fullX
    reduced_form, inds = sympy.Matrix(designX.values).rref()
    X = designX.iloc[:, list(inds)]
    logit = stats.MNLogit(y, X)
    thisFit = logit.fit(method='newton', full_output = True, maxiter = 100, tol = 1e-8)
    print("*"*50)
    X_test = stats.add_constant(data_test[["MVR_PTS", "BLUEBOOK_1000", "TRAVTIME"]], prepend = True)
    #print(X_test)
    y_pred_prob = thisFit.predict(X_test)
    #print(y_pred_prob[[1]])
    y_pred = pd.to_numeric(y_pred_prob.idxmax(axis=1))
    #acc = metrics.accuracy_score(data_test["CLAIM_FLAG"], y_pred)
    #print(acc)
    #print(data_test["CLAIM_FLAG"],"\n",  y_pred)
    # Score the AUC on the predicted probability of the event class;
    # hard 0/1 predictions would understate it.
    lr_auc = metrics.roc_auc_score(data_test["CLAIM_FLAG"], y_pred_prob[1])
    print(lr_auc)
Example 12
def _fit_logistic(train_X, train_Y):
    logit = stats.MNLogit(
        train_Y, train_X
        # MNLogit treats every distinct value of the target as a separate
        # category, so the target does not need to be dummy-encoded.
    )
    fit = logit.fit(full_output=True, maxiter=1000)
    return fit, fit.params
Example 13
def smLogit(X_train, y_train):
    logit_model = sm.MNLogit(y_train, sm.add_constant(X_train))
    result = logit_model.fit()
    stats1 = result.summary()
    stats2 = result.summary2()
    print(stats1)
    print(stats2)
Example 14
def multinomial_logit_regression(x, y, intercept=True, method="newton"):
    if intercept:
        x = sm.add_constant(x)  # add constant if need intercept
    # run regression
    model = sm.MNLogit(y, x)
    result = model.fit(method=method)
    summary = result.summary()
    return result, summary
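A usage sketch for multinomial_logit_regression on synthetic data, assuming the module-level import statsmodels.api as sm that the function itself relies on:

import numpy as np
import statsmodels.api as sm

rng = np.random.default_rng(42)
x = rng.normal(size=(200, 2))
y = rng.integers(0, 3, size=200)

result, summary = multinomial_logit_regression(x, y)
print(summary)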
Example 15
def runAwardeeTypeModel(target):
    # solve the MNL function: open + awardee type
    # adding a constant did basically nothing, do we have to include? why/why not?
    exog = sm.add_constant(testData[['OpenDummy', 'awardeeType', 'startingYear']])
    mdl3 = sm.MNLogit(target, exog)
    mdl3_fit = mdl3.fit()
    print(mdl3_fit.summary())
Example 16
def build_mnlogit(fullX, y, debug='N'):
    # Number of all parameters
    nFullParam = fullX.shape[1]

    # Number of target categories
    y_category = y.cat.categories
    nYCat = len(y_category)

    # Find the non-redundant columns in the design matrix fullX
    reduced_form, inds = sympy.Matrix(fullX.values).rref()

    # Extract only the non-redundant columns for modeling
    X = fullX.iloc[:, list(inds)]

    # These are the column numbers of the non-redundant columns
    if (debug == 'Y'):
        print('Column Numbers of the Non-redundant Columns:')
        print(inds)
        print(
            "-------------------------------ans 1a------------------------------------------"
        )
        aliased_indices = [x for x in range(nFullParam) if (x not in inds)]
        aliased_params = [fullX.columns[x] for x in aliased_indices]
        print("the aliased columns in our model matrix are:\n ")
        for i in aliased_params:
            print(i)

    # The number of free parameters
    thisDF = len(inds) * (nYCat - 1)

    # Build a multinomial logistic model
    logit = stats.MNLogit(y, X)
    thisFit = logit.fit(method='newton',
                        full_output=True,
                        maxiter=100,
                        tol=1e-8)
    thisParameter = thisFit.params
    thisLLK = logit.loglike(thisParameter.values)

    if (debug == 'Y'):
        print(thisFit.summary())
        print("Model Parameter Estimates:\n", thisParameter)
        print("Model Log-Likelihood Value =", thisLLK)
        print("Number of Free Parameters =", thisDF)

    # Recreate the estimates of the full parameters
    workParams = pd.DataFrame(np.zeros(shape=(nFullParam, (nYCat - 1))))
    workParams = workParams.set_index(keys=fullX.columns)
    fullParams = pd.merge(workParams,
                          thisParameter,
                          how="left",
                          left_index=True,
                          right_index=True)
    fullParams = fullParams.drop(
        columns=[c for c in fullParams.columns
                 if str(c).endswith('_x')]).fillna(0.0)

    # Return model statistics
    return (thisLLK, thisDF, fullParams)
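A usage sketch for build_mnlogit, assuming the module-level imports the function itself relies on (sympy, statsmodels.api as stats, pandas as pd, numpy as np). fullX is built as a deliberately redundant dummy matrix (an intercept plus both levels of a two-level factor), so the rref step has an aliased column to drop; all names and data are illustrative:

import numpy as np
import pandas as pd

rng = np.random.default_rng(3)
group = pd.Series(rng.choice(['A', 'B'], size=120))
fullX = pd.get_dummies(group, prefix='group').astype(float)  # both levels kept
fullX.insert(0, 'Intercept', 1.0)  # aliased with group_A + group_B on purpose
y = pd.Series(rng.choice(['red', 'green', 'blue'], size=120)).astype('category')

thisLLK, thisDF, fullParams = build_mnlogit(fullX, y, debug='Y')
print(fullParams)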
Example 17
def MNfit_log_reg(X, Y):
    # Fit a multinomial logistic regression model and return AIC, BIC,
    # log-likelihood, and McFadden pseudo R-squared values
    X = sm.add_constant(X)
    model_k = sm.MNLogit(Y, X.astype(float)).fit()
    AIC = model_k.aic
    BIC = model_k.bic
    LLH = model_k.llf
    R2 = model_k.prsquared
    return AIC, BIC, LLH, R2
Example 18
def runBasicModel(target):
    #exog = sm.add_constant(testData[['OpenDummy', 'startingYear']])
    #exog = testData[['OpenDummy', 'startingYear']]
    exog = testData.OpenDummy
    print(type(exog))
    print(target)
    mdl1 = sm.MNLogit(target, exog).fit()
    print(mdl1.summary())
    return(mdl1.params, mdl1.pvalues, mdl1._results.conf_int())
Example 19
def test_issue_341():
    data = sm.datasets.anes96.load()
    exog = data.exog
    # leave out last exog column
    exog = exog[:, :-1]
    exog = sm.add_constant(exog, prepend=True)
    res1 = sm.MNLogit(data.endog, exog).fit(method="newton", disp=0)
    x = exog[0]
    np.testing.assert_equal(res1.predict(x).shape, (1, 7))
    np.testing.assert_equal(res1.predict(x[None]).shape, (1, 7))
Example 20
def fit_log_reg(X, Y, multi):
    # Fit a (multinomial) logistic regression model and return AIC, BIC,
    # log-likelihood, and McFadden pseudo R-squared values
    X = sm.add_constant(X)
    if multi:
        model_k = sm.MNLogit(Y, X.astype(float)).fit()
    else:
        model_k = sm.Logit(Y, X.astype(float)).fit()
    AIC = model_k.aic
    BIC = model_k.bic
    LLH = model_k.llf
    R2 = model_k.prsquared
    return AIC, BIC, LLH, R2
Example 21
def build_model_MNlogistic(target, data, acc=0.00000001, alpha=L1_ALPHA):
    """ Trains a logistic regresion model. target is the target.
        data is a dataframe of samples for training. The length of 
        target must match the number of rows in data.
    """
    data = data.copy()
    data['intercept'] = 1.0
    logit = sm.MNLogit(target, data)  # disp is a fit option, not a model argument
    return logit.fit_regularized(maxiter=1024,
                                 alpha=alpha,
                                 acc=acc,
                                 disp=False)
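A usage sketch for build_model_MNlogistic on synthetic data. Note that L1_ALPHA must already be defined when the function definition runs (it is the default for alpha), so alpha is passed explicitly here:

import numpy as np
import pandas as pd

rng = np.random.default_rng(11)
data = pd.DataFrame(rng.normal(size=(300, 3)), columns=['a', 'b', 'c'])
target = pd.Series(rng.choice(['x', 'y', 'z'], size=300))

result = build_model_MNlogistic(target, data, alpha=0.1)
print(result.params)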
Example 22
def setupClass(cls):
    anes_data = sm.datasets.anes96.load()
    anes_exog = anes_data.exog
    anes_exog = sm.add_constant(anes_exog, prepend=False)
    mlogit_mod = sm.MNLogit(anes_data.endog, anes_exog)
    alpha = 10. * np.ones((mlogit_mod.J - 1, mlogit_mod.K))  #/ anes_exog.shape[0]
    alpha[-1, :] = 0
    cls.res1 = mlogit_mod.fit_regularized(
        method='l1', alpha=alpha, trim_mode='auto', auto_trim_tol=0.02,
        acc=1e-10, disp=0)
    res2 = DiscreteL1()
    res2.mnlogit()
    cls.res2 = res2
Example 23
def regressor(y, X, model_type=model_type):
    if model_type == "linear":
        regressor = sm.OLS(y, X).fit()
    elif model_type == "MNlogit":
        regressor = sm.MNLogit(y, X).fit(method='lbfgs',
                                         maxiter=100,
                                         disp=0)
    else:
        print("\nWrong Model Type : " + model_type +
              "\nLinear model type is selected.")
        model_type = "linear"
        regressor = sm.OLS(y, X).fit()
    return regressor
Example 24
def DevianceTest(
		xInt,  # input interval feature
		yCat,  # input categorical target variable
		debug='N'  # debugging flag (Y/N)
):
	y = yCat.astype('category')

	# Model 0 is yCat = Intercept
	X = numpy.where(yCat.notnull(), 1, 0)
	objLogit = stats.MNLogit(y, X)
	thisFit = objLogit.fit(method='newton', full_output=True, maxiter=100, tol=1e-8)
	thisParameter = thisFit.params
	LLK0 = objLogit.loglike(thisParameter.values)

	if (debug == 'Y'):
		print(thisFit.summary())
		print("Model Log-Likelihood Value =", LLK0)
		print('\n')

	# Model 1 is yCat = Intercept + xInt
	X = stats.add_constant(xInt, prepend=True)
	objLogit = stats.MNLogit(y, X)
	thisFit = objLogit.fit(method='newton', full_output=True, maxiter=100, tol=1e-8)
	thisParameter = thisFit.params
	LLK1 = objLogit.loglike(thisParameter.values)

	if (debug == 'Y'):
		print(thisFit.summary())
		print("Model Log-Likelihood Value =", LLK1)

	# Calculate the deviance
	devianceStat = 2.0 * (LLK1 - LLK0)
	devianceDf = (len(y.cat.categories) - 1.0)
	devianceSig = scipy.stats.chi2.sf(devianceStat, devianceDf)

	mcFaddenRSq = 1.0 - (LLK1 / LLK0)

	return (devianceStat, devianceDf, devianceSig, mcFaddenRSq)
Example 25
def build_mnlogit(full_x, y, debug='N'):
    # Number of all parameters
    no_full_param = full_x.shape[1]

    # Number of target categories
    y_category = y.cat.categories
    no_y_cat = len(y_category)

    # Find the non-redundant columns in the design matrix fullX
    reduced_form, inds = sympy.Matrix(full_x.values).rref()

    # These are the column numbers of the non-redundant columns
    if (debug == 'Y'):
        print('Column Numbers of the Non-redundant Columns:')
        print(inds)

    # Extract only the non-redundant columns for modeling
    x = full_x.iloc[:, list(inds)]

    # The number of free parameters
    this_df = len(inds) * (no_y_cat - 1)

    # Build a multinomial logistic model
    logit = stats.MNLogit(y, x)
    this_fit = logit.fit(method='newton',
                         full_output=True,
                         maxiter=100,
                         tol=1e-8)
    this_parameter = this_fit.params
    this_llk = logit.loglike(this_parameter.values)

    if (debug == 'Y'):
        print(this_fit.summary())
        print("Model Parameter Estimates:\n", this_parameter)
        print("Model Log-Likelihood Value =", this_llk)
        print("Number of Free Parameters =", this_df)

    # Recreate the estimates of the full parameters
    work_params = pandas.DataFrame(
        numpy.zeros(shape=(no_full_param, (no_y_cat - 1))))
    work_params = work_params.set_index(keys=full_x.columns)
    full_params = pandas.merge(work_params,
                               this_parameter,
                               how="left",
                               left_index=True,
                               right_index=True)
    full_params = full_params.drop(
        columns=[c for c in full_params.columns
                 if str(c).endswith('_x')]).fillna(0.0)

    # Return model statistics
    return (this_llk, this_df, full_params)
Example 26
def setup_class(cls):
    #from .results.results_discrete import Anes
    data = sm.datasets.anes96.load()
    exog = data.exog
    exog = sm.add_constant(exog, prepend=False)
    cls.mod = sm.MNLogit(data.endog, exog)

    #def loglikeflat(cls, params):
    #reshapes flattened params
    #    return cls.loglike(params.reshape(6,6))
    #cls.mod.loglike = loglikeflat  #need instance method
    #cls.params = [np.ones((6,6)).ravel()]
    res = cls.mod.fit(disp=0)
    cls.params = [res.params.ravel('F')]
Example 27
def __init__(self):
    #from results.results_discrete import Anes
    data = sm.datasets.anes96.load()
    exog = data.exog
    exog = sm.add_constant(exog)
    self.mod = sm.MNLogit(data.endog, exog)

    #def loglikeflat(self, params):
    #reshapes flattened params
    #    return self.loglike(params.reshape(6,6))
    #self.mod.loglike = loglikeflat  #need instance method
    #self.params = [np.ones((6,6)).ravel()]
    res = self.mod.fit(disp=0)
    self.params = [res.params.ravel('F')]
Example 28
def test_issue_339():
    # make sure MNLogit summary works for J != K.
    data = sm.datasets.anes96.load()
    exog = data.exog
    # leave out last exog column
    exog = exog[:, :-1]
    exog = sm.add_constant(exog, prepend=True)
    res1 = sm.MNLogit(data.endog, exog).fit(method="newton", disp=0)
    # strip the header from the test
    smry = "\n".join(res1.summary().as_text().split('\n')[9:])
    cur_dir = os.path.dirname(os.path.abspath(__file__))
    test_case_file = os.path.join(cur_dir, 'results', 'mn_logit_summary.txt')
    with open(test_case_file, 'r') as fh:
        test_case = fh.read()
    np.testing.assert_(smry == test_case[:-1])
Example 29
def __init__(self):
    #from results.results_discrete import Anes
    data = sm.datasets.anes96.load()
    exog = data.exog
    exog[:, 0] = np.log(exog[:, 0] + .1)
    exog = np.column_stack((exog[:, 0], exog[:, 2], exog[:, 5:8]))
    exog = sm.add_constant(exog)
    self.mod = sm.MNLogit(data.endog, exog)

    def loglikeflat(self, params):
        #reshapes flattened params
        return self.loglike(params.reshape(6, 6))

    self.mod.loglike = loglikeflat  #need instance method
    self.params = [np.ones((6, 6))]
Example 30
def view_Analysis(model_type: model_type, headers_dependent: headers_dependent,
                  headers_factor: headers_factor,
                  headers_groups: headers_groups,
                  analysis_formula: analysis_formula):

    data = df

    mdl_string = 'noInput'

    if analysis_formula != '':
        mdl_string = analysis_formula
    elif headers_dependent != 'Select' and headers_factor != 'Select':
        mdl_string = headers_dependent + ' ~ ' + headers_factor

    if mdl_string != 'noInput':

        if model_type == 'Ordinary Least Squares':
            model = ols(mdl_string, data).fit()
        elif model_type == 'Generalized Linear Models':
            model = glm(mdl_string, data, family=sm.families.Gamma()).fit()
        elif model_type == 'Robust Linear Models':
            model = rlm(mdl_string, data, M=sm.robust.norms.HuberT()).fit()
        elif model_type == 'Linear Mixed Effects Models':
            if headers_groups != 'Select':
                model = mixedlm(mdl_string, data,
                                groups=data[headers_groups]).fit()
        elif model_type == 'Discrete - Regression with binary - Logit':
            model = Logit(data[headers_dependent],
                          data[headers_factor].astype(float)).fit()
        elif model_type == 'Discrete - Regression with binary - Probit':
            model = Probit(data[headers_dependent],
                           data[headers_factor].astype(float)).fit()
        elif model_type == 'Discrete - Regression with nominal - MNLogit':
            # endog should be the dependent variable, exog the factor
            y = data[headers_dependent]
            x = sm.add_constant(data[headers_factor], prepend=False)
            model = sm.MNLogit(y, x).fit()
        elif model_type == 'Discrete - Regression with count - Poisson':
            model = Poisson(data[headers_dependent],
                            data[headers_factor].astype(float)).fit()

        display(model.summary())