Example #1
def Reg32():
    y, X = dmatrices('Dec ~ Markt + Altru_1 + Altru_2 + Geld + Müll',
                     df_ama,
                     return_type='dataframe')
    probit = sm.Probit(y, X, missing='drop')
    res = probit.fit()
    print(res.summary())
def train_probit_across_obs(obs, states, alpha=10.0, verbosity=0):
    """

    :param states: n x k matrix, k = num elements in latent binary vector
    :param obs: n x j matrix, j = num elements in observation/feature binary vector
    :return: k+1 x j weight matrix
    """
    states = sm.tools.add_constant(states, prepend=False)
    weight_columns = np.zeros((states.shape[1], obs.shape[1]))
    for column in range(obs.shape[1]):

        if verbosity == 1:
            print('{0} '.format(column), end="",)
            if column % 10 == 0: print()

        obs_column = obs[:, column]

        if verbosity > 0:
            print("num 1's in obs_column: sum(obs_column) = {0}".format(np.sum(obs_column)))

        if verbosity > 1: print('obs_column {0}: {1}'.format(obs_column.shape, obs_column))

        probit_model = sm.Probit(obs_column, states)
        fresult = probit_model.fit_regularized(method='l1', alpha=alpha)
        weight_columns[:, column] = fresult.params

        if verbosity > 1:
            print('fresult.params: {0}'.format(fresult.params))
            print('weight_columns:\n{0}'.format(weight_columns))

    return weight_columns
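# A minimal usage sketch for train_probit_across_obs (not part of the original example):
# the binary matrices are synthetic and only illustrate the n x k / n x j shapes from the
# docstring; numpy and statsmodels are assumed to be imported as np/sm, as in the snippet.
import numpy as np

rng = np.random.default_rng(0)
n, k, j = 200, 3, 5
states = rng.integers(0, 2, size=(n, k))   # n x k latent binary vectors
obs = rng.integers(0, 2, size=(n, j))      # n x j observed binary feature vectors

# Returns a (k+1) x j weight matrix; the extra row belongs to the appended constant.
weights = train_probit_across_obs(obs, states, alpha=10.0, verbosity=1)
print(weights.shape)  # (4, 5)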
Example #3
def Reg45():
    y, X = dmatrices('Dec ~ Marktgeschehen*Markt',
                     df_ama,
                     return_type='dataframe')
    probit = sm.Probit(y, X, missing='drop')
    res = probit.fit()
    print(res.summary())
Example #4
def models_pattern(data, matrix):
    y = np.array(data['survived'])
    ones = np.ones(len(matrix[0]))
    X = sm.add_constant(np.column_stack((matrix[0], ones)))
    for ele in matrix[1:]:
        X = sm.add_constant(np.column_stack((ele, X)))
    logit_model = sm.Logit(y, X)
    logit_res = logit_model.fit(maxiter=2000)
    print(logit_res.summary())
    print(logit_res.wald_test('1*x1 + 1*x2 + 1*x3'))
    print()
    probit_model = sm.Probit(y, X)
    probit_res = probit_model.fit(maxiter=2000)
    print(probit_res.summary())
    print(probit_res.wald_test('1*x1 + 1*x2 + 1*x3'))
    print()
    linear_model = sm.OLS(y, X)
    linear_res = linear_model.fit()  # OLS has a closed-form solution; no maxiter needed
    result = 0.
    for array in X:
        for i, item in enumerate(array):
            result += linear_res.params[i] * item
    result /= (len(X))
    print('Linear function value: {}'.format(result))
    print(linear_res.summary())
    print(linear_res.wald_test('1*x1 + 1*x2 + 1*x3'))
    print()
Example #5
def Reg44():
    y, X = dmatrices('Dec ~ WiStu*Markt + Student',
                     df_ama,
                     return_type='dataframe')
    probit = sm.Probit(y, X, missing='drop')
    res = probit.fit()
    print(res.summary())
Example #6
def Reg43():
    y, X = dmatrices('Dec ~ FMIS_Index*Markt + Altru_1 + Altru_2',
                     df_ama,
                     return_type='dataframe')
    probit = sm.Probit(y, X, missing='drop')
    res = probit.fit()
    print(res.summary())
Example #7
def fit_psychometric_curve(log_file, plot=False, thresholds=(1, 4)):
    import statsmodels.api as sm
    import scipy.stats as ss  # needed for ss.norm.ppf below
    df = pd.read_table(log_file)

    df = df[df.phase == 9]
    df = df.pivot_table(index=['trial_nr'], values=['choice', 'certainty', 'n1', 'n2', 'prob1', 'prob2'])
    df = df[~df.choice.isnull()]

    df['log(risky/safe)'] = np.log(df['n1'] / df['n2'])
    ix = df.prob1 == 1.0

    print(df)

    if ix.sum() > 0:
        df.loc[ix, 'log(risky/safe)'] = np.log(df.loc[ix, 'n2'] / df.loc[ix, 'n1'])
        df.loc[ix, 'chose risky'] = df.loc[ix, 'choice'] == 2

    if (~ix).sum() > 0:
        df.loc[~ix, 'log(risky/safe)'] = np.log(df.loc[~ix, 'n1'] / df.loc[~ix, 'n2'])
        df.loc[~ix, 'chose risky'] = df.loc[~ix, 'choice'] == 1

    df['chose risky'] = df['chose risky'].astype(bool)

    if plot:
        import seaborn as sns
        import matplotlib.pyplot as plt
        fac = sns.lmplot(x='log(risky/safe)', y='chose risky', data=df, logistic=True)

        for color, x in zip(sns.color_palette()[:4], [np.log(1./.55)]):
            
            plt.axvline(x, color=color, ls='--')    
            
        plt.gcf().set_size_inches(14, 6)
        plt.axhline(.5, c='k', ls='--')
        x = np.linspace(0, 1.5, 17)
        plt.xticks(x, [f'{e:0.2f}' for e in np.exp(x)], rotation='vertical')
        plt.xlim(0, 1.5)
        plt.show()

    
    # Fit probit
    df['intercept'] = 1

    try:
        m = sm.Probit(df['chose risky'], df[['intercept', 'log(risky/safe)']])
        r = m.fit()
        # Invert the fitted probit: the x-values where P(chose risky) = .2 and .8
        x_lower = (ss.norm.ppf(.2) - r.params.intercept) / r.params['log(risky/safe)']
        x_upper = (ss.norm.ppf(.8) - r.params.intercept) / r.params['log(risky/safe)']
    except Exception as e:
        print("Problem with calibration, using standard values")
        x_lower = np.log(thresholds[0])
        x_upper = np.log(thresholds[1])

    print(f'Original bounds: {np.exp(x_lower)}, {np.exp(x_upper)}')
    x_lower = np.exp(np.max((x_lower, np.log(thresholds[0]))))
    x_upper = np.exp(np.min((x_upper, np.log(thresholds[1]))))
    print(f'Final bounds: {x_lower}, {x_upper}')


    return x_lower, x_upper
Example #8
def Reg53():
    y, X = dmatrices(
        'Dec ~ FMIS_Index + pol_rechts + Gespendet + Geld + weiblich + Alter + Selbst + Sozial + Marktberuf + Akademiker ',
        df_Markt,
        return_type='dataframe')
    probit = sm.Probit(y, X, missing='drop')
    res = probit.fit()
    print(res.summary())
Example #9
def Reg51():
    y, X = dmatrices(
        'Dec ~ Altru_1 + Altru_2 + FMIS_Index + Marktgeschehen + pol_rechts + Gespendet + Geld + weiblich + Alter + Selbst + Sozial + Marktberuf + WiStu + NatStu + SoStu + JurStu + Akademiker',
        df_Markt,
        return_type='dataframe')
    probit = sm.Probit(y, X, missing='drop')
    res = probit.fit()
    print(res.summary())
Example #10
def start_values(init_dict, data_frame, option):
    """The function selects the start values for the minimization process."""
    if not isinstance(init_dict, dict):
        msg = 'The input object ({}) for specifying the start values is not a dictionary.' \
            .format(init_dict)
        raise UserError(msg)
    indicator = init_dict['ESTIMATION']['indicator']
    dep = init_dict['ESTIMATION']['dependent']

    if option == 'init':
        # Set coefficients equal the true init file values
        x0 = init_dict['AUX']['init_values'][:-6]
    elif option == 'auto':

        try:

            # Estimate beta1 and beta0:
            beta = []
            sd_ = []

            for i in [1.0, 0.0]:
                Y = data_frame[dep][data_frame[indicator] == i]
                if i == 1:
                    order = init_dict['TREATED']['order']
                else:
                    order = init_dict['UNTREATED']['order']
                X = data_frame[[init_dict['varnames'][j - 1]
                                for j in order]][i == data_frame[indicator]]

                ols_results = sm.OLS(Y, X).fit()
                beta += [ols_results.params]
                sd_ += [np.sqrt(ols_results.scale), 0.0]
            # Estimate gamma via Probit
            Z = data_frame[[
                init_dict['varnames'][j - 1]
                for j in init_dict['CHOICE']['order']
            ]]
            probitRslt = sm.Probit(data_frame[indicator], Z).fit(disp=0)
            gamma = probitRslt.params
            # Adjust estimated cost-benefit shifter and intercept coefficients
            # Arrange starting values
            x0 = np.concatenate((beta[0], beta[1], gamma, sd_))
            check_start_values(x0)

        except (PerfectSeparationError, ValueError, UserError):
            msg = 'The estimation process was not able to provide automatic start values ' \
                  'due to perfect separation. The initialization specifications are used ' \
                  'as start values during the further process.'

            # Set coefficients equal the true init file values
            x0 = init_dict['AUX']['init_values'][:-6]
            init_dict['ESTIMATION']['warning'] = msg
            option = 'init'

    x0 = start_value_adjustment(x0, init_dict, option)
    x0 = np.array(x0)
    return x0
Example #11
def start_values(init_dict, data_frame, option):
    """The function selects the start values for the minimization process."""

    if not isinstance(init_dict, dict):
        raise AssertionError()
    numbers = [init_dict['AUX']['num_covars_out'], init_dict['AUX']['num_covars_cost']]

    if option == 'init':
        # Set coefficients equal the true init file values
        x0 = init_dict['AUX']['init_values'][:2 * numbers[0] + numbers[1]]
        sd_ = None
    elif option == 'auto':

        try:

            # Estimate beta1 and beta0:
            beta = []
            sd_ = []
            for i in [0.0, 1.0]:
                Y, X = data_frame.Y[data_frame.D == i], data_frame.filter(regex=r'^X\_')[
                    data_frame.D == i]
                ols_results = sm.OLS(Y, X).fit()
                beta += [ols_results.params]
                sd_ += [np.sqrt(ols_results.scale)]

            # Estimate gamma via probit
            X = data_frame.filter(regex=r'^X\_')
            Z = (data_frame.filter(regex=r'^Z\_')).drop('Z_0', axis=1)
            XZ = np.concatenate((X, Z), axis=1)
            probitRslt = sm.Probit(data_frame.D, XZ).fit(disp=0)
            gamma = probitRslt.params
            gamma_const = np.subtract(np.subtract(beta[1][0], beta[0][0]), gamma[0])
            if len(init_dict['COST']['all']) == 1:
                gamma = [gamma_const]
            else:
                gamma = np.concatenate(([gamma_const], gamma[-(numbers[1] - 1):]))
            # Arrange starting values
            x0 = np.concatenate((beta[1], beta[0]))
            x0 = np.concatenate((x0, gamma))

        except (PerfectSeparationError, ValueError):
            msg = 'The estimation process was not able to provide automatic start values ' \
                  'due to perfect separation. The initialization specifications are used ' \
                  'as start values during the further process.'
            # Set coefficients equal the true init file values
            x0 = init_dict['AUX']['init_values'][:2 * numbers[0] + numbers[1]]
            sd_ = None
            init_dict['ESTIMATION']['warning'] = msg
            option = 'init'

    x0, start = provide_cholesky_decom(init_dict, x0, option, sd_)
    init_dict['AUX']['starting_values'] = x0[:]
    init_dict['AUX']['start_values'] = start
    x0 = np.array(x0)

    return x0
Example #12
def berry_table_5(df: pd.DataFrame) -> str:

    y = df['active_next_period']
    x = df[['geo_mean_pop', 'distance', 'distance_squared', 'city2']]
    x = sm.add_constant(x)
    number_of_variables = x.shape[1]

    x_less_city2 = df[['geo_mean_pop', 'distance', 'distance_squared']]
    x_less_city2 = sm.add_constant(x_less_city2)

    # run probit with the full set of variables
    probit_mod = sm.Probit(y, x)
    probit_res = probit_mod.fit()

    # run probit without city2
    probit_mod = sm.Probit(y, x_less_city2)
    probit_res_less_city2 = probit_mod.fit()

    # generate a container for a table
    table = []
    for i in range(number_of_variables):
        if probit_res.params.index.values[i] == 'city2':
            table.append(
                [
                    '{}'.format(x.columns.values[i]),
                    '{:.2f}\n({:.2f})'.format(probit_res.params.values[i], probit_res.bse.values[i]),
                    '--\n--'
                ]
            )
        else:
            table.append(
                [
                    '{}'.format(x.columns.values[i]),
                    '{:.2f}\n({:.2f})'.format(probit_res.params.values[i], probit_res.bse.values[i]),
                    '{:.2f}\n({:.2f})'.format(probit_res_less_city2.params.values[i],
                                              probit_res_less_city2.bse.values[i])
                ]
            )

    # set header
    headers = ['Variable', '(1) Probit\nParameters\n(Std. Error)', '(2) Probit\nParameters\n(Std. Error)']

    return tabulate(table, headers, tablefmt="latex", numalign="right", floatfmt=".2f")
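# A hedged usage sketch for berry_table_5 (not from the original source): the column names
# follow the function body above, but the DataFrame here is purely synthetic and the numpy/
# pandas setup is an assumption of this sketch.
import numpy as np
import pandas as pd

rng = np.random.default_rng(1)
n = 500
df_demo = pd.DataFrame({
    'geo_mean_pop': rng.normal(10, 2, n),
    'distance': rng.uniform(0, 5, n),
    'city2': rng.integers(0, 2, n),
})
df_demo['distance_squared'] = df_demo['distance'] ** 2
# Outcome loosely tied to the regressors so the probit has a signal to fit.
latent = (0.3 * (df_demo['geo_mean_pop'] - 10) - 0.4 * df_demo['distance']
          + 0.05 * df_demo['distance_squared'] + 0.5 * df_demo['city2'] + rng.normal(size=n))
df_demo['active_next_period'] = (latent > 0).astype(int)

print(berry_table_5(df_demo))  # LaTeX table produced via tabulate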
Example #13
def start_values(init_dict, data_frame, option):
    """The function selects the start values for the minimization process."""
    if not isinstance(init_dict, dict):
        msg = ("The input object ({})for specifing the start values isn`t a "
               "dictionary.".format(init_dict))
        raise UserError(msg)
    indicator = init_dict["ESTIMATION"]["indicator"]
    dep = init_dict["ESTIMATION"]["dependent"]

    if option == "init":
        # Set coefficients equal the true init file values
        x0 = init_dict["AUX"]["init_values"][:-6]
    elif option == "auto":

        try:

            # Estimate beta1 and beta0:
            beta = []
            sd_ = []

            for i in [1.0, 0.0]:
                Y = data_frame[dep][data_frame[indicator] == i]
                if i == 1:
                    order = init_dict["TREATED"]["order"]
                else:
                    order = init_dict["UNTREATED"]["order"]
                X = data_frame[order][i == data_frame[indicator]]

                ols_results = sm.OLS(Y, X).fit()
                beta += [ols_results.params]
                sd_ += [np.sqrt(ols_results.scale), 0.0]
            # Estimate gamma via Probit
            Z = data_frame[init_dict["CHOICE"]["order"]]
            probitRslt = sm.Probit(data_frame[indicator], Z).fit(disp=0)
            gamma = probitRslt.params
            # Adjust estimated cost-benefit shifter and intercept coefficients
            # Arrange starting values
            x0 = np.concatenate((beta[0], beta[1], gamma, sd_))
            check_start_values(x0)

        except (PerfectSeparationError, ValueError, UserError):
            msg = ("The estimation process wasn`t able to provide automatic"
                   " start values due to perfect seperation. \n"
                   " The intialization specifications are used as start "
                   "values during the further process.")

            # Set coefficients equal the true init file values
            x0 = init_dict["AUX"]["init_values"][:-6]
            init_dict["ESTIMATION"]["warning"] = msg
            option = "init"

    x0 = start_value_adjustment(x0, init_dict, option)
    x0 = np.array(x0)

    return x0
Example #14
def het_test_probit(results):
    """
    Wald test for Probit
    --------------------
    H0: homoscedasticity
    HA: heteroscedasticity

    Parameters
    ----------
    results : Probit results instance

    Returns
    -------
    None; prints the Wald test statistic, its p-value, and the degrees of freedom
    (the number of restrictions, equal to the number of explanatory variables
    excluding the constant term).

    References
    ----------
    The test is based on
    (1) Wooldridge 2010, section 15.5.3
    (2) https://www.statalist.org/forums/forum/general-stata-discussion/general/1292180-test-for-heteroskedasticity-in-logit-probit-models
    """

    yhat = results.predict(linear=True)  # original fitted values
    exog_var = results.model.exog  # original exog
    exog_df = pd.DataFrame(exog_var)  # convert to DataFrame

    try:  # drop a column of a constant if any
        tt = exog_df.nunique()
        idx_1 = list(tt).index(1.0)
        exog_df = exog_df.drop(idx_1, axis=1)
    except ValueError:
        pass

    num_para = exog_df.shape[1]  # no of non-constant parameters

    # X = np.exp(yhat).reshape(len(yhat),1) * exog_df.values
    X = yhat.reshape(len(yhat), 1) * exog_df.values

    endog = results.model.endog
    exog = np.column_stack((results.model.exog, X))
    res_test = sm.Probit(endog, exog).fit(disp=False)

    A = np.identity(len(res_test.params))
    A = A[-num_para:, :]
    s = res_test.wald_test(A)
    return print('H0: homoscedasticity\nHA: heteroscedasticity\n',
                 '\nWald test:', "%#2.3f" % s.statistic[0][0], '\np-value:',
                 "%#7.3f" % s.pvalue, '\ndf freedom:', "%#3.0f" % s.df_denom)
Example #15
def estimate_treatment_propensity(dict_, data, logit, show_output=False):
    """
    This function estimates the propensity of selecting into treatment
    for both treated and untreated individuals based on instruments Z.
    Z subsumes all the observable components that influence the treatment
    decision, e.g. the decision to enroll into college (D = 1) or not (D = 0).

    Estimate propensity scores via Logit (default) or Probit.

    Parameters
    ----------
    dict_: dict
        Estimation dictionary. Returned by grmpy.read(init_file).
    data: pandas.DataFrame
        Data set to perform the estimation on. Specified
        under dict_["ESTIMATION"]["file"].
    logit: bool
        Probability model for the choice equation.
        If True: logit, else: probit.
    show_output: bool
        If True, intermediate outputs of the estimation process are displayed.

    Returns
    -------
    data: pandas.DataFrame
        The input data set with an additional "prop_score" column containing the
        propensity score (in [0, 1]); values closer to 1 denote a higher
        inclination to treatment.
    """
    D = data[dict_["ESTIMATION"]["indicator"]].values
    Z = data[dict_["CHOICE"]["order"]]

    if logit is True:
        logitRslt = sm.Logit(D, Z).fit(disp=0)
        prop_score = logitRslt.predict(Z)

        if show_output is True:
            print(logitRslt.summary())

    else:
        probitRslt = sm.Probit(D, Z).fit(disp=0)
        prop_score = probitRslt.predict(Z)

        if show_output is True:
            print(probitRslt.summary())

    data.loc[:, "prop_score"] = prop_score
    # prop_score.values

    return data
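# A hedged sketch of a call to estimate_treatment_propensity (not from the original source).
# The nested dict only mimics the keys the function actually reads (ESTIMATION/indicator and
# CHOICE/order); a real grmpy initialization dictionary contains much more.
import numpy as np
import pandas as pd

rng = np.random.default_rng(3)
n = 300
demo = pd.DataFrame({'const': 1.0, 'Z1': rng.normal(size=n)})
demo['D'] = (0.5 * demo['Z1'] + rng.normal(size=n) > 0).astype(int)

dict_demo = {'ESTIMATION': {'indicator': 'D'}, 'CHOICE': {'order': ['const', 'Z1']}}
demo = estimate_treatment_propensity(dict_demo, demo, logit=False, show_output=False)
print(demo['prop_score'].describe())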
Example #16
def asGLM(X, y):
    '''
    Frequentist Probit model

    Inputs:
    - X: Feature matrix (DxN)
    - y: Observations (binary vector of length N)
    '''
    clf = sm.Probit(y, X.T)
    clf_ti = clf.fit()
    print('Coefficients: ')
    print(clf_ti.params)
    print('CI: ')
    print(clf_ti.conf_int())
    return clf_ti
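# A tiny usage sketch for asGLM (not part of the original snippet). X is D x N as in the
# docstring, hence the transpose inside the function; the data below is synthetic.
import numpy as np

rng = np.random.default_rng(4)
D, N = 3, 200
X_demo = rng.normal(size=(D, N))                          # feature matrix, D x N
w_true = np.array([1.0, -0.5, 0.25])
y_demo = (w_true @ X_demo + rng.normal(size=N) > 0).astype(int)

fitted = asGLM(X_demo, y_demo)  # prints coefficients and confidence intervals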
def dump_probit_results(rel_matchups):
    break_q = 0.66
    filenames = {True: ExportedFiles.over_probit, False: ExportedFiles.under_probit}

    for gid, gdf in rel_matchups.groupby(
        rel_matchups[style_pair_vals[0]].pipe(lambda s: s > s.quantile(break_q))
    ):
        html_str = (
            sm.Probit(gdf["win"], gdf[style_pair_vals[1:]].assign(const=1))
            .fit(cov_type="HC1")
            .summary()
            .tables[1]
            .as_html()
        )
        with open(filenames[gid], "w") as fp:
            fp.write(html_str)
Example #18
def SPProbit(context):
    # Get the relevant data from the Context
    args = context.args
    # Inspect the args.inputData data sent by the previous node
    df = args.inputData

    featureColumns = args.featureColumns
    labelColumn = args.labelColumn

    features = df[featureColumns].values
    label = df[labelColumn].values

    probit_mod = sm.Probit(label, features, missing=args.missing)
    probit_res = probit_mod.fit(method=args.method)

    return probit_res
Example #19
    def compute(self, method='logistic'):
        """
        Compute propensity score and measures of goodness-of-fit

        Parameters
        ----------
        method : str
            Propensity score estimation method. Either 'logistic' or 'probit'
        """
        predictors = sm.add_constant(self.covariates, prepend=False)
        if method == 'logistic':
            model = sm.Logit(self.treatment, predictors).fit(disp=False, warn_convergence=True)
        elif method == 'probit':
            model = sm.Probit(self.treatment, predictors).fit(disp=False, warn_convergence=True)
        else:
            raise ValueError('Unrecognized method')
        return model.predict()
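# The class that owns the compute() method above is not shown; a hedged, standalone sketch
# of its probit branch, assuming `covariates` and `treatment` arrays like the attributes the
# method reads (synthetic data, not from the original source):
import numpy as np
import statsmodels.api as sm

rng = np.random.default_rng(5)
covariates = rng.normal(size=(400, 2))
treatment = (covariates @ np.array([1.0, -1.0]) + rng.normal(size=400) > 0).astype(int)

predictors = sm.add_constant(covariates, prepend=False)
propensity = sm.Probit(treatment, predictors).fit(disp=False).predict()
print(propensity[:5])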
def plot_probit(model, trim_pct, probit_index_loc=0):
    '''Return a plot of the probit for the specification used in *model*, fit on the full dataset.

    '''

    data = pd.read_csv('./data/' + model_json['data'])
    exog_vars = ' + '.join(model.coeffs_final[probit_index_loc].index)

    Y, X = dmatrices(model_json['y_name'] + ' ~ ' + exog_vars, data)
    probit_result = sm.Probit(Y, X).fit()

    Xb = np.sort(probit_result.fittedvalues)
    p_hat = np.sort(probit_result.predict())

    # To make plots comparable, standardize Xb, rescale to mean and variance of index.
    μ_Xb = Xb.mean()
    σ_Xb = Xb.std()
    Xb_standardized = (Xb - μ_Xb) / σ_Xb

    index = model.index_final[probit_index_loc]
    μ_index = index.mean()
    σ_index = index.std()

    Xb_scaled = Xb_standardized * σ_index + μ_index

    # Align limits with ASF limits.
    xmin = np.percentile(model.index_final[probit_index_loc], trim_pct)
    xmax = np.percentile(model.index_final[probit_index_loc], 100 - trim_pct)

    fig = plt.figure()
    fig.add_subplot(1, 1, 1)

    plt.xlim(get_lim(xmin, xmax))
    plt.ylim((0, PRED_MAX))

    if probit_index_loc == 0:
        plt.xlabel('Rescaled probit index 1')
    elif probit_index_loc == 1:
        plt.xlabel('Rescaled probit index 2')

    plt.ylabel(labels["y"])
    plt.plot(Xb_scaled, p_hat)

    return fig
Example #21
def WiStu_plot():
    y, X = dmatrices('Dec ~ WiStu*Markt + Student',
                     df_ama,
                     return_type='dataframe')
    probit = sm.Probit(y, X, missing='drop')
    res = probit.fit()

    #print(res.summary())
    #--------------------------------------
    def f_Markt(VI):
        return res.predict([1, VI, 1, VI, 1])

    def f_Baseline(VI):
        return res.predict([1, VI, 0, 0, 1])

    #--------------- Preparation ------------
    VI_list = list(range(0, 2))
    y_Markt = [None] * len(VI_list)
    y_Base = [None] * len(VI_list)
    for i in VI_list:
        y_Markt[i] = f_Markt(i)
    for i in VI_list:
        y_Base[i] = f_Baseline(i)
    #--------------Estimated Likelihood----------
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    plt.xticks(np.arange(0, 2, step=1))
    ax.plot(VI_list, y_Markt, color='tab:blue', linewidth=2)
    ax.plot(VI_list, y_Base, color='tab:orange', linewidth=2)
    ax.axhline(y=df_ama['Dec'].mean(),
               color='gray',
               linewidth=1,
               linestyle='--')
    ax.axvline(x=df_ama.WiStu.mean(),
               color='gray',
               linewidth=1,
               linestyle='--')
    #ax.set_title('Likelihood of unfair decision dependent on FMIS')
    ax.set_ylabel('Estimated Probability of Fair Decision')
    ax.set_xlabel('Economics Student')
    ax.legend(labels=['Market', 'Baseline'])
    #plt.savefig(sav_dir+'\WiStu_plot.png', bbox_inches="tight")
    plt.show()
Example #22
    def compute(self, method='logistic'):
        """
        Compute propensity score and measures of goodness-of-fit
        
        Parameters
        ----------
        method : str
            Propensity score estimation method. Either 'logistic' or 'probit'
        """
        predictions = None
        if method == 'logistic':
            # I've had many issues with the default statsmodels solver and with others (including
            # bfgs, which seemed to work but then started returning 0.5 for everything), e.g.
            # singular matrix errors. The extras that statsmodels offers over sklearn (such as
            # p-values) aren't needed here, but a more robust implementation is, so sklearn is
            # used for the logistic case. Some useful background:
            # https://stackoverflow.com/questions/24924755/logit-estimator-in-statsmodels-and-sklearn

            # A high C value means almost no regularization: the data isn't standardized, so
            # features shouldn't be penalized away just because of their different scales (some
            # regularization is required by how the solver works).
            lr = LogisticRegression(C=1e9, fit_intercept=True)
            lr.fit(self.covariates, self.treatment)
            self.model = lr
            predictions = lr.predict_proba(
                self.covariates)[:,
                                 1]  # index 1 because we want the prob of a 1

            # old
            #model = sm.Logit(self.treatment, predictors).fit_regularized(alpha = 0.001, disp=False, warn_convergence=True)
            #model = sm.Logit(self.treatment, predictors).fit(method='bfgs', disp=False, warn_convergence=True)
            #model = sm.Logit(self.treatment, predictors).fit(disp=False, warn_convergence=True)
            #model = sm.Logit(self.treatment, predictors).fit(disp=True, warn_convergence=True, maxiter=500)
        elif method == 'probit':
            predictors = sm.add_constant(self.covariates, prepend=False)
            model = sm.Probit(self.treatment,
                              predictors).fit(disp=False,
                                              warn_convergence=True)
            self.model = model
            predictions = model.predict()
        else:
            raise ValueError('Unrecognized method')

        return predictions
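# The comments above argue that sklearn's LogisticRegression with a very large C (i.e. almost
# no regularization) yields essentially the same propensity scores as an unpenalized
# statsmodels Logit. A minimal, hedged sanity check of that claim on synthetic data:
import numpy as np
import statsmodels.api as sm
from sklearn.linear_model import LogisticRegression

rng = np.random.default_rng(6)
n = 1000
covs = rng.normal(size=(n, 3))
treat = (covs @ np.array([0.8, -0.5, 0.3]) + rng.normal(size=n) > 0).astype(int)

# sklearn: intercept fitted internally, C large so the penalty is negligible
lr = LogisticRegression(C=1e9, fit_intercept=True, max_iter=1000).fit(covs, treat)
p_sklearn = lr.predict_proba(covs)[:, 1]

# statsmodels: constant appended explicitly, no penalty at all
p_sm = sm.Logit(treat, sm.add_constant(covs, prepend=False)).fit(disp=False).predict()

print(np.abs(p_sklearn - p_sm).max())  # should be very small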
Example #23
def stats(predictor, response, model):
    # Applies the statistical model you specify to the input variables; the code for each
    # model is listed in the if/elif chain below.
    predictor = np.asarray(predictor)
    response = np.asarray(response)
    # statsmodels expects (endog, exog): the response comes first, then the predictors
    if model == 'logit':
        model = sm.Logit(response, predictor)
    elif model == 'lsr':
        model = sm.OLS(response, predictor)
    elif model == "probit":
        model = sm.Probit(response, predictor)
    elif model == "gls":
        model = sm.GLS(response, predictor)
    elif model == "glsar":
        model = sm.GLSAR(response, predictor)
    elif model == "quantreg":
        model = sm.QuantReg(response, predictor)
    else:
        raise ValueError('Unrecognized model code')
    model = model.fit()
    print(model.summary())
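# A hedged example of calling stats() with synthetic data, using the 'probit' code from the
# chain above (and assuming the endog/exog ordering noted in the comments there):
import numpy as np

rng = np.random.default_rng(7)
n = 500
X_demo = np.column_stack((np.ones(n), rng.normal(size=(n, 2))))   # constant + 2 regressors
y_demo = (X_demo @ np.array([0.2, 1.0, -0.7]) + rng.normal(size=n) > 0).astype(int)

stats(X_demo, y_demo, 'probit')  # prints the fitted probit summary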
Example #24
def FMIS_plot():
    y, X = dmatrices('Dec ~ FMIS_Index*Markt', df_ama, return_type='dataframe')
    probit = sm.Probit(y, X, missing='drop')
    res = probit.fit()

    #print(res.summary())
    #------------------------------------------------------------------
    def f_Markt(FMIS):
        return res.predict([1, FMIS, 1, FMIS])

    def f_Baseline(FMIS):
        return res.predict([1, FMIS, 0, 0])

    #------------- Preparation --------------------------------
    FMIS_list = list(range(1, 12))
    y_Markt = [None] * len(FMIS_list)
    y_Base = [None] * len(FMIS_list)
    for i in FMIS_list:
        y_Markt[i - 1] = f_Markt(i)
    for i in FMIS_list:
        y_Base[i - 1] = f_Baseline(i)
    #----------------Estimated Likelihood----------------------
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.plot(FMIS_list, y_Markt, color='tab:blue', linewidth=2)
    ax.plot(FMIS_list, y_Base, color='tab:orange', linewidth=2)
    ax.axhline(y=df_ama['Dec'].mean(),
               color='gray',
               linewidth=1,
               linestyle='--')
    ax.axvline(x=df_ama.FMIS_Index.mean(),
               color='gray',
               linewidth=1,
               linestyle='--')
    #ax.set_title('Likelihood of unfair decision dependent on FMIS')
    ax.set_ylabel('Estimated Probability of Fair Decision')
    ax.set_xlabel('Fair Market Ideology Index')
    ax.legend(labels=['Market', 'Baseline'])
    #plt.savefig(sav_dir+'\FMIS_plot.png', bbox_inches="tight")
    plt.show()
Example #25
def estimate_treatment_propensity(D, Z, logit, show_output):
    """
    This function estimates the propensity of selecting into treatment
    for both treated and untreated individuals based on instruments Z.
    Z subsumes all the observable components that influence the treatment
    decision, e.g. the decision to enroll into college (D = 1) or not (D = 0).

    Estimate propensity scores via Logit (default) or Probit.
    """
    if logit is True:
        logitRslt = sm.Logit(D, Z).fit(disp=0)
        ps = logitRslt.predict(Z)

        if show_output is True:
            print(logitRslt.summary())

    else:
        probitRslt = sm.Probit(D, Z).fit(disp=0)
        ps = probitRslt.predict(Z)

        if show_output is True:
            print(probitRslt.summary())

    return ps.values
Example #26
    def _fit_twostep(self):
        ########################################################################
        # PRIVATE METHOD
        # Fits using Heckman two-step from Heckman (1979).
        ########################################################################

        ## prep data
        Y, X, Z = self.get_datamats()

        ## Step 1
        step1model = sm.Probit(self.treated, Z)
        step1res = step1model.fit(disp=False)
        step1_fitted = np.atleast_2d(step1res.fittedvalues).T
        step1_varcov = step1res.cov_params()

        inverse_mills = norm.pdf(step1_fitted) / norm.cdf(step1_fitted)

        ## Step 2
        W = np.hstack((X, inverse_mills[self.treated]))
        step2model = sm.OLS(Y, W)
        step2res = step2model.fit()

        params = step2res.params[:-1]
        betaHat_inverse_mills = step2res.params[-1]

        ## Compute standard errors
        # Compute estimated error variance of censored regression
        delta = np.multiply(inverse_mills,
                            inverse_mills + step1_fitted)[self.treated]

        sigma2Hat = step2res.resid.dot(step2res.resid) / self.nobs_uncensored + \
            (betaHat_inverse_mills**2 * sum(delta)) / self.nobs_uncensored
        sigma2Hat = sigma2Hat[0]
        sigmaHat = np.sqrt(sigma2Hat)
        rhoHat = betaHat_inverse_mills / sigmaHat

        # compute standard errors of beta estimates of censored regression
        delta_1d = delta.T[0]

        Q = rhoHat**2 * (
            (W.T * delta_1d).dot(Z[self.treated])).dot(step1_varcov).dot(
                (Z[self.treated].T * delta_1d).dot(W))

        WT_W_inv = np.linalg.inv(W.T.dot(W))
        WT_R = W.T * (1 - rhoHat**2 * delta_1d)
        normalized_varcov_all = WT_W_inv.dot(WT_R.dot(W) + Q).dot(WT_W_inv)
        del WT_W_inv
        del WT_R

        del delta_1d

        normalized_varcov = normalized_varcov_all[:-1, :-1]

        varcov_all = sigma2Hat * normalized_varcov_all
        varcov = varcov_all[:-1, :-1]

        stderr_all = np.sqrt(np.diag(varcov_all))
        stderr = stderr_all[:-1]
        stderr_betaHat_inverse_mills = stderr_all[-1]

        ## store results
        results = HeckmanResults(
            self,
            params,
            normalized_varcov,
            sigma2Hat,
            select_res=step1res,
            params_inverse_mills=betaHat_inverse_mills,
            stderr_inverse_mills=stderr_betaHat_inverse_mills,
            var_reg_error=sigma2Hat,
            corr_eqnerrors=rhoHat,
            method='twostep')

        return results
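# For reference, the error-variance and correlation estimates computed in _fit_twostep follow
# the standard Heckman (1979) two-step formulas, with lambda_i the inverse Mills ratio from
# the first-step probit and e the second-step OLS residuals:
#   delta_i    = lambda_i * (lambda_i + Z_i @ gamma_hat)
#   sigma2_hat = (e'e + beta_lambda_hat**2 * sum_i delta_i) / N_uncensored
#   rho_hat    = beta_lambda_hat / sigma_hat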
Example #27
# (Fragment of the statsmodels GenericLikelihoodModel example; the enclosing class definition
#  is assumed here so that the MyProbit calls below make sense.)
class MyProbit(GenericLikelihoodModel):
    def loglike(self, params):
        exog = self.exog
        endog = self.endog
        q = 2 * endog - 1
        return stats.norm.logcdf(q * np.dot(exog, params)).sum()


# Estimate the model and print a summary:

sm_probit_manual = MyProbit(endog, exog).fit()
print(sm_probit_manual.summary())

# Compare your Probit implementation to ``statsmodels``' "canned"
# implementation:

sm_probit_canned = sm.Probit(endog, exog).fit()

print(sm_probit_canned.params)
print(sm_probit_manual.params)

print(sm_probit_canned.cov_params())
print(sm_probit_manual.cov_params())

# Notice that the ``GenericLikelihoodModel`` class provides automatic (numerical)
# differentiation, so we didn't have to provide Hessian or score functions
# in order to calculate the covariance estimates.

Example #28
def result_statsmodels_probit():
    endog, exog = generate_test_data()
    result = sm.Probit(endog, exog).fit()

    return result
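# (The lines below are a fragment of the statsmodels discrete-choice example; logit_mod is
#  assumed to be an sm.Logit model fitted on the Spector data that the probit further down
#  reuses.)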
logit_res = logit_mod.fit(disp=0)
print("Parameters: ", logit_res.params)

# Marginal Effects

margeff = logit_res.get_margeff()
print(margeff.summary())

# As in all the discrete data models presented below, we can print a nice
# summary of results:

print(logit_res.summary())

# ## Probit Model

probit_mod = sm.Probit(spector_data.endog, spector_data.exog)
probit_res = probit_mod.fit()
probit_margeff = probit_res.get_margeff()
print("Parameters: ", probit_res.params)
print("Marginal effects: ")
print(probit_margeff.summary())

# ## Multinomial Logit

# Load data from the American National Election Studies:

anes_data = sm.datasets.anes96.load()
anes_exog = anes_data.exog
anes_exog = sm.add_constant(anes_exog)

# Inspect the data:
Example #30
from __future__ import print_function
import numpy as np
from scipy import stats
import statsmodels.api as sm
from statsmodels.base.model import GenericLikelihoodModel


data = sm.datasets.spector.load()
data.exog = sm.add_constant(data.exog, prepend=False)
# in this dir

probit_mod = sm.Probit(data.endog, data.exog)
probit_res = probit_mod.fit()
loglike = probit_mod.loglike
score = probit_mod.score
mod = GenericLikelihoodModel(data.endog, data.exog*2, loglike, score)
res = mod.fit(method="nm", maxiter = 500)

def probitloglike(params, endog, exog):
    """
    Log likelihood for the probit
    """
    q = 2 * endog - 1
    X = exog
    return np.add.reduce(stats.norm.logcdf(q * np.dot(X, params)))

mod = GenericLikelihoodModel(data.endog, data.exog, loglike=probitloglike)
res = mod.fit(method="nm", fargs=(data.endog,data.exog), maxiter=500)
print(res)