Python PanelOLS.fit 예제들, linearmodels.panel.PanelOLS.fit Python 예제들

예제 #1

0

파일 보기

def regressions(data, endog, exog, options, clusterfirm, constant):
    """Fit a single panel regression and return the fitted result.

    Parameters
    ----------
    data : panel frame
        Indexed as ``data[endog]`` / ``data[exog]``; must expose ``gvkey``
        when ``clusterfirm == 0``.
    endog : label of the dependent-variable column.
    exog : label(s) of the regressor column(s).
    options : int
        0 -> PanelOLS with entity and time effects,
        1 -> PanelOLS with time effects only,
        2 -> PooledOLS.
    clusterfirm : int
        0 -> clustered covariance with ``data.gvkey`` as clusters,
        1 -> clustered covariance with ``cluster_entity=True``,
        2 -> ``fit()`` with the library defaults.
    constant : int
        1 -> regress on ``sm.add_constant(data[exog])``,
        0 -> regress on ``data[exog]``.
        Any other value hands ``exog`` to the model untouched.

    Raises
    ------
    ValueError
        If ``options`` or ``clusterfirm`` is not one of 0, 1, 2.
    """
    # Reject unknown selectors before touching the data; otherwise the model
    # (or the result) would simply never be bound further down.
    if options not in (0, 1, 2):
        raise ValueError("options must be 0, 1 or 2, got %r" % (options,))
    if clusterfirm not in (0, 1, 2):
        raise ValueError(
            "clusterfirm must be 0, 1 or 2, got %r" % (clusterfirm,))

    if constant == 1:
        exog = sm.add_constant(data[exog])
    elif constant == 0:
        exog = data[exog]

    if options == 2:
        mod = PooledOLS(data[endog], exog)
    else:
        # options 0 and 1 differ only in whether entity effects are absorbed.
        mod = PanelOLS(data[endog],
                       exog,
                       entity_effects=bool(options == 0),
                       time_effects=True)

    if clusterfirm == 0:
        return mod.fit(cov_type='clustered', clusters=data.gvkey)
    if clusterfirm == 1:
        return mod.fit(cov_type='clustered', cluster_entity=True)
    return mod.fit()

예제 #2

0

파일 보기

def run_regressions_3(data=None, endog=None, exog=None, options=0,
                      clusterfirm=0):
    """Fit the same regression on every panel in ``data``.

    Parameters
    ----------
    data : iterable of panel frames, optional
        Each element is indexed as ``elem[endog]`` / ``elem[exog]`` and must
        expose ``gvkey`` when ``clusterfirm == 0``. Defaults to no panels.
    endog : label of the dependent-variable column (default: empty list).
    exog : label(s) of the regressor column(s) (default: empty list).
    options : int
        0 -> PanelOLS with entity and time effects,
        1 -> PanelOLS with time effects only,
        2 -> PooledOLS.
    clusterfirm : int
        0 -> clustered covariance with ``elem.gvkey`` as clusters,
        1 -> clustered covariance with ``cluster_entity=True``,
        2 -> ``fit()`` with the library defaults.

    Returns
    -------
    list
        One fitted result per element of ``data``, in order.

    Raises
    ------
    ValueError
        If ``options`` or ``clusterfirm`` is not one of 0, 1, 2.
    """
    # None stands in for the former shared-list defaults.
    data = [] if data is None else data
    endog = [] if endog is None else endog
    exog = [] if exog is None else exog

    if options not in (0, 1, 2):
        raise ValueError("options must be 0, 1 or 2, got %r" % (options,))
    if clusterfirm not in (0, 1, 2):
        raise ValueError(
            "clusterfirm must be 0, 1 or 2, got %r" % (clusterfirm,))

    # Progress output kept for existing callers that read stdout.
    print(endog)
    results = []
    for elem in data:
        if options == 2:
            print(type(elem))
            mod = PooledOLS(elem[endog], elem[exog])
        else:
            mod = PanelOLS(elem[endog],
                           elem[exog],
                           entity_effects=bool(options == 0),
                           time_effects=True)

        if clusterfirm == 0:
            fitted = mod.fit(cov_type='clustered', clusters=elem.gvkey)
        elif clusterfirm == 1:
            fitted = mod.fit(cov_type='clustered', cluster_entity=True)
        else:
            fitted = mod.fit()
        results.append(fitted)
    return results

예제 #3

0

파일 보기

def run_regressions(dataa, datab, endog1, endog2, exog1, exog2, options=0):
    """Fit one clustered PanelOLS per dependent variable on two data sets.

    Every column named in ``endog1`` is regressed on ``dataa[exog1]``, then
    every column named in ``endog2`` on ``datab[exog2]``. All fits use
    time effects and a clustered covariance with the frame's ``gvkey`` as
    clusters.

    Parameters
    ----------
    dataa, datab : panel frames exposing ``gvkey``.
    endog1, endog2 : iterables of dependent-variable column labels.
    exog1, exog2 : regressor column label(s) for the respective frame.
    options : int
        0 -> entity and time effects, 1 -> time effects only.

    Returns
    -------
    list
        Fitted results, ``endog1`` models first, then ``endog2`` models.

    Raises
    ------
    ValueError
        If ``options`` is neither 0 nor 1.
    """
    if options not in (0, 1):
        raise ValueError("options must be 0 or 1, got %r" % (options,))

    # Progress output kept for existing callers that read stdout.
    print(endog1)
    results = []
    jobs = ((dataa, endog1, exog1), (datab, endog2, exog2))
    for frame, dependents, regressors in jobs:
        for column in dependents:
            mod = PanelOLS(frame[column],
                           frame[regressors],
                           entity_effects=bool(options == 0),
                           time_effects=True)
            results.append(
                mod.fit(cov_type='clustered', clusters=frame.gvkey))
    return results

예제 #4

0

파일 보기

파일: project_auxiliary_plots.py 프로젝트: OpenSourceEconomics/ose-data-science-course-projeect-hugo-fb

def balancing_tests_cohort_results(df, exog):
    """Run the four cohort balancing regressions and compare them.

    Each outcome column of ``df`` is regressed on ``exog`` with entity and
    time effects, using a clustered covariance with ``df.id_e`` as clusters.
    The fitted results are handed to ``compare`` under their table labels,
    with significance stars switched on.
    """
    # (table label, outcome column) in the order the columns are reported.
    labelled_outcomes = (
        ('Size of cohort', 'adult'),
        ('Below median age', 'below_median_age_restr'),
        ('Sex ratio', 'sex_ratio'),
        ('Have families', 'have_adults_patch'),
    )

    fits = {}
    for label, column in labelled_outcomes:
        model = PanelOLS(getattr(df, column),
                         exog,
                         entity_effects=True,
                         time_effects=True,
                         singletons=False)
        # NOTE(review): singletons=False is also forwarded to fit(), exactly
        # as before -- confirm the covariance options accept it.
        fits[label] = model.fit(cov_type='clustered',
                                clusters=df.id_e,
                                singletons=False)

    return compare(fits, stars=True)

예제 #5

0

파일 보기

파일: project_auxiliary_plots.py 프로젝트: OpenSourceEconomics/ose-data-science-course-projeect-hugo-fb

def balancing_tests_cantonal_results(df, exog):
    """Run the three cantonal balancing regressions and compare them.

    These are the conditional results (AS = asylum seekers). Each share
    column of ``df`` is multiplied by 100 and regressed on ``exog`` with
    entity and time effects, using a clustered covariance with ``df.id_e``
    as clusters. The fits are passed to ``compare`` with stars enabled.
    """
    # (table label, share column) in reporting order.
    labelled_shares = (
        ('Between countries', 'share_AS_between'),
        ('Within countries', 'share_AS_within'),
        ('Sex ratio', 'sex_ratio_AS_ntc'),
    )

    fits = {}
    for label, column in labelled_shares:
        scaled_outcome = getattr(df, column) * 100
        model = PanelOLS(scaled_outcome,
                         exog,
                         entity_effects=True,
                         time_effects=True,
                         singletons=False)
        # NOTE(review): singletons=False is also forwarded to fit(), exactly
        # as before -- confirm the covariance options accept it.
        fits[label] = model.fit(cov_type='clustered',
                                clusters=df.id_e,
                                singletons=False)

    return compare(fits, stars=True)

예제 #6

0

파일 보기

	def preprocessing_regression(self):
		"""Impute and log-transform ``self.df``, then fit a time-effects PanelOLS.

		The imputation step writes back into ``self.df`` in place.

		Returns the result of ``PanelOLS.fit`` called with a clustered
		covariance and ``cluster_entity=True``.
		"""
		# Replace NaNs in the first nine columns with the column mean (written back into self.df).
		imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
		self.df.iloc[:, :9] = imputer.fit_transform(self.df.iloc[:, :9])
		# Work on the first ten columns only.
		# NOTE(review): data is a positional slice of self.df; the column assignments
		# below may hit pandas' view-vs-copy behaviour -- confirm they land in data.
		data = self.df.iloc[:, :10]
		# Natural log of the columns at positions 2, 3 and 4 (variables with outliers),
		# stored as mezun / yogunluk / dogum -- presumably those same columns; verify.
		data.mezun = np.log(self.df.iloc[:, 2])
		data.yogunluk = np.log(self.df.iloc[:, 3])
		data.dogum = np.log(self.df.iloc[:, 4])
		# Index by (iller, yil) so the frame has the two-level panel index.
		data = data.set_index(['iller', 'yil'])
		# Regress mezun on positional columns 1..8 of the indexed frame with time effects.
		# NOTE(review): positions shift after set_index -- confirm this slice excludes mezun.
		mod = PanelOLS(data.mezun, data.iloc[:, 1:9], time_effects=True)
		res = mod.fit(cov_type='clustered', cluster_entity=True)

		return res

예제 #7

0

파일 보기

def run_regressions_2(data, endog=None, exog=None, options=0):
    """Fit one clustered PanelOLS per dependent variable in ``endog``.

    Every column named in ``endog`` is regressed on ``data[exog]`` with time
    effects and a clustered covariance that uses ``data.gvkey`` as clusters.

    Parameters
    ----------
    data : panel frame exposing ``gvkey``.
    endog : iterable of dependent-variable column labels (default: none).
    exog : regressor column label(s) (default: empty list).
    options : int
        0 -> entity and time effects, 1 -> time effects only.

    Returns
    -------
    list
        One fitted result per entry of ``endog``, in order.

    Raises
    ------
    ValueError
        If ``options`` is neither 0 nor 1.
    """
    # None stands in for the former shared-list defaults.
    endog = [] if endog is None else endog
    exog = [] if exog is None else exog

    if options not in (0, 1):
        raise ValueError("options must be 0 or 1, got %r" % (options,))

    # Progress output kept for existing callers that read stdout.
    print(endog)
    results = []
    for elem in endog:
        # The regressors come from ``exog``; using another endog column here
        # would regress one dependent variable on another.
        mod = PanelOLS(data[elem],
                       data[exog],
                       entity_effects=bool(options == 0),
                       time_effects=True)
        results.append(mod.fit(cov_type='clustered', clusters=data.gvkey))
    return results

예제 #8

0

파일 보기

def panel_regression(y, xs, years, country, list_x, prev=0, show=False,
                     save=True, path="", diff=False, constant=False,
                     entity_effects=False):
    """Fit a panel model on the filtered data set and evaluate the fit.

    The data set comes from ``bdf.filter_origin_country_dataset``. The model
    is ``FirstDifferenceOLS`` when ``diff`` is set, otherwise ``PanelOLS``
    with the requested ``entity_effects``. A constant column is added to the
    regressors ``list_x`` when ``constant`` is set. ``evaluation`` is always
    called on the fitted values; the real-vs-prediction plot is drawn only
    when ``show`` is True.

    Returns
    -------
    tuple
        ``(res.params, res.fitted_values)`` of the fitted model.
    """
    first_level_labels = xs.index.levels[0].tolist()
    data = bdf.filter_origin_country_dataset(y, country, years,
                                             first_level_labels, xs, prev)

    # Flags are compared by equality (not truthiness) so that non-bool
    # values keep selecting the same branch as they always did.
    exog = (data[list_x] if constant == False
            else sm.add_constant(data[list_x]))
    mod = (PanelOLS(data.y, exog, entity_effects=entity_effects)
           if diff == False else FirstDifferenceOLS(data.y, exog))
    res = mod.fit()

    n_columns = len(xs.columns.tolist())
    evaluation(data, res.fitted_values, constant, n_columns)

    if show == True:
        pmf.plot_real_VS_prediction(y, res.fitted_values, xs, years, country,
                                    45, "Regression model", save, path)

    return res.params, res.fitted_values

예제 #9

0

파일 보기

import sys

import pandas as pd
import statsmodels.api as sm
from linearmodels.panel import PanelOLS

# Command-line arguments: path of the input CSV, then path of the text report to write.
DATA_FILE = sys.argv[1]
OUTPUT_FILE = sys.argv[2]

# Load the data and index it by (Code, date) so it can be used as a panel.
change_df = pd.read_csv(DATA_FILE)
change_df = change_df.set_index(["Code", "date"])

# Regress growth_rate on the listed regressors plus a constant column.
exog_vars = ["Income_t0_log", "nm_change", "shm_change", "ne_change", "sum_adv_t0"]
exog = sm.add_constant(change_df[exog_vars])
# NOTE(review): neither entity_effects nor time_effects is requested although the
# result is named fe_res -- confirm whether fixed effects were intended.
mod = PanelOLS(change_df.growth_rate, exog)
fe_res = mod.fit()
# Write the text rendering of the regression summary to the output file.
with open(OUTPUT_FILE, "w") as f:
    f.write(fe_res.summary.as_text())

예제 #10

0

파일 보기

파일: Q3.py 프로젝트: sairohit748/Programming-for-Data-Science

# Bare preview expression (notebook leftover); it has no effect in a script.
jtrain2[:5]
## Use (fcode, year) as the two-level panel index for the panel regressions
jtrain2 = jtrain2.set_index(['fcode', 'year'])
print(jtrain2.head(5))
exog_vars = ['d88', 'd89', 'grant', 'grant_1']
grant_vars = ['grant']
# Regressor matrices with a constant column: the full set, and grant alone.
exog = sm.add_constant(jtrain2[exog_vars])
grant0 = sm.add_constant(jtrain2[grant_vars])

## Model Pooled OLS
model_pool = PooledOLS(jtrain2.lscrap, exog)
pooled_res = model_pool.fit()
print(pooled_res)
## Model Fixed Effects -- Entity Effects - True
model_fe = PanelOLS(jtrain2.lscrap, exog, entity_effects=True)
fe_res = model_fe.fit()
print(fe_res)
## Model Fixed Effects -- Entity and Time Effects - True
model_fe = PanelOLS(jtrain2.lscrap,
                    exog,
                    entity_effects=True,
                    time_effects=True)
fe_res = model_fe.fit()
print(fe_res)
## Random Effects Model
model_re = RandomEffects(jtrain2.lscrap, exog)
re_res = model_re.fit()
# Report the random-effects fit itself, not the preceding fixed-effects one.
print(re_res)
#################################################
## Regress scrap~grant
## Model Pooled OLS

예제 #11

0

파일 보기

파일: Predict Earnings Surprises.py 프로젝트: giaphattram/ThesisCode

# Alternative model specifications, all disabled here. Each regresses an
# earnings-surprise column (UE6M / UE12M) of temp on one or more squared
# active-weight columns with entity and time effects.
# NOTE(review): `mod` must already be defined before the fit below --
# presumably by an active specification earlier in the script; verify.
#mod = PanelOLS(temp.UE6M, temp[['activeWeight3MSquared']], entity_effects = True, time_effects = True)
#mod = PanelOLS(temp.UE6M, temp[['activeWeight6MSquared']], entity_effects = True, time_effects = True)
#mod = PanelOLS(temp.UE6M, temp[['activeWeight12MSquared']], entity_effects = True, time_effects = True)


#mod = PanelOLS(temp.UE12M, temp[['activeWeight3MSquared', 'activeWeight6MSquared', 'activeWeight12MSquared']], entity_effects = True, time_effects = True)
#mod = PanelOLS(temp.UE12M, temp[['activeWeight3MSquared']], entity_effects = True, time_effects = True)
#mod = PanelOLS(temp.UE12M, temp[['activeWeight6MSquared']], entity_effects = True, time_effects = True)
#mod = PanelOLS(temp.UE12M, temp[['activeWeight12MSquared']], entity_effects = True, time_effects = True)

# =============================================================================
# Specify standard error clustering
# There are two options: cluster standard errors by
# firm (cluster_entity) or by time (cluster_time).
# Exactly one of the two fit calls below should be active.
# =============================================================================
res = mod.fit(cov_type = 'clustered', cluster_entity = True)
#res = mod.fit(cov_type = 'clustered', cluster_time = True)

# =============================================================================
# Print the regression result
# =============================================================================
print(res)

# Drop the working frame once the result has been printed.
del temp

예제 #12

0

파일 보기

파일: ps5.py 프로젝트: xinxinzhang123/CompEcon_Fall19

# CLOs have a much more positive holding period return than corporate bonds.

# In[41]:

# Part B
# 1. OLS without fixed effects
hpr_OLS = smf.ols(formula='lnhpr ~ clo+tmkt_rf+tsmb+thml+tterm+tdef+hp',
                  data=ps5)
# Regress the (log) holding period return on the common risk factors
# (tmkt_rf, tsmb, thml, tterm and tdef) and the holding period. clo is an
# indicator that is 1 if the bond is a CLO. If its coefficient is significant
# and positive, CLOs have a higher return than corporate bonds.
res = hpr_OLS.fit()
print(res.summary())
# The significant positive coefficient on clo shows that CLOs have a higher
# excess return than corporate bonds.

# In[59]:

# 2. OLS with firm fixed effects
# NOTE(review): startyear is not used again in the code shown here -- confirm
# whether this categorical is still needed.
startyear = pd.Categorical(ps5.startyear)
# Index by (entity_name, startyear) to give the frame a two-level panel index.
ps5 = ps5.set_index(['entity_name', 'startyear'])

# In[67]:

exog_vars = ['clo', 'tmkt_rf', 'tsmb', 'thml', 'tterm', 'tdef', 'hp']
exog = sm.add_constant(ps5[exog_vars])
mod = PanelOLS(ps5.lnhpr, exog, entity_effects=True)
res = mod.fit()
print(res)
# After adding firm fixed effects, the coefficient on clo is still significantly
# positive and of similar magnitude, so the argument that CLOs have a higher
# excess return than corporate bonds holds.

예제 #13

0

파일 보기

파일: main.py 프로젝트: StephLee12/OutsideJobs

    x = np.stack([calc_mat[:, 1], calc_mat[:, 2], calc_mat[:, 3], calc_mat[:, 4]])
    ones = np.ones(len(x[0]))
    X = sm.add_constant(np.column_stack((x[0], ones)))
    for elem in x[1:]:
        X = sm.add_constant(np.column_stack((elem, X)))
    res = sm.OLS(y,X).fit()
    print(res.summary())

    FE模型回归
    company_codes = []
    for each_file in file_list:
        company_code = each_file.split('.')[0]
        company_code = int(company_code)
        company_codes.append(company_code)
    time = [2019] * 50
    df = pd.DataFrame({
        'TDA': x[0],
        'CR5': x[1],
        'SIZE': x[2],
        'ROE': x[3],
        'REWARD': y,
        'YEAR': time,
        'CODE': company_codes
    })
    df.to_stata('Stock/res.dta')
    df = df.set_index(['CODE', 'YEAR'])
    exog_vars = ['TDA', 'LDA', 'SIZE', 'ROE']
    exog = sm.add_constant(df[exog_vars])
    model = PanelOLS(df['REWARD'], exog, entity_effects=True)
    fe = model.fit()
    print(fe)

예제 #14

0

파일 보기

import numpy as np
import linearmodels as lm
# Switch the package-level missing-data warning flag off, then call the
# warning helper once with a small boolean mask.
lm.WARN_ON_MISSING = False
from linearmodels import utility
utility.missing_warning(np.array([True, True, False]))

from linearmodels.panel import PanelOLS, RandomEffects, PooledOLS
from linearmodels.datasets import wage_panel
import statsmodels.api as sm
# Load the bundled wage panel and index it by (nr, year).
data = wage_panel.load()
data = data.set_index(['nr','year'])
dependent = data.lwage
# Model 1: entity + time effects, fitted twice with different covariance types.
exog = sm.add_constant(data[['expersq','married','union']])
mod = PanelOLS(dependent, exog, entity_effects=True, time_effects=True)
res = mod.fit(cov_type='unadjusted')
res2 = mod.fit(cov_type='robust')
# Model 2: entity effects only, with exper added; covariance clustered by entity.
exog = sm.add_constant(data[['exper', 'expersq','married','union']])
mod = PanelOLS(dependent, exog, entity_effects=True)
res3 = mod.fit(cov_type='clustered',cluster_entity=True)
# Model 3: random effects on the same regressors.
mod = RandomEffects(dependent, exog)
res4 = mod.fit(cov_type='robust')
from linearmodels.panel.results import compare

# Model 4: pooled OLS with year added as a categorical regressor.
# The categorical carries no index, so it is assigned to exog by position.
exog = sm.add_constant(data[['exper', 'expersq','married','union']].copy())
import pandas as pd
exog['year'] = pd.Categorical(data.reset_index()['year'])
mod = PooledOLS(dependent, exog)
res5 = mod.fit(cov_type='robust')
# Print all five results together for comparison.
print(compare([res,res2, res3, res4, res5]))

print(data.columns)

예제 #15

0

파일 보기

파일: reg.py 프로젝트: QSCTech-Sange/listed-company-group-structure

    VIF[y] = 1 / (1 - res.rsquared)
with open('../result/VIF.txt', 'w') as f:
    print(VIF, file=f)

# Pooled regression
# NOTE(review): no constant column is added here, unlike the panel models
# below -- confirm that a regression through the origin is intended.
x = data[["MV", "RM", "BM", "ROE", "Inv"]]
y = data["Ret"]
results = sm.OLS(y, x).fit()
with open('../result/pooled_reg.txt', 'w') as f:
    print(results.summary(), file=f)

# Fixed-effects regression: index by (Stkcd, Time) and absorb entity effects.
data['Time'] = pd.to_datetime(data['Time'])
data = data.set_index(['Stkcd', 'Time'])
dependent = data.Ret
exog = sm.add_constant(data[['MV', 'BM', 'RM', 'ROE', 'Inv']])
mod = PanelOLS(dependent, exog, entity_effects=True)
# NOTE(review): 'clustered' is requested without naming a cluster variable --
# confirm which clustering linearmodels applies in that case.
res = mod.fit(cov_type='clustered')
with open('../result/fixed_effects.txt', 'w') as f:
    print(res, file=f)

# Industry-controlled regression: reload the raw data and index by
# (Industry, Time), so the absorbed entity effects are per Industry value.
data = pd.read_csv("../data/data_all.csv")
data['Time'] = pd.to_datetime(data['Time'])
data = data.set_index(['Industry', 'Time'])
dependent = data.Ret
exog = sm.add_constant(data[['MV', 'BM', 'RM', 'ROE', 'Inv']])
mod = PanelOLS(dependent, exog, entity_effects=True)
res = mod.fit(cov_type='clustered')
with open('../result/industry_control.txt', 'w') as f:
    print(res, file=f)

예제 #16

0

파일 보기

# Regressor columns of df used in the growth regression below.
independent_vars = [
    'Gross fixed capital formation (% of GDP)',
    'Gross domestic savings (% of GDP)', 'Population growth (annual %)',
    'FDI, net inflows (% of GDP)', 'Aid/Gdp_sqr', 'Aid/Gdp', 'ln_ODA', 'wopen',
    'Trade'
]

# Regress ln_gdp_pc on the regressors with both entity and time effects.
mod = PanelOLS(df['ln_gdp_pc'],
               df[independent_vars],
               entity_effects=True,
               time_effects=True
               )  # entity_effects and time_effects can each be switched on or off

res = mod.fit(
    cov_type='clustered',
    cluster_entity=True)  # cov_type selects the covariance estimator
# cov_type can be 'unadjusted' / 'homoskedastic', 'robust' / 'heteroskedastic',
# or 'clustered' (one- or two-way clustering).

print(res)

# <h4> ODA - Official development assistance
#  Three significant independent variables: Aid/GDP, Aid/GDP^2, ln_ODA </h4>

# <h2><center>DIAGNOSTIC ANALYSIS</center></h2>

# <h3> 1. Unit root test </h3>
#
# The Augmented Dickey-Fuller test is a type of statistical test called a unit root test.
# The null hypothesis of the test is that the time series can be represented by a unit root, i.e. that it is not stationary (has some time-dependent structure). The alternative hypothesis (rejecting the null hypothesis) is that the time series is stationary.
#

예제 #17

0

파일 보기

파일: simulation.py 프로젝트: YuxinWang2020/panel_data_models_with_fixed_effects

 p = X.shape[0]
 # within model require no collinear variable combinations
 no_collinear_x_var = ["x" + str(i + 1) for i in range(min(p, 3))]
 # run estimator for starting value for interactive estimator
 if interactive_start_value_effect == "twoways":
     start_value_estimator = PanelOLS(
         panel_df.y,
         panel_df[no_collinear_x_var],
         entity_effects=True,
         time_effects=True,
     )
 else:
     start_value_estimator = PooledOLS(
         panel_df.y, panel_df[["x" + str(i) for i in range(1, p + 1)]]
     )
 start_value_result = start_value_estimator.fit()
 interactive_start_value = [
     *start_value_result.params.tolist(),
     *np.zeros(p - len(start_value_result.params)),
 ]
 # run interactive fixed effect estimator
 interactive_estimator = InteractiveFixedEffect(Y, X)
 (
     beta_hat_interactive,
     beta_hat_list,
     f_hat,
     lambda_hat,
 ) = interactive_estimator.fit(r, interactive_start_value, tolerance)
 # run within estimator with the same data
 if within_effect == interactive_start_value_effect:
     within_result = start_value_result

예제 #18

0

파일 보기

# Prepare the variables: index by (crsp_portno, slided_caldt) and drop rows
# whose AFP value is -inf or +inf.
temp = temp.set_index(['crsp_portno', 'slided_caldt'])
temp = temp[temp.AFP != -np.inf]
temp = temp[temp.AFP != np.inf]

# Comment / uncomment the lines below to choose the dependent variable.
# Exactly one `mod = ...` line should be active.
# (1) Carhart Alpha
#mod = PanelOLS(temp.CarhartAlpha, temp.AFP, time_effects = True)
# (2) Fama-French Alpha
mod = PanelOLS(temp.FFAlpha, temp.AFP, time_effects=True)

# Comment / uncomment the lines below to choose how standard errors are
# clustered. Exactly one `res = ...` line should be active.
# (1) clustered by fund
#res = mod.fit(cov_type = 'clustered', cluster_entity = True)
# (2) clustered by time
res = mod.fit(cov_type='clustered', cluster_time=True)

# Print the regression result
print(res)

# Drop the working frame once the result has been printed.
del temp
#%%
# =============================================================================
# Part 2: Panel Data Regression with multiple independent variables:
# - AFP
# - Fund characteristics: log(TNA), log(Age), expense ratio, turnover ratio
# - First order lag of dependent variable
#
# =============================================================================
from linearmodels.panel import PanelOLS

예제 #19

0

파일 보기

def panel_regression_training_test(y,
                                   xs,
                                   years_training,
                                   years_test,
                                   country,
                                   list_x,
                                   prev=0,
                                   show=False,
                                   save=True,
                                   path="",
                                   diff=False,
                                   constant=False,
                                   entity_effects=False):
    """Fit a panel model on the training years and evaluate it on the test years.

    The data set comes from ``bdf.filter_origin_country_dataset`` for
    ``years_training + years_test`` and is split by selecting the year lists
    on the second index level. The model is ``FirstDifferenceOLS`` when
    ``diff`` is set, otherwise ``PanelOLS`` with the requested
    ``entity_effects``. A constant column is added to the regressors
    ``list_x`` when ``constant`` is set.

    Test-period predictions are built by multiplying the test regressors by
    the estimated parameters; their row sum is stored in a ``fitted_values``
    column. ``evaluation`` is called on the test slice. The
    real-vs-prediction plot is drawn only when ``show`` is True, and is
    saved under ``path``.

    Returns
    -------
    tuple
        ``(res_tr.params, fitted_values_)``, where ``fitted_values_`` is the
        test-period prediction frame stacked on the training fitted values
        and sorted by index.
    """
    # Function-scope import: pandas is needed only for the concat below.
    import pandas as pd

    years = years_training + years_test
    data = bdf.filter_origin_country_dataset(y, country, years,
                                             xs.index.levels[0].tolist(), xs,
                                             prev)

    data_tr = data.loc[(slice(None), years_training), :]
    data_te = data.loc[(slice(None), years_test), :]

    # Flags are compared by equality (not truthiness) so that non-bool values
    # keep selecting the same branch as they always did.
    if constant == False:
        exog_tr = data_tr[list_x]
        exog_te = data_te[list_x]
    else:
        exog_tr = sm.add_constant(data_tr[list_x])
        exog_te = sm.add_constant(data_te[list_x])

    if diff == False:
        mod = PanelOLS(data_tr.y, exog_tr, entity_effects=entity_effects)
    else:
        mod = FirstDifferenceOLS(data_tr.y, exog_tr)
    res_tr = mod.fit()

    # Per-regressor contributions on the test rows; their row sum is the
    # out-of-sample prediction.
    fitted_values_te = res_tr.params.values * exog_te
    fitted_values_te["fitted_values"] = fitted_values_te.sum(axis=1)
    # DataFrame.append no longer exists in pandas 2.x; concat stacks the two
    # frames the same way.
    fitted_values_ = pd.concat([fitted_values_te, res_tr.fitted_values])
    fitted_values_ = fitted_values_.sort_index()
    if show == True:
        pmf.plot_real_VS_prediction(y,
                                    fitted_values_,
                                    xs,
                                    years,
                                    country,
                                    45,
                                    "Regression model",
                                    save=save,
                                    path=path)

    print("-------------- Trainin-Test  Results --------------")
    evaluation(data.loc[(slice(None), years_test), ], fitted_values_te,
               constant, len(xs.columns.tolist()))

    return (res_tr.params, fitted_values_)

예제 #20

0

파일 보기

파일: project_auxiliary_plots.py 프로젝트: OpenSourceEconomics/ose-data-science-course-projeect-hugo-fb

def baseline_results_women(df):
    """Fit the four baseline violent-crime regressions for adult women.

    The sample is the ``sex == "F"`` group of ``df`` without rows whose
    ``age`` is 18 or below. Four PanelOLS models of
    ``crime_rate_all_violent_p30`` (entity and time effects, absorbed
    regressors dropped) are fitted on the full sample and on the
    ``allmk_periode``, ``all_periode`` and ``mk_periode`` sub-samples, each
    with its own family of exposure dummies plus a constant. The fits are
    passed to ``compare`` under the labels 'Full', 'CC and MK', 'CC' and
    'MK', with stars enabled.
    """

    def exposure_columns(kid_column, prefix, last):
        # kid_column followed by <prefix>_13 ... <prefix>_<last>.
        return [kid_column] + [
            "%s_%d" % (prefix, n) for n in range(13, last + 1)
        ]

    def fit_clustered(panel, columns, cluster_ids):
        # Shared specification of all four models.
        # NOTE(review): the keyword passed to fit() is `cluster`, while the
        # sibling balancing tests pass `clusters` -- confirm this one is
        # honoured by linearmodels. singletons=False is also forwarded to
        # fit(); confirm it is accepted there.
        regressors = sm.add_constant(panel[columns])
        model = PanelOLS(panel.crime_rate_all_violent_p30,
                         regressors,
                         entity_effects=True,
                         time_effects=True,
                         drop_absorbed=True,
                         singletons=False)
        return model.fit(cov_type='clustered',
                         cluster=cluster_ids,
                         singletons=False)

    women = df.groupby(by=['sex']).get_group("F")
    adult_women = women[~(women['age'] <= 18)]

    # Full sample. Note the index order here is (id_e_t, id_a), the reverse
    # of the three sub-sample models below.
    full_panel = adult_women.set_index(["id_e_t", "id_a"])
    res_women = fit_clustered(full_panel,
                              exposure_columns("kid012_all", "all_exp", 70),
                              full_panel.id_e)

    # 'CC and MK' sub-sample.
    ccmk_rows = adult_women[(adult_women['allmk_periode'] == 1)]
    ccmk_panel = ccmk_rows.set_index(["id_a", "id_e_t"])
    res2_women = fit_clustered(ccmk_panel,
                               exposure_columns("kid012_all", "all_exp", 71),
                               ccmk_panel["id_e"])

    # 'CC' sub-sample.
    cc_rows = adult_women[(adult_women['all_periode'] == 1)]
    cc_panel = cc_rows.set_index(["id_a", "id_e_t"])
    res3_women = fit_clustered(cc_panel,
                               exposure_columns("kid012", "exp_all", 71),
                               cc_panel["id_e"])

    # Table 5 column 4 women ('MK' sub-sample); dummies 71-86 had to be
    # dropped from this specification.
    mk_rows = adult_women[(adult_women['mk_periode'] == 1)]
    mk_panel = mk_rows.set_index(["id_a", "id_e_t"])
    # NOTE(review): the cluster ids for this model come from the
    # allmk_periode sub-sample, not from the mk_periode one -- confirm.
    res4_women = fit_clustered(mk_panel,
                               exposure_columns("MK_kid012", "exp_mk", 70),
                               ccmk_rows["id_e"])

    return compare(
        {
            'Full': res_women,
            'CC and MK': res2_women,
            'CC': res3_women,
            'MK': res4_women
        },
        stars=True)

예제 #21

0

파일 보기

파일: project_auxiliary_plots.py 프로젝트: OpenSourceEconomics/ose-data-science-course-projeect-hugo-fb

def crime_by_type(df):
    """Fit the baseline fixed-effects regression once per crime category.

    ``df`` is re-indexed by ``("id_e_t", "id_a")`` and a ``PanelOLS`` model
    with entity and time effects is estimated for each outcome column. Every
    model uses the same regressors: a constant, ``kid012_all`` and
    ``all_exp_13`` .. ``all_exp_86``. Standard errors are clustered on the
    ``id_e`` column.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``id_e_t``, ``id_a``, ``id_e``, the
        regressors named above and the four ``crime_rate_*_p30`` outcomes.

    Returns
    -------
    The object produced by ``compare(..., stars=True)`` with one column per
    category, in the order ``violent``, ``freedom``, ``sexual``, ``property``.
    """
    # NOTE(review): linearmodels treats the first index level as the entity
    # and the second as time -- confirm ("id_e_t", "id_a") is the intended
    # order.
    mi_data = df.set_index(["id_e_t", "id_a"])

    # "kid012_all" followed by all_exp_13 .. all_exp_86 (inclusive).
    exog_vars = ["kid012_all"] + [f"all_exp_{i}" for i in range(13, 87)]
    exog_baseline_type = sm.add_constant(mi_data[exog_vars])

    # (label shown in the comparison table, outcome column), in table order.
    outcomes = (
        ("violent", "crime_rate_violent_p30"),
        ("freedom", "crime_rate_freedom_p30"),
        ("sexual", "crime_rate_sexual_p30"),
        ("property", "crime_rate_property_p30"),
    )

    results = {}
    for label, column in outcomes:
        model = PanelOLS(mi_data[column],
                         exog_baseline_type,
                         entity_effects=True,
                         time_effects=True,
                         drop_absorbed=True,
                         singletons=True)
        # The clustered-covariance option is spelled ``clusters``; the former
        # ``cluster=`` keyword is not a documented option, so the id_e
        # clustering was not being requested correctly.
        results[label] = model.fit(cov_type='clustered',
                                   clusters=mi_data["id_e"])

    return compare(results, stars=True)

예제 #22

0

파일 보기

파일: project_auxiliary_plots.py 프로젝트: OpenSourceEconomics/ose-data-science-course-projeect-hugo-fb

def baseline_results(df):
    """Estimate the four columns of the baseline results table.

    Each column regresses ``crime_rate_all_violent_p30`` on a constant plus a
    column-specific set of regressors, using ``PanelOLS`` with entity and time
    effects and standard errors clustered on ``id_e``:

    * ``Full``      -- all rows of ``df``; ``kid012_all`` and
      ``all_exp_13`` .. ``all_exp_86``.
    * ``CC and MK`` -- rows with ``allmk_periode == 1``; same regressors.
    * ``CC``        -- rows with ``all_periode == 1``; the ``kid012`` column
      is used under the name ``kid012_all`` together with
      ``exp_all_13`` .. ``exp_all_86``.
    * ``MK``        -- rows with ``mk_periode == 1``; ``MK_kid012`` and
      ``exp_mk_13`` .. ``exp_mk_83``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide ``id_e_t``, ``id_a``, ``id_e``, the three ``*_periode``
        flags, the outcome column and every regressor listed above.

    Returns
    -------
    The object produced by ``compare(..., stars=True)`` for the four fits.
    """
    # "kid012_all" followed by all_exp_13 .. all_exp_86 (inclusive); shared by
    # the first two columns.
    all_exposure = ["kid012_all"] + [f"all_exp_{i}" for i in range(13, 87)]

    ##first column of baseline
    # NOTE(review): this column indexes by ("id_e_t", "id_a") while the other
    # three use ("id_a", "id_e_t"); the original ordering is kept -- confirm
    # that the difference is intentional.
    res = _fit_baseline_column(df.set_index(["id_e_t", "id_a"]), all_exposure)

    ##second column of baseline results
    allmk_sample = df[df['allmk_periode'] == 1]
    res2 = _fit_baseline_column(allmk_sample.set_index(["id_a", "id_e_t"]),
                                all_exposure)

    ##third column of baseline results
    all_sample = df[df['all_periode'] == 1]
    # Swap in "kid012" under the name "kid012_all" for this column.
    all_sample = all_sample.drop(['kid012_all'], axis=1)
    all_sample = all_sample.rename(columns={"kid012": "kid012_all"})
    exog_vars3 = ["kid012_all"] + [f"exp_all_{i}" for i in range(13, 87)]
    res3 = _fit_baseline_column(all_sample.set_index(["id_a", "id_e_t"]),
                                exog_vars3)

    ##4th column
    mk_sample = df[df['mk_periode'] == 1]
    # exp_mk_* stops at 83 for this column (not 86 as in the other lists).
    exog_vars4 = ["MK_kid012"] + [f"exp_mk_{i}" for i in range(13, 84)]
    res4 = _fit_baseline_column(mk_sample.set_index(["id_a", "id_e_t"]),
                                exog_vars4)

    ##presentation
    return compare(
        {
            'Full': res,
            'CC and MK': res2,
            'CC': res3,
            'MK': res4
        },
        stars=True)


def _fit_baseline_column(panel, exog_vars):
    """Fit one baseline column on an already two-level-indexed ``panel``.

    Regresses ``panel.crime_rate_all_violent_p30`` on a constant plus
    ``exog_vars`` with entity and time effects and returns the fit clustered
    on ``panel.id_e``.
    """
    exog = sm.add_constant(panel[exog_vars])
    model = PanelOLS(panel.crime_rate_all_violent_p30,
                     exog,
                     entity_effects=True,
                     time_effects=True,
                     singletons=False)
    # ``singletons`` is a model-constructor option (set above). ``fit`` only
    # forwards extra keywords to the covariance estimator, so it is not
    # repeated here.
    return model.fit(cov_type='clustered', clusters=panel.id_e)