コード例 #1
0
def run_regressions_3(data=None, endog=None, exog=None, options=0, clusterfirm=0):
    """Fit the same panel regression on every data set in ``data``.

    Parameters
    ----------
    data : iterable, optional
        Data sets to fit; each element is indexed with ``endog`` and ``exog``
        and must expose a ``gvkey`` attribute when ``clusterfirm == 0``.
        ``None`` (the default) is treated as an empty list.
    endog, exog : optional
        Keys used to select the dependent variable and the regressors from
        each element. ``None`` is treated as an empty list.
    options : int
        Model selector: ``0`` = PanelOLS with entity and time effects,
        ``1`` = PanelOLS with time effects only, ``2`` = PooledOLS.
    clusterfirm : int
        Covariance selector: ``0`` = clustered on ``elem.gvkey``,
        ``1`` = clustered by entity, ``2`` = the estimator's default.

    Returns
    -------
    list
        One fitted result per element of ``data``, in order.

    Raises
    ------
    ValueError
        If ``options`` or ``clusterfirm`` is not 0, 1 or 2. Previously an
        unknown ``options`` raised ``UnboundLocalError`` and an unknown
        ``clusterfirm`` silently produced an empty result list.
    """
    # None sentinels replace the former mutable ``[]`` defaults.
    data = [] if data is None else data
    endog = [] if endog is None else endog
    exog = [] if exog is None else exog

    if options not in (0, 1, 2):
        raise ValueError(f"options must be 0, 1 or 2, got {options!r}")
    if clusterfirm not in (0, 1, 2):
        raise ValueError(f"clusterfirm must be 0, 1 or 2, got {clusterfirm!r}")

    results = []
    print(endog)
    for elem in data:
        if options == 2:
            print(type(elem))
            mod = PooledOLS(elem[endog], elem[exog])
        else:
            # options 0 and 1 differ only in whether entity effects are used.
            mod = PanelOLS(elem[endog],
                           elem[exog],
                           entity_effects=(options == 0),
                           time_effects=True)

        if clusterfirm == 0:
            fitted = mod.fit(cov_type='clustered', clusters=elem.gvkey)
        elif clusterfirm == 1:
            fitted = mod.fit(cov_type='clustered', cluster_entity=True)
        else:
            fitted = mod.fit()
        results.append(fitted)
    return results
コード例 #2
0
ファイル: regression.py プロジェクト: ryansteed/datamaster
def model_pooled(df):
    """Fit a pooled OLS of ``lknowledge_forward_cites`` and hand it to ``fit_write``.

    The model is passed to ``fit_write`` twice: once with entity-clustered
    covariance and once with entity-and-time clustering. ``fit_write`` is
    defined elsewhere; presumably it fits the model and persists the result
    under the given label -- confirm against its definition.

    ``df`` must have a two-level index and the columns ``t``, ``patent_date``,
    ``log(knowledge_forward_cites)`` plus everything listed in ``exog_vars``.

    NOTE(review): the first three assignments write to the caller's ``df`` in
    place (``age``, ``agesq`` are added and ``t`` is replaced) before ``df`` is
    rebound by ``rename``. Confirm callers expect this.

    Nothing is returned.
    """
    # Age is the gap between ``t`` and ``patent_date`` divided by a one-year timedelta.
    # NOTE(review): recent pandas releases are believed to reject the 'Y'
    # timedelta unit as ambiguous -- verify against the pinned pandas version.
    df["age"] = (df["t"] - df["patent_date"]) / np.timedelta64(1, 'Y')
    df["agesq"] = np.square(df.age)
    # ``t`` is also used as a regressor below, so it is made categorical here.
    df["t"] = pd.Categorical(df.t)

    # Rename the dependent variable so it is reachable via attribute access;
    # ``index=str`` additionally maps the index labels through ``str``.
    df = df.rename(index=str, columns={
        "log(knowledge_forward_cites)": "lknowledge_forward_cites"
    })
    # Cast the two index levels to int and datetime64[ns]; presumably this
    # undoes the ``str`` conversion applied by ``rename`` above -- confirm.
    df.index = df.index.set_levels([
        df.index.levels[0].astype(int),
        df.index.levels[1].astype('datetime64[ns]')
    ])

    # Regressors for the pooled model (a constant is added below).
    exog_vars = [
        "t",
        "source",
        'log(patent_num_claims)',
        'log(avg_inventor_total_num_patents)',
        'log(patent_processing_time)',
        'one-hot_assignee_type_3',
        'one-hot_assignee_type_4',
        'one-hot_assignee_type_5',
        'one-hot_assignee_type_6',
        'one-hot_assignee_type_7',
        'one-hot_assignee_type_9',
        'age',
        'agesq'
    ]
    exog = add_constant(df[exog_vars])

    mod = PooledOLS(df.lknowledge_forward_cites, exog)
    # robust_res = fit_write(mod, "robust", cov_type='robust')
    # Same model, two clustering schemes: by entity, then by entity and time.
    fit_write(mod, "entity", cov_type='clustered', cluster_entity=True)
    fit_write(mod, "entity-time", cov_type='clustered', cluster_entity=True, cluster_time=True)
コード例 #3
0
def regressions(data, endog, exog, options, clusterfirm, constant):
    """Fit one panel regression on ``data`` and return the fitted result.

    Parameters
    ----------
    data
        Panel data set; indexed with ``endog`` and ``exog`` and must expose a
        ``gvkey`` attribute when ``clusterfirm == 0``.
    endog, exog
        Keys selecting the dependent variable and the regressors from ``data``.
    options : int
        Model selector: ``0`` = PanelOLS with entity and time effects,
        ``1`` = PanelOLS with time effects only, ``2`` = PooledOLS.
    clusterfirm : int
        Covariance selector: ``0`` = clustered on ``data.gvkey``,
        ``1`` = clustered by entity, ``2`` = the estimator's default.
    constant : int
        ``1`` to add a constant column to the regressors, ``0`` to use them
        as selected.

    Returns
    -------
    The object returned by the model's ``fit``.

    Raises
    ------
    ValueError
        If ``options``, ``clusterfirm`` or ``constant`` is outside its allowed
        set. Previously such values left ``exog``, ``mod`` or ``results``
        unbound or wrong and failed later with an unrelated error.
    """
    # Validate every selector before touching the data so a bad call fails
    # immediately with a clear message.
    if constant not in (0, 1):
        raise ValueError(f"constant must be 0 or 1, got {constant!r}")
    if options not in (0, 1, 2):
        raise ValueError(f"options must be 0, 1 or 2, got {options!r}")
    if clusterfirm not in (0, 1, 2):
        raise ValueError(f"clusterfirm must be 0, 1 or 2, got {clusterfirm!r}")

    regressors = data[exog]
    if constant == 1:
        regressors = sm.add_constant(regressors)

    if options == 2:
        mod = PooledOLS(data[endog], regressors)
    else:
        # options 0 and 1 differ only in whether entity effects are used.
        mod = PanelOLS(data[endog],
                       regressors,
                       entity_effects=(options == 0),
                       time_effects=True)

    if clusterfirm == 0:
        return mod.fit(cov_type='clustered', clusters=data.gvkey)
    if clusterfirm == 1:
        return mod.fit(cov_type='clustered', cluster_entity=True)
    return mod.fit()
def task_factor_estimate_interactive_fixed_effects_model(produces):
    """
    Task for estimating factor numbers in interactive fixed effects model.
    We choose different penalty functions g1,g2,g3 with criteria PC and IC.

    For every (N, T) pair, ``nsims`` panels are simulated. Each panel is
    estimated with ``InteractiveFixedEffect``, started from pooled-OLS
    coefficients, and ``FactorEstimator.r_hat`` is evaluated on the residuals
    for the six criterion/penalty combinations. The per-case column means are
    written to ``produces`` as CSV, one row per (N, T) case.

    Parameters
    ----------
    produces
        Destination passed straight to ``DataFrame.to_csv``.
    """
    rmax = 8
    nsims = 1000
    all_N = [100, 100, 100, 100, 10, 20, 50]
    all_T = [10, 20, 50, 100, 100, 100, 100]
    dgp_func = dgp_interactive_fixed_effects_model_with_common_and_time_invariant
    tolerance = 0.0001
    beta_true = {"beta1": 1, "beta2": 3, "mu": 5, "gamma": 2, "delta": 4}
    r0 = 8
    # Loop-invariant pieces hoisted out of the simulation loops.
    regressor_names = ["x" + str(k) for k in range(1, 6)]
    criteria = [
        ("PC1", "PC", 1),
        ("PC2", "PC", 2),
        ("PC3", "PC", 3),
        ("IC1", "IC", 1),
        ("IC2", "IC", 2),
        ("IC3", "IC", 3),
    ]
    # One single-row frame of means per (N, T) case. They are combined once at
    # the end because DataFrame.append no longer exists in pandas >= 2.0.
    case_means = []
    np.random.seed(123)
    for N, T in zip(all_N, all_T):
        df_sim = pd.DataFrame(
            index=range(nsims),
            columns=["T", "N", "PC1", "PC2", "PC3", "IC1", "IC2", "IC3"],
        )
        df_sim["T"] = [T] * nsims
        df_sim["N"] = [N] * nsims
        for i in range(nsims):
            X, Y, panel_df = dgp_func(T, N, **beta_true)
            # Pooled OLS coefficients serve as the starting value.
            start_value_estimator = PooledOLS(panel_df.y,
                                              panel_df[regressor_names])
            start_value_result = start_value_estimator.fit()
            interactive_start_value = start_value_result.params.tolist()
            interactive_estimator = InteractiveFixedEffect(Y, X)
            # Only the coefficient estimate is needed; the other three
            # returned values are unused here.
            beta_hat, _, _, _ = interactive_estimator.fit(
                r0, interactive_start_value, tolerance)
            residual = Y - (X.T.dot(beta_hat)).T
            factor_estimator = FactorEstimator(residual)
            for column, criterion, penalty in criteria:
                df_sim.loc[i, column] = factor_estimator.r_hat(
                    rmax, criterion, penalty)
        case_means.append(pd.DataFrame(df_sim.mean(axis=0)).T)
    df_factor_estimate = pd.concat(case_means).reset_index(drop=True)
    df_factor_estimate.to_csv(produces, index=False)
コード例 #5
0
# Compare pooled, fixed-effects and random-effects specifications on `jtrain`.
# NOTE(review): the bare expressions below (`head()`, `count()`, `summary()`,
# `jtrain2[:5]`) discard their results when run as a script; they look like
# notebook cells -- confirm whether they should be printed.
jtrain.head()
jtrain[jtrain['grant'] == 1].count()
# Formula-based OLS on the un-indexed frame (`smf` is presumably
# statsmodels.formula.api -- confirm against the imports).
scrap_panel_pool = smf.ols('scrap~d88+d89+grant+grant_1', data=jtrain).fit()
scrap_panel_pool.summary()
# This binds a second name to the same object; it is not a copy.
jtrain2 = jtrain
jtrain2[:5]
## Define the ID and Time column for Panel Regression
jtrain2 = jtrain2.set_index(['fcode', 'year'])
print(jtrain2.head(5))
exog_vars = ['d88', 'd89', 'grant', 'grant_1']
grant_vars = ['grant']
exog = sm.add_constant(jtrain2[exog_vars])
# `grant0` is built here but not used in the visible part of the script.
grant0 = sm.add_constant(jtrain2[grant_vars])

## Model Pooled OLS
model_pool = PooledOLS(jtrain2.lscrap, exog)
pooled_res = model_pool.fit()
print(pooled_res)
## Model Fixed Effects -- Entity Effects - True
model_fe = PanelOLS(jtrain2.lscrap, exog, entity_effects=True)
fe_res = model_fe.fit()
print(fe_res)
## Model Fixed Effects -- Entity and Time Effects - True
# `model_fe` / `fe_res` are rebound here; the entity-only results above are
# no longer reachable after this point.
model_fe = PanelOLS(jtrain2.lscrap,
                    exog,
                    entity_effects=True,
                    time_effects=True)
fe_res = model_fe.fit()
print(fe_res)
## Random Effects Model
# The model is constructed but not fitted in the visible part of the script.
model_re = RandomEffects(jtrain2.lscrap, exog)
コード例 #6
0
#          'dTrVo_pd',
#          'vVol_pd',
#          'dTrVa_pd',])
#corrMatt = corrMat.corr()

# Build one correlation matrix per key in `ttic`, stored in `corrMattt`.
corrMattt = {}
for char in ttic:
    # Placeholder value; it is overwritten at the end of this iteration.
    corrMattt[char] = []
    # Put the seven series for this key side by side (axis=1), labelling each
    # column with the name of the container it came from.
    corrMat = pd.concat([
        esRol_pd[char], arES_pd[char], adRe_pd[char], dMaCa_pd[char],
        dTrVo_pd[char], vVol_pd[char], dTrVa_pd[char]
    ],
                        axis=1,
                        keys=[
                            'esRol_pd',
                            'arES_pd',
                            'adRe_pd',
                            'dMaCa_pd',
                            'dTrVo_pd',
                            'vVol_pd',
                            'dTrVa_pd',
                        ])
    # Pairwise correlations between the seven columns.
    corrMatt = corrMat.corr()
    corrMattt[char] = corrMatt

# Pooled regression:
# https://bashtage.github.io/linearmodels/doc/panel/examples/examples.html
# Author's note (translated): something is wrong here... I don't know how to
# do pooled OLS...
#
# The regressors are placed side by side (axis=1) so that each becomes one
# column of the design matrix. The previous call used the default axis=0,
# which stacked the three inputs end to end and so no longer lined up
# row-for-row with the dependent variable.
# NOTE(review): PooledOLS is documented to expect entity/time (two-level)
# indexed data; confirm that the 'AY' inputs carry such an index.
pReg = pd.concat(
    [dMaCa_pd['AY'], dTrVo_pd['AY'], vVol_pd['AY']],
    axis=1,
    keys=['dMaCa_pd', 'dTrVo_pd', 'vVol_pd'],
)
pRegg = PooledOLS(esRol_pd['AY'], pReg)
pooled_res = pRegg.fit()
コード例 #7
0
# Fit several specifications of a log-wage equation and print them side by
# side with `compare`.
# NOTE(review): `data` is used here but only assigned on the last line of this
# snippet, and `pd` is imported after this point -- the statements look like
# notebook cells run out of order; confirm the intended order.
dependent = data.lwage
# First specification leaves out 'exper' and uses entity + time effects.
exog = sm.add_constant(data[['expersq','married','union']])
mod = PanelOLS(dependent, exog, entity_effects=True, time_effects=True)
# Same model, two covariance estimators.
res = mod.fit(cov_type='unadjusted')
res2 = mod.fit(cov_type='robust')
# Second specification adds 'exper' and keeps entity effects only.
exog = sm.add_constant(data[['exper', 'expersq','married','union']])
mod = PanelOLS(dependent, exog, entity_effects=True)
res3 = mod.fit(cov_type='clustered',cluster_entity=True)
# Random effects on the same regressors.
mod = RandomEffects(dependent, exog)
res4 = mod.fit(cov_type='robust')
from linearmodels.panel.results import compare

# Pooled OLS with 'year' added as a categorical regressor; `.copy()` keeps the
# new column from being written into a view of `data`.
exog = sm.add_constant(data[['exper', 'expersq','married','union']].copy())
import pandas as pd
exog['year'] = pd.Categorical(data.reset_index()['year'])
mod = PooledOLS(dependent, exog)
res5 = mod.fit(cov_type='robust')
print(compare([res,res2, res3, res4, res5]))

print(data.columns)


from linearmodels.panel.data import PanelData
import numpy as np
import pandas as pd
from statsmodels.datasets import grunfeld

from linearmodels.panel.model import RandomEffects
from linearmodels.tests.panel._utility import generate_data

# NOTE(review): hard-coded, machine-specific Windows path.
data = pd.read_stata(r'C:\git\linearmodels\linearmodels\tests\panel\results\simulated-panel.dta')
コード例 #8
0
# Build a panel of changes between consecutive entries of `yearlist`, one row
# per country and period, then fit a pooled OLS of income growth on the
# columns named in EXOG and write the summary to OUTPUT.
period_frames = []
for index, start in enumerate(yearlist[:-1]):
    end = yearlist[index + 1]
    ter_start = ternarydata[ternarydata.YEAR == start]
    ter_end = ternarydata[ternarydata.YEAR == end]
    # After the merge, `_x` columns hold the start-year values and `_y`
    # columns the end-year values.
    termeta = ter_start.merge(ter_end, on='Country Code', how="inner")
    termeta = termeta.dropna()
    termeta['nm_change'] = termeta['NM_y'] - termeta['NM_x']
    termeta['shm_change'] = termeta['SHM_y'] - termeta['SHM_x']
    termeta['ne_change'] = termeta['NE_y'] - termeta['NE_x']
    termeta['net_change'] = termeta['ne_change'] - termeta['nm_change']
    termeta['growth_rate'] = (termeta['Income_y'] -
                              termeta['Income_x']) / (termeta['Income_x'])
    # The period's position in `yearlist` serves as the time index.
    termeta['date'] = index
    temp_df = termeta[[
        'Country Code', 'date', 'nm_change', 'shm_change', 'ne_change',
        'net_change', 'Income_x', 'Income_y', 'growth_rate'
    ]]
    period_frames.append(temp_df)

# Concatenate once instead of growing the frame inside the loop: this avoids
# re-copying all earlier rows each iteration and avoids concatenating with an
# initial empty frame. An empty frame is kept when there are no periods, as
# before.
change_df = pd.concat(period_frames) if period_frames else pd.DataFrame()

change_df = change_df.sort_values(by=['Country Code', 'date'])
change_df = change_df.set_index(['Country Code', 'date'])
change_df['log_income'] = np.log10(change_df['Income_x'])

# EXOG is a comma-separated list of regressor column names.
exog_vars = EXOG.split(",")
exog = sm.add_constant(change_df[exog_vars])
mod = PooledOLS(change_df.growth_rate, exog)
fe_res = mod.fit()
with open(OUTPUT, 'w') as fh:
    fh.write(fe_res.summary.as_text())
コード例 #9
0
# print(data1)

# Fit entity-effects, pooled, random-effects and between estimators of
# 'Accelerator pedal position' on `data1` and print a comparison.

# Capture 'Date' as a categorical before it is moved into the index, then add
# it back as an ordinary column so it can be used as a regressor.
d = pd.Categorical(data1['Date'])
data1 = data1.set_index(['ID', 'Date'])
data1['Date'] = d
# print(data1)

# Regressors including the categorical 'Date' column (used by `mod` only).
exog_vars = [
    'Kilo', 'Brakes', 'Range', 'Speed', 'RPM', 'Engine fuel rate', 'Date'
]
# The same regressors without 'Date' (used by the other three models).
a = ['Kilo', 'Brakes', 'Range', 'Speed', 'RPM', 'Engine fuel rate']
print(data1[a])
exog = sm.add_constant(data1[exog_vars])
exog1 = sm.add_constant(data1[a])
mod = PanelOLS(data1['Accelerator pedal position'],
               exog,
               entity_effects=True,
               time_effects=False)
mod1 = PooledOLS(data1['Accelerator pedal position'], exog1)
mod2 = RandomEffects(data1['Accelerator pedal position'], exog1)
mod3 = BetweenOLS(data1['Accelerator pedal position'], exog1)
res = mod.fit()
pooled_res = mod1.fit()
re_res = mod2.fit()
be_res = mod3.fit()
print(res)

# The entity-effects result `res` is printed above but is not part of this
# comparison table.
print(compare({'Pooled': pooled_res, 'RE': re_res, 'BE': be_res}))

# Placeholder entry point; it does nothing.
if __name__ == '__main__':
    pass
コード例 #10
0
    '/media/guolewen/intraday_data/needs/day_stock_computed_data/full_data_with_volg.csv'
)
# Capture both index keys as categoricals before they are moved into the
# index, then add them back as ordinary columns so they can be used as dummy
# regressors.
Date = pd.Categorical(df.Date)
Ticker = pd.Categorical(df.Ticker)
df = df.set_index(["Ticker", "Date"])
df["Date1"] = Date
df["Ticker1"] = Ticker
# Variable Constructions
df['lognprints'] = np.log(df['NPRINTs'])
w = (df['YENVOL'] / 100) * (df['volatility'])
df['logwdbw'] = np.log(w / (601000 * 0.016))
# Drop every row that contains NaN or +/-inf in any column (the log above
# yields -inf when volatility equals 0). `axis` is passed by keyword because
# pandas >= 2.0 no longer accepts it positionally.
df = df[~df.isin([np.nan, np.inf, -np.inf]).any(axis=1)]
# Pooled OLS
exog = sm.add_constant(df['logwdbw'])
mod = PooledOLS(df.lognprints, exog)
pooled_res = mod.fit()
print(pooled_res)
"""
# fixed effects with time dummy
exog = sm.add_constant(df[['logwdbw','Date1']])
mod = PanelOLS(df.lognprints, exog, entity_effects=True)
fe_res1 = mod.fit()
print(fe_res1)
"""
# Time effects absorbed by the estimator, with the categorical ticker column
# added as a regressor.
exog = sm.add_constant(df[['logwdbw', "Ticker1"]])
mod = PanelOLS(df.lognprints, exog, time_effects=True)
fe_res3 = mod.fit()
print(fe_res3)
コード例 #11
0
 # generate simulation data
 X, Y, panel_df = dgp_func(T_N_sim.loc[case, "T"], T_N_sim.loc[case, "N"])
 p = X.shape[0]
 # the within model requires regressors with no collinear combinations
 no_collinear_x_var = ["x" + str(i + 1) for i in range(min(p, 3))]
 # run estimator for starting value for interactive estimator
 if interactive_start_value_effect == "twoways":
     start_value_estimator = PanelOLS(
         panel_df.y,
         panel_df[no_collinear_x_var],
         entity_effects=True,
         time_effects=True,
     )
 else:
     start_value_estimator = PooledOLS(
         panel_df.y, panel_df[["x" + str(i) for i in range(1, p + 1)]]
     )
 start_value_result = start_value_estimator.fit()
 interactive_start_value = [
     *start_value_result.params.tolist(),
     *np.zeros(p - len(start_value_result.params)),
 ]
 # run interactive fixed effect estimator
 interactive_estimator = InteractiveFixedEffect(Y, X)
 (
     beta_hat_interactive,
     beta_hat_list,
     f_hat,
     lambda_hat,
 ) = interactive_estimator.fit(r, interactive_start_value, tolerance)
 # run within estimator with the same data