def random_effects(panel_data, formula, weights=None, cov="unadjusted"):
    """
    Estimate a Random Effects model from a formula, print its summary and return the fit.

    The model can be specified WITH or WITHOUT a constant. Random Effects is the usual choice
    when the unobserved effects are assumed to be uncorrelated with the explanatory variables,
    which is why constant terms CAN be estimated.
    Remember to include an intercept in the formula ('y ~ 1 + x1 + ...') and to assign the
    returned value to an object!

    :param panel_data : dataframe (which must be in a panel structure)
    :param formula : patsy formula
    :param weights : N x 1 Series or vector containing weights to be used in estimation; defaults to None
        Use is recommended when analyzing survey data, passing on the weight available in the survey
    :param cov : str, forwarded to ``fit`` as ``cov_type``
        unadjusted: common standard errors
        robust: robust standard errors
        kernel: robust to heteroskedasticity AND serial autocorrelation
        clustered: clustered standard errors by the entity column
    :return : the results object produced by ``RandomEffects.fit``
    """

    # Only forward ``weights`` when the caller actually supplied some
    model_kwargs = {"formula": formula, "data": panel_data}
    if weights is not None:
        model_kwargs["weights"] = weights
    model = RandomEffects.from_formula(**model_kwargs)

    # 'clustered' always clusters by entity; any other value is handed to fit() unchanged
    if cov == 'clustered':
        results = model.fit(cov_type='clustered', cluster_entity=True)
    else:
        results = model.fit(cov_type=cov)

    print(results.summary)
    return results
def hausman_fe_re(panel_data, inef_formula, weights=None, cov="unadjusted", level=0.05):
    """
    Run a Hausman test comparing Fixed Effects against Random Effects and print the verdict.

    H0: there is no correlation between the unobserved effects and the independent variables.
    It is not necessary to assign the function to an object (it returns None and only prints)!
    But remember to include an intercept in the formula.

    The same formula is fitted twice: once with ``RandomEffects`` and once with ``PanelOLS``
    after appending ``' + EntityEffects'``. Because the fixed-effects fit uses
    ``drop_absorbed=True`` it may end up with fewer coefficients than the random-effects fit,
    so the statistic is computed only over the coefficients present in BOTH fits.

    :param panel_data : dataframe (which must be in a panel structure)
    :param inef_formula : patsy formula for the inefficient model under H0 (fixed effects),
        written WITHOUT the EntityEffects term (it is appended here)
    :param weights : N x 1 Series or vector containing weights to be used in estimation; defaults to None
        Use is recommended when analyzing survey data, passing on the weight available in the survey
    :param cov : str, forwarded to both fits as ``cov_type``
        unadjusted: common standard errors
        robust: robust standard errors
        kernel: robust to heteroskedasticity AND serial autocorrelation
    :param level : significance level for the test. Defaults to 5%.
    :raises ValueError : if the two fits share no coefficient to compare
    """

    # ``weights`` is only forwarded when the caller supplied it
    extra = {} if weights is None else {"weights": weights}

    ## Random Effects
    random = RandomEffects.from_formula(formula=inef_formula, data=panel_data, **extra).fit(cov_type=cov)

    ## Fixed Effects
    formula_fe = inef_formula + ' + EntityEffects'
    fixed = PanelOLS.from_formula(formula=formula_fe, data=panel_data,
                                  drop_absorbed=True, **extra).fit(cov_type=cov)

    ## Computing the Hausman statistic
    # Restrict to coefficients estimated by both models. Subtracting the raw params/cov objects
    # would label-align them and fill every unmatched entry with NaN, turning H and p into NaN
    # and silently producing a "not rejected" message.
    common = [name for name in fixed.params.index if name in random.params.index]
    if not common:
        raise ValueError("Fixed and random effects fits share no coefficients to compare.")

    # Difference between asymptotic variances
    var_assin = fixed.cov.loc[common, common] - random.cov.loc[common, common]
    # Difference between parameters
    d = fixed.params.loc[common] - random.params.loc[common]
    # Calculating H (statistic)
    H = d.dot(np.linalg.inv(var_assin)).dot(d)
    # Degrees of freedom: compared coefficients minus one, keeping the original convention
    # (presumably the intercept is the one not counted -- TODO confirm)
    freedom = len(common) - 1

    # Calculating p-value using chi2 survival function (sf, 1 - cumulative distribution function)
    p = stats.chi2(freedom).sf(H)

    print(f"The value of H is {round(H, 6)} with {freedom} degrees of freedom in the chi-squared distribution.")
    if p < level:
        print(f"The p-value of the test is {round(p, 6)} and, therefore, H0 is REJECTED and fixed effects is preferred")
    else:
        print(f"The p-value of the test is {round(p, 6)} and H0 is NOT REJECTED and random effects is preferred.")
def Reg_Painel_Efeitos_Aleatórios(x, y, constante="S", cov="normal"):
    '''
    Fit a random-effects panel regression of ``y`` on ``x`` and print the result.

    **IMPORTANT: for the panel to be well-formed, the data must be multi-indexed by individual
    and by time, in that order. Otherwise, reshape the dataframe first with the
    'Arrumar Painel' function.

    x: list or array with the values of the independent variables;
    y: list or array with the values of the dependent variable;
    constante: "S" (the default) adds an intercept to ``x``; any other value fits without one;
    cov: covariance estimator used for the fit:
        "robust": robust standard errors;
        "kernel": robust to heteroskedasticity and serial autocorrelation;
        "clustered" or "cluster": standard errors clustered by entity;
        any other value (including the default "normal"): the estimator's default covariance.

    The fitted result is stored in the module-level global ``Resultado`` and printed;
    the function itself returns nothing.
    '''
    global df, Resultado

    # Build the regressor matrix, with or without an intercept column
    regressores = sm.add_constant(x) if constante == "S" else x

    # Translate the requested covariance option into keyword arguments for fit()
    if cov in ("robust", "kernel"):
        opcoes_fit = {"cov_type": cov}
    elif cov in ("clustered", "cluster"):
        opcoes_fit = {"cov_type": 'clustered', "cluster_entity": True}
    else:
        opcoes_fit = {}

    modelo = RandomEffects(y, regressores)
    Resultado = modelo.fit(**opcoes_fit)
    print(Resultado)
import os
import sys

import numpy as np
import pandas as pd
import statsmodels.api as sm
from linearmodels import RandomEffects
from linearmodels.panel import PanelOLS
from linearmodels.panel import PooledOLS
from scipy import stats

# Positional command-line arguments: input CSV path, then output text-file path
DATA_FILE = sys.argv[1]
OUTPUT_FILE = sys.argv[2]

change_df = pd.read_csv(DATA_FILE)

# The income group is the last "_"-separated token of the output file name,
# taken from the part before the first "."
base = os.path.basename(OUTPUT_FILE)
incomegroup = base.split(".")[0].split("_")[-1]
# .copy() gives an independent frame, so the column assignments below do not
# write into a filtered slice of change_df (pandas chained-assignment warning)
select_df = change_df[change_df.IncomeGroup == incomegroup].copy()

# filter out unbalanced data points: keep only codes that have one row per period
num_period = len(select_df.period.unique())
select_df['size'] = select_df.groupby('Code')['Code'].transform('size')
select_df = select_df[select_df['size'] == num_period].copy()

select_df['Income_t0_log'] = np.log10(select_df['Income_t0'])
# Panel index: Code first, date second
select_df = select_df.set_index(['Code', 'date'])

# Random-effects regression of growth_rate on the regressors plus a constant
exog_vars = ['Income_t0_log', 'nm_change', 'shm_change', 'ne_change', 'sum_adv_t0']
exog = sm.add_constant(select_df[exog_vars])
mod = RandomEffects(select_df.growth_rate, exog)
# NOTE: despite the name, fe_res holds the RandomEffects fit
fe_res = mod.fit()
with open(OUTPUT_FILE, 'w') as f:
    f.write(fe_res.summary.as_text())
import numpy as np
import pandas as pd
from linearmodels.panel import PanelOLS
import statsmodels.api as sm
from linearmodels.panel import PooledOLS
from linearmodels import RandomEffects
import sys

# Positional command-line arguments: input CSV path, then output text-file path
DATA_FILE = sys.argv[1]
OUTPUT_FILE = sys.argv[2]

change_df = pd.read_csv(DATA_FILE)

# filter out unbalanced data points: keep only codes that have one row per period
num_period = len(change_df.period.unique())
change_df['size'] = change_df.groupby('Code')['Code'].transform('size')
# .copy() so the column assignment below targets an independent frame rather
# than a filtered slice (avoids the pandas chained-assignment warning)
change_df = change_df[change_df['size'] == num_period].copy()

change_df['Income_t0_log'] = np.log10(change_df['Income_t0'])
# Panel index: Code first, date second
change_df = change_df.set_index(['Code', 'date'])

# Random-effects regression of growth_rate on the regressors plus a constant
exog_vars = [
    'Income_t0_log', 'nm_change', 'shm_change', 'ne_change', 'sum_adv_t0'
]
exog = sm.add_constant(change_df[exog_vars])
mod = RandomEffects(change_df.growth_rate, exog)
# NOTE: despite the name, fe_res holds the RandomEffects fit
fe_res = mod.fit()
with open(OUTPUT_FILE, 'w') as f:
    f.write(fe_res.summary.as_text())
#
# PANEL
#

from linearmodels import BetweenOLS, RandomEffects, PanelOLS

# BETWEEN estimator (BetweenOLS), weighted by the `n` column of `data`.
# `data` is defined outside this snippet.
w = data.n
BetweenModel = BetweenOLS.from_formula('fcs ~ rev_percap + month_Decembre',
                                       data=data,
                                       weights=w)
# NOTE(review): the fit result is not assigned, so it is only visible when
# this line is evaluated interactively (presumably a notebook cell).
BetweenModel.fit(cov_type='robust', reweight=True)

# RANDOM EFFECTS, same weights, robust covariance.
# NOTE(review): this formula adds `year` and has no explicit '1 +' intercept,
# unlike the PanelOLS formula below -- confirm both are intended.
RandomEffectsModel = RandomEffects.from_formula(
    'fcs ~ rev_percap + year + month_Decembre', data=data, weights=w)
REModFit = RandomEffectsModel.fit(cov_type='robust')
# The three bare expressions below only read the fit and two of its
# attributes; nothing is stored (presumably for interactive display).
REModFit
REModFit.variance_decomposition
REModFit.theta

# BASIC PANEL: PanelOLS with an explicit intercept and entity effects.
PanelModel = PanelOLS.from_formula(
    'fcs ~ 1 + rev_percap + month_Decembre + EntityEffects',
    data=data,
    weights=w)
# As above, the fit result is not assigned.
PanelModel.fit(cov_type='robust')

# INTERPRETATION : TO BE FULLY CHECKED
# an increase of 1000 in income relative to its mean over the period
# raises the fcs score by X relative to its mean over the period
Exemple #7
0
# Keep observations dated before 2014-02-01, then index the frame by (DATE, MARKET)
df = df[df['DATE'] < '2014-02-01']
# NOTE(review): linearmodels panel estimators read the first index level as the
# entity and the second as time; (DATE, MARKET) looks reversed -- confirm.
df = df.set_index(['DATE', 'MARKET'])
print(df.columns)

# Random Effects vs. pooled OLS with dummies
# (no Hausman test is actually computed in this snippet)

# Random Effects
exog_vars = [
    'PPORTUGAL', 'Q', 'QPOR', 'PRICE_OIL', 'PRICE_GAS', 'RISK_PREMIUM', 'TME',
    'TMAX', 'TMIN', 'PP', 'WORKDAY', 'SUMMER', 'WINTER', 'NULL_PRICE',
    'LITINIT'
]
exog = sm.add_constant(df[exog_vars])
mod = RandomEffects(df['P'], exog)
re_res = mod.fit()
print(re_res)

# Fixed Effect, approximated by plain OLS with the TIMEEF_2013 and FE_SPAIN
# columns added to the regressors
exog_vars = exog_vars + ['TIMEEF_2013', 'FE_SPAIN']
exog = sm.add_constant(df[exog_vars])
# Pass the matrix that includes the constant: sm.OLS does not add an intercept
# on its own, and the original handed it df[exog_vars], leaving `exog` unused.
mod = sm.OLS(endog=df['P'], exog=exog)
# NOTE: re_res is reused on purpose (as in the original) and now holds the OLS fit
re_res = mod.fit()
print(re_res.summary())

# DIF IN DIF

# DIF IN DIF
Exemple #8
0
# PanelOLS of Y on the `dd` columns plus the 'industrycode' column, with
# standard errors clustered by entity.
# NOTE(review): 'industrycode' enters as an ordinary regressor column here;
# confirm that is the intended way to model industry effects.
industry_regressors = dd + ['industrycode']
industryFE = PanelOLS(Y, df[industry_regressors])
industry_fit = industryFE.fit(cov_type='clustered', cluster_entity=True)
print(industry_fit)

Just for the purpose of the exercise, suppose that the unobserved factor $\alpha_i$ is ignored. This assumption is called Random Effects (RE). In this case, $\alpha_i$ ends up inside the error term $v_{it}$ and can potentially bias the results.

$$Y_{it}=\beta X_{it}+v_{it}$$

$$v_{it}= \alpha_i+\epsilon_{it}$$

In an experiment, the treatment variable is uncorrelated with the unobserved factor $\alpha_i$. In this case, the Random Effects (RE) model has the advantage of producing lower standard errors than Fixed Effects models.

Note that if we run a simple Random Effects (RE) regression, we might wrongly conclude that the St. Louis Fed policy increased firm revenue by 7%.

from linearmodels import RandomEffects
# Random Effects regression of Y on a constant and the st_louis_fed column,
# with standard errors clustered by entity.
# NOTE: the name `re` also belongs to a standard-library module; it is kept
# here unchanged.
re_regressors = df[['const', 'st_louis_fed']]
re = RandomEffects(Y, re_regressors)
re_fit = re.fit(cov_type='clustered', cluster_entity=True)
print(re_fit)

## Exercises

1| Suppose a non-experimental setting, where the control group differs from the treatment group. Is it reasonable to use Difference-in-Differences (DID) to estimate a causal effect? Justify your answer. Should you modify or add something to the DID framework?


2| Suppose a study based on the Difference-in-Differences (DID) method claims that the Fed avoided massive business failures via the bank bailout of 2008. Suppose another study, based on Regression Discontinuity (RD), claims the opposite or denies any impact of the Fed on business failures. Which empirical strategy do you think is more credible for estimating the causal impact of Fed policy, DID or RD? Justify your answer.


3| In panel data, where the unit of analysis can be the firm or the county, which result is more credible: the one at the firm level or the one at the county level? Justify.

4| Use the data from Ziebarth (2013) to estimate the impact of St. Louis Fed policy on firm's revenue. Specifically, run Difference-in-Differences (DID) with Random Effects (RE). Interpret the result. What can be inferred about the unobserved factor $\alpha_i$? 

5| Use the data from Ziebarth (2013) to estimate the impact of St. Louis Fed policy on firm's revenue. Specifically, run Difference-in-Differences (DID) with Firm Fixed Effects (FE) without using the command "entity_effects=True". Hint: You must use dummy variables for each firm.
Exemple #9
0
import os
from statsmodels.iolib.summary2 import summary_col
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
from linearmodels import PanelOLS
from linearmodels import RandomEffects

if __name__ == "__main__":
    # Positional command-line arguments: input CSV, then output path for the summary text
    REG_DATA, RES3_PATH = sys.argv[1], sys.argv[2]

    # Load the data, order it by Code and Year, and use that pair as the panel index
    metadata = (
        pd.read_csv(REG_DATA)
        .sort_values(by=['Code', 'Year'])
        .set_index(['Code', 'Year'])
    )
    metadata['Income_t0_log'] = np.log10(metadata['Income_t0'])

    # The income group is the last "_"-separated token of the output file name,
    # taken from the part before the first "."
    base = os.path.basename(RES3_PATH)
    incomegroup = base.split(".")[0].split("_")[-1]
    in_group = metadata.IncomeGroup == incomegroup
    metadata = metadata[in_group].dropna()

    # Keep rows whose 'size' equals the number of distinct periods left.
    # NOTE(review): 'size' is not computed here, so it is assumed to be a
    # column already present in the CSV -- confirm.
    num_period = len(metadata['period'].unique())
    is_balanced = metadata['size'] == num_period
    metadata = metadata[is_balanced]

    # Random-effects regression of growth on the regressors plus a constant
    exog_vars = ['ECI', 'Income_t0_log', 'diversity']
    exog = sm.add_constant(metadata[exog_vars])
    mod = RandomEffects(metadata.growth, exog)
    with open(RES3_PATH, 'w') as f:
        # The model is fitted only after the output file has been opened
        summary_text = mod.fit().summary.as_text()
        f.write(summary_text)