Example #1
0
    def __fitreg(self, dt, start_datetime, end_datetime, y, var_pit, var_norm,
                 fix, cluster, c):
        """Fit a PanelOLS regression of ``y[0]`` on a slice of ``dt``'s columns.

        Rows are restricted to ``start_datetime <= date <= end_datetime``, rows
        containing any missing value are dropped, and the frame is indexed by
        ``(ticker, year)`` before the model is built from a formula.

        Args:
            dt: DataFrame with at least ``date``, ``year`` and ``ticker``
                columns plus the columns named in ``y`` and ``fix``.
            start_datetime: Inclusive lower bound on ``dt['date']``.
            end_datetime: Inclusive upper bound on ``dt['date']``.
            y: List whose first element names the dependent variable.
            var_pit: Not used by this method.
            var_norm: Not used by this method.
            fix: List with zero or one column name that is appended to the
                regression formula.
            cluster: ``[]``, ``['year']``, ``['ticker']`` or both names in
                either order; selects time and/or entity clustering.
            c: Positional offset; columns ``dt.columns[c:]`` are kept as
                regressors.

        Returns:
            The fitted result returned by ``PanelOLS.fit``.

        Raises:
            KeyError: If more than one entry is given in ``fix`` or if
                ``cluster`` is not one of the accepted lists.
        """
        # Validate the request up front so no data work is wasted on a call
        # that is going to be rejected anyway.
        if len(fix) > 1:
            raise KeyError(
                "You have {} fixed effects! Please pick one.".format(len(fix)))

        # Translate the cluster list into the two PanelOLS flags. Previously a
        # non-empty ``cluster`` combined with an empty ``fix`` matched no
        # branch and the method silently returned None.
        cluster_time, cluster_entity = False, False
        if len(cluster) != 0:
            accepted = (
                (['year'], True, False),
                (['ticker'], False, True),
                (['year', 'ticker'], True, True),
                (['ticker', 'year'], True, True),
            )
            for names, by_time, by_entity in accepted:
                if cluster == names:
                    cluster_time, cluster_entity = by_time, by_entity
                    break
            else:
                raise KeyError("Please choose either year or ticker, or both.")

        # filter dates
        in_window = (dt['date'] >= start_datetime) & (dt['date'] <= end_datetime)
        dt = dt.loc[in_window]

        # filter columns: dependent variable, panel keys, regressors, fix column
        dt = dt[y + ['year', 'ticker'] + list(dt.columns[c:]) + fix]

        # choose x: everything after the first three selected columns
        x = '+'.join(dt.columns[3:])

        # Rows with any missing value are discarded rather than imputed.
        dt = dt.dropna()
        dt = dt.set_index(['ticker', 'year'])

        formula = y[0] + '~1+' + x
        if len(fix) == 1:
            formula = formula + '+' + fix[0]

        mod = PanelOLS.from_formula(formula, data=dt)
        return mod.fit(cov_type='clustered',
                       cluster_time=cluster_time,
                       cluster_entity=cluster_entity)
Example #2
0
def cond_corr_e2_e1timesprize(df):
    """Correlation of e2 and the interaction of e1 and prize after partialing out other effects.

    Both ``e2`` and ``e1timesprize`` are regressed on ``e1``, ``prize``, the
    dummies ``tt2`` .. ``tt10`` and entity effects; the correlation of the two
    residual series is returned.

    Args:
        df: Panel DataFrame accepted by ``PanelOLS.from_formula`` that holds
            every variable named in the formulas.

    Returns:
        The correlation between the two residual series.
    """
    controls = "e1+prize+tt2+tt3+tt4+tt5+tt6+tt7+tt8+tt9+tt10+EntityEffects"

    # Collect the residual series first and build the frame from them, so the
    # frame takes its index from the residuals. Assigning through ``.loc``
    # into a frame created without an index aligns against that empty index.
    residuals = {}
    for label in ("e2", "e1timesprize"):
        formula = f"{label}~{controls}"
        residuals[f"{label}_resid"] = (
            PanelOLS.from_formula(formula, data=df).fit().resids
        )

    df_resid = pd.DataFrame(residuals)
    return df_resid["e2_resid"].corr(df_resid["e1timesprize_resid"])
Example #3
0
def old_percentile_correlation(df, percentile=66):
    """Percentile of the per-subject correlation of e2 and e1 after partialing out other effects.

    ``e2`` and ``e1`` are each regressed on ``prize``, ``e1timesprize`` and
    time effects. For every value of the ``subject`` index level the
    correlation between the two residual series is computed, and the requested
    percentile of those correlations is returned.

    Args:
        df: Panel DataFrame whose index has a level named ``subject`` and that
            holds every variable named in the formulas.
        percentile: Percentile (0-100) handed to ``np.percentile``. Defaults
            to 66, the value that used to be hard-coded.

    Returns:
        The requested percentile of the per-subject residual correlations.
    """
    df_resid = pd.DataFrame(
        {
            f"{label}_resid": PanelOLS.from_formula(
                f"{label}~prize+e1timesprize+TimeEffects", data=df
            ).fit().resids
            for label in ("e2", "e1")
        },
        index=df.index,
    )

    # Select each subject with a boolean mask instead of interpolating the
    # label into a ``query`` string, which only works for labels that are
    # valid literals in the query expression (e.g. plain numbers).
    subjects = df_resid.index.get_level_values('subject')
    cond_corr = []
    for sub in subjects.unique():
        rows = df_resid[subjects == sub]
        cond_corr.append(rows["e2_resid"].corr(rows["e1_resid"]))
    return np.percentile(cond_corr, percentile)
Example #4
0
def process_data(tag, area_tag):
    """
    Fit an entity-effects panel regression on the averaged raster of one area.

    Reads ``{root}/{tag}/result/avg_data/avg_{area_tag}.tif`` through
    ``get_raster_band_array``, loads the bands into a DataFrame indexed by
    ``(eday, sday)``, and prints the PanelOLS summary for
    ``gsl ~ 1 + gdd + edd + pre + EntityEffects``.

    :param tag: sub-directory of the project root that holds the data
    :param area_tag: area identifier used in the raster file name
    :return: None; the regression summary is printed
    """
    root_path = getRootPath()
    tif_file = os.path.join(
        root_path, "{0}/result/avg_data/avg_{1}.tif".format(tag, area_tag))
    bandArray = get_raster_band_array(tif_file)
    df = pd.DataFrame(bandArray,
                      columns=["sday", "eday", "gsl", "gdd", "edd", "pre"])
    df.sday = df.sday.astype(np.int64)
    df.eday = df.eday.astype(np.int64)
    df = df.set_index(["eday", "sday"])
    # dropna() returns a new frame; the result must be kept or the call is a
    # no-op and rows with missing values reach the model.
    df = df.dropna()
    print("-------- use EntityEffects ---------")
    mod = PanelOLS.from_formula('gsl ~ 1 + gdd + edd + pre + EntityEffects',
                                df)
    res = mod.fit(cov_type='unadjusted')
    print(res)
Example #5
0
def process_data(tag, area_tag):
    """
    Fit an entity-effects panel regression on the merged rasters of one area.

    Every ``.tif`` file found under ``{root}/{tag}/process/merge/{area_tag}``
    is read through ``get_raster_band_array`` and the arrays are stacked
    row-wise. The result is loaded into a DataFrame indexed by
    ``(year, eday)`` and the PanelOLS summary for
    ``gsl ~ 1 + gdd + edd + pre + EntityEffects`` is printed.

    :param tag: sub-directory of the project root that holds the data
    :param area_tag: area identifier naming the merge directory
    :return: None; returns early without fitting when no raster is found
    """
    print("process data area_tag: {}".format(area_tag))
    root_path = getRootPath()
    src_path = os.path.join(root_path,
                            "{0}/process/merge/{1}".format(tag, area_tag))
    tif_files = walkDirFile(src_path, ext=".tif")

    # Read everything first and stack once; stacking inside the loop re-copies
    # the accumulated array on every iteration.
    band_arrays = [get_raster_band_array(tif_file) for tif_file in tif_files]
    if not band_arrays:
        return
    if len(band_arrays) == 1:
        bandArray = band_arrays[0]
    else:
        bandArray = np.vstack(band_arrays)

    df = pd.DataFrame(
        bandArray,
        columns=["sday", "eday", "gsl", "year", "gdd", "edd", "pre"])
    df.sday = df.sday.astype(np.int64)
    df.eday = df.eday.astype(np.int64)
    df.year = df.year.astype(np.int64)
    df = df.set_index(["year", "eday"])
    # dropna() returns a new frame; the result must be kept or the call is a
    # no-op and rows with missing values reach the model.
    df = df.dropna()
    print("-------- use EntityEffects ---------")
    mod = PanelOLS.from_formula('gsl ~ 1 + gdd + edd + pre + EntityEffects',
                                df)
    res = mod.fit(cov_type='unadjusted')
    print(res)
Example #6
0
# Scale volume down by a factor of one million.
test['volume'] = test['volume'].div(1000000)

# Keep the rows whose year is one of the listed labels, then only the columns
# the regressions below refer to.
sample_years = ['2020', '2018', '2019']
model_columns = [
    'year', 'ticker', 'assetclasslevel1', 'assetclasslevel2',
    'assetclasslevel3', 'cd', 'cdlag1', 'pd', 'volume', 'age'
]
test = test.loc[test['year'].isin(sample_years)]
test = test[model_columns]

# Discard every row that has a missing value in any kept column.
test = test.dropna()

# In[16]:

# Panel index: ticker as entity, year as time.
test0 = test.set_index(['ticker', 'year'])

# Covariance settings shared by the fits below: cluster by time and ticker.
two_way_cluster = dict(cov_type='clustered',
                       cluster_time=True,
                       cluster_entity=True)
base_formula = 'cd ~ 1 + cdlag1 + volume + pd + age + '

# assetclasslevel1 added to the formula
mod = PanelOLS.from_formula(base_formula + 'assetclasslevel1', data=test0)
fit01 = mod.fit(**two_way_cluster)

# assetclasslevel2 added to the formula
mod = PanelOLS.from_formula(base_formula + 'assetclasslevel2', data=test0)
fit02 = mod.fit(**two_way_cluster)

# assetclasslevel3 added to the formula
mod = PanelOLS.from_formula(base_formula + 'assetclasslevel3', data=test0)
fit03 = mod.fit(**two_way_cluster)

# time (year) effects instead of an asset-class column; note that `age` is
# not part of this formula
mod = PanelOLS.from_formula('cd ~ 1 + cdlag1 + volume + pd + TimeEffects',
                            data=test0)
Example #7
0
                       index='treat',
                       columns='year',
                       values='defor',
                       aggfunc=np.sum)
# Number of non-missing `defor` observations per treatment group and year.
count = pd.pivot_table(data=defor_df,
                       index='treat',
                       columns='year',
                       values='defor',
                       aggfunc="count")

# Panel index for linearmodels: outer level is the entity, inner level is time.
defor_df = defor_df.set_index(['idx', 'year'])

# =============================================================================
# Run regression to estimate treatment effect
# =============================================================================
# Imported ahead of its first use below; previously this import sat between
# the two regressions, after PanelOLS had already been referenced.
from linearmodels.panel import PanelOLS

## Simple diff in diff
mod = PanelOLS.from_formula('defor ~ treat * post', defor_df)
res = mod.fit(cov_type='clustered', cluster_entity=True)
print(res)

## Generalized did using two-way fixed effects
# Explicit interaction column so the formula carries a single treatment term.
defor_df['t'] = defor_df['treat'] * defor_df['post']
mod = PanelOLS.from_formula('defor ~ t + EntityEffects + TimeEffects',
                            defor_df)
res = mod.fit(cov_type='clustered', cluster_entity=True)
print(res)

### KEY OBSERVATION: FE estimator yields ~ (estimate of att) = diff + att while
### simple diff in diff yields ~ (estimate of att) = att
Example #8
0
# Aggregate columns: sum of the two `rs_o*` columns and of the two `mar*`
# columns.
autor["other"] = autor["rs_om"].add(autor["rs_of"])
autor["married"] = autor["marfem"].add(autor["marmale"])

# Categorical copy of the state column, used in the state_c:t interaction
state_as_category = pd.Categorical(autor["state"])
autor["state_c"] = state_as_category

# linearmodels needs an (entity, time) index; keep both as regular columns too
autor = autor.set_index(["state", "year"], drop=False)

# Diff-in-diff specification. The pieces are adjacent string literals, so they
# are concatenated exactly as written.
did_formula = (
    "lnths ~"
    "1 +"
    "lnemp +"
    "admico_2 + admico_1 + admico0 + admico1 + admico2 + admico3 + mico4 +"
    "admppa_2 + admppa_1 + admppa0 + admppa1 + admppa2 + admppa3 + mppa4 +"
    "admgfa_2 + admgfa_1 + admgfa0 + admgfa1 + admgfa2 + admgfa3 + mgfa4 +"
    "state_c:t +"
    "EntityEffects + TimeEffects"
)
did_model = PanelOLS.from_formula(did_formula,
                                  data=autor,
                                  drop_absorbed=True)
did = did_model.fit(cov_type='clustered', cluster_entity=True)

# Coefficients and 1.96-standard-error half-widths, both scaled by 100,
# collected for plotting
scaled_coef = did.params.mul(100)
scaled_ci = did.std_errors.mul(1.96).mul(100)
results_did = pd.DataFrame({"coef": scaled_coef, "ci": scaled_ci})

# Keep only the rows whose label matches admico or mico
results_did = results_did.filter(regex="admico|mico", axis=0)
results_did = results_did.reset_index()
Example #9
0
    def __fitreg(self, dt, start_datetime, end_datetime, y, var_pit, var_norm,
                 fix, cluster, c):
        """Winsorise a slice of ``dt`` and fit a PanelOLS regression of ``y[0]``.

        Rows are restricted to ``start_datetime <= date <= end_datetime``, rows
        containing any missing value are dropped, and the frame is indexed by
        ``(ticker, year)``. Every column except the last is then clamped to
        its quantiles at ``norm.cdf(-3)`` and ``norm.cdf(3)`` before the model
        is built from a formula. The data summary and the fitted result are
        printed.

        Side effects:
            Sets ``self.assetclass`` to the distinct rows of ``dt[fix]``.

        Args:
            dt: DataFrame with at least ``date``, ``year`` and ``ticker``
                columns plus the columns named in ``y`` and ``fix``.
            start_datetime: Inclusive lower bound on ``dt['date']``.
            end_datetime: Inclusive upper bound on ``dt['date']``.
            y: List whose first element names the dependent variable.
            var_pit: Not used by this method.
            var_norm: Not used by this method.
            fix: List with zero or one column name that is appended to the
                regression formula.
            cluster: ``[]``, ``['year']``, ``['ticker']`` or both names in
                either order; selects time and/or entity clustering.
            c: Not used by this method; the column offset is read from
                ``self.c`` instead.

        Returns:
            The fitted result returned by ``PanelOLS.fit``.

        Raises:
            KeyError: If more than one entry is given in ``fix`` or if
                ``cluster`` is not one of the accepted lists.
        """
        # filter dates
        in_window = (dt['date'] >= start_datetime) & (dt['date'] <= end_datetime)
        dt = dt.loc[in_window]

        # filter columns: dependent variable, panel keys, regressors, fix column
        # NOTE(review): the offset is self.c, not the `c` argument - confirm
        # that ignoring the argument is intended.
        dt = dt[y + ['year', 'ticker'] + list(dt.columns[self.c:]) + fix]

        # choose x: everything after the first three selected columns
        x = '+'.join(dt.columns[3:])

        # Rows with any missing value are discarded rather than imputed.
        dt = dt.dropna()
        dt = dt.set_index(['ticker', 'year'])

        self.assetclass = dt[fix].drop_duplicates().reset_index(drop=True)

        # winsorise before running regression; the bounds are the quantiles at
        # the +/- 3 sigma probabilities of a standard normal, computed once
        # because they do not depend on the column.
        lower_prob = stats.norm.cdf(-3)
        upper_prob = stats.norm.cdf(3)
        # NOTE(review): the last column is skipped, presumably because it is
        # the `fix` column; when `fix` is empty the last regressor is skipped
        # instead - confirm whether that is intended.
        for col in dt.columns[:-1]:
            lb = dt[col].quantile(lower_prob)
            ub = dt[col].quantile(upper_prob)

            # clamp outlying data points to the bounds
            dt.loc[dt[col] < lb, col] = lb
            dt.loc[dt[col] > ub, col] = ub

        # DataFrame.info() prints its report itself and returns None, so it
        # must not be wrapped in print().
        dt.info()

        if len(fix) > 1:
            raise KeyError(
                "You have {} fixed effects! Please pick one.".format(len(fix)))

        # Translate the cluster list into the two PanelOLS flags. Previously a
        # non-empty ``cluster`` combined with an empty ``fix`` matched no
        # branch and the method silently returned None.
        cluster_time, cluster_entity = False, False
        if len(cluster) != 0:
            accepted = (
                (['year'], True, False),
                (['ticker'], False, True),
                (['year', 'ticker'], True, True),
                (['ticker', 'year'], True, True),
            )
            for names, by_time, by_entity in accepted:
                if cluster == names:
                    cluster_time, cluster_entity = by_time, by_entity
                    break
            else:
                raise KeyError("Please choose either year or ticker, or both.")

        formula = y[0] + '~1+' + x
        if len(fix) == 1:
            formula = formula + '+' + fix[0]

        # The plain specification (no fix column, no clustering) keeps its
        # heteroskedasticity-robust covariance; everything else is clustered.
        if len(fix) == 0 and len(cluster) == 0:
            cov_type = 'heteroskedastic'
        else:
            cov_type = 'clustered'

        mod = PanelOLS.from_formula(formula, data=dt)
        fit1 = mod.fit(cov_type=cov_type,
                       cluster_time=cluster_time,
                       cluster_entity=cluster_entity)
        print(fit1)
        return fit1