def estimate(self, nparrays: List[np.recarray]):
    """Compute the book leverage

    Book leverage is computed as debts / (debts + common equity), where
    debts is the sum of `dltt` and `dlc`.

    Parameters
    ----------
    nparrays : List[np.recarray]
        Input datasets, same order as specified in `DATASETS_REQUIRED`

    Returns
    -------
    Tuple[pd.DataFrame, Dict[str, str]]
        Output dataset and the `VARIABLE_LABELS`
    """
    nparray = filter_funda(nparrays[0])
    # debts = long-term debt + debt in current liabilities
    # (nansum counts a missing component as zero)
    debts = np.nansum([nparray.dltt, nparray.dlc], axis=0)
    # assets = debts + common equity
    assets = np.nansum([debts, nparray.ceq], axis=0)
    # book leverage = debts / assets
    # A ufunc called with `where` leaves the masked-out slots untouched, so
    # `out` is pre-filled with NaN; otherwise rows with zero assets would
    # carry uninitialised memory.
    bleverage = np.true_divide(
        debts,
        assets,
        out=np.full(np.shape(assets), np.nan),
        where=(assets != 0),
    )
    # set book leverage to missing if common equity is somehow missing
    bleverage[np.isnan(nparray.ceq)] = np.nan
    # add book leverage to the result
    nparray = rfn.rec_append_fields(nparray, NAME, bleverage)
    # keep only useful columns: the keys and the labelled output variables
    cols = set(rfn.get_names_flat(nparray.dtype))
    nparray.sort(order=(keys := ["gvkey", "datadate"]))
    exclude_cols = cols - {*keys, *VARIABLE_LABELS}
    return (
        pd.DataFrame.from_records(nparray, exclude=list(exclude_cols)),
        VARIABLE_LABELS,
    )
def estimate(self, nparrays: List[np.recarray]):
    """Compute executive share and option holdings as percentages

    The first input is filtered with `filter_funda`, the second is merged
    onto it by `gvkey` and fiscal year (`year` == `fyear`).

    Parameters
    ----------
    nparrays : List[np.recarray]
        Input datasets; `nparrays[0]` supplies `csho`, `fyear` and
        `datadate`, `nparrays[1]` supplies the executive-level fields.

    Returns
    -------
    Tuple[pd.DataFrame, Dict[str, str]]
        De-duplicated output sorted by `gvkey` and `datadate`, and the
        `VARIABLE_LABELS`
    """
    funda = pd.DataFrame.from_records(filter_funda(nparrays[0]))
    anncomp = pd.DataFrame.from_records(nparrays[1])
    work = anncomp.merge(
        funda, left_on=["gvkey", "year"], right_on=["gvkey", "fyear"]
    )
    # CSHO is in millions and SHROWN_TOT is in thousands, hence
    # thousands / millions / 10 gives a percentage
    work["ExecSharePct"] = work.shrown_tot / work.csho / 10
    work["ExecSharePctExclOpt"] = work.shrown_excl_opts / work.csho / 10
    work["ExecOptPct"] = work.opt_exer_num / work.csho / 10
    work["ExecShareVestPct"] = work.shrs_vest_num / work.csho / 10
    # NOTE(review): this adds `opt_exer_val` to `shrs_vest_num` and keeps the
    # `/ 10` unit scaling; formula kept as-is, confirm it is intended.
    work["ExecIncentivePct"] = (
        (work.opt_exer_val + work.shrs_vest_num) / work.tdc1 / 10
    )
    # Replace infinity (division by zero) with nan.  Assign the cleaned block
    # back instead of `work[col].replace(..., inplace=True)`, which mutates a
    # temporary under pandas copy-on-write and leaves `work` unchanged.
    ratio_cols = [
        "ExecSharePct",
        "ExecSharePctExclOpt",
        "ExecOptPct",
        "ExecShareVestPct",
        "ExecIncentivePct",
    ]
    work[ratio_cols] = work[ratio_cols].replace([np.inf, -np.inf], np.nan)
    keys = ["gvkey", "datadate"]
    cols = [*keys, *VARIABLE_LABELS]
    result = work[cols].drop_duplicates().sort_values(by=keys)
    return result, VARIABLE_LABELS
def estimate(self, nparrays: List[np.recarray]):
    """Compute firm size as the natural logarithm of `at`

    Rows whose `at` is missing or not strictly positive get a missing value.

    Parameters
    ----------
    nparrays : List[np.recarray]
        Input datasets; only `nparrays[0]` is used, after `filter_funda`.

    Returns
    -------
    Tuple[pd.DataFrame, Dict[str, str]]
        Output dataset and the `VARIABLE_LABELS`
    """
    nparray = filter_funda(nparrays[0])
    total_assets = nparray.at
    # A ufunc called with `where` leaves the masked-out slots untouched, so
    # `out` is pre-filled with NaN; otherwise non-positive assets would yield
    # uninitialised memory instead of a missing value.
    size = np.log(
        total_assets,
        out=np.full(np.shape(total_assets), np.nan),
        where=(total_assets > 0),
    )
    size[np.isnan(total_assets)] = np.nan
    nparray = rfn.rec_append_fields(nparray, NAME, size)
    # keep only useful columns: the keys and the labelled output variables
    cols = set(rfn.get_names_flat(nparray.dtype))
    nparray.sort(order=(keys := ["gvkey", "datadate"]))
    exclude_cols = cols - {*keys, *VARIABLE_LABELS}
    return (
        pd.DataFrame.from_records(nparray, exclude=list(exclude_cols)),
        VARIABLE_LABELS,
    )
def estimate(self, nparrays: List[np.recarray]):
    """Compute return on assets as `ib` divided by `at`

    Rows whose `at` is zero or missing get a missing value.

    Parameters
    ----------
    nparrays : List[np.recarray]
        Input datasets; only `nparrays[0]` is used, after `filter_funda`.

    Returns
    -------
    Tuple[pd.DataFrame, Dict[str, str]]
        Output dataset and the `VARIABLE_LABELS`
    """
    nparray = filter_funda(nparrays[0])
    # A ufunc called with `where` leaves the masked-out slots untouched, so
    # `out` is pre-filled with NaN; otherwise rows with zero assets would
    # carry uninitialised memory.
    roa = np.true_divide(
        nparray.ib,
        nparray.at,
        out=np.full(np.shape(nparray.at), np.nan),
        where=(nparray.at != 0),
    )
    roa[np.isnan(nparray.at)] = np.nan
    nparray = rfn.rec_append_fields(nparray, NAME, roa)
    # keep only useful columns: the keys and the labelled output variables
    cols = set(rfn.get_names_flat(nparray.dtype))
    nparray.sort(order=(keys := ["gvkey", "datadate"]))
    exclude_cols = cols - {*keys, *VARIABLE_LABELS}
    return (
        pd.DataFrame.from_records(nparray, exclude=list(exclude_cols)),
        VARIABLE_LABELS,
    )
def estimate(
    self, nparrays: List[np.recarray]
) -> Tuple[pd.DataFrame, Dict[str, str]]:
    """Count the number of restatements during the past fiscal year(s)

    A restatement is attributed to a firm-year when its `file_date` lies in
    the `self._years`-year window ending at (and including) `datadate`.

    Parameters
    ----------
    nparrays : List[np.recarray]
        Input datasets, same order as specified in `DATASETS_REQUIRED`

    Returns
    -------
    Tuple[pd.DataFrame, Dict[str, str]]
        Output dataset and the variable labels. When `self._years` > 1 the
        labels are an adjusted copy; `VARIABLE_LABELS` itself is not changed.
    """
    # Note that the order of datasets is preserved
    rests = pd.DataFrame.from_records(nparrays[0])
    funda = pd.DataFrame.from_records(filter_funda(nparrays[1]))
    # Inner join funda and restatements
    work = funda.merge(rests, left_on="cik", right_on="company_fkey")
    # Filing date must be in the past year(s) relative to the datadate.
    # A calendar offset is used because a "Y"-unit np.timedelta64 cannot be
    # compared with nanosecond-resolution timedeltas.
    window_start = work.datadate - pd.DateOffset(years=self._years)
    cond = (work.file_date > window_start) & (work.file_date <= work.datadate)
    # Forget about the non-matched
    cond &= work.cik != ""
    # Apply the filtering condition
    work = work[cond]
    # Count by gvkey and datadate/fyear; restrict the sum to numeric columns
    # so identifier strings and dates are not aggregated
    keys = ["gvkey", "datadate"]
    work = work.groupby(keys, as_index=False).sum(numeric_only=True)
    # Rename columns to match output variable labels
    work = work.rename(
        columns={
            "res_accounting": "NumResAcct",
            "res_fraud": "NumResFraud",
            "res_adverse": "NumResAdver",
            "res_cler_err": "NumResClerErr",
            "res_sec_invest": "NumResSECInvest",
        }
    )
    # Left join with funda so as to retain the missing values
    result = funda[keys].merge(work, how="left", on=keys)
    # Keep only useful columns
    cols = [*keys, *VARIABLE_LABELS]
    result = result[cols].drop_duplicates().sort_values(by=keys)
    # Adjust the labels on a copy: rewriting the module-level dict in place
    # would leak into other instances and compound on repeated calls.
    labels = VARIABLE_LABELS
    if self._years > 1:
        labels = {
            name: text.replace(
                "fiscal year", f"past {self._years} fiscal years"
            )
            for name, text in VARIABLE_LABELS.items()
        }
    return result, labels
def estimate(self, nparrays: List[np.recarray]):
    """Compute the market-to-book ratio

    Market-to-book is (`prcc_f` * `csho`) divided by `ceq`. Rows whose
    `ceq` is zero or missing get a missing value.

    Parameters
    ----------
    nparrays : List[np.recarray]
        Input datasets; only `nparrays[0]` is used, after `filter_funda`.

    Returns
    -------
    Tuple[pd.DataFrame, Dict[str, str]]
        Output dataset and the `VARIABLE_LABELS`
    """
    nparray = filter_funda(nparrays[0])
    # market value at fiscal year
    mv = nparray.prcc_f * nparray.csho
    # market-to-book = market value of equity / common equity
    # A ufunc called with `where` leaves the masked-out slots untouched, so
    # `out` is pre-filled with NaN; otherwise rows with zero common equity
    # would carry uninitialised memory.
    mtb = np.true_divide(
        mv,
        nparray.ceq,
        out=np.full(np.shape(nparray.ceq), np.nan),
        where=(nparray.ceq != 0),
    )
    # set mtb to missing if common equity is somehow missing
    mtb[np.isnan(nparray.ceq)] = np.nan
    # add market-to-book to the result
    nparray = rfn.rec_append_fields(nparray, NAME, mtb)
    # keep only useful columns: the keys and the labelled output variables
    cols = set(rfn.get_names_flat(nparray.dtype))
    nparray.sort(order=(keys := ["gvkey", "datadate"]))
    exclude_cols = cols - {*keys, *VARIABLE_LABELS}
    return (
        pd.DataFrame.from_records(nparray, exclude=list(exclude_cols)),
        VARIABLE_LABELS,
    )
def estimate(
    self, nparrays: List[np.recarray]
) -> Tuple[pd.DataFrame, Dict[str, str]]:
    """Compute board size and board independence per firm-year

    Parameters
    ----------
    nparrays : List[np.recarray]
        Input datasets: company profile, board composition and funda
        (filtered with `filter_funda`), in that order.

    Returns
    -------
    Tuple[pd.DataFrame, Dict[str, str]]
        Output dataset and the `VARIABLE_LABELS`
    """
    company = pd.DataFrame.from_records(nparrays[0])
    composition = pd.DataFrame.from_records(nparrays[1])
    funda = pd.DataFrame.from_records(
        filter_funda(nparrays[2]),
        columns=["gvkey", "datadate", "cik", "fyear"],
    )
    tmp = (
        company.merge(
            composition, left_on=["boardid"], right_on=["companyid"]
        )
        .merge(funda, left_on=["cikcode"], right_on=["cik"])
        .drop_duplicates()
    )
    # A role is active at datadate when datadate lies inside the role's
    # [start, end] interval; one open (missing) end of the interval is allowed.
    started = tmp.datestartrole <= tmp.datadate
    not_ended = tmp.datadate <= tmp.dateendrole
    in_role = (
        (started & not_ended)
        | (tmp.dateendrole.isna() & started)
        | (tmp.datestartrole.isna() & not_ended)
    )
    is_director = tmp.seniority.isin(
        ("Executive Director", "Supervisory Director")
    )
    # Drop the non-matching rows outright rather than blanking them to NaN
    tmp = tmp[in_role & is_director]
    keys = ["gvkey", "datadate"]
    board_size = (
        tmp.groupby(keys, as_index=False)["directorid"]
        .count()
        .rename(columns={"directorid": "BoardSize"})
    )
    # A missing role name counts as not independent
    is_independent = (
        tmp.rolename.fillna("").str.lower().str.contains("independent")
    )
    independent_dirs = (
        tmp[is_independent]
        .groupby(keys, as_index=False)["directorid"]
        .count()
        .rename(columns={"directorid": "IndependentMembers"})
    )
    result = board_size.merge(independent_dirs, on=keys, how="left")
    if self._missing_as_zero:
        # Firm-years without any independent director have no match above
        result = result.fillna({"IndependentMembers": 0})
    result["BoardIndependence"] = (
        result.IndependentMembers / result.BoardSize * 100
    )
    cols = [*keys, *VARIABLE_LABELS]
    result = result[cols].drop_duplicates()
    return result, VARIABLE_LABELS
def estimate(self, nparrays: List[np.recarray]):
    """Compute the capital expenditures scaled by total assets

    Rows whose `at` is zero or missing get a missing value.

    Parameters
    ----------
    nparrays : List[np.recarray]
        Input datasets, same order as specified in `DATASETS_REQUIRED`

    Returns
    -------
    Tuple[pd.DataFrame, Dict[str, str]]
        Output dataset and the `VARIABLE_LABELS`
    """
    nparray = filter_funda(nparrays[0])
    # A ufunc called with `where` leaves the masked-out slots untouched, so
    # `out` is pre-filled with NaN; otherwise rows with zero assets would
    # carry uninitialised memory.
    capx = np.true_divide(
        nparray.capx,
        nparray.at,
        out=np.full(np.shape(nparray.at), np.nan),
        where=(nparray.at != 0),
    )
    capx[np.isnan(nparray.at)] = np.nan
    nparray = rfn.rec_append_fields(nparray, NAME, capx)
    # keep only useful columns: the keys and the labelled output variables
    cols = set(rfn.get_names_flat(nparray.dtype))
    nparray.sort(order=(keys := ["gvkey", "datadate"]))
    exclude_cols = cols - {*keys, *VARIABLE_LABELS}
    return (
        pd.DataFrame.from_records(nparray, exclude=list(exclude_cols)),
        VARIABLE_LABELS,
    )