Python filter_funda 예제들, frds.data.utils.filter_funda Python 예제들

예제 #1

0

파일 보기

파일: book_leverage.py 프로젝트: Luo1916/frds

    def estimate(self, nparrays: List[np.recarray]):
        """Compute the book leverage

        Book leverage is debts / (debts + common equity), where debts is the
        sum of long-term debt (`dltt`) and debt in current liabilities (`dlc`).
        The value is NaN where the denominator is zero or where common equity
        (`ceq`) is missing.

        Parameters
        ----------
        nparrays : List[np.recarray]
            Input datasets, same order as specified in `DATASETS_REQUIRED`

        Returns
        -------
        Tuple[pd.DataFrame, Dict[str, str]]
            Output dataset and the `VARIABLE_LABELS`
        """
        nparray = filter_funda(nparrays[0])
        # debts = long-term debt + debt in current liabilities
        # (nansum treats a missing component as zero)
        debts = np.nansum([nparray.dltt, nparray.dlc], axis=0)
        # assets = debts + common equity
        assets = np.nansum([debts, nparray.ceq], axis=0)
        # book leverage = debts / assets
        # Pre-fill the output with NaN: with `where=` and no `out=`, NumPy
        # leaves the skipped (assets == 0) entries as uninitialized memory.
        bleverage = np.true_divide(
            debts,
            assets,
            out=np.full(np.shape(assets), np.nan),
            where=(assets != 0),
        )
        # set book leverage to missing if common equity is somehow missing
        bleverage[np.isnan(nparray.ceq)] = np.nan
        # add book leverage to the result
        nparray = rfn.rec_append_fields(nparray, NAME, bleverage)
        # keep only useful columns
        # NOTE(review): the listing ends right after the sort below; `cols`
        # and `keys` are presumably consumed by code not visible here.
        cols = set(rfn.get_names_flat(nparray.dtype))
        nparray.sort(order=(keys := ["gvkey", "datadate"]))

예제 #2

0

파일 보기

파일: executive_ownership.py 프로젝트: fisher-pdf/frds

    def estimate(self, nparrays: List[np.recarray]):
        """Compute executive ownership percentages relative to shares outstanding

        Parameters
        ----------
        nparrays : List[np.recarray]
            Two datasets: index 0 is passed through `filter_funda` and is
            joined on (`gvkey`, `fyear`); index 1 is the compensation data
            joined on (`gvkey`, `year`).

        Returns
        -------
        tuple
            The de-duplicated result sorted by (`gvkey`, `datadate`), and
            `VARIABLE_LABELS`
        """
        funda = pd.DataFrame.from_records(filter_funda(nparrays[0]))
        anncomp = pd.DataFrame.from_records(nparrays[1])

        work = anncomp.merge(funda,
                             left_on=["gvkey", "year"],
                             right_on=["gvkey", "fyear"])

        # CSHO is in millions and SHROWN_TOT is in thousands
        work["ExecSharePct"] = work.shrown_tot / work.csho / 10
        work["ExecSharePctExclOpt"] = work.shrown_excl_opts / work.csho / 10
        work["ExecOptPct"] = work.opt_exer_num / work.csho / 10
        work["ExecShareVestPct"] = work.shrs_vest_num / work.csho / 10
        # NOTE(review): this adds a value (opt_exer_val) to a share count
        # (shrs_vest_num) — confirm whether a vested-value column was intended.
        work["ExecIncentivePct"] = ((work.opt_exer_val + work.shrs_vest_num) /
                                    work.tdc1 / 10)
        # Replace infinity (from division by zero) with nan.
        # Assign the result back instead of calling `replace(inplace=True)` on
        # a column selection, which is not guaranteed to modify `work`.
        # ExecSharePct is included so all five ratios are treated alike.
        ratio_cols = [
            "ExecSharePct",
            "ExecSharePctExclOpt",
            "ExecOptPct",
            "ExecShareVestPct",
            "ExecIncentivePct",
        ]
        work[ratio_cols] = work[ratio_cols].replace([np.inf, -np.inf], np.nan)
        keys = ["gvkey", "datadate"]
        cols = [*keys, *(VARIABLE_LABELS.keys())]
        result = work[cols].drop_duplicates().sort_values(by=keys)
        return result, VARIABLE_LABELS

예제 #3

0

파일 보기

 def estimate(self, nparrays: List[np.recarray]):
     """Compute the natural logarithm of `at` for the filtered input.

     The value is NaN where `at` is missing or not strictly positive.

     Parameters
     ----------
     nparrays : List[np.recarray]
         Input datasets; only the first one is used, after `filter_funda`.
     """
     nparray = filter_funda(nparrays[0])
     # Pre-fill the output with NaN: with `where=` and no `out=`, NumPy
     # leaves the skipped (at <= 0) entries as uninitialized memory.
     size = np.log(
         nparray.at,
         out=np.full(np.shape(nparray.at), np.nan),
         where=(nparray.at > 0),
     )
     # explicit: missing `at` yields a missing result
     size[np.isnan(nparray.at)] = np.nan
     nparray = rfn.rec_append_fields(nparray, NAME, size)
     # keep only useful columns
     # NOTE(review): the listing ends right after the sort below; `cols`
     # and `keys` are presumably consumed by code not visible here.
     cols = set(rfn.get_names_flat(nparray.dtype))
     nparray.sort(order=(keys := ["gvkey", "datadate"]))

예제 #4

0

파일 보기

파일: roa.py 프로젝트: fisher-pdf/frds

 def estimate(self, nparrays: List[np.recarray]):
     """Compute `ib` divided by `at` for the filtered input.

     The value is NaN where `at` is zero or missing.

     Parameters
     ----------
     nparrays : List[np.recarray]
         Input datasets; only the first one is used, after `filter_funda`.
     """
     nparray = filter_funda(nparrays[0])
     # Pre-fill the output with NaN: with `where=` and no `out=`, NumPy
     # leaves the skipped (at == 0) entries as uninitialized memory.
     roa = np.true_divide(
         nparray.ib,
         nparray.at,
         out=np.full(np.shape(nparray.at), np.nan),
         where=(nparray.at != 0),
     )
     # explicit: missing `at` yields a missing result
     roa[np.isnan(nparray.at)] = np.nan
     nparray = rfn.rec_append_fields(nparray, NAME, roa)
     # keep only useful columns
     # NOTE(review): the listing ends right after the sort below; `cols`
     # and `keys` are presumably consumed by code not visible here.
     cols = set(rfn.get_names_flat(nparray.dtype))
     nparray.sort(order=(keys := ["gvkey", "datadate"]))

예제 #5

0

파일 보기

    def estimate(
            self, nparrays: List[np.recarray]
    ) -> Tuple[pd.DataFrame, Dict[str, str]]:
        """Count the number of restatements during the past fiscal year(s)

        Parameters
        ----------
        nparrays : List[np.recarray]
            Input datasets, same order as specified in `DATASETS_REQUIRED`

        Returns
        -------
        Tuple[pd.DataFrame, Dict[str, str]]
            Output dataset and the variable labels. When `self._years > 1`
            the labels are an adjusted copy of `VARIABLE_LABELS`; the
            module-level mapping itself is never modified.
        """
        # Note that the order of datasets is preserved
        rests = pd.DataFrame.from_records(nparrays[0])
        funda = pd.DataFrame.from_records(filter_funda(nparrays[1]))
        # Inner join funda and restatements
        work = funda.merge(rests, left_on="cik", right_on="company_fkey")
        # Filing date must be in the past year(s) relative to the datadate
        time_delta = work.datadate - work.file_date
        # A "Y"-unit np.timedelta64 is ambiguous and is rejected by recent
        # pandas when compared with a timedelta Series. Use an explicit
        # duration instead: 31,556,952 s (365.2425 days) is the length NumPy
        # assigns to one "Y" when casting, so the cutoff is unchanged.
        years = pd.Timedelta(seconds=31_556_952 * self._years)
        cond = (time_delta < years) & (work.datadate >= work.file_date)
        # Forget about the non-matched
        cond = cond & (work.cik != "")
        # Apply the filtering condition (non-matching rows become all-NaN and
        # therefore drop out of the groupby below)
        work = work.where(cond)
        # Count by gvkey and datadate/fyear
        keys = ["gvkey", "datadate"]
        work = work.groupby(keys, as_index=False).sum()
        # Rename columns to match output variable labels
        work = work.rename(
            columns={
                "res_accounting": "NumResAcct",
                "res_fraud": "NumResFraud",
                "res_adverse": "NumResAdver",
                "res_cler_err": "NumResClerErr",
                "res_sec_invest": "NumResSECInvest",
            }
        )
        # Left join with funda so to retain the missing values
        result = funda[keys].merge(work, how="left", on=keys, copy=False)
        # Keep only useful columns
        cols = [*keys, *(VARIABLE_LABELS.keys())]
        # Some cosmetic issues
        result = result[cols].drop_duplicates().sort_values(by=keys)

        # Build the adjusted labels in a new dict. Rewriting VARIABLE_LABELS
        # in place would compound on every call, because the replacement text
        # itself contains "fiscal year".
        labels = VARIABLE_LABELS
        if self._years > 1:
            labels = {
                k: v.replace("fiscal year",
                             f"past {self._years} fiscal years")
                for k, v in VARIABLE_LABELS.items()
            }

        return result, labels

예제 #6

0

파일 보기

파일: market_to_book.py 프로젝트: Luo1916/frds

 def estimate(self, nparrays: List[np.recarray]):
     """Compute the market-to-book ratio for the filtered input.

     Market value is `prcc_f * csho`; the ratio divides it by common equity
     (`ceq`). The value is NaN where `ceq` is zero or missing.

     Parameters
     ----------
     nparrays : List[np.recarray]
         Input datasets; only the first one is used, after `filter_funda`.
     """
     nparray = filter_funda(nparrays[0])
     # market value at fiscal year
     mv = nparray.prcc_f * nparray.csho
     # market-to-book = market value of equity / common equity
     # Pre-fill the output with NaN: with `where=` and no `out=`, NumPy
     # leaves the skipped (ceq == 0) entries as uninitialized memory.
     mtb = np.true_divide(
         mv,
         nparray.ceq,
         out=np.full(np.shape(mv), np.nan),
         where=(nparray.ceq != 0),
     )
     # set mtb to missing if common equity is somehow missing
     mtb[np.isnan(nparray.ceq)] = np.nan
     # add market-to-book to the result
     nparray = rfn.rec_append_fields(nparray, NAME, mtb)
     # keep only useful columns
     # NOTE(review): the listing ends right after the sort below; `cols`
     # and `keys` are presumably consumed by code not visible here.
     cols = set(rfn.get_names_flat(nparray.dtype))
     nparray.sort(order=(keys := ["gvkey", "datadate"]))

예제 #7

0

파일 보기

파일: board_independence.py 프로젝트: fisher-pdf/frds

    def estimate(
            self, nparrays: List[np.recarray]
    ) -> Tuple[pd.DataFrame, Dict[str, str]]:
        """Compute board size and the percentage of independent members

        Parameters
        ----------
        nparrays : List[np.recarray]
            Three datasets, in order: company data (joined on `boardid` and
            `cikcode`), board composition data (joined on `companyid`), and
            the fundamentals data that is passed through `filter_funda`.

        Returns
        -------
        Tuple[pd.DataFrame, Dict[str, str]]
            Output dataset keyed by (`gvkey`, `datadate`) and the
            `VARIABLE_LABELS`
        """
        company = pd.DataFrame.from_records(nparrays[0])
        composition = pd.DataFrame.from_records(nparrays[1])
        funda = pd.DataFrame.from_records(
            filter_funda(nparrays[2]),
            columns=["gvkey", "datadate", "cik", "fyear"],
        )
        tmp = (
            company.merge(composition,
                          left_on=["boardid"],
                          right_on=["companyid"])
            .merge(funda, left_on=["cikcode"], right_on=["cik"])
            .drop_duplicates()
        )
        # The role must cover the datadate; an open-ended start or end date
        # counts as covering it on that side.
        started = tmp.datestartrole <= tmp.datadate
        not_ended = tmp.datadate <= tmp.dateendrole
        in_role = ((started & not_ended)
                   | (pd.isnull(tmp.dateendrole) & started)
                   | (pd.isnull(tmp.datestartrole) & not_ended))
        # `Series.isin` replaces `np.in1d`, which is deprecated since NumPy 2.0
        is_director = tmp.seniority.isin(
            ["Executive Director", "Supervisory Director"])
        # Rows failing the condition become all-NaN, so they are ignored by
        # the grouped counts below.
        tmp = tmp.where(in_role & is_director)
        # An empty role name keeps the string match below well-defined
        tmp = tmp.fillna({"rolename": ""})
        keys = ["gvkey", "datadate"]
        board_size = (
            tmp.groupby(keys, as_index=False)["directorid"]
            .count()
            .rename(columns={"directorid": "BoardSize"})
        )
        is_independent = tmp.rolename.str.lower().str.contains("independent")
        independent_dirs = (
            tmp[is_independent]
            .groupby(keys, as_index=False)["directorid"]
            .count()
            .rename(columns={"directorid": "IndependentMembers"})
        )

        result = board_size.merge(independent_dirs, on=keys, how="left")

        if self._missing_as_zero:
            # Boards with no matched independent director count as zero
            result.fillna({"IndependentMembers": 0}, inplace=True)

        result["BoardIndependence"] = (result.IndependentMembers /
                                       result.BoardSize * 100)

        cols = [*keys, *(VARIABLE_LABELS.keys())]
        result = result[cols].drop_duplicates()

        return result, VARIABLE_LABELS

예제 #8

0

파일 보기

    def estimate(self, nparrays: List[np.recarray]):
        """Compute the capital expenditures scaled by total assets

        The value is NaN where `at` is zero or missing.

        Parameters
        ----------
        nparrays : List[np.recarray]
            Input datasets, same order as specified in `DATASETS_REQUIRED`

        Returns
        -------
        Tuple[pd.DataFrame, Dict[str, str]]
            Output dataset and the `VARIABLE_LABELS`
        """
        nparray = filter_funda(nparrays[0])
        # Pre-fill the output with NaN: with `where=` and no `out=`, NumPy
        # leaves the skipped (at == 0) entries as uninitialized memory.
        capx = np.true_divide(
            nparray.capx,
            nparray.at,
            out=np.full(np.shape(nparray.at), np.nan),
            where=(nparray.at != 0),
        )
        # explicit: missing `at` yields a missing result
        capx[np.isnan(nparray.at)] = np.nan
        nparray = rfn.rec_append_fields(nparray, NAME, capx)
        # keep only useful columns
        # NOTE(review): the listing ends right after the sort below; `cols`
        # and `keys` are presumably consumed by code not visible here.
        cols = set(rfn.get_names_flat(nparray.dtype))
        nparray.sort(order=(keys := ["gvkey", "datadate"]))