コード例 #1
0
    def test_method_between_1_3(self):
        """Check the ``between`` method with ``time=[1, 3]`` against the stored expectation."""
        group_cols = ["PERMNO", "byvar"]
        result = pd_utils.cumulate(
            self.df,
            "RET",
            method="between",
            periodvar="Date",
            byvars=group_cols,
            time=[1, 3],
        )

        assert_frame_equal(self.expect_between_1_3, result, check_dtype=False)
コード例 #2
0
    def test_method_between_m2_0(self):
        """Check the ``between`` method with ``time=[-2, 0]``.

        The expected frame is the same one used for ``time=[1, 3]``.
        """
        group_cols = ["PERMNO", "byvar"]
        result = pd_utils.cumulate(
            self.df,
            "RET",
            method="between",
            periodvar="Date",
            byvars=group_cols,
            time=[-2, 0],
        )

        # Deliberately compared with the [1, 3] expectation: the results coincide.
        assert_frame_equal(self.expect_between_1_3, result, check_dtype=False)
コード例 #3
0
    def test_shifted_index(self):
        """Check ``between`` cumulation on a copy of the fixture whose index is offset by 10."""
        shifted = self.df.copy()  # leave the shared fixture untouched
        shifted.index = shifted.index + 10

        group_cols = ["PERMNO", "byvar"]
        result = pd_utils.cumulate(
            shifted,
            "RET",
            method="between",
            periodvar="Date",
            byvars=group_cols,
            time=[-2, 0],
        )

        assert_frame_equal(self.expect_between_1_3, result, check_dtype=False)
コード例 #4
0
    def test_grossify(self):
        """Check ``grossify=True`` with the ``first`` method on returns reduced by one.

        Both the input ``RET`` column and the expected ``cum_RET``/``RET``
        columns are the fixture values minus one.
        """
        # Work on copies so the shared fixtures are not overwritten.
        net_df = self.df.copy()
        net_df["RET"] -= 1  # ungrossify the input

        expected = self.expect_first.copy()
        for column in ("cum_RET", "RET"):
            expected[column] -= 1

        result = pd_utils.cumulate(
            net_df,
            "RET",
            method="first",
            periodvar="Date",
            byvars=["PERMNO", "byvar"],
            grossify=True,
        )

        assert_frame_equal(expected, result, check_dtype=False)
コード例 #5
0
ファイル: load.py プロジェクト: nickderobertis/data-code
    def _handle_cumret(self):
        """Cumulate returns in ``self.long_df`` and keep only the requested periods.

        Cumulates ``RET`` (plus ``ABRET`` when ``self.abret`` is truthy), adds
        the matching ``cum_*`` names to ``self.get``, and then filters
        ``self.long_df`` down to rows whose ``Shift`` value is in the periods
        derived from ``self.time``.
        """
        self._log('Cumret detected.')
        cumvars = ['RET', 'ABRET'] if self.abret else ['RET']

        # ``get`` is used at the end for the pivot, so the cumulated
        # variables must be registered there as well.
        self.get += ['cum_' + str(name) for name in cumvars]

        # cumulate raises a warning if time is supplied when the method is not
        # "between"; that warning is suppressed for the duration of the call.
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')
            message = 'Cumulating returns with method {} for time {}.'.format(
                self.cumret, self.time)
            self._log(message)

            group_cols = ['PERMNO', self.date]
            if self.other_byvars:
                group_cols.extend(self.other_byvars)

            self.long_df = cumulate(
                self.long_df,
                cumvars,
                periodvar='Shift Date',
                method=self.cumret,
                byvars=group_cols,
                time=self.time,
                grossify=True,
            )

        # Now remove unneeded periods. With intermediate periods enabled the
        # endpoints are expanded to the full range, e.g. [1, 4] -> [1, 2, 3, 4].
        if self.intermediate_periods:
            wanted_periods = list(range(min(self.time), max(self.time) + 1))
        else:
            wanted_periods = self.time

        if self.drop_first:
            wanted_periods = wanted_periods[1:]

        in_wanted_periods = self.long_df['Shift'].isin(wanted_periods)
        self.long_df = self.long_df[in_wanted_periods]
コード例 #6
0
    def test_method_first(self):
        """Check the ``first`` method against the stored expectation."""
        result = pd_utils.cumulate(
            self.df,
            "RET",
            method="first",
            periodvar="Date",
            byvars=["PERMNO", "byvar"],
        )

        assert_frame_equal(self.expect_first, result, check_dtype=False)
コード例 #7
0
ファイル: cumret.py プロジェクト: nickderobertis/data-code
def cumulate_buy_and_hold_portfolios(
    df: pd.DataFrame,
    port_var: str,
    id_var: str,
    date_var: str,
    port_date_var: str,
    ret_var: str,
    cum_days: Iterable[float] = (0, 1, 5),
    freq: str = "d",
    grossify: bool = True,
    weight_var: Optional[str] = None,
    include_stderr: bool = False,
    include_count: bool = False,
) -> pd.DataFrame:
    """
    Creates buy-and-hold portfolios from normal portfolios and
    cumulates a variable within them.

    For each portfolio in each portfolio date, finds the ids which
    are present in that portfolio. Extends this portfolio for
    however many days are needed to cumulate.

    Then within these extended buy-and-hold portfolios, cumulates then calculates
    the average (and optionally, weighted-average) of the cumulated variable.

    :param df: DataFrame containing portfolios, a date variable, a portfolio
        formation date variable, and id variable, and a variable to be cumulated
    :param port_var: Name of variable containing portfolios
    :param id_var: Name of variable containing entity ids
    :param date_var: Name of variable containing entity dates
    :param port_date_var: Name of variable containing portfolio formation dates
    :param ret_var: Name of variable to be cumulated
    :param cum_days: Cumulate to between this many days, e.g. (0, 1, 5) means
        give return for initial period (0), return for first period (0 to 1),
        and return for periods 1 to 5 cumulated. Any iterable is accepted,
        including one-shot iterators; it must not be empty
    :param freq: 'd' for daily, 'h' for hourly, 'w' for weekly, 'm' for monthly,
        'y' for annual
    :param grossify: Set to True to add one to all variables then subtract one at the end
    :param weight_var: Variable to use for calculating weights in weighted average, None
        to disable weighted averages
    :param include_stderr: Whether to include calculated standard errors in output DataFrame
    :param include_count: Whether to include counts of entities in each portfolio-date
        observation
    :return: Wide-format DataFrame which has portfolio variable, portfolio formation date
        variable, and cumulative return variables
    :raises ValueError: If ``cum_days`` is empty
    """
    daily_multiplier = _daily_multiplier(freq)

    # ``cum_days`` is read twice below (once for the period conversion, once
    # for ``max``). Materialize it so that a generator or other one-shot
    # iterator is not already exhausted by the time ``max`` runs.
    cum_days = list(cum_days)
    if not cum_days:
        raise ValueError("cum_days must contain at least one value")

    # Convert the requested day offsets into whole periods of ``freq``.
    cum_time: List[int] = [
        int(round(t * daily_multiplier, 0)) for t in cum_days
    ]
    # Portfolios must persist long enough to cover the largest requested offset.
    needed_days = math.ceil(max(cum_days))

    # Get buy and hold portfolios
    persist_port_df = collect_portfolios_through_time(
        df,
        port_var,
        id_var,
        needed_days,
        datevar=date_var,
        portfolio_datevar=port_date_var,
    )

    cum_df = pd_utils.cumulate(
        persist_port_df,
        ret_var,
        "between",
        date_var,
        byvars=[port_var, port_date_var, id_var],
        time=cum_time,
        grossify=grossify,
    )

    # One row per (portfolio, formation date); this is the base that each
    # period's averages are merged onto.
    port_periods = (
        cum_df[[port_var, port_date_var]]
        .drop_duplicates()
        .sort_values([port_var, port_date_var])
    )

    out_df = port_periods
    for cum_period in cum_time:
        period_df = _average_for_cum_time(
            cum_df,
            cum_period,
            port_var,
            date_var,
            port_date_var,
            ret_var,
            freq=freq,
            weight_var=weight_var,
            include_stderr=include_stderr,
            include_count=include_count,
        )
        # Left merge keeps every portfolio-date even if a period has no data.
        out_df = out_df.merge(period_df,
                              how="left",
                              on=[port_var, port_date_var])

    return out_df