def test_method_between_1_3(self):
    # Cumulate RET with the "between" method over periods 1 through 3 and
    # compare against the fixture's expected frame (dtypes are not checked).
    cumulate_kwargs = {
        "periodvar": "Date",
        "byvars": ["PERMNO", "byvar"],
        "time": [1, 3],
    }
    result = pd_utils.cumulate(self.df, "RET", "between", **cumulate_kwargs)

    assert_frame_equal(self.expect_between_1_3, result, check_dtype=False)
def test_method_between_m2_0(self):
    # Cumulate RET with the "between" method using time=[-2, 0].
    cumulate_kwargs = {
        "periodvar": "Date",
        "byvars": ["PERMNO", "byvar"],
        "time": [-2, 0],
    }
    result = pd_utils.cumulate(self.df, "RET", "between", **cumulate_kwargs)

    # Actually the same result as time=[1, 3], so the same expectation is reused
    assert_frame_equal(self.expect_between_1_3, result, check_dtype=False)
def test_shifted_index(self):
    # Work on a copy whose index labels are all moved up by 10, then check
    # that cumulating still yields the same expected frame.
    shifted = self.df.copy()
    shifted.index = shifted.index + 10

    cumulate_kwargs = {
        "periodvar": "Date",
        "byvars": ["PERMNO", "byvar"],
        "time": [-2, 0],
    }
    result = pd_utils.cumulate(shifted, "RET", "between", **cumulate_kwargs)

    assert_frame_equal(self.expect_between_1_3, result, check_dtype=False)
def test_grossify(self):
    # Build the expectation first: the "first"-method fixture with one
    # subtracted from both the cumulated and the raw return columns.
    expected = self.expect_first.copy()
    for column in ("cum_RET", "RET"):
        expected[column] -= 1

    # Copy so the shared fixture is not overwritten, then ungrossify the input
    ungrossified = self.df.copy()
    ungrossified["RET"] -= 1

    result = pd_utils.cumulate(
        ungrossified,
        "RET",
        "first",
        periodvar="Date",
        byvars=["PERMNO", "byvar"],
        grossify=True,
    )

    assert_frame_equal(expected, result, check_dtype=False)
def _handle_cumret(self):
    # Cumulates RET (and ABRET when self.abret is set) in self.long_df, registers
    # the resulting cum_* columns in self.get, then keeps only the requested
    # 'Shift' periods.
    self._log('Cumret detected.')

    cumvars = ['RET', 'ABRET'] if self.abret else ['RET']

    # self.get is used at the end for the pivot, so the cumulated columns
    # have to be added to it as pivoting variables
    self.get += ['cum_' + str(c) for c in cumvars]

    # cumulate raises a warning if time is supplied when the method is not
    # between; that warning is suppressed for the duration of the call
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        self._log('Cumulating returns with method {} for time {}.'.format(
            self.cumret, self.time))

        group_cols = ['PERMNO', self.date]
        if self.other_byvars:
            group_cols.extend(self.other_byvars)

        self.long_df = cumulate(
            self.long_df,
            cumvars,
            periodvar='Shift Date',
            method=self.cumret,
            byvars=group_cols,
            time=self.time,
            grossify=True,
        )

    # Now remove unneeded periods. With intermediate periods enabled the
    # endpoints are expanded to the full range, e.g. [1, 4] -> [1, 2, 3, 4]
    if self.intermediate_periods:
        keep_time = list(range(min(self.time), max(self.time) + 1))
    else:
        keep_time = self.time

    if self.drop_first:
        keep_time = keep_time[1:]

    in_kept_period = self.long_df['Shift'].isin(keep_time)
    self.long_df = self.long_df[in_kept_period]
def test_method_first(self):
    # Cumulate RET with the "first" method and compare against the fixture's
    # expected frame (dtypes are not checked).
    cumulate_kwargs = {
        "periodvar": "Date",
        "byvars": ["PERMNO", "byvar"],
    }
    result = pd_utils.cumulate(self.df, "RET", "first", **cumulate_kwargs)

    assert_frame_equal(self.expect_first, result, check_dtype=False)
def cumulate_buy_and_hold_portfolios(
    df: pd.DataFrame,
    port_var: str,
    id_var: str,
    date_var: str,
    port_date_var: str,
    ret_var: str,
    cum_days: Iterable[float] = (0, 1, 5),
    freq: str = "d",
    grossify: bool = True,
    weight_var: Optional[str] = None,
    include_stderr: bool = False,
    include_count: bool = False,
) -> pd.DataFrame:
    """
    Creates buy-and-hold portfolios from normal portfolios and cumulates a variable within them.

    For each portfolio in each portfolio date, finds the ids which are present in that
    portfolio. Extends this portfolio for however many days are needed to cumulate. Then
    within these extended buy-and-hold portfolios, cumulates then calculates the average
    (and optionally, weighted-average) of the cumulated variable.

    :param df: DataFrame containing portfolios, a date variable, a portfolio formation date
        variable, an id variable, and a variable to be cumulated
    :param port_var: Name of variable containing portfolios
    :param id_var: Name of variable containing entity ids
    :param date_var: Name of variable containing entity dates
    :param port_date_var: Name of variable containing portfolio formation dates
    :param ret_var: Name of variable to be cumulated
    :param cum_days: Cumulate to between this many days, e.g. (0, 1, 5) means give return for
        initial period (0), return for first period (0 to 1), and return for periods 1 to 5
        cumulated. Any iterable is accepted, including one-shot iterators and generators
    :param freq: 'd' for daily, 'h' for hourly, 'w' for weekly, 'm' for monthly, 'y' for annual
    :param grossify: Set to True to add one to all variables then subtract one at the end
    :param weight_var: Variable to use for calculating weights in weighted average,
        None to disable weighted averages
    :param include_stderr: Whether to include calculated standard errors in output DataFrame
    :param include_count: Whether to include counts of entities in each portfolio-date observation
    :return: Wide-format DataFrame which has portfolio variable, portfolio formation date
        variable, and cumulative return variables
    :raises ValueError: If cum_days contains no periods
    """
    daily_multiplier = _daily_multiplier(freq)

    # cum_days is consumed twice below (once to scale, once for the maximum), so
    # materialize it first; otherwise a one-shot iterator would already be
    # exhausted by the time max() is called.
    cum_days = list(cum_days)
    if not cum_days:
        raise ValueError("cum_days must contain at least one period")

    cum_time: List[int] = [
        int(round(t * daily_multiplier, 0)) for t in cum_days
    ]
    needed_days = math.ceil(max(cum_days))

    # Get buy and hold portfolios
    persist_port_df = collect_portfolios_through_time(
        df,
        port_var,
        id_var,
        needed_days,
        datevar=date_var,
        portfolio_datevar=port_date_var,
    )

    cum_df = pd_utils.cumulate(
        persist_port_df,
        ret_var,
        "between",
        date_var,
        byvars=[port_var, port_date_var, id_var],
        time=cum_time,
        grossify=grossify,
    )

    # One row per (portfolio, formation date); each period's averages are
    # left-merged onto this frame as additional columns.
    port_periods = (
        cum_df[[port_var, port_date_var]]
        .drop_duplicates()
        .sort_values([port_var, port_date_var])
    )

    out_df = port_periods
    for cum_period in cum_time:
        period_df = _average_for_cum_time(
            cum_df,
            cum_period,
            port_var,
            date_var,
            port_date_var,
            ret_var,
            freq=freq,
            weight_var=weight_var,
            include_stderr=include_stderr,
            include_count=include_count,
        )
        out_df = out_df.merge(period_df, how="left", on=[port_var, port_date_var])

    return out_df