def _get_single_rolling_contract_info(
        self, real_contracts_prices_da: PricesDataFrame, rolling_dates: pd.DatetimeIndex, contract_number: int) \
        -> RollingContractData:
    """
    Builds the data of one rolling contract by stitching together slices of consecutive real contracts.

    For the i-th rolling date the contract at position ``contract_number - 1 + i`` is taken, restricted to the
    period between that rolling date and the next one (or "now" for the last rolling date), and the partial
    prices, times to expiration and simple returns of the Close prices are collected and concatenated.

    Parameters
    ----------
    real_contracts_prices_da
        prices of the real contracts
        NOTE(review): annotated as PricesDataFrame, but it is indexed with three axes and exposes ``tickers``
        and ``to_pandas()``, so it looks like a data array - confirm and adjust the annotation.
    rolling_dates
        dates on which the contract is rolled; each one starts a new partial period
    contract_number
        1-based number of the rolling contract (1 selects the first column for the first rolling date)

    Returns
    -------
    RollingContractData
        object created from the concatenated prices, times to expiration and returns
    """
    now = self.timer.now()
    tickers = real_contracts_prices_da.tickers.values

    # every period ends where the next one starts; the last period is closed with the current time
    period_ends = list(rolling_dates[1:]) + [now]

    # partial results, concatenated after the loop
    prices_parts = []
    returns_parts = []
    tte_parts = []

    for i, (start_date, end_date) in enumerate(zip(rolling_dates, period_ends)):
        contract_idx = contract_number - 1 + i

        contract_df = real_contracts_prices_da[:, contract_idx, :].to_pandas()
        contract_df = cast_dataframe(contract_df, PricesDataFrame).dropna()
        contract_df.name = tickers[contract_idx]

        prices_part, tte_part = self._filter_dates(contract_df, start_date, end_date)
        returns_part = prices_part.loc[:, PriceField.Close].to_simple_returns()

        # Remove the first price and the first time to expiration for every contract except for the first one.
        # Otherwise there would be two data points on rolling dates.
        # Think if the following code shouldn't be removed, so that there would be 2 data points on rolling dates
        if i > 0:
            prices_part = prices_part.iloc[1:]
            tte_part = tte_part.iloc[1:]

        prices_parts.append(prices_part)
        tte_parts.append(tte_part)
        returns_parts.append(returns_part)

    prices_df = cast_dataframe(pd.concat(prices_parts, axis=0), PricesDataFrame)
    time_to_expiration_tms = pd.concat(tte_parts, axis=0)  # type: pd.Series
    returns_tms = cast_series(pd.concat(returns_parts, axis=0), ReturnsSeries)

    # set names for series
    self._set_series_names(contract_number, prices_df, returns_tms, time_to_expiration_tms)

    return RollingContractData(prices_df, time_to_expiration_tms, returns_tms)
def drawdown_tms(input_data: InputData, frequency: Frequency = None) -> InputData:
    """
    Calculates the drawdown for every date of the input data.

    Parameters
    ----------
    input_data: QFSeries, QFDataFrame
        QF timeseries or multiple timeseries grouped into a DataFrame
    frequency: Frequency
        optional parameter passed to ``to_prices``, so that it does not need to infer the frequency

    Returns
    -------
    QFSeries, QFDataFrame
        drawdowns indexed like the prices obtained from ``input_data``. The drawdown for a given date is the
        percentage difference between the maximal price value up to that date and the price value for that date.
    """
    prices = input_data.to_prices(frequency=frequency)
    running_peak = prices.cummax()
    drawdowns = 1 - prices / running_peak

    if isinstance(input_data, QFSeries):
        return cast_series(drawdowns, QFSeries)
    return cast_dataframe(drawdowns, QFDataFrame)
def test_cast_series(self):
    """Casting a simple returns series to PricesSeries must change the type and keep the values."""
    source_tms = self.test_simple_returns_tms
    casted_tms = cast_series(source_tms, PricesSeries)

    self.assertEqual(PricesSeries, type(casted_tms))
    self.assertEqual(list(source_tms.values), list(casted_tms.values))
def get_price(self, tickers: Union[Ticker, Sequence[Ticker]], fields: Union[PriceField, Sequence[PriceField]],
              start_date: datetime, end_date: datetime = None, frequency: Frequency = Frequency.DAILY) -> \
        Union[None, PricesSeries, PricesDataFrame, QFDataArray]:
    """
    Downloads historical prices and labels them with PriceField values instead of provider-specific strings.

    The fields are mapped to their string representation, the data is fetched with ``get_history`` and the
    result is converted depending on how many fields and tickers were requested:

    - single field, single ticker: PricesSeries
    - single field, many tickers: PricesDataFrame
    - many fields, single ticker: PricesDataFrame with columns renamed to PriceField values
    - many fields, many tickers: the container returned by ``get_history`` with its ``fields`` coordinates
      replaced by PriceField values

    Raises
    ------
    NotImplementedError
        if the frequency is not higher than daily and ``start_date`` equals ``end_date``
    """
    is_intraday = frequency > Frequency.DAILY
    if not is_intraday and start_date and start_date == end_date:
        raise NotImplementedError("Single date queries are not supported yet")

    fields_str = self._map_field_to_str(tickers, fields)
    history = self.get_history(tickers, fields_str, start_date, end_date, frequency)

    # Single field - only the container type needs to be adjusted
    if self._is_single_price_field(fields):
        if self._is_single_ticker(tickers):
            return cast_series(history, PricesSeries)
        return cast_dataframe(history, PricesDataFrame)

    # Many fields - the string labels need to be replaced with PriceField values
    str_to_field = self.str_to_price_field_map(self._get_first_ticker(tickers))

    if self._is_single_ticker(tickers):
        # Many fields and single ticker - replace columns in PricesDataFrame
        prices_df = cast_dataframe(history, PricesDataFrame)
        columns_mapping = {field_str: str_to_field[field_str] for field_str in prices_df.columns}
        prices_df.rename(columns=columns_mapping, inplace=True)
        return prices_df

    price_fields = [str_to_field[field_str] for field_str in history.fields.values]
    return history.assign_coords(fields=price_fields)
def get_factor_return_attribution(cls, fund_tms: QFSeries, fit_tms: QFSeries, regressors_df: QFDataFrame,
                                  coefficients: QFSeries, alpha: float) -> Tuple[QFSeries, float]:
    """
    Returns performance attribution for each factor in given regressors and also calculates the unexplained
    return.

    Returns
    -------
    Tuple[QFSeries, float]
        - attribution of the annualised return of the fit to every regressor (proportional to the profit
          generated by that regressor),
        - the difference between the annualised fund return and the sum of the attributions.
    """
    fund_returns = fund_tms.to_simple_returns()
    regressors_returns = regressors_df.to_simple_returns()

    annualised_fund_return = cagr(fund_returns)
    annualised_fit_return = cagr(fit_tms)

    total_nav = fit_tms.to_prices(initial_price=1.0)

    def profit_of_factor(factor_column) -> float:
        # profit of one factor: its coefficient times the sum of (NAV without the last value * factor return)
        factor_name = factor_column.name
        factor_ret = regressors_returns.loc[:, factor_name].values
        return coefficients.loc[factor_name] * (total_nav[:-1].values * factor_ret).sum()

    factors_profits = regressors_returns.apply(profit_of_factor)

    alpha_profit = total_nav[:-1].sum() * alpha
    total_profit = factors_profits.sum() + alpha_profit

    attribution = cast_series(factors_profits * annualised_fit_return / total_profit, QFSeries)
    unexplained_return = annualised_fund_return - attribution.sum()

    return attribution, unexplained_return
def rolling_window(self, window_size: int, func: Callable[[Union["QFSeries", np.ndarray]], float],
                   step: int = 1, optimised: bool = False) -> "QFSeries":
    """
    Looks at a number of windows of size ``window_size`` and transforms the data in those windows based on the
    specified ``func``. The window indices are stepped at a rate specified by ``step``.

    Parameters
    ----------
    window_size
        The size of the window to look at specified as the number of data points.
    func
        The function to call for each window. It takes the window's data (a ``QFSeries``, or an ``ndarray``
        when ``optimised`` is set) and returns a value (usually a number such as a ``float``).
    step
        The amount of data points to step through after each iteration, i.e. how much to move the window by in
        each iteration.
    optimised
        Whether the more efficient pandas algorithm should be used for the rolling window application.
        Note: This has some limitations: The ``step`` must be 1 and ``func`` will get an ``ndarray`` parameter
        which only contains values and no index.

    Returns
    -------
    QFSeries
        A ``QFSeries`` containing the transformed data.
    """
    if optimised:
        from qf_lib.containers.series.cast_series import cast_series

        assert step == 1, "Optimised rolling is only possible with a step of 1."
        rolled = self.rolling(window=window_size, center=False).apply(func=func)
        return cast_series(rolled, self._constructor)

    # Apply a rolling window transformation on the QFSeries.
    # Based on https://github.com/quantopian/pyfolio/blob/master/pyfolio/timeseries.py#L616.
    transformed = QFSeries()
    first_pos = 0

    while first_pos + window_size <= len(self):
        last_pos = first_pos + window_size - 1

        # Labels of the first and the last data point of the current window.
        first_label = self.index[first_pos]
        last_label = self.index[last_pos]

        # The result for a window is stored under the label of the window's last data point.
        transformed[last_label] = func(self.loc[first_label:last_label])

        first_pos += step

    return transformed
def drifting_weights(cls, assets_rets_df: SimpleReturnsDataFrame, weights: pd.Series) \
        -> Tuple[SimpleReturnsSeries, QFDataFrame]:
    """
    Calculates the time series of portfolio returns (given the initial weights of portfolio's assets).
    Weights of assets change over time because there is no rebalancing.

    The method also calculates the allocation matrix which shows what portfolio consists of on each date.

    Parameters
    ----------
    assets_rets_df
        simple returns of assets which create the portfolio
    weights
        weights of assets which create the portfolio

    Returns
    -------
    portfolio_rets_tms
        timeseries of portfolio's returns
    allocation_df
        dataframe indexed with dates and showing allocations in time (one column per asset)
    """
    assert len(weights) == assets_rets_df.num_of_columns

    weights_sum = weights.sum()
    if abs(weights_sum - 1.0) > cls.EPSILON:
        cls.logger().warning("Sum of all weights is not equal to 1.0: sum(weights) = {:f}".format(weights_sum))

    # value of every asset in time, starting from its initial weight
    assets_values_df = assets_rets_df.to_prices(initial_prices=weights.values)
    portfolio_value_tms = cast_series(assets_values_df.sum(axis=1), PricesSeries)

    portfolio_rets = portfolio_value_tms.to_simple_returns()
    portfolio_rets *= weights_sum  # scale returns so that they correspond to the level of investment

    # To get an allocation matrix each row of assets' values is divided by the portfolio value at that time.
    # The portfolio values are shaped into a vertical vector, so that the division is broadcast over columns.
    portfolio_value_column = portfolio_value_tms.values.reshape((-1, 1))
    allocation_matrix = assets_values_df.values / portfolio_value_column

    # to keep the correct level of investment, values in the allocation matrix are multiplied by the sum
    # of weights
    allocation_matrix *= weights_sum

    # the last row is dropped; the remaining rows are labelled with the index of the returns
    allocation_matrix = allocation_matrix[:-1, :]

    allocation_df = QFDataFrame(
        index=assets_rets_df.index.copy(), columns=assets_rets_df.columns.copy(), data=allocation_matrix
    )

    return portfolio_rets, allocation_df
def cast_dataframe_to_proper_type(result):
    """
    Casts the result to a QF container matching its number of axes.

    One axis gives a QFSeries, two axes give a QFDataFrame; anything else is returned unchanged.
    """
    axes_count = len(result.axes)

    if axes_count == 1:
        return cast_series(result, QFSeries)
    if axes_count == 2:
        return cast_dataframe(result, QFDataFrame)
    return result
def close_open_gap(prices_df: PricesDataFrame, initial_price: int = 1, transaction_cost_percentage: float = 0,
                   transaction_cost_value: float = 0) -> PricesSeries:
    """
    Calculates price changes during the night gap (opening price compared to closing price from the previous
    day). May be interpreted as performance of strategy based on buying at close and selling at next open.

    Parameters
    ----------
    prices_df
        PricesDataFrame of at least 2 series: PriceField.Open and PriceField.Close
    initial_price
        initial price of the timeseries. If no price will be specified, then it will be assumed to be 1.
    transaction_cost_percentage
        cost of a single transaction [%]; percentage of the transaction value
        by default set to 0
        can't have a non-zero value if transaction_cost_value is set!
    transaction_cost_value
        cost of a single transaction [currency of examined asset]
        by default set to 0
        can't have a non-zero value if transaction_cost_percentage is set!

    Returns
    -------
    PricesSeries
        price changes
    """
    assert prices_df.num_of_rows > 1
    assert transaction_cost_percentage >= 0 and transaction_cost_value >= 0
    assert not (transaction_cost_percentage > 0 and transaction_cost_value > 0)  # only one type may be used

    open_today = prices_df[PriceField.Open]
    close_yesterday = prices_df[PriceField.Close].shift(1)

    if transaction_cost_value > 0:
        # fixed cost: selling yields less and buying costs more by the same amount
        sell_price = open_today - transaction_cost_value
        buy_price = close_yesterday + transaction_cost_value
    elif transaction_cost_percentage > 0:
        # proportional cost expressed in percents of the transaction value
        sell_price = open_today * (1 - transaction_cost_percentage / 100)
        buy_price = close_yesterday * (1 + transaction_cost_percentage / 100)
    else:
        sell_price = open_today
        buy_price = close_yesterday

    gap_returns = (sell_price / buy_price) - 1

    # the first row has no previous close, so it is dropped
    gap_returns = cast_series(gap_returns.iloc[1:], SimpleReturnsSeries)

    return gap_returns.to_prices(initial_price)
def total_cumulative_return(self) -> pd.Series:
    """
    Calculates total cumulative return for each column.

    Returns
    -------
    pandas.Series
        Series containing total cumulative return for each column of the original DataFrame.
    """
    column_type = self._constructor_sliced
    per_column_returns = self.apply(column_type.total_cumulative_return, axis=0)
    return cast_series(per_column_returns, pd.Series)
def _get_single_date_price(
        self, tickers: Union[Ticker, Sequence[Ticker]], nans_allowed: bool, frequency: Frequency = Frequency.DAILY) \
        -> Union[float, QFSeries]:
    """
    Returns the prices of the given tickers at the current time of the timer.

    Parameters
    ----------
    tickers
        a single ticker or a sequence of tickers
    nans_allowed
        if False, prices missing at the current time are filled with the latest values available in the
        downloaded data (and the series is named "Last available asset prices")
    frequency
        frequency passed to the data provider

    Returns
    -------
    float, QFSeries
        a single value if a single ticker was provided, otherwise a series indexed with tickers;
        an empty QFSeries if no tickers were provided
    """
    tickers, was_single_ticker_provided = convert_to_list(tickers, Ticker)

    # if an empty tickers list was supplied then return an empty result
    if not tickers:
        return QFSeries()

    # Compute the time ranges, used further by the get_price function
    current_datetime = self.timer.now()

    # We download the prices since the last 7 days. In case of getting the last available price, we assume that
    # within each 7 consecutive days, at least one price will occur. If not, in case e.g. future contracts, we
    # assume that the contract ended and we need to e.g. close the position for this ticker in the portfolio, if
    # open.
    start_date = current_datetime - RelativeDelta(days=7)
    current_date = self._zero_out_time_component(current_datetime)

    price_fields = [PriceField.Open, PriceField.Close]

    prices_data_array = self.data_provider.get_price(tickers, price_fields, start_date, current_date, frequency)
    prices_df = self._data_array_to_dataframe(prices_data_array)

    # ignore everything that is later than the current time
    prices_df = prices_df.loc[:current_datetime]

    try:
        prices_series = prices_df.loc[current_datetime, :]
    except KeyError:
        # no row for the current time - start with a series without prices
        prices_series = QFSeries(index=tickers)

    prices_series.name = "Current asset prices"

    if not nans_allowed:
        # fill NaNs with latest available prices
        last_available_close_prices = prices_df.apply(func=lambda series: series.asof(current_datetime))

        if not last_available_close_prices.empty:
            unavailable_prices_tickers = prices_series.isnull()
            prices_series.loc[unavailable_prices_tickers] = \
                last_available_close_prices.loc[unavailable_prices_tickers]

        prices_series.name = "Last available asset prices"

    prices_series = cast_series(prices_series, QFSeries)

    if was_single_ticker_provided:
        # The series is labelled with tickers, so the only element has to be taken by position. An integer key
        # passed to [] relies on a positional fallback which is deprecated in pandas.
        return prices_series.iloc[0]
    return prices_series
def _get_weighted_portfolio_rets(self, returns, weights, intercept):
    """
    Calculates returns of a portfolio built from the given returns, weights and intercept.

    The intercept is appended to the weights and all of them are scaled to sum up to 1. A constant column is
    appended to the returns, so that its weight is the normalised intercept.
    """
    assert len(returns.columns) == len(weights)

    # normalize weights, so that they contain intercept factor in the end and that they all sum up to 1
    raw_weights = np.array(list(weights) + [intercept])
    norm_weights = raw_weights / sum(raw_weights)

    returns_with_const = sm.add_constant(returns, prepend=False)
    weighted_rets = returns_with_const.dot(norm_weights)

    # NOTE(review): the result is cast to type(returns) although returns exposes .columns - confirm that this
    # is the intended target type.
    weighted_rets = cast_series(weighted_rets, type(returns))
    weighted_rets.__finalize__(returns)

    return weighted_rets
def _get_single_date_price(self, tickers: Union[Ticker, Sequence[Ticker]],
                           nans_allowed: bool) -> Union[float, pd.Series]:
    """
    Returns the prices of the given tickers at the current time of the timer.

    Prices from the 7 days preceding the current date are downloaded, so that missing prices can be filled
    with the latest available ones.

    Parameters
    ----------
    tickers
        a single ticker or a sequence of tickers
    nans_allowed
        if False, prices missing at the current time are filled with the latest values available in the
        downloaded data (and the series is named "Last available asset prices")

    Returns
    -------
    float, pandas.Series
        a single value if a single ticker was provided, otherwise a series indexed with tickers;
        an empty Series if no tickers were provided
    """
    tickers, was_single_ticker_provided = convert_to_list(tickers, Ticker)

    # if an empty tickers list was supplied then return an empty result
    if not tickers:
        return pd.Series()

    current_datetime = self.timer.now()
    current_date = self._zero_out_time_component(current_datetime)

    start_date = current_date - RelativeDelta(days=7)

    price_fields = [PriceField.Open, PriceField.Close]

    prices_data_array = self.price_data_provider.get_price(tickers, price_fields, start_date, current_date)
    prices_df = self._data_array_to_dataframe(prices_data_array)

    # ignore everything that is later than the current time
    prices_df = prices_df.loc[:current_datetime]

    try:
        prices_series = prices_df.loc[current_datetime, :]  # axes: date, ticker
    except KeyError:
        # no row for the current time - start with a series without prices
        prices_series = pd.Series(index=tickers)

    prices_series.name = "Current asset prices"

    if not nans_allowed:
        # fill NaNs with latest available prices
        last_available_close_prices = prices_df.apply(func=lambda series: series.asof(current_datetime))

        if not last_available_close_prices.empty:
            unavailable_prices_tickers = prices_series.isnull()
            prices_series.loc[unavailable_prices_tickers] = \
                last_available_close_prices.loc[unavailable_prices_tickers]

        prices_series.name = "Last available asset prices"

    prices_series = cast_series(prices_series, pd.Series)

    if was_single_ticker_provided:
        # The series is labelled with tickers, so the only element has to be taken by position. An integer key
        # passed to [] relies on a positional fallback which is deprecated in pandas.
        return prices_series.iloc[0]
    return prices_series
def cast_data_array_to_proper_type(result: QFDataArray, use_prices_types=False):
    """
    Converts the data array to a container matching its number of dimensions.

    - 0 dimensions: the single item of the array,
    - 1 dimension: a series (named like the array),
    - 2 dimensions: a data frame,
    - more dimensions: the array itself.

    If ``use_prices_types`` is set, PricesSeries / PricesDataFrame are used instead of QFSeries / QFDataFrame.
    """
    if use_prices_types:
        series_type, data_frame_type = PricesSeries, PricesDataFrame
    else:
        series_type, data_frame_type = QFSeries, QFDataFrame

    dimensions_count = len(result.shape)

    if dimensions_count == 0:
        return result.item()

    if dimensions_count == 1:
        series = cast_series(result.to_pandas(), series_type)
        series.name = result.name
        return series

    if dimensions_count == 2:
        return cast_dataframe(result.to_pandas(), data_frame_type)

    return result
def get_last_available_price(
        self, tickers: Union[Ticker, Sequence[Ticker]], frequency: Frequency = None) -> Union[float, QFSeries]:
    """
    Gets the latest available price for given assets, even if the full bar is not yet available.

    If no frequency is given, the fixed data provider frequency is used and, if that is not set either,
    the 1 minute frequency. It returns the CLOSE price of the last available bar. If "now" is after the market
    OPEN, and before the market CLOSE, the last available price is equal to the current price (CLOSE price of
    the bar, which right bound is equal to "now"). If the market did not open yet, the last available CLOSE
    price will be returned. Non-zero seconds or microseconds values are omitted (e.g. 13:40:01 is always
    treated as 13:40:00).

    Parameters
    -----------
    tickers: Ticker, Sequence[Ticker]
        tickers of the securities which prices should be downloaded
    frequency: Frequency
        frequency of the data

    Returns
    -------
    float, QFSeries
        a single price if a single ticker was provided, otherwise a series named
        "Last available asset prices", indexed with tickers and containing the latest available prices;
        an empty QFSeries if no tickers were provided

    Raises
    ------
    ValueError
        if the resulting frequency is not an intraday frequency
    """
    frequency = frequency or self.fixed_data_provider_frequency or Frequency.MIN_1

    if frequency <= Frequency.DAILY:
        raise ValueError("The Intraday Data Handler can be used only with the Intraday Frequency")

    tickers, was_single_ticker_provided = convert_to_list(tickers, Ticker)

    # if an empty tickers list was supplied then return an empty result
    if not tickers:
        return QFSeries()

    current_datetime = self.timer.now()

    # If the current_datetime represents the time after Market Close and before Market Open, shift it to the
    # Market Close of the day before
    if current_datetime + MarketOpenEvent.trigger_time() > current_datetime:
        current_datetime = current_datetime - RelativeDelta(days=1)
        current_datetime = current_datetime + MarketCloseEvent.trigger_time()
    elif current_datetime + MarketCloseEvent.trigger_time() < current_datetime:
        current_datetime = current_datetime + MarketCloseEvent.trigger_time()

    # If the current_datetime represents Saturday or Sunday, shift it to last Friday
    if current_datetime.weekday() in (5, 6):
        current_datetime = current_datetime - RelativeDelta(weekday=4, weeks=1)

    # The time range denotes the current_datetime +- time delta related to the given frequency. The current price is
    # represented as the close price of (time_range_start, current_datetime) range, labeled using the time_range_
    # start value in most of the cases.
    #
    # The only exception is the price at the market open - in this case we do not have the bar directly
    # leading up to market open time. Thus, the open price from the time range (current_datetime, time_range_end)
    # is used to denote the price.
    time_range_start = current_datetime - frequency.time_delta()
    time_range_end = current_datetime + frequency.time_delta()

    # The start date is used to download older data, in case if there is no price available currently and we are
    # interested in the last available one. Therefore, at first we look one hour in the past. If this amount of data
    # would not be sufficient, we would look up to a few days in the past.
    download_start_date = current_datetime - Frequency.MIN_60.time_delta()

    def download_prices(start_time, end_time, multiple_days=False):
        # Function which downloads prices for the given tickers. In case if the time range spans over multiple days
        # and thus contains at least one Market Open Event, combine the Open price for the first bar after the
        # market open with the Close prices for all other bars from this day.
        if multiple_days:
            price_fields = [PriceField.Open, PriceField.Close]
            prices = self.data_provider.get_price(tickers, price_fields, start_time, end_time, frequency)
            return self._data_array_to_dataframe(prices, frequency)
        else:
            return self.data_provider.get_price(tickers, PriceField.Close, start_time, end_time, frequency)

    # If the data contains the Market Open Price, merge the prices. The conditions are checked in order and
    # the first one which holds is enough.
    contains_market_open = (
        download_start_date <= MarketOpenEvent.trigger_time() + time_range_end <= time_range_end
        or download_start_date <= MarketOpenEvent.trigger_time() + download_start_date <= time_range_end
        or (time_range_end - download_start_date) > timedelta(days=1)
    )

    prices_data_array = download_prices(download_start_date, time_range_end, contains_market_open)

    # Access the price bar starting at time_range_start and ending at current_datetime
    try:
        prices_series = prices_data_array.asof(time_range_start)
        prices_series.name = "Last available asset prices"

        if prices_series.isnull().values.any():
            # If any of the values is null, download more data, using a longer period of time
            raise IndexError

    except IndexError:
        # Download data using a longer period of time. In case of Monday or Tuesday, we download data from last 4
        # days in order to handle situations, were there was no price on Monday or Friday (and during the weekend).
        # In all other cases, we download data from the last 2 days.
        number_of_days_to_go_back = 2 if download_start_date.weekday() not in (0, 1) else 4
        prices_data_array = download_prices(
            download_start_date - RelativeDelta(days=number_of_days_to_go_back),
            time_range_end,
            multiple_days=True)

        prices_series = prices_data_array.asof(time_range_start)
        prices_series.name = "Last available asset prices"

    prices_series = cast_series(prices_series, QFSeries)

    if was_single_ticker_provided:
        # The series is labelled with tickers, so the only element has to be taken by position. An integer key
        # passed to [] relies on a positional fallback which is deprecated in pandas.
        return prices_series.iloc[0]
    return prices_series
def get_aggregate_returns(series: QFSeries, convert_to: Frequency, multi_index: bool = False) \
        -> SimpleReturnsSeries:
    """
    Aggregates returns by the given frequency (day, week, month or year).

    Parameters
    ----------
    series
        Series which is converted to simple returns and then aggregated.
    convert_to
        Frequency of the aggregated returns. If ``multi_index`` is False, it must be one of: Frequency.DAILY,
        Frequency.WEEKLY, Frequency.MONTHLY, Frequency.YEARLY.
    multi_index
        Determines whether the grouping multi-index should be preserved.

    Returns
    -------
    SimpleReturnsSeries
        Aggregated returns (total cumulative return of each group), named like the input series.
    """
    simple_rets = series.to_simple_returns()
    grouping = get_grouping_for_frequency(convert_to)

    # fix for grouping with multi-index (whenever a tuple is identifying a group).
    # Example: in weekly grouping a group could be identified by a tuple (2014, 52). Then the whole series would
    # be identified by a multi-level index (dates, dates) which is forbidden (names of levels must be unique).
    # Ideally each grouping would define names of the levels, e.g. (year, week).
    simple_rets.index.name = None

    grouped_rets = simple_rets.groupby(grouping).apply(lambda rets: rets.total_cumulative_return())
    aggregated_series = cast_series(grouped_rets, SimpleReturnsSeries)

    if not multi_index:
        # calculate a simple index based on the grouped MultiIndex
        group_keys = aggregated_series.index

        if convert_to == Frequency.DAILY:
            # it is a day
            # NOTE(review): assumes that a daily group key is ordered as (day, month, year) - confirm against
            # get_grouping_for_frequency
            dates = [datetime(key[2], key[1], key[0]) for key in group_keys]
        elif convert_to == Frequency.WEEKLY:
            # it is always Friday
            dates = [iso_to_gregorian(key[0], key[1], 5) for key in group_keys]
        elif convert_to == Frequency.MONTHLY:
            # it is the end of the month
            dates = [datetime(key[0], key[1], monthrange(key[0], key[1])[1]) for key in group_keys]
        elif convert_to == Frequency.YEARLY:
            # it is the end of the year
            dates = [datetime(key, 12, 31) for key in group_keys]
        else:
            assert False

        aggregated_series = SimpleReturnsSeries(data=aggregated_series.values, index=DatetimeIndex(dates))
        aggregated_series.sort_index(inplace=True)

    aggregated_series.name = series.name

    return aggregated_series
def _get_beta_and_alpha(self, benchmark_tms, column, columns_type, nans_in_column_idx):
    """
    Calculates beta and alpha of a column against the benchmark, skipping the column's entries marked as NaNs.

    ``nans_in_column_idx`` is a mask selecting the entries to skip; the remaining entries are cast to
    ``columns_type`` before the calculation.
    """
    valid_values = cast_series(column[~nans_in_column_idx], columns_type)
    beta, alpha = beta_and_alpha(valid_values, benchmark_tms)
    return beta, alpha
def get_current_price(
        self, tickers: Union[Ticker, Sequence[Ticker]], frequency: Frequency = None) -> Union[float, QFSeries]:
    """
    Works just like get_last_available_price() but it can return NaNs if data is not available at the current
    moment (e.g. it returns NaN on a non-trading day).

    If no frequency is given, the fixed data provider frequency is used and, if that is not set either,
    the 1 minute frequency.

    The CLOSE price of the currently available bar is returned. E.g. for 1 minute frequency, at 13:00 (if the
    market opens before 13:00), the CLOSE price of the 12:59 - 13:00 bar will be returned. Exactly at the
    market open the OPEN price of the bar starting at the current time is returned instead. If there is no
    bar starting exactly at the expected time, the prices are left empty.

    Parameters
    -----------
    tickers: Ticker, Sequence[Ticker]
        tickers of the securities which prices should be downloaded
    frequency: Frequency
        frequency of the data

    Returns
    -------
    float, QFSeries
        a single price if a single ticker was provided, otherwise a series named "Current asset prices",
        indexed with tickers; an empty QFSeries if no tickers were provided

    Raises
    ------
    ValueError
        if the resulting frequency is not an intraday frequency
    """
    frequency = frequency or self.fixed_data_provider_frequency or Frequency.MIN_1

    if frequency <= Frequency.DAILY:
        raise ValueError("The Intraday Data Handler can be used only with the Intraday Frequency")

    tickers, was_single_ticker_provided = convert_to_list(tickers, Ticker)

    # if an empty tickers list was supplied then return an empty result
    if not tickers:
        return QFSeries()

    current_datetime = self.timer.now()

    # Check if the current time is at the market open, if so - take the Open price of the time range, starting
    # at current datetime
    if current_datetime + MarketOpenEvent.trigger_time() == current_datetime:
        time_range_start = current_datetime
        field = PriceField.Open
    else:
        time_range_start = current_datetime - frequency.time_delta()
        field = PriceField.Close

    prices_data_array = self.data_provider.get_price(
        tickers, field, time_range_start, time_range_start + frequency.time_delta(), frequency)

    try:
        # Below, the loc[time_range_start] is used instead of iloc[0], in order to return the price exactly from the
        # time_range_start, and not from the range between time_range_start and time_range_start +
        # frequency.time_delta()
        prices_series = prices_data_array.loc[time_range_start]
    except KeyError:
        prices_series = QFSeries(index=tickers)

    prices_series.name = "Current asset prices"

    prices_series = cast_series(prices_series, QFSeries)

    if was_single_ticker_provided:
        # The series is labelled with tickers, so the only element has to be taken by position. An integer key
        # passed to [] relies on a positional fallback which is deprecated in pandas.
        return prices_series.iloc[0]
    return prices_series