def run_strategy_returns_stats(self, strategy):
    """
    run_strategy_returns_stats - Plots useful statistics for the trading strategy (using PyFolio)

    The strategy P&L is converted to returns, its first column is passed to
    ``pf.create_returns_tear_sheet`` and the figure is saved to
    ``strategy.DUMP_PATH + "stats.png"`` before being shown.

    Parameters
    ----------
    strategy : StrategyTemplate
        defining trading strategy

    """
    import logging

    logger = logging.getLogger(__name__)

    pnl = strategy.get_strategy_pnl()
    tz = TimeSeriesTimezone()
    tsc = TimeSeriesCalcs()

    # PyFolio assumes UTC time based DataFrames (so force this localisation)
    # best effort only: if localisation fails we carry on with the index as it is
    try:
        pnl = tz.localise_index_as_UTC(pnl)
    except Exception as e:
        logger.warning("Could not localise P&L index as UTC: %s", e)

    # TODO for intraday strategy make daily

    # convert DataFrame (assumed to have only one column) to Series
    pnl = tsc.calculate_returns(pnl)
    pnl = pnl[pnl.columns[0]]

    # the returned figure is not needed here, plt.savefig below writes the current figure
    pf.create_returns_tear_sheet(pnl, return_fig=True)

    # saving is best effort too (eg. the dump path may not exist), the plot is still shown
    try:
        plt.savefig(strategy.DUMP_PATH + "stats.png")
    except Exception as e:
        logger.warning("Could not save stats.png: %s", e)

    plt.show()
def calculate_ret_stats(self, returns_df, ann_factor):
    """
    calculate_ret_stats - Calculates return statistics for an asset's returns including IR, vol, ret and drawdowns

    The statistics are stored on the instance (``_rets``, ``_vol``, ``_inforatio``, ``_kurtosis`` and
    ``_dd``, one value per column of ``returns_df``); nothing is returned.

    Parameters
    ----------
    returns_df : DataFrame
        asset returns
    ann_factor : int
        annualisation factor to use on return statistics

    """
    tsc = TimeSeriesCalcs()

    self._rets = returns_df.mean(axis=0) * ann_factor
    self._vol = returns_df.std(axis=0) * math.sqrt(ann_factor)
    self._inforatio = self._rets / self._vol
    self._kurtosis = returns_df.kurtosis(axis=0)

    index_df = tsc.create_mult_index(returns_df)

    # running peak of the index (replaces the removed pandas.expanding_max, which also used min_periods=1)
    max2here = index_df.expanding(min_periods=1).max()
    dd2here = index_df / max2here - 1

    # worst drawdown from a running peak, per column
    self._dd = dd2here.min()
def bus_day_of_month_seasonality(self, data_frame,
                                 month_list = None, cum = True,
                                 cal = "FX", partition_by_month = True, add_average = False, price_index = False):
    """
    bus_day_of_month_seasonality - Calculates the average move by business day of the month

    Parameters
    ----------
    data_frame : DataFrame
        returns (or prices if price_index is True); NOTE: when price_index is False the index of the
        DataFrame passed in is converted to datetimes in place
    month_list : list(int)
        months to include (default None means all twelve months)
    cum : boolean
        if True, accumulate the averages into an index with TimeSeriesCalcs.create_mult_index
    cal : str
        holiday calendar passed to the holiday filter and the averaging helper
    partition_by_month : boolean
        if True, unstack the first index level into columns
    add_average : boolean
        if True (and partition_by_month is True), add an 'Avg' column with the row mean
    price_index : boolean
        if True, data_frame holds prices, which are resampled to business days and converted to returns

    Returns
    -------
    DataFrame
    """
    # default built per call rather than shared as a mutable default argument
    if month_list is None:
        month_list = list(range(1, 13))

    tsc = TimeSeriesCalcs()
    tsf = TimeSeriesFilter()

    if price_index:
        # resample into business days (mean was the implicit aggregation of the old resample API)
        data_frame = data_frame.resample('B').mean()
        data_frame = tsc.calculate_returns(data_frame)

    data_frame.index = pandas.to_datetime(data_frame.index)
    data_frame = tsf.filter_time_series_by_holidays(data_frame, cal)

    monthly_seasonality = tsc.average_by_month_day_by_bus_day(data_frame, cal)
    monthly_seasonality = monthly_seasonality.loc[month_list]

    if partition_by_month:
        monthly_seasonality = monthly_seasonality.unstack(level=0)

        if add_average:
            monthly_seasonality['Avg'] = monthly_seasonality.mean(axis=1)

    if cum is True:
        if partition_by_month:
            # add a zero row labelled 0 so the cumulative index has a starting point
            monthly_seasonality.loc[0] = numpy.zeros(len(monthly_seasonality.columns))

            # DataFrame.sort() was removed from pandas; it sorted by index
            monthly_seasonality = monthly_seasonality.sort_index()

        monthly_seasonality = tsc.create_mult_index(monthly_seasonality)

    return monthly_seasonality
def time_of_day_seasonality(self, data_frame, years=False):
    """
    time_of_day_seasonality - Averages a time series by hour/minute of the day

    Parameters
    ----------
    data_frame : DataFrame
        time series (its index must expose a ``year`` attribute when years is set)
    years : boolean
        if False, average over the whole history; otherwise average each calendar year separately,
        postfix the column names with " <year>" and join the yearly results side by side

    Returns
    -------
    DataFrame (None if years is set and the index contains no years)
    """
    calcs = TimeSeriesCalcs()

    if years is False:
        return calcs.average_by_hour_min_of_day_pretty_output(data_frame)

    namer = CommonMan()
    combined = None

    for yr in sorted(set(data_frame.index.year)):
        yearly = calcs.average_by_hour_min_of_day_pretty_output(data_frame[data_frame.index.year == yr])
        yearly.columns = namer.postfix_list(yearly.columns.values, " " + str(yr))

        # the first year seeds the result, later years are joined onto it
        combined = yearly if combined is None else combined.join(yearly)

    return combined
def bus_day_of_month_seasonality(
    self,
    data_frame,
    month_list=None,
    cum=True,
    cal="FX",
    partition_by_month=True,
):
    """
    bus_day_of_month_seasonality - Calculates the average return by business day of the month

    Parameters
    ----------
    data_frame : DataFrame
        returns; NOTE: the index of the DataFrame passed in is converted to datetimes in place
    month_list : list(int)
        months to include (default None means all twelve months)
    cum : boolean
        if True, accumulate the averages into an index with TimeSeriesCalcs.create_mult_index
    cal : str
        holiday calendar passed to the holiday filter and the averaging helper
    partition_by_month : boolean
        if True, unstack the first index level into columns

    Returns
    -------
    DataFrame
    """
    # default built per call rather than shared as a mutable default argument
    if month_list is None:
        month_list = list(range(1, 13))

    tsc = TimeSeriesCalcs()
    tsf = TimeSeriesFilter()

    data_frame.index = pandas.to_datetime(data_frame.index)
    data_frame = tsf.filter_time_series_by_holidays(data_frame, cal)

    monthly_seasonality = tsc.average_by_month_day_by_bus_day(data_frame, cal)
    monthly_seasonality = monthly_seasonality.loc[month_list]

    if partition_by_month:
        monthly_seasonality = monthly_seasonality.unstack(level=0)

    if cum is True:
        # add a zero row labelled 0 (label based .loc replaces the removed .ix indexer)
        monthly_seasonality.loc[0] = numpy.zeros(len(monthly_seasonality.columns))

        if partition_by_month:
            monthly_seasonality.index = monthly_seasonality.index + 1       # shifting index

            # DataFrame.sort() was removed from pandas; it sorted by index
            monthly_seasonality = monthly_seasonality.sort_index()

        monthly_seasonality = tsc.create_mult_index(monthly_seasonality)

    return monthly_seasonality
def calculate_vol_adjusted_returns(self, returns_df, br, returns = True):
    """
    calculate_vol_adjusted_returns - Adjusts returns for a vol target

    Parameters
    ----------
    returns_df : pandas.DataFrame
        Asset returns to be traded (or prices, if returns is False)
    br : BacktestRequest
        Parameters for the backtest specifying start date, finish data, transaction costs etc.
        If it has no portfolio_vol_resample_type attribute, one is set on it with the value 'mean'
    returns : boolean
        is returns_df a returns time series (True) or prices (False)?

    Returns
    -------
    (pandas.DataFrame, pandas.DataFrame)
        vol adjusted returns (with the columns of returns_df) and the leverage applied
    """
    calcs = TimeSeriesCalcs()

    # prices were supplied, so convert them into returns first
    if not returns:
        returns_df = calcs.calculate_returns(returns_df)

    if not hasattr(br, 'portfolio_vol_resample_type'):
        setattr(br, 'portfolio_vol_resample_type', 'mean')

    # vol targeting parameters, in the positional order calculate_leverage_factor is called with
    vol_args = (br.portfolio_vol_target,
                br.portfolio_vol_max_leverage,
                br.portfolio_vol_periods,
                br.portfolio_vol_obs_in_year,
                br.portfolio_vol_rebalance_freq,
                br.portfolio_vol_resample_freq,
                br.portfolio_vol_resample_type)

    leverage_df = self.calculate_leverage_factor(returns_df, *vol_args)

    # the leverage acts as the signal, transaction costs are charged on leverage changes
    vol_returns_df = calcs.calculate_signal_returns_with_tc_matrix(leverage_df, returns_df, tc = br.spot_tc_bp)
    vol_returns_df.columns = returns_df.columns

    return vol_returns_df, leverage_df
def compare_strategy_vs_benchmark(self, br, strategy_df, benchmark_df):
    """
    compare_strategy_vs_benchmark - Compares the trading strategy we are backtesting against a benchmark

    When a benchmark is included, the benchmark and the TimeSeriesDesc object used for its statistics
    are stored in self._benchmark_pnl and self._benchmark_tsd.

    Parameters
    ----------
    br : BacktestRequest
        Parameters for backtest such as start and finish dates (optional attributes read here:
        include_benchmark, calc_stats, portfolio_vol_adjust)
    strategy_df : pandas.DataFrame
        Strategy time series
    benchmark_df : pandas.DataFrame
        Benchmark time series

    Returns
    -------
    pandas.DataFrame
        strategy joined with the benchmark and rebased, or strategy_df untouched if br does not ask
        for a benchmark
    """
    # nothing to compare against unless the request explicitly asks for a benchmark
    if not getattr(br, 'include_benchmark', False):
        return strategy_df

    tsd = TimeSeriesDesc()
    cash_backtest = CashBacktest()
    ts_filter = TimeSeriesFilter()
    ts_calcs = TimeSeriesCalcs()

    # align strategy time series with that of benchmark
    strategy_df, benchmark_df = strategy_df.align(benchmark_df, join='left', axis = 0)

    # if necessary apply vol target to benchmark (to make it comparable with strategy)
    if getattr(br, 'portfolio_vol_adjust', None) is True:
        benchmark_df = cash_backtest.calculate_vol_adjusted_index_from_prices(benchmark_df, br = br)

    # only calculate return statistics if this has been specified (note when different frequencies of data
    # might underrepresent vol
    if getattr(br, 'calc_stats', False):
        benchmark_df = benchmark_df.fillna(method='ffill')
        tsd.calculate_ret_stats_from_prices(benchmark_df, br.ann_factor)
        benchmark_df.columns = tsd.summary()

    # realign strategy & benchmark
    combined_df = strategy_df.join(benchmark_df, how='inner').fillna(method='ffill')
    combined_df = ts_filter.filter_time_series_by_date(br.plot_start, br.finish_date, combined_df)
    combined_df = ts_calcs.create_mult_index_from_prices(combined_df)

    self._benchmark_pnl = benchmark_df
    self._benchmark_tsd = tsd

    return combined_df
def calculate_leverage_factor(self, returns_df, vol_target, vol_max_leverage, vol_periods = 60, vol_obs_in_year = 252,
                              vol_rebalance_freq = 'BM', returns = True, period_shift = 0):
    """
    calculate_leverage_factor - Calculates the time series of leverage for a specified vol target

    Parameters
    ----------
    returns_df : DataFrame
        Asset returns
    vol_target : float
        vol target for assets
    vol_max_leverage : float
        maximum leverage allowed
    vol_periods : int
        number of periods to calculate volatility
    vol_obs_in_year : int
        number of observations in the year
    vol_rebalance_freq : str
        how often to rebalance
    returns : boolean
        is this returns time series or prices?
    period_shift : int
        should we delay the signal by a number of periods?

    Returns
    -------
    pandas.Dataframe
    """
    tsc = TimeSeriesCalcs()

    if not returns:
        returns_df = tsc.calculate_returns(returns_df)

    roll_vol_df = tsc.rolling_volatility(returns_df,
                                         periods = vol_periods, obs_in_year = vol_obs_in_year).shift(period_shift)

    # calculate the leverage as function of vol target (with max lev constraint)
    lev_df = vol_target / roll_vol_df
    lev_df[lev_df > vol_max_leverage] = vol_max_leverage

    # only allow the leverage change at resampling frequency (eg. monthly 'BM')
    # (mean was the implicit aggregation of the old resample API, it now has to be requested)
    lev_df = lev_df.resample(vol_rebalance_freq).mean()

    # put the leverage back on the dates of the returns and carry each value forward
    _, lev_df = returns_df.align(lev_df, join='left', axis = 0)
    lev_df = lev_df.ffill()

    return lev_df
def calculate_ret_stats(self, returns_df, ann_factor):
    """
    calculate_ret_stats - Calculates return statistics for an asset's returns including IR, vol, ret and drawdowns

    The statistics are stored on the instance (``_rets``, ``_vol``, ``_inforatio`` and ``_dd``, one value
    per column of ``returns_df``); nothing is returned.

    Parameters
    ----------
    returns_df : DataFrame
        asset returns
    ann_factor : int
        annualisation factor to use on return statistics

    """
    tsc = TimeSeriesCalcs()

    self._rets = returns_df.mean(axis=0) * ann_factor
    self._vol = returns_df.std(axis=0) * math.sqrt(ann_factor)
    self._inforatio = self._rets / self._vol

    index_df = tsc.create_mult_index(returns_df)

    # running peak of the index (replaces the removed pandas.expanding_max, which also used min_periods=1)
    max2here = index_df.expanding(min_periods=1).max()
    dd2here = index_df / max2here - 1

    # worst drawdown from a running peak, per column
    self._dd = dd2here.min()
def get_pnl_trades(self):
    """
    get_pnl_trades - Gets P&L of each individual trade per signal

    The result is computed from self._signal and self._pnl on first use and kept in
    self._pnl_trades for later calls.

    Returns
    -------
    pandas.Dataframe
    """
    # already computed on an earlier call, so reuse it
    if self._pnl_trades is not None:
        return self._pnl_trades

    self._pnl_trades = TimeSeriesCalcs().calculate_individual_trade_gains(self._signal, self._pnl)

    return self._pnl_trades
def run_day_of_month_analysis(self, strat):
    """
    run_day_of_month_analysis - Constructs the strategy, calculates the seasonality of its P&L by business
    day of the month and by month of the year, and plots both as line graphs

    Parameters
    ----------
    strat : StrategyTemplate
        strategy to analyse (construct_strategy, get_strategy_pnl and FINAL_STRATEGY are used)

    Returns
    -------
    the month of year seasonality, as returned by Seasonality.monthly_seasonality
    """
    from pythalesians.economics.seasonality.seasonality import Seasonality
    from pythalesians.timeseries.calcs.timeseriescalcs import TimeSeriesCalcs

    calcs = TimeSeriesCalcs()
    seasonality = Seasonality()

    strat.construct_strategy()
    strategy_pnl = strat.get_strategy_pnl()

    # get seasonality by day of the month
    daily_pnl = strategy_pnl.resample('B').mean()
    bus_day = seasonality.bus_day_of_month_seasonality(calcs.calculate_returns(daily_pnl), add_average = True)

    # get seasonality by month (built from the business day series, resampled to business month end)
    monthly_pnl = daily_pnl.resample('BM').mean()
    month = seasonality.monthly_seasonality(calcs.calculate_returns(monthly_pnl))

    self.logger.info("About to plot seasonality...")

    day_gp = GraphProperties()
    plotter = PlotFactory()

    output_root = self.DUMP_PATH + strat.FINAL_STRATEGY

    # last column is highlighted on its own axis (presumably the average requested via add_average)
    highlighted = bus_day.columns[-1]

    # Plotting spot over day of month/month of year
    day_gp.color = 'Blues'
    day_gp.scale_factor = self.SCALE_FACTOR
    day_gp.file_output = output_root + ' seasonality day of month.png'
    day_gp.html_file_output = output_root + ' seasonality day of month.html'
    day_gp.title = strat.FINAL_STRATEGY + ' day of month seasonality'
    day_gp.display_legend = False
    day_gp.color_2_series = [highlighted]
    day_gp.color_2 = ['red'] # red, pink
    day_gp.linewidth_2 = 4
    day_gp.linewidth_2_series = [highlighted]
    day_gp.y_axis_2_series = [highlighted]

    plotter.plot_line_graph(bus_day, adapter = self.DEFAULT_PLOT_ENGINE, gp = day_gp)

    month_gp = GraphProperties()

    month_gp.scale_factor = self.SCALE_FACTOR
    month_gp.file_output = output_root + ' seasonality month of year.png'
    month_gp.html_file_output = output_root + ' seasonality month of year.html'
    month_gp.title = strat.FINAL_STRATEGY + ' month of year seasonality'

    plotter.plot_line_graph(month, adapter = self.DEFAULT_PLOT_ENGINE, gp = month_gp)

    return month
def calculate_ret_stats_from_prices(self, prices_df, ann_factor):
    """
    calculate_ret_stats_from_prices - Calculates return statistics for an asset's price

    The prices are converted to returns and handed to calculate_ret_stats; nothing is returned here.

    Parameters
    ----------
    prices_df : DataFrame
        asset prices
    ann_factor : int
        annualisation factor to use on return statistics

    """
    returns_df = TimeSeriesCalcs().calculate_returns(prices_df)

    self.calculate_ret_stats(returns_df, ann_factor)
def run_strategy_returns_stats(self, strategy):
    """
    run_strategy_returns_stats - Plots useful statistics for the trading strategy (using PyFolio)

    The strategy P&L is converted to returns, NaN rows are dropped, and the first column is passed to
    ``pf.create_returns_tear_sheet``; the figure is saved to ``strategy.DUMP_PATH + "stats.png"``
    before being shown.

    Parameters
    ----------
    strategy : StrategyTemplate
        defining trading strategy

    """
    import logging

    logger = logging.getLogger(__name__)

    pnl = strategy.get_strategy_pnl()
    tz = TimeSeriesTimezone()
    tsc = TimeSeriesCalcs()

    # PyFolio assumes UTC time based DataFrames (so force this localisation)
    # best effort only: if localisation fails we carry on with the index as it is
    try:
        pnl = tz.localise_index_as_UTC(pnl)
    except Exception as e:
        logger.warning("Could not localise P&L index as UTC: %s", e)

    # set the matplotlib style sheet & defaults
    # at present this only works in Matplotlib engine
    try:
        matplotlib.rcdefaults()
        plt.style.use(GraphicsConstants().plotfactory_pythalesians_style_sheet['pythalesians-pyfolio'])
    except Exception as e:
        logger.warning("Could not apply the pythalesians-pyfolio style sheet: %s", e)

    # TODO for intraday strategies, make daily

    # convert DataFrame (assumed to have only one column) to Series
    pnl = tsc.calculate_returns(pnl)
    pnl = pnl.dropna()
    pnl = pnl[pnl.columns[0]]

    # the returned figure is not needed here, plt.savefig below writes the current figure
    pf.create_returns_tear_sheet(pnl, return_fig=True)

    # saving is best effort too (eg. the dump path may not exist), the plot is still shown
    try:
        plt.savefig(strategy.DUMP_PATH + "stats.png")
    except Exception as e:
        logger.warning("Could not save stats.png: %s", e)

    plt.show()
def calculate_vol_adjusted_index_from_prices(self, prices_df, br):
    """
    calculate_vol_adjusted_index_from_prices - Adjusts an index of prices for a vol target

    Parameters
    ----------
    prices_df : pandas.DataFrame
        Asset prices to be traded
    br : BacktestRequest
        Parameters for the backtest specifying start date, finish data, transaction costs etc.

    Returns
    -------
    pandas.Dataframe containing vol adjusted index
    """
    calcs = TimeSeriesCalcs()

    # the leverage series returned alongside the vol adjusted returns is not needed here
    vol_returns_df, _ = self.calculate_vol_adjusted_returns(prices_df, br, returns = False)

    return calcs.create_mult_index(vol_returns_df)
def monthly_seasonality(self, data_frame,
                        cum = True,
                        add_average = False, price_index = False):
    """
    monthly_seasonality - Calculates the average move by month

    Parameters
    ----------
    data_frame : DataFrame
        returns (or prices if price_index is True); NOTE: when price_index is False the index of the
        DataFrame passed in is converted to datetimes in place
    cum : boolean
        if True, add a zero row labelled 0 and accumulate the averages into an index with
        TimeSeriesCalcs.create_mult_index
    add_average : boolean
        if True, add an 'Avg' column with the row mean
    price_index : boolean
        if True, data_frame holds prices, which are resampled to business month end and converted to returns

    Returns
    -------
    DataFrame
    """
    tsc = TimeSeriesCalcs()

    if price_index:
        # resample into month end (mean was the implicit aggregation of the old resample API)
        data_frame = data_frame.resample('BM').mean()
        data_frame = tsc.calculate_returns(data_frame)

    data_frame.index = pandas.to_datetime(data_frame.index)

    monthly_seasonality = tsc.average_by_month(data_frame)

    if add_average:
        monthly_seasonality['Avg'] = monthly_seasonality.mean(axis=1)

    if cum is True:
        # add a zero row labelled 0 so the cumulative index has a starting point
        monthly_seasonality.loc[0] = numpy.zeros(len(monthly_seasonality.columns))

        # DataFrame.sort() was removed from pandas; it sorted by index
        monthly_seasonality = monthly_seasonality.sort_index()

        monthly_seasonality = tsc.create_mult_index(monthly_seasonality)

    return monthly_seasonality
def g10_line_plot_gdp(self, start_date, finish_date):
    """
    g10_line_plot_gdp - Plots a line graph of GDP QoQ for the 'g10-ez' country group

    The data comes from self.get_GDP_QoQ, is divided by 100 and rebased with
    TimeSeriesCalcs.create_mult_index_from_prices before plotting. The image file name starts with
    today's date (YYYYMMDD).

    Parameters
    ----------
    start_date : start of the data requested
    finish_date : end of the data requested

    """
    from pythalesians_graphics.graphs import PlotFactory
    from pythalesians_graphics.graphs.graphproperties import GraphProperties
    from pythalesians.timeseries.calcs.timeseriescalcs import TimeSeriesCalcs

    date_prefix = datetime.date.today().strftime("%Y%m%d") + " "

    gdp = self.get_GDP_QoQ(start_date, finish_date, 'g10-ez')

    graph_props = GraphProperties()
    plotter = PlotFactory()

    graph_props.title = "G10 GDP"
    graph_props.units = 'Rebased'
    graph_props.scale_factor = Constants.plotfactory_scale_factor

    # NOTE(review): the file name says "UNE" although this is the GDP plot - confirm it is intended
    graph_props.file_output = date_prefix + 'G10 UNE ' + str(graph_props.scale_factor) + '.png'

    # keep only the part of each column name before the first '-'
    gdp.columns = [name.split('-')[0] for name in gdp.columns]

    graph_props.linewidth_2 = 3
    graph_props.linewidth_2_series = ['United Kingdom']

    # NOTE(review): values are scaled by 1/100 and then passed to the *_from_prices helper - confirm
    rebased_gdp = TimeSeriesCalcs().create_mult_index_from_prices(gdp / 100)

    plotter.plot_generic_graph(rebased_gdp, type = 'line', adapter = 'pythalesians', gp = graph_props)
from pythalesians.market.loaders.lighttimeseriesfactory import LightTimeSeriesFactory
from pythalesians.market.requests.timeseriesrequest import TimeSeriesRequest
from pythalesians.timeseries.calcs.timeseriescalcs import TimeSeriesCalcs
from pythalesians.util.loggermanager import LoggerManager
from chartesians.graphs.graphproperties import GraphProperties

# Example script: each example sits under its own "if True:"/"if False:" switch
if True:
    logger = LoggerManager().getLogger(__name__)

    import datetime

    # just change "False" to "True" to run any of the below examples

    ###### download daily data from Bloomberg for USD/BRL and get biggest downmoves
    if True:
        tsc = TimeSeriesCalcs()

        # daily USD/BRL close from Bloomberg, from the start of 2005 until now
        time_series_request = TimeSeriesRequest(
                start_date = "01 Jan 2005",                     # start date
                finish_date = datetime.datetime.utcnow(),       # finish date
                freq = 'daily',                                 # daily data
                data_source = 'bloomberg',                      # use Bloomberg as data source
                tickers = ['USDBRL'] ,                          # ticker (Thalesians)
                fields = ['close'],                             # which fields to download
                vendor_tickers = ['USDBRL BGN Curncy'],         # ticker (Bloomberg)
                vendor_fields = ['PX_LAST'],                    # which Bloomberg fields to download
                cache_algo = 'internet_load_return')            # how to return data

        ltsf = LightTimeSeriesFactory()

        # fetch the data described by the request
        df = ltsf.harvest_time_series(time_series_request)
def get_fx_cross(self, start, end, cross,
                 cut = "NYC", source = "bloomberg", freq = "intraday", cache_algo='cache_algo_return', type = 'spot'):
    """
    get_fx_cross - Gets time series for one or more FX crosses, building crosses which do not involve USD
    from the two USD legs

    Each cross is read as base = first three letters, terms = next three letters (eg. 'EURUSD').

    Parameters
    ----------
    start : start date of the data
    end : finish date of the data
    cross : str or list(str)
        cross or crosses to fetch
    cut : str
        cut stored in the request (eg. NYC/BGN)
    source : str
        data source; 'gain' and 'dukascopy' are delegated to get_fx_cross_tick
    freq : str
        'intraday' (minute data), 'daily' or 'tick' (delegated to get_fx_cross_tick)
    cache_algo : str
        cache_algo_only, cache_algo_return, internet_load
    type : str
        'spot' for spot (columns named '<cross>.close') or 'tot' for total return indices built from
        the USD legs (columns named '<cross>-tot.close', daily data only)

    Returns
    -------
    pandas.DataFrame
        one column per cross, outer joined on the index with rows containing NaN dropped; None if no
        crosses were requested or total returns are requested for intraday data

    Raises
    ------
    NotImplementedError
        for a total return variant other than daily 'tot'
    ValueError
        for a type which is neither 'spot' nor starts with 'tot'
    """
    if source == "gain" or source == 'dukascopy' or freq == 'tick':
        # pass the caller's cache_algo through rather than silently replacing it with the default
        # NOTE(review): type stays fixed at 'spot' as before - presumably tick data is spot only
        return self.get_fx_cross_tick(start, end, cross,
                                      cut = cut, source = source, cache_algo = cache_algo, type = 'spot')

    if isinstance(cross, str):
        cross = [cross]

    time_series_request = TimeSeriesRequest()
    time_series_factory = self.time_series_factory
    time_series_calcs = TimeSeriesCalcs()
    data_frame_agg = None

    if freq == 'intraday':
        time_series_request.gran_freq = "minute"            # intraday
    elif freq == 'daily':
        time_series_request.gran_freq = "daily"             # daily

    time_series_request.freq_mult = 1                       # 1 min
    time_series_request.cut = cut                           # NYC/BGN ticker
    time_series_request.fields = 'close'                    # close field only
    time_series_request.cache_algo = cache_algo             # cache_algo_only, cache_algo_return, internet_load

    time_series_request.environment = 'backtest'
    time_series_request.start_date = start
    time_series_request.finish_date = end
    time_series_request.data_source = source

    # the same request object is reused for every download, only tickers/category change
    for cr in cross:
        base = cr[0:3]
        terms = cr[3:6]

        if type == 'spot':
            # non-USD crosses
            if base != 'USD' and terms != 'USD':
                base_USD = self.fxconv.correct_notation('USD' + base)
                terms_USD = self.fxconv.correct_notation('USD' + terms)

                # TODO check if the cross exists in the database

                # download base USD cross
                time_series_request.tickers = base_USD
                time_series_request.category = self.fxconv.em_or_g10(base, freq)
                base_vals = time_series_factory.harvest_time_series(time_series_request)

                # download terms USD cross
                time_series_request.tickers = terms_USD
                time_series_request.category = self.fxconv.em_or_g10(terms, freq)
                terms_vals = time_series_factory.harvest_time_series(time_series_request)

                # invert the legs quoted as USDxxx, so that both are expressed as xxxUSD
                if base_USD[0:3] == 'USD':
                    base_vals = 1 / base_vals
                if terms_USD[0:3] == 'USD':
                    terms_vals = 1 / terms_vals

                # give both legs the same column name so the division lines up
                base_vals.columns = ['temp']
                terms_vals.columns = ['temp']
                cross_vals = base_vals.div(terms_vals, axis = 'index')
                cross_vals.columns = [cr + '.close']

            else:
                # one side is USD, the other one decides the category
                non_USD = base if terms == 'USD' else terms

                correct_cr = self.fxconv.correct_notation(cr)

                time_series_request.tickers = correct_cr
                time_series_request.category = self.fxconv.em_or_g10(non_USD, freq)
                cross_vals = time_series_factory.harvest_time_series(time_series_request)

                # flip if not convention
                if correct_cr != cr:
                    cross_vals = 1 / cross_vals

                # label the column with the cross requested, as the other branches do
                # (this used to set columns.names, which left the column label untouched)
                cross_vals.columns = [cr + '.close']

        elif type[0:3] == "tot":
            if freq == 'intraday':
                self.logger.info('Total calculated returns for intraday not implemented yet')
                return None

            if freq != 'daily' or type != "tot":
                # previously this fell through to an UnboundLocalError
                raise NotImplementedError(
                    "Total returns are only implemented for type 'tot' with daily data (got type '"
                    + str(type) + "', freq '" + str(freq) + "')")

            # download base USD cross
            time_series_request.tickers = base + 'USD'
            time_series_request.category = self.fxconv.em_or_g10(base, freq) + '-tot'
            base_vals = time_series_factory.harvest_time_series(time_series_request)

            # download terms USD cross
            time_series_request.tickers = terms + 'USD'
            time_series_request.category = self.fxconv.em_or_g10(terms, freq) + '-tot'
            terms_vals = time_series_factory.harvest_time_series(time_series_request)

            base_rets = time_series_calcs.calculate_returns(base_vals)
            terms_rets = time_series_calcs.calculate_returns(terms_vals)

            # returns of the cross are the difference between the returns of the two USD legs
            cross_rets = base_rets.sub(terms_rets.iloc[:,0],axis=0)

            # first returns of a time series will by NaN, given we don't know previous point
            cross_rets.iloc[0] = 0

            cross_vals = time_series_calcs.create_mult_index(cross_rets)
            cross_vals.columns = [cr + '-tot.close']

        else:
            # previously this fell through to an UnboundLocalError
            raise ValueError("Unknown type '" + str(type) + "' (expected 'spot' or 'tot')")

        if data_frame_agg is None:
            data_frame_agg = cross_vals
        else:
            data_frame_agg = data_frame_agg.join(cross_vals, how='outer')

    # no crosses were requested, so there is nothing to return
    if data_frame_agg is None:
        return None

    # strip the nan elements
    data_frame_agg = data_frame_agg.dropna()

    return data_frame_agg
logger.info("Running backtest...") # use technical indicator to create signals # (we could obviously create whatever function we wanted for generating the signal dataframe) tech_ind = TechIndicator() tech_ind.create_tech_ind(spot_df, indicator, tech_params); signal_df = tech_ind.get_signal() # use the same data for generating signals cash_backtest.calculate_trading_PnL(br, asset_df, signal_df) port = cash_backtest.get_cumportfolio() port.columns = [indicator + ' = ' + str(tech_params.sma_period) + ' ' + str(cash_backtest.get_portfolio_pnl_desc()[0])] signals = cash_backtest.get_porfolio_signal() # get final signals for each series returns = cash_backtest.get_pnl() # get P&L for each series time_series_calcs = TimeSeriesCalcs() trade_returns = time_series_calcs.calculate_individual_trade_gains(signals, returns) print(trade_returns) # print the last positions (we could also save as CSV etc.) print(signals.tail(1)) pf = PlotFactory() gp = GraphProperties() gp.title = "EUR/USD trend model" gp.source = 'Thalesians/BBG (calc with PyThalesians Python library)' gp.scale_factor = 1 gp.file_output = 'output_data/eurusd-trend-example.png' pf.plot_line_graph(port, adapter = 'pythalesians', gp = gp)
from pythalesians.graphics.graphs.graphproperties import GraphProperties # for making elementary calculations on the time series from pythalesians.timeseries.calcs.timeseriescalcs import TimeSeriesCalcs from datetime import timedelta if True: logger = LoggerManager().getLogger(__name__) import datetime # just change "False" to "True" to run any of the below examples ###### download daily data from Bloomberg for USD/BRL and get biggest downmoves if True: tsc = TimeSeriesCalcs() time_series_request = TimeSeriesRequest( start_date="01 Jan 2005", # start date finish_date=datetime.datetime.utcnow(), # finish date freq='daily', # daily data data_source='bloomberg', # use Bloomberg as data source tickers=['USDBRL'], # ticker (Thalesians) fields=['close'], # which fields to download vendor_tickers=['USDBRL BGN Curncy'], # ticker (Bloomberg) vendor_fields=['PX_LAST'], # which Bloomberg fields to download cache_algo='internet_load_return') # how to return data ltsf = LightTimeSeriesFactory() df = ltsf.harvest_time_series(time_series_request)
def calculate_leverage_factor(self, returns_df, vol_target, vol_max_leverage, vol_periods = 60, vol_obs_in_year = 252,
                              vol_rebalance_freq = 'BM', data_resample_freq = None, data_resample_type = 'mean',
                              returns = True, period_shift = 0):
    """
    calculate_leverage_factor - Calculates the time series of leverage for a specified vol target

    Parameters
    ----------
    returns_df : DataFrame
        Asset returns
    vol_target : float
        vol target for assets
    vol_max_leverage : float
        maximum leverage allowed
    vol_periods : int
        number of periods to calculate volatility
    vol_obs_in_year : int
        number of observations in the year
    vol_rebalance_freq : str
        how often to rebalance
    data_resample_freq : str
        do we need to resample the underlying data first? (eg. have we got intraday data?)
        not implemented yet: any value other than None makes the method return None
    data_resample_type : str
        how to aggregate the leverage at each rebalance ('mean', 'first' or 'last');
        any other value makes the method return None
    returns : boolean
        is this returns time series or prices?
    period_shift : int
        should we delay the signal by a number of periods?

    Returns
    -------
    pandas.Dataframe (None for the unsupported options described above)
    """
    if data_resample_freq is not None:
        return None  # TODO not implemented yet

    if data_resample_type not in ('mean', 'first', 'last'):
        return None  # TODO implement other types

    tsc = TimeSeriesCalcs()

    if not returns:
        returns_df = tsc.calculate_returns(returns_df)

    roll_vol_df = tsc.rolling_volatility(returns_df,
                                         periods = vol_periods, obs_in_year = vol_obs_in_year).shift(period_shift)

    # calculate the leverage as function of vol target (with max lev constraint)
    lev_df = vol_target / roll_vol_df
    lev_df[lev_df > vol_max_leverage] = vol_max_leverage

    # should we take the mean, first, last in our resample
    # (data_resample_type was validated above, so it names one of those resampler methods)
    lev_df = getattr(lev_df.resample(vol_rebalance_freq), data_resample_type)()

    # put the leverage back on the dates of the returns and carry each value forward
    _, lev_df = returns_df.align(lev_df, join='left', axis = 0)
    lev_df = lev_df.ffill()

    # ignore the first elements before the vol window kicks in
    # (positional slice; replaces the removed .ix indexer)
    lev_df.iloc[0:vol_periods] = numpy.nan

    return lev_df
def calculate_leverage_factor(self, returns_df, vol_target, vol_max_leverage, vol_periods = 60, vol_obs_in_year = 252,
                              vol_rebalance_freq = 'BM', data_resample_freq = None, data_resample_type = 'mean',
                              returns = True, period_shift = 0):
    """
    calculate_leverage_factor - Calculates the time series of leverage for a specified vol target

    Parameters
    ----------
    returns_df : DataFrame
        Asset returns
    vol_target : float
        vol target for assets
    vol_max_leverage : float
        maximum leverage allowed
    vol_periods : int
        number of periods to calculate volatility
    vol_obs_in_year : int
        number of observations in the year
    vol_rebalance_freq : str
        how often to rebalance
    data_resample_freq : str
        do we need to resample the underlying data first? (eg. have we got intraday data?)
        not implemented yet: any value other than None makes the method return None
    data_resample_type : str
        how to aggregate the leverage at each rebalance, passed on to
        TimeSeriesFilter.resample_time_series_frequency
    returns : boolean
        is this returns time series or prices?
    period_shift : int
        should we delay the signal by a number of periods?

    Returns
    -------
    pandas.Dataframe (None if data_resample_freq is given)
    """
    if data_resample_freq is not None:
        return None  # TODO not implemented yet

    tsc = TimeSeriesCalcs()
    tsf = TimeSeriesFilter()

    if not returns:
        returns_df = tsc.calculate_returns(returns_df)

    roll_vol_df = tsc.rolling_volatility(returns_df,
                                         periods = vol_periods, obs_in_year = vol_obs_in_year).shift(period_shift)

    # calculate the leverage as function of vol target (with max lev constraint)
    lev_df = vol_target / roll_vol_df
    lev_df[lev_df > vol_max_leverage] = vol_max_leverage

    # only allow the leverage to change at the rebalancing frequency
    lev_df = tsf.resample_time_series_frequency(lev_df, vol_rebalance_freq, data_resample_type)

    # put the leverage back on the dates of the returns and carry each value forward
    _, lev_df = returns_df.align(lev_df, join='left', axis = 0)
    lev_df = lev_df.ffill()

    # ignore the first elements before the vol window kicks in
    # (positional slice; replaces the removed .ix indexer)
    lev_df.iloc[0:vol_periods] = numpy.nan

    return lev_df
def construct_strategy(self, br = None):
    """
    construct_strategy - Constructs the returns for all the strategies which have been specified.

    - gets parameters from fill_backtest_request (self.br takes precedence over the br argument when set)
    - market data from fill_assets

    The results (P&L, leverage, statistics, signals, benchmark comparison and annualised returns) are
    stored in the self._strategy_* attributes; nothing is returned.

    Parameters
    ----------
    br : BacktestRequest
        parameters for the backtest, only used when the instance has no br attribute

    """
    from collections import OrderedDict

    calcs = TimeSeriesCalcs()

    # get the parameters for backtesting
    if hasattr(self, 'br'):
        br = self.br
    elif br is None:
        br = self.fill_backtest_request()

    # get market data for backtest
    asset_df, spot_df, spot_df2, basket_dict = self.fill_assets()

    tech_params = br.tech_params if hasattr(br, 'tech_params') else TechParams()

    cumresults = pandas.DataFrame(index = asset_df.index)
    portleverage = pandas.DataFrame(index = asset_df.index)
    tsdresults = OrderedDict()

    # each portfolio key calculate returns - can put parts of the portfolio in the key
    for key, basket in basket_dict.items():
        close_columns = [x + '.close' for x in basket]

        asset_cut_df = asset_df[close_columns]
        spot_cut_df = spot_df[close_columns]

        self.logger.info("Calculating " + key)

        results, cash_backtest = self.construct_individual_strategy(br, spot_cut_df, spot_df2, asset_cut_df, tech_params, key)

        result_name = results.columns[0]

        cumresults[result_name] = results
        portleverage[result_name] = cash_backtest.get_porfolio_leverage()
        tsdresults[key] = cash_backtest.get_portfolio_pnl_tsd()

        # for a key, designated as the final strategy save that as the "strategy"
        if key == self.FINAL_STRATEGY:
            self._strategy_pnl = results
            self._strategy_pnl_tsd = cash_backtest.get_portfolio_pnl_tsd()
            self._strategy_leverage = cash_backtest.get_porfolio_leverage()
            self._strategy_signal = cash_backtest.get_porfolio_signal()
            self._strategy_pnl_trades = cash_backtest.get_pnl_trades()

    # get benchmark for comparison
    benchmark = self.construct_strategy_benchmark()
    cumresults_benchmark = self.compare_strategy_vs_benchmark(br, cumresults, benchmark)

    self._strategy_group_benchmark_tsd = tsdresults

    # NOTE(review): the benchmark entry is added to the same dict object that is stored as
    # _strategy_group_pnl_tsd below, so it shows up there as well - confirm this is intended
    if hasattr(self, '_benchmark_tsd'):
        tsdresults['Benchmark'] = self._benchmark_tsd

    # calculate annualised returns
    returns_benchmark = calcs.calculate_returns(cumresults_benchmark)
    years = calcs.average_by_annualised_year(returns_benchmark)

    self._strategy_group_pnl = cumresults
    self._strategy_group_pnl_tsd = tsdresults
    self._strategy_group_benchmark_pnl = cumresults_benchmark
    self._strategy_group_leverage = portleverage
    self._strategy_group_benchmark_annualised_pnl = years
# tickers for getting total return indices from Bloomberg directly time_series_request_total_ret = copy.copy(time_series_request_spot) time_series_request_total_ret.tickers = ['EURUSD', 'GBPUSD', 'AUDUSD'] time_series_request_total_ret.vendor_tickers = ['EURUSDCR BGN Curncy', 'GBPUSDCR BGN Curncy', 'AUDUSDCR BGN Curncy'] ltsf = LightTimeSeriesFactory() df = None spot_df = ltsf.harvest_time_series(time_series_request_spot) deposit_df = ltsf.harvest_time_series(time_series_request_deposit) deposit_df = deposit_df.fillna(method = 'ffill') deposit_df = deposit_df.fillna(method = 'bfill') # bit of a hack - because some deposit data sparse tot_df = ltsf.harvest_time_series(time_series_request_total_ret) tsc = TimeSeriesCalcs() tot_df = tsc.create_mult_index_from_prices(tot_df) # rebase index at 100 # we can change the tenor = 'ON' # plot total return series comparison for all our crosses # in practice, we would typically make a set of xxxUSD total return indices # and use them to compute all other crosses (assuming we are USD denominated investor) for cross in ['AUDUSD', 'EURUSD', 'GBPUSD']: # create total return index using spot + deposits ind = IndicesFX() ind_df = ind.create_total_return_index(cross, tenor, spot_df, deposit_df) ind_df.columns = [x + '.PYT (with carry)' for x in ind_df.columns]
"AUDUSD BGN Curncy", ], vendor_fields=["close"], # which Bloomberg fields to download cache_algo="internet_load_return", ) # how to return data ltsf = LightTimeSeriesFactory() df = ltsf.harvest_time_series(time_series_request) df.columns = [x.replace(".close", "") for x in df.columns.values] gp = GraphProperties() pf = PlotFactory() gp.source = "Thalesians/BBG (created with PyThalesians Python library)" tsc = TimeSeriesCalcs() df = tsc.create_mult_index_from_prices(df) pf.plot_line_graph(df, adapter="pythalesians", gp=gp) ###### download daily data from Quandl (via FRED) for EUR/USD and GBP/USD spot and then plot if False: time_series_request = TimeSeriesRequest( start_date="01 Jan 1970", # start date finish_date=datetime.date.today(), # finish date freq="daily", # daily data data_source="quandl", # use Quandl as data source tickers=["EURUSD", "GBPUSD"], # ticker (Thalesians) fields=["close"], # which fields to download vendor_tickers=["FRED/DEXUSEU", "FRED/DEXUSUK"], # ticker (Quandl)
def create_tech_ind(self, data_frame_non_nan, name, tech_params):
    """
    create_tech_ind - Calculates a technical indicator, and the trading signal derived from it,
    for every column of the input. The indicator is stored in ``self._techind`` and the signal
    in ``self._signal``.

    Parameters
    ----------
    data_frame_non_nan : pandas.DataFrame
        Input series (one column per asset, string column labels); gaps are forward filled
        before any calculation
    name : str
        Indicator to compute: "SMA", "ROC", "SMA2", "RSI", "BB" or "long-only"
    tech_params : object
        Parameter holder. Depending on ``name`` the following attributes are read:
        sma_period, sma2_period, roc_period, rsi_period, rsi_lower, rsi_upper, bb_period,
        bb_mult. If it has a ``signal_mult`` attribute the signal is multiplied by it.

    Returns
    -------
    pandas.DataFrame
        The indicator values (also kept in ``self._techind``); None if ``name`` is not one of
        the supported indicators
    """
    self._signal = None
    self._techind = None  # do not hand back a stale indicator from an earlier call

    data_frame = data_frame_non_nan.ffill()

    if name == "SMA":
        cols = data_frame.columns.values

        self._techind = data_frame.rolling(window=tech_params.sma_period).mean()

        # +1 when the series is above its moving average, otherwise -1 (including the warm-up
        # period, where the average is NaN and the comparison is False)
        narray = numpy.where(data_frame > self._techind, 1, -1)

        self._signal = pandas.DataFrame(index=data_frame.index, data=narray)
        self._signal.columns = [x + " SMA Signal" for x in cols]
        self._techind.columns = [x + " SMA" for x in cols]

    elif name == "ROC":
        tsc = TimeSeriesCalcs()

        data_frame = tsc.calculate_returns(data_frame)
        cols = data_frame.columns.values

        self._techind = data_frame.rolling(window=tech_params.roc_period).mean()

        # +1 when the rolling mean of returns is positive, otherwise -1
        narray = numpy.where(self._techind > 0, 1, -1)

        self._signal = pandas.DataFrame(index=data_frame.index, data=narray)
        self._signal.columns = [x + " ROC Signal" for x in cols]
        self._techind.columns = [x + " ROC" for x in cols]

    elif name == "SMA2":
        cols = data_frame.columns.values

        sma = data_frame.rolling(window=tech_params.sma_period).mean()
        sma2 = data_frame.rolling(window=tech_params.sma2_period).mean()

        # +1 when the first moving average is above the second, otherwise -1
        narray = numpy.where(sma > sma2, 1, -1)

        self._signal = pandas.DataFrame(index=data_frame.index, data=narray)
        self._signal.columns = [x + " SMA2 Signal" for x in cols]

        sma.columns = [x + " SMA" for x in cols]
        sma2.columns = [x + " SMA2" for x in cols]

        self._techind = pandas.concat([sma, sma2], axis=1)

    elif name == "RSI":
        cols = data_frame.columns.values

        # difference from the previous step; the first row is NaN (no previous row) so drop it
        delta = data_frame.diff()
        delta = delta[1:]

        # split into the positive moves (up) and negative moves (down)
        up, down = delta.copy(), delta.copy()
        up[up < 0] = 0
        down[down > 0] = 0

        # RSI based on simple moving averages of the up and down moves
        roll_up = up.rolling(window=tech_params.rsi_period).mean()
        roll_down = down.abs().rolling(window=tech_params.rsi_period).mean()

        rs = roll_up / roll_down
        rsi = 100.0 - (100.0 / (1.0 + rs))

        self._techind = rsi
        self._techind.columns = [x + " RSI" for x in cols]

        # NOTE(review): the signal below is derived from the input series itself (not from the
        # RSI values) and compares against the *next* observation via shift(-1) - confirm this
        # is intended before relying on it; behaviour is kept as it was
        signal = data_frame.copy()

        sells = (signal.shift(-1) < tech_params.rsi_lower) & (signal > tech_params.rsi_lower)
        buys = (signal.shift(-1) > tech_params.rsi_upper) & (signal < tech_params.rsi_upper)

        signal[buys] = 1
        signal[sells] = -1
        signal[~(buys | sells)] = numpy.nan
        signal = signal.ffill()  # hold the last position until the next buy/sell

        self._signal = signal
        self._signal.columns = [x + " RSI Signal" for x in cols]

    elif name == "BB":
        cols = data_frame.columns.values

        # calculate Bollinger bands: rolling mean +/- bb_mult rolling standard deviations
        mid = data_frame.rolling(window=tech_params.bb_period).mean()
        std_dev = data_frame.rolling(window=tech_params.bb_period).std()

        BB_std = tech_params.bb_mult * std_dev

        # the bands keep the input column labels for now so they align with the input below
        lower = pandas.DataFrame(data=mid.values - BB_std.values, index=mid.index,
                                 columns=data_frame.columns)
        upper = pandas.DataFrame(data=mid.values + BB_std.values, index=mid.index,
                                 columns=data_frame.columns)

        # calculate signals: +1 above the upper band, -1 below the lower band, otherwise hold
        signal = data_frame.copy()

        buys = signal > upper
        sells = signal < lower

        signal[buys] = 1
        signal[sells] = -1
        signal[~(buys | sells)] = numpy.nan
        signal = signal.ffill()

        self._signal = signal
        self._signal.columns = [x + " " + name + " Signal" for x in cols]

        lower.columns = [x + " BB Lower" for x in cols]
        mid.columns = [x + " BB Mid" for x in cols]
        upper.columns = [x + " BB Upper" for x in cols]

        self._techind = pandas.concat([lower, mid, upper], axis=1)

    elif name == "long-only":
        cols = data_frame.columns.values

        # have +1 signals only; the technical indicator is just the (filled) input
        signal_labels = [x + " Long Only Signal" for x in cols]
        techind_labels = [x + " Long Only" for x in cols]

        narray = numpy.ones((len(data_frame.index), len(data_frame.columns)))

        self._signal = pandas.DataFrame(index=data_frame.index, data=narray)
        self._signal.columns = signal_labels

        self._techind = data_frame
        self._techind.columns = techind_labels

    # TODO create other indicators

    # apply signal multiplier (typically to flip signals)
    if self._signal is not None and hasattr(tech_params, 'signal_mult'):
        self._signal = self._signal * tech_params.signal_mult

    return self._techind
tech_ind = TechIndicator() tech_ind.create_tech_ind(spot_df, indicator, tech_params) signal_df = tech_ind.get_signal() # use the same data for generating signals cash_backtest.calculate_trading_PnL(br, asset_df, signal_df) port = cash_backtest.get_cumportfolio() port.columns = [ indicator + ' = ' + str(tech_params.sma_period) + ' ' + str(cash_backtest.get_portfolio_pnl_desc()[0]) ] signals = cash_backtest.get_porfolio_signal( ) # get final signals for each series returns = cash_backtest.get_pnl() # get P&L for each series time_series_calcs = TimeSeriesCalcs() trade_returns = time_series_calcs.calculate_individual_trade_gains( signals, returns) print(trade_returns) # print the last positions (we could also save as CSV etc.) print(signals.tail(1)) pf = PlotFactory() gp = GraphProperties() gp.title = "EUR/USD trend model" gp.source = 'Thalesians/BBG (calc with PyThalesians Python library)' gp.scale_factor = 1 gp.file_output = 'output_data/eurusd-trend-example.png'
def calculate_trading_PnL(self, br, asset_a_df, signal_df):
    """
    calculate_trading_PnL - Calculates P&L of a trading strategy and statistics to be retrieved later

    Results are stored on the instance: individual signals and their P&L (``_signal``, ``_pnl``,
    ``_cumpnl``, ``_tsd_pnl``), the combined portfolio (``_portfolio``, ``_cumportfolio``,
    ``_tsd_portfolio``), leverage (``_individual_leverage`` when signal vol targeting is on,
    ``_portfolio_leverage``) and final positions (``_portfolio_signal``).

    Parameters
    ----------
    br : BacktestRequest
        Parameters for the backtest specifying start date, finish data, transaction costs etc.
        Read here: spot_tc_bp, ann_factor and, when present, signal_vol_adjust (with the
        signal_vol_* settings), portfolio_combination ('sum' or 'mean', default 'mean') and
        portfolio_vol_adjust. Missing signal_vol_resample_type / signal_vol_resample_freq are
        set on ``br`` to 'mean' / None when signal vol targeting is on.

    asset_a_df : pandas.DataFrame
        Asset prices to be traded

    signal_df : pandas.DataFrame
        Signals for the trading strategy

    Raises
    ------
    ValueError
        If br.portfolio_combination is set to something other than 'sum' or 'mean'
    """
    # validate up front: previously an unsupported value only failed later, with an unhelpful
    # UnboundLocalError, after all the P&L had been computed
    combination = getattr(br, 'portfolio_combination', 'mean')

    if combination not in ('sum', 'mean'):
        raise ValueError("portfolio_combination must be 'sum' or 'mean', got " + repr(combination))

    tsc = TimeSeriesCalcs()

    # make sure the dates of both traded asset and signal are aligned properly
    asset_df, signal_df = asset_a_df.align(signal_df, join='left', axis='index')

    # only allow signals to change on the days when we can trade assets
    signal_df = signal_df.mask(numpy.isnan(asset_df.values))  # fill asset holidays with NaN signals
    signal_df = signal_df.ffill()                             # fill these down
    asset_df = asset_df.ffill()                               # fill down asset holidays

    returns_df = tsc.calculate_returns(asset_df)
    tc = br.spot_tc_bp

    # label each P&L column as "<asset> / <signal>"
    pnl_cols = [ret_col + " / " + sig_col
                for ret_col, sig_col in zip(returns_df.columns.values, signal_df.columns.values)]

    # do we have a vol target for individual signals?
    if getattr(br, 'signal_vol_adjust', False) is True:
        if not hasattr(br, 'signal_vol_resample_type'):
            br.signal_vol_resample_type = 'mean'

        if not hasattr(br, 'signal_vol_resample_freq'):
            br.signal_vol_resample_freq = None

        leverage_df = self.calculate_leverage_factor(
            returns_df, br.signal_vol_target, br.signal_vol_max_leverage,
            br.signal_vol_periods, br.signal_vol_obs_in_year,
            br.signal_vol_rebalance_freq, br.signal_vol_resample_freq,
            br.signal_vol_resample_type)

        signal_df = pandas.DataFrame(
            signal_df.values * leverage_df.values, index=signal_df.index, columns=signal_df.columns)

        self._individual_leverage = leverage_df  # leverage of individual signal (before portfolio vol target)

    _pnl = tsc.calculate_signal_returns_with_tc_matrix(signal_df, returns_df, tc=tc)
    _pnl.columns = pnl_cols

    # portfolio is either the sum or the average of the underlying signals' P&L
    if combination == 'sum':
        combined = _pnl.sum(axis=1)
    else:
        combined = _pnl.mean(axis=1)

    portfolio = pandas.DataFrame(data=combined, index=_pnl.index, columns=['Portfolio'])

    # default portfolio leverage is 1 throughout
    portfolio_leverage_df = pandas.DataFrame(data=numpy.ones(len(_pnl.index)), index=_pnl.index,
                                             columns=['Portfolio'])

    # should we apply vol target on a portfolio level basis?
    if getattr(br, 'portfolio_vol_adjust', False) is True:
        portfolio, portfolio_leverage_df = self.calculate_vol_adjusted_returns(portfolio, br=br)

    self._portfolio = portfolio
    self._signal = signal_df                          # individual signals (before portfolio leverage)
    self._portfolio_leverage = portfolio_leverage_df  # leverage on portfolio

    # multiply portfolio leverage * individual signals to get final position signals
    # (the leverage column is broadcast across every signal column)
    length_cols = len(signal_df.columns)
    leverage_column = portfolio_leverage_df.values.flatten()[:, numpy.newaxis]

    # final portfolio signals (including signal & portfolio leverage)
    self._portfolio_signal = pandas.DataFrame(
        data=leverage_column * signal_df.values,
        index=signal_df.index, columns=signal_df.columns)

    # when the portfolio is an average, each signal only gets an equal share of the position
    if combination == 'mean':
        self._portfolio_signal = self._portfolio_signal / float(length_cols)

    self._pnl = _pnl  # individual signals P&L

    # TODO FIX very slow - hence only calculate on demand
    self._pnl_trades = None

    self._tsd_pnl = TimeSeriesDesc()
    self._tsd_pnl.calculate_ret_stats(self._pnl, br.ann_factor)

    self._portfolio.columns = ['Port']
    self._tsd_portfolio = TimeSeriesDesc()
    self._tsd_portfolio.calculate_ret_stats(self._portfolio, br.ann_factor)

    self._cumpnl = tsc.create_mult_index(self._pnl)  # individual signals cumulative P&L
    self._cumpnl.columns = pnl_cols

    self._cumportfolio = tsc.create_mult_index(self._portfolio)  # portfolio cumulative P&L
    self._cumportfolio.columns = ['Port']
if True: time_series_request = TimeSeriesRequest( start_date="01 Jan 2013", # start date finish_date=datetime.date.today(), # finish date freq='daily', # daily data data_source='google', # use Bloomberg as data source tickers=['Apple', 'S&P500 ETF'], # ticker (Thalesians) fields=['close'], # which fields to download vendor_tickers=['aapl', 'spy'], # ticker (Google) vendor_fields=['Close'], # which Bloomberg fields to download cache_algo='internet_load_return') # how to return data ltsf = LightTimeSeriesFactory() tsc = TimeSeriesCalcs() df = tsc.create_mult_index_from_prices( ltsf.harvest_time_series(time_series_request)) gp = GraphProperties() gp.title = "S&P500 vs Apple" # plot first with PyThalesians and then Plotly (via Cufflinks) # just needs 1 word to change # (although, note that AdapterCufflinks does have some extra parameters that can be set in GraphProperties) gp.plotly_username = '******' # note: need to fill in Plotly API key on Constants and change this! gp.plotly_world_readable = True gp.plotly_plot_mode = "online" # will render on Plotly website pf = PlotFactory()
def calculate_trading_PnL(self, br, asset_a_df, signal_df):
    """
    calculate_trading_PnL - Calculates P&L of a trading strategy and statistics to be retrieved later

    Results are stored on the instance: individual signals and their P&L (``_signal``, ``_pnl``,
    ``_cumpnl``, ``_tsd_pnl``), the combined portfolio (``_portfolio``, ``_cumportfolio``,
    ``_tsd_portfolio``), leverage (``_individual_leverage`` when signal vol targeting is on,
    ``_portfolio_leverage``) and final positions (``_portfolio_signal``).

    Parameters
    ----------
    br : BacktestRequest
        Parameters for the backtest specifying start date, finish data, transaction costs etc.
        Read here: spot_tc_bp, ann_factor and, when present, signal_vol_adjust (with the
        signal_vol_* settings), portfolio_combination ('sum' or 'mean', default 'mean') and
        portfolio_vol_adjust. Missing signal_vol_resample_type / signal_vol_resample_freq are
        set on ``br`` to 'mean' / None when signal vol targeting is on.

    asset_a_df : pandas.DataFrame
        Asset prices to be traded

    signal_df : pandas.DataFrame
        Signals for the trading strategy

    Raises
    ------
    ValueError
        If br.portfolio_combination is set to something other than 'sum' or 'mean'
    """
    # reject an unsupported combination immediately; it used to surface only at the end as an
    # UnboundLocalError on `portfolio`
    how_to_combine = getattr(br, 'portfolio_combination', 'mean')

    if how_to_combine not in ('sum', 'mean'):
        raise ValueError("portfolio_combination must be 'sum' or 'mean', got " + repr(how_to_combine))

    tsc = TimeSeriesCalcs()

    # make sure the dates of both traded asset and signal are aligned properly
    asset_df, signal_df = asset_a_df.align(signal_df, join='left', axis='index')

    # only allow signals to change on the days when we can trade assets:
    # blank the signal on asset holidays, then carry the previous signal / price forward
    signal_df = signal_df.mask(numpy.isnan(asset_df.values))
    signal_df = signal_df.ffill()
    asset_df = asset_df.ffill()

    returns_df = tsc.calculate_returns(asset_df)
    tc = br.spot_tc_bp

    # each P&L column is named "<asset> / <signal>"
    pnl_cols = [asset_name + " / " + signal_name
                for asset_name, signal_name in zip(returns_df.columns.values, signal_df.columns.values)]

    # do we have a vol target for individual signals?
    if getattr(br, 'signal_vol_adjust', False) is True:
        # fill in optional resampling settings the request did not specify
        if not hasattr(br, 'signal_vol_resample_type'):
            br.signal_vol_resample_type = 'mean'

        if not hasattr(br, 'signal_vol_resample_freq'):
            br.signal_vol_resample_freq = None

        leverage_df = self.calculate_leverage_factor(
            returns_df, br.signal_vol_target, br.signal_vol_max_leverage,
            br.signal_vol_periods, br.signal_vol_obs_in_year,
            br.signal_vol_rebalance_freq, br.signal_vol_resample_freq,
            br.signal_vol_resample_type)

        signal_df = pandas.DataFrame(
            signal_df.values * leverage_df.values, index=signal_df.index, columns=signal_df.columns)

        # contains leverage of individual signal (before portfolio vol target)
        self._individual_leverage = leverage_df

    _pnl = tsc.calculate_signal_returns_with_tc_matrix(signal_df, returns_df, tc=tc)
    _pnl.columns = pnl_cols

    # portfolio is the sum or the average of the underlying signals' P&L
    row_total = _pnl.sum(axis=1) if how_to_combine == 'sum' else _pnl.mean(axis=1)
    portfolio = pandas.DataFrame(data=row_total, index=_pnl.index, columns=['Portfolio'])

    # unless a portfolio vol target is applied, portfolio leverage is 1 on every date
    portfolio_leverage_df = pandas.DataFrame(data=numpy.ones(len(_pnl.index)), index=_pnl.index,
                                             columns=['Portfolio'])

    # should we apply vol target on a portfolio level basis?
    if getattr(br, 'portfolio_vol_adjust', False) is True:
        portfolio, portfolio_leverage_df = self.calculate_vol_adjusted_returns(portfolio, br=br)

    self._portfolio = portfolio
    self._signal = signal_df                          # individual signals (before portfolio leverage)
    self._portfolio_leverage = portfolio_leverage_df  # leverage on portfolio

    # multiply portfolio leverage * individual signals to get final position signals;
    # the single leverage column broadcasts across all signal columns
    length_cols = len(signal_df.columns)
    leverage_by_date = portfolio_leverage_df.values.flatten()[:, numpy.newaxis]

    self._portfolio_signal = pandas.DataFrame(
        data=leverage_by_date * signal_df.values,
        index=signal_df.index, columns=signal_df.columns)

    # an averaged portfolio holds an equal fraction of each signal
    if how_to_combine == 'mean':
        self._portfolio_signal = self._portfolio_signal / float(length_cols)

    self._pnl = _pnl  # individual signals P&L

    # TODO FIX very slow - hence only calculate on demand
    self._pnl_trades = None

    self._tsd_pnl = TimeSeriesDesc()
    self._tsd_pnl.calculate_ret_stats(self._pnl, br.ann_factor)

    self._portfolio.columns = ['Port']
    self._tsd_portfolio = TimeSeriesDesc()
    self._tsd_portfolio.calculate_ret_stats(self._portfolio, br.ann_factor)

    self._cumpnl = tsc.create_mult_index(self._pnl)  # individual signals cumulative P&L
    self._cumpnl.columns = pnl_cols

    self._cumportfolio = tsc.create_mult_index(self._portfolio)  # portfolio cumulative P&L
    self._cumportfolio.columns = ['Port']
# just change "False" to "True" to run any of the below examples

if True:
    # daily closes for Apple and the S&P500 ETF, downloaded from Google
    time_series_request = TimeSeriesRequest(
        start_date="01 Jan 2013",               # start date
        finish_date=datetime.date.today(),      # finish date
        freq='daily',                           # daily data
        data_source='google',                   # use Google as data source
        tickers=['Apple', 'S&P500 ETF'],        # ticker (Thalesians)
        fields=['close'],                       # which fields to download
        vendor_tickers=['aapl', 'spy'],         # ticker (Google)
        vendor_fields=['Close'],                # which Google fields to download
        cache_algo='internet_load_return',      # how to return data
    )

    ltsf = LightTimeSeriesFactory()
    tsc = TimeSeriesCalcs()

    # download the prices and turn them into indices so the two series are comparable
    df = tsc.create_mult_index_from_prices(
        ltsf.harvest_time_series(time_series_request))

    gp = GraphProperties()
    gp.html_file_output = "output_data/apple.htm"
    gp.title = "S&P500 vs Apple"
    gp.display_legend = False

    pf = PlotFactory()

    # plot first with PyThalesians and then Bokeh
    # just needs 1 word to change
    pf.plot_generic_graph(df, type='line', adapter='pythalesians', gp=gp)
    pf.plot_generic_graph(df, type='line', adapter='bokeh', gp=gp)
tickers = ['EURUSD', # ticker (Thalesians) 'GBPUSD', 'AUDUSD'], fields = ['close'], # which fields to download vendor_tickers = ['EURUSD BGN Curncy', # ticker (Bloomberg) 'GBPUSD BGN Curncy', 'AUDUSD BGN Curncy'], vendor_fields = ['PX_LAST'], # which Bloomberg fields to download cache_algo = 'internet_load_return') # how to return data ltsf = LightTimeSeriesFactory() df = None df = ltsf.harvest_time_series(time_series_request) tsc = TimeSeriesCalcs() df = tsc.calculate_returns(df) df = tsc.rolling_corr(df['EURUSD.close'], 20, data_frame2 = df[['GBPUSD.close', 'AUDUSD.close']]) gp = GraphProperties() gp.title = "1M FX rolling correlations" gp.scale_factor = 3 pf = PlotFactory() pf.plot_line_graph(df, adapter = 'pythalesians', gp = gp) ###### download daily data from Bloomberg for AUD/JPY, NZD/JPY spot with S&P500, then calculate correlation if True: time_series_request = TimeSeriesRequest( start_date="01 Jan 2015", # start date finish_date=datetime.date.today(), # finish date
def get_intraday_moves_over_custom_event(self, data_frame_rets, ef_time_frame, vol=False,
                                         minute_start=5, mins=3 * 60, min_offset=0,
                                         create_index=False, resample=False, freq='minutes'):
    """
    get_intraday_moves_over_custom_event - Lines up returns around each event time, so that the
    moves over every event can be compared side by side

    Parameters
    ----------
    data_frame_rets : pandas.DataFrame
        Returns (date/time indexed); the first column is the one which is analysed.
        Unless ``resample`` is True, helper columns 'Ind' and 'Rel' are added to this
        DataFrame in place.
    ef_time_frame : pandas.DataFrame
        DataFrame whose index holds the event times
    vol : bool
        If True (and ``create_index`` is False) return the rolling 5 observation standard
        deviation, annualised, instead of cumulative returns
    minute_start : int
        Number of periods before each event to include
    mins : int
        Number of periods after each event to include
    min_offset : int
        Offset added to the ordinal labels of the window
    create_index : bool
        If True, return the output of TimeSeriesCalcs.create_mult_index on the aligned returns
    resample : bool
        If True, resample the returns to 1 minute, fill gaps with 0 and pass them through
        TimeSeriesFilter.remove_out_FX_out_of_hours before aligning
    freq : str
        'minutes' or 'days' - the unit of ``minute_start`` and ``mins``

    Returns
    -------
    pandas.DataFrame
        Rows are ordinal offsets around the event, with one column per event

    Raises
    ------
    ValueError
        If ``freq`` is neither 'minutes' nor 'days'
    """
    # an unsupported freq used to fail much later with an UnboundLocalError
    if freq not in ('minutes', 'days'):
        raise ValueError("freq must be 'minutes' or 'days', got " + repr(freq))

    tsf = TimeSeriesFilter()

    # only keep events which fall within the span of the returns
    ef_time_frame = tsf.filter_time_series_by_date(
        data_frame_rets.index[0], data_frame_rets.index[-1], ef_time_frame)

    ef_time = ef_time_frame.index

    if freq == 'minutes':
        ef_time_start = ef_time - timedelta(minutes=minute_start)
        ef_time_end = ef_time + timedelta(minutes=mins)
        ann_factor = 252 * 1440
    else:  # 'days'
        ef_time = ef_time_frame.index.normalize()
        ef_time_start = ef_time - timedelta(days=minute_start)
        ef_time_end = ef_time + timedelta(days=mins)
        ann_factor = 252

    # ordinal label for every observation of a full window around an event
    ords = list(range(-minute_start + min_offset, mins + min_offset))

    # all data needs to be equally spaced
    if resample:
        # make sure time series is properly sampled at 1 min intervals
        # NOTE(review): resample() without an aggregation relies on old pandas behaviour -
        # confirm against the pandas version in use
        data_frame_rets = data_frame_rets.resample('1min')
        data_frame_rets = data_frame_rets.fillna(value=0)
        data_frame_rets = tsf.remove_out_FX_out_of_hours(data_frame_rets)

    data_frame_rets['Ind'] = numpy.nan

    # positions in the returns where each event window starts and finishes
    start_index = data_frame_rets.index.searchsorted(ef_time_start)
    finish_index = data_frame_rets.index.searchsorted(ef_time_end)

    # not all observation windows will be same length (eg. last one?), so only use as many
    # ordinals as the window has observations
    # NOTE(review): .ix is kept because it both indexes by position and creates the 'Rel'
    # column on assignment; it is not available in recent pandas versions
    # TODO vectorise this!
    for start, finish in zip(start_index, finish_index):
        data_frame_rets.ix[start:finish, 'Ind'] = ords[0:(finish - start)]

    # set the release dates
    data_frame_rets.ix[start_index, 'Rel'] = ef_time                            # set entry points
    data_frame_rets.ix[finish_index + 1, 'Rel'] = numpy.zeros(len(start_index))  # set exit points

    data_frame_rets['Rel'] = data_frame_rets['Rel'].ffill()                     # fill down signals

    # get rid of observations outside every event window
    data_frame_rets = data_frame_rets[pandas.notnull(data_frame_rets['Ind'])]

    data_frame = data_frame_rets.pivot(index='Ind', columns='Rel',
                                       values=data_frame_rets.columns[0])

    data_frame.index.names = [None]

    if create_index:
        tsc = TimeSeriesCalcs()
        data_frame.ix[-minute_start + min_offset, :] = numpy.nan
        data_frame = tsc.create_mult_index(data_frame)
    elif vol is True:
        # annualise (if vol)
        data_frame = data_frame.rolling(center=False, window=5).std() * math.sqrt(ann_factor)
    else:
        data_frame = data_frame.cumsum()

    return data_frame
def get_intraday_moves_over_custom_event(self, data_frame_rets, ef_time_frame, vol=False,
                                         minute_start = 5, mins = 3 * 60, min_offset = 0,
                                         create_index = False, resample = False, freq = 'minutes'):
    """
    get_intraday_moves_over_custom_event - Lines up returns around each event time, so that the
    moves over every event can be compared side by side

    Parameters
    ----------
    data_frame_rets : pandas.DataFrame
        Returns (date/time indexed); the first column is the one which is analysed.
        Unless ``resample`` is True, helper columns 'Ind' and 'Rel' are added to this
        DataFrame in place.
    ef_time_frame : pandas.DataFrame
        DataFrame whose index holds the event times
    vol : bool
        If True (and ``create_index`` is False) return the rolling 5 observation standard
        deviation, annualised, instead of cumulative returns
    minute_start : int
        Number of periods before each event to include
    mins : int
        Number of periods after each event to include
    min_offset : int
        Offset added to the ordinal labels of the window
    create_index : bool
        If True, return the output of TimeSeriesCalcs.create_mult_index on the aligned returns
    resample : bool
        If True, resample the returns to 1 minute, fill gaps with 0 and pass them through
        TimeSeriesFilter.remove_out_FX_out_of_hours before aligning
    freq : str
        'minutes' or 'days' - the unit of ``minute_start`` and ``mins``

    Returns
    -------
    pandas.DataFrame
        Rows are ordinal offsets around the event, with one column per event

    Raises
    ------
    ValueError
        If ``freq`` is neither 'minutes' nor 'days'
    """
    # fail early and clearly; an unknown freq used to end in an UnboundLocalError
    if freq not in ('minutes', 'days'):
        raise ValueError("freq must be 'minutes' or 'days', got " + repr(freq))

    tsf = TimeSeriesFilter()

    # discard events outside the period covered by the returns
    ef_time_frame = tsf.filter_time_series_by_date(data_frame_rets.index[0], data_frame_rets.index[-1],
                                                   ef_time_frame)

    ef_time = ef_time_frame.index

    if freq == 'days':
        ef_time = ef_time_frame.index.normalize()
        ef_time_start = ef_time - timedelta(days = minute_start)
        ef_time_end = ef_time + timedelta(days = mins)
        ann_factor = 252
    else:  # 'minutes'
        ef_time_start = ef_time - timedelta(minutes = minute_start)
        ef_time_end = ef_time + timedelta(minutes = mins)
        ann_factor = 252 * 1440

    # ordinals labelling each observation of a complete event window
    ords = list(range(-minute_start + min_offset, mins + min_offset))

    # all data needs to be equally spaced
    if resample:
        # make sure time series is properly sampled at 1 min intervals
        # NOTE(review): resample() without an aggregation relies on old pandas behaviour -
        # confirm against the pandas version in use
        data_frame_rets = data_frame_rets.resample('1min')
        data_frame_rets = data_frame_rets.fillna(value = 0)
        data_frame_rets = tsf.remove_out_FX_out_of_hours(data_frame_rets)

    data_frame_rets['Ind'] = numpy.nan

    # integer positions of the first and last observation of every event window
    start_index = data_frame_rets.index.searchsorted(ef_time_start)
    finish_index = data_frame_rets.index.searchsorted(ef_time_end)

    # not all observation windows will be same length (eg. last one?): truncate the ordinals
    # to the number of observations actually in the window
    # NOTE(review): .ix is kept because it both indexes by position and creates the 'Rel'
    # column on assignment; it is not available in recent pandas versions
    # TODO vectorise this!
    for window_start, window_end in zip(start_index, finish_index):
        window_len = window_end - window_start
        data_frame_rets.ix[window_start:window_end, 'Ind'] = ords[0:window_len]

    # set the release dates
    data_frame_rets.ix[start_index,'Rel'] = ef_time                                 # set entry points
    data_frame_rets.ix[finish_index + 1,'Rel'] = numpy.zeros(len(start_index))      # set exit points

    data_frame_rets['Rel'] = data_frame_rets['Rel'].ffill()                         # fill down signals

    # get rid of observations which are not in any event window
    data_frame_rets = data_frame_rets[pandas.notnull(data_frame_rets['Ind'])]

    data_frame = data_frame_rets.pivot(index='Ind', columns='Rel', values=data_frame_rets.columns[0])

    data_frame.index.names = [None]

    if create_index:
        tsc = TimeSeriesCalcs()
        data_frame.ix[-minute_start + min_offset,:] = numpy.nan
        data_frame = tsc.create_mult_index(data_frame)
    elif vol is True:
        # annualise (if vol)
        data_frame = data_frame.rolling(center=False, window=5).std() * math.sqrt(ann_factor)
    else:
        data_frame = data_frame.cumsum()

    return data_frame
time_series_request_total_ret.tickers = ['EURUSD', 'GBPUSD', 'AUDUSD'] time_series_request_total_ret.vendor_tickers = [ 'EURUSDCR BGN Curncy', 'GBPUSDCR BGN Curncy', 'AUDUSDCR BGN Curncy' ] ltsf = LightTimeSeriesFactory() df = None spot_df = ltsf.harvest_time_series(time_series_request_spot) deposit_df = ltsf.harvest_time_series(time_series_request_deposit) deposit_df = deposit_df.fillna(method='ffill') deposit_df = deposit_df.fillna( method='bfill') # bit of a hack - because some deposit data sparse tot_df = ltsf.harvest_time_series(time_series_request_total_ret) tsc = TimeSeriesCalcs() tot_df = tsc.create_mult_index_from_prices( tot_df) # rebase index at 100 # we can change the tenor = 'ON' # plot total return series comparison for all our crosses # in practice, we would typically make a set of xxxUSD total return indices # and use them to compute all other crosses (assuming we are USD denominated investor) for cross in ['AUDUSD', 'EURUSD', 'GBPUSD']: # create total return index using spot + deposits ind = IndicesFX() ind_df = ind.create_total_return_index(cross, tenor, spot_df,
cache_algo='internet_load_return') # how to return data df = ltsf.harvest_time_series(time_series_request) df.columns = [x.replace('.close', '') for x in df.columns.values] # Bloomberg does not give the milisecond field when you make a tick request, so might as well downsample to S df['JPYUSD'] = 1 / df['JPYUSD'] gp = GraphProperties() pf = PlotFactory() gp.scale_factor = 3 gp.title = 'FX around last NFP date' gp.source = 'Thalesians/BBG (created with PyThalesians Python library)' tsc = TimeSeriesCalcs() df = tsc.create_mult_index_from_prices(df) pf.plot_line_graph(df, adapter='pythalesians', gp=gp) ###### download tick data from Bloomberg for EUR/USD around last FOMC and then downsample to plot if True: finish_date = datetime.datetime.utcnow() start_date = finish_date - timedelta(days=60) # fetch Fed times from Bloomberg time_series_request = TimeSeriesRequest( start_date=start_date, # start date finish_date=finish_date, # finish date category="events", freq='daily', # daily data
data_source = 'bloomberg', # use Bloomberg as data source tickers = ['USDJPY'], # ticker (Thalesians) fields = ['close'], # which fields to download vendor_tickers = ['USDJPY BGN Curncy'], # ticker (Bloomberg) vendor_fields = ['PX_LAST'], # which Bloomberg fields to download cache_algo = 'internet_load_return') # how to return data ltsf = LightTimeSeriesFactory() df = None df = ltsf.harvest_time_series(time_series_request) utc_time = pytz.utc df.index = df.index.tz_localize(utc_time) # work in UTC time tsc = TimeSeriesCalcs() df = tsc.calculate_returns(df) # fetch NFP times from Bloomberg time_series_request = TimeSeriesRequest( start_date = start_date, # start date finish_date = finish_date, # finish date category = "events", freq = 'daily', # daily data data_source = 'bloomberg', # use Bloomberg as data source tickers = ['NFP'], fields = ['release-date-time-full'], # which fields to download vendor_tickers = ['NFP TCH Index'], # ticker (Bloomberg) vendor_fields = ['ECO_FUTURE_RELEASE_DATE_LIST'], # which Bloomberg fields to download cache_algo = 'internet_load_return') # how to return data
def construct_strategy(self):
    """
    construct_strategy - Builds the returns of every strategy in the basket and stores them
    (together with leverage, statistics and the benchmark comparison) on the instance

    - backtest parameters come from self.br if it exists, otherwise from fill_backtest_request
    - market data and the basket definitions come from fill_assets
    """
    calcs = TimeSeriesCalcs()

    # get the parameters for backtesting
    br = self.br if hasattr(self, 'br') else self.fill_backtest_request()

    # get market data for backtest
    asset_df, spot_df, spot_df2, basket_dict = self.fill_assets()

    # fall back on default technical parameters when the request carries none
    tech_params = br.tech_params if hasattr(br, 'tech_params') else TechParams()

    cumresults = pandas.DataFrame(index=asset_df.index)
    portleverage = pandas.DataFrame(index=asset_df.index)
    tsdresults = {}

    # each portfolio key calculate returns - can put parts of the portfolio in the key
    for key, members in basket_dict.items():
        close_fields = [member + '.close' for member in members]

        asset_cut_df = asset_df[close_fields]
        spot_cut_df = spot_df[close_fields]

        self.logger.info("Calculating " + key)

        results, cash_backtest = self.construct_individual_strategy(
            br, spot_cut_df, spot_df2, asset_cut_df, tech_params, key)

        result_name = results.columns[0]

        cumresults[result_name] = results
        portleverage[result_name] = cash_backtest.get_porfolio_leverage()
        tsdresults[key] = cash_backtest.get_portfolio_pnl_tsd()

        # the key designated as the final strategy is also kept as "the" strategy
        if key == self.FINAL_STRATEGY:
            self._strategy_pnl = results
            self._strategy_pnl_tsd = cash_backtest.get_portfolio_pnl_tsd()
            self._strategy_leverage = cash_backtest.get_porfolio_leverage()
            self._strategy_signal = cash_backtest.get_porfolio_signal()
            self._strategy_pnl_trades = cash_backtest.get_pnl_trades()

    # get benchmark for comparison
    benchmark = self.construct_strategy_benchmark()

    cumresults_benchmark = self.compare_strategy_vs_benchmark(br, cumresults, benchmark)

    # the benchmark statistics are added to the same dictionary that holds the strategy
    # statistics, so they show up in both of the attributes assigned from it below
    if hasattr(self, '_benchmark_tsd'):
        tsdresults['Benchmark'] = self._benchmark_tsd

    self._strategy_group_benchmark_tsd = tsdresults

    # calculate annualised returns
    returns_vs_benchmark = calcs.calculate_returns(cumresults_benchmark)
    years = calcs.average_by_annualised_year(returns_vs_benchmark)

    self._strategy_group_pnl = cumresults
    self._strategy_group_pnl_tsd = tsdresults
    self._strategy_group_benchmark_pnl = cumresults_benchmark
    self._strategy_group_leverage = portleverage
    self._strategy_group_benchmark_annualised_pnl = years
def compare_strategy_vs_benchmark(self, br, strategy_df, benchmark_df):
    """
    compare_strategy_vs_benchmark - Puts the strategy being backtested alongside a benchmark

    When the benchmark is included, the (possibly vol adjusted) benchmark is stored in
    self._benchmark_pnl and its statistics object in self._benchmark_tsd.

    Parameters
    ----------
    br : BacktestRequest
        Parameters for backtest such as start and finish dates. Optional attributes
        include_benchmark and calc_stats both default to False when absent.
    strategy_df : pandas.DataFrame
        Strategy time series
    benchmark_df : pandas.DataFrame
        Benchmark time series

    Returns
    -------
    pandas.DataFrame
        strategy_df unchanged when the benchmark is not included, otherwise the strategy
        joined with the benchmark
    """
    include_benchmark = getattr(br, 'include_benchmark', False)
    calc_stats = getattr(br, 'calc_stats', False)

    # nothing to compare against
    if not include_benchmark:
        return strategy_df

    tsd = TimeSeriesDesc()
    cash_backtest = CashBacktest()
    ts_filter = TimeSeriesFilter()
    ts_calcs = TimeSeriesCalcs()

    # align strategy time series with that of benchmark
    strategy_df, benchmark_df = strategy_df.align(benchmark_df, join='left', axis=0)

    # if necessary apply vol target to benchmark (to make it comparable with strategy)
    if getattr(br, 'portfolio_vol_adjust', None) is True:
        benchmark_df = cash_backtest.calculate_vol_adjusted_index_from_prices(benchmark_df, br=br)

    # only calculate return statistics if this has been specified
    if calc_stats:
        tsd.calculate_ret_stats_from_prices(benchmark_df, br.ann_factor)
        benchmark_df.columns = tsd.summary()

    # realign strategy & benchmark, restrict to the plotting window and rebuild as indices
    combined_df = strategy_df.join(benchmark_df, how='inner')
    combined_df = combined_df.fillna(method='ffill')
    combined_df = ts_filter.filter_time_series_by_date(br.plot_start, br.finish_date, combined_df)
    combined_df = ts_calcs.create_mult_index_from_prices(combined_df)

    self._benchmark_pnl = benchmark_df
    self._benchmark_tsd = tsd

    return combined_df
# process data from pythalesians.economics.seasonality.seasonality import Seasonality from pythalesians.timeseries.calcs.timeseriescalcs import TimeSeriesCalcs # displaying data from pythalesians.graphics.graphs.plotfactory import PlotFactory from pythalesians.graphics.graphs.graphproperties import GraphProperties # logging from pythalesians.util.loggermanager import LoggerManager import datetime seasonality = Seasonality() tsc = TimeSeriesCalcs() logger = LoggerManager().getLogger(__name__) pf = PlotFactory() ###### calculate seasonal moves in EUR/USD and GBP/USD (using Quandl data) if True: time_series_request = TimeSeriesRequest( start_date = "01 Jan 1970", # start date finish_date = datetime.date.today(), # finish date freq = 'daily', # daily data data_source = 'quandl', # use Quandl as data source tickers = ['EURUSD', # ticker (Thalesians) 'GBPUSD'], fields = ['close'], # which fields to download vendor_tickers = ['FRED/DEXUSEU', 'FRED/DEXUSUK'], # ticker (Quandl)
def fetch_group_time_series(self, time_series_request_list):
    """
    fetch_group_time_series - Fetches several time series requests through a worker pool
    and outer joins whatever comes back

    Every request is handed to ``self.fetch_single_time_series``. The pool implementation
    (threads or processes) and the number of workers are read from ``Constants``; the data
    source of the first request selects the worker count, falling back to the ``'other'``
    entry.

    Parameters
    ----------
    time_series_request_list : list
        Requests to fetch; may be empty or None

    Returns
    -------
    Result of ``TimeSeriesCalcs.pandas_outer_join`` on the non-None downloads, or None when
    there was nothing to request or every download returned None
    """
    # no requests: nothing to download (and no first element to pick the pool size from)
    if not time_series_request_list:
        return None

    time_series_calcs = TimeSeriesCalcs()
    constants = Constants()

    # the kind of pool is configurable; compare by value ('is' on a str tests identity)
    if constants.time_series_factory_thread_technique == "thread":
        # thread backed pool exposing the multiprocessing Pool API
        from multiprocessing.dummy import Pool
    else:
        # process backed pool; per the original note the dill based library is needed to
        # pickle the objects correctly, and it is currently not very stable
        from multiprocessing_on_dill import Pool

    # worker count is configured per data source, with 'other' as the fallback
    thread_no_by_source = constants.time_series_factory_thread_no
    thread_no = thread_no_by_source['other']

    data_source = time_series_request_list[0].data_source
    if data_source in thread_no_by_source:
        thread_no = thread_no_by_source[data_source]

    pool = Pool(thread_no)

    try:
        # run each download in its own worker and block until all of them have returned
        data_frame_group = pool.map_async(
            self.fetch_single_time_series, time_series_request_list).get()
    finally:
        # always release the workers, even when one of the downloads raised
        pool.close()
        pool.join()

    data_frame_agg = None

    # collect together all the time series, dropping failed (None) downloads
    if data_frame_group is not None:
        data_frame_group = [df for df in data_frame_group if df is not None]

        # only join when something actually came back
        if data_frame_group:
            data_frame_agg = time_series_calcs.pandas_outer_join(data_frame_group)

    return data_frame_agg