def example_request_mid_benchmark():
    """Example of how to do market analysis without any trade data, eg. calculating the market mid,
    the spread to mid, resampling the market data etc.
    """
    from tcapy.analysis.algos.benchmark import BenchmarkMarketMid, BenchmarkMarketSpreadToMid, \
        BenchmarkMarketResampleOffset, BenchmarkMarketFilter
    from tcapy.analysis.algos.resultsform import BarResultsForm, TimelineResultsForm

    tca_request = get_tca_request()

    # Allow the analysis to be done in parallel, day by day
    # (note: we can't do analysis which requires data outside of the daily chunks if we do this!)
    tca_request.multithreading_params['splice_request_by_dates'] = use_multithreading

    # Filter market data by time of day between 15:00-17:00 LDN,
    # then calculate the market mid, then calculate the spread to the mid,
    # then resample the data into 1 minute bins, taking the mean of each minute (and the TWAP)
    # and calculating the absolute range
    tca_request.benchmark_calcs = [
        BenchmarkMarketFilter(time_of_day={'start_time': "15:00", 'finish_time': "17:00"},
                              time_zone='Europe/London'),
        BenchmarkMarketMid(),
        BenchmarkMarketSpreadToMid(),
        BenchmarkMarketResampleOffset(market_resample_freq='1', market_resample_unit='min',
                                      price_field='mid', resample_how=['mean', 'twap', 'absrange'],
                                      dropna=True),
    ]

    # Calculate the mean spread to mid for EURUSD by time of day during our sample (do not weight by any other field)
    # Calculate the mean absrange for EURUSD by time of day (London timezone)/month of the year (ie. a proxy for volatility)
    tca_request.results_form = [
        TimelineResultsForm(market_trade_order_list='EURUSD', metric_name='ask_mid_spread',
                            weighting_field=None, by_date='time', scalar=10000.0),
        TimelineResultsForm(market_trade_order_list='EURUSD', metric_name='absrange',
                            weighting_field=None, by_date=['month', 'timeldn'], scalar=10000.0)
    ]

    tca_request.use_multithreading = True

    tca_engine = TCAEngineImpl()

    dict_of_df = tca_engine.calculate_tca(tca_request)

    # Print out the keys for all the DataFrames returned
    print(dict_of_df.keys())

    # Print market data snapshots
    print(dict_of_df['EURUSD_df'])
    print(dict_of_df['USDJPY_df'])
    print(dict_of_df['EURUSD_df'].columns)
    print(dict_of_df['USDJPY_df'].columns)

    # Print out the mean spread by time of day
    print(dict_of_df['timeline_EURUSD_ask_mid_spread_by/mean_time/all'])

    # Plot the mean spread to mid by time of day
    Chart(engine='plotly').plot(dict_of_df['timeline_EURUSD_ask_mid_spread_by/mean_time/all'])

    # Plot the absolute range over each minute, averaged by time of day (LDN) and month of the year
    Chart(engine='plotly').plot(dict_of_df['timeline_EURUSD_absrange_by/mean_month_timeldn/all'],
                                style=Style(title='EURUSD absolute range by time of day (LDN)', color='Reds',
                                            scale_factor=-1))
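# The example above assumes a get_tca_request() helper is defined elsewhere in this file. If it is not,
# a minimal sketch might look like the following. The field names mirror the TCARequest constructed in the
# Dukascopy examples below; the function name, the 'market-analysis' tca_type and the exact date range/tickers
# are assumptions for illustration only, not a statement of the definitive tcapy API.
def get_tca_request_sketch():
    from tcapy.analysis.tcarequest import TCARequest

    # No trade data is attached: we only analyse market data for the mid/spread/resample benchmarks
    return TCARequest(start_date='05 May 2017', finish_date='10 May 2017',
                      ticker=['EURUSD', 'USDJPY'],
                      tca_type='market-analysis',  # assumption: a market-only analysis type
                      market_data_store='dukascopy')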
def tca_example_csv_trade_data_dukascopy():
    """Loads up trade/order data from CSV files and market data externally from Dukascopy. Does not use any
    databases; if you only rarely run TCA, this is fine. However, for heavy use of TCA, we strongly recommend
    maintaining an internal tick database, as external downloading of data can be very slow.

    In this case we are simply calculating the slippage of every trade and of the orders above them.
    """
    from tcapy.analysis.tcaengine import TCAEngineImpl
    from tcapy.analysis.tcarequest import TCARequest

    from tcapy.analysis.algos.benchmark import BenchmarkArrival, BenchmarkSpreadToMid
    from tcapy.analysis.algos.metric import MetricSlippage
    from tcapy.analysis.algos.resultsform import TimelineResultsForm

    tca_version = constants.tcapy_version
    tca_engine = TCAEngineImpl(version=tca_version)

    # The test trade/order data is populated between 25 Apr 2017-05 Jun 2017
    # with trades/orders for 'EURUSD', 'USDJPY' and 'EURJPY'
    csv_trade_order_mapping = OrderedDict([
        ('trade_df', os.path.join(folder, 'small_test_trade_df.csv')),
        ('order_df', os.path.join(folder, 'small_test_order_df.csv'))
    ])

    # Specify the TCA request (note: by specifying use_multithreading=False, we avoid dependencies such as Celery)
    #
    # Depending on how the caching is set up, tcapy may try to download market data in monthly/weekly chunks
    # and cache them. To force deletion of the cache you can run the below
    #
    # volatile_cache.clear_cache()
    #
    # However, if you run TCA for the same period again, it will load the market data from Redis/in-memory,
    # rather than downloading it externally from Dukascopy
    tca_request = TCARequest(
        start_date='05 May 2017', finish_date='10 May 2017', ticker=['EURUSD'],
        tca_type='detailed',
        trade_data_store='csv', market_data_store='dukascopy',
        trade_order_mapping=csv_trade_order_mapping,
        metric_calcs=[MetricSlippage()],
        results_form=[
            TimelineResultsForm(metric_name='slippage', by_date='datehour', scalar=10000.0)
        ],
        benchmark_calcs=[BenchmarkArrival(), BenchmarkSpreadToMid()],
        use_multithreading=False)

    # Dictionary of DataFrames as output from the TCA calculation
    dict_of_df = tca_engine.calculate_tca(tca_request)

    print(dict_of_df.keys())
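# A hedged follow-up sketch for tca_example_csv_trade_data_dukascopy(): the TimelineResultsForm above
# aggregates slippage by date/hour, and the resulting DataFrame can be plotted with chartpy, just as in
# example_request_mid_benchmark(). The dictionary key follows the 'timeline_..._by/mean_datehour/all'
# naming pattern seen earlier in this file and is an assumption, as is plotting via chartpy here.
def example_plot_slippage_timeline(dict_of_df):
    from chartpy import Chart, Style

    # Slippage is already expressed in basis points, given scalar=10000.0 in the results form
    timeline_slippage_df = dict_of_df['timeline_trade_df_slippage_by/mean_datehour/all']

    Chart(engine='plotly').plot(timeline_slippage_df,
                                style=Style(title='Slippage by date/hour (bp)', scale_factor=-1))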
def tca_example_csv_trade_data_dukascopy_no_redis():
    """Runs a TCA calculation without any Redis caching at all. In practice, this should be avoided, since it
    will likely be much slower, given we will end up accessing market/trade data far more often from a slow
    source. This is particularly an issue when we are downloading large samples of market data from an
    external source. For very small time periods it might be fine.
    """
    from tcapy.analysis.tcaengine import TCAEngineImpl
    from tcapy.analysis.tcarequest import TCARequest

    from tcapy.analysis.algos.benchmark import BenchmarkArrival, BenchmarkSpreadToMid
    from tcapy.analysis.algos.metric import MetricSlippage
    from tcapy.analysis.algos.resultsform import TimelineResultsForm

    tca_version = constants.tcapy_version
    tca_engine = TCAEngineImpl(version=tca_version)

    # The test trade/order data is populated between 25 Apr 2017-05 Jun 2017
    # with trades/orders for 'EURUSD', 'USDJPY' and 'EURJPY'
    csv_trade_order_mapping = OrderedDict([
        ('trade_df', os.path.join(folder, 'small_test_trade_df.csv')),
        ('order_df', os.path.join(folder, 'small_test_order_df.csv'))
    ])

    # Specify the TCA request (note: by specifying use_multithreading=False, we avoid dependencies such as Celery)
    #
    # Depending on how the caching is set up, tcapy may try to download market data in monthly/weekly chunks
    # and cache them. To force deletion of the cache you can run the below
    #
    # volatile_cache.clear_cache()
    #
    # However, if you run TCA for the same period again, it will load the market data from Redis/in-memory,
    # rather than downloading it externally from Dukascopy
    tca_request = TCARequest(
        start_date='05 May 2017', finish_date='06 May 2017', ticker=['EURUSD'],
        tca_type='detailed',
        trade_data_store='csv', market_data_store='dukascopy',
        trade_order_mapping=csv_trade_order_mapping,
        metric_calcs=[MetricSlippage()],
        results_form=[
            TimelineResultsForm(metric_name='slippage', by_date='datehour', scalar=10000.0)
        ],
        benchmark_calcs=[BenchmarkArrival(), BenchmarkSpreadToMid()],
        use_multithreading=False)

    # Switch off all Redis caching for this request
    tca_request.multithreading_params = {
        # Split the request into daily chunks for parallel computation?
        'splice_request_by_dates': False,  # True or False

        'cache_period': 'month',  # month or week

        # Cache trade data in monthly/periodic chunks in Redis (reduces database calls a lot)
        'cache_period_trade_data': False,

        # Cache market data in monthly/periodic chunks in Redis (reduces database calls a lot)
        'cache_period_market_data': False,

        # Return trade data internally as handles (usually necessary for Celery)
        'return_cache_handles_trade_data': False,

        # Return market data internally as handles (usually necessary for Celery)
        'return_cache_handles_market_data': False,

        # In production we recommend 'celery', which allows us to reuse Python processes;
        # here everything runs in a single process
        'parallel_library': 'single'
    }

    # Dictionary of DataFrames as output from the TCA calculation
    dict_of_df = tca_engine.calculate_tca(tca_request)

    print(dict_of_df.keys())

    market_df = dict_of_df['market_df']

    # Downsample the tick level market data to 1 minute bars (taking the last observation in each minute)
    market_df_minute = market_df.resample('1min').last()

    print(market_df_minute)
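# A minimal way to run these examples as a script, assuming the module-level names used above
# (constants, folder, use_multithreading, Chart, Style, OrderedDict, os, get_tca_request) are
# imported/defined earlier in this file, and that the required data sources and caches are reachable.
if __name__ == '__main__':
    tca_example_csv_trade_data_dukascopy()
    tca_example_csv_trade_data_dukascopy_no_redis()

    # Needs Redis (and optionally Celery) running, given it uses the multithreading parameters
    example_request_mid_benchmark()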