def test_full_detailed_tca_calculation():
    """Tests a detailed TCA calculation, checking that it has the right tables returned.
    """
    tca_request = TCARequest(start_date=start_date, finish_date=finish_date, ticker=ticker,
                             trade_data_store=trade_data_store, market_data_store=market_data_store,
                             trade_order_mapping=trade_order_mapping)

    tca_engine = TCAEngineImpl(version=tcapy_version)

    dict_of_df = tca_engine.calculate_tca(tca_request=tca_request)

    assert (trade_df_name in dict_of_df
            and 'sparse_market_' + trade_df_name in dict_of_df
            and 'market_df' in dict_of_df)

    tca_request.ticker = missing_ticker

    data_missing_exception = False

    try:
        dict_of_df = tca_engine.calculate_tca(tca_request=tca_request)
    except DataMissingException:
        data_missing_exception = True

    assert data_missing_exception

def test_data_offset():
    """Tests offsetting the market and trade data by a user-specified number of milliseconds. This can be useful
    if clocks were slightly out of sync when the market or trade data was recorded.
    """
    Mediator.get_volatile_cache().clear_cache()

    tca_request = TCARequest(start_date=start_date, finish_date=finish_date, ticker=ticker,
                             trade_data_store=trade_data_store, market_data_store=market_data_store,
                             trade_order_mapping=trade_order_mapping)

    tca_engine = TCAEngineImpl(version=tcapy_version)

    dict_of_df = tca_engine.calculate_tca(tca_request=tca_request)

    # Now offset both the trade and market data
    tca_request.trade_data_offset_ms = 1
    tca_request.market_data_offset_ms = -1

    dict_of_df_offset = tca_engine.calculate_tca(tca_request=tca_request)

    trade_df = dict_of_df[trade_df_name]
    market_df = dict_of_df['market_df']

    trade_df_offset = dict_of_df_offset[trade_df_name]
    market_df_offset = dict_of_df_offset['market_df']

    assert all(market_df.index + timedelta(milliseconds=-1) == market_df_offset.index)
    assert all(trade_df.index + timedelta(milliseconds=1) == trade_df_offset.index)

    for c in constants.date_columns:
        if c in trade_df.columns:
            assert all(trade_df[c] + timedelta(milliseconds=1) == trade_df_offset[c])

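def example_offset_index_sketch():
    """Minimal standalone sketch of the offsetting behaviour the test above verifies, using plain pandas.
    The timestamps and prices here are purely illustrative and are not taken from the test dataset.
    """
    import pandas as pd
    from datetime import timedelta

    # Illustrative tick data
    index = pd.DatetimeIndex(['2017-05-01 10:00:00.000', '2017-05-01 10:00:00.250'])
    df = pd.DataFrame({'mid': [1.0912, 1.0913]}, index=index)

    # Shift every timestamp by +1ms, which is effectively what trade_data_offset_ms=1 asks tcapy to do
    df_offset = df.copy()
    df_offset.index = df_offset.index + timedelta(milliseconds=1)

    # The relationship asserted in the test above: original index + offset == offset index
    assert all(df.index + timedelta(milliseconds=1) == df_offset.index)
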
def _split_tca_request_into_list(self, tca_request):
    """Splits a TCA request by ticker.

    Parameters
    ----------
    tca_request : TCARequest
        TCA request to be broken up into individual tickers

    Returns
    -------
    TCARequest (list)
    """
    ticker = tca_request.ticker

    if not (isinstance(ticker, list)):
        ticker = [ticker]

    tca_request_list = []

    # Go through every ticker (and also split into a list)
    for tick in ticker:
        tca_request_temp = TCARequest(tca_request=tca_request)
        tca_request_temp.ticker = tick

        tca_request_list.append(tca_request_temp)

    return self._util_func.flatten_list_of_lists(tca_request_list)

def test_invalid_tca_inputs():
    """Checks an exception is thrown by TCAEngine if the ticker is not valid (eg. if it is None, a random string
    of 6 letters, or if it includes '/')
    """
    tca_engine = TCAEngineImpl(version=tcapy_version)

    invalid_tickers = [None, 'KRPAZY', 'EUR/USD']

    ticker_exception_ok = []

    for t in invalid_tickers:
        tca_request = TCARequest(start_date=start_date, finish_date=finish_date, ticker=t)

        try:
            trade_order_results_df_dict = tca_engine.calculate_tca(tca_request)

            ticker_exception_ok.append(False)
        except Exception as e:
            if isinstance(e, ValidationException):
                ticker_exception_ok.append(True)

    ### Check exception is thrown by TCAEngine if the start/finish dates are malformed
    date_exception_ok = []

    try:
        tca_request = TCARequest(start_date='01 Mar19', finish_date='01Oc t20', ticker='EURUSD')

        date_exception_ok.append(False)
    except Exception as e:
        if isinstance(e, DateException):
            date_exception_ok.append(True)

    assert any(ticker_exception_ok) and any(date_exception_ok)

def get_sample_data():
    logger.info("About to load data for " + ticker)

    if use_test_csv:
        tca_request = TCARequest(start_date=start_date, finish_date=finish_date, ticker=ticker,
                                 trade_data_store='csv', reporting_currency='EUR',
                                 market_data_store=os.path.join(folder, 'small_test_market_df.csv.gz'),
                                 trade_order_mapping={'trade_df': os.path.join(folder, 'small_test_trade_df.csv'),
                                                      'order_df': os.path.join(folder, 'small_test_order_df.csv')},
                                 tca_type=tca_type, benchmark_calcs=BenchmarkMarketMid())
    else:
        tca_request = TCARequest(start_date=start_date, finish_date=finish_date, ticker=ticker,
                                 trade_data_store=trade_data_source, reporting_currency='EUR',
                                 market_data_store=market_data_store,
                                 trade_order_mapping=['trade_df', 'order_df'],
                                 tca_type=tca_type, benchmark_calcs=BenchmarkMarketMid())

    tca_engine = TCAEngineImpl()

    trade_order_results_df_dict = tca_engine.calculate_tca(tca_request)

    return trade_order_results_df_dict[ticker + '_df'], trade_order_results_df_dict['trade_df'], \
           trade_order_results_df_dict['order_df']

def test_full_detailed_tca_calculation(fill_market_trade_databases):
    """Tests a detailed TCA calculation, checking that it has the right tables returned.
    """
    tca_request = TCARequest(start_date=start_date, finish_date=finish_date, ticker=ticker,
                             trade_data_store=trade_data_store, trade_data_database_name=trade_data_database_name,
                             market_data_store=market_data_store,
                             market_data_database_table=market_data_database_table,
                             trade_order_mapping=trade_order_mapping, use_multithreading=use_multithreading)

    tca_engine = TCAEngineImpl(version=tcapy_version)

    dict_of_df = tca_engine.calculate_tca(tca_request=tca_request)

    assert ('trade_df' in dict_of_df
            and 'sparse_market_trade_df' in dict_of_df
            and 'market_df' in dict_of_df)

    # A missing ticker won't return any data; internally a DataMissingException is thrown
    tca_request.ticker = missing_ticker

    data_missing_exception = False

    try:
        dict_of_df = tca_engine.calculate_tca(tca_request=tca_request)
    except DataMissingException:
        data_missing_exception = True

    assert data_missing_exception

def example_market_data_non_usd_cross():
    """Example of loading market data for more exotic crosses, which are unlikely to be collected directly. For
    these crosses tcapy calculates the cross rate via the USD legs, eg. NZDCAD would be calculated from NZDUSD
    and USDCAD data.
    """
    market_loader = Mediator.get_tca_market_trade_loader()

    tca_request = TCARequest(start_date=start_date, finish_date=finish_date, ticker='NZDUSD',
                             market_data_store=market_data_store)

    market_base_df = market_loader.get_market_data(tca_request)

    tca_request.ticker = 'USDCAD'
    market_terms_df = market_loader.get_market_data(tca_request)

    market_df = pd.DataFrame(market_base_df['mid'] * market_terms_df['mid']).dropna()

    tca_request.ticker = 'NZDCAD'
    market_direct_df = market_loader.get_market_data(tca_request)

    market_df, market_direct_df = market_df.align(market_direct_df, join='inner')

    # Check the time series are equal to each other
    assert (market_df['mid'] - market_direct_df['mid']).sum() == 0

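def example_cross_rate_via_usd_legs_sketch():
    """Standalone sketch of the same cross-rate construction using plain pandas, independent of the tcapy loaders.
    The mid prices are synthetic and purely illustrative.
    """
    import pandas as pd

    index = pd.date_range('2017-05-01 10:00', periods=3, freq='1min')

    # Synthetic USD legs (illustrative values)
    nzdusd = pd.DataFrame({'mid': [0.6900, 0.6901, 0.6902]}, index=index)
    usdcad = pd.DataFrame({'mid': [1.3650, 1.3652, 1.3648]}, index=index)

    # NZDCAD via the USD legs: NZDCAD = NZDUSD * USDCAD
    nzdcad = pd.DataFrame(nzdusd['mid'] * usdcad['mid']).dropna()
    nzdcad.columns = ['mid']

    print(nzdcad)
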
def test_overlapping_full_detailed_tca_calculation():
    """Tests that a detailed TCA calculation works with caching and overlapping dates, checking that it has the
    right tables returned.
    """
    logger = LoggerManager.getLogger(__name__)

    tca_request = TCARequest(start_date=start_date, finish_date=finish_date, ticker=ticker, tca_type='detailed',
                             trade_data_store=trade_data_store, market_data_store=market_data_store,
                             trade_order_mapping=trade_order_mapping, use_multithreading=True)

    tca_engine = TCAEngineImpl(version=tcapy_version)

    dict_of_df = tca_engine.calculate_tca(tca_request=tca_request)

    sparse_market_trade_df = dict_of_df['sparse_market_' + trade_df_name]

    logger.info("Running second TCA calculation, extending dates...")

    # Extend the sample backwards by 10 days
    tca_request.start_date = pd.Timestamp(start_date) - timedelta(days=10)

    dict_of_df = tca_engine.calculate_tca(tca_request=tca_request)

    sparse_market_trade_df = dict_of_df['sparse_market_' + trade_df_name]

    assert len(sparse_market_trade_df.index[sparse_market_trade_df.index < '01 Feb 2018']) > 0

def get_tca_request():
    """This TCARequest is purely for market analysis
    """
    if use_test_csv:
        return TCARequest(start_date=start_date, finish_date=finish_date, ticker=ticker, reporting_currency='USD',
                          market_data_store=os.path.join(folder, 'small_test_market_df.csv.gz'), tca_type=tca_type)
    else:
        return TCARequest(start_date=start_date, finish_date=finish_date, ticker=ticker, reporting_currency='USD',
                          market_data_store=market_data_store, tca_type=tca_type)

def test_tag_filter_calculation():
    """Tests we can filter by venue and by broker correctly.
    """
    trade_order_filter = TradeOrderFilterTag(tag_value_combinations={'broker_id': 'broker1'})

    tca_request = TCARequest(start_date=start_date, finish_date=finish_date, ticker=ticker,
                             trade_data_store=trade_data_store, reporting_currency=reporting_currency,
                             market_data_store=market_data_store, trade_order_mapping=trade_order_mapping,
                             tca_type=tca_type, trade_order_filter=trade_order_filter, venue='venue1')

    tca_engine = TCAEngineImpl(version=tcapy_version)

    trade_order_results_df_dict = tca_engine.calculate_tca(tca_request)

    trade_df = trade_order_results_df_dict[trade_df_name]

    if trade_df is not None:
        if not (trade_df.empty):
            # Note that this only works with the "test" data - it won't work with real data!
            match_brokers = len(trade_df[trade_df['broker_id'] == 'broker1'])
            non_brokers = len(trade_df[trade_df['broker_id'] != 'broker1'])

            match_venue = len(trade_df[trade_df['venue'] == 'venue1'])
            non_match_venue = len(trade_df[trade_df['venue'] != 'venue1'])

            # Check the filtering has been applied correctly, so we only have trades by broker1 and venue1
            assert match_brokers > 0 and non_brokers == 0 and match_venue > 0 and non_match_venue == 0

def multiple_ticker_tca_aggregated_example():
    """Example of how to do TCA analysis on multiple tickers
    """
    tca_engine = TCAEngineImpl(version=tca_version)

    # Run a TCA computation for multiple tickers, calculating slippage
    tca_request = TCARequest(start_date=start_date, finish_date=finish_date, ticker=mult_ticker,
                             tca_type='aggregated',
                             trade_data_store=trade_data_store, market_data_store=market_data_store,
                             metric_calcs=MetricSlippage(), reporting_currency='EUR')

    dict_of_df = tca_engine.calculate_tca(tca_request)

    trade_df = dict_of_df['trade_df']

    # Aggregate some of the results with the ResultsSummary class (we could have done this within the TCARequest)
    summary = ResultsSummary()

    # Bucket slippage by ticker and report the average
    summary_slippage_df = summary.field_bucketing(trade_df, aggregate_by_field='ticker')

    print(summary_slippage_df)

    # Bucket slippage by venue & return the average weighted by the executed notional in reporting currency
    # (in this case EUR)
    summary_slippage_df = summary.field_bucketing(trade_df, aggregate_by_field='venue',
                                                  weighting_field='executed_notional_in_reporting_currency')

    print(summary_slippage_df)

    # Bucket slippage by venue and report the (unweighted) average
    summary_slippage_df = summary.field_bucketing(trade_df, aggregate_by_field='venue')

    print(summary_slippage_df)

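def example_weighted_bucketing_sketch():
    """Rough pandas equivalent of the weighted bucketing above, on a toy trade DataFrame with illustrative values;
    this is only a sketch of the idea, not the actual ResultsSummary implementation.
    """
    import pandas as pd

    trade_df = pd.DataFrame({'venue': ['venue1', 'venue1', 'venue2'],
                             'slippage': [-0.0001, -0.0003, -0.0002],
                             'executed_notional_in_reporting_currency': [1e6, 3e6, 2e6]})

    # Average slippage by venue, weighted by the executed notional in the reporting currency
    weighted_avg = trade_df.groupby('venue').apply(
        lambda x: (x['slippage'] * x['executed_notional_in_reporting_currency']).sum()
                  / x['executed_notional_in_reporting_currency'].sum())

    print(weighted_avg)
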
def simplest_tca_single_ticker_example():
    """Example of doing detailed TCA analysis on the trades of a single ticker, calculating slippage for each trade
    and order. It also calculates benchmarks for the arrival price of each trade and the spread to mid.

    Collects results for slippage into a daily timeline.
    """
    tca_engine = TCAEngineImpl(version=tca_version)

    # Specify the TCA request
    tca_request = TCARequest(start_date='01 Nov 2017', finish_date='20 Nov 2017', ticker='AUDUSD',
                             tca_type='detailed',
                             trade_data_store='ms_sql_server', market_data_store='arctic-ncfx',
                             metric_calcs=[MetricSlippage(trade_order_list=['trade_df', 'order_df'])],
                             results_form=[TimelineResultsForm(metric_name='slippage', by_date='date',
                                                               scalar=10000.0)],
                             benchmark_calcs=[BenchmarkArrival(), BenchmarkMarketSpreadToMid()],
                             trade_order_mapping=['trade_df', 'order_df'])

    # Dictionary of DataFrames as output from the TCA calculation
    dict_of_df = tca_engine.calculate_tca(tca_request)

    print(dict_of_df.keys())

    # Slippage for every trade
    metric_df = dict_of_df['trade_df']['slippage']

    print(metric_df.head(500))

def get_sample_data(ticker_spec=None):
    if ticker_spec is None:
        ticker_spec = ticker

    logger.info("About to load data for " + ticker_spec)

    tca_request = TCARequest(start_date=start_date, finish_date=finish_date, ticker=ticker_spec,
                             trade_data_store=trade_data_store, trade_data_database_name=trade_data_database_name,
                             reporting_currency=reporting_currency,
                             market_data_store=market_data_store,
                             market_data_database_table=market_data_database_table,
                             trade_order_mapping=trade_order_mapping, tca_type=tca_type,
                             benchmark_calcs=BenchmarkMarketMid(), use_multithreading=use_multithreading)

    tca_engine = TCAEngineImpl(version=tcapy_version)

    trade_order_results_df_dict = tca_engine.calculate_tca(tca_request)

    return trade_order_results_df_dict[ticker_spec + "_df"], trade_order_results_df_dict[trade_order_list[0]], \
           trade_order_results_df_dict[trade_order_list[1]]

def multiple_ticker_tca_aggregated_with_results_example():
    """Example of how to do TCA analysis on multiple tickers with TCAResults
    """
    tca_engine = TCAEngineImpl(version=tca_version)

    # Run a TCA computation for multiple tickers, calculating slippage
    tca_request = TCARequest(start_date=start_date, finish_date=finish_date, ticker=mult_ticker,
                             tca_type='aggregated',
                             trade_data_store=trade_data_store, market_data_store=market_data_store,
                             results_form=[TimelineResultsForm(metric_name='slippage', by_date='datehour',
                                                               scalar=10000.0)],
                             metric_calcs=MetricSlippage(), reporting_currency='EUR',
                             summary_display='candlestick')

    dict_of_df = tca_engine.calculate_tca(tca_request)

    # Show the output of objects
    print(dict_of_df.keys())

    ### Generate TCA report using a higher level object

    # Use the higher level TCAResults object to encapsulate the results (easier to deal with than a dictionary
    # of DataFrames)
    tca_results = TCAResults(dict_of_df, tca_request)
    tca_results.render_computation_charts()

    print(tca_results.sparse_market_charts.keys())
    print(tca_results.sparse_market.keys())

def test_time_of_day_filter_calculation():
    """Tests we can filter trades by time of day/date
    """
    trade_order_filter = TradeOrderFilterTimeOfDayWeekMonth(specific_dates=filter_date)

    tca_request = TCARequest(start_date=start_date, finish_date=finish_date, ticker=ticker,
                             trade_data_store=trade_data_store, reporting_currency=reporting_currency,
                             market_data_store=market_data_store, trade_order_mapping=trade_order_mapping,
                             tca_type=tca_type, trade_order_filter=trade_order_filter)

    tca_engine = TCAEngineImpl(version=tcapy_version)

    trade_order_results_df_dict = tca_engine.calculate_tca(tca_request)

    trade_df = trade_order_results_df_dict[trade_df_name]

    if trade_df is not None:
        if not (trade_df.empty):
            match_filtered_date = len(trade_df[start_filter_date:finish_filter_date])
            non_filtered_date = len(trade_df[(trade_df.index > finish_filter_date)
                                             & (trade_df.index < start_filter_date)])

            # Check the filtering has been applied correctly, so we only have trades within the filtered date range
            assert match_filtered_date > 0 and non_filtered_date == 0

def venue_tca_aggregated_example():
    """Example of doing an aggregated TCA computation on a single ticker, and then calculating the probability
    distribution function of slippage split by venue (weighted by executed notional)
    """
    tca_engine = TCAEngineImpl(version=tca_version)

    tca_request = TCARequest(start_date=start_date, finish_date=finish_date, ticker=ticker, tca_type='aggregated',
                             trade_data_store=trade_data_store, market_data_store=market_data_store,
                             metric_calcs=MetricSlippage())

    dict_of_df = tca_engine.calculate_tca(tca_request)

    summary = ResultsSummary()

    summary_slippage_df = summary.field_distribution(dict_of_df['trade_df'], metric_name='slippage',
                                                     aggregate_by_field='venue', pdf_only=True,
                                                     weighting_field='executed_notional')

    # Plot the PDF of slippage, split up by venue
    Chart(engine='plotly').plot(summary_slippage_df, style=Style(plotly_plot_mode='offline_html',
                                                                 connect_line_gaps=True))

def test_create_tca_report(fill_market_trade_databases):
    """Tests the creation of a TCAResults object and a TCAReport, checking it generates the right document
    """
    Mediator.get_volatile_cache().clear_cache()

    tca_request = TCARequest(start_date=start_date, finish_date=finish_date, ticker=ticker,
                             trade_data_store=trade_data_store, trade_data_database_name=trade_data_database_name,
                             market_data_store=market_data_store,
                             market_data_database_table=market_data_database_table,
                             trade_order_mapping=trade_order_mapping, metric_calcs=MetricSlippage(),
                             results_form=TimelineResultsForm(metric_name='slippage', by_date='datehour'),
                             use_multithreading=use_multithreading)

    tca_engine = TCAEngineImpl(version=tcapy_version)

    tca_results = TCAResults(tca_engine.calculate_tca(tca_request=tca_request), tca_request)
    tca_results.render_computation_charts()

    assert tca_results.timeline is not None and tca_results.timeline_charts is not None

    tca_report = TCAReport(tca_results)

    html = tca_report.create_report()

    # Quick check to see that the HTML has been generated, by checking the existence of the HTML <head> tag
    assert '<head>' in html

def single_ticker_tca_example_1600LDN_benchmark():
    tca_engine = TCAEngineImpl(version=tca_version)

    trade_order_type = 'trade_df'
    trade_order_list = ['trade_df', 'order_df']

    # Specify the TCA request
    tca_request = TCARequest(
        start_date=start_date, finish_date=finish_date, ticker=ticker,
        tca_type='detailed', dummy_market=False,
        trade_data_store=trade_data_store, market_data_store=market_data_store,
        metric_calcs=[
            # Calculate the slippage for trades/orders against the 16:00 LDN TWAP benchmark
            MetricSlippage(trade_order_list=trade_order_list, bid_benchmark='twap1600LDN',
                           ask_benchmark='twap1600LDN', metric_post_fix='twap1600LDN')
        ],
        results_form=[
            # Aggregate the average slippage by date and hour
            TimelineResultsForm(metric_name='slippagetwap1600LDN', by_date='date', scalar=10000.0)
        ],
        benchmark_calcs=[
            # The arrival price for every trade/order
            BenchmarkArrival(),

            # Calculate a TWAP around 16:00 LDN
            BenchmarkTWAP(start_time_before_offset={'m': 2}, finish_time_after_offset={'s': 30},
                          overwrite_time_of_day='16:00', overwrite_timezone='Europe/London',
                          benchmark_post_fix="1600LDN"),

            # The spread to mid at the time of every trade/order
            BenchmarkMarketSpreadToMid()
        ],
        extra_lines_to_plot='twap1600LDN',
        trade_order_mapping=trade_order_list,
        use_multithreading=True)

    # Dictionary of DataFrames as output from the TCA calculation
    dict_of_df = tca_engine.calculate_tca(tca_request)

    print(dict_of_df['trade_df'].head(5))

    tca_results = TCAResults(dict_of_df, tca_request)
    tca_results.render_computation_charts()

    from tcapy.vis.report.computationreport import JinjaRenderer

    tca_report = TCAReport(tca_results, renderer=JinjaRenderer())
    tca_report.create_report(output_filename='test_tca_twap_report.htm', output_format='html', offline_js=False)

def dataframe_tca_example():
    """Example of doing detailed TCA analysis on all the trades in a CSV, calculating metrics for slippage,
    transient market impact & permanent market impact. It also calculates benchmarks for the arrival price of each
    trade and the spread to mid.

    Collects results for slippage into a daily timeline and also an average by venue (by default weighted by
    reporting currency).
    """
    PLOT = False

    # Clear the entire cache
    # Mediator.get_volatile_cache(version='pro').clear_cache()

    tca_engine = TCAEngineImpl(version=tca_version)

    trade_order_type = 'trade_df'
    trade_order_list = ['trade_df']

    trade_df = DatabaseSourceCSV(
        trade_data_database_csv=csv_trade_order_mapping['trade_df']).fetch_trade_order_data()

    data_frame_trade_order_mapping = OrderedDict([('trade_df', trade_df)])

    start_date = trade_df.index[0]
    finish_date = trade_df.index[-1]

    ticker_list = FXConv().correct_unique_notation_list(trade_df['ticker'].unique().tolist())

    # Specify the TCA request
    tca_request = TCARequest(
        start_date=start_date, finish_date=finish_date, ticker=ticker_list,
        tca_type='aggregated', dummy_market=True,
        trade_data_store='dataframe', market_data_store=market_data_store,
        metric_calcs=[MetricSlippage(trade_order_list=trade_order_list),
                      MetricTransientMarketImpact(transient_market_impact_gap={'ms': 100},
                                                  trade_order_list=trade_order_list),
                      MetricPermanentMarketImpact(permanent_market_impact_gap={'h': 1},
                                                  trade_order_list=trade_order_list)],
        results_form=[TimelineResultsForm(metric_name='slippage', by_date='date'),
                      BarResultsForm(metric_name='slippage', aggregate_by_field='venue')],
        benchmark_calcs=[BenchmarkArrival(), BenchmarkSpreadToMid()],
        trade_order_mapping=data_frame_trade_order_mapping,
        use_multithreading=False)

    # Dictionary of DataFrames as output from the TCA calculation
    dict_of_df = tca_engine.calculate_tca(tca_request)

    print(dict_of_df.keys())

    # Average slippage per day
    timeline_df = dict_of_df['timeline_' + trade_order_type + '_slippage_by_all']

    # Permanent market impact for every trade
    metric_df = dict_of_df[trade_order_type]['permanent_market_impact']

    print(metric_df.head(500))

    if PLOT:
        from chartpy import Chart, Style

        # Plot slippage by timeline
        Chart(engine='plotly').plot(timeline_df)

        # Plot market impact (per trade)
        Chart(engine='plotly').plot(metric_df.head(500))

def tca_example_csv_trade_data_dukascopy():
    """Loads up trade/order data from CSV files and market data externally from Dukascopy. Does not use any
    databases, which is fine if you rarely run TCA. However, for heavy use of TCA, we strongly recommend
    maintaining an internal tick database, as external downloading of data can be very slow.

    In this case we are simply calculating the slippage of every trade and the orders above them.
    """
    from tcapy.analysis.tcaengine import TCAEngineImpl
    from tcapy.analysis.tcarequest import TCARequest

    from tcapy.analysis.algos.benchmark import BenchmarkArrival, BenchmarkSpreadToMid
    from tcapy.analysis.algos.metric import MetricSlippage
    from tcapy.analysis.algos.resultsform import TimelineResultsForm

    tca_version = constants.tcapy_version

    tca_engine = TCAEngineImpl(version=tca_version)

    # The test trade/order data is populated between 25 Apr 2017-05 Jun 2017
    # with trades/orders for 'EURUSD', 'USDJPY' and 'EURJPY'
    csv_trade_order_mapping = OrderedDict([('trade_df', os.path.join(folder, 'small_test_trade_df.csv')),
                                           ('order_df', os.path.join(folder, 'small_test_order_df.csv'))])

    # Specify the TCA request (note: by specifying use_multithreading=False, we avoid dependencies like Celery)

    # Depending on how the caching is set up, tcapy may try to download market data in monthly/weekly chunks and
    # cache them. To force deletion of the cache you can run the below
    # volatile_cache.clear_cache()

    # However, if you run TCA for the same period again, it will load the market data from Redis/in-memory,
    # rather than downloading it externally from Dukascopy
    tca_request = TCARequest(
        start_date='05 May 2017', finish_date='10 May 2017', ticker=['EURUSD'],
        tca_type='detailed',
        trade_data_store='csv', market_data_store='dukascopy',
        trade_order_mapping=csv_trade_order_mapping,
        metric_calcs=[MetricSlippage()],
        results_form=[TimelineResultsForm(metric_name='slippage', by_date='datehour', scalar=10000.0)],
        benchmark_calcs=[BenchmarkArrival(), BenchmarkSpreadToMid()],
        use_multithreading=False)

    # Dictionary of DataFrames as output from the TCA calculation
    dict_of_df = tca_engine.calculate_tca(tca_request)

    print(dict_of_df.keys())

def test_overlapping_full_detailed_tca_calculation():
    """Tests a detailed TCA calculation works with caching and overlapping dates, checking that it has the right
    tables returned.
    """
    tca_request = TCARequest(start_date=start_date, finish_date=finish_date, ticker=ticker, tca_type='detailed',
                             trade_data_store=trade_data_store, trade_data_database_name=trade_data_database_name,
                             market_data_store=market_data_store,
                             market_data_database_table=market_data_database_table,
                             trade_order_mapping=trade_order_mapping, use_multithreading=use_multithreading)

    tca_engine = TCAEngineImpl(version=tcapy_version)

    # Extend the sample
    tca_request.start_date = pd.Timestamp(start_date) - timedelta(days=10)

    dict_of_df = tca_engine.calculate_tca(tca_request=tca_request)

    sparse_market_trade_df = dict_of_df['sparse_market_trade_df']

    assert len(sparse_market_trade_df.index[sparse_market_trade_df.index < '01 Jun 2017']) > 0

def get_sample_data():
    from tcapy.analysis.algos.benchmark import BenchmarkMarketSpreadToMid

    logger.info("About to load data for " + ticker)

    tca_request = TCARequest(start_date='01 May 2017', finish_date='15 May 2017', ticker=ticker,
                             trade_data_store='ms_sql_server', market_data_store=market_data_store,
                             benchmark_calcs=[BenchmarkMarketSpreadToMid(bid_mid_bp=bid_mid_bp,
                                                                         ask_mid_bp=ask_mid_bp)],
                             trade_order_mapping=['trade_df'], tca_type=tca_type)

    tca_engine = TCAEngineImpl()

    trade_order_results_df_dict = tca_engine.calculate_tca(tca_request)

    trade_df = trade_order_results_df_dict['trade_df']

    return trade_order_results_df_dict[ticker + '_df'], trade_df

def example_market_data_convention():
    """Loads market data in the correct convention
    """
    market_loader = Mediator.get_tca_market_trade_loader()

    tca_request = TCARequest(start_date=start_date, finish_date=finish_date, ticker=ticker,
                             market_data_store=market_data_store)

    market_correct_conv_df = market_loader.get_market_data(tca_request)

    tca_request.ticker = reverse_ticker
    market_reverse_conv_df = market_loader.get_market_data(tca_request)

    market_correct_conv_df, market_reverse_conv_df = \
        market_correct_conv_df.align(market_reverse_conv_df, join='inner')

    synthetic_market_df = market_correct_conv_df.copy()
    synthetic_market_df['mid'] = 1.0 / synthetic_market_df['mid']

    # Check the time series are equal to each other
    assert (market_reverse_conv_df['mid'] - synthetic_market_df['mid']).sum() == 0

def compare_multithreading_type():
    """Compares the timings of TCA calculations run with different multithreading settings
    """
    tca_engine = TCAEngineImpl(version=tca_version)

    trade_order_list = ['trade_df', 'order_df']

    use_multithreading_list = [True, False]

    multithreading_params_list = [
        {'splice_request_by_dates': True, 'cache_period': 'day',
         'cache_period_trade_data': True, 'cache_period_market_data': True,
         'return_cache_handles_trade_data': True, 'return_cache_handles_market_data': True,
         'parallel_library': 'celery'},

        {'splice_request_by_dates': False, 'cache_period': 'day',
         'cache_period_trade_data': True, 'cache_period_market_data': True,
         'return_cache_handles_trade_data': True, 'return_cache_handles_market_data': True,
         'parallel_library': 'celery'}
    ]

    for use_multithreading in use_multithreading_list:
        for multithreading_params in multithreading_params_list:
            start = time.time()

            # Specify the TCA request
            tca_request = TCARequest(
                start_date=long_start_date, finish_date=long_finish_date, ticker=ticker,
                tca_type='detailed',
                trade_data_store=trade_data_store, market_data_store=market_data_store,
                metric_calcs=[MetricSlippage(trade_order_list=trade_order_list),
                              MetricTransientMarketImpact(transient_market_impact_gap={'ms': 100},
                                                          trade_order_list=trade_order_list),
                              MetricPermanentMarketImpact(permanent_market_impact_gap={'h': 1},
                                                          trade_order_list=trade_order_list)],
                results_form=[TimelineResultsForm(metric_name='slippage', by_date='datehour', scalar=10000.0)],
                benchmark_calcs=[BenchmarkArrival(), BenchmarkMarketSpreadToMid()],
                trade_order_mapping=trade_order_list,
                use_multithreading=use_multithreading,
                multithreading_params=multithreading_params)

            # Dictionary of DataFrames as output from the TCA calculation
            dict_of_df = tca_engine.calculate_tca(tca_request)

            finish = time.time()

            print('Multithreading example: calculated in ' + str(round(finish - start, 3))
                  + "s with use_multithreading = " + str(use_multithreading)
                  + ' and multithreading_params = ' + str(multithreading_params))

def _split_tca_request_by_date(self, tca_request, tick, split_dates=True, period='month'):
    tca_request_list = []
    dates = []

    # Break up dates into day/week/month chunks - our cache works on day/week/month chunks (can specify in constants)
    # Typically day chunks seem optimal
    # Careful to floor dates to midnight for caching purposes
    if split_dates:
        if period == 'month':
            split_dates_freq = 'MS'
        elif period == 'week':
            split_dates_freq = 'W-MON'
        elif period == 'day':
            split_dates_freq = 'D'

        start_date_floored = self._util_func.floor_tick_of_date(tca_request.start_date)
        finish_date_floored = self._util_func.floor_tick_of_date(tca_request.finish_date, add_day=True)

        dates = pd.date_range(start=start_date_floored, end=finish_date_floored, freq=split_dates_freq).tolist()

        # Add start date and finish date if necessary
        # if len(dates) > 0:
        #     if start_date_floored < dates[0]:
        #         dates.insert(0, start_date_floored)
        #
        #     if finish_date_floored > dates[-1]:
        #         dates.append(finish_date_floored)
        # else:
        #     dates = [start_date_floored, finish_date_floored]

    logger = LoggerManager().getLogger(__name__)

    # If our start/finish dates span more than one period,
    # eg. Jan 8th - Mar 7th, split into
    # Jan 8th - Jan 31st 23:59:59.999, Feb 1st 00:00:00.000 - Feb 28th 23:59:59.999 etc.
    if len(dates) > 0:
        # For the very first chunk in our series
        if tca_request.start_date < dates[0]:
            tca_request_temp = TCARequest(tca_request=tca_request)
            tca_request_temp.ticker = tick
            tca_request_temp.start_date = tca_request.start_date
            tca_request_temp.finish_date = dates[0] - timedelta(microseconds=1)

            tca_request_list.append(tca_request_temp)

        # For the full periods in between during our request
        for i in range(0, len(dates) - 1):
            tca_request_temp = TCARequest(tca_request=tca_request)
            tca_request_temp.ticker = tick
            tca_request_temp.start_date = dates[i]
            tca_request_temp.finish_date = dates[i + 1] - timedelta(microseconds=1)

            tca_request_list.append(tca_request_temp)

        # For the very last chunk of our series
        if dates[-1] < tca_request.finish_date:
            tca_request_temp = TCARequest(tca_request=tca_request)
            tca_request_temp.ticker = tick
            tca_request_temp.start_date = dates[-1]
            tca_request_temp.finish_date = tca_request.finish_date

            tca_request_list.append(tca_request_temp)
    else:
        tca_request_temp = TCARequest(tca_request=tca_request)
        tca_request_temp.ticker = tick

        tca_request_list.append(tca_request_temp)

    date_str = ''

    for t in tca_request_list:
        date_str = date_str + ' / ' + str(t.start_date) + ' to ' + str(t.finish_date)

    logger.debug("Split TCA request for " + str(tca_request.ticker) + " dates " + date_str
                 + " from original request " + str(tca_request.start_date) + ' to ' + str(tca_request.finish_date))

    return tca_request_list

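def example_date_chunking_sketch():
    """Minimal standalone sketch of the date-chunking idea above using plain pandas, assuming monthly chunks
    (the same 'MS' frequency). It is only illustrative and does not use the tcapy internals.
    """
    import pandas as pd
    from datetime import timedelta

    start_date = pd.Timestamp('2017-01-08')
    finish_date = pd.Timestamp('2017-03-07')

    # Month starts strictly inside the interval
    boundaries = pd.date_range(start=start_date.floor('D'),
                               end=finish_date.floor('D') + timedelta(days=1),
                               freq='MS').tolist()

    edges = [start_date] + boundaries + [finish_date]

    chunks = []

    for s, f in zip(edges[:-1], edges[1:]):
        if s < f:
            # End each chunk a microsecond before the next month boundary, as in the method above
            chunks.append((s, f - timedelta(microseconds=1) if f in boundaries else f))

    # Prints Jan 8th - Jan 31st 23:59:59.999999, Feb 1st - Feb 28th 23:59:59.999999, Mar 1st - Mar 7th
    for s, f in chunks:
        print(s, '->', f)
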
def callback(*args):
    """Calculates the aggregated TCA computation when the "Calculate" button is clicked. Caches the results and
    then updates the status label when done.

    Parameters
    ----------
    ticker_val : str (list)
        Tickers (eg. EURUSD, GBPUSD etc)

    venue_val : str (list)
        Trading venues

    start_date_val : str (list)
        Start date of TCA calculations

    finish_date_val : str (list)
        Finish date of TCA calculations

    reload_val : str
        Whether underlying market and trade data should be reloaded from a DataFrame or fetched from the cache

    n_clicks : int
        Number of times the button has been clicked

    Returns
    -------
    str
    """
    start = time.time()

    tag = tca_type + '-calculation-button'

    logger = LoggerManager.getLogger(__name__)
    logger.debug('Triggered click ' + tca_type)

    # old_clicks = self._session_manager.get_session_clicks(tag)

    # Make sure none of the other charts are plotted till we have completed this!
    if tca_type == 'aggregated':
        uploadbox = args

        if uploadbox is not None:
            if isinstance(uploadbox, tuple):
                uploadbox = uploadbox[0]

            # Assume that the user uploaded a binary CSV file
            trade_df = DatabaseSourceCSVBinary(trade_data_database_csv=uploadbox).fetch_trade_order_data()

            data_frame_trade_order_mapping = OrderedDict([('trade_df', trade_df)])

            start_date = trade_df.index[0]
            finish_date = trade_df.index[-1]

            ticker_val = FXConv().correct_unique_notation_list(trade_df['ticker'].unique().tolist())

            metric_val = 'slippage'

            self._session_manager.set_session_flag('metric', value=metric_val)
            self._session_manager.set_session_flag('aggregated-visualization', True)

            try:
                # Clear the cache for the current user
                self._glob_volatile_cache.clear_key_match(self._session_manager.get_session_id())

                results_form = [
                    # Show the distribution of the selected metric for trades weighted by notional,
                    # aggregated by ticker, broker and venue
                    DistResultsForm(trade_order_list=['trade_df'], metric_name=metric_val,
                                    aggregate_by_field=['ticker', 'broker_id', 'venue'],
                                    weighting_field='executed_notional_in_reporting_currency'),

                    # Display the timeline of the metric averaged by day (and weighted by notional)
                    TimelineResultsForm(trade_order_list=['trade_df'], by_date='date', metric_name=metric_val,
                                        aggregation_metric='mean', aggregate_by_field=['ticker'], scalar=10000.0,
                                        weighting_field='executed_notional_in_reporting_currency'),

                    # Display a bar chart showing the average metric weighted by notional and aggregated by
                    # ticker, venue and broker
                    BarResultsForm(trade_order_list=['trade_df'], metric_name=metric_val,
                                   aggregation_metric='mean',
                                   aggregate_by_field=['ticker', 'venue', 'broker_id'], scalar=10000.0,
                                   weighting_field='executed_notional_in_reporting_currency'),

                    # Create a table of the markout of every trade
                    TableResultsForm(trade_order_list=['trade_df'], metric_name='markout', filter_by='all',
                                     replace_text={'markout_': '', 'executed_notional': 'exec not',
                                                   'notional_currency': 'exec not cur'},
                                     keep_fields=['executed_notional', 'side', 'notional_currency'],
                                     scalar={'all': 10000.0, 'exclude': ['executed_notional', 'side']},
                                     round_figures_by={'all': 2, 'executed_notional': 0, 'side': 0},
                                     weighting_field='executed_notional')
                ]

                try:
                    timeline_trade_df_metric_by_ticker = self.get_cached_computation_analysis(
                        key='timeline_trade_df_' + metric_val + '_by_ticker',
                        tca_engine=self._tca_engine,
                        force_calculate=True,
                        tca_request=TCARequest(
                            start_date=start_date, finish_date=finish_date, ticker=ticker_val,
                            tca_type='aggregated',
                            market_data_store='arctic-ncfx', trade_data_store='dataframe',
                            trade_order_mapping=data_frame_trade_order_mapping,
                            metric_calcs=[MetricSlippage(), MetricMarkout(trade_order_list=['trade_df'])],
                            results_form=results_form,
                            dummy_market=True,
                            use_multithreading=True))

                    calc_start = timeline_trade_df_metric_by_ticker.index[0]
                    calc_end = timeline_trade_df_metric_by_ticker.index[-1]

                    aggregated_title = self.create_status_msg_flags('aggregated', ticker_val, calc_start, calc_end)

                    logger.debug('Plotted aggregated summary plot!')

                    finish = time.time()
                except Exception as e:
                    logger.exception(e)

                    return "Status: error - " + str(e) + ". Check data exists for these dates?" \
                           + self.get_username_string()
            except Exception as e:
                logger.exception(e)

                return 'Status: error - ' + str(e) + ". Check data exists for these dates?" \
                       + self.get_username_string()

            return 'Status: calculated ' + str(round(finish - start, 3)) + "s for " + aggregated_title \
                   + self.get_username_string()

    # Not very elegant, but the only way to prevent plots disappearing
    raise dash.exceptions.PreventUpdate("No data changed")

def tca_example_csv_trade_data_dukascopy_no_redis():
    """Runs a TCA calculation without any Redis caching at all. In practice, this should be avoided, since it will
    likely be much slower, given we'll end up accessing market/trade data a lot more often from a slow source.
    This is particularly an issue when we're downloading large samples of market data from an external source.
    For very small time periods this might be fine.
    """
    from tcapy.analysis.tcaengine import TCAEngineImpl
    from tcapy.analysis.tcarequest import TCARequest

    from tcapy.analysis.algos.benchmark import BenchmarkArrival, BenchmarkSpreadToMid
    from tcapy.analysis.algos.metric import MetricSlippage
    from tcapy.analysis.algos.resultsform import TimelineResultsForm

    tca_version = constants.tcapy_version

    tca_engine = TCAEngineImpl(version=tca_version)

    # The test trade/order data is populated between 25 Apr 2017-05 Jun 2017
    # with trades/orders for 'EURUSD', 'USDJPY' and 'EURJPY'
    csv_trade_order_mapping = OrderedDict([('trade_df', os.path.join(folder, 'small_test_trade_df.csv')),
                                           ('order_df', os.path.join(folder, 'small_test_order_df.csv'))])

    # Specify the TCA request (note: by specifying use_multithreading=False, we avoid dependencies like Celery)

    # Depending on how the caching is set up, tcapy may try to download market data in monthly/weekly chunks and
    # cache them. To force deletion of the cache you can run the below
    # volatile_cache.clear_cache()

    # However, if you run TCA for the same period again, it will load the market data from Redis/in-memory,
    # rather than downloading it externally from Dukascopy
    tca_request = TCARequest(
        start_date='05 May 2017', finish_date='06 May 2017', ticker=['EURUSD'],
        tca_type='detailed',
        trade_data_store='csv', market_data_store='dukascopy',
        trade_order_mapping=csv_trade_order_mapping,
        metric_calcs=[MetricSlippage()],
        results_form=[TimelineResultsForm(metric_name='slippage', by_date='datehour', scalar=10000.0)],
        benchmark_calcs=[BenchmarkArrival(), BenchmarkSpreadToMid()],
        use_multithreading=False)

    tca_request.multithreading_params = {
        'splice_request_by_dates': False,  # True or False
        'cache_period': 'month',  # month or week

        # Cache trade data in monthly/periodic chunks in Redis (reduces database calls a lot)
        'cache_period_trade_data': False,

        # Cache market data in monthly/periodic chunks in Redis (reduces database calls a lot)
        'cache_period_market_data': False,

        # Return trade data internally as handles (usually necessary for Celery)
        'return_cache_handles_trade_data': False,

        # Return market data internally as handles (usually necessary for Celery)
        'return_cache_handles_market_data': False,

        # We recommend using Celery, which allows us to reuse Python processes
        'parallel_library': 'single'
    }

    # Dictionary of DataFrames as output from the TCA calculation
    dict_of_df = tca_engine.calculate_tca(tca_request)

    print(dict_of_df.keys())

    market_df = dict_of_df['market_df']

    market_df_minute = market_df.resample('1min').last()

    print(market_df_minute)

def test_fetch_market_trade_data_csv():
    """Tests downloading of market and trade/order data from CSV files
    """
    ### Get market data
    market_loader = Mediator.get_tca_market_trade_loader()

    market_request = MarketRequest(start_date=start_date, finish_date=finish_date, ticker=ticker,
                                   data_store=csv_market_data_store)

    market_df = market_loader.get_market_data(market_request)

    assert not market_df.empty \
           and market_df.index[0] >= pd.Timestamp(start_date).tz_localize('utc') \
           and market_df.index[-1] <= pd.Timestamp(finish_date).tz_localize('utc')

    # For a high level trade data request, we need to use a TCARequest, because it usually involves some market
    # data download (we are assuming that the market data is being downloaded from our Arctic database),
    # eg. for converting notionals to the reporting currency
    tca_request = TCARequest(start_date=start_date, finish_date=finish_date, ticker=ticker,
                             trade_data_store='csv', market_data_store=arctic_market_data_store,
                             trade_order_mapping=csv_trade_order_mapping)

    for t in trade_order_list:
        trade_order_df = market_loader.get_trade_order_data(tca_request, t)

        try:
            trade_order_df = Mediator.get_volatile_cache().get_dataframe_handle(trade_order_df)
        except:
            pass

        assert not trade_order_df.empty \
               and trade_order_df.index[0] >= pd.Timestamp(start_date).tz_localize('utc') \
               and trade_order_df.index[-1] <= pd.Timestamp(finish_date).tz_localize('utc')

    ### Test using DataFactory and DatabaseSource
    from tcapy.data.datafactory import DataFactory

    data_factory = DataFactory()

    for t in trade_order_list:
        ### Test using DataFactory
        trade_request = TradeRequest(start_date=start_date, finish_date=finish_date, ticker=ticker,
                                     data_store='csv', trade_order_mapping=csv_trade_order_mapping,
                                     trade_order_type=t)

        trade_order_df = data_factory.fetch_table(trade_request)

        assert not trade_order_df.empty \
               and trade_order_df.index[0] >= pd.Timestamp(start_date).tz_localize('utc') \
               and trade_order_df.index[-1] <= pd.Timestamp(finish_date).tz_localize('utc')

        ### Test using DatabaseSourceCSV
        from tcapy.data.databasesource import DatabaseSourceCSV

        database_source = DatabaseSourceCSV()

        trade_order_df = database_source.fetch_trade_order_data(start_date, finish_date, ticker,
                                                                table_name=csv_trade_order_mapping[t])

        assert not trade_order_df.empty \
               and trade_order_df.index[0] >= pd.Timestamp(start_date).tz_localize('utc') \
               and trade_order_df.index[-1] <= pd.Timestamp(finish_date).tz_localize('utc')

def get(self):
    logger = LoggerManager.getLogger(__name__)

    if request.content_type == 'application/json':
        json_input = request.json

        if 'trade_df' in json_input.keys() and 'username' in json_input.keys() and 'password' in json_input.keys():
            username = json_input['username']

            # TODO check passwords
            password = json_input['password']

            logger.info("Received API request from user: " + username)

            try:
                # ... run the TCA computation on the uploaded trade data and prepare the response here ...
                ...
            except Exception as e:
                logger.error("Failed to complete request for user: " + username + " - " + str(e))

                return "Failed to complete request"

            logger.info("Completed API request from user: " + username)

    return "Unsupported media type, only accepts JSON"

def single_ticker_tca_example():
    """Example of doing detailed TCA analysis on the trades of a single ticker, calculating metrics for slippage,
    transient market impact & permanent market impact. It also calculates benchmarks for the arrival price of each
    trade and the spread to mid. Creates a TCAReport which generates standalone HTML and PDF files.

    Also, on a lower level, it collects results for slippage into a daily timeline and also an average by venue
    (by default weighted by reporting currency).
    """
    # Note: running Orca might not work in WSL; also when generating Plotly charts you might get an error with WSL
    # if silent_display=True is not set, as it will try to open a web page in a browser (which isn't supported in
    # WSL1, but is in WSL2)
    PLOT = True

    # Clear the entire cache
    # Mediator.get_volatile_cache().clear_cache()

    tca_engine = TCAEngineImpl(version=tca_version)

    trade_order_type = 'trade_df'
    trade_order_list = ['trade_df', 'order_df']

    # Ensure Orca is started, if we want to convert to PDF (sometimes you may need to specify the path)
    # Can be slow to start
    if PLOT:
        from chartpy.engine import EnginePlotly
        EnginePlotly().start_orca()  # constants.orca_server_path

    # Specify the TCA request
    tca_request = TCARequest(
        start_date=start_date, finish_date=finish_date, ticker=ticker,
        tca_type='detailed', dummy_market=False,
        trade_data_store=trade_data_store, market_data_store=market_data_store,
        metric_calcs=[
            # Calculate the slippage for trades/orders
            MetricSlippage(trade_order_list=trade_order_list),

            # Calculate the shorter and longer term market impact after every trade/order
            MetricTransientMarketImpact(transient_market_impact_gap={'ms': 100},
                                        trade_order_list=trade_order_list),
            MetricPermanentMarketImpact(permanent_market_impact_gap={'h': 1},
                                        trade_order_list=trade_order_list)
        ],
        results_form=[
            # Aggregate the average slippage by date and hour
            TimelineResultsForm(metric_name='slippage', by_date='datehour', scalar=10000.0),

            # Aggregate the total executed notional in reporting currency (usually USD) for every hour
            TimelineResultsForm(metric_name='executed_notional_in_reporting_currency', by_date='datehour',
                                aggregation_metric='sum', scalar=1.0),

            # Aggregate the average slippage and transient market impact on trades by venue and ticker
            HeatmapResultsForm(metric_name=['slippage', 'transient_market_impact'],
                               aggregate_by_field=['venue', 'ticker'], scalar=10000.0,
                               trade_order_list='trade_df'),

            # Aggregate the average slippage on trades by venue
            BarResultsForm(metric_name='slippage', aggregate_by_field='venue', scalar=10000.0,
                           trade_order_list='trade_df'),

            # Aggregate the average slippage on trades/orders by broker_id
            BarResultsForm(metric_name='slippage', aggregate_by_field='broker_id', scalar=10000.0),

            # Show the distribution of slippage on trades/orders by side
            DistResultsForm(metric_name='slippage', aggregate_by_field='side', scalar=10000.0),

            # Create a scatter chart of slippage vs. executed notional
            ScatterResultsForm(scatter_fields=['slippage', 'executed_notional_in_reporting_currency'],
                               scalar={'slippage': 10000.0})
        ],
        benchmark_calcs=[
            # The arrival price for every trade/order
            BenchmarkArrival(),

            # The spread to mid at the time of every trade/order
            BenchmarkMarketSpreadToMid()
        ],
        trade_order_mapping=trade_order_list,
        use_multithreading=True)

    # Dictionary of DataFrames as output from the TCA calculation
    dict_of_df = tca_engine.calculate_tca(tca_request)

    print(dict_of_df['trade_df'])
    print(dict_of_df.keys())

    # Heatmap of slippage and transient market impact, broken down by venue and ticker
    heatmap_slippage_market_impact_df = \
        dict_of_df['heatmap_' + trade_order_type + '_slippage#transient_market_impact_by/mean/venue#ticker']

    print(heatmap_slippage_market_impact_df)

    # Average slippage per date/hour
    timeline_slippage_df = dict_of_df['timeline_' + trade_order_type + '_slippage_by/mean_datehour/all']

    # Total executed notional per date/hour
    timeline_executed_notional_df = \
        dict_of_df['timeline_' + trade_order_type + '_executed_notional_in_reporting_currency_by/sum_datehour/all']

    # Permanent market impact for every trade
    metric_df = dict_of_df[trade_order_type]['permanent_market_impact']

    print(metric_df.head(500))

    from tcapy.vis.report.computationreport import JinjaRenderer

    if PLOT:
        ### Generate TCA report using a higher level object

        # Use the higher level TCAResults object to encapsulate the results (easier to deal with than a dictionary
        # of DataFrames)
        tca_results = TCAResults(dict_of_df, tca_request)
        tca_results.render_computation_charts()

        tca_report = TCAReport(tca_results, renderer=JinjaRenderer())
        tca_report.create_report(output_filename='test_tca_report.htm', output_format='html', offline_js=False)

        # Note: needs Plotly Orca + wkhtmltopdf installed to render PDFs
        try:
            tca_report.create_report(output_filename='test_tca_report.pdf', output_format='pdf')
        except Exception as e:
            print(str(e))

        ### Lower level creation of TCA report - plot charts individually

        # Plot slippage by timeline
        Chart(engine='plotly').plot(timeline_slippage_df)

        # Plot total executed notional by timeline
        Chart(engine='plotly').plot(timeline_executed_notional_df)

        # Plot market impact (per trade)
        Chart(engine='plotly').plot(metric_df.head(500))
