def __init__(self, tables_dict={}, scalar=1, round_figures_by=None):
    self._tables_dict = tables_dict
    self._scalar = scalar
    self._round_figures_by = round_figures_by

    self._time_series_ops = TimeSeriesOps()
    self._util_func = UtilFunc()
def __init__(self, version=constants.tcapy_version):
    self._util_func = UtilFunc()                    # general utility operations (such as flattening lists)
    self._trade_order_tag = TradeOrderFilterTag()   # to filter trades/orders according to the values of certain tags

    self._version = version
def __init__(self, temp_data_folder=constants.temp_data_folder,
             temp_large_data_folder=constants.temp_large_data_folder, tickers=None, data_store=None):
    self.temp_data_folder = temp_data_folder
    self.temp_large_data_folder = temp_large_data_folder
    self.tickers = None

    self.util_func = UtilFunc()
    self.time_series_ops = TimeSeriesOps()
    self.data_store = data_store

    logger = LoggerManager().getLogger(__name__)

    if not os.path.isdir(self.temp_data_folder):
        logger.warning("Temp data folder " + self.temp_data_folder + " does not exist")

    if not os.path.isdir(self.temp_large_data_folder):
        logger.warning("Temp large data folder " + self.temp_large_data_folder + " does not exist")

    if tickers is not None:
        self.tickers = tickers
def _write_df_to_db_single_thread(self, ticker, remove_duplicates=True, if_exists_table='append',
                                  if_exists_ticker='replace'):
    logger = LoggerManager.getLogger(__name__)

    postfix = '-' + self._get_postfix() + '-with-duplicates'

    if remove_duplicates:
        postfix = '-' + self._get_postfix() + '-no-duplicates'

    filename = os.path.join(self.temp_large_data_folder, ticker + postfix) + '.' + fileformat

    logger.info("Reading " + filename)

    util_func = UtilFunc()
    time_series_ops = TimeSeriesOps()
    data_source_local = self._get_output_data_source()

    df = util_func.read_dataframe_from_binary(filename, format=binary_format)

    if df is not None:
        df = time_series_ops.localize_as_UTC(df)

        data_source_local.append_market_data(df, ticker, if_exists_table=if_exists_table,
                                             if_exists_ticker=if_exists_ticker)
    else:
        logger.warning("Couldn't write DataFrame for " + ticker + " to database, it appears to be empty!")
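# A minimal sketch of the flat-file round trip used above, assuming Parquet files and a
# pandas install with pyarrow available; 'example.parquet' and the 'mid' column are
# illustrative stand-ins, not names from tcapy itself.
import pandas as pd

df = pd.DataFrame({'mid': [1.1, 1.2]},
                  index=pd.date_range('01 Jan 2020', periods=2, freq='min'))

df.to_parquet('example.parquet')              # akin to util_func.write_dataframe_to_binary
df_read = pd.read_parquet('example.parquet')  # akin to util_func.read_dataframe_from_binary

# tcapy then localizes the index to UTC before appending to the market database
df_read = df_read.tz_localize('UTC')          # similar in spirit to TimeSeriesOps().localize_as_UTC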
def __init__(self, trade_order_list=None, metric_name=None, filter_by=['all'], tag_value_combinations={},
             keep_fields=['executed_notional', 'side'], replace_text={}, round_figures_by=1, scalar=1.0,
             weighting_field=constants.table_weighting_field, exclude_fields_from_avg=[]):
    self._trade_order_list = trade_order_list
    self._metric_name = metric_name
    self._results_summary = ResultsSummary()
    self._keep_fields = keep_fields
    self._filter_by = filter_by
    self._replace_text = replace_text
    self._round_figures_by = round_figures_by
    self._weighting_field = weighting_field
    self._scalar = scalar
    self._exclude_fields_from_avg = exclude_fields_from_avg

    self._tag_value_combinations = tag_value_combinations
    self._trade_order_filter_tag = TradeOrderFilterTag()

    self._results_form_tag = 'table'

    self._util_func = UtilFunc()
    self._time_series_ops = TimeSeriesOps()
def __init__(self, app, session_manager, callback_manager, glob_volatile_cache, layout, callback_dict=None):
    self._util_func = UtilFunc()
    self._session_manager = session_manager
    self._callback_manager = callback_manager
    self._glob_volatile_cache = glob_volatile_cache

    self.attach_callbacks(app, callback_manager, callback_dict=callback_dict)
def __init__(self, version=constants.tcapy_version, volatile_cache_engine=constants.volatile_cache_engine):
    self._data_factory = DataFactory(version=version)

    self._util_func = UtilFunc()             # general utility operations (such as flattening lists)
    self._fx_conv = FXConv()                 # for determining if FX crosses are in the correct convention
    self._time_series_ops = TimeSeriesOps()  # time series operations, such as filtering by date

    self._metric_executed_price = MetricExecutedPriceNotional()  # for determining the executed notionals/prices
                                                                 # of orders from trades

    self._benchmark_mid = BenchmarkMid()           # to calculate mid price from bid/ask quote market data
    self._trade_order_tag = TradeOrderFilterTag()  # to filter trades/orders according to the values of certain tags

    self._version = version
    self._volatile_cache_engine = volatile_cache_engine
def __init__(self, tca_request=None, tag_value_combinations={}):
    """Initialise with the TCA parameters of our analysis and the field/value combinations we wish to filter for.

    Parameters
    ----------
    tca_request : TCARequest
        TCA parameters for our analysis

    tag_value_combinations : dict
        User defined fields and the values to be filtered for
    """
    self._util_func = UtilFunc()

    self.set_trade_order_params(tca_request=tca_request, tag_value_combinations=tag_value_combinations)
def get_util_func():
    with Mediator._util_func_lock:
        if Mediator._util_func is None:
            Mediator._util_func = UtilFunc()

    return Mediator._util_func
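# A self-contained sketch of the lock-protected lazy singleton pattern that
# Mediator.get_util_func() uses above; the Cache class and its dict payload are
# illustrative only, not part of tcapy.
import threading

class Cache(object):
    _instance = None
    _instance_lock = threading.Lock()

    @staticmethod
    def get_instance():
        # Serialize the first construction, so concurrent callers share one object
        with Cache._instance_lock:
            if Cache._instance is None:
                Cache._instance = {}

        return Cache._instance

assert Cache.get_instance() is Cache.get_instance()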
def __init__(self, computation_results, title='Cuemacro Computation'):
    """Initialize class with the computation results we wish to convert into a report-like format

    Parameters
    ----------
    computation_results : ComputationResults
        The results of a large scale computation, which contains charts and DataFrames

    title : str
        Title of the webpage to be rendered
    """
    self._util_func = UtilFunc()
    self._computation_results = computation_results
    self._title = title
    self._canvas_plotter = 'plain'
    self._chart = Chart(engine='plotly')
def __init__(self, app=None, constants=None, url_prefix=''):
    super(LayoutDash, self).__init__(app=app, constants=constants, url_prefix=url_prefix)

    self.id_flags = {}
    self.pages = {}

    self._util_func = UtilFunc()
    self._url_prefix = url_prefix
def __init__(self, dict_of_df, computation_request, text_preamble=''):
    self._plot_render = PlotRender()
    self._util_func = UtilFunc()

    self.text_preamble = text_preamble

    self._computation_request = computation_request
    self._rendered = False
def __init__(self, version=constants.tcapy_version):
    self._util_func = UtilFunc()
    self._tca_market_trade_loader = Mediator.get_tca_market_trade_loader(version=version)
    self._time_series_ops = TimeSeriesOps()
    self._trade_order_tag = TradeOrderFilterTag()

    logger = LoggerManager.getLogger(__name__)
    logger.info("Init TCAEngine version: " + self._tca_market_trade_loader.get_tca_version() +
                " - Env: " + constants.env)
def set_trade_order_params(self, tca_request=None, tag_value_combinations={}):
    """Sets the parameters for filtering trades/orders according to the values of their tags

    Parameters
    ----------
    tca_request : TCARequest
        TCA parameters for our analysis

    tag_value_combinations : dict
        Filter for a combination of tags/values

    Returns
    -------
    """
    self._tca_request = tca_request
    self._tag_value_combinations = tag_value_combinations
    self._util_func = UtilFunc()

    if tag_value_combinations != {}:
        self._tag = self._util_func.dict_key_list(tag_value_combinations.keys())
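# A hedged usage sketch for the tag filter above: keep only trades done by one broker.
# The import path is assumed, and 'broker_id'/'broker1' are illustrative tag names, not
# values confirmed by this snippet.
from tcapy.analysis.tradeorderfilter import TradeOrderFilterTag

trade_order_filter = TradeOrderFilterTag()
trade_order_filter.set_trade_order_params(tag_value_combinations={'broker_id': 'broker1'})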
def test_write_csv_from_data_vendor():
    """Tests downloading market data from the data vendor and dumping it to CSV. Checks the written CSV against
    what is loaded in memory. Also checks data is available in each 'usual' market hour.

    Note, that we use cached data from disk, as we want to download relatively large sections of data, and doing
    this externally can cause the test to run very slowly.
    """
    for data_vendor_name in data_vendor_name_list:
        # database_source = database_source_dict[data_vendor_name]
        database_populator = database_populator_dict[data_vendor_name]
        chunk_int_min = chunk_int_min_dict[data_vendor_name]

        # Specifically choose dates which straddle the weekend boundary
        # 1) during British Summer Time in London
        # 2) during GMT time in London
        start_date = '27 Apr 2018'; finish_date = '03 May 2018'; expected_csv_files = 5
        # start_date = '02 Feb 2018'; finish_date = '07 Feb 2018'; expected_csv_files = 4

        split_size = 'daily'
        write_csv = False

        # Prepare the CSV folder first
        csv_folder = resource('csv_' + data_vendor_name + '_dump')

        # Empty the CSV test harness folder
        UtilFunc().forcibly_create_empty_folder(csv_folder)

        msg, df_dict = database_populator.download_to_csv(
            start_date, finish_date, ['EURUSD'], chunk_int_min=chunk_int_min, split_size=split_size,
            csv_folder=csv_folder, return_df=True, write_large_csv=write_csv, remove_duplicates=False,
            web_proxies=web_proxies)

        df_read_direct_from_data_vendor = df_dict['EURUSD']

        # Check it has data for every market hour (eg. ignoring Saturdays)
        assert util_func.check_data_frame_points_in_every_hour(df_read_direct_from_data_vendor, start_date,
                                                               finish_date)

        if write_csv:
            # Read back the CSVs dumped on disk in the test harness CSV folder
            csv_file_list = glob.glob(csv_folder + '/EURUSD*.csv')

            assert len(csv_file_list) == expected_csv_files

            df_list = []

            for c in csv_file_list:
                df = pd.read_csv(c, index_col=0)
                df.index = pd.to_datetime(df.index)
                df_list.append(df)

            # Now compare the CSVs on disk versus those read directly
            df_read_from_csv = pd.concat(df_list).tz_localize(pytz.utc)

            assert_frame_equal(df_read_from_csv, df_read_direct_from_data_vendor)
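# A minimal sketch of what an every-hour coverage check could look like; this is an
# assumption about check_data_frame_points_in_every_hour's behaviour, not its actual code.
# A production version would also exclude weekend hours, which FX markets skip.
import pandas as pd

def has_points_in_every_hour(df, start, finish):
    # Count ticks per hourly bucket, then verify no expected hour is empty
    counts = df.groupby(df.index.floor('H')).size()
    expected = pd.date_range(start, finish, freq='H', tz=df.index.tz)

    return counts.reindex(expected).fillna(0).gt(0).all()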
def set_trade_order_params(self, tca_request=None, time_of_day=None, day_of_week=None, month_of_year=None,
                           specific_dates=None, time_zone='utc'):
    """Initialise our filter by the times of day, days of the week and months of the year we wish to filter our
    trades/orders by. Each filter period is optional (eg. we can filter just by time of day if we want to).

    Parameters
    ----------
    tca_request : TCARequest
        TCA parameters for our analysis

    time_of_day : dict
        Describing the start and finish time of our filter

    day_of_week : str
        Which day of the week to filter by?

    month_of_year : str
        Which month of the year to filter by?

    specific_dates : str / str (list)
        Which dates to filter by

    time_zone : str
        Time zone to use (eg. 'utc')
    """
    self.tca_request = tca_request
    self.time_of_day = time_of_day
    self.day_of_week = day_of_week
    self.month_of_year = month_of_year
    self.specific_dates = specific_dates
    self.time_zone = time_zone

    self._util_func = UtilFunc()
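# Hedged usage sketch: restrict analysis to Monday mornings in London. The class name and
# import path, and the time_of_day dict keys ('start_time'/'finish_time'), are assumptions
# inferred from the signature above, not confirmed API.
from tcapy.analysis.tradeorderfilter import TradeOrderFilterTimeOfDayWeekMonth

time_filter = TradeOrderFilterTimeOfDayWeekMonth()
time_filter.set_trade_order_params(time_of_day={'start_time': '07:00', 'finish_time': '12:00'},
                                   day_of_week='mon', time_zone='Europe/London')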
def create_market_trade_data_eikon():
    """Creates a small dataset for testing purposes for market, trade and order data for EURUSD at the start of
    May 2017, which is dumped to the designated tcapy test harness folder.

    Returns
    -------
    """
    # Use database source as Arctic (or directly from Dukascopy) for market data (assume we are using market data
    # as a source)
    tca_market = TCAMarketTradeLoaderImpl()
    util_func = UtilFunc()

    market_df = []

    for tick in ticker:
        market_request = MarketRequest(ticker=tick, data_store=data_store, start_date=start_date,
                                       finish_date=finish_date)

        market_df.append(tca_market.get_market_data(market_request=market_request))

    # Note: it can be very slow to write these CSV files
    market_df = pd.concat(market_df)
    market_df.to_csv(os.path.join(folder, 'small_test_market_df_eikon.csv.gz'), compression='gzip')

    # Also write to disk as a binary file (easier to load up later)
    util_func.write_dataframe_to_binary(market_df, os.path.join(folder, 'small_test_market_df_eikon.gzip'))

    # Create a spot file in reverse order
    market_df_reverse = market_df.sort_index(ascending=False)

    market_df_reverse.to_csv(os.path.join(folder, 'small_test_market_df_reverse_eikon.csv.gz'),
                             compression='gzip')

    # Also write to disk as a Parquet file (easier to load up later)
    util_func.write_dataframe_to_binary(market_df_reverse,
                                        os.path.join(folder, 'small_test_market_df_reverse_eikon.parquet'))

    if create_trade_order_data:
        # Use the market data we just downloaded to CSV, and perturb it to generate the trade data
        data_test_creator = DataTestCreator(
            market_data_postfix=postfix,
            csv_market_data=os.path.join(folder, 'small_test_market_df_eikon.csv.gz'),
            write_to_db=False)

        # Create randomised trade/order data
        trade_order = data_test_creator.create_test_trade_order(ticker_trades, start_date=start_date,
                                                                finish_date=finish_date)

        trade_order['trade_df'].to_csv(os.path.join(folder, 'small_test_trade_df_eikon.csv'))
        trade_order['order_df'].to_csv(os.path.join(folder, 'small_test_order_df_eikon.csv'))
def __init__(self, trade_order_list=None, metric_name=None, aggregate_by_field=None, aggregation_metric='mean',
             tag_value_combinations={}):
    self._trade_order_list = trade_order_list
    self._metric_name = metric_name
    self._aggregate_by_field = aggregate_by_field
    self._aggregation_metric = aggregation_metric
    self._results_summary = ResultsSummary()

    self._tag_value_combinations = tag_value_combinations
    self._trade_order_filter_tag = TradeOrderFilterTag()

    self._util_func = UtilFunc()
    self._time_series_ops = TimeSeriesOps()
def _fetch_market_data(self, start, finish, ticker, write_to_disk=True, read_cached_from_disk=True,
                       web_proxies=constants.web_proxies):
    logger = LoggerManager.getLogger(__name__)

    key = (str(start) + str(finish) + ticker + '_' + self._get_postfix()).replace(":", '_')

    filename = os.path.join(self.temp_data_folder, key) + '.' + fileformat
    util_func = UtilFunc()

    start_time_stamp = pd.Timestamp(start)
    finish_time_stamp = pd.Timestamp(finish)

    if self._remove_weekend_points():
        weekend_data = "Weekend? " + key

        weekday_point = UtilFunc().is_weekday_point(start_time_stamp, finish_time_stamp,
                                                    friday_close_nyc_hour=constants.friday_close_utc_hour,
                                                    sunday_open_utc_hour=constants.sunday_open_utc_hour)

        if not weekday_point:
            return None, weekend_data

    df = None

    if read_cached_from_disk:
        if os.path.exists(filename):
            df = util_func.read_dataframe_from_binary(filename, format=binary_format)

            if df is not None:
                logger.debug("Read " + filename + " from disk")

    if df is None:
        # Convert the tcapy ticker into the vendor ticker
        df = self._get_input_data_source().fetch_market_data(start, finish,
                                                             ticker=self._get_tickers_vendor()[ticker],
                                                             web_proxies=web_proxies)

        if df is not None and write_to_disk:
            # Write a small temporary DataFrame to disk (if the process fails later, these can be picked up,
            # without having to call the external vendor again)
            util_func.write_dataframe_to_binary(df, filename, format=binary_format)

    msg = None

    if df is None:
        msg = "No data? " + key

    return df, msg
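# A self-contained sketch of the read-through disk cache pattern above: look for a cached
# binary first, fall back to the (slow) remote fetch, then persist the result for next time.
# fetch_remote, the cache folder and the Parquet format are illustrative stand-ins
# (Parquet reads/writes assume pyarrow is installed).
import os
import pandas as pd

def fetch_with_disk_cache(key, fetch_remote, cache_folder='/tmp'):
    filename = os.path.join(cache_folder, key.replace(':', '_') + '.parquet')

    if os.path.exists(filename):
        return pd.read_parquet(filename)  # cache hit: cheap local read

    df = fetch_remote()                   # cache miss: expensive external call

    if df is not None:
        df.to_parquet(filename)           # persist, so a failed later stage can resume

    return df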
class TCACallerImplGen(TCACaller):

    def __init__(self, app, session_manager, callback_manager, glob_volatile_cache, layout):
        super(TCACallerImplGen, self).__init__(app, session_manager, callback_manager, glob_volatile_cache,
                                               layout)

        self._util_func = UtilFunc()

    def calculate_computation_summary(self, tca_type, external_params=None):

        # Callback triggered by the Dash application
        def callback(*args):
            """Kicks off fetching of market data and TCA calculations for a specific currency pair. Caches the
            data in a VolatileCache instance, ready to be read in by the other charts.

            Parameters
            ----------
            ticker_val : str
                Ticker to be used in TCA calculations

            start_date_val : str
                Start date of TCA analysis

            start_time_val : str
                Start time of TCA analysis

            finish_date_val : str
                Finish date of TCA analysis

            finish_time_val : str
                Finish time of TCA analysis

            venue_val : str
                Venue data to be used

            n_clicks : int
                Number of clicks

            Returns
            -------
            str
            """
            start = time.time()

            tag = tca_type + '-calculation-button'

            old_clicks = self._session_manager.get_session_clicks(tag)

            # Make sure none of the other charts/links are plotted till we have completed this!
            self._session_manager.set_session_flag(
                [self._plot_flags['aggregated'], self._plot_flags['detailed'], self._plot_flags['compliance']],
                False)

            logger = LoggerManager.getLogger(__name__)

            if tca_type == 'detailed':
                ticker_val, start_date_val, start_time_val, finish_date_val, finish_time_val, \
                    broker_val, algo_val, venue_val, market_data_val, metric_val, n_clicks = args

                # Catch cases where users repeatedly click, which can cause misalignment in clicks
                self._session_manager.set_session_clicks(tag, n_clicks, old_clicks=old_clicks)

                logger.debug(self.create_generate_button_msg(old_clicks, n_clicks))

                # Make sure all the parameters have been selected
                if ticker_val != '' and venue_val != '' and start_date_val != '' and start_time_val != '' and \
                        finish_date_val != '' and finish_time_val != '' and market_data_val != '' and \
                        broker_val != '' and algo_val != '' and n_clicks > old_clicks:

                    # Expand ticker/broker fields etc., in case for example 'All' has been specified, or any
                    # other groups
                    broker_val = self._util_func.populate_field(broker_val,
                                                                constants.available_brokers_dictionary,
                                                                exception_fields='All')
                    algo_val = self._util_func.populate_field(algo_val, constants.available_algos_dictionary,
                                                              exception_fields='All')
                    venue_val = self._util_func.populate_field(venue_val, constants.available_venues_dictionary,
                                                               exception_fields='All')

                    # Combine the start date/time and finish date/time
                    start_date_val = start_date_val + ' ' + start_time_val
                    finish_date_val = finish_date_val + ' ' + finish_time_val

                    metric_val = metric_val.replace(' ', '_')

                    logger.debug('Calculation click old: ' + str(old_clicks) + " clicks vs new " + str(n_clicks))

                    self._session_manager.set_session_clicks(tag, n_clicks)
                    self._session_manager.set_session_flag('metric', value=metric_val)
                    self._session_manager.set_session_flag('detailed-visualization', value=True)

                    logger.info('Selected ' + ticker_val + " " + start_date_val + " - " + finish_date_val)

                    # Check that the dates are less than 1 month apart
                    if pd.Timestamp(finish_date_val) - pd.Timestamp(start_date_val) > \
                            pd.Timedelta(days=constants.max_plot_days):
                        return "Status: Cannot plot more than " + str(constants.max_plot_days) + " days!"
                    elif pd.Timestamp(start_date_val) >= pd.Timestamp(finish_date_val):
                        return "Status: Start date must be before the end date"

                    try:
                        # Clear the cache for the current user
                        self._glob_volatile_cache.clear_key_match(self._session_manager.get_session_id())

                        results_form = [
                            # Calculate the distribution of the metric for trades/orders, broken down by
                            # trade side (buy/sell)
                            DistResultsForm(trade_order_list=['trade_df', 'order_df'], metric_name=metric_val,
                                            aggregate_by_field='side', scalar=10000.0,
                                            weighting_field='executed_notional_in_reporting_currency'),

                            # Create a table of the markout of every trade
                            TableResultsForm(trade_order_list=['trade_df'], metric_name='markout',
                                             filter_by='all',
                                             replace_text={'markout_': '', 'executed_notional': 'exec not',
                                                           'notional_currency': 'exec not cur'},
                                             keep_fields=['executed_notional', 'side', 'notional_currency'],
                                             scalar={'all': 10000.0, 'exclude': ['executed_notional', 'side']},
                                             round_figures_by={'all': 2, 'executed_notional': 0, 'side': 0},
                                             weighting_field='executed_notional')
                        ]

                        benchmark_calcs = [
                            # Calculate the arrival prices for every trade/order
                            BenchmarkArrival(trade_order_list=['trade_df', 'order_df']),

                            # Calculate the VWAP for each order
                            BenchmarkVWAP(trade_order_list=['order_df']),

                            # Calculate the TWAP for each order
                            BenchmarkTWAP(trade_order_list=['order_df'])
                        ]

                        metric_calcs = [metric_val, MetricMarkout(trade_order_list=['trade_df'])]

                        # Get from cache; note, given that we are in the first part of the chain, we should
                        # force it to calculate!
                        sparse_market_trade_df = self.get_cached_computation_analysis(
                            key='sparse_market_trade_df',
                            start_date=start_date_val, finish_date=finish_date_val,
                            ticker=ticker_val, venue=venue_val, market_data=market_data_val,
                            event_type='trade', dummy_market=False,
                            broker=broker_val, algo=algo_val,
                            metric_calcs=metric_calcs, metric_trade_order_list=['trade_df', 'order_df'],
                            benchmark_calcs=benchmark_calcs,
                            tca_type='detailed', tca_engine=self._tca_engine,
                            results_form=results_form, force_calculate=True)

                        calc_start = sparse_market_trade_df.index[0]
                        calc_end = sparse_market_trade_df.index[-1]

                        detailed_title = self.create_status_msg_flags('detailed', ticker_val, calc_start,
                                                                      calc_end)
                    except Exception as e:
                        LoggerManager().getLogger(__name__).exception(e)

                        return "Status: error " + str(e) + ". Check dates?"

                    finish = time.time()

                    return 'Status: calculated ' + str(round(finish - start, 3)) + "s for " + detailed_title

            elif tca_type == 'aggregated':
                ticker_val, start_date_val, finish_date_val, broker_val, algo_val, venue_val, reload_val, \
                    market_data_val, event_type_val, metric_val, n_clicks = args

                # Catch cases where users repeatedly click, which can cause misalignment in clicks
                self._session_manager.set_session_clicks(tag, n_clicks, old_clicks=old_clicks)

                logger.debug(self.create_generate_button_msg(old_clicks, n_clicks))

                if ticker_val != '' and start_date_val != '' and venue_val != '' and finish_date_val != '' and \
                        reload_val != '' and event_type_val != '' and metric_val != '' and \
                        n_clicks > old_clicks:

                    # Expand ticker/broker fields etc., in case for example 'All' has been specified, or any
                    # other groups
                    ticker_val_list = self._util_func.populate_field(ticker_val,
                                                                     constants.available_tickers_dictionary)
                    broker_val_list = self._util_func.populate_field(broker_val,
                                                                     constants.available_brokers_dictionary)
                    algo_val_list = self._util_func.populate_field(algo_val,
                                                                   constants.available_algos_dictionary)
                    venue_val_list = self._util_func.populate_field(venue_val,
                                                                    constants.available_venues_dictionary)

                    metric_val = metric_val.replace(' ', '_')

                    logger.debug('Calculation click old: ' + str(old_clicks) + " clicks vs new " + str(n_clicks))

                    self._session_manager.set_session_clicks(tag, n_clicks)
                    self._session_manager.set_session_flag('metric', value=metric_val)
                    self._session_manager.set_session_flag('aggregated-visualization', True)

                    try:
                        # Clear the cache for the current user
                        self._glob_volatile_cache.clear_key_match(self._session_manager.get_session_id())

                        results_form = [
                            # Show the distribution of the selected metric for trades, weighted by notional and
                            # aggregated by ticker and then by venue
                            DistResultsForm(trade_order_list=['trade_df'], metric_name=metric_val,
                                            aggregate_by_field=['ticker', 'venue'],
                                            weighting_field='executed_notional_in_reporting_currency'),

                            # Display the timeline of the metric, averaged by day (and weighted by notional)
                            TimelineResultsForm(trade_order_list=['trade_df'], by_date='date',
                                                metric_name=metric_val, aggregation_metric='mean',
                                                aggregate_by_field='ticker', scalar=10000.0,
                                                weighting_field='executed_notional_in_reporting_currency'),

                            # Display a bar chart showing the average metric, weighted by notional and
                            # aggregated by ticker and venue
                            BarResultsForm(trade_order_list=['trade_df'], metric_name=metric_val,
                                           aggregation_metric='mean', aggregate_by_field=['ticker', 'venue'],
                                           scalar=10000.0,
                                           weighting_field='executed_notional_in_reporting_currency')
                        ]

                        try:
                            timeline_trade_df_metric_by_ticker = self.get_cached_computation_analysis(
                                key='timeline_trade_df_' + metric_val + '_by_ticker',
                                start_date=start_date_val, finish_date=finish_date_val,
                                event_type=event_type_val,
                                ticker=ticker_val_list, broker=broker_val_list, algo=algo_val_list,
                                venue=venue_val_list, market_data=market_data_val,
                                dummy_market=True,
                                tca_engine=self._tca_engine, tca_type='aggregated',
                                metric_calcs=metric_val, metric_trade_order_list=['trade_df'],
                                results_form=results_form, force_calculate=True, reload_val=reload_val,
                                trade_order_mapping=['trade_df'])

                            calc_start = timeline_trade_df_metric_by_ticker.index[0]
                            calc_end = timeline_trade_df_metric_by_ticker.index[-1]

                            aggregated_title = self.create_status_msg_flags('aggregated', ticker_val,
                                                                            calc_start, calc_end)

                            logger.debug('Plotted aggregated summary plot!')

                            finish = time.time()
                        except Exception as e:
                            LoggerManager().getLogger(__name__).exception(e)

                            return "Status: error - " + str(e) + ". Check data exists for these dates?"
                    except Exception as e:
                        LoggerManager().getLogger(__name__).exception(e)

                        return 'Status: error - ' + str(e) + ". Check data exists for these dates?"

                    return 'Status: calculated ' + str(round(finish - start, 3)) + "s for " + aggregated_title

            elif tca_type == 'compliance':
                ticker_val, start_date_val, finish_date_val, broker_val, algo_val, venue_val, reload_val, \
                    market_data_val, filter_time_of_day_val, start_time_of_day_val, finish_time_of_day_val, \
                    slippage_bounds_val, visualization_val, n_clicks = args

                # Catch cases where users repeatedly click, which can cause misalignment in clicks
                self._session_manager.set_session_clicks(tag, n_clicks, old_clicks=old_clicks)

                logger.debug(self.create_generate_button_msg(old_clicks, n_clicks))

                if ticker_val != '' and start_date_val != '' and broker_val != '' and algo_val != '' and \
                        venue_val != '' and finish_date_val != '' and reload_val != '' and \
                        filter_time_of_day_val != '' and start_time_of_day_val != '' and \
                        finish_time_of_day_val != '' and slippage_bounds_val != '' and n_clicks > old_clicks:

                    ticker_val_list = self._util_func.populate_field(ticker_val,
                                                                     constants.available_tickers_dictionary)
                    broker_val_list = self._util_func.populate_field(broker_val,
                                                                     constants.available_brokers_dictionary,
                                                                     exception_fields='All')
                    algo_val_list = self._util_func.populate_field(algo_val,
                                                                   constants.available_algos_dictionary,
                                                                   exception_fields='All')
                    venue_val_list = self._util_func.populate_field(venue_val,
                                                                    constants.available_venues_dictionary,
                                                                    exception_fields='All')

                    logger.debug('Calculation click old: ' + str(old_clicks) + " clicks vs new " + str(n_clicks))

                    self._session_manager.set_session_clicks(tag, n_clicks)

                    if visualization_val == 'yes':
                        self._session_manager.set_session_flag('compliance-visualization', True)
                    else:
                        self._session_manager.set_session_flag('compliance-visualization', False)

                    try:
                        # Clear the cache for the current user
                        self._glob_volatile_cache.clear_key_match(self._session_manager.get_session_id())

                        slippage_bounds = 0.0
                        overwrite_bid_ask = True

                        if slippage_bounds_val == 'bid/ask':
                            overwrite_bid_ask = False
                        else:
                            slippage_bounds = float(slippage_bounds_val)

                        metric_calcs = [
                            # Calculate slippage for trades
                            MetricSlippage(trade_order_list='trade_df'),
                        ]

                        benchmark_calcs = [
                            # Generate the spread to mid for market data (in certain cases, artificially
                            # create a spread)
                            BenchmarkSpreadToMid(bid_mid_bp=slippage_bounds, ask_mid_bp=slippage_bounds,
                                                 overwrite_bid_ask=overwrite_bid_ask)
                        ]

                        results_form = [
                            # Display a table of all the anomalous trades by slippage (ie. outside bid/ask)
                            TableResultsForm(
                                # Only display for trades
                                trade_order_list=['trade_df'],

                                # Display slippage
                                metric_name='slippage',

                                # Order by the worst slippage
                                filter_by='worst_all',

                                # Replace text on the table to make it look nicer
                                replace_text={'markout_': '', 'executed_notional': 'exec not',
                                              '_currency': ' cur', '_in_reporting': ' in rep',
                                              'slippage_benchmark': 'benchmark',
                                              'slippage_anomalous': 'anomalous',
                                              'broker_id': 'broker ID', 'algo_id': 'algo ID',
                                              'executed_price': 'price'},

                                exclude_fields_from_avg=['slippage_anomalous', 'slippage_benchmark', 'side'],

                                # Only select trades outside bid/ask (ie. where slippage_anomalous = 1)
                                tag_value_combinations={'slippage_anomalous': 1.0},

                                # Display several columns
                                keep_fields=['ticker', 'broker_id', 'algo_id', 'notional_currency',
                                             'executed_notional', 'executed_notional_in_reporting_currency',
                                             'side', 'executed_price'],

                                # Multiply the slippage field by 10000 (to convert it into basis points)
                                scalar={'slippage': 10000.0},

                                # Round figures to make them easier to read
                                round_figures_by={'executed_notional': 0,
                                                  'executed_notional_in_reporting_currency': 0,
                                                  'side': 0, 'slippage': 2, 'slippage_benchmark': 4}),

                            # Get the total notional executed by broker (in the reporting currency)
                            BarResultsForm(
                                # Select child orders
                                trade_order_list=['trade_df'],

                                # Aggregate by broker name
                                aggregate_by_field='broker_id',

                                # Select the notional for analysis
                                metric_name='executed_notional_in_reporting_currency',

                                # Sum all the notionals
                                aggregation_metric='sum',

                                # Round figures
                                round_figures_by=0)
                        ]

                        # Reformat tables for notional by broker
                        join_tables = [
                            # JoinTables(
                            #     tables_dict={'table_name': 'jointables_broker_id_df',
                            #
                            #                  # Fetch the following calculated tables
                            #                  'table_list': [
                            #                      'bar_order_df_executed_notional_in_reporting_currency_by_broker_id'],
                            #
                            #                  # Append to the columns of each table
                            #                  'column_list': ['notional (rep cur)'],
                            #                  'replace_text': {'broker_id': 'broker ID'}
                            #                  })
                        ]

                        try:
                            trade_df = self.get_cached_computation_analysis(
                                key='trade_df',
                                start_date=start_date_val, finish_date=finish_date_val,
                                start_time_of_day=start_time_of_day_val,
                                finish_time_of_day=finish_time_of_day_val,
                                filter_time_of_day=filter_time_of_day_val,
                                event_type='trade',
                                ticker=ticker_val_list, broker=broker_val_list, algo=algo_val_list,
                                venue=venue_val_list,
                                dummy_market=True, market_data=market_data_val,
                                tca_engine=self._tca_engine, tca_type='compliance',
                                metric_calcs=metric_calcs, benchmark_calcs=benchmark_calcs,
                                metric_trade_order_list=['trade_df'],
                                results_form=results_form, join_tables=join_tables,
                                force_calculate=True, reload_val=reload_val,
                                trade_order_mapping=['trade_df'])

                            calc_start = trade_df.index[0]
                            calc_end = trade_df.index[-1]

                            compliance_title = self.create_status_msg_flags('compliance', ticker_val,
                                                                            calc_start, calc_end)

                            logger.debug('Generated compliance summary.. awaiting plot callbacks!')

                            finish = time.time()
                        except Exception as e:
                            logger.exception(e)

                            return "Status: error " + str(e) + ". Check data exists for these dates?"
                    except Exception as e:
                        logger.exception(e)

                        return 'Status: error ' + str(e) + ". Check data exists for these dates?"

                    return 'Status: calculated ' + str(round(finish - start, 3)) + "s for " + compliance_title

            # Not very elegant, but the only way to prevent plots disappearing
            raise dash.exceptions.PreventUpdate("No data changed - " + tca_type)

        if external_params is not None:
            return callback(**external_params)

        return callback
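# A small, self-contained sketch of the click-guard pattern used throughout the callback
# above: only fire an expensive computation when the click counter has strictly advanced,
# which absorbs repeated/stale Dash button events. The _session dict is an illustrative
# stand-in for the SessionManager.
_session = {'clicks': 0}

def on_button(n_clicks):
    old_clicks = _session['clicks']

    if n_clicks is None or n_clicks <= old_clicks:
        return 'Status: no change'   # stale or repeated event, skip recalculation

    _session['clicks'] = n_clicks    # record the new click count

    return 'Status: recalculating...'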
def __init__(self):
    self._util_func = UtilFunc()
class ComputationReport(ABC):
    """Converts ComputationResults (largely consisting of Plotly based Figures and HTML tables) into
    self-contained HTML pages. Can also render these HTML pages into PDFs. Uses Renderer objects to create the
    HTML, including CanvasRenderer (which uses chartpy's "Canvas" object extensively) and JinjaRenderer (which
    uses Jinja templating for HTML and WeasyPrint for PDF conversion).
    """

    def __init__(self, computation_results, title='Cuemacro Computation', renderer=CanvasRenderer(),
                 chart_report_height=constants.chart_report_height,
                 chart_report_width=constants.chart_report_width):
        """Initialize class with the computation results we wish to convert into a report-like format

        Parameters
        ----------
        computation_results : ComputationResults
            The results of a large scale computation, which contains charts and DataFrames

        title : str
            Title of the webpage to be rendered
        """
        self._util_func = UtilFunc()
        self._computation_results = computation_results
        self._title = title
        self._chart = Chart(engine='plotly')
        self._renderer = renderer
        self._computation_request = computation_results.computation_request
        self._chart_report_width = chart_report_width
        self._chart_report_height = chart_report_height

    def create_report(self, output_filename=None, output_format='html', offline_js=False):
        """Creates an HTML/PDF report from a ComputationResults object, which can (optionally) be written to
        disk; alternatively, returns a binary representation of the HTML or PDF.

        Parameters
        ----------
        output_filename : str (optional)
            File output; if this is not specified, a binary object is returned

        output_format : str
            'html' (default) - output an HTML page

        offline_js : bool
            False (default) - downloads Plotly.js in the webpage to be rendered
            True - includes Plotly.js in the webpage to be rendered (results in much bigger file sizes)

        Returns
        -------
        PDF or HTML binary
        """
        extra_head_code = ''

        if output_format == 'html':
            # Embed plotly.js in the HTML (makes it bigger, but then doesn't require a web connection)
            if offline_js:
                embed_chart = 'offline_embed_js_div'
            else:
                # Otherwise put a web link to plotly.js (but this means we need to download it every time)
                embed_chart = 'offline_div'
                extra_head_code = '<head><script src="https://cdn.plot.ly/plotly-latest.min.js"></script></head>'
        elif output_format == 'pdf':
            # For PDFs we need to create static SVGs of the Plotly charts
            embed_chart = 'offline_image_svg_in_html'
        elif output_format == 'xlwings':
            embed_chart = 'leave_as_fig'

        # Get a list of the HTML to render
        elements_to_render_dict = self._layout_computation_results_to_html(embed_chart)

        return self._renderer.render_elements(elements_to_render_dict, title=self._title,
                                              output_filename=output_filename, output_format=output_format,
                                              extra_head_code=extra_head_code)

    def _generate_filename(self, extension):
        return self._get_time_stamp() + "." + extension

    def _get_time_stamp(self):
        return str(datetime.datetime.now()).replace(':', '-').replace(' ', '-').replace(".", "-")

    def _create_text_html(self, text, add_hr=True):
        """Takes text and creates the appropriate HTML to represent it, split by horizontal HTML bars

        Parameters
        ----------
        text : str (list)
            Text to be added in HTML

        Returns
        -------
        list (of HTML)
        """
        if text != [] and text is not None and add_hr:
            html_output = [['<hr>']]
        else:
            html_output = []

        if not isinstance(text, list):
            text = [text]

        for t in text:
            html_output.append([t])

        return html_output

    def _create_table_html(self, table):
        """Takes tables in HTML and creates the appropriate HTML to represent them, split by horizontal HTML bars

        Parameters
        ----------
        table : dict
            Tables in HTML format

        Returns
        -------
        list (of HTML)
        """
        if table != {} and table is not None:
            html_output = [['<hr>']]
        else:
            html_output = []

        for t in self._util_func.dict_key_list(table.keys()):
            html_output.append(table[t])

        return html_output

    def _create_chart_html(self, chart, embed_chart):
        if chart != {} and chart is not None:
            html_output = [['<hr>']]
        else:
            html_output = []

        style = Style(plotly_plot_mode=embed_chart)

        for c in self._util_func.dict_key_list(chart.keys()):
            # Update chart size and padding (if it's Plotly), so it fits well on a PDF
            try:
                chart[c].update_layout(
                    autosize=False,
                    width=self._chart_report_width,
                    height=self._chart_report_height,
                    margin=dict(l=10, r=10, b=10, t=60, pad=4),
                )
            except:
                pass

            if embed_chart == 'leave_as_fig':
                html_output.append([chart[c]])
            else:
                html_output.append([self._chart.plot(chart[c], style=style)])

        return html_output

    @abc.abstractmethod
    def _layout_computation_results_to_html(self, embed_chart='offline_embed_js_div'):
        """Converts the computation results to a list containing HTML, primarily of the charts. Should be
        implemented by concrete subclasses, which can select the order of the charts (and which charts are
        converted).

        Parameters
        ----------
        embed_chart : str
            'offline_embed_js_div' (default) - converts Plotly Figures into HTML + includes the Plotly.js script
            'offline_div' - converts Plotly Figures into HTML (but excludes the Plotly.js script)

        Returns
        -------
        list (containing HTML), list (containing HTML of descriptions)
        """
        pass
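# A hedged illustration of the offline_js trade-off above, using plotly.io directly rather
# than tcapy/chartpy's actual code path: embedding plotly.js makes the file self-contained
# but several MB larger, while the CDN link keeps it small but needs a web connection.
import plotly.graph_objects as go

fig = go.Figure(data=go.Scatter(y=[1, 3, 2]))

html_embedded = fig.to_html(include_plotlyjs=True)   # self-contained, much bigger file
html_cdn = fig.to_html(include_plotlyjs='cdn')       # small, requires internet to view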
def test_write_multiple_wildcard_market_data_csvs_arctic():
    """Tests that we can write sequential market data CSVs (or Parquet files) whose path has been specified by a
    wildcard (eg. EURUSD*.csv). It is assumed that the CSVs are in chronological order, from their filenames.
    """
    if not run_arctic_tests:
        return

    market_loader = Mediator.get_tca_market_trade_loader(version=tcapy_version)

    arctic_start_date = '01 Jan 2016'
    arctic_finish_date = pd.Timestamp(datetime.datetime.utcnow())

    for a in arctic_lib_type:
        database_source = DatabaseSourceArctic(postfix='testharness', arctic_lib_type=a)

        ### Read CSV data which is sorted ascending (default!)
        database_source.convert_csv_to_table(
            csv_market_data_store, ticker, test_harness_arctic_market_data_table,
            if_exists_table='replace', if_exists_ticker='replace', market_trade_data='market',
            csv_read_chunksize=10**6, remove_duplicates=False)

        database_source_csv = DatabaseSourceCSV(market_data_database_csv=csv_market_data_store)

        market_df_csv = database_source_csv.fetch_market_data(
            start_date=arctic_start_date, finish_date=arctic_finish_date, ticker=ticker)

        # Prepare the CSV folder first
        csv_folder = os.path.join(constants.test_data_harness_folder, 'csv_arctic_mult')

        # Empty the CSV test harness folder, where we shall dump the mini CSVs
        UtilFunc().forcibly_create_empty_folder(csv_folder)

        # Split the CSV file into several mini CSV files (and also Parquet files)
        market_df_list = TimeSeriesOps().split_array_chunks(market_df_csv, chunks=3)

        chunk_no = 0

        for m in market_df_list:
            m.to_csv(os.path.join(csv_folder, "EURUSD" + str(chunk_no) + '.csv'))
            UtilFunc().write_dataframe_to_binary(m,
                                                 os.path.join(csv_folder,
                                                              "EURUSD" + str(chunk_no) + '.parquet'),
                                                 format='parquet')

            chunk_no = chunk_no + 1

        file_ext = ['csv', 'parquet']

        for f in file_ext:
            ### Read data from the mini CSVs (using the wildcard char) and dump to Arctic
            database_source.convert_csv_to_table(
                os.path.join(csv_folder, "EURUSD*." + f), ticker, test_harness_arctic_market_data_table,
                if_exists_table='append', if_exists_ticker='replace', market_trade_data='market',
                csv_read_chunksize=10**6, remove_duplicates=False)

            market_request = MarketRequest(
                start_date=arctic_start_date, finish_date=arctic_finish_date, ticker=ticker,
                data_store=database_source,
                market_data_database_table=test_harness_arctic_market_data_table)

            # Read back from Arctic
            market_df_load = market_loader.get_market_data(market_request=market_request)

            # Compare reading directly from the original large CSV vs. reading back from Arctic (which was
            # dumped from the split CSVs)
            diff_df = abs(market_df_load['mid'] - market_df_csv['mid'])

            outside_bounds = diff_df[diff_df >= eps]

            assert len(outside_bounds) == 0
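# A minimal sketch of the wildcard ingestion idea above, assuming the chunk filenames sort
# into chronological order (as the test's EURUSD0/EURUSD1/... naming guarantees).
import glob
import pandas as pd

def read_wildcard_csvs(pattern):
    # Sort, so that EURUSD0.csv, EURUSD1.csv, ... are concatenated in time order
    frames = [pd.read_csv(f, index_col=0, parse_dates=True) for f in sorted(glob.glob(pattern))]

    return pd.concat(frames).sort_index()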
# For caching data (in Redis)
from tcapy.util.mediator import Mediator

# Utility stuff
from tcapy.conf.constants import Constants
from tcapy.util.loggermanager import LoggerManager
from tcapy.util.utilfunc import UtilFunc

# Creates the HTML layout of the web pages
from chartpy.dashboard import CallbackManager, SessionManager

constants = Constants()
util_func = UtilFunc()

# Manage session information for every client
session_manager = SessionManager()

# Manage creation of callbacks for Dash
callback_manager = CallbackManager(constants)

logger = LoggerManager.getLogger(__name__)

# Print constants for user information
logger.info("Platform = " + constants.plat)
logger.info("Env = " + constants.env)
logger.info("Python = " + sys.executable)
logger.info("Debug environment = " + str(constants.debug_start_flask_server_directly))
class ComputationCaller(ABC):
    """Abstract class which adds listeners to the GUI buttons in the tcapy application for doing TCA or other
    calculations. At initialisation, it adds listeners for these buttons and links them to the various text box
    inputs (where the user can specify the various computation parameters such as start date, finish date,
    ticker, TCA metrics etc.)

    When a button is pressed, it triggers various "calculate" methods, which convert the GUI input into
    ComputationRequest/TCARequest objects, which are then sent to another object for doing the actual
    computation. This analysis is then cached in Redis. The completion of this calculation triggers a callback
    from every display component (such as a plot or table), which searches the cache for the appropriate output
    to display.

    If a user wishes to call tcapy programmatically, it is recommended they create a computation request
    directly, rather than attempting to use ComputationCaller, and then submit that to an external computation
    engine.
    """

    def __init__(self, app, session_manager, callback_manager, glob_volatile_cache, layout, callback_dict=None):
        self._util_func = UtilFunc()
        self._session_manager = session_manager
        self._callback_manager = callback_manager
        self._glob_volatile_cache = glob_volatile_cache

        self.create_callbacks(app, callback_manager, callback_dict=callback_dict)

    def create_plot_flags(self, session_manager, layout):
        """Creates flags for each display component (eg. plot or table) on each web page in the project. These
        are necessary so we can keep track of whether we need to recalculate the underlying TCA analysis.

        Parameters
        ----------
        session_manager : SessionManager
            Stores and modifies session data which is unique for each user

        layout : Layout
            Specifies the layout of an HTML page using Dash components

        Returns
        -------
        dict
        """
        plot_flags = {}
        plot_lines = {}

        for page in layout.pages:
            page_flags = []
            line_flags = []

            # For redrawing plots
            for gen_flag in self._generic_plot_flags:
                key = page + gen_flag

                # Append a plot flag if it exists
                if key in layout.id_flags:
                    page_flags.append(
                        self._session_manager.create_calculated_flags(
                            'redraw-' + page,
                            session_manager.create_calculated_flags(
                                self._util_func.dict_key_list(layout.id_flags[key].keys()),
                                self._generic_plot_flags[gen_flag])))

            plot_flags[page] = UtilFunc().flatten_list_of_lists(page_flags)

            # For clicking on charts
            for gen_flag in self._generic_line_flags:
                key = page + gen_flag

                # Append a line clicking flag if it exists
                if key in layout.id_flags:
                    line_flags.append(
                        self._session_manager.create_calculated_flags(
                            'redraw-' + page,
                            session_manager.create_calculated_flags(
                                self._util_func.dict_key_list(layout.id_flags[key].keys()),
                                self._generic_line_flags[gen_flag])))

            if line_flags != []:
                plot_lines[page] = UtilFunc().flatten_list_of_lists(line_flags)

        return plot_flags

    def create_callbacks(self, app, callback_manager, callback_dict=None):
        """Creates callbacks for each calculation button in the application, so that each button is linked to
        execution code when it is pressed. Typically, these button presses kick off a large computation (eg. TCA
        analysis).

        Parameters
        ----------
        app : dash.App
            A Dash app, which is a wrapper over a Flask mini-webserver

        callback_manager : CallbackManager
            Creates callbacks for Dash components

        callback_dict : dict
            Dictionary of callbacks for Dash
        """
        if callback_dict is None:
            callback_dict = constants.dash_callbacks

        for k in callback_dict.keys():
            # Dash callbacks for each page
            app.callback(callback_manager.output_callback(k, 'status'),
                         callback_manager.input_callback(k, callback_dict[k]))(
                self.calculate_computation_summary(k))

    def add_list_kwargs(self, kwargs, tag, addition):
        """Adds a value to the kwargs dictionary (or appends it to an existing tag)

        Parameters
        ----------
        kwargs : dict
            Existing kwargs dictionary

        tag : str
            Key to be added to kwargs

        addition : str
            Value of the key to be added

        Returns
        -------
        dict
        """
        if addition is not None:
            if tag not in kwargs:
                kwargs[tag] = addition
            else:
                if kwargs[tag] is not None:
                    if isinstance(kwargs[tag], list):
                        # Note: append mutates in place (assigning its return value would set the key to None)
                        kwargs[tag].append(addition)
                    else:
                        kwargs[tag] = [kwargs[tag], addition]
                else:
                    kwargs[tag] = addition

        return kwargs

    def create_computation_request(self, **kwargs):
        pass

    def _fetch_cached_list(self, force_calculate=False, computation_type=None, session_id=None, key=None):
        """Fetches a cached list of objects (typically DataFrames) which have been generated during a larger
        computation (eg. TCA analysis) for a particular session.

        Parameters
        ----------
        force_calculate : bool (default: False)
            Should a large calculation be recomputed? If so, do not attempt to fetch from the cache

        computation_type : str
            What computation type are we doing?

        session_id : str
            A unique identifier for the current web session

        key : str
            Which key to retrieve from the cache, which (usually) relates to a DataFrame generated by TCA output

        Returns
        -------
        list (usually of pd.DataFrames)
        """
        cached_list = []

        # First try to get from the cache (only need the key for this, no hash!)
        if not force_calculate:
            if not isinstance(key, list):
                key = [key]

            if session_id != '' and computation_type != '':
                sessions_id_computation = session_id + '' + computation_type + '_'
            else:
                sessions_id_computation = ''

            for k in key:
                # This will be unique to each user
                cached_list.append(self._glob_volatile_cache.get(sessions_id_computation + k))

        return cached_list

    def get_cached_computation_analysis(self, **kwargs):
        """Fetches a computation output from a cache (typically Redis), or computes the analysis directly using
        another object, if requested. Typically, a computation is initiated and then that large analysis is
        cached, ready to be consumed by display components which repeatedly call this function.

        Parameters
        ----------
        kwargs
            Variables generated by the GUI which relate to our computations (eg. start date, finish date,
            ticker etc.)

        Returns
        -------
        pd.DataFrame
        """
        try:
            force_calculate = kwargs['force_calculate']
        except:
            force_calculate = False

        key = None

        if 'key' in kwargs:
            key = kwargs['key']

        if 'test' not in kwargs:
            computation_type = self._tca_engine.get_engine_description()
            session_id = self._session_manager.get_session_id() + "_expiry_"
            session_id_computation = session_id + '' + computation_type + '_'
        else:
            computation_type = ''
            session_id = ''
            session_id_computation = ''

        # Try to fetch some TCA analysis output from the cache
        cached_list = self._fetch_cached_list(force_calculate=force_calculate,
                                              computation_type=computation_type,
                                              session_id=session_id, key=key)

        # Otherwise force the calculation (or if it doesn't exist in the cache!)
        # When a button is pressed, typically force_calculate will be set to True
        if force_calculate:
            computation_request = self.create_computation_request(**kwargs)

            # Delete any existing keys for the current session
            self._glob_volatile_cache.clear_key_match("*" + session_id + "*")

            dict_of_df = self.run_computation_request(computation_request)

            dict_key_list = []
            dict_element_list = []

            # Cache all the DataFrames in Redis/other memory space (we will likely need them for later calls!)
            # From a security perspective, it is probably better not to cache the TCAEngine objects on a
            # database (which can execute code)
            for dict_key in dict_of_df.keys():
                # Check if we have all the keys filled (will be missing if, for example, there are no trades)
                if dict_key not in dict_of_df:
                    raise Exception('Missing ' + dict_key)

                dict_key_list.append(session_id_computation + dict_key)
                dict_element_list.append(dict_of_df[dict_key])

            self._session_manager.set_session_flag('user_df', dict_key_list)

            # Put it back into the Redis cache (to be fetched by Dash callbacks)
            self._glob_volatile_cache.put(dict_key_list, dict_element_list)

            logger = LoggerManager.getLogger(__name__)
            logger.debug('Generated tables: ' + str(self._util_func.dict_key_list(dict_of_df.keys())))

            if key is None:
                return None

            if not isinstance(key, list):
                key = [key]

            for k in key:
                # Has one of the DataFrames we want just been calculated? If so, return it!
                if k in dict_of_df.keys():
                    cached_list.append(dict_of_df[k])
                # Otherwise look in Redis for the table for the user
                else:
                    # As a last resort, get it from our global cache; this key is unique to each user
                    cached_list.append(self._glob_volatile_cache.get(session_id_computation + k))

        # Return as tuples
        tup = list(cached_list)

        if len(tup) == 1:
            return tup[0]

        return tup

    def create_status_msg_flags(self, computation_type, ticker, calc_start, calc_end):
        if isinstance(ticker, list):
            ticker = self._util_func.pretty_str_list(ticker)

        title = ticker + ": " \
                + str(calc_start).replace(':00+00:00', '').replace('000+00:00', '') + " - " \
                + str(calc_end).replace(':00+00:00', '').replace('000+00:00', '') + " at " \
                + str(datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S"))

        self._session_manager.set_session_flag({computation_type + '-title': title,
                                                computation_type + '-ticker': ticker})

        self._session_manager.set_session_flag(self._plot_flags[computation_type], True)

        return title

    def create_generate_button_msg(self, old_clicks, n_clicks):
        return 'Triggered click old: ' + str(old_clicks) + " clicks vs new " + str(n_clicks) + \
               " for " + str(self._session_manager.get_session_id())

    def get_username_string(self):
        username = self._session_manager.get_username()

        if username is None:
            username = ''
        else:
            username = '******' + username

        return username

    @abc.abstractmethod
    def fill_computation_request_kwargs(self, kwargs, fields):
        """Fills a dictionary with the appropriate parameters which can be consumed by a ComputationRequest
        object. This involves a large number of object conversions, eg. str based dates to Timestamps, metric
        names to Metric objects etc.

        Parameters
        ----------
        kwargs : dict
            Contains parameters related to the computation analysis

        fields : str (list)
            List of fields we should fill with None if they don't exist in kwargs

        Returns
        -------
        dict
        """
        pass

    @abc.abstractmethod
    def run_computation_request(self, computation_request):
        """Runs a ComputationRequest object, whose fields describe parameters such as the start date, finish
        date, ticker, metrics to be computed, benchmarks to be computed etc. The ComputationRequest object can
        be consumed by a computation engine such as a TCAEngine.

        Parameters
        ----------
        computation_request : ComputationRequest
            Describes a computational analysis, such as the start date, finish date, ticker etc.

        Returns
        -------
        dict (of pd.DataFrames)
        """
        pass

    @abc.abstractmethod
    def calculate_computation_summary(self, computation_type, external_params=None):
        """
        Parameters
        ----------
        computation_type : str
            Type of computation, eg. 'detailed'

        external_params : dict

        Returns
        -------
        """
        pass
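# A hedged sketch of the Dash wiring that create_callbacks performs, using plain
# dash.dependencies rather than tcapy's CallbackManager; the component ids and the toy
# callback body are illustrative, not tcapy's actual layout.
from dash import Dash, html
from dash.dependencies import Input, Output

app = Dash(__name__)
app.layout = html.Div([html.Button('Calculate', id='detailed-calculation-button'),
                       html.Div(id='detailed-status')])

def make_callback(tca_type):
    def callback(n_clicks):
        return 'Status: clicked ' + str(n_clicks) + ' times for ' + tca_type
    return callback

# Equivalent in spirit to app.callback(output, input)(self.calculate_computation_summary(k))
app.callback(Output('detailed-status', 'children'),
             Input('detailed-calculation-button', 'n_clicks'))(make_callback('detailed'))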
class TCATickerLoader(ABC): """This class is designed to load up market and trade data for single _tickers and also makes appropriate metric calculations for that specific ticker. It is generally called by the higher level TCAMarketTradeLoader class, which can handle multiple _tickers. """ def __init__(self, version=constants.tcapy_version, volatile_cache_engine=constants.volatile_cache_engine): self._data_factory = DataFactory(version=version) self._util_func = UtilFunc( ) # general utility operations (such as flatten lists) self._fx_conv = FXConv( ) # for determining if FX crosses are in the correct convention self._time_series_ops = TimeSeriesOps( ) # time series operations, such as filtering by date self._metric_executed_price = MetricExecutedPriceNotional( ) # for determining the executed notionals/price of orders # from trades self._benchmark_mid = BenchmarkMarketMid( ) # to calculate mid price from bid/ask quote market data self._trade_order_tag = TradeOrderFilterTag( ) # to filter trade/orders according to the values of certain tags self._version = version self._volatile_cache_engine = volatile_cache_engine def get_market_data(self, market_request): """Gets market data for a particular ticker. When we ask for non-standard FX crosses, only the mid-field is returned (calculated as a cross rate). We do not give bid/ask quotes for calculated non-standard _tickers, as these can difficult to estimate. Parameters ---------- market_request : MarketRequest The type of market data to get Returns ------- DataFrame """ logger = LoggerManager.getLogger(__name__) if isinstance(market_request, TCARequest): market_request = MarketRequest(market_request=market_request) old_ticker = market_request.ticker if market_request.asset_class == 'fx': # Check if we can get ticker directly or need to create synthetic cross rates ticker = self._fx_conv.correct_notation(market_request.ticker) else: # If not FX we don't have to invert ticker = old_ticker # If ticker is in the correct convention is in crosses where we collect data (typically this will be the USD # crosses, also some liquid non-USD pairs like EURJPY) # available_tickers = [] if isinstance(market_request.data_store, DatabaseSource): # TODO improve ticker check here! 
available_tickers = [ticker] elif 'csv' in market_request.data_store or 'h5' in market_request.data_store or 'gzip' in market_request.data_store \ or 'parquet' in market_request.data_store or isinstance(market_request.data_store, pd.DataFrame) : # For CSV (or H5) we don't have much choice, and could differ between CSV files (if CSV has 'ticker' field, will # match on that) available_tickers = [ticker] elif market_request.data_store in constants.market_data_tickers: available_tickers = self._util_func.dict_key_list( constants.market_data_tickers[ market_request.data_store].keys()) else: err_msg = 'Ticker ' + str( ticker ) + " doesn't seem available in the data source " + market_request.data_store logger.error(err_msg) raise Exception(err_msg) if ticker in available_tickers: # In the correct convention or is not FX if ticker == old_ticker: market_df = self._get_correct_convention_market_data( market_request) # Otherwise need to flip to the correct convention (only will return 'mid') else: market_request_flipped = MarketRequest( market_request=market_request) market_request_flipped.ticker = ticker market_df = self._invert_quoting_market( self._get_correct_convention_market_data( market_request_flipped)) if 'ticker' in market_df.columns: market_df['ticker'] = old_ticker else: if market_request.asset_class == 'fx' and market_request.instrument == 'spot': # Otherwise we need to get both legs # eg. for NZDCAD, we shall download NZDUSD and USDCAD => multiply them to get NZDCAD # get the USD crosses for each leg and then multiply market_request_base = MarketRequest( market_request=market_request) market_request_terms = MarketRequest( market_request=market_request) market_request_base.ticker = old_ticker[0:3] + 'USD' market_request_terms.ticker = 'USD' + old_ticker[3:7] tickers_exist = self._fx_conv.currency_pair_in_list( self._fx_conv.correct_notation(market_request_base.ticker), available_tickers) and \ self._fx_conv.currency_pair_in_list( self._fx_conv.correct_notation(market_request_terms.ticker), available_tickers) # If both USD _tickers don't exist try computing via EUR _tickers? (eg. USDSEK from EURUSD & EURSEK) if not (tickers_exist): market_request_base.ticker = old_ticker[0:3] + 'EUR' market_request_terms.ticker = 'EUR' + old_ticker[3:7] tickers_exist = self._fx_conv.currency_pair_in_list( self._fx_conv.correct_notation(market_request_base.ticker), available_tickers) and \ self._fx_conv.currency_pair_in_list( self._fx_conv.correct_notation(market_request_terms.ticker), available_tickers) # Check if that currency (in the CORRECT convention) is in the available _tickers # we will typically not collect market data for currencies in their wrong convention if tickers_exist: fields_try = ['bid', 'ask', 'mid'] market_base_df = self.get_market_data(market_request_base) market_terms_df = self.get_market_data( market_request_terms) market_has_data = False if market_base_df is not None and market_terms_df is not None: if not (market_base_df.empty) and not ( market_terms_df.empty): market_has_data = True # If there's no data in either DataFrame, don't attempt to calculate anything if not (market_has_data): return pd.DataFrame() fields = [] for f in fields_try: if f in market_base_df.columns and f in market_terms_df.columns: fields.append(f) # Only attempt to calculate if the fields exist if len(fields) > 0: # Remove any other columns (eg. with ticker name etc.) 
market_base_df = market_base_df[fields] market_terms_df = market_terms_df[fields] # Need to align series to multiply (and then fill down points which don't match) # can't use interpolation, given that would use FUTURE data market_base_df, market_terms_df = market_base_df.align( market_terms_df, join="outer") market_base_df = market_base_df.fillna(method='ffill') market_terms_df = market_terms_df.fillna( method='ffill') market_df = pd.DataFrame(data=market_base_df.values * market_terms_df.values, columns=fields, index=market_base_df.index) # Values at the start of the series MIGHT be nan, so need to ignore those market_df = market_df.dropna(subset=['mid']) if 'ticker' in market_df.columns: market_df['ticker'] = old_ticker else: return None else: # Otherwise couldn't compute either from the USD legs or EUR legs logger.warning("Couldn't find market data for ticker: " + str(ticker)) return None else: # Otherwise couldn't find the non-FX ticker logger.warning("Couldn't find market data for ticker: " + str(ticker)) return None return market_df def get_trade_order_data(self, tca_request, trade_order_type, start_date=None, finish_date=None): """Gets trade data for specified parameters (eg. start/finish dates _tickers). Will also try to find trades when they have booked in the inverted market convention, and change the fields appropriately. For example, if we ask for GBPUSD trade data, it will also search for USDGBP and convert those trades in the correct convention. Parameters ---------- tca_request : TCARequest What type of trade data do we want trade_order_type : str Do we want trade or order data? Returns ------- DataFrame """ logger = LoggerManager().getLogger(__name__) # by default, assume we want trade data (rather than order data) if trade_order_type is None: trade_order_type = 'trade_df' if start_date is None and finish_date is None: start_date = tca_request.start_date finish_date = tca_request.finish_date # Create request for actual executed trades trade_request = TradeRequest(trade_request=tca_request) trade_request.start_date = start_date trade_request.finish_date = finish_date trade_request.trade_order_type = trade_order_type # Fetch all the trades done in that ticker (will be sparse-like randomly spaced tick data) # assumed to be the correct convention (eg. GBPUSD) trade_df = self._data_factory.fetch_table(data_request=trade_request) # if fx see if inverted or not if tca_request.asset_class == 'fx' and tca_request.instrument == 'spot': # Also fetch data in the inverted cross (eg. USDGBP) as some trades may be recorded this way inv_trade_request = TradeRequest(trade_request=tca_request) inv_trade_request.start_date = start_date inv_trade_request.finish_date = finish_date inv_trade_request.trade_order_type = trade_order_type inv_trade_request.ticker = self._fx_conv.reverse_notation( trade_request.ticker) trade_inverted_df = self._data_factory.fetch_table( data_request=inv_trade_request) # Only add inverted trades if they exist! 
    def get_trade_order_data(self, tca_request, trade_order_type, start_date=None, finish_date=None):
        """Gets trade data for the specified parameters (eg. start/finish dates, tickers). Will also try to find
        trades which have been booked in the inverted market convention, and adjust their fields appropriately. For
        example, if we ask for GBPUSD trade data, it will also search for USDGBP and convert those trades into the
        correct convention.

        Parameters
        ----------
        tca_request : TCARequest
            What type of trade data do we want

        trade_order_type : str
            Do we want trade or order data?

        Returns
        -------
        DataFrame
        """
        logger = LoggerManager().getLogger(__name__)

        # By default, assume we want trade data (rather than order data)
        if trade_order_type is None:
            trade_order_type = 'trade_df'

        if start_date is None and finish_date is None:
            start_date = tca_request.start_date
            finish_date = tca_request.finish_date

        # Create request for actual executed trades
        trade_request = TradeRequest(trade_request=tca_request)

        trade_request.start_date = start_date
        trade_request.finish_date = finish_date
        trade_request.trade_order_type = trade_order_type

        # Fetch all the trades done in that ticker (will be sparse, randomly spaced tick data),
        # assumed to be in the correct convention (eg. GBPUSD)
        trade_df = self._data_factory.fetch_table(data_request=trade_request)

        # If FX spot, see if any trades were booked in the inverted convention
        if tca_request.asset_class == 'fx' and tca_request.instrument == 'spot':
            # Also fetch data in the inverted cross (eg. USDGBP) as some trades may be recorded this way
            inv_trade_request = TradeRequest(trade_request=tca_request)

            inv_trade_request.start_date = start_date
            inv_trade_request.finish_date = finish_date
            inv_trade_request.trade_order_type = trade_order_type
            inv_trade_request.ticker = self._fx_conv.reverse_notation(trade_request.ticker)

            trade_inverted_df = self._data_factory.fetch_table(data_request=inv_trade_request)

            # Only add inverted trades if they exist!
            if trade_inverted_df is not None:
                if not trade_inverted_df.empty:
                    invert_price_columns = ['executed_price', 'price_limit', 'market_bid', 'market_mid',
                                            'market_ask', 'arrival_price']

                    invert_price_columns = [x for x in invert_price_columns if x in trade_inverted_df.columns]

                    # For trades (but not orders), there is an executed price field, which needs to be inverted
                    if invert_price_columns != []:
                        trade_inverted_df[invert_price_columns] = \
                            1.0 / trade_inverted_df[invert_price_columns].values

                    trade_inverted_df['side'] = -trade_inverted_df['side']  # Buys become sells, and vice versa!
                    trade_inverted_df['ticker'] = trade_request.ticker

                    if trade_df is not None:
                        trade_df = pd.concat([trade_df, trade_inverted_df]).sort_index()
                    else:
                        trade_df = trade_inverted_df

        # Check the trade data is not empty; if it is, return None
        if self._check_is_empty_trade_order(trade_df, tca_request, start_date, finish_date, trade_order_type):
            return None

        if tca_request.asset_class == 'fx' and tca_request.instrument == 'spot':
            # Check if the notionals of any trades/orders are quoted in the TERMS currency
            terms_notionals = trade_df['notional_currency'] == tca_request.ticker[3:6]

            # If any notionals are quoted as terms, we should invert these, so all notionals are quoted in the
            # base currency for consistency
            if terms_notionals.any():
                inversion_ticker = tca_request.ticker[3:6] + tca_request.ticker[0:3]

                inversion_spot, trade_df = self._fill_reporting_spot(inversion_ticker, trade_df, start_date,
                                                                     finish_date, tca_request)

                notional_fields = ['notional', 'order_notional', 'executed_notional']

                # Need to check the terms notionals again, as the trade data could have shrunk (because we can
                # only keep trades where we have market data)
                terms_notionals = trade_df['notional_currency'] == str(tca_request.ticker[3:6])

                # Only apply the inversion spot if any terms notionals are still quoted the wrong way around
                if terms_notionals.any():
                    if inversion_spot is not None:
                        for n in notional_fields:
                            if n in trade_df.columns:
                                trade_df.loc[terms_notionals, n] = \
                                    trade_df.loc[terms_notionals, n].values \
                                    * inversion_spot[terms_notionals].values
                    else:
                        logger.warning("Couldn't get spot data for " + inversion_ticker +
                                       " to invert notionals. Hence not returning trading data.")

                if terms_notionals.any():
                    trade_df.loc[terms_notionals, 'notional_currency'] = trade_request.ticker[0:3]

            # Also represent the notional as a reporting currency notional amount (eg. if we are USD based
            # investors, convert the notional to USD)
            #
            # Using a reporting currency can be particularly useful if we are trying to aggregate metrics from
            # many different currency pairs (and wish to weight by a commonly measured reporting notional)

            # Eg. if we don't have USDUSD, then we need to convert
            if trade_request.ticker[0:3] != tca_request.reporting_currency:
                # So if we have EURJPY, we want to download EURUSD data
                reporting_ticker = trade_request.ticker[0:3] + tca_request.reporting_currency

                reporting_spot, trade_df = self._fill_reporting_spot(reporting_ticker, trade_df, start_date,
                                                                     finish_date, tca_request)

                if reporting_spot is not None:
                    trade_df['notional_reporting_currency_mid'] = reporting_spot.values
                    trade_df['reporting_currency'] = tca_request.reporting_currency

                    columns_to_report = ['executed_notional', 'notional', 'order_notional']

                    for c in columns_to_report:
                        if c in trade_df.columns:
                            trade_df[c + '_in_reporting_currency'] = \
                                trade_df['notional_reporting_currency_mid'].values * trade_df[c]
                else:
                    logger.warning("Couldn't get spot data to convert notionals into reporting currency. "
                                   "Hence not returning trading data.")

                    return None
            else:
                # ie. USDUSD, so spot is 1
                trade_df['notional_reporting_currency_mid'] = 1.0

                # The reporting currency is the same as the notional currency of the trade, so no need to
                # convert, just replicate the columns
                trade_df['reporting_currency'] = tca_request.reporting_currency

                columns_to_report = ['executed_notional', 'notional', 'order_notional']

                for c in columns_to_report:
                    if c in trade_df.columns:
                        trade_df[c + '_in_reporting_currency'] = trade_df[c]

        return trade_df
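    # Hedged sketch (illustrative only, not tcapy's API): what the inverted-convention handling above does to a
    # trade blotter. A trade booked as USDGBP is converted to GBPUSD terms: prices become reciprocals, buys become
    # sells, and the ticker is relabelled, before being concatenated with the correctly-booked trades. All values
    # here are hypothetical.
    def invert_trade_blotter_sketch():
        import pandas as pd

        inverted = pd.DataFrame({
            'ticker': ['USDGBP'],
            'executed_price': [0.7900],   # hypothetical USDGBP price
            'side': [1]})                 # 1 = buy USD, ie. sell GBP

        inverted['executed_price'] = 1.0 / inverted['executed_price']  # now a GBPUSD price
        inverted['side'] = -inverted['side']                           # buys become sells, and vice versa
        inverted['ticker'] = 'GBPUSD'

        return inverted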
    def get_trade_order_holder(self, tca_request):
        logger = LoggerManager.getLogger(__name__)

        # Get all the trades/orders which have been requested, eg. trade_df and order_df
        # (do separate calls given they are assumed to be stored in different database tables)
        trade_order_holder = DataFrameHolder()

        if tca_request.trade_order_mapping is not None:
            logger.debug("Get trade order holder for " + str(tca_request.ticker) + " from "
                         + str(tca_request.start_date) + " - " + str(tca_request.finish_date))

            for trade_order_type in tca_request.trade_order_mapping:
                trade_order_df = self.get_trade_order_data(tca_request, trade_order_type)

                trade_order_holder.add_dataframe(trade_order_df, trade_order_type)

        return trade_order_holder

    def get_market_trade_order_holder(self, tca_request):
        """Gets both the market data and the trade/order data associated with a TCA calculation, as a tuple of
        (DataFrame, DataFrameHolder)

        Parameters
        ----------
        tca_request : TCARequest
            Parameters for a TCA calculation

        Returns
        -------
        DataFrame, DataFrameHolder
        """
        logger = LoggerManager.getLogger(__name__)

        logger.debug("Get market and trade/order data for " + str(tca_request.ticker) + " from "
                     + str(tca_request.start_date) + " - " + str(tca_request.finish_date))

        # Get all the trades/orders which have been requested, eg. trade_df and order_df
        # (do separate calls given they are assumed to be stored in different database tables)
        return self.get_market_data(tca_request), \
               self.get_trade_order_holder(tca_request)
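    # Hedged sketch: a toy stand-in for the DataFrameHolder used above, purely to make the calling pattern
    # concrete. It is an assumption that the real tcapy class does considerably more (eg. combining chunked
    # results from parallel workers); here we merely accumulate DataFrames under their trade/order type key.
    class ToyDataFrameHolder:

        def __init__(self):
            self._store = {}

        def add_dataframe(self, df, trade_order_type):
            # Skip missing results (parallel date chunks may legitimately return None)
            if df is not None:
                self._store.setdefault(trade_order_type, []).append(df)

        def get_combined_dataframe_dict(self):
            import pandas as pd

            # Concatenate each list of chunks into a single time-sorted DataFrame
            return {k: pd.concat(v).sort_index() for k, v in self._store.items()}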
    def calculate_metrics_single_ticker(self, market_trade_order_combo, tca_request, dummy_market):
        """Calls auxiliary methods to get market/trade data for a single ticker. If necessary, splits up the
        request into smaller date chunks, to collect market and trade data in parallel (using Celery)

        Parameters
        ----------
        market_trade_order_combo : tuple
            Tuple of market data with trade/order data

        tca_request : TCARequest
            Parameters for the TCA analysis

        dummy_market : bool
            Should we return a dummy variable instead of the market data?

        Returns
        -------
        DataFrame, DataFrameHolder, str
        """
        trade_order_filter = tca_request.trade_order_filter
        benchmark_calcs = tca_request.benchmark_calcs
        metric_calcs = tca_request.metric_calcs
        ticker = tca_request.ticker

        logger = LoggerManager.getLogger(__name__)

        # Reassemble market and trade data from the tuple
        market_df, trade_order_df_dict = self.trim_sort_market_trade_order(
            market_trade_order_combo, tca_request.start_date, tca_request.finish_date, tca_request.ticker)

        # Calculate BenchmarkMarkets, which only require market data and no trade data
        market_df = self.calculate_benchmark_market(market_df, tca_request)

        trade_order_df_values = []
        trade_order_df_keys = []

        # Calculations on trades with market data
        if len(trade_order_df_dict.keys()) > 0 and self._check_valid_market(market_df):

            # NOTE: this will not filter orders, only TRADES (as orders do not have venue parameters)
            logger.debug("Filter trades by venue")

            simple_filters = {'venue': tca_request.venue}

            if 'trade_df' in self._util_func.dict_key_list(trade_order_df_dict.keys()):
                for s in simple_filters.keys():
                    trade_order_df_dict['trade_df'] = self._trade_order_tag.filter_trade_order(
                        trade_order_df=trade_order_df_dict['trade_df'],
                        tag_value_combinations={s: simple_filters[s]})

            # Do additional, more customised post-filtering of the trades/orders (eg. by broker_id, algo_id)
            if trade_order_filter is not None:
                for a in trade_order_filter:
                    trade_order_df_dict = a.filter_trade_order_dict(trade_order_df_dict=trade_order_df_dict)

            # NOTE: this will not filter orders, only TRADES (as orders do not have event type parameters)
            simple_filters = {'event_type': tca_request.event_type}

            if 'trade_df' in self._util_func.dict_key_list(trade_order_df_dict.keys()):
                for s in simple_filters.keys():
                    trade_order_df_dict['trade_df'] = self._trade_order_tag.filter_trade_order(
                        trade_order_df=trade_order_df_dict['trade_df'],
                        tag_value_combinations={s: simple_filters[s]})

            # Remove any trades/orders which are empty
            t_remove = []

            for t in trade_order_df_dict.keys():
                if trade_order_df_dict[t] is None:
                    t_remove.append(t)

                    logger.warning(t + " is empty... might cause problems later!")
                elif trade_order_df_dict[t].empty:
                    t_remove.append(t)

                    logger.warning(t + " is empty... might cause problems later!")

            for t in t_remove:
                trade_order_df_dict.pop(t)

            trade_order_list = self._util_func.dict_key_list(trade_order_df_dict.keys())

            # Check if we have any trades/orders left to analyse
            if len(trade_order_list) == 0:
                logger.error("No trades/orders for " + ticker)
            else:
                # OK, we have some trades/orders left to analyse
                if not isinstance(trade_order_list, list):
                    trade_order_list = [trade_order_list]

                logger.debug("Calculating derived fields and benchmarks")

                logger.debug("Calculating execution fields")

                # Calculate derived executed fields for orders
                # (can only do this if trade_df is also available)
                if len(trade_order_df_dict.keys()) > 1 \
                        and 'trade_df' in self._util_func.dict_key_list(trade_order_df_dict.keys()):

                    # For the orders, calculate the derived fields for executed notional, price etc.
                    aggregated_notional_fields = 'executed_notional'

                    # Calculate the derived fields of the orders from the trades,
                    # also calculate any benchmarks for the orders
                    for i in range(1, len(trade_order_list)):
                        # NOTIONAL_EXECUTED: add derived fields for the executed price and the notional
                        # executed of the orders
                        trade_order_df_dict[trade_order_list[i]] = \
                            self._metric_executed_price.calculate_metric(
                                lower_trade_order_df=trade_order_df_dict[trade_order_list[i - 1]],
                                upper_trade_order_df=trade_order_df_dict[trade_order_list[i]],
                                aggregated_ids=constants.order_name + '_pointer_id',
                                aggregated_notional_fields=aggregated_notional_fields,
                                notional_reporting_currency_spot='notional_reporting_currency_mid')[0]

                # TODO not sure about this?
                if 'trade_df' in self._util_func.dict_key_list(trade_order_df_dict.keys()):
                    if 'notional' not in trade_order_df_dict['trade_df'].columns:
                        trade_order_df_dict['trade_df']['notional'] = \
                            trade_order_df_dict['trade_df']['executed_notional']

                logger.debug("Calculating benchmarks")

                # Calculate user specified benchmarks for each selected trade/order
                if benchmark_calcs is not None:

                    for i in range(0, len(trade_order_df_dict)):
                        for b in benchmark_calcs:
                            # For benchmarks which need to be generated on a trade by trade basis
                            # (eg. VWAP, arrival etc.)
                            if not isinstance(b, BenchmarkMarket):
                                logger.debug("Calculating " + type(b).__name__ + " for " + trade_order_list[i])

                                if trade_order_df_dict[trade_order_list[i]] is not None:
                                    if not trade_order_df_dict[trade_order_list[i]].empty:
                                        trade_order_df_dict[trade_order_list[i]], _ = b.calculate_benchmark(
                                            trade_order_df=trade_order_df_dict[trade_order_list[i]],
                                            market_df=market_df,
                                            trade_order_name=trade_order_list[i])

                logger.debug("Calculating metrics")

                # Calculate user specified metrics for each selected trade/order
                if metric_calcs is not None:

                    for i in range(0, len(trade_order_df_dict)):
                        for m in metric_calcs:
                            logger.debug("Calculating " + type(m).__name__ + " for " + trade_order_list[i])

                            if trade_order_df_dict[trade_order_list[i]] is not None:
                                if not trade_order_df_dict[trade_order_list[i]].empty:
                                    trade_order_df_dict[trade_order_list[i]], _ = m.calculate_metric(
                                        trade_order_df=trade_order_df_dict[trade_order_list[i]],
                                        market_df=market_df,
                                        trade_order_name=trade_order_list[i])

                logger.debug("Completed derived field calculations for " + ticker)

            trade_order_df_dict = self._calculate_additional_metrics(market_df, trade_order_df_dict, tca_request)

        if dummy_market:
            market_df = None

        trade_order_df_keys = self._util_func.dict_key_list(trade_order_df_dict.keys())
        trade_order_df_values = []

        for k in trade_order_df_keys:
            trade_order_df_values.append(trade_order_df_dict[k])

        return market_df, trade_order_df_values, ticker, trade_order_df_keys

    def calculate_benchmark_market(self, market_df, tca_request):
        logger = LoggerManager.getLogger(__name__)

        benchmark_calcs = tca_request.benchmark_calcs

        valid_market = self._check_valid_market(market_df)

        # Calculations on market data only
        if valid_market:
            for b in benchmark_calcs:
                # For benchmarks which only modify market data (and don't need trade specific information)
                if isinstance(b, BenchmarkMarket):
                    logger.debug("Calculating " + type(b).__name__ + " for market data")

                    market_df = b.calculate_benchmark(market_df=market_df)

        return market_df

    def _check_valid_market(self, market_df):
        if market_df is not None:
            if not market_df.empty:
                return True

        return False
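    # Hedged sketch: the simple tag filtering applied in calculate_metrics_single_ticker boils down to boolean
    # masking on a column, as below. tcapy's TradeOrderFilterTag presumably also handles wildcard values such as
    # 'all' and lists of values; the column names and data here are illustrative assumptions only.
    def filter_trades_by_tag_sketch():
        import pandas as pd

        trade_df = pd.DataFrame({'venue': ['venue1', 'venue2', 'venue1'],
                                 'executed_notional': [1e6, 2e6, 5e5]})

        tag_value_combinations = {'venue': 'venue1'}  # eg. keep only trades done on a particular venue

        for tag, value in tag_value_combinations.items():
            trade_df = trade_df[trade_df[tag] == value]

        return trade_df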
    def _fill_reporting_spot(self, ticker, trade_df, start_date, finish_date, tca_request):
        logger = LoggerManager.getLogger(__name__)

        market_request = MarketRequest(
            start_date=start_date, finish_date=finish_date, ticker=ticker,
            data_store=tca_request.market_data_store,
            data_offset_ms=tca_request.market_data_offset_ms,
            use_multithreading=tca_request.use_multithreading,
            market_data_database_table=tca_request.market_data_database_table,
            multithreading_params=tca_request.multithreading_params)

        market_conversion_df = self.get_market_data(market_request)

        # Make sure the trades/orders are within the market data (for the purposes of the reporting spot);
        # we don't need to consider the length of the order, JUST the starting point
        trade_df = self.strip_trade_order_data_to_market(trade_df, market_conversion_df,
                                                         consider_order_length=False)

        reporting_spot = None

        # Need to check whether we actually have any trade data/market data
        if trade_df is not None and market_conversion_df is not None:
            if not trade_df.empty and not market_conversion_df.empty:
                try:
                    reporting_spot = self._time_series_ops.vlookup_style_data_frame(
                        trade_df.index, market_conversion_df, 'mid')[0]
                except Exception:
                    logger.error("Reporting spot is missing for this trade data sample!")

        if reporting_spot is None:
            market_start_finish = "No market data in this sample. "

            if market_conversion_df is not None:
                market_start_finish = "Market data is between " + str(market_conversion_df.index[0]) \
                                      + " - " + str(market_conversion_df.index[-1]) + ". "

            logger.warning(market_start_finish)

            if trade_df is not None and not trade_df.empty:
                logger.warning("Trade data is between " + str(trade_df.index[0]) + " - "
                               + str(trade_df.index[-1]) + ".")

            logger.warning("Couldn't get spot data to convert the notional currency. "
                           "Hence not returning trading data.")

        return reporting_spot, trade_df

    def _invert_quoting_market(self, market_df):
        """Inverts the quote data for an FX pair (eg. converts USD/GBP to GBP/USD) by taking the reciprocal.
        Also swaps around the bid/ask fields for consistency.

        Parameters
        ----------
        market_df : DataFrame
            Contains market data, typically quote data

        Returns
        -------
        DataFrame
        """
        if isinstance(market_df, pd.Series):
            market_df = pd.DataFrame(market_df)

        if 'mid' in market_df.columns:
            market_df['mid'] = 1.0 / market_df['mid'].values

        # Need to swap around bid/ask when inverting market data!
        if 'bid' in market_df.columns and 'ask' in market_df.columns:
            # Take a copy of the original bid first, otherwise the new ask would be computed from the
            # already-overwritten bid
            bid = market_df['bid'].values.copy()

            market_df['bid'] = 1.0 / market_df['ask'].values
            market_df['ask'] = 1.0 / bid

        return market_df
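    # Hedged sketch: _fill_reporting_spot relies on a vlookup-style operation, ie. for each trade timestamp, pick
    # the most recent market mid at or before it. pandas' merge_asof gives the same behaviour; this is an analogy
    # for what the lookup achieves, not necessarily how TimeSeriesOps.vlookup_style_data_frame is implemented.
    def asof_spot_lookup_sketch():
        import pandas as pd

        market_df = pd.DataFrame(
            {'mid': [1.3000, 1.3002]},
            index=pd.to_datetime(['2021-01-04 10:00:00', '2021-01-04 10:00:05']))

        trade_times = pd.to_datetime(['2021-01-04 10:00:03'])

        spot = pd.merge_asof(
            pd.DataFrame(index=trade_times).reset_index(),
            market_df.reset_index(),
            on='index', direction='backward')

        return spot['mid']  # 1.3000, the last mid known at the trade time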
    def _get_correct_convention_market_data(self, market_request, start_date=None, finish_date=None):
        """Gets market data for a ticker, when it is quoted in the correct market convention. Otherwise throws
        an exception.

        Parameters
        ----------
        market_request : MarketRequest
            Parameters for the market data

        Returns
        -------
        DataFrame
        """
        # Check that the cross is in the correct convention
        if self._fx_conv.correct_notation(market_request.ticker) != market_request.ticker:
            raise Exception('Method expecting only crosses in correct market convention')

        if start_date is None and finish_date is None:
            start_date = market_request.start_date
            finish_date = market_request.finish_date

        return self._get_underlying_market_data(start_date, finish_date, market_request)

    def _get_underlying_market_data(self, start_date, finish_date, market_request):
        # Create request for market data
        market_request = MarketRequest(
            start_date=start_date, finish_date=finish_date, ticker=market_request.ticker,
            data_store=market_request.data_store,
            data_offset_ms=market_request.data_offset_ms,
            market_data_database_table=market_request.market_data_database_table)

        # Fetch market data for that ticker (will be tick data)
        market_df = self._data_factory.fetch_table(data_request=market_request)

        # TODO do further filtering of market and trade data as necessary
        if constants.resample_ms is not None:
            market_df = self._time_series_ops.resample_time_series(market_df,
                                                                   resample_ms=constants.resample_ms)

            market_df.dropna(inplace=True)

        # TODO drop stale quotes for market data and add last update time?

        # Calculate the mid-market rate, if it doesn't already exist
        if market_df is not None:
            if not market_df.empty:
                market_df = self._benchmark_mid.calculate_benchmark(market_df=market_df)

        return market_df
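    # Hedged sketch: the mid rate added at the end of _get_underlying_market_data is, in the simplest case, the
    # average of bid and ask, as below. The actual benchmark class used by tcapy may well be richer (eg. weighted
    # mids); this is shown only to make the data flow concrete, with illustrative data.
    def add_mid_sketch():
        import pandas as pd

        market_df = pd.DataFrame(
            {'bid': [1.2999, 1.3001], 'ask': [1.3001, 1.3003]},
            index=pd.to_datetime(['2021-01-04 10:00:00', '2021-01-04 10:00:01']))

        if 'mid' not in market_df.columns:
            market_df['mid'] = (market_df['bid'].values + market_df['ask'].values) / 2.0

        return market_df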
    def trim_sort_market_trade_order(self, market_trade_order_tuple, start_date, finish_date, ticker):
        """Takes market and trade/order data, then trims it down, so the trade/order data is entirely within the
        start/finish date range of the market data. If the trade/order data does not fully overlap with the
        market data, it can cause problems later when computing metrics/benchmarks.

        Parameters
        ----------
        market_trade_order_tuple : tuple
            Tuple of market data with trade/order data

        start_date : datetime
            Start date of the TCA analysis

        finish_date : datetime
            Finish date of the TCA analysis

        ticker : str
            Ticker

        Returns
        -------
        DataFrame, DataFrame (dict)
        """
        logger = LoggerManager.getLogger(__name__)

        market_df, trade_order_holder = self._convert_tuple_to_market_trade(market_trade_order_tuple)

        logger.debug("Filter the market data by start/finish date")

        # Check the market data and trade data is not empty!
        market_df = self._time_series_ops.filter_start_finish_dataframe(market_df, start_date, finish_date)

        # When reassembling the market data, give the user the option of re-sorting it, in case it was loaded
        # in an odd order
        if market_df is not None and constants.re_sort_market_data_when_assembling:
            if not market_df.empty:
                logger.debug("Filtered by start/finish date, now sorting")

                market_df = market_df.sort_index()

        # Check if there's any market data: if we have none at all, then we can't do any TCA, so warn the user...
        if market_df is None or len(market_df.index) == 0:
            err_msg = "No market data between selected dates for " + ticker + " between " + str(start_date) \
                      + " - " + str(finish_date)

            logger.warning(err_msg)

        logger.debug("Combine trade/order data")

        # Combine all the trades into a single DataFrame (and likewise for the orders),
        # which are placed into a single dict
        trade_order_df_dict = trade_order_holder.get_combined_dataframe_dict()

        # Make sure the trade data is fully within the market data (if trade data is outside the market data,
        # we can't calculate any metrics later)
        for k in self._util_func.dict_key_list(trade_order_df_dict.keys()):
            trade_order_df_dict[k] = self.strip_trade_order_data_to_market(trade_order_df_dict[k], market_df)

        # Note, we can sometimes get empty results when running in parallel (eg. when a request is split up into
        # days and there are no trades on a particular day), so don't raise an exception here
        if not trade_order_holder.check_empty_combined_dataframe_dict(trade_order_df_dict):
            err_msg = "No trade/order data between selected dates for " + ticker + " between " \
                      + str(start_date) + " - " + str(finish_date)

            logger.warning(err_msg)

        return market_df, trade_order_df_dict

    def strip_trade_order_data_to_market(self, trade_order_df, market_df, consider_order_length=True):
        """Strips down the trade/order data, so that it is within the market data provided. Hence, the
        trade/order data will fully overlap with the market data.

        Parameters
        ----------
        trade_order_df : DataFrame
            Trade/order data from the client

        market_df : DataFrame
            Market data

        consider_order_length : bool (default: True)
            Should we consider the length of the order when we consider the overlap?

        Returns
        -------
        DataFrame
        """
        if market_df is not None and trade_order_df is not None:
            if not market_df.empty and not trade_order_df.empty:
                add_cond = True

                # For orders: ensure that the start/end time of every order is within the market data
                # start/finish dates (this is important, given that we often want to calculate benchmarks
                # over orders from market data)
                if consider_order_length:
                    if 'benchmark_date_start' in trade_order_df.columns \
                            and 'benchmark_date_end' in trade_order_df.columns:
                        add_cond = (trade_order_df['benchmark_date_start'] >= market_df.index[0]) & \
                                   (trade_order_df['benchmark_date_end'] <= market_df.index[-1])

                # For trades: ensure that every trade is within the market data start/finish dates
                trade_order_df = trade_order_df.loc[
                    (trade_order_df.index >= market_df.index[0])
                    & (trade_order_df.index <= market_df.index[-1])
                    & add_cond]

        return trade_order_df

    def _strip_start_finish_dataframe(self, data_frame, start_date, finish_date, tca_request):
        """Strips down the DataFrame to the dates which were requested in the initial TCA request

        Parameters
        ----------
        data_frame : DataFrame
            Data to be stripped down

        start_date : datetime
            Start date of the computation

        finish_date : datetime
            Finish date of the computation

        tca_request : TCARequest
            Parameters for the TCA request

        Returns
        -------
        DataFrame
        """
        if start_date != tca_request.start_date:
            if data_frame is not None:
                if not data_frame.empty:
                    data_frame = data_frame.loc[data_frame.index >= tca_request.start_date]

        if finish_date != tca_request.finish_date:
            if data_frame is not None:
                if not data_frame.empty:
                    data_frame = data_frame.loc[data_frame.index <= tca_request.finish_date]

        return data_frame
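    # Hedged sketch: stripping trades to the market data window, as in strip_trade_order_data_to_market, is a
    # boolean mask bounded by the first and last market data points (optionally tightened by the order start/end
    # columns). Toy data only; the out-of-window trades at 08:59 and 17:01 are dropped.
    def strip_to_market_window_sketch():
        import pandas as pd

        market_index = pd.to_datetime(['2021-01-04 09:00', '2021-01-04 17:00'])

        trade_df = pd.DataFrame(
            {'executed_price': [1.30, 1.31, 1.32]},
            index=pd.to_datetime(['2021-01-04 08:59', '2021-01-04 12:00', '2021-01-04 17:01']))

        return trade_df.loc[(trade_df.index >= market_index[0]) & (trade_df.index <= market_index[-1])]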
    def _check_is_empty_trade_order(self, trade_df, tca_request, start_date, finish_date, trade_order_type):
        logger = LoggerManager.getLogger(__name__)

        if trade_df is None or trade_df.empty:
            logger.warning("Missing trade data for " + tca_request.ticker + " between " + str(start_date)
                           + " - " + str(finish_date) + " in " + trade_order_type)

            return True

        return False

    @abc.abstractmethod
    def _calculate_additional_metrics(self, market_df, trade_order_df_dict, tca_request):
        pass

    @abc.abstractmethod
    def _convert_tuple_to_market_trade(self, market_trade_order_tuple):
        pass

    @abc.abstractmethod
    def get_tca_version(self):
        pass
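# Hedged sketch: how a concrete subclass might satisfy the abstract hooks above. tcapy's real implementations
# (eg. single-threaded vs parallel/Celery-based ticker loaders) differ; the bodies here are trivial placeholders
# purely to show the contract.
class ToyTCATickerLoader:  # imagine this inherits the abstract base class above

    def _calculate_additional_metrics(self, market_df, trade_order_df_dict, tca_request):
        # No extra metrics in the toy version: return the dict untouched
        return trade_order_df_dict

    def _convert_tuple_to_market_trade(self, market_trade_order_tuple):
        # Assume the tuple is already (market_df, trade_order_holder)
        return market_trade_order_tuple

    def get_tca_version(self):
        return 'toy-0.1'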
    def _fetch_market_data(self, start, finish, ticker, write_to_disk=True, read_cached_from_disk=True,
                           web_proxies=constants.web_proxies):
        logger = LoggerManager.getLogger(__name__)

        key = (str(start) + str(finish) + ticker + '_' + self._get_postfix()).replace(":", '_')

        filename = os.path.join(self.temp_data_folder, key) + '.' + fileformat

        util_func = UtilFunc()

        start_time_stamp = pd.Timestamp(start)
        finish_time_stamp = pd.Timestamp(finish)

        if self._remove_saturday():
            weekend_data = "Saturday? " + key

            # Ignore Saturday, and don't attempt to download
            if start_time_stamp.dayofweek == 5 or finish_time_stamp.dayofweek == 5:
                return None, weekend_data

        if self._remove_weekend_points():
            weekend_data = "Weekend? " + key

            if start_time_stamp.dayofweek == 6 and start_time_stamp.hour < 20:
                return None, weekend_data

            if start_time_stamp.dayofweek == 4 and start_time_stamp.hour > 22:
                return None, weekend_data

        df = None

        if read_cached_from_disk:
            if os.path.exists(filename):
                df = util_func.read_dataframe_from_binary(filename, format=binary_format)

                if df is not None:
                    logger.debug("Read " + filename + " from disk")

        if df is None:
            # Convert the tcapy ticker into the vendor's ticker
            df = self._get_input_data_source().fetch_market_data(
                start, finish, ticker=self._get_tickers_vendor()[ticker], web_proxies=web_proxies)

            if df is not None:
                df = df.drop('ticker', axis=1)

                if write_to_disk:
                    # Write a small temporary DataFrame to disk (if the process fails later, these can be
                    # picked up, without having to call the external data vendor again)
                    util_func.write_dataframe_to_binary(df, filename, format=binary_format)

        msg = None

        if df is None:
            msg = "No data? " + key

        return df, msg
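    # Hedged sketch: the disk-cache-then-vendor pattern in _fetch_market_data, reduced to its essentials, with
    # Parquet assumed as the binary format. fetch_from_vendor is a hypothetical placeholder callable, not tcapy's
    # data source API.
    def fetch_with_disk_cache_sketch(filename, fetch_from_vendor):
        import os

        import pandas as pd

        # 1. Try the cache first: if an earlier (possibly failed) run already wrote this chunk, reuse it
        if os.path.exists(filename):
            return pd.read_parquet(filename)

        # 2. Otherwise go to the external vendor, then persist the chunk so a retry never refetches it
        df = fetch_from_vendor()

        if df is not None:
            df.to_parquet(filename)

        return df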