def __init__(self, temp_data_folder=constants.temp_data_folder,
             temp_large_data_folder=constants.temp_large_data_folder, tickers=None, data_store=None):
    """Stores folder/ticker/data-store settings and warns if the temporary data folders do not exist.

    Parameters
    ----------
    temp_data_folder : str
        Folder for temporary data files

    temp_large_data_folder : str
        Folder for temporary large data files

    tickers : str (list), optional
        Tickers to be used

    data_store : str, optional
        Where underlying data is stored
    """
    self.temp_data_folder = temp_data_folder
    self.temp_large_data_folder = temp_large_data_folder
    self.tickers = None
    self.util_func = UtilFunc()
    self.time_series_ops = TimeSeriesOps()
    self.data_store = data_store

    logger = LoggerManager().getLogger(__name__)

    if not os.path.isdir(self.temp_data_folder):
        logger.warn("Temp data folder " + self.temp_data_folder + " does not exist")

    if not os.path.isdir(self.temp_large_data_folder):
        # Bug fix: this message previously printed self.temp_data_folder, so the wrong
        # (existing) folder was reported when only the large data folder was missing
        logger.warn("Temp large data folder " + self.temp_large_data_folder + " does not exist")

    if tickers is not None:
        self.tickers = tickers
def check_empty_combined_dataframe_dict(self, df_dict=None):
    """Checks whether a dictionary of market/trade/order DataFrames contains any usable data.

    Entries which are None or empty are removed from the dictionary in place, with a warning
    logged for each one.

    Parameters
    ----------
    df_dict : dict, optional
        Ticker/trade-order name mapped to DataFrame; if None, it is fetched via
        get_combined_dataframe_dict

    Returns
    -------
    bool
        True if at least one non-empty DataFrame remains, False otherwise
    """
    if df_dict is None:
        df_dict = self.get_combined_dataframe_dict()

    logger = LoggerManager().getLogger(__name__)

    # No dictionary at all, or nothing in it => no valid data
    if df_dict is None or len(df_dict) == 0:
        return False

    # Collect the keys whose data is missing or empty (warn for each)
    empty_keys = [k for k in df_dict.keys()
                  if df_dict[k] is None or df_dict[k].empty]

    for k in empty_keys:
        logger.warn("Market/trade/order data not in " + k)

    # Strip out the unusable entries in place (callers see the pruned dict)
    for k in empty_keys:
        df_dict.pop(k)

    return len(df_dict) > 0
from tcapy.data.databasesource import DatabaseSourceMySQL, DatabaseSourceArctic

from test.config import *

logger = LoggerManager().getLogger(__name__)

constants = Constants()

logger.info('Make sure you have created folder ' + constants.csv_folder + ' & ' + constants.temp_data_folder +
            ' otherwise tests will fail.')


def _create_folder_if_missing(folder):
    """Tries to create folder if it doesn't already exist, logging a warning (rather than
    aborting the test run) when creation fails."""
    if not os.path.exists(folder):
        try:
            os.mkdir(folder)
        except OSError:
            # Narrowed from a bare except: os.mkdir raises OSError subclasses on failure,
            # and a bare except would also swallow KeyboardInterrupt/SystemExit
            logger.warn('Could not create ' + folder)


_create_folder_if_missing(constants.csv_folder)
_create_folder_if_missing(constants.temp_data_folder)

########################################################################################################################

# Date range/tickers used by the test harness
start_date = '26 Apr 2017'
finish_date = '05 Jun 2017'
ticker_arctic = ['EURUSD', 'USDJPY']

# Market data parameters for tables/databases
market_data_table = 'market_data_table_test_harness'
def get_trade_order_data(self, tca_request, trade_order_type, start_date=None, finish_date=None):
    """Gets trade data for specified parameters (eg. start/finish dates tickers). Will also try to find trades
    when they have booked in the inverted market convention, and change the fields appropriately. For example, if
    we ask for GBPUSD trade data, it will also search for USDGBP and convert those trades in the correct convention.

    Parameters
    ----------
    tca_request : TCARequest
        What type of trade data do we want

    trade_order_type : str
        Do we want trade or order data?

    start_date : str, optional
        Start date for trades (when both dates are omitted, taken from tca_request)

    finish_date : str, optional
        Finish date for trades (when both dates are omitted, taken from tca_request)

    Returns
    -------
    DataFrame
    """
    logger = LoggerManager().getLogger(__name__)

    # By default, assume we want trade data (rather than order data)
    if trade_order_type is None:
        trade_order_type = 'trade_df'

    if start_date is None and finish_date is None:
        start_date = tca_request.start_date
        finish_date = tca_request.finish_date

    # Create request for actual executed trades
    trade_request = TradeRequest(trade_request=tca_request)

    trade_request.start_date = start_date
    trade_request.finish_date = finish_date
    trade_request.trade_order_type = trade_order_type

    # Fetch all the trades done in that ticker (will be sparse-like randomly spaced tick data)
    # assumed to be the correct convention (eg. GBPUSD)
    trade_df = self._data_factory.fetch_table(data_request=trade_request)

    # If FX see if inverted or not
    if tca_request.asset_class == 'fx' and tca_request.instrument == 'spot':
        # Also fetch data in the inverted cross (eg. USDGBP) as some trades may be recorded this way
        inv_trade_request = TradeRequest(trade_request=tca_request)

        inv_trade_request.start_date = start_date
        inv_trade_request.finish_date = finish_date
        inv_trade_request.trade_order_type = trade_order_type
        inv_trade_request.ticker = self._fx_conv.reverse_notation(trade_request.ticker)

        trade_inverted_df = self._data_factory.fetch_table(data_request=inv_trade_request)

        # Only add inverted trades if they exist!
        if not trade_inverted_df.empty:
            invert_price_columns = ['executed_price', 'price_limit', 'market_bid', 'market_mid', 'market_ask',
                                    'arrival_price']

            invert_price_columns = [x for x in invert_price_columns if x in trade_inverted_df.columns]

            # For trades (but not orders), there is an executed price field, which needs to be inverted
            if invert_price_columns != []:
                trade_inverted_df[invert_price_columns] = 1.0 / trade_inverted_df[invert_price_columns].values

            trade_inverted_df['side'] = -trade_inverted_df['side']  # Buys become sells, and vice versa!
            trade_inverted_df['ticker'] = trade_request.ticker

            if trade_df is not None:
                # pd.concat replaces the deprecated DataFrame.append (removed in pandas 2.x)
                trade_df = pd.concat([trade_df, trade_inverted_df])
                trade_df = trade_df.sort_index()
            else:
                trade_df = trade_inverted_df

    # Check if trade data is not empty? If it is return None
    if self._check_is_empty_trade_order(trade_df, tca_request, start_date, finish_date, trade_order_type):
        return None

    if tca_request.asset_class == 'fx' and tca_request.instrument == 'spot':
        # Check if any notionals of any trade/order are quoted in the TERMS currency?
        terms_notionals = trade_df['notional_currency'] == tca_request.ticker[3:6]

        # If any notional are quoted as terms, we should invert these so we quote notionals with base currency
        # for consistency
        if terms_notionals.any():
            inversion_ticker = tca_request.ticker[3:6] + tca_request.ticker[0:3]

            inversion_spot, trade_df = self._fill_reporting_spot(inversion_ticker, trade_df, start_date,
                                                                 finish_date, tca_request)

            notional_fields = ['notional', 'order_notional', 'executed_notional']

            # Need to check terms notionals again, as trade data could have shrunk (because can only get trades,
            # where we have market data)
            terms_notionals = trade_df['notional_currency'] == str(tca_request.ticker[3:6])

            # Only get the inversion spot if any terms notionals are quoted wrong way around
            if terms_notionals.any():
                if inversion_spot is not None:
                    for n in notional_fields:
                        # Bug fix: previously tested trade_inverted_df.columns, but it is trade_df
                        # which is being modified here (its columns may differ)
                        if n in trade_df.columns:
                            # .loc avoids chained assignment, which may silently fail to write through
                            trade_df.loc[terms_notionals.values, n] = \
                                trade_df[n][terms_notionals].values * inversion_spot[terms_notionals].values
                else:
                    logger.warn("Couldn't get spot data for " + inversion_ticker +
                                " to invert notionals. Hence not returning trading data.")

            if terms_notionals.any():
                trade_df.loc[terms_notionals.values, 'notional_currency'] = trade_request.ticker[0:3]

        # Also represent notional is reporting currency notional amount (eg. if we are USD based investors, convert
        # notional to USDs)

        # Using a reporting currency can be particularly useful if we are trying to aggregate metrics from many
        # different currency pairs (and wish to weight by a commonly measured reporting notional)

        # Eg. if we don't have USDUSD, then we need to convert
        if trade_request.ticker[0:3] != tca_request.reporting_currency:
            # So if we have EURJPY, we want to download EURUSD data
            reporting_ticker = trade_request.ticker[0:3] + tca_request.reporting_currency

            reporting_spot, trade_df = self._fill_reporting_spot(reporting_ticker, trade_df, start_date,
                                                                 finish_date, tca_request)

            if reporting_spot is not None:
                trade_df['notional_reporting_currency_mid'] = reporting_spot.values

                trade_df['reporting_currency'] = tca_request.reporting_currency

                columns_to_report = ['executed_notional', 'notional', 'order_notional']

                for c in columns_to_report:
                    if c in trade_df.columns:
                        trade_df[c + '_in_reporting_currency'] = \
                            trade_df['notional_reporting_currency_mid'].values * trade_df[c]
            else:
                logger.warn(
                    "Couldn't get spot data to convert notionals into reporting currency. Hence not returning trading data.")

                return None
        else:
            # ie. USDUSD, so spot is 1
            trade_df['notional_reporting_currency_mid'] = 1.0

            # Reporting currency is the same as the notional of the trade, so no need to convert, just
            # replicate columns
            trade_df['reporting_currency'] = tca_request.reporting_currency

            columns_to_report = ['executed_notional', 'notional', 'order_notional']

            for c in columns_to_report:
                if c in trade_df.columns:
                    trade_df[c + '_in_reporting_currency'] = trade_df[c]

    return trade_df
def fetch_table(self, data_request):
    """Fetches table from underlying DatabaseSource

    Parameters
    ----------
    data_request : DataRequest
        Request for data with start/finish date etc.

    Returns
    -------
    DataFrame

    Raises
    ------
    Exception
        If the requested database source is unsupported
    """
    # Fetch table from the underlying database (CSV, SQL or RESTful etc.)
    logger = LoggerManager().getLogger(__name__)

    data_norm = data_request.data_norm

    if data_norm is None:
        data_norm = Mediator.get_data_norm(version=self._version)

    # Where do we get data from?
    database_source = Mediator.get_database_source_picker().get_database_source(data_request)

    if database_source is None:
        # Bug fix: the exception was previously constructed but never raised, so execution silently
        # continued and later crashed with a confusing AttributeError on None
        raise Exception("User asked for an unsupported database source")

    # Extract the start/finish dates and ticker we wish to download data for
    start_date = data_request.start_date
    finish_date = data_request.finish_date
    ticker = data_request.ticker

    # Are we requesting market data or trade/order data of our own executions?
    if isinstance(data_request, MarketRequest):
        df = database_source.fetch_market_data(start_date=start_date, finish_date=finish_date, ticker=ticker,
                                               table_name=data_request.market_data_database_table)

        df = data_norm.normalize_market_data(df, None, data_request)
    elif isinstance(data_request, TradeRequest):
        trade_order_type = data_request.trade_order_type
        trade_order_mapping = data_request.trade_order_mapping

        if data_request.data_store == 'csv' and trade_order_type is not None and trade_order_mapping is not None:
            df = database_source.fetch_trade_order_data(start_date=start_date, finish_date=finish_date,
                                                        ticker=ticker,
                                                        table_name=trade_order_mapping[trade_order_type])
        elif trade_order_mapping is not None:
            df = database_source.fetch_trade_order_data(start_date=start_date, finish_date=finish_date,
                                                        ticker=ticker,
                                                        table_name=trade_order_mapping[trade_order_type])
        else:
            # Otherwise we have a CSV file without any sort of mapping, which we assume only contains trade_df data
            df = database_source.fetch_trade_order_data(start_date=start_date, finish_date=finish_date,
                                                        ticker=ticker)

        df = data_norm.normalize_trade_data(df, None, data_request)

    if df is not None and df.empty:
        logger.warn('Dataframe empty for ticker ' + ticker)

    return df
def get_dataframe_by_key(self, key, combined=True, start_date=None, finish_date=None):
    """Gets a specific trade/order and combine it into a single DataFrame.

    The key type is guessed from the key name ('df' => DataFrame, 'fig' => Plotly Figure); the
    underlying object(s) are fetched from the volatile cache (handles are burnt after reading),
    falling back to the stored list itself when the cache lookup fails.

    Parameters
    ----------
    key : str
        Which market data ticker or trades/order to return

    combined : True
        Should we combine all the market data for a specific ticker or trades (or orders) into a single DataFrame
        before returning?

    start_date : str, optional
        If supplied together with finish_date, the combined DataFrame is filtered to this window

    finish_date : str, optional
        See start_date

    Returns
    -------
    DataFrame
    """
    if key in self._df_dict.keys():
        dataframe_key_list = self._df_dict[key]

        logger = LoggerManager().getLogger(__name__)

        is_data_frame_key = None

        # Special cases if 'df' in key
        if 'df' in key:
            is_data_frame_key = True

        # Plotly Figures
        if 'fig' in key:
            is_data_frame_key = False

        # Unrecognised key type: default to treating it as a DataFrame (with a warning)
        if is_data_frame_key is None:
            logger.warn('Cannot guess key type for ' + key + ', assuming DataFrame')

            is_data_frame_key = True

        if is_data_frame_key:
            # Fetch the DataFrame(s) from the volatile cache; on any failure assume the stored
            # list already holds the DataFrames themselves
            try:
                df = Mediator.get_volatile_cache().get_dataframe_handle(
                    Mediator.get_util_func().flatten_list_of_lists(dataframe_key_list), burn_after_reading=True)
            except Exception as e:
                # print("DATAFRAMEHOLDER ERROR" + str(e))
                df = dataframe_key_list

            if combined:
                # Stitch the list of DataFrames into one, sort chronologically and (optionally)
                # restrict to the requested date window
                df = Mediator.get_time_series_ops().concat_dataframe_list(df)

                if df is not None:
                    if not (df.empty):
                        df = df.sort_index()

                        if start_date is not None and finish_date is not None:
                            df = Mediator.get_time_series_ops().filter_start_finish_dataframe(
                                df, start_date, finish_date)

            return df

        # elif 'fig' in key:
        #     try:
        #         df = self._volatile_cache.get_dataframe_handle(
        #             self._util_func.flatten_list_of_lists(dataframe_key_list), burn_after_reading=True)
        #     except:
        #         df = dataframe_key_list
        #
        #     if combined:
        #
        #         xy_dict = {}
        #
        #         for fig in df:
        #             for trace in fig['data']:
        #                 name = trace['name']
        #
        #                 xy_dict[name + '_x'] = []
        #                 xy_dict[name + '_y'] = []
        #                 xy_dict['trace_name_list'] = []
        #
        #         for fig in df:
        #             for trace in fig['data']:
        #                 name = trace['name']
        #
        #                 xy_dict[name + '_x'].append(trace['x'])
        #                 xy_dict[name + '_y'].append(trace['y'])
        #
        #                 if name not in xy_dict['trace_name_list']:
        #                     xy_dict['trace_name_list'].append(name)
        #
        #         fig = df[0]
        #
        #         # aggregate all the x & y values
        #         for i in range(0, len(fig['data'])):
        #             name = fig['data'][i]['name']
        #
        #             for j in range(1, len(xy_dict[name + '_x'])):
        #                 fig['data'][i]['x'].extend(xy_dict[name + '_x'])
        #                 fig['data'][i]['y'].extend(xy_dict[name + '_y'])
        #
        #         return fig
        else:
            # Otherwise different type of metadata (don't attempt to combine it) - eg. Plotly Fig
            try:
                df = Mediator.get_volatile_cache().get_dataframe_handle(
                    Mediator.get_util_func().flatten_list_of_lists(dataframe_key_list), burn_after_reading=True)
            except Exception as e:
                print(e)

                df = dataframe_key_list

            # When several objects are stored, return the first one only
            if isinstance(df, list):
                return df[0]

            return df

    # Key not held at all
    return None