def fill_market_trade_databases():
    """Populates the market and trade test-harness databases with known test data."""
    Mediator.get_volatile_cache().clear_cache()

    # The first ticker replaces the whole market data table; later tickers are appended
    table_write_mode = 'replace'

    # Fill market data (assume: CHUNK_STORE as our default format!)
    for tick in ticker_arctic:
        database_source = DatabaseSourceArctic(postfix='testharness', arctic_lib_type=arctic_lib_type)

        # Write CSV to Arctic
        database_source.convert_csv_to_table(csv_market_data_store, tick, market_data_table,
                                             if_exists_table=table_write_mode, if_exists_ticker='replace',
                                             market_trade_data='market', remove_duplicates=False)

        table_write_mode = 'append'

    # Fill trade/order data
    database_source = DatabaseSourceMySQL()

    for trade_order_type in trade_order_list:
        # Dump trade_df to SQL test harness database and overwrite
        database_source.convert_csv_to_table(
            csv_trade_order_mapping[trade_order_type], None,
            (trade_order_mapping[trade_data_store])[trade_order_type],
            database_name=trade_data_database_name,
            if_exists_table='replace', market_trade_data='trade')
def __init__(self, tables_dict=None, scalar=1, round_figures_by=None):
    """Stores a dict of tables together with optional scaling/rounding parameters.

    Parameters
    ----------
    tables_dict : dict, optional
        Tables keyed by name (defaults to an empty dict)

    scalar : float, optional
        Multiplier applied to values (default: 1)

    round_figures_by : int, optional
        Number of figures to round by (default: None, ie. no rounding)
    """
    # Fix: avoid a mutable default argument ({}), which would be shared between
    # every instance constructed without an explicit tables_dict
    self._tables_dict = {} if tables_dict is None else tables_dict
    self._scalar = scalar
    self._round_figures_by = round_figures_by

    self._time_series_ops = Mediator.get_time_series_ops()
    self._util_func = Mediator.get_util_func()
def test_executed_price_notional_calculation(fill_market_trade_databases):
    """Checks that the notional-weighted average executed price of an order's child trades
    matches the executed price recorded on the order itself.
    """
    Mediator.get_volatile_cache().clear_cache()

    market_df, trade_df, order_df = get_sample_data()

    # Sample ~100 random orders, plus the first and last rows as boundary cases
    sample_indices = np.random.randint(0, len(order_df.index) - 1, 100).tolist() + [0, -1]

    for idx in sample_indices:
        order_row = order_df.iloc[idx]

        if order_row['notional'] > 1:
            executed_price = order_row['executed_price']
            order_id = order_row['id']

            child_trades = trade_df[trade_df['ancestor_pointer_id'] == order_id]

            trade_prices = child_trades['executed_price'].fillna(0)
            trade_notionals = child_trades['executed_notional'].fillna(0)

            # Notional-weighted average executed price of the child trades
            weighted_avg_price = (trade_prices * trade_notionals).sum() / trade_notionals.sum()

            assert abs(executed_price - weighted_avg_price) < eps
def test_create_tca_report(fill_market_trade_databases):
    """Checks that a TCAResults can be built from a TCA computation and that TCAReport
    renders it into an HTML document.
    """
    Mediator.get_volatile_cache().clear_cache()

    tca_request = TCARequest(
        start_date=start_date, finish_date=finish_date, ticker=ticker,
        trade_data_store=trade_data_store, trade_data_database_name=trade_data_database_name,
        market_data_store=market_data_store, market_data_database_table=market_data_database_table,
        trade_order_mapping=trade_order_mapping,
        metric_calcs=MetricSlippage(),
        results_form=TimelineResultsForm(metric_name='slippage', by_date='datehour'),
        use_multithreading=use_multithreading)

    tca_engine = TCAEngineImpl(version=tcapy_version)

    tca_results = TCAResults(tca_engine.calculate_tca(tca_request=tca_request), tca_request)
    tca_results.render_computation_charts()

    assert tca_results.timeline is not None and tca_results.timeline_charts is not None

    tca_report = TCAReport(tca_results)
    html = tca_report.create_report()

    # Quick sanity check that HTML was generated, by looking for the head tag
    assert '<head>' in html
def test_market_data_convention(fill_market_trade_databases):
    """Checks market data for unusual quotations is consistent: USDEUR should come back as the
    inverse of the correctly-quoted EURUSD, and an autogenerated cross (EURJPY) should match the
    product of its USD legs.
    """
    Mediator.get_volatile_cache().clear_cache()

    market_loader = Mediator.get_tca_market_trade_loader(version=tcapy_version)

    market_request = MarketRequest(start_date=start_date, finish_date=finish_date, ticker=ticker,
                                   data_store=market_data_store,
                                   market_data_database_table=market_data_database_table)

    #### Compare EURUSD to USDEUR
    market_correct_conv_series = pd.DataFrame(market_loader.get_market_data(market_request)['mid'])

    market_request.ticker = 'USDEUR'
    market_reverse_conv_series = pd.DataFrame(1.0 / market_loader.get_market_data(market_request)['mid'])

    assert_frame_equal(market_correct_conv_series, market_reverse_conv_series, check_dtype=False)

    ### Compare EURJPY (autogenerated if EURJPY is not collected directly) vs. EURUSD * USDJPY
    # Use resampled series for the comparison
    market_request.ticker = 'USDJPY'
    market_df_USDJPY = pd.DataFrame(market_loader.get_market_data(market_request)['mid'])

    market_request.ticker = 'EURJPY'
    market_df_EURJPY = pd.DataFrame(market_loader.get_market_data(market_request)['mid']).resample('1min').mean()

    market_df_EURJPY_comp = (market_correct_conv_series.resample('1min').mean()
                             * market_df_USDJPY.resample('1min').mean())

    market_df_EURJPY, market_df_EURJPY_comp = market_df_EURJPY.align(market_df_EURJPY_comp, join='inner')

    comp = (market_df_EURJPY - market_df_EURJPY_comp).dropna()

    assert all(comp < eps)
def test_data_offset():
    """Checks that user-specified millisecond offsets are applied to market and trade data.
    This can be useful when recording clocks were slightly out of sync.
    """
    Mediator.get_volatile_cache().clear_cache()

    tca_request = TCARequest(start_date=start_date, finish_date=finish_date, ticker=ticker,
                             trade_data_store=trade_data_store, market_data_store=market_data_store,
                             trade_order_mapping=trade_order_mapping)

    tca_engine = TCAEngineImpl(version=tcapy_version)

    dict_of_df = tca_engine.calculate_tca(tca_request=tca_request)

    # Re-run with trade data shifted +1ms and market data shifted -1ms
    tca_request.trade_data_offset_ms = 1
    tca_request.market_data_offset_ms = -1

    dict_of_df_offset = tca_engine.calculate_tca(tca_request=tca_request)

    trade_df = dict_of_df[trade_df_name]
    market_df = dict_of_df['market_df']

    trade_df_offset = dict_of_df_offset[trade_df_name]
    market_df_offset = dict_of_df_offset['market_df']

    # The indices must be shifted by exactly the requested offsets
    assert all(market_df.index + timedelta(milliseconds=-1) == market_df_offset.index)
    assert all(trade_df.index + timedelta(milliseconds=1) == trade_df_offset.index)

    # Any recognised date columns on the trades must be shifted as well
    for date_col in constants.date_columns:
        if date_col in trade_df.columns:
            assert all(trade_df[date_col] + timedelta(milliseconds=1) == trade_df_offset[date_col])
def test_stress_tca(fill_market_trade_databases):
    """Fires several large TCARequests in parallel to stress test tcapy and to check it copes
    with simultaneous requests (as can happen when deployed on the web). You may need to shrink
    the dataset if RAM is limited.

    Note, do not use pylibmc, and instead use python-memcached, when using memcached as a result
    backend. pylibmc is not thread-safe so will come undone if you end up making parallel requests.
    """
    from tcapy.util.swim import Swim

    if not stress_test:
        return

    # Clear cache to ensure all test code runs!
    Mediator.get_volatile_cache().clear_cache()

    base_request = TCARequest(
        start_date=start_date, finish_date=finish_date, ticker=valid_ticker_list, dummy_market=True,
        trade_data_store=trade_data_store, trade_data_database_name=trade_data_database_name,
        market_data_store=market_data_store, market_data_database_table=market_data_database_table,
        trade_order_mapping=trade_order_mapping, use_multithreading=True, tca_type='aggregated')

    # Kick off several simultaneous large TCA requests
    request_no = 2

    tca_request_list = [TCARequest(tca_request=base_request) for _ in range(request_no)]

    tca_engine = TCAEngineImpl(version=tcapy_version)

    swim = Swim(parallel_library='thread')
    pool = swim.create_pool(thread_no=len(tca_request_list))

    async_results = [pool.apply_async(tca_engine.calculate_tca, args=(req,))
                     for req in tca_request_list]

    output = [res.get() for res in async_results]

    swim.close_pool(pool, True)

    assert len(output) == len(tca_request_list)

    # Check that several DataFrames exist in the results
    for trade_order_results_df_dict in output:
        assert 'trade_df' in trade_order_results_df_dict.keys()
def test_invalid_dates_missing_data_tca(fill_market_trade_databases):
    """Tests that TCA calculations raise DataMissingException both for invalid date ranges and
    for a ticker with no data, across every TCA type and both threading modes.

    Note that we need a running Celery server for use_multithreading to work (as well as the
    usual SQL and Arctic databases running, if the test_csv option has not been selected).
    """
    Mediator.get_volatile_cache().clear_cache()  # Clear cache to ensure all test code runs!

    tca_request = TCARequest(start_date=start_date, finish_date=finish_date, ticker=valid_ticker_list,
                             trade_data_store=trade_data_store,
                             trade_data_database_name=trade_data_database_name,
                             market_data_store=market_data_store,
                             market_data_database_table=market_data_database_table,
                             trade_order_mapping=trade_order_mapping)

    tca_engine = TCAEngineImpl(version=tcapy_version)

    def _assert_raises_data_missing(request):
        # Fix: previously the assert sat inside the except handler, so a calculation which
        # (incorrectly) succeeded raised no exception and the test passed silently
        exception_triggered = False

        try:
            tca_engine.calculate_tca(tca_request=request)
        except DataMissingException:
            exception_triggered = True

        assert exception_triggered

    ## Test invalid dates
    tca_request.start_date = invalid_start_date
    tca_request.finish_date = invalid_finish_date

    for t in tca_type:
        for m in use_multithreading:
            tca_request.use_multithreading = m
            tca_request.tca_type = t

            _assert_raises_data_missing(tca_request)

    ## Test a single valid ticker, but missing data (only one ticker)
    tca_request.start_date = start_date
    tca_request.finish_date = finish_date
    tca_request.ticker = missing_ticker

    for t in tca_type:
        for m in use_multithreading:
            Mediator.get_volatile_cache().clear_cache()  # Clear cache to ensure all test code runs!

            tca_request.use_multithreading = m
            tca_request.tca_type = t

            _assert_raises_data_missing(tca_request)
def __init__(self, trade_order_list=None, metric_name=None, aggregate_by_field=None,
             aggregation_metric='mean', tag_value_combinations=None):
    """Configures which trade/order names this applies to and how the metric is aggregated.

    Parameters
    ----------
    trade_order_list : str or str (list), optional
        Trade/order names this applies to; a single name is wrapped into a list

    metric_name : str, optional
        Name of the metric to aggregate

    aggregate_by_field : str, optional
        Field to aggregate by

    aggregation_metric : str, optional
        How to aggregate (default: 'mean')

    tag_value_combinations : dict, optional
        Tag/value combinations used by the trade/order filter (defaults to an empty dict)
    """
    # Fix: avoid a mutable default argument ({}), which would be shared across instances
    if tag_value_combinations is None:
        tag_value_combinations = {}

    if not (isinstance(trade_order_list, list)) and trade_order_list is not None:
        trade_order_list = [trade_order_list]

    self._trade_order_list = trade_order_list
    self._metric_name = metric_name
    self._aggregate_by_field = aggregate_by_field
    self._aggregation_metric = aggregation_metric

    self._results_summary = ResultsSummary()
    self._tag_value_combinations = tag_value_combinations
    self._trade_order_filter_tag = TradeOrderFilterTag()
    self._util_func = Mediator.get_util_func()
    self._time_series_ops = Mediator.get_time_series_ops()
def _convert_tuple_to_market_trade(self, market_trade_order_tuple): volatile_cache = Mediator.get_volatile_cache(volatile_cache_engine=self._volatile_cache_engine) # Gather market and trade/order data (which might be stored in a list) if isinstance(market_trade_order_tuple, list): market_df_list = [] trade_order_holder = DataFrameHolder() for market_df_single, trade_order_holder_single in market_trade_order_tuple: market_df_list.append(market_df_single) trade_order_holder.add_dataframe_holder(trade_order_holder_single) market_df_list = volatile_cache.get_dataframe_handle(market_df_list, burn_after_reading=True) # to ensure that any spurious/None elements are removed market_df_list = [x for x in market_df_list if isinstance(x, pd.DataFrame)] # want to make sure the data is properly ordered too (not guarenteed we'll get it back in right order) market_df = self._time_series_ops.concat_dataframe_list(market_df_list) else: market_df = volatile_cache.get_dataframe_handle(market_trade_order_tuple[0], burn_after_reading=True) trade_order_holder = market_trade_order_tuple[1] return market_df, trade_order_holder
def _get_correct_convention_market_data(self, market_request, start_date=None, finish_date=None):
    """Fetches market data for a ticker already quoted in the correct market convention, going
    via the volatile cache when periodic market data caching is enabled.

    Parameters
    ----------
    market_request : MarketRequest
        Parameters of the market data to fetch (ticker must be in correct convention)

    start_date : str or Timestamp, optional
        Start date override, used when caching is bypassed

    finish_date : str or Timestamp, optional
        Finish date override, used when caching is bypassed

    Returns
    -------
    DataFrame

    Raises
    ------
    Exception
        If the requested ticker is not in the correct market convention
    """
    # Check that cross is in correct convention
    if self._fx_conv.correct_notation(market_request.ticker) != market_request.ticker:
        raise Exception('Method expecting only crosses in correct market convention')

    cache = True

    # Don't attempt caching when the data store is an in-memory DataFrame
    if isinstance(market_request.data_store, pd.DataFrame):
        cache = False

    if market_request.multithreading_params['cache_period_market_data'] and cache:
        volatile_cache = Mediator.get_volatile_cache(volatile_cache_engine=self._volatile_cache_engine)

        # Ask the cache for this request; it returns (possibly adjusted) dates, the cache key
        # and the cached DataFrame (None on a miss)
        start_date, finish_date, market_key, market_df = \
            volatile_cache.get_data_request_cache(market_request, market_request.data_store, 'market_df',
                                                  market_request.data_offset_ms)

        if market_df is None:
            # Cache miss: load from the underlying data store and populate the cache
            market_df = super(TCATickerLoaderImpl, self)._get_underlying_market_data(start_date, finish_date,
                                                                                     market_request)

            volatile_cache.put_data_request_cache(market_request, market_key, market_df)

        # Trim the DataFrame to the requested window before returning
        return self._strip_start_finish_dataframe(market_df, start_date, finish_date, market_request)
    else:
        if start_date is None or finish_date is None:
            start_date = market_request.start_date
            finish_date = market_request.finish_date

        return super(TCATickerLoaderImpl, self)._get_underlying_market_data(start_date, finish_date,
                                                                            market_request)
def example_market_data_non_usd_cross():
    """Example for loading market data for exotic crosses which are unlikely to be collected
    directly. tcapy calculates such crosses via their USD legs, eg. NZDCAD from NZDUSD and USDCAD.
    """
    market_loader = Mediator.get_tca_market_trade_loader()

    tca_request = TCARequest(start_date=start_date, finish_date=finish_date, ticker='NZDUSD',
                             market_data_store=market_data_store)

    market_base_df = market_loader.get_market_data(tca_request)

    tca_request.ticker = 'USDCAD'
    market_terms_df = market_loader.get_market_data(tca_request)

    # Build NZDCAD synthetically from the two USD legs
    market_df = pd.DataFrame(market_base_df['mid'] * market_terms_df['mid']).dropna()

    tca_request.ticker = 'NZDCAD'
    market_direct_df = market_loader.get_market_data(tca_request)

    market_df, market_direct_df = market_df.align(market_direct_df, join='inner')

    # The synthetic cross should match the one tcapy returns directly
    assert (market_df['mid'] - market_direct_df['mid']).sum() == 0
def __init__(self, market_data_postfix='dukascopy', csv_market_data=None, write_to_db=True,
             sql_trade_database_type='ms_sql_server'):
    """Sets up the market data source (Arctic or CSV) and the SQL trade/order data source.

    Parameters
    ----------
    market_data_postfix : str, optional
        Which Arctic market data source to use (default: 'dukascopy')

    csv_market_data : str, optional
        Path of a CSV market data source; when given, it is used instead of Arctic

    write_to_db : bool, optional
        Whether to create the database sources for writing (default: True)

    sql_trade_database_type : str, optional
        'ms_sql_server' or 'mysql' for the trade/order database (default: 'ms_sql_server')

    Raises
    ------
    ValueError
        If sql_trade_database_type is not a supported database type
    """
    if csv_market_data is None:
        self._market_data_source = 'arctic-' + market_data_postfix
    else:
        self._market_data_source = csv_market_data

    self._tca_market = Mediator.get_tca_market_trade_loader()

    # Assumes MongoDB for tick data and MSSQL/MySQL for trade/order data
    if write_to_db:
        self._database_source_market = DatabaseSourceArctic(postfix=market_data_postfix)  # market data source
        self._market_data_database_name = constants.arctic_market_data_database_name
        self._market_data_database_table = constants.arctic_market_data_database_table

        if sql_trade_database_type == 'ms_sql_server':
            self._database_source_trade = DatabaseSourceMSSQLServer()  # trade data source
            self._trade_data_database_name = constants.ms_sql_server_trade_data_database_name
        elif sql_trade_database_type == 'mysql':
            self._database_source_trade = DatabaseSourceMySQL()  # trade data source
            self._trade_data_database_name = constants.mysql_trade_data_database_name
        else:
            # Fix: previously an unrecognised type silently left the trade attributes unset,
            # causing a confusing AttributeError much later
            raise ValueError('Unsupported sql_trade_database_type: ' + str(sql_trade_database_type))

    self.time_series_ops = TimeSeriesOps()
    self.rand_time_series = RandomiseTimeSeries()
def normalize_trade_data(self, df, dataset, data_request):
    """Normalizes raw trade/order data: invalidates non-positive executed prices, renames
    fields, maps buy/sell sides to +1/-1, localizes date columns to UTC and applies any
    requested millisecond offset.

    Parameters
    ----------
    df : DataFrame or None
        Raw trade/order data (None is passed straight through)

    dataset : str
        Name of the dataset being normalized (not used directly here)

    data_request : DataRequest
        Request parameters, used for the millisecond offset

    Returns
    -------
    DataFrame or None
    """
    if df is None:
        return None

    # For cancelled trades the trade price might be recorded as "zero" or a negative price,
    # which is invalid, so make these NaNs
    if 'executed_price' in df.columns:
        df.loc[df['executed_price'] <= 0, 'executed_price'] = np.nan

    # Rename fields if necessary
    if 'executed_notional_currency' in df.columns:
        df = df.rename(columns={'executed_notional_currency': 'notional_currency'})

    # Convert buy/sell to +1/-1 in one pass; replaces six chained
    # Series.replace(..., inplace=True) calls (inplace replace is deprecated in pandas)
    # TODO do regex/case insensitive version
    vals_to_replace = {'buy': 1, 'sell': -1, 'Buy': 1, 'Sell': -1, 'BUY': 1, 'SELL': -1}
    df['side'] = df['side'].replace(vals_to_replace)

    if 'event_type' in df.columns:
        df['event_type'] = df['event_type'].replace('execution', 'trade')

    # Also assume selected date columns are UTC (eg. benchmark start and finish dates for the orders)
    df = Mediator.get_time_series_ops().localize_cols_as_UTC(df, constants.date_columns, index=True).sort_index()

    return self.offset_data_ms(df, data_request)
def get_market_data(self, market_request):
    """Gets market data for tickers. When we ask for non-standard FX crosses, only the mid-field
    is returned (calculated as a cross rate). We do not give bid/ask quotes for calculated
    non-standard tickers, as these can difficult to estimate.

    Parameters
    ----------
    market_request : MarketRequest
        The type of market data to get

    Returns
    -------
    DataFrame
    """
    tca_ticker_loader = Mediator.get_tca_ticker_loader(version=self._version)

    if isinstance(market_request.ticker, list):
        if len(market_request.ticker) > 1:
            market_request_list = self._split_tca_request_into_list(market_request)

            market_df_dict = {}

            for market_request_single in market_request_list:
                # NOTE(review): this keys on market_request.ticker (the whole list, which is
                # unhashable) instead of market_request_single.ticker, and calls the already
                # constructed loader instance as if it were a class; market_df_dict is also
                # never returned, so the per-ticker results are discarded — confirm whether
                # this branch was meant to return the dict
                market_df_dict[market_request.ticker] = \
                    tca_ticker_loader(version=self._version).get_market_data(market_request_single)

    # Falls through here in all cases, fetching with the original (possibly list) request
    return tca_ticker_loader.get_market_data(market_request)
def get_trade_order_holder(self, tca_request):
    """Gets the trades/orders in the form of a TradeOrderHolder

    Parameters
    ----------
    tca_request : TCARequest
        Parameters for the TCA computation

    Returns
    -------
    TradeOrderHolder
    """
    tca_ticker_loader = Mediator.get_tca_ticker_loader(version=self._version)

    if isinstance(tca_request.ticker, list):
        if len(tca_request.ticker) > 1:
            tca_request_list = self._split_tca_request_into_list(tca_request)

            trade_order_holder = DataFrameHolder()

            for tca_request_single in tca_request_list:
                # NOTE(review): the combined holder assembled here is never returned — the
                # method always falls through to the final return below, which re-fetches
                # with the full list request; also tca_ticker_loader is an instance but is
                # called like a class — confirm intended behavior
                trade_order_holder.add_dataframe_holder(
                    tca_ticker_loader(version=self._version).
                    get_trade_order_holder(tca_request_single))

    return tca_ticker_loader(version=self._version).get_trade_order_holder(tca_request)
def calculate_benchmark(self, trade_order_df=None, market_df=None, trade_order_name=None,
                        market_resample_freq=None, market_resample_unit=None, market_offset_ms=None,
                        resample_how=None):
    """Optionally shifts the market data index by a millisecond offset and resamples it,
    returning the trade/order data untouched. Any parameter left as None falls back to the
    value configured on the instance.
    """
    market_resample_freq = self._market_resample_freq if market_resample_freq is None else market_resample_freq
    market_resample_unit = self._market_resample_unit if market_resample_unit is None else market_resample_unit
    market_offset_ms = self._market_offset_ms if market_offset_ms is None else market_offset_ms
    resample_how = self._resample_how if resample_how is None else resample_how

    # Shift the market data index when an offset has been specified
    if market_offset_ms is not None:
        market_df.index = market_df.index + timedelta(milliseconds=market_offset_ms)

    # Resample only when both a frequency and a unit have been specified
    if market_resample_freq is not None and market_resample_unit is not None:
        market_df = Mediator.get_time_series_ops().resample_time_series(
            market_df, resample_amount=market_resample_freq, how=resample_how,
            unit=market_resample_unit)

    return trade_order_df, market_df
def keys(self): """Returns the names of all the market or trades/orders stored internally. Returns ------- str (list) """ return Mediator.get_util_func().dict_key_list(self._df_dict.keys())
def calculate_benchmark(self, trade_order_df=None, market_df=None, trade_order_name=None,
                        market_resample_freq=None, market_resample_unit=None, market_offset_ms=None,
                        resample_how=None, price_field=None, volume_field=None):
    """Optionally shifts the market data index by a millisecond offset and resamples it with
    one or more aggregation methods (joined into a single DataFrame), returning the trade/order
    data untouched. Any parameter left as None falls back to the instance configuration.
    """
    market_resample_freq = self._market_resample_freq if market_resample_freq is None else market_resample_freq
    market_resample_unit = self._market_resample_unit if market_resample_unit is None else market_resample_unit
    market_offset_ms = self._market_offset_ms if market_offset_ms is None else market_offset_ms
    resample_how = self._resample_how if resample_how is None else resample_how
    price_field = self._price_field if price_field is None else price_field
    volume_field = self._volume_field if volume_field is None else volume_field

    # Shift the market data index when an offset has been specified
    if market_offset_ms is not None:
        market_df.index = market_df.index + timedelta(milliseconds=market_offset_ms)

    if market_resample_freq is not None and market_resample_unit is not None:
        # Allow a single aggregation method as well as a list of them
        if not isinstance(resample_how, list):
            resample_how = [resample_how]

        # Resample once per aggregation method, then join all results together
        market_df_list = [Mediator.get_time_series_ops().resample_time_series(
                              market_df, resample_amount=market_resample_freq, how=how,
                              unit=market_resample_unit, price_field=price_field,
                              volume_field=volume_field)
                          for how in resample_how]

        market_df = Mediator.get_time_series_ops().outer_join(market_df_list)

    return trade_order_df, market_df
def __init__(self, trade_order_list=None):
    """Stores the trade/order names this filter applies to; a single name is wrapped in a list.

    Parameters
    ----------
    trade_order_list : str or str (list), optional
        Trade/order names this filter applies to
    """
    if trade_order_list is not None and not isinstance(trade_order_list, list):
        trade_order_list = [trade_order_list]

    self._trade_order_list = trade_order_list

    self._time_series_ops = Mediator.get_time_series_ops()
def get_market_data(self, market_request, return_cache_handles=False):
    """Fetches market data for a request, going via the volatile cache (eg. Redis) when
    periodic market data caching is enabled.

    Parameters
    ----------
    market_request : MarketRequest or TCARequest
        Parameters of the market data to fetch

    return_cache_handles : bool
        Return a CacheHandle (a pointer into the volatile cache) rather than the DataFrame
        itself, which is cheaper to pass between processes (eg. across Celery)

    Returns
    -------
    DataFrame or CacheHandle
    """
    volatile_cache = Mediator.get_volatile_cache(volatile_cache_engine=self._volatile_cache_engine)

    cache = True

    # Don't attempt to cache DataFrames (ie. when the data store itself is an in-memory DataFrame)
    if hasattr(market_request, 'market_data_store'):
        if (isinstance(market_request.market_data_store, pd.DataFrame)):
            cache = False
    elif isinstance(market_request.data_store, pd.DataFrame):
        cache = False

    # If we have allowed the caching of monthly/periodic market data
    if market_request.multithreading_params['cache_period_market_data'] and cache:
        # The cache may widen the requested window, so remember the dates actually asked for
        old_start_date = market_request.start_date; old_finish_date = market_request.finish_date

        # So we can also take TCARequest objects (which use market_data_* attribute names)
        if hasattr(market_request, 'market_data_store'):
            data_store = market_request.market_data_store
            data_offset_ms = market_request.market_data_offset_ms
        else:
            data_store = market_request.data_store
            data_offset_ms = market_request.data_offset_ms

        # See if we can fetch from the cache (typically Redis)
        start_date, finish_date, market_key, market_df = \
            volatile_cache.get_data_request_cache(market_request, data_store, 'market_df', data_offset_ms)

        # If data is already cached, just return the existing CacheHandle (which is like a
        # pointer to the reference in Redis)
        if market_df is not None and start_date == old_start_date and finish_date == old_finish_date \
                and return_cache_handles:
            return CacheHandle(market_key, add_time_expiry=False)

        if market_df is None:
            # Cache miss: fetch the (possibly widened) window from the underlying data store
            # and push it into the cache
            market_request_copy = MarketRequest(market_request=market_request)
            market_request_copy.start_date = start_date
            market_request_copy.finish_date = finish_date

            market_df = super(TCATickerLoaderImpl, self).get_market_data(market_request_copy)

            volatile_cache.put_data_request_cache(market_request_copy, market_key, market_df)

        # Trim back to the dates originally requested
        market_df = self._strip_start_finish_dataframe(market_df, old_start_date, old_finish_date,
                                                       market_request)
    else:
        market_df = super(TCATickerLoaderImpl, self).get_market_data(market_request)

    # Return as a cache handle (which can be easily passed across Celery for example)
    # Only if use_multithreading
    if return_cache_handles and market_request.use_multithreading:
        return volatile_cache.put_dataframe_handle(
            market_df,
            use_cache_handles=market_request.multithreading_params['cache_period_market_data'])

    return market_df
def __init__(self, version=constants.tcapy_version):
    """Initialises the TCA engine, creating the underlying market/trade loader for the
    requested version and logging the version/environment.

    Parameters
    ----------
    version : str
        tcapy version used to select the TCAMarketTradeLoader implementation
    """
    self._util_func = UtilFunc()
    self._tca_market_trade_loader = Mediator.get_tca_market_trade_loader(version=version)
    self._time_series_ops = TimeSeriesOps()
    self._trade_order_tag = TradeOrderFilterTag()

    logger = LoggerManager.getLogger(__name__)

    logger.info("Init TCAEngine version: " + self._tca_market_trade_loader.get_tca_version()
                + " - Env: " + constants.env)
def test_results_form_average(fill_market_trade_databases):
    """Checks the averages produced by ResultsForm against a direct notional-weighted
    calculation, both overall and filtered by venue.
    """
    Mediator.get_volatile_cache().clear_cache()

    market_df, trade_df, order_df = get_sample_data()

    trade_df, _ = MetricSlippage().calculate_metric(trade_order_df=trade_df, market_df=market_df,
                                                    bid_benchmark='mid', ask_benchmark='mid')

    results_form = BarResultsForm(
        market_trade_order_list=['trade_df'], metric_name='slippage', aggregation_metric='mean',
        aggregate_by_field=['ticker', 'venue'], scalar=10000.0,
        weighting_field='executed_notional_in_reporting_currency')

    results_df = results_form.aggregate_results(
        market_trade_order_df=trade_df, market_df=market_df, market_trade_order_name='trade_df')

    slippage_average = float(results_df[0][0].values[0])

    # Notional-weighted slippage in basis points, computed directly for comparison
    def weighted_slippage_bp(df):
        weights = df['executed_notional_in_reporting_currency']

        return 10000.0 * (df['slippage'] * weights).sum() / weights.sum()

    # Check the average slippage
    assert slippage_average - weighted_slippage_bp(trade_df) < eps

    slippage_average_venue = results_df[1][0]['venue'][venue_filter]

    # Check the average slippage by venue
    assert slippage_average_venue - weighted_slippage_bp(
        trade_df[trade_df['venue'] == venue_filter]) < eps
def test_write_market_data_arctic():
    """Checks market data written to Arctic from CSV reads back identical to the CSV source,
    both when replacing the full table and when appending.
    """
    if not run_arctic_tests:
        return

    market_loader = Mediator.get_tca_market_trade_loader(version=tcapy_version)

    ### Read CSV, dump to Arctic, then compare the Arctic read-back with the CSV
    db_start_date = '01 Jan 2016'
    db_finish_date = pd.Timestamp(datetime.datetime.utcnow())

    # Check first when replacing full table and then appending
    for lib_type in arctic_lib_type:
        for write_mode in ['replace', 'append']:
            database_source = DatabaseSourceArctic(postfix='testharness', arctic_lib_type=lib_type)

            # Write CSV to Arctic
            database_source.convert_csv_to_table(
                csv_market_data_store, ticker, test_harness_arctic_market_data_table,
                if_exists_table=write_mode, if_exists_ticker='replace', market_trade_data='market',
                remove_duplicates=False)

            # Fetch data directly from CSV
            database_source_csv = DatabaseSourceCSV(market_data_database_csv=csv_market_data_store)

            market_df_csv = database_source_csv.fetch_market_data(
                start_date=db_start_date, finish_date=db_finish_date, ticker=ticker)

            # Read back data from Arctic and compare with CSV
            market_request = MarketRequest(
                start_date=db_start_date, finish_date=db_finish_date, ticker=ticker,
                data_store=database_source,  # test_harness_arctic_market_data_store
                market_data_database_table=test_harness_arctic_market_data_table)

            market_df_load = market_loader.get_market_data(market_request=market_request)

            diff_df = market_df_csv['mid'] - market_df_load['mid']

            diff_df.to_csv('test' + write_mode + '.csv')

            assert all(diff_df < eps)
def test_fetch_market_data_db():
    """Tests that we can fetch data from Arctic/KDB/InfluxDB. Note you need to populate the
    database first before running this for the desired dates.
    """
    market_loader = Mediator.get_tca_market_trade_loader()

    market_data_store_list, market_data_database_table_list = _get_db_market_data_store()

    for market_data_store, market_data_database_table in zip(market_data_store_list,
                                                             market_data_database_table_list):
        market_request = MarketRequest(
            start_date=start_date, finish_date=finish_date, ticker=ticker,
            data_store=market_data_store, market_data_database_table=market_data_database_table)

        market_df = market_loader.get_market_data(market_request)

        # The result may come back as a cache handle; fix: a bare except here would also
        # swallow KeyboardInterrupt/SystemExit, so catch Exception only
        try:
            market_df = Mediator.get_volatile_cache().get_dataframe_handle(market_df)
        except Exception:
            pass

        # Data must be non-empty and lie within the requested window
        assert not (market_df.empty) \
               and market_df.index[0] >= pd.Timestamp(start_date).tz_localize('utc') \
               and market_df.index[-1] <= pd.Timestamp(finish_date).tz_localize('utc')

        # An invalid date range should come back as an empty DataFrame
        market_request.start_date = invalid_start_date
        market_request.finish_date = invalid_finish_date

        market_empty_df = market_loader.get_market_data(market_request)

        try:
            market_empty_df = Mediator.get_volatile_cache().get_dataframe_handle(market_empty_df)
        except Exception:
            pass

        assert market_empty_df.empty
def test_write_market_data_db():
    """Tests we can write market data to KDB/InfluxDB/PyStore and that it reads back identical
    to the CSV source.
    """
    database_source_list, test_harness_market_data_table_list, test_harness_data_store_list = \
        _get_db_market_database_source()

    market_loader = Mediator.get_tca_market_trade_loader(version=tcapy_version)

    # Fix: the original indexed loop reused 'i' as the inner loop variable, shadowing the
    # outer index; iterating the three parallel lists with zip removes the index entirely
    for database_source, test_harness_market_data_table, test_harness_data_store in zip(
            database_source_list, test_harness_market_data_table_list, test_harness_data_store_list):

        ### Test we can read data from CSV and dump to InfluxDB/KDB/PyStore (and when read back
        ### it matches CSV)
        db_start_date = '01 Jan 2016'
        db_finish_date = pd.Timestamp(datetime.datetime.utcnow())

        database_source_csv = DatabaseSourceCSV(market_data_database_csv=csv_market_data_store)

        market_df_csv = database_source_csv.fetch_market_data(
            start_date=db_start_date, finish_date=db_finish_date, ticker=ticker)

        # Check first when replacing full table and then appending (will still replace ticker though)
        for if_exists_mode in ['replace', 'append']:
            database_source.convert_csv_to_table(
                csv_market_data_store, ticker, test_harness_market_data_table,
                if_exists_table=if_exists_mode, if_exists_ticker='replace',
                market_trade_data='market', remove_duplicates=False)

            market_request = MarketRequest(
                start_date=db_start_date, finish_date=db_finish_date, ticker=ticker,
                data_store=test_harness_data_store,
                market_data_database_table=test_harness_market_data_table)

            market_df_load = market_loader.get_market_data(market_request=market_request)

            diff_df = market_df_csv['mid'] - market_df_load['mid']

            assert all(diff_df < eps)
def test_write_chunked_market_data_arctic():
    """For very large CSV files we might need to read them in chunks. tcapy supports this and
    also supports CSVs which are sorted in reverse (ie. descending). We need to enable chunking
    and reverse reading with flags.

    This tests whether chunked data is written correctly to Arctic, comparing it with that read
    from CSV directly
    """
    if not (run_arctic_tests): return

    market_loader = Mediator.get_tca_market_trade_loader(version=tcapy_version)

    arctic_start_date = '01 Jan 2016'; arctic_finish_date = pd.Timestamp(datetime.datetime.utcnow())

    # Load data from CSVs directly (for comparison later)
    market_df_csv_desc = DatabaseSourceCSV(market_data_database_csv=csv_reverse_market_data_store).fetch_market_data(
        start_date=arctic_start_date, finish_date=arctic_finish_date, ticker=ticker)

    market_df_csv_asc = DatabaseSourceCSV(market_data_database_csv=csv_market_data_store).fetch_market_data(
        start_date=arctic_start_date, finish_date=arctic_finish_date, ticker=ticker)

    for a in arctic_lib_type:
        database_source = DatabaseSourceArctic(postfix='testharness', arctic_lib_type=a)

        ### Write CSV data to Arctic which is sorted ascending (default!)
        database_source.convert_csv_to_table(csv_market_data_store, ticker,
                                             test_harness_arctic_market_data_table,
                                             if_exists_table='replace', if_exists_ticker='replace',
                                             market_trade_data='market', csv_read_chunksize=100000,
                                             remove_duplicates=False)

        market_request = MarketRequest(start_date=arctic_start_date, finish_date=arctic_finish_date,
                                       ticker=ticker,
                                       data_store=test_harness_arctic_market_data_store,
                                       market_data_database_table=test_harness_arctic_market_data_table)

        market_df_load = market_loader.get_market_data(market_request=market_request)

        # Compare reading directly from the CSV vs. reading back from arctic
        assert all(market_df_csv_asc['mid'] - market_df_load['mid'] < eps)

        ### Write CSV data to Arctic which is sorted descending
        database_source.convert_csv_to_table(csv_reverse_market_data_store, ticker,
                                             test_harness_arctic_market_data_table,
                                             if_exists_table='append', if_exists_ticker='replace',
                                             market_trade_data='market', csv_read_chunksize=100000,
                                             read_in_reverse=True, remove_duplicates=False)

        market_request = MarketRequest(start_date=arctic_start_date, finish_date=arctic_finish_date,
                                       ticker=ticker,
                                       data_store=test_harness_arctic_market_data_store,
                                       market_data_database_table=test_harness_arctic_market_data_table)

        market_df_load = market_loader.get_market_data(market_request=market_request)

        # Compare reading directly from the CSV vs. reading back from arctic
        assert all(market_df_csv_desc['mid'] - market_df_load['mid'] < eps)
def calculate_metrics_single_ticker(self, market_trade_order_combo, tca_request, dummy_market):
    """Delegates the single-ticker metric calculation to the superclass, then wraps the
    resulting market and trade/order DataFrames in volatile-cache handles where the
    multithreading parameters request it.

    Parameters
    ----------
    market_trade_order_combo : tuple
        Market and trade/order data combination for a single ticker

    tca_request : TCARequest
        Parameters for the TCA computation (including multithreading_params)

    dummy_market : bool
        Flag passed straight through to the superclass implementation

    Returns
    -------
    DataFrame or CacheHandle, DataFrame (list) or CacheHandle, str, str (list)
    """
    volatile_cache = Mediator.get_volatile_cache(version=self._version)

    market_df, trade_order_df_values, ticker, trade_order_df_keys \
        = super(TCATickerLoaderImpl, self).calculate_metrics_single_ticker(market_trade_order_combo,
                                                                           tca_request, dummy_market)

    # Return as a cache handle (which can be easily passed across Celery for example) or not
    # for the market and trade/order data
    return volatile_cache.put_dataframe_handle(
               market_df, tca_request.multithreading_params['return_cache_handles_market_data']), \
           volatile_cache.put_dataframe_handle(
               trade_order_df_values, tca_request.multithreading_params['return_cache_handles_trade_data']), \
           ticker, trade_order_df_keys
def test_append_market_data_arctic():
    """Tests we can append market data to arctic (we will have already written data to the test
    harness database), and that appending overlapping data raises an
    ErrorWritingOverlapDataException
    """
    if not (run_arctic_tests): return

    market_loader = Mediator.get_tca_market_trade_loader(version=tcapy_version)

    ### Test we can append (non-overlapping) data to Arctic
    arctic_start_date = '01 Jan 2016'; arctic_finish_date = pd.Timestamp(datetime.datetime.utcnow())

    # Use this market request later when reading back from Arctic
    market_request = MarketRequest(start_date=arctic_start_date, finish_date=arctic_finish_date,
                                   ticker=ticker,
                                   data_store=test_harness_arctic_market_data_store,
                                   market_data_database_table=test_harness_arctic_market_data_table)

    # Load data from CSV for comparison later
    database_source_csv = DatabaseSourceCSV(market_data_database_csv=csv_market_data_store)

    market_df_csv = database_source_csv.fetch_market_data(
        start_date=arctic_start_date, finish_date=arctic_finish_date, ticker=ticker)

    # Split the CSV data into two halves, so the second half can be appended afterwards
    market_df_list = TimeSeriesOps().split_array_chunks(market_df_csv, chunks=2)

    for a in arctic_lib_type:
        database_source = DatabaseSourceArctic(postfix='testharness', arctic_lib_type=a)

        market_df_lower = market_df_list[0]; market_df_higher = market_df_list[1]

        # Write the first half, replacing any existing table/ticker
        database_source.append_market_data(market_df_lower, ticker,
                                           table_name=test_harness_arctic_market_data_table,
                                           if_exists_table='replace', if_exists_ticker='replace',
                                           remove_duplicates=False)

        overlap_error = False

        ## Try to append overlapping data (this will fail!)
        try:
            database_source.append_market_data(market_df_lower, ticker,
                                               table_name=test_harness_arctic_market_data_table,
                                               if_exists_table='append', if_exists_ticker='append',
                                               remove_duplicates=False)
        except ErrorWritingOverlapDataException as e:
            overlap_error = True

        assert overlap_error

        # Append non-overlapping data which follows (writing overlapping data into Arctic will
        # mess up the datastore!)
        database_source.append_market_data(market_df_higher, ticker,
                                           table_name=test_harness_arctic_market_data_table,
                                           if_exists_table='append', if_exists_ticker='append',
                                           remove_duplicates=False)

        # The full series read back should match the original CSV
        market_df_all_read_back = market_loader.get_market_data(market_request=market_request)

        assert all(market_df_all_read_back['mid'] - market_df_csv['mid'] < eps)
def normalize_market_data(self, df, dataset, data_request):
    """Localizes the market data index to UTC and applies any requested millisecond offset.

    The dense market data is assumed to be stored ordered on disk already (Arctic only allows
    this), so no explicit sort is performed here. Vendor-specific field/asset name mapping is
    handled elsewhere per dataset.
    """
    utc_df = Mediator.get_time_series_ops().localize_as_UTC(df)

    return self.offset_data_ms(utc_df, data_request)