def __init__(self, market_data_postfix='dukascopy', csv_market_data=None, write_to_db=True,
             sql_trade_database_type='ms_sql_server'):
    """Records which market data source to use, fetches the market/trade loader and, if requested, creates the
    database sources used for market data (Arctic) and trade/order data (SQL).

    Parameters
    ----------
    market_data_postfix
        Appended to 'arctic-' to label the market data source (when csv_market_data is None) and passed as the
        postfix of DatabaseSourceArctic

    csv_market_data
        When not None, used as the market data source instead of the 'arctic-' label

    write_to_db
        When truthy, the market and trade database sources and their database names are set up

    sql_trade_database_type
        'ms_sql_server' or 'mysql'; for any other value no trade database attributes are set
    """
    has_csv_source = csv_market_data is not None
    self._market_data_source = csv_market_data if has_csv_source else 'arctic-' + market_data_postfix

    self._tca_market = Mediator.get_tca_market_trade_loader()

    # Assumes MongoDB for tick data and MSSQL for trade/order data
    if write_to_db:
        # Market data source
        self._database_source_market = DatabaseSourceArctic(postfix=market_data_postfix)
        self._market_data_database_name = constants.arctic_market_data_database_name
        self._market_data_database_table = constants.arctic_market_data_database_table

        # Trade data source (only the SQL flavours below are recognised)
        if sql_trade_database_type == 'ms_sql_server':
            self._database_source_trade = DatabaseSourceMSSQLServer()
            self._trade_data_database_name = constants.ms_sql_server_trade_data_database_name
        elif sql_trade_database_type == 'mysql':
            self._database_source_trade = DatabaseSourceMySQL()
            self._trade_data_database_name = constants.mysql_trade_data_database_name

    self.time_series_ops = TimeSeriesOps()
    self.rand_time_series = RandomiseTimeSeries()
def example_market_data_non_usd_cross():
    """Example for loading market data which has more exotic crosses, which are unlikely to be collected. For these
    exotic crosses tcapy will calculate the cross rates via the USD legs, eg. NZDCAD would be calculated from
    NZDUSD and USDCAD data.

    The example multiplies the NZDUSD and USDCAD mids by hand and asserts that the result matches the NZDCAD mid
    returned by the market loader, on the timestamps the two series have in common.
    """
    market_loader = Mediator.get_tca_market_trade_loader()

    tca_request = TCARequest(start_date=start_date, finish_date=finish_date, ticker='NZDUSD',
                             market_data_store=market_data_store)

    # Base leg (NZDUSD)
    market_base_df = market_loader.get_market_data(tca_request)

    # Terms leg (USDCAD), reusing the same request object
    tca_request.ticker = 'USDCAD'
    market_terms_df = market_loader.get_market_data(tca_request)

    # Cross rate computed by hand from the two USD legs
    market_df = pd.DataFrame(market_base_df['mid'] * market_terms_df['mid']).dropna()

    # Cross rate as returned by the loader
    tca_request.ticker = 'NZDCAD'
    market_direct_df = market_loader.get_market_data(tca_request)

    market_df, market_direct_df = market_df.align(market_direct_df, join='inner')

    # Check time series are equal to each other. Absolute differences are summed, so that positive and negative
    # discrepancies cannot offset each other and hide a mismatch
    assert (market_df['mid'] - market_direct_df['mid']).abs().sum() == 0
def test_market_data_convention():
    """Tests that market data for unusual quotations is consistent (ie. if the user requests USDEUR, this should be
    inverted EURUSD, which is the correct convention). Also compares EURJPY from the loader against the product of
    the EURUSD and USDJPY mids, on 1 minute resampled means.
    """
    market_loader = Mediator.get_tca_market_trade_loader(version=tcapy_version)

    market_request = MarketRequest(start_date=start_date, finish_date=finish_date, ticker=ticker,
                                   data_store=market_data_store)

    #### Compare EURUSD to USDEUR
    market_correct_conv_series = pd.DataFrame(market_loader.get_market_data(market_request)['mid'])

    market_request.ticker = 'USDEUR'
    market_reverse_conv_series = pd.DataFrame(1.0 / market_loader.get_market_data(market_request)['mid'])

    assert_frame_equal(market_correct_conv_series, market_reverse_conv_series, check_dtype=False)

    ### Compare EURJPY (which is autogenerated, if EURJPY is not collected directly) vs. EURUSD & USDJPY multiplied

    # Use resampled series for comparison
    market_request.ticker = 'USDJPY'
    market_df_USDJPY = pd.DataFrame(market_loader.get_market_data(market_request)['mid'])

    market_request.ticker = 'EURJPY'
    market_df_EURJPY = pd.DataFrame(market_loader.get_market_data(market_request)['mid']).resample('1min').mean()

    market_df_EURJPY_comp = (market_correct_conv_series.resample('1min').mean()
                             * market_df_USDJPY.resample('1min').mean())

    market_df_EURJPY, market_df_EURJPY_comp = market_df_EURJPY.align(market_df_EURJPY_comp, join='inner')

    comp = (market_df_EURJPY - market_df_EURJPY_comp).dropna()

    # The builtin all() applied to a DataFrame iterates over its column labels (always truthy here), so the
    # comparison has to be reduced over the values explicitly. Absolute differences are used so that negative
    # discrepancies are checked against the tolerance too
    assert (comp.abs() < eps).values.all()
def __init__(self, version=constants.tcapy_version):
    """Creates the helper objects used by the engine and logs the loader's TCA version and the environment.

    Parameters
    ----------
    version
        Passed to Mediator.get_tca_market_trade_loader to select the market/trade loader
        (default: constants.tcapy_version)
    """
    self._util_func = UtilFunc()
    self._tca_market_trade_loader = Mediator.get_tca_market_trade_loader(version=version)
    self._time_series_ops = TimeSeriesOps()
    self._trade_order_tag = TradeOrderFilterTag()

    # Report which loader version and environment we are running with
    LoggerManager.getLogger(__name__).info(
        "".join(("Init TCAEngine version: ", self._tca_market_trade_loader.get_tca_version(),
                 " - Env: ", constants.env)))
def test_write_market_data_arctic():
    """Tests we can write market data to Arctic, by dumping a CSV into Arctic and checking the mid prices read back
    match those fetched directly from the CSV (to within eps, in absolute terms).

    The write is repeated for every library type in arctic_lib_type, first replacing the full table and then
    appending to it (the ticker is replaced in both cases). Returns immediately, without testing anything, when
    run_arctic_tests is falsy.
    """
    if not run_arctic_tests:
        return

    market_loader = Mediator.get_tca_market_trade_loader(version=tcapy_version)

    ### Test we can read data from CSV and dump to Arctic (and when read back it matches CSV)
    db_start_date = '01 Jan 2016'
    db_finish_date = pd.Timestamp(datetime.datetime.utcnow())

    # Check first when replacing full table and then appending
    replace_append = ['replace', 'append']

    for a in arctic_lib_type:
        for if_exists_table in replace_append:
            database_source = DatabaseSourceArctic(postfix='testharness', arctic_lib_type=a)

            # Write CSV to Arctic
            database_source.convert_csv_to_table(csv_market_data_store, ticker, test_harness_arctic_market_data_table,
                                                 if_exists_table=if_exists_table, if_exists_ticker='replace',
                                                 market_trade_data='market', remove_duplicates=False)

            # Fetch data directly from CSV
            database_source_csv = DatabaseSourceCSV(market_data_database_csv=csv_market_data_store)

            market_df_csv = database_source_csv.fetch_market_data(
                start_date=db_start_date, finish_date=db_finish_date, ticker=ticker)

            # Read back data from Arctic and compare with CSV (the database source object itself is passed as the
            # data store, rather than test_harness_arctic_market_data_store)
            market_request = MarketRequest(start_date=db_start_date, finish_date=db_finish_date, ticker=ticker,
                                           data_store=database_source,
                                           market_data_database_table=test_harness_arctic_market_data_table)

            market_df_load = market_loader.get_market_data(market_request=market_request)

            diff_df = market_df_csv['mid'] - market_df_load['mid']

            # Dump the differences into the working directory (debugging aid)
            diff_df.to_csv('test' + if_exists_table + '.csv')

            # Compare absolute differences, so that large negative discrepancies cannot slip through. NaNs (eg. from
            # timestamps present in only one of the series) compare as False, so they still fail the test
            assert (diff_df.abs() < eps).all()
def test_write_market_data_db():
    """Tests we can write market data to KDB/Influxdb/PyStore, by dumping a CSV into each database source and checking
    the mid prices read back match those fetched directly from the CSV (to within eps, in absolute terms).
    """
    database_source_list, test_harness_market_data_table_list, test_harness_data_store_list = \
        _get_db_market_database_source()

    market_loader = Mediator.get_tca_market_trade_loader(version=tcapy_version)

    for db_no, database_source in enumerate(database_source_list):
        test_harness_market_data_table = test_harness_market_data_table_list[db_no]
        test_harness_data_store = test_harness_data_store_list[db_no]

        ### Test we can read data from CSV and dump to InfluxDB/KDB/PyStore (and when read back it matches CSV)
        db_start_date = '01 Jan 2016'
        db_finish_date = pd.Timestamp(datetime.datetime.utcnow())

        replace_append = ['replace', 'append']

        database_source_csv = DatabaseSourceCSV(market_data_database_csv=csv_market_data_store)

        market_df_csv = database_source_csv.fetch_market_data(
            start_date=db_start_date, finish_date=db_finish_date, ticker=ticker)

        # Check first when replacing full table and then appending (will still replace ticker though). A separate
        # loop variable is used here, so the index of the outer loop is no longer overwritten
        for if_exists_table in replace_append:
            database_source.convert_csv_to_table(csv_market_data_store, ticker, test_harness_market_data_table,
                                                 if_exists_table=if_exists_table, if_exists_ticker='replace',
                                                 market_trade_data='market', remove_duplicates=False)

            market_request = MarketRequest(start_date=db_start_date, finish_date=db_finish_date, ticker=ticker,
                                           data_store=test_harness_data_store,
                                           market_data_database_table=test_harness_market_data_table)

            market_df_load = market_loader.get_market_data(market_request=market_request)

            diff_df = market_df_csv['mid'] - market_df_load['mid']

            # Compare absolute differences, so that large negative discrepancies cannot slip through. NaNs compare
            # as False, so they still fail the test
            assert (diff_df.abs() < eps).all()
def test_write_chunked_market_data_arctic():
    """For very large CSV files we might need to read them in chunks. tcapy supports this and also supports CSVs
    which are sorted in reverse (ie. descending). We need to enable chunking and reverse reading with flags.

    This tests whether chunked data is written correctly to Arctic, comparing it with that read from CSV directly
    (mid prices must agree to within eps, in absolute terms). Returns immediately, without testing anything, when
    run_arctic_tests is falsy.
    """
    if not run_arctic_tests:
        return

    market_loader = Mediator.get_tca_market_trade_loader(version=tcapy_version)

    arctic_start_date = '01 Jan 2016'
    arctic_finish_date = pd.Timestamp(datetime.datetime.utcnow())

    # Load data from CSVs directly (for comparison later)
    market_df_csv_desc = DatabaseSourceCSV(market_data_database_csv=csv_reverse_market_data_store).fetch_market_data(
        start_date=arctic_start_date, finish_date=arctic_finish_date, ticker=ticker)

    market_df_csv_asc = DatabaseSourceCSV(market_data_database_csv=csv_market_data_store).fetch_market_data(
        start_date=arctic_start_date, finish_date=arctic_finish_date, ticker=ticker)

    for a in arctic_lib_type:
        database_source = DatabaseSourceArctic(postfix='testharness', arctic_lib_type=a)

        ### Write CSV data to Arctic which is sorted ascending (default!)
        database_source.convert_csv_to_table(csv_market_data_store, ticker, test_harness_arctic_market_data_table,
                                             if_exists_table='replace', if_exists_ticker='replace',
                                             market_trade_data='market', csv_read_chunksize=100000,
                                             remove_duplicates=False)

        market_request = MarketRequest(start_date=arctic_start_date, finish_date=arctic_finish_date, ticker=ticker,
                                       data_store=test_harness_arctic_market_data_store,
                                       market_data_database_table=test_harness_arctic_market_data_table)

        market_df_load = market_loader.get_market_data(market_request=market_request)

        # Compare reading directly from the CSV vs. reading back from arctic. Absolute differences are used, so
        # that large negative discrepancies cannot slip through (NaNs compare as False and still fail)
        assert ((market_df_csv_asc['mid'] - market_df_load['mid']).abs() < eps).all()

        ### Write CSV data to Arctic which is sorted descending
        database_source.convert_csv_to_table(csv_reverse_market_data_store, ticker,
                                             test_harness_arctic_market_data_table,
                                             if_exists_table='append', if_exists_ticker='replace',
                                             market_trade_data='market', csv_read_chunksize=100000,
                                             read_in_reverse=True, remove_duplicates=False)

        market_request = MarketRequest(start_date=arctic_start_date, finish_date=arctic_finish_date, ticker=ticker,
                                       data_store=test_harness_arctic_market_data_store,
                                       market_data_database_table=test_harness_arctic_market_data_table)

        market_df_load = market_loader.get_market_data(market_request=market_request)

        # Compare reading directly from the CSV vs. reading back from arctic (again on absolute differences)
        assert ((market_df_csv_desc['mid'] - market_df_load['mid']).abs() < eps).all()
def test_append_market_data_arctic():
    """Tests we can append market data to arctic (we will have already written data to the test harness database).

    The CSV market data is split into two chunks: the first is written (replacing the table), a second write of
    the same chunk in append mode must raise ErrorWritingOverlapDataException, and finally the second chunk is
    appended. The mid prices read back must then match the CSV to within eps, in absolute terms. Returns
    immediately, without testing anything, when run_arctic_tests is falsy.
    """
    if not run_arctic_tests:
        return

    market_loader = Mediator.get_tca_market_trade_loader(version=tcapy_version)

    ### Test we can append (non-overlapping) data to Arctic
    arctic_start_date = '01 Jan 2016'
    arctic_finish_date = pd.Timestamp(datetime.datetime.utcnow())

    # Use this market request later when reading back from Arctic
    market_request = MarketRequest(start_date=arctic_start_date, finish_date=arctic_finish_date, ticker=ticker,
                                   data_store=test_harness_arctic_market_data_store,
                                   market_data_database_table=test_harness_arctic_market_data_table)

    # Load data from CSV for comparison later
    database_source_csv = DatabaseSourceCSV(market_data_database_csv=csv_market_data_store)

    market_df_csv = database_source_csv.fetch_market_data(
        start_date=arctic_start_date, finish_date=arctic_finish_date, ticker=ticker)

    market_df_list = TimeSeriesOps().split_array_chunks(market_df_csv, chunks=2)

    for a in arctic_lib_type:
        database_source = DatabaseSourceArctic(postfix='testharness', arctic_lib_type=a)

        market_df_lower = market_df_list[0]
        market_df_higher = market_df_list[1]

        database_source.append_market_data(market_df_lower, ticker, table_name=test_harness_arctic_market_data_table,
                                           if_exists_table='replace', if_exists_ticker='replace',
                                           remove_duplicates=False)

        overlap_error = False

        ## Try to append overlapping data (this will fail!)
        try:
            database_source.append_market_data(market_df_lower, ticker,
                                               table_name=test_harness_arctic_market_data_table,
                                               if_exists_table='append', if_exists_ticker='append',
                                               remove_duplicates=False)
        except ErrorWritingOverlapDataException:
            overlap_error = True

        assert overlap_error

        # Append non-overlapping data which follows (writing overlapping data into Arctic will mess up the datastore!)
        database_source.append_market_data(market_df_higher, ticker,
                                           table_name=test_harness_arctic_market_data_table,
                                           if_exists_table='append', if_exists_ticker='append',
                                           remove_duplicates=False)

        market_df_all_read_back = market_loader.get_market_data(market_request=market_request)

        # Compare absolute differences, so that large negative discrepancies cannot slip through (NaNs compare as
        # False and still fail the test)
        assert ((market_df_all_read_back['mid'] - market_df_csv['mid']).abs() < eps).all()
def test_append_market_data_db():
    """Tests we can append market data to KDB/InfluxDB.

    For each database source, the market data currently stored is read, split into two chunks and written back:
    the first chunk replaces the table, a second write of the same chunk in append mode must raise
    ErrorWritingOverlapDataException, and finally the second chunk is appended. The mid prices read back must then
    match the data originally loaded to within eps, in absolute terms.
    """
    # NOTE(review): test_write_market_data_db obtains its sources from _get_db_market_database_source() - confirm
    # that _get_db_database_source() is the intended helper here
    database_source_list, test_harness_market_data_table_list, test_harness_data_store_list = \
        _get_db_database_source()

    market_loader = Mediator.get_tca_market_trade_loader(version=tcapy_version)

    for db_no, database_source in enumerate(database_source_list):
        test_harness_market_data_table = test_harness_market_data_table_list[db_no]
        test_harness_data_store = test_harness_data_store_list[db_no]

        ### Test we can append (non-overlapping) data to KDB/InfluxDB
        db_start_date = '01 Jan 2016'
        db_finish_date = pd.Timestamp(datetime.datetime.utcnow())

        # TODO
        market_request = MarketRequest(start_date=db_start_date, finish_date=db_finish_date, ticker=ticker,
                                       data_store=test_harness_data_store,
                                       market_data_database_table=test_harness_market_data_table)

        market_df_load = market_loader.get_market_data(market_request=market_request)

        market_df_list = TimeSeriesOps().split_array_chunks(market_df_load, chunks=2)

        market_df_lower = market_df_list[0]
        market_df_higher = market_df_list[1]

        database_source.append_market_data(market_df_lower, ticker, table_name=test_harness_market_data_table,
                                           if_exists_table='replace', if_exists_ticker='replace',
                                           remove_duplicates=False)

        overlap_error = False

        ## Try to append overlapping data (this will fail!)
        try:
            database_source.append_market_data(market_df_lower, ticker, table_name=test_harness_market_data_table,
                                               if_exists_table='append', if_exists_ticker='append',
                                               remove_duplicates=False)
        except ErrorWritingOverlapDataException:
            overlap_error = True

        assert overlap_error

        # Append non-overlapping data which follows (writing overlapping data can end up with duplicated values -
        # although KDB/InfluxDB will allow this)
        database_source.append_market_data(market_df_higher, ticker, table_name=test_harness_market_data_table,
                                           if_exists_table='append', if_exists_ticker='append',
                                           remove_duplicates=False)

        market_df_all_read_back = market_loader.get_market_data(market_request=market_request)

        # Compare absolute differences, so that large negative discrepancies cannot slip through (NaNs compare as
        # False and still fail the test)
        assert ((market_df_all_read_back['mid'] - market_df_load['mid']).abs() < eps).all()
def test_fetch_market_data_db():
    """Tests that we can fetch data from Arctic/KDB/InfluxDB. Note you need to populate the database first before
    running this for the desired dates.

    For every data store/table pair, checks that a request for start_date to finish_date returns a non-empty
    DataFrame lying inside that date range, and that a request for the invalid date range returns an empty one.
    """
    market_loader = Mediator.get_tca_market_trade_loader()

    market_data_store_list, market_data_database_table_list = _get_db_market_data_store()

    for market_data_store, market_data_database_table in zip(market_data_store_list,
                                                             market_data_database_table_list):
        market_request = MarketRequest(start_date=start_date, finish_date=finish_date, ticker=ticker,
                                       data_store=market_data_store,
                                       market_data_database_table=market_data_database_table)

        market_df = market_loader.get_market_data(market_request)

        # Best effort conversion via the volatile cache (presumably the loader can return a cache handle instead of
        # a DataFrame - TODO confirm). Only ordinary exceptions are ignored, so that KeyboardInterrupt/SystemExit
        # still propagate
        try:
            market_df = Mediator.get_volatile_cache().get_dataframe_handle(market_df)
        except Exception:
            pass

        assert not market_df.empty \
               and market_df.index[0] >= pd.Timestamp(start_date).tz_localize('utc') \
               and market_df.index[-1] <= pd.Timestamp(finish_date).tz_localize('utc')

        # Now request a date range for which no data is expected
        market_request.start_date = invalid_start_date
        market_request.finish_date = invalid_finish_date

        market_empty_df = market_loader.get_market_data(market_request)

        try:
            market_empty_df = Mediator.get_volatile_cache().get_dataframe_handle(market_empty_df)
        except Exception:
            pass

        assert market_empty_df.empty
def example_market_data_convention():
    """Loads market data in the correct convention, and checks that data requested for the reverse ticker matches
    the reciprocal of the mid in the correct convention, on the timestamps the two series have in common.
    """
    market_loader = Mediator.get_tca_market_trade_loader()

    tca_request = TCARequest(start_date=start_date, finish_date=finish_date, ticker=ticker,
                             market_data_store=market_data_store)

    market_correct_conv_df = market_loader.get_market_data(tca_request)

    # Same request, but for the ticker quoted the other way round
    tca_request.ticker = reverse_ticker
    market_reverse_conv_df = market_loader.get_market_data(tca_request)

    market_correct_conv_df, market_reverse_conv_df = \
        market_correct_conv_df.align(market_reverse_conv_df, join='inner')

    # Invert the correctly quoted mid by hand
    synthetic_market_df = market_correct_conv_df.copy()
    synthetic_market_df['mid'] = 1.0 / synthetic_market_df['mid']

    # Check time series are equal to each other. Absolute differences are summed, so that positive and negative
    # discrepancies cannot offset each other and hide a mismatch
    assert (market_reverse_conv_df['mid'] - synthetic_market_df['mid']).abs().sum() == 0
def write_mongo_db_atlas_arctic():
    """Writes CSV market data to Arctic/MongoDB on Atlas (cloud), then reads it back through the market loader and
    prints the result (nothing is asserted).
    """
    loader = Mediator.get_tca_market_trade_loader(version=tcapy_version)

    # Date range used when reading the data back from Arctic
    read_start_date = '01 Jan 2016'
    read_finish_date = pd.Timestamp(datetime.datetime.utcnow())

    atlas_source = DatabaseSourceArctic(postfix='testharness', arctic_lib_type='CHUNK_STORE',
                                        connection_string=arctic_connection_string)

    # Dump the CSV into Arctic, replacing both the table and the ticker
    write_options = dict(if_exists_table='replace', if_exists_ticker='replace', market_trade_data='market',
                         remove_duplicates=False)

    atlas_source.convert_csv_to_table(csv_market_data_store, ticker, test_harness_arctic_market_data_table,
                                      **write_options)

    # Read back from Arctic, passing the database source object itself as the data store (rather than
    # test_harness_arctic_market_data_store)
    read_request = MarketRequest(start_date=read_start_date, finish_date=read_finish_date, ticker=ticker,
                                 data_store=atlas_source,
                                 market_data_database_table=test_harness_arctic_market_data_table)

    print(loader.get_market_data(market_request=read_request))
def test_write_multiple_wildcard_market_data_csvs_arctic():
    """Tests we can write sequential market data CSVs (or Parquet files) whose path has been specified by a wildcard
    (eg. EURUSD*.csv). It is assumed that the files are in chronological order, judging by their filenames.

    The large CSV is split into three smaller files per format, these are dumped into Arctic via a wildcard path,
    and the mid prices read back are compared with the original CSV. Returns immediately, without testing
    anything, when run_arctic_tests is falsy.
    """
    if not run_arctic_tests:
        return

    loader = Mediator.get_tca_market_trade_loader(version=tcapy_version)

    arctic_start_date = '01 Jan 2016'
    arctic_finish_date = pd.Timestamp(datetime.datetime.utcnow())

    # Options shared by every CSV -> Arctic conversion below
    shared_write_options = dict(if_exists_ticker='replace', market_trade_data='market', csv_read_chunksize=10**6,
                                remove_duplicates=False)

    for lib_type in arctic_lib_type:
        database_source = DatabaseSourceArctic(postfix='testharness', arctic_lib_type=lib_type)

        ### Read CSV data which is sorted ascending (default!)
        database_source.convert_csv_to_table(csv_market_data_store, ticker, test_harness_arctic_market_data_table,
                                             if_exists_table='replace', **shared_write_options)

        csv_source = DatabaseSourceCSV(market_data_database_csv=csv_market_data_store)

        market_df_csv = csv_source.fetch_market_data(
            start_date=arctic_start_date, finish_date=arctic_finish_date, ticker=ticker)

        # Prepare the folder where we shall dump the mini files, emptying it first
        csv_folder = os.path.join(constants.test_data_harness_folder, 'csv_arctic_mult')
        UtilFunc().forcibly_create_empty_folder(csv_folder)

        # Split the CSV file into several mini CSV files (and also Parquet files)
        for chunk_no, chunk_df in enumerate(TimeSeriesOps().split_array_chunks(market_df_csv, chunks=3)):
            chunk_df.to_csv(os.path.join(csv_folder, "EURUSD" + str(chunk_no) + '.csv'))

            UtilFunc().write_dataframe_to_binary(
                chunk_df, os.path.join(csv_folder, "EURUSD" + str(chunk_no) + '.parquet'), format='parquet')

        for extension in ['csv', 'parquet']:
            ### Read data from the mini files (using wildcard char) and dump to Arctic
            database_source.convert_csv_to_table(os.path.join(csv_folder, "EURUSD*." + extension), ticker,
                                                 test_harness_arctic_market_data_table,
                                                 if_exists_table='append', **shared_write_options)

            market_request = MarketRequest(start_date=arctic_start_date, finish_date=arctic_finish_date,
                                           ticker=ticker, data_store=database_source,
                                           market_data_database_table=test_harness_arctic_market_data_table)

            # Read back from Arctic
            market_df_load = loader.get_market_data(market_request=market_request)

            # Compare reading directly from the original large CSV vs. reading back from arctic (which was dumped
            # from the split files)
            abs_diff = (market_df_load['mid'] - market_df_csv['mid']).abs()
            outside_bounds = abs_diff[abs_diff >= eps]

            assert len(outside_bounds) == 0
def test_fetch_market_trade_data_csv():
    """Tests downloading of market and trade/order data from CSV files. Trade/order data is fetched in three ways:
    via the market/trade loader, via DataFactory and via DatabaseSourceCSV. Every DataFrame fetched must be
    non-empty and lie between start_date and finish_date.
    """

    def _assert_within_dates(df):
        # Checks df is non-empty and its first/last index values lie inside [start_date, finish_date] (in UTC)
        assert not df.empty \
               and df.index[0] >= pd.Timestamp(start_date).tz_localize('utc') \
               and df.index[-1] <= pd.Timestamp(finish_date).tz_localize('utc')

    ### Get market data
    market_loader = Mediator.get_tca_market_trade_loader()

    market_request = MarketRequest(start_date=start_date, finish_date=finish_date, ticker=ticker,
                                   data_store=csv_market_data_store)

    market_df = market_loader.get_market_data(market_request)

    _assert_within_dates(market_df)

    # For a high level trade data request, we need to use TCA request, because it usually involves some
    # market data download (we are assuming that the market data is being downloaded from our arctic database)
    # eg. for converting notionals to reporting currency
    tca_request = TCARequest(start_date=start_date, finish_date=finish_date, ticker=ticker,
                             trade_data_store='csv', market_data_store=arctic_market_data_store,
                             trade_order_mapping=csv_trade_order_mapping)

    for t in trade_order_list:
        trade_order_df = market_loader.get_trade_order_data(tca_request, t)

        # Best effort conversion via the volatile cache (presumably the loader can return a cache handle instead of
        # a DataFrame - TODO confirm). Only ordinary exceptions are ignored, so that KeyboardInterrupt/SystemExit
        # still propagate
        try:
            trade_order_df = Mediator.get_volatile_cache().get_dataframe_handle(trade_order_df)
        except Exception:
            pass

        _assert_within_dates(trade_order_df)

    ### Test using DataFactory and DatabaseSource
    from tcapy.data.datafactory import DataFactory
    from tcapy.data.databasesource import DatabaseSourceCSV

    data_factory = DataFactory()

    for t in trade_order_list:
        ### Test using DataFactory
        trade_request = TradeRequest(start_date=start_date, finish_date=finish_date, ticker=ticker,
                                     data_store='csv', trade_order_mapping=csv_trade_order_mapping,
                                     trade_order_type=t)

        trade_order_df = data_factory.fetch_table(trade_request)

        _assert_within_dates(trade_order_df)

        ### Test using DatabaseSourceCSV
        database_source = DatabaseSourceCSV()

        trade_order_df = database_source.fetch_trade_order_data(start_date, finish_date, ticker,
                                                                table_name=csv_trade_order_mapping[t])

        _assert_within_dates(trade_order_df)
def test_fetch_trade_data_ms_sql_server():
    """Tests that we can fetch data from the Microsoft SQL Server database. Note you need to populate the database
    first before running this for the desired dates.

    For every trade/order type, data is fetched via the market/trade loader, via DataFactory and via
    DatabaseSourceMSSQLServer; each DataFrame must be non-empty and lie between start_date and finish_date.
    Returns immediately, without testing anything, when run_ms_sql_server_tests is falsy.
    """
    if not run_ms_sql_server_tests:
        return

    from tcapy.data.datafactory import DataFactory
    from tcapy.data.databasesource import DatabaseSourceMSSQLServer

    def _assert_within_dates(df):
        # Checks df is non-empty and its first/last index values lie inside [start_date, finish_date] (in UTC)
        assert not df.empty \
               and df.index[0] >= pd.Timestamp(start_date).tz_localize('utc') \
               and df.index[-1] <= pd.Timestamp(finish_date).tz_localize('utc')

    ### Test using TCAMarketTradeLoader
    market_loader = Mediator.get_tca_market_trade_loader()

    for t in trade_order_list:
        # Restrict the mapping to the trade/order type under test
        trade_order_mapping = {t: ms_sql_server_trade_order_mapping[t]}

        trade_request = TCARequest(start_date=start_date, finish_date=finish_date, ticker=ticker,
                                   trade_data_store=ms_sql_server_trade_data_store,
                                   trade_order_mapping=trade_order_mapping,
                                   market_data_store=arctic_market_data_store,
                                   use_multithreading=use_multithreading)

        trade_order_df = market_loader.get_trade_order_data(trade_request, t)

        # Best effort conversion via the volatile cache (presumably the loader can return a cache handle instead of
        # a DataFrame - TODO confirm). Only ordinary exceptions are ignored, so that KeyboardInterrupt/SystemExit
        # still propagate
        try:
            trade_order_df = Mediator.get_volatile_cache().get_dataframe_handle(trade_order_df)
        except Exception:
            pass

        _assert_within_dates(trade_order_df)

        ### Test using DataFactory
        data_factory = DataFactory()

        trade_request = TradeRequest(start_date=start_date, finish_date=finish_date, ticker=ticker,
                                     data_store=ms_sql_server_trade_data_store,
                                     trade_order_mapping=trade_order_mapping, trade_order_type=t)

        trade_order_df = data_factory.fetch_table(trade_request)

        _assert_within_dates(trade_order_df)

        ### Test using DatabaseSourceSQL
        database_source = DatabaseSourceMSSQLServer()

        trade_order_df = database_source.fetch_trade_order_data(
            start_date, finish_date, ticker, database_name=ms_sql_server_trade_data_database_name,
            table_name=trade_order_mapping[t])

        _assert_within_dates(trade_order_df)