def test_write_market_data_arctic():
    """Tests we can write market data to Arctic
    """
    if not run_arctic_tests: return

    market_loader = Mediator.get_tca_market_trade_loader(version=tcapy_version)

    ### Test we can read data from CSV and dump to Arctic (and when read back it matches CSV)
    db_start_date = '01 Jan 2016'
    db_finish_date = pd.Timestamp(datetime.datetime.utcnow())

    replace_append = ['replace', 'append']

    # Check first when replacing the full table and then when appending
    for a in arctic_lib_type:
        for i in replace_append:
            database_source = DatabaseSourceArctic(postfix='testharness', arctic_lib_type=a)

            # Write CSV to Arctic
            database_source.convert_csv_to_table(
                csv_market_data_store, ticker, test_harness_arctic_market_data_table,
                if_exists_table=i, if_exists_ticker='replace', market_trade_data='market',
                remove_duplicates=False)

            # Fetch data directly from CSV
            database_source_csv = DatabaseSourceCSV(market_data_database_csv=csv_market_data_store)

            market_df_csv = database_source_csv.fetch_market_data(
                start_date=db_start_date, finish_date=db_finish_date, ticker=ticker)

            # Read back data from Arctic and compare with CSV
            market_request = MarketRequest(
                start_date=db_start_date, finish_date=db_finish_date, ticker=ticker,
                data_store=database_source,  # test_harness_arctic_market_data_store,
                market_data_database_table=test_harness_arctic_market_data_table)

            market_df_load = market_loader.get_market_data(market_request=market_request)

            diff_df = market_df_csv['mid'] - market_df_load['mid']

            # Dump the differences to a CSV so they can be inspected if the comparison fails
            diff_df.to_csv('test' + i + '.csv')

            assert all(abs(diff_df) < eps)
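# A hedged note on the arctic_lib_type fixture iterated over above: Arctic supports several
# library types, so the fixture is assumed to be a list defined elsewhere in this module,
# along the lines of the illustrative value below (the exact contents here are an assumption).
# arctic_lib_type = ['CHUNK_STORE', 'TICK_STORE', 'VERSION_STORE']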
def test_fetch_market_data_db():
    """Tests that we can fetch data from Arctic/KDB/InfluxDB. Note you need to populate the database
    first before running this for the desired dates.
    """
    market_loader = Mediator.get_tca_market_trade_loader()

    market_data_store_list, market_data_database_table_list = _get_db_market_data_store()

    for market_data_store, market_data_database_table in zip(market_data_store_list,
                                                              market_data_database_table_list):
        market_request = MarketRequest(
            start_date=start_date, finish_date=finish_date, ticker=ticker,
            data_store=market_data_store, market_data_database_table=market_data_database_table)

        market_df = market_loader.get_market_data(market_request)

        # When running with a distributed/volatile cache, the loader may return a handle rather than a DataFrame
        try:
            market_df = Mediator.get_volatile_cache().get_dataframe_handle(market_df)
        except Exception:
            pass

        assert not market_df.empty \
               and market_df.index[0] >= pd.Timestamp(start_date).tz_localize('utc') \
               and market_df.index[-1] <= pd.Timestamp(finish_date).tz_localize('utc')

        # Requesting dates for which there is no data should return an empty DataFrame
        market_request.start_date = invalid_start_date
        market_request.finish_date = invalid_finish_date

        market_empty_df = market_loader.get_market_data(market_request)

        try:
            market_empty_df = Mediator.get_volatile_cache().get_dataframe_handle(market_empty_df)
        except Exception:
            pass

        assert market_empty_df.empty
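# A minimal sketch of how the test harness databases could be populated before running
# test_fetch_market_data_db, assuming the module-level fixtures (csv_market_data_store, ticker)
# and the _get_db_market_database_source() helper used by the write tests below. The helper
# name below is illustrative only and is not part of the test suite.
def _example_populate_test_harness_databases():
    """Illustrative sketch: dump the benchmark CSV into each test harness market data table."""
    for database_source, test_harness_market_data_table, _ in zip(*_get_db_market_database_source()):
        # Replace both the table and the ticker so repeated runs start from a clean state
        database_source.convert_csv_to_table(
            csv_market_data_store, ticker, test_harness_market_data_table,
            if_exists_table='replace', if_exists_ticker='replace', market_trade_data='market',
            remove_duplicates=False)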
def test_write_market_data_db():
    """Tests we can write market data to KDB/InfluxDB/PyStore
    """
    market_loader = Mediator.get_tca_market_trade_loader(version=tcapy_version)

    for database_source, test_harness_market_data_table, test_harness_data_store in zip(
            *_get_db_market_database_source()):

        ### Test we can read data from CSV and dump to InfluxDB/KDB/PyStore (and when read back it matches CSV)
        db_start_date = '01 Jan 2016'
        db_finish_date = pd.Timestamp(datetime.datetime.utcnow())

        replace_append = ['replace', 'append']

        database_source_csv = DatabaseSourceCSV(market_data_database_csv=csv_market_data_store)

        market_df_csv = database_source_csv.fetch_market_data(
            start_date=db_start_date, finish_date=db_finish_date, ticker=ticker)

        # Check first when replacing the full table and then when appending (will still replace the ticker though)
        for i in replace_append:
            database_source.convert_csv_to_table(
                csv_market_data_store, ticker, test_harness_market_data_table,
                if_exists_table=i, if_exists_ticker='replace', market_trade_data='market',
                remove_duplicates=False)

            market_request = MarketRequest(
                start_date=db_start_date, finish_date=db_finish_date, ticker=ticker,
                data_store=test_harness_data_store,
                market_data_database_table=test_harness_market_data_table)

            market_df_load = market_loader.get_market_data(market_request=market_request)

            diff_df = market_df_csv['mid'] - market_df_load['mid']

            assert all(abs(diff_df) < eps)
def write_mongo_db_atlas_arctic():
    """Tests we can write market data to Arctic/MongoDB on Atlas (cloud). Note this is not prefixed
    with 'test_', so it is not collected automatically; it requires a valid Atlas connection string
    and should be run manually.
    """
    market_loader = Mediator.get_tca_market_trade_loader(version=tcapy_version)

    ### Test we can read data from CSV and dump to Arctic (and when read back it matches CSV)
    db_start_date = '01 Jan 2016'
    db_finish_date = pd.Timestamp(datetime.datetime.utcnow())

    database_source = DatabaseSourceArctic(postfix='testharness', arctic_lib_type='CHUNK_STORE',
                                           connection_string=arctic_connection_string)

    # Write CSV to Arctic
    database_source.convert_csv_to_table(
        csv_market_data_store, ticker, test_harness_arctic_market_data_table,
        if_exists_table='replace', if_exists_ticker='replace', market_trade_data='market',
        remove_duplicates=False)

    # Read back data from Arctic and print it for manual inspection
    market_request = MarketRequest(
        start_date=db_start_date, finish_date=db_finish_date, ticker=ticker,
        data_store=database_source,  # test_harness_arctic_market_data_store,
        market_data_database_table=test_harness_arctic_market_data_table)

    market_df_load = market_loader.get_market_data(market_request=market_request)

    print(market_df_load)
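# A hedged sketch of how arctic_connection_string might be configured for MongoDB Atlas.
# The user, password and cluster host below are placeholders, not values from this project;
# the "mongodb+srv://" scheme is the standard Atlas connection string format.
# arctic_connection_string = 'mongodb+srv://<user>:<password>@<cluster>.mongodb.net/'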
def test_fetch_market_trade_data_csv():
    """Tests downloading of market and trade/order data from CSV files
    """

    ### Get market data
    market_loader = Mediator.get_tca_market_trade_loader()

    market_request = MarketRequest(start_date=start_date, finish_date=finish_date, ticker=ticker,
                                   data_store=csv_market_data_store)

    market_df = market_loader.get_market_data(market_request)

    assert not market_df.empty \
           and market_df.index[0] >= pd.Timestamp(start_date).tz_localize('utc') \
           and market_df.index[-1] <= pd.Timestamp(finish_date).tz_localize('utc')

    # For a high level trade data request, we need to use a TCARequest, because it usually involves some
    # market data download (we are assuming that the market data is being downloaded from our Arctic database)
    # eg. for converting notionals to the reporting currency
    tca_request = TCARequest(start_date=start_date, finish_date=finish_date, ticker=ticker,
                             trade_data_store='csv', market_data_store=arctic_market_data_store,
                             trade_order_mapping=csv_trade_order_mapping)

    for t in trade_order_list:
        trade_order_df = market_loader.get_trade_order_data(tca_request, t)

        # When running with a distributed/volatile cache, the loader may return a handle rather than a DataFrame
        try:
            trade_order_df = Mediator.get_volatile_cache().get_dataframe_handle(trade_order_df)
        except Exception:
            pass

        assert not trade_order_df.empty \
               and trade_order_df.index[0] >= pd.Timestamp(start_date).tz_localize('utc') \
               and trade_order_df.index[-1] <= pd.Timestamp(finish_date).tz_localize('utc')

    ### Test using DataFactory and DatabaseSource
    from tcapy.data.datafactory import DataFactory

    data_factory = DataFactory()

    for t in trade_order_list:
        ### Test using DataFactory
        trade_request = TradeRequest(
            start_date=start_date, finish_date=finish_date, ticker=ticker, data_store='csv',
            trade_order_mapping=csv_trade_order_mapping, trade_order_type=t)

        trade_order_df = data_factory.fetch_table(trade_request)

        assert not trade_order_df.empty \
               and trade_order_df.index[0] >= pd.Timestamp(start_date).tz_localize('utc') \
               and trade_order_df.index[-1] <= pd.Timestamp(finish_date).tz_localize('utc')

        ### Test using DatabaseSourceCSV
        from tcapy.data.databasesource import DatabaseSourceCSV

        database_source = DatabaseSourceCSV()

        trade_order_df = database_source.fetch_trade_order_data(
            start_date, finish_date, ticker, table_name=csv_trade_order_mapping[t])

        assert not trade_order_df.empty \
               and trade_order_df.index[0] >= pd.Timestamp(start_date).tz_localize('utc') \
               and trade_order_df.index[-1] <= pd.Timestamp(finish_date).tz_localize('utc')
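# A hedged sketch of the shape assumed for the trade/order fixtures used above. The keys and
# file names below are hypothetical placeholders; the real values are defined elsewhere in
# this test module.
# trade_order_list = ['trade_df', 'order_df']
# csv_trade_order_mapping = {'trade_df': '<path>/trade_df_example.csv',
#                            'order_df': '<path>/order_df_example.csv'}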
def test_append_market_data_db():
    """Tests we can append market data to KDB/InfluxDB/PyStore.
    """
    market_loader = Mediator.get_tca_market_trade_loader(version=tcapy_version)

    for database_source, test_harness_market_data_table, test_harness_data_store in zip(
            *_get_db_market_database_source()):

        ### Test we can append (non-overlapping) data to InfluxDB/KDB/PyStore
        db_start_date = '01 Jan 2016'
        db_finish_date = pd.Timestamp(datetime.datetime.utcnow())

        market_request = MarketRequest(
            start_date=db_start_date, finish_date=db_finish_date, ticker=ticker,
            data_store=test_harness_data_store,
            market_data_database_table=test_harness_market_data_table)

        market_df_load = market_loader.get_market_data(market_request=market_request)

        market_df_list = TimeSeriesOps().split_array_chunks(market_df_load, chunks=2)

        market_df_lower = market_df_list[0]
        market_df_higher = market_df_list[1]

        database_source.append_market_data(
            market_df_lower, ticker, table_name=test_harness_market_data_table,
            if_exists_table='replace', if_exists_ticker='replace', remove_duplicates=False)

        overlap_error = False

        ## Try to append overlapping data (this will fail!)
        try:
            database_source.append_market_data(
                market_df_lower, ticker, table_name=test_harness_market_data_table,
                if_exists_table='append', if_exists_ticker='append', remove_duplicates=False)
        except ErrorWritingOverlapDataException:
            overlap_error = True

        assert overlap_error

        # Append non-overlapping data which follows (writing overlapping data can end up with duplicated
        # values - although KDB/InfluxDB/PyStore will allow this)
        database_source.append_market_data(
            market_df_higher, ticker, table_name=test_harness_market_data_table,
            if_exists_table='append', if_exists_ticker='append', remove_duplicates=False)

        market_df_all_read_back = market_loader.get_market_data(market_request=market_request)

        assert all(abs(market_df_all_read_back['mid'] - market_df_load['mid']) < eps)
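# A minimal, self-contained sketch of the chunk-splitting idea used by the append tests:
# TimeSeriesOps().split_array_chunks is assumed to behave roughly like numpy's array_split,
# cutting a time-indexed DataFrame into consecutive, non-overlapping row blocks. The helper
# name below is illustrative only.
import numpy as np
import pandas as pd


def _example_split_dataframe_chunks(df, chunks=2):
    """Illustrative only: split a DataFrame into `chunks` consecutive row blocks."""
    return [df.iloc[idx] for idx in np.array_split(np.arange(len(df)), chunks)]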
def test_write_multiple_wildcard_market_data_csvs_arctic():
    """Tests we can write sequential market data CSVs (or Parquet files) whose path has been specified by a
    wildcard (eg. EURUSD*.csv). It is assumed that the CSVs are in chronological order, judging by their
    filenames.
    """
    if not run_arctic_tests: return

    market_loader = Mediator.get_tca_market_trade_loader(version=tcapy_version)

    arctic_start_date = '01 Jan 2016'
    arctic_finish_date = pd.Timestamp(datetime.datetime.utcnow())

    for a in arctic_lib_type:
        database_source = DatabaseSourceArctic(postfix='testharness', arctic_lib_type=a)

        ### Read CSV data which is sorted ascending (default!)
        database_source.convert_csv_to_table(
            csv_market_data_store, ticker, test_harness_arctic_market_data_table,
            if_exists_table='replace', if_exists_ticker='replace', market_trade_data='market',
            csv_read_chunksize=10**6, remove_duplicates=False)

        database_source_csv = DatabaseSourceCSV(market_data_database_csv=csv_market_data_store)

        market_df_csv = database_source_csv.fetch_market_data(
            start_date=arctic_start_date, finish_date=arctic_finish_date, ticker=ticker)

        # Prepare the CSV folder first
        csv_folder = resource('csv_arctic_mult')

        # Empty the CSV test harness folder, where we shall dump the mini CSVs
        UtilFunc().forcibly_create_empty_folder(csv_folder)

        # Split the CSV file into several mini CSV files (and also Parquet files)
        market_df_list = TimeSeriesOps().split_array_chunks(market_df_csv, chunks=3)

        chunk_no = 0

        for m in market_df_list:
            m.to_csv(os.path.join(csv_folder, "EURUSD" + str(chunk_no) + '.csv'))
            UtilFunc().write_dataframe_to_binary(
                m, os.path.join(csv_folder, "EURUSD" + str(chunk_no) + '.parquet'), format='parquet')

            chunk_no = chunk_no + 1

        file_ext = ['csv', 'parquet']

        for f in file_ext:
            ### Read data from the mini CSVs/Parquet files (using a wildcard) and dump to Arctic
            database_source.convert_csv_to_table(
                os.path.join(csv_folder, "EURUSD*." + f), ticker,
                test_harness_arctic_market_data_table,
                if_exists_table='append', if_exists_ticker='replace', market_trade_data='market',
                csv_read_chunksize=10**6, remove_duplicates=False)

            market_request = MarketRequest(
                start_date=arctic_start_date, finish_date=arctic_finish_date, ticker=ticker,
                data_store=database_source,
                market_data_database_table=test_harness_arctic_market_data_table)

            # Read back from Arctic
            market_df_load = market_loader.get_market_data(market_request=market_request)

            # Compare reading directly from the original large CSV vs. reading back from Arctic
            # (which was dumped from the split CSVs)
            diff_df = abs(market_df_load['mid'] - market_df_csv['mid'])

            outside_bounds = diff_df[diff_df >= eps]

            assert len(outside_bounds) == 0
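# A minimal, self-contained sketch of the wildcard expansion idea exercised above: the
# "EURUSD*.csv" style path is assumed to be expanded to the matching chunk files in filename
# order before they are concatenated and written to the market data table. The helper name
# below is illustrative only and not how convert_csv_to_table is implemented internally.
import glob
import pandas as pd


def _example_read_wildcard_csvs(wildcard_path):
    """Illustrative only: read and concatenate CSV chunks matched by a wildcard path."""
    paths = sorted(glob.glob(wildcard_path))
    return pd.concat([pd.read_csv(p, index_col=0, parse_dates=True) for p in paths])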
def test_write_chunked_market_data_arctic():
    """For very large CSV files we might need to read them in chunks. tcapy supports this and also supports CSVs
    which are sorted in reverse (ie. descending). We need to enable chunking and reverse reading with flags.

    This tests whether chunked data is written correctly to Arctic, comparing it with data read from the CSV
    directly.
    """
    if not run_arctic_tests: return

    market_loader = Mediator.get_tca_market_trade_loader(version=tcapy_version)

    arctic_start_date = '01 Jan 2016'
    arctic_finish_date = pd.Timestamp(datetime.datetime.utcnow())

    # Load data from CSVs directly (for comparison later)
    market_df_csv_desc = DatabaseSourceCSV(
        market_data_database_csv=csv_reverse_market_data_store).fetch_market_data(
        start_date=arctic_start_date, finish_date=arctic_finish_date, ticker=ticker)

    market_df_csv_asc = DatabaseSourceCSV(
        market_data_database_csv=csv_market_data_store).fetch_market_data(
        start_date=arctic_start_date, finish_date=arctic_finish_date, ticker=ticker)

    for a in arctic_lib_type:
        database_source = DatabaseSourceArctic(postfix='testharness', arctic_lib_type=a)

        ### Write CSV data to Arctic which is sorted ascending (default!)
        database_source.convert_csv_to_table(
            csv_market_data_store, ticker, test_harness_arctic_market_data_table,
            if_exists_table='replace', if_exists_ticker='replace', market_trade_data='market',
            csv_read_chunksize=100000, remove_duplicates=False)

        market_request = MarketRequest(
            start_date=arctic_start_date, finish_date=arctic_finish_date, ticker=ticker,
            data_store=database_source,
            market_data_database_table=test_harness_arctic_market_data_table)

        market_df_load = market_loader.get_market_data(market_request=market_request)

        # Compare reading directly from the CSV vs. reading back from Arctic
        assert all(abs(market_df_csv_asc['mid'] - market_df_load['mid']) < eps)

        ### Write CSV data to Arctic which is sorted descending
        database_source.convert_csv_to_table(
            csv_reverse_market_data_store, ticker, test_harness_arctic_market_data_table,
            if_exists_table='append', if_exists_ticker='replace', market_trade_data='market',
            csv_read_chunksize=100000, read_in_reverse=True, remove_duplicates=False)

        market_request = MarketRequest(
            start_date=arctic_start_date, finish_date=arctic_finish_date, ticker=ticker,
            data_store=database_source,
            market_data_database_table=test_harness_arctic_market_data_table)

        market_df_load = market_loader.get_market_data(market_request=market_request)

        # Compare reading directly from the CSV vs. reading back from Arctic
        assert all(abs(market_df_csv_desc['mid'] - market_df_load['mid']) < eps)
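# A minimal sketch of the chunked reading idea behind the csv_read_chunksize flag above: very
# large CSVs can be streamed in blocks with pandas rather than loaded in one go. This is
# illustrative only and not how DatabaseSourceArctic is implemented internally.
import pandas as pd


def _example_read_csv_in_chunks(path, chunksize=100000):
    """Illustrative only: read a large CSV in fixed-size chunks and reassemble it."""
    chunks = pd.read_csv(path, index_col=0, parse_dates=True, chunksize=chunksize)
    return pd.concat(chunks)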
def test_append_market_data_arctic():
    """Tests we can append market data to Arctic (we will have already written data to the test harness database).
    """
    if not run_arctic_tests: return

    market_loader = Mediator.get_tca_market_trade_loader(version=tcapy_version)

    ### Test we can append (non-overlapping) data to Arctic
    arctic_start_date = '01 Jan 2016'
    arctic_finish_date = pd.Timestamp(datetime.datetime.utcnow())

    # Load data from CSV for comparison later
    database_source_csv = DatabaseSourceCSV(market_data_database_csv=csv_market_data_store)

    market_df_csv = database_source_csv.fetch_market_data(
        start_date=arctic_start_date, finish_date=arctic_finish_date, ticker=ticker)

    market_df_list = TimeSeriesOps().split_array_chunks(market_df_csv, chunks=2)

    for a in arctic_lib_type:
        database_source = DatabaseSourceArctic(postfix='testharness', arctic_lib_type=a)

        market_df_lower = market_df_list[0]
        market_df_higher = market_df_list[1]

        database_source.append_market_data(
            market_df_lower, ticker, table_name=test_harness_arctic_market_data_table,
            if_exists_table='replace', if_exists_ticker='replace', remove_duplicates=False)

        overlap_error = False

        ## Try to append overlapping data (this will fail!)
        try:
            database_source.append_market_data(
                market_df_lower, ticker, table_name=test_harness_arctic_market_data_table,
                if_exists_table='append', if_exists_ticker='append', remove_duplicates=False)
        except ErrorWritingOverlapDataException:
            overlap_error = True

        assert overlap_error

        # Append non-overlapping data which follows (writing overlapping data into Arctic will mess up the datastore!)
        database_source.append_market_data(
            market_df_higher, ticker, table_name=test_harness_arctic_market_data_table,
            if_exists_table='append', if_exists_ticker='append', remove_duplicates=False)

        # Use this market request when reading back from Arctic
        market_request = MarketRequest(
            start_date=arctic_start_date, finish_date=arctic_finish_date, ticker=ticker,
            data_store=database_source,
            market_data_database_table=test_harness_arctic_market_data_table)

        market_df_all_read_back = market_loader.get_market_data(market_request=market_request)

        diff_df = abs(market_df_all_read_back['mid'] - market_df_csv['mid'])

        outside_bounds = diff_df[diff_df >= eps]

        assert len(outside_bounds) == 0
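# A hedged convenience sketch: run the Arctic-based tests directly (outside the test runner),
# assuming the module-level fixtures referenced above (run_arctic_tests, ticker, eps,
# csv_market_data_store, arctic_lib_type etc.) are defined earlier in this module.
if __name__ == '__main__':
    test_write_market_data_arctic()
    test_append_market_data_arctic()
    test_write_chunked_market_data_arctic()
    test_write_multiple_wildcard_market_data_csvs_arctic()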