def test_write_market_data_arctic():
    """Tests we can write market data to Arctic
    """
    if not run_arctic_tests: return

    market_loader = Mediator.get_tca_market_trade_loader(version=tcapy_version)

    ### Test we can read data from CSV and dump to Arctic (and when read back it matches CSV)
    db_start_date = '01 Jan 2016'
    db_finish_date = pd.Timestamp(datetime.datetime.utcnow())

    replace_append = ['replace', 'append']

    # Check first when replacing the full table and then when appending
    for a in arctic_lib_type:
        for i in replace_append:
            database_source = DatabaseSourceArctic(postfix='testharness', arctic_lib_type=a)

            # Write CSV to Arctic
            database_source.convert_csv_to_table(
                csv_market_data_store, ticker, test_harness_arctic_market_data_table,
                if_exists_table=i, if_exists_ticker='replace', market_trade_data='market',
                remove_duplicates=False)

            # Fetch data directly from CSV
            database_source_csv = DatabaseSourceCSV(market_data_database_csv=csv_market_data_store)

            market_df_csv = database_source_csv.fetch_market_data(
                start_date=db_start_date, finish_date=db_finish_date, ticker=ticker)

            # Read back data from Arctic and compare with CSV
            market_request = MarketRequest(
                start_date=db_start_date, finish_date=db_finish_date, ticker=ticker,
                data_store=database_source,  # test_harness_arctic_market_data_store,
                market_data_database_table=test_harness_arctic_market_data_table)

            market_df_load = market_loader.get_market_data(market_request=market_request)

            # Use the absolute difference, otherwise large negative deviations would pass
            diff_df = (market_df_csv['mid'] - market_df_load['mid']).abs()

            diff_df.to_csv('test' + i + '.csv')

            assert all(diff_df < eps)
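# For reference, the CSV vs. Arctic comparison above boils down to an absolute
# tolerance check on two index-aligned Series. A minimal standalone sketch of that
# pattern (the name below is illustrative, not part of tcapy):
def _series_within_eps_sketch(series_a, series_b, eps=1e-10):
    """Return True if two aligned float Series differ by less than eps everywhere."""
    return bool(((series_a - series_b).abs() < eps).all())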
def test_write_market_data_db():
    """Tests we can write market data to KDB/InfluxDB/PyStore
    """
    database_source_list, test_harness_market_data_table_list, test_harness_data_store_list = \
        _get_db_market_database_source()

    market_loader = Mediator.get_tca_market_trade_loader(version=tcapy_version)

    for i in range(0, len(database_source_list)):
        database_source = database_source_list[i]
        test_harness_market_data_table = test_harness_market_data_table_list[i]
        test_harness_data_store = test_harness_data_store_list[i]

        ### Test we can read data from CSV and dump to InfluxDB/KDB/PyStore (and when read back it matches CSV)
        db_start_date = '01 Jan 2016'
        db_finish_date = pd.Timestamp(datetime.datetime.utcnow())

        replace_append = ['replace', 'append']

        database_source_csv = DatabaseSourceCSV(market_data_database_csv=csv_market_data_store)

        market_df_csv = database_source_csv.fetch_market_data(
            start_date=db_start_date, finish_date=db_finish_date, ticker=ticker)

        # Check first when replacing the full table and then when appending (will still replace the ticker though)
        # Note: use a distinct loop variable to avoid shadowing the outer index i
        for if_exists_table in replace_append:
            database_source.convert_csv_to_table(
                csv_market_data_store, ticker, test_harness_market_data_table,
                if_exists_table=if_exists_table, if_exists_ticker='replace',
                market_trade_data='market', remove_duplicates=False)

            market_request = MarketRequest(
                start_date=db_start_date, finish_date=db_finish_date, ticker=ticker,
                data_store=test_harness_data_store,
                market_data_database_table=test_harness_market_data_table)

            market_df_load = market_loader.get_market_data(market_request=market_request)

            # Use the absolute difference, as in the Arctic test above
            diff_df = (market_df_csv['mid'] - market_df_load['mid']).abs()

            assert all(diff_df < eps)
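# An alternative to the manual diff check is pandas' own testing helper, which
# reports the first mismatching entries on failure. A sketch (check_exact=False
# with rtol applies a relative tolerance; available in recent pandas versions):
def _assert_mid_matches_sketch(mid_expected, mid_actual):
    """Assert two mid-price Series match within a small relative tolerance."""
    import pandas as pd
    pd.testing.assert_series_equal(mid_expected, mid_actual, check_exact=False, rtol=1e-9)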
def test_append_market_data_arctic():
    """Tests we can append market data to Arctic (we will have already written data to the test harness database)
    """
    if not run_arctic_tests: return

    market_loader = Mediator.get_tca_market_trade_loader(version=tcapy_version)

    ### Test we can append (non-overlapping) data to Arctic
    arctic_start_date = '01 Jan 2016'
    arctic_finish_date = pd.Timestamp(datetime.datetime.utcnow())

    # Use this market request later when reading back from Arctic
    market_request = MarketRequest(
        start_date=arctic_start_date, finish_date=arctic_finish_date, ticker=ticker,
        data_store=test_harness_arctic_market_data_store,
        market_data_database_table=test_harness_arctic_market_data_table)

    # Load data from CSV for comparison later
    database_source_csv = DatabaseSourceCSV(market_data_database_csv=csv_market_data_store)

    market_df_csv = database_source_csv.fetch_market_data(
        start_date=arctic_start_date, finish_date=arctic_finish_date, ticker=ticker)

    market_df_list = TimeSeriesOps().split_array_chunks(market_df_csv, chunks=2)

    for a in arctic_lib_type:
        database_source = DatabaseSourceArctic(postfix='testharness', arctic_lib_type=a)

        market_df_lower = market_df_list[0]
        market_df_higher = market_df_list[1]

        database_source.append_market_data(
            market_df_lower, ticker, table_name=test_harness_arctic_market_data_table,
            if_exists_table='replace', if_exists_ticker='replace', remove_duplicates=False)

        overlap_error = False

        ## Try to append overlapping data (this should fail!)
        try:
            database_source.append_market_data(
                market_df_lower, ticker, table_name=test_harness_arctic_market_data_table,
                if_exists_table='append', if_exists_ticker='append', remove_duplicates=False)
        except ErrorWritingOverlapDataException:
            overlap_error = True

        assert overlap_error

        # Append non-overlapping data which follows (writing overlapping data into Arctic would mess up the datastore!)
        database_source.append_market_data(
            market_df_higher, ticker, table_name=test_harness_arctic_market_data_table,
            if_exists_table='append', if_exists_ticker='append', remove_duplicates=False)

        market_df_all_read_back = market_loader.get_market_data(market_request=market_request)

        assert all((market_df_all_read_back['mid'] - market_df_csv['mid']).abs() < eps)
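# TimeSeriesOps().split_array_chunks is tcapy's own helper; a rough standalone
# equivalent for consecutive row-wise DataFrame chunks, based on numpy (a sketch,
# not the tcapy implementation):
def _split_dataframe_chunks_sketch(df, chunks):
    """Split a DataFrame into `chunks` roughly equal consecutive row-wise pieces."""
    import numpy as np
    return np.array_split(df, chunks)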
def test_write_multiple_wildcard_market_data_csvs_arctic():
    """Tests we can write sequential market data CSVs (or binary files such as Parquet) whose path has been
    specified by a wildcard (eg. EURUSD*.csv). It is assumed that the files are in chronological order, based
    on their filenames.
    """
    if not run_arctic_tests: return

    market_loader = Mediator.get_tca_market_trade_loader(version=tcapy_version)

    arctic_start_date = '01 Jan 2016'
    arctic_finish_date = pd.Timestamp(datetime.datetime.utcnow())

    for a in arctic_lib_type:
        database_source = DatabaseSourceArctic(postfix='testharness', arctic_lib_type=a)

        ### Read CSV data which is sorted ascending (default!)
        database_source.convert_csv_to_table(
            csv_market_data_store, ticker, test_harness_arctic_market_data_table,
            if_exists_table='replace', if_exists_ticker='replace', market_trade_data='market',
            csv_read_chunksize=10**6, remove_duplicates=False)

        database_source_csv = DatabaseSourceCSV(market_data_database_csv=csv_market_data_store)

        market_df_csv = database_source_csv.fetch_market_data(
            start_date=arctic_start_date, finish_date=arctic_finish_date, ticker=ticker)

        # Prepare the CSV folder first
        csv_folder = os.path.join(constants.test_data_harness_folder, 'csv_arctic_mult')

        # Empty the CSV test harness folder, where we shall dump the mini CSVs
        UtilFunc().forcibly_create_empty_folder(csv_folder)

        # Split the CSV file into several mini CSV files (and also Parquet files)
        market_df_list = TimeSeriesOps().split_array_chunks(market_df_csv, chunks=3)

        chunk_no = 0

        for m in market_df_list:
            m.to_csv(os.path.join(csv_folder, "EURUSD" + str(chunk_no) + '.csv'))
            UtilFunc().write_dataframe_to_binary(
                m, os.path.join(csv_folder, "EURUSD" + str(chunk_no) + '.parquet'),
                format='parquet')

            chunk_no = chunk_no + 1

        file_ext = ['csv', 'parquet']

        for f in file_ext:
            ### Read data from the mini files (using a wildcard char) and dump to Arctic
            database_source.convert_csv_to_table(
                os.path.join(csv_folder, "EURUSD*." + f), ticker,
                test_harness_arctic_market_data_table,
                if_exists_table='append', if_exists_ticker='replace', market_trade_data='market',
                csv_read_chunksize=10**6, remove_duplicates=False)

            market_request = MarketRequest(
                start_date=arctic_start_date, finish_date=arctic_finish_date, ticker=ticker,
                data_store=database_source,
                market_data_database_table=test_harness_arctic_market_data_table)

            # Read back from Arctic
            market_df_load = market_loader.get_market_data(market_request=market_request)

            # Compare reading directly from the original large CSV vs. reading back from Arctic
            # (which was dumped from the split files)
            diff_df = abs(market_df_load['mid'] - market_df_csv['mid'])

            outside_bounds = diff_df[diff_df >= eps]

            assert len(outside_bounds) == 0
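# The wildcard path above (eg. EURUSD*.csv) relies on the filenames sorting into
# chronological order. A sketch of how such paths can be expanded in plain Python
# (glob itself does not guarantee ordering, hence the explicit sort; the name below
# is illustrative, not part of tcapy):
def _expand_wildcard_paths_sketch(pattern):
    """Return matching file paths in lexicographic (assumed chronological) order."""
    import glob
    return sorted(glob.glob(pattern))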
#
# Copyright 2020 Cuemacro
#
# See the License for the specific language governing permissions and limitations under the License.
#

import os

from chartpy import Chart
from tcapy.data.databasesource import DatabaseSourceCSVBinary

parquet_path = '/data/csv_dump/dukascopy/'

filename = [
    'EURUSD_dukascopy_2020-05-01_00_00_00.096000+00_002020-05-31_23_59_59.084000+00_00.parquet'
]

for f in filename:
    final_path = os.path.join(parquet_path, f)

    database_source = DatabaseSourceCSVBinary(market_data_database_csv=final_path)

    df = database_source.fetch_market_data()

    print(df)

    # Downsample the tick data to 1-minute bars before plotting
    df_resample = df.resample('1min').last()

    Chart().plot(df_resample)
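# Taking the last tick in each minute is one downsampling choice; pandas can also
# build OHLC bars directly from the tick series. A sketch, assuming the DataFrame
# has a 'mid' column (the column name is an assumption, not guaranteed by the dump):
def _to_minute_ohlc_sketch(tick_df):
    """Downsample a tick-level mid-price series to 1-minute OHLC bars."""
    return tick_df['mid'].resample('1min').ohlc()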