def test_delete_market_data_arctic():
    """Tests we can delete a section of market data for a particular ticker
    """
    if not (run_arctic_tests): return

    for a in arctic_lib_type:
        database_source = DatabaseSourceArctic(postfix='testharness', arctic_lib_type=a)

        ### Test we can read data from CSV and dump to Arctic (and when read back it matches CSV)
        db_start_date = '01 Jan 2016'
        db_finish_date = pd.Timestamp(datetime.datetime.utcnow())

        # Write test market data CSV to Arctic first
        database_source.convert_csv_to_table(
            csv_market_data_store, ticker, test_harness_arctic_market_data_table,
            if_exists_table='replace', if_exists_ticker='replace', market_trade_data='market',
            remove_duplicates=False)

        db_start_cut_off = '26 Apr 2017 00:00'
        db_finish_cut_off = '27 Apr 2017 00:50'

        market_df_old = database_source.fetch_market_data(
            start_date=db_start_date, finish_date=db_finish_date, ticker=ticker,
            table_name=test_harness_arctic_market_data_table)

        market_df_old = market_df_old.loc[
            (market_df_old.index <= db_start_cut_off) | (market_df_old.index >= db_finish_cut_off)]

        # Do it with Arctic (note: underneath this will just use pandas, as we can't do on-database deletion with Arctic)
        database_source.delete_market_data(
            ticker, start_date=db_start_cut_off, finish_date=db_finish_cut_off,
            table_name=test_harness_arctic_market_data_table)

        # Read back data from the database (will exclude the deleted records)
        market_df_new = database_source.fetch_market_data(
            start_date=db_start_date, finish_date=db_finish_date, ticker=ticker,
            table_name=test_harness_arctic_market_data_table)

        # Sort columns so they are in the same order
        market_df_old = market_df_old.sort_index(axis=1)
        market_df_new = market_df_new.sort_index(axis=1)

        assert_frame_equal(market_df_old, market_df_new)
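# Outside the test harness, the same delete-then-reread pattern works directly against a populated Arctic
# store. A minimal sketch, assuming data was loaded under the 'dukascopy' postfix; the table name below is
# a hypothetical placeholder, substitute whatever your deployment actually writes to (eg. the constants
# referenced in upload_market_data_flat_file further below)
import datetime

import pandas as pd

from tcapy.data.databasesource import DatabaseSourceArctic

market_data_table = 'market_data_table_dukascopy'  # hypothetical: use your deployment's table name

database_source = DatabaseSourceArctic(postfix='dukascopy')

# Delete a suspect window of ticks (eg. a bad data patch); underneath this is done in pandas,
# as on-database deletion isn't possible with Arctic
database_source.delete_market_data('EURUSD', start_date='26 Apr 2017 00:00',
                                   finish_date='27 Apr 2017 00:50', table_name=market_data_table)

# Re-read a wider window to confirm the deleted records are excluded
market_df = database_source.fetch_market_data(start_date='01 Jan 2016',
                                              finish_date=pd.Timestamp(datetime.datetime.utcnow()),
                                              ticker='EURUSD', table_name=market_data_table)

print(market_df)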
def example_arctic_dukascopy_download():
    """Example of downloading from the lower level Arctic wrapper directly (DatabaseSourceArctic), rather than
    using any higher level classes such as TCAMarketTradeDataLoader
    """
    data_loader = DatabaseSourceArctic(postfix='dukascopy')

    df = data_loader.fetch_market_data('01 May 2017', '30 May 2017', ticker='EURUSD')

    print(df)
def example_arctic_ncfx_download():
    """Example of downloading from the lower level Arctic wrapper directly (DatabaseSourceArctic), rather than
    using any higher level classes such as TCAMarketTradeDataLoader
    """
    data_loader = DatabaseSourceArctic(postfix='ncfx')

    df = data_loader.fetch_market_data(short_start_date, short_finish_date, ticker=ticker)

    print(df)
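# The example functions above just print raw ticks; for a quick sanity check it is often easier to look at
# a downsampled mid price. A minimal sketch (this helper is hypothetical, not part of tcapy), assuming the
# stored ticks carry 'bid'/'ask' columns, as the flat file upload code further below does
def example_arctic_resampled_mid():
    """Fetches EURUSD ticks from Arctic and prints a 1 minute resampled mid price
    """
    data_loader = DatabaseSourceArctic(postfix='dukascopy')

    df = data_loader.fetch_market_data('01 May 2017', '30 May 2017', ticker='EURUSD')

    # Construct a mid price if the vendor doesn't store one directly
    if 'mid' not in df.columns:
        df['mid'] = (df['bid'] + df['ask']) / 2.0

    # Downsample tick-by-tick data to 1 minute bars for a quick eyeball check
    print(df['mid'].resample('1min').mean())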
def upload_market_data_flat_file(self, data_vendor='dukascopy', market_data_store='arctic',
                                 server_host=None, server_port=None,
                                 ticker_mkt=['EURUSD', 'GBPUSD', 'AUDUSD', 'NZDUSD', 'USDCAD', 'USDCHF',
                                             'EURNOK', 'EURSEK', 'USDJPY', 'USDNOK', 'USDSEK', 'EURJPY',
                                             'USDMXN', 'USDTRY', 'USDZAR', 'EURPLN'],
                                 csv_folder=None, if_exists_table='replace', if_append_replace_ticker='replace',
                                 file_extension='parquet', read_in_reverse=False, remove_duplicates=False,
                                 date_format=None, plot_back_data=False, access_control=AccessControl(),
                                 market_data_database_table=None):

    logger = LoggerManager.getLogger(__name__)

    # Files dumped by DatabasePopulator look like this
    # 'AUDUSD_dukascopy_2016-01-03_22_00_01.868000+00_002016-01-31_23_59_57.193000+00_00.parquet'
    csv_file = [x + '_' + data_vendor + '_20*.' + file_extension for x in ticker_mkt]  # assumes ALL TIMES are in UTC!

    ####################################################################################################################
    # Load market data

    # Create market data store for database and associated data vendor
    if market_data_store == 'arctic':
        if server_host is None: server_host = constants.arctic_host
        if server_port is None: server_port = constants.arctic_port

        database_source = DatabaseSourceArctic(postfix=data_vendor,
                                               username=access_control.arctic_username,
                                               password=access_control.arctic_password,
                                               server_host=server_host, server_port=server_port)

        if market_data_database_table is None:
            market_data_database_table = constants.arctic_market_data_database_table

    if market_data_store == 'pystore':
        database_source = DatabaseSourcePyStore(postfix=data_vendor)

        if market_data_database_table is None:
            market_data_database_table = constants.pystore_market_data_database_table

    if market_data_store == 'influxdb':
        if server_host is None: server_host = constants.influxdb_host
        if server_port is None: server_port = constants.influxdb_port

        database_source = DatabaseSourceInfluxDB(postfix=data_vendor,
                                                 username=access_control.influxdb_username,
                                                 password=access_control.influxdb_password,
                                                 server_host=server_host, server_port=server_port)

        if market_data_database_table is None:
            market_data_database_table = constants.influxdb_market_data_database_table

    if market_data_store == 'kdb':
        if server_host is None: server_host = constants.kdb_host
        if server_port is None: server_port = constants.kdb_port

        database_source = DatabaseSourceKDB(postfix=data_vendor,
                                            username=access_control.kdb_username,
                                            password=access_control.kdb_password,
                                            server_host=server_host, server_port=server_port)

        if market_data_database_table is None:
            market_data_database_table = constants.kdb_market_data_database_table

    if csv_folder is None:
        csv_folder = constants.test_data_folder

    # This relies on you having market data stored in Parquet/H5/CSV files already (eg. downloaded from Dukascopy)
    # Note: whilst free FX data can be used for testing (in particular for generating randomised trades),
    # you may want to choose other high frequency quality data for actual benchmarking
    csv_market_data = [os.path.join(csv_folder, x) for x in csv_file]

    # For each ticker, read in the H5/CSV file and then dump it into the tick database
    # Potentially, we could thread this? (see the sketch after this function)
    for i in range(0, len(ticker_mkt)):
        ticker = ticker_mkt[i]
        csv_file = csv_market_data[i]

        # On the second time through the loop, make sure to append to the table,
        # otherwise we would keep overwriting it!
        if if_exists_table == 'replace':
            if i >= 1:
                if_exists_table = 'append'
            else:
                if_exists_table = 'replace'

        database_source.convert_csv_to_table(csv_file, ticker, market_data_database_table,
                                             if_exists_table=if_exists_table,
                                             remove_duplicates=remove_duplicates,
                                             if_exists_ticker=if_append_replace_ticker,
                                             date_format=date_format, read_in_reverse=read_in_reverse)

        # It is worth plotting the data sometimes to check its validity (make sure you choose appropriate
        # start/finish dates: loading a *very* large tick history into memory will result in your computer
        # running out of memory)
        if plot_back_data:
            from chartpy import Chart

            import datetime
            import pandas as pd

            df = database_source.fetch_market_data(start_date='01 Jan 2000',
                                                   finish_date=datetime.datetime.utcnow(), ticker=ticker)

            df = pd.DataFrame(df.resample('5min').mean())

            if 'mid' not in df.columns:
                df['mid'] = (df['bid'] + df['ask']) / 2.0

            df = pd.DataFrame(df['mid'])

            Chart(engine='plotly').plot(df)

            print(df)

    logger.info("Finished uploading data to " + market_data_store)
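# The per-ticker uploads in upload_market_data_flat_file are largely independent, since each ticker is
# written under its own key in the tick store, so the loop above could plausibly be threaded. A minimal
# sketch using a thread pool; the _upload_one helper is hypothetical, and it assumes the target table
# already exists (eg. after one sequential write) and that the DatabaseSource implementation is safe to
# share across threads, which is worth verifying for your particular store
from concurrent.futures import ThreadPoolExecutor, as_completed

def _upload_one(database_source, csv_file, ticker, market_data_database_table):
    # One ticker per task: always append at the table level and replace at the ticker level,
    # so tasks do not depend on loop order (unlike the sequential i >= 1 logic above)
    database_source.convert_csv_to_table(csv_file, ticker, market_data_database_table,
                                         if_exists_table='append', if_exists_ticker='replace')

with ThreadPoolExecutor(max_workers=4) as executor:
    futures = [executor.submit(_upload_one, database_source, c, t, market_data_database_table)
               for t, c in zip(ticker_mkt, csv_market_data)]

    for f in as_completed(futures):
        f.result()  # re-raise any upload errors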
tickers = ['EURUSD', 'GBPUSD', 'AUDUSD', 'NZDUSD', 'USDCAD', 'USDCHF', 'EURNOK', 'EURSEK', 'USDJPY']

if database_dialect == 'arctic':
    from tcapy.data.databasesource import DatabaseSourceArctic as DatabaseSource
elif database_dialect == 'pystore':
    from tcapy.data.databasesource import DatabaseSourcePyStore as DatabaseSource

database_source = DatabaseSource(postfix=data_vendor)

file_format = 'parquet'

for t in tickers:
    market_df = database_source.fetch_market_data(start_date=start_date, finish_date=finish_date, ticker=t)

    if market_df is not None:
        # Encode the data vendor and the date range covered in the filename,
        # stripping out characters which are invalid in file paths
        key = '_' + data_vendor + '_' + \
              (str(market_df.index[0]) + str(market_df.index[-1])).replace(":", '_').replace(" ", '_')

        filename = os.path.join(folder, t + key) + '.' + file_format

        print('Writing ' + t + ' to ' + filename)
        print('No of items ' + str(len(market_df.index)))

        if file_format == 'parquet':
            market_df.to_parquet(filename)
        elif file_format == 'csv':
            market_df.to_csv(filename)
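# A quick way to verify a dump is to read the file straight back with pandas and compare row counts.
# A minimal sketch; the filename below is illustrative, substitute one printed by the loop above
import pandas as pd

filename = 'EURUSD_dukascopy_2017-05-01_00_00_00+00_002017-05-30_23_59_59+00_00.parquet'

df_check = pd.read_parquet(filename)

# The Parquet round trip should preserve the tick count and the DatetimeIndex
print('No of items read back ' + str(len(df_check.index)))
print(df_check.head())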