def test_data_catalog_instrument_ids_correctly_unmapped(self):
    """Instrument ids written to the catalog should round-trip unchanged."""
    # Arrange
    catalog = DataCatalog.from_env()
    audusd = TestInstrumentProvider.default_fx_ccy("AUD/USD", venue=Venue("SIM"))
    tick = TradeTick(
        instrument_id=audusd.id,
        price=Price.from_str("2.0"),
        size=Quantity.from_int(10),
        aggressor_side=AggressorSide.UNKNOWN,
        trade_id=TradeId("1"),
        ts_event=0,
        ts_init=0,
    )
    write_objects(catalog=catalog, chunk=[audusd, tick])

    # Act
    loaded_instrument = catalog.instruments(instrument_ids=["AUD/USD.SIM"], as_nautilus=True)[0]
    loaded_tick = catalog.trade_ticks(instrument_ids=["AUD/USD.SIM"], as_nautilus=True)[0]

    # Assert
    assert loaded_instrument.id.value == "AUD/USD.SIM"
    assert loaded_tick.instrument_id.value == "AUD/USD.SIM"
def test_catalog_generic_data_not_overwritten(self):
    """Writing overlapping generic-data chunks should accumulate with de-duplication."""
    # Arrange
    TestPersistenceStubs.setup_news_event_persistence()
    process_files(
        glob_path=f"{TEST_DATA_DIR}/news_events.csv",
        reader=CSVReader(block_parser=TestPersistenceStubs.news_event_parser),
        catalog=self.catalog,
    )
    objs = self.catalog.generic_data(
        cls=NewsEventData,
        filter_expr=ds.field("currency") == "USD",
        as_nautilus=True,
    )

    # Clear the catalog again
    data_catalog_setup()
    self.catalog = DataCatalog.from_env()
    remaining = self.catalog.generic_data(NewsEventData, raise_on_empty=False, as_nautilus=True)
    assert len(remaining) == 0

    first_chunk = objs[:10]
    second_chunk = objs[5:15]  # overlaps the first chunk by five objects

    # Act, Assert
    write_objects(catalog=self.catalog, chunk=first_chunk)
    assert len(self.catalog.generic_data(NewsEventData)) == 10
    write_objects(catalog=self.catalog, chunk=second_chunk)
    assert len(self.catalog.generic_data(NewsEventData)) == 15
def test_serialize_and_deserialize_account_state(self, event):
    """An AccountState event should survive a Parquet round trip."""
    payload = ParquetSerializer.serialize(event)
    [restored] = ParquetSerializer.deserialize(cls=AccountState, chunk=payload)

    # Assert
    assert restored == event
    write_objects(catalog=self.catalog, chunk=[event])
def test_serialize_and_deserialize_order_book_snapshot(self):
    """An OrderBookSnapshot should survive a Parquet round trip."""
    snapshot = TestStubs.order_book_snapshot()

    payload = ParquetSerializer.serialize(snapshot)
    restored = ParquetSerializer.deserialize(cls=OrderBookSnapshot, chunk=payload)

    # Assert
    assert restored == [snapshot]
    write_objects(catalog=self.catalog, chunk=[snapshot])
def test_serialize_and_deserialize_instruments(self, instrument):
    """Any instrument fixture should round trip and then be queryable from the catalog."""
    payload = ParquetSerializer.serialize(instrument)
    assert payload

    restored = ParquetSerializer.deserialize(cls=type(instrument), chunk=[payload])

    # Assert
    assert restored == [instrument]
    write_objects(catalog=self.catalog, chunk=[instrument])
    frame = self.catalog.instruments()
    assert len(frame) == 1
def test_serialize_and_deserialize_trading_state_changed(self):
    """A TradingStateChanged event should survive a Parquet round trip."""
    original = TestStubs.event_trading_state_changed()

    payload = ParquetSerializer.serialize(original)
    [restored] = ParquetSerializer.deserialize(cls=TradingStateChanged, chunk=[payload])

    # Assert
    assert restored == original
    write_objects(catalog=self.catalog, chunk=[original])
def test_data_catalog_currency_with_null_max_price_loads(self):
    """A currency instrument with a null max_price should load back as None."""
    # Arrange
    catalog = DataCatalog.from_env()
    audusd = TestInstrumentProvider.default_fx_ccy("AUD/USD", venue=Venue("SIM"))
    write_objects(catalog=catalog, chunk=[audusd])

    # Act
    loaded = catalog.instruments(instrument_ids=["AUD/USD.SIM"], as_nautilus=True)[0]

    # Assert
    assert loaded.max_price is None
def test_data_catalog_instruments_load(self):
    """All written instruments should be returned by an unfiltered query."""
    # Arrange
    written = [
        TestInstrumentProvider.aapl_equity(),
        TestInstrumentProvider.es_future(),
        TestInstrumentProvider.aapl_option(),
    ]
    write_objects(catalog=self.catalog, chunk=written)

    # Act
    loaded = self.catalog.instruments(as_nautilus=True)

    # Assert
    assert len(loaded) == 3
def test_data_catalog_instruments_filter_by_instrument_id(self):
    """Filtering by every written instrument id should return all instruments."""
    # Arrange
    written = [
        TestInstrumentProvider.aapl_equity(),
        TestInstrumentProvider.es_future(),
        TestInstrumentProvider.aapl_option(),
    ]
    write_objects(catalog=self.catalog, chunk=written)

    # Act
    ids = [instrument.id.value for instrument in written]
    loaded = self.catalog.instruments(instrument_ids=ids)

    # Assert
    assert len(loaded) == 3
def test_catalog_bar_query_instrument_id(self):
    """Bars should be queryable by instrument id both as objects and as a dataframe."""
    # Arrange
    bar = TestDataStubs.bar_5decimal()
    write_objects(catalog=self.catalog, chunk=[bar])
    query_ids = [TestIdStubs.audusd_id().value]

    # Act
    objs = self.catalog.bars(instrument_ids=query_ids, as_nautilus=True)
    frame = self.catalog.bars(instrument_ids=query_ids)

    # Assert
    assert len(objs) == 1
    assert frame.shape[0] == 1
    assert "instrument_id" in frame.columns
def back_fill_catalog(
    ib: IB,
    catalog: DataCatalog,
    contracts: List[Contract],
    start_date: datetime.date,
    end_date: datetime.date,
    tz_name="Asia/Hong_Kong",
    kinds=("BID_ASK", "TRADES"),
):
    """
    Back fill the data catalog with market data from Interactive Brokers.

    Parameters
    ----------
    ib : IB
        The ib_insync client.
    catalog : DataCatalog
        DataCatalog to write the data to
    contracts : List[Contract]
        The list of IB Contracts to collect data for
    start_date : datetime.date
        The start_date for the back fill.
    end_date : datetime.date
        The end_date for the back fill.
    tz_name : str
        The timezone of the contracts
    kinds : tuple[str] (default: ('BID_ASK', 'TRADES'))
        The kinds to query data for

    Raises
    ------
    RuntimeError
        If an unknown ``kind`` is requested.
    """
    # Contract details (and the parsed instrument) do not vary by date or kind,
    # so resolve each contract once up front instead of once per date/kind/contract.
    resolved = []
    for contract in contracts:
        [details] = ib.reqContractDetails(contract=contract)
        resolved.append((contract, parse_instrument(contract_details=details)))

    # Business days only; weekends are skipped by pd.bdate_range.
    for date in pd.bdate_range(start_date, end_date):
        for kind in kinds:
            for contract, instrument in resolved:
                raw = fetch_market_data(
                    contract=contract,
                    date=date.to_pydatetime(),
                    kind=kind,
                    tz_name=tz_name,
                    ib=ib,
                )
                if kind == "TRADES":
                    ticks = parse_historic_trade_ticks(
                        historic_ticks=raw, instrument_id=instrument.id
                    )
                elif kind == "BID_ASK":
                    ticks = parse_historic_quote_ticks(
                        historic_ticks=raw, instrument_id=instrument.id
                    )
                else:
                    # Fail loudly with context rather than a bare RuntimeError().
                    raise RuntimeError(f"Unknown kind: {kind}")
                write_objects(catalog=catalog, chunk=ticks)
def main(catalog: DataCatalog):
    """
    Rename match_id to trade_id in TradeTick.

    One-off catalog migration: for each instrument, reload its trade ticks with
    the old ``match_id`` column projected onto ``trade_id``, rewrite them into a
    sibling ``<path>_tmp`` catalog, then drop the original partition. Each
    original partition is parked under ``trade_tick.parquet_tmp`` for the
    duration of its rewrite so it can be restored if anything fails.
    """
    fs: fsspec.AbstractFileSystem = catalog.fs
    print("Loading instrument ids")
    # Only the instrument_id column is needed to enumerate partitions to migrate.
    instrument_ids = catalog.query(TradeTick, table_kwargs={"columns": ["instrument_id"] })["instrument_id"].unique()
    # Target catalog lives next to the source at "<path>_tmp", on the same filesystem.
    tmp_catalog = DataCatalog(str(catalog.path) + "_tmp")
    tmp_catalog.fs = catalog.fs
    for ins_id in tqdm(instrument_ids):
        # Load trades for instrument, mapping the legacy match_id field onto trade_id.
        trades = catalog.trade_ticks(
            instrument_ids=[ins_id],
            projections={"trade_id": ds.field("match_id")},
            as_nautilus=True,
        )
        # Create temp parquet in case of error: park the original partition aside
        # so it can be moved back if the rewrite or verification below fails.
        fs.move(
            f"{catalog.path}/data/trade_tick.parquet/instrument_id={ins_id}",
            f"{catalog.path}/data/trade_tick.parquet_tmp/instrument_id={ins_id}",
            recursive=True,
        )
        try:
            # Rewrite to new catalog
            write_objects(tmp_catalog, trades)
            # Ensure we can query again
            _ = tmp_catalog.trade_ticks(instrument_ids=[ins_id], as_nautilus=True)
            # Clear temp parquet — migration for this instrument succeeded.
            fs.rm(
                f"{catalog.path}/data/trade_tick.parquet_tmp/instrument_id={ins_id}",
                recursive=True)
        except Exception:
            # Best-effort rollback: warn, restore the parked partition, and
            # continue with the remaining instruments.
            warnings.warn(f"Failed to write or read instrument_id {ins_id}")
            fs.move(
                f"{catalog.path}/data/trade_tick.parquet_tmp/instrument_id={ins_id}",
                f"{catalog.path}/data/trade_tick.parquet/instrument_id={ins_id}",
                recursive=True,
            )
def _test_serialization(self, obj: Any):
    """Round-trip ``obj`` through ParquetSerializer and the catalog; return True on success."""
    cls = type(obj)
    chunk = ParquetSerializer.serialize(obj)
    if not isinstance(chunk, list):
        chunk = [chunk]
    restored = ParquetSerializer.deserialize(cls=cls, chunk=chunk)

    # Assert — a non-list input may legitimately come back wrapped in a list.
    expected = [obj] if isinstance(restored, list) and not isinstance(obj, list) else obj
    assert restored == expected

    write_objects(catalog=self.catalog, chunk=[obj])
    df = self.catalog._query(cls=cls)
    assert len(df) == 1
    nautilus = self.catalog._query(cls=cls, as_dataframe=False)[0]
    assert nautilus.ts_init == 0
    return True
def test_serialize_and_deserialize_order_book_deltas(self):
    """A two-delta OrderBookDeltas should survive a Parquet round trip."""
    common = {
        "instrument_id": "AUD/USD.SIM",
        "ts_event": 0,
        "ts_init": 0,
        "book_type": "L2_MBP",
    }
    raw_deltas = [
        {
            "action": "ADD",
            "order_side": "BUY",
            "order_price": 8.0,
            "order_size": 30.0,
            "order_id": "e0364f94-8fcb-0262-cbb3-075c51ee4917",
        },
        {
            "action": "ADD",
            "order_side": "SELL",
            "order_price": 15.0,
            "order_size": 10.0,
            "order_id": "cabec174-acc6-9204-9ebf-809da3896daf",
        },
    ]
    deltas = OrderBookDeltas(
        instrument_id=TestStubs.audusd_id(),
        book_type=BookType.L2_MBP,
        deltas=[OrderBookDelta.from_dict({**common, **raw}) for raw in raw_deltas],
        ts_event=0,
        ts_init=0,
    )

    payload = ParquetSerializer.serialize(deltas)
    restored = ParquetSerializer.deserialize(cls=OrderBookDeltas, chunk=payload)

    # Assert
    assert restored == [deltas]
    write_objects(catalog=self.catalog, chunk=[deltas])
def test_serialize_and_deserialize_order_book_delta(self):
    """A single CLEAR delta should deserialize wrapped in an OrderBookDeltas container."""
    clear_delta = OrderBookDelta(
        instrument_id=TestStubs.audusd_id(),
        book_type=BookType.L2_MBP,
        action=BookAction.CLEAR,
        order=None,
        ts_event=0,
        ts_init=0,
    )
    expected = OrderBookDeltas(
        instrument_id=TestStubs.audusd_id(),
        book_type=BookType.L2_MBP,
        deltas=[clear_delta],
        ts_event=0,
        ts_init=0,
    )

    payload = ParquetSerializer.serialize(clear_delta)
    [restored] = ParquetSerializer.deserialize(cls=OrderBookDelta, chunk=payload)

    # Assert
    assert restored == expected
    write_objects(catalog=self.catalog, chunk=[clear_delta])
def test_writing_instruments_doesnt_overwrite(self):
    """Writing instruments one at a time should accumulate rather than overwrite."""
    existing = self.catalog.instruments(as_nautilus=True)
    first, second = existing[0], existing[1]
    write_objects(catalog=self.catalog, chunk=[first])
    write_objects(catalog=self.catalog, chunk=[second])
    reloaded = self.catalog.instruments(as_nautilus=True)
    assert len(reloaded) == 2
def test_serialize_and_deserialize_order_book_deltas_grouped(self):
    """Grouped deltas (including a CLEAR) should round trip with actions and order intact."""
    common = {
        "instrument_id": "AUD/USD.SIM",
        "ts_event": 0,
        "ts_init": 0,
        "book_type": "L2_MBP",
    }
    # NOTE: renamed from the shadowed `deltas` — this is the raw dict form,
    # the OrderBookDeltas container below is `grouped`.
    raw_deltas = [
        {
            "action": "ADD",
            "order_side": "SELL",
            "order_price": 0.9901,
            "order_size": 327.25,
            "order_id": "1",
        },
        {
            "action": "CLEAR",
            "order_side": None,
            "order_price": None,
            "order_size": None,
            "order_id": None,
        },
        {
            "action": "ADD",
            "order_side": "SELL",
            "order_price": 0.98039,
            "order_size": 27.91,
            "order_id": "2",
        },
        {
            "action": "ADD",
            "order_side": "SELL",
            "order_price": 0.97087,
            "order_size": 14.43,
            "order_id": "3",
        },
    ]
    grouped = OrderBookDeltas(
        instrument_id=TestStubs.audusd_id(),
        book_type=BookType.L2_MBP,
        deltas=[OrderBookDelta.from_dict({**common, **raw}) for raw in raw_deltas],
        ts_event=0,
        ts_init=0,
    )

    payload = ParquetSerializer.serialize(grouped)
    [restored] = ParquetSerializer.deserialize(cls=OrderBookDeltas, chunk=payload)

    # Assert
    assert restored == grouped
    write_objects(catalog=self.catalog, chunk=[restored])
    assert [d.action for d in restored.deltas] == [
        BookAction.ADD,
        BookAction.CLEAR,
        BookAction.ADD,
        BookAction.ADD,
    ]