def merge_existing_data(catalog: DataCatalog, cls: type, df: pd.DataFrame) -> pd.DataFrame:
    """
    Handle existing data for instrument subclasses.

    Instruments all live in a single file, so merge with existing data.
    For all other classes, simply return data unchanged.

    Parameters
    ----------
    catalog : DataCatalog
        The catalog queried for instruments of type `cls`.
    cls : type
        The class of the objects represented by `df`.
    df : pd.DataFrame
        The new data about to be written. For instrument classes it must
        contain a "type" column, which is dropped before merging.

    Returns
    -------
    pd.DataFrame
        `df` unchanged for non-instrument classes, or when reading the existing
        instruments raises ``pa.lib.ArrowInvalid``; otherwise the existing rows
        followed by the new rows, with exact duplicate rows removed.

    """
    # NOTE: `__subclasses__()` lists *direct* subclasses only, so a class that
    # derives from an Instrument subclass is treated as a non-instrument here.
    if cls not in Instrument.__subclasses__():
        return df

    try:
        existing = catalog.instruments(instrument_type=cls)
    except pa.lib.ArrowInvalid:
        # Presumably no instrument data has been written yet; nothing to merge.
        return df

    incoming = df.drop(["type"], axis=1)
    # `DataFrame.append` was removed in pandas 2.0; `pd.concat` is the
    # equivalent (index preserved, columns not sorted).
    return pd.concat([existing, incoming]).drop_duplicates()
class TestParquetSerializer:
    """
    Round-trip tests for `ParquetSerializer`.

    Each test serializes an object, deserializes it again and compares the
    result with the original. Most tests also write the object to a
    `DataCatalog` backed by the in-memory filesystem protocol.
    """

    def setup_method(self):
        """
        Build the catalog and a chain of orders in progressive states.

        Named `setup_method` (the pytest xunit-style hook, run before every
        test method) rather than the nose-style `setup`, which pytest
        deprecated in 7.2 and stopped calling in 8.0.
        """
        # Fixture Setup
        _reset()
        self.catalog = DataCatalog(path="/root", fs_protocol="memory")
        self.order_factory = OrderFactory(
            trader_id=TraderId("T-001"),
            strategy_id=StrategyId("S-001"),
            clock=TestClock(),
        )
        self.order = self.order_factory.market(
            AUDUSD_SIM.id,
            OrderSide.BUY,
            Quantity.from_int(100000),
        )

        # Each stage is a shallow copy of the previous one with the next
        # lifecycle event applied, so every stage stays available to tests.
        self.order_submitted = copy.copy(self.order)
        self.order_submitted.apply(TestStubs.event_order_submitted(self.order))

        self.order_accepted = copy.copy(self.order_submitted)
        self.order_accepted.apply(TestStubs.event_order_accepted(self.order_submitted))

        self.order_pending_cancel = copy.copy(self.order_accepted)
        self.order_pending_cancel.apply(
            TestStubs.event_order_pending_cancel(self.order_accepted)
        )

        self.order_cancelled = copy.copy(self.order_pending_cancel)
        self.order_cancelled.apply(
            TestStubs.event_order_canceled(self.order_pending_cancel)
        )

    def _test_serialization(self, obj: Any):
        """
        Round-trip `obj` through the serializer and the catalog.

        Asserts that deserializing the serialized form yields `obj` again,
        that writing it to the catalog produces exactly one row for its
        class, and that the object read back has ``ts_init == 0``.

        Returns
        -------
        bool
            Always True when no assertion failed, so callers may write
            ``assert self._test_serialization(...)``.

        """
        cls = type(obj)
        serialized = ParquetSerializer.serialize(obj)
        # `deserialize` is given a list chunk; wrap a single serialized value.
        if not isinstance(serialized, list):
            serialized = [serialized]
        deserialized = ParquetSerializer.deserialize(cls=cls, chunk=serialized)

        # Assert
        expected = obj
        if isinstance(deserialized, list) and not isinstance(expected, list):
            expected = [expected]
        assert deserialized == expected

        write_objects(catalog=self.catalog, chunk=[obj])
        df = self.catalog._query(cls=cls)
        assert len(df) == 1
        nautilus = self.catalog._query(cls=cls, as_dataframe=False)[0]
        assert nautilus.ts_init == 0
        return True

    @pytest.mark.parametrize(
        "tick",
        [
            TestStubs.ticker(),
            TestStubs.quote_tick_5decimal(),
            TestStubs.trade_tick_5decimal(),
        ],
    )
    def test_serialize_and_deserialize_tick(self, tick):
        """Ticker, quote tick and trade tick survive a full round trip."""
        self._test_serialization(obj=tick)

    def test_serialize_and_deserialize_order_book_delta(self):
        """A single delta deserializes as an `OrderBookDeltas` wrapping it."""
        delta = OrderBookDelta(
            instrument_id=TestStubs.audusd_id(),
            book_type=BookType.L2_MBP,
            action=BookAction.CLEAR,
            order=None,
            ts_event=0,
            ts_init=0,
        )

        serialized = ParquetSerializer.serialize(delta)
        [deserialized] = ParquetSerializer.deserialize(cls=OrderBookDelta, chunk=serialized)

        # Assert
        expected = OrderBookDeltas(
            instrument_id=TestStubs.audusd_id(),
            book_type=BookType.L2_MBP,
            deltas=[delta],
            ts_event=0,
            ts_init=0,
        )
        assert deserialized == expected
        write_objects(catalog=self.catalog, chunk=[delta])

    def test_serialize_and_deserialize_order_book_deltas(self):
        """An `OrderBookDeltas` holding two ADD deltas round-trips unchanged."""
        kw = {
            "instrument_id": "AUD/USD.SIM",
            "ts_event": 0,
            "ts_init": 0,
            "book_type": "L2_MBP",
        }
        deltas = OrderBookDeltas(
            instrument_id=TestStubs.audusd_id(),
            book_type=BookType.L2_MBP,
            deltas=[
                OrderBookDelta.from_dict(
                    {
                        "action": "ADD",
                        "order_side": "BUY",
                        "order_price": 8.0,
                        "order_size": 30.0,
                        "order_id": "e0364f94-8fcb-0262-cbb3-075c51ee4917",
                        **kw,
                    }
                ),
                OrderBookDelta.from_dict(
                    {
                        "action": "ADD",
                        "order_side": "SELL",
                        "order_price": 15.0,
                        "order_size": 10.0,
                        "order_id": "cabec174-acc6-9204-9ebf-809da3896daf",
                        **kw,
                    }
                ),
            ],
            ts_event=0,
            ts_init=0,
        )

        serialized = ParquetSerializer.serialize(deltas)
        deserialized = ParquetSerializer.deserialize(cls=OrderBookDeltas, chunk=serialized)

        # Assert
        assert deserialized == [deltas]
        write_objects(catalog=self.catalog, chunk=[deltas])

    def test_serialize_and_deserialize_order_book_deltas_grouped(self):
        """Deltas with mixed actions keep their order through a round trip."""
        kw = {
            "instrument_id": "AUD/USD.SIM",
            "ts_event": 0,
            "ts_init": 0,
            "book_type": "L2_MBP",
        }
        raw_deltas = [
            {
                "action": "ADD",
                "order_side": "SELL",
                "order_price": 0.9901,
                "order_size": 327.25,
                "order_id": "1",
            },
            {
                "action": "CLEAR",
                "order_side": None,
                "order_price": None,
                "order_size": None,
                "order_id": None,
            },
            {
                "action": "ADD",
                "order_side": "SELL",
                "order_price": 0.98039,
                "order_size": 27.91,
                "order_id": "2",
            },
            {
                "action": "ADD",
                "order_side": "SELL",
                "order_price": 0.97087,
                "order_size": 14.43,
                "order_id": "3",
            },
        ]
        deltas = OrderBookDeltas(
            instrument_id=TestStubs.audusd_id(),
            book_type=BookType.L2_MBP,
            deltas=[OrderBookDelta.from_dict({**kw, **d}) for d in raw_deltas],
            ts_event=0,
            ts_init=0,
        )

        serialized = ParquetSerializer.serialize(deltas)
        [deserialized] = ParquetSerializer.deserialize(cls=OrderBookDeltas, chunk=serialized)

        # Assert
        assert deserialized == deltas
        write_objects(catalog=self.catalog, chunk=[deserialized])
        assert [d.action for d in deserialized.deltas] == [
            BookAction.ADD,
            BookAction.CLEAR,
            BookAction.ADD,
            BookAction.ADD,
        ]

    def test_serialize_and_deserialize_order_book_snapshot(self):
        """An order book snapshot round-trips and can be written."""
        book = TestStubs.order_book_snapshot()

        serialized = ParquetSerializer.serialize(book)
        deserialized = ParquetSerializer.deserialize(cls=OrderBookSnapshot, chunk=serialized)

        # Assert
        assert deserialized == [book]
        write_objects(catalog=self.catalog, chunk=[book])

    def test_serialize_and_deserialize_component_state_changed(self):
        """A `ComponentStateChanged` event round-trips and can be written."""
        event = TestStubs.event_component_state_changed()

        serialized = ParquetSerializer.serialize(event)
        [deserialized] = ParquetSerializer.deserialize(
            cls=ComponentStateChanged,
            chunk=[serialized],
        )

        # Assert
        assert deserialized == event
        write_objects(catalog=self.catalog, chunk=[event])

    def test_serialize_and_deserialize_trading_state_changed(self):
        """A `TradingStateChanged` event round-trips and can be written."""
        event = TestStubs.event_trading_state_changed()

        serialized = ParquetSerializer.serialize(event)
        [deserialized] = ParquetSerializer.deserialize(
            cls=TradingStateChanged,
            chunk=[serialized],
        )

        # Assert
        assert deserialized == event
        write_objects(catalog=self.catalog, chunk=[event])

    def test_serialize_and_deserialize_account_state(self):
        """A cash `AccountState` event round-trips and can be written."""
        event = TestStubs.event_cash_account_state()

        # The serialized value is passed as the chunk directly (not wrapped
        # in a list), unlike the state-changed event tests.
        serialized = ParquetSerializer.serialize(event)
        [deserialized] = ParquetSerializer.deserialize(cls=AccountState, chunk=serialized)

        # Assert
        assert deserialized == event
        write_objects(catalog=self.catalog, chunk=[event])

    @pytest.mark.parametrize(
        "event_func",
        [
            TestStubs.event_order_accepted,
            TestStubs.event_order_rejected,
            TestStubs.event_order_submitted,
        ],
    )
    def test_serialize_and_deserialize_order_events_base(self, event_func):
        """Order events built from a fresh limit order round-trip."""
        order = TestStubs.limit_order()
        event = event_func(order=order)
        self._test_serialization(obj=event)

    @pytest.mark.parametrize(
        "event_func",
        [
            TestStubs.event_order_submitted,
            TestStubs.event_order_accepted,
            TestStubs.event_order_canceled,
            TestStubs.event_order_pending_update,
            TestStubs.event_order_pending_cancel,
            TestStubs.event_order_triggered,
            TestStubs.event_order_expired,
            TestStubs.event_order_rejected,
        ],
    )
    def test_serialize_and_deserialize_order_events_post_accepted(self, event_func):
        """Order events built from the accepted fixture order round-trip."""
        # Act
        event = event_func(order=self.order_accepted)
        assert self._test_serialization(obj=event)

    @pytest.mark.parametrize(
        "event_func",
        [
            TestStubs.event_order_filled,
        ],
    )
    def test_serialize_and_deserialize_order_events_filled(self, event_func):
        """A fill event for the accepted fixture order round-trips."""
        # Act
        event = event_func(order=self.order_accepted, instrument=AUDUSD_SIM)
        self._test_serialization(obj=event)

    @pytest.mark.parametrize(
        "position_func",
        [
            TestStubs.event_position_opened,
            TestStubs.event_position_changed,
        ],
    )
    def test_serialize_and_deserialize_position_events_open_changed(self, position_func):
        """Opened/changed events for a position with one fill round-trip."""
        instrument = TestInstrumentProvider.default_fx_ccy("GBPUSD")

        order3 = self.order_factory.market(
            instrument.id,
            OrderSide.BUY,
            Quantity.from_int(100000),
        )
        fill3 = TestStubs.event_order_filled(
            order3,
            instrument=instrument,
            position_id=PositionId("P-3"),
            strategy_id=StrategyId("S-1"),
            last_px=Price.from_str("1.00000"),
        )

        position = Position(instrument=instrument, fill=fill3)

        event = position_func(position=position)
        self._test_serialization(obj=event)

    @pytest.mark.parametrize(
        "position_func",
        [
            TestStubs.event_position_closed,
        ],
    )
    def test_serialize_and_deserialize_position_events_closed(self, position_func):
        """A closed event for a position with a BUY and a SELL fill round-trips."""
        instrument = TestInstrumentProvider.default_fx_ccy("GBPUSD")

        open_order = self.order_factory.market(
            instrument.id,
            OrderSide.BUY,
            Quantity.from_int(100000),
        )
        open_fill = TestStubs.event_order_filled(
            open_order,
            instrument=instrument,
            position_id=PositionId("P-3"),
            strategy_id=StrategyId("S-1"),
            last_px=Price.from_str("1.00000"),
        )

        close_order = self.order_factory.market(
            instrument.id,
            OrderSide.SELL,
            Quantity.from_int(100000),
        )
        close_fill = TestStubs.event_order_filled(
            close_order,
            instrument=instrument,
            position_id=PositionId("P-3"),
            strategy_id=StrategyId("S-1"),
            last_px=Price.from_str("1.20000"),
        )

        position = Position(instrument=instrument, fill=open_fill)
        position.apply(close_fill)

        event = position_func(position=position)
        self._test_serialization(obj=event)

    @pytest.mark.parametrize(
        "instrument",
        [
            TestInstrumentProvider.btcusdt_binance(),
            TestInstrumentProvider.aapl_equity(),
            TestInstrumentProvider.es_future(),
            TestInstrumentProvider.aapl_option(),
        ],
    )
    def test_serialize_and_deserialize_instruments(self, instrument):
        """Each instrument round-trips and is the only one in the catalog."""
        serialized = ParquetSerializer.serialize(instrument)
        assert serialized
        deserialized = ParquetSerializer.deserialize(cls=type(instrument), chunk=[serialized])

        # Assert
        assert deserialized == [instrument]
        write_objects(catalog=self.catalog, chunk=[instrument])
        df = self.catalog.instruments()
        assert len(df) == 1

    @pytest.mark.parametrize(
        "name, obj",
        [(obj.__class__.__name__, obj) for obj in nautilus_objects()],
    )
    def test_serialize_and_deserialize_all(self, name, obj):
        """Every object from `nautilus_objects()` round-trips."""
        # Arrange, Act
        # `name` is not used in the body; it is the object's class name,
        # included as a parameter so it appears in the generated test id.
        assert self._test_serialization(obj)