Esempio n. 1
0
    def test_data_catalog_instrument_ids_correctly_unmapped(self):
        """
        Write an AUD/USD.SIM instrument and one trade tick for it, read both
        back as Nautilus objects and check the ids still read "AUD/USD.SIM".
        """
        # Arrange
        catalog = DataCatalog.from_env()
        audusd = TestInstrumentProvider.default_fx_ccy("AUD/USD", venue=Venue("SIM"))
        tick = TradeTick(
            instrument_id=audusd.id,
            price=Price.from_str("2.0"),
            size=Quantity.from_int(10),
            aggressor_side=AggressorSide.UNKNOWN,
            trade_id=TradeId("1"),
            ts_event=0,
            ts_init=0,
        )
        write_objects(catalog=catalog, chunk=[audusd, tick])

        # Act
        loaded_instruments = catalog.instruments(
            instrument_ids=["AUD/USD.SIM"],
            as_nautilus=True,
        )
        loaded_instrument = loaded_instruments[0]
        loaded_ticks = catalog.trade_ticks(
            instrument_ids=["AUD/USD.SIM"],
            as_nautilus=True,
        )
        loaded_tick = loaded_ticks[0]

        # Assert
        assert loaded_instrument.id.value == "AUD/USD.SIM"
        assert loaded_tick.instrument_id.value == "AUD/USD.SIM"
    def test_catalog_generic_data_not_overwritten(self):
        """
        Load USD news events, reset the catalog, then write two slices of the
        events that share indices 5-9 and check the row counts after each
        write (10, then 15).
        """
        # Arrange
        TestPersistenceStubs.setup_news_event_persistence()
        csv_reader = CSVReader(block_parser=TestPersistenceStubs.news_event_parser)
        process_files(
            glob_path=f"{TEST_DATA_DIR}/news_events.csv",
            reader=csv_reader,
            catalog=self.catalog,
        )
        usd_events = self.catalog.generic_data(
            cls=NewsEventData,
            filter_expr=ds.field("currency") == "USD",
            as_nautilus=True,
        )

        # Start again from a freshly set-up catalog
        data_catalog_setup()
        self.catalog = DataCatalog.from_env()

        leftover = self.catalog.generic_data(
            NewsEventData,
            raise_on_empty=False,
            as_nautilus=True,
        )
        assert len(leftover) == 0

        first_batch = usd_events[:10]
        overlapping_batch = usd_events[5:15]

        # Act, Assert
        write_objects(catalog=self.catalog, chunk=first_batch)
        after_first = self.catalog.generic_data(NewsEventData)
        assert len(after_first) == 10

        write_objects(catalog=self.catalog, chunk=overlapping_batch)
        after_second = self.catalog.generic_data(NewsEventData)
        assert len(after_second) == 15
    def test_serialize_and_deserialize_account_state(self, event):
        """
        Round-trip ``event`` through ``ParquetSerializer`` as an
        ``AccountState`` and then write it to the catalog.
        """
        payload = ParquetSerializer.serialize(event)
        restored = ParquetSerializer.deserialize(cls=AccountState, chunk=payload)
        # Exactly one object is expected back; unpacking fails otherwise
        (round_tripped,) = restored

        # Assert
        assert round_tripped == event

        write_objects(catalog=self.catalog, chunk=[event])
    def test_serialize_and_deserialize_order_book_snapshot(self):
        """
        Round-trip the stub order book snapshot through ``ParquetSerializer``
        and then write it to the catalog.
        """
        snapshot = TestStubs.order_book_snapshot()

        payload = ParquetSerializer.serialize(snapshot)
        restored = ParquetSerializer.deserialize(
            cls=OrderBookSnapshot,
            chunk=payload,
        )

        # Assert
        expected = [snapshot]
        assert restored == expected
        write_objects(catalog=self.catalog, chunk=[snapshot])
    def test_serialize_and_deserialize_instruments(self, instrument):
        """
        Round-trip ``instrument`` through ``ParquetSerializer``, write it to
        the catalog and check the catalog then reports a single instrument.
        """
        payload = ParquetSerializer.serialize(instrument)
        assert payload
        restored = ParquetSerializer.deserialize(
            cls=type(instrument),
            chunk=[payload],
        )

        # Assert
        expected = [instrument]
        assert restored == expected
        write_objects(catalog=self.catalog, chunk=[instrument])
        stored = self.catalog.instruments()
        assert len(stored) == 1
    def test_serialize_and_deserialize_trading_state_changed(self):
        """
        Round-trip the stub ``TradingStateChanged`` event through
        ``ParquetSerializer`` and then write it to the catalog.
        """
        state_changed = TestStubs.event_trading_state_changed()

        payload = ParquetSerializer.serialize(state_changed)
        restored = ParquetSerializer.deserialize(
            cls=TradingStateChanged,
            chunk=[payload],
        )
        # Exactly one object is expected back; unpacking fails otherwise
        (round_tripped,) = restored

        # Assert
        assert round_tripped == state_changed

        write_objects(catalog=self.catalog, chunk=[state_changed])
Esempio n. 7
0
    def test_data_catalog_currency_with_null_max_price_loads(self):
        """
        Write the default AUD/USD.SIM instrument, load it back as a Nautilus
        object and check its ``max_price`` is ``None``.
        """
        # Arrange
        catalog = DataCatalog.from_env()
        audusd = TestInstrumentProvider.default_fx_ccy("AUD/USD", venue=Venue("SIM"))
        write_objects(catalog=catalog, chunk=[audusd])

        # Act
        loaded = catalog.instruments(
            instrument_ids=["AUD/USD.SIM"],
            as_nautilus=True,
        )
        reloaded = loaded[0]

        # Assert
        assert reloaded.max_price is None
    def test_data_catalog_instruments_load(self):
        """
        Write an equity, a future and an option to the catalog and check
        that three instruments load back as Nautilus objects.
        """
        # Arrange
        equity = TestInstrumentProvider.aapl_equity()
        future = TestInstrumentProvider.es_future()
        option = TestInstrumentProvider.aapl_option()
        write_objects(catalog=self.catalog, chunk=[equity, future, option])

        # Act
        loaded = self.catalog.instruments(as_nautilus=True)

        # Assert
        assert len(loaded) == 3
    def test_data_catalog_instruments_filter_by_instrument_id(self):
        """
        Write an equity, a future and an option, then query the catalog by
        their three instrument ids and check three rows come back.
        """
        # Arrange
        equity = TestInstrumentProvider.aapl_equity()
        future = TestInstrumentProvider.es_future()
        option = TestInstrumentProvider.aapl_option()
        written = [equity, future, option]
        write_objects(catalog=self.catalog, chunk=written)

        # Act
        wanted_ids = [item.id.value for item in written]
        matched = self.catalog.instruments(instrument_ids=wanted_ids)

        # Assert
        assert len(matched) == 3
Esempio n. 10
0
    def test_catalog_bar_query_instrument_id(self):
        """
        Write one stub bar and query it back by the AUD/USD stub id, once as
        Nautilus objects and once in the catalog's default (tabular) form.
        """
        # Arrange
        stub_bar = TestDataStubs.bar_5decimal()
        write_objects(catalog=self.catalog, chunk=[stub_bar])

        # Act
        as_objects = self.catalog.bars(
            instrument_ids=[TestIdStubs.audusd_id().value],
            as_nautilus=True,
        )
        as_table = self.catalog.bars(
            instrument_ids=[TestIdStubs.audusd_id().value],
        )

        # Assert
        assert len(as_objects) == 1
        row_count = as_table.shape[0]
        assert row_count == 1
        assert "instrument_id" in as_table.columns
Esempio n. 11
0
def back_fill_catalog(
        ib: IB,
        catalog: DataCatalog,
        contracts: List[Contract],
        start_date: datetime.date,
        end_date: datetime.date,
        tz_name="Asia/Hong_Kong",
        kinds=("BID_ASK", "TRADES"),
):
    """
    Back fill the data catalog with market data from Interactive Brokers.

    For every business day between `start_date` and `end_date`, every kind
    and every contract, the raw historic ticks are fetched, parsed into
    trade or quote ticks and written to `catalog`.

    Parameters
    ----------
    ib : IB
        The ib_insync client.
    catalog : DataCatalog
        DataCatalog to write the data to
    contracts : List[Contract]
        The list of IB Contracts to collect data for
    start_date : datetime.date
        The start_date for the back fill.
    end_date : datetime.date
        The end_date for the back fill.
    tz_name : str
        The timezone of the contracts
    kinds : tuple[str] (default: ('BID_ASK', 'TRADES'))
        The kinds to query data for

    Raises
    ------
    RuntimeError
        If `kinds` contains a value other than 'BID_ASK' or 'TRADES'.
    """
    # Both parsers are called with the same keyword arguments, so the kind
    # can select the parser directly.
    parsers = {
        "TRADES": parse_historic_trade_ticks,
        "BID_ASK": parse_historic_quote_ticks,
    }

    # Reject unsupported kinds before any request is sent, rather than after
    # the market data for that kind has already been fetched.
    kinds = tuple(kinds)
    unsupported = [kind for kind in kinds if kind not in parsers]
    if unsupported:
        raise RuntimeError(
            f"Unsupported kinds {unsupported!r}; expected 'BID_ASK' or 'TRADES'"
        )

    business_days = pd.bdate_range(start_date, end_date)
    # A pandas index has no truth value, hence the explicit length check.
    if len(business_days) == 0 or not kinds:
        return

    # The instrument of a contract does not depend on the date or the kind,
    # so look it up once per contract instead of once per request.
    instrument_ids = []
    for contract in contracts:
        [details] = ib.reqContractDetails(contract=contract)
        instrument = parse_instrument(contract_details=details)
        instrument_ids.append(instrument.id)

    for date in business_days:
        day = date.to_pydatetime()
        for kind in kinds:
            parse_ticks = parsers[kind]
            for contract, instrument_id in zip(contracts, instrument_ids):
                raw = fetch_market_data(contract=contract,
                                        date=day,
                                        kind=kind,
                                        tz_name=tz_name,
                                        ib=ib)
                ticks = parse_ticks(historic_ticks=raw,
                                    instrument_id=instrument_id)
                write_objects(catalog=catalog, chunk=ticks)
Esempio n. 12
0
def main(catalog: DataCatalog):
    """
    Rename match_id to trade_id in TradeTick.

    For each instrument id found in the TradeTick data, the trades are
    loaded with ``match_id`` projected onto ``trade_id``, the instrument's
    partition is moved aside to a ``trade_tick.parquet_tmp`` directory, and
    the trades are rewritten into a second catalog located at
    ``<catalog.path>_tmp``. When the rewrite and a follow-up read succeed
    the moved-aside partition is removed; on any exception a warning is
    issued and the partition is moved back.
    """
    fs: fsspec.AbstractFileSystem = catalog.fs

    print("Loading instrument ids")
    id_frame = catalog.query(
        TradeTick,
        table_kwargs={"columns": ["instrument_id"]},
    )
    instrument_ids = id_frame["instrument_id"].unique()

    tmp_catalog = DataCatalog(str(catalog.path) + "_tmp")
    tmp_catalog.fs = catalog.fs

    for ins_id in tqdm(instrument_ids):

        # Load trades for instrument
        trades = catalog.trade_ticks(
            instrument_ids=[ins_id],
            projections={"trade_id": ds.field("match_id")},
            as_nautilus=True,
        )

        live_partition = f"{catalog.path}/data/trade_tick.parquet/instrument_id={ins_id}"
        backup_partition = f"{catalog.path}/data/trade_tick.parquet_tmp/instrument_id={ins_id}"

        # Keep the original partition aside in case of error
        fs.move(live_partition, backup_partition, recursive=True)

        try:
            # Rewrite to new catalog
            write_objects(tmp_catalog, trades)

            # Ensure we can query again
            _ = tmp_catalog.trade_ticks(instrument_ids=[ins_id], as_nautilus=True)

            # Drop the moved-aside partition
            fs.rm(backup_partition, recursive=True)

        except Exception:
            warnings.warn(f"Failed to write or read instrument_id {ins_id}")
            # Put the original partition back where it was
            fs.move(backup_partition, live_partition, recursive=True)
    def _test_serialization(self, obj: Any):
        """
        Round-trip ``obj`` through ``ParquetSerializer`` and the catalog.

        Asserts that deserializing the serialized form yields ``obj`` (or
        ``[obj]`` when a list comes back), that the catalog holds exactly one
        row for the object's type after writing, and that the first object
        read back has ``ts_init == 0``. Returns ``True`` when all assertions
        pass.
        """
        obj_type = type(obj)
        payload = ParquetSerializer.serialize(obj)
        # The deserializer is always handed a list
        chunk = payload if isinstance(payload, list) else [payload]
        restored = ParquetSerializer.deserialize(cls=obj_type, chunk=chunk)

        # Assert
        needs_wrapping = isinstance(restored, list) and not isinstance(obj, list)
        expected = [obj] if needs_wrapping else obj
        assert restored == expected
        write_objects(catalog=self.catalog, chunk=[obj])
        frame = self.catalog._query(cls=obj_type)
        assert len(frame) == 1
        loaded = self.catalog._query(cls=obj_type, as_dataframe=False)
        first = loaded[0]
        assert first.ts_init == 0
        return True
    def test_serialize_and_deserialize_order_book_deltas(self):
        """
        Round-trip an ``OrderBookDeltas`` holding one BUY and one SELL "ADD"
        delta through ``ParquetSerializer`` and then write it to the catalog.
        """
        shared_fields = {
            "instrument_id": "AUD/USD.SIM",
            "ts_event": 0,
            "ts_init": 0,
            "book_type": "L2_MBP",
        }
        instrument_id = TestStubs.audusd_id()
        bid_add = OrderBookDelta.from_dict({
            "action": "ADD",
            "order_side": "BUY",
            "order_price": 8.0,
            "order_size": 30.0,
            "order_id": "e0364f94-8fcb-0262-cbb3-075c51ee4917",
            **shared_fields,
        })
        ask_add = OrderBookDelta.from_dict({
            "action": "ADD",
            "order_side": "SELL",
            "order_price": 15.0,
            "order_size": 10.0,
            "order_id": "cabec174-acc6-9204-9ebf-809da3896daf",
            **shared_fields,
        })
        book_deltas = OrderBookDeltas(
            instrument_id=instrument_id,
            book_type=BookType.L2_MBP,
            deltas=[bid_add, ask_add],
            ts_event=0,
            ts_init=0,
        )

        payload = ParquetSerializer.serialize(book_deltas)
        restored = ParquetSerializer.deserialize(
            cls=OrderBookDeltas,
            chunk=payload,
        )

        # Assert
        expected = [book_deltas]
        assert restored == expected
        write_objects(catalog=self.catalog, chunk=[book_deltas])
    def test_serialize_and_deserialize_order_book_delta(self):
        """
        Serialize a single CLEAR ``OrderBookDelta`` and check that it
        deserializes to an ``OrderBookDeltas`` wrapping that one delta, then
        write the delta to the catalog.
        """
        clear_delta = OrderBookDelta(
            instrument_id=TestStubs.audusd_id(),
            book_type=BookType.L2_MBP,
            action=BookAction.CLEAR,
            order=None,
            ts_event=0,
            ts_init=0,
        )

        payload = ParquetSerializer.serialize(clear_delta)
        restored = ParquetSerializer.deserialize(
            cls=OrderBookDelta,
            chunk=payload,
        )
        # Exactly one object is expected back; unpacking fails otherwise
        (round_tripped,) = restored

        # Assert
        wrapped = OrderBookDeltas(
            instrument_id=TestStubs.audusd_id(),
            book_type=BookType.L2_MBP,
            deltas=[clear_delta],
            ts_event=0,
            ts_init=0,
        )
        assert round_tripped == wrapped
        write_objects(catalog=self.catalog, chunk=[clear_delta])
Esempio n. 16
0
 def test_writing_instruments_doesnt_overwrite(self):
     """
     Write the first two instruments already in the catalog back one at a
     time and check the catalog then reports two instruments.
     """
     existing = self.catalog.instruments(as_nautilus=True)
     # One write per instrument, in index order
     for position in (0, 1):
         write_objects(catalog=self.catalog, chunk=[existing[position]])
     reloaded = self.catalog.instruments(as_nautilus=True)
     assert len(reloaded) == 2
    def test_serialize_and_deserialize_order_book_deltas_grouped(self):
        """
        Round-trip an ``OrderBookDeltas`` built from ADD, CLEAR, ADD, ADD
        deltas and check the deserialized object equals the original and
        keeps the actions in that order.
        """
        shared_fields = {
            "instrument_id": "AUD/USD.SIM",
            "ts_event": 0,
            "ts_init": 0,
            "book_type": "L2_MBP",
        }
        raw_deltas = [
            {
                "action": "ADD",
                "order_side": "SELL",
                "order_price": 0.9901,
                "order_size": 327.25,
                "order_id": "1",
            },
            {
                "action": "CLEAR",
                "order_side": None,
                "order_price": None,
                "order_size": None,
                "order_id": None,
            },
            {
                "action": "ADD",
                "order_side": "SELL",
                "order_price": 0.98039,
                "order_size": 27.91,
                "order_id": "2",
            },
            {
                "action": "ADD",
                "order_side": "SELL",
                "order_price": 0.97087,
                "order_size": 14.43,
                "order_id": "3",
            },
        ]
        book_deltas = OrderBookDeltas(
            instrument_id=TestStubs.audusd_id(),
            book_type=BookType.L2_MBP,
            deltas=[
                OrderBookDelta.from_dict({**shared_fields, **raw})
                for raw in raw_deltas
            ],
            ts_event=0,
            ts_init=0,
        )

        payload = ParquetSerializer.serialize(book_deltas)
        restored = ParquetSerializer.deserialize(
            cls=OrderBookDeltas,
            chunk=payload,
        )
        # Exactly one object is expected back; unpacking fails otherwise
        (round_tripped,) = restored

        # Assert
        assert round_tripped == book_deltas
        write_objects(catalog=self.catalog, chunk=[round_tripped])
        restored_actions = [delta.action for delta in round_tripped.deltas]
        assert restored_actions == [
            BookAction.ADD,
            BookAction.CLEAR,
            BookAction.ADD,
            BookAction.ADD,
        ]