Exemple #1
0
    def test_data_catalog_instrument_ids_correctly_unmapped(self):
        """
        Write an AUD/USD instrument and a single trade tick to the catalog,
        read both back, and check each one carries the "AUD/USD.SIM" id.
        """
        # Arrange
        catalog = DataCatalog.from_env()
        audusd = TestInstrumentProvider.default_fx_ccy("AUD/USD", venue=Venue("SIM"))
        tick = TradeTick(
            instrument_id=audusd.id,
            price=Price.from_str("2.0"),
            size=Quantity.from_int(10),
            aggressor_side=AggressorSide.UNKNOWN,
            trade_id=TradeId("1"),
            ts_event=0,
            ts_init=0,
        )
        write_objects(catalog=catalog, chunk=[audusd, tick])

        # Act
        loaded_instruments = catalog.instruments(
            instrument_ids=["AUD/USD.SIM"],
            as_nautilus=True,
        )
        loaded_instrument = loaded_instruments[0]
        loaded_ticks = catalog.trade_ticks(
            instrument_ids=["AUD/USD.SIM"],
            as_nautilus=True,
        )
        loaded_tick = loaded_ticks[0]

        # Assert
        assert loaded_instrument.id.value == "AUD/USD.SIM"
        assert loaded_tick.instrument_id.value == "AUD/USD.SIM"
 def setup(self):
     """
     Reset the catalog, load the AUD/USD data and build the backtest run
     config used by the tests.
     """
     data_catalog_setup()
     # Run dask with its single-threaded scheduler for this test setup.
     dask.config.set(scheduler="single-threaded")
     aud_usd_data_loader()
     self.catalog = DataCatalog.from_env()

     engine_config = BacktestEngineConfig()
     sim_venue = BacktestVenueConfig(
         name="SIM",
         oms_type="HEDGING",
         account_type="MARGIN",
         base_currency="USD",
         starting_balances=["1000000 USD"],
         # fill_model=fill_model,  # TODO(cs): Implement next iteration
     )
     quote_tick_data = BacktestDataConfig(
         catalog_path="/root",
         catalog_fs_protocol="memory",
         data_cls=QuoteTick,
         instrument_id="AUD/USD.SIM",
         start_time=1580398089820000000,
         end_time=1580504394501000000,
     )
     self.backtest_config = BacktestRunConfig(
         engine=engine_config,
         venues=[sim_venue],
         data=[quote_tick_data],
     )
Exemple #3
0
def aud_usd_data_loader():
    """
    Load the TrueFX AUD/USD tick CSV from the test data directory into the
    catalog returned by ``DataCatalog.from_env()``.
    """
    from nautilus_trader.backtest.data.providers import TestInstrumentProvider
    from tests.test_kit.stubs import TestStubs
    from tests.unit_tests.backtest.test_backtest_config import TEST_DATA_DIR

    instrument = TestInstrumentProvider.default_fx_ccy("AUD/USD", venue=Venue("SIM"))

    def parse_csv_tick(df, instrument_id):
        # Emit the instrument first, then one QuoteTick per row of the frame.
        yield instrument
        for row in df.values:
            # Column 0 holds the timestamp; columns 1 and 2 the bid and ask.
            ts_ns = secs_to_nanos(pd.Timestamp(row[0]).timestamp())
            yield QuoteTick(
                instrument_id=instrument_id,
                bid=Price.from_str(str(row[1])),
                ask=Price.from_str(str(row[2])),
                bid_size=Quantity.from_int(1_000_000),
                ask_size=Quantity.from_int(1_000_000),
                ts_event=ts_ns,
                ts_init=ts_ns,
            )

    catalog = DataCatalog.from_env()
    provider = InstrumentProvider()
    provider.add(instrument)

    csv_reader = CSVReader(
        block_parser=partial(parse_csv_tick, instrument_id=TestStubs.audusd_id()),
        as_dataframe=True,
    )
    process_files(
        glob_path=f"{TEST_DATA_DIR}/truefx-audusd-ticks.csv",
        reader=csv_reader,
        instrument_provider=provider,
        catalog=catalog,
    )
    def test_catalog_generic_data_not_overwritten(self):
        """
        Write two overlapping slices of news events into an emptied catalog
        and check it holds 10 rows after the first write and 15 after the
        second.
        """
        # Arrange
        TestPersistenceStubs.setup_news_event_persistence()
        news_reader = CSVReader(block_parser=TestPersistenceStubs.news_event_parser)
        process_files(
            glob_path=f"{TEST_DATA_DIR}/news_events.csv",
            reader=news_reader,
            catalog=self.catalog,
        )
        usd_events = self.catalog.generic_data(
            cls=NewsEventData,
            filter_expr=ds.field("currency") == "USD",
            as_nautilus=True,
        )

        # Clear the catalog again
        data_catalog_setup()
        self.catalog = DataCatalog.from_env()

        leftover = self.catalog.generic_data(
            NewsEventData,
            raise_on_empty=False,
            as_nautilus=True,
        )
        assert len(leftover) == 0

        # Elements 5..9 appear in both slices.
        first_batch = usd_events[:10]
        second_batch = usd_events[5:15]

        # Act, Assert
        write_objects(catalog=self.catalog, chunk=first_batch)
        assert len(self.catalog.generic_data(NewsEventData)) == 10

        write_objects(catalog=self.catalog, chunk=second_batch)
        assert len(self.catalog.generic_data(NewsEventData)) == 15
    def test_write_parquet_partitions(self):
        """
        Write a frame partitioned on ``instrument_id`` and check the values
        read back and the per-partition file locations.
        """
        # Arrange
        catalog = DataCatalog.from_env()
        fs = catalog.fs
        root = catalog.path
        path = "sample.parquet"

        frame = pd.DataFrame(
            {"value": np.random.random(5), "instrument_id": ["a", "a", "a", "b", "b"]}
        )
        schema = pa.schema({"value": pa.float64(), "instrument_id": pa.string()})

        # Act
        write_parquet(
            fs=fs,
            path=f"{root}/{path}",
            df=frame,
            schema=schema,
            partition_cols=["instrument_id"],
        )
        dataset = ds.dataset(str(root.joinpath("sample.parquet")), filesystem=fs)
        table = dataset.to_table()
        result = table.to_pandas()

        # Assert
        expected = frame[["value"]]  # instrument_id is a partition now
        assert result.equals(expected)
        assert dataset.files[0].startswith("/root/sample.parquet/instrument_id=a/")
        assert dataset.files[1].startswith("/root/sample.parquet/instrument_id=b/")
    def test_write_parquet_no_partitions(self):
        """
        Write a frame without partition columns and check that reading the
        dataset back yields an equal frame.
        """
        # Arrange
        frame = pd.DataFrame(
            {"value": np.random.random(5), "instrument_id": ["a", "a", "a", "b", "b"]}
        )
        catalog = DataCatalog.from_env()
        fs = catalog.fs
        root = catalog.path
        schema = pa.schema({"value": pa.float64(), "instrument_id": pa.string()})

        # Act
        write_parquet(
            fs=fs,
            path=f"{root}/sample.parquet",
            df=frame,
            schema=schema,
            partition_cols=None,
        )
        dataset = ds.dataset(str(root.joinpath("sample.parquet")), filesystem=fs)
        result = dataset.to_table().to_pandas()

        # Assert
        assert result.equals(frame)
Exemple #7
0
    def test_data_catalog_currency_with_null_max_price_loads(self):
        """
        Write the default AUD/USD instrument to the catalog, load it back and
        check that its ``max_price`` is ``None``.
        """
        # Arrange
        catalog = DataCatalog.from_env()
        audusd = TestInstrumentProvider.default_fx_ccy("AUD/USD", venue=Venue("SIM"))
        write_objects(catalog=catalog, chunk=[audusd])

        # Act
        loaded = catalog.instruments(
            instrument_ids=["AUD/USD.SIM"],
            as_nautilus=True,
        )
        reloaded_instrument = loaded[0]

        # Assert
        assert reloaded_instrument.max_price is None
def _reset():
    """
    Point NAUTILUS_CATALOG at the in-memory filesystem, wipe it, and
    recreate the empty "/root/data" directory.
    """
    os.environ["NAUTILUS_CATALOG"] = "memory:///root/"
    env_catalog = DataCatalog.from_env()
    assert isinstance(env_catalog.fs, MemoryFileSystem)

    # The filesystem may already be empty, in which case there is nothing to remove.
    try:
        env_catalog.fs.rm("/", recursive=True)
    except FileNotFoundError:
        pass

    env_catalog.fs.mkdir("/root/data")
    assert env_catalog.fs.exists("/root/")
Exemple #9
0
def data_catalog_setup():
    """
    Clear the DataCatalog singleton instances, point NAUTILUS_CATALOG at the
    in-memory filesystem, wipe it, and recreate an empty "/root/data".
    """
    clear_singleton_instances(DataCatalog)

    os.environ["NAUTILUS_CATALOG"] = "memory:///root/"
    env_catalog = DataCatalog.from_env()
    assert isinstance(env_catalog.fs, MemoryFileSystem)

    # The filesystem may already be empty, in which case there is nothing to remove.
    try:
        env_catalog.fs.rm("/", recursive=True)
    except FileNotFoundError:
        pass

    env_catalog.fs.mkdir("/root/data")
    assert env_catalog.fs.exists("/root/")
    assert not env_catalog.fs.ls("/root/data")
    def test_repartition_dataset(self):
        """
        Write three overlapping, out-of-order five-day frames partitioned on
        ``instrument_id``, run ``_validate_dataset`` and check the resulting
        parquet file layout.
        """
        # Arrange
        catalog = DataCatalog.from_env()
        fs = catalog.fs
        root = catalog.path
        path = "sample.parquet"
        dataset_path = f"{root}/{path}"
        parquet_glob = f"{dataset_path}/**/*.parquet"
        schema = pa.schema(
            {"value": pa.float64(), "instrument_id": pa.string(), "ts_init": pa.int64()}
        )

        # Write some out of order, overlapping
        for start_date in ("2020-01-01", "2020-01-8", "2020-01-04"):
            days = pd.date_range(start_date, periods=5, tz="UTC")
            frame = pd.DataFrame(
                {
                    "value": np.arange(5),
                    "instrument_id": ["a", "a", "a", "b", "b"],
                    "ts_init": [int(day.to_datetime64()) for day in days],
                }
            )
            write_parquet(
                fs=fs,
                path=dataset_path,
                df=frame,
                schema=schema,
                partition_cols=["instrument_id"],
            )

        files_before = fs.glob(parquet_glob)

        # Act
        _validate_dataset(catalog=catalog, path=dataset_path)
        files_after = fs.glob(parquet_glob)

        # Assert
        assert len(files_before) == 6
        expected = [
            f"/root/sample.parquet/instrument_id={instrument}/{day}.parquet"
            for instrument in ("a", "b")
            for day in ("20200101", "20200104", "20200108")
        ]
        assert files_after == expected
 def setup(self):
     """
     Reset the catalog and build the venue, data, run and strategy configs
     used by the tests.
     """
     data_catalog_setup()
     self.catalog = DataCatalog.from_env()

     self.venue_config = BacktestVenueConfig(
         name="SIM",
         venue_type="ECN",
         oms_type="HEDGING",
         account_type="MARGIN",
         base_currency="USD",
         starting_balances=["1000000 USD"],
         # fill_model=fill_model,  # TODO(cs): Implement next iteration
     )
     self.data_config = BacktestDataConfig(
         catalog_path="/root",
         catalog_fs_protocol="memory",
         data_cls_path="nautilus_trader.model.data.tick.QuoteTick",
         instrument_id="AUD/USD.SIM",
         start_time=1580398089820000000,
         end_time=1580504394501000000,
     )

     run_config = BacktestRunConfig(
         engine=BacktestEngineConfig(),
         venues=[self.venue_config],
         data=[self.data_config],
     )
     self.backtest_configs = [run_config]

     ema_cross_config = EMACrossConfig(
         instrument_id="AUD/USD.SIM",
         bar_type="AUD/USD.SIM-100-TICK-MID-INTERNAL",
         fast_ema_period=10,
         slow_ema_period=20,
         trade_size=Decimal(1_000_000),
         order_id_tag="001",
     )
     ema_cross_strategy = ImportableStrategyConfig(
         path="nautilus_trader.examples.strategies.ema_cross:EMACross",
         config=ema_cross_config,
     )
     self.strategies = [ema_cross_strategy]
 def setup(self):
     """
     Reset the catalog, keep handles to it and its filesystem, then load
     the test data into it.
     """
     data_catalog_setup()
     env_catalog = DataCatalog.from_env()
     self.catalog = env_catalog
     self.fs: fsspec.AbstractFileSystem = env_catalog.fs
     self._loaded_data_into_catalog()
 def setup(self):
     """
     Reset the catalog, keep handles to it and its filesystem, and create
     the Betfair reader stub.
     """
     data_catalog_setup()
     env_catalog = DataCatalog.from_env()
     self.catalog = env_catalog
     self.fs = env_catalog.fs
     self.reader = BetfairTestStubs.betfair_reader()
 def setup(self):
     """
     Reset the catalog and create the mock reader and line processor.
     """
     data_catalog_setup()
     env_catalog = DataCatalog.from_env()
     self.catalog = env_catalog
     self.reader = MockReader()
     self.line_preprocessor = TestLineProcessor()
Exemple #15
0
 def setup(self):
     """
     Reset the catalog, keep handles to it and its filesystem, then load
     the test data into it.
     """
     data_catalog_setup()
     env_catalog = DataCatalog.from_env()
     self.catalog = env_catalog
     self.fs = env_catalog.fs
     self._loaded_data_into_catalog()
 def setup(self):
     """
     Reset the catalog, keep handles to it and its filesystem, and create
     the mock reader.
     """
     data_catalog_setup()
     env_catalog = DataCatalog.from_env()
     self.catalog = env_catalog
     self.fs = env_catalog.fs
     self.reader = MockReader()