def test_data_catalog_instrument_ids_correctly_unmapped(self):
    # Arrange
    catalog = DataCatalog.from_env()
    instrument = TestInstrumentProvider.default_fx_ccy("AUD/USD", venue=Venue("SIM"))
    trade_tick = TradeTick(
        instrument_id=instrument.id,
        price=Price.from_str("2.0"),
        size=Quantity.from_int(10),
        aggressor_side=AggressorSide.UNKNOWN,
        trade_id=TradeId("1"),
        ts_event=0,
        ts_init=0,
    )
    write_objects(catalog=catalog, chunk=[instrument, trade_tick])

    # Act
    instrument = catalog.instruments(instrument_ids=["AUD/USD.SIM"], as_nautilus=True)[0]
    trade_tick = catalog.trade_ticks(instrument_ids=["AUD/USD.SIM"], as_nautilus=True)[0]

    # Assert
    assert instrument.id.value == "AUD/USD.SIM"
    assert trade_tick.instrument_id.value == "AUD/USD.SIM"
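# Background for the "unmapped" naming above (a hedged aside, not from the
# source): partition values end up in directory names, and an instrument ID
# such as "AUD/USD.SIM" contains "/", which is not safe as a path component.
# The catalog therefore maps IDs to a filesystem-safe form on write and must
# unmap them on read -- the round-trip asserted above. One illustrative
# (hypothetical) mapping scheme, not the library's actual one:
def demo_id_mapping_roundtrip():
    from urllib.parse import quote, unquote

    mapped = quote("AUD/USD.SIM", safe="")  # 'AUD%2FUSD.SIM'
    assert unquote(mapped) == "AUD/USD.SIM"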
def setup(self):
    data_catalog_setup()
    dask.config.set(scheduler="single-threaded")
    aud_usd_data_loader()
    self.catalog = DataCatalog.from_env()
    self.backtest_config = BacktestRunConfig(
        engine=BacktestEngineConfig(),
        venues=[
            BacktestVenueConfig(
                name="SIM",
                oms_type="HEDGING",
                account_type="MARGIN",
                base_currency="USD",
                starting_balances=["1000000 USD"],
                # fill_model=fill_model,  # TODO(cs): Implement next iteration
            )
        ],
        data=[
            BacktestDataConfig(
                catalog_path="/root",
                catalog_fs_protocol="memory",
                data_cls=QuoteTick,
                instrument_id="AUD/USD.SIM",
                start_time=1580398089820000000,
                end_time=1580504394501000000,
            )
        ],
    )
def aud_usd_data_loader():
    from nautilus_trader.backtest.data.providers import TestInstrumentProvider
    from tests.test_kit.stubs import TestStubs
    from tests.unit_tests.backtest.test_backtest_config import TEST_DATA_DIR

    instrument = TestInstrumentProvider.default_fx_ccy("AUD/USD", venue=Venue("SIM"))

    def parse_csv_tick(df, instrument_id):
        # Yield the instrument first so it is written to the catalog
        # before its ticks, then one QuoteTick per CSV row
        yield instrument
        for r in df.values:
            ts = secs_to_nanos(pd.Timestamp(r[0]).timestamp())
            tick = QuoteTick(
                instrument_id=instrument_id,
                bid=Price.from_str(str(r[1])),
                ask=Price.from_str(str(r[2])),
                bid_size=Quantity.from_int(1_000_000),
                ask_size=Quantity.from_int(1_000_000),
                ts_event=ts,
                ts_init=ts,
            )
            yield tick

    catalog = DataCatalog.from_env()
    instrument_provider = InstrumentProvider()
    instrument_provider.add(instrument)
    process_files(
        glob_path=f"{TEST_DATA_DIR}/truefx-audusd-ticks.csv",
        reader=CSVReader(
            block_parser=partial(parse_csv_tick, instrument_id=TestStubs.audusd_id()),
            as_dataframe=True,
        ),
        instrument_provider=instrument_provider,
        catalog=catalog,
    )
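# A stripped-down illustration of the block-parser contract used above (not
# part of the original suite): CSVReader hands each parsed chunk to the
# parser, which is a generator yielding nautilus objects in order -- the
# instrument first, then one object per row.
def demo_block_parser_shape():
    def parser(rows):
        yield "instrument"        # written first
        for r in rows:
            yield f"tick:{r}"     # then one object per row

    assert list(parser([1, 2])) == ["instrument", "tick:1", "tick:2"]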
def test_catalog_generic_data_not_overwritten(self):
    # Arrange
    TestPersistenceStubs.setup_news_event_persistence()
    process_files(
        glob_path=f"{TEST_DATA_DIR}/news_events.csv",
        reader=CSVReader(block_parser=TestPersistenceStubs.news_event_parser),
        catalog=self.catalog,
    )
    objs = self.catalog.generic_data(
        cls=NewsEventData, filter_expr=ds.field("currency") == "USD", as_nautilus=True
    )

    # Clear the catalog again
    data_catalog_setup()
    self.catalog = DataCatalog.from_env()

    assert (
        len(self.catalog.generic_data(NewsEventData, raise_on_empty=False, as_nautilus=True))
        == 0
    )

    chunk1, chunk2 = objs[:10], objs[5:15]

    # Act, Assert
    write_objects(catalog=self.catalog, chunk=chunk1)
    assert len(self.catalog.generic_data(NewsEventData)) == 10

    write_objects(catalog=self.catalog, chunk=chunk2)
    assert len(self.catalog.generic_data(NewsEventData)) == 15
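# A worked check of the overlap arithmetic behind the assertions above
# (illustrative only, not part of the original suite): chunk2 re-sends
# objs[5:10], so if the catalog deduplicates rather than appends, the final
# count is 15, not 20.
def demo_generic_data_overlap_arithmetic():
    objs = list(range(20))
    chunk1, chunk2 = objs[:10], objs[5:15]
    assert len(set(chunk1) | set(chunk2)) == 15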
def test_write_parquet_partitions(self):
    # Arrange
    catalog = DataCatalog.from_env()
    fs = catalog.fs
    root = catalog.path
    path = "sample.parquet"

    df = pd.DataFrame(
        {"value": np.random.random(5), "instrument_id": ["a", "a", "a", "b", "b"]}
    )

    # Act
    write_parquet(
        fs=fs,
        path=f"{root}/{path}",
        df=df,
        schema=pa.schema({"value": pa.float64(), "instrument_id": pa.string()}),
        partition_cols=["instrument_id"],
    )
    dataset = ds.dataset(str(root.joinpath("sample.parquet")), filesystem=fs)
    result = dataset.to_table().to_pandas()

    # Assert
    assert result.equals(df[["value"]])  # instrument_id is a partition now
    assert dataset.files[0].startswith("/root/sample.parquet/instrument_id=a/")
    assert dataset.files[1].startswith("/root/sample.parquet/instrument_id=b/")
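# A minimal standalone sketch of why the assertion above compares against
# df[["value"]] only: hive-style partitioning moves the partition column out
# of the row data and into the directory names. This uses plain pyarrow on a
# local temp dir rather than the catalog helpers; an illustration, not part
# of the original suite.
def demo_hive_partition_roundtrip():
    import tempfile

    import pyarrow as pa
    import pyarrow.dataset as ds
    import pyarrow.parquet as pq

    table = pa.table({"value": [1.0, 2.0], "instrument_id": ["a", "b"]})
    with tempfile.TemporaryDirectory() as tmp:
        pq.write_to_dataset(table, tmp, partition_cols=["instrument_id"])
        # Reading without declaring partitioning yields only "value";
        # the partition column now lives in the directory names
        assert ds.dataset(tmp).to_table().column_names == ["value"]
        # Declaring hive partitioning reconstructs the column
        cols = ds.dataset(tmp, partitioning="hive").to_table().column_names
        assert set(cols) == {"value", "instrument_id"}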
def test_write_parquet_no_partitions(self):
    # Arrange
    df = pd.DataFrame(
        {"value": np.random.random(5), "instrument_id": ["a", "a", "a", "b", "b"]}
    )
    catalog = DataCatalog.from_env()
    fs = catalog.fs
    root = catalog.path

    # Act
    write_parquet(
        fs=fs,
        path=f"{root}/sample.parquet",
        df=df,
        schema=pa.schema({"value": pa.float64(), "instrument_id": pa.string()}),
        partition_cols=None,
    )
    result = (
        ds.dataset(str(root.joinpath("sample.parquet")), filesystem=fs).to_table().to_pandas()
    )

    # Assert
    assert result.equals(df)
def test_data_catalog_currency_with_null_max_price_loads(self):
    # Arrange
    catalog = DataCatalog.from_env()
    instrument = TestInstrumentProvider.default_fx_ccy("AUD/USD", venue=Venue("SIM"))
    write_objects(catalog=catalog, chunk=[instrument])

    # Act
    instrument = catalog.instruments(instrument_ids=["AUD/USD.SIM"], as_nautilus=True)[0]

    # Assert
    assert instrument.max_price is None
def _reset():
    """Clean up resources before each test run."""
    os.environ["NAUTILUS_CATALOG"] = "memory:///root/"
    catalog = DataCatalog.from_env()
    assert isinstance(catalog.fs, MemoryFileSystem)
    try:
        catalog.fs.rm("/", recursive=True)
    except FileNotFoundError:
        pass
    catalog.fs.mkdir("/root/data")
    assert catalog.fs.exists("/root/")
def data_catalog_setup():
    """
    Reset the filesystem and DataCatalog to a clean state.
    """
    clear_singleton_instances(DataCatalog)
    os.environ["NAUTILUS_CATALOG"] = "memory:///root/"
    catalog = DataCatalog.from_env()
    assert isinstance(catalog.fs, MemoryFileSystem)
    try:
        catalog.fs.rm("/", recursive=True)
    except FileNotFoundError:
        pass
    catalog.fs.mkdir("/root/data")
    assert catalog.fs.exists("/root/")
    assert not catalog.fs.ls("/root/data")
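# Context for the two reset helpers above (illustrative aside, not from the
# source): fsspec's MemoryFileSystem keeps its store at class level and fsspec
# caches filesystem instances, so "memory://" state is effectively
# process-global. That is why both helpers must rm("/", recursive=True) --
# data written by one test would otherwise leak into the next.
def demo_memory_fs_is_shared():
    import fsspec

    fs1 = fsspec.filesystem("memory")
    fs2 = fsspec.filesystem("memory")
    fs1.mkdirs("/shared", exist_ok=True)
    fs1.pipe("/shared/flag.txt", b"x")
    assert fs2.cat("/shared/flag.txt") == b"x"  # same underlying store
    fs1.rm("/shared", recursive=True)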
def test_repartition_dataset(self):
    # Arrange
    catalog = DataCatalog.from_env()
    fs = catalog.fs
    root = catalog.path
    path = "sample.parquet"

    # Write overlapping date ranges, out of order
    for start_date in ("2020-01-01", "2020-01-08", "2020-01-04"):
        df = pd.DataFrame(
            {
                "value": np.arange(5),
                "instrument_id": ["a", "a", "a", "b", "b"],
                "ts_init": [
                    int(ts.to_datetime64())
                    for ts in pd.date_range(start_date, periods=5, tz="UTC")
                ],
            }
        )
        write_parquet(
            fs=fs,
            path=f"{root}/{path}",
            df=df,
            schema=pa.schema(
                {"value": pa.float64(), "instrument_id": pa.string(), "ts_init": pa.int64()}
            ),
            partition_cols=["instrument_id"],
        )
    original_partitions = fs.glob(f"{root}/{path}/**/*.parquet")

    # Act
    _validate_dataset(catalog=catalog, path=f"{root}/{path}")
    new_partitions = fs.glob(f"{root}/{path}/**/*.parquet")

    # Assert
    assert len(original_partitions) == 6
    expected = [
        "/root/sample.parquet/instrument_id=a/20200101.parquet",
        "/root/sample.parquet/instrument_id=a/20200104.parquet",
        "/root/sample.parquet/instrument_id=a/20200108.parquet",
        "/root/sample.parquet/instrument_id=b/20200101.parquet",
        "/root/sample.parquet/instrument_id=b/20200104.parquet",
        "/root/sample.parquet/instrument_id=b/20200108.parquet",
    ]
    assert new_partitions == expected
def setup(self):
    data_catalog_setup()
    self.catalog = DataCatalog.from_env()
    self.venue_config = BacktestVenueConfig(
        name="SIM",
        venue_type="ECN",
        oms_type="HEDGING",
        account_type="MARGIN",
        base_currency="USD",
        starting_balances=["1000000 USD"],
        # fill_model=fill_model,  # TODO(cs): Implement next iteration
    )
    self.data_config = BacktestDataConfig(
        catalog_path="/root",
        catalog_fs_protocol="memory",
        data_cls_path="nautilus_trader.model.data.tick.QuoteTick",
        instrument_id="AUD/USD.SIM",
        start_time=1580398089820000000,
        end_time=1580504394501000000,
    )
    self.backtest_configs = [
        BacktestRunConfig(
            engine=BacktestEngineConfig(),
            venues=[self.venue_config],
            data=[self.data_config],
        )
    ]
    self.strategies = [
        ImportableStrategyConfig(
            path="nautilus_trader.examples.strategies.ema_cross:EMACross",
            config=EMACrossConfig(
                instrument_id="AUD/USD.SIM",
                bar_type="AUD/USD.SIM-100-TICK-MID-INTERNAL",
                fast_ema_period=10,
                slow_ema_period=20,
                trade_size=Decimal(1_000_000),
                order_id_tag="001",
            ),
        )
    ]
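# A hedged aside on the "path" format used above: "module:Class" strings like
# "nautilus_trader.examples.strategies.ema_cross:EMACross" are typically
# resolved via importlib, roughly as below (an illustrative helper, not the
# library's actual loader).
def resolve_importable_path(path: str):
    import importlib

    module_name, cls_name = path.rsplit(":", maxsplit=1)
    return getattr(importlib.import_module(module_name), cls_name)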
def setup(self):
    data_catalog_setup()
    self.catalog = DataCatalog.from_env()
    self.fs: fsspec.AbstractFileSystem = self.catalog.fs
    self._loaded_data_into_catalog()
def setup(self):
    data_catalog_setup()
    self.catalog = DataCatalog.from_env()
    self.fs = self.catalog.fs
    self.reader = BetfairTestStubs.betfair_reader()
def setup(self):
    data_catalog_setup()
    self.catalog = DataCatalog.from_env()
    self.reader = MockReader()
    self.line_preprocessor = TestLineProcessor()
def setup(self):
    data_catalog_setup()
    self.catalog = DataCatalog.from_env()
    self.fs = self.catalog.fs
    self._loaded_data_into_catalog()
def setup(self):
    data_catalog_setup()
    self.catalog = DataCatalog.from_env()
    self.fs = self.catalog.fs
    self.reader = MockReader()