@classmethod
def init_class_fixtures(cls):
    super(ExamplesTests, cls).init_class_fixtures()
    register("test", lambda *args: None)
    cls.add_class_callback(partial(unregister, "test"))
    with tarfile.open(test_resource_path("example_data.tar.gz")) as tar:
        tar.extractall(cls.tmpdir.path)
    cls.expected_perf = dataframe_cache(
        cls.tmpdir.getpath(
            "example_data/expected_perf/%s" % pd.__version__.replace(".", "-"),
        ),
        serialization="pickle",
    )
    # We need to call gc.collect before tearing down our class because we
    # have a cycle between TradingAlgorithm and AlgorithmSimulator which
    # ultimately holds a reference to the pipeline engine passed to the
    # tests here.
    # This means that we're not guaranteed to have deleted our disk-backed
    # resource readers (e.g. SQLiteAdjustmentReader) before trying to
    # delete the tempdir, which causes failures on Windows because Windows
    # doesn't allow you to delete a file if someone still has an open
    # handle to that file.
    # :(
    cls.add_class_callback(gc.collect)
@classmethod
def init_class_fixtures(cls):
    super(ExamplesTests, cls).init_class_fixtures()
    register("test", lambda *args: None)
    cls.add_class_callback(partial(unregister, "test"))
    with tarfile.open(test_resource_path("example_data.tar.gz")) as tar:
        tar.extractall(cls.tmpdir.path)
    cls.expected_perf = dataframe_cache(
        cls.tmpdir.getpath(
            "example_data/expected_perf/%s" % pd.__version__.replace(".", "-"),
        ),
        serialization="pickle",
    )
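# A minimal sketch (assumption, not part of the original test file) of the
# ``dataframe_cache`` behavior the fixtures above rely on: it acts as a
# disk-backed MutableMapping from string keys to DataFrames. The path and
# key below are illustrative.
import pandas as pd
from zipline.utils.cache import dataframe_cache

cache = dataframe_cache("/tmp/example_perf_cache", serialization="pickle")
cache["buyapple"] = pd.DataFrame({"returns": [0.01, -0.02]})
assert cache["buyapple"]["returns"].iloc[0] == 0.01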
def _df_cache(_setup_class, request):
    request.cls.expected_perf = dataframe_cache(
        join(
            str(request.cls.tmp_path),
            "example_data",
            f"expected_perf/{request.param}",
        ),
        serialization="pickle",
    )
    request.cls.no_benchmark_expected_perf = {
        example_name: _no_benchmark_expectations_applied(expected_perf.copy())
        for example_name, expected_perf in request.cls.expected_perf.items()
    }
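# A runnable sketch (assumption: this is how a fixture like ``_df_cache`` is
# wired; names and the version string are illustrative) showing where
# ``request.param`` and ``request.cls`` come from: the fixture must be
# declared with ``params=`` and consumed by class-based tests.
import pytest

@pytest.fixture(scope="class", params=["1-4-3"])  # pandas version, "." -> "-"
def _version_param_sketch(request):
    # request.param is one entry of ``params``; request.cls is the test
    # class because the fixture is applied to a class via usefixtures.
    request.cls.expected_perf_version = request.param

@pytest.mark.usefixtures("_version_param_sketch")
class TestVersionParamSketch:
    def test_version_attached(self):
        assert self.expected_perf_version == "1-4-3"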
@classmethod
def init_class_fixtures(cls):
    super(ExamplesTests, cls).init_class_fixtures()
    register("test", lambda *args: None)
    cls.add_class_callback(partial(unregister, "test"))
    with tarfile.open(test_resource_path("example_data.tar.gz")) as tar:
        tar.extractall(cls.tmpdir.path)
    cls.expected_perf = dataframe_cache(
        cls.tmpdir.getpath(
            "example_data/expected_perf/%s" % pd.__version__.replace(".", "-"),
        ),
        serialization="pickle",
    )
    cls.no_benchmark_expected_perf = {
        example_name: cls._no_benchmark_expectations_applied(expected_perf.copy())
        for example_name, expected_perf in cls.expected_perf.items()
    }
@classmethod
def init_class_fixtures(cls):
    super(ExamplesTests, cls).init_class_fixtures()
    register('test', lambda *args: None)
    cls.add_class_callback(partial(unregister, 'test'))
    with tarfile.open(test_resource_path('example_data.tar.gz')) as tar:
        tar.extractall(cls.tmpdir.path)
    cls.expected_perf = dataframe_cache(
        cls.tmpdir.getpath(
            'example_data/expected_perf/%s' % pd.__version__.replace('.', '-'),
        ),
        serialization='pickle',
    )
    market_data = ('SPY_benchmark.csv', 'treasury_curves.csv')
    for data in market_data:
        update_modified_time(
            cls.tmpdir.getpath('example_data/root/data/' + data)
        )
def ingest(name, environ=os.environ, timestamp=None, show_progress=False):
    """Ingest data for a given bundle.

    Parameters
    ----------
    name : str
        The name of the bundle.
    environ : mapping, optional
        The environment variables. By default this is os.environ.
    timestamp : datetime, optional
        The timestamp to use for the load. By default this is the current
        time.
    show_progress : bool, optional
        Tell the ingest function to display the progress where possible.
    """
    try:
        bundle = bundles[name]
    except KeyError:
        raise UnknownBundle(name)

    if timestamp is None:
        timestamp = pd.Timestamp.utcnow()
    timestamp = timestamp.tz_convert('utc').tz_localize(None)
    timestr = to_bundle_ingest_dirname(timestamp)
    cachepath = cache_path(name, environ=environ)
    pth.ensure_directory(pth.data_path([name, timestr], environ=environ))
    pth.ensure_directory(cachepath)
    with dataframe_cache(cachepath, clean_on_failure=False) as cache, \
            ExitStack() as stack:
        # we use `clean_on_failure=False` so that we don't purge the
        # cache directory if the load fails in the middle
        if bundle.create_writers:
            wd = stack.enter_context(working_dir(
                pth.data_path([], environ=environ))
            )
            daily_bars_path = wd.ensure_dir(
                *daily_equity_relative(name, timestr, environ=environ)
            )
            daily_bar_writer = BcolzDailyBarWriter(
                daily_bars_path,
                nyse_cal,
                bundle.calendar[0],
                bundle.calendar[-1],
            )
            # Do an empty write to ensure that the daily ctables exist
            # when we create the SQLiteAdjustmentWriter below. The
            # SQLiteAdjustmentWriter needs to open the daily ctables so
            # that it can compute the adjustment ratios for the dividends.
            daily_bar_writer.write(())
            minute_bar_writer = BcolzMinuteBarWriter(
                bundle.calendar[0],
                wd.ensure_dir(*minute_equity_relative(
                    name, timestr, environ=environ,
                )),
                bundle.opens,
                bundle.closes,
                minutes_per_day=bundle.minutes_per_day,
            )
            asset_db_writer = AssetDBWriter(
                wd.getpath(*asset_db_relative(
                    name, timestr, environ=environ,
                ))
            )
            adjustment_db_writer = stack.enter_context(
                SQLiteAdjustmentWriter(
                    wd.getpath(*adjustment_db_relative(
                        name, timestr, environ=environ,
                    )),
                    BcolzDailyBarReader(daily_bars_path),
                    bundle.calendar,
                    overwrite=True,
                )
            )
        else:
            daily_bar_writer = None
            minute_bar_writer = None
            asset_db_writer = None
            adjustment_db_writer = None

        bundle.ingest(
            environ,
            asset_db_writer,
            minute_bar_writer,
            daily_bar_writer,
            adjustment_db_writer,
            bundle.calendar,
            cache,
            show_progress,
            pth.data_path([name, timestr], environ=environ),
        )
def ingest(name,
           environ=os.environ,
           timestamp=None,
           assets_versions=(),
           show_progress=False):
    """Ingest data for a given bundle.

    Parameters
    ----------
    name : str
        The name of the bundle.
    environ : mapping, optional
        The environment variables. By default this is os.environ.
    timestamp : datetime, optional
        The timestamp to use for the load. By default this is the current
        time.
    assets_versions : Iterable[int], optional
        Versions of the assets db to which to downgrade.
    show_progress : bool, optional
        Tell the ingest function to display the progress where possible.
    """
    try:
        bundle = bundles[name]
    except KeyError:
        raise UnknownBundle(name)

    calendar = get_calendar(bundle.calendar_name)

    start_session = bundle.start_session
    end_session = bundle.end_session

    if start_session is None or start_session < calendar.first_session:
        start_session = calendar.first_session

    if end_session is None or end_session > calendar.last_session:
        end_session = calendar.last_session

    if timestamp is None:
        timestamp = pd.Timestamp.utcnow()
    timestamp = timestamp.tz_convert('utc').tz_localize(None)

    timestr = to_bundle_ingest_dirname(timestamp)
    cachepath = cache_path(name, environ=environ)
    pth.ensure_directory(pth.data_path([name, timestr], environ=environ))
    pth.ensure_directory(cachepath)
    with dataframe_cache(cachepath, clean_on_failure=False) as cache, \
            ExitStack() as stack:
        # we use `clean_on_failure=False` so that we don't purge the
        # cache directory if the load fails in the middle
        if bundle.create_writers:
            wd = stack.enter_context(working_dir(
                pth.data_path([], environ=environ))
            )
            daily_bars_path = wd.ensure_dir(
                *daily_equity_relative(name, timestr, environ=environ)
            )
            daily_bar_writer = BcolzDailyBarWriter(
                daily_bars_path,
                calendar,
                start_session,
                end_session,
            )
            # Do an empty write to ensure that the daily ctables exist
            # when we create the SQLiteAdjustmentWriter below. The
            # SQLiteAdjustmentWriter needs to open the daily ctables so
            # that it can compute the adjustment ratios for the dividends.
            daily_bar_writer.write(())
            minute_bar_writer = BcolzMinuteBarWriter(
                wd.ensure_dir(*minute_equity_relative(
                    name, timestr, environ=environ,
                )),
                calendar,
                start_session,
                end_session,
                minutes_per_day=bundle.minutes_per_day,
            )
            assets_db_path = wd.getpath(*asset_db_relative(
                name, timestr, environ=environ,
            ))
            asset_db_writer = AssetDBWriter(assets_db_path)

            adjustment_db_writer = stack.enter_context(
                SQLiteAdjustmentWriter(
                    wd.getpath(*adjustment_db_relative(
                        name, timestr, environ=environ,
                    )),
                    BcolzDailyBarReader(daily_bars_path),
                    calendar.all_sessions,
                    overwrite=True,
                )
            )
        else:
            daily_bar_writer = None
            minute_bar_writer = None
            asset_db_writer = None
            adjustment_db_writer = None
            if assets_versions:
                raise ValueError('Need to ingest a bundle that creates '
                                 'writers in order to downgrade the assets'
                                 ' db.')

        bundle.ingest(
            environ,
            asset_db_writer,
            minute_bar_writer,
            daily_bar_writer,
            adjustment_db_writer,
            calendar,
            start_session,
            end_session,
            cache,
            show_progress,
            pth.data_path([name, timestr], environ=environ),
        )

        for version in sorted(set(assets_versions), reverse=True):
            version_path = wd.getpath(*asset_db_relative(
                name, timestr, environ=environ, db_version=version,
            ))
            with working_file(version_path) as wf:
                shutil.copy2(assets_db_path, wf.path)
                downgrade(wf.path, version)
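# A minimal usage sketch for ``ingest`` above. The bundle name 'quandl' is
# illustrative; any name previously passed to ``register`` works, and
# registration must happen before this call:
if __name__ == '__main__':
    ingest('quandl', show_progress=True)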
def main(ctx, rebuild_input):
    """Rebuild the perf data for test_examples"""
    example_path = test_resource_path("example_data.tar.gz")

    with tmp_dir() as d:
        with tarfile.open(example_path) as tar:
            tar.extractall(d.path)

        # The environ here should be the same (modulo the tempdir location)
        # as we use in test_examples.py.
        environ = {"ZIPLINE_ROOT": d.getpath("example_data/root")}

        if rebuild_input:
            raise NotImplementedError(
                "We cannot rebuild input for Yahoo because of "
                "changes Yahoo made to their API, so we cannot "
                "use Yahoo data bundles anymore. This will be fixed in "
                "a future release",
            )

        # we need to register the bundle; it is already ingested and saved
        # in the example_data.tar.gz file
        @register("test")
        def nop_ingest(*args, **kwargs):
            raise NotImplementedError("we cannot rebuild the test bundle")

        new_perf_path = d.getpath(
            "example_data/new_perf/%s" % pd.__version__.replace(".", "-"),
        )
        c = dataframe_cache(
            new_perf_path,
            serialization="pickle:2",
        )
        with c:
            for name in EXAMPLE_MODULES:
                c[name] = examples.run_example(
                    EXAMPLE_MODULES,
                    name,
                    environ=environ,
                    benchmark_returns=read_checked_in_benchmark_data(),
                )

            correct_called = [False]

            console = None

            def _exit(*args, **kwargs):
                console.raw_input = eof

            def correct():
                correct_called[0] = True
                _exit()

            expected_perf_path = d.getpath(
                "example_data/expected_perf/%s"
                % pd.__version__.replace(".", "-"),
            )

            # allow users to run some analysis to make sure that the new
            # results check out
            console = InteractiveConsole(
                {
                    "correct": correct,
                    "exit": _exit,
                    "incorrect": _exit,
                    "new": c,
                    "np": np,
                    "old": dataframe_cache(
                        expected_perf_path,
                        serialization="pickle",
                    ),
                    "pd": pd,
                    "cols_to_check": examples._cols_to_check,
                    "changed_results": changed_results,
                }
            )
            console.interact(banner)

            if not correct_called[0]:
                ctx.fail(
                    "`correct()` was not called! This means that the new"
                    " results will not be written",
                )

        # move the new results to the expected path
        shutil.rmtree(expected_perf_path)
        shutil.copytree(new_perf_path, expected_perf_path)

        # Clear out all the temporary new perf so it doesn't get added to
        # the tarball.
        shutil.rmtree(d.getpath("example_data/new_perf/"))

        with tarfile.open(example_path, "w|gz") as tar:
            tar.add(d.getpath("example_data"), "example_data")
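# Usage note (inferred from the flow above): running this command opens an
# InteractiveConsole with the ``new`` and ``old`` dataframe caches bound so
# the results can be compared (e.g. via ``changed_results``). Calling
# ``correct()`` accepts the new results and rewrites example_data.tar.gz;
# ``exit()`` or ``incorrect()`` aborts without overwriting anything.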