def init_class_fixtures(cls):
    """Set up the class-wide fixtures used by the example tests.

    After delegating to the parent implementation this:

    * registers a do-nothing callable under the name ``'test'`` and queues
      the matching ``unregister`` call as a class callback;
    * unpacks ``example_data.tar.gz`` into the class temporary directory;
    * stores on ``cls.expected_perf`` a pickle-serialized dataframe cache
      whose directory is keyed on the installed pandas version;
    * calls ``update_modified_time`` on the bundled market data files.
    """
    super(ExamplesTests, cls).init_class_fixtures()

    register('test', lambda *args: None)
    cls.add_class_callback(partial(unregister, 'test'))

    archive_path = test_resource_path('example_data.tar.gz')
    with tarfile.open(archive_path) as archive:
        archive.extractall(cls.tmpdir.path)

    # Dots in the pandas version are swapped for dashes to build the
    # directory name, e.g. '0.18.1' -> '0-18-1'.
    version_tag = pd.__version__.replace('.', '-')
    expected_perf_dir = cls.tmpdir.getpath(
        'example_data/expected_perf/%s' % version_tag,
    )
    cls.expected_perf = dataframe_cache(
        expected_perf_dir,
        serialization='pickle',
    )

    for filename in ('SPY_benchmark.csv', 'treasury_curves.csv'):
        data_file = cls.tmpdir.getpath('example_data/root/data/' + filename)
        update_modified_time(data_file)
def init_class_fixtures(cls):
    """Set up the class-wide fixtures used by the example tests.

    After delegating to the parent implementation this:

    * registers a do-nothing callable under the name ``'test'`` and queues
      the matching ``unregister`` call as a class callback;
    * unpacks ``example_data.tar.gz`` into the class temporary directory;
    * stores on ``cls.expected_perf`` a pickle-serialized dataframe cache
      whose directory is keyed on the installed pandas version;
    * calls ``update_modified_time`` on the bundled market data files.
    """
    super(ExamplesTests, cls).init_class_fixtures()

    register('test', lambda *args: None)
    cls.add_class_callback(partial(unregister, 'test'))

    archive_path = test_resource_path('example_data.tar.gz')
    with tarfile.open(archive_path) as archive:
        archive.extractall(cls.tmpdir.path)

    # Dots in the pandas version are swapped for dashes to build the
    # directory name, e.g. '0.18.1' -> '0-18-1'.
    version_tag = pd.__version__.replace('.', '-')
    expected_perf_dir = cls.tmpdir.getpath(
        'example_data/expected_perf/%s' % version_tag,
    )
    cls.expected_perf = dataframe_cache(
        expected_perf_dir,
        serialization='pickle',
    )

    for filename in ('SPY_benchmark.csv', 'treasury_curves.csv'):
        data_file = cls.tmpdir.getpath('example_data/root/data/' + filename)
        update_modified_time(data_file)
def ingest(name,
           environ=os.environ,
           timestamp=None,
           assets_versions=(),
           show_progress=False,
           is_compile=False):
    """Run the ingestion for the bundle called ``name``.

    Parameters
    ----------
    name : str
        The name of the bundle; looked up in ``bundles``.
    environ : mapping, optional
        The environment variables, used when resolving the data and cache
        paths. By default this is os.environ.
    timestamp : pd.Timestamp, optional
        The timestamp to use for the load. ``tz_convert`` is called on it,
        so a caller-supplied value has to be timezone-aware.
        By default this is the current UTC time.
    assets_versions : Iterable[int], optional
        Versions of the assets db to which to downgrade.
    show_progress : bool, optional
        Tell the ingest function to display the progress where possible.
    is_compile : bool, optional
        Handed unchanged to the bundle's ingest function, right after
        ``show_progress``.

    Raises
    ------
    UnknownBundle
        If ``name`` is not a key of ``bundles``.
    ValueError
        If ``assets_versions`` is non-empty but the bundle does not create
        writers.
    """
    try:
        bundle = bundles[name]
    except KeyError:
        raise UnknownBundle(name)

    calendar = get_calendar(bundle.calendar_name)

    # Clamp the bundle's session range to the calendar's range; a missing
    # bound falls back to the calendar's own bound.
    start_session = bundle.start_session
    if start_session is None or start_session < calendar.first_session:
        start_session = calendar.first_session

    end_session = bundle.end_session
    if end_session is None or end_session > calendar.last_session:
        end_session = calendar.last_session

    ingest_time = pd.Timestamp.utcnow() if timestamp is None else timestamp
    # The ingestion directory name is built from a tz-naive UTC timestamp.
    timestr = to_bundle_ingest_dirname(
        ingest_time.tz_convert('utc').tz_localize(None)
    )

    cachepath = cache_path(name, environ=environ)
    pth.ensure_directory(pth.data_path([name, timestr], environ=environ))
    pth.ensure_directory(cachepath)

    def _rel(path_func, **extra):
        # Every *_relative helper takes the same bundle/timestamp/environ
        # triple; only the optional extras differ.
        return path_func(name, timestr, environ=environ, **extra)

    # we use `clean_on_failure=False` so that we don't purge the cache
    # directory if the load fails in the middle
    with dataframe_cache(cachepath, clean_on_failure=False) as cache:
        with ExitStack() as stack:
            daily_bar_writer = None
            five_minute_bar_writer = None
            minute_bar_writer = None
            asset_db_writer = None
            adjustment_db_writer = None

            if bundle.create_writers:
                workdir = stack.enter_context(
                    working_dir(pth.data_path([], environ=environ))
                )

                daily_bars_path = workdir.ensure_dir(*_rel(daily_relative))
                daily_bar_writer = BcolzDailyBarWriter(
                    daily_bars_path,
                    calendar,
                    start_session,
                    end_session,
                )
                # Do an empty write to ensure that the daily ctables exist
                # when we create the SQLiteAdjustmentWriter below. The
                # SQLiteAdjustmentWriter needs to open the daily ctables so
                # that it can compute the adjustment ratios for the
                # dividends.
                daily_bar_writer.write(())

                five_minute_dir = workdir.ensure_dir(
                    *_rel(five_minute_relative)
                )
                five_minute_bar_writer = BcolzFiveMinuteBarWriter(
                    five_minute_dir,
                    calendar,
                    start_session,
                    end_session,
                    five_minutes_per_day=bundle.five_minutes_per_day,
                )

                minute_dir = workdir.ensure_dir(*_rel(minute_relative))
                minute_bar_writer = BcolzMinuteBarWriter(
                    minute_dir,
                    calendar,
                    start_session,
                    end_session,
                    minutes_per_day=bundle.minutes_per_day,
                )

                assets_db_path = workdir.getpath(*_rel(asset_db_relative))
                asset_db_writer = AssetDBWriter(assets_db_path)

                adjustments_path = workdir.getpath(
                    *_rel(adjustment_db_relative)
                )
                adjustment_db_writer = stack.enter_context(
                    SQLiteAdjustmentWriter(
                        adjustments_path,
                        BcolzDailyBarReader(daily_bars_path),
                        calendar.all_sessions,
                        overwrite=True,
                    )
                )
            elif assets_versions:
                # Without writers there is no assets db to downgrade.
                raise ValueError('Need to ingest a bundle that creates '
                                 'writers in order to downgrade the assets'
                                 ' db.')

            bundle.ingest(
                environ,
                asset_db_writer,
                minute_bar_writer,
                five_minute_bar_writer,
                daily_bar_writer,
                adjustment_db_writer,
                calendar,
                start_session,
                end_session,
                cache,
                show_progress,
                is_compile,
                pth.data_path([name, timestr], environ=environ),
            )

            # Produce one downgraded copy of the assets db per requested
            # version, highest version first. This only runs when writers
            # were created, because the branch above raises otherwise.
            for version in sorted(set(assets_versions), reverse=True):
                version_path = workdir.getpath(
                    *_rel(asset_db_relative, db_version=version)
                )
                with working_file(version_path) as versioned_db:
                    shutil.copy2(assets_db_path, versioned_db.path)
                    downgrade(versioned_db.path, version)
def ingest(name,
           environ=os.environ,
           timestamp=None,
           assets_versions=(),
           show_progress=False,
           is_compile=False):
    """Run the ingestion for the bundle called ``name``.

    Parameters
    ----------
    name : str
        The name of the bundle; looked up in ``bundles``.
    environ : mapping, optional
        The environment variables, used when resolving the data and cache
        paths. By default this is os.environ.
    timestamp : pd.Timestamp, optional
        The timestamp to use for the load. ``tz_convert`` is called on it,
        so a caller-supplied value has to be timezone-aware.
        By default this is the current UTC time.
    assets_versions : Iterable[int], optional
        Versions of the assets db to which to downgrade.
    show_progress : bool, optional
        Tell the ingest function to display the progress where possible.
    is_compile : bool, optional
        Handed unchanged to the bundle's ingest function, right after
        ``show_progress``.

    Raises
    ------
    UnknownBundle
        If ``name`` is not a key of ``bundles``.
    ValueError
        If ``assets_versions`` is non-empty but the bundle does not create
        writers.
    """
    try:
        bundle = bundles[name]
    except KeyError:
        raise UnknownBundle(name)

    calendar = get_calendar(bundle.calendar_name)

    # Clamp the bundle's session range to the calendar's range; a missing
    # bound falls back to the calendar's own bound.
    start_session = bundle.start_session
    if start_session is None or start_session < calendar.first_session:
        start_session = calendar.first_session

    end_session = bundle.end_session
    if end_session is None or end_session > calendar.last_session:
        end_session = calendar.last_session

    ingest_time = pd.Timestamp.utcnow() if timestamp is None else timestamp
    # The ingestion directory name is built from a tz-naive UTC timestamp.
    timestr = to_bundle_ingest_dirname(
        ingest_time.tz_convert('utc').tz_localize(None)
    )

    cachepath = cache_path(name, environ=environ)
    pth.ensure_directory(pth.data_path([name, timestr], environ=environ))
    pth.ensure_directory(cachepath)

    def _rel(path_func, **extra):
        # Every *_relative helper takes the same bundle/timestamp/environ
        # triple; only the optional extras differ.
        return path_func(name, timestr, environ=environ, **extra)

    # we use `clean_on_failure=False` so that we don't purge the cache
    # directory if the load fails in the middle
    with dataframe_cache(cachepath, clean_on_failure=False) as cache:
        with ExitStack() as stack:
            daily_bar_writer = None
            minute_bar_writer = None
            asset_db_writer = None
            adjustment_db_writer = None

            if bundle.create_writers:
                workdir = stack.enter_context(
                    working_dir(pth.data_path([], environ=environ))
                )

                daily_bars_path = workdir.ensure_dir(*_rel(daily_relative))
                daily_bar_writer = BcolzDailyBarWriter(
                    daily_bars_path,
                    calendar,
                    start_session,
                    end_session,
                )
                # Do an empty write to ensure that the daily ctables exist
                # when we create the SQLiteAdjustmentWriter below. The
                # SQLiteAdjustmentWriter needs to open the daily ctables so
                # that it can compute the adjustment ratios for the
                # dividends.
                daily_bar_writer.write(())

                minute_dir = workdir.ensure_dir(*_rel(minute_relative))
                minute_bar_writer = BcolzMinuteBarWriter(
                    minute_dir,
                    calendar,
                    start_session,
                    end_session,
                    minutes_per_day=bundle.minutes_per_day,
                )

                assets_db_path = workdir.getpath(*_rel(asset_db_relative))
                asset_db_writer = AssetDBWriter(assets_db_path)

                adjustments_path = workdir.getpath(
                    *_rel(adjustment_db_relative)
                )
                adjustment_db_writer = stack.enter_context(
                    SQLiteAdjustmentWriter(
                        adjustments_path,
                        BcolzDailyBarReader(daily_bars_path),
                        calendar.all_sessions,
                        overwrite=True,
                    )
                )
            elif assets_versions:
                # Without writers there is no assets db to downgrade.
                raise ValueError('Need to ingest a bundle that creates '
                                 'writers in order to downgrade the assets'
                                 ' db.')

            bundle.ingest(
                environ,
                asset_db_writer,
                minute_bar_writer,
                daily_bar_writer,
                adjustment_db_writer,
                calendar,
                start_session,
                end_session,
                cache,
                show_progress,
                is_compile,
                pth.data_path([name, timestr], environ=environ),
            )

            # Produce one downgraded copy of the assets db per requested
            # version, highest version first. This only runs when writers
            # were created, because the branch above raises otherwise.
            for version in sorted(set(assets_versions), reverse=True):
                version_path = workdir.getpath(
                    *_rel(asset_db_relative, db_version=version)
                )
                with working_file(version_path) as versioned_db:
                    shutil.copy2(assets_db_path, versioned_db.path)
                    downgrade(versioned_db.path, version)