def ingest(
    environ: Mapping,
    asset_db_writer: AssetDBWriter,
    minute_bar_writer: BcolzMinuteBarWriter,
    daily_bar_writer: BcolzDailyBarWriter,
    adjustment_writer: SQLiteAdjustmentWriter,
    calendar: TradingCalendar,
    start_session: pd.Timestamp,
    end_session: pd.Timestamp,
    cache: dataframe_cache,
    show_progress: bool,
    output_dir: Text,
) -> None:
    """Ingest the bitmex futures bundle.

    Writes futures metadata (with a single 'bitmex'/UTC exchange row),
    then minute bars, then daily bars through the provided writers.

    Parameters
    ----------
    environ : Mapping
        Environment variables (unused here; part of the bundle API).
    asset_db_writer, minute_bar_writer, daily_bar_writer, adjustment_writer
        Writers supplied by the bundle machinery.
    calendar : TradingCalendar
        Calendar for the sessions being written (unused here).
    start_session, end_session : pd.Timestamp
        Session range forwarded to the bar fetchers.
    cache : dataframe_cache
        Cache forwarded to the bar fetchers.
    show_progress : bool
        Whether the writers should display progress.
    output_dir : Text
        Output directory (unused here; part of the bundle API).

    Notes
    -----
    The return annotation was ``NoReturn``, which per PEP 484 means the
    function never returns normally; this function returns ``None``, so
    the annotation is corrected to ``None``.
    """
    # Assign sequential sids to the symbol universe.
    # NOTE(review): ``symbols`` is not defined in this function --
    # presumably a module-level constant; confirm.
    sid_map = list(zip(range(len(symbols)), symbols))

    asset_db_writer.write(
        futures=_get_metadata(sid_map),
        exchanges=pd.DataFrame(
            data=[['bitmex', 'UTC']],
            columns=['exchange', 'timezone'],
        ),
    )

    minute_bar_writer.write(
        _get_minute_bars(sid_map, start_session, end_session, cache),
        show_progress=show_progress,
    )

    daily_bar_writer.write(
        _get_daily_bars(sid_map, start_session, end_session, cache),
        show_progress=show_progress,
    )
def init_instance_fixtures(self):
    """Create a fresh on-disk assets db path and writer for each test."""
    super(TestWrite, self).init_instance_fixtures()
    db_path = os.path.join(self.instance_tmpdir.path, 'assets.db')
    self.assets_db_path = db_path
    self.writer = AssetDBWriter(db_path)
def test_v5_to_v4_selects_most_recent_ticker(self):
    """Downgrading v5 -> v4 keeps the most recent symbol mapping per sid."""
    ts = pd.Timestamp
    rows = [
        ['A', 'A', ts('2014-01-01'), ts('2014-01-02')],
        ['B', 'B', ts('2014-01-01'), ts('2014-01-02')],
        # sid 2 has two symbol mappings; the later one should win
        ['B', 'C', ts('2014-01-03'), ts('2014-01-04')],
        ['C', 'C', ts('2014-01-01'), ts('2014-01-02')],
    ]
    AssetDBWriter(self.engine).write(
        equities=pd.DataFrame(
            rows,
            index=[0, 1, 2, 2],
            columns=['symbol', 'asset_name', 'start_date', 'end_date'],
        ),
    )
    downgrade(self.engine, 4)

    metadata = sa.MetaData(self.engine)
    metadata.reflect()

    expected_data = {
        (0, 'A', 'A', ts('2014-01-01').value, ts('2014-01-02').value),
        (1, 'B', 'B', ts('2014-01-01').value, ts('2014-01-02').value),
        (2, 'B', 'C', ts('2014-01-01').value, ts('2014-01-04').value),
    }
    actual_data = {
        (r.sid, r.symbol, r.asset_name, r.start_date, r.end_date)
        for r in sa.select(metadata.tables['equities'].c).execute()
    }
    assert_equal(expected_data, actual_data)
def __init__(self, load=None, bm_symbol='^GSPC', exchange_tz="US/Eastern",
             trading_schedule=default_nyse_schedule,
             asset_db_path=':memory:'):
    """Load benchmark/treasury data and wire up the asset database."""
    self.bm_symbol = bm_symbol
    loader = load if load else load_market_data
    self.benchmark_returns, self.treasury_curves = loader(
        trading_schedule.day,
        trading_schedule.schedule.index,
        self.bm_symbol,
    )
    self.exchange_tz = exchange_tz

    # A string is treated as a path/URL; anything else is an
    # already-constructed engine (or None).
    if isinstance(asset_db_path, string_types):
        engine = create_engine('sqlite:///%s' % asset_db_path)
    else:
        engine = asset_db_path
    self.engine = engine

    if engine is None:
        self.asset_finder = None
    else:
        AssetDBWriter(engine).init_db()
        self.asset_finder = AssetFinder(engine)
def write_data(self, **kwargs):
    """Write data into the asset_db.

    Parameters
    ----------
    **kwargs
        Forwarded to AssetDBWriter.write
    """
    writer = AssetDBWriter(self.engine)
    writer.write(**kwargs)
def __init__(self, load=None, bm_symbol='^GSPC', exchange_tz="US/Eastern",
             min_date=None, max_date=None,
             env_trading_calendar=tradingcalendar,
             asset_db_path=':memory:'):
    """Build the trading environment: calendar slices, benchmark and
    treasury data, and the asset database."""
    self.trading_day = env_trading_calendar.trading_day.copy()

    # Restrict the calendar's sessions to [min_date, max_date].
    all_days = env_trading_calendar.trading_days
    self.trading_days = all_days[all_days.slice_indexer(min_date, max_date)]
    self.first_trading_day = self.trading_days[0]
    self.last_trading_day = self.trading_days[-1]

    self.early_closes = env_trading_calendar.get_early_closes(
        self.first_trading_day,
        self.last_trading_day,
    )
    self.open_and_closes = env_trading_calendar.open_and_closes.loc[
        self.trading_days]

    self.bm_symbol = bm_symbol
    loader = load if load else load_market_data
    self.benchmark_returns, self.treasury_curves = loader(
        self.trading_day,
        self.trading_days,
        self.bm_symbol,
    )

    if max_date:
        # Mask the treasury curves down to the current date. In the
        # case of live trading, the last date in the treasury curves
        # would be the day before the date considered to be 'today'.
        curves = self.treasury_curves
        self.treasury_curves = curves[curves.index <= max_date]

    self.exchange_tz = exchange_tz

    # A string is treated as a path; anything else is an
    # already-constructed engine (or None).
    if isinstance(asset_db_path, string_types):
        engine = create_engine('sqlite:///%s' % asset_db_path)
    else:
        engine = asset_db_path
    self.engine = engine

    if engine is None:
        self.asset_finder = None
    else:
        AssetDBWriter(engine).init_db()
        self.asset_finder = AssetFinder(engine)
def __init__(
    self,
    load=None,
    bm_symbol='SPY',
    exchange_tz="US/Eastern",
    trading_calendar=None,
    trading_day=None,
    trading_days=None,
    asset_db_path=':memory:',
    future_chain_predicates=CHAIN_PREDICATES,
    environ=None,
):
    """Load benchmark/treasury data and wire up the asset database."""
    self.bm_symbol = bm_symbol

    if not load:
        load = partial(load_market_data, environ=environ)

    # Only fetch the NYSE calendar when the caller left a gap; a
    # caller-supplied calendar is used as-is.
    if trading_day is None or trading_days is None:
        if not trading_calendar:
            trading_calendar = get_calendar("NYSE")
        if trading_day is None:
            trading_day = trading_calendar.day
        if trading_days is None:
            trading_days = trading_calendar.schedule.index

    self.benchmark_returns, self.treasury_curves = load(
        trading_day,
        trading_days,
        self.bm_symbol,
    )

    self.exchange_tz = exchange_tz

    # A string is treated as a path; anything else is an
    # already-constructed engine (or None).
    if isinstance(asset_db_path, string_types):
        engine = create_engine('sqlite:///' + asset_db_path)
    else:
        engine = asset_db_path
    self.engine = engine

    if engine is None:
        self.asset_finder = None
    else:
        AssetDBWriter(engine).init_db()
        self.asset_finder = AssetFinder(
            engine, future_chain_predicates=future_chain_predicates)
def __init__(
    self,
    load=None,
    bm_symbol=None,
    exchange_tz="Asia/Shanghai",
    trading_calendar=None,
    asset_db_path=':memory:',
    future_chain_predicates=CHAIN_PREDICATES,
    environ=None,
):
    """Load benchmark/treasury data and wire up the asset database.

    Parameters
    ----------
    load : callable, optional
        Loader returning ``(benchmark_returns, treasury_curves)``.
        Defaults to ``load_dump_data`` when no benchmark symbol is
        given, and ``load_market_data`` otherwise.
    bm_symbol : str, optional
        Benchmark symbol; ``None`` selects the locally dumped data path.
    exchange_tz : str, optional
        Exchange timezone name.
    trading_calendar : TradingCalendar, optional
        Defaults to the "SHSZ" calendar.
    asset_db_path : str or engine, optional
        A string is treated as a path; anything else is an
        already-constructed engine (or None).
    """
    self.bm_symbol = bm_symbol

    if not load:
        if self.bm_symbol is None:
            # No benchmark symbol: read from locally dumped data.
            load = load_dump_data
        else:
            # BUG FIX: the original condition read
            # ``not load and not self.bm_symbol is not None``, which
            # simplifies to ``self.bm_symbol is None`` -- the same test
            # as the branch above -- so ``load`` stayed ``None`` when a
            # benchmark symbol *was* supplied and the call below raised
            # a TypeError.
            load = load_market_data

    if not trading_calendar:
        trading_calendar = get_calendar("SHSZ")

    self.benchmark_returns, self.treasury_curves = load(
        trading_calendar.day,
        trading_calendar.schedule.index,
        self.bm_symbol,
    )

    self.exchange_tz = exchange_tz

    if isinstance(asset_db_path, string_types):
        asset_db_path = 'sqlite:///' + asset_db_path
        self.engine = engine = create_engine(asset_db_path)
    else:
        self.engine = engine = asset_db_path

    if engine is not None:
        AssetDBWriter(engine).init_db()
        self.asset_finder = AssetFinder(
            engine, future_chain_predicates=future_chain_predicates)
    else:
        self.asset_finder = None
def ingest(name, environ=os.environ, timestamp=None, show_progress=False):
    """Ingest data for a given bundle.

    Parameters
    ----------
    name : str
        The name of the bundle.
    environ : mapping, optional
        The environment variables. By default this is os.environ.
    timestamp : datetime, optional
        The timestamp to use for the load. By default this is the current
        time.
    show_progress : bool, optional
        Tell the ingest function to display the progress where possible.
    """
    # Unknown bundle names are surfaced as a dedicated error type.
    try:
        bundle = bundles[name]
    except KeyError:
        raise UnknownBundle(name)

    if timestamp is None:
        timestamp = pd.Timestamp.utcnow()
    # Normalize to a naive UTC timestamp before deriving the directory name.
    timestamp = timestamp.tz_convert('utc').tz_localize(None)

    # Each ingestion run gets its own timestamped output directory.
    timestr = to_bundle_ingest_dirname(timestamp)
    cachepath = cache_path(name, environ=environ)
    pth.ensure_directory(pth.data_path([name, timestr], environ=environ))
    pth.ensure_directory(cachepath)
    with dataframe_cache(cachepath, clean_on_failure=False) as cache, \
            ExitStack() as stack:
        # we use `cleanup_on_failure=False` so that we don't purge the
        # cache directory if the load fails in the middle
        if bundle.create_writers:
            # Stage all output in a working directory so a failed
            # ingestion does not leave partial data behind.
            wd = stack.enter_context(
                working_dir(pth.data_path([], environ=environ)))
            daily_bars_path = wd.ensure_dir(*daily_equity_relative(
                name, timestr, environ=environ,
            ))
            daily_bar_writer = BcolzDailyBarWriter(
                daily_bars_path,
                bundle.calendar,
                bundle.start_session,
                bundle.end_session,
            )
            # Do an empty write to ensure that the daily ctables exist
            # when we create the SQLiteAdjustmentWriter below. The
            # SQLiteAdjustmentWriter needs to open the daily ctables so
            # that it can compute the adjustment ratios for the dividends.
            daily_bar_writer.write(())
            minute_bar_writer = BcolzMinuteBarWriter(
                wd.ensure_dir(*minute_equity_relative(
                    name, timestr, environ=environ)),
                bundle.calendar,
                bundle.start_session,
                bundle.end_session,
                minutes_per_day=bundle.minutes_per_day,
            )
            asset_db_writer = AssetDBWriter(
                wd.getpath(*asset_db_relative(
                    name, timestr, environ=environ,
                )))
            # The adjustment writer reads the (just-created) daily bars
            # to compute dividend adjustment ratios.
            adjustment_db_writer = stack.enter_context(
                SQLiteAdjustmentWriter(
                    wd.getpath(*adjustment_db_relative(
                        name, timestr, environ=environ)),
                    BcolzDailyBarReader(daily_bars_path),
                    bundle.calendar.all_sessions,
                    overwrite=True,
                ))
        else:
            # The bundle manages its own output; pass None writers.
            daily_bar_writer = None
            minute_bar_writer = None
            asset_db_writer = None
            adjustment_db_writer = None

        # Delegate the actual data load/write to the bundle's own
        # ingest function.
        bundle.ingest(
            environ,
            asset_db_writer,
            minute_bar_writer,
            daily_bar_writer,
            adjustment_db_writer,
            bundle.calendar,
            bundle.start_session,
            bundle.end_session,
            cache,
            show_progress,
            pth.data_path([name, timestr], environ=environ),
        )
def __enter__(self):
    """Create the engine, populate it from the stored frames, return it."""
    engine = create_engine(self._url)
    self._eng = engine
    AssetDBWriter(engine).write(**self._frames)
    return engine
class TestWrite(WithInstanceTmpDir, ZiplineTestCase):
    """Tests for AssetDBWriter.write / write_direct against an on-disk db."""

    def init_instance_fixtures(self):
        """Create a fresh assets db path and writer for each test."""
        super(TestWrite, self).init_instance_fixtures()
        self.assets_db_path = path = os.path.join(
            self.instance_tmpdir.path,
            'assets.db',
        )
        self.writer = AssetDBWriter(path)

    def new_asset_finder(self):
        """Return a finder reading the db this fixture's writer wrote."""
        return AssetFinder(self.assets_db_path)

    def test_write_multiple_exchanges(self):
        # Incrementing by two so that start and end dates for each
        # generated Asset don't overlap (each Asset's end_date is the
        # day after its start date).
        dates = pd.date_range('2013-01-01', freq='2D', periods=5, tz='UTC')
        sids = list(range(5))
        df = pd.DataFrame.from_records([
            {
                'sid': sid,
                'real_sid': str(sid),
                'currency': 'USD',
                'symbol': str(sid),
                'start_date': date.value,
                'end_date': (date + timedelta(days=1)).value,

                # Change the exchange with each mapping period. We don't
                # currently support point in time exchange information,
                # so we just take the most recent by end date.
                'exchange': 'EXCHANGE-%d-%d' % (sid, n),
            }
            for n, date in enumerate(dates)
            for sid in sids
        ])
        self.writer.write(equities=df)

        reader = self.new_asset_finder()
        equities = reader.retrieve_all(reader.sids)
        for eq in equities:
            # Every sid's exchange should be the one from the last
            # (most recent by end date) mapping period.
            expected_exchange = 'EXCHANGE-%d-%d' % (eq.sid, len(dates) - 1)
            assert_equal(eq.exchange, expected_exchange)

    def test_write_direct(self):
        # don't include anything with a default to test that those work.
        equities = pd.DataFrame({
            'sid': [0, 1],
            'real_sid': ['0', '1'],
            'currency': ['USD', 'CAD'],
            'asset_name': ['Ayy Inc.', 'Lmao LP'],
            # the full exchange name
            'exchange': ['NYSE', 'TSE'],
        })
        equity_symbol_mappings = pd.DataFrame({
            'sid': [0, 1],
            'symbol': ['AYY', 'LMAO'],
            'company_symbol': ['AYY', 'LMAO'],
            'share_class_symbol': ['', ''],
        })
        exchanges = pd.DataFrame({
            'exchange': ['NYSE', 'TSE'],
            'country_code': ['US', 'JP'],
        })

        self.writer.write_direct(
            equities=equities,
            equity_symbol_mappings=equity_symbol_mappings,
            exchanges=exchanges,
        )

        reader = self.new_asset_finder()

        equities = reader.retrieve_all(reader.sids)
        # Fields not written above should come back as their defaults
        # (start_date epoch, end_date max, tick_size 0.01, multiplier 1.0).
        expected_equities = [
            Equity(
                0,
                '0',
                ExchangeInfo('NYSE', 'NYSE', 'US'),
                currency='USD',
                symbol='AYY',
                asset_name='Ayy Inc.',
                start_date=pd.Timestamp(0, tz='UTC'),
                end_date=pd.Timestamp.max.tz_localize('UTC'),
                first_traded=None,
                auto_close_date=None,
                tick_size=0.01,
                multiplier=1.0,
            ),
            Equity(
                1,
                '1',
                ExchangeInfo('TSE', 'TSE', 'JP'),
                currency='CAD',
                symbol='LMAO',
                asset_name='Lmao LP',
                start_date=pd.Timestamp(0, tz='UTC'),
                end_date=pd.Timestamp.max.tz_localize('UTC'),
                first_traded=None,
                auto_close_date=None,
                tick_size=0.01,
                multiplier=1.0,
            )
        ]
        assert_equal(equities, expected_equities)

        exchange_info = reader.exchange_info
        expected_exchange_info = {
            'NYSE': ExchangeInfo('NYSE', 'NYSE', 'US'),
            'TSE': ExchangeInfo('TSE', 'TSE', 'JP'),
        }
        assert_equal(exchange_info, expected_exchange_info)
def init_instance_fixtures(self):
    """Back each test with a writer and finder over an empty assets db."""
    super(AssetFinderMultipleCountries, self).init_instance_fixtures()
    db_conn = self.enter_instance_context(empty_assets_db())
    self._asset_writer = AssetDBWriter(db_conn)
    self.asset_finder = AssetFinder(db_conn)
def init_instance_fixtures(self):
    """Back each test with a writer and finder over an empty assets db."""
    super(AssetFinderTestCase, self).init_instance_fixtures()
    db_conn = self.enter_instance_context(empty_assets_db())
    self._asset_writer = AssetDBWriter(db_conn)
    self.asset_finder = self.asset_finder_type(db_conn)
def ingest(name,
           environ=os.environ,
           timestamp=None,
           assets_versions=(),
           show_progress=False):
    """Ingest data for a given bundle.

    Parameters
    ----------
    name : str
        The name of the bundle.
    environ : mapping, optional
        The environment variables. By default this is os.environ.
    timestamp : datetime, optional
        The timestamp to use for the load. By default this is the current
        time.
    assets_versions : Iterable[int], optional
        Versions of the assets db to which to downgrade.
    show_progress : bool, optional
        Tell the ingest function to display the progress where possible.
    """
    # Unknown bundle names are surfaced as a dedicated error type.
    try:
        bundle = bundles[name]
    except KeyError:
        raise UnknownBundle(name)

    calendar = get_calendar(bundle.calendar_name)

    # Clamp the bundle's session range to what the calendar supports;
    # missing bounds fall back to the calendar's full range.
    start_session = bundle.start_session
    end_session = bundle.end_session

    if start_session is None or start_session < calendar.first_session:
        start_session = calendar.first_session

    if end_session is None or end_session > calendar.last_session:
        end_session = calendar.last_session

    if timestamp is None:
        timestamp = pd.Timestamp.utcnow()
    # Normalize to a naive UTC timestamp before deriving the directory name.
    timestamp = timestamp.tz_convert('utc').tz_localize(None)

    # Each ingestion run gets its own timestamped output directory.
    timestr = to_bundle_ingest_dirname(timestamp)
    cachepath = cache_path(name, environ=environ)
    pth.ensure_directory(pth.data_path([name, timestr], environ=environ))
    pth.ensure_directory(cachepath)
    with dataframe_cache(cachepath, clean_on_failure=False) as cache, \
            ExitStack() as stack:
        # we use `cleanup_on_failure=False` so that we don't purge the
        # cache directory if the load fails in the middle
        if bundle.create_writers:
            # Stage all output in a working directory so a failed
            # ingestion does not leave partial data behind.
            wd = stack.enter_context(
                working_dir(pth.data_path([], environ=environ)))
            daily_bars_path = wd.ensure_dir(
                *daily_equity_relative(name, timestr))
            daily_bar_writer = BcolzDailyBarWriter(
                daily_bars_path,
                calendar,
                start_session,
                end_session,
            )
            # Do an empty write to ensure that the daily ctables exist
            # when we create the SQLiteAdjustmentWriter below. The
            # SQLiteAdjustmentWriter needs to open the daily ctables so
            # that it can compute the adjustment ratios for the dividends.
            daily_bar_writer.write(())
            minute_bar_writer = BcolzMinuteBarWriter(
                wd.ensure_dir(*minute_equity_relative(name, timestr)),
                calendar,
                start_session,
                end_session,
                minutes_per_day=bundle.minutes_per_day,
            )
            assets_db_path = wd.getpath(*asset_db_relative(name, timestr))
            asset_db_writer = AssetDBWriter(assets_db_path)
            # The adjustment writer reads the (just-created) daily bars
            # to compute dividend adjustment ratios.
            adjustment_db_writer = stack.enter_context(
                SQLiteAdjustmentWriter(
                    wd.getpath(*adjustment_db_relative(name, timestr)),
                    BcolzDailyBarReader(daily_bars_path),
                    overwrite=True,
                ))
        else:
            # The bundle manages its own output; pass None writers.
            daily_bar_writer = None
            minute_bar_writer = None
            asset_db_writer = None
            adjustment_db_writer = None

            # Downgrading requires the assets db produced by the
            # writers above, so it is impossible without them.
            if assets_versions:
                raise ValueError('Need to ingest a bundle that creates '
                                 'writers in order to downgrade the assets'
                                 ' db.')
        log.info("Ingesting {}.", name)
        # Delegate the actual data load/write to the bundle's own
        # ingest function.
        bundle.ingest(
            environ,
            asset_db_writer,
            minute_bar_writer,
            daily_bar_writer,
            adjustment_db_writer,
            calendar,
            start_session,
            end_session,
            cache,
            show_progress,
            pth.data_path([name, timestr], environ=environ),
        )

        # Produce a downgraded copy of the assets db for each requested
        # version, newest first. `wd` and `assets_db_path` are defined
        # here because the else-branch above raised if assets_versions
        # was non-empty without create_writers.
        for version in sorted(set(assets_versions), reverse=True):
            version_path = wd.getpath(*asset_db_relative(
                name, timestr, db_version=version,
            ))
            with working_file(version_path) as wf:
                shutil.copy2(assets_db_path, wf.path)
                downgrade(wf.path, version)
def asset_finder(self):
    """Initialize the assets db schema, then return a finder over it."""
    writer = AssetDBWriter(self.db_engine)
    writer.init_db()
    return AssetFinder(self.db_engine)