def make_splits_data(cls):
    """Build the splits fixture frame.

    Two splits with price ratio 0.5: the liquid split asset effective
    2016-01-06 and the illiquid split asset effective 2016-01-07.
    """
    def split_record(date, sid):
        # Both fixture splits share the same 0.5 price ratio.
        return {
            "effective_date": str_to_seconds(date),
            "ratio": 0.5,
            "sid": sid,
        }

    return pd.DataFrame.from_records([
        split_record("2016-01-06", cls.SPLIT_ASSET_SID),
        split_record("2016-01-07", cls.ILLIQUID_SPLIT_ASSET_SID),
    ])
def make_mergers_data(cls):
    """Build the mergers fixture frame.

    One ratio-0.5 merger for the merger asset effective 2016-01-06 and
    one ratio-0.6 merger for the illiquid merger asset effective
    2016-01-07.
    """
    merger = {
        'effective_date': str_to_seconds('2016-01-06'),
        'ratio': 0.5,
        'sid': cls.MERGER_ASSET_SID,
    }
    illiquid_merger = {
        'effective_date': str_to_seconds('2016-01-07'),
        'ratio': 0.6,
        'sid': cls.ILLIQUID_MERGER_ASSET_SID,
    }
    return pd.DataFrame.from_records([merger, illiquid_merger])
def make_mergers_data(cls):
    """Return the merger adjustments fixture: ratio 0.5 for the merger
    asset (2016-01-06) and ratio 0.6 for the illiquid merger asset
    (2016-01-07)."""
    records = [
        dict(
            effective_date=str_to_seconds("2016-01-06"),
            ratio=0.5,
            sid=cls.MERGER_ASSET_SID,
        ),
        dict(
            effective_date=str_to_seconds("2016-01-07"),
            ratio=0.6,
            sid=cls.ILLIQUID_MERGER_ASSET_SID,
        ),
    ]
    return pd.DataFrame.from_records(records)
def make_splits_data(cls):
    """Return the splits fixture: ratio-0.5 splits for the liquid and
    illiquid split assets on consecutive sessions."""
    dates_and_sids = [
        ("2016-01-06", cls.SPLIT_ASSET_SID),
        ("2016-01-07", cls.ILLIQUID_SPLIT_ASSET_SID),
    ]
    return pd.DataFrame.from_records([
        {'effective_date': str_to_seconds(date), 'ratio': 0.5, 'sid': sid}
        for date, sid in dates_and_sids
    ])
def create_adjustments_reader(cls):
    """Write a temporary SQLite adjustments db containing a single split
    for ``cls.asset3`` and return a reader over it.

    Only splits are exercised here; mergers and dividends are written as
    correctly-typed empty frames because the writer requires all three.
    """
    db_path = cls.tempdir.getpath("test_adjustments.db")
    writer = SQLiteAdjustmentWriter(
        db_path,
        cls.env.trading_days,
        MockDailyBarReader(),
    )

    splits = pd.DataFrame([{
        'effective_date': str_to_seconds("2016-01-06"),
        'ratio': 0.5,
        'sid': cls.asset3.sid,
    }])

    # Mergers and Dividends are not tested, but we need to have these
    # anyway -- build empty frames with the dtypes the writer expects.
    mergers = pd.DataFrame({}, columns=['effective_date', 'ratio', 'sid'])
    for column, dtype in (('effective_date', np.int64),
                          ('ratio', np.float64),
                          ('sid', np.int64)):
        mergers[column] = mergers[column].astype(dtype)

    dividends = pd.DataFrame({}, columns=[
        'ex_date',
        'record_date',
        'declared_date',
        'pay_date',
        'amount',
        'sid',
    ])
    dividends['amount'] = dividends['amount'].astype(np.float64)
    dividends['sid'] = dividends['sid'].astype(np.int64)

    writer.write(splits, mergers, dividends)
    return SQLiteAdjustmentReader(db_path)
def create_adjustments_reader(cls):
    """Create a ``SQLiteAdjustmentReader`` backed by a temp db holding a
    single split for ``cls.asset3``; mergers and dividends are written
    empty (they are not under test) to satisfy the writer interface."""
    db_file = cls.tempdir.getpath("test_adjustments.db")
    adjustment_writer = SQLiteAdjustmentWriter(
        db_file, cls.env.trading_days, MockDailyBarReader(),
    )

    split_record = {
        'effective_date': str_to_seconds("2016-01-06"),
        'ratio': 0.5,
        'sid': cls.asset3.sid,
    }
    splits = pd.DataFrame([split_record])

    # Mergers and Dividends are not tested, but we need to have these
    # anyway: empty frames coerced to the column dtypes the db expects.
    mergers = pd.DataFrame({}, columns=['effective_date', 'ratio', 'sid'])
    mergers.effective_date = mergers.effective_date.astype(np.int64)
    mergers.ratio = mergers.ratio.astype(np.float64)
    mergers.sid = mergers.sid.astype(np.int64)

    dividend_columns = [
        'ex_date',
        'record_date',
        'declared_date',
        'pay_date',
        'amount',
        'sid',
    ]
    dividends = pd.DataFrame({}, columns=dividend_columns)
    dividends.amount = dividends.amount.astype(np.float64)
    dividends.sid = dividends.sid.astype(np.int64)

    adjustment_writer.write(splits, mergers, dividends)
    return SQLiteAdjustmentReader(db_file)
def create_adjustment_reader(cls, tempdir):
    """Write an adjustments db containing AAPL's 1/7-ratio split
    (effective 2014-06-09) into *tempdir* and return a reader for it.

    Mergers and dividends are written as empty frames; the explicit
    empty arrays exist only to pin the correct dtypes.
    """
    dbpath = tempdir.getpath('adjustments.sqlite')
    writer = SQLiteAdjustmentWriter(
        dbpath,
        cls.env.trading_days,
        MockDailyBarSpotReader(),
    )

    splits = DataFrame.from_records([{
        'effective_date': str_to_seconds('2014-06-09'),
        'ratio': (1 / 7.0),
        'sid': cls.AAPL,
    }])

    # Hackery to make the dtypes correct on an empty frame.
    empty_int = array([], dtype=int)
    empty_float = array([], dtype=float)
    mergers = DataFrame(
        {
            'effective_date': empty_int,
            'ratio': empty_float,
            'sid': empty_int,
        },
        index=DatetimeIndex([]),
        columns=['effective_date', 'ratio', 'sid'],
    )

    empty_dates = array([], dtype='datetime64[ns]')
    dividends = DataFrame({
        'sid': array([], dtype=uint32),
        'amount': array([], dtype=float64),
        'record_date': empty_dates,
        'ex_date': empty_dates,
        'declared_date': empty_dates,
        'pay_date': empty_dates,
    })

    writer.write(splits, mergers, dividends)
    return SQLiteAdjustmentReader(dbpath)
def make_splits_data(cls):
    """Return a one-row splits frame: sid 3, price ratio 0.5, effective
    2016-01-06."""
    effective = str_to_seconds('2016-01-06')
    return pd.DataFrame(
        [{'effective_date': effective, 'ratio': 0.5, 'sid': 3}]
    )
def make_splits_data(cls):
    """Return the AAPL split fixture: price ratio 1/7 effective
    2014-06-09."""
    aapl_split = {
        "effective_date": str_to_seconds("2014-06-09"),
        "ratio": (1 / 7.0),
        "sid": cls.AAPL,
    }
    return pd.DataFrame.from_records([aapl_split])
def make_splits_data(cls):
    """Single split record for AAPL: price ratio 1/7, effective
    2014-06-09."""
    return DataFrame.from_records([
        dict(
            effective_date=str_to_seconds('2014-06-09'),
            ratio=1 / 7.0,
            sid=cls.AAPL,
        ),
    ])
def make_splits_data(cls):
    """Splits fixture: one ratio-0.5 split for sid 3 on 2016-01-06."""
    row = {
        "effective_date": str_to_seconds("2016-01-06"),
        "ratio": 0.5,
        "sid": 3,
    }
    return pd.DataFrame([row])
# NOTE(review): this chunk is a collapsed fragment -- the SPLITS frame
# literal below is opened but never closed in this view, so it is left
# byte-identical; reconstruct only together with its continuation.
# upon inspection. # # 1s place is the equity # # 0.1s place is the action type, with: # # splits, 1 # mergers, 2 # dividends, 3 # # 0.001s is the date SPLITS = DataFrame( [ # Before query range, should be excluded. { 'effective_date': str_to_seconds('2015-06-03'), 'ratio': 1.103, 'sid': 1 }, # First day of query range, should be excluded. { 'effective_date': str_to_seconds('2015-06-10'), 'ratio': 3.110, 'sid': 3 }, # Third day of query range, should have last_row of 2 { 'effective_date': str_to_seconds('2015-06-12'), 'ratio': 3.112, 'sid': 3 },
# NOTE(review): collapsed fragment -- the SPLITS literal is truncated
# (no closing bracket in this view); kept byte-identical pending the
# rest of the definition.
# upon inspection. # # 1s place is the equity # # 0.1s place is the action type, with: # # splits, 1 # mergers, 2 # dividends, 3 # # 0.001s is the date SPLITS = pd.DataFrame( [ # Before query range, should be excluded. { "effective_date": str_to_seconds("2015-06-03"), "ratio": 1.103, "sid": 1, }, # First day of query range, should be excluded. { "effective_date": str_to_seconds("2015-06-10"), "ratio": 3.110, "sid": 3, }, # Third day of query range, should have last_row of 2 { "effective_date": str_to_seconds("2015-06-12"), "ratio": 3.112, "sid": 3, },
# NOTE(review): collapsed fragment -- the SPLITS literal below is cut
# off mid-list; left byte-identical rather than guessing at the missing
# remainder.
# ADJUSTMENTS use the following scheme to indicate information about the value # upon inspection. # # 1s place is the equity # # 0.1s place is the action type, with: # # splits, 1 # mergers, 2 # dividends, 3 # # 0.001s is the date SPLITS = DataFrame( [ # Before query range, should be excluded. {'effective_date': str_to_seconds('2015-06-03'), 'ratio': 1.103, 'sid': 1}, # First day of query range, should be excluded. {'effective_date': str_to_seconds('2015-06-10'), 'ratio': 3.110, 'sid': 3}, # Third day of query range, should have last_row of 2 {'effective_date': str_to_seconds('2015-06-12'), 'ratio': 3.112, 'sid': 3}, # After query range, should be excluded. {'effective_date': str_to_seconds('2015-06-21'), 'ratio': 6.121, 'sid': 6}, # Another action in query range, should have last_row of 1
def make_splits_data(cls):
    """One split record: AAPL, price ratio 1/7, effective 2014-06-09."""
    records = [{
        "effective_date": str_to_seconds("2014-06-09"),
        "ratio": (1 / 7.0),
        "sid": cls.AAPL,
    }]
    return DataFrame.from_records(records)
def test_ingest(self):
    """Ingest a synthetic 3-sid bundle and verify that the sids, minute
    bars, daily bars, and split adjustments all round-trip through the
    bundle's readers."""
    # Fixture window: one NYSE week.
    start = pd.Timestamp('2014-01-06', tz='utc')
    end = pd.Timestamp('2014-01-10', tz='utc')
    calendar = get_calendar('NYSE')
    sessions = calendar.sessions_in_range(start, end)
    minutes = calendar.minutes_for_sessions_in_range(start, end)

    sids = tuple(range(3))
    equities = make_simple_equity_info(
        sids,
        start,
        end,
    )

    daily_bar_data = make_bar_data(equities, sessions)
    minute_bar_data = make_bar_data(equities, minutes)
    # Two splits on different sids/sessions so the adjustment columns
    # can be distinguished below.
    first_split_ratio = 0.5
    second_split_ratio = 0.1
    splits = pd.DataFrame.from_records([
        {
            'effective_date': str_to_seconds('2014-01-08'),
            'ratio': first_split_ratio,
            'sid': 0,
        },
        {
            'effective_date': str_to_seconds('2014-01-09'),
            'ratio': second_split_ratio,
            'sid': 1,
        },
    ])

    @self.register(
        'bundle',
        calendar=calendar,
        start_session=start,
        end_session=end,
    )
    def bundle_ingest(environ,
                      asset_db_writer,
                      minute_bar_writer,
                      daily_bar_writer,
                      adjustment_writer,
                      calendar,
                      start_session,
                      end_session,
                      cache,
                      show_progress,
                      output_dir):
        # The ingest callback both writes the fixture data and checks
        # the arguments the machinery hands it.
        assert_is(environ, self.environ)
        asset_db_writer.write(equities=equities)
        minute_bar_writer.write(minute_bar_data)
        daily_bar_writer.write(daily_bar_data)
        adjustment_writer.write(splits=splits)
        assert_is_instance(calendar, TradingCalendar)
        assert_is_instance(cache, dataframe_cache)
        assert_is_instance(show_progress, bool)

    self.ingest('bundle', environ=self.environ)
    bundle = self.load('bundle', environ=self.environ)

    assert_equal(set(bundle.asset_finder.sids), set(sids))

    columns = 'open', 'high', 'low', 'close', 'volume'
    # Minute bars round-trip.
    actual = bundle.equity_minute_bar_reader.load_raw_arrays(
        columns,
        minutes[0],
        minutes[-1],
        sids,
    )
    for actual_column, colname in zip(actual, columns):
        assert_equal(
            actual_column,
            expected_bar_values_2d(minutes, equities, colname),
            msg=colname,
        )
    # Daily bars round-trip.
    actual = bundle.equity_daily_bar_reader.load_raw_arrays(
        columns,
        start,
        end,
        sids,
    )
    for actual_column, colname in zip(actual, columns):
        assert_equal(
            actual_column,
            expected_bar_values_2d(sessions, equities, colname),
            msg=colname,
        )
    adjustments_for_cols = bundle.adjustment_reader.load_adjustments(
        columns,
        sessions,
        pd.Index(sids),
    )
    for column, adjustments in zip(columns, adjustments_for_cols[:-1]):
        # iterate over all the adjustments but `volume`
        assert_equal(
            adjustments,
            {
                2: [Float64Multiply(
                    first_row=0,
                    last_row=2,
                    first_col=0,
                    last_col=0,
                    value=first_split_ratio,
                )],
                3: [Float64Multiply(
                    first_row=0,
                    last_row=3,
                    first_col=1,
                    last_col=1,
                    value=second_split_ratio,
                )],
            },
            msg=column,
        )
    # check the volume, the value should be 1/ratio
    assert_equal(
        adjustments_for_cols[-1],
        {
            2: [Float64Multiply(
                first_row=0,
                last_row=2,
                first_col=0,
                last_col=0,
                value=1 / first_split_ratio,
            )],
            3: [Float64Multiply(
                first_row=0,
                last_row=3,
                first_col=1,
                last_col=1,
                value=1 / second_split_ratio,
            )],
        },
        msg='volume',
    )
def test_ingest(self):
    """Ingest a synthetic 3-sid bundle over [START_DATE, END_DATE] and
    verify sids, minute bars, daily bars, and split adjustments through
    the bundle's readers."""
    calendar = get_calendar('XNYS')
    sessions = calendar.sessions_in_range(self.START_DATE, self.END_DATE)
    minutes = calendar.minutes_for_sessions_in_range(
        self.START_DATE, self.END_DATE,
    )

    sids = tuple(range(3))
    equities = make_simple_equity_info(
        sids,
        self.START_DATE,
        self.END_DATE,
    )

    daily_bar_data = make_bar_data(equities, sessions)
    minute_bar_data = make_bar_data(equities, minutes)
    # Two splits on different sids/sessions so the per-column
    # adjustments can be told apart below.
    first_split_ratio = 0.5
    second_split_ratio = 0.1
    splits = pd.DataFrame.from_records([
        {
            'effective_date': str_to_seconds('2014-01-08'),
            'ratio': first_split_ratio,
            'sid': 0,
        },
        {
            'effective_date': str_to_seconds('2014-01-09'),
            'ratio': second_split_ratio,
            'sid': 1,
        },
    ])

    @self.register(
        'bundle',
        calendar_name='NYSE',
        start_session=self.START_DATE,
        end_session=self.END_DATE,
    )
    def bundle_ingest(environ,
                      asset_db_writer,
                      minute_bar_writer,
                      daily_bar_writer,
                      adjustment_writer,
                      calendar,
                      start_session,
                      end_session,
                      cache,
                      output_dir):
        # Writes the fixture data and checks the arguments the bundle
        # machinery passes to a registered ingest function.
        assert_is(environ, self.environ)
        asset_db_writer.write(equities=equities)
        minute_bar_writer.write(minute_bar_data)
        daily_bar_writer.write(daily_bar_data)
        adjustment_writer.write(splits=splits)
        assert_is_instance(calendar, TradingCalendar)
        assert_is_instance(cache, dataframe_cache)

    self.ingest('bundle', environ=self.environ)
    bundle = self.load('bundle', environ=self.environ)

    assert_equal(set(bundle.asset_finder.sids), set(sids))

    columns = 'open', 'high', 'low', 'close', 'volume'
    # Minute bars round-trip.
    actual = bundle.equity_minute_bar_reader.load_raw_arrays(
        columns,
        minutes[0],
        minutes[-1],
        sids,
    )
    for actual_column, colname in zip(actual, columns):
        assert_equal(
            actual_column,
            expected_bar_values_2d(minutes, sids, equities, colname),
            msg=colname,
        )
    # Daily bars round-trip.
    actual = bundle.equity_daily_bar_reader.load_raw_arrays(
        columns,
        self.START_DATE,
        self.END_DATE,
        sids,
    )
    for actual_column, colname in zip(actual, columns):
        assert_equal(
            actual_column,
            expected_bar_values_2d(sessions, sids, equities, colname),
            msg=colname,
        )
    adjs_for_cols = bundle.adjustment_reader.load_pricing_adjustments(
        columns,
        sessions,
        pd.Index(sids),
    )
    for column, adjustments in zip(columns, adjs_for_cols[:-1]):
        # iterate over all the adjustments but `volume`
        assert_equal(
            adjustments,
            {
                2: [Float64Multiply(
                    first_row=0,
                    last_row=2,
                    first_col=0,
                    last_col=0,
                    value=first_split_ratio,
                )],
                3: [Float64Multiply(
                    first_row=0,
                    last_row=3,
                    first_col=1,
                    last_col=1,
                    value=second_split_ratio,
                )],
            },
            msg=column,
        )
    # check the volume, the value should be 1/ratio
    assert_equal(
        adjs_for_cols[-1],
        {
            2: [Float64Multiply(
                first_row=0,
                last_row=2,
                first_col=0,
                last_col=0,
                value=1 / first_split_ratio,
            )],
            3: [Float64Multiply(
                first_row=0,
                last_row=3,
                first_col=1,
                last_col=1,
                value=1 / second_split_ratio,
            )],
        },
        msg='volume',
    )
def test_ingest(self):
    """Ingest a synthetic 3-sid bundle (legacy calendar/opens/closes
    registration API) and verify sids, minute bars, daily bars, and
    split adjustments through the bundle's readers."""
    start = pd.Timestamp('2014-01-06', tz='utc')
    end = pd.Timestamp('2014-01-10', tz='utc')
    # Legacy API: the "calendar" here is a DatetimeIndex of sessions,
    # not a TradingCalendar object.
    trading_days = get_calendar('NYSE').all_trading_days
    calendar = trading_days[trading_days.slice_indexer(start, end)]
    minutes = get_calendar('NYSE').trading_minutes_for_days_in_range(
        calendar[0], calendar[-1])

    sids = tuple(range(3))
    equities = make_simple_equity_info(
        sids,
        calendar[0],
        calendar[-1],
    )

    daily_bar_data = make_bar_data(equities, calendar)
    minute_bar_data = make_bar_data(equities, minutes)
    # Two splits on different sids/sessions so the per-column
    # adjustments can be told apart below.
    first_split_ratio = 0.5
    second_split_ratio = 0.1
    splits = pd.DataFrame.from_records([
        {
            'effective_date': str_to_seconds('2014-01-08'),
            'ratio': first_split_ratio,
            'sid': 0,
        },
        {
            'effective_date': str_to_seconds('2014-01-09'),
            'ratio': second_split_ratio,
            'sid': 1,
        },
    ])

    schedule = get_calendar('NYSE').schedule

    @self.register(
        'bundle',
        calendar=calendar,
        opens=schedule.market_open[calendar[0]:calendar[-1]],
        closes=schedule.market_close[calendar[0]:calendar[-1]],
    )
    def bundle_ingest(environ,
                      asset_db_writer,
                      minute_bar_writer,
                      daily_bar_writer,
                      adjustment_writer,
                      calendar,
                      cache,
                      show_progress,
                      output_dir):
        # Writes the fixture data and checks the arguments the bundle
        # machinery passes to a registered ingest function.
        assert_is(environ, self.environ)
        asset_db_writer.write(equities=equities)
        minute_bar_writer.write(minute_bar_data)
        daily_bar_writer.write(daily_bar_data)
        adjustment_writer.write(splits=splits)
        assert_is_instance(calendar, pd.DatetimeIndex)
        assert_is_instance(cache, dataframe_cache)
        assert_is_instance(show_progress, bool)

    self.ingest('bundle', environ=self.environ)
    bundle = self.load('bundle', environ=self.environ)

    assert_equal(set(bundle.asset_finder.sids), set(sids))

    columns = 'open', 'high', 'low', 'close', 'volume'
    # Minute bars round-trip.
    actual = bundle.equity_minute_bar_reader.load_raw_arrays(
        columns,
        minutes[0],
        minutes[-1],
        sids,
    )
    for actual_column, colname in zip(actual, columns):
        assert_equal(
            actual_column,
            expected_bar_values_2d(minutes, equities, colname),
            msg=colname,
        )
    # Daily bars round-trip.
    actual = bundle.equity_daily_bar_reader.load_raw_arrays(
        columns,
        calendar[0],
        calendar[-1],
        sids,
    )
    for actual_column, colname in zip(actual, columns):
        assert_equal(
            actual_column,
            expected_bar_values_2d(calendar, equities, colname),
            msg=colname,
        )
    adjustments_for_cols = bundle.adjustment_reader.load_adjustments(
        columns,
        calendar,
        pd.Index(sids),
    )
    for column, adjustments in zip(columns, adjustments_for_cols[:-1]):
        # iterate over all the adjustments but `volume`
        assert_equal(
            adjustments,
            {
                2: [
                    Float64Multiply(
                        first_row=0,
                        last_row=2,
                        first_col=0,
                        last_col=0,
                        value=first_split_ratio,
                    )
                ],
                3: [
                    Float64Multiply(
                        first_row=0,
                        last_row=3,
                        first_col=1,
                        last_col=1,
                        value=second_split_ratio,
                    )
                ],
            },
            msg=column,
        )
    # check the volume, the value should be 1/ratio
    assert_equal(
        adjustments_for_cols[-1],
        {
            2: [
                Float64Multiply(
                    first_row=0,
                    last_row=2,
                    first_col=0,
                    last_col=0,
                    value=1 / first_split_ratio,
                )
            ],
            3: [
                Float64Multiply(
                    first_row=0,
                    last_row=3,
                    first_col=1,
                    last_col=1,
                    value=1 / second_split_ratio,
                )
            ],
        },
        msg='volume',
    )
def test_ingest(self):
    """Ingest a synthetic 3-sid bundle (pytest-style assertions) and
    verify sids, minute bars, daily bars, and split adjustments through
    the bundle's readers."""
    calendar = get_calendar("XNYS")
    sessions = calendar.sessions_in_range(self.START_DATE, self.END_DATE)
    minutes = calendar.minutes_for_sessions_in_range(
        self.START_DATE,
        self.END_DATE,
    )

    sids = tuple(range(3))
    equities = make_simple_equity_info(
        sids,
        self.START_DATE,
        self.END_DATE,
    )

    daily_bar_data = make_bar_data(equities, sessions)
    minute_bar_data = make_bar_data(equities, minutes)
    # Two splits on different sids/sessions so the per-column
    # adjustments can be told apart below.
    first_split_ratio = 0.5
    second_split_ratio = 0.1
    splits = pd.DataFrame.from_records([
        {
            "effective_date": str_to_seconds("2014-01-08"),
            "ratio": first_split_ratio,
            "sid": 0,
        },
        {
            "effective_date": str_to_seconds("2014-01-09"),
            "ratio": second_split_ratio,
            "sid": 1,
        },
    ])

    @self.register(
        "bundle",
        calendar_name="NYSE",
        start_session=self.START_DATE,
        end_session=self.END_DATE,
    )
    def bundle_ingest(
        environ,
        asset_db_writer,
        minute_bar_writer,
        daily_bar_writer,
        adjustment_writer,
        calendar,
        start_session,
        end_session,
        cache,
        show_progress,
        output_dir,
    ):
        # Writes the fixture data and checks the arguments the bundle
        # machinery passes to a registered ingest function.
        assert environ is self.environ
        asset_db_writer.write(equities=equities)
        minute_bar_writer.write(minute_bar_data)
        daily_bar_writer.write(daily_bar_data)
        adjustment_writer.write(splits=splits)
        assert isinstance(calendar, TradingCalendar)
        assert isinstance(cache, dataframe_cache)
        assert isinstance(show_progress, bool)

    self.ingest("bundle", environ=self.environ)
    bundle = self.load("bundle", environ=self.environ)

    assert set(bundle.asset_finder.sids) == set(sids)

    columns = "open", "high", "low", "close", "volume"
    # Minute bars round-trip.
    actual = bundle.equity_minute_bar_reader.load_raw_arrays(
        columns,
        minutes[0],
        minutes[-1],
        sids,
    )
    for actual_column, colname in zip(actual, columns):
        np.testing.assert_array_equal(
            actual_column,
            expected_bar_values_2d(minutes, sids, equities, colname),
            err_msg=colname,
        )
    # Daily bars round-trip.
    actual = bundle.equity_daily_bar_reader.load_raw_arrays(
        columns,
        self.START_DATE,
        self.END_DATE,
        sids,
    )
    for actual_column, colname in zip(actual, columns):
        np.testing.assert_array_equal(
            actual_column,
            expected_bar_values_2d(sessions, sids, equities, colname),
            err_msg=colname,
        )
    adjs_for_cols = bundle.adjustment_reader.load_pricing_adjustments(
        columns,
        sessions,
        pd.Index(sids),
    )
    for column, adjustments in zip(columns, adjs_for_cols[:-1]):
        # iterate over all the adjustments but `volume`
        assert adjustments == {
            2: [
                Float64Multiply(
                    first_row=0,
                    last_row=2,
                    first_col=0,
                    last_col=0,
                    value=first_split_ratio,
                )
            ],
            3: [
                Float64Multiply(
                    first_row=0,
                    last_row=3,
                    first_col=1,
                    last_col=1,
                    value=second_split_ratio,
                )
            ],
        }, column
    # check the volume, the value should be 1/ratio
    assert adjs_for_cols[-1] == {
        2: [
            Float64Multiply(
                first_row=0,
                last_row=2,
                first_col=0,
                last_col=0,
                value=1 / first_split_ratio,
            )
        ],
        3: [
            Float64Multiply(
                first_row=0,
                last_row=3,
                first_col=1,
                last_col=1,
                value=1 / second_split_ratio,
            )
        ],
    }, "volume"
def test_ingest(self):
    """Ingest a synthetic 3-sid bundle and verify sids, minute bars,
    daily bars, and split adjustments through the bundle's readers."""
    calendar = get_calendar("NYSE")
    sessions = calendar.sessions_in_range(self.START_DATE, self.END_DATE)
    minutes = calendar.minutes_for_sessions_in_range(self.START_DATE, self.END_DATE)

    sids = tuple(range(3))
    equities = make_simple_equity_info(sids, self.START_DATE, self.END_DATE)

    daily_bar_data = make_bar_data(equities, sessions)
    minute_bar_data = make_bar_data(equities, minutes)
    # Two splits on different sids/sessions so the per-column
    # adjustments can be told apart below.
    first_split_ratio = 0.5
    second_split_ratio = 0.1
    splits = pd.DataFrame.from_records(
        [
            {"effective_date": str_to_seconds("2014-01-08"), "ratio": first_split_ratio, "sid": 0},
            {"effective_date": str_to_seconds("2014-01-09"), "ratio": second_split_ratio, "sid": 1},
        ]
    )

    @self.register("bundle", calendar_name="NYSE", start_session=self.START_DATE, end_session=self.END_DATE)
    def bundle_ingest(
        environ,
        asset_db_writer,
        minute_bar_writer,
        daily_bar_writer,
        adjustment_writer,
        calendar,
        start_session,
        end_session,
        cache,
        show_progress,
        output_dir,
    ):
        # Writes the fixture data and checks the arguments the bundle
        # machinery passes to a registered ingest function.
        assert_is(environ, self.environ)
        asset_db_writer.write(equities=equities)
        minute_bar_writer.write(minute_bar_data)
        daily_bar_writer.write(daily_bar_data)
        adjustment_writer.write(splits=splits)
        assert_is_instance(calendar, TradingCalendar)
        assert_is_instance(cache, dataframe_cache)
        assert_is_instance(show_progress, bool)

    self.ingest("bundle", environ=self.environ)
    bundle = self.load("bundle", environ=self.environ)

    assert_equal(set(bundle.asset_finder.sids), set(sids))

    columns = "open", "high", "low", "close", "volume"
    # Minute bars round-trip.
    actual = bundle.equity_minute_bar_reader.load_raw_arrays(columns, minutes[0], minutes[-1], sids)
    for actual_column, colname in zip(actual, columns):
        assert_equal(actual_column, expected_bar_values_2d(minutes, equities, colname), msg=colname)
    # Daily bars round-trip.
    actual = bundle.equity_daily_bar_reader.load_raw_arrays(columns, self.START_DATE, self.END_DATE, sids)
    for actual_column, colname in zip(actual, columns):
        assert_equal(actual_column, expected_bar_values_2d(sessions, equities, colname), msg=colname)
    adjustments_for_cols = bundle.adjustment_reader.load_adjustments(columns, sessions, pd.Index(sids))
    for column, adjustments in zip(columns, adjustments_for_cols[:-1]):
        # iterate over all the adjustments but `volume`
        assert_equal(
            adjustments,
            {
                2: [Float64Multiply(first_row=0, last_row=2, first_col=0, last_col=0, value=first_split_ratio)],
                3: [Float64Multiply(first_row=0, last_row=3, first_col=1, last_col=1, value=second_split_ratio)],
            },
            msg=column,
        )
    # check the volume, the value should be 1/ratio
    assert_equal(
        adjustments_for_cols[-1],
        {
            2: [Float64Multiply(first_row=0, last_row=2, first_col=0, last_col=0, value=1 / first_split_ratio)],
            3: [Float64Multiply(first_row=0, last_row=3, first_col=1, last_col=1, value=1 / second_split_ratio)],
        },
        msg="volume",
    )