def setUpClass(cls):
    """Create the class-level asset finder and earnings dataset fixtures.

    The temporary asset finder is registered on ``cls._cleanup_stack`` so
    that closing the stack releases it.
    """
    cleanup = ExitStack()
    cls._cleanup_stack = cleanup
    cls.cols = {}
    # Key every earnings case frame by its enumeration index.
    cls.dataset = dict(enumerate(earnings_cases))
    equity_info = cls.get_equity_info()
    cls.finder = cleanup.enter_context(tmp_asset_finder(equities=equity_info))
    cls.loader_type = EarningsCalendarLoader
def _test_id(self, df, dshape, expected, finder, add):
    """Run a pipeline of ``latest`` columns over ``df`` and check the result.

    Parameters
    ----------
    df
        Data wrapped with ``bz.data`` as the baseline expression.
    dshape
        Datashape passed to ``bz.data``.
    expected
        Frame compared against the pipeline output after being passed
        through ``_utc_localize_index_level_0``.
    finder
        Accepted for interface compatibility only.
        NOTE(review): the pipeline engine below always runs against a fresh
        default ``tmp_asset_finder()``; this argument is never read. Confirm
        whether callers expect their own finder to be used.
    add
        Iterable of dataset column names whose ``latest`` is added to the
        pipeline under the same name.
    """
    blaze_loader = BlazeLoader()
    dataset = from_blaze(
        bz.data(df, name='expr', dshape=dshape),
        loader=blaze_loader,
        no_deltas_rule=no_deltas_rules.ignore,
        missing_values=self.missing_values,
    )

    pipeline = Pipeline()
    for column_name in add:
        pipeline.add(getattr(dataset, column_name).latest, column_name)

    calendar = self.dates
    with tmp_asset_finder() as engine_finder:
        engine = SimplePipelineEngine(blaze_loader, calendar, engine_finder)
        result = engine.run_pipeline(pipeline, calendar[0], calendar[-1])

    assert_frame_equal(
        result,
        _utc_localize_index_level_0(expected),
        check_dtype=False,
    )
def test_novel_deltas_macro(self):
    """Run macro (sid-less) deltas over a calendar that skips two days.

    The baseline has rows for 2014-01-01 and 2014-01-04; the deltas add 10
    to each value and move each timestamp one day later. The calendar omits
    the 4th and 5th.
    """
    asset_info = asset_infos[0][0]
    nassets = len(asset_info)

    base_dates = pd.DatetimeIndex(
        [pd.Timestamp('2014-01-01'), pd.Timestamp('2014-01-04')],
    )
    baseline = pd.DataFrame({
        'value': (0, 1),
        'asof_date': base_dates,
        'timestamp': base_dates,
    })
    expr = bz.data(baseline, name='expr', dshape=self.macro_dshape)
    raw_deltas = bz.data(baseline, name='deltas', dshape=self.macro_dshape)
    deltas = bz.transform(
        raw_deltas,
        value=raw_deltas.value + 10,
        timestamp=raw_deltas.timestamp + timedelta(days=1),
    )

    view_on_3rd = repeat_last_axis(np.array([10.0, 10.0, 10.0]), nassets)
    view_on_6th = repeat_last_axis(np.array([10.0, 10.0, 11.0]), nassets)
    expected_views = keymap(pd.Timestamp, {
        '2014-01-03': view_on_3rd,
        '2014-01-06': view_on_6th,
    })

    # The 4th and 5th are omitted to simulate a weekend.
    cal = pd.DatetimeIndex([
        pd.Timestamp(day)
        for day in ('2014-01-01', '2014-01-02', '2014-01-03', '2014-01-06')
    ])

    with tmp_asset_finder(equities=asset_info) as finder:
        output_index = pd.MultiIndex.from_product((
            sorted(expected_views.keys()),
            finder.retrieve_all(asset_info.index),
        ))
        expected_output = pd.DataFrame(
            [10] * nassets + [11] * nassets,
            index=output_index,
            columns=('value',),
        )
        self._run_pipeline(
            expr,
            deltas,
            expected_views,
            expected_output,
            finder,
            calendar=cal,
            start=cal[2],
            end=cal[-1],
            window_length=3,
            compute_fn=op.itemgetter(-1),
        )
def test_id_take_last_in_group_macro(self):
    """Run ``_test_id`` on a macro frame with two rows per asof_date.

    output (expected):

                                   other  value
        2014-01-01 Equity(65 [A])    NaN      1
                   Equity(66 [B])    NaN      1
                   Equity(67 [C])    NaN      1
        2014-01-02 Equity(65 [A])      1      2
                   Equity(66 [B])      1      2
                   Equity(67 [C])      1      2
        2014-01-03 Equity(65 [A])      2      2
                   Equity(66 [B])      2      2
                   Equity(67 [C])      2      2
    """
    ts = pd.Timestamp
    nan = np.nan
    df = pd.DataFrame(
        columns=['asof_date', 'timestamp', 'other', 'value'],
        data=[
            [ts('2014-01-01'), ts('2014-01-01 00'), nan, 1],
            [ts('2014-01-01'), ts('2014-01-01 01'), nan, nan],
            [ts('2014-01-02'), ts('2014-01-02 00'), 1, nan],
            [ts('2014-01-02'), ts('2014-01-02 01'), nan, 2],
            [ts('2014-01-03'), ts('2014-01-03 00'), 2, nan],
            [ts('2014-01-03'), ts('2014-01-03 01'), 3, 3],
        ],
    )

    # 'other' reuses the measure type of 'value'.
    fields = OrderedDict(self.macro_dshape.measure.fields)
    fields['other'] = fields['value']

    # One (other, value) row per date; each is repeated for the three
    # equities 65 [A], 66 [B] and 67 [C].
    rows_by_date = (
        (nan, 1),  # 2014-01-01
        (1, 2),    # 2014-01-02
        (2, 2),    # 2014-01-03
    )

    with tmp_asset_finder() as finder:
        expected = pd.DataFrame(
            columns=['other', 'value'],
            data=[list(row) for row in rows_by_date for _ in range(3)],
            index=pd.MultiIndex.from_product(
                (self.dates, finder.retrieve_all(self.sids)),
            ),
        )
        self._test_id(
            df,
            var * Record(fields),
            expected,
            finder,
            ('value', 'other'),
        )
def init_class_fixtures(cls):
    """Build the calendar, asset ids, temporary asset finder and lifetimes mask.

    The mask covers the last 30 calendar sessions and is computed with
    ``include_start_date=False``.
    """
    super(BasePipelineTestCase, cls).init_class_fixtures()

    calendar = date_range("2014", "2015", freq=cls.trading_calendar.day)
    cls.__calendar = calendar

    assets = Int64Index(arange(1, 20))
    cls.__assets = assets

    equity_info = make_simple_equity_info(assets, calendar[0], calendar[-1])
    finder_ctx = tmp_asset_finder(equities=equity_info)
    cls.__tmp_finder_ctx = finder_ctx

    # The context is entered by hand and kept on the class.
    # NOTE(review): the matching __exit__ is not in this method - confirm
    # that teardown performs it.
    finder = finder_ctx.__enter__()
    cls.__finder = finder
    cls.__mask = finder.lifetimes(calendar[-30:], include_start_date=False)
def setUpClass(cls):
    """Create the class-level asset finder and buyback dataset fixtures.

    The temporary asset finder is registered on ``cls._cleanup_stack`` so
    that closing the stack releases it.
    """
    cleanup = ExitStack()
    cls._cleanup_stack = cleanup
    equity_info = cls.get_equity_info()
    cls.finder = cleanup.enter_context(tmp_asset_finder(equities=equity_info))
    cls.cols = {}

    # Key each case frame by its enumeration index, without the cash column.
    dataset = {}
    for sid, frame in enumerate(buyback_authorizations_cases):
        dataset[sid] = frame.drop(CASH_FIELD_NAME, axis=1)
    cls.dataset = dataset

    cls.loader_type = ShareBuybackAuthorizationsLoader
def test_id_ffill_out_of_window_macro_dataset(self):
    """Run ``_test_id`` on macro rows dated ten days before ``self.dates``.

    input (df):

            asof_date  timestamp  other  value
        0  2013-12-22 2013-12-22    NaN      0
        1  2013-12-23 2013-12-23      1    NaN
        2  2013-12-24 2013-12-24    NaN    NaN

    output (expected):

                                   other  value
        2014-01-01 Equity(65 [A])      1      0
                   Equity(66 [B])      1      0
                   Equity(67 [C])      1      0
        2014-01-02 Equity(65 [A])      1      0
                   Equity(66 [B])      1      0
                   Equity(67 [C])      1      0
        2014-01-03 Equity(65 [A])      1      0
                   Equity(66 [B])      1      0
                   Equity(67 [C])      1      0
    """
    nan = np.nan
    early_dates = self.dates - timedelta(days=10)
    df = pd.DataFrame({
        'value': (0, nan, nan),
        'other': (nan, 1, nan),
        'asof_date': early_dates,
        'timestamp': early_dates,
    })

    # 'other' reuses the measure type of 'value'.
    fields = OrderedDict(self.macro_dshape.measure.fields)
    fields['other'] = fields['value']

    with tmp_asset_finder() as finder:
        index = pd.MultiIndex.from_product(
            (self.dates, finder.retrieve_all(self.sids)),
        )
        # Nine identical (value, other) rows, then columns sorted by name.
        unsorted_expected = pd.DataFrame(
            np.array([[0, 1]] * 9),
            columns=['value', 'other'],
            index=index,
        )
        expected = unsorted_expected.sort_index(axis=1)
        self._test_id(
            df,
            var * Record(fields),
            expected,
            finder,
            ('value', 'other'),
        )
def test_id_take_last_in_group(self):
    """Run ``_test_id`` on a frame with two rows per (asof_date, sid)."""
    ts = pd.Timestamp
    nan = np.nan
    input_columns = ['asof_date', 'timestamp', 'sid', 'other', 'value']
    input_rows = [
        [ts('2014-01-01'), ts('2014-01-01 00'), 65, 0, 0],
        [ts('2014-01-01'), ts('2014-01-01 01'), 65, 1, nan],
        [ts('2014-01-01'), ts('2014-01-01 00'), 66, nan, nan],
        [ts('2014-01-01'), ts('2014-01-01 01'), 66, nan, 1],
        [ts('2014-01-01'), ts('2014-01-01 00'), 67, 2, nan],
        [ts('2014-01-01'), ts('2014-01-01 01'), 67, nan, nan],
        [ts('2014-01-02'), ts('2014-01-02 00'), 65, nan, nan],
        [ts('2014-01-02'), ts('2014-01-02 01'), 65, nan, 1],
        [ts('2014-01-02'), ts('2014-01-02 00'), 66, nan, nan],
        [ts('2014-01-02'), ts('2014-01-02 01'), 66, 2, nan],
        [ts('2014-01-02'), ts('2014-01-02 00'), 67, 3, 3],
        [ts('2014-01-02'), ts('2014-01-02 01'), 67, 3, 3],
        [ts('2014-01-03'), ts('2014-01-03 00'), 65, 2, nan],
        [ts('2014-01-03'), ts('2014-01-03 01'), 65, 2, nan],
        [ts('2014-01-03'), ts('2014-01-03 00'), 66, 3, 3],
        [ts('2014-01-03'), ts('2014-01-03 01'), 66, nan, nan],
        [ts('2014-01-03'), ts('2014-01-03 00'), 67, nan, nan],
        [ts('2014-01-03'), ts('2014-01-03 01'), 67, nan, 4],
    ]
    df = pd.DataFrame(columns=input_columns, data=input_rows)

    # 'other' reuses the measure type of 'value'.
    fields = OrderedDict(self.dshape.measure.fields)
    fields['other'] = fields['value']

    expected_rows = [
        [1, 0],      # 2014-01-01 Equity(65 [A])
        [nan, 1],    #            Equity(66 [B])
        [2, nan],    #            Equity(67 [C])
        [1, 1],      # 2014-01-02 Equity(65 [A])
        [2, 1],      #            Equity(66 [B])
        [3, 3],      #            Equity(67 [C])
        [2, 1],      # 2014-01-03 Equity(65 [A])
        [3, 3],      #            Equity(66 [B])
        [3, 3],      #            Equity(67 [C])
    ]

    with tmp_asset_finder() as finder:
        expected = pd.DataFrame(
            columns=['other', 'value'],
            data=expected_rows,
            index=pd.MultiIndex.from_product(
                (self.dates, finder.retrieve_all(self.sids)),
            ),
        )
        self._test_id(
            df,
            var * Record(fields),
            expected,
            finder,
            ('value', 'other'),
        )
def test_id_ffill_out_of_window(self):
    """Run ``_test_id`` on per-sid rows dated ten days before ``self.dates``.

    input (df):

            asof_date  timestamp  sid  other  value
        0  2013-12-22 2013-12-22   65      0      0
        1  2013-12-22 2013-12-22   66    NaN      1
        2  2013-12-22 2013-12-22   67      2    NaN
        3  2013-12-23 2013-12-23   65    NaN      1
        4  2013-12-23 2013-12-23   66      2    NaN
        5  2013-12-23 2013-12-23   67      3      3
        6  2013-12-24 2013-12-24   65      2    NaN
        7  2013-12-24 2013-12-24   66      3      3
        8  2013-12-24 2013-12-24   67    NaN      4

    output (expected):

                                   other  value
        2014-01-01 Equity(65 [A])      2      1
                   Equity(66 [B])      3      3
                   Equity(67 [C])      3      4
        2014-01-02 Equity(65 [A])      2      1
                   Equity(66 [B])      3      3
                   Equity(67 [C])      3      4
        2014-01-03 Equity(65 [A])      2      1
                   Equity(66 [B])      3      3
                   Equity(67 [C])      3      4
    """
    nan = np.nan
    early_dates = self.dates.repeat(3) - timedelta(days=10)
    df = pd.DataFrame({
        'sid': self.sids * 3,
        'value': (0, 1, nan, 1, nan, 3, nan, 3, 4),
        'other': (0, nan, 2, nan, 2, 3, 2, 3, nan),
        'asof_date': early_dates,
        'timestamp': early_dates,
    })

    # 'other' reuses the measure type of 'value'.
    fields = OrderedDict(self.dshape.measure.fields)
    fields['other'] = fields['value']

    # The same three (other, value) rows are expected on each of three dates.
    rows_per_date = [[2, 1], [3, 3], [3, 4]]

    with tmp_asset_finder() as finder:
        expected = pd.DataFrame(
            np.array(rows_per_date * 3),
            columns=['other', 'value'],
            index=pd.MultiIndex.from_product(
                (self.dates, finder.retrieve_all(self.sids)),
            ),
        )
        self._test_id(
            df,
            var * Record(fields),
            expected,
            finder,
            ('value', 'other'),
        )
def setUpClass(cls):
    """Build the calendar, asset ids, temporary asset finder and lifetimes mask.

    The mask covers the last 30 calendar sessions and is computed with
    ``include_start_date=False``.
    """
    calendar = date_range('2014', '2015', freq=trading_day)
    cls.__calendar = calendar

    assets = Int64Index(arange(1, 20))
    cls.__assets = assets

    equity_info = make_simple_equity_info(assets, calendar[0], calendar[-1])
    finder_ctx = tmp_asset_finder(equities=equity_info)
    cls.__tmp_finder_ctx = finder_ctx

    # The context is entered by hand and kept on the class.
    # NOTE(review): the matching __exit__ is not in this method - confirm
    # that teardown performs it.
    finder = finder_ctx.__enter__()
    cls.__finder = finder
    cls.__mask = finder.lifetimes(calendar[-30:], include_start_date=False)
def test_deltas_only_one_delta_in_universe(self, asset_info):
    """Run the pipeline with deltas for sids 65 and 66 only.

    Parameters
    ----------
    asset_info
        Equity metadata handed to ``tmp_asset_finder``. When it has four
        rows, every expected view gains a trailing all-NaN column.
    """
    expr = bz.data(self.df, name='expr', dshape=self.dshape)

    delta_frame = pd.DataFrame({
        'sid': [65, 66],
        'asof_date': [self.dates[1], self.dates[0]],
        'timestamp': [self.dates[2], self.dates[1]],
        'value': [10, 11],
    })
    deltas = bz.data(delta_frame, name='deltas', dshape=self.dshape)

    expected_views = keymap(pd.Timestamp, {
        '2014-01-02': np.array([[0.0, 11.0, 2.0],
                                [1.0, 2.0, 3.0]]),
        '2014-01-03': np.array([[10.0, 2.0, 3.0],
                                [2.0, 3.0, 4.0]]),
        '2014-01-04': np.array([[2.0, 3.0, 4.0],
                                [2.0, 3.0, 4.0]]),
    })

    nassets = len(asset_info)
    if nassets == 4:
        def with_nan_column(view):
            # Append a NaN column for the fourth asset.
            return np.c_[view, [np.nan, np.nan]]

        expected_views = valmap(with_nan_column, expected_views)

    with tmp_asset_finder(equities=asset_info) as finder:
        output_index = pd.MultiIndex.from_product((
            sorted(expected_views.keys()),
            finder.retrieve_all(asset_info.index),
        ))
        expected_output = pd.DataFrame(
            columns=['value'],
            data=np.array([11, 10, 4]).repeat(len(asset_info.index)),
            index=output_index,
        )

        # Extend the calendar by one day past the last fixture date.
        calendar = self.dates
        calendar = calendar.insert(
            len(calendar),
            calendar[-1] + timedelta(days=1),
        )
        self._run_pipeline(
            expr,
            deltas,
            expected_views,
            expected_output,
            finder,
            calendar=calendar,
            start=calendar[1],
            end=calendar[-1],
            window_length=2,
            compute_fn=np.nanmax,
        )
def test_read_from_asset_finder(self):
    """Check exchange info read back from a temporary asset finder.

    Eight equities are written, two per exchange, together with an
    exchanges table. The finder's ``exchange_info`` map and each retrieved
    asset's ``exchange_info`` are compared with the ``ExchangeInfo`` objects
    the table was built from.
    """
    sids = list(range(8))

    # Two consecutive sids per exchange.
    exchange_names = [
        name
        for name in (
            'NEW YORK STOCK EXCHANGE',
            'NASDAQ STOCK MARKET',
            'TOKYO STOCK EXCHANGE',
            'OSAKA STOCK EXCHANGE',
        )
        for _ in range(2)
    ]
    equities = pd.DataFrame({
        'sid': sids,
        'real_sid': [str(sid) for sid in sids],
        'currency': ['USD'] * len(sids),
        'exchange': exchange_names,
        'symbol': [chr(65 + sid) for sid in sids],
    })

    exchange_infos = [
        ExchangeInfo('NEW YORK STOCK EXCHANGE', 'NYSE', 'US'),
        ExchangeInfo('NASDAQ STOCK MARKET', 'NYSE', 'US'),
        ExchangeInfo('TOKYO STOCK EXCHANGE', 'JPX', 'JP'),
        ExchangeInfo('OSAKA STOCK EXCHANGE', 'JPX', 'JP'),
    ]
    table_rows = []
    expected_exchange_info_map = {}
    for info in exchange_infos:
        table_rows.append(
            (info.name, info.canonical_name, info.country_code),
        )
        expected_exchange_info_map[info.name] = info
    exchange_info_table = pd.DataFrame(
        table_rows,
        columns=['exchange', 'canonical_name', 'country_code'],
    )

    with tmp_asset_finder(equities=equities,
                          exchanges=exchange_info_table) as af:
        actual_exchange_info_map = af.exchange_info
        assets = af.retrieve_all(sids)

    assert_equal(actual_exchange_info_map, expected_exchange_info_map)

    for asset in assets:
        exchange_name = exchange_names[asset.sid]
        assert_equal(
            asset.exchange_info,
            expected_exchange_info_map[exchange_name],
        )
def setUpClass(cls):
    """Prepare the shared calendar, assets, asset finder and lifetimes mask.

    The lifetimes mask is taken over the final 30 calendar entries with
    ``include_start_date=False``.
    """
    sessions = date_range('2014', '2015', freq=trading_day)
    cls.__calendar = sessions

    asset_ids = Int64Index(arange(1, 20))
    cls.__assets = asset_ids

    ctx = tmp_asset_finder(
        equities=make_simple_equity_info(
            asset_ids,
            sessions[0],
            sessions[-1],
        ),
    )
    cls.__tmp_finder_ctx = ctx

    # Entered manually so the finder outlives this method.
    # NOTE(review): the matching __exit__ is not visible here - confirm
    # that teardown performs it.
    asset_finder = ctx.__enter__()
    cls.__finder = asset_finder
    cls.__mask = asset_finder.lifetimes(
        sessions[-30:],
        include_start_date=False,
    )
def test_deltas_only_one_delta_in_universe(self, asset_info):
    """Run the pipeline with deltas supplied for sids 65 and 66 only.

    Parameters
    ----------
    asset_info
        Equity metadata handed to ``tmp_asset_finder``. When it has four
        rows, every expected view gains a trailing all-NaN column.
    """
    expr = bz.data(self.df, name='expr', dshape=self.dshape)

    first_day, second_day, third_day = (
        self.dates[0], self.dates[1], self.dates[2],
    )
    delta_frame = pd.DataFrame({
        'sid': [65, 66],
        'asof_date': [second_day, first_day],
        'timestamp': [third_day, second_day],
        'value': [10, 11],
    })
    deltas = bz.data(delta_frame, name='deltas', dshape=self.dshape)

    views_by_day = {
        '2014-01-02': np.array([[0.0, 11.0, 2.0], [1.0, 2.0, 3.0]]),
        '2014-01-03': np.array([[10.0, 2.0, 3.0], [2.0, 3.0, 4.0]]),
        '2014-01-04': np.array([[2.0, 3.0, 4.0], [2.0, 3.0, 4.0]]),
    }
    expected_views = keymap(pd.Timestamp, views_by_day)

    nassets = len(asset_info)
    if nassets == 4:
        # Append a NaN column for the fourth asset.
        expected_views = valmap(
            lambda view: np.c_[view, [np.nan, np.nan]],
            expected_views,
        )

    with tmp_asset_finder(equities=asset_info) as finder:
        values = np.array([11, 10, 4]).repeat(len(asset_info.index))
        expected_output = pd.DataFrame(
            columns=['value'],
            data=values,
            index=pd.MultiIndex.from_product((
                sorted(expected_views.keys()),
                finder.retrieve_all(asset_info.index),
            )),
        )

        # Extend the calendar by one day past the last fixture date.
        base_dates = self.dates
        next_day = base_dates[-1] + timedelta(days=1)
        dates = base_dates.insert(len(base_dates), next_day)
        self._run_pipeline(
            expr,
            deltas,
            expected_views,
            expected_output,
            finder,
            calendar=dates,
            start=dates[1],
            end=dates[-1],
            window_length=2,
            compute_fn=np.nanmax,
        )
def test_id_multiple_columns(self):
    """Run ``_test_id`` with an extra ``other`` column equal to ``value + 1``.

    input (df):

            asof_date  sid  timestamp  value  other
        0  2014-01-01   65 2014-01-01      0      1
        1  2014-01-01   66 2014-01-01      1      2
        2  2014-01-01   67 2014-01-01      2      3
        3  2014-01-02   65 2014-01-02      1      2
        4  2014-01-02   66 2014-01-02      2      3
        5  2014-01-02   67 2014-01-02      3      4
        6  2014-01-03   65 2014-01-03      2      3
        7  2014-01-03   66 2014-01-03      3      4
        8  2014-01-03   67 2014-01-03      4      5

    output (expected):

                                   value  other
        2014-01-01 Equity(65 [A])      0      1
                   Equity(66 [B])      1      2
                   Equity(67 [C])      2      3
        2014-01-02 Equity(65 [A])      1      2
                   Equity(66 [B])      2      3
                   Equity(67 [C])      3      4
        2014-01-03 Equity(65 [A])      2      3
                   Equity(66 [B])      3      4
                   Equity(67 [C])      4      5
    """
    df = self.df.copy()
    df['other'] = df.value + 1

    # 'other' reuses the measure type of 'value'.
    fields = OrderedDict(self.dshape.measure.fields)
    fields['other'] = fields['value']

    with tmp_asset_finder() as finder:
        without_asof = df.drop('asof_date', axis=1)
        indexed = without_asof.set_index(['timestamp', 'sid'])
        expected = indexed.sort_index(axis=1)

        # Swap the raw sids in the index for the finder's asset objects.
        timestamps, sid_level = expected.index.levels[:2]
        expected.index = pd.MultiIndex.from_product((
            timestamps,
            finder.retrieve_all(sid_level),
        ))
        self._test_id(
            df,
            var * Record(fields),
            expected,
            finder,
            ('value', 'int_value', 'other'),
        )
def init_class_fixtures(cls):
    """Prepare the shared calendar, assets, asset finder and lifetimes mask.

    Runs the parent class fixtures first. The lifetimes mask is taken over
    the final 30 calendar entries with ``include_start_date=False``.
    """
    super(BasePipelineTestCase, cls).init_class_fixtures()

    sessions = date_range('2014', '2015', freq=cls.trading_calendar.day)
    cls.__calendar = sessions

    asset_ids = Int64Index(arange(1, 20))
    cls.__assets = asset_ids

    ctx = tmp_asset_finder(
        equities=make_simple_equity_info(
            asset_ids,
            sessions[0],
            sessions[-1],
        ),
    )
    cls.__tmp_finder_ctx = ctx

    # Entered manually so the finder outlives this method.
    # NOTE(review): the matching __exit__ is not visible here - confirm
    # that teardown performs it.
    asset_finder = ctx.__enter__()
    cls.__finder = asset_finder
    cls.__mask = asset_finder.lifetimes(
        sessions[-30:],
        include_start_date=False,
    )
def test_id_macro_dataset_multiple_columns(self):
    """Run ``_test_id`` on the macro frame plus ``other = value + 1``.

    input (df):

            asof_date  timestamp  other  value
        0  2014-01-01 2014-01-01      1      0
        3  2014-01-02 2014-01-02      2      1
        6  2014-01-03 2014-01-03      3      2

    output (expected):

                                   other  value
        2014-01-01 Equity(65 [A])      1      0
                   Equity(66 [B])      1      0
                   Equity(67 [C])      1      0
        2014-01-02 Equity(65 [A])      2      1
                   Equity(66 [B])      2      1
                   Equity(67 [C])      2      1
        2014-01-03 Equity(65 [A])      3      2
                   Equity(66 [B])      3      2
                   Equity(67 [C])      3      2
    """
    df = self.macro_df.copy()
    df['other'] = df.value + 1

    # 'other' reuses the measure type of 'value'.
    fields = OrderedDict(self.macro_dshape.measure.fields)
    fields['other'] = fields['value']

    asset_info = asset_infos[0][0]
    # One (value, other) row per date, each repeated three times.
    per_date = np.array([[0, 1], [1, 2], [2, 3]])

    with tmp_asset_finder(equities=asset_info) as finder:
        index = pd.MultiIndex.from_product((
            df.timestamp,
            finder.retrieve_all(asset_info.index),
        ))
        unsorted_expected = pd.DataFrame(
            per_date.repeat(3, axis=0),
            index=index,
            columns=('value', 'other'),
        )
        expected = unsorted_expected.sort_index(axis=1)
        self._test_id(
            df,
            var * Record(fields),
            expected,
            finder,
            ('value', 'other'),
        )
def test_deltas_macro(self):
    """Run the pipeline with macro (sid-less) deltas.

    The deltas are built from all but the last row of ``self.macro_df``,
    with 10 added to each value and each timestamp moved one day later.
    """
    asset_info = asset_infos[0][0]
    nassets = len(asset_info)

    expr = bz.data(self.macro_df, name='expr', dshape=self.macro_dshape)
    raw_deltas = bz.data(
        self.macro_df.iloc[:-1],
        name='deltas',
        dshape=self.macro_dshape,
    )
    deltas = bz.transform(
        raw_deltas,
        value=raw_deltas.value + 10,
        timestamp=raw_deltas.timestamp + timedelta(days=1),
    )

    view_on_2nd = repeat_last_axis(np.array([10.0, 1.0]), nassets)
    view_on_3rd = repeat_last_axis(np.array([11.0, 2.0]), nassets)
    expected_views = keymap(pd.Timestamp, {
        '2014-01-02': view_on_2nd,
        '2014-01-03': view_on_3rd,
    })

    with tmp_asset_finder(equities=asset_info) as finder:
        output_index = pd.MultiIndex.from_product((
            sorted(expected_views.keys()),
            finder.retrieve_all(asset_info.index),
        ))
        expected_output = pd.DataFrame(
            [10] * nassets + [11] * nassets,
            index=output_index,
            columns=('value',),
        )
        calendar = self.dates
        self._run_pipeline(
            expr,
            deltas,
            expected_views,
            expected_output,
            finder,
            calendar=calendar,
            start=calendar[1],
            end=calendar[-1],
            window_length=2,
            compute_fn=np.nanmax,
        )
def test_deltas_macro(self):
    """Exercise ``_run_pipeline`` with deltas on the macro dataset.

    Deltas come from ``self.macro_df`` without its last row: values are
    raised by 10 and timestamps pushed one day forward.
    """
    asset_info = asset_infos[0][0]
    macro_shape = self.macro_dshape

    expr = bz.data(self.macro_df, name='expr', dshape=macro_shape)
    delta_source = bz.data(
        self.macro_df.iloc[:-1],
        name='deltas',
        dshape=macro_shape,
    )
    deltas = bz.transform(
        delta_source,
        value=delta_source.value + 10,
        timestamp=delta_source.timestamp + timedelta(days=1),
    )

    nassets = len(asset_info)
    raw_views = {
        '2014-01-02': repeat_last_axis(np.array([10.0, 1.0]), nassets),
        '2014-01-03': repeat_last_axis(np.array([11.0, 2.0]), nassets),
    }
    expected_views = keymap(pd.Timestamp, raw_views)

    with tmp_asset_finder(equities=asset_info) as finder:
        # nassets tens followed by nassets elevens.
        output_values = [
            value for value in (10, 11) for _ in range(nassets)
        ]
        expected_output = pd.DataFrame(
            output_values,
            index=pd.MultiIndex.from_product((
                sorted(expected_views.keys()),
                finder.retrieve_all(asset_info.index),
            )),
            columns=('value',),
        )
        dates = self.dates
        self._run_pipeline(
            expr,
            deltas,
            expected_views,
            expected_output,
            finder,
            calendar=dates,
            start=dates[1],
            end=dates[-1],
            window_length=2,
            compute_fn=np.nanmax,
        )
def test_custom_query_time_tz(self):
    """Check ``latest`` values with a custom data query time and timezone.

    The loader is built with ``data_query_time=time(8, 45)`` and
    ``data_query_tz='EST'``. Every timestamp is moved to 08:44 EST (stored
    as naive UTC), except the rows labelled 3 through 5, which are stamped
    ``2014-01-01 13:45``. Those three rows are expected to show up one day
    later than their normalized timestamp.
    """
    df = self.df.copy()
    df['timestamp'] = (
        pd.DatetimeIndex(df['timestamp'], tz='EST') +
        timedelta(hours=8, minutes=44)
    ).tz_convert('utc').tz_localize(None)
    # ``.loc`` replaces the ``.ix`` indexer, which was removed in pandas 1.0.
    # Label-based, end-inclusive slicing is what ``.ix`` did on an integer
    # index (assumes self.df keeps its default integer index).
    df.loc[3:5, 'timestamp'] = pd.Timestamp('2014-01-01 13:45')

    expr = bz.data(df, name='expr', dshape=self.dshape)
    loader = BlazeLoader(data_query_time=time(8, 45), data_query_tz='EST')
    ds = from_blaze(
        expr,
        loader=loader,
        no_deltas_rule=no_deltas_rules.ignore,
        missing_values=self.missing_values,
    )
    p = Pipeline()
    p.add(ds.value.latest, 'value')
    p.add(ds.int_value.latest, 'int_value')
    dates = self.dates

    # Every use of the finder stays inside the context so it is never
    # queried after the temporary finder has been torn down.
    with tmp_asset_finder() as finder:
        result = SimplePipelineEngine(
            loader,
            dates,
            finder,
        ).run_pipeline(p, dates[0], dates[-1])

        expected = df.drop('asof_date', axis=1)
        expected['timestamp'] = expected['timestamp'].dt.normalize().astype(
            'datetime64[ns]',
        ).dt.tz_localize('utc')
        expected.loc[3:5, 'timestamp'] += timedelta(days=1)
        expected.set_index(['timestamp', 'sid'], inplace=True)
        expected.index = pd.MultiIndex.from_product((
            expected.index.levels[0],
            finder.retrieve_all(expected.index.levels[1]),
        ))

    assert_frame_equal(result, expected, check_dtype=False)
def test_id_macro_dataset(self):
    """Run ``_test_id`` on the macro frame's ``value`` column.

    input (self.macro_df):

            asof_date  timestamp  value
        0  2014-01-01 2014-01-01      0
        3  2014-01-02 2014-01-02      1
        6  2014-01-03 2014-01-03      2

    output (expected):

                                   value
        2014-01-01 Equity(65 [A])      0
                   Equity(66 [B])      0
                   Equity(67 [C])      0
        2014-01-02 Equity(65 [A])      1
                   Equity(66 [B])      1
                   Equity(67 [C])      1
        2014-01-03 Equity(65 [A])      2
                   Equity(66 [B])      2
                   Equity(67 [C])      2
    """
    asset_info = asset_infos[0][0]
    nassets = len(asset_info)
    # Each macro value is repeated once per asset.
    values = [0] * nassets + [1] * nassets + [2] * nassets

    with tmp_asset_finder() as finder:
        index = pd.MultiIndex.from_product((
            self.macro_df.timestamp,
            finder.retrieve_all(asset_info.index),
        ))
        expected = pd.DataFrame(values, index=index, columns=('value',))
        self._test_id(
            self.macro_df,
            self.macro_dshape,
            expected,
            finder,
            ('value',),
        )
def test_id_macro_dataset(self):
    """Check the ``value`` column of the macro dataset through ``_test_id``.

    input (self.macro_df):

            asof_date  timestamp  value
        0  2014-01-01 2014-01-01      0
        3  2014-01-02 2014-01-02      1
        6  2014-01-03 2014-01-03      2

    output (expected):

                                   value
        2014-01-01 Equity(65 [A])      0
                   Equity(66 [B])      0
                   Equity(67 [C])      0
        2014-01-02 Equity(65 [A])      1
                   Equity(66 [B])      1
                   Equity(67 [C])      1
        2014-01-03 Equity(65 [A])      2
                   Equity(66 [B])      2
                   Equity(67 [C])      2
    """
    macro_df = self.macro_df
    asset_info = asset_infos[0][0]
    nassets = len(asset_info)

    with tmp_asset_finder() as finder:
        # Each of the values 0, 1, 2 appears once per asset.
        expected_values = [
            value for value in (0, 1, 2) for _ in range(nassets)
        ]
        expected = pd.DataFrame(
            expected_values,
            index=pd.MultiIndex.from_product((
                macro_df.timestamp,
                finder.retrieve_all(asset_info.index),
            )),
            columns=('value',),
        )
        self._test_id(
            macro_df,
            self.macro_dshape,
            expected,
            finder,
            ('value',),
        )
def test_id(self):
    """Run ``_test_id`` on ``self.df`` for ``int_value`` and ``value``.

    input (self.df):

            asof_date  sid  timestamp  value
        0  2014-01-01   65 2014-01-01      0
        1  2014-01-01   66 2014-01-01      1
        2  2014-01-01   67 2014-01-01      2
        3  2014-01-02   65 2014-01-02      1
        4  2014-01-02   66 2014-01-02      2
        5  2014-01-02   67 2014-01-02      3
        6  2014-01-03   65 2014-01-03      2
        7  2014-01-03   66 2014-01-03      3
        8  2014-01-03   67 2014-01-03      4

    output (expected):

                                   value
        2014-01-01 Equity(65 [A])      0
                   Equity(66 [B])      1
                   Equity(67 [C])      2
        2014-01-02 Equity(65 [A])      1
                   Equity(66 [B])      2
                   Equity(67 [C])      3
        2014-01-03 Equity(65 [A])      2
                   Equity(66 [B])      3
                   Equity(67 [C])      4
    """
    with tmp_asset_finder() as finder:
        without_asof = self.df.drop('asof_date', axis=1)
        expected = without_asof.set_index(['timestamp', 'sid'])

        # Swap the raw sids in the index for the finder's asset objects.
        timestamps, sid_level = expected.index.levels[:2]
        expected.index = pd.MultiIndex.from_product((
            timestamps,
            finder.retrieve_all(sid_level),
        ))
        self._test_id(
            self.df,
            self.dshape,
            expected,
            finder,
            ('int_value', 'value'),
        )
def test_id(self):
    """Check ``int_value`` and ``value`` of ``self.df`` through ``_test_id``.

    input (self.df):

            asof_date  sid  timestamp  value
        0  2014-01-01   65 2014-01-01      0
        1  2014-01-01   66 2014-01-01      1
        2  2014-01-01   67 2014-01-01      2
        3  2014-01-02   65 2014-01-02      1
        4  2014-01-02   66 2014-01-02      2
        5  2014-01-02   67 2014-01-02      3
        6  2014-01-03   65 2014-01-03      2
        7  2014-01-03   66 2014-01-03      3
        8  2014-01-03   67 2014-01-03      4

    output (expected):

                                   value
        2014-01-01 Equity(65 [A])      0
                   Equity(66 [B])      1
                   Equity(67 [C])      2
        2014-01-02 Equity(65 [A])      1
                   Equity(66 [B])      2
                   Equity(67 [C])      3
        2014-01-03 Equity(65 [A])      2
                   Equity(66 [B])      3
                   Equity(67 [C])      4
    """
    source = self.df
    columns_to_check = ('int_value', 'value')

    with tmp_asset_finder() as finder:
        expected = source.drop('asof_date', axis=1)
        expected = expected.set_index(['timestamp', 'sid'])

        # Rebuild the index with asset objects in place of the raw sids.
        date_level = expected.index.levels[0]
        asset_level = finder.retrieve_all(expected.index.levels[1])
        expected.index = pd.MultiIndex.from_product(
            (date_level, asset_level),
        )
        self._test_id(
            source,
            self.dshape,
            expected,
            finder,
            columns_to_check,
        )
def test_custom_query_time_tz(self):
    """Check ``latest`` values with a custom data query time and timezone.

    The loader is built with ``data_query_time=time(8, 45)`` and
    ``data_query_tz='EST'``. Every timestamp is moved to 08:44 EST (stored
    as naive UTC), except the rows labelled 3 through 5, which are stamped
    ``2014-01-01 13:45``. Those three rows are expected to show up one day
    later than their normalized timestamp.
    """
    df = self.df.copy()
    df['timestamp'] = (
        pd.DatetimeIndex(df['timestamp'], tz='EST') +
        timedelta(hours=8, minutes=44)
    ).tz_convert('utc').tz_localize(None)
    # ``.loc`` replaces the ``.ix`` indexer, which was removed in pandas 1.0.
    # Label-based, end-inclusive slicing is what ``.ix`` did on an integer
    # index (assumes self.df keeps its default integer index).
    df.loc[3:5, 'timestamp'] = pd.Timestamp('2014-01-01 13:45')

    expr = bz.data(df, name='expr', dshape=self.dshape)
    loader = BlazeLoader(data_query_time=time(8, 45), data_query_tz='EST')
    ds = from_blaze(
        expr,
        loader=loader,
        no_deltas_rule=no_deltas_rules.ignore,
        missing_values=self.missing_values,
    )
    p = Pipeline()
    p.add(ds.value.latest, 'value')
    p.add(ds.int_value.latest, 'int_value')
    dates = self.dates

    # Every use of the finder stays inside the context so it is never
    # queried after the temporary finder has been torn down.
    with tmp_asset_finder() as finder:
        result = SimplePipelineEngine(
            loader,
            dates,
            finder,
        ).run_pipeline(p, dates[0], dates[-1])

        expected = df.drop('asof_date', axis=1)
        expected['timestamp'] = expected['timestamp'].dt.normalize().astype(
            'datetime64[ns]',
        ).dt.tz_localize('utc')
        expected.loc[3:5, 'timestamp'] += timedelta(days=1)
        expected.set_index(['timestamp', 'sid'], inplace=True)
        expected.index = pd.MultiIndex.from_product((
            expected.index.levels[0],
            finder.retrieve_all(expected.index.levels[1]),
        ))

    assert_frame_equal(result, expected, check_dtype=False)
def test_novel_deltas(self, asset_info):
    """Run per-sid deltas over a calendar that skips two days.

    The baseline has rows for 2014-01-01 and 2014-01-04; the deltas add 10
    to each value and move each timestamp one day later. The calendar omits
    the 4th and 5th.

    Parameters
    ----------
    asset_info
        Equity metadata handed to ``tmp_asset_finder``. When it has four
        rows, the expected views gain a NaN column and the expected output
        gains a NaN entry per date.
    """
    base_dates = pd.DatetimeIndex(
        [pd.Timestamp('2014-01-01'), pd.Timestamp('2014-01-04')],
    )
    repeated_dates = base_dates.repeat(3)
    baseline = pd.DataFrame({
        'sid': self.sids * 2,
        'value': (0., 1., 2., 1., 2., 3.),
        'int_value': (0, 1, 2, 1, 2, 3),
        'asof_date': repeated_dates,
        'timestamp': repeated_dates,
    })
    expr = bz.data(baseline, name='expr', dshape=self.dshape)

    shifted = bz.transform(
        expr,
        value=expr.value + 10,
        timestamp=expr.timestamp + timedelta(days=1),
    )
    delta_frame = odo(shifted, pd.DataFrame)
    deltas = bz.data(delta_frame, name='delta', dshape=self.dshape)

    expected_views = keymap(pd.Timestamp, {
        '2014-01-03': np.array([[10.0, 11.0, 12.0],
                                [10.0, 11.0, 12.0],
                                [10.0, 11.0, 12.0]]),
        '2014-01-06': np.array([[10.0, 11.0, 12.0],
                                [10.0, 11.0, 12.0],
                                [11.0, 12.0, 13.0]]),
    })

    expected_output_buffer = [10, 11, 12, 11, 12, 13]
    if len(asset_info) == 4:
        def with_nan_column(view):
            # Append a NaN column for the fourth asset.
            return np.c_[view, [np.nan, np.nan, np.nan]]

        expected_views = valmap(with_nan_column, expected_views)
        expected_output_buffer = [10, 11, 12, np.nan, 11, 12, 13, np.nan]

    # The 4th and 5th are omitted to simulate a weekend.
    cal = pd.DatetimeIndex([
        pd.Timestamp(day)
        for day in ('2014-01-01', '2014-01-02', '2014-01-03', '2014-01-06')
    ])

    with tmp_asset_finder(equities=asset_info) as finder:
        output_index = pd.MultiIndex.from_product((
            sorted(expected_views.keys()),
            finder.retrieve_all(asset_info.index),
        ))
        expected_output = pd.DataFrame(
            expected_output_buffer,
            index=output_index,
            columns=('value',),
        )
        self._run_pipeline(
            expr,
            deltas,
            expected_views,
            expected_output,
            finder,
            calendar=cal,
            start=cal[2],
            end=cal[-1],
            window_length=3,
            compute_fn=op.itemgetter(-1),
        )
def test_novel_deltas_macro(self):
    """Exercise macro deltas against a calendar with a two-day gap.

    Baseline rows are dated 2014-01-01 and 2014-01-04. Deltas raise each
    value by 10 and push each timestamp forward one day. The calendar has
    no entries for the 4th and 5th.
    """
    asset_info = asset_infos[0][0]
    macro_shape = self.macro_dshape

    first, second = pd.Timestamp('2014-01-01'), pd.Timestamp('2014-01-04')
    base_dates = pd.DatetimeIndex([first, second])
    baseline = pd.DataFrame({
        'value': (0, 1),
        'asof_date': base_dates,
        'timestamp': base_dates,
    })
    expr = bz.data(baseline, name='expr', dshape=macro_shape)
    delta_source = bz.data(baseline, name='deltas', dshape=macro_shape)
    deltas = bz.transform(
        delta_source,
        value=delta_source.value + 10,
        timestamp=delta_source.timestamp + timedelta(days=1),
    )

    nassets = len(asset_info)
    raw_views = {
        '2014-01-03': repeat_last_axis(
            np.array([10.0, 10.0, 10.0]),
            nassets,
        ),
        '2014-01-06': repeat_last_axis(
            np.array([10.0, 10.0, 11.0]),
            nassets,
        ),
    }
    expected_views = keymap(pd.Timestamp, raw_views)

    cal = pd.DatetimeIndex([
        pd.Timestamp('2014-01-01'),
        pd.Timestamp('2014-01-02'),
        pd.Timestamp('2014-01-03'),
        # omitting the 4th and 5th to simulate a weekend
        pd.Timestamp('2014-01-06'),
    ])

    with tmp_asset_finder(equities=asset_info) as finder:
        # nassets tens followed by nassets elevens.
        output_values = [
            value for value in (10, 11) for _ in range(nassets)
        ]
        expected_output = pd.DataFrame(
            output_values,
            index=pd.MultiIndex.from_product((
                sorted(expected_views.keys()),
                finder.retrieve_all(asset_info.index),
            )),
            columns=('value',),
        )
        self._run_pipeline(
            expr,
            deltas,
            expected_views,
            expected_output,
            finder,
            calendar=cal,
            start=cal[2],
            end=cal[-1],
            window_length=3,
            compute_fn=op.itemgetter(-1),
        )
def test_novel_deltas(self, asset_info):
    """Exercise per-sid deltas against a calendar with a two-day gap.

    Baseline rows are dated 2014-01-01 and 2014-01-04. Deltas raise each
    value by 10 and push each timestamp forward one day. The calendar has
    no entries for the 4th and 5th.

    Parameters
    ----------
    asset_info
        Equity metadata handed to ``tmp_asset_finder``. When it has four
        rows, the expected views gain a NaN column and the expected output
        gains a NaN entry per date.
    """
    first, second = pd.Timestamp('2014-01-01'), pd.Timestamp('2014-01-04')
    repeated_dates = pd.DatetimeIndex([first, second]).repeat(3)
    baseline = pd.DataFrame({
        'sid': self.ASSET_FINDER_EQUITY_SIDS * 2,
        'value': (0., 1., 2., 1., 2., 3.),
        'int_value': (0, 1, 2, 1, 2, 3),
        'asof_date': repeated_dates,
        'timestamp': repeated_dates,
    })
    expr = bz.data(baseline, name='expr', dshape=self.dshape)

    transformed = bz.transform(
        expr,
        value=expr.value + 10,
        timestamp=expr.timestamp + timedelta(days=1),
    )
    deltas = bz.data(
        odo(transformed, pd.DataFrame),
        name='delta',
        dshape=self.dshape,
    )

    raw_views = {
        '2014-01-03': np.array([[10.0, 11.0, 12.0],
                                [10.0, 11.0, 12.0],
                                [10.0, 11.0, 12.0]]),
        '2014-01-06': np.array([[10.0, 11.0, 12.0],
                                [10.0, 11.0, 12.0],
                                [11.0, 12.0, 13.0]]),
    }
    expected_views = keymap(pd.Timestamp, raw_views)

    four_assets = len(asset_info) == 4
    if four_assets:
        # Append a NaN column for the fourth asset.
        expected_views = valmap(
            lambda view: np.c_[view, [np.nan, np.nan, np.nan]],
            expected_views,
        )
        expected_output_buffer = [10, 11, 12, np.nan, 11, 12, 13, np.nan]
    else:
        expected_output_buffer = [10, 11, 12, 11, 12, 13]

    cal = pd.DatetimeIndex([
        pd.Timestamp('2014-01-01'),
        pd.Timestamp('2014-01-02'),
        pd.Timestamp('2014-01-03'),
        # omitting the 4th and 5th to simulate a weekend
        pd.Timestamp('2014-01-06'),
    ])

    with tmp_asset_finder(equities=asset_info) as finder:
        expected_output = pd.DataFrame(
            expected_output_buffer,
            index=pd.MultiIndex.from_product((
                sorted(expected_views.keys()),
                finder.retrieve_all(asset_info.index),
            )),
            columns=('value',),
        )
        self._run_pipeline(
            expr,
            deltas,
            expected_views,
            expected_output,
            finder,
            calendar=cal,
            start=cal[2],
            end=cal[-1],
            window_length=3,
            compute_fn=op.itemgetter(-1),
        )
def test_deltas(self, asset_info, add_extra_sid):
    """Run the pipeline with deltas derived from the baseline frame.

    The deltas add 10 to every value and move every timestamp one day
    later.

    Parameters
    ----------
    asset_info
        Equity metadata handed to ``tmp_asset_finder``. When it has four
        rows, every expected view gains a trailing all-NaN column.
    add_extra_sid
        When true, rows for sid ``ord('E')`` are added to the baseline
        before the expression and the deltas are built.
    """
    df = self.df.copy()
    if add_extra_sid:
        extra_sid_df = pd.DataFrame({
            'asof_date': self.dates,
            'timestamp': self.dates,
            'sid': (ord('E'),) * 3,
            'value': (3., 4., 5.),
            'int_value': (3, 4, 5),
        })
        # pd.concat replaces DataFrame.append, which was deprecated in
        # pandas 1.4 and removed in 2.0.
        df = pd.concat([df, extra_sid_df], ignore_index=True)

    expr = bz.data(df, name='expr', dshape=self.dshape)
    deltas = bz.data(df, dshape=self.dshape)
    deltas = bz.data(
        odo(
            bz.transform(
                deltas,
                value=deltas.value + 10,
                timestamp=deltas.timestamp + timedelta(days=1),
            ),
            pd.DataFrame,
        ),
        name='delta',
        dshape=self.dshape,
    )

    expected_views = keymap(pd.Timestamp, {
        '2014-01-02': np.array([[10.0, 11.0, 12.0],
                                [1.0, 2.0, 3.0]]),
        '2014-01-03': np.array([[11.0, 12.0, 13.0],
                                [2.0, 3.0, 4.0]]),
        '2014-01-04': np.array([[12.0, 13.0, 14.0],
                                [12.0, 13.0, 14.0]]),
    })

    nassets = len(asset_info)
    if nassets == 4:
        # Append a NaN column for the fourth asset.
        expected_views = valmap(
            lambda view: np.c_[view, [np.nan, np.nan]],
            expected_views,
        )

    with tmp_asset_finder(equities=asset_info) as finder:
        expected_output = pd.DataFrame(
            list(concatv([12] * nassets, [13] * nassets, [14] * nassets)),
            index=pd.MultiIndex.from_product((
                sorted(expected_views.keys()),
                finder.retrieve_all(asset_info.index),
            )),
            columns=('value',),
        )
        # Extend the calendar by one day past the last fixture date.
        dates = self.dates
        dates = dates.insert(len(dates), dates[-1] + timedelta(days=1))
        self._run_pipeline(
            expr,
            deltas,
            expected_views,
            expected_output,
            finder,
            calendar=dates,
            start=dates[1],
            end=dates[-1],
            window_length=2,
            compute_fn=np.nanmax,
        )
def test_deltas(self, asset_info, add_extra_sid):
    """Run the pipeline with deltas derived from the baseline frame.

    The deltas add 10 to every value and move every timestamp one day
    later.

    Parameters
    ----------
    asset_info
        Equity metadata handed to ``tmp_asset_finder``. When it has four
        rows, every expected view gains a trailing all-NaN column.
    add_extra_sid
        When true, rows for sid ``ord('E')`` are added to the baseline
        before the expression and the deltas are built.
    """
    df = self.df.copy()
    if add_extra_sid:
        extra_sid_df = pd.DataFrame({
            'asof_date': self.dates,
            'timestamp': self.dates,
            'sid': (ord('E'),) * 3,
            'value': (3., 4., 5.,),
            'int_value': (3, 4, 5),
        })
        # pd.concat replaces DataFrame.append, which was deprecated in
        # pandas 1.4 and removed in 2.0.
        df = pd.concat([df, extra_sid_df], ignore_index=True)

    expr = bz.data(df, name='expr', dshape=self.dshape)
    deltas = bz.data(df, dshape=self.dshape)
    deltas = bz.data(
        odo(
            bz.transform(
                deltas,
                value=deltas.value + 10,
                timestamp=deltas.timestamp + timedelta(days=1),
            ),
            pd.DataFrame,
        ),
        name='delta',
        dshape=self.dshape,
    )

    expected_views = keymap(pd.Timestamp, {
        '2014-01-02': np.array([[10.0, 11.0, 12.0],
                                [1.0, 2.0, 3.0]]),
        '2014-01-03': np.array([[11.0, 12.0, 13.0],
                                [2.0, 3.0, 4.0]]),
        '2014-01-04': np.array([[12.0, 13.0, 14.0],
                                [12.0, 13.0, 14.0]]),
    })

    nassets = len(asset_info)
    if nassets == 4:
        # Append a NaN column for the fourth asset.
        expected_views = valmap(
            lambda view: np.c_[view, [np.nan, np.nan]],
            expected_views,
        )

    with tmp_asset_finder(equities=asset_info) as finder:
        expected_output = pd.DataFrame(
            list(concatv([12] * nassets, [13] * nassets, [14] * nassets)),
            index=pd.MultiIndex.from_product((
                sorted(expected_views.keys()),
                finder.retrieve_all(asset_info.index),
            )),
            columns=('value',),
        )
        # Extend the calendar by one day past the last fixture date.
        dates = self.dates
        dates = dates.insert(len(dates), dates[-1] + timedelta(days=1))
        self._run_pipeline(
            expr,
            deltas,
            expected_views,
            expected_output,
            finder,
            calendar=dates,
            start=dates[1],
            end=dates[-1],
            window_length=2,
            compute_fn=np.nanmax,
        )