def test_id(self):
    """A pipeline over the raw baseline should pass ``value`` through unchanged."""
    expr = bz.Data(self.df, name='expr', dshape=self.dshape)
    loader = BlazeLoader()
    ds = from_blaze(
        expr,
        loader=loader,
        no_deltas_rule='ignore',
    )
    pipe = Pipeline()
    pipe.add(ds.value.latest, 'value')
    dates = self.dates
    with tmp_asset_finder() as finder:
        engine = SimplePipelineEngine(loader, dates, finder)
        result = engine.run_pipeline(pipe, dates[0], dates[-1])

        # The expected frame is the input with asof_date dropped and the
        # (timestamp, sid) pairs promoted to a MultiIndex of real assets.
        expected = self.df.drop('asof_date', axis=1).set_index(
            ['timestamp', 'sid'],
        )
        expected.index = pd.MultiIndex.from_product((
            expected.index.levels[0],
            finder.retrieve_all(expected.index.levels[1]),
        ))
        assert_frame_equal(result, expected, check_dtype=False)
def test_id_macro_dataset(self):
    """A macro (sid-less) dataset should broadcast each value to every asset."""
    expr = bz.Data(self.macro_df, name='expr', dshape=self.macro_dshape)
    loader = BlazeLoader()
    ds = from_blaze(
        expr,
        loader=loader,
        no_deltas_rule=no_deltas_rules.ignore,
    )
    pipe = Pipeline()
    pipe.add(ds.value.latest, 'value')
    dates = self.dates
    asset_info = asset_infos[0][0]
    with tmp_asset_finder(equities=asset_info) as finder:
        result = SimplePipelineEngine(loader, dates, finder).run_pipeline(
            pipe, dates[0], dates[-1],
        )

        n = len(asset_info)
        # Each day's scalar is repeated once per asset.
        expected = pd.DataFrame(
            list(concatv([0] * n, [1] * n, [2] * n)),
            index=pd.MultiIndex.from_product((
                self.macro_df.timestamp,
                finder.retrieve_all(asset_info.index),
            )),
            columns=('value',),
        )
        assert_frame_equal(result, expected, check_dtype=False)
def test_group_by_type(self):
    """``group_by_type`` should partition mixed sid lists into equities and futures."""
    equities = make_simple_equity_info(
        range(5),
        start_date=pd.Timestamp('2014-01-01'),
        end_date=pd.Timestamp('2015-01-01'),
    )
    futures = make_commodity_future_info(
        first_sid=6,
        root_symbols=['CL'],
        years=[2014],
    )
    # Intersecting sid queries, to exercise loading of partially-cached
    # results.
    queries = [
        ([0, 1, 3], [6, 7]),
        ([0, 2, 3], [7, 10]),
        (list(equities.index), list(futures.index)),
    ]
    with tmp_asset_finder(equities=equities, futures=futures) as finder:
        for equity_sids, future_sids in queries:
            grouped = finder.group_by_type(equity_sids + future_sids)
            self.assertEqual(
                grouped,
                {'equity': set(equity_sids), 'future': set(future_sids)},
            )
def test_id(self):
    """
    input (self.df):
       asof_date  sid  timestamp  value
    0 2014-01-01   65 2014-01-01      0
    1 2014-01-01   66 2014-01-01      1
    2 2014-01-01   67 2014-01-01      2
    3 2014-01-02   65 2014-01-02      1
    4 2014-01-02   66 2014-01-02      2
    5 2014-01-02   67 2014-01-02      3
    6 2014-01-03   65 2014-01-03      2
    7 2014-01-03   66 2014-01-03      3
    8 2014-01-03   67 2014-01-03      4

    output (expected)
                              value
    2014-01-01 Equity(65 [A])     0
               Equity(66 [B])     1
               Equity(67 [C])     2
    2014-01-02 Equity(65 [A])     1
               Equity(66 [B])     2
               Equity(67 [C])     3
    2014-01-03 Equity(65 [A])     2
               Equity(66 [B])     3
               Equity(67 [C])     4
    """
    with tmp_asset_finder() as finder:
        # Drop asof_date and re-key on (timestamp, asset).
        expected = self.df.drop('asof_date', axis=1).set_index(
            ['timestamp', 'sid'],
        )
        expected.index = pd.MultiIndex.from_product((
            expected.index.levels[0],
            finder.retrieve_all(expected.index.levels[1]),
        ))
        self._test_id(self.df, self.dshape, expected, finder, ('value',))
def test_id(self):
    """
    input (self.df):
       asof_date  sid  timestamp  value
    0 2014-01-01   65 2014-01-01      0
    1 2014-01-01   66 2014-01-01      1
    2 2014-01-01   67 2014-01-01      2
    3 2014-01-02   65 2014-01-02      1
    4 2014-01-02   66 2014-01-02      2
    5 2014-01-02   67 2014-01-02      3
    6 2014-01-03   65 2014-01-03      2
    7 2014-01-03   66 2014-01-03      3
    8 2014-01-03   67 2014-01-03      4

    output (expected)
                              value
    2014-01-01 Equity(65 [A])     0
               Equity(66 [B])     1
               Equity(67 [C])     2
    2014-01-02 Equity(65 [A])     1
               Equity(66 [B])     2
               Equity(67 [C])     3
    2014-01-03 Equity(65 [A])     2
               Equity(66 [B])     3
               Equity(67 [C])     4
    """
    with tmp_asset_finder() as finder:
        # Expected output is the input re-indexed on (timestamp, asset)
        # with the asof_date column dropped.
        expected = self.df.drop('asof_date', axis=1).set_index(
            ['timestamp', 'sid'],
        )
        expected.index = pd.MultiIndex.from_product((
            expected.index.levels[0],
            finder.retrieve_all(expected.index.levels[1]),
        ))
        self._test_id(self.df, self.dshape, expected, finder, ('value',))
def test_id_macro_dataset(self):
    """A macro (sid-less) dataset should broadcast each value to every asset.

    Fixes: the equity frame was previously passed to ``tmp_asset_finder``
    positionally; every other call site in this file passes it as
    ``equities=...``, so a positional frame could bind to the wrong
    parameter. Also uses ``no_deltas_rules.ignore`` for consistency with
    the sibling tests instead of the bare string ``'ignore'``.
    """
    expr = bz.Data(self.macro_df, name='expr', dshape=self.macro_dshape)
    loader = BlazeLoader()
    ds = from_blaze(
        expr,
        loader=loader,
        no_deltas_rule=no_deltas_rules.ignore,
    )
    p = Pipeline()
    p.add(ds.value.latest, 'value')
    dates = self.dates
    asset_info = asset_infos[0][0]
    with tmp_asset_finder(equities=asset_info) as finder:
        result = SimplePipelineEngine(
            loader,
            dates,
            finder,
        ).run_pipeline(p, dates[0], dates[-1])

        nassets = len(asset_info)
        # Each day's scalar value is repeated once per asset.
        expected = pd.DataFrame(
            list(concatv([0] * nassets, [1] * nassets, [2] * nassets)),
            index=pd.MultiIndex.from_product((
                self.macro_df.timestamp,
                finder.retrieve_all(asset_info.index),
            )),
            columns=('value',),
        )
        assert_frame_equal(result, expected, check_dtype=False)
def test_custom_query_time_tz(self):
    """Timestamps after the data-query cutoff should roll to the next day.

    Fixes: replaces the deprecated (and since removed) ``DataFrame.ix``
    indexer with ``DataFrame.loc``. The frame has a default integer
    index, so the label-based, end-inclusive ``loc[3:5]`` selects exactly
    the same rows ``ix[3:5]`` did.
    """
    df = self.df.copy()
    # Shift timestamps to 8:44 EST (one minute before the 8:45 query
    # time), expressed as naive UTC.
    df['timestamp'] = (
        pd.DatetimeIndex(df['timestamp'], tz='EST') +
        timedelta(hours=8, minutes=44)
    ).tz_convert('utc').tz_localize(None)
    # Rows 3-5 land after the query cutoff and must roll forward a day.
    df.loc[3:5, 'timestamp'] = pd.Timestamp('2014-01-01 13:45')
    expr = bz.Data(df, name='expr', dshape=self.dshape)
    loader = BlazeLoader(data_query_time=time(8, 45), data_query_tz='EST')
    ds = from_blaze(
        expr,
        loader=loader,
        no_deltas_rule=no_deltas_rules.ignore,
    )
    p = Pipeline()
    p.add(ds.value.latest, 'value')
    dates = self.dates
    with tmp_asset_finder() as finder:
        result = SimplePipelineEngine(
            loader,
            dates,
            finder,
        ).run_pipeline(p, dates[0], dates[-1])

        expected = df.drop('asof_date', axis=1)
        expected['timestamp'] = expected['timestamp'].dt.normalize().astype(
            'datetime64[ns]',
        )
        expected.loc[3:5, 'timestamp'] += timedelta(days=1)
        expected.set_index(['timestamp', 'sid'], inplace=True)
        expected.index = pd.MultiIndex.from_product((
            expected.index.levels[0],
            finder.retrieve_all(expected.index.levels[1]),
        ))
        assert_frame_equal(result, expected, check_dtype=False)
def test_id(self):
    """Running the pipeline over the baseline frame reproduces it verbatim."""
    expr = bz.Data(self.df, name='expr', dshape=self.dshape)
    loader = BlazeLoader()
    ds = from_blaze(
        expr,
        loader=loader,
        no_deltas_rule=no_deltas_rules.ignore,
    )
    pipe = Pipeline()
    pipe.add(ds.value.latest, 'value')
    dates = self.dates
    with tmp_asset_finder() as finder:
        engine = SimplePipelineEngine(loader, dates, finder)
        result = engine.run_pipeline(pipe, dates[0], dates[-1])

        # Expected: the input keyed on (timestamp, asset), asof_date dropped.
        expected = self.df.drop('asof_date', axis=1).set_index(
            ['timestamp', 'sid'],
        )
        expected.index = pd.MultiIndex.from_product((
            expected.index.levels[0],
            finder.retrieve_all(expected.index.levels[1]),
        ))
        assert_frame_equal(result, expected, check_dtype=False)
def _test_id(self, df, dshape, expected, finder, add):
    """Shared driver: run a pipeline over ``df`` and compare to ``expected``.

    Parameters
    ----------
    df : pd.DataFrame
        The raw baseline data to wrap in a blaze expression.
    dshape : datashape
        The dshape describing ``df``.
    expected : pd.DataFrame
        The frame the pipeline run should produce.
    finder : AssetFinder
        The asset finder to run the pipeline against.
    add : iterable[str]
        Column names to add to the pipeline (``ds.<name>.latest``).

    Fixes: the ``finder`` argument was previously shadowed by a fresh
    ``with tmp_asset_finder() as finder:`` block, so the finder callers
    carefully constructed (and built ``expected`` against) was silently
    ignored. The passed-in finder is now actually used.
    """
    expr = bz.Data(df, name='expr', dshape=dshape)
    loader = BlazeLoader()
    ds = from_blaze(
        expr,
        loader=loader,
        no_deltas_rule=no_deltas_rules.ignore,
        missing_values=self.missing_values,
    )
    p = Pipeline()
    for a in add:
        p.add(getattr(ds, a).latest, a)
    dates = self.dates
    result = SimplePipelineEngine(
        loader,
        dates,
        finder,
    ).run_pipeline(p, dates[0], dates[-1])
    assert_frame_equal(
        result,
        _utc_localize_index_level_0(expected),
        check_dtype=False,
    )
def test_group_by_type(self):
    """Mixed sid queries are split into per-type sets by ``group_by_type``."""
    equities = make_simple_equity_info(
        range(5),
        start_date=pd.Timestamp('2014-01-01'),
        end_date=pd.Timestamp('2015-01-01'),
    )
    futures = make_commodity_future_info(
        first_sid=6,
        root_symbols=['CL'],
        years=[2014],
    )
    # Intersecting sid queries, to exercise loading of partially-cached
    # results.
    queries = [
        ([0, 1, 3], [6, 7]),
        ([0, 2, 3], [7, 10]),
        (list(equities.index), list(futures.index)),
    ]
    with tmp_asset_finder(equities=equities, futures=futures) as finder:
        for equity_sids, future_sids in queries:
            self.assertEqual(
                finder.group_by_type(equity_sids + future_sids),
                {
                    'equity': set(equity_sids),
                    'future': set(future_sids),
                },
            )
def test_id_macro_dataset_multiple_columns(self):
    """
    input (df):
       asof_date  timestamp  other  value
    0 2014-01-01 2014-01-01      1      0
    3 2014-01-02 2014-01-02      2      1
    6 2014-01-03 2014-01-03      3      2

    output (expected):
                              other  value
    2014-01-01 Equity(65 [A])     1      0
               Equity(66 [B])     1      0
               Equity(67 [C])     1      0
    2014-01-02 Equity(65 [A])     2      1
               Equity(66 [B])     2      1
               Equity(67 [C])     2      1
    2014-01-03 Equity(65 [A])     3      2
               Equity(66 [B])     3      2
               Equity(67 [C])     3      2
    """
    df = self.macro_df.copy()
    df["other"] = df.value + 1
    # Extend the macro dshape with a second column sharing value's type.
    fields = OrderedDict(self.macro_dshape.measure.fields)
    fields["other"] = fields["value"]

    asset_info = asset_infos[0][0]
    with tmp_asset_finder(equities=asset_info) as finder:
        # Each day's (value, other) pair is repeated once per asset.
        expected = pd.DataFrame(
            np.array([[0, 1], [1, 2], [2, 3]]).repeat(3, axis=0),
            index=pd.MultiIndex.from_product(
                (df.timestamp, finder.retrieve_all(asset_info.index)),
            ),
            columns=("value", "other"),
        ).sort_index(axis=1)
        self._test_id(
            df, var * Record(fields), expected, finder, ("value", "other"),
        )
def test_id_macro_dataset(self):
    """
    input (self.macro_df)
       asof_date  timestamp  value
    0 2014-01-01 2014-01-01      0
    3 2014-01-02 2014-01-02      1
    6 2014-01-03 2014-01-03      2

    output (expected):
                              value
    2014-01-01 Equity(65 [A])     0
               Equity(66 [B])     0
               Equity(67 [C])     0
    2014-01-02 Equity(65 [A])     1
               Equity(66 [B])     1
               Equity(67 [C])     1
    2014-01-03 Equity(65 [A])     2
               Equity(66 [B])     2
               Equity(67 [C])     2
    """
    asset_info = asset_infos[0][0]
    nassets = len(asset_info)
    # NOTE(review): uses the default tmp_asset_finder() but retrieves
    # asset_info.index from it — presumably the default finder contains
    # these sids; confirm against the fixture.
    with tmp_asset_finder() as finder:
        # Each day's scalar is broadcast to every asset.
        expected = pd.DataFrame(
            list(concatv([0] * nassets, [1] * nassets, [2] * nassets)),
            index=pd.MultiIndex.from_product(
                (self.macro_df.timestamp,
                 finder.retrieve_all(asset_info.index)),
            ),
            columns=("value",),
        )
        self._test_id(
            self.macro_df, self.macro_dshape, expected, finder, ("value",),
        )
def test_id_ffill_out_of_window_macro_dataset(self):
    """
    input (df):
       asof_date  timestamp  other  value
    0 2013-12-22 2013-12-22    NaN      0
    1 2013-12-23 2013-12-23      1    NaN
    2 2013-12-24 2013-12-24    NaN    NaN

    output (expected):
                              other  value
    2014-01-01 Equity(65 [A])     1      0
               Equity(66 [B])     1      0
               Equity(67 [C])     1      0
    2014-01-02 Equity(65 [A])     1      0
               Equity(66 [B])     1      0
               Equity(67 [C])     1      0
    2014-01-03 Equity(65 [A])     1      0
               Equity(66 [B])     1      0
               Equity(67 [C])     1      0
    """
    # Data dated well before the query window; the last non-NaN value of
    # each column should forward-fill into every queried day.
    dates = self.dates - timedelta(days=10)
    df = pd.DataFrame({
        "value": (0, np.nan, np.nan),
        "other": (np.nan, 1, np.nan),
        "asof_date": dates,
        "timestamp": dates,
    })
    fields = OrderedDict(self.macro_dshape.measure.fields)
    fields["other"] = fields["value"]

    with tmp_asset_finder() as finder:
        expected = pd.DataFrame(
            np.array([[0, 1]] * 9),
            columns=["value", "other"],
            index=pd.MultiIndex.from_product(
                (self.dates, finder.retrieve_all(self.sids)),
            ),
        ).sort_index(axis=1)
        self._test_id(
            df, var * Record(fields), expected, finder, ("value", "other"),
        )
def test_custom_query_time_tz(self):
    """Timestamps past the data-query cutoff roll forward to the next day.

    Fixes: replaces the deprecated (and since removed) ``DataFrame.ix``
    indexer with ``DataFrame.loc``; the frame has a default integer index,
    so the label-based end-inclusive ``loc[3:5]`` selects the same rows.
    """
    df = self.df.copy()
    # Shift timestamps to 8:44 EST (one minute before the 8:45 query
    # time), expressed as naive UTC.
    df["timestamp"] = (
        (pd.DatetimeIndex(df["timestamp"], tz="EST") +
         timedelta(hours=8, minutes=44))
        .tz_convert("utc")
        .tz_localize(None)
    )
    # Rows 3-5 land after the cutoff and must roll forward a day.
    df.loc[3:5, "timestamp"] = pd.Timestamp("2014-01-01 13:45")
    expr = bz.Data(df, name="expr", dshape=self.dshape)
    loader = BlazeLoader(data_query_time=time(8, 45), data_query_tz="EST")
    ds = from_blaze(
        expr,
        loader=loader,
        no_deltas_rule=no_deltas_rules.ignore,
        missing_values=self.missing_values,
    )
    p = Pipeline()
    p.add(ds.value.latest, "value")
    p.add(ds.int_value.latest, "int_value")
    dates = self.dates
    with tmp_asset_finder() as finder:
        result = SimplePipelineEngine(loader, dates, finder).run_pipeline(
            p, dates[0], dates[-1],
        )

        expected = df.drop("asof_date", axis=1)
        expected["timestamp"] = (
            expected["timestamp"]
            .dt.normalize()
            .astype("datetime64[ns]")
            .dt.tz_localize("utc")
        )
        expected.loc[3:5, "timestamp"] += timedelta(days=1)
        expected.set_index(["timestamp", "sid"], inplace=True)
        expected.index = pd.MultiIndex.from_product(
            (expected.index.levels[0],
             finder.retrieve_all(expected.index.levels[1]))
        )
        assert_frame_equal(result, expected, check_dtype=False)
def test_retrieve_specific_type(self, type_, lookup_name, failure_type):
    """Type-specific lookups succeed for matching sids and raise for others."""
    equities = make_simple_equity_info(
        range(5),
        start_date=pd.Timestamp("2014-01-01"),
        end_date=pd.Timestamp("2015-01-01"),
    )
    max_equity = equities.index.max()
    futures = make_commodity_future_info(
        first_sid=max_equity + 1,
        root_symbols=["CL"],
        years=[2014],
    )
    equity_sids = [0, 1]
    future_sids = [max_equity + 1, max_equity + 2, max_equity + 3]
    # Decide which sid group the lookup under test should accept.
    if type_ == Equity:
        success_sids, fail_sids = equity_sids, future_sids
    else:
        success_sids, fail_sids = future_sids, equity_sids

    with tmp_asset_finder(equities=equities, futures=futures) as finder:
        # Run twice to exercise caching.
        lookup = getattr(finder, lookup_name)
        for _ in range(2):
            results = lookup(success_sids)
            self.assertIsInstance(results, dict)
            self.assertEqual(set(results.keys()), set(success_sids))
            self.assertEqual(
                valmap(int, results),
                dict(zip(success_sids, success_sids)),
            )
            self.assertEqual(
                {type_},
                {type(asset) for asset in itervalues(results)},
            )
            with self.assertRaises(failure_type):
                lookup(fail_sids)
            with self.assertRaises(failure_type):
                # Should fail if **any** of the assets are bad.
                lookup([success_sids[0], fail_sids[0]])
def test_novel_deltas_macro(self):
    """Deltas to a macro dataset apply across a simulated weekend gap."""
    asset_info = asset_infos[0][0]
    base_dates = pd.DatetimeIndex([
        pd.Timestamp('2014-01-01'),
        pd.Timestamp('2014-01-04'),
    ])
    baseline = pd.DataFrame({
        'value': (0, 1),
        'asof_date': base_dates,
        'timestamp': base_dates,
    })
    expr = bz.Data(baseline, name='expr', dshape=self.macro_dshape)
    # Deltas shift each row one day later and add 10 to its value.
    deltas = bz.Data(baseline, name='deltas', dshape=self.macro_dshape)
    deltas = bz.transform(
        deltas,
        value=deltas.value + 10,
        timestamp=deltas.timestamp + timedelta(days=1),
    )
    nassets = len(asset_info)
    expected_views = keymap(pd.Timestamp, {
        '2014-01-03': repeat_last_axis(
            np.array([10.0, 10.0, 10.0]),
            nassets,
        ),
        '2014-01-06': repeat_last_axis(
            np.array([10.0, 10.0, 11.0]),
            nassets,
        ),
    })

    cal = pd.DatetimeIndex([
        pd.Timestamp('2014-01-01'),
        pd.Timestamp('2014-01-02'),
        pd.Timestamp('2014-01-03'),
        # omitting the 4th and 5th to simulate a weekend
        pd.Timestamp('2014-01-06'),
    ])
    with tmp_asset_finder(equities=asset_info) as finder:
        expected_output = pd.DataFrame(
            list(concatv([10] * nassets, [11] * nassets)),
            index=pd.MultiIndex.from_product((
                sorted(expected_views.keys()),
                finder.retrieve_all(asset_info.index),
            )),
            columns=('value',),
        )
        self._run_pipeline(
            expr,
            deltas,
            expected_views,
            expected_output,
            finder,
            calendar=cal,
            start=cal[2],
            end=cal[-1],
            window_length=3,
            compute_fn=op.itemgetter(-1),
        )
def test_id_macro_dataset_multiple_columns(self):
    """
    input (df):
       asof_date  timestamp  other  value
    0 2014-01-01 2014-01-01      1      0
    3 2014-01-02 2014-01-02      2      1
    6 2014-01-03 2014-01-03      3      2

    output (expected):
                              other  value
    2014-01-01 Equity(65 [A])     1      0
               Equity(66 [B])     1      0
               Equity(67 [C])     1      0
    2014-01-02 Equity(65 [A])     2      1
               Equity(66 [B])     2      1
               Equity(67 [C])     2      1
    2014-01-03 Equity(65 [A])     3      2
               Equity(66 [B])     3      2
               Equity(67 [C])     3      2
    """
    df = self.macro_df.copy()
    df['other'] = df.value + 1
    # Extend the macro dshape with a second column of the same type.
    fields = OrderedDict(self.macro_dshape.measure.fields)
    fields['other'] = fields['value']
    expr = bz.Data(df, name='expr', dshape=var * Record(fields))
    loader = BlazeLoader()
    ds = from_blaze(
        expr,
        loader=loader,
        no_deltas_rule=no_deltas_rules.ignore,
    )
    pipe = Pipeline()
    pipe.add(ds.value.latest, 'value')
    pipe.add(ds.other.latest, 'other')
    dates = self.dates
    asset_info = asset_infos[0][0]
    with tmp_asset_finder(equities=asset_info) as finder:
        result = SimplePipelineEngine(loader, dates, finder).run_pipeline(
            pipe, dates[0], dates[-1],
        )

        # Each day's (value, other) pair is repeated once per asset.
        expected = pd.DataFrame(
            np.array([[0, 1], [1, 2], [2, 3]]).repeat(3, axis=0),
            index=pd.MultiIndex.from_product((
                df.timestamp,
                finder.retrieve_all(asset_info.index),
            )),
            columns=('value', 'other'),
        ).sort_index(axis=1)
        assert_frame_equal(
            result,
            expected.sort_index(axis=1),
            check_dtype=False,
        )
def setUpClass(cls):
    """Build the shared asset finder, fixture frames, and loader type."""
    cls._cleanup_stack = stack = ExitStack()
    # The finder lives for the whole class; torn down via the stack.
    cls.finder = stack.enter_context(
        tmp_asset_finder(equities=cls.equity_info),
    )
    cls.cols = {}
    cls.dataset = create_buyback_auth_tst_frame(
        cls.event_dates_cases,
        SHARE_COUNT_FIELD_NAME,
    )
    cls.loader_type = CashBuybackAuthorizationsLoader
def setUpClass(cls):
    """Create the class-level calendar, assets, finder, and lifetimes mask."""
    cls.__calendar = date_range("2014", "2015", freq=trading_day)
    cls.__assets = assets = Int64Index(arange(1, 20))
    # Enter the finder context manually; exited in the matching teardown.
    cls.__tmp_finder_ctx = tmp_asset_finder(
        equities=make_simple_equity_info(
            assets,
            cls.__calendar[0],
            cls.__calendar[-1],
        ),
    )
    cls.__finder = cls.__tmp_finder_ctx.__enter__()
    cls.__mask = cls.__finder.lifetimes(
        cls.__calendar[-30:],
        include_start_date=False,
    )
def test_novel_deltas(self, asset_info):
    """Per-sid deltas apply across a simulated weekend calendar gap."""
    base_dates = pd.DatetimeIndex([
        pd.Timestamp("2014-01-01"),
        pd.Timestamp("2014-01-04"),
    ])
    repeated_dates = base_dates.repeat(3)
    baseline = pd.DataFrame({
        "sid": self.sids * 2,
        "value": (0, 1, 2, 1, 2, 3),
        "asof_date": repeated_dates,
        "timestamp": repeated_dates,
    })
    expr = bz.Data(baseline, name="expr", dshape=self.dshape)
    # Deltas: each row shifted a day later with 10 added to its value.
    deltas = bz.Data(baseline, name="deltas", dshape=self.dshape)
    deltas = bz.transform(
        deltas,
        value=deltas.value + 10,
        timestamp=deltas.timestamp + timedelta(days=1),
    )
    expected_views = keymap(pd.Timestamp, {
        "2014-01-03": np.array([
            [10.0, 11.0, 12.0],
            [10.0, 11.0, 12.0],
            [10.0, 11.0, 12.0],
        ]),
        "2014-01-06": np.array([
            [10.0, 11.0, 12.0],
            [10.0, 11.0, 12.0],
            [11.0, 12.0, 13.0],
        ]),
    })
    if len(asset_info) == 4:
        # A fourth asset with no data shows up as a NaN column.
        expected_views = valmap(
            lambda view: np.c_[view, [np.nan, np.nan, np.nan]],
            expected_views,
        )
        expected_output_buffer = [10, 11, 12, np.nan, 11, 12, 13, np.nan]
    else:
        expected_output_buffer = [10, 11, 12, 11, 12, 13]

    cal = pd.DatetimeIndex([
        pd.Timestamp("2014-01-01"),
        pd.Timestamp("2014-01-02"),
        pd.Timestamp("2014-01-03"),
        # omitting the 4th and 5th to simulate a weekend
        pd.Timestamp("2014-01-06"),
    ])
    with tmp_asset_finder(equities=asset_info) as finder:
        expected_output = pd.DataFrame(
            expected_output_buffer,
            index=pd.MultiIndex.from_product((
                sorted(expected_views.keys()),
                finder.retrieve_all(asset_info.index),
            )),
            columns=("value",),
        )
        self._run_pipeline(
            expr,
            deltas,
            expected_views,
            expected_output,
            finder,
            calendar=cal,
            start=cal[2],
            end=cal[-1],
            window_length=3,
            compute_fn=op.itemgetter(-1),
        )
def test_id_take_last_in_group_macro(self):
    """
    output (expected):

                              other  value
    2014-01-01 Equity(65 [A])   NaN      1
               Equity(66 [B])   NaN      1
               Equity(67 [C])   NaN      1
    2014-01-02 Equity(65 [A])     1      2
               Equity(66 [B])     1      2
               Equity(67 [C])     1      2
    2014-01-03 Equity(65 [A])     2      2
               Equity(66 [B])     2      2
               Equity(67 [C])     2      2
    """
    T = pd.Timestamp
    # Two intraday rows per asof_date; the last non-null value in each
    # group should win.
    df = pd.DataFrame(
        columns=['asof_date', 'timestamp', 'other', 'value'],
        data=[
            [T('2014-01-01'), T('2014-01-01 00'), np.nan, 1],
            [T('2014-01-01'), T('2014-01-01 01'), np.nan, np.nan],
            [T('2014-01-02'), T('2014-01-02 00'), 1, np.nan],
            [T('2014-01-02'), T('2014-01-02 01'), np.nan, 2],
            [T('2014-01-03'), T('2014-01-03 00'), 2, np.nan],
            [T('2014-01-03'), T('2014-01-03 01'), 3, 3],
        ],
    )
    fields = OrderedDict(self.macro_dshape.measure.fields)
    fields['other'] = fields['value']

    with tmp_asset_finder() as finder:
        expected = pd.DataFrame(
            columns=[
                'other',
                'value',
            ],
            data=[
                [np.nan, 1],    # 2014-01-01 Equity(65 [A])
                [np.nan, 1],    #            Equity(66 [B])
                [np.nan, 1],    #            Equity(67 [C])
                [1, 2],         # 2014-01-02 Equity(65 [A])
                [1, 2],         #            Equity(66 [B])
                [1, 2],         #            Equity(67 [C])
                [2, 2],         # 2014-01-03 Equity(65 [A])
                [2, 2],         #            Equity(66 [B])
                [2, 2],         #            Equity(67 [C])
            ],
            index=pd.MultiIndex.from_product(
                (self.dates, finder.retrieve_all(self.sids)),
            ),
        )
        self._test_id(
            df,
            var * Record(fields),
            expected,
            finder,
            ('value', 'other'),
        )
def test_deltas(self, asset_info):
    """Per-sid deltas replace baseline values one day after each row."""
    expr = bz.Data(self.df, name='expr', dshape=self.dshape)
    # Materialize the shifted/+10 deltas via odo so they arrive as a
    # concrete DataFrame-backed expression.
    deltas = bz.Data(self.df, dshape=self.dshape)
    deltas = bz.Data(
        odo(
            bz.transform(
                deltas,
                value=deltas.value + 10,
                timestamp=deltas.timestamp + timedelta(days=1),
            ),
            pd.DataFrame,
        ),
        name='delta',
        dshape=self.dshape,
    )
    expected_views = keymap(pd.Timestamp, {
        '2014-01-02': np.array([[10.0, 11.0, 12.0],
                                [1.0, 2.0, 3.0]]),
        '2014-01-03': np.array([[11.0, 12.0, 13.0],
                                [2.0, 3.0, 4.0]]),
        '2014-01-04': np.array([[12.0, 13.0, 14.0],
                                [12.0, 13.0, 14.0]]),
    })
    nassets = len(asset_info)
    if nassets == 4:
        # A dataless fourth asset contributes a NaN column.
        expected_views = valmap(
            lambda view: np.c_[view, [np.nan, np.nan]],
            expected_views,
        )

    with tmp_asset_finder(equities=asset_info) as finder:
        expected_output = pd.DataFrame(
            list(concatv([12] * nassets, [13] * nassets, [14] * nassets)),
            index=pd.MultiIndex.from_product((
                sorted(expected_views.keys()),
                finder.retrieve_all(asset_info.index),
            )),
            columns=('value',),
        )
        dates = self.dates
        dates = dates.insert(len(dates), dates[-1] + timedelta(days=1))
        self._run_pipeline(
            expr,
            deltas,
            expected_views,
            expected_output,
            finder,
            calendar=dates,
            start=dates[1],
            end=dates[-1],
            window_length=2,
            compute_fn=np.nanmax,
        )
def test_deltas(self, asset_info):
    """Deltas shifted one day forward should overwrite the baseline views."""
    expr = bz.Data(self.df, name='expr', dshape=self.dshape)
    deltas = bz.Data(self.df, dshape=self.dshape)
    # Round-trip the transformed expression through odo to get a
    # DataFrame-backed deltas expression.
    shifted = bz.transform(
        deltas,
        value=deltas.value + 10,
        timestamp=deltas.timestamp + timedelta(days=1),
    )
    deltas = bz.Data(
        odo(shifted, pd.DataFrame),
        name='delta',
        dshape=self.dshape,
    )
    expected_views = keymap(pd.Timestamp, {
        '2014-01-02': np.array([[10.0, 11.0, 12.0],
                                [1.0, 2.0, 3.0]]),
        '2014-01-03': np.array([[11.0, 12.0, 13.0],
                                [2.0, 3.0, 4.0]]),
        '2014-01-04': np.array([[12.0, 13.0, 14.0],
                                [12.0, 13.0, 14.0]]),
    })
    nassets = len(asset_info)
    if nassets == 4:
        # The extra asset has no data, so it appears as a NaN column.
        expected_views = valmap(
            lambda view: np.c_[view, [np.nan, np.nan]],
            expected_views,
        )

    with tmp_asset_finder(equities=asset_info) as finder:
        expected_output = pd.DataFrame(
            list(concatv([12] * nassets, [13] * nassets, [14] * nassets)),
            index=pd.MultiIndex.from_product((
                sorted(expected_views.keys()),
                finder.retrieve_all(asset_info.index),
            )),
            columns=('value',),
        )
        dates = self.dates
        dates = dates.insert(len(dates), dates[-1] + timedelta(days=1))
        self._run_pipeline(
            expr,
            deltas,
            expected_views,
            expected_output,
            finder,
            calendar=dates,
            start=dates[1],
            end=dates[-1],
            window_length=2,
            compute_fn=np.nanmax,
        )
def test_id_take_last_in_group(self):
    """With multiple intraday rows per (asof_date, sid), the last non-null
    observation in each group should be the one the pipeline surfaces."""
    T = pd.Timestamp
    df = pd.DataFrame(
        columns=['asof_date', 'timestamp', 'sid', 'other', 'value'],
        data=[
            [T('2014-01-01'), T('2014-01-01 00'), 65, 0, 0],
            [T('2014-01-01'), T('2014-01-01 01'), 65, 1, np.nan],
            [T('2014-01-01'), T('2014-01-01 00'), 66, np.nan, np.nan],
            [T('2014-01-01'), T('2014-01-01 01'), 66, np.nan, 1],
            [T('2014-01-01'), T('2014-01-01 00'), 67, 2, np.nan],
            [T('2014-01-01'), T('2014-01-01 01'), 67, np.nan, np.nan],
            [T('2014-01-02'), T('2014-01-02 00'), 65, np.nan, np.nan],
            [T('2014-01-02'), T('2014-01-02 01'), 65, np.nan, 1],
            [T('2014-01-02'), T('2014-01-02 00'), 66, np.nan, np.nan],
            [T('2014-01-02'), T('2014-01-02 01'), 66, 2, np.nan],
            [T('2014-01-02'), T('2014-01-02 00'), 67, 3, 3],
            [T('2014-01-02'), T('2014-01-02 01'), 67, 3, 3],
            [T('2014-01-03'), T('2014-01-03 00'), 65, 2, np.nan],
            [T('2014-01-03'), T('2014-01-03 01'), 65, 2, np.nan],
            [T('2014-01-03'), T('2014-01-03 00'), 66, 3, 3],
            [T('2014-01-03'), T('2014-01-03 01'), 66, np.nan, np.nan],
            [T('2014-01-03'), T('2014-01-03 00'), 67, np.nan, np.nan],
            [T('2014-01-03'), T('2014-01-03 01'), 67, np.nan, 4],
        ],
    )
    fields = OrderedDict(self.dshape.measure.fields)
    fields['other'] = fields['value']

    with tmp_asset_finder() as finder:
        expected = pd.DataFrame(
            columns=['other', 'value'],
            data=[
                [1, 0],         # 2014-01-01 Equity(65 [A])
                [np.nan, 1],    #            Equity(66 [B])
                [2, np.nan],    #            Equity(67 [C])
                [1, 1],         # 2014-01-02 Equity(65 [A])
                [2, 1],         #            Equity(66 [B])
                [3, 3],         #            Equity(67 [C])
                [2, 1],         # 2014-01-03 Equity(65 [A])
                [3, 3],         #            Equity(66 [B])
                [3, 3],         #            Equity(67 [C])
            ],
            index=pd.MultiIndex.from_product(
                (self.dates, finder.retrieve_all(self.sids)),
            ),
        )
        self._test_id(
            df,
            var * Record(fields),
            expected,
            finder,
            ('value', 'other'),
        )
def test_id_ffill_out_of_window(self):
    """
    input (df):
       asof_date  timestamp  sid  other  value
    0 2013-12-22 2013-12-22   65      0      0
    1 2013-12-22 2013-12-22   66    NaN      1
    2 2013-12-22 2013-12-22   67      2    NaN
    3 2013-12-23 2013-12-23   65    NaN      1
    4 2013-12-23 2013-12-23   66      2    NaN
    5 2013-12-23 2013-12-23   67      3      3
    6 2013-12-24 2013-12-24   65      2    NaN
    7 2013-12-24 2013-12-24   66      3      3
    8 2013-12-24 2013-12-24   67    NaN      4

    output (expected):
                              other  value
    2014-01-01 Equity(65 [A])     2      1
               Equity(66 [B])     3      3
               Equity(67 [C])     3      4
    2014-01-02 Equity(65 [A])     2      1
               Equity(66 [B])     3      3
               Equity(67 [C])     3      4
    2014-01-03 Equity(65 [A])     2      1
               Equity(66 [B])     3      3
               Equity(67 [C])     3      4
    """
    # All data predates the query window by 10 days; the last non-NaN
    # observation per (sid, column) forward-fills into every query day.
    dates = self.dates.repeat(3) - timedelta(days=10)
    df = pd.DataFrame({
        'sid': self.sids * 3,
        'value': (0, 1, np.nan, 1, np.nan, 3, np.nan, 3, 4),
        'other': (0, np.nan, 2, np.nan, 2, 3, 2, 3, np.nan),
        'asof_date': dates,
        'timestamp': dates,
    })
    fields = OrderedDict(self.dshape.measure.fields)
    fields['other'] = fields['value']

    with tmp_asset_finder() as finder:
        expected = pd.DataFrame(
            np.array([[2, 1], [3, 3], [3, 4]] * 3),
            columns=['other', 'value'],
            index=pd.MultiIndex.from_product(
                (self.dates, finder.retrieve_all(self.sids)),
            ),
        )
        self._test_id(
            df,
            var * Record(fields),
            expected,
            finder,
            ('value', 'other'),
        )
def setUpClass(cls):
    """Set up the class-level calendar, asset universe, finder, and mask."""
    cls.__calendar = date_range('2014', '2015', freq=trading_day)
    cls.__assets = assets = Int64Index(arange(1, 20))
    # Finder context entered manually; the matching teardown must exit it.
    cls.__tmp_finder_ctx = tmp_asset_finder(
        equities=make_simple_equity_info(
            assets,
            cls.__calendar[0],
            cls.__calendar[-1],
        ),
    )
    cls.__finder = cls.__tmp_finder_ctx.__enter__()
    # Lifetimes over the final 30 sessions form the default screen mask.
    cls.__mask = cls.__finder.lifetimes(
        cls.__calendar[-30:],
        include_start_date=False,
    )
def test_deltas_only_one_delta_in_universe(self, asset_info):
    """Deltas that touch only some sids leave the others at baseline."""
    expr = bz.Data(self.df, name='expr', dshape=self.dshape)
    # Hand-built deltas for just sids 65 and 66.
    deltas = pd.DataFrame({
        'sid': [65, 66],
        'asof_date': [self.dates[1], self.dates[0]],
        'timestamp': [self.dates[2], self.dates[1]],
        'value': [10, 11],
    })
    deltas = bz.Data(deltas, name='deltas', dshape=self.dshape)
    expected_views = keymap(pd.Timestamp, {
        '2014-01-02': np.array([[0.0, 11.0, 2.0],
                                [1.0, 2.0, 3.0]]),
        '2014-01-03': np.array([[10.0, 2.0, 3.0],
                                [2.0, 3.0, 4.0]]),
        '2014-01-04': np.array([[2.0, 3.0, 4.0],
                                [2.0, 3.0, 4.0]]),
    })
    nassets = len(asset_info)
    if nassets == 4:
        # The extra, dataless asset appears as a NaN column.
        expected_views = valmap(
            lambda view: np.c_[view, [np.nan, np.nan]],
            expected_views,
        )

    with tmp_asset_finder(equities=asset_info) as finder:
        expected_output = pd.DataFrame(
            columns=[
                'value',
            ],
            data=np.array([11, 10, 4]).repeat(len(asset_info.index)),
            index=pd.MultiIndex.from_product((
                sorted(expected_views.keys()),
                finder.retrieve_all(asset_info.index),
            )),
        )
        dates = self.dates
        dates = dates.insert(len(dates), dates[-1] + timedelta(days=1))
        self._run_pipeline(
            expr,
            deltas,
            expected_views,
            expected_output,
            finder,
            calendar=dates,
            start=dates[1],
            end=dates[-1],
            window_length=2,
            compute_fn=np.nanmax,
        )
def test_retrieve_all(self):
    """``retrieve_all`` preserves order, types, and symbols for mixed sids."""
    equities = make_simple_equity_info(
        range(5),
        start_date=pd.Timestamp('2014-01-01'),
        end_date=pd.Timestamp('2015-01-01'),
    )
    max_equity = equities.index.max()
    futures = make_commodity_future_info(
        first_sid=max_equity + 1,
        root_symbols=['CL'],
        years=[2014],
    )
    with tmp_asset_finder(equities=equities, futures=futures) as finder:
        all_sids = finder.sids
        self.assertEqual(len(all_sids), len(equities) + len(futures))
        queries = [
            # Empty Query.
            (),
            # Only Equities.
            tuple(equities.index[:2]),
            # Only Futures.
            tuple(futures.index[:3]),
            # Mixed, all cache misses.
            tuple(equities.index[2:]) + tuple(futures.index[3:]),
            # Mixed, all cache hits.
            tuple(equities.index[2:]) + tuple(futures.index[3:]),
            # Everything.
            all_sids,
            all_sids,
        ]
        for sids in queries:
            equity_sids = [i for i in sids if i <= max_equity]
            future_sids = [i for i in sids if i > max_equity]
            results = finder.retrieve_all(sids)

            # Input order is preserved...
            self.assertEqual(sids, tuple(map(int, results)))
            # ...each asset has the right concrete type...
            self.assertEqual(
                [Equity for _ in equity_sids] +
                [Future for _ in future_sids],
                list(map(type, results)),
            )
            # ...and the symbols match the source frames.
            self.assertEqual(
                (
                    list(equities.symbol.loc[equity_sids]) +
                    list(futures.symbol.loc[future_sids])
                ),
                list(asset.symbol for asset in results),
            )
def test_deltas_only_one_delta_in_universe(self, asset_info):
    """Sids without deltas must keep their baseline values in every view."""
    expr = bz.Data(self.df, name='expr', dshape=self.dshape)
    # Deltas only for sids 65 and 66; sid 67 stays at baseline.
    delta_frame = pd.DataFrame({
        'sid': [65, 66],
        'asof_date': [self.dates[1], self.dates[0]],
        'timestamp': [self.dates[2], self.dates[1]],
        'value': [10, 11],
    })
    deltas = bz.Data(delta_frame, name='deltas', dshape=self.dshape)
    expected_views = keymap(pd.Timestamp, {
        '2014-01-02': np.array([[0.0, 11.0, 2.0],
                                [1.0, 2.0, 3.0]]),
        '2014-01-03': np.array([[10.0, 2.0, 3.0],
                                [2.0, 3.0, 4.0]]),
        '2014-01-04': np.array([[2.0, 3.0, 4.0],
                                [2.0, 3.0, 4.0]]),
    })
    nassets = len(asset_info)
    if nassets == 4:
        expected_views = valmap(
            lambda view: np.c_[view, [np.nan, np.nan]],
            expected_views,
        )

    with tmp_asset_finder(equities=asset_info) as finder:
        expected_output = pd.DataFrame(
            columns=[
                'value',
            ],
            data=np.array([11, 10, 4]).repeat(len(asset_info.index)),
            index=pd.MultiIndex.from_product((
                sorted(expected_views.keys()),
                finder.retrieve_all(asset_info.index),
            )),
        )
        dates = self.dates
        dates = dates.insert(len(dates), dates[-1] + timedelta(days=1))
        self._run_pipeline(
            expr,
            deltas,
            expected_views,
            expected_output,
            finder,
            calendar=dates,
            start=dates[1],
            end=dates[-1],
            window_length=2,
            compute_fn=np.nanmax,
        )
def test_id_ffill_out_of_window(self):
    """
    input (df):
       asof_date  timestamp  sid  other  value
    0 2013-12-22 2013-12-22   65      0      0
    1 2013-12-22 2013-12-22   66    NaN      1
    2 2013-12-22 2013-12-22   67      2    NaN
    3 2013-12-23 2013-12-23   65    NaN      1
    4 2013-12-23 2013-12-23   66      2    NaN
    5 2013-12-23 2013-12-23   67      3      3
    6 2013-12-24 2013-12-24   65      2    NaN
    7 2013-12-24 2013-12-24   66      3      3
    8 2013-12-24 2013-12-24   67    NaN      4

    output (expected):
                              other  value
    2014-01-01 Equity(65 [A])     2      1
               Equity(66 [B])     3      3
               Equity(67 [C])     3      4
    2014-01-02 Equity(65 [A])     2      1
               Equity(66 [B])     3      3
               Equity(67 [C])     3      4
    2014-01-03 Equity(65 [A])     2      1
               Equity(66 [B])     3      3
               Equity(67 [C])     3      4
    """
    # Data sits 10 days before the query window; per-sid last non-NaN
    # observations forward-fill into every queried session.
    dates = self.dates.repeat(3) - timedelta(days=10)
    df = pd.DataFrame({
        "sid": self.sids * 3,
        "value": (0, 1, np.nan, 1, np.nan, 3, np.nan, 3, 4),
        "other": (0, np.nan, 2, np.nan, 2, 3, 2, 3, np.nan),
        "asof_date": dates,
        "timestamp": dates,
    })
    fields = OrderedDict(self.dshape.measure.fields)
    fields["other"] = fields["value"]

    with tmp_asset_finder() as finder:
        expected = pd.DataFrame(
            np.array([[2, 1], [3, 3], [3, 4]] * 3),
            columns=["other", "value"],
            index=pd.MultiIndex.from_product(
                (self.dates, finder.retrieve_all(self.sids)),
            ),
        )
        self._test_id(
            df, var * Record(fields), expected, finder, ("value", "other"),
        )
def setUpClass(cls):
    """Build per-sid announcement fixtures and the shared asset finder."""
    cls._cleanup_stack = stack = ExitStack()
    equity_info = make_simple_equity_info(
        cls.sids,
        start_date=pd.Timestamp('2013-01-01', tz='UTC'),
        end_date=pd.Timestamp('2015-01-01', tz='UTC'),
    )
    cls.cols = {}
    # Map each sid to its event-dates frame, renaming the generic date
    # column to the announcement field name.
    cls.dataset = {
        sid: df
        for sid, df in enumerate(
            case.rename(
                columns={DATE_FIELD_NAME: ANNOUNCEMENT_FIELD_NAME}
            )
            for case in cls.event_dates_cases
        )
    }
    cls.finder = stack.enter_context(
        tmp_asset_finder(equities=equity_info),
    )
    cls.loader_type = EarningsCalendarLoader
def test_id_multiple_columns(self):
    """
    input (df):
       asof_date  sid  timestamp  value  other
    0 2014-01-01   65 2014-01-01      0      1
    1 2014-01-01   66 2014-01-01      1      2
    2 2014-01-01   67 2014-01-01      2      3
    3 2014-01-02   65 2014-01-02      1      2
    4 2014-01-02   66 2014-01-02      2      3
    5 2014-01-02   67 2014-01-02      3      4
    6 2014-01-03   65 2014-01-03      2      3
    7 2014-01-03   66 2014-01-03      3      4
    8 2014-01-03   67 2014-01-03      4      5

    output (expected):
                              value  other
    2014-01-01 Equity(65 [A])     0      1
               Equity(66 [B])     1      2
               Equity(67 [C])     2      3
    2014-01-02 Equity(65 [A])     1      2
               Equity(66 [B])     2      3
               Equity(67 [C])     3      4
    2014-01-03 Equity(65 [A])     2      3
               Equity(66 [B])     3      4
               Equity(67 [C])     4      5
    """
    df = self.df.copy()
    df['other'] = df.value + 1
    # Extend the dshape with a second column of the same type as value.
    fields = OrderedDict(self.dshape.measure.fields)
    fields['other'] = fields['value']

    with tmp_asset_finder() as finder:
        expected = df.drop('asof_date', axis=1).set_index(
            ['timestamp', 'sid'],
        ).sort_index(axis=1)
        expected.index = pd.MultiIndex.from_product((
            expected.index.levels[0],
            finder.retrieve_all(expected.index.levels[1]),
        ))
        self._test_id(
            df,
            var * Record(fields),
            expected,
            finder,
            ('value', 'other'),
        )
def test_id_ffill_out_of_window_macro_dataset(self):
    """
    input (df):
       asof_date  timestamp  other  value
    0 2013-12-22 2013-12-22    NaN      0
    1 2013-12-23 2013-12-23      1    NaN
    2 2013-12-24 2013-12-24    NaN    NaN

    output (expected):
                              other  value
    2014-01-01 Equity(65 [A])     1      0
               Equity(66 [B])     1      0
               Equity(67 [C])     1      0
    2014-01-02 Equity(65 [A])     1      0
               Equity(66 [B])     1      0
               Equity(67 [C])     1      0
    2014-01-03 Equity(65 [A])     1      0
               Equity(66 [B])     1      0
               Equity(67 [C])     1      0
    """
    # Macro data 10 days before the window; the last non-NaN value of
    # each column should forward-fill across all days and assets.
    dates = self.dates - timedelta(days=10)
    df = pd.DataFrame({
        'value': (0, np.nan, np.nan),
        'other': (np.nan, 1, np.nan),
        'asof_date': dates,
        'timestamp': dates,
    })
    fields = OrderedDict(self.macro_dshape.measure.fields)
    fields['other'] = fields['value']

    with tmp_asset_finder() as finder:
        expected = pd.DataFrame(
            np.array([[0, 1]] * 9),
            columns=['value', 'other'],
            index=pd.MultiIndex.from_product(
                (self.dates, finder.retrieve_all(self.sids)),
            ),
        ).sort_index(axis=1)
        self._test_id(
            df,
            var * Record(fields),
            expected,
            finder,
            ('value', 'other'),
        )
def test_deltas_macro(self):
    """Macro deltas overwrite the broadcast baseline one day later."""
    asset_info = asset_infos[0][0]
    expr = bz.Data(self.macro_df, name='expr', dshape=self.macro_dshape)
    # Deltas built from all but the final baseline row, shifted +1 day
    # with 10 added to value.
    deltas = bz.Data(
        self.macro_df.iloc[:-1],
        name='deltas',
        dshape=self.macro_dshape,
    )
    deltas = bz.transform(
        deltas,
        value=deltas.value + 10,
        timestamp=deltas.timestamp + timedelta(days=1),
    )
    nassets = len(asset_info)
    expected_views = keymap(pd.Timestamp, {
        '2014-01-02': repeat_last_axis(np.array([10.0, 1.0]), nassets),
        '2014-01-03': repeat_last_axis(np.array([11.0, 2.0]), nassets),
    })

    with tmp_asset_finder(equities=asset_info) as finder:
        expected_output = pd.DataFrame(
            list(concatv([10] * nassets, [11] * nassets)),
            index=pd.MultiIndex.from_product((
                sorted(expected_views.keys()),
                finder.retrieve_all(asset_info.index),
            )),
            columns=('value',),
        )
        dates = self.dates
        self._run_pipeline(
            expr,
            deltas,
            expected_views,
            expected_output,
            finder,
            calendar=dates,
            start=dates[1],
            end=dates[-1],
            window_length=2,
            compute_fn=np.nanmax,
        )
def test_retrieve_specific_type(self, type_, lookup_name, failure_type):
    """Type-specific retrieval succeeds for sids of the requested type
    and raises ``failure_type`` when any requested sid mismatches.
    """
    equities = make_simple_equity_info(
        range(5),
        start_date=pd.Timestamp('2014-01-01'),
        end_date=pd.Timestamp('2015-01-01'),
    )
    max_equity = equities.index.max()
    futures = make_commodity_future_info(
        first_sid=max_equity + 1,
        root_symbols=['CL'],
        years=[2014],
    )
    equity_sids = [0, 1]
    future_sids = [max_equity + 1, max_equity + 2, max_equity + 3]
    if type_ == Equity:
        success_sids, fail_sids = equity_sids, future_sids
    else:
        success_sids, fail_sids = future_sids, equity_sids

    with tmp_asset_finder(equities=equities, futures=futures) as finder:
        lookup = getattr(finder, lookup_name)
        # Two identical passes exercise the finder's caching layer.
        for _ in range(2):
            retrieved = lookup(success_sids)
            self.assertIsInstance(retrieved, dict)
            self.assertEqual(set(retrieved.keys()), set(success_sids))
            # Every sid maps back to an asset with that same sid...
            self.assertEqual(
                valmap(int, retrieved),
                dict(zip(success_sids, success_sids)),
            )
            # ...and every returned asset is of the requested type.
            self.assertEqual(
                {type_},
                {type(asset) for asset in itervalues(retrieved)},
            )
            with self.assertRaises(failure_type):
                lookup(fail_sids)
            with self.assertRaises(failure_type):
                # A single bad sid poisons the whole request.
                lookup([success_sids[0], fail_sids[0]])
def test_deltas_macro(self):
    """Deltas applied to a macro (sid-less) baseline should be broadcast
    to every asset in the finder.
    """
    asset_info = asset_infos[0][0]
    expr = bz.Data(self.macro_df, name='expr', dshape=self.macro_dshape)
    deltas = bz.Data(
        self.macro_df.iloc[:-1],
        name='deltas',
        dshape=self.macro_dshape,
    )
    # Deltas: values bumped by 10, known one day after the baseline row.
    deltas = bz.transform(
        deltas,
        value=deltas.value + 10,
        timestamp=deltas.timestamp + timedelta(days=1),
    )
    nassets = len(asset_info)
    # Each per-date row vector is repeated across the asset axis.
    expected_views = keymap(pd.Timestamp, {
        '2014-01-02': repeat_last_axis(np.array([10.0, 1.0]), nassets),
        '2014-01-03': repeat_last_axis(np.array([11.0, 2.0]), nassets),
    })
    with tmp_asset_finder(equities=asset_info) as finder:
        expected_output = pd.DataFrame(
            list(concatv([10] * nassets, [11] * nassets)),
            index=pd.MultiIndex.from_product((
                sorted(expected_views.keys()),
                finder.retrieve_all(asset_info.index),
            )),
            columns=('value',),
        )
        dates = self.dates
        self._run_pipeline(
            expr,
            deltas,
            expected_views,
            expected_output,
            finder,
            calendar=dates,
            start=dates[1],
            end=dates[-1],
            window_length=2,
            compute_fn=np.nanmax,
        )
def test_id_macro_dataset_multiple_columns(self):
    """
    input (df):
       asof_date  timestamp  other  value
    0 2014-01-01 2014-01-01      1      0
    3 2014-01-02 2014-01-02      2      1
    6 2014-01-03 2014-01-03      3      2

    output (expected):
                              other  value
    2014-01-01 Equity(65 [A])     1      0
               Equity(66 [B])     1      0
               Equity(67 [C])     1      0
    2014-01-02 Equity(65 [A])     2      1
               Equity(66 [B])     2      1
               Equity(67 [C])     2      1
    2014-01-03 Equity(65 [A])     3      2
               Equity(66 [B])     3      2
               Equity(67 [C])     3      2
    """
    df = self.macro_df.copy()
    df['other'] = df.value + 1
    # Extend the macro dshape with a second column of the same type.
    fields = OrderedDict(self.macro_dshape.measure.fields)
    fields['other'] = fields['value']

    asset_info = asset_infos[0][0]
    with tmp_asset_finder(equities=asset_info) as finder:
        # Each macro row is repeated once per asset (3 assets here).
        expected = pd.DataFrame(
            np.array([[0, 1],
                      [1, 2],
                      [2, 3]]).repeat(3, axis=0),
            index=pd.MultiIndex.from_product((
                df.timestamp,
                finder.retrieve_all(asset_info.index),
            )),
            columns=('value', 'other'),
        ).sort_index(axis=1)
        self._test_id(
            df,
            var * Record(fields),
            expected,
            finder,
            ('value', 'other'),
        )
def setUpClass(cls):
    """Build the shared asset finder and per-sid earnings-date fixtures.

    Knowledge dates (K) are when each earnings date became known;
    earning dates (E) are the announcements themselves.  The comment on
    each sid names the ordering it exercises.
    """
    stack = ExitStack()
    cls._cleanup_stack = stack
    cls.sids = A, B, C, D, E = range(5)
    cls.finder = stack.enter_context(
        tmp_asset_finder(
            equities=make_simple_equity_info(
                cls.sids,
                start_date=pd.Timestamp('2013-01-01', tz='UTC'),
                end_date=pd.Timestamp('2015-01-01', tz='UTC'),
            ),
        ),
    )
    cls.earnings_dates = {
        # K1--K2--E1--E2.
        A: to_series(
            knowledge_dates=['2014-01-05', '2014-01-10'],
            earning_dates=['2014-01-15', '2014-01-20'],
        ),
        # K1--K2--E2--E1.
        B: to_series(
            knowledge_dates=['2014-01-05', '2014-01-10'],
            earning_dates=['2014-01-20', '2014-01-15'],
        ),
        # K1--E1--K2--E2.
        C: to_series(
            knowledge_dates=['2014-01-05', '2014-01-15'],
            earning_dates=['2014-01-10', '2014-01-20'],
        ),
        # K1 == K2.
        D: to_series(
            knowledge_dates=['2014-01-05'] * 2,
            earning_dates=['2014-01-10', '2014-01-15'],
        ),
        # No announcements at all.
        E: pd.Series(
            data=[],
            index=pd.DatetimeIndex([]),
            dtype='datetime64[ns]',
        ),
    }
def setUpClass(cls):
    """Create the shared asset finder and per-sid earnings-date fixtures.

    ``knowledge_dates`` (K) are the timestamps at which each earnings
    date became known; ``earning_dates`` (E) are the announcements.  The
    comment on each sid names the ordering it exercises.
    """
    # Contexts entered on this stack are torn down when _cleanup_stack is
    # closed (presumably in the matching tearDownClass -- confirm).
    cls._cleanup_stack = stack = ExitStack()
    cls.sids = A, B, C, D, E = range(5)
    equity_info = make_simple_equity_info(
        cls.sids,
        start_date=pd.Timestamp('2013-01-01', tz='UTC'),
        end_date=pd.Timestamp('2015-01-01', tz='UTC'),
    )
    cls.finder = stack.enter_context(
        tmp_asset_finder(equities=equity_info),
    )
    cls.earnings_dates = {
        # K1--K2--E1--E2.
        A: to_series(
            knowledge_dates=['2014-01-05', '2014-01-10'],
            earning_dates=['2014-01-15', '2014-01-20'],
        ),
        # K1--K2--E2--E1.
        B: to_series(
            knowledge_dates=['2014-01-05', '2014-01-10'],
            earning_dates=['2014-01-20', '2014-01-15']
        ),
        # K1--E1--K2--E2.
        C: to_series(
            knowledge_dates=['2014-01-05', '2014-01-15'],
            earning_dates=['2014-01-10', '2014-01-20']
        ),
        # K1 == K2.
        D: to_series(
            knowledge_dates=['2014-01-05'] * 2,
            earning_dates=['2014-01-10', '2014-01-15'],
        ),
        # No announcements at all.
        E: pd.Series(
            data=[],
            index=pd.DatetimeIndex([]),
            dtype='datetime64[ns]',
        ),
    }
def test_id_macro_dataset(self):
    """
    input (self.macro_df)
       asof_date  timestamp  value
    0 2014-01-01 2014-01-01      0
    3 2014-01-02 2014-01-02      1
    6 2014-01-03 2014-01-03      2

    output (expected):
                              value
    2014-01-01 Equity(65 [A])     0
               Equity(66 [B])     0
               Equity(67 [C])     0
    2014-01-02 Equity(65 [A])     1
               Equity(66 [B])     1
               Equity(67 [C])     1
    2014-01-03 Equity(65 [A])     2
               Equity(66 [B])     2
               Equity(67 [C])     2
    """
    asset_info = asset_infos[0][0]
    nassets = len(asset_info)
    # NOTE(review): the finder is opened WITHOUT equities=asset_info even
    # though the expected index is built from asset_info.index -- the
    # multi-column variant passes equities=asset_info here.  Presumably the
    # default finder contains the same assets; confirm.
    with tmp_asset_finder() as finder:
        # Each macro value is broadcast to all assets for its date.
        expected = pd.DataFrame(
            list(concatv([0] * nassets, [1] * nassets, [2] * nassets)),
            index=pd.MultiIndex.from_product((
                self.macro_df.timestamp,
                finder.retrieve_all(asset_info.index),
            )),
            columns=('value', ),
        )
        self._test_id(
            self.macro_df,
            self.macro_dshape,
            expected,
            finder,
            ('value', ),
        )
def test_custom_query_time_tz(self):
    """Rows should surface relative to the loader's data query time.

    Timestamps are shifted to 8:44 EST (one minute before the 8:45
    query cutoff) so they remain visible on their own day; rows 3-5 are
    forced to 13:45, after the cutoff, and so only become visible on
    the following day.
    """
    df = self.df.copy()
    # Shift to 8:44 EST, then store as naive UTC as the loader expects.
    df['timestamp'] = (
        pd.DatetimeIndex(df['timestamp'], tz='EST') +
        timedelta(hours=8, minutes=44)
    ).tz_convert('utc').tz_localize(None)
    # NOTE(review): ``.ix`` is deprecated/removed in modern pandas; the
    # replacement is ``.loc`` -- out of scope for a documentation pass.
    df.ix[3:5, 'timestamp'] = pd.Timestamp('2014-01-01 13:45')
    expr = bz.Data(df, name='expr', dshape=self.dshape)
    loader = BlazeLoader(data_query_time=time(8, 45), data_query_tz='EST')
    ds = from_blaze(
        expr,
        loader=loader,
        no_deltas_rule=no_deltas_rules.ignore,
        missing_values=self.missing_values,
    )
    p = Pipeline()
    p.add(ds.value.latest, 'value')
    p.add(ds.int_value.latest, 'int_value')
    dates = self.dates

    with tmp_asset_finder() as finder:
        result = SimplePipelineEngine(
            loader,
            dates,
            finder,
        ).run_pipeline(p, dates[0], dates[-1])

        expected = df.drop('asof_date', axis=1)
        expected['timestamp'] = expected['timestamp'].dt.normalize().astype(
            'datetime64[ns]',
        ).dt.tz_localize('utc')
        # Rows 3-5 missed the query cutoff, so they appear one day later.
        expected.ix[3:5, 'timestamp'] += timedelta(days=1)
        expected.set_index(['timestamp', 'sid'], inplace=True)
        expected.index = pd.MultiIndex.from_product((
            expected.index.levels[0],
            finder.retrieve_all(expected.index.levels[1]),
        ))
        assert_frame_equal(result, expected, check_dtype=False)
def test_novel_deltas(self, asset_info):
    """Deltas whose timestamps land on non-calendar days (a simulated
    weekend) should be applied on the next trading day instead.
    """
    base_dates = pd.DatetimeIndex(
        [pd.Timestamp('2014-01-01'), pd.Timestamp('2014-01-04')])
    # One baseline row per (date, sid) pair: 3 sids x 2 dates.
    repeated_dates = base_dates.repeat(3)
    baseline = pd.DataFrame({
        'sid': self.sids * 2,
        'value': (0, 1, 2, 1, 2, 3),
        'asof_date': repeated_dates,
        'timestamp': repeated_dates,
    })
    expr = bz.Data(baseline, name='expr', dshape=self.dshape)
    deltas = bz.Data(baseline, name='deltas', dshape=self.dshape)
    # Deltas: values bumped by 10, known one day after the baseline.
    deltas = bz.transform(
        deltas,
        value=deltas.value + 10,
        timestamp=deltas.timestamp + timedelta(days=1),
    )
    expected_views = keymap(
        pd.Timestamp,
        {
            '2014-01-03': np.array([[10.0, 11.0, 12.0],
                                    [10.0, 11.0, 12.0],
                                    [10.0, 11.0, 12.0]]),
            '2014-01-06': np.array([[10.0, 11.0, 12.0],
                                    [10.0, 11.0, 12.0],
                                    [11.0, 12.0, 13.0]]),
        })
    if len(asset_info) == 4:
        # The 4-asset case adds an asset with no data: its column is
        # all-nan in every view and its outputs are nan.
        expected_views = valmap(
            lambda view: np.c_[view, [np.nan, np.nan, np.nan]],
            expected_views,
        )
        expected_output_buffer = [10, 11, 12, np.nan, 11, 12, 13, np.nan]
    else:
        expected_output_buffer = [10, 11, 12, 11, 12, 13]
    cal = pd.DatetimeIndex([
        pd.Timestamp('2014-01-01'),
        pd.Timestamp('2014-01-02'),
        pd.Timestamp('2014-01-03'),
        # omitting the 4th and 5th to simulate a weekend
        pd.Timestamp('2014-01-06'),
    ])
    with tmp_asset_finder(equities=asset_info) as finder:
        expected_output = pd.DataFrame(
            expected_output_buffer,
            index=pd.MultiIndex.from_product((
                sorted(expected_views.keys()),
                finder.retrieve_all(asset_info.index),
            )),
            columns=('value', ),
        )
        self._run_pipeline(
            expr,
            deltas,
            expected_views,
            expected_output,
            finder,
            calendar=cal,
            start=cal[2],
            end=cal[-1],
            window_length=3,
            compute_fn=op.itemgetter(-1),
        )
def setUpClass(cls):
    """Build shared fixtures: an asset finder over simple equities and a
    per-sid dataset of announcement dates for the earnings loader.
    """
    # Contexts entered on this stack are closed in tearDownClass below.
    cls._cleanup_stack = stack = ExitStack()
    equity_info = make_simple_equity_info(
        cls.sids,
        start_date=pd.Timestamp('2013-01-01', tz='UTC'),
        end_date=pd.Timestamp('2015-01-01', tz='UTC'),
    )
    cls.cols = {}
    # Rename each case's generic date column to the announcement field
    # name expected by the loader; one frame per sid.
    cls.dataset = {
        sid: df
        for sid, df in enumerate(
            case.rename(columns={DATE_FIELD_NAME: ANNOUNCEMENT_FIELD_NAME})
            for case in cls.event_dates_cases)
    }
    cls.finder = stack.enter_context(
        tmp_asset_finder(equities=equity_info),
    )
    cls.loader_type = EarningsCalendarLoader

@classmethod
def tearDownClass(cls):
    """Close every context entered in setUpClass."""
    cls._cleanup_stack.close()

def setup(self, dates):
    """Precompute expected next/previous announcement dates for *dates*.

    NOTE(review): the results are bound only to local names and the
    method appears to continue beyond this chunk -- confirm against the
    full file before relying on this summary.
    """
    _expected_next_announce = self.get_expected_next_event_dates(dates)
    _expected_previous_announce = self.get_expected_previous_event_dates(
        dates)
    _expected_next_busday_offsets = self._compute_busday_offsets(
        _expected_next_announce)
def test_lookup_symbol_from_multiple_valid(self):
    # Regression coverage for symbol-conflict resolution (see
    # https://github.com/quantopian/zipline/issues/837):
    #
    # When multiple sids hold symbol S at time T, the candidate with the
    # highest start_date wins (200 cases); ties on start_date are broken
    # by the highest end_date (34 cases).
    #
    # The author (ssanderson) considers such input malformed and thinks
    # it should eventually be rejected, but this is the current intended
    # behavior, which was once accidentally broken during a refactor.
    # These assertions guard it until we decide to enforce an error.
    records = [
        {
            "sid": 1,
            "symbol": "multiple",
            "start_date": pd.Timestamp("2010-01-01"),
            "end_date": pd.Timestamp("2012-01-01"),
            "exchange": "NYSE",
        },
        # Identical to sid 1 except for a later end_date.
        {
            "sid": 2,
            "symbol": "multiple",
            "start_date": pd.Timestamp("2010-01-01"),
            "end_date": pd.Timestamp("2013-01-01"),
            "exchange": "NYSE",
        },
        # Identical to sid 1 except for a later start_date.
        {
            "sid": 3,
            "symbol": "multiple",
            "start_date": pd.Timestamp("2011-01-01"),
            "end_date": pd.Timestamp("2012-01-01"),
            "exchange": "NYSE",
        },
    ]
    df = pd.DataFrame.from_records(records)

    def check(expected_sid, date):
        # Resolve the symbol as of `date` and verify the winning sid.
        asset = finder.lookup_symbol("MULTIPLE", date)
        self.assertEqual(asset.symbol, "MULTIPLE")
        self.assertEqual(asset.sid, expected_sid)

    with tmp_asset_finder(finder_cls=self.asset_finder_type,
                          equities=df) as finder:
        self.assertIsInstance(finder, self.asset_finder_type)
        # Sids 1 and 2 both match; 2 wins on the later end_date.
        check(2, pd.Timestamp("2010-12-31"))
        # Sids 1, 2, and 3 all match; 3 wins on the later start_date.
        check(3, pd.Timestamp("2011-01-01"))
def test_lookup_symbol_from_multiple_valid(self):
    # This test asserts that we resolve conflicts in accordance with the
    # following rules when we have multiple assets holding the same symbol
    # at the same time:

    # If multiple SIDs exist for symbol S at time T, return the candidate
    # SID whose start_date is highest. (200 cases)

    # If multiple SIDs exist for symbol S at time T, the best candidate
    # SIDs share the highest start_date, return the SID with the highest
    # end_date. (34 cases)

    # It is the opinion of the author (ssanderson) that we should consider
    # this malformed input and fail here.  But this is the current indended
    # behavior of the code, and I accidentally broke it while refactoring.
    # These will serve as regression tests until the time comes that we
    # decide to enforce this as an error.

    # See https://github.com/quantopian/zipline/issues/837 for more
    # details.
    df = pd.DataFrame.from_records([
        {
            'sid': 1,
            'symbol': 'multiple',
            'start_date': pd.Timestamp('2010-01-01'),
            'end_date': pd.Timestamp('2012-01-01'),
            'exchange': 'NYSE'
        },
        # Same as asset 1, but with a later end date.
        {
            'sid': 2,
            'symbol': 'multiple',
            'start_date': pd.Timestamp('2010-01-01'),
            'end_date': pd.Timestamp('2013-01-01'),
            'exchange': 'NYSE'
        },
        # Same as asset 1, but with a later start_date
        {
            'sid': 3,
            'symbol': 'multiple',
            'start_date': pd.Timestamp('2011-01-01'),
            'end_date': pd.Timestamp('2012-01-01'),
            'exchange': 'NYSE'
        },
    ])

    def check(expected_sid, date):
        # Resolve the symbol as of `date` and verify the winning sid.
        result = finder.lookup_symbol(
            'MULTIPLE', date,
        )
        self.assertEqual(result.symbol, 'MULTIPLE')
        self.assertEqual(result.sid, expected_sid)

    with tmp_asset_finder(finder_cls=self.asset_finder_type,
                          equities=df) as finder:
        self.assertIsInstance(finder, self.asset_finder_type)

        # Sids 1 and 2 are eligible here.  We should get asset 2 because it
        # has the later end_date.
        check(2, pd.Timestamp('2010-12-31'))

        # Sids 1, 2, and 3 are eligible here.  We should get sid 3 because
        # it has a later start_date
        check(3, pd.Timestamp('2011-01-01'))
def test_novel_deltas(self, asset_info):
    """Weekend-spanning deltas variant where the deltas source is
    materialized through ``odo`` rather than left as a lazy expression.
    """
    base_dates = pd.DatetimeIndex([
        pd.Timestamp('2014-01-01'),
        pd.Timestamp('2014-01-04')
    ])
    # One baseline row per (date, sid) pair: 3 sids x 2 dates.
    repeated_dates = base_dates.repeat(3)
    baseline = pd.DataFrame({
        'sid': self.sids * 2,
        'value': (0., 1., 2., 1., 2., 3.),
        'int_value': (0, 1, 2, 1, 2, 3),
        'asof_date': repeated_dates,
        'timestamp': repeated_dates,
    })
    expr = bz.Data(baseline, name='expr', dshape=self.dshape)
    # Concretize the bumped/shifted baseline into a DataFrame so the
    # deltas source is real data rather than a lazy blaze expression.
    deltas = bz.Data(
        odo(
            bz.transform(
                expr,
                value=expr.value + 10,
                timestamp=expr.timestamp + timedelta(days=1),
            ),
            pd.DataFrame,
        ),
        name='delta',
        dshape=self.dshape,
    )
    expected_views = keymap(pd.Timestamp, {
        '2014-01-03': np.array([[10.0, 11.0, 12.0],
                                [10.0, 11.0, 12.0],
                                [10.0, 11.0, 12.0]]),
        '2014-01-06': np.array([[10.0, 11.0, 12.0],
                                [10.0, 11.0, 12.0],
                                [11.0, 12.0, 13.0]]),
    })
    if len(asset_info) == 4:
        # The 4-asset case adds an asset with no data: its column is
        # all-nan in every view and its outputs are nan.
        expected_views = valmap(
            lambda view: np.c_[view, [np.nan, np.nan, np.nan]],
            expected_views,
        )
        expected_output_buffer = [10, 11, 12, np.nan, 11, 12, 13, np.nan]
    else:
        expected_output_buffer = [10, 11, 12, 11, 12, 13]
    cal = pd.DatetimeIndex([
        pd.Timestamp('2014-01-01'),
        pd.Timestamp('2014-01-02'),
        pd.Timestamp('2014-01-03'),
        # omitting the 4th and 5th to simulate a weekend
        pd.Timestamp('2014-01-06'),
    ])
    with tmp_asset_finder(equities=asset_info) as finder:
        expected_output = pd.DataFrame(
            expected_output_buffer,
            index=pd.MultiIndex.from_product((
                sorted(expected_views.keys()),
                finder.retrieve_all(asset_info.index),
            )),
            columns=('value',),
        )
        self._run_pipeline(
            expr,
            deltas,
            expected_views,
            expected_output,
            finder,
            calendar=cal,
            start=cal[2],
            end=cal[-1],
            window_length=3,
            compute_fn=op.itemgetter(-1),
        )
def test_novel_deltas_macro(self):
    """Macro (sid-less) variant of the novel-deltas test: baseline rows
    on 01-01 and 01-04, deltas known one day later, with the simulated
    weekend (01-04/01-05) omitted from the calendar.
    """
    asset_info = asset_infos[0][0]
    base_dates = pd.DatetimeIndex(
        [pd.Timestamp('2014-01-01'), pd.Timestamp('2014-01-04')])
    baseline = pd.DataFrame({
        'value': (0, 1),
        'asof_date': base_dates,
        'timestamp': base_dates,
    })
    expr = bz.Data(baseline, name='expr', dshape=self.macro_dshape)
    deltas = bz.Data(baseline, name='deltas', dshape=self.macro_dshape)
    # Deltas: values bumped by 10, known one day after the baseline.
    deltas = bz.transform(
        deltas,
        value=deltas.value + 10,
        timestamp=deltas.timestamp + timedelta(days=1),
    )
    nassets = len(asset_info)
    # Each per-date window vector is broadcast across the asset axis.
    expected_views = keymap(
        pd.Timestamp,
        {
            '2014-01-03': repeat_last_axis(
                np.array([10.0, 10.0, 10.0]),
                nassets,
            ),
            '2014-01-06': repeat_last_axis(
                np.array([10.0, 10.0, 11.0]),
                nassets,
            ),
        })
    cal = pd.DatetimeIndex([
        pd.Timestamp('2014-01-01'),
        pd.Timestamp('2014-01-02'),
        pd.Timestamp('2014-01-03'),
        # omitting the 4th and 5th to simulate a weekend
        pd.Timestamp('2014-01-06'),
    ])
    with tmp_asset_finder(equities=asset_info) as finder:
        expected_output = pd.DataFrame(
            list(concatv([10] * nassets, [11] * nassets)),
            index=pd.MultiIndex.from_product((
                sorted(expected_views.keys()),
                finder.retrieve_all(asset_info.index),
            )),
            columns=('value', ),
        )
        self._run_pipeline(
            expr,
            deltas,
            expected_views,
            expected_output,
            finder,
            calendar=cal,
            start=cal[2],
            end=cal[-1],
            window_length=3,
            compute_fn=op.itemgetter(-1),
        )