def _run_pipeline(self,
                  expr,
                  deltas,
                  expected_views,
                  expected_output,
                  finder,
                  calendar,
                  start,
                  end,
                  window_length,
                  compute_fn):
    """Run a single-factor pipeline over ``expr`` and verify its results.

    A dataset is built from ``expr`` and ``deltas`` with the ``raise_``
    no-deltas rule. A ``CustomFactor`` reading the dataset's ``value``
    column over ``window_length`` rows is added to a pipeline under the
    name ``'value'``.

    On every ``compute`` call the window handed to the factor is compared
    against ``expected_views[today]``, and the factor output is
    ``compute_fn(data)``. After the engine has run from ``start`` to
    ``end`` the resulting frame is compared against ``expected_output``
    (dtypes are not checked).
    """
    blaze_loader = BlazeLoader()
    dataset = from_blaze(
        expr,
        deltas,
        loader=blaze_loader,
        no_deltas_rule=no_deltas_rules.raise_,
    )

    # A class body that assigns ``window_length`` cannot read the enclosing
    # function's variable of the same name, so alias it first.
    lookback = window_length

    class TestFactor(CustomFactor):
        inputs = (dataset.value,)
        window_length = lookback

        def compute(self, today, assets, out, data):
            assert_array_almost_equal(data, expected_views[today])
            out[:] = compute_fn(data)

    pipeline = Pipeline()
    pipeline.add(TestFactor(), 'value')

    engine = SimplePipelineEngine(blaze_loader, calendar, finder)
    assert_frame_equal(
        engine.run_pipeline(pipeline, start, end),
        expected_output,
        check_dtype=False,
    )
def test_custom_query_time_tz(self):
    """Check ``latest`` values when the loader has a custom query time.

    The loader is configured with ``data_query_time=08:45`` and
    ``data_query_tz='EST'``. Every timestamp in the fixture frame is moved
    to 08:44 EST (stored as naive UTC), except rows 3-5 which are stamped
    13:45 naive UTC, i.e. exactly 08:45 EST. The expected frame keeps the
    normalized date for the 08:44 rows and pushes rows 3-5 forward by one
    day.
    """
    df = self.df.copy()
    shifted = (
        pd.DatetimeIndex(df['timestamp'], tz='EST') +
        timedelta(hours=8, minutes=44)
    )
    df['timestamp'] = shifted.tz_convert('utc').tz_localize(None)
    # ``.loc`` replaces the removed ``.ix`` indexer. On this integer index
    # the slice is label-based and inclusive, so it covers rows 3, 4 and 5
    # exactly as ``.ix[3:5]`` did.
    df.loc[3:5, 'timestamp'] = pd.Timestamp('2014-01-01 13:45')

    expr = bz.Data(df, name='expr', dshape=self.dshape)
    loader = BlazeLoader(data_query_time=time(8, 45), data_query_tz='EST')
    ds = from_blaze(
        expr,
        loader=loader,
        no_deltas_rule=no_deltas_rules.ignore,
    )
    p = Pipeline()
    p.add(ds.value.latest, 'value')
    dates = self.dates

    with tmp_asset_finder() as finder:
        result = SimplePipelineEngine(
            loader,
            dates,
            finder,
        ).run_pipeline(p, dates[0], dates[-1])

    expected = df.drop('asof_date', axis=1)
    expected['timestamp'] = expected['timestamp'].dt.normalize().astype(
        'datetime64[ns]',
    )
    # Rows stamped at the query time are expected one day later than the
    # rows stamped one minute before it.
    expected.loc[3:5, 'timestamp'] += timedelta(days=1)
    expected.set_index(['timestamp', 'sid'], inplace=True)
    # Swap the raw sids in the index for the asset objects the engine emits.
    expected.index = pd.MultiIndex.from_product((
        expected.index.levels[0],
        finder.retrieve_all(expected.index.levels[1]),
    ))
    assert_frame_equal(result, expected, check_dtype=False)
def test_id_macro_dataset(self):
    """Check ``latest`` on a dataset that has no ``sid`` column.

    The macro frame carries one row per date; the expected output repeats
    each date's value (0, 1, 2) once for every asset in ``asset_info``.
    """
    loader = BlazeLoader()
    macro_expr = bz.Data(
        self.macro_df,
        name='expr',
        dshape=self.macro_dshape,
    )
    dataset = from_blaze(macro_expr, loader=loader, no_deltas_rule='ignore')

    pipeline = Pipeline()
    pipeline.add(dataset.value.latest, 'value')

    dates = self.dates
    asset_info = asset_infos[0][0]
    with tmp_asset_finder(asset_info) as finder:
        engine = SimplePipelineEngine(loader, dates, finder)
        result = engine.run_pipeline(pipeline, dates[0], dates[-1])

    nassets = len(asset_info)
    # nassets zeros, then nassets ones, then nassets twos.
    expected_values = [
        value for value in range(3) for _ in range(nassets)
    ]
    expected_index = pd.MultiIndex.from_product((
        self.macro_df.timestamp,
        finder.retrieve_all(asset_info.index),
    ))
    expected = pd.DataFrame(
        expected_values,
        index=expected_index,
        columns=('value',),
    )
    assert_frame_equal(result, expected, check_dtype=False)
def test_id(self):
    """Check that ``latest`` on the fixture frame reproduces its values.

    The expected frame is the fixture without ``asof_date``, indexed by
    (timestamp, asset), with sids replaced by the assets the finder
    returns for them.
    """
    loader = BlazeLoader()
    source_expr = bz.Data(self.df, name='expr', dshape=self.dshape)
    dataset = from_blaze(source_expr, loader=loader, no_deltas_rule='ignore')

    pipeline = Pipeline()
    pipeline.add(dataset.value.latest, 'value')

    dates = self.dates
    with tmp_asset_finder() as finder:
        engine = SimplePipelineEngine(loader, dates, finder)
        result = engine.run_pipeline(pipeline, dates[0], dates[-1])

    without_asof = self.df.drop('asof_date', axis=1)
    expected = without_asof.set_index(['timestamp', 'sid'])
    timestamps, sids = expected.index.levels[0], expected.index.levels[1]
    expected.index = pd.MultiIndex.from_product((
        timestamps,
        finder.retrieve_all(sids),
    ))
    assert_frame_equal(result, expected, check_dtype=False)
def init_class_fixtures(cls):
    """Build the class-level frames, dshapes and loader shared by the tests.

    Sets ``dates``, ``df``, ``dshape``, ``macro_df``, ``macro_dshape``,
    ``garbage_loader`` and ``missing_values`` on the class after running
    the parent fixture initialisation.
    """
    super(BlazeToPipelineTestCase, cls).init_class_fixtures()

    cls.dates = pd.date_range('2014-01-01', '2014-01-03')
    # Each date appears three times in a row, lining up with the sid column.
    repeated_dates = cls.dates.repeat(3)
    frame = pd.DataFrame({
        'sid': cls.ASSET_FINDER_EQUITY_SIDS * 3,
        'value': (0., 1., 2., 1., 2., 3., 2., 3., 4.),
        'int_value': (0, 1, 2, 1, 2, 3, 2, 3, 4),
        'asof_date': repeated_dates,
        'timestamp': repeated_dates,
    })
    cls.df = frame
    cls.dshape = dshape(""" var * { sid: ?int64, value: ?float64, int_value: ?int64, asof_date: datetime, timestamp: datetime } """)

    # The macro fixtures are the rows for sid 65 with the sid column (and
    # the matching dshape field) removed.
    cls.macro_df = frame[frame.sid == 65].drop('sid', axis=1)
    macro_fields = OrderedDict(cls.dshape.measure.fields)
    macro_fields.pop('sid')
    cls.macro_dshape = var * Record(macro_fields)

    cls.garbage_loader = BlazeLoader()
    cls.missing_values = {'int_value': 0}
def setUpClass(cls):
    """Build the class-level frames, dshapes and loader shared by the tests.

    Sets ``dates``, ``sids``, ``df``, ``dshape``, ``macro_df``,
    ``macro_dshape``, ``garbage_loader`` and ``missing_values`` on the
    class.
    """
    cls.dates = pd.date_range('2014-01-01', '2014-01-03')
    cls.sids = (ord('A'), ord('B'), ord('C'))

    # Each date appears three times in a row, once per sid.
    repeated_dates = cls.dates.repeat(3)
    frame = pd.DataFrame({
        'sid': cls.sids * 3,
        'value': (0., 1., 2., 1., 2., 3., 2., 3., 4.),
        'int_value': (0, 1, 2, 1, 2, 3, 2, 3, 4),
        'asof_date': repeated_dates,
        'timestamp': repeated_dates,
    })
    cls.df = frame
    cls.dshape = dshape(""" var * { sid: ?int64, value: ?float64, int_value: ?int64, asof_date: datetime, timestamp: datetime } """)

    # 65 == ord('A'): the macro fixtures are the first sid's rows with the
    # sid column (and the matching dshape field) removed.
    cls.macro_df = frame[frame.sid == 65].drop('sid', axis=1)
    macro_fields = OrderedDict(cls.dshape.measure.fields)
    macro_fields.pop('sid')
    cls.macro_dshape = var * Record(macro_fields)

    cls.garbage_loader = BlazeLoader()
    cls.missing_values = {'int_value': 0}
def _test_id(self, df, dshape, expected, finder, add):
    """Run ``latest`` for each column named in ``add`` and compare results.

    A dataset is built from ``df``/``dshape`` using the class-level
    ``missing_values``; every name in ``add`` is registered in the
    pipeline as ``<column>.latest`` under that same name. The engine
    output is compared (ignoring dtypes) against ``expected`` after its
    first index level has been passed through
    ``_utc_localize_index_level_0``.
    """
    loader = BlazeLoader()
    dataset = from_blaze(
        bz.Data(df, name='expr', dshape=dshape),
        loader=loader,
        no_deltas_rule=no_deltas_rules.ignore,
        missing_values=self.missing_values,
    )

    pipeline = Pipeline()
    for column_name in add:
        pipeline.add(getattr(dataset, column_name).latest, column_name)

    dates = self.dates
    # NOTE(review): the ``finder`` argument is never read. The engine
    # always runs against a fresh temporary asset finder. Confirm whether
    # callers expect their own finder to be used here.
    with tmp_asset_finder() as tmp_finder:
        engine = SimplePipelineEngine(loader, dates, tmp_finder)
        result = engine.run_pipeline(pipeline, dates[0], dates[-1])

    assert_frame_equal(
        result,
        _utc_localize_index_level_0(expected),
        check_dtype=False,
    )
def test_id_macro_dataset_multiple_columns(self):
    """
    Check ``latest`` on a sid-less dataset that exposes two columns.

    input (df):
       asof_date  timestamp  other  value
    0 2014-01-01 2014-01-01      1      0
    3 2014-01-02 2014-01-02      2      1
    6 2014-01-03 2014-01-03      3      2

    output (expected):
                               other  value
    2014-01-01 Equity(65 [A])      1      0
               Equity(66 [B])      1      0
               Equity(67 [C])      1      0
    2014-01-02 Equity(65 [A])      2      1
               Equity(66 [B])      2      1
               Equity(67 [C])      2      1
    2014-01-03 Equity(65 [A])      3      2
               Equity(66 [B])      3      2
               Equity(67 [C])      3      2
    """
    df = self.macro_df.copy()
    df['other'] = df.value + 1

    # ``other`` reuses the type of ``value`` in the extended dshape.
    fields = OrderedDict(self.macro_dshape.measure.fields)
    fields['other'] = fields['value']
    macro_expr = bz.Data(df, name='expr', dshape=var * Record(fields))

    loader = BlazeLoader()
    dataset = from_blaze(
        macro_expr,
        loader=loader,
        no_deltas_rule=no_deltas_rules.ignore,
    )

    pipeline = Pipeline()
    pipeline.add(dataset.value.latest, 'value')
    pipeline.add(dataset.other.latest, 'other')

    dates = self.dates
    asset_info = asset_infos[0][0]
    with tmp_asset_finder(equities=asset_info) as finder:
        engine = SimplePipelineEngine(loader, dates, finder)
        result = engine.run_pipeline(pipeline, dates[0], dates[-1])

    # One (value, other) pair per date, repeated three times along the rows.
    per_date_rows = np.array([[0, 1], [1, 2], [2, 3]])
    expected_index = pd.MultiIndex.from_product((
        df.timestamp,
        finder.retrieve_all(asset_info.index),
    ))
    unsorted_expected = pd.DataFrame(
        per_date_rows.repeat(3, axis=0),
        index=expected_index,
        columns=('value', 'other'),
    )
    # Columns are compared in sorted order.
    assert_frame_equal(
        result,
        unsorted_expected.sort_index(axis=1),
        check_dtype=False,
    )
def test_auto_deltas_fail_raise(self):
    """``from_blaze`` raises ``ValueError`` under the ``raise_`` rule.

    The expression is passed without deltas, and the error message must
    contain the string form of the expression.
    """
    blaze_loader = BlazeLoader()
    source_expr = bz.Data(self.df, dshape=self.dshape)

    with self.assertRaises(ValueError) as caught:
        from_blaze(
            source_expr,
            loader=blaze_loader,
            no_deltas_rule=no_deltas_rules.raise_,
        )

    message = str(caught.exception)
    self.assertIn(str(source_expr), message)
def test_auto_deltas_fail_warn(self):
    """``from_blaze`` emits one ``NoDeltasWarning`` under the ``warn`` rule.

    Exactly one warning must be recorded, and its text must contain the
    string form of the expression.
    """
    with warnings.catch_warnings(record=True) as recorded:
        # Record every warning, including ones already shown elsewhere.
        warnings.simplefilter('always')
        blaze_loader = BlazeLoader()
        source_expr = bz.Data(self.df, dshape=self.dshape)
        from_blaze(
            source_expr,
            loader=blaze_loader,
            no_deltas_rule=no_deltas_rules.warn,
        )

    self.assertEqual(len(recorded), 1)
    warning = recorded[0].message
    self.assertIsInstance(warning, NoDeltasWarning)
    self.assertIn(str(source_expr), str(warning))
def test_auto_deltas(self):
    """``from_blaze`` pairs ``expr.ds`` with its sibling ``ds_deltas``.

    The root expression holds ``ds`` next to an empty ``ds_deltas`` frame
    with the same columns. After ``from_blaze(expr.ds)`` the loader must
    hold one entry whose ``expr`` is ``expr.ds`` and whose ``deltas`` is
    ``expr.ds_deltas``.
    """
    empty_deltas = pd.DataFrame(columns=self.df.columns)
    tables = {
        'ds': self.df,
        'ds_deltas': empty_deltas,
    }
    root_dshape = var * Record((
        ('ds', self.dshape.measure),
        ('ds_deltas', self.dshape.measure),
    ))
    expr = bz.Data(tables, dshape=root_dshape)

    loader = BlazeLoader()
    dataset = from_blaze(expr.ds, loader=loader)

    self.assertEqual(len(loader), 1)
    registered = loader[dataset]
    self.assertTrue(registered.expr.isidentical(expr.ds))
    self.assertTrue(registered.deltas.isidentical(expr.ds_deltas))
extensions=[], strict=True, environ=os.environ, ) # Set-Up Pricing Data Access trading_calendar = get_calendar('NYSE') bundle = 'sharadar-prices' #'quandl' bundle_data = bundles.load(bundle) loaders = {} # create and empty BlazeLoader blaze_loader = BlazeLoader() def my_dispatcher(column): return loaders[column] pipeline_loader = USEquityPricingLoader( bundle_data.equity_daily_bar_reader, bundle_data.adjustment_reader, ) def choose_loader(column): if column in USEquityPricing.columns: return pipeline_loader try: return my_dispatcher(column) except: