def test_estimate_windows_at_quarter_boundaries(self,
                                                start_idx,
                                                num_announcements_out):
    dataset = QuartersEstimates(num_announcements_out)
    trading_days = self.trading_days
    timelines = self.timelines
    # The window length should be from the starting index back to the first
    # date on which we got data. The goal is to ensure that as we
    # progress through the timeline, all data we got, starting from that
    # first date, is correctly overwritten.
    window_len = (
        self.trading_days.get_loc(start_idx) -
        self.trading_days.get_loc(self.window_test_start_date) + 1
    )

    class SomeFactor(CustomFactor):
        inputs = [dataset.estimate]
        window_length = window_len

        def compute(self, today, assets, out, estimate):
            today_idx = trading_days.get_loc(today)
            today_timeline = timelines[num_announcements_out].loc[
                today
            ].reindex(trading_days[:today_idx + 1]).values
            timeline_start_idx = len(today_timeline) - window_len
            assert_equal(estimate, today_timeline[timeline_start_idx:])

    engine = SimplePipelineEngine(
        lambda x: self.loader,
        self.trading_days,
        self.asset_finder,
    )
    engine.run_pipeline(
        Pipeline({'est': SomeFactor()}),
        start_date=start_idx,
        # last event date we have
        end_date=pd.Timestamp('2015-01-20', tz='utc'),
    )
def test_load_with_trading_calendar(self):
    engine = SimplePipelineEngine(
        lambda x: self.loader,
        self.trading_days,
        self.asset_finder,
    )
    results = engine.run_pipeline(
        Pipeline({c.name: c.latest for c in EventDataSet.columns}),
        start_date=self.trading_days[0],
        end_date=self.trading_days[-1],
    )
    for c in EventDataSet.columns:
        if c in self.next_value_columns:
            self.check_next_value_results(
                c,
                results[c.name].unstack(),
                self.trading_days,
            )
        elif c in self.previous_value_columns:
            self.check_previous_value_results(
                c,
                results[c.name].unstack(),
                self.trading_days,
            )
        else:
            raise AssertionError("Unexpected column %s." % c)
def test_load_properly_forward_fills(self):
    engine = SimplePipelineEngine(
        lambda x: self.loader,
        self.trading_days,
        self.asset_finder,
    )
    # Cut the dates in half so we need to forward fill some data which
    # is not in our window. The results should be computed the same as if
    # we had computed across the entire window and then sliced after the
    # computation.
    dates = self.trading_days[len(self.trading_days) // 2:]
    results = engine.run_pipeline(
        Pipeline({c.name: c.latest for c in EventDataSet.columns}),
        start_date=dates[0],
        end_date=dates[-1],
    )
    for c in EventDataSet.columns:
        if c in self.next_value_columns:
            self.check_next_value_results(
                c,
                results[c.name].unstack(),
                dates,
            )
        elif c in self.previous_value_columns:
            self.check_previous_value_results(
                c,
                results[c.name].unstack(),
                dates,
            )
        else:
            raise AssertionError("Unexpected column %s." % c)
def test_multiple_qtrs_requested(self):
    dataset1 = QuartersEstimates(1)
    dataset2 = QuartersEstimates(2)
    engine = SimplePipelineEngine(
        lambda x: self.loader,
        self.trading_days,
        self.asset_finder,
    )
    results = engine.run_pipeline(
        Pipeline(
            merge([{c.name + '1': c.latest for c in dataset1.columns},
                   {c.name + '2': c.latest for c in dataset2.columns}])
        ),
        start_date=self.trading_days[0],
        end_date=self.trading_days[-1],
    )
    q1_columns = [col.name + '1' for col in self.columns]
    q2_columns = [col.name + '2' for col in self.columns]
    # We now expect a column for 1 quarter out and a column for 2
    # quarters out for each of the dataset columns.
    assert_equal(sorted(np.array(q1_columns + q2_columns)),
                 sorted(results.columns.values))
    assert_equal(self.expected_out.sort(axis=1),
                 results.xs(0, level=1).sort(axis=1))
def test_load_empty(self):
    """
    For the case where raw data is empty, make sure we have a result for
    all sids, that the dimensions are correct, and that we have the
    correct missing value.
    """
    raw_events = pd.DataFrame(
        columns=[
            "sid",
            "timestamp",
            "event_date",
            "float",
            "int",
            "datetime",
            "string",
        ]
    )
    next_value_columns = {
        EventDataSet_US.next_datetime: "datetime",
        EventDataSet_US.next_event_date: "event_date",
        EventDataSet_US.next_float: "float",
        EventDataSet_US.next_int: "int",
        EventDataSet_US.next_string: "string",
        EventDataSet_US.next_string_custom_missing: "string",
    }
    previous_value_columns = {
        EventDataSet_US.previous_datetime: "datetime",
        EventDataSet_US.previous_event_date: "event_date",
        EventDataSet_US.previous_float: "float",
        EventDataSet_US.previous_int: "int",
        EventDataSet_US.previous_string: "string",
        EventDataSet_US.previous_string_custom_missing: "string",
    }
    loader = EventsLoader(raw_events, next_value_columns, previous_value_columns)
    engine = SimplePipelineEngine(
        lambda x: loader,
        self.asset_finder,
    )
    results = engine.run_pipeline(
        Pipeline(
            {c.name: c.latest for c in EventDataSet_US.columns},
            domain=US_EQUITIES,
        ),
        start_date=self.trading_days[0],
        end_date=self.trading_days[-1],
    )
    assets = self.asset_finder.retrieve_all(self.ASSET_FINDER_EQUITY_SIDS)
    dates = self.trading_days
    expected = self.frame_containing_all_missing_values(
        index=pd.MultiIndex.from_product([dates, assets]),
        columns=EventDataSet_US.columns,
    )
    assert_equal(results, expected)
def test_no_num_announcements_attr(self):
    dataset = QuartersEstimatesNoNumQuartersAttr(1)
    engine = SimplePipelineEngine(
        lambda x: self.loader,
        self.trading_days,
        self.asset_finder,
    )
    p = Pipeline({c.name: c.latest for c in dataset.columns})
    with self.assertRaises(AttributeError):
        engine.run_pipeline(
            p,
            start_date=self.trading_days[0],
            end_date=self.trading_days[-1],
        )
def init_class_fixtures(cls):
    super(TestDownsampledRowwiseOperation, cls).init_class_fixtures()
    cls.pipeline_engine = SimplePipelineEngine(
        get_loader=lambda c: ExplodingObject(),
        calendar=cls.dates,
        asset_finder=cls.asset_finder,
    )
def init_class_fixtures(cls):
    super(TestDownsampledRowwiseOperation, cls).init_class_fixtures()
    cls.pipeline_engine = SimplePipelineEngine(
        get_loader=lambda c: ExplodingObject(),
        asset_finder=cls.asset_finder,
        default_domain=EquitySessionDomain(
            cls.dates,
            country_code=cls.ASSET_FINDER_COUNTRY_CODE,
        ),
    )
def test_windows_with_varying_num_estimates(self):
    dataset = QuartersEstimates(1)
    assert_compute = self.assert_compute

    class SomeFactor(CustomFactor):
        inputs = [dataset.estimate]
        window_length = 3

        def compute(self, today, assets, out, estimate):
            assert_compute(estimate, today)

    engine = SimplePipelineEngine(
        lambda x: self.loader,
        self.trading_days,
        self.asset_finder,
    )
    engine.run_pipeline(
        Pipeline({'est': SomeFactor()}),
        start_date=pd.Timestamp('2015-01-13', tz='utc'),
        # last event date we have
        end_date=pd.Timestamp('2015-01-14', tz='utc'),
    )
def init_class_fixtures(cls):
    super(WithSeededRandomPipelineEngine, cls).init_class_fixtures()
    cls._sids = cls.asset_finder.sids
    cls.seeded_random_loader = loader = make_seeded_random_loader(
        cls.SEEDED_RANDOM_PIPELINE_SEED,
        cls.trading_days,
        cls._sids,
    )
    cls.seeded_random_engine = SimplePipelineEngine(
        get_loader=lambda column: loader,
        calendar=cls.trading_days,
        asset_finder=cls.asset_finder,
    )
def test_estimates(self):
    dataset = QuartersEstimates(1)
    engine = SimplePipelineEngine(
        lambda x: self.loader,
        self.trading_days,
        self.asset_finder,
    )
    results = engine.run_pipeline(
        Pipeline({c.name: c.latest for c in dataset.columns}),
        start_date=self.trading_days[1],
        end_date=self.trading_days[-2],
    )
    for sid in self.ASSET_FINDER_EQUITY_SIDS:
        sid_estimates = results.xs(sid, level=1)
        # Separate assertion for all-null DataFrame to avoid setting
        # column dtypes on `all_expected`.
        if sid == max(self.ASSET_FINDER_EQUITY_SIDS):
            assert_true(sid_estimates.isnull().all().all())
        else:
            ts_sorted_estimates = self.events[
                self.events[SID_FIELD_NAME] == sid
            ].sort(TS_FIELD_NAME)
            q1_knowledge = ts_sorted_estimates[
                ts_sorted_estimates[FISCAL_QUARTER_FIELD_NAME] == 1
            ]
            q2_knowledge = ts_sorted_estimates[
                ts_sorted_estimates[FISCAL_QUARTER_FIELD_NAME] == 2
            ]
            all_expected = pd.concat(
                [self.get_expected_estimate(
                    q1_knowledge[
                        q1_knowledge[TS_FIELD_NAME] <= date.tz_localize(None)
                    ],
                    q2_knowledge[
                        q2_knowledge[TS_FIELD_NAME] <= date.tz_localize(None)
                    ],
                    date.tz_localize(None),
                ).set_index([[date]]) for date in sid_estimates.index],
                axis=0,
            )
            assert_equal(all_expected[sid_estimates.columns], sid_estimates)
def test_wrong_num_announcements_passed(self):
    bad_dataset1 = QuartersEstimates(-1)
    bad_dataset2 = QuartersEstimates(-2)
    good_dataset = QuartersEstimates(1)
    engine = SimplePipelineEngine(
        lambda x: self.loader,
        self.trading_days,
        self.asset_finder,
    )
    columns = {
        c.name + str(dataset.num_announcements): c.latest
        for dataset in (bad_dataset1, bad_dataset2, good_dataset)
        for c in dataset.columns
    }
    p = Pipeline(columns)
    with self.assertRaises(ValueError) as e:
        engine.run_pipeline(
            p,
            start_date=self.trading_days[0],
            end_date=self.trading_days[-1],
        )
    assert_raises_regex(e, INVALID_NUM_QTRS_MESSAGE % "-1,-2")
def make_pipeline_engine(bundle, data_dates):
    """Creates a pipeline engine for the dates in data_dates.

    Using this allows usage very similar to run_pipeline in Quantopian's env.
    """
    bundle_data = load(bundle, os.environ, None)
    pipeline_loader = USEquityPricingLoader(
        bundle_data.equity_daily_bar_reader,
        bundle_data.adjustment_reader,
    )

    def choose_loader(column):
        if column in USEquityPricing.columns:
            return pipeline_loader
        raise ValueError(
            "No PipelineLoader registered for column %s." % column
        )

    # Set up the pipeline calendar, restricted to the requested date range.
    cal = bundle_data.equity_daily_bar_reader.trading_calendar.all_sessions
    cal2 = cal[(cal >= data_dates[0]) & (cal <= data_dates[1])]

    spe = SimplePipelineEngine(
        get_loader=choose_loader,
        calendar=cal2,
        asset_finder=bundle_data.asset_finder,
    )
    return spe
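# A minimal usage sketch for make_pipeline_engine above. The 'quandl' bundle
# name and the specific dates are illustrative assumptions, not part of the
# original snippet; any ingested bundle and in-calendar sessions would do.
import pandas as pd
from zipline.pipeline import Pipeline
from zipline.pipeline.data import USEquityPricing

data_dates = (pd.Timestamp('2016-01-05', tz='utc'),
              pd.Timestamp('2016-12-30', tz='utc'))
spe = make_pipeline_engine('quandl', data_dates)
# Run a one-column pipeline over the restricted calendar.
prices = spe.run_pipeline(
    Pipeline(columns={'close': USEquityPricing.close.latest}),
    start_date=data_dates[0],
    end_date=data_dates[1],
)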
def init_class_fixtures(cls):
    # This is a rare case where we actually want to do work **before** we
    # call init_class_fixtures. We choose our sids for WithAssetFinder
    # based on the events generated by make_event_data.
    cls.raw_events = make_events(add_nulls=True)
    cls.raw_events_no_nulls = cls.raw_events[
        cls.raw_events['event_date'].notnull()
    ]
    cls.next_value_columns = {
        EventDataSet_US.next_datetime: 'datetime',
        EventDataSet_US.next_event_date: 'event_date',
        EventDataSet_US.next_float: 'float',
        EventDataSet_US.next_int: 'int',
        EventDataSet_US.next_string: 'string',
        EventDataSet_US.next_string_custom_missing: 'string',
    }
    cls.previous_value_columns = {
        EventDataSet_US.previous_datetime: 'datetime',
        EventDataSet_US.previous_event_date: 'event_date',
        EventDataSet_US.previous_float: 'float',
        EventDataSet_US.previous_int: 'int',
        EventDataSet_US.previous_string: 'string',
        EventDataSet_US.previous_string_custom_missing: 'string',
    }
    cls.loader = EventsLoader(
        cls.raw_events,
        cls.next_value_columns,
        cls.previous_value_columns,
    )
    cls.ASSET_FINDER_EQUITY_SIDS = list(cls.raw_events['sid'].unique())
    cls.ASSET_FINDER_EQUITY_SYMBOLS = [
        's' + str(n) for n in cls.ASSET_FINDER_EQUITY_SIDS
    ]
    super(EventsLoaderTestCase, cls).init_class_fixtures()
    cls.engine = SimplePipelineEngine(
        lambda c: cls.loader,
        asset_finder=cls.asset_finder,
        default_domain=US_EQUITIES,
    )
def pipeline_event_setup_engine(self, dates):
    """
    Make a pipeline engine object based on the given dates.
    """
    loader = self.loader_type(*self.pipeline_event_loader_args(dates))
    return SimplePipelineEngine(lambda _: loader, dates, self.asset_finder)
# pipe = make_pipeline()
# result = run_pipeline(pipe, '2017-01-01', '2017-01-01')
# df = result.sort_values(by=['latest'], axis=0, ascending=False)

bundle_data = bundles.load('quandl')
pipeline_loader = USEquityPricingLoader(
    bundle_data.equity_daily_bar_reader,
    bundle_data.adjustment_reader,
)
engine = SimplePipelineEngine(
    # get_loader must be a callable mapping a column to its loader, not the
    # loader instance itself.
    get_loader=lambda column: pipeline_loader,
    calendar=bundle_data.equity_daily_bar_reader.trading_calendar.all_sessions,
    asset_finder=bundle_data.asset_finder,
)

# The pipeline gets the latest close price for all symbols.
pipe = Pipeline(
    columns={
        'price': USEquityPricing.close.latest,
    }
)
result = engine.run_pipeline(
    pipe,
    pd.Timestamp('2018-09-28', tz='utc'),
    pd.Timestamp('2018-09-28', tz='utc'),
)
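# Illustrative follow-up, not part of the original script: the pipeline
# output is a DataFrame indexed by (date, asset), so the highest-priced
# names on that session can be pulled out with an ordinary sort.
top_prices = result.sort_values(by='price', ascending=False).head(10)
print(top_prices)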