def test_bad_dates(self):
    """Running a pipeline with start_date after end_date should raise."""
    loader = self.loader
    engine = SimplePipelineEngine(
        lambda column: loader,
        self.dates,
        self.asset_finder,
    )

    p = Pipeline()
    msg = "start_date must be before or equal to end_date .*"
    with self.assertRaisesRegexp(ValueError, msg):
        # dates[2] > dates[1], so this is an invalid date range.
        engine.run_pipeline(p, self.dates[2], self.dates[1])
def test_bad_dates(self):
    """An out-of-order (start, end) pair must be rejected with ValueError."""
    loader = self.loader
    engine = SimplePipelineEngine(
        lambda column: loader,
        self.dates,
        self.asset_finder,
    )

    pipeline = Pipeline()
    msg = "start_date must be before or equal to end_date .*"
    with self.assertRaisesRegexp(ValueError, msg):
        engine.run_pipeline(pipeline, self.dates[2], self.dates[1])
def test_rolling_and_nonrolling(self):
    """Mix a windowed factor with non-windowed `.latest` columns."""
    open_ = USEquityPricing.open
    close = USEquityPricing.close
    volume = USEquityPricing.volume

    # Test for thirty days up to the last day that we think all
    # the assets existed.
    dates_to_test = self.dates[-30:]

    constants = {open_: 1, close: 2, volume: 3}
    loader = PrecomputedLoader(
        constants=constants,
        dates=self.dates,
        sids=self.asset_ids,
    )
    engine = SimplePipelineEngine(
        lambda column: loader,
        self.dates,
        self.asset_finder,
    )

    sumdiff = RollingSumDifference()
    result = engine.run_pipeline(
        Pipeline(
            columns={
                "sumdiff": sumdiff,
                "open": open_.latest,
                "close": close.latest,
                "volume": volume.latest,
            },
        ),
        dates_to_test[0],
        dates_to_test[-1],
    )
    self.assertIsNotNone(result)
    self.assertEqual(
        {"sumdiff", "open", "close", "volume"},
        set(result.columns),
    )

    result_index = self.asset_ids * len(dates_to_test)
    result_shape = (len(result_index),)

    # sumdiff is a 3-day rolling sum of (open - close) == 3 * (1 - 2) == -3.
    check_arrays(
        result["sumdiff"],
        Series(index=result_index, data=full(result_shape, -3, dtype=float)),
    )
    for name, const in [("open", 1), ("close", 2), ("volume", 3)]:
        check_arrays(
            result[name],
            Series(
                index=result_index,
                data=full(result_shape, const, dtype=float),
            ),
        )
def test_factor_with_single_output(self):
    """
    Test passing an `outputs` parameter of length 1 to a CustomFactor.
    """
    dates = self.dates[5:10]
    assets = self.assets
    num_dates = len(dates)
    open = USEquityPricing.open
    open_values = [self.constants[open]] * num_dates
    open_values_as_tuple = [(self.constants[open],)] * num_dates
    engine = SimplePipelineEngine(
        lambda column: self.loader,
        self.dates,
        self.asset_finder,
    )

    single_output = OpenPrice(outputs=["open"])
    pipeline = Pipeline(
        columns={
            "open_instance": single_output,
            "open_attribute": single_output.open,
        },
    )
    results = engine.run_pipeline(pipeline, dates[0], dates[-1])

    # The instance `single_output` itself will compute a numpy.recarray
    # when added as a column to our pipeline, so we expect its output
    # values to be 1-tuples.
    open_instance_expected = {
        asset: open_values_as_tuple for asset in assets
    }
    open_attribute_expected = {asset: open_values for asset in assets}

    for colname, expected_values in (
        ("open_instance", open_instance_expected),
        ("open_attribute", open_attribute_expected),
    ):
        column_results = results[colname].unstack()
        expected_results = DataFrame(
            expected_values,
            index=dates,
            columns=assets,
            dtype=float64,
        )
        assert_frame_equal(column_results, expected_results)
def test_drawdown(self):
    # The monotonically-increasing data produced by SyntheticDailyBarWriter
    # exercises two pathological cases for MaxDrawdown.  The actual
    # computed results are pretty much useless (everything is either NaN)
    # or zero, but verifying we correctly handle those corner cases is
    # valuable.
    engine = SimplePipelineEngine(
        lambda column: self.pipeline_loader,
        self.trading_calendar.all_sessions,
        self.asset_finder,
    )
    window_length = 5
    asset_ids = self.all_asset_ids
    dates = date_range(
        self.first_asset_start + self.trading_calendar.day,
        self.last_asset_end,
        freq=self.trading_calendar.day,
    )
    dates_to_test = dates[window_length:]

    drawdown = MaxDrawdown(
        inputs=(USEquityPricing.close,),
        window_length=window_length,
    )
    results = engine.run_pipeline(
        Pipeline(columns={"drawdown": drawdown}),
        dates_to_test[0],
        dates_to_test[-1],
    )

    # We expect NaNs when the asset was undefined, otherwise 0 everywhere,
    # since the input is always increasing.
    expected = DataFrame(
        data=zeros((len(dates_to_test), len(asset_ids)), dtype=float),
        index=dates_to_test,
        columns=self.asset_finder.retrieve_all(asset_ids),
    )
    self.write_nans(expected)

    result = results["drawdown"].unstack()
    assert_frame_equal(expected, result)
def test_numeric_factor(self):
    """Arithmetic on factors composes correctly through the engine."""
    constants = self.constants
    loader = self.loader
    engine = SimplePipelineEngine(
        lambda column: loader,
        self.dates,
        self.asset_finder,
    )
    num_dates = 5
    dates = self.dates[10:10 + num_dates]

    high, low = USEquityPricing.high, USEquityPricing.low
    open, close = USEquityPricing.open, USEquityPricing.close

    high_minus_low = RollingSumDifference(inputs=[high, low])
    open_minus_close = RollingSumDifference(inputs=[open, close])
    avg = (high_minus_low + open_minus_close) / 2

    results = engine.run_pipeline(
        Pipeline(
            columns={
                "high_low": high_minus_low,
                "open_close": open_minus_close,
                "avg": avg,
            },
        ),
        dates[0],
        dates[-1],
    )

    # Each RollingSumDifference has a window length of 3, so results are
    # 3x the per-day constant differences.
    high_low_result = results["high_low"].unstack()
    expected_high_low = 3.0 * (constants[high] - constants[low])
    assert_frame_equal(
        high_low_result,
        DataFrame(expected_high_low, index=dates, columns=self.assets),
    )

    open_close_result = results["open_close"].unstack()
    expected_open_close = 3.0 * (constants[open] - constants[close])
    assert_frame_equal(
        open_close_result,
        DataFrame(expected_open_close, index=dates, columns=self.assets),
    )

    avg_result = results["avg"].unstack()
    expected_avg = (expected_high_low + expected_open_close) / 2.0
    assert_frame_equal(
        avg_result,
        DataFrame(expected_avg, index=dates, columns=self.assets),
    )
def test_SMA(self):
    """SimpleMovingAverage over synthetic bars matches a pandas rolling mean."""
    engine = SimplePipelineEngine(
        lambda column: self.pipeline_loader,
        self.trading_calendar.all_sessions,
        self.asset_finder,
    )
    window_length = 5
    asset_ids = self.all_asset_ids
    dates = date_range(
        self.first_asset_start + self.trading_calendar.day,
        self.last_asset_end,
        freq=self.trading_calendar.day,
    )
    dates_to_test = dates[window_length:]

    SMA = SimpleMovingAverage(
        inputs=(USEquityPricing.close,),
        window_length=window_length,
    )
    results = engine.run_pipeline(
        Pipeline(columns={"sma": SMA}),
        dates_to_test[0],
        dates_to_test[-1],
    )

    # Shift back the raw inputs by a trading day because we expect our
    # computed results to be computed using values anchored on the
    # **previous** day's data.
    expected_raw = rolling_mean(
        expected_bar_values_2d(
            dates - self.trading_calendar.day,
            self.equity_info,
            "close",
        ),
        window_length,
        min_periods=1,
    )

    expected = DataFrame(
        # Truncate off the extra rows needed to compute the SMAs.
        expected_raw[window_length:],
        index=dates_to_test,  # dates_to_test is dates[window_length:]
        columns=self.asset_finder.retrieve_all(asset_ids),
    )
    self.write_nans(expected)

    result = results["sma"].unstack()
    assert_frame_equal(result, expected)
def test_engine_with_multicolumn_loader(self):
    """A single loader can serve every column requested by a pipeline."""
    open_ = USEquityPricing.open
    close = USEquityPricing.close
    volume = USEquityPricing.volume

    # Test for thirty days up to the second to last day that we think all
    # the assets existed.  If we test the last day of our calendar, no
    # assets will be in our output, because their end dates are all
    # before it.
    dates_to_test = self.dates[-32:-2]

    constants = {open_: 1, close: 2, volume: 3}
    loader = ConstantLoader(
        constants=constants,
        dates=self.dates,
        assets=self.assets,
    )
    # FIX: SimplePipelineEngine's first argument is a get_loader callable
    # (column -> loader), not a loader instance.  Every other test in this
    # file passes `lambda column: loader`; passing the bare loader here
    # would fail when the engine tries to call it.
    engine = SimplePipelineEngine(
        lambda column: loader,
        self.dates,
        self.asset_finder,
    )

    sumdiff = RollingSumDifference()
    result = engine.run_pipeline(
        Pipeline(
            columns={
                "sumdiff": sumdiff,
                "open": open_.latest,
                "close": close.latest,
                "volume": volume.latest,
            },
        ),
        dates_to_test[0],
        dates_to_test[-1],
    )
    self.assertIsNotNone(result)
    self.assertEqual(
        {"sumdiff", "open", "close", "volume"},
        set(result.columns),
    )

    result_index = self.assets * len(dates_to_test)
    result_shape = (len(result_index),)
    check_arrays(
        result["sumdiff"],
        Series(index=result_index, data=full(result_shape, -3)),
    )
    for name, const in [("open", 1), ("close", 2), ("volume", 3)]:
        check_arrays(
            result[name],
            Series(index=result_index, data=full(result_shape, const)),
        )
def test_input_dates_provided_by_default(self):
    """An InputDates() input receives the window's dates automatically."""
    loader = self.loader
    engine = SimplePipelineEngine(
        lambda column: loader,
        self.dates,
        self.asset_finder,
    )

    class TestFactor(CustomFactor):
        inputs = [InputDates(), USEquityPricing.close]
        window_length = 10
        dtype = datetime64ns_dtype

        def compute(self, today, assets, out, dates, closes):
            first, last = dates[[0, -1], 0]
            # The last date in the window is anchored on `today`.
            assert last == today.asm8
            assert len(dates) == len(closes) == self.window_length
            out[:] = first

    p = Pipeline(columns={'t': TestFactor()})
    results = engine.run_pipeline(p, self.dates[9], self.dates[10])

    # All results are the same, so just grab one column.
    column = results.unstack().iloc[:, 0].values
    check_arrays(column, self.dates[:2].values)
def test_screen(self):
    """A screen restricts output rows to assets passing the filter."""
    loader = self.loader
    finder = self.asset_finder
    assets = array(self.assets)
    engine = SimplePipelineEngine(
        lambda column: loader,
        self.dates,
        self.asset_finder,
    )
    num_dates = 5
    dates = self.dates[10:10 + num_dates]

    factor = AssetID()
    for asset in assets:
        p = Pipeline(columns={'f': factor}, screen=factor <= asset)
        result = engine.run_pipeline(p, dates[0], dates[-1])

        expected_sids = assets[assets <= asset]
        expected_assets = finder.retrieve_all(expected_sids)
        expected_result = DataFrame(
            index=MultiIndex.from_product([dates, expected_assets]),
            data=tile(expected_sids.astype(float), [len(dates)]),
            columns=['f'],
        )

        assert_frame_equal(result, expected_result)
def test_single_factor(self):
    """A one-column pipeline, with and without an always-true screen."""
    loader = self.loader
    finder = self.asset_finder
    assets = self.assets
    engine = SimplePipelineEngine(
        lambda column: loader,
        self.dates,
        self.asset_finder,
    )
    result_shape = (num_dates, num_assets) = (5, len(assets))
    dates = self.dates[10:10 + num_dates]

    factor = RollingSumDifference()
    expected_result = -factor.window_length

    # Since every asset will pass the screen, these should be equivalent.
    pipelines = [
        Pipeline(columns={'f': factor}),
        Pipeline(
            columns={'f': factor},
            screen=factor.eq(expected_result),
        ),
    ]

    for p in pipelines:
        result = engine.run_pipeline(p, dates[0], dates[-1])
        self.assertEqual(set(result.columns), {'f'})
        assert_multi_index_is_product(
            self,
            result.index,
            dates,
            finder.retrieve_all(assets),
        )

        check_arrays(
            result['f'].unstack().values,
            full(result_shape, expected_result),
        )
def test_custom_factor_outputs_parameter(self):
    """A CustomFactor's `outputs` attributes can each be pipeline columns."""
    dates = self.dates[5:10]
    assets = self.assets
    num_dates = len(dates)
    num_assets = len(assets)
    constants = self.constants
    engine = SimplePipelineEngine(
        lambda column: self.loader,
        self.dates,
        self.asset_finder,
    )

    def create_expected_results(expected_value):
        expected_values = full(
            (num_dates, num_assets),
            expected_value,
            float64,
        )
        return DataFrame(expected_values, index=dates, columns=assets)

    for window_length in range(1, 3):
        sum_, diff = OpenCloseSumAndDiff(
            outputs=['sum_', 'diff'],
            window_length=window_length,
        )
        pipeline = Pipeline(columns={'sum_': sum_, 'diff': diff})
        results = engine.run_pipeline(pipeline, dates[0], dates[-1])
        for colname, op in ('sum_', add), ('diff', sub):
            output_results = results[colname].unstack()
            output_expected = create_expected_results(
                op(
                    constants[USEquityPricing.open] * window_length,
                    constants[USEquityPricing.close] * window_length,
                )
            )
            assert_frame_equal(output_results, output_expected)
def test_instance_of_factor_with_multiple_outputs(self):
    """
    Test adding a CustomFactor instance, which has multiple outputs, as a
    pipeline column directly. Its computed values should be tuples
    containing the computed values of each of its outputs.
    """
    dates = self.dates[5:10]
    assets = self.assets
    num_dates = len(dates)
    num_assets = len(assets)
    constants = self.constants
    engine = SimplePipelineEngine(
        lambda column: self.loader,
        self.dates,
        self.asset_finder,
    )

    open_values = [constants[USEquityPricing.open]] * num_assets
    close_values = [constants[USEquityPricing.close]] * num_assets
    expected_values = [list(zip(open_values, close_values))] * num_dates
    expected_results = DataFrame(
        expected_values,
        index=dates,
        columns=assets,
        dtype=float64,
    )

    multiple_outputs = MultipleOutputs()
    pipeline = Pipeline(columns={'instance': multiple_outputs})
    results = engine.run_pipeline(pipeline, dates[0], dates[-1])
    instance_results = results['instance'].unstack()
    assert_frame_equal(instance_results, expected_results)
def test_single_factor(self):
    """A single-column pipeline yields the same frame with or without
    a screen that every asset passes."""
    loader = self.loader
    assets = self.assets
    engine = SimplePipelineEngine(
        lambda column: loader,
        self.dates,
        self.asset_finder,
    )
    result_shape = (num_dates, num_assets) = (5, len(assets))
    dates = self.dates[10:10 + num_dates]

    factor = RollingSumDifference()
    expected_result = -factor.window_length

    # Since every asset will pass the screen, these should be equivalent.
    pipelines = [
        Pipeline(columns={'f': factor}),
        Pipeline(
            columns={'f': factor},
            screen=factor.eq(expected_result),
        ),
    ]

    for p in pipelines:
        result = engine.run_pipeline(p, dates[0], dates[-1])
        self.assertEqual(set(result.columns), {'f'})
        assert_multi_index_is_product(
            self,
            result.index,
            dates,
            assets,
        )

        check_arrays(
            result['f'].unstack().values,
            full(result_shape, expected_result, dtype=float),
        )
def test_custom_factor_outputs_parameter(self):
    """Each output named in `outputs` can be requested as its own column."""
    dates = self.dates[5:10]
    assets = self.assets
    num_dates = len(dates)
    num_assets = len(assets)
    constants = self.constants
    engine = SimplePipelineEngine(
        lambda column: self.loader,
        self.dates,
        self.asset_finder,
    )

    def create_expected_results(expected_value):
        expected_values = full((num_dates, num_assets), expected_value, float64)
        return DataFrame(expected_values, index=dates, columns=assets)

    for window_length in range(1, 3):
        sum_, diff = OpenCloseSumAndDiff(
            outputs=["sum_", "diff"],
            window_length=window_length,
        )
        pipeline = Pipeline(columns={"sum_": sum_, "diff": diff})
        results = engine.run_pipeline(pipeline, dates[0], dates[-1])
        for colname, op in ("sum_", add), ("diff", sub):
            output_results = results[colname].unstack()
            output_expected = create_expected_results(
                op(
                    constants[USEquityPricing.open] * window_length,
                    constants[USEquityPricing.close] * window_length,
                )
            )
            assert_frame_equal(output_results, output_expected)
def run_pipeline(self, *args, **kwargs):
    """Run a pipeline on the US_EQUITIES domain using ``self.loader``.

    All positional/keyword arguments are forwarded to
    ``SimplePipelineEngine.run_pipeline``.
    """
    calendar = US_EQUITIES
    loader = self.loader
    finder = self.asset_finder
    engine = SimplePipelineEngine(
        lambda col: loader,
        finder,
        default_domain=calendar,
    )
    return engine.run_pipeline(*args, **kwargs)
def test_rolling_and_nonrolling(self):
    """Windowed and `.latest` columns can coexist in a single pipeline."""
    open_ = USEquityPricing.open
    close = USEquityPricing.close
    volume = USEquityPricing.volume

    # Test for thirty days up to the last day that we think all
    # the assets existed.
    dates_to_test = self.dates[-30:]

    constants = {open_: 1, close: 2, volume: 3}
    loader = PrecomputedLoader(
        constants=constants,
        dates=self.dates,
        sids=self.asset_ids,
    )
    engine = SimplePipelineEngine(
        lambda column: loader,
        self.dates,
        self.asset_finder,
    )

    sumdiff = RollingSumDifference()

    result = engine.run_pipeline(
        Pipeline(
            columns={
                'sumdiff': sumdiff,
                'open': open_.latest,
                'close': close.latest,
                'volume': volume.latest,
            },
        ),
        dates_to_test[0],
        dates_to_test[-1],
    )
    self.assertIsNotNone(result)
    self.assertEqual(
        {'sumdiff', 'open', 'close', 'volume'},
        set(result.columns),
    )

    result_index = self.asset_ids * len(dates_to_test)
    result_shape = (len(result_index),)
    check_arrays(
        result['sumdiff'],
        Series(
            index=result_index,
            data=full(result_shape, -3, dtype=float),
        ),
    )

    for name, const in [('open', 1), ('close', 2), ('volume', 3)]:
        check_arrays(
            result[name],
            Series(
                index=result_index,
                data=full(result_shape, const, dtype=float),
            ),
        )
def test_factor_with_multiple_outputs(self):
    """Multiple-output factors respect cascading, alternating, and no mask."""
    dates = self.dates[5:10]
    assets = self.assets
    asset_ids = self.asset_ids
    constants = self.constants
    num_dates = len(dates)
    num_assets = len(assets)
    open = USEquityPricing.open
    close = USEquityPricing.close
    engine = SimplePipelineEngine(
        lambda column: self.loader,
        self.dates,
        self.asset_finder,
    )

    def create_expected_results(expected_value, mask):
        # Masked-out cells are NaN in the unstacked output frame.
        expected_values = where(mask, expected_value, nan)
        return DataFrame(expected_values, index=dates, columns=assets)

    cascading_mask = AssetIDPlusDay() < (asset_ids[-1] + dates[0].day)
    expected_cascading_mask_result = make_cascading_boolean_array(
        shape=(num_dates, num_assets),
    )

    alternating_mask = (AssetIDPlusDay() % 2).eq(0)
    expected_alternating_mask_result = make_alternating_boolean_array(
        shape=(num_dates, num_assets),
        first_value=False,
    )

    expected_no_mask_result = full(
        shape=(num_dates, num_assets),
        fill_value=True,
        dtype=bool_dtype,
    )

    masks = cascading_mask, alternating_mask, NotSpecified
    expected_mask_results = (
        expected_cascading_mask_result,
        expected_alternating_mask_result,
        expected_no_mask_result,
    )
    for mask, expected_mask in zip(masks, expected_mask_results):
        open_price, close_price = MultipleOutputs(mask=mask)
        pipeline = Pipeline(
            columns={'open_price': open_price, 'close_price': close_price},
        )
        if mask is not NotSpecified:
            pipeline.add(mask, 'mask')

        results = engine.run_pipeline(pipeline, dates[0], dates[-1])
        for colname, case_column in (('open_price', open),
                                     ('close_price', close)):
            if mask is not NotSpecified:
                mask_results = results['mask'].unstack()
                check_arrays(mask_results.values, expected_mask)
            output_results = results[colname].unstack()
            output_expected = create_expected_results(
                constants[case_column],
                expected_mask,
            )
            assert_frame_equal(output_results, output_expected)
def test_same_day_pipeline(self):
    """A single-day run (start == end) must still use the prior day's data."""
    loader = self.loader
    engine = SimplePipelineEngine(
        lambda column: loader,
        self.dates,
        self.asset_finder,
    )
    factor = AssetID()
    asset = self.asset_ids[0]
    p = Pipeline(columns={"f": factor}, screen=factor <= asset)

    # The crux of this is that when we run the pipeline for a single day
    #  (i.e. start and end dates are the same) we should accurately get
    # data for the day prior.
    result = engine.run_pipeline(p, self.dates[1], self.dates[1])
    self.assertEqual(result["f"][0], 1.0)
def test_fail_usefully_on_insufficient_data(self):
    """Requesting a window that extends before known data raises
    NoFurtherDataError rather than failing obscurely."""
    loader = self.loader
    engine = SimplePipelineEngine(
        lambda column: loader,
        self.dates,
        self.asset_finder,
    )

    class SomeFactor(CustomFactor):
        inputs = [USEquityPricing.close]
        window_length = 10

        def compute(self, today, assets, out, closes):
            pass

    p = Pipeline(columns={'t': SomeFactor()})

    # self.dates[9] is the earliest date we should be able to compute.
    engine.run_pipeline(p, self.dates[9], self.dates[9])

    # We shouldn't be able to compute dates[8], since we only know about 8
    # prior dates, and we need a window length of 10.
    with self.assertRaises(NoFurtherDataError):
        engine.run_pipeline(p, self.dates[8], self.dates[8])
def test_rolling_and_nonrolling(self):
    """ConstantLoader variant: windowed and `.latest` columns together."""
    open_ = USEquityPricing.open
    close = USEquityPricing.close
    volume = USEquityPricing.volume

    # Test for thirty days up to the last day that we think all
    # the assets existed.
    dates_to_test = self.dates[-30:]

    constants = {open_: 1, close: 2, volume: 3}
    loader = ConstantLoader(
        constants=constants,
        dates=self.dates,
        assets=self.assets,
    )
    engine = SimplePipelineEngine(
        lambda column: loader,
        self.dates,
        self.asset_finder,
    )

    sumdiff = RollingSumDifference()

    result = engine.run_pipeline(
        Pipeline(
            columns={
                'sumdiff': sumdiff,
                'open': open_.latest,
                'close': close.latest,
                'volume': volume.latest,
            },
        ),
        dates_to_test[0],
        dates_to_test[-1],
    )
    self.assertIsNotNone(result)
    self.assertEqual(
        {'sumdiff', 'open', 'close', 'volume'},
        set(result.columns),
    )

    result_index = self.assets * len(dates_to_test)
    result_shape = (len(result_index),)
    check_arrays(
        result['sumdiff'],
        Series(index=result_index, data=full(result_shape, -3)),
    )

    for name, const in [('open', 1), ('close', 2), ('volume', 3)]:
        check_arrays(
            result[name],
            Series(index=result_index, data=full(result_shape, const)),
        )
def test_engine_with_multicolumn_loader(self):
    """One loader satisfies all columns (sumdiff plus three `.latest`)."""
    open_ = USEquityPricing.open
    close = USEquityPricing.close
    volume = USEquityPricing.volume

    # Test for thirty days up to the second to last day that we think all
    # the assets existed.  If we test the last day of our calendar, no
    # assets will be in our output, because their end dates are all
    # before it.
    dates_to_test = self.dates[-32:-2]

    constants = {open_: 1, close: 2, volume: 3}
    loader = ConstantLoader(
        constants=constants,
        dates=self.dates,
        assets=self.assets,
    )
    # FIX: the engine's first argument is a get_loader callable
    # (column -> loader), not a loader instance; match the convention used
    # by every other test in this file.
    engine = SimplePipelineEngine(
        lambda column: loader,
        self.dates,
        self.asset_finder,
    )

    sumdiff = RollingSumDifference()

    result = engine.run_pipeline(
        Pipeline(
            columns={
                'sumdiff': sumdiff,
                'open': open_.latest,
                'close': close.latest,
                'volume': volume.latest,
            },
        ),
        dates_to_test[0],
        dates_to_test[-1],
    )
    self.assertIsNotNone(result)
    self.assertEqual(
        {'sumdiff', 'open', 'close', 'volume'},
        set(result.columns),
    )

    result_index = self.assets * len(dates_to_test)
    result_shape = (len(result_index),)
    check_arrays(
        result['sumdiff'],
        Series(index=result_index, data=full(result_shape, -3)),
    )

    for name, const in [('open', 1), ('close', 2), ('volume', 3)]:
        check_arrays(
            result[name],
            Series(index=result_index, data=full(result_shape, const)),
        )
def test_same_day_pipeline(self):
    """start == end should compute using data as of the previous day."""
    loader = self.loader
    engine = SimplePipelineEngine(
        lambda column: loader,
        self.dates,
        self.asset_finder,
    )
    factor = AssetID()
    asset = self.asset_ids[0]
    p = Pipeline(columns={'f': factor}, screen=factor <= asset)

    # The crux of this is that when we run the pipeline for a single day
    #  (i.e. start and end dates are the same) we should accurately get
    # data for the day prior.
    result = engine.run_pipeline(p, self.dates[1], self.dates[1])
    self.assertEqual(result['f'][0], 1.0)
def test_SMA(self):
    """SimpleMovingAverage output matches rolling_mean of the written bars."""
    engine = SimplePipelineEngine(
        lambda column: self.pipeline_loader,
        self.env.trading_days,
        self.finder,
    )
    window_length = 5
    assets = self.all_assets
    dates = date_range(
        self.first_asset_start + self.trading_day,
        self.last_asset_end,
        freq=self.trading_day,
    )
    dates_to_test = dates[window_length:]

    SMA = SimpleMovingAverage(
        inputs=(USEquityPricing.close,),
        window_length=window_length,
    )

    results = engine.run_pipeline(
        Pipeline(columns={'sma': SMA}),
        dates_to_test[0],
        dates_to_test[-1],
    )

    # Shift back the raw inputs by a trading day because we expect our
    # computed results to be computed using values anchored on the
    # **previous** day's data.
    expected_raw = rolling_mean(
        self.writer.expected_values_2d(
            dates - self.trading_day,
            assets,
            'close',
        ),
        window_length,
        min_periods=1,
    )

    expected = DataFrame(
        # Truncate off the extra rows needed to compute the SMAs.
        expected_raw[window_length:],
        index=dates_to_test,  # dates_to_test is dates[window_length:]
        columns=self.finder.retrieve_all(assets),
    )
    self.write_nans(expected)
    result = results['sma'].unstack()
    assert_frame_equal(result, expected)
def test_numeric_factor(self):
    """Factor arithmetic ((a + b) / 2) flows through the engine correctly."""
    constants = self.constants
    loader = self.loader
    engine = SimplePipelineEngine(
        lambda column: loader,
        self.dates,
        self.asset_finder,
    )
    num_dates = 5
    dates = self.dates[10:10 + num_dates]

    high, low = USEquityPricing.high, USEquityPricing.low
    open, close = USEquityPricing.open, USEquityPricing.close

    high_minus_low = RollingSumDifference(inputs=[high, low])
    open_minus_close = RollingSumDifference(inputs=[open, close])
    avg = (high_minus_low + open_minus_close) / 2

    results = engine.run_pipeline(
        Pipeline(
            columns={
                'high_low': high_minus_low,
                'open_close': open_minus_close,
                'avg': avg,
            },
        ),
        dates[0],
        dates[-1],
    )

    high_low_result = results['high_low'].unstack()
    expected_high_low = 3.0 * (constants[high] - constants[low])
    assert_frame_equal(
        high_low_result,
        DataFrame(expected_high_low, index=dates, columns=self.assets),
    )

    open_close_result = results['open_close'].unstack()
    expected_open_close = 3.0 * (constants[open] - constants[close])
    assert_frame_equal(
        open_close_result,
        DataFrame(expected_open_close, index=dates, columns=self.assets),
    )

    avg_result = results['avg'].unstack()
    expected_avg = (expected_high_low + expected_open_close) / 2.0
    assert_frame_equal(
        avg_result,
        DataFrame(expected_avg, index=dates, columns=self.assets),
    )
def test_multiple_rolling_factors(self):
    """Several rolling factors with distinct windows/inputs in one pipeline."""
    loader = self.loader
    finder = self.asset_finder
    assets = self.assets
    engine = SimplePipelineEngine(
        lambda column: loader,
        self.dates,
        self.asset_finder,
    )
    shape = num_dates, num_assets = (5, len(assets))
    dates = self.dates[10:10 + num_dates]

    short_factor = RollingSumDifference(window_length=3)
    long_factor = RollingSumDifference(window_length=5)
    high_factor = RollingSumDifference(
        window_length=3,
        inputs=[USEquityPricing.open, USEquityPricing.high],
    )

    pipeline = Pipeline(
        columns={
            'short': short_factor,
            'long': long_factor,
            'high': high_factor,
        }
    )
    results = engine.run_pipeline(pipeline, dates[0], dates[-1])

    self.assertEqual(set(results.columns), {'short', 'high', 'long'})
    assert_multi_index_is_product(
        self,
        results.index,
        dates,
        finder.retrieve_all(assets),
    )

    # row-wise sum over an array whose values are all (1 - 2)
    check_arrays(
        results['short'].unstack().values,
        full(shape, -short_factor.window_length),
    )
    check_arrays(
        results['long'].unstack().values,
        full(shape, -long_factor.window_length),
    )
    # row-wise sum over an array whose values are all (1 - 3)
    check_arrays(
        results['high'].unstack().values,
        full(shape, -2 * high_factor.window_length),
    )
def test_multiple_rolling_factors(self):
    """Duplicate variant: three rolling factors computed in a single run."""
    loader = self.loader
    finder = self.asset_finder
    assets = self.assets
    engine = SimplePipelineEngine(
        lambda column: loader,
        self.dates,
        self.asset_finder,
    )
    shape = num_dates, num_assets = (5, len(assets))
    dates = self.dates[10:10 + num_dates]

    short_factor = RollingSumDifference(window_length=3)
    long_factor = RollingSumDifference(window_length=5)
    high_factor = RollingSumDifference(
        window_length=3,
        inputs=[USEquityPricing.open, USEquityPricing.high],
    )

    pipeline = Pipeline(
        columns={
            'short': short_factor,
            'long': long_factor,
            'high': high_factor,
        }
    )
    results = engine.run_pipeline(pipeline, dates[0], dates[-1])

    self.assertEqual(set(results.columns), {'short', 'high', 'long'})
    assert_multi_index_is_product(
        self,
        results.index,
        dates,
        finder.retrieve_all(assets),
    )

    # row-wise sum over an array whose values are all (1 - 2)
    check_arrays(
        results['short'].unstack().values,
        full(shape, -short_factor.window_length),
    )
    check_arrays(
        results['long'].unstack().values,
        full(shape, -long_factor.window_length),
    )
    # row-wise sum over an array whose values are all (1 - 3)
    check_arrays(
        results['high'].unstack().values,
        full(shape, -2 * high_factor.window_length),
    )
def test_factor_with_single_output(self):
    """
    Test passing an `outputs` parameter of length 1 to a CustomFactor.
    """
    dates = self.dates[5:10]
    assets = self.assets
    num_dates = len(dates)
    open = USEquityPricing.open
    open_values = [self.constants[open]] * num_dates
    open_values_as_tuple = [(self.constants[open],)] * num_dates
    engine = SimplePipelineEngine(
        lambda column: self.loader,
        self.dates,
        self.asset_finder,
    )

    single_output = OpenPrice(outputs=['open'])
    pipeline = Pipeline(
        columns={
            'open_instance': single_output,
            'open_attribute': single_output.open,
        },
    )
    results = engine.run_pipeline(pipeline, dates[0], dates[-1])

    # The instance `single_output` itself will compute a numpy.recarray
    # when added as a column to our pipeline, so we expect its output
    # values to be 1-tuples.
    open_instance_expected = {
        asset: open_values_as_tuple for asset in assets
    }
    open_attribute_expected = {asset: open_values for asset in assets}

    for colname, expected_values in (
        ('open_instance', open_instance_expected),
        ('open_attribute', open_attribute_expected),
    ):
        column_results = results[colname].unstack()
        expected_results = DataFrame(
            expected_values,
            index=dates,
            columns=assets,
            dtype=float64,
        )
        assert_frame_equal(column_results, expected_results)
def test_drawdown(self):
    # The monotonically-increasing data produced by SyntheticDailyBarWriter
    # exercises two pathological cases for MaxDrawdown.  The actual
    # computed results are pretty much useless (everything is either NaN)
    # or zero, but verifying we correctly handle those corner cases is
    # valuable.
    engine = SimplePipelineEngine(
        lambda column: self.pipeline_loader,
        self.env.trading_days,
        self.finder,
    )
    window_length = 5
    assets = self.all_assets
    dates = date_range(
        self.first_asset_start + self.trading_day,
        self.last_asset_end,
        freq=self.trading_day,
    )
    dates_to_test = dates[window_length:]

    drawdown = MaxDrawdown(
        inputs=(USEquityPricing.close,),
        window_length=window_length,
    )

    results = engine.run_pipeline(
        Pipeline(columns={'drawdown': drawdown}),
        dates_to_test[0],
        dates_to_test[-1],
    )

    # We expect NaNs when the asset was undefined, otherwise 0 everywhere,
    # since the input is always increasing.
    expected = DataFrame(
        data=zeros((len(dates_to_test), len(assets)), dtype=float),
        index=dates_to_test,
        columns=self.finder.retrieve_all(assets),
    )
    self.write_nans(expected)
    result = results['drawdown'].unstack()

    assert_frame_equal(expected, result)
def test_screen(self):
    """Screening on AssetID keeps exactly the sids <= the threshold."""
    loader = self.loader
    finder = self.asset_finder
    asset_ids = array(self.asset_ids)
    engine = SimplePipelineEngine(
        lambda column: loader,
        self.dates,
        self.asset_finder,
    )
    num_dates = 5
    dates = self.dates[10:10 + num_dates]

    factor = AssetID()
    for asset_id in asset_ids:
        p = Pipeline(columns={"f": factor}, screen=factor <= asset_id)
        result = engine.run_pipeline(p, dates[0], dates[-1])

        expected_sids = asset_ids[asset_ids <= asset_id]
        expected_assets = finder.retrieve_all(expected_sids)
        expected_result = DataFrame(
            index=MultiIndex.from_product([dates, expected_assets]),
            data=tile(expected_sids.astype(float), [len(dates)]),
            columns=["f"],
        )

        assert_frame_equal(result, expected_result)
def test_input_dates_provided_by_default(self):
    """InputDates() supplies the factor's date window without user wiring."""
    loader = self.loader
    engine = SimplePipelineEngine(
        lambda column: loader,
        self.dates,
        self.asset_finder,
    )

    class TestFactor(CustomFactor):
        inputs = [InputDates(), USEquityPricing.close]
        window_length = 10
        dtype = datetime64ns_dtype

        def compute(self, today, assets, out, dates, closes):
            first, last = dates[[0, -1], 0]
            assert last == today.asm8
            assert len(dates) == len(closes) == self.window_length
            out[:] = first

    p = Pipeline(columns={'t': TestFactor()})
    results = engine.run_pipeline(p, self.dates[9], self.dates[10])

    # All results are the same, so just grab one column.
    column = results.unstack().iloc[:, 0].values
    check_arrays(column, self.dates[:2].values)
def test_multiple_rolling_factors(self):
    """Three rolling factors; expected values are float-dtype arrays."""
    loader = self.loader
    assets = self.assets
    engine = SimplePipelineEngine(
        lambda column: loader,
        self.dates,
        self.asset_finder,
    )
    shape = num_dates, num_assets = (5, len(assets))
    dates = self.dates[10:10 + num_dates]

    short_factor = RollingSumDifference(window_length=3)
    long_factor = RollingSumDifference(window_length=5)
    high_factor = RollingSumDifference(
        window_length=3,
        inputs=[USEquityPricing.open, USEquityPricing.high],
    )

    pipeline = Pipeline(
        columns={
            "short": short_factor,
            "long": long_factor,
            "high": high_factor,
        }
    )
    results = engine.run_pipeline(pipeline, dates[0], dates[-1])

    self.assertEqual(set(results.columns), {"short", "high", "long"})
    assert_multi_index_is_product(self, results.index, dates, assets)

    # row-wise sum over an array whose values are all (1 - 2)
    check_arrays(
        results["short"].unstack().values,
        full(shape, -short_factor.window_length, dtype=float),
    )
    check_arrays(
        results["long"].unstack().values,
        full(shape, -long_factor.window_length, dtype=float),
    )
    # row-wise sum over an array whose values are all (1 - 3)
    check_arrays(
        results["high"].unstack().values,
        full(shape, -2 * high_factor.window_length, dtype=float),
    )
def test_compute_with_adjustments(self):
    """Adjustments applied by a DataFrameLoader reproduce the pre-inverted
    baseline for every (window_length, start, stop) combination."""
    dates, assets = self.dates, self.assets
    low, high = USEquityPricing.low, USEquityPricing.high
    apply_idxs = [3, 10, 16]

    def apply_date(idx, offset=0):
        return dates[apply_idxs[idx] + offset]

    adjustments = DataFrame.from_records(
        [
            dict(
                kind=MULTIPLY,
                sid=assets[1],
                value=2.0,
                start_date=None,
                end_date=apply_date(0, offset=-1),
                apply_date=apply_date(0),
            ),
            dict(
                kind=MULTIPLY,
                sid=assets[1],
                value=3.0,
                start_date=None,
                end_date=apply_date(1, offset=-1),
                apply_date=apply_date(1),
            ),
            dict(
                kind=MULTIPLY,
                sid=assets[1],
                value=5.0,
                start_date=None,
                end_date=apply_date(2, offset=-1),
                apply_date=apply_date(2),
            ),
        ]
    )
    low_base = DataFrame(self.make_frame(30.0))
    low_loader = DataFrameLoader(low, low_base.copy(), adjustments=None)

    # Pre-apply inverse of adjustments to the baseline.
    high_base = DataFrame(self.make_frame(30.0))
    high_base.iloc[:apply_idxs[0], 1] /= 2.0
    high_base.iloc[:apply_idxs[1], 1] /= 3.0
    high_base.iloc[:apply_idxs[2], 1] /= 5.0
    high_loader = DataFrameLoader(high, high_base, adjustments)

    engine = SimplePipelineEngine(
        {low: low_loader, high: high_loader}.__getitem__,
        self.dates,
        self.asset_finder,
    )

    for window_length in range(1, 4):
        low_mavg = SimpleMovingAverage(
            inputs=[USEquityPricing.low],
            window_length=window_length,
        )
        high_mavg = SimpleMovingAverage(
            inputs=[USEquityPricing.high],
            window_length=window_length,
        )
        bounds = product_upper_triangle(range(window_length, len(dates)))
        for start, stop in bounds:
            results = engine.run_pipeline(
                Pipeline(
                    columns={'low': low_mavg, 'high': high_mavg},
                ),
                dates[start],
                dates[stop],
            )
            self.assertEqual(set(results.columns), {'low', 'high'})
            iloc_bounds = slice(start, stop + 1)  # +1 to include end date

            low_results = results.unstack()['low']
            assert_frame_equal(low_results, low_base.iloc[iloc_bounds])

            high_results = results.unstack()['high']
            assert_frame_equal(high_results, high_base.iloc[iloc_bounds])
def test_loader_given_multiple_columns(self):
    """
    Verify that the engine dispatches each column to the correct loader
    and batches columns per loader, when a pipeline mixes columns from
    datasets served by two different loaders.
    """
    class Loader1DataSet1(DataSet):
        col1 = Column(float32)
        col2 = Column(float32)

    class Loader1DataSet2(DataSet):
        col1 = Column(float32)
        col2 = Column(float32)

    class Loader2DataSet(DataSet):
        col1 = Column(float32)
        col2 = Column(float32)

    # Constant values served by loader1 for each of its columns.
    constants1 = {
        Loader1DataSet1.col1: 1,
        Loader1DataSet1.col2: 2,
        Loader1DataSet2.col1: 3,
        Loader1DataSet2.col2: 4
    }
    loader1 = RecordingConstantLoader(constants=constants1,
                                      dates=self.dates,
                                      assets=self.assets)
    # Constant values served by loader2.
    constants2 = {Loader2DataSet.col1: 5,
                  Loader2DataSet.col2: 6}
    loader2 = RecordingConstantLoader(constants=constants2,
                                      dates=self.dates,
                                      assets=self.assets)
    # Route Loader2DataSet columns to loader2; everything else to loader1.
    engine = SimplePipelineEngine(
        lambda column:
        loader2 if column.dataset == Loader2DataSet else loader1,
        self.dates,
        self.asset_finder,
    )

    pipe_col1 = RollingSumSum(inputs=[
        Loader1DataSet1.col1,
        Loader1DataSet2.col1,
        Loader2DataSet.col1
    ],
        window_length=2)

    pipe_col2 = RollingSumSum(inputs=[
        Loader1DataSet1.col2,
        Loader1DataSet2.col2,
        Loader2DataSet.col2
    ],
        window_length=3)

    pipe_col3 = RollingSumSum(inputs=[Loader2DataSet.col1],
                              window_length=3)

    columns = OrderedDict([
        ('pipe_col1', pipe_col1),
        ('pipe_col2', pipe_col2),
        ('pipe_col3', pipe_col3),
    ])

    result = engine.run_pipeline(
        Pipeline(columns=columns),
        self.dates[2],  # index is >= the largest window length - 1
        self.dates[-1])

    min_window = min(pip_col.window_length
                     for pip_col in itervalues(columns))
    # Merged column -> constant mapping across both loaders.
    col_to_val = ChainMap(constants1, constants2)
    # Expected value for each output column: sum of its input constants
    # times its window length (RollingSumSum sums over the window).
    vals = {
        name: (sum(col_to_val[col] for col in pipe_col.inputs)
               * pipe_col.window_length)
        for name, pipe_col in iteritems(columns)
    }

    index = MultiIndex.from_product([self.dates[2:], self.assets])

    # Columns with a longer window than `min_window` produce NaN for the
    # leading dates on which they cannot yet compute.
    expected = DataFrame(data={
        col: concatenate((full(
            (columns[col].window_length - min_window)
            * index.levshape[1],
            nan),
            full((index.levshape[0]
                  - (columns[col].window_length - min_window))
                 * index.levshape[1],
                 val)))
        for col, val in iteritems(vals)
    },
        index=index,
        columns=columns)

    assert_frame_equal(result, expected)

    # Each loader should have been called exactly once per batch of
    # columns it serves.
    self.assertEqual(
        set(loader1.load_calls),
        {
            ColumnArgs.sorted_by_ds(Loader1DataSet1.col1,
                                    Loader1DataSet2.col1),
            ColumnArgs.sorted_by_ds(Loader1DataSet1.col2,
                                    Loader1DataSet2.col2)
        })
    self.assertEqual(
        set(loader2.load_calls),
        {
            ColumnArgs.sorted_by_ds(Loader2DataSet.col1,
                                    Loader2DataSet.col2)
        })
# Set the trading calendar trading_calendar = get_calendar('NYSE') # Create a Pipeline engine engine = SimplePipelineEngine(get_loader=choose_loader, calendar=trading_calendar.all_sessions, asset_finder=bundle_data.asset_finder) # Set the start and end dates start_date = pd.Timestamp('2016-01-05', tz='utc') end_date = pd.Timestamp('2016-01-05', tz='utc') # Run our pipeline for the given start and end dates pipeline_output = engine.run_pipeline(pipeline, start_date, end_date) # Get the values in index level 1 and save them to a list universe_tickers = pipeline_output.index.get_level_values(1).values.tolist() # Create a data portal data_portal = DataPortal( bundle_data.asset_finder, trading_calendar=trading_calendar, first_trading_day=bundle_data.equity_daily_bar_reader.first_trading_day, equity_daily_reader=bundle_data.equity_daily_bar_reader, adjustment_reader=bundle_data.adjustment_reader) def get_pricing(data_portal, trading_calendar,
def test_factor_with_multiple_outputs(self):
    """
    Verify a CustomFactor with multiple outputs, computed with a
    cascading mask, an alternating mask, and no mask at all.
    """
    dates = self.dates[5:10]
    assets = self.assets
    asset_ids = self.asset_ids
    constants = self.constants
    open = USEquityPricing.open  # NOTE: shadows the builtin within this test
    close = USEquityPricing.close
    engine = SimplePipelineEngine(lambda column: self.loader,
                                  self.dates,
                                  self.asset_finder)

    def create_expected_results(expected_value, mask):
        # Expected frame: `expected_value` where the mask is True, NaN elsewhere.
        expected_values = where(mask, expected_value, nan)
        return DataFrame(expected_values, index=dates, columns=assets)

    cascading_mask = AssetIDPlusDay() < (asset_ids[-1] + dates[0].day)
    expected_cascading_mask_result = array(
        [
            [True, True, True, False],
            [True, True, False, False],
            [True, False, False, False],
            [False, False, False, False],
            [False, False, False, False],
        ],
        dtype=bool,
    )

    alternating_mask = (AssetIDPlusDay() % 2).eq(0)
    expected_alternating_mask_result = array(
        [
            [False, True, False, True],
            [True, False, True, False],
            [False, True, False, True],
            [True, False, True, False],
            [False, True, False, True],
        ],
        dtype=bool,
    )

    # NotSpecified means no mask: every (date, asset) cell is computed.
    expected_no_mask_result = array(
        [
            [True, True, True, True],
            [True, True, True, True],
            [True, True, True, True],
            [True, True, True, True],
            [True, True, True, True],
        ],
        dtype=bool,
    )

    masks = cascading_mask, alternating_mask, NotSpecified
    expected_mask_results = (
        expected_cascading_mask_result,
        expected_alternating_mask_result,
        expected_no_mask_result,
    )
    for mask, expected_mask in zip(masks, expected_mask_results):
        # Unpacking the factor yields one term per declared output.
        open_price, close_price = MultipleOutputs(mask=mask)
        pipeline = Pipeline(columns={"open_price": open_price,
                                     "close_price": close_price})
        if mask is not NotSpecified:
            pipeline.add(mask, "mask")

        results = engine.run_pipeline(pipeline, dates[0], dates[-1])
        for colname, case_column in (("open_price", open),
                                     ("close_price", close)):
            if mask is not NotSpecified:
                mask_results = results["mask"].unstack()
                check_arrays(mask_results.values, expected_mask)
            output_results = results[colname].unstack()
            output_expected = create_expected_results(constants[case_column],
                                                      expected_mask)
            assert_frame_equal(output_results, output_expected)
bundle_data.adjustment_reader, ) def choose_loader(column): if column in USEquityPricing.columns: return pipeline_loader raise ValueError("No PipelineLoader registered for column %s." % column) #data_frequency = 'daily', #capital_base = DEFAULT_CAPITAL_BASE start = '2015-9-1' # 必须在国内交易日 end = '2017-9-8' # 必须在国内交易日 #print Date(tz='utc', as_timestamp=True).parser(start) #perf_tracker = None # Pull in the environment's new AssetFinder for quick reference #print trading_calendar.all_sessions your_engine = SimplePipelineEngine( get_loader=choose_loader, calendar=trading_calendar.all_sessions, asset_finder=trading_environment.asset_finder) result = your_engine.run_pipeline( my_pipe, Date(tz='utc', as_timestamp=True).parser(start), Date(tz='utc', as_timestamp=True).parser(end)) print result
def test_compute_with_adjustments(self):
    """
    Verify that DataFrameLoader adjustments are applied when computing
    moving averages: a `high` baseline with the adjustments' inverses
    pre-applied must, after the loader re-applies them, match the plain
    `low` baseline.
    """
    dates, asset_ids = self.dates, self.asset_ids
    low, high = USEquityPricing.low, USEquityPricing.high
    apply_idxs = [3, 10, 16]

    def apply_date(idx, offset=0):
        # Date of the idx-th adjustment, shifted by `offset` days.
        return dates[apply_idxs[idx] + offset]

    # One MULTIPLY adjustment per (index, multiplier) pair, all on the
    # second asset, each ending the day before its apply date.
    multipliers = [2.0, 3.0, 5.0]
    adjustments = DataFrame.from_records(
        [
            dict(
                kind=MULTIPLY,
                sid=asset_ids[1],
                value=multiplier,
                start_date=None,
                end_date=apply_date(i, offset=-1),
                apply_date=apply_date(i),
            )
            for i, multiplier in enumerate(multipliers)
        ]
    )

    low_base = DataFrame(self.make_frame(30.0))
    low_loader = DataFrameLoader(low, low_base.copy(), adjustments=None)

    # Pre-apply inverse of adjustments to the baseline.
    high_base = DataFrame(self.make_frame(30.0))
    for idx, multiplier in zip(apply_idxs, multipliers):
        high_base.iloc[:idx, 1] /= multiplier

    high_loader = DataFrameLoader(high, high_base, adjustments)

    loader_by_column = {low: low_loader, high: high_loader}
    engine = SimplePipelineEngine(
        loader_by_column.__getitem__,
        self.dates,
        self.asset_finder,
    )

    for window_length in range(1, 4):
        low_mavg = SimpleMovingAverage(
            inputs=[USEquityPricing.low],
            window_length=window_length,
        )
        high_mavg = SimpleMovingAverage(
            inputs=[USEquityPricing.high],
            window_length=window_length,
        )
        # All (start, stop) pairs with window_length <= start <= stop.
        for start, stop in product_upper_triangle(
            range(window_length, len(dates))
        ):
            results = engine.run_pipeline(
                Pipeline(
                    columns={'low': low_mavg, 'high': high_mavg}
                ),
                dates[start],
                dates[stop],
            )
            self.assertEqual(set(results.columns), {'low', 'high'})

            iloc_bounds = slice(start, stop + 1)  # +1 to include end date

            unstacked = results.unstack()
            assert_frame_equal(unstacked['low'], low_base.iloc[iloc_bounds])
            assert_frame_equal(unstacked['high'], high_base.iloc[iloc_bounds])
def test_loader_given_multiple_columns(self):
    """
    Verify that the engine dispatches each column to the correct loader
    and batches columns per loader, when a pipeline mixes columns from
    datasets served by two different loaders.
    """
    class Loader1DataSet1(DataSet):
        # Fixed: was `Column(float)`, inconsistent with every sibling
        # column in this test, which all use float32.
        col1 = Column(float32)
        col2 = Column(float32)

    class Loader1DataSet2(DataSet):
        col1 = Column(float32)
        col2 = Column(float32)

    class Loader2DataSet(DataSet):
        col1 = Column(float32)
        col2 = Column(float32)

    # Constant values served by each loader for its columns.
    constants1 = {Loader1DataSet1.col1: 1,
                  Loader1DataSet1.col2: 2,
                  Loader1DataSet2.col1: 3,
                  Loader1DataSet2.col2: 4}
    loader1 = RecordingPrecomputedLoader(constants=constants1,
                                         dates=self.dates,
                                         sids=self.assets)
    constants2 = {Loader2DataSet.col1: 5,
                  Loader2DataSet.col2: 6}
    loader2 = RecordingPrecomputedLoader(constants=constants2,
                                         dates=self.dates,
                                         sids=self.assets)

    # Route Loader2DataSet columns to loader2; everything else to loader1.
    engine = SimplePipelineEngine(
        lambda column:
        loader2 if column.dataset == Loader2DataSet else loader1,
        self.dates,
        self.asset_finder,
    )

    pipe_col1 = RollingSumSum(inputs=[Loader1DataSet1.col1,
                                      Loader1DataSet2.col1,
                                      Loader2DataSet.col1],
                              window_length=2)
    pipe_col2 = RollingSumSum(inputs=[Loader1DataSet1.col2,
                                      Loader1DataSet2.col2,
                                      Loader2DataSet.col2],
                              window_length=3)
    pipe_col3 = RollingSumSum(inputs=[Loader2DataSet.col1],
                              window_length=3)

    columns = OrderedDict([
        ('pipe_col1', pipe_col1),
        ('pipe_col2', pipe_col2),
        ('pipe_col3', pipe_col3),
    ])

    result = engine.run_pipeline(
        Pipeline(columns=columns),
        self.dates[2],  # index is >= the largest window length - 1
        self.dates[-1]
    )

    min_window = min(pip_col.window_length
                     for pip_col in itervalues(columns))
    # Merged column -> constant mapping across both loaders.
    col_to_val = ChainMap(constants1, constants2)
    # Expected value per output column: sum of its input constants
    # times its window length (RollingSumSum sums over the window).
    vals = {name: (sum(col_to_val[col] for col in pipe_col.inputs)
                   * pipe_col.window_length)
            for name, pipe_col in iteritems(columns)}

    index = MultiIndex.from_product([self.dates[2:], self.assets])

    def expected_for_col(col):
        # Columns with a longer window than `min_window` produce NaN for
        # the leading dates on which they cannot yet compute.
        val = vals[col]
        offset = columns[col].window_length - min_window
        return concatenate(
            [
                full(offset * index.levshape[1], nan),
                full(
                    (index.levshape[0] - offset) * index.levshape[1],
                    val,
                    float,
                )
            ],
        )

    expected = DataFrame(
        data={col: expected_for_col(col) for col in vals},
        index=index,
        columns=columns,
    )
    assert_frame_equal(result, expected)

    # Each loader should have been called exactly once per batch of
    # columns it serves.
    self.assertEqual(set(loader1.load_calls),
                     {ColumnArgs.sorted_by_ds(Loader1DataSet1.col1,
                                              Loader1DataSet2.col1),
                      ColumnArgs.sorted_by_ds(Loader1DataSet1.col2,
                                              Loader1DataSet2.col2)})
    self.assertEqual(set(loader2.load_calls),
                     {ColumnArgs.sorted_by_ds(Loader2DataSet.col1,
                                              Loader2DataSet.col2)})
def test_masked_factor(self):
    """
    Test that a Custom Factor computes the correct values when passed a
    mask.

    The mask/filter should be applied prior to computing any values, as
    opposed to computing the factor across the entire universe of
    assets. Any assets that are filtered out should be filled with
    missing values.
    """
    loader = self.loader
    dates = self.dates[5:8]
    assets = self.assets
    asset_ids = self.asset_ids
    constants = self.constants
    open = USEquityPricing.open  # NOTE: shadows the builtin within this test
    close = USEquityPricing.close
    engine = SimplePipelineEngine(
        lambda column: loader,
        self.dates,
        self.asset_finder,
    )

    factor1_value = constants[open]
    factor2_value = 3.0 * (constants[open] - constants[close])

    def create_expected_results(expected_value, mask):
        # Expected frame: `expected_value` where the mask is True, NaN
        # elsewhere.
        expected_values = where(mask, expected_value, nan)
        return DataFrame(expected_values, index=dates, columns=assets)

    cascading_mask = AssetIDPlusDay() < (asset_ids[-1] + dates[0].day)
    expected_cascading_mask_result = array(
        [[True, True, True, False],
         [True, True, False, False],
         [True, False, False, False]],
        dtype=bool,
    )

    alternating_mask = (AssetIDPlusDay() % 2).eq(0)
    expected_alternating_mask_result = array(
        [[False, True, False, True],
         [True, False, True, False],
         [False, True, False, True]],
        dtype=bool,
    )

    masks = cascading_mask, alternating_mask
    expected_mask_results = (
        expected_cascading_mask_result,
        expected_alternating_mask_result,
    )
    for mask, expected_mask in zip(masks, expected_mask_results):
        # Test running a pipeline with a single masked factor.
        columns = {'factor1': OpenPrice(mask=mask), 'mask': mask}
        pipeline = Pipeline(columns=columns)
        results = engine.run_pipeline(pipeline, dates[0], dates[-1])

        mask_results = results['mask'].unstack()
        check_arrays(mask_results.values, expected_mask)

        factor1_results = results['factor1'].unstack()
        factor1_expected = create_expected_results(factor1_value,
                                                   mask_results)
        assert_frame_equal(factor1_results, factor1_expected)

        # Test running a pipeline with a second factor. This ensures that
        # adding another factor to the pipeline with a different window
        # length does not cause any unexpected behavior, especially when
        # both factors share the same mask.
        # (Fixed: the "This ensures that" sentence had lost its leading
        # '#' and was parsed as code.)
        columns['factor2'] = RollingSumDifference(mask=mask)
        pipeline = Pipeline(columns=columns)
        results = engine.run_pipeline(pipeline, dates[0], dates[-1])

        mask_results = results['mask'].unstack()
        check_arrays(mask_results.values, expected_mask)

        factor1_results = results['factor1'].unstack()
        factor2_results = results['factor2'].unstack()
        factor1_expected = create_expected_results(factor1_value,
                                                   mask_results)
        factor2_expected = create_expected_results(factor2_value,
                                                   mask_results)
        assert_frame_equal(factor1_results, factor1_expected)
        assert_frame_equal(factor2_results, factor2_expected)
def run_pipeline(pipeline, start_date, end_date=None, bundle=None):
    """
    Compute values for pipeline from start_date to end_date, using the
    specified bundle or the default bundle.

    Parameters
    ----------
    pipeline : Pipeline, required
        The pipeline to run.

    start_date : str (YYYY-MM-DD), required
        First date on which the pipeline should run. If start_date is not a
        trading day, the pipeline will start on the first trading day after
        start_date.

    end_date : str (YYYY-MM-DD), optional
        Last date on which the pipeline should run. If end_date is not a
        trading day, the pipeline will end on the first trading day after
        end_date. Defaults to today.

    bundle : str, optional
        the bundle code. If omitted, the default bundle will be used (and
        must be set).

    Returns
    -------
    result : pd.DataFrame
        A frame of computed results. The result columns correspond to the
        entries of pipeline.columns, which should be a dictionary mapping
        strings to instances of zipline.pipeline.term.Term. For each date
        between start_date and end_date, result will contain a row for each
        asset that passed pipeline.screen. A screen of None indicates that a
        row should be returned for each asset that existed each day.

    Examples
    --------
    Get a pipeline of 1-year returns:

    >>> from zipline.pipeline.factors import Returns
    >>> pipeline = Pipeline(                                   # doctest: +SKIP
            columns={
                '1Y': Returns(window_length=252),
            })
    >>> factor = run_pipeline(pipeline, '2018-01-01', '2019-02-01', bundle="usstock-1min")  # doctest: +SKIP
    """
    if not bundle:
        bundle = get_default_bundle()
        if not bundle:
            raise ValidationError("you must specify a bundle or set a default bundle")
        bundle = bundle["default_bundle"]

    load_extensions(code=bundle)

    bundle_data = bundles.load(
        bundle,
        os.environ,
        pd.Timestamp.utcnow(),
    )

    calendar_name = bundles.bundles[bundle].calendar_name
    trading_calendar = get_calendar(calendar_name)

    # Normalize both endpoints to tz-aware UTC timestamps.
    start_date = pd.Timestamp(start_date)
    if start_date.tz:
        start_date = start_date.tz_convert("UTC")
    else:
        start_date = start_date.tz_localize("UTC")

    if end_date:
        end_date = pd.Timestamp(end_date)
    else:
        end_date = pd.Timestamp.now().normalize()
    if end_date.tz:
        end_date = end_date.tz_convert("UTC")
    else:
        end_date = end_date.tz_localize("UTC")

    first_session = max(bundles.bundles[bundle].start_session,
                        trading_calendar.first_session)

    if start_date < first_session:
        raise ValidationError(
            f"start_date cannot be earlier than {first_session.date().isoformat()} for this bundle")

    def _roll_forward_to_session(date, error_msg):
        # Advance `date` day by day until it lands on a calendar session;
        # give up (and raise) after 100 days so a bogus date can't loop
        # forever.
        for _ in range(100):
            if trading_calendar.is_session(date):
                return date
            date += pd.Timedelta(days=1)
        raise ValidationError(error_msg)

    start_date = _roll_forward_to_session(
        start_date, f"start_date is not in {calendar_name} calendar")
    end_date = _roll_forward_to_session(
        end_date, "end_date is not in calendar")

    if end_date < start_date:
        raise ValidationError("end_date cannot be earlier than start_date")

    default_pipeline_loader = EquityPricingLoader.without_fx(
        bundle_data.equity_daily_bar_reader,
        bundle_data.adjustment_reader,
    )

    # Reuse a cached AssetFinder for this bundle when available.
    asset_finder = asset_finder_cache.get(bundle, bundle_data.asset_finder)
    asset_finder_cache[bundle] = asset_finder

    pipeline_loader = QuantRocketPipelineLoaderRouter(
        asset_db_conn=asset_finder.engine,
        calendar=trading_calendar,
        default_loader=default_pipeline_loader,
        default_loader_columns=EquityPricing.columns,
    )

    calendar_domain = domain.get_domain_from_calendar(trading_calendar)

    engine = SimplePipelineEngine(
        pipeline_loader,
        asset_finder,
        calendar_domain)

    return engine.run_pipeline(pipeline, start_date, end_date)
def initialize(context):
    """
    Build DataFrameLoaders for a custom dataset, wire them into a
    pipeline engine alongside the bundle's pricing loader, run a
    one-day pipeline, and print the results sorted by price.
    """
    dates = pd.date_range('2018-01-01', '2018-09-28')
    sids = bundle_data.asset_finder.sids
    assets = [sid(item) for item in sids]

    # Column A: a (dates x assets) grid of floats counting up from 0.
    values_a = np.arange(len(dates) * len(assets), dtype=float)
    column_A_frame = pd.DataFrame(
        data=values_a.reshape(len(dates), len(assets)),
        index=dates,
        columns=sids,
    )

    # Column B: constant True for the first sid, False for the second.
    column_B_frame = pd.DataFrame(
        data={sids[0]: True, sids[1]: False},
        index=dates,
    )

    custom_loaders = {
        MyDataSet.column_A: DataFrameLoader(MyDataSet.column_A, column_A_frame),
        MyDataSet.column_B: DataFrameLoader(MyDataSet.column_B, column_B_frame),
    }

    # Pricing columns come from the bundle; everything else from the
    # custom loaders above.
    pipeline_loader = USEquityPricingLoader(
        bundle_data.equity_daily_bar_reader,
        bundle_data.adjustment_reader,
    )

    def choose_loader(column):
        if column in USEquityPricing.columns:
            return pipeline_loader
        return custom_loaders[column]

    engine = SimplePipelineEngine(
        get_loader=choose_loader,
        calendar=trading_calendar.all_sessions,
        asset_finder=bundle_data.asset_finder,
    )

    pipe = Pipeline(
        columns={
            'price': USEquityPricing.close.latest,
            'col_A': MyDataSet.column_A.latest,
            'col_B': MyDataSet.column_B.latest
        },
        screen=StaticAssets(assets),
    )

    session = pd.Timestamp('2016-01-07', tz='utc')
    df = engine.run_pipeline(pipe, session, session)
    df = df.sort_values(by=['price'], axis=0, ascending=False)
    print(df)