def test_numeric_factor(self):
    """Check arithmetic on factors computed from constant pricing data.

    Two ``RollingSumDifference`` factors (high/low and open/close) and their
    average are run over ``num_dates`` sessions; each output column is
    compared with a frame derived from ``self.constants``.
    """
    constants = self.constants
    loader = self.loader
    engine = SimplePipelineEngine(
        lambda column: loader,
        self.dates,
        self.asset_finder,
    )

    num_dates = 5
    dates = self.dates[10:10 + num_dates]

    high, low = USEquityPricing.high, USEquityPricing.low
    # Trailing underscore avoids shadowing the ``open`` builtin.
    open_, close = USEquityPricing.open, USEquityPricing.close

    high_minus_low = RollingSumDifference(inputs=[high, low])
    open_minus_close = RollingSumDifference(inputs=[open_, close])
    avg = (high_minus_low + open_minus_close) / 2

    pipeline = Pipeline(
        columns={
            "high_low": high_minus_low,
            "open_close": open_minus_close,
            "avg": avg,
        },
    )
    results = engine.run_pipeline(pipeline, dates[0], dates[-1])

    # The 3.0 multiplier presumably mirrors RollingSumDifference's default
    # window length -- TODO confirm against the factor definition.
    expected_high_low = 3.0 * (constants[high] - constants[low])
    expected_open_close = 3.0 * (constants[open_] - constants[close])
    expected_avg = (expected_high_low + expected_open_close) / 2.0

    expectations = (
        ("high_low", expected_high_low),
        ("open_close", expected_open_close),
        ("avg", expected_avg),
    )
    for colname, expected_value in expectations:
        assert_frame_equal(
            results[colname].unstack(),
            DataFrame(expected_value, index=dates, columns=self.assets),
        )
def run_graph(self, graph, initial_workspace, mask=None):
    """
    Compute ``graph`` on a fresh engine, seeding its workspace with
    ``initial_workspace``.

    Parameters
    ----------
    graph : zipline.pipeline.graph.TermGraph
        Graph to run.
    initial_workspace : dict
        Initial workspace to forward to SimplePipelineEngine.compute_chunk.
        An ``AssetExists()`` entry is added to it in place when absent.
    mask : DataFrame, optional
        Frame that is exploded into the dates, assets and ``AssetExists()``
        values used for the run. Defaults to ``self.__mask``.

    Returns
    -------
    results : dict
        Mapping from termname -> computed result.
    """
    # Any loader lookup hands back an ExplodingObject instead of a real loader.
    engine = SimplePipelineEngine(
        lambda column: ExplodingObject(),
        self.__calendar,
        self.__finder,
    )

    exists_mask = self.__mask if mask is None else mask
    dates, assets, mask_values = explode(exists_mask)

    # setdefault keeps a caller-supplied AssetExists() entry untouched.
    initial_workspace.setdefault(AssetExists(), mask_values)
    return engine.compute_chunk(graph, dates, assets, initial_workspace)
def test_input_dates_provided_by_default(self):
    """Check that ``InputDates()`` works as a ``CustomFactor`` input.

    The factor writes the first date of its window into every output cell;
    the pipeline output is then compared with the first two entries of
    ``self.dates``.
    """
    loader = self.loader
    engine = SimplePipelineEngine(lambda column: loader, self.dates, self.asset_finder)

    class TestFactor(CustomFactor):
        inputs = [InputDates(), USEquityPricing.close]
        window_length = 10
        dtype = datetime64ns_dtype

        def compute(self, today, assets, out, dates, closes):
            # Column 0 holds the same dates as every other column, so
            # inspecting it alone is sufficient.
            first, last = dates[[0, -1], 0]
            assert last == today.asm8
            assert len(dates) == len(closes) == self.window_length
            out[:] = first

    pipeline = Pipeline(columns={"t": TestFactor()})
    results = engine.run_pipeline(pipeline, self.dates[9], self.dates[10])

    # All results are the same, so just grab one column.
    first_column = results.unstack().iloc[:, 0].values
    check_arrays(first_column, self.dates[:2].values)
def test_SMA(self):
    """Check ``SimpleMovingAverage`` of close prices against a rolling mean.

    The pipeline is run over the generated dates after skipping the first
    ``window_length`` entries, and its output is compared with
    ``rolling_mean`` applied to the expected close bars shifted back by one
    trading day.
    """
    window_length = 5
    asset_ids = self.all_asset_ids
    trading_day = self.trading_calendar.day

    engine = SimplePipelineEngine(
        lambda column: self.pipeline_loader,
        self.trading_calendar.all_sessions,
        self.asset_finder,
    )

    dates = date_range(
        self.first_asset_start + trading_day,
        self.last_asset_end,
        freq=trading_day,
    )
    dates_to_test = dates[window_length:]

    sma = SimpleMovingAverage(
        inputs=(USEquityPricing.close,),
        window_length=window_length,
    )
    results = engine.run_pipeline(
        Pipeline(columns={"sma": sma}),
        dates_to_test[0],
        dates_to_test[-1],
    )

    # Shift back the raw inputs by a trading day because we expect our
    # computed results to be computed using values anchored on the
    # **previous** day's data.
    raw_closes = expected_bar_values_2d(dates - trading_day, self.equity_info, "close")
    expected_raw = rolling_mean(raw_closes, window_length, min_periods=1)

    expected = DataFrame(
        # Truncate off the extra rows needed to compute the SMAs.
        expected_raw[window_length:],
        index=dates_to_test,  # dates_to_test is dates[window_length:]
        columns=self.asset_finder.retrieve_all(asset_ids),
    )
    self.write_nans(expected)

    assert_frame_equal(results["sma"].unstack(), expected)
def test_rolling_and_nonrolling(self):
    """Run a rolling factor alongside ``.latest`` columns on constant data.

    A ``PrecomputedLoader`` serves open=1, close=2, volume=3; the test checks
    the output column names and that each column is a constant series.
    """
    open_ = USEquityPricing.open
    close = USEquityPricing.close
    volume = USEquityPricing.volume

    # Test for thirty days up to the last day that we think all
    # the assets existed.
    dates_to_test = self.dates[-30:]

    loader = PrecomputedLoader(
        constants={open_: 1, close: 2, volume: 3},
        dates=self.dates,
        sids=self.asset_ids,
    )
    engine = SimplePipelineEngine(lambda column: loader, self.dates, self.asset_finder)

    columns = {
        "sumdiff": RollingSumDifference(),
        "open": open_.latest,
        "close": close.latest,
        "volume": volume.latest,
    }
    result = engine.run_pipeline(
        Pipeline(columns=columns),
        dates_to_test[0],
        dates_to_test[-1],
    )

    self.assertIsNotNone(result)
    self.assertEqual({"sumdiff", "open", "close", "volume"}, set(result.columns))

    result_index = self.asset_ids * len(dates_to_test)
    result_shape = (len(result_index),)

    def constant_series(value):
        # Float series filled with ``value`` over the expected index.
        return Series(index=result_index, data=full(result_shape, value, dtype=float))

    check_arrays(result["sumdiff"], constant_series(-3))
    for name, const in [("open", 1), ("close", 2), ("volume", 3)]:
        check_arrays(result[name], constant_series(const))
def test_factor_with_single_output(self):
    """
    Test passing an `outputs` parameter of length 1 to a CustomFactor.
    """
    dates = self.dates[5:10]
    assets = self.assets
    num_dates = len(dates)

    # Trailing underscore avoids shadowing the ``open`` builtin.
    open_ = USEquityPricing.open
    open_values = [self.constants[open_]] * num_dates
    open_values_as_tuple = [(self.constants[open_],)] * num_dates

    engine = SimplePipelineEngine(
        lambda column: self.loader,
        self.dates,
        self.asset_finder,
    )

    single_output = OpenPrice(outputs=["open"])
    pipeline = Pipeline(
        columns={
            "open_instance": single_output,
            "open_attribute": single_output.open,
        },
    )
    results = engine.run_pipeline(pipeline, dates[0], dates[-1])

    # The instance `single_output` itself will compute a numpy.recarray
    # when added as a column to our pipeline, so we expect its output
    # values to be 1-tuples.
    expected_by_column = (
        ("open_instance", dict.fromkeys(assets, open_values_as_tuple)),
        ("open_attribute", dict.fromkeys(assets, open_values)),
    )
    for colname, expected_values in expected_by_column:
        expected_results = DataFrame(
            expected_values,
            index=dates,
            columns=assets,
            dtype=float64,
        )
        assert_frame_equal(results[colname].unstack(), expected_results)
def test_single_factor(self):
    """Run one ``RollingSumDifference`` column with and without a screen.

    Both pipelines must produce a single ``'f'`` column indexed by the
    product of dates and assets, filled with ``-factor.window_length``.
    """
    loader = self.loader
    assets = self.assets
    engine = SimplePipelineEngine(lambda column: loader, self.dates, self.asset_finder)

    num_dates = 5
    num_assets = len(assets)
    result_shape = (num_dates, num_assets)
    dates = self.dates[10:10 + num_dates]

    factor = RollingSumDifference()
    expected_result = -factor.window_length

    # Since every asset will pass the screen, these should be equivalent.
    unscreened = Pipeline(columns={"f": factor})
    screened = Pipeline(columns={"f": factor}, screen=factor.eq(expected_result))

    for pipeline in (unscreened, screened):
        result = engine.run_pipeline(pipeline, dates[0], dates[-1])
        self.assertEqual(set(result.columns), {"f"})
        assert_multi_index_is_product(self, result.index, dates, assets)

        expected_values = full(result_shape, expected_result, dtype=float)
        check_arrays(result["f"].unstack().values, expected_values)
def test_single_factor(self):
    """Run one ``RollingSumDifference`` column with and without a screen.

    Both pipelines must produce a single ``'f'`` column whose index is the
    product of dates and the assets retrieved from the finder, filled with
    ``-factor.window_length``.
    """
    loader = self.loader
    finder = self.asset_finder
    assets = self.assets
    engine = SimplePipelineEngine(lambda column: loader, self.dates, self.asset_finder)

    num_dates = 5
    num_assets = len(assets)
    result_shape = (num_dates, num_assets)
    dates = self.dates[10:10 + num_dates]

    factor = RollingSumDifference()
    expected_result = -factor.window_length

    # Since every asset will pass the screen, these should be equivalent.
    unscreened = Pipeline(columns={"f": factor})
    screened = Pipeline(columns={"f": factor}, screen=factor.eq(expected_result))

    for pipeline in (unscreened, screened):
        result = engine.run_pipeline(pipeline, dates[0], dates[-1])
        self.assertEqual(set(result.columns), {"f"})
        assert_multi_index_is_product(
            self,
            result.index,
            dates,
            finder.retrieve_all(assets),
        )
        check_arrays(
            result["f"].unstack().values,
            full(result_shape, expected_result),
        )
def test_screen(self):
    """Check that a screen of ``AssetID() <= asset`` filters the output rows.

    For each asset, the pipeline result must contain only the sids that are
    less than or equal to that asset, repeated for every date.
    """
    loader = self.loader
    finder = self.asset_finder
    assets = array(self.assets)
    engine = SimplePipelineEngine(lambda column: loader, self.dates, self.asset_finder)

    num_dates = 5
    dates = self.dates[10:10 + num_dates]
    factor = AssetID()

    for asset in assets:
        pipeline = Pipeline(columns={"f": factor}, screen=factor <= asset)
        result = engine.run_pipeline(pipeline, dates[0], dates[-1])

        expected_sids = assets[assets <= asset]
        expected_assets = finder.retrieve_all(expected_sids)

        expected_index = MultiIndex.from_product([dates, expected_assets])
        expected_values = tile(expected_sids.astype(float), [len(dates)])
        expected_result = DataFrame(
            index=expected_index,
            data=expected_values,
            columns=["f"],
        )

        assert_frame_equal(result, expected_result)
def test_custom_factor_outputs_parameter(self):
    """Check the ``outputs`` parameter of a two-output ``CustomFactor``.

    For window lengths 1 and 2, the ``sum_`` and ``diff`` outputs of
    ``OpenCloseSumAndDiff`` are compared with constant frames computed from
    the open and close constants scaled by the window length.
    """
    dates = self.dates[5:10]
    assets = self.assets
    shape = (len(dates), len(assets))
    constants = self.constants
    engine = SimplePipelineEngine(lambda column: self.loader, self.dates, self.asset_finder)

    def constant_frame(value):
        # Frame of ``value`` over the tested dates and assets.
        return DataFrame(full(shape, value, float64), index=dates, columns=assets)

    for window_length in range(1, 3):
        sum_, diff = OpenCloseSumAndDiff(
            outputs=["sum_", "diff"],
            window_length=window_length,
        )
        pipeline = Pipeline(columns={"sum_": sum_, "diff": diff})
        results = engine.run_pipeline(pipeline, dates[0], dates[-1])

        for colname, op in (("sum_", add), ("diff", sub)):
            output_results = results[colname].unstack()
            open_total = constants[USEquityPricing.open] * window_length
            close_total = constants[USEquityPricing.close] * window_length
            assert_frame_equal(
                output_results,
                constant_frame(op(open_total, close_total)),
            )
def test_instance_of_factor_with_multiple_outputs(self):
    """
    Test adding a CustomFactor instance, which has multiple outputs, as a
    pipeline column directly. Its computed values should be tuples
    containing the computed values of each of its outputs.
    """
    dates = self.dates[5:10]
    assets = self.assets
    num_dates = len(dates)
    num_assets = len(assets)
    constants = self.constants

    engine = SimplePipelineEngine(lambda column: self.loader, self.dates, self.asset_finder)

    # One (open, close) pair per asset, with the same row repeated per date.
    open_values = [constants[USEquityPricing.open]] * num_assets
    close_values = [constants[USEquityPricing.close]] * num_assets
    expected_row = list(zip(open_values, close_values))
    expected_results = DataFrame(
        [expected_row] * num_dates,
        index=dates,
        columns=assets,
        dtype=float64,
    )

    pipeline = Pipeline(columns={"instance": MultipleOutputs()})
    results = engine.run_pipeline(pipeline, dates[0], dates[-1])

    assert_frame_equal(results["instance"].unstack(), expected_results)
def test_custom_factor_outputs_parameter(self):
    """Check the ``outputs`` parameter of a two-output ``CustomFactor``.

    Runs ``OpenCloseSumAndDiff`` for window lengths 1 and 2 and verifies that
    its ``sum_`` and ``diff`` columns equal the sum and difference of the
    open and close constants, each scaled by the window length.
    """
    dates = self.dates[5:10]
    assets = self.assets
    num_dates, num_assets = len(dates), len(assets)
    constants = self.constants

    engine = SimplePipelineEngine(
        lambda column: self.loader,
        self.dates,
        self.asset_finder,
    )

    def frame_of(expected_value):
        # Constant float64 frame over the tested dates and assets.
        values = full((num_dates, num_assets), expected_value, float64)
        return DataFrame(values, index=dates, columns=assets)

    operations = (("sum_", add), ("diff", sub))
    for window_length in range(1, 3):
        sum_, diff = OpenCloseSumAndDiff(
            outputs=["sum_", "diff"],
            window_length=window_length,
        )
        results = engine.run_pipeline(
            Pipeline(columns={"sum_": sum_, "diff": diff}),
            dates[0],
            dates[-1],
        )
        for colname, op in operations:
            output_results = results[colname].unstack()
            expected_value = op(
                constants[USEquityPricing.open] * window_length,
                constants[USEquityPricing.close] * window_length,
            )
            assert_frame_equal(output_results, frame_of(expected_value))
def test_drawdown(self):
    """Check ``MaxDrawdown`` on monotonically increasing close prices."""
    # The monotonically-increasing data produced by SyntheticDailyBarWriter
    # exercises two pathological cases for MaxDrawdown. The actual
    # computed results are pretty much useless (everything is either NaN
    # or zero), but verifying we correctly handle those corner cases is
    # valuable.
    window_length = 5
    asset_ids = self.all_asset_ids
    trading_day = self.trading_calendar.day

    engine = SimplePipelineEngine(
        lambda column: self.pipeline_loader,
        self.trading_calendar.all_sessions,
        self.asset_finder,
    )

    dates = date_range(
        self.first_asset_start + trading_day,
        self.last_asset_end,
        freq=trading_day,
    )
    dates_to_test = dates[window_length:]

    drawdown = MaxDrawdown(
        inputs=(USEquityPricing.close,),
        window_length=window_length,
    )
    results = engine.run_pipeline(
        Pipeline(columns={"drawdown": drawdown}),
        dates_to_test[0],
        dates_to_test[-1],
    )

    # We expect NaNs when the asset was undefined, otherwise 0 everywhere,
    # since the input is always increasing.
    expected_shape = (len(dates_to_test), len(asset_ids))
    expected = DataFrame(
        data=zeros(expected_shape, dtype=float),
        index=dates_to_test,
        columns=self.asset_finder.retrieve_all(asset_ids),
    )
    self.write_nans(expected)

    assert_frame_equal(expected, results["drawdown"].unstack())
def test_engine_with_multicolumn_loader(self):
    """Run an engine that is handed a single loader object directly.

    Unlike the sibling tests, the ``ConstantLoader`` itself (not a
    ``lambda column: loader``) is passed as the engine's first argument.
    """
    open_ = USEquityPricing.open
    close = USEquityPricing.close
    volume = USEquityPricing.volume

    # Test for thirty days up to the second to last day that we think all
    # the assets existed. If we tested the last day of our calendar, no
    # assets would be in our output because of the assets' end dates.
    dates_to_test = self.dates[-32:-2]

    loader = ConstantLoader(
        constants={open_: 1, close: 2, volume: 3},
        dates=self.dates,
        assets=self.assets,
    )
    engine = SimplePipelineEngine(loader, self.dates, self.asset_finder)

    columns = {
        "sumdiff": RollingSumDifference(),
        "open": open_.latest,
        "close": close.latest,
        "volume": volume.latest,
    }
    result = engine.run_pipeline(
        Pipeline(columns=columns),
        dates_to_test[0],
        dates_to_test[-1],
    )

    self.assertIsNotNone(result)
    self.assertEqual({"sumdiff", "open", "close", "volume"}, set(result.columns))

    result_index = self.assets * len(dates_to_test)
    result_shape = (len(result_index),)

    def constant_series(value):
        # Series filled with ``value`` over the expected index.
        return Series(index=result_index, data=full(result_shape, value))

    check_arrays(result["sumdiff"], constant_series(-3))
    for name, const in [("open", 1), ("close", 2), ("volume", 3)]:
        check_arrays(result[name], constant_series(const))
def run_pipeline(self, *args, **kwargs):
    """Run a pipeline on a fresh engine whose default domain is ``US_EQUITIES``.

    All positional and keyword arguments are forwarded unchanged to
    ``SimplePipelineEngine.run_pipeline``, and its result is returned.
    """
    fixed_loader = self.loader
    engine = SimplePipelineEngine(
        lambda col: fixed_loader,  # every column is served by the same loader
        self.asset_finder,
        default_domain=US_EQUITIES,
    )
    return engine.run_pipeline(*args, **kwargs)
def test_rolling_and_nonrolling(self):
    """Mix a windowed factor with ``.latest`` columns on constant inputs.

    The loader serves open=1, close=2 and volume=3 for every date and sid;
    the test asserts the set of output columns and that each column holds a
    single constant float value.
    """
    open_ = USEquityPricing.open
    close = USEquityPricing.close
    volume = USEquityPricing.volume

    # Test for thirty days up to the last day that we think all
    # the assets existed.
    dates_to_test = self.dates[-30:]
    start, end = dates_to_test[0], dates_to_test[-1]

    constants = {open_: 1, close: 2, volume: 3}
    loader = PrecomputedLoader(
        constants=constants,
        dates=self.dates,
        sids=self.asset_ids,
    )
    engine = SimplePipelineEngine(
        lambda column: loader,
        self.dates,
        self.asset_finder,
    )

    pipeline = Pipeline(
        columns={
            'sumdiff': RollingSumDifference(),
            'open': open_.latest,
            'close': close.latest,
            'volume': volume.latest,
        },
    )
    result = engine.run_pipeline(pipeline, start, end)

    self.assertIsNotNone(result)
    self.assertEqual(
        {'sumdiff', 'open', 'close', 'volume'},
        set(result.columns),
    )

    result_index = self.asset_ids * len(dates_to_test)
    result_shape = (len(result_index),)

    expected_constants = [('sumdiff', -3), ('open', 1), ('close', 2), ('volume', 3)]
    for name, const in expected_constants:
        expected = Series(
            index=result_index,
            data=full(result_shape, const, dtype=float),
        )
        check_arrays(result[name], expected)
def test_bad_dates(self):
    """Check that ``run_pipeline`` rejects a start date after the end date.

    An empty pipeline is run from ``self.dates[2]`` to ``self.dates[1]``; the
    engine must raise ``ValueError`` with a message matching ``msg``.
    """
    loader = self.loader
    engine = SimplePipelineEngine(lambda column: loader, self.dates, self.asset_finder)

    p = Pipeline()
    msg = "start_date must be before or equal to end_date .*"

    # ``assertRaisesRegexp`` is a deprecated alias that was removed in
    # Python 3.12. Prefer the current name and fall back to the alias only
    # on interpreters that lack it (e.g. Python 2.7).
    assert_raises_regex = (
        getattr(self, "assertRaisesRegex", None) or self.assertRaisesRegexp
    )
    with assert_raises_regex(ValueError, msg):
        engine.run_pipeline(p, self.dates[2], self.dates[1])
def test_factor_with_multiple_outputs(self):
    """Check a two-output factor under a cascading, an alternating, and no mask.

    For each mask, both outputs of ``MultipleOutputs`` must equal the
    corresponding pricing constant where the expected mask is True and NaN
    elsewhere. When a mask is given, its own computed values are checked too.
    """
    dates = self.dates[5:10]
    assets = self.assets
    asset_ids = self.asset_ids
    constants = self.constants
    shape = (len(dates), len(assets))

    # Suffixed names avoid shadowing the ``open`` builtin.
    open_col = USEquityPricing.open
    close_col = USEquityPricing.close

    engine = SimplePipelineEngine(lambda column: self.loader, self.dates, self.asset_finder)

    def masked_frame(expected_value, mask):
        # ``expected_value`` where ``mask`` is True, NaN elsewhere.
        return DataFrame(where(mask, expected_value, nan), index=dates, columns=assets)

    cascading_mask = AssetIDPlusDay() < (asset_ids[-1] + dates[0].day)
    alternating_mask = (AssetIDPlusDay() % 2).eq(0)

    cases = (
        (cascading_mask, make_cascading_boolean_array(shape=shape)),
        (
            alternating_mask,
            make_alternating_boolean_array(shape=shape, first_value=False),
        ),
        (NotSpecified, full(shape=shape, fill_value=True, dtype=bool_dtype)),
    )

    for mask, expected_mask in cases:
        has_mask = mask is not NotSpecified

        open_price, close_price = MultipleOutputs(mask=mask)
        pipeline = Pipeline(
            columns={'open_price': open_price, 'close_price': close_price},
        )
        if has_mask:
            pipeline.add(mask, 'mask')

        results = engine.run_pipeline(pipeline, dates[0], dates[-1])

        for colname, case_column in (('open_price', open_col), ('close_price', close_col)):
            if has_mask:
                check_arrays(results['mask'].unstack().values, expected_mask)

            assert_frame_equal(
                results[colname].unstack(),
                masked_frame(constants[case_column], expected_mask),
            )
def test_bad_dates(self):
    """Check that ``run_pipeline`` rejects a start date after the end date.

    The engine is asked to run an empty pipeline with ``self.dates[2]`` as
    start and ``self.dates[1]`` as end, and must raise ``ValueError`` whose
    message matches ``msg``.
    """
    loader = self.loader
    engine = SimplePipelineEngine(
        lambda column: loader,
        self.dates,
        self.asset_finder,
    )

    p = Pipeline()
    msg = "start_date must be before or equal to end_date .*"

    # The ``assertRaisesRegexp`` alias is deprecated and absent from
    # Python 3.12+, so look up the current name first and only fall back to
    # the alias on interpreters that do not provide it.
    assert_raises_regex = (
        getattr(self, "assertRaisesRegex", None) or self.assertRaisesRegexp
    )
    with assert_raises_regex(ValueError, msg):
        engine.run_pipeline(p, self.dates[2], self.dates[1])
def test_same_day_pipeline(self):
    """Run a pipeline whose start and end dates are the same session."""
    loader = self.loader
    engine = SimplePipelineEngine(
        lambda column: loader,
        self.dates,
        self.asset_finder,
    )

    factor = AssetID()
    first_sid = self.asset_ids[0]
    pipeline = Pipeline(columns={"f": factor}, screen=factor <= first_sid)

    # The crux of this is that when we run the pipeline for a single day
    # (i.e. start and end dates are the same) we should accurately get
    # data for the day prior.
    single_day = self.dates[1]
    result = engine.run_pipeline(pipeline, single_day, single_day)
    self.assertEqual(result["f"][0], 1.0)
def test_rolling_and_nonrolling(self):
    """Mix a windowed factor with ``.latest`` columns on constant inputs.

    A ``ConstantLoader`` serves open=1, close=2 and volume=3; the test checks
    the output column names and that every column is a constant series.
    """
    open_ = USEquityPricing.open
    close = USEquityPricing.close
    volume = USEquityPricing.volume

    # Test for thirty days up to the last day that we think all
    # the assets existed.
    dates_to_test = self.dates[-30:]

    loader = ConstantLoader(
        constants={open_: 1, close: 2, volume: 3},
        dates=self.dates,
        assets=self.assets,
    )
    engine = SimplePipelineEngine(lambda column: loader, self.dates, self.asset_finder)

    pipeline = Pipeline(
        columns={
            'sumdiff': RollingSumDifference(),
            'open': open_.latest,
            'close': close.latest,
            'volume': volume.latest,
        },
    )
    result = engine.run_pipeline(pipeline, dates_to_test[0], dates_to_test[-1])

    self.assertIsNotNone(result)
    self.assertEqual({'sumdiff', 'open', 'close', 'volume'}, set(result.columns))

    result_index = self.assets * len(dates_to_test)
    result_shape = (len(result_index),)

    def constant_series(value):
        # Series filled with ``value`` over the expected index.
        return Series(index=result_index, data=full(result_shape, value))

    check_arrays(result['sumdiff'], constant_series(-3))
    for name, const in [('open', 1), ('close', 2), ('volume', 3)]:
        check_arrays(result[name], constant_series(const))
def test_same_day_pipeline(self):
    """Check the result of a pipeline run over a single date.

    The pipeline exposes ``AssetID()`` as column ``'f'``, screened to ids no
    greater than the first asset id, and its first value must equal 1.0.
    """
    loader = self.loader
    engine = SimplePipelineEngine(lambda column: loader, self.dates, self.asset_finder)

    factor = AssetID()
    asset = self.asset_ids[0]
    screen = factor <= asset
    p = Pipeline(columns={'f': factor}, screen=screen)

    # The crux of this is that when we run the pipeline for a single day
    # (i.e. start and end dates are the same) we should accurately get
    # data for the day prior.
    result = engine.run_pipeline(p, self.dates[1], self.dates[1])
    first_value = result['f'][0]
    self.assertEqual(first_value, 1.0)
def test_engine_with_multicolumn_loader(self):
    """Run an engine constructed with a loader object as its first argument.

    The ``ConstantLoader`` serves open=1, close=2 and volume=3; the test
    checks the output column names and that every column is constant.
    """
    open_ = USEquityPricing.open
    close = USEquityPricing.close
    volume = USEquityPricing.volume

    # Test for thirty days up to the second to last day that we think all
    # the assets existed. If we tested the last day of our calendar, no
    # assets would be in our output because of the assets' end dates.
    dates_to_test = self.dates[-32:-2]
    start, end = dates_to_test[0], dates_to_test[-1]

    constants = {open_: 1, close: 2, volume: 3}
    loader = ConstantLoader(
        constants=constants,
        dates=self.dates,
        assets=self.assets,
    )
    # The loader itself, not a ``column -> loader`` callable, is passed here.
    engine = SimplePipelineEngine(loader, self.dates, self.asset_finder)

    pipeline = Pipeline(
        columns={
            'sumdiff': RollingSumDifference(),
            'open': open_.latest,
            'close': close.latest,
            'volume': volume.latest,
        },
    )
    result = engine.run_pipeline(pipeline, start, end)

    self.assertIsNotNone(result)
    self.assertEqual(
        {'sumdiff', 'open', 'close', 'volume'},
        set(result.columns),
    )

    result_index = self.assets * len(dates_to_test)
    result_shape = (len(result_index),)

    expected_constants = [('sumdiff', -3), ('open', 1), ('close', 2), ('volume', 3)]
    for name, const in expected_constants:
        expected = Series(index=result_index, data=full(result_shape, const))
        check_arrays(result[name], expected)
def run_graph(self, graph, initial_workspace, mask=None):
    """
    Compute ``graph`` on a fresh engine, seeding its workspace with
    ``initial_workspace``.

    Parameters
    ----------
    graph : zipline.pipeline.graph.ExecutionPlan
        Graph to run.
    initial_workspace : dict
        Initial workspace to forward to SimplePipelineEngine.compute_chunk.
        ``AssetExists()`` and ``InputDates()`` entries are added to it in
        place when absent.
    mask : DataFrame, optional
        Frame that is exploded into the dates, sids and ``AssetExists()``
        values used for the run. Defaults to
        ``self.default_asset_exists_mask``.

    Returns
    -------
    results : dict
        Mapping from termname -> computed result.
    """

    def get_loader(c):
        # Everything needed must already be in the workspace.
        raise AssertionError("run_graph() should not require any loaders!")

    engine = SimplePipelineEngine(
        get_loader,
        self.asset_finder,
        default_domain=US_EQUITIES,
    )

    exists_mask = self.default_asset_exists_mask if mask is None else mask
    dates, sids, mask_values = explode(exists_mask)

    # setdefault keeps any caller-supplied entries untouched.
    initial_workspace.setdefault(AssetExists(), mask_values)
    initial_workspace.setdefault(InputDates(), dates)

    refcounts = graph.initial_refcounts(initial_workspace)
    execution_order = graph.execution_order(initial_workspace, refcounts)

    return engine.compute_chunk(
        graph=graph,
        dates=dates,
        sids=sids,
        workspace=initial_workspace,
        execution_order=execution_order,
        refcounts=refcounts,
        hooks=NoHooks(),
    )
def test_SMA(self):
    """Check ``SimpleMovingAverage`` of close prices against a rolling mean.

    The pipeline output for the dates after the first ``window_length``
    entries is compared with ``rolling_mean`` applied to the writer's
    expected close values shifted back by one trading day.
    """
    window_length = 5
    assets = self.all_assets
    trading_day = self.trading_day

    engine = SimplePipelineEngine(
        lambda column: self.pipeline_loader,
        self.env.trading_days,
        self.finder,
    )

    dates = date_range(
        self.first_asset_start + trading_day,
        self.last_asset_end,
        freq=trading_day,
    )
    dates_to_test = dates[window_length:]

    sma = SimpleMovingAverage(
        inputs=(USEquityPricing.close,),
        window_length=window_length,
    )
    results = engine.run_pipeline(
        Pipeline(columns={'sma': sma}),
        dates_to_test[0],
        dates_to_test[-1],
    )

    # Shift back the raw inputs by a trading day because we expect our
    # computed results to be computed using values anchored on the
    # **previous** day's data.
    raw_closes = self.writer.expected_values_2d(dates - trading_day, assets, 'close')
    expected_raw = rolling_mean(raw_closes, window_length, min_periods=1)

    expected = DataFrame(
        # Truncate off the extra rows needed to compute the SMAs.
        expected_raw[window_length:],
        index=dates_to_test,  # dates_to_test is dates[window_length:]
        columns=self.finder.retrieve_all(assets),
    )
    self.write_nans(expected)

    assert_frame_equal(results['sma'].unstack(), expected)
def init_class_fixtures(cls):
    """Build per-domain pricing loaders and a shared pipeline engine.

    After the parent fixtures are initialised, ``cls.loaders`` maps each
    equity domain to an ``EquityPricingLoader`` over that domain's daily bar
    reader, and ``cls.engine`` is created with ``cls.get_loader`` and
    ``cls.asset_finder``.
    """
    super().init_class_fixtures()

    adjustments = NullAdjustmentReader()

    # Each domain is paired with the key of its reader in daily_bar_readers.
    domain_reader_keys = (
        (GB_EQUITIES, 'XLON'),
        (US_EQUITIES, 'XNYS'),
        (CA_EQUITIES, 'XTSE'),
    )
    loaders = {}
    for domain, reader_key in domain_reader_keys:
        loaders[domain] = EquityPricingLoader(
            cls.daily_bar_readers[reader_key],
            adjustments,
            cls.in_memory_fx_rate_reader,
        )
    cls.loaders = loaders

    cls.engine = SimplePipelineEngine(
        get_loader=cls.get_loader,
        asset_finder=cls.asset_finder,
    )
def make_pipeline_engine(symbols=None, bundle='etfs_bundle', calendar='NYSE'):
    """Register and load a bundle, then build a pipeline engine for it.

    Parameters
    ----------
    symbols : list[str], optional
        Symbols to register with the bundle and to look up afterwards.
        Defaults to ``['SPY', 'TLT']``.
    bundle : str, optional
        Name of the bundle to register and load.
    calendar : str, optional
        Name passed to ``get_calendar``; the resulting calendar's
        ``all_sessions`` is handed to the engine.

    Returns
    -------
    assets, engine : tuple
        The result of ``asset_finder.lookup_symbols(symbols, as_of_date=None)``
        and the constructed ``SimplePipelineEngine``.
    """
    if symbols is None:
        # A fresh list per call: a list literal in the signature would be
        # shared across calls and could be mutated by ``register``.
        symbols = ['SPY', 'TLT']

    register(bundle, symbols)
    bundle_data = load(bundle)

    # Loader for pricing columns.
    pipeline_loader = USEquityPricingLoader(
        bundle_data.equity_daily_bar_reader,
        bundle_data.adjustment_reader,
    )

    def choose_loader(column):
        if column in USEquityPricing.columns:
            return pipeline_loader
        # ``loaders`` is not defined in this function; it is resolved from
        # the enclosing scope at call time and indexed by column.
        return loaders[column]

    trading_calendar = get_calendar(calendar)
    engine = SimplePipelineEngine(
        get_loader=choose_loader,
        calendar=trading_calendar.all_sessions,
        asset_finder=bundle_data.asset_finder,
    )

    assets = bundle_data.asset_finder.lookup_symbols(symbols, as_of_date=None)
    return assets, engine
def test_id_macro_dataset(self):
    """Run ``value.latest`` from a blaze expression over the macro frame.

    The result must hold the values 0, 1 and 2, each repeated once per
    asset, indexed by the product of the macro timestamps and the assets.
    """
    expr = bz.Data(self.macro_df, name='expr', dshape=self.macro_dshape)
    loader = BlazeLoader()
    ds = from_blaze(expr, loader=loader, no_deltas_rule='ignore')

    pipeline = Pipeline()
    pipeline.add(ds.value.latest, 'value')

    dates = self.dates
    asset_info = asset_infos[0][0]

    with tmp_asset_finder(asset_info) as finder:
        engine = SimplePipelineEngine(loader, dates, finder)
        result = engine.run_pipeline(pipeline, dates[0], dates[-1])

    nassets = len(asset_info)
    # 0 for every asset, then 1 for every asset, then 2 for every asset.
    expected_values = [value for value in (0, 1, 2) for _ in range(nassets)]
    expected_index = pd.MultiIndex.from_product((
        self.macro_df.timestamp,
        finder.retrieve_all(asset_info.index),
    ))
    expected = pd.DataFrame(
        expected_values,
        index=expected_index,
        columns=('value',),
    )

    assert_frame_equal(result, expected, check_dtype=False)
def test_id(self):
    """Run ``value.latest`` from a blaze expression built over ``self.df``.

    The pipeline output is compared with ``self.df`` after dropping
    ``asof_date`` and re-indexing it by (timestamp, asset).
    """
    expr = bz.Data(self.df, name='expr', dshape=self.dshape)
    loader = BlazeLoader()
    ds = from_blaze(expr, loader=loader, no_deltas_rule='ignore')

    pipeline = Pipeline()
    pipeline.add(ds.value.latest, 'value')

    dates = self.dates
    with tmp_asset_finder() as finder:
        engine = SimplePipelineEngine(loader, dates, finder)
        result = engine.run_pipeline(pipeline, dates[0], dates[-1])

    without_asof = self.df.drop('asof_date', axis=1)
    expected = without_asof.set_index(['timestamp', 'sid'])

    # Swap the sid level for the asset objects returned by the finder.
    timestamps, sids = expected.index.levels[0], expected.index.levels[1]
    expected.index = pd.MultiIndex.from_product((
        timestamps,
        finder.retrieve_all(sids),
    ))

    assert_frame_equal(result, expected, check_dtype=False)
def init_class_fixtures(cls):
    """Create raw close/volume data and an engine that serves it.

    ``cls.raw_data`` is a float frame of consecutive integers over the
    February 2015 dates and the fixture's assets. Close is loaded from it
    directly and volume from twice its values.
    """
    super(ParameterizedFactorTestCase, cls).init_class_fixtures()

    day = cls.env.trading_day
    dates = date_range('2015-02-01', '2015-02-28', freq=day, tz='UTC')
    cls.dates = dates
    sids = cls.sids

    num_dates, num_sids = len(dates), len(sids)
    values = arange(num_dates * num_sids, dtype=float).reshape(num_dates, num_sids)
    cls.raw_data = DataFrame(
        data=values,
        index=dates,
        columns=cls.asset_finder.retrieve_all(sids),
    )

    close_loader = DataFrameLoader(USEquityPricing.close, cls.raw_data)
    volume_loader = DataFrameLoader(USEquityPricing.volume, cls.raw_data * 2)

    # The engine looks loaders up by column through the dict's __getitem__.
    loaders_by_column = {
        USEquityPricing.close: close_loader,
        USEquityPricing.volume: volume_loader,
    }
    cls.engine = SimplePipelineEngine(
        loaders_by_column.__getitem__,
        cls.dates,
        cls.asset_finder,
    )
def _test_id(self, df, dshape, expected, finder, add):
    """Run ``.latest`` for each name in ``add`` and compare with ``expected``.

    A blaze expression is built over ``df`` with ``dshape``; one pipeline
    column is added per attribute name in ``add``. The result is compared
    with ``expected`` after ``_utc_localize_index_level_0`` is applied.
    """
    expr = bz.Data(df, name='expr', dshape=dshape)
    loader = BlazeLoader()
    ds = from_blaze(
        expr,
        loader=loader,
        no_deltas_rule=no_deltas_rules.ignore,
        missing_values=self.missing_values,
    )

    pipeline = Pipeline()
    for column_name in add:
        pipeline.add(getattr(ds, column_name).latest, column_name)

    dates = self.dates

    # NOTE(review): the ``with`` target rebinds ``finder``, so the ``finder``
    # argument passed by the caller is never used -- confirm this is intended.
    with tmp_asset_finder() as finder:
        engine = SimplePipelineEngine(loader, dates, finder)
        result = engine.run_pipeline(pipeline, dates[0], dates[-1])

    assert_frame_equal(
        result,
        _utc_localize_index_level_0(expected),
        check_dtype=False,
    )
def init_class_fixtures(cls):
    """Build per-domain pricing loaders and a shared pipeline engine.

    After the parent fixtures are initialised, ``cls.loaders`` maps each
    equity domain to an ``EquityPricingLoader`` over that domain's daily bar
    reader, and ``cls.engine`` is created with ``cls.get_loader`` and
    ``cls.asset_finder``.
    """
    super(WithInternationalPricingPipelineEngine, cls).init_class_fixtures()

    adjustments = NullAdjustmentReader()

    # Each domain is paired with the key of its reader in daily_bar_readers.
    domain_reader_keys = (
        (GB_EQUITIES, "XLON"),
        (US_EQUITIES, "XNYS"),
        (CA_EQUITIES, "XTSE"),
    )
    loaders = {}
    for domain, reader_key in domain_reader_keys:
        loaders[domain] = EquityPricingLoader(
            cls.daily_bar_readers[reader_key],
            adjustments,
            cls.in_memory_fx_rate_reader,
        )
    cls.loaders = loaders

    cls.engine = SimplePipelineEngine(
        get_loader=cls.get_loader,
        asset_finder=cls.asset_finder,
    )
def _run_pipeline(self, expr, deltas, expected_views, expected_output,
                  finder, calendar, start, end, window_length, compute_fn):
    """Run a windowed factor over a blaze dataset and check views and output.

    On each ``compute`` call the factor asserts that its input window
    matches ``expected_views[today]`` and writes ``compute_fn(data)`` to the
    output. The final pipeline result is compared with ``expected_output``.
    """
    loader = BlazeLoader()
    ds = from_blaze(
        expr,
        deltas,
        loader=loader,
        no_deltas_rule=no_deltas_rules.raise_,
    )

    # prevent unbound locals issue in the inner class
    window_length_ = window_length

    class TestFactor(CustomFactor):
        inputs = (ds.value,)
        window_length = window_length_

        def compute(self, today, assets, out, data):
            assert_array_almost_equal(data, expected_views[today])
            out[:] = compute_fn(data)

    pipeline = Pipeline()
    pipeline.add(TestFactor(), 'value')

    engine = SimplePipelineEngine(loader, calendar, finder)
    result = engine.run_pipeline(pipeline, start, end)

    assert_frame_equal(result, expected_output, check_dtype=False)
def set_bundle(name, calendar='XSHG'):
    """Point the module-level bundle state at the bundle called ``name``.

    Rebinds the globals ``bundle``, ``trading_calendar``, ``bundle_data``,
    ``engine`` and ``data``. The engine is built with the module-level
    ``choose_loader`` and the sessions of the calendar named ``calendar``.
    """
    global trading_calendar, bundle, bundle_data, engine, choose_loader, data

    bundle = name
    trading_calendar = get_calendar(calendar)
    bundle_data = bundles.load(bundle)

    asset_finder = bundle_data.asset_finder
    daily_reader = bundle_data.equity_daily_bar_reader

    engine = SimplePipelineEngine(
        get_loader=choose_loader,
        calendar=trading_calendar.all_sessions,
        asset_finder=asset_finder,
    )

    # Only daily bars are wired in; the minute reader is explicitly None.
    data = CNDataPortal(
        asset_finder,
        trading_calendar=trading_calendar,
        first_trading_day=daily_reader.first_trading_day,
        equity_minute_reader=None,
        equity_daily_reader=daily_reader,
        adjustment_reader=bundle_data.adjustment_reader,
    )
def test_custom_query_time_tz(self):
    """Check ``BlazeLoader`` with a custom query time of 08:45 EST.

    Timestamps are moved to 08:44 EST (stored as naive UTC), except rows 3-5
    which are set to 13:45 on 2014-01-01. Those rows are expected to show up
    one day later than the rest.
    """
    df = self.df.copy()

    localized = pd.DatetimeIndex(df['timestamp'], tz='EST')
    shifted = localized + timedelta(hours=8, minutes=44)
    df['timestamp'] = shifted.tz_convert('utc').tz_localize(None)

    # NOTE(review): ``.ix`` was deprecated in pandas 0.20 and removed in 1.0;
    # it is kept here because the index type of ``self.df`` is not visible,
    # so an exact ``.loc``/``.iloc`` equivalent cannot be confirmed.
    df.ix[3:5, 'timestamp'] = pd.Timestamp('2014-01-01 13:45')

    expr = bz.Data(df, name='expr', dshape=self.dshape)
    loader = BlazeLoader(data_query_time=time(8, 45), data_query_tz='EST')
    ds = from_blaze(
        expr,
        loader=loader,
        no_deltas_rule=no_deltas_rules.ignore,
    )

    pipeline = Pipeline()
    pipeline.add(ds.value.latest, 'value')

    dates = self.dates
    with tmp_asset_finder() as finder:
        engine = SimplePipelineEngine(loader, dates, finder)
        result = engine.run_pipeline(pipeline, dates[0], dates[-1])

    expected = df.drop('asof_date', axis=1)
    normalized = expected['timestamp'].dt.normalize()
    expected['timestamp'] = normalized.astype('datetime64[ns]')
    expected.ix[3:5, 'timestamp'] += timedelta(days=1)
    expected.set_index(['timestamp', 'sid'], inplace=True)

    # Swap the sid level for the asset objects returned by the finder.
    expected.index = pd.MultiIndex.from_product((
        expected.index.levels[0],
        finder.retrieve_all(expected.index.levels[1]),
    ))

    assert_frame_equal(result, expected, check_dtype=False)
def temp_pipeline_engine(calendar, sids, random_seed, symbols=None):
    """
    Yield a SimplePipelineEngine holding a reference to an AssetFinder
    generated via tmp_asset_finder.

    This is a generator that yields exactly once, inside the
    ``tmp_asset_finder`` context; the original docstring describes it as a
    context manager.

    Parameters
    ----------
    calendar : pd.DatetimeIndex
        Calendar to pass to the constructed PipelineEngine.
    sids : iterable[int]
        Sids to use for the temp asset finder.
    random_seed : int
        Integer used to seed instances of SeededRandomLoader.
    symbols : iterable[str], optional
        Symbols for constructed assets. Forwarded to make_simple_equity_info.
    """
    # Assets span the whole calendar, first entry to last.
    equity_info = make_simple_equity_info(
        sids=sids,
        start_date=calendar[0],
        end_date=calendar[-1],
        symbols=symbols,
    )

    seeded_loader = make_seeded_random_loader(random_seed, calendar, sids)

    with tmp_asset_finder(equities=equity_info) as finder:
        # Every column is served by the same seeded loader.
        yield SimplePipelineEngine(lambda column: seeded_loader, calendar, finder)
def init_class_fixtures(cls):
    """Build per-domain pricing loaders and a shared pipeline engine.

    After the parent fixtures are initialised, ``cls.loaders`` maps each
    equity domain to an ``EquityPricingLoader`` over that domain's daily bar
    reader, and ``cls.engine`` is created with ``cls.get_loader`` and
    ``cls.asset_finder``.
    """
    super(WithInternationalPricingPipelineEngine, cls).init_class_fixtures()

    adjustments = NullAdjustmentReader()

    # Each domain is paired with the key of its reader in daily_bar_readers.
    domain_reader_keys = (
        (GB_EQUITIES, 'LSE'),
        (US_EQUITIES, 'NYSE'),
        (CA_EQUITIES, 'TSX'),
    )
    loaders = {}
    for domain, reader_key in domain_reader_keys:
        loaders[domain] = EquityPricingLoader(
            cls.daily_bar_readers[reader_key],
            adjustments,
        )
    cls.loaders = loaders

    cls.engine = SimplePipelineEngine(
        get_loader=cls.get_loader,
        asset_finder=cls.asset_finder,
    )
def test_numeric_factor(self):
    """Check factor arithmetic against values derived from ``self.constants``.

    Builds high-minus-low and open-minus-close ``RollingSumDifference``
    factors plus their average, runs them over five dates, and compares each
    unstacked output column with a constant frame.
    """
    constants = self.constants
    loader = self.loader
    engine = SimplePipelineEngine(lambda column: loader, self.dates, self.asset_finder)

    num_dates = 5
    dates = self.dates[10:10 + num_dates]

    high = USEquityPricing.high
    low = USEquityPricing.low
    open_ = USEquityPricing.open  # underscore avoids shadowing the builtin
    close = USEquityPricing.close

    high_minus_low = RollingSumDifference(inputs=[high, low])
    open_minus_close = RollingSumDifference(inputs=[open_, close])
    avg = (high_minus_low + open_minus_close) / 2

    columns = {
        'high_low': high_minus_low,
        'open_close': open_minus_close,
        'avg': avg,
    }
    results = engine.run_pipeline(Pipeline(columns=columns), dates[0], dates[-1])

    def constant_frame(value):
        # Frame of ``value`` over the tested dates and all assets.
        return DataFrame(value, index=dates, columns=self.assets)

    # The 3.0 multiplier presumably mirrors RollingSumDifference's default
    # window length -- TODO confirm against the factor definition.
    expected_high_low = 3.0 * (constants[high] - constants[low])
    assert_frame_equal(
        results['high_low'].unstack(),
        constant_frame(expected_high_low),
    )

    expected_open_close = 3.0 * (constants[open_] - constants[close])
    assert_frame_equal(
        results['open_close'].unstack(),
        constant_frame(expected_open_close),
    )

    expected_avg = (expected_high_low + expected_open_close) / 2.0
    assert_frame_equal(
        results['avg'].unstack(),
        constant_frame(expected_avg),
    )
def create_pipeline_engine(bundle_name='alpaca_api'):
    """
    Return a SimplePipelineEngine wired to the module-level bundle data.

    Parameters
    ----------
    bundle_name : str, optional
        Name passed to ``set_bundle_data`` when ``BUNDLE_DATA`` is falsy.
        Defaults to ``'alpaca_api'``.

    Returns
    -------
    engine : SimplePipelineEngine
        Engine using ``choose_loader`` as its loader dispatch and
        ``BUNDLE_DATA.asset_finder`` as its asset finder.
    """
    global BUNDLE_DATA

    # Load lazily: set_bundle_data is presumed to populate BUNDLE_DATA --
    # verify against its definition.
    if not BUNDLE_DATA:
        set_bundle_data(bundle_name)

    return SimplePipelineEngine(
        get_loader=choose_loader,
        asset_finder=BUNDLE_DATA.asset_finder,
    )
def test_multiple_rolling_factors(self):
    """
    Run three RollingSumDifference factors with differing window lengths
    and inputs in one pipeline, then check the output columns, the output
    index, and each column's constant expected value.
    """
    assets = self.assets
    finder = self.asset_finder
    shared_loader = self.loader
    engine = SimplePipelineEngine(
        lambda column: shared_loader,
        self.dates,
        finder,
    )

    num_dates = 5
    shape = (num_dates, len(assets))
    dates = self.dates[10:10 + num_dates]

    short_factor = RollingSumDifference(window_length=3)
    long_factor = RollingSumDifference(window_length=5)
    high_factor = RollingSumDifference(
        window_length=3,
        inputs=[USEquityPricing.open, USEquityPricing.high],
    )

    results = engine.run_pipeline(
        Pipeline(
            columns={
                'short': short_factor,
                'long': long_factor,
                'high': high_factor,
            },
        ),
        dates[0],
        dates[-1],
    )

    self.assertEqual(set(results.columns), {'short', 'high', 'long'})
    assert_multi_index_is_product(
        self,
        results.index,
        dates,
        finder.retrieve_all(assets),
    )

    expected_fill = (
        # row-wise sum over an array whose values are all (1 - 2)
        ('short', -short_factor.window_length),
        ('long', -long_factor.window_length),
        # row-wise sum over an array whose values are all (1 - 3)
        ('high', -2 * high_factor.window_length),
    )
    for column_name, fill_value in expected_fill:
        check_arrays(
            results[column_name].unstack().values,
            full(shape, fill_value),
        )
def build_pipeline_engine(bundle_data, trading_calendar):
    """
    Construct a SimplePipelineEngine for the given bundle and calendar.

    Parameters
    ----------
    bundle_data : object
        Bundle whose ``asset_finder`` is handed to the engine and which is
        wrapped in a PricingLoader to resolve column loaders.
    trading_calendar : object
        Calendar whose ``all_sessions`` attribute is used as the engine
        calendar.

    Returns
    -------
    engine : SimplePipelineEngine
    """
    return SimplePipelineEngine(
        get_loader=PricingLoader(bundle_data).get_loader,
        calendar=trading_calendar.all_sessions,
        asset_finder=bundle_data.asset_finder,
    )
def test_multiple_rolling_factors(self):
    """
    Verify that several rolling factors sharing one engine each produce a
    constant frame equal to the per-row difference of their inputs times
    their own window length.
    """
    loader, finder, assets = self.loader, self.asset_finder, self.assets
    engine = SimplePipelineEngine(
        lambda column: loader,
        self.dates,
        self.asset_finder,
    )

    shape = (5, len(assets))
    dates = self.dates[10:10 + shape[0]]

    factors = {
        'short': RollingSumDifference(window_length=3),
        'long': RollingSumDifference(window_length=5),
        'high': RollingSumDifference(
            window_length=3,
            inputs=[USEquityPricing.open, USEquityPricing.high],
        ),
    }
    results = engine.run_pipeline(
        Pipeline(columns=dict(factors)),
        dates[0],
        dates[-1],
    )

    self.assertEqual(set(results.columns), {'short', 'high', 'long'})
    expected_assets = finder.retrieve_all(assets)
    assert_multi_index_is_product(self, results.index, dates, expected_assets)

    def unstacked_values(column_name):
        return results[column_name].unstack().values

    # row-wise sum over an array whose values are all (1 - 2)
    check_arrays(
        unstacked_values('short'),
        full(shape, -factors['short'].window_length),
    )
    check_arrays(
        unstacked_values('long'),
        full(shape, -factors['long'].window_length),
    )

    # row-wise sum over an array whose values are all (1 - 3)
    check_arrays(
        unstacked_values('high'),
        full(shape, -2 * factors['high'].window_length),
    )
def test_id_macro_dataset_multiple_columns(self):
    """
    input (df):
       asof_date  timestamp  other  value
    0 2014-01-01 2014-01-01      1      0
    3 2014-01-02 2014-01-02      2      1
    6 2014-01-03 2014-01-03      3      2

    output (expected):
                               other  value
    2014-01-01 Equity(65 [A])      1      0
               Equity(66 [B])      1      0
               Equity(67 [C])      1      0
    2014-01-02 Equity(65 [A])      2      1
               Equity(66 [B])      2      1
               Equity(67 [C])      2      1
    2014-01-03 Equity(65 [A])      3      2
               Equity(66 [B])      3      2
               Equity(67 [C])      3      2
    """
    macro = self.macro_df.copy()
    macro['other'] = macro.value + 1

    # The new column reuses the declared type of the existing 'value' field.
    record_fields = OrderedDict(self.macro_dshape.measure.fields)
    record_fields['other'] = record_fields['value']

    expr = bz.Data(macro, name='expr', dshape=var * Record(record_fields))
    loader = BlazeLoader()
    ds = from_blaze(
        expr,
        loader=loader,
        no_deltas_rule=no_deltas_rules.ignore,
    )

    pipeline = Pipeline()
    pipeline.add(ds.value.latest, 'value')
    pipeline.add(ds.other.latest, 'other')

    dates = self.dates
    asset_info = asset_infos[0][0]
    with tmp_asset_finder(equities=asset_info) as finder:
        engine = SimplePipelineEngine(loader, dates, finder)
        result = engine.run_pipeline(pipeline, dates[0], dates[-1])

        # Each (value, other) pair is repeated once per asset.
        expected_index = pd.MultiIndex.from_product((
            macro.timestamp,
            finder.retrieve_all(asset_info.index),
        ))
        expected = pd.DataFrame(
            np.array([[0, 1], [1, 2], [2, 3]]).repeat(3, axis=0),
            index=expected_index,
            columns=('value', 'other'),
        )

        assert_frame_equal(
            result,
            expected.sort_index(axis=1),
            check_dtype=False,
        )
def run_graph(self, graph, initial_workspace, mask=None):
    """
    Compute the given graph with an engine that has no usable loaders,
    seeding the engine's workspace with `initial_workspace`.

    Parameters
    ----------
    graph : zipline.pipeline.graph.ExecutionPlan
        Graph to run.
    initial_workspace : dict
        Initial workspace to forward to SimplePipelineEngine.compute_chunk.
        Entries for ``AssetExists()`` and ``InputDates()`` are added in
        place when they are not already present.
    mask : DataFrame, optional
        Value to store in `initial_workspace` as the mask from
        `AssetExists()`. Defaults to ``self.default_asset_exists_mask``.

    Returns
    -------
    results : dict
        Mapping from termname -> computed result.
    """
    def get_loader(c):
        # Everything needed must already be in the workspace.
        raise AssertionError("run_graph() should not require any loaders!")

    engine = SimplePipelineEngine(
        get_loader,
        self.asset_finder,
        default_domain=US_EQUITIES,
    )

    exists_mask = self.default_asset_exists_mask if mask is None else mask
    dates, sids, mask_values = explode(exists_mask)

    # setdefault keeps any values the caller supplied explicitly.
    seeds = ((AssetExists(), mask_values), (InputDates(), dates))
    for term, value in seeds:
        initial_workspace.setdefault(term, value)

    return engine.compute_chunk(
        graph=graph,
        dates=dates,
        sids=sids,
        initial_workspace=initial_workspace,
    )
def test_fail_usefully_on_insufficient_data(self):
    """
    A factor with ``window_length = 10`` must run on ``self.dates[9]`` but
    raise NoFurtherDataError when asked for ``self.dates[8]``.
    """
    shared_loader = self.loader
    engine = SimplePipelineEngine(
        lambda column: shared_loader,
        self.dates,
        self.asset_finder,
    )

    class SomeFactor(CustomFactor):
        inputs = [USEquityPricing.close]
        window_length = 10

        def compute(self, today, assets, out, closes):
            pass

    pipeline = Pipeline(columns={'t': SomeFactor()})

    # self.dates[9] is the earliest date we should be able to compute.
    earliest_computable = self.dates[9]
    engine.run_pipeline(pipeline, earliest_computable, earliest_computable)

    # One session earlier there is not enough history to fill a window of
    # length 10, so the engine is expected to refuse.
    one_too_early = self.dates[8]
    with self.assertRaises(NoFurtherDataError):
        engine.run_pipeline(pipeline, one_too_early, one_too_early)
def test_screen(self):
    """
    For every asset id, screen the pipeline on ``AssetID() <= asset_id``
    and check that only the assets at or below that id appear in the
    output, each carrying its own id (as a float) in column ``f``.
    """
    finder = self.asset_finder
    sids = array(self.asset_ids)
    shared_loader = self.loader
    engine = SimplePipelineEngine(
        lambda column: shared_loader,
        self.dates,
        self.asset_finder,
    )

    dates = self.dates[10:15]
    id_factor = AssetID()

    for cutoff in sids:
        pipeline = Pipeline(columns={"f": id_factor}, screen=id_factor <= cutoff)
        result = engine.run_pipeline(pipeline, dates[0], dates[-1])

        passing_sids = sids[sids <= cutoff]
        passing_assets = finder.retrieve_all(passing_sids)

        # One block of passing sids per date, in date-major order.
        expected_result = DataFrame(
            data=tile(passing_sids.astype(float), [len(dates)]),
            index=MultiIndex.from_product([dates, passing_assets]),
            columns=["f"],
        )
        assert_frame_equal(result, expected_result)
def test_multiple_rolling_factors(self):
    """
    Run three RollingSumDifference factors in one pipeline and compare each
    output column against a float-typed constant array derived from that
    factor's window length.
    """
    assets = self.assets
    shared_loader = self.loader
    engine = SimplePipelineEngine(
        lambda column: shared_loader,
        self.dates,
        self.asset_finder,
    )

    num_dates = 5
    shape = (num_dates, len(assets))
    dates = self.dates[10:10 + num_dates]

    short_factor = RollingSumDifference(window_length=3)
    long_factor = RollingSumDifference(window_length=5)
    high_factor = RollingSumDifference(
        window_length=3,
        inputs=[USEquityPricing.open, USEquityPricing.high],
    )

    pipeline = Pipeline(
        columns={
            "short": short_factor,
            "long": long_factor,
            "high": high_factor,
        },
    )
    results = engine.run_pipeline(pipeline, dates[0], dates[-1])

    self.assertEqual(set(results.columns), {"short", "high", "long"})
    assert_multi_index_is_product(self, results.index, dates, assets)

    expected_fill = (
        # row-wise sum over an array whose values are all (1 - 2)
        ("short", -short_factor.window_length),
        ("long", -long_factor.window_length),
        # row-wise sum over an array whose values are all (1 - 3)
        ("high", -2 * high_factor.window_length),
    )
    for column_name, fill_value in expected_fill:
        check_arrays(
            results[column_name].unstack().values,
            full(shape, fill_value, dtype=float),
        )
def _test_id(self, df, dshape, expected, finder, add):
    """
    Load `df` through a blaze-backed dataset, run a pipeline over the
    requested columns, and compare the output with `expected`.

    Parameters
    ----------
    df : object
        Data handed to ``bz.data`` under the name ``'expr'``.
    dshape : object
        Datashape passed to ``bz.data``.
    expected : DataFrame
        Expected pipeline output. Its level-0 index is passed through
        ``_utc_localize_index_level_0`` before comparison.
    finder : object
        Asset finder given to the SimplePipelineEngine.
    add : iterable[str]
        Names of dataset columns whose ``latest`` term is added to the
        pipeline under the same name.
    """
    expr = bz.data(df, name='expr', dshape=dshape)
    loader = BlazeLoader()
    ds = from_blaze(
        expr,
        loader=loader,
        no_deltas_rule=no_deltas_rules.ignore,
        missing_values=self.missing_values,
    )

    pipeline = Pipeline()
    for column_name in add:
        pipeline.add(getattr(ds, column_name).latest, column_name)

    dates = self.dates
    engine = SimplePipelineEngine(loader, dates, finder)
    result = engine.run_pipeline(pipeline, dates[0], dates[-1])

    # Column order is not part of the contract, so sort both sides.
    actual_sorted = result.sort_index(axis=1)
    expected_sorted = _utc_localize_index_level_0(expected.sort_index(axis=1))
    assert_frame_equal(actual_sorted, expected_sorted, check_dtype=False)
def test_masked_factor(self):
    """
    Test that a Custom Factor computes the correct values when passed a
    mask. The mask/filter should be applied prior to computing any values,
    as opposed to computing the factor across the entire universe of
    assets. Any assets that are filtered out should be filled with missing
    values.
    """
    dates = self.dates[5:8]
    assets = self.assets
    asset_ids = self.asset_ids
    constants = self.constants
    # Named to avoid shadowing the builtin ``open``.
    open_col, close_col = USEquityPricing.open, USEquityPricing.close

    shared_loader = self.loader
    engine = SimplePipelineEngine(
        lambda column: shared_loader,
        self.dates,
        self.asset_finder,
    )

    factor1_value = constants[open_col]
    factor2_value = 3.0 * (constants[open_col] - constants[close_col])

    def expected_frame(expected_value, mask):
        # Masked-out cells are expected to hold NaN.
        return DataFrame(
            where(mask, expected_value, nan),
            index=dates,
            columns=assets,
        )

    def run_and_verify(columns, expected_mask, expected_values):
        results = engine.run_pipeline(
            Pipeline(columns=columns),
            dates[0],
            dates[-1],
        )
        mask_results = results['mask'].unstack()
        check_arrays(mask_results.values, expected_mask)
        for column_name, value in expected_values:
            assert_frame_equal(
                results[column_name].unstack(),
                expected_frame(value, mask_results),
            )

    cases = (
        (
            AssetIDPlusDay() < (asset_ids[-1] + dates[0].day),
            array(
                [[True, True, True, False],
                 [True, True, False, False],
                 [True, False, False, False]],
                dtype=bool,
            ),
        ),
        (
            (AssetIDPlusDay() % 2).eq(0),
            array(
                [[False, True, False, True],
                 [True, False, True, False],
                 [False, True, False, True]],
                dtype=bool,
            ),
        ),
    )

    for mask, expected_mask in cases:
        # Test running a pipeline with a single masked factor.
        columns = {'factor1': OpenPrice(mask=mask), 'mask': mask}
        run_and_verify(
            columns,
            expected_mask,
            [('factor1', factor1_value)],
        )

        # Test running a pipeline with a second factor. This ensures that
        # adding another factor to the pipeline with a different window
        # length does not cause any unexpected behavior, especially when
        # both factors share the same mask.
        columns['factor2'] = RollingSumDifference(mask=mask)
        run_and_verify(
            columns,
            expected_mask,
            [('factor1', factor1_value), ('factor2', factor2_value)],
        )
def test_loader_given_multiple_columns(self):
    """
    Check that the engine hands each loader all of its columns for a given
    window in a single call, and that the computed values are correct.

    Two recording loaders serve three locally defined datasets. Three
    RollingSumSum columns mix inputs from both loaders. After running the
    pipeline, the test compares the output frame with constants derived
    from the loaders' configured values, then inspects ``load_calls`` on
    each loader.
    """
    # Two datasets served by loader1 and one served by loader2.
    class Loader1DataSet1(DataSet):
        col1 = Column(float)
        col2 = Column(float32)

    class Loader1DataSet2(DataSet):
        col1 = Column(float32)
        col2 = Column(float32)

    class Loader2DataSet(DataSet):
        col1 = Column(float32)
        col2 = Column(float32)

    # Constant value configured for each column of loader1.
    constants1 = {Loader1DataSet1.col1: 1, Loader1DataSet1.col2: 2, Loader1DataSet2.col1: 3, Loader1DataSet2.col2: 4}
    loader1 = RecordingPrecomputedLoader(constants=constants1, dates=self.dates, sids=self.assets)
    # Constant value configured for each column of loader2.
    constants2 = {Loader2DataSet.col1: 5, Loader2DataSet.col2: 6}
    loader2 = RecordingPrecomputedLoader(constants=constants2, dates=self.dates, sids=self.assets)
    # Dispatch on the column's dataset: Loader2DataSet -> loader2, everything
    # else -> loader1.
    engine = SimplePipelineEngine(
        lambda column: loader2 if column.dataset == Loader2DataSet else loader1,
        self.dates,
        self.asset_finder,
    )
    # pipe_col1 uses a window of 2; pipe_col2 and pipe_col3 use a window of 3.
    pipe_col1 = RollingSumSum(inputs=[Loader1DataSet1.col1, Loader1DataSet2.col1, Loader2DataSet.col1], window_length=2)
    pipe_col2 = RollingSumSum(inputs=[Loader1DataSet1.col2, Loader1DataSet2.col2, Loader2DataSet.col2], window_length=3)
    pipe_col3 = RollingSumSum(inputs=[Loader2DataSet.col1], window_length=3)
    # Ordered so the expected frame's column order is deterministic.
    columns = OrderedDict([
        ('pipe_col1', pipe_col1),
        ('pipe_col2', pipe_col2),
        ('pipe_col3', pipe_col3),
    ])
    result = engine.run_pipeline(
        Pipeline(columns=columns),
        self.dates[2],  # index is >= the largest window length - 1
        self.dates[-1]
    )
    # Smallest window length among the pipeline columns.
    min_window = min(pip_col.window_length for pip_col in itervalues(columns))
    # Single lookup over the constants of both loaders.
    col_to_val = ChainMap(constants1, constants2)
    # Per-column expected value: (sum of the input constants) * window_length.
    vals = {name: (sum(col_to_val[col] for col in pipe_col.inputs) * pipe_col.window_length) for name, pipe_col in iteritems(columns)}
    # Output rows are the (date, asset) product from dates[2] onward.
    index = MultiIndex.from_product([self.dates[2:], self.assets])

    def expected_for_col(col):
        # Flat expected values for one column: `offset` leading dates' worth
        # of NaN followed by the constant value for all remaining rows.
        val = vals[col]
        # Number of extra sessions this column's window needs beyond the
        # shortest window.
        offset = columns[col].window_length - min_window
        return concatenate(
            [
                # levshape[1] is the number of assets per date.
                full(offset * index.levshape[1], nan),
                full(
                    (index.levshape[0] - offset) * index.levshape[1],
                    val,
                    float,
                )
            ],
        )

    expected = DataFrame(
        data={col: expected_for_col(col) for col in vals},
        index=index,
        columns=columns,
    )
    assert_frame_equal(result, expected)
    # loader1 is expected to have been called with its col1 inputs grouped
    # together and its col2 inputs grouped together.
    self.assertEqual(set(loader1.load_calls), {ColumnArgs.sorted_by_ds(Loader1DataSet1.col1, Loader1DataSet2.col1), ColumnArgs.sorted_by_ds(Loader1DataSet1.col2, Loader1DataSet2.col2)})
    # loader2 is expected to have seen both of its columns in one group.
    self.assertEqual(set(loader2.load_calls), {ColumnArgs.sorted_by_ds(Loader2DataSet.col1, Loader2DataSet.col2)})
def test_factor_with_multiple_outputs(self):
    """
    Check both outputs of MultipleOutputs under a cascading mask, an
    alternating mask, and no mask at all.

    For each case the ``open_price`` and ``close_price`` columns must
    equal the registered constants for USEquityPricing.open and
    USEquityPricing.close wherever the expected mask is True, and NaN
    elsewhere. When a mask is supplied it is also added to the pipeline and
    compared with the expected boolean array.
    """
    dates = self.dates[5:10]
    assets = self.assets
    asset_ids = self.asset_ids
    constants = self.constants
    engine = SimplePipelineEngine(
        lambda column: self.loader,
        self.dates,
        self.asset_finder,
    )

    # Output column name -> pricing column holding its expected constant.
    output_columns = (
        ("open_price", USEquityPricing.open),
        ("close_price", USEquityPricing.close),
    )

    def expected_frame(expected_value, mask):
        return DataFrame(
            where(mask, expected_value, nan),
            index=dates,
            columns=assets,
        )

    cases = (
        (
            AssetIDPlusDay() < (asset_ids[-1] + dates[0].day),
            array(
                [
                    [True, True, True, False],
                    [True, True, False, False],
                    [True, False, False, False],
                    [False, False, False, False],
                    [False, False, False, False],
                ],
                dtype=bool,
            ),
        ),
        (
            (AssetIDPlusDay() % 2).eq(0),
            array(
                [
                    [False, True, False, True],
                    [True, False, True, False],
                    [False, True, False, True],
                    [True, False, True, False],
                    [False, True, False, True],
                ],
                dtype=bool,
            ),
        ),
        (
            # No mask: every cell is expected to be populated.
            NotSpecified,
            array([[True] * 4] * 5, dtype=bool),
        ),
    )

    for mask, expected_mask in cases:
        has_mask = mask is not NotSpecified
        open_price, close_price = MultipleOutputs(mask=mask)
        pipeline = Pipeline(
            columns={"open_price": open_price, "close_price": close_price},
        )
        if has_mask:
            pipeline.add(mask, "mask")

        results = engine.run_pipeline(pipeline, dates[0], dates[-1])

        for colname, case_column in output_columns:
            if has_mask:
                check_arrays(results["mask"].unstack().values, expected_mask)
            assert_frame_equal(
                results[colname].unstack(),
                expected_frame(constants[case_column], expected_mask),
            )
def test_compute_with_adjustments(self):
    """
    Run SimpleMovingAverage over an unadjusted ``low`` column and an
    adjusted ``high`` column for window lengths 1-3 and every
    (start, stop) pair produced by ``product_upper_triangle``.

    Three MULTIPLY adjustments (2.0, 3.0, 5.0) are registered for the
    second asset, applying at date indices 3, 10 and 16. The ``high``
    baseline has the inverse of each adjustment pre-applied to the rows
    before the corresponding index.
    """
    dates, asset_ids = self.dates, self.asset_ids
    low, high = USEquityPricing.low, USEquityPricing.high

    # (index of the apply date, multiplicative ratio) for each adjustment.
    adjustment_specs = ((3, 2.0), (10, 3.0), (16, 5.0))

    adjustments = DataFrame.from_records(
        [
            dict(
                kind=MULTIPLY,
                sid=asset_ids[1],
                value=ratio,
                start_date=None,
                # The adjustment covers data up to the session before it
                # applies.
                end_date=dates[apply_idx - 1],
                apply_date=dates[apply_idx],
            )
            for apply_idx, ratio in adjustment_specs
        ]
    )

    low_base = DataFrame(self.make_frame(30.0))
    low_loader = DataFrameLoader(low, low_base.copy(), adjustments=None)

    # Pre-apply inverse of adjustments to the baseline.
    high_base = DataFrame(self.make_frame(30.0))
    for apply_idx, ratio in adjustment_specs:
        high_base.iloc[:apply_idx, 1] /= ratio
    high_loader = DataFrameLoader(high, high_base, adjustments)

    loader_for_column = {low: low_loader, high: high_loader}
    engine = SimplePipelineEngine(
        loader_for_column.__getitem__,
        self.dates,
        self.asset_finder,
    )

    for window_length in range(1, 4):
        low_mavg = SimpleMovingAverage(
            inputs=[USEquityPricing.low],
            window_length=window_length,
        )
        high_mavg = SimpleMovingAverage(
            inputs=[USEquityPricing.high],
            window_length=window_length,
        )

        candidate_indices = range(window_length, len(dates))
        for start, stop in product_upper_triangle(candidate_indices):
            pipeline = Pipeline(columns={'low': low_mavg, 'high': high_mavg})
            results = engine.run_pipeline(pipeline, dates[start], dates[stop])
            self.assertEqual(set(results.columns), {'low', 'high'})

            rows = slice(start, stop + 1)  # +1 to include end date
            unstacked = results.unstack()
            assert_frame_equal(unstacked['low'], low_base.iloc[rows])
            assert_frame_equal(unstacked['high'], high_base.iloc[rows])