Example #1
0
    def test_numeric_factor(self):
        constants = self.constants
        loader = self.loader
        engine = SimplePipelineEngine(lambda column: loader, self.dates, self.asset_finder)
        num_dates = 5
        dates = self.dates[10 : 10 + num_dates]
        high, low = USEquityPricing.high, USEquityPricing.low
        open, close = USEquityPricing.open, USEquityPricing.close

        high_minus_low = RollingSumDifference(inputs=[high, low])
        open_minus_close = RollingSumDifference(inputs=[open, close])
        avg = (high_minus_low + open_minus_close) / 2

        results = engine.run_pipeline(
            Pipeline(columns={"high_low": high_minus_low, "open_close": open_minus_close, "avg": avg}),
            dates[0],
            dates[-1],
        )

        high_low_result = results["high_low"].unstack()
        expected_high_low = 3.0 * (constants[high] - constants[low])
        assert_frame_equal(high_low_result, DataFrame(expected_high_low, index=dates, columns=self.assets))

        open_close_result = results["open_close"].unstack()
        expected_open_close = 3.0 * (constants[open] - constants[close])
        assert_frame_equal(open_close_result, DataFrame(expected_open_close, index=dates, columns=self.assets))

        avg_result = results["avg"].unstack()
        expected_avg = (expected_high_low + expected_open_close) / 2.0
        assert_frame_equal(avg_result, DataFrame(expected_avg, index=dates, columns=self.assets))
Example #2
0
    def run_graph(self, graph, initial_workspace, mask=None):
        """
        Compute the given TermGraph, seeding the workspace of our engine with
        `initial_workspace`.

        Parameters
        ----------
        graph : zipline.pipeline.graph.TermGraph
            Graph to run.
        initial_workspace : dict
            Initial workspace to forward to SimplePipelineEngine.compute_chunk.
        mask : DataFrame, optional
            This is a value to pass to `initial_workspace` as the mask from
            `AssetExists()`.  Defaults to a frame of shape `self.default_shape`
            containing all True values.

        Returns
        -------
        results : dict
            Mapping from termname -> computed result.
        """
        engine = SimplePipelineEngine(lambda column: ExplodingObject(), self.__calendar, self.__finder)
        if mask is None:
            mask = self.__mask

        dates, assets, mask_values = explode(mask)
        initial_workspace.setdefault(AssetExists(), mask_values)
        return engine.compute_chunk(graph, dates, assets, initial_workspace)
Example #3
0
    def test_input_dates_provided_by_default(self):
        loader = self.loader
        engine = SimplePipelineEngine(
            lambda column: loader,
            self.dates,
            self.asset_finder,
        )

        class TestFactor(CustomFactor):
            inputs = [InputDates(), USEquityPricing.close]
            window_length = 10
            dtype = datetime64ns_dtype

            def compute(self, today, assets, out, dates, closes):
                first, last = dates[[0, -1], 0]
                assert last == today.asm8
                assert len(dates) == len(closes) == self.window_length
                out[:] = first

        p = Pipeline(columns={'t': TestFactor()})
        results = engine.run_pipeline(p, self.dates[9], self.dates[10])

        # All results are the same, so just grab one column.
        column = results.unstack().iloc[:, 0].values
        check_arrays(column, self.dates[:2].values)
Example #4
0
    def test_SMA(self):
        engine = SimplePipelineEngine(
            lambda column: self.pipeline_loader, self.trading_calendar.all_sessions, self.asset_finder
        )
        window_length = 5
        asset_ids = self.all_asset_ids
        dates = date_range(
            self.first_asset_start + self.trading_calendar.day, self.last_asset_end, freq=self.trading_calendar.day
        )
        dates_to_test = dates[window_length:]

        SMA = SimpleMovingAverage(inputs=(USEquityPricing.close,), window_length=window_length)

        results = engine.run_pipeline(Pipeline(columns={"sma": SMA}), dates_to_test[0], dates_to_test[-1])

        # Shift back the raw inputs by a trading day because we expect our
        # computed results to be computed using values anchored on the
        # **previous** day's data.
        expected_raw = rolling_mean(
            expected_bar_values_2d(dates - self.trading_calendar.day, self.equity_info, "close"),
            window_length,
            min_periods=1,
        )

        expected = DataFrame(
            # Truncate off the extra rows needed to compute the SMAs.
            expected_raw[window_length:],
            index=dates_to_test,  # dates_to_test is dates[window_length:]
            columns=self.asset_finder.retrieve_all(asset_ids),
        )
        self.write_nans(expected)
        result = results["sma"].unstack()
        assert_frame_equal(result, expected)
Example #5
0
    def test_rolling_and_nonrolling(self):
        open_ = USEquityPricing.open
        close = USEquityPricing.close
        volume = USEquityPricing.volume

        # Test for thirty days up to the last day that we think all
        # the assets existed.
        dates_to_test = self.dates[-30:]

        constants = {open_: 1, close: 2, volume: 3}
        loader = PrecomputedLoader(constants=constants, dates=self.dates, sids=self.asset_ids)
        engine = SimplePipelineEngine(lambda column: loader, self.dates, self.asset_finder)

        sumdiff = RollingSumDifference()

        result = engine.run_pipeline(
            Pipeline(
                columns={"sumdiff": sumdiff, "open": open_.latest, "close": close.latest, "volume": volume.latest}
            ),
            dates_to_test[0],
            dates_to_test[-1],
        )
        self.assertIsNotNone(result)
        self.assertEqual({"sumdiff", "open", "close", "volume"}, set(result.columns))

        result_index = self.asset_ids * len(dates_to_test)
        result_shape = (len(result_index),)
        check_arrays(result["sumdiff"], Series(index=result_index, data=full(result_shape, -3, dtype=float)))

        for name, const in [("open", 1), ("close", 2), ("volume", 3)]:
            check_arrays(result[name], Series(index=result_index, data=full(result_shape, const, dtype=float)))
Example #6
0
    def test_factor_with_single_output(self):
        """
        Test passing an `outputs` parameter of length 1 to a CustomFactor.
        """
        dates = self.dates[5:10]
        assets = self.assets
        num_dates = len(dates)
        open = USEquityPricing.open
        open_values = [self.constants[open]] * num_dates
        open_values_as_tuple = [(self.constants[open],)] * num_dates
        engine = SimplePipelineEngine(lambda column: self.loader, self.dates, self.asset_finder)

        single_output = OpenPrice(outputs=["open"])
        pipeline = Pipeline(columns={"open_instance": single_output, "open_attribute": single_output.open})
        results = engine.run_pipeline(pipeline, dates[0], dates[-1])

        # The instance `single_output` itself will compute a numpy.recarray
        # when added as a column to our pipeline, so we expect its output
        # values to be 1-tuples.
        open_instance_expected = {asset: open_values_as_tuple for asset in assets}
        open_attribute_expected = {asset: open_values for asset in assets}

        for colname, expected_values in (
            ("open_instance", open_instance_expected),
            ("open_attribute", open_attribute_expected),
        ):
            column_results = results[colname].unstack()
            expected_results = DataFrame(expected_values, index=dates, columns=assets, dtype=float64)
            assert_frame_equal(column_results, expected_results)
Example #7
0
    def test_single_factor(self):
        loader = self.loader
        assets = self.assets
        engine = SimplePipelineEngine(
            lambda column: loader, self.dates, self.asset_finder,
        )
        result_shape = (num_dates, num_assets) = (5, len(assets))
        dates = self.dates[10:10 + num_dates]

        factor = RollingSumDifference()
        expected_result = -factor.window_length

        # Since every asset will pass the screen, these should be equivalent.
        pipelines = [
            Pipeline(columns={'f': factor}),
            Pipeline(
                columns={'f': factor},
                screen=factor.eq(expected_result),
            ),
        ]

        for p in pipelines:
            result = engine.run_pipeline(p, dates[0], dates[-1])
            self.assertEqual(set(result.columns), {'f'})
            assert_multi_index_is_product(
                self, result.index, dates, assets
            )

            check_arrays(
                result['f'].unstack().values,
                full(result_shape, expected_result, dtype=float),
            )
Example #8
0
    def test_single_factor(self):
        loader = self.loader
        finder = self.asset_finder
        assets = self.assets
        engine = SimplePipelineEngine(
            lambda column: loader,
            self.dates,
            self.asset_finder,
        )
        result_shape = (num_dates, num_assets) = (5, len(assets))
        dates = self.dates[10:10 + num_dates]

        factor = RollingSumDifference()
        expected_result = -factor.window_length

        # Since every asset will pass the screen, these should be equivalent.
        pipelines = [
            Pipeline(columns={'f': factor}),
            Pipeline(
                columns={'f': factor},
                screen=factor.eq(expected_result),
            ),
        ]

        for p in pipelines:
            result = engine.run_pipeline(p, dates[0], dates[-1])
            self.assertEqual(set(result.columns), {'f'})
            assert_multi_index_is_product(self, result.index, dates,
                                          finder.retrieve_all(assets))

            check_arrays(
                result['f'].unstack().values,
                full(result_shape, expected_result),
            )
Example #9
0
    def test_screen(self):
        loader = self.loader
        finder = self.asset_finder
        assets = array(self.assets)
        engine = SimplePipelineEngine(
            lambda column: loader,
            self.dates,
            self.asset_finder,
        )
        num_dates = 5
        dates = self.dates[10:10 + num_dates]

        factor = AssetID()
        for asset in assets:
            p = Pipeline(columns={'f': factor}, screen=factor <= asset)
            result = engine.run_pipeline(p, dates[0], dates[-1])

            expected_sids = assets[assets <= asset]
            expected_assets = finder.retrieve_all(expected_sids)
            expected_result = DataFrame(
                index=MultiIndex.from_product([dates, expected_assets]),
                data=tile(expected_sids.astype(float), [len(dates)]),
                columns=['f'],
            )

            assert_frame_equal(result, expected_result)
Example #10
0
    def test_custom_factor_outputs_parameter(self):
        dates = self.dates[5:10]
        assets = self.assets
        num_dates = len(dates)
        num_assets = len(assets)
        constants = self.constants
        engine = SimplePipelineEngine(
            lambda column: self.loader, self.dates, self.asset_finder,
        )

        def create_expected_results(expected_value):
            expected_values = full(
                (num_dates, num_assets), expected_value, float64,
            )
            return DataFrame(expected_values, index=dates, columns=assets)

        for window_length in range(1, 3):
            sum_, diff = OpenCloseSumAndDiff(
                outputs=['sum_', 'diff'], window_length=window_length,
            )
            pipeline = Pipeline(columns={'sum_': sum_, 'diff': diff})
            results = engine.run_pipeline(pipeline, dates[0], dates[-1])
            for colname, op in ('sum_', add), ('diff', sub):
                output_results = results[colname].unstack()
                output_expected = create_expected_results(
                    op(
                        constants[USEquityPricing.open] * window_length,
                        constants[USEquityPricing.close] * window_length,
                    )
                )
                assert_frame_equal(output_results, output_expected)
Example #11
0
    def test_instance_of_factor_with_multiple_outputs(self):
        """
        Test adding a CustomFactor instance, which has multiple outputs, as a
        pipeline column directly. Its computed values should be tuples
        containing the computed values of each of its outputs.
        """
        dates = self.dates[5:10]
        assets = self.assets
        num_dates = len(dates)
        num_assets = len(assets)
        constants = self.constants
        engine = SimplePipelineEngine(
            lambda column: self.loader, self.dates, self.asset_finder,
        )

        open_values = [constants[USEquityPricing.open]] * num_assets
        close_values = [constants[USEquityPricing.close]] * num_assets
        expected_values = [list(zip(open_values, close_values))] * num_dates
        expected_results = DataFrame(
            expected_values, index=dates, columns=assets, dtype=float64,
        )

        multiple_outputs = MultipleOutputs()
        pipeline = Pipeline(columns={'instance': multiple_outputs})
        results = engine.run_pipeline(pipeline, dates[0], dates[-1])
        instance_results = results['instance'].unstack()
        assert_frame_equal(instance_results, expected_results)
Example #12
0
    def test_custom_factor_outputs_parameter(self):
        dates = self.dates[5:10]
        assets = self.assets
        num_dates = len(dates)
        num_assets = len(assets)
        constants = self.constants
        engine = SimplePipelineEngine(lambda column: self.loader, self.dates, self.asset_finder)

        def create_expected_results(expected_value):
            expected_values = full((num_dates, num_assets), expected_value, float64)
            return DataFrame(expected_values, index=dates, columns=assets)

        for window_length in range(1, 3):
            sum_, diff = OpenCloseSumAndDiff(outputs=["sum_", "diff"], window_length=window_length)
            pipeline = Pipeline(columns={"sum_": sum_, "diff": diff})
            results = engine.run_pipeline(pipeline, dates[0], dates[-1])
            for colname, op in ("sum_", add), ("diff", sub):
                output_results = results[colname].unstack()
                output_expected = create_expected_results(
                    op(
                        constants[USEquityPricing.open] * window_length,
                        constants[USEquityPricing.close] * window_length,
                    )
                )
                assert_frame_equal(output_results, output_expected)
Example #13
0
    def test_drawdown(self):
        # The monotonically-increasing data produced by SyntheticDailyBarWriter
        # exercises two pathological cases for MaxDrawdown.  The actual
        # computed results are pretty much useless (everything is either NaN)
        # or zero, but verifying we correctly handle those corner cases is
        # valuable.
        engine = SimplePipelineEngine(
            lambda column: self.pipeline_loader, self.trading_calendar.all_sessions, self.asset_finder
        )
        window_length = 5
        asset_ids = self.all_asset_ids
        dates = date_range(
            self.first_asset_start + self.trading_calendar.day, self.last_asset_end, freq=self.trading_calendar.day
        )
        dates_to_test = dates[window_length:]

        drawdown = MaxDrawdown(inputs=(USEquityPricing.close,), window_length=window_length)

        results = engine.run_pipeline(Pipeline(columns={"drawdown": drawdown}), dates_to_test[0], dates_to_test[-1])

        # We expect NaNs when the asset was undefined, otherwise 0 everywhere,
        # since the input is always increasing.
        expected = DataFrame(
            data=zeros((len(dates_to_test), len(asset_ids)), dtype=float),
            index=dates_to_test,
            columns=self.asset_finder.retrieve_all(asset_ids),
        )
        self.write_nans(expected)
        result = results["drawdown"].unstack()

        assert_frame_equal(expected, result)
Example #14
0
    def test_engine_with_multicolumn_loader(self):
        open_ = USEquityPricing.open
        close = USEquityPricing.close
        volume = USEquityPricing.volume

        # Test for thirty days up to the second to last day that we think all
        # the assets existed.  If we test the last day of our calendar, no
        # assets will be in our output, because their end dates are all
        dates_to_test = self.dates[-32:-2]

        constants = {open_: 1, close: 2, volume: 3}
        loader = ConstantLoader(constants=constants, dates=self.dates, assets=self.assets)
        engine = SimplePipelineEngine(loader, self.dates, self.asset_finder)

        sumdiff = RollingSumDifference()

        result = engine.run_pipeline(
            Pipeline(
                columns={"sumdiff": sumdiff, "open": open_.latest, "close": close.latest, "volume": volume.latest}
            ),
            dates_to_test[0],
            dates_to_test[-1],
        )
        self.assertIsNotNone(result)
        self.assertEqual({"sumdiff", "open", "close", "volume"}, set(result.columns))

        result_index = self.assets * len(dates_to_test)
        result_shape = (len(result_index),)
        check_arrays(result["sumdiff"], Series(index=result_index, data=full(result_shape, -3)))

        for name, const in [("open", 1), ("close", 2), ("volume", 3)]:
            check_arrays(result[name], Series(index=result_index, data=full(result_shape, const)))
Example #15
0
 def run_pipeline(self, *args, **kwargs):
     calendar = US_EQUITIES
     loader = self.loader
     finder = self.asset_finder
     engine = SimplePipelineEngine(lambda col: loader,
                                   finder,
                                   default_domain=calendar)
     return engine.run_pipeline(*args, **kwargs)
Example #16
0
    def test_rolling_and_nonrolling(self):
        open_ = USEquityPricing.open
        close = USEquityPricing.close
        volume = USEquityPricing.volume

        # Test for thirty days up to the last day that we think all
        # the assets existed.
        dates_to_test = self.dates[-30:]

        constants = {open_: 1, close: 2, volume: 3}
        loader = PrecomputedLoader(
            constants=constants,
            dates=self.dates,
            sids=self.asset_ids,
        )
        engine = SimplePipelineEngine(
            lambda column: loader, self.dates, self.asset_finder,
        )

        sumdiff = RollingSumDifference()

        result = engine.run_pipeline(
            Pipeline(
                columns={
                    'sumdiff': sumdiff,
                    'open': open_.latest,
                    'close': close.latest,
                    'volume': volume.latest,
                },
            ),
            dates_to_test[0],
            dates_to_test[-1]
        )
        self.assertIsNotNone(result)
        self.assertEqual(
            {'sumdiff', 'open', 'close', 'volume'},
            set(result.columns)
        )

        result_index = self.asset_ids * len(dates_to_test)
        result_shape = (len(result_index),)
        check_arrays(
            result['sumdiff'],
            Series(
                index=result_index,
                data=full(result_shape, -3, dtype=float),
            ),
        )

        for name, const in [('open', 1), ('close', 2), ('volume', 3)]:
            check_arrays(
                result[name],
                Series(
                    index=result_index,
                    data=full(result_shape, const, dtype=float),
                ),
            )
Example #17
0
    def test_bad_dates(self):
        loader = self.loader
        engine = SimplePipelineEngine(lambda column: loader, self.dates, self.asset_finder)

        p = Pipeline()

        msg = "start_date must be before or equal to end_date .*"
        with self.assertRaisesRegexp(ValueError, msg):
            engine.run_pipeline(p, self.dates[2], self.dates[1])
Example #18
0
    def test_factor_with_multiple_outputs(self):
        dates = self.dates[5:10]
        assets = self.assets
        asset_ids = self.asset_ids
        constants = self.constants
        num_dates = len(dates)
        num_assets = len(assets)
        open = USEquityPricing.open
        close = USEquityPricing.close
        engine = SimplePipelineEngine(
            lambda column: self.loader, self.dates, self.asset_finder,
        )

        def create_expected_results(expected_value, mask):
            expected_values = where(mask, expected_value, nan)
            return DataFrame(expected_values, index=dates, columns=assets)

        cascading_mask = AssetIDPlusDay() < (asset_ids[-1] + dates[0].day)
        expected_cascading_mask_result = make_cascading_boolean_array(
            shape=(num_dates, num_assets),
        )

        alternating_mask = (AssetIDPlusDay() % 2).eq(0)
        expected_alternating_mask_result = make_alternating_boolean_array(
            shape=(num_dates, num_assets), first_value=False,
        )

        expected_no_mask_result = full(
            shape=(num_dates, num_assets), fill_value=True, dtype=bool_dtype,
        )

        masks = cascading_mask, alternating_mask, NotSpecified
        expected_mask_results = (
            expected_cascading_mask_result,
            expected_alternating_mask_result,
            expected_no_mask_result,
        )
        for mask, expected_mask in zip(masks, expected_mask_results):
            open_price, close_price = MultipleOutputs(mask=mask)
            pipeline = Pipeline(
                columns={'open_price': open_price, 'close_price': close_price},
            )
            if mask is not NotSpecified:
                pipeline.add(mask, 'mask')

            results = engine.run_pipeline(pipeline, dates[0], dates[-1])
            for colname, case_column in (('open_price', open),
                                         ('close_price', close)):
                if mask is not NotSpecified:
                    mask_results = results['mask'].unstack()
                    check_arrays(mask_results.values, expected_mask)
                output_results = results[colname].unstack()
                output_expected = create_expected_results(
                    constants[case_column], expected_mask,
                )
                assert_frame_equal(output_results, output_expected)
Example #19
0
    def test_bad_dates(self):
        loader = self.loader
        engine = SimplePipelineEngine(
            lambda column: loader, self.dates, self.asset_finder,
        )

        p = Pipeline()

        msg = "start_date must be before or equal to end_date .*"
        with self.assertRaisesRegexp(ValueError, msg):
            engine.run_pipeline(p, self.dates[2], self.dates[1])
Example #20
0
    def test_same_day_pipeline(self):
        loader = self.loader
        engine = SimplePipelineEngine(lambda column: loader, self.dates, self.asset_finder)
        factor = AssetID()
        asset = self.asset_ids[0]
        p = Pipeline(columns={"f": factor}, screen=factor <= asset)

        # The crux of this is that when we run the pipeline for a single day
        #  (i.e. start and end dates are the same) we should accurately get
        # data for the day prior.
        result = engine.run_pipeline(p, self.dates[1], self.dates[1])
        self.assertEqual(result["f"][0], 1.0)
Example #21
0
    def test_rolling_and_nonrolling(self):
        open_ = USEquityPricing.open
        close = USEquityPricing.close
        volume = USEquityPricing.volume

        # Test for thirty days up to the last day that we think all
        # the assets existed.
        dates_to_test = self.dates[-30:]

        constants = {open_: 1, close: 2, volume: 3}
        loader = ConstantLoader(
            constants=constants,
            dates=self.dates,
            assets=self.assets,
        )
        engine = SimplePipelineEngine(
            lambda column: loader, self.dates, self.asset_finder,
        )

        sumdiff = RollingSumDifference()

        result = engine.run_pipeline(
            Pipeline(
                columns={
                    'sumdiff': sumdiff,
                    'open': open_.latest,
                    'close': close.latest,
                    'volume': volume.latest,
                },
            ),
            dates_to_test[0],
            dates_to_test[-1]
        )
        self.assertIsNotNone(result)
        self.assertEqual(
            {'sumdiff', 'open', 'close', 'volume'},
            set(result.columns)
        )

        result_index = self.assets * len(dates_to_test)
        result_shape = (len(result_index),)
        check_arrays(
            result['sumdiff'],
            Series(index=result_index, data=full(result_shape, -3)),
        )

        for name, const in [('open', 1), ('close', 2), ('volume', 3)]:
            check_arrays(
                result[name],
                Series(index=result_index, data=full(result_shape, const)),
            )
Example #22
0
    def test_same_day_pipeline(self):
        loader = self.loader
        engine = SimplePipelineEngine(
            lambda column: loader, self.dates, self.asset_finder,
        )
        factor = AssetID()
        asset = self.asset_ids[0]
        p = Pipeline(columns={'f': factor}, screen=factor <= asset)

        # The crux of this is that when we run the pipeline for a single day
        #  (i.e. start and end dates are the same) we should accurately get
        # data for the day prior.
        result = engine.run_pipeline(p, self.dates[1], self.dates[1])
        self.assertEqual(result['f'][0], 1.0)
Example #23
0
    def test_engine_with_multicolumn_loader(self):
        open_ = USEquityPricing.open
        close = USEquityPricing.close
        volume = USEquityPricing.volume

        # Test for thirty days up to the second to last day that we think all
        # the assets existed.  If we test the last day of our calendar, no
        # assets will be in our output, because their end dates are all
        dates_to_test = self.dates[-32:-2]

        constants = {open_: 1, close: 2, volume: 3}
        loader = ConstantLoader(
            constants=constants,
            dates=self.dates,
            assets=self.assets,
        )
        engine = SimplePipelineEngine(loader, self.dates, self.asset_finder)

        sumdiff = RollingSumDifference()

        result = engine.run_pipeline(
            Pipeline(
                columns={
                    'sumdiff': sumdiff,
                    'open': open_.latest,
                    'close': close.latest,
                    'volume': volume.latest,
                },
            ),
            dates_to_test[0],
            dates_to_test[-1]
        )
        self.assertIsNotNone(result)
        self.assertEqual(
            {'sumdiff', 'open', 'close', 'volume'},
            set(result.columns)
        )

        result_index = self.assets * len(dates_to_test)
        result_shape = (len(result_index),)
        check_arrays(
            result['sumdiff'],
            Series(index=result_index, data=full(result_shape, -3)),
        )

        for name, const in [('open', 1), ('close', 2), ('volume', 3)]:
            check_arrays(
                result[name],
                Series(index=result_index, data=full(result_shape, const)),
            )
Example #24
0
    def run_graph(self, graph, initial_workspace, mask=None):
        """
        Compute the given TermGraph, seeding the workspace of our engine with
        `initial_workspace`.

        Parameters
        ----------
        graph : zipline.pipeline.graph.ExecutionPlan
            Graph to run.
        initial_workspace : dict
            Initial workspace to forward to SimplePipelineEngine.compute_chunk.
        mask : DataFrame, optional
            This is a value to pass to `initial_workspace` as the mask from
            `AssetExists()`.  Defaults to a frame of shape `self.default_shape`
            containing all True values.

        Returns
        -------
        results : dict
            Mapping from termname -> computed result.
        """
        def get_loader(c):
            raise AssertionError("run_graph() should not require any loaders!")

        engine = SimplePipelineEngine(
            get_loader,
            self.asset_finder,
            default_domain=US_EQUITIES,
        )
        if mask is None:
            mask = self.default_asset_exists_mask

        dates, sids, mask_values = explode(mask)

        initial_workspace.setdefault(AssetExists(), mask_values)
        initial_workspace.setdefault(InputDates(), dates)

        refcounts = graph.initial_refcounts(initial_workspace)
        execution_order = graph.execution_order(initial_workspace, refcounts)

        return engine.compute_chunk(
            graph=graph,
            dates=dates,
            sids=sids,
            workspace=initial_workspace,
            execution_order=execution_order,
            refcounts=refcounts,
            hooks=NoHooks(),
        )
Example #25
0
    def test_SMA(self):
        engine = SimplePipelineEngine(
            lambda column: self.pipeline_loader,
            self.env.trading_days,
            self.finder,
        )
        window_length = 5
        assets = self.all_assets
        dates = date_range(
            self.first_asset_start + self.trading_day,
            self.last_asset_end,
            freq=self.trading_day,
        )
        dates_to_test = dates[window_length:]

        SMA = SimpleMovingAverage(
            inputs=(USEquityPricing.close, ),
            window_length=window_length,
        )

        results = engine.run_pipeline(
            Pipeline(columns={'sma': SMA}),
            dates_to_test[0],
            dates_to_test[-1],
        )

        # Shift back the raw inputs by a trading day because we expect our
        # computed results to be computed using values anchored on the
        # **previous** day's data.
        expected_raw = rolling_mean(
            self.writer.expected_values_2d(
                dates - self.trading_day,
                assets,
                'close',
            ),
            window_length,
            min_periods=1,
        )

        expected = DataFrame(
            # Truncate off the extra rows needed to compute the SMAs.
            expected_raw[window_length:],
            index=dates_to_test,  # dates_to_test is dates[window_length:]
            columns=self.finder.retrieve_all(assets),
        )
        self.write_nans(expected)
        result = results['sma'].unstack()
        assert_frame_equal(result, expected)
    def init_class_fixtures(cls):
        (super().init_class_fixtures())

        adjustments = NullAdjustmentReader()
        cls.loaders = {
            GB_EQUITIES:
            EquityPricingLoader(
                cls.daily_bar_readers['XLON'],
                adjustments,
                cls.in_memory_fx_rate_reader,
            ),
            US_EQUITIES:
            EquityPricingLoader(
                cls.daily_bar_readers['XNYS'],
                adjustments,
                cls.in_memory_fx_rate_reader,
            ),
            CA_EQUITIES:
            EquityPricingLoader(
                cls.daily_bar_readers['XTSE'],
                adjustments,
                cls.in_memory_fx_rate_reader,
            )
        }
        cls.engine = SimplePipelineEngine(
            get_loader=cls.get_loader,
            asset_finder=cls.asset_finder,
        )
Example #27
0
def make_pipeline_engine(symbols=['SPY', 'TLT'], bundle='etfs_bundle', calendar='NYSE'):
    register(bundle, symbols)
    bundle_data = load(bundle)

    # Set up pipeline engine
    # Loader for pricing
    pipeline_loader = USEquityPricingLoader(
        bundle_data.equity_daily_bar_reader,
        bundle_data.adjustment_reader,
    )

    def my_dispatcher(column):
        return loaders[column]

    def choose_loader(column):
        if column in USEquityPricing.columns:
            return pipeline_loader
        return my_dispatcher(column)

    trading_calendar = get_calendar(calendar)
    engine = SimplePipelineEngine(
        get_loader=choose_loader,
        calendar=trading_calendar.all_sessions,
        asset_finder=bundle_data.asset_finder,
    )

    assets = bundle_data.asset_finder.lookup_symbols(symbols, as_of_date=None)
    return assets, engine
Example #28
0
    def test_id_macro_dataset(self):
        expr = bz.Data(self.macro_df, name='expr', dshape=self.macro_dshape)
        loader = BlazeLoader()
        ds = from_blaze(
            expr,
            loader=loader,
            no_deltas_rule='ignore',
        )
        p = Pipeline()
        p.add(ds.value.latest, 'value')
        dates = self.dates

        asset_info = asset_infos[0][0]
        with tmp_asset_finder(asset_info) as finder:
            result = SimplePipelineEngine(
                loader,
                dates,
                finder,
            ).run_pipeline(p, dates[0], dates[-1])

        nassets = len(asset_info)
        expected = pd.DataFrame(
            list(concatv([0] * nassets, [1] * nassets, [2] * nassets)),
            index=pd.MultiIndex.from_product((
                self.macro_df.timestamp,
                finder.retrieve_all(asset_info.index),
            )),
            columns=('value', ),
        )
        assert_frame_equal(result, expected, check_dtype=False)
Example #29
0
    def test_id(self):
        expr = bz.Data(self.df, name='expr', dshape=self.dshape)
        loader = BlazeLoader()
        ds = from_blaze(
            expr,
            loader=loader,
            no_deltas_rule='ignore',
        )
        p = Pipeline()
        p.add(ds.value.latest, 'value')
        dates = self.dates

        with tmp_asset_finder() as finder:
            result = SimplePipelineEngine(
                loader,
                dates,
                finder,
            ).run_pipeline(p, dates[0], dates[-1])

        expected = self.df.drop('asof_date',
                                axis=1).set_index(['timestamp', 'sid'], )
        expected.index = pd.MultiIndex.from_product((
            expected.index.levels[0],
            finder.retrieve_all(expected.index.levels[1]),
        ))
        assert_frame_equal(result, expected, check_dtype=False)
Example #30
0
    def init_class_fixtures(cls):
        super(ParameterizedFactorTestCase, cls).init_class_fixtures()
        day = cls.env.trading_day

        cls.dates = dates = date_range(
            '2015-02-01',
            '2015-02-28',
            freq=day,
            tz='UTC',
        )
        sids = cls.sids

        cls.raw_data = DataFrame(
            data=arange(len(dates) * len(sids), dtype=float).reshape(
                len(dates),
                len(sids),
            ),
            index=dates,
            columns=cls.asset_finder.retrieve_all(sids),
        )

        close_loader = DataFrameLoader(USEquityPricing.close, cls.raw_data)
        volume_loader = DataFrameLoader(
            USEquityPricing.volume,
            cls.raw_data * 2,
        )

        cls.engine = SimplePipelineEngine(
            {
                USEquityPricing.close: close_loader,
                USEquityPricing.volume: volume_loader,
            }.__getitem__,
            cls.dates,
            cls.asset_finder,
        )
Example #31
0
    def _test_id(self, df, dshape, expected, finder, add):
        expr = bz.Data(df, name='expr', dshape=dshape)
        loader = BlazeLoader()
        ds = from_blaze(
            expr,
            loader=loader,
            no_deltas_rule=no_deltas_rules.ignore,
            missing_values=self.missing_values,
        )
        p = Pipeline()
        for a in add:
            p.add(getattr(ds, a).latest, a)
        dates = self.dates

        with tmp_asset_finder() as finder:
            result = SimplePipelineEngine(
                loader,
                dates,
                finder,
            ).run_pipeline(p, dates[0], dates[-1])

        assert_frame_equal(
            result,
            _utc_localize_index_level_0(expected),
            check_dtype=False,
        )
Example #32
0
    def init_class_fixtures(cls):
        (super(WithInternationalPricingPipelineEngine,
               cls).init_class_fixtures())

        adjustments = NullAdjustmentReader()
        cls.loaders = {
            GB_EQUITIES:
            EquityPricingLoader(
                cls.daily_bar_readers["XLON"],
                adjustments,
                cls.in_memory_fx_rate_reader,
            ),
            US_EQUITIES:
            EquityPricingLoader(
                cls.daily_bar_readers["XNYS"],
                adjustments,
                cls.in_memory_fx_rate_reader,
            ),
            CA_EQUITIES:
            EquityPricingLoader(
                cls.daily_bar_readers["XTSE"],
                adjustments,
                cls.in_memory_fx_rate_reader,
            ),
        }
        cls.engine = SimplePipelineEngine(
            get_loader=cls.get_loader,
            asset_finder=cls.asset_finder,
        )
Example #33
0
    def _run_pipeline(self, expr, deltas, expected_views, expected_output,
                      finder, calendar, start, end, window_length, compute_fn):
        loader = BlazeLoader()
        ds = from_blaze(
            expr,
            deltas,
            loader=loader,
            no_deltas_rule=no_deltas_rules.raise_,
        )
        p = Pipeline()

        # prevent unbound locals issue in the inner class
        window_length_ = window_length

        class TestFactor(CustomFactor):
            inputs = ds.value,
            window_length = window_length_

            def compute(self, today, assets, out, data):
                assert_array_almost_equal(data, expected_views[today])
                out[:] = compute_fn(data)

        p.add(TestFactor(), 'value')

        result = SimplePipelineEngine(
            loader,
            calendar,
            finder,
        ).run_pipeline(p, start, end)

        assert_frame_equal(
            result,
            expected_output,
            check_dtype=False,
        )
Example #34
0
def set_bundle(name, calendar='XSHG'):
    global trading_calendar
    global bundle
    global bundle_data
    global engine
    global choose_loader
    global data

    bundle = name
    trading_calendar = get_calendar(calendar)
    bundle_data = bundles.load(bundle)
    engine = SimplePipelineEngine(
        get_loader=choose_loader,
        calendar=trading_calendar.all_sessions,
        asset_finder=bundle_data.asset_finder,
    )

    data = CNDataPortal(
        bundle_data.asset_finder,
        trading_calendar=trading_calendar,
        first_trading_day=bundle_data.equity_daily_bar_reader.
        first_trading_day,
        equity_minute_reader=None,
        equity_daily_reader=bundle_data.equity_daily_bar_reader,
        adjustment_reader=bundle_data.adjustment_reader,
    )
Example #35
0
    def test_custom_query_time_tz(self):
        df = self.df.copy()
        df['timestamp'] = (
            pd.DatetimeIndex(df['timestamp'], tz='EST') +
            timedelta(hours=8, minutes=44)).tz_convert('utc').tz_localize(None)
        df.ix[3:5, 'timestamp'] = pd.Timestamp('2014-01-01 13:45')
        expr = bz.Data(df, name='expr', dshape=self.dshape)
        loader = BlazeLoader(data_query_time=time(8, 45), data_query_tz='EST')
        ds = from_blaze(
            expr,
            loader=loader,
            no_deltas_rule=no_deltas_rules.ignore,
        )
        p = Pipeline()
        p.add(ds.value.latest, 'value')
        dates = self.dates

        with tmp_asset_finder() as finder:
            result = SimplePipelineEngine(
                loader,
                dates,
                finder,
            ).run_pipeline(p, dates[0], dates[-1])

        expected = df.drop('asof_date', axis=1)
        expected['timestamp'] = expected['timestamp'].dt.normalize().astype(
            'datetime64[ns]', )
        expected.ix[3:5, 'timestamp'] += timedelta(days=1)
        expected.set_index(['timestamp', 'sid'], inplace=True)
        expected.index = pd.MultiIndex.from_product((
            expected.index.levels[0],
            finder.retrieve_all(expected.index.levels[1]),
        ))
        assert_frame_equal(result, expected, check_dtype=False)
Example #36
0
def temp_pipeline_engine(calendar, sids, random_seed, symbols=None):
    """
    A contextManager that yields a SimplePipelineEngine holding a reference to
    an AssetFinder generated via tmp_asset_finder.

    Parameters
    ----------
    calendar : pd.DatetimeIndex
        Calendar to pass to the constructed PipelineEngine.
    sids : iterable[int]
        Sids to use for the temp asset finder.
    random_seed : int
        Integer used to seed instances of SeededRandomLoader.
    symbols : iterable[str], optional
        Symbols for constructed assets. Forwarded to make_simple_equity_info.
    """
    equity_info = make_simple_equity_info(
        sids=sids,
        start_date=calendar[0],
        end_date=calendar[-1],
        symbols=symbols,
    )

    loader = make_seeded_random_loader(random_seed, calendar, sids)

    def get_loader(column):
        return loader

    with tmp_asset_finder(equities=equity_info) as finder:
        yield SimplePipelineEngine(get_loader, calendar, finder)
    def init_class_fixtures(cls):
        (super(WithInternationalPricingPipelineEngine,
               cls).init_class_fixtures())

        adjustments = NullAdjustmentReader()
        cls.loaders = {
            GB_EQUITIES:
            EquityPricingLoader(
                cls.daily_bar_readers['LSE'],
                adjustments,
            ),
            US_EQUITIES:
            EquityPricingLoader(
                cls.daily_bar_readers['NYSE'],
                adjustments,
            ),
            CA_EQUITIES:
            EquityPricingLoader(
                cls.daily_bar_readers['TSX'],
                adjustments,
            )
        }
        cls.engine = SimplePipelineEngine(
            get_loader=cls.get_loader,
            asset_finder=cls.asset_finder,
        )
Example #38
0
    def test_numeric_factor(self):
        constants = self.constants
        loader = self.loader
        engine = SimplePipelineEngine(
            lambda column: loader,
            self.dates,
            self.asset_finder,
        )
        num_dates = 5
        dates = self.dates[10:10 + num_dates]
        high, low = USEquityPricing.high, USEquityPricing.low
        open, close = USEquityPricing.open, USEquityPricing.close

        high_minus_low = RollingSumDifference(inputs=[high, low])
        open_minus_close = RollingSumDifference(inputs=[open, close])
        avg = (high_minus_low + open_minus_close) / 2

        results = engine.run_pipeline(
            Pipeline(columns={
                'high_low': high_minus_low,
                'open_close': open_minus_close,
                'avg': avg,
            }, ),
            dates[0],
            dates[-1],
        )

        high_low_result = results['high_low'].unstack()
        expected_high_low = 3.0 * (constants[high] - constants[low])
        assert_frame_equal(
            high_low_result,
            DataFrame(expected_high_low, index=dates, columns=self.assets),
        )

        open_close_result = results['open_close'].unstack()
        expected_open_close = 3.0 * (constants[open] - constants[close])
        assert_frame_equal(
            open_close_result,
            DataFrame(expected_open_close, index=dates, columns=self.assets),
        )

        avg_result = results['avg'].unstack()
        expected_avg = (expected_high_low + expected_open_close) / 2.0
        assert_frame_equal(
            avg_result,
            DataFrame(expected_avg, index=dates, columns=self.assets),
        )
Example #39
0
def create_pipeline_engine(bundle_name='alpaca_api'):
    global BUNDLE_DATA
    if not BUNDLE_DATA:
        set_bundle_data(bundle_name)
    # Create a Pipeline engine
    engine = SimplePipelineEngine(get_loader=choose_loader,
                                  asset_finder=BUNDLE_DATA.asset_finder)
    return engine
Example #40
0
    def test_multiple_rolling_factors(self):

        loader = self.loader
        finder = self.asset_finder
        assets = self.assets
        engine = SimplePipelineEngine(
            lambda column: loader, self.dates, self.asset_finder,
        )
        shape = num_dates, num_assets = (5, len(assets))
        dates = self.dates[10:10 + num_dates]

        short_factor = RollingSumDifference(window_length=3)
        long_factor = RollingSumDifference(window_length=5)
        high_factor = RollingSumDifference(
            window_length=3,
            inputs=[USEquityPricing.open, USEquityPricing.high],
        )

        pipeline = Pipeline(
            columns={
                'short': short_factor,
                'long': long_factor,
                'high': high_factor,
            }
        )
        results = engine.run_pipeline(pipeline, dates[0], dates[-1])

        self.assertEqual(set(results.columns), {'short', 'high', 'long'})
        assert_multi_index_is_product(
            self, results.index, dates, finder.retrieve_all(assets)
        )

        # row-wise sum over an array whose values are all (1 - 2)
        check_arrays(
            results['short'].unstack().values,
            full(shape, -short_factor.window_length),
        )
        check_arrays(
            results['long'].unstack().values,
            full(shape, -long_factor.window_length),
        )
        # row-wise sum over an array whose values are all (1 - 3)
        check_arrays(
            results['high'].unstack().values,
            full(shape, -2 * high_factor.window_length),
        )
Example #41
0
def build_pipeline_engine(bundle_data, trading_calendar):
    pricing_loader = PricingLoader(bundle_data)

    engine = SimplePipelineEngine(get_loader=pricing_loader.get_loader,
                                  calendar=trading_calendar.all_sessions,
                                  asset_finder=bundle_data.asset_finder)

    return engine
Example #42
0
    def test_multiple_rolling_factors(self):

        loader = self.loader
        finder = self.asset_finder
        assets = self.assets
        engine = SimplePipelineEngine(
            lambda column: loader,
            self.dates,
            self.asset_finder,
        )
        shape = num_dates, num_assets = (5, len(assets))
        dates = self.dates[10:10 + num_dates]

        short_factor = RollingSumDifference(window_length=3)
        long_factor = RollingSumDifference(window_length=5)
        high_factor = RollingSumDifference(
            window_length=3,
            inputs=[USEquityPricing.open, USEquityPricing.high],
        )

        pipeline = Pipeline(columns={
            'short': short_factor,
            'long': long_factor,
            'high': high_factor,
        })
        results = engine.run_pipeline(pipeline, dates[0], dates[-1])

        self.assertEqual(set(results.columns), {'short', 'high', 'long'})
        assert_multi_index_is_product(self, results.index, dates,
                                      finder.retrieve_all(assets))

        # row-wise sum over an array whose values are all (1 - 2)
        check_arrays(
            results['short'].unstack().values,
            full(shape, -short_factor.window_length),
        )
        check_arrays(
            results['long'].unstack().values,
            full(shape, -long_factor.window_length),
        )
        # row-wise sum over an array whose values are all (1 - 3)
        check_arrays(
            results['high'].unstack().values,
            full(shape, -2 * high_factor.window_length),
        )
Example #43
0
    def test_id_macro_dataset_multiple_columns(self):
        """
        input (df):
           asof_date  timestamp  other  value
        0 2014-01-01 2014-01-01      1      0
        3 2014-01-02 2014-01-02      2      1
        6 2014-01-03 2014-01-03      3      2

        output (expected):
                                   other  value
        2014-01-01 Equity(65 [A])      1      0
                   Equity(66 [B])      1      0
                   Equity(67 [C])      1      0
        2014-01-02 Equity(65 [A])      2      1
                   Equity(66 [B])      2      1
                   Equity(67 [C])      2      1
        2014-01-03 Equity(65 [A])      3      2
                   Equity(66 [B])      3      2
                   Equity(67 [C])      3      2
        """
        df = self.macro_df.copy()
        df['other'] = df.value + 1
        fields = OrderedDict(self.macro_dshape.measure.fields)
        fields['other'] = fields['value']
        expr = bz.Data(df, name='expr', dshape=var * Record(fields))
        loader = BlazeLoader()
        ds = from_blaze(
            expr,
            loader=loader,
            no_deltas_rule=no_deltas_rules.ignore,
        )
        p = Pipeline()
        p.add(ds.value.latest, 'value')
        p.add(ds.other.latest, 'other')
        dates = self.dates

        asset_info = asset_infos[0][0]
        with tmp_asset_finder(equities=asset_info) as finder:
            result = SimplePipelineEngine(
                loader,
                dates,
                finder,
            ).run_pipeline(p, dates[0], dates[-1])

        expected = pd.DataFrame(
            np.array([[0, 1], [1, 2], [2, 3]]).repeat(3, axis=0),
            index=pd.MultiIndex.from_product((
                df.timestamp,
                finder.retrieve_all(asset_info.index),
            )),
            columns=('value', 'other'),
        ).sort_index(axis=1)
        assert_frame_equal(
            result,
            expected.sort_index(axis=1),
            check_dtype=False,
        )
Example #44
0
    def run_graph(self, graph, initial_workspace, mask=None):
        """
        Compute the given TermGraph, seeding the workspace of our engine with
        `initial_workspace`.

        Parameters
        ----------
        graph : zipline.pipeline.graph.ExecutionPlan
            Graph to run.
        initial_workspace : dict
            Initial workspace to forward to SimplePipelineEngine.compute_chunk.
        mask : DataFrame, optional
            This is a value to pass to `initial_workspace` as the mask from
            `AssetExists()`.  Defaults to a frame of shape `self.default_shape`
            containing all True values.

        Returns
        -------
        results : dict
            Mapping from termname -> computed result.
        """
        def get_loader(c):
            raise AssertionError("run_graph() should not require any loaders!")

        engine = SimplePipelineEngine(
            get_loader,
            self.asset_finder,
            default_domain=US_EQUITIES,
        )
        if mask is None:
            mask = self.default_asset_exists_mask

        dates, sids, mask_values = explode(mask)

        initial_workspace.setdefault(AssetExists(), mask_values)
        initial_workspace.setdefault(InputDates(), dates)

        return engine.compute_chunk(
            graph=graph,
            dates=dates,
            sids=sids,
            initial_workspace=initial_workspace,
        )
Example #45
0
    def test_fail_usefully_on_insufficient_data(self):
        loader = self.loader
        engine = SimplePipelineEngine(
            lambda column: loader, self.dates, self.asset_finder,
        )

        class SomeFactor(CustomFactor):
            inputs = [USEquityPricing.close]
            window_length = 10

            def compute(self, today, assets, out, closes):
                pass

        p = Pipeline(columns={'t': SomeFactor()})

        # self.dates[9] is the earliest date we should be able to compute.
        engine.run_pipeline(p, self.dates[9], self.dates[9])

        # We shouldn't be able to compute dates[8], since we only know about 8
        # prior dates, and we need a window length of 10.
        with self.assertRaises(NoFurtherDataError):
            engine.run_pipeline(p, self.dates[8], self.dates[8])
Example #46
0
    def test_screen(self):
        loader = self.loader
        finder = self.asset_finder
        asset_ids = array(self.asset_ids)
        engine = SimplePipelineEngine(lambda column: loader, self.dates, self.asset_finder)
        num_dates = 5
        dates = self.dates[10 : 10 + num_dates]

        factor = AssetID()
        for asset_id in asset_ids:
            p = Pipeline(columns={"f": factor}, screen=factor <= asset_id)
            result = engine.run_pipeline(p, dates[0], dates[-1])

            expected_sids = asset_ids[asset_ids <= asset_id]
            expected_assets = finder.retrieve_all(expected_sids)
            expected_result = DataFrame(
                index=MultiIndex.from_product([dates, expected_assets]),
                data=tile(expected_sids.astype(float), [len(dates)]),
                columns=["f"],
            )

            assert_frame_equal(result, expected_result)
Example #47
0
    def test_multiple_rolling_factors(self):

        loader = self.loader
        assets = self.assets
        engine = SimplePipelineEngine(lambda column: loader, self.dates, self.asset_finder)
        shape = num_dates, num_assets = (5, len(assets))
        dates = self.dates[10 : 10 + num_dates]

        short_factor = RollingSumDifference(window_length=3)
        long_factor = RollingSumDifference(window_length=5)
        high_factor = RollingSumDifference(window_length=3, inputs=[USEquityPricing.open, USEquityPricing.high])

        pipeline = Pipeline(columns={"short": short_factor, "long": long_factor, "high": high_factor})
        results = engine.run_pipeline(pipeline, dates[0], dates[-1])

        self.assertEqual(set(results.columns), {"short", "high", "long"})
        assert_multi_index_is_product(self, results.index, dates, assets)

        # row-wise sum over an array whose values are all (1 - 2)
        check_arrays(results["short"].unstack().values, full(shape, -short_factor.window_length, dtype=float))
        check_arrays(results["long"].unstack().values, full(shape, -long_factor.window_length, dtype=float))
        # row-wise sum over an array whose values are all (1 - 3)
        check_arrays(results["high"].unstack().values, full(shape, -2 * high_factor.window_length, dtype=float))
Example #48
0
    def test_input_dates_provided_by_default(self):
        loader = self.loader
        engine = SimplePipelineEngine(
            lambda column: loader, self.dates, self.asset_finder,
        )

        class TestFactor(CustomFactor):
            inputs = [InputDates(), USEquityPricing.close]
            window_length = 10
            dtype = datetime64ns_dtype

            def compute(self, today, assets, out, dates, closes):
                first, last = dates[[0, -1], 0]
                assert last == today.asm8
                assert len(dates) == len(closes) == self.window_length
                out[:] = first

        p = Pipeline(columns={'t': TestFactor()})
        results = engine.run_pipeline(p, self.dates[9], self.dates[10])

        # All results are the same, so just grab one column.
        column = results.unstack().iloc[:, 0].values
        check_arrays(column, self.dates[:2].values)
Example #49
0
    def _test_id(self, df, dshape, expected, finder, add):
        expr = bz.data(df, name='expr', dshape=dshape)
        loader = BlazeLoader()
        ds = from_blaze(
            expr,
            loader=loader,
            no_deltas_rule=no_deltas_rules.ignore,
            missing_values=self.missing_values,
        )
        p = Pipeline()
        for a in add:
            p.add(getattr(ds, a).latest, a)
        dates = self.dates

        result = SimplePipelineEngine(
            loader,
            dates,
            finder,
        ).run_pipeline(p, dates[0], dates[-1])
        assert_frame_equal(
            result.sort_index(axis=1),
            _utc_localize_index_level_0(expected.sort_index(axis=1)),
            check_dtype=False,
        )
Example #50
0
    def test_masked_factor(self):
        """
        Test that a Custom Factor computes the correct values when passed a
        mask. The mask/filter should be applied prior to computing any values,
        as opposed to computing the factor across the entire universe of
        assets. Any assets that are filtered out should be filled with missing
        values.
        """
        loader = self.loader
        dates = self.dates[5:8]
        assets = self.assets
        asset_ids = self.asset_ids
        constants = self.constants
        open = USEquityPricing.open
        close = USEquityPricing.close
        engine = SimplePipelineEngine(
            lambda column: loader, self.dates, self.asset_finder,
        )

        factor1_value = constants[open]
        factor2_value = 3.0 * (constants[open] - constants[close])

        def create_expected_results(expected_value, mask):
            expected_values = where(mask, expected_value, nan)
            return DataFrame(expected_values, index=dates, columns=assets)

        cascading_mask = AssetIDPlusDay() < (asset_ids[-1] + dates[0].day)
        expected_cascading_mask_result = array(
            [[True,  True,  True, False],
             [True,  True, False, False],
             [True, False, False, False]],
            dtype=bool,
        )

        alternating_mask = (AssetIDPlusDay() % 2).eq(0)
        expected_alternating_mask_result = array(
            [[False,  True, False,   True],
             [True,  False,  True,  False],
             [False,  True, False,   True]],
            dtype=bool,
        )

        masks = cascading_mask, alternating_mask
        expected_mask_results = (
            expected_cascading_mask_result,
            expected_alternating_mask_result,
        )
        for mask, expected_mask in zip(masks, expected_mask_results):
            # Test running a pipeline with a single masked factor.
            columns = {'factor1': OpenPrice(mask=mask), 'mask': mask}
            pipeline = Pipeline(columns=columns)
            results = engine.run_pipeline(pipeline, dates[0], dates[-1])

            mask_results = results['mask'].unstack()
            check_arrays(mask_results.values, expected_mask)

            factor1_results = results['factor1'].unstack()
            factor1_expected = create_expected_results(factor1_value,
                                                       mask_results)
            assert_frame_equal(factor1_results, factor1_expected)

            # Test running a pipeline with a second factor. This ensures that
            # adding another factor to the pipeline with a different window
            # length does not cause any unexpected behavior, especially when
            # both factors share the same mask.
            columns['factor2'] = RollingSumDifference(mask=mask)
            pipeline = Pipeline(columns=columns)
            results = engine.run_pipeline(pipeline, dates[0], dates[-1])

            mask_results = results['mask'].unstack()
            check_arrays(mask_results.values, expected_mask)

            factor1_results = results['factor1'].unstack()
            factor2_results = results['factor2'].unstack()
            factor1_expected = create_expected_results(factor1_value,
                                                       mask_results)
            factor2_expected = create_expected_results(factor2_value,
                                                       mask_results)
            assert_frame_equal(factor1_results, factor1_expected)
            assert_frame_equal(factor2_results, factor2_expected)
Example #51
0
    def test_loader_given_multiple_columns(self):

        class Loader1DataSet1(DataSet):
            col1 = Column(float)
            col2 = Column(float32)

        class Loader1DataSet2(DataSet):
            col1 = Column(float32)
            col2 = Column(float32)

        class Loader2DataSet(DataSet):
            col1 = Column(float32)
            col2 = Column(float32)

        constants1 = {Loader1DataSet1.col1: 1,
                      Loader1DataSet1.col2: 2,
                      Loader1DataSet2.col1: 3,
                      Loader1DataSet2.col2: 4}

        loader1 = RecordingPrecomputedLoader(constants=constants1,
                                             dates=self.dates,
                                             sids=self.assets)
        constants2 = {Loader2DataSet.col1: 5,
                      Loader2DataSet.col2: 6}
        loader2 = RecordingPrecomputedLoader(constants=constants2,
                                             dates=self.dates,
                                             sids=self.assets)

        engine = SimplePipelineEngine(
            lambda column:
            loader2 if column.dataset == Loader2DataSet else loader1,
            self.dates, self.asset_finder,
        )

        pipe_col1 = RollingSumSum(inputs=[Loader1DataSet1.col1,
                                          Loader1DataSet2.col1,
                                          Loader2DataSet.col1],
                                  window_length=2)

        pipe_col2 = RollingSumSum(inputs=[Loader1DataSet1.col2,
                                          Loader1DataSet2.col2,
                                          Loader2DataSet.col2],
                                  window_length=3)

        pipe_col3 = RollingSumSum(inputs=[Loader2DataSet.col1],
                                  window_length=3)

        columns = OrderedDict([
            ('pipe_col1', pipe_col1),
            ('pipe_col2', pipe_col2),
            ('pipe_col3', pipe_col3),
        ])
        result = engine.run_pipeline(
            Pipeline(columns=columns),
            self.dates[2],  # index is >= the largest window length - 1
            self.dates[-1]
        )
        min_window = min(pip_col.window_length
                         for pip_col in itervalues(columns))
        col_to_val = ChainMap(constants1, constants2)
        vals = {name: (sum(col_to_val[col] for col in pipe_col.inputs)
                       * pipe_col.window_length)
                for name, pipe_col in iteritems(columns)}

        index = MultiIndex.from_product([self.dates[2:], self.assets])

        def expected_for_col(col):
            val = vals[col]
            offset = columns[col].window_length - min_window
            return concatenate(
                [
                    full(offset * index.levshape[1], nan),
                    full(
                        (index.levshape[0] - offset) * index.levshape[1],
                        val,
                        float,
                    )
                ],
            )

        expected = DataFrame(
            data={col: expected_for_col(col) for col in vals},
            index=index,
            columns=columns,
        )

        assert_frame_equal(result, expected)

        self.assertEqual(set(loader1.load_calls),
                         {ColumnArgs.sorted_by_ds(Loader1DataSet1.col1,
                                                  Loader1DataSet2.col1),
                          ColumnArgs.sorted_by_ds(Loader1DataSet1.col2,
                                                  Loader1DataSet2.col2)})
        self.assertEqual(set(loader2.load_calls),
                         {ColumnArgs.sorted_by_ds(Loader2DataSet.col1,
                                                  Loader2DataSet.col2)})
Example #52
0
    def test_factor_with_multiple_outputs(self):
        dates = self.dates[5:10]
        assets = self.assets
        asset_ids = self.asset_ids
        constants = self.constants
        open = USEquityPricing.open
        close = USEquityPricing.close
        engine = SimplePipelineEngine(lambda column: self.loader, self.dates, self.asset_finder)

        def create_expected_results(expected_value, mask):
            expected_values = where(mask, expected_value, nan)
            return DataFrame(expected_values, index=dates, columns=assets)

        cascading_mask = AssetIDPlusDay() < (asset_ids[-1] + dates[0].day)
        expected_cascading_mask_result = array(
            [
                [True, True, True, False],
                [True, True, False, False],
                [True, False, False, False],
                [False, False, False, False],
                [False, False, False, False],
            ],
            dtype=bool,
        )

        alternating_mask = (AssetIDPlusDay() % 2).eq(0)
        expected_alternating_mask_result = array(
            [
                [False, True, False, True],
                [True, False, True, False],
                [False, True, False, True],
                [True, False, True, False],
                [False, True, False, True],
            ],
            dtype=bool,
        )

        expected_no_mask_result = array(
            [
                [True, True, True, True],
                [True, True, True, True],
                [True, True, True, True],
                [True, True, True, True],
                [True, True, True, True],
            ],
            dtype=bool,
        )

        masks = cascading_mask, alternating_mask, NotSpecified
        expected_mask_results = (
            expected_cascading_mask_result,
            expected_alternating_mask_result,
            expected_no_mask_result,
        )
        for mask, expected_mask in zip(masks, expected_mask_results):
            open_price, close_price = MultipleOutputs(mask=mask)
            pipeline = Pipeline(columns={"open_price": open_price, "close_price": close_price})
            if mask is not NotSpecified:
                pipeline.add(mask, "mask")

            results = engine.run_pipeline(pipeline, dates[0], dates[-1])
            for colname, case_column in (("open_price", open), ("close_price", close)):
                if mask is not NotSpecified:
                    mask_results = results["mask"].unstack()
                    check_arrays(mask_results.values, expected_mask)
                output_results = results[colname].unstack()
                output_expected = create_expected_results(constants[case_column], expected_mask)
                assert_frame_equal(output_results, output_expected)
Example #53
0
    def test_compute_with_adjustments(self):
        dates, asset_ids = self.dates, self.asset_ids
        low, high = USEquityPricing.low, USEquityPricing.high
        apply_idxs = [3, 10, 16]

        def apply_date(idx, offset=0):
            return dates[apply_idxs[idx] + offset]

        adjustments = DataFrame.from_records(
            [
                dict(
                    kind=MULTIPLY,
                    sid=asset_ids[1],
                    value=2.0,
                    start_date=None,
                    end_date=apply_date(0, offset=-1),
                    apply_date=apply_date(0),
                ),
                dict(
                    kind=MULTIPLY,
                    sid=asset_ids[1],
                    value=3.0,
                    start_date=None,
                    end_date=apply_date(1, offset=-1),
                    apply_date=apply_date(1),
                ),
                dict(
                    kind=MULTIPLY,
                    sid=asset_ids[1],
                    value=5.0,
                    start_date=None,
                    end_date=apply_date(2, offset=-1),
                    apply_date=apply_date(2),
                ),
            ]
        )
        low_base = DataFrame(self.make_frame(30.0))
        low_loader = DataFrameLoader(low, low_base.copy(), adjustments=None)

        # Pre-apply inverse of adjustments to the baseline.
        high_base = DataFrame(self.make_frame(30.0))
        high_base.iloc[:apply_idxs[0], 1] /= 2.0
        high_base.iloc[:apply_idxs[1], 1] /= 3.0
        high_base.iloc[:apply_idxs[2], 1] /= 5.0

        high_loader = DataFrameLoader(high, high_base, adjustments)

        engine = SimplePipelineEngine(
            {low: low_loader, high: high_loader}.__getitem__,
            self.dates,
            self.asset_finder,
        )

        for window_length in range(1, 4):
            low_mavg = SimpleMovingAverage(
                inputs=[USEquityPricing.low],
                window_length=window_length,
            )
            high_mavg = SimpleMovingAverage(
                inputs=[USEquityPricing.high],
                window_length=window_length,
            )
            bounds = product_upper_triangle(range(window_length, len(dates)))
            for start, stop in bounds:
                results = engine.run_pipeline(
                    Pipeline(
                        columns={'low': low_mavg, 'high': high_mavg}
                    ),
                    dates[start],
                    dates[stop],
                )
                self.assertEqual(set(results.columns), {'low', 'high'})
                iloc_bounds = slice(start, stop + 1)  # +1 to include end date

                low_results = results.unstack()['low']
                assert_frame_equal(low_results, low_base.iloc[iloc_bounds])

                high_results = results.unstack()['high']
                assert_frame_equal(high_results, high_base.iloc[iloc_bounds])