Esempio n. 1
0
    def test_correlation_and_regression_with_bad_asset(self):
        """
        Test that `RollingPearsonOfReturns`, `RollingSpearmanOfReturns` and
        `RollingLinearRegressionOfReturns` raise the proper exception when
        given a nonexistent target asset.
        """
        my_asset = Equity(0, exchange="TEST")
        start_date = self.pipeline_start_date
        end_date = self.pipeline_end_date
        run_pipeline = self.run_pipeline

        # This filter is arbitrary; the important thing is that we test each
        # factor both with and without a specified mask.
        my_asset_filter = AssetID().eq(1)

        for mask in (NotSpecified, my_asset_filter):
            pearson_factor = RollingPearsonOfReturns(
                target=my_asset,
                returns_length=3,
                correlation_length=3,
                mask=mask,
            )
            spearman_factor = RollingSpearmanOfReturns(
                target=my_asset,
                returns_length=3,
                correlation_length=3,
                mask=mask,
            )
            regression_factor = RollingLinearRegressionOfReturns(
                target=my_asset,
                returns_length=3,
                regression_length=3,
                mask=mask,
            )

            with self.assertRaises(NonExistentAssetInTimeFrame):
                run_pipeline(
                    Pipeline(columns={'pearson_factor': pearson_factor}),
                    start_date,
                    end_date,
                )
            with self.assertRaises(NonExistentAssetInTimeFrame):
                run_pipeline(
                    Pipeline(columns={'spearman_factor': spearman_factor}),
                    start_date,
                    end_date,
                )
            with self.assertRaises(NonExistentAssetInTimeFrame):
                run_pipeline(
                    Pipeline(columns={'regression_factor': regression_factor}),
                    start_date,
                    end_date,
                )
Esempio n. 2
0
    def test_adding_slice_column(self):
        """
        Test that slices cannot be added as a pipeline column.
        """
        my_asset = self.asset_finder.retrieve_asset(self.sids[0])
        open_slice = OpenPrice()[my_asset]

        with self.assertRaises(UnsupportedPipelineOutput):
            Pipeline(columns={'open_slice': open_slice})

        pipe = Pipeline(columns={})
        with self.assertRaises(UnsupportedPipelineOutput):
            pipe.add(open_slice, 'open_slice')
Esempio n. 3
0
    def test_adding_slice_column(self):
        """
        Test that slices cannot be added as a pipeline column.
        """
        my_asset = self.asset_finder.retrieve_asset(self.sids[0])
        open_slice = OpenPrice()[my_asset]

        with self.assertRaises(UnsupportedPipelineOutput):
            Pipeline(columns={'open_slice': open_slice})

        pipe = Pipeline(columns={})
        with self.assertRaises(UnsupportedPipelineOutput):
            pipe.add(open_slice, 'open_slice')
Esempio n. 4
0
    def check_downsampled_term(self, term):

        #       June 2014
        # Mo Tu We Th Fr Sa Su
        #                    1
        #  2  3  4  5  6  7  8
        #  9 10 11 12 13 14 15
        # 16 17 18 19 20 21 22
        # 23 24 25 26 27 28 29
        # 30
        all_sessions = self.nyse_sessions
        compute_dates = all_sessions[all_sessions.slice_indexer(
            '2014-06-05', '2015-01-06')]
        start_date, end_date = compute_dates[[0, -1]]

        pipe = Pipeline({
            'year': term.downsample(frequency='year_start'),
            'quarter': term.downsample(frequency='quarter_start'),
            'month': term.downsample(frequency='month_start'),
            'week': term.downsample(frequency='week_start'),
        })

        # Raw values for term, computed each day from 2014 to the end of the
        # target period.
        raw_term_results = self.run_pipeline(
            Pipeline({'term': term}),
            start_date=pd.Timestamp('2014-01-02', tz='UTC'),
            end_date=pd.Timestamp('2015-01-06', tz='UTC'),
        )['term'].unstack()

        expected_results = {
            'year': (raw_term_results.groupby(
                pd.TimeGrouper('AS')).first().reindex(compute_dates,
                                                      method='ffill')),
            'quarter': (raw_term_results.groupby(
                pd.TimeGrouper('QS')).first().reindex(compute_dates,
                                                      method='ffill')),
            'month': (raw_term_results.groupby(
                pd.TimeGrouper('MS')).first().reindex(compute_dates,
                                                      method='ffill')),
            'week': (raw_term_results.groupby(pd.TimeGrouper(
                'W', label='left')).first().reindex(compute_dates,
                                                    method='ffill')),
        }

        results = self.run_pipeline(pipe, start_date, end_date)

        for frequency in expected_results:
            result = results[frequency].unstack()
            expected = expected_results[frequency]
            assert_frame_equal(result, expected)
Esempio n. 5
0
    def test_slice_with_masking(self, unmasked_column, slice_column):
        """
        Test that masking a factor that uses slices as inputs does not mask the
        slice data.
        """
        sids = self.sids
        asset_finder = self.asset_finder
        start_date = self.pipeline_start_date
        end_date = self.pipeline_end_date

        # Create a filter that masks out all but a single asset.
        unmasked_asset = asset_finder.retrieve_asset(sids[unmasked_column])
        unmasked_asset_only = (AssetID().eq(unmasked_asset.sid))

        # Asset used to create our slice. In the cases where this is different
        # than `unmasked_asset`, our slice should still have non-missing data
        # when used as an input to our custom factor. That is, it should not be
        # masked out.
        slice_asset = asset_finder.retrieve_asset(sids[slice_column])

        returns = Returns(window_length=2, inputs=[self.col])
        returns_slice = returns[slice_asset]

        returns_results = self.run_pipeline(
            Pipeline(columns={'returns': returns}),
            start_date,
            end_date,
        )
        returns_results = returns_results['returns'].unstack()

        class UsesSlicedInput(CustomFactor):
            window_length = 1
            inputs = [returns, returns_slice]

            def compute(self, today, assets, out, returns, returns_slice):
                # Ensure that our mask correctly affects the `returns` input
                # and does not affect the `returns_slice` input.
                assert returns.shape == (1, 1)
                assert returns_slice.shape == (1, 1)
                assert returns[0, 0] == \
                    returns_results.loc[today, unmasked_asset]
                assert returns_slice[0, 0] == \
                    returns_results.loc[today, slice_asset]

        columns = {'masked': UsesSlicedInput(mask=unmasked_asset_only)}

        # Assertions about the expected data are made in the `compute` function
        # of our custom factor above.
        self.run_pipeline(Pipeline(columns=columns), start_date, end_date)
Esempio n. 6
0
    def test_add(self):
        p = Pipeline()
        f = SomeFactor()

        p.add(f, 'f')
        self.assertEqual(p.columns, {'f': f})

        p.add(f > 5, 'g')
        self.assertEqual(p.columns, {'f': f, 'g': f > 5})

        with self.assertRaises(TypeError):
            p.add(f, 1)

        with self.assertRaises(TypeError):
            p.add(USEquityPricing.open, 'open')
Esempio n. 7
0
    def test_load_with_trading_calendar(self):
        engine = SimplePipelineEngine(
            lambda x: self.loader,
            self.trading_days,
            self.asset_finder,
        )

        results = engine.run_pipeline(
            Pipeline({c.name: c.latest for c in EventDataSet.columns}),
            start_date=self.trading_days[0],
            end_date=self.trading_days[-1],
        )

        for c in EventDataSet.columns:
            if c in self.next_value_columns:
                self.check_next_value_results(
                    c,
                    results[c.name].unstack(),
                    self.trading_days,
                )
            elif c in self.previous_value_columns:
                self.check_previous_value_results(
                    c,
                    results[c.name].unstack(),
                    self.trading_days,
                )
            else:
                raise AssertionError("Unexpected column %s." % c)
Esempio n. 8
0
    def test_slice(self, my_asset_column, window_length_):
        """
        Test that slices can be created by indexing into a term, and that they
        have the correct shape when used as inputs.
        """
        sids = self.sids
        my_asset = self.asset_finder.retrieve_asset(self.sids[my_asset_column])

        returns = Returns(window_length=2, inputs=[self.col])
        returns_slice = returns[my_asset]

        class UsesSlicedInput(CustomFactor):
            window_length = window_length_
            inputs = [returns, returns_slice]

            def compute(self, today, assets, out, returns, returns_slice):
                # Make sure that our slice is the correct shape (i.e. has only
                # one column) and that it has the same values as the original
                # returns factor from which it is derived.
                assert returns_slice.shape == (self.window_length, 1)
                assert returns.shape == (self.window_length, len(sids))
                check_arrays(returns_slice[:, 0], returns[:, my_asset_column])

        # Assertions about the expected slice data are made in the `compute`
        # function of our custom factor above.
        self.run_pipeline(
            Pipeline(columns={'uses_sliced_input': UsesSlicedInput()}),
            self.pipeline_start_date,
            self.pipeline_end_date,
        )
Esempio n. 9
0
    def test_load_properly_forward_fills(self):
        engine = SimplePipelineEngine(
            lambda x: self.loader,
            self.trading_days,
            self.asset_finder,
        )

        # Cut the dates in half so we need to forward fill some data which
        # is not in our window. The results should be computed the same as if
        # we had computed across the entire window and then sliced after the
        # computation.
        dates = self.trading_days[len(self.trading_days) / 2:]
        results = engine.run_pipeline(
            Pipeline({c.name: c.latest for c in EventDataSet.columns}),
            start_date=dates[0],
            end_date=dates[-1],
        )

        for c in EventDataSet.columns:
            if c in self.next_value_columns:
                self.check_next_value_results(
                    c,
                    results[c.name].unstack(),
                    dates,
                )
            elif c in self.previous_value_columns:
                self.check_previous_value_results(
                    c,
                    results[c.name].unstack(),
                    dates,
                )
            else:
                raise AssertionError("Unexpected column %s." % c)
Esempio n. 10
0
    def test_construction(self):
        p0 = Pipeline()
        self.assertEqual(p0.columns, {})
        self.assertIs(p0.screen, None)

        columns = {'f': SomeFactor()}
        p1 = Pipeline(columns=columns)
        self.assertEqual(p1.columns, columns)

        screen = SomeFilter()
        p2 = Pipeline(screen=screen)
        self.assertEqual(p2.columns, {})
        self.assertEqual(p2.screen, screen)

        p3 = Pipeline(columns=columns, screen=screen)
        self.assertEqual(p3.columns, columns)
        self.assertEqual(p3.screen, screen)
Esempio n. 11
0
def make_pipeline():
    return Pipeline(
        columns={
            'price': CryptoPricing.open.latest,
            'short_mavg': VWAP(window_length=SHORT_WINDOW),
            'long_mavg': VWAP(window_length=LONG_WINDOW),
        }
    )
Esempio n. 12
0
        def initialize(context):
            pipeline = attach_pipeline(Pipeline(), 'test')

            vwap = VWAP(window_length=10)
            pipeline.add(vwap, 'vwap')

            # Nothing should have prices less than 0.
            pipeline.set_screen(vwap < 0)
Esempio n. 13
0
    def test_add(self):
        p = Pipeline()
        f = SomeFactor()

        p.add(f, 'f')
        self.assertEqual(p.columns, {'f': f})

        p.add(f > 5, 'g')
        self.assertEqual(p.columns, {'f': f, 'g': f > 5})

        with self.assertRaises(TypeError):
            p.add(f, 1)

        with self.assertRaises(TypeError):
            p.add(USEquityPricing.open, 'open')
Esempio n. 14
0
    def test_factor_correlation_methods(self,
                                        returns_length,
                                        correlation_length):
        """
        Ensure that `Factor.pearsonr` and `Factor.spearmanr` are consistent
        with the built-in factors `RollingPearsonOfReturns` and
        `RollingSpearmanOfReturns`.
        """
        my_asset = self.my_asset
        start_date = self.pipeline_start_date
        end_date = self.pipeline_end_date
        run_pipeline = self.run_pipeline

        returns = Returns(window_length=returns_length, inputs=[self.col])
        returns_slice = returns[my_asset]

        pearson = returns.pearsonr(
            target=returns_slice, correlation_length=correlation_length,
        )
        spearman = returns.spearmanr(
            target=returns_slice, correlation_length=correlation_length,
        )
        expected_pearson = RollingPearsonOfReturns(
            target=my_asset,
            returns_length=returns_length,
            correlation_length=correlation_length,
        )
        expected_spearman = RollingSpearmanOfReturns(
            target=my_asset,
            returns_length=returns_length,
            correlation_length=correlation_length,
        )

        # These built-ins construct their own Returns factor to use as inputs,
        # so the only way to set our own inputs is to do so after the fact.
        # This should not be done in practice. It is necessary here because we
        # want Returns to use our random data as an input, but by default it is
        # using USEquityPricing.close.
        expected_pearson.inputs = [returns, returns_slice]
        expected_spearman.inputs = [returns, returns_slice]

        columns = {
            'pearson': pearson,
            'spearman': spearman,
            'expected_pearson': expected_pearson,
            'expected_spearman': expected_spearman,
        }

        results = run_pipeline(Pipeline(columns=columns), start_date, end_date)
        pearson_results = results['pearson'].unstack()
        spearman_results = results['spearman'].unstack()
        expected_pearson_results = results['expected_pearson'].unstack()
        expected_spearman_results = results['expected_spearman'].unstack()

        assert_frame_equal(pearson_results, expected_pearson_results)
        assert_frame_equal(spearman_results, expected_spearman_results)
Esempio n. 15
0
    def test_load_empty(self):
        """
        For the case where raw data is empty, make sure we have a result for
        all sids, that the dimensions are correct, and that we have the
        correct missing value.
        """
        raw_events = pd.DataFrame(
            columns=["sid",
                     "timestamp",
                     "event_date",
                     "float",
                     "int",
                     "datetime",
                     "string"]
        )
        next_value_columns = {
            EventDataSet.next_datetime: 'datetime',
            EventDataSet.next_event_date: 'event_date',
            EventDataSet.next_float: 'float',
            EventDataSet.next_int: 'int',
            EventDataSet.next_string: 'string',
            EventDataSet.next_string_custom_missing: 'string'
        }
        previous_value_columns = {
            EventDataSet.previous_datetime: 'datetime',
            EventDataSet.previous_event_date: 'event_date',
            EventDataSet.previous_float: 'float',
            EventDataSet.previous_int: 'int',
            EventDataSet.previous_string: 'string',
            EventDataSet.previous_string_custom_missing: 'string'
        }
        loader = EventsLoader(
            raw_events, next_value_columns, previous_value_columns
        )
        engine = SimplePipelineEngine(
            lambda x: loader,
            self.trading_days,
            self.asset_finder,
        )

        results = engine.run_pipeline(
            Pipeline({c.name: c.latest for c in EventDataSet.columns}),
            start_date=self.trading_days[0],
            end_date=self.trading_days[-1],
        )

        assets = self.asset_finder.retrieve_all(self.ASSET_FINDER_EQUITY_SIDS)
        dates = self.trading_days

        expected = self.frame_containing_all_missing_values(
            index=pd.MultiIndex.from_product([dates, assets]),
            columns=EventDataSet.columns,
        )

        assert_equal(results, expected)
Esempio n. 16
0
    def _test_masked_single_column_output(self):
        """
        Tests for masking custom factors that compute a 1D out.
        """
        start_date = self.pipeline_start_date
        end_date = self.pipeline_end_date

        alternating_mask = (AssetIDPlusDay() % 2).eq(0)
        cascading_mask = AssetIDPlusDay() < (self.sids[-1] + start_date.day)
        alternating_mask.window_safe = True
        cascading_mask.window_safe = True

        for mask in (alternating_mask, cascading_mask):
            class SingleColumnOutput(CustomFactor):
                window_length = 1
                inputs = [self.col, mask]
                window_safe = True
                ndim = 1

                def compute(self, today, assets, out, col, mask):
                    # Because we specified ndim as 1, `out` should always be a
                    # singleton array but `close` should be a sized based on
                    # the mask we passed.
                    assert out.shape == (1,)
                    assert col.shape == (1, mask.sum())
                    out[:] = col.sum()

            # Since we cannot add single column output factors as pipeline
            # columns, we have to test its output through another factor.
            class UsesSingleColumnInput(CustomFactor):
                window_length = 1
                inputs = [self.col, mask, SingleColumnOutput(mask=mask)]

                def compute(self,
                            today,
                            assets,
                            out,
                            col,
                            mask,
                            single_column_output):
                    # Make sure that `single_column` has the correct value
                    # based on the masked it used.
                    assert single_column_output.shape == (1, 1)
                    single_column_output_value = single_column_output[0][0]
                    expected_value = where(mask, col, 0).sum()
                    assert single_column_output_value == expected_value

            columns = {'uses_single_column_input': UsesSingleColumnInput()}

            # Assertions about the expected shapes of our data are made in the
            # `compute` function of our custom factors above.
            self.run_pipeline(Pipeline(columns=columns), start_date, end_date)
Esempio n. 17
0
        def initialize(context):
            pipeline = Pipeline()
            context.vwaps = []
            for length in vwaps:
                name = vwap_key(length)
                factor = VWAP(window_length=length)
                context.vwaps.append(factor)
                pipeline.add(factor, name=name)

            filter_ = (USEquityPricing.close.latest > 300)
            pipeline.add(filter_, 'filter')
            if set_screen:
                pipeline.set_screen(filter_)

            attach_pipeline(pipeline, 'test')
Esempio n. 18
0
    def test_overwrite(self):
        p = Pipeline()
        f = SomeFactor()
        other_f = SomeOtherFactor()

        p.add(f, 'f')
        self.assertEqual(p.columns, {'f': f})

        with self.assertRaises(KeyError) as e:
            p.add(other_f, 'f')
        [message] = e.exception.args
        self.assertEqual(message, "Column 'f' already exists.")

        p.add(other_f, 'f', overwrite=True)
        self.assertEqual(p.columns, {'f': other_f})
Esempio n. 19
0
    def test_set_screen(self):
        f, g = SomeFilter(), SomeOtherFilter()

        p = Pipeline()
        self.assertEqual(p.screen, None)

        p.set_screen(f)
        self.assertEqual(p.screen, f)

        with self.assertRaises(ValueError):
            p.set_screen(f)

        p.set_screen(g, overwrite=True)
        self.assertEqual(p.screen, g)

        with self.assertRaises(TypeError) as e:
            p.set_screen(f, g)

        message = e.exception.args[0]
        self.assertIn(
            "expected a value of type bool or int for argument 'overwrite'",
            message,
        )
Esempio n. 20
0
    def test_remove(self):
        f = SomeFactor()
        p = Pipeline(columns={'f': f})

        with self.assertRaises(KeyError) as e:
            p.remove('not_a_real_name')

        self.assertEqual(f, p.remove('f'))

        with self.assertRaises(KeyError) as e:
            p.remove('f')

        self.assertEqual(e.exception.args, ('f', ))
def make_pipeline(context):
    return Pipeline(
        columns={
            'price': CryptoPricing.open.latest,
            'sma': SimpleMovingAverage(
                    inputs=[CryptoPricing.close],
                    window_length=context.WINDOW,
            ),
            'std': AnnualizedVolatility(
                    inputs=[CryptoPricing.close],
                    window_length=context.WINDOW,
                    annualization_factor=1,
            ),
        }
    )
Esempio n. 22
0
    def test_show_graph(self):
        f = SomeFactor()
        p = Pipeline(columns={'f': SomeFactor()})

        # The real display_graph call shells out to GraphViz, which isn't a
        # requirement, so patch it out for testing.

        def mock_display_graph(g, format='svg', include_asset_exists=False):
            return (g, format, include_asset_exists)

        self.assertEqual(
            inspect.getargspec(display_graph),
            inspect.getargspec(mock_display_graph),
            msg="Mock signature doesn't match signature for display_graph.")

        patch_display_graph = patch(
            'catalyst.pipeline.graph.display_graph',
            mock_display_graph,
        )

        with patch_display_graph:
            graph, format, include_asset_exists = p.show_graph()
            self.assertIs(graph.outputs['f'], f)
            # '' is a sentinel used for screen if it's not supplied.
            self.assertEqual(sorted(graph.outputs.keys()), ['', 'f'])
            self.assertEqual(format, 'svg')
            self.assertEqual(include_asset_exists, False)

        with patch_display_graph:
            graph, format, include_asset_exists = p.show_graph(format='png')
            self.assertIs(graph.outputs['f'], f)
            # '' is a sentinel used for screen if it's not supplied.
            self.assertEqual(sorted(graph.outputs.keys()), ['', 'f'])
            self.assertEqual(format, 'png')
            self.assertEqual(include_asset_exists, False)

        with patch_display_graph:
            graph, format, include_asset_exists = p.show_graph(format='jpeg')
            self.assertIs(graph.outputs['f'], f)
            # '' is a sentinel used for screen if it's not supplied.
            self.assertEqual(sorted(graph.outputs.keys()), ['', 'f'])
            self.assertEqual(format, 'jpeg')
            self.assertEqual(include_asset_exists, False)

        expected = (r".*\.show_graph\(\) expected a value in "
                    r"\('svg', 'png', 'jpeg'\) for argument 'format', "
                    r"but got 'fizzbuzz' instead.")

        with self.assertRaisesRegexp(ValueError, expected):
            p.show_graph(format='fizzbuzz')
Esempio n. 23
0
    def test_latest(self):
        columns = TDS.columns
        pipe = Pipeline(columns={c.name: c.latest for c in columns}, )

        cal_slice = slice(20, 40)
        dates_to_test = self.calendar[cal_slice]
        result = self.engine.run_pipeline(
            pipe,
            dates_to_test[0],
            dates_to_test[-1],
        )
        for column in columns:
            with ignore_pandas_nan_categorical_warning():
                col_result = result[column.name].unstack()

            expected_col_result = self.expected_latest(column, cal_slice)
            assert_frame_equal(col_result, expected_col_result)
Esempio n. 24
0
    def test_overwrite(self):
        p = Pipeline()
        f = SomeFactor()
        other_f = SomeOtherFactor()

        p.add(f, 'f')
        self.assertEqual(p.columns, {'f': f})

        with self.assertRaises(KeyError) as e:
            p.add(other_f, 'f')
        [message] = e.exception.args
        self.assertEqual(message, "Column 'f' already exists.")

        p.add(other_f, 'f', overwrite=True)
        self.assertEqual(p.columns, {'f': other_f})
Esempio n. 25
0
        def initialize(context):
            pipeline = Pipeline()
            context.vwaps = []
            for length in vwaps:
                name = vwap_key(length)
                factor = VWAP(window_length=length)
                context.vwaps.append(factor)
                pipeline.add(factor, name=name)

            filter_ = (USEquityPricing.close.latest > 300)
            pipeline.add(filter_, 'filter')
            if set_screen:
                pipeline.set_screen(filter_)

            attach_pipeline(pipeline, 'test')
Esempio n. 26
0
    def test_single_column_output(self):
        """
        Tests for custom factors that compute a 1D out.
        """
        start_date = self.pipeline_start_date
        end_date = self.pipeline_end_date

        alternating_mask = (AssetIDPlusDay() % 2).eq(0)
        cascading_mask = AssetIDPlusDay() < (self.sids[-1] + start_date.day)

        class SingleColumnOutput(CustomFactor):
            window_length = 1
            inputs = [self.col]
            window_safe = True
            ndim = 1

            def compute(self, today, assets, out, col):
                # Because we specified ndim as 1, `out` should be a singleton
                # array but `close` should be a regular sized input.
                assert out.shape == (1, )
                assert col.shape == (1, 3)
                out[:] = col.sum()

        # Since we cannot add single column output factors as pipeline
        # columns, we have to test its output through another factor.
        class UsesSingleColumnOutput(CustomFactor):
            window_length = 1
            inputs = [SingleColumnOutput()]

            def compute(self, today, assets, out, single_column_output):
                # Make sure that `single_column` has the correct shape. That
                # is, it should always have one column regardless of any mask
                # passed to `UsesSingleColumnInput`.
                assert single_column_output.shape == (1, 1)

        for mask in (alternating_mask, cascading_mask):
            columns = {
                'uses_single_column_output':
                UsesSingleColumnOutput(),
                'uses_single_column_output_masked':
                UsesSingleColumnOutput(mask=mask, ),
            }

            # Assertions about the expected shapes of our data are made in the
            # `compute` function of our custom factors above.
            self.run_pipeline(Pipeline(columns=columns), start_date, end_date)
Esempio n. 27
0
    def test_show_graph(self):
        f = SomeFactor()
        p = Pipeline(columns={'f': SomeFactor()})

        # The real display_graph call shells out to GraphViz, which isn't a
        # requirement, so patch it out for testing.

        def mock_display_graph(g, format='svg', include_asset_exists=False):
            return (g, format, include_asset_exists)

        self.assertEqual(
            inspect.getargspec(display_graph),
            inspect.getargspec(mock_display_graph),
            msg="Mock signature doesn't match signature for display_graph."
        )

        patch_display_graph = patch(
            'catalyst.pipeline.graph.display_graph',
            mock_display_graph,
        )

        with patch_display_graph:
            graph, format, include_asset_exists = p.show_graph()
            self.assertIs(graph.outputs['f'], f)
            # '' is a sentinel used for screen if it's not supplied.
            self.assertEqual(sorted(graph.outputs.keys()), ['', 'f'])
            self.assertEqual(format, 'svg')
            self.assertEqual(include_asset_exists, False)

        with patch_display_graph:
            graph, format, include_asset_exists = p.show_graph(format='png')
            self.assertIs(graph.outputs['f'], f)
            # '' is a sentinel used for screen if it's not supplied.
            self.assertEqual(sorted(graph.outputs.keys()), ['', 'f'])
            self.assertEqual(format, 'png')
            self.assertEqual(include_asset_exists, False)

        with patch_display_graph:
            graph, format, include_asset_exists = p.show_graph(format='jpeg')
            self.assertIs(graph.outputs['f'], f)
            # '' is a sentinel used for screen if it's not supplied.
            self.assertEqual(sorted(graph.outputs.keys()), ['', 'f'])
            self.assertEqual(format, 'jpeg')
            self.assertEqual(include_asset_exists, False)

        expected = (
            r".*\.show_graph\(\) expected a value in "
            r"\('svg', 'png', 'jpeg'\) for argument 'format', "
            r"but got 'fizzbuzz' instead."
        )

        with self.assertRaisesRegexp(ValueError, expected):
            p.show_graph(format='fizzbuzz')
Esempio n. 28
0
    def test_set_screen(self):
        f, g = SomeFilter(), SomeOtherFilter()

        p = Pipeline()
        self.assertEqual(p.screen, None)

        p.set_screen(f)
        self.assertEqual(p.screen, f)

        with self.assertRaises(ValueError):
            p.set_screen(f)

        p.set_screen(g, overwrite=True)
        self.assertEqual(p.screen, g)

        with self.assertRaises(TypeError) as e:
            p.set_screen(f, g)

        message = e.exception.args[0]
        self.assertIn(
            "expected a value of type bool or int for argument 'overwrite'",
            message,
        )
Esempio n. 29
0
    def _check_filters(self, evens, odds, first_five, last_three):
        pipe = Pipeline(columns={
            'sid': SidFactor(),
            'evens': evens,
            'odds': odds,
            'first_five': first_five,
            'last_three': last_three,
        }, )

        start, end = self.trading_days[[-10, -1]]
        results = self.run_pipeline(pipe, start, end).unstack()

        sids = results.sid.astype(int64_dtype)

        assert_equal(results.evens, ~(sids % 2).astype(bool))
        assert_equal(results.odds, (sids % 2).astype(bool))
        assert_equal(results.first_five, sids < 5)
        assert_equal(results.last_three, sids >= 7)
Esempio n. 30
0
    def test_remove(self):
        f = SomeFactor()
        p = Pipeline(columns={'f': f})

        with self.assertRaises(KeyError) as e:
            p.remove('not_a_real_name')

        self.assertEqual(f, p.remove('f'))

        with self.assertRaises(KeyError) as e:
            p.remove('f')

        self.assertEqual(e.exception.args, ('f',))
    def _test_factor_regression_method(self,
                                       returns_length,
                                       regression_length):
        """
        Ensure that `Factor.linear_regression` is consistent with the built-in
        factor `RollingLinearRegressionOfReturns`.
        """
        my_asset = self.my_asset
        start_date = self.pipeline_start_date
        end_date = self.pipeline_end_date
        run_pipeline = self.run_pipeline

        returns = Returns(window_length=returns_length, inputs=[self.col])
        returns_slice = returns[my_asset]

        regression = returns.linear_regression(
            target=returns_slice, regression_length=regression_length,
        )
        expected_regression = RollingLinearRegressionOfReturns(
            target=my_asset,
            returns_length=returns_length,
            regression_length=regression_length,
        )

        # This built-in constructs its own Returns factor to use as an input,
        # so the only way to set our own input is to do so after the fact. This
        # should not be done in practice. It is necessary here because we want
        # Returns to use our random data as an input, but by default it is
        # using USEquityPricing.close.
        expected_regression.inputs = [returns, returns_slice]

        columns = {
            'regression': regression,
            'expected_regression': expected_regression,
        }

        results = run_pipeline(Pipeline(columns=columns), start_date, end_date)
        regression_results = results['regression'].unstack()
        expected_regression_results = results['expected_regression'].unstack()

        assert_frame_equal(regression_results, expected_regression_results)
Esempio n. 32
0
    def test_non_existent_asset(self):
        """
        Test that indexing into a term with a non-existent asset raises the
        proper exception.
        """
        my_asset = Asset(0, exchange="TEST")
        returns = Returns(window_length=2, inputs=[self.col])
        returns_slice = returns[my_asset]

        class UsesSlicedInput(CustomFactor):
            window_length = 1
            inputs = [returns_slice]

            def compute(self, today, assets, out, returns_slice):
                pass

        with self.assertRaises(NonExistentAssetInTimeFrame):
            self.run_pipeline(
                Pipeline(columns={'uses_sliced_input': UsesSlicedInput()}),
                self.pipeline_start_date,
                self.pipeline_end_date,
            )
Esempio n. 33
0
    def test_construction_bad_input_types(self):

        with self.assertRaises(TypeError):
            Pipeline(1)

        Pipeline({})

        with self.assertRaises(TypeError):
            Pipeline({}, 1)

        with self.assertRaises(TypeError):
            Pipeline({}, SomeFactor())

        with self.assertRaises(TypeError):
            Pipeline({'open': USEquityPricing.open})

        Pipeline({}, SomeFactor() > 5)
 def late_attach(context, data):
     attach_pipeline(Pipeline(), 'test')
     raise AssertionError("Shouldn't make it past attach_pipeline!")
Esempio n. 35
0
    def test_correlation_factors(self, returns_length, correlation_length):
        """
        Tests for the built-in factors `RollingPearsonOfReturns` and
        `RollingSpearmanOfReturns`.
        """
        assets = self.assets
        my_asset = self.my_asset
        my_asset_column = self.my_asset_column
        dates = self.dates
        start_date = self.pipeline_start_date
        end_date = self.pipeline_end_date
        start_date_index = self.start_date_index
        end_date_index = self.end_date_index
        num_days = self.num_days
        run_pipeline = self.run_pipeline

        returns = Returns(window_length=returns_length)
        masks = (self.cascading_mask, self.alternating_mask, NotSpecified)
        expected_mask_results = (
            self.expected_cascading_mask_result,
            self.expected_alternating_mask_result,
            self.expected_no_mask_result,
        )

        for mask, expected_mask in zip(masks, expected_mask_results):
            pearson_factor = RollingPearsonOfReturns(
                target=my_asset,
                returns_length=returns_length,
                correlation_length=correlation_length,
                mask=mask,
            )
            spearman_factor = RollingSpearmanOfReturns(
                target=my_asset,
                returns_length=returns_length,
                correlation_length=correlation_length,
                mask=mask,
            )

            columns = {
                'pearson_factor': pearson_factor,
                'spearman_factor': spearman_factor,
            }
            pipeline = Pipeline(columns=columns)
            if mask is not NotSpecified:
                pipeline.add(mask, 'mask')

            results = run_pipeline(pipeline, start_date, end_date)
            pearson_results = results['pearson_factor'].unstack()
            spearman_results = results['spearman_factor'].unstack()
            if mask is not NotSpecified:
                mask_results = results['mask'].unstack()
                check_arrays(mask_results.values, expected_mask)

            # Run a separate pipeline that calculates returns starting
            # (correlation_length - 1) days prior to our start date. This is
            # because we need (correlation_length - 1) extra days of returns to
            # compute our expected correlations.
            results = run_pipeline(
                Pipeline(columns={'returns': returns}),
                dates[start_date_index - (correlation_length - 1)],
                dates[end_date_index],
            )
            returns_results = results['returns'].unstack()

            # On each day, calculate the expected correlation coefficients
            # between the asset we are interested in and each other asset. Each
            # correlation is calculated over `correlation_length` days.
            expected_pearson_results = full_like(pearson_results, nan)
            expected_spearman_results = full_like(spearman_results, nan)
            for day in range(num_days):
                todays_returns = returns_results.iloc[
                    day:day + correlation_length
                ]
                my_asset_returns = todays_returns.iloc[:, my_asset_column]
                for asset, other_asset_returns in todays_returns.iteritems():
                    asset_column = int(asset) - 1
                    expected_pearson_results[day, asset_column] = pearsonr(
                        my_asset_returns, other_asset_returns,
                    )[0]
                    expected_spearman_results[day, asset_column] = spearmanr(
                        my_asset_returns, other_asset_returns,
                    )[0]

            expected_pearson_results = DataFrame(
                data=where(expected_mask, expected_pearson_results, nan),
                index=dates[start_date_index:end_date_index + 1],
                columns=assets,
            )
            assert_frame_equal(pearson_results, expected_pearson_results)

            expected_spearman_results = DataFrame(
                data=where(expected_mask, expected_spearman_results, nan),
                index=dates[start_date_index:end_date_index + 1],
                columns=assets,
            )
            assert_frame_equal(spearman_results, expected_spearman_results)
Esempio n. 36
0
    def test_regression_of_returns_factor(self,
                                          returns_length,
                                          regression_length):
        """
        Tests for the built-in factor `RollingLinearRegressionOfReturns`.
        """
        assets = self.assets
        my_asset = self.my_asset
        my_asset_column = self.my_asset_column
        dates = self.dates
        start_date = self.pipeline_start_date
        end_date = self.pipeline_end_date
        start_date_index = self.start_date_index
        end_date_index = self.end_date_index
        num_days = self.num_days
        run_pipeline = self.run_pipeline

        # The order of these is meant to align with the output of `linregress`.
        outputs = ['beta', 'alpha', 'r_value', 'p_value', 'stderr']

        returns = Returns(window_length=returns_length)
        masks = self.cascading_mask, self.alternating_mask, NotSpecified
        expected_mask_results = (
            self.expected_cascading_mask_result,
            self.expected_alternating_mask_result,
            self.expected_no_mask_result,
        )

        for mask, expected_mask in zip(masks, expected_mask_results):
            regression_factor = RollingLinearRegressionOfReturns(
                target=my_asset,
                returns_length=returns_length,
                regression_length=regression_length,
                mask=mask,
            )

            columns = {
                output: getattr(regression_factor, output)
                for output in outputs
            }
            pipeline = Pipeline(columns=columns)
            if mask is not NotSpecified:
                pipeline.add(mask, 'mask')

            results = run_pipeline(pipeline, start_date, end_date)
            if mask is not NotSpecified:
                mask_results = results['mask'].unstack()
                check_arrays(mask_results.values, expected_mask)

            output_results = {}
            expected_output_results = {}
            for output in outputs:
                output_results[output] = results[output].unstack()
                expected_output_results[output] = full_like(
                    output_results[output], nan,
                )

            # Run a separate pipeline that calculates returns starting
            # (regression_length - 1) days prior to our start date. This is
            # because we need (regression_length - 1) extra days of returns to
            # compute our expected regressions.
            results = run_pipeline(
                Pipeline(columns={'returns': returns}),
                dates[start_date_index - (regression_length - 1)],
                dates[end_date_index],
            )
            returns_results = results['returns'].unstack()

            # On each day, calculate the expected regression results for Y ~ X
            # where Y is the asset we are interested in and X is each other
            # asset. Each regression is calculated over `regression_length`
            # days of data.
            for day in range(num_days):
                todays_returns = returns_results.iloc[
                    day:day + regression_length
                ]
                my_asset_returns = todays_returns.iloc[:, my_asset_column]
                for asset, other_asset_returns in todays_returns.iteritems():
                    asset_column = int(asset) - 1
                    expected_regression_results = linregress(
                        y=other_asset_returns, x=my_asset_returns,
                    )
                    for i, output in enumerate(outputs):
                        expected_output_results[output][day, asset_column] = \
                            expected_regression_results[i]

            for output in outputs:
                output_result = output_results[output]
                expected_output_result = DataFrame(
                    where(expected_mask, expected_output_results[output], nan),
                    index=dates[start_date_index:end_date_index + 1],
                    columns=assets,
                )
                assert_frame_equal(output_result, expected_output_result)
 def initialize(context):
     p = attach_pipeline(Pipeline(), 'test', chunks=chunks)
     p.add(USEquityPricing.close.latest, 'close')
 def initialize(context):
     attach_pipeline(Pipeline(), 'test')
 def initialize(context):
     attach_pipeline(Pipeline(), 'test')
     pipeline_output('test')
     raise AssertionError("Shouldn't make it past pipeline_output()")