Example #1
    def _test_id(self, df, dshape, expected, finder, add):
        expr = bz.data(df, name='expr', dshape=dshape)
        loader = BlazeLoader()
        ds = from_blaze(
            expr,
            loader=loader,
            no_deltas_rule=no_deltas_rules.ignore,
            missing_values=self.missing_values,
        )
        p = Pipeline()
        for a in add:
            p.add(getattr(ds, a).latest, a)
        dates = self.dates

        with tmp_asset_finder() as finder:
            result = SimplePipelineEngine(
                loader,
                dates,
                finder,
            ).run_pipeline(p, dates[0], dates[-1])

        assert_frame_equal(
            result,
            _utc_localize_index_level_0(expected),
            check_dtype=False,
        )
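The helper _utc_localize_index_level_0 is referenced but not defined in these snippets; a minimal sketch of what it plausibly does, inferred from its name and usage (an assumption, not the original helper):

def _utc_localize_index_level_0(df):
    # Localize level 0 of the MultiIndex (the dates) to UTC so the frame
    # lines up with the tz-aware index that run_pipeline produces.
    df = df.copy()
    df.index = df.index.set_levels(
        df.index.levels[0].tz_localize('utc'),
        level=0,
    )
    return df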
Example #2
    def test_id(self):
        expr = bz.Data(self.df, name='expr', dshape=self.dshape)
        loader = BlazeLoader()
        ds = from_blaze(
            expr,
            loader=loader,
            no_deltas_rule=no_deltas_rules.ignore,
        )
        p = Pipeline()
        p.add(ds.value.latest, 'value')
        dates = self.dates

        with tmp_asset_finder() as finder:
            result = SimplePipelineEngine(
                loader,
                dates,
                finder,
            ).run_pipeline(p, dates[0], dates[-1])

        expected = self.df.drop('asof_date', axis=1).set_index(
            ['timestamp', 'sid'],
        )
        expected.index = pd.MultiIndex.from_product((
            expected.index.levels[0],
            finder.retrieve_all(expected.index.levels[1]),
        ))
        assert_frame_equal(result, expected, check_dtype=False)
Example #3
    def test_custom_query_time_tz(self):
        df = self.df.copy()
        df["timestamp"] = (
            (pd.DatetimeIndex(df["timestamp"], tz="EST") + timedelta(hours=8, minutes=44))
            .tz_convert("utc")
            .tz_localize(None)
        )
        df.loc[3:5, "timestamp"] = pd.Timestamp("2014-01-01 13:45")
        expr = bz.Data(df, name="expr", dshape=self.dshape)
        loader = BlazeLoader(data_query_time=time(8, 45), data_query_tz="EST")
        ds = from_blaze(expr, loader=loader, no_deltas_rule=no_deltas_rules.ignore, missing_values=self.missing_values)
        p = Pipeline()
        p.add(ds.value.latest, "value")
        p.add(ds.int_value.latest, "int_value")
        dates = self.dates

        with tmp_asset_finder() as finder:
            result = SimplePipelineEngine(loader, dates, finder).run_pipeline(p, dates[0], dates[-1])

        expected = df.drop("asof_date", axis=1)
        expected["timestamp"] = expected["timestamp"].dt.normalize().astype("datetime64[ns]").dt.tz_localize("utc")
        expected.loc[3:5, "timestamp"] += timedelta(days=1)
        expected.set_index(["timestamp", "sid"], inplace=True)
        expected.index = pd.MultiIndex.from_product(
            (expected.index.levels[0], finder.retrieve_all(expected.index.levels[1]))
        )
        assert_frame_equal(result, expected, check_dtype=False)
Example #4
    def test_id_macro_dataset(self):
        expr = bz.Data(self.macro_df, name='expr', dshape=self.macro_dshape)
        loader = BlazeLoader()
        ds = from_blaze(
            expr,
            loader=loader,
            no_deltas_rule=no_deltas_rules.ignore,
        )
        p = Pipeline()
        p.add(ds.value.latest, 'value')
        dates = self.dates

        asset_info = asset_infos[0][0]
        with tmp_asset_finder(equities=asset_info) as finder:
            result = SimplePipelineEngine(
                loader,
                dates,
                finder,
            ).run_pipeline(p, dates[0], dates[-1])

        nassets = len(asset_info)
        expected = pd.DataFrame(
            list(concatv([0] * nassets, [1] * nassets, [2] * nassets)),
            index=pd.MultiIndex.from_product((
                self.macro_df.timestamp,
                finder.retrieve_all(asset_info.index),
            )),
            columns=('value',),
        )
        assert_frame_equal(result, expected, check_dtype=False)
Example #5
    def _run_pipeline(
        self, expr, deltas, expected_views, expected_output, finder, calendar, start, end, window_length, compute_fn
    ):
        loader = BlazeLoader()
        ds = from_blaze(
            expr, deltas, loader=loader, no_deltas_rule=no_deltas_rules.raise_, missing_values=self.missing_values
        )
        p = Pipeline()

        # prevent unbound locals issue in the inner class
        window_length_ = window_length

        class TestFactor(CustomFactor):
            inputs = (ds.value,)
            window_length = window_length_

            def compute(self, today, assets, out, data):
                assert_array_almost_equal(data, expected_views[today])
                out[:] = compute_fn(data)

        p.add(TestFactor(), "value")

        result = SimplePipelineEngine(loader, calendar, finder).run_pipeline(p, start, end)

        assert_frame_equal(result, _utc_localize_index_level_0(expected_output), check_dtype=False)
Example #6
    def test_id_macro_dataset_multiple_columns(self):
        """
        input (df):
           asof_date  timestamp  other  value
        0 2014-01-01 2014-01-01      1      0
        3 2014-01-02 2014-01-02      2      1
        6 2014-01-03 2014-01-03      3      2

        output (expected):
                                   other  value
        2014-01-01 Equity(65 [A])      1      0
                   Equity(66 [B])      1      0
                   Equity(67 [C])      1      0
        2014-01-02 Equity(65 [A])      2      1
                   Equity(66 [B])      2      1
                   Equity(67 [C])      2      1
        2014-01-03 Equity(65 [A])      3      2
                   Equity(66 [B])      3      2
                   Equity(67 [C])      3      2
        """
        df = self.macro_df.copy()
        df['other'] = df.value + 1
        fields = OrderedDict(self.macro_dshape.measure.fields)
        fields['other'] = fields['value']
        expr = bz.Data(df, name='expr', dshape=var * Record(fields))
        loader = BlazeLoader()
        ds = from_blaze(
            expr,
            loader=loader,
            no_deltas_rule=no_deltas_rules.ignore,
        )
        p = Pipeline()
        p.add(ds.value.latest, 'value')
        p.add(ds.other.latest, 'other')
        dates = self.dates

        asset_info = asset_infos[0][0]
        with tmp_asset_finder(equities=asset_info) as finder:
            result = SimplePipelineEngine(
                loader,
                dates,
                finder,
            ).run_pipeline(p, dates[0], dates[-1])

        expected = pd.DataFrame(
            np.array([[0, 1],
                      [1, 2],
                      [2, 3]]).repeat(3, axis=0),
            index=pd.MultiIndex.from_product((
                df.timestamp,
                finder.retrieve_all(asset_info.index),
            )),
            columns=('value', 'other'),
        ).sort_index(axis=1)
        assert_frame_equal(
            result,
            expected.sort_index(axis=1),
            check_dtype=False,
        )
Example #7
    def test_factor_with_multiple_outputs(self):
        dates = self.dates[5:10]
        assets = self.assets
        asset_ids = self.asset_ids
        constants = self.constants
        num_dates = len(dates)
        num_assets = len(assets)
        open = USEquityPricing.open
        close = USEquityPricing.close
        engine = SimplePipelineEngine(
            lambda column: self.loader, self.dates, self.asset_finder,
        )

        def create_expected_results(expected_value, mask):
            expected_values = where(mask, expected_value, nan)
            return DataFrame(expected_values, index=dates, columns=assets)

        cascading_mask = AssetIDPlusDay() < (asset_ids[-1] + dates[0].day)
        expected_cascading_mask_result = make_cascading_boolean_array(
            shape=(num_dates, num_assets),
        )

        alternating_mask = (AssetIDPlusDay() % 2).eq(0)
        expected_alternating_mask_result = make_alternating_boolean_array(
            shape=(num_dates, num_assets), first_value=False,
        )

        expected_no_mask_result = full(
            shape=(num_dates, num_assets), fill_value=True, dtype=bool_dtype,
        )

        masks = cascading_mask, alternating_mask, NotSpecified
        expected_mask_results = (
            expected_cascading_mask_result,
            expected_alternating_mask_result,
            expected_no_mask_result,
        )
        for mask, expected_mask in zip(masks, expected_mask_results):
            open_price, close_price = MultipleOutputs(mask=mask)
            pipeline = Pipeline(
                columns={'open_price': open_price, 'close_price': close_price},
            )
            if mask is not NotSpecified:
                pipeline.add(mask, 'mask')

            results = engine.run_pipeline(pipeline, dates[0], dates[-1])
            for colname, case_column in (('open_price', open),
                                         ('close_price', close)):
                if mask is not NotSpecified:
                    mask_results = results['mask'].unstack()
                    check_arrays(mask_results.values, expected_mask)
                output_results = results[colname].unstack()
                output_expected = create_expected_results(
                    constants[case_column], expected_mask,
                )
                assert_frame_equal(output_results, output_expected)
Example #8
    def test_conflict_between_outputs(self):
        class D(DataSet):
            c = Column(float)

        D_US = D.specialize(US_EQUITIES)
        D_CA = D.specialize(CA_EQUITIES)

        pipe = Pipeline({"f": D_US.c.latest, "g": D_CA.c.latest})
        with self.assertRaises(AmbiguousDomain) as e:
            pipe.domain(default=GENERIC)

        self.assertEqual(e.exception.domains, [CA_EQUITIES, US_EQUITIES])
Example #9
def initialize(context):

    # Create, register and name a pipeline in initialize.
    pipe = Pipeline()
    attach_pipeline(pipe, 'example')

    # Construct a simple moving average factor and add it to the pipeline.
    sma_short = SimpleMovingAverage(inputs=[USEquityPricing.close], window_length=10)
    pipe.add(sma_short, 'sma_short')

    # Set a screen on the pipeline to filter out securities.
    pipe.set_screen(sma_short > 1.0)
Example #10
    def test_adding_slice_column(self):
        """
        Test that slices cannot be added as a pipeline column.
        """
        my_asset = self.asset_finder.retrieve_asset(self.sids[0])
        open_slice = OpenPrice()[my_asset]

        with self.assertRaises(UnsupportedPipelineOutput):
            Pipeline(columns={'open_slice': open_slice})

        pipe = Pipeline(columns={})
        with self.assertRaises(UnsupportedPipelineOutput):
            pipe.add(open_slice, 'open_slice')
Example #11
def test_mean_reversion_5day_sector_neutral_smoothed(fn):
    column_name = 'Mean_Reversion_5Day_Sector_Neutral_Smoothed'
    start_date_str = '2015-01-05'
    end_date_str = '2015-01-07'

    # Build engine
    trading_calendar = get_calendar('NYSE')
    bundle_data = bundles.load(project_helper.EOD_BUNDLE_NAME)
    engine = project_helper.build_pipeline_engine(bundle_data, trading_calendar)

    # Build pipeline
    universe_window_length = 2
    universe_asset_count = 4
    universe = AverageDollarVolume(window_length=universe_window_length).top(universe_asset_count)
    pipeline = Pipeline(screen=universe)

    run_pipeline_args = {
        'pipeline': pipeline,
        'start_date': pd.Timestamp(start_date_str, tz='utc'),
        'end_date': pd.Timestamp(end_date_str, tz='utc')}
    fn_inputs = {
        'window_length': 3,
        'universe': universe,
        'sector': project_helper.Sector()}
    fn_correct_outputs = OrderedDict([
        (
            'pipline_out', pd.DataFrame(
                [0.44721360, 1.34164079, -1.34164079, -0.44721360,
                 1.34164079, 0.44721360, -1.34164079, -0.44721360,
                 0.44721360, 1.34164079, -1.34164079, -0.44721360],
                engine.run_pipeline(**run_pipeline_args).index,
                [column_name]))])

    print('Running Integration Test on pipeline:')
    print('> start_date = pd.Timestamp(\'{}\', tz=\'utc\')'.format(start_date_str))
    print('> end_date = pd.Timestamp(\'{}\', tz=\'utc\')'.format(end_date_str))
    print('> universe = AverageDollarVolume(window_length={}).top({})'.format(
        universe_window_length, universe_asset_count))
    print('> factor = {}('.format(fn.__name__))
    print('    window_length={},'.format(fn_inputs['window_length']))
    print('    universe=universe,')
    print('    sector=project_helper.Sector())')
    print('> pipeline.add(factor, \'{}\')'.format(column_name))
    print('> engine.run_pipeline(pipeline, start_date, end_date)')
    print('')

    pipeline.add(fn(**fn_inputs), column_name)
    assert_output(engine.run_pipeline, run_pipeline_args, fn_correct_outputs, check_parameter_changes=False)
Example #12
    def initialize(context):
        # Create, register and name a pipeline in initialize.
        pipe = Pipeline()
        context.attach_pipeline(pipe, 'AAPL')
    
        # Construct a simple moving average factor and add it to the pipeline.
        # USEquityPricing requires a local custom definition
        if True:
            sma_short = SimpleMovingAverage(inputs=[USEquityPricing.close], window_length=10)
        else:  # mid added: sketch of binding a custom close column via a local DataSet subclass
            class LocalData(DataSet):
                close = Column(float64)

            sma_short = SimpleMovingAverage(inputs=[LocalData.close], window_length=10)

        
        pipe.add(sma_short, 'sma_short')
Example #13
def initialize(context):

    pipe = Pipeline()
    attach_pipeline(pipe, 'example')

    # Note that we don't call pipe.add on these Factors.
    # We don't need to store intermediate values if we're not going to use them.
    sma_short = SimpleMovingAverage(inputs=[USEquityPricing.close], window_length=30)
    sma_long = SimpleMovingAverage(inputs=[USEquityPricing.close], window_length=100)

    sma_val = sma_short/sma_long

    # Construct the custom factor
    mkt_cap = MarketCap()

    # Create and apply a filter representing the top 500 equities by MarketCap
    # every day.
    mkt_cap_top_500 = mkt_cap.top(500)

    remove_penny_stocks = sma_short > 1.0

    pipe.add(sma_val, 'sma_val')
    pipe.add(mkt_cap, 'mkt_cap')
    # Use mkt_cap_top_500 as a mask on rank
    pipe.add(sma_val.rank(mask=mkt_cap_top_500), 'sma_rank')

    # Use multiple screens to narrow the universe
    pipe.set_screen(mkt_cap.top(500) & remove_penny_stocks)
Example #14
    def test_add(self):
        p = Pipeline()
        f = SomeFactor()

        p.add(f, 'f')
        self.assertEqual(p.columns, {'f': f})

        p.add(f > 5, 'g')
        self.assertEqual(p.columns, {'f': f, 'g': f > 5})

        with self.assertRaises(TypeError):
            p.add(f, 1)

        with self.assertRaises(TypeError):
            p.add(USEquityPricing.open, 'open')
Example #15
    def test_add(self):
        p = Pipeline()
        f = SomeFactor()

        p.add(f, "f")
        self.assertEqual(p.columns, {"f": f})

        p.add(f > 5, "g")
        self.assertEqual(p.columns, {"f": f, "g": f > 5})

        with self.assertRaises(TypeError):
            p.add(f, 1)
Example #16
    def test_custom_query_time_tz(self):
        df = self.df.copy()
        df['timestamp'] = (
            pd.DatetimeIndex(df['timestamp'], tz='EST') +
            timedelta(hours=8, minutes=44)
        ).tz_convert('utc').tz_localize(None)
        df.loc[3:5, 'timestamp'] = pd.Timestamp('2014-01-01 13:45')
        expr = bz.data(df, name='expr', dshape=self.dshape)
        loader = BlazeLoader(data_query_time=time(8, 45), data_query_tz='EST')
        ds = from_blaze(
            expr,
            loader=loader,
            no_deltas_rule=no_deltas_rules.ignore,
            missing_values=self.missing_values,
        )
        p = Pipeline()
        p.add(ds.value.latest, 'value')
        p.add(ds.int_value.latest, 'int_value')
        dates = self.dates

        with tmp_asset_finder() as finder:
            result = SimplePipelineEngine(
                loader,
                dates,
                finder,
            ).run_pipeline(p, dates[0], dates[-1])

        expected = df.drop('asof_date', axis=1)
        expected['timestamp'] = expected['timestamp'].dt.normalize().astype(
            'datetime64[ns]',
        ).dt.tz_localize('utc')
        expected.loc[3:5, 'timestamp'] += timedelta(days=1)
        expected.set_index(['timestamp', 'sid'], inplace=True)
        expected.index = pd.MultiIndex.from_product((
            expected.index.levels[0],
            finder.retrieve_all(expected.index.levels[1]),
        ))
        assert_frame_equal(result, expected, check_dtype=False)
Example #17
    def test_add(self):
        p = Pipeline('test')
        f = SomeFactor()

        p.add(f, 'f')
        self.assertEqual(p.columns, {'f': f})

        p.add(f > 5, 'g')
        self.assertEqual(p.columns, {'f': f, 'g': f > 5})

        with self.assertRaises(TypeError):
            p.add(f, 1)
Example #18
    def test_set_screen(self):
        f, g = SomeFilter(), SomeOtherFilter()

        p = Pipeline()
        self.assertEqual(p.screen, None)

        p.set_screen(f)
        self.assertEqual(p.screen, f)

        with self.assertRaises(ValueError):
            p.set_screen(f)

        p.set_screen(g, overwrite=True)
        self.assertEqual(p.screen, g)

        with self.assertRaises(TypeError) as e:
            p.set_screen(f, g)

        message = e.exception.args[0]
        self.assertIn("expected a value of type bool or int for argument 'overwrite'", message)
Example #19
def make_pipeinit(context):
    universe = context.etf_universe
    factors = make_factor()

    pipeline_columns = {}
    for f in factors.keys():
        for days_ago in reversed(range(WINDOW_LENGTH)):
            pipeline_columns[f + '-' + str(days_ago)] = Factor_N_Days_Ago(
                [factors[f](mask=universe)],
                window_length=days_ago + 1,
                mask=universe)

    pipe = Pipeline(columns=pipeline_columns, screen=universe)

    return pipe
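Factor_N_Days_Ago is not defined in this snippet; a plausible minimal implementation (an assumption, not the author's code) is a CustomFactor that surfaces the wrapped factor's value from window_length - 1 days ago. Note that the wrapped factors must be window_safe to be used as inputs:

from zipline.pipeline import CustomFactor

class Factor_N_Days_Ago(CustomFactor):
    # With window_length = days_ago + 1, row 0 of the input window holds the
    # value as of `days_ago` trading days before `today`.
    def compute(self, today, assets, out, input_factor):
        out[:] = input_factor[0]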
Example #20
    def test_simple_beta_matches_regression(self):
        run_pipeline = self.run_pipeline
        simple_beta = SimpleBeta(target=self.my_asset, regression_length=10)
        complex_beta = RollingLinearRegressionOfReturns(
            target=self.my_asset,
            returns_length=2,
            regression_length=10,
        ).beta
        pipe = Pipeline({'simple': simple_beta, 'complex': complex_beta})
        results = run_pipeline(
            pipe,
            self.pipeline_start_date,
            self.pipeline_end_date,
        )
        assert_equal(results['simple'], results['complex'], check_names=False)
Example #21
    def test_masked_single_column_output(self):
        """
        Tests for masking custom factors that compute a 1D out.
        """
        start_date = self.pipeline_start_date
        end_date = self.pipeline_end_date

        alternating_mask = (AssetIDPlusDay() % 2).eq(0)
        cascading_mask = AssetIDPlusDay() < (self.sids[-1] + start_date.day)
        alternating_mask.window_safe = True
        cascading_mask.window_safe = True

        for mask in (alternating_mask, cascading_mask):

            class SingleColumnOutput(CustomFactor):
                window_length = 1
                inputs = [self.col, mask]
                window_safe = True
                ndim = 1

                def compute(self, today, assets, out, col, mask):
                    # Because we specified ndim as 1, `out` should always be a
                    # singleton array, but `col` should be sized based on the
                    # mask we passed.
                    assert out.shape == (1, )
                    assert col.shape == (1, mask.sum())
                    out[:] = col.sum()

            # Since we cannot add single column output factors as pipeline
            # columns, we have to test its output through another factor.
            class UsesSingleColumnInput(CustomFactor):
                window_length = 1
                inputs = [self.col, mask, SingleColumnOutput(mask=mask)]

                def compute(self, today, assets, out, col, mask,
                            single_column_output):
                    # Make sure that `single_column_output` has the correct
                    # value based on the mask it used.
                    assert single_column_output.shape == (1, 1)
                    single_column_output_value = single_column_output[0][0]
                    expected_value = where(mask, col, 0).sum()
                    assert single_column_output_value == expected_value

            columns = {'uses_single_column_input': UsesSingleColumnInput()}

            # Assertions about the expected shapes of our data are made in the
            # `compute` function of our custom factors above.
            self.run_pipeline(Pipeline(columns=columns), start_date, end_date)
Example #22
    def test_SMA(self):
        engine = SimplePipelineEngine(
            lambda column: self.pipeline_loader,
            self.env.trading_days,
            self.finder,
        )
        window_length = 5
        assets = self.all_assets
        dates = date_range(
            self.first_asset_start + self.trading_day,
            self.last_asset_end,
            freq=self.trading_day,
        )
        dates_to_test = dates[window_length:]

        SMA = SimpleMovingAverage(
            inputs=(USEquityPricing.close, ),
            window_length=window_length,
        )

        results = engine.run_pipeline(
            Pipeline(columns={'sma': SMA}),
            dates_to_test[0],
            dates_to_test[-1],
        )

        # Shift back the raw inputs by a trading day because we expect our
        # computed results to be computed using values anchored on the
        # **previous** day's data.
        expected_raw = rolling_mean(
            self.writer.expected_values_2d(
                dates - self.trading_day,
                assets,
                'close',
            ),
            window_length,
            min_periods=1,
        )

        expected = DataFrame(
            # Truncate off the extra rows needed to compute the SMAs.
            expected_raw[window_length:],
            index=dates_to_test,  # dates_to_test is dates[window_length:]
            columns=self.finder.retrieve_all(assets),
        )
        self.write_nans(expected)
        result = results['sma'].unstack()
        assert_frame_equal(result, expected)
Example #23
def make_pipeline():
    """Sets up the pipeline"""
    dollar_volume = AverageDollarVolume(window_length=20)
    adv1000 = dollar_volume.top(1000)
    fd = Fundamentals(mask=adv1000)
    market_cap = fd.cshoq * fd.prccq  # this is how to calculate market cap with Compustat fields
    book_equity = fd.seqq - fd.PS  # this is a quick way to calculate book_equity
    book_to_price = book_equity / market_cap
    biggest = market_cap.top(500, mask=adv1000)
    smallest = market_cap.bottom(500, mask=adv1000)

    highpb = book_to_price.top(500, mask=adv1000)
    lowpb = book_to_price.bottom(500, mask=adv1000)

    momentum = Momentum(mask=adv1000)  # momentum
    high_momentum = momentum.top(500, mask=adv1000)
    low_momentum = momentum.bottom(500, mask=adv1000)

    volatility = Volatility(mask=adv1000)
    highvol = volatility.top(500, mask=adv1000)
    lowvol = volatility.bottom(500, mask=adv1000)

    streversal = RSI(window_length=14, mask=adv1000)
    high_streversal = streversal.top(500, mask=adv1000)
    low_streversal = streversal.bottom(500, mask=adv1000)

    universe = biggest | smallest | highpb | lowpb | low_momentum | high_momentum

    return Pipeline(
        columns={
            'returns': Returns(window_length=2),
            # 'market_cap': market_cap,  # not needed
            # 'book_to_price': book_to_price,  # not needed
            'biggest': biggest,
            'smallest': smallest,
            'highpb': highpb,
            'lowpb': lowpb,
            # 'momentum': momentum,  # not needed
            'low_momentum': low_momentum,
            'high_momentum': high_momentum,
            # 'volatility': volatility, # not needed
            'highvol': highvol,
            'lowvol': lowvol,
            # 'streversal': streversal,  # not needed
            'high_streversal': high_streversal,
            'low_streversal': low_streversal
        },
        screen=universe)
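Fundamentals, Momentum, and Volatility here are custom definitions that aren't shown; a minimal Momentum sketch under that assumption (not the author's actual factor):

from zipline.pipeline import CustomFactor
from zipline.pipeline.data import USEquityPricing

class Momentum(CustomFactor):
    # Price momentum: the latest close relative to the close roughly one
    # trading year (252 days) earlier.
    inputs = [USEquityPricing.close]
    window_length = 252

    def compute(self, today, assets, out, close):
        out[:] = close[-1] / close[0]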
Example #24
    def test_show_graph(self):
        f = SomeFactor()
        p = Pipeline(columns={"f": SomeFactor()})

        # The real display_graph call shells out to GraphViz, which isn't a
        # requirement, so patch it out for testing.

        def mock_display_graph(g, format="svg", include_asset_exists=False):
            return (g, format, include_asset_exists)

        assert getargspec(display_graph) == getargspec(
            mock_display_graph
        ), "Mock signature doesn't match signature for display_graph."

        patch_display_graph = patch(
            "zipline.pipeline.graph.display_graph",
            mock_display_graph,
        )

        with patch_display_graph:
            graph, format, include_asset_exists = p.show_graph()
            assert graph.outputs["f"] is f
            # '' is a sentinel used for screen if it's not supplied.
            assert sorted(graph.outputs.keys()) == ["f", graph.screen_name]
            assert format == "svg"
            assert include_asset_exists is False

        with patch_display_graph:
            graph, format, include_asset_exists = p.show_graph(format="png")
            assert graph.outputs["f"] is f
            # '' is a sentinel used for screen if it's not supplied.
            assert sorted(graph.outputs.keys()) == ["f", graph.screen_name]
            assert format == "png"
            assert include_asset_exists is False

        with patch_display_graph:
            graph, format, include_asset_exists = p.show_graph(format="jpeg")
            assert graph.outputs["f"] is f
            assert sorted(graph.outputs.keys()) == ["f", graph.screen_name]
            assert format == "jpeg"
            assert include_asset_exists is False

        expected = (
            r".*\.show_graph\(\) expected a value in "
            r"\('svg', 'png', 'jpeg'\) for argument 'format', "
            r"but got 'fizzbuzz' instead."
        )

        with pytest.raises(ValueError, match=expected):
            p.show_graph(format="fizzbuzz")
Example #25
    def test_dollar_volume(self):
        results = self.engine.run_pipeline(
            Pipeline(
                columns={
                    'dv1': AverageDollarVolume(window_length=1),
                    'dv5': AverageDollarVolume(window_length=5),
                }),
            self.dates[5],
            self.dates[-1],
        )

        expected_1 = (self.raw_data[5:]**2) * 2
        assert_frame_equal(results['dv1'].unstack(), expected_1)

        expected_5 = rolling_mean((self.raw_data**2) * 2, window=5)[5:]
        assert_frame_equal(results['dv5'].unstack(), expected_5)
Example #26
    def test_same_day_pipeline(self):
        loader = self.loader
        engine = SimplePipelineEngine(
            lambda column: loader,
            self.dates,
            self.asset_finder,
        )
        factor = AssetID()
        asset = self.assets[0]
        p = Pipeline(columns={'f': factor}, screen=factor <= asset)

        # The crux of this is that when we run the pipeline for a single day
        #  (i.e. start and end dates are the same) we should accurately get
        # data for the day prior.
        result = engine.run_pipeline(p, self.dates[1], self.dates[1])
        self.assertEqual(result['f'][0], 1.0)
Example #27
    def test_set_screen(self):
        f, g = SomeFilter(), SomeOtherFilter()

        p = Pipeline()
        self.assertEqual(p.screen, None)

        p.set_screen(f)
        self.assertEqual(p.screen, f)

        with self.assertRaises(ValueError):
            p.set_screen(f)

        p.set_screen(g, overwrite=True)
        self.assertEqual(p.screen, g)
Example #28
    def test_show_graph(self):
        f = SomeFactor()
        p = Pipeline(columns={'f': f})

        # The real display_graph call shells out to GraphViz, which isn't a
        # requirement, so patch it out for testing.

        def mock_display_graph(g, format='svg', include_asset_exists=False):
            return (g, format, include_asset_exists)

        self.assertEqual(
            inspect.getargspec(display_graph),
            inspect.getargspec(mock_display_graph),
            msg="Mock signature doesn't match signature for display_graph.")

        patch_display_graph = patch(
            'zipline.pipeline.graph.display_graph',
            mock_display_graph,
        )

        with patch_display_graph:
            graph, format, include_asset_exists = p.show_graph()
            self.assertIs(graph.outputs['f'], f)
            # '' is a sentinel used for screen if it's not supplied.
            self.assertEqual(sorted(graph.outputs.keys()), ['', 'f'])
            self.assertEqual(format, 'svg')
            self.assertEqual(include_asset_exists, False)

        with patch_display_graph:
            graph, format, include_asset_exists = p.show_graph(format='png')
            self.assertIs(graph.outputs['f'], f)
            # '' is a sentinel used for screen if it's not supplied.
            self.assertEqual(sorted(graph.outputs.keys()), ['', 'f'])
            self.assertEqual(format, 'png')
            self.assertEqual(include_asset_exists, False)

        with patch_display_graph:
            graph, format, include_asset_exists = p.show_graph(format='jpeg')
            self.assertIs(graph.outputs['f'], f)
            # '' is a sentinel used for screen if it's not supplied.
            self.assertEqual(sorted(graph.outputs.keys()), ['', 'f'])
            self.assertEqual(format, 'jpeg')
            self.assertEqual(include_asset_exists, False)

        expected = (r".*\.show_graph\(\) expected a value in "
                    r"\('svg', 'png', 'jpeg'\) for argument 'format', "
                    r"but got 'fizzbuzz' instead.")

        with self.assertRaisesRegex(ValueError, expected):
            p.show_graph(format='fizzbuzz')
Example #29
def make_strategy_pipeline(context):
    pipe = Pipeline()

    # Set the volume filter, 126 days is roughly 6 month daily data
    volume_filter = average_volume_filter(126, 1E7)
    
    # compute past returns
    rsi_factor = technical_factor(126, rsi, 14)
    ema20_factor = technical_factor(126, ema, 20)
    ema50_factor = technical_factor(126, ema, 50)
    
    # add to pipelines
    pipe.add(rsi_factor,'rsi')
    pipe.add(ema20_factor,'ema20')
    pipe.add(ema50_factor,'ema50')
    pipe.set_screen(volume_filter)

    return pipe
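average_volume_filter and technical_factor come from the surrounding strategy library and aren't shown here; a sketch of what average_volume_filter plausibly does, built from zipline's standard factors (an assumption, not the library's implementation):

from zipline.pipeline.data import USEquityPricing
from zipline.pipeline.factors import SimpleMovingAverage

def average_volume_filter(lookback, amount):
    # Keep assets whose average daily volume over `lookback` days exceeds
    # `amount`; comparing a factor to a scalar yields a Filter.
    avg_volume = SimpleMovingAverage(
        inputs=[USEquityPricing.volume],
        window_length=lookback,
    )
    return avg_volume > amount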
Example #30
def make_strategy_pipeline(context):
    pipe = Pipeline()

    # get the strategy parameters
    lookback = context.params['lookback'] * 21
    v = context.params['min_volume']

    # Set the volume filter
    volume_filter = average_volume_filter(lookback, v)

    # compute past returns
    vol_factor = technical_factor(lookback, volatility, 1)
    skew_factor = technical_factor(lookback, skewness, None)
    pipe.add(vol_factor, 'vol')
    pipe.add(skew_factor, 'skew')
    pipe.set_screen(volume_filter)

    return pipe
Example #31
    def test_numeric_factor(self):
        constants = self.constants
        loader = self.loader
        engine = SimplePipelineEngine(
            lambda column: loader,
            self.dates,
            self.asset_finder,
        )
        num_dates = 5
        dates = self.dates[10:10 + num_dates]
        high, low = USEquityPricing.high, USEquityPricing.low
        open, close = USEquityPricing.open, USEquityPricing.close

        high_minus_low = RollingSumDifference(inputs=[high, low])
        open_minus_close = RollingSumDifference(inputs=[open, close])
        avg = (high_minus_low + open_minus_close) / 2

        results = engine.run_pipeline(
            Pipeline(columns={
                'high_low': high_minus_low,
                'open_close': open_minus_close,
                'avg': avg,
            }, ),
            dates[0],
            dates[-1],
        )

        high_low_result = results['high_low'].unstack()
        expected_high_low = 3.0 * (constants[high] - constants[low])
        assert_frame_equal(
            high_low_result,
            DataFrame(expected_high_low, index=dates, columns=self.assets),
        )

        open_close_result = results['open_close'].unstack()
        expected_open_close = 3.0 * (constants[open] - constants[close])
        assert_frame_equal(
            open_close_result,
            DataFrame(expected_open_close, index=dates, columns=self.assets),
        )

        avg_result = results['avg'].unstack()
        expected_avg = (expected_high_low + expected_open_close) / 2.0
        assert_frame_equal(
            avg_result,
            DataFrame(expected_avg, index=dates, columns=self.assets),
        )
Example #32
def initialize(context):
    # The initialize method is called at the very start of your script's
    # execution. You can set up anything you'll be needing later here. The
    # context argument will be received by all pylivetrader methods in
    # your script, and you can store information on it that you'd like to
    # share between methods, or in later trades

    # let's create our pipeline and attach it to pylivetrader execution
    top5 = AverageDollarVolume(window_length=20).top(5)
    pipe = Pipeline({
        'close': USEquityPricing.close.latest,
    }, screen=top5)

    # this line connects the pipeline to pylivetrader. this is done once,
    # and the resulting pipeline is stored in the context. we will get a
    # fresh list of assets every morning in before_trading_start()
    context.attach_pipeline(pipe, "pipe")
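The attached pipeline is then typically consumed each morning; a minimal companion sketch assuming pylivetrader's zipline-style API (not part of the original example):

def before_trading_start(context, data):
    # Fetch today's pipeline results and keep the screened assets for the
    # trading session.
    output = context.pipeline_output('pipe')
    context.assets = output.index.tolist()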
Example #33
    def test_latest(self):
        columns = TDS.columns
        pipe = Pipeline(
            columns={c.name: c.latest for c in columns},
        )

        cal_slice = slice(20, 40)
        dates_to_test = self.calendar[cal_slice]
        result = self.engine.run_pipeline(
            pipe,
            dates_to_test[0],
            dates_to_test[-1],
        )
        for column in columns:
            float_result = result[column.name].unstack()
            expected_float_result = self.expected_latest(column, cal_slice)
            assert_frame_equal(float_result, expected_float_result)
Example #34
    def test_compute(self, dates):
        engine = self.setup_engine(dates)
        self.setup(dates)

        pipe = Pipeline(columns=self.pipeline_columns)

        result = engine.run_pipeline(
            pipe,
            start_date=dates[0],
            end_date=dates[-1],
        )

        for sid in self.sids:
            for col_name in self.cols.keys():
                assert_series_equal(result[col_name].xs(sid, level=1),
                                    self.cols[col_name][sid],
                                    check_names=False)
Example #35
def risk_loading_pipeline(sector_type='cn'):
    """
    Create a pipeline containing all of the risk loadings for the risk model.

    Returns
    -------
    pipeline : Pipeline
        A pipeline containing the risk loadings for each factor in the risk
        model.
    """
    columns = style_columns()
    if sector_type == 'sw':
        columns.update(sw_sector_columns())
    elif sector_type == 'cn':
        columns.update(cn_sector_columns())
    else:
        raise ValueError(f"Unsupported sector_type: {sector_type}")
    return Pipeline(columns=columns, domain=CN_EQUITIES)
Example #36
        def initialize(context):
            pipeline = Pipeline()
            context.vwaps = []
            for length in vwaps:
                name = vwap_key(length)
                factor = VWAP(window_length=length)
                context.vwaps.append(factor)
                pipeline.add(factor, name=name)

            filter_ = USEquityPricing.close.latest > 300
            pipeline.add(filter_, "filter")
            if set_screen:
                pipeline.set_screen(filter_)

            attach_pipeline(pipeline, "test")
Example #37
    def test_single_column_output(self):
        """
        Tests for custom factors that compute a 1D out.
        """
        start_date = self.pipeline_start_date
        end_date = self.pipeline_end_date

        alternating_mask = (AssetIDPlusDay() % 2).eq(0)
        cascading_mask = AssetIDPlusDay() < (self.sids[-1] + start_date.day)

        class SingleColumnOutput(CustomFactor):
            window_length = 1
            inputs = [self.col]
            window_safe = True
            ndim = 1

            def compute(self, today, assets, out, col):
                # Because we specified ndim as 1, `out` should be a singleton
                # array but `col` should be a regular sized input.
                assert out.shape == (1, )
                assert col.shape == (1, 3)
                out[:] = col.sum()

        # Since we cannot add single column output factors as pipeline
        # columns, we have to test its output through another factor.
        class UsesSingleColumnOutput(CustomFactor):
            window_length = 1
            inputs = [SingleColumnOutput()]

            def compute(self, today, assets, out, single_column_output):
                # Make sure that `single_column_output` has the correct shape.
                # That is, it should always have one column regardless of any
                # mask passed to `UsesSingleColumnOutput`.
                assert single_column_output.shape == (1, 1)

        for mask in (alternating_mask, cascading_mask):
            columns = {
                'uses_single_column_output':
                UsesSingleColumnOutput(),
                'uses_single_column_output_masked':
                UsesSingleColumnOutput(mask=mask, ),
            }

            # Assertions about the expected shapes of our data are made in the
            # `compute` function of our custom factors above.
            self.run_pipeline(Pipeline(columns=columns), start_date, end_date)
Example #38
    def test_rolling_and_nonrolling(self):
        open_ = USEquityPricing.open
        close = USEquityPricing.close
        volume = USEquityPricing.volume

        # Test for thirty days up to the last day that we think all
        # the assets existed.
        dates_to_test = self.dates[-30:]

        constants = {open_: 1, close: 2, volume: 3}
        loader = ConstantLoader(
            constants=constants,
            dates=self.dates,
            assets=self.assets,
        )
        engine = SimplePipelineEngine(
            lambda column: loader,
            self.dates,
            self.asset_finder,
        )

        sumdiff = RollingSumDifference()

        result = engine.run_pipeline(
            Pipeline(columns={
                'sumdiff': sumdiff,
                'open': open_.latest,
                'close': close.latest,
                'volume': volume.latest,
            }, ), dates_to_test[0], dates_to_test[-1])
        self.assertIsNotNone(result)
        self.assertEqual({'sumdiff', 'open', 'close', 'volume'},
                         set(result.columns))

        result_index = self.assets * len(dates_to_test)
        result_shape = (len(result_index), )
        check_arrays(
            result['sumdiff'],
            Series(index=result_index, data=full(result_shape, -3)),
        )

        for name, const in [('open', 1), ('close', 2), ('volume', 3)]:
            check_arrays(
                result[name],
                Series(index=result_index, data=full(result_shape, const)),
            )
Example #39
def make_pipeline():
    dollar_volume = AverageDollarVolume(window_length=1)
    high_dollar_volume = dollar_volume.percentile_between(N, 100)
    recent_returns = Returns(window_length=N, mask=high_dollar_volume)
    low_returns = recent_returns.percentile_between(0, 10)
    high_returns = recent_returns.percentile_between(N, 100)
    dv2 = DV2()
    pipe_columns = {
        'low_returns': low_returns,
        'high_returns': high_returns,
        'recent_returns': recent_returns,
        'dollar_volume': dollar_volume,
        'dv2': dv2
    }
    pipe_screen = (low_returns | high_returns)
    pipe = Pipeline(columns=pipe_columns, screen=pipe_screen)
    return pipe
Example #40
    def test_compute(self, dates):
        engine = self.pipeline_event_setup_engine(dates)
        cols = self.setup(dates)

        pipe = Pipeline(columns=self.pipeline_columns)

        result = engine.run_pipeline(
            pipe,
            start_date=dates[0],
            end_date=dates[-1],
        )

        for sid in self.get_sids():
            for col_name in cols.keys():
                assert_series_equal(result[col_name].unstack(1)[sid],
                                    cols[col_name][sid],
                                    check_names=False)
Example #41
    def test_latest(self):
        columns = TDS.columns
        pipe = Pipeline(columns={c.name: c.latest for c in columns}, )

        cal_slice = slice(20, 40)
        dates_to_test = self.trading_days[cal_slice]
        result = self.engine.run_pipeline(
            pipe,
            dates_to_test[0],
            dates_to_test[-1],
        )
        for column in columns:
            with ignore_pandas_nan_categorical_warning():
                col_result = result[column.name].unstack()

            expected_col_result = self.expected_latest(column, cal_slice)
            assert_frame_equal(col_result, expected_col_result)
Example #42
    def test_overwrite(self):
        p = Pipeline()
        f = SomeFactor()
        other_f = SomeOtherFactor()

        p.add(f, 'f')
        self.assertEqual(p.columns, {'f': f})

        with self.assertRaises(KeyError) as e:
            p.add(other_f, 'f')
        [message] = e.exception.args
        self.assertEqual(message, "Column 'f' already exists.")

        p.add(other_f, 'f', overwrite=True)
        self.assertEqual(p.columns, {'f': other_f})
Example #43
def make_pipeline():
    """
    A function to create our dynamic stock selector (pipeline). Documentation
    on pipeline can be found here:
    https://www.quantopian.com/help#pipeline-title
    """

    # Base universe set to the QTradableStocksUS
    base_universe = TradableStocksUS()

    # Factor of yesterday's close price.
    yesterday_close = USEquityPricing.close.latest

    pipe = Pipeline(columns={
        'close': yesterday_close,
    }, screen=base_universe)
    return pipe
Example #44
def make_pipeline():

    yearly_returns = Returns(window_length=252)

    monthly_returns = Returns(window_length=21)

    lagged_returns = yearly_returns - monthly_returns

    return Pipeline(
        columns={
            'lagged_returns': lagged_returns,
            'marketcap': MyDataSet.marketcap.latest,
        },
        screen=lagged_returns.notnull() &
               MyDataSet.marketcap.latest.notnull() &
               MyDataSet.marketcap.latest.top(500)
    )
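MyDataSet is assumed to be a user-defined dataset; a minimal sketch of how such a dataset is declared with zipline's Pipeline API (the dtype is an assumption):

from zipline.pipeline.data import Column, DataSet

class MyDataSet(DataSet):
    marketcap = Column(dtype=float)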
Example #45
    def create_high_dollar_volume_pipeline():
        pipe = Pipeline()

        dollar_volume = AverageDollarVolume(
            window_length=63)  # 63 days = 1 quarter
        pipe.add(dollar_volume, 'dollar_volume')

        high_dollar_volume = dollar_volume.percentile_between(
            95, 100)  # top 5% by dollar volume
        pipe.set_screen(high_dollar_volume)

        return pipe
Example #46
    def test_show_graph(self):
        f = SomeFactor()
        p = Pipeline(columns={'f': f})

        # The real display_graph call shells out to GraphViz, which isn't a
        # requirement, so patch it out for testing.

        def mock_display_graph(g, format='svg', include_asset_exists=False):
            return (g, format, include_asset_exists)

        self.assertEqual(
            inspect.getargspec(display_graph),
            inspect.getargspec(mock_display_graph),
            msg="Mock signature doesn't match signature for display_graph."
        )

        patch_display_graph = patch(
            'zipline.pipeline.graph.display_graph',
            mock_display_graph,
        )

        with patch_display_graph:
            graph, format, include_asset_exists = p.show_graph()
            self.assertIs(graph.outputs['f'], f)
            # '' is a sentinel used for screen if it's not supplied.
            self.assertEqual(sorted(graph.outputs.keys()), ['', 'f'])
            self.assertEqual(format, 'svg')
            self.assertEqual(include_asset_exists, False)

        with patch_display_graph:
            graph, format, include_asset_exists = p.show_graph(format='png')
            self.assertIs(graph.outputs['f'], f)
            # '' is a sentinel used for screen if it's not supplied.
            self.assertEqual(sorted(graph.outputs.keys()), ['', 'f'])
            self.assertEqual(format, 'png')
            self.assertEqual(include_asset_exists, False)

        with patch_display_graph:
            graph, format, include_asset_exists = p.show_graph(format='jpeg')
            self.assertIs(graph.outputs['f'], f)
            # '' is a sentinel used for screen if it's not supplied.
            self.assertEqual(sorted(graph.outputs.keys()), ['', 'f'])
            self.assertEqual(format, 'jpeg')
            self.assertEqual(include_asset_exists, False)

        expected = (
            r".*\.show_graph\(\) expected a value in "
            r"\('svg', 'png', 'jpeg'\) for argument 'format', "
            r"but got 'fizzbuzz' instead."
        )

        with self.assertRaisesRegex(ValueError, expected):
            p.show_graph(format='fizzbuzz')
Example #47
def make_pipeline():
    filter1 = CNEquityPricing.volume.latest > 4000
    # filter2 = CNEquityPricing.high.latest < CNEquityPricing.up_limit.latest/1000
    # filter3 = CNEquityPricing.high.latest > CNEquityPricing.down_limit.latest/1000
    close = CNEquityPricing.close.latest
    market_cap = CNEquityPricing.close.latest * CNFinancialData.total_share_0QE.latest
    universe = filter1 & market_cap.notnull()

    maket_cap_1 = market_cap.deciles(mask=universe).eq(0)

    market_cap_top5 = market_cap.bottom(5, mask=maket_cap_1)

    # market_cap_1_top = market_cap.top(5, mask=maket_cap_1)
    pipe = Pipeline()
    pipe.add(market_cap, 'market_cap')
    pipe.add(close, 'close')
    pipe.set_screen(market_cap_top5)

    return pipe
Example #48
    def test_multiple_rolling_factors(self):

        loader = self.loader
        finder = self.asset_finder
        assets = self.assets
        engine = SimplePipelineEngine(
            lambda column: loader,
            self.dates,
            self.asset_finder,
        )
        shape = num_dates, num_assets = (5, len(assets))
        dates = self.dates[10:10 + num_dates]

        short_factor = RollingSumDifference(window_length=3)
        long_factor = RollingSumDifference(window_length=5)
        high_factor = RollingSumDifference(
            window_length=3,
            inputs=[USEquityPricing.open, USEquityPricing.high],
        )

        pipeline = Pipeline(columns={
            'short': short_factor,
            'long': long_factor,
            'high': high_factor,
        })
        results = engine.run_pipeline(pipeline, dates[0], dates[-1])

        self.assertEqual(set(results.columns), {'short', 'high', 'long'})
        assert_multi_index_is_product(self, results.index, dates,
                                      finder.retrieve_all(assets))

        # row-wise sum over an array whose values are all (1 - 2)
        check_arrays(
            results['short'].unstack().values,
            full(shape, -short_factor.window_length),
        )
        check_arrays(
            results['long'].unstack().values,
            full(shape, -long_factor.window_length),
        )
        # row-wise sum over an array whose values are all (1 - 3)
        check_arrays(
            results['high'].unstack().values,
            full(shape, -2 * high_factor.window_length),
        )
Example #49
def make_pipeline(context):
    """
    A function to create our pipeline (dynamic stock selector). The pipeline is used
    to rank stocks based on different factors, including builtin factors, or custom
    factors that you can define. Documentation on pipeline can be found here:
    https://www.quantopian.com/help#pipeline-title
    """
    # Create a pipeline object.

    # Create a dollar_volume factor using default inputs and window_length.
    # This is a builtin factor.
    dollar_volume = AverageDollarVolume(window_length=1)

    # Define high dollar-volume filter to be the top 2% of stocks by dollar
    # volume.
    high_dollar_volume = dollar_volume.percentile_between(95, 100)

    # Create a recent_returns factor with a 5-day returns lookback for all securities
    # in our high_dollar_volume Filter. This is a custom factor defined below (see
    # RecentReturns class).
    recent_returns = Returns(
        window_length=16, mask=high_dollar_volume)

    # Define high and low returns filters to be the bottom 1% and top 1% of
    # securities in the high dollar-volume group.
    low_returns = recent_returns.percentile_between(0, 5)
    high_returns = recent_returns.percentile_between(95, 100)

    # Define a column dictionary that holds all the Factors
    pipe_columns = {
        'low_returns': low_returns,
        'high_returns': high_returns,
        'recent_returns': recent_returns,
        'dollar_volume': dollar_volume
    }

    # Add a filter to the pipeline such that only high-return and low-return
    # securities are kept.
    # pipe_screen = (low_returns & liquidity_filter | high_returns & vol_filter)
    pipe_screen = (low_returns | high_returns)

    # Create a pipeline object with the defined columns and screen.
    pipe = Pipeline(columns=pipe_columns, screen=pipe_screen)

    return pipe
Example #50
def make_pipeline():

    log.info('Making Pipeline')

    mktcap = IEXKeyStats.marketcap.latest
    primary_share = IsPrimaryShareEmulation()
    universe = mktcap.top(1500, mask=primary_share)

    pipe = Pipeline(
        {
            'close': USEquityPricing.close.latest,
            'str': ComputeSTR(),
            'ind': IEXCompany.industry.latest,
            'type': IEXCompany.issueType.latest,
            'symbol': IEXCompany.symbol.latest,
        },
        screen=universe)
    return pipe
Example #51
def make_pipeline():
    """
    A function to create our dynamic stock selector (pipeline). Documentation
    on pipeline can be found here:
    https://www.quantopian.com/help#pipeline-title
    """

    base_universe = StaticAssets(
        symbols('XLY', 'XLP', 'XLE', 'XLF', 'XLV', 'XLI', 'XLB', 'XLK', 'XLU'))

    # Factor of yesterday's close price.
    yesterday_close = USEquityPricing.close.latest

    pipeline = Pipeline(columns={
        'close': yesterday_close,
    },
                        screen=base_universe)
    return pipeline
Example #52
    def _check_filters(self, evens, odds, first_five, last_three):
        pipe = Pipeline(columns={
            'sid': SidFactor(),
            'evens': evens,
            'odds': odds,
            'first_five': first_five,
            'last_three': last_three,
        }, )

        start, end = self.trading_days[[-10, -1]]
        results = self.run_pipeline(pipe, start, end).unstack()

        sids = results.sid.astype(int64_dtype)

        assert_equal(results.evens, ~(sids % 2).astype(bool))
        assert_equal(results.odds, (sids % 2).astype(bool))
        assert_equal(results.first_five, sids < 5)
        assert_equal(results.last_three, sids >= 7)
Example #53
    def test_remove(self):
        f = SomeFactor()
        p = Pipeline(columns={"f": f})

        with self.assertRaises(KeyError) as e:
            p.remove("not_a_real_name")

        self.assertEqual(f, p.remove("f"))

        with self.assertRaises(KeyError) as e:
            p.remove("f")

        self.assertEqual(e.exception.args, ("f",))
Example #54
    def test_remove(self):
        f = SomeFactor()
        p = Pipeline(columns={'f': f})

        with self.assertRaises(KeyError) as e:
            p.remove('not_a_real_name')

        self.assertEqual(f, p.remove('f'))

        with self.assertRaises(KeyError) as e:
            p.remove('f')

        self.assertEqual(e.exception.args, ('f',))
Example #55
    def test_regression_of_returns_factor(self,
                                          returns_length,
                                          regression_length):
        """
        Tests for the built-in factor `RollingLinearRegressionOfReturns`.
        """
        assets = self.assets
        my_asset = self.my_asset
        my_asset_column = self.my_asset_column
        dates = self.dates
        start_date = self.pipeline_start_date
        end_date = self.pipeline_end_date
        start_date_index = self.start_date_index
        end_date_index = self.end_date_index
        num_days = self.num_days
        run_pipeline = self.run_pipeline

        # The order of these is meant to align with the output of `linregress`.
        outputs = ['beta', 'alpha', 'r_value', 'p_value', 'stderr']

        returns = Returns(window_length=returns_length)
        masks = self.cascading_mask, self.alternating_mask, NotSpecified
        expected_mask_results = (
            self.expected_cascading_mask_result,
            self.expected_alternating_mask_result,
            self.expected_no_mask_result,
        )

        for mask, expected_mask in zip(masks, expected_mask_results):
            regression_factor = RollingLinearRegressionOfReturns(
                target=my_asset,
                returns_length=returns_length,
                regression_length=regression_length,
                mask=mask,
            )

            columns = {
                output: getattr(regression_factor, output)
                for output in outputs
            }
            pipeline = Pipeline(columns=columns)
            if mask is not NotSpecified:
                pipeline.add(mask, 'mask')

            results = run_pipeline(pipeline, start_date, end_date)
            if mask is not NotSpecified:
                mask_results = results['mask'].unstack()
                check_arrays(mask_results.values, expected_mask)

            output_results = {}
            expected_output_results = {}
            for output in outputs:
                output_results[output] = results[output].unstack()
                expected_output_results[output] = full_like(
                    output_results[output], nan,
                )

            # Run a separate pipeline that calculates returns starting
            # (regression_length - 1) days prior to our start date. This is
            # because we need (regression_length - 1) extra days of returns to
            # compute our expected regressions.
            results = run_pipeline(
                Pipeline(columns={'returns': returns}),
                dates[start_date_index - (regression_length - 1)],
                dates[end_date_index],
            )
            returns_results = results['returns'].unstack()

            # On each day, calculate the expected regression results for Y ~ X
            # where Y is the asset we are interested in and X is each other
            # asset. Each regression is calculated over `regression_length`
            # days of data.
            for day in range(num_days):
                todays_returns = returns_results.iloc[
                    day:day + regression_length
                ]
                my_asset_returns = todays_returns.iloc[:, my_asset_column]
                for asset, other_asset_returns in todays_returns.items():
                    asset_column = int(asset) - 1
                    expected_regression_results = linregress(
                        y=other_asset_returns, x=my_asset_returns,
                    )
                    for i, output in enumerate(outputs):
                        expected_output_results[output][day, asset_column] = \
                            expected_regression_results[i]

            for output in outputs:
                output_result = output_results[output]
                expected_output_result = DataFrame(
                    where(expected_mask, expected_output_results[output], nan),
                    index=dates[start_date_index:end_date_index + 1],
                    columns=assets,
                )
                assert_frame_equal(output_result, expected_output_result)
Example #56
    def test_correlation_factors(self, returns_length, correlation_length):
        """
        Tests for the built-in factors `RollingPearsonOfReturns` and
        `RollingSpearmanOfReturns`.
        """
        assets = self.assets
        my_asset = self.my_asset
        my_asset_column = self.my_asset_column
        dates = self.dates
        start_date = self.pipeline_start_date
        end_date = self.pipeline_end_date
        start_date_index = self.start_date_index
        end_date_index = self.end_date_index
        num_days = self.num_days
        run_pipeline = self.run_pipeline

        returns = Returns(window_length=returns_length)
        masks = (self.cascading_mask, self.alternating_mask, NotSpecified)
        expected_mask_results = (
            self.expected_cascading_mask_result,
            self.expected_alternating_mask_result,
            self.expected_no_mask_result,
        )

        for mask, expected_mask in zip(masks, expected_mask_results):
            pearson_factor = RollingPearsonOfReturns(
                target=my_asset,
                returns_length=returns_length,
                correlation_length=correlation_length,
                mask=mask,
            )
            spearman_factor = RollingSpearmanOfReturns(
                target=my_asset,
                returns_length=returns_length,
                correlation_length=correlation_length,
                mask=mask,
            )

            columns = {
                'pearson_factor': pearson_factor,
                'spearman_factor': spearman_factor,
            }
            pipeline = Pipeline(columns=columns)
            if mask is not NotSpecified:
                pipeline.add(mask, 'mask')

            results = run_pipeline(pipeline, start_date, end_date)
            pearson_results = results['pearson_factor'].unstack()
            spearman_results = results['spearman_factor'].unstack()
            if mask is not NotSpecified:
                mask_results = results['mask'].unstack()
                check_arrays(mask_results.values, expected_mask)

            # Run a separate pipeline that calculates returns starting
            # (correlation_length - 1) days prior to our start date. This is
            # because we need (correlation_length - 1) extra days of returns to
            # compute our expected correlations.
            results = run_pipeline(
                Pipeline(columns={'returns': returns}),
                dates[start_date_index - (correlation_length - 1)],
                dates[end_date_index],
            )
            returns_results = results['returns'].unstack()

            # On each day, calculate the expected correlation coefficients
            # between the asset we are interested in and each other asset. Each
            # correlation is calculated over `correlation_length` days.
            expected_pearson_results = full_like(pearson_results, nan)
            expected_spearman_results = full_like(spearman_results, nan)
            for day in range(num_days):
                todays_returns = returns_results.iloc[
                    day:day + correlation_length
                ]
                my_asset_returns = todays_returns.iloc[:, my_asset_column]
                for asset, other_asset_returns in todays_returns.iteritems():
                    asset_column = int(asset) - 1
                    expected_pearson_results[day, asset_column] = pearsonr(
                        my_asset_returns, other_asset_returns,
                    )[0]
                    expected_spearman_results[day, asset_column] = spearmanr(
                        my_asset_returns, other_asset_returns,
                    )[0]

            expected_pearson_results = DataFrame(
                data=where(expected_mask, expected_pearson_results, nan),
                index=dates[start_date_index:end_date_index + 1],
                columns=assets,
            )
            assert_frame_equal(pearson_results, expected_pearson_results)

            expected_spearman_results = DataFrame(
                data=where(expected_mask, expected_spearman_results, nan),
                index=dates[start_date_index:end_date_index + 1],
                columns=assets,
            )
            assert_frame_equal(spearman_results, expected_spearman_results)
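
As a quick illustration of why the test exercises both factors: Pearson and Spearman can disagree on data that is monotone but nonlinear. A minimal sketch with made-up values:

from scipy.stats import pearsonr, spearmanr

a = [1.0, 2.0, 3.0, 4.0, 5.0]
b = [1.0, 4.0, 9.0, 16.0, 25.0]  # monotone in `a`, but not linear
print(pearsonr(a, b)[0])   # < 1.0: measures linear association only
print(spearmanr(a, b)[0])  # == 1.0: the ranks agree perfectly
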
        def initialize(context):
            p = Pipeline()
            p.add(USEquityPricing.close.latest, 'close')

            attach_pipeline(p, 'test')

def initialize(context):
    pipe = Pipeline()
    attach_pipeline(pipe, "example")

    sma_short = SimpleMovingAverage(inputs=[USEquityPricing.close], window_length=30)
    sma_long = SimpleMovingAverage(inputs=[USEquityPricing.close], window_length=100)

    # Combine factors to create a new factor
    sma_val = sma_short / sma_long

    # Create and apply a screen to remove penny stocks
    remove_penny_stocks = sma_short > 1.0
    pipe.set_screen(remove_penny_stocks)

    pipe.add(sma_short, "sma_short")
    pipe.add(sma_long, "sma_long")
    pipe.add(sma_val, "sma_val")
    # Rank a factor using a mask to ignore the values we're
    # filtering out by passing mask=remove_penny_stocks to rank.
    pipe.add(sma_val.rank(mask=remove_penny_stocks), "sma_rank")
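
The attached pipeline would typically be consumed in `before_trading_start` via `pipeline_output`, keyed by the same name passed to `attach_pipeline`. A minimal sketch; the ranking-based selection is illustrative, not part of the snippet above:

def before_trading_start(context, data):
    # Results only include assets that passed the `remove_penny_stocks`
    # screen set in initialize.
    context.output = pipeline_output("example")
    # Illustrative: keep the ten highest-ranked assets by sma_rank
    # (assumes a pandas version that provides sort_values).
    context.my_universe = context.output.sort_values("sma_rank").tail(10)
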
Example #59
0
    def test_factor_with_multiple_outputs(self):
        dates = self.dates[5:10]
        assets = self.assets
        asset_ids = self.asset_ids
        constants = self.constants
        open = USEquityPricing.open
        close = USEquityPricing.close
        engine = SimplePipelineEngine(lambda column: self.loader, self.dates, self.asset_finder)

        def create_expected_results(expected_value, mask):
            expected_values = where(mask, expected_value, nan)
            return DataFrame(expected_values, index=dates, columns=assets)

        cascading_mask = AssetIDPlusDay() < (asset_ids[-1] + dates[0].day)
        expected_cascading_mask_result = array(
            [
                [True, True, True, False],
                [True, True, False, False],
                [True, False, False, False],
                [False, False, False, False],
                [False, False, False, False],
            ],
            dtype=bool,
        )

        alternating_mask = (AssetIDPlusDay() % 2).eq(0)
        expected_alternating_mask_result = array(
            [
                [False, True, False, True],
                [True, False, True, False],
                [False, True, False, True],
                [True, False, True, False],
                [False, True, False, True],
            ],
            dtype=bool,
        )

        expected_no_mask_result = array(
            [
                [True, True, True, True],
                [True, True, True, True],
                [True, True, True, True],
                [True, True, True, True],
                [True, True, True, True],
            ],
            dtype=bool,
        )

        masks = (cascading_mask, alternating_mask, NotSpecified)
        expected_mask_results = (
            expected_cascading_mask_result,
            expected_alternating_mask_result,
            expected_no_mask_result,
        )
        for mask, expected_mask in zip(masks, expected_mask_results):
            open_price, close_price = MultipleOutputs(mask=mask)
            pipeline = Pipeline(columns={"open_price": open_price, "close_price": close_price})
            if mask is not NotSpecified:
                pipeline.add(mask, "mask")

            results = engine.run_pipeline(pipeline, dates[0], dates[-1])
            if mask is not NotSpecified:
                mask_results = results["mask"].unstack()
                check_arrays(mask_results.values, expected_mask)
            for colname, case_column in (("open_price", open), ("close_price", close)):
                output_results = results[colname].unstack()
                output_expected = create_expected_results(constants[case_column], expected_mask)
                assert_frame_equal(output_results, output_expected)
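
The `MultipleOutputs` factor unpacked above is not shown in this excerpt. In zipline it would plausibly be a `CustomFactor` declaring an `outputs` list, with `compute` writing one array per output; a hypothetical reconstruction, inferred from how the test uses it:

from zipline.pipeline import CustomFactor
from zipline.pipeline.data import USEquityPricing

class MultipleOutputs(CustomFactor):
    # Hypothetical definition: two named outputs from one factor.
    inputs = [USEquityPricing.open, USEquityPricing.close]
    outputs = ['open_price', 'close_price']
    window_length = 1

    def compute(self, today, assets, out, open, close):
        # With multiple outputs, `out` exposes one array per output name.
        out.open_price[:] = open[-1]
        out.close_price[:] = close[-1]
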