def test_id(self):
    """The identity pipeline over a blaze expression reproduces the raw frame.

    Loads ``self.df`` through ``from_blaze`` with deltas ignored, takes the
    ``latest`` of the ``value`` column, and checks the pipeline output against
    the source frame re-indexed by (timestamp, asset).
    """
    expr = bz.Data(self.df, name='expr', dshape=self.dshape)
    loader = BlazeLoader()
    ds = from_blaze(
        expr,
        loader=loader,
        no_deltas_rule='ignore',
    )
    p = Pipeline()
    p.add(ds.value.latest, 'value')
    dates = self.dates
    with tmp_asset_finder() as finder:
        result = SimplePipelineEngine(
            loader,
            dates,
            finder,
        ).run_pipeline(p, dates[0], dates[-1])
    # Expected output: the raw frame keyed by (timestamp, sid), with sids
    # resolved to Asset objects to match the engine's output index.
    expected = self.df.drop('asof_date', axis=1).set_index(['timestamp', 'sid'], )
    expected.index = pd.MultiIndex.from_product((
        expected.index.levels[0],
        finder.retrieve_all(expected.index.levels[1]),
    ))
    assert_frame_equal(result, expected, check_dtype=False)
def _test_id(self, df, dshape, expected, finder, add):
    """Run an identity pipeline over ``df`` and compare against ``expected``.

    Parameters
    ----------
    df : pd.DataFrame
        Raw source data for the blaze expression.
    dshape : datashape
        Datashape describing ``df``.
    expected : pd.DataFrame
        Expected pipeline output; its level-0 index is UTC-localized before
        the comparison.
    finder : AssetFinder
        NOTE(review): this parameter is immediately shadowed by the
        ``tmp_asset_finder()`` context manager below and never used —
        confirm whether the caller-supplied finder was intended.
    add : iterable[str]
        Names of ``ds`` columns to add to the pipeline via ``.latest``.
    """
    expr = bz.data(df, name='expr', dshape=dshape)
    loader = BlazeLoader()
    ds = from_blaze(
        expr,
        loader=loader,
        no_deltas_rule=no_deltas_rules.ignore,
        missing_values=self.missing_values,
    )
    p = Pipeline()
    for a in add:
        p.add(getattr(ds, a).latest, a)
    dates = self.dates
    with tmp_asset_finder() as finder:
        result = SimplePipelineEngine(
            loader,
            dates,
            finder,
        ).run_pipeline(p, dates[0], dates[-1])
    assert_frame_equal(
        result,
        _utc_localize_index_level_0(expected),
        check_dtype=False,
    )
def _run_pipeline(
    self,
    expr,
    deltas,
    expected_views,
    expected_output,
    finder,
    calendar,
    start,
    end,
    window_length,
    compute_fn
):
    """Run a deltas-aware pipeline and verify both the per-day input views
    handed to the factor and the final output frame.

    ``expected_views`` maps each day to the trailing window the factor must
    observe; ``compute_fn`` maps that window to the expected output values.
    """
    loader = BlazeLoader()
    ds = from_blaze(
        expr,
        deltas,
        loader=loader,
        no_deltas_rule=no_deltas_rules.raise_,
        missing_values=self.missing_values
    )
    p = Pipeline()

    # prevent unbound locals issue in the inner class
    window_length_ = window_length

    class TestFactor(CustomFactor):
        inputs = (ds.value,)
        window_length = window_length_

        def compute(self, today, assets, out, data):
            # The engine must feed exactly the precomputed view for `today`.
            assert_array_almost_equal(data, expected_views[today])
            out[:] = compute_fn(data)

    p.add(TestFactor(), "value")
    result = SimplePipelineEngine(loader, calendar, finder).run_pipeline(p, start, end)
    assert_frame_equal(result, _utc_localize_index_level_0(expected_output), check_dtype=False)
def initialize(context):
    """Register an example pipeline ranking the SMA ratio within the
    top-500 market-cap universe, screened against penny stocks."""
    pipe = Pipeline()
    attach_pipeline(pipe, 'example')

    # Intermediate factors — only persisted if explicitly added below.
    short_avg = SimpleMovingAverage(inputs=[USEquityPricing.close],
                                    window_length=30)
    long_avg = SimpleMovingAverage(inputs=[USEquityPricing.close],
                                   window_length=100)
    ratio = short_avg / long_avg

    # Custom market-cap factor and derived filters.
    cap = MarketCap()
    top_500_by_cap = cap.top(500)
    not_penny = short_avg > 1.0

    pipe.add(ratio, 'sma_val')
    pipe.add(cap, 'mkt_cap')
    # Rank only within the daily top-500 by market cap.
    pipe.add(ratio.rank(mask=top_500_by_cap), 'sma_rank')
    # Combine both filters to narrow the final universe.
    pipe.set_screen(cap.top(500) & not_penny)
def _run_pipeline(self, expr, deltas, expected_views, expected_output,
                  finder, calendar, start, end, window_length, compute_fn):
    """Run a deltas-aware pipeline over ``expr`` and verify both the
    trailing windows seen by the factor and the final output frame.

    ``expected_views`` maps each simulation day to the window the factor
    must receive; ``compute_fn`` produces the expected output from it.
    """
    loader = BlazeLoader()
    ds = from_blaze(
        expr,
        deltas,
        loader=loader,
        no_deltas_rule=no_deltas_rules.raise_,
    )
    p = Pipeline()

    # prevent unbound locals issue in the inner class
    window_length_ = window_length

    class TestFactor(CustomFactor):
        inputs = ds.value,
        window_length = window_length_

        def compute(self, today, assets, out, data):
            # The engine must feed exactly the precomputed view for `today`.
            assert_array_almost_equal(data, expected_views[today])
            out[:] = compute_fn(data)

    p.add(TestFactor(), 'value')
    result = SimplePipelineEngine(
        loader,
        calendar,
        finder,
    ).run_pipeline(p, start, end)
    assert_frame_equal(
        result,
        expected_output,
        check_dtype=False,
    )
def test_id(self):
    """The identity pipeline over a blaze expression reproduces the raw frame.

    Same as the raw source data: load through ``from_blaze`` (deltas ignored),
    take ``value.latest``, and compare against the frame re-indexed by
    (timestamp, asset).
    """
    expr = bz.Data(self.df, name='expr', dshape=self.dshape)
    loader = BlazeLoader()
    ds = from_blaze(
        expr,
        loader=loader,
        no_deltas_rule=no_deltas_rules.ignore,
    )
    p = Pipeline()
    p.add(ds.value.latest, 'value')
    dates = self.dates
    with tmp_asset_finder() as finder:
        result = SimplePipelineEngine(
            loader,
            dates,
            finder,
        ).run_pipeline(p, dates[0], dates[-1])
    # Expected output: source frame keyed by (timestamp, sid), with sids
    # resolved to Asset objects to match the engine output.
    expected = self.df.drop('asof_date', axis=1).set_index(
        ['timestamp', 'sid'],
    )
    expected.index = pd.MultiIndex.from_product((
        expected.index.levels[0],
        finder.retrieve_all(expected.index.levels[1]),
    ))
    assert_frame_equal(result, expected, check_dtype=False)
def test_id_macro_dataset(self):
    """A macro (sid-less) dataset's daily value is broadcast to every asset."""
    expr = bz.Data(self.macro_df, name='expr', dshape=self.macro_dshape)
    loader = BlazeLoader()
    ds = from_blaze(
        expr,
        loader=loader,
        no_deltas_rule='ignore',
    )
    p = Pipeline()
    p.add(ds.value.latest, 'value')
    dates = self.dates
    asset_info = asset_infos[0][0]
    with tmp_asset_finder(asset_info) as finder:
        result = SimplePipelineEngine(
            loader,
            dates,
            finder,
        ).run_pipeline(p, dates[0], dates[-1])
    nassets = len(asset_info)
    # Each day's scalar (0, 1, 2) repeats once per asset in the universe.
    expected = pd.DataFrame(
        list(concatv([0] * nassets, [1] * nassets, [2] * nassets)),
        index=pd.MultiIndex.from_product((
            self.macro_df.timestamp,
            finder.retrieve_all(asset_info.index),
        )),
        columns=('value', ),
    )
    assert_frame_equal(result, expected, check_dtype=False)
def test_id_macro_dataset(self):
    """A macro (sid-less) dataset's daily value is broadcast to every asset."""
    expr = bz.Data(self.macro_df, name='expr', dshape=self.macro_dshape)
    loader = BlazeLoader()
    ds = from_blaze(
        expr,
        loader=loader,
        no_deltas_rule=no_deltas_rules.ignore,
    )
    p = Pipeline()
    p.add(ds.value.latest, 'value')
    dates = self.dates
    asset_info = asset_infos[0][0]
    with tmp_asset_finder(equities=asset_info) as finder:
        result = SimplePipelineEngine(
            loader,
            dates,
            finder,
        ).run_pipeline(p, dates[0], dates[-1])
    nassets = len(asset_info)
    # Each day's scalar (0, 1, 2) repeats once per asset in the universe.
    expected = pd.DataFrame(
        list(concatv([0] * nassets, [1] * nassets, [2] * nassets)),
        index=pd.MultiIndex.from_product((
            self.macro_df.timestamp,
            finder.retrieve_all(asset_info.index),
        )),
        columns=('value',),
    )
    assert_frame_equal(result, expected, check_dtype=False)
def test_custom_query_time_tz(self):
    """Timestamps are shifted according to the loader's data query time/tz.

    Rows whose EST-shifted timestamp falls after the 08:45 query time only
    become visible on the following day.
    """
    df = self.df.copy()
    # Shift timestamps so they straddle the 08:45 EST query time, then drop
    # tz info (the loader expects naive UTC).
    df['timestamp'] = (
        pd.DatetimeIndex(df['timestamp'], tz='EST') +
        timedelta(hours=8, minutes=44)).tz_convert('utc').tz_localize(None)
    # `.loc` replaces the removed DataFrame.ix indexer; on this integer
    # index both are label-based and inclusive of the endpoint.
    df.loc[3:5, 'timestamp'] = pd.Timestamp('2014-01-01 13:45')
    expr = bz.Data(df, name='expr', dshape=self.dshape)
    loader = BlazeLoader(data_query_time=time(8, 45), data_query_tz='EST')
    ds = from_blaze(
        expr,
        loader=loader,
        no_deltas_rule=no_deltas_rules.ignore,
    )
    p = Pipeline()
    p.add(ds.value.latest, 'value')
    dates = self.dates
    with tmp_asset_finder() as finder:
        result = SimplePipelineEngine(
            loader,
            dates,
            finder,
        ).run_pipeline(p, dates[0], dates[-1])
    expected = df.drop('asof_date', axis=1)
    expected['timestamp'] = expected['timestamp'].dt.normalize().astype(
        'datetime64[ns]',
    )
    # Rows stamped after the query time are visible one day later.
    expected.loc[3:5, 'timestamp'] += timedelta(days=1)
    expected.set_index(['timestamp', 'sid'], inplace=True)
    expected.index = pd.MultiIndex.from_product((
        expected.index.levels[0],
        finder.retrieve_all(expected.index.levels[1]),
    ))
    assert_frame_equal(result, expected, check_dtype=False)
def _test_id(self, df, dshape, expected, finder, add):
    """Run an identity pipeline over ``df`` and compare against ``expected``.

    Parameters
    ----------
    df : pd.DataFrame
        Raw source data for the blaze expression.
    dshape : datashape
        Datashape describing ``df``.
    expected : pd.DataFrame
        Expected pipeline output; its level-0 index is UTC-localized before
        the comparison.
    finder : AssetFinder
        NOTE(review): this parameter is immediately shadowed by the
        ``tmp_asset_finder()`` context manager below and never used —
        confirm whether the caller-supplied finder was intended.
    add : iterable[str]
        Names of ``ds`` columns to add to the pipeline via ``.latest``.
    """
    expr = bz.Data(df, name='expr', dshape=dshape)
    loader = BlazeLoader()
    ds = from_blaze(
        expr,
        loader=loader,
        no_deltas_rule=no_deltas_rules.ignore,
        missing_values=self.missing_values,
    )
    p = Pipeline()
    for a in add:
        p.add(getattr(ds, a).latest, a)
    dates = self.dates
    with tmp_asset_finder() as finder:
        result = SimplePipelineEngine(
            loader,
            dates,
            finder,
        ).run_pipeline(p, dates[0], dates[-1])
    assert_frame_equal(
        result,
        _utc_localize_index_level_0(expected),
        check_dtype=False,
    )
def test_custom_query_time_tz(self):
    """Timestamps are shifted according to the loader's data query time/tz.

    Rows whose EST-shifted timestamp falls after the 08:45 query time only
    become visible on the following day.
    """
    df = self.df.copy()
    # Shift timestamps so they straddle the 08:45 EST query time, then drop
    # tz info (the loader expects naive UTC).
    df["timestamp"] = (
        (pd.DatetimeIndex(df["timestamp"], tz="EST") + timedelta(hours=8, minutes=44))
        .tz_convert("utc")
        .tz_localize(None)
    )
    # `.loc` replaces the removed DataFrame.ix indexer; on this integer
    # index both are label-based and inclusive of the endpoint.
    df.loc[3:5, "timestamp"] = pd.Timestamp("2014-01-01 13:45")
    expr = bz.Data(df, name="expr", dshape=self.dshape)
    loader = BlazeLoader(data_query_time=time(8, 45), data_query_tz="EST")
    ds = from_blaze(expr, loader=loader, no_deltas_rule=no_deltas_rules.ignore,
                    missing_values=self.missing_values)
    p = Pipeline()
    p.add(ds.value.latest, "value")
    p.add(ds.int_value.latest, "int_value")
    dates = self.dates
    with tmp_asset_finder() as finder:
        result = SimplePipelineEngine(loader, dates, finder).run_pipeline(p, dates[0], dates[-1])
    expected = df.drop("asof_date", axis=1)
    expected["timestamp"] = expected["timestamp"].dt.normalize().astype("datetime64[ns]").dt.tz_localize("utc")
    # Rows stamped after the query time are visible one day later.
    expected.loc[3:5, "timestamp"] += timedelta(days=1)
    expected.set_index(["timestamp", "sid"], inplace=True)
    expected.index = pd.MultiIndex.from_product(
        (expected.index.levels[0], finder.retrieve_all(expected.index.levels[1]))
    )
    assert_frame_equal(result, expected, check_dtype=False)
def initialize(context):
    """
    use our factors to add our pipes and screens.
    """
    pipe = Pipeline()
    attach_pipeline(pipe, 'ff_example')
    mkt_cap = MarketEquity()
    pipe.add(mkt_cap, 'market_cap')
    book_equity = BookEquity()
    # book equity over market equity
    bm = book_equity/mkt_cap
    pipe.add(bm, 'bm')
    # operating profitability factor (original comment: "营运能力")
    op = OP()
    pipe.add(op, 'op')
    # investment factor (original comment: "投资因子")
    inv = INV()
    pipe.add(inv, 'inv')
    returns = Returns(window_length=2)
    pipe.add(returns, 'returns')
    dt = get_datetime().normalize()
    start_ = dt if dt > START_DATE else START_DATE
    # NOTE(review): `result` is not defined anywhere in this function, so the
    # line below raises NameError at runtime — presumably the output of
    # pipeline_output(...) was intended. Confirm and fix at the source.
    context.result = result.loc[start_: , :]
def test_id_macro_dataset_multiple_columns(self):
    """A multi-column macro dataset broadcasts each column to every asset.

    input (df):
       asof_date  timestamp  other  value
    0 2014-01-01 2014-01-01      1      0
    3 2014-01-02 2014-01-02      2      1
    6 2014-01-03 2014-01-03      3      2

    output (expected):
                              other  value
    2014-01-01 Equity(65 [A])     1      0
               Equity(66 [B])     1      0
               Equity(67 [C])     1      0
    2014-01-02 Equity(65 [A])     2      1
               Equity(66 [B])     2      1
               Equity(67 [C])     2      1
    2014-01-03 Equity(65 [A])     3      2
               Equity(66 [B])     3      2
               Equity(67 [C])     3      2
    """
    df = self.macro_df.copy()
    df['other'] = df.value + 1
    # Extend the macro dshape with the new 'other' column (same type as value).
    fields = OrderedDict(self.macro_dshape.measure.fields)
    fields['other'] = fields['value']
    expr = bz.Data(df, name='expr', dshape=var * Record(fields))
    loader = BlazeLoader()
    ds = from_blaze(
        expr,
        loader=loader,
        no_deltas_rule=no_deltas_rules.ignore,
    )
    p = Pipeline()
    p.add(ds.value.latest, 'value')
    p.add(ds.other.latest, 'other')
    dates = self.dates
    asset_info = asset_infos[0][0]
    with tmp_asset_finder(equities=asset_info) as finder:
        result = SimplePipelineEngine(
            loader,
            dates,
            finder,
        ).run_pipeline(p, dates[0], dates[-1])
    # Each macro row is repeated once per asset (3 assets).
    expected = pd.DataFrame(
        np.array([[0, 1],
                  [1, 2],
                  [2, 3]]).repeat(3, axis=0),
        index=pd.MultiIndex.from_product((
            df.timestamp,
            finder.retrieve_all(asset_info.index),
        )),
        columns=('value', 'other'),
    ).sort_index(axis=1)
    assert_frame_equal(
        result,
        expected.sort_index(axis=1),
        check_dtype=False,
    )
def test_mean_reversion_5day_sector_neutral_smoothed(fn):
    """Integration test for the smoothed sector-neutral 5-day mean-reversion
    factor built by ``fn``.

    Builds a pipeline engine over the EOD bundle, screens to the 4 highest
    dollar-volume assets, and compares the factor output over
    2015-01-05..2015-01-07 against hard-coded expected z-scores.
    """
    column_name = 'Mean_Reversion_5Day_Sector_Neutral_Smoothed'
    start_date_str = '2015-01-05'
    end_date_str = '2015-01-07'

    # Build engine
    trading_calendar = get_calendar('NYSE')
    bundle_data = bundles.load(project_helper.EOD_BUNDLE_NAME)
    engine = project_helper.build_pipeline_engine(bundle_data, trading_calendar)

    # Build pipeline
    universe_window_length = 2
    universe_asset_count = 4
    universe = AverageDollarVolume(
        window_length=universe_window_length).top(universe_asset_count)
    pipeline = Pipeline(screen=universe)
    run_pipeline_args = {
        'pipeline': pipeline,
        'start_date': pd.Timestamp(start_date_str, tz='utc'),
        'end_date': pd.Timestamp(end_date_str, tz='utc')
    }
    fn_inputs = {
        'window_length': 3,
        'universe': universe,
        'sector': project_helper.Sector()
    }
    # NOTE(review): the pipeline is run here, before the factor column is
    # added, only to borrow the resulting (date, asset) index for the
    # expected frame.
    fn_correct_outputs = OrderedDict([
        ('pipline_out', pd.DataFrame([
            0.44721360, 1.34164079, -1.34164079, -0.44721360,
            1.34164079, 0.44721360, -1.34164079, -0.44721360,
            0.44721360, 1.34164079, -1.34164079, -0.44721360
        ], engine.run_pipeline(**run_pipeline_args).index, [column_name]))
    ])

    print('Running Integration Test on pipeline:')
    print('> start_dat = pd.Timestamp(\'{}\', tz=\'utc\')'.format(
        start_date_str))
    print('> end_date = pd.Timestamp(\'{}\', tz=\'utc\')'.format(end_date_str))
    print('> universe = AverageDollarVolume(window_length={}).top({})'.format(
        universe_window_length, universe_asset_count))
    print('> factor = {}('.format(fn.__name__))
    print(' window_length={},'.format(fn_inputs['window_length']))
    print(' universe=universe,')
    print(' sector=project_helper.Sector())')
    print('> pipeline.add(factor, \'{}\')'.format(column_name))
    print('> engine.run_pipeline(pipeline, start_dat, end_date)')
    print('')

    pipeline.add(fn(**fn_inputs), column_name)
    assert_output(engine.run_pipeline, run_pipeline_args, fn_correct_outputs,
                  check_parameter_changes=False)
def test_id_macro_dataset_multiple_columns(self):
    """A multi-column macro dataset broadcasts each column to every asset.

    input (df):
       asof_date  timestamp  other  value
    0 2014-01-01 2014-01-01      1      0
    3 2014-01-02 2014-01-02      2      1
    6 2014-01-03 2014-01-03      3      2

    output (expected):
                              other  value
    2014-01-01 Equity(65 [A])     1      0
               Equity(66 [B])     1      0
               Equity(67 [C])     1      0
    2014-01-02 Equity(65 [A])     2      1
               Equity(66 [B])     2      1
               Equity(67 [C])     2      1
    2014-01-03 Equity(65 [A])     3      2
               Equity(66 [B])     3      2
               Equity(67 [C])     3      2
    """
    df = self.macro_df.copy()
    df['other'] = df.value + 1
    # Extend the macro dshape with the new 'other' column (same type as value).
    fields = OrderedDict(self.macro_dshape.measure.fields)
    fields['other'] = fields['value']
    expr = bz.Data(df, name='expr', dshape=var * Record(fields))
    loader = BlazeLoader()
    ds = from_blaze(
        expr,
        loader=loader,
        no_deltas_rule=no_deltas_rules.ignore,
    )
    p = Pipeline()
    p.add(ds.value.latest, 'value')
    p.add(ds.other.latest, 'other')
    dates = self.dates
    asset_info = asset_infos[0][0]
    with tmp_asset_finder(equities=asset_info) as finder:
        result = SimplePipelineEngine(
            loader,
            dates,
            finder,
        ).run_pipeline(p, dates[0], dates[-1])
    # Each macro row is repeated once per asset (3 assets).
    expected = pd.DataFrame(
        np.array([[0, 1],
                  [1, 2],
                  [2, 3]]).repeat(3, axis=0),
        index=pd.MultiIndex.from_product((
            df.timestamp,
            finder.retrieve_all(asset_info.index),
        )),
        columns=('value', 'other'),
    ).sort_index(axis=1)
    assert_frame_equal(
        result,
        expected.sort_index(axis=1),
        check_dtype=False,
    )
def test_factor_with_multiple_outputs(self):
    """A factor with multiple outputs honors cascading, alternating, and
    absent masks; each output column is checked independently."""
    dates = self.dates[5:10]
    assets = self.assets
    asset_ids = self.asset_ids
    constants = self.constants
    num_dates = len(dates)
    num_assets = len(assets)
    # NOTE: `open` intentionally shadows the builtin within this test scope.
    open = USEquityPricing.open
    close = USEquityPricing.close
    engine = SimplePipelineEngine(
        lambda column: self.loader, self.dates, self.asset_finder,
    )

    def create_expected_results(expected_value, mask):
        # Masked-out cells are NaN in the expected frame.
        expected_values = where(mask, expected_value, nan)
        return DataFrame(expected_values, index=dates, columns=assets)

    cascading_mask = AssetIDPlusDay() < (asset_ids[-1] + dates[0].day)
    expected_cascading_mask_result = make_cascading_boolean_array(
        shape=(num_dates, num_assets),
    )
    alternating_mask = (AssetIDPlusDay() % 2).eq(0)
    expected_alternating_mask_result = make_alternating_boolean_array(
        shape=(num_dates, num_assets),
        first_value=False,
    )
    expected_no_mask_result = full(
        shape=(num_dates, num_assets),
        fill_value=True,
        dtype=bool_dtype,
    )

    masks = cascading_mask, alternating_mask, NotSpecified
    expected_mask_results = (
        expected_cascading_mask_result,
        expected_alternating_mask_result,
        expected_no_mask_result,
    )
    for mask, expected_mask in zip(masks, expected_mask_results):
        open_price, close_price = MultipleOutputs(mask=mask)
        pipeline = Pipeline(
            columns={'open_price': open_price, 'close_price': close_price},
        )
        if mask is not NotSpecified:
            pipeline.add(mask, 'mask')
        results = engine.run_pipeline(pipeline, dates[0], dates[-1])
        for colname, case_column in (('open_price', open),
                                     ('close_price', close)):
            if mask is not NotSpecified:
                mask_results = results['mask'].unstack()
                check_arrays(mask_results.values, expected_mask)
            output_results = results[colname].unstack()
            output_expected = create_expected_results(
                constants[case_column],
                expected_mask,
            )
            assert_frame_equal(output_results, output_expected)
def create_high_dollar_volume_pipeline():
    """Build a pipeline screened to the top 5% of stocks by quarterly
    average dollar volume."""
    # 63 trading days is roughly one quarter.
    adv = AverageDollarVolume(window_length=63)

    result = Pipeline()
    result.add(adv, 'dollar_volume')
    # Keep only names in the 95th-100th percentile by dollar volume.
    result.set_screen(adv.percentile_between(95, 100))
    return result
def initialize(context):
    """Attach an 'example' pipeline holding a 10-day SMA, screened above $1."""
    pipeline = Pipeline()
    attach_pipeline(pipeline, 'example')

    # 10-day simple moving average of the close.
    ten_day_sma = SimpleMovingAverage(inputs=[USEquityPricing.close],
                                      window_length=10)
    pipeline.add(ten_day_sma, 'sma_short')

    # Drop securities whose SMA is at or below $1 (penny stocks).
    pipeline.set_screen(ten_day_sma > 1.0)
def test_adding_slice_column(self):
    """Slices must be rejected as pipeline columns."""
    asset = self.asset_finder.retrieve_asset(self.sids[0])
    sliced = OpenPrice()[asset]

    # Rejected at construction time...
    with self.assertRaises(UnsupportedPipelineOutput):
        Pipeline(columns={'open_slice': sliced})

    # ...and when added to an already-built pipeline.
    pipe = Pipeline(columns={})
    with self.assertRaises(UnsupportedPipelineOutput):
        pipe.add(sliced, 'open_slice')
def test_adding_slice_column(self):
    """Verify a factor slice can never become a pipeline column."""
    first_asset = self.asset_finder.retrieve_asset(self.sids[0])
    open_slice = OpenPrice()[first_asset]

    # Both entry points — the constructor and add() — must refuse slices.
    with self.assertRaises(UnsupportedPipelineOutput):
        Pipeline(columns={'open_slice': open_slice})

    empty_pipe = Pipeline(columns={})
    with self.assertRaises(UnsupportedPipelineOutput):
        empty_pipe.add(open_slice, 'open_slice')
def make_pipeline(asset_finder):
    """Build the China A-share factor pipeline over the HS300 universe.

    Computes value (ep/bp/bvps), size, liquidity, momentum, and technical
    factors, z-scored within sector, plus one column per one-hot sector
    indicator. Returns a Pipeline screened to the HS300 constituents.
    """
    private_universe = private_universe_mask(
        hs300.tolist(),asset_finder=asset_finder)
    #print private_universe_mask(['000001','000002','000005'],asset_finder=asset_finder)
    ######################################################################################################
    # 5-day forward returns (original comment: "预测一周数据" = predict one week of data)
    returns = Returns(inputs=[USEquityPricing.close], window_length=5)
    ######################################################################################################
    # Value factors: earnings yield, book-to-price, book value per share.
    ep = 1/Fundamental(mask = private_universe,asset_finder=asset_finder).pe
    bp = 1/Fundamental(mask = private_universe,asset_finder=asset_finder).pb
    bvps = Fundamental(mask = private_universe,asset_finder=asset_finder).bvps
    market = Fundamental(mask = private_universe,asset_finder=asset_finder).outstanding
    # Reversal / liquidity / technical factors.
    rev20 = Returns(inputs=[USEquityPricing.close], window_length=20,mask = private_universe)
    vol20 = AverageDollarVolume(window_length=20,mask = private_universe)
    illiq = ILLIQ(window_length=22,mask = private_universe)
    rsi = RSI(window_length=22,mask = private_universe)
    mom = Momentum(window_length=252,mask = private_universe)
    sector = get_sector(asset_finder=asset_finder,mask=private_universe)
    ONEHOTCLASS,sector_indict_keys = get_sector_by_onehot(asset_finder=asset_finder,mask=private_universe)
    # All factors are z-scored within sector; slow-moving fundamentals are
    # downsampled to monthly to avoid daily recomputation.
    pipe_columns = {
        'ep':ep.zscore(groupby=sector).downsample('month_start'),
        'bp':bp.zscore(groupby=sector).downsample('month_start'),
        'bvps':bvps.zscore(groupby=sector).downsample('month_start'),
        'market_cap': market.zscore(groupby=sector).downsample('month_start'),
        'vol20':vol20.zscore(groupby=sector),
        'rev20':rev20.zscore(groupby=sector),
        # Winsorize via percentile mask before z-scoring the noisier factors.
        'ILLIQ':illiq.zscore(groupby=sector,mask=illiq.percentile_between(1, 99)),
        'mom' :mom.zscore(groupby=sector,mask=mom.percentile_between(1, 99)),
        'rsi' :rsi.zscore(groupby=sector,mask=rsi.percentile_between(1, 99)),
        #'sector':sector,
        #'returns':returns.quantiles(100),
        'returns': returns.zscore(),
    }
    # pipe_screen = (low_returns | high_returns)
    pipe = Pipeline(columns=pipe_columns, screen=private_universe, )
    # Add one boolean column per one-hot sector indicator.
    i = 0
    for c in ONEHOTCLASS:
        pipe.add(c,sector_indict_keys[i])
        i +=1
    return pipe
def test_factor(expression, control, start_date='2017-01-04',
                end_date='2017-01-04', show_df=False):
    """Compare an expression-built factor against a reference pipeline factor.

    Runs both over [start_date, end_date] and prints whether their values
    are numerically close; optionally prints the full result frame.
    """
    p = Pipeline(screen=universe)
    p.add(expression.make_pipeline_factor().pipeline_factor(mask=universe),
          'expression_alpha')
    p.add(control, 'pipeline_factor')
    df = run_pipeline(p, start_date, end_date)
    print(
        np.allclose(df['expression_alpha'].values,
                    df['pipeline_factor'].values))
    if show_df:
        # Fixed: `print df` was Python-2-only print-statement syntax (a
        # SyntaxError on Python 3); the call form works on both.
        print(df)
def initialize(context):
    """Set up the monthly price-to-book pipeline and the commission model."""
    pipe = Pipeline()
    attach_pipeline(pipe, 'my_pipeline')

    pipe.add(PriceBookRatio(), 'pb_ratio')

    # If Zipline has trouble pulling the default benchmark, try setting the
    # benchmark to something already in your bundle, e.g.:
    # set_benchmark(symbol("change this to a symbol in your data"))

    # Rebalance on the first trading day of each month.
    schedule_function(rebalance, date_rules.month_start())
    context.set_commission(commission.PerShare(cost=.0075, min_trade_cost=1.0))
def initialize(context):
    """Attach a test pipeline of VWAP factors plus a $300 close-price filter."""
    pipe = Pipeline()
    context.vwaps = []

    # One VWAP column per configured window length.
    for window in vwaps:
        vwap_factor = VWAP(window_length=window)
        context.vwaps.append(vwap_factor)
        pipe.add(vwap_factor, name=vwap_key(window))

    expensive = (USEquityPricing.close.latest > 300)
    pipe.add(expensive, 'filter')
    if set_screen:
        pipe.set_screen(expensive)

    attach_pipeline(pipe, 'test')
def initialize(context):
    """Register the VWAP test pipeline with a $300 latest-close filter."""
    pipe = Pipeline()
    context.vwaps = []

    # Build and record one VWAP factor per window length.
    for window in vwaps:
        factor = VWAP(window_length=window)
        context.vwaps.append(factor)
        pipe.add(factor, name=vwap_key(window))

    price_filter = USEquityPricing.close.latest > 300
    pipe.add(price_filter, "filter")
    if set_screen:
        pipe.set_screen(price_filter)

    attach_pipeline(pipe, "test")
def make_strategy_pipeline(context):
    """Momentum pipeline: past returns of volume-filtered stocks."""
    # Strategy parameters: lookback is given in months (21 trading days each).
    lookback_days = context.params['lookback'] * 21
    min_vol = context.params['min_volume']

    # Liquidity filter feeds directly into the momentum factor.
    liquid = average_volume_filter(lookback_days, min_vol)
    momentum_factor = period_returns(lookback_days, liquid)

    pipe = Pipeline()
    pipe.add(momentum_factor, 'momentum')
    return pipe
def test_mean_reversion_5day_sector_neutral_smoothed(fn):
    """Integration test for the smoothed sector-neutral 5-day mean-reversion
    factor built by ``fn``.

    Builds a pipeline engine over the EOD bundle, screens to the 4 highest
    dollar-volume assets, and compares the factor output over
    2015-01-05..2015-01-07 against hard-coded expected z-scores.
    """
    column_name = 'Mean_Reversion_5Day_Sector_Neutral_Smoothed'
    start_date_str = '2015-01-05'
    end_date_str = '2015-01-07'

    # Build engine
    trading_calendar = get_calendar('NYSE')
    bundle_data = bundles.load(project_helper.EOD_BUNDLE_NAME)
    engine = project_helper.build_pipeline_engine(bundle_data, trading_calendar)

    # Build pipeline
    universe_window_length = 2
    universe_asset_count = 4
    universe = AverageDollarVolume(window_length=universe_window_length).top(universe_asset_count)
    pipeline = Pipeline(screen=universe)

    run_pipeline_args = {
        'pipeline': pipeline,
        'start_date': pd.Timestamp(start_date_str, tz='utc'),
        'end_date': pd.Timestamp(end_date_str, tz='utc')}

    fn_inputs = {
        'window_length': 3,
        'universe': universe,
        'sector': project_helper.Sector()}

    # NOTE(review): the pipeline is run here, before the factor column is
    # added, only to borrow the resulting (date, asset) index for the
    # expected frame.
    fn_correct_outputs = OrderedDict([
        (
            'pipline_out',
            pd.DataFrame(
                [0.44721360, 1.34164079, -1.34164079, -0.44721360,
                 1.34164079, 0.44721360, -1.34164079, -0.44721360,
                 0.44721360, 1.34164079, -1.34164079, -0.44721360],
                engine.run_pipeline(**run_pipeline_args).index,
                [column_name]))])

    print('Running Integration Test on pipeline:')
    print('> start_dat = pd.Timestamp(\'{}\', tz=\'utc\')'.format(start_date_str))
    print('> end_date = pd.Timestamp(\'{}\', tz=\'utc\')'.format(end_date_str))
    print('> universe = AverageDollarVolume(window_length={}).top({})'.format(
        universe_window_length, universe_asset_count))
    print('> factor = {}('.format(fn.__name__))
    print(' window_length={},'.format(fn_inputs['window_length']))
    print(' universe=universe,')
    print(' sector=project_helper.Sector())')
    print('> pipeline.add(factor, \'{}\')'.format(column_name))
    print('> engine.run_pipeline(pipeline, start_dat, end_date)')
    print('')

    pipeline.add(fn(**fn_inputs), column_name)
    assert_output(engine.run_pipeline, run_pipeline_args, fn_correct_outputs,
                  check_parameter_changes=False)
def make_strategy_pipeline(context):
    """RSI factor pipeline screened by average trading volume."""
    # Strategy parameters: lookback is given in months (21 trading days each).
    lookback_days = context.params['lookback'] * 21
    min_vol = context.params['min_volume']

    liquidity_screen = average_volume_filter(lookback_days, min_vol)
    # 14-period RSI computed over the lookback window.
    rsi_factor = technical_factor(lookback_days, rsi, 14)

    pipe = Pipeline()
    pipe.add(rsi_factor, 'rsi')
    pipe.set_screen(liquidity_screen)
    return pipe
def initialize(context):
    """Register the 'AAPL' pipeline containing a 10-day simple moving average."""
    # Create, register and name a pipeline in initialize.
    pipe = Pipeline()
    context.attach_pipeline(pipe, 'AAPL')
    # Construct a simple moving average factor and add it to the pipeline.
    # (original note: "USEquityPricing需要本地自定义" — USEquityPricing needs
    # to be defined/customized locally.)
    if True:
        sma_short = SimpleMovingAverage(inputs=[USEquityPricing.close],
                                        window_length=10)
    else:
        # mid added — dead branch kept for reference: binds a custom Column
        # to a fresh DataSet instead of using USEquityPricing.
        data = Column(float64)
        dataset = DataSet()
        close = data.bind(dataset, 'aapl')
        sma_short = SimpleMovingAverage(inputs=[close], window_length=10)
    pipe.add(sma_short, 'sma_short')
def initialize(context):
    """Register the 'AAPL' pipeline containing a 10-day simple moving average."""
    # Create, register and name a pipeline in initialize.
    pipe = Pipeline()
    context.attach_pipeline(pipe, 'AAPL')
    # Construct a simple moving average factor and add it to the pipeline.
    # (original note: "USEquityPricing需要本地自定义" — USEquityPricing needs
    # to be defined/customized locally.)
    if True:
        sma_short = SimpleMovingAverage(inputs=[USEquityPricing.close],
                                        window_length=10)
    else:
        # mid added — dead branch kept for reference: binds a custom Column
        # to a fresh DataSet instead of using USEquityPricing.
        data = Column(float64)
        dataset = DataSet()
        close = data.bind(dataset, 'aapl')
        sma_short = SimpleMovingAverage(inputs=[close], window_length=10)
    pipe.add(sma_short, 'sma_short')
def make_pipeline(): filter1 = CNEquityPricing.volume.latest > 4000 # filter2 = CNEquityPricing.high.latest < CNEquityPricing.up_limit.latest/1000 # filter3 = CNEquityPricing.high.latest > CNEquityPricing.down_limit.latest/1000 market_cap = CNEquityPricing.close.latest * CNFinancialData.total_share_0QE.latest universe = filter1 & market_cap.notnull() maket_cap_1 = market_cap.deciles(mask=universe).eq(0) market_cap_top5 = market_cap.bottom(5, mask=maket_cap_1) # market_cap_1_top = market_cap.top(5, mask=maket_cap_1) pipe = Pipeline() pipe.add(market_cap, 'market_cap') pipe.set_screen(market_cap_top5) return pipe
def make_pipeline(context):
    """Dual-SMA crossover pipeline restricted to the 50 most liquid names."""
    params = context.params

    adv_filter = curVsAvgVolFilter(params.get('lookback'))
    # midToLargeFilter = isMidToLargeCap(params.get('lookback'))

    slow = SimpleMovingAverage(
        inputs=[USEquityPricing.close],
        window_length=params.get('smaSlowLookback'))
    fast = SimpleMovingAverage(
        inputs=[USEquityPricing.close],
        window_length=params.get('smaFastLookback'))

    # Universe: top 50 by 20-day average dollar volume.
    liquid_top50 = AverageDollarVolume(window_length=20).top(50)

    pipe = Pipeline(screen=liquid_top50)
    pipe.add(adv_filter, 'advFilter')
    pipe.add(slow, 'smaSlow')
    pipe.add(fast, 'smaFast')
    # pipe.add(midToLargeFilter, 'midToLargeFilter')
    return pipe
def make_strategy_pipeline(context):
    """Volatility and skewness factors over a liquidity-screened universe."""
    # Strategy parameters: lookback is given in months (21 trading days each).
    lookback_days = context.params['lookback'] * 21
    min_vol = context.params['min_volume']

    liquidity_screen = average_volume_filter(lookback_days, min_vol)

    pipe = Pipeline()
    pipe.add(technical_factor(lookback_days, volatility, 1), 'vol')
    pipe.add(technical_factor(lookback_days, skewness, None), 'skew')
    pipe.set_screen(liquidity_screen)
    return pipe
def make_strategy_pipeline(context):
    """RSI plus 20/50-day EMA pipeline over a six-month liquidity screen."""
    pipe = Pipeline()

    # 126 trading days is roughly six months of daily data.
    liquidity = average_volume_filter(126, 1E7)

    # (column name, indicator function, indicator parameter)
    factor_specs = (
        ('rsi', rsi, 14),
        ('ema20', ema, 20),
        ('ema50', ema, 50),
    )
    for column, func, param in factor_specs:
        pipe.add(technical_factor(126, func, param), column)

    pipe.set_screen(liquidity)
    return pipe
def test_add(self):
    """add() registers terms under string names and rejects bad inputs."""
    pipe = Pipeline()
    factor = SomeFactor()
    filt = factor > 5

    pipe.add(factor, "f")
    self.assertEqual(pipe.columns, {"f": factor})

    pipe.add(filt, "g")
    self.assertEqual(pipe.columns, {"f": factor, "g": filt})

    # Non-string column names are rejected.
    with self.assertRaises(TypeError):
        pipe.add(factor, 1)
    # A bare dataset column is not a computable term.
    with self.assertRaises(TypeError):
        pipe.add(USEquityPricing.open, "open")
def test_add(self):
    """Columns accumulate under their names; invalid adds raise TypeError."""
    pipeline = Pipeline()
    base = SomeFactor()
    derived = base > 5
    expected = {}

    # Adding terms one at a time grows the columns dict accordingly.
    for term, name in ((base, 'f'), (derived, 'g')):
        pipeline.add(term, name)
        expected[name] = term
        self.assertEqual(pipeline.columns, expected)

    with self.assertRaises(TypeError):
        pipeline.add(base, 1)  # name must be a string
    with self.assertRaises(TypeError):
        pipeline.add(USEquityPricing.open, 'open')  # bare column not allowed
def test_add(self):
    """Columns register under their names; bad adds raise TypeError."""
    pipeline = Pipeline()
    factor = SomeFactor()
    filt = factor > 5

    pipeline.add(factor, "f")
    assert pipeline.columns == {"f": factor}

    pipeline.add(filt, "g")
    assert pipeline.columns == {"f": factor, "g": filt}

    # Non-string names are rejected.
    with pytest.raises(TypeError):
        pipeline.add(factor, 1)
    # A bare dataset column is not a computable term.
    with pytest.raises(TypeError):
        pipeline.add(USEquityPricing.open, "open")
def test_add(self):
    """add() stores terms under their names and validates its arguments."""
    pipe = Pipeline()
    some_factor = SomeFactor()
    some_filter = some_factor > 5

    pipe.add(some_factor, 'f')
    self.assertEqual(pipe.columns, {'f': some_factor})
    pipe.add(some_filter, 'g')
    self.assertEqual(pipe.columns, {'f': some_factor, 'g': some_filter})

    # Column names must be strings...
    with self.assertRaises(TypeError):
        pipe.add(some_factor, 1)
    # ...and raw columns are not valid pipeline outputs.
    with self.assertRaises(TypeError):
        pipe.add(USEquityPricing.open, 'open')
def test_custom_query_time_tz(self):
    """Timestamps are shifted according to the loader's data query time/tz.

    Rows whose EST-shifted timestamp falls after the 08:45 query time only
    become visible on the following day.
    """
    df = self.df.copy()
    # Shift timestamps so they straddle the 08:45 EST query time, then drop
    # tz info (the loader expects naive UTC).
    df['timestamp'] = (
        pd.DatetimeIndex(df['timestamp'], tz='EST') +
        timedelta(hours=8, minutes=44)
    ).tz_convert('utc').tz_localize(None)
    # `.loc` replaces the removed DataFrame.ix indexer; on this integer
    # index both are label-based and inclusive of the endpoint.
    df.loc[3:5, 'timestamp'] = pd.Timestamp('2014-01-01 13:45')
    expr = bz.data(df, name='expr', dshape=self.dshape)
    loader = BlazeLoader(data_query_time=time(8, 45), data_query_tz='EST')
    ds = from_blaze(
        expr,
        loader=loader,
        no_deltas_rule=no_deltas_rules.ignore,
        missing_values=self.missing_values,
    )
    p = Pipeline()
    p.add(ds.value.latest, 'value')
    p.add(ds.int_value.latest, 'int_value')
    dates = self.dates
    with tmp_asset_finder() as finder:
        result = SimplePipelineEngine(
            loader,
            dates,
            finder,
        ).run_pipeline(p, dates[0], dates[-1])
    expected = df.drop('asof_date', axis=1)
    expected['timestamp'] = expected['timestamp'].dt.normalize().astype(
        'datetime64[ns]',
    ).dt.tz_localize('utc')
    # Rows stamped after the query time are visible one day later.
    expected.loc[3:5, 'timestamp'] += timedelta(days=1)
    expected.set_index(['timestamp', 'sid'], inplace=True)
    expected.index = pd.MultiIndex.from_product((
        expected.index.levels[0],
        finder.retrieve_all(expected.index.levels[1]),
    ))
    assert_frame_equal(result, expected, check_dtype=False)
def factor_pipe(context):
    """Pipeline of a 20-day SMA plus one factor per agent model, screened
    to liquid, non-penny stocks."""
    pipe = Pipeline()

    sma_20 = SimpleMovingAverage(inputs=[EquityPricing.close],
                                 window_length=20)

    # Top half of the market by average daily dollar volume over ~1 year.
    liquid = AvgDailyDollarVolumeTraded(
        window_length=252).percentile_between(50, 100)
    # Exclude penny stocks (20-day SMA at or below $1).
    not_penny = sma_20 > 1

    pipe.set_screen(liquid & not_penny)
    pipe.add(sma_20, 'sma_20')

    # One column per model in the agent's ensemble.
    for model in context.agent.models:
        pipe.add(model.factor(inputs=[EquityPricing.close],
                              window_length=252), model.name)
    return pipe
def initialize(context):
    """Attach a 100-day dollar-volume pipeline screened to the top percentile
    and initialize strategy state."""
    ws.send(msg_placeholder % "Simulation Start")

    pipe = Pipeline()
    attach_pipeline(pipe, "volume_pipeline")

    # 100-day average dollar volume factor.
    adv_100 = AverageDollarVolume(window_length=100)
    pipe.add(adv_100, "100_day_dollar_volume")
    # Keep only the top 1% of stocks by dollar volume.
    pipe.set_screen(adv_100.percentile_between(99, 100))

    # Global strategy state.
    context.dev_multiplier = 2
    context.max_notional = 1000000
    context.min_notional = -1000000
    context.days_traded = 0

    ws.send(msg_placeholder % "Pipeline filter attached")
    schedule_function(func=choose_and_order, date_rule=date_rules.every_day())
def test_add(self):
    """add() stores terms under their names; non-string names raise."""
    # NOTE(review): Pipeline is constructed with a positional 'test' argument
    # here; in mainline zipline the first Pipeline parameter is `columns`, so
    # confirm this Pipeline variant accepts a name string — otherwise this is
    # a latent bug.
    p = Pipeline('test')
    f = SomeFactor()
    p.add(f, 'f')
    self.assertEqual(p.columns, {'f': f})
    p.add(f > 5, 'g')
    self.assertEqual(p.columns, {'f': f, 'g': f > 5})
    # Column names must be strings.
    with self.assertRaises(TypeError):
        p.add(f, 1)
def test_add(self):
    """Columns register under their names; non-string names raise."""
    pipeline = Pipeline()
    factor = SomeFactor()
    filt = factor > 5

    pipeline.add(factor, "f")
    self.assertEqual(pipeline.columns, {"f": factor})
    pipeline.add(filt, "g")
    self.assertEqual(pipeline.columns, {"f": factor, "g": filt})

    # A non-string column name is rejected.
    with self.assertRaises(TypeError):
        pipeline.add(factor, 1)
def test_overwrite(self):
    """add() refuses to clobber an existing column unless overwrite=True."""
    pipe = Pipeline()
    original = SomeFactor()
    replacement = SomeOtherFactor()

    pipe.add(original, "f")
    assert pipe.columns == {"f": original}

    # Re-adding under the same name without overwrite is an error...
    with pytest.raises(KeyError, match="Column 'f' already exists."):
        pipe.add(replacement, "f")

    # ...but succeeds when explicitly requested.
    pipe.add(replacement, "f", overwrite=True)
    assert pipe.columns == {"f": replacement}
def initialize(context):
    """Attach an SMA-ratio example pipeline screened against penny stocks."""
    pipe = Pipeline()
    attach_pipeline(pipe, "example")

    short_avg = SimpleMovingAverage(inputs=[USEquityPricing.close],
                                    window_length=30)
    long_avg = SimpleMovingAverage(inputs=[USEquityPricing.close],
                                   window_length=100)
    # Combine the two averages into a ratio factor.
    ratio = short_avg / long_avg

    # Screen out penny stocks before ranking.
    not_penny = short_avg > 1.0
    pipe.set_screen(not_penny)

    pipe.add(short_avg, "sma_short")
    pipe.add(long_avg, "sma_long")
    pipe.add(ratio, "sma_val")
    # Rank with the same mask so screened-out values are ignored.
    pipe.add(ratio.rank(mask=not_penny), "sma_rank")
def test_overwrite(self):
    """Adding a duplicate column name raises unless overwrite=True."""
    pipe = Pipeline()
    first = SomeFactor()
    second = SomeOtherFactor()

    pipe.add(first, 'f')
    self.assertEqual(pipe.columns, {'f': first})

    # Default behavior: refuse to clobber an existing column.
    with self.assertRaises(KeyError) as ctx:
        pipe.add(second, 'f')
    self.assertEqual(ctx.exception.args, ("Column 'f' already exists.",))

    # Explicit overwrite replaces the column.
    pipe.add(second, 'f', overwrite=True)
    self.assertEqual(pipe.columns, {'f': second})
def test_overwrite(self):
    """Overwriting an existing column requires the explicit flag."""
    pipeline = Pipeline()
    original, replacement = SomeFactor(), SomeOtherFactor()

    pipeline.add(original, 'f')
    self.assertEqual(pipeline.columns, {'f': original})

    # Without overwrite=True the duplicate name is an error.
    with self.assertRaises(KeyError) as caught:
        pipeline.add(replacement, 'f')
    [msg] = caught.exception.args
    self.assertEqual(msg, "Column 'f' already exists.")

    # With the flag, the replacement takes the slot.
    pipeline.add(replacement, 'f', overwrite=True)
    self.assertEqual(pipeline.columns, {'f': replacement})
def initialize(context):
    """Wire up the Fama-French example pipeline (market cap,
    book-to-market, returns) and a month-end schedule.
    """
    pipeline = Pipeline()

    market_cap = MarketEquity()
    pipeline.add(market_cap, 'market_cap')

    # Book-to-market: book equity scaled by market equity.
    book = BookEquity()
    pipeline.add(book / market_cap, 'be_me')

    pipeline.add(Returns(), 'returns')

    attach_pipeline(pipeline, 'ff_example')
    schedule_function(func=myfunc, date_rule=date_rules.month_end())
def test_factor_with_multiple_outputs(self):
    """Run a multi-output CustomFactor under three different masks and
    check that each output column matches the expected constant values
    where the mask is True and NaN where it is False.
    """
    # Five-day window carved out of the full test calendar.
    dates = self.dates[5:10]
    assets = self.assets
    asset_ids = self.asset_ids
    constants = self.constants
    # NOTE(review): `open` shadows the builtin here; kept byte-identical
    # in this documentation-only pass.
    open = USEquityPricing.open
    close = USEquityPricing.close
    engine = SimplePipelineEngine(lambda column: self.loader, self.dates, self.asset_finder)

    def create_expected_results(expected_value, mask):
        # Expected frame: `expected_value` where mask is True, NaN elsewhere.
        expected_values = where(mask, expected_value, nan)
        return DataFrame(expected_values, index=dates, columns=assets)

    # Mask that admits progressively fewer assets each day.
    cascading_mask = AssetIDPlusDay() < (asset_ids[-1] + dates[0].day)
    expected_cascading_mask_result = array(
        [
            [True, True, True, False],
            [True, True, False, False],
            [True, False, False, False],
            [False, False, False, False],
            [False, False, False, False],
        ],
        dtype=bool,
    )
    # Mask that flips on alternating (asset id + day) parity.
    alternating_mask = (AssetIDPlusDay() % 2).eq(0)
    expected_alternating_mask_result = array(
        [
            [False, True, False, True],
            [True, False, True, False],
            [False, True, False, True],
            [True, False, True, False],
            [False, True, False, True],
        ],
        dtype=bool,
    )
    # No mask at all: everything passes.
    expected_no_mask_result = array(
        [
            [True, True, True, True],
            [True, True, True, True],
            [True, True, True, True],
            [True, True, True, True],
            [True, True, True, True],
        ],
        dtype=bool,
    )
    masks = cascading_mask, alternating_mask, NotSpecified
    expected_mask_results = (
        expected_cascading_mask_result,
        expected_alternating_mask_result,
        expected_no_mask_result,
    )
    for mask, expected_mask in zip(masks, expected_mask_results):
        # Tuple-unpack the factor's two outputs into separate terms.
        open_price, close_price = MultipleOutputs(mask=mask)
        pipeline = Pipeline(columns={"open_price": open_price, "close_price": close_price})
        if mask is not NotSpecified:
            # Also surface the mask itself so we can verify it directly.
            pipeline.add(mask, "mask")
        results = engine.run_pipeline(pipeline, dates[0], dates[-1])
        for colname, case_column in (("open_price", open), ("close_price", close)):
            if mask is not NotSpecified:
                mask_results = results["mask"].unstack()
                check_arrays(mask_results.values, expected_mask)
            output_results = results[colname].unstack()
            # Expected values are the per-column constants from the loader.
            output_expected = create_expected_results(constants[case_column], expected_mask)
            assert_frame_equal(output_results, output_expected)
def test_correlation_factors(self, returns_length, correlation_length):
    """
    Tests for the built-in factors `RollingPearsonOfReturns` and
    `RollingSpearmanOfReturns`, run under three different masks and
    compared against correlations computed directly with scipy.
    """
    assets = self.assets
    my_asset = self.my_asset
    my_asset_column = self.my_asset_column
    dates = self.dates
    start_date = self.pipeline_start_date
    end_date = self.pipeline_end_date
    start_date_index = self.start_date_index
    end_date_index = self.end_date_index
    num_days = self.num_days
    run_pipeline = self.run_pipeline

    returns = Returns(window_length=returns_length)
    masks = (self.cascading_mask, self.alternating_mask, NotSpecified)
    expected_mask_results = (
        self.expected_cascading_mask_result,
        self.expected_alternating_mask_result,
        self.expected_no_mask_result,
    )

    for mask, expected_mask in zip(masks, expected_mask_results):
        pearson_factor = RollingPearsonOfReturns(
            target=my_asset,
            returns_length=returns_length,
            correlation_length=correlation_length,
            mask=mask,
        )
        spearman_factor = RollingSpearmanOfReturns(
            target=my_asset,
            returns_length=returns_length,
            correlation_length=correlation_length,
            mask=mask,
        )

        columns = {
            'pearson_factor': pearson_factor,
            'spearman_factor': spearman_factor,
        }
        pipeline = Pipeline(columns=columns)
        if mask is not NotSpecified:
            pipeline.add(mask, 'mask')

        results = run_pipeline(pipeline, start_date, end_date)
        pearson_results = results['pearson_factor'].unstack()
        spearman_results = results['spearman_factor'].unstack()
        if mask is not NotSpecified:
            mask_results = results['mask'].unstack()
            check_arrays(mask_results.values, expected_mask)

        # Run a separate pipeline that calculates returns starting
        # (correlation_length - 1) days prior to our start date. This is
        # because we need (correlation_length - 1) extra days of returns to
        # compute our expected correlations.
        results = run_pipeline(
            Pipeline(columns={'returns': returns}),
            dates[start_date_index - (correlation_length - 1)],
            dates[end_date_index],
        )
        returns_results = results['returns'].unstack()

        # On each day, calculate the expected correlation coefficients
        # between the asset we are interested in and each other asset. Each
        # correlation is calculated over `correlation_length` days.
        expected_pearson_results = full_like(pearson_results, nan)
        expected_spearman_results = full_like(spearman_results, nan)
        for day in range(num_days):
            todays_returns = returns_results.iloc[
                day:day + correlation_length
            ]
            my_asset_returns = todays_returns.iloc[:, my_asset_column]
            # BUG FIX: DataFrame.iteritems() was deprecated in pandas 1.5
            # and removed in 2.0; items() is the long-standing equivalent.
            for asset, other_asset_returns in todays_returns.items():
                asset_column = int(asset) - 1
                expected_pearson_results[day, asset_column] = pearsonr(
                    my_asset_returns,
                    other_asset_returns,
                )[0]
                expected_spearman_results[day, asset_column] = spearmanr(
                    my_asset_returns,
                    other_asset_returns,
                )[0]

        # NaN out masked-off entries before comparing.
        expected_pearson_results = DataFrame(
            data=where(expected_mask, expected_pearson_results, nan),
            index=dates[start_date_index:end_date_index + 1],
            columns=assets,
        )
        assert_frame_equal(pearson_results, expected_pearson_results)

        expected_spearman_results = DataFrame(
            data=where(expected_mask, expected_spearman_results, nan),
            index=dates[start_date_index:end_date_index + 1],
            columns=assets,
        )
        assert_frame_equal(spearman_results, expected_spearman_results)
def test_regression_of_returns_factor(self, returns_length, regression_length):
    """
    Tests for the built-in factor `RollingLinearRegressionOfReturns`,
    run under three different masks and compared against regressions
    computed directly with scipy's `linregress`.
    """
    assets = self.assets
    my_asset = self.my_asset
    my_asset_column = self.my_asset_column
    dates = self.dates
    start_date = self.pipeline_start_date
    end_date = self.pipeline_end_date
    start_date_index = self.start_date_index
    end_date_index = self.end_date_index
    num_days = self.num_days
    run_pipeline = self.run_pipeline

    # The order of these is meant to align with the output of `linregress`.
    outputs = ['beta', 'alpha', 'r_value', 'p_value', 'stderr']

    returns = Returns(window_length=returns_length)
    masks = self.cascading_mask, self.alternating_mask, NotSpecified
    expected_mask_results = (
        self.expected_cascading_mask_result,
        self.expected_alternating_mask_result,
        self.expected_no_mask_result,
    )

    for mask, expected_mask in zip(masks, expected_mask_results):
        regression_factor = RollingLinearRegressionOfReturns(
            target=my_asset,
            returns_length=returns_length,
            regression_length=regression_length,
            mask=mask,
        )

        # One pipeline column per regression output attribute.
        columns = {
            output: getattr(regression_factor, output)
            for output in outputs
        }
        pipeline = Pipeline(columns=columns)
        if mask is not NotSpecified:
            pipeline.add(mask, 'mask')

        results = run_pipeline(pipeline, start_date, end_date)
        if mask is not NotSpecified:
            mask_results = results['mask'].unstack()
            check_arrays(mask_results.values, expected_mask)

        output_results = {}
        expected_output_results = {}
        for output in outputs:
            output_results[output] = results[output].unstack()
            expected_output_results[output] = full_like(
                output_results[output], nan,
            )

        # Run a separate pipeline that calculates returns starting
        # (regression_length - 1) days prior to our start date. This is
        # because we need (regression_length - 1) extra days of returns to
        # compute our expected regressions.
        results = run_pipeline(
            Pipeline(columns={'returns': returns}),
            dates[start_date_index - (regression_length - 1)],
            dates[end_date_index],
        )
        returns_results = results['returns'].unstack()

        # On each day, calculate the expected regression results for Y ~ X
        # where Y is the asset we are interested in and X is each other
        # asset. Each regression is calculated over `regression_length`
        # days of data.
        for day in range(num_days):
            todays_returns = returns_results.iloc[
                day:day + regression_length
            ]
            my_asset_returns = todays_returns.iloc[:, my_asset_column]
            # BUG FIX: DataFrame.iteritems() was deprecated in pandas 1.5
            # and removed in 2.0; items() is the long-standing equivalent.
            for asset, other_asset_returns in todays_returns.items():
                asset_column = int(asset) - 1
                expected_regression_results = linregress(
                    y=other_asset_returns, x=my_asset_returns,
                )
                for i, output in enumerate(outputs):
                    expected_output_results[output][day, asset_column] = \
                        expected_regression_results[i]

        for output in outputs:
            output_result = output_results[output]
            # NaN out masked-off entries before comparing.
            expected_output_result = DataFrame(
                where(expected_mask, expected_output_results[output], nan),
                index=dates[start_date_index:end_date_index + 1],
                columns=assets,
            )
            assert_frame_equal(output_result, expected_output_result)
def initialize(context):
    """Register a minimal pipeline exposing the latest close price."""
    # BUG FIX: Pipeline() takes (columns, screen) — it has no name
    # parameter, so Pipeline('test') would pass the string as `columns`.
    p = Pipeline()
    p.add(USEquityPricing.close.latest, 'close')
    # BUG FIX: attach_pipeline requires a name so the results can be
    # retrieved later via pipeline_output(name).
    attach_pipeline(p, 'test')