def test_infer_domain_no_terms(self):
    """A pipeline built with no terms resolves its domain to the given default."""
    for fallback in (GENERIC, US_EQUITIES):
        empty_pipeline = Pipeline()
        self.assertEqual(empty_pipeline.domain(default=fallback), fallback)
def initialize(context):
    """Attach a pipeline constructed with no arguments under the name 'test'."""
    empty_pipeline = Pipeline()
    attach_pipeline(empty_pipeline, 'test')
def run(ts):
    """Run ``ts`` as a US_EQUITIES pipeline from trading_days[-5] to trading_days[-1].

    ``self`` is taken from the enclosing scope.
    """
    pipeline = Pipeline(ts, domain=US_EQUITIES)
    first_session = self.trading_days[-5]
    last_session = self.trading_days[-1]
    return self.run_pipeline(pipeline, first_session, last_session)
from zipline.utils.calendars import get_calendar universe = AverageDollarVolume(window_length=120).top(500) trading_calendar = get_calendar('NYSE') bundle_data = bundles.load(project_helper.EOD_BUNDLE_NAME) engine = project_helper.build_pipeline_engine(bundle_data, trading_calendar) # ### View Data # With the pipeline engine built, let's get the stocks at the end of the period in the universe we're using. We'll use these tickers to generate the returns data for our risk model. # In[8]: universe_end_date = pd.Timestamp('2016-01-05', tz='UTC') universe_tickers = engine .run_pipeline( Pipeline(screen=universe), universe_end_date, universe_end_date)\ .index.get_level_values(1)\ .values.tolist() universe_tickers # ## Get Returns # Now that we have our pipeline built, let's access the returns data. We'll start by building a data portal. # In[9]: from zipline.data.data_portal import DataPortal data_portal = DataPortal(
def late_attach(context, data):
    """Attempt to attach a pipeline; returning normally from the attach is a failure."""
    pipeline = Pipeline()
    attach_pipeline(pipeline, 'test')
    # Only reached if attach_pipeline did not raise.
    raise AssertionError("Shouldn't make it past attach_pipeline!")
# and split on a 2-for-1 basis on February 28, 2005, June 21, 2000, and June 16, 1987. start = pd.to_datetime('2020-08-26', utc=True) end = pd.to_datetime('2020-09-02', utc=True) # AAPL sid 199059 print(prices(symbols(['AAPL']), start, end)) engine = make_pipeline_engine() universe = StaticAssets(symbols(['AAPL'])) pipe = Pipeline( columns={ 'close': Latest([EquityPricing.close], mask=universe), 'mkt_cap': MarketCap(mask=universe), 'prev': Previous([USEquityPricing.close], window_length=2, mask=universe), 'ret': Returns(window_length=2, mask=universe), }, screen=universe, ) stocks = engine.run_pipeline(pipe, start, end, hooks=[]) print(stocks) expected = [[499.30, 2163847100000.00, 503.43, -0.01], [506.09, 2137988000000.00, 499.30, 0.01], [500.04, 2134533300000.00, 506.09, -0.01], [124.81, 2206911200000.00, 125.01, -0.00], [129.04, 2294818300000.00, 124.81, 0.03], [134.18, 2247273200000.00, 129.04, 0.04]]
def test_factor_correlation_methods_two_factors(self, correlation_length):
    """
    Tests for `Factor.pearsonr` and `Factor.spearmanr` when passed another
    2D factor instead of a Slice.

    The test first checks that correlating two factors with different masks
    raises `IncompatibleTerms`, then compares the pipeline output of both
    methods against values recomputed per day and per asset with `pearsonr`
    and `spearmanr`.

    Parameters
    ----------
    correlation_length
        Number of rows of returns each correlation is computed over; it is
        forwarded to both factor methods and used as the slice length of
        the expected-value window.
    """
    # Pull the fixture attributes into locals once.
    assets = self.assets
    dates = self.dates
    start_date = self.pipeline_start_date
    end_date = self.pipeline_end_date
    start_date_index = self.start_date_index
    end_date_index = self.end_date_index
    num_days = self.num_days
    run_pipeline = self.run_pipeline

    # Ensure that the correlation methods cannot be called with two 2D
    # factors which have different masks.
    returns_masked_1 = Returns(
        window_length=5, inputs=[self.col], mask=AssetID().eq(1),
    )
    returns_masked_2 = Returns(
        window_length=5, inputs=[self.col], mask=AssetID().eq(2),
    )
    with self.assertRaises(IncompatibleTerms):
        returns_masked_1.pearsonr(
            target=returns_masked_2, correlation_length=correlation_length,
        )
    with self.assertRaises(IncompatibleTerms):
        returns_masked_1.spearmanr(
            target=returns_masked_2, correlation_length=correlation_length,
        )

    returns_5 = Returns(window_length=5, inputs=[self.col])
    returns_10 = Returns(window_length=10, inputs=[self.col])

    pearson_factor = returns_5.pearsonr(
        target=returns_10, correlation_length=correlation_length,
    )
    spearman_factor = returns_5.spearmanr(
        target=returns_10, correlation_length=correlation_length,
    )

    columns = {
        'pearson_factor': pearson_factor,
        'spearman_factor': spearman_factor,
    }
    pipeline = Pipeline(columns=columns)

    results = run_pipeline(pipeline, start_date, end_date)
    # unstack() is applied to each result column before comparison.
    pearson_results = results['pearson_factor'].unstack()
    spearman_results = results['spearman_factor'].unstack()

    # Run a separate pipeline that calculates returns starting
    # (correlation_length - 1) days prior to our start date. This is
    # because we need (correlation_length - 1) extra days of returns to
    # compute our expected correlations.
    columns = {'returns_5': returns_5, 'returns_10': returns_10}
    results = run_pipeline(
        Pipeline(columns=columns),
        dates[start_date_index - (correlation_length - 1)],
        dates[end_date_index],
    )
    returns_5_results = results['returns_5'].unstack()
    returns_10_results = results['returns_10'].unstack()

    # On each day, calculate the expected correlation coefficients
    # between each asset's 5 and 10 day rolling returns. Each correlation
    # is calculated over `correlation_length` days.
    expected_pearson_results = full_like(pearson_results, nan)
    expected_spearman_results = full_like(spearman_results, nan)
    for day in range(num_days):
        todays_returns_5 = returns_5_results.iloc[
            day:day + correlation_length
        ]
        todays_returns_10 = returns_10_results.iloc[
            day:day + correlation_length
        ]
        # NOTE(review): `iteritems` was removed in pandas 2.0; confirm the
        # pandas versions this suite must support before switching to
        # `items`.
        for asset, asset_returns_5 in todays_returns_5.iteritems():
            # The output column is derived as int(asset) - 1.
            asset_column = int(asset) - 1
            asset_returns_10 = todays_returns_10[asset]
            # Element [0] of each result is stored as the coefficient.
            expected_pearson_results[day, asset_column] = pearsonr(
                asset_returns_5, asset_returns_10,
            )[0]
            expected_spearman_results[day, asset_column] = spearmanr(
                asset_returns_5, asset_returns_10,
            )[0]

    expected_pearson_results = DataFrame(
        data=expected_pearson_results,
        index=dates[start_date_index:end_date_index + 1],
        columns=assets,
    )
    assert_frame_equal(pearson_results, expected_pearson_results)

    expected_spearman_results = DataFrame(
        data=expected_spearman_results,
        index=dates[start_date_index:end_date_index + 1],
        columns=assets,
    )
    assert_frame_equal(spearman_results, expected_spearman_results)
def initialize(context):
    """Attach two pipelines, then add a latest-close and a latest-volume column."""
    close_pipe = attach_pipeline(Pipeline(), "test_close")
    volume_pipe = attach_pipeline(Pipeline(), "test_volume")

    latest_close = USEquityPricing.close.latest
    close_pipe.add(latest_close, "close")

    latest_volume = USEquityPricing.volume.latest
    volume_pipe.add(latest_volume, "volume")
def initialize(context):
    """Attach two separate pipelines under the same name, "test"."""
    for _attempt in range(2):
        attach_pipeline(Pipeline(), "test")
import pandas as pd
from zipline.pipeline import Pipeline
from zipline.pipeline.data import USEquityPricing
from sharadar.pipeline.engine import symbol, symbols, make_pipeline_engine
from zipline.pipeline.filters import StaticAssets

# Script: print the latest close for three tickers over one week of
# February 2020.

# Resolve the ticker strings to asset objects and show them.
tickers = symbols(['TR1M', 'TR1Y', 'RATEINF'])
print(tickers)

# Single-column pipeline, screened down to exactly those assets.
pipe = Pipeline(columns={
    'Close': USEquityPricing.close.latest,
}, screen=StaticAssets(tickers))

engine = make_pipeline_engine()
pipe_start = pd.to_datetime('2020-02-03', utc=True)
pipe_end = pd.to_datetime('2020-02-07', utc=True)
stocks = engine.run_pipeline(pipe, pipe_start, pipe_end)

# The label says "shape", but the whole pipeline result is printed.
print("stocks.shape [close]", stocks)
print(symbol('TR1M').to_dict())
def initialize(context):
    """Attach a pipeline with the enclosing scope's ``chunks`` and add latest close."""
    attached = attach_pipeline(Pipeline(), "test", chunks=chunks)
    latest_close = USEquityPricing.close.latest
    attached.add(latest_close, "close")
import pandas as pd
from zipline.pipeline import Pipeline
from zipline.pipeline.data import USEquityPricing
from sharadar.pipeline.engine import load_sharadar_bundle, symbols, make_pipeline_engine
from zipline.pipeline.filters import StaticAssets
import time
import datetime
from sharadar.pipeline.factors import DaysSinceFiling

# Script: run a one-column DaysSinceFiling pipeline for three tickers over
# one week of February 2020 and print the result.

bundle = load_sharadar_bundle()
# The return value of this lookup is discarded.
bundle.asset_finder.retrieve_equities([199059, 199623])

spe = make_pipeline_engine()

pipe_start = pd.to_datetime('2020-02-03', utc=True)
pipe_end = pd.to_datetime('2020-02-07', utc=True)

# The same static universe is used as the factor mask and the screen.
universe = StaticAssets(symbols(['IBM', 'F', 'AAPL']))
pipe_mkt_cap = Pipeline(columns={
    'days_since_filing': DaysSinceFiling(mask=universe),
}, screen=universe)

# start_time is recorded here but not read again in the visible part of
# the script.
start_time = time.time()
stocks = spe.run_pipeline(pipe_mkt_cap, pipe_start, pipe_end)

# The label says "shape" and "mkt cap", but the whole days_since_filing
# result is printed.
print("stocks.shape [mkt cap]", stocks)
def test_loader_given_multiple_columns(self):
    """
    Run a pipeline whose columns are served by two different loaders.

    Two `RecordingConstantLoader` instances serve three datasets. The test
    checks the pipeline output against values derived from the loaders'
    constants, then checks the column groups each loader recorded in
    `load_calls`.
    """

    class Loader1DataSet1(DataSet):
        col1 = Column(float32)
        col2 = Column(float32)

    class Loader1DataSet2(DataSet):
        col1 = Column(float32)
        col2 = Column(float32)

    class Loader2DataSet(DataSet):
        col1 = Column(float32)
        col2 = Column(float32)

    # Constant value served for each column handled by loader1.
    constants1 = {
        Loader1DataSet1.col1: 1,
        Loader1DataSet1.col2: 2,
        Loader1DataSet2.col1: 3,
        Loader1DataSet2.col2: 4
    }
    loader1 = RecordingConstantLoader(constants=constants1,
                                      dates=self.dates,
                                      assets=self.assets)

    # Constant value served for each column handled by loader2.
    constants2 = {Loader2DataSet.col1: 5, Loader2DataSet.col2: 6}
    loader2 = RecordingConstantLoader(constants=constants2,
                                      dates=self.dates,
                                      assets=self.assets)

    # Dispatch: Loader2DataSet columns go to loader2, everything else to
    # loader1.
    engine = SimplePipelineEngine(
        lambda column:
        loader2 if column.dataset == Loader2DataSet else loader1,
        self.dates, self.asset_finder,
    )

    pipe_col1 = RollingSumSum(inputs=[
        Loader1DataSet1.col1, Loader1DataSet2.col1, Loader2DataSet.col1
    ], window_length=2)

    pipe_col2 = RollingSumSum(inputs=[
        Loader1DataSet1.col2, Loader1DataSet2.col2, Loader2DataSet.col2
    ], window_length=3)

    pipe_col3 = RollingSumSum(inputs=[Loader2DataSet.col1],
                              window_length=3)

    columns = OrderedDict([
        ('pipe_col1', pipe_col1),
        ('pipe_col2', pipe_col2),
        ('pipe_col3', pipe_col3),
    ])
    result = engine.run_pipeline(
        Pipeline(columns=columns),
        self.dates[2],  # index is >= the largest window length - 1
        self.dates[-1])

    # Smallest window among the pipeline columns (2 for the terms above).
    min_window = min(pip_col.window_length
                     for pip_col in itervalues(columns))

    # Single lookup spanning the constants of both loaders.
    col_to_val = ChainMap(constants1, constants2)

    # Expected value per column: sum of the constants of its inputs,
    # multiplied by its window length.
    vals = {
        name: (sum(col_to_val[col] for col in pipe_col.inputs) *
               pipe_col.window_length)
        for name, pipe_col in iteritems(columns)
    }

    index = MultiIndex.from_product([self.dates[2:], self.assets])

    # Each expected column is (window_length - min_window) dates' worth of
    # NaN rows followed by the constant expected value for the remaining
    # dates; levshape[1] is the per-date row count.
    expected = DataFrame(data={
        col: concatenate((
            full((columns[col].window_length - min_window) *
                 index.levshape[1], nan),
            full((index.levshape[0] -
                  (columns[col].window_length - min_window)) *
                 index.levshape[1], val)))
        for col, val in iteritems(vals)
    }, index=index, columns=columns)

    assert_frame_equal(result, expected)

    # loader1 is expected to have recorded one call per column name, each
    # covering both of its datasets.
    self.assertEqual(
        set(loader1.load_calls), {
            ColumnArgs.sorted_by_ds(Loader1DataSet1.col1,
                                    Loader1DataSet2.col1),
            ColumnArgs.sorted_by_ds(Loader1DataSet1.col2,
                                    Loader1DataSet2.col2)
        })
    # loader2 is expected to have recorded a single call covering both of
    # its columns.
    self.assertEqual(set(loader2.load_calls), {
        ColumnArgs.sorted_by_ds(Loader2DataSet.col1, Loader2DataSet.col2)
    })
def test_masked_factor(self):
    """
    Check that custom factors honour a mask.

    For each of two masks the pipeline is run twice: once with a single
    masked factor and once with a second masked factor added. In both runs
    the mask column must match its hard-coded expectation, and every factor
    must hold its constant value where the mask is true and NaN elsewhere.
    """
    window_dates = self.dates[5:8]
    asset_labels = self.assets
    sids = self.asset_ids
    const_by_column = self.constants
    source_loader = self.loader
    open_col = USEquityPricing.open
    close_col = USEquityPricing.close

    engine = SimplePipelineEngine(
        lambda column: source_loader, self.dates, self.asset_finder,
    )

    expected_open = const_by_column[open_col]
    expected_sum_diff = 3.0 * (
        const_by_column[open_col] - const_by_column[close_col]
    )

    def expected_frame(fill_value, observed_mask):
        # fill_value where the mask holds, NaN everywhere else.
        return DataFrame(
            where(observed_mask, fill_value, nan),
            index=window_dates,
            columns=asset_labels,
        )

    def run_and_check_mask(columns, expected_mask_values):
        # Run the pipeline and verify the 'mask' column before returning.
        outcome = engine.run_pipeline(
            Pipeline(columns=columns), window_dates[0], window_dates[-1],
        )
        observed_mask = outcome['mask'].unstack()
        check_arrays(observed_mask.values, expected_mask_values)
        return outcome, observed_mask

    cascading_mask = AssetIDPlusDay() < (sids[-1] + window_dates[0].day)
    cascading_expected = array(
        [[True, True, True, False],
         [True, True, False, False],
         [True, False, False, False]],
        dtype=bool,
    )

    alternating_mask = (AssetIDPlusDay() % 2).eq(0)
    alternating_expected = array(
        [[False, True, False, True],
         [True, False, True, False],
         [False, True, False, True]],
        dtype=bool,
    )

    cases = (
        (cascading_mask, cascading_expected),
        (alternating_mask, alternating_expected),
    )
    for mask, expected_mask in cases:
        # First pass: a single masked factor.
        columns = {'factor1': OpenPrice(mask=mask), 'mask': mask}
        outcome, observed_mask = run_and_check_mask(columns, expected_mask)
        assert_frame_equal(
            outcome['factor1'].unstack(),
            expected_frame(expected_open, observed_mask),
        )

        # Second pass: add another factor sharing the same mask, to make
        # sure it does not disturb the first one.
        columns['factor2'] = RollingSumDifference(mask=mask)
        outcome, observed_mask = run_and_check_mask(columns, expected_mask)
        observed_factor1 = outcome['factor1'].unstack()
        observed_factor2 = outcome['factor2'].unstack()
        wanted_factor1 = expected_frame(expected_open, observed_mask)
        wanted_factor2 = expected_frame(expected_sum_diff, observed_mask)
        assert_frame_equal(observed_factor1, wanted_factor1)
        assert_frame_equal(observed_factor2, wanted_factor2)
def initialize(context):
    """Attach 'my_pipeline' and register a TestFactor column on it."""
    attached = attach_pipeline(Pipeline(), 'my_pipeline')
    attached.add(TestFactor(), 'test_factor')
def initialize(context):
    """Attach "my_pipeline" and add a TestFactor column named "test_factor"."""
    blank = Pipeline()
    registered = attach_pipeline(blank, "my_pipeline")
    factor = TestFactor()
    registered.add(factor, "test_factor")
def test_compute_with_adjustments(self):
    """
    Moving averages over adjusted and unadjusted data must agree.

    `low` is loaded unadjusted from a flat 30.0 baseline. `high` is loaded
    from a baseline that has the inverse of three MULTIPLY adjustments
    pre-applied, together with those adjustments. Each moving-average
    output is compared against the matching slice of its own baseline.
    """
    dates, assets = self.dates, self.assets
    low, high = USEquityPricing.low, USEquityPricing.high
    apply_idxs = [3, 10, 16]
    multipliers = (2.0, 3.0, 5.0)

    def session_at(position, offset=0):
        return dates[apply_idxs[position] + offset]

    adjustments = DataFrame.from_records(
        [
            dict(
                kind=MULTIPLY,
                sid=assets[1],
                value=multiplier,
                start_date=None,
                end_date=session_at(position, offset=-1),
                apply_date=session_at(position),
            )
            for position, multiplier in enumerate(multipliers)
        ]
    )

    low_base = DataFrame(self.make_frame(30.0))
    low_loader = DataFrameLoader(low, low_base.copy(), adjustments=None)

    # Pre-apply inverse of adjustments to the baseline.
    high_base = DataFrame(self.make_frame(30.0))
    for boundary, multiplier in zip(apply_idxs, multipliers):
        high_base.iloc[:boundary, 1] /= multiplier
    high_loader = DataFrameLoader(high, high_base, adjustments)

    engine = SimplePipelineEngine(
        MultiColumnLoader({low: low_loader, high: high_loader}),
        self.dates,
        self.asset_finder,
    )

    baselines = (('low', low_base), ('high', high_base))
    for window_length in range(1, 4):
        low_mavg = SimpleMovingAverage(
            inputs=[USEquityPricing.low],
            window_length=window_length,
        )
        high_mavg = SimpleMovingAverage(
            inputs=[USEquityPricing.high],
            window_length=window_length,
        )
        date_pairs = product_upper_triangle(
            range(window_length, len(dates))
        )
        for first, last in date_pairs:
            results = engine.run_pipeline(
                Pipeline(columns={'low': low_mavg, 'high': high_mavg}),
                dates[first],
                dates[last],
            )
            self.assertEqual(set(results.columns), {'low', 'high'})

            # last + 1 so the end date is included in the slice.
            rows = slice(first, last + 1)
            for name, baseline in baselines:
                assert_frame_equal(
                    results.unstack()[name], baseline.iloc[rows],
                )
default_domain=calendar) return engine.run_pipeline(*args, **kwargs) if __name__ == '__main__': data_path = "/home/yuxuzi/Data/mydataset2" start_date, end_date = pd.Timestamp('2018-03-13'), pd.Timestamp( '2018-03-27') data_source = HDFSimpleDataSource(data_path) dataset = HDFSimpleDataSource.infer_dataset(data_path) if __name__ == '__main__': universe = dataset.sector.latest.element_of(['A', 'B']) class RollingSumDifference(CustomFactor): window_length = 3 inputs = [dataset.open, dataset.close] def compute(self, today, assets, out, open, close): out[:] = (open - close).sum(axis=0) pipe = Pipeline(columns={ 'close': dataset.close.latest, 'sumdiff': RollingSumDifference(), 'sector': dataset.sector.latest }, screen=universe) df = data_source.run_pipeline(pipe, start_date, end_date) print(df)
def test_regression_of_returns_factor(self,
                                      returns_length,
                                      regression_length):
    """
    Tests for the built-in factor `RollingLinearRegressionOfReturns`.

    For each mask (two fixture masks plus `NotSpecified`) the factor's five
    outputs are computed through a pipeline and compared against values
    recomputed with `linregress`, with NaN wherever the expected mask is
    false.

    Parameters
    ----------
    returns_length
        Window length of the `Returns` factor, also passed to the
        regression factor.
    regression_length
        Number of rows of returns each regression is computed over.
    """
    # Pull the fixture attributes into locals once.
    assets = self.assets
    my_asset = self.my_asset
    my_asset_column = self.my_asset_column
    dates = self.dates
    start_date = self.pipeline_start_date
    end_date = self.pipeline_end_date
    start_date_index = self.start_date_index
    end_date_index = self.end_date_index
    num_days = self.num_days
    run_pipeline = self.run_pipeline

    # The order of these is meant to align with the output of `linregress`.
    outputs = ['beta', 'alpha', 'r_value', 'p_value', 'stderr']

    returns = Returns(window_length=returns_length)
    masks = self.cascading_mask, self.alternating_mask, NotSpecified
    expected_mask_results = (
        self.expected_cascading_mask_result,
        self.expected_alternating_mask_result,
        self.expected_no_mask_result,
    )

    for mask, expected_mask in zip(masks, expected_mask_results):
        regression_factor = RollingLinearRegressionOfReturns(
            target=my_asset,
            returns_length=returns_length,
            regression_length=regression_length,
            mask=mask,
        )

        # One pipeline column per regression output.
        columns = {
            output: getattr(regression_factor, output)
            for output in outputs
        }
        pipeline = Pipeline(columns=columns)
        # The mask itself is only added as a column when one was given.
        if mask is not NotSpecified:
            pipeline.add(mask, 'mask')

        results = run_pipeline(pipeline, start_date, end_date)
        if mask is not NotSpecified:
            mask_results = results['mask'].unstack()
            check_arrays(mask_results.values, expected_mask)

        output_results = {}
        expected_output_results = {}
        for output in outputs:
            output_results[output] = results[output].unstack()
            # Start from an all-NaN array shaped like the actual output.
            expected_output_results[output] = full_like(
                output_results[output], nan,
            )

        # Run a separate pipeline that calculates returns starting
        # (regression_length - 1) days prior to our start date. This is
        # because we need (regression_length - 1) extra days of returns to
        # compute our expected regressions.
        results = run_pipeline(
            Pipeline(columns={'returns': returns}),
            dates[start_date_index - (regression_length - 1)],
            dates[end_date_index],
        )
        returns_results = results['returns'].unstack()

        # On each day, calculate the expected regression results for Y ~ X
        # where Y is the asset we are interested in and X is each other
        # asset. Each regression is calculated over `regression_length`
        # days of data.
        # NOTE(review): the call below passes the other asset as `y` and
        # my_asset as `x`, the reverse of what the comment above describes
        # -- confirm which is intended.
        for day in range(num_days):
            todays_returns = returns_results.iloc[
                day:day + regression_length
            ]
            my_asset_returns = todays_returns.iloc[:, my_asset_column]
            # NOTE(review): `iteritems` was removed in pandas 2.0; confirm
            # the supported pandas versions before switching to `items`.
            for asset, other_asset_returns in todays_returns.iteritems():
                # The output column is derived as int(asset) - 1.
                asset_column = int(asset) - 1
                expected_regression_results = linregress(
                    y=other_asset_returns, x=my_asset_returns,
                )
                # Results are read positionally, in the order of `outputs`.
                for i, output in enumerate(outputs):
                    expected_output_results[output][day, asset_column] = \
                        expected_regression_results[i]

        for output in outputs:
            output_result = output_results[output]
            # Blank out entries where the expected mask is false.
            expected_output_result = DataFrame(
                where(expected_mask, expected_output_results[output], nan),
                index=dates[start_date_index:end_date_index + 1],
                columns=assets,
            )
            assert_frame_equal(output_result, expected_output_result)
def test_generic_pipeline_with_explicit_domain(self, domain):
    """
    Run a latest-OHLCV pipeline on an explicit domain and verify every cell.

    The run covers sessions[-17] through sessions[-10] of the domain's
    calendar, chosen so that some assets never show up: some end too soon
    and others start too late. For each column the unstacked result must
    have exactly the expected assets as columns and the expected sessions
    as index, and each value is passed to `check_expected_latest_value`.
    """
    calendar = domain.calendar
    bar_fields = ("open", "high", "low", "close", "volume")
    pipe = Pipeline(
        {field: getattr(EquityPricing, field).latest for field in bar_fields},
        domain=domain,
    )

    sessions = self.daily_bar_sessions[calendar.name]
    start, end = sessions[[-17, -10]]
    result = self.run_pipeline(pipe, start, end)

    # Only assets alive somewhere in [start, end] are expected, with the
    # asset's own start date excluded from the liveness check.
    expected_assets = []
    for candidate in self.assets_by_calendar[calendar]:
        if alive_in_range(
            candidate, start, end, include_asset_start_date=False,
        ):
            expected_assets.append(candidate)

    # The slice stop is one past -10 so the end session is included.
    expected_dates = sessions[-17:-9]

    for col in pipe.columns:
        # Per column the frame is banded: each asset has values only on
        # some of the sessions and NaN on the rest.
        frame = result[col].unstack()

        # Check indices.
        assert_equal(pd.Index(expected_assets), frame.columns)
        assert_equal(expected_dates, frame.index)

        # Check values.
        for asset in expected_assets:
            for date in expected_dates:
                self.check_expected_latest_value(
                    calendar, col, date, asset, frame.at[date, asset],
                )
def test_factor_regression_method_two_factors(self, regression_length):
    """
    Tests for `Factor.linear_regression` when passed another 2D factor
    instead of a Slice.

    The test first checks that regressing two factors with different masks
    raises `IncompatibleTerms`, then compares the five regression outputs
    from the pipeline against values recomputed per day and per asset with
    `linregress`.

    Parameters
    ----------
    regression_length
        Number of rows of returns each regression is computed over; it is
        forwarded to `linear_regression` and used as the slice length of
        the expected-value window.
    """
    # Pull the fixture attributes into locals once.
    assets = self.assets
    dates = self.dates
    start_date = self.pipeline_start_date
    end_date = self.pipeline_end_date
    start_date_index = self.start_date_index
    end_date_index = self.end_date_index
    num_days = self.num_days
    run_pipeline = self.run_pipeline

    # The order of these is meant to align with the output of `linregress`.
    outputs = ['beta', 'alpha', 'r_value', 'p_value', 'stderr']

    # Ensure that the `linear_regression` method cannot be called with two
    # 2D factors which have different masks.
    returns_masked_1 = Returns(
        window_length=5, inputs=[self.col], mask=AssetID().eq(1),
    )
    returns_masked_2 = Returns(
        window_length=5, inputs=[self.col], mask=AssetID().eq(2),
    )
    with self.assertRaises(IncompatibleTerms):
        returns_masked_1.linear_regression(
            target=returns_masked_2, regression_length=regression_length,
        )

    returns_5 = Returns(window_length=5, inputs=[self.col])
    returns_10 = Returns(window_length=10, inputs=[self.col])

    regression_factor = returns_5.linear_regression(
        target=returns_10, regression_length=regression_length,
    )

    # One pipeline column per regression output.
    columns = {
        output: getattr(regression_factor, output)
        for output in outputs
    }
    pipeline = Pipeline(columns=columns)

    results = run_pipeline(pipeline, start_date, end_date)

    output_results = {}
    expected_output_results = {}
    for output in outputs:
        output_results[output] = results[output].unstack()
        # Start from an all-NaN array shaped like the actual output.
        expected_output_results[output] = full_like(
            output_results[output], nan,
        )

    # Run a separate pipeline that calculates returns starting
    # (regression_length - 1) days prior to our start date. This is because
    # we need (regression_length - 1) extra days of returns to compute our
    # expected regressions.
    columns = {'returns_5': returns_5, 'returns_10': returns_10}
    results = run_pipeline(
        Pipeline(columns=columns),
        dates[start_date_index - (regression_length - 1)],
        dates[end_date_index],
    )
    returns_5_results = results['returns_5'].unstack()
    returns_10_results = results['returns_10'].unstack()

    # On each day, for each asset, calculate the expected regression
    # results of Y ~ X where Y is the asset's rolling 5 day returns and X
    # is the asset's rolling 10 day returns. Each regression is calculated
    # over `regression_length` days of data.
    for day in range(num_days):
        todays_returns_5 = returns_5_results.iloc[
            day:day + regression_length
        ]
        todays_returns_10 = returns_10_results.iloc[
            day:day + regression_length
        ]
        # NOTE(review): `iteritems` was removed in pandas 2.0; confirm the
        # supported pandas versions before switching to `items`.
        for asset, asset_returns_5 in todays_returns_5.iteritems():
            # The output column is derived as int(asset) - 1.
            asset_column = int(asset) - 1
            asset_returns_10 = todays_returns_10[asset]
            expected_regression_results = linregress(
                y=asset_returns_5, x=asset_returns_10,
            )
            # Results are read positionally, in the order of `outputs`.
            for i, output in enumerate(outputs):
                expected_output_results[output][day, asset_column] = \
                    expected_regression_results[i]

    for output in outputs:
        output_result = output_results[output]
        expected_output_result = DataFrame(
            expected_output_results[output],
            index=dates[start_date_index:end_date_index + 1],
            columns=assets,
        )
        assert_frame_equal(output_result, expected_output_result)
def run_data_pipeline(engine, universe, start_date, end_date):
    """
    Build the factor pipeline, run it, and post-process the result.

    Parameters
    ----------
    engine
        Object whose ``run_pipeline(pipeline, start_date, end_date)`` is
        called to compute the factors.
    universe
        Term used as the pipeline screen and as the mask of most factors.
    start_date, end_date
        Bounds passed to the engine and to ``compute_date_features``.

    Returns
    -------
    The pipeline output after ``compute_date_features`` and
    ``one_hot_encode_sectors``, with an added ``target`` column holding
    ``return_5d`` shifted by -5 within each level-1 group.
    """
    pipe = Pipeline(screen=universe)
    sector_term = Sector()

    def register(labelled_terms):
        # Add each (label, term) pair to the pipeline, in order.
        for label, term in labelled_terms:
            pipe.add(term, label)

    # Alpha factors.
    register([
        ('Downside Risk (Sortino Ratio)', DownsideRisk()),
        ('3 Month Volatility', Vol3M()),
        ('Momentum_1YR', momentum_1yr(252, universe, sector_term)),
        (
            'Mean_Reversion_Sector_Neutral_Smoothed',
            mean_reversion_5day_sector_neutral_smoothed(
                20, universe, sector_term,
            ),
        ),
        (
            'Overnight_Sentiment_Smoothed',
            overnight_sentiment_smoothed(2, 10, universe),
        ),
        (
            'RSI_Sector_Neutral_15d',
            rsi_sector_neutral(15, universe, sector_term),
        ),
        (
            'RSI_Sector_Neutral_30d',
            rsi_sector_neutral(30, universe, sector_term),
        ),
    ])

    time_beta = RegressionAgainstTime(mask=universe).beta.rank().zscore()
    time_gamma = RegressionAgainstTime(mask=universe).gamma.rank().zscore()
    conditional = (time_beta * time_gamma).rank().zscore()
    register([
        ('time_beta', time_beta),
        ('time_gamma', time_gamma),
        ('conditional_factor', conditional),
    ])

    # Universal quant features.
    register([
        (
            'volatility_20d',
            AnnualizedVolatility(
                window_length=20, mask=universe,
            ).rank().zscore(),
        ),
        (
            'volatility_120d',
            AnnualizedVolatility(
                window_length=120, mask=universe,
            ).rank().zscore(),
        ),
        (
            'adv_20d',
            AverageDollarVolume(
                window_length=20, mask=universe,
            ).rank().zscore(),
        ),
        (
            'adv_120d',
            AverageDollarVolume(
                window_length=120, mask=universe,
            ).rank().zscore(),
        ),
        ('sector_code', sector_term),
    ])

    # Regime features.
    register([
        (
            'dispersion_20d',
            SimpleMovingAverage(
                inputs=[MarketDispersion(mask=universe)], window_length=20,
            ),
        ),
        (
            'dispersion_120d',
            SimpleMovingAverage(
                inputs=[MarketDispersion(mask=universe)], window_length=120,
            ),
        ),
        ('market_vol_20d', MarketVolatility(window_length=20)),
        ('market_vol_120d', MarketVolatility(window_length=120)),
    ])

    # Target: the goal is to predict the forward 1-week return, so the
    # trailing 5-day return is quantized here and shifted further down.
    register([
        ('return_5d', Returns(window_length=5, mask=universe).quantiles(2)),
        (
            'return_5d_p',
            Returns(window_length=5, mask=universe).quantiles(25),
        ),
    ])

    # Run the pipeline, then derive date features and one-hot sectors.
    factors = engine.run_pipeline(pipe, start_date, end_date)
    factors = compute_date_features(factors, start_date, end_date)
    factors = one_hot_encode_sectors(factors)

    # Shifted target used for training the model.
    shifted = factors.groupby(level=1)['return_5d'].shift(-5)
    factors['target'] = shifted
    return factors
def make_pipeline(context):
    """
    Build the stock-selection pipeline.

    Reads ``context.MyLeastPrice``, ``context.MyMostPrice`` and
    ``context.MaxCandidates``. The returned pipeline has one column,
    ``stocks_worst``, and is screened by that same filter.
    """
    # Primary shares only (built-in filter).
    is_primary = IsPrimaryShare()

    # Exclude when-issued equities.
    not_when_issued = ~PolygonCompany.symbol.latest.endswith(".WI")

    # Exclude names ending in an LP suffix, matched by regular expression.
    not_partnership = ~PolygonCompany.name.latest.matches(".* L[. ]?P.?$")

    # A market cap of at least 1 is used as evidence of fundamental data,
    # i.e. that the equity is not an ETF.
    has_market_cap = PolygonCompany.marketcap.latest >= 1

    # Keep the latest close inside the configured price band.
    latest_close = USEquityPricing.close.latest
    above_floor = latest_close >= context.MyLeastPrice
    below_ceiling = latest_close <= context.MyMostPrice

    # Everything so far, combined.
    tradeable = (
        is_primary
        & not_when_issued
        & not_partnership
        & has_market_cap
        & above_floor
        & below_ceiling
    )

    low_pct = 6
    high_pct = 40
    log.info(''' Algorithm initialized variables: context.MaxCandidates %s LowVar %s HighVar %s''' % (context.MaxCandidates, low_pct, high_pct))

    # Dollar-volume percentile band within the tradeable set.
    dollar_volume = AverageDollarVolume(window_length=20, mask=tradeable)
    base_universe = dollar_volume.percentile_between(low_pct, high_pct)

    # Short (3-day) and long (45-day) close-price averages.
    short_avg = SimpleMovingAverage(
        inputs=[USEquityPricing.close],
        window_length=3,
        mask=base_universe,
    )
    long_avg = SimpleMovingAverage(
        inputs=[USEquityPricing.close],
        window_length=45,
        mask=base_universe,
    )

    relative_gap = (short_avg - long_avg) / long_avg

    # Securities to go long: the bottom MaxCandidates by relative gap.
    stocks_worst = relative_gap.bottom(context.MaxCandidates)

    return Pipeline(
        columns={'stocks_worst': stocks_worst},
        screen=stocks_worst,
    )
def make_pipeline():
    """Return a pipeline with the top-3 and bottom-3 RSI filters as columns."""
    strength = RSI()
    columns = {}
    columns['longs'] = strength.top(3)
    columns['shorts'] = strength.bottom(3)
    return Pipeline(columns=columns)
def initialize(context):
    """Attach 'test', then request its output; a normal return is a failure."""
    pipeline = Pipeline()
    attach_pipeline(pipeline, 'test')
    pipeline_output('test')
    # Only reached if pipeline_output did not raise.
    raise AssertionError("Shouldn't make it past pipeline_output()")
class DummyFactor1(CustomFactor):
    """Factor with no inputs that logs on each compute call and outputs zeros."""
    inputs = []
    window_length = 1
    window_safe = False

    def compute(self, today, assets, out):
        # NOTE(review): `today` is passed as a format argument, but the
        # message '1' has no placeholder -- confirm how the project's
        # `log` treats the extra argument.
        log.info('1', today)
        out[:] = 0


class DummyFactor2(CustomFactor):
    """Same as DummyFactor1 except that it logs '2'."""
    inputs = []
    window_length = 1
    window_safe = False

    def compute(self, today, assets, out):
        # NOTE(review): same extra-argument question as in DummyFactor1.
        log.info('2', today)
        out[:] = 0


# Latest close plus both dummy factors, screened by a named universe.
pipe = Pipeline(columns={
    'close': USEquityPricing.close.latest,
    'dummy1': DummyFactor1(),
    'dummy2': DummyFactor2()
},
    screen=NamedUniverse(universe_name)
)

# Time the pipeline run and report the elapsed wall-clock time.
start_time = time.time()
stocks = spe.run_pipeline(pipe, pipe_start, end)
print(stocks.shape)
print("--- %s ---" % datetime.timedelta(seconds=(time.time() - start_time)))
def initialize(context):
    """Attach 'test' with the enclosing scope's ``chunksize`` and add latest close."""
    blank = Pipeline()
    attached = attach_pipeline(blank, 'test', chunksize=chunksize)
    attached.add(USEquityPricing.close.latest, 'close')
def initialize(context):
    """Attach 'test_close' and 'test_volume', each with one latest-value column."""
    close_handle = attach_pipeline(Pipeline(), 'test_close')
    volume_handle = attach_pipeline(Pipeline(), 'test_volume')

    pricing = USEquityPricing
    close_handle.add(pricing.close.latest, 'close')
    volume_handle.add(pricing.volume.latest, 'volume')
def test_correlation_factors(self, returns_length, correlation_length):
    """
    Tests for the built-in factors `RollingPearsonOfReturns` and
    `RollingSpearmanOfReturns`.

    For each mask (two fixture masks plus `NotSpecified`) both factors are
    computed through a pipeline and compared against coefficients
    recomputed with `pearsonr` and `spearmanr`, with NaN wherever the
    expected mask is false.

    Parameters
    ----------
    returns_length
        Window length of the `Returns` factor, also passed to both
        correlation factors.
    correlation_length
        Number of rows of returns each correlation is computed over.
    """
    # Pull the fixture attributes into locals once.
    assets = self.assets
    my_asset = self.my_asset
    my_asset_column = self.my_asset_column
    dates = self.dates
    start_date = self.pipeline_start_date
    end_date = self.pipeline_end_date
    start_date_index = self.start_date_index
    end_date_index = self.end_date_index
    num_days = self.num_days
    run_pipeline = self.run_pipeline

    returns = Returns(window_length=returns_length)
    masks = (self.cascading_mask, self.alternating_mask, NotSpecified)
    expected_mask_results = (
        self.expected_cascading_mask_result,
        self.expected_alternating_mask_result,
        self.expected_no_mask_result,
    )

    for mask, expected_mask in zip(masks, expected_mask_results):
        pearson_factor = RollingPearsonOfReturns(
            target=my_asset,
            returns_length=returns_length,
            correlation_length=correlation_length,
            mask=mask,
        )
        spearman_factor = RollingSpearmanOfReturns(
            target=my_asset,
            returns_length=returns_length,
            correlation_length=correlation_length,
            mask=mask,
        )

        columns = {
            "pearson_factor": pearson_factor,
            "spearman_factor": spearman_factor,
        }
        pipeline = Pipeline(columns=columns)
        # The mask itself is only added as a column when one was given.
        if mask is not NotSpecified:
            pipeline.add(mask, "mask")

        results = run_pipeline(pipeline, start_date, end_date)
        pearson_results = results["pearson_factor"].unstack()
        spearman_results = results["spearman_factor"].unstack()
        if mask is not NotSpecified:
            mask_results = results["mask"].unstack()
            check_arrays(mask_results.values, expected_mask)

        # Run a separate pipeline that calculates returns starting
        # (correlation_length - 1) days prior to our start date. This is
        # because we need (correlation_length - 1) extra days of returns to
        # compute our expected correlations.
        results = run_pipeline(
            Pipeline(columns={"returns": returns}),
            dates[start_date_index - (correlation_length - 1)],
            dates[end_date_index],
        )
        returns_results = results["returns"].unstack()

        # On each day, calculate the expected correlation coefficients
        # between the asset we are interested in and each other asset. Each
        # correlation is calculated over `correlation_length` days.
        expected_pearson_results = np.full_like(pearson_results, nan)
        expected_spearman_results = np.full_like(spearman_results, nan)
        for day in range(num_days):
            todays_returns = returns_results.iloc[
                day:day + correlation_length
            ]
            my_asset_returns = todays_returns.iloc[:, my_asset_column]
            # `DataFrame.items` yields the same (column label, Series)
            # pairs as `iteritems`, which was removed in pandas 2.0.
            for asset, other_asset_returns in todays_returns.items():
                # The output column is derived as int(asset) - 1.
                asset_column = int(asset) - 1
                # Element [0] of each result is stored as the coefficient.
                expected_pearson_results[day, asset_column] = pearsonr(
                    my_asset_returns,
                    other_asset_returns,
                )[0]
                expected_spearman_results[day, asset_column] = spearmanr(
                    my_asset_returns,
                    other_asset_returns,
                )[0]

        # Blank out entries where the expected mask is false.
        expected_pearson_results = pd.DataFrame(
            data=np.where(expected_mask, expected_pearson_results, nan),
            index=dates[start_date_index:end_date_index + 1],
            columns=assets,
        )
        assert_frame_equal(pearson_results, expected_pearson_results)

        expected_spearman_results = pd.DataFrame(
            data=np.where(expected_mask, expected_spearman_results, nan),
            index=dates[start_date_index:end_date_index + 1],
            columns=assets,
        )
        assert_frame_equal(spearman_results, expected_spearman_results)
def test_compute_earnings(self, dates):
    """
    Compare pipeline earnings columns with the expectations from ``setup``.

    ``self.setup(dates)`` supplies the engine and four callables that return
    the expected series for a sid. The test checks that null timestamps
    line up with null offsets, then compares each computed column against
    its expectation for every sid in ``self.sids``.
    """
    (
        engine,
        want_next,
        want_next_offset,
        want_previous,
        want_previous_offset,
    ) = self.setup(dates)

    columns = {
        'next': EarningsCalendar.next_announcement.latest,
        'previous': EarningsCalendar.previous_announcement.latest,
        'days_to_next': BusinessDaysUntilNextEarnings(),
        'days_since_prev': BusinessDaysSincePreviousEarnings(),
    }
    result = engine.run_pipeline(
        Pipeline(columns=columns),
        start_date=dates[0],
        end_date=dates[-1],
    )

    got_next = result['next']
    got_previous = result['previous']
    got_next_offset = result['days_to_next']
    got_previous_offset = result['days_since_prev']

    # NaTs in next/prev should correspond to NaNs in offsets.
    null_pairs = (
        (got_next, got_next_offset),
        (got_previous, got_previous_offset),
    )
    for timestamps, offsets in null_pairs:
        assert_series_equal(timestamps.isnull(), offsets.isnull())

    per_sid_checks = (
        (got_next, want_next),
        (got_previous, want_previous),
        (got_next_offset, want_next_offset),
        (got_previous_offset, want_previous_offset),
    )
    for sid in self.sids:
        for computed, expected_for in per_sid_checks:
            # NOTE(review): sid is also passed as the third positional
            # argument; its meaning depends on which assert_series_equal
            # is imported -- confirm.
            assert_series_equal(
                computed.xs(sid, level=1),
                expected_for(sid),
                sid,
            )