def test_correlation_and_regression_with_bad_asset(self):
    """
    Test that `RollingPearsonOfReturns`, `RollingSpearmanOfReturns` and
    `RollingLinearRegressionOfReturns` raise the proper exception when
    given a nonexistent target asset.
    """
    # An Equity with sid 0 -- presumably not registered with the test
    # asset finder, so it never exists in the pipeline's time frame
    # (TODO confirm against the fixture's sids).
    my_asset = Equity(0, exchange="TEST")
    start_date = self.pipeline_start_date
    end_date = self.pipeline_end_date
    run_pipeline = self.run_pipeline

    # This filter is arbitrary; the important thing is that we test each
    # factor both with and without a specified mask.
    my_asset_filter = AssetID().eq(1)

    for mask in (NotSpecified, my_asset_filter):
        pearson_factor = RollingPearsonOfReturns(
            target=my_asset,
            returns_length=3,
            correlation_length=3,
            mask=mask,
        )
        spearman_factor = RollingSpearmanOfReturns(
            target=my_asset,
            returns_length=3,
            correlation_length=3,
            mask=mask,
        )
        regression_factor = RollingLinearRegressionOfReturns(
            target=my_asset,
            returns_length=3,
            regression_length=3,
            mask=mask,
        )

        # Each factor should fail at run time, when the engine discovers
        # the target asset does not exist over the computation window.
        with self.assertRaises(NonExistentAssetInTimeFrame):
            run_pipeline(
                Pipeline(columns={'pearson_factor': pearson_factor}),
                start_date,
                end_date,
            )
        with self.assertRaises(NonExistentAssetInTimeFrame):
            run_pipeline(
                Pipeline(columns={'spearman_factor': spearman_factor}),
                start_date,
                end_date,
            )
        with self.assertRaises(NonExistentAssetInTimeFrame):
            run_pipeline(
                Pipeline(columns={'regression_factor': regression_factor}),
                start_date,
                end_date,
            )
def test_adding_slice_column(self):
    """
    Test that slices cannot be added as a pipeline column.
    """
    first_asset = self.asset_finder.retrieve_asset(self.sids[0])
    sliced_open = OpenPrice()[first_asset]

    # Supplying the slice at construction time should be rejected.
    with self.assertRaises(UnsupportedPipelineOutput):
        Pipeline(columns={'open_slice': sliced_open})

    # Adding the slice to an already-built pipeline should also fail.
    empty_pipe = Pipeline(columns={})
    with self.assertRaises(UnsupportedPipelineOutput):
        empty_pipe.add(sliced_open, 'open_slice')
def check_downsampled_term(self, term):
    """
    Check `term.downsample` at every supported frequency against raw
    (daily) results grouped to the first session of each period and
    forward-filled across the compute dates.
    """
    # June 2014
    # Mo Tu We Th Fr Sa Su
    #                    1
    #  2  3  4  5  6  7  8
    #  9 10 11 12 13 14 15
    # 16 17 18 19 20 21 22
    # 23 24 25 26 27 28 29
    # 30
    all_sessions = self.nyse_sessions
    compute_dates = all_sessions[
        all_sessions.slice_indexer('2014-06-05', '2015-01-06')
    ]
    start_date, end_date = compute_dates[[0, -1]]

    pipe = Pipeline({
        'year': term.downsample(frequency='year_start'),
        'quarter': term.downsample(frequency='quarter_start'),
        'month': term.downsample(frequency='month_start'),
        'week': term.downsample(frequency='week_start'),
    })

    # Raw values for term, computed each day from 2014 to the end of the
    # target period.
    raw_term_results = self.run_pipeline(
        Pipeline({'term': term}),
        start_date=pd.Timestamp('2014-01-02', tz='UTC'),
        end_date=pd.Timestamp('2015-01-06', tz='UTC'),
    )['term'].unstack()

    # For each frequency: take the first raw value of each calendar
    # period, then forward-fill onto the actual compute dates.
    expected_results = {
        'year': (raw_term_results
                 .groupby(pd.TimeGrouper('AS'))
                 .first()
                 .reindex(compute_dates, method='ffill')),
        'quarter': (raw_term_results
                    .groupby(pd.TimeGrouper('QS'))
                    .first()
                    .reindex(compute_dates, method='ffill')),
        'month': (raw_term_results
                  .groupby(pd.TimeGrouper('MS'))
                  .first()
                  .reindex(compute_dates, method='ffill')),
        'week': (raw_term_results
                 .groupby(pd.TimeGrouper('W', label='left'))
                 .first()
                 .reindex(compute_dates, method='ffill')),
    }

    results = self.run_pipeline(pipe, start_date, end_date)

    for frequency in expected_results:
        result = results[frequency].unstack()
        expected = expected_results[frequency]
        assert_frame_equal(result, expected)
def test_slice_with_masking(self, unmasked_column, slice_column):
    """
    Test that masking a factor that uses slices as inputs does not mask
    the slice data.
    """
    sids = self.sids
    asset_finder = self.asset_finder
    start_date = self.pipeline_start_date
    end_date = self.pipeline_end_date

    # Create a filter that masks out all but a single asset.
    unmasked_asset = asset_finder.retrieve_asset(sids[unmasked_column])
    unmasked_asset_only = (AssetID().eq(unmasked_asset.sid))

    # Asset used to create our slice. In the cases where this is different
    # than `unmasked_asset`, our slice should still have non-missing data
    # when used as an input to our custom factor. That is, it should not be
    # masked out.
    slice_asset = asset_finder.retrieve_asset(sids[slice_column])

    returns = Returns(window_length=2, inputs=[self.col])
    returns_slice = returns[slice_asset]

    # Baseline: unmasked per-asset returns, used inside `compute` below to
    # validate the values seen through the mask and through the slice.
    returns_results = self.run_pipeline(
        Pipeline(columns={'returns': returns}),
        start_date,
        end_date,
    )
    returns_results = returns_results['returns'].unstack()

    class UsesSlicedInput(CustomFactor):
        window_length = 1
        inputs = [returns, returns_slice]

        def compute(self, today, assets, out, returns, returns_slice):
            # Ensure that our mask correctly affects the `returns` input
            # and does not affect the `returns_slice` input.
            assert returns.shape == (1, 1)
            assert returns_slice.shape == (1, 1)
            assert returns[0, 0] == \
                returns_results.loc[today, unmasked_asset]
            assert returns_slice[0, 0] == \
                returns_results.loc[today, slice_asset]

    columns = {'masked': UsesSlicedInput(mask=unmasked_asset_only)}

    # Assertions about the expected data are made in the `compute` function
    # of our custom factor above.
    self.run_pipeline(Pipeline(columns=columns), start_date, end_date)
def test_add(self):
    """Factors and filters can be added as named columns; bad inputs raise."""
    pipe = Pipeline()
    factor = SomeFactor()

    pipe.add(factor, 'f')
    self.assertEqual(pipe.columns, {'f': factor})

    pipe.add(factor > 5, 'g')
    self.assertEqual(pipe.columns, {'f': factor, 'g': factor > 5})

    # Column names must be strings.
    with self.assertRaises(TypeError):
        pipe.add(factor, 1)

    # A raw bound column is rejected as a pipeline term.
    with self.assertRaises(TypeError):
        pipe.add(USEquityPricing.open, 'open')
def test_load_with_trading_calendar(self):
    """Run a pipeline over the full calendar and check every dataset column."""
    sessions = self.trading_days
    engine = SimplePipelineEngine(
        lambda x: self.loader,
        sessions,
        self.asset_finder,
    )
    results = engine.run_pipeline(
        Pipeline({column.name: column.latest
                  for column in EventDataSet.columns}),
        start_date=sessions[0],
        end_date=sessions[-1],
    )
    for column in EventDataSet.columns:
        # Dispatch each column to the appropriate checker.
        if column in self.next_value_columns:
            self.check_next_value_results(
                column,
                results[column.name].unstack(),
                sessions,
            )
        elif column in self.previous_value_columns:
            self.check_previous_value_results(
                column,
                results[column.name].unstack(),
                sessions,
            )
        else:
            raise AssertionError("Unexpected column %s." % column)
def test_slice(self, my_asset_column, window_length_):
    """
    Test that slices can be created by indexing into a term, and that
    they have the correct shape when used as inputs.
    """
    sids = self.sids
    my_asset = self.asset_finder.retrieve_asset(self.sids[my_asset_column])

    returns = Returns(window_length=2, inputs=[self.col])
    returns_slice = returns[my_asset]

    class UsesSlicedInput(CustomFactor):
        window_length = window_length_
        inputs = [returns, returns_slice]

        def compute(self, today, assets, out, returns, returns_slice):
            # Make sure that our slice is the correct shape (i.e. has only
            # one column) and that it has the same values as the original
            # returns factor from which it is derived.
            assert returns_slice.shape == (self.window_length, 1)
            assert returns.shape == (self.window_length, len(sids))
            check_arrays(returns_slice[:, 0], returns[:, my_asset_column])

    # Assertions about the expected slice data are made in the `compute`
    # function of our custom factor above.
    self.run_pipeline(
        Pipeline(columns={'uses_sliced_input': UsesSlicedInput()}),
        self.pipeline_start_date,
        self.pipeline_end_date,
    )
def test_load_properly_forward_fills(self):
    """
    Loading over only the back half of the window should forward-fill
    values from events before the window, producing the same results as
    computing over the full window and slicing afterwards.
    """
    engine = SimplePipelineEngine(
        lambda x: self.loader,
        self.trading_days,
        self.asset_finder,
    )
    # Cut the dates in half so we need to forward fill some data which
    # is not in our window. The results should be computed the same as if
    # we had computed across the entire window and then sliced after the
    # computation.
    # BUGFIX: use floor division -- under Python 3, `len(...) / 2` is a
    # float, and slicing an index with a float raises a TypeError.
    dates = self.trading_days[len(self.trading_days) // 2:]
    results = engine.run_pipeline(
        Pipeline({c.name: c.latest for c in EventDataSet.columns}),
        start_date=dates[0],
        end_date=dates[-1],
    )
    for c in EventDataSet.columns:
        # Dispatch each column to the matching checker.
        if c in self.next_value_columns:
            self.check_next_value_results(
                c,
                results[c.name].unstack(),
                dates,
            )
        elif c in self.previous_value_columns:
            self.check_previous_value_results(
                c,
                results[c.name].unstack(),
                dates,
            )
        else:
            raise AssertionError("Unexpected column %s." % c)
def test_construction(self):
    """Pipeline() accepts optional `columns` and `screen` arguments."""
    # Default construction: no columns, no screen.
    empty = Pipeline()
    self.assertEqual(empty.columns, {})
    self.assertIs(empty.screen, None)

    # Columns only.
    columns = {'f': SomeFactor()}
    with_columns = Pipeline(columns=columns)
    self.assertEqual(with_columns.columns, columns)

    # Screen only.
    screen = SomeFilter()
    with_screen = Pipeline(screen=screen)
    self.assertEqual(with_screen.columns, {})
    self.assertEqual(with_screen.screen, screen)

    # Both together.
    with_both = Pipeline(columns=columns, screen=screen)
    self.assertEqual(with_both.columns, columns)
    self.assertEqual(with_both.screen, screen)
def make_pipeline():
    """Build the crossover pipeline: latest open price plus two VWAPs."""
    columns = {
        'price': CryptoPricing.open.latest,
        'short_mavg': VWAP(window_length=SHORT_WINDOW),
        'long_mavg': VWAP(window_length=LONG_WINDOW),
    }
    return Pipeline(columns=columns)
def initialize(context):
    """Attach a pipeline with a 10-day VWAP column and an impossible screen."""
    pipe = attach_pipeline(Pipeline(), 'test')
    vwap_factor = VWAP(window_length=10)
    pipe.add(vwap_factor, 'vwap')

    # Nothing should have prices less than 0.
    pipe.set_screen(vwap_factor < 0)
def test_factor_correlation_methods(self, returns_length, correlation_length):
    """
    Ensure that `Factor.pearsonr` and `Factor.spearmanr` are consistent
    with the built-in factors `RollingPearsonOfReturns` and
    `RollingSpearmanOfReturns`.
    """
    my_asset = self.my_asset
    start_date = self.pipeline_start_date
    end_date = self.pipeline_end_date
    run_pipeline = self.run_pipeline

    returns = Returns(window_length=returns_length, inputs=[self.col])
    returns_slice = returns[my_asset]

    # Method-based spellings.
    pearson = returns.pearsonr(
        target=returns_slice, correlation_length=correlation_length,
    )
    spearman = returns.spearmanr(
        target=returns_slice, correlation_length=correlation_length,
    )

    # Equivalent built-in factor spellings.
    expected_pearson = RollingPearsonOfReturns(
        target=my_asset,
        returns_length=returns_length,
        correlation_length=correlation_length,
    )
    expected_spearman = RollingSpearmanOfReturns(
        target=my_asset,
        returns_length=returns_length,
        correlation_length=correlation_length,
    )

    # These built-ins construct their own Returns factor to use as inputs,
    # so the only way to set our own inputs is to do so after the fact.
    # This should not be done in practice. It is necessary here because we
    # want Returns to use our random data as an input, but by default it is
    # using USEquityPricing.close.
    expected_pearson.inputs = [returns, returns_slice]
    expected_spearman.inputs = [returns, returns_slice]

    columns = {
        'pearson': pearson,
        'spearman': spearman,
        'expected_pearson': expected_pearson,
        'expected_spearman': expected_spearman,
    }

    results = run_pipeline(Pipeline(columns=columns), start_date, end_date)
    pearson_results = results['pearson'].unstack()
    spearman_results = results['spearman'].unstack()
    expected_pearson_results = results['expected_pearson'].unstack()
    expected_spearman_results = results['expected_spearman'].unstack()

    # Both spellings should produce identical frames.
    assert_frame_equal(pearson_results, expected_pearson_results)
    assert_frame_equal(spearman_results, expected_spearman_results)
def test_load_empty(self):
    """
    For the case where raw data is empty, make sure we have a result for
    all sids, that the dimensions are correct, and that we have the
    correct missing value.
    """
    # An empty frame with the schema the loader expects.
    raw_events = pd.DataFrame(
        columns=["sid",
                 "timestamp",
                 "event_date",
                 "float",
                 "int",
                 "datetime",
                 "string"]
    )
    next_value_columns = {
        EventDataSet.next_datetime: 'datetime',
        EventDataSet.next_event_date: 'event_date',
        EventDataSet.next_float: 'float',
        EventDataSet.next_int: 'int',
        EventDataSet.next_string: 'string',
        EventDataSet.next_string_custom_missing: 'string'
    }
    previous_value_columns = {
        EventDataSet.previous_datetime: 'datetime',
        EventDataSet.previous_event_date: 'event_date',
        EventDataSet.previous_float: 'float',
        EventDataSet.previous_int: 'int',
        EventDataSet.previous_string: 'string',
        EventDataSet.previous_string_custom_missing: 'string'
    }
    loader = EventsLoader(
        raw_events, next_value_columns, previous_value_columns
    )
    engine = SimplePipelineEngine(
        lambda x: loader,
        self.trading_days,
        self.asset_finder,
    )
    results = engine.run_pipeline(
        Pipeline({c.name: c.latest for c in EventDataSet.columns}),
        start_date=self.trading_days[0],
        end_date=self.trading_days[-1],
    )

    assets = self.asset_finder.retrieve_all(self.ASSET_FINDER_EQUITY_SIDS)
    dates = self.trading_days

    # With no raw events, every cell over the full dates x assets grid
    # should hold the column's missing value.
    expected = self.frame_containing_all_missing_values(
        index=pd.MultiIndex.from_product([dates, assets]),
        columns=EventDataSet.columns,
    )

    assert_equal(results, expected)
def _test_masked_single_column_output(self):
    """
    Tests for masking custom factors that compute a 1D out.
    """
    start_date = self.pipeline_start_date
    end_date = self.pipeline_end_date

    alternating_mask = (AssetIDPlusDay() % 2).eq(0)
    cascading_mask = AssetIDPlusDay() < (self.sids[-1] + start_date.day)
    # Mark the masks window-safe so they can be passed as factor inputs
    # below.
    alternating_mask.window_safe = True
    cascading_mask.window_safe = True

    for mask in (alternating_mask, cascading_mask):
        class SingleColumnOutput(CustomFactor):
            window_length = 1
            inputs = [self.col, mask]
            window_safe = True
            ndim = 1

            def compute(self, today, assets, out, col, mask):
                # Because we specified ndim as 1, `out` should always be a
                # singleton array but `close` should be a sized based on
                # the mask we passed.
                assert out.shape == (1,)
                assert col.shape == (1, mask.sum())
                out[:] = col.sum()

        # Since we cannot add single column output factors as pipeline
        # columns, we have to test its output through another factor.
        class UsesSingleColumnInput(CustomFactor):
            window_length = 1
            inputs = [self.col, mask, SingleColumnOutput(mask=mask)]

            def compute(self, today, assets, out, col, mask,
                        single_column_output):
                # Make sure that `single_column` has the correct value
                # based on the mask it used.
                assert single_column_output.shape == (1, 1)
                single_column_output_value = single_column_output[0][0]
                expected_value = where(mask, col, 0).sum()
                assert single_column_output_value == expected_value

        columns = {'uses_single_column_input': UsesSingleColumnInput()}

        # Assertions about the expected shapes of our data are made in the
        # `compute` function of our custom factors above.
        self.run_pipeline(Pipeline(columns=columns), start_date, end_date)
def initialize(context):
    """Attach a pipeline with one VWAP column per requested window length."""
    pipe = Pipeline()
    context.vwaps = []
    for window in vwaps:
        vwap_factor = VWAP(window_length=window)
        context.vwaps.append(vwap_factor)
        pipe.add(vwap_factor, name=vwap_key(window))

    # Expensive-stock filter, exposed as a column and optionally as the
    # pipeline screen.
    expensive = (USEquityPricing.close.latest > 300)
    pipe.add(expensive, 'filter')
    if set_screen:
        pipe.set_screen(expensive)

    attach_pipeline(pipe, 'test')
def test_overwrite(self):
    """Re-adding an existing column name requires overwrite=True."""
    pipe = Pipeline()
    first = SomeFactor()
    second = SomeOtherFactor()

    pipe.add(first, 'f')
    self.assertEqual(pipe.columns, {'f': first})

    # Without overwrite, the duplicate name is rejected with a KeyError.
    with self.assertRaises(KeyError) as e:
        pipe.add(second, 'f')
    [message] = e.exception.args
    self.assertEqual(message, "Column 'f' already exists.")

    # With overwrite=True, the new term replaces the old one.
    pipe.add(second, 'f', overwrite=True)
    self.assertEqual(pipe.columns, {'f': second})
def test_set_screen(self):
    """Replacing a screen requires overwrite; overwrite must be bool/int."""
    first, second = SomeFilter(), SomeOtherFilter()

    pipe = Pipeline()
    self.assertEqual(pipe.screen, None)

    pipe.set_screen(first)
    self.assertEqual(pipe.screen, first)

    # Setting a screen when one already exists is an error.
    with self.assertRaises(ValueError):
        pipe.set_screen(first)

    pipe.set_screen(second, overwrite=True)
    self.assertEqual(pipe.screen, second)

    # Passing a non-bool/int (here, a filter) as `overwrite` raises.
    with self.assertRaises(TypeError) as e:
        pipe.set_screen(first, second)
    message = e.exception.args[0]
    self.assertIn(
        "expected a value of type bool or int for argument 'overwrite'",
        message,
    )
def test_remove(self):
    """remove() returns the dropped term; unknown names raise KeyError."""
    factor = SomeFactor()
    pipe = Pipeline(columns={'f': factor})

    # Unknown names raise.
    with self.assertRaises(KeyError):
        pipe.remove('not_a_real_name')

    # Removal returns the term originally registered under the name.
    self.assertEqual(factor, pipe.remove('f'))

    # A second removal raises, with the name as the exception arg.
    with self.assertRaises(KeyError) as e:
        pipe.remove('f')
    self.assertEqual(e.exception.args, ('f', ))
def make_pipeline(context):
    """Pipeline with latest price, SMA, and annualized volatility columns."""
    window = context.WINDOW
    return Pipeline(
        columns={
            'price': CryptoPricing.open.latest,
            'sma': SimpleMovingAverage(
                inputs=[CryptoPricing.close],
                window_length=window,
            ),
            'std': AnnualizedVolatility(
                inputs=[CryptoPricing.close],
                window_length=window,
                annualization_factor=1,
            ),
        }
    )
def test_show_graph(self):
    """
    show_graph() should forward the pipeline graph and arguments to
    display_graph, and reject unsupported image formats.
    """
    f = SomeFactor()
    p = Pipeline(columns={'f': SomeFactor()})

    # The real display_graph call shells out to GraphViz, which isn't a
    # requirement, so patch it out for testing.
    def mock_display_graph(g, format='svg', include_asset_exists=False):
        return (g, format, include_asset_exists)

    self.assertEqual(
        inspect.getargspec(display_graph),
        inspect.getargspec(mock_display_graph),
        msg="Mock signature doesn't match signature for display_graph.")

    patch_display_graph = patch(
        'catalyst.pipeline.graph.display_graph',
        mock_display_graph,
    )

    with patch_display_graph:
        graph, format, include_asset_exists = p.show_graph()
        # NOTE(review): this identity check presumably relies on pipeline
        # terms being memoized, since `f` and the pipeline's factor are
        # constructed separately -- confirm against the Term cache.
        self.assertIs(graph.outputs['f'], f)
        # '' is a sentinel used for screen if it's not supplied.
        self.assertEqual(sorted(graph.outputs.keys()), ['', 'f'])
        self.assertEqual(format, 'svg')
        self.assertEqual(include_asset_exists, False)

    with patch_display_graph:
        graph, format, include_asset_exists = p.show_graph(format='png')
        self.assertIs(graph.outputs['f'], f)
        # '' is a sentinel used for screen if it's not supplied.
        self.assertEqual(sorted(graph.outputs.keys()), ['', 'f'])
        self.assertEqual(format, 'png')
        self.assertEqual(include_asset_exists, False)

    with patch_display_graph:
        graph, format, include_asset_exists = p.show_graph(format='jpeg')
        self.assertIs(graph.outputs['f'], f)
        # '' is a sentinel used for screen if it's not supplied.
        self.assertEqual(sorted(graph.outputs.keys()), ['', 'f'])
        self.assertEqual(format, 'jpeg')
        self.assertEqual(include_asset_exists, False)

    # Unsupported formats should raise with a descriptive message.
    expected = (r".*\.show_graph\(\) expected a value in "
                r"\('svg', 'png', 'jpeg'\) for argument 'format', "
                r"but got 'fizzbuzz' instead.")

    with self.assertRaisesRegexp(ValueError, expected):
        p.show_graph(format='fizzbuzz')
def test_latest(self):
    """`latest` of each dataset column matches expected_latest() output."""
    dataset_columns = TDS.columns
    pipe = Pipeline(
        columns={col.name: col.latest for col in dataset_columns},
    )

    # Run over a 20-session window in the middle of the calendar.
    cal_slice = slice(20, 40)
    test_dates = self.calendar[cal_slice]
    result = self.engine.run_pipeline(pipe, test_dates[0], test_dates[-1])

    for col in dataset_columns:
        with ignore_pandas_nan_categorical_warning():
            unstacked = result[col.name].unstack()
        assert_frame_equal(unstacked, self.expected_latest(col, cal_slice))
def test_single_column_output(self):
    """
    Tests for custom factors that compute a 1D out.
    """
    start_date = self.pipeline_start_date
    end_date = self.pipeline_end_date

    alternating_mask = (AssetIDPlusDay() % 2).eq(0)
    cascading_mask = AssetIDPlusDay() < (self.sids[-1] + start_date.day)

    class SingleColumnOutput(CustomFactor):
        window_length = 1
        inputs = [self.col]
        window_safe = True
        ndim = 1

        def compute(self, today, assets, out, col):
            # Because we specified ndim as 1, `out` should be a singleton
            # array but `close` should be a regular sized input.
            # NOTE(review): the hard-coded 3 presumably matches the number
            # of sids in this test case -- confirm against the fixture.
            assert out.shape == (1, )
            assert col.shape == (1, 3)
            out[:] = col.sum()

    # Since we cannot add single column output factors as pipeline
    # columns, we have to test its output through another factor.
    class UsesSingleColumnOutput(CustomFactor):
        window_length = 1
        inputs = [SingleColumnOutput()]

        def compute(self, today, assets, out, single_column_output):
            # Make sure that `single_column` has the correct shape. That
            # is, it should always have one column regardless of any mask
            # passed to `UsesSingleColumnInput`.
            assert single_column_output.shape == (1, 1)

    for mask in (alternating_mask, cascading_mask):
        columns = {
            'uses_single_column_output': UsesSingleColumnOutput(),
            'uses_single_column_output_masked': UsesSingleColumnOutput(
                mask=mask,
            ),
        }

        # Assertions about the expected shapes of our data are made in the
        # `compute` function of our custom factors above.
        self.run_pipeline(Pipeline(columns=columns), start_date, end_date)
def test_show_graph(self):
    """
    show_graph() forwards the graph and keyword arguments to
    display_graph; unsupported formats raise ValueError.
    """
    f = SomeFactor()
    p = Pipeline(columns={'f': SomeFactor()})

    # The real display_graph call shells out to GraphViz, which isn't a
    # requirement, so patch it out for testing.
    def mock_display_graph(g, format='svg', include_asset_exists=False):
        return (g, format, include_asset_exists)

    self.assertEqual(
        inspect.getargspec(display_graph),
        inspect.getargspec(mock_display_graph),
        msg="Mock signature doesn't match signature for display_graph."
    )

    patch_display_graph = patch(
        'catalyst.pipeline.graph.display_graph',
        mock_display_graph,
    )

    with patch_display_graph:
        graph, format, include_asset_exists = p.show_graph()
        # NOTE(review): identity presumably holds via term memoization,
        # since `f` and the pipeline's factor are separate constructions
        # -- confirm against the Term cache.
        self.assertIs(graph.outputs['f'], f)
        # '' is a sentinel used for screen if it's not supplied.
        self.assertEqual(sorted(graph.outputs.keys()), ['', 'f'])
        self.assertEqual(format, 'svg')
        self.assertEqual(include_asset_exists, False)

    with patch_display_graph:
        graph, format, include_asset_exists = p.show_graph(format='png')
        self.assertIs(graph.outputs['f'], f)
        # '' is a sentinel used for screen if it's not supplied.
        self.assertEqual(sorted(graph.outputs.keys()), ['', 'f'])
        self.assertEqual(format, 'png')
        self.assertEqual(include_asset_exists, False)

    with patch_display_graph:
        graph, format, include_asset_exists = p.show_graph(format='jpeg')
        self.assertIs(graph.outputs['f'], f)
        # '' is a sentinel used for screen if it's not supplied.
        self.assertEqual(sorted(graph.outputs.keys()), ['', 'f'])
        self.assertEqual(format, 'jpeg')
        self.assertEqual(include_asset_exists, False)

    # Unsupported formats should raise with a descriptive message.
    expected = (
        r".*\.show_graph\(\) expected a value in "
        r"\('svg', 'png', 'jpeg'\) for argument 'format', "
        r"but got 'fizzbuzz' instead."
    )

    with self.assertRaisesRegexp(ValueError, expected):
        p.show_graph(format='fizzbuzz')
def _check_filters(self, evens, odds, first_five, last_three):
    """Run the given filters alongside SidFactor and check them by sid."""
    pipe = Pipeline(
        columns={
            'sid': SidFactor(),
            'evens': evens,
            'odds': odds,
            'first_five': first_five,
            'last_three': last_three,
        },
    )

    # Compute over the last ten sessions of the calendar.
    start, end = self.trading_days[[-10, -1]]
    results = self.run_pipeline(pipe, start, end).unstack()

    computed_sids = results.sid.astype(int64_dtype)
    parity = (computed_sids % 2).astype(bool)
    assert_equal(results.evens, ~parity)
    assert_equal(results.odds, parity)
    assert_equal(results.first_five, computed_sids < 5)
    assert_equal(results.last_three, computed_sids >= 7)
def test_remove(self):
    """Removing a column returns it; missing names raise KeyError."""
    term = SomeFactor()
    pipeline = Pipeline(columns={'f': term})

    # A name that was never added raises.
    with self.assertRaises(KeyError):
        pipeline.remove('not_a_real_name')

    removed = pipeline.remove('f')
    self.assertEqual(term, removed)

    # Removing the same name twice raises with the name as the arg.
    with self.assertRaises(KeyError) as ctx:
        pipeline.remove('f')
    self.assertEqual(ctx.exception.args, ('f',))
def _test_factor_regression_method(self, returns_length, regression_length):
    """
    Ensure that `Factor.linear_regression` is consistent with the
    built-in factor `RollingLinearRegressionOfReturns`.
    """
    my_asset = self.my_asset
    start_date = self.pipeline_start_date
    end_date = self.pipeline_end_date
    run_pipeline = self.run_pipeline

    returns = Returns(window_length=returns_length, inputs=[self.col])
    returns_slice = returns[my_asset]

    # Method-based spelling.
    regression = returns.linear_regression(
        target=returns_slice, regression_length=regression_length,
    )

    # Equivalent built-in factor spelling.
    expected_regression = RollingLinearRegressionOfReturns(
        target=my_asset,
        returns_length=returns_length,
        regression_length=regression_length,
    )

    # This built-in constructs its own Returns factor to use as an input,
    # so the only way to set our own input is to do so after the fact. This
    # should not be done in practice. It is necessary here because we want
    # Returns to use our random data as an input, but by default it is
    # using USEquityPricing.close.
    expected_regression.inputs = [returns, returns_slice]

    columns = {
        'regression': regression,
        'expected_regression': expected_regression,
    }

    results = run_pipeline(Pipeline(columns=columns), start_date, end_date)
    regression_results = results['regression'].unstack()
    expected_regression_results = results['expected_regression'].unstack()

    # Both spellings should produce identical output frames.
    assert_frame_equal(regression_results, expected_regression_results)
def test_non_existent_asset(self):
    """
    Test that indexing into a term with a non-existent asset raises the
    proper exception.
    """
    # An Asset with sid 0 -- presumably absent from the test asset
    # finder, so it never exists over the pipeline's time frame.
    missing_asset = Asset(0, exchange="TEST")
    returns = Returns(window_length=2, inputs=[self.col])
    missing_slice = returns[missing_asset]

    class UsesSlicedInput(CustomFactor):
        window_length = 1
        inputs = [missing_slice]

        def compute(self, today, assets, out, returns_slice):
            # The pipeline should fail before this is ever invoked.
            pass

    with self.assertRaises(NonExistentAssetInTimeFrame):
        self.run_pipeline(
            Pipeline(columns={'uses_sliced_input': UsesSlicedInput()}),
            self.pipeline_start_date,
            self.pipeline_end_date,
        )
def test_construction_bad_input_types(self):
    """Pipeline rejects non-dict columns and non-filter screens."""
    # `columns` must be a dict.
    with self.assertRaises(TypeError):
        Pipeline(1)
    Pipeline({})

    # `screen` must be a Filter -- not an arbitrary value or a Factor.
    with self.assertRaises(TypeError):
        Pipeline({}, 1)
    with self.assertRaises(TypeError):
        Pipeline({}, SomeFactor())

    # Raw bound columns cannot be used directly as pipeline columns.
    with self.assertRaises(TypeError):
        Pipeline({'open': USEquityPricing.open})

    # Comparing a factor yields a Filter, which is an acceptable screen.
    Pipeline({}, SomeFactor() > 5)
def late_attach(context, data):
    """Attempt to attach a pipeline mid-simulation; expected to raise."""
    late_pipeline = Pipeline()
    attach_pipeline(late_pipeline, 'test')
    # attach_pipeline should raise above, so this is never reached.
    raise AssertionError("Shouldn't make it past attach_pipeline!")
def test_correlation_factors(self, returns_length, correlation_length):
    """
    Tests for the built-in factors `RollingPearsonOfReturns` and
    `RollingSpearmanOfReturns`.
    """
    assets = self.assets
    my_asset = self.my_asset
    my_asset_column = self.my_asset_column
    dates = self.dates
    start_date = self.pipeline_start_date
    end_date = self.pipeline_end_date
    start_date_index = self.start_date_index
    end_date_index = self.end_date_index
    num_days = self.num_days
    run_pipeline = self.run_pipeline

    returns = Returns(window_length=returns_length)
    masks = (self.cascading_mask, self.alternating_mask, NotSpecified)
    expected_mask_results = (
        self.expected_cascading_mask_result,
        self.expected_alternating_mask_result,
        self.expected_no_mask_result,
    )

    for mask, expected_mask in zip(masks, expected_mask_results):
        pearson_factor = RollingPearsonOfReturns(
            target=my_asset,
            returns_length=returns_length,
            correlation_length=correlation_length,
            mask=mask,
        )
        spearman_factor = RollingSpearmanOfReturns(
            target=my_asset,
            returns_length=returns_length,
            correlation_length=correlation_length,
            mask=mask,
        )

        columns = {
            'pearson_factor': pearson_factor,
            'spearman_factor': spearman_factor,
        }
        pipeline = Pipeline(columns=columns)
        if mask is not NotSpecified:
            pipeline.add(mask, 'mask')

        results = run_pipeline(pipeline, start_date, end_date)
        pearson_results = results['pearson_factor'].unstack()
        spearman_results = results['spearman_factor'].unstack()
        if mask is not NotSpecified:
            mask_results = results['mask'].unstack()
            check_arrays(mask_results.values, expected_mask)

        # Run a separate pipeline that calculates returns starting
        # (correlation_length - 1) days prior to our start date. This is
        # because we need (correlation_length - 1) extra days of returns to
        # compute our expected correlations.
        results = run_pipeline(
            Pipeline(columns={'returns': returns}),
            dates[start_date_index - (correlation_length - 1)],
            dates[end_date_index],
        )
        returns_results = results['returns'].unstack()

        # On each day, calculate the expected correlation coefficients
        # between the asset we are interested in and each other asset. Each
        # correlation is calculated over `correlation_length` days.
        expected_pearson_results = full_like(pearson_results, nan)
        expected_spearman_results = full_like(spearman_results, nan)
        for day in range(num_days):
            todays_returns = returns_results.iloc[
                day:day + correlation_length
            ]
            my_asset_returns = todays_returns.iloc[:, my_asset_column]
            for asset, other_asset_returns in todays_returns.iteritems():
                # NOTE(review): assumes sids are consecutive integers
                # starting at 1 -- confirm against the test fixture.
                asset_column = int(asset) - 1
                expected_pearson_results[day, asset_column] = pearsonr(
                    my_asset_returns, other_asset_returns,
                )[0]
                expected_spearman_results[day, asset_column] = spearmanr(
                    my_asset_returns, other_asset_returns,
                )[0]

        # Apply the mask to the expected values, then compare frames.
        expected_pearson_results = DataFrame(
            data=where(expected_mask, expected_pearson_results, nan),
            index=dates[start_date_index:end_date_index + 1],
            columns=assets,
        )
        assert_frame_equal(pearson_results, expected_pearson_results)

        expected_spearman_results = DataFrame(
            data=where(expected_mask, expected_spearman_results, nan),
            index=dates[start_date_index:end_date_index + 1],
            columns=assets,
        )
        assert_frame_equal(spearman_results, expected_spearman_results)
def test_regression_of_returns_factor(self,
                                      returns_length,
                                      regression_length):
    """
    Tests for the built-in factor `RollingLinearRegressionOfReturns`.
    """
    assets = self.assets
    my_asset = self.my_asset
    my_asset_column = self.my_asset_column
    dates = self.dates
    start_date = self.pipeline_start_date
    end_date = self.pipeline_end_date
    start_date_index = self.start_date_index
    end_date_index = self.end_date_index
    num_days = self.num_days
    run_pipeline = self.run_pipeline

    # The order of these is meant to align with the output of `linregress`.
    outputs = ['beta', 'alpha', 'r_value', 'p_value', 'stderr']

    returns = Returns(window_length=returns_length)
    masks = self.cascading_mask, self.alternating_mask, NotSpecified
    expected_mask_results = (
        self.expected_cascading_mask_result,
        self.expected_alternating_mask_result,
        self.expected_no_mask_result,
    )

    for mask, expected_mask in zip(masks, expected_mask_results):
        regression_factor = RollingLinearRegressionOfReturns(
            target=my_asset,
            returns_length=returns_length,
            regression_length=regression_length,
            mask=mask,
        )

        # One pipeline column per regression output attribute.
        columns = {
            output: getattr(regression_factor, output)
            for output in outputs
        }
        pipeline = Pipeline(columns=columns)
        if mask is not NotSpecified:
            pipeline.add(mask, 'mask')

        results = run_pipeline(pipeline, start_date, end_date)
        if mask is not NotSpecified:
            mask_results = results['mask'].unstack()
            check_arrays(mask_results.values, expected_mask)

        output_results = {}
        expected_output_results = {}
        for output in outputs:
            output_results[output] = results[output].unstack()
            expected_output_results[output] = full_like(
                output_results[output], nan,
            )

        # Run a separate pipeline that calculates returns starting
        # (regression_length - 1) days prior to our start date. This is
        # because we need (regression_length - 1) extra days of returns to
        # compute our expected regressions.
        results = run_pipeline(
            Pipeline(columns={'returns': returns}),
            dates[start_date_index - (regression_length - 1)],
            dates[end_date_index],
        )
        returns_results = results['returns'].unstack()

        # On each day, calculate the expected regression results for Y ~ X
        # where Y is the asset we are interested in and X is each other
        # asset. Each regression is calculated over `regression_length`
        # days of data.
        for day in range(num_days):
            todays_returns = returns_results.iloc[
                day:day + regression_length
            ]
            my_asset_returns = todays_returns.iloc[:, my_asset_column]
            for asset, other_asset_returns in todays_returns.iteritems():
                # NOTE(review): assumes sids are consecutive integers
                # starting at 1 -- confirm against the test fixture.
                asset_column = int(asset) - 1
                expected_regression_results = linregress(
                    y=other_asset_returns, x=my_asset_returns,
                )
                for i, output in enumerate(outputs):
                    expected_output_results[output][day, asset_column] = \
                        expected_regression_results[i]

        # Apply the mask to each expected output, then compare frames.
        for output in outputs:
            output_result = output_results[output]
            expected_output_result = DataFrame(
                where(expected_mask, expected_output_results[output], nan),
                index=dates[start_date_index:end_date_index + 1],
                columns=assets,
            )
            assert_frame_equal(output_result, expected_output_result)
def initialize(context):
    """Attach a chunked pipeline and add a latest-close column."""
    attached = attach_pipeline(Pipeline(), 'test', chunks=chunks)
    attached.add(USEquityPricing.close.latest, 'close')
def initialize(context):
    """Attach an empty pipeline under the name 'test'."""
    empty_pipeline = Pipeline()
    attach_pipeline(empty_pipeline, 'test')
def initialize(context):
    """Requesting pipeline output during initialize is expected to raise."""
    attach_pipeline(Pipeline(), 'test')
    # pipeline_output should raise here, so the assertion is unreachable.
    pipeline_output('test')
    raise AssertionError("Shouldn't make it past pipeline_output()")