def test_baseline(self): data = arange(100).reshape(self.ndates, self.nsids) baseline = DataFrame(data, index=self.dates, columns=self.sids) loader = DataFrameFFCLoader(USEquityPricing.close, baseline) dates_slice = slice(None, 10, None) sids_slice = slice(1, 3, None) [adj_array] = loader.load_adjusted_array([USEquityPricing.close], self.mask.iloc[dates_slice, sids_slice]) for idx, window in enumerate(adj_array.traverse(window_length=3)): expected = baseline.values[dates_slice, sids_slice][idx : idx + 3] assert_array_equal(window, expected)
def test_bad_input(self): data = arange(100).reshape(self.ndates, self.nsids) baseline = DataFrame(data, index=self.dates, columns=self.sids) loader = DataFrameFFCLoader(USEquityPricing.close, baseline) with self.assertRaises(ValueError): # Wrong column. loader.load_adjusted_array([USEquityPricing.open], self.mask) with self.assertRaises(ValueError): # Too many columns. loader.load_adjusted_array([USEquityPricing.open, USEquityPricing.close], self.mask)
def test_baseline(self): data = arange(100).reshape(self.ndates, self.nsids) baseline = DataFrame(data, index=self.dates, columns=self.sids) loader = DataFrameFFCLoader( USEquityPricing.close, baseline, ) dates_slice = slice(None, 10, None) sids_slice = slice(1, 3, None) adj_array = loader.load_adjusted_array([USEquityPricing.close], self.mask.iloc[dates_slice, sids_slice]) for idx, window in enumerate(adj_array.traverse(window_length=3)): expected = baseline.values[dates_slice, sids_slice][idx:idx + 3] assert_array_equal(window, expected)
def __init__(self, constants, dates, assets): loaders = {} for column, const in iteritems(constants): frame = DataFrame( const, index=dates, columns=assets, dtype=column.dtype, ) loaders[column] = DataFrameFFCLoader( column=column, baseline=frame, adjustments=None, ) super(ConstantLoader, self).__init__(loaders)
def test_bad_input(self): data = arange(100).reshape(self.ndates, self.nsids) baseline = DataFrame(data, index=self.dates, columns=self.sids) loader = DataFrameFFCLoader( USEquityPricing.close, baseline, ) with self.assertRaises(ValueError): # Wrong column. loader.load_adjusted_array([USEquityPricing.open], self.mask) with self.assertRaises(ValueError): # Too many columns. loader.load_adjusted_array( [USEquityPricing.open, USEquityPricing.close], self.mask)
def test_compute_with_adjustments(self): dates, assets = self.dates, self.assets low, high = USEquityPricing.low, USEquityPricing.high apply_idxs = [3, 10, 16] def apply_date(idx, offset=0): return dates[apply_idxs[idx] + offset] adjustments = DataFrame.from_records([ dict( kind=MULTIPLY, sid=assets[1], value=2.0, start_date=None, end_date=apply_date(0, offset=-1), apply_date=apply_date(0), ), dict( kind=MULTIPLY, sid=assets[1], value=3.0, start_date=None, end_date=apply_date(1, offset=-1), apply_date=apply_date(1), ), dict( kind=MULTIPLY, sid=assets[1], value=5.0, start_date=None, end_date=apply_date(2, offset=-1), apply_date=apply_date(2), ), ]) low_base = DataFrame(self.make_frame(30.0)) low_loader = DataFrameFFCLoader(low, low_base.copy(), adjustments=None) # Pre-apply inverse of adjustments to the baseline. high_base = DataFrame(self.make_frame(30.0)) high_base.iloc[:apply_idxs[0], 1] /= 2.0 high_base.iloc[:apply_idxs[1], 1] /= 3.0 high_base.iloc[:apply_idxs[2], 1] /= 5.0 high_loader = DataFrameFFCLoader(high, high_base, adjustments) loader = MultiColumnLoader({low: low_loader, high: high_loader}) engine = SimpleFFCEngine(loader, self.dates, self.asset_finder) for window_length in range(1, 4): low_mavg = SimpleMovingAverage( inputs=[USEquityPricing.low], window_length=window_length, ) high_mavg = SimpleMovingAverage( inputs=[USEquityPricing.high], window_length=window_length, ) bounds = product_upper_triangle(range(window_length, len(dates))) for start, stop in bounds: results = engine.factor_matrix( { 'low': low_mavg, 'high': high_mavg }, dates[start], dates[stop], ) self.assertEqual(set(results.columns), {'low', 'high'}) iloc_bounds = slice(start, stop + 1) # +1 to include end date low_results = results.unstack()['low'] assert_frame_equal(low_results, low_base.iloc[iloc_bounds]) high_results = results.unstack()['high'] assert_frame_equal(high_results, high_base.iloc[iloc_bounds])
def test_adjustments(self): data = arange(100).reshape(self.ndates, self.nsids) baseline = DataFrame(data, index=self.dates, columns=self.sids) # Use the dates from index 10 on and sids 1-3. dates_slice = slice(10, None, None) sids_slice = slice(1, 4, None) # Adjustments that should actually affect the output. relevant_adjustments = [ { 'sid': 1, 'start_date': None, 'end_date': self.dates[15], 'apply_date': self.dates[16], 'value': 0.5, 'kind': MULTIPLY, }, { 'sid': 2, 'start_date': self.dates[5], 'end_date': self.dates[15], 'apply_date': self.dates[16], 'value': 1.0, 'kind': ADD, }, { 'sid': 2, 'start_date': self.dates[15], 'end_date': self.dates[16], 'apply_date': self.dates[17], 'value': 1.0, 'kind': ADD, }, { 'sid': 3, 'start_date': self.dates[16], 'end_date': self.dates[17], 'apply_date': self.dates[18], 'value': 99.0, 'kind': OVERWRITE, }, ] # These adjustments shouldn't affect the output. irrelevant_adjustments = [ { # Sid Not Requested 'sid': 0, 'start_date': self.dates[16], 'end_date': self.dates[17], 'apply_date': self.dates[18], 'value': -9999.0, 'kind': OVERWRITE, }, { # Sid Unknown 'sid': 9999, 'start_date': self.dates[16], 'end_date': self.dates[17], 'apply_date': self.dates[18], 'value': -9999.0, 'kind': OVERWRITE, }, { # Date Not Requested 'sid': 2, 'start_date': self.dates[1], 'end_date': self.dates[2], 'apply_date': self.dates[3], 'value': -9999.0, 'kind': OVERWRITE, }, { # Date Before Known Data 'sid': 2, 'start_date': self.dates[0] - (2 * trading_day), 'end_date': self.dates[0] - trading_day, 'apply_date': self.dates[0] - trading_day, 'value': -9999.0, 'kind': OVERWRITE, }, { # Date After Known Data 'sid': 2, 'start_date': self.dates[-1] + trading_day, 'end_date': self.dates[-1] + (2 * trading_day), 'apply_date': self.dates[-1] + (3 * trading_day), 'value': -9999.0, 'kind': OVERWRITE, }, ] adjustments = DataFrame(relevant_adjustments + irrelevant_adjustments) loader = DataFrameFFCLoader( USEquityPricing.close, baseline, adjustments=adjustments, ) expected_baseline = baseline.iloc[dates_slice, sids_slice] formatted_adjustments = loader.format_adjustments( self.dates[dates_slice], self.sids[sids_slice], ) expected_formatted_adjustments = { 6: [ Float64Multiply(first_row=0, last_row=5, col=0, value=0.5), Float64Add(first_row=0, last_row=5, col=1, value=1.0), ], 7: [ Float64Add(first_row=5, last_row=6, col=1, value=1.0), ], 8: [ Float64Overwrite(first_row=6, last_row=7, col=2, value=99.0) ], } self.assertEqual(formatted_adjustments, expected_formatted_adjustments) mask = self.mask[dates_slice, sids_slice] with patch('zipline.data.ffc.frame.adjusted_array') as m: loader.load_adjusted_array( columns=[USEquityPricing.close], dates=self.dates[dates_slice], assets=self.sids[sids_slice], mask=mask, ) self.assertEqual(m.call_count, 1) args, kwargs = m.call_args assert_array_equal(kwargs['data'], expected_baseline.values) assert_array_equal(kwargs['mask'], mask) self.assertEqual(kwargs['adjustments'], expected_formatted_adjustments)
def test_adjustments(self): data = arange(100).reshape(self.ndates, self.nsids) baseline = DataFrame(data, index=self.dates, columns=self.sids) # Use the dates from index 10 on and sids 1-3. dates_slice = slice(10, None, None) sids_slice = slice(1, 4, None) # Adjustments that should actually affect the output. relevant_adjustments = [ { 'sid': 1, 'start_date': None, 'end_date': self.dates[15], 'apply_date': self.dates[16], 'value': 0.5, 'kind': MULTIPLY, }, { 'sid': 2, 'start_date': self.dates[5], 'end_date': self.dates[15], 'apply_date': self.dates[16], 'value': 1.0, 'kind': ADD, }, { 'sid': 2, 'start_date': self.dates[15], 'end_date': self.dates[16], 'apply_date': self.dates[17], 'value': 1.0, 'kind': ADD, }, { 'sid': 3, 'start_date': self.dates[16], 'end_date': self.dates[17], 'apply_date': self.dates[18], 'value': 99.0, 'kind': OVERWRITE, }, ] # These adjustments shouldn't affect the output. irrelevant_adjustments = [ { # Sid Not Requested 'sid': 0, 'start_date': self.dates[16], 'end_date': self.dates[17], 'apply_date': self.dates[18], 'value': -9999.0, 'kind': OVERWRITE, }, { # Sid Unknown 'sid': 9999, 'start_date': self.dates[16], 'end_date': self.dates[17], 'apply_date': self.dates[18], 'value': -9999.0, 'kind': OVERWRITE, }, { # Date Not Requested 'sid': 2, 'start_date': self.dates[1], 'end_date': self.dates[2], 'apply_date': self.dates[3], 'value': -9999.0, 'kind': OVERWRITE, }, { # Date Before Known Data 'sid': 2, 'start_date': self.dates[0] - (2 * trading_day), 'end_date': self.dates[0] - trading_day, 'apply_date': self.dates[0] - trading_day, 'value': -9999.0, 'kind': OVERWRITE, }, { # Date After Known Data 'sid': 2, 'start_date': self.dates[-1] + trading_day, 'end_date': self.dates[-1] + (2 * trading_day), 'apply_date': self.dates[-1] + (3 * trading_day), 'value': -9999.0, 'kind': OVERWRITE, }, ] adjustments = DataFrame(relevant_adjustments + irrelevant_adjustments) loader = DataFrameFFCLoader( USEquityPricing.close, baseline, adjustments=adjustments, ) expected_baseline = baseline.iloc[dates_slice, sids_slice] formatted_adjustments = loader.format_adjustments( self.dates[dates_slice], self.sids[sids_slice], ) expected_formatted_adjustments = { 6: [ Float64Multiply(first_row=0, last_row=5, col=0, value=0.5), Float64Add(first_row=0, last_row=5, col=1, value=1.0), ], 7: [ Float64Add(first_row=5, last_row=6, col=1, value=1.0), ], 8: [Float64Overwrite(first_row=6, last_row=7, col=2, value=99.0)], } self.assertEqual(formatted_adjustments, expected_formatted_adjustments) mask = self.mask.iloc[dates_slice, sids_slice] with patch('zipline.data.ffc.frame.adjusted_array') as m: loader.load_adjusted_array( columns=[USEquityPricing.close], mask=mask, ) self.assertEqual(m.call_count, 1) args, kwargs = m.call_args assert_array_equal(kwargs['data'], expected_baseline.values) assert_array_equal(kwargs['mask'], mask.values) self.assertEqual(kwargs['adjustments'], expected_formatted_adjustments)