def overwrite_from_dates(asof, dense_dates, sparse_dates, asset_idx, value):
    """Construct a `Float64Overwrite` with the correct start and end date
    based on the asof date of the delta, the dense_dates, and the
    sparse_dates.

    This is a generator: it yields at most one overwrite, and yields nothing
    when the computed row range is empty.

    Parameters
    ----------
    asof : datetime
        The asof date of the delta.
    dense_dates : pd.DatetimeIndex
        The dates requested by the loader.
    sparse_dates : pd.DatetimeIndex
        The dates that appeared in the dataset.
    asset_idx : tuple of int
        The index of the asset in the block. This is unpacked as the first
        and last column index to use.
    value : np.float64
        The value to overwrite with.

    Yields
    ------
    overwrite : Float64Overwrite
        The overwrite that will apply the new value to the data.

    Notes
    -----
    The overwrite covers every dense date from ``asof`` up to (but not
    including) the next sparse date after ``asof``. When no sparse date
    follows ``asof``, the overwrite runs through the last dense date.
    """
    first_row = dense_dates.searchsorted(asof)
    next_idx = sparse_dates.searchsorted(asof, 'right')
    if next_idx == len(sparse_dates):
        # No sparse date follows ``asof``; indexing ``sparse_dates`` here
        # would raise IndexError, so apply through the end of the dense
        # dates instead.
        last_row = len(dense_dates) - 1
    else:
        # Stop just before the dense row of the next sparse date.
        last_row = dense_dates.searchsorted(sparse_dates[next_idx]) - 1

    if first_row > last_row:
        # Empty range: nothing to overwrite.
        return

    first, last = asset_idx
    yield Float64Overwrite(first_row, last_row, first, last, value)
def overwrite_from_dates(asof, dense_dates, sparse_dates, asset_idx, value):
    """Yield the `Float64Overwrite` that forward-fills ``value`` for a delta.

    The row range of the overwrite is derived from the asof date of the
    delta, the dense_dates, and the sparse_dates.

    Parameters
    ----------
    asof : datetime
        The asof date of the delta.
    dense_dates : pd.DatetimeIndex
        The dates requested by the loader.
    sparse_dates : pd.DatetimeIndex
        The dates that appeared in the dataset.
    asset_idx : tuple of int
        The index of the asset in the block, unpacked as the first and
        last column index to use.
    value : np.float64
        The value to overwrite with.

    Yields
    ------
    overwrite : Float64Overwrite
        The overwrite that will apply the new value to the data. Nothing
        is yielded when ``asof`` is ``pd.NaT`` or the row range is empty.

    Notes
    -----
    Every dense date from the asof date up to, but excluding, the next
    sparse date after the asof date is forward-filled.

    For example:
    let ``asof = pd.Timestamp('2014-01-02')``,
        ``dense_dates = pd.date_range('2014-01-01', '2014-01-05')``
        ``sparse_dates = pd.to_datetime(['2014-01', '2014-02', '2014-04'])``

    Then the overwrite will apply to indexes: 1, 2, 3, 4
    """
    if asof is pd.NaT:
        # Placeholder row produced by the groupby over the deltas; it does
        # not describe a real delta.
        return

    start_row = dense_dates.searchsorted(asof)
    following = sparse_dates.searchsorted(asof.asm8, 'right')
    if following != len(sparse_dates):
        # A later sparse date exists: stop on the dense row just before it.
        end_row = dense_dates.searchsorted(sparse_dates[following]) - 1
    else:
        # Nothing later in the sparse dates: fill through the final dense row.
        end_row = len(dense_dates) - 1

    if start_row <= end_row:
        first_col, last_col = asset_idx
        yield Float64Overwrite(start_row, end_row, first_col, last_col, value)
class AdjustedArrayTestCase(TestCase):
    """unittest-style tests for ``AdjustedArray``.

    Covers traversal (with and without adjustments), in-place invalidation,
    copying, lookback validation, read-only windows, ``inspect`` output,
    label updates and merging of adjustments.
    """

    def test_traverse_invalidating(self):
        # Traversing with ``copy=False`` applies the adjustment directly to
        # ``data``; afterwards the array refuses to be traversed again.
        data = arange(5 * 3, dtype='f8').reshape(5, 3)
        original_data = data.copy()
        # Multiply rows 0-4, columns 0-2 (i.e. everything) by 2.0.
        adjustments = {2: [Float64Multiply(0, 4, 0, 2, 2.0)]}
        adjusted_array = AdjustedArray(data, adjustments, float('nan'))

        for _ in adjusted_array.traverse(1, copy=False):
            pass

        # The caller's buffer was mutated in place.
        assert_equal(data, original_data * 2)

        with self.assertRaises(ValueError) as e:
            adjusted_array.traverse(1)

        assert_equal(
            str(e.exception),
            'cannot traverse invalidated AdjustedArray',
        )

    def test_copy(self):
        # Copies taken before an in-place traversal stay independent of the
        # original; copying after invalidation raises.
        data = arange(5 * 3, dtype='f8').reshape(5, 3)
        original_data = data.copy()
        adjustments = {2: [Float64Multiply(0, 4, 0, 2, 2.0)]}
        adjusted_array = AdjustedArray(data, adjustments, float('nan'))
        traverse_copy = adjusted_array.copy()
        clean_copy = adjusted_array.copy()

        a_it = adjusted_array.traverse(2, copy=False)
        b_it = traverse_copy.traverse(2, copy=False)
        for a, b in zip(a_it, b_it):
            assert_equal(a, b)

        with self.assertRaises(ValueError) as e:
            adjusted_array.copy()

        assert_equal(
            str(e.exception),
            'cannot copy invalidated AdjustedArray',
        )

        # the clean copy should have the original data even though the
        # original adjusted array has its data mutated in place
        assert_equal(clean_copy.data, original_data)
        assert_equal(adjusted_array.data, original_data * 2)

    # Cases come from ``_gen_unadjusted_cases`` (defined outside this
    # block), one group per input dtype / container combination.
    @parameterized.expand(
        chain(
            _gen_unadjusted_cases(
                'float',
                make_input=as_dtype(float64_dtype),
                make_expected_output=as_dtype(float64_dtype),
                missing_value=default_missing_value_for_dtype(float64_dtype),
            ),
            _gen_unadjusted_cases(
                'datetime',
                make_input=as_dtype(datetime64ns_dtype),
                make_expected_output=as_dtype(datetime64ns_dtype),
                missing_value=default_missing_value_for_dtype(
                    datetime64ns_dtype),
            ),
            # Test passing an array of strings to AdjustedArray.
            _gen_unadjusted_cases(
                'bytes_ndarray',
                make_input=as_dtype(bytes_dtype),
                make_expected_output=as_labelarray(bytes_dtype, b''),
                missing_value=b'',
            ),
            _gen_unadjusted_cases(
                'unicode_ndarray',
                make_input=as_dtype(unicode_dtype),
                make_expected_output=as_labelarray(unicode_dtype, u''),
                missing_value=u'',
            ),
            _gen_unadjusted_cases(
                'object_ndarray',
                make_input=lambda a: a.astype(unicode).astype(object),
                make_expected_output=as_labelarray(unicode_dtype, u''),
                missing_value='',
            ),
            # Test passing a LabelArray directly to AdjustedArray.
            _gen_unadjusted_cases(
                'bytes_labelarray',
                make_input=as_labelarray(bytes_dtype, b''),
                make_expected_output=as_labelarray(bytes_dtype, b''),
                missing_value=b'',
            ),
            _gen_unadjusted_cases(
                'unicode_labelarray',
                make_input=as_labelarray(unicode_dtype, None),
                make_expected_output=as_labelarray(unicode_dtype, None),
                missing_value=u'',
            ),
            _gen_unadjusted_cases(
                'object_labelarray',
                make_input=(lambda a: LabelArray(
                    a.astype(unicode).astype(object), u'')),
                make_expected_output=as_labelarray(unicode_dtype, ''),
                missing_value='',
            ),
        ))
    def test_no_adjustments(self,
                            name,
                            data,
                            lookback,
                            adjustments,
                            missing_value,
                            perspective_offset,
                            expected_output):
        # NOTE(review): ``perspective_offset`` is accepted but not passed to
        # ``traverse`` here, and ``zip`` (unlike the ``zip_longest`` used by
        # the sibling tests) stops at the shorter sequence, so a mismatch in
        # the number of windows would go unnoticed -- confirm intended.
        array = AdjustedArray(data, adjustments, missing_value)
        for _ in range(2):  # Iterate 2x ensure adjusted_arrays are re-usable.
            in_out = zip(array.traverse(lookback), expected_output)
            for yielded, expected_yield in in_out:
                check_arrays(yielded, expected_yield)

    @parameterized.expand(_gen_multiplicative_adjustment_cases(float64_dtype))
    def test_multiplicative_adjustments(self,
                                        name,
                                        data,
                                        lookback,
                                        adjustments,
                                        missing_value,
                                        perspective_offset,
                                        expected):
        array = AdjustedArray(data, adjustments, missing_value)
        for _ in range(2):  # Iterate 2x ensure adjusted_arrays are re-usable.
            window_iter = array.traverse(
                lookback,
                perspective_offset=perspective_offset,
            )
            # zip_longest pads with None, so a differing number of windows
            # reaches check_arrays instead of being silently truncated.
            for yielded, expected_yield in zip_longest(window_iter, expected):
                check_arrays(yielded, expected_yield)

    @parameterized.expand(
        chain(
            _gen_overwrite_adjustment_cases(bool_dtype),
            _gen_overwrite_adjustment_cases(int64_dtype),
            _gen_overwrite_adjustment_cases(float64_dtype),
            _gen_overwrite_adjustment_cases(datetime64ns_dtype),
            _gen_overwrite_1d_array_adjustment_case(float64_dtype),
            _gen_overwrite_1d_array_adjustment_case(datetime64ns_dtype),
            _gen_overwrite_1d_array_adjustment_case(bool_dtype),
            # There are six cases here:
            # Using np.bytes/np.unicode/object arrays as inputs.
            # Passing np.bytes/np.unicode/object arrays to LabelArray,
            # and using those as input.
            #
            # The outputs should always be LabelArrays.
            _gen_unadjusted_cases(
                'bytes_ndarray',
                make_input=as_dtype(bytes_dtype),
                make_expected_output=as_labelarray(bytes_dtype, b''),
                missing_value=b'',
            ),
            _gen_unadjusted_cases(
                'unicode_ndarray',
                make_input=as_dtype(unicode_dtype),
                make_expected_output=as_labelarray(unicode_dtype, u''),
                missing_value=u'',
            ),
            _gen_unadjusted_cases(
                'object_ndarray',
                make_input=lambda a: a.astype(unicode).astype(object),
                make_expected_output=as_labelarray(unicode_dtype, u''),
                missing_value=u'',
            ),
            _gen_unadjusted_cases(
                'bytes_labelarray',
                make_input=as_labelarray(bytes_dtype, b''),
                make_expected_output=as_labelarray(bytes_dtype, b''),
                missing_value=b'',
            ),
            _gen_unadjusted_cases(
                'unicode_labelarray',
                make_input=as_labelarray(unicode_dtype, u''),
                make_expected_output=as_labelarray(unicode_dtype, u''),
                missing_value=u'',
            ),
            _gen_unadjusted_cases(
                'object_labelarray',
                make_input=(lambda a: LabelArray(
                    a.astype(unicode).astype(object),
                    None,
                )),
                make_expected_output=as_labelarray(unicode_dtype, u''),
                missing_value=None,
            ),
        ))
    def test_overwrite_adjustment_cases(self,
                                        name,
                                        baseline,
                                        lookback,
                                        adjustments,
                                        missing_value,
                                        perspective_offset,
                                        expected):
        array = AdjustedArray(baseline, adjustments, missing_value)

        for _ in range(2):  # Iterate 2x ensure adjusted_arrays are re-usable.
            window_iter = array.traverse(
                lookback,
                perspective_offset=perspective_offset,
            )
            for yielded, expected_yield in zip_longest(window_iter, expected):
                check_arrays(yielded, expected_yield)

    def test_object1darrayoverwrite(self):
        # Baseline is a 26x3 LabelArray whose cells are '<ROW><col>' (e.g.
        # 'Aa'); the categories also include every '~'-prefixed variant so
        # that flipped values are valid labels.
        pairs = [u + l for u, l in product(ascii_uppercase, ascii_lowercase)]
        categories = pairs + ['~' + c for c in pairs]
        baseline = LabelArray(
            array([[''.join((r, c)) for c in 'abc']
                   for r in ascii_uppercase]),
            None,
            categories,
        )
        full_expected = baseline.copy()

        def flip(cs):
            # Prefix a label with '~' unless it is None or already prefixed.
            if cs is None:
                return None
            if cs[0] != '~':
                return '~' + cs
            return cs

        def make_overwrite(fr, lr, fc, lc):
            # Translate letter coordinates ('A'.., 'a'..) into 0-based
            # row/column indices.
            fr, lr, fc, lc = map(ord, (fr, lr, fc, lc))
            fr -= ord('A')
            lr -= ord('A')
            fc -= ord('a')
            lc -= ord('a')

            # The overwrite values are the flipped labels of the *first*
            # column (``fc``) over the row range.
            return Object1DArrayOverwrite(
                fr, lr,
                fc, lc,
                baseline[fr:lr + 1, fc].map(flip),
            )

        overwrites = {
            3: [make_overwrite('A', 'B', 'a', 'a')],
            4: [make_overwrite('A', 'C', 'b', 'c')],
            5: [make_overwrite('D', 'D', 'a', 'b')],
        }

        it = AdjustedArray(baseline, overwrites, None).traverse(3)

        # First window: no overwrite expected yet.
        window = next(it)
        expected = full_expected[:3]
        check_arrays(window, expected)

        window = next(it)
        full_expected[0:2, 0] = LabelArray(['~Aa', '~Ba'], None)
        expected = full_expected[1:4]
        check_arrays(window, expected)

        window = next(it)
        # NOTE(review): only row 2 of this assignment lies inside the window
        # compared below (rows 2-4) and the next window starts at row 3, so
        # the values written to rows 0-1 here are never checked.
        full_expected[0:3, 1:3] = LabelArray([['~Ab', '~Ac'],
                                              ['~Bb', '~Bc'],
                                              ['~Cb', '~Cb']], None)
        expected = full_expected[2:5]
        check_arrays(window, expected)

        window = next(it)
        full_expected[3, :2] = '~Da'
        expected = full_expected[3:6]
        check_arrays(window, expected)

    def test_invalid_lookback(self):
        # With 6 rows, a window of 7 is too long; 0 and negative window
        # lengths are rejected as non-positive.
        data = arange(30, dtype=float).reshape(6, 5)
        adj_array = AdjustedArray(data, {}, float('nan'))

        with self.assertRaises(WindowLengthTooLong):
            adj_array.traverse(7)

        with self.assertRaises(WindowLengthNotPositive):
            adj_array.traverse(0)

        with self.assertRaises(WindowLengthNotPositive):
            adj_array.traverse(-1)

    def test_array_views_arent_writable(self):
        # Every window produced by traverse must reject item assignment.
        data = arange(30, dtype=float).reshape(6, 5)
        adj_array = AdjustedArray(data, {}, float('nan'))

        for frame in adj_array.traverse(3):
            with self.assertRaises(ValueError):
                frame[0, 0] = 5.0

    def test_inspect(self):
        data = arange(15, dtype=float).reshape(5, 3)
        adj_array = AdjustedArray(
            data,
            {4: [Float64Multiply(2, 3, 0, 0, 4.0)]},
            float('nan'),
        )

        # The literal is compared for exact equality with inspect(), so its
        # internal whitespace is significant. The line after the trailing
        # backslash must start at column 0: it continues the previous line
        # and any leading spaces would become part of the string.
        expected = dedent(
            """\
            Adjusted Array (float64):

            Data:
            array([[  0.,   1.,   2.],
                   [  3.,   4.,   5.],
                   [  6.,   7.,   8.],
                   [  9.,  10.,  11.],
                   [ 12.,  13.,  14.]])

            Adjustments:
            {4: [Float64Multiply(first_row=2, last_row=3, first_col=0, \
last_col=0, value=4.000000)]}
            """
        )
        got = adj_array.inspect()
        self.assertEqual(expected, got)

    def test_update_labels(self):
        # update_labels must map the function over both the baseline labels
        # and the values carried by the adjustments.
        data = array([
            ['aaa', 'bbb', 'ccc'],
            ['ddd', 'eee', 'fff'],
            ['ggg', 'hhh', 'iii'],
            ['jjj', 'kkk', 'lll'],
            ['mmm', 'nnn', 'ooo'],
        ])
        label_array = LabelArray(data, missing_value='')

        adj_array = AdjustedArray(
            data=label_array,
            adjustments={4: [ObjectOverwrite(2, 3, 0, 0, 'ppp')]},
            missing_value='',
        )

        expected_data = array([
            ['aaa-foo', 'bbb-foo', 'ccc-foo'],
            ['ddd-foo', 'eee-foo', 'fff-foo'],
            ['ggg-foo', 'hhh-foo', 'iii-foo'],
            ['jjj-foo', 'kkk-foo', 'lll-foo'],
            ['mmm-foo', 'nnn-foo', 'ooo-foo'],
        ])
        expected_label_array = LabelArray(expected_data, missing_value='')

        expected_adj_array = AdjustedArray(
            data=expected_label_array,
            adjustments={4: [ObjectOverwrite(2, 3, 0, 0, 'ppp-foo')]},
            missing_value='',
        )

        adj_array.update_labels(lambda x: x + '-foo')

        # Check that the mapped AdjustedArray has the expected baseline
        # values and adjustment values.
        check_arrays(adj_array.data, expected_adj_array.data)
        self.assertEqual(adj_array.adjustments, expected_adj_array.adjustments)

    # Named adjustments shared by the parameter table below; defined at
    # class level so the decorator arguments can reference them.
    A = Float64Multiply(0, 4, 1, 1, 0.5)
    B = Float64Overwrite(3, 3, 4, 4, 4.2)
    C = Float64Multiply(0, 2, 0, 0, 0.14)
    D = Float64Overwrite(0, 3, 0, 0, 4.0)
    E = Float64Overwrite(0, 0, 1, 1, 3.7)
    F = Float64Multiply(0, 4, 3, 3, 10.0)
    G = Float64Overwrite(5, 5, 4, 4, 1.7)
    H = Float64Multiply(0, 4, 2, 2, 0.99)
    S = Float64Multiply(0, 1, 4, 4, 5.06)

    @parameterized.expand([(
        # Initial adjustments
        {
            1: [A, B],
            2: [C],
            4: [D],
        },

        # Adjustments to add
        {
            1: [E],
            2: [F, G],
            3: [H, S],
        },

        # Expected adjustments with 'append'
        {
            1: [A, B, E],
            2: [C, F, G],
            3: [H, S],
            4: [D],
        },

        # Expected adjustments with 'prepend'
        {
            1: [E, A, B],
            2: [F, G, C],
            3: [H, S],
            4: [D],
        },
    )])
    def test_update_adjustments(self,
                                initial_adjustments,
                                adjustments_to_add,
                                expected_adjustments_with_append,
                                expected_adjustments_with_prepend):
        methods = ['append', 'prepend']
        expected_outputs = [
            expected_adjustments_with_append,
            expected_adjustments_with_prepend
        ]

        # A fresh AdjustedArray is built for each method so the two merge
        # modes are checked independently.
        for method, expected_output in zip(methods, expected_outputs):
            data = arange(30, dtype=float).reshape(6, 5)
            adjusted_array = AdjustedArray(
                data, initial_adjustments, float('nan')
            )

            adjusted_array.update_adjustments(adjustments_to_add, method)
            self.assertEqual(adjusted_array.adjustments, expected_output)
def test_adjustments(self):
    # Exercise DataFrameLoader's adjustment handling: only adjustments that
    # touch the requested dates/sids may survive ``format_adjustments``, and
    # ``load_adjusted_array`` must hand the baseline values, the mask and
    # those formatted adjustments to ``adjusted_array``.
    all_dates = self.dates
    all_sids = self.sids

    def adjustment(sid, start_date, end_date, apply_date, value, kind):
        # One adjustment record; keys are listed in the same order for
        # every record.
        return {
            'sid': sid,
            'start_date': start_date,
            'end_date': end_date,
            'apply_date': apply_date,
            'value': value,
            'kind': kind,
        }

    baseline = DataFrame(
        arange(100).reshape(self.ndates, self.nsids),
        index=all_dates,
        columns=all_sids,
    )

    # Adjustments that should actually affect the output.
    relevant = [
        adjustment(1, None, all_dates[15], all_dates[16], 0.5, MULTIPLY),
        adjustment(2, all_dates[5], all_dates[15], all_dates[16], 1.0, ADD),
        adjustment(2, all_dates[15], all_dates[16], all_dates[17], 1.0, ADD),
        adjustment(
            3, all_dates[16], all_dates[17], all_dates[18], 99.0, OVERWRITE,
        ),
    ]

    # These adjustments shouldn't affect the output.
    first_date = all_dates[0]
    last_date = all_dates[-1]
    irrelevant = [
        # Sid Not Requested
        adjustment(
            0, all_dates[16], all_dates[17], all_dates[18],
            -9999.0, OVERWRITE,
        ),
        # Sid Unknown
        adjustment(
            9999, all_dates[16], all_dates[17], all_dates[18],
            -9999.0, OVERWRITE,
        ),
        # Date Not Requested
        adjustment(
            2, all_dates[1], all_dates[2], all_dates[3],
            -9999.0, OVERWRITE,
        ),
        # Date Before Known Data
        adjustment(
            2,
            first_date - (2 * trading_day),
            first_date - trading_day,
            first_date - trading_day,
            -9999.0,
            OVERWRITE,
        ),
        # Date After Known Data
        adjustment(
            2,
            last_date + trading_day,
            last_date + (2 * trading_day),
            last_date + (3 * trading_day),
            -9999.0,
            OVERWRITE,
        ),
    ]

    loader = DataFrameLoader(
        USEquityPricing.close,
        baseline,
        adjustments=DataFrame(relevant + irrelevant),
    )

    # Use the dates from index 10 on and sids 1-3.
    requested_dates = all_dates[10:]
    requested_sids = all_sids[1:4]
    expected_baseline = baseline.iloc[10:, 1:4]

    # Row/column positions below are relative to the requested window.
    expected_formatted_adjustments = {
        6: [
            Float64Multiply(
                first_row=0, last_row=5, first_col=0, last_col=0, value=0.5,
            ),
            Float64Add(
                first_row=0, last_row=5, first_col=1, last_col=1, value=1.0,
            ),
        ],
        7: [
            Float64Add(
                first_row=5, last_row=6, first_col=1, last_col=1, value=1.0,
            ),
        ],
        8: [
            Float64Overwrite(
                first_row=6, last_row=7, first_col=2, last_col=2, value=99.0,
            ),
        ],
    }

    formatted_adjustments = loader.format_adjustments(
        requested_dates,
        requested_sids,
    )
    self.assertEqual(formatted_adjustments, expected_formatted_adjustments)

    mask = self.mask[10:, 1:4]
    with patch('zipline.pipeline.loaders.frame.adjusted_array') as m:
        loader.load_adjusted_array(
            columns=[USEquityPricing.close],
            dates=requested_dates,
            assets=requested_sids,
            mask=mask,
        )

    # The patched constructor must be hit exactly once, by keyword.
    self.assertEqual(m.call_count, 1)

    _, call_kwargs = m.call_args
    assert_array_equal(call_kwargs['data'], expected_baseline.values)
    assert_array_equal(call_kwargs['mask'], mask)
    self.assertEqual(
        call_kwargs['adjustments'],
        expected_formatted_adjustments,
    )
def test_adjustments(self):
    # Exercise DataFrameLoader's adjustment handling: only adjustments that
    # touch the requested dates/sids may survive ``format_adjustments``, and
    # ``load_adjusted_array`` must hand the baseline values and those
    # formatted adjustments to ``AdjustedArray``.
    all_dates = self.dates
    all_sids = self.sids
    one_day = self.trading_day

    def adjustment(sid, start_date, end_date, apply_date, value, kind):
        # One adjustment record; keys are listed in the same order for
        # every record.
        return {
            "sid": sid,
            "start_date": start_date,
            "end_date": end_date,
            "apply_date": apply_date,
            "value": value,
            "kind": kind,
        }

    baseline = pd.DataFrame(
        np.arange(100).reshape(self.ndates, self.nsids),
        index=all_dates,
        columns=all_sids,
    )

    # Adjustments that should actually affect the output.
    relevant = [
        adjustment(1, None, all_dates[15], all_dates[16], 0.5, MULTIPLY),
        adjustment(2, all_dates[5], all_dates[15], all_dates[16], 1.0, ADD),
        adjustment(2, all_dates[15], all_dates[16], all_dates[17], 1.0, ADD),
        adjustment(
            3, all_dates[16], all_dates[17], all_dates[18], 99.0, OVERWRITE
        ),
    ]

    # These adjustments shouldn't affect the output.
    first_date = all_dates[0]
    last_date = all_dates[-1]
    irrelevant = [
        # Sid Not Requested
        adjustment(
            0, all_dates[16], all_dates[17], all_dates[18], -9999.0, OVERWRITE
        ),
        # Sid Unknown
        adjustment(
            9999,
            all_dates[16],
            all_dates[17],
            all_dates[18],
            -9999.0,
            OVERWRITE,
        ),
        # Date Not Requested
        adjustment(
            2, all_dates[1], all_dates[2], all_dates[3], -9999.0, OVERWRITE
        ),
        # Date Before Known Data
        adjustment(
            2,
            first_date - (2 * one_day),
            first_date - one_day,
            first_date - one_day,
            -9999.0,
            OVERWRITE,
        ),
        # Date After Known Data
        adjustment(
            2,
            last_date + one_day,
            last_date + (2 * one_day),
            last_date + (3 * one_day),
            -9999.0,
            OVERWRITE,
        ),
    ]

    loader = DataFrameLoader(
        USEquityPricing.close,
        baseline,
        adjustments=pd.DataFrame(relevant + irrelevant),
    )

    # Use the dates from index 10 on and sids 1-3.
    requested_dates = all_dates[10:]
    requested_sids = all_sids[1:4]
    expected_baseline = baseline.iloc[10:, 1:4]

    # Row/column positions below are relative to the requested window.
    expected_formatted_adjustments = {
        6: [
            Float64Multiply(
                first_row=0, last_row=5, first_col=0, last_col=0, value=0.5
            ),
            Float64Add(
                first_row=0, last_row=5, first_col=1, last_col=1, value=1.0
            ),
        ],
        7: [
            Float64Add(
                first_row=5, last_row=6, first_col=1, last_col=1, value=1.0
            ),
        ],
        8: [
            Float64Overwrite(
                first_row=6, last_row=7, first_col=2, last_col=2, value=99.0
            ),
        ],
    }

    formatted_adjustments = loader.format_adjustments(
        requested_dates,
        requested_sids,
    )
    assert formatted_adjustments == expected_formatted_adjustments

    mask = self.mask[10:, 1:4]
    with patch("zipline.pipeline.loaders.frame.AdjustedArray") as m:
        loader.load_adjusted_array(
            US_EQUITIES,
            columns=[USEquityPricing.close],
            dates=requested_dates,
            sids=requested_sids,
            mask=mask,
        )

    # The patched constructor must be hit exactly once, by keyword.
    assert m.call_count == 1

    _, call_kwargs = m.call_args
    assert_array_equal(call_kwargs["data"], expected_baseline.values)
    assert call_kwargs["adjustments"] == expected_formatted_adjustments
class TestAdjustedArray:
    """pytest-style tests for ``AdjustedArray``.

    Covers traversal (with and without adjustments), in-place invalidation,
    copying, lookback validation, read-only windows, ``inspect`` output,
    label updates and merging of adjustments.
    """

    def test_traverse_invalidating(self):
        # Traversing with ``copy=False`` applies the adjustment directly to
        # ``data``; afterwards the array refuses to be traversed again.
        data = np.arange(5 * 3, dtype="f8").reshape(5, 3)
        original_data = data.copy()
        # Multiply rows 0-4, columns 0-2 (i.e. everything) by 2.0.
        adjustments = {2: [Float64Multiply(0, 4, 0, 2, 2.0)]}
        adjusted_array = AdjustedArray(data, adjustments, float("nan"))

        for _ in adjusted_array.traverse(1, copy=False):
            pass

        # The caller's buffer was mutated in place.
        assert_equal(data, original_data * 2)

        err_msg = "cannot traverse invalidated AdjustedArray"
        with pytest.raises(ValueError, match=err_msg):
            adjusted_array.traverse(1)

    def test_copy(self):
        # Copies taken before an in-place traversal stay independent of the
        # original; copying after invalidation raises.
        data = np.arange(5 * 3, dtype="f8").reshape(5, 3)
        original_data = data.copy()
        adjustments = {2: [Float64Multiply(0, 4, 0, 2, 2.0)]}
        adjusted_array = AdjustedArray(data, adjustments, float("nan"))
        traverse_copy = adjusted_array.copy()
        clean_copy = adjusted_array.copy()

        a_it = adjusted_array.traverse(2, copy=False)
        b_it = traverse_copy.traverse(2, copy=False)
        for a, b in zip(a_it, b_it):
            assert_equal(a, b)

        err_msg = "cannot copy invalidated AdjustedArray"
        with pytest.raises(ValueError, match=err_msg):
            adjusted_array.copy()

        # the clean copy should have the original data even though the
        # original adjusted array has its data mutated in place
        assert_equal(clean_copy.data, original_data)
        assert_equal(adjusted_array.data, original_data * 2)

    # Cases come from ``_gen_unadjusted_cases`` (defined outside this
    # block), one group per input dtype / container combination.
    @pytest.mark.parametrize(
        """name, data, lookback, adjustments, missing_value,\
        perspective_offset, expected_output""",
        chain(
            _gen_unadjusted_cases(
                "float",
                make_input=as_dtype(float64_dtype),
                make_expected_output=as_dtype(float64_dtype),
                missing_value=default_missing_value_for_dtype(float64_dtype),
            ),
            _gen_unadjusted_cases(
                "datetime",
                make_input=as_dtype(datetime64ns_dtype),
                make_expected_output=as_dtype(datetime64ns_dtype),
                missing_value=default_missing_value_for_dtype(datetime64ns_dtype),
            ),
            # Test passing an array of strings to AdjustedArray.
            _gen_unadjusted_cases(
                "bytes_ndarray",
                make_input=as_dtype(bytes_dtype),
                make_expected_output=as_labelarray(bytes_dtype, b""),
                missing_value=b"",
            ),
            _gen_unadjusted_cases(
                "unicode_ndarray",
                make_input=as_dtype(unicode_dtype),
                make_expected_output=as_labelarray(unicode_dtype, ""),
                missing_value="",
            ),
            _gen_unadjusted_cases(
                "object_ndarray",
                make_input=lambda a: a.astype(unicode).astype(object),
                make_expected_output=as_labelarray(unicode_dtype, ""),
                missing_value="",
            ),
            # Test passing a LabelArray directly to AdjustedArray.
            _gen_unadjusted_cases(
                "bytes_labelarray",
                make_input=as_labelarray(bytes_dtype, b""),
                make_expected_output=as_labelarray(bytes_dtype, b""),
                missing_value=b"",
            ),
            _gen_unadjusted_cases(
                "unicode_labelarray",
                make_input=as_labelarray(unicode_dtype, None),
                make_expected_output=as_labelarray(unicode_dtype, None),
                missing_value="",
            ),
            _gen_unadjusted_cases(
                "object_labelarray",
                make_input=(lambda a: LabelArray(a.astype(unicode).astype(object), "")),
                make_expected_output=as_labelarray(unicode_dtype, ""),
                missing_value="",
            ),
        ),
    )
    def test_no_adjustments(
        self,
        name,
        data,
        lookback,
        adjustments,
        missing_value,
        perspective_offset,
        expected_output,
    ):
        # NOTE(review): ``perspective_offset`` is accepted but not passed to
        # ``traverse`` here, and ``zip`` (unlike the ``zip_longest`` used by
        # the sibling tests) stops at the shorter sequence, so a mismatch in
        # the number of windows would go unnoticed -- confirm intended.
        array = AdjustedArray(data, adjustments, missing_value)
        for _ in range(2):  # Iterate 2x ensure adjusted_arrays are re-usable.
            in_out = zip(array.traverse(lookback), expected_output)
            for yielded, expected_yield in in_out:
                check_arrays(yielded, expected_yield)

    @pytest.mark.parametrize(
        "name, data, lookback, adjustments, missing_value,\
        perspective_offset, expected",
        _gen_multiplicative_adjustment_cases(float64_dtype),
    )
    def test_multiplicative_adjustments(
        self,
        name,
        data,
        lookback,
        adjustments,
        missing_value,
        perspective_offset,
        expected,
    ):
        array = AdjustedArray(data, adjustments, missing_value)
        for _ in range(2):  # Iterate 2x ensure adjusted_arrays are re-usable.
            window_iter = array.traverse(
                lookback,
                perspective_offset=perspective_offset,
            )
            # zip_longest pads with None, so a differing number of windows
            # reaches check_arrays instead of being silently truncated.
            for yielded, expected_yield in zip_longest(window_iter, expected):
                check_arrays(yielded, expected_yield)

    @pytest.mark.parametrize(
        "name, baseline, lookback, adjustments,\
        missing_value, perspective_offset, expected",
        chain(
            _gen_overwrite_adjustment_cases(bool_dtype),
            _gen_overwrite_adjustment_cases(int64_dtype),
            _gen_overwrite_adjustment_cases(float64_dtype),
            _gen_overwrite_adjustment_cases(datetime64ns_dtype),
            _gen_overwrite_1d_array_adjustment_case(float64_dtype),
            _gen_overwrite_1d_array_adjustment_case(datetime64ns_dtype),
            _gen_overwrite_1d_array_adjustment_case(bool_dtype),
            # There are six cases here:
            # Using np.bytes/np.unicode/object arrays as inputs.
            # Passing np.bytes/np.unicode/object arrays to LabelArray,
            # and using those as input.
            #
            # The outputs should always be LabelArrays.
            _gen_unadjusted_cases(
                "bytes_ndarray",
                make_input=as_dtype(bytes_dtype),
                make_expected_output=as_labelarray(bytes_dtype, b""),
                missing_value=b"",
            ),
            _gen_unadjusted_cases(
                "unicode_ndarray",
                make_input=as_dtype(unicode_dtype),
                make_expected_output=as_labelarray(unicode_dtype, ""),
                missing_value="",
            ),
            _gen_unadjusted_cases(
                "object_ndarray",
                make_input=lambda a: a.astype(unicode).astype(object),
                make_expected_output=as_labelarray(unicode_dtype, ""),
                missing_value="",
            ),
            _gen_unadjusted_cases(
                "bytes_labelarray",
                make_input=as_labelarray(bytes_dtype, b""),
                make_expected_output=as_labelarray(bytes_dtype, b""),
                missing_value=b"",
            ),
            _gen_unadjusted_cases(
                "unicode_labelarray",
                make_input=as_labelarray(unicode_dtype, ""),
                make_expected_output=as_labelarray(unicode_dtype, ""),
                missing_value="",
            ),
            _gen_unadjusted_cases(
                "object_labelarray",
                make_input=(
                    lambda a: LabelArray(
                        a.astype(unicode).astype(object),
                        None,
                    )
                ),
                make_expected_output=as_labelarray(unicode_dtype, ""),
                missing_value=None,
            ),
        ),
    )
    def test_overwrite_adjustment_cases(
        self,
        name,
        baseline,
        lookback,
        adjustments,
        missing_value,
        perspective_offset,
        expected,
    ):
        array = AdjustedArray(baseline, adjustments, missing_value)

        for _ in range(2):  # Iterate 2x ensure adjusted_arrays are re-usable.
            window_iter = array.traverse(
                lookback,
                perspective_offset=perspective_offset,
            )
            for yielded, expected_yield in zip_longest(window_iter, expected):
                check_arrays(yielded, expected_yield)

    def test_object1darrayoverwrite(self):
        # Baseline is a 26x3 LabelArray whose cells are '<ROW><col>' (e.g.
        # 'Aa'); the categories also include every '~'-prefixed variant so
        # that flipped values are valid labels.
        pairs = [u + l for u, l in product(ascii_uppercase, ascii_lowercase)]
        categories = pairs + ["~" + c for c in pairs]
        baseline = LabelArray(
            np.array([["".join((r, c)) for c in "abc"] for r in ascii_uppercase]),
            None,
            categories,
        )
        full_expected = baseline.copy()

        def flip(cs):
            # Prefix a label with '~' unless it is None or already prefixed.
            if cs is None:
                return None
            if cs[0] != "~":
                return "~" + cs
            return cs

        def make_overwrite(fr, lr, fc, lc):
            # Translate letter coordinates ('A'.., 'a'..) into 0-based
            # row/column indices.
            fr, lr, fc, lc = map(ord, (fr, lr, fc, lc))
            fr -= ord("A")
            lr -= ord("A")
            fc -= ord("a")
            lc -= ord("a")

            # The overwrite values are the flipped labels of the *first*
            # column (``fc``) over the row range.
            return Object1DArrayOverwrite(
                fr,
                lr,
                fc,
                lc,
                baseline[fr : lr + 1, fc].map(flip),
            )

        overwrites = {
            3: [make_overwrite("A", "B", "a", "a")],
            4: [make_overwrite("A", "C", "b", "c")],
            5: [make_overwrite("D", "D", "a", "b")],
        }

        it = AdjustedArray(baseline, overwrites, None).traverse(3)

        # First window: no overwrite expected yet.
        window = next(it)
        expected = full_expected[:3]
        check_arrays(window, expected)

        window = next(it)
        full_expected[0:2, 0] = LabelArray(["~Aa", "~Ba"], None)
        expected = full_expected[1:4]
        check_arrays(window, expected)

        window = next(it)
        # NOTE(review): only row 2 of this assignment lies inside the window
        # compared below (rows 2-4) and the next window starts at row 3, so
        # the values written to rows 0-1 here are never checked.
        full_expected[0:3, 1:3] = LabelArray(
            [["~Ab", "~Ac"], ["~Bb", "~Bc"], ["~Cb", "~Cb"]], None
        )
        expected = full_expected[2:5]
        check_arrays(window, expected)

        window = next(it)
        full_expected[3, :2] = "~Da"
        expected = full_expected[3:6]
        check_arrays(window, expected)

    def test_invalid_lookback(self):
        # With 6 rows, a window of 7 is too long; 0 and negative window
        # lengths are rejected as non-positive.
        data = np.arange(30, dtype=float).reshape(6, 5)
        adj_array = AdjustedArray(data, {}, float("nan"))

        with pytest.raises(WindowLengthTooLong):
            adj_array.traverse(7)

        with pytest.raises(WindowLengthNotPositive):
            adj_array.traverse(0)

        with pytest.raises(WindowLengthNotPositive):
            adj_array.traverse(-1)

    def test_array_views_arent_writable(self):
        # Every window produced by traverse must reject item assignment.
        data = np.arange(30, dtype=float).reshape(6, 5)
        adj_array = AdjustedArray(data, {}, float("nan"))

        for frame in adj_array.traverse(3):
            with pytest.raises(ValueError):
                frame[0, 0] = 5.0

    def test_inspect(self):
        data = np.arange(15, dtype=float).reshape(5, 3)
        adj_array = AdjustedArray(
            data,
            {4: [Float64Multiply(2, 3, 0, 0, 4.0)]},
            float("nan"),
        )
        # TODO: CHECK WHY DO I NEED TO FIX THE INDENT IN THE EXPECTED?
        # The literal is compared for exact equality with inspect(), so its
        # internal whitespace is significant. The line after the trailing
        # backslash must start at column 0: it continues the previous line
        # and any leading spaces would become part of the string.
        expected = dedent(
            """\
            Adjusted Array (float64):

            Data:
            array([[ 0.,  1.,  2.],
                   [ 3.,  4.,  5.],
                   [ 6.,  7.,  8.],
                   [ 9., 10., 11.],
                   [12., 13., 14.]])

            Adjustments:
            {4: [Float64Multiply(first_row=2, last_row=3, first_col=0, \
last_col=0, value=4.000000)]}
            """
        )
        got = adj_array.inspect()
        assert expected == got

    def test_update_labels(self):
        # update_labels must map the function over both the baseline labels
        # and the values carried by the adjustments.
        data = np.array(
            [
                ["aaa", "bbb", "ccc"],
                ["ddd", "eee", "fff"],
                ["ggg", "hhh", "iii"],
                ["jjj", "kkk", "lll"],
                ["mmm", "nnn", "ooo"],
            ]
        )
        label_array = LabelArray(data, missing_value="")

        adj_array = AdjustedArray(
            data=label_array,
            adjustments={4: [ObjectOverwrite(2, 3, 0, 0, "ppp")]},
            missing_value="",
        )

        expected_data = np.array(
            [
                ["aaa-foo", "bbb-foo", "ccc-foo"],
                ["ddd-foo", "eee-foo", "fff-foo"],
                ["ggg-foo", "hhh-foo", "iii-foo"],
                ["jjj-foo", "kkk-foo", "lll-foo"],
                ["mmm-foo", "nnn-foo", "ooo-foo"],
            ]
        )
        expected_label_array = LabelArray(expected_data, missing_value="")

        expected_adj_array = AdjustedArray(
            data=expected_label_array,
            adjustments={4: [ObjectOverwrite(2, 3, 0, 0, "ppp-foo")]},
            missing_value="",
        )

        adj_array.update_labels(lambda x: x + "-foo")

        # Check that the mapped AdjustedArray has the expected baseline
        # values and adjustment values.
        check_arrays(adj_array.data, expected_adj_array.data)
        assert adj_array.adjustments == expected_adj_array.adjustments

    # Named adjustments shared by the parameter table below; defined at
    # class level so the decorator arguments can reference them.
    A = Float64Multiply(0, 4, 1, 1, 0.5)
    B = Float64Overwrite(3, 3, 4, 4, 4.2)
    C = Float64Multiply(0, 2, 0, 0, 0.14)
    D = Float64Overwrite(0, 3, 0, 0, 4.0)
    E = Float64Overwrite(0, 0, 1, 1, 3.7)
    F = Float64Multiply(0, 4, 3, 3, 10.0)
    G = Float64Overwrite(5, 5, 4, 4, 1.7)
    H = Float64Multiply(0, 4, 2, 2, 0.99)
    S = Float64Multiply(0, 1, 4, 4, 5.06)

    @pytest.mark.parametrize(
        "initial_adjustments, adjustments_to_add,\
        expected_adjustments_with_append, expected_adjustments_with_prepend",
        [
            (
                # Initial adjustments
                {
                    1: [A, B],
                    2: [C],
                    4: [D],
                },
                # Adjustments to add
                {
                    1: [E],
                    2: [F, G],
                    3: [H, S],
                },
                # Expected adjustments with 'append'
                {
                    1: [A, B, E],
                    2: [C, F, G],
                    3: [H, S],
                    4: [D],
                },
                # Expected adjustments with 'prepend'
                {
                    1: [E, A, B],
                    2: [F, G, C],
                    3: [H, S],
                    4: [D],
                },
            )
        ],
    )
    def test_update_adjustments(
        self,
        initial_adjustments,
        adjustments_to_add,
        expected_adjustments_with_append,
        expected_adjustments_with_prepend,
    ):
        methods = ["append", "prepend"]
        expected_outputs = [
            expected_adjustments_with_append,
            expected_adjustments_with_prepend,
        ]

        # A fresh AdjustedArray is built for each method so the two merge
        # modes are checked independently.
        for method, expected_output in zip(methods, expected_outputs):
            data = np.arange(30, dtype=float).reshape(6, 5)
            adjusted_array = AdjustedArray(data, initial_adjustments, float("nan"))

            adjusted_array.update_adjustments(adjustments_to_add, method)
            assert adjusted_array.adjustments == expected_output