def test_array(self):
    """Check rt.bitcount on integer FastArrays of every width and signedness."""
    values = [0, 1, 2, 3, 4, 5, 6, 7, 8, 123]
    want = rt.FastArray([0, 1, 1, 2, 1, 2, 2, 3, 1, 6], dtype='i1')
    int_dtypes = ('i8', 'u8', 'i4', 'u4', 'i2', 'u2', 'i1', 'u1')
    for int_dtype in int_dtypes:
        # The same values must give the same set-bit counts for every dtype.
        got = rt.bitcount(rt.FastArray(values, dtype=int_dtype))
        assert_array_equal(got, want)
def test_cat2keys(self, key1, key2):
    """Check that rt.cat2keys builds the cartesian product of two key lists."""
    multi_cat = rt.cat2keys(key1, key2)
    assert len(key1) == len(key2)  # add test to check different length lists

    # Expected entries of the multi key categorical dictionary:
    # key1 is cycled as a whole n times, each key2 element is repeated n times.
    n = len(key1)
    cycled_key1 = list(key1) * n
    repeated_key2 = [item for item in key2 for _ in range(n)]
    expected_key1 = rt.FastArray(cycled_key1)
    expected_key2 = rt.FastArray(repeated_key2)

    column_names = iter(multi_cat.category_dict)
    actual_key1 = multi_cat.category_dict[next(column_names)]
    actual_key2 = multi_cat.category_dict[next(column_names)]
    assert_array_equal(expected_key1, actual_key1)
    assert_array_equal(expected_key2, actual_key2)

    # Pairing the two dictionary columns entry by entry should reproduce the
    # cartesian product of key1 and key2.
    expected_product = sorted(
        left + right for left, right in itertools.product(key1, key2))
    paired = sorted([left + right for left, right in zip(actual_key1, actual_key2)])
    actual_product = np.array(paired, dtype="U2")
    assert_array_equal(expected_product, actual_product)
def test_rounding_ops(self):
    """Compare abs and floor on FastArray against NumPy for every dtype in type_list.

    NOTE(review): only ``abs`` and ``floor`` are exercised here. The unused
    list this test used to carry also named around/round_/rint/fix/ceil/trunc,
    which suggests wider coverage was intended - confirm before extending.
    """

    def rand_ary(dtype):
        # Random values in [0, 101) cast to the dtype under test.
        return np.array(
            np.random.randint(0, 100, size=TEST_SIZE) + np.random.rand(TEST_SIZE),
            dtype=dtype,
        )

    for type_ in type_list:
        a = rand_ary(type_)
        self.assert_equal(abs(a), abs(rt.FastArray(a)))

        # floor is checked on two independent random samples, as before.
        for _ in range(2):
            a = rand_ary(type_)
            self.assert_equal(np.floor(a), rt.floor(rt.FastArray(a)))
def test_16bit_array(self):
    """Check rt.bitcount on 16-bit signed and unsigned FastArrays."""
    # 0xFD2 has 8 bits set, 0xFD27 has 11 bits set.
    values = [0xFD2, 0xFD27]
    want = rt.FastArray([8, 11], dtype='i1')
    for int_dtype in ('i2', 'u2'):
        # NOTE(review): 0xFD27 is above the signed 16-bit maximum, so the 'i2'
        # case depends on how FastArray converts it - confirm this is intended.
        got = rt.bitcount(rt.FastArray(values, dtype=int_dtype))
        assert_array_equal(got, want)
def test_float_list_all_off(self):
    """FastArray built from a float ndarray equals it with both `_TOFF` and `_ROFF` applied.

    The toggles act on the FastArray class, so they are restored in a
    ``finally`` block: a failing assertion must not leave the switched-off
    state behind for tests that run later.
    """
    rt.FastArray._TOFF()
    rt.FastArray._ROFF()
    try:
        self.assert_equal(rt.FastArray(np.array(self.dataf)), np.array(self.dataf))
    finally:
        rt.FastArray._TON()
        rt.FastArray._RON()
def test_nparray_all_off(self):
    """FastArray built from an ndarray equals it with both `_TOFF` and `_ROFF` applied.

    The toggles act on the FastArray class, so they are restored in a
    ``finally`` block: a failing assertion must not leave the switched-off
    state behind for tests that run later.
    """
    rt.FastArray._TOFF()
    rt.FastArray._ROFF()
    try:
        self.assert_equal(rt.FastArray(np.array(self.data)), np.array(self.data))
    finally:
        rt.FastArray._TON()
        rt.FastArray._RON()
def test_single_col_categoricals(self, func, func_name: str, data_dtype):
    """Compare a single-key Categorical groupby against the pandas groupby result."""
    bin_ids = ['a', 'b', 'c', 'd', 'e']
    values = [0, 1, 1, 2, 2, 2, 3, 3, 3, 4]

    # Fixed sample; originally drawn as
    # np.random.rand(10) + np.random.randint(0, 10, size=10)
    samples = np.array([
        7.19200901, 0.14907245, 2.28258397, 5.07872708, 0.76125165,
        1.32797916, 3.40280423, 4.48942476, 6.98713656, 4.39541456
    ])
    data = rt.FastArray(samples, dtype=data_dtype)

    frame_columns = {'vs': data, 'ks': values}
    pd_data = pd.DataFrame(frame_columns).groupby(by='ks')
    rt_data = rt.Categorical(values=values, categories=bin_ids, base_index=0)

    pd_out = self.groupby_func(pd_data, func, None)
    rt_out = self.groupby_func(rt_data, func, data)

    # The count result is looked up under 'Count'; every other function
    # under key 0.
    if func_name == 'count':
        col_index = 'Count'
    else:
        col_index = 0
    assert_array_almost_equal(rt_out[col_index], pd_out['vs'].values, decimal=3)
def test_scalar(self):
    """Check rt.bitcount on scalars: Python ints, FastArray elements, and bad inputs."""
    data = [0, 1, 2, 3, 4, 5, 6, 7, 8, 123]
    expected = [0, 1, 1, 2, 1, 2, 2, 3, 1, 6]

    def check_each(numbers):
        # Feed the values to bitcount one scalar at a time.
        for number, want in zip(numbers, expected):
            self.assertEqual(rt.bitcount(number), want)

    check_each(data)
    check_each(rt.FastArray(data, dtype='i8'))

    # Non-integer scalars are rejected.
    for bad_value in (3.14, 'a'):
        with self.assertRaises(ValueError):
            rt.bitcount(bad_value)
def test_list(self):
    """Check rt.bitcount on plain Python lists, including lists with invalid items."""
    values = [0, 1, 2, 3, 4, 5, 6, 7, 8, 123]
    want = rt.FastArray([0, 1, 1, 2, 1, 2, 2, 3, 1, 6], dtype='i1')
    assert_array_equal(rt.bitcount(values), want)

    # A single float or string element makes the whole list invalid.
    for bad_data in ([0, 1, 1.1], [0, 1, 'a']):
        with self.assertRaises(ValueError):
            rt.bitcount(bad_data)
class TestHStackAny:
    """Tests for the rt.hstack_any (a.k.a. rt.stack_rows) function."""

    # Shared inputs, built once at class-definition time.
    _fa1 = rt.FastArray([100, 200])
    _fa2 = rt.FastArray([111, 222])
    _dtn1 = rt.DateTimeNano('2021-10-12 01:02:03', from_tz='UTC')
    _dtn2 = rt.DateTimeNano('1980-03-04 13:14:15', from_tz='UTC')
    _ts1 = _dtn1 - _dtn2
    _ts2 = _dtn2 - _dtn1
    _ds1 = rt.Dataset({'a': 11})
    _ds2 = rt.Dataset({'b': 22})
    _pds1 = rt.PDataset(_ds1)
    _pds2 = rt.PDataset(_ds2)

    @pytest.mark.parametrize(
        "inputs,expected",
        [
            pytest.param([_fa1, _fa2], rt.FastArray, id='FastArray,FastArray'),
            pytest.param([_dtn1, _dtn2], rt.DateTimeNano, id='DateTimeNano,DateTimeNano'),
            pytest.param([_ts1, _ts2], rt.TimeSpan, id='TimeSpan,TimeSpan'),
            pytest.param([_ds1, _ds2], rt.Dataset, id='Dataset,Dataset'),
            pytest.param([_pds1, _pds2], None, id='PDataset,PDataset'),  # notyet
            pytest.param([_dtn1, _ts2], None, id='DateTimeNano,TimeSpan'),  # neither is base
            pytest.param([_fa1, _dtn2], rt.FastArray, id='FastArray,DateTimeNano'),
            pytest.param([_ts1, _fa2], rt.FastArray, id='TimeSpan,FastArray'),
            pytest.param([_ds1, _pds2], rt.Dataset, id='Dataset,PDataset'),
            pytest.param([_pds1, _ds2], rt.Dataset, id='PDataset,Dataset'),
            pytest.param([_fa1, _ds2], None, id='FastArray,Dataset'),
        ],
    )
    def test_hstack_any(self, inputs, expected):
        """Stack `inputs` and check the exact result type.

        `expected` is the exact type rt.hstack_any must return, or None when
        the call is expected to raise.
        """
        if expected is None:
            with pytest.raises(Exception):
                rt.hstack_any(inputs)
        else:
            result = rt.hstack_any(inputs)
            # Exact type identity on purpose: the mixed cases expect a
            # specific class, so an isinstance check is not used here.
            assert type(result) is expected
def test_alignmk(self):
    """Check alignmk with Categorical keys and with plain string-array keys."""
    ds1 = rt.Dataset()
    ds1['Time'] = [0, 1, 4, 6, 8, 9, 11, 16, 19, 30]
    ds1['Px'] = [10, 12, 15, 11, 10, 9, 13, 7, 9, 10]

    ds2 = rt.Dataset()
    ds2['Time'] = [0, 0, 5, 7, 8, 10, 12, 15, 17, 20]
    ds2['Vols'] = [20, 21, 22, 23, 24, 25, 26, 27, 28, 29]

    # First pass: Categorical keys. Second pass: char array keys.
    # Both passes must produce the same alignment.
    for make_keys in (rt.Categorical, rt.FastArray):
        ds1['Ticker'] = make_keys(['Test'] * 10)
        ds2['Ticker'] = make_keys(['Test', 'Blah'] * 5)
        aligned = alignmk(ds1.Ticker, ds2.Ticker, ds1.Time, ds2.Time)
        wanted = rt.FastArray([0, 0, 0, 2, 4, 4, 4, 6, 8, 8])
        assert_array_equal(aligned, wanted)
def test_cat2keys(self, keys):
    """Check the category dictionary of rt.cat2keys as sets, ignoring NaN mismatches."""
    key1, key2 = keys
    multi_cat = rt.cat2keys(key1, key2)
    assert len(key1) == len(key2)  # add test to check different length lists

    # Expected entries of the multi key categorical dictionary.
    n = len(key1)
    expected_key1 = set(rt.FastArray([item for _ in range(n) for item in key1]))
    expected_key2 = set(rt.FastArray([item for item in key2 for _ in range(n)]))

    column_names = iter(multi_cat.category_dict)
    actual_key1 = set(multi_cat.category_dict[next(column_names)])
    actual_key2 = set(multi_cat.category_dict[next(column_names)])

    def non_nan_mismatches(expected, actual):
        # Items present on one side only, with NaN items filtered out.
        return {
            item
            for item in expected.symmetric_difference(actual)
            if not np.isnan(item)
        }

    assert not non_nan_mismatches(
        expected_key1, actual_key1
    ), f"\nexpected {expected_key1}\nactual {actual_key1}"
    assert not non_nan_mismatches(
        expected_key2, actual_key2
    ), f"\nexpected {expected_key2}\nactual {actual_key2}"
def test_operations(self):
    """Compare binary dunder operations and NumPy unary functions on FastArray vs ndarray."""
    A_array = np.random.rand(TEST_SIZE) + 10
    B_array = np.random.rand(TEST_SIZE) + 10

    for type_ in type_list:
        a = np.array(A_array, dtype=type_)
        b = np.array(B_array, dtype=type_)
        x = rt.FastArray(np.array(A_array, dtype=type_))
        y = rt.FastArray(np.array(B_array, dtype=type_))

        for function_set in binary_functions:
            for function in function_set:
                # NOTE(review): as written, only '__truediv__' on non-integer
                # dtypes is compared; every other binary function is skipped.
                # Possibly `not (integer and truediv)` was intended - confirm
                # before widening the check.
                if (not np.issubdtype(a.dtype, np.integer)
                        and function == '__truediv__'):
                    np_out = getattr(a, function)(b)
                    sf_out = getattr(x, function)(y)
                    self.assert_equal(np_out, sf_out)

    # The unary checks use the untyped source arrays and do not depend on the
    # dtype under test, so they sit outside the dtype loop. Each function gets
    # fresh copies of the input.
    for function_set in unary_functions:
        for function in function_set:
            np_in = copy.copy(A_array)
            fa_in = rt.FastArray(copy.copy(A_array))
            np_out = getattr(np, function)(np_in)
            sf_out = getattr(np, function)(fa_in)
            self.assert_equal(np_out, sf_out)
def test_ema_decay(decay_rate, filter, reset, dtype_override, expected):
    """Run FastArray.ema_decay on a constant series and compare with `expected`."""
    data = rt.ones(10)
    times = rt.FastArray([0, 1, 1, 3, 4, 5, 5.5, 10.5, 10.55, 11])

    # Pass `dtype` only when an override is supplied, so the method's default
    # dtype stays in effect otherwise. The other arguments are either
    # non-optional or already default to None, so they are always forwarded.
    optional_kwargs = {}
    if dtype_override is not None:
        optional_kwargs['dtype'] = dtype_override
    result = data.ema_decay(times, decay_rate, filter=filter, reset=reset,
                            **optional_kwargs)

    assert_array_almost_equal(result, expected)
def test_single_col_groupby_tests(self):
    """Compare single-key Dataset groupby results against pandas for each dtype.

    On a mismatch the dtype, function name and both outputs are reported in
    the assertion message instead of being printed to stdout.
    """
    Values = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0]
    Keys = ['a', 'b', 'c', 'a', 'b', 'c', 'd', 'e', 'f']
    key = 'Ks'
    val = 'Vs'

    for type_ in type_list:
        data = {'Vs': rt.FastArray(Values, dtype=type_), 'Ks': Keys}
        pd_data = pd.DataFrame(data)
        sfw_data = rt.Dataset(data)
        pd_gb = pd_data.groupby(key)
        sfw_gb = sfw_data.groupby(key)
        is_integer_subtype = np.issubdtype(type_, np.integer)

        for name in functions_str:
            pd_func = getattr(pd_gb, name)
            sfw_func = getattr(sfw_gb, name)
            pd_out = pd_func()
            sfw_out = sfw_func()

            pd_col = pd_out[val]._values
            # The count result is stored under 'Count', not the value column.
            sfw_col = sfw_out['Count'] if name == 'count' else sfw_out[val]

            # NOTE(review): this keeps the original gate unchanged - a
            # mismatch only fails the test for 'median' on non-integer dtypes.
            # That is narrower than the test name suggests; confirm whether
            # "skip integer median" was the intent.
            mismatch_is_fatal = not is_integer_subtype and name == 'median'
            if not safe_equal(pd_col, sfw_col) and mismatch_is_fatal:
                self.fail(
                    f'groupby mismatch: data_type_t = {type_}, '
                    f'function = {name}, pandas output = {pd_col}, '
                    f'sfw output = {sfw_col}'
                )
def test_reductions(self):
    """Compare sum/min/max/var/std/mean of FastArray against ndarray for each dtype."""
    for type_ in type_list:
        whole_part = np.random.randint(100, size=TEST_SIZE)
        fractional_part = np.array(np.random.rand(TEST_SIZE))
        a = np.array(whole_part + fractional_part, dtype=type_)
        # The FastArray wraps an independent copy of the same data.
        x = rt.FastArray(np.copy(a))

        for reduction in ('sum', 'min', 'max'):
            self.assert_equal(getattr(a, reduction)(), getattr(x, reduction)())

        # Variance and standard deviation for ddof = 0, 1, 2.
        for ddof in range(3):
            self.assert_equal(a.var(ddof=ddof), x.var(ddof=ddof))
            self.assert_equal(a.std(ddof=ddof), x.std(ddof=ddof))

        self.assert_equal(a.mean(), x.mean())
def test_multkey(self):
    """Compare a two-key rt.GroupBy sum against the pandas groupby sum on random data."""
    alpha = 'Q W E R T Y U I O P A S D F G H J K L Z X C V B N M'.split(' ')
    digits = [1, 2, 3, 4, 5, 6, 7, 8, 9, 0]
    sz = 4000

    numbers = []
    keys1 = []
    keys2 = []
    for _ in range(sz):
        # Draw order (number, key1, key2) per row matches the original loop.
        numbers.append(digits[rand.randint(0, 1000) % len(digits)])
        keys1.append(alpha[rand.randint(0, 1000) % len(alpha)])
        keys2.append(alpha[rand.randint(0, 1000) % len(alpha)])

    data = {'k1': keys1, 'k2': keys2, 'beta': numbers}

    s_group = rt.GroupBy(rt.Dataset(data), keys=['k1', 'k2']).sum()
    p_group = pd.DataFrame(data).groupby(['k1', 'k2']).sum()

    # The sums are compared position by position, so this relies on both
    # libraries emitting the groups in the same order.
    pandas_sums = list(p_group['beta'])
    sfw_sums = list(s_group['beta'])
    assert pandas_sums == sfw_sums
def test_substr_getitem_array(self):
    """Index FAString.substr with one position per string."""
    positions = [0, 1, 0, 1, 0]
    picked_chars = [symbol[pos] for symbol, pos in zip(SYMBOLS, positions)]
    expected = rt.FastArray(picked_chars)
    result = FAString(SYMBOLS).substr[positions]
    assert_array_equal(expected, result)
class TestExtract:
    """Tests for regex extraction via the `.str.extract` accessor.

    Covers FastArray and Categorical inputs, and both return shapes used in
    the assertions below: a Dataset keyed by group name, or a single array.
    """

    # Repeat the sample so every value occurs more than once.
    duplicity = 2
    osi = rt.FastArray([
        'SPX UO 12/15/23 C5700',
        'SPX UO 07/16/21 P3480',
        'SPXW UO 07/16/21 P3190',
        'SPXW UO 06/30/21 C4100',
        'SPXW UO 09/17/21 C3650',
    ] * duplicity)
    expirations = [
        b'12/15/23', b'07/16/21', b'07/16/21', b'06/30/21', b'09/17/21'
    ] * duplicity
    roots = [b'SPX', b'SPX', b'SPXW', b'SPXW', b'SPXW'] * duplicity
    strikes = [b'5700', b'3480', b'3190', b'4100', b'3650'] * duplicity

    # Patterns are raw strings so that `\w` / `\d` are not parsed as (invalid)
    # string escapes; the resulting regex text is unchanged.
    dataset_out_test_cases = parametrize(
        'pattern, expected',
        [
            (r'(\w+).* (\d{2}/\d{2}/\d{2})',
             dict(group_0=roots, group_1=expirations)),
            (r'(?P<root>\w+).*(\d{2}/\d{2}/\d{2})',
             dict(root=roots, group_1=expirations)),
            (r'(?P<root>\w+).*(?P<expiration>\d{2}/\d{2}/\d{2})',
             dict(root=roots, expiration=expirations)),
            (r' [C|P](?P<strike>\d+)$', dict(strike=strikes)),
            (r'(?P<root>\w+W).*(?P<expiration>\d{2}/\d{2}/\d{2})',
             dict(root=[root if b'W' in s else ''
                        for s, root in zip(osi, roots)],
                  expiration=[exp if b'W' in s else ''
                              for s, exp in zip(osi, expirations)])),
        ],
        ids=[
            'non-names', 'some-names', 'all-names', 'single-named',
            'some-unmatched'
        ])

    @parametrize('apply_unique', [True, False])
    @dataset_out_test_cases
    def test_extract_dataset(self, pattern, expected, apply_unique):
        """Extract with expand=True and compare each result column."""
        result = self.osi.str.extract(pattern,
                                      expand=True,
                                      apply_unique=apply_unique)
        for key in result:
            assert_array_or_cat_equal(
                FastArray(expected[key]),
                result[key],
            )

    array_out_test_cases = parametrize("pattern, expected", [
        (r' [C|P](\d+)', strikes),
        (r'\w{2}', [s[:2] for s in roots]),
    ], ids=['group', 'no-group'])

    @array_out_test_cases
    def test_extract_array(self, pattern, expected):
        """Extract without expand and compare the result as a single array."""
        result = self.osi.str.extract(pattern)
        expected = rt.FastArray(expected)
        assert_array_or_cat_equal(expected, result)

    @parametrize("kwargs, key", [(dict(expand=True), 'group_0'),
                                 (dict(names=['extract']), 'extract')])
    @array_out_test_cases
    def test_single_group_datasets(self, pattern, expected, kwargs, key):
        """With expand=True or explicit names, the result is a one-column Dataset."""
        result = self.osi.str.extract(pattern, **kwargs)
        assert isinstance(result, Dataset)
        assert result.keys() == [key]
        assert_array_equal(expected, result[key])

    @dataset_out_test_cases
    def test_categorical_extract_dataset(self, pattern, expected):
        """Extract from a Categorical with expand=True and compare each column."""
        result = rt.Cat(self.osi).str.extract(
            pattern,
            expand=True,
        )
        for key in result:
            assert_array_or_cat_equal(Categorical(expected[key]),
                                      result[key],
                                      relaxed_cat_check=True,
                                      check_cat_names=False)

    @array_out_test_cases
    def test_categorical_extract_array(self, pattern, expected):
        """Extract from a Categorical without expand and compare as a Categorical."""
        result = rt.Cat(self.osi).str.extract(pattern)
        assert_array_or_cat_equal(Categorical(expected),
                                  result,
                                  relaxed_cat_check=True,
                                  check_cat_names=False)
def test_substr_getitem(self, start_stop):
    """Slicing FAString.substr matches slicing each Python string."""
    window = slice(*start_stop)
    sliced_symbols = [symbol[window] for symbol in SYMBOLS]
    expected = rt.FastArray(sliced_symbols)
    result = FAString(SYMBOLS).substr[window]
    assert_array_equal(expected, result)
def test_substr_getitem_single(self):
    """Indexing FAString.substr with 0 yields each string's first character."""
    first_chars = [symbol[0] for symbol in SYMBOLS]
    expected = rt.FastArray(first_chars)
    result = FAString(SYMBOLS).substr[0]
    assert_array_equal(expected, result)
def test_accum_table(self):
    """Exercise rt.AccumTable: inner tables, generated views, margins and footers.

    The steps below mutate a single AccumTable (`at`) in sequence, so each
    expected shape and key order depends on every step before it.
    """
    # Create the test data
    def unpivot(frame):
        """Reshape a wide frame into long (date, variable, value) rows."""
        N, K = frame.shape
        data = {
            # Column-major flatten, so values stay grouped by source column.
            'value': frame.values.ravel('F'),
            'variable': np.asarray(frame.columns).repeat(N),
            'date': np.tile(np.asarray(frame.index), K),
        }
        return pd.DataFrame(data, columns=['date', 'variable', 'value'])

    # Fixed seed: the numeric expectations below are hard-coded.
    np.random.seed(1234)
    df = unpivot(
        pd.concat([tm.makeTimeDataFrame(), tm.makeTimeDataFrame()]))
    ds = dataset_from_pandas_df(df)
    ds.date = DateTimeNano(ds.date, from_tz='NYC').to_iso()
    # Keep only the first 10 characters of each ISO string.
    ds.date = rt.FastArray([d[:10] for d in ds.date])
    ds.variable = rt.Categorical(ds.variable)
    ds.date = rt.Categorical(ds.date)
    at = rt.AccumTable(ds.date, ds.variable)

    # Add and view inner tables with totals
    at['Sum'] = at.sum(ds.value)
    self.assertEqual(at['Sum'].shape, (3, 7))
    assert_array_almost_equal(at['Sum']['A'],
                              np.array([0.47, -0.79, 1.72]),
                              decimal=2)
    vw = at.gen('Sum')
    self.assertEqual(vw.shape, (3, 7))
    assert_array_almost_equal(vw['A'],
                              np.array([0.47, -0.79, 1.72]),
                              decimal=2)
    assert_array_almost_equal(vw['Sum'],
                              np.array([-0.10, -5.02, 5.37]),
                              decimal=2)
    assert_array_almost_equal(vw.footer_get_values(columns=['Sum'])['Sum'],
                              np.array([0.25]),
                              decimal=2)
    at['Mean'] = at.mean(ds.value)
    self.assertEqual(at['Mean'].shape, (3, 7))
    assert_array_almost_equal(at['Mean']['A'],
                              np.array([0.24, -0.39, 0.86]),
                              decimal=2)
    # The expected 0.5 ratio below implies each cell aggregates two values,
    # consistent with the source frame being concatenated twice above.
    at['Half'] = at['Mean'] / at['Sum']
    self.assertEqual(at['Half'].shape, (3, 7))
    assert_array_almost_equal(at['Half']['A'],
                              np.array([0.5, 0.5, 0.5]),
                              decimal=2)

    # Add and view inner tables with blanks
    at['Blanks'] = at['Sum'].copy()
    at['Blanks']['C'] = 0.0
    # Set row index 2 to NaN in every column selected by [:, 1:].
    for col in at['Blanks'][:, 1:]:
        at['Blanks'][col][2] = np.nan
    vw = at.gen('Blanks')
    self.assertEqual(vw.shape, (2, 9))
    assert_array_almost_equal(vw['A'], np.array([0.47, -0.79]), decimal=2)
    assert_array_almost_equal(vw['Blanks'],
                              np.array([-0.10, -5.02]),
                              decimal=2)
    self.assertAlmostEqual(vw.footer_get_dict()['Blanks']['Blanks'],
                           0.245,
                           places=2)
    vw = at.gen('Blanks', remove_blanks=False)
    self.assertEqual(vw.shape, (3, 10))
    assert_array_almost_equal(vw['A'],
                              np.array([0.47, -0.79, np.nan]),
                              decimal=2)
    assert_array_almost_equal(vw['Blanks'],
                              np.array([-0.10, -5.02, np.nan]),
                              decimal=2)

    # Test division with zeros and nans
    at['Bad'] = at['Blanks'] / at['Half']
    # NOTE(review): this re-checks 'Blanks' right after creating 'Bad';
    # possibly at['Bad'] was intended - confirm.
    self.assertEqual(at['Blanks'].shape, (3, 7))
    vw = at.gen('Bad')
    self.assertEqual(vw.shape, (2, 10))
    vw = at.gen('Blanks')
    self.assertEqual(vw.shape, (2, 10))
    vw = at.gen('Half')
    self.assertEqual(vw.shape, (3, 11))

    # Set margin columns to the right
    at.set_margin_columns(['Blanks', 'Mean'])
    vw = at.gen('Half')
    self.assertEqual(vw.shape, (3, 9))
    self.assertEqual(vw.keys()[6], 'Half')
    self.assertEqual(vw.keys()[7], 'Blanks')
    self.assertEqual(vw.keys()[8], 'Mean')
    self.assertEqual(list(vw.footer_get_dict().keys()),
                     ['Half', 'Sum', 'Mean', 'Blanks', 'Bad'])
    # gen() with no argument: the margin at index 6 is still 'Half' here.
    vw = at.gen()
    self.assertEqual(vw.keys()[6], 'Half')
    vw = at.gen('Sum')
    self.assertEqual(vw.keys()[6], 'Sum')
    self.assertEqual(vw.keys()[7], 'Blanks')
    self.assertEqual(vw.keys()[8], 'Mean')
    self.assertEqual(list(vw.footer_get_dict().keys()),
                     ['Sum', 'Mean', 'Half', 'Blanks', 'Bad'])

    # Set footer rows at the bottom
    at.set_footer_rows(['Mean'])
    vw = at.gen('Half')
    self.assertEqual(vw.shape, (3, 9))
    self.assertEqual(vw.keys()[6], 'Half')
    self.assertEqual(vw.keys()[7], 'Blanks')
    self.assertEqual(vw.keys()[8], 'Mean')
    self.assertEqual(list(vw.footer_get_dict().keys()), ['Half', 'Mean'])
    vw = at.gen('Sum')
    self.assertEqual(vw.keys()[6], 'Sum')
    self.assertEqual(vw.keys()[7], 'Blanks')
    self.assertEqual(vw.keys()[8], 'Mean')
    self.assertEqual(list(vw.footer_get_dict().keys()), ['Sum', 'Mean'])

    # Access view Dataset elements
    vw = at.gen('Sum')
    assert_array_equal(
        vw.date, rt.FastArray(['2000-01-03', '2000-01-04', '2000-01-05']))
    assert_array_almost_equal(vw['Sum'],
                              np.array([-0.10, -5.02, 5.37]),
                              decimal=2)
    assert_almost_equal(vw[vw.date == '2000-01-03', 'A'][0],
                        0.47355353,
                        decimal=2)
    assert_almost_equal(
        list(vw.footer_get_values('Sum', columns=['A']).values())[0],
        1.409830,
        decimal=2,
    )
def test_substr_array_bounds(self, start, stop, expected):
    """Calling FAString.substr(start, stop) yields the parametrized expectation."""
    symbols = FAString(SYMBOLS)
    result = symbols.substr(start, stop)
    wanted = rt.FastArray(expected)
    assert_array_equal(wanted, result)
def test_extract_array(self, pattern, expected):
    """Extract `pattern` from self.osi and compare against `expected` as a FastArray."""
    extracted = self.osi.str.extract(pattern)
    wanted = rt.FastArray(expected)
    assert_array_or_cat_equal(wanted, extracted)
def test_list(self):
    """A FastArray built from self.data equals the ndarray built from it."""
    as_fast_array = rt.FastArray(self.data)
    as_ndarray = np.array(self.data)
    self.assert_equal(as_fast_array, as_ndarray)
def test_float_list_no_threads(self):
    """FastArray built from a float ndarray equals it after `_TOFF` (threads off, per the test name).

    The toggle acts on the FastArray class, so it is restored in a
    ``finally`` block: a failing assertion must not leave it switched off
    for tests that run later.
    """
    rt.FastArray._TOFF()
    try:
        self.assert_equal(rt.FastArray(np.array(self.dataf)), np.array(self.dataf))
    finally:
        rt.FastArray._TON()
def test_float_list_no_recycling(self):
    """FastArray built from a float ndarray equals it after `_ROFF` (recycling off, per the test name).

    The toggle acts on the FastArray class, so it is restored in a
    ``finally`` block: a failing assertion must not leave it switched off
    for tests that run later.
    """
    rt.FastArray._ROFF()
    try:
        self.assert_equal(rt.FastArray(np.array(self.dataf)), np.array(self.dataf))
    finally:
        rt.FastArray._RON()
def test_float_list(self):
    """A FastArray built from self.dataf equals the ndarray built from it."""
    self.assert_equal(rt.FastArray(self.dataf), np.array(self.dataf))
def test_char(self, position):
    """FAString.char returns the character at `position`, or '' past a string's end."""
    result = FAString(SYMBOLS).char(position)

    def char_or_blank(symbol):
        # Reference behaviour: empty string when the position is out of range.
        if position < len(symbol):
            return symbol[position]
        return ''

    expected = rt.FastArray([char_or_blank(symbol) for symbol in SYMBOLS])
    assert_array_equal(result, expected)
def test_nparray(self):
    """A FastArray wrapping an ndarray of self.data equals a separately built ndarray."""
    wrapped = rt.FastArray(np.array(self.data))
    reference = np.array(self.data)
    self.assert_equal(wrapped, reference)