    --------
    >>> arr = RangeIndex(5)
    >>> arr / zeros
    Float64Index([nan, inf, inf, inf, inf], dtype='float64')
    """
    return request.param


# ------------------------------------------------------------------
# Vector Fixtures


@pytest.fixture(
    params=[
        Float64Index(np.arange(5, dtype="float64")),
        Int64Index(np.arange(5, dtype="int64")),
        UInt64Index(np.arange(5, dtype="uint64")),
        RangeIndex(5),
    ],
    ids=lambda x: type(x).__name__,
)
def numeric_idx(request):
    """
    Several types of numeric-dtypes Index objects
    """
    return request.param


# ------------------------------------------------------------------
# Scalar Fixtures
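# Hedged, self-contained sketch (not part of the suite above) of how a test
# could consume the ``numeric_idx`` fixture: dividing any numeric-dtype Index
# by zero upcasts to float64, giving NaN for 0/0 and inf elsewhere, matching
# the docstring example. The test name is an assumption, and Float64Index is
# only importable on pandas versions that still ship it.
import numpy as np
import pandas.testing as tm
from pandas import Float64Index


def test_div_by_zero_sketch(numeric_idx):
    result = numeric_idx / np.zeros(5)
    expected = Float64Index([np.nan, np.inf, np.inf, np.inf, np.inf])
    tm.assert_index_equal(result, expected)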
def test_slice_integer_frame_getitem(self):

    # similar to above, but on the getitem dim (of a DataFrame)
    for index in [Int64Index(range(5)), RangeIndex(5)]:

        s = DataFrame(np.random.randn(5, 2), index=index)

        def f(idxr):

            # getitem
            for l in [slice(0.0, 1), slice(0, 1.0), slice(0.0, 1.0)]:

                result = idxr(s)[l]
                indexer = slice(0, 2)
                self.check(result, s, indexer, False)

                # positional indexing
                msg = ("cannot do slice indexing"
                       r" on {klass} with these indexers \[(0|1)\.0\] of"
                       " {kind}"
                       .format(klass=type(index), kind=str(float)))
                with pytest.raises(TypeError, match=msg):
                    s[l]

            # getitem out-of-bounds
            for l in [slice(-10, 10), slice(-10.0, 10.0)]:

                result = idxr(s)[l]
                self.check(result, s, slice(-10, 10), True)

            # positional indexing
            msg = ("cannot do slice indexing"
                   r" on {klass} with these indexers \[-10\.0\] of"
                   " {kind}"
                   .format(klass=type(index), kind=str(float)))
            with pytest.raises(TypeError, match=msg):
                s[slice(-10.0, 10.0)]

            # getitem odd floats
            for l, res in [(slice(0.5, 1), slice(1, 2)),
                           (slice(0, 0.5), slice(0, 1)),
                           (slice(0.5, 1.5), slice(1, 2))]:

                result = idxr(s)[l]
                self.check(result, s, res, False)

                # positional indexing
                msg = ("cannot do slice indexing"
                       r" on {klass} with these indexers \[0\.5\] of"
                       " {kind}"
                       .format(klass=type(index), kind=str(float)))
                with pytest.raises(TypeError, match=msg):
                    s[l]

            # setitem
            for l in [slice(3.0, 4), slice(3, 4.0), slice(3.0, 4.0)]:

                sc = s.copy()
                idxr(sc)[l] = 0
                result = idxr(sc)[l].values.ravel()
                assert (result == 0).all()

                # positional indexing
                msg = ("cannot do slice indexing"
                       r" on {klass} with these indexers \[(3|4)\.0\] of"
                       " {kind}"
                       .format(klass=type(index), kind=str(float)))
                with pytest.raises(TypeError, match=msg):
                    s[l] = 0

        f(lambda x: x.loc)
        with catch_warnings(record=True):
            f(lambda x: x.ix)
def create_index(self):
    return Int64Index(np.arange(5, dtype='int64'))
def test_take_preserve_name(self):
    index = Int64Index([1, 2, 3, 4], name='foo')
    taken = index.take([3, 0, 1])
    self.assertEqual(index.name, taken.name)
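# Quick standalone illustration of the invariant tested above, written with a
# plain pd.Index so it does not depend on the Int64Index class: ``take``
# reorders by position and carries the ``name`` attribute through.
import pandas as pd

idx = pd.Index([1, 2, 3, 4], name="foo")
taken = idx.take([3, 0, 1])
assert list(taken) == [4, 1, 2]
assert taken.name == "foo"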
class TestRangeIndexSetOps: @pytest.mark.parametrize("klass", [RangeIndex, Int64Index, UInt64Index]) def test_intersection_mismatched_dtype(self, klass): # check that we cast to float, not object index = RangeIndex(start=0, stop=20, step=2, name="foo") index = klass(index) flt = index.astype(np.float64) # bc index.equals(flt), we go through fastpath and get RangeIndex back result = index.intersection(flt) tm.assert_index_equal(result, index, exact=True) result = flt.intersection(index) tm.assert_index_equal(result, flt, exact=True) # neither empty, not-equals result = index.intersection(flt[1:]) tm.assert_index_equal(result, flt[1:], exact=True) result = flt[1:].intersection(index) tm.assert_index_equal(result, flt[1:], exact=True) # empty other result = index.intersection(flt[:0]) tm.assert_index_equal(result, flt[:0], exact=True) result = flt[:0].intersection(index) tm.assert_index_equal(result, flt[:0], exact=True) def test_intersection(self, sort): # intersect with Int64Index index = RangeIndex(start=0, stop=20, step=2) other = Index(np.arange(1, 6)) result = index.intersection(other, sort=sort) expected = Index(np.sort(np.intersect1d(index.values, other.values))) tm.assert_index_equal(result, expected) result = other.intersection(index, sort=sort) expected = Index( np.sort(np.asarray(np.intersect1d(index.values, other.values))) ) tm.assert_index_equal(result, expected) # intersect with increasing RangeIndex other = RangeIndex(1, 6) result = index.intersection(other, sort=sort) expected = Index(np.sort(np.intersect1d(index.values, other.values))) tm.assert_index_equal(result, expected) # intersect with decreasing RangeIndex other = RangeIndex(5, 0, -1) result = index.intersection(other, sort=sort) expected = Index(np.sort(np.intersect1d(index.values, other.values))) tm.assert_index_equal(result, expected) # reversed (GH 17296) result = other.intersection(index, sort=sort) tm.assert_index_equal(result, expected) # GH 17296: intersect two decreasing RangeIndexes first = RangeIndex(10, -2, -2) other = RangeIndex(5, -4, -1) expected = first.astype(int).intersection(other.astype(int), sort=sort) result = first.intersection(other, sort=sort).astype(int) tm.assert_index_equal(result, expected) # reversed result = other.intersection(first, sort=sort).astype(int) tm.assert_index_equal(result, expected) index = RangeIndex(5) # intersect of non-overlapping indices other = RangeIndex(5, 10, 1) result = index.intersection(other, sort=sort) expected = RangeIndex(0, 0, 1) tm.assert_index_equal(result, expected) other = RangeIndex(-1, -5, -1) result = index.intersection(other, sort=sort) expected = RangeIndex(0, 0, 1) tm.assert_index_equal(result, expected) # intersection of empty indices other = RangeIndex(0, 0, 1) result = index.intersection(other, sort=sort) expected = RangeIndex(0, 0, 1) tm.assert_index_equal(result, expected) result = other.intersection(index, sort=sort) tm.assert_index_equal(result, expected) # intersection of non-overlapping values based on start value and gcd index = RangeIndex(1, 10, 2) other = RangeIndex(0, 10, 4) result = index.intersection(other, sort=sort) expected = RangeIndex(0, 0, 1) tm.assert_index_equal(result, expected) def test_union_noncomparable(self, sort): # corner case, non-Int64Index index = RangeIndex(start=0, stop=20, step=2) other = Index([datetime.now() + timedelta(i) for i in range(4)], dtype=object) result = index.union(other, sort=sort) expected = Index(np.concatenate((index, other))) tm.assert_index_equal(result, expected) result = other.union(index, 
sort=sort) expected = Index(np.concatenate((other, index))) tm.assert_index_equal(result, expected) @pytest.fixture( params=[ ( RangeIndex(0, 10, 1), RangeIndex(0, 10, 1), RangeIndex(0, 10, 1), RangeIndex(0, 10, 1), ), ( RangeIndex(0, 10, 1), RangeIndex(5, 20, 1), RangeIndex(0, 20, 1), Int64Index(range(20)), ), ( RangeIndex(0, 10, 1), RangeIndex(10, 20, 1), RangeIndex(0, 20, 1), Int64Index(range(20)), ), ( RangeIndex(0, -10, -1), RangeIndex(0, -10, -1), RangeIndex(0, -10, -1), RangeIndex(0, -10, -1), ), ( RangeIndex(0, -10, -1), RangeIndex(-10, -20, -1), RangeIndex(-19, 1, 1), Int64Index(range(0, -20, -1)), ), ( RangeIndex(0, 10, 2), RangeIndex(1, 10, 2), RangeIndex(0, 10, 1), Int64Index(list(range(0, 10, 2)) + list(range(1, 10, 2))), ), ( RangeIndex(0, 11, 2), RangeIndex(1, 12, 2), RangeIndex(0, 12, 1), Int64Index(list(range(0, 11, 2)) + list(range(1, 12, 2))), ), ( RangeIndex(0, 21, 4), RangeIndex(-2, 24, 4), RangeIndex(-2, 24, 2), Int64Index(list(range(0, 21, 4)) + list(range(-2, 24, 4))), ), ( RangeIndex(0, -20, -2), RangeIndex(-1, -21, -2), RangeIndex(-19, 1, 1), Int64Index(list(range(0, -20, -2)) + list(range(-1, -21, -2))), ), ( RangeIndex(0, 100, 5), RangeIndex(0, 100, 20), RangeIndex(0, 100, 5), Int64Index(range(0, 100, 5)), ), ( RangeIndex(0, -100, -5), RangeIndex(5, -100, -20), RangeIndex(-95, 10, 5), Int64Index(list(range(0, -100, -5)) + [5]), ), ( RangeIndex(0, -11, -1), RangeIndex(1, -12, -4), RangeIndex(-11, 2, 1), Int64Index(list(range(0, -11, -1)) + [1, -11]), ), (RangeIndex(0), RangeIndex(0), RangeIndex(0), RangeIndex(0)), ( RangeIndex(0, -10, -2), RangeIndex(0), RangeIndex(0, -10, -2), RangeIndex(0, -10, -2), ), ( RangeIndex(0, 100, 2), RangeIndex(100, 150, 200), RangeIndex(0, 102, 2), Int64Index(range(0, 102, 2)), ), ( RangeIndex(0, -100, -2), RangeIndex(-100, 50, 102), RangeIndex(-100, 4, 2), Int64Index(list(range(0, -100, -2)) + [-100, 2]), ), ( RangeIndex(0, -100, -1), RangeIndex(0, -50, -3), RangeIndex(-99, 1, 1), Int64Index(list(range(0, -100, -1))), ), ( RangeIndex(0, 1, 1), RangeIndex(5, 6, 10), RangeIndex(0, 6, 5), Int64Index([0, 5]), ), ( RangeIndex(0, 10, 5), RangeIndex(-5, -6, -20), RangeIndex(-5, 10, 5), Int64Index([0, 5, -5]), ), ( RangeIndex(0, 3, 1), RangeIndex(4, 5, 1), Int64Index([0, 1, 2, 4]), Int64Index([0, 1, 2, 4]), ), ( RangeIndex(0, 10, 1), Int64Index([]), RangeIndex(0, 10, 1), RangeIndex(0, 10, 1), ), ( RangeIndex(0), Int64Index([1, 5, 6]), Int64Index([1, 5, 6]), Int64Index([1, 5, 6]), ), ] ) def unions(self, request): """Inputs and expected outputs for RangeIndex.union tests""" return request.param def test_union_sorted(self, unions): idx1, idx2, expected_sorted, expected_notsorted = unions res1 = idx1.union(idx2, sort=None) tm.assert_index_equal(res1, expected_sorted, exact=True) res1 = idx1.union(idx2, sort=False) tm.assert_index_equal(res1, expected_notsorted, exact=True) res2 = idx2.union(idx1, sort=None) res3 = idx1._int64index.union(idx2, sort=None) tm.assert_index_equal(res2, expected_sorted, exact=True) tm.assert_index_equal(res3, expected_sorted) def test_difference(self): # GH#12034 Cases where we operate against another RangeIndex and may # get back another RangeIndex obj = RangeIndex.from_range(range(1, 10), name="foo") result = obj.difference(obj) expected = RangeIndex.from_range(range(0), name="foo") tm.assert_index_equal(result, expected, exact=True) result = obj.difference(expected.rename("bar")) tm.assert_index_equal(result, obj.rename(None), exact=True) result = obj.difference(obj[:3]) tm.assert_index_equal(result, obj[3:], 
exact=True) result = obj.difference(obj[-3:]) tm.assert_index_equal(result, obj[:-3], exact=True) result = obj[::-1].difference(obj[-3:]) tm.assert_index_equal(result, obj[:-3][::-1], exact=True) result = obj[::-1].difference(obj[-3:][::-1]) tm.assert_index_equal(result, obj[:-3][::-1], exact=True) result = obj.difference(obj[2:6]) expected = Int64Index([1, 2, 7, 8, 9], name="foo") tm.assert_index_equal(result, expected) def test_difference_mismatched_step(self): obj = RangeIndex.from_range(range(1, 10), name="foo") result = obj.difference(obj[::2]) expected = obj[1::2]._int64index tm.assert_index_equal(result, expected, exact=True) result = obj.difference(obj[1::2]) expected = obj[::2]._int64index tm.assert_index_equal(result, expected, exact=True) def test_symmetric_difference(self): # GH#12034 Cases where we operate against another RangeIndex and may # get back another RangeIndex left = RangeIndex.from_range(range(1, 10), name="foo") result = left.symmetric_difference(left) expected = RangeIndex.from_range(range(0), name="foo") tm.assert_index_equal(result, expected) result = left.symmetric_difference(expected.rename("bar")) tm.assert_index_equal(result, left.rename(None)) result = left[:-2].symmetric_difference(left[2:]) expected = Int64Index([1, 2, 8, 9], name="foo") tm.assert_index_equal(result, expected) right = RangeIndex.from_range(range(10, 15)) result = left.symmetric_difference(right) expected = RangeIndex.from_range(range(1, 15)) tm.assert_index_equal(result, expected) result = left.symmetric_difference(right[1:]) expected = Int64Index([1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 14]) tm.assert_index_equal(result, expected)
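# Standalone sketch of the difference behavior exercised above; exact result
# types can vary across pandas versions, but in the version these tests
# target, removing a block from either end of a RangeIndex keeps a RangeIndex,
# while removing values from the middle falls back to a materialized integer
# index.
import pandas as pd

obj = pd.RangeIndex(1, 10, name="foo")
print(type(obj.difference(obj[:3])).__name__)   # RangeIndex: the tail stays contiguous
print(type(obj.difference(obj[2:6])).__name__)  # materialized integer index: a gap remains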
def setUp(self):
    self.indices = dict(index=Int64Index(np.arange(0, 20, 2)))
    self.setup_indices()
def test_get_indexer(self):
    target = Int64Index(np.arange(10))
    indexer = self.index.get_indexer(target)
    expected = np.array([0, -1, 1, -1, 2, -1, 3, -1, 4, -1], dtype=np.intp)
    tm.assert_numpy_array_equal(indexer, expected)
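# Minimal sketch of the ``get_indexer`` semantics assumed here: given the
# even-numbered index built in ``setUp`` above, each target label maps to its
# position in the index, or -1 when the label is absent.
import numpy as np
import pandas as pd

index = pd.Index(np.arange(0, 20, 2))
indexer = index.get_indexer(pd.Index(np.arange(10)))
print(indexer)  # [ 0 -1  1 -1  2 -1  3 -1  4 -1]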
class Base: """ Common tests for all variations of IntervalIndex construction. Input data to be supplied in breaks format, then converted by the subclass method get_kwargs_from_breaks to the expected format. """ @pytest.mark.parametrize( 'breaks', [[3, 14, 15, 92, 653], np.arange(10, dtype='int64'), Int64Index(range(-10, 11)), Float64Index(np.arange(20, 30, 0.5)), date_range('20180101', periods=10), date_range('20180101', periods=10, tz='US/Eastern'), timedelta_range('1 day', periods=10)]) def test_constructor(self, constructor, breaks, closed, name): result_kwargs = self.get_kwargs_from_breaks(breaks, closed) result = constructor(closed=closed, name=name, **result_kwargs) assert result.closed == closed assert result.name == name assert result.dtype.subtype == getattr(breaks, 'dtype', 'int64') tm.assert_index_equal(result.left, Index(breaks[:-1])) tm.assert_index_equal(result.right, Index(breaks[1:])) @pytest.mark.parametrize('breaks, subtype', [(Int64Index([0, 1, 2, 3, 4]), 'float64'), (Int64Index([0, 1, 2, 3, 4]), 'datetime64[ns]'), (Int64Index([0, 1, 2, 3, 4]), 'timedelta64[ns]'), (Float64Index([0, 1, 2, 3, 4]), 'int64'), (date_range('2017-01-01', periods=5), 'int64'), (timedelta_range('1 day', periods=5), 'int64')]) def test_constructor_dtype(self, constructor, breaks, subtype): # GH 19262: conversion via dtype parameter expected_kwargs = self.get_kwargs_from_breaks(breaks.astype(subtype)) expected = constructor(**expected_kwargs) result_kwargs = self.get_kwargs_from_breaks(breaks) iv_dtype = IntervalDtype(subtype) for dtype in (iv_dtype, str(iv_dtype)): result = constructor(dtype=dtype, **result_kwargs) tm.assert_index_equal(result, expected) @pytest.mark.parametrize('breaks', [[np.nan] * 2, [np.nan] * 4, [np.nan] * 50]) def test_constructor_nan(self, constructor, breaks, closed): # GH 18421 result_kwargs = self.get_kwargs_from_breaks(breaks) result = constructor(closed=closed, **result_kwargs) expected_subtype = np.float64 expected_values = np.array(breaks[:-1], dtype=object) assert result.closed == closed assert result.dtype.subtype == expected_subtype tm.assert_numpy_array_equal(result._ndarray_values, expected_values) @pytest.mark.parametrize('breaks', [[], np.array([], dtype='int64'), np.array([], dtype='float64'), np.array([], dtype='datetime64[ns]'), np.array([], dtype='timedelta64[ns]')]) def test_constructor_empty(self, constructor, breaks, closed): # GH 18421 result_kwargs = self.get_kwargs_from_breaks(breaks) result = constructor(closed=closed, **result_kwargs) expected_values = np.array([], dtype=object) expected_subtype = getattr(breaks, 'dtype', np.int64) assert result.empty assert result.closed == closed assert result.dtype.subtype == expected_subtype tm.assert_numpy_array_equal(result._ndarray_values, expected_values) @pytest.mark.parametrize('breaks', [ tuple('0123456789'), list('abcdefghij'), np.array(list('abcdefghij'), dtype=object), np.array(list('abcdefghij'), dtype='<U1') ]) def test_constructor_string(self, constructor, breaks): # GH 19016 msg = ('category, object, and string subtypes are not supported ' 'for IntervalIndex') with pytest.raises(TypeError, match=msg): constructor(**self.get_kwargs_from_breaks(breaks)) @pytest.mark.parametrize('cat_constructor', [Categorical, CategoricalIndex]) def test_constructor_categorical_valid(self, constructor, cat_constructor): # GH 21243/21253 if isinstance(constructor, partial) and constructor.func is Index: # Index is defined to create CategoricalIndex from categorical data pytest.skip() breaks = np.arange(10, 
dtype='int64') expected = IntervalIndex.from_breaks(breaks) cat_breaks = cat_constructor(breaks) result_kwargs = self.get_kwargs_from_breaks(cat_breaks) result = constructor(**result_kwargs) tm.assert_index_equal(result, expected) def test_generic_errors(self, constructor): # filler input data to be used when supplying invalid kwargs filler = self.get_kwargs_from_breaks(range(10)) # invalid closed msg = "invalid option for 'closed': invalid" with pytest.raises(ValueError, match=msg): constructor(closed='invalid', **filler) # unsupported dtype msg = 'dtype must be an IntervalDtype, got int64' with pytest.raises(TypeError, match=msg): constructor(dtype='int64', **filler) # invalid dtype msg = "data type 'invalid' not understood" with pytest.raises(TypeError, match=msg): constructor(dtype='invalid', **filler) # no point in nesting periods in an IntervalIndex periods = period_range('2000-01-01', periods=10) periods_kwargs = self.get_kwargs_from_breaks(periods) msg = 'Period dtypes are not supported, use a PeriodIndex instead' with pytest.raises(ValueError, match=msg): constructor(**periods_kwargs) # decreasing values decreasing_kwargs = self.get_kwargs_from_breaks(range(10, -1, -1)) msg = 'left side of interval must be <= right side' with pytest.raises(ValueError, match=msg): constructor(**decreasing_kwargs)
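# Standalone illustration of the constructor contract these tests check, using
# the public ``IntervalIndex.from_breaks`` constructor: the breaks become the
# left/right edges and the dtype's subtype follows the breaks' dtype.
import numpy as np
import pandas as pd

idx = pd.IntervalIndex.from_breaks(np.arange(5, dtype="int64"), closed="right")
assert idx.closed == "right"
assert idx.dtype.subtype == np.dtype("int64")
assert list(idx.left) == [0, 1, 2, 3]
assert list(idx.right) == [1, 2, 3, 4]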
def test_tdi_floordiv_tdlike_scalar(self, delta):
    tdi = timedelta_range('1 days', '10 days', name='foo')
    expected = Int64Index((np.arange(10) + 1) * 12, name='foo')

    result = tdi // delta
    tm.assert_index_equal(result, expected, exact=False)
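# Hedged sketch of the floor-division being tested, assuming ``delta`` is a
# two-hour Timedelta (which is what makes the expected multiplier 12: each day
# contains twelve 2-hour blocks).
import numpy as np
import pandas as pd

tdi = pd.timedelta_range("1 days", "10 days", name="foo")
result = tdi // pd.Timedelta(hours=2)
assert list(result) == list((np.arange(10) + 1) * 12)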
class PerformanceTestCase(TestCase): dr = date_range(start='2015-1-1', end='2015-1-2') dr.name = 'date' tickers = ['A', 'B', 'C', 'D'] factor = DataFrame(index=dr, columns=tickers, data=[[1, 2, 3, 4], [4, 3, 2, 1]]).stack() factor.index = factor.index.set_names(['date', 'asset']) factor.name = 'factor' factor_data = DataFrame() factor_data['factor'] = factor factor_data['group'] = Series(index=factor.index, data=[1, 1, 2, 2, 1, 1, 2, 2], dtype="category") @parameterized.expand([( factor_data, [4, 3, 2, 1, 1, 2, 3, 4], False, False, dr, [-1., -1.], ), ( factor_data, [1, 2, 3, 4, 4, 3, 2, 1], False, False, dr, [1., 1.], ), ( factor_data, [1, 2, 3, 4, 4, 3, 2, 1], False, True, MultiIndex.from_product([dr, [1, 2]], names=['date', 'group']), [1., 1., 1., 1.], ), ( factor_data, [1, 2, 3, 4, 4, 3, 2, 1], True, True, MultiIndex.from_product([dr, [1, 2]], names=['date', 'group']), [1., 1., 1., 1.], )]) def test_information_coefficient(self, factor_data, forward_returns, group_adjust, by_group, expected_ix, expected_ic_val): factor_data[1] = Series(index=factor_data.index, data=forward_returns) ic = factor_information_coefficient(factor_data=factor_data, group_adjust=group_adjust, by_group=by_group) expected_ic_df = DataFrame(index=expected_ix, columns=Int64Index([1], dtype='object'), data=expected_ic_val) assert_frame_equal(ic, expected_ic_df) @parameterized.expand([ (factor_data, [4, 3, 2, 1, 1, 2, 3, 4], False, False, 'D', dr, [-1., -1.]), (factor_data, [1, 2, 3, 4, 4, 3, 2, 1], False, False, 'W', DatetimeIndex(['2015-01-04'], name='date', freq='W-SUN'), [1.]), (factor_data, [1, 2, 3, 4, 4, 3, 2, 1], False, True, None, Int64Index([1, 2], name='group'), [1., 1.]), (factor_data, [1, 2, 3, 4, 4, 3, 2, 1], False, True, 'W', MultiIndex.from_product([ DatetimeIndex(['2015-01-04'], name='date', freq='W-SUN'), [1, 2] ], names=['date', 'group']), [1., 1.]) ]) def test_mean_information_coefficient(self, factor_data, forward_returns, group_adjust, by_group, by_time, expected_ix, expected_ic_val): factor_data[1] = Series(index=factor_data.index, data=forward_returns) ic = mean_information_coefficient(factor_data, group_adjust=group_adjust, by_group=by_group, by_time=by_time) expected_ic_df = DataFrame(index=expected_ix, columns=Int64Index([1], dtype='object'), data=expected_ic_val) assert_frame_equal(ic, expected_ic_df) @parameterized.expand([ ([[1.0, 2.0, 3.0, 4.0], [4.0, 3.0, 2.0, 1.0], [1.0, 2.0, 3.0, 4.0], [1.0, 2.0, 3.0, 4.0]], 4.0, [nan, 1.0, 1.0, 0.0]), ([[1.0, 2.0, 3.0, 4.0], [1.0, 2.0, 3.0, 4.0], [1.0, 2.0, 3.0, 4.0], [1.0, 2.0, 3.0, 4.0]], 3.0, [nan, 0.0, 0.0, 0.0]), ([[1.0, 2.0, 3.0, 4.0], [4.0, 3.0, 2.0, 1.0], [1.0, 2.0, 3.0, 4.0], [4.0, 3.0, 2.0, 1.0]], 2.0, [nan, 1.0, 1.0, 1.0]) ]) def test_quantile_turnover(self, quantile_values, test_quantile, expected_vals): dr = date_range(start='2015-1-1', end='2015-1-4') dr.name = 'date' tickers = ['A', 'B', 'C', 'D'] quantized_test_factor = Series( DataFrame(index=dr, columns=tickers, data=quantile_values).stack()) quantized_test_factor.index = quantized_test_factor.index.set_names( ['date', 'asset']) to = quantile_turnover(quantized_test_factor, test_quantile) expected = Series(index=quantized_test_factor.index.levels[0], data=expected_vals) expected.name = test_quantile assert_series_equal(to, expected) @parameterized.expand([([1, 2, 3, 4, 4, 3, 2, 1], [4, 3, 2, 1, 1, 2, 3, 4], False, [-1.25000, -1.25000]), ([1, 1, 1, 1, 1, 1, 1, 1], [4, 3, 2, 1, 1, 2, 3, 4], False, [nan, nan]), ([1, 2, 3, 4, 4, 3, 2, 1], [4, 3, 2, 1, 1, 2, 3, 4], True, [-0.5, -0.5]), 
([1, 2, 3, 4, 1, 2, 3, 4], [1, 4, 1, 2, 1, 2, 2, 1], True, [1.0, 0.0]), ([1, 1, 1, 1, 1, 1, 1, 1], [4, 3, 2, 1, 1, 2, 3, 4], True, [nan, nan])]) def test_factor_returns(self, factor_vals, fwd_return_vals, group_adjust, expected_vals): factor_data = self.factor_data.copy() factor_data[1] = fwd_return_vals factor_data['factor'] = factor_vals factor_returns_s = factor_returns(factor_data=factor_data, demeaned=True, group_adjust=group_adjust) expected = DataFrame(index=self.dr, data=expected_vals, columns=get_forward_returns_columns( factor_data.columns)) assert_frame_equal(factor_returns_s, expected) @parameterized.expand([([1, 2, 3, 4, 1, 1, 1, 1], -1, 5. / 6.)]) def test_factor_alpha_beta(self, fwd_return_vals, alpha, beta): factor_data = self.factor_data.copy() factor_data[1] = fwd_return_vals ab = factor_alpha_beta(factor_data=factor_data) expected = DataFrame(columns=[1], index=['Ann. alpha', 'beta'], data=[alpha, beta]) assert_frame_equal(ab, expected) @parameterized.expand([ ([[1.0, 2.0, 3.0, 4.0], [1.0, 2.0, 3.0, 4.0], [1.0, 2.0, 3.0, 4.0], [1.0, 2.0, 3.0, 4.0]], '2015-1-4', 1, [nan, 1.0, 1.0, 1.0]), ([[4.0, 3.0, 2.0, 1.0], [1.0, 2.0, 3.0, 4.0], [4.0, 3.0, 2.0, 1.0], [1.0, 2.0, 3.0, 4.0]], '2015-1-4', 1, [nan, -1.0, -1.0, -1.0]), ([[1.0, 2.0, 3.0, 4.0], [2.0, 1.0, 4.0, 3.0], [4.0, 3.0, 2.0, 1.0], [1.0, 2.0, 3.0, 4.0], [2.0, 1.0, 4.0, 3.0], [4.0, 3.0, 2.0, 1.0], [2.0, 1.0, 4.0, 3.0], [4.0, 3.0, 2.0, 1.0], [1.0, 2.0, 3.0, 4.0], [2.0, 1.0, 4.0, 3.0], [2.0, 1.0, 4.0, 3.0], [4.0, 3.0, 2.0, 1.0]], '2015-1-12', 3, [nan, nan, nan, 1.0, 1.0, 1.0, 0.6, -0.6, -1.0, 1.0, -0.6, -1.0]) ]) def test_factor_rank_autocorrelation(self, factor_values, end_date, period, expected_vals): dr = date_range(start='2015-1-1', end=end_date) dr.name = 'date' tickers = ['A', 'B', 'C', 'D'] factor = DataFrame(index=dr, columns=tickers, data=factor_values).stack() factor.index = factor.index.set_names(['date', 'asset']) factor_df = DataFrame() factor_df['factor'] = factor fa = factor_rank_autocorrelation(factor_df, period) expected = Series(index=dr, data=expected_vals) expected.name = period assert_series_equal(fa, expected) @parameterized.expand([ (1, 2, False, 4, [[1.00, 0.0, -0.50, -0.75], [0.0, 0.0, 0.0, 0.0], [0.00, 0.00, 0.00, 0.00], [0.0, 0.0, 0.0, 0.0], [-0.20, 0.0, 0.25, 0.5625], [0.0, 0.0, 0.0, 0.0], [-0.3333333, 0.0, 0.50, 1.25], [0.0, 0.0, 0.0, 0.0]]), (1, 2, True, 4, [[0.8833333, 0.0, -0.5625, -1.015625], [0.0, 0.0, 0.0, 0.0], [-0.1166667, 0.0, -0.0625, -0.265625], [0.0, 0.0, 0.0, 0.0], [-0.3166667, 0.0, 0.1875, 0.296875], [0.0, 0.0, 0.0, 0.0], [-0.4500000, 0.0, 0.4375, 0.984375], [0.0, 0.0, 0.0, 0.0]]), (3, 0, False, 4, [[7.0, 3.0, 1.0, 0.0], [0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0], [-0.488, -0.36, -0.2, 0.0], [0.0, 0.0, 0.0, 0.0], [-0.703704, -0.55555555, -0.333333333, 0.0], [0.0, 0.0, 0.0, 0.0]]), (0, 3, True, 4, [[0.0, -0.5625, -1.015625, -1.488281], [0.0, 0.0, 0.0, 0.0], [0.0, -0.0625, -0.265625, -0.613281], [0.0, 0.0, 0.0, 0.0], [0.0, 0.1875, 0.296875, 0.339844], [0.0, 0.0, 0.0, 0.0], [0.0, 0.4375, 0.984375, 1.761719], [0.0, 0.0, 0.0, 0.0]]), (3, 3, False, 2, [[3.5, 1.5, 0.5, 0.0, -0.25, -0.375, -0.4375], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [-0.595852, -0.457778, -0.266667, 0.0, 0.375, 0.90625, 1.664062], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]]), (3, 3, True, 2, [[2.047926, 0.978888, 0.383333, 0.0, -0.3125, -0.640625, -1.050781], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [-2.047926, -0.978888, -0.383333, 0.0, 0.3125, 0.640625, 1.050781], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]]), ]) 
def test_average_cumulative_return_by_quantile(self, before, after, demeaned, quantiles, expected_vals): dr = date_range(start='2015-1-15', end='2015-2-1') dr.name = 'date' tickers = ['A', 'B', 'C', 'D'] r1, r2, r3, r4 = (1.25, 1.50, 1.00, 0.50) data = [[r1**i, r2**i, r3**i, r4**i] for i in range(1, 19)] prices = DataFrame(index=dr, columns=tickers, data=data) dr2 = date_range(start='2015-1-21', end='2015-1-26') dr2.name = 'date' factor = DataFrame(index=dr2, columns=tickers, data=[[3, 4, 2, 1], [3, 4, 2, 1], [3, 4, 2, 1], [3, 4, 2, 1], [3, 4, 2, 1], [3, 4, 2, 1]]).stack() factor_data = get_clean_factor_and_forward_returns(factor, prices, quantiles=quantiles, periods=range( 0, after + 1), filter_zscore=False) avgrt = average_cumulative_return_by_quantile(factor_data, prices, before, after, demeaned) arrays = [] for q in range(1, quantiles + 1): arrays.append((q, 'mean')) arrays.append((q, 'std')) index = MultiIndex.from_tuples(arrays, names=['factor_quantile', None]) expected = DataFrame(index=index, columns=range(-before, after + 1), data=expected_vals) assert_frame_equal(avgrt, expected) @parameterized.expand([ (0, 2, False, 4, [[0.0, -0.50, -0.75], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.25, 0.5625], [0.0, 0.0, 0.0], [0.0, 0.50, 1.25], [0.0, 0.0, 0.0]]), (0, 3, True, 4, [[0.0, -0.5625, -1.015625, -1.488281], [0.0, 0.0, 0.0, 0.0], [0.0, -0.0625, -0.265625, -0.613281], [0.0, 0.0, 0.0, 0.0], [0.0, 0.1875, 0.296875, 0.339844], [0.0, 0.0, 0.0, 0.0], [0.0, 0.4375, 0.984375, 1.761719], [0.0, 0.0, 0.0, 0.0]]), (0, 3, False, 2, [[0.0, -0.25, -0.375, -0.4375], [0.0, 0.0, 0.0, 0.0], [0.0, 0.375, 0.90625, 1.664062], [0.0, 0.0, 0.0, 0.0]]), (0, 3, True, 2, [[0.0, -0.3125, -0.640625, -1.050781], [0.0, 0.0, 0.0, 0.0], [0.0, 0.3125, 0.640625, 1.050781], [0.0, 0.0, 0.0, 0.0]]), ]) def test_average_cumulative_return_by_quantile_2(self, before, after, demeaned, quantiles, expected_vals): """ Test varying factor asset universe: at different dates there might be different assets """ dr = date_range(start='2015-1-15', end='2015-1-25') dr.name = 'date' tickers = ['A', 'B', 'C', 'D', 'E', 'F'] r1, r2, r3, r4 = (1.25, 1.50, 1.00, 0.50) data = [[r1**i, r2**i, r3**i, r4**i, r2**i, r3**i] for i in range(1, 12)] prices = DataFrame(index=dr, columns=tickers, data=data) dr2 = date_range(start='2015-1-18', end='2015-1-21') dr2.name = 'date' factor = DataFrame(index=dr2, columns=tickers, data=[[3, 4, 2, 1, nan, nan], [3, 4, 2, 1, nan, nan], [3, nan, nan, 1, 4, 2], [3, nan, nan, 1, 4, 2]]).stack() factor_data = get_clean_factor_and_forward_returns(factor, prices, quantiles=quantiles, periods=range( 0, after + 1), filter_zscore=False) avgrt = average_cumulative_return_by_quantile(factor_data, prices, before, after, demeaned) arrays = [] for q in range(1, quantiles + 1): arrays.append((q, 'mean')) arrays.append((q, 'std')) index = MultiIndex.from_tuples(arrays, names=['factor_quantile', None]) expected = DataFrame(index=index, columns=range(-before, after + 1), data=expected_vals) assert_frame_equal(avgrt, expected)
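# Small pandas-only illustration of the (date, asset) factor layout that the
# tests above construct repeatedly: a wide date-by-ticker DataFrame is stacked
# into a Series keyed by a two-level MultiIndex named ['date', 'asset'].
import pandas as pd

dr = pd.date_range(start="2015-1-1", end="2015-1-2", name="date")
tickers = ["A", "B", "C", "D"]
factor = pd.DataFrame(index=dr, columns=tickers,
                      data=[[1, 2, 3, 4], [4, 3, 2, 1]]).stack()
factor.index = factor.index.set_names(["date", "asset"])
assert factor.index.names == ["date", "asset"]
assert len(factor) == 8  # 2 dates x 4 assets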
def test_slice_integer(self):

    # same as above, but for Integer based indexes
    # these coerce to a like integer
    # oob indicates if we are out of bounds
    # of positional indexing
    for index, oob in [
        (Int64Index(range(5)), False),
        (RangeIndex(5), False),
        (Int64Index(range(5)) + 10, True),
    ]:

        # s is an in-range index
        s = Series(range(5), index=index)

        # getitem
        for l in [slice(3.0, 4), slice(3, 4.0), slice(3.0, 4.0)]:

            result = s.loc[l]

            # these are all label indexing
            # except getitem which is positional
            # empty
            if oob:
                indexer = slice(0, 0)
            else:
                indexer = slice(3, 5)
            self.check(result, s, indexer, False)

        # getitem out-of-bounds
        for l in [slice(-6, 6), slice(-6.0, 6.0)]:

            result = s.loc[l]

            # these are all label indexing
            # except getitem which is positional
            # empty
            if oob:
                indexer = slice(0, 0)
            else:
                indexer = slice(-6, 6)
            self.check(result, s, indexer, False)

        # positional indexing
        msg = (
            "cannot do slice indexing "
            fr"on {type(index).__name__} with these indexers \[-6\.0\] of "
            "type float"
        )
        with pytest.raises(TypeError, match=msg):
            s[slice(-6.0, 6.0)]

        # getitem odd floats
        for l, res1 in [
            (slice(2.5, 4), slice(3, 5)),
            (slice(2, 3.5), slice(2, 4)),
            (slice(2.5, 3.5), slice(3, 4)),
        ]:

            result = s.loc[l]
            if oob:
                res = slice(0, 0)
            else:
                res = res1
            self.check(result, s, res, False)

            # positional indexing
            msg = (
                "cannot do slice indexing "
                fr"on {type(index).__name__} with these indexers \[(2|3)\.5\] of "
                "type float"
            )
            with pytest.raises(TypeError, match=msg):
                s[l]
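# Standalone illustration of the rule these slicing tests encode (mirroring
# the assertions above; exact error messages vary by pandas version): float
# slice bounds are accepted by label-based ``.loc`` on an integer index, but
# positional ``[]`` slicing rejects them with a TypeError.
import pandas as pd

s = pd.Series(range(5), index=pd.Index(range(5), dtype="int64"))
print(s.loc[3.0:4.0])  # label slicing: rows labeled 3 and 4
try:
    s[3.0:4.0]         # positional slicing: floats are not valid positions
except TypeError as err:
    print(err)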
def test_union(self):
    i1 = Int64Index(np.arange(0, 20, 2))
    i2 = Int64Index(np.arange(10, 30, 2))
    result = i1.union(i2)
    expected = Int64Index(np.arange(0, 30, 2))
    tm.assert_index_equal(result, expected)
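# Worked example of the union under test: two overlapping even-number indexes
# combine into the sorted union of their values.
import numpy as np
import pandas as pd

i1 = pd.Index(np.arange(0, 20, 2))   # 0, 2, ..., 18
i2 = pd.Index(np.arange(10, 30, 2))  # 10, 12, ..., 28
assert list(i1.union(i2)) == list(range(0, 30, 2))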
class ConstructorTests: """ Common tests for all variations of IntervalIndex construction. Input data to be supplied in breaks format, then converted by the subclass method get_kwargs_from_breaks to the expected format. """ @pytest.mark.filterwarnings("ignore:Passing keywords other:FutureWarning") @pytest.mark.parametrize( "breaks", [ [3, 14, 15, 92, 653], np.arange(10, dtype="int64"), Int64Index(range(-10, 11)), Float64Index(np.arange(20, 30, 0.5)), date_range("20180101", periods=10), date_range("20180101", periods=10, tz="US/Eastern"), timedelta_range("1 day", periods=10), ], ) def test_constructor(self, constructor, breaks, closed, name): result_kwargs = self.get_kwargs_from_breaks(breaks, closed) result = constructor(closed=closed, name=name, **result_kwargs) assert result.closed == closed assert result.name == name assert result.dtype.subtype == getattr(breaks, "dtype", "int64") tm.assert_index_equal(result.left, Index(breaks[:-1])) tm.assert_index_equal(result.right, Index(breaks[1:])) @pytest.mark.parametrize( "breaks, subtype", [ (Int64Index([0, 1, 2, 3, 4]), "float64"), (Int64Index([0, 1, 2, 3, 4]), "datetime64[ns]"), (Int64Index([0, 1, 2, 3, 4]), "timedelta64[ns]"), (Float64Index([0, 1, 2, 3, 4]), "int64"), (date_range("2017-01-01", periods=5), "int64"), (timedelta_range("1 day", periods=5), "int64"), ], ) def test_constructor_dtype(self, constructor, breaks, subtype): # GH 19262: conversion via dtype parameter warn = None if subtype == "int64" and breaks.dtype.kind in ["M", "m"]: # astype(int64) deprecated warn = FutureWarning with tm.assert_produces_warning(warn, check_stacklevel=False): expected_kwargs = self.get_kwargs_from_breaks(breaks.astype(subtype)) expected = constructor(**expected_kwargs) result_kwargs = self.get_kwargs_from_breaks(breaks) iv_dtype = IntervalDtype(subtype, "right") for dtype in (iv_dtype, str(iv_dtype)): with tm.assert_produces_warning(warn, check_stacklevel=False): result = constructor(dtype=dtype, **result_kwargs) tm.assert_index_equal(result, expected) @pytest.mark.parametrize( "breaks", [ Int64Index([0, 1, 2, 3, 4]), Int64Index([0, 1, 2, 3, 4]), Int64Index([0, 1, 2, 3, 4]), Float64Index([0, 1, 2, 3, 4]), date_range("2017-01-01", periods=5), timedelta_range("1 day", periods=5), ], ) def test_constructor_pass_closed(self, constructor, breaks): # not passing closed to IntervalDtype, but to IntervalArray constructor warn = None if isinstance(constructor, partial) and constructor.func is Index: # passing kwargs to Index is deprecated warn = FutureWarning iv_dtype = IntervalDtype(breaks.dtype) result_kwargs = self.get_kwargs_from_breaks(breaks) for dtype in (iv_dtype, str(iv_dtype)): with tm.assert_produces_warning(warn, check_stacklevel=False): result = constructor(dtype=dtype, closed="left", **result_kwargs) assert result.dtype.closed == "left" @pytest.mark.filterwarnings("ignore:Passing keywords other:FutureWarning") @pytest.mark.parametrize("breaks", [[np.nan] * 2, [np.nan] * 4, [np.nan] * 50]) def test_constructor_nan(self, constructor, breaks, closed): # GH 18421 result_kwargs = self.get_kwargs_from_breaks(breaks) result = constructor(closed=closed, **result_kwargs) expected_subtype = np.float64 expected_values = np.array(breaks[:-1], dtype=object) assert result.closed == closed assert result.dtype.subtype == expected_subtype tm.assert_numpy_array_equal(np.array(result), expected_values) @pytest.mark.filterwarnings("ignore:Passing keywords other:FutureWarning") @pytest.mark.parametrize( "breaks", [ [], np.array([], dtype="int64"), np.array([], 
dtype="float64"), np.array([], dtype="datetime64[ns]"), np.array([], dtype="timedelta64[ns]"), ], ) def test_constructor_empty(self, constructor, breaks, closed): # GH 18421 result_kwargs = self.get_kwargs_from_breaks(breaks) result = constructor(closed=closed, **result_kwargs) expected_values = np.array([], dtype=object) expected_subtype = getattr(breaks, "dtype", np.int64) assert result.empty assert result.closed == closed assert result.dtype.subtype == expected_subtype tm.assert_numpy_array_equal(np.array(result), expected_values) @pytest.mark.parametrize( "breaks", [ tuple("0123456789"), list("abcdefghij"), np.array(list("abcdefghij"), dtype=object), np.array(list("abcdefghij"), dtype="<U1"), ], ) def test_constructor_string(self, constructor, breaks): # GH 19016 msg = ( "category, object, and string subtypes are not supported " "for IntervalIndex" ) with pytest.raises(TypeError, match=msg): constructor(**self.get_kwargs_from_breaks(breaks)) @pytest.mark.parametrize("cat_constructor", [Categorical, CategoricalIndex]) def test_constructor_categorical_valid(self, constructor, cat_constructor): # GH 21243/21253 if isinstance(constructor, partial) and constructor.func is Index: # Index is defined to create CategoricalIndex from categorical data pytest.skip() breaks = np.arange(10, dtype="int64") expected = IntervalIndex.from_breaks(breaks) cat_breaks = cat_constructor(breaks) result_kwargs = self.get_kwargs_from_breaks(cat_breaks) result = constructor(**result_kwargs) tm.assert_index_equal(result, expected) def test_generic_errors(self, constructor): # filler input data to be used when supplying invalid kwargs filler = self.get_kwargs_from_breaks(range(10)) # invalid closed msg = "closed must be one of 'right', 'left', 'both', 'neither'" with pytest.raises(ValueError, match=msg): constructor(closed="invalid", **filler) # unsupported dtype msg = "dtype must be an IntervalDtype, got int64" with pytest.raises(TypeError, match=msg): constructor(dtype="int64", **filler) # invalid dtype msg = "data type [\"']invalid[\"'] not understood" with pytest.raises(TypeError, match=msg): constructor(dtype="invalid", **filler) # no point in nesting periods in an IntervalIndex periods = period_range("2000-01-01", periods=10) periods_kwargs = self.get_kwargs_from_breaks(periods) msg = "Period dtypes are not supported, use a PeriodIndex instead" with pytest.raises(ValueError, match=msg): constructor(**periods_kwargs) # decreasing values decreasing_kwargs = self.get_kwargs_from_breaks(range(10, -1, -1)) msg = "left side of interval must be <= right side" with pytest.raises(ValueError, match=msg): constructor(**decreasing_kwargs)
class TestRangeIndexSetOps: def test_intersection(self, sort): # intersect with Int64Index index = RangeIndex(start=0, stop=20, step=2) other = Index(np.arange(1, 6)) result = index.intersection(other, sort=sort) expected = Index(np.sort(np.intersect1d(index.values, other.values))) tm.assert_index_equal(result, expected) result = other.intersection(index, sort=sort) expected = Index( np.sort(np.asarray(np.intersect1d(index.values, other.values)))) tm.assert_index_equal(result, expected) # intersect with increasing RangeIndex other = RangeIndex(1, 6) result = index.intersection(other, sort=sort) expected = Index(np.sort(np.intersect1d(index.values, other.values))) tm.assert_index_equal(result, expected) # intersect with decreasing RangeIndex other = RangeIndex(5, 0, -1) result = index.intersection(other, sort=sort) expected = Index(np.sort(np.intersect1d(index.values, other.values))) tm.assert_index_equal(result, expected) # reversed (GH 17296) result = other.intersection(index, sort=sort) tm.assert_index_equal(result, expected) # GH 17296: intersect two decreasing RangeIndexes first = RangeIndex(10, -2, -2) other = RangeIndex(5, -4, -1) expected = first.astype(int).intersection(other.astype(int), sort=sort) result = first.intersection(other, sort=sort).astype(int) tm.assert_index_equal(result, expected) # reversed result = other.intersection(first, sort=sort).astype(int) tm.assert_index_equal(result, expected) index = RangeIndex(5) # intersect of non-overlapping indices other = RangeIndex(5, 10, 1) result = index.intersection(other, sort=sort) expected = RangeIndex(0, 0, 1) tm.assert_index_equal(result, expected) other = RangeIndex(-1, -5, -1) result = index.intersection(other, sort=sort) expected = RangeIndex(0, 0, 1) tm.assert_index_equal(result, expected) # intersection of empty indices other = RangeIndex(0, 0, 1) result = index.intersection(other, sort=sort) expected = RangeIndex(0, 0, 1) tm.assert_index_equal(result, expected) result = other.intersection(index, sort=sort) tm.assert_index_equal(result, expected) # intersection of non-overlapping values based on start value and gcd index = RangeIndex(1, 10, 2) other = RangeIndex(0, 10, 4) result = index.intersection(other, sort=sort) expected = RangeIndex(0, 0, 1) tm.assert_index_equal(result, expected) def test_union_noncomparable(self, sort): # corner case, non-Int64Index index = RangeIndex(start=0, stop=20, step=2) other = Index([datetime.now() + timedelta(i) for i in range(4)], dtype=object) result = index.union(other, sort=sort) expected = Index(np.concatenate((index, other))) tm.assert_index_equal(result, expected) result = other.union(index, sort=sort) expected = Index(np.concatenate((other, index))) tm.assert_index_equal(result, expected) @pytest.fixture(params=[ ( RangeIndex(0, 10, 1), RangeIndex(0, 10, 1), RangeIndex(0, 10, 1), RangeIndex(0, 10, 1), ), ( RangeIndex(0, 10, 1), RangeIndex(5, 20, 1), RangeIndex(0, 20, 1), Int64Index(range(20)), ), ( RangeIndex(0, 10, 1), RangeIndex(10, 20, 1), RangeIndex(0, 20, 1), Int64Index(range(20)), ), ( RangeIndex(0, -10, -1), RangeIndex(0, -10, -1), RangeIndex(0, -10, -1), RangeIndex(0, -10, -1), ), ( RangeIndex(0, -10, -1), RangeIndex(-10, -20, -1), RangeIndex(-19, 1, 1), Int64Index(range(0, -20, -1)), ), ( RangeIndex(0, 10, 2), RangeIndex(1, 10, 2), RangeIndex(0, 10, 1), Int64Index(list(range(0, 10, 2)) + list(range(1, 10, 2))), ), ( RangeIndex(0, 11, 2), RangeIndex(1, 12, 2), RangeIndex(0, 12, 1), Int64Index(list(range(0, 11, 2)) + list(range(1, 12, 2))), ), ( RangeIndex(0, 21, 4), 
RangeIndex(-2, 24, 4), RangeIndex(-2, 24, 2), Int64Index(list(range(0, 21, 4)) + list(range(-2, 24, 4))), ), ( RangeIndex(0, -20, -2), RangeIndex(-1, -21, -2), RangeIndex(-19, 1, 1), Int64Index(list(range(0, -20, -2)) + list(range(-1, -21, -2))), ), ( RangeIndex(0, 100, 5), RangeIndex(0, 100, 20), RangeIndex(0, 100, 5), Int64Index(range(0, 100, 5)), ), ( RangeIndex(0, -100, -5), RangeIndex(5, -100, -20), RangeIndex(-95, 10, 5), Int64Index(list(range(0, -100, -5)) + [5]), ), ( RangeIndex(0, -11, -1), RangeIndex(1, -12, -4), RangeIndex(-11, 2, 1), Int64Index(list(range(0, -11, -1)) + [1, -11]), ), (RangeIndex(0), RangeIndex(0), RangeIndex(0), RangeIndex(0)), ( RangeIndex(0, -10, -2), RangeIndex(0), RangeIndex(0, -10, -2), RangeIndex(0, -10, -2), ), ( RangeIndex(0, 100, 2), RangeIndex(100, 150, 200), RangeIndex(0, 102, 2), Int64Index(range(0, 102, 2)), ), ( RangeIndex(0, -100, -2), RangeIndex(-100, 50, 102), RangeIndex(-100, 4, 2), Int64Index(list(range(0, -100, -2)) + [-100, 2]), ), ( RangeIndex(0, -100, -1), RangeIndex(0, -50, -3), RangeIndex(-99, 1, 1), Int64Index(list(range(0, -100, -1))), ), ( RangeIndex(0, 1, 1), RangeIndex(5, 6, 10), RangeIndex(0, 6, 5), Int64Index([0, 5]), ), ( RangeIndex(0, 10, 5), RangeIndex(-5, -6, -20), RangeIndex(-5, 10, 5), Int64Index([0, 5, -5]), ), ( RangeIndex(0, 3, 1), RangeIndex(4, 5, 1), Int64Index([0, 1, 2, 4]), Int64Index([0, 1, 2, 4]), ), ( RangeIndex(0, 10, 1), Int64Index([]), RangeIndex(0, 10, 1), RangeIndex(0, 10, 1), ), ( RangeIndex(0), Int64Index([1, 5, 6]), Int64Index([1, 5, 6]), Int64Index([1, 5, 6]), ), ]) def unions(self, request): """Inputs and expected outputs for RangeIndex.union tests""" return request.param def test_union_sorted(self, unions): idx1, idx2, expected_sorted, expected_notsorted = unions res1 = idx1.union(idx2, sort=None) tm.assert_index_equal(res1, expected_sorted, exact=True) res1 = idx1.union(idx2, sort=False) tm.assert_index_equal(res1, expected_notsorted, exact=True) res2 = idx2.union(idx1, sort=None) res3 = idx1._int64index.union(idx2, sort=None) tm.assert_index_equal(res2, expected_sorted, exact=True) tm.assert_index_equal(res3, expected_sorted)
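# Standalone sketch of the union cases parametrized above: when two
# RangeIndexes interleave into a single arithmetic progression, the sorted
# union can stay a RangeIndex, while ``sort=False`` keeps the concatenated
# order and materializes a plain integer index (exact result types depend on
# the pandas version).
import pandas as pd

a, b = pd.RangeIndex(0, 10, 2), pd.RangeIndex(1, 10, 2)
print(a.union(b, sort=None))   # evens + odds -> RangeIndex(0, 10, 1)
print(a.union(b, sort=False))  # 0, 2, ..., 8, 1, 3, ..., 9 as an integer index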
def create_index(self) -> Int64Index:
    # return Int64Index(np.arange(5, dtype="int64"))
    return Int64Index(range(0, 20, 2))
class SliceTestCase(WithSeededRandomPipelineEngine, ZiplineTestCase): sids = ASSET_FINDER_EQUITY_SIDS = Int64Index([1, 2, 3]) START_DATE = Timestamp("2015-01-31", tz="UTC") END_DATE = Timestamp("2015-03-01", tz="UTC") ASSET_FINDER_COUNTRY_CODE = "US" SEEDED_RANDOM_PIPELINE_DEFAULT_DOMAIN = US_EQUITIES @classmethod def init_class_fixtures(cls): super(SliceTestCase, cls).init_class_fixtures() # Using the date at index 14 as the start date because when running # pipelines, especially those involving correlations or regressions, we # want to make sure there are enough days to look back on. The end date # at index 18 is chosen for convenience, as it makes for a contiguous # five day span. cls.pipeline_start_date = cls.trading_days[14] cls.pipeline_end_date = cls.trading_days[18] # Random input for factors. cls.col = TestingDataSet.float_col @parameter_space(my_asset_column=[0, 1, 2], window_length_=[1, 2, 3]) def test_slice(self, my_asset_column, window_length_): """ Test that slices can be created by indexing into a term, and that they have the correct shape when used as inputs. """ sids = self.sids my_asset = self.asset_finder.retrieve_asset(self.sids[my_asset_column]) returns = Returns(window_length=2, inputs=[self.col]) returns_slice = returns[my_asset] class UsesSlicedInput(CustomFactor): window_length = window_length_ inputs = [returns, returns_slice] def compute(self, today, assets, out, returns, returns_slice): # Make sure that our slice is the correct shape (i.e. has only # one column) and that it has the same values as the original # returns factor from which it is derived. assert returns_slice.shape == (self.window_length, 1) assert returns.shape == (self.window_length, len(sids)) check_arrays(returns_slice[:, 0], returns[:, my_asset_column]) # Assertions about the expected slice data are made in the `compute` # function of our custom factor above. self.run_pipeline( Pipeline(columns={"uses_sliced_input": UsesSlicedInput()}), self.pipeline_start_date, self.pipeline_end_date, ) @parameter_space(unmasked_column=[0, 1, 2], slice_column=[0, 1, 2]) def test_slice_with_masking(self, unmasked_column, slice_column): """ Test that masking a factor that uses slices as inputs does not mask the slice data. """ sids = self.sids asset_finder = self.asset_finder start_date = self.pipeline_start_date end_date = self.pipeline_end_date # Create a filter that masks out all but a single asset. unmasked_asset = asset_finder.retrieve_asset(sids[unmasked_column]) unmasked_asset_only = AssetID().eq(unmasked_asset.sid) # Asset used to create our slice. In the cases where this is different # than `unmasked_asset`, our slice should still have non-missing data # when used as an input to our custom factor. That is, it should not be # masked out. slice_asset = asset_finder.retrieve_asset(sids[slice_column]) returns = Returns(window_length=2, inputs=[self.col]) returns_slice = returns[slice_asset] returns_results = self.run_pipeline( Pipeline(columns={"returns": returns}), start_date, end_date, ) returns_results = returns_results["returns"].unstack() class UsesSlicedInput(CustomFactor): window_length = 1 inputs = [returns, returns_slice] def compute(self, today, assets, out, returns, returns_slice): # Ensure that our mask correctly affects the `returns` input # and does not affect the `returns_slice` input. 
assert returns.shape == (1, 1) assert returns_slice.shape == (1, 1) assert returns[0, 0] == returns_results.loc[today, unmasked_asset] assert returns_slice[0, 0] == returns_results.loc[today, slice_asset] columns = {"masked": UsesSlicedInput(mask=unmasked_asset_only)} # Assertions about the expected data are made in the `compute` function # of our custom factor above. self.run_pipeline(Pipeline(columns=columns), start_date, end_date) def test_adding_slice_column(self): """ Test that slices cannot be added as a pipeline column. """ my_asset = self.asset_finder.retrieve_asset(self.sids[0]) open_slice = OpenPrice()[my_asset] with self.assertRaises(UnsupportedPipelineOutput): Pipeline(columns={"open_slice": open_slice}) pipe = Pipeline(columns={}) with self.assertRaises(UnsupportedPipelineOutput): pipe.add(open_slice, "open_slice") def test_loadable_term_slices(self): """ Test that slicing loadable terms raises the proper error. """ my_asset = self.asset_finder.retrieve_asset(self.sids[0]) with self.assertRaises(NonSliceableTerm): USEquityPricing.close[my_asset] def test_non_existent_asset(self): """ Test that indexing into a term with a non-existent asset raises the proper exception. """ my_asset = Asset( 0, exchange_info=ExchangeInfo("TEST FULL", "TEST", "US"), ) returns = Returns(window_length=2, inputs=[self.col]) returns_slice = returns[my_asset] class UsesSlicedInput(CustomFactor): window_length = 2 inputs = [returns_slice] def compute(self, today, assets, out, returns_slice): pass with self.assertRaises(NonExistentAssetInTimeFrame): self.run_pipeline( Pipeline(columns={"uses_sliced_input": UsesSlicedInput()}), self.pipeline_start_date, self.pipeline_end_date, ) def test_window_safety_of_slices(self): """ Test that slices correctly inherit the `window_safe` property of the term from which they are derived. """ col = self.col my_asset = self.asset_finder.retrieve_asset(self.sids[0]) # SimpleMovingAverage is not window safe. sma = SimpleMovingAverage(inputs=[self.col], window_length=10) sma_slice = sma[my_asset] class UsesSlicedInput(CustomFactor): window_length = 2 inputs = [sma_slice] def compute(self, today, assets, out, sma_slice): pass with self.assertRaises(NonWindowSafeInput): self.run_pipeline( Pipeline(columns={"uses_sliced_input": UsesSlicedInput()}), self.pipeline_start_date, self.pipeline_end_date, ) # Make sure that slices of custom factors are not window safe. class MyUnsafeFactor(CustomFactor): window_length = 2 inputs = [col] def compute(self, today, assets, out, col): pass my_unsafe_factor = MyUnsafeFactor() my_unsafe_factor_slice = my_unsafe_factor[my_asset] class UsesSlicedInput(CustomFactor): window_length = 2 inputs = [my_unsafe_factor_slice] def compute(self, today, assets, out, my_unsafe_factor_slice): pass with self.assertRaises(NonWindowSafeInput): self.run_pipeline( Pipeline(columns={"uses_sliced_input": UsesSlicedInput()}), self.pipeline_start_date, self.pipeline_end_date, ) # Create a window safe factor. class MySafeFactor(CustomFactor): window_length = 2 inputs = [col] window_safe = True def compute(self, today, assets, out, col): pass my_safe_factor = MySafeFactor() my_safe_factor_slice = my_safe_factor[my_asset] # Make sure that correlations are not safe if either the factor *or* # the target slice are not window safe. 
with self.assertRaises(NonWindowSafeInput): my_unsafe_factor.pearsonr( target=my_safe_factor_slice, correlation_length=10, ) with self.assertRaises(NonWindowSafeInput): my_safe_factor.pearsonr( target=my_unsafe_factor_slice, correlation_length=10, ) def test_single_column_output(self): """ Tests for custom factors that compute a 1D out. """ start_date = self.pipeline_start_date end_date = self.pipeline_end_date alternating_mask = (AssetIDPlusDay() % 2).eq(0) cascading_mask = AssetIDPlusDay() < (self.sids[-1] + start_date.day) class SingleColumnOutput(CustomFactor): window_length = 1 inputs = [self.col] window_safe = True ndim = 1 def compute(self, today, assets, out, col): # Because we specified ndim as 1, `out` should be a singleton # array but `close` should be a regular sized input. assert out.shape == (1,) assert col.shape == (1, 3) out[:] = col.sum() # Since we cannot add single column output factors as pipeline # columns, we have to test its output through another factor. class UsesSingleColumnOutput(CustomFactor): window_length = 1 inputs = [SingleColumnOutput()] def compute(self, today, assets, out, single_column_output): # Make sure that `single_column` has the correct shape. That # is, it should always have one column regardless of any mask # passed to `UsesSingleColumnInput`. assert single_column_output.shape == (1, 1) for mask in (alternating_mask, cascading_mask): columns = { "uses_single_column_output": UsesSingleColumnOutput(), "uses_single_column_output_masked": UsesSingleColumnOutput( mask=mask, ), } # Assertions about the expected shapes of our data are made in the # `compute` function of our custom factors above. self.run_pipeline(Pipeline(columns=columns), start_date, end_date) def test_masked_single_column_output(self): """ Tests for masking custom factors that compute a 1D out. """ start_date = self.pipeline_start_date end_date = self.pipeline_end_date alternating_mask = (AssetIDPlusDay() % 2).eq(0) cascading_mask = AssetIDPlusDay() < (self.sids[-1] + start_date.day) alternating_mask.window_safe = True cascading_mask.window_safe = True for mask in (alternating_mask, cascading_mask): class SingleColumnOutput(CustomFactor): window_length = 1 inputs = [self.col, mask] window_safe = True ndim = 1 def compute(self, today, assets, out, col, mask): # Because we specified ndim as 1, `out` should always be a # singleton array but `close` should be a sized based on # the mask we passed. assert out.shape == (1,) assert col.shape == (1, mask.sum()) out[:] = col.sum() # Since we cannot add single column output factors as pipeline # columns, we have to test its output through another factor. class UsesSingleColumnInput(CustomFactor): window_length = 1 inputs = [self.col, mask, SingleColumnOutput(mask=mask)] def compute(self, today, assets, out, col, mask, single_column_output): # Make sure that `single_column` has the correct value # based on the masked it used. assert single_column_output.shape == (1, 1) single_column_output_value = single_column_output[0][0] expected_value = where(mask, col, 0).sum() assert single_column_output_value == expected_value columns = {"uses_single_column_input": UsesSingleColumnInput()} # Assertions about the expected shapes of our data are made in the # `compute` function of our custom factors above. 
self.run_pipeline(Pipeline(columns=columns), start_date, end_date) @parameter_space(returns_length=[2, 3], correlation_length=[3, 4]) def test_factor_correlation_methods(self, returns_length, correlation_length): """ Ensure that `Factor.pearsonr` and `Factor.spearmanr` are consistent with the built-in factors `RollingPearsonOfReturns` and `RollingSpearmanOfReturns`. """ my_asset = self.asset_finder.retrieve_asset(self.sids[0]) returns = Returns(window_length=returns_length, inputs=[self.col]) returns_slice = returns[my_asset] pearson = returns.pearsonr( target=returns_slice, correlation_length=correlation_length, ) spearman = returns.spearmanr( target=returns_slice, correlation_length=correlation_length, ) expected_pearson = RollingPearsonOfReturns( target=my_asset, returns_length=returns_length, correlation_length=correlation_length, ) expected_spearman = RollingSpearmanOfReturns( target=my_asset, returns_length=returns_length, correlation_length=correlation_length, ) # These built-ins construct their own Returns factor to use as inputs, # so the only way to set our own inputs is to do so after the fact. # This should not be done in practice. It is necessary here because we # want Returns to use our random data as an input, but by default it is # using USEquityPricing.close. expected_pearson.inputs = [returns, returns_slice] expected_spearman.inputs = [returns, returns_slice] columns = { "pearson": pearson, "spearman": spearman, "expected_pearson": expected_pearson, "expected_spearman": expected_spearman, } results = self.run_pipeline( Pipeline(columns=columns), self.pipeline_start_date, self.pipeline_end_date, ) pearson_results = results["pearson"].unstack() spearman_results = results["spearman"].unstack() expected_pearson_results = results["expected_pearson"].unstack() expected_spearman_results = results["expected_spearman"].unstack() assert_frame_equal(pearson_results, expected_pearson_results) assert_frame_equal(spearman_results, expected_spearman_results) # Make sure we cannot call the correlation methods on factors or slices # of dtype `datetime64[ns]`. class DateFactor(CustomFactor): window_length = 1 inputs = [] dtype = datetime64ns_dtype window_safe = True def compute(self, today, assets, out): pass date_factor = DateFactor() date_factor_slice = date_factor[my_asset] with self.assertRaises(TypeError): date_factor.pearsonr( target=returns_slice, correlation_length=correlation_length, ) with self.assertRaises(TypeError): date_factor.spearmanr( target=returns_slice, correlation_length=correlation_length, ) with self.assertRaises(TypeError): returns.pearsonr( target=date_factor_slice, correlation_length=correlation_length, ) with self.assertRaises(TypeError): returns.pearsonr( target=date_factor_slice, correlation_length=correlation_length, ) @parameter_space(returns_length=[2, 3], regression_length=[3, 4]) def test_factor_regression_method(self, returns_length, regression_length): """ Ensure that `Factor.linear_regression` is consistent with the built-in factor `RollingLinearRegressionOfReturns`. 
""" my_asset = self.asset_finder.retrieve_asset(self.sids[0]) returns = Returns(window_length=returns_length, inputs=[self.col]) returns_slice = returns[my_asset] regression = returns.linear_regression( target=returns_slice, regression_length=regression_length, ) expected_regression = RollingLinearRegressionOfReturns( target=my_asset, returns_length=returns_length, regression_length=regression_length, ) # These built-ins construct their own Returns factor to use as inputs, # so the only way to set our own inputs is to do so after the fact. # This should not be done in practice. It is necessary here because we # want Returns to use our random data as an input, but by default it is # using USEquityPricing.close. expected_regression.inputs = [returns, returns_slice] class MyFactor(CustomFactor): inputs = () window_length = 1 def compute(self, today, assets, out): out[:] = 0 columns = { "regression": regression, "expected_regression": expected_regression, } results = self.run_pipeline( Pipeline(columns=columns), self.pipeline_start_date, self.pipeline_end_date, ) regression_results = results["regression"].unstack() expected_regression_results = results["expected_regression"].unstack() assert_frame_equal(regression_results, expected_regression_results) # Make sure we cannot call the linear regression method on factors or # slices of dtype `datetime64[ns]`. class DateFactor(CustomFactor): window_length = 1 inputs = [] dtype = datetime64ns_dtype window_safe = True def compute(self, today, assets, out): pass date_factor = DateFactor() date_factor_slice = date_factor[my_asset] with self.assertRaises(TypeError): date_factor.linear_regression( target=returns_slice, regression_length=regression_length, ) with self.assertRaises(TypeError): returns.linear_regression( target=date_factor_slice, regression_length=regression_length, ) def test_slice_repr(self): my_asset = self.asset_finder.retrieve_asset(self.sids[0]) slice_ = Returns(window_length=2)[my_asset] result = repr(slice_) self.assertEqual(result, "Returns(...)[{}]".format(my_asset)) def test_slice_subtypes(self): my_asset = self.asset_finder.retrieve_asset(self.sids[0]) class SomeFactor(Factor): inputs = () window_length = 1 dtype = float self.assertIsInstance(SomeFactor()[my_asset], Factor) class SomeFilter(Filter): inputs = () window_length = 1 self.assertIsInstance(SomeFilter()[my_asset], Filter) class SomeClassifier(Classifier): inputs = () window_length = 1 dtype = object self.assertIsInstance(SomeClassifier()[my_asset], Classifier)
def test_constructor_unwraps_index(self):
    idx = Index([1, 2])
    result = Int64Index(idx)
    expected = np.array([1, 2], dtype="int64")
    tm.assert_numpy_array_equal(result._data, expected)
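# Version-neutral sketch of the "unwrapping" assertion above: building an
# integer index from an existing Index yields the plain int64 values rather
# than a nested index object.
import numpy as np
import pandas as pd

idx = pd.Index([1, 2])
result = pd.Index(idx, dtype="int64")
np.testing.assert_array_equal(np.asarray(result), np.array([1, 2], dtype="int64"))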
def test_read_with_adjustments(self): columns = [USEquityPricing.high, USEquityPricing.volume] query_days = self.calendar_days_between(TEST_QUERY_START, TEST_QUERY_STOP) # Our expected results for each day are based on values from the # previous day. shifted_query_days = self.calendar_days_between( TEST_QUERY_START, TEST_QUERY_STOP, shift=-1, ) baseline_reader = BcolzDailyBarReader(self.bcolz_path) adjustment_reader = SQLiteAdjustmentReader(self.db_path) pricing_loader = USEquityPricingLoader( baseline_reader, adjustment_reader, ) highs, volumes = pricing_loader.load_adjusted_array( columns, dates=query_days, assets=Int64Index(arange(1, 7)), mask=ones((len(query_days), 6), dtype=bool), ) expected_baseline_highs = self.bcolz_writer.expected_values_2d( shifted_query_days, self.assets, 'high', ) expected_baseline_volumes = self.bcolz_writer.expected_values_2d( shifted_query_days, self.assets, 'volume', ) # At each point in time, the AdjustedArrays should yield the baseline # with all adjustments up to that date applied. for windowlen in range(1, len(query_days) + 1): for offset, window in enumerate(highs.traverse(windowlen)): baseline = expected_baseline_highs[offset:offset + windowlen] baseline_dates = query_days[offset:offset + windowlen] expected_adjusted_highs = self.apply_adjustments( baseline_dates, self.assets, baseline, # Apply all adjustments. concat([SPLITS, MERGERS, DIVIDENDS_EXPECTED], ignore_index=True), ) assert_allclose(expected_adjusted_highs, window) for offset, window in enumerate(volumes.traverse(windowlen)): baseline = expected_baseline_volumes[offset:offset + windowlen] baseline_dates = query_days[offset:offset + windowlen] # Apply only splits and invert the ratio. adjustments = SPLITS.copy() adjustments.ratio = 1 / adjustments.ratio expected_adjusted_volumes = self.apply_adjustments( baseline_dates, self.assets, baseline, adjustments, ) # FIXME: Make AdjustedArray properly support integral types. assert_array_equal( expected_adjusted_volumes, window.astype(uint32), ) # Verify that we checked up to the longest possible window. with self.assertRaises(WindowLengthTooLong): highs.traverse(windowlen + 1) with self.assertRaises(WindowLengthTooLong): volumes.traverse(windowlen + 1)
def test_copy(self): i = Int64Index([], name='Foo') i_copy = i.copy() self.assertEqual(i_copy.name, 'Foo')
def get_index_loc(key, index):
    """
    Get the location of a specific key in an index

    Parameters
    ----------
    key : label
        The key for which to find the location: a label if the underlying
        index is a DateIndex, or an integer location if the underlying index
        is a RangeIndex or an Int64Index.
    index : pd.Index
        The index to search.

    Returns
    -------
    loc : int
        The location of the key
    index : pd.Index
        The index including the key; this is a copy of the original index
        unless the index had to be expanded to accommodate `key`.
    index_was_expanded : bool
        Whether or not the index was expanded to accommodate `key`.

    Notes
    -----
    If `key` is past the end of the given index, and the index is either
    an Int64Index or a date index, this function extends the index up to
    and including key, and then returns the location in the new index.
    """
    base_index = index

    index = base_index
    date_index = isinstance(base_index, (PeriodIndex, DatetimeIndex))
    int_index = isinstance(base_index, Int64Index)
    range_index = isinstance(base_index, RangeIndex)
    index_class = type(base_index)
    nobs = len(index)

    # Special handling for RangeIndex
    if range_index and isinstance(key, (int, np.integer)):
        # Negative indices (that lie in the Index)
        if key < 0 and -key <= nobs:
            key = nobs + key
        # Out-of-sample (note that we include key itself in the new index)
        elif key > nobs - 1:
            # See gh5835. Remove the except after pandas 0.25 required.
            try:
                base_index_start = base_index.start
                base_index_step = base_index.step
            except AttributeError:
                base_index_start = base_index._start
                base_index_step = base_index._step
            stop = base_index_start + (key + 1) * base_index_step
            index = RangeIndex(start=base_index_start,
                               stop=stop,
                               step=base_index_step)

    # Special handling for Int64Index
    if (not range_index and int_index and not date_index
            and isinstance(key, (int, np.integer))):
        # Negative indices (that lie in the Index)
        if key < 0 and -key <= nobs:
            key = nobs + key
        # Out-of-sample (note that we include key itself in the new index)
        elif key > base_index[-1]:
            index = Int64Index(np.arange(base_index[0], int(key + 1)))

    # Special handling for date indexes
    if date_index:
        # Use index type to choose creation function
        if index_class is DatetimeIndex:
            index_fn = date_range
        else:
            index_fn = period_range
        # Integer key (i.e. already given a location)
        if isinstance(key, (int, np.integer)):
            # Negative indices (that lie in the Index)
            if key < 0 and -key < nobs:
                key = index[nobs + key]
            # Out-of-sample (note that we include key itself in the new
            # index)
            elif key > len(base_index) - 1:
                index = index_fn(start=base_index[0],
                                 periods=int(key + 1),
                                 freq=base_index.freq)
                key = index[-1]
            else:
                key = index[key]
        # Other key types (i.e. string date or some datetime-like object)
        else:
            # Convert the key to the appropriate date-like object
            if index_class is PeriodIndex:
                date_key = Period(key, freq=base_index.freq)
            else:
                date_key = Timestamp(key, freq=base_index.freq)

            # Out-of-sample
            if date_key > base_index[-1]:
                # First create an index that may not always include `key`
                index = index_fn(start=base_index[0],
                                 end=date_key,
                                 freq=base_index.freq)

                # Now make sure we include `key`
                if not index[-1] == date_key:
                    index = index_fn(start=base_index[0],
                                     periods=len(index) + 1,
                                     freq=base_index.freq)

                # To avoid possible inconsistencies with `get_loc` below,
                # set the key directly equal to the last index location
                key = index[-1]

    # Get the location
    if date_index:
        # (note that get_loc will throw a KeyError if key is invalid)
        loc = index.get_loc(key)
    elif int_index or range_index:
        # For Int64Index and RangeIndex, key is assumed to be the location
        # and not an index value (this assumption is required to support
        # RangeIndex)
        try:
            index[key]
        # We want to raise a KeyError in this case, to keep the exception
        # consistent across index types.
        # - Attempting to index with an out-of-bound location (e.g.
        #   index[10] on an index of length 9) will raise an IndexError
        #   (as of Pandas 0.22)
        # - Attempting to index with a type that cannot be cast to integer
        #   (e.g. a non-numeric string) will raise a ValueError if the
        #   index is RangeIndex (otherwise will raise an IndexError)
        #   (as of Pandas 0.22)
        except (IndexError, ValueError) as e:
            raise KeyError(str(e))
        loc = key
    else:
        loc = index.get_loc(key)

    # Check if we now have a modified index
    index_was_expanded = index is not base_index

    # Return the index through the end of the loc / slice
    if isinstance(loc, slice):
        end = loc.stop - 1
    else:
        end = loc

    return loc, index[:end + 1], index_was_expanded
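# --- Hedged usage sketch for get_index_loc (added for illustration, not part
# of the original module). Assumes pandas >= 0.25, so RangeIndex exposes
# .start/.step, and that get_index_loc (defined above) is in scope. The
# expected values below follow directly from the RangeIndex branch of the
# function: an out-of-range key expands the index up to and including the key.
from pandas import RangeIndex


def _demo_get_index_loc():
    base = RangeIndex(5)  # 0, 1, 2, 3, 4

    # A key past the end of the index triggers the out-of-sample branch.
    loc, new_index, expanded = get_index_loc(7, base)
    assert loc == 7
    assert list(new_index) == list(range(8))
    assert expanded is True

    # Negative keys that lie inside the index are normalized to positions;
    # the returned index is the original index truncated at the location.
    loc, trimmed, expanded = get_index_loc(-2, base)
    assert loc == 3
    assert list(trimmed) == [0, 1, 2, 3]
    assert expanded is False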
def test_get_indexer_backfill(self): target = Int64Index(np.arange(10)) indexer = self.index.get_indexer(target, method='backfill') expected = np.array([0, 1, 1, 2, 2, 3, 3, 4, 4, 5], dtype=np.intp) tm.assert_numpy_array_equal(indexer, expected)
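# --- Hedged illustration of the 'backfill' expectation above (added for
# clarity, not part of the original test). It assumes self.index is the
# even-integer index created in setup_method, i.e. Int64Index(np.arange(0, 20,
# 2)), and a pandas version that still ships Int64Index. 'backfill' maps each
# target value to the position of the first index value that is >= it.
import numpy as np
from pandas import Int64Index

idx = Int64Index(np.arange(0, 20, 2))  # 0, 2, 4, ..., 18
indexer = idx.get_indexer(np.arange(10), method="backfill")
# target 0 -> position 0 (value 0), target 1 -> position 1 (value 2),
# target 3 -> position 2 (value 4), and so on:
assert list(indexer) == [0, 1, 1, 2, 2, 3, 3, 4, 4, 5]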
class StatisticalMethodsTestCase(WithSeededRandomPipelineEngine, GatewayTestCase): sids = ASSET_FINDER_EQUITY_SIDS = Int64Index([1, 2, 3]) START_DATE = Timestamp('2015-01-31', tz='UTC') END_DATE = Timestamp('2015-03-01', tz='UTC') @classmethod def init_class_fixtures(cls): super(StatisticalMethodsTestCase, cls).init_class_fixtures() # Using these start and end dates because they are a contiguous span of # 5 days (Monday - Friday) and they allow for plenty of days to look # back on when computing correlations and regressions. cls.dates = dates = cls.trading_days cls.start_date_index = start_date_index = 14 cls.end_date_index = end_date_index = 18 cls.pipeline_start_date = cls.trading_days[start_date_index] cls.pipeline_end_date = cls.trading_days[end_date_index] sids = cls.sids cls.assets = assets = cls.asset_finder.retrieve_all(sids) cls.my_asset_column = my_asset_column = 0 cls.my_asset = assets[my_asset_column] cls.num_days = num_days = end_date_index - start_date_index + 1 cls.num_assets = num_assets = len(assets) cls.cascading_mask = \ AssetIDPlusDay() < (sids[-1] + dates[start_date_index].day) cls.expected_cascading_mask_result = make_cascading_boolean_array( shape=(num_days, num_assets), ) cls.alternating_mask = (AssetIDPlusDay() % 2).eq(0) cls.expected_alternating_mask_result = make_alternating_boolean_array( shape=(num_days, num_assets), ) cls.expected_no_mask_result = full( shape=(num_days, num_assets), fill_value=True, dtype=bool_dtype, ) # Random input for factors. cls.col = TestingDataSet.float_col @parameter_space(returns_length=[2, 3], correlation_length=[3, 4]) def test_factor_correlation_methods(self, returns_length, correlation_length): """ Ensure that `Factor.pearsonr` and `Factor.spearmanr` are consistent with the built-in factors `RollingPearsonOfReturns` and `RollingSpearmanOfReturns`. """ my_asset = self.my_asset start_date = self.pipeline_start_date end_date = self.pipeline_end_date run_pipeline = self.run_pipeline returns = Returns(window_length=returns_length, inputs=[self.col]) returns_slice = returns[my_asset] pearson = returns.pearsonr( target=returns_slice, correlation_length=correlation_length, ) spearman = returns.spearmanr( target=returns_slice, correlation_length=correlation_length, ) expected_pearson = RollingPearsonOfReturns( target=my_asset, returns_length=returns_length, correlation_length=correlation_length, ) expected_spearman = RollingSpearmanOfReturns( target=my_asset, returns_length=returns_length, correlation_length=correlation_length, ) # These built-ins construct their own Returns factor to use as inputs, # so the only way to set our own inputs is to do so after the fact. # This should not be done in practice. It is necessary here because we # want Returns to use our random data as an input, but by default it is # using USEquityPricing.close.
expected_pearson.inputs = [returns, returns_slice] expected_spearman.inputs = [returns, returns_slice] columns = { 'pearson': pearson, 'spearman': spearman, 'expected_pearson': expected_pearson, 'expected_spearman': expected_spearman, } results = run_pipeline(Pipeline(columns=columns), start_date, end_date) pearson_results = results['pearson'].unstack() spearman_results = results['spearman'].unstack() expected_pearson_results = results['expected_pearson'].unstack() expected_spearman_results = results['expected_spearman'].unstack() assert_frame_equal(pearson_results, expected_pearson_results) assert_frame_equal(spearman_results, expected_spearman_results) def test_correlation_methods_bad_type(self): """ Make sure we cannot call the Factor correlation methods on factors or slices that are not of float or int dtype. """ # These are arbitrary for the purpose of this test. returns_length = 2 correlation_length = 10 returns = Returns(window_length=returns_length, inputs=[self.col]) returns_slice = returns[self.my_asset] class BadTypeFactor(CustomFactor): inputs = [] window_length = 1 dtype = datetime64ns_dtype window_safe = True def compute(self, today, assets, out): pass bad_type_factor = BadTypeFactor() bad_type_factor_slice = bad_type_factor[self.my_asset] with self.assertRaises(TypeError): bad_type_factor.pearsonr( target=returns_slice, correlation_length=correlation_length, ) with self.assertRaises(TypeError): bad_type_factor.spearmanr( target=returns_slice, correlation_length=correlation_length, ) with self.assertRaises(TypeError): returns.pearsonr( target=bad_type_factor_slice, correlation_length=correlation_length, ) with self.assertRaises(TypeError): returns.spearmanr( target=bad_type_factor_slice, correlation_length=correlation_length, ) @parameter_space(returns_length=[2, 3], regression_length=[3, 4]) def test_factor_regression_method(self, returns_length, regression_length): """ Ensure that `Factor.linear_regression` is consistent with the built-in factor `RollingLinearRegressionOfReturns`. """ my_asset = self.my_asset start_date = self.pipeline_start_date end_date = self.pipeline_end_date run_pipeline = self.run_pipeline returns = Returns(window_length=returns_length, inputs=[self.col]) returns_slice = returns[my_asset] regression = returns.linear_regression( target=returns_slice, regression_length=regression_length, ) expected_regression = RollingLinearRegressionOfReturns( target=my_asset, returns_length=returns_length, regression_length=regression_length, ) # This built-in constructs its own Returns factor to use as an input, # so the only way to set our own input is to do so after the fact. This # should not be done in practice. It is necessary here because we want # Returns to use our random data as an input, but by default it is # using USEquityPricing.close. expected_regression.inputs = [returns, returns_slice] columns = { 'regression': regression, 'expected_regression': expected_regression, } results = run_pipeline(Pipeline(columns=columns), start_date, end_date) regression_results = results['regression'].unstack() expected_regression_results = results['expected_regression'].unstack() assert_frame_equal(regression_results, expected_regression_results) def test_regression_method_bad_type(self): """ Make sure we cannot call the Factor linear regression method on factors or slices that are not of float or int dtype. """ # These are arbitrary for the purpose of this test. 
returns_length = 2 regression_length = 10 returns = Returns(window_length=returns_length, inputs=[self.col]) returns_slice = returns[self.my_asset] class BadTypeFactor(CustomFactor): window_length = 1 inputs = [] dtype = datetime64ns_dtype window_safe = True def compute(self, today, assets, out): pass bad_type_factor = BadTypeFactor() bad_type_factor_slice = bad_type_factor[self.my_asset] with self.assertRaises(TypeError): bad_type_factor.linear_regression( target=returns_slice, regression_length=regression_length, ) with self.assertRaises(TypeError): returns.linear_regression( target=bad_type_factor_slice, regression_length=regression_length, ) @parameter_space(correlation_length=[2, 3, 4]) def test_factor_correlation_methods_two_factors(self, correlation_length): """ Tests for `Factor.pearsonr` and `Factor.spearmanr` when passed another 2D factor instead of a Slice. """ assets = self.assets dates = self.dates start_date = self.pipeline_start_date end_date = self.pipeline_end_date start_date_index = self.start_date_index end_date_index = self.end_date_index num_days = self.num_days run_pipeline = self.run_pipeline # Ensure that the correlation methods cannot be called with two 2D # factors which have different masks. returns_masked_1 = Returns( window_length=5, inputs=[self.col], mask=AssetID().eq(1), ) returns_masked_2 = Returns( window_length=5, inputs=[self.col], mask=AssetID().eq(2), ) with self.assertRaises(IncompatibleTerms): returns_masked_1.pearsonr( target=returns_masked_2, correlation_length=correlation_length, ) with self.assertRaises(IncompatibleTerms): returns_masked_1.spearmanr( target=returns_masked_2, correlation_length=correlation_length, ) returns_5 = Returns(window_length=5, inputs=[self.col]) returns_10 = Returns(window_length=10, inputs=[self.col]) pearson_factor = returns_5.pearsonr( target=returns_10, correlation_length=correlation_length, ) spearman_factor = returns_5.spearmanr( target=returns_10, correlation_length=correlation_length, ) columns = { 'pearson_factor': pearson_factor, 'spearman_factor': spearman_factor, } pipeline = Pipeline(columns=columns) results = run_pipeline(pipeline, start_date, end_date) pearson_results = results['pearson_factor'].unstack() spearman_results = results['spearman_factor'].unstack() # Run a separate pipeline that calculates returns starting # (correlation_length - 1) days prior to our start date. This is # because we need (correlation_length - 1) extra days of returns to # compute our expected correlations. columns = {'returns_5': returns_5, 'returns_10': returns_10} results = run_pipeline( Pipeline(columns=columns), dates[start_date_index - (correlation_length - 1)], dates[end_date_index], ) returns_5_results = results['returns_5'].unstack() returns_10_results = results['returns_10'].unstack() # On each day, calculate the expected correlation coefficients # between each asset's 5 and 10 day rolling returns. Each correlation # is calculated over `correlation_length` days. 
expected_pearson_results = full_like(pearson_results, nan) expected_spearman_results = full_like(spearman_results, nan) for day in range(num_days): todays_returns_5 = returns_5_results.iloc[ day:day + correlation_length ] todays_returns_10 = returns_10_results.iloc[ day:day + correlation_length ] for asset, asset_returns_5 in todays_returns_5.iteritems(): asset_column = int(asset) - 1 asset_returns_10 = todays_returns_10[asset] expected_pearson_results[day, asset_column] = pearsonr( asset_returns_5, asset_returns_10, )[0] expected_spearman_results[day, asset_column] = spearmanr( asset_returns_5, asset_returns_10, )[0] expected_pearson_results = DataFrame( data=expected_pearson_results, index=dates[start_date_index:end_date_index + 1], columns=assets, ) assert_frame_equal(pearson_results, expected_pearson_results) expected_spearman_results = DataFrame( data=expected_spearman_results, index=dates[start_date_index:end_date_index + 1], columns=assets, ) assert_frame_equal(spearman_results, expected_spearman_results) @parameter_space(regression_length=[2, 3, 4]) def test_factor_regression_method_two_factors(self, regression_length): """ Tests for `Factor.linear_regression` when passed another 2D factor instead of a Slice. """ assets = self.assets dates = self.dates start_date = self.pipeline_start_date end_date = self.pipeline_end_date start_date_index = self.start_date_index end_date_index = self.end_date_index num_days = self.num_days run_pipeline = self.run_pipeline # The order of these is meant to align with the output of `linregress`. outputs = ['beta', 'alpha', 'r_value', 'p_value', 'stderr'] # Ensure that the `linear_regression` method cannot be called with two # 2D factors which have different masks. returns_masked_1 = Returns( window_length=5, inputs=[self.col], mask=AssetID().eq(1), ) returns_masked_2 = Returns( window_length=5, inputs=[self.col], mask=AssetID().eq(2), ) with self.assertRaises(IncompatibleTerms): returns_masked_1.linear_regression( target=returns_masked_2, regression_length=regression_length, ) returns_5 = Returns(window_length=5, inputs=[self.col]) returns_10 = Returns(window_length=10, inputs=[self.col]) regression_factor = returns_5.linear_regression( target=returns_10, regression_length=regression_length, ) columns = { output: getattr(regression_factor, output) for output in outputs } pipeline = Pipeline(columns=columns) results = run_pipeline(pipeline, start_date, end_date) output_results = {} expected_output_results = {} for output in outputs: output_results[output] = results[output].unstack() expected_output_results[output] = full_like( output_results[output], nan, ) # Run a separate pipeline that calculates returns starting # (regression_length - 1) days prior to our start date. This is because # we need (regression_length - 1) extra days of returns to compute our # expected regressions. columns = {'returns_5': returns_5, 'returns_10': returns_10} results = run_pipeline( Pipeline(columns=columns), dates[start_date_index - (regression_length - 1)], dates[end_date_index], ) returns_5_results = results['returns_5'].unstack() returns_10_results = results['returns_10'].unstack() # On each day, for each asset, calculate the expected regression # results of Y ~ X where Y is the asset's rolling 5 day returns and X # is the asset's rolling 10 day returns. Each regression is calculated # over `regression_length` days of data. 
for day in range(num_days): todays_returns_5 = returns_5_results.iloc[ day:day + regression_length ] todays_returns_10 = returns_10_results.iloc[ day:day + regression_length ] for asset, asset_returns_5 in todays_returns_5.iteritems(): asset_column = int(asset) - 1 asset_returns_10 = todays_returns_10[asset] expected_regression_results = linregress( y=asset_returns_5, x=asset_returns_10, ) for i, output in enumerate(outputs): expected_output_results[output][day, asset_column] = \ expected_regression_results[i] for output in outputs: output_result = output_results[output] expected_output_result = DataFrame( expected_output_results[output], index=dates[start_date_index:end_date_index + 1], columns=assets, ) assert_frame_equal(output_result, expected_output_result)
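# --- Hedged illustration of the `outputs` ordering noted above (added for
# clarity, not part of the original test suite): scipy.stats.linregress
# returns (slope, intercept, rvalue, pvalue, stderr), which these tests
# expose as ('beta', 'alpha', 'r_value', 'p_value', 'stderr'). The data
# below are arbitrary.
from scipy.stats import linregress

beta, alpha, r_value, p_value, stderr = linregress(
    x=[1.0, 2.0, 3.0, 4.0],
    y=[2.0, 4.1, 5.9, 8.2],
)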
def test_slice_keep_name(self): idx = Int64Index([1, 2], name='asdf') self.assertEqual(idx.name, idx[1:].name)
class StatisticalBuiltInsTestCase(WithTradingEnvironment, GatewayTestCase): sids = ASSET_FINDER_EQUITY_SIDS = Int64Index([1, 2, 3]) START_DATE = Timestamp('2015-01-31', tz='UTC') END_DATE = Timestamp('2015-03-01', tz='UTC') ASSET_FINDER_EQUITY_SYMBOLS = ('A', 'B', 'C') @classmethod def init_class_fixtures(cls): super(StatisticalBuiltInsTestCase, cls).init_class_fixtures() day = cls.trading_calendar.day cls.dates = dates = date_range( '2015-02-01', '2015-02-28', freq=day, tz='UTC', ) # Using these start and end dates because they are a contiguous span of # 5 days (Monday - Friday) and they allow for plenty of days to look # back on when computing correlations and regressions. cls.start_date_index = start_date_index = 14 cls.end_date_index = end_date_index = 18 cls.pipeline_start_date = dates[start_date_index] cls.pipeline_end_date = dates[end_date_index] cls.num_days = num_days = end_date_index - start_date_index + 1 sids = cls.sids cls.assets = assets = cls.asset_finder.retrieve_all(sids) cls.my_asset_column = my_asset_column = 0 cls.my_asset = assets[my_asset_column] cls.num_assets = num_assets = len(assets) cls.raw_data = raw_data = DataFrame( data=arange(len(dates) * len(sids), dtype=float64_dtype).reshape( len(dates), len(sids), ), index=dates, columns=assets, ) # Using mock 'close' data here because the correlation and regression # built-ins use USEquityPricing.close as the input to their `Returns` # factors. Since there is no way to change that when constructing an # instance of these built-ins, we need to test with mock 'close' data # to most accurately reflect their true behavior and results. close_loader = DataFrameLoader(USEquityPricing.close, raw_data) cls.run_pipeline = SimplePipelineEngine( {USEquityPricing.close: close_loader}.__getitem__, dates, cls.asset_finder, ).run_pipeline cls.cascading_mask = \ AssetIDPlusDay() < (sids[-1] + dates[start_date_index].day) cls.expected_cascading_mask_result = make_cascading_boolean_array( shape=(num_days, num_assets), ) cls.alternating_mask = (AssetIDPlusDay() % 2).eq(0) cls.expected_alternating_mask_result = make_alternating_boolean_array( shape=(num_days, num_assets), ) cls.expected_no_mask_result = full( shape=(num_days, num_assets), fill_value=True, dtype=bool_dtype, ) @parameter_space(returns_length=[2, 3], correlation_length=[3, 4]) def test_correlation_factors(self, returns_length, correlation_length): """ Tests for the built-in factors `RollingPearsonOfReturns` and `RollingSpearmanOfReturns`.
""" assets = self.assets my_asset = self.my_asset my_asset_column = self.my_asset_column dates = self.dates start_date = self.pipeline_start_date end_date = self.pipeline_end_date start_date_index = self.start_date_index end_date_index = self.end_date_index num_days = self.num_days run_pipeline = self.run_pipeline returns = Returns(window_length=returns_length) masks = (self.cascading_mask, self.alternating_mask, NotSpecified) expected_mask_results = ( self.expected_cascading_mask_result, self.expected_alternating_mask_result, self.expected_no_mask_result, ) for mask, expected_mask in zip(masks, expected_mask_results): pearson_factor = RollingPearsonOfReturns( target=my_asset, returns_length=returns_length, correlation_length=correlation_length, mask=mask, ) spearman_factor = RollingSpearmanOfReturns( target=my_asset, returns_length=returns_length, correlation_length=correlation_length, mask=mask, ) columns = { 'pearson_factor': pearson_factor, 'spearman_factor': spearman_factor, } pipeline = Pipeline(columns=columns) if mask is not NotSpecified: pipeline.add(mask, 'mask') results = run_pipeline(pipeline, start_date, end_date) pearson_results = results['pearson_factor'].unstack() spearman_results = results['spearman_factor'].unstack() if mask is not NotSpecified: mask_results = results['mask'].unstack() check_arrays(mask_results.values, expected_mask) # Run a separate pipeline that calculates returns starting # (correlation_length - 1) days prior to our start date. This is # because we need (correlation_length - 1) extra days of returns to # compute our expected correlations. results = run_pipeline( Pipeline(columns={'returns': returns}), dates[start_date_index - (correlation_length - 1)], dates[end_date_index], ) returns_results = results['returns'].unstack() # On each day, calculate the expected correlation coefficients # between the asset we are interested in and each other asset. Each # correlation is calculated over `correlation_length` days. expected_pearson_results = full_like(pearson_results, nan) expected_spearman_results = full_like(spearman_results, nan) for day in range(num_days): todays_returns = returns_results.iloc[ day:day + correlation_length ] my_asset_returns = todays_returns.iloc[:, my_asset_column] for asset, other_asset_returns in todays_returns.iteritems(): asset_column = int(asset) - 1 expected_pearson_results[day, asset_column] = pearsonr( my_asset_returns, other_asset_returns, )[0] expected_spearman_results[day, asset_column] = spearmanr( my_asset_returns, other_asset_returns, )[0] expected_pearson_results = DataFrame( data=where(expected_mask, expected_pearson_results, nan), index=dates[start_date_index:end_date_index + 1], columns=assets, ) assert_frame_equal(pearson_results, expected_pearson_results) expected_spearman_results = DataFrame( data=where(expected_mask, expected_spearman_results, nan), index=dates[start_date_index:end_date_index + 1], columns=assets, ) assert_frame_equal(spearman_results, expected_spearman_results) @parameter_space(returns_length=[2, 3], regression_length=[3, 4]) def test_regression_of_returns_factor(self, returns_length, regression_length): """ Tests for the built-in factor `RollingLinearRegressionOfReturns`. 
""" assets = self.assets my_asset = self.my_asset my_asset_column = self.my_asset_column dates = self.dates start_date = self.pipeline_start_date end_date = self.pipeline_end_date start_date_index = self.start_date_index end_date_index = self.end_date_index num_days = self.num_days run_pipeline = self.run_pipeline # The order of these is meant to align with the output of `linregress`. outputs = ['beta', 'alpha', 'r_value', 'p_value', 'stderr'] returns = Returns(window_length=returns_length) masks = self.cascading_mask, self.alternating_mask, NotSpecified expected_mask_results = ( self.expected_cascading_mask_result, self.expected_alternating_mask_result, self.expected_no_mask_result, ) for mask, expected_mask in zip(masks, expected_mask_results): regression_factor = RollingLinearRegressionOfReturns( target=my_asset, returns_length=returns_length, regression_length=regression_length, mask=mask, ) columns = { output: getattr(regression_factor, output) for output in outputs } pipeline = Pipeline(columns=columns) if mask is not NotSpecified: pipeline.add(mask, 'mask') results = run_pipeline(pipeline, start_date, end_date) if mask is not NotSpecified: mask_results = results['mask'].unstack() check_arrays(mask_results.values, expected_mask) output_results = {} expected_output_results = {} for output in outputs: output_results[output] = results[output].unstack() expected_output_results[output] = full_like( output_results[output], nan, ) # Run a separate pipeline that calculates returns starting # (regression_length - 1) days prior to our start date. This is # because we need (regression_length - 1) extra days of returns to # compute our expected regressions. results = run_pipeline( Pipeline(columns={'returns': returns}), dates[start_date_index - (regression_length - 1)], dates[end_date_index], ) returns_results = results['returns'].unstack() # On each day, calculate the expected regression results for Y ~ X # where Y is the asset we are interested in and X is each other # asset. Each regression is calculated over `regression_length` # days of data. 
for day in range(num_days): todays_returns = returns_results.iloc[ day:day + regression_length ] my_asset_returns = todays_returns.iloc[:, my_asset_column] for asset, other_asset_returns in todays_returns.iteritems(): asset_column = int(asset) - 1 expected_regression_results = linregress( y=other_asset_returns, x=my_asset_returns, ) for i, output in enumerate(outputs): expected_output_results[output][day, asset_column] = \ expected_regression_results[i] for output in outputs: output_result = output_results[output] expected_output_result = DataFrame( where(expected_mask, expected_output_results[output], nan), index=dates[start_date_index:end_date_index + 1], columns=assets, ) assert_frame_equal(output_result, expected_output_result) def test_simple_beta_matches_regression(self): run_pipeline = self.run_pipeline simple_beta = SimpleBeta(target=self.my_asset, regression_length=10) complex_beta = RollingLinearRegressionOfReturns( target=self.my_asset, returns_length=2, regression_length=10, ).beta pipe = Pipeline({'simple': simple_beta, 'complex': complex_beta}) results = run_pipeline( pipe, self.pipeline_start_date, self.pipeline_end_date, ) assert_equal(results['simple'], results['complex'], check_names=False) def test_simple_beta_allowed_missing_calculation(self): for percentage, expected in [(0.651, 65), (0.659, 65), (0.66, 66), (0.0, 0), (1.0, 100)]: beta = SimpleBeta( target=self.my_asset, regression_length=100, allowed_missing_percentage=percentage, ) self.assertEqual(beta.params['allowed_missing_count'], expected) def test_correlation_and_regression_with_bad_asset(self): """ Test that `RollingPearsonOfReturns`, `RollingSpearmanOfReturns` and `RollingLinearRegressionOfReturns` raise the proper exception when given a nonexistent target asset. """ my_asset = Equity(0, exchange="TEST") start_date = self.pipeline_start_date end_date = self.pipeline_end_date run_pipeline = self.run_pipeline # This filter is arbitrary; the important thing is that we test each # factor both with and without a specified mask. 
my_asset_filter = AssetID().eq(1) for mask in (NotSpecified, my_asset_filter): pearson_factor = RollingPearsonOfReturns( target=my_asset, returns_length=3, correlation_length=3, mask=mask, ) spearman_factor = RollingSpearmanOfReturns( target=my_asset, returns_length=3, correlation_length=3, mask=mask, ) regression_factor = RollingLinearRegressionOfReturns( target=my_asset, returns_length=3, regression_length=3, mask=mask, ) with self.assertRaises(NonExistentAssetInTimeFrame): run_pipeline( Pipeline(columns={'pearson_factor': pearson_factor}), start_date, end_date, ) with self.assertRaises(NonExistentAssetInTimeFrame): run_pipeline( Pipeline(columns={'spearman_factor': spearman_factor}), start_date, end_date, ) with self.assertRaises(NonExistentAssetInTimeFrame): run_pipeline( Pipeline(columns={'regression_factor': regression_factor}), start_date, end_date, ) def test_require_length_greater_than_one(self): my_asset = Equity(0, exchange="TEST") with self.assertRaises(ValueError): RollingPearsonOfReturns( target=my_asset, returns_length=3, correlation_length=1, ) with self.assertRaises(ValueError): RollingSpearmanOfReturns( target=my_asset, returns_length=3, correlation_length=1, ) with self.assertRaises(ValueError): RollingLinearRegressionOfReturns( target=my_asset, returns_length=3, regression_length=1, ) def test_simple_beta_input_validation(self): with self.assertRaises(TypeError) as e: SimpleBeta( target="SPY", regression_length=100, allowed_missing_percentage=0.5, ) result = str(e.exception) expected = ( r"SimpleBeta\(\) expected a value of type" " .*Asset for argument 'target'," " but got str instead." ) self.assertRegexpMatches(result, expected) with self.assertRaises(ValueError) as e: SimpleBeta( target=self.my_asset, regression_length=1, allowed_missing_percentage=0.5, ) result = str(e.exception) expected = ( "SimpleBeta() expected a value greater than or equal to 3" " for argument 'regression_length', but got 1 instead." ) self.assertEqual(result, expected) with self.assertRaises(ValueError) as e: SimpleBeta( target=self.my_asset, regression_length=100, allowed_missing_percentage=50, ) result = str(e.exception) expected = ( "SimpleBeta() expected a value inclusively between 0.0 and 1.0 " "for argument 'allowed_missing_percentage', but got 50 instead." ) self.assertEqual(result, expected) def test_simple_beta_target(self): beta = SimpleBeta( target=self.my_asset, regression_length=50, allowed_missing_percentage=0.5, ) self.assertIs(beta.target, self.my_asset) def test_simple_beta_repr(self): beta = SimpleBeta( target=self.my_asset, regression_length=50, allowed_missing_percentage=0.5, ) result = repr(beta) expected = "SimpleBeta({}, length=50, allowed_missing=25)".format( self.my_asset, ) self.assertEqual(result, expected) def test_simple_beta_short_repr(self): beta = SimpleBeta( target=self.my_asset, regression_length=50, allowed_missing_percentage=0.5, ) result = beta.short_repr() expected = "SimpleBeta('A', 50, 25)".format(self.my_asset) self.assertEqual(result, expected)
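# --- Hedged sketch of the allowed-missing computation implied by
# test_simple_beta_allowed_missing_calculation above (added for illustration;
# the real SimpleBeta implementation may differ). Plain truncation of
# percentage * regression_length reproduces the expectations in that test
# (0.651 -> 65, 0.659 -> 65, 0.66 -> 66 with regression_length=100).
def _allowed_missing_count(allowed_missing_percentage, regression_length):
    # Truncate toward zero rather than round: 0.659 * 100 must map to 65.
    return int(allowed_missing_percentage * regression_length)


assert _allowed_missing_count(0.651, 100) == 65
assert _allowed_missing_count(0.659, 100) == 65
assert _allowed_missing_count(0.66, 100) == 66
assert _allowed_missing_count(0.0, 100) == 0
assert _allowed_missing_count(1.0, 100) == 100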
def test_slice_integer(self): # same as above, but for Integer based indexes # these coerce to a like integer # oob indicates if we are out of bounds # of positional indexing for index, oob in [(Int64Index(range(5)), False), (RangeIndex(5), False), (Int64Index(range(5)) + 10, True)]: # s is an in-range index s = Series(range(5), index=index) # getitem for l in [slice(3.0, 4), slice(3, 4.0), slice(3.0, 4.0)]: for idxr in [lambda x: x.loc, lambda x: x.ix]: with catch_warnings(record=True): result = idxr(s)[l] # these are all label indexing # except getitem which is positional # empty if oob: indexer = slice(0, 0) else: indexer = slice(3, 5) self.check(result, s, indexer, False) # positional indexing msg = ("cannot do slice indexing" r" on {klass} with these indexers \[(3|4)\.0\] of" " {kind}" .format(klass=type(index), kind=str(float))) with pytest.raises(TypeError, match=msg): s[l] # getitem out-of-bounds for l in [slice(-6, 6), slice(-6.0, 6.0)]: for idxr in [lambda x: x.loc, lambda x: x.ix]: with catch_warnings(record=True): result = idxr(s)[l] # these are all label indexing # except getitem which is positional # empty if oob: indexer = slice(0, 0) else: indexer = slice(-6, 6) self.check(result, s, indexer, False) # positional indexing msg = ("cannot do slice indexing" r" on {klass} with these indexers \[-6\.0\] of" " {kind}" .format(klass=type(index), kind=str(float))) with pytest.raises(TypeError, match=msg): s[slice(-6.0, 6.0)] # getitem odd floats for l, res1 in [(slice(2.5, 4), slice(3, 5)), (slice(2, 3.5), slice(2, 4)), (slice(2.5, 3.5), slice(3, 4))]: for idxr in [lambda x: x.loc, lambda x: x.ix]: with catch_warnings(record=True): result = idxr(s)[l] if oob: res = slice(0, 0) else: res = res1 self.check(result, s, res, False) # positional indexing msg = ("cannot do slice indexing" r" on {klass} with these indexers \[(2|3)\.5\] of" " {kind}" .format(klass=type(index), kind=str(float))) with pytest.raises(TypeError, match=msg): s[l] # setitem for l in [slice(3.0, 4), slice(3, 4.0), slice(3.0, 4.0)]: for idxr in [lambda x: x.loc, lambda x: x.ix]: sc = s.copy() with catch_warnings(record=True): idxr(sc)[l] = 0 result = idxr(sc)[l].values.ravel() assert (result == 0).all() # positional indexing msg = ("cannot do slice indexing" r" on {klass} with these indexers \[(3|4)\.0\] of" " {kind}" .format(klass=type(index), kind=str(float))) with pytest.raises(TypeError, match=msg): s[l] = 0
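# --- Hedged illustration of the `oob` ("out of bounds") flag used above
# (added for clarity, not part of the original test). With the index shifted
# by 10, float label-slicing via .loc matches no labels, which is why the
# expected positional answer collapses to the empty slice(0, 0). Assumes a
# pandas version contemporary with these tests, where float slice bounds are
# accepted by .loc on integer indexes.
import numpy as np
import pandas as pd

s = pd.Series(range(5), index=pd.Index(np.arange(5), dtype="int64") + 10)
assert len(s.loc[3.0:4.0]) == 0      # labels 3..4 do not exist -> empty
assert len(s.loc[13.0:14.0]) == 2    # labels 13..14 do exist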
class TestFloat64Index(Numeric): _holder = Float64Index @pytest.fixture( params=[ [1.5, 2, 3, 4, 5], [0.0, 2.5, 5.0, 7.5, 10.0], [5, 4, 3, 2, 1.5], [10.0, 7.5, 5.0, 2.5, 0.0], ], ids=["mixed", "float", "mixed_dec", "float_dec"], ) def index(self, request): return Float64Index(request.param) @pytest.fixture def mixed_index(self): return Float64Index([1.5, 2, 3, 4, 5]) @pytest.fixture def float_index(self): return Float64Index([0.0, 2.5, 5.0, 7.5, 10.0]) def create_index(self) -> Float64Index: return Float64Index(np.arange(5, dtype="float64")) def test_repr_roundtrip(self, index): tm.assert_index_equal(eval(repr(index)), index) def check_is_index(self, i): assert isinstance(i, Index) assert not isinstance(i, Float64Index) def check_coerce(self, a, b, is_float_index=True): assert a.equals(b) tm.assert_index_equal(a, b, exact=False) if is_float_index: assert isinstance(b, Float64Index) else: self.check_is_index(b) def test_constructor(self): # explicit construction index = Float64Index([1, 2, 3, 4, 5]) assert isinstance(index, Float64Index) expected = np.array([1, 2, 3, 4, 5], dtype="float64") tm.assert_numpy_array_equal(index.values, expected) index = Float64Index(np.array([1, 2, 3, 4, 5])) assert isinstance(index, Float64Index) index = Float64Index([1.0, 2, 3, 4, 5]) assert isinstance(index, Float64Index) index = Float64Index(np.array([1.0, 2, 3, 4, 5])) assert isinstance(index, Float64Index) assert index.dtype == float index = Float64Index(np.array([1.0, 2, 3, 4, 5]), dtype=np.float32) assert isinstance(index, Float64Index) assert index.dtype == np.float64 index = Float64Index(np.array([1, 2, 3, 4, 5]), dtype=np.float32) assert isinstance(index, Float64Index) assert index.dtype == np.float64 # nan handling result = Float64Index([np.nan, np.nan]) assert pd.isna(result.values).all() result = Float64Index(np.array([np.nan])) assert pd.isna(result.values).all() result = Index(np.array([np.nan])) assert pd.isna(result.values).all() @pytest.mark.parametrize( "index, dtype", [ (Int64Index, "float64"), (UInt64Index, "categorical"), (Float64Index, "datetime64"), (RangeIndex, "float64"), ], ) def test_invalid_dtype(self, index, dtype): # GH 29539 with pytest.raises( ValueError, match= rf"Incorrect `dtype` passed: expected \w+(?: \w+)?, received {dtype}", ): index([1, 2, 3], dtype=dtype) def test_constructor_invalid(self): # invalid msg = (r"Float64Index\(\.\.\.\) must be called with a collection of " r"some kind, 0\.0 was passed") with pytest.raises(TypeError, match=msg): Float64Index(0.0) # 2021-02-1 we get ValueError in numpy 1.20, but not on all builds msg = "|".join([ "String dtype not supported, you may need to explicitly cast ", "could not convert string to float: 'a'", ]) with pytest.raises((TypeError, ValueError), match=msg): Float64Index(["a", "b", 0.0]) msg = r"float\(\) argument must be a string or a number, not 'Timestamp'" with pytest.raises(TypeError, match=msg): Float64Index([Timestamp("20130101")]) def test_constructor_coerce(self, mixed_index, float_index): self.check_coerce(mixed_index, Index([1.5, 2, 3, 4, 5])) self.check_coerce(float_index, Index(np.arange(5) * 2.5)) self.check_coerce(float_index, Index(np.array(np.arange(5) * 2.5, dtype=object))) def test_constructor_explicit(self, mixed_index, float_index): # these don't auto convert self.check_coerce(float_index, Index((np.arange(5) * 2.5), dtype=object), is_float_index=False) self.check_coerce(mixed_index, Index([1.5, 2, 3, 4, 5], dtype=object), is_float_index=False) def test_type_coercion_fail(self, any_int_dtype): # see 
gh-15832 msg = "Trying to coerce float values to integers" with pytest.raises(ValueError, match=msg): Index([1, 2, 3.5], dtype=any_int_dtype) def test_type_coercion_valid(self, float_dtype): # There is no Float32Index, so we always # generate Float64Index. i = Index([1, 2, 3.5], dtype=float_dtype) tm.assert_index_equal(i, Index([1, 2, 3.5])) def test_equals_numeric(self): i = Float64Index([1.0, 2.0]) assert i.equals(i) assert i.identical(i) i2 = Float64Index([1.0, 2.0]) assert i.equals(i2) i = Float64Index([1.0, np.nan]) assert i.equals(i) assert i.identical(i) i2 = Float64Index([1.0, np.nan]) assert i.equals(i2) @pytest.mark.parametrize( "other", ( Int64Index([1, 2]), Index([1.0, 2.0], dtype=object), Index([1, 2], dtype=object), ), ) def test_equals_numeric_other_index_type(self, other): i = Float64Index([1.0, 2.0]) assert i.equals(other) assert other.equals(i) @pytest.mark.parametrize( "vals", [ pd.date_range("2016-01-01", periods=3), pd.timedelta_range("1 Day", periods=3), ], ) def test_lookups_datetimelike_values(self, vals): # If we have datetime64 or timedelta64 values, make sure they are # wrapped correctly GH#31163 ser = Series(vals, index=range(3, 6)) ser.index = ser.index.astype("float64") expected = vals[1] with tm.assert_produces_warning(FutureWarning): result = ser.index.get_value(ser, 4.0) assert isinstance(result, type(expected)) and result == expected with tm.assert_produces_warning(FutureWarning): result = ser.index.get_value(ser, 4) assert isinstance(result, type(expected)) and result == expected result = ser[4.0] assert isinstance(result, type(expected)) and result == expected result = ser[4] assert isinstance(result, type(expected)) and result == expected result = ser.loc[4.0] assert isinstance(result, type(expected)) and result == expected result = ser.loc[4] assert isinstance(result, type(expected)) and result == expected result = ser.at[4.0] assert isinstance(result, type(expected)) and result == expected # GH#31329 .at[4] should cast to 4.0, matching .loc behavior result = ser.at[4] assert isinstance(result, type(expected)) and result == expected result = ser.iloc[1] assert isinstance(result, type(expected)) and result == expected result = ser.iat[1] assert isinstance(result, type(expected)) and result == expected def test_doesnt_contain_all_the_things(self): i = Float64Index([np.nan]) assert not i.isin([0]).item() assert not i.isin([1]).item() assert i.isin([np.nan]).item() def test_nan_multiple_containment(self): i = Float64Index([1.0, np.nan]) tm.assert_numpy_array_equal(i.isin([1.0]), np.array([True, False])) tm.assert_numpy_array_equal(i.isin([2.0, np.pi]), np.array([False, False])) tm.assert_numpy_array_equal(i.isin([np.nan]), np.array([False, True])) tm.assert_numpy_array_equal(i.isin([1.0, np.nan]), np.array([True, True])) i = Float64Index([1.0, 2.0]) tm.assert_numpy_array_equal(i.isin([np.nan]), np.array([False, False])) def test_fillna_float64(self): # GH 11343 idx = Index([1.0, np.nan, 3.0], dtype=float, name="x") # can't downcast exp = Index([1.0, 0.1, 3.0], name="x") tm.assert_index_equal(idx.fillna(0.1), exp) # downcast exp = Float64Index([1.0, 2.0, 3.0], name="x") tm.assert_index_equal(idx.fillna(2), exp) # object exp = Index([1.0, "obj", 3.0], name="x") tm.assert_index_equal(idx.fillna("obj"), exp)
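# --- Hedged illustration of the NaN-containment behavior exercised by
# test_nan_multiple_containment above (added for clarity, not part of the
# original test file): Index.isin treats NaN as matching NaN, unlike ordinary
# float equality, where NaN never equals NaN.
import numpy as np
import pandas as pd

nan_idx = pd.Index([1.0, np.nan])
assert nan_idx.isin([np.nan]).tolist() == [False, True]
assert np.nan != np.nan  # ordinary equality would never match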
def test_union_with_DatetimeIndex(self): i1 = Int64Index(np.arange(0, 20, 2)) i2 = DatetimeIndex(start='2012-01-03 00:00:00', periods=10, freq='D') i1.union(i2) # Works i2.union(i1) # Fails with "AttributeError: can't set attribute"
def index(self, request): return Int64Index(request.param)
def setup_method(self, method): self.indices = dict(index=Int64Index(np.arange(0, 20, 2)), index_dec=Int64Index(np.arange(19, -1, -1))) self.setup_indices()
def test_slice_integer_frame_getitem(self): # similar to above, but on the getitem dim (of a DataFrame) for index in [Int64Index(range(5)), RangeIndex(5)]: s = DataFrame(np.random.randn(5, 2), index=index) def f(idxr): # getitem for l in [slice(0.0, 1), slice(0, 1.0), slice(0.0, 1.0)]: result = idxr(s)[l] indexer = slice(0, 2) self.check(result, s, indexer, False) # positional indexing def f(): s[l] pytest.raises(TypeError, f) # getitem out-of-bounds for l in [slice(-10, 10), slice(-10.0, 10.0)]: result = idxr(s)[l] self.check(result, s, slice(-10, 10), True) # positional indexing def f(): s[slice(-10.0, 10.0)] pytest.raises(TypeError, f) # getitem odd floats for l, res in [(slice(0.5, 1), slice(1, 2)), (slice(0, 0.5), slice(0, 1)), (slice(0.5, 1.5), slice(1, 2))]: result = idxr(s)[l] self.check(result, s, res, False) # positional indexing def f(): s[l] pytest.raises(TypeError, f) # setitem for l in [slice(3.0, 4), slice(3, 4.0), slice(3.0, 4.0)]: sc = s.copy() idxr(sc)[l] = 0 result = idxr(sc)[l].values.ravel() assert (result == 0).all() # positional indexing def f(): s[l] = 0 pytest.raises(TypeError, f) f(lambda x: x.loc) with catch_warnings(record=True): f(lambda x: x.ix)