예제 #1
0
    --------
    >>> arr = RangeIndex(5)
    >>> arr / zeros
    Float64Index([nan, inf, inf, inf, inf], dtype='float64')
    """
    return request.param


# ------------------------------------------------------------------
# Vector Fixtures


@pytest.fixture(
    params=[
        Float64Index(np.arange(5, dtype="float64")),
        Int64Index(np.arange(5, dtype="int64")),
        UInt64Index(np.arange(5, dtype="uint64")),
        RangeIndex(5),
    ],
    ids=lambda x: type(x).__name__,
)
def numeric_idx(request):
    """
    Several types of numeric-dtypes Index objects
    """
    return request.param


# ------------------------------------------------------------------
# Scalar Fixtures
예제 #2
0
    def test_slice_integer_frame_getitem(self):

        # similar to above, but on the getitem dim (of a DataFrame)
        for index in [Int64Index(range(5)), RangeIndex(5)]:

            s = DataFrame(np.random.randn(5, 2), index=index)

            def f(idxr):

                # getitem
                for l in [slice(0.0, 1),
                          slice(0, 1.0),
                          slice(0.0, 1.0)]:

                    result = idxr(s)[l]
                    indexer = slice(0, 2)
                    self.check(result, s, indexer, False)

                    # positional indexing
                    msg = ("cannot do slice indexing"
                           r" on {klass} with these indexers \[(0|1)\.0\] of"
                           " {kind}"
                           .format(klass=type(index), kind=str(float)))
                    with pytest.raises(TypeError, match=msg):
                        s[l]

                # getitem out-of-bounds
                for l in [slice(-10, 10),
                          slice(-10.0, 10.0)]:

                    result = idxr(s)[l]
                    self.check(result, s, slice(-10, 10), True)

                # positional indexing
                msg = ("cannot do slice indexing"
                       r" on {klass} with these indexers \[-10\.0\] of"
                       " {kind}"
                       .format(klass=type(index), kind=str(float)))
                with pytest.raises(TypeError, match=msg):
                    s[slice(-10.0, 10.0)]

                # getitem odd floats
                for l, res in [(slice(0.5, 1), slice(1, 2)),
                               (slice(0, 0.5), slice(0, 1)),
                               (slice(0.5, 1.5), slice(1, 2))]:

                    result = idxr(s)[l]
                    self.check(result, s, res, False)

                    # positional indexing
                    msg = ("cannot do slice indexing"
                           r" on {klass} with these indexers \[0\.5\] of"
                           " {kind}"
                           .format(klass=type(index), kind=str(float)))
                    with pytest.raises(TypeError, match=msg):
                        s[l]

                # setitem
                for l in [slice(3.0, 4),
                          slice(3, 4.0),
                          slice(3.0, 4.0)]:

                    sc = s.copy()
                    idxr(sc)[l] = 0
                    result = idxr(sc)[l].values.ravel()
                    assert (result == 0).all()

                    # positional indexing
                    msg = ("cannot do slice indexing"
                           r" on {klass} with these indexers \[(3|4)\.0\] of"
                           " {kind}"
                           .format(klass=type(index), kind=str(float)))
                    with pytest.raises(TypeError, match=msg):
                        s[l] = 0

            f(lambda x: x.loc)
            with catch_warnings(record=True):
                f(lambda x: x.ix)
예제 #3
0
 def create_index(self):
     return Int64Index(np.arange(5, dtype='int64'))
예제 #4
0
 def test_take_preserve_name(self):
     index = Int64Index([1, 2, 3, 4], name='foo')
     taken = index.take([3, 0, 1])
     self.assertEqual(index.name, taken.name)
예제 #5
0
class TestRangeIndexSetOps:
    @pytest.mark.parametrize("klass", [RangeIndex, Int64Index, UInt64Index])
    def test_intersection_mismatched_dtype(self, klass):
        # check that we cast to float, not object
        index = RangeIndex(start=0, stop=20, step=2, name="foo")
        index = klass(index)

        flt = index.astype(np.float64)

        # bc index.equals(flt), we go through fastpath and get RangeIndex back
        result = index.intersection(flt)
        tm.assert_index_equal(result, index, exact=True)

        result = flt.intersection(index)
        tm.assert_index_equal(result, flt, exact=True)

        # neither empty, not-equals
        result = index.intersection(flt[1:])
        tm.assert_index_equal(result, flt[1:], exact=True)

        result = flt[1:].intersection(index)
        tm.assert_index_equal(result, flt[1:], exact=True)

        # empty other
        result = index.intersection(flt[:0])
        tm.assert_index_equal(result, flt[:0], exact=True)

        result = flt[:0].intersection(index)
        tm.assert_index_equal(result, flt[:0], exact=True)

    def test_intersection(self, sort):
        # intersect with Int64Index
        index = RangeIndex(start=0, stop=20, step=2)
        other = Index(np.arange(1, 6))
        result = index.intersection(other, sort=sort)
        expected = Index(np.sort(np.intersect1d(index.values, other.values)))
        tm.assert_index_equal(result, expected)

        result = other.intersection(index, sort=sort)
        expected = Index(
            np.sort(np.asarray(np.intersect1d(index.values, other.values)))
        )
        tm.assert_index_equal(result, expected)

        # intersect with increasing RangeIndex
        other = RangeIndex(1, 6)
        result = index.intersection(other, sort=sort)
        expected = Index(np.sort(np.intersect1d(index.values, other.values)))
        tm.assert_index_equal(result, expected)

        # intersect with decreasing RangeIndex
        other = RangeIndex(5, 0, -1)
        result = index.intersection(other, sort=sort)
        expected = Index(np.sort(np.intersect1d(index.values, other.values)))
        tm.assert_index_equal(result, expected)

        # reversed (GH 17296)
        result = other.intersection(index, sort=sort)
        tm.assert_index_equal(result, expected)

        # GH 17296: intersect two decreasing RangeIndexes
        first = RangeIndex(10, -2, -2)
        other = RangeIndex(5, -4, -1)
        expected = first.astype(int).intersection(other.astype(int), sort=sort)
        result = first.intersection(other, sort=sort).astype(int)
        tm.assert_index_equal(result, expected)

        # reversed
        result = other.intersection(first, sort=sort).astype(int)
        tm.assert_index_equal(result, expected)

        index = RangeIndex(5)

        # intersect of non-overlapping indices
        other = RangeIndex(5, 10, 1)
        result = index.intersection(other, sort=sort)
        expected = RangeIndex(0, 0, 1)
        tm.assert_index_equal(result, expected)

        other = RangeIndex(-1, -5, -1)
        result = index.intersection(other, sort=sort)
        expected = RangeIndex(0, 0, 1)
        tm.assert_index_equal(result, expected)

        # intersection of empty indices
        other = RangeIndex(0, 0, 1)
        result = index.intersection(other, sort=sort)
        expected = RangeIndex(0, 0, 1)
        tm.assert_index_equal(result, expected)

        result = other.intersection(index, sort=sort)
        tm.assert_index_equal(result, expected)

        # intersection of non-overlapping values based on start value and gcd
        index = RangeIndex(1, 10, 2)
        other = RangeIndex(0, 10, 4)
        result = index.intersection(other, sort=sort)
        expected = RangeIndex(0, 0, 1)
        tm.assert_index_equal(result, expected)

    def test_union_noncomparable(self, sort):
        # corner case, non-Int64Index
        index = RangeIndex(start=0, stop=20, step=2)
        other = Index([datetime.now() + timedelta(i) for i in range(4)], dtype=object)
        result = index.union(other, sort=sort)
        expected = Index(np.concatenate((index, other)))
        tm.assert_index_equal(result, expected)

        result = other.union(index, sort=sort)
        expected = Index(np.concatenate((other, index)))
        tm.assert_index_equal(result, expected)

    @pytest.fixture(
        params=[
            (
                RangeIndex(0, 10, 1),
                RangeIndex(0, 10, 1),
                RangeIndex(0, 10, 1),
                RangeIndex(0, 10, 1),
            ),
            (
                RangeIndex(0, 10, 1),
                RangeIndex(5, 20, 1),
                RangeIndex(0, 20, 1),
                Int64Index(range(20)),
            ),
            (
                RangeIndex(0, 10, 1),
                RangeIndex(10, 20, 1),
                RangeIndex(0, 20, 1),
                Int64Index(range(20)),
            ),
            (
                RangeIndex(0, -10, -1),
                RangeIndex(0, -10, -1),
                RangeIndex(0, -10, -1),
                RangeIndex(0, -10, -1),
            ),
            (
                RangeIndex(0, -10, -1),
                RangeIndex(-10, -20, -1),
                RangeIndex(-19, 1, 1),
                Int64Index(range(0, -20, -1)),
            ),
            (
                RangeIndex(0, 10, 2),
                RangeIndex(1, 10, 2),
                RangeIndex(0, 10, 1),
                Int64Index(list(range(0, 10, 2)) + list(range(1, 10, 2))),
            ),
            (
                RangeIndex(0, 11, 2),
                RangeIndex(1, 12, 2),
                RangeIndex(0, 12, 1),
                Int64Index(list(range(0, 11, 2)) + list(range(1, 12, 2))),
            ),
            (
                RangeIndex(0, 21, 4),
                RangeIndex(-2, 24, 4),
                RangeIndex(-2, 24, 2),
                Int64Index(list(range(0, 21, 4)) + list(range(-2, 24, 4))),
            ),
            (
                RangeIndex(0, -20, -2),
                RangeIndex(-1, -21, -2),
                RangeIndex(-19, 1, 1),
                Int64Index(list(range(0, -20, -2)) + list(range(-1, -21, -2))),
            ),
            (
                RangeIndex(0, 100, 5),
                RangeIndex(0, 100, 20),
                RangeIndex(0, 100, 5),
                Int64Index(range(0, 100, 5)),
            ),
            (
                RangeIndex(0, -100, -5),
                RangeIndex(5, -100, -20),
                RangeIndex(-95, 10, 5),
                Int64Index(list(range(0, -100, -5)) + [5]),
            ),
            (
                RangeIndex(0, -11, -1),
                RangeIndex(1, -12, -4),
                RangeIndex(-11, 2, 1),
                Int64Index(list(range(0, -11, -1)) + [1, -11]),
            ),
            (RangeIndex(0), RangeIndex(0), RangeIndex(0), RangeIndex(0)),
            (
                RangeIndex(0, -10, -2),
                RangeIndex(0),
                RangeIndex(0, -10, -2),
                RangeIndex(0, -10, -2),
            ),
            (
                RangeIndex(0, 100, 2),
                RangeIndex(100, 150, 200),
                RangeIndex(0, 102, 2),
                Int64Index(range(0, 102, 2)),
            ),
            (
                RangeIndex(0, -100, -2),
                RangeIndex(-100, 50, 102),
                RangeIndex(-100, 4, 2),
                Int64Index(list(range(0, -100, -2)) + [-100, 2]),
            ),
            (
                RangeIndex(0, -100, -1),
                RangeIndex(0, -50, -3),
                RangeIndex(-99, 1, 1),
                Int64Index(list(range(0, -100, -1))),
            ),
            (
                RangeIndex(0, 1, 1),
                RangeIndex(5, 6, 10),
                RangeIndex(0, 6, 5),
                Int64Index([0, 5]),
            ),
            (
                RangeIndex(0, 10, 5),
                RangeIndex(-5, -6, -20),
                RangeIndex(-5, 10, 5),
                Int64Index([0, 5, -5]),
            ),
            (
                RangeIndex(0, 3, 1),
                RangeIndex(4, 5, 1),
                Int64Index([0, 1, 2, 4]),
                Int64Index([0, 1, 2, 4]),
            ),
            (
                RangeIndex(0, 10, 1),
                Int64Index([]),
                RangeIndex(0, 10, 1),
                RangeIndex(0, 10, 1),
            ),
            (
                RangeIndex(0),
                Int64Index([1, 5, 6]),
                Int64Index([1, 5, 6]),
                Int64Index([1, 5, 6]),
            ),
        ]
    )
    def unions(self, request):
        """Inputs and expected outputs for RangeIndex.union tests"""
        return request.param

    def test_union_sorted(self, unions):

        idx1, idx2, expected_sorted, expected_notsorted = unions

        res1 = idx1.union(idx2, sort=None)
        tm.assert_index_equal(res1, expected_sorted, exact=True)

        res1 = idx1.union(idx2, sort=False)
        tm.assert_index_equal(res1, expected_notsorted, exact=True)

        res2 = idx2.union(idx1, sort=None)
        res3 = idx1._int64index.union(idx2, sort=None)
        tm.assert_index_equal(res2, expected_sorted, exact=True)
        tm.assert_index_equal(res3, expected_sorted)

    def test_difference(self):
        # GH#12034 Cases where we operate against another RangeIndex and may
        #  get back another RangeIndex
        obj = RangeIndex.from_range(range(1, 10), name="foo")

        result = obj.difference(obj)
        expected = RangeIndex.from_range(range(0), name="foo")
        tm.assert_index_equal(result, expected, exact=True)

        result = obj.difference(expected.rename("bar"))
        tm.assert_index_equal(result, obj.rename(None), exact=True)

        result = obj.difference(obj[:3])
        tm.assert_index_equal(result, obj[3:], exact=True)

        result = obj.difference(obj[-3:])
        tm.assert_index_equal(result, obj[:-3], exact=True)

        result = obj[::-1].difference(obj[-3:])
        tm.assert_index_equal(result, obj[:-3][::-1], exact=True)

        result = obj[::-1].difference(obj[-3:][::-1])
        tm.assert_index_equal(result, obj[:-3][::-1], exact=True)

        result = obj.difference(obj[2:6])
        expected = Int64Index([1, 2, 7, 8, 9], name="foo")
        tm.assert_index_equal(result, expected)

    def test_difference_mismatched_step(self):
        obj = RangeIndex.from_range(range(1, 10), name="foo")

        result = obj.difference(obj[::2])
        expected = obj[1::2]._int64index
        tm.assert_index_equal(result, expected, exact=True)

        result = obj.difference(obj[1::2])
        expected = obj[::2]._int64index
        tm.assert_index_equal(result, expected, exact=True)

    def test_symmetric_difference(self):
        # GH#12034 Cases where we operate against another RangeIndex and may
        #  get back another RangeIndex
        left = RangeIndex.from_range(range(1, 10), name="foo")

        result = left.symmetric_difference(left)
        expected = RangeIndex.from_range(range(0), name="foo")
        tm.assert_index_equal(result, expected)

        result = left.symmetric_difference(expected.rename("bar"))
        tm.assert_index_equal(result, left.rename(None))

        result = left[:-2].symmetric_difference(left[2:])
        expected = Int64Index([1, 2, 8, 9], name="foo")
        tm.assert_index_equal(result, expected)

        right = RangeIndex.from_range(range(10, 15))

        result = left.symmetric_difference(right)
        expected = RangeIndex.from_range(range(1, 15))
        tm.assert_index_equal(result, expected)

        result = left.symmetric_difference(right[1:])
        expected = Int64Index([1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 14])
        tm.assert_index_equal(result, expected)
예제 #6
0
 def setUp(self):
     self.indices = dict(index=Int64Index(np.arange(0, 20, 2)))
     self.setup_indices()
예제 #7
0
 def test_get_indexer(self):
     target = Int64Index(np.arange(10))
     indexer = self.index.get_indexer(target)
     expected = np.array([0, -1, 1, -1, 2, -1, 3, -1, 4, -1], dtype=np.intp)
     tm.assert_numpy_array_equal(indexer, expected)
예제 #8
0
class Base:
    """
    Common tests for all variations of IntervalIndex construction. Input data
    to be supplied in breaks format, then converted by the subclass method
    get_kwargs_from_breaks to the expected format.
    """
    @pytest.mark.parametrize(
        'breaks', [[3, 14, 15, 92, 653],
                   np.arange(10, dtype='int64'),
                   Int64Index(range(-10, 11)),
                   Float64Index(np.arange(20, 30, 0.5)),
                   date_range('20180101', periods=10),
                   date_range('20180101', periods=10, tz='US/Eastern'),
                   timedelta_range('1 day', periods=10)])
    def test_constructor(self, constructor, breaks, closed, name):
        result_kwargs = self.get_kwargs_from_breaks(breaks, closed)
        result = constructor(closed=closed, name=name, **result_kwargs)

        assert result.closed == closed
        assert result.name == name
        assert result.dtype.subtype == getattr(breaks, 'dtype', 'int64')
        tm.assert_index_equal(result.left, Index(breaks[:-1]))
        tm.assert_index_equal(result.right, Index(breaks[1:]))

    @pytest.mark.parametrize('breaks, subtype',
                             [(Int64Index([0, 1, 2, 3, 4]), 'float64'),
                              (Int64Index([0, 1, 2, 3, 4]), 'datetime64[ns]'),
                              (Int64Index([0, 1, 2, 3, 4]), 'timedelta64[ns]'),
                              (Float64Index([0, 1, 2, 3, 4]), 'int64'),
                              (date_range('2017-01-01', periods=5), 'int64'),
                              (timedelta_range('1 day', periods=5), 'int64')])
    def test_constructor_dtype(self, constructor, breaks, subtype):
        # GH 19262: conversion via dtype parameter
        expected_kwargs = self.get_kwargs_from_breaks(breaks.astype(subtype))
        expected = constructor(**expected_kwargs)

        result_kwargs = self.get_kwargs_from_breaks(breaks)
        iv_dtype = IntervalDtype(subtype)
        for dtype in (iv_dtype, str(iv_dtype)):
            result = constructor(dtype=dtype, **result_kwargs)
            tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize('breaks',
                             [[np.nan] * 2, [np.nan] * 4, [np.nan] * 50])
    def test_constructor_nan(self, constructor, breaks, closed):
        # GH 18421
        result_kwargs = self.get_kwargs_from_breaks(breaks)
        result = constructor(closed=closed, **result_kwargs)

        expected_subtype = np.float64
        expected_values = np.array(breaks[:-1], dtype=object)

        assert result.closed == closed
        assert result.dtype.subtype == expected_subtype
        tm.assert_numpy_array_equal(result._ndarray_values, expected_values)

    @pytest.mark.parametrize('breaks', [[],
                                        np.array([], dtype='int64'),
                                        np.array([], dtype='float64'),
                                        np.array([], dtype='datetime64[ns]'),
                                        np.array([], dtype='timedelta64[ns]')])
    def test_constructor_empty(self, constructor, breaks, closed):
        # GH 18421
        result_kwargs = self.get_kwargs_from_breaks(breaks)
        result = constructor(closed=closed, **result_kwargs)

        expected_values = np.array([], dtype=object)
        expected_subtype = getattr(breaks, 'dtype', np.int64)

        assert result.empty
        assert result.closed == closed
        assert result.dtype.subtype == expected_subtype
        tm.assert_numpy_array_equal(result._ndarray_values, expected_values)

    @pytest.mark.parametrize('breaks', [
        tuple('0123456789'),
        list('abcdefghij'),
        np.array(list('abcdefghij'), dtype=object),
        np.array(list('abcdefghij'), dtype='<U1')
    ])
    def test_constructor_string(self, constructor, breaks):
        # GH 19016
        msg = ('category, object, and string subtypes are not supported '
               'for IntervalIndex')
        with pytest.raises(TypeError, match=msg):
            constructor(**self.get_kwargs_from_breaks(breaks))

    @pytest.mark.parametrize('cat_constructor',
                             [Categorical, CategoricalIndex])
    def test_constructor_categorical_valid(self, constructor, cat_constructor):
        # GH 21243/21253
        if isinstance(constructor, partial) and constructor.func is Index:
            # Index is defined to create CategoricalIndex from categorical data
            pytest.skip()

        breaks = np.arange(10, dtype='int64')
        expected = IntervalIndex.from_breaks(breaks)

        cat_breaks = cat_constructor(breaks)
        result_kwargs = self.get_kwargs_from_breaks(cat_breaks)
        result = constructor(**result_kwargs)
        tm.assert_index_equal(result, expected)

    def test_generic_errors(self, constructor):
        # filler input data to be used when supplying invalid kwargs
        filler = self.get_kwargs_from_breaks(range(10))

        # invalid closed
        msg = "invalid option for 'closed': invalid"
        with pytest.raises(ValueError, match=msg):
            constructor(closed='invalid', **filler)

        # unsupported dtype
        msg = 'dtype must be an IntervalDtype, got int64'
        with pytest.raises(TypeError, match=msg):
            constructor(dtype='int64', **filler)

        # invalid dtype
        msg = "data type 'invalid' not understood"
        with pytest.raises(TypeError, match=msg):
            constructor(dtype='invalid', **filler)

        # no point in nesting periods in an IntervalIndex
        periods = period_range('2000-01-01', periods=10)
        periods_kwargs = self.get_kwargs_from_breaks(periods)
        msg = 'Period dtypes are not supported, use a PeriodIndex instead'
        with pytest.raises(ValueError, match=msg):
            constructor(**periods_kwargs)

        # decreasing values
        decreasing_kwargs = self.get_kwargs_from_breaks(range(10, -1, -1))
        msg = 'left side of interval must be <= right side'
        with pytest.raises(ValueError, match=msg):
            constructor(**decreasing_kwargs)
예제 #9
0
    def test_tdi_floordiv_tdlike_scalar(self, delta):
        tdi = timedelta_range('1 days', '10 days', name='foo')
        expected = Int64Index((np.arange(10) + 1) * 12, name='foo')

        result = tdi // delta
        tm.assert_index_equal(result, expected, exact=False)
예제 #10
0
class PerformanceTestCase(TestCase):
    dr = date_range(start='2015-1-1', end='2015-1-2')
    dr.name = 'date'
    tickers = ['A', 'B', 'C', 'D']
    factor = DataFrame(index=dr,
                       columns=tickers,
                       data=[[1, 2, 3, 4], [4, 3, 2, 1]]).stack()
    factor.index = factor.index.set_names(['date', 'asset'])
    factor.name = 'factor'
    factor_data = DataFrame()
    factor_data['factor'] = factor
    factor_data['group'] = Series(index=factor.index,
                                  data=[1, 1, 2, 2, 1, 1, 2, 2],
                                  dtype="category")

    @parameterized.expand([(
        factor_data,
        [4, 3, 2, 1, 1, 2, 3, 4],
        False,
        False,
        dr,
        [-1., -1.],
    ), (
        factor_data,
        [1, 2, 3, 4, 4, 3, 2, 1],
        False,
        False,
        dr,
        [1., 1.],
    ),
                           (
                               factor_data,
                               [1, 2, 3, 4, 4, 3, 2, 1],
                               False,
                               True,
                               MultiIndex.from_product([dr, [1, 2]],
                                                       names=['date',
                                                              'group']),
                               [1., 1., 1., 1.],
                           ),
                           (
                               factor_data,
                               [1, 2, 3, 4, 4, 3, 2, 1],
                               True,
                               True,
                               MultiIndex.from_product([dr, [1, 2]],
                                                       names=['date',
                                                              'group']),
                               [1., 1., 1., 1.],
                           )])
    def test_information_coefficient(self, factor_data, forward_returns,
                                     group_adjust, by_group, expected_ix,
                                     expected_ic_val):

        factor_data[1] = Series(index=factor_data.index, data=forward_returns)

        ic = factor_information_coefficient(factor_data=factor_data,
                                            group_adjust=group_adjust,
                                            by_group=by_group)

        expected_ic_df = DataFrame(index=expected_ix,
                                   columns=Int64Index([1], dtype='object'),
                                   data=expected_ic_val)

        assert_frame_equal(ic, expected_ic_df)

    @parameterized.expand([
        (factor_data, [4, 3, 2, 1, 1, 2, 3,
                       4], False, False, 'D', dr, [-1., -1.]),
        (factor_data, [1, 2, 3, 4, 4, 3, 2, 1], False, False, 'W',
         DatetimeIndex(['2015-01-04'], name='date', freq='W-SUN'), [1.]),
        (factor_data, [1, 2, 3, 4, 4, 3, 2, 1], False, True, None,
         Int64Index([1, 2], name='group'), [1., 1.]),
        (factor_data, [1, 2, 3, 4, 4, 3, 2, 1], False, True, 'W',
         MultiIndex.from_product([
             DatetimeIndex(['2015-01-04'], name='date', freq='W-SUN'), [1, 2]
         ],
                                 names=['date', 'group']), [1., 1.])
    ])
    def test_mean_information_coefficient(self, factor_data, forward_returns,
                                          group_adjust, by_group, by_time,
                                          expected_ix, expected_ic_val):

        factor_data[1] = Series(index=factor_data.index, data=forward_returns)

        ic = mean_information_coefficient(factor_data,
                                          group_adjust=group_adjust,
                                          by_group=by_group,
                                          by_time=by_time)

        expected_ic_df = DataFrame(index=expected_ix,
                                   columns=Int64Index([1], dtype='object'),
                                   data=expected_ic_val)

        assert_frame_equal(ic, expected_ic_df)

    @parameterized.expand([
        ([[1.0, 2.0, 3.0, 4.0], [4.0, 3.0, 2.0, 1.0], [1.0, 2.0, 3.0, 4.0],
          [1.0, 2.0, 3.0, 4.0]], 4.0, [nan, 1.0, 1.0, 0.0]),
        ([[1.0, 2.0, 3.0, 4.0], [1.0, 2.0, 3.0, 4.0], [1.0, 2.0, 3.0, 4.0],
          [1.0, 2.0, 3.0, 4.0]], 3.0, [nan, 0.0, 0.0, 0.0]),
        ([[1.0, 2.0, 3.0, 4.0], [4.0, 3.0, 2.0, 1.0], [1.0, 2.0, 3.0, 4.0],
          [4.0, 3.0, 2.0, 1.0]], 2.0, [nan, 1.0, 1.0, 1.0])
    ])
    def test_quantile_turnover(self, quantile_values, test_quantile,
                               expected_vals):

        dr = date_range(start='2015-1-1', end='2015-1-4')
        dr.name = 'date'
        tickers = ['A', 'B', 'C', 'D']

        quantized_test_factor = Series(
            DataFrame(index=dr, columns=tickers, data=quantile_values).stack())
        quantized_test_factor.index = quantized_test_factor.index.set_names(
            ['date', 'asset'])

        to = quantile_turnover(quantized_test_factor, test_quantile)

        expected = Series(index=quantized_test_factor.index.levels[0],
                          data=expected_vals)
        expected.name = test_quantile

        assert_series_equal(to, expected)

    @parameterized.expand([([1, 2, 3, 4, 4, 3, 2,
                             1], [4, 3, 2, 1, 1, 2, 3,
                                  4], False, [-1.25000, -1.25000]),
                           ([1, 1, 1, 1, 1, 1, 1, 1], [4, 3, 2, 1, 1, 2, 3,
                                                       4], False, [nan, nan]),
                           ([1, 2, 3, 4, 4, 3, 2, 1], [4, 3, 2, 1, 1, 2, 3,
                                                       4], True, [-0.5, -0.5]),
                           ([1, 2, 3, 4, 1, 2, 3, 4], [1, 4, 1, 2, 1, 2, 2,
                                                       1], True, [1.0, 0.0]),
                           ([1, 1, 1, 1, 1, 1, 1, 1], [4, 3, 2, 1, 1, 2, 3,
                                                       4], True, [nan, nan])])
    def test_factor_returns(self, factor_vals, fwd_return_vals, group_adjust,
                            expected_vals):

        factor_data = self.factor_data.copy()
        factor_data[1] = fwd_return_vals
        factor_data['factor'] = factor_vals

        factor_returns_s = factor_returns(factor_data=factor_data,
                                          demeaned=True,
                                          group_adjust=group_adjust)

        expected = DataFrame(index=self.dr,
                             data=expected_vals,
                             columns=get_forward_returns_columns(
                                 factor_data.columns))

        assert_frame_equal(factor_returns_s, expected)

    @parameterized.expand([([1, 2, 3, 4, 1, 1, 1, 1], -1, 5. / 6.)])
    def test_factor_alpha_beta(self, fwd_return_vals, alpha, beta):

        factor_data = self.factor_data.copy()
        factor_data[1] = fwd_return_vals

        ab = factor_alpha_beta(factor_data=factor_data)

        expected = DataFrame(columns=[1],
                             index=['Ann. alpha', 'beta'],
                             data=[alpha, beta])

        assert_frame_equal(ab, expected)

    @parameterized.expand([
        ([[1.0, 2.0, 3.0, 4.0], [1.0, 2.0, 3.0, 4.0], [1.0, 2.0, 3.0, 4.0],
          [1.0, 2.0, 3.0, 4.0]], '2015-1-4', 1, [nan, 1.0, 1.0, 1.0]),
        ([[4.0, 3.0, 2.0, 1.0], [1.0, 2.0, 3.0, 4.0], [4.0, 3.0, 2.0, 1.0],
          [1.0, 2.0, 3.0, 4.0]], '2015-1-4', 1, [nan, -1.0, -1.0, -1.0]),
        ([[1.0, 2.0, 3.0, 4.0], [2.0, 1.0, 4.0, 3.0], [4.0, 3.0, 2.0, 1.0],
          [1.0, 2.0, 3.0, 4.0], [2.0, 1.0, 4.0, 3.0], [4.0, 3.0, 2.0, 1.0],
          [2.0, 1.0, 4.0, 3.0], [4.0, 3.0, 2.0, 1.0], [1.0, 2.0, 3.0, 4.0],
          [2.0, 1.0, 4.0, 3.0], [2.0, 1.0, 4.0, 3.0], [4.0, 3.0, 2.0,
                                                       1.0]], '2015-1-12', 3,
         [nan, nan, nan, 1.0, 1.0, 1.0, 0.6, -0.6, -1.0, 1.0, -0.6, -1.0])
    ])
    def test_factor_rank_autocorrelation(self, factor_values, end_date, period,
                                         expected_vals):
        dr = date_range(start='2015-1-1', end=end_date)
        dr.name = 'date'
        tickers = ['A', 'B', 'C', 'D']
        factor = DataFrame(index=dr, columns=tickers,
                           data=factor_values).stack()
        factor.index = factor.index.set_names(['date', 'asset'])

        factor_df = DataFrame()
        factor_df['factor'] = factor

        fa = factor_rank_autocorrelation(factor_df, period)
        expected = Series(index=dr, data=expected_vals)
        expected.name = period

        assert_series_equal(fa, expected)

    @parameterized.expand([
        (1, 2, False, 4, [[1.00, 0.0, -0.50, -0.75], [0.0, 0.0, 0.0, 0.0],
                          [0.00, 0.00, 0.00, 0.00], [0.0, 0.0, 0.0, 0.0],
                          [-0.20, 0.0, 0.25, 0.5625], [0.0, 0.0, 0.0, 0.0],
                          [-0.3333333, 0.0, 0.50, 1.25], [0.0, 0.0, 0.0,
                                                          0.0]]),
        (1, 2, True, 4, [[0.8833333, 0.0, -0.5625, -1.015625],
                         [0.0, 0.0, 0.0, 0.0],
                         [-0.1166667, 0.0, -0.0625, -0.265625],
                         [0.0, 0.0, 0.0, 0.0],
                         [-0.3166667, 0.0, 0.1875, 0.296875],
                         [0.0, 0.0, 0.0, 0.0],
                         [-0.4500000, 0.0, 0.4375, 0.984375],
                         [0.0, 0.0, 0.0, 0.0]]),
        (3, 0, False, 4, [[7.0, 3.0, 1.0, 0.0], [0.0, 0.0, 0.0, 0.0],
                          [0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0],
                          [-0.488, -0.36, -0.2, 0.0], [0.0, 0.0, 0.0, 0.0],
                          [-0.703704, -0.55555555, -0.333333333, 0.0],
                          [0.0, 0.0, 0.0, 0.0]]),
        (0, 3, True, 4, [[0.0, -0.5625, -1.015625, -1.488281],
                         [0.0, 0.0, 0.0, 0.0],
                         [0.0, -0.0625, -0.265625, -0.613281],
                         [0.0, 0.0, 0.0,
                          0.0], [0.0, 0.1875, 0.296875, 0.339844],
                         [0.0, 0.0, 0.0,
                          0.0], [0.0, 0.4375, 0.984375, 1.761719],
                         [0.0, 0.0, 0.0, 0.0]]),
        (3, 3, False, 2,
         [[3.5, 1.5, 0.5, 0.0, -0.25, -0.375, -0.4375],
          [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
          [-0.595852, -0.457778, -0.266667, 0.0, 0.375, 0.90625, 1.664062],
          [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]]),
        (3, 3, True, 2,
         [[2.047926, 0.978888, 0.383333, 0.0, -0.3125, -0.640625, -1.050781],
          [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
          [-2.047926, -0.978888, -0.383333, 0.0, 0.3125, 0.640625, 1.050781],
          [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]]),
    ])
    def test_average_cumulative_return_by_quantile(self, before, after,
                                                   demeaned, quantiles,
                                                   expected_vals):
        dr = date_range(start='2015-1-15', end='2015-2-1')
        dr.name = 'date'
        tickers = ['A', 'B', 'C', 'D']
        r1, r2, r3, r4 = (1.25, 1.50, 1.00, 0.50)
        data = [[r1**i, r2**i, r3**i, r4**i] for i in range(1, 19)]
        prices = DataFrame(index=dr, columns=tickers, data=data)
        dr2 = date_range(start='2015-1-21', end='2015-1-26')
        dr2.name = 'date'
        factor = DataFrame(index=dr2,
                           columns=tickers,
                           data=[[3, 4, 2, 1], [3, 4, 2, 1], [3, 4, 2, 1],
                                 [3, 4, 2, 1], [3, 4, 2, 1], [3, 4, 2,
                                                              1]]).stack()

        factor_data = get_clean_factor_and_forward_returns(factor,
                                                           prices,
                                                           quantiles=quantiles,
                                                           periods=range(
                                                               0, after + 1),
                                                           filter_zscore=False)

        avgrt = average_cumulative_return_by_quantile(factor_data, prices,
                                                      before, after, demeaned)
        arrays = []
        for q in range(1, quantiles + 1):
            arrays.append((q, 'mean'))
            arrays.append((q, 'std'))
        index = MultiIndex.from_tuples(arrays, names=['factor_quantile', None])
        expected = DataFrame(index=index,
                             columns=range(-before, after + 1),
                             data=expected_vals)
        assert_frame_equal(avgrt, expected)

    @parameterized.expand([
        (0, 2, False, 4, [[0.0, -0.50, -0.75],
                          [0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0],
                          [0.0, 0.25, 0.5625], [0.0, 0.0, 0.0],
                          [0.0, 0.50, 1.25], [0.0, 0.0, 0.0]]),
        (0, 3, True, 4, [[0.0, -0.5625, -1.015625, -1.488281],
                         [0.0, 0.0, 0.0, 0.0],
                         [0.0, -0.0625, -0.265625, -0.613281],
                         [0.0, 0.0, 0.0,
                          0.0], [0.0, 0.1875, 0.296875, 0.339844],
                         [0.0, 0.0, 0.0,
                          0.0], [0.0, 0.4375, 0.984375, 1.761719],
                         [0.0, 0.0, 0.0, 0.0]]),
        (0, 3, False, 2, [[0.0, -0.25, -0.375, -0.4375], [0.0, 0.0, 0.0, 0.0],
                          [0.0, 0.375, 0.90625, 1.664062],
                          [0.0, 0.0, 0.0, 0.0]]),
        (0, 3, True, 2, [[0.0, -0.3125, -0.640625, -1.050781],
                         [0.0, 0.0, 0.0,
                          0.0], [0.0, 0.3125, 0.640625, 1.050781],
                         [0.0, 0.0, 0.0, 0.0]]),
    ])
    def test_average_cumulative_return_by_quantile_2(self, before, after,
                                                     demeaned, quantiles,
                                                     expected_vals):
        """
        Test varying factor asset universe: at different dates there might be
        different assets
        """
        dr = date_range(start='2015-1-15', end='2015-1-25')
        dr.name = 'date'
        tickers = ['A', 'B', 'C', 'D', 'E', 'F']
        r1, r2, r3, r4 = (1.25, 1.50, 1.00, 0.50)
        data = [[r1**i, r2**i, r3**i, r4**i, r2**i, r3**i]
                for i in range(1, 12)]
        prices = DataFrame(index=dr, columns=tickers, data=data)
        dr2 = date_range(start='2015-1-18', end='2015-1-21')
        dr2.name = 'date'
        factor = DataFrame(index=dr2,
                           columns=tickers,
                           data=[[3, 4, 2, 1, nan,
                                  nan], [3, 4, 2, 1, nan, nan],
                                 [3, nan, nan, 1, 4, 2],
                                 [3, nan, nan, 1, 4, 2]]).stack()

        factor_data = get_clean_factor_and_forward_returns(factor,
                                                           prices,
                                                           quantiles=quantiles,
                                                           periods=range(
                                                               0, after + 1),
                                                           filter_zscore=False)

        avgrt = average_cumulative_return_by_quantile(factor_data, prices,
                                                      before, after, demeaned)
        arrays = []
        for q in range(1, quantiles + 1):
            arrays.append((q, 'mean'))
            arrays.append((q, 'std'))
        index = MultiIndex.from_tuples(arrays, names=['factor_quantile', None])
        expected = DataFrame(index=index,
                             columns=range(-before, after + 1),
                             data=expected_vals)
        assert_frame_equal(avgrt, expected)
예제 #11
0
    def test_slice_integer(self):

        # same as above, but for Integer based indexes
        # these coerce to a like integer
        # oob indicates if we are out of bounds
        # of positional indexing
        for index, oob in [
            (Int64Index(range(5)), False),
            (RangeIndex(5), False),
            (Int64Index(range(5)) + 10, True),
        ]:

            # s is an in-range index
            s = Series(range(5), index=index)

            # getitem
            for l in [slice(3.0, 4), slice(3, 4.0), slice(3.0, 4.0)]:

                result = s.loc[l]

                # these are all label indexing
                # except getitem which is positional
                # empty
                if oob:
                    indexer = slice(0, 0)
                else:
                    indexer = slice(3, 5)
                self.check(result, s, indexer, False)

            # getitem out-of-bounds
            for l in [slice(-6, 6), slice(-6.0, 6.0)]:

                result = s.loc[l]

                # these are all label indexing
                # except getitem which is positional
                # empty
                if oob:
                    indexer = slice(0, 0)
                else:
                    indexer = slice(-6, 6)
                self.check(result, s, indexer, False)

            # positional indexing
            msg = (
                "cannot do slice indexing "
                fr"on {type(index).__name__} with these indexers \[-6\.0\] of "
                "type float"
            )
            with pytest.raises(TypeError, match=msg):
                s[slice(-6.0, 6.0)]

            # getitem odd floats
            for l, res1 in [
                (slice(2.5, 4), slice(3, 5)),
                (slice(2, 3.5), slice(2, 4)),
                (slice(2.5, 3.5), slice(3, 4)),
            ]:

                result = s.loc[l]
                if oob:
                    res = slice(0, 0)
                else:
                    res = res1

                self.check(result, s, res, False)

                # positional indexing
                msg = (
                    "cannot do slice indexing "
                    fr"on {type(index).__name__} with these indexers \[(2|3)\.5\] of "
                    "type float"
                )
                with pytest.raises(TypeError, match=msg):
                    s[l]
예제 #12
0
 def test_union(self):
     i1 = Int64Index(np.arange(0, 20, 2))
     i2 = Int64Index(np.arange(10, 30, 2))
     result = i1.union(i2)
     expected = Int64Index(np.arange(0, 30, 2))
     tm.assert_index_equal(result, expected)
예제 #13
0
class ConstructorTests:
    """
    Common tests for all variations of IntervalIndex construction. Input data
    to be supplied in breaks format, then converted by the subclass method
    get_kwargs_from_breaks to the expected format.
    """

    @pytest.mark.filterwarnings("ignore:Passing keywords other:FutureWarning")
    @pytest.mark.parametrize(
        "breaks",
        [
            [3, 14, 15, 92, 653],
            np.arange(10, dtype="int64"),
            Int64Index(range(-10, 11)),
            Float64Index(np.arange(20, 30, 0.5)),
            date_range("20180101", periods=10),
            date_range("20180101", periods=10, tz="US/Eastern"),
            timedelta_range("1 day", periods=10),
        ],
    )
    def test_constructor(self, constructor, breaks, closed, name):
        result_kwargs = self.get_kwargs_from_breaks(breaks, closed)
        result = constructor(closed=closed, name=name, **result_kwargs)

        assert result.closed == closed
        assert result.name == name
        assert result.dtype.subtype == getattr(breaks, "dtype", "int64")
        tm.assert_index_equal(result.left, Index(breaks[:-1]))
        tm.assert_index_equal(result.right, Index(breaks[1:]))

    @pytest.mark.parametrize(
        "breaks, subtype",
        [
            (Int64Index([0, 1, 2, 3, 4]), "float64"),
            (Int64Index([0, 1, 2, 3, 4]), "datetime64[ns]"),
            (Int64Index([0, 1, 2, 3, 4]), "timedelta64[ns]"),
            (Float64Index([0, 1, 2, 3, 4]), "int64"),
            (date_range("2017-01-01", periods=5), "int64"),
            (timedelta_range("1 day", periods=5), "int64"),
        ],
    )
    def test_constructor_dtype(self, constructor, breaks, subtype):
        # GH 19262: conversion via dtype parameter
        warn = None
        if subtype == "int64" and breaks.dtype.kind in ["M", "m"]:
            # astype(int64) deprecated
            warn = FutureWarning

        with tm.assert_produces_warning(warn, check_stacklevel=False):
            expected_kwargs = self.get_kwargs_from_breaks(breaks.astype(subtype))
        expected = constructor(**expected_kwargs)

        result_kwargs = self.get_kwargs_from_breaks(breaks)
        iv_dtype = IntervalDtype(subtype, "right")
        for dtype in (iv_dtype, str(iv_dtype)):
            with tm.assert_produces_warning(warn, check_stacklevel=False):

                result = constructor(dtype=dtype, **result_kwargs)
            tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize(
        "breaks",
        [
            Int64Index([0, 1, 2, 3, 4]),
            Int64Index([0, 1, 2, 3, 4]),
            Int64Index([0, 1, 2, 3, 4]),
            Float64Index([0, 1, 2, 3, 4]),
            date_range("2017-01-01", periods=5),
            timedelta_range("1 day", periods=5),
        ],
    )
    def test_constructor_pass_closed(self, constructor, breaks):
        # not passing closed to IntervalDtype, but to IntervalArray constructor
        warn = None
        if isinstance(constructor, partial) and constructor.func is Index:
            # passing kwargs to Index is deprecated
            warn = FutureWarning

        iv_dtype = IntervalDtype(breaks.dtype)

        result_kwargs = self.get_kwargs_from_breaks(breaks)

        for dtype in (iv_dtype, str(iv_dtype)):
            with tm.assert_produces_warning(warn, check_stacklevel=False):

                result = constructor(dtype=dtype, closed="left", **result_kwargs)
            assert result.dtype.closed == "left"

    @pytest.mark.filterwarnings("ignore:Passing keywords other:FutureWarning")
    @pytest.mark.parametrize("breaks", [[np.nan] * 2, [np.nan] * 4, [np.nan] * 50])
    def test_constructor_nan(self, constructor, breaks, closed):
        # GH 18421
        result_kwargs = self.get_kwargs_from_breaks(breaks)
        result = constructor(closed=closed, **result_kwargs)

        expected_subtype = np.float64
        expected_values = np.array(breaks[:-1], dtype=object)

        assert result.closed == closed
        assert result.dtype.subtype == expected_subtype
        tm.assert_numpy_array_equal(np.array(result), expected_values)

    @pytest.mark.filterwarnings("ignore:Passing keywords other:FutureWarning")
    @pytest.mark.parametrize(
        "breaks",
        [
            [],
            np.array([], dtype="int64"),
            np.array([], dtype="float64"),
            np.array([], dtype="datetime64[ns]"),
            np.array([], dtype="timedelta64[ns]"),
        ],
    )
    def test_constructor_empty(self, constructor, breaks, closed):
        # GH 18421
        result_kwargs = self.get_kwargs_from_breaks(breaks)
        result = constructor(closed=closed, **result_kwargs)

        expected_values = np.array([], dtype=object)
        expected_subtype = getattr(breaks, "dtype", np.int64)

        assert result.empty
        assert result.closed == closed
        assert result.dtype.subtype == expected_subtype
        tm.assert_numpy_array_equal(np.array(result), expected_values)

    @pytest.mark.parametrize(
        "breaks",
        [
            tuple("0123456789"),
            list("abcdefghij"),
            np.array(list("abcdefghij"), dtype=object),
            np.array(list("abcdefghij"), dtype="<U1"),
        ],
    )
    def test_constructor_string(self, constructor, breaks):
        # GH 19016
        msg = (
            "category, object, and string subtypes are not supported "
            "for IntervalIndex"
        )
        with pytest.raises(TypeError, match=msg):
            constructor(**self.get_kwargs_from_breaks(breaks))

    @pytest.mark.parametrize("cat_constructor", [Categorical, CategoricalIndex])
    def test_constructor_categorical_valid(self, constructor, cat_constructor):
        # GH 21243/21253
        if isinstance(constructor, partial) and constructor.func is Index:
            # Index is defined to create CategoricalIndex from categorical data
            pytest.skip()

        breaks = np.arange(10, dtype="int64")
        expected = IntervalIndex.from_breaks(breaks)

        cat_breaks = cat_constructor(breaks)
        result_kwargs = self.get_kwargs_from_breaks(cat_breaks)
        result = constructor(**result_kwargs)
        tm.assert_index_equal(result, expected)

    def test_generic_errors(self, constructor):
        # filler input data to be used when supplying invalid kwargs
        filler = self.get_kwargs_from_breaks(range(10))

        # invalid closed
        msg = "closed must be one of 'right', 'left', 'both', 'neither'"
        with pytest.raises(ValueError, match=msg):
            constructor(closed="invalid", **filler)

        # unsupported dtype
        msg = "dtype must be an IntervalDtype, got int64"
        with pytest.raises(TypeError, match=msg):
            constructor(dtype="int64", **filler)

        # invalid dtype
        msg = "data type [\"']invalid[\"'] not understood"
        with pytest.raises(TypeError, match=msg):
            constructor(dtype="invalid", **filler)

        # no point in nesting periods in an IntervalIndex
        periods = period_range("2000-01-01", periods=10)
        periods_kwargs = self.get_kwargs_from_breaks(periods)
        msg = "Period dtypes are not supported, use a PeriodIndex instead"
        with pytest.raises(ValueError, match=msg):
            constructor(**periods_kwargs)

        # decreasing values
        decreasing_kwargs = self.get_kwargs_from_breaks(range(10, -1, -1))
        msg = "left side of interval must be <= right side"
        with pytest.raises(ValueError, match=msg):
            constructor(**decreasing_kwargs)
예제 #14
0
class TestRangeIndexSetOps:
    def test_intersection(self, sort):
        # intersect with Int64Index
        index = RangeIndex(start=0, stop=20, step=2)
        other = Index(np.arange(1, 6))
        result = index.intersection(other, sort=sort)
        expected = Index(np.sort(np.intersect1d(index.values, other.values)))
        tm.assert_index_equal(result, expected)

        result = other.intersection(index, sort=sort)
        expected = Index(
            np.sort(np.asarray(np.intersect1d(index.values, other.values))))
        tm.assert_index_equal(result, expected)

        # intersect with increasing RangeIndex
        other = RangeIndex(1, 6)
        result = index.intersection(other, sort=sort)
        expected = Index(np.sort(np.intersect1d(index.values, other.values)))
        tm.assert_index_equal(result, expected)

        # intersect with decreasing RangeIndex
        other = RangeIndex(5, 0, -1)
        result = index.intersection(other, sort=sort)
        expected = Index(np.sort(np.intersect1d(index.values, other.values)))
        tm.assert_index_equal(result, expected)

        # reversed (GH 17296)
        result = other.intersection(index, sort=sort)
        tm.assert_index_equal(result, expected)

        # GH 17296: intersect two decreasing RangeIndexes
        first = RangeIndex(10, -2, -2)
        other = RangeIndex(5, -4, -1)
        expected = first.astype(int).intersection(other.astype(int), sort=sort)
        result = first.intersection(other, sort=sort).astype(int)
        tm.assert_index_equal(result, expected)

        # reversed
        result = other.intersection(first, sort=sort).astype(int)
        tm.assert_index_equal(result, expected)

        index = RangeIndex(5)

        # intersect of non-overlapping indices
        other = RangeIndex(5, 10, 1)
        result = index.intersection(other, sort=sort)
        expected = RangeIndex(0, 0, 1)
        tm.assert_index_equal(result, expected)

        other = RangeIndex(-1, -5, -1)
        result = index.intersection(other, sort=sort)
        expected = RangeIndex(0, 0, 1)
        tm.assert_index_equal(result, expected)

        # intersection of empty indices
        other = RangeIndex(0, 0, 1)
        result = index.intersection(other, sort=sort)
        expected = RangeIndex(0, 0, 1)
        tm.assert_index_equal(result, expected)

        result = other.intersection(index, sort=sort)
        tm.assert_index_equal(result, expected)

        # intersection of non-overlapping values based on start value and gcd
        index = RangeIndex(1, 10, 2)
        other = RangeIndex(0, 10, 4)
        result = index.intersection(other, sort=sort)
        expected = RangeIndex(0, 0, 1)
        tm.assert_index_equal(result, expected)

    def test_union_noncomparable(self, sort):
        # corner case, non-Int64Index
        index = RangeIndex(start=0, stop=20, step=2)
        other = Index([datetime.now() + timedelta(i) for i in range(4)],
                      dtype=object)
        result = index.union(other, sort=sort)
        expected = Index(np.concatenate((index, other)))
        tm.assert_index_equal(result, expected)

        result = other.union(index, sort=sort)
        expected = Index(np.concatenate((other, index)))
        tm.assert_index_equal(result, expected)

    @pytest.fixture(params=[
        (
            RangeIndex(0, 10, 1),
            RangeIndex(0, 10, 1),
            RangeIndex(0, 10, 1),
            RangeIndex(0, 10, 1),
        ),
        (
            RangeIndex(0, 10, 1),
            RangeIndex(5, 20, 1),
            RangeIndex(0, 20, 1),
            Int64Index(range(20)),
        ),
        (
            RangeIndex(0, 10, 1),
            RangeIndex(10, 20, 1),
            RangeIndex(0, 20, 1),
            Int64Index(range(20)),
        ),
        (
            RangeIndex(0, -10, -1),
            RangeIndex(0, -10, -1),
            RangeIndex(0, -10, -1),
            RangeIndex(0, -10, -1),
        ),
        (
            RangeIndex(0, -10, -1),
            RangeIndex(-10, -20, -1),
            RangeIndex(-19, 1, 1),
            Int64Index(range(0, -20, -1)),
        ),
        (
            RangeIndex(0, 10, 2),
            RangeIndex(1, 10, 2),
            RangeIndex(0, 10, 1),
            Int64Index(list(range(0, 10, 2)) + list(range(1, 10, 2))),
        ),
        (
            RangeIndex(0, 11, 2),
            RangeIndex(1, 12, 2),
            RangeIndex(0, 12, 1),
            Int64Index(list(range(0, 11, 2)) + list(range(1, 12, 2))),
        ),
        (
            RangeIndex(0, 21, 4),
            RangeIndex(-2, 24, 4),
            RangeIndex(-2, 24, 2),
            Int64Index(list(range(0, 21, 4)) + list(range(-2, 24, 4))),
        ),
        (
            RangeIndex(0, -20, -2),
            RangeIndex(-1, -21, -2),
            RangeIndex(-19, 1, 1),
            Int64Index(list(range(0, -20, -2)) + list(range(-1, -21, -2))),
        ),
        (
            RangeIndex(0, 100, 5),
            RangeIndex(0, 100, 20),
            RangeIndex(0, 100, 5),
            Int64Index(range(0, 100, 5)),
        ),
        (
            RangeIndex(0, -100, -5),
            RangeIndex(5, -100, -20),
            RangeIndex(-95, 10, 5),
            Int64Index(list(range(0, -100, -5)) + [5]),
        ),
        (
            RangeIndex(0, -11, -1),
            RangeIndex(1, -12, -4),
            RangeIndex(-11, 2, 1),
            Int64Index(list(range(0, -11, -1)) + [1, -11]),
        ),
        (RangeIndex(0), RangeIndex(0), RangeIndex(0), RangeIndex(0)),
        (
            RangeIndex(0, -10, -2),
            RangeIndex(0),
            RangeIndex(0, -10, -2),
            RangeIndex(0, -10, -2),
        ),
        (
            RangeIndex(0, 100, 2),
            RangeIndex(100, 150, 200),
            RangeIndex(0, 102, 2),
            Int64Index(range(0, 102, 2)),
        ),
        (
            RangeIndex(0, -100, -2),
            RangeIndex(-100, 50, 102),
            RangeIndex(-100, 4, 2),
            Int64Index(list(range(0, -100, -2)) + [-100, 2]),
        ),
        (
            RangeIndex(0, -100, -1),
            RangeIndex(0, -50, -3),
            RangeIndex(-99, 1, 1),
            Int64Index(list(range(0, -100, -1))),
        ),
        (
            RangeIndex(0, 1, 1),
            RangeIndex(5, 6, 10),
            RangeIndex(0, 6, 5),
            Int64Index([0, 5]),
        ),
        (
            RangeIndex(0, 10, 5),
            RangeIndex(-5, -6, -20),
            RangeIndex(-5, 10, 5),
            Int64Index([0, 5, -5]),
        ),
        (
            RangeIndex(0, 3, 1),
            RangeIndex(4, 5, 1),
            Int64Index([0, 1, 2, 4]),
            Int64Index([0, 1, 2, 4]),
        ),
        (
            RangeIndex(0, 10, 1),
            Int64Index([]),
            RangeIndex(0, 10, 1),
            RangeIndex(0, 10, 1),
        ),
        (
            RangeIndex(0),
            Int64Index([1, 5, 6]),
            Int64Index([1, 5, 6]),
            Int64Index([1, 5, 6]),
        ),
    ])
    def unions(self, request):
        """Inputs and expected outputs for RangeIndex.union tests"""
        return request.param

    def test_union_sorted(self, unions):

        idx1, idx2, expected_sorted, expected_notsorted = unions

        res1 = idx1.union(idx2, sort=None)
        tm.assert_index_equal(res1, expected_sorted, exact=True)

        res1 = idx1.union(idx2, sort=False)
        tm.assert_index_equal(res1, expected_notsorted, exact=True)

        res2 = idx2.union(idx1, sort=None)
        res3 = idx1._int64index.union(idx2, sort=None)
        tm.assert_index_equal(res2, expected_sorted, exact=True)
        tm.assert_index_equal(res3, expected_sorted)
예제 #15
0
 def create_index(self) -> Int64Index:
     # return Int64Index(np.arange(5, dtype="int64"))
     return Int64Index(range(0, 20, 2))
예제 #16
0
class SliceTestCase(WithSeededRandomPipelineEngine, ZiplineTestCase):
    sids = ASSET_FINDER_EQUITY_SIDS = Int64Index([1, 2, 3])
    START_DATE = Timestamp("2015-01-31", tz="UTC")
    END_DATE = Timestamp("2015-03-01", tz="UTC")
    ASSET_FINDER_COUNTRY_CODE = "US"
    SEEDED_RANDOM_PIPELINE_DEFAULT_DOMAIN = US_EQUITIES

    @classmethod
    def init_class_fixtures(cls):
        super(SliceTestCase, cls).init_class_fixtures()

        # Using the date at index 14 as the start date because when running
        # pipelines, especially those involving correlations or regressions, we
        # want to make sure there are enough days to look back on. The end date
        # at index 18 is chosen for convenience, as it makes for a contiguous
        # five day span.
        cls.pipeline_start_date = cls.trading_days[14]
        cls.pipeline_end_date = cls.trading_days[18]

        # Random input for factors.
        cls.col = TestingDataSet.float_col

    @parameter_space(my_asset_column=[0, 1, 2], window_length_=[1, 2, 3])
    def test_slice(self, my_asset_column, window_length_):
        """
        Test that slices can be created by indexing into a term, and that they
        have the correct shape when used as inputs.
        """
        sids = self.sids
        my_asset = self.asset_finder.retrieve_asset(self.sids[my_asset_column])

        returns = Returns(window_length=2, inputs=[self.col])
        returns_slice = returns[my_asset]

        class UsesSlicedInput(CustomFactor):
            window_length = window_length_
            inputs = [returns, returns_slice]

            def compute(self, today, assets, out, returns, returns_slice):
                # Make sure that our slice is the correct shape (i.e. has only
                # one column) and that it has the same values as the original
                # returns factor from which it is derived.
                assert returns_slice.shape == (self.window_length, 1)
                assert returns.shape == (self.window_length, len(sids))
                check_arrays(returns_slice[:, 0], returns[:, my_asset_column])

        # Assertions about the expected slice data are made in the `compute`
        # function of our custom factor above.
        self.run_pipeline(
            Pipeline(columns={"uses_sliced_input": UsesSlicedInput()}),
            self.pipeline_start_date,
            self.pipeline_end_date,
        )

    @parameter_space(unmasked_column=[0, 1, 2], slice_column=[0, 1, 2])
    def test_slice_with_masking(self, unmasked_column, slice_column):
        """
        Test that masking a factor that uses slices as inputs does not mask the
        slice data.
        """
        sids = self.sids
        asset_finder = self.asset_finder
        start_date = self.pipeline_start_date
        end_date = self.pipeline_end_date

        # Create a filter that masks out all but a single asset.
        unmasked_asset = asset_finder.retrieve_asset(sids[unmasked_column])
        unmasked_asset_only = AssetID().eq(unmasked_asset.sid)

        # Asset used to create our slice. In the cases where this is different
        # than `unmasked_asset`, our slice should still have non-missing data
        # when used as an input to our custom factor. That is, it should not be
        # masked out.
        slice_asset = asset_finder.retrieve_asset(sids[slice_column])

        returns = Returns(window_length=2, inputs=[self.col])
        returns_slice = returns[slice_asset]

        returns_results = self.run_pipeline(
            Pipeline(columns={"returns": returns}),
            start_date,
            end_date,
        )
        returns_results = returns_results["returns"].unstack()

        class UsesSlicedInput(CustomFactor):
            window_length = 1
            inputs = [returns, returns_slice]

            def compute(self, today, assets, out, returns, returns_slice):
                # Ensure that our mask correctly affects the `returns` input
                # and does not affect the `returns_slice` input.
                assert returns.shape == (1, 1)
                assert returns_slice.shape == (1, 1)
                assert returns[0, 0] == returns_results.loc[today, unmasked_asset]
                assert returns_slice[0, 0] == returns_results.loc[today, slice_asset]

        columns = {"masked": UsesSlicedInput(mask=unmasked_asset_only)}

        # Assertions about the expected data are made in the `compute` function
        # of our custom factor above.
        self.run_pipeline(Pipeline(columns=columns), start_date, end_date)

    def test_adding_slice_column(self):
        """
        Test that slices cannot be added as a pipeline column.
        """
        my_asset = self.asset_finder.retrieve_asset(self.sids[0])
        open_slice = OpenPrice()[my_asset]

        with self.assertRaises(UnsupportedPipelineOutput):
            Pipeline(columns={"open_slice": open_slice})

        pipe = Pipeline(columns={})
        with self.assertRaises(UnsupportedPipelineOutput):
            pipe.add(open_slice, "open_slice")

    def test_loadable_term_slices(self):
        """
        Test that slicing loadable terms raises the proper error.
        """
        my_asset = self.asset_finder.retrieve_asset(self.sids[0])

        with self.assertRaises(NonSliceableTerm):
            USEquityPricing.close[my_asset]

    def test_non_existent_asset(self):
        """
        Test that indexing into a term with a non-existent asset raises the
        proper exception.
        """
        my_asset = Asset(
            0,
            exchange_info=ExchangeInfo("TEST FULL", "TEST", "US"),
        )
        returns = Returns(window_length=2, inputs=[self.col])
        returns_slice = returns[my_asset]

        class UsesSlicedInput(CustomFactor):
            window_length = 2
            inputs = [returns_slice]

            def compute(self, today, assets, out, returns_slice):
                pass

        with self.assertRaises(NonExistentAssetInTimeFrame):
            self.run_pipeline(
                Pipeline(columns={"uses_sliced_input": UsesSlicedInput()}),
                self.pipeline_start_date,
                self.pipeline_end_date,
            )

    def test_window_safety_of_slices(self):
        """
        Test that slices correctly inherit the `window_safe` property of the
        term from which they are derived.
        """
        col = self.col
        my_asset = self.asset_finder.retrieve_asset(self.sids[0])

        # SimpleMovingAverage is not window safe.
        sma = SimpleMovingAverage(inputs=[self.col], window_length=10)
        sma_slice = sma[my_asset]

        class UsesSlicedInput(CustomFactor):
            window_length = 2
            inputs = [sma_slice]

            def compute(self, today, assets, out, sma_slice):
                pass

        with self.assertRaises(NonWindowSafeInput):
            self.run_pipeline(
                Pipeline(columns={"uses_sliced_input": UsesSlicedInput()}),
                self.pipeline_start_date,
                self.pipeline_end_date,
            )

        # Make sure that slices of custom factors are not window safe.
        class MyUnsafeFactor(CustomFactor):
            window_length = 2
            inputs = [col]

            def compute(self, today, assets, out, col):
                pass

        my_unsafe_factor = MyUnsafeFactor()
        my_unsafe_factor_slice = my_unsafe_factor[my_asset]

        class UsesSlicedInput(CustomFactor):
            window_length = 2
            inputs = [my_unsafe_factor_slice]

            def compute(self, today, assets, out, my_unsafe_factor_slice):
                pass

        with self.assertRaises(NonWindowSafeInput):
            self.run_pipeline(
                Pipeline(columns={"uses_sliced_input": UsesSlicedInput()}),
                self.pipeline_start_date,
                self.pipeline_end_date,
            )

        # Create a window safe factor.
        class MySafeFactor(CustomFactor):
            window_length = 2
            inputs = [col]
            window_safe = True

            def compute(self, today, assets, out, col):
                pass

        my_safe_factor = MySafeFactor()
        my_safe_factor_slice = my_safe_factor[my_asset]

        # Make sure that correlations are not safe if either the factor *or*
        # the target slice are not window safe.
        with self.assertRaises(NonWindowSafeInput):
            my_unsafe_factor.pearsonr(
                target=my_safe_factor_slice,
                correlation_length=10,
            )

        with self.assertRaises(NonWindowSafeInput):
            my_safe_factor.pearsonr(
                target=my_unsafe_factor_slice,
                correlation_length=10,
            )

    def test_single_column_output(self):
        """
        Tests for custom factors that compute a 1D out.
        """
        start_date = self.pipeline_start_date
        end_date = self.pipeline_end_date

        alternating_mask = (AssetIDPlusDay() % 2).eq(0)
        cascading_mask = AssetIDPlusDay() < (self.sids[-1] + start_date.day)

        class SingleColumnOutput(CustomFactor):
            window_length = 1
            inputs = [self.col]
            window_safe = True
            ndim = 1

            def compute(self, today, assets, out, col):
                # Because we specified ndim as 1, `out` should be a singleton
                # array but `close` should be a regular sized input.
                assert out.shape == (1,)
                assert col.shape == (1, 3)
                out[:] = col.sum()

        # Since we cannot add single column output factors as pipeline
        # columns, we have to test its output through another factor.
        class UsesSingleColumnOutput(CustomFactor):
            window_length = 1
            inputs = [SingleColumnOutput()]

            def compute(self, today, assets, out, single_column_output):
                # Make sure that `single_column` has the correct shape. That
                # is, it should always have one column regardless of any mask
                # passed to `UsesSingleColumnInput`.
                assert single_column_output.shape == (1, 1)

        for mask in (alternating_mask, cascading_mask):
            columns = {
                "uses_single_column_output": UsesSingleColumnOutput(),
                "uses_single_column_output_masked": UsesSingleColumnOutput(
                    mask=mask,
                ),
            }

            # Assertions about the expected shapes of our data are made in the
            # `compute` function of our custom factors above.
            self.run_pipeline(Pipeline(columns=columns), start_date, end_date)

    def test_masked_single_column_output(self):
        """
        Tests for masking custom factors that compute a 1D out.
        """
        start_date = self.pipeline_start_date
        end_date = self.pipeline_end_date

        alternating_mask = (AssetIDPlusDay() % 2).eq(0)
        cascading_mask = AssetIDPlusDay() < (self.sids[-1] + start_date.day)
        alternating_mask.window_safe = True
        cascading_mask.window_safe = True

        for mask in (alternating_mask, cascading_mask):

            class SingleColumnOutput(CustomFactor):
                window_length = 1
                inputs = [self.col, mask]
                window_safe = True
                ndim = 1

                def compute(self, today, assets, out, col, mask):
                    # Because we specified ndim as 1, `out` should always be a
                    # singleton array but `close` should be a sized based on
                    # the mask we passed.
                    assert out.shape == (1,)
                    assert col.shape == (1, mask.sum())
                    out[:] = col.sum()

            # Since we cannot add single column output factors as pipeline
            # columns, we have to test its output through another factor.
            class UsesSingleColumnInput(CustomFactor):
                window_length = 1
                inputs = [self.col, mask, SingleColumnOutput(mask=mask)]

                def compute(self, today, assets, out, col, mask, single_column_output):
                    # Make sure that `single_column` has the correct value
                    # based on the masked it used.
                    assert single_column_output.shape == (1, 1)
                    single_column_output_value = single_column_output[0][0]
                    expected_value = where(mask, col, 0).sum()
                    assert single_column_output_value == expected_value

            columns = {"uses_single_column_input": UsesSingleColumnInput()}

            # Assertions about the expected shapes of our data are made in the
            # `compute` function of our custom factors above.
            self.run_pipeline(Pipeline(columns=columns), start_date, end_date)

    @parameter_space(returns_length=[2, 3], correlation_length=[3, 4])
    def test_factor_correlation_methods(self, returns_length, correlation_length):
        """
        Ensure that `Factor.pearsonr` and `Factor.spearmanr` are consistent
        with the built-in factors `RollingPearsonOfReturns` and
        `RollingSpearmanOfReturns`.
        """
        my_asset = self.asset_finder.retrieve_asset(self.sids[0])

        returns = Returns(window_length=returns_length, inputs=[self.col])
        returns_slice = returns[my_asset]

        pearson = returns.pearsonr(
            target=returns_slice,
            correlation_length=correlation_length,
        )
        spearman = returns.spearmanr(
            target=returns_slice,
            correlation_length=correlation_length,
        )
        expected_pearson = RollingPearsonOfReturns(
            target=my_asset,
            returns_length=returns_length,
            correlation_length=correlation_length,
        )
        expected_spearman = RollingSpearmanOfReturns(
            target=my_asset,
            returns_length=returns_length,
            correlation_length=correlation_length,
        )

        # These built-ins construct their own Returns factor to use as inputs,
        # so the only way to set our own inputs is to do so after the fact.
        # This should not be done in practice. It is necessary here because we
        # want Returns to use our random data as an input, but by default it is
        # using USEquityPricing.close.
        expected_pearson.inputs = [returns, returns_slice]
        expected_spearman.inputs = [returns, returns_slice]

        columns = {
            "pearson": pearson,
            "spearman": spearman,
            "expected_pearson": expected_pearson,
            "expected_spearman": expected_spearman,
        }

        results = self.run_pipeline(
            Pipeline(columns=columns),
            self.pipeline_start_date,
            self.pipeline_end_date,
        )
        pearson_results = results["pearson"].unstack()
        spearman_results = results["spearman"].unstack()
        expected_pearson_results = results["expected_pearson"].unstack()
        expected_spearman_results = results["expected_spearman"].unstack()

        assert_frame_equal(pearson_results, expected_pearson_results)
        assert_frame_equal(spearman_results, expected_spearman_results)

        # Make sure we cannot call the correlation methods on factors or slices
        # of dtype `datetime64[ns]`.
        class DateFactor(CustomFactor):
            window_length = 1
            inputs = []
            dtype = datetime64ns_dtype
            window_safe = True

            def compute(self, today, assets, out):
                pass

        date_factor = DateFactor()
        date_factor_slice = date_factor[my_asset]

        with self.assertRaises(TypeError):
            date_factor.pearsonr(
                target=returns_slice,
                correlation_length=correlation_length,
            )
        with self.assertRaises(TypeError):
            date_factor.spearmanr(
                target=returns_slice,
                correlation_length=correlation_length,
            )
        with self.assertRaises(TypeError):
            returns.pearsonr(
                target=date_factor_slice,
                correlation_length=correlation_length,
            )
        with self.assertRaises(TypeError):
            returns.pearsonr(
                target=date_factor_slice,
                correlation_length=correlation_length,
            )

    @parameter_space(returns_length=[2, 3], regression_length=[3, 4])
    def test_factor_regression_method(self, returns_length, regression_length):
        """
        Ensure that `Factor.linear_regression` is consistent with the built-in
        factor `RollingLinearRegressionOfReturns`.
        """
        my_asset = self.asset_finder.retrieve_asset(self.sids[0])

        returns = Returns(window_length=returns_length, inputs=[self.col])
        returns_slice = returns[my_asset]

        regression = returns.linear_regression(
            target=returns_slice,
            regression_length=regression_length,
        )
        expected_regression = RollingLinearRegressionOfReturns(
            target=my_asset,
            returns_length=returns_length,
            regression_length=regression_length,
        )

        # These built-ins construct their own Returns factor to use as inputs,
        # so the only way to set our own inputs is to do so after the fact.
        # This should not be done in practice. It is necessary here because we
        # want Returns to use our random data as an input, but by default it is
        # using USEquityPricing.close.
        expected_regression.inputs = [returns, returns_slice]

        class MyFactor(CustomFactor):
            inputs = ()
            window_length = 1

            def compute(self, today, assets, out):
                out[:] = 0

        columns = {
            "regression": regression,
            "expected_regression": expected_regression,
        }

        results = self.run_pipeline(
            Pipeline(columns=columns),
            self.pipeline_start_date,
            self.pipeline_end_date,
        )
        regression_results = results["regression"].unstack()
        expected_regression_results = results["expected_regression"].unstack()

        assert_frame_equal(regression_results, expected_regression_results)

        # Make sure we cannot call the linear regression method on factors or
        # slices of dtype `datetime64[ns]`.
        class DateFactor(CustomFactor):
            window_length = 1
            inputs = []
            dtype = datetime64ns_dtype
            window_safe = True

            def compute(self, today, assets, out):
                pass

        date_factor = DateFactor()
        date_factor_slice = date_factor[my_asset]

        with self.assertRaises(TypeError):
            date_factor.linear_regression(
                target=returns_slice,
                regression_length=regression_length,
            )
        with self.assertRaises(TypeError):
            returns.linear_regression(
                target=date_factor_slice,
                regression_length=regression_length,
            )

    def test_slice_repr(self):
        my_asset = self.asset_finder.retrieve_asset(self.sids[0])
        slice_ = Returns(window_length=2)[my_asset]
        result = repr(slice_)
        self.assertEqual(result, "Returns(...)[{}]".format(my_asset))

    def test_slice_subtypes(self):
        my_asset = self.asset_finder.retrieve_asset(self.sids[0])

        class SomeFactor(Factor):
            inputs = ()
            window_length = 1
            dtype = float

        self.assertIsInstance(SomeFactor()[my_asset], Factor)

        class SomeFilter(Filter):
            inputs = ()
            window_length = 1

        self.assertIsInstance(SomeFilter()[my_asset], Filter)

        class SomeClassifier(Classifier):
            inputs = ()
            window_length = 1
            dtype = object

        self.assertIsInstance(SomeClassifier()[my_asset], Classifier)
예제 #17
0
 def test_constructor_unwraps_index(self):
     idx = Index([1, 2])
     result = Int64Index(idx)
     expected = np.array([1, 2], dtype="int64")
     tm.assert_numpy_array_equal(result._data, expected)
예제 #18
0
    def test_read_with_adjustments(self):
        columns = [USEquityPricing.high, USEquityPricing.volume]
        query_days = self.calendar_days_between(TEST_QUERY_START,
                                                TEST_QUERY_STOP)
        # Our expected results for each day are based on values from the
        # previous day.
        shifted_query_days = self.calendar_days_between(
            TEST_QUERY_START,
            TEST_QUERY_STOP,
            shift=-1,
        )

        baseline_reader = BcolzDailyBarReader(self.bcolz_path)
        adjustment_reader = SQLiteAdjustmentReader(self.db_path)
        pricing_loader = USEquityPricingLoader(
            baseline_reader,
            adjustment_reader,
        )

        highs, volumes = pricing_loader.load_adjusted_array(
            columns,
            dates=query_days,
            assets=Int64Index(arange(1, 7)),
            mask=ones((len(query_days), 6), dtype=bool),
        )

        expected_baseline_highs = self.bcolz_writer.expected_values_2d(
            shifted_query_days,
            self.assets,
            'high',
        )
        expected_baseline_volumes = self.bcolz_writer.expected_values_2d(
            shifted_query_days,
            self.assets,
            'volume',
        )

        # At each point in time, the AdjustedArrays should yield the baseline
        # with all adjustments up to that date applied.
        for windowlen in range(1, len(query_days) + 1):
            for offset, window in enumerate(highs.traverse(windowlen)):
                baseline = expected_baseline_highs[offset:offset + windowlen]
                baseline_dates = query_days[offset:offset + windowlen]
                expected_adjusted_highs = self.apply_adjustments(
                    baseline_dates,
                    self.assets,
                    baseline,
                    # Apply all adjustments.
                    concat([SPLITS, MERGERS, DIVIDENDS_EXPECTED],
                           ignore_index=True),
                )
                assert_allclose(expected_adjusted_highs, window)

            for offset, window in enumerate(volumes.traverse(windowlen)):
                baseline = expected_baseline_volumes[offset:offset + windowlen]
                baseline_dates = query_days[offset:offset + windowlen]
                # Apply only splits and invert the ratio.
                adjustments = SPLITS.copy()
                adjustments.ratio = 1 / adjustments.ratio

                expected_adjusted_volumes = self.apply_adjustments(
                    baseline_dates,
                    self.assets,
                    baseline,
                    adjustments,
                )
                # FIXME: Make AdjustedArray properly support integral types.
                assert_array_equal(
                    expected_adjusted_volumes,
                    window.astype(uint32),
                )

        # Verify that we checked up to the longest possible window.
        with self.assertRaises(WindowLengthTooLong):
            highs.traverse(windowlen + 1)
        with self.assertRaises(WindowLengthTooLong):
            volumes.traverse(windowlen + 1)
예제 #19
0
 def test_copy(self):
     i = Int64Index([], name='Foo')
     i_copy = i.copy()
     self.assertEqual(i_copy.name, 'Foo')
예제 #20
0
def get_index_loc(key, index):
    """
    Get the location of a specific key in an index

    Parameters
    ----------
    key : label
        The key for which to find the location if the underlying index is
        a DateIndex or a location if the underlying index is a RangeIndex
        or an Int64Index.
    index : pd.Index
        The index to search.

    Returns
    -------
    loc : int
        The location of the key
    index : pd.Index
        The index including the key; this is a copy of the original index
        unless the index had to be expanded to accommodate `key`.
    index_was_expanded : bool
        Whether or not the index was expanded to accommodate `key`.

    Notes
    -----
    If `key` is past the end of of the given index, and the index is either
    an Int64Index or a date index, this function extends the index up to
    and including key, and then returns the location in the new index.
    """
    base_index = index

    index = base_index
    date_index = isinstance(base_index, (PeriodIndex, DatetimeIndex))
    int_index = isinstance(base_index, Int64Index)
    range_index = isinstance(base_index, RangeIndex)
    index_class = type(base_index)
    nobs = len(index)

    # Special handling for RangeIndex
    if range_index and isinstance(key, (int, np.integer)):
        # Negative indices (that lie in the Index)
        if key < 0 and -key <= nobs:
            key = nobs + key
        # Out-of-sample (note that we include key itself in the new index)
        elif key > nobs - 1:
            # See gh5835. Remove the except after pandas 0.25 required.
            try:
                base_index_start = base_index.start
                base_index_step = base_index.step
            except AttributeError:
                base_index_start = base_index._start
                base_index_step = base_index._step
            stop = base_index_start + (key + 1) * base_index_step
            index = RangeIndex(start=base_index_start,
                               stop=stop,
                               step=base_index_step)

    # Special handling for Int64Index
    if (not range_index and int_index and not date_index
            and isinstance(key, (int, np.integer))):
        # Negative indices (that lie in the Index)
        if key < 0 and -key <= nobs:
            key = nobs + key
        # Out-of-sample (note that we include key itself in the new index)
        elif key > base_index[-1]:
            index = Int64Index(np.arange(base_index[0], int(key + 1)))

    # Special handling for date indexes
    if date_index:
        # Use index type to choose creation function
        if index_class is DatetimeIndex:
            index_fn = date_range
        else:
            index_fn = period_range
        # Integer key (i.e. already given a location)
        if isinstance(key, (int, np.integer)):
            # Negative indices (that lie in the Index)
            if key < 0 and -key < nobs:
                key = index[nobs + key]
            # Out-of-sample (note that we include key itself in the new
            # index)
            elif key > len(base_index) - 1:
                index = index_fn(start=base_index[0],
                                 periods=int(key + 1),
                                 freq=base_index.freq)
                key = index[-1]
            else:
                key = index[key]
        # Other key types (i.e. string date or some datetime-like object)
        else:
            # Covert the key to the appropriate date-like object
            if index_class is PeriodIndex:
                date_key = Period(key, freq=base_index.freq)
            else:
                date_key = Timestamp(key, freq=base_index.freq)

            # Out-of-sample
            if date_key > base_index[-1]:
                # First create an index that may not always include `key`
                index = index_fn(start=base_index[0],
                                 end=date_key,
                                 freq=base_index.freq)

                # Now make sure we include `key`
                if not index[-1] == date_key:
                    index = index_fn(start=base_index[0],
                                     periods=len(index) + 1,
                                     freq=base_index.freq)

                # To avoid possible inconsistencies with `get_loc` below,
                # set the key directly equal to the last index location
                key = index[-1]

    # Get the location
    if date_index:
        # (note that get_loc will throw a KeyError if key is invalid)
        loc = index.get_loc(key)
    elif int_index or range_index:
        # For Int64Index and RangeIndex, key is assumed to be the location
        # and not an index value (this assumption is required to support
        # RangeIndex)
        try:
            index[key]
        # We want to raise a KeyError in this case, to keep the exception
        # consistent across index types.
        # - Attempting to index with an out-of-bound location (e.g.
        #   index[10] on an index of length 9) will raise an IndexError
        #   (as of Pandas 0.22)
        # - Attemtping to index with a type that cannot be cast to integer
        #   (e.g. a non-numeric string) will raise a ValueError if the
        #   index is RangeIndex (otherwise will raise an IndexError)
        #   (as of Pandas 0.22)
        except (IndexError, ValueError) as e:
            raise KeyError(str(e))
        loc = key
    else:
        loc = index.get_loc(key)

    # Check if we now have a modified index
    index_was_expanded = index is not base_index

    # Return the index through the end of the loc / slice
    if isinstance(loc, slice):
        end = loc.stop - 1
    else:
        end = loc

    return loc, index[:end + 1], index_was_expanded
예제 #21
0
 def test_get_indexer_backfill(self):
     target = Int64Index(np.arange(10))
     indexer = self.index.get_indexer(target, method='backfill')
     expected = np.array([0, 1, 1, 2, 2, 3, 3, 4, 4, 5], dtype=np.intp)
     tm.assert_numpy_array_equal(indexer, expected)
예제 #22
0
class StatisticalMethodsTestCase(WithSeededRandomPipelineEngine,
                                 GatewayTestCase):
    sids = ASSET_FINDER_EQUITY_SIDS = Int64Index([1, 2, 3])
    START_DATE = Timestamp('2015-01-31', tz='UTC')
    END_DATE = Timestamp('2015-03-01', tz='UTC')

    @classmethod
    def init_class_fixtures(cls):
        super(StatisticalMethodsTestCase, cls).init_class_fixtures()

        # Using these start and end dates because they are a contigous span of
        # 5 days (Monday - Friday) and they allow for plenty of days to look
        # back on when computing correlations and regressions.
        cls.dates = dates = cls.trading_days
        cls.start_date_index = start_date_index = 14
        cls.end_date_index = end_date_index = 18
        cls.pipeline_start_date = cls.trading_days[start_date_index]
        cls.pipeline_end_date = cls.trading_days[end_date_index]

        sids = cls.sids
        cls.assets = assets = cls.asset_finder.retrieve_all(sids)
        cls.my_asset_column = my_asset_column = 0
        cls.my_asset = assets[my_asset_column]
        cls.num_days = num_days = end_date_index - start_date_index + 1
        cls.num_assets = num_assets = len(assets)

        cls.cascading_mask = \
            AssetIDPlusDay() < (sids[-1] + dates[start_date_index].day)
        cls.expected_cascading_mask_result = make_cascading_boolean_array(
            shape=(num_days, num_assets),
        )
        cls.alternating_mask = (AssetIDPlusDay() % 2).eq(0)
        cls.expected_alternating_mask_result = make_alternating_boolean_array(
            shape=(num_days, num_assets),
        )
        cls.expected_no_mask_result = full(
            shape=(num_days, num_assets), fill_value=True, dtype=bool_dtype,
        )

        # Random input for factors.
        cls.col = TestingDataSet.float_col

    @parameter_space(returns_length=[2, 3], correlation_length=[3, 4])
    def test_factor_correlation_methods(self,
                                        returns_length,
                                        correlation_length):
        """
        Ensure that `Factor.pearsonr` and `Factor.spearmanr` are consistent
        with the built-in factors `RollingPearsonOfReturns` and
        `RollingSpearmanOfReturns`.
        """
        my_asset = self.my_asset
        start_date = self.pipeline_start_date
        end_date = self.pipeline_end_date
        run_pipeline = self.run_pipeline

        returns = Returns(window_length=returns_length, inputs=[self.col])
        returns_slice = returns[my_asset]

        pearson = returns.pearsonr(
            target=returns_slice, correlation_length=correlation_length,
        )
        spearman = returns.spearmanr(
            target=returns_slice, correlation_length=correlation_length,
        )
        expected_pearson = RollingPearsonOfReturns(
            target=my_asset,
            returns_length=returns_length,
            correlation_length=correlation_length,
        )
        expected_spearman = RollingSpearmanOfReturns(
            target=my_asset,
            returns_length=returns_length,
            correlation_length=correlation_length,
        )

        # These built-ins construct their own Returns factor to use as inputs,
        # so the only way to set our own inputs is to do so after the fact.
        # This should not be done in practice. It is necessary here because we
        # want Returns to use our random data as an input, but by default it is
        # using USEquityPricing.close.
        expected_pearson.inputs = [returns, returns_slice]
        expected_spearman.inputs = [returns, returns_slice]

        columns = {
            'pearson': pearson,
            'spearman': spearman,
            'expected_pearson': expected_pearson,
            'expected_spearman': expected_spearman,
        }

        results = run_pipeline(Pipeline(columns=columns), start_date, end_date)
        pearson_results = results['pearson'].unstack()
        spearman_results = results['spearman'].unstack()
        expected_pearson_results = results['expected_pearson'].unstack()
        expected_spearman_results = results['expected_spearman'].unstack()

        assert_frame_equal(pearson_results, expected_pearson_results)
        assert_frame_equal(spearman_results, expected_spearman_results)

    def test_correlation_methods_bad_type(self):
        """
        Make sure we cannot call the Factor correlation methods on factors or
        slices that are not of float or int dtype.
        """
        # These are arbitrary for the purpose of this test.
        returns_length = 2
        correlation_length = 10

        returns = Returns(window_length=returns_length, inputs=[self.col])
        returns_slice = returns[self.my_asset]

        class BadTypeFactor(CustomFactor):
            inputs = []
            window_length = 1
            dtype = datetime64ns_dtype
            window_safe = True

            def compute(self, today, assets, out):
                pass

        bad_type_factor = BadTypeFactor()
        bad_type_factor_slice = bad_type_factor[self.my_asset]

        with self.assertRaises(TypeError):
            bad_type_factor.pearsonr(
                target=returns_slice, correlation_length=correlation_length,
            )
        with self.assertRaises(TypeError):
            bad_type_factor.spearmanr(
                target=returns_slice, correlation_length=correlation_length,
            )
        with self.assertRaises(TypeError):
            returns.pearsonr(
                target=bad_type_factor_slice,
                correlation_length=correlation_length,
            )
        with self.assertRaises(TypeError):
            returns.spearmanr(
                target=bad_type_factor_slice,
                correlation_length=correlation_length,
            )

    @parameter_space(returns_length=[2, 3], regression_length=[3, 4])
    def test_factor_regression_method(self, returns_length, regression_length):
        """
        Ensure that `Factor.linear_regression` is consistent with the built-in
        factor `RollingLinearRegressionOfReturns`.
        """
        my_asset = self.my_asset
        start_date = self.pipeline_start_date
        end_date = self.pipeline_end_date
        run_pipeline = self.run_pipeline

        returns = Returns(window_length=returns_length, inputs=[self.col])
        returns_slice = returns[my_asset]

        regression = returns.linear_regression(
            target=returns_slice, regression_length=regression_length,
        )
        expected_regression = RollingLinearRegressionOfReturns(
            target=my_asset,
            returns_length=returns_length,
            regression_length=regression_length,
        )

        # This built-in constructs its own Returns factor to use as an input,
        # so the only way to set our own input is to do so after the fact. This
        # should not be done in practice. It is necessary here because we want
        # Returns to use our random data as an input, but by default it is
        # using USEquityPricing.close.
        expected_regression.inputs = [returns, returns_slice]

        columns = {
            'regression': regression,
            'expected_regression': expected_regression,
        }

        results = run_pipeline(Pipeline(columns=columns), start_date, end_date)
        regression_results = results['regression'].unstack()
        expected_regression_results = results['expected_regression'].unstack()

        assert_frame_equal(regression_results, expected_regression_results)

    def test_regression_method_bad_type(self):
        """
        Make sure we cannot call the Factor linear regression method on factors
        or slices that are not of float or int dtype.
        """
        # These are arbitrary for the purpose of this test.
        returns_length = 2
        regression_length = 10

        returns = Returns(window_length=returns_length, inputs=[self.col])
        returns_slice = returns[self.my_asset]

        class BadTypeFactor(CustomFactor):
            window_length = 1
            inputs = []
            dtype = datetime64ns_dtype
            window_safe = True

            def compute(self, today, assets, out):
                pass

        bad_type_factor = BadTypeFactor()
        bad_type_factor_slice = bad_type_factor[self.my_asset]

        with self.assertRaises(TypeError):
            bad_type_factor.linear_regression(
                target=returns_slice, regression_length=regression_length,
            )
        with self.assertRaises(TypeError):
            returns.linear_regression(
                target=bad_type_factor_slice,
                regression_length=regression_length,
            )

    @parameter_space(correlation_length=[2, 3, 4])
    def test_factor_correlation_methods_two_factors(self, correlation_length):
        """
        Tests for `Factor.pearsonr` and `Factor.spearmanr` when passed another
        2D factor instead of a Slice.
        """
        assets = self.assets
        dates = self.dates
        start_date = self.pipeline_start_date
        end_date = self.pipeline_end_date
        start_date_index = self.start_date_index
        end_date_index = self.end_date_index
        num_days = self.num_days
        run_pipeline = self.run_pipeline

        # Ensure that the correlation methods cannot be called with two 2D
        # factors which have different masks.
        returns_masked_1 = Returns(
            window_length=5, inputs=[self.col], mask=AssetID().eq(1),
        )
        returns_masked_2 = Returns(
            window_length=5, inputs=[self.col], mask=AssetID().eq(2),
        )
        with self.assertRaises(IncompatibleTerms):
            returns_masked_1.pearsonr(
                target=returns_masked_2, correlation_length=correlation_length,
            )
        with self.assertRaises(IncompatibleTerms):
            returns_masked_1.spearmanr(
                target=returns_masked_2, correlation_length=correlation_length,
            )

        returns_5 = Returns(window_length=5, inputs=[self.col])
        returns_10 = Returns(window_length=10, inputs=[self.col])

        pearson_factor = returns_5.pearsonr(
            target=returns_10, correlation_length=correlation_length,
        )
        spearman_factor = returns_5.spearmanr(
            target=returns_10, correlation_length=correlation_length,
        )

        columns = {
            'pearson_factor': pearson_factor,
            'spearman_factor': spearman_factor,
        }
        pipeline = Pipeline(columns=columns)

        results = run_pipeline(pipeline, start_date, end_date)
        pearson_results = results['pearson_factor'].unstack()
        spearman_results = results['spearman_factor'].unstack()

        # Run a separate pipeline that calculates returns starting
        # (correlation_length - 1) days prior to our start date. This is
        # because we need (correlation_length - 1) extra days of returns to
        # compute our expected correlations.
        columns = {'returns_5': returns_5, 'returns_10': returns_10}
        results = run_pipeline(
            Pipeline(columns=columns),
            dates[start_date_index - (correlation_length - 1)],
            dates[end_date_index],
        )
        returns_5_results = results['returns_5'].unstack()
        returns_10_results = results['returns_10'].unstack()

        # On each day, calculate the expected correlation coefficients
        # between each asset's 5 and 10 day rolling returns. Each correlation
        # is calculated over `correlation_length` days.
        expected_pearson_results = full_like(pearson_results, nan)
        expected_spearman_results = full_like(spearman_results, nan)
        for day in range(num_days):
            todays_returns_5 = returns_5_results.iloc[
                day:day + correlation_length
            ]
            todays_returns_10 = returns_10_results.iloc[
                day:day + correlation_length
            ]
            for asset, asset_returns_5 in todays_returns_5.iteritems():
                asset_column = int(asset) - 1
                asset_returns_10 = todays_returns_10[asset]
                expected_pearson_results[day, asset_column] = pearsonr(
                    asset_returns_5, asset_returns_10,
                )[0]
                expected_spearman_results[day, asset_column] = spearmanr(
                    asset_returns_5, asset_returns_10,
                )[0]

        expected_pearson_results = DataFrame(
            data=expected_pearson_results,
            index=dates[start_date_index:end_date_index + 1],
            columns=assets,
        )
        assert_frame_equal(pearson_results, expected_pearson_results)

        expected_spearman_results = DataFrame(
            data=expected_spearman_results,
            index=dates[start_date_index:end_date_index + 1],
            columns=assets,
        )
        assert_frame_equal(spearman_results, expected_spearman_results)

    @parameter_space(regression_length=[2, 3, 4])
    def test_factor_regression_method_two_factors(self, regression_length):
        """
        Tests for `Factor.linear_regression` when passed another 2D factor
        instead of a Slice.
        """
        assets = self.assets
        dates = self.dates
        start_date = self.pipeline_start_date
        end_date = self.pipeline_end_date
        start_date_index = self.start_date_index
        end_date_index = self.end_date_index
        num_days = self.num_days
        run_pipeline = self.run_pipeline

        # The order of these is meant to align with the output of `linregress`.
        outputs = ['beta', 'alpha', 'r_value', 'p_value', 'stderr']

        # Ensure that the `linear_regression` method cannot be called with two
        # 2D factors which have different masks.
        returns_masked_1 = Returns(
            window_length=5, inputs=[self.col], mask=AssetID().eq(1),
        )
        returns_masked_2 = Returns(
            window_length=5, inputs=[self.col], mask=AssetID().eq(2),
        )
        with self.assertRaises(IncompatibleTerms):
            returns_masked_1.linear_regression(
                target=returns_masked_2, regression_length=regression_length,
            )

        returns_5 = Returns(window_length=5, inputs=[self.col])
        returns_10 = Returns(window_length=10, inputs=[self.col])

        regression_factor = returns_5.linear_regression(
            target=returns_10, regression_length=regression_length,
        )

        columns = {
            output: getattr(regression_factor, output)
            for output in outputs
        }
        pipeline = Pipeline(columns=columns)

        results = run_pipeline(pipeline, start_date, end_date)

        output_results = {}
        expected_output_results = {}
        for output in outputs:
            output_results[output] = results[output].unstack()
            expected_output_results[output] = full_like(
                output_results[output], nan,
            )

        # Run a separate pipeline that calculates returns starting
        # (regression_length - 1) days prior to our start date. This is because
        # we need (regression_length - 1) extra days of returns to compute our
        # expected regressions.
        columns = {'returns_5': returns_5, 'returns_10': returns_10}
        results = run_pipeline(
            Pipeline(columns=columns),
            dates[start_date_index - (regression_length - 1)],
            dates[end_date_index],
        )
        returns_5_results = results['returns_5'].unstack()
        returns_10_results = results['returns_10'].unstack()

        # On each day, for each asset, calculate the expected regression
        # results of Y ~ X where Y is the asset's rolling 5 day returns and X
        # is the asset's rolling 10 day returns. Each regression is calculated
        # over `regression_length` days of data.
        for day in range(num_days):
            todays_returns_5 = returns_5_results.iloc[
                day:day + regression_length
            ]
            todays_returns_10 = returns_10_results.iloc[
                day:day + regression_length
            ]
            for asset, asset_returns_5 in todays_returns_5.iteritems():
                asset_column = int(asset) - 1
                asset_returns_10 = todays_returns_10[asset]
                expected_regression_results = linregress(
                    y=asset_returns_5, x=asset_returns_10,
                )
                for i, output in enumerate(outputs):
                    expected_output_results[output][day, asset_column] = \
                        expected_regression_results[i]

        for output in outputs:
            output_result = output_results[output]
            expected_output_result = DataFrame(
                expected_output_results[output],
                index=dates[start_date_index:end_date_index + 1],
                columns=assets,
            )
            assert_frame_equal(output_result, expected_output_result)
예제 #23
0
 def test_slice_keep_name(self):
     idx = Int64Index([1, 2], name='asdf')
     self.assertEqual(idx.name, idx[1:].name)
예제 #24
0
class StatisticalBuiltInsTestCase(WithTradingEnvironment, GatewayTestCase):
    sids = ASSET_FINDER_EQUITY_SIDS = Int64Index([1, 2, 3])
    START_DATE = Timestamp('2015-01-31', tz='UTC')
    END_DATE = Timestamp('2015-03-01', tz='UTC')
    ASSET_FINDER_EQUITY_SYMBOLS = ('A', 'B', 'C')

    @classmethod
    def init_class_fixtures(cls):
        super(StatisticalBuiltInsTestCase, cls).init_class_fixtures()

        day = cls.trading_calendar.day
        cls.dates = dates = date_range(
            '2015-02-01', '2015-02-28', freq=day, tz='UTC',
        )

        # Using these start and end dates because they are a contigous span of
        # 5 days (Monday - Friday) and they allow for plenty of days to look
        # back on when computing correlations and regressions.
        cls.start_date_index = start_date_index = 14
        cls.end_date_index = end_date_index = 18
        cls.pipeline_start_date = dates[start_date_index]
        cls.pipeline_end_date = dates[end_date_index]
        cls.num_days = num_days = end_date_index - start_date_index + 1

        sids = cls.sids
        cls.assets = assets = cls.asset_finder.retrieve_all(sids)
        cls.my_asset_column = my_asset_column = 0
        cls.my_asset = assets[my_asset_column]
        cls.num_assets = num_assets = len(assets)

        cls.raw_data = raw_data = DataFrame(
            data=arange(len(dates) * len(sids), dtype=float64_dtype).reshape(
                len(dates), len(sids),
            ),
            index=dates,
            columns=assets,
        )

        # Using mock 'close' data here because the correlation and regression
        # built-ins use USEquityPricing.close as the input to their `Returns`
        # factors. Since there is no way to change that when constructing an
        # instance of these built-ins, we need to test with mock 'close' data
        # to most accurately reflect their true behavior and results.
        close_loader = DataFrameLoader(USEquityPricing.close, raw_data)

        cls.run_pipeline = SimplePipelineEngine(
            {USEquityPricing.close: close_loader}.__getitem__,
            dates,
            cls.asset_finder,
        ).run_pipeline

        cls.cascading_mask = \
            AssetIDPlusDay() < (sids[-1] + dates[start_date_index].day)
        cls.expected_cascading_mask_result = make_cascading_boolean_array(
            shape=(num_days, num_assets),
        )
        cls.alternating_mask = (AssetIDPlusDay() % 2).eq(0)
        cls.expected_alternating_mask_result = make_alternating_boolean_array(
            shape=(num_days, num_assets),
        )
        cls.expected_no_mask_result = full(
            shape=(num_days, num_assets), fill_value=True, dtype=bool_dtype,
        )

    @parameter_space(returns_length=[2, 3], correlation_length=[3, 4])
    def test_correlation_factors(self, returns_length, correlation_length):
        """
        Tests for the built-in factors `RollingPearsonOfReturns` and
        `RollingSpearmanOfReturns`.
        """
        assets = self.assets
        my_asset = self.my_asset
        my_asset_column = self.my_asset_column
        dates = self.dates
        start_date = self.pipeline_start_date
        end_date = self.pipeline_end_date
        start_date_index = self.start_date_index
        end_date_index = self.end_date_index
        num_days = self.num_days
        run_pipeline = self.run_pipeline

        returns = Returns(window_length=returns_length)
        masks = (self.cascading_mask, self.alternating_mask, NotSpecified)
        expected_mask_results = (
            self.expected_cascading_mask_result,
            self.expected_alternating_mask_result,
            self.expected_no_mask_result,
        )

        for mask, expected_mask in zip(masks, expected_mask_results):
            pearson_factor = RollingPearsonOfReturns(
                target=my_asset,
                returns_length=returns_length,
                correlation_length=correlation_length,
                mask=mask,
            )
            spearman_factor = RollingSpearmanOfReturns(
                target=my_asset,
                returns_length=returns_length,
                correlation_length=correlation_length,
                mask=mask,
            )

            columns = {
                'pearson_factor': pearson_factor,
                'spearman_factor': spearman_factor,
            }
            pipeline = Pipeline(columns=columns)
            if mask is not NotSpecified:
                pipeline.add(mask, 'mask')

            results = run_pipeline(pipeline, start_date, end_date)
            pearson_results = results['pearson_factor'].unstack()
            spearman_results = results['spearman_factor'].unstack()
            if mask is not NotSpecified:
                mask_results = results['mask'].unstack()
                check_arrays(mask_results.values, expected_mask)

            # Run a separate pipeline that calculates returns starting
            # (correlation_length - 1) days prior to our start date. This is
            # because we need (correlation_length - 1) extra days of returns to
            # compute our expected correlations.
            results = run_pipeline(
                Pipeline(columns={'returns': returns}),
                dates[start_date_index - (correlation_length - 1)],
                dates[end_date_index],
            )
            returns_results = results['returns'].unstack()

            # On each day, calculate the expected correlation coefficients
            # between the asset we are interested in and each other asset. Each
            # correlation is calculated over `correlation_length` days.
            expected_pearson_results = full_like(pearson_results, nan)
            expected_spearman_results = full_like(spearman_results, nan)
            for day in range(num_days):
                todays_returns = returns_results.iloc[
                    day:day + correlation_length
                ]
                my_asset_returns = todays_returns.iloc[:, my_asset_column]
                for asset, other_asset_returns in todays_returns.iteritems():
                    asset_column = int(asset) - 1
                    expected_pearson_results[day, asset_column] = pearsonr(
                        my_asset_returns, other_asset_returns,
                    )[0]
                    expected_spearman_results[day, asset_column] = spearmanr(
                        my_asset_returns, other_asset_returns,
                    )[0]

            expected_pearson_results = DataFrame(
                data=where(expected_mask, expected_pearson_results, nan),
                index=dates[start_date_index:end_date_index + 1],
                columns=assets,
            )
            assert_frame_equal(pearson_results, expected_pearson_results)

            expected_spearman_results = DataFrame(
                data=where(expected_mask, expected_spearman_results, nan),
                index=dates[start_date_index:end_date_index + 1],
                columns=assets,
            )
            assert_frame_equal(spearman_results, expected_spearman_results)

    @parameter_space(returns_length=[2, 3], regression_length=[3, 4])
    def test_regression_of_returns_factor(self,
                                          returns_length,
                                          regression_length):
        """
        Tests for the built-in factor `RollingLinearRegressionOfReturns`.
        """
        assets = self.assets
        my_asset = self.my_asset
        my_asset_column = self.my_asset_column
        dates = self.dates
        start_date = self.pipeline_start_date
        end_date = self.pipeline_end_date
        start_date_index = self.start_date_index
        end_date_index = self.end_date_index
        num_days = self.num_days
        run_pipeline = self.run_pipeline

        # The order of these is meant to align with the output of `linregress`.
        outputs = ['beta', 'alpha', 'r_value', 'p_value', 'stderr']

        returns = Returns(window_length=returns_length)
        masks = self.cascading_mask, self.alternating_mask, NotSpecified
        expected_mask_results = (
            self.expected_cascading_mask_result,
            self.expected_alternating_mask_result,
            self.expected_no_mask_result,
        )

        for mask, expected_mask in zip(masks, expected_mask_results):
            regression_factor = RollingLinearRegressionOfReturns(
                target=my_asset,
                returns_length=returns_length,
                regression_length=regression_length,
                mask=mask,
            )

            columns = {
                output: getattr(regression_factor, output)
                for output in outputs
            }
            pipeline = Pipeline(columns=columns)
            if mask is not NotSpecified:
                pipeline.add(mask, 'mask')

            results = run_pipeline(pipeline, start_date, end_date)
            if mask is not NotSpecified:
                mask_results = results['mask'].unstack()
                check_arrays(mask_results.values, expected_mask)

            output_results = {}
            expected_output_results = {}
            for output in outputs:
                output_results[output] = results[output].unstack()
                expected_output_results[output] = full_like(
                    output_results[output], nan,
                )

            # Run a separate pipeline that calculates returns starting
            # (regression_length - 1) days prior to our start date. This is
            # because we need (regression_length - 1) extra days of returns to
            # compute our expected regressions.
            results = run_pipeline(
                Pipeline(columns={'returns': returns}),
                dates[start_date_index - (regression_length - 1)],
                dates[end_date_index],
            )
            returns_results = results['returns'].unstack()

            # On each day, calculate the expected regression results for Y ~ X
            # where Y is the asset we are interested in and X is each other
            # asset. Each regression is calculated over `regression_length`
            # days of data.
            for day in range(num_days):
                todays_returns = returns_results.iloc[
                    day:day + regression_length
                ]
                my_asset_returns = todays_returns.iloc[:, my_asset_column]
                for asset, other_asset_returns in todays_returns.iteritems():
                    asset_column = int(asset) - 1
                    expected_regression_results = linregress(
                        y=other_asset_returns, x=my_asset_returns,
                    )
                    for i, output in enumerate(outputs):
                        expected_output_results[output][day, asset_column] = \
                            expected_regression_results[i]

            for output in outputs:
                output_result = output_results[output]
                expected_output_result = DataFrame(
                    where(expected_mask, expected_output_results[output], nan),
                    index=dates[start_date_index:end_date_index + 1],
                    columns=assets,
                )
                assert_frame_equal(output_result, expected_output_result)

    def test_simple_beta_matches_regression(self):
        run_pipeline = self.run_pipeline
        simple_beta = SimpleBeta(target=self.my_asset, regression_length=10)
        complex_beta = RollingLinearRegressionOfReturns(
            target=self.my_asset,
            returns_length=2,
            regression_length=10,
        ).beta
        pipe = Pipeline({'simple': simple_beta, 'complex': complex_beta})
        results = run_pipeline(
            pipe,
            self.pipeline_start_date,
            self.pipeline_end_date,
        )
        assert_equal(results['simple'], results['complex'], check_names=False)

    def test_simple_beta_allowed_missing_calculation(self):
        for percentage, expected in [(0.651, 65),
                                     (0.659, 65),
                                     (0.66, 66),
                                     (0.0, 0),
                                     (1.0, 100)]:
            beta = SimpleBeta(
                target=self.my_asset,
                regression_length=100,
                allowed_missing_percentage=percentage,
            )
            self.assertEqual(beta.params['allowed_missing_count'], expected)

    def test_correlation_and_regression_with_bad_asset(self):
        """
        Test that `RollingPearsonOfReturns`, `RollingSpearmanOfReturns` and
        `RollingLinearRegressionOfReturns` raise the proper exception when
        given a nonexistent target asset.
        """
        my_asset = Equity(0, exchange="TEST")
        start_date = self.pipeline_start_date
        end_date = self.pipeline_end_date
        run_pipeline = self.run_pipeline

        # This filter is arbitrary; the important thing is that we test each
        # factor both with and without a specified mask.
        my_asset_filter = AssetID().eq(1)

        for mask in (NotSpecified, my_asset_filter):
            pearson_factor = RollingPearsonOfReturns(
                target=my_asset,
                returns_length=3,
                correlation_length=3,
                mask=mask,
            )
            spearman_factor = RollingSpearmanOfReturns(
                target=my_asset,
                returns_length=3,
                correlation_length=3,
                mask=mask,
            )
            regression_factor = RollingLinearRegressionOfReturns(
                target=my_asset,
                returns_length=3,
                regression_length=3,
                mask=mask,
            )

            with self.assertRaises(NonExistentAssetInTimeFrame):
                run_pipeline(
                    Pipeline(columns={'pearson_factor': pearson_factor}),
                    start_date,
                    end_date,
                )
            with self.assertRaises(NonExistentAssetInTimeFrame):
                run_pipeline(
                    Pipeline(columns={'spearman_factor': spearman_factor}),
                    start_date,
                    end_date,
                )
            with self.assertRaises(NonExistentAssetInTimeFrame):
                run_pipeline(
                    Pipeline(columns={'regression_factor': regression_factor}),
                    start_date,
                    end_date,
                )

    def test_require_length_greater_than_one(self):
        my_asset = Equity(0, exchange="TEST")

        with self.assertRaises(ValueError):
            RollingPearsonOfReturns(
                target=my_asset,
                returns_length=3,
                correlation_length=1,
            )

        with self.assertRaises(ValueError):
            RollingSpearmanOfReturns(
                target=my_asset,
                returns_length=3,
                correlation_length=1,
            )

        with self.assertRaises(ValueError):
            RollingLinearRegressionOfReturns(
                target=my_asset,
                returns_length=3,
                regression_length=1,
            )

    def test_simple_beta_input_validation(self):
        with self.assertRaises(TypeError) as e:
            SimpleBeta(
                target="SPY",
                regression_length=100,
                allowed_missing_percentage=0.5,
            )
        result = str(e.exception)
        expected = (
            r"SimpleBeta\(\) expected a value of type"
            " .*Asset for argument 'target',"
            " but got str instead."
        )
        self.assertRegexpMatches(result, expected)

        with self.assertRaises(ValueError) as e:
            SimpleBeta(
                target=self.my_asset,
                regression_length=1,
                allowed_missing_percentage=0.5,
            )
        result = str(e.exception)
        expected = (
            "SimpleBeta() expected a value greater than or equal to 3"
            " for argument 'regression_length', but got 1 instead."
        )
        self.assertEqual(result, expected)

        with self.assertRaises(ValueError) as e:
            SimpleBeta(
                target=self.my_asset,
                regression_length=100,
                allowed_missing_percentage=50,
            )
        result = str(e.exception)
        expected = (
            "SimpleBeta() expected a value inclusively between 0.0 and 1.0 "
            "for argument 'allowed_missing_percentage', but got 50 instead."
        )
        self.assertEqual(result, expected)

    def test_simple_beta_target(self):
        beta = SimpleBeta(
            target=self.my_asset,
            regression_length=50,
            allowed_missing_percentage=0.5,
        )
        self.assertIs(beta.target, self.my_asset)

    def test_simple_beta_repr(self):
        beta = SimpleBeta(
            target=self.my_asset,
            regression_length=50,
            allowed_missing_percentage=0.5,
        )
        result = repr(beta)
        expected = "SimpleBeta({}, length=50, allowed_missing=25)".format(
            self.my_asset,
        )
        self.assertEqual(result, expected)

    def test_simple_beta_short_repr(self):
        beta = SimpleBeta(
            target=self.my_asset,
            regression_length=50,
            allowed_missing_percentage=0.5,
        )
        result = beta.short_repr()
        expected = "SimpleBeta('A', 50, 25)".format(self.my_asset)
        self.assertEqual(result, expected)
예제 #25
0
    def test_slice_integer(self):

        # same as above, but for Integer based indexes
        # these coerce to a like integer
        # oob indicates if we are out of bounds
        # of positional indexing
        for index, oob in [(Int64Index(range(5)), False),
                           (RangeIndex(5), False),
                           (Int64Index(range(5)) + 10, True)]:

            # s is an in-range index
            s = Series(range(5), index=index)

            # getitem
            for l in [slice(3.0, 4),
                      slice(3, 4.0),
                      slice(3.0, 4.0)]:

                for idxr in [lambda x: x.loc,
                             lambda x: x.ix]:

                    with catch_warnings(record=True):
                        result = idxr(s)[l]

                    # these are all label indexing
                    # except getitem which is positional
                    # empty
                    if oob:
                        indexer = slice(0, 0)
                    else:
                        indexer = slice(3, 5)
                    self.check(result, s, indexer, False)

                # positional indexing
                msg = ("cannot do slice indexing"
                       r" on {klass} with these indexers \[(3|4)\.0\] of"
                       " {kind}"
                       .format(klass=type(index), kind=str(float)))
                with pytest.raises(TypeError, match=msg):
                    s[l]

            # getitem out-of-bounds
            for l in [slice(-6, 6),
                      slice(-6.0, 6.0)]:

                for idxr in [lambda x: x.loc,
                             lambda x: x.ix]:
                    with catch_warnings(record=True):
                        result = idxr(s)[l]

                    # these are all label indexing
                    # except getitem which is positional
                    # empty
                    if oob:
                        indexer = slice(0, 0)
                    else:
                        indexer = slice(-6, 6)
                    self.check(result, s, indexer, False)

            # positional indexing
            msg = ("cannot do slice indexing"
                   r" on {klass} with these indexers \[-6\.0\] of"
                   " {kind}"
                   .format(klass=type(index), kind=str(float)))
            with pytest.raises(TypeError, match=msg):
                s[slice(-6.0, 6.0)]

            # getitem odd floats
            for l, res1 in [(slice(2.5, 4), slice(3, 5)),
                            (slice(2, 3.5), slice(2, 4)),
                            (slice(2.5, 3.5), slice(3, 4))]:

                for idxr in [lambda x: x.loc,
                             lambda x: x.ix]:

                    with catch_warnings(record=True):
                        result = idxr(s)[l]
                    if oob:
                        res = slice(0, 0)
                    else:
                        res = res1

                    self.check(result, s, res, False)

                # positional indexing
                msg = ("cannot do slice indexing"
                       r" on {klass} with these indexers \[(2|3)\.5\] of"
                       " {kind}"
                       .format(klass=type(index), kind=str(float)))
                with pytest.raises(TypeError, match=msg):
                    s[l]

            # setitem
            for l in [slice(3.0, 4),
                      slice(3, 4.0),
                      slice(3.0, 4.0)]:

                for idxr in [lambda x: x.loc,
                             lambda x: x.ix]:
                    sc = s.copy()
                    with catch_warnings(record=True):
                        idxr(sc)[l] = 0
                        result = idxr(sc)[l].values.ravel()
                    assert (result == 0).all()

                # positional indexing
                msg = ("cannot do slice indexing"
                       r" on {klass} with these indexers \[(3|4)\.0\] of"
                       " {kind}"
                       .format(klass=type(index), kind=str(float)))
                with pytest.raises(TypeError, match=msg):
                    s[l] = 0
예제 #26
0
class TestFloat64Index(Numeric):
    _holder = Float64Index

    @pytest.fixture(
        params=[
            [1.5, 2, 3, 4, 5],
            [0.0, 2.5, 5.0, 7.5, 10.0],
            [5, 4, 3, 2, 1.5],
            [10.0, 7.5, 5.0, 2.5, 0.0],
        ],
        ids=["mixed", "float", "mixed_dec", "float_dec"],
    )
    def index(self, request):
        return Float64Index(request.param)

    @pytest.fixture
    def mixed_index(self):
        return Float64Index([1.5, 2, 3, 4, 5])

    @pytest.fixture
    def float_index(self):
        return Float64Index([0.0, 2.5, 5.0, 7.5, 10.0])

    def create_index(self) -> Float64Index:
        return Float64Index(np.arange(5, dtype="float64"))

    def test_repr_roundtrip(self, index):
        tm.assert_index_equal(eval(repr(index)), index)

    def check_is_index(self, i):
        assert isinstance(i, Index)
        assert not isinstance(i, Float64Index)

    def check_coerce(self, a, b, is_float_index=True):
        assert a.equals(b)
        tm.assert_index_equal(a, b, exact=False)
        if is_float_index:
            assert isinstance(b, Float64Index)
        else:
            self.check_is_index(b)

    def test_constructor(self):

        # explicit construction
        index = Float64Index([1, 2, 3, 4, 5])
        assert isinstance(index, Float64Index)
        expected = np.array([1, 2, 3, 4, 5], dtype="float64")
        tm.assert_numpy_array_equal(index.values, expected)
        index = Float64Index(np.array([1, 2, 3, 4, 5]))
        assert isinstance(index, Float64Index)
        index = Float64Index([1.0, 2, 3, 4, 5])
        assert isinstance(index, Float64Index)
        index = Float64Index(np.array([1.0, 2, 3, 4, 5]))
        assert isinstance(index, Float64Index)
        assert index.dtype == float

        index = Float64Index(np.array([1.0, 2, 3, 4, 5]), dtype=np.float32)
        assert isinstance(index, Float64Index)
        assert index.dtype == np.float64

        index = Float64Index(np.array([1, 2, 3, 4, 5]), dtype=np.float32)
        assert isinstance(index, Float64Index)
        assert index.dtype == np.float64

        # nan handling
        result = Float64Index([np.nan, np.nan])
        assert pd.isna(result.values).all()
        result = Float64Index(np.array([np.nan]))
        assert pd.isna(result.values).all()
        result = Index(np.array([np.nan]))
        assert pd.isna(result.values).all()

    @pytest.mark.parametrize(
        "index, dtype",
        [
            (Int64Index, "float64"),
            (UInt64Index, "categorical"),
            (Float64Index, "datetime64"),
            (RangeIndex, "float64"),
        ],
    )
    def test_invalid_dtype(self, index, dtype):
        # GH 29539
        with pytest.raises(
                ValueError,
                match=
                rf"Incorrect `dtype` passed: expected \w+(?: \w+)?, received {dtype}",
        ):
            index([1, 2, 3], dtype=dtype)

    def test_constructor_invalid(self):

        # invalid
        msg = (r"Float64Index\(\.\.\.\) must be called with a collection of "
               r"some kind, 0\.0 was passed")
        with pytest.raises(TypeError, match=msg):
            Float64Index(0.0)

        # 2021-02-1 we get ValueError in numpy 1.20, but not on all builds
        msg = "|".join([
            "String dtype not supported, you may need to explicitly cast ",
            "could not convert string to float: 'a'",
        ])
        with pytest.raises((TypeError, ValueError), match=msg):
            Float64Index(["a", "b", 0.0])

        msg = r"float\(\) argument must be a string or a number, not 'Timestamp'"
        with pytest.raises(TypeError, match=msg):
            Float64Index([Timestamp("20130101")])

    def test_constructor_coerce(self, mixed_index, float_index):

        self.check_coerce(mixed_index, Index([1.5, 2, 3, 4, 5]))
        self.check_coerce(float_index, Index(np.arange(5) * 2.5))
        self.check_coerce(float_index,
                          Index(np.array(np.arange(5) * 2.5, dtype=object)))

    def test_constructor_explicit(self, mixed_index, float_index):

        # these don't auto convert
        self.check_coerce(float_index,
                          Index((np.arange(5) * 2.5), dtype=object),
                          is_float_index=False)
        self.check_coerce(mixed_index,
                          Index([1.5, 2, 3, 4, 5], dtype=object),
                          is_float_index=False)

    def test_type_coercion_fail(self, any_int_dtype):
        # see gh-15832
        msg = "Trying to coerce float values to integers"
        with pytest.raises(ValueError, match=msg):
            Index([1, 2, 3.5], dtype=any_int_dtype)

    def test_type_coercion_valid(self, float_dtype):
        # There is no Float32Index, so we always
        # generate Float64Index.
        i = Index([1, 2, 3.5], dtype=float_dtype)
        tm.assert_index_equal(i, Index([1, 2, 3.5]))

    def test_equals_numeric(self):

        i = Float64Index([1.0, 2.0])
        assert i.equals(i)
        assert i.identical(i)

        i2 = Float64Index([1.0, 2.0])
        assert i.equals(i2)

        i = Float64Index([1.0, np.nan])
        assert i.equals(i)
        assert i.identical(i)

        i2 = Float64Index([1.0, np.nan])
        assert i.equals(i2)

    @pytest.mark.parametrize(
        "other",
        (
            Int64Index([1, 2]),
            Index([1.0, 2.0], dtype=object),
            Index([1, 2], dtype=object),
        ),
    )
    def test_equals_numeric_other_index_type(self, other):
        i = Float64Index([1.0, 2.0])
        assert i.equals(other)
        assert other.equals(i)

    @pytest.mark.parametrize(
        "vals",
        [
            pd.date_range("2016-01-01", periods=3),
            pd.timedelta_range("1 Day", periods=3),
        ],
    )
    def test_lookups_datetimelike_values(self, vals):
        # If we have datetime64 or timedelta64 values, make sure they are
        #  wrappped correctly  GH#31163
        ser = Series(vals, index=range(3, 6))
        ser.index = ser.index.astype("float64")

        expected = vals[1]

        with tm.assert_produces_warning(FutureWarning):
            result = ser.index.get_value(ser, 4.0)
        assert isinstance(result, type(expected)) and result == expected
        with tm.assert_produces_warning(FutureWarning):
            result = ser.index.get_value(ser, 4)
        assert isinstance(result, type(expected)) and result == expected

        result = ser[4.0]
        assert isinstance(result, type(expected)) and result == expected
        result = ser[4]
        assert isinstance(result, type(expected)) and result == expected

        result = ser.loc[4.0]
        assert isinstance(result, type(expected)) and result == expected
        result = ser.loc[4]
        assert isinstance(result, type(expected)) and result == expected

        result = ser.at[4.0]
        assert isinstance(result, type(expected)) and result == expected
        # GH#31329 .at[4] should cast to 4.0, matching .loc behavior
        result = ser.at[4]
        assert isinstance(result, type(expected)) and result == expected

        result = ser.iloc[1]
        assert isinstance(result, type(expected)) and result == expected

        result = ser.iat[1]
        assert isinstance(result, type(expected)) and result == expected

    def test_doesnt_contain_all_the_things(self):
        i = Float64Index([np.nan])
        assert not i.isin([0]).item()
        assert not i.isin([1]).item()
        assert i.isin([np.nan]).item()

    def test_nan_multiple_containment(self):
        i = Float64Index([1.0, np.nan])
        tm.assert_numpy_array_equal(i.isin([1.0]), np.array([True, False]))
        tm.assert_numpy_array_equal(i.isin([2.0, np.pi]),
                                    np.array([False, False]))
        tm.assert_numpy_array_equal(i.isin([np.nan]), np.array([False, True]))
        tm.assert_numpy_array_equal(i.isin([1.0, np.nan]),
                                    np.array([True, True]))
        i = Float64Index([1.0, 2.0])
        tm.assert_numpy_array_equal(i.isin([np.nan]), np.array([False, False]))

    def test_fillna_float64(self):
        # GH 11343
        idx = Index([1.0, np.nan, 3.0], dtype=float, name="x")
        # can't downcast
        exp = Index([1.0, 0.1, 3.0], name="x")
        tm.assert_index_equal(idx.fillna(0.1), exp)

        # downcast
        exp = Float64Index([1.0, 2.0, 3.0], name="x")
        tm.assert_index_equal(idx.fillna(2), exp)

        # object
        exp = Index([1.0, "obj", 3.0], name="x")
        tm.assert_index_equal(idx.fillna("obj"), exp)
예제 #27
0
 def test_union_with_DatetimeIndex(self):
     i1 = Int64Index(np.arange(0, 20, 2))
     i2 = DatetimeIndex(start='2012-01-03 00:00:00', periods=10, freq='D')
     i1.union(i2)  # Works
     i2.union(i1)  # Fails with "AttributeError: can't set attribute"
예제 #28
0
 def index(self, request):
     return Int64Index(request.param)
예제 #29
0
 def setup_method(self, method):
     self.indices = dict(index=Int64Index(np.arange(0, 20, 2)),
                         index_dec=Int64Index(np.arange(19, -1, -1)))
     self.setup_indices()
예제 #30
0
    def test_slice_integer_frame_getitem(self):

        # similar to above, but on the getitem dim (of a DataFrame)
        for index in [Int64Index(range(5)), RangeIndex(5)]:

            s = DataFrame(np.random.randn(5, 2), index=index)

            def f(idxr):

                # getitem
                for l in [slice(0.0, 1), slice(0, 1.0), slice(0.0, 1.0)]:

                    result = idxr(s)[l]
                    indexer = slice(0, 2)
                    self.check(result, s, indexer, False)

                    # positional indexing
                    def f():
                        s[l]

                    pytest.raises(TypeError, f)

                # getitem out-of-bounds
                for l in [slice(-10, 10), slice(-10.0, 10.0)]:

                    result = idxr(s)[l]
                    self.check(result, s, slice(-10, 10), True)

                # positional indexing
                def f():
                    s[slice(-10.0, 10.0)]

                pytest.raises(TypeError, f)

                # getitem odd floats
                for l, res in [(slice(0.5, 1), slice(1, 2)),
                               (slice(0, 0.5), slice(0, 1)),
                               (slice(0.5, 1.5), slice(1, 2))]:

                    result = idxr(s)[l]
                    self.check(result, s, res, False)

                    # positional indexing
                    def f():
                        s[l]

                    pytest.raises(TypeError, f)

                # setitem
                for l in [slice(3.0, 4), slice(3, 4.0), slice(3.0, 4.0)]:

                    sc = s.copy()
                    idxr(sc)[l] = 0
                    result = idxr(sc)[l].values.ravel()
                    assert (result == 0).all()

                    # positional indexing
                    def f():
                        s[l] = 0

                    pytest.raises(TypeError, f)

            f(lambda x: x.loc)
            with catch_warnings(record=True):
                f(lambda x: x.ix)