Esempio n. 1
0
    def test_between_time_frame(self):
        rng = date_range('1/1/2000', '1/5/2000', freq='5min')
        ts = DataFrame(np.random.randn(len(rng), 2), index=rng)
        stime = time(0, 0)
        etime = time(1, 0)

        close_open = product([True, False], [True, False])
        for inc_start, inc_end in close_open:
            filtered = ts.between_time(stime, etime, inc_start, inc_end)
            exp_len = 13 * 4 + 1
            if not inc_start:
                exp_len -= 5
            if not inc_end:
                exp_len -= 4

            self.assertEqual(len(filtered), exp_len)
            for rs in filtered.index:
                t = rs.time()
                if inc_start:
                    self.assertTrue(t >= stime)
                else:
                    self.assertTrue(t > stime)

                if inc_end:
                    self.assertTrue(t <= etime)
                else:
                    self.assertTrue(t < etime)

        result = ts.between_time('00:00', '01:00')
        expected = ts.between_time(stime, etime)
        assert_frame_equal(result, expected)

        # across midnight
        rng = date_range('1/1/2000', '1/5/2000', freq='5min')
        ts = DataFrame(np.random.randn(len(rng), 2), index=rng)
        stime = time(22, 0)
        etime = time(9, 0)

        close_open = product([True, False], [True, False])
        for inc_start, inc_end in close_open:
            filtered = ts.between_time(stime, etime, inc_start, inc_end)
            exp_len = (12 * 11 + 1) * 4 + 1
            if not inc_start:
                exp_len -= 4
            if not inc_end:
                exp_len -= 4

            self.assertEqual(len(filtered), exp_len)
            for rs in filtered.index:
                t = rs.time()
                if inc_start:
                    self.assertTrue((t >= stime) or (t <= etime))
                else:
                    self.assertTrue((t > stime) or (t <= etime))

                if inc_end:
                    self.assertTrue((t <= etime) or (t >= stime))
                else:
                    self.assertTrue((t < etime) or (t >= stime))
Esempio n. 2
0
    def test_between_time(self):
        rng = date_range('1/1/2000', '1/5/2000', freq='5min')
        ts = Series(np.random.randn(len(rng)), index=rng)
        stime = time(0, 0)
        etime = time(1, 0)

        close_open = product([True, False], [True, False])
        for inc_start, inc_end in close_open:
            filtered = ts.between_time(stime, etime, inc_start, inc_end)
            exp_len = 13 * 4 + 1
            if not inc_start:
                exp_len -= 5
            if not inc_end:
                exp_len -= 4

            assert len(filtered) == exp_len
            for rs in filtered.index:
                t = rs.time()
                if inc_start:
                    assert t >= stime
                else:
                    assert t > stime

                if inc_end:
                    assert t <= etime
                else:
                    assert t < etime

        result = ts.between_time('00:00', '01:00')
        expected = ts.between_time(stime, etime)
        assert_series_equal(result, expected)

        # across midnight
        rng = date_range('1/1/2000', '1/5/2000', freq='5min')
        ts = Series(np.random.randn(len(rng)), index=rng)
        stime = time(22, 0)
        etime = time(9, 0)

        close_open = product([True, False], [True, False])
        for inc_start, inc_end in close_open:
            filtered = ts.between_time(stime, etime, inc_start, inc_end)
            exp_len = (12 * 11 + 1) * 4 + 1
            if not inc_start:
                exp_len -= 4
            if not inc_end:
                exp_len -= 4

            assert len(filtered) == exp_len
            for rs in filtered.index:
                t = rs.time()
                if inc_start:
                    assert (t >= stime) or (t <= etime)
                else:
                    assert (t > stime) or (t <= etime)

                if inc_end:
                    assert (t <= etime) or (t >= stime)
                else:
                    assert (t < etime) or (t >= stime)
Esempio n. 3
0
    def test_between_time(self):
        rng = date_range('1/1/2000', '1/5/2000', freq='5min')
        ts = Series(np.random.randn(len(rng)), index=rng)
        stime = time(0, 0)
        etime = time(1, 0)

        close_open = product([True, False], [True, False])
        for inc_start, inc_end in close_open:
            filtered = ts.between_time(stime, etime, inc_start, inc_end)
            exp_len = 13 * 4 + 1
            if not inc_start:
                exp_len -= 5
            if not inc_end:
                exp_len -= 4

            assert len(filtered) == exp_len
            for rs in filtered.index:
                t = rs.time()
                if inc_start:
                    assert t >= stime
                else:
                    assert t > stime

                if inc_end:
                    assert t <= etime
                else:
                    assert t < etime

        result = ts.between_time('00:00', '01:00')
        expected = ts.between_time(stime, etime)
        assert_series_equal(result, expected)

        # across midnight
        rng = date_range('1/1/2000', '1/5/2000', freq='5min')
        ts = Series(np.random.randn(len(rng)), index=rng)
        stime = time(22, 0)
        etime = time(9, 0)

        close_open = product([True, False], [True, False])
        for inc_start, inc_end in close_open:
            filtered = ts.between_time(stime, etime, inc_start, inc_end)
            exp_len = (12 * 11 + 1) * 4 + 1
            if not inc_start:
                exp_len -= 4
            if not inc_end:
                exp_len -= 4

            assert len(filtered) == exp_len
            for rs in filtered.index:
                t = rs.time()
                if inc_start:
                    assert (t >= stime) or (t <= etime)
                else:
                    assert (t > stime) or (t <= etime)

                if inc_end:
                    assert (t <= etime) or (t >= stime)
                else:
                    assert (t < etime) or (t >= stime)
Esempio n. 4
0
    def test_between_time_frame(self):
        rng = date_range('1/1/2000', '1/5/2000', freq='5min')
        ts = DataFrame(np.random.randn(len(rng), 2), index=rng)
        stime = time(0, 0)
        etime = time(1, 0)

        close_open = product([True, False], [True, False])
        for inc_start, inc_end in close_open:
            filtered = ts.between_time(stime, etime, inc_start, inc_end)
            exp_len = 13 * 4 + 1
            if not inc_start:
                exp_len -= 5
            if not inc_end:
                exp_len -= 4

            self.assertEqual(len(filtered), exp_len)
            for rs in filtered.index:
                t = rs.time()
                if inc_start:
                    self.assertTrue(t >= stime)
                else:
                    self.assertTrue(t > stime)

                if inc_end:
                    self.assertTrue(t <= etime)
                else:
                    self.assertTrue(t < etime)

        result = ts.between_time('00:00', '01:00')
        expected = ts.between_time(stime, etime)
        assert_frame_equal(result, expected)

        # across midnight
        rng = date_range('1/1/2000', '1/5/2000', freq='5min')
        ts = DataFrame(np.random.randn(len(rng), 2), index=rng)
        stime = time(22, 0)
        etime = time(9, 0)

        close_open = product([True, False], [True, False])
        for inc_start, inc_end in close_open:
            filtered = ts.between_time(stime, etime, inc_start, inc_end)
            exp_len = (12 * 11 + 1) * 4 + 1
            if not inc_start:
                exp_len -= 4
            if not inc_end:
                exp_len -= 4

            self.assertEqual(len(filtered), exp_len)
            for rs in filtered.index:
                t = rs.time()
                if inc_start:
                    self.assertTrue((t >= stime) or (t <= etime))
                else:
                    self.assertTrue((t > stime) or (t <= etime))

                if inc_end:
                    self.assertTrue((t <= etime) or (t >= stime))
                else:
                    self.assertTrue((t < etime) or (t >= stime))
Esempio n. 5
0
    def test_rank_descending(self):
        dtypes = ['O', 'f8', 'i8']

        for dtype, method in product(dtypes, self.results):
            if 'i' in dtype:
                df = self.df.dropna()
            else:
                df = self.df.astype(dtype)

            res = df.rank(ascending=False)
            expected = (df.max() - df).rank()
            assert_frame_equal(res, expected)

            if method == 'first' and dtype == 'O':
                continue

            expected = (df.max() - df).rank(method=method)

            if dtype != 'O':
                res2 = df.rank(method=method, ascending=False,
                               numeric_only=True)
                assert_frame_equal(res2, expected)

            res3 = df.rank(method=method, ascending=False,
                           numeric_only=False)
            assert_frame_equal(res3, expected)
Esempio n. 6
0
    def test_rank_tie_methods_on_infs_nans(self):
        dtypes = [('object', None, Infinity(), NegInfinity()),
                  ('float64', np.nan, np.inf, -np.inf)]
        chunk = 3
        disabled = set([('object', 'first')])

        def _check(s, expected, method='average', na_option='keep'):
            result = s.rank(method=method, na_option=na_option)
            tm.assert_series_equal(result, Series(expected, dtype='float64'))

        exp_ranks = {
            'average': ([2, 2, 2], [5, 5, 5], [8, 8, 8]),
            'min': ([1, 1, 1], [4, 4, 4], [7, 7, 7]),
            'max': ([3, 3, 3], [6, 6, 6], [9, 9, 9]),
            'first': ([1, 2, 3], [4, 5, 6], [7, 8, 9]),
            'dense': ([1, 1, 1], [2, 2, 2], [3, 3, 3])
        }
        na_options = ('top', 'bottom', 'keep')
        for dtype, na_value, pos_inf, neg_inf in dtypes:
            in_arr = [neg_inf] * chunk + [na_value] * chunk + [pos_inf] * chunk
            iseries = Series(in_arr, dtype=dtype)
            for method, na_opt in product(exp_ranks.keys(), na_options):
                ranks = exp_ranks[method]
                if (dtype, method) in disabled:
                    continue
                if na_opt == 'top':
                    order = ranks[1] + ranks[0] + ranks[2]
                elif na_opt == 'bottom':
                    order = ranks[0] + ranks[2] + ranks[1]
                else:
                    order = ranks[0] + [np.nan] * chunk + ranks[1]
                _check(iseries, order, method, na_opt)
Esempio n. 7
0
    def test_rank_descending(self):
        dtypes = ['O', 'f8', 'i8']

        for dtype, method in product(dtypes, self.results):
            if 'i' in dtype:
                df = self.df.dropna()
            else:
                df = self.df.astype(dtype)

            res = df.rank(ascending=False)
            expected = (df.max() - df).rank()
            assert_frame_equal(res, expected)

            if method == 'first' and dtype == 'O':
                continue

            expected = (df.max() - df).rank(method=method)

            if dtype != 'O':
                res2 = df.rank(method=method, ascending=False,
                               numeric_only=True)
                assert_frame_equal(res2, expected)

            res3 = df.rank(method=method, ascending=False,
                           numeric_only=False)
            assert_frame_equal(res3, expected)
Esempio n. 8
0
    def test_rank_tie_methods_on_infs_nans(self):
        dtypes = [('object', None, Infinity(), NegInfinity()),
                  ('float64', np.nan, np.inf, -np.inf)]
        chunk = 3
        disabled = set([('object', 'first')])

        def _check(s, expected, method='average', na_option='keep'):
            result = s.rank(method=method, na_option=na_option)
            tm.assert_series_equal(result, Series(expected, dtype='float64'))

        exp_ranks = {
            'average': ([2, 2, 2], [5, 5, 5], [8, 8, 8]),
            'min': ([1, 1, 1], [4, 4, 4], [7, 7, 7]),
            'max': ([3, 3, 3], [6, 6, 6], [9, 9, 9]),
            'first': ([1, 2, 3], [4, 5, 6], [7, 8, 9]),
            'dense': ([1, 1, 1], [2, 2, 2], [3, 3, 3])
        }
        na_options = ('top', 'bottom', 'keep')
        for dtype, na_value, pos_inf, neg_inf in dtypes:
            in_arr = [neg_inf] * chunk + [na_value] * chunk + [pos_inf] * chunk
            iseries = Series(in_arr, dtype=dtype)
            for method, na_opt in product(exp_ranks.keys(), na_options):
                ranks = exp_ranks[method]
                if (dtype, method) in disabled:
                    continue
                if na_opt == 'top':
                    order = ranks[1] + ranks[0] + ranks[2]
                elif na_opt == 'bottom':
                    order = ranks[0] + ranks[2] + ranks[1]
                else:
                    order = ranks[0] + [np.nan] * chunk + ranks[1]
                _check(iseries, order, method, na_opt)
Esempio n. 9
0
    def test_monthly_upsample(self):
        """Upsample a monthly period series to daily and business-day
        frequency with forward fill, for both the 'start' and 'end'
        conventions.

        The expected value is rebuilt from the resampled result itself by
        a timestamp / ``asfreq`` / period round trip.
        """
        monthly = _simple_pts('1/1/1990', '12/31/1995', freq='M')

        for target_freq in ['D', 'B']:
            for convention in ['start', 'end']:
                upsampled = monthly.resample(target_freq, fill_method='ffill',
                                             convention=convention)
                stamped = upsampled.to_timestamp(target_freq, how=convention)
                round_trip = stamped.asfreq(target_freq, 'ffill').to_period()
                assert_series_equal(upsampled, round_trip)
Esempio n. 10
0
    def test_monthly_upsample(self):
        """Forward-fill upsampling of a monthly period series to "D" and
        "B", under the "start" and "end" conventions.

        Each result is compared with the series obtained by converting that
        result to timestamps, re-sampling with ``asfreq`` and converting
        back to periods.
        """
        source = _simple_pts("1/1/1990", "12/31/1995", freq="M")
        combos = product(["D", "B"], ["start", "end"])

        for freq, how in combos:
            filled = source.resample(freq, fill_method="ffill", convention=how)
            as_stamps = filled.to_timestamp(freq, how=how)
            rebuilt = as_stamps.asfreq(freq, "ffill").to_period()
            assert_series_equal(filled, rebuilt)
Esempio n. 11
0
    def test_monthly_upsample(self):
        """Check forward-filled upsampling of monthly periods to daily and
        business-day frequency for each convention ('start', 'end').

        The reference series is derived from the resampled output via
        ``to_timestamp``, ``asfreq`` and ``to_period``.
        """
        conventions = ['start', 'end']
        pts = _simple_pts('1/1/1990', '12/31/1995', freq='M')

        for new_freq in ['D', 'B']:
            for conv in conventions:
                got = pts.resample(new_freq, fill_method='ffill',
                                   convention=conv)
                reference = got.to_timestamp(new_freq, how=conv)
                reference = reference.asfreq(new_freq, 'ffill')
                reference = reference.to_period()
                assert_series_equal(got, reference)
Esempio n. 12
0
    def test_pivot_integer_columns(self):
        # caused by upstream bug in unstack

        d = datetime.date.min
        data = list(product(['foo', 'bar'], ['A', 'B', 'C'], ['x1', 'x2'],
                            [d + datetime.timedelta(i) for i in range(20)], [1.0]))
        df = pandas.DataFrame(data)
        table = df.pivot_table(values=4, rows=[0, 1, 3], cols=[2])

        df2 = df.rename(columns=str)
        table2 = df2.pivot_table(values='4', rows=['0', '1', '3'], cols=['2'])

        tm.assert_frame_equal(table, table2, check_names=False)
Esempio n. 13
0
    def test_pivot_integer_columns(self):
        # caused by upstream bug in unstack

        d = datetime.date.min
        data = list(product(['foo', 'bar'], ['A', 'B', 'C'], ['x1', 'x2'],
                            [d + datetime.timedelta(i) for i in range(20)], [1.0]))
        df = DataFrame(data)
        table = df.pivot_table(values=4, index=[0, 1, 3], columns=[2])

        df2 = df.rename(columns=str)
        table2 = df2.pivot_table(values='4', index=['0', '1', '3'], columns=['2'])

        tm.assert_frame_equal(table, table2, check_names=False)
Esempio n. 14
0
    def test_pivot_integer_columns(self):
        # caused by upstream bug in unstack

        d = date.min
        data = list(
            product(["foo", "bar"], ["A", "B", "C"], ["x1", "x2"], [d + timedelta(i) for i in range(20)], [1.0])
        )
        df = DataFrame(data)
        table = df.pivot_table(values=4, index=[0, 1, 3], columns=[2])

        df2 = df.rename(columns=str)
        table2 = df2.pivot_table(values="4", index=["0", "1", "3"], columns=["2"])

        tm.assert_frame_equal(table, table2, check_names=False)
Esempio n. 15
0
    def test_rank_tie_methods(self):
        s = self.s

        def _check(s, expected, method='average'):
            result = s.rank(method=method)
            tm.assert_series_equal(result, Series(expected))

        dtypes = [None, object]
        disabled = set([(object, 'first')])
        results = self.results

        for method, dtype in product(results, dtypes):
            if (dtype, method) in disabled:
                continue
            series = s if dtype is None else s.astype(dtype)
            _check(series, results[method], method=method)
Esempio n. 16
0
    def test_rank_tie_methods(self):
        s = self.s

        def _check(s, expected, method='average'):
            result = s.rank(method=method)
            tm.assert_series_equal(result, Series(expected))

        dtypes = [None, object]
        disabled = {(object, 'first')}
        results = self.results

        for method, dtype in product(results, dtypes):
            if (dtype, method) in disabled:
                continue
            series = s if dtype is None else s.astype(dtype)
            _check(series, results[method], method=method)
Esempio n. 17
0
    def test_rank_descending(self):
        dtypes = ['O', 'f8', 'i8']

        for dtype, method in product(dtypes, self.results):
            if 'i' in dtype:
                s = self.s.dropna()
            else:
                s = self.s.astype(dtype)

            res = s.rank(ascending=False)
            expected = (s.max() - s).rank()
            assert_series_equal(res, expected)

            if method == 'first' and dtype == 'O':
                continue

            expected = (s.max() - s).rank(method=method)
            res2 = s.rank(method=method, ascending=False)
            assert_series_equal(res2, expected)
Esempio n. 18
0
    def test_rank_descending(self):
        dtypes = ['O', 'f8', 'i8']

        for dtype, method in product(dtypes, self.results):
            if 'i' in dtype:
                s = self.s.dropna()
            else:
                s = self.s.astype(dtype)

            res = s.rank(ascending=False)
            expected = (s.max() - s).rank()
            assert_series_equal(res, expected)

            if method == 'first' and dtype == 'O':
                continue

            expected = (s.max() - s).rank(method=method)
            res2 = s.rank(method=method, ascending=False)
            assert_series_equal(res2, expected)
Esempio n. 19
0
    def test_rank_2d_tie_methods(self):
        df = self.df

        def _check2d(df, expected, method='average', axis=0):
            exp_df = DataFrame({'A': expected, 'B': expected})

            if axis == 1:
                df = df.T
                exp_df = exp_df.T

            result = df.rank(method=method, axis=axis)
            assert_frame_equal(result, exp_df)

        dtypes = [None, object]
        disabled = set([(object, 'first')])
        results = self.results

        for method, axis, dtype in product(results, [0, 1], dtypes):
            if (dtype, method) in disabled:
                continue
            frame = df if dtype is None else df.astype(dtype)
            _check2d(frame, results[method], method=method, axis=axis)
Esempio n. 20
0
    def test_rank_2d_tie_methods(self):
        df = self.df

        def _check2d(df, expected, method='average', axis=0):
            exp_df = DataFrame({'A': expected, 'B': expected})

            if axis == 1:
                df = df.T
                exp_df = exp_df.T

            result = df.rank(method=method, axis=axis)
            assert_frame_equal(result, exp_df)

        dtypes = [None, object]
        disabled = set([(object, 'first')])
        results = self.results

        for method, axis, dtype in product(results, [0, 1], dtypes):
            if (dtype, method) in disabled:
                continue
            frame = df if dtype is None else df.astype(dtype)
            _check2d(frame, results[method], method=method, axis=axis)
Esempio n. 21
0
                    Timestamp, DatetimeIndex, MultiIndex,
                    to_datetime, date_range, period_range)
import pandas as pd
import pandas.tseries.offsets as offsets

from pandas.util.testing import (assert_series_equal,
                                 assert_frame_equal,
                                 assert_index_equal)

import pandas.util.testing as tm
from pandas.compat import product

from pandas.tests.frame.common import TestData


@pytest.fixture(params=product([True, False], [True, False]))
def close_open_fixture(request):
    """Return the current parameter: one pair of booleans out of the four
    combinations of ``[True, False]`` with itself.

    NOTE(review): the name suggests the pair is meant as the
    include-start / include-end flags of ``between_time`` -- confirm
    against the tests that request this fixture.
    """
    flag_pair = request.param
    return flag_pair


class TestDataFrameTimeSeriesMethods(TestData):

    def test_diff(self):
        the_diff = self.tsframe.diff(1)

        assert_series_equal(the_diff['A'],
                            self.tsframe['A'] - self.tsframe['A'].shift(1))

        # int dtype
        a = 10000000000000000
        b = a + 1
Esempio n. 22
0
import pytest

from pandas.compat import product

import pandas as pd
from pandas import (DataFrame, DatetimeIndex, Index, MultiIndex, Series,
                    Timestamp, date_range, period_range, to_datetime)
from pandas.tests.frame.common import TestData
import pandas.util.testing as tm
from pandas.util.testing import (assert_frame_equal, assert_index_equal,
                                 assert_series_equal)

import pandas.tseries.offsets as offsets


@pytest.fixture(params=product([True, False], [True, False]))
def close_open_fixture(request):
    """Parametrized fixture handing out one ``(bool, bool)`` pair per test
    run, covering all four combinations.

    NOTE(review): presumably the pair is (include start, include end) for
    time-window tests -- verify against the consuming tests.
    """
    current_pair = request.param
    return current_pair


class TestDataFrameTimeSeriesMethods(TestData):
    def test_diff(self):
        the_diff = self.tsframe.diff(1)

        assert_series_equal(the_diff['A'],
                            self.tsframe['A'] - self.tsframe['A'].shift(1))

        # int dtype
        a = 10000000000000000
        b = a + 1
        s = Series([a, b])
Esempio n. 23
0
    index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two',
                                                              'three']],
                       labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
                               [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
                       names=['first', 'second'])
    raw_frame = DataFrame(np.random.randn(10, 3), index=index,
                          columns=Index(['A', 'B', 'C'], name='exp'))
    raw_frame.iloc[1, [1, 2]] = np.nan
    raw_frame.iloc[7, [0, 1]] = np.nan
    return raw_frame


@pytest.mark.parametrize(
    "op, level, axis, skipna",
    product(AGG_FUNCTIONS,
            lrange(2), lrange(2),
            [True, False]))
def test_regression_whitelist_methods(raw_frame, op, level, axis, skipna):
    # GH6944
    # explicitly test the whitelisted methods

    if axis == 0:
        frame = raw_frame
    else:
        frame = raw_frame.T

    if op in AGG_FUNCTIONS_WITH_SKIPNA:
        grouped = frame.groupby(level=level, axis=axis)
        result = getattr(grouped, op)(skipna=skipna)
        expected = getattr(frame, op)(level=level, axis=axis,
                                      skipna=skipna)
Esempio n. 24
0
class TestIntervalIndex(object):
    """Label-based indexing on objects whose index is an ``IntervalIndex``.

    Covers scalar, list, slice, ``Interval`` and boolean-mask keys through
    both ``[]`` and ``.loc``.
    """

    def setup_method(self, method):
        """Create the shared fixture ``self.s``: the values 0..4 indexed by
        the intervals built from the breaks 0..5."""
        self.s = Series(np.arange(5), IntervalIndex.from_breaks(np.arange(6)))

    def test_loc_with_scalar(self):
        """``.loc`` with scalar slices, scalar lists and a boolean mask
        returns the same rows as the matching ``iloc`` selection."""

        s = self.s

        expected = s.iloc[:3]
        tm.assert_series_equal(expected, s.loc[:3])
        tm.assert_series_equal(expected, s.loc[:2.5])
        tm.assert_series_equal(expected, s.loc[0.1:2.5])
        tm.assert_series_equal(expected, s.loc[-1:3])

        expected = s.iloc[1:4]
        tm.assert_series_equal(expected, s.loc[[1.5, 2.5, 3.5]])
        tm.assert_series_equal(expected, s.loc[[2, 3, 4]])
        tm.assert_series_equal(expected, s.loc[[1.5, 3, 4]])

        expected = s.iloc[2:5]
        tm.assert_series_equal(expected, s.loc[s >= 2])

    def test_getitem_with_scalar(self):
        """Same selections as ``test_loc_with_scalar``, made through ``[]``
        instead of ``.loc``."""

        s = self.s

        expected = s.iloc[:3]
        tm.assert_series_equal(expected, s[:3])
        tm.assert_series_equal(expected, s[:2.5])
        tm.assert_series_equal(expected, s[0.1:2.5])
        tm.assert_series_equal(expected, s[-1:3])

        expected = s.iloc[1:4]
        tm.assert_series_equal(expected, s[[1.5, 2.5, 3.5]])
        tm.assert_series_equal(expected, s[[2, 3, 4]])
        tm.assert_series_equal(expected, s[[1.5, 3, 4]])

        expected = s.iloc[2:5]
        tm.assert_series_equal(expected, s[s >= 2])

    @pytest.mark.parametrize('direction, closed',
                             product(('increasing', 'decreasing'),
                                     ('left', 'right', 'neither', 'both')))
    def test_nonoverlapping_monotonic(self, direction, closed):
        """Scalar lookups on three disjoint intervals, in increasing or
        decreasing order and for every ``closed`` setting.

        A left or right endpoint is found only when that side is closed
        (otherwise ``KeyError``); a midpoint is always found.
        """
        tpls = [(0, 1), (2, 3), (4, 5)]
        if direction == 'decreasing':
            # Reverse into a real list: ``reversed()`` yields a one-shot
            # iterator without ``len()``, which a constructor expecting a
            # sized sequence cannot consume.
            tpls = tpls[::-1]

        idx = IntervalIndex.from_tuples(tpls, closed=closed)
        s = Series(list('abc'), idx)

        for key, expected in zip(idx.left, s):
            if idx.closed_left:
                assert s[key] == expected
                assert s.loc[key] == expected
            else:
                with pytest.raises(KeyError):
                    s[key]
                with pytest.raises(KeyError):
                    s.loc[key]

        for key, expected in zip(idx.right, s):
            if idx.closed_right:
                assert s[key] == expected
                assert s.loc[key] == expected
            else:
                with pytest.raises(KeyError):
                    s[key]
                with pytest.raises(KeyError):
                    s.loc[key]

        for key, expected in zip(idx.mid, s):
            assert s[key] == expected
            assert s.loc[key] == expected

    def test_with_interval(self):
        """Lookups keyed by ``Interval`` objects: single key, list of keys,
        and keys for which ``KeyError`` is expected."""

        s = self.s
        expected = 0

        result = s.loc[Interval(0, 1)]
        assert result == expected

        result = s[Interval(0, 1)]
        assert result == expected

        expected = s.iloc[3:5]
        result = s.loc[Interval(3, 6)]
        tm.assert_series_equal(expected, result)

        expected = s.iloc[3:5]
        result = s.loc[[Interval(3, 6)]]
        tm.assert_series_equal(expected, result)

        expected = s.iloc[3:5]
        result = s.loc[[Interval(3, 5)]]
        tm.assert_series_equal(expected, result)

        # missing
        with pytest.raises(KeyError):
            s.loc[Interval(-2, 0)]

        with pytest.raises(KeyError):
            s[Interval(-2, 0)]

        with pytest.raises(KeyError):
            s.loc[Interval(5, 6)]

        with pytest.raises(KeyError):
            s[Interval(5, 6)]

    def test_with_slices(self):
        """Slices that start at an ``Interval`` raise
        ``NotImplementedError``; a scalar slice with a step other than 1
        raises ``ValueError``."""

        s = self.s

        # slice of interval
        with pytest.raises(NotImplementedError):
            s.loc[Interval(3, 6):]

        with pytest.raises(NotImplementedError):
            s[Interval(3, 6):]

        expected = s.iloc[3:5]
        result = s[[Interval(3, 6)]]
        tm.assert_series_equal(expected, result)

        # slice of scalar with step != 1
        with pytest.raises(ValueError):
            s[0:4:2]

    def test_with_overlaps(self):
        """Lookups where the key matches more than one row: repeated
        ``Interval`` keys on the fixture, then scalar and ``Interval`` keys
        on an index whose two intervals overlap."""

        s = self.s
        expected = s.iloc[[3, 4, 3, 4]]
        result = s.loc[[Interval(3, 6), Interval(3, 6)]]
        tm.assert_series_equal(expected, result)

        idx = IntervalIndex.from_tuples([(1, 5), (3, 7)])
        s = Series(range(len(idx)), index=idx)

        result = s[4]
        expected = s
        tm.assert_series_equal(expected, result)

        result = s[[4]]
        expected = s
        tm.assert_series_equal(expected, result)

        result = s.loc[[4]]
        expected = s
        tm.assert_series_equal(expected, result)

        result = s[Interval(3, 5)]
        expected = s
        tm.assert_series_equal(expected, result)

        result = s.loc[Interval(3, 5)]
        expected = s
        tm.assert_series_equal(expected, result)

        # doesn't intersect unique set of intervals
        with pytest.raises(KeyError):
            s[[Interval(3, 5)]]

        with pytest.raises(KeyError):
            s.loc[[Interval(3, 5)]]

    def test_non_unique(self):
        """``.loc`` with an ``Interval`` key, as a scalar and as a
        one-element list, on the two intervals (1, 3) and (3, 7)."""

        idx = IntervalIndex.from_tuples([(1, 3), (3, 7)])

        # bare ``Series`` for consistency with the rest of this class
        s = Series(range(len(idx)), index=idx)

        result = s.loc[Interval(1, 3)]
        assert result == 0

        result = s.loc[[Interval(1, 3)]]
        expected = s.iloc[0:1]
        tm.assert_series_equal(expected, result)

    def test_non_unique_moar(self):
        """Index containing a duplicated interval: a scalar ``Interval`` key
        returns both rows, while slices and list keys raise
        ``ValueError``."""

        idx = IntervalIndex.from_tuples([(1, 3), (1, 3), (3, 7)])
        s = Series(range(len(idx)), index=idx)

        result = s.loc[Interval(1, 3)]
        expected = s.iloc[[0, 1]]
        tm.assert_series_equal(expected, result)

        # non-unique index and slices not allowed
        with pytest.raises(ValueError):
            s.loc[Interval(1, 3):]

        with pytest.raises(ValueError):
            s[Interval(1, 3):]

        # non-unique
        with pytest.raises(ValueError):
            s[[Interval(1, 3)]]

    def test_non_matching(self):
        """List keys that include a value the test expects to be missing
        raise ``KeyError`` from ``.loc``."""
        s = self.s

        # this is a departure from our current
        # indexing scheme, but simpler
        with pytest.raises(KeyError):
            s.loc[[-1, 3, 4, 5]]

        with pytest.raises(KeyError):
            s.loc[[-1, 3]]

    def test_large_series(self):
        """Equivalent scalar slice spellings (``:80000``, ``0:80000``,
        ``0:80000:1``) must agree on a one-million-row series."""
        s = Series(np.arange(1000000),
                   index=IntervalIndex.from_breaks(np.arange(1000001)))

        result1 = s.loc[:80000]
        result2 = s.loc[0:80000]
        result3 = s.loc[0:80000:1]
        tm.assert_series_equal(result1, result2)
        tm.assert_series_equal(result1, result3)

    def test_loc_getitem_frame(self):
        """``.loc`` on a DataFrame indexed by the bins returned from
        ``pd.cut``: scalar keys, list keys, and keys expected to raise
        ``KeyError``."""

        df = DataFrame({'A': range(10)})
        s = pd.cut(df.A, 5)
        df['B'] = s
        df = df.set_index('B')

        result = df.loc[4]
        expected = df.iloc[4:6]
        tm.assert_frame_equal(result, expected)

        with pytest.raises(KeyError):
            df.loc[10]

        # single list-like
        result = df.loc[[4]]
        expected = df.iloc[4:6]
        tm.assert_frame_equal(result, expected)

        # non-unique
        result = df.loc[[4, 5]]
        expected = df.take([4, 5, 4, 5])
        tm.assert_frame_equal(result, expected)

        with pytest.raises(KeyError):
            df.loc[[10]]

        # partial missing
        with pytest.raises(KeyError):
            df.loc[[10, 4]]
Esempio n. 25
0
    index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two',
                                                              'three']],
                       labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
                               [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
                       names=['first', 'second'])
    raw_frame = DataFrame(np.random.randn(10, 3), index=index,
                          columns=Index(['A', 'B', 'C'], name='exp'))
    raw_frame.iloc[1, [1, 2]] = np.nan
    raw_frame.iloc[7, [0, 1]] = np.nan
    return raw_frame


@pytest.mark.parametrize(
    "op, level, axis, skipna, sort",
    product(AGG_FUNCTIONS,
            lrange(2), lrange(2),
            [True, False],
            [True, False]))
def test_regression_whitelist_methods(
        raw_frame, op, level,
        axis, skipna, sort):
    # GH6944
    # GH 17537
    # explicitly test the whitelisted methods

    if axis == 0:
        frame = raw_frame
    else:
        frame = raw_frame.T

    if op in AGG_FUNCTIONS_WITH_SKIPNA:
        grouped = frame.groupby(level=level, axis=axis, sort=sort)