def test_between_time_frame(self):
    """Exercise ``DataFrame.between_time`` for every endpoint-inclusion pairing.

    Two windows are checked on a 5-minute index built with
    ``date_range('1/1/2000', '1/5/2000')``: one contained in a single day
    (00:00-01:00) and one that wraps past midnight (22:00-09:00).  For each
    window and each (inc_start, inc_end) combination the number of selected
    rows and the time-of-day of every selected row are verified.
    """
    flag_values = (True, False)

    # ---- window inside a single day -------------------------------------
    index = date_range('1/1/2000', '1/5/2000', freq='5min')
    frame = DataFrame(np.random.randn(len(index), 2), index=index)
    start, end = time(0, 0), time(1, 0)

    for inc_start in flag_values:
        for inc_end in flag_values:
            subset = frame.between_time(start, end, inc_start, inc_end)

            # Full count is 13 * 4 + 1; an excluded start removes 5 rows and
            # an excluded end removes 4.
            want = 13 * 4 + 1 - (0 if inc_start else 5) - (0 if inc_end else 4)
            self.assertEqual(len(subset), want)

            for stamp in subset.index:
                clock = stamp.time()
                self.assertTrue(clock >= start if inc_start else clock > start)
                self.assertTrue(clock <= end if inc_end else clock < end)

    # String endpoints must select the same rows as ``time`` objects.
    by_string = frame.between_time('00:00', '01:00')
    by_time = frame.between_time(start, end)
    assert_frame_equal(by_string, by_time)

    # ---- window across midnight -----------------------------------------
    index = date_range('1/1/2000', '1/5/2000', freq='5min')
    frame = DataFrame(np.random.randn(len(index), 2), index=index)
    start, end = time(22, 0), time(9, 0)

    for inc_start in flag_values:
        for inc_end in flag_values:
            subset = frame.between_time(start, end, inc_start, inc_end)

            want = ((12 * 11 + 1) * 4 + 1
                    - (0 if inc_start else 4)
                    - (0 if inc_end else 4))
            self.assertEqual(len(subset), want)

            for stamp in subset.index:
                clock = stamp.time()
                # The window wraps, so a row qualifies by being late enough
                # OR early enough; each check is strict only on the side
                # whose endpoint is excluded.
                past_start = clock >= start if inc_start else clock > start
                self.assertTrue(past_start or (clock <= end))
                before_end = clock <= end if inc_end else clock < end
                self.assertTrue(before_end or (clock >= start))
def test_between_time(self):
    """Exercise ``Series.between_time`` for every endpoint-inclusion pairing.

    Covers a same-day window (00:00-01:00) and a window that wraps past
    midnight (22:00-09:00) on a 5-minute index built with
    ``date_range('1/1/2000', '1/5/2000')``.  For each (inc_start, inc_end)
    combination the selected row count and each selected time-of-day are
    verified.
    """
    flag_values = (True, False)

    # ---- window inside a single day -------------------------------------
    index = date_range('1/1/2000', '1/5/2000', freq='5min')
    series = Series(np.random.randn(len(index)), index=index)
    start, end = time(0, 0), time(1, 0)

    for inc_start in flag_values:
        for inc_end in flag_values:
            subset = series.between_time(start, end, inc_start, inc_end)

            # Full count is 13 * 4 + 1; an excluded start removes 5 rows and
            # an excluded end removes 4.
            want = 13 * 4 + 1 - (0 if inc_start else 5) - (0 if inc_end else 4)
            assert len(subset) == want

            for stamp in subset.index:
                clock = stamp.time()
                assert (clock >= start) if inc_start else (clock > start)
                assert (clock <= end) if inc_end else (clock < end)

    # String endpoints must select the same rows as ``time`` objects.
    by_string = series.between_time('00:00', '01:00')
    by_time = series.between_time(start, end)
    assert_series_equal(by_string, by_time)

    # ---- window across midnight -----------------------------------------
    index = date_range('1/1/2000', '1/5/2000', freq='5min')
    series = Series(np.random.randn(len(index)), index=index)
    start, end = time(22, 0), time(9, 0)

    for inc_start in flag_values:
        for inc_end in flag_values:
            subset = series.between_time(start, end, inc_start, inc_end)

            want = ((12 * 11 + 1) * 4 + 1
                    - (0 if inc_start else 4)
                    - (0 if inc_end else 4))
            assert len(subset) == want

            for stamp in subset.index:
                clock = stamp.time()
                # The window wraps, so a row qualifies by being late enough
                # OR early enough; each check is strict only on the side
                # whose endpoint is excluded.
                past_start = clock >= start if inc_start else clock > start
                assert past_start or (clock <= end)
                before_end = clock <= end if inc_end else clock < end
                assert before_end or (clock >= start)
def test_rank_descending(self): dtypes = ['O', 'f8', 'i8'] for dtype, method in product(dtypes, self.results): if 'i' in dtype: df = self.df.dropna() else: df = self.df.astype(dtype) res = df.rank(ascending=False) expected = (df.max() - df).rank() assert_frame_equal(res, expected) if method == 'first' and dtype == 'O': continue expected = (df.max() - df).rank(method=method) if dtype != 'O': res2 = df.rank(method=method, ascending=False, numeric_only=True) assert_frame_equal(res2, expected) res3 = df.rank(method=method, ascending=False, numeric_only=False) assert_frame_equal(res3, expected)
def test_rank_tie_methods_on_infs_nans(self): dtypes = [('object', None, Infinity(), NegInfinity()), ('float64', np.nan, np.inf, -np.inf)] chunk = 3 disabled = set([('object', 'first')]) def _check(s, expected, method='average', na_option='keep'): result = s.rank(method=method, na_option=na_option) tm.assert_series_equal(result, Series(expected, dtype='float64')) exp_ranks = { 'average': ([2, 2, 2], [5, 5, 5], [8, 8, 8]), 'min': ([1, 1, 1], [4, 4, 4], [7, 7, 7]), 'max': ([3, 3, 3], [6, 6, 6], [9, 9, 9]), 'first': ([1, 2, 3], [4, 5, 6], [7, 8, 9]), 'dense': ([1, 1, 1], [2, 2, 2], [3, 3, 3]) } na_options = ('top', 'bottom', 'keep') for dtype, na_value, pos_inf, neg_inf in dtypes: in_arr = [neg_inf] * chunk + [na_value] * chunk + [pos_inf] * chunk iseries = Series(in_arr, dtype=dtype) for method, na_opt in product(exp_ranks.keys(), na_options): ranks = exp_ranks[method] if (dtype, method) in disabled: continue if na_opt == 'top': order = ranks[1] + ranks[0] + ranks[2] elif na_opt == 'bottom': order = ranks[0] + ranks[2] + ranks[1] else: order = ranks[0] + [np.nan] * chunk + ranks[1] _check(iseries, order, method, na_opt)
def test_monthly_upsample(self):
    """Upsample a monthly period series to daily and business-day frequency.

    Each target frequency is combined with both the 'start' and 'end'
    conventions, forward-filling the upsampled values.

    NOTE(review): ``expected`` is rebuilt from ``result`` itself (converted
    to timestamps, forward-filled via ``asfreq`` and converted back to
    periods), so this verifies round-trip consistency of the resampled
    output rather than comparing against an independently derived value.
    """
    monthly = _simple_pts('1/1/1990', '12/31/1995', freq='M')

    for targ in ['D', 'B']:
        for conv in ['start', 'end']:
            result = monthly.resample(targ, fill_method='ffill',
                                      convention=conv)
            stamped = result.to_timestamp(targ, how=conv)
            expected = stamped.asfreq(targ, 'ffill').to_period()
            assert_series_equal(result, expected)
def test_monthly_upsample(self):
    """Upsample a monthly period series to "D" and "B" with forward fill.

    Both the "start" and "end" conventions are exercised for each target
    frequency.

    NOTE(review): the comparison value is derived from ``result`` itself
    (``to_timestamp`` -> ``asfreq(..., "ffill")`` -> ``to_period``), so the
    assertion checks that the resampled output survives that round trip,
    not that it matches an independently computed series.
    """
    source = _simple_pts("1/1/1990", "12/31/1995", freq="M")
    combos = [(targ, conv) for targ in ("D", "B") for conv in ("start", "end")]

    for targ, conv in combos:
        result = source.resample(targ, fill_method="ffill", convention=conv)
        as_stamps = result.to_timestamp(targ, how=conv)
        filled = as_stamps.asfreq(targ, "ffill")
        expected = filled.to_period()
        assert_series_equal(result, expected)
def test_pivot_integer_columns(self): # caused by upstream bug in unstack d = datetime.date.min data = list(product(['foo', 'bar'], ['A', 'B', 'C'], ['x1', 'x2'], [d + datetime.timedelta(i) for i in range(20)], [1.0])) df = pandas.DataFrame(data) table = df.pivot_table(values=4, rows=[0, 1, 3], cols=[2]) df2 = df.rename(columns=str) table2 = df2.pivot_table(values='4', rows=['0', '1', '3'], cols=['2']) tm.assert_frame_equal(table, table2, check_names=False)
def test_pivot_integer_columns(self): # caused by upstream bug in unstack d = datetime.date.min data = list(product(['foo', 'bar'], ['A', 'B', 'C'], ['x1', 'x2'], [d + datetime.timedelta(i) for i in range(20)], [1.0])) df = DataFrame(data) table = df.pivot_table(values=4, index=[0, 1, 3], columns=[2]) df2 = df.rename(columns=str) table2 = df2.pivot_table(values='4', index=['0', '1', '3'], columns=['2']) tm.assert_frame_equal(table, table2, check_names=False)
def test_pivot_integer_columns(self): # caused by upstream bug in unstack d = date.min data = list( product(["foo", "bar"], ["A", "B", "C"], ["x1", "x2"], [d + timedelta(i) for i in range(20)], [1.0]) ) df = DataFrame(data) table = df.pivot_table(values=4, index=[0, 1, 3], columns=[2]) df2 = df.rename(columns=str) table2 = df2.pivot_table(values="4", index=["0", "1", "3"], columns=["2"]) tm.assert_frame_equal(table, table2, check_names=False)
def test_rank_tie_methods(self): s = self.s def _check(s, expected, method='average'): result = s.rank(method=method) tm.assert_series_equal(result, Series(expected)) dtypes = [None, object] disabled = set([(object, 'first')]) results = self.results for method, dtype in product(results, dtypes): if (dtype, method) in disabled: continue series = s if dtype is None else s.astype(dtype) _check(series, results[method], method=method)
def test_rank_tie_methods(self): s = self.s def _check(s, expected, method='average'): result = s.rank(method=method) tm.assert_series_equal(result, Series(expected)) dtypes = [None, object] disabled = {(object, 'first')} results = self.results for method, dtype in product(results, dtypes): if (dtype, method) in disabled: continue series = s if dtype is None else s.astype(dtype) _check(series, results[method], method=method)
def test_rank_descending(self): dtypes = ['O', 'f8', 'i8'] for dtype, method in product(dtypes, self.results): if 'i' in dtype: s = self.s.dropna() else: s = self.s.astype(dtype) res = s.rank(ascending=False) expected = (s.max() - s).rank() assert_series_equal(res, expected) if method == 'first' and dtype == 'O': continue expected = (s.max() - s).rank(method=method) res2 = s.rank(method=method, ascending=False) assert_series_equal(res2, expected)
def test_rank_2d_tie_methods(self): df = self.df def _check2d(df, expected, method='average', axis=0): exp_df = DataFrame({'A': expected, 'B': expected}) if axis == 1: df = df.T exp_df = exp_df.T result = df.rank(method=method, axis=axis) assert_frame_equal(result, exp_df) dtypes = [None, object] disabled = set([(object, 'first')]) results = self.results for method, axis, dtype in product(results, [0, 1], dtypes): if (dtype, method) in disabled: continue frame = df if dtype is None else df.astype(dtype) _check2d(frame, results[method], method=method, axis=axis)
Timestamp, DatetimeIndex, MultiIndex, to_datetime, date_range, period_range) import pandas as pd import pandas.tseries.offsets as offsets from pandas.util.testing import (assert_series_equal, assert_frame_equal, assert_index_equal) import pandas.util.testing as tm from pandas.compat import product from pandas.tests.frame.common import TestData @pytest.fixture(params=product([True, False], [True, False])) def close_open_fixture(request): return request.param class TestDataFrameTimeSeriesMethods(TestData): def test_diff(self): the_diff = self.tsframe.diff(1) assert_series_equal(the_diff['A'], self.tsframe['A'] - self.tsframe['A'].shift(1)) # int dtype a = 10000000000000000 b = a + 1
import pytest from pandas.compat import product import pandas as pd from pandas import (DataFrame, DatetimeIndex, Index, MultiIndex, Series, Timestamp, date_range, period_range, to_datetime) from pandas.tests.frame.common import TestData import pandas.util.testing as tm from pandas.util.testing import (assert_frame_equal, assert_index_equal, assert_series_equal) import pandas.tseries.offsets as offsets @pytest.fixture(params=product([True, False], [True, False])) def close_open_fixture(request): return request.param class TestDataFrameTimeSeriesMethods(TestData): def test_diff(self): the_diff = self.tsframe.diff(1) assert_series_equal(the_diff['A'], self.tsframe['A'] - self.tsframe['A'].shift(1)) # int dtype a = 10000000000000000 b = a + 1 s = Series([a, b])
index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two', 'three']], labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], names=['first', 'second']) raw_frame = DataFrame(np.random.randn(10, 3), index=index, columns=Index(['A', 'B', 'C'], name='exp')) raw_frame.iloc[1, [1, 2]] = np.nan raw_frame.iloc[7, [0, 1]] = np.nan return raw_frame @pytest.mark.parametrize( "op, level, axis, skipna", product(AGG_FUNCTIONS, lrange(2), lrange(2), [True, False])) def test_regression_whitelist_methods(raw_frame, op, level, axis, skipna): # GH6944 # explicity test the whitelest methods if axis == 0: frame = raw_frame else: frame = raw_frame.T if op in AGG_FUNCTIONS_WITH_SKIPNA: grouped = frame.groupby(level=level, axis=axis) result = getattr(grouped, op)(skipna=skipna) expected = getattr(frame, op)(level=level, axis=axis, skipna=skipna)
class TestIntervalIndex(object):
    """Label-based indexing tests for objects indexed by an ``IntervalIndex``.

    Most tests share ``self.s``: five integers (0..4) indexed by the
    intervals built from the breaks 0..5.
    """

    def setup_method(self, method):
        """Create the shared five-element interval-indexed series."""
        self.s = Series(np.arange(5), IntervalIndex.from_breaks(np.arange(6)))

    def test_loc_with_scalar(self):
        """``.loc`` with scalar slices, scalar lists and a boolean mask."""
        s = self.s

        # Scalar slices: each of these must select the first three rows.
        expected = s.iloc[:3]
        tm.assert_series_equal(expected, s.loc[:3])
        tm.assert_series_equal(expected, s.loc[:2.5])
        tm.assert_series_equal(expected, s.loc[0.1:2.5])
        tm.assert_series_equal(expected, s.loc[-1:3])

        # Lists of scalars: each list must select rows 1 through 3.
        expected = s.iloc[1:4]
        tm.assert_series_equal(expected, s.loc[[1.5, 2.5, 3.5]])
        tm.assert_series_equal(expected, s.loc[[2, 3, 4]])
        tm.assert_series_equal(expected, s.loc[[1.5, 3, 4]])

        # Boolean mask computed from the values.
        expected = s.iloc[2:5]
        tm.assert_series_equal(expected, s.loc[s >= 2])

    def test_getitem_with_scalar(self):
        """Same cases as ``test_loc_with_scalar`` but through ``s[...]``."""
        s = self.s

        # Scalar slices: each of these must select the first three rows.
        expected = s.iloc[:3]
        tm.assert_series_equal(expected, s[:3])
        tm.assert_series_equal(expected, s[:2.5])
        tm.assert_series_equal(expected, s[0.1:2.5])
        tm.assert_series_equal(expected, s[-1:3])

        # Lists of scalars: each list must select rows 1 through 3.
        expected = s.iloc[1:4]
        tm.assert_series_equal(expected, s[[1.5, 2.5, 3.5]])
        tm.assert_series_equal(expected, s[[2, 3, 4]])
        tm.assert_series_equal(expected, s[[1.5, 3, 4]])

        # Boolean mask computed from the values.
        expected = s.iloc[2:5]
        tm.assert_series_equal(expected, s[s >= 2])

    @pytest.mark.parametrize('direction, closed',
                             product(('increasing', 'decreasing'),
                                     ('left', 'right', 'neither', 'both')))
    def test_nonoverlapping_monotonic(self, direction, closed):
        """Scalar lookup at the left edge, right edge and midpoint.

        An edge lookup must succeed only when that side of the index is
        closed and raise ``KeyError`` otherwise; a midpoint lookup must
        always succeed.
        """
        tpls = [(0, 1), (2, 3), (4, 5)]
        if direction == 'decreasing':
            # NOTE(review): ``reversed`` yields an iterator rather than a
            # list; confirm ``from_tuples`` is meant to accept that.
            tpls = reversed(tpls)

        idx = IntervalIndex.from_tuples(tpls, closed=closed)
        s = Series(list('abc'), idx)

        # Left endpoints.
        for key, expected in zip(idx.left, s):
            if idx.closed_left:
                assert s[key] == expected
                assert s.loc[key] == expected
            else:
                with pytest.raises(KeyError):
                    s[key]
                with pytest.raises(KeyError):
                    s.loc[key]

        # Right endpoints.
        for key, expected in zip(idx.right, s):
            if idx.closed_right:
                assert s[key] == expected
                assert s.loc[key] == expected
            else:
                with pytest.raises(KeyError):
                    s[key]
                with pytest.raises(KeyError):
                    s.loc[key]

        # Midpoints.
        for key, expected in zip(idx.mid, s):
            assert s[key] == expected
            assert s.loc[key] == expected

    def test_with_interval(self):
        """Index with ``Interval`` keys, alone and inside a list."""
        s = self.s

        # A key equal to the first interval returns its scalar value.
        expected = 0
        result = s.loc[Interval(0, 1)]
        assert result == expected
        result = s[Interval(0, 1)]
        assert result == expected

        # A key that is not itself an element of the index is expected to
        # return rows 3 and 4 as a series.
        expected = s.iloc[3:5]
        result = s.loc[Interval(3, 6)]
        tm.assert_series_equal(expected, result)

        expected = s.iloc[3:5]
        result = s.loc[[Interval(3, 6)]]
        tm.assert_series_equal(expected, result)

        expected = s.iloc[3:5]
        result = s.loc[[Interval(3, 5)]]
        tm.assert_series_equal(expected, result)

        # missing: these interval keys must raise KeyError
        with pytest.raises(KeyError):
            s.loc[Interval(-2, 0)]
        with pytest.raises(KeyError):
            s[Interval(-2, 0)]
        with pytest.raises(KeyError):
            s.loc[Interval(5, 6)]
        with pytest.raises(KeyError):
            s[Interval(5, 6)]

    def test_with_slices(self):
        """Slicing by ``Interval`` or with a non-unit step is rejected."""
        s = self.s

        # slice of interval
        with pytest.raises(NotImplementedError):
            s.loc[Interval(3, 6):]
        with pytest.raises(NotImplementedError):
            s[Interval(3, 6):]

        # A list holding the same interval is a list lookup, not a slice.
        expected = s.iloc[3:5]
        result = s[[Interval(3, 6)]]
        tm.assert_series_equal(expected, result)

        # slice of scalar with step != 1
        with pytest.raises(ValueError):
            s[0:4:2]

    def test_with_overlaps(self):
        """Lookups that hit several intervals return every matching row."""
        s = self.s

        # Repeating the key repeats the matching rows.
        expected = s.iloc[[3, 4, 3, 4]]
        result = s.loc[[Interval(3, 6), Interval(3, 6)]]
        tm.assert_series_equal(expected, result)

        # Rebuild ``s`` on two overlapping intervals; each lookup below is
        # expected to return the whole series.
        idx = IntervalIndex.from_tuples([(1, 5), (3, 7)])
        s = Series(range(len(idx)), index=idx)

        result = s[4]
        expected = s
        tm.assert_series_equal(expected, result)

        result = s[[4]]
        expected = s
        tm.assert_series_equal(expected, result)

        result = s.loc[[4]]
        expected = s
        tm.assert_series_equal(expected, result)

        result = s[Interval(3, 5)]
        expected = s
        tm.assert_series_equal(expected, result)

        result = s.loc[Interval(3, 5)]
        expected = s
        tm.assert_series_equal(expected, result)

        # doesn't intersect unique set of intervals
        with pytest.raises(KeyError):
            s[[Interval(3, 5)]]
        with pytest.raises(KeyError):
            s.loc[[Interval(3, 5)]]

    def test_non_unique(self):
        """An ``Interval`` key present in the index: scalar vs. list form."""
        idx = IntervalIndex.from_tuples([(1, 3), (3, 7)])
        s = pd.Series(range(len(idx)), index=idx)

        # Bare key returns the scalar value.
        result = s.loc[Interval(1, 3)]
        assert result == 0

        # The same key in a list returns a one-row series.
        result = s.loc[[Interval(1, 3)]]
        expected = s.iloc[0:1]
        tm.assert_series_equal(expected, result)

    def test_non_unique_moar(self):
        """Index containing a duplicated interval."""
        idx = IntervalIndex.from_tuples([(1, 3), (1, 3), (3, 7)])
        s = Series(range(len(idx)), index=idx)

        # The duplicated key returns both of its rows.
        result = s.loc[Interval(1, 3)]
        expected = s.iloc[[0, 1]]
        tm.assert_series_equal(expected, result)

        # non-unique index and slices not allowed
        with pytest.raises(ValueError):
            s.loc[Interval(1, 3):]
        with pytest.raises(ValueError):
            s[Interval(1, 3):]

        # non-unique
        with pytest.raises(ValueError):
            s[[Interval(1, 3)]]

    def test_non_matching(self):
        """A list containing any non-matching scalar raises ``KeyError``."""
        s = self.s

        # this is a departure from our current
        # indexing scheme, but simpler
        with pytest.raises(KeyError):
            s.loc[[-1, 3, 4, 5]]
        with pytest.raises(KeyError):
            s.loc[[-1, 3]]

    def test_large_series(self):
        """Equivalent slice spellings agree on a one-million-row series."""
        s = Series(np.arange(1000000),
                   index=IntervalIndex.from_breaks(np.arange(1000001)))

        # Open start, explicit start, and explicit unit step.
        result1 = s.loc[:80000]
        result2 = s.loc[0:80000]
        result3 = s.loc[0:80000:1]
        tm.assert_series_equal(result1, result2)
        tm.assert_series_equal(result1, result3)

    def test_loc_getitem_frame(self):
        """``DataFrame.loc`` on an index produced by ``pd.cut``."""
        df = DataFrame({'A': range(10)})
        # Cut column A into 5 bins and use the resulting bins as the index.
        s = pd.cut(df.A, 5)
        df['B'] = s
        df = df.set_index('B')

        # A scalar is expected to return rows 4 and 5.
        result = df.loc[4]
        expected = df.iloc[4:6]
        tm.assert_frame_equal(result, expected)

        with pytest.raises(KeyError):
            df.loc[10]

        # single list-like
        result = df.loc[[4]]
        expected = df.iloc[4:6]
        tm.assert_frame_equal(result, expected)

        # non-unique
        result = df.loc[[4, 5]]
        expected = df.take([4, 5, 4, 5])
        tm.assert_frame_equal(result, expected)

        with pytest.raises(KeyError):
            df.loc[[10]]

        # partial missing
        with pytest.raises(KeyError):
            df.loc[[10, 4]]
index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two', 'three']], labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], names=['first', 'second']) raw_frame = DataFrame(np.random.randn(10, 3), index=index, columns=Index(['A', 'B', 'C'], name='exp')) raw_frame.iloc[1, [1, 2]] = np.nan raw_frame.iloc[7, [0, 1]] = np.nan return raw_frame @pytest.mark.parametrize( "op, level, axis, skipna, sort", product(AGG_FUNCTIONS, lrange(2), lrange(2), [True, False], [True, False])) def test_regression_whitelist_methods( raw_frame, op, level, axis, skipna, sort): # GH6944 # GH 17537 # explicity test the whitelest methods if axis == 0: frame = raw_frame else: frame = raw_frame.T if op in AGG_FUNCTIONS_WITH_SKIPNA: grouped = frame.groupby(level=level, axis=axis, sort=sort)