def test_take_filling_fill_value(self): # same tests as GH 12631 sparse = SparseArray([np.nan, 0, 1, 0, 4], fill_value=0) result = sparse.take(np.array([1, 0, -1])) expected = SparseArray([0, np.nan, 4], fill_value=0) tm.assert_sp_array_equal(result, expected) # fill_value result = sparse.take(np.array([1, 0, -1]), allow_fill=True) # XXX: behavior change. # the old way of filling self.fill_value doesn't follow EA rules. # It's supposed to be self.dtype.na_value (nan in this case) expected = SparseArray([0, np.nan, np.nan], fill_value=0) tm.assert_sp_array_equal(result, expected) # allow_fill=False result = sparse.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True) expected = SparseArray([0, np.nan, 4], fill_value=0) tm.assert_sp_array_equal(result, expected) msg = ("Invalid value in 'indices'.") with tm.assert_raises_regex(ValueError, msg): sparse.take(np.array([1, 0, -2]), allow_fill=True) with tm.assert_raises_regex(ValueError, msg): sparse.take(np.array([1, 0, -5]), allow_fill=True) with pytest.raises(IndexError): sparse.take(np.array([1, -6])) with pytest.raises(IndexError): sparse.take(np.array([1, 5])) with pytest.raises(IndexError): sparse.take(np.array([1, 5]), fill_value=True)
def test_take_fill_value_with_timezone(self): idx = pd.DatetimeIndex(['2011-01-01', '2011-02-01', '2011-03-01'], name='xxx', tz='US/Eastern') result = idx.take(np.array([1, 0, -1])) expected = pd.DatetimeIndex(['2011-02-01', '2011-01-01', '2011-03-01'], name='xxx', tz='US/Eastern') tm.assert_index_equal(result, expected) # fill_value result = idx.take(np.array([1, 0, -1]), fill_value=True) expected = pd.DatetimeIndex(['2011-02-01', '2011-01-01', 'NaT'], name='xxx', tz='US/Eastern') tm.assert_index_equal(result, expected) # allow_fill=False result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True) expected = pd.DatetimeIndex(['2011-02-01', '2011-01-01', '2011-03-01'], name='xxx', tz='US/Eastern') tm.assert_index_equal(result, expected) msg = ('When allow_fill=True and fill_value is not None, ' 'all indices must be >= -1') with tm.assert_raises_regex(ValueError, msg): idx.take(np.array([1, 0, -2]), fill_value=True) with tm.assert_raises_regex(ValueError, msg): idx.take(np.array([1, 0, -5]), fill_value=True) with pytest.raises(IndexError): idx.take(np.array([1, -5]))
def test_take_fill_value(self): # GH 12631 idx = TimedeltaIndex(['1 days', '2 days', '3 days'], name='xxx') result = idx.take(np.array([1, 0, -1])) expected = TimedeltaIndex(['2 days', '1 days', '3 days'], name='xxx') tm.assert_index_equal(result, expected) # fill_value result = idx.take(np.array([1, 0, -1]), fill_value=True) expected = TimedeltaIndex(['2 days', '1 days', 'NaT'], name='xxx') tm.assert_index_equal(result, expected) # allow_fill=False result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True) expected = TimedeltaIndex(['2 days', '1 days', '3 days'], name='xxx') tm.assert_index_equal(result, expected) msg = ('When allow_fill=True and fill_value is not None, ' 'all indices must be >= -1') with tm.assert_raises_regex(ValueError, msg): idx.take(np.array([1, 0, -2]), fill_value=True) with tm.assert_raises_regex(ValueError, msg): idx.take(np.array([1, 0, -5]), fill_value=True) with pytest.raises(IndexError): idx.take(np.array([1, -5]))
def test_cumsum(self, data, expected, numpy): cumsum = np.cumsum if numpy else lambda s: s.cumsum() out = cumsum(SparseArray(data)) tm.assert_sp_array_equal(out, expected) out = cumsum(SparseArray(data, fill_value=np.nan)) tm.assert_sp_array_equal(out, expected) out = cumsum(SparseArray(data, fill_value=2)) tm.assert_sp_array_equal(out, expected) if numpy: # numpy compatibility checks. msg = "the 'dtype' parameter is not supported" tm.assert_raises_regex(ValueError, msg, np.cumsum, SparseArray(data), dtype=np.int64) msg = "the 'out' parameter is not supported" tm.assert_raises_regex(ValueError, msg, np.cumsum, SparseArray(data), out=out) else: axis = 1 # SparseArray currently 1-D, so only axis = 0 is valid. msg = "axis\\(={axis}\\) out of bounds".format(axis=axis) with tm.assert_raises_regex(ValueError, msg): SparseArray(data).cumsum(axis=axis)
def test_constructor_generic_timestamp_deprecated(self): # see gh-15524 with tm.assert_produces_warning(FutureWarning): dtype = np.timedelta64 s = Series([], dtype=dtype) assert s.empty assert s.dtype == 'm8[ns]' with tm.assert_produces_warning(FutureWarning): dtype = np.datetime64 s = Series([], dtype=dtype) assert s.empty assert s.dtype == 'M8[ns]' # These timestamps have the wrong frequencies, # so an Exception should be raised now. msg = "cannot convert timedeltalike" with tm.assert_raises_regex(TypeError, msg): Series([], dtype='m8[ps]') msg = "cannot convert datetimelike" with tm.assert_raises_regex(TypeError, msg): Series([], dtype='M8[ps]')
def test_index_equal_metadata_message(self): expected = """Index are different Attribute "names" are different \\[left\\]: \\[None\\] \\[right\\]: \\[u?'x'\\]""" idx1 = pd.Index([1, 2, 3]) idx2 = pd.Index([1, 2, 3], name='x') with tm.assert_raises_regex(AssertionError, expected): assert_index_equal(idx1, idx2) # same name, should pass assert_index_equal(pd.Index([1, 2, 3], name=np.nan), pd.Index([1, 2, 3], name=np.nan)) assert_index_equal(pd.Index([1, 2, 3], name=pd.NaT), pd.Index([1, 2, 3], name=pd.NaT)) expected = """Index are different Attribute "names" are different \\[left\\]: \\[nan\\] \\[right\\]: \\[NaT\\]""" idx1 = pd.Index([1, 2, 3], name=np.nan) idx2 = pd.Index([1, 2, 3], name=pd.NaT) with tm.assert_raises_regex(AssertionError, expected): assert_index_equal(idx1, idx2)
def test_categorical_equal_message(self): expected = """Categorical\\.categories are different Categorical\\.categories values are different \\(25\\.0 %\\) \\[left\\]: Int64Index\\(\\[1, 2, 3, 4\\], dtype='int64'\\) \\[right\\]: Int64Index\\(\\[1, 2, 3, 5\\], dtype='int64'\\)""" a = pd.Categorical([1, 2, 3, 4]) b = pd.Categorical([1, 2, 3, 5]) with tm.assert_raises_regex(AssertionError, expected): tm.assert_categorical_equal(a, b) expected = """Categorical\\.codes are different Categorical\\.codes values are different \\(50\\.0 %\\) \\[left\\]: \\[0, 1, 3, 2\\] \\[right\\]: \\[0, 1, 2, 3\\]""" a = pd.Categorical([1, 2, 4, 3], categories=[1, 2, 3, 4]) b = pd.Categorical([1, 2, 3, 4], categories=[1, 2, 3, 4]) with tm.assert_raises_regex(AssertionError, expected): tm.assert_categorical_equal(a, b) expected = """Categorical are different Attribute "ordered" are different \\[left\\]: False \\[right\\]: True""" a = pd.Categorical([1, 2, 3, 4], ordered=False) b = pd.Categorical([1, 2, 3, 4], ordered=True) with tm.assert_raises_regex(AssertionError, expected): tm.assert_categorical_equal(a, b)
def test_take_fill_value(self): # GH 12631 idx = pd.Float64Index([1., 2., 3.], name='xxx') result = idx.take(np.array([1, 0, -1])) expected = pd.Float64Index([2., 1., 3.], name='xxx') tm.assert_index_equal(result, expected) # fill_value result = idx.take(np.array([1, 0, -1]), fill_value=True) expected = pd.Float64Index([2., 1., np.nan], name='xxx') tm.assert_index_equal(result, expected) # allow_fill=False result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True) expected = pd.Float64Index([2., 1., 3.], name='xxx') tm.assert_index_equal(result, expected) msg = ('When allow_fill=True and fill_value is not None, ' 'all indices must be >= -1') with tm.assert_raises_regex(ValueError, msg): idx.take(np.array([1, 0, -2]), fill_value=True) with tm.assert_raises_regex(ValueError, msg): idx.take(np.array([1, 0, -5]), fill_value=True) with pytest.raises(IndexError): idx.take(np.array([1, -5]))
def test_take_fill_value(self): # see gh-12631 idx = self._holder([1, 2, 3], name='xxx') result = idx.take(np.array([1, 0, -1])) expected = self._holder([2, 1, 3], name='xxx') tm.assert_index_equal(result, expected) name = self._holder.__name__ msg = ("Unable to fill values because " "{name} cannot contain NA").format(name=name) # fill_value=True with tm.assert_raises_regex(ValueError, msg): idx.take(np.array([1, 0, -1]), fill_value=True) # allow_fill=False result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True) expected = self._holder([2, 1, 3], name='xxx') tm.assert_index_equal(result, expected) with tm.assert_raises_regex(ValueError, msg): idx.take(np.array([1, 0, -2]), fill_value=True) with tm.assert_raises_regex(ValueError, msg): idx.take(np.array([1, 0, -5]), fill_value=True) with pytest.raises(IndexError): idx.take(np.array([1, -5]))
def test_generic_errors(self, constructor): # filler input data to be used when supplying invalid kwargs filler = self.get_kwargs_from_breaks(range(10)) # invalid closed msg = "invalid option for 'closed': invalid" with tm.assert_raises_regex(ValueError, msg): constructor(closed='invalid', **filler) # unsupported dtype msg = 'dtype must be an IntervalDtype, got int64' with tm.assert_raises_regex(TypeError, msg): constructor(dtype='int64', **filler) # invalid dtype msg = 'data type "invalid" not understood' with tm.assert_raises_regex(TypeError, msg): constructor(dtype='invalid', **filler) # no point in nesting periods in an IntervalIndex periods = period_range('2000-01-01', periods=10) periods_kwargs = self.get_kwargs_from_breaks(periods) msg = 'Period dtypes are not supported, use a PeriodIndex instead' with tm.assert_raises_regex(ValueError, msg): constructor(**periods_kwargs) # decreasing values decreasing_kwargs = self.get_kwargs_from_breaks(range(10, -1, -1)) msg = 'left side of interval must be <= right side' with tm.assert_raises_regex(ValueError, msg): constructor(**decreasing_kwargs)
def test_no_args_with_kwargs(self): bad_arg = 'bar' min_fname_arg_count = 2 compat_args = OrderedDict() compat_args['foo'] = -5 compat_args[bad_arg] = 1 msg = (r"the '{arg}' parameter is not supported " r"in the pandas implementation of {func}\(\)". format(arg=bad_arg, func=self.fname)) args = () kwargs = {'foo': -5, bad_arg: 2} tm.assert_raises_regex(ValueError, msg, validate_args_and_kwargs, self.fname, args, kwargs, min_fname_arg_count, compat_args) args = (-5, 2) kwargs = {} tm.assert_raises_regex(ValueError, msg, validate_args_and_kwargs, self.fname, args, kwargs, min_fname_arg_count, compat_args)
def test_get_loc2(self): idx = pd.period_range('2000-01-01', periods=3) for method in [None, 'pad', 'backfill', 'nearest']: assert idx.get_loc(idx[1], method) == 1 assert idx.get_loc(idx[1].asfreq('H', how='start'), method) == 1 assert idx.get_loc(idx[1].to_timestamp(), method) == 1 assert idx.get_loc(idx[1].to_timestamp() .to_pydatetime(), method) == 1 assert idx.get_loc(str(idx[1]), method) == 1 idx = pd.period_range('2000-01-01', periods=5)[::2] assert idx.get_loc('2000-01-02T12', method='nearest', tolerance='1 day') == 1 assert idx.get_loc('2000-01-02T12', method='nearest', tolerance=pd.Timedelta('1D')) == 1 assert idx.get_loc('2000-01-02T12', method='nearest', tolerance=np.timedelta64(1, 'D')) == 1 assert idx.get_loc('2000-01-02T12', method='nearest', tolerance=timedelta(1)) == 1 with tm.assert_raises_regex(ValueError, 'unit abbreviation w/o a number'): idx.get_loc('2000-01-10', method='nearest', tolerance='foo') msg = 'Input has different freq from PeriodIndex\\(freq=D\\)' with tm.assert_raises_regex(ValueError, msg): idx.get_loc('2000-01-10', method='nearest', tolerance='1 hour') with pytest.raises(KeyError): idx.get_loc('2000-01-10', method='nearest', tolerance='1 day') with pytest.raises( ValueError, match='list-like tolerance size must match target index size'): idx.get_loc('2000-01-10', method='nearest', tolerance=[pd.Timedelta('1 day').to_timedelta64(), pd.Timedelta('1 day').to_timedelta64()])
def test_getitem_partial(self): rng = period_range('2007-01', periods=50, freq='M') ts = Series(np.random.randn(len(rng)), rng) pytest.raises(KeyError, ts.__getitem__, '2006') result = ts['2008'] assert (result.index.year == 2008).all() result = ts['2008':'2009'] assert len(result) == 24 result = ts['2008-1':'2009-12'] assert len(result) == 24 result = ts['2008Q1':'2009Q4'] assert len(result) == 24 result = ts[:'2009'] assert len(result) == 36 result = ts['2009':] assert len(result) == 50 - 24 exp = result result = ts[24:] tm.assert_series_equal(exp, result) ts = ts[10:].append(ts[10:]) tm.assert_raises_regex(KeyError, "left slice bound for non-unique " "label: '2008'", ts.__getitem__, slice('2008', '2009'))
def test_truncate_nonsortedindex(self): # GH 17935 df = pd.DataFrame({'A': ['a', 'b', 'c', 'd', 'e']}, index=[5, 3, 2, 9, 0]) with tm.assert_raises_regex(ValueError, 'truncate requires a sorted index'): df.truncate(before=3, after=9) rng = pd.date_range('2011-01-01', '2012-01-01', freq='W') ts = pd.DataFrame({'A': np.random.randn(len(rng)), 'B': np.random.randn(len(rng))}, index=rng) with tm.assert_raises_regex(ValueError, 'truncate requires a sorted index'): ts.sort_values('A', ascending=False).truncate(before='2011-11', after='2011-12') df = pd.DataFrame({3: np.random.randn(5), 20: np.random.randn(5), 2: np.random.randn(5), 0: np.random.randn(5)}, columns=[3, 20, 2, 0]) with tm.assert_raises_regex(ValueError, 'truncate requires a sorted index'): df.truncate(before=2, after=20, axis=1)
def test_add_iadd(self): for tz in self.tz: # offset offsets = [pd.offsets.Hour(2), timedelta(hours=2), np.timedelta64(2, 'h'), Timedelta(hours=2)] for delta in offsets: rng = pd.date_range('2000-01-01', '2000-02-01', tz=tz) result = rng + delta expected = pd.date_range('2000-01-01 02:00', '2000-02-01 02:00', tz=tz) tm.assert_index_equal(result, expected) rng += delta tm.assert_index_equal(rng, expected) # int rng = pd.date_range('2000-01-01 09:00', freq='H', periods=10, tz=tz) result = rng + 1 expected = pd.date_range('2000-01-01 10:00', freq='H', periods=10, tz=tz) tm.assert_index_equal(result, expected) rng += 1 tm.assert_index_equal(rng, expected) idx = DatetimeIndex(['2011-01-01', '2011-01-02']) msg = "cannot add DatetimeIndex and Timestamp" with tm.assert_raises_regex(TypeError, msg): idx + Timestamp('2011-01-01') with tm.assert_raises_regex(TypeError, msg): Timestamp('2011-01-01') + idx
def test_symmetric_difference(self): for name, idx in compat.iteritems(self.indices): first = idx[1:] second = idx[:-1] if isinstance(idx, CategoricalIndex): pass else: answer = idx[[0, -1]] result = first.symmetric_difference(second) assert tm.equalContents(result, answer) # GH 10149 cases = [klass(second.values) for klass in [np.array, Series, list]] for case in cases: if isinstance(idx, PeriodIndex): msg = "can only call with other PeriodIndex-ed objects" with tm.assert_raises_regex(ValueError, msg): result = first.symmetric_difference(case) elif isinstance(idx, CategoricalIndex): pass else: result = first.symmetric_difference(case) assert tm.equalContents(result, answer) if isinstance(idx, MultiIndex): msg = "other must be a MultiIndex or a list of tuples" with tm.assert_raises_regex(TypeError, msg): first.symmetric_difference([1, 2, 3])
def test_constructor_cant_cast_datetime64(self): msg = "Cannot cast datetime64 to " with tm.assert_raises_regex(TypeError, msg): Series(date_range('1/1/2000', periods=10), dtype=float) with tm.assert_raises_regex(TypeError, msg): Series(date_range('1/1/2000', periods=10), dtype=int)
def test_union_base(self): for name, idx in compat.iteritems(self.indices): first = idx[3:] second = idx[:5] everything = idx union = first.union(second) assert tm.equalContents(union, everything) # GH 10149 cases = [klass(second.values) for klass in [np.array, Series, list]] for case in cases: if isinstance(idx, PeriodIndex): msg = "can only call with other PeriodIndex-ed objects" with tm.assert_raises_regex(ValueError, msg): result = first.union(case) elif isinstance(idx, CategoricalIndex): pass else: result = first.union(case) assert tm.equalContents(result, everything) if isinstance(idx, MultiIndex): msg = "other must be a MultiIndex or a list of tuples" with tm.assert_raises_regex(TypeError, msg): result = first.union([1, 2, 3])
def test_difference_base(self): for name, idx in compat.iteritems(self.indices): first = idx[2:] second = idx[:4] answer = idx[4:] result = first.difference(second) if isinstance(idx, CategoricalIndex): pass else: assert tm.equalContents(result, answer) # GH 10149 cases = [klass(second.values) for klass in [np.array, Series, list]] for case in cases: if isinstance(idx, PeriodIndex): msg = "can only call with other PeriodIndex-ed objects" with tm.assert_raises_regex(ValueError, msg): result = first.difference(case) elif isinstance(idx, CategoricalIndex): pass elif isinstance(idx, (DatetimeIndex, TimedeltaIndex)): assert result.__class__ == answer.__class__ tm.assert_numpy_array_equal(result.sort_values().asi8, answer.sort_values().asi8) else: result = first.difference(case) assert tm.equalContents(result, answer) if isinstance(idx, MultiIndex): msg = "other must be a MultiIndex or a list of tuples" with tm.assert_raises_regex(TypeError, msg): result = first.difference([1, 2, 3])
def test_set_ordered(self): cat = Categorical(["a", "b", "c", "a"], ordered=True) cat2 = cat.as_unordered() assert not cat2.ordered cat2 = cat.as_ordered() assert cat2.ordered cat2.as_unordered(inplace=True) assert not cat2.ordered cat2.as_ordered(inplace=True) assert cat2.ordered assert cat2.set_ordered(True).ordered assert not cat2.set_ordered(False).ordered cat2.set_ordered(True, inplace=True) assert cat2.ordered cat2.set_ordered(False, inplace=True) assert not cat2.ordered # removed in 0.19.0 msg = "can\'t set attribute" with tm.assert_raises_regex(AttributeError, msg): cat.ordered = True with tm.assert_raises_regex(AttributeError, msg): cat.ordered = False
def test_take_fill_value(self): # GH 12631 idx = pd.RangeIndex(1, 4, name='xxx') result = idx.take(np.array([1, 0, -1])) expected = pd.Int64Index([2, 1, 3], name='xxx') tm.assert_index_equal(result, expected) # fill_value msg = "Unable to fill values because RangeIndex cannot contain NA" with tm.assert_raises_regex(ValueError, msg): idx.take(np.array([1, 0, -1]), fill_value=True) # allow_fill=False result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True) expected = pd.Int64Index([2, 1, 3], name='xxx') tm.assert_index_equal(result, expected) msg = "Unable to fill values because RangeIndex cannot contain NA" with tm.assert_raises_regex(ValueError, msg): idx.take(np.array([1, 0, -2]), fill_value=True) with tm.assert_raises_regex(ValueError, msg): idx.take(np.array([1, 0, -5]), fill_value=True) with pytest.raises(IndexError): idx.take(np.array([1, -5]))
def test_dti_isub_tdi(self, tz): # GH 17558 dti = DatetimeIndex([Timestamp('2017-01-01', tz=tz)] * 10) tdi = pd.timedelta_range('0 days', periods=10) expected = pd.date_range('2017-01-01', periods=10, tz=tz, freq='-1D') # isub with TimedeltaIndex result = DatetimeIndex([Timestamp('2017-01-01', tz=tz)] * 10) result -= tdi tm.assert_index_equal(result, expected) msg = 'cannot subtract .*TimedeltaIndex' with tm.assert_raises_regex(TypeError, msg): tdi -= dti # isub with timedelta64 array result = DatetimeIndex([Timestamp('2017-01-01', tz=tz)] * 10) result -= tdi.values tm.assert_index_equal(result, expected) msg = '|'.join(['cannot perform __neg__ with this index type:', 'ufunc subtract cannot use operands with types', 'cannot subtract DatetimeIndex from']) with tm.assert_raises_regex(TypeError, msg): tdi.values -= dti
def test_scalar_error(self): # GH 4892 # float_indexers should raise exceptions # on appropriate Index types & accessors # this duplicates the code below # but is spefically testing for the error # message for index in [tm.makeStringIndex, tm.makeUnicodeIndex, tm.makeCategoricalIndex, tm.makeDateIndex, tm.makeTimedeltaIndex, tm.makePeriodIndex, tm.makeIntIndex, tm.makeRangeIndex]: i = index(5) s = Series(np.arange(len(i)), index=i) def f(): s.iloc[3.0] tm.assert_raises_regex(TypeError, 'Cannot index by location index', f) def f(): s.iloc[3.0] = 0 pytest.raises(TypeError, f)
def test_numpy_transpose(self): sdf = SparseDataFrame([1, 2, 3], index=[1, 2, 3], columns=['a']) result = np.transpose(np.transpose(sdf)) tm.assert_sp_frame_equal(result, sdf) msg = "the 'axes' parameter is not supported" tm.assert_raises_regex(ValueError, msg, np.transpose, sdf, axes=1)
def test_period_array_freq_mismatch(): arr = period_array(['2000', '2001'], freq='D') with tm.assert_raises_regex(IncompatibleFrequency, 'freq'): PeriodArray(arr, freq='M') with tm.assert_raises_regex(IncompatibleFrequency, 'freq'): PeriodArray(arr, freq=pd.tseries.offsets.MonthEnd())
def test_union_categoricals_ordered(self): c1 = Categorical([1, 2, 3], ordered=True) c2 = Categorical([1, 2, 3], ordered=False) msg = 'Categorical.ordered must be the same' with tm.assert_raises_regex(TypeError, msg): union_categoricals([c1, c2]) res = union_categoricals([c1, c1]) exp = Categorical([1, 2, 3, 1, 2, 3], ordered=True) tm.assert_categorical_equal(res, exp) c1 = Categorical([1, 2, 3, np.nan], ordered=True) c2 = Categorical([3, 2], categories=[1, 2, 3], ordered=True) res = union_categoricals([c1, c2]) exp = Categorical([1, 2, 3, np.nan, 3, 2], ordered=True) tm.assert_categorical_equal(res, exp) c1 = Categorical([1, 2, 3], ordered=True) c2 = Categorical([1, 2, 3], categories=[3, 2, 1], ordered=True) msg = "to union ordered Categoricals, all categories must be the same" with tm.assert_raises_regex(TypeError, msg): union_categoricals([c1, c2])
def test_reindex_axis_style_raises(self): # https://github.com/pandas-dev/pandas/issues/12392 df = pd.DataFrame({"A": [1, 2, 3], 'B': [4, 5, 6]}) with tm.assert_raises_regex(TypeError, "Cannot specify both 'axis'"): df.reindex([0, 1], ['A'], axis=1) with tm.assert_raises_regex(TypeError, "Cannot specify both 'axis'"): df.reindex([0, 1], ['A'], axis='index') with tm.assert_raises_regex(TypeError, "Cannot specify both 'axis'"): df.reindex(index=[0, 1], axis='index') with tm.assert_raises_regex(TypeError, "Cannot specify both 'axis'"): df.reindex(index=[0, 1], axis='columns') with tm.assert_raises_regex(TypeError, "Cannot specify both 'axis'"): df.reindex(columns=[0, 1], axis='columns') with tm.assert_raises_regex(TypeError, "Cannot specify both 'axis'"): df.reindex(index=[0, 1], columns=[0, 1], axis='columns') with tm.assert_raises_regex(TypeError, 'Cannot specify all'): df.reindex([0, 1], [0], ['A']) # Mixing styles with tm.assert_raises_regex(TypeError, "Cannot specify both 'axis'"): df.reindex(index=[0, 1], axis='index') with tm.assert_raises_regex(TypeError, "Cannot specify both 'axis'"): df.reindex(index=[0, 1], axis='columns') # Duplicates with tm.assert_raises_regex(TypeError, "multiple values"): df.reindex([0, 1], labels=[0, 1])
def test_period_cons_mult(self): p1 = Period('2011-01', freq='3M') p2 = Period('2011-01', freq='M') assert p1.ordinal == p2.ordinal assert p1.freq == offsets.MonthEnd(3) assert p1.freqstr == '3M' assert p2.freq == offsets.MonthEnd() assert p2.freqstr == 'M' result = p1 + 1 assert result.ordinal == (p2 + 3).ordinal assert result.freq == p1.freq assert result.freqstr == '3M' result = p1 - 1 assert result.ordinal == (p2 - 3).ordinal assert result.freq == p1.freq assert result.freqstr == '3M' msg = ('Frequency must be positive, because it' ' represents span: -3M') with tm.assert_raises_regex(ValueError, msg): Period('2011-01', freq='-3M') msg = ('Frequency must be positive, because it' ' represents span: 0M') with tm.assert_raises_regex(ValueError, msg): Period('2011-01', freq='0M')
def test_numpy_repeat(self): cat = Categorical(["a", "b"], categories=["a", "b"]) exp = Categorical(["a", "a", "b", "b"], categories=["a", "b"]) tm.assert_categorical_equal(np.repeat(cat, 2), exp) msg = "the 'axis' parameter is not supported" tm.assert_raises_regex(ValueError, msg, np.repeat, cat, 2, axis=1)
def test_tdi_round(self): td = pd.timedelta_range(start='16801 days', periods=5, freq='30Min') elt = td[1] expected_rng = TimedeltaIndex([Timedelta('16801 days 00:00:00'), Timedelta('16801 days 00:00:00'), Timedelta('16801 days 01:00:00'), Timedelta('16801 days 02:00:00'), Timedelta('16801 days 02:00:00')]) expected_elt = expected_rng[1] tm.assert_index_equal(td.round(freq='H'), expected_rng) assert elt.round(freq='H') == expected_elt msg = pd._libs.tslibs.frequencies._INVALID_FREQ_ERROR with tm.assert_raises_regex(ValueError, msg): td.round(freq='foo') with tm.assert_raises_regex(ValueError, msg): elt.round(freq='foo') msg = "<MonthEnd> is a non-fixed frequency" with tm.assert_raises_regex(ValueError, msg): td.round(freq='M') with tm.assert_raises_regex(ValueError, msg): elt.round(freq='M')
def test_dti_radd_timestamp_raises(self): idx = DatetimeIndex(['2011-01-01', '2011-01-02']) msg = "cannot add DatetimeIndex and Timestamp" with tm.assert_raises_regex(TypeError, msg): Timestamp('2011-01-01') + idx
def test_bool_flex_frame(self): data = np.random.randn(5, 3) other_data = np.random.randn(5, 3) df = pd.DataFrame(data) other = pd.DataFrame(other_data) ndim_5 = np.ones(df.shape + (1, 3)) # Unaligned def _check_unaligned_frame(meth, op, df, other): part_o = other.loc[3:, 1:].copy() rs = meth(part_o) xp = op(df, part_o.reindex(index=df.index, columns=df.columns)) tm.assert_frame_equal(rs, xp) # DataFrame assert df.eq(df).values.all() assert not df.ne(df).values.any() for op in ['eq', 'ne', 'gt', 'lt', 'ge', 'le']: f = getattr(df, op) o = getattr(operator, op) # No NAs tm.assert_frame_equal(f(other), o(df, other)) _check_unaligned_frame(f, o, df, other) # ndarray tm.assert_frame_equal(f(other.values), o(df, other.values)) # scalar tm.assert_frame_equal(f(0), o(df, 0)) # NAs msg = "Unable to coerce to Series/DataFrame" tm.assert_frame_equal(f(np.nan), o(df, np.nan)) with tm.assert_raises_regex(ValueError, msg): f(ndim_5) # Series def _test_seq(df, idx_ser, col_ser): idx_eq = df.eq(idx_ser, axis=0) col_eq = df.eq(col_ser) idx_ne = df.ne(idx_ser, axis=0) col_ne = df.ne(col_ser) tm.assert_frame_equal(col_eq, df == pd.Series(col_ser)) tm.assert_frame_equal(col_eq, -col_ne) tm.assert_frame_equal(idx_eq, -idx_ne) tm.assert_frame_equal(idx_eq, df.T.eq(idx_ser).T) tm.assert_frame_equal(col_eq, df.eq(list(col_ser))) tm.assert_frame_equal(idx_eq, df.eq(pd.Series(idx_ser), axis=0)) tm.assert_frame_equal(idx_eq, df.eq(list(idx_ser), axis=0)) idx_gt = df.gt(idx_ser, axis=0) col_gt = df.gt(col_ser) idx_le = df.le(idx_ser, axis=0) col_le = df.le(col_ser) tm.assert_frame_equal(col_gt, df > pd.Series(col_ser)) tm.assert_frame_equal(col_gt, -col_le) tm.assert_frame_equal(idx_gt, -idx_le) tm.assert_frame_equal(idx_gt, df.T.gt(idx_ser).T) idx_ge = df.ge(idx_ser, axis=0) col_ge = df.ge(col_ser) idx_lt = df.lt(idx_ser, axis=0) col_lt = df.lt(col_ser) tm.assert_frame_equal(col_ge, df >= pd.Series(col_ser)) tm.assert_frame_equal(col_ge, -col_lt) tm.assert_frame_equal(idx_ge, -idx_lt) tm.assert_frame_equal(idx_ge, df.T.ge(idx_ser).T) idx_ser = pd.Series(np.random.randn(5)) col_ser = pd.Series(np.random.randn(3)) _test_seq(df, idx_ser, col_ser) # list/tuple _test_seq(df, idx_ser.values, col_ser.values) # NA df.loc[0, 0] = np.nan rs = df.eq(df) assert not rs.loc[0, 0] rs = df.ne(df) assert rs.loc[0, 0] rs = df.gt(df) assert not rs.loc[0, 0] rs = df.lt(df) assert not rs.loc[0, 0] rs = df.ge(df) assert not rs.loc[0, 0] rs = df.le(df) assert not rs.loc[0, 0] # complex arr = np.array([np.nan, 1, 6, np.nan]) arr2 = np.array([2j, np.nan, 7, None]) df = pd.DataFrame({'a': arr}) df2 = pd.DataFrame({'a': arr2}) rs = df.gt(df2) assert not rs.values.any() rs = df.ne(df2) assert rs.values.all() arr3 = np.array([2j, np.nan, None]) df3 = pd.DataFrame({'a': arr3}) rs = df3.gt(2j) assert not rs.values.any() # corner, dtype=object df1 = pd.DataFrame({'col': ['foo', np.nan, 'bar']}) df2 = pd.DataFrame({'col': ['foo', datetime.now(), 'bar']}) result = df1.ne(df2) exp = pd.DataFrame({'col': [False, True, False]}) tm.assert_frame_equal(result, exp)
def test_sort_values(self): frame = DataFrame([[1, 1, 2], [3, 1, 0], [4, 5, 6]], index=[1, 2, 3], columns=list('ABC')) # by column (axis=0) sorted_df = frame.sort_values(by='A') indexer = frame['A'].argsort().values expected = frame.loc[frame.index[indexer]] assert_frame_equal(sorted_df, expected) sorted_df = frame.sort_values(by='A', ascending=False) indexer = indexer[::-1] expected = frame.loc[frame.index[indexer]] assert_frame_equal(sorted_df, expected) sorted_df = frame.sort_values(by='A', ascending=False) assert_frame_equal(sorted_df, expected) # GH4839 sorted_df = frame.sort_values(by=['A'], ascending=[False]) assert_frame_equal(sorted_df, expected) # multiple bys sorted_df = frame.sort_values(by=['B', 'C']) expected = frame.loc[[2, 1, 3]] assert_frame_equal(sorted_df, expected) sorted_df = frame.sort_values(by=['B', 'C'], ascending=False) assert_frame_equal(sorted_df, expected[::-1]) sorted_df = frame.sort_values(by=['B', 'A'], ascending=[True, False]) assert_frame_equal(sorted_df, expected) pytest.raises( ValueError, lambda: frame.sort_values(by=['A', 'B'], axis=2, inplace=True)) # by row (axis=1): GH 10806 sorted_df = frame.sort_values(by=3, axis=1) expected = frame assert_frame_equal(sorted_df, expected) sorted_df = frame.sort_values(by=3, axis=1, ascending=False) expected = frame.reindex(columns=['C', 'B', 'A']) assert_frame_equal(sorted_df, expected) sorted_df = frame.sort_values(by=[1, 2], axis='columns') expected = frame.reindex(columns=['B', 'A', 'C']) assert_frame_equal(sorted_df, expected) sorted_df = frame.sort_values(by=[1, 3], axis=1, ascending=[True, False]) assert_frame_equal(sorted_df, expected) sorted_df = frame.sort_values(by=[1, 3], axis=1, ascending=False) expected = frame.reindex(columns=['C', 'B', 'A']) assert_frame_equal(sorted_df, expected) msg = r'Length of ascending \(5\) != length of by \(2\)' with tm.assert_raises_regex(ValueError, msg): frame.sort_values(by=['A', 'B'], axis=0, ascending=[True] * 5)
def test_tz_convert_and_localize(self, fn): l0 = date_range('20140701', periods=5, freq='D') # TODO: l1 should be a PeriodIndex for testing # after GH2106 is addressed with pytest.raises(NotImplementedError): period_range('20140701', periods=1).tz_convert('UTC') with pytest.raises(NotImplementedError): period_range('20140701', periods=1).tz_localize('UTC') # l1 = period_range('20140701', periods=5, freq='D') l1 = date_range('20140701', periods=5, freq='D') int_idx = Index(range(5)) if fn == 'tz_convert': l0 = l0.tz_localize('UTC') l1 = l1.tz_localize('UTC') for idx in [l0, l1]: l0_expected = getattr(idx, fn)('US/Pacific') l1_expected = getattr(idx, fn)('US/Pacific') df1 = DataFrame(np.ones(5), index=l0) df1 = getattr(df1, fn)('US/Pacific') assert_index_equal(df1.index, l0_expected) # MultiIndex # GH7846 df2 = DataFrame(np.ones(5), MultiIndex.from_arrays([l0, l1])) df3 = getattr(df2, fn)('US/Pacific', level=0) assert not df3.index.levels[0].equals(l0) assert_index_equal(df3.index.levels[0], l0_expected) assert_index_equal(df3.index.levels[1], l1) assert not df3.index.levels[1].equals(l1_expected) df3 = getattr(df2, fn)('US/Pacific', level=1) assert_index_equal(df3.index.levels[0], l0) assert not df3.index.levels[0].equals(l0_expected) assert_index_equal(df3.index.levels[1], l1_expected) assert not df3.index.levels[1].equals(l1) df4 = DataFrame(np.ones(5), MultiIndex.from_arrays([int_idx, l0])) # TODO: untested df5 = getattr(df4, fn)('US/Pacific', level=1) # noqa assert_index_equal(df3.index.levels[0], l0) assert not df3.index.levels[0].equals(l0_expected) assert_index_equal(df3.index.levels[1], l1_expected) assert not df3.index.levels[1].equals(l1) # Bad Inputs # Not DatetimeIndex / PeriodIndex with assert_raises_regex(TypeError, 'DatetimeIndex'): df = DataFrame(index=int_idx) df = getattr(df, fn)('US/Pacific') # Not DatetimeIndex / PeriodIndex with assert_raises_regex(TypeError, 'DatetimeIndex'): df = DataFrame(np.ones(5), MultiIndex.from_arrays([int_idx, l0])) df = getattr(df, fn)('US/Pacific', level=0) # Invalid level with assert_raises_regex(ValueError, 'not valid'): df = DataFrame(index=l0) df = getattr(df, fn)('US/Pacific', level=1)
def test_shift(self): # naive shift shiftedFrame = self.tsframe.shift(5) tm.assert_index_equal(shiftedFrame.index, self.tsframe.index) shiftedSeries = self.tsframe['A'].shift(5) assert_series_equal(shiftedFrame['A'], shiftedSeries) shiftedFrame = self.tsframe.shift(-5) tm.assert_index_equal(shiftedFrame.index, self.tsframe.index) shiftedSeries = self.tsframe['A'].shift(-5) assert_series_equal(shiftedFrame['A'], shiftedSeries) # shift by 0 unshifted = self.tsframe.shift(0) assert_frame_equal(unshifted, self.tsframe) # shift by DateOffset shiftedFrame = self.tsframe.shift(5, freq=offsets.BDay()) assert len(shiftedFrame) == len(self.tsframe) shiftedFrame2 = self.tsframe.shift(5, freq='B') assert_frame_equal(shiftedFrame, shiftedFrame2) d = self.tsframe.index[0] shifted_d = d + offsets.BDay(5) assert_series_equal(self.tsframe.xs(d), shiftedFrame.xs(shifted_d), check_names=False) # shift int frame int_shifted = self.intframe.shift(1) # noqa # Shifting with PeriodIndex ps = tm.makePeriodFrame() shifted = ps.shift(1) unshifted = shifted.shift(-1) tm.assert_index_equal(shifted.index, ps.index) tm.assert_index_equal(unshifted.index, ps.index) tm.assert_numpy_array_equal(unshifted.iloc[:, 0].dropna().values, ps.iloc[:-1, 0].values) shifted2 = ps.shift(1, 'B') shifted3 = ps.shift(1, offsets.BDay()) assert_frame_equal(shifted2, shifted3) assert_frame_equal(ps, shifted2.shift(-1, 'B')) tm.assert_raises_regex(ValueError, 'does not match PeriodIndex freq', ps.shift, freq='D') # shift other axis # GH 6371 df = DataFrame(np.random.rand(10, 5)) expected = pd.concat([DataFrame(np.nan, index=df.index, columns=[0]), df.iloc[:, 0:-1]], ignore_index=True, axis=1) result = df.shift(1, axis=1) assert_frame_equal(result, expected) # shift named axis df = DataFrame(np.random.rand(10, 5)) expected = pd.concat([DataFrame(np.nan, index=df.index, columns=[0]), df.iloc[:, 0:-1]], ignore_index=True, axis=1) result = df.shift(1, axis='columns') assert_frame_equal(result, expected)
def test_replace_int_to_int_chain(self): df = DataFrame({'a': lrange(1, 5)}) with tm.assert_raises_regex(ValueError, "Replacement not allowed .+"): df.replace({'a': dict(zip(range(1, 5), range(2, 6)))})
def test_replace_with_dict_with_bool_keys(self): df = DataFrame({0: [True, False], 1: [False, True]}) with tm.assert_raises_regex(TypeError, 'Cannot compare types .+'): df.replace({'asdf': 'asdb', True: 'yes'})
def test_wrong_number_names(self, indices): def testit(ind): ind.names = ["apple", "banana", "carrot"] tm.assert_raises_regex(ValueError, "^Length", testit, indices)
def test_numeric_compat(self): idx = self.create_index() tm.assert_raises_regex(TypeError, "cannot perform __mul__", lambda: idx * 1) tm.assert_raises_regex(TypeError, "cannot perform __mul__", lambda: 1 * idx) div_err = "cannot perform __truediv__" if PY3 \ else "cannot perform __div__" tm.assert_raises_regex(TypeError, div_err, lambda: idx / 1) tm.assert_raises_regex(TypeError, div_err, lambda: 1 / idx) tm.assert_raises_regex(TypeError, "cannot perform __floordiv__", lambda: idx // 1) tm.assert_raises_regex(TypeError, "cannot perform __floordiv__", lambda: 1 // idx)
def test_hash_error(self, indices): index = indices tm.assert_raises_regex(TypeError, "unhashable type: %r" % type(index).__name__, hash, indices)
def test_period_array_raises(data, freq, msg): with tm.assert_raises_regex(IncompatibleFrequency, msg): period_array(data, freq)
def test_logical_compat(self): idx = self.create_index() tm.assert_raises_regex(TypeError, 'cannot perform all', lambda: idx.all()) tm.assert_raises_regex(TypeError, 'cannot perform any', lambda: idx.any())
def tet_sub_period(): arr = period_array(['2000', '2001'], freq='D') other = pd.Period("2000", freq="M") with tm.assert_raises_regex(IncompatibleFrequency, "freq"): arr - other
def test_period_array_non_period_series_raies(): ser = pd.Series([1, 2, 3]) with tm.assert_raises_regex(TypeError, 'dtype'): PeriodArray(ser, freq='D')
def test_setitem_raises_length(): arr = PeriodArray(np.arange(3), freq="D") with tm.assert_raises_regex(ValueError, "length"): arr[[0, 1]] = [pd.Period("2000", freq="D")]
def test_from_datetime64_raises(): arr = pd.date_range("2017", periods=3, freq="D") with tm.assert_raises_regex(IncompatibleFrequency, "freq"): PeriodArray._from_datetime64(arr, freq="M")
def test_astype_datetime(other): arr = period_array(['2000', '2001', None], freq='D') # slice off the [ns] so that the regex matches. with tm.assert_raises_regex(TypeError, other[:-4]): arr.astype(other)
def test_setitem_raises_type(): arr = PeriodArray(np.arange(3), freq="D") with tm.assert_raises_regex(TypeError, "int"): arr[0] = 1
def test_add_series_with_extension_array(self, data): ser = pd.Series(data) with tm.assert_raises_regex(TypeError, "cannot perform"): ser + data
def test_fillna_raises(): arr = period_array(['2000', '2001', '2002'], freq='D') with tm.assert_raises_regex(ValueError, 'Length'): arr.fillna(arr[:2])
def test_arith_flex_frame(self): ops = ['add', 'sub', 'mul', 'div', 'truediv', 'pow', 'floordiv', 'mod'] if not compat.PY3: aliases = {} else: aliases = {'div': 'truediv'} for op in ops: try: alias = aliases.get(op, op) f = getattr(operator, alias) result = getattr(self.frame, op)(2 * self.frame) exp = f(self.frame, 2 * self.frame) assert_frame_equal(result, exp) # vs mix float result = getattr(self.mixed_float, op)(2 * self.mixed_float) exp = f(self.mixed_float, 2 * self.mixed_float) assert_frame_equal(result, exp) _check_mixed_float(result, dtype=dict(C=None)) # vs mix int if op in ['add', 'sub', 'mul']: result = getattr(self.mixed_int, op)(2 + self.mixed_int) exp = f(self.mixed_int, 2 + self.mixed_int) # no overflow in the uint dtype = None if op in ['sub']: dtype = dict(B='uint64', C=None) elif op in ['add', 'mul']: dtype = dict(C=None) assert_frame_equal(result, exp) _check_mixed_int(result, dtype=dtype) # rops r_f = lambda x, y: f(y, x) result = getattr(self.frame, 'r' + op)(2 * self.frame) exp = r_f(self.frame, 2 * self.frame) assert_frame_equal(result, exp) # vs mix float result = getattr(self.mixed_float, op)( 2 * self.mixed_float) exp = f(self.mixed_float, 2 * self.mixed_float) assert_frame_equal(result, exp) _check_mixed_float(result, dtype=dict(C=None)) result = getattr(self.intframe, op)(2 * self.intframe) exp = f(self.intframe, 2 * self.intframe) assert_frame_equal(result, exp) # vs mix int if op in ['add', 'sub', 'mul']: result = getattr(self.mixed_int, op)( 2 + self.mixed_int) exp = f(self.mixed_int, 2 + self.mixed_int) # no overflow in the uint dtype = None if op in ['sub']: dtype = dict(B='uint64', C=None) elif op in ['add', 'mul']: dtype = dict(C=None) assert_frame_equal(result, exp) _check_mixed_int(result, dtype=dtype) except: printing.pprint_thing("Failing operation %r" % op) raise # ndim >= 3 ndim_5 = np.ones(self.frame.shape + (3, 4, 5)) msg = "Unable to coerce to Series/DataFrame" with tm.assert_raises_regex(ValueError, msg): f(self.frame, ndim_5) with tm.assert_raises_regex(ValueError, msg): getattr(self.frame, op)(ndim_5) # res_add = self.frame.add(self.frame) # res_sub = self.frame.sub(self.frame) # res_mul = self.frame.mul(self.frame) # res_div = self.frame.div(2 * self.frame) # assert_frame_equal(res_add, self.frame + self.frame) # assert_frame_equal(res_sub, self.frame - self.frame) # assert_frame_equal(res_mul, self.frame * self.frame) # assert_frame_equal(res_div, self.frame / (2 * self.frame)) const_add = self.frame.add(1) assert_frame_equal(const_add, self.frame + 1) # corner cases result = self.frame.add(self.frame[:0]) assert_frame_equal(result, self.frame * np.nan) result = self.frame[:0].add(self.frame) assert_frame_equal(result, self.frame * np.nan) with tm.assert_raises_regex(NotImplementedError, 'fill_value'): self.frame.add(self.frame.iloc[0], fill_value=3) with tm.assert_raises_regex(NotImplementedError, 'fill_value'): self.frame.add(self.frame.iloc[0], axis='index', fill_value=3)
def test_take_fill_value(self): # GH 12631 # numeric category idx = pd.CategoricalIndex([1, 2, 3], name='xxx') result = idx.take(np.array([1, 0, -1])) expected = pd.CategoricalIndex([2, 1, 3], name='xxx') tm.assert_index_equal(result, expected) tm.assert_categorical_equal(result.values, expected.values) # fill_value result = idx.take(np.array([1, 0, -1]), fill_value=True) expected = pd.CategoricalIndex([2, 1, np.nan], categories=[1, 2, 3], name='xxx') tm.assert_index_equal(result, expected) tm.assert_categorical_equal(result.values, expected.values) # allow_fill=False result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True) expected = pd.CategoricalIndex([2, 1, 3], name='xxx') tm.assert_index_equal(result, expected) tm.assert_categorical_equal(result.values, expected.values) # object category idx = pd.CategoricalIndex(list('CBA'), categories=list('ABC'), ordered=True, name='xxx') result = idx.take(np.array([1, 0, -1])) expected = pd.CategoricalIndex(list('BCA'), categories=list('ABC'), ordered=True, name='xxx') tm.assert_index_equal(result, expected) tm.assert_categorical_equal(result.values, expected.values) # fill_value result = idx.take(np.array([1, 0, -1]), fill_value=True) expected = pd.CategoricalIndex(['B', 'C', np.nan], categories=list('ABC'), ordered=True, name='xxx') tm.assert_index_equal(result, expected) tm.assert_categorical_equal(result.values, expected.values) # allow_fill=False result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True) expected = pd.CategoricalIndex(list('BCA'), categories=list('ABC'), ordered=True, name='xxx') tm.assert_index_equal(result, expected) tm.assert_categorical_equal(result.values, expected.values) msg = ('When allow_fill=True and fill_value is not None, ' 'all indices must be >= -1') with tm.assert_raises_regex(ValueError, msg): idx.take(np.array([1, 0, -2]), fill_value=True) with tm.assert_raises_regex(ValueError, msg): idx.take(np.array([1, 0, -5]), fill_value=True) with pytest.raises(IndexError): idx.take(np.array([1, -5]))
def test_round_invalid_arg(self): stamp = Timestamp('2000-01-05 05:09:15.13') with tm.assert_raises_regex(ValueError, INVALID_FREQ_ERR_MSG): stamp.round('foo')
def test_constructor_invalid(self): with tm.assert_raises_regex(TypeError, 'Cannot convert input'): Timestamp(slice(2)) with tm.assert_raises_regex(ValueError, 'Cannot convert Period'): Timestamp(Period('1000-01-01'))
def test_set_index2(self): df = DataFrame({'A': ['foo', 'foo', 'foo', 'bar', 'bar'], 'B': ['one', 'two', 'three', 'one', 'two'], 'C': ['a', 'b', 'c', 'd', 'e'], 'D': np.random.randn(5), 'E': np.random.randn(5)}) # new object, single-column result = df.set_index('C') result_nodrop = df.set_index('C', drop=False) index = Index(df['C'], name='C') expected = df.loc[:, ['A', 'B', 'D', 'E']] expected.index = index expected_nodrop = df.copy() expected_nodrop.index = index assert_frame_equal(result, expected) assert_frame_equal(result_nodrop, expected_nodrop) assert result.index.name == index.name # inplace, single df2 = df.copy() df2.set_index('C', inplace=True) assert_frame_equal(df2, expected) df3 = df.copy() df3.set_index('C', drop=False, inplace=True) assert_frame_equal(df3, expected_nodrop) # create new object, multi-column result = df.set_index(['A', 'B']) result_nodrop = df.set_index(['A', 'B'], drop=False) index = MultiIndex.from_arrays([df['A'], df['B']], names=['A', 'B']) expected = df.loc[:, ['C', 'D', 'E']] expected.index = index expected_nodrop = df.copy() expected_nodrop.index = index assert_frame_equal(result, expected) assert_frame_equal(result_nodrop, expected_nodrop) assert result.index.names == index.names # inplace df2 = df.copy() df2.set_index(['A', 'B'], inplace=True) assert_frame_equal(df2, expected) df3 = df.copy() df3.set_index(['A', 'B'], drop=False, inplace=True) assert_frame_equal(df3, expected_nodrop) # corner case with tm.assert_raises_regex(ValueError, 'Index has duplicate keys'): df.set_index('A', verify_integrity=True) # append result = df.set_index(['A', 'B'], append=True) xp = df.reset_index().set_index(['index', 'A', 'B']) xp.index.names = [None, 'A', 'B'] assert_frame_equal(result, xp) # append to existing multiindex rdf = df.set_index(['A'], append=True) rdf = rdf.set_index(['B', 'C'], append=True) expected = df.set_index(['A', 'B', 'C'], append=True) assert_frame_equal(rdf, expected) # Series result = df.set_index(df.C) assert result.index.name == 'C'
def test_add_series_with_extension_array(self, data): ser = pd.Series(data) with tm.assert_raises_regex(TypeError, "unsupported"): ser + data
def test_setitem_frame_invalid_length(self, data): df = pd.DataFrame({"A": [1] * len(data)}) xpr = "Length of values does not match length of index" with tm.assert_raises_regex(ValueError, xpr): df['B'] = data[:5]
def test_fillna_raises(self, fillna_kwargs, msg): # https://github.com/pandas-dev/pandas/issues/19682 cat = Categorical([1, 2, 3]) with tm.assert_raises_regex(ValueError, msg): cat.fillna(**fillna_kwargs)
def test_hash_error(self): index = date_range('20010101', periods=10) with tm.assert_raises_regex( TypeError, "unhashable type: %r" % type(index).__name__): hash(index)
def test_set_columns(self): cols = Index(np.arange(len(self.mixed_frame.columns))) self.mixed_frame.columns = cols with tm.assert_raises_regex(ValueError, 'Length mismatch'): self.mixed_frame.columns = cols[::2]