def test_union_categoricals_ordered(self):
    """union_categoricals with ordered categoricals: matching order is required."""
    # Ordered and unordered inputs cannot be mixed.
    c1 = Categorical([1, 2, 3], ordered=True)
    c2 = Categorical([1, 2, 3], ordered=False)
    msg = 'Categorical.ordered must be the same'
    with tm.assertRaisesRegexp(TypeError, msg):
        union_categoricals([c1, c2])

    # An ordered categorical unions cleanly with itself.
    result = union_categoricals([c1, c1])
    expected = Categorical([1, 2, 3, 1, 2, 3], ordered=True)
    tm.assert_categorical_equal(result, expected)

    # NaN entries are preserved through the union.
    c1 = Categorical([1, 2, 3, np.nan], ordered=True)
    c2 = Categorical([3, 2], categories=[1, 2, 3], ordered=True)
    result = union_categoricals([c1, c2])
    expected = Categorical([1, 2, 3, np.nan, 3, 2], ordered=True)
    tm.assert_categorical_equal(result, expected)

    # Ordered inputs must agree on the exact category ordering.
    c1 = Categorical([1, 2, 3], ordered=True)
    c2 = Categorical([1, 2, 3], categories=[3, 2, 1], ordered=True)
    msg = "to union ordered Categoricals, all categories must be the same"
    with tm.assertRaisesRegexp(TypeError, msg):
        union_categoricals([c1, c2])
def test_tz(self):
    """Timestamp.tz_localize / tz_convert round-trips and error cases."""
    t = '2014-02-01 09:00'
    ts = Timestamp(t)
    local = ts.tz_localize('Asia/Tokyo')
    self.assertEqual(local.hour, 9)
    self.assertEqual(local, Timestamp(t, tz='Asia/Tokyo'))
    conv = local.tz_convert('US/Eastern')
    self.assertEqual(conv, Timestamp('2014-01-31 19:00', tz='US/Eastern'))
    self.assertEqual(conv.hour, 19)

    # preserves nanosecond
    ts = Timestamp(t) + offsets.Nano(5)
    local = ts.tz_localize('Asia/Tokyo')
    self.assertEqual(local.hour, 9)
    self.assertEqual(local.nanosecond, 5)
    conv = local.tz_convert('US/Eastern')
    self.assertEqual(conv.nanosecond, 5)
    self.assertEqual(conv.hour, 19)

    # GH 8025: re-localizing an aware stamp / converting a naive one raises
    with tm.assertRaisesRegexp(TypeError,
                               'Cannot localize tz-aware Timestamp, use '
                               'tz_convert for conversions'):
        Timestamp('2011-01-01', tz='US/Eastern').tz_localize('Asia/Tokyo')
    with tm.assertRaisesRegexp(TypeError,
                               'Cannot convert tz-naive Timestamp, use '
                               'tz_localize to localize'):
        Timestamp('2011-01-01').tz_convert('Asia/Tokyo')
def test_tz(self):
    """Timestamp.tz_localize / tz_convert round-trips and error cases."""
    t = "2014-02-01 09:00"
    ts = Timestamp(t)
    local = ts.tz_localize("Asia/Tokyo")
    self.assertEqual(local.hour, 9)
    self.assertEqual(local, Timestamp(t, tz="Asia/Tokyo"))
    conv = local.tz_convert("US/Eastern")
    self.assertEqual(conv, Timestamp("2014-01-31 19:00", tz="US/Eastern"))
    self.assertEqual(conv.hour, 19)

    # preserves nanosecond
    ts = Timestamp(t) + offsets.Nano(5)
    local = ts.tz_localize("Asia/Tokyo")
    self.assertEqual(local.hour, 9)
    self.assertEqual(local.nanosecond, 5)
    conv = local.tz_convert("US/Eastern")
    self.assertEqual(conv.nanosecond, 5)
    self.assertEqual(conv.hour, 19)

    # GH 8025: re-localizing an aware stamp / converting a naive one raises
    with tm.assertRaisesRegexp(TypeError,
                               "Cannot localize tz-aware Timestamp, use "
                               "tz_convert for conversions"):
        Timestamp("2011-01-01", tz="US/Eastern").tz_localize("Asia/Tokyo")
    with tm.assertRaisesRegexp(TypeError,
                               "Cannot convert tz-naive Timestamp, use "
                               "tz_localize to localize"):
        Timestamp("2011-01-01").tz_convert("Asia/Tokyo")
def test_index_equal_metadata_message(self):
    """assert_index_equal reports a readable diff for mismatched names."""
    expected = """Index are different

Attribute "names" are different
\\[left\\]:  \\[None\\]
\\[right\\]: \\[u?'x'\\]"""

    idx1 = pd.Index([1, 2, 3])
    idx2 = pd.Index([1, 2, 3], name='x')
    with assertRaisesRegexp(AssertionError, expected):
        assert_index_equal(idx1, idx2)

    # same name, should pass
    assert_index_equal(pd.Index([1, 2, 3], name=np.nan),
                       pd.Index([1, 2, 3], name=np.nan))
    assert_index_equal(pd.Index([1, 2, 3], name=pd.NaT),
                       pd.Index([1, 2, 3], name=pd.NaT))

    expected = """Index are different

Attribute "names" are different
\\[left\\]:  \\[nan\\]
\\[right\\]: \\[NaT\\]"""

    # nan vs NaT names are considered different
    idx1 = pd.Index([1, 2, 3], name=np.nan)
    idx2 = pd.Index([1, 2, 3], name=pd.NaT)
    with assertRaisesRegexp(AssertionError, expected):
        assert_index_equal(idx1, idx2)
def test_categorical_equal_message(self):
    """assert_categorical_equal reports readable diffs for categories/codes/ordered."""
    # differing categories
    expected = """Categorical\\.categories are different

Categorical\\.categories values are different \\(25\\.0 %\\)
\\[left\\]:  Int64Index\\(\\[1, 2, 3, 4\\], dtype='int64'\\)
\\[right\\]: Int64Index\\(\\[1, 2, 3, 5\\], dtype='int64'\\)"""

    a = pd.Categorical([1, 2, 3, 4])
    b = pd.Categorical([1, 2, 3, 5])
    with assertRaisesRegexp(AssertionError, expected):
        tm.assert_categorical_equal(a, b)

    # differing codes
    expected = """Categorical\\.codes are different

Categorical\\.codes values are different \\(50\\.0 %\\)
\\[left\\]:  \\[0, 1, 3, 2\\]
\\[right\\]: \\[0, 1, 2, 3\\]"""

    a = pd.Categorical([1, 2, 4, 3], categories=[1, 2, 3, 4])
    b = pd.Categorical([1, 2, 3, 4], categories=[1, 2, 3, 4])
    with assertRaisesRegexp(AssertionError, expected):
        tm.assert_categorical_equal(a, b)

    # differing ordered flag
    expected = """Categorical are different

Attribute "ordered" are different
\\[left\\]:  False
\\[right\\]: True"""

    a = pd.Categorical([1, 2, 3, 4], ordered=False)
    b = pd.Categorical([1, 2, 3, 4], ordered=True)
    with assertRaisesRegexp(AssertionError, expected):
        tm.assert_categorical_equal(a, b)
def test_take_fill_value(self):
    # see gh-12631
    """take() on an index type that cannot hold NA rejects fill_value=True."""
    idx = self._holder([1, 2, 3], name='xxx')
    result = idx.take(np.array([1, 0, -1]))
    expected = self._holder([2, 1, 3], name='xxx')
    tm.assert_index_equal(result, expected)

    name = self._holder.__name__
    msg = ("Unable to fill values because "
           "{name} cannot contain NA").format(name=name)

    # fill_value=True
    with tm.assertRaisesRegexp(ValueError, msg):
        idx.take(np.array([1, 0, -1]), fill_value=True)

    # allow_fill=False: -1 is a plain positional index
    result = idx.take(np.array([1, 0, -1]),
                      allow_fill=False, fill_value=True)
    expected = self._holder([2, 1, 3], name='xxx')
    tm.assert_index_equal(result, expected)

    with tm.assertRaisesRegexp(ValueError, msg):
        idx.take(np.array([1, 0, -2]), fill_value=True)
    with tm.assertRaisesRegexp(ValueError, msg):
        idx.take(np.array([1, 0, -5]), fill_value=True)
    with tm.assertRaises(IndexError):
        idx.take(np.array([1, -5]))
def test_tshift(self):
    """DataFrame.tshift on PeriodIndex and DatetimeIndex frames."""
    # PeriodIndex
    ps = tm.makePeriodFrame()
    shifted = ps.tshift(1)
    unshifted = shifted.tshift(-1)
    assert_frame_equal(unshifted, ps)

    # freq may be given as a string or an offset object
    shifted2 = ps.tshift(freq='B')
    assert_frame_equal(shifted, shifted2)
    shifted3 = ps.tshift(freq=offsets.BDay())
    assert_frame_equal(shifted, shifted3)

    assertRaisesRegexp(ValueError, 'does not match', ps.tshift, freq='M')

    # DatetimeIndex
    shifted = self.tsframe.tshift(1)
    unshifted = shifted.tshift(-1)
    assert_frame_equal(self.tsframe, unshifted)

    shifted2 = self.tsframe.tshift(freq=self.tsframe.index.freq)
    assert_frame_equal(shifted, shifted2)

    # frequency inferred from the index values
    inferred_ts = DataFrame(self.tsframe.values,
                            Index(np.asarray(self.tsframe.index)),
                            columns=self.tsframe.columns)
    shifted = inferred_ts.tshift(1)
    unshifted = shifted.tshift(-1)
    assert_frame_equal(shifted, self.tsframe.tshift(1))
    assert_frame_equal(unshifted, inferred_ts)

    # no inferable frequency -> error
    no_freq = self.tsframe.iloc[[0, 5, 7], :]
    self.assertRaises(ValueError, no_freq.tshift)
def test_get_loc(self):
    """PeriodIndex.get_loc with method= and tolerance= variants."""
    idx = pd.period_range('2000-01-01', periods=3)

    for method in [None, 'pad', 'backfill', 'nearest']:
        # the Period itself, re-frequencied, as Timestamp, datetime, and str
        self.assertEqual(idx.get_loc(idx[1], method), 1)
        self.assertEqual(
            idx.get_loc(idx[1].asfreq('H', how='start'), method), 1)
        self.assertEqual(idx.get_loc(idx[1].to_timestamp(), method), 1)
        self.assertEqual(
            idx.get_loc(idx[1].to_timestamp().to_pydatetime(), method), 1)
        self.assertEqual(idx.get_loc(str(idx[1]), method), 1)

    idx = pd.period_range('2000-01-01', periods=5)[::2]
    # tolerance accepted as str, Timedelta, np.timedelta64 or timedelta
    self.assertEqual(idx.get_loc('2000-01-02T12', method='nearest',
                                 tolerance='1 day'), 1)
    self.assertEqual(idx.get_loc('2000-01-02T12', method='nearest',
                                 tolerance=pd.Timedelta('1D')), 1)
    self.assertEqual(idx.get_loc('2000-01-02T12', method='nearest',
                                 tolerance=np.timedelta64(1, 'D')), 1)
    self.assertEqual(idx.get_loc('2000-01-02T12', method='nearest',
                                 tolerance=timedelta(1)), 1)

    with tm.assertRaisesRegexp(ValueError, 'must be convertible'):
        idx.get_loc('2000-01-10', method='nearest', tolerance='foo')

    msg = 'Input has different freq from PeriodIndex\\(freq=D\\)'
    with tm.assertRaisesRegexp(ValueError, msg):
        idx.get_loc('2000-01-10', method='nearest', tolerance='1 hour')
    with tm.assertRaises(KeyError):
        idx.get_loc('2000-01-10', method='nearest', tolerance='1 day')
def test_take_fill_value(self):
    # GH 12631
    """Float64Index.take: NaN fill for -1 when allow_fill=True."""
    idx = pd.Float64Index([1., 2., 3.], name='xxx')
    result = idx.take(np.array([1, 0, -1]))
    expected = pd.Float64Index([2., 1., 3.], name='xxx')
    tm.assert_index_equal(result, expected)

    # fill_value: -1 means "missing"
    result = idx.take(np.array([1, 0, -1]), fill_value=True)
    expected = pd.Float64Index([2., 1., np.nan], name='xxx')
    tm.assert_index_equal(result, expected)

    # allow_fill=False: -1 is a plain positional index
    result = idx.take(np.array([1, 0, -1]),
                      allow_fill=False, fill_value=True)
    expected = pd.Float64Index([2., 1., 3.], name='xxx')
    tm.assert_index_equal(result, expected)

    msg = ('When allow_fill=True and fill_value is not None, '
           'all indices must be >= -1')
    with tm.assertRaisesRegexp(ValueError, msg):
        idx.take(np.array([1, 0, -2]), fill_value=True)
    with tm.assertRaisesRegexp(ValueError, msg):
        idx.take(np.array([1, 0, -5]), fill_value=True)

    with tm.assertRaises(IndexError):
        idx.take(np.array([1, -5]))
def test_difference_base(self):
    """Index.difference against array/Series/list inputs for each index type."""
    for name, idx in compat.iteritems(self.indices):
        first = idx[2:]
        second = idx[:4]
        answer = idx[4:]
        result = first.difference(second)

        if isinstance(idx, CategoricalIndex):
            pass
        else:
            self.assertTrue(tm.equalContents(result, answer))

        # GH 10149
        cases = [klass(second.values)
                 for klass in [np.array, Series, list]]
        for case in cases:
            if isinstance(idx, PeriodIndex):
                msg = "can only call with other PeriodIndex-ed objects"
                with tm.assertRaisesRegexp(ValueError, msg):
                    result = first.difference(case)
            elif isinstance(idx, CategoricalIndex):
                pass
            elif isinstance(idx, (DatetimeIndex, TimedeltaIndex)):
                # compares against `result` computed above
                self.assertEqual(result.__class__, answer.__class__)
                tm.assert_numpy_array_equal(result.asi8, answer.asi8)
            else:
                result = first.difference(case)
                self.assertTrue(tm.equalContents(result, answer))

        if isinstance(idx, MultiIndex):
            msg = "other must be a MultiIndex or a list of tuples"
            with tm.assertRaisesRegexp(TypeError, msg):
                result = first.difference([1, 2, 3])
def test_symmetric_difference(self):
    """Index.symmetric_difference against array/Series/list inputs."""
    for name, idx in compat.iteritems(self.indices):
        first = idx[1:]
        second = idx[:-1]
        if isinstance(idx, CategoricalIndex):
            pass
        else:
            answer = idx[[0, -1]]
            result = first.symmetric_difference(second)
            self.assertTrue(tm.equalContents(result, answer))

        # GH 10149
        cases = [klass(second.values)
                 for klass in [np.array, Series, list]]
        for case in cases:
            if isinstance(idx, PeriodIndex):
                msg = "can only call with other PeriodIndex-ed objects"
                with tm.assertRaisesRegexp(ValueError, msg):
                    result = first.symmetric_difference(case)
            elif isinstance(idx, CategoricalIndex):
                pass
            else:
                result = first.symmetric_difference(case)
                self.assertTrue(tm.equalContents(result, answer))

        if isinstance(idx, MultiIndex):
            msg = "other must be a MultiIndex or a list of tuples"
            with tm.assertRaisesRegexp(TypeError, msg):
                result = first.symmetric_difference([1, 2, 3])

        # 12591 deprecated
        with tm.assert_produces_warning(FutureWarning):
            first.sym_diff(second)
def test_take_filling_fill_value(self):
    # same tests as GH 12631
    """SparseArray.take with a non-NaN fill_value."""
    sparse = SparseArray([np.nan, 0, 1, 0, 4], fill_value=0)
    result = sparse.take(np.array([1, 0, -1]))
    expected = SparseArray([0, np.nan, 4], fill_value=0)
    tm.assert_sp_array_equal(result, expected)

    # fill_value: -1 maps to the array's fill_value
    result = sparse.take(np.array([1, 0, -1]), fill_value=True)
    expected = SparseArray([0, np.nan, 0], fill_value=0)
    tm.assert_sp_array_equal(result, expected)

    # allow_fill=False: -1 is a plain positional index
    result = sparse.take(np.array([1, 0, -1]),
                         allow_fill=False, fill_value=True)
    expected = SparseArray([0, np.nan, 4], fill_value=0)
    tm.assert_sp_array_equal(result, expected)

    msg = ('When allow_fill=True and fill_value is not None, '
           'all indices must be >= -1')
    with tm.assertRaisesRegexp(ValueError, msg):
        sparse.take(np.array([1, 0, -2]), fill_value=True)
    with tm.assertRaisesRegexp(ValueError, msg):
        sparse.take(np.array([1, 0, -5]), fill_value=True)

    # out-of-bounds positions always raise
    with tm.assertRaises(IndexError):
        sparse.take(np.array([1, -6]))
    with tm.assertRaises(IndexError):
        sparse.take(np.array([1, 5]))
    with tm.assertRaises(IndexError):
        sparse.take(np.array([1, 5]), fill_value=True)
def test_union_base(self):
    """Index.union against array/Series/list inputs for each index type."""
    for name, idx in compat.iteritems(self.indices):
        first = idx[3:]
        second = idx[:5]
        everything = idx
        union = first.union(second)
        self.assertTrue(tm.equalContents(union, everything))

        # GH 10149
        cases = [klass(second.values)
                 for klass in [np.array, Series, list]]
        for case in cases:
            if isinstance(idx, PeriodIndex):
                msg = "can only call with other PeriodIndex-ed objects"
                with tm.assertRaisesRegexp(ValueError, msg):
                    result = first.union(case)
            elif isinstance(idx, CategoricalIndex):
                pass
            else:
                result = first.union(case)
                self.assertTrue(tm.equalContents(result, everything))

        if isinstance(idx, MultiIndex):
            msg = "other must be a MultiIndex or a list of tuples"
            with tm.assertRaisesRegexp(TypeError, msg):
                result = first.union([1, 2, 3])
def test_append_series_dict(self):
    """DataFrame.append with a Series or a dict row."""
    df = DataFrame(np.random.randn(5, 4),
                   columns=['foo', 'bar', 'baz', 'qux'])

    series = df.loc[4]
    # appending a row whose name already exists violates integrity
    with assertRaisesRegexp(ValueError,
                            'Indexes have overlapping values'):
        df.append(series, verify_integrity=True)

    # a nameless Series requires ignore_index=True
    series.name = None
    with assertRaisesRegexp(TypeError, 'Can only append a Series if '
                            'ignore_index=True'):
        df.append(series, verify_integrity=True)

    result = df.append(series[::-1], ignore_index=True)
    expected = df.append(DataFrame({0: series[::-1]},
                                   index=df.columns).T,
                         ignore_index=True)
    assert_frame_equal(result, expected)

    # dict
    result = df.append(series.to_dict(), ignore_index=True)
    assert_frame_equal(result, expected)

    result = df.append(series[::-1][:3], ignore_index=True)
    expected = df.append(DataFrame({0: series[::-1][:3]}).T,
                         ignore_index=True)
    assert_frame_equal(result, expected.loc[:, result.columns])

    # can append when name set
    row = df.loc[4]
    row.name = 5
    result = df.append(row)
    expected = df.append(df[-1:], ignore_index=True)
    assert_frame_equal(result, expected)
def test_insert_index_datetime64tz(self):
    """DatetimeIndex (tz-aware) insertion: same-tz OK, mismatches raise."""
    obj = pd.DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03',
                            '2011-01-04'], tz='US/Eastern')
    self.assertEqual(obj.dtype, 'datetime64[ns, US/Eastern]')

    # datetime64tz + datetime64tz => datetime64
    exp = pd.DatetimeIndex(['2011-01-01', '2012-01-01', '2011-01-02',
                            '2011-01-03', '2011-01-04'], tz='US/Eastern')
    val = pd.Timestamp('2012-01-01', tz='US/Eastern')
    self._assert_insert_conversion(obj, val, exp,
                                   'datetime64[ns, US/Eastern]')

    # ToDo: must coerce to object
    msg = "Passed item and index have different timezone"
    with tm.assertRaisesRegexp(ValueError, msg):
        obj.insert(1, pd.Timestamp('2012-01-01'))

    # ToDo: must coerce to object
    msg = "Passed item and index have different timezone"
    with tm.assertRaisesRegexp(ValueError, msg):
        obj.insert(1, pd.Timestamp('2012-01-01', tz='Asia/Tokyo'))

    # ToDo: must coerce to object
    msg = "cannot insert DatetimeIndex with incompatible label"
    with tm.assertRaisesRegexp(TypeError, msg):
        obj.insert(1, 1)
def test_take_fill_value(self):
    # GH 12631
    """PeriodIndex.take: NaT fill for -1 when allow_fill=True."""
    idx = pd.PeriodIndex(['2011-01-01', '2011-02-01', '2011-03-01'],
                         name='xxx', freq='D')
    result = idx.take(np.array([1, 0, -1]))
    expected = pd.PeriodIndex(['2011-02-01', '2011-01-01', '2011-03-01'],
                              name='xxx', freq='D')
    tm.assert_index_equal(result, expected)

    # fill_value: -1 becomes NaT
    result = idx.take(np.array([1, 0, -1]), fill_value=True)
    expected = pd.PeriodIndex(['2011-02-01', '2011-01-01', 'NaT'],
                              name='xxx', freq='D')
    tm.assert_index_equal(result, expected)

    # allow_fill=False: -1 is a plain positional index
    result = idx.take(np.array([1, 0, -1]),
                      allow_fill=False, fill_value=True)
    expected = pd.PeriodIndex(['2011-02-01', '2011-01-01', '2011-03-01'],
                              name='xxx', freq='D')
    tm.assert_index_equal(result, expected)

    msg = ('When allow_fill=True and fill_value is not None, '
           'all indices must be >= -1')
    with tm.assertRaisesRegexp(ValueError, msg):
        idx.take(np.array([1, 0, -2]), fill_value=True)
    with tm.assertRaisesRegexp(ValueError, msg):
        idx.take(np.array([1, 0, -5]), fill_value=True)

    with tm.assertRaises(IndexError):
        idx.take(np.array([1, -5]))
def test_round(self):
    """TimedeltaIndex / Timedelta rounding to a frequency."""
    td = pd.timedelta_range(start='16801 days', periods=5, freq='30Min')
    elt = td[1]

    expected_rng = TimedeltaIndex([
        Timedelta('16801 days 00:00:00'),
        Timedelta('16801 days 00:00:00'),
        Timedelta('16801 days 01:00:00'),
        Timedelta('16801 days 02:00:00'),
        Timedelta('16801 days 02:00:00'),
    ])
    expected_elt = expected_rng[1]

    tm.assert_index_equal(td.round(freq='H'), expected_rng)
    self.assertEqual(elt.round(freq='H'), expected_elt)

    # invalid frequency strings
    msg = pd.tseries.frequencies._INVALID_FREQ_ERROR
    with self.assertRaisesRegexp(ValueError, msg):
        td.round(freq='foo')
    with tm.assertRaisesRegexp(ValueError, msg):
        elt.round(freq='foo')

    # non-fixed frequencies cannot be used for rounding
    msg = "<MonthEnd> is a non-fixed frequency"
    tm.assertRaisesRegexp(ValueError, msg, td.round, freq='M')
    tm.assertRaisesRegexp(ValueError, msg, elt.round, freq='M')
def test_take_fill_value(self):
    # GH 12631
    """Int64Index.take: cannot fill with NA, so fill_value=True raises."""
    idx = pd.Int64Index([1, 2, 3], name='xxx')
    result = idx.take(np.array([1, 0, -1]))
    expected = pd.Int64Index([2, 1, 3], name='xxx')
    tm.assert_index_equal(result, expected)

    # fill_value
    msg = "Unable to fill values because Int64Index cannot contain NA"
    with tm.assertRaisesRegexp(ValueError, msg):
        idx.take(np.array([1, 0, -1]), fill_value=True)

    # allow_fill=False: -1 is a plain positional index
    result = idx.take(np.array([1, 0, -1]),
                      allow_fill=False, fill_value=True)
    expected = pd.Int64Index([2, 1, 3], name='xxx')
    tm.assert_index_equal(result, expected)

    msg = "Unable to fill values because Int64Index cannot contain NA"
    with tm.assertRaisesRegexp(ValueError, msg):
        idx.take(np.array([1, 0, -2]), fill_value=True)
    with tm.assertRaisesRegexp(ValueError, msg):
        idx.take(np.array([1, 0, -5]), fill_value=True)

    with tm.assertRaises(IndexError):
        idx.take(np.array([1, -5]))
def test_numpy_cumsum(self):
    """np.cumsum dispatches to SparseArray and rejects dtype/out kwargs."""
    non_null_data = np.array([1, 2, 3, 4, 5], dtype=float)
    non_null_expected = SparseArray(non_null_data.cumsum())

    null_data = np.array([1, 2, np.nan, 4, 5], dtype=float)
    null_expected = SparseArray(np.array([1.0, 3.0, np.nan, 7.0, 12.0]))

    for data, expected in [(null_data, null_expected),
                           (non_null_data, non_null_expected)]:
        # result is the same regardless of fill_value
        out = np.cumsum(SparseArray(data))
        tm.assert_sp_array_equal(out, expected)

        out = np.cumsum(SparseArray(data, fill_value=np.nan))
        tm.assert_sp_array_equal(out, expected)

        out = np.cumsum(SparseArray(data, fill_value=2))
        tm.assert_sp_array_equal(out, expected)

        msg = "the 'dtype' parameter is not supported"
        tm.assertRaisesRegexp(ValueError, msg, np.cumsum,
                              SparseArray(data), dtype=np.int64)
        msg = "the 'out' parameter is not supported"
        tm.assertRaisesRegexp(ValueError, msg, np.cumsum,
                              SparseArray(data), out=out)
def test_constructor_freq_mult(self):
    # GH #7811
    """PeriodIndex/period_range with multiplied frequencies; non-positive mult raises."""
    for func in [PeriodIndex, period_range]:
        # must be the same, but for sure...
        pidx = func(start='2014-01', freq='2M', periods=4)
        expected = PeriodIndex(['2014-01', '2014-03',
                                '2014-05', '2014-07'], freq='2M')
        tm.assert_index_equal(pidx, expected)

        pidx = func(start='2014-01-02', end='2014-01-15', freq='3D')
        expected = PeriodIndex(['2014-01-02', '2014-01-05',
                                '2014-01-08', '2014-01-11',
                                '2014-01-14'], freq='3D')
        tm.assert_index_equal(pidx, expected)

        pidx = func(end='2014-01-01 17:00', freq='4H', periods=3)
        expected = PeriodIndex(['2014-01-01 09:00', '2014-01-01 13:00',
                                '2014-01-01 17:00'], freq='4H')
        tm.assert_index_equal(pidx, expected)

    msg = ('Frequency must be positive, because it'
           ' represents span: -1M')
    with tm.assertRaisesRegexp(ValueError, msg):
        PeriodIndex(['2011-01'], freq='-1M')

    msg = ('Frequency must be positive, because it'
           ' represents span: 0M')
    with tm.assertRaisesRegexp(ValueError, msg):
        PeriodIndex(['2011-01'], freq='0M')

    msg = ('Frequency must be positive, because it'
           ' represents span: 0M')
    with tm.assertRaisesRegexp(ValueError, msg):
        period_range('2011-01', periods=3, freq='0M')
def test_numpy_transpose(self):
    """np.transpose round-trips a SparseDataFrame; axes kwarg rejected."""
    sdf = SparseDataFrame([1, 2, 3], index=[1, 2, 3], columns=['a'])
    result = np.transpose(np.transpose(sdf))
    tm.assert_sp_frame_equal(result, sdf)

    msg = "the 'axes' parameter is not supported"
    tm.assertRaisesRegexp(ValueError, msg, np.transpose, sdf, axes=1)
def test_constructor_cant_cast_datetime64(self):
    """Series construction refuses to cast datetime64 data to float/int."""
    msg = "Cannot cast datetime64 to "
    with tm.assertRaisesRegexp(TypeError, msg):
        Series(date_range('1/1/2000', periods=10), dtype=float)
    with tm.assertRaisesRegexp(TypeError, msg):
        Series(date_range('1/1/2000', periods=10), dtype=int)
def test_constructor_generic_timestamp_deprecated(self):
    # see gh-15524
    """Generic np.datetime64/np.timedelta64 dtypes warn; bad units raise."""
    with tm.assert_produces_warning(FutureWarning):
        dtype = np.timedelta64
        s = Series([], dtype=dtype)
        assert s.empty
        assert s.dtype == 'm8[ns]'

    with tm.assert_produces_warning(FutureWarning):
        dtype = np.datetime64
        s = Series([], dtype=dtype)
        assert s.empty
        assert s.dtype == 'M8[ns]'

    # These timestamps have the wrong frequencies,
    # so an Exception should be raised now.
    msg = "cannot convert timedeltalike"
    with tm.assertRaisesRegexp(TypeError, msg):
        Series([], dtype='m8[ps]')

    msg = "cannot convert datetimelike"
    with tm.assertRaisesRegexp(TypeError, msg):
        Series([], dtype='M8[ps]')
def test_concat4_interleave_partitions():
    """dd.concat of overlapping known-division frames requires interleave_partitions."""
    pdf1 = pd.DataFrame(np.random.randn(10, 5),
                        columns=list("ABCDE"), index=list("abcdefghij"))
    pdf2 = pd.DataFrame(np.random.randn(13, 5),
                        columns=list("ABCDE"), index=list("fghijklmnopqr"))
    pdf3 = pd.DataFrame(np.random.randn(13, 6),
                        columns=list("CDEXYZ"), index=list("fghijklmnopqr"))

    ddf1 = dd.from_pandas(pdf1, 2)
    ddf2 = dd.from_pandas(pdf2, 3)
    ddf3 = dd.from_pandas(pdf3, 2)

    # NOTE: "cannnot" matches the actual error text emitted by dask
    msg = (
        "All inputs have known divisions which cannnot be "
        "concatenated in order. Specify "
        "interleave_partitions=True to ignore order"
    )

    cases = [[ddf1, ddf1], [ddf1, ddf2], [ddf1, ddf3],
             [ddf2, ddf1], [ddf2, ddf3],
             [ddf3, ddf1], [ddf3, ddf2]]
    for case in cases:
        pdcase = [c.compute() for c in case]

        with tm.assertRaisesRegexp(ValueError, msg):
            dd.concat(case)

        assert eq(dd.concat(case, interleave_partitions=True),
                  pd.concat(pdcase))
        assert eq(dd.concat(case, join="inner",
                            interleave_partitions=True),
                  pd.concat(pdcase, join="inner"))

    msg = "'join' must be 'inner' or 'outer'"
    with tm.assertRaisesRegexp(ValueError, msg):
        dd.concat([ddf1, ddf1], join="invalid",
                  interleave_partitions=True)
def test_no_args_with_kwargs(self):
    """validate_args_and_kwargs rejects an unsupported compat argument."""
    bad_arg = 'bar'
    min_fname_arg_count = 2

    compat_args = OrderedDict()
    compat_args['foo'] = -5
    compat_args[bad_arg] = 1

    msg = (r"the '{arg}' parameter is not supported "
           r"in the pandas implementation of {func}\(\)".
           format(arg=bad_arg, func=self.fname))

    # passed as a keyword argument
    args = ()
    kwargs = {'foo': -5, bad_arg: 2}
    tm.assertRaisesRegexp(ValueError, msg,
                          validate_args_and_kwargs,
                          self.fname, args, kwargs,
                          min_fname_arg_count, compat_args)

    # passed positionally
    args = (-5, 2)
    kwargs = {}
    tm.assertRaisesRegexp(ValueError, msg,
                          validate_args_and_kwargs,
                          self.fname, args, kwargs,
                          min_fname_arg_count, compat_args)
def test_null_quote_char(self):
    """read_csv: quotechar must be set whenever quoting is enabled."""
    data = 'a,b,c\n1,2,3'

    # sanity checks
    msg = 'quotechar must be set if quoting enabled'

    tm.assertRaisesRegexp(TypeError, msg, self.read_csv,
                          StringIO(data), quotechar=None,
                          quoting=csv.QUOTE_MINIMAL)
    tm.assertRaisesRegexp(TypeError, msg, self.read_csv,
                          StringIO(data), quotechar='',
                          quoting=csv.QUOTE_MINIMAL)

    # no errors should be raised if quoting is None
    expected = DataFrame([[1, 2, 3]], columns=['a', 'b', 'c'])

    result = self.read_csv(StringIO(data), quotechar=None,
                           quoting=csv.QUOTE_NONE)
    tm.assert_frame_equal(result, expected)

    result = self.read_csv(StringIO(data), quotechar='',
                           quoting=csv.QUOTE_NONE)
    tm.assert_frame_equal(result, expected)
def test_wdi_download_error_handling(self):
    """World Bank download(): errors='raise' raises, errors='warn' warns."""
    # invalid country code
    cntry_codes = ['USA', 'XX']
    inds = 'NY.GDP.PCAP.CD'

    with tm.assertRaisesRegexp(ValueError,
                               "Invalid Country Code\\(s\\): XX"):
        result = download(country=cntry_codes, indicator=inds,
                          start=2003, end=2004, errors='raise')

    if PANDAS_0160:
        # assert_produces_warning doesn't exists in prior versions
        with self.assert_produces_warning():
            result = download(country=cntry_codes, indicator=inds,
                              start=2003, end=2004, errors='warn')
            self.assertTrue(isinstance(result, pd.DataFrame))
            self.assertEqual(len(result), 2)

    # invalid indicator
    cntry_codes = ['USA']
    inds = ['NY.GDP.PCAP.CD', 'BAD_INDICATOR']

    with tm.assertRaisesRegexp(ValueError,
                               "The provided parameter value is not valid\\. Indicator: BAD_INDICATOR"):
        result = download(country=cntry_codes, indicator=inds,
                          start=2003, end=2004, errors='raise')

    if PANDAS_0160:
        with self.assert_produces_warning():
            result = download(country=cntry_codes, indicator=inds,
                              start=2003, end=2004, errors='warn')
            self.assertTrue(isinstance(result, pd.DataFrame))
            self.assertEqual(len(result), 2)
def test_constructor_pi_nat(self):
    """PeriodIndex construction with NaT entries; all-NaT needs explicit freq."""
    idx = PeriodIndex([Period('2011-01', freq='M'), pd.NaT,
                       Period('2011-01', freq='M')])
    exp = PeriodIndex(['2011-01', 'NaT', '2011-01'], freq='M')
    tm.assert_index_equal(idx, exp)

    idx = PeriodIndex(np.array([Period('2011-01', freq='M'), pd.NaT,
                                Period('2011-01', freq='M')]))
    tm.assert_index_equal(idx, exp)

    # leading NaT values
    idx = PeriodIndex([pd.NaT, pd.NaT, Period('2011-01', freq='M'),
                       Period('2011-01', freq='M')])
    exp = PeriodIndex(['NaT', 'NaT', '2011-01', '2011-01'], freq='M')
    tm.assert_index_equal(idx, exp)

    idx = PeriodIndex(np.array([pd.NaT, pd.NaT,
                                Period('2011-01', freq='M'),
                                Period('2011-01', freq='M')]))
    tm.assert_index_equal(idx, exp)

    idx = PeriodIndex([pd.NaT, pd.NaT, '2011-01', '2011-01'], freq='M')
    tm.assert_index_equal(idx, exp)

    # frequency cannot be inferred from NaT alone
    with tm.assertRaisesRegexp(ValueError, 'freq not specified'):
        PeriodIndex([pd.NaT, pd.NaT])
    with tm.assertRaisesRegexp(ValueError, 'freq not specified'):
        PeriodIndex(np.array([pd.NaT, pd.NaT]))
    with tm.assertRaisesRegexp(ValueError, 'freq not specified'):
        PeriodIndex(['NaT', 'NaT'])
    with tm.assertRaisesRegexp(ValueError, 'freq not specified'):
        PeriodIndex(np.array(['NaT', 'NaT']))
def test_set_item(self):
    """Item and slice assignment on the array are both disallowed."""
    def setitem():
        self.arr[5] = 3

    def setslice():
        self.arr[1:5] = 2

    assertRaisesRegexp(TypeError, "item assignment", setitem)
    assertRaisesRegexp(TypeError, "item assignment", setslice)
def test_type_check(self):
    # GH 11776
    """to_numeric rejects 2-D input regardless of errors= mode."""
    df = pd.DataFrame({'a': [1, -3.14, 7], 'b': ['4', '5', '6']})
    with tm.assertRaisesRegexp(TypeError, "1-d array"):
        to_numeric(df)
    for errors in ['ignore', 'raise', 'coerce']:
        with tm.assertRaisesRegexp(TypeError, "1-d array"):
            to_numeric(df, errors=errors)
def test_invalid_table_attrs(self):
    """read_html raises when no table matches the given attrs."""
    url = self.banklist_data
    with tm.assertRaisesRegexp(ValueError, 'No tables found'):
        self.read_html(url, 'First Federal Bank of Florida',
                       attrs={'id': 'tasdfable'})
def check_mutable_error(self, *args, **kwargs):
    """Assert that the call raises TypeError matching self.mutable_regex.

    Pass whatever functions you normally would to assertRaises
    (after the Exception kind).
    """
    assertRaisesRegexp(TypeError, self.mutable_regex, *args, **kwargs)
def test_numeric_compat(self):
    """Arithmetic ops on this index type raise with an informative message."""
    idx = self.create_index()

    tm.assertRaisesRegexp(TypeError, "cannot perform __mul__",
                          lambda: idx * 1)
    tm.assertRaisesRegexp(TypeError, "cannot perform __mul__",
                          lambda: 1 * idx)

    # Python 3 routes `/` through __truediv__; Python 2 through __div__
    div_err = "cannot perform __truediv__" if PY3 \
        else "cannot perform __div__"
    tm.assertRaisesRegexp(TypeError, div_err, lambda: idx / 1)
    tm.assertRaisesRegexp(TypeError, div_err, lambda: 1 / idx)

    tm.assertRaisesRegexp(TypeError, "cannot perform __floordiv__",
                          lambda: idx // 1)
    tm.assertRaisesRegexp(TypeError, "cannot perform __floordiv__",
                          lambda: 1 // idx)
def test_hash_error(self):
    """Index objects are unhashable."""
    for ind in self.indices.values():
        with tm.assertRaisesRegexp(
                TypeError, "unhashable type: %r" % type(ind).__name__):
            hash(ind)
def test_wrong_number_names(self):
    """Assigning the wrong number of names to an index raises ValueError."""
    def testit(ind):
        ind.names = ["apple", "banana", "carrot"]

    for ind in self.indices.values():
        assertRaisesRegexp(ValueError, "^Length", testit, ind)
def test_logical_compat(self):
    """all()/any() are not defined for this index type."""
    idx = self.create_index()
    tm.assertRaisesRegexp(TypeError, 'cannot perform all',
                          lambda: idx.all())
    tm.assertRaisesRegexp(TypeError, 'cannot perform any',
                          lambda: idx.any())
def test_negative_skiprows(self):
    """read_html rejects a negative skiprows value."""
    with tm.assertRaisesRegexp(ValueError,
                               '\(you passed a negative value\)'):
        self.read_html(self.spam_data, 'Water', skiprows=-1)
def test_set_index2(self):
    """DataFrame.set_index: single/multi column, drop, inplace, append, Series."""
    df = DataFrame({'A': ['foo', 'foo', 'foo', 'bar', 'bar'],
                    'B': ['one', 'two', 'three', 'one', 'two'],
                    'C': ['a', 'b', 'c', 'd', 'e'],
                    'D': np.random.randn(5),
                    'E': np.random.randn(5)})

    # new object, single-column
    result = df.set_index('C')
    result_nodrop = df.set_index('C', drop=False)

    index = Index(df['C'], name='C')

    expected = df.loc[:, ['A', 'B', 'D', 'E']]
    expected.index = index

    expected_nodrop = df.copy()
    expected_nodrop.index = index

    assert_frame_equal(result, expected)
    assert_frame_equal(result_nodrop, expected_nodrop)
    self.assertEqual(result.index.name, index.name)

    # inplace, single
    df2 = df.copy()
    df2.set_index('C', inplace=True)
    assert_frame_equal(df2, expected)

    df3 = df.copy()
    df3.set_index('C', drop=False, inplace=True)
    assert_frame_equal(df3, expected_nodrop)

    # create new object, multi-column
    result = df.set_index(['A', 'B'])
    result_nodrop = df.set_index(['A', 'B'], drop=False)

    index = MultiIndex.from_arrays([df['A'], df['B']], names=['A', 'B'])

    expected = df.loc[:, ['C', 'D', 'E']]
    expected.index = index

    expected_nodrop = df.copy()
    expected_nodrop.index = index

    assert_frame_equal(result, expected)
    assert_frame_equal(result_nodrop, expected_nodrop)
    self.assertEqual(result.index.names, index.names)

    # inplace
    df2 = df.copy()
    df2.set_index(['A', 'B'], inplace=True)
    assert_frame_equal(df2, expected)

    df3 = df.copy()
    df3.set_index(['A', 'B'], drop=False, inplace=True)
    assert_frame_equal(df3, expected_nodrop)

    # corner case: column with duplicates
    with assertRaisesRegexp(ValueError, 'Index has duplicate keys'):
        df.set_index('A', verify_integrity=True)

    # append
    result = df.set_index(['A', 'B'], append=True)
    xp = df.reset_index().set_index(['index', 'A', 'B'])
    xp.index.names = [None, 'A', 'B']
    assert_frame_equal(result, xp)

    # append to existing multiindex
    rdf = df.set_index(['A'], append=True)
    rdf = rdf.set_index(['B', 'C'], append=True)
    expected = df.set_index(['A', 'B', 'C'], append=True)
    assert_frame_equal(rdf, expected)

    # Series
    result = df.set_index(df.C)
    self.assertEqual(result.index.name, 'C')
def test_frame_to_time_stamp(self):
    """DataFrame.to_timestamp over both index (axis=0) and columns (axis=1)."""
    K = 5
    index = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009')
    df = DataFrame(randn(len(index), K), index=index)
    df['mix'] = 'a'

    exp_index = date_range('1/1/2001', end='12/31/2009', freq='A-DEC')
    result = df.to_timestamp('D', 'end')
    tm.assert_index_equal(result.index, exp_index)
    tm.assert_numpy_array_equal(result.values, df.values)

    exp_index = date_range('1/1/2001', end='1/1/2009', freq='AS-JAN')
    result = df.to_timestamp('D', 'start')
    tm.assert_index_equal(result.index, exp_index)

    def _get_with_delta(delta, freq='A-DEC'):
        # expected index: year-end stamps shifted by `delta`
        return date_range(to_datetime('1/1/2001') + delta,
                          to_datetime('12/31/2009') + delta, freq=freq)

    delta = timedelta(hours=23)
    result = df.to_timestamp('H', 'end')
    exp_index = _get_with_delta(delta)
    tm.assert_index_equal(result.index, exp_index)

    delta = timedelta(hours=23, minutes=59)
    result = df.to_timestamp('T', 'end')
    exp_index = _get_with_delta(delta)
    tm.assert_index_equal(result.index, exp_index)

    result = df.to_timestamp('S', 'end')
    delta = timedelta(hours=23, minutes=59, seconds=59)
    exp_index = _get_with_delta(delta)
    tm.assert_index_equal(result.index, exp_index)

    # columns
    df = df.T

    exp_index = date_range('1/1/2001', end='12/31/2009', freq='A-DEC')
    result = df.to_timestamp('D', 'end', axis=1)
    tm.assert_index_equal(result.columns, exp_index)
    tm.assert_numpy_array_equal(result.values, df.values)

    exp_index = date_range('1/1/2001', end='1/1/2009', freq='AS-JAN')
    result = df.to_timestamp('D', 'start', axis=1)
    tm.assert_index_equal(result.columns, exp_index)

    delta = timedelta(hours=23)
    result = df.to_timestamp('H', 'end', axis=1)
    exp_index = _get_with_delta(delta)
    tm.assert_index_equal(result.columns, exp_index)

    delta = timedelta(hours=23, minutes=59)
    result = df.to_timestamp('T', 'end', axis=1)
    exp_index = _get_with_delta(delta)
    tm.assert_index_equal(result.columns, exp_index)

    result = df.to_timestamp('S', 'end', axis=1)
    delta = timedelta(hours=23, minutes=59, seconds=59)
    exp_index = _get_with_delta(delta)
    tm.assert_index_equal(result.columns, exp_index)

    # invalid axis
    tm.assertRaisesRegexp(ValueError, 'axis', df.to_timestamp, axis=2)

    result1 = df.to_timestamp('5t', axis=1)
    result2 = df.to_timestamp('t', axis=1)
    expected = pd.date_range('2001-01-01', '2009-01-01', freq='AS')
    self.assertTrue(isinstance(result1.columns, DatetimeIndex))
    self.assertTrue(isinstance(result2.columns, DatetimeIndex))
    self.assert_numpy_array_equal(result1.columns.asi8, expected.asi8)
    self.assert_numpy_array_equal(result2.columns.asi8, expected.asi8)
    # PeriodIndex.to_timestamp always use 'infer'
    self.assertEqual(result1.columns.freqstr, 'AS-JAN')
    self.assertEqual(result2.columns.freqstr, 'AS-JAN')
def test_skiprows_invalid(self):
    """read_html rejects a string skiprows value."""
    with tm.assertRaisesRegexp(TypeError,
                               'is not a valid type for skipping rows'):
        self.read_html(self.spam_data, '.*Water.*', skiprows='asdf')
def test_converters_type_must_be_dict(self):
    # ``converters`` must be a dict mapping column -> callable; a scalar
    # should raise a TypeError mentioning "Type converters".
    csv_text = """index,A,B,C,D
foo,2,3,4,5
"""
    with tm.assertRaisesRegexp(TypeError, 'Type converters.+'):
        self.read_csv(StringIO(csv_text), converters=0)
def test_set_columns(self):
    # Assigning a columns index of the correct length succeeds; a
    # wrong-length assignment must raise ValueError('Length mismatch').
    new_cols = Index(np.arange(len(self.mixed_frame.columns)))
    self.mixed_frame.columns = new_cols
    with assertRaisesRegexp(ValueError, 'Length mismatch'):
        self.mixed_frame.columns = new_cols[::2]
def test_take_fill_value(self):
    # GH 12631: CategoricalIndex.take with fill_value / allow_fill.
    # With allow_fill=True (default) and fill_value set, -1 means
    # "insert missing"; with allow_fill=False, -1 indexes from the end.

    # numeric category
    idx = pd.CategoricalIndex([1, 2, 3], name='xxx')
    result = idx.take(np.array([1, 0, -1]))
    expected = pd.CategoricalIndex([2, 1, 3], name='xxx')
    tm.assert_index_equal(result, expected)
    tm.assert_categorical_equal(result.values, expected.values)

    # fill_value
    result = idx.take(np.array([1, 0, -1]), fill_value=True)
    expected = pd.CategoricalIndex([2, 1, np.nan], categories=[1, 2, 3],
                                   name='xxx')
    tm.assert_index_equal(result, expected)
    tm.assert_categorical_equal(result.values, expected.values)

    # allow_fill=False
    result = idx.take(np.array([1, 0, -1]), allow_fill=False,
                      fill_value=True)
    expected = pd.CategoricalIndex([2, 1, 3], name='xxx')
    tm.assert_index_equal(result, expected)
    tm.assert_categorical_equal(result.values, expected.values)

    # object category
    idx = pd.CategoricalIndex(list('CBA'), categories=list('ABC'),
                              ordered=True, name='xxx')
    result = idx.take(np.array([1, 0, -1]))
    expected = pd.CategoricalIndex(list('BCA'), categories=list('ABC'),
                                   ordered=True, name='xxx')
    tm.assert_index_equal(result, expected)
    tm.assert_categorical_equal(result.values, expected.values)

    # fill_value
    result = idx.take(np.array([1, 0, -1]), fill_value=True)
    expected = pd.CategoricalIndex(['B', 'C', np.nan],
                                   categories=list('ABC'), ordered=True,
                                   name='xxx')
    tm.assert_index_equal(result, expected)
    tm.assert_categorical_equal(result.values, expected.values)

    # allow_fill=False
    result = idx.take(np.array([1, 0, -1]), allow_fill=False,
                      fill_value=True)
    expected = pd.CategoricalIndex(list('BCA'), categories=list('ABC'),
                                   ordered=True, name='xxx')
    tm.assert_index_equal(result, expected)
    tm.assert_categorical_equal(result.values, expected.values)

    # indices below -1 are invalid when filling is enabled
    msg = ('When allow_fill=True and fill_value is not None, '
           'all indices must be >= -1')
    with tm.assertRaisesRegexp(ValueError, msg):
        idx.take(np.array([1, 0, -2]), fill_value=True)
    with tm.assertRaisesRegexp(ValueError, msg):
        idx.take(np.array([1, 0, -5]), fill_value=True)

    # out-of-bounds negative index without filling
    with tm.assertRaises(IndexError):
        idx.take(np.array([1, -5]))
def test_construction_dti_with_mixed_timezones(self):
    # GH 11488 (not changed, added explicit tests)
    # DatetimeIndex construction from Timestamps with mixed tz info:
    # uniform tz (or none) builds a DatetimeIndex; a tz mismatch with an
    # explicit tz= argument raises.

    # no tz results in DatetimeIndex
    result = DatetimeIndex(
        [Timestamp('2011-01-01'), Timestamp('2011-01-02')], name='idx')
    exp = DatetimeIndex(
        [Timestamp('2011-01-01'), Timestamp('2011-01-02')], name='idx')
    self.assert_index_equal(result, exp, exact=True)
    self.assertTrue(isinstance(result, DatetimeIndex))

    # same tz results in DatetimeIndex
    result = DatetimeIndex([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'),
                            Timestamp('2011-01-02 10:00',
                                      tz='Asia/Tokyo')],
                           name='idx')
    exp = DatetimeIndex([Timestamp('2011-01-01 10:00'),
                         Timestamp('2011-01-02 10:00')],
                        tz='Asia/Tokyo', name='idx')
    self.assert_index_equal(result, exp, exact=True)
    self.assertTrue(isinstance(result, DatetimeIndex))

    # same tz results in DatetimeIndex (DST)
    result = DatetimeIndex([Timestamp('2011-01-01 10:00', tz='US/Eastern'),
                            Timestamp('2011-08-01 10:00',
                                      tz='US/Eastern')],
                           name='idx')
    exp = DatetimeIndex([Timestamp('2011-01-01 10:00'),
                         Timestamp('2011-08-01 10:00')],
                        tz='US/Eastern', name='idx')
    self.assert_index_equal(result, exp, exact=True)
    self.assertTrue(isinstance(result, DatetimeIndex))

    # mixed naive/aware: the tz-naive Timestamp is coerced to the other
    # entry's timezone (10:00 naive == 05:00 US/Eastern)
    result = DatetimeIndex([Timestamp('2011-01-01 10:00'),
                            Timestamp('2011-01-02 10:00',
                                      tz='US/Eastern')],
                           name='idx')
    exp = DatetimeIndex([Timestamp('2011-01-01 05:00'),
                         Timestamp('2011-01-02 10:00')],
                        tz='US/Eastern', name='idx')
    self.assert_index_equal(result, exp, exact=True)
    self.assertTrue(isinstance(result, DatetimeIndex))

    # a tz mismatch among tz-aware inputs raises TypeError/ValueError
    with tm.assertRaises(ValueError):
        DatetimeIndex([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'),
                       Timestamp('2011-01-02 10:00', tz='US/Eastern')],
                      name='idx')

    with tm.assertRaisesRegexp(TypeError, 'data is already tz-aware'):
        DatetimeIndex([Timestamp('2011-01-01 10:00'),
                       Timestamp('2011-01-02 10:00', tz='US/Eastern')],
                      tz='Asia/Tokyo', name='idx')

    with tm.assertRaises(ValueError):
        DatetimeIndex([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'),
                       Timestamp('2011-01-02 10:00', tz='US/Eastern')],
                      tz='US/Eastern', name='idx')

    with tm.assertRaisesRegexp(TypeError, 'data is already tz-aware'):
        # passing tz should results in DatetimeIndex, then mismatch raises
        # TypeError
        Index([pd.NaT, Timestamp('2011-01-01 10:00'),
               pd.NaT, Timestamp('2011-01-02 10:00', tz='US/Eastern')],
              tz='Asia/Tokyo', name='idx')
def test_isinstance(self):
    # assertIsInstance must fail for a non-instance, with a message
    # beginning "Expected type ".
    failure_msg = "Expected type "
    with assertRaisesRegexp(AssertionError, failure_msg):
        tm.assertIsInstance(1, pd.Series)
def test_get_loc(self):
    # DatetimeIndex.get_loc: exact lookup, pad/backfill/nearest methods,
    # tolerance handling, partial-string indexing, and time-of-day lookup.
    idx = pd.date_range('2000-01-01', periods=3)
    for method in [None, 'pad', 'backfill', 'nearest']:
        # an exact member is found regardless of method or input form
        self.assertEqual(idx.get_loc(idx[1], method), 1)
        self.assertEqual(idx.get_loc(idx[1].to_pydatetime(), method), 1)
        self.assertEqual(idx.get_loc(str(idx[1]), method), 1)
        if method is not None:
            self.assertEqual(
                idx.get_loc(idx[1], method,
                            tolerance=pd.Timedelta('0 days')), 1)

    self.assertEqual(idx.get_loc('2000-01-01', method='nearest'), 0)
    self.assertEqual(idx.get_loc('2000-01-01T12', method='nearest'), 1)

    # tolerance accepts anything convertible to Timedelta
    self.assertEqual(
        idx.get_loc('2000-01-01T12', method='nearest', tolerance='1 day'),
        1)
    self.assertEqual(
        idx.get_loc('2000-01-01T12', method='nearest',
                    tolerance=pd.Timedelta('1D')), 1)
    self.assertEqual(
        idx.get_loc('2000-01-01T12', method='nearest',
                    tolerance=np.timedelta64(1, 'D')), 1)
    self.assertEqual(
        idx.get_loc('2000-01-01T12', method='nearest',
                    tolerance=timedelta(1)), 1)
    with tm.assertRaisesRegexp(ValueError, 'must be convertible'):
        idx.get_loc('2000-01-01T12', method='nearest', tolerance='foo')
    with tm.assertRaises(KeyError):
        idx.get_loc('2000-01-01T03', method='nearest', tolerance='2 hours')

    # partial-string keys resolve to a slice covering the period
    self.assertEqual(idx.get_loc('2000', method='nearest'), slice(0, 3))
    self.assertEqual(idx.get_loc('2000-01', method='nearest'),
                     slice(0, 3))

    self.assertEqual(idx.get_loc('1999', method='nearest'), 0)
    self.assertEqual(idx.get_loc('2001', method='nearest'), 2)

    # pad/backfill cannot reach outside the index's range
    with tm.assertRaises(KeyError):
        idx.get_loc('1999', method='pad')
    with tm.assertRaises(KeyError):
        idx.get_loc('2001', method='backfill')

    with tm.assertRaises(KeyError):
        idx.get_loc('foobar')
    with tm.assertRaises(TypeError):
        idx.get_loc(slice(2))

    idx = pd.to_datetime(['2000-01-01', '2000-01-04'])
    self.assertEqual(idx.get_loc('2000-01-02', method='nearest'), 0)
    self.assertEqual(idx.get_loc('2000-01-03', method='nearest'), 1)
    self.assertEqual(idx.get_loc('2000-01', method='nearest'),
                     slice(0, 2))

    # time indexing: datetime.time keys return positions matching that
    # time of day
    idx = pd.date_range('2000-01-01', periods=24, freq='H')
    tm.assert_numpy_array_equal(idx.get_loc(time(12)),
                                np.array([12]), check_dtype=False)
    tm.assert_numpy_array_equal(idx.get_loc(time(12, 30)),
                                np.array([]), check_dtype=False)
    with tm.assertRaises(NotImplementedError):
        idx.get_loc(time(12, 30), method='pad')
def test_index_equal_message(self):
    # Verify the exact diagnostic text raised by assert_index_equal for
    # each failure mode: level count, values, length, class, precision.
    # NOTE: the `expected` strings are regex patterns (brackets/parens
    # escaped) matched against the raised AssertionError message.

    # level-count mismatch (Index vs MultiIndex)
    expected = """Index are different Index levels are different \\[left\\]: 1, Int64Index\\(\\[1, 2, 3\\], dtype='int64'\\) \\[right\\]: 2, MultiIndex\\(levels=\\[\\[u?'A', u?'B'\\], \\[1, 2, 3, 4\\]\\], labels=\\[\\[0, 0, 1, 1\\], \\[0, 1, 2, 3\\]\\]\\)"""
    idx1 = pd.Index([1, 2, 3])
    idx2 = pd.MultiIndex.from_tuples([('A', 1), ('A', 2),
                                      ('B', 3), ('B', 4)])
    with assertRaisesRegexp(AssertionError, expected):
        assert_index_equal(idx1, idx2, exact=False)

    # value mismatch inside one MultiIndex level
    expected = """MultiIndex level \\[1\\] are different MultiIndex level \\[1\\] values are different \\(25\\.0 %\\) \\[left\\]: Int64Index\\(\\[2, 2, 3, 4\\], dtype='int64'\\) \\[right\\]: Int64Index\\(\\[1, 2, 3, 4\\], dtype='int64'\\)"""
    idx1 = pd.MultiIndex.from_tuples([('A', 2), ('A', 2),
                                      ('B', 3), ('B', 4)])
    idx2 = pd.MultiIndex.from_tuples([('A', 1), ('A', 2),
                                      ('B', 3), ('B', 4)])
    with assertRaisesRegexp(AssertionError, expected):
        assert_index_equal(idx1, idx2)
    with assertRaisesRegexp(AssertionError, expected):
        assert_index_equal(idx1, idx2, check_exact=False)

    # length mismatch
    expected = """Index are different Index length are different \\[left\\]: 3, Int64Index\\(\\[1, 2, 3\\], dtype='int64'\\) \\[right\\]: 4, Int64Index\\(\\[1, 2, 3, 4\\], dtype='int64'\\)"""
    idx1 = pd.Index([1, 2, 3])
    idx2 = pd.Index([1, 2, 3, 4])
    with assertRaisesRegexp(AssertionError, expected):
        assert_index_equal(idx1, idx2)
    with assertRaisesRegexp(AssertionError, expected):
        assert_index_equal(idx1, idx2, check_exact=False)

    # class mismatch, only flagged with exact=True
    expected = """Index are different Index classes are different \\[left\\]: Int64Index\\(\\[1, 2, 3\\], dtype='int64'\\) \\[right\\]: Float64Index\\(\\[1\\.0, 2\\.0, 3\\.0\\], dtype='float64'\\)"""
    idx1 = pd.Index([1, 2, 3])
    idx2 = pd.Index([1, 2, 3.0])
    with assertRaisesRegexp(AssertionError, expected):
        assert_index_equal(idx1, idx2, exact=True)
    with assertRaisesRegexp(AssertionError, expected):
        assert_index_equal(idx1, idx2, exact=True, check_exact=False)

    # tiny float difference: exact comparison fails ...
    expected = """Index are different Index values are different \\(33\\.33333 %\\) \\[left\\]: Float64Index\\(\\[1.0, 2.0, 3.0], dtype='float64'\\) \\[right\\]: Float64Index\\(\\[1.0, 2.0, 3.0000000001\\], dtype='float64'\\)"""
    idx1 = pd.Index([1, 2, 3.])
    idx2 = pd.Index([1, 2, 3.0000000001])
    with assertRaisesRegexp(AssertionError, expected):
        assert_index_equal(idx1, idx2)

    # ... but approximate comparison passes
    # must success
    assert_index_equal(idx1, idx2, check_exact=False)

    # larger float difference: fails even approximately, unless
    # check_less_precise is set
    expected = """Index are different Index values are different \\(33\\.33333 %\\) \\[left\\]: Float64Index\\(\\[1.0, 2.0, 3.0], dtype='float64'\\) \\[right\\]: Float64Index\\(\\[1.0, 2.0, 3.0001\\], dtype='float64'\\)"""
    idx1 = pd.Index([1, 2, 3.])
    idx2 = pd.Index([1, 2, 3.0001])
    with assertRaisesRegexp(AssertionError, expected):
        assert_index_equal(idx1, idx2)
    with assertRaisesRegexp(AssertionError, expected):
        assert_index_equal(idx1, idx2, check_exact=False)
    # must success
    assert_index_equal(idx1, idx2, check_exact=False,
                       check_less_precise=True)

    # integer mismatch is never forgiven by check_less_precise
    expected = """Index are different Index values are different \\(33\\.33333 %\\) \\[left\\]: Int64Index\\(\\[1, 2, 3\\], dtype='int64'\\) \\[right\\]: Int64Index\\(\\[1, 2, 4\\], dtype='int64'\\)"""
    idx1 = pd.Index([1, 2, 3])
    idx2 = pd.Index([1, 2, 4])
    with assertRaisesRegexp(AssertionError, expected):
        assert_index_equal(idx1, idx2)
    with assertRaisesRegexp(AssertionError, expected):
        assert_index_equal(idx1, idx2, check_less_precise=True)

    # MultiIndex level mismatch, with and without check_exact
    expected = """MultiIndex level \\[1\\] are different MultiIndex level \\[1\\] values are different \\(25\\.0 %\\) \\[left\\]: Int64Index\\(\\[2, 2, 3, 4\\], dtype='int64'\\) \\[right\\]: Int64Index\\(\\[1, 2, 3, 4\\], dtype='int64'\\)"""
    idx1 = pd.MultiIndex.from_tuples([('A', 2), ('A', 2),
                                      ('B', 3), ('B', 4)])
    idx2 = pd.MultiIndex.from_tuples([('A', 1), ('A', 2),
                                      ('B', 3), ('B', 4)])
    with assertRaisesRegexp(AssertionError, expected):
        assert_index_equal(idx1, idx2)
    with assertRaisesRegexp(AssertionError, expected):
        assert_index_equal(idx1, idx2, check_exact=False)
def test_notisinstance(self):
    # assertNotIsInstance must reject an instance of the forbidden type,
    # with a message beginning "Input must not be type ".
    failure_msg = "Input must not be type "
    with assertRaisesRegexp(AssertionError, failure_msg):
        tm.assertNotIsInstance(pd.Series([1]), pd.Series)
def test_hash_error(self):
    # DatetimeIndex is mutable and therefore unhashable; hash() must
    # raise TypeError naming the concrete class.
    idx = date_range('20010101', periods=10)
    expected_msg = "unhashable type: %r" % type(idx).__name__
    with tm.assertRaisesRegexp(TypeError, expected_msg):
        hash(idx)
def test_frame_equal_message(self):
    # Verify the exact diagnostic text raised by assert_frame_equal for
    # shape, index, columns and per-column value mismatches.
    # NOTE: the `expected` strings are regex patterns (brackets/parens
    # escaped) matched against the raised AssertionError message.

    # row-count mismatch
    expected = """DataFrame are different DataFrame shape \\(number of rows\\) are different \\[left\\]: 3, RangeIndex\\(start=0, stop=3, step=1\\) \\[right\\]: 4, RangeIndex\\(start=0, stop=4, step=1\\)"""
    with assertRaisesRegexp(AssertionError, expected):
        assert_frame_equal(pd.DataFrame({'A': [1, 2, 3]}),
                           pd.DataFrame({'A': [1, 2, 3, 4]}))

    # column-count mismatch
    expected = """DataFrame are different DataFrame shape \\(number of columns\\) are different \\[left\\]: 2, Index\\(\\[u?'A', u?'B'\\], dtype='object'\\) \\[right\\]: 1, Index\\(\\[u?'A'\\], dtype='object'\\)"""
    with assertRaisesRegexp(AssertionError, expected):
        assert_frame_equal(pd.DataFrame({
            'A': [1, 2, 3],
            'B': [4, 5, 6]
        }), pd.DataFrame({'A': [1, 2, 3]}))

    # index-label mismatch
    expected = """DataFrame\\.index are different DataFrame\\.index values are different \\(33\\.33333 %\\) \\[left\\]: Index\\(\\[u?'a', u?'b', u?'c'\\], dtype='object'\\) \\[right\\]: Index\\(\\[u?'a', u?'b', u?'d'\\], dtype='object'\\)"""
    with assertRaisesRegexp(AssertionError, expected):
        assert_frame_equal(
            pd.DataFrame({
                'A': [1, 2, 3],
                'B': [4, 5, 6]
            }, index=['a', 'b', 'c']),
            pd.DataFrame({
                'A': [1, 2, 3],
                'B': [4, 5, 6]
            }, index=['a', 'b', 'd']))

    # column-label mismatch
    expected = """DataFrame\\.columns are different DataFrame\\.columns values are different \\(50\\.0 %\\) \\[left\\]: Index\\(\\[u?'A', u?'B'\\], dtype='object'\\) \\[right\\]: Index\\(\\[u?'A', u?'b'\\], dtype='object'\\)"""
    with assertRaisesRegexp(AssertionError, expected):
        assert_frame_equal(
            pd.DataFrame({
                'A': [1, 2, 3],
                'B': [4, 5, 6]
            }, index=['a', 'b', 'c']),
            pd.DataFrame({
                'A': [1, 2, 3],
                'b': [4, 5, 6]
            }, index=['a', 'b', 'c']))

    # value mismatch in one column, reported per-column; same message
    # whether compared column-wise or block-wise (by_blocks=True)
    expected = """DataFrame\\.iloc\\[:, 1\\] are different DataFrame\\.iloc\\[:, 1\\] values are different \\(33\\.33333 %\\) \\[left\\]: \\[4, 5, 6\\] \\[right\\]: \\[4, 5, 7\\]"""
    with assertRaisesRegexp(AssertionError, expected):
        assert_frame_equal(pd.DataFrame({
            'A': [1, 2, 3],
            'B': [4, 5, 6]
        }), pd.DataFrame({
            'A': [1, 2, 3],
            'B': [4, 5, 7]
        }))
    with assertRaisesRegexp(AssertionError, expected):
        assert_frame_equal(pd.DataFrame({
            'A': [1, 2, 3],
            'B': [4, 5, 6]
        }), pd.DataFrame({
            'A': [1, 2, 3],
            'B': [4, 5, 7]
        }), by_blocks=True)
def test_add_iadd(self):
    """PeriodIndex '+' / '+=' with another PeriodIndex behaves as a set
    union (GH 6527); adding a DateOffset/timedelta raises TypeError;
    adding an int shifts every period by that many freq units."""
    # union
    rng1 = pd.period_range('1/1/2000', freq='D', periods=5)
    other1 = pd.period_range('1/6/2000', freq='D', periods=5)
    expected1 = pd.period_range('1/1/2000', freq='D', periods=10)

    rng2 = pd.period_range('1/1/2000', freq='D', periods=5)
    other2 = pd.period_range('1/4/2000', freq='D', periods=5)
    expected2 = pd.period_range('1/1/2000', freq='D', periods=8)

    rng3 = pd.period_range('1/1/2000', freq='D', periods=5)
    other3 = pd.PeriodIndex([], freq='D')  # union with empty is identity
    expected3 = pd.period_range('1/1/2000', freq='D', periods=5)

    rng4 = pd.period_range('2000-01-01 09:00', freq='H', periods=5)
    other4 = pd.period_range('2000-01-02 09:00', freq='H', periods=5)
    expected4 = pd.PeriodIndex(['2000-01-01 09:00', '2000-01-01 10:00',
                                '2000-01-01 11:00', '2000-01-01 12:00',
                                '2000-01-01 13:00', '2000-01-02 09:00',
                                '2000-01-02 10:00', '2000-01-02 11:00',
                                '2000-01-02 12:00', '2000-01-02 13:00'],
                               freq='H')

    rng5 = pd.PeriodIndex(['2000-01-01 09:01', '2000-01-01 09:03',
                           '2000-01-01 09:05'], freq='T')
    # BUG FIX: the original list was missing the comma after '09:05', so
    # implicit string-literal concatenation fused the last two entries
    # into one garbage label; expected5 clearly intends three distinct
    # minutes here (09:01, 09:05, 09:08).
    other5 = pd.PeriodIndex(['2000-01-01 09:01', '2000-01-01 09:05',
                             '2000-01-01 09:08'], freq='T')
    expected5 = pd.PeriodIndex(['2000-01-01 09:01', '2000-01-01 09:03',
                                '2000-01-01 09:05', '2000-01-01 09:08'],
                               freq='T')

    rng6 = pd.period_range('2000-01-01', freq='M', periods=7)
    other6 = pd.period_range('2000-04-01', freq='M', periods=7)
    expected6 = pd.period_range('2000-01-01', freq='M', periods=10)

    rng7 = pd.period_range('2003-01-01', freq='A', periods=5)
    other7 = pd.period_range('1998-01-01', freq='A', periods=8)
    expected7 = pd.period_range('1998-01-01', freq='A', periods=10)

    for rng, other, expected in [(rng1, other1, expected1),
                                 (rng2, other2, expected2),
                                 (rng3, other3, expected3),
                                 (rng4, other4, expected4),
                                 (rng5, other5, expected5),
                                 (rng6, other6, expected6),
                                 (rng7, other7, expected7)]:
        result_add = rng + other
        result_union = rng.union(other)
        tm.assert_index_equal(result_add, expected)
        tm.assert_index_equal(result_union, expected)
        # GH 6527: in-place '+=' unions as well
        rng += other
        tm.assert_index_equal(rng, expected)

    # offset: DateOffset/timedelta addition is unsupported for
    # PeriodIndex (raw strings avoid the invalid '\(' escape warning)
    for delta in [pd.offsets.Hour(2), timedelta(hours=2)]:
        rng = pd.period_range('2000-01-01', '2000-02-01')
        with tm.assertRaisesRegexp(TypeError,
                                   r'unsupported operand type\(s\)'):
            result = rng + delta
        with tm.assertRaisesRegexp(TypeError,
                                   r'unsupported operand type\(s\)'):
            rng += delta

    # int: shifts each period by that many of its own freq units
    rng = pd.period_range('2000-01-01 09:00', freq='H', periods=10)
    result = rng + 1
    expected = pd.period_range('2000-01-01 10:00', freq='H', periods=10)
    tm.assert_index_equal(result, expected)
    rng += 1
    tm.assert_index_equal(rng, expected)
def test_numpy_array_equal_message(self):
    # Verify the exact diagnostic text raised by assert_numpy_array_equal
    # and assert_almost_equal for shape, class and value mismatches.
    # NOTE: the `expected` strings are regex patterns matched against the
    # raised AssertionError message.
    if is_platform_windows():
        # Windows repr of shapes differs (trailing L), so skip there
        raise nose.SkipTest("windows has incomparable line-endings "
                            "and uses L on the shape")

    # shape mismatch
    expected = """numpy array are different numpy array shapes are different \\[left\\]: \\(2,\\) \\[right\\]: \\(3,\\)"""
    with assertRaisesRegexp(AssertionError, expected):
        assert_numpy_array_equal(np.array([1, 2]), np.array([3, 4, 5]))
    with assertRaisesRegexp(AssertionError, expected):
        assert_almost_equal(np.array([1, 2]), np.array([3, 4, 5]))

    # scalar comparison
    expected = """Expected type """
    with assertRaisesRegexp(AssertionError, expected):
        assert_numpy_array_equal(1, 2)
    expected = """expected 2\\.00000 but got 1\\.00000, with decimal 5"""
    with assertRaisesRegexp(AssertionError, expected):
        assert_almost_equal(1, 2)

    # array / scalar array comparison
    expected = """numpy array are different numpy array classes are different \\[left\\]: ndarray \\[right\\]: int"""
    with assertRaisesRegexp(AssertionError, expected):
        # numpy_array_equal only accepts np.ndarray
        assert_numpy_array_equal(np.array([1]), 1)
    with assertRaisesRegexp(AssertionError, expected):
        assert_almost_equal(np.array([1]), 1)

    # scalar / array comparison
    expected = """numpy array are different numpy array classes are different \\[left\\]: int \\[right\\]: ndarray"""
    with assertRaisesRegexp(AssertionError, expected):
        assert_numpy_array_equal(1, np.array([1]))
    with assertRaisesRegexp(AssertionError, expected):
        assert_almost_equal(1, np.array([1]))

    # value mismatch with NaNs (NaN != NaN positions count as differing)
    expected = """numpy array are different numpy array values are different \\(66\\.66667 %\\) \\[left\\]: \\[nan, 2\\.0, 3\\.0\\] \\[right\\]: \\[1\\.0, nan, 3\\.0\\]"""
    with assertRaisesRegexp(AssertionError, expected):
        assert_numpy_array_equal(np.array([np.nan, 2, 3]),
                                 np.array([1, np.nan, 3]))
    with assertRaisesRegexp(AssertionError, expected):
        assert_almost_equal(np.array([np.nan, 2, 3]),
                            np.array([1, np.nan, 3]))

    # plain int value mismatch
    expected = """numpy array are different numpy array values are different \\(50\\.0 %\\) \\[left\\]: \\[1, 2\\] \\[right\\]: \\[1, 3\\]"""
    with assertRaisesRegexp(AssertionError, expected):
        assert_numpy_array_equal(np.array([1, 2]), np.array([1, 3]))
    with assertRaisesRegexp(AssertionError, expected):
        assert_almost_equal(np.array([1, 2]), np.array([1, 3]))

    # near-equal floats: exact comparison fails, almost_equal passes
    expected = """numpy array are different numpy array values are different \\(50\\.0 %\\) \\[left\\]: \\[1\\.1, 2\\.000001\\] \\[right\\]: \\[1\\.1, 2.0\\]"""
    with assertRaisesRegexp(AssertionError, expected):
        assert_numpy_array_equal(np.array([1.1, 2.000001]),
                                 np.array([1.1, 2.0]))
    # must pass
    assert_almost_equal(np.array([1.1, 2.000001]), np.array([1.1, 2.0]))

    # 2-D mismatch percentage is over all elements (1 of 6)
    expected = """numpy array are different numpy array values are different \\(16\\.66667 %\\) \\[left\\]: \\[\\[1, 2\\], \\[3, 4\\], \\[5, 6\\]\\] \\[right\\]: \\[\\[1, 3\\], \\[3, 4\\], \\[5, 6\\]\\]"""
    with assertRaisesRegexp(AssertionError, expected):
        assert_numpy_array_equal(np.array([[1, 2], [3, 4], [5, 6]]),
                                 np.array([[1, 3], [3, 4], [5, 6]]))
    with assertRaisesRegexp(AssertionError, expected):
        assert_almost_equal(np.array([[1, 2], [3, 4], [5, 6]]),
                            np.array([[1, 3], [3, 4], [5, 6]]))

    # 2-D mismatch, 1 of 4 elements
    expected = """numpy array are different numpy array values are different \\(25\\.0 %\\) \\[left\\]: \\[\\[1, 2\\], \\[3, 4\\]\\] \\[right\\]: \\[\\[1, 3\\], \\[3, 4\\]\\]"""
    with assertRaisesRegexp(AssertionError, expected):
        assert_numpy_array_equal(np.array([[1, 2], [3, 4]]),
                                 np.array([[1, 3], [3, 4]]))
    with assertRaisesRegexp(AssertionError, expected):
        assert_almost_equal(np.array([[1, 2], [3, 4]]),
                            np.array([[1, 3], [3, 4]]))

    # allow to overwrite message
    expected = """Index are different Index shapes are different \\[left\\]: \\(2,\\) \\[right\\]: \\(3,\\)"""
    with assertRaisesRegexp(AssertionError, expected):
        assert_numpy_array_equal(np.array([1, 2]), np.array([3, 4, 5]),
                                 obj='Index')
    with assertRaisesRegexp(AssertionError, expected):
        assert_almost_equal(np.array([1, 2]), np.array([3, 4, 5]),
                            obj='Index')
def test_dt_accessor_no_new_attributes(self):
    # GH 10673 (https://github.com/pydata/pandas/issues/10673):
    # the .dt accessor is read-only; attaching new attributes must fail.
    ser = Series(date_range('20130101', periods=5, freq='D'))
    with tm.assertRaisesRegexp(AttributeError,
                               "You cannot add any new attribute"):
        ser.dt.xlabel = "a"
def test_mut_exclusive():
    # _mut_exclusive: supplying both kwargs is a TypeError; otherwise the
    # first non-None value wins, and all-None yields None.
    assert com._mut_exclusive(a=1, b=None) == 1
    assert com._mut_exclusive(major=None, major_axis=None) is None
    conflict_msg = "mutually exclusive arguments: '[ab]' and '[ab]'"
    with tm.assertRaisesRegexp(TypeError, conflict_msg):
        com._mut_exclusive(a=1, b=2)
def test_per_axis_per_level_getitem(self):
    # GH6134
    # example test case
    # .loc with a tuple of per-level slicers/lists on a MultiIndex, for
    # both axes, including boolean indexers, ambiguous keys, and the
    # unsorted-index error path.
    ix = MultiIndex.from_product(
        [_mklbl('A', 5), _mklbl('B', 7), _mklbl('C', 4), _mklbl('D', 2)])
    df = DataFrame(np.arange(len(ix.get_values())), index=ix)
    result = df.loc[(slice('A1', 'A3'), slice(None), ['C1', 'C3']), :]
    # brute-force the expected rows by filtering the index tuples
    expected = df.loc[[
        tuple([a, b, c, d]) for a, b, c, d in df.index.values
        if (a == 'A1' or a == 'A2' or a == 'A3') and (
            c == 'C1' or c == 'C3')
    ]]
    tm.assert_frame_equal(result, expected)

    expected = df.loc[[
        tuple([a, b, c, d]) for a, b, c, d in df.index.values
        if (a == 'A1' or a == 'A2' or a == 'A3') and (
            c == 'C1' or c == 'C2' or c == 'C3')
    ]]
    result = df.loc[(slice('A1', 'A3'), slice(None),
                     slice('C1', 'C3')), :]
    tm.assert_frame_equal(result, expected)

    # test multi-index slicing with per axis and per index controls
    index = MultiIndex.from_tuples([('A', 1), ('A', 2), ('A', 3),
                                    ('B', 1)], names=['one', 'two'])
    columns = MultiIndex.from_tuples([('a', 'foo'), ('a', 'bar'),
                                      ('b', 'foo'), ('b', 'bah')],
                                     names=['lvl0', 'lvl1'])
    df = DataFrame(np.arange(16, dtype='int64').reshape(4, 4),
                   index=index, columns=columns)
    df = df.sort_index(axis=0).sort_index(axis=1)

    # identity: all-slice(None) keys return the frame unchanged
    result = df.loc[(slice(None), slice(None)), :]
    tm.assert_frame_equal(result, df)
    result = df.loc[(slice(None), slice(None)), (slice(None),
                                                 slice(None))]
    tm.assert_frame_equal(result, df)
    result = df.loc[:, (slice(None), slice(None))]
    tm.assert_frame_equal(result, df)

    # index
    result = df.loc[(slice(None), [1]), :]
    expected = df.iloc[[0, 3]]
    tm.assert_frame_equal(result, expected)

    result = df.loc[(slice(None), 1), :]
    expected = df.iloc[[0, 3]]
    tm.assert_frame_equal(result, expected)

    # columns
    result = df.loc[:, (slice(None), ['foo'])]
    expected = df.iloc[:, [1, 3]]
    tm.assert_frame_equal(result, expected)

    # both
    result = df.loc[(slice(None), 1), (slice(None), ['foo'])]
    expected = df.iloc[[0, 3], [1, 3]]
    tm.assert_frame_equal(result, expected)

    # scalar keys drop the outer level on both axes
    result = df.loc['A', 'a']
    expected = DataFrame(dict(bar=[1, 5, 9], foo=[0, 4, 8]),
                         index=Index([1, 2, 3], name='two'),
                         columns=Index(['bar', 'foo'], name='lvl1'))
    tm.assert_frame_equal(result, expected)

    result = df.loc[(slice(None), [1, 2]), :]
    expected = df.iloc[[0, 1, 3]]
    tm.assert_frame_equal(result, expected)

    # multi-level series
    s = Series(np.arange(len(ix.get_values())), index=ix)
    result = s.loc['A1':'A3', :, ['C1', 'C3']]
    expected = s.loc[[
        tuple([a, b, c, d]) for a, b, c, d in s.index.values
        if (a == 'A1' or a == 'A2' or a == 'A3') and (
            c == 'C1' or c == 'C3')
    ]]
    tm.assert_series_equal(result, expected)

    # boolean indexers
    result = df.loc[(slice(None), df.loc[:, ('a', 'bar')] > 5), :]
    expected = df.iloc[[2, 3]]
    tm.assert_frame_equal(result, expected)

    # a boolean array of the wrong length must raise
    def f():
        df.loc[(slice(None), np.array([True, False])), :]

    self.assertRaises(ValueError, f)

    # ambiguous cases
    # these can be multiply interpreted (e.g. in this case
    # as df.loc[slice(None),[1]] as well
    self.assertRaises(KeyError, lambda: df.loc[slice(None), [1]])

    result = df.loc[(slice(None), [1]), :]
    expected = df.iloc[[0, 3]]
    tm.assert_frame_equal(result, expected)

    # not lexsorted
    self.assertEqual(df.index.lexsort_depth, 2)
    df = df.sort_index(level=1, axis=0)
    self.assertEqual(df.index.lexsort_depth, 0)
    with tm.assertRaisesRegexp(
            UnsortedIndexError,
            'MultiIndex Slicing requires the index to be fully '
            r'lexsorted tuple len \(2\), lexsort depth \(0\)'):
        df.loc[(slice(None), df.loc[:, ('a', 'bar')] > 5), :]
def test_write_explicit_bad(self, compression, get_random_path):
    # to_pickle must reject compression identifiers it does not know.
    with tm.assertRaisesRegexp(ValueError,
                               "Unrecognized compression type"):
        with tm.ensure_clean(get_random_path) as path:
            frame = tm.makeDataFrame()
            frame.to_pickle(path, compression=compression)
def assert_unbounded_slice_error(slc):
    # BlockPlacement refuses slices without concrete bounds; assert that
    # constructing one from `slc` raises the expected ValueError.
    def construct():
        return BlockPlacement(slc)

    tm.assertRaisesRegexp(ValueError, "unbounded slice", construct)
def test_dt_namespace_accessor(self):
    # GH 7207, 11128
    # test .dt namespace accessor
    # Exercise Series.dt over datetime, datetime-with-tz, timedelta and
    # period data: property values match the equivalent Index, the
    # exposed attribute set is exactly the whitelists below, and the
    # accessor is read-only.

    # attribute/method whitelists per underlying dtype
    ok_for_base = ['year', 'month', 'day', 'hour', 'minute', 'second',
                   'weekofyear', 'week', 'dayofweek', 'weekday',
                   'dayofyear', 'quarter', 'freq', 'days_in_month',
                   'daysinmonth', 'is_leap_year']
    ok_for_period = ok_for_base + ['qyear', 'start_time', 'end_time']
    ok_for_period_methods = ['strftime', 'to_timestamp', 'asfreq']
    ok_for_dt = ok_for_base + ['date', 'time', 'microsecond',
                               'nanosecond', 'is_month_start',
                               'is_month_end', 'is_quarter_start',
                               'is_quarter_end', 'is_year_start',
                               'is_year_end', 'tz', 'weekday_name']
    ok_for_dt_methods = ['to_period', 'to_pydatetime', 'tz_localize',
                         'tz_convert', 'normalize', 'strftime', 'round',
                         'floor', 'ceil', 'weekday_name']
    ok_for_td = ['days', 'seconds', 'microseconds', 'nanoseconds']
    ok_for_td_methods = ['components', 'to_pytimedelta', 'total_seconds',
                         'round', 'floor', 'ceil']

    def get_expected(s, name):
        # Reference value: the same property read off the plain Index.
        result = getattr(Index(s._values), prop)
        if isinstance(result, np.ndarray):
            if is_integer_dtype(result):
                result = result.astype('int64')
        elif not is_list_like(result):
            return result
        return Series(result, index=s.index, name=s.name)

    def compare(s, name):
        # s.dt.<prop> must equal the Index-derived expectation.
        a = getattr(s.dt, prop)
        b = get_expected(s, prop)
        if not (is_list_like(a) and is_list_like(b)):
            self.assertEqual(a, b)
        else:
            tm.assert_series_equal(a, b)

    # datetimeindex
    cases = [Series(date_range('20130101', periods=5), name='xxx'),
             Series(date_range('20130101', periods=5, freq='s'),
                    name='xxx'),
             Series(date_range('20130101 00:00:00', periods=5,
                               freq='ms'), name='xxx')]
    for s in cases:
        for prop in ok_for_dt:
            # we test freq below
            if prop != 'freq':
                compare(s, prop)

        for prop in ok_for_dt_methods:
            getattr(s.dt, prop)

        result = s.dt.to_pydatetime()
        self.assertIsInstance(result, np.ndarray)
        self.assertTrue(result.dtype == object)

        result = s.dt.tz_localize('US/Eastern')
        exp_values = DatetimeIndex(s.values).tz_localize('US/Eastern')
        expected = Series(exp_values, index=s.index, name='xxx')
        tm.assert_series_equal(result, expected)

        tz_result = result.dt.tz
        self.assertEqual(str(tz_result), 'US/Eastern')
        freq_result = s.dt.freq
        self.assertEqual(freq_result,
                         DatetimeIndex(s.values, freq='infer').freq)

        # let's localize, then convert
        result = s.dt.tz_localize('UTC').dt.tz_convert('US/Eastern')
        exp_values = (DatetimeIndex(s.values).tz_localize('UTC')
                      .tz_convert('US/Eastern'))
        expected = Series(exp_values, index=s.index, name='xxx')
        tm.assert_series_equal(result, expected)

    # round
    s = Series(pd.to_datetime(['2012-01-01 13:00:00',
                               '2012-01-01 12:01:00',
                               '2012-01-01 08:00:00']), name='xxx')
    result = s.dt.round('D')
    expected = Series(pd.to_datetime(['2012-01-02', '2012-01-02',
                                      '2012-01-01']), name='xxx')
    tm.assert_series_equal(result, expected)

    # round with tz
    result = (s.dt.tz_localize('UTC')
              .dt.tz_convert('US/Eastern')
              .dt.round('D'))
    exp_values = pd.to_datetime(['2012-01-01', '2012-01-01',
                                 '2012-01-01']).tz_localize('US/Eastern')
    expected = Series(exp_values, name='xxx')
    tm.assert_series_equal(result, expected)

    # floor
    s = Series(pd.to_datetime(['2012-01-01 13:00:00',
                               '2012-01-01 12:01:00',
                               '2012-01-01 08:00:00']), name='xxx')
    result = s.dt.floor('D')
    expected = Series(pd.to_datetime(['2012-01-01', '2012-01-01',
                                      '2012-01-01']), name='xxx')
    tm.assert_series_equal(result, expected)

    # ceil
    s = Series(pd.to_datetime(['2012-01-01 13:00:00',
                               '2012-01-01 12:01:00',
                               '2012-01-01 08:00:00']), name='xxx')
    result = s.dt.ceil('D')
    expected = Series(pd.to_datetime(['2012-01-02', '2012-01-02',
                                      '2012-01-02']), name='xxx')
    tm.assert_series_equal(result, expected)

    # datetimeindex with tz
    s = Series(date_range('20130101', periods=5, tz='US/Eastern'),
               name='xxx')
    for prop in ok_for_dt:
        # we test freq below
        if prop != 'freq':
            compare(s, prop)

    for prop in ok_for_dt_methods:
        getattr(s.dt, prop)

    result = s.dt.to_pydatetime()
    self.assertIsInstance(result, np.ndarray)
    self.assertTrue(result.dtype == object)

    result = s.dt.tz_convert('CET')
    expected = Series(s._values.tz_convert('CET'),
                      index=s.index, name='xxx')
    tm.assert_series_equal(result, expected)

    tz_result = result.dt.tz
    self.assertEqual(str(tz_result), 'CET')
    freq_result = s.dt.freq
    self.assertEqual(freq_result,
                     DatetimeIndex(s.values, freq='infer').freq)

    # timedeltaindex
    cases = [Series(timedelta_range('1 day', periods=5),
                    index=list('abcde'), name='xxx'),
             Series(timedelta_range('1 day 01:23:45', periods=5,
                                    freq='s'), name='xxx'),
             Series(timedelta_range('2 days 01:23:45.012345', periods=5,
                                    freq='ms'), name='xxx')]
    for s in cases:
        for prop in ok_for_td:
            # we test freq below
            if prop != 'freq':
                compare(s, prop)

        for prop in ok_for_td_methods:
            getattr(s.dt, prop)

        result = s.dt.components
        self.assertIsInstance(result, DataFrame)
        tm.assert_index_equal(result.index, s.index)

        result = s.dt.to_pytimedelta()
        self.assertIsInstance(result, np.ndarray)
        self.assertTrue(result.dtype == object)

        result = s.dt.total_seconds()
        self.assertIsInstance(result, pd.Series)
        self.assertTrue(result.dtype == 'float64')

        freq_result = s.dt.freq
        self.assertEqual(freq_result,
                         TimedeltaIndex(s.values, freq='infer').freq)

    # both
    index = date_range('20130101', periods=3, freq='D')
    s = Series(date_range('20140204', periods=3, freq='s'),
               index=index, name='xxx')
    exp = Series(np.array([2014, 2014, 2014], dtype='int64'),
                 index=index, name='xxx')
    tm.assert_series_equal(s.dt.year, exp)

    exp = Series(np.array([2, 2, 2], dtype='int64'),
                 index=index, name='xxx')
    tm.assert_series_equal(s.dt.month, exp)

    exp = Series(np.array([0, 1, 2], dtype='int64'),
                 index=index, name='xxx')
    tm.assert_series_equal(s.dt.second, exp)

    exp = pd.Series([s[0]] * 3, index=index, name='xxx')
    tm.assert_series_equal(s.dt.normalize(), exp)

    # periodindex
    cases = [Series(period_range('20130101', periods=5, freq='D'),
                    name='xxx')]
    for s in cases:
        for prop in ok_for_period:
            # we test freq below
            if prop != 'freq':
                compare(s, prop)

        for prop in ok_for_period_methods:
            getattr(s.dt, prop)

        freq_result = s.dt.freq
        self.assertEqual(freq_result, PeriodIndex(s.values).freq)

    # test limited display api
    def get_dir(s):
        # public names exposed on the accessor, sorted and de-duplicated
        results = [r for r in s.dt.__dir__() if not r.startswith('_')]
        return list(sorted(set(results)))

    s = Series(date_range('20130101', periods=5, freq='D'), name='xxx')
    results = get_dir(s)
    tm.assert_almost_equal(
        results, list(sorted(set(ok_for_dt + ok_for_dt_methods))))

    s = Series(period_range('20130101', periods=5,
                            freq='D', name='xxx').asobject)
    results = get_dir(s)
    tm.assert_almost_equal(
        results, list(sorted(set(ok_for_period + ok_for_period_methods))))

    # 11295
    # ambiguous time error on the conversions
    s = Series(pd.date_range('2015-01-01', '2016-01-01', freq='T'),
               name='xxx')
    s = s.dt.tz_localize('UTC').dt.tz_convert('America/Chicago')
    results = get_dir(s)
    tm.assert_almost_equal(
        results, list(sorted(set(ok_for_dt + ok_for_dt_methods))))
    exp_values = pd.date_range('2015-01-01', '2016-01-01', freq='T',
                               tz='UTC').tz_convert('America/Chicago')
    expected = Series(exp_values, name='xxx')
    tm.assert_series_equal(s, expected)

    # no setting allowed
    s = Series(date_range('20130101', periods=5, freq='D'), name='xxx')
    with tm.assertRaisesRegexp(ValueError, "modifications"):
        s.dt.hour = 5

    # trying to set a copy
    with pd.option_context('chained_assignment', 'raise'):

        def f():
            s.dt.hour[0] = 5

        self.assertRaises(com.SettingWithCopyError, f)
def test_hash_error(self):
    # TimedeltaIndex is mutable and therefore unhashable; hash() must
    # raise TypeError naming the concrete class.
    idx = timedelta_range('1 days', periods=10)
    expected_msg = "unhashable type: %r" % type(idx).__name__
    with tm.assertRaisesRegexp(TypeError, expected_msg):
        hash(idx)
def test_shift(self):
    # DataFrame.shift: naive (positional) shift, shift(0) identity,
    # freq-based shift, PeriodIndex frames, and shifting along columns.

    # naive shift
    shiftedFrame = self.tsframe.shift(5)
    self.assert_index_equal(shiftedFrame.index, self.tsframe.index)

    shiftedSeries = self.tsframe['A'].shift(5)
    assert_series_equal(shiftedFrame['A'], shiftedSeries)

    shiftedFrame = self.tsframe.shift(-5)
    self.assert_index_equal(shiftedFrame.index, self.tsframe.index)

    shiftedSeries = self.tsframe['A'].shift(-5)
    assert_series_equal(shiftedFrame['A'], shiftedSeries)

    # shift by 0
    unshifted = self.tsframe.shift(0)
    assert_frame_equal(unshifted, self.tsframe)

    # shift by DateOffset: moves the index, not the values
    shiftedFrame = self.tsframe.shift(5, freq=offsets.BDay())
    self.assertEqual(len(shiftedFrame), len(self.tsframe))

    shiftedFrame2 = self.tsframe.shift(5, freq='B')
    assert_frame_equal(shiftedFrame, shiftedFrame2)

    d = self.tsframe.index[0]
    shifted_d = d + offsets.BDay(5)
    assert_series_equal(self.tsframe.xs(d), shiftedFrame.xs(shifted_d),
                        check_names=False)

    # shift int frame
    int_shifted = self.intframe.shift(1)  # noqa

    # Shifting with PeriodIndex
    ps = tm.makePeriodFrame()
    shifted = ps.shift(1)
    unshifted = shifted.shift(-1)
    self.assert_index_equal(shifted.index, ps.index)
    self.assert_index_equal(unshifted.index, ps.index)
    # shift(1).shift(-1) round-trips except for the NaN introduced
    tm.assert_numpy_array_equal(unshifted.ix[:, 0].valid().values,
                                ps.ix[:-1, 0].values)

    shifted2 = ps.shift(1, 'B')
    shifted3 = ps.shift(1, offsets.BDay())
    assert_frame_equal(shifted2, shifted3)
    assert_frame_equal(ps, shifted2.shift(-1, 'B'))

    # a freq that disagrees with the PeriodIndex freq is rejected
    assertRaisesRegexp(ValueError, 'does not match PeriodIndex freq',
                       ps.shift, freq='D')

    # shift other axis
    # GH 6371
    df = DataFrame(np.random.rand(10, 5))
    expected = pd.concat(
        [DataFrame(np.nan, index=df.index, columns=[0]),
         df.iloc[:, 0:-1]],
        ignore_index=True, axis=1)
    result = df.shift(1, axis=1)
    assert_frame_equal(result, expected)

    # shift named axis
    df = DataFrame(np.random.rand(10, 5))
    expected = pd.concat(
        [DataFrame(np.nan, index=df.index, columns=[0]),
         df.iloc[:, 0:-1]],
        ignore_index=True, axis=1)
    result = df.shift(1, axis='columns')
    assert_frame_equal(result, expected)