def setUp(self): self.bool_index = tm.makeBoolIndex(10, name='a') self.int_index = tm.makeIntIndex(10, name='a') self.float_index = tm.makeFloatIndex(10, name='a') self.dt_index = tm.makeDateIndex(10, name='a') self.dt_tz_index = tm.makeDateIndex(10, name='a').tz_localize( tz='US/Eastern') self.period_index = tm.makePeriodIndex(10, name='a') self.string_index = tm.makeStringIndex(10, name='a') self.unicode_index = tm.makeUnicodeIndex(10, name='a') arr = np.random.randn(10) self.int_series = Series(arr, index=self.int_index, name='a') self.float_series = Series(arr, index=self.float_index, name='a') self.dt_series = Series(arr, index=self.dt_index, name='a') self.dt_tz_series = self.dt_tz_index.to_series(keep_tz=True) self.period_series = Series(arr, index=self.period_index, name='a') self.string_series = Series(arr, index=self.string_index, name='a') types = ['bool', 'int', 'float', 'dt', 'dt_tz', 'period', 'string', 'unicode'] fmts = ["{0}_{1}".format(t, f) for t in types for f in ['index', 'series']] self.objs = [getattr(self, f) for f in fmts if getattr(self, f, None) is not None]
def setup_method(self, method): self.bool_index = tm.makeBoolIndex(10, name='a') self.int_index = tm.makeIntIndex(10, name='a') self.float_index = tm.makeFloatIndex(10, name='a') self.dt_index = tm.makeDateIndex(10, name='a') self.dt_tz_index = tm.makeDateIndex(10, name='a').tz_localize( tz='US/Eastern') self.period_index = tm.makePeriodIndex(10, name='a') self.string_index = tm.makeStringIndex(10, name='a') self.unicode_index = tm.makeUnicodeIndex(10, name='a') arr = np.random.randn(10) self.bool_series = Series(arr, index=self.bool_index, name='a') self.int_series = Series(arr, index=self.int_index, name='a') self.float_series = Series(arr, index=self.float_index, name='a') self.dt_series = Series(arr, index=self.dt_index, name='a') self.dt_tz_series = self.dt_tz_index.to_series(keep_tz=True) self.period_series = Series(arr, index=self.period_index, name='a') self.string_series = Series(arr, index=self.string_index, name='a') self.unicode_series = Series(arr, index=self.unicode_index, name='a') types = ['bool', 'int', 'float', 'dt', 'dt_tz', 'period', 'string', 'unicode'] self.indexes = [getattr(self, '{}_index'.format(t)) for t in types] self.series = [getattr(self, '{}_series'.format(t)) for t in types] self.objs = self.indexes + self.series
def setUp(self): self.strIndex = tm.makeStringIndex(100) self.dateIndex = tm.makeDateIndex(100) self.intIndex = tm.makeIntIndex(100) self.floatIndex = tm.makeFloatIndex(100) self.empty = Index([]) self.tuples = Index(zip(["foo", "bar", "baz"], [1, 2, 3]))
def test_constructor_dict_cast(self): # cast float tests test_data = { 'A' : {'1' : 1, '2' : 2}, 'B' : {'1' : '1', '2' : '2', '3' : '3'}, } frame = self.klass(test_data, dtype=float) self.assertEqual(len(frame), 3) self.assert_(frame['B'].dtype == np.float_) self.assert_(frame['A'].dtype == np.float_) frame = self.klass(test_data) self.assertEqual(len(frame), 3) self.assert_(frame['B'].dtype == np.object_) self.assert_(frame['A'].dtype == np.float_) # can't cast to float test_data = { 'A' : dict(zip(range(20), common.makeDateIndex(20))), 'B' : dict(zip(range(15), randn(15))) } frame = self.klass(test_data, dtype=float) self.assertEqual(len(frame), 20) self.assert_(frame['A'].dtype == np.object_) self.assert_(frame['B'].dtype == np.float_)
def test_get_numeric_data(self): int_ser = Series(np.array([0, 1, 2])) float_ser = Series(np.array([0., 1., 2.])) complex_ser = Series(np.array([0j, 1j, 2j])) str_ser = Series(np.array(['a', 'b', 'c'])) bool_ser = Series(np.array([True, False, True])) obj_ser = Series(np.array([1, 'a', 5])) dt_ser = Series(tm.makeDateIndex(3)) # check types df = DataFrame({'int': int_ser, 'float': float_ser, 'complex': complex_ser, 'str': str_ser, 'bool': bool_ser, 'obj': obj_ser, 'dt': dt_ser}) xp = DataFrame({'int': int_ser, 'float': float_ser, 'complex': complex_ser}) rs = DataFrame(df._data.get_numeric_data()) assert_frame_equal(xp, rs) xp = DataFrame({'bool': bool_ser}) rs = DataFrame(df._data.get_numeric_data(type_list=bool)) assert_frame_equal(xp, rs) rs = DataFrame(df._data.get_numeric_data(type_list=bool)) df.ix[0, 'bool'] = not df.ix[0, 'bool'] self.assertEqual(rs.ix[0, 'bool'], df.ix[0, 'bool']) rs = DataFrame(df._data.get_numeric_data(type_list=bool, copy=True)) df.ix[0, 'bool'] = not df.ix[0, 'bool'] self.assertEqual(rs.ix[0, 'bool'], not df.ix[0, 'bool'])
def setup_method(self, method): super(TestIndex, self).setup_method(method) self.d = { 'string': tm.makeStringIndex(100), 'date': tm.makeDateIndex(100), 'int': tm.makeIntIndex(100), 'rng': tm.makeRangeIndex(100), 'float': tm.makeFloatIndex(100), 'empty': Index([]), 'tuple': Index(zip(['foo', 'bar', 'baz'], [1, 2, 3])), 'period': Index(period_range('2012-1-1', freq='M', periods=3)), 'date2': Index(date_range('2013-01-1', periods=10)), 'bdate': Index(bdate_range('2013-01-02', periods=10)), 'cat': tm.makeCategoricalIndex(100), 'interval': tm.makeIntervalIndex(100), 'timedelta': tm.makeTimedeltaIndex(100, 'H') } self.mi = { 'reg': MultiIndex.from_tuples([('bar', 'one'), ('baz', 'two'), ('foo', 'two'), ('qux', 'one'), ('qux', 'two')], names=['first', 'second']), }
def test_timeseries_coercion(self): idx = tm.makeDateIndex(10000) ser = Series(np.random.randn(len(idx)), idx.astype(object)) with tm.assert_produces_warning(FutureWarning): self.assertTrue(ser.is_time_series) self.assertTrue(ser.index.is_all_dates) self.assertIsInstance(ser.index, DatetimeIndex)
def test_replace2(self): N = 100 ser = pd.Series(np.fabs(np.random.randn(N)), tm.makeDateIndex(N), dtype=object) ser[:5] = np.nan ser[6:10] = 'foo' ser[20:30] = 'bar' # replace list with a single value rs = ser.replace([np.nan, 'foo', 'bar'], -1) assert (rs[:5] == -1).all() assert (rs[6:10] == -1).all() assert (rs[20:30] == -1).all() assert (pd.isnull(ser[:5])).all() # replace with different values rs = ser.replace({np.nan: -1, 'foo': -2, 'bar': -3}) assert (rs[:5] == -1).all() assert (rs[6:10] == -2).all() assert (rs[20:30] == -3).all() assert (pd.isnull(ser[:5])).all() # replace with different values with 2 lists rs2 = ser.replace([np.nan, 'foo', 'bar'], [-1, -2, -3]) tm.assert_series_equal(rs, rs2) # replace inplace ser.replace([np.nan, 'foo', 'bar'], -1, inplace=True) assert (ser[:5] == -1).all() assert (ser[6:10] == -1).all() assert (ser[20:30] == -1).all()
def test_dateindex_conversion(self): decimals = 9 for freq in ('B', 'L', 'S'): dateindex = tm.makeDateIndex(k=10, freq=freq) rs = self.dtc.convert(dateindex, None, None) xp = converter.dates.date2num(dateindex._mpl_repr()) tm.assert_almost_equal(rs, xp, decimals)
def get_objs(): indexes = [ tm.makeBoolIndex(10, name='a'), tm.makeIntIndex(10, name='a'), tm.makeFloatIndex(10, name='a'), tm.makeDateIndex(10, name='a'), tm.makeDateIndex(10, name='a').tz_localize(tz='US/Eastern'), tm.makePeriodIndex(10, name='a'), tm.makeStringIndex(10, name='a'), tm.makeUnicodeIndex(10, name='a') ] arr = np.random.randn(10) series = [Series(arr, index=idx, name='a') for idx in indexes] objs = indexes + series return objs
def setUp(self): self.int_index = tm.makeIntIndex(10) self.float_index = tm.makeFloatIndex(10) self.dt_index = tm.makeDateIndex(10) self.dt_tz_index = tm.makeDateIndex(10).tz_localize(tz="US/Eastern") self.period_index = tm.makePeriodIndex(10) self.string_index = tm.makeStringIndex(10) arr = np.random.randn(10) self.int_series = Series(arr, index=self.int_index) self.float_series = Series(arr, index=self.int_index) self.dt_series = Series(arr, index=self.dt_index) self.dt_tz_series = self.dt_tz_index.to_series(keep_tz=True) self.period_series = Series(arr, index=self.period_index) self.string_series = Series(arr, index=self.string_index) types = ["int", "float", "dt", "dt_tz", "period", "string"] self.objs = [getattr(self, "{0}_{1}".format(t, f)) for t in types for f in ["index", "series"]]
def test_set_index_makes_timeseries(self): idx = tm.makeDateIndex(10) s = Series(lrange(10)) s.index = idx with tm.assert_produces_warning(FutureWarning): self.assertTrue(s.is_time_series) self.assertTrue(s.index.is_all_dates)
def setUp(self): self.int_index = tm.makeIntIndex(10) self.float_index = tm.makeFloatIndex(10) self.dt_index = tm.makeDateIndex(10) self.dt_tz_index = tm.makeDateIndex(10).tz_localize(tz='US/Eastern') self.period_index = tm.makePeriodIndex(10) self.string_index = tm.makeStringIndex(10) arr = np.random.randn(10) self.int_series = Series(arr, index=self.int_index) self.float_series = Series(arr, index=self.int_index) self.dt_series = Series(arr, index=self.dt_index) self.dt_tz_series = self.dt_tz_index.to_series(keep_tz=True) self.period_series = Series(arr, index=self.period_index) self.string_series = Series(arr, index=self.string_index) types = ['int','float','dt', 'dt_tz', 'period','string'] self.objs = [ getattr(self,"{0}_{1}".format(t,f)) for t in types for f in ['index','series'] ]
def test_isin(self): index = tm.makeDateIndex(4) result = index.isin(index) assert result.all() result = index.isin(list(index)) assert result.all() assert_almost_equal(index.isin([index[2], 5]), np.array([False, False, True, False]))
def test_fill(self): ts = Series([0., 1., 2., 3., 4.], index=common.makeDateIndex(5)) self.assert_(np.array_equal(ts, ts.fill())) ts[2] = np.NaN self.assert_(np.array_equal(ts.fill(), [0., 1., 1., 3., 4.])) self.assert_(np.array_equal(ts.fill(method='backfill'), [0., 1., 3., 3., 4.])) self.assert_(np.array_equal(ts.fill(value=5), [0., 1., 5., 3., 4.]))
def test_dti_timestamp_fields(self, field): # extra fields from DatetimeIndex like quarter and week idx = tm.makeDateIndex(100) expected = getattr(idx, field)[-1] if field == 'weekday_name': with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): result = getattr(Timestamp(idx[-1]), field) else: result = getattr(Timestamp(idx[-1]), field) assert result == expected
def setUp(self): self.bool_index = tm.makeBoolIndex(10, name='a') self.int_index = tm.makeIntIndex(10, name='a') self.float_index = tm.makeFloatIndex(10, name='a') self.dt_index = tm.makeDateIndex(10, name='a') self.dt_tz_index = tm.makeDateIndex(10, name='a').tz_localize(tz='US/Eastern') self.period_index = tm.makePeriodIndex(10, name='a') self.string_index = tm.makeStringIndex(10, name='a') self.unicode_index = tm.makeUnicodeIndex(10, name='a') arr = np.random.randn(10) self.int_series = Series(arr, index=self.int_index, name='a') self.float_series = Series(arr, index=self.float_index, name='a') self.dt_series = Series(arr, index=self.dt_index, name='a') self.dt_tz_series = self.dt_tz_index.to_series(keep_tz=True) self.period_series = Series(arr, index=self.period_index, name='a') self.string_series = Series(arr, index=self.string_index, name='a') types = ['bool','int','float','dt', 'dt_tz', 'period','string', 'unicode'] fmts = [ "{0}_{1}".format(t,f) for t in types for f in ['index','series'] ] self.objs = [ getattr(self,f) for f in fmts if getattr(self,f,None) is not None ]
def test_union2(self): everything = tm.makeDateIndex(10) first = everything[:5] second = everything[5:] union = first.union(second) assert tm.equalContents(union, everything) # GH 10149 cases = [klass(second.values) for klass in [np.array, Series, list]] for case in cases: result = first.union(case) assert tm.equalContents(result, everything)
def setUp(self): self.bool_index = tm.makeBoolIndex(10, name="a") self.int_index = tm.makeIntIndex(10, name="a") self.float_index = tm.makeFloatIndex(10, name="a") self.dt_index = tm.makeDateIndex(10, name="a") self.dt_tz_index = tm.makeDateIndex(10, name="a").tz_localize(tz="US/Eastern") self.period_index = tm.makePeriodIndex(10, name="a") self.string_index = tm.makeStringIndex(10, name="a") self.unicode_index = tm.makeUnicodeIndex(10, name="a") arr = np.random.randn(10) self.int_series = Series(arr, index=self.int_index, name="a") self.float_series = Series(arr, index=self.float_index, name="a") self.dt_series = Series(arr, index=self.dt_index, name="a") self.dt_tz_series = self.dt_tz_index.to_series(keep_tz=True) self.period_series = Series(arr, index=self.period_index, name="a") self.string_series = Series(arr, index=self.string_index, name="a") types = ["bool", "int", "float", "dt", "dt_tz", "period", "string", "unicode"] fmts = ["{0}_{1}".format(t, f) for t in types for f in ["index", "series"]] self.objs = [getattr(self, f) for f in fmts if getattr(self, f, None) is not None]
def setUp(self): self.int_index = tm.makeIntIndex(10) self.float_index = tm.makeFloatIndex(10) self.dt_index = tm.makeDateIndex(10) self.dt_tz_index = tm.makeDateIndex(10).tz_localize(tz='US/Eastern') self.period_index = tm.makePeriodIndex(10) self.string_index = tm.makeStringIndex(10) arr = np.random.randn(10) self.int_series = Series(arr, index=self.int_index) self.float_series = Series(arr, index=self.int_index) self.dt_series = Series(arr, index=self.dt_index) self.dt_tz_series = self.dt_tz_index.to_series(keep_tz=True) self.period_series = Series(arr, index=self.period_index) self.string_series = Series(arr, index=self.string_index) types = ['int', 'float', 'dt', 'dt_tz', 'period', 'string'] self.objs = [ getattr(self, "{0}_{1}".format(t, f)) for t in types for f in ['index', 'series'] ]
def test_union2(self, sort): everything = tm.makeDateIndex(10) first = everything[:5] second = everything[5:] union = first.union(second, sort=sort) tm.assert_index_equal(union, everything) # GH 10149 cases = [klass(second.values) for klass in [np.array, Series, list]] for case in cases: result = first.union(case, sort=sort) tm.assert_index_equal(result, everything)
def test_union3(self, sort, box): everything = tm.makeDateIndex(10) first = everything[:5] second = everything[5:] # GH 10149 expected = first.astype('O').union( pd.Index(second.values, dtype='O') ).astype('O') case = box(second.values) result = first.union(case, sort=sort) tm.assert_index_equal(result, expected)
def test_fill(self): ts = Series([0., 1., 2., 3., 4.], index=common.makeDateIndex(5)) self.assert_(np.array_equal(ts, ts.fill())) ts[2] = np.NaN self.assert_(np.array_equal(ts.fill(), [0., 1., 1., 3., 4.])) self.assert_( np.array_equal(ts.fill(method='backfill'), [0., 1., 3., 3., 4.])) self.assert_(np.array_equal(ts.fill(value=5), [0., 1., 5., 3., 4.]))
def test_timestamp_fields(self): # extra fields from DatetimeIndex like quarter and week from pandas._tseries import Timestamp idx = tm.makeDateIndex(10) fields = ['dayofweek', 'dayofyear', 'week', 'weekofyear', 'quarter'] for f in fields: expected = getattr(idx, f)[0] result = getattr(Timestamp(idx[0]), f) self.assertEqual(result, expected) self.assertEqual(idx.freq, Timestamp(idx[0], idx.freq).freq) self.assertEqual(idx.freqstr, Timestamp(idx[0], idx.freq).freqstr)
def test_timestamp_fields(self): # extra fields from DatetimeIndex like quarter and week idx = tm.makeDateIndex(100) fields = ['dayofweek', 'dayofyear', 'week', 'weekofyear', 'quarter', 'days_in_month', 'is_month_start', 'is_month_end', 'is_quarter_start', 'is_quarter_end', 'is_year_start', 'is_year_end', 'weekday_name'] for f in fields: expected = getattr(idx, f)[-1] result = getattr(Timestamp(idx[-1]), f) assert result == expected assert idx.freq == Timestamp(idx[-1], idx.freq).freq assert idx.freqstr == Timestamp(idx[-1], idx.freq).freqstr
def setUp(self): self.int_index = tm.makeIntIndex(10) self.float_index = tm.makeFloatIndex(10) self.dt_index = tm.makeDateIndex(10) self.period_index = tm.makePeriodIndex(10) self.string_index = tm.makeStringIndex(10) arr = np.random.randn(10) self.int_series = Series(arr, index=self.int_index) self.float_series = Series(arr, index=self.int_index) self.dt_series = Series(arr, index=self.dt_index) self.period_series = Series(arr, index=self.period_index) self.string_series = Series(arr, index=self.string_index) self.objs = [ getattr(self,"{0}_{1}".format(t,f)) for t in ['int','float','dt','period','string'] for f in ['index','series'] ]
def test_intersection2(self): first = tm.makeDateIndex(10) second = first[5:] intersect = first.intersection(second) assert tm.equalContents(intersect, second) # GH 10149 cases = [klass(second.values) for klass in [np.array, Series, list]] for case in cases: result = first.intersection(case) assert tm.equalContents(result, second) third = Index(["a", "b", "c"]) result = first.intersection(third) expected = pd.Index([], dtype=object) tm.assert_index_equal(result, expected)
def test_intersection2(self): first = tm.makeDateIndex(10) second = first[5:] intersect = first.intersection(second) assert tm.equalContents(intersect, second) # GH 10149 cases = [klass(second.values) for klass in [np.array, Series, list]] for case in cases: result = first.intersection(case) assert tm.equalContents(result, second) third = Index(['a', 'b', 'c']) result = first.intersection(third) expected = pd.Index([], dtype=object) tm.assert_index_equal(result, expected)
def test_tseries_indices_frame(self): idx = tm.makeDateIndex(10) df = DataFrame(np.random.randn(len(idx), 3), index=idx) self.store['a'] = df result = self.store['a'] assert_frame_equal(result, df) self.assertEquals(type(result.index), type(df.index)) self.assertEquals(result.index.freq, df.index.freq) idx = tm.makePeriodIndex(10) df = DataFrame(np.random.randn(len(idx), 3), idx) self.store['a'] = df result = self.store['a'] assert_frame_equal(result, df) self.assertEquals(type(result.index), type(df.index)) self.assertEquals(result.index.freq, df.index.freq)
def test_tseries_indices_series(self): idx = tm.makeDateIndex(10) ser = Series(np.random.randn(len(idx)), idx) self.store['a'] = ser result = self.store['a'] assert_series_equal(result, ser) self.assertEquals(type(result.index), type(ser.index)) self.assertEquals(result.index.freq, ser.index.freq) idx = tm.makePeriodIndex(10) ser = Series(np.random.randn(len(idx)), idx) self.store['a'] = ser result = self.store['a'] assert_series_equal(result, ser) self.assertEquals(type(result.index), type(ser.index)) self.assertEquals(result.index.freq, ser.index.freq)
def setUp(self): self.int_index = tm.makeIntIndex(10) self.float_index = tm.makeFloatIndex(10) self.dt_index = tm.makeDateIndex(10) self.period_index = tm.makePeriodIndex(10) self.string_index = tm.makeStringIndex(10) arr = np.random.randn(10) self.int_series = Series(arr, index=self.int_index) self.float_series = Series(arr, index=self.int_index) self.dt_series = Series(arr, index=self.dt_index) self.period_series = Series(arr, index=self.period_index) self.string_series = Series(arr, index=self.string_index) self.objs = [ getattr(self, "{0}_{1}".format(t, f)) for t in ['int', 'float', 'dt', 'period', 'string'] for f in ['index', 'series'] ]
def setUp(self): super(TestIndex, self).setUp() self.d = { 'string': tm.makeStringIndex(100), 'date': tm.makeDateIndex(100), 'int': tm.makeIntIndex(100), 'float': tm.makeFloatIndex(100), 'empty': Index([]), 'tuple': Index(zip(['foo', 'bar', 'baz'], [1, 2, 3])), 'period': Index(period_range('2012-1-1', freq='M', periods=3)), 'date2': Index(date_range('2013-01-1', periods=10)), 'bdate': Index(bdate_range('2013-01-02', periods=10)), } self.mi = { 'reg': MultiIndex.from_tuples([('bar', 'one'), ('baz', 'two'), ('foo', 'two'), ('qux', 'one'), ('qux', 'two')], names=['first', 'second']), }
def test_line_area_nan_series(self): values = [1, 2, np.nan, 3] s = Series(values) ts = Series(values, index=tm.makeDateIndex(k=4)) for d in [s, ts]: ax = _check_plot_works(d.plot) masked = ax.lines[0].get_ydata() # remove nan for comparison purpose exp = np.array([1, 2, 3], dtype=np.float64) self.assert_numpy_array_equal(np.delete(masked.data, 2), exp) self.assert_numpy_array_equal(masked.mask, np.array([False, False, True, False])) expected = np.array([1, 2, 0, 3], dtype=np.float64) ax = _check_plot_works(d.plot, stacked=True) self.assert_numpy_array_equal(ax.lines[0].get_ydata(), expected) ax = _check_plot_works(d.plot.area) self.assert_numpy_array_equal(ax.lines[0].get_ydata(), expected) ax = _check_plot_works(d.plot.area, stacked=False) self.assert_numpy_array_equal(ax.lines[0].get_ydata(), expected)
def test_line_area_nan_series(self): values = [1, 2, np.nan, 3] s = Series(values) ts = Series(values, index=tm.makeDateIndex(k=4)) for d in [s, ts]: ax = _check_plot_works(d.plot) masked = ax.lines[0].get_ydata() # remove nan for comparison purpose exp = np.array([1, 2, 3], dtype=np.float64) tm.assert_numpy_array_equal(np.delete(masked.data, 2), exp) tm.assert_numpy_array_equal( masked.mask, np.array([False, False, True, False])) expected = np.array([1, 2, 0, 3], dtype=np.float64) ax = _check_plot_works(d.plot, stacked=True) tm.assert_numpy_array_equal(ax.lines[0].get_ydata(), expected) ax = _check_plot_works(d.plot.area) tm.assert_numpy_array_equal(ax.lines[0].get_ydata(), expected) ax = _check_plot_works(d.plot.area, stacked=False) tm.assert_numpy_array_equal(ax.lines[0].get_ydata(), expected)
def setUp(self): super(TestIndex, self).setUp() self.d = { "string": tm.makeStringIndex(100), "date": tm.makeDateIndex(100), "int": tm.makeIntIndex(100), "float": tm.makeFloatIndex(100), "empty": Index([]), "tuple": Index(zip(["foo", "bar", "baz"], [1, 2, 3])), "period": Index(period_range("2012-1-1", freq="M", periods=3)), "date2": Index(date_range("2013-01-1", periods=10)), "bdate": Index(bdate_range("2013-01-02", periods=10)), } self.mi = { "reg": MultiIndex.from_tuples( [("bar", "one"), ("baz", "two"), ("foo", "two"), ("qux", "one"), ("qux", "two")], names=["first", "second"], ) }
def test_get_numeric_data(self): int_ser = Series(np.array([0, 1, 2])) float_ser = Series(np.array([0., 1., 2.])) complex_ser = Series(np.array([0j, 1j, 2j])) str_ser = Series(np.array(['a', 'b', 'c'])) bool_ser = Series(np.array([True, False, True])) obj_ser = Series(np.array([1, 'a', 5])) dt_ser = Series(tm.makeDateIndex(3)) # check types df = DataFrame({ 'int': int_ser, 'float': float_ser, 'complex': complex_ser, 'str': str_ser, 'bool': bool_ser, 'obj': obj_ser, 'dt': dt_ser }) xp = DataFrame({ 'int': int_ser, 'float': float_ser, 'complex': complex_ser, 'bool': bool_ser }) rs = DataFrame(df._data.get_numeric_data()) assert_frame_equal(xp, rs) xp = DataFrame({'bool': bool_ser}) rs = DataFrame(df._data.get_bool_data()) assert_frame_equal(xp, rs) rs = DataFrame(df._data.get_bool_data()) df.ix[0, 'bool'] = not df.ix[0, 'bool'] self.assertEqual(rs.ix[0, 'bool'], df.ix[0, 'bool']) rs = DataFrame(df._data.get_bool_data(copy=True)) df.ix[0, 'bool'] = not df.ix[0, 'bool'] self.assertEqual(rs.ix[0, 'bool'], not df.ix[0, 'bool'])
def test_replace(self): N = 100 ser = pd.Series(np.random.randn(N)) ser[0:4] = np.nan ser[6:10] = 0 # replace list with a single value ser.replace([np.nan], -1, inplace=True) exp = ser.fillna(-1) tm.assert_series_equal(ser, exp) rs = ser.replace(0., np.nan) ser[ser == 0.] = np.nan tm.assert_series_equal(rs, ser) ser = pd.Series(np.fabs(np.random.randn(N)), tm.makeDateIndex(N), dtype=object) ser[:5] = np.nan ser[6:10] = 'foo' ser[20:30] = 'bar' # replace list with a single value rs = ser.replace([np.nan, 'foo', 'bar'], -1) assert (rs[:5] == -1).all() assert (rs[6:10] == -1).all() assert (rs[20:30] == -1).all() assert (pd.isnull(ser[:5])).all() # replace with different values rs = ser.replace({np.nan: -1, 'foo': -2, 'bar': -3}) assert (rs[:5] == -1).all() assert (rs[6:10] == -2).all() assert (rs[20:30] == -3).all() assert (pd.isnull(ser[:5])).all() # replace with different values with 2 lists rs2 = ser.replace([np.nan, 'foo', 'bar'], [-1, -2, -3]) tm.assert_series_equal(rs, rs2) # replace inplace ser.replace([np.nan, 'foo', 'bar'], -1, inplace=True) assert (ser[:5] == -1).all() assert (ser[6:10] == -1).all() assert (ser[20:30] == -1).all() ser = pd.Series([np.nan, 0, np.inf]) tm.assert_series_equal(ser.replace(np.nan, 0), ser.fillna(0)) ser = pd.Series([np.nan, 0, 'foo', 'bar', np.inf, None, lib.NaT]) tm.assert_series_equal(ser.replace(np.nan, 0), ser.fillna(0)) filled = ser.copy() filled[4] = 0 tm.assert_series_equal(ser.replace(np.inf, 0), filled) ser = pd.Series(self.ts.index) tm.assert_series_equal(ser.replace(np.nan, 0), ser.fillna(0)) # malformed pytest.raises(ValueError, ser.replace, [1, 2, 3], [np.nan, 0]) # make sure that we aren't just masking a TypeError because bools don't # implement indexing with tm.assert_raises_regex(TypeError, 'Cannot compare types .+'): ser.replace([1, 2], [np.nan, 0]) ser = pd.Series([0, 1, 2, 3, 4]) result = ser.replace([0, 1, 2, 3, 4], [4, 3, 2, 1, 0]) tm.assert_series_equal(result, pd.Series([4, 3, 2, 1, 0]))
assert s.iloc[0] == s["a"] s.iloc[0] = 5 tm.assert_almost_equal(s["a"], 5) def test_getitem_box_float64(datetime_series): value = datetime_series[5] assert isinstance(value, np.float64) @pytest.mark.parametrize( "arr", [ np.random.randn(10), tm.makeDateIndex(10, name="a").tz_localize(tz="US/Eastern") ], ) def test_get(arr): # GH 21260 s = Series(arr, index=[2 * i for i in range(len(arr))]) assert s.get(4) == s.iloc[2] result = s.get([4, 6]) expected = s.iloc[[2, 3]] tm.assert_series_equal(result, expected) result = s.get(slice(2)) expected = s.iloc[[0, 1]] tm.assert_series_equal(result, expected)
import numpy as np import pytest import pandas as pd from pandas.core.indexes.api import Index, MultiIndex import pandas.util.testing as tm @pytest.fixture(params=[tm.makeUnicodeIndex(100), tm.makeStringIndex(100), tm.makeDateIndex(100), tm.makePeriodIndex(100), tm.makeTimedeltaIndex(100), tm.makeIntIndex(100), tm.makeUIntIndex(100), tm.makeRangeIndex(100), tm.makeFloatIndex(100), Index([True, False]), tm.makeCategoricalIndex(100), Index([]), MultiIndex.from_tuples(zip( ['foo', 'bar', 'baz'], [1, 2, 3])), Index([0, 0, 1, 1, 2, 2])], ids=lambda x: type(x).__name__) def indices(request): return request.param @pytest.fixture(params=[1, np.array(1, dtype=np.int64)]) def one(request): # zero-dim integer array behaves like an integer
def setUp(self): self.strIndex = common.makeStringIndex(100) self.dateIndex = common.makeDateIndex(100) self.intIndex = common.makeIntIndex(100)
def test_fillna(self): ts = Series([0., 1., 2., 3., 4.], index=tm.makeDateIndex(5)) self.assert_numpy_array_equal(ts, ts.fillna(method='ffill')) ts[2] = np.NaN self.assert_numpy_array_equal(ts.fillna(method='ffill'), [0., 1., 1., 3., 4.]) self.assert_numpy_array_equal(ts.fillna(method='backfill'), [0., 1., 3., 3., 4.]) self.assert_numpy_array_equal(ts.fillna(value=5), [0., 1., 5., 3., 4.]) self.assertRaises(ValueError, ts.fillna) self.assertRaises(ValueError, self.ts.fillna, value=0, method='ffill') # GH 5703 s1 = Series([np.nan]) s2 = Series([1]) result = s1.fillna(s2) expected = Series([1.]) assert_series_equal(result, expected) result = s1.fillna({}) assert_series_equal(result, s1) result = s1.fillna(Series(())) assert_series_equal(result, s1) result = s2.fillna(s1) assert_series_equal(result, s2) result = s1.fillna({0: 1}) assert_series_equal(result, expected) result = s1.fillna({1: 1}) assert_series_equal(result, Series([np.nan])) result = s1.fillna({0: 1, 1: 1}) assert_series_equal(result, expected) result = s1.fillna(Series({0: 1, 1: 1})) assert_series_equal(result, expected) result = s1.fillna(Series({0: 1, 1: 1}, index=[4, 5])) assert_series_equal(result, s1) s1 = Series([0, 1, 2], list('abc')) s2 = Series([0, np.nan, 2], list('bac')) result = s2.fillna(s1) expected = Series([0, 0, 2.], list('bac')) assert_series_equal(result, expected) # limit s = Series(np.nan, index=[0, 1, 2]) result = s.fillna(999, limit=1) expected = Series([999, np.nan, np.nan], index=[0, 1, 2]) assert_series_equal(result, expected) result = s.fillna(999, limit=2) expected = Series([999, 999, np.nan], index=[0, 1, 2]) assert_series_equal(result, expected) # GH 9043 # make sure a string representation of int/float values can be filled # correctly without raising errors or being converted vals = ['0', '1.5', '-0.3'] for val in vals: s = Series([0, 1, np.nan, np.nan, 4], dtype='float64') result = s.fillna(val) expected = Series([0, 1, val, val, 4], dtype='object') assert_series_equal(result, expected)
def test_union2(self, sort): everything = tm.makeDateIndex(10) first = everything[:5] second = everything[5:] union = first.union(second, sort=sort) tm.assert_index_equal(union, everything)
class TestSeriesMisc(TestData, SharedWithSparse): series_klass = Series # SharedWithSparse tests use generic, series_klass-agnostic assertion _assert_series_equal = staticmethod(tm.assert_series_equal) def test_tab_completion(self): # GH 9910 s = Series(list("abcd")) # Series of str values should have .str but not .dt/.cat in __dir__ assert "str" in dir(s) assert "dt" not in dir(s) assert "cat" not in dir(s) # similarly for .dt s = Series(date_range("1/1/2015", periods=5)) assert "dt" in dir(s) assert "str" not in dir(s) assert "cat" not in dir(s) # Similarly for .cat, but with the twist that str and dt should be # there if the categories are of that type first cat and str. s = Series(list("abbcd"), dtype="category") assert "cat" in dir(s) assert "str" in dir(s) # as it is a string categorical assert "dt" not in dir(s) # similar to cat and str s = Series(date_range("1/1/2015", periods=5)).astype("category") assert "cat" in dir(s) assert "str" not in dir(s) assert "dt" in dir(s) # as it is a datetime categorical def test_tab_completion_with_categorical(self): # test the tab completion display ok_for_cat = [ "name", "index", "categorical", "categories", "codes", "ordered", "set_categories", "add_categories", "remove_categories", "rename_categories", "reorder_categories", "remove_unused_categories", "as_ordered", "as_unordered", ] def get_dir(s): results = [r for r in s.cat.__dir__() if not r.startswith("_")] return list(sorted(set(results))) s = Series(list("aabbcde")).astype("category") results = get_dir(s) tm.assert_almost_equal(results, list(sorted(set(ok_for_cat)))) @pytest.mark.parametrize( "index", [ tm.makeUnicodeIndex(10), tm.makeStringIndex(10), tm.makeCategoricalIndex(10), Index(["foo", "bar", "baz"] * 2), tm.makeDateIndex(10), tm.makePeriodIndex(10), tm.makeTimedeltaIndex(10), tm.makeIntIndex(10), tm.makeUIntIndex(10), tm.makeIntIndex(10), tm.makeFloatIndex(10), Index([True, False]), Index(["a{}".format(i) for i in range(101)]), pd.MultiIndex.from_tuples(zip("ABCD", "EFGH")), pd.MultiIndex.from_tuples(zip([0, 1, 2, 3], "EFGH")), ], ) def test_index_tab_completion(self, index): # dir contains string-like values of the Index. s = pd.Series(index=index) dir_s = dir(s) for i, x in enumerate(s.index.unique(level=0)): if i < 100: assert not isinstance( x, str) or not x.isidentifier() or x in dir_s else: assert x not in dir_s def test_not_hashable(self): s_empty = Series() s = Series([1]) msg = "'Series' objects are mutable, thus they cannot be hashed" with pytest.raises(TypeError, match=msg): hash(s_empty) with pytest.raises(TypeError, match=msg): hash(s) def test_contains(self): tm.assert_contains_all(self.ts.index, self.ts) def test_iter(self): for i, val in enumerate(self.series): assert val == self.series[i] for i, val in enumerate(self.ts): assert val == self.ts[i] def test_keys(self): # HACK: By doing this in two stages, we avoid 2to3 wrapping the call # to .keys() in a list() getkeys = self.ts.keys assert getkeys() is self.ts.index def test_values(self): tm.assert_almost_equal(self.ts.values, self.ts, check_dtype=False) def test_iteritems(self): for idx, val in self.series.iteritems(): assert val == self.series[idx] for idx, val in self.ts.iteritems(): assert val == self.ts[idx] # assert is lazy (genrators don't define reverse, lists do) assert not hasattr(self.series.iteritems(), "reverse") def test_items(self): for idx, val in self.series.items(): assert val == self.series[idx] for idx, val in self.ts.items(): assert val == self.ts[idx] # assert is lazy (genrators don't define reverse, lists do) assert not hasattr(self.series.items(), "reverse") def test_raise_on_info(self): s = Series(np.random.randn(10)) msg = "'Series' object has no attribute 'info'" with pytest.raises(AttributeError, match=msg): s.info() def test_copy(self): for deep in [None, False, True]: s = Series(np.arange(10), dtype="float64") # default deep is True if deep is None: s2 = s.copy() else: s2 = s.copy(deep=deep) s2[::2] = np.NaN if deep is None or deep is True: # Did not modify original Series assert np.isnan(s2[0]) assert not np.isnan(s[0]) else: # we DID modify the original Series assert np.isnan(s2[0]) assert np.isnan(s[0]) def test_copy_tzaware(self): # GH#11794 # copy of tz-aware expected = Series([Timestamp("2012/01/01", tz="UTC")]) expected2 = Series([Timestamp("1999/01/01", tz="UTC")]) for deep in [None, False, True]: s = Series([Timestamp("2012/01/01", tz="UTC")]) if deep is None: s2 = s.copy() else: s2 = s.copy(deep=deep) s2[0] = pd.Timestamp("1999/01/01", tz="UTC") # default deep is True if deep is None or deep is True: # Did not modify original Series assert_series_equal(s2, expected2) assert_series_equal(s, expected) else: # we DID modify the original Series assert_series_equal(s2, expected2) assert_series_equal(s, expected2) def test_axis_alias(self): s = Series([1, 2, np.nan]) assert_series_equal(s.dropna(axis="rows"), s.dropna(axis="index")) assert s.dropna().sum("rows") == 3 assert s._get_axis_number("rows") == 0 assert s._get_axis_name("rows") == "index" def test_class_axis(self): # https://github.com/pandas-dev/pandas/issues/18147 # no exception and no empty docstring assert pydoc.getdoc(Series.index) def test_numpy_unique(self): # it works! np.unique(self.ts) def test_ndarray_compat(self): # test numpy compat with Series as sub-class of NDFrame tsdf = DataFrame( np.random.randn(1000, 3), columns=["A", "B", "C"], index=date_range("1/1/2000", periods=1000), ) def f(x): return x[x.idxmax()] result = tsdf.apply(f) expected = tsdf.max() tm.assert_series_equal(result, expected) # .item() with tm.assert_produces_warning(FutureWarning): s = Series([1]) result = s.item() assert result == 1 assert s.item() == s.iloc[0] # using an ndarray like function s = Series(np.random.randn(10)) result = Series(np.ones_like(s)) expected = Series(1, index=range(10), dtype="float64") tm.assert_series_equal(result, expected) # ravel s = Series(np.random.randn(10)) tm.assert_almost_equal(s.ravel(order="F"), s.values.ravel(order="F")) # compress # GH 6658 s = Series([0, 1.0, -1], index=list("abc")) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): result = np.compress(s > 0, s) tm.assert_series_equal(result, Series([1.0], index=["b"])) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): result = np.compress(s < -1, s) # result empty Index(dtype=object) as the same as original exp = Series([], dtype="float64", index=Index([], dtype="object")) tm.assert_series_equal(result, exp) s = Series([0, 1.0, -1], index=[0.1, 0.2, 0.3]) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): result = np.compress(s > 0, s) tm.assert_series_equal(result, Series([1.0], index=[0.2])) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): result = np.compress(s < -1, s) # result empty Float64Index as the same as original exp = Series([], dtype="float64", index=Index([], dtype="float64")) tm.assert_series_equal(result, exp) def test_str_accessor_updates_on_inplace(self): s = pd.Series(list("abc")) s.drop([0], inplace=True) assert len(s.str.lower()) == 2 def test_str_attribute(self): # GH9068 methods = ["strip", "rstrip", "lstrip"] s = Series([" jack", "jill ", " jesse ", "frank"]) for method in methods: expected = Series([getattr(str, method)(x) for x in s.values]) assert_series_equal(getattr(Series.str, method)(s.str), expected) # str accessor only valid with string values s = Series(range(5)) with pytest.raises(AttributeError, match="only use .str accessor"): s.str.repeat(2) def test_empty_method(self): s_empty = pd.Series() assert s_empty.empty for full_series in [pd.Series([1]), pd.Series(index=[1])]: assert not full_series.empty def test_tab_complete_warning(self, ip): # https://github.com/pandas-dev/pandas/issues/16409 pytest.importorskip("IPython", minversion="6.0.0") from IPython.core.completer import provisionalcompleter code = "import pandas as pd; s = pd.Series()" ip.run_code(code) with tm.assert_produces_warning(None): with provisionalcompleter("ignore"): list(ip.Completer.completions("s.", 1)) def test_integer_series_size(self): # GH 25580 s = Series(range(9)) assert s.size == 9 s = Series(range(9), dtype="Int64") assert s.size == 9 def test_get_values_deprecation(self): s = Series(range(9)) with tm.assert_produces_warning(FutureWarning): res = s.get_values() tm.assert_numpy_array_equal(res, s.values)
def setUp(self): self.indices = dict(index=tm.makeDateIndex(10)) self.setup_indices()
import numpy as np import pytest import pandas as pd from pandas.core.indexes.api import Index, MultiIndex import pandas.util.testing as tm indices_dict = { "unicode": tm.makeUnicodeIndex(100), "string": tm.makeStringIndex(100), "datetime": tm.makeDateIndex(100), "period": tm.makePeriodIndex(100), "timedelta": tm.makeTimedeltaIndex(100), "int": tm.makeIntIndex(100), "uint": tm.makeUIntIndex(100), "range": tm.makeRangeIndex(100), "float": tm.makeFloatIndex(100), "bool": Index([True, False]), "categorical": tm.makeCategoricalIndex(100), "interval": tm.makeIntervalIndex(100), "empty": Index([]), "tuples": MultiIndex.from_tuples(zip(["foo", "bar", "baz"], [1, 2, 3])), "repeats": Index([0, 0, 1, 1, 2, 2]), } @pytest.fixture(params=indices_dict.keys()) def indices(request): # copy to avoid mutation, e.g. setting .name return indices_dict[request.param].copy()
def test_dti_timestamp_freq_fields(self): # extra fields from DatetimeIndex like quarter and week idx = tm.makeDateIndex(100) assert idx.freq == Timestamp(idx[-1], idx.freq).freq assert idx.freqstr == Timestamp(idx[-1], idx.freq).freqstr
class TestSeriesMisc(TestData, SharedWithSparse): series_klass = Series # SharedWithSparse tests use generic, series_klass-agnostic assertion _assert_series_equal = staticmethod(tm.assert_series_equal) def test_tab_completion(self): # GH 9910 s = Series(list('abcd')) # Series of str values should have .str but not .dt/.cat in __dir__ assert 'str' in dir(s) assert 'dt' not in dir(s) assert 'cat' not in dir(s) # similarly for .dt s = Series(date_range('1/1/2015', periods=5)) assert 'dt' in dir(s) assert 'str' not in dir(s) assert 'cat' not in dir(s) # Similarly for .cat, but with the twist that str and dt should be # there if the categories are of that type first cat and str. s = Series(list('abbcd'), dtype="category") assert 'cat' in dir(s) assert 'str' in dir(s) # as it is a string categorical assert 'dt' not in dir(s) # similar to cat and str s = Series(date_range('1/1/2015', periods=5)).astype("category") assert 'cat' in dir(s) assert 'str' not in dir(s) assert 'dt' in dir(s) # as it is a datetime categorical def test_tab_completion_with_categorical(self): # test the tab completion display ok_for_cat = [ 'categories', 'codes', 'ordered', 'set_categories', 'add_categories', 'remove_categories', 'rename_categories', 'reorder_categories', 'remove_unused_categories', 'as_ordered', 'as_unordered' ] def get_dir(s): results = [r for r in s.cat.__dir__() if not r.startswith('_')] return list(sorted(set(results))) s = Series(list('aabbcde')).astype('category') results = get_dir(s) tm.assert_almost_equal(results, list(sorted(set(ok_for_cat)))) @pytest.mark.parametrize("index", [ tm.makeUnicodeIndex(10), tm.makeStringIndex(10), tm.makeCategoricalIndex(10), Index(['foo', 'bar', 'baz'] * 2), tm.makeDateIndex(10), tm.makePeriodIndex(10), tm.makeTimedeltaIndex(10), tm.makeIntIndex(10), tm.makeUIntIndex(10), tm.makeIntIndex(10), tm.makeFloatIndex(10), Index([True, False]), Index(['a{}'.format(i) for i in range(101)]), pd.MultiIndex.from_tuples(lzip('ABCD', 'EFGH')), pd.MultiIndex.from_tuples(lzip([0, 1, 2, 3], 'EFGH')), ]) def test_index_tab_completion(self, index): # dir contains string-like values of the Index. s = pd.Series(index=index) dir_s = dir(s) for i, x in enumerate(s.index.unique(level=0)): if i < 100: assert (not isinstance(x, string_types) or not isidentifier(x) or x in dir_s) else: assert x not in dir_s def test_not_hashable(self): s_empty = Series() s = Series([1]) pytest.raises(TypeError, hash, s_empty) pytest.raises(TypeError, hash, s) def test_contains(self): tm.assert_contains_all(self.ts.index, self.ts) def test_iter(self): for i, val in enumerate(self.series): assert val == self.series[i] for i, val in enumerate(self.ts): assert val == self.ts[i] def test_keys(self): # HACK: By doing this in two stages, we avoid 2to3 wrapping the call # to .keys() in a list() getkeys = self.ts.keys assert getkeys() is self.ts.index def test_values(self): tm.assert_almost_equal(self.ts.values, self.ts, check_dtype=False) def test_iteritems(self): for idx, val in compat.iteritems(self.series): assert val == self.series[idx] for idx, val in compat.iteritems(self.ts): assert val == self.ts[idx] # assert is lazy (genrators don't define reverse, lists do) assert not hasattr(self.series.iteritems(), 'reverse') def test_items(self): for idx, val in self.series.items(): assert val == self.series[idx] for idx, val in self.ts.items(): assert val == self.ts[idx] # assert is lazy (genrators don't define reverse, lists do) assert not hasattr(self.series.items(), 'reverse') def test_raise_on_info(self): s = Series(np.random.randn(10)) with pytest.raises(AttributeError): s.info() def test_copy(self): for deep in [None, False, True]: s = Series(np.arange(10), dtype='float64') # default deep is True if deep is None: s2 = s.copy() else: s2 = s.copy(deep=deep) s2[::2] = np.NaN if deep is None or deep is True: # Did not modify original Series assert np.isnan(s2[0]) assert not np.isnan(s[0]) else: # we DID modify the original Series assert np.isnan(s2[0]) assert np.isnan(s[0]) # GH 11794 # copy of tz-aware expected = Series([Timestamp('2012/01/01', tz='UTC')]) expected2 = Series([Timestamp('1999/01/01', tz='UTC')]) for deep in [None, False, True]: s = Series([Timestamp('2012/01/01', tz='UTC')]) if deep is None: s2 = s.copy() else: s2 = s.copy(deep=deep) s2[0] = pd.Timestamp('1999/01/01', tz='UTC') # default deep is True if deep is None or deep is True: # Did not modify original Series assert_series_equal(s2, expected2) assert_series_equal(s, expected) else: # we DID modify the original Series assert_series_equal(s2, expected2) assert_series_equal(s, expected2) def test_axis_alias(self): s = Series([1, 2, np.nan]) assert_series_equal(s.dropna(axis='rows'), s.dropna(axis='index')) assert s.dropna().sum('rows') == 3 assert s._get_axis_number('rows') == 0 assert s._get_axis_name('rows') == 'index' def test_class_axis(self): # https://github.com/pandas-dev/pandas/issues/18147 # no exception and no empty docstring assert pydoc.getdoc(Series.index) def test_numpy_unique(self): # it works! np.unique(self.ts) def test_ndarray_compat(self): # test numpy compat with Series as sub-class of NDFrame tsdf = DataFrame(np.random.randn(1000, 3), columns=['A', 'B', 'C'], index=date_range('1/1/2000', periods=1000)) def f(x): return x[x.idxmax()] result = tsdf.apply(f) expected = tsdf.max() tm.assert_series_equal(result, expected) # .item() s = Series([1]) result = s.item() assert result == 1 assert s.item() == s.iloc[0] # using an ndarray like function s = Series(np.random.randn(10)) result = Series(np.ones_like(s)) expected = Series(1, index=range(10), dtype='float64') tm.assert_series_equal(result, expected) # ravel s = Series(np.random.randn(10)) tm.assert_almost_equal(s.ravel(order='F'), s.values.ravel(order='F')) # compress # GH 6658 s = Series([0, 1., -1], index=list('abc')) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): result = np.compress(s > 0, s) tm.assert_series_equal(result, Series([1.], index=['b'])) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): result = np.compress(s < -1, s) # result empty Index(dtype=object) as the same as original exp = Series([], dtype='float64', index=Index([], dtype='object')) tm.assert_series_equal(result, exp) s = Series([0, 1., -1], index=[.1, .2, .3]) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): result = np.compress(s > 0, s) tm.assert_series_equal(result, Series([1.], index=[.2])) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): result = np.compress(s < -1, s) # result empty Float64Index as the same as original exp = Series([], dtype='float64', index=Index([], dtype='float64')) tm.assert_series_equal(result, exp) def test_str_attribute(self): # GH9068 methods = ['strip', 'rstrip', 'lstrip'] s = Series([' jack', 'jill ', ' jesse ', 'frank']) for method in methods: expected = Series([getattr(str, method)(x) for x in s.values]) assert_series_equal(getattr(Series.str, method)(s.str), expected) # str accessor only valid with string values s = Series(range(5)) with tm.assert_raises_regex(AttributeError, 'only use .str accessor'): s.str.repeat(2) def test_empty_method(self): s_empty = pd.Series() assert s_empty.empty for full_series in [pd.Series([1]), pd.Series(index=[1])]: assert not full_series.empty def test_tab_complete_warning(self, ip): # https://github.com/pandas-dev/pandas/issues/16409 pytest.importorskip('IPython', minversion="6.0.0") from IPython.core.completer import provisionalcompleter code = "import pandas as pd; s = pd.Series()" ip.run_code(code) with tm.assert_produces_warning(None): with provisionalcompleter('ignore'): list(ip.Completer.completions('s.', 1))
def test_set_index_makes_timeseries(self): idx = tm.makeDateIndex(10) s = Series(lrange(10)) s.index = idx assert s.index.is_all_dates
def test_bfill(self): ts = Series([0., 1., 2., 3., 4.], index=tm.makeDateIndex(5)) ts[2] = np.NaN assert_series_equal(ts.bfill(), ts.fillna(method='bfill'))
def test_timeseries_coercion(self): idx = tm.makeDateIndex(10000) ser = Series(np.random.randn(len(idx)), idx.astype(object)) assert ser.index.is_all_dates assert isinstance(ser.index, DatetimeIndex)
class TestSeriesMisc: def test_scalarop_preserve_name(self, datetime_series): result = datetime_series * 2 assert result.name == datetime_series.name def test_copy_name(self, datetime_series): result = datetime_series.copy() assert result.name == datetime_series.name def test_copy_index_name_checking(self, datetime_series): # don't want to be able to modify the index stored elsewhere after # making a copy datetime_series.index.name = None assert datetime_series.index.name is None assert datetime_series is datetime_series cp = datetime_series.copy() cp.index.name = "foo" printing.pprint_thing(datetime_series.index.name) assert datetime_series.index.name is None def test_append_preserve_name(self, datetime_series): result = datetime_series[:5].append(datetime_series[5:]) assert result.name == datetime_series.name def test_binop_maybe_preserve_name(self, datetime_series): # names match, preserve result = datetime_series * datetime_series assert result.name == datetime_series.name result = datetime_series.mul(datetime_series) assert result.name == datetime_series.name result = datetime_series * datetime_series[:-2] assert result.name == datetime_series.name # names don't match, don't preserve cp = datetime_series.copy() cp.name = "something else" result = datetime_series + cp assert result.name is None result = datetime_series.add(cp) assert result.name is None ops = ["add", "sub", "mul", "div", "truediv", "floordiv", "mod", "pow"] ops = ops + ["r" + op for op in ops] for op in ops: # names match, preserve s = datetime_series.copy() result = getattr(s, op)(s) assert result.name == datetime_series.name # names don't match, don't preserve cp = datetime_series.copy() cp.name = "changed" result = getattr(s, op)(cp) assert result.name is None def test_combine_first_name(self, datetime_series): result = datetime_series.combine_first(datetime_series[:5]) assert result.name == datetime_series.name def test_getitem_preserve_name(self, datetime_series): result = datetime_series[datetime_series > 0] assert result.name == datetime_series.name result = datetime_series[[0, 2, 4]] assert result.name == datetime_series.name result = datetime_series[5:10] assert result.name == datetime_series.name def test_pickle_datetimes(self, datetime_series): unp_ts = self._pickle_roundtrip(datetime_series) tm.assert_series_equal(unp_ts, datetime_series) def test_pickle_strings(self, string_series): unp_series = self._pickle_roundtrip(string_series) tm.assert_series_equal(unp_series, string_series) def _pickle_roundtrip(self, obj): with tm.ensure_clean() as path: obj.to_pickle(path) unpickled = pd.read_pickle(path) return unpickled def test_argsort_preserve_name(self, datetime_series): result = datetime_series.argsort() assert result.name == datetime_series.name def test_sort_index_name(self, datetime_series): result = datetime_series.sort_index(ascending=False) assert result.name == datetime_series.name def test_constructor_dict(self): d = {"a": 0.0, "b": 1.0, "c": 2.0} result = Series(d) expected = Series(d, index=sorted(d.keys())) tm.assert_series_equal(result, expected) result = Series(d, index=["b", "c", "d", "a"]) expected = Series([1, 2, np.nan, 0], index=["b", "c", "d", "a"]) tm.assert_series_equal(result, expected) def test_constructor_subclass_dict(self): data = tm.TestSubDict((x, 10.0 * x) for x in range(10)) series = Series(data) expected = Series(dict(data.items())) tm.assert_series_equal(series, expected) def test_constructor_ordereddict(self): # GH3283 data = OrderedDict( ("col{i}".format(i=i), np.random.random()) for i in range(12) ) series = Series(data) expected = Series(list(data.values()), list(data.keys())) tm.assert_series_equal(series, expected) # Test with subclass class A(OrderedDict): pass series = Series(A(data)) tm.assert_series_equal(series, expected) def test_constructor_dict_multiindex(self): d = {("a", "a"): 0.0, ("b", "a"): 1.0, ("b", "c"): 2.0} _d = sorted(d.items()) result = Series(d) expected = Series( [x[1] for x in _d], index=pd.MultiIndex.from_tuples([x[0] for x in _d]) ) tm.assert_series_equal(result, expected) d["z"] = 111.0 _d.insert(0, ("z", d["z"])) result = Series(d) expected = Series( [x[1] for x in _d], index=pd.Index([x[0] for x in _d], tupleize_cols=False) ) result = result.reindex(index=expected.index) tm.assert_series_equal(result, expected) def test_constructor_dict_timedelta_index(self): # GH #12169 : Resample category data with timedelta index # construct Series from dict as data and TimedeltaIndex as index # will result NaN in result Series data expected = Series( data=["A", "B", "C"], index=pd.to_timedelta([0, 10, 20], unit="s") ) result = Series( data={ pd.to_timedelta(0, unit="s"): "A", pd.to_timedelta(10, unit="s"): "B", pd.to_timedelta(20, unit="s"): "C", }, index=pd.to_timedelta([0, 10, 20], unit="s"), ) tm.assert_series_equal(result, expected) def test_sparse_accessor_updates_on_inplace(self): s = pd.Series([1, 1, 2, 3], dtype="Sparse[int]") s.drop([0, 1], inplace=True) assert s.sparse.density == 1.0 def test_tab_completion(self): # GH 9910 s = Series(list("abcd")) # Series of str values should have .str but not .dt/.cat in __dir__ assert "str" in dir(s) assert "dt" not in dir(s) assert "cat" not in dir(s) # similarly for .dt s = Series(date_range("1/1/2015", periods=5)) assert "dt" in dir(s) assert "str" not in dir(s) assert "cat" not in dir(s) # Similarly for .cat, but with the twist that str and dt should be # there if the categories are of that type first cat and str. s = Series(list("abbcd"), dtype="category") assert "cat" in dir(s) assert "str" in dir(s) # as it is a string categorical assert "dt" not in dir(s) # similar to cat and str s = Series(date_range("1/1/2015", periods=5)).astype("category") assert "cat" in dir(s) assert "str" not in dir(s) assert "dt" in dir(s) # as it is a datetime categorical def test_tab_completion_with_categorical(self): # test the tab completion display ok_for_cat = [ "categories", "codes", "ordered", "set_categories", "add_categories", "remove_categories", "rename_categories", "reorder_categories", "remove_unused_categories", "as_ordered", "as_unordered", ] def get_dir(s): results = [r for r in s.cat.__dir__() if not r.startswith("_")] return sorted(set(results)) s = Series(list("aabbcde")).astype("category") results = get_dir(s) tm.assert_almost_equal(results, sorted(set(ok_for_cat))) @pytest.mark.parametrize( "index", [ tm.makeUnicodeIndex(10), tm.makeStringIndex(10), tm.makeCategoricalIndex(10), Index(["foo", "bar", "baz"] * 2), tm.makeDateIndex(10), tm.makePeriodIndex(10), tm.makeTimedeltaIndex(10), tm.makeIntIndex(10), tm.makeUIntIndex(10), tm.makeIntIndex(10), tm.makeFloatIndex(10), Index([True, False]), Index(["a{}".format(i) for i in range(101)]), pd.MultiIndex.from_tuples(zip("ABCD", "EFGH")), pd.MultiIndex.from_tuples(zip([0, 1, 2, 3], "EFGH")), ], ) def test_index_tab_completion(self, index): # dir contains string-like values of the Index. s = pd.Series(index=index) dir_s = dir(s) for i, x in enumerate(s.index.unique(level=0)): if i < 100: assert not isinstance(x, str) or not x.isidentifier() or x in dir_s else: assert x not in dir_s def test_not_hashable(self): s_empty = Series() s = Series([1]) msg = "'Series' objects are mutable, thus they cannot be hashed" with pytest.raises(TypeError, match=msg): hash(s_empty) with pytest.raises(TypeError, match=msg): hash(s) def test_contains(self, datetime_series): tm.assert_contains_all(datetime_series.index, datetime_series) def test_iter_datetimes(self, datetime_series): for i, val in enumerate(datetime_series): assert val == datetime_series[i] def test_iter_strings(self, string_series): for i, val in enumerate(string_series): assert val == string_series[i] def test_keys(self, datetime_series): # HACK: By doing this in two stages, we avoid 2to3 wrapping the call # to .keys() in a list() getkeys = datetime_series.keys assert getkeys() is datetime_series.index def test_values(self, datetime_series): tm.assert_almost_equal( datetime_series.values, datetime_series, check_dtype=False ) def test_iteritems_datetimes(self, datetime_series): for idx, val in datetime_series.iteritems(): assert val == datetime_series[idx] def test_iteritems_strings(self, string_series): for idx, val in string_series.iteritems(): assert val == string_series[idx] # assert is lazy (genrators don't define reverse, lists do) assert not hasattr(string_series.iteritems(), "reverse") def test_items_datetimes(self, datetime_series): for idx, val in datetime_series.items(): assert val == datetime_series[idx] def test_items_strings(self, string_series): for idx, val in string_series.items(): assert val == string_series[idx] # assert is lazy (genrators don't define reverse, lists do) assert not hasattr(string_series.items(), "reverse") def test_raise_on_info(self): s = Series(np.random.randn(10)) msg = "'Series' object has no attribute 'info'" with pytest.raises(AttributeError, match=msg): s.info() def test_copy(self): for deep in [None, False, True]: s = Series(np.arange(10), dtype="float64") # default deep is True if deep is None: s2 = s.copy() else: s2 = s.copy(deep=deep) s2[::2] = np.NaN if deep is None or deep is True: # Did not modify original Series assert np.isnan(s2[0]) assert not np.isnan(s[0]) else: # we DID modify the original Series assert np.isnan(s2[0]) assert np.isnan(s[0]) def test_copy_tzaware(self): # GH#11794 # copy of tz-aware expected = Series([Timestamp("2012/01/01", tz="UTC")]) expected2 = Series([Timestamp("1999/01/01", tz="UTC")]) for deep in [None, False, True]: s = Series([Timestamp("2012/01/01", tz="UTC")]) if deep is None: s2 = s.copy() else: s2 = s.copy(deep=deep) s2[0] = pd.Timestamp("1999/01/01", tz="UTC") # default deep is True if deep is None or deep is True: # Did not modify original Series tm.assert_series_equal(s2, expected2) tm.assert_series_equal(s, expected) else: # we DID modify the original Series tm.assert_series_equal(s2, expected2) tm.assert_series_equal(s, expected2) def test_axis_alias(self): s = Series([1, 2, np.nan]) tm.assert_series_equal(s.dropna(axis="rows"), s.dropna(axis="index")) assert s.dropna().sum("rows") == 3 assert s._get_axis_number("rows") == 0 assert s._get_axis_name("rows") == "index" def test_class_axis(self): # https://github.com/pandas-dev/pandas/issues/18147 # no exception and no empty docstring assert pydoc.getdoc(Series.index) def test_numpy_unique(self, datetime_series): # it works! np.unique(datetime_series) def test_ndarray_compat(self): # test numpy compat with Series as sub-class of NDFrame tsdf = DataFrame( np.random.randn(1000, 3), columns=["A", "B", "C"], index=date_range("1/1/2000", periods=1000), ) def f(x): return x[x.idxmax()] result = tsdf.apply(f) expected = tsdf.max() tm.assert_series_equal(result, expected) # .item() with tm.assert_produces_warning(FutureWarning): s = Series([1]) result = s.item() assert result == 1 assert s.item() == s.iloc[0] # using an ndarray like function s = Series(np.random.randn(10)) result = Series(np.ones_like(s)) expected = Series(1, index=range(10), dtype="float64") tm.assert_series_equal(result, expected) # ravel s = Series(np.random.randn(10)) tm.assert_almost_equal(s.ravel(order="F"), s.values.ravel(order="F")) # compress # GH 6658 s = Series([0, 1.0, -1], index=list("abc")) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): result = np.compress(s > 0, s) tm.assert_series_equal(result, Series([1.0], index=["b"])) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): result = np.compress(s < -1, s) # result empty Index(dtype=object) as the same as original exp = Series([], dtype="float64", index=Index([], dtype="object")) tm.assert_series_equal(result, exp) s = Series([0, 1.0, -1], index=[0.1, 0.2, 0.3]) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): result = np.compress(s > 0, s) tm.assert_series_equal(result, Series([1.0], index=[0.2])) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): result = np.compress(s < -1, s) # result empty Float64Index as the same as original exp = Series([], dtype="float64", index=Index([], dtype="float64")) tm.assert_series_equal(result, exp) def test_str_accessor_updates_on_inplace(self): s = pd.Series(list("abc")) s.drop([0], inplace=True) assert len(s.str.lower()) == 2 def test_str_attribute(self): # GH9068 methods = ["strip", "rstrip", "lstrip"] s = Series([" jack", "jill ", " jesse ", "frank"]) for method in methods: expected = Series([getattr(str, method)(x) for x in s.values]) tm.assert_series_equal(getattr(Series.str, method)(s.str), expected) # str accessor only valid with string values s = Series(range(5)) with pytest.raises(AttributeError, match="only use .str accessor"): s.str.repeat(2) def test_empty_method(self): s_empty = pd.Series() assert s_empty.empty for full_series in [pd.Series([1]), pd.Series(index=[1])]: assert not full_series.empty def test_tab_complete_warning(self, ip): # https://github.com/pandas-dev/pandas/issues/16409 pytest.importorskip("IPython", minversion="6.0.0") from IPython.core.completer import provisionalcompleter code = "import pandas as pd; s = pd.Series()" ip.run_code(code) with tm.assert_produces_warning(None): with provisionalcompleter("ignore"): list(ip.Completer.completions("s.", 1)) def test_integer_series_size(self): # GH 25580 s = Series(range(9)) assert s.size == 9 s = Series(range(9), dtype="Int64") assert s.size == 9 def test_get_values_deprecation(self): s = Series(range(9)) with tm.assert_produces_warning(FutureWarning): res = s.get_values() tm.assert_numpy_array_equal(res, s.values)
def setup_method(self, method): self.indices = dict(index=tm.makeDateIndex(10), index_dec=date_range('20130110', periods=10, freq='-1D')) self.setup_indices()
pd.DataFrame({"x": [1.0, 2.0, 3.0]}), pd.DataFrame({0: [1, 2, 3]}), pd.DataFrame({"x": [1.0, 2.0, 3.0], "y": [4.0, 5.0, 6.0]}), pd.DataFrame({"x": [1.0, 2.0, 3.0]}, index=pd.Index([4, 5, 6], name="bar")), pd.Series([1.0, 2.0, 3.0]), pd.Series([1.0, 2.0, 3.0], name="foo"), pd.Series([1.0, 2.0, 3.0], name="foo", index=[4, 5, 6]), pd.Series([1.0, 2.0, 3.0], name="foo", index=pd.Index([4, 5, 6], name="bar")), pd.DataFrame({"x": ["a", "b", "c"]}), pd.DataFrame({"x": [b"a", b"b", b"c"]}), pd.DataFrame({"x": pd.Categorical(["a", "b", "a"], ordered=True)}), pd.DataFrame({"x": pd.Categorical(["a", "b", "a"], ordered=False)}), tm.makeCategoricalIndex(), tm.makeCustomDataframe(5, 3), tm.makeDataFrame(), tm.makeDateIndex(), tm.makeMissingDataframe(), tm.makeMixedDataFrame(), tm.makeObjectSeries(), tm.makePeriodFrame(), tm.makeRangeIndex(), tm.makeTimeDataFrame(), tm.makeTimeSeries(), tm.makeUnicodeIndex(), ] @pytest.mark.parametrize("df", dfs) def test_dumps_serialize_numpy(df): header, frames = serialize(df) if "compression" in header:
def test_replace(self): N = 100 ser = pd.Series(np.random.randn(N)) ser[0:4] = np.nan ser[6:10] = 0 # replace list with a single value ser.replace([np.nan], -1, inplace=True) exp = ser.fillna(-1) tm.assert_series_equal(ser, exp) rs = ser.replace(0., np.nan) ser[ser == 0.] = np.nan tm.assert_series_equal(rs, ser) ser = pd.Series(np.fabs(np.random.randn(N)), tm.makeDateIndex(N), dtype=object) ser[:5] = np.nan ser[6:10] = 'foo' ser[20:30] = 'bar' # replace list with a single value rs = ser.replace([np.nan, 'foo', 'bar'], -1) assert (rs[:5] == -1).all() assert (rs[6:10] == -1).all() assert (rs[20:30] == -1).all() assert (pd.isna(ser[:5])).all() # replace with different values rs = ser.replace({np.nan: -1, 'foo': -2, 'bar': -3}) assert (rs[:5] == -1).all() assert (rs[6:10] == -2).all() assert (rs[20:30] == -3).all() assert (pd.isna(ser[:5])).all() # replace with different values with 2 lists rs2 = ser.replace([np.nan, 'foo', 'bar'], [-1, -2, -3]) tm.assert_series_equal(rs, rs2) # replace inplace ser.replace([np.nan, 'foo', 'bar'], -1, inplace=True) assert (ser[:5] == -1).all() assert (ser[6:10] == -1).all() assert (ser[20:30] == -1).all() ser = pd.Series([np.nan, 0, np.inf]) tm.assert_series_equal(ser.replace(np.nan, 0), ser.fillna(0)) ser = pd.Series([np.nan, 0, 'foo', 'bar', np.inf, None, pd.NaT]) tm.assert_series_equal(ser.replace(np.nan, 0), ser.fillna(0)) filled = ser.copy() filled[4] = 0 tm.assert_series_equal(ser.replace(np.inf, 0), filled) ser = pd.Series(self.ts.index) tm.assert_series_equal(ser.replace(np.nan, 0), ser.fillna(0)) # malformed msg = r"Replacement lists must match in length\. Expecting 3 got 2" with pytest.raises(ValueError, match=msg): ser.replace([1, 2, 3], [np.nan, 0]) # make sure that we aren't just masking a TypeError because bools don't # implement indexing with pytest.raises(TypeError, match='Cannot compare types .+'): ser.replace([1, 2], [np.nan, 0]) ser = pd.Series([0, 1, 2, 3, 4]) result = ser.replace([0, 1, 2, 3, 4], [4, 3, 2, 1, 0]) tm.assert_series_equal(result, pd.Series([4, 3, 2, 1, 0]))