Ejemplo n.º 1
0
    def test_quantile_multi(self):

        qs = [.1, .9]
        result = self.ts.quantile(qs)
        expected = pd.Series([np.percentile(self.ts.dropna(), 10),
                              np.percentile(self.ts.dropna(), 90)],
                             index=qs, name=self.ts.name)
        tm.assert_series_equal(result, expected)

        dts = self.ts.index.to_series()
        dts.name = 'xxx'
        result = dts.quantile((.2, .2))
        expected = Series([Timestamp('2000-01-10 19:12:00'),
                           Timestamp('2000-01-10 19:12:00')],
                          index=[.2, .2], name='xxx')
        tm.assert_series_equal(result, expected)

        result = self.ts.quantile([])
        expected = pd.Series([], name=self.ts.name, index=Index(
            [], dtype=float))
        tm.assert_series_equal(result, expected)
Ejemplo n.º 2
0
    def test_operators_datetimelike(self):

        # ## timedelta64 ###
        td1 = Series([timedelta(minutes=5, seconds=3)] * 3)
        td1.iloc[2] = np.nan

        # ## datetime64 ###
        dt1 = Series([
            Timestamp('20111230'),
            Timestamp('20120101'),
            Timestamp('20120103')
        ])
        dt1.iloc[2] = np.nan
        dt2 = Series([
            Timestamp('20111231'),
            Timestamp('20120102'),
            Timestamp('20120104')
        ])
        dt1 - dt2
        dt2 - dt1

        # ## datetime64 with timetimedelta ###
        dt1 + td1
        td1 + dt1
        dt1 - td1
        # TODO: Decide if this ought to work.
        # td1 - dt1

        # ## timetimedelta with datetime64 ###
        td1 + dt1
        dt1 + td1
Ejemplo n.º 3
0
    def test_operators_datetimelike_invalid(self, all_arithmetic_operators):
        # these are all TypeEror ops
        op_str = all_arithmetic_operators

        def check(get_ser, test_ser):

            # check that we are getting a TypeError
            # with 'operate' (from core/ops.py) for the ops that are not
            # defined
            op = getattr(get_ser, op_str, None)
            with tm.assert_raises_regex(TypeError, 'operate|cannot'):
                op(test_ser)

        # ## timedelta64 ###
        td1 = Series([timedelta(minutes=5, seconds=3)] * 3)
        td1.iloc[2] = np.nan

        # ## datetime64 ###
        dt1 = Series([
            Timestamp('20111230'),
            Timestamp('20120101'),
            Timestamp('20120103')
        ])
        dt1.iloc[2] = np.nan
        dt2 = Series([
            Timestamp('20111231'),
            Timestamp('20120102'),
            Timestamp('20120104')
        ])
        if op_str not in ['__sub__', '__rsub__']:
            check(dt1, dt2)

        # ## datetime64 with timetimedelta ###
        # TODO(jreback) __rsub__ should raise?
        if op_str not in ['__add__', '__radd__', '__sub__']:
            check(dt1, td1)

        # 8260, 10763
        # datetime64 with tz
        tz = 'US/Eastern'
        dt1 = Series(date_range('2000-01-01 09:00:00', periods=5, tz=tz),
                     name='foo')
        dt2 = dt1.copy()
        dt2.iloc[2] = np.nan
        td1 = Series(timedelta_range('1 days 1 min', periods=5, freq='H'))
        td2 = td1.copy()
        td2.iloc[1] = np.nan

        if op_str not in ['__add__', '__radd__', '__sub__', '__rsub__']:
            check(dt2, td2)
Ejemplo n.º 4
0
    def test_constructor_dict_datetime64_index(self):
        # GH 9456

        dates_as_str = ['1984-02-19', '1988-11-06', '1989-12-03', '1990-03-15']
        values = [42544017.198965244, 1234565, 40512335.181958228, -1]

        def create_data(constructor):
            return dict(zip((constructor(x) for x in dates_as_str), values))

        data_datetime64 = create_data(np.datetime64)
        data_datetime = create_data(lambda x: datetime.strptime(x, '%Y-%m-%d'))
        data_Timestamp = create_data(Timestamp)

        expected = Series(values, (Timestamp(x) for x in dates_as_str))

        result_datetime64 = Series(data_datetime64)
        result_datetime = Series(data_datetime)
        result_Timestamp = Series(data_Timestamp)

        assert_series_equal(result_datetime64, expected)
        assert_series_equal(result_datetime, expected)
        assert_series_equal(result_Timestamp, expected)
Ejemplo n.º 5
0
    def test_convert_objects(self):

        s = Series([1., 2, 3], index=['a', 'b', 'c'])
        with tm.assert_produces_warning(FutureWarning):
            result = s.convert_objects(convert_dates=False,
                                       convert_numeric=True)
        assert_series_equal(result, s)

        # force numeric conversion
        r = s.copy().astype('O')
        r['a'] = '1'
        with tm.assert_produces_warning(FutureWarning):
            result = r.convert_objects(convert_dates=False,
                                       convert_numeric=True)
        assert_series_equal(result, s)

        r = s.copy().astype('O')
        r['a'] = '1.'
        with tm.assert_produces_warning(FutureWarning):
            result = r.convert_objects(convert_dates=False,
                                       convert_numeric=True)
        assert_series_equal(result, s)

        r = s.copy().astype('O')
        r['a'] = 'garbled'
        expected = s.copy()
        expected['a'] = np.nan
        with tm.assert_produces_warning(FutureWarning):
            result = r.convert_objects(convert_dates=False,
                                       convert_numeric=True)
        assert_series_equal(result, expected)

        # GH 4119, not converting a mixed type (e.g.floats and object)
        s = Series([1, 'na', 3, 4])
        with tm.assert_produces_warning(FutureWarning):
            result = s.convert_objects(convert_numeric=True)
        expected = Series([1, np.nan, 3, 4])
        assert_series_equal(result, expected)

        s = Series([1, '', 3, 4])
        with tm.assert_produces_warning(FutureWarning):
            result = s.convert_objects(convert_numeric=True)
        expected = Series([1, np.nan, 3, 4])
        assert_series_equal(result, expected)

        # dates
        s = Series([datetime(2001, 1, 1, 0, 0), datetime(2001, 1, 2, 0, 0),
                    datetime(2001, 1, 3, 0, 0)])
        s2 = Series([datetime(2001, 1, 1, 0, 0), datetime(2001, 1, 2, 0, 0),
                     datetime(2001, 1, 3, 0, 0), 'foo', 1.0, 1,
                     Timestamp('20010104'), '20010105'],
                    dtype='O')
        with tm.assert_produces_warning(FutureWarning):
            result = s.convert_objects(convert_dates=True,
                                       convert_numeric=False)
        expected = Series([Timestamp('20010101'), Timestamp('20010102'),
                           Timestamp('20010103')], dtype='M8[ns]')
        assert_series_equal(result, expected)

        with tm.assert_produces_warning(FutureWarning):
            result = s.convert_objects(convert_dates='coerce',
                                       convert_numeric=False)
        with tm.assert_produces_warning(FutureWarning):
            result = s.convert_objects(convert_dates='coerce',
                                       convert_numeric=True)
        assert_series_equal(result, expected)

        expected = Series([Timestamp('20010101'), Timestamp('20010102'),
                           Timestamp('20010103'),
                           lib.NaT, lib.NaT, lib.NaT, Timestamp('20010104'),
                           Timestamp('20010105')], dtype='M8[ns]')
        with tm.assert_produces_warning(FutureWarning):
            result = s2.convert_objects(convert_dates='coerce',
                                        convert_numeric=False)
        assert_series_equal(result, expected)
        with tm.assert_produces_warning(FutureWarning):
            result = s2.convert_objects(convert_dates='coerce',
                                        convert_numeric=True)
        assert_series_equal(result, expected)

        # preserver all-nans (if convert_dates='coerce')
        s = Series(['foo', 'bar', 1, 1.0], dtype='O')
        with tm.assert_produces_warning(FutureWarning):
            result = s.convert_objects(convert_dates='coerce',
                                       convert_numeric=False)
        expected = Series([lib.NaT] * 2 + [Timestamp(1)] * 2)
        assert_series_equal(result, expected)

        # preserver if non-object
        s = Series([1], dtype='float32')
        with tm.assert_produces_warning(FutureWarning):
            result = s.convert_objects(convert_dates='coerce',
                                       convert_numeric=False)
        assert_series_equal(result, s)

        # r = s.copy()
        # r[0] = np.nan
        # result = r.convert_objects(convert_dates=True,convert_numeric=False)
        # assert result.dtype == 'M8[ns]'

        # dateutil parses some single letters into today's value as a date
        for x in 'abcdefghijklmnopqrstuvwxyz':
            s = Series([x])
            with tm.assert_produces_warning(FutureWarning):
                result = s.convert_objects(convert_dates='coerce')
            assert_series_equal(result, s)
            s = Series([x.upper()])
            with tm.assert_produces_warning(FutureWarning):
                result = s.convert_objects(convert_dates='coerce')
            assert_series_equal(result, s)
Ejemplo n.º 6
0
    def test_convert(self):
        # Tests: All to nans, coerce, true
        # Test coercion returns correct type
        s = Series(['a', 'b', 'c'])
        results = s._convert(datetime=True, coerce=True)
        expected = Series([lib.NaT] * 3)
        assert_series_equal(results, expected)

        results = s._convert(numeric=True, coerce=True)
        expected = Series([np.nan] * 3)
        assert_series_equal(results, expected)

        expected = Series([lib.NaT] * 3, dtype=np.dtype('m8[ns]'))
        results = s._convert(timedelta=True, coerce=True)
        assert_series_equal(results, expected)

        dt = datetime(2001, 1, 1, 0, 0)
        td = dt - datetime(2000, 1, 1, 0, 0)

        # Test coercion with mixed types
        s = Series(['a', '3.1415', dt, td])
        results = s._convert(datetime=True, coerce=True)
        expected = Series([lib.NaT, lib.NaT, dt, lib.NaT])
        assert_series_equal(results, expected)

        results = s._convert(numeric=True, coerce=True)
        expected = Series([nan, 3.1415, nan, nan])
        assert_series_equal(results, expected)

        results = s._convert(timedelta=True, coerce=True)
        expected = Series([lib.NaT, lib.NaT, lib.NaT, td],
                          dtype=np.dtype('m8[ns]'))
        assert_series_equal(results, expected)

        # Test standard conversion returns original
        results = s._convert(datetime=True)
        assert_series_equal(results, s)
        results = s._convert(numeric=True)
        expected = Series([nan, 3.1415, nan, nan])
        assert_series_equal(results, expected)
        results = s._convert(timedelta=True)
        assert_series_equal(results, s)

        # test pass-through and non-conversion when other types selected
        s = Series(['1.0', '2.0', '3.0'])
        results = s._convert(datetime=True, numeric=True, timedelta=True)
        expected = Series([1.0, 2.0, 3.0])
        assert_series_equal(results, expected)
        results = s._convert(True, False, True)
        assert_series_equal(results, s)

        s = Series([datetime(2001, 1, 1, 0, 0), datetime(2001, 1, 1, 0, 0)],
                   dtype='O')
        results = s._convert(datetime=True, numeric=True, timedelta=True)
        expected = Series([datetime(2001, 1, 1, 0, 0), datetime(2001, 1, 1, 0,
                                                                0)])
        assert_series_equal(results, expected)
        results = s._convert(datetime=False, numeric=True, timedelta=True)
        assert_series_equal(results, s)

        td = datetime(2001, 1, 1, 0, 0) - datetime(2000, 1, 1, 0, 0)
        s = Series([td, td], dtype='O')
        results = s._convert(datetime=True, numeric=True, timedelta=True)
        expected = Series([td, td])
        assert_series_equal(results, expected)
        results = s._convert(True, True, False)
        assert_series_equal(results, s)

        s = Series([1., 2, 3], index=['a', 'b', 'c'])
        result = s._convert(numeric=True)
        assert_series_equal(result, s)

        # force numeric conversion
        r = s.copy().astype('O')
        r['a'] = '1'
        result = r._convert(numeric=True)
        assert_series_equal(result, s)

        r = s.copy().astype('O')
        r['a'] = '1.'
        result = r._convert(numeric=True)
        assert_series_equal(result, s)

        r = s.copy().astype('O')
        r['a'] = 'garbled'
        result = r._convert(numeric=True)
        expected = s.copy()
        expected['a'] = nan
        assert_series_equal(result, expected)

        # GH 4119, not converting a mixed type (e.g.floats and object)
        s = Series([1, 'na', 3, 4])
        result = s._convert(datetime=True, numeric=True)
        expected = Series([1, nan, 3, 4])
        assert_series_equal(result, expected)

        s = Series([1, '', 3, 4])
        result = s._convert(datetime=True, numeric=True)
        assert_series_equal(result, expected)

        # dates
        s = Series([datetime(2001, 1, 1, 0, 0), datetime(2001, 1, 2, 0, 0),
                    datetime(2001, 1, 3, 0, 0)])
        s2 = Series([datetime(2001, 1, 1, 0, 0), datetime(2001, 1, 2, 0, 0),
                     datetime(2001, 1, 3, 0, 0), 'foo', 1.0, 1,
                     Timestamp('20010104'), '20010105'], dtype='O')

        result = s._convert(datetime=True)
        expected = Series([Timestamp('20010101'), Timestamp('20010102'),
                           Timestamp('20010103')], dtype='M8[ns]')
        assert_series_equal(result, expected)

        result = s._convert(datetime=True, coerce=True)
        assert_series_equal(result, expected)

        expected = Series([Timestamp('20010101'), Timestamp('20010102'),
                           Timestamp('20010103'), lib.NaT, lib.NaT, lib.NaT,
                           Timestamp('20010104'), Timestamp('20010105')],
                          dtype='M8[ns]')
        result = s2._convert(datetime=True, numeric=False, timedelta=False,
                             coerce=True)
        assert_series_equal(result, expected)
        result = s2._convert(datetime=True, coerce=True)
        assert_series_equal(result, expected)

        s = Series(['foo', 'bar', 1, 1.0], dtype='O')
        result = s._convert(datetime=True, coerce=True)
        expected = Series([lib.NaT] * 2 + [Timestamp(1)] * 2)
        assert_series_equal(result, expected)

        # preserver if non-object
        s = Series([1], dtype='float32')
        result = s._convert(datetime=True, coerce=True)
        assert_series_equal(result, s)

        # r = s.copy()
        # r[0] = np.nan
        # result = r._convert(convert_dates=True,convert_numeric=False)
        # assert result.dtype == 'M8[ns]'

        # dateutil parses some single letters into today's value as a date
        expected = Series([lib.NaT])
        for x in 'abcdefghijklmnopqrstuvwxyz':
            s = Series([x])
            result = s._convert(datetime=True, coerce=True)
            assert_series_equal(result, expected)
            s = Series([x.upper()])
            result = s._convert(datetime=True, coerce=True)
            assert_series_equal(result, expected)
Ejemplo n.º 7
0
    def test_categorical_delegations(self):

        # invalid accessor
        pytest.raises(AttributeError, lambda: Series([1, 2, 3]).cat)
        tm.assert_raises_regex(
            AttributeError,
            r"Can only use .cat accessor with a 'category' dtype",
            lambda: Series([1, 2, 3]).cat)
        pytest.raises(AttributeError, lambda: Series(['a', 'b', 'c']).cat)
        pytest.raises(AttributeError, lambda: Series(np.arange(5.)).cat)
        pytest.raises(AttributeError,
                      lambda: Series([Timestamp('20130101')]).cat)

        # Series should delegate calls to '.categories', '.codes', '.ordered'
        # and the methods '.set_categories()' 'drop_unused_categories()' to the
        # categorical# -*- coding: utf-8 -*-
        s = Series(Categorical(["a", "b", "c", "a"], ordered=True))
        exp_categories = Index(["a", "b", "c"])
        tm.assert_index_equal(s.cat.categories, exp_categories)
        s.cat.categories = [1, 2, 3]
        exp_categories = Index([1, 2, 3])
        tm.assert_index_equal(s.cat.categories, exp_categories)

        exp_codes = Series([0, 1, 2, 0], dtype='int8')
        tm.assert_series_equal(s.cat.codes, exp_codes)

        assert s.cat.ordered
        s = s.cat.as_unordered()
        assert not s.cat.ordered
        s.cat.as_ordered(inplace=True)
        assert s.cat.ordered

        # reorder
        s = Series(Categorical(["a", "b", "c", "a"], ordered=True))
        exp_categories = Index(["c", "b", "a"])
        exp_values = np.array(["a", "b", "c", "a"], dtype=np.object_)
        s = s.cat.set_categories(["c", "b", "a"])
        tm.assert_index_equal(s.cat.categories, exp_categories)
        tm.assert_numpy_array_equal(s.values.__array__(), exp_values)
        tm.assert_numpy_array_equal(s.__array__(), exp_values)

        # remove unused categories
        s = Series(
            Categorical(["a", "b", "b", "a"], categories=["a", "b", "c"]))
        exp_categories = Index(["a", "b"])
        exp_values = np.array(["a", "b", "b", "a"], dtype=np.object_)
        s = s.cat.remove_unused_categories()
        tm.assert_index_equal(s.cat.categories, exp_categories)
        tm.assert_numpy_array_equal(s.values.__array__(), exp_values)
        tm.assert_numpy_array_equal(s.__array__(), exp_values)

        # This method is likely to be confused, so test that it raises an error
        # on wrong inputs:
        def f():
            s.set_categories([4, 3, 2, 1])

        pytest.raises(Exception, f)
        # right: s.cat.set_categories([4,3,2,1])

        # GH18862 (let Series.cat.rename_categories take callables)
        s = Series(Categorical(["a", "b", "c", "a"], ordered=True))
        result = s.cat.rename_categories(lambda x: x.upper())
        expected = Series(
            Categorical(["A", "B", "C", "A"],
                        categories=["A", "B", "C"],
                        ordered=True))
        tm.assert_series_equal(result, expected)
Ejemplo n.º 8
0
    def test_constructor_with_datetime_tz(self):

        # 8260
        # support datetime64 with tz

        dr = date_range('20130101', periods=3, tz='US/Eastern')
        s = Series(dr)
        assert s.dtype.name == 'datetime64[ns, US/Eastern]'
        assert s.dtype == 'datetime64[ns, US/Eastern]'
        assert is_datetime64tz_dtype(s.dtype)
        assert 'datetime64[ns, US/Eastern]' in str(s)

        # export
        result = s.values
        assert isinstance(result, np.ndarray)
        assert result.dtype == 'datetime64[ns]'

        exp = pd.DatetimeIndex(result)
        exp = exp.tz_localize('UTC').tz_convert(tz=s.dt.tz)
        tm.assert_index_equal(dr, exp)

        # indexing
        result = s.iloc[0]
        self.assertEqual(
            result,
            Timestamp('2013-01-01 00:00:00-0500', tz='US/Eastern', freq='D'))
        result = s[0]
        self.assertEqual(
            result,
            Timestamp('2013-01-01 00:00:00-0500', tz='US/Eastern', freq='D'))

        result = s[Series([True, True, False], index=s.index)]
        assert_series_equal(result, s[0:2])

        result = s.iloc[0:1]
        assert_series_equal(result, Series(dr[0:1]))

        # concat
        result = pd.concat([s.iloc[0:1], s.iloc[1:]])
        assert_series_equal(result, s)

        # astype
        result = s.astype(object)
        expected = Series(DatetimeIndex(s._values).asobject)
        assert_series_equal(result, expected)

        result = Series(s.values).dt.tz_localize('UTC').dt.tz_convert(s.dt.tz)
        assert_series_equal(result, s)

        # astype - datetime64[ns, tz]
        result = Series(s.values).astype('datetime64[ns, US/Eastern]')
        assert_series_equal(result, s)

        result = Series(s.values).astype(s.dtype)
        assert_series_equal(result, s)

        result = s.astype('datetime64[ns, CET]')
        expected = Series(date_range('20130101 06:00:00', periods=3, tz='CET'))
        assert_series_equal(result, expected)

        # short str
        assert 'datetime64[ns, US/Eastern]' in str(s)

        # formatting with NaT
        result = s.shift()
        assert 'datetime64[ns, US/Eastern]' in str(result)
        assert 'NaT' in str(result)

        # long str
        t = Series(date_range('20130101', periods=1000, tz='US/Eastern'))
        assert 'datetime64[ns, US/Eastern]' in str(t)

        result = pd.DatetimeIndex(s, freq='infer')
        tm.assert_index_equal(result, dr)

        # inference
        s = Series([
            pd.Timestamp('2013-01-01 13:00:00-0800', tz='US/Pacific'),
            pd.Timestamp('2013-01-02 14:00:00-0800', tz='US/Pacific')
        ])
        assert s.dtype == 'datetime64[ns, US/Pacific]'
        assert lib.infer_dtype(s) == 'datetime64'

        s = Series([
            pd.Timestamp('2013-01-01 13:00:00-0800', tz='US/Pacific'),
            pd.Timestamp('2013-01-02 14:00:00-0800', tz='US/Eastern')
        ])
        assert s.dtype == 'object'
        assert lib.infer_dtype(s) == 'datetime'

        # with all NaT
        s = Series(pd.NaT, index=[0, 1], dtype='datetime64[ns, US/Eastern]')
        expected = Series(pd.DatetimeIndex(['NaT', 'NaT'], tz='US/Eastern'))
        assert_series_equal(s, expected)
Ejemplo n.º 9
0
    def test_constructor_dtype_datetime64(self):

        s = Series(iNaT, dtype='M8[ns]', index=lrange(5))
        assert isnull(s).all()

        # in theory this should be all nulls, but since
        # we are not specifying a dtype is ambiguous
        s = Series(iNaT, index=lrange(5))
        assert not isnull(s).all()

        s = Series(nan, dtype='M8[ns]', index=lrange(5))
        assert isnull(s).all()

        s = Series([datetime(2001, 1, 2, 0, 0), iNaT], dtype='M8[ns]')
        assert isnull(s[1])
        self.assertEqual(s.dtype, 'M8[ns]')

        s = Series([datetime(2001, 1, 2, 0, 0), nan], dtype='M8[ns]')
        assert isnull(s[1])
        self.assertEqual(s.dtype, 'M8[ns]')

        # GH3416
        dates = [
            np.datetime64(datetime(2013, 1, 1)),
            np.datetime64(datetime(2013, 1, 2)),
            np.datetime64(datetime(2013, 1, 3)),
        ]

        s = Series(dates)
        self.assertEqual(s.dtype, 'M8[ns]')

        s.iloc[0] = np.nan
        self.assertEqual(s.dtype, 'M8[ns]')

        # invalid astypes
        for t in ['s', 'D', 'us', 'ms']:
            pytest.raises(TypeError, s.astype, 'M8[%s]' % t)

        # GH3414 related
        pytest.raises(
            TypeError, lambda x: Series(Series(dates).astype('int') / 1000000,
                                        dtype='M8[ms]'))
        pytest.raises(TypeError, lambda x: Series(dates, dtype='datetime64'))

        # invalid dates can be help as object
        result = Series([datetime(2, 1, 1)])
        self.assertEqual(result[0], datetime(2, 1, 1, 0, 0))

        result = Series([datetime(3000, 1, 1)])
        self.assertEqual(result[0], datetime(3000, 1, 1, 0, 0))

        # don't mix types
        result = Series([Timestamp('20130101'), 1], index=['a', 'b'])
        self.assertEqual(result['a'], Timestamp('20130101'))
        self.assertEqual(result['b'], 1)

        # GH6529
        # coerce datetime64 non-ns properly
        dates = date_range('01-Jan-2015', '01-Dec-2015', freq='M')
        values2 = dates.view(np.ndarray).astype('datetime64[ns]')
        expected = Series(values2, index=dates)

        for dtype in ['s', 'D', 'ms', 'us', 'ns']:
            values1 = dates.view(np.ndarray).astype('M8[{0}]'.format(dtype))
            result = Series(values1, dates)
            assert_series_equal(result, expected)

        # GH 13876
        # coerce to non-ns to object properly
        expected = Series(values2, index=dates, dtype=object)
        for dtype in ['s', 'D', 'ms', 'us', 'ns']:
            values1 = dates.view(np.ndarray).astype('M8[{0}]'.format(dtype))
            result = Series(values1, index=dates, dtype=object)
            assert_series_equal(result, expected)

        # leave datetime.date alone
        dates2 = np.array([d.date() for d in dates.to_pydatetime()],
                          dtype=object)
        series1 = Series(dates2, dates)
        tm.assert_numpy_array_equal(series1.values, dates2)
        self.assertEqual(series1.dtype, object)

        # these will correctly infer a datetime
        s = Series([None, pd.NaT, '2013-08-05 15:30:00.000001'])
        self.assertEqual(s.dtype, 'datetime64[ns]')
        s = Series([np.nan, pd.NaT, '2013-08-05 15:30:00.000001'])
        self.assertEqual(s.dtype, 'datetime64[ns]')
        s = Series([pd.NaT, None, '2013-08-05 15:30:00.000001'])
        self.assertEqual(s.dtype, 'datetime64[ns]')
        s = Series([pd.NaT, np.nan, '2013-08-05 15:30:00.000001'])
        self.assertEqual(s.dtype, 'datetime64[ns]')

        # tz-aware (UTC and other tz's)
        # GH 8411
        dr = date_range('20130101', periods=3)
        assert Series(dr).iloc[0].tz is None
        dr = date_range('20130101', periods=3, tz='UTC')
        assert str(Series(dr).iloc[0].tz) == 'UTC'
        dr = date_range('20130101', periods=3, tz='US/Eastern')
        assert str(Series(dr).iloc[0].tz) == 'US/Eastern'

        # non-convertible
        s = Series([1479596223000, -1479590, pd.NaT])
        assert s.dtype == 'object'
        assert s[2] is pd.NaT
        assert 'NaT' in str(s)

        # if we passed a NaT it remains
        s = Series([datetime(2010, 1, 1), datetime(2, 1, 1), pd.NaT])
        assert s.dtype == 'object'
        assert s[2] is pd.NaT
        assert 'NaT' in str(s)

        # if we passed a nan it remains
        s = Series([datetime(2010, 1, 1), datetime(2, 1, 1), np.nan])
        assert s.dtype == 'object'
        assert s[2] is np.nan
        assert 'NaN' in str(s)
Ejemplo n.º 10
0
    def test_categorical_delegations(self):

        # invalid accessor
        msg = r"Can only use \.cat accessor with a 'category' dtype"
        with pytest.raises(AttributeError, match=msg):
            Series([1, 2, 3]).cat
        with pytest.raises(AttributeError, match=msg):
            Series([1, 2, 3]).cat()
        with pytest.raises(AttributeError, match=msg):
            Series(["a", "b", "c"]).cat
        with pytest.raises(AttributeError, match=msg):
            Series(np.arange(5.0)).cat
        with pytest.raises(AttributeError, match=msg):
            Series([Timestamp("20130101")]).cat

        # Series should delegate calls to '.categories', '.codes', '.ordered'
        # and the methods '.set_categories()' 'drop_unused_categories()' to the
        # categorical
        s = Series(Categorical(["a", "b", "c", "a"], ordered=True))
        exp_categories = Index(["a", "b", "c"])
        tm.assert_index_equal(s.cat.categories, exp_categories)
        s.cat.categories = [1, 2, 3]
        exp_categories = Index([1, 2, 3])
        tm.assert_index_equal(s.cat.categories, exp_categories)

        exp_codes = Series([0, 1, 2, 0], dtype="int8")
        tm.assert_series_equal(s.cat.codes, exp_codes)

        assert s.cat.ordered
        s = s.cat.as_unordered()
        assert not s.cat.ordered
        s.cat.as_ordered(inplace=True)
        assert s.cat.ordered

        # reorder
        s = Series(Categorical(["a", "b", "c", "a"], ordered=True))
        exp_categories = Index(["c", "b", "a"])
        exp_values = np.array(["a", "b", "c", "a"], dtype=np.object_)
        s = s.cat.set_categories(["c", "b", "a"])
        tm.assert_index_equal(s.cat.categories, exp_categories)
        tm.assert_numpy_array_equal(s.values.__array__(), exp_values)
        tm.assert_numpy_array_equal(s.__array__(), exp_values)

        # remove unused categories
        s = Series(
            Categorical(["a", "b", "b", "a"], categories=["a", "b", "c"]))
        exp_categories = Index(["a", "b"])
        exp_values = np.array(["a", "b", "b", "a"], dtype=np.object_)
        s = s.cat.remove_unused_categories()
        tm.assert_index_equal(s.cat.categories, exp_categories)
        tm.assert_numpy_array_equal(s.values.__array__(), exp_values)
        tm.assert_numpy_array_equal(s.__array__(), exp_values)

        # This method is likely to be confused, so test that it raises an error
        # on wrong inputs:
        msg = "'Series' object has no attribute 'set_categories'"
        with pytest.raises(AttributeError, match=msg):
            s.set_categories([4, 3, 2, 1])

        # right: s.cat.set_categories([4,3,2,1])

        # GH18862 (let Series.cat.rename_categories take callables)
        s = Series(Categorical(["a", "b", "c", "a"], ordered=True))
        result = s.cat.rename_categories(lambda x: x.upper())
        expected = Series(
            Categorical(["A", "B", "C", "A"],
                        categories=["A", "B", "C"],
                        ordered=True))
        tm.assert_series_equal(result, expected)
Ejemplo n.º 11
0
class TestSeriesQuantile:
    def test_quantile(self, datetime_series):

        q = datetime_series.quantile(0.1)
        assert q == np.percentile(datetime_series.dropna(), 10)

        q = datetime_series.quantile(0.9)
        assert q == np.percentile(datetime_series.dropna(), 90)

        # object dtype
        q = Series(datetime_series, dtype=object).quantile(0.9)
        assert q == np.percentile(datetime_series.dropna(), 90)

        # datetime64[ns] dtype
        dts = datetime_series.index.to_series()
        q = dts.quantile(0.2)
        assert q == Timestamp("2000-01-10 19:12:00")

        # timedelta64[ns] dtype
        tds = dts.diff()
        q = tds.quantile(0.25)
        assert q == pd.to_timedelta("24:00:00")

        # GH7661
        result = Series([np.timedelta64("NaT")]).sum()
        assert result == pd.Timedelta(0)

        msg = "percentiles should all be in the interval \\[0, 1\\]"
        for invalid in [-1, 2, [0.5, -1], [0.5, 2]]:
            with pytest.raises(ValueError, match=msg):
                datetime_series.quantile(invalid)

    def test_quantile_multi(self, datetime_series):

        qs = [0.1, 0.9]
        result = datetime_series.quantile(qs)
        expected = Series(
            [
                np.percentile(datetime_series.dropna(), 10),
                np.percentile(datetime_series.dropna(), 90),
            ],
            index=qs,
            name=datetime_series.name,
        )
        tm.assert_series_equal(result, expected)

        dts = datetime_series.index.to_series()
        dts.name = "xxx"
        result = dts.quantile((0.2, 0.2))
        expected = Series(
            [Timestamp("2000-01-10 19:12:00"), Timestamp("2000-01-10 19:12:00")],
            index=[0.2, 0.2],
            name="xxx",
        )
        tm.assert_series_equal(result, expected)

        result = datetime_series.quantile([])
        expected = Series(
            [], name=datetime_series.name, index=Index([], dtype=float), dtype="float64"
        )
        tm.assert_series_equal(result, expected)

    def test_quantile_interpolation(self, datetime_series):
        # see gh-10174

        # interpolation = linear (default case)
        q = datetime_series.quantile(0.1, interpolation="linear")
        assert q == np.percentile(datetime_series.dropna(), 10)
        q1 = datetime_series.quantile(0.1)
        assert q1 == np.percentile(datetime_series.dropna(), 10)

        # test with and without interpolation keyword
        assert q == q1

    def test_quantile_interpolation_dtype(self):
        # GH #10174

        # interpolation = linear (default case)
        q = Series([1, 3, 4]).quantile(0.5, interpolation="lower")
        assert q == np.percentile(np.array([1, 3, 4]), 50)
        assert is_integer(q)

        q = Series([1, 3, 4]).quantile(0.5, interpolation="higher")
        assert q == np.percentile(np.array([1, 3, 4]), 50)
        assert is_integer(q)

    def test_quantile_nan(self):

        # GH 13098
        s = Series([1, 2, 3, 4, np.nan])
        result = s.quantile(0.5)
        expected = 2.5
        assert result == expected

        # all nan/empty
        s1 = Series([], dtype=object)
        cases = [s1, Series([np.nan, np.nan])]

        for s in cases:
            res = s.quantile(0.5)
            assert np.isnan(res)

            res = s.quantile([0.5])
            tm.assert_series_equal(res, Series([np.nan], index=[0.5]))

            res = s.quantile([0.2, 0.3])
            tm.assert_series_equal(res, Series([np.nan, np.nan], index=[0.2, 0.3]))

    @pytest.mark.parametrize(
        "case",
        [
            [
                Timestamp("2011-01-01"),
                Timestamp("2011-01-02"),
                Timestamp("2011-01-03"),
            ],
            [
                Timestamp("2011-01-01", tz="US/Eastern"),
                Timestamp("2011-01-02", tz="US/Eastern"),
                Timestamp("2011-01-03", tz="US/Eastern"),
            ],
            [pd.Timedelta("1 days"), pd.Timedelta("2 days"), pd.Timedelta("3 days")],
            # NaT
            [
                Timestamp("2011-01-01"),
                Timestamp("2011-01-02"),
                Timestamp("2011-01-03"),
                pd.NaT,
            ],
            [
                Timestamp("2011-01-01", tz="US/Eastern"),
                Timestamp("2011-01-02", tz="US/Eastern"),
                Timestamp("2011-01-03", tz="US/Eastern"),
                pd.NaT,
            ],
            [
                pd.Timedelta("1 days"),
                pd.Timedelta("2 days"),
                pd.Timedelta("3 days"),
                pd.NaT,
            ],
        ],
    )
    def test_quantile_box(self, case):
        s = Series(case, name="XXX")
        res = s.quantile(0.5)
        assert res == case[1]

        res = s.quantile([0.5])
        exp = Series([case[1]], index=[0.5], name="XXX")
        tm.assert_series_equal(res, exp)

    def test_datetime_timedelta_quantiles(self):
        # covers #9694
        assert pd.isna(Series([], dtype="M8[ns]").quantile(0.5))
        assert pd.isna(Series([], dtype="m8[ns]").quantile(0.5))

    def test_quantile_nat(self):
        res = Series([pd.NaT, pd.NaT]).quantile(0.5)
        assert res is pd.NaT

        res = Series([pd.NaT, pd.NaT]).quantile([0.5])
        tm.assert_series_equal(res, Series([pd.NaT], index=[0.5]))

    @pytest.mark.parametrize(
        "values, dtype",
        [([0, 0, 0, 1, 2, 3], "Sparse[int]"), ([0.0, None, 1.0, 2.0], "Sparse[float]")],
    )
    def test_quantile_sparse(self, values, dtype):
        ser = Series(values, dtype=dtype)
        result = ser.quantile([0.5])
        expected = Series(np.asarray(ser)).quantile([0.5])
        tm.assert_series_equal(result, expected)

    def test_quantile_empty(self):

        # floats
        s = Series([], dtype="float64")

        res = s.quantile(0.5)
        assert np.isnan(res)

        res = s.quantile([0.5])
        exp = Series([np.nan], index=[0.5])
        tm.assert_series_equal(res, exp)

        # int
        s = Series([], dtype="int64")

        res = s.quantile(0.5)
        assert np.isnan(res)

        res = s.quantile([0.5])
        exp = Series([np.nan], index=[0.5])
        tm.assert_series_equal(res, exp)

        # datetime
        s = Series([], dtype="datetime64[ns]")

        res = s.quantile(0.5)
        assert res is pd.NaT

        res = s.quantile([0.5])
        exp = Series([pd.NaT], index=[0.5])
        tm.assert_series_equal(res, exp)

    @pytest.mark.parametrize("dtype", [int, float, "Int64"])
    def test_quantile_dtypes(self, dtype):
        result = Series([1, 2, 3], dtype=dtype).quantile(np.arange(0, 1, 0.25))
        expected = Series(np.arange(1, 3, 0.5), index=np.arange(0, 1, 0.25))
        if dtype == "Int64":
            expected = expected.astype("Float64")
        tm.assert_series_equal(result, expected)