Exemple #1
0
    def test_to_timestamp_to_period_astype(self):
        idx = DatetimeIndex([pd.NaT, '2011-01-01', '2011-02-01'], name='idx')

        res = idx.astype('period[M]')
        exp = PeriodIndex(['NaT', '2011-01', '2011-02'], freq='M', name='idx')
        tm.assert_index_equal(res, exp)

        res = idx.astype('period[3M]')
        exp = PeriodIndex(['NaT', '2011-01', '2011-02'], freq='3M', name='idx')
        tm.assert_index_equal(res, exp)
Exemple #2
0
    def test_dti_astype_period(self):
        idx = DatetimeIndex([NaT, "2011-01-01", "2011-02-01"], name="idx")

        res = idx.astype("period[M]")
        exp = PeriodIndex(["NaT", "2011-01", "2011-02"], freq="M", name="idx")
        tm.assert_index_equal(res, exp)

        res = idx.astype("period[3M]")
        exp = PeriodIndex(["NaT", "2011-01", "2011-02"], freq="3M", name="idx")
        tm.assert_index_equal(res, exp)
Exemple #3
0
    def test_to_timestamp_to_period_astype(self):
        idx = DatetimeIndex([pd.NaT, '2011-01-01', '2011-02-01'], name='idx')

        res = idx.astype('period[M]')
        exp = PeriodIndex(['NaT', '2011-01', '2011-02'], freq='M', name='idx')
        tm.assert_index_equal(res, exp)

        res = idx.astype('period[3M]')
        exp = PeriodIndex(['NaT', '2011-01', '2011-02'], freq='3M', name='idx')
        tm.assert_index_equal(res, exp)
    def test_astype(self):
        # GH 13149, GH 13209
        idx = DatetimeIndex(['2016-05-16', 'NaT', NaT, np.NaN])

        result = idx.astype(object)
        expected = Index([Timestamp('2016-05-16')] + [NaT] * 3, dtype=object)
        tm.assert_index_equal(result, expected)

        result = idx.astype(int)
        expected = Int64Index([1463356800000000000] +
                              [-9223372036854775808] * 3, dtype=np.int64)
        tm.assert_index_equal(result, expected)

        rng = date_range('1/1/2000', periods=10)
        result = rng.astype('i8')
        self.assert_numpy_array_equal(result, rng.asi8)
def date_to_timestamp(date: pd.DatetimeIndex):
    """Convert a pandas datetime to a javascript timestamp.

    This aims to match the timezone handling semantics
    used in Vega and Vega-Lite.

    Parameters
    ----------
    timestamp : float
        The unix epoch timestamp.

    Returns
    -------
    date : pd.DatetimeIndex
        The timestamps to be converted

    See Also
    --------
    date_to_timestamp : opposite of this function
    """
    if date.tzinfo is None:
        date = date.tz_localize(tzlocal())
    try:
        # Works for pd.Timestamp
        return date.timestamp() * 1000
    except AttributeError:
        # Works for pd.DatetimeIndex
        return date.astype('int64') * 1E-6
Exemple #6
0
def get_fwd_rates(d1, d2, fwd_curve, cal, roll_conv):

    # Get historical fixings
    historical_dates = d1[d1 <= fwd_curve.curve_date]
    historical_fixings = []
    historical_fixing_dates = []
    for i,d in enumerate(historical_dates):
        if d in fwd_curve.historical_fixings.keys():
            historical_fixings.append(fwd_curve.historical_fixings[d]/100.0)
            historical_fixing_dates.append(d)
        else:
            j = 1 
            while j < 10:
                d = DatetimeIndex([d])
                d = pd.Timestamp(np.busday_offset(dates=d.astype(str), offsets=j, roll=roll_conv, busdaycal=cal)[0])
                if d in fwd_curve.historical_fixings.keys():
                    historical_fixings.append(fwd_curve.historical_fixings[d]/100.0)
                    historical_fixing_dates.append(d)
                    break
                j += 1
                
    # Calculate future fixings
    d2 = d2[d1 > fwd_curve.curve_date]
    d1 = d1[d1 > fwd_curve.curve_date]
    t = pd.Series(sch.year_fraction(d1,d2,fwd_curve.day_count_basis))
    future_fixings = list((1 / t) * (fwd_curve.get_dcf(d1) / fwd_curve.get_dcf(d2) - 1))

    return pd.Series(historical_fixing_dates + d1.to_list()), pd.Series(historical_fixings + future_fixings)
    def test_astype_datetime64(self):
        # GH 13149, GH 13209
        idx = DatetimeIndex(["2016-05-16", "NaT", NaT, np.NaN])

        result = idx.astype("datetime64[ns]")
        tm.assert_index_equal(result, idx)
        assert result is not idx

        result = idx.astype("datetime64[ns]", copy=False)
        tm.assert_index_equal(result, idx)
        assert result is idx

        idx_tz = DatetimeIndex(["2016-05-16", "NaT", NaT, np.NaN], tz="EST")
        result = idx_tz.astype("datetime64[ns]")
        expected = DatetimeIndex(["2016-05-16 05:00:00", "NaT", "NaT", "NaT"],
                                 dtype="datetime64[ns]")
        tm.assert_index_equal(result, expected)
Exemple #8
0
    def test_astype_datetime64(self):
        # GH 13149, GH 13209
        idx = DatetimeIndex(['2016-05-16', 'NaT', NaT, np.NaN])

        result = idx.astype('datetime64[ns]')
        tm.assert_index_equal(result, idx)
        assert result is not idx

        result = idx.astype('datetime64[ns]', copy=False)
        tm.assert_index_equal(result, idx)
        assert result is idx

        idx_tz = DatetimeIndex(['2016-05-16', 'NaT', NaT, np.NaN], tz='EST')
        result = idx_tz.astype('datetime64[ns]')
        expected = DatetimeIndex(['2016-05-16 05:00:00', 'NaT', 'NaT', 'NaT'],
                                 dtype='datetime64[ns]')
        tm.assert_index_equal(result, expected)
Exemple #9
0
    def test_astype(self):
        # GH 13149, GH 13209
        idx = DatetimeIndex(['2016-05-16', 'NaT', NaT, np.NaN])

        result = idx.astype(object)
        expected = Index([Timestamp('2016-05-16')] + [NaT] * 3, dtype=object)
        tm.assert_index_equal(result, expected)

        result = idx.astype(int)
        expected = Int64Index([1463356800000000000] +
                              [-9223372036854775808] * 3, dtype=np.int64)
        tm.assert_index_equal(result, expected)

        rng = date_range('1/1/2000', periods=10)
        result = rng.astype('i8')
        tm.assert_index_equal(result, Index(rng.asi8))
        tm.assert_numpy_array_equal(result.values, rng.asi8)
Exemple #10
0
    def test_astype_str_compat(self):
        # GH 13149, GH 13209
        # verify that we are returning NaT as a string (and not unicode)

        idx = DatetimeIndex(['2016-05-16', 'NaT', NaT, np.NaN])
        result = idx.astype(str)
        expected = Index(['2016-05-16', 'NaT', 'NaT', 'NaT'], dtype=object)
        tm.assert_index_equal(result, expected)
Exemple #11
0
    def test_astype_object(self):
        idx = pd.date_range(start='2013-01-01',
                            periods=4,
                            freq='M',
                            name='idx')
        expected_list = [
            Timestamp('2013-01-31'),
            Timestamp('2013-02-28'),
            Timestamp('2013-03-31'),
            Timestamp('2013-04-30')
        ]
        expected = pd.Index(expected_list, dtype=object, name='idx')
        result = idx.astype(object)
        assert isinstance(result, Index)

        assert result.dtype == object
        tm.assert_index_equal(result, expected)
        assert result.name == expected.name
        assert idx.tolist() == expected_list

        idx = pd.date_range(start='2013-01-01',
                            periods=4,
                            freq='M',
                            name='idx',
                            tz='Asia/Tokyo')
        expected_list = [
            Timestamp('2013-01-31', tz='Asia/Tokyo'),
            Timestamp('2013-02-28', tz='Asia/Tokyo'),
            Timestamp('2013-03-31', tz='Asia/Tokyo'),
            Timestamp('2013-04-30', tz='Asia/Tokyo')
        ]
        expected = pd.Index(expected_list, dtype=object, name='idx')
        result = idx.astype(object)
        assert isinstance(result, Index)
        assert result.dtype == object
        tm.assert_index_equal(result, expected)
        assert result.name == expected.name
        assert idx.tolist() == expected_list

        idx = DatetimeIndex([
            datetime(2013, 1, 1),
            datetime(2013, 1, 2), pd.NaT,
            datetime(2013, 1, 4)
        ],
                            name='idx')
        expected_list = [
            Timestamp('2013-01-01'),
            Timestamp('2013-01-02'), pd.NaT,
            Timestamp('2013-01-04')
        ]
        expected = pd.Index(expected_list, dtype=object, name='idx')
        result = idx.astype(object)
        assert isinstance(result, Index)
        assert result.dtype == object
        tm.assert_index_equal(result, expected)
        assert result.name == expected.name
        assert idx.tolist() == expected_list
Exemple #12
0
 def test_astype_object_with_nat(self):
     idx = DatetimeIndex([datetime(2013, 1, 1), datetime(2013, 1, 2),
                          pd.NaT, datetime(2013, 1, 4)], name='idx')
     expected_list = [Timestamp('2013-01-01'),
                      Timestamp('2013-01-02'), pd.NaT,
                      Timestamp('2013-01-04')]
     expected = pd.Index(expected_list, dtype=object, name='idx')
     result = idx.astype(object)
     tm.assert_index_equal(result, expected)
     assert idx.tolist() == expected_list
Exemple #13
0
 def test_astype_object_with_nat(self):
     idx = DatetimeIndex([datetime(2013, 1, 1), datetime(2013, 1, 2),
                          pd.NaT, datetime(2013, 1, 4)], name='idx')
     expected_list = [Timestamp('2013-01-01'),
                      Timestamp('2013-01-02'), pd.NaT,
                      Timestamp('2013-01-04')]
     expected = pd.Index(expected_list, dtype=object, name='idx')
     result = idx.astype(object)
     tm.assert_index_equal(result, expected)
     assert idx.tolist() == expected_list
Exemple #14
0
    def test_astype_datetime64(self):
        # GH 13149, GH 13209
        idx = DatetimeIndex(["2016-05-16", "NaT", NaT, np.NaN], name="idx")

        result = idx.astype("datetime64[ns]")
        tm.assert_index_equal(result, idx)
        assert result is not idx

        result = idx.astype("datetime64[ns]", copy=False)
        tm.assert_index_equal(result, idx)
        assert result is idx

        idx_tz = DatetimeIndex(["2016-05-16", "NaT", NaT, np.NaN], tz="EST", name="idx")
        with tm.assert_produces_warning(FutureWarning):
            # dt64tz->dt64 deprecated
            result = idx_tz.astype("datetime64[ns]")
        expected = DatetimeIndex(
            ["2016-05-16 05:00:00", "NaT", "NaT", "NaT"],
            dtype="datetime64[ns]",
            name="idx",
        )
        tm.assert_index_equal(result, expected)
Exemple #15
0
    def test_astype(self):
        # GH 13149, GH 13209
        idx = DatetimeIndex(["2016-05-16", "NaT", NaT, np.NaN], name="idx")

        result = idx.astype(object)
        expected = Index(
            [Timestamp("2016-05-16")] + [NaT] * 3, dtype=object, name="idx"
        )
        tm.assert_index_equal(result, expected)

        result = idx.astype(int)
        expected = Int64Index(
            [1463356800000000000] + [-9223372036854775808] * 3,
            dtype=np.int64,
            name="idx",
        )
        tm.assert_index_equal(result, expected)

        rng = date_range("1/1/2000", periods=10, name="idx")
        result = rng.astype("i8")
        tm.assert_index_equal(result, Index(rng.asi8, name="idx"))
        tm.assert_numpy_array_equal(result.values, rng.asi8)
Exemple #16
0
 def test_astype_object_with_nat(self):
     idx = DatetimeIndex(
         [datetime(2013, 1, 1), datetime(2013, 1, 2), NaT, datetime(2013, 1, 4)],
         name="idx",
     )
     expected_list = [
         Timestamp("2013-01-01"),
         Timestamp("2013-01-02"),
         NaT,
         Timestamp("2013-01-04"),
     ]
     expected = Index(expected_list, dtype=object, name="idx")
     result = idx.astype(object)
     tm.assert_index_equal(result, expected)
     assert idx.tolist() == expected_list
Exemple #17
0
    def test_equals(self):
        # GH 13107
        idx = DatetimeIndex(["2011-01-01", "2011-01-02", "NaT"])
        assert idx.equals(idx)
        assert idx.equals(idx.copy())
        assert idx.equals(idx.astype(object))
        assert idx.astype(object).equals(idx)
        assert idx.astype(object).equals(idx.astype(object))
        assert not idx.equals(list(idx))
        assert not idx.equals(Series(idx))

        idx2 = DatetimeIndex(["2011-01-01", "2011-01-02", "NaT"],
                             tz="US/Pacific")
        assert not idx.equals(idx2)
        assert not idx.equals(idx2.copy())
        assert not idx.equals(idx2.astype(object))
        assert not idx.astype(object).equals(idx2)
        assert not idx.equals(list(idx2))
        assert not idx.equals(Series(idx2))

        # same internal, different tz
        idx3 = DatetimeIndex(idx.asi8, tz="US/Pacific")
        tm.assert_numpy_array_equal(idx.asi8, idx3.asi8)
        assert not idx.equals(idx3)
        assert not idx.equals(idx3.copy())
        assert not idx.equals(idx3.astype(object))
        assert not idx.astype(object).equals(idx3)
        assert not idx.equals(list(idx3))
        assert not idx.equals(Series(idx3))

        # check that we do not raise when comparing with OutOfBounds objects
        oob = Index([datetime(2500, 1, 1)] * 3, dtype=object)
        assert not idx.equals(oob)
        assert not idx2.equals(oob)
        assert not idx3.equals(oob)

        # check that we do not raise when comparing with OutOfBounds dt64
        oob2 = oob.map(np.datetime64)
        assert not idx.equals(oob2)
        assert not idx2.equals(oob2)
        assert not idx3.equals(oob2)
Exemple #18
0
    def test_astype_object(self):
        idx = pd.date_range(start='2013-01-01', periods=4, freq='M',
                            name='idx')
        expected_list = [Timestamp('2013-01-31'),
                         Timestamp('2013-02-28'),
                         Timestamp('2013-03-31'),
                         Timestamp('2013-04-30')]
        expected = pd.Index(expected_list, dtype=object, name='idx')
        result = idx.astype(object)
        assert isinstance(result, Index)

        assert result.dtype == object
        tm.assert_index_equal(result, expected)
        assert result.name == expected.name
        assert idx.tolist() == expected_list

        idx = pd.date_range(start='2013-01-01', periods=4, freq='M',
                            name='idx', tz='Asia/Tokyo')
        expected_list = [Timestamp('2013-01-31', tz='Asia/Tokyo'),
                         Timestamp('2013-02-28', tz='Asia/Tokyo'),
                         Timestamp('2013-03-31', tz='Asia/Tokyo'),
                         Timestamp('2013-04-30', tz='Asia/Tokyo')]
        expected = pd.Index(expected_list, dtype=object, name='idx')
        result = idx.astype(object)
        assert isinstance(result, Index)
        assert result.dtype == object
        tm.assert_index_equal(result, expected)
        assert result.name == expected.name
        assert idx.tolist() == expected_list

        idx = DatetimeIndex([datetime(2013, 1, 1), datetime(2013, 1, 2),
                             pd.NaT, datetime(2013, 1, 4)], name='idx')
        expected_list = [Timestamp('2013-01-01'),
                         Timestamp('2013-01-02'), pd.NaT,
                         Timestamp('2013-01-04')]
        expected = pd.Index(expected_list, dtype=object, name='idx')
        result = idx.astype(object)
        assert isinstance(result, Index)
        assert result.dtype == object
        tm.assert_index_equal(result, expected)
        assert result.name == expected.name
        assert idx.tolist() == expected_list
Exemple #19
0
 def test_astype_raises(self, dtype):
     # GH 13149, GH 13209
     idx = DatetimeIndex(['2016-05-16', 'NaT', NaT, np.NaN])
     msg = 'Cannot cast DatetimeArrayMixin to dtype'
     with pytest.raises(TypeError, match=msg):
         idx.astype(dtype)
Exemple #20
0
def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None):
    """
    Encode input values as an enumerated type or categorical variable

    Parameters
    ----------
    values : ndarray (1-d)
        Sequence
    sort : boolean, default False
        Sort by values
    na_sentinel : int, default -1
        Value to mark "not found"
    size_hint : hint to the hashtable sizer

    Returns
    -------
    labels : the indexer to the original array
    uniques : ndarray (1-d) or Index
        the unique values. Index is returned when passed values is Index or
        Series

    note: an array of Periods will ignore sort as it returns an always sorted
    PeriodIndex
    """
    from pandas import Index, Series, DatetimeIndex

    vals = np.asarray(values)

    # localize to UTC
    is_datetimetz_type = is_datetimetz(values)
    if is_datetimetz_type:
        values = DatetimeIndex(values)
        vals = values.tz_localize(None)

    is_datetime = is_datetime64_dtype(vals)
    is_timedelta = is_timedelta64_dtype(vals)
    (hash_klass, vec_klass), vals = _get_data_algo(vals, _hashtables)

    table = hash_klass(size_hint or len(vals))
    uniques = vec_klass()
    labels = table.get_labels(vals, uniques, 0, na_sentinel, True)

    labels = _ensure_platform_int(labels)

    uniques = uniques.to_array()

    if sort and len(uniques) > 0:
        uniques, labels = safe_sort(uniques, labels, na_sentinel=na_sentinel,
                                    assume_unique=True)

    if is_datetimetz_type:
        # reset tz
        uniques = DatetimeIndex(uniques.astype('M8[ns]')).tz_localize(
            values.tz)
    elif is_datetime:
        uniques = uniques.astype('M8[ns]')
    elif is_timedelta:
        uniques = uniques.astype('m8[ns]')
    if isinstance(values, Index):
        uniques = values._shallow_copy(uniques, name=None)
    elif isinstance(values, Series):
        uniques = Index(uniques)
    return labels, uniques
Exemple #21
0
 def test_astype_raises(self, dtype):
     # GH 13149, GH 13209
     idx = DatetimeIndex(['2016-05-16', 'NaT', NaT, np.NaN])
     msg = 'Cannot cast DatetimeIndex to dtype'
     with tm.assert_raises_regex(TypeError, msg):
         idx.astype(dtype)
def value_counts(values,
                 sort=True,
                 ascending=False,
                 normalize=False,
                 bins=None,
                 dropna=True):
    """
    Compute a histogram of the counts of non-null values.

    Parameters
    ----------
    values : ndarray (1-d)
    sort : boolean, default True
        Sort by values
    ascending : boolean, default False
        Sort in ascending order
    normalize: boolean, default False
        If True then compute a relative histogram
    bins : integer, optional
        Rather than count values, group them into half-open bins,
        convenience for pd.cut, only works with numeric data
    dropna : boolean, default True
        Don't include counts of NaN

    Returns
    -------
    value_counts : Series

    """
    from pandas.core.series import Series
    from pandas.tools.tile import cut
    from pandas import Index, PeriodIndex, DatetimeIndex

    name = getattr(values, 'name', None)
    values = Series(values).values

    if bins is not None:
        try:
            cat, bins = cut(values, bins, retbins=True)
        except TypeError:
            raise TypeError("bins argument only works with numeric data.")
        values = cat.codes

    if com.is_categorical_dtype(values.dtype):
        result = values.value_counts(dropna)

    else:

        dtype = values.dtype
        is_period = com.is_period_arraylike(values)
        is_datetimetz = com.is_datetimetz(values)

        if com.is_datetime_or_timedelta_dtype(
                dtype) or is_period or is_datetimetz:

            if is_period:
                values = PeriodIndex(values)
            elif is_datetimetz:
                tz = getattr(values, 'tz', None)
                values = DatetimeIndex(values).tz_localize(None)

            values = values.view(np.int64)
            keys, counts = htable.value_count_scalar64(values, dropna)

            if dropna:
                from pandas.tslib import iNaT
                msk = keys != iNaT
                keys, counts = keys[msk], counts[msk]

            # localize to the original tz if necessary
            if is_datetimetz:
                keys = DatetimeIndex(keys).tz_localize(tz)

            # convert the keys back to the dtype we came in
            else:
                keys = keys.astype(dtype)

        elif com.is_integer_dtype(dtype):
            values = com._ensure_int64(values)
            keys, counts = htable.value_count_scalar64(values, dropna)
        elif com.is_float_dtype(dtype):
            values = com._ensure_float64(values)
            keys, counts = htable.value_count_scalar64(values, dropna)

        else:
            values = com._ensure_object(values)
            mask = com.isnull(values)
            keys, counts = htable.value_count_object(values, mask)
            if not dropna and mask.any():
                keys = np.insert(keys, 0, np.NaN)
                counts = np.insert(counts, 0, mask.sum())

        if not isinstance(keys, Index):
            keys = Index(keys)
        result = Series(counts, index=keys, name=name)

        if bins is not None:
            # TODO: This next line should be more efficient
            result = result.reindex(np.arange(len(cat.categories)),
                                    fill_value=0)
            result.index = bins[:-1]

    if sort:
        result = result.sort_values(ascending=ascending)

    if normalize:
        result = result / float(values.size)

    return result
Exemple #23
0
 def test_astype_raises(self, dtype):
     # GH 13149, GH 13209
     idx = DatetimeIndex(['2016-05-16', 'NaT', NaT, np.NaN])
     msg = 'Cannot cast DatetimeArray to dtype'
     with pytest.raises(TypeError, match=msg):
         idx.astype(dtype)
def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None):
    """
    Encode input values as an enumerated type or categorical variable

    Parameters
    ----------
    values : ndarray (1-d)
        Sequence
    sort : boolean, default False
        Sort by values
    na_sentinel : int, default -1
        Value to mark "not found"
    size_hint : hint to the hashtable sizer

    Returns
    -------
    labels : the indexer to the original array
    uniques : ndarray (1-d) or Index
        the unique values. Index is returned when passed values is Index or
        Series

    note: an array of Periods will ignore sort as it returns an always sorted
    PeriodIndex
    """
    from pandas import Index, Series, DatetimeIndex

    vals = np.asarray(values)

    # localize to UTC
    is_datetimetz = com.is_datetimetz(values)
    if is_datetimetz:
        values = DatetimeIndex(values)
        vals = values.tz_localize(None)

    is_datetime = com.is_datetime64_dtype(vals)
    is_timedelta = com.is_timedelta64_dtype(vals)
    (hash_klass, vec_klass), vals = _get_data_algo(vals, _hashtables)

    table = hash_klass(size_hint or len(vals))
    uniques = vec_klass()
    labels = table.get_labels(vals, uniques, 0, na_sentinel, True)

    labels = com._ensure_platform_int(labels)

    uniques = uniques.to_array()

    if sort and len(uniques) > 0:
        try:
            sorter = uniques.argsort()
        except:
            # unorderable in py3 if mixed str/int
            t = hash_klass(len(uniques))
            t.map_locations(com._ensure_object(uniques))

            # order ints before strings
            ordered = np.concatenate([
                np.sort(np.array([e for i, e in enumerate(uniques) if f(e)],
                                 dtype=object)) for f in
                [lambda x: not isinstance(x, string_types),
                 lambda x: isinstance(x, string_types)]])
            sorter = com._ensure_platform_int(t.lookup(
                com._ensure_object(ordered)))

        reverse_indexer = np.empty(len(sorter), dtype=np.int_)
        reverse_indexer.put(sorter, np.arange(len(sorter)))

        mask = labels < 0
        labels = reverse_indexer.take(labels)
        np.putmask(labels, mask, -1)

        uniques = uniques.take(sorter)

    if is_datetimetz:

        # reset tz
        uniques = DatetimeIndex(uniques.astype('M8[ns]')).tz_localize(
            values.tz)
    elif is_datetime:
        uniques = uniques.astype('M8[ns]')
    elif is_timedelta:
        uniques = uniques.astype('m8[ns]')
    if isinstance(values, Index):
        uniques = values._shallow_copy(uniques, name=None)
    elif isinstance(values, Series):
        uniques = Index(uniques)
    return labels, uniques
Exemple #25
0
def value_counts(values, sort=True, ascending=False, normalize=False,
                 bins=None, dropna=True):
    """
    Compute a histogram of the counts of non-null values.

    Parameters
    ----------
    values : ndarray (1-d)
    sort : boolean, default True
        Sort by values
    ascending : boolean, default False
        Sort in ascending order
    normalize: boolean, default False
        If True then compute a relative histogram
    bins : integer, optional
        Rather than count values, group them into half-open bins,
        convenience for pd.cut, only works with numeric data
    dropna : boolean, default True
        Don't include counts of NaN

    Returns
    -------
    value_counts : Series

    """
    from pandas.core.series import Series
    from pandas.tools.tile import cut
    from pandas import Index, PeriodIndex, DatetimeIndex

    name = getattr(values, 'name', None)
    values = Series(values).values

    if bins is not None:
        try:
            cat, bins = cut(values, bins, retbins=True)
        except TypeError:
            raise TypeError("bins argument only works with numeric data.")
        values = cat.codes

    if com.is_categorical_dtype(values.dtype):
        result = values.value_counts(dropna)

    else:

        dtype = values.dtype
        is_period = com.is_period_arraylike(values)
        is_datetimetz = com.is_datetimetz(values)

        if com.is_datetime_or_timedelta_dtype(dtype) or is_period or \
                is_datetimetz:

            if is_period:
                values = PeriodIndex(values)
            elif is_datetimetz:
                tz = getattr(values, 'tz', None)
                values = DatetimeIndex(values).tz_localize(None)

            values = values.view(np.int64)
            keys, counts = htable.value_count_scalar64(values, dropna)

            if dropna:
                msk = keys != iNaT
                keys, counts = keys[msk], counts[msk]

            # localize to the original tz if necessary
            if is_datetimetz:
                keys = DatetimeIndex(keys).tz_localize(tz)

            # convert the keys back to the dtype we came in
            else:
                keys = keys.astype(dtype)

        elif com.is_integer_dtype(dtype):
            values = com._ensure_int64(values)
            keys, counts = htable.value_count_scalar64(values, dropna)
        elif com.is_float_dtype(dtype):
            values = com._ensure_float64(values)
            keys, counts = htable.value_count_scalar64(values, dropna)

        else:
            values = com._ensure_object(values)
            mask = com.isnull(values)
            keys, counts = htable.value_count_object(values, mask)
            if not dropna and mask.any():
                keys = np.insert(keys, 0, np.NaN)
                counts = np.insert(counts, 0, mask.sum())

        if not isinstance(keys, Index):
            keys = Index(keys)
        result = Series(counts, index=keys, name=name)

        if bins is not None:
            # TODO: This next line should be more efficient
            result = result.reindex(np.arange(len(cat.categories)),
                                    fill_value=0)
            result.index = bins[:-1]

    if sort:
        result = result.sort_values(ascending=ascending)

    if normalize:
        result = result / float(values.size)

    return result
Exemple #26
0
 def test_astype_raises(self, dtype):
     # GH 13149, GH 13209
     idx = DatetimeIndex(["2016-05-16", "NaT", NaT, np.NaN])
     msg = "Cannot cast DatetimeArray to dtype"
     with pytest.raises(TypeError, match=msg):
         idx.astype(dtype)