예제 #1
0
 def test_mixed_freq_regular_first(self):
     """Plot a regular series, then an irregular subset of it.

     Both plotted lines must carry 'B' period x-data and the x-limits
     must span the regular series.
     """
     import matplotlib.pyplot as plt  # noqa
     regular = tm.makeTimeSeries()
     subset = regular[[0, 5, 10, 11, 12, 13, 14, 15]]
     ax = regular.plot()  # noqa
     ax2 = subset.plot(style='g')

     plotted = ax2.get_lines()
     regular_periods = PeriodIndex(plotted[0].get_xdata())
     subset_periods = PeriodIndex(plotted[1].get_xdata())
     self.assertTrue(regular_periods.equals(regular.index.to_period('B')))
     self.assertTrue(subset_periods.equals(subset.index.to_period('B')))

     xmin, xmax = ax2.get_xlim()
     expected = regular.index.to_period()
     self.assertEqual(xmin, expected[0].ordinal)
     self.assertEqual(xmax, expected[-1].ordinal)
예제 #2
0
파일: resample.py 프로젝트: wudcwctw/pandas
    def _resample_periods(self, obj):
        """Resample ``obj``, whose axis labels are periods, to ``self.freq``.

        An empty axis yields ``obj`` reindexed to an empty PeriodIndex.
        Otherwise the data is either aggregated into bins (downsampling,
        or whenever ``self.how`` is set) or re-taken with the configured
        fill method (upsampling).

        Raises
        ------
        ValueError
            If the source frequency is neither a sub- nor a super-period
            of ``self.freq`` and ``self.how`` is None.
        """
        labels = obj._get_axis(self.axis)

        if len(labels) == 0:
            return obj.reindex(PeriodIndex(data=[], freq=self.freq))

        first = labels[0].asfreq(self.freq, how=self.convention)
        last = labels[-1].asfreq(self.freq, how='end')
        target_index = period_range(first, last, freq=self.freq)

        # Start vs. end of period
        converted = labels.asfreq(self.freq, how=self.convention)

        if is_subperiod(labels.freq, self.freq) or self.how is not None:
            # Downsampling
            ordinals = converted.values
            span = np.arange(ordinals[0], ordinals[-1] + 1)
            edges = converted.searchsorted(span, side='right')
            binned = obj.groupby(BinGrouper(edges, target_index),
                                 axis=self.axis)
            return binned.aggregate(self._agg_method)

        if is_superperiod(labels.freq, self.freq):
            # Get the fill indexer
            fill_indexer = converted.get_indexer(target_index,
                                                 method=self.fill_method,
                                                 limit=self.limit)
            return _take_new_index(obj, fill_indexer, target_index,
                                   axis=self.axis)

        raise ValueError('Frequency %s cannot be resampled to %s' %
                         (labels.freq, self.freq))
예제 #3
0
def maybe_to_datetimelike(data, copy=False):
    """
    Wrap a datetimelike Series in the matching properties delegate.

    Handles datetime64 and timedelta64 dtypes as well as Series holding
    Periods or datetime-like objects.

    Parameters
    ----------
    data : Series
    copy : boolean, default False
           copy the input data

    Returns
    -------
    DelegatedClass

    Raises
    ------
    TypeError
        If ``data`` is not a Series or is not datetimelike.
    """
    from pandas import Series

    if not isinstance(data, Series):
        raise TypeError("cannot convert an object of type {0} to a "
                        "datetimelike index".format(type(data)))

    index = data.index
    scalar_type = data.dtype.type

    if issubclass(scalar_type, np.datetime64):
        values = DatetimeIndex(data, copy=copy, freq='infer')
        return DatetimeProperties(values, index)

    if issubclass(scalar_type, np.timedelta64):
        values = TimedeltaIndex(data, copy=copy, freq='infer')
        return TimedeltaProperties(values, index)

    if is_period_arraylike(data):
        return PeriodProperties(PeriodIndex(data, copy=copy), index)

    if is_datetime_arraylike(data):
        values = DatetimeIndex(data, copy=copy, freq='infer')
        return DatetimeProperties(values, index)

    raise TypeError("cannot convert an object of type {0} to a "
                    "datetimelike index".format(type(data)))
예제 #4
0
파일: resample.py 프로젝트: Libardo1/pandas
    def _get_time_period_bins(self, axis):
        if not (isinstance(axis, DatetimeIndex)):
            raise AssertionError()

        if len(axis) == 0:
            binner = labels = PeriodIndex(data=[], freq=self.freq)
            return binner, [], labels

        labels = binner = PeriodIndex(start=axis[0],
                                      end=axis[-1],
                                      freq=self.freq)

        end_stamps = (labels + 1).asfreq('D', 's').to_timestamp()
        bins = axis.searchsorted(end_stamps, side='left')

        return binner, bins, labels
예제 #5
0
파일: resample.py 프로젝트: wudcwctw/pandas
    def _get_time_period_bins(self, axis):
        if not isinstance(axis, DatetimeIndex):
            raise TypeError('axis must be a DatetimeIndex, but got '
                            'an instance of %r' % type(axis).__name__)

        if not len(axis):
            binner = labels = PeriodIndex(data=[], freq=self.freq)
            return binner, [], labels

        labels = binner = PeriodIndex(start=axis[0],
                                      end=axis[-1],
                                      freq=self.freq)

        end_stamps = (labels + 1).asfreq('D', 's').to_timestamp()
        bins = axis.searchsorted(end_stamps, side='left')

        return binner, bins, labels
예제 #6
0
 def test_business_freq(self):
     """A plotted period series keeps its 'B' frequency on the line."""
     import matplotlib.pyplot as plt  # noqa
     series = tm.makePeriodSeries()
     ax = series.plot()

     first_x = ax.get_lines()[0].get_xydata()[0, 0]
     self.assertEqual(first_x, series.index[0].ordinal)

     plotted = PeriodIndex(data=ax.get_lines()[0].get_xdata())
     self.assertEqual(plotted.freqstr, 'B')
예제 #7
0
    def test_mixed_freq_regular_first_df(self):
        """Frame variant: plot a regular frame, then an irregular subset
        of it on the same axes; both lines must be 'B' periods."""
        # GH 9852
        import matplotlib.pyplot as plt  # noqa

        regular = tm.makeTimeSeries().to_frame()
        subset = regular.iloc[[0, 5, 10, 11, 12, 13, 14, 15], :]
        ax = regular.plot()
        ax2 = subset.plot(style="g", ax=ax)

        plotted = ax2.get_lines()
        regular_periods = PeriodIndex(plotted[0].get_xdata())
        subset_periods = PeriodIndex(plotted[1].get_xdata())
        self.assertTrue(regular_periods.equals(regular.index.to_period("B")))
        self.assertTrue(subset_periods.equals(subset.index.to_period("B")))

        xmin, xmax = ax2.get_xlim()
        expected = regular.index.to_period()
        self.assertEqual(xmin, expected[0].ordinal)
        self.assertEqual(xmax, expected[-1].ordinal)
예제 #8
0
 def test_business_freq(self):
     """A plotted period series keeps its 'B' frequency on the line."""
     import matplotlib.pyplot as plt
     plt.close('all')
     bts = tm.makePeriodSeries()
     ax = bts.plot()
     # assertEqual rather than assert_(a, b): the two-argument form treats
     # ``b`` as the failure message and never compares the values.
     self.assertEqual(ax.get_lines()[0].get_xydata()[0, 0],
                      bts.index[0].ordinal)
     idx = ax.get_lines()[0].get_xdata()
     self.assertEqual(PeriodIndex(data=idx).freqstr, 'B')
예제 #9
0
    def test_to_weekly_resampling(self):
        """Plotting monthly data after weekly data must put every line on
        the weekly frequency, for both ``Series.plot`` and ``tsplot``."""
        idxh = date_range('1/1/1999', periods=52, freq='W')
        idxl = date_range('1/1/1999', periods=12, freq='M')
        high = Series(np.random.randn(len(idxh)), idxh)
        low = Series(np.random.randn(len(idxl)), idxl)
        high.plot()
        ax = low.plot()
        for line in ax.get_lines():
            self.assertEqual(PeriodIndex(data=line.get_xdata()).freq,
                             idxh.freq)

        # tsplot
        from pandas.tseries.plotting import tsplot
        import matplotlib.pyplot as plt

        tsplot(high, plt.Axes.plot)
        lines = tsplot(low, plt.Axes.plot)
        for line in lines:
            # assertEqual, not assertTrue(a, b): the latter only checks the
            # truthiness of ``a`` and uses ``b`` as the failure message.
            self.assertEqual(PeriodIndex(data=line.get_xdata()).freq,
                             idxh.freq)
예제 #10
0
 def test_mixed_freq_hf_first(self):
     """Daily data plotted first: every line must end up on 'D'."""
     daily_index = date_range('1/1/1999', periods=365, freq='D')
     monthly_index = date_range('1/1/1999', periods=12, freq='M')
     daily = Series(np.random.randn(len(daily_index)), daily_index)
     monthly = Series(np.random.randn(len(monthly_index)), monthly_index)

     daily.plot()
     ax = monthly.plot()
     for line in ax.get_lines():
         plotted = PeriodIndex(data=line.get_xdata())
         self.assertEqual(plotted.freq, 'D')
예제 #11
0
    def _get_time_period_bins(self, axis):
        assert (isinstance(axis, DatetimeIndex))

        if len(axis) == 0:
            binner = labels = PeriodIndex(data=[], freq=self.freq)
            return binner, [], labels

        labels = binner = PeriodIndex(start=axis[0],
                                      end=axis[-1],
                                      freq=self.freq)

        end_stamps = (labels + 1).asfreq('D', 's').to_timestamp()
        bins = axis.searchsorted(end_stamps, side='left')

        if bins[-1] < len(axis):
            bins = np.concatenate([bins, [len(axis)]])

        return binner, bins, labels
예제 #12
0
 def test_from_weekly_resampling(self):
     """Monthly data plotted first, weekly second: every line must end up
     on a weekly frequency."""
     idxh = date_range('1/1/1999', periods=52, freq='W')
     idxl = date_range('1/1/1999', periods=12, freq='M')
     high = Series(np.random.randn(len(idxh)), idxh)
     low = Series(np.random.randn(len(idxl)), idxl)
     low.plot()
     ax = high.plot()
     for line in ax.get_lines():
         # assertTrue replaces the removed unittest alias ``assert_``.
         self.assertTrue(
             PeriodIndex(data=line.get_xdata()).freq.startswith('W'))
예제 #13
0
    def test_mixed_freq_second_millisecond(self):
        """Second and 100-millisecond series share axes at 'L' frequency,
        whichever one is plotted first."""
        # GH 7772, GH 7760
        idxh = date_range('2014-07-01 09:00', freq='S', periods=50)
        idxl = date_range('2014-07-01 09:00', freq='100L', periods=500)
        high = Series(np.random.randn(len(idxh)), idxh)
        low = Series(np.random.randn(len(idxl)), idxl)

        def plot_and_check(first, second):
            first.plot()
            ax = second.plot()
            self.assertEqual(len(ax.get_lines()), 2)
            for line in ax.get_lines():
                plotted = PeriodIndex(data=line.get_xdata())
                self.assertEqual(plotted.freq, 'L')

        # high to low
        plot_and_check(high, low)
        tm.close()

        # low to high
        plot_and_check(low, high)
예제 #14
0
 def test_mixed_freq_hf_first(self):
     """Daily data plotted first: every line must end up on 'D'."""
     import matplotlib.pyplot as plt
     plt.close('all')
     idxh = date_range('1/1/1999', periods=365, freq='D')
     idxl = date_range('1/1/1999', periods=12, freq='M')
     high = Series(np.random.randn(len(idxh)), idxh)
     low = Series(np.random.randn(len(idxl)), idxl)
     high.plot()
     ax = low.plot()
     for line in ax.get_lines():
         # assertEqual replaces the removed alias ``assert_`` and reports
         # both values on failure.
         self.assertEqual(PeriodIndex(data=line.get_xdata()).freq, 'D')
예제 #15
0
 def test_to_weekly_resampling(self):
     """Weekly data plotted first, monthly second: every line must end up
     on a weekly frequency."""
     import matplotlib.pyplot as plt
     plt.close('all')
     idxh = date_range('1/1/1999', periods=52, freq='W')
     idxl = date_range('1/1/1999', periods=12, freq='M')
     high = Series(np.random.randn(len(idxh)), idxh)
     low = Series(np.random.randn(len(idxl)), idxl)
     high.plot()
     ax = low.plot()
     for line in ax.get_lines():
         # assertTrue replaces the removed unittest alias ``assert_``.
         self.assertTrue(
             PeriodIndex(data=line.get_xdata()).freq.startswith('W'))
예제 #16
0
파일: algorithms.py 프로젝트: jcfr/pandas
def _value_counts_arraylike(values, dropna=True):
    """Count occurrences of each distinct value in ``values``.

    Parameters
    ----------
    values : array-like
    dropna : boolean, default True
        Passed to the hashtable counters; for datetimelike input the
        ``iNaT`` key is additionally filtered out when True.

    Returns
    -------
    (keys, counts)
        For tz-aware datetime and period input the keys are rebuilt as a
        DatetimeIndex / PeriodIndex.
    """
    from pandas.core.series import Series

    is_datetimetz = com.is_datetimetz(values)
    is_period = (isinstance(values, gt.ABCPeriodIndex) or
                 com.is_period_arraylike(values))

    orig = values
    values = Series(values).values
    dtype = values.dtype

    if com.is_datetime_or_timedelta_dtype(dtype) or is_period:
        from pandas.tseries.index import DatetimeIndex
        from pandas.tseries.period import PeriodIndex

        if is_period:
            values = PeriodIndex(values)
            freq = values.freq

        # Count on the underlying int64 representation.
        values = values.view(np.int64)
        keys, counts = htable.value_count_scalar64(values, dropna)

        if dropna:
            not_nat = keys != iNaT
            keys = keys[not_nat]
            counts = counts[not_nat]

        # convert the keys back to the dtype we came in
        keys = keys.astype(dtype)

        # dtype handling
        if is_datetimetz:
            if isinstance(orig, gt.ABCDatetimeIndex):
                zone = orig.tz
            else:
                zone = orig.dt.tz
            keys = DatetimeIndex._simple_new(keys, tz=zone)
        if is_period:
            keys = PeriodIndex._simple_new(keys, freq=freq)
        return keys, counts

    if com.is_integer_dtype(dtype):
        return htable.value_count_scalar64(com._ensure_int64(values), dropna)

    if com.is_float_dtype(dtype):
        return htable.value_count_scalar64(com._ensure_float64(values),
                                           dropna)

    values = com._ensure_object(values)
    null_mask = com.isnull(values)
    keys, counts = htable.value_count_object(values, null_mask)
    if not dropna and null_mask.any():
        # Put the missing-value bucket first.
        keys = np.insert(keys, 0, np.NaN)
        counts = np.insert(counts, 0, null_mask.sum())

    return keys, counts
예제 #17
0
 def test_business_freq_convert(self):
     """A 'BM' series plots on monthly periods."""
     saved_n = tm.N
     tm.N = 300
     try:
         bts = tm.makeTimeSeries().asfreq('BM')
     finally:
         # Always restore the module-level default, even if building the
         # series fails, so other tests are not affected.
         tm.N = saved_n
     ts = bts.to_period('M')
     ax = bts.plot()
     self.assertEqual(ax.get_lines()[0].get_xydata()[0, 0],
                      ts.index[0].ordinal)
     idx = ax.get_lines()[0].get_xdata()
     self.assertEqual(PeriodIndex(data=idx).freqstr, 'M')
예제 #18
0
파일: resample.py 프로젝트: zmyer/pandas
    def _get_time_period_bins(self, ax):
        if not isinstance(ax, DatetimeIndex):
            raise TypeError('axis must be a DatetimeIndex, but got '
                            'an instance of %r' % type(ax).__name__)

        if not len(ax):
            binner = labels = PeriodIndex(
                data=[], freq=self.freq, name=ax.name)
            return binner, [], labels

        labels = binner = PeriodIndex(start=ax[0],
                                      end=ax[-1],
                                      freq=self.freq,
                                      name=ax.name)

        end_stamps = (labels + 1).asfreq(self.freq, 's').to_timestamp()
        if ax.tzinfo:
            end_stamps = end_stamps.tz_localize(ax.tzinfo)
        bins = ax.searchsorted(end_stamps, side='left')

        return binner, bins, labels
예제 #19
0
 def test_business_freq_convert(self):
     """A 'BM' series plots on monthly periods."""
     import matplotlib.pyplot as plt
     plt.close('all')
     saved_n = tm.N
     tm.N = 300
     try:
         bts = tm.makeTimeSeries().asfreq('BM')
     finally:
         # Always restore the module-level default, even if building the
         # series fails, so other tests are not affected.
         tm.N = saved_n
     ts = bts.to_period('M')
     ax = bts.plot()
     # assertEqual rather than assert_(a, b): the two-argument form treats
     # ``b`` as the failure message and never compares the values.
     self.assertEqual(ax.get_lines()[0].get_xydata()[0, 0],
                      ts.index[0].ordinal)
     idx = ax.get_lines()[0].get_xdata()
     self.assertEqual(PeriodIndex(data=idx).freqstr, 'M')
예제 #20
0
def _value_counts_arraylike(values, dropna=True):
    """Count occurrences of each distinct value in ``values``.

    Parameters
    ----------
    values : array-like
    dropna : boolean, default True
        Passed to the hashtable counters; for datetimelike input the
        ``iNaT`` key is additionally filtered out when True.

    Returns
    -------
    (keys, counts)
        For tz-aware datetime and period input the keys are rebuilt as a
        DatetimeIndex / PeriodIndex.
    """
    from pandas.core.series import Series

    is_datetimetz_type = is_datetimetz(values)
    is_period_type = (is_period_dtype(values) or
                      is_period_arraylike(values))

    orig = values
    values = Series(values).values
    dtype = values.dtype

    if needs_i8_conversion(dtype) or is_period_type:

        from pandas.tseries.index import DatetimeIndex
        from pandas.tseries.period import PeriodIndex

        if is_period_type:
            # values may be an object
            values = PeriodIndex(values)
            freq = values.freq

        # Count on the underlying int64 representation.
        values = values.view(np.int64)
        keys, counts = htable.value_count_int64(values, dropna)

        if dropna:
            not_nat = keys != iNaT
            keys = keys[not_nat]
            counts = counts[not_nat]

        # convert the keys back to the dtype we came in
        keys = keys.astype(dtype)

        # dtype handling
        if is_datetimetz_type:
            keys = DatetimeIndex._simple_new(keys, tz=orig.dtype.tz)
        if is_period_type:
            keys = PeriodIndex._simple_new(keys, freq=freq)
        return keys, counts

    if is_integer_dtype(dtype):
        return htable.value_count_int64(_ensure_int64(values), dropna)

    if is_float_dtype(dtype):
        return htable.value_count_float64(_ensure_float64(values), dropna)

    values = _ensure_object(values)
    null_mask = isnull(values)
    keys, counts = htable.value_count_object(values, null_mask)
    if not dropna and null_mask.any():
        # Put the missing-value bucket first.
        keys = np.insert(keys, 0, np.NaN)
        counts = np.insert(counts, 0, null_mask.sum())

    return keys, counts
예제 #21
0
    def _get_new_index(self):
        """ return our new index """
        ax = self.ax
        obj = self._selected_obj

        if len(ax) == 0:
            new_index = PeriodIndex(data=[], freq=self.freq)
            return obj.reindex(new_index)

        start = ax[0].asfreq(self.freq, how=self.convention)
        end = ax[-1].asfreq(self.freq, how='end')

        return period_range(start, end, freq=self.freq)
예제 #22
0
def maybe_to_datetimelike(data, copy=False):
    """
    return a DelegatedClass of a Series that is datetimelike
      (e.g. datetime64[ns],timedelta64[ns] dtype or a Series of Periods)
    raise TypeError if this is not possible.

    For categorical input the categories are inspected instead of the
    values, and the original Series is handed to the delegate as ``orig``.

    Parameters
    ----------
    data : Series
    copy : boolean, default False
           copy the input data

    Returns
    -------
    DelegatedClass

    Raises
    ------
    TypeError
        If ``data`` is not a Series or is not datetimelike.
    """
    from pandas import Series

    if not isinstance(data, Series):
        raise TypeError("cannot convert an object of type {0} to a "
                        "datetimelike index".format(type(data)))

    index = data.index
    name = data.name
    orig = data if is_categorical_dtype(data) else None
    if orig is not None:
        data = orig.values.categories

    if is_datetime64_dtype(data.dtype):
        return DatetimeProperties(DatetimeIndex(data, copy=copy, freq='infer'),
                                  index, name=name, orig=orig)
    elif is_datetime64tz_dtype(data.dtype):
        # Use the saved Series name: for categorical input ``data`` has been
        # replaced by the categories, so ``data.name`` is no longer the
        # Series name.
        return DatetimeProperties(DatetimeIndex(data, copy=copy, freq='infer',
                                                ambiguous='infer'),
                                  index, name=name, orig=orig)
    elif is_timedelta64_dtype(data.dtype):
        return TimedeltaProperties(TimedeltaIndex(data, copy=copy,
                                                  freq='infer'), index,
                                   name=name, orig=orig)
    else:
        if is_period_arraylike(data):
            return PeriodProperties(PeriodIndex(data, copy=copy), index,
                                    name=name, orig=orig)
        if is_datetime_arraylike(data):
            return DatetimeProperties(DatetimeIndex(data, copy=copy,
                                                    freq='infer'), index,
                                      name=name, orig=orig)

    raise TypeError("cannot convert an object of type {0} to a "
                    "datetimelike index".format(type(data)))
예제 #23
0
    def test_periodindex(self):
        """Categorical.from_array on a PeriodIndex yields sorted period
        levels and matching integer labels."""
        idx1 = PeriodIndex(
            ['2014-01', '2014-01', '2014-02', '2014-02', '2014-03', '2014-03'],
            freq='M')
        cat1 = Categorical.from_array(idx1)

        exp_arr = np.array([0, 0, 1, 1, 2, 2])
        exp_idx = PeriodIndex(['2014-01', '2014-02', '2014-03'], freq='M')

        self.assert_numpy_array_equal(cat1.labels, exp_arr)
        # assertTrue replaces the removed unittest alias ``assert_``.
        self.assertTrue(cat1.levels.equals(exp_idx))

        # Unordered input: levels stay sorted, labels follow the input.
        idx2 = PeriodIndex(
            ['2014-03', '2014-03', '2014-02', '2014-01', '2014-03', '2014-01'],
            freq='M')
        cat2 = Categorical.from_array(idx2)

        exp_arr = np.array([2, 2, 1, 0, 2, 0])

        self.assert_numpy_array_equal(cat2.labels, exp_arr)
        self.assertTrue(cat2.levels.equals(exp_idx))

        # Descending input with a gap (2013-06 is absent).
        idx3 = PeriodIndex([
            '2013-12', '2013-11', '2013-10', '2013-09', '2013-08', '2013-07',
            '2013-05'
        ],
                           freq='M')
        cat3 = Categorical.from_array(idx3)

        exp_arr = np.array([6, 5, 4, 3, 2, 1, 0])
        exp_idx = PeriodIndex([
            '2013-05', '2013-07', '2013-08', '2013-09', '2013-10', '2013-11',
            '2013-12'
        ],
                              freq='M')

        self.assert_numpy_array_equal(cat3.labels, exp_arr)
        self.assertTrue(cat3.levels.equals(exp_idx))
예제 #24
0
    def test_from_weekly_resampling(self):
        """Monthly data plotted first, weekly second: every line must be on
        a weekly frequency with the expected ordinals, for both
        ``Series.plot`` and ``tsplot``."""
        idxh = date_range('1/1/1999', periods=52, freq='W')
        idxl = date_range('1/1/1999', periods=12, freq='M')
        high = Series(np.random.randn(len(idxh)), idxh)
        low = Series(np.random.randn(len(idxl)), idxl)

        expected_h = idxh.to_period().asi8
        expected_l = np.array([
            1514, 1519, 1523, 1527, 1531, 1536, 1540, 1544, 1549, 1553, 1558,
            1562
        ])

        def check_lines(plotted_lines):
            for line in plotted_lines:
                plotted = PeriodIndex(data=line.get_xdata())
                self.assertTrue(plotted.freq.startswith('W'))
                xdata = line.get_xdata(orig=False)
                # idxl lines have 12 points; everything else is idxh
                expected = expected_l if len(xdata) == 12 else expected_h
                self.assert_numpy_array_equal(xdata, expected)

        low.plot()
        ax = high.plot()
        check_lines(ax.get_lines())
        tm.close()

        # tsplot
        from pandas.tseries.plotting import tsplot
        import matplotlib.pyplot as plt

        tsplot(low, plt.Axes.plot)
        check_lines(tsplot(high, plt.Axes.plot))
예제 #25
0
    def test_mixed_freq_lf_first(self):
        """Low-frequency data plotted first: every line must end up on the
        higher frequency, and both legend entries must be kept."""
        import matplotlib.pyplot as plt
        plt.close('all')
        idxh = date_range('1/1/1999', periods=365, freq='D')
        idxl = date_range('1/1/1999', periods=12, freq='M')
        high = Series(np.random.randn(len(idxh)), idxh)
        low = Series(np.random.randn(len(idxl)), idxl)
        low.plot(legend=True)
        ax = high.plot(legend=True)
        for line in ax.get_lines():
            # assertEqual replaces the removed alias ``assert_`` and
            # reports both values on failure.
            self.assertEqual(PeriodIndex(data=line.get_xdata()).freq, 'D')
        leg = ax.get_legend()
        self.assertEqual(len(leg.texts), 2)

        # Same check for hourly data followed by minutely data.
        plt.close('all')
        idxh = date_range('1/1/1999', periods=240, freq='T')
        idxl = date_range('1/1/1999', periods=4, freq='H')
        high = Series(np.random.randn(len(idxh)), idxh)
        low = Series(np.random.randn(len(idxl)), idxl)
        low.plot()
        ax = high.plot()
        for line in ax.get_lines():
            self.assertEqual(PeriodIndex(data=line.get_xdata()).freq, 'T')
예제 #26
0
파일: index.py 프로젝트: joaonatali/pandas
    def to_period(self, freq=None):
        """
        Cast to PeriodIndex at a particular frequency

        When ``freq`` is omitted, the period alias of this index's own
        frequency is used.

        Raises
        ------
        ValueError
            If ``freq`` is not given and the index has no frequency.
        """
        from pandas.tseries.period import PeriodIndex

        if freq is None:
            if self.freq is None:
                raise ValueError("You must pass a freq argument as current "
                                 "index has none.")
            freq = get_period_alias(self.freqstr)

        return PeriodIndex(self.values, freq=freq)
예제 #27
0
 def convert(values, units, axis):
     """Convert ``values`` to period date values using ``axis.freq``.

     Scalars and Index elements go through ``get_datevalue``; a
     PeriodIndex is converted with ``asfreq``; other sequences are built
     into a PeriodIndex. Anything else is returned unchanged.

     Raises
     ------
     TypeError
         If ``axis`` has no ``freq`` attribute.
     """
     if not hasattr(axis, 'freq'):
         raise TypeError('Axis must have `freq` set to convert to Periods')

     scalar_types = (str, datetime, Period, pydt.date, pydt.time)
     is_scalar = (isinstance(values, scalar_types) or
                  com.is_integer(values) or
                  com.is_float(values))
     if is_scalar:
         return get_datevalue(values, axis.freq)

     if isinstance(values, PeriodIndex):
         converted = values.asfreq(axis.freq)
         return converted.values

     if isinstance(values, Index):
         return values.map(lambda item: get_datevalue(item, axis.freq))

     if isinstance(values, (list, tuple, np.ndarray, Index)):
         as_periods = PeriodIndex(values, freq=axis.freq)
         return as_periods.values

     return values
예제 #28
0
    def test_resample_fill_missing(self):
        """Resampling a gappy annual period series matches resampling its
        timestamp form and converting back, with and without forward fill."""
        rng = PeriodIndex([2000, 2005, 2007, 2009], freq='A')
        s = TimeSeries(np.random.randn(4), index=rng)
        stamps = s.to_timestamp()

        for options in ({}, {'fill_method': 'ffill'}):
            filled = s.resample('A', **options)
            expected = stamps.resample('A', **options).to_period('A')
            assert_series_equal(filled, expected)
예제 #29
0
    def _get_new_index(self):
        """ return our new index """
        ax = self.ax
        ax_attrs = ax._get_attributes_dict()
        ax_attrs['freq'] = self.freq
        obj = self._selected_obj

        if len(ax) == 0:
            new_index = PeriodIndex(data=[], **ax_attrs)
            return obj.reindex(new_index)

        start = ax[0].asfreq(self.freq, how=self.convention)
        end = ax[-1].asfreq(self.freq, how='end')

        return period_range(start, end, **ax_attrs)
예제 #30
0
def factorize(values, sort=False, order=None, na_sentinel=-1):
    """
    Encode input values as an enumerated type or categorical variable

    Parameters
    ----------
    values : ndarray (1-d)
        Sequence
    sort : boolean, default False
        Sort by values
    order :
        Not used by this function.
    na_sentinel: int, default -1
        Value to mark "not found"

    Returns
    -------
    labels : ndarray
        Platform-int positions into ``uniques``; ``na_sentinel`` marks
        values that were not found.
    uniques : ndarray or PeriodIndex
        The distinct values, sorted when ``sort`` is True. datetime64
        input is viewed back as 'M8[ns]' and PeriodIndex input is
        rebuilt with the original frequency.
    """
    from pandas.tseries.period import PeriodIndex
    vals = np.asarray(values)
    is_datetime = com.is_datetime64_dtype(vals)
    (hash_klass, vec_klass), vals = _get_data_algo(vals, _hashtables)

    table = hash_klass(len(vals))
    uniques = vec_klass()
    labels = table.get_labels(vals, uniques, 0, na_sentinel)

    labels = com._ensure_platform_int(labels)

    uniques = uniques.to_array()

    if sort and len(uniques) > 0:
        sorter = uniques.argsort()
        reverse_indexer = np.empty(len(sorter), dtype=np.int_)
        reverse_indexer.put(sorter, np.arange(len(sorter)))

        # Honor ``na_sentinel`` instead of assuming -1: locate the missing
        # entries by the sentinel itself, remap only the real labels (the
        # sentinel positions are pointed at slot 0 so ``take`` stays in
        # bounds), then write the sentinel back.
        mask = labels == na_sentinel
        labels = reverse_indexer.take(np.where(mask, 0, labels))
        np.putmask(labels, mask, na_sentinel)

        uniques = uniques.take(sorter)

    if is_datetime:
        uniques = uniques.view('M8[ns]')
    if isinstance(values, PeriodIndex):
        uniques = PeriodIndex(ordinal=uniques, freq=values.freq)

    return labels, uniques
예제 #31
0
def duplicated(values, keep='first'):
    """
    Return boolean ndarray denoting duplicate values

    .. versionadded:: 0.19.0

    Parameters
    ----------
    values : array-like with a ``dtype`` attribute
    keep : {'first', 'last', False}, default 'first'
        - ``first`` : Mark duplicates as ``True`` except for the first
          occurrence.
        - ``last`` : Mark duplicates as ``True`` except for the last
          occurrence.
        - False : Mark all duplicates as ``True``.

    Returns
    -------
    duplicated : ndarray
    """

    dtype = values.dtype

    # no need to revert to original type
    if is_datetime_or_timedelta_dtype(dtype) or is_datetimetz(dtype):
        if isinstance(values, (ABCSeries, ABCIndex)):
            raw = values.values
        else:
            raw = values
        values = raw.view(np.int64)
    elif is_period_arraylike(values):
        from pandas.tseries.period import PeriodIndex
        values = PeriodIndex(values).asi8
    elif is_categorical_dtype(dtype):
        values = values.values.codes
    elif isinstance(values, (ABCSeries, ABCIndex)):
        values = values.values

    # The hashtable routine is chosen from the dtype captured on entry,
    # not from the converted values.
    if is_integer_dtype(dtype):
        return htable.duplicated_int64(_ensure_int64(values), keep=keep)

    if is_float_dtype(dtype):
        return htable.duplicated_float64(_ensure_float64(values), keep=keep)

    return htable.duplicated_object(_ensure_object(values), keep=keep)
예제 #32
0
 def _convert_1d(values, units, axis):
     if not hasattr(axis, 'freq'):
         raise TypeError('Axis must have `freq` set to convert to Periods')
     valid_types = (compat.string_types, datetime, Period, pydt.date,
                    pydt.time)
     if (isinstance(values, valid_types) or is_integer(values)
             or is_float(values)):
         return get_datevalue(values, axis.freq)
     if isinstance(values, PeriodIndex):
         return values.asfreq(axis.freq)._values
     if isinstance(values, Index):
         return values.map(lambda x: get_datevalue(x, axis.freq))
     if is_period_arraylike(values):
         return PeriodIndex(values, freq=axis.freq)._values
     if isinstance(values, (list, tuple, np.ndarray, Index)):
         return [get_datevalue(x, axis.freq) for x in values]
     return values
예제 #33
0
 def test_mixed_freq_regular_first(self):
     """Plot a regular series, then an irregular subset of it.

     Both plotted lines must carry 'B' period x-data and the x-limits
     must span the regular series.
     """
     import matplotlib.pyplot as plt  # noqa
     full_series = tm.makeTimeSeries()
     sparse_series = full_series[[0, 5, 10, 11, 12, 13, 14, 15]]
     ax = full_series.plot()  # noqa
     ax2 = sparse_series.plot(style='g')

     drawn = ax2.get_lines()
     full_periods = PeriodIndex(drawn[0].get_xdata())
     sparse_periods = PeriodIndex(drawn[1].get_xdata())
     self.assertTrue(full_periods.equals(full_series.index.to_period('B')))
     self.assertTrue(
         sparse_periods.equals(sparse_series.index.to_period('B')))

     lower, upper = ax2.get_xlim()
     span = full_series.index.to_period()
     self.assertEqual(lower, span[0].ordinal)
     self.assertEqual(upper, span[-1].ordinal)
예제 #34
0
    def test_from_resampling_area_line_mixed(self):
        """Stacked line/area plots of weekly and monthly frames on shared
        axes: every line must be on the weekly frequency with the expected
        x ordinals and cumulative y values, in either plotting order."""
        idxh = date_range('1/1/1999', periods=52, freq='W')
        idxl = date_range('1/1/1999', periods=12, freq='M')
        high = DataFrame(np.random.rand(len(idxh), 3),
                         index=idxh,
                         columns=[0, 1, 2])
        low = DataFrame(np.random.rand(len(idxl), 3),
                        index=idxl,
                        columns=[0, 1, 2])

        low_x = np.array([
            1514, 1519, 1523, 1527, 1531, 1536, 1540, 1544, 1549, 1553,
            1558, 1562
        ],
                         dtype=np.float64)
        high_x = idxh.to_period().asi8.astype(np.float64)

        def check_frame(ax, frame, expected_x, first_line):
            # Lines first_line..first_line+2 belong to ``frame``; the
            # expected y values accumulate column by column.
            stacked = np.zeros(len(expected_x), dtype=np.float64)
            for col in range(3):
                line = ax.lines[first_line + col]
                self.assertEqual(
                    PeriodIndex(data=line.get_xdata()).freq, idxh.freq)
                self.assert_numpy_array_equal(line.get_xdata(orig=False),
                                              expected_x)
                # check stacked values are correct
                stacked += frame[col].values
                self.assert_numpy_array_equal(line.get_ydata(orig=False),
                                              stacked)

        kind_pairs = [('line', 'area'), ('area', 'line')]

        # low to high
        for kind1, kind2 in kind_pairs:
            ax = low.plot(kind=kind1, stacked=True)
            ax = high.plot(kind=kind2, stacked=True, ax=ax)
            check_frame(ax, low, low_x, 0)
            check_frame(ax, high, high_x, 3)

        # high to low
        for kind1, kind2 in kind_pairs:
            ax = high.plot(kind=kind1, stacked=True)
            ax = low.plot(kind=kind2, stacked=True, ax=ax)
            check_frame(ax, high, high_x, 0)
            check_frame(ax, low, low_x, 3)