예제 #1
0
파일: period.py 프로젝트: BrenBarn/pandas
 def _maybe_convert_timedelta(self, other):
     """
     Express ``other`` as a number of ``self.freq`` periods.

     Parameters
     ----------
     other : timedelta, np.timedelta64, Timedelta, offsets.DateOffset
         or np.ndarray

     Returns
     -------
     converted
         * timedelta-like scalar and Tick frequency: ``nanos // offset_nanos``
           when ``other`` is an exact multiple of the frequency.
         * DateOffset: ``other.n`` when its base alias equals
           ``self.freq.rule_code``.
         * integer-dtype ndarray: ``other`` unchanged.
         * timedelta64-dtype ndarray and Tick frequency: the element-wise
           quotient, when every element is an exact multiple.

     Raises
     ------
     ValueError
         If none of the cases above applies.
     """
     if isinstance(other, (timedelta, np.timedelta64, offsets.Tick, Timedelta)):
         offset = frequencies.to_offset(self.freq.rule_code)
         if isinstance(offset, offsets.Tick):
             nanos = tslib._delta_to_nanoseconds(other)
             offset_nanos = tslib._delta_to_nanoseconds(offset)
             if nanos % offset_nanos == 0:
                 return nanos // offset_nanos
     elif isinstance(other, offsets.DateOffset):
         freqstr = frequencies.get_standard_freq(other)
         base = frequencies.get_base_alias(freqstr)
         if base == self.freq.rule_code:
             return other.n
     elif isinstance(other, np.ndarray):
         if com.is_integer_dtype(other):
             return other
         elif com.is_timedelta64_dtype(other):
             offset = frequencies.to_offset(self.freq)
             if isinstance(offset, offsets.Tick):
                 nanos = tslib._delta_to_nanoseconds(other)
                 offset_nanos = tslib._delta_to_nanoseconds(offset)
                 # Every element must divide evenly.  The previous
                 # ``(...).all() == 0`` form accepted the array as soon as a
                 # single remainder was zero.
                 if (nanos % offset_nanos == 0).all():
                     return nanos // offset_nanos
     # Nothing matched: the input is not compatible with this frequency.
     msg = "Input has different freq from PeriodIndex(freq={0})"
     raise ValueError(msg.format(self.freqstr))
예제 #2
0
    def create_input(self, trace, weather_source):
        '''Assemble the model input frame for an energy trace.

        The trace values are resampled (summed) to ``self.freq_str`` and
        paired with temperatures looked up on the resampled index.

        Parameters
        ----------
        trace : eemeter.structures.EnergyTrace
            Provides the energy data through ``trace.data``.
        weather_source : eemeter.weather.WeatherSourceBase
            Queried via ``indexed_temperatures(index, "degF")``.

        Returns
        -------
        input_df : pandas.DataFrame
            Two columns, ``energy`` and ``tempF``, in that order.

        Raises
        ------
        ValueError
            If the trace index has a frequency that compares greater than
            ``self.freq_str``, i.e. the request would be an upsample.
        '''
        source_freq = trace.data.index.freq
        if source_freq is not None:
            if to_offset(source_freq) > to_offset(self.freq_str):
                template = "Will not upsample '{}' to '{}'"
                raise ValueError(template.format(source_freq, self.freq_str))

        resampler = trace.data.value.resample(self.freq_str)
        energy = resampler.sum()
        tempF = weather_source.indexed_temperatures(energy.index, "degF")

        column_order = ["energy", "tempF"]
        frame_data = {"energy": energy, "tempF": tempF}
        return pd.DataFrame(frame_data, columns=column_order)
예제 #3
0
def test_to_offset_pd_timedelta_invalid():
    """A zero-length Timedelta must be rejected as a frequency (gh-9064)."""
    zero_td = Timedelta(microseconds=0)
    expected_msg = "Invalid frequency: 0 days 00:00:00"

    with pytest.raises(ValueError, match=expected_msg):
        frequencies.to_offset(zero_td)
예제 #4
0
파일: timedelta.py 프로젝트: tdsmith/pandas
    def __new__(cls, values, freq=None, start=None, end=None, periods=None,
                closed=None):
        """
        Build an instance from ``values``, or generate one from
        ``start``/``end``/``periods``/``freq`` when ``values`` is None.

        ``freq`` may be None, a DateOffset, anything ``to_offset`` accepts,
        or the string ``'infer'``; in the last case the frequency is taken
        from ``result.inferred_freq`` when that is truthy.

        Raises
        ------
        TypeError
            If ``periods`` is neither None, a float nor an integer.
        ValueError
            If ``values`` is None, ``freq`` is None and any of ``periods``,
            ``start``, ``end`` is None.
        """
        # Normalise anything that is not already an offset, leaving the
        # 'infer' sentinel alone so it can be handled after construction.
        if freq is not None and not isinstance(freq, DateOffset):
            if freq != 'infer':
                freq = to_offset(freq)

        if periods is None:
            pass
        elif lib.is_float(periods):
            periods = int(periods)
        elif not lib.is_integer(periods):
            template = '`periods` must be a number, got {periods}'
            raise TypeError(template.format(periods=periods))

        if values is None:
            if freq is None and com._any_none(periods, start, end):
                raise ValueError('Must provide freq argument if no data is '
                                 'supplied')
            return cls._generate(start, end, periods, freq, closed=closed)

        new_index = cls._simple_new(values, freq=freq)
        if freq == 'infer':
            guessed = new_index.inferred_freq
            if guessed:
                new_index._freq = to_offset(guessed)

        return new_index
예제 #5
0
    def _resample_timestamps(self):
        """
        Resample ``self.obj`` onto the bins prepared by
        ``_get_binner_for_resample`` and return the result.

        When the axis has a declared or inferred frequency the data is either
        aggregated per bin with ``self._agg_method`` or, on the upsampling
        shortcut, copied/reindexed onto the bin edges.  An axis without any
        frequency always goes through the groupby aggregation.  Finally
        ``self.loffset`` (string, ``DateOffset`` or ``timedelta``) is added to
        the result index when that index is a non-empty ``DatetimeIndex``.

        Raises
        ------
        AssertionError
            If the upsampling shortcut is reached with a truthy ``self.axis``.
        """
        # assumes set_grouper(obj) already called
        axlabels = self.ax

        # Called before ``self.grouper`` / ``self.binner`` are read below.
        self._get_binner_for_resample()
        grouper = self.grouper
        binner = self.binner
        obj = self.obj

        # Determine if we're downsampling
        if axlabels.freq is not None or axlabels.inferred_freq is not None:

            # Fewer bins than labels, or an explicit ``how``, means aggregate.
            if len(grouper.binlabels) < len(axlabels) or self.how is not None:
                # downsample
                grouped = obj.groupby(grouper, axis=self.axis)
                result = grouped.aggregate(self._agg_method)
                # GH2073
                if self.fill_method is not None:
                    result = result.fillna(method=self.fill_method,
                                           limit=self.limit)

            else:
                # upsampling shortcut
                if self.axis:
                    raise AssertionError('axis must be 0')

                # Pick the bin edge matching the closed side: drop the first
                # edge for right-closed bins, the last one otherwise.
                if self.closed == 'right':
                    res_index = binner[1:]
                else:
                    res_index = binner[:-1]

                # if we have the same frequency as our axis, then we are equal sampling
                # even if how is None
                if self.fill_method is None and self.limit is None and to_offset(
                    axlabels.inferred_freq) == self.freq:
                    result = obj.copy()
                    result.index = res_index
                else:
                    result = obj.reindex(res_index, method=self.fill_method,
                                         limit=self.limit)
        else:
            # Irregular data, have to use groupby
            grouped = obj.groupby(grouper, axis=self.axis)
            result = grouped.aggregate(self._agg_method)

            if self.fill_method is not None:
                result = result.fillna(method=self.fill_method,
                                       limit=self.limit)

        # A string loffset is converted to an offset object first.
        loffset = self.loffset
        if isinstance(loffset, compat.string_types):
            loffset = to_offset(self.loffset)

        # Shift the labels only for a non-empty DatetimeIndex result.
        if isinstance(loffset, (DateOffset, timedelta)):
            if (isinstance(result.index, DatetimeIndex)
                    and len(result.index) > 0):

                result.index = result.index + loffset

        return result
예제 #6
0
    def test_frequency_misc(self):
        """
        Smoke-test assorted helpers of the frequencies module: freq group
        and code lookup, ``to_offset`` round-trips and tuple input, the
        error types raised for invalid input, and ``get_standard_freq``.
        """
        # NOTE: uses ``assertEqual``; the ``assertEquals`` alias is deprecated
        # and no longer exists on Python 3.12+.
        self.assertEqual(fmod.get_freq_group('T'),
                         fmod.FreqGroup.FR_MIN)

        code, _ = fmod.get_freq_code(offsets.Hour())
        self.assertEqual(code, fmod.FreqGroup.FR_HR)

        code, stride = fmod.get_freq_code((5, 'T'))
        self.assertEqual(code, fmod.FreqGroup.FR_MIN)
        self.assertEqual(stride, 5)

        # An offset passed to to_offset comes back equal to itself.
        offset = offsets.Hour()
        result = fmod.to_offset(offset)
        self.assertEqual(result, offset)

        # (stride, alias) tuples are accepted as well.
        result = fmod.to_offset((5, 'T'))
        expected = offsets.Minute(5)
        self.assertEqual(result, expected)

        self.assertRaises(KeyError, fmod.get_freq_code, (5, 'baz'))

        self.assertRaises(ValueError, fmod.to_offset, '100foo')

        self.assertRaises(ValueError, fmod.to_offset, ('', ''))

        result = fmod.get_standard_freq(offsets.Hour())
        self.assertEqual(result, 'H')
예제 #7
0
파일: period.py 프로젝트: DGrady/pandas
 def _maybe_convert_timedelta(self, other):
     """
     Express ``other`` as a number of ``self.freq`` periods.

     Parameters
     ----------
     other : timedelta, np.timedelta64, Timedelta, offsets.DateOffset
         or np.ndarray

     Returns
     -------
     converted
         * timedelta-like scalar and Tick frequency: ``nanos // offset_nanos``
           when ``other`` is an exact multiple of the frequency.
         * DateOffset: ``other.n`` when the base alias of its rule code
           equals ``self.freq.rule_code``.
         * integer-dtype ndarray: ``other`` unchanged.
         * timedelta64-dtype ndarray and Tick frequency: the element-wise
           quotient, when every element is an exact multiple.

     Raises
     ------
     IncompatibleFrequency
         If ``other`` is a DateOffset with a different base frequency, or
         if none of the cases above applies.
     """
     if isinstance(other, (timedelta, np.timedelta64,
                           offsets.Tick, Timedelta)):
         offset = frequencies.to_offset(self.freq.rule_code)
         if isinstance(offset, offsets.Tick):
             nanos = tslib._delta_to_nanoseconds(other)
             offset_nanos = tslib._delta_to_nanoseconds(offset)
             if nanos % offset_nanos == 0:
                 return nanos // offset_nanos
     elif isinstance(other, offsets.DateOffset):
         freqstr = other.rule_code
         base = frequencies.get_base_alias(freqstr)
         if base == self.freq.rule_code:
             return other.n
         msg = _DIFFERENT_FREQ_INDEX.format(self.freqstr, other.freqstr)
         raise IncompatibleFrequency(msg)
     elif isinstance(other, np.ndarray):
         if is_integer_dtype(other):
             return other
         elif is_timedelta64_dtype(other):
             offset = frequencies.to_offset(self.freq)
             if isinstance(offset, offsets.Tick):
                 nanos = tslib._delta_to_nanoseconds(other)
                 offset_nanos = tslib._delta_to_nanoseconds(offset)
                 # Every element must divide evenly.  The previous
                 # ``(...).all() == 0`` form accepted the array as soon as a
                 # single remainder was zero.
                 if (nanos % offset_nanos == 0).all():
                     return nanos // offset_nanos
     # raise when input doesn't have freq
     msg = "Input has different freq from PeriodIndex(freq={0})"
     raise IncompatibleFrequency(msg.format(self.freqstr))
예제 #8
0
def test_anchored_shortcuts():
    """Bare anchored aliases must equal their explicitly anchored form."""
    alias_pairs = (("W", "W-SUN"), ("Q", "Q-DEC"))
    for shorthand, explicit in alias_pairs:
        result = frequencies.to_offset(shorthand)
        expected = frequencies.to_offset(explicit)
        assert result == expected
예제 #9
0
def test_to_offset_negative():
    """A leading minus sign produces an offset with a negative ``n``."""
    cases = (("-1S", -1), ("-5min10s", -310))
    for freqstr, expected_n in cases:
        result = frequencies.to_offset(freqstr)
        assert result.n == expected_n
예제 #10
0
def test_to_offset_leading_zero():
    """Zero-padded components are parsed and combined into one ``n``."""
    cases = (("00H 00T 01S", 1), ("-00H 03T 14S", -194))
    for freqstr, expected_n in cases:
        result = frequencies.to_offset(freqstr)
        assert result.n == expected_n
예제 #11
0
def test_to_offset_negative():
    """Negative frequency strings yield offsets with a negative ``n``."""
    expectations = [('-1S', -1), ('-5min10s', -310)]
    for freqstr, wanted in expectations:
        parsed = to_offset(freqstr)
        assert parsed.n == wanted
예제 #12
0
def test_to_offset_leading_zero():
    """Components written with leading zeros still parse to the right ``n``."""
    expectations = [('00H 00T 01S', 1), ('-00H 03T 14S', -194)]
    for freqstr, wanted in expectations:
        parsed = to_offset(freqstr)
        assert parsed.n == wanted
예제 #13
0
def test_anchored_shortcuts():
    """'W' and 'Q' are shorthand for 'W-SUN' and 'Q-DEC' respectively."""
    for short_alias, full_alias in [('W', 'W-SUN'), ('Q', 'Q-DEC')]:
        from_short = to_offset(short_alias)
        from_full = to_offset(full_alias)
        assert from_short == from_full
예제 #14
0
def test_to_offset_invalid(freqstr):
    """Invalid frequency strings raise ValueError naming the input."""
    # see gh-13930
    raw_msg = "Invalid frequency: {freqstr}".format(freqstr=freqstr)

    # Some inputs contain regex special characters, so the message is
    # escaped before being used as a match pattern.
    pattern = re.escape(raw_msg)
    with pytest.raises(ValueError, match=pattern):
        frequencies.to_offset(freqstr)
예제 #15
0
    def __new__(cls, data=None, unit=None, freq=None, start=None, end=None,
                periods=None, closed=None, dtype=None, copy=False,
                name=None, verify_integrity=True):
        """
        Construct the index from ``data``, or generate a range from
        ``start``/``end``/``periods``/``freq`` when ``data`` is None.

        ``freq='infer'`` defers the frequency to ``subarr.inferred_freq``;
        any other non-DateOffset value is passed through ``to_offset``.

        Raises
        ------
        ValueError
            If ``data`` is None, ``freq`` is None and any of ``periods``,
            ``start``, ``end`` is None; or if ``data`` is a scalar.
        """
        # Fast path: an existing TimedeltaIndex with nothing to override.
        if isinstance(data, TimedeltaIndex) and freq is None and name is None:
            return data.copy() if copy else data._shallow_copy()

        freq_infer = False
        if not isinstance(freq, DateOffset):
            # if a passed freq is None, don't infer automatically
            if freq != 'infer':
                freq = to_offset(freq)
            else:
                freq_infer, freq = True, None

        periods = dtl.validate_periods(periods)

        if data is None:
            if freq is None and com._any_none(periods, start, end):
                raise ValueError(
                    'Must provide freq argument if no data is supplied')
            return cls._generate_range(start, end, periods, name, freq,
                                       closed=closed)

        if unit is not None:
            data = to_timedelta(data, unit=unit, box=False)

        is_collection = isinstance(data, (np.ndarray, Index, ABCSeries))
        if not is_collection and is_scalar(data):
            raise ValueError('TimedeltaIndex() must be called with a '
                             'collection of some kind, %s was passed'
                             % repr(data))

        # convert if not already
        if getattr(data, 'dtype', None) != _TD_DTYPE:
            data = to_timedelta(data, unit=unit, box=False)
        elif copy:
            data = np.array(data, copy=True)

        subarr = cls._simple_new(data, name=name, freq=freq)

        # check that we are matching freqs
        if (verify_integrity and len(subarr) > 0
                and freq is not None and not freq_infer):
            cls._validate_frequency(subarr, freq)

        if freq_infer:
            inferred = subarr.inferred_freq
            if inferred:
                subarr.freq = to_offset(inferred)

        return subarr
예제 #16
0
    def __init__(self, values, dtype=_TD_DTYPE, freq=None, copy=False):
        """
        Store ``values`` (an ndarray, or a Series/Index/array wrapping one)
        together with ``dtype`` and ``freq``.

        Raises
        ------
        ValueError
            If the unwrapped ``values`` is not an ndarray, or if
            ``freq == "infer"``.
        TypeError
            If ``values.dtype`` or ``dtype`` differs from ``_TD_DTYPE``.
        """
        if isinstance(values, (ABCSeries, ABCIndexClass)):
            values = values._values

        # Read before `values` is unwrapped to its underlying data below.
        freq_on_values = getattr(values, "_freq", None)

        if isinstance(values, type(self)):
            if freq is None:
                freq = values.freq
            elif freq and values.freq:
                freq = to_offset(freq)
                freq, _ = dtl.validate_inferred_freq(freq, values.freq, False)
            values = values._data

        if not isinstance(values, np.ndarray):
            template = (
                "Unexpected type '{}'. 'values' must be a TimedeltaArray "
                "ndarray, or Series or Index containing one of those."
            )
            raise ValueError(template.format(type(values).__name__))

        if values.dtype == 'i8':
            # for compat with datetime/timedelta/period shared methods,
            #  we can sometimes get here with int64 values, which are then
            #  reinterpreted as _TD_DTYPE without copying.
            values = values.view(_TD_DTYPE)

        if values.dtype != _TD_DTYPE:
            raise TypeError(_BAD_DTYPE.format(dtype=values.dtype))

        # The comparison itself may raise for exotic dtype arguments.
        try:
            dtype_mismatch = dtype != _TD_DTYPE
        except TypeError:
            raise TypeError(_BAD_DTYPE.format(dtype=dtype))
        else:
            if dtype_mismatch:
                raise TypeError(_BAD_DTYPE.format(dtype=dtype))

        if freq == "infer":
            raise ValueError(
                "Frequency inference not allowed in TimedeltaArray.__init__. "
                "Use 'pd.array()' instead."
            )

        if copy:
            values = values.copy()
        if freq:
            freq = to_offset(freq)

        self._data = values
        self._dtype = dtype
        self._freq = freq

        # Validate only when the input carried no `_freq` of its own.
        if freq_on_values is None and freq is not None:
            type(self)._validate_frequency(self, freq)
예제 #17
0
파일: index.py 프로젝트: dragoljub/pandas
    def intersection(self, other):
        """
        Intersect this DatetimeIndex with ``other``.

        When both operands are monotonic DatetimeIndex objects sharing the
        same anchored offset, the result is sliced directly out of one of
        them; otherwise ``Index.intersection`` is used.

        Parameters
        ----------
        other : DatetimeIndex or array-like

        Returns
        -------
        y : Index or DatetimeIndex
        """
        def _generic_intersection(rhs):
            # Generic path; fill in the offset from the inferred frequency
            # when the result is a DatetimeIndex without one.
            result = Index.intersection(self, rhs)
            if isinstance(result, DatetimeIndex) and result.freq is None:
                result.offset = to_offset(result.inferred_freq)
            return result

        if not isinstance(other, DatetimeIndex):
            try:
                other = DatetimeIndex(other)
            except TypeError:
                pass
            return _generic_intersection(other)

        offsets_unusable = (
            other.offset is None
            or self.offset is None
            or other.offset != self.offset
            or not other.offset.isAnchored()
        )
        if offsets_unusable or not (self.is_monotonic
                                    and other.is_monotonic):
            return _generic_intersection(other)

        if len(self) == 0:
            return self
        if len(other) == 0:
            return other

        # to make our life easier, "sort" the two ranges
        left, right = (self, other) if self[0] <= other[0] else (other, self)

        start = right[0]
        end = min(left[-1], right[-1])
        if end < start:
            return type(self)(data=[])

        lslice = slice(*left.slice_locs(start, end))
        return self._view_like(left.values[lslice])
예제 #18
0
    def intersection(self, other, sort=False):
        """
        Intersect this index with ``other``.

        Equal operands and empty operands are answered immediately.  When
        both operands are of the same type, monotonic, and share an anchored
        frequency, the result is sliced out of one of them; otherwise
        ``Index.intersection`` is used and the frequency is re-inferred.
        """
        self._validate_sort_keyword(sort)
        self._assert_can_do_setop(other)

        if self.equals(other):
            return self._get_reconciled_name_object(other)

        # An empty operand is its own intersection (self is checked first).
        for operand in (self, other):
            if len(operand) == 0:
                return operand.copy()

        if not isinstance(other, type(self)):
            result = Index.intersection(self, other, sort=sort)
            if isinstance(result, type(self)) and result.freq is None:
                result.freq = to_offset(result.inferred_freq)
            return result

        if (other.freq is None or self.freq is None or
                other.freq != self.freq or
                not other.freq.isAnchored() or
                not (self.is_monotonic and other.is_monotonic)):
            result = Index.intersection(self, other, sort=sort)

            # Invalidate the freq of `result`, which may not be correct at
            # this point, depending on the values.
            result.freq = None
            if not hasattr(self, 'tz'):
                result = self._shallow_copy(result._values, name=result.name,
                                            freq=None)
            else:
                result = self._shallow_copy(result._values, name=result.name,
                                            tz=result.tz, freq=None)
            if result.freq is None:
                result.freq = to_offset(result.inferred_freq)
            return result

        # to make our life easier, "sort" the two ranges
        left, right = (self, other) if self[0] <= other[0] else (other, self)

        # after sorting, the intersection always starts with the right index
        # and ends with the index of which the last elements is smallest
        start = right[0]
        end = min(left[-1], right[-1])
        if end < start:
            return type(self)(data=[])

        lslice = slice(*left.slice_locs(start, end))
        return self._shallow_copy(left.values[lslice])
예제 #19
0
    def test_to_offset_leading_plus(self):
        """A single leading '+' is accepted; malformed signs are rejected."""
        accepted = (('+1d', 1), ('+2h30min', 150))
        for freqstr, expected_n in accepted:
            result = frequencies.to_offset(freqstr)
            assert result.n == expected_n

        rejected = ['+-1d', '-+1h', '+1', '-7', '+d', '-m']
        for bad_freq in rejected:
            with pytest.raises(ValueError, match='Invalid frequency:'):
                frequencies.to_offset(bad_freq)
예제 #20
0
    def __new__(cls, values, freq=None):
        """
        Build an instance from ``values`` with the given ``freq``.

        ``freq`` may be None, a DateOffset, anything ``to_offset`` accepts,
        or ``'infer'``; with ``'infer'`` the frequency is taken from
        ``result.inferred_freq`` when that is truthy.
        """
        # Leave None, ready-made offsets and the 'infer' sentinel untouched.
        if freq is not None and not isinstance(freq, DateOffset):
            if freq != 'infer':
                freq = to_offset(freq)

        new_obj = cls._simple_new(values, freq=freq)
        if freq == 'infer':
            guessed = new_obj.inferred_freq
            if guessed:
                new_obj._freq = to_offset(guessed)

        return new_obj
예제 #21
0
def conv_resol(resolution):
    """Translate a pandas frequency into its short resolution code.

    Parameters
    ----------
    resolution
        Anything ``to_offset`` accepts.

    Returns
    -------
    ``'5'``, ``'h'`` or ``'d'`` when ``resolution`` equals 5 minutes, 1 hour
    or 1 day respectively; otherwise ``resolution`` is returned unchanged
    after the failure has been logged.
    """
    codes = {
        to_offset('5Min'): '5',
        to_offset('1H'): 'h',
        to_offset('D'): 'd',
    }
    try:
        return codes[to_offset(resolution)]
    except Exception:
        # Deliberate best-effort fallback.  ``Exception`` rather than a bare
        # ``except`` so KeyboardInterrupt/SystemExit still propagate.
        logging.error(traceback.format_exc())
        # Lazy logging args: ``"%s" % resolution`` itself raises when
        # ``resolution`` is a tuple.
        logging.warning("conv_resol returns '%s'", resolution)
        return resolution
예제 #22
0
파일: resample.py 프로젝트: rosnfeld/pandas
    def __init__(
        self,
        freq="Min",
        closed=None,
        label=None,
        how="mean",
        nperiods=None,
        axis=0,
        fill_method=None,
        limit=None,
        loffset=None,
        kind=None,
        convention=None,
        base=0,
        **kwargs
    ):
        """
        Configure the grouper.

        ``closed`` and ``label`` default to ``"right"`` when the rule code of
        ``freq`` (ignoring any ``-SUFFIX``) is one of M, A, Q, BM, BA, BQ, W,
        and to ``"left"`` otherwise.  ``convention`` defaults to ``"E"`` and
        is stored lower-cased.  A string ``loffset`` is converted with
        ``to_offset``.  ``sort=True`` is always forwarded to the parent.
        """
        freq = to_offset(freq)

        # Rule codes whose bins are closed/labelled on the right by default.
        end_types = {"M", "A", "Q", "BM", "BA", "BQ", "W"}
        base_rule = freq.rule_code.split("-", 1)[0]
        default_side = "right" if base_rule in end_types else "left"
        if closed is None:
            closed = default_side
        if label is None:
            label = default_side

        if isinstance(loffset, compat.string_types):
            loffset = to_offset(loffset)

        self.closed = closed
        self.label = label
        self.nperiods = nperiods
        self.kind = kind
        self.convention = (convention or "E").lower()
        self.loffset = loffset
        self.how = how
        self.fill_method = fill_method
        self.limit = limit
        self.base = base

        # always sort time groupers
        kwargs["sort"] = True

        super(TimeGrouper, self).__init__(freq=freq, axis=axis, **kwargs)
예제 #23
0
def test_anchored_shortcuts():
    """Anchored aliases resolve to the expected default or explicit anchor."""
    weekly = frequencies.to_offset('W')
    weekly_sunday = frequencies.to_offset('W-SUN')
    assert weekly == weekly_sunday

    # 'Q' defaults to a December anchor, same as the explicit 'Q-DEC'.
    quarter_dec = frequencies.to_offset('Q')
    quarter_dec_explicit = frequencies.to_offset('Q-DEC')
    december_end = offsets.QuarterEnd(startingMonth=12)
    assert quarter_dec == december_end
    assert quarter_dec_explicit == december_end

    quarter_may = frequencies.to_offset('Q-MAY')
    may_end = offsets.QuarterEnd(startingMonth=5)
    assert quarter_may == may_end
예제 #24
0
파일: resample.py 프로젝트: aterrel/pandas
    def __init__(self, freq='Min', closed=None, label=None, how='mean',
                 axis=0, fill_method=None, limit=None, loffset=None,
                 kind=None, convention=None, base=0, **kwargs):
        """
        Configure the grouper after validating the keyword choices.

        ``closed`` and ``label`` default to ``'right'`` when the rule code of
        ``freq`` (ignoring any ``-SUFFIX``) is one of M, A, Q, BM, BA, BQ, W,
        and to ``'left'`` otherwise.  ``convention`` defaults to ``'E'`` and
        is stored lower-cased.  A string ``loffset`` is converted with
        ``to_offset``.  ``sort=True`` is always forwarded to the parent.

        Raises
        ------
        ValueError
            If ``label``, ``closed`` or ``convention`` is not one of the
            supported values.
        """
        # Misspelled keyword values would otherwise silently fall back to
        # the defaults, so they are rejected up front.
        side_choices = {None, 'left', 'right'}
        if label not in side_choices:
            raise ValueError('Unsupported value {} for `label`'.format(label))
        if closed not in side_choices:
            raise ValueError(
                'Unsupported value {} for `closed`'.format(closed))
        convention_choices = {None, 'start', 'end', 'e', 's'}
        if convention not in convention_choices:
            raise ValueError(
                'Unsupported value {} for `convention`'.format(convention))

        freq = to_offset(freq)

        # Rule codes whose bins are closed/labelled on the right by default.
        end_types = {'M', 'A', 'Q', 'BM', 'BA', 'BQ', 'W'}
        base_rule = freq.rule_code.split('-', 1)[0]
        default_side = 'right' if base_rule in end_types else 'left'
        if closed is None:
            closed = default_side
        if label is None:
            label = default_side

        if isinstance(loffset, compat.string_types):
            loffset = to_offset(loffset)

        self.closed = closed
        self.label = label
        self.kind = kind
        self.convention = (convention or 'E').lower()
        self.loffset = loffset
        self.how = how
        self.fill_method = fill_method
        self.limit = limit
        self.base = base

        # always sort time groupers
        kwargs['sort'] = True

        super(TimeGrouper, self).__init__(freq=freq, axis=axis, **kwargs)
예제 #25
0
    def test_pi_add_offset_n_gt1_not_divisible(self, box_with_array):
        """Adding '3M' to a '2M' PeriodIndex works from either side."""
        # GH#23215
        # PeriodIndex with freq.n > 1 add offset with offset.n % freq.n != 0
        raw_pi = pd.PeriodIndex(['2016-01'], freq='2M')
        raw_expected = pd.PeriodIndex(['2016-04'], freq='2M')

        # FIXME: with transposing these tests fail
        pi = tm.box_expected(raw_pi, box_with_array, transpose=False)
        expected = tm.box_expected(raw_expected, box_with_array,
                                   transpose=False)

        left_sum = pi + to_offset('3M')
        tm.assert_equal(left_sum, expected)

        right_sum = to_offset('3M') + pi
        tm.assert_equal(right_sum, expected)
예제 #26
0
    def test_pi_add_offset_n_gt1_not_divisible(self, box):
        """Adding '3M' to a '2M' PeriodIndex works from either side."""
        # GH#23215
        # PeriodIndex with freq.n > 1 add offset with offset.n % freq.n != 0
        pi = tm.box_expected(pd.PeriodIndex(['2016-01'], freq='2M'), box)
        expected = tm.box_expected(pd.PeriodIndex(['2016-04'], freq='2M'),
                                   box)

        offset_on_right = pi + to_offset('3M')
        tm.assert_equal(offset_on_right, expected)

        offset_on_left = to_offset('3M') + pi
        tm.assert_equal(offset_on_left, expected)
예제 #27
0
파일: tdi.py 프로젝트: AbnerZheng/pandas
    def _maybe_cast_slice_bound(self, label, side, kind):
        """
        Turn a string slice bound into a timedelta bound.

        A string label is parsed and rounded to its own resolution.  The
        rounded value is the left bound; the right bound is that value plus
        one resolution unit minus one nanosecond.  Integer and float labels
        are handed to ``self._invalid_indexer``; any other label is returned
        unchanged.

        Parameters
        ----------
        label : object
        side : {'left', 'right'}
        kind : {'ix', 'loc', 'getitem'}

        Returns
        -------
        label :  object

        """
        assert kind in ['ix', 'loc', 'getitem', None]

        if not isinstance(label, compat.string_types):
            if is_integer(label) or is_float(label):
                self._invalid_indexer('slice', label)
            return label

        parsed = _coerce_scalar_to_timedelta_type(label, box=True)
        lbound = parsed.round(parsed.resolution)
        if side == 'left':
            return lbound

        one_unit = to_offset(parsed.resolution)
        return lbound + one_unit - Timedelta(1, 'ns')
예제 #28
0
파일: tdi.py 프로젝트: AbnerZheng/pandas
    def union(self, other):
        """
        Union of this TimedeltaIndex with ``other``.

        ``other`` is converted to a TimedeltaIndex when possible.  If
        ``_can_fast_union`` allows it the fast path is used; otherwise the
        result comes from ``Index.union`` and, when it is a TimedeltaIndex
        without a frequency, its frequency is set from ``inferred_freq``.

        Parameters
        ----------
        other : TimedeltaIndex or array-like

        Returns
        -------
        y : Index or TimedeltaIndex
        """
        self._assert_can_do_setop(other)

        if not isinstance(other, TimedeltaIndex):
            # Best-effort conversion; unconvertible input is used as-is.
            try:
                other = TimedeltaIndex(other)
            except (TypeError, ValueError):
                pass

        if self._can_fast_union(other):
            return self._fast_union(other)

        combined = Index.union(self, other)
        if isinstance(combined, TimedeltaIndex) and combined.freq is None:
            combined.freq = to_offset(combined.inferred_freq)
        return combined
예제 #29
0
    def last(self, offset):
        """
        Select the trailing rows of time series data that fall within
        ``offset`` of the final index entry.

        Parameters
        ----------
        offset : string, DateOffset, dateutil.relativedelta

        Examples
        --------
        ts.last('5M') -> Last 5 months

        Returns
        -------
        subset : type of caller
            ``self`` itself when the index is empty.

        Raises
        ------
        NotImplementedError
            If the index is not a DatetimeIndex.
        """
        from pandas.tseries.frequencies import to_offset
        if not isinstance(self.index, DatetimeIndex):
            raise NotImplementedError

        if len(self.index) == 0:
            return self

        span = to_offset(offset)
        cutoff = self.index[-1] - span

        # First position strictly after the cutoff timestamp.
        first_pos = self.index.searchsorted(cutoff, side='right')
        return self.ix[first_pos:]
예제 #30
0
    def first(self, offset):
        """
        Select the leading rows of time series data that fall within
        ``offset`` of the first index entry.

        Parameters
        ----------
        offset : string, DateOffset, dateutil.relativedelta

        Examples
        --------
        ts.first('10D') -> First 10 days

        Returns
        -------
        subset : type of caller
            ``self`` itself when the index is empty.

        Raises
        ------
        NotImplementedError
            If the index is not a DatetimeIndex.
        """
        from pandas.tseries.frequencies import to_offset
        if not isinstance(self.index, DatetimeIndex):
            raise NotImplementedError

        if len(self.index) == 0:
            return self

        span = to_offset(offset)
        end_date = self.index[0] + span
        end = end_date

        # Tick-like, e.g. 3 weeks: when the boundary timestamp is itself in
        # the index, slice by its position so that it is excluded.
        tick_like = not span.isAnchored() and hasattr(span, '_inc')
        if tick_like and end_date in self.index:
            end = self.index.searchsorted(end_date, side='left')

        return self.ix[:end]
예제 #31
0
파일: date_range.py 프로젝트: qinxuye/mars
def date_range(start=None,
               end=None,
               periods=None,
               freq=None,
               tz=None,
               normalize=False,
               name=None,
               closed=None,
               chunk_size=None,
               **kwargs):
    """
    Build a DatetimeIndex with a fixed frequency.

    Parameters
    ----------
    start : str or datetime-like, optional
        Left bound for generating dates.
    end : str or datetime-like, optional
        Right bound for generating dates.
    periods : int, optional
        Number of periods to generate.
    freq : str or DateOffset, default 'D'
        Frequency strings can have multiples, e.g. '5H'. See
        :ref:`here <timeseries.offset_aliases>` for a list of
        frequency aliases.
    tz : str or tzinfo, optional
        Time zone name for returning localized DatetimeIndex, for example
        'Asia/Hong_Kong'. By default, the resulting DatetimeIndex is
        timezone-naive.
    normalize : bool, default False
        Normalize start/end dates to midnight before generating date range.
    name : str, default None
        Name of the resulting DatetimeIndex.
    closed : {None, 'left', 'right'}, optional
        Make the interval closed with respect to the given frequency to
        the 'left', 'right', or both sides (None, the default).
    chunk_size : optional
        Forwarded unchanged to the operand call that builds the result.
    **kwargs
        For compatibility. Has no effect on the result.

    Returns
    -------
    rng : DatetimeIndex

    Raises
    ------
    TypeError
        If ``periods`` is neither ``None`` nor a number.
    ValueError
        If the number of specified arguments among ``start``, ``end``,
        ``periods`` and ``freq`` is not exactly three, or if ``start`` or
        ``end`` is NaT.

    See Also
    --------
    DatetimeIndex : An immutable container for datetimes.
    timedelta_range : Return a fixed frequency TimedeltaIndex.
    period_range : Return a fixed frequency PeriodIndex.
    interval_range : Return a fixed frequency IntervalIndex.

    Notes
    -----
    Of the four parameters ``start``, ``end``, ``periods``, and ``freq``,
    exactly three must be specified. If ``freq`` is omitted, the resulting
    ``DatetimeIndex`` will have ``periods`` linearly spaced elements between
    ``start`` and ``end`` (closed on both sides).

    To learn more about the frequency strings, please see `this link
    <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`__.

    Examples
    --------
    **Specifying the values**

    The next four examples generate the same `DatetimeIndex`, but vary
    the combination of `start`, `end` and `periods`.

    Specify `start` and `end`, with the default daily frequency.

    >>> import mars.dataframe as md

    >>> md.date_range(start='1/1/2018', end='1/08/2018').execute()
    DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03', '2018-01-04',
                   '2018-01-05', '2018-01-06', '2018-01-07', '2018-01-08'],
                  dtype='datetime64[ns]', freq='D')

    Specify `start` and `periods`, the number of periods (days).

    >>> md.date_range(start='1/1/2018', periods=8).execute()
    DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03', '2018-01-04',
                   '2018-01-05', '2018-01-06', '2018-01-07', '2018-01-08'],
                  dtype='datetime64[ns]', freq='D')

    Specify `end` and `periods`, the number of periods (days).

    >>> md.date_range(end='1/1/2018', periods=8).execute()
    DatetimeIndex(['2017-12-25', '2017-12-26', '2017-12-27', '2017-12-28',
                   '2017-12-29', '2017-12-30', '2017-12-31', '2018-01-01'],
                  dtype='datetime64[ns]', freq='D')

    Specify `start`, `end`, and `periods`; the frequency is generated
    automatically (linearly spaced).

    >>> md.date_range(start='2018-04-24', end='2018-04-27', periods=3).execute()
    DatetimeIndex(['2018-04-24 00:00:00', '2018-04-25 12:00:00',
                   '2018-04-27 00:00:00'],
                  dtype='datetime64[ns]', freq=None)

    **Other Parameters**

    Changed the `freq` (frequency) to ``'M'`` (month end frequency).

    >>> md.date_range(start='1/1/2018', periods=5, freq='M').execute()
    DatetimeIndex(['2018-01-31', '2018-02-28', '2018-03-31', '2018-04-30',
                   '2018-05-31'],
                  dtype='datetime64[ns]', freq='M')

    Multiples are allowed

    >>> md.date_range(start='1/1/2018', periods=5, freq='3M').execute()
    DatetimeIndex(['2018-01-31', '2018-04-30', '2018-07-31', '2018-10-31',
                   '2019-01-31'],
                  dtype='datetime64[ns]', freq='3M')

    `freq` can also be specified as an Offset object.

    >>> md.date_range(start='1/1/2018', periods=5, freq=md.offsets.MonthEnd(3)).execute()
    DatetimeIndex(['2018-01-31', '2018-04-30', '2018-07-31', '2018-10-31',
                   '2019-01-31'],
                  dtype='datetime64[ns]', freq='3M')

    Specify `tz` to set the timezone.

    >>> md.date_range(start='1/1/2018', periods=5, tz='Asia/Tokyo').execute()
    DatetimeIndex(['2018-01-01 00:00:00+09:00', '2018-01-02 00:00:00+09:00',
                   '2018-01-03 00:00:00+09:00', '2018-01-04 00:00:00+09:00',
                   '2018-01-05 00:00:00+09:00'],
                  dtype='datetime64[ns, Asia/Tokyo]', freq='D')

    `closed` controls whether to include `start` and `end` that are on the
    boundary. The default includes boundary points on either end.

    >>> md.date_range(start='2017-01-01', end='2017-01-04', closed=None).execute()
    DatetimeIndex(['2017-01-01', '2017-01-02', '2017-01-03', '2017-01-04'],
                  dtype='datetime64[ns]', freq='D')

    Use ``closed='left'`` to exclude `end` if it falls on the boundary.

    >>> md.date_range(start='2017-01-01', end='2017-01-04', closed='left').execute()
    DatetimeIndex(['2017-01-01', '2017-01-02', '2017-01-03'],
                  dtype='datetime64[ns]', freq='D')

    Use ``closed='right'`` to exclude `start` if it falls on the boundary.

    >>> md.date_range(start='2017-01-01', end='2017-01-04', closed='right').execute()
    DatetimeIndex(['2017-01-02', '2017-01-03', '2017-01-04'],
                  dtype='datetime64[ns]', freq='D')
    """
    # Coerce float periods to int; reject anything else that is not integral.
    if isinstance(periods, (float, np.floating)):
        periods = int(periods)
    elif periods is not None and not isinstance(periods, (int, np.integer)):
        raise TypeError(f'periods must be a number, got {periods}')

    # Daily frequency is the default whenever one of the other three
    # arguments is missing.
    if freq is None and (periods is None or start is None or end is None):
        freq = 'D'
    specified = [arg for arg in (start, end, periods, freq)
                 if arg is not None]
    if len(specified) != 3:
        raise ValueError('Of the four parameters: start, end, periods, '
                         'and freq, exactly three must be specified')
    freq = to_offset(freq)

    start = pd.Timestamp(start) if start is not None else start
    end = pd.Timestamp(end) if end is not None else end
    if start is pd.NaT or end is pd.NaT:
        raise ValueError('Neither `start` nor `end` can be NaT')

    start, end, _ = _maybe_normalize_endpoints(start, end, normalize)
    tz = _infer_tz_from_endpoints(start, end, tz)

    if start is None and end is not None:
        # Only `end` is known: align it to the frequency, then derive `start`.
        end = pd.date_range(end=end, periods=1, freq=freq)[0]
        start = end - (periods - 1) * freq
        size = periods - 1 if closed == 'left' else periods
        if closed == 'right':
            # with no explicit start, closed == 'right' has nothing to
            # exclude, so it is dropped
            closed = None
    elif end is None:
        # Only `start` is known: align it to the frequency, then derive `end`.
        start = pd.date_range(start=start, periods=1, freq=freq)[0]
        end = start + (periods - 1) * freq
        size = periods - 1 if closed == 'right' else periods
        if closed == 'left':
            # with no explicit end, closed == 'left' has nothing to
            # exclude, so it is dropped
            closed = None
    else:
        # Both endpoints are known; derive the period count if necessary.
        if periods is None:
            periods = int((end - start) / freq + 1)
        size = periods if closed is None else periods - 1

    op = DataFrameDateRange(start=start,
                            end=end,
                            periods=periods,
                            freq=freq,
                            tz=tz,
                            normalize=normalize,
                            closed=closed,
                            name=name,
                            **kwargs)
    return op((size, ), chunk_size=chunk_size)
예제 #32
0
 def _get_old_time_step_in_minutes(self):
     """Return the time step as a string of whole minutes followed by ",0"."""
     step = to_offset(self.htimeseries.time_step)
     minutes = int(pd.to_timedelta(step).total_seconds() / 60)
     return "{},0".format(minutes)
예제 #33
0
def _to_offset(freq):
    """Convert a frequency string to an offset, honouring custom aliases.

    If the last character of ``freq`` is a key of
    ``traderule_alias_mapping``, the mapped value is multiplied by the
    integer prefix of ``freq``. A bare alias with no numeric prefix (e.g.
    ``"W"``) counts as a multiple of one. Any other string is delegated to
    ``to_offset``.

    Parameters
    ----------
    freq : str
        Frequency string such as ``"5T"``; must be non-empty.

    Returns
    -------
    The mapped value times the multiple, or the result of ``to_offset``.
    """
    alias = freq[-1]
    if alias in traderule_alias_mapping:
        prefix = freq[:-1]
        # int('') raises ValueError, so an alias given without a multiplier
        # is treated as a single unit.
        multiple = int(prefix) if prefix else 1
        return traderule_alias_mapping.get(alias) * multiple
    return to_offset(freq)
def test_render_pdf_special_chars(ac_power_observation_metadata,
                                  ac_power_forecast_metadata, dash_url,
                                  fail_pdf, preprocessing_result_types,
                                  report_metrics):
    """Render a PDF for a report whose names and messages contain
    punctuation (parentheses, quotes, hyphens, underscores, commas) and
    check that the output starts with the PDF magic bytes.

    Skipped when ``pdflatex`` is not on PATH.
    """
    if shutil.which('pdflatex') is None:  # pragma: no cover
        pytest.skip('pdflatex must be on PATH to generate PDF reports')

    # Forecast/observation pair with punctuation-heavy names.
    forecast = ac_power_forecast_metadata.replace(
        name="ac_power forecast (why,)  ()'-_,")
    observation = ac_power_observation_metadata.replace(
        name="ac_power observations  ()'-_,")
    fxobs = datamodel.ForecastObservation(forecast, observation)

    # Report definition covering four days in Phoenix local time.
    tz = 'America/Phoenix'
    start = pd.Timestamp('20190401 0000', tz=tz)
    end = pd.Timestamp('20190404 2359', tz=tz)
    flag_filter = datamodel.QualityFlagFilter(("USER FLAGGED", ))
    report_params = datamodel.ReportParameters(
        name="NREL MIDC OASIS GHI Forecast Analysis  ()'-_,",
        start=start,
        end=end,
        object_pairs=(fxobs, ),
        metrics=("mae", "rmse", "mbe", "s"),
        categories=("total", "date", "hour"),
        filters=(flag_filter, ))
    report = datamodel.Report(report_id="56c67770-9832-11e9-a535-f4939feddd83",
                              report_parameters=report_params)

    # Quality flags of the first QualityFlagFilter among the report filters.
    flag_groups = [
        flt.quality_flags for flt in report.report_parameters.filters
        if isinstance(flt, datamodel.QualityFlagFilter)
    ]
    qflags = list(flag_groups[0])

    # Constant-valued series used for both forecast and observation values.
    timestamps = pd.date_range(start,
                               end,
                               freq=to_offset(forecast.interval_length),
                               name='timestamp')
    values = pd.Series(np.repeat(100, len(timestamps)),
                       name='value',
                       index=timestamps)

    validation_results = tuple(
        datamodel.ValidationResult(flag=flag, count=0) for flag in qflags)
    preprocessing_results = tuple(
        datamodel.PreprocessingResult(name=result_type, count=0)
        for result_type in preprocessing_result_types)
    pfxobs = datamodel.ProcessedForecastObservation(
        forecast.name,
        fxobs,
        forecast.interval_value_type,
        forecast.interval_length,
        forecast.interval_label,
        valid_point_count=len(values),
        validation_results=validation_results,
        preprocessing_results=preprocessing_results,
        forecast_values=values,
        observation_values=values)

    figure = datamodel.PlotlyReportFigure.from_dict({
        'name': 'mae tucson ac_power',
        'spec': '{"data":[{"x":[1],"y":[1],"type":"bar"}]}',
        'pdf': fail_pdf,
        'figure_type': 'bar',
        'category': 'total',
        'metric': 'mae',
        'figure_class': 'plotly',
    })
    figs = datamodel.RawReportPlots((figure, ), '4.5.3')

    failure_message = datamodel.ReportMessage(
        message="Failed to make metrics for ac_power forecast ()'-_,",
        step='',
        level='',
        function='')
    raw = datamodel.RawReport(
        generated_at=report.report_parameters.end,
        timezone=tz,
        plots=figs,
        metrics=report_metrics(report),
        processed_forecasts_observations=(pfxobs, ),
        versions=(('test', 'test_with_underscore?'), ),
        messages=(failure_message, ))

    rendered = template.render_pdf(report.replace(raw_report=raw), dash_url)
    assert rendered.startswith(b'%PDF')
예제 #35
0
def device_scheduler(  # noqa C901
    device_constraints: List[pd.DataFrame],
    ems_constraints: pd.DataFrame,
    commitment_quantities: List[pd.Series],
    commitment_downwards_deviation_price: Union[List[pd.Series], List[float]],
    commitment_upwards_deviation_price: Union[List[pd.Series], List[float]],
) -> Tuple[List[pd.Series], float, SolverResults]:
    """This generic device scheduler is able to handle an EMS with multiple devices,
    with various types of constraints on the EMS level and on the device level,
    and with multiple market commitments on the EMS level.
    A typical example is a house with many devices.
    The commitments are assumed to be with regard to the flow of energy to the device (positive for consumption,
    negative for production). The solver minimises the costs of deviating from the commitments.

    Device constraints are on a device level. Handled constraints (listed by column name):
        max: maximum stock assuming an initial stock of zero (e.g. in MWh or boxes)
        min: minimum stock assuming an initial stock of zero
        equals: exact amount of stock (we do this by clamping min and max)
        derivative max: maximum flow (e.g. in MW or boxes/h)
        derivative min: minimum flow
        derivative equals: exact amount of flow (we do this by clamping derivative min and derivative max)
        derivative down efficiency: ratio of downwards flows (flow into EMS : flow out of device)
            - optional column; 1 is used when it is missing
        derivative up efficiency: ratio of upwards flows (flow into device : flow out of EMS)
            - optional column; 1 is used when it is missing
    EMS constraints are on an EMS level. Handled constraints (listed by column name):
        derivative max: maximum flow
        derivative min: minimum flow
    Commitments are on an EMS level. Parameter explanations:
        commitment_quantities: amounts of flow specified in commitments (both previously ordered and newly requested)
            - e.g. in MW or boxes/h
        commitment_downwards_deviation_price: penalty for downwards deviations of the flow
            - e.g. in EUR/MW or EUR/(boxes/h)
            - either a single value (same value for each flow value) or a Series (different value for each flow value)
        commitment_upwards_deviation_price: penalty for upwards deviations of the flow

    All Series and DataFrames should have the same resolution.

    For now, we pass in the various constraints and prices as separate variables, from which we make a MultiIndex
    DataFrame. Later we could pass in a MultiIndex DataFrame directly.

    Returns a tuple of:
        - the planned power per device (one Series per device, indexed from start to end at the given resolution)
        - the planned costs (value of the objective)
        - the results object returned by the solver
    If there are no devices, ([], 0, SolverResults()) is returned without solving.

    Raises an Exception if the first commitment does not share the time window or the resolution of the first
    device's constraints.
    """

    # If the EMS has no devices, don't bother
    if len(device_constraints) == 0:
        return [], 0, SolverResults()

    # Check if commitments have the same time window and resolution as the constraints
    start = device_constraints[0].index.to_pydatetime()[0]
    resolution = pd.to_timedelta(device_constraints[0].index.freq)
    end = device_constraints[0].index.to_pydatetime()[-1] + resolution
    if len(commitment_quantities) != 0:
        start_c = commitment_quantities[0].index.to_pydatetime()[0]
        resolution_c = pd.to_timedelta(commitment_quantities[0].index.freq)
        # The commitment window must be closed with the commitment's own
        # resolution; using the constraints' resolution here would report a
        # resolution mismatch as a time-window mismatch.
        end_c = commitment_quantities[0].index.to_pydatetime(
        )[-1] + resolution_c
        if not (start_c == start and end_c == end):
            raise Exception(
                "Not implemented for different time windows.\n(%s,%s)\n(%s,%s)"
                % (start, end, start_c, end_c))
        if resolution_c != resolution:
            raise Exception(
                "Not implemented for different resolutions.\n%s\n%s" %
                (resolution, resolution_c))

    # Turn prices per commitment into prices per commitment flow.
    # Plain ints are accepted alongside floats, so that e.g. a price of 10
    # is expanded into a Series just like 10.0.
    if len(commitment_downwards_deviation_price) != 0:
        if all(
                isinstance(price, (int, float))
                for price in commitment_downwards_deviation_price):
            commitment_downwards_deviation_price = [
                initialize_series(price, start, end, resolution)
                for price in commitment_downwards_deviation_price
            ]
    if len(commitment_upwards_deviation_price) != 0:
        if all(
                isinstance(price, (int, float))
                for price in commitment_upwards_deviation_price):
            commitment_upwards_deviation_price = [
                initialize_series(price, start, end, resolution)
                for price in commitment_upwards_deviation_price
            ]

    model = ConcreteModel()

    # Add indices for devices (d), datetimes (j) and commitments (c)
    model.d = RangeSet(0, len(device_constraints) - 1, doc="Set of devices")
    model.j = RangeSet(0,
                       len(device_constraints[0].index.to_pydatetime()) - 1,
                       doc="Set of datetimes")
    model.c = RangeSet(0,
                       len(commitment_quantities) - 1,
                       doc="Set of commitments")

    # Add parameters
    def price_down_select(m, c, j):
        return commitment_downwards_deviation_price[c].iloc[j]

    def price_up_select(m, c, j):
        return commitment_upwards_deviation_price[c].iloc[j]

    def commitment_quantity_select(m, c, j):
        return commitment_quantities[c].iloc[j]

    # In the selectors below NaN means "no constraint": the bound becomes
    # +/- infinity, and an "equals" value clamps both the min and the max.
    def device_max_select(m, d, j):
        max_v = device_constraints[d]["max"].iloc[j]
        equal_v = device_constraints[d]["equals"].iloc[j]
        if np.isnan(max_v) and np.isnan(equal_v):
            return infinity
        else:
            return np.nanmin([max_v, equal_v])

    def device_min_select(m, d, j):
        min_v = device_constraints[d]["min"].iloc[j]
        equal_v = device_constraints[d]["equals"].iloc[j]
        if np.isnan(min_v) and np.isnan(equal_v):
            return -infinity
        else:
            return np.nanmax([min_v, equal_v])

    def device_derivative_max_select(m, d, j):
        max_v = device_constraints[d]["derivative max"].iloc[j]
        equal_v = device_constraints[d]["derivative equals"].iloc[j]
        if np.isnan(max_v) and np.isnan(equal_v):
            return infinity
        else:
            return np.nanmin([max_v, equal_v])

    def device_derivative_min_select(m, d, j):
        min_v = device_constraints[d]["derivative min"].iloc[j]
        equal_v = device_constraints[d]["derivative equals"].iloc[j]
        if np.isnan(min_v) and np.isnan(equal_v):
            return -infinity
        else:
            return np.nanmax([min_v, equal_v])

    def ems_derivative_max_select(m, j):
        v = ems_constraints["derivative max"].iloc[j]
        if np.isnan(v):
            return infinity
        else:
            return v

    def ems_derivative_min_select(m, j):
        v = ems_constraints["derivative min"].iloc[j]
        if np.isnan(v):
            return -infinity
        else:
            return v

    # The efficiency columns are optional; a missing column means no losses.
    def device_derivative_down_efficiency(m, d, j):
        try:
            return device_constraints[d]["derivative down efficiency"].iloc[j]
        except KeyError:
            return 1

    def device_derivative_up_efficiency(m, d, j):
        try:
            return device_constraints[d]["derivative up efficiency"].iloc[j]
        except KeyError:
            return 1

    model.up_price = Param(model.c, model.j, initialize=price_up_select)
    model.down_price = Param(model.c, model.j, initialize=price_down_select)
    model.commitment_quantity = Param(model.c,
                                      model.j,
                                      initialize=commitment_quantity_select)
    model.device_max = Param(model.d, model.j, initialize=device_max_select)
    model.device_min = Param(model.d, model.j, initialize=device_min_select)
    model.device_derivative_max = Param(
        model.d, model.j, initialize=device_derivative_max_select)
    model.device_derivative_min = Param(
        model.d, model.j, initialize=device_derivative_min_select)
    model.ems_derivative_max = Param(model.j,
                                     initialize=ems_derivative_max_select)
    model.ems_derivative_min = Param(model.j,
                                     initialize=ems_derivative_min_select)
    model.device_derivative_down_efficiency = Param(
        model.d, model.j, initialize=device_derivative_down_efficiency)
    model.device_derivative_up_efficiency = Param(
        model.d, model.j, initialize=device_derivative_up_efficiency)

    # Add variables
    model.ems_power = Var(model.d, model.j, domain=Reals, initialize=0)
    model.device_power_down = Var(model.d,
                                  model.j,
                                  domain=NonPositiveReals,
                                  initialize=0)
    model.device_power_up = Var(model.d,
                                model.j,
                                domain=NonNegativeReals,
                                initialize=0)
    model.commitment_downwards_deviation = Var(model.c,
                                               model.j,
                                               domain=NonPositiveReals,
                                               initialize=0)
    model.commitment_upwards_deviation = Var(model.c,
                                             model.j,
                                             domain=NonNegativeReals,
                                             initialize=0)

    # Add constraints as a tuple of (lower bound, value, upper bound)
    def device_bounds(m, d, j):
        # Stock at step j is the cumulative sum of all flows up to and
        # including j (the initial stock is taken to be zero).
        return (
            m.device_min[d, j],
            sum(m.device_power_down[d, k] + m.device_power_up[d, k]
                for k in range(0, j + 1)),
            m.device_max[d, j],
        )

    def device_derivative_bounds(m, d, j):
        return (
            m.device_derivative_min[d, j],
            m.device_power_down[d, j] + m.device_power_up[d, j],
            m.device_derivative_max[d, j],
        )

    def device_down_derivative_bounds(m, d, j):
        return (
            m.device_derivative_min[d, j],
            m.device_power_down[d, j],
            0,
        )

    def device_up_derivative_bounds(m, d, j):
        return (
            0,
            m.device_power_up[d, j],
            m.device_derivative_max[d, j],
        )

    def ems_derivative_bounds(m, j):
        return m.ems_derivative_min[j], sum(
            m.ems_power[:, j]), m.ems_derivative_max[j]

    def ems_flow_commitment_equalities(m, j):
        """Couple EMS flows (sum over devices) to commitments."""
        return (
            0,
            sum(m.commitment_quantity[:, j]) +
            sum(m.commitment_downwards_deviation[:, j]) +
            sum(m.commitment_upwards_deviation[:, j]) - sum(m.ems_power[:, j]),
            0,
        )

    def device_derivative_equalities(m, d, j):
        """Couple device flows to EMS flows per device, applying efficiencies."""
        return (
            0,
            m.device_power_up[d, j] / m.device_derivative_up_efficiency[d, j] +
            m.device_power_down[d, j] *
            m.device_derivative_down_efficiency[d, j] - m.ems_power[d, j],
            0,
        )

    model.device_energy_bounds = Constraint(model.d,
                                            model.j,
                                            rule=device_bounds)
    model.device_power_bounds = Constraint(model.d,
                                           model.j,
                                           rule=device_derivative_bounds)
    model.device_power_down_bounds = Constraint(
        model.d, model.j, rule=device_down_derivative_bounds)
    model.device_power_up_bounds = Constraint(model.d,
                                              model.j,
                                              rule=device_up_derivative_bounds)
    model.ems_power_bounds = Constraint(model.j, rule=ems_derivative_bounds)
    model.ems_power_commitment_equalities = Constraint(
        model.j, rule=ems_flow_commitment_equalities)
    model.device_power_equalities = Constraint(
        model.d, model.j, rule=device_derivative_equalities)

    # Add objective: total price-weighted deviation from the commitments
    def cost_function(m):
        costs = 0
        for c in m.c:
            for j in m.j:
                costs += (m.commitment_downwards_deviation[c, j] *
                          m.down_price[c, j])
                costs += (m.commitment_upwards_deviation[c, j] *
                          m.up_price[c, j])
        return costs

    model.costs = Objective(rule=cost_function, sense=minimize)

    # Solve
    results = SolverFactory(
        current_app.config.get("FLEXMEASURES_LP_SOLVER")).solve(model)

    planned_costs = value(model.costs)
    planned_power_per_device = []
    for d in model.d:
        planned_device_power = [
            model.device_power_down[d, j].value +
            model.device_power_up[d, j].value for j in model.j
        ]
        # NOTE(review): newer pandas releases replace the `closed` keyword of
        # date_range with `inclusive` - confirm against the pinned version.
        planned_power_per_device.append(
            pd.Series(
                index=pd.date_range(start=start,
                                    end=end,
                                    freq=to_offset(resolution),
                                    closed="left"),
                data=planned_device_power,
            ))

    # model.pprint()
    # print(results.solver.termination_condition)
    # print(planned_costs)
    # model.display()
    return planned_power_per_device, planned_costs, results
예제 #36
0
파일: index.py 프로젝트: nicktp/pandas
    def __new__(cls,
                data=None,
                freq=None,
                start=None,
                end=None,
                periods=None,
                copy=False,
                name=None,
                tz=None,
                verify_integrity=True,
                normalize=False,
                **kwds):
        """
        Build a new index either from ``data`` or, when ``data`` is None,
        by generating values from ``start`` / ``end`` / ``periods`` / ``freq``
        through ``cls._generate``.

        Parameters
        ----------
        data : array-like, optional
            Values to convert. Scalars are rejected.
        freq : DateOffset, 'infer', or anything accepted by to_offset
            'infer' defers to the frequency inferred from the data.
        start, end, periods, normalize
            Passed on to ``cls._generate`` when ``data`` is None. A float
            ``periods`` is truncated to int.
        copy : bool
            Only consulted for int64 input: copy instead of viewing the data.
        name : object
            Stored as the ``name`` of the result.
        tz : optional
            Time zone; taken from ``data`` when it is a DatetimeIndex and
            ``tz`` is None.
        verify_integrity : bool
            When True and a frequency was passed explicitly, check that the
            inferred frequency of the values matches it.
        **kwds
            ``dayfirst`` / ``yearfirst`` are forwarded to string parsing;
            ``offset`` is a deprecated alias of ``freq``.

        Raises
        ------
        ValueError
            Non-numeric ``periods``; neither data nor freq supplied; scalar
            ``data``; values that do not conform to the passed frequency.
        TypeError
            ``data`` is an Int64Index, or the converted values do not have a
            datetime64 dtype.
        """

        dayfirst = kwds.pop('dayfirst', None)
        yearfirst = kwds.pop('yearfirst', None)
        # Legacy keyword: a truthy 'offset' overrides freq and triggers the
        # deprecation warning further down.
        warn = False
        if 'offset' in kwds and kwds['offset']:
            freq = kwds['offset']
            warn = True

        # Normalise freq to a DateOffset, or remember that it must be
        # inferred from the final values.
        freq_infer = False
        if not isinstance(freq, DateOffset):
            if freq != 'infer':
                freq = to_offset(freq)
            else:
                freq_infer = True
                freq = None

        if warn:
            import warnings
            warnings.warn(
                "parameter 'offset' is deprecated, "
                "please use 'freq' instead", FutureWarning)

        offset = freq

        if periods is not None:
            if com.is_float(periods):
                periods = int(periods)
            elif not com.is_integer(periods):
                raise ValueError('Periods must be a number, got %s' %
                                 str(periods))

        if data is None and offset is None:
            raise ValueError("Must provide freq argument if no data is "
                             "supplied")

        # No data: generate the values from the range arguments.
        if data is None:
            return cls._generate(start,
                                 end,
                                 periods,
                                 name,
                                 offset,
                                 tz=tz,
                                 normalize=normalize)

        # Coerce non-ndarray input to an object array first.
        if not isinstance(data, np.ndarray):
            if np.isscalar(data):
                raise ValueError('DatetimeIndex() must be called with a '
                                 'collection of some kind, %s was passed' %
                                 repr(data))

            # other iterable of some kind
            if not isinstance(data, (list, tuple)):
                data = list(data)

            data = np.asarray(data, dtype='O')

            # try a few ways to make it datetime64
            if lib.is_string_array(data):
                data = _str_to_dt_array(data,
                                        offset,
                                        dayfirst=dayfirst,
                                        yearfirst=yearfirst)
            else:
                data = tools.to_datetime(data)
                data.offset = offset
                # Conversion already produced an index: return it directly,
                # skipping the tz handling and integrity check below.
                if isinstance(data, DatetimeIndex):
                    if name is not None:
                        data.name = name
                    return data

        # Dispatch on dtype to obtain the underlying values in `subarr`.
        if issubclass(data.dtype.type, basestring):
            subarr = _str_to_dt_array(data,
                                      offset,
                                      dayfirst=dayfirst,
                                      yearfirst=yearfirst)
        elif issubclass(data.dtype.type, np.datetime64):
            if isinstance(data, DatetimeIndex):
                if tz is None:
                    tz = data.tz

                subarr = data.values

                # Reuse the source index's offset; it is not re-verified.
                if offset is None:
                    offset = data.offset
                    verify_integrity = False
            else:
                if data.dtype != _NS_DTYPE:
                    subarr = lib.cast_to_nanoseconds(data)
                else:
                    subarr = data
        elif data.dtype == _INT64_DTYPE:
            if isinstance(data, Int64Index):
                raise TypeError('cannot convert Int64Index->DatetimeIndex')
            if copy:
                subarr = np.asarray(data, dtype=_NS_DTYPE)
            else:
                subarr = data.view(_NS_DTYPE)
        else:
            try:
                subarr = tools.to_datetime(data)
            except ValueError:
                # tz aware
                subarr = tools.to_datetime(data, utc=True)

            if not np.issubdtype(subarr.dtype, np.datetime64):
                raise TypeError('Unable to convert %s to datetime dtype' %
                                str(data))

        # Resolve the time zone: inherit it from an index result, otherwise
        # localize the values when a tz was requested.
        if isinstance(subarr, DatetimeIndex):
            if tz is None:
                tz = subarr.tz
        else:
            if tz is not None:
                tz = tools._maybe_get_tz(tz)

                if (not isinstance(data, DatetimeIndex)
                        or getattr(data, 'tz', None) is None):
                    # Convert tz-naive to UTC
                    ints = subarr.view('i8')
                    subarr = lib.tz_localize_to_utc(ints, tz)

                subarr = subarr.view(_NS_DTYPE)

        # Re-type the values as this class and attach the metadata.
        subarr = subarr.view(cls)
        subarr.name = name
        subarr.offset = offset
        subarr.tz = tz

        # An explicitly passed frequency must match what the values imply.
        if verify_integrity and len(subarr) > 0:
            if offset is not None and not freq_infer:
                inferred = subarr.inferred_freq
                if inferred != offset.freqstr:
                    raise ValueError('Dates do not conform to passed '
                                     'frequency')

        # freq='infer': adopt the inferred frequency when one was found.
        if freq_infer:
            inferred = subarr.inferred_freq
            if inferred:
                subarr.offset = to_offset(inferred)

        return subarr
예제 #37
0
def longest_period_from_frequency_str(freq_str: str) -> int:
    """Return the longest period length associated with a frequency string.

    ``freq_str`` is parsed with ``to_offset``.  The normalized name of the
    resulting offset selects an entry of ``FREQ_LONGEST_PERIOD_DICT`` and
    that entry is floor-divided by the offset's multiple ``n``.
    """
    parsed = to_offset(freq_str)
    base_period = FREQ_LONGEST_PERIOD_DICT[norm_freq_str(parsed.name)]
    return base_period // parsed.n
예제 #38
0
파일: resample.py 프로젝트: tikazyq/pandas
    def _resample_timestamps(self, kind=None):
        """Resample ``self.obj`` along its timestamp axis and return the result.

        Assumes ``set_grouper(obj)`` has already been called.  The binner and
        grouper are (re)built through ``_get_binner_for_resample`` first.

        Two strategies are used:

        * groupby + aggregate (optionally followed by ``fillna``) when the
          axis has no declared or inferable frequency, when there are fewer
          bins than axis labels, or when ``self.how`` is set;
        * otherwise an upsampling shortcut that relabels or reindexes the
          object onto the bin edges.

        Finally ``self.loffset`` (parsed with ``to_offset`` when given as a
        string) is added to a non-empty ``DatetimeIndex`` result index.

        Raises
        ------
        AssertionError
            If the upsampling shortcut is taken with a non-zero ``self.axis``.
        """
        # assumes set_grouper(obj) already called
        axlabels = self.ax

        self._get_binner_for_resample(kind=kind)
        grouper, binner, obj = self.grouper, self.binner, self.obj

        # A regular axis either declares a frequency or lets one be inferred.
        is_regular = (axlabels.freq is not None
                      or axlabels.inferred_freq is not None)
        # Determine if we're downsampling (only meaningful for regular data)
        is_downsample = is_regular and (
            len(grouper.binlabels) < len(axlabels) or self.how is not None)

        if is_downsample or not is_regular:
            # Downsampling, or irregular data: have to use groupby
            result = obj.groupby(grouper, axis=self.axis).aggregate(
                self._agg_method)
            # GH2073
            if self.fill_method is not None:
                result = result.fillna(method=self.fill_method,
                                       limit=self.limit)
        else:
            # upsampling shortcut
            if self.axis:
                raise AssertionError('axis must be 0')

            res_index = binner[1:] if self.closed == 'right' else binner[:-1]

            # if we have the same frequency as our axis, then we are equal
            # sampling even if how is None
            equal_sampling = (
                self.fill_method is None
                and self.limit is None
                and to_offset(axlabels.inferred_freq) == self.freq)
            if equal_sampling:
                result = obj.copy()
                result.index = res_index
            else:
                result = obj.reindex(res_index,
                                     method=self.fill_method,
                                     limit=self.limit)

        label_shift = self.loffset
        if isinstance(label_shift, compat.string_types):
            label_shift = to_offset(self.loffset)

        can_shift = (isinstance(label_shift, (DateOffset, timedelta))
                     and isinstance(result.index, DatetimeIndex)
                     and len(result.index) > 0)
        if can_shift:
            result.index = result.index + label_shift

        return result
예제 #39
0
def test_rule_aliases():
    """The alias ``'10us'`` must parse to a ten-microsecond offset."""
    expected = offsets.Micro(10)
    parsed = frequencies.to_offset('10us')
    assert parsed == expected
예제 #40
0
    def test_to_offset_multiple(self):
        """Compound, fractional and anchored frequency strings.

        Each string must be parsed by ``frequencies.to_offset`` into the
        single offset listed next to it; a malformed compound string must be
        rejected with ``ValueError``.
        """
        # The same compound frequency, with and without a separating space.
        result = frequencies.to_offset('2h30min')
        assert (result == frequencies.to_offset('2h 30min'))
        assert (result == offsets.Minute(150))

        # (frequency string, offset it must parse to)
        cases = [
            ('2h30min15s', offsets.Second(150 * 60 + 15)),
            ('2h 60min', offsets.Hour(3)),
            ('2h 20.5min', offsets.Second(8430)),
            ('1.5min', offsets.Second(90)),
            ('0.5S', offsets.Milli(500)),
            ('15l500u', offsets.Micro(15500)),
            ('10s75L', offsets.Milli(10075)),
            ('1s0.25ms', offsets.Micro(1000250)),
            ('1s0.25L', offsets.Micro(1000250)),
            ('2800N', offsets.Nano(2800)),
            ('2SM', offsets.SemiMonthEnd(2)),
            ('2SM-16', offsets.SemiMonthEnd(2, day_of_month=16)),
            ('2SMS-14', offsets.SemiMonthBegin(2, day_of_month=14)),
            ('2SMS-15', offsets.SemiMonthBegin(2)),
        ]
        for freqstr, expected in cases:
            result = frequencies.to_offset(freqstr)
            assert (result == expected)

        # malformed
        with tm.assert_raises_regex(ValueError, 'Invalid frequency: 2h20m'):
            frequencies.to_offset('2h20m')
예제 #41
0
def test_anchored_shortcuts():
    """Anchored frequency shortcuts resolve to their default anchors.

    Checks that bare aliases (``'W'``, ``'Q'``, ``'SM'``) equal their
    explicitly anchored spellings, that explicit anchors are honoured, and
    that every invalid anchor is rejected with ``ValueError``.
    """
    result = frequencies.to_offset('W')
    expected = frequencies.to_offset('W-SUN')
    assert (result == expected)

    result1 = frequencies.to_offset('Q')
    result2 = frequencies.to_offset('Q-DEC')
    expected = offsets.QuarterEnd(startingMonth=12)
    assert (result1 == expected)
    assert (result2 == expected)

    result1 = frequencies.to_offset('Q-MAY')
    expected = offsets.QuarterEnd(startingMonth=5)
    assert (result1 == expected)

    result1 = frequencies.to_offset('SM')
    result2 = frequencies.to_offset('SM-15')
    expected = offsets.SemiMonthEnd(day_of_month=15)
    assert (result1 == expected)
    assert (result2 == expected)

    result = frequencies.to_offset('SM-1')
    expected = offsets.SemiMonthEnd(day_of_month=1)
    assert (result == expected)

    result = frequencies.to_offset('SM-27')
    expected = offsets.SemiMonthEnd(day_of_month=27)
    assert (result == expected)

    result = frequencies.to_offset('SMS-2')
    expected = offsets.SemiMonthBegin(day_of_month=2)
    assert (result == expected)

    result = frequencies.to_offset('SMS-27')
    expected = offsets.SemiMonthBegin(day_of_month=27)
    assert (result == expected)

    # ensure invalid cases fail as expected
    # NOTE: every entry needs its own trailing comma.  Adjacent string
    # literals are silently concatenated, which previously fused two anchors
    # into one bogus entry so that neither was checked on its own.
    invalid_anchors = [
        'SM-0', 'SM-28', 'SM-29', 'SM-FOO', 'BSM', 'SM--1',
        'SMS-1', 'SMS-28', 'SMS-30', 'SMS-BAR', 'BSMS', 'SMS--2',
    ]
    for invalid_anchor in invalid_anchors:
        try:
            frequencies.to_offset(invalid_anchor)
        except ValueError:
            pass
        else:
            raise AssertionError(invalid_anchor)
예제 #42
0
    def test_to_offset_invalid(self):
        """Malformed frequency strings are rejected; spaced ones are accepted.

        Every rejected string must raise ``ValueError`` with a message
        matching the pattern listed next to it.
        """
        # GH 13930
        rejected = [
            ('U1', 'Invalid frequency: U1'),
            ('-U', 'Invalid frequency: -U'),
            ('3U1', 'Invalid frequency: 3U1'),
            ('-2-3U', 'Invalid frequency: -2-3U'),
            ('-2D:3H', 'Invalid frequency: -2D:3H'),
            ('1.5.0S', 'Invalid frequency: 1.5.0S'),
        ]
        for freqstr, pattern in rejected:
            with tm.assert_raises_regex(ValueError, pattern):
                frequencies.to_offset(freqstr)

        # split offsets with spaces are valid
        spaced = [
            ('2D 3H', offsets.Hour(51)),
            ('2 D3 H', offsets.Hour(51)),
            ('2 D 3 H', offsets.Hour(51)),
            ('  2 D 3 H  ', offsets.Hour(51)),
            ('   H    ', offsets.Hour()),
            (' 3  H    ', offsets.Hour(3)),
        ]
        for freqstr, expected in spaced:
            assert frequencies.to_offset(freqstr) == expected

        # special cases
        assert frequencies.to_offset('2SMS-15') == offsets.SemiMonthBegin(2)
        rejected_anchored = [
            ('2SMS-15-15', 'Invalid frequency: 2SMS-15-15'),
            ('2SMS-15D', 'Invalid frequency: 2SMS-15D'),
        ]
        for freqstr, pattern in rejected_anchored:
            with tm.assert_raises_regex(ValueError, pattern):
                frequencies.to_offset(freqstr)
예제 #43
0
파일: timedeltas.py 프로젝트: zfixer/pandas
    def __new__(cls, data=None, unit=None, freq=None, start=None, end=None,
                periods=None, closed=None, dtype=None, copy=False, name=None,
                verify_integrity=True):
        """Build a TimedeltaIndex from existing data or from a range spec.

        * An existing ``TimedeltaIndex`` passed without ``freq`` or ``name``
          is returned as a copy (``copy=True``) or a shallow copy.
        * With ``data=None`` the index is generated from ``start`` / ``end``
          / ``periods`` / ``freq`` via ``cls._generate``.
        * Otherwise ``data`` is converted with ``to_timedelta`` when its
          dtype is not already ``_TD_DTYPE`` and wrapped with
          ``cls._simple_new``.

        ``freq='infer'`` requests that the frequency be inferred from the
        data.  ``dtype`` is accepted but not used in this body.

        Raises
        ------
        TypeError
            If ``periods`` is neither a float nor an integer.
        ValueError
            If no data is given and ``freq`` is None while any of
            ``periods``, ``start`` or ``end`` is None, or if ``data`` is a
            scalar.
        """
        # Nothing to override: hand back the existing index (or a copy).
        if isinstance(data, TimedeltaIndex) and freq is None and name is None:
            return data.copy() if copy else data._shallow_copy()

        freq_infer = False
        if not isinstance(freq, DateOffset):
            # if a passed freq is None, don't infer automatically
            if freq != 'infer':
                freq = to_offset(freq)
            else:
                freq_infer = True
                freq = None

        if periods is not None:
            if is_float(periods):
                periods = int(periods)
            elif not is_integer(periods):
                raise TypeError(
                    'periods must be a number, got {periods}'.format(
                        periods=periods))

        # No data: build the index from the range specification.
        if data is None:
            if freq is None and com._any_none(periods, start, end):
                raise ValueError(
                    'Must provide freq argument if no data is supplied')
            return cls._generate(start, end, periods, name, freq,
                                 closed=closed)

        if unit is not None:
            data = to_timedelta(data, unit=unit, box=False)

        is_collection = isinstance(data, (np.ndarray, Index, ABCSeries))
        if not is_collection and is_scalar(data):
            raise ValueError('TimedeltaIndex() must be called with a '
                             'collection of some kind, %s was passed' %
                             repr(data))

        # convert if not already
        if getattr(data, 'dtype', None) != _TD_DTYPE:
            data = to_timedelta(data, unit=unit, box=False)
        elif copy:
            data = np.array(data, copy=True)

        # check that we are matching freqs
        if (verify_integrity and len(data) > 0
                and freq is not None and not freq_infer):
            result = cls._simple_new(data, name=name)
            cls._validate_frequency(result, freq)
            result.freq = freq
            return result

        if freq_infer:
            result = cls._simple_new(data, name=name)
            inferred = result.inferred_freq
            if inferred:
                result.freq = to_offset(inferred)
            return result

        return cls._simple_new(data, name=name, freq=freq)
예제 #44
0
    def __new__(cls, data=None, unit=None, freq=None, start=None, end=None,
                periods=None, copy=False, name=None, closed=None,
                verify_integrity=True, **kwargs):
        """Build a TimedeltaIndex from existing data or from a range spec.

        * An existing ``TimedeltaIndex`` passed without ``freq`` is returned
          as is (or copied when ``copy=True``).
        * With ``data=None`` the index is generated from ``start`` / ``end``
          / ``periods`` / ``freq`` via ``cls._generate``.
        * Otherwise ``data`` is converted with ``to_timedelta`` when its
          dtype is not already ``_TD_DTYPE`` and wrapped with
          ``cls._simple_new``.

        ``freq='infer'`` requests that the frequency be inferred from the
        data.  Extra ``**kwargs`` are accepted but not used in this body.

        Raises
        ------
        ValueError
            If ``periods`` is neither a float nor an integer, if neither
            ``data`` nor ``freq`` is given, if ``data`` is a scalar, or if
            the data does not conform to the passed frequency.
        """
        if isinstance(data, TimedeltaIndex) and freq is None:
            return data.copy() if copy else data

        freq_infer = False
        if not isinstance(freq, DateOffset):
            # if a passed freq is None, don't infer automatically
            if freq != 'infer':
                freq = to_offset(freq)
            else:
                freq_infer = True
                freq = None

        if periods is not None:
            if com.is_float(periods):
                periods = int(periods)
            elif not com.is_integer(periods):
                raise ValueError('Periods must be a number, got %s' %
                                 str(periods))

        # No data: a frequency is mandatory and the index is generated.
        if data is None:
            if freq is None:
                raise ValueError("Must provide freq argument if no data is "
                                 "supplied")
            return cls._generate(start, end, periods, name, freq,
                                 closed=closed)

        if unit is not None:
            data = to_timedelta(data, unit=unit, box=False)

        is_collection = isinstance(data, (np.ndarray, Index, ABCSeries))
        if not is_collection and np.isscalar(data):
            raise ValueError('TimedeltaIndex() must be called with a '
                             'collection of some kind, %s was passed'
                             % repr(data))

        # convert if not already
        if getattr(data, 'dtype', None) != _TD_DTYPE:
            data = to_timedelta(data, unit=unit, box=False)
        elif copy:
            data = np.array(data, copy=True)

        # check that we are matching freqs
        if (verify_integrity and len(data) > 0
                and freq is not None and not freq_infer):
            result = cls._simple_new(data, name=name)
            inferred = result.inferred_freq
            if inferred != freq.freqstr:
                # The inferred alias differs: compare against an index
                # regenerated from the first element at the passed frequency.
                on_freq = cls._generate(result[0], None, len(result), name,
                                        freq)
                if not np.array_equal(result.asi8, on_freq.asi8):
                    msg = ('Inferred frequency {0} from passed timedeltas does not '
                           'conform to passed frequency {1}')
                    raise ValueError(msg.format(inferred, freq.freqstr))
            result.freq = freq
            return result

        if freq_infer:
            result = cls._simple_new(data, name=name)
            inferred = result.inferred_freq
            if inferred:
                result.freq = to_offset(inferred)
            return result

        return cls._simple_new(data, name=name, freq=freq)
예제 #45
0
def round(t, freq):
    """Floor timestamp ``t`` down to a whole multiple of ``freq``.

    ``freq`` is parsed with ``to_offset``; the integer ``t.value`` is
    floor-divided by the offset's ``delta.value`` and scaled back, and the
    result is wrapped in a new ``pd.Timestamp``.

    Note that this truncates (floor division) rather than rounding to the
    nearest multiple, and that the name shadows the builtin ``round``.
    """
    offset = to_offset(freq)
    whole_steps = t.value // offset.delta.value
    return pd.Timestamp(whole_steps * offset.delta.value)
예제 #46
0
def interval_range(start=None,
                   end=None,
                   periods=None,
                   freq=None,
                   name=None,
                   closed='right'):
    """
    Return a fixed frequency IntervalIndex.

    Parameters
    ----------
    start : numeric or datetime-like, default None
        Left bound for generating intervals
    end : numeric or datetime-like, default None
        Right bound for generating intervals
    periods : integer, default None
        Number of periods to generate
    freq : numeric, string, or DateOffset, default None
        The length of each interval. Must be consistent with the type of start
        and end, e.g. 2 for numeric, or '5H' for datetime-like.  Default is 1
        for numeric and 'D' for datetime-like.
    name : string, default None
        Name of the resulting IntervalIndex
    closed : {'left', 'right', 'both', 'neither'}, default 'right'
        Whether the intervals are closed on the left-side, right-side, both
        or neither.

    Notes
    -----
    Of the four parameters ``start``, ``end``, ``periods``, and ``freq``,
    exactly three must be specified. If ``freq`` is omitted, the resulting
    ``IntervalIndex`` will have ``periods`` linearly spaced elements between
    ``start`` and ``end``, inclusively.

    To learn more about datetime-like frequency strings, please see `this link
    <http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases>`__.

    Returns
    -------
    rng : IntervalIndex

    Raises
    ------
    ValueError
        If not exactly three of ``start``, ``end``, ``periods`` and ``freq``
        are specified (after the default ``freq`` has been applied), if
        ``start`` or ``end`` is not a valid endpoint, or if a non-numeric
        ``freq`` cannot be converted to a DateOffset.
    TypeError
        If ``periods`` is not a number, or if ``start``, ``end`` and ``freq``
        are not type compatible with each other.

    Examples
    --------
    Numeric ``start`` and ``end`` are supported.

    >>> pd.interval_range(start=0, end=5)
    IntervalIndex([(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]]
                  closed='right', dtype='interval[int64]')

    Additionally, datetime-like input is also supported.

    >>> pd.interval_range(start=pd.Timestamp('2017-01-01'),
    ...                   end=pd.Timestamp('2017-01-04'))
    IntervalIndex([(2017-01-01, 2017-01-02], (2017-01-02, 2017-01-03],
                   (2017-01-03, 2017-01-04]]
                  closed='right', dtype='interval[datetime64[ns]]')

    The ``freq`` parameter specifies the frequency between the left and right
    endpoints of the individual intervals within the ``IntervalIndex``.  For
    numeric ``start`` and ``end``, the frequency must also be numeric.

    >>> pd.interval_range(start=0, periods=4, freq=1.5)
    IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]]
                  closed='right', dtype='interval[float64]')

    Similarly, for datetime-like ``start`` and ``end``, the frequency must be
    convertible to a DateOffset.

    >>> pd.interval_range(start=pd.Timestamp('2017-01-01'),
    ...                   periods=3, freq='MS')
    IntervalIndex([(2017-01-01, 2017-02-01], (2017-02-01, 2017-03-01],
                   (2017-03-01, 2017-04-01]]
                  closed='right', dtype='interval[datetime64[ns]]')

    Specify ``start``, ``end``, and ``periods``; the frequency is generated
    automatically (linearly spaced).

    >>> pd.interval_range(start=0, end=6, periods=4)
    IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]]
              closed='right',
              dtype='interval[float64]')

    The ``closed`` parameter specifies which endpoints of the individual
    intervals within the ``IntervalIndex`` are closed.

    >>> pd.interval_range(end=5, periods=4, closed='both')
    IntervalIndex([[1, 2], [2, 3], [3, 4], [4, 5]]
                  closed='both', dtype='interval[int64]')

    See Also
    --------
    IntervalIndex : an Index of intervals that are all closed on the same side.
    """
    start = com.maybe_box_datetimelike(start)
    end = com.maybe_box_datetimelike(end)
    # Whichever bound was supplied decides between the numeric and the
    # datetime-like code paths below.
    endpoint = start if start is not None else end

    # Apply the default frequency only when one of the other three
    # parameters is missing; with all three given the spacing is linear.
    if freq is None and com._any_none(periods, start, end):
        freq = 1 if is_number(endpoint) else 'D'

    if com.count_not_none(start, end, periods, freq) != 3:
        raise ValueError('Of the four parameters: start, end, periods, and '
                         'freq, exactly three must be specified')

    if not _is_valid_endpoint(start):
        msg = 'start must be numeric or datetime-like, got {start}'
        raise ValueError(msg.format(start=start))
    elif not _is_valid_endpoint(end):
        msg = 'end must be numeric or datetime-like, got {end}'
        raise ValueError(msg.format(end=end))

    if is_float(periods):
        periods = int(periods)
    elif not is_integer(periods) and periods is not None:
        msg = 'periods must be a number, got {periods}'
        raise TypeError(msg.format(periods=periods))

    # Non-numeric frequencies must be convertible to a DateOffset.
    if freq is not None and not is_number(freq):
        try:
            freq = to_offset(freq)
        except ValueError:
            raise ValueError('freq must be numeric or convertible to '
                             'DateOffset, got {freq}'.format(freq=freq))

    # verify type compatibility
    if not all([
            _is_type_compatible(start, end),
            _is_type_compatible(start, freq),
            _is_type_compatible(end, freq)
    ]):
        raise TypeError("start, end, freq need to be type compatible")

    # +1 to convert interval count to breaks count (n breaks = n-1 intervals)
    if periods is not None:
        periods += 1

    if is_number(endpoint):
        # force consistency between start/end/freq (lower end if freq skips it)
        if com._all_not_none(start, end, freq):
            end -= (end - start) % freq

        # compute the period/start/end if unspecified (at most one)
        if periods is None:
            periods = int((end - start) // freq) + 1
        elif start is None:
            start = end - (periods - 1) * freq
        elif end is None:
            end = start + (periods - 1) * freq

        breaks = np.linspace(start, end, periods)
        if all(is_integer(x) for x in com._not_none(start, end, freq)):
            # np.linspace always produces float output
            breaks = maybe_downcast_to_dtype(breaks, 'int64')
    else:
        # delegate to the appropriate range function
        if isinstance(endpoint, Timestamp):
            range_func = date_range
        else:
            range_func = timedelta_range

        breaks = range_func(start=start, end=end, periods=periods, freq=freq)

    return IntervalIndex.from_breaks(breaks, name=name, closed=closed)
def conv_resol(resolution):
    """Map a pandas frequency specification to a resolution label.

    When pandas is available (``_HAS_PANDAS``), ``resolution`` is parsed with
    ``pandas.tseries.frequencies.to_offset`` and looked up in a fixed table
    (for example ``'5Min'`` -> ``'MINUTE_5'`` and ``'D'`` -> ``'DAY'``).

    Parameters
    ----------
    resolution : object
        Anything ``to_offset`` accepts when pandas is available; otherwise
        any value.

    Returns
    -------
    The matching label string, or ``resolution`` itself (unchanged) when
    pandas is unavailable or the parsed offset is not in the table.  In the
    latter case a warning is logged.  Errors raised by ``to_offset`` are
    propagated unchanged.
    """
    if not _HAS_PANDAS:
        return resolution

    from pandas.tseries.frequencies import to_offset

    labels = {
        to_offset('1Min'): 'MINUTE',
        to_offset('2Min'): 'MINUTE_2',
        to_offset('3Min'): 'MINUTE_3',
        to_offset('5Min'): 'MINUTE_5',
        to_offset('10Min'): 'MINUTE_10',
        to_offset('15Min'): 'MINUTE_15',
        to_offset('30Min'): 'MINUTE_30',
        to_offset('1H'): 'HOUR',
        to_offset('2H'): 'HOUR_2',
        to_offset('3H'): 'HOUR_3',
        to_offset('4H'): 'HOUR_4',
        to_offset('D'): 'DAY',
        to_offset('W'): 'WEEK',
        to_offset('M'): 'MONTH',
    }
    label = labels.get(to_offset(resolution))
    if label is not None:
        return label

    # No exception is being handled at this point, so formatting the current
    # traceback would only log an empty "NoneType: None" error; just warn.
    # The value is passed as a logging argument so that tuple-valued
    # resolutions cannot break the %-formatting of the message.
    logger.warning("conv_resol returns '%s'", resolution)
    return resolution
예제 #48
0
    def _init_dates(self, dates=None, freq=None):
        """
        Initialize dates

        Parameters
        ----------
        dates : array_like, optional
            An array like object containing dates.
        freq : str, tuple, datetime.timedelta, DateOffset or None, optional
            A frequency specification for either `dates` or the row labels from
            the endog / exog data.

        Raises
        ------
        ValueError
            If `freq` is given without any index, if `dates` is given but
            cannot be coerced to a date index, if `dates` is given but no
            frequency is available or inferable, if `freq` does not match the
            index, or if `freq` is given for an index that could not be
            coerced to dates.

        Notes
        -----
        Creates `self._index` and related attributes. `self._index` is always
        a Pandas index, and it is always Int64Index, DatetimeIndex, or
        PeriodIndex (a RangeIndex taken from the data is also kept as is).

        If Pandas objects, endog / exog may have any type of index. If it is
        an Int64Index with values 0, 1, ..., nobs-1 or if it is (coerceable to)
        a DatetimeIndex or PeriodIndex *with an associated frequency*, then it
        is called a "supported" index. Otherwise it is called an "unsupported"
        index.

        Supported indexes are standardized (i.e. a list of date strings is
        converted to a DatetimeIndex) and the result is put in `self._index`.

        Unsupported indexes are ignored, and a supported Int64Index is
        generated and put in `self._index`. Warnings are issued in this case
        to alert the user if the returned index from some operation (e.g.
        forecasting) is different from the original data's index. However,
        whenever possible (e.g. purely in-sample prediction), the original
        index is returned.

        The benefit of supported indexes is that they allow *forecasting*, i.e.
        it is possible to extend them in a reasonable way. Thus every model
        must have an underlying supported index, even if it is just a generated
        Int64Index.

        """

        # Get our index from `dates` if available, otherwise from whatever
        # Pandas index we might have retrieved from endog, exog
        if dates is not None:
            index = dates
        else:
            index = self.data.row_labels

        # Sanity check that we don't have a `freq` without an index
        if index is None and freq is not None:
            raise ValueError('Frequency provided without associated index.')

        # If an index is available, see if it is a date-based index or if it
        # can be coerced to one. (If it can't we'll fall back, below, to an
        # internal, 0, 1, ... nobs-1 integer index for modeling purposes)
        inferred_freq = False
        if index is not None:
            # Try to coerce to date-based index
            if not isinstance(index, (DatetimeIndex, PeriodIndex)):
                try:
                    # Only try to coerce non-numeric index types (string,
                    # list of date-times, etc.)
                    # Note that np.asarray(Float64Index([...])) yields an
                    # object dtype array in earlier versions of Pandas (and so
                    # will not have is_numeric_dtype == True), so explicitly
                    # check for it here. But note also that in very early
                    # Pandas (~0.12), Float64Index doesn't exist (and so the
                    # Statsmodels compat makes it an empty tuple, so in that
                    # case also check if the first element is a float.
                    _index = np.asarray(index)
                    if (is_numeric_dtype(_index)
                            or isinstance(index, Float64Index)
                            or (Float64Index == tuple()
                                and isinstance(_index[0], float))):
                        raise ValueError('Numeric index given')
                    # If a non-index Pandas series was given, only keep its
                    # values (because we must have a pd.Index type, below, and
                    # pd.to_datetime will return a Series when passed
                    # non-list-like objects)
                    if isinstance(index, Series):
                        index = index.values
                    # All coercion is done via pd.to_datetime
                    # Note: date coercion via pd.to_datetime does not handle
                    # string versions of PeriodIndex objects most of the time.
                    _index = to_datetime(index)
                    # Older versions of Pandas can sometimes fail here and
                    # return a numpy array - check to make sure it's an index
                    if not isinstance(_index, Index):
                        raise ValueError('Could not coerce to date index')
                    index = _index
                except Exception:
                    # Any coercion failure lands here; `Exception` (rather
                    # than a bare `except`) lets KeyboardInterrupt and
                    # SystemExit propagate.
                    # Only want to actually raise an exception if `dates` was
                    # provided but can't be coerced. If we got the index from
                    # the row_labels, we'll just ignore it and use the integer
                    # index below
                    if dates is not None:
                        raise ValueError('Non-date index index provided to'
                                         ' `dates` argument.')
            # Now, if we were given, or coerced, a date-based index, make sure
            # it has an associated frequency
            if isinstance(index, (DatetimeIndex, PeriodIndex)):
                # If no frequency, try to get an inferred frequency
                if freq is None and index.freq is None:
                    freq = index.inferred_freq
                    # If we got an inferred frequency, alert the user
                    if freq is not None:
                        inferred_freq = True
                        warnings.warn(
                            'No frequency information was'
                            ' provided, so inferred frequency %s'
                            ' will be used.' % freq, ValueWarning)

                # Convert the passed freq to a pandas offset object
                if freq is not None:
                    freq = to_offset(freq)

                # Now, if no frequency information is available from the index
                # itself or from the `freq` argument, raise an exception
                if freq is None and index.freq is None:
                    # But again, only want to raise the exception if `dates`
                    # was provided.
                    if dates is not None:
                        raise ValueError('No frequency information was'
                                         ' provided with date index and no'
                                         ' frequency could be inferred.')
                # However, if the index itself has no frequency information but
                # the `freq` argument is available (or was inferred), construct
                # a new index with an associated frequency
                elif freq is not None and index.freq is None:
                    resampled_index = date_range(start=index[0],
                                                 end=index[-1],
                                                 freq=freq)
                    if not inferred_freq and not resampled_index.equals(index):
                        raise ValueError('The given frequency argument could'
                                         ' not be matched to the given index.')
                    index = resampled_index
                # Finally, if the index itself has a frequency and there was
                # also a given frequency, raise an exception if they are not
                # equal
                elif (freq is not None and not inferred_freq
                      and not (index.freq == freq)):
                    raise ValueError('The given frequency argument is'
                                     ' incompatible with the given index.')
            # Finally, raise an exception if we could not coerce to date-based
            # but we were given a frequency argument
            elif freq is not None:
                raise ValueError('Given index could not be coerced to dates'
                                 ' but `freq` argument was provided.')

        # Get attributes of the index
        has_index = index is not None
        date_index = isinstance(index, (DatetimeIndex, PeriodIndex))
        int_index = isinstance(index, Int64Index)
        range_index = isinstance(index, RangeIndex)
        has_freq = index.freq is not None if date_index else None
        # Fallback 0, 1, ..., nobs-1 index used when the given one is
        # unsupported
        increment = Index(range(self.endog.shape[0]))
        is_increment = index.equals(increment) if int_index else None

        # Issue warnings for unsupported indexes
        if has_index and not (date_index or range_index or is_increment):
            warnings.warn(
                'An unsupported index was provided and will be'
                ' ignored when e.g. forecasting.', ValueWarning)
        if date_index and not has_freq:
            warnings.warn(
                'A date index has been provided, but it has no'
                ' associated frequency information and so will be'
                ' ignored when e.g. forecasting.', ValueWarning)

        # Construct the internal index
        index_generated = False

        if ((date_index and has_freq) or (int_index and is_increment)
                or range_index):
            _index = index
        else:
            _index = increment
            index_generated = True
        self._index = _index
        self._index_generated = index_generated
        self._index_none = index is None
        self._index_dates = date_index and not index_generated
        self._index_freq = self._index.freq if self._index_dates else None
        self._index_inferred_freq = inferred_freq

        # For backwards compatibility, set data.dates, data.freq
        self.data.dates = self._index if self._index_dates else None
        self.data.freq = self._index.freqstr if self._index_dates else None
예제 #49
0
def convert_index(
    to,
    interval=None,
    epoch="julian",
    input_ts="-",
    columns=None,
    start_date=None,
    end_date=None,
    round_index=None,
    dropna="no",
    clean=False,
    names=None,
    source_units=None,
    target_units=None,
    skiprows=None,
):
    """Convert datetime to/from Julian dates from different epochs.

    Parameters
    ----------
    to : str
        Direction of the conversion: ``"number"`` turns a datetime index
        into a count of `interval` units since `epoch`; ``"datetime"`` turns
        a numeric index back into datetimes.
    interval : str, optional
        Pandas offset alias for the unit of the numeric index.  Defaults to
        ``"D"``.  An anchoring suffix (``"W-SUN"``) is dropped with a warning.
    epoch : str
        One of the named epochs handled below, or any other string, which is
        then treated as the epoch date itself.
    start_date, end_date, round_index
        Applied through ``tsutils.common_kwds`` to whichever side of the
        conversion has the datetime index (before converting to numbers,
        after converting to datetimes).
    input_ts, columns, dropna, clean, names, source_units, target_units,
    skiprows
        Forwarded unchanged to ``tsutils.read_iso_ts`` /
        ``tsutils.common_kwds``.

    Returns
    -------
    The time series returned by ``tsutils.common_kwds`` with the converted
    index.

    Raises
    ------
    ValueError
        If `to` is neither ``"datetime"`` nor ``"number"``.
    """
    if to not in ("datetime", "number"):
        # Without this check the code below fails later on unbound locals.
        raise ValueError(
            "'to' must be either 'datetime' or 'number', got {0!r}".format(to))

    # Clip to start_date/end_date only while the index is a datetime index:
    # before the conversion for to="number", after it for to="datetime".
    if to == "datetime":
        index_type = "number"
        nstart_date = None
        nend_date = None
        nround_index = None
        post_start_date = start_date
        post_end_date = end_date
        post_round_index = round_index
    else:
        index_type = "datetime"
        nstart_date = start_date
        nend_date = end_date
        nround_index = round_index
        post_start_date = None
        post_end_date = None
        post_round_index = None

    tsd = tsutils.common_kwds(
        tsutils.read_iso_ts(input_ts,
                            skiprows=skiprows,
                            names=names,
                            index_type=index_type),
        start_date=nstart_date,
        end_date=nend_date,
        pick=columns,
        round_index=nround_index,
        dropna=dropna,
        source_units=source_units,
        target_units=target_units,
        clean=clean,
    )

    # Julian date -> value in the named epoch.
    allowed = {
        "julian": lambda x: x,
        "reduced": lambda x: x - 2400000,
        "modified": lambda x: x - 2400000.5,
        "truncated": lambda x: np.floor(x - 2440000.5),
        "dublin": lambda x: x - 2415020,
        "cnes": lambda x: x - 2433282.5,
        "ccsds": lambda x: x - 2436204.5,
        "lop": lambda x: x - 2448622.5,
        "lilian": lambda x: np.floor(x - 2299159.5),
        "rata_die": lambda x: np.floor(x - 1721424.5),
        "mars_sol": lambda x: (x - 2405522) / 1.02749,
        "unix": lambda x: (x - 2440587.5),
    }

    # Epochs that are conventionally counted in days.
    dailies = [
        "julian",
        "reduced",
        "modified",
        "truncated",
        "dublin",
        "cnes",
        "ccsds",
        "lop",
        "lilian",
        "rata_die",
        "mars_sol",
    ]

    # Origin passed to pd.to_datetime for each named epoch.
    epoch_dates = {
        "julian": "julian",
        "reduced": "1858-11-16T12",
        "modified": "1858-11-17T00",
        "truncated": "1968-05-24T00",
        "dublin": "1899-12-31T12",
        "cnes": "1950-01-01T00",
        "ccsds": "1958-01-01T00",
        "lop": "1992-01-01T00",
        "lilian": "1582-10-15T00",
        "rata_die": "0001-01-01T00",
        "mars_sol": "1873-12-29T12",
        "unix": "1970-01-01T00",
    }

    if interval is None:
        interval = "D"
    else:
        words = interval.split("-")
        if len(words) == 2:
            warnings.warn("""
*
*   The epoch keyword "{0}" overrides the anchoring suffix "{1}".
*
""".format(epoch, words[1]))

            interval = words[0]

    if epoch == "unix" and interval not in ["S", "s"]:
        warnings.warn("""
*
*   Typically the unix epoch would has an interval of 'S' (seconds).
*   Instead you gave {0}.
*
""".format(interval))

    if epoch in dailies and interval != "D":
        warnings.warn("""
*
*   Typically the {0} epoch would has an interval of 'D' (days).
*   Instead you gave {1}.
*
""".format(epoch, interval))

    if to == "number":
        # Index must be datetime - let's make sure
        tsd.index = pd.to_datetime(tsd.index)

        # Number of `interval` units in one day.
        frac = to_offset("D").nanos / to_offset(interval).nanos

        julian = tsd.index.to_julian_date()
        # Only the table lookup is guarded, so a KeyError raised by the
        # conversion itself is no longer mistaken for an unknown epoch.
        try:
            from_julian = allowed[epoch]
        except KeyError:
            # Not a named epoch: treat the string as the epoch date.
            epoch_date = tsutils.parsedate(epoch)
            tsd.index = (julian - epoch_date.to_julian_date()) * frac
        else:
            tsd.index = from_julian(julian) * frac

        tsd = tsutils.memory_optimize(tsd)

    else:
        tsd.index = pd.to_datetime(tsd.index.values,
                                   origin=epoch_dates.get(epoch, epoch),
                                   unit=interval)

    if names is None:
        tsd.index.name = "{0}_date".format(epoch)

    tsd = tsutils.common_kwds(tsd,
                              start_date=post_start_date,
                              end_date=post_end_date,
                              round_index=post_round_index)
    return tsd
예제 #50
0
def is_datetime_not_remain(obj: datetime.datetime, freq: str) -> bool:
    """Return True when ``obj`` lies exactly on a multiple of ``freq``.

    The POSIX timestamp of ``obj`` is divided by the length of ``freq`` in
    seconds; the result is True when there is no remainder.  ``freq`` must
    resolve to a fixed-length (tick) offset such as ``"15min"`` or ``"1h"``.
    A naive ``obj`` is interpreted in local time by ``datetime.timestamp()``.
    """
    # ``Tick.delta`` is deprecated in recent pandas; ``nanos`` exposes the
    # same fixed span without going through a Timedelta.
    period_seconds = to_offset(freq).nanos / 1e9
    return obj.timestamp() % period_seconds == 0
예제 #51
0
    def intersection(self, other, sort=False):
        """Return the intersection of this index with ``other``.

        Equal or empty operands are answered directly.  Operands of another
        type, or pairs whose frequencies are missing, different, not
        anchored, or whose values are not monotonic, go through
        ``Index.intersection``; the frequency of that result is then
        re-inferred.  Otherwise the overlapping range is sliced out of the
        operand that starts first.
        """
        self._validate_sort_keyword(sort)
        self._assert_can_do_setop(other)

        if self.equals(other):
            return self._get_reconciled_name_object(other)

        if len(self) == 0:
            return self.copy()
        if len(other) == 0:
            return other.copy()

        own_type = type(self)

        if not isinstance(other, own_type):
            common = Index.intersection(self, other, sort=sort)
            if isinstance(common, own_type) and common.freq is None:
                # TODO: find a less code-smelly way to set this
                common._data._freq = to_offset(common.inferred_freq)
            return common

        if (other.freq is None or self.freq is None
                or other.freq != self.freq or not other.freq.isAnchored()
                or not self.is_monotonic or not other.is_monotonic):
            common = Index.intersection(self, other, sort=sort)

            # The freq carried by ``common`` may be wrong for its values at
            # this point, so drop it before rebuilding the index.
            # TODO: find a less code-smelly way to set this
            common._data._freq = None

            copy_kwargs = {"name": common.name}
            if hasattr(self, "tz"):
                copy_kwargs["tz"] = common.tz
            copy_kwargs["freq"] = None
            common = self._shallow_copy(common._values, **copy_kwargs)

            if common.freq is None:
                # TODO: find a less code-smelly way to set this
                common._data._freq = to_offset(common.inferred_freq)
            return common

        # Order the two ranges by their first element.
        if self[0] <= other[0]:
            earlier, later = self, other
        else:
            earlier, later = other, self

        # The overlap starts where the later range starts and stops at the
        # smaller of the two last elements.
        start = later[0]
        end = min(earlier[-1], later[-1])

        if end < start:
            return own_type(data=[])

        bounds = earlier.slice_locs(start, end)
        return self._shallow_copy(earlier.values[slice(*bounds)])
예제 #52
0
 def setup(self, freq, is_offset):
     if is_offset:
         self.freq = to_offset(freq)
     else:
         self.freq = freq
예제 #53
0
def extract_nwis_df(nwis_dict, interpolate=True):
    """Returns a Pandas dataframe and a metadata dict from the NWIS response
    object or the json dict of the response.

    Args:
        nwis_dict (obj):
            the json from a response object as returned by get_nwis().json().
            Alternatively, you may supply the response object itself; anything
            that is not a dict has its ``.json()`` method called.
        interpolate (bool):
            when True (the default) and at least one series has a non-zero
            frequency, NaNs in the data columns of the combined dataframe are
            filled with ``DataFrame.interpolate()``.

    Returns:
        a tuple ``(cleanDF, meta)``: the combined pandas dataframe, indexed by
        ``datetimeUTC``, and a dict keyed by site id holding ``siteName``,
        ``siteLatLongSrs`` and a ``timeSeries`` dict with the frequency, unit
        and description of every parameter.

    Raises:
        HydroNoDataError
            when the request is valid, but NWIS has no data for
            the parameters provided in the request.

        HydroUserWarning
            when one dataset is sampled at a lower frequency than
            another dataset in the same request.
    """
    if not isinstance(nwis_dict, dict):
        nwis_dict = nwis_dict.json()

    # strip header and all metadata.
    ts = nwis_dict["value"]["timeSeries"]
    if not ts:
        # raise a HydroNoDataError if NWIS returns an empty set.
        #
        # Ideally, an empty set exception would be raised when the request
        # is first returned, but I do it here so that the data doesn't get
        # extracted twice.
        # TODO: raise this exception earlier??
        #
        # ** Interactive sessions should have an error raised.
        #
        # **Automated systems should catch these errors and deal with them.
        # In this case, if NWIS returns an empty set, then the request
        # needs to be reconsidered. The request was valid somehow, but
        # there is no data being collected.

        raise exceptions.HydroNoDataError("The NWIS reports that it does not "
                                          "have any data for this request.")

    # create a list of time series;
    # set the index, set the data types, replace NaNs, sort, find the first and last

    collection = []
    starts = []
    ends = []
    freqs = []
    meta = {}
    for series in ts:
        # The series name is colon-separated: agency:site:parameter:statistic
        series_name = series["name"]
        temp_name = series_name.split(":")
        agency = str(temp_name[0])
        site_id = agency + ":" + str(temp_name[1])
        parameter_cd = str(temp_name[2])
        stat = str(temp_name[3])  # parsed but not used further below
        siteName = series["sourceInfo"]["siteName"]
        siteLatLongSrs = series["sourceInfo"]["geoLocation"]["geogLocation"]
        noDataValues = series["variable"]["noDataValue"]
        variableDescription = series["variable"]["variableDescription"]
        unit = series["variable"]["unit"]["unitCode"]
        data = series["values"][0]["value"]
        if not data:
            # This parameter has no data. Skip to next series.
            continue
        # A series with a single reading only contains the most recent
        # value (see Issue #49); it is handled by the zero-frequency
        # branches below.
        qualifiers = series_name + "_qualifiers"
        DF = pd.DataFrame(data=data)
        DF.index = pd.to_datetime(DF.pop("dateTime"), utc=True)
        DF["value"] = DF["value"].astype(float)
        DF = DF.replace(to_replace=noDataValues, value=np.nan)
        DF["qualifiers"] = DF["qualifiers"].apply(lambda x: ",".join(x))
        DF.rename(columns={
            "qualifiers": qualifiers,
            "value": series_name
        },
                  inplace=True)
        DF.sort_index(inplace=True)
        local_start = DF.index.min()
        local_end = DF.index.max()
        starts.append(local_start)
        ends.append(local_end)
        local_freq = calc_freq(DF.index)
        freqs.append(local_freq)
        if not DF.index.is_unique:
            print("Series index for " + series_name +
                  " is not unique. Attempting to drop identical rows.")
            DF = DF.drop_duplicates(keep="first")
            if not DF.index.is_unique:
                print(
                    "Series index for " + series_name +
                    " is STILL not unique. Dropping first rows with duplicated date."
                )
                DF = DF[~DF.index.duplicated(keep="first")]
        if local_freq > to_offset("0min"):
            local_clean_index = pd.date_range(start=local_start,
                                              end=local_end,
                                              freq=local_freq,
                                              tz="UTC")
            # if len(local_clean_index) != len(DF):
            # This condition happens quite frequently with missing data.
            # print(str(series_name) + "-- clean index length: "+ str(len(local_clean_index)) + " Series length: " + str(len(DF)))
            DF = DF.reindex(index=local_clean_index, copy=True)
        else:
            # The dataframe DF must contain only the most recent data.
            pass
        qual_cols = DF.columns.str.contains("_qualifiers")
        # https://stackoverflow.com/questions/21998354/pandas-wont-fillna-inplace
        # Instead, create a temporary dataframe, fillna, then copy back into original.
        DFquals = DF.loc[:, qual_cols].fillna("hf.missing")
        DF.loc[:, qual_cols] = DFquals

        if local_freq > pd.Timedelta(to_offset("0min")):
            variableFreq_str = str(to_offset(local_freq))
        else:
            variableFreq_str = str(to_offset("0min"))
        parameter_info = {
            "variableFreq": variableFreq_str,
            "variableUnit": unit,
            "variableDescription": variableDescription,
        }
        site_info = {
            "siteName": siteName,
            "siteLatLongSrs": siteLatLongSrs,
            "timeSeries": {},
        }
        # if site is not in meta keys, add it.
        if site_id not in meta:
            meta[site_id] = site_info
        # Add the variable info to the site dict.
        meta[site_id]["timeSeries"][parameter_cd] = parameter_info
        collection.append(DF)

    if len(collection) < 1:
        # It seems like this condition should not occur. The NWIS trims the
        # response and returns an empty nwis_dict['value']['timeSeries']
        # if none of the parameters requested have data.
        # If at least one of the paramters have data,
        # then the empty series will get delivered, but with no data.
        # Compare these requests:
        # empty:               https://nwis.waterservices.usgs.gov/nwis/iv/?format=json&sites=01570500&startDT=2018-06-01&endDT=2018-06-01&parameterCd=00045
        # one empty, one full: https://nwis.waterservices.usgs.gov/nwis/iv/?format=json&sites=01570500&startDT=2018-06-01&endDT=2018-06-01&parameterCd=00045,00060
        raise exceptions.HydroNoDataError("The NWIS does not have any data for"
                                          " the requested combination of sites"
                                          ", parameters, and dates.")
    startmin = min(starts)
    endmax = max(ends)
    # Remove all frequencies of zero from freqs list.
    zero = to_offset("0min")
    freqs2 = [freq for freq in freqs if freq > zero]
    if len(freqs2) > 0:
        # Use the filtered list: a zero frequency (single-reading series)
        # must not become the step of the shared index.
        freqmin = min(freqs2)
        freqmax = max(freqs2)
        if freqmin != freqmax:
            warnings.warn(
                "One or more datasets in this request is going to be "
                "'upsampled' to " + str(freqmin) + " because the data "
                "were collected at a lower frequency of " + str(freqmax),
                exceptions.HydroUserWarning,
            )
        clean_index = pd.date_range(start=startmin,
                                    end=endmax,
                                    freq=freqmin,
                                    tz="UTC")
        cleanDF = pd.DataFrame(index=clean_index)
        for dataset in collection:
            cleanDF = pd.concat([cleanDF, dataset], axis=1)
        # Replace lines with missing _qualifier flags with hf.upsampled
        qual_cols = cleanDF.columns.str.contains("_qualifiers")
        cleanDFquals = cleanDF.loc[:, qual_cols].fillna("hf.upsampled")
        cleanDF.loc[:, qual_cols] = cleanDFquals
        if interpolate:
            # TODO: mark interpolated values with 'hf.interp'
            # select data, then replace Nans with interpolated values.
            data_cols = cleanDF.columns.str.contains(r"[0-9]$")
            cleanDFdata = cleanDF.loc[:, data_cols].interpolate()
            cleanDF.loc[:, data_cols] = cleanDFdata
    else:
        # If datasets only contain most recent data, then
        # don't set an index or a freq. Just concat all of the datasets.
        cleanDF = pd.concat(collection, axis=1)

    cleanDF.index.name = "datetimeUTC"

    # Final sanity pass on the frame that is actually returned (this used to
    # act on the last per-series frame, which had no effect on the result).
    if not cleanDF.index.is_unique:
        cleanDF = cleanDF[~cleanDF.index.duplicated(keep="first")]
    if not cleanDF.index.is_monotonic_increasing:
        cleanDF = cleanDF.sort_index(axis=0)

    return cleanDF, meta
예제 #54
0
파일: timedeltas.py 프로젝트: yasenv/pandas
    def __new__(cls,
                data=None,
                unit=None,
                freq=None,
                start=None,
                end=None,
                periods=None,
                closed=None,
                dtype=None,
                copy=False,
                name=None,
                verify_integrity=True):
        """Build a new index from ``data`` or from a range specification.

        Parameters
        ----------
        data : collection, optional
            Values to convert with ``sequence_to_td64ns``.  When None, the
            index is generated from ``start``/``end``/``periods``/``freq``.
        unit : optional
            Forwarded to ``sequence_to_td64ns``.
        freq : optional
            Requested frequency; first passed through
            ``dtl.maybe_infer_freq``, which also reports whether the
            frequency should be inferred from the data.
        start, end, periods, closed : optional
            Only used when ``data`` is None; forwarded to
            ``cls._generate_range``.
        dtype : optional
            Not referenced in this method body.
        copy : bool, default False
            Forwarded to ``sequence_to_td64ns``; also selects ``copy()`` over
            ``_shallow_copy()`` on the TimedeltaIndex pass-through path.
        name : optional
            Name given to the result.
        verify_integrity : bool, default True
            When True, a non-empty result with an explicit ``freq`` is
            checked with ``cls._validate_frequency``.

        Raises
        ------
        TypeError
            If ``data`` is a scalar.
        ValueError
            If the frequency inferred while converting ``data`` differs from
            the passed ``freq``.
        """

        freq, freq_infer = dtl.maybe_infer_freq(freq)

        if data is None:
            # TODO: Remove this block and associated kwargs; GH#20535
            result = cls._generate_range(start,
                                         end,
                                         periods,
                                         freq,
                                         closed=closed)
            result.name = name
            return result

        if is_scalar(data):
            raise TypeError(
                '{cls}() must be called with a '
                'collection of some kind, {data} was passed'.format(
                    cls=cls.__name__, data=repr(data)))

        # Nothing to change on an existing TimedeltaIndex: hand back a copy
        # or a shallow copy without converting the values again.
        if isinstance(data, TimedeltaIndex) and freq is None and name is None:
            if copy:
                return data.copy()
            else:
                return data._shallow_copy()

        # - Cases checked above all return/raise before reaching here - #

        data, inferred_freq = sequence_to_td64ns(data, copy=copy, unit=unit)
        if inferred_freq is not None:
            if freq is not None and freq != inferred_freq:
                raise ValueError('Inferred frequency {inferred} from passed '
                                 'values does not conform to passed frequency '
                                 '{passed}'.format(inferred=inferred_freq,
                                                   passed=freq.freqstr))
            elif freq_infer:
                # Inference was requested and the conversion already
                # supplied a frequency, so the later inference step is
                # switched off.
                freq = inferred_freq
                freq_infer = False
            # The validation below is skipped whenever the conversion
            # reported a frequency.
            verify_integrity = False

        subarr = cls._simple_new(data, name=name, freq=freq)
        # check that we are matching freqs
        if verify_integrity and len(subarr) > 0:
            if freq is not None and not freq_infer:
                cls._validate_frequency(subarr, freq)

        # Inference still pending: derive the frequency from the values.
        if freq_infer:
            subarr.freq = to_offset(subarr.inferred_freq)

        return subarr
예제 #55
0
def estimate_air_temp(year_start, surfrad, lat, lon, cs):
    """
    Use clear sky temps scaled by daily ratio of measured to clear sky global
    insolation.

    Parameters
    ----------
    year_start : str
        SURFRAD data year
    surfrad : pandas.DataFrame
        surfrad data frame
    lat : float
        latitude in degrees north of equator [deg]
    lon : float
        longitude in degrees east of prime meridian [deg]
    cs : pandas.DataFrame
        clear sky irradiances [W/m^2]

    Returns
    -------
    est_air_temp : pandas.DataFrame
        estimated air temperature in Celsius [C]
    temp_adj : pandas.Series
        temperature adjustment [C]
    ghi_ratio : pandas.Series
        ratio of  daily SURFRAD to clearsky GHI insolation
    daily_delta_temp : pandas.Series
        daily temperature range, max - min, in Kelvin [K], indexed by day
    cs_temp_air : pandas.Series
        clear sky air temperatures in Celsius [C]

    """
    # one day more than the calendar year (366 days, or 367 in a leap year)
    daze = 367 if calendar.isleap(int(year_start)) else 366
    # create that many days of minutes for the given year at UTC
    # ('min' is the minute alias; the older 'T' spelling is deprecated)
    year_minutes = pd.date_range(start=year_start,
                                 freq='min',
                                 periods=daze * DAYMINUTES,
                                 tz='UTC')
    # clear sky temperature
    cs_temp_air = rdtools.clearsky_temperature.get_clearsky_tamb(
        year_minutes, lat, lon)
    # organize by day
    cs_temp_daily = cs_temp_air.values.reshape((daze, DAYMINUTES)) + KELVINS
    # get daily temperature range
    daily_delta_temp = np.array([td.max() - td.min() for td in cs_temp_daily])
    daily_delta_temp = pd.Series(daily_delta_temp,
                                 index=cs_temp_air.resample('D').mean().index)
    # calculate ratio of daily insolation versus clearsky
    ghi_ratio = surfrad.ghi.resample('D').sum() / cs.ghi.resample('D').sum()
    ghi_ratio = ghi_ratio.rename('ghi_ratio')
    # apply ghi ratio to next day, wrap days to start at day 1
    day1 = ghi_ratio.index[0]
    ghi_ratio.index = ghi_ratio.index + to_offset('1D')
    # set day 1 estimated air temp equal to last day
    ghi_ratio[day1] = ghi_ratio.iloc[-1]
    # fix day 1 is added last, so out of order
    ghi_ratio = ghi_ratio.sort_index()
    # scale daily temperature delta by the ratio of insolation from day before
    temp_adj = (ghi_ratio -
                1.0) * daily_delta_temp[ghi_ratio.index]  # use next day
    temp_adj = temp_adj.rename('temp_adj')
    # interpolate smoothly, but fill forward minutes in last day
    # (ffill replaces the deprecated DataFrame.pad alias)
    est_air_temp = pd.concat([
        cs_temp_air,
        ghi_ratio.resample('1min').interpolate(),
        temp_adj.resample('1min').interpolate()
    ],
                             axis=1).ffill()
    # Tadj = Tcs + (GHI/CS_GHI - 1) * DeltaT
    # if GHI/CS_GHI > 1 then adjustment > DeltaT
    # NOTE(review): assumes the clear sky series is named
    # 'Clear Sky Temperature (C)' -- confirm against rdtools.
    est_air_temp['Adjusted Temp (C)'] = (
        est_air_temp['Clear Sky Temperature (C)'] + est_air_temp.temp_adj)
    return est_air_temp, temp_adj, ghi_ratio, daily_delta_temp, cs_temp_air
예제 #56
0
    def freq(self, value):
        """Set the frequency, validating it against this object first.

        ``None`` is stored as-is.  Anything else is converted with
        ``frequencies.to_offset`` and checked with ``_validate_frequency``
        before it is stored.
        """
        if value is None:
            self._freq = value
            return

        offset = frequencies.to_offset(value)
        self._validate_frequency(self, offset)
        self._freq = offset
예제 #57
0
파일: lag.py 프로젝트: yifeim/gluon-ts
def get_lags_for_frequency(freq_str: str,
                           lag_ub: int = 1200,
                           num_lags: Optional[int] = None) -> List[int]:
    """
    Generates a list of lags that are appropriate for the given frequency string.

    Every frequency gets the lags [1, 2, 3, 4, 5, 6, 7].  The remaining lags
    point at the same `season` (+/- `delta`) in the previous `k` cycles, with
    `delta` and `k` fixed per granularity below.

    Parameters
    ----------

    freq_str
        Frequency string of the form [multiple][granularity] such as "12H", "5min", "1D" etc.

    lag_ub
        The maximum value for a lag.

    num_lags
        Maximum number of lags; by default all generated lags are returned
    """

    def _seasonal(period, multiple, num_cycles, delta):
        # One group of lags per previous cycle of length ``period``
        # (expressed in units of the base granularity).
        return [
            _make_lags(cycle * period // multiple, delta)
            for cycle in range(1, num_cycles + 1)
        ]

    def _minute_lags(multiple):
        # previous 3 hours
        return _seasonal(60, multiple, 3, 2)

    def _hour_lags(multiple):
        # previous 7 days
        return _seasonal(24, multiple, 7, 1)

    def _day_lags(multiple):
        # previous 4 weeks, plus the last month
        return _seasonal(7, multiple, 4, 1) + [_make_lags(30 // multiple, 1)]

    def _week_lags(multiple):
        # previous 3 years, plus the previous 4, 8 and 12 weeks
        recent = [4 // multiple, 8 // multiple, 12 // multiple]
        return _seasonal(52, multiple, 3, 1) + [recent]

    def _month_lags(multiple):
        # previous 3 years
        return _seasonal(12, multiple, 3, 1)

    offset = to_offset(freq_str)
    # normalize offset name, so that both `W` and `W-SUN` refer to `W`
    granularity = norm_freq_str(offset.name)
    multiple = offset.n

    if granularity == "A":
        lag_groups = []
    elif granularity == "Q":
        assert (
            offset.n == 1
        ), "Only multiple 1 is supported for quarterly. Use x month instead."
        lag_groups = _month_lags(multiple * 3.0)
    elif granularity == "M":
        lag_groups = _month_lags(multiple)
    elif granularity == "W":
        lag_groups = _week_lags(multiple)
    elif granularity == "D":
        lag_groups = _day_lags(multiple) + _week_lags(multiple / 7.0)
    elif granularity == "B":
        # todo find good lags for business day
        lag_groups = []
    elif granularity == "H":
        lag_groups = (_hour_lags(multiple) +
                      _day_lags(multiple / 24.0) +
                      _week_lags(multiple / (24.0 * 7)))
    elif granularity == "T":
        # minutes
        lag_groups = (_minute_lags(multiple) +
                      _hour_lags(multiple / 60.0) +
                      _day_lags(multiple / (60.0 * 24)) +
                      _week_lags(multiple / (60.0 * 24 * 7)))
    else:
        raise Exception("invalid frequency")

    # Flatten, keep only lags in (7, lag_ub], drop duplicates.
    seasonal = {
        int(lag)
        for group in lag_groups
        for lag in group
        if 7 < lag <= lag_ub
    }
    all_lags = [1, 2, 3, 4, 5, 6, 7] + sorted(seasonal)

    return all_lags[:num_lags]
예제 #58
0
def interval_range(start=None,
                   end=None,
                   periods=None,
                   freq=None,
                   name=None,
                   closed='right'):
    """
    Return a fixed frequency IntervalIndex

    Parameters
    ----------
    start : numeric or datetime-like, default None
        Left bound for generating intervals
    end : numeric or datetime-like, default None
        Right bound for generating intervals
    periods : integer, default None
        Number of periods to generate
    freq : numeric, string, or DateOffset, default None
        The length of each interval. Must be consistent with the type of start
        and end, e.g. 2 for numeric, or '5H' for datetime-like.  Default is 1
        for numeric and 'D' (calendar daily) for datetime-like.
    name : string, default None
        Name of the resulting IntervalIndex
    closed : string, default 'right'
        options are: 'left', 'right', 'both', 'neither'

    Notes
    -----
    Exactly two of ``start``, ``end`` and ``periods`` must be specified.

    Returns
    -------
    rng : IntervalIndex

    Examples
    --------

    Numeric ``start`` and  ``end`` is supported.

    >>> pd.interval_range(start=0, end=5)
    IntervalIndex([(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]]
                  closed='right', dtype='interval[int64]')

    Additionally, datetime-like input is also supported.

    >>> pd.interval_range(start='2017-01-01', end='2017-01-04')
    IntervalIndex([(2017-01-01, 2017-01-02], (2017-01-02, 2017-01-03],
                   (2017-01-03, 2017-01-04]]
                  closed='right', dtype='interval[datetime64[ns]]')

    The ``freq`` parameter specifies the frequency between the left and right
    endpoints of the individual intervals within the ``IntervalIndex``.  For
    numeric ``start`` and ``end``, the frequency must also be numeric.

    >>> pd.interval_range(start=0, periods=4, freq=1.5)
    IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]]
                  closed='right', dtype='interval[float64]')

    Similarly, for datetime-like ``start`` and ``end``, the frequency must be
    convertible to a DateOffset.

    >>> pd.interval_range(start='2017-01-01', periods=3, freq='MS')
    IntervalIndex([(2017-01-01, 2017-02-01], (2017-02-01, 2017-03-01],
                   (2017-03-01, 2017-04-01]]
                  closed='right', dtype='interval[datetime64[ns]]')

    The ``closed`` parameter specifies which endpoints of the individual
    intervals within the ``IntervalIndex`` are closed.

    >>> pd.interval_range(end=5, periods=4, closed='both')
    IntervalIndex([[1, 2], [2, 3], [3, 4], [4, 5]]
                  closed='both', dtype='interval[int64]')
    """
    n_given = com._count_not_none(start, end, periods)
    if n_given != 2:
        raise ValueError('Of the three parameters: start, end, and periods, '
                         'exactly two must be specified')

    start = com._maybe_box_datetimelike(start)
    end = com._maybe_box_datetimelike(end)
    # Whichever bound was supplied decides numeric vs datetime-like handling.
    endpoint = next(com._not_none(start, end))

    if not _is_valid_endpoint(start):
        raise ValueError(
            'start must be numeric or datetime-like, got {start}'.format(
                start=start))

    if not _is_valid_endpoint(end):
        raise ValueError(
            'end must be numeric or datetime-like, got {end}'.format(end=end))

    if is_float(periods):
        periods = int(periods)
    elif periods is not None and not is_integer(periods):
        raise TypeError(
            'periods must be a number, got {periods}'.format(periods=periods))

    if not freq:
        freq = 1 if is_number(endpoint) else 'D'
    if not is_number(freq):
        try:
            freq = to_offset(freq)
        except ValueError:
            raise ValueError('freq must be numeric or convertible to '
                             'DateOffset, got {freq}'.format(freq=freq))

    # verify type compatibility (all three pairs are always evaluated)
    bounds_match = _is_type_compatible(start, end)
    start_matches_freq = _is_type_compatible(start, freq)
    end_matches_freq = _is_type_compatible(end, freq)
    if not (bounds_match and start_matches_freq and end_matches_freq):
        raise TypeError("start, end, freq need to be type compatible")

    if is_number(endpoint):
        if periods is None:
            periods = int((end - start) // freq)

        if start is None:
            start = end - periods * freq

        # force end to be consistent with freq (lower if freq skips over end)
        end = start + periods * freq

        # end + freq for inclusive endpoint
        breaks = np.arange(start, end + freq, freq)
    else:
        # add one to account for interval endpoints (n breaks = n-1 intervals)
        if periods is not None:
            periods += 1
        if isinstance(endpoint, Timestamp):
            make_breaks = date_range
        else:
            make_breaks = timedelta_range
        breaks = make_breaks(start=start,
                             end=end,
                             periods=periods,
                             freq=freq)

    return IntervalIndex.from_breaks(breaks, name=name, closed=closed)
예제 #59
0
    def test_anchored_shortcuts(self):
        """Anchored aliases resolve to the expected offsets; bad ones raise."""
        result = frequencies.to_offset('W')
        expected = frequencies.to_offset('W-SUN')
        assert (result == expected)

        result1 = frequencies.to_offset('Q')
        result2 = frequencies.to_offset('Q-DEC')
        expected = offsets.QuarterEnd(startingMonth=12)
        assert (result1 == expected)
        assert (result2 == expected)

        result1 = frequencies.to_offset('Q-MAY')
        expected = offsets.QuarterEnd(startingMonth=5)
        assert (result1 == expected)

        result1 = frequencies.to_offset('SM')
        result2 = frequencies.to_offset('SM-15')
        expected = offsets.SemiMonthEnd(day_of_month=15)
        assert (result1 == expected)
        assert (result2 == expected)

        result = frequencies.to_offset('SM-1')
        expected = offsets.SemiMonthEnd(day_of_month=1)
        assert (result == expected)

        result = frequencies.to_offset('SM-27')
        expected = offsets.SemiMonthEnd(day_of_month=27)
        assert (result == expected)

        result = frequencies.to_offset('SMS-2')
        expected = offsets.SemiMonthBegin(day_of_month=2)
        assert (result == expected)

        result = frequencies.to_offset('SMS-27')
        expected = offsets.SemiMonthBegin(day_of_month=27)
        assert (result == expected)

        # ensure invalid cases fail as expected
        # NOTE: every entry needs its own trailing comma; adjacent string
        # literals are silently concatenated ('SMS-BYR' 'BSMS' used to be
        # tested as the single anchor 'SMS-BYRBSMS').
        invalid_anchors = [
            'SM-0', 'SM-28', 'SM-29', 'SM-FOO', 'BSM', 'SM--1', 'SMS-1',
            'SMS-28', 'SMS-30', 'SMS-BAR', 'SMS-BYR',
            'BSMS', 'SMS--2',
        ]
        for invalid_anchor in invalid_anchors:
            with tm.assert_raises_regex(ValueError, 'Invalid frequency: '):
                frequencies.to_offset(invalid_anchor)
예제 #60
0
def test_to_offset_multiple():
    """Compound frequency strings collapse into a single offset."""
    # The same compound string with and without whitespace.
    combined = frequencies.to_offset('2h30min')
    assert (combined == frequencies.to_offset('2h 30min'))
    assert (combined == offsets.Minute(150))

    # (frequency string, offset it must resolve to)
    cases = [
        ('2h30min15s', offsets.Second(150 * 60 + 15)),
        ('2h 60min', offsets.Hour(3)),
        ('15l500u', offsets.Micro(15500)),
        ('10s75L', offsets.Milli(10075)),
        ('2800N', offsets.Nano(2800)),
        ('2SM', offsets.SemiMonthEnd(2)),
        ('2SM-16', offsets.SemiMonthEnd(2, day_of_month=16)),
        ('2SMS-14', offsets.SemiMonthBegin(2, day_of_month=14)),
        ('2SMS-15', offsets.SemiMonthBegin(2)),
    ]
    for freqstr, expected in cases:
        assert (frequencies.to_offset(freqstr) == expected)

    # malformed
    rejected = False
    try:
        frequencies.to_offset('2h20m')
    except ValueError:
        rejected = True
    assert (rejected)