def _maybe_convert_timedelta(self, other):
    """Convert a timedelta-like object to an integer multiple of self.freq.

    Parameters
    ----------
    other : timedelta, np.timedelta64, offsets.Tick, Timedelta,
        offsets.DateOffset, or np.ndarray (integer or timedelta64 dtype)

    Returns
    -------
    int or ndarray of int64
        The number of freq units represented by ``other``.

    Raises
    ------
    ValueError
        If ``other`` cannot be expressed as an exact multiple of self.freq.
    """
    if isinstance(other, (timedelta, np.timedelta64,
                          offsets.Tick, Timedelta)):
        offset = frequencies.to_offset(self.freq.rule_code)
        if isinstance(offset, offsets.Tick):
            nanos = tslib._delta_to_nanoseconds(other)
            offset_nanos = tslib._delta_to_nanoseconds(offset)
            # only an exact multiple of the freq is convertible
            if nanos % offset_nanos == 0:
                return nanos // offset_nanos
    elif isinstance(other, offsets.DateOffset):
        freqstr = frequencies.get_standard_freq(other)
        base = frequencies.get_base_alias(freqstr)
        if base == self.freq.rule_code:
            return other.n
    elif isinstance(other, np.ndarray):
        if com.is_integer_dtype(other):
            return other
        elif com.is_timedelta64_dtype(other):
            offset = frequencies.to_offset(self.freq)
            if isinstance(offset, offsets.Tick):
                nanos = tslib._delta_to_nanoseconds(other)
                offset_nanos = tslib._delta_to_nanoseconds(offset)
                # BUG FIX: previously `(nanos % offset_nanos).all() == 0`,
                # which is True only when `.all()` is False, i.e. when at
                # least one remainder is zero.  We need EVERY element to be
                # an exact multiple of the freq before converting.
                if (nanos % offset_nanos == 0).all():
                    return nanos // offset_nanos
    msg = "Input has different freq from PeriodIndex(freq={0})"
    raise ValueError(msg.format(self.freqstr))
def create_input(self, trace, weather_source):
    ''' Build a predictably-formatted model input DataFrame for one trace.

    Parameters
    ----------
    trace : eemeter.structures.EnergyTrace
        The source of energy data for inclusion in model input.
    weather_source : eemeter.weather.WeatherSourceBase
        The source of weather data.

    Returns
    -------
    input_df : pandas.DataFrame
        DatetimeIndex-ed frame with "energy" and "tempF" columns, directly
        usable as input to applicable model.fit() methods.
    '''
    index_freq = trace.data.index.freq

    # Refuse to fabricate data: never resample a coarser series down to
    # a finer model frequency.
    if (index_freq is not None and
            to_offset(index_freq) > to_offset(self.freq_str)):
        raise ValueError(
            "Will not upsample '{}' to '{}'"
            .format(trace.data.index.freq, self.freq_str)
        )

    energy = trace.data.value.resample(self.freq_str).sum()
    tempF = weather_source.indexed_temperatures(energy.index, "degF")

    return pd.DataFrame(
        {"energy": energy, "tempF": tempF},
        columns=["energy", "tempF"],
    )
def test_to_offset_pd_timedelta_invalid():
    # A zero-length Timedelta carries no frequency information (gh-9064).
    zero_td = Timedelta(microseconds=0)
    expected_msg = "Invalid frequency: 0 days 00:00:00"

    with pytest.raises(ValueError, match=expected_msg):
        frequencies.to_offset(zero_td)
def __new__(cls, values, freq=None, start=None, end=None, periods=None,
            closed=None):
    # Normalize an alias/string freq into a DateOffset; the 'infer'
    # sentinel is resolved after construction.
    needs_conversion = (freq is not None and freq != 'infer'
                        and not isinstance(freq, DateOffset))
    if needs_conversion:
        freq = to_offset(freq)

    if periods is not None:
        # Accept whole-number floats; reject anything non-numeric.
        if lib.is_float(periods):
            periods = int(periods)
        elif not lib.is_integer(periods):
            raise TypeError('`periods` must be a number, got {periods}'
                            .format(periods=periods))

    if values is None:
        # No data supplied: generate a range from start/end/periods/freq.
        if freq is None and com._any_none(periods, start, end):
            raise ValueError('Must provide freq argument if no data is '
                             'supplied')
        return cls._generate(start, end, periods, freq, closed=closed)

    result = cls._simple_new(values, freq=freq)
    if freq == 'infer':
        inferred = result.inferred_freq
        if inferred:
            result._freq = to_offset(inferred)
    return result
def _resample_timestamps(self):
    """Resample a regularly- or irregularly-indexed object along self.ax.

    Chooses between three strategies: groupby-aggregate (downsampling or
    irregular data), reindex (upsampling), or a straight copy (same
    frequency).  Applies ``self.loffset`` to the result index at the end.
    """
    # assumes set_grouper(obj) already called
    axlabels = self.ax

    self._get_binner_for_resample()
    grouper = self.grouper
    binner = self.binner
    obj = self.obj

    # Determine if we're downsampling
    if axlabels.freq is not None or axlabels.inferred_freq is not None:
        # Regular frequency on the source axis.
        if len(grouper.binlabels) < len(axlabels) or self.how is not None:
            # downsample: fewer bins than source labels, or an explicit
            # aggregation was requested
            grouped = obj.groupby(grouper, axis=self.axis)
            result = grouped.aggregate(self._agg_method)
            # GH2073
            if self.fill_method is not None:
                result = result.fillna(method=self.fill_method,
                                       limit=self.limit)
        else:
            # upsampling shortcut
            if self.axis:
                raise AssertionError('axis must be 0')

            # The bin edges become the new index; which edge depends on
            # the closed side.
            if self.closed == 'right':
                res_index = binner[1:]
            else:
                res_index = binner[:-1]

            # if we have the same frequency as our axis, then we are equal
            # sampling even if how is None
            if self.fill_method is None and self.limit is None and to_offset(
                    axlabels.inferred_freq) == self.freq:
                result = obj.copy()
                result.index = res_index
            else:
                result = obj.reindex(res_index, method=self.fill_method,
                                     limit=self.limit)
    else:
        # Irregular data, have to use groupby
        grouped = obj.groupby(grouper, axis=self.axis)
        result = grouped.aggregate(self._agg_method)

        if self.fill_method is not None:
            result = result.fillna(method=self.fill_method,
                                   limit=self.limit)

    # Shift the resulting index by loffset, if one was given (string
    # aliases are converted first).
    loffset = self.loffset
    if isinstance(loffset, compat.string_types):
        loffset = to_offset(self.loffset)

    if isinstance(loffset, (DateOffset, timedelta)):
        if (isinstance(result.index, DatetimeIndex) and
                len(result.index) > 0):
            result.index = result.index + loffset

    return result
def test_frequency_misc(self):
    """Smoke-test assorted frequency helpers in fmod.

    NOTE: ``assertEquals`` is a long-deprecated alias of ``assertEqual``
    (removed entirely in Python 3.12); use the canonical name.
    """
    self.assertEqual(fmod.get_freq_group('T'), fmod.FreqGroup.FR_MIN)

    code, stride = fmod.get_freq_code(offsets.Hour())
    self.assertEqual(code, fmod.FreqGroup.FR_HR)

    code, stride = fmod.get_freq_code((5, 'T'))
    self.assertEqual(code, fmod.FreqGroup.FR_MIN)
    self.assertEqual(stride, 5)

    offset = offsets.Hour()
    result = fmod.to_offset(offset)
    self.assertEqual(result, offset)

    result = fmod.to_offset((5, 'T'))
    expected = offsets.Minute(5)
    self.assertEqual(result, expected)

    self.assertRaises(KeyError, fmod.get_freq_code, (5, 'baz'))

    self.assertRaises(ValueError, fmod.to_offset, '100foo')

    self.assertRaises(ValueError, fmod.to_offset, ('', ''))

    result = fmod.get_standard_freq(offsets.Hour())
    self.assertEqual(result, 'H')
def _maybe_convert_timedelta(self, other):
    """Convert timedelta-like input to an integer multiple of self.freq.

    Parameters
    ----------
    other : timedelta, np.timedelta64, DateOffset, or np.ndarray
        (integer or timedelta64 dtype)

    Returns
    -------
    int or ndarray of int64
        The number of freq units represented by ``other``.

    Raises
    ------
    IncompatibleFrequency
        If ``other`` cannot be expressed as an exact multiple of self.freq.
    """
    if isinstance(other, (timedelta, np.timedelta64,
                          offsets.Tick, Timedelta)):
        offset = frequencies.to_offset(self.freq.rule_code)
        if isinstance(offset, offsets.Tick):
            nanos = tslib._delta_to_nanoseconds(other)
            offset_nanos = tslib._delta_to_nanoseconds(offset)
            # only an exact multiple of the freq is convertible
            if nanos % offset_nanos == 0:
                return nanos // offset_nanos
    elif isinstance(other, offsets.DateOffset):
        freqstr = other.rule_code
        base = frequencies.get_base_alias(freqstr)
        if base == self.freq.rule_code:
            return other.n
        msg = _DIFFERENT_FREQ_INDEX.format(self.freqstr, other.freqstr)
        raise IncompatibleFrequency(msg)
    elif isinstance(other, np.ndarray):
        if is_integer_dtype(other):
            return other
        elif is_timedelta64_dtype(other):
            offset = frequencies.to_offset(self.freq)
            if isinstance(offset, offsets.Tick):
                nanos = tslib._delta_to_nanoseconds(other)
                offset_nanos = tslib._delta_to_nanoseconds(offset)
                # BUG FIX: previously `(nanos % offset_nanos).all() == 0`,
                # which is True only when `.all()` is False, i.e. when at
                # least one remainder is zero.  Require EVERY element to be
                # an exact multiple of the freq before converting.
                if (nanos % offset_nanos == 0).all():
                    return nanos // offset_nanos
    # raise when input doesn't have freq
    msg = "Input has different freq from PeriodIndex(freq={0})"
    raise IncompatibleFrequency(msg.format(self.freqstr))
def test_anchored_shortcuts():
    # Unanchored aliases expand to their default anchors.
    assert frequencies.to_offset("W") == frequencies.to_offset("W-SUN")
    assert frequencies.to_offset("Q") == frequencies.to_offset("Q-DEC")
def test_to_offset_negative():
    # A leading minus negates the whole compound frequency string.
    cases = [("-1S", -1), ("-5min10s", -310)]
    for freqstr, expected_n in cases:
        assert frequencies.to_offset(freqstr).n == expected_n
def test_to_offset_leading_zero():
    # Leading zero-valued components contribute nothing to the offset.
    cases = [("00H 00T 01S", 1), ("-00H 03T 14S", -194)]
    for freqstr, expected_n in cases:
        assert frequencies.to_offset(freqstr).n == expected_n
def test_to_offset_negative():
    # The sign applies to the combined offset, not to each component.
    for freqstr, expected_n in [('-1S', -1), ('-5min10s', -310)]:
        assert to_offset(freqstr).n == expected_n
def test_to_offset_leading_zero():
    # Zero-valued leading components are ignored; the remainder sums up.
    for freqstr, expected_n in (('00H 00T 01S', 1), ('-00H 03T 14S', -194)):
        assert to_offset(freqstr).n == expected_n
def test_anchored_shortcuts():
    # 'W' defaults to Sunday anchoring; 'Q' to a December quarter end.
    assert to_offset('W') == to_offset('W-SUN')
    assert to_offset('Q') == to_offset('Q-DEC')
def test_to_offset_invalid(freqstr):
    # see gh-13930
    # Escape the string because some parametrized inputs contain regex
    # metacharacters.
    expected = re.escape("Invalid frequency: {freqstr}".format(
        freqstr=freqstr))
    with pytest.raises(ValueError, match=expected):
        frequencies.to_offset(freqstr)
def __new__(cls, data=None, unit=None, freq=None, start=None, end=None,
            periods=None, closed=None, dtype=None, copy=False,
            name=None, verify_integrity=True):
    """Construct a TimedeltaIndex from data, or generate a range.

    Either ``data`` is given (optionally with ``unit``/``freq``), or a
    range is generated from ``start``/``end``/``periods``/``freq``.
    """
    # Fast path: re-wrap an existing TimedeltaIndex when nothing is
    # being overridden.
    if isinstance(data, TimedeltaIndex) and freq is None and name is None:
        if copy:
            return data.copy()
        else:
            return data._shallow_copy()

    freq_infer = False
    if not isinstance(freq, DateOffset):
        # if a passed freq is None, don't infer automatically
        if freq != 'infer':
            freq = to_offset(freq)
        else:
            # 'infer' sentinel: resolve after the array is built
            freq_infer = True
            freq = None

    periods = dtl.validate_periods(periods)

    if data is None:
        # No data: must be able to generate a range instead.
        if freq is None and com._any_none(periods, start, end):
            msg = 'Must provide freq argument if no data is supplied'
            raise ValueError(msg)
        else:
            return cls._generate_range(start, end, periods, name, freq,
                                       closed=closed)

    if unit is not None:
        data = to_timedelta(data, unit=unit, box=False)

    if not isinstance(data, (np.ndarray, Index, ABCSeries)):
        if is_scalar(data):
            raise ValueError('TimedeltaIndex() must be called with a '
                             'collection of some kind, %s was passed'
                             % repr(data))

    # convert if not already
    if getattr(data, 'dtype', None) != _TD_DTYPE:
        data = to_timedelta(data, unit=unit, box=False)
    elif copy:
        data = np.array(data, copy=True)

    subarr = cls._simple_new(data, name=name, freq=freq)
    # check that we are matching freqs
    if verify_integrity and len(subarr) > 0:
        if freq is not None and not freq_infer:
            cls._validate_frequency(subarr, freq)

    if freq_infer:
        # Attach the inferred frequency, if one could be determined.
        inferred = subarr.inferred_freq
        if inferred:
            subarr.freq = to_offset(inferred)
        return subarr

    return subarr
def __init__(self, values, dtype=_TD_DTYPE, freq=None, copy=False):
    """Wrap ``values`` as a TimedeltaArray.

    Parameters
    ----------
    values : ndarray, TimedeltaArray, Series, or Index
        Must resolve to a timedelta64[ns] (or int64) ndarray.
    dtype : must equal _TD_DTYPE
    freq : DateOffset-like, optional ('infer' is rejected here)
    copy : bool, default False
    """
    if isinstance(values, (ABCSeries, ABCIndexClass)):
        values = values._values

    inferred_freq = getattr(values, "_freq", None)

    if isinstance(values, type(self)):
        # Re-wrapping an existing TimedeltaArray: reconcile the passed
        # freq against the one already attached.
        if freq is None:
            freq = values.freq
        elif freq and values.freq:
            freq = to_offset(freq)
            freq, _ = dtl.validate_inferred_freq(freq, values.freq, False)
        values = values._data

    if not isinstance(values, np.ndarray):
        msg = (
            "Unexpected type '{}'. 'values' must be a TimedeltaArray "
            "ndarray, or Series or Index containing one of those."
        )
        raise ValueError(msg.format(type(values).__name__))
    if values.dtype == 'i8':
        # for compat with datetime/timedelta/period shared methods,
        # we can sometimes get here with int64 values.  These represent
        # nanosecond UTC (or tz-naive) unix timestamps
        values = values.view(_TD_DTYPE)

    if values.dtype != _TD_DTYPE:
        raise TypeError(_BAD_DTYPE.format(dtype=values.dtype))

    try:
        # comparing a non-comparable dtype object raises TypeError;
        # surface it as the standard bad-dtype error
        dtype_mismatch = dtype != _TD_DTYPE
    except TypeError:
        raise TypeError(_BAD_DTYPE.format(dtype=dtype))
    else:
        if dtype_mismatch:
            raise TypeError(_BAD_DTYPE.format(dtype=dtype))

    if freq == "infer":
        msg = (
            "Frequency inference not allowed in TimedeltaArray.__init__. "
            "Use 'pd.array()' instead."
        )
        raise ValueError(msg)

    if copy:
        values = values.copy()
    if freq:
        freq = to_offset(freq)

    self._data = values
    self._dtype = dtype
    self._freq = freq

    # Only re-validate when the freq wasn't already carried by `values`.
    if inferred_freq is None and freq is not None:
        type(self)._validate_frequency(self, freq)
def intersection(self, other):
    """
    Specialized intersection for DatetimeIndex objects. May be much faster
    than Index.intersection

    Parameters
    ----------
    other : DatetimeIndex or array-like

    Returns
    -------
    y : Index or DatetimeIndex
    """
    if not isinstance(other, DatetimeIndex):
        # Try to coerce; on failure fall back to a generic set
        # intersection below.
        try:
            other = DatetimeIndex(other)
        except TypeError:
            pass
        result = Index.intersection(self, other)
        if isinstance(result, DatetimeIndex):
            # Recover a regular frequency if one can be inferred.
            if result.freq is None:
                result.offset = to_offset(result.inferred_freq)
        return result
    elif (other.offset is None or self.offset is None or
          other.offset != self.offset or
          not other.offset.isAnchored() or
          (not self.is_monotonic or not other.is_monotonic)):
        # The range-slicing shortcut needs matching, anchored offsets and
        # monotonic indexes on both sides; otherwise do a set operation.
        result = Index.intersection(self, other)
        if isinstance(result, DatetimeIndex):
            if result.freq is None:
                result.offset = to_offset(result.inferred_freq)
        return result

    if len(self) == 0:
        return self
    if len(other) == 0:
        return other
    # to make our life easier, "sort" the two ranges
    if self[0] <= other[0]:
        left, right = self, other
    else:
        left, right = other, self

    # The overlap of two same-freq monotonic ranges is itself a slice of
    # the left range.
    end = min(left[-1], right[-1])
    start = right[0]

    if end < start:
        return type(self)(data=[])
    else:
        lslice = slice(*left.slice_locs(start, end))
        left_chunk = left.values[lslice]
        return self._view_like(left_chunk)
def intersection(self, other, sort=False):
    """Specialized intersection for datetime-like indexes.

    Uses a range-slicing shortcut when both sides share the same anchored
    frequency and are monotonic; otherwise defers to Index.intersection.
    """
    self._validate_sort_keyword(sort)
    self._assert_can_do_setop(other)

    if self.equals(other):
        return self._get_reconciled_name_object(other)

    if len(self) == 0:
        return self.copy()
    if len(other) == 0:
        return other.copy()

    if not isinstance(other, type(self)):
        # Different index type: generic set intersection, then try to
        # recover a regular frequency on the result.
        result = Index.intersection(self, other, sort=sort)
        if isinstance(result, type(self)):
            if result.freq is None:
                result.freq = to_offset(result.inferred_freq)
        return result

    elif (other.freq is None or self.freq is None or
          other.freq != self.freq or
          not other.freq.isAnchored() or
          (not self.is_monotonic or not other.is_monotonic)):
        # The range shortcut needs matching anchored freqs and monotonic
        # indexes on both sides.
        result = Index.intersection(self, other, sort=sort)

        # Invalidate the freq of `result`, which may not be correct at
        # this point, depending on the values.
        result.freq = None
        if hasattr(self, 'tz'):
            result = self._shallow_copy(result._values, name=result.name,
                                        tz=result.tz, freq=None)
        else:
            result = self._shallow_copy(result._values, name=result.name,
                                        freq=None)
        if result.freq is None:
            result.freq = to_offset(result.inferred_freq)
        return result

    # to make our life easier, "sort" the two ranges
    if self[0] <= other[0]:
        left, right = self, other
    else:
        left, right = other, self

    # after sorting, the intersection always starts with the right index
    # and ends with the index of which the last elements is smallest
    end = min(left[-1], right[-1])
    start = right[0]

    if end < start:
        return type(self)(data=[])
    else:
        lslice = slice(*left.slice_locs(start, end))
        left_chunk = left.values[lslice]
        return self._shallow_copy(left_chunk)
def test_to_offset_leading_plus(self):
    # An explicit leading '+' is accepted and means a positive offset.
    assert frequencies.to_offset('+1d').n == 1
    assert frequencies.to_offset('+2h30min').n == 150

    # Mixed signs and signs without a magnitude are rejected.
    for bad_freq in ['+-1d', '-+1h', '+1', '-7', '+d', '-m']:
        with pytest.raises(ValueError, match='Invalid frequency:'):
            frequencies.to_offset(bad_freq)
def __new__(cls, values, freq=None):
    # Normalize a string/alias freq into a DateOffset; the 'infer'
    # sentinel is resolved after construction.
    explicit = freq is not None and freq != 'infer'
    if explicit and not isinstance(freq, DateOffset):
        freq = to_offset(freq)

    result = cls._simple_new(values, freq=freq)
    if freq == 'infer':
        inferred = result.inferred_freq
        if inferred:
            result._freq = to_offset(inferred)
    return result
def conv_resol(resolution):
    """Map a pandas offset/alias to the provider's resolution code.

    Parameters
    ----------
    resolution : str or DateOffset
        e.g. '5Min', '1H', 'D'.

    Returns
    -------
    str
        '5', 'h' or 'd' for the known resolutions; for anything unknown,
        logs the failure and returns ``resolution`` unchanged.
    """
    d = {
        to_offset('5Min'): '5',
        to_offset('1H'): 'h',
        to_offset('D'): 'd',
    }
    try:
        return d[to_offset(resolution)]
    except Exception:
        # BUG FIX: was a bare `except:`, which also swallows SystemExit and
        # KeyboardInterrupt.  Keep the best-effort fallback, but only for
        # ordinary errors (unknown resolution -> KeyError, bad freq string
        # -> ValueError, ...).
        logging.error(traceback.format_exc())
        logging.warning("conv_resol returns '%s'" % resolution)
        return resolution
def __init__(
    self,
    freq="Min",
    closed=None,
    label=None,
    how="mean",
    nperiods=None,
    axis=0,
    fill_method=None,
    limit=None,
    loffset=None,
    kind=None,
    convention=None,
    base=0,
    **kwargs
):
    """Set up a TimeGrouper, resolving closed/label defaults from freq.

    For period-end style frequencies the right bin edge is the natural
    label; for all others the left edge is used.
    """
    freq = to_offset(freq)

    # Frequencies whose intervals are labelled by their end point.
    end_types = set(["M", "A", "Q", "BM", "BA", "BQ", "W"])
    rule = freq.rule_code
    if rule in end_types or (
        "-" in rule and rule[: rule.find("-")] in end_types
    ):
        if closed is None:
            closed = "right"
        if label is None:
            label = "right"
    else:
        if closed is None:
            closed = "left"
        if label is None:
            label = "left"

    self.closed = closed
    self.label = label
    self.nperiods = nperiods
    self.kind = kind

    # 'E' (end) is the default resampling convention.
    self.convention = convention or "E"
    self.convention = self.convention.lower()

    # loffset shifts the resulting labels; accept an alias string.
    if isinstance(loffset, compat.string_types):
        loffset = to_offset(loffset)
    self.loffset = loffset

    self.how = how
    self.fill_method = fill_method
    self.limit = limit
    self.base = base

    # always sort time groupers
    kwargs["sort"] = True

    super(TimeGrouper, self).__init__(freq=freq, axis=axis, **kwargs)
def test_anchored_shortcuts():
    # 'W' is implicitly anchored on Sunday.
    assert frequencies.to_offset('W') == frequencies.to_offset('W-SUN')

    # 'Q' and 'Q-DEC' both mean a quarter ending in December.
    dec_quarter = offsets.QuarterEnd(startingMonth=12)
    assert frequencies.to_offset('Q') == dec_quarter
    assert frequencies.to_offset('Q-DEC') == dec_quarter

    # An explicit anchor month is honored.
    may_quarter = offsets.QuarterEnd(startingMonth=5)
    assert frequencies.to_offset('Q-MAY') == may_quarter
def __init__(self, freq='Min', closed=None, label=None, how='mean',
             axis=0, fill_method=None, limit=None, loffset=None,
             kind=None, convention=None, base=0, **kwargs):
    """Set up a TimeGrouper, validating keywords and resolving the
    closed/label defaults from the frequency.

    Raises
    ------
    ValueError
        If `label`, `closed` or `convention` is not one of its supported
        values.
    """
    # Check for correctness of the keyword arguments which would
    # otherwise silently use the default if misspelled
    if label not in {None, 'left', 'right'}:
        raise ValueError('Unsupported value {} for `label`'.format(label))
    if closed not in {None, 'left', 'right'}:
        raise ValueError('Unsupported value {} for `closed`'.format(
            closed))
    if convention not in {None, 'start', 'end', 'e', 's'}:
        raise ValueError('Unsupported value {} for `convention`'
                         .format(convention))

    freq = to_offset(freq)

    # Frequencies whose intervals are labelled by their end point.
    end_types = set(['M', 'A', 'Q', 'BM', 'BA', 'BQ', 'W'])
    rule = freq.rule_code
    if (rule in end_types or
            ('-' in rule and rule[:rule.find('-')] in end_types)):
        if closed is None:
            closed = 'right'
        if label is None:
            label = 'right'
    else:
        if closed is None:
            closed = 'left'
        if label is None:
            label = 'left'

    self.closed = closed
    self.label = label
    self.kind = kind

    # 'E' (end) is the default resampling convention.
    self.convention = convention or 'E'
    self.convention = self.convention.lower()

    # loffset shifts the resulting labels; accept an alias string.
    if isinstance(loffset, compat.string_types):
        loffset = to_offset(loffset)
    self.loffset = loffset

    self.how = how
    self.fill_method = fill_method
    self.limit = limit
    self.base = base

    # always sort time groupers
    kwargs['sort'] = True

    super(TimeGrouper, self).__init__(freq=freq, axis=axis, **kwargs)
def test_pi_add_offset_n_gt1_not_divisible(self, box_with_array):
    # GH#23215
    # adding a 3M offset to a 2M-freq PeriodIndex (offset.n % freq.n != 0)
    pi = pd.PeriodIndex(['2016-01'], freq='2M')
    expected = pd.PeriodIndex(['2016-04'], freq='2M')

    # FIXME: with transposing these tests fail
    pi = tm.box_expected(pi, box_with_array, transpose=False)
    expected = tm.box_expected(expected, box_with_array, transpose=False)

    three_months = to_offset('3M')
    # addition should be commutative
    tm.assert_equal(pi + three_months, expected)
    tm.assert_equal(three_months + pi, expected)
def test_pi_add_offset_n_gt1_not_divisible(self, box):
    # GH#23215
    # adding a 3M offset to a 2M-freq PeriodIndex (offset.n % freq.n != 0)
    pi = tm.box_expected(pd.PeriodIndex(['2016-01'], freq='2M'), box)
    expected = tm.box_expected(pd.PeriodIndex(['2016-04'], freq='2M'), box)

    three_months = to_offset('3M')
    # addition should be commutative
    tm.assert_equal(pi + three_months, expected)
    tm.assert_equal(three_months + pi, expected)
def _maybe_cast_slice_bound(self, label, side, kind):
    """
    If label is a string, cast it to timedelta according to resolution.

    Parameters
    ----------
    label : object
    side : {'left', 'right'}
    kind : {'ix', 'loc', 'getitem'}

    Returns
    -------
    label : object
    """
    assert kind in ['ix', 'loc', 'getitem', None]

    if isinstance(label, compat.string_types):
        parsed = _coerce_scalar_to_timedelta_type(label, box=True)
        # Round down to the parsed string's resolution so a coarse label
        # (e.g. '1 day') spans the whole interval it names.
        lbound = parsed.round(parsed.resolution)
        if side == 'left':
            return lbound
        else:
            # Right bound: last nanosecond inside the named interval.
            return (lbound + to_offset(parsed.resolution) -
                    Timedelta(1, 'ns'))
    elif is_integer(label) or is_float(label):
        # Numeric labels are not valid slice bounds on this index.
        self._invalid_indexer('slice', label)

    return label
def union(self, other):
    """
    Specialized union for TimedeltaIndex objects. If combine
    overlapping ranges with the same DateOffset, will be much faster
    than Index.union

    Parameters
    ----------
    other : TimedeltaIndex or array-like

    Returns
    -------
    y : Index or TimedeltaIndex
    """
    self._assert_can_do_setop(other)

    # Coerce array-likes; leave `other` alone if coercion fails.
    if not isinstance(other, TimedeltaIndex):
        try:
            other = TimedeltaIndex(other)
        except (TypeError, ValueError):
            pass

    this = self
    if this._can_fast_union(other):
        return this._fast_union(other)

    result = Index.union(this, other)
    if isinstance(result, TimedeltaIndex) and result.freq is None:
        # Recover a regular frequency if one can be inferred.
        result.freq = to_offset(result.inferred_freq)
    return result
def last(self, offset):
    """
    Convenience method for subsetting final periods of time series data
    based on a date offset

    Parameters
    ----------
    offset : string, DateOffset, dateutil.relativedelta

    Examples
    --------
    ts.last('5M') -> Last 5 months

    Returns
    -------
    subset : type of caller
    """
    from pandas.tseries.frequencies import to_offset
    if not isinstance(self.index, DatetimeIndex):
        raise NotImplementedError
    if len(self.index) == 0:
        return self

    # Everything strictly after (last timestamp - offset) is kept;
    # side='right' excludes rows exactly at the cutoff.
    cutoff = self.index[-1] - to_offset(offset)
    pos = self.index.searchsorted(cutoff, side='right')
    return self.ix[pos:]
def first(self, offset):
    """
    Convenience method for subsetting initial periods of time series data
    based on a date offset

    Parameters
    ----------
    offset : string, DateOffset, dateutil.relativedelta

    Examples
    --------
    ts.first('10D') -> First 10 days

    Returns
    -------
    subset : type of caller
    """
    from pandas.tseries.frequencies import to_offset
    if not isinstance(self.index, DatetimeIndex):
        raise NotImplementedError
    if len(self.index) == 0:
        return self

    offset = to_offset(offset)
    end_date = end = self.index[0] + offset

    # Tick-like, e.g. 3 weeks
    # For unanchored offsets whose end lands exactly on an index value,
    # switch to a positional bound so that row is excluded.
    if not offset.isAnchored() and hasattr(offset, '_inc'):
        if end_date in self.index:
            end = self.index.searchsorted(end_date, side='left')

    return self.ix[:end]
def date_range(start=None, end=None, periods=None, freq=None, tz=None,
               normalize=False, name=None, closed=None, chunk_size=None,
               **kwargs):
    """
    Return a fixed frequency DatetimeIndex.

    Parameters
    ----------
    start : str or datetime-like, optional
        Left bound for generating dates.
    end : str or datetime-like, optional
        Right bound for generating dates.
    periods : int, optional
        Number of periods to generate.
    freq : str or DateOffset, default 'D'
        Frequency strings can have multiples, e.g. '5H'. See
        :ref:`here <timeseries.offset_aliases>` for a list of
        frequency aliases.
    tz : str or tzinfo, optional
        Time zone name for returning localized DatetimeIndex, for example
        'Asia/Hong_Kong'. By default, the resulting DatetimeIndex is
        timezone-naive.
    normalize : bool, default False
        Normalize start/end dates to midnight before generating date range.
    name : str, default None
        Name of the resulting DatetimeIndex.
    closed : {None, 'left', 'right'}, optional
        Make the interval closed with respect to the given frequency to
        the 'left', 'right', or both sides (None, the default).
    chunk_size : int, optional
        Desired chunk size of the resulting index.
    **kwargs
        For compatibility. Has no effect on the result.

    Returns
    -------
    rng : DatetimeIndex

    See Also
    --------
    DatetimeIndex : An immutable container for datetimes.
    timedelta_range : Return a fixed frequency TimedeltaIndex.
    period_range : Return a fixed frequency PeriodIndex.
    interval_range : Return a fixed frequency IntervalIndex.

    Notes
    -----
    Of the four parameters ``start``, ``end``, ``periods``, and ``freq``,
    exactly three must be specified. If ``freq`` is omitted, the resulting
    ``DatetimeIndex`` will have ``periods`` linearly spaced elements
    between ``start`` and ``end`` (closed on both sides).

    To learn more about the frequency strings, please see `this link
    <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`__.

    Examples
    --------
    **Specifying the values**

    The next four examples generate the same `DatetimeIndex`, but vary
    the combination of `start`, `end` and `periods`.

    Specify `start` and `end`, with the default daily frequency.

    >>> import mars.dataframe as md
    >>> md.date_range(start='1/1/2018', end='1/08/2018').execute()
    DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03', '2018-01-04',
                   '2018-01-05', '2018-01-06', '2018-01-07', '2018-01-08'],
                  dtype='datetime64[ns]', freq='D')

    Specify `start` and `periods`, the number of periods (days).

    >>> md.date_range(start='1/1/2018', periods=8).execute()
    DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03', '2018-01-04',
                   '2018-01-05', '2018-01-06', '2018-01-07', '2018-01-08'],
                  dtype='datetime64[ns]', freq='D')

    Specify `end` and `periods`, the number of periods (days).

    >>> md.date_range(end='1/1/2018', periods=8).execute()
    DatetimeIndex(['2017-12-25', '2017-12-26', '2017-12-27', '2017-12-28',
                   '2017-12-29', '2017-12-30', '2017-12-31', '2018-01-01'],
                  dtype='datetime64[ns]', freq='D')

    Specify `start`, `end`, and `periods`; the frequency is generated
    automatically (linearly spaced).

    >>> md.date_range(start='2018-04-24', end='2018-04-27', periods=3).execute()
    DatetimeIndex(['2018-04-24 00:00:00', '2018-04-25 12:00:00',
                   '2018-04-27 00:00:00'],
                  dtype='datetime64[ns]', freq=None)

    **Other Parameters**

    Changed the `freq` (frequency) to ``'M'`` (month end frequency).

    >>> md.date_range(start='1/1/2018', periods=5, freq='M').execute()
    DatetimeIndex(['2018-01-31', '2018-02-28', '2018-03-31', '2018-04-30',
                   '2018-05-31'],
                  dtype='datetime64[ns]', freq='M')

    Multiples are allowed

    >>> md.date_range(start='1/1/2018', periods=5, freq='3M').execute()
    DatetimeIndex(['2018-01-31', '2018-04-30', '2018-07-31', '2018-10-31',
                   '2019-01-31'],
                  dtype='datetime64[ns]', freq='3M')

    `freq` can also be specified as an Offset object.

    >>> md.date_range(start='1/1/2018', periods=5, freq=md.offsets.MonthEnd(3)).execute()
    DatetimeIndex(['2018-01-31', '2018-04-30', '2018-07-31', '2018-10-31',
                   '2019-01-31'],
                  dtype='datetime64[ns]', freq='3M')

    Specify `tz` to set the timezone.

    >>> md.date_range(start='1/1/2018', periods=5, tz='Asia/Tokyo').execute()
    DatetimeIndex(['2018-01-01 00:00:00+09:00', '2018-01-02 00:00:00+09:00',
                   '2018-01-03 00:00:00+09:00', '2018-01-04 00:00:00+09:00',
                   '2018-01-05 00:00:00+09:00'],
                  dtype='datetime64[ns, Asia/Tokyo]', freq='D')

    `closed` controls whether to include `start` and `end` that are on the
    boundary. The default includes boundary points on either end.

    >>> md.date_range(start='2017-01-01', end='2017-01-04', closed=None).execute()
    DatetimeIndex(['2017-01-01', '2017-01-02', '2017-01-03', '2017-01-04'],
                  dtype='datetime64[ns]', freq='D')

    Use ``closed='left'`` to exclude `end` if it falls on the boundary.

    >>> md.date_range(start='2017-01-01', end='2017-01-04', closed='left').execute()
    DatetimeIndex(['2017-01-01', '2017-01-02', '2017-01-03'],
                  dtype='datetime64[ns]', freq='D')

    Use ``closed='right'`` to exclude `start` if it falls on the boundary.

    >>> md.date_range(start='2017-01-01', end='2017-01-04', closed='right').execute()
    DatetimeIndex(['2017-01-02', '2017-01-03', '2017-01-04'],
                  dtype='datetime64[ns]', freq='D')
    """
    # validate periods
    if isinstance(periods, (float, np.floating)):
        periods = int(periods)
    if periods is not None and not isinstance(periods, (int, np.integer)):
        raise TypeError(f'periods must be a number, got {periods}')

    if freq is None and any(arg is None for arg in [periods, start, end]):
        freq = 'D'

    if sum(arg is not None for arg in [start, end, periods, freq]) != 3:
        raise ValueError('Of the four parameters: start, end, periods, '
                         'and freq, exactly three must be specified')

    freq = to_offset(freq)

    if start is not None:
        start = pd.Timestamp(start)

    if end is not None:
        end = pd.Timestamp(end)

    if start is pd.NaT or end is pd.NaT:
        raise ValueError('Neither `start` nor `end` can be NaT')

    start, end, _ = _maybe_normalize_endpoints(start, end, normalize)
    tz = _infer_tz_from_endpoints(start, end, tz)

    if start is None and end is not None:
        # start is None and end is not None
        # adjust end first, snapping it onto the freq grid
        end = pd.date_range(end=end, periods=1, freq=freq)[0]
        size = periods
        start = end - (periods - 1) * freq
        if closed == 'left':
            size -= 1
        elif closed == 'right':
            # when start is computed from end, closed == 'right' would not
            # take effect, thus just ignore it
            # NOTE(review): original comment said 'left' here, which looks
            # inverted relative to the branch — confirm against callers
            closed = None
    elif end is None:
        # end is None
        # adjust start first, snapping it onto the freq grid
        start = pd.date_range(start=start, periods=1, freq=freq)[0]
        size = periods
        end = start + (periods - 1) * freq
        if closed == 'right':
            size -= 1
        elif closed == 'left':
            # when end is None, closed == 'left' would not take effect
            # thus just ignore
            closed = None
    else:
        if periods is None:
            periods = size = int((end - start) / freq + 1)
        else:
            size = periods
        if closed is not None:
            # either boundary exclusion removes exactly one element
            size -= 1

    shape = (size,)
    op = DataFrameDateRange(start=start, end=end, periods=periods,
                            freq=freq, tz=tz, normalize=normalize,
                            closed=closed, name=name, **kwargs)
    return op(shape, chunk_size=chunk_size)
def _get_old_time_step_in_minutes(self): td = pd.to_timedelta(to_offset(self.htimeseries.time_step)) return str(int(td.total_seconds() / 60)) + ",0"
def _to_offset(freq):
    # Trade-rule suffixes are resolved through the alias table; any other
    # frequency string is handed to pandas.
    suffix = freq[-1]
    if suffix in traderule_alias_mapping:
        return traderule_alias_mapping.get(suffix) * int(freq[:-1])
    return to_offset(freq)
def test_render_pdf_special_chars(ac_power_observation_metadata,
                                  ac_power_forecast_metadata, dash_url,
                                  fail_pdf, preprocessing_result_types,
                                  report_metrics):
    # Regression test: names containing LaTeX-special characters
    # (parentheses, quotes, commas, dashes, underscores) must not break
    # PDF rendering.
    if shutil.which('pdflatex') is None:  # pragma: no cover
        pytest.skip('pdflatex must be on PATH to generate PDF reports')
    quality_flag_filter = datamodel.QualityFlagFilter(("USER FLAGGED", ))
    forecast = ac_power_forecast_metadata.replace(
        name="ac_power forecast (why,) ()'-_,")
    observation = ac_power_observation_metadata.replace(
        name="ac_power observations ()'-_,")
    fxobs = datamodel.ForecastObservation(forecast, observation)
    tz = 'America/Phoenix'
    start = pd.Timestamp('20190401 0000', tz=tz)
    end = pd.Timestamp('20190404 2359', tz=tz)
    report_params = datamodel.ReportParameters(
        name="NREL MIDC OASIS GHI Forecast Analysis ()'-_,",
        start=start,
        end=end,
        object_pairs=(fxobs, ),
        metrics=("mae", "rmse", "mbe", "s"),
        categories=("total", "date", "hour"),
        filters=(quality_flag_filter, ))
    report = datamodel.Report(
        report_id="56c67770-9832-11e9-a535-f4939feddd83",
        report_parameters=report_params)
    # Pull the quality flags of the first QualityFlagFilter in the report.
    qflags = list(f.quality_flags for f in report.report_parameters.filters
                  if isinstance(f, datamodel.QualityFlagFilter))
    qflags = list(qflags[0])
    # Constant dummy series covering the report window at the forecast's
    # interval length.
    ser_index = pd.date_range(start, end,
                              freq=to_offset(forecast.interval_length),
                              name='timestamp')
    ser = pd.Series(np.repeat(100, len(ser_index)), name='value',
                    index=ser_index)
    pfxobs = datamodel.ProcessedForecastObservation(
        forecast.name,
        fxobs,
        forecast.interval_value_type,
        forecast.interval_length,
        forecast.interval_label,
        valid_point_count=len(ser),
        validation_results=tuple(
            datamodel.ValidationResult(flag=f, count=0) for f in qflags),
        preprocessing_results=tuple(
            datamodel.PreprocessingResult(name=t, count=0)
            for t in preprocessing_result_types),
        forecast_values=ser,
        observation_values=ser)

    figs = datamodel.RawReportPlots(
        (datamodel.PlotlyReportFigure.from_dict({
            'name': 'mae tucson ac_power',
            'spec': '{"data":[{"x":[1],"y":[1],"type":"bar"}]}',
            'pdf': fail_pdf,
            'figure_type': 'bar',
            'category': 'total',
            'metric': 'mae',
            'figure_class': 'plotly',
        }), ), '4.5.3',
    )
    raw = datamodel.RawReport(
        generated_at=report.report_parameters.end,
        timezone=tz,
        plots=figs,
        metrics=report_metrics(report),
        processed_forecasts_observations=(pfxobs, ),
        versions=(('test', 'test_with_underscore?'), ),
        messages=(datamodel.ReportMessage(
            message="Failed to make metrics for ac_power forecast ()'-_,",
            step='',
            level='',
            function=''), ))
    rr = report.replace(raw_report=raw)
    rendered = template.render_pdf(rr, dash_url)
    # A valid PDF always starts with the %PDF magic bytes.
    assert rendered.startswith(b'%PDF')
def device_scheduler(  # noqa C901
    device_constraints: List[pd.DataFrame],
    ems_constraints: pd.DataFrame,
    commitment_quantities: List[pd.Series],
    commitment_downwards_deviation_price: Union[List[pd.Series], List[float]],
    commitment_upwards_deviation_price: Union[List[pd.Series], List[float]],
) -> Tuple[List[pd.Series], float, SolverResults]:
    """This generic device scheduler is able to handle an EMS with multiple devices,
    with various types of constraints on the EMS level and on the device level,
    and with multiple market commitments on the EMS level.
    A typical example is a house with many devices.
    The commitments are assumed to be with regard to the flow of energy to the device
    (positive for consumption, negative for production).
    The solver minimises the costs of deviating from the commitments.

    Device constraints are on a device level. Handled constraints (listed by column name):
        max: maximum stock assuming an initial stock of zero (e.g. in MWh or boxes)
        min: minimum stock assuming an initial stock of zero
        equal: exact amount of stock (we do this by clamping min and max)
        derivative max: maximum flow (e.g. in MW or boxes/h)
        derivative min: minimum flow
        derivative equals: exact amount of flow (we do this by clamping derivative min and derivative max)
        derivative down efficiency: ratio of downwards flows (flow into EMS : flow out of device)
        derivative up efficiency: ratio of upwards flows (flow into device : flow out of EMS)
    EMS constraints are on an EMS level. Handled constraints (listed by column name):
        derivative max: maximum flow
        derivative min: minimum flow
    Commitments are on an EMS level. Parameter explanations:
        commitment_quantities: amounts of flow specified in commitments
            (both previously ordered and newly requested) - e.g. in MW or boxes/h
        commitment_downwards_deviation_price: penalty for downwards deviations of the flow
            - e.g. in EUR/MW or EUR/(boxes/h)
            - either a single value (same value for each flow value) or a Series
              (different value for each flow value)
        commitment_upwards_deviation_price: penalty for upwards deviations of the flow

    All Series and DataFrames should have the same resolution.

    For now, we pass in the various constraints and prices as separate variables,
    from which we make a MultiIndex DataFrame.
    Later we could pass in a MultiIndex DataFrame directly.
    """
    # If the EMS has no devices, don't bother
    if len(device_constraints) == 0:
        return [], 0, SolverResults()

    # Check if commitments have the same time window and resolution as the constraints
    start = device_constraints[0].index.to_pydatetime()[0]
    resolution = pd.to_timedelta(device_constraints[0].index.freq)
    end = device_constraints[0].index.to_pydatetime()[-1] + resolution
    if len(commitment_quantities) != 0:
        start_c = commitment_quantities[0].index.to_pydatetime()[0]
        resolution_c = pd.to_timedelta(commitment_quantities[0].index.freq)
        # BUGFIX: the commitment window's end must be computed with the
        # commitment's own resolution (resolution_c), not the device
        # resolution; otherwise a resolution mismatch could distort the
        # time-window check below.
        end_c = commitment_quantities[0].index.to_pydatetime()[-1] + resolution_c
        if not (start_c == start and end_c == end):
            raise Exception(
                "Not implemented for different time windows.\n(%s,%s)\n(%s,%s)"
                % (start, end, start_c, end_c))
        if resolution_c != resolution:
            raise Exception(
                "Not implemented for different resolutions.\n%s\n%s"
                % (resolution, resolution_c))

    # Turn prices per commitment into prices per commitment flow
    # (a bare float becomes a constant Series over the planning window).
    if len(commitment_downwards_deviation_price) != 0:
        if all(
            isinstance(price, float)
            for price in commitment_downwards_deviation_price
        ):
            commitment_downwards_deviation_price = [
                initialize_series(price, start, end, resolution)
                for price in commitment_downwards_deviation_price
            ]
    if len(commitment_upwards_deviation_price) != 0:
        if all(
            isinstance(price, float)
            for price in commitment_upwards_deviation_price
        ):
            commitment_upwards_deviation_price = [
                initialize_series(price, start, end, resolution)
                for price in commitment_upwards_deviation_price
            ]

    model = ConcreteModel()

    # Add indices for devices (d), datetimes (j) and commitments (c)
    model.d = RangeSet(0, len(device_constraints) - 1, doc="Set of devices")
    model.j = RangeSet(
        0, len(device_constraints[0].index.to_pydatetime()) - 1,
        doc="Set of datetimes")
    model.c = RangeSet(
        0, len(commitment_quantities) - 1, doc="Set of commitments")

    # Add parameters — each *_select initializer looks up one (index, time)
    # cell from the input frames; NaN bounds become +/- infinity.
    def price_down_select(m, c, j):
        return commitment_downwards_deviation_price[c].iloc[j]

    def price_up_select(m, c, j):
        return commitment_upwards_deviation_price[c].iloc[j]

    def commitment_quantity_select(m, c, j):
        return commitment_quantities[c].iloc[j]

    def device_max_select(m, d, j):
        # "equals" clamps the stock bound when present.
        max_v = device_constraints[d]["max"].iloc[j]
        equal_v = device_constraints[d]["equals"].iloc[j]
        if np.isnan(max_v) and np.isnan(equal_v):
            return infinity
        else:
            return np.nanmin([max_v, equal_v])

    def device_min_select(m, d, j):
        min_v = device_constraints[d]["min"].iloc[j]
        equal_v = device_constraints[d]["equals"].iloc[j]
        if np.isnan(min_v) and np.isnan(equal_v):
            return -infinity
        else:
            return np.nanmax([min_v, equal_v])

    def device_derivative_max_select(m, d, j):
        # "derivative equals" clamps the flow bound when present.
        max_v = device_constraints[d]["derivative max"].iloc[j]
        equal_v = device_constraints[d]["derivative equals"].iloc[j]
        if np.isnan(max_v) and np.isnan(equal_v):
            return infinity
        else:
            return np.nanmin([max_v, equal_v])

    def device_derivative_min_select(m, d, j):
        min_v = device_constraints[d]["derivative min"].iloc[j]
        equal_v = device_constraints[d]["derivative equals"].iloc[j]
        if np.isnan(min_v) and np.isnan(equal_v):
            return -infinity
        else:
            return np.nanmax([min_v, equal_v])

    def ems_derivative_max_select(m, j):
        v = ems_constraints["derivative max"].iloc[j]
        if np.isnan(v):
            return infinity
        else:
            return v

    def ems_derivative_min_select(m, j):
        v = ems_constraints["derivative min"].iloc[j]
        if np.isnan(v):
            return -infinity
        else:
            return v

    def device_derivative_down_efficiency(m, d, j):
        # Efficiency columns are optional; default to lossless (1).
        try:
            return device_constraints[d]["derivative down efficiency"].iloc[j]
        except KeyError:
            return 1

    def device_derivative_up_efficiency(m, d, j):
        try:
            return device_constraints[d]["derivative up efficiency"].iloc[j]
        except KeyError:
            return 1

    model.up_price = Param(model.c, model.j, initialize=price_up_select)
    model.down_price = Param(model.c, model.j, initialize=price_down_select)
    model.commitment_quantity = Param(
        model.c, model.j, initialize=commitment_quantity_select)
    model.device_max = Param(model.d, model.j, initialize=device_max_select)
    model.device_min = Param(model.d, model.j, initialize=device_min_select)
    model.device_derivative_max = Param(
        model.d, model.j, initialize=device_derivative_max_select)
    model.device_derivative_min = Param(
        model.d, model.j, initialize=device_derivative_min_select)
    model.ems_derivative_max = Param(
        model.j, initialize=ems_derivative_max_select)
    model.ems_derivative_min = Param(
        model.j, initialize=ems_derivative_min_select)
    model.device_derivative_down_efficiency = Param(
        model.d, model.j, initialize=device_derivative_down_efficiency)
    model.device_derivative_up_efficiency = Param(
        model.d, model.j, initialize=device_derivative_up_efficiency)

    # Add variables
    model.ems_power = Var(model.d, model.j, domain=Reals, initialize=0)
    model.device_power_down = Var(
        model.d, model.j, domain=NonPositiveReals, initialize=0)
    model.device_power_up = Var(
        model.d, model.j, domain=NonNegativeReals, initialize=0)
    model.commitment_downwards_deviation = Var(
        model.c, model.j, domain=NonPositiveReals, initialize=0)
    model.commitment_upwards_deviation = Var(
        model.c, model.j, domain=NonNegativeReals, initialize=0)

    # Add constraints as a tuple of (lower bound, value, upper bound)
    def device_bounds(m, d, j):
        # Cumulative flow (stock, relative to initial stock of zero).
        return (
            m.device_min[d, j],
            sum(m.device_power_down[d, k] + m.device_power_up[d, k]
                for k in range(0, j + 1)),
            m.device_max[d, j],
        )

    def device_derivative_bounds(m, d, j):
        return (
            m.device_derivative_min[d, j],
            m.device_power_down[d, j] + m.device_power_up[d, j],
            m.device_derivative_max[d, j],
        )

    def device_down_derivative_bounds(m, d, j):
        return (
            m.device_derivative_min[d, j],
            m.device_power_down[d, j],
            0,
        )

    def device_up_derivative_bounds(m, d, j):
        return (
            0,
            m.device_power_up[d, j],
            m.device_derivative_max[d, j],
        )

    def ems_derivative_bounds(m, j):
        return m.ems_derivative_min[j], sum(
            m.ems_power[:, j]), m.ems_derivative_max[j]

    def ems_flow_commitment_equalities(m, j):
        """Couple EMS flows (sum over devices) to commitments."""
        return (
            0,
            sum(m.commitment_quantity[:, j])
            + sum(m.commitment_downwards_deviation[:, j])
            + sum(m.commitment_upwards_deviation[:, j])
            - sum(m.ems_power[:, j]),
            0,
        )

    def device_derivative_equalities(m, d, j):
        """Couple device flows to EMS flows per device, applying efficiencies."""
        return (
            0,
            m.device_power_up[d, j] / m.device_derivative_up_efficiency[d, j]
            + m.device_power_down[d, j]
            * m.device_derivative_down_efficiency[d, j]
            - m.ems_power[d, j],
            0,
        )

    model.device_energy_bounds = Constraint(
        model.d, model.j, rule=device_bounds)
    model.device_power_bounds = Constraint(
        model.d, model.j, rule=device_derivative_bounds)
    model.device_power_down_bounds = Constraint(
        model.d, model.j, rule=device_down_derivative_bounds)
    model.device_power_up_bounds = Constraint(
        model.d, model.j, rule=device_up_derivative_bounds)
    model.ems_power_bounds = Constraint(model.j, rule=ems_derivative_bounds)
    model.ems_power_commitment_equalities = Constraint(
        model.j, rule=ems_flow_commitment_equalities)
    model.device_power_equalities = Constraint(
        model.d, model.j, rule=device_derivative_equalities)

    # Add objective: total cost of deviating from all commitments.
    def cost_function(m):
        costs = 0
        for c in m.c:
            for j in m.j:
                costs += m.commitment_downwards_deviation[c, j] * m.down_price[c, j]
                costs += m.commitment_upwards_deviation[c, j] * m.up_price[c, j]
        return costs

    model.costs = Objective(rule=cost_function, sense=minimize)

    # Solve
    results = SolverFactory(
        current_app.config.get("FLEXMEASURES_LP_SOLVER")).solve(model)

    planned_costs = value(model.costs)
    planned_power_per_device = []
    for d in model.d:
        planned_device_power = [
            model.device_power_down[d, j].value + model.device_power_up[d, j].value
            for j in model.j
        ]
        planned_power_per_device.append(
            pd.Series(
                index=pd.date_range(start=start, end=end,
                                    freq=to_offset(resolution),
                                    closed="left"),
                data=planned_device_power,
            ))

    # model.pprint()
    # print(results.solver.termination_condition)
    # print(planned_costs)
    # model.display()
    return planned_power_per_device, planned_costs, results
def __new__(cls, data=None, freq=None, start=None, end=None, periods=None,
            copy=False, name=None, tz=None, verify_integrity=True,
            normalize=False, **kwds):
    """Construct a DatetimeIndex from array-like data or from a
    start/end/periods/freq specification.

    Coerces strings, lists, object arrays, int64 and datetime64 arrays to
    nanosecond datetime64; optionally localizes to ``tz`` and verifies
    that the data conforms to ``freq``.
    """
    dayfirst = kwds.pop('dayfirst', None)
    yearfirst = kwds.pop('yearfirst', None)

    # 'offset' is a deprecated alias for 'freq'; remember to warn below.
    warn = False
    if 'offset' in kwds and kwds['offset']:
        freq = kwds['offset']
        warn = True

    freq_infer = False
    if not isinstance(freq, DateOffset):
        # freq='infer' means: derive the frequency from the data later.
        if freq != 'infer':
            freq = to_offset(freq)
        else:
            freq_infer = True
            freq = None

    if warn:
        import warnings
        warnings.warn("parameter 'offset' is deprecated, "
                      "please use 'freq' instead",
                      FutureWarning)

    offset = freq

    if periods is not None:
        if com.is_float(periods):
            periods = int(periods)
        elif not com.is_integer(periods):
            raise ValueError('Periods must be a number, got %s'
                             % str(periods))

    if data is None and offset is None:
        raise ValueError("Must provide freq argument if no data is "
                         "supplied")

    # No data: generate the index from start/end/periods/freq.
    if data is None:
        return cls._generate(start, end, periods, name, offset,
                             tz=tz, normalize=normalize)

    if not isinstance(data, np.ndarray):
        if np.isscalar(data):
            raise ValueError('DatetimeIndex() must be called with a '
                             'collection of some kind, %s was passed'
                             % repr(data))

        # other iterable of some kind
        if not isinstance(data, (list, tuple)):
            data = list(data)

        data = np.asarray(data, dtype='O')

        # try a few ways to make it datetime64
        if lib.is_string_array(data):
            data = _str_to_dt_array(data, offset, dayfirst=dayfirst,
                                    yearfirst=yearfirst)
        else:
            data = tools.to_datetime(data)
            data.offset = offset
            if isinstance(data, DatetimeIndex):
                if name is not None:
                    data.name = name
                return data

    # data is now an ndarray (or DatetimeIndex); convert by dtype.
    if issubclass(data.dtype.type, basestring):
        subarr = _str_to_dt_array(data, offset, dayfirst=dayfirst,
                                  yearfirst=yearfirst)
    elif issubclass(data.dtype.type, np.datetime64):
        if isinstance(data, DatetimeIndex):
            if tz is None:
                tz = data.tz
            subarr = data.values
            if offset is None:
                # reuse the input's freq; it was already verified there.
                offset = data.offset
                verify_integrity = False
        else:
            if data.dtype != _NS_DTYPE:
                # normalize any other datetime64 unit to nanoseconds.
                subarr = lib.cast_to_nanoseconds(data)
            else:
                subarr = data
    elif data.dtype == _INT64_DTYPE:
        if isinstance(data, Int64Index):
            raise TypeError('cannot convert Int64Index->DatetimeIndex')
        if copy:
            subarr = np.asarray(data, dtype=_NS_DTYPE)
        else:
            # zero-copy reinterpretation of int64 as nanosecond stamps.
            subarr = data.view(_NS_DTYPE)
    else:
        try:
            subarr = tools.to_datetime(data)
        except ValueError:
            # tz aware
            subarr = tools.to_datetime(data, utc=True)

        if not np.issubdtype(subarr.dtype, np.datetime64):
            raise TypeError('Unable to convert %s to datetime dtype'
                            % str(data))

    if isinstance(subarr, DatetimeIndex):
        if tz is None:
            tz = subarr.tz
    else:
        if tz is not None:
            tz = tools._maybe_get_tz(tz)

            if (not isinstance(data, DatetimeIndex) or
                    getattr(data, 'tz', None) is None):
                # Convert tz-naive to UTC
                ints = subarr.view('i8')
                subarr = lib.tz_localize_to_utc(ints, tz)

            subarr = subarr.view(_NS_DTYPE)

    subarr = subarr.view(cls)
    subarr.name = name
    subarr.offset = offset
    subarr.tz = tz

    # Confirm the data actually conforms to an explicitly passed freq.
    if verify_integrity and len(subarr) > 0:
        if offset is not None and not freq_infer:
            inferred = subarr.inferred_freq
            if inferred != offset.freqstr:
                raise ValueError('Dates do not conform to passed '
                                 'frequency')

    if freq_infer:
        inferred = subarr.inferred_freq
        if inferred:
            subarr.offset = to_offset(inferred)

    return subarr
def longest_period_from_frequency_str(freq_str: str) -> int:
    """Return the longest period associated with a frequency string.

    The frequency string is parsed into an offset; the period for the
    offset's normalized base name is looked up and divided (floor) by
    the offset's multiple ``n``.
    """
    parsed = to_offset(freq_str)
    base_period = FREQ_LONGEST_PERIOD_DICT[norm_freq_str(parsed.name)]
    return base_period // parsed.n
def _resample_timestamps(self, kind=None):
    """Resample the grouper's object along its time axis.

    Chooses between three paths: aggregation (downsampling), a reindex
    shortcut (upsampling on a regular index), or groupby aggregation for
    an irregular index; finally applies the optional ``loffset`` shift.
    """
    # assumes set_grouper(obj) already called
    axlabels = self.ax

    self._get_binner_for_resample(kind=kind)
    grouper = self.grouper
    binner = self.binner
    obj = self.obj

    # Determine if we're downsampling
    if axlabels.freq is not None or axlabels.inferred_freq is not None:
        # fewer bins than labels (or an explicit 'how') means downsampling
        if len(grouper.binlabels) < len(axlabels) or self.how is not None:
            # downsample
            grouped = obj.groupby(grouper, axis=self.axis)
            result = grouped.aggregate(self._agg_method)
            # GH2073
            if self.fill_method is not None:
                result = result.fillna(method=self.fill_method,
                                       limit=self.limit)
        else:
            # upsampling shortcut
            if self.axis:
                raise AssertionError('axis must be 0')

            # closed='right' drops the first bin edge, otherwise the last
            if self.closed == 'right':
                res_index = binner[1:]
            else:
                res_index = binner[:-1]

            # if we have the same frequency as our axis, then we are equal
            # sampling even if how is None
            if self.fill_method is None and self.limit is None and to_offset(
                    axlabels.inferred_freq) == self.freq:
                result = obj.copy()
                result.index = res_index
            else:
                result = obj.reindex(res_index, method=self.fill_method,
                                     limit=self.limit)
    else:
        # Irregular data, have to use groupby
        grouped = obj.groupby(grouper, axis=self.axis)
        result = grouped.aggregate(self._agg_method)

        if self.fill_method is not None:
            result = result.fillna(method=self.fill_method,
                                   limit=self.limit)

    # loffset: optional offset applied to the resulting index labels
    loffset = self.loffset
    if isinstance(loffset, compat.string_types):
        loffset = to_offset(self.loffset)

    if isinstance(loffset, (DateOffset, timedelta)):
        if (isinstance(result.index, DatetimeIndex) and
                len(result.index) > 0):
            result.index = result.index + loffset

    return result
def test_rule_aliases():
    """The 'us' rule alias should parse into a Micro offset."""
    parsed = frequencies.to_offset('10us')
    expected = offsets.Micro(10)
    assert parsed == expected
def test_to_offset_multiple(self):
    """Compound frequency strings combine into a single equivalent offset."""
    cases = [
        ('2h30min', offsets.Minute(150)),
        ('2h 30min', offsets.Minute(150)),
        ('2h30min15s', offsets.Second(150 * 60 + 15)),
        ('2h 60min', offsets.Hour(3)),
        ('2h 20.5min', offsets.Second(8430)),
        ('1.5min', offsets.Second(90)),
        ('0.5S', offsets.Milli(500)),
        ('15l500u', offsets.Micro(15500)),
        ('10s75L', offsets.Milli(10075)),
        ('1s0.25ms', offsets.Micro(1000250)),
        ('1s0.25L', offsets.Micro(1000250)),
        ('2800N', offsets.Nano(2800)),
        ('2SM', offsets.SemiMonthEnd(2)),
        ('2SM-16', offsets.SemiMonthEnd(2, day_of_month=16)),
        ('2SMS-14', offsets.SemiMonthBegin(2, day_of_month=14)),
        ('2SMS-15', offsets.SemiMonthBegin(2)),
    ]
    for freqstr, expected in cases:
        assert frequencies.to_offset(freqstr) == expected

    # malformed
    with tm.assert_raises_regex(ValueError, 'Invalid frequency: 2h20m'):
        frequencies.to_offset('2h20m')
def test_anchored_shortcuts():
    """Anchored frequency shortcuts expand to their default anchors, and
    invalid anchors raise ValueError.

    Bug fix: the original ``invalid_anchors`` list was missing a comma
    between ``'SM--1'`` and ``'SMS-1'`` (and between ``'SMS--2'`` and the
    list end was fine), so Python's implicit string concatenation produced
    the single bogus entry ``'SM--1SMS-1'`` and neither intended case was
    actually exercised.
    """
    result = frequencies.to_offset('W')
    expected = frequencies.to_offset('W-SUN')
    assert (result == expected)

    result1 = frequencies.to_offset('Q')
    result2 = frequencies.to_offset('Q-DEC')
    expected = offsets.QuarterEnd(startingMonth=12)
    assert (result1 == expected)
    assert (result2 == expected)

    result1 = frequencies.to_offset('Q-MAY')
    expected = offsets.QuarterEnd(startingMonth=5)
    assert (result1 == expected)

    result1 = frequencies.to_offset('SM')
    result2 = frequencies.to_offset('SM-15')
    expected = offsets.SemiMonthEnd(day_of_month=15)
    assert (result1 == expected)
    assert (result2 == expected)

    result = frequencies.to_offset('SM-1')
    expected = offsets.SemiMonthEnd(day_of_month=1)
    assert (result == expected)

    result = frequencies.to_offset('SM-27')
    expected = offsets.SemiMonthEnd(day_of_month=27)
    assert (result == expected)

    result = frequencies.to_offset('SMS-2')
    expected = offsets.SemiMonthBegin(day_of_month=2)
    assert (result == expected)

    result = frequencies.to_offset('SMS-27')
    expected = offsets.SemiMonthBegin(day_of_month=27)
    assert (result == expected)

    # ensure invalid cases fail as expected
    invalid_anchors = [
        'SM-0', 'SM-28', 'SM-29', 'SM-FOO', 'BSM', 'SM--1', 'SMS-1',
        'SMS-28', 'SMS-30', 'SMS-BAR', 'BSMS', 'SMS--2'
    ]
    for invalid_anchor in invalid_anchors:
        try:
            frequencies.to_offset(invalid_anchor)
        except ValueError:
            pass
        else:
            raise AssertionError(invalid_anchor)
def test_to_offset_invalid(self):
    """Malformed frequency strings raise, while space-split and
    semi-month forms parse correctly (GH 13930)."""
    # malformed strings: error message names the offending input
    for bad in ['U1', '-U', '3U1', '-2-3U', '-2D:3H', '1.5.0S']:
        with tm.assert_raises_regex(ValueError,
                                    'Invalid frequency: ' + bad):
            frequencies.to_offset(bad)

    # split offsets with spaces are valid
    space_cases = [
        ('2D 3H', offsets.Hour(51)),
        ('2 D3 H', offsets.Hour(51)),
        ('2 D 3 H', offsets.Hour(51)),
        (' 2 D 3 H ', offsets.Hour(51)),
        (' H ', offsets.Hour()),
        (' 3 H ', offsets.Hour(3)),
    ]
    for freqstr, expected in space_cases:
        assert frequencies.to_offset(freqstr) == expected

    # special cases
    assert frequencies.to_offset('2SMS-15') == offsets.SemiMonthBegin(2)
    for bad in ['2SMS-15-15', '2SMS-15D']:
        with tm.assert_raises_regex(ValueError,
                                    'Invalid frequency: ' + bad):
            frequencies.to_offset(bad)
def __new__(cls, data=None, unit=None, freq=None, start=None, end=None,
            periods=None, closed=None, dtype=None, copy=False,
            name=None, verify_integrity=True):
    """Construct a TimedeltaIndex from array-like data or from a
    start/end/periods/freq specification.

    Data is coerced to timedelta64[ns]; an explicit ``freq`` is validated
    against the data, and ``freq='infer'`` derives it from the data.
    """
    # Fast path: reuse an existing TimedeltaIndex untouched.
    if isinstance(data, TimedeltaIndex) and freq is None and name is None:
        if copy:
            return data.copy()
        else:
            return data._shallow_copy()

    freq_infer = False
    if not isinstance(freq, DateOffset):
        # if a passed freq is None, don't infer automatically
        if freq != 'infer':
            freq = to_offset(freq)
        else:
            freq_infer = True
            freq = None

    if periods is not None:
        if is_float(periods):
            periods = int(periods)
        elif not is_integer(periods):
            msg = 'periods must be a number, got {periods}'
            raise TypeError(msg.format(periods=periods))

    if data is None:
        # Need at least three of freq/periods/start/end to generate.
        if freq is None and com._any_none(periods, start, end):
            msg = 'Must provide freq argument if no data is supplied'
            raise ValueError(msg)
        else:
            return cls._generate(start, end, periods, name, freq,
                                 closed=closed)

    if unit is not None:
        data = to_timedelta(data, unit=unit, box=False)

    if not isinstance(data, (np.ndarray, Index, ABCSeries)):
        if is_scalar(data):
            raise ValueError('TimedeltaIndex() must be called with a '
                             'collection of some kind, %s was passed'
                             % repr(data))

    # convert if not already
    if getattr(data, 'dtype', None) != _TD_DTYPE:
        data = to_timedelta(data, unit=unit, box=False)
    elif copy:
        data = np.array(data, copy=True)

    # check that we are matching freqs
    if verify_integrity and len(data) > 0:
        if freq is not None and not freq_infer:
            index = cls._simple_new(data, name=name)
            cls._validate_frequency(index, freq)
            index.freq = freq
            return index

    if freq_infer:
        index = cls._simple_new(data, name=name)
        inferred = index.inferred_freq
        if inferred:
            index.freq = to_offset(inferred)
        return index

    return cls._simple_new(data, name=name, freq=freq)
def __new__(cls, data=None, unit=None, freq=None, start=None, end=None,
            periods=None, copy=False, name=None, closed=None,
            verify_integrity=True, **kwargs):
    """Construct a TimedeltaIndex from array-like data or from a
    start/end/periods/freq specification (older variant: frequency
    conformance is checked inline rather than via _validate_frequency).
    """
    # Fast path: reuse an existing TimedeltaIndex when no freq is forced.
    if isinstance(data, TimedeltaIndex) and freq is None:
        if copy:
            data = data.copy()
        return data

    freq_infer = False
    if not isinstance(freq, DateOffset):
        # if a passed freq is None, don't infer automatically
        if freq != 'infer':
            freq = to_offset(freq)
        else:
            freq_infer = True
            freq = None

    if periods is not None:
        if com.is_float(periods):
            periods = int(periods)
        elif not com.is_integer(periods):
            raise ValueError('Periods must be a number, got %s'
                             % str(periods))

    if data is None and freq is None:
        raise ValueError("Must provide freq argument if no data is "
                         "supplied")

    if data is None:
        return cls._generate(start, end, periods, name, freq,
                             closed=closed)

    if unit is not None:
        data = to_timedelta(data, unit=unit, box=False)

    if not isinstance(data, (np.ndarray, Index, ABCSeries)):
        if np.isscalar(data):
            raise ValueError('TimedeltaIndex() must be called with a '
                             'collection of some kind, %s was passed'
                             % repr(data))

    # convert if not already
    if getattr(data, 'dtype', None) != _TD_DTYPE:
        data = to_timedelta(data, unit=unit, box=False)
    elif copy:
        data = np.array(data, copy=True)

    # check that we are matching freqs
    if verify_integrity and len(data) > 0:
        if freq is not None and not freq_infer:
            index = cls._simple_new(data, name=name)
            inferred = index.inferred_freq
            if inferred != freq.freqstr:
                # Inferred freq may disagree textually yet still describe
                # the same points; compare against a regenerated range.
                on_freq = cls._generate(index[0], None, len(index),
                                        name, freq)
                if not np.array_equal(index.asi8, on_freq.asi8):
                    raise ValueError(
                        'Inferred frequency {0} from passed timedeltas '
                        'does not conform to passed frequency {1}'.format(
                            inferred, freq.freqstr))
            index.freq = freq
            return index

    if freq_infer:
        index = cls._simple_new(data, name=name)
        inferred = index.inferred_freq
        if inferred:
            index.freq = to_offset(inferred)
        return index

    return cls._simple_new(data, name=name, freq=freq)
def round(t, freq):
    """Floor timestamp ``t`` down to the previous multiple of ``freq``.

    ``freq`` must resolve to a fixed-length (Tick) offset; the timestamp's
    nanosecond value is reduced modulo the offset's length.
    """
    step = to_offset(freq).delta.value
    return pd.Timestamp(t.value - t.value % step)
def interval_range(start=None, end=None, periods=None, freq=None,
                   name=None, closed='right'):
    """
    Return a fixed frequency IntervalIndex

    Parameters
    ----------
    start : numeric or datetime-like, default None
        Left bound for generating intervals
    end : numeric or datetime-like, default None
        Right bound for generating intervals
    periods : integer, default None
        Number of periods to generate
    freq : numeric, string, or DateOffset, default None
        The length of each interval. Must be consistent with the type of
        start and end, e.g. 2 for numeric, or '5H' for datetime-like.
        Default is 1 for numeric and 'D' for datetime-like.
    name : string, default None
        Name of the resulting IntervalIndex
    closed : {'left', 'right', 'both', 'neither'}, default 'right'
        Whether the intervals are closed on the left-side, right-side,
        both or neither.

    Notes
    -----
    Of the four parameters ``start``, ``end``, ``periods``, and ``freq``,
    exactly three must be specified. If ``freq`` is omitted, the resulting
    ``IntervalIndex`` will have ``periods`` linearly spaced elements
    between ``start`` and ``end``, inclusively.

    To learn more about datetime-like frequency strings, please see
    `this link
    <http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases>`__.

    Returns
    -------
    rng : IntervalIndex

    Examples
    --------
    Numeric ``start`` and ``end`` is supported.

    >>> pd.interval_range(start=0, end=5)
    IntervalIndex([(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]]
                  closed='right', dtype='interval[int64]')

    Additionally, datetime-like input is also supported.

    >>> pd.interval_range(start=pd.Timestamp('2017-01-01'),
    ...                   end=pd.Timestamp('2017-01-04'))
    IntervalIndex([(2017-01-01, 2017-01-02], (2017-01-02, 2017-01-03],
                   (2017-01-03, 2017-01-04]]
                  closed='right', dtype='interval[datetime64[ns]]')

    The ``freq`` parameter specifies the frequency between the left and
    right endpoints of the individual intervals within the
    ``IntervalIndex``. For numeric ``start`` and ``end``, the frequency
    must also be numeric.

    >>> pd.interval_range(start=0, periods=4, freq=1.5)
    IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]]
                  closed='right', dtype='interval[float64]')

    Similarly, for datetime-like ``start`` and ``end``, the frequency
    must be convertible to a DateOffset.

    >>> pd.interval_range(start=pd.Timestamp('2017-01-01'),
    ...                   periods=3, freq='MS')
    IntervalIndex([(2017-01-01, 2017-02-01], (2017-02-01, 2017-03-01],
                   (2017-03-01, 2017-04-01]]
                  closed='right', dtype='interval[datetime64[ns]]')

    Specify ``start``, ``end``, and ``periods``; the frequency is
    generated automatically (linearly spaced).

    >>> pd.interval_range(start=0, end=6, periods=4)
    IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]]
                  closed='right', dtype='interval[float64]')

    The ``closed`` parameter specifies which endpoints of the individual
    intervals within the ``IntervalIndex`` are closed.

    >>> pd.interval_range(end=5, periods=4, closed='both')
    IntervalIndex([[1, 2], [2, 3], [3, 4], [4, 5]]
                  closed='both', dtype='interval[int64]')

    See Also
    --------
    IntervalIndex : an Index of intervals that are all closed on the same
        side.
    """
    start = com.maybe_box_datetimelike(start)
    end = com.maybe_box_datetimelike(end)
    # endpoint is whichever bound is given; its type decides the default freq
    endpoint = start if start is not None else end

    if freq is None and com._any_none(periods, start, end):
        freq = 1 if is_number(endpoint) else 'D'

    if com.count_not_none(start, end, periods, freq) != 3:
        raise ValueError('Of the four parameters: start, end, periods, and '
                         'freq, exactly three must be specified')

    if not _is_valid_endpoint(start):
        msg = 'start must be numeric or datetime-like, got {start}'
        raise ValueError(msg.format(start=start))
    elif not _is_valid_endpoint(end):
        msg = 'end must be numeric or datetime-like, got {end}'
        raise ValueError(msg.format(end=end))

    if is_float(periods):
        periods = int(periods)
    elif not is_integer(periods) and periods is not None:
        msg = 'periods must be a number, got {periods}'
        raise TypeError(msg.format(periods=periods))

    if freq is not None and not is_number(freq):
        try:
            freq = to_offset(freq)
        except ValueError:
            raise ValueError('freq must be numeric or convertible to '
                             'DateOffset, got {freq}'.format(freq=freq))

    # verify type compatibility
    if not all([_is_type_compatible(start, end),
                _is_type_compatible(start, freq),
                _is_type_compatible(end, freq)]):
        raise TypeError("start, end, freq need to be type compatible")

    # +1 to convert interval count to breaks count (n breaks = n-1 intervals)
    if periods is not None:
        periods += 1

    if is_number(endpoint):
        # force consistency between start/end/freq (lower end if freq skips it)
        if com._all_not_none(start, end, freq):
            end -= (end - start) % freq

        # compute the period/start/end if unspecified (at most one)
        if periods is None:
            periods = int((end - start) // freq) + 1
        elif start is None:
            start = end - (periods - 1) * freq
        elif end is None:
            end = start + (periods - 1) * freq

        breaks = np.linspace(start, end, periods)
        if all(is_integer(x) for x in com._not_none(start, end, freq)):
            # np.linspace always produces float output
            breaks = maybe_downcast_to_dtype(breaks, 'int64')
    else:
        # delegate to the appropriate range function
        if isinstance(endpoint, Timestamp):
            range_func = date_range
        else:
            range_func = timedelta_range

        breaks = range_func(start=start, end=end, periods=periods, freq=freq)

    return IntervalIndex.from_breaks(breaks, name=name, closed=closed)
def conv_resol(resolution):
    """Return the broker resolution string for a pandas-style resolution.

    Parameters
    ----------
    resolution : str or DateOffset
        A pandas frequency (e.g. '5Min', '1H', 'D').

    Returns
    -------
    str
        The mapped resolution name (e.g. 'MINUTE_5'); when pandas is not
        available, or the resolution has no mapping, the input is
        returned unchanged.

    Notes
    -----
    Bug fix: the original logged ``traceback.format_exc()`` outside of
    any ``except`` block, which always produced the meaningless message
    ``"NoneType: None"``; it now logs a plain warning instead.
    """
    if not _HAS_PANDAS:
        return resolution

    from pandas.tseries.frequencies import to_offset
    d = {
        to_offset('1Min'): 'MINUTE',
        to_offset('2Min'): 'MINUTE_2',
        to_offset('3Min'): 'MINUTE_3',
        to_offset('5Min'): 'MINUTE_5',
        to_offset('10Min'): 'MINUTE_10',
        to_offset('15Min'): 'MINUTE_15',
        to_offset('30Min'): 'MINUTE_30',
        to_offset('1H'): 'HOUR',
        to_offset('2H'): 'HOUR_2',
        to_offset('3H'): 'HOUR_3',
        to_offset('4H'): 'HOUR_4',
        to_offset('D'): 'DAY',
        to_offset('W'): 'WEEK',
        to_offset('M'): 'MONTH'
    }
    offset = to_offset(resolution)
    if offset in d:
        return d[offset]
    # No mapping: fall back to the caller's value, but say so clearly.
    logger.warning("conv_resol: unsupported resolution %r - "
                   "returning it unchanged", resolution)
    return resolution
def _init_dates(self, dates=None, freq=None):
    """
    Initialize dates

    Parameters
    ----------
    dates : array_like, optional
        An array like object containing dates.
    freq : str, tuple, datetime.timedelta, DateOffset or None, optional
        A frequency specification for either `dates` or the row labels from
        the endog / exog data.

    Notes
    -----
    Creates `self._index` and related attributes. `self._index` is always
    a Pandas index, and it is always Int64Index, DatetimeIndex, or
    PeriodIndex.

    If Pandas objects, endog / exog may have any type of index. If it is
    an Int64Index with values 0, 1, ..., nobs-1 or if it is (coerceable to)
    a DatetimeIndex or PeriodIndex *with an associated frequency*, then it
    is called a "supported" index. Otherwise it is called an "unsupported"
    index.

    Supported indexes are standardized (i.e. a list of date strings is
    converted to a DatetimeIndex) and the result is put in `self._index`.

    Unsupported indexes are ignored, and a supported Int64Index is
    generated and put in `self._index`. Warnings are issued in this case
    to alert the user if the returned index from some operation (e.g.
    forecasting) is different from the original data's index. However,
    whenever possible (e.g. purely in-sample prediction), the original
    index is returned.

    The benefit of supported indexes is that they allow *forecasting*,
    i.e. it is possible to extend them in a reasonable way. Thus every
    model must have an underlying supported index, even if it is just a
    generated Int64Index.
    """
    # Get our index from `dates` if available, otherwise from whatever
    # Pandas index we might have retrieved from endog, exog
    if dates is not None:
        index = dates
    else:
        index = self.data.row_labels

    # Sanity check that we don't have a `freq` without an index
    if index is None and freq is not None:
        raise ValueError('Frequency provided without associated index.')

    # If an index is available, see if it is a date-based index or if it
    # can be coerced to one. (If it can't we'll fall back, below, to an
    # internal, 0, 1, ... nobs-1 integer index for modeling purposes)
    inferred_freq = False
    if index is not None:
        # Try to coerce to date-based index
        if not isinstance(index, (DatetimeIndex, PeriodIndex)):
            try:
                # Only try to coerce non-numeric index types (string,
                # list of date-times, etc.)
                # Note that np.asarray(Float64Index([...])) yields an
                # object dtype array in earlier versions of Pandas (and so
                # will not have is_numeric_dtype == True), so explicitly
                # check for it here. But note also that in very early
                # Pandas (~0.12), Float64Index doesn't exist (and so the
                # Statsmodels compat makes it an empty tuple, so in that
                # case also check if the first element is a float.
                _index = np.asarray(index)
                if (is_numeric_dtype(_index) or
                        isinstance(index, Float64Index) or
                        (Float64Index == tuple() and
                         isinstance(_index[0], float))):
                    raise ValueError('Numeric index given')
                # If a non-index Pandas series was given, only keep its
                # values (because we must have a pd.Index type, below, and
                # pd.to_datetime will return a Series when passed
                # non-list-like objects)
                if isinstance(index, Series):
                    index = index.values
                # All coercion is done via pd.to_datetime
                # Note: date coercion via pd.to_datetime does not handle
                # string versions of PeriodIndex objects most of the time.
                _index = to_datetime(index)
                # Older versions of Pandas can sometimes fail here and
                # return a numpy array - check to make sure it's an index
                if not isinstance(_index, Index):
                    raise ValueError('Could not coerce to date index')
                index = _index
            # NOTE(review): bare except also swallows KeyboardInterrupt /
            # SystemExit; it is treated here purely as "coercion failed".
            except:
                # Only want to actually raise an exception if `dates` was
                # provided but can't be coerced. If we got the index from
                # the row_labels, we'll just ignore it and use the integer
                # index below
                if dates is not None:
                    raise ValueError('Non-date index index provided to'
                                     ' `dates` argument.')
        # Now, if we were given, or coerced, a date-based index, make sure
        # it has an associated frequency
        if isinstance(index, (DatetimeIndex, PeriodIndex)):
            # If no frequency, try to get an inferred frequency
            if freq is None and index.freq is None:
                freq = index.inferred_freq
                # If we got an inferred frequency, alert the user
                if freq is not None:
                    inferred_freq = True
                    if freq is not None:
                        warnings.warn('No frequency information was'
                                      ' provided, so inferred frequency %s'
                                      ' will be used.'
                                      % freq, ValueWarning)

            # Convert the passed freq to a pandas offset object
            if freq is not None:
                freq = to_offset(freq)

            # Now, if no frequency information is available from the index
            # itself or from the `freq` argument, raise an exception
            if freq is None and index.freq is None:
                # But again, only want to raise the exception if `dates`
                # was provided.
                if dates is not None:
                    raise ValueError('No frequency information was'
                                     ' provided with date index and no'
                                     ' frequency could be inferred.')
            # However, if the index itself has no frequency information but
            # the `freq` argument is available (or was inferred), construct
            # a new index with an associated frequency
            elif freq is not None and index.freq is None:
                resampled_index = date_range(
                    start=index[0], end=index[-1], freq=freq)
                # A freq that was given (not inferred) must reproduce the
                # original index exactly, otherwise it is inconsistent.
                if not inferred_freq and not resampled_index.equals(index):
                    raise ValueError('The given frequency argument could'
                                     ' not be matched to the given index.')
                index = resampled_index
            # Finally, if the index itself has a frequency and there was
            # also a given frequency, raise an exception if they are not
            # equal
            elif (freq is not None and not inferred_freq and
                    not (index.freq == freq)):
                raise ValueError('The given frequency argument is'
                                 ' incompatible with the given index.')
        # Finally, raise an exception if we could not coerce to date-based
        # but we were given a frequency argument
        elif freq is not None:
            raise ValueError('Given index could not be coerced to dates'
                             ' but `freq` argument was provided.')

    # Get attributes of the index
    has_index = index is not None
    date_index = isinstance(index, (DatetimeIndex, PeriodIndex))
    int_index = isinstance(index, Int64Index)
    range_index = isinstance(index, RangeIndex)
    # has_freq / is_increment are None (not False) when not applicable
    has_freq = index.freq is not None if date_index else None
    increment = Index(range(self.endog.shape[0]))
    is_increment = index.equals(increment) if int_index else None

    # Issue warnings for unsupported indexes
    if has_index and not (date_index or range_index or is_increment):
        warnings.warn('An unsupported index was provided and will be'
                      ' ignored when e.g. forecasting.', ValueWarning)
    if date_index and not has_freq:
        warnings.warn('A date index has been provided, but it has no'
                      ' associated frequency information and so will be'
                      ' ignored when e.g. forecasting.', ValueWarning)

    # Construct the internal index: keep the user's index only when it is
    # "supported" (dated with freq, a 0..nobs-1 Int64Index, or a
    # RangeIndex); otherwise fall back to a generated integer index.
    index_generated = False

    if ((date_index and has_freq) or (int_index and is_increment) or
            range_index):
        _index = index
    else:
        _index = increment
        index_generated = True
    self._index = _index
    self._index_generated = index_generated
    self._index_none = index is None
    self._index_dates = date_index and not index_generated
    self._index_freq = self._index.freq if self._index_dates else None
    self._index_inferred_freq = inferred_freq

    # For backwards compatibility, set data.dates, data.freq
    self.data.dates = self._index if self._index_dates else None
    self.data.freq = self._index.freqstr if self._index_dates else None
def convert_index(
    to,
    interval=None,
    epoch="julian",
    input_ts="-",
    columns=None,
    start_date=None,
    end_date=None,
    round_index=None,
    dropna="no",
    clean=False,
    names=None,
    source_units=None,
    target_units=None,
    skiprows=None,
):
    """Convert datetime to/from Julian dates from different epochs.

    Parameters
    ----------
    to : str
        Direction of the conversion: "datetime" (numeric index -> dates)
        or "number" (datetime index -> numeric Julian-style index).
    interval : str, optional
        Pandas frequency string for the numeric index units; defaults to
        "D" (days).  An anchoring suffix (e.g. "W-SUN") is stripped.
    epoch : str, optional
        One of the named epochs below, or any date-like string usable as a
        custom epoch.  Defaults to "julian".
    input_ts, columns, start_date, end_date, round_index, dropna, clean,
    names, source_units, target_units, skiprows
        Passed through to ``tsutils`` for reading/filtering the series.

    Returns
    -------
    pandas.DataFrame
        The time series with its index converted.
    """
    # Clip to start_date/end_date if possible.  Clipping by date only makes
    # sense on the side of the conversion where the index is datetime.
    if to == "datetime":
        index_type = "number"
        nstart_date = None
        nend_date = None
        nround_index = None
    elif to == "number":
        index_type = "datetime"
        nstart_date = start_date
        nend_date = end_date
        nround_index = round_index
    tsd = tsutils.common_kwds(
        tsutils.read_iso_ts(input_ts, skiprows=skiprows, names=names,
                            index_type=index_type),
        start_date=nstart_date,
        end_date=nend_date,
        pick=columns,
        round_index=nround_index,
        dropna=dropna,
        source_units=source_units,
        target_units=target_units,
        clean=clean,
    )

    # Named epochs expressed as transforms of the Julian date.
    allowed = {
        "julian": lambda x: x,
        "reduced": lambda x: x - 2400000,
        "modified": lambda x: x - 2400000.5,
        "truncated": lambda x: np.floor(x - 2440000.5),
        "dublin": lambda x: x - 2415020,
        "cnes": lambda x: x - 2433282.5,
        "ccsds": lambda x: x - 2436204.5,
        "lop": lambda x: x - 2448622.5,
        "lilian": lambda x: np.floor(x - 2299159.5),
        "rata_die": lambda x: np.floor(x - 1721424.5),
        "mars_sol": lambda x: (x - 2405522) / 1.02749,
        "unix": lambda x: (x - 2440587.5),
    }
    # Epochs that are conventionally counted in days.
    dailies = [
        "julian",
        "reduced",
        "modified",
        "truncated",
        "dublin",
        "cnes",
        "ccsds",
        "lop",
        "lilian",
        "rata_die",
        "mars_sol",
    ]
    # Origin timestamps used when converting numbers back to datetimes.
    epoch_dates = {
        "julian": "julian",
        "reduced": "1858-11-16T12",
        "modified": "1858-11-17T00",
        "truncated": "1968-05-24T00",
        "dublin": "1899-12-31T12",
        "cnes": "1950-01-01T00",
        "ccsds": "1958-01-01T00",
        "lop": "1992-01-01T00",
        "lilian": "1582-10-15T00",
        "rata_die": "0001-01-01T00",
        "mars_sol": "1873-12-29T12",
        "unix": "1970-01-01T00",
    }

    if interval is None:
        interval = "D"
    else:
        # Strip an anchoring suffix like "-SUN"; the epoch determines the
        # anchor, so the suffix is ignored with a warning.
        words = interval.split("-")
        if len(words) == 2:
            warnings.warn("""
*
*   The epoch keyword "{0}" overrides the anchoring suffix "{1}".
*
""".format(epoch, words[1]))
            interval = words[0]

    # Warn about unusual epoch/interval combinations.
    if epoch == "unix" and interval not in ["S", "s"]:
        warnings.warn("""
*
*   Typically the unix epoch would have an interval of 'S' (seconds).
*   Instead you gave {0}.
*
""".format(interval))
    if epoch in dailies and interval != "D":
        warnings.warn("""
*
*   Typically the {0} epoch would have an interval of 'D' (days).
*   Instead you gave {1}.
*
""".format(epoch, interval))

    if to == "number":
        # Index must be datetime - let's make sure
        tsd.index = pd.to_datetime(tsd.index)

        # Scale from days (Julian dates) to the requested interval.
        frac = to_offset("D").nanos / to_offset(interval).nanos

        try:
            tsd.index = allowed[epoch](tsd.index.to_julian_date()) * frac
        except KeyError:
            # Not a named epoch: treat `epoch` as a custom date-like origin.
            epoch_date = tsutils.parsedate(epoch)
            tsd.index = (tsd.index.to_julian_date() -
                         epoch_date.to_julian_date()) * frac

        tsd = tsutils.memory_optimize(tsd)
    elif to == "datetime":
        # setdefault falls back to the raw `epoch` string for custom
        # date-like epochs.  NOTE(review): `unit=interval` assumes the
        # interval string is a valid pd.to_datetime unit ('D', 's', ...).
        tsd.index = pd.to_datetime(tsd.index.values,
                                   origin=epoch_dates.setdefault(epoch,
                                                                 epoch),
                                   unit=interval)

    if names is None:
        tsd.index.name = "{0}_date".format(epoch)

    # Clip by date on the output side when the result is datetime-indexed.
    if to == "datetime":
        index_type = "number"
        nstart_date = start_date
        nend_date = end_date
        nround_index = round_index
    elif to == "number":
        index_type = "datetime"
        nstart_date = None
        nend_date = None
        nround_index = None
    tsd = tsutils.common_kwds(tsd,
                              start_date=nstart_date,
                              end_date=nend_date,
                              round_index=nround_index)
    return tsd
def is_datetime_not_remain(obj: datetime.datetime, freq: str) -> bool:
    """Return True when *obj* lies exactly on a multiple of *freq*.

    *freq* must resolve to a fixed-length (Tick) offset such as "5min" or
    "1H".  The check uses the POSIX timestamp, so naive datetimes are
    interpreted in the local timezone.
    """
    step_seconds = to_offset(freq).delta.total_seconds()
    remainder = obj.timestamp() % step_seconds
    return remainder == 0
def intersection(self, other, sort=False):
    """Set-intersection for a datetime-like index, preserving or
    re-inferring the frequency of the result where possible.

    NOTE(review): this looks like a pandas internal for
    DatetimeIndex/TimedeltaIndex; the `result._data._freq` writes below
    are the existing "code-smelly" way of setting freq on the result.
    """
    self._validate_sort_keyword(sort)
    self._assert_can_do_setop(other)

    # Fast path: identical indexes.
    if self.equals(other):
        return self._get_reconciled_name_object(other)

    # Empty operand short-circuits.
    if len(self) == 0:
        return self.copy()
    if len(other) == 0:
        return other.copy()

    if not isinstance(other, type(self)):
        # Mixed types: delegate to the generic Index intersection, then
        # try to re-infer a freq if the result came back as our type.
        result = Index.intersection(self, other, sort=sort)
        if isinstance(result, type(self)):
            if result.freq is None:
                # TODO: find a less code-smelly way to set this
                result._data._freq = to_offset(result.inferred_freq)
        return result

    elif (other.freq is None or self.freq is None or
          other.freq != self.freq or
          not other.freq.isAnchored() or
          (not self.is_monotonic or not other.is_monotonic)):
        # Frequencies are absent/mismatched/unanchored, or either side is
        # unsorted: fall back to the generic intersection.
        result = Index.intersection(self, other, sort=sort)

        # Invalidate the freq of `result`, which may not be correct at
        # this point, depending on the values.
        # TODO: find a less code-smelly way to set this
        result._data._freq = None
        if hasattr(self, "tz"):
            result = self._shallow_copy(result._values, name=result.name,
                                        tz=result.tz, freq=None)
        else:
            result = self._shallow_copy(result._values, name=result.name,
                                        freq=None)
        if result.freq is None:
            # TODO: find a less code-smelly way to set this
            result._data._freq = to_offset(result.inferred_freq)
        return result

    # Both sides share an anchored freq and are monotonic: the
    # intersection is a contiguous slice.
    # to make our life easier, "sort" the two ranges
    if self[0] <= other[0]:
        left, right = self, other
    else:
        left, right = other, self

    # after sorting, the intersection always starts with the right index
    # and ends with the index of which the last elements is smallest
    end = min(left[-1], right[-1])
    start = right[0]

    if end < start:
        # Disjoint ranges: empty result.
        return type(self)(data=[])
    else:
        lslice = slice(*left.slice_locs(start, end))
        left_chunk = left.values[lslice]
        return self._shallow_copy(left_chunk)
def setup(self, freq, is_offset): if is_offset: self.freq = to_offset(freq) else: self.freq = freq
def extract_nwis_df(nwis_dict, interpolate=True):
    """Return a Pandas dataframe and a metadata dict from the NWIS response
    object or the json dict of the response.

    Args:
        nwis_dict (obj):
            the json from a response object as returned by get_nwis().json().
            Alternatively, you may supply the response object itself.
        interpolate (bool):
            when True, fill gaps in the data by interpolation.

    Returns:
        a tuple of (pandas dataframe, metadata dict).

    Raises:
        HydroNoDataError
            when the request is valid, but NWIS has no data for
            the parameters provided in the request.

        HydroUserWarning
            when one dataset is sampled at a lower frequency than
            another dataset in the same request.
    """
    if type(nwis_dict) is not dict:
        nwis_dict = nwis_dict.json()

    # strip header and all metadata.
    ts = nwis_dict["value"]["timeSeries"]
    if ts == []:
        # raise a HydroNoDataError if NWIS returns an empty set.
        #
        # Ideally, an empty set exception would be raised when the request
        # is first returned, but I do it here so that the data doesn't get
        # extracted twice.
        # TODO: raise this exception earlier??
        #
        # ** Interactive sessions should have an error raised.
        #
        # **Automated systems should catch these errors and deal with them.
        # In this case, if NWIS returns an empty set, then the request
        # needs to be reconsidered. The request was valid somehow, but
        # there is no data being collected.
        raise exceptions.HydroNoDataError("The NWIS reports that it does not "
                                          "have any data for this request.")

    # create a list of time series;
    # set the index, set the data types, replace NaNs, sort, find the
    # first and last
    collection = []
    starts = []
    ends = []
    freqs = []
    meta = {}
    for series in ts:
        series_name = series["name"]
        # Series names look like "agency:site:parameter:statistic".
        temp_name = series_name.split(":")
        agency = str(temp_name[0])
        site_id = agency + ":" + str(temp_name[1])
        parameter_cd = str(temp_name[2])
        stat = str(temp_name[3])  # currently unused, kept for clarity
        siteName = series["sourceInfo"]["siteName"]
        siteLatLongSrs = series["sourceInfo"]["geoLocation"]["geogLocation"]
        noDataValues = series["variable"]["noDataValue"]
        variableDescription = series["variable"]["variableDescription"]
        unit = series["variable"]["unit"]["unitCode"]
        data = series["values"][0]["value"]
        if data == []:
            # This parameter has no data. Skip to next series.
            continue
        if len(data) == 1:
            # This parameter only contains the most recent reading.
            # See Issue #49
            pass
        qualifiers = series_name + "_qualifiers"
        DF = pd.DataFrame(data=data)
        DF.index = pd.to_datetime(DF.pop("dateTime"), utc=True)
        DF["value"] = DF["value"].astype(float)
        DF = DF.replace(to_replace=noDataValues, value=np.nan)
        # Qualifier lists become comma-joined strings.
        DF["qualifiers"] = DF["qualifiers"].apply(lambda x: ",".join(x))
        DF.rename(columns={
            "qualifiers": qualifiers,
            "value": series_name
        }, inplace=True)
        DF.sort_index(inplace=True)
        local_start = DF.index.min()
        local_end = DF.index.max()
        starts.append(local_start)
        ends.append(local_end)
        local_freq = calc_freq(DF.index)
        freqs.append(local_freq)
        if not DF.index.is_unique:
            print("Series index for " + series_name +
                  " is not unique. Attempting to drop identical rows.")
            DF = DF.drop_duplicates(keep="first")
            if not DF.index.is_unique:
                print(
                    "Series index for " + series_name +
                    " is STILL not unique. Dropping first rows with"
                    " duplicated date."
                )
                DF = DF[~DF.index.duplicated(keep="first")]
        if local_freq > to_offset("0min"):
            # Reindex onto a regular grid so gaps become explicit NaNs.
            local_clean_index = pd.date_range(start=local_start,
                                              end=local_end,
                                              freq=local_freq,
                                              tz="UTC")
            # if len(local_clean_index) != len(DF):
            # This condition happens quite frequently with missing data.
            DF = DF.reindex(index=local_clean_index, copy=True)
        else:
            # The dataframe DF must contain only the most recent data.
            pass
        qual_cols = DF.columns.str.contains("_qualifiers")
        # https://stackoverflow.com/questions/21998354/pandas-wont-fillna-inplace
        # Instead, create a temporary dataframe, fillna, then copy back
        # into original.
        DFquals = DF.loc[:, qual_cols].fillna("hf.missing")
        DF.loc[:, qual_cols] = DFquals
        if local_freq > pd.Timedelta(to_offset("0min")):
            variableFreq_str = str(to_offset(local_freq))
        else:
            variableFreq_str = str(to_offset("0min"))
        parameter_info = {
            "variableFreq": variableFreq_str,
            "variableUnit": unit,
            "variableDescription": variableDescription,
        }
        site_info = {
            "siteName": siteName,
            "siteLatLongSrs": siteLatLongSrs,
            "timeSeries": {},
        }
        # if site is not in meta keys, add it.
        if site_id not in meta:
            meta[site_id] = site_info
        # Add the variable info to the site dict.
        meta[site_id]["timeSeries"][parameter_cd] = parameter_info
        collection.append(DF)

    if len(collection) < 1:
        # It seems like this condition should not occur. The NWIS trims the
        # response and returns an empty nwis_dict['value']['timeSeries']
        # if none of the parameters requested have data.
        # If at least one of the paramters have data,
        # then the empty series will get delivered, but with no data.
        # Compare these requests:
        # empty: https://nwis.waterservices.usgs.gov/nwis/iv/?format=json&sites=01570500&startDT=2018-06-01&endDT=2018-06-01&parameterCd=00045
        # one empty, one full: https://nwis.waterservices.usgs.gov/nwis/iv/?format=json&sites=01570500&startDT=2018-06-01&endDT=2018-06-01&parameterCd=00045,00060
        raise exceptions.HydroNoDataError("The NWIS does not have any data for"
                                          " the requested combination of sites"
                                          ", parameters, and dates.")
    startmin = min(starts)
    endmax = max(ends)

    # Remove all frequencies of zero from freqs list.
    zero = to_offset("0min")
    freqs2 = list(filter(lambda x: x > zero, freqs))
    if len(freqs2) > 0:
        # BUGFIX: use the filtered list; min(freqs) would pick up a zero
        # frequency and defeat the filtering above.
        freqmin = min(freqs2)
        freqmax = max(freqs2)
        if freqmin != freqmax:
            warnings.warn(
                "One or more datasets in this request is going to be "
                "'upsampled' to " + str(freqmin) + " because the data "
                "were collected at a lower frequency of " + str(freqmax),
                exceptions.HydroUserWarning,
            )
        clean_index = pd.date_range(start=startmin, end=endmax,
                                    freq=freqmin, tz="UTC")
        cleanDF = pd.DataFrame(index=clean_index)
        for dataset in collection:
            cleanDF = pd.concat([cleanDF, dataset], axis=1)
        # Replace lines with missing _qualifier flags with hf.upsampled
        qual_cols = cleanDF.columns.str.contains("_qualifiers")
        cleanDFquals = cleanDF.loc[:, qual_cols].fillna("hf.upsampled")
        cleanDF.loc[:, qual_cols] = cleanDFquals
        if interpolate:
            # TODO: mark interpolated values with 'hf.interp'
            # select data, then replace Nans with interpolated values.
            data_cols = cleanDF.columns.str.contains(r"[0-9]$")
            cleanDFdata = cleanDF.loc[:, data_cols].interpolate()
            cleanDF.loc[:, data_cols] = cleanDFdata
    else:
        # If datasets only contain most recent data, then
        # don't set an index or a freq. Just concat all of the datasets.
        cleanDF = pd.concat(collection, axis=1)

    cleanDF.index.name = "datetimeUTC"

    # NOTE(review): the following operates on the loop variable DF (the
    # last series processed), not on cleanDF, so its result is discarded.
    # It looks like it was meant to deduplicate/sort cleanDF — confirm
    # intent before changing.
    if not DF.index.is_unique:
        DF = DF[~DF.index.duplicated(keep="first")]
    if not DF.index.is_monotonic:
        DF.sort_index(axis=0, inplace=True)

    return cleanDF, meta
def __new__(cls, data=None, unit=None, freq=None, start=None, end=None,
            periods=None, closed=None, dtype=None, copy=False,
            name=None, verify_integrity=True):
    """Construct a TimedeltaIndex from timedelta-like data or from a
    start/end/periods/freq range specification."""

    # `freq_infer` is True when the caller asked for freq to be inferred.
    freq, freq_infer = dtl.maybe_infer_freq(freq)

    if data is None:
        # Range-generation path (no data given).
        # TODO: Remove this block and associated kwargs; GH#20535
        result = cls._generate_range(start, end, periods, freq,
                                     closed=closed)
        result.name = name
        return result

    if is_scalar(data):
        raise TypeError('{cls}() must be called with a '
                        'collection of some kind, {data} was passed'.format(
                            cls=cls.__name__, data=repr(data)))

    if isinstance(data, TimedeltaIndex) and freq is None and name is None:
        # Already the right type and nothing to override: copy or share.
        if copy:
            return data.copy()
        else:
            return data._shallow_copy()

    # - Cases checked above all return/raise before reaching here - #

    # Coerce arbitrary timedelta-like input to a timedelta64[ns] array,
    # possibly inferring a frequency from the values.
    data, inferred_freq = sequence_to_td64ns(data, copy=copy, unit=unit)
    if inferred_freq is not None:
        if freq is not None and freq != inferred_freq:
            # Explicit freq must agree with what the values imply.
            raise ValueError('Inferred frequency {inferred} from passed '
                             'values does not conform to passed frequency '
                             '{passed}'.format(inferred=inferred_freq,
                                               passed=freq.freqstr))
        elif freq_infer:
            freq = inferred_freq
            freq_infer = False
        # The inferred freq already matches the data, so no further
        # frequency validation is needed below.
        verify_integrity = False

    subarr = cls._simple_new(data, name=name, freq=freq)
    # check that we are matching freqs
    if verify_integrity and len(subarr) > 0:
        if freq is not None and not freq_infer:
            cls._validate_frequency(subarr, freq)

    if freq_infer:
        # Inference requested but sequence_to_td64ns could not infer:
        # fall back to inferring from the constructed index.
        subarr.freq = to_offset(subarr.inferred_freq)

    return subarr
def estimate_air_temp(year_start, surfrad, lat, lon, cs):
    """
    Use clear sky temps scaled by daily ratio of measured to clear sky
    global insolation.

    Parameters
    ----------
    year_start : str
        SURFRAD data year
    surfrad : pandas.DateFrame
        surfrad data frame
    lat : float
        latitude in degrees north of equator [deg]
    lon : float
        longitude in degrees east of prime meridian [deg]
    cs : pandas.DataFrame
        clear sky irradiances [W/m^2]

    Returns
    -------
    est_air_temp : pandas.DataFrame
        estimated air temperature in Celsius [C]
    temp_adj : pandas.Series
        temperature adjustment [C]
    ghi_ratio : pandas.Series
        ratio of daily SURFRAD to clearsky GHI insolation
    daily_delta_temp : numpy.array
        daily temperature range, max - min, in Kelvin [K]
    cs_temp_air : pandas.Series
        clear sky air temperatures in Celsius [C]
    """
    # NOTE(review): 367/366 is one more than the actual day count
    # (366/365) — presumably an extra wrap day for the day-ahead shift
    # below; confirm.
    daze = 367 if calendar.isleap(int(year_start)) else 366
    # create a leap year of minutes for the given year at UTC
    year_minutes = pd.date_range(start=year_start, freq='T',
                                 periods=daze * DAYMINUTES, tz='UTC')
    # clear sky temperature
    cs_temp_air = rdtools.clearsky_temperature.get_clearsky_tamb(
        year_minutes, lat, lon)
    # organize by day; shift Celsius values to Kelvin for the range calc
    cs_temp_daily = cs_temp_air.values.reshape((daze, DAYMINUTES)) + KELVINS
    # get daily temperature range (max - min per day)
    daily_delta_temp = np.array([td.max() - td.min() for td in cs_temp_daily])
    daily_delta_temp = pd.Series(daily_delta_temp,
                                 index=cs_temp_air.resample('D').mean().index)
    # calculate ratio of daily insolation versus clearsky
    ghi_ratio = surfrad.ghi.resample('D').sum() / cs.ghi.resample('D').sum()
    ghi_ratio = ghi_ratio.rename('ghi_ratio')
    # apply ghi ratio to next day, wrap days to start at day 1
    day1 = ghi_ratio.index[0]
    ghi_ratio.index = ghi_ratio.index + to_offset('1D')
    # set day 1 estimated air temp equal to last day
    ghi_ratio[day1] = ghi_ratio.iloc[-1]
    # fix day 1 is added last, so out of order
    ghi_ratio = ghi_ratio.sort_index()
    # scale daily temperature delta by the ratio of insolation from day
    # before
    temp_adj = (ghi_ratio - 1.0) * daily_delta_temp[ghi_ratio.index]
    # use next day
    temp_adj = temp_adj.rename('temp_adj')
    # interpolate smoothly, but fill forward minutes in last day
    est_air_temp = pd.concat([
        cs_temp_air,
        ghi_ratio.resample('1min').interpolate(),
        temp_adj.resample('1min').interpolate()
    ], axis=1).pad()
    # Tadj = Tcs + (GHI/CS_GHI - 1) * DeltaT
    # if GHI/CS_GHI > 1 then adjustment > DeltaT
    est_air_temp['Adjusted Temp (C)'] = (
        est_air_temp['Clear Sky Temperature (C)'] + est_air_temp.temp_adj)
    return est_air_temp, temp_adj, ghi_ratio, daily_delta_temp, cs_temp_air
def freq(self, value):
    """Set the index frequency.

    A non-None value is converted to a DateOffset and validated against
    the index before being stored; None clears the frequency.
    """
    if value is None:
        self._freq = None
        return
    offset = frequencies.to_offset(value)
    self._validate_frequency(self, offset)
    self._freq = offset
def get_lags_for_frequency(freq_str: str,
                           lag_ub: int = 1200,
                           num_lags: Optional[int] = None) -> List[int]:
    """
    Generate a list of lags appropriate for the given frequency string.

    Every frequency gets the base lags [1, 2, 3, 4, 5, 6, 7]; the rest
    correspond to the same `season` (+/- `delta`) in each of the previous
    `k` cycles, where `delta` and `k` follow the existing conventions.

    Parameters
    ----------
    freq_str
        Frequency string of the form [multiple][granularity] such as
        "12H", "5min", "1D" etc.
    lag_ub
        The maximum value for a lag.
    num_lags
        Maximum number of lags; by default all generated lags are returned
    """

    # Per-granularity helpers; each returns one lag-group per past cycle,
    # built via the module-level `_make_lags`.
    def _minute_lags(multiple, num_cycles=3):
        # look back ``num_cycles`` hours
        return [_make_lags(k * 60 // multiple, 2)
                for k in range(1, num_cycles + 1)]

    def _hour_lags(multiple, num_cycles=7):
        # look back ``num_cycles`` days
        return [_make_lags(k * 24 // multiple, 1)
                for k in range(1, num_cycles + 1)]

    def _day_lags(multiple, num_cycles=4):
        # look back ``num_cycles`` weeks, plus one month
        weekly = [_make_lags(k * 7 // multiple, 1)
                  for k in range(1, num_cycles + 1)]
        return weekly + [_make_lags(30 // multiple, 1)]

    def _week_lags(multiple, num_cycles=3):
        # look back ``num_cycles`` years, plus 4, 8 and 12 weeks
        yearly = [_make_lags(k * 52 // multiple, 1)
                  for k in range(1, num_cycles + 1)]
        return yearly + [[4 // multiple, 8 // multiple, 12 // multiple]]

    def _month_lags(multiple, num_cycles=3):
        # look back ``num_cycles`` years
        return [_make_lags(k * 12 // multiple, 1)
                for k in range(1, num_cycles + 1)]

    offset = to_offset(freq_str)
    # normalize offset name, so that both `W` and `W-SUN` refer to `W`
    granularity = norm_freq_str(offset.name)

    if granularity == "A":
        raw_lags = []
    elif granularity == "Q":
        assert (
            offset.n == 1
        ), "Only multiple 1 is supported for quarterly. Use x month instead."
        raw_lags = _month_lags(offset.n * 3.0)
    elif granularity == "M":
        raw_lags = _month_lags(offset.n)
    elif granularity == "W":
        raw_lags = _week_lags(offset.n)
    elif granularity == "D":
        raw_lags = _day_lags(offset.n) + _week_lags(offset.n / 7.0)
    elif granularity == "B":
        # todo find good lags for business day
        raw_lags = []
    elif granularity == "H":
        raw_lags = (_hour_lags(offset.n)
                    + _day_lags(offset.n / 24.0)
                    + _week_lags(offset.n / (24.0 * 7)))
    elif granularity == "T":
        raw_lags = (_minute_lags(offset.n)
                    + _hour_lags(offset.n / 60.0)
                    + _day_lags(offset.n / (60.0 * 24))
                    + _week_lags(offset.n / (60.0 * 24 * 7)))
    else:
        raise Exception("invalid frequency")

    # flatten, deduplicate, and keep only lags in (7, lag_ub]
    filtered = sorted({int(lag)
                       for group in raw_lags
                       for lag in group
                       if 7 < lag <= lag_ub})
    all_lags = [1, 2, 3, 4, 5, 6, 7] + filtered
    return all_lags[:num_lags]
def interval_range(start=None, end=None, periods=None, freq=None,
                   name=None, closed='right'):
    """
    Return a fixed frequency IntervalIndex

    Parameters
    ----------
    start : numeric or datetime-like, default None
        Left bound for generating intervals
    end : numeric or datetime-like, default None
        Right bound for generating intervals
    periods : integer, default None
        Number of periods to generate
    freq : numeric, string, or DateOffset, default None
        The length of each interval. Must be consistent with the type of
        start and end, e.g. 2 for numeric, or '5H' for datetime-like.
        Default is 1 for numeric and 'D' (calendar daily) for
        datetime-like.
    name : string, default None
        Name of the resulting IntervalIndex
    closed : string, default 'right'
        options are: 'left', 'right', 'both', 'neither'

    Notes
    -----
    Of the three parameters: ``start``, ``end``, and ``periods``, exactly
    two must be specified.

    Returns
    -------
    rng : IntervalIndex

    Examples
    --------
    Numeric ``start`` and  ``end`` is supported.

    >>> pd.interval_range(start=0, end=5)
    IntervalIndex([(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]]
                  closed='right', dtype='interval[int64]')

    Additionally, datetime-like input is also supported.

    >>> pd.interval_range(start='2017-01-01', end='2017-01-04')
    IntervalIndex([(2017-01-01, 2017-01-02], (2017-01-02, 2017-01-03],
                   (2017-01-03, 2017-01-04]]
                  closed='right', dtype='interval[datetime64[ns]]')

    The ``freq`` parameter specifies the frequency between the left and
    right endpoints of the individual intervals within the
    ``IntervalIndex``.  For numeric ``start`` and ``end``, the frequency
    must also be numeric.

    >>> pd.interval_range(start=0, periods=4, freq=1.5)
    IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]]
                  closed='right', dtype='interval[float64]')

    Similarly, for datetime-like ``start`` and ``end``, the frequency
    must be convertible to a DateOffset.

    >>> pd.interval_range(start='2017-01-01', periods=3, freq='MS')
    IntervalIndex([(2017-01-01, 2017-02-01], (2017-02-01, 2017-03-01],
                   (2017-03-01, 2017-04-01]]
                  closed='right', dtype='interval[datetime64[ns]]')

    The ``closed`` parameter specifies which endpoints of the individual
    intervals within the ``IntervalIndex`` are closed.

    >>> pd.interval_range(end=5, periods=4, closed='both')
    IntervalIndex([[1, 2], [2, 3], [3, 4], [4, 5]]
                  closed='both', dtype='interval[int64]')
    """
    # Exactly two of start/end/periods must be given; the third is derived.
    if com._count_not_none(start, end, periods) != 2:
        raise ValueError('Of the three parameters: start, end, and periods, '
                         'exactly two must be specified')

    start = com._maybe_box_datetimelike(start)
    end = com._maybe_box_datetimelike(end)
    # Any non-None endpoint determines the type (numeric vs datetime-like).
    endpoint = next(com._not_none(start, end))

    if not _is_valid_endpoint(start):
        msg = 'start must be numeric or datetime-like, got {start}'
        raise ValueError(msg.format(start=start))

    if not _is_valid_endpoint(end):
        msg = 'end must be numeric or datetime-like, got {end}'
        raise ValueError(msg.format(end=end))

    if is_float(periods):
        periods = int(periods)
    elif not is_integer(periods) and periods is not None:
        msg = 'periods must be a number, got {periods}'
        raise TypeError(msg.format(periods=periods))

    # Default freq: 1 for numeric endpoints, calendar-daily otherwise.
    freq = freq or (1 if is_number(endpoint) else 'D')
    if not is_number(freq):
        try:
            freq = to_offset(freq)
        except ValueError:
            raise ValueError('freq must be numeric or convertible to '
                             'DateOffset, got {freq}'.format(freq=freq))

    # verify type compatibility
    if not all([_is_type_compatible(start, end),
                _is_type_compatible(start, freq),
                _is_type_compatible(end, freq)]):
        raise TypeError("start, end, freq need to be type compatible")

    if is_number(endpoint):
        if periods is None:
            periods = int((end - start) // freq)

        if start is None:
            start = end - periods * freq

        # force end to be consistent with freq (lower if freq skips over end)
        end = start + periods * freq

        # end + freq for inclusive endpoint
        breaks = np.arange(start, end + freq, freq)
    elif isinstance(endpoint, Timestamp):
        # add one to account for interval endpoints (n breaks = n-1 intervals)
        if periods is not None:
            periods += 1
        breaks = date_range(start=start, end=end, periods=periods, freq=freq)
    else:
        # timedelta-like endpoints
        # add one to account for interval endpoints (n breaks = n-1 intervals)
        if periods is not None:
            periods += 1
        breaks = timedelta_range(start=start, end=end, periods=periods,
                                 freq=freq)

    return IntervalIndex.from_breaks(breaks, name=name, closed=closed)
def test_anchored_shortcuts(self):
    """Anchored frequency shortcuts resolve to the expected offsets,
    and malformed anchors raise ValueError."""
    # W defaults to W-SUN
    result = frequencies.to_offset('W')
    expected = frequencies.to_offset('W-SUN')
    assert (result == expected)

    # Q defaults to Q-DEC
    result1 = frequencies.to_offset('Q')
    result2 = frequencies.to_offset('Q-DEC')
    expected = offsets.QuarterEnd(startingMonth=12)
    assert (result1 == expected)
    assert (result2 == expected)

    result1 = frequencies.to_offset('Q-MAY')
    expected = offsets.QuarterEnd(startingMonth=5)
    assert (result1 == expected)

    # SM defaults to SM-15
    result1 = frequencies.to_offset('SM')
    result2 = frequencies.to_offset('SM-15')
    expected = offsets.SemiMonthEnd(day_of_month=15)
    assert (result1 == expected)
    assert (result2 == expected)

    result = frequencies.to_offset('SM-1')
    expected = offsets.SemiMonthEnd(day_of_month=1)
    assert (result == expected)

    result = frequencies.to_offset('SM-27')
    expected = offsets.SemiMonthEnd(day_of_month=27)
    assert (result == expected)

    result = frequencies.to_offset('SMS-2')
    expected = offsets.SemiMonthBegin(day_of_month=2)
    assert (result == expected)

    result = frequencies.to_offset('SMS-27')
    expected = offsets.SemiMonthBegin(day_of_month=27)
    assert (result == expected)

    # ensure invalid cases fail as expected
    # BUGFIX: a missing comma after 'SMS-BYR' caused implicit string
    # concatenation ('SMS-BYR' 'BSMS' -> 'SMS-BYRBSMS'), so neither
    # intended anchor was actually tested.
    invalid_anchors = ['SM-0', 'SM-28', 'SM-29',
                       'SM-FOO', 'BSM', 'SM--1',
                       'SMS-1', 'SMS-28', 'SMS-30',
                       'SMS-BAR', 'SMS-BYR', 'BSMS',
                       'SMS--2']
    for invalid_anchor in invalid_anchors:
        with tm.assert_raises_regex(ValueError, 'Invalid frequency: '):
            frequencies.to_offset(invalid_anchor)
def test_to_offset_multiple():
    # Each compound frequency string should collapse to a single offset.
    cases = [
        ('2h30min', offsets.Minute(150)),
        ('2h30min15s', offsets.Second(150 * 60 + 15)),
        ('2h 60min', offsets.Hour(3)),
        ('15l500u', offsets.Micro(15500)),
        ('10s75L', offsets.Milli(10075)),
        ('2800N', offsets.Nano(2800)),
        ('2SM', offsets.SemiMonthEnd(2)),
        ('2SM-16', offsets.SemiMonthEnd(2, day_of_month=16)),
        ('2SMS-14', offsets.SemiMonthBegin(2, day_of_month=14)),
        ('2SMS-15', offsets.SemiMonthBegin(2)),
    ]
    for freqstr, expected in cases:
        assert (frequencies.to_offset(freqstr) == expected)

    # Whitespace between components is ignored.
    assert (frequencies.to_offset('2h30min') ==
            frequencies.to_offset('2h 30min'))

    # malformed
    try:
        frequencies.to_offset('2h20m')
    except ValueError:
        pass
    else:
        assert (False)