def test_parse_time_string():
    """Check that a quarter string parses the same for upper- and lower-case ``Q``."""
    upper_result = parse_time_string("4Q1984")
    lower_result = parse_time_string("4q1984")

    # Both results are unpacked as 3-tuples, so another shape fails here.
    date_upper, parsed_upper, reso_upper = upper_result
    date_lower, parsed_lower, reso_lower = lower_result

    assert date_upper == date_lower
    assert reso_upper == reso_lower
    assert parsed_upper == parsed_lower
def test_parse_time_quarter_with_dash(dashed, normal):
    """Check that a dashed quarter string parses like its undashed form (gh-9688)."""
    dashed_result = parse_time_string(dashed)
    plain_result = parse_time_string(normal)

    # Each result is unpacked as a (parsed, resolution) pair.
    parsed_dash, reso_dash = dashed_result
    parsed_plain, reso_plain = plain_result

    assert parsed_dash == parsed_plain
    assert reso_dash == reso_plain
def test_parse_time_string_check_instance_type_raise_exception():
    """Check that a tuple argument raises TypeError and "2019" parses to a year triple (issue 20684)."""
    not_a_string = (1, 2, 3)
    with pytest.raises(TypeError):
        parse_time_string(not_a_string)

    # The expected result is a 3-tuple: two equal datetimes and the resolution.
    expected = (datetime(2019, 1, 1), datetime(2019, 1, 1), "year")
    assert parse_time_string("2019") == expected
def test_parse_time_quarter_with_dash(dashed, normal):
    """Check that a dashed quarter string parses like its undashed form (gh-9688)."""
    dashed_result = parse_time_string(dashed)
    plain_result = parse_time_string(normal)

    # Each result is unpacked as a (date, parsed, resolution) triple.
    date_dash, parsed_dash, reso_dash = dashed_result
    date_plain, parsed_plain, reso_plain = plain_result

    assert date_dash == date_plain
    assert parsed_dash == parsed_plain
    assert reso_dash == reso_plain
def test_parsers_quarter_invalid(date_str):
    """Check that an invalid quarter string raises ValueError with the matching message."""
    out_of_range = ("Incorrect quarterly string is given, quarter "
                    "must be between 1 and 4: {date_str}")
    unknown_format = "Unknown datetime string format, unable to parse: {date_str}"

    # Only "6Q-20" is expected to hit the quarter-range message.
    template = out_of_range if date_str == "6Q-20" else unknown_format
    expected_msg = template.format(date_str=date_str)

    with pytest.raises(ValueError, match=expected_msg):
        parsing.parse_time_string(date_str)
def test_parse_time_string_check_instance_type_raise_exception():
    """Check the TypeError message for a tuple argument and the result for "2019" (issue 20684)."""
    msg = "Argument 'arg' has incorrect type (expected str, got tuple)"
    # The message contains parentheses, so it is escaped before use as a regex.
    pattern = re.escape(msg)
    with pytest.raises(TypeError, match=pattern):
        parse_time_string((1, 2, 3))

    # The expected result is a (datetime, resolution) pair.
    assert parse_time_string("2019") == (datetime(2019, 1, 1), "year")
def test_parsers_quarter_invalid(date_str):
    """Check that an invalid quarter string raises ValueError with the matching message."""
    # Only "6Q-20" is expected to hit the quarter-range message.
    if date_str == "6Q-20":
        template = ("Incorrect quarterly string is given, quarter "
                    "must be between 1 and 4: {date_str}")
    else:
        template = ("Unknown datetime string format, unable "
                    "to parse: {date_str}")
    expected_msg = template.format(date_str=date_str)

    with pytest.raises(ValueError, match=expected_msg):
        parsing.parse_time_string(date_str)
def test_parse_time_quarter_w_dash(self):
    """Check dashed quarter strings parse like undashed ones and malformed ones raise."""
    # https://github.com/pandas-dev/pandas/issues/9688
    equivalent_pairs = [('1988-Q2', '1988Q2'), ('2Q-1988', '2Q1988')]

    for dashed, normal in equivalent_pairs:
        from_dashed = parse_time_string(dashed)
        from_normal = parse_time_string(normal)

        # Each result is unpacked as a (date, parsed, resolution) triple.
        date_dash, parsed_dash, reso_dash = from_dashed
        date_plain, parsed_plain, reso_plain = from_normal

        assert date_dash == date_plain
        assert parsed_dash == parsed_plain
        assert reso_dash == reso_plain

    # A misplaced dash must be rejected with DateParseError.
    for malformed in ("-2Q1992", "2-Q1992", "4-4Q1992"):
        pytest.raises(parsing.DateParseError, parse_time_string, malformed)
def calendar(date_in, calendar_type='proleptic_gregorian'):
    """
    Get date or number of steps from input.

    Get date from input string using one of the available formats or get time step number from input number or string.
    Used to get the date from CalendarDayStart (input) in the settings xml

    :param date_in: string containing a date in one of the available formats or time step number as number or string
    :param calendar_type: calendar name passed to ``date2num`` and ``num2date``
    :rtype: datetime object or float number
    :returns: date as datetime or time step number as float
    :raises LisfloodError: stop if input is not a step number AND it is in wrong date format
    """
    try:
        # try reading step number from number or string
        return float(date_in)
    except ValueError:
        pass

    # units used for date type conversion (datetime.datetime -> calendar-specific if needed)
    _t_units = "hours since 1970-01-01 00:00:00"

    # try reading a date in one of available formats
    try:
        date = parse_time_string(date_in, dayfirst=True)[0]  # datetime.datetime type
        step = date2num(date, _t_units, calendar_type)  # float type
        # calendar-dependent type from netCDF4.netcdftime._netcdftime module
        return num2date(step, _t_units, calendar_type)
    except Exception as err:
        # if cannot read input then stop; `Exception` (not a bare except) lets
        # KeyboardInterrupt/SystemExit propagate, and the cause is chained
        msg = "Wrong step or date format in XML settings file\n Input {}".format(date_in)
        raise LisfloodError(msg) from err
def _get_string_slice(self, key: str, use_lhs: bool = True, use_rhs: bool = True):
    """
    Return the positional slice covered by the partial date string ``key``.

    Raises
    ------
    ValueError
        If the index is not monotonic.
    TypeError
        If the parsed resolution's frequency group is not below the index's.
    KeyError
        If the index is non-empty and the parsed bounds lie entirely outside it.
    """
    # TODO: Check for non-True use_lhs/use_rhs
    if not self.is_monotonic:
        raise ValueError("Partial indexing only valid for ordered time series")

    parsed, reso = parse_time_string(key, self.freq)
    key_group = resolution.Resolution.get_freq_group(reso)
    index_group = resolution.get_freq_group(self.freq)

    if not key_group < index_group:
        # TODO: we used to also check for
        #  reso in ["day", "hour", "minute", "second"]
        #  why is that check not needed?
        raise TypeError(key)

    lower, upper = self._parsed_string_to_bounds(reso, parsed)
    if len(self) and (upper < self.min() or lower > self.max()):
        raise KeyError(key)

    # Use asi8 searchsorted to avoid overhead of re-validating inputs
    start = self.asi8.searchsorted(lower.ordinal, side="left")
    stop = self.asi8.searchsorted(upper.ordinal, side="right")
    return slice(start, stop)
def _get_string_slice(self, key: str):
    """
    Parse ``key`` and return the result of ``_partial_date_slice`` for it.

    A KeyError from the slicing step is re-raised with the original string
    as its argument.
    """
    parsed, reso_name = parse_time_string(key, self.freq)
    reso_obj = Resolution.from_attrname(reso_name)
    try:
        located = self._partial_date_slice(reso_obj, parsed)
    except KeyError as err:
        raise KeyError(key) from err
    return located
def test_parsers_monthfreq(self):
    """Check that six-digit strings parse to the first day of the month with freq='M'."""
    expected_by_input = {
        '201101': datetime(2011, 1, 1, 0, 0),
        '200005': datetime(2000, 5, 1, 0, 0),
    }

    for date_str, expected in compat.iteritems(expected_by_input):
        outcome = parsing.parse_time_string(date_str, freq='M')
        # Only the first element of the 3-tuple result is compared.
        parsed_date, _, _ = outcome
        assert parsed_date == expected
def get_loc(self, key, method=None, tolerance=None):
    """
    Get integer location for requested label.

    Parameters
    ----------
    key : Period, NaT, str, or datetime
        String or datetime key must be parseable as Period.
    method : optional
        Passed to the ``_int64index.get_loc`` fallback lookup.
    tolerance : optional
        Converted with ``_convert_tolerance`` (when not None) and passed to
        the ``_int64index.get_loc`` fallback lookup.

    Returns
    -------
    loc : int or ndarray[int64]

    Raises
    ------
    KeyError
        Key is not present in the index, is an integer, or is a string that
        cannot be parsed.
    TypeError
        If a Period cannot be built from key and key is list-like.
    """
    if isinstance(key, str):
        # First try to treat the string as a partial-date slice.
        try:
            return self._get_string_slice(key)
        except (TypeError, KeyError, ValueError, OverflowError):
            pass

        # Otherwise replace the string with the datetime it parses to; the
        # resolution is not needed here.
        try:
            key, _ = parse_time_string(key, self.freq)
        except DateParseError as err:
            # A string with invalid format
            raise KeyError(f"Cannot interpret '{key}' as period") from err

    elif is_integer(key):
        # Period constructor will cast to string, which we dont want
        raise KeyError(key)

    try:
        key = Period(key, freq=self.freq)
    except ValueError as err:
        # we cannot construct the Period
        # as we have an invalid type
        if is_list_like(key):
            raise TypeError(f"'{key}' is an invalid key") from err
        raise KeyError(key) from err

    ordinal = key.ordinal if key is not NaT else key.value
    try:
        return self._engine.get_loc(ordinal)
    except KeyError:
        # Engine lookup missed: retry through the int64 index, which takes
        # method/tolerance.
        try:
            if tolerance is not None:
                tolerance = self._convert_tolerance(tolerance, np.asarray(key))
            return self._int64index.get_loc(ordinal, method, tolerance)
        except KeyError as err:
            # Report the Period key rather than its ordinal.
            raise KeyError(key) from err
def _get_string_slice(self, key: str, use_lhs: bool = True, use_rhs: bool = True):
    """
    Parse ``key`` and return the result of ``_partial_date_slice`` for it.

    A KeyError from the slicing step is re-raised with the original string
    as its argument.
    """
    # TODO: Check for non-True use_lhs/use_rhs
    parsed, reso = parse_time_string(key, self.freq)
    try:
        located = self._partial_date_slice(reso, parsed, use_lhs, use_rhs)
    except KeyError as err:
        raise KeyError(key) from err
    return located
def parse_date_str(date_str, tag='begin'): assert tag in ('begin', 'end') date, _, granularity = parse_time_string(str(date_str)) if tag == 'begin': return date else: offset = pd.DateOffset(**{granularity + 's': 1}) return date + offset - pd.to_timedelta('1s')
def test_parsers_quarterly_with_freq(self):
    """Check quarterly parsing: bad quarter, bad freq, and freq-dependent results."""
    bad_quarter_msg = ('Incorrect quarterly string is given, quarter '
                       'must be between 1 and 4: 2013Q5')
    with pytest.raises(parsing.DateParseError, match=bad_quarter_msg):
        parsing.parse_time_string('2013Q5')

    # GH 5418
    bad_freq_msg = ('Unable to retrieve month information from given freq: '
                    'INVLD-L-DEC-SAT')
    with pytest.raises(parsing.DateParseError, match=bad_freq_msg):
        parsing.parse_time_string('2013Q1', freq='INVLD-L-DEC-SAT')

    # Maps (date string, freq) to the expected parsed datetime.
    cases = {
        ('2013Q2', None): datetime(2013, 4, 1),
        ('2013Q2', 'A-APR'): datetime(2012, 8, 1),
        ('2013-Q2', 'A-DEC'): datetime(2013, 4, 1),
    }
    for (date_str, freq), expected in compat.iteritems(cases):
        outcome = parsing.parse_time_string(date_str, freq=freq)
        parsed_date, _, _ = outcome
        assert parsed_date == expected
def test_parsers_quarterly_with_freq(self):
    """Check quarterly parsing: bad quarter, bad freq, and freq-dependent results."""
    bad_quarter_msg = ('Incorrect quarterly string is given, quarter '
                       'must be between 1 and 4: 2013Q5')
    with tm.assert_raises_regex(parsing.DateParseError, bad_quarter_msg):
        parsing.parse_time_string('2013Q5')

    # GH 5418
    bad_freq_msg = ('Unable to retrieve month information from given freq: '
                    'INVLD-L-DEC-SAT')
    with tm.assert_raises_regex(parsing.DateParseError, bad_freq_msg):
        parsing.parse_time_string('2013Q1', freq='INVLD-L-DEC-SAT')

    # Maps (date string, freq) to the expected parsed datetime.
    cases = {
        ('2013Q2', None): datetime(2013, 4, 1),
        ('2013Q2', 'A-APR'): datetime(2012, 8, 1),
        ('2013-Q2', 'A-DEC'): datetime(2013, 4, 1),
    }
    for (date_str, freq), expected in compat.iteritems(cases):
        outcome = parsing.parse_time_string(date_str, freq=freq)
        parsed_date, _, _ = outcome
        assert parsed_date == expected
def _get_string_slice(self, key: str, use_lhs: bool = True, use_rhs: bool = True):
    """
    Parse ``key`` and return the result of ``_partial_date_slice`` for it.

    The frequency given to the parser is ``freqstr`` when the object has that
    attribute, otherwise ``inferred_freq``, otherwise None.
    """
    # inferred_freq is looked up first because it is the eager default below.
    fallback_freq = getattr(self, "inferred_freq", None)
    freq = getattr(self, "freqstr", fallback_freq)

    parsed, reso = parsing.parse_time_string(key, freq)
    return self._partial_date_slice(
        reso, parsed, use_lhs=use_lhs, use_rhs=use_rhs
    )
def _get_string_slice(self, key):
    """
    Return the positional slice covered by the partial date string ``key``.

    Raises
    ------
    ValueError
        If the index is not monotonic.
    KeyError
        If the resolution is day/hour/minute/second and its frequency group
        is not below the index's.
    """
    if not self.is_monotonic:
        raise ValueError("Partial indexing only valid for ordered time series")

    parsed, reso = parse_time_string(key, self.freq)
    key_group = resolution.Resolution.get_freq_group(reso)
    index_group = resolution.get_freq_group(self.freq)

    is_fine_grained = reso in ["day", "hour", "minute", "second"]
    if is_fine_grained and not key_group < index_group:
        raise KeyError(key)

    lower, upper = self._parsed_string_to_bounds(reso, parsed)
    start = self.searchsorted(lower, side="left")
    stop = self.searchsorted(upper, side="right")
    return slice(start, stop)
def _maybe_cast_slice_bound(self, label, side: str, kind=lib.no_default):
    """
    If label is a string, cast it to datetime according to resolution.

    Parameters
    ----------
    label : object
    side : {'left', 'right'}
    kind : {'loc', 'getitem'} or None

    Returns
    -------
    label : object

    Notes
    -----
    Value of `side` parameter should be validated in caller.
    """
    assert kind in ["loc", "getitem", None, lib.no_default]
    self._deprecated_arg(kind, "kind", "_maybe_cast_slice_bound")

    if not isinstance(label, str):
        # Only recognized scalars and dates are accepted as non-string bounds.
        if not isinstance(label, (self._data._recognized_scalars, date)):
            raise self._invalid_indexer("slice", label)
        self._deprecate_mismatched_indexing(label)
        return self._maybe_cast_for_get_loc(label)

    # inferred_freq is looked up first because it is the eager default below.
    fallback_freq = getattr(self, "inferred_freq", None)
    freq = getattr(self, "freqstr", fallback_freq)
    try:
        parsed, reso_str = parsing.parse_time_string(label, freq)
    except parsing.DateParseError as err:
        raise self._invalid_indexer("slice", label) from err

    reso = Resolution.from_attrname(reso_str)
    lower, upper = self._parsed_string_to_bounds(reso, parsed)

    # lower, upper form the half-open interval:
    #   [parsed, parsed + 1 freq)
    # because label may be passed to searchsorted
    # the bounds need swapped if index is reverse sorted and has a
    # length > 1 (is_monotonic_decreasing gives True for empty
    # and length 1 index)
    if self._is_strictly_monotonic_decreasing and len(self) > 1:
        lower, upper = upper, lower

    if side == "left":
        return lower
    return upper
def parse_dates(date_time):
    """
    Convert a date string or datetime-like object to seconds since 1970 (UTC).

    ERDDAP ReSTful API standardizes the representation of dates as either ISO strings
    or seconds since 1970, but internally ERDDAPY uses datetime-like objects.

    :param date_time: a string to be parsed by ``parse_time_string``, or an
        object that already has ``tzinfo`` and ``timestamp()``
    :returns: the value of ``timestamp()`` after normalizing to UTC
    """
    # Only strings go through the parser. NOTE(review): newer pandas appears to
    # raise TypeError for non-string input, so datetimes bypass it - confirm
    # against the pandas versions this project supports.
    if isinstance(date_time, str):
        date_time = parse_time_string(date_time)

    # pandas returns a tuple whose first element is the datetime;
    # we want only the datetime obj.
    if isinstance(date_time, tuple):
        date_time = date_time[0]

    if not date_time.tzinfo:
        # No tzinfo set: attach UTC without shifting the clock time.
        date_time = pytz.utc.localize(date_time)
    else:
        date_time = date_time.astimezone(pytz.utc)

    return date_time.timestamp()
def _get_string_slice(self, key: str, use_lhs: bool = True, use_rhs: bool = True):
    """
    Locate the entries covered by the partial date string ``key``.

    Parameters
    ----------
    key : str
        Passed to ``parse_time_string`` together with ``self.freq``.
    use_lhs : bool, default True
        Whether the lower bound of the parsed interval restricts the result.
    use_rhs : bool, default True
        Whether the upper bound of the parsed interval restricts the result.

    Returns
    -------
    slice or ndarray
        A slice when the index is monotonic, otherwise the positions where
        the bound masks are both true.

    Raises
    ------
    ValueError
        If the parsed resolution's frequency group is not below the index's.
    KeyError
        If the index is monotonic, non-empty, and both bounds fall before the
        first element (with use_lhs) or after the last (with use_rhs).
    """
    # TODO: Check for non-True use_lhs/use_rhs
    parsed, reso = parse_time_string(key, self.freq)
    grp = resolution.Resolution.get_freq_group(reso)
    freqn = resolution.get_freq_group(self.freq)

    if not grp < freqn:
        # TODO: we used to also check for
        #  reso in ["day", "hour", "minute", "second"]
        #  why is that check not needed?
        raise ValueError(key)

    # t1/t2 are the lower/upper bounds for the parsed string; their
    # ``.ordinal`` values are compared against the index's asi8 values below.
    t1, t2 = self._parsed_string_to_bounds(reso, parsed)
    i8vals = self.asi8

    if self.is_monotonic:

        # we are out of range
        if len(self) and (
            (use_lhs and t1 < self[0] and t2 < self[0])
            or ((use_rhs and t1 > self[-1] and t2 > self[-1]))
        ):
            raise KeyError(key)

        # TODO: does this depend on being monotonic _increasing_?
        #  If so, DTI will also be affected.

        # a monotonic (sorted) series can be sliced
        # Use asi8.searchsorted to avoid re-validating Periods
        # A disabled side leaves that end of the slice open (None).
        left = i8vals.searchsorted(t1.ordinal, side="left") if use_lhs else None
        right = i8vals.searchsorted(t2.ordinal, side="right") if use_rhs else None
        return slice(left, right)

    else:
        # A disabled side contributes the plain bool True instead of a mask.
        # NOTE(review): with both use_lhs and use_rhs False the combined mask
        # is the Python bool True, which has no .nonzero() - confirm callers
        # never do that.
        lhs_mask = (i8vals >= t1.ordinal) if use_lhs else True
        rhs_mask = (i8vals <= t2.ordinal) if use_rhs else True

        # try to find a the dates
        return (lhs_mask & rhs_mask).nonzero()[0]
def get_value(self, series, key):
    """
    Fast lookup of value from 1-dimensional ndarray. Only use this if you
    know what you're doing
    """
    if is_integer(key):
        return series.iat[key]

    if not isinstance(key, str):
        if isinstance(key, Period) or key is NaT:
            ordinal = NaT.value if key is NaT else key.ordinal
            return series[self._engine.get_loc(ordinal)]

        # slice, PeriodIndex, np.ndarray, List[Period]
        value = Index.get_value(self, series, key)
        return com.maybe_box(self, value, series, key)

    # String keys: compare the parsed resolution with the index frequency.
    asdt, reso = parse_time_string(key, self.freq)
    grp = resolution.Resolution.get_freq_group(reso)
    freqn = resolution.get_freq_group(self.freq)

    ordinals = self._ndarray_values

    # if our data is higher resolution than requested key, slice
    if grp < freqn:
        span = Period(asdt, freq=(grp, 1))
        first_ord = span.asfreq(self.freq, how="S").ordinal
        last_ord = span.asfreq(self.freq, how="E").ordinal

        if last_ord < ordinals[0] or first_ord > ordinals[-1]:
            raise KeyError(key)

        pos = np.searchsorted(self._ndarray_values, [first_ord, last_ord])
        return series[slice(pos[0], pos[1] + 1)]

    if grp == freqn:
        exact = Period(asdt, freq=self.freq)
        return series.iloc[self.get_loc(exact)]

    raise KeyError(key)
def _maybe_cast_slice_bound(self, label, side: str, kind):
    """
    If label is a string, cast it to datetime according to resolution.

    Parameters
    ----------
    label : object
    side : {'left', 'right'}
    kind : {'loc', 'getitem'} or None

    Returns
    -------
    label : object

    Notes
    -----
    Value of `side` parameter should be validated in caller.
    """
    assert kind in ["loc", "getitem", None]

    # Floats, times and integers are reported through _invalid_indexer.
    rejected = is_float(label) or isinstance(label, time) or is_integer(label)
    if rejected:
        self._invalid_indexer("slice", label)

    if not isinstance(label, str):
        return label

    # inferred_freq is looked up first because it is the eager default below.
    fallback_freq = getattr(self, "inferred_freq", None)
    freq = getattr(self, "freqstr", fallback_freq)
    parsed, reso_name = parsing.parse_time_string(label, freq)
    reso = Resolution.from_attrname(reso_name)
    lower, upper = self._parsed_string_to_bounds(reso, parsed)

    # lower, upper form the half-open interval:
    #   [parsed, parsed + 1 freq)
    # because label may be passed to searchsorted
    # the bounds need swapped if index is reverse sorted and has a
    # length > 1 (is_monotonic_decreasing gives True for empty
    # and length 1 index)
    if self._is_strictly_monotonic_decreasing and len(self) > 1:
        lower, upper = upper, lower

    if side == "left":
        return lower
    return upper
def _maybe_cast_slice_bound(self, label, side: str, kind=lib.no_default):
    """
    If label is a string or a datetime, cast it to Period.ordinal according
    to resolution.

    Parameters
    ----------
    label : object
    side : {'left', 'right'}
    kind : {'loc', 'getitem'}, or None

    Returns
    -------
    bound : Period or object

    Notes
    -----
    Value of `side` parameter should be validated in caller.
    """
    assert kind in ["loc", "getitem", None, lib.no_default]
    self._deprecated_arg(kind, "kind", "_maybe_cast_slice_bound")

    if isinstance(label, datetime):
        return Period(label, freq=self.freq)

    if isinstance(label, str):
        try:
            parsed, reso_str = parse_time_string(label, self.freq)
        except ValueError as err:
            # string cannot be parsed as datetime-like
            raise self._invalid_indexer("slice", label) from err

        reso = Resolution.from_attrname(reso_str)
        lower, upper = self._parsed_string_to_bounds(reso, parsed)
        if side == "left":
            return lower
        return upper

    # Anything else must be a recognized scalar and is returned unchanged.
    if not isinstance(label, self._data._recognized_scalars):
        raise self._invalid_indexer("slice", label)
    return label
def parse_dates(date_time: Union[datetime, str]) -> float:
    """
    Parse dates to ERDDAP internal format.

    ERDDAP ReSTful API standardizes the representation of dates as either ISO strings
    or seconds since 1970, but internally ERDDAPY uses datetime-like objects.

    Strings are parsed with pandas; other inputs are used as given. The
    result is the ``timestamp()`` of the UTC-normalized value.
    """
    # pandas returns a tuple; only its first element (the datetime obj) is used.
    moment = (
        parse_time_string(date_time)[0] if isinstance(date_time, str) else date_time
    )

    if moment.tzinfo:
        in_utc = moment.astimezone(pytz.utc)
    else:
        # No tzinfo set: attach UTC without shifting the clock time.
        in_utc = pytz.utc.localize(moment)

    return in_utc.timestamp()
def _maybe_cast_slice_bound(self, label, side: str, kind: str):
    """
    If label is a string or a datetime, cast it to Period.ordinal according
    to resolution.

    Parameters
    ----------
    label : object
    side : {'left', 'right'}
    kind : {'loc', 'getitem'}

    Returns
    -------
    bound : Period or object

    Notes
    -----
    Value of `side` parameter should be validated in caller.
    """
    assert kind in ["loc", "getitem"]

    if isinstance(label, datetime):
        return Period(label, freq=self.freq)

    if isinstance(label, str):
        # A ValueError from any step below is reported as a KeyError.
        try:
            parsed, reso_name = parse_time_string(label, self.freq)
            reso = Resolution.from_attrname(reso_name)
            bounds = self._parsed_string_to_bounds(reso, parsed)
            position = 0 if side == "left" else 1
            return bounds[position]
        except ValueError as err:
            # string cannot be parsed as datetime-like
            # TODO: we need tests for this case
            raise KeyError(label) from err

    if is_integer(label) or is_float(label):
        self._invalid_indexer("slice", label)

    return label
def get_value(self, series, key):
    """
    Fast lookup of value from 1-dimensional ndarray. Only use this if you
    know what you're doing
    """
    if is_integer(key):
        return series.iat[key]

    if not isinstance(key, str):
        if isinstance(key, Period) or key is NaT:
            ordinal = NaT.value if key is NaT else key.ordinal
            return series[self._engine.get_loc(ordinal)]

        # slice, PeriodIndex, np.ndarray, List[Period]
        value = Index.get_value(self, series, key)
        return com.maybe_box(self, value, series, key)

    # String keys: try the partial-slice path first. TypeError/ValueError
    # from either the slicing or the series lookup falls through.
    try:
        return series[self._get_string_slice(key)]
    except (TypeError, ValueError):
        pass

    asdt, reso = parse_time_string(key, self.freq)
    grp = resolution.Resolution.get_freq_group(reso)
    freqn = resolution.get_freq_group(self.freq)

    # _get_string_slice will handle cases where grp < freqn
    assert grp >= freqn

    if grp == freqn:
        exact = Period(asdt, freq=self.freq)
        return series.iloc[self.get_loc(exact)]

    raise KeyError(key)
def _parse_with_reso(self, label: str):
    """Parse ``label`` and return it with the matching ``Resolution`` object."""
    # overridden by TimedeltaIndex
    parsed, reso_name = parsing.parse_time_string(label, self.freq)
    return parsed, Resolution.from_attrname(reso_name)
def test_parse_time_string(self):
    """Check that a quarter string parses the same for upper- and lower-case ``Q``."""
    upper_result = parse_time_string('4Q1984')
    lower_result = parse_time_string('4q1984')

    # Both results are unpacked as 3-tuples, so another shape fails here.
    date_upper, parsed_upper, reso_upper = upper_result
    date_lower, parsed_lower, reso_lower = lower_result

    assert date_upper == date_lower
    assert parsed_upper == parsed_lower
    assert reso_upper == reso_lower
def get_loc(self, key, method=None, tolerance=None):
    """
    Get integer location for requested label.

    Parameters
    ----------
    key : Period, NaT, str, or datetime
        String or datetime key must be parsable as Period.
    method : optional
        Forwarded to the recursive call and to ``Index.get_loc``.
    tolerance : optional
        Forwarded to the recursive call and to ``Index.get_loc``.

    Returns
    -------
    loc : int or ndarray[int64]

    Raises
    ------
    InvalidIndexError
        If key is not a scalar.
    KeyError
        Key is not present in the index.
    TypeError
        If key is listlike or otherwise not hashable.
    """
    # Remember the caller's key so errors report it, not a converted value.
    orig_key = key

    if not is_scalar(key):
        raise InvalidIndexError(key)

    if isinstance(key, str):

        # First try to treat the string as a partial-date slice.
        try:
            loc = self._get_string_slice(key)
            return loc
        except (TypeError, ValueError):
            pass

        try:
            asdt, reso = parse_time_string(key, self.freq)
        except (ValueError, DateParseError) as err:
            # A string with invalid format
            raise KeyError(f"Cannot interpret '{key}' as period") from err

        # ``reso`` is rebound from the attribute-name string to a Resolution.
        reso = Resolution.from_attrname(reso)
        grp = reso.freq_group
        freqn = self.dtype.freq_group

        # _get_string_slice will handle cases where grp < freqn
        assert grp >= freqn

        # BusinessDay is a bit strange. It has a *lower* code, but we never parse
        #  a string as "BusinessDay" resolution, just Day.
        if grp == freqn or (
            reso == Resolution.RESO_DAY and self.dtype.freq.name == "B"
        ):
            # Same resolution as the index: look the Period up via this method.
            key = Period(asdt, freq=self.freq)
            loc = self.get_loc(key, method=method, tolerance=tolerance)
            return loc
        elif method is None:
            raise KeyError(key)
        else:
            # A method was given: continue below with the parsed datetime.
            key = asdt

    elif is_integer(key):
        # Period constructor will cast to string, which we dont want
        raise KeyError(key)

    try:
        key = Period(key, freq=self.freq)
    except ValueError as err:
        # we cannot construct the Period
        raise KeyError(orig_key) from err

    try:
        return Index.get_loc(self, key, method, tolerance)
    except KeyError as err:
        raise KeyError(orig_key) from err
def test_parsers_quarterly_with_freq(date_str, freq, expected):
    """Check the first element of the parse result for a quarterly string and freq."""
    outcome = parsing.parse_time_string(date_str, freq=freq)
    # The result is unpacked as a 3-tuple; only the first element is compared.
    parsed_date, _, _ = outcome
    assert parsed_date == expected
def test_parsers_quarterly_with_freq_error(date_str, kwargs, msg):
    """Check that parsing raises DateParseError whose message matches ``msg``."""
    expected_error = parsing.DateParseError
    with pytest.raises(expected_error, match=msg):
        parsing.parse_time_string(date_str, **kwargs)
def test_parse_time_quarter_with_dash_error(dashed):
    """Check that a malformed dashed quarter string raises DateParseError."""
    template = ("Unknown datetime string format, "
                "unable to parse: {dashed}")
    expected_msg = template.format(dashed=dashed)

    with pytest.raises(parsing.DateParseError, match=expected_msg):
        parse_time_string(dashed)
def test_parsers_month_freq(date_str, expected):
    """Check the first element of the parse result when freq is "M"."""
    outcome = parsing.parse_time_string(date_str, freq="M")
    # The result is unpacked as a 3-tuple; only the first element is compared.
    parsed_date, _, _ = outcome
    assert parsed_date == expected
def _get_string_slice(self, key: str):
    """
    Parse ``key`` and return the result of ``_partial_date_slice`` for it.

    The frequency given to the parser is ``freqstr`` when the object has that
    attribute, otherwise ``inferred_freq``, otherwise None.
    """
    # inferred_freq is looked up first because it is the eager default below.
    fallback_freq = getattr(self, "inferred_freq", None)
    freq = getattr(self, "freqstr", fallback_freq)

    parsed, reso_name = parsing.parse_time_string(key, freq)
    return self._partial_date_slice(Resolution.from_attrname(reso_name), parsed)
def get_loc(self, key, method=None, tolerance=None):
    """
    Get integer location for requested label.

    Parameters
    ----------
    key : Period, NaT, str, or datetime
        String or datetime key must be parseable as Period.
    method : optional
        Forwarded to the recursive call and to ``Index.get_loc``.
    tolerance : optional
        Forwarded to the recursive call and to ``Index.get_loc``.

    Returns
    -------
    loc : int or ndarray[int64]

    Raises
    ------
    InvalidIndexError
        If key is not a scalar.
    KeyError
        Key is not present in the index.
    TypeError
        If key is listlike or otherwise not hashable.
    """
    # Remember the caller's key so errors report it, not a converted value.
    orig_key = key

    if not is_scalar(key):
        raise InvalidIndexError(key)

    if isinstance(key, str):
        # First try to treat the string as a partial-date slice.
        try:
            loc = self._get_string_slice(key)
            return loc
        except (TypeError, ValueError):
            pass

        try:
            asdt, reso = parse_time_string(key, self.freq)
        except DateParseError as err:
            # A string with invalid format
            raise KeyError(f"Cannot interpret '{key}' as period") from err

        grp = resolution.Resolution.get_freq_group(reso)
        freqn = resolution.get_freq_group(self.freq)

        # _get_string_slice will handle cases where grp < freqn
        assert grp >= freqn

        if grp == freqn:
            # Same resolution as the index: look the Period up via this method.
            key = Period(asdt, freq=self.freq)
            loc = self.get_loc(key, method=method, tolerance=tolerance)
            return loc
        elif method is None:
            raise KeyError(key)
        else:
            # A method was given: continue below with the parsed datetime.
            key = asdt

    elif is_integer(key):
        # Period constructor will cast to string, which we dont want
        raise KeyError(key)

    try:
        key = Period(key, freq=self.freq)
    except ValueError as err:
        # we cannot construct the Period
        raise KeyError(orig_key) from err

    try:
        return Index.get_loc(self, key, method, tolerance)
    except KeyError as err:
        raise KeyError(orig_key) from err