def _convert_scalar_indexer(self, key, kind=None): """ we don't allow integer or float indexing on datetime-like when using loc Parameters ---------- key : label of the slice bound kind : {'ix', 'loc', 'getitem', 'iloc'} or None """ assert kind in ['ix', 'loc', 'getitem', 'iloc', None] # we don't allow integer/float indexing for loc # we don't allow float indexing for ix/getitem if is_scalar(key): is_int = is_integer(key) is_flt = is_float(key) if kind in ['loc'] and (is_int or is_flt): self._invalid_indexer('index', key) elif kind in ['ix', 'getitem'] and is_flt: self._invalid_indexer('index', key) return (super(DatetimeIndexOpsMixin, self) ._convert_scalar_indexer(key, kind=kind))
def _simple_new(cls, data, sp_index, fill_value): if not isinstance(sp_index, SparseIndex): # caller must pass SparseIndex raise ValueError('sp_index must be a SparseIndex') if fill_value is None: if sp_index.ngaps > 0: # has missing hole fill_value = np.nan else: fill_value = na_value_for_dtype(data.dtype) if (is_integer_dtype(data) and is_float(fill_value) and sp_index.ngaps > 0): # if float fill_value is being included in dense repr, # convert values to float data = data.astype(float) result = data.view(cls) if not isinstance(sp_index, SparseIndex): # caller must pass SparseIndex raise ValueError('sp_index must be a SparseIndex') result.sp_index = sp_index result._fill_value = fill_value return result
def _maybe_cast_slice_bound(self, label, side, kind): """ If label is a string, cast it to timedelta according to resolution. Parameters ---------- label : object side : {'left', 'right'} kind : {'ix', 'loc', 'getitem'} Returns ------- label : object """ assert kind in ['ix', 'loc', 'getitem', None] if isinstance(label, compat.string_types): parsed = Timedelta(label) lbound = parsed.round(parsed.resolution) if side == 'left': return lbound else: return (lbound + to_offset(parsed.resolution) - Timedelta(1, 'ns')) elif ((is_integer(label) or is_float(label)) and not is_timedelta64_dtype(label)): self._invalid_indexer('slice', label) return label
def _maybe_cast_slice_bound(self, label, side, kind): """ If label is a string or a datetime, cast it to Period.ordinal according to resolution. Parameters ---------- label : object side : {'left', 'right'} kind : {'ix', 'loc', 'getitem'} Returns ------- bound : Period or object Notes ----- Value of `side` parameter should be validated in caller. """ assert kind in ['ix', 'loc', 'getitem'] if isinstance(label, datetime): return Period(label, freq=self.freq) elif isinstance(label, compat.string_types): try: _, parsed, reso = parse_time_string(label, self.freq) bounds = self._parsed_string_to_bounds(reso, parsed) return bounds[0 if side == 'left' else 1] except Exception: raise KeyError(label) elif is_integer(label) or is_float(label): self._invalid_indexer('slice', label) return label
def _value_with_fmt(self, val): """Convert numpy types to Python types for the Excel writers. Parameters ---------- val : object Value to be written into cells Returns ------- Tuple with the first element being the converted value and the second being an optional format """ fmt = None if is_integer(val): val = int(val) elif is_float(val): val = float(val) elif is_bool(val): val = bool(val) elif isinstance(val, datetime): fmt = self.datetime_format elif isinstance(val, date): fmt = self.date_format elif isinstance(val, timedelta): val = val.total_seconds() / float(86400) fmt = '0' else: val = compat.to_str(val) return val, fmt
def _get_string_slice(self, key, use_lhs=True, use_rhs=True): freq = getattr(self, 'freqstr', getattr(self, 'inferred_freq', None)) if is_integer(key) or is_float(key) or key is NaT: self._invalid_indexer('slice', key) loc = self._partial_td_slice(key, freq, use_lhs=use_lhs, use_rhs=use_rhs) return loc
def convert(value, unit, axis): valid_types = (str, pydt.time) if (isinstance(value, valid_types) or is_integer(value) or is_float(value)): return time2num(value) if isinstance(value, Index): return value.map(time2num) if isinstance(value, (list, tuple, np.ndarray, Index)): return [time2num(x) for x in value] return value
def __contains__(self, key): """ Check if key is a float and has a decimal. If it has, return False. """ hash(key) try: if is_float(key) and int(key) != key: return False return key in self._engine except (OverflowError, TypeError, ValueError): return False
def _format_value(self, val): if is_scalar(val) and missing.isna(val): val = self.na_rep elif is_float(val): if missing.isposinf_scalar(val): val = self.inf_rep elif missing.isneginf_scalar(val): val = '-{inf}'.format(inf=self.inf_rep) elif self.float_format is not None: val = float(self.float_format % val) return val
def integer_arithmetic_method(self, other): op_name = op.__name__ mask = None if isinstance(other, (ABCSeries, ABCIndexClass)): # Rely on pandas to unbox and dispatch to us. return NotImplemented if getattr(other, 'ndim', 0) > 1: raise NotImplementedError( "can only perform ops with 1-d structures") if isinstance(other, IntegerArray): other, mask = other._data, other._mask elif getattr(other, 'ndim', None) == 0: other = other.item() elif is_list_like(other): other = np.asarray(other) if not other.ndim: other = other.item() elif other.ndim == 1: if not (is_float_dtype(other) or is_integer_dtype(other)): raise TypeError( "can only perform ops with numeric values") else: if not (is_float(other) or is_integer(other)): raise TypeError("can only perform ops with numeric values") # nans propagate if mask is None: mask = self._mask else: mask = self._mask | mask # 1 ** np.nan is 1. So we have to unmask those. if op_name == 'pow': mask = np.where(self == 1, False, mask) elif op_name == 'rpow': mask = np.where(other == 1, False, mask) with np.errstate(all='ignore'): result = op(self._data, other) # divmod returns a tuple if op_name == 'divmod': div, mod = result return (self._maybe_mask_result(div, mask, other, 'floordiv'), self._maybe_mask_result(mod, mask, other, 'mod')) return self._maybe_mask_result(result, mask, other, op_name)
def _format_value(self, val): if lib.checknull(val): val = self.na_rep elif is_float(val): if lib.isposinf_scalar(val): val = self.inf_rep elif lib.isneginf_scalar(val): val = '-{inf}'.format(inf=self.inf_rep) elif self.float_format is not None: val = float(self.float_format % val) return val
def get_datevalue(date, freq): if isinstance(date, Period): return date.asfreq(freq).ordinal elif isinstance(date, (compat.string_types, datetime, pydt.date, pydt.time)): return Period(date, freq).ordinal elif (is_integer(date) or is_float(date) or (isinstance(date, (np.ndarray, Index)) and (date.size == 1))): return date elif date is None: return None raise ValueError("Unrecognizable date '%s'" % date)
def get_datevalue(date, freq): if isinstance(date, Period): return date.asfreq(freq).ordinal elif isinstance(date, (str, datetime, pydt.date, pydt.time, np.datetime64)): return Period(date, freq).ordinal elif (is_integer(date) or is_float(date) or (isinstance(date, (np.ndarray, Index)) and (date.size == 1))): return date elif date is None: return None raise ValueError("Unrecognizable date '{date}'".format(date=date))
def test_from_to_scipy(spmatrix, index, columns, fill_value, dtype): # GH 4343 tm.skip_if_no_package('scipy') # Make one ndarray and from it one sparse matrix, both to be used for # constructing frames and comparing results arr = np.eye(3, dtype=dtype) # GH 16179 arr[0, 1] = dtype(2) try: spm = spmatrix(arr) assert spm.dtype == arr.dtype except (TypeError, AssertionError): # If conversion to sparse fails for this spmatrix type and arr.dtype, # then the combination is not currently supported in NumPy, so we # can just skip testing it thoroughly return sdf = pd.SparseDataFrame(spm, index=index, columns=columns, default_fill_value=fill_value) # Expected result construction is kind of tricky for all # dtype-fill_value combinations; easiest to cast to something generic # and except later on rarr = arr.astype(object) rarr[arr == 0] = np.nan expected = pd.SparseDataFrame(rarr, index=index, columns=columns).fillna( fill_value if fill_value is not None else np.nan) # Assert frame is as expected sdf_obj = sdf.astype(object) tm.assert_sp_frame_equal(sdf_obj, expected) tm.assert_frame_equal(sdf_obj.to_dense(), expected.to_dense()) # Assert spmatrices equal assert dict(sdf.to_coo().todok()) == dict(spm.todok()) # Ensure dtype is preserved if possible was_upcast = ((fill_value is None or is_float(fill_value)) and not is_object_dtype(dtype) and not is_float_dtype(dtype)) res_dtype = (bool if is_bool_dtype(dtype) else float if was_upcast else dtype) tm.assert_contains_all(sdf.dtypes, {np.dtype(res_dtype)}) assert sdf.to_coo().dtype == res_dtype # However, adding a str column results in an upcast to object sdf['strings'] = np.arange(len(sdf)).astype(str) assert sdf.to_coo().dtype == np.object_
def _convert_1d(values, units, axis): if not hasattr(axis, 'freq'): raise TypeError('Axis must have `freq` set to convert to Periods') valid_types = (compat.string_types, datetime, Period, pydt.date, pydt.time) if (isinstance(values, valid_types) or is_integer(values) or is_float(values)): return get_datevalue(values, axis.freq) if isinstance(values, PeriodIndex): return values.asfreq(axis.freq)._values if isinstance(values, Index): return values.map(lambda x: get_datevalue(x, axis.freq)) if is_period_arraylike(values): return PeriodIndex(values, freq=axis.freq)._values if isinstance(values, (list, tuple, np.ndarray, Index)): return [get_datevalue(x, axis.freq) for x in values] return values
def _convert_1d(values, units, axis): if not hasattr(axis, 'freq'): raise TypeError('Axis must have `freq` set to convert to Periods') valid_types = (str, datetime, Period, pydt.date, pydt.time, np.datetime64) if (isinstance(values, valid_types) or is_integer(values) or is_float(values)): return get_datevalue(values, axis.freq) elif isinstance(values, PeriodIndex): return values.asfreq(axis.freq)._ndarray_values elif isinstance(values, Index): return values.map(lambda x: get_datevalue(x, axis.freq)) elif lib.infer_dtype(values, skipna=False) == 'period': # https://github.com/pandas-dev/pandas/issues/24304 # convert ndarray[period] -> PeriodIndex return PeriodIndex(values, freq=axis.freq)._ndarray_values elif isinstance(values, (list, tuple, np.ndarray, Index)): return [get_datevalue(x, axis.freq) for x in values] return values
def _ensure_numeric(x): if isinstance(x, np.ndarray): if is_integer_dtype(x) or is_bool_dtype(x): x = x.astype(np.float64) elif is_object_dtype(x): try: x = x.astype(np.complex128) except: x = x.astype(np.float64) else: if not np.any(x.imag): x = x.real elif not (is_float(x) or is_integer(x) or is_complex(x)): try: x = float(x) except Exception: try: x = complex(x) except Exception: raise TypeError('Could not convert %s to numeric' % str(x)) return x
def _convert_1d(values, unit, axis): def try_parse(values): try: return _dt_to_float_ordinal(tools.to_datetime(values)) except Exception: return values if isinstance(values, (datetime, pydt.date)): return _dt_to_float_ordinal(values) elif isinstance(values, np.datetime64): return _dt_to_float_ordinal(tslibs.Timestamp(values)) elif isinstance(values, pydt.time): return dates.date2num(values) elif (is_integer(values) or is_float(values)): return values elif isinstance(values, str): return try_parse(values) elif isinstance(values, (list, tuple, np.ndarray, Index, ABCSeries)): if isinstance(values, ABCSeries): # https://github.com/matplotlib/matplotlib/issues/11391 # Series was skipped. Convert to DatetimeIndex to get asi8 values = Index(values) if isinstance(values, Index): values = values.values if not isinstance(values, np.ndarray): values = com.asarray_tuplesafe(values) if is_integer_dtype(values) or is_float_dtype(values): return values try: values = tools.to_datetime(values) if isinstance(values, Index): values = _dt_to_float_ordinal(values) else: values = [_dt_to_float_ordinal(x) for x in values] except Exception: values = _dt_to_float_ordinal(values) return values
def integer_arithmetic_method(self, other): op_name = op.__name__ mask = None if isinstance(other, (ABCSeries, ABCIndexClass)): other = getattr(other, 'values', other) if isinstance(other, IntegerArray): other, mask = other._data, other._mask elif getattr(other, 'ndim', 0) > 1: raise NotImplementedError( "can only perform ops with 1-d structures") elif is_list_like(other): other = np.asarray(other) if not other.ndim: other = other.item() elif other.ndim == 1: if not (is_float_dtype(other) or is_integer_dtype(other)): raise TypeError( "can only perform ops with numeric values") else: if not (is_float(other) or is_integer(other)): raise TypeError("can only perform ops with numeric values") # nans propagate if mask is None: mask = self._mask else: mask = self._mask | mask with np.errstate(all='ignore'): result = op(self._data, other) # divmod returns a tuple if op_name == 'divmod': div, mod = result return (self._maybe_mask_result(div, mask, other, 'floordiv'), self._maybe_mask_result(mod, mask, other, 'mod')) return self._maybe_mask_result(result, mask, other, op_name)
def _convert_1d(values, unit, axis): def try_parse(values): try: return _dt_to_float_ordinal(tools.to_datetime(values)) except Exception: return values if isinstance(values, (datetime, pydt.date)): return _dt_to_float_ordinal(values) elif isinstance(values, np.datetime64): return _dt_to_float_ordinal(lib.Timestamp(values)) elif isinstance(values, pydt.time): return dates.date2num(values) elif (is_integer(values) or is_float(values)): return values elif isinstance(values, compat.string_types): return try_parse(values) elif isinstance(values, (list, tuple, np.ndarray, Index)): if isinstance(values, Index): values = values.values if not isinstance(values, np.ndarray): values = com._asarray_tuplesafe(values) if is_integer_dtype(values) or is_float_dtype(values): return values try: values = tools.to_datetime(values) if isinstance(values, Index): values = _dt_to_float_ordinal(values) else: values = [_dt_to_float_ordinal(x) for x in values] except Exception: values = _dt_to_float_ordinal(values) return values
def _maybe_mask_result(self, result, mask, other, op_name): """ Parameters ---------- result : array-like mask : array-like bool other : scalar or array-like op_name : str """ # may need to fill infs # and mask wraparound if is_float_dtype(result): mask |= (result == np.inf) | (result == -np.inf) # if we have a float operand we are by-definition # a float result # or our op is a divide if ((is_float_dtype(other) or is_float(other)) or (op_name in ['rtruediv', 'truediv', 'rdiv', 'div'])): result[mask] = np.nan return result return type(self)(result, mask, copy=False)
def integer_arithmetic_method(self, other): omask = None if getattr(other, "ndim", 0) > 1: raise NotImplementedError("can only perform ops with 1-d structures") if isinstance(other, IntegerArray): other, omask = other._data, other._mask elif is_list_like(other): other = np.asarray(other) if other.ndim > 1: raise NotImplementedError( "can only perform ops with 1-d structures" ) if len(self) != len(other): raise ValueError("Lengths must match") if not (is_float_dtype(other) or is_integer_dtype(other)): raise TypeError("can only perform ops with numeric values") else: if not (is_float(other) or is_integer(other) or other is libmissing.NA): raise TypeError("can only perform ops with numeric values") if omask is None: mask = self._mask.copy() if other is libmissing.NA: mask |= True else: mask = self._mask | omask if op_name == "pow": # 1 ** x is 1. mask = np.where((self._data == 1) & ~self._mask, False, mask) # x ** 0 is 1. if omask is not None: mask = np.where((other == 0) & ~omask, False, mask) elif other is not libmissing.NA: mask = np.where(other == 0, False, mask) elif op_name == "rpow": # 1 ** x is 1. if omask is not None: mask = np.where((other == 1) & ~omask, False, mask) elif other is not libmissing.NA: mask = np.where(other == 1, False, mask) # x ** 0 is 1. mask = np.where((self._data == 0) & ~self._mask, False, mask) if other is libmissing.NA: result = np.ones_like(self._data) else: with np.errstate(all="ignore"): result = op(self._data, other) # divmod returns a tuple if op_name == "divmod": div, mod = result return ( self._maybe_mask_result(div, mask, other, "floordiv"), self._maybe_mask_result(mod, mask, other, "mod"), ) return self._maybe_mask_result(result, mask, other, op_name)
def test_is_float(self): assert is_float(1.1) assert is_float(np.float64(1.1)) assert is_float(np.nan) assert not is_float(True) assert not is_float(1) assert not is_float(1 + 3j) assert not is_float(np.bool(False)) assert not is_float(np.bool_(False)) assert not is_float(np.int64(1)) assert not is_float(np.complex128(1 + 3j)) assert not is_float(None) assert not is_float("x") assert not is_float(datetime(2011, 1, 1)) assert not is_float(np.datetime64("2011-01-01")) assert not is_float(Timestamp("2011-01-01")) assert not is_float(Timestamp("2011-01-01", tz="US/Eastern")) assert not is_float(timedelta(1000)) assert not is_float(np.timedelta64(1, "D")) assert not is_float(Timedelta("1 days"))
def interval_range(start=None, end=None, periods=None, freq=None, name=None, closed='right'): """ Return a fixed frequency IntervalIndex Parameters ---------- start : numeric or datetime-like, default None Left bound for generating intervals end : numeric or datetime-like, default None Right bound for generating intervals periods : integer, default None Number of periods to generate freq : numeric, string, or DateOffset, default None The length of each interval. Must be consistent with the type of start and end, e.g. 2 for numeric, or '5H' for datetime-like. Default is 1 for numeric and 'D' (calendar daily) for datetime-like. name : string, default None Name of the resulting IntervalIndex closed : {'left', 'right', 'both', 'neither'}, default 'right' Whether the intervals are closed on the left-side, right-side, both or neither. Notes ----- Of the three parameters: ``start``, ``end``, and ``periods``, exactly two must be specified. Returns ------- rng : IntervalIndex Examples -------- Numeric ``start`` and ``end`` is supported. >>> pd.interval_range(start=0, end=5) IntervalIndex([(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]] closed='right', dtype='interval[int64]') Additionally, datetime-like input is also supported. >>> pd.interval_range(start=pd.Timestamp('2017-01-01'), end=pd.Timestamp('2017-01-04')) IntervalIndex([(2017-01-01, 2017-01-02], (2017-01-02, 2017-01-03], (2017-01-03, 2017-01-04]] closed='right', dtype='interval[datetime64[ns]]') The ``freq`` parameter specifies the frequency between the left and right. endpoints of the individual intervals within the ``IntervalIndex``. For numeric ``start`` and ``end``, the frequency must also be numeric. >>> pd.interval_range(start=0, periods=4, freq=1.5) IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]] closed='right', dtype='interval[float64]') Similarly, for datetime-like ``start`` and ``end``, the frequency must be convertible to a DateOffset. >>> pd.interval_range(start=pd.Timestamp('2017-01-01'), periods=3, freq='MS') IntervalIndex([(2017-01-01, 2017-02-01], (2017-02-01, 2017-03-01], (2017-03-01, 2017-04-01]] closed='right', dtype='interval[datetime64[ns]]') The ``closed`` parameter specifies which endpoints of the individual intervals within the ``IntervalIndex`` are closed. >>> pd.interval_range(end=5, periods=4, closed='both') IntervalIndex([[1, 2], [2, 3], [3, 4], [4, 5]] closed='both', dtype='interval[int64]') See Also -------- IntervalIndex : an Index of intervals that are all closed on the same side. """ if _count_not_none(start, end, periods) != 2: raise ValueError('Of the three parameters: start, end, and periods, ' 'exactly two must be specified') start = _maybe_box_datetimelike(start) end = _maybe_box_datetimelike(end) endpoint = next(_not_none(start, end)) if not _is_valid_endpoint(start): msg = 'start must be numeric or datetime-like, got {start}' raise ValueError(msg.format(start=start)) if not _is_valid_endpoint(end): msg = 'end must be numeric or datetime-like, got {end}' raise ValueError(msg.format(end=end)) if is_float(periods): periods = int(periods) elif not is_integer(periods) and periods is not None: msg = 'periods must be a number, got {periods}' raise TypeError(msg.format(periods=periods)) freq = freq or (1 if is_number(endpoint) else 'D') if not is_number(freq): try: freq = to_offset(freq) except ValueError: raise ValueError('freq must be numeric or convertible to ' 'DateOffset, got {freq}'.format(freq=freq)) # verify type compatibility if not all([_is_type_compatible(start, end), _is_type_compatible(start, freq), _is_type_compatible(end, freq)]): raise TypeError("start, end, freq need to be type compatible") if is_number(endpoint): if periods is None: periods = int((end - start) // freq) if start is None: start = end - periods * freq # force end to be consistent with freq (lower if freq skips over end) end = start + periods * freq # end + freq for inclusive endpoint breaks = np.arange(start, end + freq, freq) elif isinstance(endpoint, Timestamp): # add one to account for interval endpoints (n breaks = n-1 intervals) if periods is not None: periods += 1 breaks = date_range(start=start, end=end, periods=periods, freq=freq) else: # add one to account for interval endpoints (n breaks = n-1 intervals) if periods is not None: periods += 1 breaks = timedelta_range(start=start, end=end, periods=periods, freq=freq) return IntervalIndex.from_breaks(breaks, name=name, closed=closed)
def __new__(cls, data=None, unit=None, freq=None, start=None, end=None, periods=None, copy=False, name=None, closed=None, verify_integrity=True, **kwargs): if isinstance(data, TimedeltaIndex) and freq is None and name is None: if copy: return data.copy() else: return data._shallow_copy() freq_infer = False if not isinstance(freq, DateOffset): # if a passed freq is None, don't infer automatically if freq != 'infer': freq = to_offset(freq) else: freq_infer = True freq = None if periods is not None: if is_float(periods): periods = int(periods) elif not is_integer(periods): raise ValueError('Periods must be a number, got %s' % str(periods)) if data is None and freq is None: raise ValueError("Must provide freq argument if no data is " "supplied") if data is None: return cls._generate(start, end, periods, name, freq, closed=closed) if unit is not None: data = to_timedelta(data, unit=unit, box=False) if not isinstance(data, (np.ndarray, Index, ABCSeries)): if is_scalar(data): raise ValueError('TimedeltaIndex() must be called with a ' 'collection of some kind, %s was passed' % repr(data)) # convert if not already if getattr(data, 'dtype', None) != _TD_DTYPE: data = to_timedelta(data, unit=unit, box=False) elif copy: data = np.array(data, copy=True) # check that we are matching freqs if verify_integrity and len(data) > 0: if freq is not None and not freq_infer: index = cls._simple_new(data, name=name) inferred = index.inferred_freq if inferred != freq.freqstr: on_freq = cls._generate( index[0], None, len(index), name, freq) if not np.array_equal(index.asi8, on_freq.asi8): raise ValueError('Inferred frequency {0} from passed ' 'timedeltas does not conform to ' 'passed frequency {1}' .format(inferred, freq.freqstr)) index.freq = freq return index if freq_infer: index = cls._simple_new(data, name=name) inferred = index.inferred_freq if inferred: index.freq = to_offset(inferred) return index return cls._simple_new(data, name=name, freq=freq)
def interval_range(start=None, end=None, periods=None, freq=None, name=None, closed='right'): """ Return a fixed frequency IntervalIndex Parameters ---------- start : numeric or datetime-like, default None Left bound for generating intervals end : numeric or datetime-like, default None Right bound for generating intervals periods : integer, default None Number of periods to generate freq : numeric, string, or DateOffset, default None The length of each interval. Must be consistent with the type of start and end, e.g. 2 for numeric, or '5H' for datetime-like. Default is 1 for numeric and 'D' (calendar daily) for datetime-like. name : string, default None Name of the resulting IntervalIndex closed : string, default 'right' options are: 'left', 'right', 'both', 'neither' Notes ----- Of the three parameters: ``start``, ``end``, and ``periods``, exactly two must be specified. Returns ------- rng : IntervalIndex Examples -------- Numeric ``start`` and ``end`` is supported. >>> pd.interval_range(start=0, end=5) IntervalIndex([(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]] closed='right', dtype='interval[int64]') Additionally, datetime-like input is also supported. >>> pd.interval_range(start=pd.Timestamp('2017-01-01'), end=pd.Timestamp('2017-01-04')) IntervalIndex([(2017-01-01, 2017-01-02], (2017-01-02, 2017-01-03], (2017-01-03, 2017-01-04]] closed='right', dtype='interval[datetime64[ns]]') The ``freq`` parameter specifies the frequency between the left and right. endpoints of the individual intervals within the ``IntervalIndex``. For numeric ``start`` and ``end``, the frequency must also be numeric. >>> pd.interval_range(start=0, periods=4, freq=1.5) IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]] closed='right', dtype='interval[float64]') Similarly, for datetime-like ``start`` and ``end``, the frequency must be convertible to a DateOffset. >>> pd.interval_range(start=pd.Timestamp('2017-01-01'), periods=3, freq='MS') IntervalIndex([(2017-01-01, 2017-02-01], (2017-02-01, 2017-03-01], (2017-03-01, 2017-04-01]] closed='right', dtype='interval[datetime64[ns]]') The ``closed`` parameter specifies which endpoints of the individual intervals within the ``IntervalIndex`` are closed. >>> pd.interval_range(end=5, periods=4, closed='both') IntervalIndex([[1, 2], [2, 3], [3, 4], [4, 5]] closed='both', dtype='interval[int64]') See Also -------- IntervalIndex : an Index of intervals that are all closed on the same side. """ if com._count_not_none(start, end, periods) != 2: raise ValueError('Of the three parameters: start, end, and periods, ' 'exactly two must be specified') start = com._maybe_box_datetimelike(start) end = com._maybe_box_datetimelike(end) endpoint = next(com._not_none(start, end)) if not _is_valid_endpoint(start): msg = 'start must be numeric or datetime-like, got {start}' raise ValueError(msg.format(start=start)) if not _is_valid_endpoint(end): msg = 'end must be numeric or datetime-like, got {end}' raise ValueError(msg.format(end=end)) if is_float(periods): periods = int(periods) elif not is_integer(periods) and periods is not None: msg = 'periods must be a number, got {periods}' raise TypeError(msg.format(periods=periods)) freq = freq or (1 if is_number(endpoint) else 'D') if not is_number(freq): try: freq = to_offset(freq) except ValueError: raise ValueError('freq must be numeric or convertible to ' 'DateOffset, got {freq}'.format(freq=freq)) # verify type compatibility if not all([_is_type_compatible(start, end), _is_type_compatible(start, freq), _is_type_compatible(end, freq)]): raise TypeError("start, end, freq need to be type compatible") if is_number(endpoint): if periods is None: periods = int((end - start) // freq) if start is None: start = end - periods * freq # force end to be consistent with freq (lower if freq skips over end) end = start + periods * freq # end + freq for inclusive endpoint breaks = np.arange(start, end + freq, freq) elif isinstance(endpoint, Timestamp): # add one to account for interval endpoints (n breaks = n-1 intervals) if periods is not None: periods += 1 breaks = date_range(start=start, end=end, periods=periods, freq=freq) else: # add one to account for interval endpoints (n breaks = n-1 intervals) if periods is not None: periods += 1 breaks = timedelta_range(start=start, end=end, periods=periods, freq=freq) return IntervalIndex.from_breaks(breaks, name=name, closed=closed)
def infer_dtype_from_scalar(val, pandas_dtype: bool = False): """ Interpret the dtype from a scalar. Parameters ---------- pandas_dtype : bool, default False whether to infer dtype including pandas extension types. If False, scalar belongs to pandas extension types is inferred as object """ dtype = np.object_ # a 1-element ndarray if isinstance(val, np.ndarray): msg = "invalid ndarray passed to infer_dtype_from_scalar" if val.ndim != 0: raise ValueError(msg) dtype = val.dtype val = val.item() elif isinstance(val, str): # If we create an empty array using a string to infer # the dtype, NumPy will only allocate one character per entry # so this is kind of bad. Alternately we could use np.repeat # instead of np.empty (but then you still don't want things # coming out as np.str_! dtype = np.object_ elif isinstance(val, (np.datetime64, datetime)): val = tslibs.Timestamp(val) if val is tslibs.NaT or val.tz is None: dtype = np.dtype("M8[ns]") else: if pandas_dtype: dtype = DatetimeTZDtype(unit="ns", tz=val.tz) else: # return datetimetz as object return np.object_, val val = val.value elif isinstance(val, (np.timedelta64, timedelta)): val = tslibs.Timedelta(val).value dtype = np.dtype("m8[ns]") elif is_bool(val): dtype = np.bool_ elif is_integer(val): if isinstance(val, np.integer): dtype = type(val) else: dtype = np.int64 elif is_float(val): if isinstance(val, np.floating): dtype = type(val) else: dtype = np.float64 elif is_complex(val): dtype = np.complex_ elif pandas_dtype: if lib.is_period(val): dtype = PeriodDtype(freq=val.freq) val = val.ordinal elif lib.is_interval(val): subtype = infer_dtype_from_scalar(val.left, pandas_dtype=True)[0] dtype = IntervalDtype(subtype=subtype) return dtype, val
def _get_string_slice(self, key): if is_integer(key) or is_float(key) or key is NaT: self._invalid_indexer("slice", key) loc = self._partial_td_slice(key) return loc
def __new__(cls, data=None, unit=None, freq=None, start=None, end=None, periods=None, copy=False, name=None, closed=None, verify_integrity=True, **kwargs): if isinstance(data, TimedeltaIndex) and freq is None and name is None: if copy: return data.copy() else: return data._shallow_copy() freq_infer = False if not isinstance(freq, DateOffset): # if a passed freq is None, don't infer automatically if freq != 'infer': freq = to_offset(freq) else: freq_infer = True freq = None if periods is not None: if is_float(periods): periods = int(periods) elif not is_integer(periods): raise ValueError('Periods must be a number, got %s' % str(periods)) if data is None and freq is None: raise ValueError("Must provide freq argument if no data is " "supplied") if data is None: return cls._generate(start, end, periods, name, freq, closed=closed) if unit is not None: data = to_timedelta(data, unit=unit, box=False) if not isinstance(data, (np.ndarray, Index, ABCSeries)): if is_scalar(data): raise ValueError('TimedeltaIndex() must be called with a ' 'collection of some kind, %s was passed' % repr(data)) # convert if not already if getattr(data, 'dtype', None) != _TD_DTYPE: data = to_timedelta(data, unit=unit, box=False) elif copy: data = np.array(data, copy=True) # check that we are matching freqs if verify_integrity and len(data) > 0: if freq is not None and not freq_infer: index = cls._simple_new(data, name=name) inferred = index.inferred_freq if inferred != freq.freqstr: on_freq = cls._generate(index[0], None, len(index), name, freq) if not np.array_equal(index.asi8, on_freq.asi8): raise ValueError('Inferred frequency {0} from passed ' 'timedeltas does not conform to ' 'passed frequency {1}'.format( inferred, freq.freqstr)) index.freq = freq return index if freq_infer: index = cls._simple_new(data, name=name) inferred = index.inferred_freq if inferred: index.freq = to_offset(inferred) return index return cls._simple_new(data, name=name, freq=freq)
def default_display_func(x): if is_float(x): return "{:>.{precision}g}".format(x, precision=self.precision) else: return x
def _adjust_to_origin(arg, origin, unit): """ Helper function for to_datetime. Adjust input argument to the specified origin Parameters ---------- arg : list, tuple, ndarray, Series, Index date to be adjusted origin : 'julian' or Timestamp origin offset for the arg unit : str passed unit from to_datetime, must be 'D' Returns ------- ndarray or scalar of adjusted date(s) """ if origin == "julian": original = arg j0 = Timestamp(0).to_julian_date() if unit != "D": raise ValueError("unit must be 'D' for origin='julian'") try: arg = arg - j0 except TypeError as err: raise ValueError( "incompatible 'arg' type for given 'origin'='julian'" ) from err # preemptively check this for a nice range j_max = Timestamp.max.to_julian_date() - j0 j_min = Timestamp.min.to_julian_date() - j0 if np.any(arg > j_max) or np.any(arg < j_min): raise OutOfBoundsDatetime( f"{original} is Out of Bounds for origin='julian'" ) else: # arg must be numeric if not ( (is_scalar(arg) and (is_integer(arg) or is_float(arg))) or is_numeric_dtype(np.asarray(arg)) ): raise ValueError( f"'{arg}' is not compatible with origin='{origin}'; " "it must be numeric with a unit specified" ) # we are going to offset back to unix / epoch time try: offset = Timestamp(origin) except OutOfBoundsDatetime as err: raise OutOfBoundsDatetime(f"origin {origin} is Out of Bounds") from err except ValueError as err: raise ValueError( f"origin {origin} cannot be converted to a Timestamp" ) from err if offset.tz is not None: raise ValueError(f"origin offset {offset} must be tz-naive") td_offset = offset - Timestamp(0) # convert the offset to the unit of the arg # this should be lossless in terms of precision ioffset = td_offset // Timedelta(1, unit=unit) # scalars & ndarray-like can handle the addition if is_list_like(arg) and not isinstance(arg, (ABCSeries, Index, np.ndarray)): arg = np.asarray(arg) arg = arg + ioffset return arg
def default_display_func(x): if is_float(x): display_format = "{0:.{precision}f}".format(x, precision=self.precision) return display_format else: return x
def __new__(cls, data=None, unit=None, freq=None, start=None, end=None, periods=None, closed=None, dtype=None, copy=False, name=None, verify_integrity=True): if isinstance(data, TimedeltaIndex) and freq is None and name is None: if copy: return data.copy() else: return data._shallow_copy() freq_infer = False if not isinstance(freq, DateOffset): # if a passed freq is None, don't infer automatically if freq != 'infer': freq = to_offset(freq) else: freq_infer = True freq = None if periods is not None: if is_float(periods): periods = int(periods) elif not is_integer(periods): msg = 'periods must be a number, got {periods}' raise TypeError(msg.format(periods=periods)) if data is None: if freq is None and com._any_none(periods, start, end): msg = 'Must provide freq argument if no data is supplied' raise ValueError(msg) else: return cls._generate(start, end, periods, name, freq, closed=closed) if unit is not None: data = to_timedelta(data, unit=unit, box=False) if not isinstance(data, (np.ndarray, Index, ABCSeries)): if is_scalar(data): raise ValueError('TimedeltaIndex() must be called with a ' 'collection of some kind, %s was passed' % repr(data)) # convert if not already if getattr(data, 'dtype', None) != _TD_DTYPE: data = to_timedelta(data, unit=unit, box=False) elif copy: data = np.array(data, copy=True) # check that we are matching freqs if verify_integrity and len(data) > 0: if freq is not None and not freq_infer: index = cls._simple_new(data, name=name) cls._validate_frequency(index, freq) index.freq = freq return index if freq_infer: index = cls._simple_new(data, name=name) inferred = index.inferred_freq if inferred: index.freq = to_offset(inferred) return index return cls._simple_new(data, name=name, freq=freq)
def _adjust_to_origin(arg, origin, unit): """ Helper function for to_datetime. Adjust input argument to the specified origin Parameters ---------- arg : list, tuple, ndarray, Series, Index date to be adjusted origin : 'julian' or Timestamp origin offset for the arg unit : string passed unit from to_datetime, must be 'D' Returns ------- ndarray or scalar of adjusted date(s) """ if origin == 'julian': original = arg j0 = Timestamp(0).to_julian_date() if unit != 'D': raise ValueError("unit must be 'D' for origin='julian'") try: arg = arg - j0 except TypeError: raise ValueError("incompatible 'arg' type for given " "'origin'='julian'") # premptively check this for a nice range j_max = Timestamp.max.to_julian_date() - j0 j_min = Timestamp.min.to_julian_date() - j0 if np.any(arg > j_max) or np.any(arg < j_min): raise tslibs.OutOfBoundsDatetime( "{original} is Out of Bounds for " "origin='julian'".format(original=original)) else: # arg must be numeric if not ((is_scalar(arg) and (is_integer(arg) or is_float(arg))) or is_numeric_dtype(np.asarray(arg))): raise ValueError( "'{arg}' is not compatible with origin='{origin}'; " "it must be numeric with a unit specified ".format( arg=arg, origin=origin)) # we are going to offset back to unix / epoch time try: offset = Timestamp(origin) except tslibs.OutOfBoundsDatetime: raise tslibs.OutOfBoundsDatetime( "origin {origin} is Out of Bounds".format(origin=origin)) except ValueError: raise ValueError("origin {origin} cannot be converted " "to a Timestamp".format(origin=origin)) if offset.tz is not None: raise ValueError( "origin offset {} must be tz-naive".format(offset)) offset -= Timestamp(0) # convert the offset to the unit of the arg # this should be lossless in terms of precision offset = offset // tslibs.Timedelta(1, unit=unit) # scalars & ndarray-like can handle the addition if is_list_like(arg) and not isinstance( arg, (ABCSeries, ABCIndexClass, np.ndarray)): arg = np.asarray(arg) arg = arg + offset return arg
def maybe_promote(dtype, fill_value=np.nan): """ Find the minimal dtype that can hold both the given dtype and fill_value. Parameters ---------- dtype : np.dtype or ExtensionDtype fill_value : scalar, default np.nan Returns ------- dtype Upcasted from dtype argument if necessary. fill_value Upcasted from fill_value argument if necessary. """ if not is_scalar(fill_value) and not is_object_dtype(dtype): # with object dtype there is nothing to promote, and the user can # pass pretty much any weird fill_value they like raise ValueError("fill_value must be a scalar") # if we passed an array here, determine the fill value by dtype if isinstance(fill_value, np.ndarray): if issubclass(fill_value.dtype.type, (np.datetime64, np.timedelta64)): fill_value = fill_value.dtype.type("NaT", "ns") else: # we need to change to object type as our # fill_value is of object type if fill_value.dtype == np.object_: dtype = np.dtype(np.object_) fill_value = np.nan if dtype == np.object_ or dtype.kind in ["U", "S"]: # We treat string-like dtypes as object, and _always_ fill # with np.nan fill_value = np.nan dtype = np.dtype(np.object_) # returns tuple of (dtype, fill_value) if issubclass(dtype.type, np.datetime64): if isinstance(fill_value, datetime) and fill_value.tzinfo is not None: # Trying to insert tzaware into tznaive, have to cast to object dtype = np.dtype(np.object_) elif is_integer(fill_value) or (is_float(fill_value) and not isna(fill_value)): dtype = np.dtype(np.object_) else: try: fill_value = tslibs.Timestamp(fill_value).to_datetime64() except (TypeError, ValueError): dtype = np.dtype(np.object_) elif issubclass(dtype.type, np.timedelta64): if (is_integer(fill_value) or (is_float(fill_value) and not np.isnan(fill_value)) or isinstance(fill_value, str)): # TODO: What about str that can be a timedelta? dtype = np.dtype(np.object_) else: try: fv = tslibs.Timedelta(fill_value) except ValueError: dtype = np.dtype(np.object_) else: if fv is NaT: # NaT has no `to_timedelta64` method fill_value = np.timedelta64("NaT", "ns") else: fill_value = fv.to_timedelta64() elif is_datetime64tz_dtype(dtype): if isna(fill_value): fill_value = NaT elif not isinstance(fill_value, datetime): dtype = np.dtype(np.object_) elif fill_value.tzinfo is None: dtype = np.dtype(np.object_) elif not tz_compare(fill_value.tzinfo, dtype.tz): # TODO: sure we want to cast here? dtype = np.dtype(np.object_) elif is_extension_array_dtype(dtype) and isna(fill_value): fill_value = dtype.na_value elif is_float(fill_value): if issubclass(dtype.type, np.bool_): dtype = np.dtype(np.object_) elif issubclass(dtype.type, np.integer): dtype = np.dtype(np.float64) elif dtype.kind == "f": mst = np.min_scalar_type(fill_value) if mst > dtype: # e.g. mst is np.float64 and dtype is np.float32 dtype = mst elif dtype.kind == "c": mst = np.min_scalar_type(fill_value) dtype = np.promote_types(dtype, mst) elif is_bool(fill_value): if not issubclass(dtype.type, np.bool_): dtype = np.dtype(np.object_) elif is_integer(fill_value): if issubclass(dtype.type, np.bool_): dtype = np.dtype(np.object_) elif issubclass(dtype.type, np.integer): if not np.can_cast(fill_value, dtype): # upcast to prevent overflow mst = np.min_scalar_type(fill_value) dtype = np.promote_types(dtype, mst) if dtype.kind == "f": # Case where we disagree with numpy dtype = np.dtype(np.object_) elif is_complex(fill_value): if issubclass(dtype.type, np.bool_): dtype = np.dtype(np.object_) elif issubclass(dtype.type, (np.integer, np.floating)): mst = np.min_scalar_type(fill_value) dtype = np.promote_types(dtype, mst) elif dtype.kind == "c": mst = np.min_scalar_type(fill_value) if mst > dtype: # e.g. mst is np.complex128 and dtype is np.complex64 dtype = mst elif fill_value is None: if is_float_dtype(dtype) or is_complex_dtype(dtype): fill_value = np.nan elif is_integer_dtype(dtype): dtype = np.float64 fill_value = np.nan elif is_datetime_or_timedelta_dtype(dtype): fill_value = dtype.type("NaT", "ns") else: dtype = np.dtype(np.object_) fill_value = np.nan else: dtype = np.dtype(np.object_) # in case we have a string that looked like a number if is_extension_array_dtype(dtype): pass elif issubclass(np.dtype(dtype).type, (bytes, str)): dtype = np.dtype(np.object_) fill_value = _ensure_dtype_type(fill_value, dtype) return dtype, fill_value
def test_is_float(self): assert is_float(1.1) assert is_float(np.float64(1.1)) assert is_float(np.nan) assert not is_float(True) assert not is_float(1) assert not is_float(1 + 3j) assert not is_float(np.bool(False)) assert not is_float(np.bool_(False)) assert not is_float(np.int64(1)) assert not is_float(np.complex128(1 + 3j)) assert not is_float(None) assert not is_float('x') assert not is_float(datetime(2011, 1, 1)) assert not is_float(np.datetime64('2011-01-01')) assert not is_float(Timestamp('2011-01-01')) assert not is_float(Timestamp('2011-01-01', tz='US/Eastern')) assert not is_float(timedelta(1000)) assert not is_float(np.timedelta64(1, 'D')) assert not is_float(Timedelta('1 days'))
def __new__(cls, data=None, unit=None, freq=None, start=None, end=None, periods=None, closed=None, dtype=None, copy=False, name=None, verify_integrity=True): if isinstance(data, TimedeltaIndex) and freq is None and name is None: if copy: return data.copy() else: return data._shallow_copy() freq_infer = False if not isinstance(freq, DateOffset): # if a passed freq is None, don't infer automatically if freq != 'infer': freq = to_offset(freq) else: freq_infer = True freq = None if periods is not None: if is_float(periods): periods = int(periods) elif not is_integer(periods): msg = 'periods must be a number, got {periods}' raise TypeError(msg.format(periods=periods)) if data is None: if freq is None and com._any_none(periods, start, end): msg = 'Must provide freq argument if no data is supplied' raise ValueError(msg) else: return cls._generate(start, end, periods, name, freq, closed=closed) if unit is not None: data = to_timedelta(data, unit=unit, box=False) if not isinstance(data, (np.ndarray, Index, ABCSeries)): if is_scalar(data): raise ValueError('TimedeltaIndex() must be called with a ' 'collection of some kind, %s was passed' % repr(data)) # convert if not already if getattr(data, 'dtype', None) != _TD_DTYPE: data = to_timedelta(data, unit=unit, box=False) elif copy: data = np.array(data, copy=True) # check that we are matching freqs if verify_integrity and len(data) > 0: if freq is not None and not freq_infer: index = cls._simple_new(data, name=name) cls._validate_frequency(index, freq) index.freq = freq return index if freq_infer: index = cls._simple_new(data, name=name) inferred = index.inferred_freq if inferred: index.freq = to_offset(inferred) return index return cls._simple_new(data, name=name, freq=freq)
def interval_range( start=None, end=None, periods=None, freq=None, name: Hashable = None, closed: lib.NoDefault = lib.no_default, inclusive: IntervalClosedType | None = None, ) -> IntervalIndex: """ Return a fixed frequency IntervalIndex. Parameters ---------- start : numeric or datetime-like, default None Left bound for generating intervals. end : numeric or datetime-like, default None Right bound for generating intervals. periods : int, default None Number of periods to generate. freq : numeric, str, or DateOffset, default None The length of each interval. Must be consistent with the type of start and end, e.g. 2 for numeric, or '5H' for datetime-like. Default is 1 for numeric and 'D' for datetime-like. name : str, default None Name of the resulting IntervalIndex. closed : {'left', 'right', 'both', 'neither'}, default 'right' Whether the intervals are closed on the left-side, right-side, both or neither. .. deprecated:: 1.5.0 Argument `closed` has been deprecated to standardize boundary inputs. Use `inclusive` instead, to set each bound as closed or open. inclusive : {"both", "neither", "left", "right"}, default "both" Include boundaries; Whether to set each bound as closed or open. .. versionadded:: 1.5.0 Returns ------- IntervalIndex See Also -------- IntervalIndex : An Index of intervals that are all closed on the same side. Notes ----- Of the four parameters ``start``, ``end``, ``periods``, and ``freq``, exactly three must be specified. If ``freq`` is omitted, the resulting ``IntervalIndex`` will have ``periods`` linearly spaced elements between ``start`` and ``end``, inclusively. To learn more about datetime-like frequency strings, please see `this link <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`__. Examples -------- Numeric ``start`` and ``end`` is supported. >>> pd.interval_range(start=0, end=5, inclusive="right") IntervalIndex([(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]], dtype='interval[int64, right]') Additionally, datetime-like input is also supported. >>> pd.interval_range(start=pd.Timestamp('2017-01-01'), ... end=pd.Timestamp('2017-01-04'), inclusive="right") IntervalIndex([(2017-01-01, 2017-01-02], (2017-01-02, 2017-01-03], (2017-01-03, 2017-01-04]], dtype='interval[datetime64[ns], right]') The ``freq`` parameter specifies the frequency between the left and right. endpoints of the individual intervals within the ``IntervalIndex``. For numeric ``start`` and ``end``, the frequency must also be numeric. >>> pd.interval_range(start=0, periods=4, freq=1.5, inclusive="right") IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]], dtype='interval[float64, right]') Similarly, for datetime-like ``start`` and ``end``, the frequency must be convertible to a DateOffset. >>> pd.interval_range(start=pd.Timestamp('2017-01-01'), ... periods=3, freq='MS', inclusive="right") IntervalIndex([(2017-01-01, 2017-02-01], (2017-02-01, 2017-03-01], (2017-03-01, 2017-04-01]], dtype='interval[datetime64[ns], right]') Specify ``start``, ``end``, and ``periods``; the frequency is generated automatically (linearly spaced). >>> pd.interval_range(start=0, end=6, periods=4, inclusive="right") IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]], dtype='interval[float64, right]') The ``inclusive`` parameter specifies which endpoints of the individual intervals within the ``IntervalIndex`` are closed. >>> pd.interval_range(end=5, periods=4, inclusive='both') IntervalIndex([[1, 2], [2, 3], [3, 4], [4, 5]], dtype='interval[int64, both]') """ if inclusive is not None and not isinstance(closed, lib.NoDefault): raise ValueError( "Deprecated argument `closed` cannot be passed " "if argument `inclusive` is not None" ) elif not isinstance(closed, lib.NoDefault): warnings.warn( "Argument `closed` is deprecated in favor of `inclusive`.", FutureWarning, stacklevel=2, ) if closed is None: inclusive = "both" elif closed in ("both", "neither", "left", "right"): inclusive = closed else: raise ValueError( "Argument `closed` has to be either" "'both', 'neither', 'left' or 'right'" ) elif inclusive is None: inclusive = "both" start = maybe_box_datetimelike(start) end = maybe_box_datetimelike(end) endpoint = start if start is not None else end if freq is None and com.any_none(periods, start, end): freq = 1 if is_number(endpoint) else "D" if com.count_not_none(start, end, periods, freq) != 3: raise ValueError( "Of the four parameters: start, end, periods, and " "freq, exactly three must be specified" ) if not _is_valid_endpoint(start): raise ValueError(f"start must be numeric or datetime-like, got {start}") elif not _is_valid_endpoint(end): raise ValueError(f"end must be numeric or datetime-like, got {end}") if is_float(periods): periods = int(periods) elif not is_integer(periods) and periods is not None: raise TypeError(f"periods must be a number, got {periods}") if freq is not None and not is_number(freq): try: freq = to_offset(freq) except ValueError as err: raise ValueError( f"freq must be numeric or convertible to DateOffset, got {freq}" ) from err # verify type compatibility if not all( [ _is_type_compatible(start, end), _is_type_compatible(start, freq), _is_type_compatible(end, freq), ] ): raise TypeError("start, end, freq need to be type compatible") # +1 to convert interval count to breaks count (n breaks = n-1 intervals) if periods is not None: periods += 1 breaks: np.ndarray | TimedeltaIndex | DatetimeIndex if is_number(endpoint): # force consistency between start/end/freq (lower end if freq skips it) if com.all_not_none(start, end, freq): end -= (end - start) % freq # compute the period/start/end if unspecified (at most one) if periods is None: periods = int((end - start) // freq) + 1 elif start is None: start = end - (periods - 1) * freq elif end is None: end = start + (periods - 1) * freq breaks = np.linspace(start, end, periods) if all(is_integer(x) for x in com.not_none(start, end, freq)): # np.linspace always produces float output # error: Incompatible types in assignment (expression has type # "Union[ExtensionArray, ndarray]", variable has type "ndarray") breaks = maybe_downcast_numeric( # type: ignore[assignment] breaks, np.dtype("int64") ) else: # delegate to the appropriate range function if isinstance(endpoint, Timestamp): breaks = date_range(start=start, end=end, periods=periods, freq=freq) else: breaks = timedelta_range(start=start, end=end, periods=periods, freq=freq) return IntervalIndex.from_breaks(breaks, name=name, closed=inclusive)
def test_is_float(self): assert is_float(1.1) assert is_float(np.float64(1.1)) assert is_float(np.nan) assert not is_float(True) assert not is_float(1) assert not is_float(1 + 3j) assert not is_float(np.bool(False)) assert not is_float(np.bool_(False)) assert not is_float(np.int64(1)) assert not is_float(np.complex128(1 + 3j)) assert not is_float(None) assert not is_float('x') assert not is_float(datetime(2011, 1, 1)) assert not is_float(np.datetime64('2011-01-01')) assert not is_float(Timestamp('2011-01-01')) assert not is_float(Timestamp('2011-01-01', tz='US/Eastern')) assert not is_float(timedelta(1000)) assert not is_float(np.timedelta64(1, 'D')) assert not is_float(Timedelta('1 days'))
def __new__(cls, data=None, ordinal=None, freq=None, start=None, end=None, periods=None, copy=False, name=None, tz=None, dtype=None, **kwargs): if periods is not None: if is_float(periods): periods = int(periods) elif not is_integer(periods): msg = 'periods must be a number, got {periods}' raise TypeError(msg.format(periods=periods)) if name is None and hasattr(data, 'name'): name = data.name if dtype is not None: dtype = pandas_dtype(dtype) if not is_period_dtype(dtype): raise ValueError('dtype must be PeriodDtype') if freq is None: freq = dtype.freq elif freq != dtype.freq: msg = 'specified freq and dtype are different' raise IncompatibleFrequency(msg) # coerce freq to freq object, otherwise it can be coerced elementwise # which is slow if freq: freq = Period._maybe_convert_freq(freq) if data is None: if ordinal is not None: data = np.asarray(ordinal, dtype=np.int64) else: data, freq = cls._generate_range(start, end, periods, freq, kwargs) return cls._from_ordinals(data, name=name, freq=freq) if isinstance(data, PeriodIndex): if freq is None or freq == data.freq: # no freq change freq = data.freq data = data._values else: base1, _ = _gfc(data.freq) base2, _ = _gfc(freq) data = period.period_asfreq_arr(data._values, base1, base2, 1) return cls._simple_new(data, name=name, freq=freq) # not array / index if not isinstance(data, (np.ndarray, PeriodIndex, DatetimeIndex, Int64Index)): if is_scalar(data) or isinstance(data, Period): cls._scalar_data_error(data) # other iterable of some kind if not isinstance(data, (list, tuple)): data = list(data) data = np.asarray(data) # datetime other than period if is_datetime64_dtype(data.dtype): data = dt64arr_to_periodarr(data, freq, tz) return cls._from_ordinals(data, name=name, freq=freq) # check not floats if infer_dtype(data) == 'floating' and len(data) > 0: raise TypeError("PeriodIndex does not allow " "floating point in construction") # anything else, likely an array of strings or periods data = _ensure_object(data) freq = freq or period.extract_freq(data) data = period.extract_ordinals(data, freq) return cls._from_ordinals(data, name=name, freq=freq)
def __contains__(self, other: Any) -> bool: hash(other) if super().__contains__(other): return True return is_float(other) and np.isnan(other) and self.hasnans
def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False, utc=None, box=True, format=None, exact=True, unit=None, infer_datetime_format=False, origin='unix', cache=False): """ Convert argument to datetime. Parameters ---------- arg : integer, float, string, datetime, list, tuple, 1-d array, Series .. versionadded:: 0.18.1 or DataFrame/dict-like errors : {'ignore', 'raise', 'coerce'}, default 'raise' - If 'raise', then invalid parsing will raise an exception - If 'coerce', then invalid parsing will be set as NaT - If 'ignore', then invalid parsing will return the input dayfirst : boolean, default False Specify a date parse order if `arg` is str or its list-likes. If True, parses dates with the day first, eg 10/11/12 is parsed as 2012-11-10. Warning: dayfirst=True is not strict, but will prefer to parse with day first (this is a known bug, based on dateutil behavior). yearfirst : boolean, default False Specify a date parse order if `arg` is str or its list-likes. - If True parses dates with the year first, eg 10/11/12 is parsed as 2010-11-12. - If both dayfirst and yearfirst are True, yearfirst is preceded (same as dateutil). Warning: yearfirst=True is not strict, but will prefer to parse with year first (this is a known bug, based on dateutil beahavior). .. versionadded:: 0.16.1 utc : boolean, default None Return UTC DatetimeIndex if True (converting any tz-aware datetime.datetime objects as well). box : boolean, default True - If True returns a DatetimeIndex - If False returns ndarray of values. format : string, default None strftime to parse time, eg "%d/%m/%Y", note that "%f" will parse all the way up to nanoseconds. exact : boolean, True by default - If True, require an exact format match. - If False, allow the format to match anywhere in the target string. unit : string, default 'ns' unit of the arg (D,s,ms,us,ns) denote the unit, which is an integer or float number. This will be based off the origin. Example, with unit='ms' and origin='unix' (the default), this would calculate the number of milliseconds to the unix epoch start. infer_datetime_format : boolean, default False If True and no `format` is given, attempt to infer the format of the datetime strings, and if it can be inferred, switch to a faster method of parsing them. In some cases this can increase the parsing speed by ~5-10x. origin : scalar, default is 'unix' Define the reference date. The numeric values would be parsed as number of units (defined by `unit`) since this reference date. - If 'unix' (or POSIX) time; origin is set to 1970-01-01. - If 'julian', unit must be 'D', and origin is set to beginning of Julian Calendar. Julian day number 0 is assigned to the day starting at noon on January 1, 4713 BC. - If Timestamp convertible, origin is set to Timestamp identified by origin. .. versionadded:: 0.20.0 cache : boolean, default False If True, use a cache of unique, converted dates to apply the datetime conversion. May produce sigificant speed-up when parsing duplicate date strings, especially ones with timezone offsets. .. versionadded:: 0.22.0 Returns ------- ret : datetime if parsing succeeded. Return type depends on input: - list-like: DatetimeIndex - Series: Series of datetime64 dtype - scalar: Timestamp In case when it is not possible to return designated types (e.g. when any element of input is before Timestamp.min or after Timestamp.max) return will have datetime.datetime type (or correspoding array/Series). Examples -------- Assembling a datetime from multiple columns of a DataFrame. The keys can be common abbreviations like ['year', 'month', 'day', 'minute', 'second', 'ms', 'us', 'ns']) or plurals of the same >>> df = pd.DataFrame({'year': [2015, 2016], 'month': [2, 3], 'day': [4, 5]}) >>> pd.to_datetime(df) 0 2015-02-04 1 2016-03-05 dtype: datetime64[ns] If a date does not meet the `timestamp limitations <http://pandas.pydata.org/pandas-docs/stable/timeseries.html #timeseries-timestamp-limits>`_, passing errors='ignore' will return the original input instead of raising any exception. Passing errors='coerce' will force an out-of-bounds date to NaT, in addition to forcing non-dates (or non-parseable dates) to NaT. >>> pd.to_datetime('13000101', format='%Y%m%d', errors='ignore') datetime.datetime(1300, 1, 1, 0, 0) >>> pd.to_datetime('13000101', format='%Y%m%d', errors='coerce') NaT Passing infer_datetime_format=True can often-times speedup a parsing if its not an ISO8601 format exactly, but in a regular format. >>> s = pd.Series(['3/11/2000', '3/12/2000', '3/13/2000']*1000) >>> s.head() 0 3/11/2000 1 3/12/2000 2 3/13/2000 3 3/11/2000 4 3/12/2000 dtype: object >>> %timeit pd.to_datetime(s,infer_datetime_format=True) 100 loops, best of 3: 10.4 ms per loop >>> %timeit pd.to_datetime(s,infer_datetime_format=False) 1 loop, best of 3: 471 ms per loop Using a unix epoch time >>> pd.to_datetime(1490195805, unit='s') Timestamp('2017-03-22 15:16:45') >>> pd.to_datetime(1490195805433502912, unit='ns') Timestamp('2017-03-22 15:16:45.433502912') .. warning:: For float arg, precision rounding might happen. To prevent unexpected behavior use a fixed-width exact type. Using a non-unix epoch origin >>> pd.to_datetime([1, 2, 3], unit='D', origin=pd.Timestamp('1960-01-01')) 0 1960-01-02 1 1960-01-03 2 1960-01-04 See also -------- pandas.DataFrame.astype : Cast argument to a specified dtype. pandas.to_timedelta : Convert argument to timedelta. """ from pandas.core.indexes.datetimes import DatetimeIndex tz = 'utc' if utc else None def _convert_listlike(arg, box, format, name=None, tz=tz): if isinstance(arg, (list, tuple)): arg = np.array(arg, dtype='O') # these are shortcutable if is_datetime64tz_dtype(arg): if not isinstance(arg, DatetimeIndex): return DatetimeIndex(arg, tz=tz, name=name) if utc: arg = arg.tz_convert(None).tz_localize('UTC') return arg elif is_datetime64_ns_dtype(arg): if box and not isinstance(arg, DatetimeIndex): try: return DatetimeIndex(arg, tz=tz, name=name) except ValueError: pass return arg elif unit is not None: if format is not None: raise ValueError("cannot specify both format and unit") arg = getattr(arg, 'values', arg) result = tslib.array_with_unit_to_datetime(arg, unit, errors=errors) if box: if errors == 'ignore': from pandas import Index return Index(result) return DatetimeIndex(result, tz=tz, name=name) return result elif getattr(arg, 'ndim', 1) > 1: raise TypeError('arg must be a string, datetime, list, tuple, ' '1-d array, or Series') arg = _ensure_object(arg) require_iso8601 = False if infer_datetime_format and format is None: format = _guess_datetime_format_for_array(arg, dayfirst=dayfirst) if format is not None: # There is a special fast-path for iso8601 formatted # datetime strings, so in those cases don't use the inferred # format because this path makes process slower in this # special case format_is_iso8601 = _format_is_iso(format) if format_is_iso8601: require_iso8601 = not infer_datetime_format format = None try: result = None if format is not None: # shortcut formatting here if format == '%Y%m%d': try: result = _attempt_YYYYMMDD(arg, errors=errors) except: raise ValueError("cannot convert the input to " "'%Y%m%d' date format") # fallback if result is None: try: result = array_strptime(arg, format, exact=exact, errors=errors) except tslib.OutOfBoundsDatetime: if errors == 'raise': raise result = arg except ValueError: # if format was inferred, try falling back # to array_to_datetime - terminate here # for specified formats if not infer_datetime_format: if errors == 'raise': raise result = arg if result is None and (format is None or infer_datetime_format): result = tslib.array_to_datetime( arg, errors=errors, utc=utc, dayfirst=dayfirst, yearfirst=yearfirst, require_iso8601=require_iso8601 ) if is_datetime64_dtype(result) and box: result = DatetimeIndex(result, tz=tz, name=name) return result except ValueError as e: try: values, tz = conversion.datetime_to_datetime64(arg) return DatetimeIndex._simple_new(values, name=name, tz=tz) except (ValueError, TypeError): raise e if arg is None: return None # handle origin if origin == 'julian': original = arg j0 = tslib.Timestamp(0).to_julian_date() if unit != 'D': raise ValueError("unit must be 'D' for origin='julian'") try: arg = arg - j0 except: raise ValueError("incompatible 'arg' type for given " "'origin'='julian'") # premptively check this for a nice range j_max = tslib.Timestamp.max.to_julian_date() - j0 j_min = tslib.Timestamp.min.to_julian_date() - j0 if np.any(arg > j_max) or np.any(arg < j_min): raise tslib.OutOfBoundsDatetime( "{original} is Out of Bounds for " "origin='julian'".format(original=original)) elif origin not in ['unix', 'julian']: # arg must be a numeric original = arg if not ((is_scalar(arg) and (is_integer(arg) or is_float(arg))) or is_numeric_dtype(np.asarray(arg))): raise ValueError( "'{arg}' is not compatible with origin='{origin}'; " "it must be numeric with a unit specified ".format( arg=arg, origin=origin)) # we are going to offset back to unix / epoch time try: offset = tslib.Timestamp(origin) except tslib.OutOfBoundsDatetime: raise tslib.OutOfBoundsDatetime( "origin {origin} is Out of Bounds".format(origin=origin)) except ValueError: raise ValueError("origin {origin} cannot be converted " "to a Timestamp".format(origin=origin)) if offset.tz is not None: raise ValueError( "origin offset {} must be tz-naive".format(offset)) offset -= tslib.Timestamp(0) # convert the offset to the unit of the arg # this should be lossless in terms of precision offset = offset // tslib.Timedelta(1, unit=unit) # scalars & ndarray-like can handle the addition if is_list_like(arg) and not isinstance( arg, (ABCSeries, ABCIndexClass, np.ndarray)): arg = np.asarray(arg) arg = arg + offset if isinstance(arg, tslib.Timestamp): result = arg elif isinstance(arg, ABCSeries): cache_array = _maybe_cache(arg, format, cache, tz, _convert_listlike) if not cache_array.empty: result = arg.map(cache_array) else: from pandas import Series values = _convert_listlike(arg._values, True, format) result = Series(values, index=arg.index, name=arg.name) elif isinstance(arg, (ABCDataFrame, MutableMapping)): result = _assemble_from_unit_mappings(arg, errors=errors) elif isinstance(arg, ABCIndexClass): cache_array = _maybe_cache(arg, format, cache, tz, _convert_listlike) if not cache_array.empty: result = _convert_and_box_cache(arg, cache_array, box, errors, name=arg.name) else: result = _convert_listlike(arg, box, format, name=arg.name) elif is_list_like(arg): cache_array = _maybe_cache(arg, format, cache, tz, _convert_listlike) if not cache_array.empty: result = _convert_and_box_cache(arg, cache_array, box, errors) else: result = _convert_listlike(arg, box, format) else: result = _convert_listlike(np.array([arg]), box, format)[0] return result
def interval_range(start=None, end=None, periods=None, freq=None, name=None, closed="right"): """ Return a fixed frequency IntervalIndex. Parameters ---------- start : numeric or datetime-like, default None Left bound for generating intervals. end : numeric or datetime-like, default None Right bound for generating intervals. periods : int, default None Number of periods to generate. freq : numeric, str, or DateOffset, default None The length of each interval. Must be consistent with the type of start and end, e.g. 2 for numeric, or '5H' for datetime-like. Default is 1 for numeric and 'D' for datetime-like. name : str, default None Name of the resulting IntervalIndex. closed : {'left', 'right', 'both', 'neither'}, default 'right' Whether the intervals are closed on the left-side, right-side, both or neither. Returns ------- IntervalIndex See Also -------- IntervalIndex : An Index of intervals that are all closed on the same side. Notes ----- Of the four parameters ``start``, ``end``, ``periods``, and ``freq``, exactly three must be specified. If ``freq`` is omitted, the resulting ``IntervalIndex`` will have ``periods`` linearly spaced elements between ``start`` and ``end``, inclusively. To learn more about datetime-like frequency strings, please see `this link <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`__. Examples -------- Numeric ``start`` and ``end`` is supported. >>> pd.interval_range(start=0, end=5) IntervalIndex([(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]], closed='right', dtype='interval[int64]') Additionally, datetime-like input is also supported. >>> pd.interval_range(start=pd.Timestamp('2017-01-01'), ... end=pd.Timestamp('2017-01-04')) IntervalIndex([(2017-01-01, 2017-01-02], (2017-01-02, 2017-01-03], (2017-01-03, 2017-01-04]], closed='right', dtype='interval[datetime64[ns]]') The ``freq`` parameter specifies the frequency between the left and right. endpoints of the individual intervals within the ``IntervalIndex``. For numeric ``start`` and ``end``, the frequency must also be numeric. >>> pd.interval_range(start=0, periods=4, freq=1.5) IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]], closed='right', dtype='interval[float64]') Similarly, for datetime-like ``start`` and ``end``, the frequency must be convertible to a DateOffset. >>> pd.interval_range(start=pd.Timestamp('2017-01-01'), ... periods=3, freq='MS') IntervalIndex([(2017-01-01, 2017-02-01], (2017-02-01, 2017-03-01], (2017-03-01, 2017-04-01]], closed='right', dtype='interval[datetime64[ns]]') Specify ``start``, ``end``, and ``periods``; the frequency is generated automatically (linearly spaced). >>> pd.interval_range(start=0, end=6, periods=4) IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]], closed='right', dtype='interval[float64]') The ``closed`` parameter specifies which endpoints of the individual intervals within the ``IntervalIndex`` are closed. >>> pd.interval_range(end=5, periods=4, closed='both') IntervalIndex([[1, 2], [2, 3], [3, 4], [4, 5]], closed='both', dtype='interval[int64]') """ start = com.maybe_box_datetimelike(start) end = com.maybe_box_datetimelike(end) endpoint = start if start is not None else end if freq is None and com.any_none(periods, start, end): freq = 1 if is_number(endpoint) else "D" if com.count_not_none(start, end, periods, freq) != 3: raise ValueError("Of the four parameters: start, end, periods, and " "freq, exactly three must be specified") if not _is_valid_endpoint(start): raise ValueError( f"start must be numeric or datetime-like, got {start}") elif not _is_valid_endpoint(end): raise ValueError(f"end must be numeric or datetime-like, got {end}") if is_float(periods): periods = int(periods) elif not is_integer(periods) and periods is not None: raise TypeError(f"periods must be a number, got {periods}") if freq is not None and not is_number(freq): try: freq = to_offset(freq) except ValueError as err: raise ValueError( f"freq must be numeric or convertible to DateOffset, got {freq}" ) from err # verify type compatibility if not all([ _is_type_compatible(start, end), _is_type_compatible(start, freq), _is_type_compatible(end, freq), ]): raise TypeError("start, end, freq need to be type compatible") # +1 to convert interval count to breaks count (n breaks = n-1 intervals) if periods is not None: periods += 1 if is_number(endpoint): # force consistency between start/end/freq (lower end if freq skips it) if com.all_not_none(start, end, freq): end -= (end - start) % freq # compute the period/start/end if unspecified (at most one) if periods is None: periods = int((end - start) // freq) + 1 elif start is None: start = end - (periods - 1) * freq elif end is None: end = start + (periods - 1) * freq breaks = np.linspace(start, end, periods) if all(is_integer(x) for x in com.not_none(start, end, freq)): # np.linspace always produces float output breaks = maybe_downcast_to_dtype(breaks, "int64") else: # delegate to the appropriate range function if isinstance(endpoint, Timestamp): range_func = date_range else: range_func = timedelta_range breaks = range_func(start=start, end=end, periods=periods, freq=freq) return IntervalIndex.from_breaks(breaks, name=name, closed=closed)
def _get_string_slice(self, key): if is_integer(key) or is_float(key) or key is NaT: self._invalid_indexer('slice', key) loc = self._partial_td_slice(key) return loc
def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False, utc=None, box=True, format=None, exact=True, unit=None, infer_datetime_format=False, origin='unix', cache=False): """ Convert argument to datetime. Parameters ---------- arg : integer, float, string, datetime, list, tuple, 1-d array, Series .. versionadded: 0.18.1 or DataFrame/dict-like errors : {'ignore', 'raise', 'coerce'}, default 'raise' - If 'raise', then invalid parsing will raise an exception - If 'coerce', then invalid parsing will be set as NaT - If 'ignore', then invalid parsing will return the input dayfirst : boolean, default False Specify a date parse order if `arg` is str or its list-likes. If True, parses dates with the day first, eg 10/11/12 is parsed as 2012-11-10. Warning: dayfirst=True is not strict, but will prefer to parse with day first (this is a known bug, based on dateutil behavior). yearfirst : boolean, default False Specify a date parse order if `arg` is str or its list-likes. - If True parses dates with the year first, eg 10/11/12 is parsed as 2010-11-12. - If both dayfirst and yearfirst are True, yearfirst is preceded (same as dateutil). Warning: yearfirst=True is not strict, but will prefer to parse with year first (this is a known bug, based on dateutil beahavior). .. versionadded: 0.16.1 utc : boolean, default None Return UTC DatetimeIndex if True (converting any tz-aware datetime.datetime objects as well). box : boolean, default True - If True returns a DatetimeIndex - If False returns ndarray of values. format : string, default None strftime to parse time, eg "%d/%m/%Y", note that "%f" will parse all the way up to nanoseconds. exact : boolean, True by default - If True, require an exact format match. - If False, allow the format to match anywhere in the target string. unit : string, default 'ns' unit of the arg (D,s,ms,us,ns) denote the unit, which is an integer or float number. This will be based off the origin. Example, with unit='ms' and origin='unix' (the default), this would calculate the number of milliseconds to the unix epoch start. infer_datetime_format : boolean, default False If True and no `format` is given, attempt to infer the format of the datetime strings, and if it can be inferred, switch to a faster method of parsing them. In some cases this can increase the parsing speed by ~5-10x. origin : scalar, default is 'unix' Define the reference date. The numeric values would be parsed as number of units (defined by `unit`) since this reference date. - If 'unix' (or POSIX) time; origin is set to 1970-01-01. - If 'julian', unit must be 'D', and origin is set to beginning of Julian Calendar. Julian day number 0 is assigned to the day starting at noon on January 1, 4713 BC. - If Timestamp convertible, origin is set to Timestamp identified by origin. .. versionadded: 0.20.0 cache : boolean, default False If True, use a cache of unique, converted dates to apply the datetime conversion. May produce sigificant speed-up when parsing duplicate date strings, especially ones with timezone offsets. .. versionadded: 0.22.0 Returns ------- ret : datetime if parsing succeeded. Return type depends on input: - list-like: DatetimeIndex - Series: Series of datetime64 dtype - scalar: Timestamp In case when it is not possible to return designated types (e.g. when any element of input is before Timestamp.min or after Timestamp.max) return will have datetime.datetime type (or correspoding array/Series). Examples -------- Assembling a datetime from multiple columns of a DataFrame. The keys can be common abbreviations like ['year', 'month', 'day', 'minute', 'second', 'ms', 'us', 'ns']) or plurals of the same >>> df = pd.DataFrame({'year': [2015, 2016], 'month': [2, 3], 'day': [4, 5]}) >>> pd.to_datetime(df) 0 2015-02-04 1 2016-03-05 dtype: datetime64[ns] If a date does not meet the `timestamp limitations <http://pandas.pydata.org/pandas-docs/stable/timeseries.html #timeseries-timestamp-limits>`_, passing errors='ignore' will return the original input instead of raising any exception. Passing errors='coerce' will force an out-of-bounds date to NaT, in addition to forcing non-dates (or non-parseable dates) to NaT. >>> pd.to_datetime('13000101', format='%Y%m%d', errors='ignore') datetime.datetime(1300, 1, 1, 0, 0) >>> pd.to_datetime('13000101', format='%Y%m%d', errors='coerce') NaT Passing infer_datetime_format=True can often-times speedup a parsing if its not an ISO8601 format exactly, but in a regular format. >>> s = pd.Series(['3/11/2000', '3/12/2000', '3/13/2000']*1000) >>> s.head() 0 3/11/2000 1 3/12/2000 2 3/13/2000 3 3/11/2000 4 3/12/2000 dtype: object >>> %timeit pd.to_datetime(s,infer_datetime_format=True) 100 loops, best of 3: 10.4 ms per loop >>> %timeit pd.to_datetime(s,infer_datetime_format=False) 1 loop, best of 3: 471 ms per loop Using a unix epoch time >>> pd.to_datetime(1490195805, unit='s') Timestamp('2017-03-22 15:16:45') >>> pd.to_datetime(1490195805433502912, unit='ns') Timestamp('2017-03-22 15:16:45.433502912') .. warning:: For float arg, precision rounding might happen. To prevent unexpected behavior use a fixed-width exact type. Using a non-unix epoch origin >>> pd.to_datetime([1, 2, 3], unit='D', origin=pd.Timestamp('1960-01-01')) 0 1960-01-02 1 1960-01-03 2 1960-01-04 See also -------- pandas.DataFrame.astype : Cast argument to a specified dtype. pandas.to_timedelta : Convert argument to timedelta. """ from pandas.core.indexes.datetimes import DatetimeIndex tz = 'utc' if utc else None def _convert_listlike(arg, box, format, name=None, tz=tz): if isinstance(arg, (list, tuple)): arg = np.array(arg, dtype='O') # these are shortcutable if is_datetime64tz_dtype(arg): if not isinstance(arg, DatetimeIndex): return DatetimeIndex(arg, tz=tz, name=name) if utc: arg = arg.tz_convert(None).tz_localize('UTC') return arg elif is_datetime64_ns_dtype(arg): if box and not isinstance(arg, DatetimeIndex): try: return DatetimeIndex(arg, tz=tz, name=name) except ValueError: pass return arg elif unit is not None: if format is not None: raise ValueError("cannot specify both format and unit") arg = getattr(arg, 'values', arg) result = tslib.array_with_unit_to_datetime(arg, unit, errors=errors) if box: if errors == 'ignore': from pandas import Index return Index(result) return DatetimeIndex(result, tz=tz, name=name) return result elif getattr(arg, 'ndim', 1) > 1: raise TypeError('arg must be a string, datetime, list, tuple, ' '1-d array, or Series') arg = _ensure_object(arg) require_iso8601 = False if infer_datetime_format and format is None: format = _guess_datetime_format_for_array(arg, dayfirst=dayfirst) if format is not None: # There is a special fast-path for iso8601 formatted # datetime strings, so in those cases don't use the inferred # format because this path makes process slower in this # special case format_is_iso8601 = _format_is_iso(format) if format_is_iso8601: require_iso8601 = not infer_datetime_format format = None try: result = None if format is not None: # shortcut formatting here if format == '%Y%m%d': try: result = _attempt_YYYYMMDD(arg, errors=errors) except: raise ValueError("cannot convert the input to " "'%Y%m%d' date format") # fallback if result is None: try: result = array_strptime(arg, format, exact=exact, errors=errors) except tslib.OutOfBoundsDatetime: if errors == 'raise': raise result = arg except ValueError: # if format was inferred, try falling back # to array_to_datetime - terminate here # for specified formats if not infer_datetime_format: if errors == 'raise': raise result = arg if result is None and (format is None or infer_datetime_format): result = tslib.array_to_datetime( arg, errors=errors, utc=utc, dayfirst=dayfirst, yearfirst=yearfirst, require_iso8601=require_iso8601) if is_datetime64_dtype(result) and box: result = DatetimeIndex(result, tz=tz, name=name) return result except ValueError as e: try: values, tz = tslib.datetime_to_datetime64(arg) return DatetimeIndex._simple_new(values, name=name, tz=tz) except (ValueError, TypeError): raise e if arg is None: return None # handle origin if origin == 'julian': original = arg j0 = tslib.Timestamp(0).to_julian_date() if unit != 'D': raise ValueError("unit must be 'D' for origin='julian'") try: arg = arg - j0 except: raise ValueError("incompatible 'arg' type for given " "'origin'='julian'") # premptively check this for a nice range j_max = tslib.Timestamp.max.to_julian_date() - j0 j_min = tslib.Timestamp.min.to_julian_date() - j0 if np.any(arg > j_max) or np.any(arg < j_min): raise tslib.OutOfBoundsDatetime( "{original} is Out of Bounds for " "origin='julian'".format(original=original)) elif origin not in ['unix', 'julian']: # arg must be a numeric original = arg if not ((is_scalar(arg) and (is_integer(arg) or is_float(arg))) or is_numeric_dtype(np.asarray(arg))): raise ValueError( "'{arg}' is not compatible with origin='{origin}'; " "it must be numeric with a unit specified ".format( arg=arg, origin=origin)) # we are going to offset back to unix / epoch time try: offset = tslib.Timestamp(origin) except tslib.OutOfBoundsDatetime: raise tslib.OutOfBoundsDatetime( "origin {origin} is Out of Bounds".format(origin=origin)) except ValueError: raise ValueError("origin {origin} cannot be converted " "to a Timestamp".format(origin=origin)) if offset.tz is not None: raise ValueError( "origin offset {} must be tz-naive".format(offset)) offset -= tslib.Timestamp(0) # convert the offset to the unit of the arg # this should be lossless in terms of precision offset = offset // tslib.Timedelta(1, unit=unit) # scalars & ndarray-like can handle the addition if is_list_like(arg) and not isinstance( arg, (ABCSeries, ABCIndexClass, np.ndarray)): arg = np.asarray(arg) arg = arg + offset if isinstance(arg, tslib.Timestamp): result = arg elif isinstance(arg, ABCSeries): cache_array = _maybe_cache(arg, format, cache, tz, _convert_listlike) if not cache_array.empty: result = arg.map(cache_array) else: from pandas import Series values = _convert_listlike(arg._values, True, format) result = Series(values, index=arg.index, name=arg.name) elif isinstance(arg, (ABCDataFrame, MutableMapping)): result = _assemble_from_unit_mappings(arg, errors=errors) elif isinstance(arg, ABCIndexClass): cache_array = _maybe_cache(arg, format, cache, tz, _convert_listlike) if not cache_array.empty: result = _convert_and_box_cache(arg, cache_array, box, errors, name=arg.name) else: result = _convert_listlike(arg, box, format, name=arg.name) elif is_list_like(arg): cache_array = _maybe_cache(arg, format, cache, tz, _convert_listlike) if not cache_array.empty: result = _convert_and_box_cache(arg, cache_array, box, errors) else: result = _convert_listlike(arg, box, format) else: result = _convert_listlike(np.array([arg]), box, format)[0] return result
def default_display_func(x): if is_float(x): return '{:>.{precision}g}'.format(x, precision=self.precision) else: return x
def __new__(cls, data=None, ordinal=None, freq=None, start=None, end=None, periods=None, tz=None, dtype=None, copy=False, name=None, **fields): valid_field_set = { 'year', 'month', 'day', 'quarter', 'hour', 'minute', 'second' } if not set(fields).issubset(valid_field_set): raise TypeError( '__new__() got an unexpected keyword argument {}'.format( list(set(fields) - valid_field_set)[0])) if periods is not None: if is_float(periods): periods = int(periods) elif not is_integer(periods): msg = 'periods must be a number, got {periods}' raise TypeError(msg.format(periods=periods)) if name is None and hasattr(data, 'name'): name = data.name if dtype is not None: dtype = pandas_dtype(dtype) if not is_period_dtype(dtype): raise ValueError('dtype must be PeriodDtype') if freq is None: freq = dtype.freq elif freq != dtype.freq: msg = 'specified freq and dtype are different' raise IncompatibleFrequency(msg) # coerce freq to freq object, otherwise it can be coerced elementwise # which is slow if freq: freq = Period._maybe_convert_freq(freq) if data is None: if ordinal is not None: data = np.asarray(ordinal, dtype=np.int64) else: data, freq = cls._generate_range(start, end, periods, freq, fields) return cls._from_ordinals(data, name=name, freq=freq) if isinstance(data, PeriodIndex): if freq is None or freq == data.freq: # no freq change freq = data.freq data = data._ndarray_values else: base1, _ = _gfc(data.freq) base2, _ = _gfc(freq) data = period.period_asfreq_arr(data._ndarray_values, base1, base2, 1) return cls._simple_new(data, name=name, freq=freq) # not array / index if not isinstance( data, (np.ndarray, PeriodIndex, DatetimeIndex, Int64Index)): if is_scalar(data) or isinstance(data, Period): cls._scalar_data_error(data) # other iterable of some kind if not isinstance(data, (list, tuple)): data = list(data) data = np.asarray(data) # datetime other than period if is_datetime64_dtype(data.dtype): data = dt64arr_to_periodarr(data, freq, tz) return cls._from_ordinals(data, name=name, freq=freq) # check not floats if infer_dtype(data) == 'floating' and len(data) > 0: raise TypeError("PeriodIndex does not allow " "floating point in construction") # anything else, likely an array of strings or periods data = _ensure_object(data) freq = freq or period.extract_freq(data) data = period.extract_ordinals(data, freq) return cls._from_ordinals(data, name=name, freq=freq)