def _is_type_compatible(a, b): """helper for interval_range to check type compat of start/end/freq""" is_ts_compat = lambda x: isinstance(x, (Timestamp, DateOffset)) is_td_compat = lambda x: isinstance(x, (Timedelta, DateOffset)) return ((is_number(a) and is_number(b)) or (is_ts_compat(a) and is_ts_compat(b)) or (is_td_compat(a) and is_td_compat(b)) or com._any_none(a, b))
def _is_type_compatible(a, b) -> bool: """ Helper for interval_range to check type compat of start/end/freq. """ is_ts_compat = lambda x: isinstance(x, (Timestamp, BaseOffset)) is_td_compat = lambda x: isinstance(x, (Timedelta, BaseOffset)) return ( (is_number(a) and is_number(b)) or (is_ts_compat(a) and is_ts_compat(b)) or (is_td_compat(a) and is_td_compat(b)) or com.any_none(a, b) )
def _validate_fill_value(self, value): if is_bool(value) or is_bool_dtype(value): # force conversion to object # so we don't lose the bools raise TypeError elif is_scalar(value) and isna(value): if is_valid_nat_for_dtype(value, self.dtype): value = self._na_value if self.dtype.kind != "f": # raise so that caller can cast raise TypeError else: # NaT, np.datetime64("NaT"), np.timedelta64("NaT") raise TypeError elif is_scalar(value): if not is_number(value): # e.g. datetime64, timedelta64, datetime, ... raise TypeError elif lib.is_complex(value): # at least until we have a ComplexIndx raise TypeError elif is_float(value) and self.dtype.kind != "f": if not value.is_integer(): raise TypeError value = int(value) return value
def _validate_fill_value(self, value): if is_bool(value) or is_bool_dtype(value): # force conversion to object # so we don't lose the bools raise TypeError elif is_scalar(value) and isna(value): if is_valid_nat_for_dtype(value, self.dtype): value = self._na_value if self.dtype.kind != "f": # raise so that caller can cast raise TypeError else: # NaT, np.datetime64("NaT"), np.timedelta64("NaT") raise TypeError elif is_scalar(value): if not is_number(value): # e.g. datetime64, timedelta64, datetime, ... raise TypeError elif lib.is_complex(value): # at least until we have a ComplexIndx raise TypeError elif is_float(value) and self.dtype.kind != "f": if not value.is_integer(): raise TypeError value = int(value) elif hasattr(value, "dtype") and value.dtype.kind in ["m", "M"]: # TODO: if we're checking arraylike here, do so systematically raise TypeError return value
def _is_valid_endpoint(endpoint): """helper for interval_range to check if start/end are valid types""" return any([ is_number(endpoint), isinstance(endpoint, Timestamp), isinstance(endpoint, Timedelta), endpoint is None ])
def assert_attr_equal(attr: str, left, right, obj: str = "Attributes"): """ Check attributes are equal. Both objects must have attribute. Parameters ---------- attr : str Attribute name being compared. left : object right : object obj : str, default 'Attributes' Specify object name being compared, internally used to show appropriate assertion message """ __tracebackhide__ = True left_attr = getattr(left, attr) right_attr = getattr(right, attr) if left_attr is right_attr: return True elif (is_number(left_attr) and np.isnan(left_attr) and is_number(right_attr) and np.isnan(right_attr)): # np.nan return True elif (isinstance(left_attr, (np.datetime64, np.timedelta64)) and isinstance(right_attr, (np.datetime64, np.timedelta64)) and type(left_attr) is type(right_attr) and np.isnat(left_attr) and np.isnat(right_attr)): # np.datetime64("nat") or np.timedelta64("nat") return True try: result = left_attr == right_attr except TypeError: # datetimetz on rhs may raise TypeError result = False if (left_attr is pd.NA) ^ (right_attr is pd.NA): result = False elif not isinstance(result, bool): result = result.all() if result: return True else: msg = f'Attribute "{attr}" are different' raise_assert_detail(obj, msg, left_attr, right_attr)
def test_agg_cython_table(self, series, func, expected): # GH21224 # test reducing functions in # pandas.core.base.SelectionMixin._cython_table result = series.agg(func) if is_number(expected): assert np.isclose(result, expected, equal_nan=True) else: assert result == expected
def _check_as_is(x): return (self.quoting == csv.QUOTE_NONNUMERIC and is_number(x)) or isinstance(x, str)
def test_is_number(self): assert is_number(True) assert is_number(1) assert is_number(1.1) assert is_number(1 + 3j) assert is_number(np.bool(False)) assert is_number(np.int64(1)) assert is_number(np.float64(1.1)) assert is_number(np.complex128(1 + 3j)) assert is_number(np.nan) assert not is_number(None) assert not is_number('x') assert not is_number(datetime(2011, 1, 1)) assert not is_number(np.datetime64('2011-01-01')) assert not is_number(Timestamp('2011-01-01')) assert not is_number(Timestamp('2011-01-01', tz='US/Eastern')) assert not is_number(timedelta(1000)) assert not is_number(Timedelta('1 days')) # questionable assert not is_number(np.bool_(False)) assert is_number(np.timedelta64(1, 'D'))
def _is_numeric(x): """ Whether the array x is numeric. """ return all(is_number(i) for i in x)
def _check_as_is(x): return (self.quoting == csv.QUOTE_NONNUMERIC and is_number(x)) or isinstance(x, str)
def interval_range(start=None, end=None, periods=None, freq=None, name=None, closed='right'): """ Return a fixed frequency IntervalIndex Parameters ---------- start : numeric or datetime-like, default None Left bound for generating intervals end : numeric or datetime-like, default None Right bound for generating intervals periods : integer, default None Number of periods to generate freq : numeric, string, or DateOffset, default None The length of each interval. Must be consistent with the type of start and end, e.g. 2 for numeric, or '5H' for datetime-like. Default is 1 for numeric and 'D' (calendar daily) for datetime-like. name : string, default None Name of the resulting IntervalIndex closed : string, default 'right' options are: 'left', 'right', 'both', 'neither' Notes ----- Of the three parameters: ``start``, ``end``, and ``periods``, exactly two must be specified. Returns ------- rng : IntervalIndex Examples -------- Numeric ``start`` and ``end`` is supported. >>> pd.interval_range(start=0, end=5) IntervalIndex([(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]] closed='right', dtype='interval[int64]') Additionally, datetime-like input is also supported. >>> pd.interval_range(start=pd.Timestamp('2017-01-01'), end=pd.Timestamp('2017-01-04')) IntervalIndex([(2017-01-01, 2017-01-02], (2017-01-02, 2017-01-03], (2017-01-03, 2017-01-04]] closed='right', dtype='interval[datetime64[ns]]') The ``freq`` parameter specifies the frequency between the left and right. endpoints of the individual intervals within the ``IntervalIndex``. For numeric ``start`` and ``end``, the frequency must also be numeric. >>> pd.interval_range(start=0, periods=4, freq=1.5) IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]] closed='right', dtype='interval[float64]') Similarly, for datetime-like ``start`` and ``end``, the frequency must be convertible to a DateOffset. >>> pd.interval_range(start=pd.Timestamp('2017-01-01'), periods=3, freq='MS') IntervalIndex([(2017-01-01, 2017-02-01], (2017-02-01, 2017-03-01], (2017-03-01, 2017-04-01]] closed='right', dtype='interval[datetime64[ns]]') The ``closed`` parameter specifies which endpoints of the individual intervals within the ``IntervalIndex`` are closed. >>> pd.interval_range(end=5, periods=4, closed='both') IntervalIndex([[1, 2], [2, 3], [3, 4], [4, 5]] closed='both', dtype='interval[int64]') See Also -------- IntervalIndex : an Index of intervals that are all closed on the same side. """ if com._count_not_none(start, end, periods) != 2: raise ValueError('Of the three parameters: start, end, and periods, ' 'exactly two must be specified') start = com._maybe_box_datetimelike(start) end = com._maybe_box_datetimelike(end) endpoint = next(com._not_none(start, end)) if not _is_valid_endpoint(start): msg = 'start must be numeric or datetime-like, got {start}' raise ValueError(msg.format(start=start)) if not _is_valid_endpoint(end): msg = 'end must be numeric or datetime-like, got {end}' raise ValueError(msg.format(end=end)) if is_float(periods): periods = int(periods) elif not is_integer(periods) and periods is not None: msg = 'periods must be a number, got {periods}' raise TypeError(msg.format(periods=periods)) freq = freq or (1 if is_number(endpoint) else 'D') if not is_number(freq): try: freq = to_offset(freq) except ValueError: raise ValueError('freq must be numeric or convertible to ' 'DateOffset, got {freq}'.format(freq=freq)) # verify type compatibility if not all([_is_type_compatible(start, end), _is_type_compatible(start, freq), _is_type_compatible(end, freq)]): raise TypeError("start, end, freq need to be type compatible") if is_number(endpoint): if periods is None: periods = int((end - start) // freq) if start is None: start = end - periods * freq # force end to be consistent with freq (lower if freq skips over end) end = start + periods * freq # end + freq for inclusive endpoint breaks = np.arange(start, end + freq, freq) elif isinstance(endpoint, Timestamp): # add one to account for interval endpoints (n breaks = n-1 intervals) if periods is not None: periods += 1 breaks = date_range(start=start, end=end, periods=periods, freq=freq) else: # add one to account for interval endpoints (n breaks = n-1 intervals) if periods is not None: periods += 1 breaks = timedelta_range(start=start, end=end, periods=periods, freq=freq) return IntervalIndex.from_breaks(breaks, name=name, closed=closed)
def _is_numeric(x): """ Whether the array x is numeric. """ print("x is: ",x) return all(is_number(i) for i in x)
def interval_range( start=None, end=None, periods=None, freq=None, name: Hashable = None, closed: lib.NoDefault = lib.no_default, inclusive: IntervalClosedType | None = None, ) -> IntervalIndex: """ Return a fixed frequency IntervalIndex. Parameters ---------- start : numeric or datetime-like, default None Left bound for generating intervals. end : numeric or datetime-like, default None Right bound for generating intervals. periods : int, default None Number of periods to generate. freq : numeric, str, or DateOffset, default None The length of each interval. Must be consistent with the type of start and end, e.g. 2 for numeric, or '5H' for datetime-like. Default is 1 for numeric and 'D' for datetime-like. name : str, default None Name of the resulting IntervalIndex. closed : {'left', 'right', 'both', 'neither'}, default 'right' Whether the intervals are closed on the left-side, right-side, both or neither. .. deprecated:: 1.5.0 Argument `closed` has been deprecated to standardize boundary inputs. Use `inclusive` instead, to set each bound as closed or open. inclusive : {"both", "neither", "left", "right"}, default "both" Include boundaries; Whether to set each bound as closed or open. .. versionadded:: 1.5.0 Returns ------- IntervalIndex See Also -------- IntervalIndex : An Index of intervals that are all closed on the same side. Notes ----- Of the four parameters ``start``, ``end``, ``periods``, and ``freq``, exactly three must be specified. If ``freq`` is omitted, the resulting ``IntervalIndex`` will have ``periods`` linearly spaced elements between ``start`` and ``end``, inclusively. To learn more about datetime-like frequency strings, please see `this link <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`__. Examples -------- Numeric ``start`` and ``end`` is supported. >>> pd.interval_range(start=0, end=5, inclusive="right") IntervalIndex([(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]], dtype='interval[int64, right]') Additionally, datetime-like input is also supported. >>> pd.interval_range(start=pd.Timestamp('2017-01-01'), ... end=pd.Timestamp('2017-01-04'), inclusive="right") IntervalIndex([(2017-01-01, 2017-01-02], (2017-01-02, 2017-01-03], (2017-01-03, 2017-01-04]], dtype='interval[datetime64[ns], right]') The ``freq`` parameter specifies the frequency between the left and right. endpoints of the individual intervals within the ``IntervalIndex``. For numeric ``start`` and ``end``, the frequency must also be numeric. >>> pd.interval_range(start=0, periods=4, freq=1.5, inclusive="right") IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]], dtype='interval[float64, right]') Similarly, for datetime-like ``start`` and ``end``, the frequency must be convertible to a DateOffset. >>> pd.interval_range(start=pd.Timestamp('2017-01-01'), ... periods=3, freq='MS', inclusive="right") IntervalIndex([(2017-01-01, 2017-02-01], (2017-02-01, 2017-03-01], (2017-03-01, 2017-04-01]], dtype='interval[datetime64[ns], right]') Specify ``start``, ``end``, and ``periods``; the frequency is generated automatically (linearly spaced). >>> pd.interval_range(start=0, end=6, periods=4, inclusive="right") IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]], dtype='interval[float64, right]') The ``inclusive`` parameter specifies which endpoints of the individual intervals within the ``IntervalIndex`` are closed. >>> pd.interval_range(end=5, periods=4, inclusive='both') IntervalIndex([[1, 2], [2, 3], [3, 4], [4, 5]], dtype='interval[int64, both]') """ if inclusive is not None and not isinstance(closed, lib.NoDefault): raise ValueError( "Deprecated argument `closed` cannot be passed " "if argument `inclusive` is not None" ) elif not isinstance(closed, lib.NoDefault): warnings.warn( "Argument `closed` is deprecated in favor of `inclusive`.", FutureWarning, stacklevel=2, ) if closed is None: inclusive = "both" elif closed in ("both", "neither", "left", "right"): inclusive = closed else: raise ValueError( "Argument `closed` has to be either" "'both', 'neither', 'left' or 'right'" ) elif inclusive is None: inclusive = "both" start = maybe_box_datetimelike(start) end = maybe_box_datetimelike(end) endpoint = start if start is not None else end if freq is None and com.any_none(periods, start, end): freq = 1 if is_number(endpoint) else "D" if com.count_not_none(start, end, periods, freq) != 3: raise ValueError( "Of the four parameters: start, end, periods, and " "freq, exactly three must be specified" ) if not _is_valid_endpoint(start): raise ValueError(f"start must be numeric or datetime-like, got {start}") elif not _is_valid_endpoint(end): raise ValueError(f"end must be numeric or datetime-like, got {end}") if is_float(periods): periods = int(periods) elif not is_integer(periods) and periods is not None: raise TypeError(f"periods must be a number, got {periods}") if freq is not None and not is_number(freq): try: freq = to_offset(freq) except ValueError as err: raise ValueError( f"freq must be numeric or convertible to DateOffset, got {freq}" ) from err # verify type compatibility if not all( [ _is_type_compatible(start, end), _is_type_compatible(start, freq), _is_type_compatible(end, freq), ] ): raise TypeError("start, end, freq need to be type compatible") # +1 to convert interval count to breaks count (n breaks = n-1 intervals) if periods is not None: periods += 1 breaks: np.ndarray | TimedeltaIndex | DatetimeIndex if is_number(endpoint): # force consistency between start/end/freq (lower end if freq skips it) if com.all_not_none(start, end, freq): end -= (end - start) % freq # compute the period/start/end if unspecified (at most one) if periods is None: periods = int((end - start) // freq) + 1 elif start is None: start = end - (periods - 1) * freq elif end is None: end = start + (periods - 1) * freq breaks = np.linspace(start, end, periods) if all(is_integer(x) for x in com.not_none(start, end, freq)): # np.linspace always produces float output # error: Incompatible types in assignment (expression has type # "Union[ExtensionArray, ndarray]", variable has type "ndarray") breaks = maybe_downcast_numeric( # type: ignore[assignment] breaks, np.dtype("int64") ) else: # delegate to the appropriate range function if isinstance(endpoint, Timestamp): breaks = date_range(start=start, end=end, periods=periods, freq=freq) else: breaks = timedelta_range(start=start, end=end, periods=periods, freq=freq) return IntervalIndex.from_breaks(breaks, name=name, closed=inclusive)
def interval_range( start=None, end=None, periods=None, freq=None, name=None, closed="right" ): """ Return a fixed frequency IntervalIndex. Parameters ---------- start : numeric or datetime-like, default None Left bound for generating intervals. end : numeric or datetime-like, default None Right bound for generating intervals. periods : int, default None Number of periods to generate. freq : numeric, str, or DateOffset, default None The length of each interval. Must be consistent with the type of start and end, e.g. 2 for numeric, or '5H' for datetime-like. Default is 1 for numeric and 'D' for datetime-like. name : str, default None Name of the resulting IntervalIndex. closed : {'left', 'right', 'both', 'neither'}, default 'right' Whether the intervals are closed on the left-side, right-side, both or neither. Returns ------- IntervalIndex See Also -------- IntervalIndex : An Index of intervals that are all closed on the same side. Notes ----- Of the four parameters ``start``, ``end``, ``periods``, and ``freq``, exactly three must be specified. If ``freq`` is omitted, the resulting ``IntervalIndex`` will have ``periods`` linearly spaced elements between ``start`` and ``end``, inclusively. To learn more about datetime-like frequency strings, please see `this link <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`__. Examples -------- Numeric ``start`` and ``end`` is supported. >>> pd.interval_range(start=0, end=5) IntervalIndex([(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]], closed='right', dtype='interval[int64]') Additionally, datetime-like input is also supported. >>> pd.interval_range(start=pd.Timestamp('2017-01-01'), ... end=pd.Timestamp('2017-01-04')) IntervalIndex([(2017-01-01, 2017-01-02], (2017-01-02, 2017-01-03], (2017-01-03, 2017-01-04]], closed='right', dtype='interval[datetime64[ns]]') The ``freq`` parameter specifies the frequency between the left and right. endpoints of the individual intervals within the ``IntervalIndex``. For numeric ``start`` and ``end``, the frequency must also be numeric. >>> pd.interval_range(start=0, periods=4, freq=1.5) IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]], closed='right', dtype='interval[float64]') Similarly, for datetime-like ``start`` and ``end``, the frequency must be convertible to a DateOffset. >>> pd.interval_range(start=pd.Timestamp('2017-01-01'), ... periods=3, freq='MS') IntervalIndex([(2017-01-01, 2017-02-01], (2017-02-01, 2017-03-01], (2017-03-01, 2017-04-01]], closed='right', dtype='interval[datetime64[ns]]') Specify ``start``, ``end``, and ``periods``; the frequency is generated automatically (linearly spaced). >>> pd.interval_range(start=0, end=6, periods=4) IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]], closed='right', dtype='interval[float64]') The ``closed`` parameter specifies which endpoints of the individual intervals within the ``IntervalIndex`` are closed. >>> pd.interval_range(end=5, periods=4, closed='both') IntervalIndex([[1, 2], [2, 3], [3, 4], [4, 5]], closed='both', dtype='interval[int64]') """ start = com.maybe_box_datetimelike(start) end = com.maybe_box_datetimelike(end) endpoint = start if start is not None else end if freq is None and com.any_none(periods, start, end): freq = 1 if is_number(endpoint) else "D" if com.count_not_none(start, end, periods, freq) != 3: raise ValueError( "Of the four parameters: start, end, periods, and " "freq, exactly three must be specified" ) if not _is_valid_endpoint(start): raise ValueError(f"start must be numeric or datetime-like, got {start}") elif not _is_valid_endpoint(end): raise ValueError(f"end must be numeric or datetime-like, got {end}") if is_float(periods): periods = int(periods) elif not is_integer(periods) and periods is not None: raise TypeError(f"periods must be a number, got {periods}") if freq is not None and not is_number(freq): try: freq = to_offset(freq) except ValueError as err: raise ValueError( f"freq must be numeric or convertible to DateOffset, got {freq}" ) from err # verify type compatibility if not all( [ _is_type_compatible(start, end), _is_type_compatible(start, freq), _is_type_compatible(end, freq), ] ): raise TypeError("start, end, freq need to be type compatible") # +1 to convert interval count to breaks count (n breaks = n-1 intervals) if periods is not None: periods += 1 if is_number(endpoint): # force consistency between start/end/freq (lower end if freq skips it) if com.all_not_none(start, end, freq): end -= (end - start) % freq # compute the period/start/end if unspecified (at most one) if periods is None: periods = int((end - start) // freq) + 1 elif start is None: start = end - (periods - 1) * freq elif end is None: end = start + (periods - 1) * freq breaks = np.linspace(start, end, periods) if all(is_integer(x) for x in com.not_none(start, end, freq)): # np.linspace always produces float output breaks = maybe_downcast_to_dtype(breaks, "int64") else: # delegate to the appropriate range function if isinstance(endpoint, Timestamp): range_func = date_range else: range_func = timedelta_range breaks = range_func(start=start, end=end, periods=periods, freq=freq) return IntervalIndex.from_breaks(breaks, name=name, closed=closed)
def to_numeric(arg, errors='raise', downcast=None): """ Convert argument to a numeric type. The default return dtype is `float64` or `int64` depending on the data supplied. Use the `downcast` parameter to obtain other dtypes. Please note that precision loss may occur if really large numbers are passed in. Due to the internal limitations of `ndarray`, if numbers smaller than `-9223372036854775808` (np.iinfo(np.int64).min) or larger than `18446744073709551615` (np.iinfo(np.uint64).max) are passed in, it is very likely they will be converted to float so that they can stored in an `ndarray`. These warnings apply similarly to `Series` since it internally leverages `ndarray`. Parameters ---------- arg : scalar, list, tuple, 1-d array, or Series errors : {'ignore', 'raise', 'coerce'}, default 'raise' - If 'raise', then invalid parsing will raise an exception - If 'coerce', then invalid parsing will be set as NaN - If 'ignore', then invalid parsing will return the input downcast : {'integer', 'signed', 'unsigned', 'float'} , default None If not None, and if the data has been successfully cast to a numerical dtype (or if the data was numeric to begin with), downcast that resulting data to the smallest numerical dtype possible according to the following rules: - 'integer' or 'signed': smallest signed int dtype (min.: np.int8) - 'unsigned': smallest unsigned int dtype (min.: np.uint8) - 'float': smallest float dtype (min.: np.float32) As this behaviour is separate from the core conversion to numeric values, any errors raised during the downcasting will be surfaced regardless of the value of the 'errors' input. In addition, downcasting will only occur if the size of the resulting data's dtype is strictly larger than the dtype it is to be cast to, so if none of the dtypes checked satisfy that specification, no downcasting will be performed on the data. .. versionadded:: 0.19.0 Returns ------- ret : numeric if parsing succeeded. Return type depends on input. Series if Series, otherwise ndarray. See Also -------- DataFrame.astype : Cast argument to a specified dtype. to_datetime : Convert argument to datetime. to_timedelta : Convert argument to timedelta. numpy.ndarray.astype : Cast a numpy array to a specified type. Examples -------- Take separate series and convert to numeric, coercing when told to >>> s = pd.Series(['1.0', '2', -3]) >>> pd.to_numeric(s) 0 1.0 1 2.0 2 -3.0 dtype: float64 >>> pd.to_numeric(s, downcast='float') 0 1.0 1 2.0 2 -3.0 dtype: float32 >>> pd.to_numeric(s, downcast='signed') 0 1 1 2 2 -3 dtype: int8 >>> s = pd.Series(['apple', '1.0', '2', -3]) >>> pd.to_numeric(s, errors='ignore') 0 apple 1 1.0 2 2 3 -3 dtype: object >>> pd.to_numeric(s, errors='coerce') 0 NaN 1 1.0 2 2.0 3 -3.0 dtype: float64 """ if downcast not in (None, 'integer', 'signed', 'unsigned', 'float'): raise ValueError('invalid downcasting method provided') is_series = False is_index = False is_scalars = False if isinstance(arg, ABCSeries): is_series = True values = arg.values elif isinstance(arg, ABCIndexClass): is_index = True values = arg.asi8 if values is None: values = arg.values elif isinstance(arg, (list, tuple)): values = np.array(arg, dtype='O') elif is_scalar(arg): if is_decimal(arg): return float(arg) if is_number(arg): return arg is_scalars = True values = np.array([arg], dtype='O') elif getattr(arg, 'ndim', 1) > 1: raise TypeError('arg must be a list, tuple, 1-d array, or Series') else: values = arg try: if is_numeric_dtype(values): pass elif is_datetime_or_timedelta_dtype(values): values = values.astype(np.int64) else: values = ensure_object(values) coerce_numeric = errors not in ('ignore', 'raise') values = lib.maybe_convert_numeric(values, set(), coerce_numeric=coerce_numeric) except Exception: if errors == 'raise': raise # attempt downcast only if the data has been successfully converted # to a numerical dtype and if a downcast method has been specified if downcast is not None and is_numeric_dtype(values): typecodes = None if downcast in ('integer', 'signed'): typecodes = np.typecodes['Integer'] elif downcast == 'unsigned' and np.min(values) >= 0: typecodes = np.typecodes['UnsignedInteger'] elif downcast == 'float': typecodes = np.typecodes['Float'] # pandas support goes only to np.float32, # as float dtypes smaller than that are # extremely rare and not well supported float_32_char = np.dtype(np.float32).char float_32_ind = typecodes.index(float_32_char) typecodes = typecodes[float_32_ind:] if typecodes is not None: # from smallest to largest for dtype in typecodes: if np.dtype(dtype).itemsize <= values.dtype.itemsize: values = maybe_downcast_to_dtype(values, dtype) # successful conversion if values.dtype == dtype: break if is_series: return pd.Series(values, index=arg.index, name=arg.name) elif is_index: # because we want to coerce to numeric if possible, # do not use _shallow_copy_with_infer return pd.Index(values, name=arg.name) elif is_scalars: return values[0] else: return values
def test_is_number(self): assert is_number(True) assert is_number(1) assert is_number(1.1) assert is_number(1 + 3j) assert is_number(np.bool(False)) assert is_number(np.int64(1)) assert is_number(np.float64(1.1)) assert is_number(np.complex128(1 + 3j)) assert is_number(np.nan) assert not is_number(None) assert not is_number("x") assert not is_number(datetime(2011, 1, 1)) assert not is_number(np.datetime64("2011-01-01")) assert not is_number(Timestamp("2011-01-01")) assert not is_number(Timestamp("2011-01-01", tz="US/Eastern")) assert not is_number(timedelta(1000)) assert not is_number(Timedelta("1 days")) # questionable assert not is_number(np.bool_(False)) assert is_number(np.timedelta64(1, "D"))
def to_numeric(arg, errors='raise', downcast=None): """ Convert argument to a numeric type. The default return dtype is `float64` or `int64` depending on the data supplied. Use the `downcast` parameter to obtain other dtypes. Parameters ---------- arg : list, tuple, 1-d array, or Series errors : {'ignore', 'raise', 'coerce'}, default 'raise' - If 'raise', then invalid parsing will raise an exception - If 'coerce', then invalid parsing will be set as NaN - If 'ignore', then invalid parsing will return the input downcast : {'integer', 'signed', 'unsigned', 'float'} , default None If not None, and if the data has been successfully cast to a numerical dtype (or if the data was numeric to begin with), downcast that resulting data to the smallest numerical dtype possible according to the following rules: - 'integer' or 'signed': smallest signed int dtype (min.: np.int8) - 'unsigned': smallest unsigned int dtype (min.: np.uint8) - 'float': smallest float dtype (min.: np.float32) As this behaviour is separate from the core conversion to numeric values, any errors raised during the downcasting will be surfaced regardless of the value of the 'errors' input. In addition, downcasting will only occur if the size of the resulting data's dtype is strictly larger than the dtype it is to be cast to, so if none of the dtypes checked satisfy that specification, no downcasting will be performed on the data. .. versionadded:: 0.19.0 Returns ------- ret : numeric if parsing succeeded. Return type depends on input. Series if Series, otherwise ndarray Examples -------- Take separate series and convert to numeric, coercing when told to >>> s = pd.Series(['1.0', '2', -3]) >>> pd.to_numeric(s) 0 1.0 1 2.0 2 -3.0 dtype: float64 >>> pd.to_numeric(s, downcast='float') 0 1.0 1 2.0 2 -3.0 dtype: float32 >>> pd.to_numeric(s, downcast='signed') 0 1 1 2 2 -3 dtype: int8 >>> s = pd.Series(['apple', '1.0', '2', -3]) >>> pd.to_numeric(s, errors='ignore') 0 apple 1 1.0 2 2 3 -3 dtype: object >>> pd.to_numeric(s, errors='coerce') 0 NaN 1 1.0 2 2.0 3 -3.0 dtype: float64 See Also -------- pandas.DataFrame.astype : Cast argument to a specified dtype. pandas.to_datetime : Convert argument to datetime. pandas.to_timedelta : Convert argument to timedelta. numpy.ndarray.astype : Cast a numpy array to a specified type. """ if downcast not in (None, 'integer', 'signed', 'unsigned', 'float'): raise ValueError('invalid downcasting method provided') is_series = False is_index = False is_scalars = False if isinstance(arg, ABCSeries): is_series = True values = arg.values elif isinstance(arg, ABCIndexClass): is_index = True values = arg.asi8 if values is None: values = arg.values elif isinstance(arg, (list, tuple)): values = np.array(arg, dtype='O') elif is_scalar(arg): if is_decimal(arg): return float(arg) if is_number(arg): return arg is_scalars = True values = np.array([arg], dtype='O') elif getattr(arg, 'ndim', 1) > 1: raise TypeError('arg must be a list, tuple, 1-d array, or Series') else: values = arg try: if is_numeric_dtype(values): pass elif is_datetime_or_timedelta_dtype(values): values = values.astype(np.int64) else: values = ensure_object(values) coerce_numeric = False if errors in ('ignore', 'raise') else True values = lib.maybe_convert_numeric(values, set(), coerce_numeric=coerce_numeric) except Exception: if errors == 'raise': raise # attempt downcast only if the data has been successfully converted # to a numerical dtype and if a downcast method has been specified if downcast is not None and is_numeric_dtype(values): typecodes = None if downcast in ('integer', 'signed'): typecodes = np.typecodes['Integer'] elif downcast == 'unsigned' and np.min(values) >= 0: typecodes = np.typecodes['UnsignedInteger'] elif downcast == 'float': typecodes = np.typecodes['Float'] # pandas support goes only to np.float32, # as float dtypes smaller than that are # extremely rare and not well supported float_32_char = np.dtype(np.float32).char float_32_ind = typecodes.index(float_32_char) typecodes = typecodes[float_32_ind:] if typecodes is not None: # from smallest to largest for dtype in typecodes: if np.dtype(dtype).itemsize <= values.dtype.itemsize: values = maybe_downcast_to_dtype(values, dtype) # successful conversion if values.dtype == dtype: break if is_series: return pd.Series(values, index=arg.index, name=arg.name) elif is_index: # because we want to coerce to numeric if possible, # do not use _shallow_copy_with_infer return pd.Index(values, name=arg.name) elif is_scalars: return values[0] else: return values
def interval_range(start=None, end=None, periods=None, freq=None, name=None, closed='right'): """ Return a fixed frequency IntervalIndex Parameters ---------- start : numeric or datetime-like, default None Left bound for generating intervals end : numeric or datetime-like, default None Right bound for generating intervals periods : integer, default None Number of periods to generate freq : numeric, string, or DateOffset, default None The length of each interval. Must be consistent with the type of start and end, e.g. 2 for numeric, or '5H' for datetime-like. Default is 1 for numeric and 'D' for datetime-like. name : string, default None Name of the resulting IntervalIndex closed : {'left', 'right', 'both', 'neither'}, default 'right' Whether the intervals are closed on the left-side, right-side, both or neither. Returns ------- rng : IntervalIndex See Also -------- IntervalIndex : An Index of intervals that are all closed on the same side. Notes ----- Of the four parameters ``start``, ``end``, ``periods``, and ``freq``, exactly three must be specified. If ``freq`` is omitted, the resulting ``IntervalIndex`` will have ``periods`` linearly spaced elements between ``start`` and ``end``, inclusively. To learn more about datetime-like frequency strings, please see `this link <http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases>`__. Examples -------- Numeric ``start`` and ``end`` is supported. >>> pd.interval_range(start=0, end=5) IntervalIndex([(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]], closed='right', dtype='interval[int64]') Additionally, datetime-like input is also supported. >>> pd.interval_range(start=pd.Timestamp('2017-01-01'), ... end=pd.Timestamp('2017-01-04')) IntervalIndex([(2017-01-01, 2017-01-02], (2017-01-02, 2017-01-03], (2017-01-03, 2017-01-04]], closed='right', dtype='interval[datetime64[ns]]') The ``freq`` parameter specifies the frequency between the left and right. endpoints of the individual intervals within the ``IntervalIndex``. For numeric ``start`` and ``end``, the frequency must also be numeric. >>> pd.interval_range(start=0, periods=4, freq=1.5) IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]], closed='right', dtype='interval[float64]') Similarly, for datetime-like ``start`` and ``end``, the frequency must be convertible to a DateOffset. >>> pd.interval_range(start=pd.Timestamp('2017-01-01'), ... periods=3, freq='MS') IntervalIndex([(2017-01-01, 2017-02-01], (2017-02-01, 2017-03-01], (2017-03-01, 2017-04-01]], closed='right', dtype='interval[datetime64[ns]]') Specify ``start``, ``end``, and ``periods``; the frequency is generated automatically (linearly spaced). >>> pd.interval_range(start=0, end=6, periods=4) IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]], closed='right', dtype='interval[float64]') The ``closed`` parameter specifies which endpoints of the individual intervals within the ``IntervalIndex`` are closed. >>> pd.interval_range(end=5, periods=4, closed='both') IntervalIndex([[1, 2], [2, 3], [3, 4], [4, 5]], closed='both', dtype='interval[int64]') """ start = com.maybe_box_datetimelike(start) end = com.maybe_box_datetimelike(end) endpoint = start if start is not None else end if freq is None and com._any_none(periods, start, end): freq = 1 if is_number(endpoint) else 'D' if com.count_not_none(start, end, periods, freq) != 3: raise ValueError('Of the four parameters: start, end, periods, and ' 'freq, exactly three must be specified') if not _is_valid_endpoint(start): msg = 'start must be numeric or datetime-like, got {start}' raise ValueError(msg.format(start=start)) elif not _is_valid_endpoint(end): msg = 'end must be numeric or datetime-like, got {end}' raise ValueError(msg.format(end=end)) if is_float(periods): periods = int(periods) elif not is_integer(periods) and periods is not None: msg = 'periods must be a number, got {periods}' raise TypeError(msg.format(periods=periods)) if freq is not None and not is_number(freq): try: freq = to_offset(freq) except ValueError: raise ValueError('freq must be numeric or convertible to ' 'DateOffset, got {freq}'.format(freq=freq)) # verify type compatibility if not all([_is_type_compatible(start, end), _is_type_compatible(start, freq), _is_type_compatible(end, freq)]): raise TypeError("start, end, freq need to be type compatible") # +1 to convert interval count to breaks count (n breaks = n-1 intervals) if periods is not None: periods += 1 if is_number(endpoint): # force consistency between start/end/freq (lower end if freq skips it) if com._all_not_none(start, end, freq): end -= (end - start) % freq # compute the period/start/end if unspecified (at most one) if periods is None: periods = int((end - start) // freq) + 1 elif start is None: start = end - (periods - 1) * freq elif end is None: end = start + (periods - 1) * freq breaks = np.linspace(start, end, periods) if all(is_integer(x) for x in com._not_none(start, end, freq)): # np.linspace always produces float output breaks = maybe_downcast_to_dtype(breaks, 'int64') else: # delegate to the appropriate range function if isinstance(endpoint, Timestamp): range_func = date_range else: range_func = timedelta_range breaks = range_func(start=start, end=end, periods=periods, freq=freq) return IntervalIndex.from_breaks(breaks, name=name, closed=closed)
def _is_numeric(x): """ Whether the array x is numeric. """ return all(is_number(i) for i in x)
def assert_almost_equal( left, right, check_dtype: bool | str = "equiv", check_less_precise: bool | int | NoDefault = no_default, rtol: float = 1.0e-5, atol: float = 1.0e-8, **kwargs, ): """ Check that the left and right objects are approximately equal. By approximately equal, we refer to objects that are numbers or that contain numbers which may be equivalent to specific levels of precision. Parameters ---------- left : object right : object check_dtype : bool or {'equiv'}, default 'equiv' Check dtype if both a and b are the same type. If 'equiv' is passed in, then `RangeIndex` and `Int64Index` are also considered equivalent when doing type checking. check_less_precise : bool or int, default False Specify comparison precision. 5 digits (False) or 3 digits (True) after decimal points are compared. If int, then specify the number of digits to compare. When comparing two numbers, if the first number has magnitude less than 1e-5, we compare the two numbers directly and check whether they are equivalent within the specified precision. Otherwise, we compare the **ratio** of the second number to the first number and check whether it is equivalent to 1 within the specified precision. .. deprecated:: 1.1.0 Use `rtol` and `atol` instead to define relative/absolute tolerance, respectively. Similar to :func:`math.isclose`. rtol : float, default 1e-5 Relative tolerance. .. versionadded:: 1.1.0 atol : float, default 1e-8 Absolute tolerance. .. versionadded:: 1.1.0 """ if check_less_precise is not no_default: warnings.warn( "The 'check_less_precise' keyword in testing.assert_*_equal " "is deprecated and will be removed in a future version. " "You can stop passing 'check_less_precise' to silence this warning.", FutureWarning, stacklevel=find_stack_level(), ) # https://github.com/python/mypy/issues/7642 # error: Argument 1 to "_get_tol_from_less_precise" has incompatible # type "Union[bool, int, NoDefault]"; expected "Union[bool, int]" rtol = atol = _get_tol_from_less_precise( check_less_precise # type: ignore[arg-type] ) if isinstance(left, Index): assert_index_equal( left, right, check_exact=False, exact=check_dtype, rtol=rtol, atol=atol, **kwargs, ) elif isinstance(left, Series): assert_series_equal( left, right, check_exact=False, check_dtype=check_dtype, rtol=rtol, atol=atol, **kwargs, ) elif isinstance(left, DataFrame): assert_frame_equal( left, right, check_exact=False, check_dtype=check_dtype, rtol=rtol, atol=atol, **kwargs, ) else: # Other sequences. if check_dtype: if is_number(left) and is_number(right): # Do not compare numeric classes, like np.float64 and float. pass elif is_bool(left) and is_bool(right): # Do not compare bool classes, like np.bool_ and bool. pass else: if isinstance(left, np.ndarray) or isinstance( right, np.ndarray): obj = "numpy array" else: obj = "Input" assert_class_equal(left, right, obj=obj) # if we have "equiv", this becomes True check_dtype = bool(check_dtype) _testing.assert_almost_equal(left, right, check_dtype=check_dtype, rtol=rtol, atol=atol, **kwargs)
def to_numeric(arg, errors="raise", downcast=None): """ Convert argument to a numeric type. The default return dtype is `float64` or `int64` depending on the data supplied. Use the `downcast` parameter to obtain other dtypes. Please note that precision loss may occur if really large numbers are passed in. Due to the internal limitations of `ndarray`, if numbers smaller than `-9223372036854775808` (np.iinfo(np.int64).min) or larger than `18446744073709551615` (np.iinfo(np.uint64).max) are passed in, it is very likely they will be converted to float so that they can stored in an `ndarray`. These warnings apply similarly to `Series` since it internally leverages `ndarray`. Parameters ---------- arg : scalar, list, tuple, 1-d array, or Series Argument to be converted. errors : {'ignore', 'raise', 'coerce'}, default 'raise' - If 'raise', then invalid parsing will raise an exception. - If 'coerce', then invalid parsing will be set as NaN. - If 'ignore', then invalid parsing will return the input. downcast : {'integer', 'signed', 'unsigned', 'float'}, default None If not None, and if the data has been successfully cast to a numerical dtype (or if the data was numeric to begin with), downcast that resulting data to the smallest numerical dtype possible according to the following rules: - 'integer' or 'signed': smallest signed int dtype (min.: np.int8) - 'unsigned': smallest unsigned int dtype (min.: np.uint8) - 'float': smallest float dtype (min.: np.float32) As this behaviour is separate from the core conversion to numeric values, any errors raised during the downcasting will be surfaced regardless of the value of the 'errors' input. In addition, downcasting will only occur if the size of the resulting data's dtype is strictly larger than the dtype it is to be cast to, so if none of the dtypes checked satisfy that specification, no downcasting will be performed on the data. Returns ------- ret Numeric if parsing succeeded. Return type depends on input. Series if Series, otherwise ndarray. See Also -------- DataFrame.astype : Cast argument to a specified dtype. to_datetime : Convert argument to datetime. to_timedelta : Convert argument to timedelta. numpy.ndarray.astype : Cast a numpy array to a specified type. DataFrame.convert_dtypes : Convert dtypes. Examples -------- Take separate series and convert to numeric, coercing when told to >>> s = pd.Series(['1.0', '2', -3]) >>> pd.to_numeric(s) 0 1.0 1 2.0 2 -3.0 dtype: float64 >>> pd.to_numeric(s, downcast='float') 0 1.0 1 2.0 2 -3.0 dtype: float32 >>> pd.to_numeric(s, downcast='signed') 0 1 1 2 2 -3 dtype: int8 >>> s = pd.Series(['apple', '1.0', '2', -3]) >>> pd.to_numeric(s, errors='ignore') 0 apple 1 1.0 2 2 3 -3 dtype: object >>> pd.to_numeric(s, errors='coerce') 0 NaN 1 1.0 2 2.0 3 -3.0 dtype: float64 Downcasting of nullable integer and floating dtypes is supported: >>> s = pd.Series([1, 2, 3], dtype="Int64") >>> pd.to_numeric(s, downcast="integer") 0 1 1 2 2 3 dtype: Int8 >>> s = pd.Series([1.0, 2.1, 3.0], dtype="Float64") >>> pd.to_numeric(s, downcast="float") 0 1.0 1 2.1 2 3.0 dtype: Float32 """ if downcast not in (None, "integer", "signed", "unsigned", "float"): raise ValueError("invalid downcasting method provided") if errors not in ("ignore", "raise", "coerce"): raise ValueError("invalid error value specified") is_series = False is_index = False is_scalars = False if isinstance(arg, ABCSeries): is_series = True values = arg.values elif isinstance(arg, ABCIndex): is_index = True if needs_i8_conversion(arg.dtype): values = arg.asi8 else: values = arg.values elif isinstance(arg, (list, tuple)): values = np.array(arg, dtype="O") elif is_scalar(arg): if is_decimal(arg): return float(arg) if is_number(arg): return arg is_scalars = True values = np.array([arg], dtype="O") elif getattr(arg, "ndim", 1) > 1: raise TypeError("arg must be a list, tuple, 1-d array, or Series") else: values = arg # GH33013: for IntegerArray & FloatingArray extract non-null values for casting # save mask to reconstruct the full array after casting if isinstance(values, NumericArray): mask = values._mask values = values._data[~mask] else: mask = None values_dtype = getattr(values, "dtype", None) if is_numeric_dtype(values_dtype): pass elif is_datetime_or_timedelta_dtype(values_dtype): values = values.view(np.int64) else: values = ensure_object(values) coerce_numeric = errors not in ("ignore", "raise") try: values = lib.maybe_convert_numeric(values, set(), coerce_numeric=coerce_numeric) except (ValueError, TypeError): if errors == "raise": raise # attempt downcast only if the data has been successfully converted # to a numerical dtype and if a downcast method has been specified if downcast is not None and is_numeric_dtype(values.dtype): typecodes = None if downcast in ("integer", "signed"): typecodes = np.typecodes["Integer"] elif downcast == "unsigned" and (not len(values) or np.min(values) >= 0): typecodes = np.typecodes["UnsignedInteger"] elif downcast == "float": typecodes = np.typecodes["Float"] # pandas support goes only to np.float32, # as float dtypes smaller than that are # extremely rare and not well supported float_32_char = np.dtype(np.float32).char float_32_ind = typecodes.index(float_32_char) typecodes = typecodes[float_32_ind:] if typecodes is not None: # from smallest to largest for dtype in typecodes: dtype = np.dtype(dtype) if dtype.itemsize <= values.dtype.itemsize: values = maybe_downcast_numeric(values, dtype) # successful conversion if values.dtype == dtype: break # GH33013: for IntegerArray & FloatingArray need to reconstruct masked array if mask is not None: data = np.zeros(mask.shape, dtype=values.dtype) data[~mask] = values from pandas.core.arrays import FloatingArray, IntegerArray klass = IntegerArray if is_integer_dtype(data.dtype) else FloatingArray values = klass(data, mask) if is_series: return arg._constructor(values, index=arg.index, name=arg.name) elif is_index: # because we want to coerce to numeric if possible, # do not use _shallow_copy return pd.Index(values, name=arg.name) elif is_scalars: return values[0] else: return values
def interval_range(start=None, end=None, periods=None, freq=None, name=None, closed='right'): """ Return a fixed frequency IntervalIndex Parameters ---------- start : numeric or datetime-like, default None Left bound for generating intervals end : numeric or datetime-like, default None Right bound for generating intervals periods : integer, default None Number of periods to generate freq : numeric, string, or DateOffset, default None The length of each interval. Must be consistent with the type of start and end, e.g. 2 for numeric, or '5H' for datetime-like. Default is 1 for numeric and 'D' (calendar daily) for datetime-like. name : string, default None Name of the resulting IntervalIndex closed : string, default 'right' options are: 'left', 'right', 'both', 'neither' Notes ----- Of the three parameters: ``start``, ``end``, and ``periods``, exactly two must be specified. Returns ------- rng : IntervalIndex Examples -------- Numeric ``start`` and ``end`` is supported. >>> pd.interval_range(start=0, end=5) IntervalIndex([(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]] closed='right', dtype='interval[int64]') Additionally, datetime-like input is also supported. >>> pd.interval_range(start=pd.Timestamp('2017-01-01'), end=pd.Timestamp('2017-01-04')) IntervalIndex([(2017-01-01, 2017-01-02], (2017-01-02, 2017-01-03], (2017-01-03, 2017-01-04]] closed='right', dtype='interval[datetime64[ns]]') The ``freq`` parameter specifies the frequency between the left and right. endpoints of the individual intervals within the ``IntervalIndex``. For numeric ``start`` and ``end``, the frequency must also be numeric. >>> pd.interval_range(start=0, periods=4, freq=1.5) IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]] closed='right', dtype='interval[float64]') Similarly, for datetime-like ``start`` and ``end``, the frequency must be convertible to a DateOffset. >>> pd.interval_range(start=pd.Timestamp('2017-01-01'), periods=3, freq='MS') IntervalIndex([(2017-01-01, 2017-02-01], (2017-02-01, 2017-03-01], (2017-03-01, 2017-04-01]] closed='right', dtype='interval[datetime64[ns]]') The ``closed`` parameter specifies which endpoints of the individual intervals within the ``IntervalIndex`` are closed. >>> pd.interval_range(end=5, periods=4, closed='both') IntervalIndex([[1, 2], [2, 3], [3, 4], [4, 5]] closed='both', dtype='interval[int64]') See Also -------- IntervalIndex : an Index of intervals that are all closed on the same side. """ if _count_not_none(start, end, periods) != 2: raise ValueError('Of the three parameters: start, end, and periods, ' 'exactly two must be specified') start = _maybe_box_datetimelike(start) end = _maybe_box_datetimelike(end) endpoint = next(_not_none(start, end)) if not _is_valid_endpoint(start): msg = 'start must be numeric or datetime-like, got {start}' raise ValueError(msg.format(start=start)) if not _is_valid_endpoint(end): msg = 'end must be numeric or datetime-like, got {end}' raise ValueError(msg.format(end=end)) if is_float(periods): periods = int(periods) elif not is_integer(periods) and periods is not None: msg = 'periods must be a number, got {periods}' raise TypeError(msg.format(periods=periods)) freq = freq or (1 if is_number(endpoint) else 'D') if not is_number(freq): try: freq = to_offset(freq) except ValueError: raise ValueError('freq must be numeric or convertible to ' 'DateOffset, got {freq}'.format(freq=freq)) # verify type compatibility if not all([_is_type_compatible(start, end), _is_type_compatible(start, freq), _is_type_compatible(end, freq)]): raise TypeError("start, end, freq need to be type compatible") if is_number(endpoint): if periods is None: periods = int((end - start) // freq) if start is None: start = end - periods * freq # force end to be consistent with freq (lower if freq skips over end) end = start + periods * freq # end + freq for inclusive endpoint breaks = np.arange(start, end + freq, freq) elif isinstance(endpoint, Timestamp): # add one to account for interval endpoints (n breaks = n-1 intervals) if periods is not None: periods += 1 breaks = date_range(start=start, end=end, periods=periods, freq=freq) else: # add one to account for interval endpoints (n breaks = n-1 intervals) if periods is not None: periods += 1 breaks = timedelta_range(start=start, end=end, periods=periods, freq=freq) return IntervalIndex.from_breaks(breaks, name=name, closed=closed)
def test_is_number(self): assert is_number(True) assert is_number(1) assert is_number(1.1) assert is_number(1 + 3j) assert is_number(np.bool(False)) assert is_number(np.int64(1)) assert is_number(np.float64(1.1)) assert is_number(np.complex128(1 + 3j)) assert is_number(np.nan) assert not is_number(None) assert not is_number('x') assert not is_number(datetime(2011, 1, 1)) assert not is_number(np.datetime64('2011-01-01')) assert not is_number(Timestamp('2011-01-01')) assert not is_number(Timestamp('2011-01-01', tz='US/Eastern')) assert not is_number(timedelta(1000)) assert not is_number(Timedelta('1 days')) # questionable assert not is_number(np.bool_(False)) assert is_number(np.timedelta64(1, 'D'))
def _parse_errorbars(self, label, err): """ Look for error keyword arguments and return the actual errorbar data or return the error DataFrame/dict Error bars can be specified in several ways: Series: the user provides a pandas.Series object of the same length as the data ndarray: provides a np.ndarray of the same length as the data DataFrame/dict: error values are paired with keys matching the key in the plotted DataFrame str: the name of the column within the plotted DataFrame """ if err is None: return None def match_labels(data, e): e = e.reindex(data.index) return e # key-matched DataFrame if isinstance(err, ABCDataFrame): err = match_labels(self.data, err) # key-matched dict elif isinstance(err, dict): pass # Series of error values elif isinstance(err, ABCSeries): # broadcast error series across data err = match_labels(self.data, err) err = np.atleast_2d(err) err = np.tile(err, (self.nseries, 1)) # errors are a column in the dataframe elif isinstance(err, str): evalues = self.data[err].values self.data = self.data[self.data.columns.drop(err)] err = np.atleast_2d(evalues) err = np.tile(err, (self.nseries, 1)) elif is_list_like(err): if is_iterator(err): err = np.atleast_2d(list(err)) else: # raw error values err = np.atleast_2d(err) err_shape = err.shape # asymmetrical error bars if err.ndim == 3: if ((err_shape[0] != self.nseries) or (err_shape[1] != 2) or (err_shape[2] != len(self.data))): msg = ("Asymmetrical error bars should be provided " + "with the shape (%u, 2, %u)" % (self.nseries, len(self.data))) raise ValueError(msg) # broadcast errors to each data series if len(err) == 1: err = np.tile(err, (self.nseries, 1)) elif is_number(err): err = np.tile([err], (self.nseries, len(self.data))) else: msg = "No valid {label} detected".format(label=label) raise ValueError(msg) return err
def _is_valid_endpoint(endpoint): """helper for interval_range to check if start/end are valid types""" return any([is_number(endpoint), isinstance(endpoint, Timestamp), isinstance(endpoint, Timedelta), endpoint is None])