def test_equality(self):
    assert is_dtype_equal(self.dtype, 'interval[int64]')
    assert is_dtype_equal(self.dtype, IntervalDtype('int64'))
    assert is_dtype_equal(IntervalDtype('int64'), IntervalDtype('int64'))
    assert not is_dtype_equal(self.dtype, 'int64')
    assert not is_dtype_equal(IntervalDtype('int64'),
                              IntervalDtype('float64'))

def test_construction_from_string(self):
    result = DatetimeTZDtype('datetime64[ns, US/Eastern]')
    assert is_dtype_equal(self.dtype, result)
    result = DatetimeTZDtype.construct_from_string(
        'datetime64[ns, US/Eastern]')
    assert is_dtype_equal(self.dtype, result)
    pytest.raises(TypeError,
                  lambda: DatetimeTZDtype.construct_from_string('foo'))

def test_construction_from_string(self):
    result = IntervalDtype('interval[int64]')
    self.assertTrue(is_dtype_equal(self.dtype, result))
    result = IntervalDtype.construct_from_string('interval[int64]')
    self.assertTrue(is_dtype_equal(self.dtype, result))
    with tm.assertRaises(TypeError):
        IntervalDtype.construct_from_string('foo')
    with tm.assertRaises(TypeError):
        IntervalDtype.construct_from_string('interval[foo]')
    with tm.assertRaises(TypeError):
        IntervalDtype.construct_from_string('foo[int64]')

def test_construction_from_string(self):
    result = IntervalDtype('interval[int64]')
    assert is_dtype_equal(self.dtype, result)
    result = IntervalDtype.construct_from_string('interval[int64]')
    assert is_dtype_equal(self.dtype, result)
    with pytest.raises(TypeError):
        IntervalDtype.construct_from_string('foo')
    with pytest.raises(TypeError):
        IntervalDtype.construct_from_string('interval[foo]')
    with pytest.raises(TypeError):
        IntervalDtype.construct_from_string('foo[int64]')

def _validate_td64_dtype(dtype):
    dtype = pandas_dtype(dtype)
    if is_dtype_equal(dtype, np.dtype("timedelta64")):
        dtype = _TD_DTYPE
        msg = textwrap.dedent("""\
            Passing in 'timedelta' dtype with no precision is deprecated
            and will raise in a future version. Please pass in
            'timedelta64[ns]' instead.""")
        warnings.warn(msg, FutureWarning, stacklevel=4)

    if not is_dtype_equal(dtype, _TD_DTYPE):
        raise ValueError(_BAD_DTYPE.format(dtype=dtype))
    return dtype

def test_equality(self):
    assert is_dtype_equal(self.dtype, 'interval[int64]')
    assert is_dtype_equal(self.dtype, IntervalDtype('int64'))
    assert is_dtype_equal(IntervalDtype('int64'), IntervalDtype('int64'))
    assert not is_dtype_equal(self.dtype, 'int64')
    assert not is_dtype_equal(IntervalDtype('int64'),
                              IntervalDtype('float64'))

    # invalid subtype comparisons do not raise when directly compared
    dtype1 = IntervalDtype('float64')
    dtype2 = IntervalDtype('datetime64[ns, US/Eastern]')
    assert dtype1 != dtype2
    assert dtype2 != dtype1

def __new__(cls, data=None, dtype=None, copy=False, name=None,
            fastpath=None):

    if fastpath is not None:
        warnings.warn("The 'fastpath' keyword is deprecated, and will be "
                      "removed in a future version.",
                      FutureWarning, stacklevel=2)
        if fastpath:
            return cls._simple_new(data, name=name)

    # is_scalar, generators handled in coerce_to_ndarray
    data = cls._coerce_to_ndarray(data)

    if issubclass(data.dtype.type, compat.string_types):
        cls._string_data_error(data)

    if copy or not is_dtype_equal(data.dtype, cls._default_dtype):
        subarr = np.array(data, dtype=cls._default_dtype, copy=copy)
        cls._assert_safe_casting(data, subarr)
    else:
        subarr = data

    if name is None and hasattr(data, 'name'):
        name = data.name
    return cls._simple_new(subarr, name=name)

def test_construction_from_string(self):
    result = DatetimeTZDtype.construct_from_string(
        'datetime64[ns, US/Eastern]')
    assert is_dtype_equal(self.dtype, result)

    msg = "Could not construct DatetimeTZDtype from 'foo'"
    with pytest.raises(TypeError, match=msg):
        DatetimeTZDtype.construct_from_string('foo')

def equals(self, other):
    """
    Determines if two Index objects contain the same elements.
    """
    if self.is_(other):
        return True

    if not isinstance(other, ABCIndexClass):
        return False
    elif not isinstance(other, type(self)):
        try:
            other = type(self)(other)
        except Exception:
            return False

    if not is_dtype_equal(self.dtype, other.dtype):
        # have different timezone
        return False

    elif is_period_dtype(self):
        if not is_period_dtype(other):
            return False
        if self.freq != other.freq:
            return False

    return np.array_equal(self.asi8, other.asi8)

def test_construction_from_string(self):
    result = PeriodDtype('period[D]')
    assert is_dtype_equal(self.dtype, result)
    result = PeriodDtype.construct_from_string('period[D]')
    assert is_dtype_equal(self.dtype, result)
    with pytest.raises(TypeError):
        PeriodDtype.construct_from_string('foo')
    with pytest.raises(TypeError):
        PeriodDtype.construct_from_string('period[foo]')
    with pytest.raises(TypeError):
        PeriodDtype.construct_from_string('foo[D]')
    with pytest.raises(TypeError):
        PeriodDtype.construct_from_string('datetime64[ns]')
    with pytest.raises(TypeError):
        PeriodDtype.construct_from_string('datetime64[ns, US/Eastern]')

def _sparse_array_op(left, right, op, name):
    if name.startswith('__'):
        # For lookups in _libs.sparse we need non-dunder op name
        name = name[2:-2]

    # dtype used to find corresponding sparse method
    if not is_dtype_equal(left.dtype, right.dtype):
        dtype = find_common_type([left.dtype, right.dtype])
        left = left.astype(dtype)
        right = right.astype(dtype)
    else:
        dtype = left.dtype

    # dtype the result must have
    result_dtype = None

    if left.sp_index.ngaps == 0 or right.sp_index.ngaps == 0:
        with np.errstate(all='ignore'):
            result = op(left.get_values(), right.get_values())
            fill = op(_get_fill(left), _get_fill(right))

        if left.sp_index.ngaps == 0:
            index = left.sp_index
        else:
            index = right.sp_index
    elif left.sp_index.equals(right.sp_index):
        with np.errstate(all='ignore'):
            result = op(left.sp_values, right.sp_values)
            fill = op(_get_fill(left), _get_fill(right))
        index = left.sp_index
    else:
        if name[0] == 'r':
            left, right = right, left
            name = name[1:]

        if name in ('and', 'or') and dtype == 'bool':
            opname = 'sparse_{name}_uint8'.format(name=name)
            # to make template simple, cast here
            left_sp_values = left.sp_values.view(np.uint8)
            right_sp_values = right.sp_values.view(np.uint8)
            result_dtype = np.bool
        else:
            opname = 'sparse_{name}_{dtype}'.format(name=name, dtype=dtype)
            left_sp_values = left.sp_values
            right_sp_values = right.sp_values

        sparse_op = getattr(splib, opname)
        with np.errstate(all='ignore'):
            result, index, fill = sparse_op(left_sp_values, left.sp_index,
                                            left.fill_value, right_sp_values,
                                            right.sp_index, right.fill_value)

    if result_dtype is None:
        result_dtype = result.dtype

    return _wrap_result(name, result, index, fill, dtype=result_dtype)

def fill_value(self, value):
    if not is_scalar(value):
        raise ValueError('fill_value must be a scalar')

    # if the specified value triggers type promotion, raise ValueError
    new_dtype, fill_value = maybe_promote(self.dtype, value)
    if is_dtype_equal(self.dtype, new_dtype):
        self._fill_value = fill_value
    else:
        msg = 'unable to set fill_value {0} to {1} dtype'
        raise ValueError(msg.format(value, self.dtype))

def test_dtype_equal_strict():
    # we are strict on kind equality
    for dtype in [np.int8, np.int16, np.int32]:
        assert not com.is_dtype_equal(np.int64, dtype)

    for dtype in [np.float32]:
        assert not com.is_dtype_equal(np.float64, dtype)

    # strict w.r.t. PeriodDtype
    assert not com.is_dtype_equal(PeriodDtype('D'), PeriodDtype('2D'))

    # strict w.r.t. datetime64
    assert not com.is_dtype_equal(
        com.pandas_dtype('datetime64[ns, US/Eastern]'),
        com.pandas_dtype('datetime64[ns, CET]'))

    # see gh-15941: no exception should be raised
    assert not com.is_dtype_equal(None, None)

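# Editor's example (not from the snippets above): a minimal, hedged usage
# sketch of the strictness that test_dtype_equal_strict exercises, assuming a
# pandas version where is_dtype_equal is exposed via pandas.api.types.
import numpy as np
from pandas.api.types import is_dtype_equal

assert not is_dtype_equal(np.int64, np.int32)        # strict on itemsize
assert not is_dtype_equal('datetime64[ns, US/Eastern]',
                          'datetime64[ns, CET]')     # tz is part of the dtype
assert not is_dtype_equal(None, None)                # invalid input -> False, no raise
assert is_dtype_equal(np.dtype('M8[ns]'), 'datetime64[ns]')  # numpy compat
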
def astype(self, dtype, copy=True):
    if is_dtype_equal(self.dtype, dtype) and copy is False:
        # Ensure that self.astype(self.dtype) is self
        return self

    new_values = self._data.astype(dtype, copy=copy)

    # pass copy=False because any copying will be done in the
    # _data.astype call above
    return Index(new_values, dtype=new_values.dtype, name=self.name,
                 copy=False)

def __eq__(self, other):
    if isinstance(other, str):
        return other.lower() in (self.name.lower(), str(self).lower())
    elif not isinstance(other, IntervalDtype):
        return False
    elif self.subtype is None or other.subtype is None:
        # None should match any subtype
        return True
    else:
        from pandas.core.dtypes.common import is_dtype_equal
        return is_dtype_equal(self.subtype, other.subtype)

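# Editor's example: a hedged sketch of the __eq__ semantics above, using only
# public pandas names. The key behavior is that a subtype-less IntervalDtype
# acts as a wildcard, while concrete subtypes compare via is_dtype_equal.
import pandas as pd

assert pd.IntervalDtype('int64') == 'interval'           # generic string matches
assert pd.IntervalDtype('int64') == pd.IntervalDtype()   # None subtype matches any
assert pd.IntervalDtype('int64') != pd.IntervalDtype('float64')
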
def test_equality(self):
    assert is_dtype_equal(self.dtype, 'period[D]')
    assert is_dtype_equal(self.dtype, PeriodDtype('D'))
    assert is_dtype_equal(self.dtype, PeriodDtype('D'))
    assert is_dtype_equal(PeriodDtype('D'), PeriodDtype('D'))
    assert not is_dtype_equal(self.dtype, 'D')
    assert not is_dtype_equal(PeriodDtype('D'), PeriodDtype('2D'))

def test_equality(self):
    self.assertTrue(is_dtype_equal(self.dtype, 'period[D]'))
    self.assertTrue(is_dtype_equal(self.dtype, PeriodDtype('D')))
    self.assertTrue(is_dtype_equal(self.dtype, PeriodDtype('D')))
    self.assertTrue(is_dtype_equal(PeriodDtype('D'), PeriodDtype('D')))
    self.assertFalse(is_dtype_equal(self.dtype, 'D'))
    self.assertFalse(is_dtype_equal(PeriodDtype('D'), PeriodDtype('2D')))

def _maybe_convert_i8(self, key):
    """
    Maybe convert a given key to its equivalent i8 value(s). Used as a
    preprocessing step prior to IntervalTree queries (self._engine), which
    expects numeric data.

    Parameters
    ----------
    key : scalar or list-like
        The key that should maybe be converted to i8.

    Returns
    -------
    key: scalar or list-like
        The original key if no conversion occurred, int if converted scalar,
        Int64Index if converted list-like.
    """
    original = key
    if is_list_like(key):
        key = ensure_index(key)

    if not self._needs_i8_conversion(key):
        return original

    scalar = is_scalar(key)
    if is_interval_dtype(key) or isinstance(key, Interval):
        # convert left/right and reconstruct
        left = self._maybe_convert_i8(key.left)
        right = self._maybe_convert_i8(key.right)
        constructor = Interval if scalar else IntervalIndex.from_arrays
        return constructor(left, right, closed=self.closed)

    if scalar:
        # Timestamp/Timedelta
        key_dtype, key_i8 = infer_dtype_from_scalar(key, pandas_dtype=True)
    else:
        # DatetimeIndex/TimedeltaIndex
        key_dtype, key_i8 = key.dtype, Index(key.asi8)
        if key.hasnans:
            # convert NaT from its i8 value to np.nan so it's not viewed
            # as a valid value, maybe causing errors (e.g. is_overlapping)
            key_i8 = key_i8.where(~key._isnan)

    # ensure consistency with IntervalIndex subtype
    subtype = self.dtype.subtype
    msg = ('Cannot index an IntervalIndex of subtype {subtype} with '
           'values of dtype {other}')
    if not is_dtype_equal(subtype, key_dtype):
        raise ValueError(msg.format(subtype=subtype, other=key_dtype))

    return key_i8

def astype(self, dtype, copy=True):
    if is_object_dtype(dtype):
        return self._box_values_as_index()
    elif is_string_dtype(dtype) and not is_categorical_dtype(dtype):
        return Index(self.format(), name=self.name, dtype=object)
    elif is_integer_dtype(dtype):
        return Index(self.values.astype('i8', copy=copy), name=self.name,
                     dtype='i8')
    elif (is_datetime_or_timedelta_dtype(dtype) and
          not is_dtype_equal(self.dtype, dtype)) or is_float_dtype(dtype):
        # disallow conversion between datetime/timedelta,
        # and conversions for any datetimelike to float
        msg = 'Cannot cast {name} to dtype {dtype}'
        raise TypeError(msg.format(name=type(self).__name__, dtype=dtype))
    return super(DatetimeIndexOpsMixin, self).astype(dtype, copy=copy)

def test_equality(self):
    assert is_dtype_equal(self.dtype, 'datetime64[ns, US/Eastern]')
    assert is_dtype_equal(self.dtype, DatetimeTZDtype('ns', 'US/Eastern'))
    assert not is_dtype_equal(self.dtype, 'foo')
    assert not is_dtype_equal(self.dtype, DatetimeTZDtype('ns', 'CET'))
    assert not is_dtype_equal(DatetimeTZDtype('ns', 'US/Eastern'),
                              DatetimeTZDtype('ns', 'US/Pacific'))

    # numpy compat
    assert is_dtype_equal(np.dtype("M8[ns]"), "datetime64[ns]")

def test_equality(self):
    self.assertTrue(is_dtype_equal(self.dtype, 'interval[int64]'))
    self.assertTrue(is_dtype_equal(self.dtype, IntervalDtype('int64')))
    self.assertTrue(is_dtype_equal(self.dtype, IntervalDtype('int64')))
    self.assertTrue(is_dtype_equal(IntervalDtype('int64'),
                                   IntervalDtype('int64')))
    self.assertFalse(is_dtype_equal(self.dtype, 'int64'))
    self.assertFalse(is_dtype_equal(IntervalDtype('int64'),
                                    IntervalDtype('float64')))

def astype(self, dtype, copy=True):
    if is_object_dtype(dtype):
        return self._box_values_as_index()
    elif is_string_dtype(dtype) and not is_categorical_dtype(dtype):
        return Index(self.format(), name=self.name, dtype=object)
    elif is_integer_dtype(dtype):
        # TODO(DatetimeArray): use self._values here.
        # Can't use ._values currently, because that returns a
        # DatetimeIndex, which throws us in an infinite loop.
        return Index(self.values.astype('i8', copy=copy), name=self.name,
                     dtype='i8')
    elif (is_datetime_or_timedelta_dtype(dtype) and
          not is_dtype_equal(self.dtype, dtype)) or is_float_dtype(dtype):
        # disallow conversion between datetime/timedelta,
        # and conversions for any datetimelike to float
        msg = 'Cannot cast {name} to dtype {dtype}'
        raise TypeError(msg.format(name=type(self).__name__, dtype=dtype))
    return super(DatetimeIndexOpsMixin, self).astype(dtype, copy=copy)

def test_equality(self):
    self.assertTrue(is_dtype_equal(self.dtype,
                                   'datetime64[ns, US/Eastern]'))
    self.assertTrue(is_dtype_equal(self.dtype,
                                   DatetimeTZDtype('ns', 'US/Eastern')))
    self.assertFalse(is_dtype_equal(self.dtype, 'foo'))
    self.assertFalse(is_dtype_equal(self.dtype,
                                    DatetimeTZDtype('ns', 'CET')))
    self.assertFalse(is_dtype_equal(DatetimeTZDtype('ns', 'US/Eastern'),
                                    DatetimeTZDtype('ns', 'US/Pacific')))

    # numpy compat
    self.assertTrue(is_dtype_equal(np.dtype("M8[ns]"), "datetime64[ns]"))

def __new__(cls, data=None, dtype=None, copy=False, name=None,
            fastpath=False):

    if fastpath:
        return cls._simple_new(data, name=name)

    # is_scalar, generators handled in coerce_to_ndarray
    data = cls._coerce_to_ndarray(data)

    if issubclass(data.dtype.type, compat.string_types):
        cls._string_data_error(data)

    if copy or not is_dtype_equal(data.dtype, cls._default_dtype):
        subarr = np.array(data, dtype=cls._default_dtype, copy=copy)
        cls._assert_safe_casting(data, subarr)
    else:
        subarr = data

    if name is None and hasattr(data, 'name'):
        name = data.name
    return cls._simple_new(subarr, name=name)

def equals(self, other):
    """
    Determines if two Index objects contain the same elements.
    """
    if self is other:
        return True

    if not isinstance(other, Index):
        return False

    # need to compare nans locations and make sure that they are the same
    # since nans don't compare equal this is a bit tricky
    try:
        if not isinstance(other, Float64Index):
            other = self._constructor(other)

        if (not is_dtype_equal(self.dtype, other.dtype) or
                self.shape != other.shape):
            return False

        left, right = self._ndarray_values, other._ndarray_values
        return ((left == right) | (self._isnan & other._isnan)).all()
    except (TypeError, ValueError):
        return False

def __init__(self, values, dtype=_TD_DTYPE, freq=None, copy=False):
    if not hasattr(values, "dtype"):
        raise ValueError(
            "Unexpected type '{}'. 'values' must be a TimedeltaArray "
            "ndarray, or Series or Index containing one of those."
            .format(type(values).__name__))
    if freq == "infer":
        raise ValueError(
            "Frequency inference not allowed in TimedeltaArray.__init__. "
            "Use 'pd.array()' instead.")

    if dtype is not None and not is_dtype_equal(dtype, _TD_DTYPE):
        raise TypeError("dtype {dtype} cannot be converted to "
                        "timedelta64[ns]".format(dtype=dtype))

    if values.dtype == 'i8':
        values = values.view('timedelta64[ns]')

    result = type(self)._from_sequence(values, dtype=dtype,
                                       copy=copy, freq=freq)
    self._data = result._data
    self._freq = result._freq
    self._dtype = result._dtype

def astype(self, dtype, copy=True):
    # TODO: Figure out something better here...
    # We have DatetimeLikeArrayMixin ->
    #     super(...), which ends up being... DatetimeIndexOpsMixin?
    # this is complicated.
    # need a pandas_astype(arr, dtype).
    from pandas import Categorical

    dtype = pandas_dtype(dtype)

    if is_object_dtype(dtype):
        return np.asarray(self, dtype=object)
    elif is_string_dtype(dtype) and not is_categorical_dtype(dtype):
        return self._format_native_types()
    elif is_integer_dtype(dtype):
        values = self._data

        if values.dtype != dtype:
            # int32 vs. int64
            values = values.astype(dtype)
        elif copy:
            values = values.copy()

        return values
    elif (is_datetime_or_timedelta_dtype(dtype) and
          not is_dtype_equal(self.dtype, dtype)) or is_float_dtype(dtype):
        # disallow conversion between datetime/timedelta,
        # and conversions for any datetimelike to float
        msg = 'Cannot cast {name} to dtype {dtype}'
        raise TypeError(msg.format(name=type(self).__name__, dtype=dtype))
    elif is_categorical_dtype(dtype):
        return Categorical(self, dtype=dtype)
    elif is_period_dtype(dtype):
        return self.asfreq(dtype.freq)
    else:
        return np.asarray(self, dtype=dtype)

def test_union_different_types(index_pair):
    # GH 23525
    idx1, idx2 = index_pair
    type_pair = tuple(sorted([type(idx1), type(idx2)], key=lambda x: str(x)))
    if type_pair in COMPATIBLE_INCONSISTENT_PAIRS:
        pytest.xfail('This test only considers non-compatible indexes.')

    if any(isinstance(idx, pd.MultiIndex) for idx in index_pair):
        pytest.xfail("This test doesn't consider MultiIndexes.")

    if is_dtype_equal(idx1.dtype, idx2.dtype):
        pytest.xfail('This test only considers non-matching dtypes.')

    # A union with a CategoricalIndex (even as dtype('O')) and a
    # non-CategoricalIndex can only be made if both indices are monotonic.
    # This is true before this PR as well.

    # Union with a non-unique, non-monotonic index raises error
    # This applies to the boolean index
    idx1 = idx1.sort_values()
    idx2 = idx2.sort_values()

    assert idx1.union(idx2).dtype == np.dtype('O')
    assert idx2.union(idx1).dtype == np.dtype('O')

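# Editor's example: a hedged illustration of the behavior the test above pins
# down (GH 23525), using only public Index.union. Unioning indexes with
# non-matching, non-promotable dtypes falls back to object dtype; pandas may
# emit a sorting RuntimeWarning for the mixed result.
import numpy as np
import pandas as pd

idx1 = pd.Index([1, 2, 3]).sort_values()
idx2 = pd.date_range('2020-01-01', periods=2).sort_values()
assert idx1.union(idx2).dtype == np.dtype('O')
assert idx2.union(idx1).dtype == np.dtype('O')
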
def union_categoricals(to_union, sort_categories: bool = False,
                       ignore_order: bool = False):
    """
    Combine list-like of Categorical-like, unioning categories.

    All categories must have the same dtype.

    Parameters
    ----------
    to_union : list-like
        Categorical, CategoricalIndex, or Series with dtype='category'.
    sort_categories : bool, default False
        If true, resulting categories will be lexsorted, otherwise
        they will be ordered as they appear in the data.
    ignore_order : bool, default False
        If true, the ordered attribute of the Categoricals will be ignored.
        Results in an unordered categorical.

    Returns
    -------
    Categorical

    Raises
    ------
    TypeError
        - all inputs do not have the same dtype
        - all inputs do not have the same ordered property
        - all inputs are ordered and their categories are not identical
        - sort_categories=True and Categoricals are ordered
    ValueError
        Empty list of categoricals passed

    Notes
    -----
    To learn more about categories, see `link
    <https://pandas.pydata.org/pandas-docs/stable/user_guide/categorical.html#unioning>`__

    Examples
    --------
    >>> from pandas.api.types import union_categoricals

    If you want to combine categoricals that do not necessarily have
    the same categories, `union_categoricals` will combine a list-like
    of categoricals. The new categories will be the union of the
    categories being combined.

    >>> a = pd.Categorical(["b", "c"])
    >>> b = pd.Categorical(["a", "b"])
    >>> union_categoricals([a, b])
    ['b', 'c', 'a', 'b']
    Categories (3, object): ['b', 'c', 'a']

    By default, the resulting categories will be ordered as they appear
    in the `categories` of the data. If you want the categories to be
    lexsorted, use `sort_categories=True` argument.

    >>> union_categoricals([a, b], sort_categories=True)
    ['b', 'c', 'a', 'b']
    Categories (3, object): ['a', 'b', 'c']

    `union_categoricals` also works with the case of combining two
    categoricals of the same categories and order information (e.g. what
    you could also `append` for).

    >>> a = pd.Categorical(["a", "b"], ordered=True)
    >>> b = pd.Categorical(["a", "b", "a"], ordered=True)
    >>> union_categoricals([a, b])
    ['a', 'b', 'a', 'b', 'a']
    Categories (2, object): ['a' < 'b']

    Raises `TypeError` because the categories are ordered and not identical.

    >>> a = pd.Categorical(["a", "b"], ordered=True)
    >>> b = pd.Categorical(["a", "b", "c"], ordered=True)
    >>> union_categoricals([a, b])
    Traceback (most recent call last):
        ...
    TypeError: to union ordered Categoricals, all categories must be the same

    New in version 0.20.0

    Ordered categoricals with different categories or orderings can be
    combined by using the `ignore_order=True` argument.

    >>> a = pd.Categorical(["a", "b", "c"], ordered=True)
    >>> b = pd.Categorical(["c", "b", "a"], ordered=True)
    >>> union_categoricals([a, b], ignore_order=True)
    ['a', 'b', 'c', 'c', 'b', 'a']
    Categories (3, object): ['a', 'b', 'c']

    `union_categoricals` also works with a `CategoricalIndex`, or `Series`
    containing categorical data, but note that the resulting array will
    always be a plain `Categorical`

    >>> a = pd.Series(["b", "c"], dtype='category')
    >>> b = pd.Series(["a", "b"], dtype='category')
    >>> union_categoricals([a, b])
    ['b', 'c', 'a', 'b']
    Categories (3, object): ['b', 'c', 'a']
    """
    from pandas import Categorical
    from pandas.core.arrays.categorical import recode_for_categories

    if len(to_union) == 0:
        raise ValueError("No Categoricals to union")

    def _maybe_unwrap(x):
        if isinstance(x, (ABCCategoricalIndex, ABCSeries)):
            return x._values
        elif isinstance(x, Categorical):
            return x
        else:
            raise TypeError("all components to combine must be Categorical")

    to_union = [_maybe_unwrap(x) for x in to_union]
    first = to_union[0]

    if not all(
        is_dtype_equal(other.categories.dtype, first.categories.dtype)
        for other in to_union[1:]
    ):
        raise TypeError("dtype of categories must be the same")

    ordered = False
    if all(first._categories_match_up_to_permutation(other)
           for other in to_union[1:]):
        # identical categories - fastpath
        categories = first.categories
        ordered = first.ordered

        all_codes = [first._encode_with_my_categories(x)._codes
                     for x in to_union]
        new_codes = np.concatenate(all_codes)

        if sort_categories and not ignore_order and ordered:
            raise TypeError(
                "Cannot use sort_categories=True with ordered Categoricals")

        if sort_categories and not categories.is_monotonic_increasing:
            categories = categories.sort_values()
            indexer = categories.get_indexer(first.categories)

            from pandas.core.algorithms import take_nd

            new_codes = take_nd(indexer, new_codes, fill_value=-1)
    elif ignore_order or all(not c.ordered for c in to_union):
        # different categories - union and recode
        cats = first.categories.append([c.categories for c in to_union[1:]])
        categories = cats.unique()
        if sort_categories:
            categories = categories.sort_values()

        new_codes = [
            recode_for_categories(c.codes, c.categories, categories)
            for c in to_union
        ]
        new_codes = np.concatenate(new_codes)
    else:
        # ordered - to show a proper error message
        if all(c.ordered for c in to_union):
            msg = "to union ordered Categoricals, all categories must be the same"
            raise TypeError(msg)
        else:
            raise TypeError("Categorical.ordered must be the same")

    if ignore_order:
        ordered = False

    return Categorical(new_codes, categories=categories, ordered=ordered,
                       fastpath=True)

def test_construction_from_string(self, dtype):
    result = IntervalDtype("interval[int64, right]")
    assert is_dtype_equal(dtype, result)
    result = IntervalDtype.construct_from_string("interval[int64, right]")
    assert is_dtype_equal(dtype, result)

def test_construction_from_string(self):
    result = CategoricalDtype.construct_from_string('category')
    assert is_dtype_equal(self.dtype, result)

    pytest.raises(
        TypeError, lambda: CategoricalDtype.construct_from_string('foo'))

def _convert_to_ndarrays(
    self,
    dct: Mapping,
    na_values,
    na_fvalues,
    verbose: bool = False,
    converters=None,
    dtypes=None,
):
    result = {}
    for c, values in dct.items():
        conv_f = None if converters is None else converters.get(c, None)
        if isinstance(dtypes, dict):
            cast_type = dtypes.get(c, None)
        else:
            # single dtype or None
            cast_type = dtypes

        if self.na_filter:
            col_na_values, col_na_fvalues = _get_na_values(
                c, na_values, na_fvalues, self.keep_default_na)
        else:
            col_na_values, col_na_fvalues = set(), set()

        if c in self._parse_date_cols:
            # GH#26203 Do not convert columns which get converted to dates
            # but replace nans to ensure to_datetime works
            mask = algorithms.isin(values,
                                   set(col_na_values) | col_na_fvalues)
            np.putmask(values, mask, np.nan)
            result[c] = values
            continue

        if conv_f is not None:
            # conv_f applied to data before inference
            if cast_type is not None:
                warnings.warn(
                    ("Both a converter and dtype were specified "
                     f"for column {c} - only the converter will be used."),
                    ParserWarning,
                    stacklevel=find_stack_level(),
                )

            try:
                values = lib.map_infer(values, conv_f)
            except ValueError:
                # error: Argument 2 to "isin" has incompatible type "List[Any]";
                # expected "Union[Union[ExtensionArray, ndarray], Index, Series]"
                mask = algorithms.isin(
                    values, list(na_values)  # type: ignore[arg-type]
                ).view(np.uint8)
                values = lib.map_infer_mask(values, conv_f, mask)

            cvals, na_count = self._infer_types(
                values, set(col_na_values) | col_na_fvalues,
                try_num_bool=False)
        else:
            is_ea = is_extension_array_dtype(cast_type)
            is_str_or_ea_dtype = is_ea or is_string_dtype(cast_type)
            # skip inference if specified dtype is object
            # or casting to an EA
            try_num_bool = not (cast_type and is_str_or_ea_dtype)

            # general type inference and conversion
            cvals, na_count = self._infer_types(
                values, set(col_na_values) | col_na_fvalues, try_num_bool)

            # type specified in dtype param or cast_type is an EA
            if cast_type and (not is_dtype_equal(cvals, cast_type)
                              or is_extension_array_dtype(cast_type)):
                if not is_ea and na_count > 0:
                    try:
                        if is_bool_dtype(cast_type):
                            raise ValueError(
                                f"Bool column has NA values in column {c}")
                    except (AttributeError, TypeError):
                        # invalid input to is_bool_dtype
                        pass
                cast_type = pandas_dtype(cast_type)
                cvals = self._cast_types(cvals, cast_type, c)

        result[c] = cvals
        if verbose and na_count:
            print(f"Filled {na_count} NA values in column {c!s}")
    return result

def ndarray_to_mgr(values, index, columns, dtype: DtypeObj | None,
                   copy: bool, typ: str) -> Manager:
    # used in DataFrame.__init__
    # input must be a ndarray, list, Series, Index, ExtensionArray

    if isinstance(values, ABCSeries):
        if columns is None:
            if values.name is not None:
                columns = Index([values.name])
        if index is None:
            index = values.index
        else:
            values = values.reindex(index)

        # zero len case (GH #2234)
        if not len(values) and columns is not None and len(columns):
            values = np.empty((0, 1), dtype=object)

    vdtype = getattr(values, "dtype", None)
    if is_1d_only_ea_dtype(vdtype) or isinstance(dtype, ExtensionDtype):
        # GH#19157

        if isinstance(values, np.ndarray) and values.ndim > 1:
            # GH#12513 a EA dtype passed with a 2D array, split into
            # multiple EAs that view the values
            values = [values[:, n] for n in range(values.shape[1])]
        else:
            values = [values]

        if columns is None:
            columns = Index(range(len(values)))
        else:
            columns = ensure_index(columns)

        return arrays_to_mgr(values, columns, index, columns,
                             dtype=dtype, typ=typ)

    elif is_extension_array_dtype(vdtype) and not is_1d_only_ea_dtype(vdtype):
        # i.e. Datetime64TZ
        values = extract_array(values, extract_numpy=True)
        if copy:
            values = values.copy()
        if values.ndim == 1:
            values = values.reshape(-1, 1)

    else:
        # by definition an array here
        # the dtypes will be coerced to a single dtype
        values = _prep_ndarray(values, copy=copy)

    if dtype is not None and not is_dtype_equal(values.dtype, dtype):
        shape = values.shape
        flat = values.ravel()

        # GH#40110 see similar check inside sanitize_array
        rcf = not (is_integer_dtype(dtype) and values.dtype.kind == "f")

        values = sanitize_array(flat, None, dtype=dtype, copy=copy,
                                raise_cast_failure=rcf)

        values = values.reshape(shape)

    # _prep_ndarray ensures that values.ndim == 2 at this point
    index, columns = _get_axes(values.shape[0], values.shape[1],
                               index=index, columns=columns)

    _check_values_indices_shape_match(values, index, columns)

    if typ == "array":

        if issubclass(values.dtype.type, str):
            values = np.array(values, dtype=object)

        if dtype is None and is_object_dtype(values.dtype):
            arrays = [
                ensure_wrapped_if_datetimelike(
                    maybe_infer_to_datetimelike(values[:, i].copy()))
                for i in range(values.shape[1])
            ]
        else:
            if is_datetime_or_timedelta_dtype(values.dtype):
                values = ensure_wrapped_if_datetimelike(values)
            arrays = [values[:, i].copy() for i in range(values.shape[1])]

        return ArrayManager(arrays, [index, columns], verify_integrity=False)

    values = values.T

    # if we don't have a dtype specified, then try to convert objects
    # on the entire block; this is to convert if we have datetimelike's
    # embedded in an object type
    if dtype is None and is_object_dtype(values.dtype):

        if values.ndim == 2 and values.shape[0] != 1:
            # transpose and separate blocks
            dtlike_vals = [maybe_infer_to_datetimelike(row) for row in values]
            dvals_list = [ensure_block_shape(dval, 2) for dval in dtlike_vals]

            # TODO: What about re-joining object columns?
            block_values = [
                new_block(dvals_list[n], placement=n, ndim=2)
                for n in range(len(dvals_list))
            ]

        else:
            datelike_vals = maybe_infer_to_datetimelike(values)
            nb = new_block(datelike_vals, placement=slice(len(columns)),
                           ndim=2)
            block_values = [nb]
    else:
        nb = new_block(values, placement=slice(len(columns)), ndim=2)
        block_values = [nb]

    if len(columns) == 0:
        block_values = []

    return create_block_manager_from_blocks(block_values, [columns, index])

def test_dtype_equal_strict(dtype1, dtype2):
    assert not com.is_dtype_equal(dtype1, dtype2)

def array_equivalent(left, right, strict_nan: bool = False,
                     dtype_equal: bool = False) -> bool:
    """
    True if two arrays, left and right, have equal non-NaN elements, and NaNs
    in corresponding locations.  False otherwise. It is assumed that left and
    right are NumPy arrays of the same dtype. The behavior of this function
    (particularly with respect to NaNs) is not defined if the dtypes are
    different.

    Parameters
    ----------
    left, right : ndarrays
    strict_nan : bool, default False
        If True, consider NaN and None to be different.
    dtype_equal : bool, default False
        Whether `left` and `right` are known to have the same dtype
        according to `is_dtype_equal`. Some methods like `BlockManager.equals`
        require that the dtypes match. Setting this to ``True`` can improve
        performance, but will give different results for arrays that are
        equal but different dtypes.

    Returns
    -------
    b : bool
        Returns True if the arrays are equivalent.

    Examples
    --------
    >>> array_equivalent(
    ...     np.array([1, 2, np.nan]),
    ...     np.array([1, 2, np.nan]))
    True
    >>> array_equivalent(
    ...     np.array([1, np.nan, 2]),
    ...     np.array([1, 2, np.nan]))
    False
    """
    left, right = np.asarray(left), np.asarray(right)

    # shape compat
    if left.shape != right.shape:
        return False

    if dtype_equal:
        # fastpath when we require that the dtypes match (Block.equals)
        if is_float_dtype(left.dtype) or is_complex_dtype(left.dtype):
            return _array_equivalent_float(left, right)
        elif is_datetimelike_v_numeric(left.dtype, right.dtype):
            return False
        elif needs_i8_conversion(left.dtype):
            return _array_equivalent_datetimelike(left, right)
        elif is_string_dtype(left.dtype):
            # TODO: fastpath for pandas' StringDtype
            return _array_equivalent_object(left, right, strict_nan)
        else:
            return np.array_equal(left, right)

    # Slow path when we allow comparing different dtypes.
    # Object arrays can contain None, NaN and NaT.
    # string dtypes must come to this path for NumPy 1.7.1 compat
    if is_string_dtype(left.dtype) or is_string_dtype(right.dtype):
        return _array_equivalent_object(left, right, strict_nan)

    # NaNs can occur in float and complex arrays.
    if is_float_dtype(left.dtype) or is_complex_dtype(left.dtype):
        if not (np.prod(left.shape) and np.prod(right.shape)):
            return True
        return ((left == right) | (isna(left) & isna(right))).all()

    elif is_datetimelike_v_numeric(left, right):
        # GH#29553 avoid numpy deprecation warning
        return False

    elif needs_i8_conversion(left.dtype) or needs_i8_conversion(right.dtype):
        # datetime64, timedelta64, Period
        if not is_dtype_equal(left.dtype, right.dtype):
            return False

        left = left.view("i8")
        right = right.view("i8")

    # if we have structured dtypes, compare first
    if left.dtype.type is np.void or right.dtype.type is np.void:
        if left.dtype != right.dtype:
            return False

    return np.array_equal(left, right)

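# Editor's example: a minimal sketch of the array_equivalent contract above,
# assuming the private helper still lives at pandas.core.dtypes.missing
# (internal paths can move between versions). dtype_equal=True is only a
# fastpath hint; callers such as BlockManager.equals must have already
# verified is_dtype_equal.
import numpy as np
from pandas.core.dtypes.missing import array_equivalent

assert array_equivalent(np.array([1.0, np.nan]), np.array([1.0, np.nan]))
assert not array_equivalent(np.array([1.0, np.nan]), np.array([np.nan, 1.0]))
assert array_equivalent(np.array([1.0, np.nan]), np.array([1.0, np.nan]),
                        dtype_equal=True)
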
def astype(self, dtype=None, copy=True):
    """
    Change the dtype of a SparseArray.

    The output will always be a SparseArray. To convert to a dense
    ndarray with a certain dtype, use :meth:`numpy.asarray`.

    Parameters
    ----------
    dtype : np.dtype or ExtensionDtype
        For SparseDtype, this changes the dtype of
        ``self.sp_values`` and the ``self.fill_value``.

        For other dtypes, this only changes the dtype of
        ``self.sp_values``.

    copy : bool, default True
        Whether to ensure a copy is made, even if not necessary.

    Returns
    -------
    SparseArray

    Examples
    --------
    >>> arr = pd.arrays.SparseArray([0, 0, 1, 2])
    >>> arr
    [0, 0, 1, 2]
    Fill: 0
    IntIndex
    Indices: array([2, 3], dtype=int32)

    >>> arr.astype(np.dtype('int32'))
    [0, 0, 1, 2]
    Fill: 0
    IntIndex
    Indices: array([2, 3], dtype=int32)

    Using a NumPy dtype with a different kind (e.g. float) will coerce
    just ``self.sp_values``.

    >>> arr.astype(np.dtype('float64'))
    ... # doctest: +NORMALIZE_WHITESPACE
    [0.0, 0.0, 1.0, 2.0]
    Fill: 0.0
    IntIndex
    Indices: array([2, 3], dtype=int32)

    Use a SparseDtype if you wish to change the fill value as well.

    >>> arr.astype(SparseDtype("float64", fill_value=np.nan))
    ... # doctest: +NORMALIZE_WHITESPACE
    [nan, nan, 1.0, 2.0]
    Fill: nan
    IntIndex
    Indices: array([2, 3], dtype=int32)
    """
    if is_dtype_equal(dtype, self._dtype):
        if not copy:
            return self
        else:
            return self.copy()

    dtype = self.dtype.update_dtype(dtype)
    subtype = dtype._subtype_with_str
    # TODO copy=False is broken for astype_nansafe with int -> float, so
    #  cannot passthrough copy keyword:
    #  https://github.com/pandas-dev/pandas/issues/34456
    sp_values = astype_nansafe(self.sp_values, subtype, copy=True)
    if sp_values is self.sp_values and copy:
        sp_values = sp_values.copy()

    return self._simple_new(sp_values, self.sp_index, dtype)

def _maybe_convert_i8(self, key):
    """
    Maybe convert a given key to its equivalent i8 value(s). Used as a
    preprocessing step prior to IntervalTree queries (self._engine), which
    expects numeric data.

    Parameters
    ----------
    key : scalar or list-like
        The key that should maybe be converted to i8.

    Returns
    -------
    scalar or list-like
        The original key if no conversion occurred, int if converted scalar,
        Int64Index if converted list-like.
    """
    original = key
    if is_list_like(key):
        key = ensure_index(key)

    if not self._needs_i8_conversion(key):
        return original

    scalar = is_scalar(key)
    if is_interval_dtype(key) or isinstance(key, Interval):
        # convert left/right and reconstruct
        left = self._maybe_convert_i8(key.left)
        right = self._maybe_convert_i8(key.right)
        constructor = Interval if scalar else IntervalIndex.from_arrays
        # error: "object" not callable
        return constructor(
            left, right, closed=self.closed)  # type: ignore[operator]

    if scalar:
        # Timestamp/Timedelta
        key_dtype, key_i8 = infer_dtype_from_scalar(key, pandas_dtype=True)
        if lib.is_period(key):
            key_i8 = key.ordinal
        elif isinstance(key_i8, Timestamp):
            key_i8 = key_i8.value
        elif isinstance(key_i8, (np.datetime64, np.timedelta64)):
            key_i8 = key_i8.view("i8")
    else:
        # DatetimeIndex/TimedeltaIndex
        key_dtype, key_i8 = key.dtype, Index(key.asi8)
        if key.hasnans:
            # convert NaT from its i8 value to np.nan so it's not viewed
            # as a valid value, maybe causing errors (e.g. is_overlapping)
            key_i8 = key_i8.where(~key._isnan)

    # ensure consistency with IntervalIndex subtype
    # error: Item "ExtensionDtype"/"dtype[Any]" of "Union[dtype[Any],
    # ExtensionDtype]" has no attribute "subtype"
    subtype = self.dtype.subtype  # type: ignore[union-attr]

    if not is_dtype_equal(subtype, key_dtype):
        raise ValueError(
            f"Cannot index an IntervalIndex of subtype {subtype} with "
            f"values of dtype {key_dtype}")

    return key_i8

def test_construction_from_string(self):
    result = IntervalDtype('interval[int64]')
    assert is_dtype_equal(self.dtype, result)
    result = IntervalDtype.construct_from_string('interval[int64]')
    assert is_dtype_equal(self.dtype, result)

def test_equality_invalid(self):
    assert not self.dtype == 'foo'
    assert not is_dtype_equal(self.dtype, np.int64)

def test_equality_generic(self, subtype):
    # GH 18980
    dtype = IntervalDtype(subtype)
    assert is_dtype_equal(dtype, 'interval')
    assert is_dtype_equal(dtype, IntervalDtype())

def test_equality(self):
    assert is_dtype_equal(self.dtype, 'category')
    assert is_dtype_equal(self.dtype, CategoricalDtype())
    assert not is_dtype_equal(self.dtype, 'foo')

def test_equality_invalid(self, dtype):
    assert not dtype == "foo"
    assert not is_dtype_equal(dtype, np.int64)

def test_infer_dtype_from_array(self, arr, expected, pandas_dtype):
    dtype, _ = infer_dtype_from_array(arr, pandas_dtype=pandas_dtype)
    assert is_dtype_equal(dtype, expected)

def init_ndarray(values, index, columns, dtype=None, copy=False):
    # input must be a ndarray, list, Series, index

    if isinstance(values, ABCSeries):
        if columns is None:
            if values.name is not None:
                columns = [values.name]
        if index is None:
            index = values.index
        else:
            values = values.reindex(index)

        # zero len case (GH #2234)
        if not len(values) and columns is not None and len(columns):
            values = np.empty((0, 1), dtype=object)

    # we could have a categorical type passed or coerced to 'category'
    # recast this to an arrays_to_mgr
    if (is_categorical_dtype(getattr(values, 'dtype', None)) or
            is_categorical_dtype(dtype)):

        if not hasattr(values, 'dtype'):
            values = prep_ndarray(values, copy=copy)
            values = values.ravel()
        elif copy:
            values = values.copy()

        index, columns = _get_axes(len(values), 1, index, columns)
        return arrays_to_mgr([values], columns, index, columns, dtype=dtype)
    elif is_extension_array_dtype(values):
        # GH#19157
        if columns is None:
            columns = [0]
        return arrays_to_mgr([values], columns, index, columns, dtype=dtype)

    # by definition an array here
    # the dtypes will be coerced to a single dtype
    values = prep_ndarray(values, copy=copy)

    if dtype is not None:
        if not is_dtype_equal(values.dtype, dtype):
            try:
                values = values.astype(dtype)
            except Exception as orig:
                e = ValueError("failed to cast to '{dtype}' (Exception "
                               "was: {orig})".format(dtype=dtype, orig=orig))
                raise_with_traceback(e)

    index, columns = _get_axes(*values.shape, index=index, columns=columns)
    values = values.T

    # if we don't have a dtype specified, then try to convert objects
    # on the entire block; this is to convert if we have datetimelike's
    # embedded in an object type
    if dtype is None and is_object_dtype(values):

        if values.ndim == 2 and values.shape[0] != 1:
            # transpose and separate blocks
            dvals_list = [maybe_infer_to_datetimelike(row) for row in values]
            for n in range(len(dvals_list)):
                if isinstance(dvals_list[n], np.ndarray):
                    dvals_list[n] = dvals_list[n].reshape(1, -1)

            from pandas.core.internals.blocks import make_block

            # TODO: What about re-joining object columns?
            block_values = [make_block(dvals_list[n], placement=[n])
                            for n in range(len(dvals_list))]

        else:
            datelike_vals = maybe_infer_to_datetimelike(values)
            block_values = [datelike_vals]
    else:
        block_values = [values]

    return create_block_manager_from_blocks(block_values, [columns, index])

def test_infer_dtype_from_scalar(value, expected, pandas_dtype):
    dtype, _ = infer_dtype_from_scalar(value, pandas_dtype=pandas_dtype)
    assert is_dtype_equal(dtype, expected)

    with pytest.raises(TypeError, match="must be list-like"):
        infer_dtype_from_array(value, pandas_dtype=pandas_dtype)

def ndarray_to_mgr(values, index, columns, dtype: Optional[DtypeObj],
                   copy: bool):
    # used in DataFrame.__init__
    # input must be a ndarray, list, Series, index

    if isinstance(values, ABCSeries):
        if columns is None:
            if values.name is not None:
                columns = [values.name]
        if index is None:
            index = values.index
        else:
            values = values.reindex(index)

        # zero len case (GH #2234)
        if not len(values) and columns is not None and len(columns):
            values = np.empty((0, 1), dtype=object)

    if is_extension_array_dtype(values) or is_extension_array_dtype(dtype):
        # GH#19157

        if isinstance(values, np.ndarray) and values.ndim > 1:
            # GH#12513 a EA dtype passed with a 2D array, split into
            # multiple EAs that view the values
            values = [values[:, n] for n in range(values.shape[1])]
        else:
            values = [values]

        if columns is None:
            columns = Index(range(len(values)))

        return arrays_to_mgr(values, columns, index, columns, dtype=dtype)

    # by definition an array here
    # the dtypes will be coerced to a single dtype
    values = _prep_ndarray(values, copy=copy)

    if dtype is not None and not is_dtype_equal(values.dtype, dtype):
        try:
            values = construct_1d_ndarray_preserving_na(
                values.ravel(), dtype=dtype, copy=False
            ).reshape(values.shape)
        except Exception as orig:
            # e.g. ValueError when trying to cast object dtype to float64
            raise ValueError(
                f"failed to cast to '{dtype}' (Exception was: {orig})"
            ) from orig

    # _prep_ndarray ensures that values.ndim == 2 at this point
    index, columns = _get_axes(
        values.shape[0], values.shape[1], index=index, columns=columns
    )
    values = values.T

    # if we don't have a dtype specified, then try to convert objects
    # on the entire block; this is to convert if we have datetimelike's
    # embedded in an object type
    if dtype is None and is_object_dtype(values.dtype):

        if values.ndim == 2 and values.shape[0] != 1:
            # transpose and separate blocks
            dvals_list = [maybe_infer_to_datetimelike(row) for row in values]
            for n in range(len(dvals_list)):
                if isinstance(dvals_list[n], np.ndarray):
                    dvals_list[n] = dvals_list[n].reshape(1, -1)

            from pandas.core.internals.blocks import make_block

            # TODO: What about re-joining object columns?
            block_values = [
                make_block(dvals_list[n], placement=[n], ndim=2)
                for n in range(len(dvals_list))
            ]

        else:
            datelike_vals = maybe_infer_to_datetimelike(values)
            block_values = [datelike_vals]
    else:
        block_values = [values]

    return create_block_manager_from_blocks(block_values, [columns, index])

def equals(self, other) -> bool:
    if type(self) is not type(other):
        return False
    if not is_dtype_equal(self.dtype, other.dtype):
        return False
    return bool(array_equivalent(self._ndarray, other._ndarray))

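# Editor's example: the same equals contract, seen through the public
# ExtensionArray.equals (available in pandas >= 1.1; hedged): a dtype mismatch
# short-circuits to False via is_dtype_equal before any values are compared.
import pandas as pd

a = pd.array([1, 2, None], dtype='Int64')
assert a.equals(pd.array([1, 2, None], dtype='Int64'))      # NA-aware equality
assert not a.equals(pd.array([1, 2, None], dtype='Int32'))  # dtype gate fails
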
def test_equality(self):
    self.assertTrue(is_dtype_equal(self.dtype, 'category'))
    self.assertTrue(is_dtype_equal(self.dtype, CategoricalDtype()))
    self.assertFalse(is_dtype_equal(self.dtype, 'foo'))

def test_dtype_equal(name1, dtype1, name2, dtype2):
    # match equal to self, but not equal to other
    assert com.is_dtype_equal(dtype1, dtype1)
    if name1 != name2:
        assert not com.is_dtype_equal(dtype1, dtype2)

def test_equality_invalid(self):
    # comparing to an arbitrary string is simply unequal; it should not raise
    self.assertFalse(self.dtype == 'foo')
    self.assertFalse(is_dtype_equal(self.dtype, np.int64))

def maybe_cast_to_datetime(value, dtype, errors: str = "raise"):
    """
    try to cast the array/value to a datetimelike dtype, converting float
    nan to iNaT
    """
    from pandas.core.tools.timedeltas import to_timedelta
    from pandas.core.tools.datetimes import to_datetime

    if dtype is not None:
        if isinstance(dtype, str):
            dtype = np.dtype(dtype)

        is_datetime64 = is_datetime64_dtype(dtype)
        is_datetime64tz = is_datetime64tz_dtype(dtype)
        is_timedelta64 = is_timedelta64_dtype(dtype)

        if is_datetime64 or is_datetime64tz or is_timedelta64:

            # Force the dtype if needed.
            msg = (f"The '{dtype.name}' dtype has no unit. "
                   f"Please pass in '{dtype.name}[ns]' instead.")

            if is_datetime64 and not is_dtype_equal(dtype, _NS_DTYPE):

                # pandas supports dtype whose granularity is less than [ns]
                # e.g., [ps], [fs], [as]
                if dtype <= np.dtype("M8[ns]"):
                    if dtype.name == "datetime64":
                        raise ValueError(msg)
                    dtype = _NS_DTYPE
                else:
                    raise TypeError(
                        f"cannot convert datetimelike to dtype [{dtype}]")
            elif is_datetime64tz:

                # our NaT doesn't support tz's
                # this will coerce to DatetimeIndex with
                # a matching dtype below
                if is_scalar(value) and isna(value):
                    value = [value]

            elif is_timedelta64 and not is_dtype_equal(dtype, _TD_DTYPE):

                # pandas supports dtype whose granularity is less than [ns]
                # e.g., [ps], [fs], [as]
                if dtype <= np.dtype("m8[ns]"):
                    if dtype.name == "timedelta64":
                        raise ValueError(msg)
                    dtype = _TD_DTYPE
                else:
                    raise TypeError(
                        f"cannot convert timedeltalike to dtype [{dtype}]")

            if is_scalar(value):
                if value == iNaT or isna(value):
                    value = iNaT
            else:
                value = np.array(value, copy=False)

                # have a scalar array-like (e.g. NaT)
                if value.ndim == 0:
                    value = iNaT

                # we have an array of datetime or timedeltas & nulls
                elif np.prod(value.shape) or not is_dtype_equal(value.dtype,
                                                                dtype):
                    try:
                        if is_datetime64:
                            value = to_datetime(value, errors=errors)
                            # GH 25843: Remove tz information since the dtype
                            # didn't specify one
                            if value.tz is not None:
                                value = value.tz_localize(None)
                            value = value._values
                        elif is_datetime64tz:
                            # The string check can be removed once issue
                            # #13712 is solved. String data that is passed
                            # with a datetime64tz is assumed to be naive which
                            # should be localized to the timezone.
                            is_dt_string = is_string_dtype(value)
                            value = to_datetime(value, errors=errors).array
                            if is_dt_string:
                                # Strings here are naive, so directly localize
                                value = value.tz_localize(dtype.tz)
                            else:
                                # Numeric values are UTC at this point,
                                # so localize and convert
                                value = value.tz_localize("UTC").tz_convert(
                                    dtype.tz)
                        elif is_timedelta64:
                            value = to_timedelta(value, errors=errors)._values
                    except OutOfBoundsDatetime:
                        raise
                    except (AttributeError, ValueError, TypeError):
                        pass

        # coerce datetimelike to object
        elif is_datetime64_dtype(value) and not is_datetime64_dtype(dtype):
            if is_object_dtype(dtype):
                if value.dtype != _NS_DTYPE:
                    value = value.astype(_NS_DTYPE)
                ints = np.asarray(value).view("i8")
                return tslib.ints_to_pydatetime(ints)

            # we have a non-castable dtype that was passed
            raise TypeError(f"Cannot cast datetime64 to {dtype}")

    else:
        is_array = isinstance(value, np.ndarray)

        # catch a datetime/timedelta that is not of ns variety
        # and no coercion specified
        if is_array and value.dtype.kind in ["M", "m"]:
            dtype = value.dtype

            if dtype.kind == "M" and dtype != _NS_DTYPE:
                value = tslibs.conversion.ensure_datetime64ns(value)

            elif dtype.kind == "m" and dtype != _TD_DTYPE:
                value = to_timedelta(value)

        # only do this if we have an array and the dtype of the array is not
        # setup already we are not an integer/object, so don't bother with
        # this conversion
        elif not (is_array and not (issubclass(value.dtype.type, np.integer)
                                    or value.dtype == np.object_)):
            value = maybe_infer_to_datetimelike(value)

    return value

def _convert_to_ndarrays(self, dct, na_values, na_fvalues, verbose=False,
                         converters=None, dtypes=None):
    result = {}
    for c, values in dct.items():
        conv_f = None if converters is None else converters.get(c, None)
        if isinstance(dtypes, dict):
            cast_type = dtypes.get(c, None)
        else:
            # single dtype or None
            cast_type = dtypes

        if self.na_filter:
            col_na_values, col_na_fvalues = _get_na_values(
                c, na_values, na_fvalues, self.keep_default_na)
        else:
            col_na_values, col_na_fvalues = set(), set()

        if conv_f is not None:
            # conv_f applied to data before inference
            if cast_type is not None:
                warnings.warn(
                    ("Both a converter and dtype were specified "
                     f"for column {c} - only the converter will be used"),
                    ParserWarning,
                    stacklevel=7,
                )

            try:
                values = lib.map_infer(values, conv_f)
            except ValueError:
                mask = algorithms.isin(values, list(na_values)).view(np.uint8)
                values = lib.map_infer_mask(values, conv_f, mask)

            cvals, na_count = self._infer_types(
                values, set(col_na_values) | col_na_fvalues,
                try_num_bool=False)
        else:
            is_ea = is_extension_array_dtype(cast_type)
            is_str_or_ea_dtype = is_ea or is_string_dtype(cast_type)
            # skip inference if specified dtype is object
            # or casting to an EA
            try_num_bool = not (cast_type and is_str_or_ea_dtype)

            # general type inference and conversion
            cvals, na_count = self._infer_types(
                values, set(col_na_values) | col_na_fvalues, try_num_bool)

            # type specified in dtype param or cast_type is an EA
            if cast_type and (not is_dtype_equal(cvals, cast_type)
                              or is_extension_array_dtype(cast_type)):
                if not is_ea and na_count > 0:
                    try:
                        if is_bool_dtype(cast_type):
                            raise ValueError(
                                f"Bool column has NA values in column {c}")
                    except (AttributeError, TypeError):
                        # invalid input to is_bool_dtype
                        pass
                cast_type = pandas_dtype(cast_type)
                cvals = self._cast_types(cvals, cast_type, c)

        result[c] = cvals
        if verbose and na_count:
            print(f"Filled {na_count} NA values in column {c!s}")
    return result

def ndarray_to_mgr(values, index, columns, dtype: Optional[DtypeObj],
                   copy: bool, typ: str) -> Manager:
    # used in DataFrame.__init__
    # input must be a ndarray, list, Series, Index, ExtensionArray

    if isinstance(values, ABCSeries):
        if columns is None:
            if values.name is not None:
                columns = Index([values.name])
        if index is None:
            index = values.index
        else:
            values = values.reindex(index)

        # zero len case (GH #2234)
        if not len(values) and columns is not None and len(columns):
            values = np.empty((0, 1), dtype=object)

    if is_extension_array_dtype(values) or is_extension_array_dtype(dtype):
        # GH#19157

        if isinstance(values, np.ndarray) and values.ndim > 1:
            # GH#12513 a EA dtype passed with a 2D array, split into
            # multiple EAs that view the values
            values = [values[:, n] for n in range(values.shape[1])]
        else:
            values = [values]

        if columns is None:
            columns = Index(range(len(values)))

        return arrays_to_mgr(values, columns, index, columns,
                             dtype=dtype, typ=typ)

    # by definition an array here
    # the dtypes will be coerced to a single dtype
    values = _prep_ndarray(values, copy=copy)

    if dtype is not None and not is_dtype_equal(values.dtype, dtype):
        shape = values.shape
        flat = values.ravel()

        if not is_integer_dtype(dtype):
            # TODO: skipping integer_dtype is needed to keep the tests
            #  passing, not clear it is correct
            # Note: we really only need _try_cast, but keeping to exposed
            #  funcs
            values = sanitize_array(flat, None, dtype=dtype, copy=copy,
                                    raise_cast_failure=True)
        else:
            try:
                values = construct_1d_ndarray_preserving_na(flat, dtype=dtype,
                                                            copy=False)
            except Exception as err:
                # e.g. ValueError when trying to cast object dtype to float64
                msg = f"failed to cast to '{dtype}' (Exception was: {err})"
                raise ValueError(msg) from err
        values = values.reshape(shape)

    # _prep_ndarray ensures that values.ndim == 2 at this point
    index, columns = _get_axes(values.shape[0], values.shape[1],
                               index=index, columns=columns)
    values = values.T

    # if we don't have a dtype specified, then try to convert objects
    # on the entire block; this is to convert if we have datetimelike's
    # embedded in an object type
    if dtype is None and is_object_dtype(values.dtype):

        if values.ndim == 2 and values.shape[0] != 1:
            # transpose and separate blocks
            dvals_list = [maybe_infer_to_datetimelike(row) for row in values]
            dvals_list = [ensure_block_shape(dval, 2) for dval in dvals_list]

            # TODO: What about re-joining object columns?
            dvals_list = [maybe_squeeze_dt64tz(x) for x in dvals_list]
            block_values = [
                new_block(dvals_list[n], placement=n, ndim=2)
                for n in range(len(dvals_list))
            ]

        else:
            datelike_vals = maybe_infer_to_datetimelike(values)
            datelike_vals = maybe_squeeze_dt64tz(datelike_vals)
            block_values = [datelike_vals]
    else:
        # error: List item 0 has incompatible type "Union[ExtensionArray,
        # ndarray]"; expected "Block"
        block_values = [
            maybe_squeeze_dt64tz(values)]  # type: ignore[list-item]

    return create_block_manager_from_blocks(block_values, [columns, index])

def _sparse_array_op(left, right, op, name, series=False):

    if series and is_integer_dtype(left) and is_integer_dtype(right):
        # series coerces to float64 if result should have NaN/inf
        if name in ('floordiv', 'mod') and (right.values == 0).any():
            left = left.astype(np.float64)
            right = right.astype(np.float64)
        elif name in ('rfloordiv', 'rmod') and (left.values == 0).any():
            left = left.astype(np.float64)
            right = right.astype(np.float64)

    # dtype used to find corresponding sparse method
    if not is_dtype_equal(left.dtype, right.dtype):
        dtype = find_common_type([left.dtype, right.dtype])
        left = left.astype(dtype)
        right = right.astype(dtype)
    else:
        dtype = left.dtype

    # dtype the result must have
    result_dtype = None

    if left.sp_index.ngaps == 0 or right.sp_index.ngaps == 0:
        with np.errstate(all='ignore'):
            result = op(left.get_values(), right.get_values())
            fill = op(_get_fill(left), _get_fill(right))

        if left.sp_index.ngaps == 0:
            index = left.sp_index
        else:
            index = right.sp_index
    elif left.sp_index.equals(right.sp_index):
        with np.errstate(all='ignore'):
            result = op(left.sp_values, right.sp_values)
            fill = op(_get_fill(left), _get_fill(right))
        index = left.sp_index
    else:
        if name[0] == 'r':
            left, right = right, left
            name = name[1:]

        if name in ('and', 'or') and dtype == 'bool':
            opname = 'sparse_{name}_uint8'.format(name=name)
            # to make template simple, cast here
            left_sp_values = left.sp_values.view(np.uint8)
            right_sp_values = right.sp_values.view(np.uint8)
            result_dtype = np.bool
        else:
            opname = 'sparse_{name}_{dtype}'.format(name=name, dtype=dtype)
            left_sp_values = left.sp_values
            right_sp_values = right.sp_values

        sparse_op = getattr(splib, opname)
        with np.errstate(all='ignore'):
            result, index, fill = sparse_op(left_sp_values, left.sp_index,
                                            left.fill_value, right_sp_values,
                                            right.sp_index, right.fill_value)

    if result_dtype is None:
        result_dtype = result.dtype

    return _wrap_result(name, result, index, fill, dtype=result_dtype)

def test_equality_generic(self, subtype):
    # GH 18980
    inclusive = "right" if subtype is not None else None
    dtype = IntervalDtype(subtype, inclusive=inclusive)
    assert is_dtype_equal(dtype, "interval")
    assert is_dtype_equal(dtype, IntervalDtype())

def _sparse_array_op(left: ABCSparseArray, right: ABCSparseArray,
                     op: Callable, name: str) -> Any:
    """
    Perform a binary operation between two arrays.

    Parameters
    ----------
    left : Union[SparseArray, ndarray]
    right : Union[SparseArray, ndarray]
    op : Callable
        The binary operation to perform
    name : str
        Name of the callable.

    Returns
    -------
    SparseArray
    """
    if name.startswith("__"):
        # For lookups in _libs.sparse we need non-dunder op name
        name = name[2:-2]

    # dtype used to find corresponding sparse method
    ltype = left.dtype.subtype
    rtype = right.dtype.subtype

    if not is_dtype_equal(ltype, rtype):
        subtype = find_common_type([ltype, rtype])
        ltype = SparseDtype(subtype, left.fill_value)
        rtype = SparseDtype(subtype, right.fill_value)

        # TODO(GH-23092): pass copy=False. Need to fix astype_nansafe
        left = left.astype(ltype)
        right = right.astype(rtype)
        dtype = ltype.subtype
    else:
        dtype = ltype

    # dtype the result must have
    result_dtype = None

    if left.sp_index.ngaps == 0 or right.sp_index.ngaps == 0:
        with np.errstate(all="ignore"):
            result = op(left.to_dense(), right.to_dense())
            fill = op(_get_fill(left), _get_fill(right))

        if left.sp_index.ngaps == 0:
            index = left.sp_index
        else:
            index = right.sp_index
    elif left.sp_index.equals(right.sp_index):
        with np.errstate(all="ignore"):
            result = op(left.sp_values, right.sp_values)
            fill = op(_get_fill(left), _get_fill(right))
        index = left.sp_index
    else:
        if name[0] == "r":
            left, right = right, left
            name = name[1:]

        if name in ("and", "or", "xor") and dtype == "bool":
            opname = f"sparse_{name}_uint8"
            # to make template simple, cast here
            left_sp_values = left.sp_values.view(np.uint8)
            right_sp_values = right.sp_values.view(np.uint8)
            result_dtype = np.bool
        else:
            opname = f"sparse_{name}_{dtype}"
            left_sp_values = left.sp_values
            right_sp_values = right.sp_values

        sparse_op = getattr(splib, opname)

        with np.errstate(all="ignore"):
            result, index, fill = sparse_op(
                left_sp_values,
                left.sp_index,
                left.fill_value,
                right_sp_values,
                right.sp_index,
                right.fill_value,
            )

    if result_dtype is None:
        result_dtype = result.dtype

    return _wrap_result(name, result, index, fill, dtype=result_dtype)

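# Editor's example: what the common-subtype promotion above means in practice,
# sketched with the public SparseArray only. Mixing int64 and float64 sparse
# operands casts both sides to the common subtype before the op is dispatched.
import numpy as np
import pandas as pd

left = pd.arrays.SparseArray([0, 1, 0])          # Sparse[int64, 0]
right = pd.arrays.SparseArray([0.0, 0.5, 0.0])   # Sparse[float64, 0.0]
result = left + right
assert result.dtype.subtype == np.dtype('float64')
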
def array_equivalent(left, right, strict_nan: bool = False) -> bool:
    """
    True if two arrays, left and right, have equal non-NaN elements, and NaNs
    in corresponding locations.  False otherwise. It is assumed that left and
    right are NumPy arrays of the same dtype. The behavior of this function
    (particularly with respect to NaNs) is not defined if the dtypes are
    different.

    Parameters
    ----------
    left, right : ndarrays
    strict_nan : bool, default False
        If True, consider NaN and None to be different.

    Returns
    -------
    b : bool
        Returns True if the arrays are equivalent.

    Examples
    --------
    >>> array_equivalent(
    ...     np.array([1, 2, np.nan]),
    ...     np.array([1, 2, np.nan]))
    True
    >>> array_equivalent(
    ...     np.array([1, np.nan, 2]),
    ...     np.array([1, 2, np.nan]))
    False
    """
    left, right = np.asarray(left), np.asarray(right)

    # shape compat
    if left.shape != right.shape:
        return False

    # Object arrays can contain None, NaN and NaT.
    # string dtypes must come to this path for NumPy 1.7.1 compat
    if is_string_dtype(left) or is_string_dtype(right):

        if not strict_nan:
            # isna considers NaN and None to be equivalent.
            return lib.array_equivalent_object(
                ensure_object(left.ravel()), ensure_object(right.ravel()))

        for left_value, right_value in zip(left, right):
            if left_value is NaT and right_value is not NaT:
                return False

            elif (left_value is libmissing.NA
                  and right_value is not libmissing.NA):
                return False

            elif isinstance(left_value, float) and np.isnan(left_value):
                if (not isinstance(right_value, float)
                        or not np.isnan(right_value)):
                    return False
            else:
                try:
                    if np.any(np.asarray(left_value != right_value)):
                        return False
                except TypeError as err:
                    if "Cannot compare tz-naive" in str(err):
                        # tzawareness compat failure, see GH#28507
                        return False
                    elif "boolean value of NA is ambiguous" in str(err):
                        return False
                    raise
        return True

    # NaNs can occur in float and complex arrays.
    if is_float_dtype(left) or is_complex_dtype(left):
        # empty
        if not (np.prod(left.shape) and np.prod(right.shape)):
            return True
        return ((left == right) | (isna(left) & isna(right))).all()

    elif is_datetimelike_v_numeric(left, right):
        # GH#29553 avoid numpy deprecation warning
        return False

    elif needs_i8_conversion(left) or needs_i8_conversion(right):
        # datetime64, timedelta64, Period
        if not is_dtype_equal(left.dtype, right.dtype):
            return False

        left = left.view("i8")
        right = right.view("i8")

    # if we have structured dtypes, compare first
    if left.dtype.type is np.void or right.dtype.type is np.void:
        if left.dtype != right.dtype:
            return False

    return np.array_equal(left, right)

def _can_union_without_object_cast(self, other) -> bool:
    return is_dtype_equal(self.dtype, other.dtype)

def test_construction_from_string(self):
    result = CategoricalDtype.construct_from_string('category')
    assert is_dtype_equal(self.dtype, result)
    pytest.raises(TypeError,
                  lambda: CategoricalDtype.construct_from_string('foo'))