def test_infer_dtype_from_python_scalar(self): data = 12 dtype, val = infer_dtype_from_scalar(data) assert dtype == np.int64 data = np.float(12) dtype, val = infer_dtype_from_scalar(data) assert dtype == np.float64
def testinfer_from_scalar_tz(self, tz): dt = Timestamp(1, tz=tz) dtype, val = infer_dtype_from_scalar(dt, pandas_dtype=True) assert dtype == 'datetime64[ns, {0}]'.format(tz) assert val == dt.value dtype, val = infer_dtype_from_scalar(dt) assert dtype == np.object_ assert val == dt
def test_infer_dtype_from_period(self, freq): p = Period("2011-01-01", freq=freq) dtype, val = infer_dtype_from_scalar(p, pandas_dtype=True) assert dtype == "period[{0}]".format(freq) assert val == p.ordinal dtype, val = infer_dtype_from_scalar(p) assert dtype == np.object_ assert val == p
def _hash_scalar(val, encoding='utf8', hash_key=None): """ Hash scalar value Returns ------- 1d uint64 numpy array of hash value, of length 1 """ if isna(val): # this is to be consistent with the _hash_categorical implementation return np.array([np.iinfo(np.uint64).max], dtype='u8') if getattr(val, 'tzinfo', None) is not None: # for tz-aware datetimes, we need the underlying naive UTC value and # not the tz aware object or pd extension type (as # infer_dtype_from_scalar would do) if not isinstance(val, tslibs.Timestamp): val = tslibs.Timestamp(val) val = val.tz_convert(None) dtype, val = infer_dtype_from_scalar(val) vals = np.array([val], dtype=dtype) return hash_array(vals, hash_key=hash_key, encoding=encoding, categorize=False)
def test_infer_dtype_from_scalar(self): # Test that _infer_dtype_from_scalar is returning correct dtype for int # and float. for dtypec in [np.uint8, np.int8, np.uint16, np.int16, np.uint32, np.int32, np.uint64, np.int64]: data = dtypec(12) dtype, val = infer_dtype_from_scalar(data) assert dtype == type(data) data = 12 dtype, val = infer_dtype_from_scalar(data) assert dtype == np.int64 for dtypec in [np.float16, np.float32, np.float64]: data = dtypec(12) dtype, val = infer_dtype_from_scalar(data) assert dtype == dtypec data = np.float(12) dtype, val = infer_dtype_from_scalar(data) assert dtype == np.float64 for data in [True, False]: dtype, val = infer_dtype_from_scalar(data) assert dtype == np.bool_ for data in [np.complex64(1), np.complex128(1)]: dtype, val = infer_dtype_from_scalar(data) assert dtype == np.complex_ for data in [np.datetime64(1, 'ns'), Timestamp(1), datetime(2000, 1, 1, 0, 0)]: dtype, val = infer_dtype_from_scalar(data) assert dtype == 'M8[ns]' for data in [np.timedelta64(1, 'ns'), Timedelta(1), timedelta(1)]: dtype, val = infer_dtype_from_scalar(data) assert dtype == 'm8[ns]' for data in [date(2000, 1, 1), Timestamp(1, tz='US/Eastern'), 'foo']: dtype, val = infer_dtype_from_scalar(data) assert dtype == np.object_
def test_infer_dtype_from_period(freq, pandas_dtype): p = Period("2011-01-01", freq=freq) dtype, val = infer_dtype_from_scalar(p, pandas_dtype=pandas_dtype) if pandas_dtype: exp_dtype = "period[{0}]".format(freq) exp_val = p.ordinal else: exp_dtype = np.object_ exp_val = p assert dtype == exp_dtype assert val == exp_val
def test_infer_from_scalar_tz(tz, pandas_dtype): dt = Timestamp(1, tz=tz) dtype, val = infer_dtype_from_scalar(dt, pandas_dtype=pandas_dtype) if pandas_dtype: exp_dtype = "datetime64[ns, {0}]".format(tz) exp_val = dt.value else: exp_dtype = np.object_ exp_val = dt assert dtype == exp_dtype assert val == exp_val
def _maybe_convert_i8(self, key): """ Maybe convert a given key to it's equivalent i8 value(s). Used as a preprocessing step prior to IntervalTree queries (self._engine), which expects numeric data. Parameters ---------- key : scalar or list-like The key that should maybe be converted to i8. Returns ------- key: scalar or list-like The original key if no conversion occured, int if converted scalar, Int64Index if converted list-like. """ original = key if is_list_like(key): key = ensure_index(key) if not self._needs_i8_conversion(key): return original scalar = is_scalar(key) if is_interval_dtype(key) or isinstance(key, Interval): # convert left/right and reconstruct left = self._maybe_convert_i8(key.left) right = self._maybe_convert_i8(key.right) constructor = Interval if scalar else IntervalIndex.from_arrays return constructor(left, right, closed=self.closed) if scalar: # Timestamp/Timedelta key_dtype, key_i8 = infer_dtype_from_scalar(key, pandas_dtype=True) else: # DatetimeIndex/TimedeltaIndex key_dtype, key_i8 = key.dtype, Index(key.asi8) if key.hasnans: # convert NaT from it's i8 value to np.nan so it's not viewed # as a valid value, maybe causing errors (e.g. is_overlapping) key_i8 = key_i8.where(~key._isnan) # ensure consistency with IntervalIndex subtype subtype = self.dtype.subtype msg = ('Cannot index an IntervalIndex of subtype {subtype} with ' 'values of dtype {other}') if not is_dtype_equal(subtype, key_dtype): raise ValueError(msg.format(subtype=subtype, other=key_dtype)) return key_i8
def __new__(cls, data, sparse_index=None, index=None, kind='integer', fill_value=None, dtype=None, copy=False): if index is not None: if data is None: data = np.nan if not is_scalar(data): raise Exception("must only pass scalars with an index ") dtype = infer_dtype_from_scalar(data)[0] data = construct_1d_arraylike_from_scalar( data, len(index), dtype) if isinstance(data, ABCSparseSeries): data = data.values is_sparse_array = isinstance(data, SparseArray) if dtype is not None: dtype = np.dtype(dtype) if is_sparse_array: sparse_index = data.sp_index values = data.sp_values fill_value = data.fill_value else: # array-like if sparse_index is None: if dtype is not None: data = np.asarray(data, dtype=dtype) res = make_sparse(data, kind=kind, fill_value=fill_value) values, sparse_index, fill_value = res else: values = _sanitize_values(data) if len(values) != sparse_index.npoints: raise AssertionError("Non array-like type {type} must " "have the same length as the index" .format(type=type(values))) # Create array, do *not* copy data by default if copy: subarr = np.array(values, dtype=dtype, copy=True) else: subarr = np.asarray(values, dtype=dtype) # Change the class of the array to be the subclass type. return cls._simple_new(subarr, sparse_index, fill_value)
def test_infer_dtype_from_scalar_errors(): msg = "invalid ndarray passed to infer_dtype_from_scalar" with pytest.raises(ValueError, match=msg): infer_dtype_from_scalar(np.array([1]))
def test_infer_dtype_from_int_scalar(any_int_dtype): # Test that infer_dtype_from_scalar is # returning correct dtype for int and float. data = np.dtype(any_int_dtype).type(12) dtype, val = infer_dtype_from_scalar(data) assert dtype == type(data)
def sanitize_array(data, index, dtype=None, copy=False, raise_cast_failure=False): """ Sanitize input data to an ndarray, copy if specified, coerce to the dtype if specified. """ if dtype is not None: dtype = pandas_dtype(dtype) if isinstance(data, ma.MaskedArray): mask = ma.getmaskarray(data) if mask.any(): data, fill_value = maybe_upcast(data, copy=True) data.soften_mask() # set hardmask False if it was True data[mask] = fill_value else: data = data.copy() data = extract_array(data, extract_numpy=True) # GH#846 if isinstance(data, np.ndarray): if dtype is not None: subarr = np.array(data, copy=False) # possibility of nan -> garbage if is_float_dtype(data.dtype) and is_integer_dtype(dtype): try: subarr = _try_cast(data, True, dtype, copy, True) except ValueError: if copy: subarr = data.copy() else: subarr = _try_cast(data, True, dtype, copy, raise_cast_failure) elif isinstance(data, Index): # don't coerce Index types # e.g. indexes can have different conversions (so don't fast path # them) # GH#6140 subarr = sanitize_index(data, index, copy=copy) else: # we will try to copy be-definition here subarr = _try_cast(data, True, dtype, copy, raise_cast_failure) elif isinstance(data, ExtensionArray): if isinstance(data, ABCPandasArray): # We don't want to let people put our PandasArray wrapper # (the output of Series/Index.array), into a Series. So # we explicitly unwrap it here. subarr = data.to_numpy() else: subarr = data # everything else in this block must also handle ndarray's, # because we've unwrapped PandasArray into an ndarray. if dtype is not None: subarr = data.astype(dtype) if copy: subarr = data.copy() return subarr elif isinstance(data, (list, tuple)) and len(data) > 0: if dtype is not None: try: subarr = _try_cast(data, False, dtype, copy, raise_cast_failure) except Exception: if raise_cast_failure: # pragma: no cover raise subarr = np.array(data, dtype=object, copy=copy) subarr = lib.maybe_convert_objects(subarr) else: subarr = maybe_convert_platform(data) subarr = maybe_cast_to_datetime(subarr, dtype) elif isinstance(data, range): # GH#16804 arr = np.arange(data.start, data.stop, data.step, dtype='int64') subarr = _try_cast(arr, False, dtype, copy, raise_cast_failure) else: subarr = _try_cast(data, False, dtype, copy, raise_cast_failure) # scalar like, GH if getattr(subarr, 'ndim', 0) == 0: if isinstance(data, list): # pragma: no cover subarr = np.array(data, dtype=object) elif index is not None: value = data # figure out the dtype from the value (upcast if necessary) if dtype is None: dtype, value = infer_dtype_from_scalar(value) else: # need to possibly convert the value here value = maybe_cast_to_datetime(value, dtype) subarr = construct_1d_arraylike_from_scalar( value, len(index), dtype) else: return subarr.item() # the result that we want elif subarr.ndim == 1: if index is not None: # a 1-element ndarray if len(subarr) != len(index) and len(subarr) == 1: subarr = construct_1d_arraylike_from_scalar( subarr[0], len(index), subarr.dtype) elif subarr.ndim > 1: if isinstance(data, np.ndarray): raise Exception('Data must be 1-dimensional') else: subarr = com.asarray_tuplesafe(data, dtype=dtype) # This is to prevent mixed-type Series getting all casted to # NumPy string type, e.g. NaN --> '-1#IND'. if issubclass(subarr.dtype.type, str): # GH#16605 # If not empty convert the data to dtype # GH#19853: If data is a scalar, subarr has already the result if not lib.is_scalar(data): if not np.all(isna(data)): data = np.array(data, dtype=dtype, copy=False) subarr = np.array(data, dtype=object, copy=copy) if is_object_dtype(subarr.dtype) and dtype != 'object': inferred = lib.infer_dtype(subarr, skipna=False) if inferred == 'period': try: subarr = period_array(subarr) except IncompatibleFrequency: pass return subarr
def test_infer_dtype_from_boolean(bool_val): dtype, val = infer_dtype_from_scalar(bool_val) assert dtype == np.bool_
def test_infer_dtype_from_datetime(data): dtype, val = infer_dtype_from_scalar(data) assert dtype == "M8[ns]"
def test_infer_dtype_misc(data): dtype, val = infer_dtype_from_scalar(data) assert dtype == np.object_
def test_infer_dtype_from_scalar(value, expected, pandas_dtype): dtype, _ = infer_dtype_from_scalar(value, pandas_dtype=pandas_dtype) assert is_dtype_equal(dtype, expected) with pytest.raises(TypeError, match="must be list-like"): infer_dtype_from_array(value, pandas_dtype=pandas_dtype)
def test_infer_dtype_from_boolean(bool_val): dtype, val = infer_dtype_from_scalar(bool_val) assert dtype == np.bool_
def sanitize_array( data, index: Optional[Index], dtype: Optional[DtypeObj] = None, copy: bool = False, raise_cast_failure: bool = False, ) -> ArrayLike: """ Sanitize input data to an ndarray or ExtensionArray, copy if specified, coerce to the dtype if specified. """ if isinstance(data, ma.MaskedArray): mask = ma.getmaskarray(data) if mask.any(): data, fill_value = maybe_upcast(data, copy=True) data.soften_mask() # set hardmask False if it was True data[mask] = fill_value else: data = data.copy() # extract ndarray or ExtensionArray, ensure we have no PandasArray data = extract_array(data, extract_numpy=True) # GH#846 if isinstance(data, np.ndarray): if dtype is not None and is_float_dtype( data.dtype) and is_integer_dtype(dtype): # possibility of nan -> garbage try: subarr = _try_cast(data, dtype, copy, True) except ValueError: if copy: subarr = data.copy() else: subarr = np.array(data, copy=False) else: # we will try to copy be-definition here subarr = _try_cast(data, dtype, copy, raise_cast_failure) elif isinstance(data, ABCExtensionArray): # it is already ensured above this is not a PandasArray subarr = data if dtype is not None: subarr = subarr.astype(dtype, copy=copy) elif copy: subarr = subarr.copy() return subarr elif isinstance(data, (list, tuple, abc.Set, abc.ValuesView)) and len(data) > 0: if isinstance(data, set): # Raise only for unordered sets, e.g., not for dict_keys raise TypeError("Set type is unordered") data = list(data) if dtype is not None: subarr = _try_cast(data, dtype, copy, raise_cast_failure) else: subarr = maybe_convert_platform(data) subarr = maybe_cast_to_datetime(subarr, dtype) elif isinstance(data, range): # GH#16804 arr = np.arange(data.start, data.stop, data.step, dtype="int64") subarr = _try_cast(arr, dtype, copy, raise_cast_failure) elif lib.is_scalar(data) and index is not None and dtype is not None: data = maybe_cast_to_datetime(data, dtype) if not lib.is_scalar(data): data = data[0] subarr = construct_1d_arraylike_from_scalar(data, len(index), dtype) else: subarr = _try_cast(data, dtype, copy, raise_cast_failure) # scalar like, GH if getattr(subarr, "ndim", 0) == 0: if isinstance(data, list): # pragma: no cover subarr = np.array(data, dtype=object) elif index is not None: value = data # figure out the dtype from the value (upcast if necessary) if dtype is None: dtype, value = infer_dtype_from_scalar(value, pandas_dtype=True) else: # need to possibly convert the value here value = maybe_cast_to_datetime(value, dtype) subarr = construct_1d_arraylike_from_scalar( value, len(index), dtype) else: return subarr.item() # the result that we want elif subarr.ndim == 1: if index is not None: # a 1-element ndarray if len(subarr) != len(index) and len(subarr) == 1: subarr = construct_1d_arraylike_from_scalar( subarr[0], len(index), subarr.dtype) elif subarr.ndim > 1: if isinstance(data, np.ndarray): raise ValueError("Data must be 1-dimensional") else: subarr = com.asarray_tuplesafe(data, dtype=dtype) if not (is_extension_array_dtype(subarr.dtype) or is_extension_array_dtype(dtype)): # This is to prevent mixed-type Series getting all casted to # NumPy string type, e.g. NaN --> '-1#IND'. if issubclass(subarr.dtype.type, str): # GH#16605 # If not empty convert the data to dtype # GH#19853: If data is a scalar, subarr has already the result if not lib.is_scalar(data): if not np.all(isna(data)): data = np.array(data, dtype=dtype, copy=False) subarr = np.array(data, dtype=object, copy=copy) is_object_or_str_dtype = is_object_dtype(dtype) or is_string_dtype( dtype) if is_object_dtype(subarr.dtype) and not is_object_or_str_dtype: inferred = lib.infer_dtype(subarr, skipna=False) if inferred in {"interval", "period"}: subarr = array(subarr) return subarr
def test_infer_dtype_from_scalar_errors(): msg = "invalid ndarray passed to infer_dtype_from_scalar" with pytest.raises(ValueError, match=msg): infer_dtype_from_scalar(np.array([1]))
def test_infer_dtype_from_timedelta(data): dtype, val = infer_dtype_from_scalar(data) assert dtype == "m8[ns]"
def test_infer_dtype_from_datetime(data): dtype, val = infer_dtype_from_scalar(data) assert dtype == "M8[ns]"
def test_infer_dtype_from_complex(complex_dtype): data = np.dtype(complex_dtype).type(1) dtype, val = infer_dtype_from_scalar(data) assert dtype == np.complex_
def test_infer_dtype_from_float_scalar(float_dtype): float_dtype = np.dtype(float_dtype).type data = float_dtype(12) dtype, val = infer_dtype_from_scalar(data) assert dtype == float_dtype
def test_infer_dtype_from_int_scalar(any_int_dtype): # Test that infer_dtype_from_scalar is # returning correct dtype for int and float. data = np.dtype(any_int_dtype).type(12) dtype, val = infer_dtype_from_scalar(data) assert dtype == type(data)
def test_infer_dtype_from_python_scalar(data, exp_dtype): dtype, val = infer_dtype_from_scalar(data) assert dtype == exp_dtype
def testinfer_dtype_from_scalar_errors(self): with pytest.raises(ValueError): infer_dtype_from_scalar(np.array([1]))
def test_infer_dtype_from_complex(complex_dtype): data = np.dtype(complex_dtype).type(1) dtype, val = infer_dtype_from_scalar(data) assert dtype == np.complex_
def testinfer_dtype_from_scalar(self): # Test that infer_dtype_from_scalar is returning correct dtype for int # and float. for dtypec in [np.uint8, np.int8, np.uint16, np.int16, np.uint32, np.int32, np.uint64, np.int64]: data = dtypec(12) dtype, val = infer_dtype_from_scalar(data) assert dtype == type(data) data = 12 dtype, val = infer_dtype_from_scalar(data) assert dtype == np.int64 for dtypec in [np.float16, np.float32, np.float64]: data = dtypec(12) dtype, val = infer_dtype_from_scalar(data) assert dtype == dtypec data = np.float(12) dtype, val = infer_dtype_from_scalar(data) assert dtype == np.float64 for data in [True, False]: dtype, val = infer_dtype_from_scalar(data) assert dtype == np.bool_ for data in [np.complex64(1), np.complex128(1)]: dtype, val = infer_dtype_from_scalar(data) assert dtype == np.complex_ for data in [np.datetime64(1, 'ns'), Timestamp(1), datetime(2000, 1, 1, 0, 0)]: dtype, val = infer_dtype_from_scalar(data) assert dtype == 'M8[ns]' for data in [np.timedelta64(1, 'ns'), Timedelta(1), timedelta(1)]: dtype, val = infer_dtype_from_scalar(data) assert dtype == 'm8[ns]' for freq in ['M', 'D']: p = Period('2011-01-01', freq=freq) dtype, val = infer_dtype_from_scalar(p, pandas_dtype=True) assert dtype == 'period[{0}]'.format(freq) assert val == p.ordinal dtype, val = infer_dtype_from_scalar(p) dtype == np.object_ assert val == p # misc for data in [date(2000, 1, 1), Timestamp(1, tz='US/Eastern'), 'foo']: dtype, val = infer_dtype_from_scalar(data) assert dtype == np.object_
def test_infer_dtype_from_timedelta(data): dtype, val = infer_dtype_from_scalar(data) assert dtype == "m8[ns]"
def test_infer_dtype_from_float_scalar(float_dtype): float_dtype = np.dtype(float_dtype).type data = float_dtype(12) dtype, val = infer_dtype_from_scalar(data) assert dtype == float_dtype
def test_infer_dtype_misc(data): dtype, val = infer_dtype_from_scalar(data) assert dtype == np.object_
def testinfer_dtype_from_scalar_errors(self): with pytest.raises(ValueError): infer_dtype_from_scalar(np.array([1]))
def _maybe_convert_i8(self, key): """ Maybe convert a given key to its equivalent i8 value(s). Used as a preprocessing step prior to IntervalTree queries (self._engine), which expects numeric data. Parameters ---------- key : scalar or list-like The key that should maybe be converted to i8. Returns ------- scalar or list-like The original key if no conversion occurred, int if converted scalar, Int64Index if converted list-like. """ original = key if is_list_like(key): key = ensure_index(key) if not self._needs_i8_conversion(key): return original scalar = is_scalar(key) if is_interval_dtype(key) or isinstance(key, Interval): # convert left/right and reconstruct left = self._maybe_convert_i8(key.left) right = self._maybe_convert_i8(key.right) constructor = Interval if scalar else IntervalIndex.from_arrays # error: "object" not callable return constructor( left, right, closed=self.closed ) # type: ignore[operator] if scalar: # Timestamp/Timedelta key_dtype, key_i8 = infer_dtype_from_scalar(key, pandas_dtype=True) if lib.is_period(key): key_i8 = key.ordinal elif isinstance(key_i8, Timestamp): key_i8 = key_i8.value elif isinstance(key_i8, (np.datetime64, np.timedelta64)): key_i8 = key_i8.view("i8") else: # DatetimeIndex/TimedeltaIndex key_dtype, key_i8 = key.dtype, Index(key.asi8) if key.hasnans: # convert NaT from its i8 value to np.nan so it's not viewed # as a valid value, maybe causing errors (e.g. is_overlapping) key_i8 = key_i8.where(~key._isnan) # ensure consistency with IntervalIndex subtype # error: Item "ExtensionDtype"/"dtype[Any]" of "Union[dtype[Any], # ExtensionDtype]" has no attribute "subtype" subtype = self.dtype.subtype # type: ignore[union-attr] if not is_dtype_equal(subtype, key_dtype): raise ValueError( f"Cannot index an IntervalIndex of subtype {subtype} with " f"values of dtype {key_dtype}" ) return key_i8
def test_infer_dtype_from_python_scalar(data, exp_dtype): dtype, val = infer_dtype_from_scalar(data) assert dtype == exp_dtype
def testinfer_dtype_from_scalar(self): # Test that infer_dtype_from_scalar is returning correct dtype for int # and float. for dtypec in [ np.uint8, np.int8, np.uint16, np.int16, np.uint32, np.int32, np.uint64, np.int64 ]: data = dtypec(12) dtype, val = infer_dtype_from_scalar(data) assert dtype == type(data) data = 12 dtype, val = infer_dtype_from_scalar(data) assert dtype == np.int64 for dtypec in [np.float16, np.float32, np.float64]: data = dtypec(12) dtype, val = infer_dtype_from_scalar(data) assert dtype == dtypec data = np.float(12) dtype, val = infer_dtype_from_scalar(data) assert dtype == np.float64 for data in [True, False]: dtype, val = infer_dtype_from_scalar(data) assert dtype == np.bool_ for data in [np.complex64(1), np.complex128(1)]: dtype, val = infer_dtype_from_scalar(data) assert dtype == np.complex_ for data in [ np.datetime64(1, 'ns'), Timestamp(1), datetime(2000, 1, 1, 0, 0) ]: dtype, val = infer_dtype_from_scalar(data) assert dtype == 'M8[ns]' for data in [np.timedelta64(1, 'ns'), Timedelta(1), timedelta(1)]: dtype, val = infer_dtype_from_scalar(data) assert dtype == 'm8[ns]' for freq in ['M', 'D']: p = Period('2011-01-01', freq=freq) dtype, val = infer_dtype_from_scalar(p, pandas_dtype=True) assert dtype == 'period[{0}]'.format(freq) assert val == p.ordinal dtype, val = infer_dtype_from_scalar(p) dtype == np.object_ assert val == p # misc for data in [date(2000, 1, 1), Timestamp(1, tz='US/Eastern'), 'foo']: dtype, val = infer_dtype_from_scalar(data) assert dtype == np.object_
def __init__( self, data, sparse_index=None, index=None, fill_value=None, kind="integer", dtype=None, copy=False, ): if fill_value is None and isinstance(dtype, SparseDtype): fill_value = dtype.fill_value if isinstance(data, type(self)): # disable normal inference on dtype, sparse_index, & fill_value if sparse_index is None: sparse_index = data.sp_index if fill_value is None: fill_value = data.fill_value if dtype is None: dtype = data.dtype # TODO: make kind=None, and use data.kind? data = data.sp_values # Handle use-provided dtype if isinstance(dtype, str): # Two options: dtype='int', regular numpy dtype # or dtype='Sparse[int]', a sparse dtype try: dtype = SparseDtype.construct_from_string(dtype) except TypeError: dtype = pandas_dtype(dtype) if isinstance(dtype, SparseDtype): if fill_value is None: fill_value = dtype.fill_value dtype = dtype.subtype if index is not None and not is_scalar(data): raise Exception("must only pass scalars with an index ") if is_scalar(data): if index is not None: if data is None: data = np.nan if index is not None: npoints = len(index) elif sparse_index is None: npoints = 1 else: npoints = sparse_index.length dtype = infer_dtype_from_scalar(data)[0] data = construct_1d_arraylike_from_scalar(data, npoints, dtype) if dtype is not None: dtype = pandas_dtype(dtype) # TODO: disentangle the fill_value dtype inference from # dtype inference if data is None: # XXX: What should the empty dtype be? Object or float? data = np.array([], dtype=dtype) if not is_array_like(data): try: # probably shared code in sanitize_series data = sanitize_array(data, index=None) except ValueError: # NumPy may raise a ValueError on data like [1, []] # we retry with object dtype here. if dtype is None: dtype = object data = np.atleast_1d(np.asarray(data, dtype=dtype)) else: raise if copy: # TODO: avoid double copy when dtype forces cast. data = data.copy() if fill_value is None: fill_value_dtype = data.dtype if dtype is None else dtype if fill_value_dtype is None: fill_value = np.nan else: fill_value = na_value_for_dtype(fill_value_dtype) if isinstance(data, type(self)) and sparse_index is None: sparse_index = data._sparse_index sparse_values = np.asarray(data.sp_values, dtype=dtype) elif sparse_index is None: sparse_values, sparse_index, fill_value = make_sparse( data, kind=kind, fill_value=fill_value, dtype=dtype ) else: sparse_values = np.asarray(data, dtype=dtype) if len(sparse_values) != sparse_index.npoints: raise AssertionError( f"Non array-like type {type(sparse_values)} must " "have the same length as the index" ) self._sparse_index = sparse_index self._sparse_values = sparse_values self._dtype = SparseDtype(sparse_values.dtype, fill_value)
def sanitize_array(data, index, dtype=None, copy=False, raise_cast_failure=False): """ Sanitize input data to an ndarray, copy if specified, coerce to the dtype if specified. """ if dtype is not None: dtype = pandas_dtype(dtype) if isinstance(data, ma.MaskedArray): mask = ma.getmaskarray(data) if mask.any(): data, fill_value = maybe_upcast(data, copy=True) data.soften_mask() # set hardmask False if it was True data[mask] = fill_value else: data = data.copy() data = extract_array(data, extract_numpy=True) # GH#846 if isinstance(data, np.ndarray): if dtype is not None: subarr = np.array(data, copy=False) # possibility of nan -> garbage if is_float_dtype(data.dtype) and is_integer_dtype(dtype): try: subarr = _try_cast(data, True, dtype, copy, True) except ValueError: if copy: subarr = data.copy() else: subarr = _try_cast(data, True, dtype, copy, raise_cast_failure) elif isinstance(data, Index): # don't coerce Index types # e.g. indexes can have different conversions (so don't fast path # them) # GH#6140 subarr = sanitize_index(data, index, copy=copy) else: # we will try to copy be-definition here subarr = _try_cast(data, True, dtype, copy, raise_cast_failure) elif isinstance(data, ExtensionArray): if isinstance(data, ABCPandasArray): # We don't want to let people put our PandasArray wrapper # (the output of Series/Index.array), into a Series. So # we explicitly unwrap it here. subarr = data.to_numpy() else: subarr = data # everything else in this block must also handle ndarray's, # becuase we've unwrapped PandasArray into an ndarray. if dtype is not None: subarr = data.astype(dtype) if copy: subarr = data.copy() return subarr elif isinstance(data, (list, tuple)) and len(data) > 0: if dtype is not None: try: subarr = _try_cast(data, False, dtype, copy, raise_cast_failure) except Exception: if raise_cast_failure: # pragma: no cover raise subarr = np.array(data, dtype=object, copy=copy) subarr = lib.maybe_convert_objects(subarr) else: subarr = maybe_convert_platform(data) subarr = maybe_cast_to_datetime(subarr, dtype) elif isinstance(data, range): # GH#16804 start, stop, step = get_range_parameters(data) arr = np.arange(start, stop, step, dtype='int64') subarr = _try_cast(arr, False, dtype, copy, raise_cast_failure) else: subarr = _try_cast(data, False, dtype, copy, raise_cast_failure) # scalar like, GH if getattr(subarr, 'ndim', 0) == 0: if isinstance(data, list): # pragma: no cover subarr = np.array(data, dtype=object) elif index is not None: value = data # figure out the dtype from the value (upcast if necessary) if dtype is None: dtype, value = infer_dtype_from_scalar(value) else: # need to possibly convert the value here value = maybe_cast_to_datetime(value, dtype) subarr = construct_1d_arraylike_from_scalar( value, len(index), dtype) else: return subarr.item() # the result that we want elif subarr.ndim == 1: if index is not None: # a 1-element ndarray if len(subarr) != len(index) and len(subarr) == 1: subarr = construct_1d_arraylike_from_scalar( subarr[0], len(index), subarr.dtype) elif subarr.ndim > 1: if isinstance(data, np.ndarray): raise Exception('Data must be 1-dimensional') else: subarr = com.asarray_tuplesafe(data, dtype=dtype) # This is to prevent mixed-type Series getting all casted to # NumPy string type, e.g. NaN --> '-1#IND'. if issubclass(subarr.dtype.type, compat.string_types): # GH#16605 # If not empty convert the data to dtype # GH#19853: If data is a scalar, subarr has already the result if not lib.is_scalar(data): if not np.all(isna(data)): data = np.array(data, dtype=dtype, copy=False) subarr = np.array(data, dtype=object, copy=copy) if is_object_dtype(subarr.dtype) and dtype != 'object': inferred = lib.infer_dtype(subarr, skipna=False) if inferred == 'period': try: subarr = period_array(subarr) except IncompatibleFrequency: pass return subarr
def __init__(self, data, sparse_index=None, index=None, fill_value=None, kind='integer', dtype=None, copy=False): from pandas.core.internals import SingleBlockManager if isinstance(data, SingleBlockManager): data = data.internal_values() if fill_value is None and isinstance(dtype, SparseDtype): fill_value = dtype.fill_value if isinstance(data, (type(self), ABCSparseSeries)): # disable normal inference on dtype, sparse_index, & fill_value if sparse_index is None: sparse_index = data.sp_index if fill_value is None: fill_value = data.fill_value if dtype is None: dtype = data.dtype # TODO: make kind=None, and use data.kind? data = data.sp_values # Handle use-provided dtype if isinstance(dtype, compat.string_types): # Two options: dtype='int', regular numpy dtype # or dtype='Sparse[int]', a sparse dtype try: dtype = SparseDtype.construct_from_string(dtype) except TypeError: dtype = pandas_dtype(dtype) if isinstance(dtype, SparseDtype): if fill_value is None: fill_value = dtype.fill_value dtype = dtype.subtype if index is not None and not is_scalar(data): raise Exception("must only pass scalars with an index ") if is_scalar(data): if index is not None: if data is None: data = np.nan if index is not None: npoints = len(index) elif sparse_index is None: npoints = 1 else: npoints = sparse_index.length dtype = infer_dtype_from_scalar(data)[0] data = construct_1d_arraylike_from_scalar( data, npoints, dtype ) if dtype is not None: dtype = pandas_dtype(dtype) # TODO: disentangle the fill_value dtype inference from # dtype inference if data is None: # XXX: What should the empty dtype be? Object or float? data = np.array([], dtype=dtype) if not is_array_like(data): try: # probably shared code in sanitize_series from pandas.core.series import _sanitize_array data = _sanitize_array(data, index=None) except ValueError: # NumPy may raise a ValueError on data like [1, []] # we retry with object dtype here. if dtype is None: dtype = object data = np.atleast_1d(np.asarray(data, dtype=dtype)) else: raise if copy: # TODO: avoid double copy when dtype forces cast. data = data.copy() if fill_value is None: fill_value_dtype = data.dtype if dtype is None else dtype if fill_value_dtype is None: fill_value = np.nan else: fill_value = na_value_for_dtype(fill_value_dtype) if isinstance(data, type(self)) and sparse_index is None: sparse_index = data._sparse_index sparse_values = np.asarray(data.sp_values, dtype=dtype) elif sparse_index is None: sparse_values, sparse_index, fill_value = make_sparse( data, kind=kind, fill_value=fill_value, dtype=dtype ) else: sparse_values = np.asarray(data, dtype=dtype) if len(sparse_values) != sparse_index.npoints: raise AssertionError("Non array-like type {type} must " "have the same length as the index" .format(type=type(sparse_values))) self._sparse_index = sparse_index self._sparse_values = sparse_values self._dtype = SparseDtype(sparse_values.dtype, fill_value)