Ejemplo n.º 1
 def test_datetimetz_dtype(self):
     """Check pandas_dtype on timezone-aware datetime64 dtype strings."""
     tz_specs = ('datetime64[ns, US/Eastern]',
                 'datetime64[ns, Asia/Tokyo]',
                 'datetime64[ns, UTC]')
     for spec in tz_specs:
         # the very same object as a freshly requested DatetimeTZDtype ...
         assert com.pandas_dtype(spec) is DatetimeTZDtype(spec)
         # ... equal to it ...
         assert com.pandas_dtype(spec) == DatetimeTZDtype(spec)
         # ... and equal to the string it was built from
         assert com.pandas_dtype(spec) == spec
Ejemplo n.º 2
    # Expects every entry of ``invalid_list`` to raise TypeError matching
    # 'not understood'; ``valid_list`` holds inputs that are looped over
    # afterwards.
    # NOTE(review): in this copy the ``with`` block and the second ``for``
    # loop have no body; presumably each held the call under test -- restore
    # before running.
    def test_invalid_dtype_error(self):
        msg = 'not understood'
        invalid_list = [pd.Timestamp, 'pd.Timestamp', list]
        for dtype in invalid_list:
            with tm.assert_raises_regex(TypeError, msg):

        valid_list = [object, 'float64', np.object_, np.dtype('object'), 'O',
                      np.float64, float, np.dtype('float64')]
        for dtype in valid_list:
Ejemplo n.º 3
    # Cast to ``dtype``.  A non-nanosecond timedelta64 target yields an 'i8'
    # array (or a masked result when ``self._hasnans``); a timedelta64[ns]
    # target returns ``self`` (a copy when ``copy``); every other dtype is
    # delegated to ``DatetimeLikeArrayMixin.astype``.
    def astype(self, dtype, copy=True):
        # We handle
        #   --> timedelta64[ns]
        #   --> timedelta64
        # DatetimeLikeArrayMixin super call handles other cases
        dtype = pandas_dtype(dtype)

        if is_timedelta64_dtype(dtype) and not is_timedelta64_ns_dtype(dtype):
            # by pandas convention, converting to non-nano timedelta64
            #  returns an int64-dtyped array with ints representing multiples
            #  of the desired timedelta unit.  This is essentially division
            if self._hasnans:
                # avoid double-copying
                result = self._data.astype(dtype, copy=False)
                # NOTE(review): the call below is cut off in this copy -- its
                # remaining arguments and closing parenthesis are missing.
                values = self._maybe_mask_results(result,
                return values
            result = self._data.astype(dtype, copy=copy)
            return result.astype('i8')
        elif is_timedelta64_ns_dtype(dtype):
            if copy:
                return self.copy()
            return self
        return dtl.DatetimeLikeArrayMixin.astype(self, dtype, copy=copy)
Ejemplo n.º 4
def validate_dtype_freq(dtype, freq):
    """
    If both a dtype and a freq are available, ensure they match.  If only
    dtype is available, extract the implied freq.

    Parameters
    ----------
    dtype : dtype
    freq : DateOffset or None

    Returns
    -------
    freq : DateOffset
        ``None`` is passed through when neither ``dtype`` nor ``freq``
        is given.

    Raises
    ------
    ValueError : non-period dtype
    IncompatibleFrequency : mismatch between dtype and freq
    """
    if freq is not None:
        # normalise whatever was passed to an offset before comparing
        freq = frequencies.to_offset(freq)

    if dtype is not None:
        dtype = pandas_dtype(dtype)
        if not is_period_dtype(dtype):
            raise ValueError('dtype must be PeriodDtype')
        if freq is None:
            # only the dtype was given: take the freq it implies
            freq = dtype.freq
        elif freq != dtype.freq:
            raise IncompatibleFrequency('specified freq and dtype '
                                        'are different')
    return freq
Ejemplo n.º 5
    def __new__(cls, subtype=None):
        """
        Return the IntervalDtype instance for ``subtype``, reusing the
        instance stored in ``cls._cache`` when one exists.

        Parameters
        ----------
        subtype : the dtype of the Interval

        Raises
        ------
        ValueError
            If ``pandas_dtype`` rejects ``subtype`` with a TypeError.
        """

        if isinstance(subtype, IntervalDtype):
            return subtype
        elif subtype is None or (isinstance(subtype, compat.string_types) and
                                 subtype == 'interval'):
            subtype = None
        else:
            if isinstance(subtype, compat.string_types):
                # a matching string carries the real subtype in the
                # 'subtype' group of cls._match
                m = cls._match.search(subtype)
                if m is not None:
                    subtype = m.group('subtype')

            from pandas.core.dtypes.common import pandas_dtype
            try:
                subtype = pandas_dtype(subtype)
            except TypeError:
                raise ValueError("could not construct IntervalDtype")

        # instances are keyed by the string form of their subtype
        try:
            return cls._cache[str(subtype)]
        except KeyError:
            u = object.__new__(cls)
            u.subtype = subtype
            cls._cache[str(subtype)] = u
            return u
Ejemplo n.º 6
def test_astype(dtype):
    """Casting a period array must produce the requested dtype."""
    # Ordinals have to be astyped correctly for both int32 and int64
    periods = period_array(['2000', '2001', None], freq='D')
    converted = periods.astype(dtype)
    # go through pandas_dtype so int32 vs. int64 is compared correctly
    assert converted.dtype == pandas_dtype(dtype)
Ejemplo n.º 7
    def astype(self, dtype, copy=True):
        """Cast to ``dtype``; period targets are converted via ``asfreq``."""
        dtype = pandas_dtype(dtype)

        if not is_period_dtype(dtype):
            # Everything except Period[T] -> Period[U] is the parent's job.
            return super(PeriodArray, self).astype(dtype, copy=copy)

        # Period[T] -> Period[U]
        return self.asfreq(dtype.freq)
Ejemplo n.º 8
# Turn packed ``values`` back into array data of the given ``dtype``.
# Categorical dtypes are returned untouched and object dtypes become an
# object ndarray; otherwise the bytes are (optionally decompressed and)
# copied into a writeable numpy buffer.
# NOTE(review): the ``if compress:`` section is incomplete in this copy --
# the ``else:`` / ``try:`` lines and parts of two calls are missing -- so
# the decompression path cannot be read off reliably from here.
def unconvert(values, dtype, compress=None):

    as_is_ext = isinstance(values, ExtType) and values.code == 0

    if as_is_ext:
        values = values.data

    if is_categorical_dtype(dtype):
        return values

    elif is_object_dtype(dtype):
        return np.array(values, dtype=object)

    dtype = pandas_dtype(dtype).base

    if not as_is_ext:
        values = values.encode('latin1')

    if compress:
        if compress == u'zlib':
            decompress = zlib.decompress
        elif compress == u'blosc':
            decompress = blosc.decompress
            raise ValueError("compress must be one of 'zlib' or 'blosc'")

            return np.frombuffer(
        except _BadMove as e:
            # Pull the decompressed data off of the `_BadMove` exception.
            # We don't just store this in the locals because we want to
            # minimize the risk of giving users access to a `bytes` object
            # whose data is also given to a mutable buffer.
            values = e.args[0]
            if len(values) > 1:
                # The empty string and single characters are memoized in many
                # string creating functions in the capi. This case should not
                # warn even though we need to make a copy because we are only
                # copying at most 1 byte.
                    'copying data after decompressing; this may mean that'
                    ' decompress is caching its result',
                # fall through to copying `np.fromstring`

    # Copy the bytes into a numpy array.
    buf = np.frombuffer(values, dtype=dtype)
    buf = buf.copy()  # required to not mutate the original data
    buf.flags.writeable = True
    return buf
Ejemplo n.º 9
def test_dtype_equal_strict():
    """is_dtype_equal must reject dtypes that are merely similar."""

    # we are strict on kind equality: narrower ints / floats never match
    for wide, narrow_group in ((np.int64, (np.int8, np.int16, np.int32)),
                               (np.float64, (np.float32,))):
        for narrow in narrow_group:
            assert not com.is_dtype_equal(wide, narrow)

    # strict w.r.t. PeriodDtype
    daily, two_daily = PeriodDtype('D'), PeriodDtype('2D')
    assert not com.is_dtype_equal(daily, two_daily)

    # strict w.r.t. datetime64
    eastern = com.pandas_dtype('datetime64[ns, US/Eastern]')
    cet = com.pandas_dtype('datetime64[ns, CET]')
    assert not com.is_dtype_equal(eastern, cet)

    # see gh-15941: no exception should be raised
    assert not com.is_dtype_equal(None, None)
Ejemplo n.º 10
 def astype(self, dtype, copy=True, how='start'):
     """
     Cast the index to ``dtype``.

     Integer targets return ``self._int64index`` (copied when ``copy``),
     datetime64 targets go through ``to_timestamp`` and are localized to
     the dtype's ``tz`` (if any), period targets go through ``asfreq``;
     anything else is delegated to the parent class.
     """
     dtype = pandas_dtype(dtype)

     if is_integer_dtype(dtype):
         if copy:
             return self._int64index.copy()
         return self._int64index

     if is_datetime64_any_dtype(dtype):
         target_tz = getattr(dtype, 'tz', None)
         stamps = self.to_timestamp(how=how)
         return stamps.tz_localize(target_tz)

     if is_period_dtype(dtype):
         return self.asfreq(freq=dtype.freq)

     return super(PeriodIndex, self).astype(dtype, copy=copy)
Ejemplo n.º 11
 def astype(self, dtype, copy=True):
     """
     Cast the index to ``dtype``.

     A timedelta64 target with a non-nanosecond unit returns a plain
     ``Index`` (float64-masked values when NaNs are present, 'i8' values
     otherwise); every other dtype is handled by the parent class.
     """
     dtype = pandas_dtype(dtype)
     wants_coarse_unit = (is_timedelta64_dtype(dtype) and
                          not is_timedelta64_ns_dtype(dtype))
     if not wants_coarse_unit:
         return super(TimedeltaIndex, self).astype(dtype, copy=copy)

     # return an index (essentially this is division)
     converted = self.values.astype(dtype, copy=copy)
     if self.hasnans:
         payload = self._maybe_mask_results(converted, convert='float64')
     else:
         payload = converted.astype('i8')
     return Index(payload, name=self.name)
Ejemplo n.º 12
 def astype(self, dtype, copy=True):
     """
     Cast the index to ``dtype``.

     Raises TypeError for dtypes that need an i8 conversion and
     ValueError for integer targets while NaNs are present; otherwise the
     parent class performs the cast.
     """
     dtype = pandas_dtype(dtype)

     if needs_i8_conversion(dtype):
         template = ('Cannot convert Float64Index to dtype {dtype}; integer '
                     'values are required for conversion')
         raise TypeError(template.format(dtype=dtype))

     if is_integer_dtype(dtype) and self.hasnans:
         # GH 13149
         raise ValueError('Cannot convert NA to integer')

     return super(Float64Index, self).astype(dtype, copy=copy)
Ejemplo n.º 13
    def astype(self, dtype, copy=True, how='start'):
        """
        Cast the index to ``dtype``.

        datetime64 targets are produced with ``to_timestamp(how=how)`` and
        localized to the dtype's ``tz`` (if any); every other dtype is
        handled by the parent class.
        """
        dtype = pandas_dtype(dtype)

        if not is_datetime64_any_dtype(dtype):
            # TODO: should probably raise on `how` here, so we don't
            # ignore it.
            return super(PeriodIndex, self).astype(dtype, copy=copy)

        # 'how' is index-specific, isn't part of the EA interface.
        target_tz = getattr(dtype, 'tz', None)
        stamps = self.to_timestamp(how=how)
        return stamps.tz_localize(target_tz)
Ejemplo n.º 14
 def astype(self, dtype, copy=True):
     """
     Cast the index to ``dtype``.

     A timedelta64 target with a non-nanosecond unit returns a plain
     ``Index`` built from the converted ``_data`` (re-cast to 'i8' unless
     NaNs are present); every other dtype goes to
     ``DatetimeIndexOpsMixin.astype``.
     """
     dtype = pandas_dtype(dtype)
     if not (is_timedelta64_dtype(dtype) and
             not is_timedelta64_ns_dtype(dtype)):
         return DatetimeIndexOpsMixin.astype(self, dtype, copy=copy)

     # Have to repeat the check for 'timedelta64' (not ns) dtype
     #  so that we can return a numeric index, since pandas will return
     #  a TimedeltaIndex when dtype='timedelta'
     converted = self._data.astype(dtype, copy=copy)
     if not self.hasnans:
         converted = converted.astype('i8')
     return Index(converted, name=self.name)
Ejemplo n.º 15
 def astype(self, dtype, copy=True):
     """
     Cast the index to ``dtype``.

     Raises TypeError for dtypes that need an i8 conversion and
     ValueError for non-extension integer targets while NaNs are present;
     otherwise the parent class performs the cast.
     """
     dtype = pandas_dtype(dtype)

     if needs_i8_conversion(dtype):
         template = ('Cannot convert Float64Index to dtype {dtype}; integer '
                     'values are required for conversion')
         raise TypeError(template.format(dtype=dtype))

     # TODO(jreback); this can change once we have an EA Index type
     plain_integer = (is_integer_dtype(dtype) and
                      not is_extension_array_dtype(dtype))
     if plain_integer and self.hasnans:
         # GH 13149
         raise ValueError('Cannot convert NA to integer')

     return super().astype(dtype, copy=copy)
Ejemplo n.º 16
    def astype(self, dtype, copy=True, how='start'):
        """
        Cast the index to ``dtype``.

        datetime64 targets are produced with ``to_timestamp(how=how)`` and
        localized to the dtype's ``tz`` (if any); for every other dtype the
        underlying ``_data`` is cast and wrapped in an ``Index``.
        """
        dtype = pandas_dtype(dtype)

        if not is_datetime64_any_dtype(dtype):
            # No special case applies: fall back to astyping the values.
            converted = self._data.astype(dtype, copy=copy)
            return Index(converted, name=self.name, dtype=dtype, copy=False)

        # 'how' is index-specific, isn't part of the EA interface.
        target_tz = getattr(dtype, 'tz', None)
        stamps = self.to_timestamp(how=how)
        return stamps.tz_localize(target_tz)
Ejemplo n.º 17
def _validate_td64_dtype(dtype):
    """
    Return ``dtype`` as a validated timedelta64 dtype.

    A unit-less 'timedelta64' is replaced by ``_TD_DTYPE`` with a
    FutureWarning; anything that is then not equal to ``_TD_DTYPE`` raises
    ValueError.
    """
    dtype = pandas_dtype(dtype)
    unitless = np.dtype("timedelta64")

    if is_dtype_equal(dtype, unitless):
        msg = textwrap.dedent("""\
            Passing in 'timedelta' dtype with no precision is deprecated
            and will raise in a future version. Please pass in
            'timedelta64[ns]' instead.""")
        warnings.warn(msg, FutureWarning, stacklevel=4)
        dtype = _TD_DTYPE

    if is_dtype_equal(dtype, _TD_DTYPE):
        return dtype

    raise ValueError(_BAD_DTYPE.format(dtype=dtype))
Ejemplo n.º 18
    # Build an instance from ``left`` / ``right`` bounds: both are turned
    # into indexes, optionally cast to ``dtype.subtype``, coerced to a
    # common float dtype when one side is float and the other integer, then
    # checked for unsupported or mismatched types before being stored on
    # the result together with ``closed`` (default 'right').
    # NOTE(review): this copy is incomplete -- the ``msg.format(`` call in
    # the first type check is cut off and ``if verify_integrity:`` has no
    # body.
    def _simple_new(cls, left, right, closed=None,
                    copy=False, dtype=None, verify_integrity=True):
        result = IntervalMixin.__new__(cls)

        closed = closed or 'right'
        left = ensure_index(left, copy=copy)
        right = ensure_index(right, copy=copy)

        if dtype is not None:
            # GH 19262: dtype must be an IntervalDtype to override inferred
            dtype = pandas_dtype(dtype)
            if not is_interval_dtype(dtype):
                msg = 'dtype must be an IntervalDtype, got {dtype}'
                raise TypeError(msg.format(dtype=dtype))
            elif dtype.subtype is not None:
                left = left.astype(dtype.subtype)
                right = right.astype(dtype.subtype)

        # coerce dtypes to match if needed
        if is_float_dtype(left) and is_integer_dtype(right):
            right = right.astype(left.dtype)
        elif is_float_dtype(right) and is_integer_dtype(left):
            left = left.astype(right.dtype)

        if type(left) != type(right):
            msg = ('must not have differing left [{ltype}] and right '
                   '[{rtype}] types')
            raise ValueError(msg.format(ltype=type(left).__name__,
        elif is_categorical_dtype(left.dtype) or is_string_dtype(left.dtype):
            # GH 19016
            msg = ('category, object, and string subtypes are not supported '
                   'for IntervalArray')
            raise TypeError(msg)
        elif isinstance(left, ABCPeriodIndex):
            msg = 'Period dtypes are not supported, use a PeriodIndex instead'
            raise ValueError(msg)
        elif (isinstance(left, ABCDatetimeIndex) and
                str(left.tz) != str(right.tz)):
            # time zones are compared through their string form
            msg = ("left and right must have the same time zone, got "
                   "'{left_tz}' and '{right_tz}'")
            raise ValueError(msg.format(left_tz=left.tz, right_tz=right.tz))

        result._left = left
        result._right = right
        result._closed = closed
        if verify_integrity:
        return result
Ejemplo n.º 19
 # Cast the index to ``dtype``: float, integer (refused while NaNs are
 # present) and object targets are cast from ``self._values`` and wrapped
 # in a new ``Index``.
 # NOTE(review): this copy is incomplete -- the ``raise TypeError`` reads as
 # if it belonged to an ``else:`` branch that is missing, and its format
 # arguments and closing parenthesis are cut off.
 def astype(self, dtype, copy=True):
     dtype = pandas_dtype(dtype)
     if is_float_dtype(dtype):
         values = self._values.astype(dtype, copy=copy)
     elif is_integer_dtype(dtype):
         if self.hasnans:
             raise ValueError('cannot convert float NaN to integer')
         values = self._values.astype(dtype, copy=copy)
     elif is_object_dtype(dtype):
         values = self._values.astype('object', copy=copy)
         raise TypeError('Setting %s dtype to anything other than '
                         'float64 or object is not supported' %
     return Index(values, name=self.name, dtype=dtype)
Ejemplo n.º 20
 def astype(self, dtype, copy=True, how='start'):
     """
     Cast the index to ``dtype``.

     Parameters
     ----------
     dtype : str or dtype
         Target dtype; interpreted with ``pandas_dtype``.
     copy : bool, default True
         For integer targets, whether to return a copy of
         ``self._int64index`` instead of the object itself.
     how : str, default 'start'
         Passed to ``to_timestamp`` for datetime64 targets.

     Raises
     ------
     ValueError
         If ``dtype`` is not an object, integer, datetime64 (naive or
         tz-aware) or period dtype.
     """
     dtype = pandas_dtype(dtype)
     if is_object_dtype(dtype):
         return self.asobject
     elif is_integer_dtype(dtype):
         # copy=False must hand back the existing int64 index; previously
         # that return was unreachable and the call fell through to the
         # ValueError at the bottom.
         return self._int64index.copy() if copy else self._int64index
     elif is_datetime64_dtype(dtype):
         return self.to_timestamp(how=how)
     elif is_datetime64tz_dtype(dtype):
         return self.to_timestamp(how=how).tz_localize(dtype.tz)
     elif is_period_dtype(dtype):
         return self.asfreq(freq=dtype.freq)
     raise ValueError('Cannot cast PeriodIndex to dtype %s' % dtype)
Ejemplo n.º 21
 # Cast the index to ``dtype``: float, integer (refused while NaNs are
 # present) and object targets are cast from ``self._values`` and wrapped
 # in a new ``Index``; categorical targets build a ``CategoricalIndex``.
 # NOTE(review): this copy is incomplete -- the ``CategoricalIndex(`` call
 # and the ``raise TypeError(`` call are both cut off, and the ``else:``
 # that presumably introduced the raise is missing.
 def astype(self, dtype, copy=True):
     dtype = pandas_dtype(dtype)
     if is_float_dtype(dtype):
         values = self._values.astype(dtype, copy=copy)
     elif is_integer_dtype(dtype):
         if self.hasnans:
             raise ValueError('cannot convert float NaN to integer')
         values = self._values.astype(dtype, copy=copy)
     elif is_object_dtype(dtype):
         values = self._values.astype('object', copy=copy)
     elif is_categorical_dtype(dtype):
         return CategoricalIndex(self, name=self.name, dtype=dtype,
         raise TypeError('Setting {cls} dtype to anything other than '
                         'float64, object, or category is not supported'
     return Index(values, name=self.name, dtype=dtype)
Ejemplo n.º 22
    def __new__(cls, subtype=None):
        """
        Return the IntervalDtype instance for ``subtype``, reusing the
        instance stored in ``cls._cache`` when one exists.

        Parameters
        ----------
        subtype : the dtype of the Interval

        Raises
        ------
        TypeError
            If ``pandas_dtype`` rejects ``subtype``, or if the resulting
            subtype is a categorical or string dtype.
        """
        from pandas.core.dtypes.common import (
            is_categorical_dtype, is_string_dtype, pandas_dtype)

        if isinstance(subtype, IntervalDtype):
            return subtype
        elif subtype is None:
            # we are called as an empty constructor
            # generally for pickle compat
            u = object.__new__(cls)
            u.subtype = None
            return u
        elif (isinstance(subtype, compat.string_types) and
              subtype.lower() == 'interval'):
            subtype = None
        else:
            if isinstance(subtype, compat.string_types):
                # a matching string carries the real subtype in the
                # 'subtype' group of cls._match
                m = cls._match.search(subtype)
                if m is not None:
                    subtype = m.group('subtype')

            try:
                subtype = pandas_dtype(subtype)
            except TypeError:
                raise TypeError("could not construct IntervalDtype")

        if is_categorical_dtype(subtype) or is_string_dtype(subtype):
            # GH 19016
            msg = ('category, object, and string subtypes are not supported '
                   'for IntervalDtype')
            raise TypeError(msg)

        # instances are keyed by the string form of their subtype
        try:
            return cls._cache[str(subtype)]
        except KeyError:
            u = object.__new__(cls)
            u.subtype = subtype
            cls._cache[str(subtype)] = u
            return u
Ejemplo n.º 23
 # Cast the index to ``dtype``: object, integer, datetime64 (naive or
 # tz-aware), period and categorical targets each have their own branch;
 # any other dtype raises TypeError.
 # NOTE(review): this copy is incomplete -- the second ``return`` in the
 # integer branch follows a ``return`` (an ``else:`` appears to be missing),
 # and the ``CategoricalIndex(`` call is cut off.
 def astype(self, dtype, copy=True, how='start'):
     dtype = pandas_dtype(dtype)
     if is_object_dtype(dtype):
         return self._box_values_as_index()
     elif is_integer_dtype(dtype):
         if copy:
             return self._int64index.copy()
             return self._int64index
     elif is_datetime64_dtype(dtype):
         return self.to_timestamp(how=how)
     elif is_datetime64tz_dtype(dtype):
         return self.to_timestamp(how=how).tz_localize(dtype.tz)
     elif is_period_dtype(dtype):
         return self.asfreq(freq=dtype.freq)
     elif is_categorical_dtype(dtype):
         return CategoricalIndex(self.values, name=self.name, dtype=dtype,
     raise TypeError('Cannot cast PeriodIndex to dtype %s' % dtype)
Ejemplo n.º 24
    # NOTE(review): this copy is incomplete -- the docstring text below has
    # lost its quotes and section headers, both ``try:`` lines are missing,
    # and the 'Cannot convert ...' message literal is cut off.
    def astype(self, dtype, copy=True):
        Cast to an ExtensionArray or NumPy array with dtype 'dtype'.

        dtype : str or dtype
            Typecode or data-type to which the array is cast.

        copy : bool, default True
            Whether to copy the data, even if not necessary. If False,
            a copy is made only if the old dtype does not match the
            new dtype.

        array : ExtensionArray or ndarray
            ExtensionArray or NumPy ndarray with 'dtype' for its dtype.
        dtype = pandas_dtype(dtype)
        if is_interval_dtype(dtype):
            # same interval dtype: nothing to convert, only honour ``copy``
            if dtype == self.dtype:
                return self.copy() if copy else self

            # need to cast to different subtype
                new_left = self.left.astype(dtype.subtype)
                new_right = self.right.astype(dtype.subtype)
            except TypeError:
                msg = ('Cannot convert {dtype} to {new_dtype}; subtypes are '
                raise TypeError(msg.format(dtype=self.dtype, new_dtype=dtype))
            return self._shallow_copy(new_left, new_right)
        elif is_categorical_dtype(dtype):
            return Categorical(np.asarray(self))
        # TODO: This try/except will be repeated.
            return np.asarray(self).astype(dtype, copy=copy)
        except (TypeError, ValueError):
            # both failure kinds are re-raised as TypeError
            msg = 'Cannot cast {name} to dtype {dtype}'
            raise TypeError(msg.format(name=type(self).__name__, dtype=dtype))
Ejemplo n.º 25
    def astype(self, dtype, copy=True):
        """
        Cast to ``dtype``.

        Parameters
        ----------
        dtype : str or dtype
            Target dtype; interpreted with ``pandas_dtype``.
        copy : bool, default True
            For integer targets that already match the dtype of
            ``self._data``, whether to return a copy of it.

        Returns
        -------
        ndarray, Categorical, or the result of ``self.asfreq``
            Depends on the target dtype, see the branches below.

        Raises
        ------
        TypeError
            For float targets, and for datetime/timedelta targets that
            differ from ``self.dtype``.
        """
        # TODO: Figure out something better here...
        # We have DatetimeLikeArrayMixin ->
        #     super(...), which ends up being... DatetimeIndexOpsMixin?
        # this is complicated.
        # need a pandas_astype(arr, dtype).
        from pandas import Categorical

        dtype = pandas_dtype(dtype)

        if is_object_dtype(dtype):
            return np.asarray(self, dtype=object)
        elif is_string_dtype(dtype) and not is_categorical_dtype(dtype):
            return self._format_native_types()
        elif is_integer_dtype(dtype):
            values = self._data

            if values.dtype != dtype:
                # int32 vs. int64
                values = values.astype(dtype)

            elif copy:
                values = values.copy()

            return values
        elif (is_datetime_or_timedelta_dtype(dtype) and
              not is_dtype_equal(self.dtype, dtype)) or is_float_dtype(dtype):
            # disallow conversion between datetime/timedelta,
            # and conversions for any datetimelike to float
            msg = 'Cannot cast {name} to dtype {dtype}'
            raise TypeError(msg.format(name=type(self).__name__, dtype=dtype))
        elif is_categorical_dtype(dtype):
            return Categorical(self, dtype=dtype)
        elif is_period_dtype(dtype):
            return self.asfreq(dtype.freq)
        else:
            # Fallback for every remaining dtype.  Without this branch the
            # return was unreachable (it followed the period ``return``) and
            # the method returned None for such dtypes.
            return np.asarray(self, dtype=dtype)
Ejemplo n.º 26
    # Store the subtype and fill value of the dtype.  Passing an instance of
    # the same class reuses its ``subtype`` (and its ``fill_value`` when none
    # is given); string dtypes are stored as object; a missing fill value is
    # taken from ``na_value_for_dtype``; non-scalar fill values are refused.
    # NOTE(review): this copy is incomplete -- the second ``from ... import (``
    # lacks its closing parenthesis and the ``raise ValueError(`` call is cut
    # off.
    def __init__(self, dtype=np.float64, fill_value=None):
        # type: (Union[str, np.dtype, 'ExtensionDtype', type], Any) -> None
        from pandas.core.dtypes.missing import na_value_for_dtype
        from pandas.core.dtypes.common import (
            pandas_dtype, is_string_dtype, is_scalar

        if isinstance(dtype, type(self)):
            if fill_value is None:
                fill_value = dtype.fill_value
            dtype = dtype.subtype

        dtype = pandas_dtype(dtype)
        if is_string_dtype(dtype):
            dtype = np.dtype('object')

        if fill_value is None:
            fill_value = na_value_for_dtype(dtype)

        if not is_scalar(fill_value):
            raise ValueError("fill_value must be a scalar. Got {} "
        self._dtype = dtype
        self._fill_value = fill_value
Ejemplo n.º 27
    # Cast the index to ``dtype``: object, timedelta64[ns], other
    # timedelta64 units, integer and categorical targets each have their own
    # branch; any other dtype raises TypeError.
    # NOTE(review): this copy is incomplete -- the ``Index(`` calls in the
    # NaN and integer branches and the ``CategoricalIndex(`` call are all
    # cut off.
    def astype(self, dtype, copy=True):
        dtype = pandas_dtype(dtype)

        if is_object_dtype(dtype):
            return self._box_values_as_index()
        elif is_timedelta64_ns_dtype(dtype):
            # already the native dtype: only honour ``copy``
            if copy is True:
                return self.copy()
            return self
        elif is_timedelta64_dtype(dtype):
            # return an index (essentially this is division)
            result = self.values.astype(dtype, copy=copy)
            if self.hasnans:
                return Index(self._maybe_mask_results(result,
            return Index(result.astype('i8'), name=self.name)
        elif is_integer_dtype(dtype):
            return Index(self.values.astype('i8', copy=copy), dtype='i8',
        elif is_categorical_dtype(dtype):
            return CategoricalIndex(self.values, name=self.name, dtype=dtype,
        raise TypeError('Cannot cast TimedeltaIndex to dtype %s' % dtype)
Ejemplo n.º 28
    def astype(self, dtype, copy=True):
        """
        Cast to ``dtype``.

        A dtype of the same class as ``self.dtype`` rebuilds the array
        around the same ``_data`` with that dtype's ``context``; any other
        dtype is handled by the parent class.
        """
        dtype = pandas_dtype(dtype)
        own_dtype_cls = type(self.dtype)

        if not isinstance(dtype, own_dtype_cls):
            return super().astype(dtype, copy=copy)

        return type(self)(self._data, context=dtype.context)
Ejemplo n.º 29
# Cast ``arr`` to ``dtype`` while guarding the conversions handled below:
# NaT -> integer, non-finite float -> integer, and unit-less
# datetime64/timedelta64 targets all raise instead of being cast.
# NOTE(review): this copy is incomplete -- the docstring text below has lost
# its quotes and section headers, and the ``_from_sequence(`` call in the
# extension-dtype branch is cut off.
def astype_nansafe(arr, dtype, copy: bool = True, skipna: bool = False):
    Cast the elements of an array to a given dtype a nan-safe manner.

    arr : ndarray
    dtype : np.dtype
    copy : bool, default True
        If False, a view will be attempted but may fail, if
        e.g. the item sizes don't align.
    skipna: bool, default False
        Whether or not we should skip NaN when casting as a string-type.

        The dtype was a datetime64/timedelta64 dtype, but it had no unit.

    # dispatch on extension dtype if needed
    if is_extension_array_dtype(dtype):
        return dtype.construct_array_type()._from_sequence(arr,

    if not isinstance(dtype, np.dtype):
        dtype = pandas_dtype(dtype)

    if issubclass(dtype.type, str):
        # string targets are converted on the flattened array, then reshaped
        return lib.astype_str(arr.ravel(), skipna=skipna).reshape(arr.shape)

    elif is_datetime64_dtype(arr):
        if is_object_dtype(dtype):
            return tslib.ints_to_pydatetime(arr.view(np.int64))
        elif dtype == np.int64:
            if isna(arr).any():
                raise ValueError("Cannot convert NaT values to integer")
            return arr.view(dtype)

        # allow frequency conversions
        if dtype.kind == "M":
            return arr.astype(dtype)

        raise TypeError(
            f"cannot astype a datetimelike from [{arr.dtype}] to [{dtype}]")

    elif is_timedelta64_dtype(arr):
        if is_object_dtype(dtype):
            return tslibs.ints_to_pytimedelta(arr.view(np.int64))
        elif dtype == np.int64:
            if isna(arr).any():
                raise ValueError("Cannot convert NaT values to integer")
            return arr.view(dtype)

        if dtype not in [_INT64_DTYPE, _TD_DTYPE]:

            # allow frequency conversions
            # we return a float here!
            if dtype.kind == "m":
                mask = isna(arr)
                result = arr.astype(dtype).astype(np.float64)
                result[mask] = np.nan
                return result
        elif dtype == _TD_DTYPE:
            return arr.astype(_TD_DTYPE, copy=copy)

        raise TypeError(
            f"cannot astype a timedelta from [{arr.dtype}] to [{dtype}]")

    elif np.issubdtype(arr.dtype, np.floating) and np.issubdtype(
            dtype, np.integer):

        # only validates; the actual cast happens at the bottom
        if not np.isfinite(arr).all():
            raise ValueError(
                "Cannot convert non-finite values (NA or inf) to integer")

    elif is_object_dtype(arr):

        # work around NumPy brokenness, #1987
        if np.issubdtype(dtype.type, np.integer):
            return lib.astype_intsafe(arr.ravel(), dtype).reshape(arr.shape)

        # if we have a datetime/timedelta array of objects
        # then coerce to a proper dtype and recall astype_nansafe

        elif is_datetime64_dtype(dtype):
            from pandas import to_datetime

            return astype_nansafe(to_datetime(arr).values, dtype, copy=copy)
        elif is_timedelta64_dtype(dtype):
            from pandas import to_timedelta

            return astype_nansafe(to_timedelta(arr).values, dtype, copy=copy)

    if dtype.name in ("datetime64", "timedelta64"):
        msg = (f"The '{dtype.name}' dtype has no unit. Please pass in "
               f"'{dtype.name}[ns]' instead.")
        raise ValueError(msg)

    if copy or is_object_dtype(arr) or is_object_dtype(dtype):
        # Explicit copy, or required since NumPy can't view from / to object.
        return arr.astype(dtype, copy=True)

    return arr.view(dtype)
Ejemplo n.º 30
# NOTE(review): this copy is heavily truncated -- the signature is cut off
# after ``data,``, the docstring text has lost its quotes, and several
# ``try:`` / ``else:`` lines and call continuations are missing throughout.
# Treat the control flow below as a partial view and restore the function
# before relying on it.
def sanitize_array(data,
    Sanitize input data to an ndarray, copy if specified, coerce to the
    dtype if specified.
    if dtype is not None:
        dtype = pandas_dtype(dtype)

    if isinstance(data, ma.MaskedArray):
        mask = ma.getmaskarray(data)
        if mask.any():
            data, fill_value = maybe_upcast(data, copy=True)
            data.soften_mask()  # set hardmask False if it was True
            data[mask] = fill_value
            data = data.copy()

    data = extract_array(data, extract_numpy=True)

    # GH#846
    if isinstance(data, np.ndarray):

        if dtype is not None:
            subarr = np.array(data, copy=False)

            # possibility of nan -> garbage
            if is_float_dtype(data.dtype) and is_integer_dtype(dtype):
                    subarr = _try_cast(data, True, dtype, copy, True)
                except ValueError:
                    if copy:
                        subarr = data.copy()
                subarr = _try_cast(data, True, dtype, copy, raise_cast_failure)
        elif isinstance(data, Index):
            # don't coerce Index types
            # e.g. indexes can have different conversions (so don't fast path
            # them)
            # GH#6140
            subarr = sanitize_index(data, index, copy=copy)

            # we will try to copy be-definition here
            subarr = _try_cast(data, True, dtype, copy, raise_cast_failure)

    elif isinstance(data, ExtensionArray):
        if isinstance(data, ABCPandasArray):
            # We don't want to let people put our PandasArray wrapper
            # (the output of Series/Index.array), into a Series. So
            # we explicitly unwrap it here.
            subarr = data.to_numpy()
            subarr = data

        # everything else in this block must also handle ndarray's,
        # becuase we've unwrapped PandasArray into an ndarray.

        if dtype is not None:
            subarr = data.astype(dtype)

        if copy:
            subarr = data.copy()
        return subarr

    elif isinstance(data, (list, tuple)) and len(data) > 0:
        if dtype is not None:
                subarr = _try_cast(data, False, dtype, copy,
            except Exception:
                if raise_cast_failure:  # pragma: no cover
                subarr = np.array(data, dtype=object, copy=copy)
                subarr = lib.maybe_convert_objects(subarr)

            subarr = maybe_convert_platform(data)

        subarr = maybe_cast_to_datetime(subarr, dtype)

    elif isinstance(data, range):
        # GH#16804
        arr = np.arange(data.start, data.stop, data.step, dtype='int64')
        subarr = _try_cast(arr, False, dtype, copy, raise_cast_failure)
        subarr = _try_cast(data, False, dtype, copy, raise_cast_failure)

    # scalar like, GH
    if getattr(subarr, 'ndim', 0) == 0:
        if isinstance(data, list):  # pragma: no cover
            subarr = np.array(data, dtype=object)
        elif index is not None:
            value = data

            # figure out the dtype from the value (upcast if necessary)
            if dtype is None:
                dtype, value = infer_dtype_from_scalar(value)
                # need to possibly convert the value here
                value = maybe_cast_to_datetime(value, dtype)

            subarr = construct_1d_arraylike_from_scalar(
                value, len(index), dtype)

            return subarr.item()

    # the result that we want
    elif subarr.ndim == 1:
        if index is not None:

            # a 1-element ndarray
            if len(subarr) != len(index) and len(subarr) == 1:
                subarr = construct_1d_arraylike_from_scalar(
                    subarr[0], len(index), subarr.dtype)

    elif subarr.ndim > 1:
        if isinstance(data, np.ndarray):
            raise Exception('Data must be 1-dimensional')
            subarr = com.asarray_tuplesafe(data, dtype=dtype)

    # This is to prevent mixed-type Series getting all casted to
    # NumPy string type, e.g. NaN --> '-1#IND'.
    if issubclass(subarr.dtype.type, str):
        # GH#16605
        # If not empty convert the data to dtype
        # GH#19853: If data is a scalar, subarr has already the result
        if not lib.is_scalar(data):
            if not np.all(isna(data)):
                data = np.array(data, dtype=dtype, copy=False)
            subarr = np.array(data, dtype=object, copy=copy)

    if is_object_dtype(subarr.dtype) and dtype != 'object':
        inferred = lib.infer_dtype(subarr, skipna=False)
        if inferred == 'period':
                subarr = period_array(subarr)
            except IncompatibleFrequency:

    return subarr
Ejemplo n.º 31
    def astype(self, dtype=None, copy=True):
        """
        Change the dtype of a SparseArray.

        The output will always be a SparseArray. To convert to a dense
        ndarray with a certain dtype, use :meth:`numpy.asarray`.

        Parameters
        ----------
        dtype : np.dtype or ExtensionDtype
            For SparseDtype, this changes the dtype of
            ``self.sp_values`` and the ``self.fill_value``.

            For other dtypes, this only changes the dtype of
            ``self.sp_values``.

        copy : bool, default True
            Whether to ensure a copy is made, even if not necessary.

        Returns
        -------
        SparseArray

        Examples
        --------
        >>> arr = SparseArray([0, 0, 1, 2])
        >>> arr
        [0, 0, 1, 2]
        Fill: 0
        Indices: array([2, 3], dtype=int32)

        >>> arr.astype(np.dtype('int32'))
        [0, 0, 1, 2]
        Fill: 0
        Indices: array([2, 3], dtype=int32)

        Using a NumPy dtype with a different kind (e.g. float) will coerce
        just ``self.sp_values``.

        >>> arr.astype(np.dtype('float64'))
        ... # doctest: +NORMALIZE_WHITESPACE
        [0, 0, 1.0, 2.0]
        Fill: 0
        Indices: array([2, 3], dtype=int32)

        Use a SparseDtype if you wish to change the fill value as well.

        >>> arr.astype(SparseDtype("float64", fill_value=np.nan))
        ... # doctest: +NORMALIZE_WHITESPACE
        [nan, nan, 1.0, 2.0]
        Fill: nan
        Indices: array([2, 3], dtype=int32)
        """
        dtype = pandas_dtype(dtype)

        if not isinstance(dtype, SparseDtype):
            # a plain dtype keeps the current fill value
            dtype = SparseDtype(dtype, fill_value=self.fill_value)

        sp_values = astype_nansafe(self.sp_values, dtype.subtype, copy=copy)
        if sp_values is self.sp_values and copy:
            # the cast handed back the same object; honour ``copy`` anyway
            sp_values = sp_values.copy()

        return self._simple_new(sp_values, self.sp_index, dtype)
Ejemplo n.º 32
    # Convert every column in ``dct`` and return the results in a dict keyed by
    # the same column labels.  When a converter is found for a column it is
    # applied to the raw values (and a requested dtype is ignored, see the
    # message below); otherwise the values go through ``self._infer_types``
    # and, if a cast type was requested, ``self._cast_types``.
    #
    # NOTE(review): this listing looks incomplete.  The signature is not
    # closed and does not list ``self``, ``na_values``, ``na_fvalues``,
    # ``converters`` or ``dtypes`` although the body reads all of them, and
    # several ``else:`` / ``try:`` lines and call arguments appear to be
    # missing.  Restore them from the upstream file before using this code.
    def _convert_to_ndarrays(
        dct: dict,
        verbose: bool = False,
        result = {}
        for c, values in dct.items():
            # per-column converter, if any was supplied
            conv_f = None if converters is None else converters.get(c, None)
            if isinstance(dtypes, dict):
                cast_type = dtypes.get(c, None)
                # single dtype or None
                cast_type = dtypes

            if self.na_filter:
                col_na_values, col_na_fvalues = _get_na_values(
                    c, na_values, na_fvalues, self.keep_default_na)
                col_na_values, col_na_fvalues = set(), set()

            if conv_f is not None:
                # conv_f applied to data before inference
                if cast_type is not None:
                        ("Both a converter and dtype were specified "
                         f"for column {c} - only the converter will be used."),

                    values = lib.map_infer(values, conv_f)
                except ValueError:
                    # error: Argument 2 to "isin" has incompatible type "List[Any]";
                    # expected "Union[Union[ExtensionArray, ndarray], Index, Series]"
                    mask = algorithms.isin(
                        list(na_values)  # type: ignore[arg-type]
                    values = lib.map_infer_mask(values, conv_f, mask)

                cvals, na_count = self._infer_types(values,
                                                    | col_na_fvalues,
                is_ea = is_extension_array_dtype(cast_type)
                is_str_or_ea_dtype = is_ea or is_string_dtype(cast_type)
                # skip inference if specified dtype is object
                # or casting to an EA
                try_num_bool = not (cast_type and is_str_or_ea_dtype)

                # general type inference and conversion
                cvals, na_count = self._infer_types(
                    set(col_na_values) | col_na_fvalues, try_num_bool)

                # type specified in dtype param or cast_type is an EA
                if cast_type and (not is_dtype_equal(cvals, cast_type)
                                  or is_extension_array_dtype(cast_type)):
                    if not is_ea and na_count > 0:
                            if is_bool_dtype(cast_type):
                                raise ValueError(
                                    f"Bool column has NA values in column {c}")
                        except (AttributeError, TypeError):
                            # invalid input to is_bool_dtype
                    cast_type = pandas_dtype(cast_type)
                    cvals = self._cast_types(cvals, cast_type, c)

            result[c] = cvals
            # optional progress report on how many NA values were found
            if verbose and na_count:
                print(f"Filled {na_count} NA values in column {c!s}")
        return result
Ejemplo n.º 33
    # Initialise the sparse array: resolve ``dtype`` and ``fill_value``, turn
    # ``data`` into an array, then store ``_sparse_index``, ``_sparse_values``
    # and ``_dtype`` on the instance.
    #
    # NOTE(review): the parameter list is not visible in this listing (the
    # body reads ``data``, ``sparse_index``, ``index``, ``fill_value``,
    # ``kind``, ``dtype`` and ``copy``), and several ``try:`` / ``else:``
    # lines appear to have been dropped.  Restore them from the upstream file.
    def __init__(

        if fill_value is None and isinstance(dtype, SparseDtype):
            fill_value = dtype.fill_value

        if isinstance(data, type(self)):
            # disable normal inference on dtype, sparse_index, & fill_value
            if sparse_index is None:
                sparse_index = data.sp_index
            if fill_value is None:
                fill_value = data.fill_value
            if dtype is None:
                dtype = data.dtype
            # TODO: make kind=None, and use data.kind?
            data = data.sp_values

        # Handle user-provided dtype
        if isinstance(dtype, str):
            # Two options: dtype='int', regular numpy dtype
            # or dtype='Sparse[int]', a sparse dtype
                dtype = SparseDtype.construct_from_string(dtype)
            except TypeError:
                dtype = pandas_dtype(dtype)

        if isinstance(dtype, SparseDtype):
            # split the sparse dtype into its fill value and its subtype
            if fill_value is None:
                fill_value = dtype.fill_value
            dtype = dtype.subtype

        if index is not None and not is_scalar(data):
            raise Exception("must only pass scalars with an index ")

        if is_scalar(data):
            if index is not None:
                if data is None:
                    data = np.nan

            # number of points the scalar is broadcast to
            if index is not None:
                npoints = len(index)
            elif sparse_index is None:
                npoints = 1
                npoints = sparse_index.length

            dtype = infer_dtype_from_scalar(data)[0]
            data = construct_1d_arraylike_from_scalar(data, npoints, dtype)

        if dtype is not None:
            dtype = pandas_dtype(dtype)

        # TODO: disentangle the fill_value dtype inference from
        # dtype inference
        if data is None:
            # XXX: What should the empty dtype be? Object or float?
            data = np.array([], dtype=dtype)

        if not is_array_like(data):
                # probably shared code in sanitize_series

                data = sanitize_array(data, index=None)
            except ValueError:
                # NumPy may raise a ValueError on data like [1, []]
                # we retry with object dtype here.
                if dtype is None:
                    dtype = object
                    data = np.atleast_1d(np.asarray(data, dtype=dtype))

        if copy:
            # TODO: avoid double copy when dtype forces cast.
            data = data.copy()

        if fill_value is None:
            # derive the fill value from the explicit dtype if given,
            # otherwise from the dtype of the data
            fill_value_dtype = data.dtype if dtype is None else dtype
            if fill_value_dtype is None:
                fill_value = np.nan
                fill_value = na_value_for_dtype(fill_value_dtype)

        if isinstance(data, type(self)) and sparse_index is None:
            sparse_index = data._sparse_index
            sparse_values = np.asarray(data.sp_values, dtype=dtype)
        elif sparse_index is None:
            sparse_values, sparse_index, fill_value = make_sparse(
                data, kind=kind, fill_value=fill_value, dtype=dtype)
            sparse_values = np.asarray(data, dtype=dtype)
            # a caller-supplied index must describe exactly these values
            if len(sparse_values) != sparse_index.npoints:
                raise AssertionError(
                    f"Non array-like type {type(sparse_values)} must "
                    "have the same length as the index")
        self._sparse_index = sparse_index
        self._sparse_values = sparse_values
        self._dtype = SparseDtype(sparse_values.dtype, fill_value)
Ejemplo n.º 34
 def test_period_dtype(self, dtype):
     """``pandas_dtype`` must hand back the very PeriodDtype built from ``dtype``."""
     expected = PeriodDtype(dtype)
     resolved = com.pandas_dtype(dtype)
     # identity first, then equality against both the dtype object and the
     # raw ``dtype`` argument
     assert resolved is expected
     assert resolved == expected
     assert resolved == dtype
Ejemplo n.º 35
    def test_period_dtype(self, dtype):
        """``pandas_dtype`` must hand back the very PeriodDtype built from ``dtype``."""
        expected = PeriodDtype(dtype)
        resolved = com.pandas_dtype(dtype)
        # identity first, then equality against both the dtype object and the
        # raw ``dtype`` argument
        assert resolved is expected
        assert resolved == expected
        assert resolved == dtype

# Mapping from a short label to a dtype object resolved via
# ``com.pandas_dtype``.
# NOTE(review): this listing looks truncated -- only the first entry is
# visible, the ``dict(`` call is never closed, and the trailing ``ids=...``
# line seems to be the tail of a separate call whose start is not shown.
dtypes = dict(
    datetime_tz=com.pandas_dtype("datetime64[ns, US/Eastern]"),

                         ids=lambda x: str(x))
Ejemplo n.º 36
 def test_period_dtype(self, dtype):
     """``pandas_dtype`` must hand back the very PeriodDtype built from ``dtype``."""
     expected = PeriodDtype(dtype)
     resolved = com.pandas_dtype(dtype)
     # identity first, then equality against both the dtype object and the
     # raw ``dtype`` argument
     assert resolved is expected
     assert resolved == expected
     assert resolved == dtype
Ejemplo n.º 37
# Rebuild an object from the mapping ``obj`` by dispatching on its ``typ``
# entry.  A mapping without ``typ`` is returned unchanged.
#
# NOTE(review): this listing looks incomplete -- the docstring quotes, some
# ``else:`` / ``try:`` lines and the tails of many multi-line calls are not
# visible.  Restore them from the upstream file before using this code.
# NOTE(review): several branches look a class up by name with
# ``globals()[obj[u'klass']]``; the name comes straight from the payload, so
# this should only ever be fed trusted input.
def decode(obj):
    Decoder for deserializing numpy data types.

    typ = obj.get(u'typ')
    if typ is None:
        return obj
    elif typ == u'timestamp':
        # fall back to the 'offset' key when 'freq' is absent
        freq = obj[u'freq'] if 'freq' in obj else obj[u'offset']
        return Timestamp(obj[u'value'], tz=obj[u'tz'], freq=freq)
    elif typ == u'nat':
        return NaT
    elif typ == u'period':
        return Period(ordinal=obj[u'ordinal'], freq=obj[u'freq'])
    elif typ == u'index':
        dtype = dtype_for(obj[u'dtype'])
        data = unconvert(obj[u'data'], dtype,
        return Index(data, dtype=dtype, name=obj[u'name'])
    elif typ == u'range_index':
        return RangeIndex(obj[u'start'],
    elif typ == u'multi_index':
        dtype = dtype_for(obj[u'dtype'])
        data = unconvert(obj[u'data'], dtype,
        data = [tuple(x) for x in data]
        return MultiIndex.from_tuples(data, names=obj[u'names'])
    elif typ == u'period_index':
        data = unconvert(obj[u'data'], np.int64, obj.get(u'compress'))
        d = dict(name=obj[u'name'], freq=obj[u'freq'])
        # freq goes to PeriodArray, only the remaining keys go to PeriodIndex
        freq = d.pop('freq', None)
        return PeriodIndex(PeriodArray(data, freq), **d)

    elif typ == u'datetime_index':
        data = unconvert(obj[u'data'], np.int64, obj.get(u'compress'))
        d = dict(name=obj[u'name'], freq=obj[u'freq'])
        result = DatetimeIndex(data, **d)
        tz = obj[u'tz']

        # reverse tz conversion
        if tz is not None:
            result = result.tz_localize('UTC').tz_convert(tz)
        return result

    elif typ in (u'interval_index', 'interval_array'):
        return globals()[obj[u'klass']].from_arrays(obj[u'left'],
    elif typ == u'category':
        from_codes = globals()[obj[u'klass']].from_codes
        return from_codes(codes=obj[u'codes'],

    elif typ == u'interval':
        return Interval(obj[u'left'], obj[u'right'], obj[u'closed'])
    elif typ == u'series':
        dtype = dtype_for(obj[u'dtype'])
        pd_dtype = pandas_dtype(dtype)

        index = obj[u'index']
        result = Series(unconvert(obj[u'data'], dtype, obj[u'compress']),
        return result

    elif typ == u'block_manager':
        axes = obj[u'axes']

        # helper that rebuilds one block from its serialized mapping ``b``
        def create_block(b):
            values = _safe_reshape(unconvert(
                b[u'values'], dtype_for(b[u'dtype']),
                b[u'compress']), b[u'shape'])

            # locs handles duplicate column names, and should be used instead
            # of items; see GH 9618
            if u'locs' in b:
                placement = b[u'locs']
                placement = axes[0].get_indexer(b[u'items'])

            if is_datetime64tz_dtype(b[u'dtype']):
                # tz-aware data must arrive as a plain M8[ns] ndarray and is
                # wrapped in a DatetimeArray carrying the tz-aware dtype
                assert isinstance(values, np.ndarray), type(values)
                assert values.dtype == 'M8[ns]', values.dtype
                values = DatetimeArray(values, dtype=b[u'dtype'])

            return make_block(values=values,
                              klass=getattr(internals, b[u'klass']),

        blocks = [create_block(b) for b in obj[u'blocks']]
        return globals()[obj[u'klass']](BlockManager(blocks, axes))
    elif typ == u'datetime':
        return parse(obj[u'data'])
    elif typ == u'datetime64':
        return np.datetime64(parse(obj[u'data']))
    elif typ == u'date':
        return parse(obj[u'data']).date()
    elif typ == u'timedelta':
        return timedelta(*obj[u'data'])
    elif typ == u'timedelta64':
        return np.timedelta64(int(obj[u'data']))
    # elif typ == 'sparse_series':
    #    dtype = dtype_for(obj['dtype'])
    #    return SparseSeries(
    #        unconvert(obj['sp_values'], dtype, obj['compress']),
    #        sparse_index=obj['sp_index'], index=obj['index'],
    #        fill_value=obj['fill_value'], kind=obj['kind'], name=obj['name'])
    # elif typ == 'sparse_dataframe':
    #    return SparseDataFrame(
    #        obj['data'], columns=obj['columns'],
    #        default_fill_value=obj['default_fill_value'],
    #        default_kind=obj['default_kind']
    #    )
    # elif typ == 'sparse_panel':
    #    return SparsePanel(
    #        obj['data'], items=obj['items'],
    #        default_fill_value=obj['default_fill_value'],
    #        default_kind=obj['default_kind'])
    elif typ == u'block_index':
        return globals()[obj[u'klass']](obj[u'length'], obj[u'blocs'],
    elif typ == u'int_index':
        return globals()[obj[u'klass']](obj[u'length'], obj[u'indices'])
    elif typ == u'ndarray':
        # NOTE(review): ``np.typeDict`` is believed to be gone from recent
        # NumPy releases -- confirm against the supported NumPy versions.
        return unconvert(obj[u'data'], np.typeDict[obj[u'dtype']],
    elif typ == u'np_scalar':
        if obj.get(u'sub_typ') == u'np_complex':
            return c2f(obj[u'real'], obj[u'imag'], obj[u'dtype'])
            dtype = dtype_for(obj[u'dtype'])
                return dtype(obj[u'data'])
            except (ValueError, TypeError):
                return dtype.type(obj[u'data'])
    elif typ == u'np_complex':
        # built by string concatenation, so 'real' and 'imag' are presumably
        # stored as strings -- TODO confirm against the encoder
        return complex(obj[u'real'] + u'+' + obj[u'imag'] + u'j')
    elif isinstance(obj, (dict, list, set)):
        return obj
        return obj
Ejemplo n.º 38
 def test_categorical_dtype(self):
     """The ``"category"`` alias compares equal to an argument-less CategoricalDtype."""
     resolved = com.pandas_dtype("category")
     assert resolved == CategoricalDtype()
Ejemplo n.º 39
 def test_numpy_string_dtype(self):
     """Single-letter NumPy string codes resolve to the matching NumPy dtype."""
     # do not parse freq-like string as period dtype
     for code in ("U", "S"):
         assert com.pandas_dtype(code) == np.dtype(code)
Ejemplo n.º 40
 def test_numpy_dtype(self, dtype):
     """``pandas_dtype`` agrees with ``np.dtype`` for the given ``dtype``."""
     resolved = com.pandas_dtype(dtype)
     assert resolved == np.dtype(dtype)
Ejemplo n.º 41
def get_dtype(dtype, coerce_int=None):
    """
    Resolve ``dtype`` with ``pandas_dtype``, optionally skipping integer names.

    Returns ``None`` when ``coerce_int`` is exactly ``False`` and ``dtype``
    contains the substring ``"int"``; otherwise returns
    ``pandas_dtype(dtype)``.
    """
    skip_integer = coerce_int is False and "int" in dtype
    return None if skip_integer else pandas_dtype(dtype)
Ejemplo n.º 42
 def __init__(self, *dtypes):
     """Store the ``.type`` of every given dtype, in order, as a tuple."""
     resolved_types = [pandas_dtype(entry).type for entry in dtypes]
     self.dtypes = tuple(resolved_types)
Ejemplo n.º 43
 def test_pandas_dtype_valid(self, dtype):
     """A valid ``dtype`` compares equal to what ``pandas_dtype`` returns for it."""
     resolved = com.pandas_dtype(dtype)
     assert resolved == dtype
Ejemplo n.º 44
    # Construct the index from one of several inputs: explicit ordinals, a
    # generated range (when ``data`` is None), an existing PeriodIndex,
    # datetime64 data, or any other array-like of strings/periods.
    #
    # NOTE(review): the rest of the signature is not visible in this listing
    # (the body reads ``data``, ``ordinal``, ``freq``, ``start``, ``end``,
    # ``periods``, ``name``, ``tz`` and ``dtype``), and some ``else:`` lines,
    # call tails and the body of the scalar check appear to be missing.
    def __new__(cls,

        if periods is not None:
            # accept floats by truncating them, reject any other non-integer
            if is_float(periods):
                periods = int(periods)
            elif not is_integer(periods):
                msg = 'periods must be a number, got {periods}'
                raise TypeError(msg.format(periods=periods))

        if name is None and hasattr(data, 'name'):
            name = data.name

        if dtype is not None:
            # dtype and freq must describe the same frequency
            dtype = pandas_dtype(dtype)
            if not is_period_dtype(dtype):
                raise ValueError('dtype must be PeriodDtype')
            if freq is None:
                freq = dtype.freq
            elif freq != dtype.freq:
                msg = 'specified freq and dtype are different'
                raise IncompatibleFrequency(msg)

        # coerce freq to freq object, otherwise it can be coerced elementwise
        # which is slow
        if freq:
            freq = Period._maybe_convert_freq(freq)

        if data is None:
            if ordinal is not None:
                data = np.asarray(ordinal, dtype=np.int64)
                data, freq = cls._generate_range(start, end, periods, freq,
            return cls._from_ordinals(data, name=name, freq=freq)

        if isinstance(data, PeriodIndex):
            if freq is None or freq == data.freq:  # no freq change
                freq = data.freq
                data = data._values
                # different freq requested: convert the stored values
                base1, _ = _gfc(data.freq)
                base2, _ = _gfc(freq)
                data = period.period_asfreq_arr(data._values, base1, base2, 1)
            return cls._simple_new(data, name=name, freq=freq)

        # not array / index
        if not isinstance(
                data, (np.ndarray, PeriodIndex, DatetimeIndex, Int64Index)):
            if is_scalar(data) or isinstance(data, Period):

            # other iterable of some kind
            if not isinstance(data, (list, tuple)):
                data = list(data)

            data = np.asarray(data)

        # datetime other than period
        if is_datetime64_dtype(data.dtype):
            data = dt64arr_to_periodarr(data, freq, tz)
            return cls._from_ordinals(data, name=name, freq=freq)

        # check not floats
        if infer_dtype(data) == 'floating' and len(data) > 0:
            raise TypeError("PeriodIndex does not allow "
                            "floating point in construction")

        # anything else, likely an array of strings or periods
        data = _ensure_object(data)
        # use the given freq, or extract one from the data when none was given
        freq = freq or period.extract_freq(data)
        data = period.extract_ordinals(data, freq)
        return cls._from_ordinals(data, name=name, freq=freq)
Ejemplo n.º 45
def sanitize_array(
    data, index, dtype=None, copy: bool = False, raise_cast_failure: bool = False
):
    """
    Sanitize input data to an ndarray, copy if specified, coerce to the
    dtype if specified.

    Parameters
    ----------
    data : scalar, list, tuple, range, ndarray, MaskedArray or ExtensionArray
        The values to sanitize.
    index : sized or None
        Only its length is used, to broadcast a scalar (or a 1-element
        array) to ``len(index)`` values.
    dtype : dtype-like, optional
        Target dtype; resolved with ``pandas_dtype`` when given.
    copy : bool, default False
        Whether to copy the input data.
    raise_cast_failure : bool, default False
        Forwarded to ``_try_cast``.

    Returns
    -------
    ndarray, ExtensionArray or scalar
        A scalar is returned only for 0-dimensional input when ``index`` is
        None.

    Raises
    ------
    Exception
        If ``data`` is an ndarray with more than one dimension.
    """
    if dtype is not None:
        dtype = pandas_dtype(dtype)

    if isinstance(data, ma.MaskedArray):
        mask = ma.getmaskarray(data)
        if mask.any():
            data, fill_value = maybe_upcast(data, copy=True)
            data.soften_mask()  # set hardmask False if it was True
            data[mask] = fill_value
        else:
            data = data.copy()

    # extract ndarray or ExtensionArray, ensure we have no PandasArray
    data = extract_array(data, extract_numpy=True)

    # GH#846
    if isinstance(data, np.ndarray):

        if dtype is not None and is_float_dtype(data.dtype) and is_integer_dtype(dtype):
            # possibility of nan -> garbage
            try:
                subarr = _try_cast(data, dtype, copy, True)
            except ValueError:
                # the float data cannot be cast to integer; keep it as is
                if copy:
                    subarr = data.copy()
                else:
                    subarr = np.array(data, copy=False)
        else:
            # we will try to copy be-definition here
            subarr = _try_cast(data, dtype, copy, raise_cast_failure)

    elif isinstance(data, ABCExtensionArray):
        # it is already ensured above this is not a PandasArray
        subarr = data

        if dtype is not None:
            subarr = subarr.astype(dtype, copy=copy)
        elif copy:
            subarr = subarr.copy()
        return subarr

    elif isinstance(data, (list, tuple)) and len(data) > 0:
        if dtype is not None:
            subarr = _try_cast(data, dtype, copy, raise_cast_failure)
        else:
            subarr = maybe_convert_platform(data)

        subarr = maybe_cast_to_datetime(subarr, dtype)

    elif isinstance(data, range):
        # GH#16804
        arr = np.arange(data.start, data.stop, data.step, dtype="int64")
        subarr = _try_cast(arr, dtype, copy, raise_cast_failure)
    else:
        subarr = _try_cast(data, dtype, copy, raise_cast_failure)

    # scalar like, GH
    if getattr(subarr, "ndim", 0) == 0:
        if isinstance(data, list):  # pragma: no cover
            subarr = np.array(data, dtype=object)
        elif index is not None:
            value = data

            # figure out the dtype from the value (upcast if necessary)
            if dtype is None:
                dtype, value = infer_dtype_from_scalar(value)
            else:
                # need to possibly convert the value here
                value = maybe_cast_to_datetime(value, dtype)

            subarr = construct_1d_arraylike_from_scalar(value, len(index), dtype)

        else:
            return subarr.item()

    # the result that we want
    elif subarr.ndim == 1:
        if index is not None:

            # a 1-element ndarray
            if len(subarr) != len(index) and len(subarr) == 1:
                subarr = construct_1d_arraylike_from_scalar(
                    subarr[0], len(index), subarr.dtype
                )

    elif subarr.ndim > 1:
        if isinstance(data, np.ndarray):
            raise Exception("Data must be 1-dimensional")
        else:
            subarr = com.asarray_tuplesafe(data, dtype=dtype)

    if not (is_extension_array_dtype(subarr.dtype) or is_extension_array_dtype(dtype)):
        # This is to prevent mixed-type Series getting all casted to
        # NumPy string type, e.g. NaN --> '-1#IND'.
        if issubclass(subarr.dtype.type, str):
            # GH#16605
            # If not empty convert the data to dtype
            # GH#19853: If data is a scalar, subarr has already the result
            if not lib.is_scalar(data):
                if not np.all(isna(data)):
                    data = np.array(data, dtype=dtype, copy=False)
                subarr = np.array(data, dtype=object, copy=copy)

        if is_object_dtype(subarr.dtype) and not is_object_dtype(dtype):
            inferred = lib.infer_dtype(subarr, skipna=False)
            if inferred == "period":
                from pandas.core.arrays import period_array

                try:
                    subarr = period_array(subarr)
                except IncompatibleFrequency:
                    # mixed frequencies: deliberately keep the object array
                    pass

    return subarr
Ejemplo n.º 46
    def __new__(cls, subtype=None, closed: str_type | None = None):
        """
        Return the (cached) interval dtype for ``subtype`` and ``closed``.

        Parameters
        ----------
        subtype : str, dtype-like, IntervalDtype or None, default None
            An existing IntervalDtype is returned as is.  ``None`` creates an
            empty instance.  The string ``"interval"`` (any case) means "no
            subtype"; other strings are parsed with ``cls._match`` and then
            resolved with ``pandas_dtype``.
        closed : {"right", "left", "both", "neither"} or None, default None
            May also be given as part of a ``subtype`` string.

        Returns
        -------
        An instance of ``cls``; instances are cached in ``cls._cache_dtypes``
        keyed on ``str(subtype) + str(closed)``.

        Raises
        ------
        ValueError
            If ``closed`` is not one of the allowed values, or conflicts with
            the value carried by ``subtype``.
        TypeError
            If ``subtype`` cannot be resolved to a dtype, or resolves to a
            categorical or string dtype.
        """
        # imported here rather than at module level, as in the original
        from pandas.core.dtypes.common import (
            is_string_dtype,
            pandas_dtype,
        )

        if closed is not None and closed not in {
                "right", "left", "both", "neither"
        }:
            raise ValueError(
                "closed must be one of 'right', 'left', 'both', 'neither'")

        if isinstance(subtype, IntervalDtype):
            if closed is not None and closed != subtype.closed:
                raise ValueError(
                    "dtype.closed and 'closed' do not match. "
                    "Try IntervalDtype(dtype.subtype, closed) instead.")
            return subtype
        elif subtype is None:
            # we are called as an empty constructor
            # generally for pickle compat
            u = object.__new__(cls)
            u._subtype = None
            u._closed = closed
            return u
        elif isinstance(subtype, str) and subtype.lower() == "interval":
            subtype = None
        else:
            if isinstance(subtype, str):
                m = cls._match.search(subtype)
                if m is not None:
                    gd = m.groupdict()
                    subtype = gd["subtype"]
                    if gd.get("closed", None) is not None:
                        if closed is not None:
                            if closed != gd["closed"]:
                                raise ValueError(
                                    "'closed' keyword does not match value "
                                    "specified in dtype string")
                        closed = gd["closed"]

            try:
                subtype = pandas_dtype(subtype)
            except TypeError as err:
                raise TypeError("could not construct IntervalDtype") from err

        if CategoricalDtype.is_dtype(subtype) or is_string_dtype(subtype):
            # GH 19016
            msg = ("category, object, and string subtypes are not supported "
                   "for IntervalDtype")
            raise TypeError(msg)

        key = str(subtype) + str(closed)
        try:
            return cls._cache_dtypes[key]
        except KeyError:
            # first request for this combination: build it and remember it
            u = object.__new__(cls)
            u._subtype = subtype
            u._closed = closed
            cls._cache_dtypes[key] = u
            return u
Ejemplo n.º 47
    # Cast ``values`` to ``cast_type`` and return the result.  Categorical
    # and extension-array dtypes get dedicated handling; everything else goes
    # through ``astype_nansafe``.  ``column`` is used for the error message
    # raised when that last conversion fails.
    #
    # NOTE(review): this listing looks incomplete -- the docstring quotes and
    # section headers, some ``try:`` / ``else:`` lines and the tails of
    # several calls are not visible.  Restore them from the upstream file.
    def _cast_types(self, values, cast_type, column):
        Cast values to specified type

        values : ndarray
        cast_type : string or np.dtype
           dtype to cast values to
        column : string
            column name - used only for error reporting

        converted : ndarray
        if is_categorical_dtype(cast_type):
            # True only for a CategoricalDtype that already lists categories
            known_cats = (isinstance(cast_type, CategoricalDtype)
                          and cast_type.categories is not None)

            if not is_object_dtype(values) and not known_cats:
                # TODO: this is for consistency with
                # c-parser which parses all categories
                # as strings

                values = astype_nansafe(values, np.dtype(str))

            cats = Index(values).unique().dropna()
            values = Categorical._from_inferred_categories(

        # use the EA's implementation of casting
        elif is_extension_array_dtype(cast_type):
            # ensure cast_type is an actual dtype and not a string
            cast_type = pandas_dtype(cast_type)
            array_type = cast_type.construct_array_type()
                if is_bool_dtype(cast_type):
                    return array_type._from_sequence_of_strings(
                    return array_type._from_sequence_of_strings(
                        values, dtype=cast_type)
            except NotImplementedError as err:
                # re-raise with a hint about what the array type must provide
                raise NotImplementedError(
                    f"Extension Array: {array_type} must implement "
                    "_from_sequence_of_strings in order to be used in parser methods"
                ) from err

                values = astype_nansafe(values,
            except ValueError as err:
                raise ValueError(
                    f"Unable to convert column {column} to type {cast_type}"
                ) from err
        return values
Ejemplo n.º 48
    # Compare ``self`` with ``other`` using the ``operator`` function ``op``.
    # Interval-like operands are compared on their left/right endpoints,
    # object-dtype operands element by element, and anything else is handed to
    # ``invalid_comparison``.
    #
    # NOTE(review): this listing looks incomplete -- some ``else:`` / ``try:``
    # lines and the tails of two calls are not visible.  Restore them from the
    # upstream file before using this code.
    def _cmp_method(self, other, op):
        # ensure pandas array for list-like and eliminate non-interval scalars
        if is_list_like(other):
            if len(self) != len(other):
                raise ValueError("Lengths must match to compare")
            other = array(other)
        elif not isinstance(other, Interval):
            # non-interval scalar -> no matches
            return invalid_comparison(self, other, op)

        # determine the dtype of the elements we want to compare
        if isinstance(other, Interval):
            other_dtype = pandas_dtype("interval")
        elif not is_categorical_dtype(other.dtype):
            other_dtype = other.dtype
            # for categorical defer to categories for dtype
            other_dtype = other.categories.dtype

            # extract intervals if we have interval categories with matching closed
            if is_interval_dtype(other_dtype):
                if self.closed != other.categories.closed:
                    return invalid_comparison(self, other, op)

                other = other.categories.take(

        # interval-like -> need same closed and matching endpoints
        if is_interval_dtype(other_dtype):
            if self.closed != other.closed:
                return invalid_comparison(self, other, op)
            elif not isinstance(other, Interval):
                other = type(self)(other)

            # ordering compares the left endpoints first and uses the right
            # endpoints only to break ties
            if op is operator.eq:
                return (self._left == other.left) & (self._right
                                                     == other.right)
            elif op is operator.ne:
                return (self._left != other.left) | (self._right !=
            elif op is operator.gt:
                return (self._left > other.left) | (
                    (self._left == other.left) & (self._right > other.right))
            elif op is operator.ge:
                return (self == other) | (self > other)
            elif op is operator.lt:
                return (self._left < other.left) | (
                    (self._left == other.left) & (self._right < other.right))
                # remaining case, presumably operator.le: equal or less-than
                return (self == other) | (self < other)

        # non-interval/non-object dtype -> no matches
        if not is_object_dtype(other_dtype):
            return invalid_comparison(self, other, op)

        # object dtype -> iteratively check for intervals
        result = np.zeros(len(self), dtype=bool)
        for i, obj in enumerate(other):
                result[i] = op(self[i], obj)
            except TypeError:
                if obj is NA:
                    # comparison with np.nan returns NA
                    # github.com/pandas-dev/pandas/pull/37124#discussion_r509095092
                    # stored as True only for ``!=``, False for every other op
                    result[i] = op is operator.ne
        return result
Ejemplo n.º 49
    def test_period_dtype(self, dtype):
        """``pandas_dtype`` must hand back the very PeriodDtype built from ``dtype``."""
        expected = PeriodDtype(dtype)
        resolved = com.pandas_dtype(dtype)
        # identity first, then equality against both the dtype object and the
        # raw ``dtype`` argument
        assert resolved is expected
        assert resolved == expected
        assert resolved == dtype

# Mapping from a short label to a representative dtype object; most entries
# are resolved via ``com.pandas_dtype``, the rest are built directly.
# NOTE(review): this listing looks truncated -- the dict literal is never
# closed, and the trailing ``ids=...`` line seems to be the tail of a
# separate call whose start is not shown.
dtypes = {
    "datetime_tz": com.pandas_dtype("datetime64[ns, US/Eastern]"),
    "datetime": com.pandas_dtype("datetime64[ns]"),
    "timedelta": com.pandas_dtype("timedelta64[ns]"),
    "period": PeriodDtype("D"),
    "integer": np.dtype(np.int64),
    "float": np.dtype(np.float64),
    "object": np.dtype(object),
    "category": com.pandas_dtype("category"),

                         ids=lambda x: str(x))
Ejemplo n.º 50
    # Assemble a new instance from ``left`` and ``right`` endpoints: coerce
    # them to indexes, apply an optional interval ``dtype``, align int/float
    # endpoint dtypes, reject unsupported combinations, and store the
    # unwrapped arrays plus ``closed`` on the result.
    #
    # NOTE(review): the rest of the signature is not visible in this listing
    # (the body reads ``left``, ``right``, ``closed``, ``copy``, ``dtype`` and
    # ``verify_integrity``), and the body of the final ``if verify_integrity:``
    # appears to be missing.  Restore them from the upstream file.
    def _simple_new(cls,
        result = IntervalMixin.__new__(cls)

        # a falsy ``closed`` falls back to "right"
        closed = closed or "right"
        left = ensure_index(left, copy=copy)
        right = ensure_index(right, copy=copy)

        if dtype is not None:
            # GH 19262: dtype must be an IntervalDtype to override inferred
            dtype = pandas_dtype(dtype)
            if not is_interval_dtype(dtype):
                msg = f"dtype must be an IntervalDtype, got {dtype}"
                raise TypeError(msg)
            elif dtype.subtype is not None:
                left = left.astype(dtype.subtype)
                right = right.astype(dtype.subtype)

        # coerce dtypes to match if needed
        if is_float_dtype(left) and is_integer_dtype(right):
            right = right.astype(left.dtype)
        elif is_float_dtype(right) and is_integer_dtype(left):
            left = left.astype(right.dtype)

        if type(left) != type(right):
            msg = (f"must not have differing left [{type(left).__name__}] and "
                   f"right [{type(right).__name__}] types")
            raise ValueError(msg)
        elif is_categorical_dtype(left.dtype) or is_string_dtype(left.dtype):
            # GH 19016
            msg = ("category, object, and string subtypes are not supported "
                   "for IntervalArray")
            raise TypeError(msg)
        elif isinstance(left, ABCPeriodIndex):
            msg = "Period dtypes are not supported, use a PeriodIndex instead"
            raise ValueError(msg)
        elif isinstance(left,
                        ABCDatetimeIndex) and str(left.tz) != str(right.tz):
            msg = ("left and right must have the same time zone, got "
                   f"'{left.tz}' and '{right.tz}'")
            raise ValueError(msg)

        # For dt64/td64 we want DatetimeArray/TimedeltaArray instead of ndarray
        left = ensure_wrapped_if_datetimelike(left)
        left = extract_array(left, extract_numpy=True)
        right = ensure_wrapped_if_datetimelike(right)
        right = extract_array(right, extract_numpy=True)

        # look at the object each side's ``.base`` points to, to detect sharing
        lbase = getattr(left, "_ndarray", left).base
        rbase = getattr(right, "_ndarray", right).base
        if lbase is not None and lbase is rbase:
            # If these share data, then setitem could corrupt our IA
            right = right.copy()

        result._left = left
        result._right = right
        result._closed = closed
        if verify_integrity:
        return result
Ejemplo n.º 51
    # Construct the index from one of several inputs: explicit ordinals, a
    # generated range (when ``data`` is None), an existing PeriodIndex,
    # datetime64 data, or any other array-like of strings/periods.
    #
    # NOTE(review): this listing looks incomplete -- the end of the signature
    # is not visible (the body reads ``kwargs``), and some ``else:`` lines and
    # the body of the scalar check appear to be missing.  Restore them from
    # the upstream file before using this code.
    def __new__(cls, data=None, ordinal=None, freq=None, start=None, end=None,
                periods=None, copy=False, name=None, tz=None, dtype=None,

        if periods is not None:
            # accept floats by truncating them, reject any other non-integer
            if is_float(periods):
                periods = int(periods)
            elif not is_integer(periods):
                msg = 'periods must be a number, got {periods}'
                raise TypeError(msg.format(periods=periods))

        if name is None and hasattr(data, 'name'):
            name = data.name

        if dtype is not None:
            # dtype and freq must describe the same frequency
            dtype = pandas_dtype(dtype)
            if not is_period_dtype(dtype):
                raise ValueError('dtype must be PeriodDtype')
            if freq is None:
                freq = dtype.freq
            elif freq != dtype.freq:
                msg = 'specified freq and dtype are different'
                raise IncompatibleFrequency(msg)

        # coerce freq to freq object, otherwise it can be coerced elementwise
        # which is slow
        if freq:
            freq = Period._maybe_convert_freq(freq)

        if data is None:
            if ordinal is not None:
                data = np.asarray(ordinal, dtype=np.int64)
                data, freq = cls._generate_range(start, end, periods,
                                                 freq, kwargs)
            return cls._from_ordinals(data, name=name, freq=freq)

        if isinstance(data, PeriodIndex):
            if freq is None or freq == data.freq:  # no freq change
                freq = data.freq
                data = data._values
                # different freq requested: convert the stored values
                base1, _ = _gfc(data.freq)
                base2, _ = _gfc(freq)
                data = period.period_asfreq_arr(data._values,
                                                base1, base2, 1)
            return cls._simple_new(data, name=name, freq=freq)

        # not array / index
        if not isinstance(data, (np.ndarray, PeriodIndex,
                                 DatetimeIndex, Int64Index)):
            if is_scalar(data) or isinstance(data, Period):

            # other iterable of some kind
            if not isinstance(data, (list, tuple)):
                data = list(data)

            data = np.asarray(data)

        # datetime other than period
        if is_datetime64_dtype(data.dtype):
            data = dt64arr_to_periodarr(data, freq, tz)
            return cls._from_ordinals(data, name=name, freq=freq)

        # check not floats
        if infer_dtype(data) == 'floating' and len(data) > 0:
            raise TypeError("PeriodIndex does not allow "
                            "floating point in construction")

        # anything else, likely an array of strings or periods
        data = _ensure_object(data)
        # use the given freq, or extract one from the data when none was given
        freq = freq or period.extract_freq(data)
        data = period.extract_ordinals(data, freq)
        return cls._from_ordinals(data, name=name, freq=freq)
Ejemplo n.º 52
 def test_datetimetz_dtype(self, dtype):
     """``pandas_dtype`` must hand back the very DatetimeTZDtype built from ``dtype``."""
     expected = DatetimeTZDtype(dtype)
     resolved = com.pandas_dtype(dtype)
     # identity first, then equality against both the dtype object and the
     # raw ``dtype`` argument
     assert resolved is expected
     assert resolved == expected
     assert resolved == dtype
Ejemplo n.º 53
def unconvert(values, dtype, compress=None):
    """
    Rebuild an array (or pass-through value) from a serialized payload.

    Parameters
    ----------
    values : object
        The serialized payload. An ``ExtType`` whose ``code`` is 0 is
        unwrapped to its ``data``; any other payload is encoded to bytes
        with ``latin1`` before being interpreted.
    dtype : object
        Target dtype. Categorical dtypes return ``values`` as-is and object
        dtypes return ``np.array(values, dtype=object)``. Otherwise the
        ``.base`` of ``pandas_dtype(dtype)`` is used to interpret the bytes.
    compress : {'zlib', 'blosc'}, optional
        Compression the payload was written with. A falsy value means the
        payload is not compressed.

    Returns
    -------
    object
        ``values`` itself for categorical dtypes, otherwise a numpy array.

    Raises
    ------
    ValueError
        If ``compress`` is truthy but is neither ``'zlib'`` nor ``'blosc'``.
    """
    as_is_ext = isinstance(values, ExtType) and values.code == 0

    if as_is_ext:
        values = values.data

    if is_categorical_dtype(dtype):
        return values

    elif is_object_dtype(dtype):
        return np.array(values, dtype=object)

    dtype = pandas_dtype(dtype).base

    if not as_is_ext:
        values = values.encode('latin1')

    if compress:
        if compress == 'zlib':
            zlib = import_optional_dependency(
                "zlib",
                extra="zlib is required when `compress='zlib'`."
            )
            decompress = zlib.decompress
        elif compress == 'blosc':
            blosc = import_optional_dependency(
                "blosc",
                extra="blosc is required when `compress='blosc'`."
            )
            decompress = blosc.decompress
        else:
            raise ValueError("compress must be one of 'zlib' or 'blosc'")

        try:
            # NOTE(review): assumes `_move_into_mutable_buffer` is imported
            # next to `_BadMove` at the top of this module -- confirm.
            return np.frombuffer(
                _move_into_mutable_buffer(decompress(values)),
                dtype=dtype,
            )
        except _BadMove as e:
            # Pull the decompressed data off of the `_BadMove` exception.
            # We don't just store this in the locals because we want to
            # minimize the risk of giving users access to a `bytes` object
            # whose data is also given to a mutable buffer.
            values = e.args[0]
            if len(values) > 1:
                # The empty string and single characters are memoized in many
                # string creating functions in the capi. This case should not
                # warn even though we need to make a copy because we are only
                # copying at most 1 byte.
                import warnings

                from pandas.errors import PerformanceWarning

                warnings.warn(
                    'copying data after decompressing; this may mean that'
                    ' decompress is caching its result',
                    PerformanceWarning,
                )
                # fall through to copying `np.fromstring`

    # Copy the bytes into a numpy array.
    buf = np.frombuffer(values, dtype=dtype)
    buf = buf.copy()  # required to not mutate the original data
    buf.flags.writeable = True
    return buf
Ejemplo n.º 54
 def test_invalid_dtype_error(self, box):
     """``com.pandas_dtype(box)`` must raise a TypeError matching 'not understood'."""
     with tm.assert_raises_regex(TypeError, 'not understood'):
         # The call under test; without it the context manager has no body.
         com.pandas_dtype(box)
Ejemplo n.º 55
    def astype(self, dtype: Optional[Dtype] = None, copy=True):
        """
        Change the dtype of a SparseArray.

        The output will always be a SparseArray. To convert to a dense
        ndarray with a certain dtype, use :meth:`numpy.asarray`.

        Parameters
        ----------
        dtype : np.dtype or ExtensionDtype
            For SparseDtype, this changes the dtype of
            ``self.sp_values`` and the ``self.fill_value``.

            For other dtypes, this only changes the dtype of
            ``self.sp_values``.

        copy : bool, default True
            Whether to ensure a copy is made, even if not necessary.

        Returns
        -------
        SparseArray

        Examples
        --------
        >>> arr = pd.arrays.SparseArray([0, 0, 1, 2])
        >>> arr
        [0, 0, 1, 2]
        Fill: 0
        Indices: array([2, 3], dtype=int32)

        >>> arr.astype(np.dtype('int32'))
        [0, 0, 1, 2]
        Fill: 0
        Indices: array([2, 3], dtype=int32)

        Using a NumPy dtype with a different kind (e.g. float) will coerce
        just ``self.sp_values``.

        >>> arr.astype(np.dtype('float64'))
        ... # doctest: +NORMALIZE_WHITESPACE
        [0.0, 0.0, 1.0, 2.0]
        Fill: 0.0
        Indices: array([2, 3], dtype=int32)

        Use a SparseDtype if you wish to change the fill value as well.

        >>> arr.astype(SparseDtype("float64", fill_value=np.nan))
        ... # doctest: +NORMALIZE_WHITESPACE
        [nan, nan, 1.0, 2.0]
        Fill: nan
        Indices: array([2, 3], dtype=int32)
        """
        if is_dtype_equal(dtype, self._dtype):
            # Same dtype requested: hand back self, or a copy when asked for one.
            if not copy:
                return self
            else:
                return self.copy()
        dtype = self.dtype.update_dtype(dtype)
        subtype = pandas_dtype(dtype._subtype_with_str)
        # TODO copy=False is broken for astype_nansafe with int -> float, so cannot
        # passthrough copy keyword: https://github.com/pandas-dev/pandas/issues/34456
        sp_values = astype_nansafe(self.sp_values, subtype, copy=True)
        if sp_values is self.sp_values and copy:
            # The cast returned the very same object; copy explicitly.
            sp_values = sp_values.copy()

        return self._simple_new(sp_values, self.sp_index, dtype)
Ejemplo n.º 56
        assert com.pandas_dtype(dtype) == dtype

    def test_categorical_dtype(self):
        """The 'category' alias must resolve to a dtype equal to ``CategoricalDtype()``."""
        resolved = com.pandas_dtype('category')
        assert resolved == CategoricalDtype()

    @pytest.mark.parametrize('dtype', [
        'period[D]', 'period[3M]', 'period[U]',
        'Period[D]', 'Period[3M]', 'Period[U]'])
    def test_period_dtype(self, dtype):
        """Check how ``com.pandas_dtype`` resolves a period dtype string.

        The result must be the identical object to ``PeriodDtype(dtype)``,
        compare equal to it, and compare equal to the raw string.
        """
        assert com.pandas_dtype(dtype) is PeriodDtype(dtype)
        assert com.pandas_dtype(dtype) == PeriodDtype(dtype)
        assert com.pandas_dtype(dtype) == dtype

dtypes = dict(datetime_tz=com.pandas_dtype('datetime64[ns, US/Eastern]'),

                         ids=lambda x: str(x))
                         ids=lambda x: str(x))
Ejemplo n.º 57
    def astype(self, dtype, copy: bool = True) -> ArrayLike:
        """
        Cast to a NumPy array or ExtensionArray with 'dtype'.

        Parameters
        ----------
        dtype : str or dtype
            Typecode or data-type to which the array is cast.
        copy : bool, default True
            Whether to copy the data, even if not necessary. If False,
            a copy is made only if the old dtype does not match the
            new dtype.

        Returns
        -------
        ndarray or ExtensionArray
            NumPy ndarray, BooleanArray or IntegerArray with 'dtype' for its dtype.

        Raises
        ------
        TypeError
            if incompatible type with an BooleanDtype, equivalent of same_kind
            casting
        ValueError
            if the array holds missing values and the target is a bool or
            integer dtype.
        """
        from pandas.core.arrays.string_ import StringDtype

        dtype = pandas_dtype(dtype)

        if isinstance(dtype, BooleanDtype):
            values, mask = coerce_to_array(self, copy=copy)
            if not copy:
                return self
            else:
                return BooleanArray(values, mask, copy=False)
        elif isinstance(dtype, StringDtype):
            return dtype.construct_array_type()._from_sequence(self,
                                                               copy=False)

        if is_bool_dtype(dtype):
            # astype_nansafe converts np.nan to True
            if self._hasna:
                raise ValueError("cannot convert float NaN to bool")
            else:
                return self._data.astype(dtype, copy=copy)
        if is_extension_array_dtype(dtype) and is_integer_dtype(dtype):
            from pandas.core.arrays import IntegerArray

            # NOTE(review): mask/copy arguments restored from upstream pandas;
            # confirm against the version this file tracks.
            return IntegerArray(self._data.astype(dtype.numpy_dtype),
                                self._mask.copy(), copy=False)
        # for integer, error if there are missing values
        if is_integer_dtype(dtype):
            if self._hasna:
                raise ValueError("cannot convert NA to integer")
        # for float dtype, ensure we use np.nan before casting (numpy cannot
        # deal with pd.NA)
        na_value = self._na_value
        if is_float_dtype(dtype):
            na_value = np.nan
        # coerce
        return self.to_numpy(dtype=dtype, na_value=na_value, copy=False)
Ejemplo n.º 58
 def test_categorical_dtype(self):
     """The 'category' alias must resolve to a dtype equal to ``CategoricalDtype()``."""
     resolved = com.pandas_dtype('category')
     assert resolved == CategoricalDtype()
Ejemplo n.º 59
 def test_datetimetz_dtype(self, dtype):
     """Check how ``com.pandas_dtype`` resolves ``dtype`` against DatetimeTZDtype.

     The resolved dtype must equal ``DatetimeTZDtype.construct_from_string(dtype)``
     and must also compare equal to the raw ``dtype`` input.
     """
     resolved = com.pandas_dtype(dtype)
     expected = DatetimeTZDtype.construct_from_string(dtype)
     assert resolved == expected
     assert resolved == dtype
Ejemplo n.º 60
 def test_invalid_dtype_error(self, box):
     with pytest.raises(TypeError, match="not understood"):