예제 #1
0
def _astype_nansafe(arr, dtype, copy=True):
    """ return a view if copy is False, but
        need to be very careful as the result shape could change! """
    if not isinstance(dtype, np.dtype):
        dtype = _coerce_to_dtype(dtype)

    if issubclass(dtype.type, text_type):
        # in Py3 that's str, in Py2 that's unicode
        return lib.astype_unicode(arr.ravel()).reshape(arr.shape)
    elif issubclass(dtype.type, string_types):
        return lib.astype_str(arr.ravel()).reshape(arr.shape)
    elif is_datetime64_dtype(arr):
        if dtype == object:
            return tslib.ints_to_pydatetime(arr.view(np.int64))
        elif dtype == np.int64:
            return arr.view(dtype)
        elif dtype != _NS_DTYPE:
            raise TypeError("cannot astype a datetimelike from [%s] to [%s]" %
                            (arr.dtype, dtype))
        return arr.astype(_NS_DTYPE)
    elif is_timedelta64_dtype(arr):
        if dtype == np.int64:
            return arr.view(dtype)
        elif dtype == object:
            return tslib.ints_to_pytimedelta(arr.view(np.int64))

        # in py3, timedelta64[ns] are int64
        elif ((PY3 and dtype not in [_INT64_DTYPE, _TD_DTYPE]) or
              (not PY3 and dtype != _TD_DTYPE)):

            # allow frequency conversions
            if dtype.kind == 'm':
                mask = isnull(arr)
                result = arr.astype(dtype).astype(np.float64)
                result[mask] = np.nan
                return result

            raise TypeError("cannot astype a timedelta from [%s] to [%s]" %
                            (arr.dtype, dtype))

        return arr.astype(_TD_DTYPE)
    elif (np.issubdtype(arr.dtype, np.floating) and
          np.issubdtype(dtype, np.integer)):

        if not np.isfinite(arr).all():
            raise ValueError('Cannot convert non-finite values (NA or inf) to '
                             'integer')

    elif arr.dtype == np.object_ and np.issubdtype(dtype.type, np.integer):
        # work around NumPy brokenness, #1987
        return lib.astype_intsafe(arr.ravel(), dtype).reshape(arr.shape)

    if copy:
        return arr.astype(dtype)
    return arr.view(dtype)
예제 #2
0
파일: cast.py 프로젝트: RogerThomas/pandas
def astype_nansafe(arr, dtype, copy=True):
    """ return a view if copy is False, but
        need to be very careful as the result shape could change! """
    if not isinstance(dtype, np.dtype):
        dtype = _coerce_to_dtype(dtype)

    if issubclass(dtype.type, text_type):
        # in Py3 that's str, in Py2 that's unicode
        return lib.astype_unicode(arr.ravel()).reshape(arr.shape)
    elif issubclass(dtype.type, string_types):
        return lib.astype_str(arr.ravel()).reshape(arr.shape)
    elif is_datetime64_dtype(arr):
        if dtype == object:
            return tslib.ints_to_pydatetime(arr.view(np.int64))
        elif dtype == np.int64:
            return arr.view(dtype)
        elif dtype != _NS_DTYPE:
            raise TypeError("cannot astype a datetimelike from [%s] to [%s]" %
                            (arr.dtype, dtype))
        return arr.astype(_NS_DTYPE)
    elif is_timedelta64_dtype(arr):
        if dtype == np.int64:
            return arr.view(dtype)
        elif dtype == object:
            return tslib.ints_to_pytimedelta(arr.view(np.int64))

        # in py3, timedelta64[ns] are int64
        elif ((PY3 and dtype not in [_INT64_DTYPE, _TD_DTYPE]) or
              (not PY3 and dtype != _TD_DTYPE)):

            # allow frequency conversions
            if dtype.kind == 'm':
                mask = isnull(arr)
                result = arr.astype(dtype).astype(np.float64)
                result[mask] = np.nan
                return result

            raise TypeError("cannot astype a timedelta from [%s] to [%s]" %
                            (arr.dtype, dtype))

        return arr.astype(_TD_DTYPE)
    elif (np.issubdtype(arr.dtype, np.floating) and
          np.issubdtype(dtype, np.integer)):

        if not np.isfinite(arr).all():
            raise ValueError('Cannot convert non-finite values (NA or inf) to '
                             'integer')

    elif arr.dtype == np.object_ and np.issubdtype(dtype.type, np.integer):
        # work around NumPy brokenness, #1987
        return lib.astype_intsafe(arr.ravel(), dtype).reshape(arr.shape)

    if copy:
        return arr.astype(dtype)
    return arr.view(dtype)
def astype_nansafe(arr, dtype, copy=True, skipna=False):
    """
    Cast the elements of an array to a given dtype a nan-safe manner.

    Parameters
    ----------
    arr : ndarray
    dtype : np.dtype
    copy : bool, default True
        If False, a view will be attempted but may fail, if
        e.g. the item sizes don't align.
    skipna: bool, default False
        Whether or not we should skip NaN when casting as a string-type.

    Raises
    ------
    ValueError
        The dtype was a datetime64/timedelta64 dtype, but it had no unit.
    """

    # dispatch on extension dtype if needed
    if is_extension_array_dtype(dtype):
        return dtype.construct_array_type()._from_sequence(arr,
                                                           dtype=dtype,
                                                           copy=copy)

    if not isinstance(dtype, np.dtype):
        dtype = pandas_dtype(dtype)

    if issubclass(dtype.type, str):
        return lib.astype_str(arr.ravel(), skipna=skipna).reshape(arr.shape)

    elif is_datetime64_dtype(arr):
        if is_object_dtype(dtype):
            return tslib.ints_to_pydatetime(arr.view(np.int64))
        elif dtype == np.int64:
            return arr.view(dtype)

        # allow frequency conversions
        if dtype.kind == "M":
            return arr.astype(dtype)

        raise TypeError("cannot astype a datetimelike from [{from_dtype}] "
                        "to [{to_dtype}]".format(from_dtype=arr.dtype,
                                                 to_dtype=dtype))

    elif is_timedelta64_dtype(arr):
        if is_object_dtype(dtype):
            return tslibs.ints_to_pytimedelta(arr.view(np.int64))
        elif dtype == np.int64:
            return arr.view(dtype)

        if dtype not in [_INT64_DTYPE, _TD_DTYPE]:

            # allow frequency conversions
            # we return a float here!
            if dtype.kind == "m":
                mask = isna(arr)
                result = arr.astype(dtype).astype(np.float64)
                result[mask] = np.nan
                return result
        elif dtype == _TD_DTYPE:
            return arr.astype(_TD_DTYPE, copy=copy)

        raise TypeError("cannot astype a timedelta from [{from_dtype}] "
                        "to [{to_dtype}]".format(from_dtype=arr.dtype,
                                                 to_dtype=dtype))

    elif np.issubdtype(arr.dtype, np.floating) and np.issubdtype(
            dtype, np.integer):

        if not np.isfinite(arr).all():
            raise ValueError(
                "Cannot convert non-finite values (NA or inf) to integer")

    elif is_object_dtype(arr):

        # work around NumPy brokenness, #1987
        if np.issubdtype(dtype.type, np.integer):
            return lib.astype_intsafe(arr.ravel(), dtype).reshape(arr.shape)

        # if we have a datetime/timedelta array of objects
        # then coerce to a proper dtype and recall astype_nansafe

        elif is_datetime64_dtype(dtype):
            from pandas import to_datetime

            return astype_nansafe(to_datetime(arr).values, dtype, copy=copy)
        elif is_timedelta64_dtype(dtype):
            from pandas import to_timedelta

            return astype_nansafe(to_timedelta(arr).values, dtype, copy=copy)

    if dtype.name in ("datetime64", "timedelta64"):
        msg = "The '{dtype}' dtype has no unit. Please pass in '{dtype}[ns]' instead."
        raise ValueError(msg.format(dtype=dtype.name))

    if copy or is_object_dtype(arr) or is_object_dtype(dtype):
        # Explicit copy, or required since NumPy can't view from / to object.
        return arr.astype(dtype, copy=True)

    return arr.view(dtype)
예제 #4
0
def astype_nansafe(arr, dtype, copy=True):
    """ return a view if copy is False, but
        need to be very careful as the result shape could change! """
    if not isinstance(dtype, np.dtype):
        dtype = pandas_dtype(dtype)

    if issubclass(dtype.type, text_type):
        # in Py3 that's str, in Py2 that's unicode
        return lib.astype_unicode(arr.ravel()).reshape(arr.shape)
    elif issubclass(dtype.type, string_types):
        return lib.astype_str(arr.ravel()).reshape(arr.shape)
    elif is_datetime64_dtype(arr):
        if dtype == object:
            return tslib.ints_to_pydatetime(arr.view(np.int64))
        elif dtype == np.int64:
            return arr.view(dtype)
        elif dtype != _NS_DTYPE:
            raise TypeError("cannot astype a datetimelike from [{from_dtype}] "
                            "to [{to_dtype}]".format(from_dtype=arr.dtype,
                                                     to_dtype=dtype))
        return arr.astype(_NS_DTYPE)
    elif is_timedelta64_dtype(arr):
        if dtype == np.int64:
            return arr.view(dtype)
        elif dtype == object:
            return tslib.ints_to_pytimedelta(arr.view(np.int64))

        # in py3, timedelta64[ns] are int64
        elif ((PY3 and dtype not in [_INT64_DTYPE, _TD_DTYPE])
              or (not PY3 and dtype != _TD_DTYPE)):

            # allow frequency conversions
            if dtype.kind == 'm':
                mask = isna(arr)
                result = arr.astype(dtype).astype(np.float64)
                result[mask] = np.nan
                return result

            raise TypeError("cannot astype a timedelta from [{from_dtype}] "
                            "to [{to_dtype}]".format(from_dtype=arr.dtype,
                                                     to_dtype=dtype))

        return arr.astype(_TD_DTYPE)
    elif (np.issubdtype(arr.dtype, np.floating)
          and np.issubdtype(dtype, np.integer)):

        if not np.isfinite(arr).all():
            raise ValueError('Cannot convert non-finite values (NA or inf) to '
                             'integer')

    elif arr.dtype == np.object_ and np.issubdtype(dtype.type, np.integer):
        # work around NumPy brokenness, #1987
        return lib.astype_intsafe(arr.ravel(), dtype).reshape(arr.shape)

    if dtype.name in ("datetime64", "timedelta64"):
        msg = ("Passing in '{dtype}' dtype with no frequency is "
               "deprecated and will raise in a future version. "
               "Please pass in '{dtype}[ns]' instead.")
        warnings.warn(msg.format(dtype=dtype.name),
                      FutureWarning,
                      stacklevel=5)
        dtype = np.dtype(dtype.name + "[ns]")

    if copy:
        return arr.astype(dtype)
    return arr.view(dtype)
예제 #5
0
파일: cast.py 프로젝트: cpcloud/pandas
def astype_nansafe(arr, dtype, copy=True):
    """ return a view if copy is False, but
        need to be very careful as the result shape could change! """
    if not isinstance(dtype, np.dtype):
        dtype = pandas_dtype(dtype)

    if issubclass(dtype.type, text_type):
        # in Py3 that's str, in Py2 that's unicode
        return lib.astype_unicode(arr.ravel()).reshape(arr.shape)
    elif issubclass(dtype.type, string_types):
        return lib.astype_str(arr.ravel()).reshape(arr.shape)
    elif is_datetime64_dtype(arr):
        if dtype == object:
            return tslib.ints_to_pydatetime(arr.view(np.int64))
        elif dtype == np.int64:
            return arr.view(dtype)
        elif dtype != _NS_DTYPE:
            raise TypeError("cannot astype a datetimelike from [{from_dtype}] "
                            "to [{to_dtype}]".format(from_dtype=arr.dtype,
                                                     to_dtype=dtype))
        return arr.astype(_NS_DTYPE)
    elif is_timedelta64_dtype(arr):
        if dtype == np.int64:
            return arr.view(dtype)
        elif dtype == object:
            return tslib.ints_to_pytimedelta(arr.view(np.int64))

        # in py3, timedelta64[ns] are int64
        elif ((PY3 and dtype not in [_INT64_DTYPE, _TD_DTYPE]) or
              (not PY3 and dtype != _TD_DTYPE)):

            # allow frequency conversions
            if dtype.kind == 'm':
                mask = isna(arr)
                result = arr.astype(dtype).astype(np.float64)
                result[mask] = np.nan
                return result

            raise TypeError("cannot astype a timedelta from [{from_dtype}] "
                            "to [{to_dtype}]".format(from_dtype=arr.dtype,
                                                     to_dtype=dtype))

        return arr.astype(_TD_DTYPE)
    elif (np.issubdtype(arr.dtype, np.floating) and
          np.issubdtype(dtype, np.integer)):

        if not np.isfinite(arr).all():
            raise ValueError('Cannot convert non-finite values (NA or inf) to '
                             'integer')

    elif arr.dtype == np.object_ and np.issubdtype(dtype.type, np.integer):
        # work around NumPy brokenness, #1987
        return lib.astype_intsafe(arr.ravel(), dtype).reshape(arr.shape)

    if dtype.name in ("datetime64", "timedelta64"):
        msg = ("Passing in '{dtype}' dtype with no frequency is "
               "deprecated and will raise in a future version. "
               "Please pass in '{dtype}[ns]' instead.")
        warnings.warn(msg.format(dtype=dtype.name),
                      FutureWarning, stacklevel=5)
        dtype = np.dtype(dtype.name + "[ns]")

    if copy:
        return arr.astype(dtype)
    return arr.view(dtype)
예제 #6
0
파일: cast.py 프로젝트: brianholland/pandas
def astype_nansafe(arr, dtype, copy=True, skipna=False):
    """
    Cast the elements of an array to a given dtype a nan-safe manner.

    Parameters
    ----------
    arr : ndarray
    dtype : np.dtype
    copy : bool, default True
        If False, a view will be attempted but may fail, if
        e.g. the item sizes don't align.
    skipna: bool, default False
        Whether or not we should skip NaN when casting as a string-type.

    Raises
    ------
    ValueError
        The dtype was a datetime64/timedelta64 dtype, but it had no unit.
    """

    # dispatch on extension dtype if needed
    if is_extension_array_dtype(dtype):
        return dtype.construct_array_type()._from_sequence(
            arr, dtype=dtype, copy=copy)

    if not isinstance(dtype, np.dtype):
        dtype = pandas_dtype(dtype)

    if issubclass(dtype.type, text_type):
        # in Py3 that's str, in Py2 that's unicode
        return lib.astype_unicode(arr.ravel(),
                                  skipna=skipna).reshape(arr.shape)

    elif issubclass(dtype.type, string_types):
        return lib.astype_str(arr.ravel(),
                              skipna=skipna).reshape(arr.shape)

    elif is_datetime64_dtype(arr):
        if is_object_dtype(dtype):
            return tslib.ints_to_pydatetime(arr.view(np.int64))
        elif dtype == np.int64:
            return arr.view(dtype)

        # allow frequency conversions
        if dtype.kind == 'M':
            return arr.astype(dtype)

        raise TypeError("cannot astype a datetimelike from [{from_dtype}] "
                        "to [{to_dtype}]".format(from_dtype=arr.dtype,
                                                 to_dtype=dtype))

    elif is_timedelta64_dtype(arr):
        if is_object_dtype(dtype):
            return tslibs.ints_to_pytimedelta(arr.view(np.int64))
        elif dtype == np.int64:
            return arr.view(dtype)

        # in py3, timedelta64[ns] are int64
        if ((PY3 and dtype not in [_INT64_DTYPE, _TD_DTYPE]) or
                (not PY3 and dtype != _TD_DTYPE)):

            # allow frequency conversions
            # we return a float here!
            if dtype.kind == 'm':
                mask = isna(arr)
                result = arr.astype(dtype).astype(np.float64)
                result[mask] = np.nan
                return result
        elif dtype == _TD_DTYPE:
            return arr.astype(_TD_DTYPE, copy=copy)

        raise TypeError("cannot astype a timedelta from [{from_dtype}] "
                        "to [{to_dtype}]".format(from_dtype=arr.dtype,
                                                 to_dtype=dtype))

    elif (np.issubdtype(arr.dtype, np.floating) and
          np.issubdtype(dtype, np.integer)):

        if not np.isfinite(arr).all():
            raise ValueError('Cannot convert non-finite values (NA or inf) to '
                             'integer')

    elif is_object_dtype(arr):

        # work around NumPy brokenness, #1987
        if np.issubdtype(dtype.type, np.integer):
            return lib.astype_intsafe(arr.ravel(), dtype).reshape(arr.shape)

        # if we have a datetime/timedelta array of objects
        # then coerce to a proper dtype and recall astype_nansafe

        elif is_datetime64_dtype(dtype):
            from pandas import to_datetime
            return astype_nansafe(to_datetime(arr).values, dtype, copy=copy)
        elif is_timedelta64_dtype(dtype):
            from pandas import to_timedelta
            return astype_nansafe(to_timedelta(arr).values, dtype, copy=copy)

    if dtype.name in ("datetime64", "timedelta64"):
        msg = ("The '{dtype}' dtype has no unit. "
               "Please pass in '{dtype}[ns]' instead.")
        raise ValueError(msg.format(dtype=dtype.name))

    if copy or is_object_dtype(arr) or is_object_dtype(dtype):
        # Explicit copy, or required since NumPy can't view from / to object.
        return arr.astype(dtype, copy=True)

    return arr.view(dtype)
예제 #7
0
def astype_nansafe(arr, dtype, copy=True):
    """ return a view if copy is False, but
        need to be very careful as the result shape could change!

    Parameters
    ----------
    arr : ndarray
    dtype : np.dtype
    copy : bool, default True
        If False, a view will be attempted but may fail, if
        e.g. the itemsizes don't align.
    """

    # dispatch on extension dtype if needed
    if is_extension_array_dtype(dtype):
        return dtype.construct_array_type()._from_sequence(
            arr, dtype=dtype, copy=copy)

    if not isinstance(dtype, np.dtype):
        dtype = pandas_dtype(dtype)

    if issubclass(dtype.type, text_type):
        # in Py3 that's str, in Py2 that's unicode
        return lib.astype_unicode(arr.ravel()).reshape(arr.shape)

    elif issubclass(dtype.type, string_types):
        return lib.astype_str(arr.ravel()).reshape(arr.shape)

    elif is_datetime64_dtype(arr):
        if is_object_dtype(dtype):
            return tslib.ints_to_pydatetime(arr.view(np.int64))
        elif dtype == np.int64:
            return arr.view(dtype)

        # allow frequency conversions
        if dtype.kind == 'M':
            return arr.astype(dtype)

        raise TypeError("cannot astype a datetimelike from [{from_dtype}] "
                        "to [{to_dtype}]".format(from_dtype=arr.dtype,
                                                 to_dtype=dtype))

    elif is_timedelta64_dtype(arr):
        if is_object_dtype(dtype):
            return tslibs.ints_to_pytimedelta(arr.view(np.int64))
        elif dtype == np.int64:
            return arr.view(dtype)

        # in py3, timedelta64[ns] are int64
        if ((PY3 and dtype not in [_INT64_DTYPE, _TD_DTYPE]) or
                (not PY3 and dtype != _TD_DTYPE)):

            # allow frequency conversions
            # we return a float here!
            if dtype.kind == 'm':
                mask = isna(arr)
                result = arr.astype(dtype).astype(np.float64)
                result[mask] = np.nan
                return result
        elif dtype == _TD_DTYPE:
            return arr.astype(_TD_DTYPE, copy=copy)

        raise TypeError("cannot astype a timedelta from [{from_dtype}] "
                        "to [{to_dtype}]".format(from_dtype=arr.dtype,
                                                 to_dtype=dtype))

    elif (np.issubdtype(arr.dtype, np.floating) and
          np.issubdtype(dtype, np.integer)):

        if not np.isfinite(arr).all():
            raise ValueError('Cannot convert non-finite values (NA or inf) to '
                             'integer')

    elif is_object_dtype(arr):

        # work around NumPy brokenness, #1987
        if np.issubdtype(dtype.type, np.integer):
            return lib.astype_intsafe(arr.ravel(), dtype).reshape(arr.shape)

        # if we have a datetime/timedelta array of objects
        # then coerce to a proper dtype and recall astype_nansafe

        elif is_datetime64_dtype(dtype):
            from pandas import to_datetime
            return astype_nansafe(to_datetime(arr).values, dtype, copy=copy)
        elif is_timedelta64_dtype(dtype):
            from pandas import to_timedelta
            return astype_nansafe(to_timedelta(arr).values, dtype, copy=copy)

    if dtype.name in ("datetime64", "timedelta64"):
        msg = ("Passing in '{dtype}' dtype with no frequency is "
               "deprecated and will raise in a future version. "
               "Please pass in '{dtype}[ns]' instead.")
        warnings.warn(msg.format(dtype=dtype.name),
                      FutureWarning, stacklevel=5)
        dtype = np.dtype(dtype.name + "[ns]")

    if copy or is_object_dtype(arr) or is_object_dtype(dtype):
        # Explicit copy, or required since NumPy can't view from / to object.
        return arr.astype(dtype, copy=True)

    return arr.view(dtype)