Exemple #1
0
    def _convert_to_array(self, values, name=None, other=None):
        """converts values to ndarray"""
        from pandas.tseries.timedeltas import to_timedelta

        coerce = True
        if not is_list_like(values):
            values = np.array([values])
        inferred_type = lib.infer_dtype(values)

        if inferred_type in ('datetime64', 'datetime', 'date', 'time'):
            # if we have a other of timedelta, but use pd.NaT here we
            # we are in the wrong path
            if (other is not None and other.dtype == 'timedelta64[ns]' and
                    all(isnull(v) for v in values)):
                values = np.empty(values.shape, dtype=other.dtype)
                values[:] = iNaT

            # a datelike
            elif isinstance(values, pd.DatetimeIndex):
                values = values.to_series()
            elif not (isinstance(values, (np.ndarray, pd.Series)) and
                      is_datetime64_dtype(values)):
                values = tslib.array_to_datetime(values)
        elif inferred_type in ('timedelta', 'timedelta64'):
            # have a timedelta, convert to to ns here
            values = to_timedelta(values, coerce=coerce)
        elif inferred_type == 'integer':
            # py3 compat where dtype is 'm' but is an integer
            if values.dtype.kind == 'm':
                values = values.astype('timedelta64[ns]')
            elif isinstance(values, pd.PeriodIndex):
                values = values.to_timestamp().to_series()
            elif name not in ('__truediv__', '__div__', '__mul__'):
                raise TypeError("incompatible type for a datetime/timedelta "
                                "operation [{0}]".format(name))
        elif isinstance(values[0], pd.DateOffset):
            # handle DateOffsets
            os = np.array([getattr(v, 'delta', None) for v in values])
            mask = isnull(os)
            if mask.any():
                raise TypeError("cannot use a non-absolute DateOffset in "
                                "datetime/timedelta operations [{0}]".format(
                                    ', '.join([com.pprint_thing(v)
                                               for v in values[mask]])))
            values = to_timedelta(os, coerce=coerce)
        elif inferred_type == 'floating':

            # all nan, so ok, use the other dtype (e.g. timedelta or datetime)
            if isnull(values).all():
                values = np.empty(values.shape, dtype=other.dtype)
                values[:] = iNaT
            else:
                raise TypeError(
                    'incompatible type [{0}] for a datetime/timedelta '
                    'operation'.format(np.array(values).dtype))
        else:
            raise TypeError("incompatible type [{0}] for a datetime/timedelta"
                            " operation".format(np.array(values).dtype))

        return values
Exemple #2
0
    def _convert_to_array(self, values, name=None, other=None):
        """converts values to ndarray"""
        from pandas.tseries.timedeltas import to_timedelta

        coerce = True
        if not is_list_like(values):
            values = np.array([values])
        inferred_type = lib.infer_dtype(values)

        if inferred_type in ('datetime64', 'datetime', 'date', 'time'):
            # if we have a other of timedelta, but use pd.NaT here we
            # we are in the wrong path
            if (other is not None and other.dtype == 'timedelta64[ns]'
                    and all(isnull(v) for v in values)):
                values = np.empty(values.shape, dtype=other.dtype)
                values[:] = tslib.iNaT

            # a datelike
            elif isinstance(values, pd.DatetimeIndex):
                values = values.to_series()
            elif not (isinstance(values, (np.ndarray, pd.Series))
                      and com.is_datetime64_dtype(values)):
                values = tslib.array_to_datetime(values)
        elif inferred_type in ('timedelta', 'timedelta64'):
            # have a timedelta, convert to to ns here
            values = to_timedelta(values, coerce=coerce)
        elif inferred_type == 'integer':
            # py3 compat where dtype is 'm' but is an integer
            if values.dtype.kind == 'm':
                values = values.astype('timedelta64[ns]')
            elif isinstance(values, pd.PeriodIndex):
                values = values.to_timestamp().to_series()
            elif name not in ('__truediv__', '__div__', '__mul__'):
                raise TypeError("incompatible type for a datetime/timedelta "
                                "operation [{0}]".format(name))
        elif isinstance(values[0], pd.DateOffset):
            # handle DateOffsets
            os = np.array([getattr(v, 'delta', None) for v in values])
            mask = isnull(os)
            if mask.any():
                raise TypeError(
                    "cannot use a non-absolute DateOffset in "
                    "datetime/timedelta operations [{0}]".format(', '.join(
                        [com.pprint_thing(v) for v in values[mask]])))
            values = to_timedelta(os, coerce=coerce)
        elif inferred_type == 'floating':

            # all nan, so ok, use the other dtype (e.g. timedelta or datetime)
            if isnull(values).all():
                values = np.empty(values.shape, dtype=other.dtype)
                values[:] = tslib.iNaT
            else:
                raise TypeError(
                    'incompatible type [{0}] for a datetime/timedelta '
                    'operation'.format(np.array(values).dtype))
        else:
            raise TypeError("incompatible type [{0}] for a datetime/timedelta"
                            " operation".format(np.array(values).dtype))

        return values
Exemple #3
0
def _soft_convert_objects(values,
                          datetime=True,
                          numeric=True,
                          timedelta=True,
                          coerce=False,
                          copy=True):
    """ if we have an object dtype, try to coerce dates and/or numbers """

    conversion_count = sum((datetime, numeric, timedelta))
    if conversion_count == 0:
        raise ValueError('At least one of datetime, numeric or timedelta must '
                         'be True.')
    elif conversion_count > 1 and coerce:
        raise ValueError("Only one of 'datetime', 'numeric' or "
                         "'timedelta' can be True when when coerce=True.")

    if isinstance(values, (list, tuple)):
        # List or scalar
        values = np.array(values, dtype=np.object_)
    elif not hasattr(values, 'dtype'):
        values = np.array([values], dtype=np.object_)
    elif not is_object_dtype(values.dtype):
        # If not object, do not attempt conversion
        values = values.copy() if copy else values
        return values

    # If 1 flag is coerce, ensure 2 others are False
    if coerce:
        # Immediate return if coerce
        if datetime:
            from pandas import to_datetime
            return to_datetime(values, errors='coerce', box=False)
        elif timedelta:
            from pandas import to_timedelta
            return to_timedelta(values, errors='coerce', box=False)
        elif numeric:
            from pandas import to_numeric
            return to_numeric(values, errors='coerce')

    # Soft conversions
    if datetime:
        values = lib.maybe_convert_objects(values, convert_datetime=datetime)

    if timedelta and is_object_dtype(values.dtype):
        # Object check to ensure only run if previous did not convert
        values = lib.maybe_convert_objects(values, convert_timedelta=timedelta)

    if numeric and is_object_dtype(values.dtype):
        try:
            converted = lib.maybe_convert_numeric(values,
                                                  set(),
                                                  coerce_numeric=True)
            # If all NaNs, then do not-alter
            values = converted if not isnull(converted).all() else values
            values = values.copy() if copy else values
        except:
            pass

    return values
Exemple #4
0
    def _convert_to_array(self, values, name=None, other=None):
        """converts values to ndarray"""
        from pandas.tseries.timedeltas import to_timedelta

        ovalues = values
        if not is_list_like(values):
            values = np.array([values])

        inferred_type = lib.infer_dtype(values)

        if inferred_type in ('datetime64', 'datetime', 'date', 'time'):
            # if we have a other of timedelta, but use pd.NaT here we
            # we are in the wrong path
            if (other is not None and other.dtype == 'timedelta64[ns]' and
                    all(isnull(v) for v in values)):
                values = np.empty(values.shape, dtype=other.dtype)
                values[:] = iNaT

            # a datelike
            elif isinstance(values, pd.DatetimeIndex):
                values = values.to_series()
            # datetime with tz
            elif isinstance(ovalues, datetime.datetime) and hasattr(ovalues,'tz'):
                values = pd.DatetimeIndex(values)
            # datetime array with tz
            elif com.is_datetimetz(values):
                if isinstance(values, pd.Series):
                    values = values._values
            elif not (isinstance(values, (np.ndarray, pd.Series)) and
                      is_datetime64_dtype(values)):
                values = tslib.array_to_datetime(values)
        elif inferred_type in ('timedelta', 'timedelta64'):
            # have a timedelta, convert to to ns here
            values = to_timedelta(values, errors='coerce')
        elif inferred_type == 'integer':
            # py3 compat where dtype is 'm' but is an integer
            if values.dtype.kind == 'm':
                values = values.astype('timedelta64[ns]')
            elif isinstance(values, pd.PeriodIndex):
                values = values.to_timestamp().to_series()
            elif name not in ('__truediv__', '__div__', '__mul__'):
                raise TypeError("incompatible type for a datetime/timedelta "
                                "operation [{0}]".format(name))
        elif inferred_type == 'floating':
            # all nan, so ok, use the other dtype (e.g. timedelta or datetime)
            if isnull(values).all():
                values = np.empty(values.shape, dtype=other.dtype)
                values[:] = iNaT
            else:
                raise TypeError(
                    'incompatible type [{0}] for a datetime/timedelta '
                    'operation'.format(np.array(values).dtype))
        elif self._is_offset(values):
            return values
        else:
            raise TypeError("incompatible type [{0}] for a datetime/timedelta"
                            " operation".format(np.array(values).dtype))

        return values
Exemple #5
0
 def test_sub_of_datetime_from_TimeSeries(self):
     from pandas.tseries.timedeltas import to_timedelta
     from datetime import datetime
     a = Timestamp(datetime(1993, 0o1, 0o7, 13, 30, 00))
     b = datetime(1993, 6, 22, 13, 30)
     a = Series([a])
     result = to_timedelta(np.abs(a - b))
     self.assertEqual(result.dtype, 'timedelta64[ns]')
 def test_sub_of_datetime_from_TimeSeries(self):
     from pandas.tseries.timedeltas import to_timedelta
     from datetime import datetime
     a = Timestamp(datetime(1993, 0o1, 0o7, 13, 30, 00))
     b = datetime(1993, 6, 22, 13, 30)
     a = Series([a])
     result = to_timedelta(np.abs(a - b))
     self.assertEqual(result.dtype, 'timedelta64[ns]')
Exemple #7
0
        def _try_timedelta(v):
            # safe coerce to timedelta64

            # will try first with a string & object conversion
            from pandas import to_timedelta
            try:
                return to_timedelta(v)._values.reshape(shape)
            except:
                return v
Exemple #8
0
def _possibly_convert_objects(values, convert_dates=True, convert_numeric=True, convert_timedeltas=True, copy=True):
    """ if we have an object dtype, try to coerce dates and/or numbers """

    # if we have passed in a list or scalar
    if isinstance(values, (list, tuple)):
        values = np.array(values, dtype=np.object_)
    if not hasattr(values, "dtype"):
        values = np.array([values], dtype=np.object_)

    # convert dates
    if convert_dates and values.dtype == np.object_:

        # we take an aggressive stance and convert to datetime64[ns]
        if convert_dates == "coerce":
            new_values = _possibly_cast_to_datetime(values, "M8[ns]", errors="coerce")

            # if we are all nans then leave me alone
            if not isnull(new_values).all():
                values = new_values

        else:
            values = lib.maybe_convert_objects(values, convert_datetime=convert_dates)

    # convert timedeltas
    if convert_timedeltas and values.dtype == np.object_:

        if convert_timedeltas == "coerce":
            from pandas.tseries.timedeltas import to_timedelta

            new_values = to_timedelta(values, coerce=True)

            # if we are all nans then leave me alone
            if not isnull(new_values).all():
                values = new_values

        else:
            values = lib.maybe_convert_objects(values, convert_timedelta=convert_timedeltas)

    # convert to numeric
    if values.dtype == np.object_:
        if convert_numeric:
            try:
                new_values = lib.maybe_convert_numeric(values, set(), coerce_numeric=True)

                # if we are all nans then leave me alone
                if not isnull(new_values).all():
                    values = new_values

            except:
                pass
        else:
            # soft-conversion
            values = lib.maybe_convert_objects(values)

    values = values.copy() if copy else values

    return values
Exemple #9
0
        def _try_timedelta(v):
            # safe coerce to timedelta64

            # will try first with a string & object conversion
            from pandas import to_timedelta
            try:
                return to_timedelta(v)._values.reshape(shape)
            except:
                return v
Exemple #10
0
def _soft_convert_objects(values, datetime=True, numeric=True, timedelta=True,
                          coerce=False, copy=True):
    """ if we have an object dtype, try to coerce dates and/or numbers """

    conversion_count = sum((datetime, numeric, timedelta))
    if conversion_count == 0:
        raise ValueError('At least one of datetime, numeric or timedelta must '
                         'be True.')
    elif conversion_count > 1 and coerce:
        raise ValueError("Only one of 'datetime', 'numeric' or "
                         "'timedelta' can be True when when coerce=True.")

    if isinstance(values, (list, tuple)):
        # List or scalar
        values = np.array(values, dtype=np.object_)
    elif not hasattr(values, 'dtype'):
        values = np.array([values], dtype=np.object_)
    elif not is_object_dtype(values.dtype):
        # If not object, do not attempt conversion
        values = values.copy() if copy else values
        return values

    # If 1 flag is coerce, ensure 2 others are False
    if coerce:
        # Immediate return if coerce
        if datetime:
            from pandas import to_datetime
            return to_datetime(values, errors='coerce', box=False)
        elif timedelta:
            from pandas import to_timedelta
            return to_timedelta(values, errors='coerce', box=False)
        elif numeric:
            from pandas import to_numeric
            return to_numeric(values, errors='coerce')

    # Soft conversions
    if datetime:
        values = lib.maybe_convert_objects(values, convert_datetime=datetime)

    if timedelta and is_object_dtype(values.dtype):
        # Object check to ensure only run if previous did not convert
        values = lib.maybe_convert_objects(values, convert_timedelta=timedelta)

    if numeric and is_object_dtype(values.dtype):
        try:
            converted = lib.maybe_convert_numeric(values, set(),
                                                  coerce_numeric=True)
            # If all NaNs, then do not-alter
            values = converted if not isnull(converted).all() else values
            values = values.copy() if copy else values
        except:
            pass

    return values
Exemple #11
0
def isin(comps, values):
    """
    Compute the isin boolean array

    Parameters
    ----------
    comps: array-like
    values: array-like

    Returns
    -------
    boolean array same length as comps
    """

    if not com.is_list_like(comps):
        raise TypeError(
            "only list-like objects are allowed to be passed"
            " to isin(), you passed a "
            "[{0}]".format(type(comps).__name__)
        )
    comps = np.asarray(comps)
    if not com.is_list_like(values):
        raise TypeError(
            "only list-like objects are allowed to be passed"
            " to isin(), you passed a "
            "[{0}]".format(type(values).__name__)
        )

    # GH11232
    # work-around for numpy < 1.8 and comparisions on py3
    # faster for larger cases to use np.in1d
    if (_np_version_under1p8 and compat.PY3) or len(comps) > 1000000:
        f = lambda x, y: np.in1d(x, np.asarray(list(y)))
    else:
        f = lambda x, y: lib.ismember_int64(x, set(y))

    # may need i8 conversion for proper membership testing
    if com.is_datetime64_dtype(comps):
        from pandas.tseries.tools import to_datetime

        values = to_datetime(values)._values.view("i8")
        comps = comps.view("i8")
    elif com.is_timedelta64_dtype(comps):
        from pandas.tseries.timedeltas import to_timedelta

        values = to_timedelta(values)._values.view("i8")
        comps = comps.view("i8")
    elif com.is_int64_dtype(comps):
        pass
    else:
        f = lambda x, y: lib.ismember(x, set(values))

    return f(comps, values)
Exemple #12
0
def isin(comps, values):
    """
    Compute the isin boolean array

    Parameters
    ----------
    comps: array-like
    values: array-like

    Returns
    -------
    boolean array same length as comps
    """

    if not is_list_like(comps):
        raise TypeError("only list-like objects are allowed to be passed"
                        " to isin(), you passed a "
                        "[{0}]".format(type(comps).__name__))
    comps = np.asarray(comps)
    if not is_list_like(values):
        raise TypeError("only list-like objects are allowed to be passed"
                        " to isin(), you passed a "
                        "[{0}]".format(type(values).__name__))
    if not isinstance(values, np.ndarray):
        values = list(values)

    # GH11232
    # work-around for numpy < 1.8 and comparisions on py3
    # faster for larger cases to use np.in1d
    if (_np_version_under1p8 and compat.PY3) or len(comps) > 1000000:
        f = lambda x, y: np.in1d(x, np.asarray(list(y)))
    else:
        f = lambda x, y: lib.ismember_int64(x, set(y))

    # may need i8 conversion for proper membership testing
    if is_datetime64_dtype(comps):
        from pandas.tseries.tools import to_datetime
        values = to_datetime(values)._values.view('i8')
        comps = comps.view('i8')
    elif is_timedelta64_dtype(comps):
        from pandas.tseries.timedeltas import to_timedelta
        values = to_timedelta(values)._values.view('i8')
        comps = comps.view('i8')
    elif is_int64_dtype(comps):
        pass
    else:
        f = lambda x, y: lib.ismember(x, set(values))

    return f(comps, values)
Exemple #13
0
    def _convert_for_datetime(self, lvalues, rvalues):
        from pandas.tseries.timedeltas import to_timedelta

        mask = isnull(lvalues) | isnull(rvalues)

        # datetimes require views
        if self.is_datetime_lhs or self.is_datetime_rhs:

            # datetime subtraction means timedelta
            if self.is_datetime_lhs and self.is_datetime_rhs:
                if self.name in ('__sub__', '__rsub__'):
                    self.dtype = 'timedelta64[ns]'
                else:
                    self.dtype = 'datetime64[ns]'
            elif self.is_datetime64tz_lhs:
                self.dtype = lvalues.dtype
            elif self.is_datetime64tz_rhs:
                self.dtype = rvalues.dtype
            else:
                self.dtype = 'datetime64[ns]'

            # if adding single offset try vectorized path
            # in DatetimeIndex; otherwise elementwise apply
            def _offset(lvalues, rvalues):
                if len(lvalues) == 1:
                    rvalues = pd.DatetimeIndex(rvalues)
                    lvalues = lvalues[0]
                else:
                    warnings.warn("Adding/subtracting array of DateOffsets to Series not vectorized",
                                  PerformanceWarning)
                    rvalues = rvalues.astype('O')

                # pass thru on the na_op
                self.na_op = lambda x, y: getattr(x,self.name)(y)
                return lvalues, rvalues


            if self.is_offset_lhs:
                lvalues, rvalues = _offset(lvalues, rvalues)
            elif self.is_offset_rhs:
                rvalues, lvalues = _offset(rvalues, lvalues)
            else:

                # with tz, convert to UTC
                if self.is_datetime64tz_lhs:
                    lvalues = lvalues.tz_localize(None)
                if self.is_datetime64tz_rhs:
                    rvalues = rvalues.tz_localize(None)

                lvalues = lvalues.view(np.int64)
                rvalues = rvalues.view(np.int64)

        # otherwise it's a timedelta
        else:

            self.dtype = 'timedelta64[ns]'

            # convert Tick DateOffset to underlying delta
            if self.is_offset_lhs:
                lvalues = to_timedelta(lvalues)
            if self.is_offset_rhs:
                rvalues = to_timedelta(rvalues)

            lvalues = lvalues.astype(np.int64)
            if not self.is_floating_rhs:
                rvalues = rvalues.astype(np.int64)

            # time delta division -> unit less
            # integer gets converted to timedelta in np < 1.6
            if (self.is_timedelta_lhs and self.is_timedelta_rhs) and\
               not self.is_integer_rhs and\
               not self.is_integer_lhs and\
               self.name in ('__div__', '__truediv__'):
                self.dtype = 'float64'
                self.fill_value = np.nan
                lvalues = lvalues.astype(np.float64)
                rvalues = rvalues.astype(np.float64)

        # if we need to mask the results
        if mask.any():
            def f(x):

                # datetime64[ns]/timedelta64[ns] masking
                try:
                    x = np.array(x, dtype=self.dtype)
                except TypeError:
                    x = np.array(x, dtype='datetime64[ns]')

                np.putmask(x, mask, self.fill_value)
                return x
            self.wrap_results = f

        return lvalues, rvalues
Exemple #14
0
def _possibly_cast_to_datetime(value, dtype, errors='raise'):
    """ try to cast the array/value to a datetimelike dtype, converting float
    nan to iNaT
    """
    from pandas.tseries.timedeltas import to_timedelta
    from pandas.tseries.tools import to_datetime

    if dtype is not None:
        if isinstance(dtype, string_types):
            dtype = np.dtype(dtype)

        is_datetime64 = is_datetime64_dtype(dtype)
        is_datetime64tz = is_datetime64tz_dtype(dtype)
        is_timedelta64 = is_timedelta64_dtype(dtype)

        if is_datetime64 or is_datetime64tz or is_timedelta64:

            # force the dtype if needed
            if is_datetime64 and not is_dtype_equal(dtype, _NS_DTYPE):
                if dtype.name == 'datetime64[ns]':
                    dtype = _NS_DTYPE
                else:
                    raise TypeError("cannot convert datetimelike to "
                                    "dtype [%s]" % dtype)
            elif is_datetime64tz:

                # our NaT doesn't support tz's
                # this will coerce to DatetimeIndex with
                # a matching dtype below
                if is_scalar(value) and isnull(value):
                    value = [value]

            elif is_timedelta64 and not is_dtype_equal(dtype, _TD_DTYPE):
                if dtype.name == 'timedelta64[ns]':
                    dtype = _TD_DTYPE
                else:
                    raise TypeError("cannot convert timedeltalike to "
                                    "dtype [%s]" % dtype)

            if is_scalar(value):
                if value == tslib.iNaT or isnull(value):
                    value = tslib.iNaT
            else:
                value = np.array(value, copy=False)

                # have a scalar array-like (e.g. NaT)
                if value.ndim == 0:
                    value = tslib.iNaT

                # we have an array of datetime or timedeltas & nulls
                elif np.prod(
                        value.shape) or not is_dtype_equal(value.dtype, dtype):
                    try:
                        if is_datetime64:
                            value = to_datetime(value, errors=errors)._values
                        elif is_datetime64tz:
                            # input has to be UTC at this point, so just
                            # localize
                            value = to_datetime(
                                value, errors=errors).tz_localize(dtype.tz)
                        elif is_timedelta64:
                            value = to_timedelta(value, errors=errors)._values
                    except (AttributeError, ValueError, TypeError):
                        pass

        # coerce datetimelike to object
        elif is_datetime64_dtype(value) and not is_datetime64_dtype(dtype):
            if is_object_dtype(dtype):
                if value.dtype != _NS_DTYPE:
                    value = value.astype(_NS_DTYPE)
                ints = np.asarray(value).view('i8')
                return tslib.ints_to_pydatetime(ints)

            # we have a non-castable dtype that was passed
            raise TypeError('Cannot cast datetime64 to %s' % dtype)

    else:

        is_array = isinstance(value, np.ndarray)

        # catch a datetime/timedelta that is not of ns variety
        # and no coercion specified
        if is_array and value.dtype.kind in ['M', 'm']:
            dtype = value.dtype

            if dtype.kind == 'M' and dtype != _NS_DTYPE:
                value = value.astype(_NS_DTYPE)

            elif dtype.kind == 'm' and dtype != _TD_DTYPE:
                value = to_timedelta(value)

        # only do this if we have an array and the dtype of the array is not
        # setup already we are not an integer/object, so don't bother with this
        # conversion
        elif not (is_array and not (issubclass(value.dtype.type, np.integer)
                                    or value.dtype == np.object_)):
            value = _possibly_infer_to_datetimelike(value)

    return value
Exemple #15
0
def _possibly_convert_objects(values,
                              convert_dates=True,
                              convert_numeric=True,
                              convert_timedeltas=True,
                              copy=True):
    """ if we have an object dtype, try to coerce dates and/or numbers """

    # if we have passed in a list or scalar
    if isinstance(values, (list, tuple)):
        values = np.array(values, dtype=np.object_)
    if not hasattr(values, 'dtype'):
        values = np.array([values], dtype=np.object_)

    # convert dates
    if convert_dates and values.dtype == np.object_:

        # we take an aggressive stance and convert to datetime64[ns]
        if convert_dates == 'coerce':
            new_values = _possibly_cast_to_datetime(values,
                                                    'M8[ns]',
                                                    errors='coerce')

            # if we are all nans then leave me alone
            if not isnull(new_values).all():
                values = new_values

        else:
            values = lib.maybe_convert_objects(values,
                                               convert_datetime=convert_dates)

    # convert timedeltas
    if convert_timedeltas and values.dtype == np.object_:

        if convert_timedeltas == 'coerce':
            from pandas.tseries.timedeltas import to_timedelta
            new_values = to_timedelta(values, coerce=True)

            # if we are all nans then leave me alone
            if not isnull(new_values).all():
                values = new_values

        else:
            values = lib.maybe_convert_objects(
                values, convert_timedelta=convert_timedeltas)

    # convert to numeric
    if values.dtype == np.object_:
        if convert_numeric:
            try:
                new_values = lib.maybe_convert_numeric(values,
                                                       set(),
                                                       coerce_numeric=True)

                # if we are all nans then leave me alone
                if not isnull(new_values).all():
                    values = new_values

            except:
                pass
        else:
            # soft-conversion
            values = lib.maybe_convert_objects(values)

    values = values.copy() if copy else values

    return values
Exemple #16
0
    def __new__(cls, data=None, unit=None,
                freq=None, start=None, end=None, periods=None,
                copy=False, name=None,
                closed=None, verify_integrity=True, **kwargs):

        if isinstance(data, TimedeltaIndex) and freq is None and name is None:
            if copy:
                data = data.copy()
            return data

        freq_infer = False
        if not isinstance(freq, DateOffset):

            # if a passed freq is None, don't infer automatically
            if freq != 'infer':
                freq = to_offset(freq)
            else:
                freq_infer = True
                freq = None

        if periods is not None:
            if is_float(periods):
                periods = int(periods)
            elif not is_integer(periods):
                raise ValueError('Periods must be a number, got %s' %
                                 str(periods))

        if data is None and freq is None:
            raise ValueError("Must provide freq argument if no data is "
                             "supplied")

        if data is None:
            return cls._generate(start, end, periods, name, freq,
                                 closed=closed)

        if unit is not None:
            data = to_timedelta(data, unit=unit, box=False)

        if not isinstance(data, (np.ndarray, Index, ABCSeries)):
            if lib.isscalar(data):
                raise ValueError('TimedeltaIndex() must be called with a '
                                 'collection of some kind, %s was passed'
                                 % repr(data))

        # convert if not already
        if getattr(data, 'dtype', None) != _TD_DTYPE:
            data = to_timedelta(data, unit=unit, box=False)
        elif copy:
            data = np.array(data, copy=True)

        # check that we are matching freqs
        if verify_integrity and len(data) > 0:
            if freq is not None and not freq_infer:
                index = cls._simple_new(data, name=name)
                inferred = index.inferred_freq
                if inferred != freq.freqstr:
                    on_freq = cls._generate(
                        index[0], None, len(index), name, freq)
                    if not np.array_equal(index.asi8, on_freq.asi8):
                        raise ValueError('Inferred frequency {0} from passed '
                                         'timedeltas does not conform to '
                                         'passed frequency {1}'
                                         .format(inferred, freq.freqstr))
                index.freq = freq
                return index

        if freq_infer:
            index = cls._simple_new(data, name=name)
            inferred = index.inferred_freq
            if inferred:
                index.freq = to_offset(inferred)
            return index

        return cls._simple_new(data, name=name, freq=freq)
Exemple #17
0
    def _convert_for_datetime(self, lvalues, rvalues):
        from pandas.tseries.timedeltas import to_timedelta

        mask = isnull(lvalues) | isnull(rvalues)

        # datetimes require views
        if self.is_datetime_lhs or self.is_datetime_rhs:

            # datetime subtraction means timedelta
            if self.is_datetime_lhs and self.is_datetime_rhs:
                if self.name in ('__sub__', '__rsub__'):
                    self.dtype = 'timedelta64[ns]'
                else:
                    self.dtype = 'datetime64[ns]'
            elif self.is_datetime64tz_lhs:
                self.dtype = lvalues.dtype
            elif self.is_datetime64tz_rhs:
                self.dtype = rvalues.dtype
            else:
                self.dtype = 'datetime64[ns]'

            # if adding single offset try vectorized path
            # in DatetimeIndex; otherwise elementwise apply
            def _offset(lvalues, rvalues):
                if len(lvalues) == 1:
                    rvalues = pd.DatetimeIndex(rvalues)
                    lvalues = lvalues[0]
                else:
                    warnings.warn("Adding/subtracting array of DateOffsets to "
                                  "Series not vectorized", PerformanceWarning)
                    rvalues = rvalues.astype('O')

                # pass thru on the na_op
                self.na_op = lambda x, y: getattr(x, self.name)(y)
                return lvalues, rvalues

            if self.is_offset_lhs:
                lvalues, rvalues = _offset(lvalues, rvalues)
            elif self.is_offset_rhs:
                rvalues, lvalues = _offset(rvalues, lvalues)
            else:

                # with tz, convert to UTC
                if self.is_datetime64tz_lhs:
                    lvalues = lvalues.tz_localize(None)
                if self.is_datetime64tz_rhs:
                    rvalues = rvalues.tz_localize(None)

                lvalues = lvalues.view(np.int64)
                rvalues = rvalues.view(np.int64)

        # otherwise it's a timedelta
        else:

            self.dtype = 'timedelta64[ns]'

            # convert Tick DateOffset to underlying delta
            if self.is_offset_lhs:
                lvalues = to_timedelta(lvalues)
            if self.is_offset_rhs:
                rvalues = to_timedelta(rvalues)

            lvalues = lvalues.astype(np.int64)
            if not self.is_floating_rhs:
                rvalues = rvalues.astype(np.int64)

            # time delta division -> unit less
            # integer gets converted to timedelta in np < 1.6
            if ((self.is_timedelta_lhs and self.is_timedelta_rhs) and
                    not self.is_integer_rhs and not self.is_integer_lhs and
                    self.name in ('__div__', '__truediv__')):
                self.dtype = 'float64'
                self.fill_value = np.nan
                lvalues = lvalues.astype(np.float64)
                rvalues = rvalues.astype(np.float64)

        # if we need to mask the results
        if mask.any():

            def f(x):

                # datetime64[ns]/timedelta64[ns] masking
                try:
                    x = np.array(x, dtype=self.dtype)
                except TypeError:
                    x = np.array(x, dtype='datetime64[ns]')

                np.putmask(x, mask, self.fill_value)
                return x

            self.wrap_results = f

        return lvalues, rvalues
Exemple #18
0
    def _convert_for_datetime(self, lvalues, rvalues):
        from pandas.tseries.timedeltas import to_timedelta
        mask = None
        # datetimes require views
        if self.is_datetime_lhs or self.is_datetime_rhs:
            # datetime subtraction means timedelta
            if self.is_datetime_lhs and self.is_datetime_rhs:
                self.dtype = 'timedelta64[ns]'
            else:
                self.dtype = 'datetime64[ns]'
            mask = isnull(lvalues) | isnull(rvalues)

            # if adding single offset try vectorized path
            # in DatetimeIndex; otherwise elementwise apply
            if self.is_offset_lhs:
                if len(lvalues) == 1:
                    rvalues = pd.DatetimeIndex(rvalues)
                    lvalues = lvalues[0]
                else:
                    warnings.warn("Adding/subtracting array of DateOffsets to Series not vectorized",
                                  PerformanceWarning)
                    rvalues = rvalues.astype('O')
            elif self.is_offset_rhs:
                if len(rvalues) == 1:
                    lvalues = pd.DatetimeIndex(lvalues)
                    rvalues = rvalues[0]
                else:
                    warnings.warn("Adding/subtracting array of DateOffsets to Series not vectorized",
                                  PerformanceWarning)
                    lvalues = lvalues.astype('O')
            else:
                lvalues = lvalues.view(np.int64)
                rvalues = rvalues.view(np.int64)

        # otherwise it's a timedelta
        else:
            self.dtype = 'timedelta64[ns]'
            mask = isnull(lvalues) | isnull(rvalues)

            # convert Tick DateOffset to underlying delta
            if self.is_offset_lhs:
                lvalues = to_timedelta(lvalues)
            if self.is_offset_rhs:
                rvalues = to_timedelta(rvalues)

            lvalues = lvalues.astype(np.int64)
            rvalues = rvalues.astype(np.int64)

            # time delta division -> unit less
            # integer gets converted to timedelta in np < 1.6
            if (self.is_timedelta_lhs and self.is_timedelta_rhs) and\
               not self.is_integer_rhs and\
               not self.is_integer_lhs and\
               self.name in ('__div__', '__truediv__'):
                self.dtype = 'float64'
                self.fill_value = np.nan
                lvalues = lvalues.astype(np.float64)
                rvalues = rvalues.astype(np.float64)

        # if we need to mask the results
        if mask is not None:
            if mask.any():
                def f(x):
                    x = np.array(x, dtype=self.dtype)
                    np.putmask(x, mask, self.fill_value)
                    return x
                self.wrap_results = f
        self.lvalues = lvalues
        self.rvalues = rvalues
Exemple #19
0
    def _convert_to_array(self, values, name=None, other=None):
        """converts values to ndarray"""
        from pandas.tseries.timedeltas import to_timedelta

        ovalues = values
        supplied_dtype = None
        if not is_list_like(values):
            values = np.array([values])
        # if this is a Series that contains relevant dtype info, then use this
        # instead of the inferred type; this avoids coercing Series([NaT],
        # dtype='datetime64[ns]') to Series([NaT], dtype='timedelta64[ns]')
        elif isinstance(values, pd.Series) and (
                    is_timedelta64_dtype(values) or is_datetime64_dtype(values)):
            supplied_dtype = values.dtype
        inferred_type = supplied_dtype or lib.infer_dtype(values)
        if (inferred_type in ('datetime64', 'datetime', 'date', 'time')
            or com.is_datetimetz(inferred_type)):
            # if we have a other of timedelta, but use pd.NaT here we
            # we are in the wrong path
            if (supplied_dtype is None
                and other is not None
                and (other.dtype in ('timedelta64[ns]', 'datetime64[ns]'))
                and isnull(values).all()):
                values = np.empty(values.shape, dtype='timedelta64[ns]')
                values[:] = iNaT

            # a datelike
            elif isinstance(values, pd.DatetimeIndex):
                values = values.to_series()
            # datetime with tz
            elif isinstance(ovalues, datetime.datetime) and hasattr(ovalues,'tz'):
                values = pd.DatetimeIndex(values)
            # datetime array with tz
            elif com.is_datetimetz(values):
                if isinstance(values, pd.Series):
                    values = values._values
            elif not (isinstance(values, (np.ndarray, pd.Series)) and
                      is_datetime64_dtype(values)):
                values = tslib.array_to_datetime(values)
        elif inferred_type in ('timedelta', 'timedelta64'):
            # have a timedelta, convert to to ns here
            values = to_timedelta(values, errors='coerce')
        elif inferred_type == 'integer':
            # py3 compat where dtype is 'm' but is an integer
            if values.dtype.kind == 'm':
                values = values.astype('timedelta64[ns]')
            elif isinstance(values, pd.PeriodIndex):
                values = values.to_timestamp().to_series()
            elif name not in ('__truediv__', '__div__', '__mul__', '__rmul__'):
                raise TypeError("incompatible type for a datetime/timedelta "
                                "operation [{0}]".format(name))
        elif inferred_type == 'floating':
            if isnull(values).all() and name in ('__add__', '__radd__',
                                                 '__sub__', '__rsub__'):
                values = np.empty(values.shape, dtype=other.dtype)
                values[:] = iNaT
            return values
        elif self._is_offset(values):
            return values
        else:
            raise TypeError("incompatible type [{0}] for a datetime/timedelta"
                            " operation".format(np.array(values).dtype))

        return values
Exemple #20
0
def _possibly_cast_to_datetime(value, dtype, errors='raise'):
    """ try to cast the array/value to a datetimelike dtype, converting float
    nan to iNaT
    """
    from pandas.tseries.timedeltas import to_timedelta
    from pandas.tseries.tools import to_datetime

    if dtype is not None:
        if isinstance(dtype, string_types):
            dtype = np.dtype(dtype)

        is_datetime64 = is_datetime64_dtype(dtype)
        is_datetime64tz = is_datetime64tz_dtype(dtype)
        is_timedelta64 = is_timedelta64_dtype(dtype)

        if is_datetime64 or is_datetime64tz or is_timedelta64:

            # force the dtype if needed
            if is_datetime64 and not is_dtype_equal(dtype, _NS_DTYPE):
                if dtype.name == 'datetime64[ns]':
                    dtype = _NS_DTYPE
                else:
                    raise TypeError("cannot convert datetimelike to "
                                    "dtype [%s]" % dtype)
            elif is_datetime64tz:

                # our NaT doesn't support tz's
                # this will coerce to DatetimeIndex with
                # a matching dtype below
                if is_scalar(value) and isnull(value):
                    value = [value]

            elif is_timedelta64 and not is_dtype_equal(dtype, _TD_DTYPE):
                if dtype.name == 'timedelta64[ns]':
                    dtype = _TD_DTYPE
                else:
                    raise TypeError("cannot convert timedeltalike to "
                                    "dtype [%s]" % dtype)

            if is_scalar(value):
                if value == tslib.iNaT or isnull(value):
                    value = tslib.iNaT
            else:
                value = np.array(value, copy=False)

                # have a scalar array-like (e.g. NaT)
                if value.ndim == 0:
                    value = tslib.iNaT

                # we have an array of datetime or timedeltas & nulls
                elif np.prod(value.shape) or not is_dtype_equal(value.dtype,
                                                                dtype):
                    try:
                        if is_datetime64:
                            value = to_datetime(value, errors=errors)._values
                        elif is_datetime64tz:
                            # input has to be UTC at this point, so just
                            # localize
                            value = to_datetime(
                                value,
                                errors=errors).tz_localize(dtype.tz)
                        elif is_timedelta64:
                            value = to_timedelta(value, errors=errors)._values
                    except (AttributeError, ValueError, TypeError):
                        pass

        # coerce datetimelike to object
        elif is_datetime64_dtype(value) and not is_datetime64_dtype(dtype):
            if is_object_dtype(dtype):
                if value.dtype != _NS_DTYPE:
                    value = value.astype(_NS_DTYPE)
                ints = np.asarray(value).view('i8')
                return tslib.ints_to_pydatetime(ints)

            # we have a non-castable dtype that was passed
            raise TypeError('Cannot cast datetime64 to %s' % dtype)

    else:

        is_array = isinstance(value, np.ndarray)

        # catch a datetime/timedelta that is not of ns variety
        # and no coercion specified
        if is_array and value.dtype.kind in ['M', 'm']:
            dtype = value.dtype

            if dtype.kind == 'M' and dtype != _NS_DTYPE:
                value = value.astype(_NS_DTYPE)

            elif dtype.kind == 'm' and dtype != _TD_DTYPE:
                value = to_timedelta(value)

        # only do this if we have an array and the dtype of the array is not
        # setup already we are not an integer/object, so don't bother with this
        # conversion
        elif not (is_array and not (issubclass(value.dtype.type, np.integer) or
                                    value.dtype == np.object_)):
            value = _possibly_infer_to_datetimelike(value)

    return value
Exemple #21
0
    def __new__(cls, data=None, unit=None,
                freq=None, start=None, end=None, periods=None,
                copy=False, name=None,
                closed=None, verify_integrity=True, **kwargs):

        if isinstance(data, TimedeltaIndex) and freq is None and name is None:
            if copy:
                data = data.copy()
            return data

        freq_infer = False
        if not isinstance(freq, DateOffset):

            # if a passed freq is None, don't infer automatically
            if freq != 'infer':
                freq = to_offset(freq)
            else:
                freq_infer = True
                freq = None

        if periods is not None:
            if is_float(periods):
                periods = int(periods)
            elif not is_integer(periods):
                raise ValueError('Periods must be a number, got %s' %
                                 str(periods))

        if data is None and freq is None:
            raise ValueError("Must provide freq argument if no data is "
                             "supplied")

        if data is None:
            return cls._generate(start, end, periods, name, freq,
                                 closed=closed)

        if unit is not None:
            data = to_timedelta(data, unit=unit, box=False)

        if not isinstance(data, (np.ndarray, Index, ABCSeries)):
            if lib.isscalar(data):
                raise ValueError('TimedeltaIndex() must be called with a '
                                 'collection of some kind, %s was passed'
                                 % repr(data))

        # convert if not already
        if getattr(data, 'dtype', None) != _TD_DTYPE:
            data = to_timedelta(data, unit=unit, box=False)
        elif copy:
            data = np.array(data, copy=True)

        # check that we are matching freqs
        if verify_integrity and len(data) > 0:
            if freq is not None and not freq_infer:
                index = cls._simple_new(data, name=name)
                inferred = index.inferred_freq
                if inferred != freq.freqstr:
                    on_freq = cls._generate(
                        index[0], None, len(index), name, freq)
                    if not np.array_equal(index.asi8, on_freq.asi8):
                        raise ValueError('Inferred frequency {0} from passed '
                                         'timedeltas does not conform to '
                                         'passed frequency {1}'
                                         .format(inferred, freq.freqstr))
                index.freq = freq
                return index

        if freq_infer:
            index = cls._simple_new(data, name=name)
            inferred = index.inferred_freq
            if inferred:
                index.freq = to_offset(inferred)
            return index

        return cls._simple_new(data, name=name, freq=freq)
Exemple #22
0
    def _convert_to_array(self, values, name=None, other=None):
        """converts values to ndarray"""
        from pandas.tseries.timedeltas import to_timedelta

        ovalues = values
        supplied_dtype = None
        if not is_list_like(values):
            values = np.array([values])
        # if this is a Series that contains relevant dtype info, then use this
        # instead of the inferred type; this avoids coercing Series([NaT],
        # dtype='datetime64[ns]') to Series([NaT], dtype='timedelta64[ns]')
        elif (isinstance(values, pd.Series) and
              (is_timedelta64_dtype(values) or is_datetime64_dtype(values))):
            supplied_dtype = values.dtype
        inferred_type = supplied_dtype or lib.infer_dtype(values)
        if (inferred_type in ('datetime64', 'datetime', 'date', 'time') or
                com.is_datetimetz(inferred_type)):
            # if we have a other of timedelta, but use pd.NaT here we
            # we are in the wrong path
            if (supplied_dtype is None and other is not None and
                (other.dtype in ('timedelta64[ns]', 'datetime64[ns]')) and
                    isnull(values).all()):
                values = np.empty(values.shape, dtype='timedelta64[ns]')
                values[:] = iNaT

            # a datelike
            elif isinstance(values, pd.DatetimeIndex):
                values = values.to_series()
            # datetime with tz
            elif (isinstance(ovalues, datetime.datetime) and
                  hasattr(ovalues, 'tz')):
                values = pd.DatetimeIndex(values)
            # datetime array with tz
            elif com.is_datetimetz(values):
                if isinstance(values, ABCSeries):
                    values = values._values
            elif not (isinstance(values, (np.ndarray, ABCSeries)) and
                      is_datetime64_dtype(values)):
                values = tslib.array_to_datetime(values)
        elif inferred_type in ('timedelta', 'timedelta64'):
            # have a timedelta, convert to to ns here
            values = to_timedelta(values, errors='coerce')
        elif inferred_type == 'integer':
            # py3 compat where dtype is 'm' but is an integer
            if values.dtype.kind == 'm':
                values = values.astype('timedelta64[ns]')
            elif isinstance(values, pd.PeriodIndex):
                values = values.to_timestamp().to_series()
            elif name not in ('__truediv__', '__div__', '__mul__', '__rmul__'):
                raise TypeError("incompatible type for a datetime/timedelta "
                                "operation [{0}]".format(name))
        elif inferred_type == 'floating':
            if (isnull(values).all() and
                    name in ('__add__', '__radd__', '__sub__', '__rsub__')):
                values = np.empty(values.shape, dtype=other.dtype)
                values[:] = iNaT
            return values
        elif self._is_offset(values):
            return values
        else:
            raise TypeError("incompatible type [{0}] for a datetime/timedelta"
                            " operation".format(np.array(values).dtype))

        return values