Esempio n. 1
0
    def __init__(self, left, right, name, na_op):
        """
        Prepare both operands of a binary operation and record their
        datetime-related dtype flags on the instance.

        Parameters
        ----------
        left, right : operands; when both are Series they are aligned first
        name : operation name, forwarded to ``_convert_to_array`` and
            ``_validate``
        na_op : stored unchanged as ``self.na_op``
        """
        # two Series have to share one index before their values are combined
        both_series = (isinstance(left, pd.Series) and
                       isinstance(right, pd.Series))
        if both_series:
            left, right = left.align(right, copy=False)

        lhs = self._convert_to_array(left, name=name)
        rhs = self._convert_to_array(right, name=name, other=lhs)

        self.name = name
        self.na_op = na_op

        # flags describing the left operand
        self.left = left
        self.is_offset_lhs = self._is_offset(left)
        self.is_timedelta_lhs = is_timedelta64_dtype(lhs)
        self.is_datetime64_lhs = is_datetime64_dtype(lhs)
        self.is_datetime64tz_lhs = is_datetime64tz_dtype(lhs)
        self.is_datetime_lhs = (self.is_datetime64_lhs or
                                self.is_datetime64tz_lhs)
        # note: the kind is read from ``left`` itself, not the converted array
        left_kind = left.dtype.kind
        self.is_integer_lhs = left_kind in ('i', 'u')
        self.is_floating_lhs = left_kind == 'f'

        # flags describing the right operand
        self.right = right
        self.is_offset_rhs = self._is_offset(right)
        self.is_datetime64_rhs = is_datetime64_dtype(rhs)
        self.is_datetime64tz_rhs = is_datetime64tz_dtype(rhs)
        self.is_datetime_rhs = (self.is_datetime64_rhs or
                                self.is_datetime64tz_rhs)
        self.is_timedelta_rhs = is_timedelta64_dtype(rhs)
        right_kind = rhs.dtype.kind
        self.is_integer_rhs = right_kind in ('i', 'u')
        self.is_floating_rhs = right_kind == 'f'

        self._validate(lhs, rhs, name)
        converted = self._convert_for_datetime(lhs, rhs)
        self.lvalues, self.rvalues = converted
Esempio n. 2
0
    def __init__(self, left, right, name, na_op):
        """
        Run the parent initialiser, then record datetime-related dtype flags
        for both operands and store the converted values.

        Parameters
        ----------
        left, right : operands of the operation
        name : operation name, forwarded to ``_convert_to_array`` and
            ``_validate``
        na_op : passed through to the parent initialiser
        """
        super(_TimeOp, self).__init__(left, right, name, na_op)

        lhs = self._convert_to_array(left, name=name)
        rhs = self._convert_to_array(right, name=name, other=lhs)

        # flags describing the left operand
        self.is_offset_lhs = self._is_offset(left)
        self.is_timedelta_lhs = is_timedelta64_dtype(lhs)
        self.is_datetime64_lhs = is_datetime64_dtype(lhs)
        self.is_datetime64tz_lhs = is_datetime64tz_dtype(lhs)
        self.is_datetime_lhs = (self.is_datetime64_lhs or
                                self.is_datetime64tz_lhs)
        # note: the kind is read from ``left`` itself, not the converted array
        left_kind = left.dtype.kind
        self.is_integer_lhs = left_kind in ('i', 'u')
        self.is_floating_lhs = left_kind == 'f'

        # flags describing the right operand
        self.is_offset_rhs = self._is_offset(right)
        self.is_datetime64_rhs = is_datetime64_dtype(rhs)
        self.is_datetime64tz_rhs = is_datetime64tz_dtype(rhs)
        self.is_datetime_rhs = (self.is_datetime64_rhs or
                                self.is_datetime64tz_rhs)
        self.is_timedelta_rhs = is_timedelta64_dtype(rhs)
        right_kind = rhs.dtype.kind
        self.is_integer_rhs = right_kind in ('i', 'u')
        self.is_floating_rhs = right_kind == 'f'

        self._validate(lhs, rhs, name)
        converted = self._convert_for_datetime(lhs, rhs)
        self.lvalues, self.rvalues = converted
Esempio n. 3
0
    def get_op(cls, left, right, name, na_op):
        """
        Pick and build the operation wrapper for ``left`` and ``right``.

        A ``_TimeOp`` is returned when ``left`` has a datetime64, tz-aware
        datetime64 or timedelta64 dtype; in every other case a plain ``_Op``
        is returned.  When both operands are Series with differing indexes
        they are aligned and given the outer-joined index before the wrapper
        is created.
        """
        # the dispatch depends on the dtype of ``left`` as it was passed in
        lhs_is_timedelta = is_timedelta64_dtype(left)
        lhs_is_datetime = (is_datetime64_dtype(left) or
                           is_datetime64tz_dtype(left))

        both_series = (isinstance(left, ABCSeries) and
                       isinstance(right, ABCSeries))
        # equal indexes need no work, which avoids a repeated alignment
        if both_series and not left.index.equals(right.index):
            left, right = left.align(right, copy=False)

            joined, _, _ = left.index.join(right.index, how='outer',
                                           return_indexers=True)
            # original note: if DatetimeIndex have different tz, convert
            # to UTC
            left.index = joined
            right.index = joined

        if lhs_is_datetime or lhs_is_timedelta:
            return _TimeOp(left, right, name, na_op)
        return _Op(left, right, name, na_op)
Esempio n. 4
0
def na_value_for_dtype(dtype):
    """
    Return the missing-value marker that is compatible with ``dtype``.

    Parameters
    ----------
    dtype : string / dtype

    Returns
    -------
    NaT for datetime64, tz-aware datetime64 and timedelta64 dtypes,
    np.nan for float dtypes, 0 for integer dtypes, False for bool dtypes
    and np.nan for everything else.
    """

    from pandas.core import common as com
    from pandas import NaT

    dtype = pandas_dtype(dtype)

    is_datetimelike = (com.is_datetime64_dtype(dtype) or
                       com.is_datetime64tz_dtype(dtype) or
                       com.is_timedelta64_dtype(dtype))
    if is_datetimelike:
        return NaT
    if com.is_float_dtype(dtype):
        return np.nan
    if com.is_integer_dtype(dtype):
        return 0
    if com.is_bool_dtype(dtype):
        return False
    # fallback for every dtype not matched above
    return np.nan
Esempio n. 5
0
def nonempty_sample_df(empty):
    """ Create a dataframe from the given empty dataframe that contains one
    row of fake data (generated from the empty dataframe's dtypes).

    Parameters
    ----------
    empty : DataFrame
        frame whose ``dtypes`` drive the fake data; categorical columns are
        also read for their categories and ordering

    Returns
    -------
    DataFrame holding one fake value for every column of ``empty``

    Raises
    ------
    TypeError
        if a column dtype is an extension type that is not handled, or is
        otherwise unsupported
    """
    nonempty = {}
    idx = pd.RangeIndex(start=0, stop=1, step=1)
    for key, dtype in empty.dtypes.iteritems():
        if is_datetime64tz_dtype(dtype):
            # use the column's own timezone so the fake column keeps the
            # same tz-aware dtype as the input column
            entry = pd.Timestamp('1970-01-01', tz=dtype.tz)
        elif pd.core.common.is_categorical_dtype(dtype):
            accessor = empty[key].cat
            # the first category serves as the fake value
            example = accessor.categories[0]
            cat = pd.Categorical([example], categories=accessor.categories,
                                 ordered=accessor.ordered)
            entry = pd.Series(cat, name=key)
        elif dtype.kind in _simple_fake_mapping:
            entry = _simple_fake_mapping[dtype.kind]
        elif is_extension_type(dtype):
            raise TypeError("Can't handle extension dtype: {}".format(dtype))
        elif dtype.name == 'object':
            entry = 'foo'
        else:
            raise TypeError("Can't handle dtype: {}".format(dtype))

        # scalars are wrapped so that every column is a one-row Series
        if not isinstance(entry, pd.Series):
            entry = pd.Series([entry], name=key, index=idx)

        nonempty[key] = entry

    df = pd.DataFrame(nonempty)
    return df
Esempio n. 6
0
def maybe_to_datetimelike(data, copy=False):
    """
    Wrap a datetimelike Series in the matching properties delegate
      (datetime64[ns], tz-aware datetime64, timedelta64[ns] dtype, or a
      Series that is period / datetime array-like).

    Parameters
    ----------
    data : Series
    copy : boolean, default False
           copy the input data

    Returns
    -------
    DelegatedClass

    Raises
    ------
    TypeError
        if ``data`` is not a Series or matches none of the supported kinds
    """
    from pandas import Series

    msg = "cannot convert an object of type {0} to a datetimelike index"

    if not isinstance(data, Series):
        raise TypeError(msg.format(type(data)))

    index = data.index
    dtype = data.dtype

    if is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype):
        values = DatetimeIndex(data, copy=copy, freq='infer')
        return DatetimeProperties(values, index, name=data.name)

    if is_timedelta64_dtype(dtype):
        values = TimedeltaIndex(data, copy=copy, freq='infer')
        return TimedeltaProperties(values, index, name=data.name)

    # no dedicated dtype: inspect the data itself
    if is_period_arraylike(data):
        values = PeriodIndex(data, copy=copy)
        return PeriodProperties(values, index, name=data.name)

    if is_datetime_arraylike(data):
        values = DatetimeIndex(data, copy=copy, freq='infer')
        return DatetimeProperties(values, index, name=data.name)

    raise TypeError(msg.format(type(data)))
Esempio n. 7
0
def nonempty_sample_df(empty):
    """ Create a dataframe from the given empty dataframe that contains one
    row of fake data (generated from the empty dataframe's dtypes).

    Parameters
    ----------
    empty : DataFrame
        frame whose ``dtypes`` drive the fake data; categorical columns are
        also read for their categories and ordering

    Returns
    -------
    DataFrame holding one fake value for every column of ``empty``

    Raises
    ------
    TypeError
        if a column dtype is an extension type that is not handled, or is
        otherwise unsupported
    """
    nonempty = {}
    idx = pd.RangeIndex(start=0, stop=1, step=1)
    for key, dtype in empty.dtypes.iteritems():
        if is_datetime64tz_dtype(dtype):
            # take the timezone from the dtype rather than a fixed zone, so
            # the fake column has the same tz-aware dtype as the original
            entry = pd.Timestamp('1970-01-01', tz=dtype.tz)
        elif pd.core.common.is_categorical_dtype(dtype):
            accessor = empty[key].cat
            # the first category serves as the fake value
            example = accessor.categories[0]
            cat = pd.Categorical([example],
                                 categories=accessor.categories,
                                 ordered=accessor.ordered)
            entry = pd.Series(cat, name=key)
        elif dtype.kind in _simple_fake_mapping:
            entry = _simple_fake_mapping[dtype.kind]
        elif is_extension_type(dtype):
            raise TypeError("Can't handle extension dtype: {}".format(dtype))
        elif dtype.name == 'object':
            entry = 'foo'
        else:
            raise TypeError("Can't handle dtype: {}".format(dtype))

        # scalars are wrapped so that every column is a one-row Series
        if not isinstance(entry, pd.Series):
            entry = pd.Series([entry], name=key, index=idx)

        nonempty[key] = entry

    df = pd.DataFrame(nonempty)
    return df
Esempio n. 8
0
    def test_basic(self):
        """Both tz checkers accept a tz-aware Series and its dtype, and
        reject a float64 dtype and a plain float."""

        self.assertTrue(is_datetime64tz_dtype(self.dtype))

        rng = date_range('20130101',periods=3,tz='US/Eastern')
        ser = Series(rng,name='A')

        # (checker, inputs it must accept) -- same order as the flat asserts
        cases = ((is_datetime64tz_dtype, (ser.dtype, ser)),
                 (is_datetimetz, (ser, ser.dtype)))
        for check, accepted in cases:
            for obj in accepted:
                self.assertTrue(check(obj))
            for obj in (np.dtype('float64'), 1.0):
                self.assertFalse(check(obj))
Esempio n. 9
0
    def test_basic(self):
        """Both tz checkers accept a tz-aware Series and its dtype, and
        reject a float64 dtype and a plain float."""

        self.assertTrue(is_datetime64tz_dtype(self.dtype))

        rng = date_range("20130101", periods=3, tz="US/Eastern")
        ser = Series(rng, name="A")
        rejected = (np.dtype("float64"), 1.0)

        # dtypes
        for obj in (ser.dtype, ser):
            self.assertTrue(is_datetime64tz_dtype(obj))
        for obj in rejected:
            self.assertFalse(is_datetime64tz_dtype(obj))

        for obj in (ser, ser.dtype):
            self.assertTrue(is_datetimetz(obj))
        for obj in rejected:
            self.assertFalse(is_datetimetz(obj))
Esempio n. 10
0
def maybe_to_datetimelike(data, copy=False):
    """
    Wrap a datetimelike Series in the matching properties delegate
      (datetime64[ns], tz-aware datetime64, timedelta64[ns] dtype, or a
      Series that is period / datetime array-like).

    Parameters
    ----------
    data : Series
    copy : boolean, default False
           copy the input data

    Returns
    -------
    DelegatedClass

    Raises
    ------
    TypeError
        if ``data`` is not a Series or matches none of the supported kinds
    """
    from pandas import Series

    msg = "cannot convert an object of type {0} to a datetimelike index"

    if not isinstance(data, Series):
        raise TypeError(msg.format(type(data)))

    index = data.index
    dtype = data.dtype

    if is_datetime64_dtype(dtype):
        values = DatetimeIndex(data, copy=copy, freq='infer')
        return DatetimeProperties(values, index, name=data.name)

    if is_datetime64tz_dtype(dtype):
        # the tz-aware branch additionally passes ambiguous='infer'
        values = DatetimeIndex(data, copy=copy, freq='infer',
                               ambiguous='infer')
        return DatetimeProperties(values, index, name=data.name)

    if is_timedelta64_dtype(dtype):
        values = TimedeltaIndex(data, copy=copy, freq='infer')
        return TimedeltaProperties(values, index, name=data.name)

    # no dedicated dtype: inspect the data itself
    if is_period_arraylike(data):
        values = PeriodIndex(data, copy=copy)
        return PeriodProperties(values, index, name=data.name)

    if is_datetime_arraylike(data):
        values = DatetimeIndex(data, copy=copy, freq='infer')
        return DatetimeProperties(values, index, name=data.name)

    raise TypeError(msg.format(type(data)))
Esempio n. 11
0
def _factorize_keys(lk, rk, sort=True):
    """
    Factorize the left and right keys with one shared factorizer.

    Parameters
    ----------
    lk, rk : left and right keys
    sort : boolean, default True
        when True the labels are passed through ``_sort_labels`` together
        with the factorizer's uniques

    Returns
    -------
    llab, rlab : labels for the left and right keys
    count : the factorizer's count, plus one when either side contains
        a -1 label (those labels are replaced by the extra group)
    """
    if com.is_datetime64tz_dtype(lk) and com.is_datetime64tz_dtype(rk):
        lk, rk = lk.values, rk.values

    if com.is_int_or_datetime_dtype(lk) and com.is_int_or_datetime_dtype(rk):
        factorizer_cls = _hash.Int64Factorizer
        lk = com._ensure_int64(com._values_from_object(lk))
        rk = com._ensure_int64(com._values_from_object(rk))
    else:
        factorizer_cls = _hash.Factorizer
        lk = com._ensure_object(lk)
        rk = com._ensure_object(rk)

    rizer = factorizer_cls(max(len(lk), len(rk)))

    # left first, then right, on the same factorizer instance
    llab = rizer.factorize(lk)
    rlab = rizer.factorize(rk)
    count = rizer.get_count()

    if sort:
        llab, rlab = _sort_labels(rizer.uniques.to_array(), llab, rlab)

    # NA group: -1 labels are moved into one extra group numbered ``count``
    left_na = llab == -1
    right_na = rlab == -1
    has_left_na = left_na.any()
    has_right_na = right_na.any()

    if has_left_na:
        np.putmask(llab, left_na, count)
    if has_right_na:
        np.putmask(rlab, right_na, count)
    if has_left_na or has_right_na:
        count += 1

    return llab, rlab, count
Esempio n. 12
0
def _nonempty_series(s, idx):
    dtype = s.dtype
    if is_datetime64tz_dtype(dtype):
        entry = pd.Timestamp('1970-01-01', tz=dtype.tz)
        data = [entry, entry]
    elif is_categorical_dtype(dtype):
        entry = s.cat.categories[0]
        data = pd.Categorical([entry, entry],
                               categories=s.cat.categories,
                               ordered=s.cat.ordered)
    else:
        entry = _scalar_from_dtype(dtype)
        data = [entry, entry]
    return pd.Series(data, name=s.name, index=idx)
Esempio n. 13
0
def _nonempty_series(s, idx):
    dtype = s.dtype
    if is_datetime64tz_dtype(dtype):
        entry = pd.Timestamp('1970-01-01', tz=dtype.tz)
        data = [entry, entry]
    elif is_categorical_dtype(dtype):
        entry = s.cat.categories[0]
        data = pd.Categorical([entry, entry],
                               categories=s.cat.categories,
                               ordered=s.cat.ordered)
    else:
        entry = _scalar_from_dtype(dtype)
        data = np.array([entry, entry], dtype=dtype)
    return pd.Series(data, name=s.name, index=idx)
Esempio n. 14
0
    def __init__(self, left, right, name, na_op):
        """
        Prepare both operands of a binary operation and record their
        datetime-related dtype flags on the instance.

        Parameters
        ----------
        left, right : operands; when both are Series they are aligned first
        name : operation name, forwarded to ``_convert_to_array`` and
            ``_validate``
        na_op : stored unchanged as ``self.na_op``
        """
        # need to make sure that we are aligning the data
        if isinstance(left, ABCSeries) and isinstance(right, ABCSeries):
            left, right = left.align(right, copy=False)

        lvals = self._convert_to_array(left, name=name)
        rvals = self._convert_to_array(right, name=name, other=lvals)

        self.name = name
        self.na_op = na_op

        # left
        self.left = left
        self.is_offset_lhs = self._is_offset(left)
        self.is_timedelta_lhs = is_timedelta64_dtype(lvals)
        self.is_datetime64_lhs = is_datetime64_dtype(lvals)
        self.is_datetime64tz_lhs = is_datetime64tz_dtype(lvals)
        self.is_datetime_lhs = (self.is_datetime64_lhs or
                                self.is_datetime64tz_lhs)
        # the kind comes from ``left`` itself, not from the converted array
        lkind = left.dtype.kind
        self.is_integer_lhs = lkind in ('i', 'u')
        self.is_floating_lhs = lkind == 'f'

        # right
        self.right = right
        self.is_offset_rhs = self._is_offset(right)
        self.is_datetime64_rhs = is_datetime64_dtype(rvals)
        self.is_datetime64tz_rhs = is_datetime64tz_dtype(rvals)
        self.is_datetime_rhs = (self.is_datetime64_rhs or
                                self.is_datetime64tz_rhs)
        self.is_timedelta_rhs = is_timedelta64_dtype(rvals)
        rkind = rvals.dtype.kind
        self.is_integer_rhs = rkind in ('i', 'u')
        self.is_floating_rhs = rkind == 'f'

        self._validate(lvals, rvals, name)
        converted = self._convert_for_datetime(lvals, rvals)
        self.lvalues, self.rvalues = converted
Esempio n. 15
0
def _nonempty_series(s, idx):
    dtype = s.dtype
    if is_datetime64tz_dtype(dtype):
        entry = pd.Timestamp('1970-01-01', tz=dtype.tz)
    elif is_categorical_dtype(dtype):
        entry = pd.Categorical([s.cat.categories[0]],
                               categories=s.cat.categories,
                               ordered=s.cat.ordered)
    elif dtype.kind in ['i', 'f', 'u']:
        entry = dtype.type(1)
    elif dtype.kind in _simple_fake_mapping:
        entry = _simple_fake_mapping[dtype.kind]
    else:
        raise TypeError("Can't handle dtype: {0}".format(dtype))
    return pd.Series([entry, entry], name=s.name, index=idx)
Esempio n. 16
0
def as_json_table_type(x):
    """
    Map a NumPy / pandas array or dtype to a Table Schema type name.

    Parameters
    ----------
    x : array or dtype

    Returns
    -------
    t : str
        the Table Schema data type

    Notes
    -----
    The dtype checks run in the order listed below and the first one that
    matches decides the result.

    ======================================  =================
    dtype check                             Table Schema type
    ======================================  =================
    is_integer_dtype                        integer
    is_bool_dtype                           boolean
    is_numeric_dtype                        number
    is_datetime64_dtype / datetime64tz      datetime
    is_timedelta64_dtype                    duration
    is_categorical_dtype                    any
    is_string_dtype                         string
    anything else                           any
    ======================================  =================
    """
    if is_integer_dtype(x):
        return 'integer'
    if is_bool_dtype(x):
        return 'boolean'
    if is_numeric_dtype(x):
        return 'number'
    if is_datetime64_dtype(x) or is_datetime64tz_dtype(x):
        return 'datetime'
    if is_timedelta64_dtype(x):
        return 'duration'
    if is_categorical_dtype(x):
        return 'any'
    if is_string_dtype(x):
        return 'string'
    return 'any'
Esempio n. 17
0
    def maybe_convert_for_time_op(cls, left, right, name, na_op):
        """
        Build a ``cls`` instance when ``left`` is suitable for a time op.

        ``left`` qualifies when its dtype is datetime64, tz-aware datetime64
        or timedelta64; in that case ``cls(left, right, name, na_op)`` is
        returned.  For any other ``left`` the result is ``None``, meaning
        the data is not the right type for time ops.
        """
        # decide if we can do it
        lhs_is_timedelta = is_timedelta64_dtype(left)
        lhs_is_datetime = (is_datetime64_dtype(left) or
                           is_datetime64tz_dtype(left))

        if lhs_is_datetime or lhs_is_timedelta:
            return cls(left, right, name, na_op)
        return None
Esempio n. 18
0
def as_json_table_type(x):
    """
    Map a NumPy / pandas array or dtype to a Table Schema type name.

    Parameters
    ----------
    x : array or dtype

    Returns
    -------
    t : str
        the Table Schema data type

    Notes
    -----
    The dtype checks run in the order listed below and the first one that
    matches decides the result.

    ======================================  =================
    dtype check                             Table Schema type
    ======================================  =================
    is_integer_dtype                        integer
    is_bool_dtype                           boolean
    is_numeric_dtype                        number
    is_datetime64_dtype / datetime64tz      datetime
    is_timedelta64_dtype                    duration
    is_categorical_dtype                    any
    is_string_dtype                         string
    anything else                           any
    ======================================  =================
    """
    if is_integer_dtype(x):
        table_type = 'integer'
    elif is_bool_dtype(x):
        table_type = 'boolean'
    elif is_numeric_dtype(x):
        table_type = 'number'
    elif is_datetime64_dtype(x) or is_datetime64tz_dtype(x):
        table_type = 'datetime'
    elif is_timedelta64_dtype(x):
        table_type = 'duration'
    elif is_categorical_dtype(x):
        table_type = 'any'
    elif is_string_dtype(x):
        table_type = 'string'
    else:
        table_type = 'any'
    return table_type
Esempio n. 19
0
    def maybe_convert_for_time_op(cls, left, right, name, na_op):
        """
        Build a ``cls`` instance when ``left`` is suitable for a time op.

        ``left`` qualifies when its dtype is datetime64, tz-aware datetime64
        or timedelta64; in that case ``cls(left, right, name, na_op)`` is
        returned.  For any other ``left`` the result is ``None``, meaning
        the data is not the right type for time ops.
        """
        # decide if we can do it
        timedelta_lhs = is_timedelta64_dtype(left)
        datetime_lhs = (is_datetime64_dtype(left) or
                        is_datetime64tz_dtype(left))
        usable = datetime_lhs or timedelta_lhs

        return cls(left, right, name, na_op) if usable else None
Esempio n. 20
0
def _nonempty_series(s, idx):

    dtype = s.dtype
    if is_datetime64tz_dtype(dtype):
        entry = pd.Timestamp('1970-01-01', tz=dtype.tz)
        data = [entry, entry]
    elif is_categorical_dtype(dtype):
        if len(s.cat.categories):
            data = [s.cat.categories[0]] * 2
            cats = s.cat.categories
        else:
            data = _nonempty_index(s.cat.categories)
            cats = None
        data = pd.Categorical(data, categories=cats,
                              ordered=s.cat.ordered)
    else:
        entry = _scalar_from_dtype(dtype)
        data = np.array([entry, entry], dtype=dtype)

    return pd.Series(data, name=s.name, index=idx)
Esempio n. 21
0
def make_field(arr, dtype=None):
    """
    Describe ``arr`` as a field dictionary.

    The dictionary always has a ``name`` (``'values'`` when ``arr.name`` is
    None) and a ``type`` obtained from ``as_json_table_type``.  Categorical
    data additionally gets ``constraints`` (an ``enum`` of the categories)
    and ``ordered``; tz-aware datetime data gets ``tz``.

    Parameters
    ----------
    arr : object with ``name`` and ``dtype`` attributes
    dtype : optional override; ``arr.dtype`` is used when it is falsy
    """
    if not dtype:
        dtype = arr.dtype
    name = 'values' if arr.name is None else arr.name
    field = {'name': name, 'type': as_json_table_type(dtype)}

    if is_categorical_dtype(arr):
        # without a ``categories`` attribute the data is reached via ``.cat``
        holder = arr if hasattr(arr, 'categories') else arr.cat
        field['constraints'] = {"enum": list(holder.categories)}
        field['ordered'] = holder.ordered
    elif is_datetime64tz_dtype(arr):
        # with a ``dt`` attribute the timezone is read through it
        tz = arr.dt.tz if hasattr(arr, 'dt') else arr.tz
        field['tz'] = tz.zone
    return field
Esempio n. 22
0
def make_field(arr, dtype=None):
    """
    Describe ``arr`` as a field dictionary.

    The dictionary always has a ``name`` (``'values'`` when ``arr.name`` is
    None) and a ``type`` obtained from ``as_json_table_type``.  Categorical
    data additionally gets ``constraints`` (an ``enum`` of the categories)
    and ``ordered``; tz-aware datetime data gets ``tz``.

    Parameters
    ----------
    arr : object with ``name`` and ``dtype`` attributes
    dtype : optional override; ``arr.dtype`` is used when it is falsy
    """
    dtype = dtype if dtype else arr.dtype
    name = arr.name
    if name is None:
        name = 'values'
    field = {'name': name,
             'type': as_json_table_type(dtype)}

    if is_categorical_dtype(arr):
        # without a ``categories`` attribute the data is reached via ``.cat``
        source = arr if hasattr(arr, 'categories') else arr.cat
        field['constraints'] = {"enum": list(source.categories)}
        field['ordered'] = source.ordered
        return field

    if is_datetime64tz_dtype(arr):
        # with a ``dt`` attribute the timezone is read through it
        source = arr.dt if hasattr(arr, 'dt') else arr
        field['tz'] = source.tz.zone
    return field
Esempio n. 23
0
    def _convert_listlike(arg, box, format, name=None):
        """
        Convert a list-like ``arg`` to datetimes.

        Besides its own parameters this function reads ``utc``, ``unit``,
        ``errors``, ``infer_datetime_format``, ``dayfirst``, ``yearfirst``,
        ``exact`` and ``freq``; none of them is defined here (presumably
        they are supplied by the enclosing scope).

        Parameters
        ----------
        arg : list, tuple, array or other list-like
            lists and tuples are first turned into an object ndarray
        box : boolean
            if True, datetime64 results are wrapped in a ``DatetimeIndex``
        format : string or None
            format handed to ``tslib.array_strptime``; None means that no
            explicit format is used
        name : object, default None
            name given to the ``DatetimeIndex`` results that accept one

        Returns
        -------
        DatetimeIndex, Index or array, depending on the branch taken

        Raises
        ------
        ValueError
            if both ``format`` and ``unit`` are given, or if the input
            cannot be converted with the ``'%Y%m%d'`` shortcut
        TypeError
            if ``arg`` has more than one dimension
        """

        if isinstance(arg, (list, tuple)):
            arg = np.array(arg, dtype='O')

        # these are shortcutable
        if com.is_datetime64_ns_dtype(arg):
            if box and not isinstance(arg, DatetimeIndex):
                try:
                    return DatetimeIndex(arg,
                                         tz='utc' if utc else None,
                                         name=name)
                except ValueError:
                    # boxing failed: fall through and return ``arg`` as is
                    pass

            return arg

        elif com.is_datetime64tz_dtype(arg):
            if not isinstance(arg, DatetimeIndex):
                return DatetimeIndex(arg, tz='utc' if utc else None)
            if utc:
                arg = arg.tz_convert(None).tz_localize('UTC')
            return arg

        elif unit is not None:
            if format is not None:
                raise ValueError("cannot specify both format and unit")
            arg = getattr(arg, 'values', arg)
            result = tslib.array_with_unit_to_datetime(arg,
                                                       unit,
                                                       errors=errors)
            if box:
                if errors == 'ignore':
                    from pandas import Index
                    return Index(result)

                return DatetimeIndex(result,
                                     tz='utc' if utc else None,
                                     name=name)
            return result
        elif getattr(arg, 'ndim', 1) > 1:
            raise TypeError('arg must be a string, datetime, list, tuple, '
                            '1-d array, or Series')

        arg = com._ensure_object(arg)
        require_iso8601 = False

        if infer_datetime_format and format is None:
            format = _guess_datetime_format_for_array(arg, dayfirst=dayfirst)

        if format is not None:
            # There is a special fast-path for iso8601 formatted
            # datetime strings, so in those cases don't use the inferred
            # format because this path makes process slower in this
            # special case
            format_is_iso8601 = _format_is_iso(format)
            if format_is_iso8601:
                require_iso8601 = not infer_datetime_format
                format = None

        try:
            result = None

            if format is not None:
                # shortcut formatting here
                if format == '%Y%m%d':
                    try:
                        result = _attempt_YYYYMMDD(arg, errors=errors)
                    except Exception:
                        # only ordinary errors are reported as a failed
                        # conversion; KeyboardInterrupt and SystemExit
                        # are left to propagate unchanged
                        raise ValueError("cannot convert the input to "
                                         "'%Y%m%d' date format")

                # fallback
                if result is None:
                    try:
                        result = tslib.array_strptime(arg,
                                                      format,
                                                      exact=exact,
                                                      errors=errors)
                    except tslib.OutOfBoundsDatetime:
                        if errors == 'raise':
                            raise
                        result = arg
                    except ValueError:
                        # if format was inferred, try falling back
                        # to array_to_datetime - terminate here
                        # for specified formats
                        if not infer_datetime_format:
                            if errors == 'raise':
                                raise
                            result = arg

            if result is None and (format is None or infer_datetime_format):
                result = tslib.array_to_datetime(
                    arg,
                    errors=errors,
                    utc=utc,
                    dayfirst=dayfirst,
                    yearfirst=yearfirst,
                    freq=freq,
                    require_iso8601=require_iso8601)

            if com.is_datetime64_dtype(result) and box:
                result = DatetimeIndex(result,
                                       tz='utc' if utc else None,
                                       name=name)
            return result

        except ValueError as e:
            # last resort: try ``tslib.datetime_to_datetime64``; if that
            # fails too, the original ValueError is re-raised
            try:
                values, tz = tslib.datetime_to_datetime64(arg)
                return DatetimeIndex._simple_new(values, name=name, tz=tz)
            except (ValueError, TypeError):
                raise e
Esempio n. 24
0
    def test_constructor_with_datetime_tz(self):
        """Exercise a Series built from a tz-aware ``date_range``.

        Checks the reported dtype, export through ``.values``, indexing,
        concat, several ``astype`` conversions, the string representation,
        round-tripping through ``DatetimeIndex``, dtype inference from
        lists of tz-aware Timestamps and construction from all-NaT data.
        """

        # 8260
        # support datetime64 with tz

        dr = date_range('20130101', periods=3, tz='US/Eastern')
        s = Series(dr)
        self.assertTrue(s.dtype.name == 'datetime64[ns, US/Eastern]')
        self.assertTrue(s.dtype == 'datetime64[ns, US/Eastern]')
        self.assertTrue(com.is_datetime64tz_dtype(s.dtype))
        self.assertTrue('datetime64[ns, US/Eastern]' in str(s))

        # export: ``.values`` is expected to be a plain datetime64[ns]
        # ndarray that matches ``dr`` once localized to UTC and converted
        # back to the Series' timezone
        result = s.values
        self.assertIsInstance(result, np.ndarray)
        self.assertTrue(result.dtype == 'datetime64[ns]')
        self.assertTrue(dr.equals(pd.DatetimeIndex(result).tz_localize(
            'UTC').tz_convert(tz=s.dt.tz)))

        # indexing
        result = s.iloc[0]
        self.assertEqual(result, Timestamp('2013-01-01 00:00:00-0500',
                                           tz='US/Eastern', offset='D'))
        result = s[0]
        self.assertEqual(result, Timestamp('2013-01-01 00:00:00-0500',
                                           tz='US/Eastern', offset='D'))

        # boolean mask selecting the first two rows
        result = s[Series([True, True, False], index=s.index)]
        assert_series_equal(result, s[0:2])

        result = s.iloc[0:1]
        assert_series_equal(result, Series(dr[0:1]))

        # concat: gluing the two slices back together restores ``s``
        result = pd.concat([s.iloc[0:1], s.iloc[1:]])
        assert_series_equal(result, s)

        # astype
        result = s.astype(object)
        expected = Series(DatetimeIndex(s._values).asobject)
        assert_series_equal(result, expected)

        # rebuilding from the exported values via UTC restores ``s``
        result = Series(s.values).dt.tz_localize('UTC').dt.tz_convert(s.dt.tz)
        assert_series_equal(result, s)

        # astype - datetime64[ns, tz]
        result = Series(s.values).astype('datetime64[ns, US/Eastern]')
        assert_series_equal(result, s)

        result = Series(s.values).astype(s.dtype)
        assert_series_equal(result, s)

        # converting to another timezone
        result = s.astype('datetime64[ns, CET]')
        expected = Series(date_range('20130101 06:00:00', periods=3, tz='CET'))
        assert_series_equal(result, expected)

        # short str
        self.assertTrue('datetime64[ns, US/Eastern]' in str(s))

        # formatting with NaT
        result = s.shift()
        self.assertTrue('datetime64[ns, US/Eastern]' in str(result))
        self.assertTrue('NaT' in str(result))

        # long str
        t = Series(date_range('20130101', periods=1000, tz='US/Eastern'))
        self.assertTrue('datetime64[ns, US/Eastern]' in str(t))

        result = pd.DatetimeIndex(s, freq='infer')
        tm.assert_index_equal(result, dr)

        # inference: the same tz on every element gives a tz-aware dtype
        s = Series([pd.Timestamp('2013-01-01 13:00:00-0800', tz='US/Pacific'),
                    pd.Timestamp('2013-01-02 14:00:00-0800', tz='US/Pacific')])
        self.assertTrue(s.dtype == 'datetime64[ns, US/Pacific]')
        self.assertTrue(lib.infer_dtype(s) == 'datetime64')

        # mixed timezones are expected to give object dtype
        s = Series([pd.Timestamp('2013-01-01 13:00:00-0800', tz='US/Pacific'),
                    pd.Timestamp('2013-01-02 14:00:00-0800', tz='US/Eastern')])
        self.assertTrue(s.dtype == 'object')
        self.assertTrue(lib.infer_dtype(s) == 'datetime')

        # with all NaT
        s = Series(pd.NaT, index=[0, 1], dtype='datetime64[ns, US/Eastern]')
        expected = Series(pd.DatetimeIndex(['NaT', 'NaT'], tz='US/Eastern'))
        assert_series_equal(s, expected)
Esempio n. 25
0
    def _convert_listlike(arg, box, format, name=None):
        """
        Convert a list-like of date-like values to datetimes.

        In addition to its parameters this function reads ``utc``, ``unit``,
        ``errors``, ``exact``, ``dayfirst``, ``yearfirst``, ``freq`` and
        ``infer_datetime_format`` from the enclosing scope.

        Parameters
        ----------
        arg : list-like
            Values to convert. A list or tuple is first turned into an
            object-dtype ndarray.
        box : bool
            If True, datetime64 results are wrapped in a DatetimeIndex.
        format : str or None
            strptime-style format; None means no explicit format.
        name : object, default None
            Name passed to any DatetimeIndex created here.

        Returns
        -------
        DatetimeIndex or ndarray
            The converted values. If ``array_strptime`` raises and
            ``errors`` is not 'raise', the object-dtype input is returned
            unchanged.

        Raises
        ------
        TypeError
            If ``arg`` has more than one dimension.
        ValueError
            If the values cannot be converted and the final
            ``datetime_to_datetime64`` fallback fails as well.
        """
        if isinstance(arg, (list, tuple)):
            arg = np.array(arg, dtype='O')

        # these are shortcutable
        if com.is_datetime64_ns_dtype(arg):
            if box and not isinstance(arg, DatetimeIndex):
                try:
                    return DatetimeIndex(arg,
                                         tz='utc' if utc else None,
                                         name=name)
                except ValueError:
                    # best effort: boxing failed, hand back the raw values
                    pass

            return arg

        elif com.is_datetime64tz_dtype(arg):
            if not isinstance(arg, DatetimeIndex):
                # keep the requested name, as every other branch does
                return DatetimeIndex(arg,
                                     tz='utc' if utc else None,
                                     name=name)
            if utc:
                # tz_convert(None) alone yields a naive index; re-localize
                # so the result is actually UTC-aware
                arg = arg.tz_convert(None).tz_localize('UTC')
            return arg

        elif format is None and com.is_integer_dtype(arg) and unit == 'ns':
            # integers with unit 'ns' are reinterpreted directly
            result = arg.astype('datetime64[ns]')
            if box:
                return DatetimeIndex(result,
                                     tz='utc' if utc else None,
                                     name=name)
            return result
        elif getattr(arg, 'ndim', 1) > 1:
            raise TypeError('arg must be a string, datetime, list, tuple, 1-d array, or Series')

        arg = com._ensure_object(arg)
        require_iso8601 = False

        if infer_datetime_format and format is None:
            format = _guess_datetime_format_for_array(arg, dayfirst=dayfirst)

        if format is not None:
            # There is a special fast-path for iso8601 formatted
            # datetime strings, so in those cases don't use the inferred
            # format because this path makes process slower in this
            # special case
            format_is_iso8601 = (
                ('%Y-%m-%dT%H:%M:%S.%f'.startswith(format) or
                 '%Y-%m-%d %H:%M:%S.%f'.startswith(format)) and
                format != '%Y')
            if format_is_iso8601:
                # an explicitly given ISO format must be enforced by the
                # parser; an inferred one need not be
                require_iso8601 = not infer_datetime_format
                format = None

        try:
            result = None

            if format is not None:
                # shortcut formatting here
                if format == '%Y%m%d':
                    try:
                        result = _attempt_YYYYMMDD(arg, errors=errors)
                    except Exception:
                        # not a bare except: KeyboardInterrupt/SystemExit
                        # must not be turned into a ValueError
                        raise ValueError("cannot convert the input to "
                                         "'%Y%m%d' date format")

                # fallback
                if result is None:
                    try:
                        result = tslib.array_strptime(
                            arg, format, exact=exact, errors=errors)
                    except tslib.OutOfBoundsDatetime:
                        if errors == 'raise':
                            raise
                        result = arg
                    except ValueError:
                        # if format was inferred, try falling back
                        # to array_to_datetime - terminate here
                        # for specified formats
                        if not infer_datetime_format:
                            if errors == 'raise':
                                raise
                            result = arg

            if result is None and (format is None or infer_datetime_format):
                result = tslib.array_to_datetime(
                    arg,
                    errors=errors,
                    utc=utc,
                    dayfirst=dayfirst,
                    yearfirst=yearfirst,
                    freq=freq,
                    unit=unit,
                    require_iso8601=require_iso8601
                )

            if com.is_datetime64_dtype(result) and box:
                result = DatetimeIndex(result,
                                       tz='utc' if utc else None,
                                       name=name)
            return result

        except ValueError as e:
            # last resort: try datetime_to_datetime64 on the values; if
            # that fails too, surface the original parsing error
            try:
                values, tz = tslib.datetime_to_datetime64(arg)
                return DatetimeIndex._simple_new(values, name=name, tz=tz)
            except (ValueError, TypeError):
                raise e
Esempio n. 26
0
    def _convert_listlike(arg, box, format, name=None):
        """
        Convert a list-like of date-like values to datetimes.

        Apart from its parameters, this function picks up ``utc``,
        ``unit``, ``errors``, ``exact``, ``dayfirst``, ``yearfirst``,
        ``freq`` and ``infer_datetime_format`` from the enclosing scope.

        Parameters
        ----------
        arg : list-like
            Values to convert. A list or tuple is first turned into an
            object-dtype ndarray.
        box : bool
            If True, datetime64 results are wrapped in a DatetimeIndex.
        format : str or None
            strptime-style format; None means no explicit format.
        name : object, default None
            Name passed to any DatetimeIndex created here.

        Returns
        -------
        DatetimeIndex or ndarray
            The converted values. If ``array_strptime`` raises and
            ``errors`` is not 'raise', the object-dtype input is returned
            unchanged.

        Raises
        ------
        ValueError
            If the values cannot be converted and the final
            ``datetime_to_datetime64`` fallback fails as well.
        """
        if isinstance(arg, (list, tuple)):
            arg = np.array(arg, dtype='O')

        # these are shortcutable
        if com.is_datetime64_ns_dtype(arg):
            if box and not isinstance(arg, DatetimeIndex):
                try:
                    return DatetimeIndex(arg, tz='utc' if utc else None,
                                         name=name)
                except ValueError:
                    # best effort: boxing failed, hand back the raw values
                    pass

            return arg

        elif com.is_datetime64tz_dtype(arg):
            if not isinstance(arg, DatetimeIndex):
                # propagate the requested name like the other branches
                return DatetimeIndex(arg, tz='utc' if utc else None,
                                     name=name)
            if utc:
                # tz_convert(None) by itself returns a naive index;
                # localize again so the result is UTC-aware
                arg = arg.tz_convert(None).tz_localize('UTC')
            return arg

        elif format is None and com.is_integer_dtype(arg) and unit == 'ns':
            # integers with unit 'ns' are reinterpreted directly
            result = arg.astype('datetime64[ns]')
            if box:
                return DatetimeIndex(result, tz='utc' if utc else None,
                                     name=name)

            return result

        arg = com._ensure_object(arg)
        require_iso8601 = False

        if infer_datetime_format and format is None:
            format = _guess_datetime_format_for_array(arg, dayfirst=dayfirst)

        if format is not None:
            # There is a special fast-path for iso8601 formatted
            # datetime strings, so in those cases don't use the inferred
            # format because this path makes process slower in this
            # special case
            format_is_iso8601 = (
                ('%Y-%m-%dT%H:%M:%S.%f'.startswith(format) or
                 '%Y-%m-%d %H:%M:%S.%f'.startswith(format)) and
                format != '%Y'
            )
            if format_is_iso8601:
                # an explicitly given ISO format must be enforced by the
                # parser; an inferred one need not be
                require_iso8601 = not infer_datetime_format
                format = None

        try:
            result = None

            if format is not None:
                # shortcut formatting here
                if format == '%Y%m%d':
                    try:
                        result = _attempt_YYYYMMDD(arg, errors=errors)
                    except Exception:
                        # not a bare except: KeyboardInterrupt/SystemExit
                        # must not be turned into a ValueError
                        raise ValueError("cannot convert the input to '%Y%m%d' date format")

                # fallback
                if result is None:
                    try:
                        result = tslib.array_strptime(
                            arg, format, exact=exact, errors=errors)
                    except tslib.OutOfBoundsDatetime:
                        if errors == 'raise':
                            raise
                        result = arg
                    except ValueError:
                        # if format was inferred, try falling back
                        # to array_to_datetime - terminate here
                        # for specified formats
                        if not infer_datetime_format:
                            if errors == 'raise':
                                raise
                            result = arg

            if result is None and (format is None or infer_datetime_format):
                result = tslib.array_to_datetime(arg, errors=errors,
                                                 utc=utc, dayfirst=dayfirst,
                                                 yearfirst=yearfirst, freq=freq,
                                                 unit=unit,
                                                 require_iso8601=require_iso8601)

            if com.is_datetime64_dtype(result) and box:
                result = DatetimeIndex(result, tz='utc' if utc else None,
                                       name=name)
            return result

        except ValueError as e:
            # last resort: try datetime_to_datetime64 on the values; if
            # that fails too, surface the original parsing error
            try:
                values, tz = tslib.datetime_to_datetime64(arg)
                return DatetimeIndex._simple_new(values, name=name, tz=tz)
            except (ValueError, TypeError):
                raise e
Esempio n. 27
0
    def test_constructor_with_datetime_tz(self):
        # 8260: support datetime64 with tz
        #
        # Exercises a Series built from a tz-aware date_range: dtype
        # reporting, export through .values, indexing, concat, astype,
        # string formatting, dtype inference and an all-NaT Series.

        rng = date_range('20130101', periods=3, tz='US/Eastern')
        ser = Series(rng)
        self.assertTrue(ser.dtype.name == 'datetime64[ns, US/Eastern]')
        self.assertTrue(ser.dtype == 'datetime64[ns, US/Eastern]')
        self.assertTrue(com.is_datetime64tz_dtype(ser.dtype))
        self.assertTrue('datetime64[ns, US/Eastern]' in str(ser))

        # export: .values is a plain datetime64[ns] ndarray; localizing it
        # to UTC and converting to the Series tz must give back the range
        raw = ser.values
        self.assertIsInstance(raw, np.ndarray)
        self.assertTrue(raw.dtype == 'datetime64[ns]')
        roundtrip = pd.DatetimeIndex(raw).tz_localize('UTC').tz_convert(
            tz=ser.dt.tz)
        self.assertTrue(rng.equals(roundtrip))

        # indexing: positional and label access return the same Timestamp
        by_position = ser.iloc[0]
        self.assertEqual(
            by_position,
            Timestamp('2013-01-01 00:00:00-0500', tz='US/Eastern',
                      offset='D'))
        by_label = ser[0]
        self.assertEqual(
            by_label,
            Timestamp('2013-01-01 00:00:00-0500', tz='US/Eastern',
                      offset='D'))

        # boolean-mask selection matches the equivalent slice
        mask = Series([True, True, False], index=ser.index)
        assert_series_equal(ser[mask], ser[0:2])

        head = ser.iloc[0:1]
        assert_series_equal(head, Series(rng[0:1]))

        # concat: gluing the two pieces back together restores the Series
        pieces = [ser.iloc[0:1], ser.iloc[1:]]
        assert_series_equal(pd.concat(pieces), ser)

        # astype
        as_object = ser.astype(object)
        expected_object = Series(DatetimeIndex(ser._values).asobject)
        assert_series_equal(as_object, expected_object)

        localized = Series(ser.values).dt.tz_localize('UTC')
        assert_series_equal(localized.dt.tz_convert(ser.dt.tz), ser)

        # astype - datetime64[ns, tz]
        from_string_dtype = Series(ser.values).astype(
            'datetime64[ns, US/Eastern]')
        assert_series_equal(from_string_dtype, ser)

        from_dtype_object = Series(ser.values).astype(ser.dtype)
        assert_series_equal(from_dtype_object, ser)

        as_cet = ser.astype('datetime64[ns, CET]')
        expected_cet = Series(
            date_range('20130101 06:00:00', periods=3, tz='CET'))
        assert_series_equal(as_cet, expected_cet)

        # short str
        self.assertTrue('datetime64[ns, US/Eastern]' in str(ser))

        # formatting with NaT
        shifted = ser.shift()
        self.assertTrue('datetime64[ns, US/Eastern]' in str(shifted))
        self.assertTrue('NaT' in str(shifted))

        # long str
        long_ser = Series(
            date_range('20130101', periods=1000, tz='US/Eastern'))
        self.assertTrue('datetime64[ns, US/Eastern]' in str(long_ser))

        inferred_idx = pd.DatetimeIndex(ser, freq='infer')
        tm.assert_index_equal(inferred_idx, rng)

        # inference: a single shared tz gives a tz-aware dtype
        same_tz = Series(
            [pd.Timestamp('2013-01-01 13:00:00-0800', tz='US/Pacific'),
             pd.Timestamp('2013-01-02 14:00:00-0800', tz='US/Pacific')])
        self.assertTrue(same_tz.dtype == 'datetime64[ns, US/Pacific]')
        self.assertTrue(lib.infer_dtype(same_tz) == 'datetime64')

        # inference: mixed time zones are kept as object dtype
        mixed_tz = Series(
            [pd.Timestamp('2013-01-01 13:00:00-0800', tz='US/Pacific'),
             pd.Timestamp('2013-01-02 14:00:00-0800', tz='US/Eastern')])
        self.assertTrue(mixed_tz.dtype == 'object')
        self.assertTrue(lib.infer_dtype(mixed_tz) == 'datetime')

        # with all NaT
        all_nat = Series(pd.NaT, index=[0, 1],
                         dtype='datetime64[ns, US/Eastern]')
        expected_nat = Series(
            pd.DatetimeIndex(['NaT', 'NaT'], tz='US/Eastern'))
        assert_series_equal(all_nat, expected_nat)