Esempio n. 1
0
 def _make_str_accessor(self):
     """
     Build the ``.str`` accessor for this object after validating it.

     A Series is accepted when its dtype is object, or when it is
     categorical and its categories are object dtype.  An Index is accepted
     when its ``inferred_type`` is one of the string-like kinds listed below
     and it has a single level.

     Returns
     -------
     StringMethods wrapping ``self``

     Raises
     ------
     AttributeError
         If any of the checks above fails.
     """
     from pandas.core.series import Series
     from pandas.core.index import Index

     if (isinstance(self, Series) and
             not is_object_dtype(self.dtype) and
             not (is_categorical_dtype(self.dtype) and
                  is_object_dtype(self.values.categories))):
         # Neither a string series nor a categorical series with strings
         # inside the categories.  Object dtype is only a proxy here: series
         # holding any non-string value should really be excluded too, but
         # that is not practical for performance reasons until there is a
         # dedicated str dtype (GH 9343).
         raise AttributeError("Can only use .str accessor with string "
                              "values, which use np.object_ dtype in "
                              "pandas")

     if isinstance(self, Index):
         # inferred_type values for which the index can contain strings
         string_like = ('string', 'unicode', 'mixed', 'mixed-integer')
         if self.inferred_type not in string_like:
             raise AttributeError(
                 "Can only use .str accessor with string values "
                 "(i.e. inferred_type is 'string', 'unicode' or 'mixed')")
         if self.nlevels > 1:
             raise AttributeError(
                 "Can only use .str accessor with Index, not MultiIndex")

     return StringMethods(self)
Esempio n. 2
0
 def _make_str_accessor(self):
     """
     Validate ``self`` and return its string-method accessor.

     Series must be object dtype, or categorical with object-dtype
     categories.  Index objects must have a string-like ``inferred_type``
     and must not have more than one level.

     Returns
     -------
     StringMethods wrapping ``self``

     Raises
     ------
     AttributeError
         When the Series or Index does not satisfy the rules above.
     """
     from pandas.core.series import Series
     from pandas.core.index import Index

     if isinstance(self, Series):
         object_categories = (is_categorical_dtype(self.dtype) and
                              is_object_dtype(self.values.categories))
         if not (object_categories or is_object_dtype(self.dtype)):
             # It is neither a string series nor a categorical series with
             # strings inside the categories.  Testing for object dtype is a
             # compromise: excluding every series with a non-string value
             # is not practical for performance reasons until a str dtype
             # exists (GH 9343).
             raise AttributeError("Can only use .str accessor with string "
                                  "values, which use np.object_ dtype in "
                                  "pandas")

     if isinstance(self, Index):
         # the inferred types under which an index can hold string values
         if self.inferred_type not in ('string', 'unicode', 'mixed',
                                       'mixed-integer'):
             message = ("Can only use .str accessor with string values "
                        "(i.e. inferred_type is 'string', 'unicode' or "
                        "'mixed')")
             raise AttributeError(message)
         if self.nlevels > 1:
             raise AttributeError("Can only use .str accessor with Index, "
                                  "not MultiIndex")

     return StringMethods(self)
Esempio n. 3
0
def _soft_convert_objects(values,
                          datetime=True,
                          numeric=True,
                          timedelta=True,
                          coerce=False,
                          copy=True):
    """
    If we have an object dtype, try to coerce dates and/or numbers.

    Parameters
    ----------
    values : list, tuple, scalar or array-like
        Lists and tuples are turned into an object ndarray; anything without
        a ``dtype`` attribute is wrapped in a one-element object ndarray.
        Inputs whose dtype is not object are returned without conversion.
    datetime, numeric, timedelta : bool, default True
        Which conversions to attempt.  At least one must be True.
    coerce : bool, default False
        Force the single selected conversion with ``errors='coerce'``.
        Exactly one of the three flags above may be True in that case.
    copy : bool, default True
        Copy the data on the "not object dtype" path and after the numeric
        soft conversion.

    Returns
    -------
    The converted values, or the input values when nothing converted.

    Raises
    ------
    ValueError
        If no conversion is selected, or if more than one is selected
        together with ``coerce=True``.
    """

    conversion_count = sum((datetime, numeric, timedelta))
    if conversion_count == 0:
        raise ValueError('At least one of datetime, numeric or timedelta must '
                         'be True.')
    elif conversion_count > 1 and coerce:
        raise ValueError("Only one of 'datetime', 'numeric' or "
                         "'timedelta' can be True when coerce=True.")

    if isinstance(values, (list, tuple)):
        # List or scalar
        values = np.array(values, dtype=np.object_)
    elif not hasattr(values, 'dtype'):
        values = np.array([values], dtype=np.object_)
    elif not is_object_dtype(values.dtype):
        # If not object, do not attempt conversion
        values = values.copy() if copy else values
        return values

    # With coerce exactly one flag is set (validated above)
    if coerce:
        # Immediate return if coerce
        if datetime:
            return pd.to_datetime(values, errors='coerce', box=False)
        elif timedelta:
            return pd.to_timedelta(values, errors='coerce', box=False)
        elif numeric:
            return pd.to_numeric(values, errors='coerce')

    # Soft conversions
    if datetime:
        values = lib.maybe_convert_objects(values, convert_datetime=datetime)

    if timedelta and is_object_dtype(values.dtype):
        # Object check to ensure only run if previous did not convert
        values = lib.maybe_convert_objects(values, convert_timedelta=timedelta)

    if numeric and is_object_dtype(values.dtype):
        try:
            converted = lib.maybe_convert_numeric(values,
                                                  set(),
                                                  coerce_numeric=True)
            # If all NaNs, then do not alter
            values = converted if not isnull(converted).all() else values
            values = values.copy() if copy else values
        except Exception:
            # Best effort: keep the unconverted values.  Catching Exception
            # rather than everything lets KeyboardInterrupt and SystemExit
            # propagate.
            pass

    return values
Esempio n. 4
0
File: ops.py Progetto: Xndr7/pandas
 def safe_na_op(lvalues, rvalues):
     """
     Run ``na_op`` and fall back to an element-wise ``op`` for object data.

     If ``na_op`` raises, the operation is retried element by element over
     ``rvalues`` when it is an object-dtype Series, or over ``lvalues`` when
     ``rvalues`` is not a Series and ``lvalues`` is object dtype.  In every
     other case the original exception is re-raised.
     """
     try:
         return na_op(lvalues, rvalues)
     except Exception:
         rhs_is_series = isinstance(rvalues, ABCSeries)
         if rhs_is_series and is_object_dtype(rvalues):
             # if dtype is object, try elementwise op
             return _algos.arrmap_object(rvalues, lambda x: op(lvalues, x))
         if not rhs_is_series and is_object_dtype(lvalues):
             return _algos.arrmap_object(lvalues, lambda x: op(x, rvalues))
         raise
Esempio n. 5
0
def _soft_convert_objects(values, datetime=True, numeric=True, timedelta=True,
                          coerce=False, copy=True):
    """
    If we have an object dtype, try to coerce dates and/or numbers.

    Parameters
    ----------
    values : list, tuple, scalar or array-like
        A list/tuple becomes an object ndarray and an input without a
        ``dtype`` attribute becomes a one-element object ndarray.  Input
        that already has a non-object dtype is returned unconverted.
    datetime, numeric, timedelta : bool, default True
        Conversions to try; at least one has to be True.
    coerce : bool, default False
        Apply the one selected conversion with ``errors='coerce'``; only a
        single conversion flag may be True when this is set.
    copy : bool, default True
        Whether to copy on the non-object path and after the numeric soft
        conversion.

    Returns
    -------
    The converted values, or the original values if nothing converted.

    Raises
    ------
    ValueError
        When no conversion flag is set, or several are set with
        ``coerce=True``.
    """

    conversion_count = sum((datetime, numeric, timedelta))
    if conversion_count == 0:
        raise ValueError('At least one of datetime, numeric or timedelta must '
                         'be True.')
    elif conversion_count > 1 and coerce:
        raise ValueError("Only one of 'datetime', 'numeric' or "
                         "'timedelta' can be True when coerce=True.")

    if isinstance(values, (list, tuple)):
        # List or scalar
        values = np.array(values, dtype=np.object_)
    elif not hasattr(values, 'dtype'):
        values = np.array([values], dtype=np.object_)
    elif not is_object_dtype(values.dtype):
        # If not object, do not attempt conversion
        values = values.copy() if copy else values
        return values

    # The validation above guarantees a single flag is set when coercing
    if coerce:
        # Immediate return if coerce
        if datetime:
            return pd.to_datetime(values, errors='coerce', box=False)
        elif timedelta:
            return pd.to_timedelta(values, errors='coerce', box=False)
        elif numeric:
            return pd.to_numeric(values, errors='coerce')

    # Soft conversions
    if datetime:
        values = lib.maybe_convert_objects(values, convert_datetime=datetime)

    if timedelta and is_object_dtype(values.dtype):
        # Object check to ensure only run if previous did not convert
        values = lib.maybe_convert_objects(values, convert_timedelta=timedelta)

    if numeric and is_object_dtype(values.dtype):
        try:
            converted = lib.maybe_convert_numeric(values, set(),
                                                  coerce_numeric=True)
            # If all NaNs, then do not alter
            values = converted if not isnull(converted).all() else values
            values = values.copy() if copy else values
        except Exception:
            # Deliberately best effort: on failure the values stay as they
            # were.  Exception (not a bare except) keeps KeyboardInterrupt
            # and SystemExit from being swallowed.
            pass

    return values
Esempio n. 6
0
def nanmax(values, axis=None, skipna=True):
    """
    Maximum of ``values`` along ``axis``.

    Parameters
    ----------
    values : array-like
        Preprocessed through ``_get_values`` with ``fill_value_typ='-inf'``.
    axis : int or None, default None
    skipna : bool, default True
        Forwarded to ``_get_values``.

    Returns
    -------
    The maximum, passed through ``_wrap_results`` and ``_maybe_null_out``.
    ``np.nan`` is used when the builtin ``max`` fails on 1-d object data and
    for empty input.
    """
    values, mask, dtype, dtype_max = _get_values(values,
                                                 skipna,
                                                 fill_value_typ='-inf')

    # numpy 1.6.1 workaround in Python 3.x
    if is_object_dtype(values) and compat.PY3:

        if values.ndim > 1:
            apply_ax = axis if axis is not None else 0
            result = np.apply_along_axis(builtins.max, apply_ax, values)
        else:
            try:
                result = builtins.max(values)
            except Exception:
                # Best effort for failures of the builtin max.  Exception
                # instead of a bare except lets KeyboardInterrupt and
                # SystemExit through.
                result = np.nan
    else:
        if ((axis is not None and values.shape[axis] == 0)
                or values.size == 0):
            # Empty input: build a result of the reduced shape via sum() and
            # blank it out with NaN.
            try:
                result = ensure_float(values.sum(axis, dtype=dtype_max))
                result.fill(np.nan)
            except Exception:
                result = np.nan
        else:
            result = values.max(axis)

    result = _wrap_results(result, dtype)
    return _maybe_null_out(result, axis, mask)
Esempio n. 7
0
def get_dtype_kinds(l):
    """
    Collect the kinds of dtype present in a list of arrays.

    Parameters
    ----------
    l : list of arrays

    Returns
    -------
    a set of kinds that exist in this list of arrays
    """

    def _kind_of(arr):
        # The checks run in a fixed order and the first match wins; anything
        # unmatched falls back to the numpy dtype kind character.
        dtype = arr.dtype
        if com.is_categorical_dtype(dtype):
            return 'category'
        if com.is_sparse(arr):
            return 'sparse'
        if com.is_datetimetz(arr):
            return 'datetimetz'
        if com.is_datetime64_dtype(dtype):
            return 'datetime'
        if com.is_timedelta64_dtype(dtype):
            return 'timedelta'
        if com.is_object_dtype(dtype):
            return 'object'
        if com.is_bool_dtype(dtype):
            return 'bool'
        return dtype.kind

    return {_kind_of(arr) for arr in l}
Esempio n. 8
0
def nanmax(values, axis=None, skipna=True):
    """
    Compute the maximum of ``values`` along ``axis``.

    Parameters
    ----------
    values : array-like
        Run through ``_get_values`` with ``fill_value_typ='-inf'`` first.
    axis : int or None, default None
    skipna : bool, default True
        Handed on to ``_get_values``.

    Returns
    -------
    The maximum after ``_wrap_results`` and ``_maybe_null_out``.  The raw
    result is ``np.nan`` when the builtin ``max`` fails on 1-d object data
    and for empty input.
    """
    values, mask, dtype, dtype_max = _get_values(values, skipna,
                                                 fill_value_typ='-inf')

    # numpy 1.6.1 workaround in Python 3.x
    if is_object_dtype(values) and compat.PY3:

        if values.ndim > 1:
            apply_ax = axis if axis is not None else 0
            result = np.apply_along_axis(builtins.max, apply_ax, values)
        else:
            try:
                result = builtins.max(values)
            except Exception:
                # Still best effort, but no longer a bare except, so
                # KeyboardInterrupt and SystemExit are not swallowed.
                result = np.nan
    else:
        if ((axis is not None and values.shape[axis] == 0)
                or values.size == 0):
            # Nothing to reduce: take the shape from sum() and fill with NaN.
            try:
                result = ensure_float(values.sum(axis, dtype=dtype_max))
                result.fill(np.nan)
            except Exception:
                result = np.nan
        else:
            result = values.max(axis)

    result = _wrap_results(result, dtype)
    return _maybe_null_out(result, axis, mask)
Esempio n. 9
0
    def memory_usage(self, deep=False):
        """
        Memory usage of my values

        Parameters
        ----------
        deep : bool
            Introspect the data deeply, interrogate
            `object` dtypes for system-level memory consumption

        Returns
        -------
        bytes used

        Notes
        -----
        Memory usage does not include memory consumed by elements that
        are not components of the array if deep=False

        See Also
        --------
        numpy.ndarray.nbytes
        """
        data = self.values

        # Values that know how to measure themselves take precedence.
        if hasattr(data, 'memory_usage'):
            return data.memory_usage(deep=deep)

        total = data.nbytes
        if deep and com.is_object_dtype(self):
            total += lib.memory_usage_of_objects(data)
        return total
Esempio n. 10
0
def get_dtype_kinds(l):
    """
    Summarise which kinds of dtype occur in a list of arrays.

    Parameters
    ----------
    l : list of arrays

    Returns
    -------
    a set of kinds that exist in this list of arrays
    """

    found = set()
    for arr in l:
        dtype = arr.dtype

        # First matching check decides the label for this array.
        if com.is_categorical_dtype(dtype):
            found.add('category')
            continue
        if com.is_sparse(arr):
            found.add('sparse')
            continue
        if com.is_datetimetz(arr):
            found.add('datetimetz')
            continue
        if com.is_datetime64_dtype(dtype):
            found.add('datetime')
            continue
        if com.is_timedelta64_dtype(dtype):
            found.add('timedelta')
            continue
        if com.is_object_dtype(dtype):
            found.add('object')
            continue
        if com.is_bool_dtype(dtype):
            found.add('bool')
            continue

        # No special case applied: use the numpy kind character.
        found.add(dtype.kind)
    return found
Esempio n. 11
0
def get_dtype_kinds(l):
    """
    Return the distinct dtype kinds found in a list of arrays.

    Parameters
    ----------
    l : list of arrays

    Returns
    -------
    a set of kinds that exist in this list of arrays
    """

    def _label(arr):
        # Ordered checks; the numpy kind character is the fallback.
        dtype = arr.dtype
        if com.is_categorical_dtype(dtype):
            label = "category"
        elif com.is_sparse(arr):
            label = "sparse"
        elif com.is_datetimetz(arr):
            label = "datetimetz"
        elif com.is_datetime64_dtype(dtype):
            label = "datetime"
        elif com.is_timedelta64_dtype(dtype):
            label = "timedelta"
        elif com.is_object_dtype(dtype):
            label = "object"
        elif com.is_bool_dtype(dtype):
            label = "bool"
        else:
            label = dtype.kind
        return label

    return set(map(_label, l))
Esempio n. 12
0
 def astype(self, dtype, copy=True):
     """
     Cast to an object or integer dtype.

     Object dtypes return ``self.asobject``; integer dtypes return an
     ``Index`` of the values cast to ``'i8'`` (``copy`` is forwarded to that
     cast).

     Raises
     ------
     ValueError
         For any other target dtype.
     """
     target = np.dtype(dtype)

     if is_object_dtype(target):
         return self.asobject

     if is_integer_dtype(target):
         as_i8 = self.values.astype('i8', copy=copy)
         return Index(as_i8, name=self.name, dtype='i8')

     raise ValueError('Cannot cast PeriodIndex to dtype %s' % target)
    def test_memory_usage(self):
        """Compare shallow and deep ``memory_usage`` for each test object."""
        for obj in self.objs:
            shallow = obj.memory_usage()
            deep = obj.memory_usage(deep=True)

            holds_objects = com.is_object_dtype(obj) or (
                isinstance(obj, Series) and com.is_object_dtype(obj.index))
            if holds_objects:
                # if there are objects, only deep will pick them up
                self.assertTrue(deep > shallow)
            else:
                self.assertEqual(shallow, deep)

            if isinstance(obj, Series):
                # values-only usage plus index usage must add up to the
                # figure reported with index=True
                parts = obj.memory_usage(index=False) + obj.index.memory_usage()
                self.assertEqual(parts, obj.memory_usage(index=True))

            # sys.getsizeof will call the .memory_usage with
            # deep=True, and add on some GC overhead
            gc_overhead = deep - sys.getsizeof(obj)
            self.assertTrue(abs(gc_overhead) < 100)
Esempio n. 14
0
 def astype(self, dtype, copy=True):
     """
     Convert to the requested dtype; only object and integer are supported.

     An object dtype yields ``self.asobject``.  An integer dtype yields an
     ``Index`` built from the values cast to ``'i8'``, with ``copy`` passed
     to the cast.

     Raises
     ------
     ValueError
         If ``dtype`` is neither object nor integer.
     """
     dtype = np.dtype(dtype)
     wants_object = is_object_dtype(dtype)

     if not wants_object and not is_integer_dtype(dtype):
         raise ValueError('Cannot cast PeriodIndex to dtype %s' % dtype)

     if wants_object:
         return self.asobject

     ordinals = self.values.astype('i8', copy=copy)
     return Index(ordinals, name=self.name, dtype='i8')
Esempio n. 15
0
    def _from_arraylike(cls, data, freq, tz):
        """
        Normalise ``data`` and ``freq`` for building a PeriodIndex.

        Parameters
        ----------
        data : collection
            An ndarray, PeriodIndex, DatetimeIndex or Int64Index, or any
            other iterable.  Scalars and Period instances are rejected.
        freq : frequency or None
            When None it is taken from ``data`` where that is possible.
        tz : passed to ``dt64arr_to_periodarr`` for datetime64 input

        Returns
        -------
        (data, freq)
            NOTE(review): ``data`` presumably holds int64 period ordinals on
            every path -- confirm against ``period.extract_ordinals`` and
            ``dt64arr_to_periodarr``.

        Raises
        ------
        ValueError
            For scalar/Period input, or when ``freq`` is None and cannot be
            inferred for array input.
        """
        if not isinstance(
                data, (np.ndarray, PeriodIndex, DatetimeIndex, Int64Index)):
            if lib.isscalar(data) or isinstance(data, Period):
                raise ValueError('PeriodIndex() must be called with a '
                                 'collection of some kind, %s was passed' %
                                 repr(data))

            # other iterable of some kind
            if not isinstance(data, (list, tuple)):
                data = list(data)

            try:
                data = com._ensure_int64(data)
                # This ValueError is caught by the handler just below, so a
                # missing freq falls through to the object path.
                if freq is None:
                    raise ValueError('freq not specified')
                data = np.array([Period(x, freq=freq).ordinal for x in data],
                                dtype=np.int64)
            except (TypeError, ValueError):
                # Not usable as integers (or no freq): treat as objects
                data = com._ensure_object(data)

                if freq is None:
                    freq = period.extract_freq(data)
                data = period.extract_ordinals(data, freq)
        else:
            if isinstance(data, PeriodIndex):
                if freq is None or freq == data.freq:
                    # Same (or unspecified) frequency: reuse the values
                    freq = data.freq
                    data = data.values
                else:
                    # Different frequency requested: convert the values
                    base1, _ = _gfc(data.freq)
                    base2, _ = _gfc(freq)
                    data = period.period_asfreq_arr(data.values, base1, base2,
                                                    1)
            else:

                if freq is None and com.is_object_dtype(data):
                    # must contain Period instance and thus extract ordinals
                    freq = period.extract_freq(data)
                    data = period.extract_ordinals(data, freq)

                if freq is None:
                    msg = 'freq not specified and cannot be inferred'
                    raise ValueError(msg)

                if data.dtype != np.int64:
                    if np.issubdtype(data.dtype, np.datetime64):
                        data = dt64arr_to_periodarr(data, freq, tz)
                    else:
                        # Try an integer cast first, then fall back to
                        # extracting ordinals from objects
                        try:
                            data = com._ensure_int64(data)
                        except (TypeError, ValueError):
                            data = com._ensure_object(data)
                            data = period.extract_ordinals(data, freq)

        return data, freq
Esempio n. 16
0
    def _from_arraylike(cls, data, freq, tz):
        """
        Prepare the ``(data, freq)`` pair used to build a PeriodIndex.

        Parameters
        ----------
        data : collection
            Either an ndarray / PeriodIndex / DatetimeIndex / Int64Index or
            another iterable.  A scalar or a Period raises.
        freq : frequency or None
            Extracted from ``data`` when None and extraction is possible.
        tz : forwarded to ``dt64arr_to_periodarr`` for datetime64 arrays

        Returns
        -------
        (data, freq)
            NOTE(review): ``data`` looks like int64 ordinals in all
            branches -- verify with the ``period`` helpers used below.

        Raises
        ------
        ValueError
            If ``data`` is a scalar or Period, or if ``freq`` is None and
            cannot be inferred for array input.
        """
        if not isinstance(data, (np.ndarray, PeriodIndex,
                                 DatetimeIndex, Int64Index)):
            if lib.isscalar(data) or isinstance(data, Period):
                raise ValueError('PeriodIndex() must be called with a '
                                 'collection of some kind, %s was passed'
                                 % repr(data))

            # other iterable of some kind
            if not isinstance(data, (list, tuple)):
                data = list(data)

            try:
                data = com._ensure_int64(data)
                # Raised inside the try on purpose: the handler below picks
                # it up and switches to the object path.
                if freq is None:
                    raise ValueError('freq not specified')
                data = np.array([Period(x, freq=freq).ordinal for x in data],
                                dtype=np.int64)
            except (TypeError, ValueError):
                # Integer route failed (or freq missing): go through objects
                data = com._ensure_object(data)

                if freq is None:
                    freq = period.extract_freq(data)
                data = period.extract_ordinals(data, freq)
        else:
            if isinstance(data, PeriodIndex):
                if freq is None or freq == data.freq:
                    # Frequency matches or was not given: take values as is
                    freq = data.freq
                    data = data.values
                else:
                    # Convert the values to the requested frequency
                    base1, _ = _gfc(data.freq)
                    base2, _ = _gfc(freq)
                    data = period.period_asfreq_arr(data.values,
                                                    base1, base2, 1)
            else:

                if freq is None and com.is_object_dtype(data):
                    # must contain Period instance and thus extract ordinals
                    freq = period.extract_freq(data)
                    data = period.extract_ordinals(data, freq)

                if freq is None:
                    msg = 'freq not specified and cannot be inferred'
                    raise ValueError(msg)

                if data.dtype != np.int64:
                    if np.issubdtype(data.dtype, np.datetime64):
                        data = dt64arr_to_periodarr(data, freq, tz)
                    else:
                        # Integer cast first; on failure extract ordinals
                        # from the object representation
                        try:
                            data = com._ensure_int64(data)
                        except (TypeError, ValueError):
                            data = com._ensure_object(data)
                            data = period.extract_ordinals(data, freq)

        return data, freq
Esempio n. 17
0
    def test_memory_usage(self):
        """Deep usage exceeds shallow usage only when objects are present."""
        for obj in self.objs:
            shallow = obj.memory_usage()
            deep = obj.memory_usage(deep=True)

            if com.is_object_dtype(obj):
                self.assertTrue(deep > shallow)
            else:
                self.assertEqual(shallow, deep)

            # The remaining checks involve the index and apply to Series only
            if not isinstance(obj, Series):
                continue

            shallow = obj.memory_usage(index=True)
            deep = obj.memory_usage(index=True, deep=True)
            if com.is_object_dtype(obj) or com.is_object_dtype(obj.index):
                self.assertTrue(deep > shallow)
            else:
                self.assertEqual(shallow, deep)

            pieces = obj.memory_usage(index=False) + obj.index.memory_usage()
            self.assertEqual(pieces, obj.memory_usage(index=True))
Esempio n. 18
0
    def test_memory_usage(self):
        """Check ``memory_usage`` with and without ``deep`` and the index."""

        def check(has_objects, plain, introspected):
            # with objects the deep figure must be larger, otherwise equal
            if has_objects:
                self.assertTrue(introspected > plain)
            else:
                self.assertEqual(plain, introspected)

        for obj in self.objs:
            res = obj.memory_usage()
            res2 = obj.memory_usage(deep=True)
            check(com.is_object_dtype(obj), res, res2)

            if isinstance(obj, Series):
                res = obj.memory_usage(index=True)
                res2 = obj.memory_usage(index=True, deep=True)
                check(com.is_object_dtype(obj) or
                      com.is_object_dtype(obj.index), res, res2)

                values_and_index = (obj.memory_usage(index=False) +
                                    obj.index.memory_usage())
                self.assertEqual(values_and_index,
                                 obj.memory_usage(index=True))
Esempio n. 19
0
 def astype(self, dtype):
     """
     Return an ``Index`` of the values in the requested dtype.

     Float and integer targets cast ``self._values``; an object target
     reuses ``self._values`` unchanged.

     Raises
     ------
     TypeError
         For any other target dtype.
     """
     dtype = pandas_dtype(dtype)
     numeric_target = is_float_dtype(dtype) or is_integer_dtype(dtype)

     if not numeric_target and not is_object_dtype(dtype):
         raise TypeError('Setting %s dtype to anything other than '
                         'float64 or object is not supported' %
                         self.__class__)

     values = self._values.astype(dtype) if numeric_target else self._values
     return Index(values, name=self.name, dtype=dtype)
Esempio n. 20
0
def pandas_col_to_ibis_type(col):
    """
    Map the dtype of a pandas column to an ibis type.

    Returns a type name string for datetime64[ns], timedelta64, boolean,
    signed/unsigned integer, float and object columns, and a
    ``dt.Category`` sized by the number of categories for categoricals.
    timedelta64 columns are encoded as ``'int64'`` after printing a warning.

    Raises
    ------
    com.IbisTypeError
        For datetime64 columns that are not nanosecond based, for uint64
        columns and for any dtype not handled here.
    """
    import pandas.core.common as pdcom
    import ibis.expr.datatypes as dt
    import numpy as np

    dty = col.dtype

    # datetime types
    if pdcom.is_datetime64_dtype(dty):
        if not pdcom.is_datetime64_ns_dtype(dty):
            raise com.IbisTypeError("Column {0} has dtype {1}, which is "
                                    "datetime64-like but does "
                                    "not use nanosecond units"
                                    .format(col.name, dty))
        return 'timestamp'

    if pdcom.is_timedelta64_dtype(dty):
        print("Warning: encoding a timedelta64 as an int64")
        return 'int64'

    if pdcom.is_categorical_dtype(dty):
        return dt.Category(len(col.cat.categories))

    if pdcom.is_bool_dtype(dty):
        return 'boolean'

    # simple numerical types, checked in this order; unsigned integers map
    # to the next wider signed type
    numeric_names = (
        (np.int8, 'int8'),
        (np.int16, 'int16'),
        (np.int32, 'int32'),
        (np.int64, 'int64'),
        (np.float32, 'float'),
        (np.float64, 'double'),
        (np.uint8, 'int16'),
        (np.uint16, 'int32'),
        (np.uint32, 'int64'),
    )
    for numpy_type, ibis_name in numeric_names:
        if issubclass(dty.type, numpy_type):
            return ibis_name

    # no wider signed type is used for unsigned 64-bit integers
    if issubclass(dty.type, np.uint64):
        raise com.IbisTypeError("Column {0} is an unsigned int64"
                                .format(col.name))

    if pdcom.is_object_dtype(dty):
        # TODO: overly broad?
        return 'string'

    raise com.IbisTypeError("Column {0} is dtype {1}"
                            .format(col.name, dty))
Esempio n. 21
0
 def astype(self, dtype):
     """
     Build an ``Index`` with the requested dtype from ``self._values``.

     The values are cast for float and integer targets and passed through
     as they are for an object target.

     Raises
     ------
     TypeError
         If the target dtype is not float, integer or object.
     """
     target = pandas_dtype(dtype)

     if is_float_dtype(target) or is_integer_dtype(target):
         return Index(self._values.astype(target), name=self.name,
                      dtype=target)

     if is_object_dtype(target):
         return Index(self._values, name=self.name, dtype=target)

     raise TypeError('Setting %s dtype to anything other than '
                     'float64 or object is not supported' %
                     self.__class__)
Esempio n. 22
0
    def _simple_new(cls, values, name=None, freq=None, **kwargs):
        """
        Create an instance of ``cls`` around ``values`` without running
        ``__init__``.

        Object-dtype values are handed to the regular ``PeriodIndex``
        constructor instead; everything else is stored on the new instance
        as ``_data`` together with ``name`` and ``freq``.

        Parameters
        ----------
        values : array-like
        name : optional, stored on the result
        freq : optional, stored on the result
        **kwargs : only used when delegating to ``PeriodIndex``
        """
        # NOTE(review): plain NumPy dtypes appear to be falsy (they report
        # zero fields), so this guard may also fire for ordinary ndarrays --
        # confirm before tightening it to an ``is None`` check.
        if not getattr(values, 'dtype', None):
            # np.asarray copies only when it has to.  It replaces
            # np.array(values, copy=False), which raises on NumPy >= 2 when
            # a copy cannot be avoided (e.g. for list input).
            values = np.asarray(values)
        if is_object_dtype(values):
            return PeriodIndex(values, name=name, freq=freq, **kwargs)

        result = object.__new__(cls)
        result._data = values
        result.name = name
        result.freq = freq
        result._reset_identity()
        return result
Esempio n. 23
0
def _bn_ok_dtype(dt, name):
    """
    Tell whether dtype ``dt`` is acceptable for the bottleneck function
    ``name``.

    Object and datetime/timedelta dtypes are rejected, and so is ``nansum``
    on dtypes with an item size below 8 bytes.
    """
    # Bottleneck chokes on datetime64
    if is_object_dtype(dt) or is_datetime_or_timedelta_dtype(dt):
        return False

    # bottleneck does not properly upcast during the sum
    # so can overflow
    if name == 'nansum' and dt.itemsize < 8:
        return False

    return True
Esempio n. 24
0
def unconvert(values, dtype, compress=None):
    """
    Rebuild array data of the given ``dtype`` from its serialised form.

    Parameters
    ----------
    values : ExtType, str or sequence
        An ``ExtType`` with code 0 is unwrapped to its ``data``; any other
        non-categorical, non-object input is encoded with latin1 before
        decoding.
    dtype : dtype specification
        Categorical dtypes return ``values`` untouched and object dtypes
        return an object ndarray; otherwise the ``.base`` of
        ``pandas_dtype(dtype)`` is used for decoding.
    compress : {None, u'zlib', u'blosc'}
        Compression that was applied to the data.

    Returns
    -------
    ``values`` itself for categorical dtypes, otherwise a numpy array.

    Raises
    ------
    ValueError
        If ``compress`` is truthy but neither u'zlib' nor u'blosc'.
    """

    as_is_ext = isinstance(values, ExtType) and values.code == 0

    if as_is_ext:
        values = values.data

    if is_categorical_dtype(dtype):
        return values

    elif is_object_dtype(dtype):
        return np.array(values, dtype=object)

    dtype = pandas_dtype(dtype).base

    if not as_is_ext:
        values = values.encode('latin1')

    if compress:
        if compress == u'zlib':
            _check_zlib()
            decompress = zlib.decompress
        elif compress == u'blosc':
            _check_blosc()
            decompress = blosc.decompress
        else:
            raise ValueError("compress must be one of 'zlib' or 'blosc'")

        # Try to hand the decompressed bytes to numpy without a copy; on
        # _BadMove the code falls through to the copying path at the bottom.
        try:
            return np.frombuffer(
                _move_into_mutable_buffer(decompress(values)),
                dtype=dtype,
            )
        except _BadMove as e:
            # Pull the decompressed data off of the `_BadMove` exception.
            # We don't just store this in the locals because we want to
            # minimize the risk of giving users access to a `bytes` object
            # whose data is also given to a mutable buffer.
            values = e.args[0]
            if len(values) > 1:
                # The empty string and single characters are memoized in many
                # string creating functions in the capi. This case should not
                # warn even though we need to make a copy because we are only
                # copying at most 1 byte.
                warnings.warn(
                    'copying data after decompressing; this may mean that'
                    ' decompress is caching its result',
                    PerformanceWarning,
                )
                # fall through to copying `np.fromstring`

    # Copy the string into a numpy array.
    return np.fromstring(values, dtype=dtype)
Esempio n. 25
0
def pandas_col_to_ibis_type(col):
    """
    Translate a pandas column's dtype into the matching ibis type.

    datetime64[ns], timedelta64 (encoded as ``'int64'`` after a printed
    warning), boolean, integer, float and object columns give a type name
    string; categorical columns give a ``dt.Category`` whose size is the
    number of categories.

    Raises
    ------
    com.IbisTypeError
        For non-nanosecond datetime64 columns, uint64 columns and any dtype
        without a mapping.
    """
    import pandas.core.common as pdcom
    import ibis.expr.datatypes as dt
    import numpy as np

    dty = col.dtype

    # datetime types
    if pdcom.is_datetime64_dtype(dty):
        if pdcom.is_datetime64_ns_dtype(dty):
            return 'timestamp'
        raise com.IbisTypeError("Column {0} has dtype {1}, which is "
                                "datetime64-like but does "
                                "not use nanosecond units".format(
                                    col.name, dty))

    if pdcom.is_timedelta64_dtype(dty):
        print("Warning: encoding a timedelta64 as an int64")
        return 'int64'

    if pdcom.is_categorical_dtype(dty):
        return dt.Category(len(col.cat.categories))

    if pdcom.is_bool_dtype(dty):
        return 'boolean'

    # simple numerical types: the first pair whose numpy type matches wins;
    # unsigned integers are widened to the next signed type
    scalar_type = dty.type
    candidates = (
        (np.int8, 'int8'),
        (np.int16, 'int16'),
        (np.int32, 'int32'),
        (np.int64, 'int64'),
        (np.float32, 'float'),
        (np.float64, 'double'),
        (np.uint8, 'int16'),
        (np.uint16, 'int32'),
        (np.uint32, 'int64'),
    )
    matched = next((label for numpy_type, label in candidates
                    if issubclass(scalar_type, numpy_type)), None)
    if matched is not None:
        return matched

    # unsigned 64-bit has no wider signed type to widen into
    if issubclass(scalar_type, np.uint64):
        raise com.IbisTypeError("Column {0} is an unsigned int64".format(
            col.name))

    if pdcom.is_object_dtype(dty):
        # TODO: overly broad?
        return 'string'

    raise com.IbisTypeError("Column {0} is dtype {1}".format(col.name, dty))
Esempio n. 26
0
    def test_memory_usage(self):
        """Shallow vs deep ``memory_usage`` and agreement with getsizeof."""
        for item in self.objs:
            res = item.memory_usage()
            res_deep = item.memory_usage(deep=True)
            is_series = isinstance(item, Series)

            if com.is_object_dtype(item):
                expect_growth = True
            else:
                expect_growth = (is_series and
                                 com.is_object_dtype(item.index))

            if expect_growth:
                # if there are objects, only deep will pick them up
                self.assertTrue(res_deep > res)
            else:
                self.assertEqual(res, res_deep)

            if is_series:
                # usage without the index plus the index's own usage equals
                # the usage reported with index=True
                no_index = item.memory_usage(index=False)
                index_usage = item.index.memory_usage()
                self.assertEqual((no_index + index_usage),
                                 item.memory_usage(index=True))

            # sys.getsizeof will call the .memory_usage with
            # deep=True, and add on some GC overhead
            diff = res_deep - sys.getsizeof(item)
            self.assertTrue(abs(diff) < 100)
Esempio n. 27
0
def _bn_ok_dtype(dt, name):
    """
    Decide whether bottleneck function ``name`` may be used for dtype
    ``dt``.

    Returns False for object and datetime/timedelta dtypes, and for
    ``nansum`` when the item size is smaller than 8 bytes; True otherwise.
    """
    # Bottleneck chokes on datetime64
    unsupported = is_object_dtype(dt) or is_datetime_or_timedelta_dtype(dt)
    if unsupported:
        return False

    # bottleneck does not properly upcast during the sum
    # so can overflow
    may_overflow = name == 'nansum' and dt.itemsize < 8
    return not may_overflow
Esempio n. 28
0
    def _simple_new(cls, values, name=None, freq=None, **kwargs):
        """
        Build an instance of ``cls`` directly from ``values``, bypassing
        ``__init__``.

        When ``values`` is object dtype the call is delegated to the
        ``PeriodIndex`` constructor; otherwise ``values``, ``name`` and
        ``freq`` are attached to a freshly allocated instance.

        Parameters
        ----------
        values : array-like
        name : optional, stored on the result
        freq : optional, stored on the result
        **kwargs : forwarded only on the ``PeriodIndex`` delegation path
        """
        # NOTE(review): ordinary NumPy dtypes seem to evaluate as falsy, so
        # this branch may run for ndarrays as well as dtype-less input --
        # verify before changing the condition.
        if not getattr(values, 'dtype', None):
            # Use np.asarray rather than np.array(values, copy=False): both
            # avoid needless copies, but the latter raises on NumPy >= 2 if
            # a copy is required (for example when values is a list).
            values = np.asarray(values)
        if is_object_dtype(values):
            return PeriodIndex(values, name=name, freq=freq, **kwargs)

        result = object.__new__(cls)
        result._data = values
        result.name = name
        result.freq = freq
        result._reset_identity()
        return result
Esempio n. 29
0
    def na_op(x, y):
        """
        Compare ``x`` with ``y`` using ``op`` / the method called ``name``.

        ``op``, ``name`` and ``masker`` are not parameters; they are taken
        from the enclosing scope.  Categorical operands are dispatched to
        ``op`` directly, object-dtype ``x`` goes through
        ``_comp_method_OBJECT_ARRAY``, and everything else is compared via
        ``getattr(x, name)(y)``.

        Raises
        ------
        TypeError
            For a datetime-like vs numeric comparison, or when the
            comparison method returns NotImplemented.
        """

        # dispatch to the categorical if we have a categorical
        # in either operand
        if is_categorical_dtype(x):
            return op(x, y)
        elif is_categorical_dtype(y) and not isscalar(y):
            return op(y, x)

        if is_object_dtype(x.dtype):
            result = _comp_method_OBJECT_ARRAY(op, x, y)
        else:

            # we want to compare like types
            # we only want to convert to integer like if
            # we are not NotImplemented, otherwise
            # we would allow datetime64 (but viewed as i8) against
            # integer comparisons
            if is_datetimelike_v_numeric(x, y):
                raise TypeError("invalid type comparison")

            # numpy does not like comparisons vs None
            # (a null scalar compares unequal to everything: all True for
            # __ne__, all False for any other comparison)
            if isscalar(y) and isnull(y):
                if name == '__ne__':
                    return np.ones(len(x), dtype=bool)
                else:
                    return np.zeros(len(x), dtype=bool)

            # we have a datetime/timedelta and may need to convert
            mask = None
            if (needs_i8_conversion(x) or
                    (not isscalar(y) and needs_i8_conversion(y))):

                # remember the null positions before viewing as i8 so they
                # can be overwritten with `masker` afterwards
                if isscalar(y):
                    mask = isnull(x)
                    y = _index.convert_scalar(x, _values_from_object(y))
                else:
                    mask = isnull(x) | isnull(y)
                    y = y.view('i8')
                x = x.view('i8')

            try:
                result = getattr(x, name)(y)
                if result is NotImplemented:
                    raise TypeError("invalid type comparison")
            except AttributeError:
                # x has no method called `name`: fall back to the operator
                result = op(x, y)

            if mask is not None and mask.any():
                result[mask] = masker

        return result
Esempio n. 30
0
def _comp_method_OBJECT_ARRAY(op, x, y):
    """
    Compare the object array ``x`` with ``y`` using ``op``.

    A list ``y`` is first turned into an object array. Array-like ``y``
    (ndarray, Series or Index) is cast to object dtype if necessary,
    unwrapped to its ``.values`` when it is a Series/Index, and compared
    element-wise; anything else is compared as a scalar.
    """
    if isinstance(y, list):
        y = lib.list_to_object_array(y)

    if not isinstance(y, (np.ndarray, ABCSeries, ABCIndex)):
        # not array-like: compare every element of x against the scalar
        return lib.scalar_compare(x, y, op)

    other = y if is_object_dtype(y.dtype) else y.astype(np.object_)
    if isinstance(other, (ABCSeries, ABCIndex)):
        other = other.values

    return lib.vec_compare(x, other, op)
Esempio n. 31
0
 def _make_str_accessor(self):
     """
     Build the ``StringMethods`` accessor for ``self``.

     Raises
     ------
     AttributeError
         If ``self`` is a Series whose dtype is not object, or an Index
         whose ``inferred_type`` is not ``'string'``.
     """
     from pandas.core.series import Series
     from pandas.core.index import Index

     # this really should exclude all series with any non-string values,
     # but that isn't practical for performance reasons until we have a
     # str dtype (GH 9343)
     non_object_series = (isinstance(self, Series) and
                          not com.is_object_dtype(self.dtype))
     if non_object_series:
         raise AttributeError("Can only use .str accessor with string "
                              "values, which use np.object_ dtype in "
                              "pandas")

     if isinstance(self, Index) and self.inferred_type != 'string':
         raise AttributeError("Can only use .str accessor with string "
                              "values (i.e. inferred_type is 'string')")

     return StringMethods(self)
Esempio n. 32
0
 def _make_str_accessor(self):
     """
     Return a ``StringMethods`` wrapper around ``self``.

     An ``AttributeError`` is raised for a Series that is not of object
     dtype and for an Index whose ``inferred_type`` is not ``'string'``.
     """
     from pandas.core.series import Series
     from pandas.core.index import Index

     series_msg = ("Can only use .str accessor with string "
                   "values, which use np.object_ dtype in "
                   "pandas")
     index_msg = ("Can only use .str accessor with string "
                  "values (i.e. inferred_type is 'string')")

     # this really should exclude all series with any non-string values,
     # but that isn't practical for performance reasons until we have a
     # str dtype (GH 9343)
     if isinstance(self, Series) and not com.is_object_dtype(self.dtype):
         raise AttributeError(series_msg)
     if isinstance(self, Index) and self.inferred_type != 'string':
         raise AttributeError(index_msg)
     return StringMethods(self)
Esempio n. 33
0
    def _simple_new(cls, values, name=None, freq=None, **kwargs):
        """
        Create a new instance of ``cls`` around ``values`` without the full
        constructor.

        Object-dtype ``values`` are delegated to the regular ``PeriodIndex``
        constructor (``freq`` may be None on that path). Otherwise the values
        are stored as ``_data`` and ``freq`` is required.

        Raises
        ------
        ValueError
            If ``values`` is not object dtype and ``freq`` is None.
        """
        if not getattr(values, 'dtype', None):
            # np.asarray is the no-copy-if-possible conversion; unlike
            # np.array(..., copy=False) it does not raise under NumPy >= 2.0
            # when a copy is unavoidable (e.g. for a plain list)
            values = np.asarray(values)
        if is_object_dtype(values):
            return PeriodIndex(values, name=name, freq=freq, **kwargs)

        # validate before allocating the instance
        if freq is None:
            raise ValueError('freq is not specified')

        result = object.__new__(cls)
        result._data = values
        result.name = name
        result.freq = Period._maybe_convert_freq(freq)
        result._reset_identity()
        return result
Esempio n. 34
0
    def _simple_new(cls, values, name=None, freq=None, **kwargs):
        """
        Fast-path constructor: wrap ``values`` in a new ``cls`` instance.

        Values of object dtype are handed to the ``PeriodIndex`` constructor
        instead (where ``freq`` may be None). For every other dtype the
        values become ``_data`` and ``freq`` must be given.

        Raises
        ------
        ValueError
            If ``freq`` is None and ``values`` is not of object dtype.
        """
        if not getattr(values, 'dtype', None):
            # np.array(values, copy=False) raises under NumPy >= 2.0 whenever
            # a copy is required (e.g. list input); np.asarray keeps the
            # "copy only if needed" behaviour on every NumPy version
            values = np.asarray(values)
        if is_object_dtype(values):
            return PeriodIndex(values, name=name, freq=freq, **kwargs)

        # fail early, before a half-initialised instance exists
        if freq is None:
            raise ValueError('freq is not specified')

        result = object.__new__(cls)
        result._data = values
        result.name = name
        result.freq = Period._maybe_convert_freq(freq)
        result._reset_identity()
        return result
Esempio n. 35
0
 def astype(self, dtype, copy=True):
     """
     Cast the index values to ``dtype`` and return them as a new ``Index``.

     Float, integer and object targets are supported.

     Raises
     ------
     ValueError
         If an integer dtype is requested while the index has NaNs.
     TypeError
         If ``dtype`` is not a float, integer or object dtype.
     """
     dtype = pandas_dtype(dtype)

     # first decide what to cast to, then cast once
     if is_float_dtype(dtype):
         target = dtype
     elif is_integer_dtype(dtype):
         if self.hasnans:
             raise ValueError('cannot convert float NaN to integer')
         target = dtype
     elif is_object_dtype(dtype):
         target = 'object'
     else:
         raise TypeError('Setting %s dtype to anything other than '
                         'float64 or object is not supported' %
                         self.__class__)

     converted = self._values.astype(target, copy=copy)
     return Index(converted, name=self.name, dtype=dtype)
Esempio n. 36
0
 def _f(*args, **kwargs):
     """
     Call the wrapped ``f`` after rejecting disallowed arguments.

     Every positional and keyword argument value is passed to
     ``self.check``; the first one that is flagged aborts the call with a
     ``TypeError``. A ``ValueError`` raised by ``f`` is re-raised as a
     ``TypeError`` when the first positional argument is of object dtype.
     """
     for candidate in itertools.chain(args, compat.itervalues(kwargs)):
         if self.check(candidate):
             op_name = f.__name__.replace('nan', '')
             raise TypeError('reduction operation {0!r} not allowed for '
                             'this dtype'.format(op_name))
     try:
         return f(*args, **kwargs)
     except ValueError as err:
         # we want to transform an object array
         # ValueError message to the more typical TypeError
         # e.g. this is normally a disallowed function on
         # object arrays that contain strings
         if not is_object_dtype(args[0]):
             raise
         raise TypeError(err)
Esempio n. 37
0
 def _f(*args, **kwargs):
     """
     Guarded wrapper around ``f``.

     Raises ``TypeError`` if ``self.check`` flags any argument value
     (positional or keyword). If ``f`` raises ``ValueError`` and the first
     positional argument is of object dtype, the error is converted into a
     ``TypeError``; otherwise it propagates unchanged.
     """
     values_to_check = itertools.chain(args, compat.itervalues(kwargs))
     if any(self.check(item) for item in values_to_check):
         msg = 'reduction operation {0!r} not allowed for this dtype'
         raise TypeError(msg.format(f.__name__.replace('nan', '')))

     try:
         return f(*args, **kwargs)
     except ValueError as exc:
         # we want to transform an object array
         # ValueError message to the more typical TypeError
         # e.g. this is normally a disallowed function on
         # object arrays that contain strings
         first_is_object = is_object_dtype(args[0])
         if first_is_object:
             raise TypeError(exc)
         raise
Esempio n. 38
0
        def f(values, axis=None, skipna=True, **kwds):
            """
            Reduce ``values`` with ``bn_func`` when usable, else with ``alt``.

            ``bn_func``, ``bn_name`` and ``alt`` come from the enclosing
            scope. Keyword defaults stored in ``self.kwargs`` are added to
            ``kwds`` unless already given. Any exception in the main path
            triggers one more attempt with ``alt``; a ``ValueError`` from
            that attempt becomes a ``TypeError`` for object-dtype values.
            """
            if len(self.kwargs) > 0:
                # stored defaults never override explicitly passed keywords
                for key, default in compat.iteritems(self.kwargs):
                    kwds.setdefault(key, default)
            try:
                if self.zero_value is not None and values.size == 0:
                    if values.ndim != 1:
                        # zeros shaped like ``values`` minus the reduced axis
                        reduced_shape = (values.shape[:axis] +
                                         values.shape[axis + 1:])
                        zeros = np.empty(reduced_shape)
                        zeros.fill(0)
                        return zeros

                    # wrap the 0's if needed
                    if is_timedelta64_dtype(values):
                        return lib.Timedelta(0)
                    return 0

                bn_usable = (_USE_BOTTLENECK and skipna and
                             _bn_ok_dtype(values.dtype, bn_name))
                if not bn_usable:
                    result = alt(values, axis=axis, skipna=skipna, **kwds)
                else:
                    result = bn_func(values, axis=axis, **kwds)

                    # prefer to treat inf/-inf as NA, but must compute the func
                    # twice :(
                    if _has_infs(result):
                        result = alt(values, axis=axis, skipna=skipna, **kwds)
            except Exception:
                try:
                    result = alt(values, axis=axis, skipna=skipna, **kwds)
                except ValueError as err:
                    # we want to transform an object array
                    # ValueError message to the more typical TypeError
                    # e.g. this is normally a disallowed function on
                    # object arrays that contain strings
                    if not is_object_dtype(values):
                        raise
                    raise TypeError(err)

            return result
Esempio n. 39
0
        def f(values, axis=None, skipna=True, **kwds):
            """
            Compute the reduction of ``values`` along ``axis``.

            Keywords from ``self.kwargs`` fill in any that the caller did not
            pass. Empty input short-circuits to a zero result when
            ``self.zero_value`` is set. Otherwise ``bn_func`` is used when
            permitted (recomputing with ``alt`` if the result has infs) and
            ``alt`` is used in all other cases, including as a retry after
            any exception. ``bn_func``, ``bn_name`` and ``alt`` are taken
            from the enclosing scope.
            """
            def run_alt():
                # the non-bottleneck implementation, with the merged keywords
                return alt(values, axis=axis, skipna=skipna, **kwds)

            if len(self.kwargs) > 0:
                for k, v in compat.iteritems(self.kwargs):
                    kwds.setdefault(k, v)

            try:
                empty_with_zero = (self.zero_value is not None and
                                   values.size == 0)
                if empty_with_zero:
                    if values.ndim == 1:

                        # wrap the 0's if needed
                        if is_timedelta64_dtype(values):
                            return lib.Timedelta(0)
                        return 0

                    return np.zeros(values.shape[:axis] +
                                    values.shape[axis + 1:])

                if (_USE_BOTTLENECK and skipna and
                        _bn_ok_dtype(values.dtype, bn_name)):
                    result = bn_func(values, axis=axis, **kwds)

                    # prefer to treat inf/-inf as NA, but must compute the func
                    # twice :(
                    if _has_infs(result):
                        result = run_alt()
                else:
                    result = run_alt()
            except Exception:
                try:
                    result = run_alt()
                except ValueError as exc:
                    # we want to transform an object array
                    # ValueError message to the more typical TypeError
                    # e.g. this is normally a disallowed function on
                    # object arrays that contain strings

                    if is_object_dtype(values):
                        raise TypeError(exc)
                    raise

            return result
Esempio n. 40
0
def _ensure_numeric(x):
    if isinstance(x, np.ndarray):
        if is_integer_dtype(x) or is_bool_dtype(x):
            x = x.astype(np.float64)
        elif is_object_dtype(x):
            try:
                x = x.astype(np.complex128)
            except:
                x = x.astype(np.float64)
            else:
                if not np.any(x.imag):
                    x = x.real
    elif not (is_float(x) or is_integer(x) or is_complex(x)):
        try:
            x = float(x)
        except Exception:
            try:
                x = complex(x)
            except Exception:
                raise TypeError('Could not convert %s to numeric' % str(x))
    return x
Esempio n. 41
0
    def astype(self, dtype, copy=True):
        """
        Cast this index to ``dtype``.

        Object dtype returns ``self.asobject``; timedelta64[ns] returns
        ``self`` (a copy when ``copy is True``); other timedelta64 units and
        integer dtypes return a new ``Index`` built from the cast values.

        Raises
        ------
        ValueError
            If ``dtype`` is none of the supported kinds.
        """
        dtype = np.dtype(dtype)

        if is_object_dtype(dtype):
            return self.asobject

        if is_timedelta64_ns_dtype(dtype):
            return self.copy() if copy is True else self

        if is_timedelta64_dtype(dtype):
            # return an index (essentially this is division)
            converted = self.values.astype(dtype, copy=copy)
            if not self.hasnans:
                return Index(converted.astype('i8'), name=self.name)
            masked = self._maybe_mask_results(converted, convert='float64')
            return Index(masked, name=self.name)

        if is_integer_dtype(dtype):
            as_i8 = self.values.astype('i8', copy=copy)
            return Index(as_i8, dtype='i8', name=self.name)

        raise ValueError('Cannot cast TimedeltaIndex to dtype %s' % dtype)
Esempio n. 42
0
def _ensure_numeric(x):
    if isinstance(x, np.ndarray):
        if is_integer_dtype(x) or is_bool_dtype(x):
            x = x.astype(np.float64)
        elif is_object_dtype(x):
            try:
                x = x.astype(np.complex128)
            except:
                x = x.astype(np.float64)
            else:
                if not np.any(x.imag):
                    x = x.real
    elif not (is_float(x) or is_integer(x) or is_complex(x)):
        try:
            x = float(x)
        except Exception:
            try:
                x = complex(x)
            except Exception:
                raise TypeError('Could not convert %s to numeric' % str(x))
    return x
Esempio n. 43
0
def convert(values):
    """
    Convert numpy values into a serialisable payload.

    Categorical values are returned unchanged and object-dtype values are
    returned as a flat list. Everything else is flattened, viewed as 'i8'
    first when ``needs_i8_conversion`` says so, and returned as an
    ``ExtType`` wrapping the raw bytes, compressed according to the
    ``compressor`` variable from the enclosing scope ('zlib' or 'blosc').
    """

    dtype = values.dtype

    if is_categorical_dtype(values):
        return values

    elif is_object_dtype(dtype):
        # string arrays are returned like they are; this also means that no
        # object dtype can reach the byte-oriented paths below, so the
        # per-compressor object checks were unreachable and are gone
        return values.ravel().tolist()

    if needs_i8_conversion(dtype):
        values = values.view('i8')
    v = values.ravel()

    # NOTE(review): ``tostring`` is kept for compatibility with the NumPy
    # versions this file targets; newer NumPy spells it ``tobytes``.
    if compressor == 'zlib':
        _check_zlib()

        # convert to a bytes array
        return ExtType(0, zlib.compress(v.tostring()))

    elif compressor == 'blosc':
        _check_blosc()

        # convert to a bytes array
        return ExtType(0, blosc.compress(v.tostring(),
                                         typesize=dtype.itemsize))

    # ndarray (on original dtype)
    return ExtType(0, v.tostring())
Esempio n. 44
0
    def na_op(x, y):
        """
        Compare ``x`` with ``y`` element-wise and return the result.

        ``op`` (the comparison function) and ``name`` (the comparison
        method name, e.g. ``'__ne__'``) are taken from the enclosing scope.

        Nulls follow the usual missing-value rule: a null is unequal to
        everything, so at null positions ``!=`` is True and every other
        comparison is False.

        Raises
        ------
        TypeError
            If ``is_datetimelike_v_numeric(x, y)`` is true, or if the
            comparison method on ``x`` returns ``NotImplemented``.
        """

        # dispatch to the categorical if we have a categorical
        # in either operand
        if is_categorical_dtype(x):
            return op(x, y)
        elif is_categorical_dtype(y) and not isscalar(y):
            return op(y, x)

        if is_object_dtype(x.dtype):
            if isinstance(y, list):
                y = lib.list_to_object_array(y)

            if isinstance(y, (np.ndarray, pd.Series)):
                if not is_object_dtype(y.dtype):
                    result = lib.vec_compare(x, y.astype(np.object_), op)
                else:
                    result = lib.vec_compare(x, y, op)
            else:
                result = lib.scalar_compare(x, y, op)
        else:

            # we want to compare like types
            # we only want to convert to integer like if
            # we are not NotImplemented, otherwise
            # we would allow datetime64 (but viewed as i8) against
            # integer comparisons
            if is_datetimelike_v_numeric(x, y):
                raise TypeError("invalid type comparison")

            # numpy does not like comparisons vs None; a null scalar is
            # unequal to every element, whatever the dtype of x
            if isscalar(y) and isnull(y):
                if name == '__ne__':
                    return np.ones(len(x), dtype=bool)
                else:
                    return np.zeros(len(x), dtype=bool)

            # we have a datetime/timedelta and may need to convert
            mask = None
            if (needs_i8_conversion(x) or
                    (not isscalar(y) and needs_i8_conversion(y))):

                # the mask always marks the NULL positions and must be
                # taken before the operands are re-viewed as 'i8'.
                # (Previously ``!=`` masked the non-null positions, which
                # forced every valid element of the result to False.)
                if isscalar(y):
                    mask = isnull(x)
                    y = _index.convert_scalar(x, _values_from_object(y))
                else:
                    mask = isnull(x) | isnull(y)
                    y = y.view('i8')

                x = x.view('i8')

            try:
                result = getattr(x, name)(y)
                if result is NotImplemented:
                    raise TypeError("invalid type comparison")
            except AttributeError:
                result = op(x, y)

            if mask is not None and mask.any():
                # only ``!=`` is True where either side is null
                result[mask] = (name == '__ne__')

        return result
Esempio n. 45
0
    def na_op(x, y):
        """
        Element-wise comparison of ``x`` against ``y`` with null handling.

        ``op`` and ``name`` come from the enclosing scope: ``op`` is the
        comparison function and ``name`` the matching method name (such as
        ``'__ne__'``).

        Wherever an operand is null the result is True for ``!=`` and False
        for every other comparison.

        Raises
        ------
        TypeError
            If ``is_datetimelike_v_numeric(x, y)`` is true, or if the
            comparison method on ``x`` returns ``NotImplemented``.
        """

        # dispatch to the categorical if we have a categorical
        # in either operand
        if is_categorical_dtype(x):
            return op(x, y)
        elif is_categorical_dtype(y) and not isscalar(y):
            return op(y, x)

        if is_object_dtype(x.dtype):
            if isinstance(y, list):
                y = lib.list_to_object_array(y)

            if isinstance(y, (np.ndarray, pd.Series)):
                if not is_object_dtype(y.dtype):
                    result = lib.vec_compare(x, y.astype(np.object_), op)
                else:
                    result = lib.vec_compare(x, y, op)
            else:
                result = lib.scalar_compare(x, y, op)
        else:

            # we want to compare like types
            # we only want to convert to integer like if
            # we are not NotImplemented, otherwise
            # we would allow datetime64 (but viewed as i8) against
            # integer comparisons
            if is_datetimelike_v_numeric(x, y):
                raise TypeError("invalid type comparison")

            # numpy does not like comparisons vs None; answer directly,
            # since a null scalar is unequal to every element
            if isscalar(y) and isnull(y):
                if name == '__ne__':
                    return np.ones(len(x), dtype=bool)
                else:
                    return np.zeros(len(x), dtype=bool)

            # we have a datetime/timedelta and may need to convert
            mask = None
            if (needs_i8_conversion(x) or
                    (not isscalar(y) and needs_i8_conversion(y))):

                # record the null positions before viewing as 'i8'; the old
                # code masked the NON-null positions for ``!=`` and then set
                # them to False, wiping out every valid comparison result
                if isscalar(y):
                    mask = isnull(x)
                    y = _index.convert_scalar(x, _values_from_object(y))
                else:
                    mask = isnull(x) | isnull(y)
                    y = y.view('i8')

                x = x.view('i8')

            try:
                result = getattr(x, name)(y)
                if result is NotImplemented:
                    raise TypeError("invalid type comparison")
            except AttributeError:
                result = op(x, y)

            if mask is not None and mask.any():
                # null positions: True for ``!=``, False otherwise
                result[mask] = (name == '__ne__')

        return result