def _make_str_accessor(self):
    """
    Validate ``self`` and return its ``StringMethods`` accessor.

    A Series is accepted when its dtype is object, or when it is
    categorical and its categories are object dtype.  An Index is accepted
    when its ``inferred_type`` is one of 'string', 'unicode', 'mixed' or
    'mixed-integer' and it has a single level.

    Raises
    ------
    AttributeError
        If ``self`` does not pass the checks above.
    """
    from pandas.core.series import Series
    from pandas.core.index import Index

    if isinstance(self, Series):
        # Only the dtype is inspected, not the individual values: excluding
        # every series with a non-string value is not practical for
        # performance reasons until there is a str dtype (GH 9343).
        holds_objects = (
            (is_categorical_dtype(self.dtype) and
             is_object_dtype(self.values.categories)) or
            is_object_dtype(self.dtype))
        if not holds_objects:
            raise AttributeError("Can only use .str accessor with string "
                                 "values, which use np.object_ dtype in "
                                 "pandas")

    if isinstance(self, Index):
        # inferred types that can contain string values
        allowed_types = ('string', 'unicode', 'mixed', 'mixed-integer')
        if self.inferred_type not in allowed_types:
            raise AttributeError(
                "Can only use .str accessor with string values "
                "(i.e. inferred_type is 'string', 'unicode' or 'mixed')")
        if self.nlevels > 1:
            raise AttributeError(
                "Can only use .str accessor with Index, not MultiIndex")

    return StringMethods(self)
def _make_str_accessor(self):
    """
    Validate ``self`` and return its ``StringMethods`` accessor.

    A Series is accepted when its dtype is object, or when it is
    categorical and its categories are object dtype.  An Index is accepted
    when its ``inferred_type`` is one of 'string', 'unicode', 'mixed' or
    'mixed-integer' and it has a single level.

    Raises
    ------
    AttributeError
        If ``self`` does not pass the checks above.
    """
    from pandas.core.series import Series
    from pandas.core.index import Index

    if isinstance(self, Series):
        # Only the dtype is inspected, not the individual values: excluding
        # every series with a non-string value is not practical for
        # performance reasons until there is a str dtype (GH 9343).
        holds_objects = (
            (is_categorical_dtype(self.dtype) and
             is_object_dtype(self.values.categories)) or
            is_object_dtype(self.dtype))
        if not holds_objects:
            raise AttributeError("Can only use .str accessor with string "
                                 "values, which use np.object_ dtype in "
                                 "pandas")

    if isinstance(self, Index):
        # inferred types that can contain string values
        allowed_types = ('string', 'unicode', 'mixed', 'mixed-integer')
        if self.inferred_type not in allowed_types:
            raise AttributeError(
                "Can only use .str accessor with string values "
                "(i.e. inferred_type is 'string', 'unicode' or 'mixed')")
        if self.nlevels > 1:
            raise AttributeError(
                "Can only use .str accessor with Index, not MultiIndex")

    return StringMethods(self)
def _soft_convert_objects(values, datetime=True, numeric=True, timedelta=True,
                          coerce=False, copy=True):
    """
    If we have an object dtype, try to coerce dates and/or numbers.

    Parameters
    ----------
    values : list, tuple, scalar or array-like
        Lists and tuples, and anything without a ``dtype`` attribute, are
        first wrapped in an object ndarray.
    datetime, numeric, timedelta : bool, default True
        Which conversions to attempt.  At least one must be True.
    coerce : bool, default False
        If True, exactly one conversion flag may be set and the matching
        ``pd.to_*`` function is called with ``errors='coerce'``.
    copy : bool, default True
        If True, non-object input and the numeric soft-conversion result
        are copied before being returned.

    Returns
    -------
    The converted values, or the input values when nothing applies.

    Raises
    ------
    ValueError
        If no conversion flag is set, or more than one is set together
        with ``coerce=True``.
    """
    conversion_count = sum((datetime, numeric, timedelta))
    if conversion_count == 0:
        raise ValueError('At least one of datetime, numeric or timedelta must '
                         'be True.')
    elif conversion_count > 1 and coerce:
        raise ValueError("Only one of 'datetime', 'numeric' or "
                         "'timedelta' can be True when when coerce=True.")

    if isinstance(values, (list, tuple)):
        # List or scalar
        values = np.array(values, dtype=np.object_)
    elif not hasattr(values, 'dtype'):
        values = np.array([values], dtype=np.object_)
    elif not is_object_dtype(values.dtype):
        # If not object, do not attempt conversion
        values = values.copy() if copy else values
        return values

    # If 1 flag is coerce, ensure 2 others are False
    if coerce:
        # Immediate return if coerce
        if datetime:
            return pd.to_datetime(values, errors='coerce', box=False)
        elif timedelta:
            return pd.to_timedelta(values, errors='coerce', box=False)
        elif numeric:
            return pd.to_numeric(values, errors='coerce')

    # Soft conversions
    if datetime:
        values = lib.maybe_convert_objects(values,
                                           convert_datetime=datetime)

    if timedelta and is_object_dtype(values.dtype):
        # Object check to ensure only run if previous did not convert
        values = lib.maybe_convert_objects(values,
                                           convert_timedelta=timedelta)

    if numeric and is_object_dtype(values.dtype):
        try:
            converted = lib.maybe_convert_numeric(values,
                                                  set(),
                                                  coerce_numeric=True)
            # If all NaNs, then do not-alter
            values = converted if not isnull(converted).all() else values
            values = values.copy() if copy else values
        except Exception:
            # Best effort: a failed numeric conversion leaves the values
            # untouched.  Catching Exception (rather than using a bare
            # except) lets KeyboardInterrupt/SystemExit propagate.
            pass

    return values
def safe_na_op(lvalues, rvalues):
    """
    Call ``na_op`` and, if it raises, retry element by element on whichever
    operand is tried below, provided that operand is object dtype.

    When ``rvalues`` is an ABCSeries, only ``rvalues`` is considered for the
    elementwise retry; otherwise only ``lvalues`` is.  If the chosen operand
    is not object dtype the original exception is re-raised.
    """
    try:
        return na_op(lvalues, rvalues)
    except Exception:
        if isinstance(rvalues, ABCSeries):
            candidate = rvalues
            elementwise = lambda x: op(lvalues, x)  # noqa: E731
        else:
            candidate = lvalues
            elementwise = lambda x: op(x, rvalues)  # noqa: E731

        # if dtype is object, try elementwise op
        if is_object_dtype(candidate):
            return _algos.arrmap_object(candidate, elementwise)
        raise
def _soft_convert_objects(values, datetime=True, numeric=True, timedelta=True,
                          coerce=False, copy=True):
    """
    If we have an object dtype, try to coerce dates and/or numbers.

    Parameters
    ----------
    values : list, tuple, scalar or array-like
        Lists and tuples, and anything without a ``dtype`` attribute, are
        first wrapped in an object ndarray.
    datetime, numeric, timedelta : bool, default True
        Which conversions to attempt.  At least one must be True.
    coerce : bool, default False
        If True, exactly one conversion flag may be set and the matching
        ``pd.to_*`` function is called with ``errors='coerce'``.
    copy : bool, default True
        If True, non-object input and the numeric soft-conversion result
        are copied before being returned.

    Returns
    -------
    The converted values, or the input values when nothing applies.

    Raises
    ------
    ValueError
        If no conversion flag is set, or more than one is set together
        with ``coerce=True``.
    """
    conversion_count = sum((datetime, numeric, timedelta))
    if conversion_count == 0:
        raise ValueError('At least one of datetime, numeric or timedelta must '
                         'be True.')
    elif conversion_count > 1 and coerce:
        raise ValueError("Only one of 'datetime', 'numeric' or "
                         "'timedelta' can be True when when coerce=True.")

    if isinstance(values, (list, tuple)):
        # List or scalar
        values = np.array(values, dtype=np.object_)
    elif not hasattr(values, 'dtype'):
        values = np.array([values], dtype=np.object_)
    elif not is_object_dtype(values.dtype):
        # If not object, do not attempt conversion
        values = values.copy() if copy else values
        return values

    # If 1 flag is coerce, ensure 2 others are False
    if coerce:
        # Immediate return if coerce
        if datetime:
            return pd.to_datetime(values, errors='coerce', box=False)
        elif timedelta:
            return pd.to_timedelta(values, errors='coerce', box=False)
        elif numeric:
            return pd.to_numeric(values, errors='coerce')

    # Soft conversions
    if datetime:
        values = lib.maybe_convert_objects(values,
                                           convert_datetime=datetime)

    if timedelta and is_object_dtype(values.dtype):
        # Object check to ensure only run if previous did not convert
        values = lib.maybe_convert_objects(values,
                                           convert_timedelta=timedelta)

    if numeric and is_object_dtype(values.dtype):
        try:
            converted = lib.maybe_convert_numeric(values,
                                                  set(),
                                                  coerce_numeric=True)
            # If all NaNs, then do not-alter
            values = converted if not isnull(converted).all() else values
            values = values.copy() if copy else values
        except Exception:
            # Best effort: a failed numeric conversion leaves the values
            # untouched.  Catching Exception (rather than using a bare
            # except) lets KeyboardInterrupt/SystemExit propagate.
            pass

    return values
def nanmax(values, axis=None, skipna=True):
    """
    Compute the maximum of ``values`` along ``axis``.

    ``values`` is first run through ``_get_values`` with
    ``fill_value_typ='-inf'`` (presumably so that missing entries can never
    win the max -- confirm against ``_get_values``).  The raw result is then
    passed through ``_wrap_results`` and ``_maybe_null_out``.

    Parameters
    ----------
    values : ndarray
    axis : int, optional
    skipna : bool, default True

    Returns
    -------
    The value returned by ``_maybe_null_out`` for the computed maximum.
    """
    values, mask, dtype, dtype_max = _get_values(values, skipna,
                                                 fill_value_typ='-inf')

    # numpy 1.6.1 workaround in Python 3.x
    if is_object_dtype(values) and compat.PY3:
        if values.ndim > 1:
            apply_ax = axis if axis is not None else 0
            result = np.apply_along_axis(builtins.max, apply_ax, values)
        else:
            try:
                result = builtins.max(values)
            except Exception:
                # e.g. empty input or uncomparable objects; report NaN but
                # let KeyboardInterrupt/SystemExit propagate
                result = np.nan
    else:
        if ((axis is not None and values.shape[axis] == 0) or
                values.size == 0):
            # Empty along the reduced axis: build a result of the right
            # shape via sum() and overwrite it with NaN.
            try:
                result = ensure_float(values.sum(axis, dtype=dtype_max))
                result.fill(np.nan)
            except Exception:
                result = np.nan
        else:
            result = values.max(axis)

    result = _wrap_results(result, dtype)
    return _maybe_null_out(result, axis, mask)
def get_dtype_kinds(l):
    """
    Parameters
    ----------
    l : list of arrays

    Returns
    -------
    a set of kinds that exist in this list of arrays
    """

    def _kind_of(arr):
        # The checks are ordered; the first one that matches wins.
        dtype = arr.dtype
        if com.is_categorical_dtype(dtype):
            return 'category'
        if com.is_sparse(arr):
            return 'sparse'
        if com.is_datetimetz(arr):
            return 'datetimetz'
        if com.is_datetime64_dtype(dtype):
            return 'datetime'
        if com.is_timedelta64_dtype(dtype):
            return 'timedelta'
        if com.is_object_dtype(dtype):
            return 'object'
        if com.is_bool_dtype(dtype):
            return 'bool'
        # anything else is reported by its dtype kind
        return dtype.kind

    return set(_kind_of(arr) for arr in l)
def nanmax(values, axis=None, skipna=True):
    """
    Compute the maximum of ``values`` along ``axis``.

    ``values`` is first run through ``_get_values`` with
    ``fill_value_typ='-inf'`` (presumably so that missing entries can never
    win the max -- confirm against ``_get_values``).  The raw result is then
    passed through ``_wrap_results`` and ``_maybe_null_out``.

    Parameters
    ----------
    values : ndarray
    axis : int, optional
    skipna : bool, default True

    Returns
    -------
    The value returned by ``_maybe_null_out`` for the computed maximum.
    """
    values, mask, dtype, dtype_max = _get_values(values, skipna,
                                                 fill_value_typ='-inf')

    # numpy 1.6.1 workaround in Python 3.x
    if is_object_dtype(values) and compat.PY3:
        if values.ndim > 1:
            apply_ax = axis if axis is not None else 0
            result = np.apply_along_axis(builtins.max, apply_ax, values)
        else:
            try:
                result = builtins.max(values)
            except Exception:
                # e.g. empty input or uncomparable objects; report NaN but
                # let KeyboardInterrupt/SystemExit propagate
                result = np.nan
    else:
        if ((axis is not None and values.shape[axis] == 0) or
                values.size == 0):
            # Empty along the reduced axis: build a result of the right
            # shape via sum() and overwrite it with NaN.
            try:
                result = ensure_float(values.sum(axis, dtype=dtype_max))
                result.fill(np.nan)
            except Exception:
                result = np.nan
        else:
            result = values.max(axis)

    result = _wrap_results(result, dtype)
    return _maybe_null_out(result, axis, mask)
def memory_usage(self, deep=False):
    """
    Memory usage of my values

    Parameters
    ----------
    deep : bool
        Introspect the data deeply, interrogate
        `object` dtypes for system-level memory consumption

    Returns
    -------
    bytes used

    Notes
    -----
    Memory usage does not include memory consumed by elements that
    are not components of the array if deep=False

    See Also
    --------
    numpy.ndarray.nbytes
    """
    # Delegate when the underlying values know how to report their own usage.
    if hasattr(self.values, 'memory_usage'):
        return self.values.memory_usage(deep=deep)

    total = self.values.nbytes
    if not deep or not com.is_object_dtype(self):
        return total
    # deep + object dtype: add the size of the referenced objects
    return total + lib.memory_usage_of_objects(self.values)
def get_dtype_kinds(l):
    """
    Parameters
    ----------
    l : list of arrays

    Returns
    -------
    a set of kinds that exist in this list of arrays
    """

    def _kind_of(arr):
        # The checks are ordered; the first one that matches wins.
        dtype = arr.dtype
        if com.is_categorical_dtype(dtype):
            return 'category'
        if com.is_sparse(arr):
            return 'sparse'
        if com.is_datetimetz(arr):
            return 'datetimetz'
        if com.is_datetime64_dtype(dtype):
            return 'datetime'
        if com.is_timedelta64_dtype(dtype):
            return 'timedelta'
        if com.is_object_dtype(dtype):
            return 'object'
        if com.is_bool_dtype(dtype):
            return 'bool'
        # anything else is reported by its dtype kind
        return dtype.kind

    return set(_kind_of(arr) for arr in l)
def get_dtype_kinds(l):
    """
    Parameters
    ----------
    l : list of arrays

    Returns
    -------
    a set of kinds that exist in this list of arrays
    """

    def _kind_of(arr):
        # The checks are ordered; the first one that matches wins.
        dtype = arr.dtype
        if com.is_categorical_dtype(dtype):
            return "category"
        if com.is_sparse(arr):
            return "sparse"
        if com.is_datetimetz(arr):
            return "datetimetz"
        if com.is_datetime64_dtype(dtype):
            return "datetime"
        if com.is_timedelta64_dtype(dtype):
            return "timedelta"
        if com.is_object_dtype(dtype):
            return "object"
        if com.is_bool_dtype(dtype):
            return "bool"
        # anything else is reported by its dtype kind
        return dtype.kind

    return set(_kind_of(arr) for arr in l)
def astype(self, dtype, copy=True):
    """
    Cast to ``dtype``.

    Object dtype returns ``self.asobject``; integer dtypes return an Index
    of the values cast to 'i8'.

    Raises
    ------
    ValueError
        For any other target dtype.
    """
    dtype = np.dtype(dtype)

    if is_object_dtype(dtype):
        return self.asobject

    if not is_integer_dtype(dtype):
        raise ValueError('Cannot cast PeriodIndex to dtype %s' % dtype)

    # every integer target is stored as int64
    as_int64 = self.values.astype('i8', copy=copy)
    return Index(as_int64, name=self.name, dtype='i8')
def test_memory_usage(self):
    """Check shallow vs. deep ``memory_usage`` for every object in objs."""
    for o in self.objs:
        shallow = o.memory_usage()
        deep = o.memory_usage(deep=True)

        holds_objects = (
            com.is_object_dtype(o) or
            (isinstance(o, Series) and com.is_object_dtype(o.index)))
        if holds_objects:
            # if there are objects, only deep will pick them up
            self.assertTrue(deep > shallow)
        else:
            self.assertEqual(shallow, deep)

        if isinstance(o, Series):
            # values-only usage plus index usage equals the total
            parts = o.memory_usage(index=False) + o.index.memory_usage()
            self.assertEqual(parts, o.memory_usage(index=True))

        # sys.getsizeof will call the .memory_usage with
        # deep=True, and add on some GC overhead
        overhead = deep - sys.getsizeof(o)
        self.assertTrue(abs(overhead) < 100)
def astype(self, dtype, copy=True):
    """
    Cast to ``dtype``.

    Object dtype returns ``self.asobject``; integer dtypes return an Index
    of the values cast to 'i8'.

    Raises
    ------
    ValueError
        For any other target dtype.
    """
    dtype = np.dtype(dtype)

    if is_object_dtype(dtype):
        return self.asobject

    if not is_integer_dtype(dtype):
        raise ValueError('Cannot cast PeriodIndex to dtype %s' % dtype)

    # every integer target is stored as int64
    as_int64 = self.values.astype('i8', copy=copy)
    return Index(as_int64, name=self.name, dtype='i8')
def _from_arraylike(cls, data, freq, tz):
    """
    Convert ``data`` for use by a PeriodIndex and resolve ``freq``.

    Parameters
    ----------
    data : iterable, ndarray, PeriodIndex, DatetimeIndex or Int64Index
    freq : frequency, or None to take/extract it from ``data`` where the
        branch taken supports that
    tz : only forwarded to ``dt64arr_to_periodarr`` for datetime64 input

    Returns
    -------
    (data, freq) : the converted data and the frequency that was used

    Raises
    ------
    ValueError
        If ``data`` is a scalar or a Period, or if ``freq`` is None and is
        not extracted in the ndarray/index branch.
    """
    if not isinstance(
            data, (np.ndarray, PeriodIndex, DatetimeIndex, Int64Index)):
        if lib.isscalar(data) or isinstance(data, Period):
            raise ValueError('PeriodIndex() must be called with a '
                             'collection of some kind, %s was passed'
                             % repr(data))

        # other iterable of some kind
        if not isinstance(data, (list, tuple)):
            data = list(data)

        try:
            # First attempt: treat the items as integers and build Periods
            # from them.  The ValueError raised below for a missing freq is
            # caught by the handler of this same try, so it falls back to
            # the object path rather than propagating.
            data = com._ensure_int64(data)
            if freq is None:
                raise ValueError('freq not specified')
            data = np.array([Period(x, freq=freq).ordinal for x in data],
                            dtype=np.int64)
        except (TypeError, ValueError):
            # Fallback: treat the items as objects and extract ordinals,
            # extracting the freq from the data when none was given.
            data = com._ensure_object(data)

            if freq is None:
                freq = period.extract_freq(data)
            data = period.extract_ordinals(data, freq)
    else:
        if isinstance(data, PeriodIndex):
            if freq is None or freq == data.freq:
                # same (or unspecified) frequency: reuse the values as-is
                freq = data.freq
                data = data.values
            else:
                # different frequency requested: convert the values
                base1, _ = _gfc(data.freq)
                base2, _ = _gfc(freq)
                data = period.period_asfreq_arr(data.values,
                                                base1, base2, 1)
        else:
            if freq is None and com.is_object_dtype(data):
                # must contain Period instance and thus extract ordinals
                freq = period.extract_freq(data)
                data = period.extract_ordinals(data, freq)

            if freq is None:
                msg = 'freq not specified and cannot be inferred'
                raise ValueError(msg)

            if data.dtype != np.int64:
                if np.issubdtype(data.dtype, np.datetime64):
                    data = dt64arr_to_periodarr(data, freq, tz)
                else:
                    # try integers first, then fall back to objects
                    try:
                        data = com._ensure_int64(data)
                    except (TypeError, ValueError):
                        data = com._ensure_object(data)
                        data = period.extract_ordinals(data, freq)

    return data, freq
def _from_arraylike(cls, data, freq, tz):
    """
    Convert ``data`` for use by a PeriodIndex and resolve ``freq``.

    Parameters
    ----------
    data : iterable, ndarray, PeriodIndex, DatetimeIndex or Int64Index
    freq : frequency, or None to take/extract it from ``data`` where the
        branch taken supports that
    tz : only forwarded to ``dt64arr_to_periodarr`` for datetime64 input

    Returns
    -------
    (data, freq) : the converted data and the frequency that was used

    Raises
    ------
    ValueError
        If ``data`` is a scalar or a Period, or if ``freq`` is None and is
        not extracted in the ndarray/index branch.
    """
    if not isinstance(data, (np.ndarray, PeriodIndex,
                             DatetimeIndex, Int64Index)):
        if lib.isscalar(data) or isinstance(data, Period):
            raise ValueError('PeriodIndex() must be called with a '
                             'collection of some kind, %s was passed'
                             % repr(data))

        # other iterable of some kind
        if not isinstance(data, (list, tuple)):
            data = list(data)

        try:
            # First attempt: treat the items as integers and build Periods
            # from them.  The ValueError raised below for a missing freq is
            # caught by the handler of this same try, so it falls back to
            # the object path rather than propagating.
            data = com._ensure_int64(data)
            if freq is None:
                raise ValueError('freq not specified')
            data = np.array([Period(x, freq=freq).ordinal for x in data],
                            dtype=np.int64)
        except (TypeError, ValueError):
            # Fallback: treat the items as objects and extract ordinals,
            # extracting the freq from the data when none was given.
            data = com._ensure_object(data)

            if freq is None:
                freq = period.extract_freq(data)
            data = period.extract_ordinals(data, freq)
    else:
        if isinstance(data, PeriodIndex):
            if freq is None or freq == data.freq:
                # same (or unspecified) frequency: reuse the values as-is
                freq = data.freq
                data = data.values
            else:
                # different frequency requested: convert the values
                base1, _ = _gfc(data.freq)
                base2, _ = _gfc(freq)
                data = period.period_asfreq_arr(data.values,
                                                base1, base2, 1)
        else:
            if freq is None and com.is_object_dtype(data):
                # must contain Period instance and thus extract ordinals
                freq = period.extract_freq(data)
                data = period.extract_ordinals(data, freq)

            if freq is None:
                msg = 'freq not specified and cannot be inferred'
                raise ValueError(msg)

            if data.dtype != np.int64:
                if np.issubdtype(data.dtype, np.datetime64):
                    data = dt64arr_to_periodarr(data, freq, tz)
                else:
                    # try integers first, then fall back to objects
                    try:
                        data = com._ensure_int64(data)
                    except (TypeError, ValueError):
                        data = com._ensure_object(data)
                        data = period.extract_ordinals(data, freq)

    return data, freq
def test_memory_usage(self):
    """Check shallow vs. deep ``memory_usage`` for every object in objs."""

    def check(shallow, deep, holds_objects):
        # deep usage is strictly larger only when objects are involved
        if holds_objects:
            self.assertTrue(deep > shallow)
        else:
            self.assertEqual(shallow, deep)

    for o in self.objs:
        check(o.memory_usage(),
              o.memory_usage(deep=True),
              com.is_object_dtype(o))

        if not isinstance(o, Series):
            continue

        # Series: repeat the comparison with the index included
        with_index = o.memory_usage(index=True)
        with_index_deep = o.memory_usage(index=True, deep=True)
        check(with_index,
              with_index_deep,
              com.is_object_dtype(o) or com.is_object_dtype(o.index))

        # values-only usage plus index usage equals the total
        self.assertEqual(o.memory_usage(index=False) +
                         o.index.memory_usage(),
                         o.memory_usage(index=True))
def test_memory_usage(self):
    """Check shallow vs. deep ``memory_usage`` for every object in objs."""

    def check(shallow, deep, holds_objects):
        # deep usage is strictly larger only when objects are involved
        if holds_objects:
            self.assertTrue(deep > shallow)
        else:
            self.assertEqual(shallow, deep)

    for o in self.objs:
        check(o.memory_usage(),
              o.memory_usage(deep=True),
              com.is_object_dtype(o))

        if not isinstance(o, Series):
            continue

        # Series: repeat the comparison with the index included
        with_index = o.memory_usage(index=True)
        with_index_deep = o.memory_usage(index=True, deep=True)
        check(with_index,
              with_index_deep,
              com.is_object_dtype(o) or com.is_object_dtype(o.index))

        # values-only usage plus index usage equals the total
        self.assertEqual(o.memory_usage(index=False) +
                         o.index.memory_usage(),
                         o.memory_usage(index=True))
def astype(self, dtype):
    """
    Return an Index of the values converted to ``dtype``.

    Float and integer targets cast the underlying values; an object target
    passes the underlying values through to ``Index`` unchanged.

    Raises
    ------
    TypeError
        For any other target dtype.
    """
    dtype = pandas_dtype(dtype)

    numeric_target = is_float_dtype(dtype) or is_integer_dtype(dtype)
    if not numeric_target and not is_object_dtype(dtype):
        raise TypeError('Setting %s dtype to anything other than '
                        'float64 or object is not supported' %
                        self.__class__)

    if numeric_target:
        values = self._values.astype(dtype)
    else:
        # object target: Index() receives the raw values plus dtype
        values = self._values
    return Index(values, name=self.name, dtype=dtype)
def pandas_col_to_ibis_type(col):
    """
    Map the dtype of a pandas column to an ibis type.

    Returns a type name string, or a ``dt.Category`` sized by the number of
    categories for categorical columns.

    Raises
    ------
    com.IbisTypeError
        For datetime64 columns that are not nanosecond based, for uint64
        columns, and for any dtype not handled below.
    """
    import pandas.core.common as pdcom
    import ibis.expr.datatypes as dt
    import numpy as np

    dty = col.dtype

    # datetime types
    if pdcom.is_datetime64_dtype(dty):
        if not pdcom.is_datetime64_ns_dtype(dty):
            raise com.IbisTypeError("Column {0} has dtype {1}, which is "
                                    "datetime64-like but does "
                                    "not use nanosecond units"
                                    .format(col.name, dty))
        return 'timestamp'

    if pdcom.is_timedelta64_dtype(dty):
        print("Warning: encoding a timedelta64 as an int64")
        return 'int64'

    if pdcom.is_categorical_dtype(dty):
        return dt.Category(len(col.cat.categories))

    if pdcom.is_bool_dtype(dty):
        return 'boolean'

    # simple numerical types, checked in this order; unsigned integers are
    # widened to the next larger signed type
    numeric_names = (
        (np.int8, 'int8'),
        (np.int16, 'int16'),
        (np.int32, 'int32'),
        (np.int64, 'int64'),
        (np.float32, 'float'),
        (np.float64, 'double'),
        (np.uint8, 'int16'),
        (np.uint16, 'int32'),
        (np.uint32, 'int64'),
    )
    for numpy_type, ibis_name in numeric_names:
        if issubclass(dty.type, numpy_type):
            return ibis_name

    # no wider signed type is available for uint64
    if issubclass(dty.type, np.uint64):
        raise com.IbisTypeError("Column {0} is an unsigned int64"
                                .format(col.name))

    if pdcom.is_object_dtype(dty):
        # TODO: overly broad?
        return 'string'

    raise com.IbisTypeError("Column {0} is dtype {1}"
                            .format(col.name, dty))
def astype(self, dtype):
    """
    Return an Index of the values converted to ``dtype``.

    Float and integer targets cast the underlying values; an object target
    passes the underlying values through to ``Index`` unchanged.

    Raises
    ------
    TypeError
        For any other target dtype.
    """
    dtype = pandas_dtype(dtype)

    numeric_target = is_float_dtype(dtype) or is_integer_dtype(dtype)
    if not numeric_target and not is_object_dtype(dtype):
        raise TypeError('Setting %s dtype to anything other than '
                        'float64 or object is not supported' %
                        self.__class__)

    if numeric_target:
        values = self._values.astype(dtype)
    else:
        # object target: Index() receives the raw values plus dtype
        values = self._values
    return Index(values, name=self.name, dtype=dtype)
def _simple_new(cls, values, name=None, freq=None, **kwargs):
    """
    Create a new ``cls`` instance around ``values`` without calling
    ``__init__``.

    Object-dtype ``values`` are instead handed to the ``PeriodIndex``
    constructor together with ``name``, ``freq`` and ``kwargs``.
    """
    # NOTE(review): this is a truthiness test on the dtype, not an
    # ``is None`` check -- confirm that is intended before changing it.
    found_dtype = getattr(values, 'dtype', None)
    if not found_dtype:
        values = np.array(values, copy=False)

    if is_object_dtype(values):
        return PeriodIndex(values, name=name, freq=freq, **kwargs)

    instance = object.__new__(cls)
    instance._data = values
    instance.name = name
    instance.freq = freq
    instance._reset_identity()
    return instance
def _bn_ok_dtype(dt, name):
    """
    Return True if bottleneck function ``name`` may be used for dtype ``dt``.
    """
    # Bottleneck chokes on datetime64
    if is_object_dtype(dt) or is_datetime_or_timedelta_dtype(dt):
        return False

    # bottleneck does not properly upcast during the sum
    # so can overflow
    if name == 'nansum' and dt.itemsize < 8:
        return False

    return True
def unconvert(values, dtype, compress=None):
    """
    Rebuild an array from its packed representation.

    Parameters
    ----------
    values : ExtType, str or other packed payload
        An ``ExtType`` with code 0 carries raw bytes in ``.data``; any other
        non-categorical, non-object payload is encoded with latin1 to
        obtain bytes.
    dtype : dtype of the array to rebuild
    compress : {None, 'zlib', 'blosc'}
        Compression that was applied to the payload.

    Returns
    -------
    ``values`` unchanged for categorical dtypes, an object ndarray for
    object dtype, otherwise an ndarray of ``dtype`` built from the bytes.

    Raises
    ------
    ValueError
        If ``compress`` is truthy but neither 'zlib' nor 'blosc'.
    """
    as_is_ext = isinstance(values, ExtType) and values.code == 0

    if as_is_ext:
        values = values.data

    if is_categorical_dtype(dtype):
        return values

    elif is_object_dtype(dtype):
        return np.array(values, dtype=object)

    dtype = pandas_dtype(dtype).base

    if not as_is_ext:
        values = values.encode('latin1')

    if compress:
        if compress == u'zlib':
            _check_zlib()
            decompress = zlib.decompress
        elif compress == u'blosc':
            _check_blosc()
            decompress = blosc.decompress
        else:
            raise ValueError("compress must be one of 'zlib' or 'blosc'")

        try:
            return np.frombuffer(
                _move_into_mutable_buffer(decompress(values)),
                dtype=dtype,
            )
        except _BadMove as e:
            # Pull the decompressed data off of the `_BadMove` exception.
            # We don't just store this in the locals because we want to
            # minimize the risk of giving users access to a `bytes` object
            # whose data is also given to a mutable buffer.
            values = e.args[0]
            if len(values) > 1:
                # The empty string and single characters are memoized in many
                # string creating functions in the capi. This case should not
                # warn even though we need to make a copy because we are only
                # copying at most 1 byte.
                warnings.warn(
                    'copying data after decompressing; this may mean that'
                    ' decompress is caching its result',
                    PerformanceWarning,
                )
                # fall through to the copying path below

    # Copy the bytes into a new, writable numpy array.  frombuffer + copy
    # replaces np.fromstring, whose binary mode is deprecated.
    return np.frombuffer(values, dtype=dtype).copy()
def pandas_col_to_ibis_type(col):
    """
    Map the dtype of a pandas column to an ibis type.

    Returns a type name string, or a ``dt.Category`` sized by the number of
    categories for categorical columns.

    Raises
    ------
    com.IbisTypeError
        For datetime64 columns that are not nanosecond based, for uint64
        columns, and for any dtype not handled below.
    """
    import pandas.core.common as pdcom
    import ibis.expr.datatypes as dt
    import numpy as np

    dty = col.dtype

    # datetime types
    if pdcom.is_datetime64_dtype(dty):
        if not pdcom.is_datetime64_ns_dtype(dty):
            raise com.IbisTypeError("Column {0} has dtype {1}, which is "
                                    "datetime64-like but does "
                                    "not use nanosecond units"
                                    .format(col.name, dty))
        return 'timestamp'

    if pdcom.is_timedelta64_dtype(dty):
        print("Warning: encoding a timedelta64 as an int64")
        return 'int64'

    if pdcom.is_categorical_dtype(dty):
        return dt.Category(len(col.cat.categories))

    if pdcom.is_bool_dtype(dty):
        return 'boolean'

    # simple numerical types, checked in this order; unsigned integers are
    # widened to the next larger signed type
    numeric_names = (
        (np.int8, 'int8'),
        (np.int16, 'int16'),
        (np.int32, 'int32'),
        (np.int64, 'int64'),
        (np.float32, 'float'),
        (np.float64, 'double'),
        (np.uint8, 'int16'),
        (np.uint16, 'int32'),
        (np.uint32, 'int64'),
    )
    for numpy_type, ibis_name in numeric_names:
        if issubclass(dty.type, numpy_type):
            return ibis_name

    # no wider signed type is available for uint64
    if issubclass(dty.type, np.uint64):
        raise com.IbisTypeError("Column {0} is an unsigned int64"
                                .format(col.name))

    if pdcom.is_object_dtype(dty):
        # TODO: overly broad?
        return 'string'

    raise com.IbisTypeError("Column {0} is dtype {1}"
                            .format(col.name, dty))
def test_memory_usage(self):
    """Check shallow vs. deep ``memory_usage`` for every object in objs."""
    for o in self.objs:
        shallow = o.memory_usage()
        deep = o.memory_usage(deep=True)

        holds_objects = (
            com.is_object_dtype(o) or
            (isinstance(o, Series) and com.is_object_dtype(o.index)))
        if holds_objects:
            # if there are objects, only deep will pick them up
            self.assertTrue(deep > shallow)
        else:
            self.assertEqual(shallow, deep)

        if isinstance(o, Series):
            # values-only usage plus index usage equals the total
            parts = o.memory_usage(index=False) + o.index.memory_usage()
            self.assertEqual(parts, o.memory_usage(index=True))

        # sys.getsizeof will call the .memory_usage with
        # deep=True, and add on some GC overhead
        overhead = deep - sys.getsizeof(o)
        self.assertTrue(abs(overhead) < 100)
def _bn_ok_dtype(dt, name):
    """
    Return True if bottleneck function ``name`` may be used for dtype ``dt``.
    """
    # Bottleneck chokes on datetime64
    if is_object_dtype(dt) or is_datetime_or_timedelta_dtype(dt):
        return False

    # bottleneck does not properly upcast during the sum
    # so can overflow
    if name == 'nansum' and dt.itemsize < 8:
        return False

    return True
def _simple_new(cls, values, name=None, freq=None, **kwargs):
    """
    Create a new ``cls`` instance around ``values`` without calling
    ``__init__``.

    Object-dtype ``values`` are instead handed to the ``PeriodIndex``
    constructor together with ``name``, ``freq`` and ``kwargs``.
    """
    # NOTE(review): this is a truthiness test on the dtype, not an
    # ``is None`` check -- confirm that is intended before changing it.
    found_dtype = getattr(values, 'dtype', None)
    if not found_dtype:
        values = np.array(values, copy=False)

    if is_object_dtype(values):
        return PeriodIndex(values, name=name, freq=freq, **kwargs)

    instance = object.__new__(cls)
    instance._data = values
    instance.name = name
    instance.freq = freq
    instance._reset_identity()
    return instance
def na_op(x, y):
    """
    Compare ``x`` with ``y`` using the enclosing scope's ``op`` / ``name``.

    ``op``, ``name`` and ``masker`` are not defined in this function; they
    are taken from the surrounding scope (not visible here).

    Raises
    ------
    TypeError
        If ``is_datetimelike_v_numeric(x, y)`` is true, or if the
        comparison method returns NotImplemented.
    """
    # dispatch to the categorical if we have a categorical
    # in either operand
    if is_categorical_dtype(x):
        return op(x, y)
    elif is_categorical_dtype(y) and not isscalar(y):
        # NOTE(review): operands are swapped here -- confirm the
        # categorical side handles the reflected comparison.
        return op(y, x)

    if is_object_dtype(x.dtype):
        result = _comp_method_OBJECT_ARRAY(op, x, y)
    else:

        # we want to compare like types
        # we only want to convert to integer like if
        # we are not NotImplemented, otherwise
        # we would allow datetime64 (but viewed as i8) against
        # integer comparisons
        if is_datetimelike_v_numeric(x, y):
            raise TypeError("invalid type comparison")

        # numpy does not like comparisons vs None
        if isscalar(y) and isnull(y):
            # a null scalar is unequal to everything: all True for
            # '__ne__', all False for every other comparison
            if name == '__ne__':
                return np.ones(len(x), dtype=bool)
            else:
                return np.zeros(len(x), dtype=bool)

        # we have a datetime/timedelta and may need to convert
        mask = None
        if (needs_i8_conversion(x) or
                (not isscalar(y) and needs_i8_conversion(y))):

            # the null mask must be taken before the operands are
            # replaced by their i8 views / converted scalar
            if isscalar(y):
                mask = isnull(x)
                y = _index.convert_scalar(x, _values_from_object(y))
            else:
                mask = isnull(x) | isnull(y)
                y = y.view('i8')
            x = x.view('i8')

        try:
            result = getattr(x, name)(y)
            if result is NotImplemented:
                raise TypeError("invalid type comparison")
        except AttributeError:
            # x has no method called ``name``; fall back to the operator
            result = op(x, y)

        # positions that were null get the fill value ``masker``
        if mask is not None and mask.any():
            result[mask] = masker

    return result
def _comp_method_OBJECT_ARRAY(op, x, y):
    """
    Compare object array ``x`` with ``y`` using ``op``.

    Lists are converted to object arrays first.  Array-like ``y`` (ndarray,
    Series, Index) goes through ``lib.vec_compare``; anything else is
    treated as a scalar and goes through ``lib.scalar_compare``.
    """
    if isinstance(y, list):
        y = lib.list_to_object_array(y)

    if not isinstance(y, (np.ndarray, ABCSeries, ABCIndex)):
        return lib.scalar_compare(x, y, op)

    # vec_compare is given object-dtype data, unwrapped from Series/Index
    if not is_object_dtype(y.dtype):
        y = y.astype(np.object_)
    if isinstance(y, (ABCSeries, ABCIndex)):
        y = y.values
    return lib.vec_compare(x, y, op)
def _make_str_accessor(self):
    """
    Validate ``self`` and return its ``StringMethods`` accessor.

    A Series must have object dtype; an Index must have an
    ``inferred_type`` of 'string'.

    Raises
    ------
    AttributeError
        If ``self`` does not pass the checks above.
    """
    from pandas.core.series import Series
    from pandas.core.index import Index

    if isinstance(self, Series):
        # this really should exclude all series with any non-string values,
        # but that isn't practical for performance reasons until we have a
        # str dtype (GH 9343)
        if not com.is_object_dtype(self.dtype):
            raise AttributeError("Can only use .str accessor with string "
                                 "values, which use np.object_ dtype in "
                                 "pandas")

    if isinstance(self, Index):
        if self.inferred_type != 'string':
            raise AttributeError("Can only use .str accessor with string "
                                 "values (i.e. inferred_type is 'string')")

    return StringMethods(self)
def _make_str_accessor(self):
    """
    Validate ``self`` and return its ``StringMethods`` accessor.

    A Series must have object dtype; an Index must have an
    ``inferred_type`` of 'string'.

    Raises
    ------
    AttributeError
        If ``self`` does not pass the checks above.
    """
    from pandas.core.series import Series
    from pandas.core.index import Index

    if isinstance(self, Series):
        # this really should exclude all series with any non-string values,
        # but that isn't practical for performance reasons until we have a
        # str dtype (GH 9343)
        if not com.is_object_dtype(self.dtype):
            raise AttributeError("Can only use .str accessor with string "
                                 "values, which use np.object_ dtype in "
                                 "pandas")

    if isinstance(self, Index):
        if self.inferred_type != 'string':
            raise AttributeError("Can only use .str accessor with string "
                                 "values (i.e. inferred_type is 'string')")

    return StringMethods(self)
def _simple_new(cls, values, name=None, freq=None, **kwargs):
    """
    Create a new ``cls`` instance around ``values`` without calling
    ``__init__``.

    Object-dtype ``values`` are instead handed to the ``PeriodIndex``
    constructor together with ``name``, ``freq`` and ``kwargs``.

    Raises
    ------
    ValueError
        If ``freq`` is None for non-object values.
    """
    # NOTE(review): this is a truthiness test on the dtype, not an
    # ``is None`` check -- confirm that is intended before changing it.
    found_dtype = getattr(values, 'dtype', None)
    if not found_dtype:
        values = np.array(values, copy=False)

    if is_object_dtype(values):
        return PeriodIndex(values, name=name, freq=freq, **kwargs)

    instance = object.__new__(cls)
    instance._data = values
    instance.name = name

    if freq is None:
        raise ValueError('freq is not specified')
    instance.freq = Period._maybe_convert_freq(freq)

    instance._reset_identity()
    return instance
def _simple_new(cls, values, name=None, freq=None, **kwargs):
    """
    Create a new ``cls`` instance around ``values`` without calling
    ``__init__``.

    Object-dtype ``values`` are instead handed to the ``PeriodIndex``
    constructor together with ``name``, ``freq`` and ``kwargs``.

    Raises
    ------
    ValueError
        If ``freq`` is None for non-object values.
    """
    # NOTE(review): this is a truthiness test on the dtype, not an
    # ``is None`` check -- confirm that is intended before changing it.
    found_dtype = getattr(values, 'dtype', None)
    if not found_dtype:
        values = np.array(values, copy=False)

    if is_object_dtype(values):
        return PeriodIndex(values, name=name, freq=freq, **kwargs)

    instance = object.__new__(cls)
    instance._data = values
    instance.name = name

    if freq is None:
        raise ValueError('freq is not specified')
    instance.freq = Period._maybe_convert_freq(freq)

    instance._reset_identity()
    return instance
def astype(self, dtype, copy=True):
    """
    Return an Index of the values cast to ``dtype``.

    Float, integer and object targets are supported.

    Raises
    ------
    ValueError
        For an integer target when the index has NaNs.
    TypeError
        For any other target dtype.
    """
    dtype = pandas_dtype(dtype)

    if is_float_dtype(dtype):
        cast_to = dtype
    elif is_integer_dtype(dtype):
        # NaN has no integer representation
        if self.hasnans:
            raise ValueError('cannot convert float NaN to integer')
        cast_to = dtype
    elif is_object_dtype(dtype):
        cast_to = 'object'
    else:
        raise TypeError('Setting %s dtype to anything other than '
                        'float64 or object is not supported' %
                        self.__class__)

    values = self._values.astype(cast_to, copy=copy)
    return Index(values, name=self.name, dtype=dtype)
def _f(*args, **kwargs):
    """
    Call the wrapped ``f`` unless ``self.check`` rejects an argument.

    Raises
    ------
    TypeError
        If ``self.check`` is true for any positional or keyword argument
        value, or if ``f`` raises ValueError and the first positional
        argument is object dtype.
    """
    for candidate in itertools.chain(args, compat.itervalues(kwargs)):
        if self.check(candidate):
            raise TypeError('reduction operation {0!r} not allowed for '
                            'this dtype'.format(
                                f.__name__.replace('nan', '')))
    try:
        return f(*args, **kwargs)
    except ValueError as e:
        # we want to transform an object array
        # ValueError message to the more typical TypeError
        # e.g. this is normally a disallowed function on
        # object arrays that contain strings
        if not is_object_dtype(args[0]):
            raise
        raise TypeError(e)
def _f(*args, **kwargs):
    """
    Call the wrapped ``f`` unless ``self.check`` rejects an argument.

    Raises
    ------
    TypeError
        If ``self.check`` is true for any positional or keyword argument
        value, or if ``f`` raises ValueError and the first positional
        argument is object dtype.
    """
    for candidate in itertools.chain(args, compat.itervalues(kwargs)):
        if self.check(candidate):
            raise TypeError('reduction operation {0!r} not allowed for '
                            'this dtype'.format(
                                f.__name__.replace('nan', '')))
    try:
        return f(*args, **kwargs)
    except ValueError as e:
        # we want to transform an object array
        # ValueError message to the more typical TypeError
        # e.g. this is normally a disallowed function on
        # object arrays that contain strings
        if not is_object_dtype(args[0]):
            raise
        raise TypeError(e)
def f(values, axis=None, skipna=True, **kwds):
    """
    Run the reduction through ``bn_func`` when allowed, otherwise through
    ``alt``.

    Keywords stored in ``self.kwargs`` are used as defaults for ``kwds``.
    Any exception from the first attempt triggers one retry with ``alt``.

    Raises
    ------
    TypeError
        If the retry raises ValueError and ``values`` is object dtype.
    """
    # keywords passed by the caller win over the stored defaults
    for key, default in compat.iteritems(self.kwargs):
        kwds.setdefault(key, default)

    try:
        if self.zero_value is not None and values.size == 0:
            if values.ndim != 1:
                # zero-filled result with the reduced axis removed
                result_shape = (values.shape[:axis] +
                                values.shape[axis + 1:])
                result = np.empty(result_shape)
                result.fill(0)
                return result

            # wrap the 0's if needed
            return lib.Timedelta(0) if is_timedelta64_dtype(values) else 0

        use_bottleneck = (_USE_BOTTLENECK and skipna and
                          _bn_ok_dtype(values.dtype, bn_name))
        if use_bottleneck:
            result = bn_func(values, axis=axis, **kwds)

            # prefer to treat inf/-inf as NA, but must compute the func
            # twice :(
            if _has_infs(result):
                result = alt(values, axis=axis, skipna=skipna, **kwds)
        else:
            result = alt(values, axis=axis, skipna=skipna, **kwds)
    except Exception:
        try:
            result = alt(values, axis=axis, skipna=skipna, **kwds)
        except ValueError as e:
            # we want to transform an object array
            # ValueError message to the more typical TypeError
            # e.g. this is normally a disallowed function on
            # object arrays that contain strings
            if not is_object_dtype(values):
                raise
            raise TypeError(e)

    return result
def f(values, axis=None, skipna=True, **kwds):
    """
    Run the reduction through ``bn_func`` when allowed, otherwise through
    ``alt``.

    Keywords stored in ``self.kwargs`` are used as defaults for ``kwds``.
    Any exception from the first attempt triggers one retry with ``alt``.

    Raises
    ------
    TypeError
        If the retry raises ValueError and ``values`` is object dtype.
    """
    # keywords passed by the caller win over the stored defaults
    for key, default in compat.iteritems(self.kwargs):
        kwds.setdefault(key, default)

    try:
        if self.zero_value is not None and values.size == 0:
            if values.ndim != 1:
                # zero-filled result with the reduced axis removed
                result_shape = (values.shape[:axis] +
                                values.shape[axis + 1:])
                result = np.empty(result_shape)
                result.fill(0)
                return result

            # wrap the 0's if needed
            return lib.Timedelta(0) if is_timedelta64_dtype(values) else 0

        use_bottleneck = (_USE_BOTTLENECK and skipna and
                          _bn_ok_dtype(values.dtype, bn_name))
        if use_bottleneck:
            result = bn_func(values, axis=axis, **kwds)

            # prefer to treat inf/-inf as NA, but must compute the func
            # twice :(
            if _has_infs(result):
                result = alt(values, axis=axis, skipna=skipna, **kwds)
        else:
            result = alt(values, axis=axis, skipna=skipna, **kwds)
    except Exception:
        try:
            result = alt(values, axis=axis, skipna=skipna, **kwds)
        except ValueError as e:
            # we want to transform an object array
            # ValueError message to the more typical TypeError
            # e.g. this is normally a disallowed function on
            # object arrays that contain strings
            if not is_object_dtype(values):
                raise
            raise TypeError(e)

    return result
def _ensure_numeric(x):
    """
    Coerce ``x`` to a numeric array or scalar.

    ndarrays of integer or bool dtype are cast to float64.  Object ndarrays
    are cast to complex128, falling back to float64 if that fails, and are
    reduced to their real part when no element has an imaginary component.
    Other ndarrays are returned unchanged.  Scalars that are not already
    float, integer or complex are converted with ``float`` and then
    ``complex``.

    Raises
    ------
    TypeError
        If a scalar cannot be converted by either ``float`` or ``complex``.
    """
    if isinstance(x, np.ndarray):
        if is_integer_dtype(x) or is_bool_dtype(x):
            x = x.astype(np.float64)
        elif is_object_dtype(x):
            try:
                x = x.astype(np.complex128)
            except Exception:
                # Not convertible to complex; try plain floats instead.
                # Catching Exception (not a bare except) lets
                # KeyboardInterrupt/SystemExit propagate.
                x = x.astype(np.float64)
            else:
                if not np.any(x.imag):
                    x = x.real
    elif not (is_float(x) or is_integer(x) or is_complex(x)):
        try:
            x = float(x)
        except Exception:
            try:
                x = complex(x)
            except Exception:
                raise TypeError('Could not convert %s to numeric' % str(x))
    return x
def astype(self, dtype, copy=True):
    """
    Cast to ``dtype``.

    Object dtype returns ``self.asobject``.  timedelta64[ns] returns this
    index, copied when ``copy is True``.  Other timedelta64 dtypes and
    integer dtypes return an Index built from the converted values.

    Raises
    ------
    ValueError
        For any other target dtype.
    """
    dtype = np.dtype(dtype)

    if is_object_dtype(dtype):
        return self.asobject

    if is_timedelta64_ns_dtype(dtype):
        return self.copy() if copy is True else self

    if is_timedelta64_dtype(dtype):
        # return an index (essentially this is division)
        converted = self.values.astype(dtype, copy=copy)
        if self.hasnans:
            masked = self._maybe_mask_results(converted, convert='float64')
            return Index(masked, name=self.name)
        return Index(converted.astype('i8'), name=self.name)

    if is_integer_dtype(dtype):
        return Index(self.values.astype('i8', copy=copy), dtype='i8',
                     name=self.name)

    raise ValueError('Cannot cast TimedeltaIndex to dtype %s' % dtype)
def _ensure_numeric(x):
    """
    Coerce ``x`` to a numeric array or scalar.

    ndarrays of integer or bool dtype are cast to float64.  Object ndarrays
    are cast to complex128, falling back to float64 if that fails, and are
    reduced to their real part when no element has an imaginary component.
    Other ndarrays are returned unchanged.  Scalars that are not already
    float, integer or complex are converted with ``float`` and then
    ``complex``.

    Raises
    ------
    TypeError
        If a scalar cannot be converted by either ``float`` or ``complex``.
    """
    if isinstance(x, np.ndarray):
        if is_integer_dtype(x) or is_bool_dtype(x):
            x = x.astype(np.float64)
        elif is_object_dtype(x):
            try:
                x = x.astype(np.complex128)
            except Exception:
                # Not convertible to complex; try plain floats instead.
                # Catching Exception (not a bare except) lets
                # KeyboardInterrupt/SystemExit propagate.
                x = x.astype(np.float64)
            else:
                if not np.any(x.imag):
                    x = x.real
    elif not (is_float(x) or is_integer(x) or is_complex(x)):
        try:
            x = float(x)
        except Exception:
            try:
                x = complex(x)
            except Exception:
                raise TypeError('Could not convert %s to numeric' % str(x))
    return x
def convert(values):
    """ convert the numpy values to a list """
    dtype = values.dtype

    if is_categorical_dtype(values):
        return values
    if is_object_dtype(dtype):
        return values.ravel().tolist()

    if needs_i8_conversion(dtype):
        values = values.view('i8')
    flat = values.ravel()

    # ``compressor`` comes from the enclosing scope
    if compressor == 'zlib':
        _check_zlib()

        # return string arrays like they are
        if dtype == np.object_:
            return flat.tolist()

        # convert to a bytes array
        return ExtType(0, zlib.compress(flat.tostring()))

    if compressor == 'blosc':
        _check_blosc()

        # return string arrays like they are
        if dtype == np.object_:
            return flat.tolist()

        # convert to a bytes array
        return ExtType(0, blosc.compress(flat.tostring(),
                                         typesize=dtype.itemsize))

    # ndarray (on original dtype)
    return ExtType(0, flat.tostring())
def na_op(x, y):
    """
    Compare ``x`` with ``y`` using the enclosing scope's ``op`` / ``name``.

    ``op`` and ``name`` are not defined in this function; they are taken
    from the surrounding scope (not visible here).

    Raises
    ------
    TypeError
        If ``is_datetimelike_v_numeric(x, y)`` is true, or if the
        comparison method returns NotImplemented.
    """
    # dispatch to the categorical if we have a categorical
    # in either operand
    if is_categorical_dtype(x):
        return op(x,y)
    elif is_categorical_dtype(y) and not isscalar(y):
        # NOTE(review): operands are swapped here -- confirm the
        # categorical side handles the reflected comparison.
        return op(y,x)

    if is_object_dtype(x.dtype):
        if isinstance(y, list):
            y = lib.list_to_object_array(y)

        if isinstance(y, (np.ndarray, pd.Series)):
            # vec_compare is given object-dtype data on both sides
            if not is_object_dtype(y.dtype):
                result = lib.vec_compare(x, y.astype(np.object_), op)
            else:
                result = lib.vec_compare(x, y, op)
        else:
            result = lib.scalar_compare(x, y, op)
    else:

        # we want to compare like types
        # we only want to convert to integer like if
        # we are not NotImplemented, otherwise
        # we would allow datetime64 (but viewed as i8) against
        # integer comparisons
        if is_datetimelike_v_numeric(x, y):
            raise TypeError("invalid type comparison")

        # numpy does not like comparisons vs None
        if isscalar(y) and isnull(y):
            y = np.nan

        # we have a datetime/timedelta and may need to convert
        mask = None
        if needs_i8_conversion(x) or (not isscalar(y) and needs_i8_conversion(y)):

            if isscalar(y):
                y = _index.convert_scalar(x,_values_from_object(y))
            else:
                y = y.view('i8')

            # NOTE(review): for '__ne__' the mask selects the NON-null
            # positions of x, and masked positions are set to False below.
            # That looks like it forces every non-null '!=' result to
            # False -- confirm whether this is intended.
            if name == '__ne__':
                mask = notnull(x)
            else:
                mask = isnull(x)

            x = x.view('i8')

        try:
            result = getattr(x, name)(y)
            if result is NotImplemented:
                raise TypeError("invalid type comparison")
        except AttributeError:
            # x has no method called ``name``; fall back to the operator
            result = op(x, y)

        if mask is not None and mask.any():
            result[mask] = False

    return result
def na_op(x, y):
    """
    Compare ``x`` with ``y`` using the enclosing scope's ``op`` / ``name``.

    ``op`` and ``name`` are not defined in this function; they are taken
    from the surrounding scope (not visible here).

    Raises
    ------
    TypeError
        If ``is_datetimelike_v_numeric(x, y)`` is true, or if the
        comparison method returns NotImplemented.
    """
    # dispatch to the categorical if we have a categorical
    # in either operand
    if is_categorical_dtype(x):
        return op(x,y)
    elif is_categorical_dtype(y) and not isscalar(y):
        # NOTE(review): operands are swapped here -- confirm the
        # categorical side handles the reflected comparison.
        return op(y,x)

    if is_object_dtype(x.dtype):
        if isinstance(y, list):
            y = lib.list_to_object_array(y)

        if isinstance(y, (np.ndarray, pd.Series)):
            # vec_compare is given object-dtype data on both sides
            if not is_object_dtype(y.dtype):
                result = lib.vec_compare(x, y.astype(np.object_), op)
            else:
                result = lib.vec_compare(x, y, op)
        else:
            result = lib.scalar_compare(x, y, op)
    else:

        # we want to compare like types
        # we only want to convert to integer like if
        # we are not NotImplemented, otherwise
        # we would allow datetime64 (but viewed as i8) against
        # integer comparisons
        if is_datetimelike_v_numeric(x, y):
            raise TypeError("invalid type comparison")

        # numpy does not like comparisons vs None
        if isscalar(y) and isnull(y):
            y = np.nan

        # we have a datetime/timedelta and may need to convert
        mask = None
        if needs_i8_conversion(x) or (not isscalar(y) and needs_i8_conversion(y)):

            if isscalar(y):
                y = _index.convert_scalar(x,_values_from_object(y))
            else:
                y = y.view('i8')

            # NOTE(review): for '__ne__' the mask selects the NON-null
            # positions of x, and masked positions are set to False below.
            # That looks like it forces every non-null '!=' result to
            # False -- confirm whether this is intended.
            if name == '__ne__':
                mask = notnull(x)
            else:
                mask = isnull(x)

            x = x.view('i8')

        try:
            result = getattr(x, name)(y)
            if result is NotImplemented:
                raise TypeError("invalid type comparison")
        except AttributeError:
            # x has no method called ``name``; fall back to the operator
            result = op(x, y)

        if mask is not None and mask.any():
            result[mask] = False

    return result