def _from_arraylike(cls, data, freq, tz): if freq is not None: freq = Period._maybe_convert_freq(freq) if not isinstance( data, (np.ndarray, PeriodIndex, DatetimeIndex, Int64Index)): if is_scalar(data) or isinstance(data, Period): raise ValueError('PeriodIndex() must be called with a ' 'collection of some kind, %s was passed' % repr(data)) # other iterable of some kind if not isinstance(data, (list, tuple)): data = list(data) try: data = _ensure_int64(data) if freq is None: raise ValueError('freq not specified') data = np.array([Period(x, freq=freq) for x in data], dtype=np.int64) except (TypeError, ValueError): data = _ensure_object(data) if freq is None: freq = period.extract_freq(data) data = period.extract_ordinals(data, freq) else: if isinstance(data, PeriodIndex): if freq is None or freq == data.freq: freq = data.freq data = data._values else: base1, _ = _gfc(data.freq) base2, _ = _gfc(freq) data = period.period_asfreq_arr(data._values, base1, base2, 1) else: if is_object_dtype(data): inferred = infer_dtype(data) if inferred == 'integer': data = data.astype(np.int64) if freq is None and is_object_dtype(data): # must contain Period instance and thus extract ordinals freq = period.extract_freq(data) data = period.extract_ordinals(data, freq) if freq is None: msg = 'freq not specified and cannot be inferred' raise ValueError(msg) if data.dtype != np.int64: if np.issubdtype(data.dtype, np.datetime64): data = dt64arr_to_periodarr(data, freq, tz) else: data = _ensure_object(data) data = period.extract_ordinals(data, freq) return data, freq
def _from_arraylike(cls, data, freq, tz): if freq is not None: freq = Period._maybe_convert_freq(freq) if not isinstance(data, (np.ndarray, PeriodIndex, DatetimeIndex, Int64Index)): if is_scalar(data) or isinstance(data, Period): raise ValueError('PeriodIndex() must be called with a ' 'collection of some kind, %s was passed' % repr(data)) # other iterable of some kind if not isinstance(data, (list, tuple)): data = list(data) try: data = _ensure_int64(data) if freq is None: raise ValueError('freq not specified') data = np.array([Period(x, freq=freq) for x in data], dtype=np.int64) except (TypeError, ValueError): data = _ensure_object(data) if freq is None: freq = period.extract_freq(data) data = period.extract_ordinals(data, freq) else: if isinstance(data, PeriodIndex): if freq is None or freq == data.freq: freq = data.freq data = data._values else: base1, _ = _gfc(data.freq) base2, _ = _gfc(freq) data = period.period_asfreq_arr(data._values, base1, base2, 1) else: if is_object_dtype(data): inferred = infer_dtype(data) if inferred == 'integer': data = data.astype(np.int64) if freq is None and is_object_dtype(data): # must contain Period instance and thus extract ordinals freq = period.extract_freq(data) data = period.extract_ordinals(data, freq) if freq is None: msg = 'freq not specified and cannot be inferred' raise ValueError(msg) if data.dtype != np.int64: if np.issubdtype(data.dtype, np.datetime64): data = dt64arr_to_periodarr(data, freq, tz) else: data = _ensure_object(data) data = period.extract_ordinals(data, freq) return data, freq
def safe_na_op(lvalues, rvalues): try: return na_op(lvalues, rvalues) except Exception: if isinstance(rvalues, ABCSeries): if is_object_dtype(rvalues): # if dtype is object, try elementwise op return _algos.arrmap_object(rvalues, lambda x: op(lvalues, x)) else: if is_object_dtype(lvalues): return _algos.arrmap_object(lvalues, lambda x: op(x, rvalues)) raise
def memory_usage(self, deep=False): """ Memory usage of my values Parameters ---------- deep : bool Introspect the data deeply, interrogate `object` dtypes for system-level memory consumption Returns ------- bytes used Notes ----- Memory usage does not include memory consumed by elements that are not components of the array if deep=False See Also -------- numpy.ndarray.nbytes """ if hasattr(self.values, 'memory_usage'): return self.values.memory_usage(deep=deep) v = self.values.nbytes if deep and is_object_dtype(self): v += lib.memory_usage_of_objects(self.values) return v
def astype(self, dtype, copy=True): dtype = np.dtype(dtype) if is_object_dtype(dtype): return self.asobject elif is_integer_dtype(dtype): return Index(self.values.astype('i8', copy=copy), name=self.name, dtype='i8') raise ValueError('Cannot cast PeriodIndex to dtype %s' % dtype)
def test_memory_usage(self): for o in self.objs: res = o.memory_usage() res_deep = o.memory_usage(deep=True) if is_object_dtype(o) or (isinstance(o, Series) and is_object_dtype(o.index)): # if there are objects, only deep will pick them up self.assertTrue(res_deep > res) else: self.assertEqual(res, res_deep) if isinstance(o, Series): self.assertEqual((o.memory_usage(index=False) + o.index.memory_usage()), o.memory_usage(index=True)) # sys.getsizeof will call the .memory_usage with # deep=True, and add on some GC overhead diff = res_deep - sys.getsizeof(o) self.assertTrue(abs(diff) < 100)
def interval_range(start=None, end=None, freq=None, periods=None, name=None, closed='right', **kwargs): """ Return a fixed frequency IntervalIndex Parameters ---------- start : string or datetime-like, default None Left bound for generating data end : string or datetime-like, default None Right bound for generating data freq : interger, string or DateOffset, default 1 periods : interger, default None name : str, default None Name of the resulting index closed : string, default 'right' options are: 'left', 'right', 'both', 'neither' Notes ----- 2 of start, end, or periods must be specified Returns ------- rng : IntervalIndex """ if freq is None: freq = 1 if start is None: if periods is None or end is None: raise ValueError("must specify 2 of start, end, periods") start = end - periods * freq elif end is None: if periods is None or start is None: raise ValueError("must specify 2 of start, end, periods") end = start + periods * freq elif periods is None: if start is None or end is None: raise ValueError("must specify 2 of start, end, periods") pass # must all be same units or None arr = np.array([start, end, freq]) if is_object_dtype(arr): raise ValueError("start, end, freq need to be the same type") return IntervalIndex.from_breaks(np.arange(start, end, freq), name=name, closed=closed)
def astype(self, dtype, copy=True): if is_interval_dtype(dtype): if copy: self = self.copy() return self elif is_object_dtype(dtype): return Index(self.values, dtype=object) elif is_categorical_dtype(dtype): from pandas import Categorical return Categorical(self, ordered=True) raise ValueError('Cannot cast IntervalIndex to dtype %s' % dtype)
def test_memory_usage(self): for o in self.objs: res = o.memory_usage() res_deep = o.memory_usage(deep=True) if (is_object_dtype(o) or (isinstance(o, Series) and is_object_dtype(o.index))): # if there are objects, only deep will pick them up self.assertTrue(res_deep > res) else: self.assertEqual(res, res_deep) if isinstance(o, Series): self.assertEqual( (o.memory_usage(index=False) + o.index.memory_usage()), o.memory_usage(index=True)) # sys.getsizeof will call the .memory_usage with # deep=True, and add on some GC overhead diff = res_deep - sys.getsizeof(o) self.assertTrue(abs(diff) < 100)
def _bn_ok_dtype(dt, name): # Bottleneck chokes on datetime64 if (not is_object_dtype(dt) and not is_datetime_or_timedelta_dtype(dt)): # bottleneck does not properly upcast during the sum # so can overflow if name == 'nansum': if dt.itemsize < 8: return False return True return False
def unconvert(values, dtype, compress=None): as_is_ext = isinstance(values, ExtType) and values.code == 0 if as_is_ext: values = values.data if is_categorical_dtype(dtype): return values elif is_object_dtype(dtype): return np.array(values, dtype=object) dtype = pandas_dtype(dtype).base if not as_is_ext: values = values.encode('latin1') if compress: if compress == u'zlib': _check_zlib() decompress = zlib.decompress elif compress == u'blosc': _check_blosc() decompress = blosc.decompress else: raise ValueError("compress must be one of 'zlib' or 'blosc'") try: return np.frombuffer( _move_into_mutable_buffer(decompress(values)), dtype=dtype, ) except _BadMove as e: # Pull the decompressed data off of the `_BadMove` exception. # We don't just store this in the locals because we want to # minimize the risk of giving users access to a `bytes` object # whose data is also given to a mutable buffer. values = e.args[0] if len(values) > 1: # The empty string and single characters are memoized in many # string creating functions in the capi. This case should not # warn even though we need to make a copy because we are only # copying at most 1 byte. warnings.warn( 'copying data after decompressing; this may mean that' ' decompress is caching its result', PerformanceWarning, ) # fall through to copying `np.fromstring` # Copy the string into a numpy array. return np.fromstring(values, dtype=dtype)
def na_op(x, y): # dispatch to the categorical if we have a categorical # in either operand if is_categorical_dtype(x): return op(x, y) elif is_categorical_dtype(y) and not isscalar(y): return op(y, x) if is_object_dtype(x.dtype): result = _comp_method_OBJECT_ARRAY(op, x, y) else: # we want to compare like types # we only want to convert to integer like if # we are not NotImplemented, otherwise # we would allow datetime64 (but viewed as i8) against # integer comparisons if is_datetimelike_v_numeric(x, y): raise TypeError("invalid type comparison") # numpy does not like comparisons vs None if isscalar(y) and isnull(y): if name == '__ne__': return np.ones(len(x), dtype=bool) else: return np.zeros(len(x), dtype=bool) # we have a datetime/timedelta and may need to convert mask = None if (needs_i8_conversion(x) or (not isscalar(y) and needs_i8_conversion(y))): if isscalar(y): mask = isnull(x) y = _index.convert_scalar(x, _values_from_object(y)) else: mask = isnull(x) | isnull(y) y = y.view('i8') x = x.view('i8') try: result = getattr(x, name)(y) if result is NotImplemented: raise TypeError("invalid type comparison") except AttributeError: result = op(x, y) if mask is not None and mask.any(): result[mask] = masker return result
def _comp_method_OBJECT_ARRAY(op, x, y): if isinstance(y, list): y = lib.list_to_object_array(y) if isinstance(y, (np.ndarray, ABCSeries, ABCIndex)): if not is_object_dtype(y.dtype): y = y.astype(np.object_) if isinstance(y, (ABCSeries, ABCIndex)): y = y.values result = lib.vec_compare(x, y, op) else: result = lib.scalar_compare(x, y, op) return result
def astype(self, dtype, copy=True, how='start'): dtype = pandas_dtype(dtype) if is_object_dtype(dtype): return self.asobject elif is_integer_dtype(dtype): return Index(self.values.astype('i8', copy=copy), name=self.name, dtype='i8') elif is_datetime64_dtype(dtype): return self.to_timestamp(how=how) elif is_datetime64tz_dtype(dtype): return self.to_timestamp(how=how).tz_localize(dtype.tz) elif is_period_dtype(dtype): return self.asfreq(freq=dtype.freq) raise ValueError('Cannot cast PeriodIndex to dtype %s' % dtype)
def test_setitem(self): df = DataFrame({'A': range(10)}) s = pd.cut(df.A, 5) self.assertIsInstance(s.cat.categories, IntervalIndex) # B & D end up as Categoricals # the remainer are converted to in-line objects # contining an IntervalIndex.values df['B'] = s df['C'] = np.array(s) df['D'] = s.values df['E'] = np.array(s.values) assert is_categorical_dtype(df['B']) assert is_interval_dtype(df['B'].cat.categories) assert is_categorical_dtype(df['D']) assert is_interval_dtype(df['D'].cat.categories) assert is_object_dtype(df['C']) assert is_object_dtype(df['E']) # they compare equal as Index # when converted to numpy objects c = lambda x: Index(np.array(x)) tm.assert_index_equal(c(df.B), c(df.B), check_names=False) tm.assert_index_equal(c(df.B), c(df.C), check_names=False) tm.assert_index_equal(c(df.B), c(df.D), check_names=False) tm.assert_index_equal(c(df.B), c(df.D), check_names=False) # B & D are the same Series tm.assert_series_equal(df['B'], df['B'], check_names=False) tm.assert_series_equal(df['B'], df['D'], check_names=False) # C & E are the same Series tm.assert_series_equal(df['C'], df['C'], check_names=False) tm.assert_series_equal(df['C'], df['E'], check_names=False)
def astype(self, dtype, copy=True): dtype = pandas_dtype(dtype) if is_float_dtype(dtype): values = self._values.astype(dtype, copy=copy) elif is_integer_dtype(dtype): if self.hasnans: raise ValueError('cannot convert float NaN to integer') values = self._values.astype(dtype, copy=copy) elif is_object_dtype(dtype): values = self._values.astype('object', copy=copy) else: raise TypeError('Setting %s dtype to anything other than ' 'float64 or object is not supported' % self.__class__) return Index(values, name=self.name, dtype=dtype)
def test_from_to_scipy(spmatrix, index, columns, fill_value, dtype): # GH 4343 tm.skip_if_no_package('scipy') # Make one ndarray and from it one sparse matrix, both to be used for # constructing frames and comparing results arr = np.eye(2, dtype=dtype) try: spm = spmatrix(arr) assert spm.dtype == arr.dtype except (TypeError, AssertionError): # If conversion to sparse fails for this spmatrix type and arr.dtype, # then the combination is not currently supported in NumPy, so we # can just skip testing it thoroughly return sdf = pd.SparseDataFrame(spm, index=index, columns=columns, default_fill_value=fill_value) # Expected result construction is kind of tricky for all # dtype-fill_value combinations; easiest to cast to something generic # and except later on rarr = arr.astype(object) rarr[arr == 0] = np.nan expected = pd.SparseDataFrame(rarr, index=index, columns=columns).fillna( fill_value if fill_value is not None else np.nan) # Assert frame is as expected sdf_obj = sdf.astype(object) tm.assert_sp_frame_equal(sdf_obj, expected) tm.assert_frame_equal(sdf_obj.to_dense(), expected.to_dense()) # Assert spmatrices equal tm.assert_equal(dict(sdf.to_coo().todok()), dict(spm.todok())) # Ensure dtype is preserved if possible was_upcast = ((fill_value is None or is_float(fill_value)) and not is_object_dtype(dtype) and not is_float_dtype(dtype)) res_dtype = (bool if is_bool_dtype(dtype) else float if was_upcast else dtype) tm.assert_contains_all(sdf.dtypes, {np.dtype(res_dtype)}) tm.assert_equal(sdf.to_coo().dtype, res_dtype) # However, adding a str column results in an upcast to object sdf['strings'] = np.arange(len(sdf)).astype(str) tm.assert_equal(sdf.to_coo().dtype, np.object_)
def astype(self, dtype, copy=True, how='start'): dtype = pandas_dtype(dtype) if is_object_dtype(dtype): return self.asobject elif is_integer_dtype(dtype): if copy: return self._int64index.copy() else: return self._int64index elif is_datetime64_dtype(dtype): return self.to_timestamp(how=how) elif is_datetime64tz_dtype(dtype): return self.to_timestamp(how=how).tz_localize(dtype.tz) elif is_period_dtype(dtype): return self.asfreq(freq=dtype.freq) raise ValueError('Cannot cast PeriodIndex to dtype %s' % dtype)
def _f(*args, **kwargs): obj_iter = itertools.chain(args, compat.itervalues(kwargs)) if any(self.check(obj) for obj in obj_iter): raise TypeError('reduction operation {0!r} not allowed for ' 'this dtype'.format( f.__name__.replace('nan', ''))) try: return f(*args, **kwargs) except ValueError as e: # we want to transform an object array # ValueError message to the more typical TypeError # e.g. this is normally a disallowed function on # object arrays that contain strings if is_object_dtype(args[0]): raise TypeError(e) raise
def astype(self, dtype, copy=True): dtype = np.dtype(dtype) if is_object_dtype(dtype): return self.asobject elif is_timedelta64_ns_dtype(dtype): if copy is True: return self.copy() return self elif is_timedelta64_dtype(dtype): # return an index (essentially this is division) result = self.values.astype(dtype, copy=copy) if self.hasnans: return Index(self._maybe_mask_results(result, convert="float64"), name=self.name) return Index(result.astype("i8"), name=self.name) elif is_integer_dtype(dtype): return Index(self.values.astype("i8", copy=copy), dtype="i8", name=self.name) raise ValueError("Cannot cast TimedeltaIndex to dtype %s" % dtype)
def f(values, axis=None, skipna=True, **kwds): if len(self.kwargs) > 0: for k, v in compat.iteritems(self.kwargs): if k not in kwds: kwds[k] = v try: if self.zero_value is not None and values.size == 0: if values.ndim == 1: # wrap the 0's if needed if is_timedelta64_dtype(values): return lib.Timedelta(0) return 0 else: result_shape = (values.shape[:axis] + values.shape[axis + 1:]) result = np.empty(result_shape) result.fill(0) return result if (_USE_BOTTLENECK and skipna and _bn_ok_dtype(values.dtype, bn_name)): result = bn_func(values, axis=axis, **kwds) # prefer to treat inf/-inf as NA, but must compute the func # twice :( if _has_infs(result): result = alt(values, axis=axis, skipna=skipna, **kwds) else: result = alt(values, axis=axis, skipna=skipna, **kwds) except Exception: try: result = alt(values, axis=axis, skipna=skipna, **kwds) except ValueError as e: # we want to transform an object array # ValueError message to the more typical TypeError # e.g. this is normally a disallowed function on # object arrays that contain strings if is_object_dtype(values): raise TypeError(e) raise return result
def _ensure_numeric(x): if isinstance(x, np.ndarray): if is_integer_dtype(x) or is_bool_dtype(x): x = x.astype(np.float64) elif is_object_dtype(x): try: x = x.astype(np.complex128) except: x = x.astype(np.float64) else: if not np.any(x.imag): x = x.real elif not (is_float(x) or is_integer(x) or is_complex(x)): try: x = float(x) except Exception: try: x = complex(x) except Exception: raise TypeError('Could not convert %s to numeric' % str(x)) return x
def astype(self, dtype, copy=True): dtype = np.dtype(dtype) if is_object_dtype(dtype): return self.asobject elif is_timedelta64_ns_dtype(dtype): if copy is True: return self.copy() return self elif is_timedelta64_dtype(dtype): # return an index (essentially this is division) result = self.values.astype(dtype, copy=copy) if self.hasnans: return Index(self._maybe_mask_results(result, convert='float64'), name=self.name) return Index(result.astype('i8'), name=self.name) elif is_integer_dtype(dtype): return Index(self.values.astype('i8', copy=copy), dtype='i8', name=self.name) raise ValueError('Cannot cast TimedeltaIndex to dtype %s' % dtype)
def convert(values): """ convert the numpy values to a list """ dtype = values.dtype if is_categorical_dtype(values): return values elif is_object_dtype(dtype): return values.ravel().tolist() if needs_i8_conversion(dtype): values = values.view('i8') v = values.ravel() if compressor == 'zlib': _check_zlib() # return string arrays like they are if dtype == np.object_: return v.tolist() # convert to a bytes array v = v.tostring() return ExtType(0, zlib.compress(v)) elif compressor == 'blosc': _check_blosc() # return string arrays like they are if dtype == np.object_: return v.tolist() # convert to a bytes array v = v.tostring() return ExtType(0, blosc.compress(v, typesize=dtype.itemsize)) # ndarray (on original dtype) return ExtType(0, v.tostring())