def na_op(x, y):
    """
    Evaluate the enclosing comparison on ``x`` and ``y``.

    ``op`` (the operator callable), ``name`` (its dunder name, e.g.
    ``'__ne__'``) and ``masker`` (the value written where an operand is
    missing) are taken from the enclosing scope.

    Raises TypeError("invalid type comparison") for datetimelike-vs-numeric
    operands or when the underlying comparison returns NotImplemented.
    """
    # categoricals implement the comparison themselves; hand over to
    # whichever operand is categorical
    if is_categorical_dtype(x):
        return op(x, y)
    if is_categorical_dtype(y) and not isscalar(y):
        return op(y, x)

    if is_object_dtype(x.dtype):
        return _comp_method_OBJECT_ARRAY(op, x, y)

    # only like types may be compared: without this check a datetime64
    # (once viewed as i8) could be compared against plain integers
    if is_datetimelike_v_numeric(x, y):
        raise TypeError("invalid type comparison")

    rhs_is_scalar = isscalar(y)

    # numpy does not like comparisons vs None, so answer directly:
    # everything is "not equal" to a missing scalar, nothing else holds
    if rhs_is_scalar and isnull(y):
        if name == '__ne__':
            return np.ones(len(x), dtype=bool)
        return np.zeros(len(x), dtype=bool)

    # datetime/timedelta operands are compared through their i8 view;
    # remember where the missing values were before converting
    null_positions = None
    if needs_i8_conversion(x) or (not rhs_is_scalar and
                                  needs_i8_conversion(y)):
        if rhs_is_scalar:
            null_positions = isnull(x)
            y = _index.convert_scalar(x, _values_from_object(y))
        else:
            null_positions = isnull(x) | isnull(y)
            y = y.view('i8')
        x = x.view('i8')

    try:
        result = getattr(x, name)(y)
        if result is NotImplemented:
            raise TypeError("invalid type comparison")
    except AttributeError:
        # x has no such comparison method; fall back to the operator
        result = op(x, y)

    if null_positions is not None and null_positions.any():
        result[null_positions] = masker

    return result
def _get_data_algo(values, func_map):
    """
    Select the routine from ``func_map`` matching the dtype of ``values``.

    Returns a ``(func, converted_values)`` tuple where ``converted_values``
    has been coerced to the representation associated with the chosen key
    ('float64', 'int64' or 'generic').
    """
    if com.is_float_dtype(values):
        algo = func_map['float64']
        converted = com._ensure_float64(values)
    elif com.needs_i8_conversion(values):
        missing = com.isnull(values)
        if not missing.ravel().any():
            # no missing values: operate on the i8 view
            algo = func_map['int64']
            converted = values.view('i8')
        else:
            # missing values present: punt to object dtype and mark the
            # missing slots with NaN
            algo = func_map['generic']
            converted = com._ensure_object(values)
            converted[missing] = np.nan
    elif com.is_integer_dtype(values):
        algo = func_map['int64']
        converted = com._ensure_int64(values)
    else:
        algo = func_map['generic']
        converted = com._ensure_object(values)

    return algo, converted
def convert(values):
    """
    Serialize the values of a numpy array.

    Object arrays are returned as a flat Python list. Every other dtype is
    returned as the raw bytes of the flattened array (datetimelike data is
    viewed as i8 first), compressed with zlib or blosc when the enclosing
    ``compressor`` setting selects one of them.
    """
    dtype = values.dtype
    if needs_i8_conversion(dtype):
        values = values.view('i8')
    v = values.ravel()

    # object arrays (e.g. strings) are never turned into bytes; handling
    # them once here makes per-compressor object checks unnecessary
    if dtype == np.object_:
        return v.tolist()

    # ``tobytes`` is the supported spelling of the deprecated ``tostring``
    # and yields the same bytes
    raw = v.tobytes()

    if compressor == 'zlib':
        return zlib.compress(raw)
    elif compressor == 'blosc' and _BLOSC:
        # typesize is taken from the original dtype
        return blosc.compress(raw, typesize=dtype.itemsize)

    # uncompressed ndarray bytes
    return raw
def convert(values):
    """
    Convert the values of a numpy array into a serializable form.

    * object arrays -> flat Python list
    * a compressor is active (``compressor`` from the enclosing scope)
      -> compressed bytes of the flattened array
    * otherwise -> the flattened ndarray itself for float64/int64 input,
      or a flat Python list for any other dtype

    Datetimelike data is viewed as i8 before flattening; the dtype checks
    below are made against the original dtype.
    """
    dtype = values.dtype
    if needs_i8_conversion(dtype):
        values = values.view("i8")
    v = values.ravel()

    # object arrays (e.g. strings) come back as a list on every path
    if dtype == np.object_:
        return v.tolist()

    if compressor == "zlib":
        # ``tobytes`` replaces the deprecated ``tostring``; same bytes
        return zlib.compress(v.tobytes())
    elif compressor == "blosc" and _BLOSC:
        return blosc.compress(v.tobytes(), typesize=dtype.itemsize)

    # ndarray (on original dtype)
    if dtype == "float64" or dtype == "int64":
        return v

    # as a list
    return v.tolist()
def _prep_values(self, values=None, kill_inf=True, how=None):
    """
    Return ``values`` coerced to float64, ready for the window routines.

    Parameters
    ----------
    values : array-like, optional
        Data to prepare. When None, ``self._selected_obj.values`` is used
        (or ``self._selected_obj`` itself if it has no ``values``).
    kill_inf : bool, default True
        If True, work on a copy in which +/-inf are replaced by NaN.
    how : optional
        Not used by this method.

    Raises
    ------
    NotImplementedError
        For dtypes that need i8 conversion (datetimelike data).
    TypeError
        If the values cannot be coerced to float64.
    """
    if values is None:
        values = getattr(self._selected_obj, 'values', self._selected_obj)

    # GH #12373 : rolling functions error on float32 data
    # make sure the data is coerced to float64
    if (com.is_float_dtype(values.dtype) or
            com.is_integer_dtype(values.dtype)):
        values = com._ensure_float64(values)
    elif com.needs_i8_conversion(values.dtype):
        raise NotImplementedError("ops for {action} for this "
                                  "dtype {dtype} are not "
                                  "implemented".format(
                                      action=self._window_type,
                                      dtype=values.dtype))
    else:
        try:
            values = com._ensure_float64(values)
        except (ValueError, TypeError):
            raise TypeError("cannot handle this type -> {0}"
                            "".format(values.dtype))

    if kill_inf:
        # copy first so the caller's data is left untouched
        values = values.copy()
        # ``np.nan`` is the canonical name; the ``np.NaN`` alias no longer
        # exists in NumPy 2.0
        values[np.isinf(values)] = np.nan

    return values
def _get_data_algo(values, func_map):
    """
    Select the routine from ``func_map`` matching the dtype of ``values``.

    Returns ``(func, converted_values)``: floats use the 'float64' entry,
    integers and i8-convertible (datetimelike) data use the 'int64' entry,
    and everything else uses the 'generic' entry with object values.
    """
    if com.is_float_dtype(values):
        return func_map['float64'], com._ensure_float64(values)

    if com.needs_i8_conversion(values):
        # datetimelike data is handled through its i8 view
        return func_map['int64'], values.view('i8')

    if com.is_integer_dtype(values):
        return func_map['int64'], com._ensure_int64(values)

    return func_map['generic'], com._ensure_object(values)
def convert(values):
    """
    Convert the values of an array into a serializable form.

    * categorical values are returned unchanged
    * object arrays are returned as a flat Python list
    * anything else is returned as ``ExtType(0, payload)`` where
      ``payload`` is the bytes of the flattened array (datetimelike data
      is viewed as i8 first), compressed with zlib or blosc when the
      enclosing ``compressor`` setting names one of them
    """
    dtype = values.dtype

    if is_categorical_dtype(values):
        return values
    elif is_object_dtype(dtype):
        # object arrays (e.g. strings) are kept as they are; since they
        # return here, the compressor branches never see object data
        return values.ravel().tolist()

    if needs_i8_conversion(dtype):
        values = values.view('i8')
    v = values.ravel()

    # ``tobytes`` replaces the deprecated ``tostring``; same bytes
    if compressor == 'zlib':
        _check_zlib()
        payload = zlib.compress(v.tobytes())
    elif compressor == 'blosc':
        _check_blosc()
        # typesize is taken from the original dtype
        payload = blosc.compress(v.tobytes(), typesize=dtype.itemsize)
    else:
        # uncompressed ndarray bytes
        payload = v.tobytes()

    return ExtType(0, payload)
def count(self):
    """
    Windowed count of valid observations.

    Each block is turned into a 0/1 validity indicator which is then summed
    over the window with ``min_periods=0``; the per-block results are
    combined by ``self._wrap_results``.
    """
    obj = self._convert_freq()
    window = self._get_window()
    if not self.center:
        # cap the window at the length of the data
        window = min(window, len(obj))

    blocks, obj = self._create_blocks(how=None)

    results = []
    for blk in blocks:
        if com.needs_i8_conversion(blk.values):
            indicator = blk.notnull().astype(int)
        else:
            try:
                indicator = np.isfinite(blk).astype(float)
            except TypeError:
                # retry after casting the block to float
                indicator = np.isfinite(blk.astype(float)).astype(float)
            indicator[pd.isnull(indicator)] = 0

        roller = self._constructor(indicator, window=window, min_periods=0,
                                   center=self.center)
        results.append(roller.sum())

    return self._wrap_results(results, blocks, obj)
def na_op(x, y):
    """
    Evaluate the enclosing comparison on ``x`` and ``y``.

    ``op`` (the operator callable) and ``name`` (its dunder name, e.g.
    ``'__ne__'``) are taken from the enclosing scope.

    For datetime/timedelta operands the comparison is done on the i8 view
    and positions where either operand is missing are then overwritten:
    True for ``__ne__`` and False for every other comparison.

    Raises TypeError("invalid type comparison") for datetimelike-vs-numeric
    operands or when the underlying comparison returns NotImplemented.
    """
    # dispatch to the categorical if we have a categorical
    # in either operand
    if is_categorical_dtype(x):
        return op(x, y)
    elif is_categorical_dtype(y) and not isscalar(y):
        return op(y, x)

    if is_object_dtype(x.dtype):
        if isinstance(y, list):
            y = lib.list_to_object_array(y)

        if isinstance(y, (np.ndarray, pd.Series)):
            if not is_object_dtype(y.dtype):
                result = lib.vec_compare(x, y.astype(np.object_), op)
            else:
                result = lib.vec_compare(x, y, op)
        else:
            result = lib.scalar_compare(x, y, op)
    else:
        # we want to compare like types
        # we only want to convert to integer like if
        # we are not NotImplemented, otherwise
        # we would allow datetime64 (but viewed as i8) against
        # integer comparisons
        if is_datetimelike_v_numeric(x, y):
            raise TypeError("invalid type comparison")

        # numpy does not like comparisons vs None
        if isscalar(y) and isnull(y):
            y = np.nan

        # we have a datetime/timedelta and may need to convert
        mask = None
        if (needs_i8_conversion(x) or
                (not isscalar(y) and needs_i8_conversion(y))):

            # record the missing positions *before* viewing as i8, where
            # they would be indistinguishable from ordinary integers
            mask = isnull(x)
            if isscalar(y):
                y = _index.convert_scalar(x, _values_from_object(y))
            else:
                mask = mask | isnull(y)
                y = y.view('i8')

            x = x.view('i8')

        try:
            result = getattr(x, name)(y)
            if result is NotImplemented:
                raise TypeError("invalid type comparison")
        except AttributeError:
            result = op(x, y)

        if mask is not None and mask.any():
            # a missing value is "not equal" to everything and fails every
            # other comparison
            result[mask] = (name == '__ne__')

    return result
def diff(arr, n, axis=0):
    """
    difference of n between self, analogous to s - s.shift(n)

    The ``n`` leading (or, for negative ``n``, trailing) positions along
    ``axis`` are filled with the missing-value marker. Integer input yields
    a float64 result, boolean input an object result, and input needing i8
    conversion is differenced on its i8 view and returned as
    timedelta64[ns].
    """
    n = int(n)

    is_timedelta = com.needs_i8_conversion(arr)
    if is_timedelta:
        out_dtype = np.float64
        arr = arr.view('i8')
        na = tslib.iNaT
    else:
        na = np.nan
        out_dtype = arr.dtype
        if issubclass(out_dtype.type, np.integer):
            out_dtype = np.float64
        elif issubclass(out_dtype.type, np.bool_):
            out_dtype = np.object_
    out_dtype = np.dtype(out_dtype)

    def _along_axis(axis_slice):
        # full slices everywhere except ``axis_slice`` on ``axis``
        indexer = [slice(None)] * arr.ndim
        indexer[axis] = axis_slice
        return tuple(indexer)

    out_arr = np.empty(arr.shape, dtype=out_dtype)

    # positions that have no counterpart n steps away
    if n >= 0:
        out_arr[_along_axis(slice(None, n))] = na
    else:
        out_arr[_along_axis(slice(n, None))] = na

    if arr.ndim == 2 and arr.dtype.name in _diff_special:
        # specialized 2-D routine fills out_arr in place
        special = _diff_special[arr.dtype.name]
        special(arr, out_arr, n, axis)
    else:
        if n >= 0:
            res_indexer = _along_axis(slice(n, None))
        else:
            res_indexer = _along_axis(slice(None, n))

        if n > 0:
            lag_indexer = _along_axis(slice(None, -n))
        else:
            lag_indexer = _along_axis(slice(-n, None))

        if not is_timedelta:
            out_arr[res_indexer] = arr[res_indexer] - arr[lag_indexer]
        else:
            # need to make sure that we account for na for
            # datelike/timedelta: the i8 markers must not actually be
            # subtracted from one another
            res = arr[res_indexer]
            lag = arr[lag_indexer]

            mask = (res == na) | (lag == na)
            if mask.any():
                res = res.copy()
                res[mask] = 0
                lag = lag.copy()
                lag[mask] = 0

            result = res - lag
            result[mask] = na
            out_arr[res_indexer] = result

    if is_timedelta:
        from pandas import TimedeltaIndex
        shape = out_arr.shape
        as_i8 = TimedeltaIndex(out_arr.ravel().astype('int64')).asi8
        out_arr = as_i8.reshape(shape).astype('timedelta64[ns]')

    return out_arr