def na_op(x, y): try: result = op(x, y) except TypeError: if isinstance(y, list): y = lib.list_to_object_array(y) if isinstance(y, (np.ndarray, ABCSeries)): if (is_bool_dtype(x.dtype) and is_bool_dtype(y.dtype)): result = op(x, y) # when would this be hit? else: x = _ensure_object(x) y = _ensure_object(y) result = lib.vec_binop(x, y, op) else: try: # let null fall thru if not isnull(y): y = bool(y) result = lib.scalar_binop(x, y, op) except: raise TypeError("cannot compare a dtyped [{0}] array with " "a scalar of type [{1}]".format( x.dtype, type(y).__name__)) return result
def _isfinite(values): if is_datetime_or_timedelta_dtype(values): return isnull(values) if (is_complex_dtype(values) or is_float_dtype(values) or is_integer_dtype(values) or is_bool_dtype(values)): return ~np.isfinite(values) return ~np.isfinite(values.astype('float64'))
def _evaluate_compare(self, other, op): """ We have been called because a comparison between 8 aware arrays. numpy >= 1.11 will now warn about NaT comparisons """ # coerce to a similar object if not isinstance(other, type(self)): if not is_list_like(other): # scalar other = [other] elif is_scalar(lib.item_from_zerodim(other)): # ndarray scalar other = [other.item()] other = type(self)(other) # compare result = op(self.asi8, other.asi8) # technically we could support bool dtyped Index # for now just return the indexing array directly mask = (self._isnan) | (other._isnan) if is_bool_dtype(result): result[mask] = False return result try: result[mask] = tslib.iNaT return Index(result) except TypeError: return result
def __array_wrap__(self, result, context=None): """ Gets called after a ufunc. Needs additional handling as PeriodIndex stores internal data as int dtype Replace this to __numpy_ufunc__ in future version """ if isinstance(context, tuple) and len(context) > 0: func = context[0] if (func is np.add): pass elif (func is np.subtract): name = self.name left = context[1][0] right = context[1][1] if (isinstance(left, PeriodIndex) and isinstance(right, PeriodIndex)): name = left.name if left.name == right.name else None return Index(result, name=name) elif isinstance(left, Period) or isinstance(right, Period): return Index(result, name=name) elif isinstance(func, np.ufunc): if 'M->M' not in func.types: msg = "ufunc '{0}' not supported for the PeriodIndex" # This should be TypeError, but TypeError cannot be raised # from here because numpy catches. raise ValueError(msg.format(func.__name__)) if is_bool_dtype(result): return result # the result is object dtype array of Period # cannot pass _simple_new as it is return PeriodIndex(result, freq=self.freq, name=self.name)
def __array_wrap__(self, result, context=None): """ Gets called after a ufunc. Needs additional handling as PeriodIndex stores internal data as int dtype Replace this to __numpy_ufunc__ in future version """ if isinstance(context, tuple) and len(context) > 0: func = context[0] if (func is np.add): try: return self._add_delta(context[1][1]) except IncompatibleFrequency: raise TypeError elif (func is np.subtract): try: return self._add_delta(-context[1][1]) except IncompatibleFrequency: raise TypeError elif isinstance(func, np.ufunc): if 'M->M' not in func.types: msg = "ufunc '{0}' not supported for the PeriodIndex" # This should be TypeError, but TypeError cannot be raised # from here because numpy catches. raise ValueError(msg.format(func.__name__)) if is_bool_dtype(result): return result return self._shallow_copy(result)
def wrapper(self, other): msg = "cannot compare a TimedeltaIndex with type {0}" func = getattr(super(TimedeltaIndex, self), opname) if _is_convertible_to_td(other) or other is tslib.NaT: try: other = _to_m8(other) except ValueError: # failed to parse as timedelta raise TypeError(msg.format(type(other))) result = func(other) if isnull(other): result.fill(nat_result) else: if not is_list_like(other): raise TypeError(msg.format(type(other))) other = TimedeltaIndex(other).values result = func(other) result = _values_from_object(result) if isinstance(other, Index): o_mask = other.values.view('i8') == tslib.iNaT else: o_mask = other.view('i8') == tslib.iNaT if o_mask.any(): result[o_mask] = nat_result if self.hasnans: result[self._isnan] = nat_result # support of bool dtype indexers if is_bool_dtype(result): return result return Index(result)
def get_new_values(self): values = self.values # place the values length, width = self.full_shape stride = values.shape[1] result_width = width * stride result_shape = (length, result_width) mask = self.mask mask_all = mask.all() # we can simply reshape if we don't have a mask if mask_all and len(values): new_values = (self.sorted_values.reshape( length, width, stride).swapaxes(1, 2).reshape(result_shape)) new_mask = np.ones(result_shape, dtype=bool) return new_values, new_mask # if our mask is all True, then we can use our existing dtype if mask_all: dtype = values.dtype new_values = np.empty(result_shape, dtype=dtype) else: dtype, fill_value = _maybe_promote(values.dtype, self.fill_value) new_values = np.empty(result_shape, dtype=dtype) new_values.fill(fill_value) new_mask = np.zeros(result_shape, dtype=bool) name = np.dtype(dtype).name sorted_values = self.sorted_values # we need to convert to a basic dtype # and possibly coerce an input to our output dtype # e.g. ints -> floats if needs_i8_conversion(values): sorted_values = sorted_values.view('i8') new_values = new_values.view('i8') name = 'int64' elif is_bool_dtype(values): sorted_values = sorted_values.astype('object') new_values = new_values.astype('object') name = 'object' else: sorted_values = sorted_values.astype(name, copy=False) # fill in our values & mask f = getattr(_reshape, "unstack_{}".format(name)) f(sorted_values, mask.view('u1'), stride, length, width, new_values, new_mask.view('u1')) # reconstruct dtype if needed if needs_i8_conversion(values): new_values = new_values.view(values.dtype) return new_values, new_mask
def _wrap_result(name, data, sparse_index, fill_value, dtype=None): """ wrap op result to have correct dtype """ if name in ('eq', 'ne', 'lt', 'gt', 'le', 'ge'): dtype = np.bool if is_bool_dtype(dtype): # fill_value may be np.bool_ fill_value = bool(fill_value) return SparseArray(data, sparse_index=sparse_index, fill_value=fill_value, dtype=dtype)
def f(x, y): xmask = isnull(x) ymask = isnull(y) mask = xmask | ymask result = op(x, y) if mask.any(): if is_bool_dtype(result): result = result.astype('O') np.putmask(result, mask, np.nan) return result
def f(x, y): xmask = isnull(x) ymask = isnull(y) mask = xmask | ymask with np.errstate(all='ignore'): result = op(x, y) if mask.any(): if is_bool_dtype(result): result = result.astype('O') np.putmask(result, mask, np.nan) return result
def astype(self, dtype=None, copy=True): dtype = np.dtype(dtype) sp_values = _astype_nansafe(self.sp_values, dtype, copy=copy) try: if is_bool_dtype(dtype): # to avoid np.bool_ dtype fill_value = bool(self.fill_value) else: fill_value = dtype.type(self.fill_value) except ValueError: msg = 'unable to coerce current fill_value {0} to {1} dtype' raise ValueError(msg.format(self.fill_value, dtype)) return self._simple_new(sp_values, self.sp_index, fill_value=fill_value)
def test_from_to_scipy(spmatrix, index, columns, fill_value, dtype): # GH 4343 tm.skip_if_no_package('scipy') # Make one ndarray and from it one sparse matrix, both to be used for # constructing frames and comparing results arr = np.eye(2, dtype=dtype) try: spm = spmatrix(arr) assert spm.dtype == arr.dtype except (TypeError, AssertionError): # If conversion to sparse fails for this spmatrix type and arr.dtype, # then the combination is not currently supported in NumPy, so we # can just skip testing it thoroughly return sdf = pd.SparseDataFrame(spm, index=index, columns=columns, default_fill_value=fill_value) # Expected result construction is kind of tricky for all # dtype-fill_value combinations; easiest to cast to something generic # and except later on rarr = arr.astype(object) rarr[arr == 0] = np.nan expected = pd.SparseDataFrame(rarr, index=index, columns=columns).fillna( fill_value if fill_value is not None else np.nan) # Assert frame is as expected sdf_obj = sdf.astype(object) tm.assert_sp_frame_equal(sdf_obj, expected) tm.assert_frame_equal(sdf_obj.to_dense(), expected.to_dense()) # Assert spmatrices equal tm.assert_equal(dict(sdf.to_coo().todok()), dict(spm.todok())) # Ensure dtype is preserved if possible was_upcast = ((fill_value is None or is_float(fill_value)) and not is_object_dtype(dtype) and not is_float_dtype(dtype)) res_dtype = (bool if is_bool_dtype(dtype) else float if was_upcast else dtype) tm.assert_contains_all(sdf.dtypes, {np.dtype(res_dtype)}) tm.assert_equal(sdf.to_coo().dtype, res_dtype) # However, adding a str column results in an upcast to object sdf['strings'] = np.arange(len(sdf)).astype(str) tm.assert_equal(sdf.to_coo().dtype, np.object_)
def _get_values(values, skipna, fill_value=None, fill_value_typ=None, isfinite=False, copy=True): """ utility to get the values view, mask, dtype if necessary copy and mask using the specified fill_value copy = True will force the copy """ values = _values_from_object(values) if isfinite: mask = _isfinite(values) else: mask = isnull(values) dtype = values.dtype dtype_ok = _na_ok_dtype(dtype) # get our fill value (in case we need to provide an alternative # dtype for it) fill_value = _get_fill_value(dtype, fill_value=fill_value, fill_value_typ=fill_value_typ) if skipna: if copy: values = values.copy() if dtype_ok: np.putmask(values, mask, fill_value) # promote if needed else: values, changed = _maybe_upcast_putmask(values, mask, fill_value) elif copy: values = values.copy() values = _view_if_needed(values) # return a platform independent precision dtype dtype_max = dtype if is_integer_dtype(dtype) or is_bool_dtype(dtype): dtype_max = np.int64 elif is_float_dtype(dtype): dtype_max = np.float64 return values, mask, dtype, dtype_max
def as_json_table_type(x): """ Convert a NumPy / pandas type to its corresponding json_table. Parameters ---------- x : array or dtype Returns ------- t : str the Table Schema data types Notes ----- This table shows the relationship between NumPy / pandas dtypes, and Table Schema dtypes. ============== ================= Pandas type Table Schema type ============== ================= int64 integer float64 number bool boolean datetime64[ns] datetime timedelta64[ns] duration object str categorical any =============== ================= """ if is_integer_dtype(x): return 'integer' elif is_bool_dtype(x): return 'boolean' elif is_numeric_dtype(x): return 'number' elif (is_datetime64_dtype(x) or is_datetime64tz_dtype(x) or is_period_dtype(x)): return 'datetime' elif is_timedelta64_dtype(x): return 'duration' elif is_categorical_dtype(x): return 'any' elif is_string_dtype(x): return 'string' else: return 'any'
def __getitem__(self, key): """ """ if is_integer(key): return self._get_val_at(key) elif isinstance(key, tuple): data_slice = self.values[key] else: if isinstance(key, SparseArray): if is_bool_dtype(key): key = key.to_dense() else: key = np.asarray(key) if hasattr(key, '__len__') and len(self) != len(key): return self.take(key) else: data_slice = self.values[key] return self._constructor(data_slice)
def _ensure_numeric(x): if isinstance(x, np.ndarray): if is_integer_dtype(x) or is_bool_dtype(x): x = x.astype(np.float64) elif is_object_dtype(x): try: x = x.astype(np.complex128) except: x = x.astype(np.float64) else: if not np.any(x.imag): x = x.real elif not (is_float(x) or is_integer(x) or is_complex(x)): try: x = float(x) except Exception: try: x = complex(x) except Exception: raise TypeError('Could not convert %s to numeric' % str(x)) return x
def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None): """ Encode input values as an enumerated type or categorical variable Parameters ---------- values : ndarray (1-d) Sequence sort : boolean, default False Sort by values na_sentinel : int, default -1 Value to mark "not found" size_hint : hint to the hashtable sizer Returns ------- labels : the indexer to the original array uniques : ndarray (1-d) or Index the unique values. Index is returned when passed values is Index or Series note: an array of Periods will ignore sort as it returns an always sorted PeriodIndex """ from pandas import Index, Series, DatetimeIndex, PeriodIndex # handling possibilities here # - for a numpy datetimelike simply view as i8 then cast back # - bool handled as uint8 then cast back # - for an extension datetimelike view as i8 then # reconstruct from boxed values to transfer metadata dtype = None if needs_i8_conversion(values): if is_period_dtype(values): values = PeriodIndex(values) vals = values.asi8 elif is_datetimetz(values): values = DatetimeIndex(values) vals = values.asi8 else: # numpy dtype dtype = values.dtype vals = values.view(np.int64) elif is_bool_dtype(values): dtype = bool vals = np.asarray(values).view('uint8') else: vals = np.asarray(values) (hash_klass, vec_klass), vals = _get_data_algo(vals, _hashtables) table = hash_klass(size_hint or len(vals)) uniques = vec_klass() check_nulls = not is_integer_dtype(values) labels = table.get_labels(vals, uniques, 0, na_sentinel, check_nulls) labels = _ensure_platform_int(labels) uniques = uniques.to_array() if sort and len(uniques) > 0: uniques, labels = safe_sort(uniques, labels, na_sentinel=na_sentinel, assume_unique=True) if dtype is not None: uniques = uniques.astype(dtype) if isinstance(values, Index): uniques = values._shallow_copy(uniques, name=None) elif isinstance(values, Series): uniques = Index(uniques) return labels, uniques
def get_new_values(self): values = self.values # place the values length, width = self.full_shape stride = values.shape[1] result_width = width * stride result_shape = (length, result_width) mask = self.mask mask_all = mask.all() # we can simply reshape if we don't have a mask if mask_all and len(values): new_values = (self.sorted_values .reshape(length, width, stride) .swapaxes(1, 2) .reshape(result_shape) ) new_mask = np.ones(result_shape, dtype=bool) return new_values, new_mask # if our mask is all True, then we can use our existing dtype if mask_all: dtype = values.dtype new_values = np.empty(result_shape, dtype=dtype) else: dtype, fill_value = maybe_promote(values.dtype, self.fill_value) new_values = np.empty(result_shape, dtype=dtype) new_values.fill(fill_value) new_mask = np.zeros(result_shape, dtype=bool) name = np.dtype(dtype).name sorted_values = self.sorted_values # we need to convert to a basic dtype # and possibly coerce an input to our output dtype # e.g. ints -> floats if needs_i8_conversion(values): sorted_values = sorted_values.view('i8') new_values = new_values.view('i8') name = 'int64' elif is_bool_dtype(values): sorted_values = sorted_values.astype('object') new_values = new_values.astype('object') name = 'object' else: sorted_values = sorted_values.astype(name, copy=False) # fill in our values & mask f = getattr(_reshape, "unstack_{}".format(name)) f(sorted_values, mask.view('u1'), stride, length, width, new_values, new_mask.view('u1')) # reconstruct dtype if needed if needs_i8_conversion(values): new_values = new_values.view(values.dtype) return new_values, new_mask
def isin(comps, values): """ Compute the isin boolean array Parameters ---------- comps: array-like values: array-like Returns ------- boolean array same length as comps """ if not is_list_like(comps): raise TypeError("only list-like objects are allowed to be passed" " to isin(), you passed a " "[{0}]".format(type(comps).__name__)) if not is_list_like(values): raise TypeError("only list-like objects are allowed to be passed" " to isin(), you passed a " "[{0}]".format(type(values).__name__)) from pandas import DatetimeIndex, PeriodIndex if not isinstance(values, (ABCIndex, ABCSeries, np.ndarray)): values = np.array(list(values), dtype='object') if needs_i8_conversion(comps): if is_period_dtype(values): comps = PeriodIndex(comps) values = PeriodIndex(values) else: comps = DatetimeIndex(comps) values = DatetimeIndex(values) values = values.asi8 comps = comps.asi8 elif is_bool_dtype(comps): try: comps = np.asarray(comps).view('uint8') values = np.asarray(values).view('uint8') except TypeError: # object array conversion will fail pass else: comps = np.asarray(comps) values = np.asarray(values) # GH11232 # work-around for numpy < 1.8 and comparisions on py3 # faster for larger cases to use np.in1d if (_np_version_under1p8 and compat.PY3) or len(comps) > 1000000: f = lambda x, y: np.in1d(x, np.asarray(list(y))) elif is_int64_dtype(comps): f = lambda x, y: lib.ismember_int64(x, set(y)) else: f = lambda x, y: lib.ismember(x, set(values)) return f(comps, values)