def equals(self, other):
    """
    Determines if two Index objects contain the same elements.
    """
    if self.is_(other):
        return True

    return com.array_equivalent(com._values_from_object(self),
                                com._values_from_object(other))

def wrapper(self, other, axis=None):
    # Validate the axis parameter
    if axis is not None:
        self._get_axis_number(axis)

    if isinstance(other, pd.Series):
        name = _maybe_match_name(self, other)
        if len(self) != len(other):
            raise ValueError('Series lengths must match to compare')
        return self._constructor(na_op(self.values, other.values),
                                 index=self.index, name=name)
    elif isinstance(other, pd.DataFrame):  # pragma: no cover
        return NotImplemented
    elif isinstance(other, (np.ndarray, pd.Index)):
        if len(self) != len(other):
            raise ValueError('Lengths must match to compare')
        return self._constructor(na_op(self.values, np.asarray(other)),
                                 index=self.index).__finalize__(self)
    elif isinstance(other, pd.Categorical):
        if not com.is_categorical_dtype(self):
            msg = ("Cannot compare a Categorical for op {op} with Series "
                   "of dtype {typ}.\nIf you want to compare values, use "
                   "'series <op> np.asarray(other)'.")
            raise TypeError(msg.format(op=op, typ=self.dtype))

    mask = isnull(self)

    if com.is_categorical_dtype(self):
        # cats are a special case as get_values() would return an ndarray,
        # which would then not take categories ordering into account
        # we can go directly to op, as the na_op would just test again and
        # dispatch to it.
        res = op(self.values, other)
    else:
        values = self.get_values()
        other = _index.convert_scalar(values, _values_from_object(other))

        if issubclass(values.dtype.type, (np.datetime64, np.timedelta64)):
            values = values.view('i8')

        # scalars
        res = na_op(values, other)
        if np.isscalar(res):
            raise TypeError('Could not compare %s type with Series'
                            % type(other))

    # always return a full value series here
    res = _values_from_object(res)
    res = pd.Series(res, index=self.index, name=self.name, dtype='bool')

    # mask out the invalids
    if mask.any():
        res[mask] = masker

    return res

def get_value(self, series, key):
    """ we always want to get an index value, never a value """
    if not is_scalar(key):
        raise InvalidIndexError

    k = _values_from_object(key)
    loc = self.get_loc(k)
    new_values = _values_from_object(series)[loc]

    return new_values

def equals(self, other):
    """
    Determines if two Index objects contain the same elements.
    """
    if self.is_(other):
        return True

    try:
        return com.array_equivalent(com._values_from_object(self),
                                    com._values_from_object(other))
    except TypeError:
        # e.g. fails in numpy 1.6 with DatetimeIndex #1681
        return False

def maybe_to_datetimelike(data, copy=False):
    """
    return a DelegatedClass of a Series that is datetimelike
    (e.g. datetime64[ns] dtype or a Series of Periods)
    raise TypeError if this is not possible.

    Parameters
    ----------
    data : Series
    copy : boolean, default False
        copy the input data

    Returns
    -------
    DelegatedClass
    """

    if not isinstance(data, Series):
        raise TypeError("cannot convert an object of type {0} to a "
                        "datetimelike index".format(type(data)))

    index = data.index
    if issubclass(data.dtype.type, np.datetime64):
        return DatetimeProperties(DatetimeIndex(data, copy=copy), index)
    else:
        if isinstance(data, PeriodIndex):
            return PeriodProperties(PeriodIndex(data, copy=copy), index)

        data = com._values_from_object(data)
        inferred = lib.infer_dtype(data)
        if inferred == 'period':
            return PeriodProperties(PeriodIndex(data), index)

    raise TypeError("cannot convert an object of type {0} to a "
                    "datetimelike index".format(type(data)))

def str_repeat(arr, repeats):
    """
    Duplicate each string in the array by indicated number of times

    Parameters
    ----------
    repeats : int or array
        Same value for all (int) or different value per (array)

    Returns
    -------
    repeated : array
    """
    if np.isscalar(repeats):
        def rep(x):
            try:
                return compat.binary_type.__mul__(x, repeats)
            except TypeError:
                return compat.text_type.__mul__(x, repeats)

        return _na_map(rep, arr)
    else:
        def rep(x, r):
            try:
                return compat.binary_type.__mul__(x, r)
            except TypeError:
                return compat.text_type.__mul__(x, r)

        repeats = np.asarray(repeats, dtype=object)
        result = lib.vec_binop(_values_from_object(arr), repeats, rep)
        return result

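# A minimal usage sketch of the public counterpart of str_repeat, assuming a
# recent pandas install; the helper above is internal, but the same behavior
# (a single count or one count per element) is exposed as Series.str.repeat.
import pandas as pd

s = pd.Series(['a', 'b', 'c'])
print(s.str.repeat(3).tolist())          # ['aaa', 'bbb', 'ccc'] - one count for all
print(s.str.repeat([1, 2, 3]).tolist())  # ['a', 'bb', 'ccc'] - per-element counts
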
def wrapper(self, other):
    msg = "cannot compare a TimedeltaIndex with type {0}"
    func = getattr(super(TimedeltaIndex, self), opname)
    if _is_convertible_to_td(other) or other is NaT:
        try:
            other = _to_m8(other)
        except ValueError:
            # failed to parse as timedelta
            raise TypeError(msg.format(type(other)))
        result = func(other)
        if isna(other):
            result.fill(nat_result)
    else:
        if not is_list_like(other):
            raise TypeError(msg.format(type(other)))

        other = TimedeltaIndex(other).values
        result = func(other)
        result = _values_from_object(result)

        if isinstance(other, Index):
            o_mask = other.values.view('i8') == iNaT
        else:
            o_mask = other.view('i8') == iNaT

        if o_mask.any():
            result[o_mask] = nat_result

    if self.hasnans:
        result[self._isnan] = nat_result

    # support of bool dtype indexers
    if is_bool_dtype(result):
        return result
    return Index(result)

def _maybe_to_sparse(array):
    if isinstance(array, com.ABCSparseSeries):
        array = SparseArray(array.values, sparse_index=array.sp_index,
                            fill_value=array.fill_value, copy=True)
    if not isinstance(array, SparseArray):
        array = com._values_from_object(array)
    return array

def wrapper(self, other):
    msg = "cannot compare a {cls} with type {typ}"
    func = getattr(super(TimedeltaIndex, self), opname)
    if _is_convertible_to_td(other) or other is NaT:
        try:
            other = _to_m8(other)
        except ValueError:
            # failed to parse as timedelta
            raise TypeError(msg.format(cls=type(self).__name__,
                                       typ=type(other).__name__))
        result = func(other)
        if isna(other):
            result.fill(nat_result)

    elif not is_list_like(other):
        raise TypeError(msg.format(cls=type(self).__name__,
                                   typ=type(other).__name__))
    else:
        other = TimedeltaIndex(other).values
        result = func(other)
        result = com._values_from_object(result)

        o_mask = np.array(isna(other))
        if o_mask.any():
            result[o_mask] = nat_result

    if self.hasnans:
        result[self._isnan] = nat_result

    # support of bool dtype indexers
    if is_bool_dtype(result):
        return result
    return Index(result)

def wrapper(self, other):
    func = getattr(super(TimedeltaIndex, self), opname)
    if _is_convertible_to_td(other):
        other = _to_m8(other)
        result = func(other)
        if com.isnull(other):
            result.fill(nat_result)
    else:
        if not com.is_list_like(other):
            raise TypeError("cannot compare a TimedeltaIndex with type "
                            "{0}".format(type(other)))

        other = TimedeltaIndex(other).values
        result = func(other)
        result = _values_from_object(result)

        if isinstance(other, Index):
            o_mask = other.values.view('i8') == tslib.iNaT
        else:
            o_mask = other.view('i8') == tslib.iNaT

        if o_mask.any():
            result[o_mask] = nat_result

    if self.hasnans:
        result[self._isnan] = nat_result

    # support of bool dtype indexers
    if com.is_bool_dtype(result):
        return result
    return Index(result)

def _get_values(values, skipna, fill_value=None, fill_value_typ=None,
                isfinite=False, copy=True):
    """ utility to get the values view, mask, dtype
        if necessary copy and mask using the specified fill_value
        copy = True will force the copy """
    values = _values_from_object(values)
    if isfinite:
        mask = _isfinite(values)
    else:
        mask = isnull(values)

    dtype = values.dtype
    dtype_ok = _na_ok_dtype(dtype)

    # get our fill value (in case we need to provide an alternative
    # dtype for it)
    fill_value = _get_fill_value(dtype, fill_value=fill_value,
                                 fill_value_typ=fill_value_typ)

    if skipna:
        if copy:
            values = values.copy()
        if dtype_ok:
            np.putmask(values, mask, fill_value)

        # promote if needed
        else:
            values, changed = com._maybe_upcast_putmask(values, mask,
                                                        fill_value)

    elif copy:
        values = values.copy()

    values = _view_if_needed(values)
    return values, mask, dtype

def wrapper(self, other):
    msg = "cannot compare a {cls} with type {typ}"
    meth = getattr(dtl.DatetimeLikeArrayMixin, opname)
    if _is_convertible_to_td(other) or other is NaT:
        try:
            other = _to_m8(other)
        except ValueError:
            # failed to parse as timedelta
            raise TypeError(msg.format(cls=type(self).__name__,
                                       typ=type(other).__name__))
        result = meth(self, other)
        if isna(other):
            result.fill(nat_result)

    elif not is_list_like(other):
        raise TypeError(msg.format(cls=type(self).__name__,
                                   typ=type(other).__name__))
    else:
        other = type(self)(other).values
        result = meth(self, other)
        result = com._values_from_object(result)

        o_mask = np.array(isna(other))
        if o_mask.any():
            result[o_mask] = nat_result

    if self.hasnans:
        result[self._isnan] = nat_result

    return result

def duplicated(self, keep="first"):
    keys = com._values_from_object(com._ensure_object(self.values))
    duplicated = lib.duplicated(keys, keep=keep)
    try:
        return self._constructor(duplicated,
                                 index=self.index).__finalize__(self)
    except AttributeError:
        return np.array(duplicated, dtype=bool)

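# A small usage sketch of the public Series.duplicated API this internal
# method backs, assuming a recent pandas; `keep` controls which occurrence
# is *not* flagged as a duplicate.
import pandas as pd

s = pd.Series([1, 2, 2, 3, 1])
print(s.duplicated(keep='first').tolist())  # [False, False, True, False, True]
print(s.duplicated(keep='last').tolist())   # [True, True, False, False, False]
print(s.duplicated(keep=False).tolist())    # [True, True, True, False, True]
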
def _possibly_cast_to_timedelta(value, coerce=True, dtype=None):
    """ try to cast to timedelta64, if already a timedeltalike, then make
        sure that we are [ns] (as numpy 1.6.2 is very buggy in this regard),
        don't force the conversion unless coerce is True

        if dtype is passed then this is the target dtype
    """

    # deal with numpy not being able to handle certain timedelta operations
    if isinstance(value, (ABCSeries, np.ndarray)):

        # i8 conversions
        if value.dtype == 'int64' and np.dtype(dtype) == 'timedelta64[ns]':
            value = value.astype('timedelta64[ns]')
            return value
        elif value.dtype.kind == 'm':
            if value.dtype != 'timedelta64[ns]':
                value = value.astype('timedelta64[ns]')
            return value

    # we don't have a timedelta, but we want to try to convert to one (but
    # don't force it)
    if coerce:
        new_value = tslib.array_to_timedelta64(
            _values_from_object(value).astype(object), coerce=False)
        if new_value.dtype == 'i8':
            value = np.array(new_value, dtype='timedelta64[ns]')

    return value

def _possibly_cast_to_timedelta(value, coerce=True):
    """ try to cast to timedelta64, if already a timedeltalike, then make
        sure that we are [ns] (as numpy 1.6.2 is very buggy in this regard),
        don't force the conversion unless coerce is True

        if coerce='compat' force a compatibility coercion (to timedeltas)
        if needed
    """

    # coercion compatibility
    if coerce == 'compat' and _np_version_under1p7:

        def convert(td, dtype):

            # we have an array with a non-object dtype
            if hasattr(td, 'item'):
                td = td.astype(np.int64).item()
                if td == tslib.iNaT:
                    return td
                if dtype == 'm8[us]':
                    td *= 1000
                return td

            if td == tslib.compat_NaT:
                return tslib.iNaT

            # convert td value to a nanosecond value
            d = td.days
            s = td.seconds
            us = td.microseconds

            if dtype == 'object' or dtype == 'm8[ns]':
                td = 1000 * us + (s + d * 24 * 3600) * 10 ** 9
            else:
                raise ValueError(
                    "invalid conversion of dtype in np < 1.7 [%s]" % dtype)
            return td

        # < 1.7 coercion
        if not is_list_like(value):
            value = np.array([value])

        dtype = value.dtype
        return np.array([convert(v, dtype) for v in value], dtype='m8[ns]')

    # deal with numpy not being able to handle certain timedelta operations
    if isinstance(value, (ABCSeries, np.ndarray)) and value.dtype.kind == 'm':
        if value.dtype != 'timedelta64[ns]':
            value = value.astype('timedelta64[ns]')
        return value

    # we don't have a timedelta, but we want to try to convert to one (but
    # don't force it)
    if coerce:
        new_value = tslib.array_to_timedelta64(
            _values_from_object(value).astype(object), coerce=False)
        if new_value.dtype == 'i8':
            value = np.array(new_value, dtype='timedelta64[ns]')

    return value

def wrapper(self, other):
    if isinstance(other, pd.Series):
        name = _maybe_match_name(self, other)
        if len(self) != len(other):
            raise ValueError('Series lengths must match to compare')
        return self._constructor(na_op(self.values, other.values),
                                 index=self.index, name=name)
    elif isinstance(other, pd.DataFrame):  # pragma: no cover
        return NotImplemented
    elif isinstance(other, (pa.Array, pd.Index)):
        if len(self) != len(other):
            raise ValueError('Lengths must match to compare')
        return self._constructor(na_op(self.values, np.asarray(other)),
                                 index=self.index).__finalize__(self)
    elif isinstance(other, pd.Categorical):
        if not com.is_categorical_dtype(self):
            msg = ("Cannot compare a Categorical for op {op} with Series "
                   "of dtype {typ}.\nIf you want to compare values, use "
                   "'series <op> np.asarray(other)'.")
            raise TypeError(msg.format(op=op, typ=self.dtype))
    else:
        mask = isnull(self)

        values = self.get_values()
        other = _index.convert_scalar(values, _values_from_object(other))

        if issubclass(values.dtype.type, (np.datetime64, np.timedelta64)):
            values = values.view('i8')

        # scalars
        res = na_op(values, other)
        if np.isscalar(res):
            raise TypeError('Could not compare %s type with Series'
                            % type(other))

        # always return a full value series here
        res = _values_from_object(res)
        res = pd.Series(res, index=self.index, name=self.name, dtype='bool')

        # mask out the invalids
        if mask.any():
            res[mask] = masker

        return res

def _get_array_list(arr, others):
    if len(others) and isinstance(_values_from_object(others)[0],
                                  (list, np.ndarray, Series)):
        arrays = [arr] + list(others)
    else:
        arrays = [arr, others]

    return [np.asarray(x, dtype=object) for x in arrays]

def wrapper(self, other, axis=None):
    # Validate the axis parameter
    if axis is not None:
        self._get_axis_number(axis)

    if isinstance(other, ABCSeries):
        name = _maybe_match_name(self, other)
        if len(self) != len(other):
            raise ValueError('Series lengths must match to compare')
        return self._constructor(na_op(self.values, other.values),
                                 index=self.index, name=name)
    elif isinstance(other, pd.DataFrame):  # pragma: no cover
        return NotImplemented
    elif isinstance(other, (np.ndarray, pd.Index)):
        # do not check length of zerodim array
        # as it will broadcast
        if (not lib.isscalar(lib.item_from_zerodim(other)) and
                len(self) != len(other)):
            raise ValueError('Lengths must match to compare')

        if isinstance(other, ABCPeriodIndex):
            # temp workaround until fixing GH 13637
            # tested in test_nat_comparisons
            # (pandas.tests.series.test_operators.TestSeriesOperators)
            return self._constructor(na_op(self.values,
                                           other.asobject.values),
                                     index=self.index)

        return self._constructor(na_op(self.values, np.asarray(other)),
                                 index=self.index).__finalize__(self)
    elif isinstance(other, pd.Categorical):
        if not is_categorical_dtype(self):
            msg = ("Cannot compare a Categorical for op {op} with Series "
                   "of dtype {typ}.\nIf you want to compare values, use "
                   "'series <op> np.asarray(other)'.")
            raise TypeError(msg.format(op=op, typ=self.dtype))

    if is_categorical_dtype(self):
        # cats are a special case as get_values() would return an ndarray,
        # which would then not take categories ordering into account
        # we can go directly to op, as the na_op would just test again and
        # dispatch to it.
        res = op(self.values, other)
    else:
        values = self.get_values()
        if isinstance(other, (list, np.ndarray)):
            other = np.asarray(other)

        res = na_op(values, other)
        if isscalar(res):
            raise TypeError('Could not compare %s type with Series'
                            % type(other))

        # always return a full value series here
        res = _values_from_object(res)

    res = pd.Series(res, index=self.index, name=self.name, dtype='bool')
    return res

def convert_value(self, v):
    """ convert the expression that is in the term to something that is
    accepted by pytables """

    def stringify(value):
        if self.encoding is not None:
            encoder = partial(pprint_thing_encoded,
                              encoding=self.encoding)
        else:
            encoder = pprint_thing
        return encoder(value)

    kind = _ensure_decoded(self.kind)
    meta = _ensure_decoded(self.meta)
    if kind == u('datetime64') or kind == u('datetime'):
        if isinstance(v, (int, float)):
            v = stringify(v)
        v = _ensure_decoded(v)
        v = pd.Timestamp(v)
        if v.tz is not None:
            v = v.tz_convert('UTC')
        return TermValue(v, v.value, kind)
    elif (isinstance(v, datetime) or hasattr(v, 'timetuple') or
          kind == u('date')):
        v = time.mktime(v.timetuple())
        return TermValue(v, pd.Timestamp(v), kind)
    elif kind == u('timedelta64') or kind == u('timedelta'):
        v = _coerce_scalar_to_timedelta_type(v, unit='s').value
        return TermValue(int(v), v, kind)
    elif meta == u('category'):
        metadata = com._values_from_object(self.metadata)
        result = metadata.searchsorted(v, side='left')

        # result returns 0 if v is first element or if v is not in metadata
        # check that metadata contains v
        if not result and v not in metadata:
            result = -1

        return TermValue(result, result, u('integer'))
    elif kind == u('integer'):
        v = int(float(v))
        return TermValue(v, v, kind)
    elif kind == u('float'):
        v = float(v)
        return TermValue(v, v, kind)
    elif kind == u('bool'):
        if isinstance(v, string_types):
            v = not v.strip().lower() in [u('false'), u('f'), u('no'),
                                          u('n'), u('none'), u('0'),
                                          u('[]'), u('{}'), u('')]
        else:
            v = bool(v)
        return TermValue(v, v, kind)
    elif not isinstance(v, string_types):
        v = stringify(v)
        return TermValue(v, stringify(v), u('string'))

    # string quoting
    return TermValue(v, stringify(v), u('string'))

def get_value(self, series, key):
    """ we always want to get an index value, never a value """
    if not lib.isscalar(key):
        raise InvalidIndexError

    from pandas.core.indexing import maybe_droplevels
    from pandas.core.series import Series

    k = com._values_from_object(key)
    loc = self.get_loc(k)
    new_values = com._values_from_object(series)[loc]

    if lib.isscalar(new_values) or new_values is None:
        return new_values

    new_index = self[loc]
    new_index = maybe_droplevels(new_index, k)
    return Series(new_values, index=new_index, name=series.name)

def indices(self):
    """ dict {group name -> group indices} """
    if len(self.groupings) == 1:
        return self.groupings[0].indices
    else:
        label_list = [ping.labels for ping in self.groupings]
        keys = [com._values_from_object(ping.group_index)
                for ping in self.groupings]
        return get_indexer_dict(label_list, keys)

def nanskew(values, axis=None, skipna=True):
    """ Compute the sample skewness.

    The statistic computed here is the adjusted Fisher-Pearson standardized
    moment coefficient G1. The algorithm computes this coefficient directly
    from the second and third central moment.

    """
    values = _values_from_object(values)
    mask = isna(values)
    if not is_float_dtype(values.dtype):
        values = values.astype('f8')
        count = _get_counts(mask, axis)
    else:
        count = _get_counts(mask, axis, dtype=values.dtype)

    if skipna:
        values = values.copy()
        np.putmask(values, mask, 0)

    mean = values.sum(axis, dtype=np.float64) / count
    if axis is not None:
        mean = np.expand_dims(mean, axis)

    adjusted = values - mean
    if skipna:
        np.putmask(adjusted, mask, 0)
    adjusted2 = adjusted ** 2
    adjusted3 = adjusted2 * adjusted
    m2 = adjusted2.sum(axis, dtype=np.float64)
    m3 = adjusted3.sum(axis, dtype=np.float64)

    # floating point error
    #
    # #18044 in _libs/windows.pyx calc_skew follow this behavior
    # to fix the fperr to treat m2 <1e-14 as zero
    m2 = _zero_out_fperr(m2)
    m3 = _zero_out_fperr(m3)

    with np.errstate(invalid='ignore', divide='ignore'):
        result = (count * (count - 1) ** 0.5 / (count - 2)) * (m3 / m2 ** 1.5)

    dtype = values.dtype
    if is_float_dtype(dtype):
        result = result.astype(dtype)

    if isinstance(result, np.ndarray):
        result = np.where(m2 == 0, 0, result)
        result[count < 3] = np.nan
        return result
    else:
        result = 0 if m2 == 0 else result
        if count < 3:
            return np.nan
        return result

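# A hedged numpy check of the G1 formula used above, on a 1-d float array
# without NaNs: m2 and m3 are sums of squared / cubed deviations from the
# mean, and G1 = n * sqrt(n - 1) / (n - 2) * m3 / m2 ** 1.5. pandas' public
# Series.skew should agree with this value.
import numpy as np
import pandas as pd

x = np.array([1.0, 2.0, 4.0, 8.0, 16.0])
n = x.size
dev = x - x.mean()
m2 = (dev ** 2).sum()
m3 = (dev ** 3).sum()
g1 = n * (n - 1) ** 0.5 / (n - 2) * m3 / m2 ** 1.5
print(g1, pd.Series(x).skew())  # the two values should match
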
def wrapper(self, other):
    if isinstance(other, pd.Series):
        name = _maybe_match_name(self, other)
        if len(self) != len(other):
            raise ValueError('Series lengths must match to compare')
        return self._constructor(na_op(self.values, other.values),
                                 index=self.index, name=name)
    elif isinstance(other, pd.DataFrame):  # pragma: no cover
        return NotImplemented
    elif isinstance(other, (pa.Array, pd.Series, pd.Index)):
        if len(self) != len(other):
            raise ValueError('Lengths must match to compare')
        return self._constructor(na_op(self.values, np.asarray(other)),
                                 index=self.index).__finalize__(self)
    else:
        mask = isnull(self)

        values = self.get_values()
        other = _index.convert_scalar(values, _values_from_object(other))

        if issubclass(values.dtype.type, np.datetime64):
            values = values.view('i8')

        # scalars
        res = na_op(values, other)
        if np.isscalar(res):
            raise TypeError('Could not compare %s type with Series'
                            % type(other))

        # always return a full value series here
        res = _values_from_object(res)
        res = pd.Series(res, index=self.index, name=self.name, dtype='bool')

        # mask out the invalids
        if mask.any():
            res[mask] = masker

        return res

def na_op(x, y):
    # dispatch to the categorical if we have a categorical
    # in either operand
    if is_categorical_dtype(x):
        return op(x, y)
    elif is_categorical_dtype(y) and not is_scalar(y):
        return op(y, x)

    if is_object_dtype(x.dtype):
        result = _comp_method_OBJECT_ARRAY(op, x, y)
    else:

        # we want to compare like types
        # we only want to convert to integer like if
        # we are not NotImplemented, otherwise
        # we would allow datetime64 (but viewed as i8) against
        # integer comparisons
        if is_datetimelike_v_numeric(x, y):
            raise TypeError("invalid type comparison")

        # numpy does not like comparisons vs None
        if is_scalar(y) and isna(y):
            if name == '__ne__':
                return np.ones(len(x), dtype=bool)
            else:
                return np.zeros(len(x), dtype=bool)

        # we have a datetime/timedelta and may need to convert
        mask = None
        if (needs_i8_conversion(x) or
                (not is_scalar(y) and needs_i8_conversion(y))):

            if is_scalar(y):
                mask = isna(x)
                y = libindex.convert_scalar(x, com._values_from_object(y))
            else:
                mask = isna(x) | isna(y)
                y = y.view('i8')
            x = x.view('i8')

        try:
            with np.errstate(all='ignore'):
                result = getattr(x, name)(y)
            if result is NotImplemented:
                raise TypeError("invalid type comparison")
        except AttributeError:
            result = op(x, y)

        if mask is not None and mask.any():
            result[mask] = masker

    return result

def _get_codes_for_values(values, categories):
    """
    utility routine to turn values into codes given the specified categories
    """

    from pandas.core.algorithms import _get_data_algo, _hashtables
    if values.dtype != categories.dtype:
        values = com._ensure_object(values)
        categories = com._ensure_object(categories)
    (hash_klass, vec_klass), vals = _get_data_algo(values, _hashtables)
    t = hash_klass(len(categories))
    t.map_locations(com._values_from_object(categories))
    return com._ensure_platform_int(t.lookup(values))

def _factorize_keys(lk, rk, sort=True):
    if com.is_datetime64tz_dtype(lk) and com.is_datetime64tz_dtype(rk):
        lk = lk.values
        rk = rk.values
    if com.is_int_or_datetime_dtype(lk) and com.is_int_or_datetime_dtype(rk):
        klass = _hash.Int64Factorizer
        lk = com._ensure_int64(com._values_from_object(lk))
        rk = com._ensure_int64(com._values_from_object(rk))
    else:
        klass = _hash.Factorizer
        lk = com._ensure_object(lk)
        rk = com._ensure_object(rk)

    rizer = klass(max(len(lk), len(rk)))

    llab = rizer.factorize(lk)
    rlab = rizer.factorize(rk)

    count = rizer.get_count()

    if sort:
        uniques = rizer.uniques.to_array()
        llab, rlab = _sort_labels(uniques, llab, rlab)

    # NA group
    lmask = llab == -1
    lany = lmask.any()
    rmask = rlab == -1
    rany = rmask.any()

    if lany or rany:
        if lany:
            np.putmask(llab, lmask, count)
        if rany:
            np.putmask(rlab, rmask, count)
        count += 1

    return llab, rlab, count

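# A rough public-API sketch of the idea behind _factorize_keys: encode the
# left and right join keys with a *shared* integer labelling so equal keys
# receive equal codes on both sides. This uses pd.factorize on the
# concatenation and is only an illustration, not the internal implementation;
# factorize_keys_sketch is a hypothetical helper name.
import numpy as np
import pandas as pd

def factorize_keys_sketch(lk, rk):
    lk = np.asarray(lk, dtype=object)
    rk = np.asarray(rk, dtype=object)
    codes, uniques = pd.factorize(np.concatenate([lk, rk]))
    llab, rlab = codes[:len(lk)], codes[len(lk):]
    return llab, rlab, len(uniques)

llab, rlab, count = factorize_keys_sketch(['a', 'b', 'c'], ['b', 'c', 'd'])
print(llab, rlab, count)  # e.g. [0 1 2] [1 2 3] 4
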
def cartesian_product(X):
    """
    Numpy version of itertools.product or pandas.compat.product.
    Sometimes faster (for large inputs)...

    Parameters
    ----------
    X : list-like of list-likes

    Returns
    -------
    product : list of ndarrays

    Examples
    --------
    >>> cartesian_product([list('ABC'), [1, 2]])
    [array(['A', 'A', 'B', 'B', 'C', 'C'], dtype='|S1'),
    array([1, 2, 1, 2, 1, 2])]

    See also
    --------
    itertools.product : Cartesian product of input iterables.  Equivalent to
        nested for-loops.
    pandas.compat.product : An alias for itertools.product.
    """
    msg = "Input must be a list-like of list-likes"
    if not is_list_like(X):
        raise TypeError(msg)
    for x in X:
        if not is_list_like(x):
            raise TypeError(msg)

    if len(X) == 0:
        return []

    lenX = np.fromiter((len(x) for x in X), dtype=int)
    cumprodX = np.cumproduct(lenX)

    a = np.roll(cumprodX, 1)
    a[0] = 1

    if cumprodX[-1] != 0:
        b = cumprodX[-1] / cumprodX
    else:
        # if any factor is empty, the cartesian product is empty
        b = np.zeros_like(cumprodX)

    return [np.tile(np.repeat(np.asarray(com._values_from_object(x)), b[i]),
                    np.product(a[i]))
            for i, x in enumerate(X)]

def value_counts(values, sort=True, ascending=False, normalize=False):
    """
    Compute a histogram of the counts of non-null values

    Parameters
    ----------
    values : ndarray (1-d)
    sort : boolean, default True
        Sort by values
    ascending : boolean, default False
        Sort in ascending order
    normalize: boolean, default False
        If True then compute a relative histogram

    Returns
    -------
    value_counts : Series
    """
    from pandas.core.series import Series

    values = np.asarray(values)

    if com.is_integer_dtype(values.dtype):
        values = com._ensure_int64(values)
        keys, counts = htable.value_count_int64(values)
    elif issubclass(values.dtype.type, (np.datetime64, np.timedelta64)):
        dtype = values.dtype
        values = values.view(np.int64)
        keys, counts = htable.value_count_int64(values)

        # convert the keys back to the dtype we came in
        keys = Series(keys, dtype=dtype)
    else:
        mask = com.isnull(values)
        values = com._ensure_object(values)
        keys, counts = htable.value_count_object(values, mask)

    result = Series(counts, index=com._values_from_object(keys))

    if sort:
        result.sort()
        if not ascending:
            result = result[::-1]

    if normalize:
        result = result / float(values.size)

    return result

def get_value(self, series, key):
    """ Fast lookup of value from 1-dimensional ndarray. Only use this if
    you know what you're doing """
    try:
        k = com._values_from_object(key)
        k = self._convert_scalar_indexer(k, kind='getitem')
        indexer = self.get_loc(k)
        return series.iloc[indexer]
    except (KeyError, TypeError):
        pass

    # we might be a positional indexer
    return super(CategoricalIndex, self).get_value(series, key)

def _get_values(values, skipna, fill_value=None, fill_value_typ=None,
                isfinite=False, copy=True):
    """ utility to get the values view, mask, dtype
        if necessary copy and mask using the specified fill_value
        copy = True will force the copy """
    values = _values_from_object(values)
    if isfinite:
        mask = _isfinite(values)
    else:
        mask = isnull(values)

    dtype = values.dtype
    dtype_ok = _na_ok_dtype(dtype)

    # get our fill value (in case we need to provide an alternative
    # dtype for it)
    fill_value = _get_fill_value(dtype, fill_value=fill_value,
                                 fill_value_typ=fill_value_typ)

    if skipna:
        if copy:
            values = values.copy()
        if dtype_ok:
            np.putmask(values, mask, fill_value)

        # promote if needed
        else:
            values, changed = com._maybe_upcast_putmask(values, mask,
                                                        fill_value)

    elif copy:
        values = values.copy()

    values = _view_if_needed(values)

    # return a platform independent precision dtype
    dtype_max = dtype
    if dtype.kind == 'i' and not issubclass(
            dtype.type, (np.bool, np.datetime64, np.timedelta64)):
        dtype_max = np.int64
    elif dtype.kind in ['b'] or issubclass(dtype.type, np.bool):
        dtype_max = np.int64
    elif dtype.kind in ['f']:
        dtype_max = np.float64

    return values, mask, dtype, dtype_max

def get_value(self, series, key):
    """ Fast lookup of value from 1-dimensional ndarray. Only use this if
    you know what you're doing """
    s = com._values_from_object(series)
    try:
        return com._maybe_box(self,
                              super(PeriodIndex, self).get_value(s, key),
                              series, key)
    except (KeyError, IndexError):
        try:
            asdt, parsed, reso = parse_time_string(key, self.freq)
            grp = frequencies.Resolution.get_freq_group(reso)
            freqn = frequencies.get_freq_group(self.freq)

            vals = self._values

            # if our data is higher resolution than requested key, slice
            if grp < freqn:
                iv = Period(asdt, freq=(grp, 1))
                ord1 = iv.asfreq(self.freq, how='S').ordinal
                ord2 = iv.asfreq(self.freq, how='E').ordinal

                if ord2 < vals[0] or ord1 > vals[-1]:
                    raise KeyError(key)

                pos = np.searchsorted(self._values, [ord1, ord2])
                key = slice(pos[0], pos[1] + 1)
                return series[key]
            elif grp == freqn:
                key = Period(asdt, freq=self.freq).ordinal
                return com._maybe_box(self, self._engine.get_value(s, key),
                                      series, key)
            else:
                raise KeyError(key)
        except TypeError:
            pass

        key = Period(key, self.freq).ordinal
        return com._maybe_box(self, self._engine.get_value(s, key),
                              series, key)

def _get_values(values, skipna, fill_value=None, fill_value_typ=None,
                isfinite=False, copy=True):
    """ utility to get the values view, mask, dtype
        if necessary copy and mask using the specified fill_value
        copy = True will force the copy
    """
    values = _values_from_object(values)
    if isfinite:
        mask = _isfinite(values)
    else:
        mask = isnull(values)

    dtype = values.dtype
    dtype_ok = _na_ok_dtype(dtype)

    # get our fill value (in case we need to provide an alternative
    # dtype for it)
    fill_value = _get_fill_value(dtype, fill_value=fill_value,
                                 fill_value_typ=fill_value_typ)

    if skipna:
        if copy:
            values = values.copy()
        if dtype_ok:
            np.putmask(values, mask, fill_value)

        # promote if needed
        else:
            values, changed = _maybe_upcast_putmask(values, mask, fill_value)

    elif copy:
        values = values.copy()

    values = _view_if_needed(values)

    # return a platform independent precision dtype
    dtype_max = dtype
    if is_integer_dtype(dtype) or is_bool_dtype(dtype):
        dtype_max = np.int64
    elif is_float_dtype(dtype):
        dtype_max = np.float64

    return values, mask, dtype, dtype_max

def wrapper(self, other):
    meth = getattr(dtl.DatetimeLikeArrayMixin, opname)

    if isinstance(other, (datetime, np.datetime64, compat.string_types)):
        if isinstance(other, datetime):
            # GH#18435 strings get a pass from tzawareness compat
            self._assert_tzawareness_compat(other)

        other = _to_m8(other, tz=self.tz)
        result = meth(self, other)
        if isna(other):
            result.fill(nat_result)
    else:
        if isinstance(other, list):
            other = type(self)(other)
        elif not isinstance(other, (np.ndarray, ABCIndexClass, ABCSeries)):
            # Following Timestamp convention, __eq__ is all-False
            # and __ne__ is all True, others raise TypeError.
            if opname == '__eq__':
                return np.zeros(shape=self.shape, dtype=bool)
            elif opname == '__ne__':
                return np.ones(shape=self.shape, dtype=bool)
            raise TypeError('%s type object %s' %
                            (type(other), str(other)))

        if is_datetimelike(other):
            self._assert_tzawareness_compat(other)

        result = meth(self, np.asarray(other))
        result = com._values_from_object(result)

        # Make sure to pass an array to result[...]; indexing with
        # Series breaks with older version of numpy
        o_mask = np.array(isna(other))
        if o_mask.any():
            result[o_mask] = nat_result

    if self.hasnans:
        result[self._isnan] = nat_result

    return result

def wrapper(self, other):
    if isinstance(other, pd.Series):
        name = _maybe_match_name(self, other)
        if len(self) != len(other):
            raise ValueError('Series lengths must match to compare')
        return self._constructor(na_op(self.values, other.values),
                                 index=self.index, name=name)
    elif isinstance(other, pd.DataFrame):  # pragma: no cover
        return NotImplemented
    elif isinstance(other, (pa.Array, pd.Series)):
        if len(self) != len(other):
            raise ValueError('Lengths must match to compare')
        return self._constructor(na_op(self.values, np.asarray(other)),
                                 index=self.index).__finalize__(self)
    else:
        mask = isnull(self)

        values = self.values
        other = _index.convert_scalar(values, other)

        if issubclass(values.dtype.type, np.datetime64):
            values = values.view('i8')

        # scalars
        res = na_op(values, other)
        if np.isscalar(res):
            raise TypeError('Could not compare %s type with Series'
                            % type(other))

        # always return a full value series here
        res = _values_from_object(res)
        res = pd.Series(res, index=self.index, name=self.name, dtype='bool')

        # mask out the invalids
        if mask.any():
            res[mask] = masker

        return res

def na_op(x, y):
    try:
        result = expressions.evaluate(op, str_rep, x, y,
                                      raise_on_error=True, **eval_kwargs)
    except TypeError:
        if isinstance(y, (np.ndarray, pd.Series, pd.Index)):
            dtype = np.find_common_type([x.dtype, y.dtype], [])
            result = np.empty(x.size, dtype=dtype)
            mask = notnull(x) & notnull(y)
            result[mask] = op(x[mask], _values_from_object(y[mask]))
        elif isinstance(x, np.ndarray):
            result = np.empty(len(x), dtype=x.dtype)
            mask = notnull(x)
            result[mask] = op(x[mask], y)
        else:
            raise TypeError("{typ} cannot perform the operation "
                            "{op}".format(typ=type(x).__name__, op=str_rep))

        result, changed = com._maybe_upcast_putmask(result, ~mask, np.nan)

    result = com._fill_zeros(result, x, y, name, fill_zeros)
    return result

def work(self, fig=None, ax=None):
    """Draw a histogram on matplotlib figure or axis

    Parameters:
    -----------
    fig: matplotlib figure
    ax: matplotlib axis

    Returns:
    --------
    a tuple with figure and axis objects
    """
    if ax is None:
        if fig is None:
            return fig, ax
        else:
            ax = fig.gca()

    x = self.data[self.aes['x']]
    ax.hist(_values_from_object(x), self.bins, facecolor=self.colour)
    ax.set_xlabel(self.aes['x'])

    return fig, ax

def na_op(x, y):
    import pandas.core.computation.expressions as expressions

    try:
        result = expressions.evaluate(op, str_rep, x, y, **eval_kwargs)
    except TypeError:
        if isinstance(y, (np.ndarray, ABCSeries, pd.Index)):
            dtype = find_common_type([x.dtype, y.dtype])
            result = np.empty(x.size, dtype=dtype)
            mask = notna(x) & notna(y)
            result[mask] = op(x[mask], com._values_from_object(y[mask]))
        else:
            assert isinstance(x, np.ndarray)
            result = np.empty(len(x), dtype=x.dtype)
            mask = notna(x)
            result[mask] = op(x[mask], y)

        result, changed = maybe_upcast_putmask(result, ~mask, np.nan)

    result = missing.fill_zeros(result, x, y, name, fill_zeros)
    return result

def __getitem__(self, key):
    try:
        return self._get_val_at(self.index.get_loc(key))

    except KeyError:
        if isinstance(key, (int, np.integer)):
            return self._get_val_at(key)
        elif key is Ellipsis:
            return self
        raise Exception('Requested index not in this series!')

    except TypeError:
        # Could not hash item, must be array-like?
        pass

    # is there a case where this would NOT be an ndarray?
    # need to find an example, I took out the case for now

    key = _values_from_object(key)
    dataSlice = self.values[key]
    new_index = Index(self.index.view(ndarray)[key])
    return self._constructor(dataSlice, index=new_index).__finalize__(self)

def maybe_to_datetimelike(data, copy=False):
    """
    return a DelegatedClass of a Series that is datetimelike
    (e.g. datetime64[ns] dtype or a Series of Periods)
    raise TypeError if this is not possible.

    Parameters
    ----------
    data : Series
    copy : boolean, default False
        copy the input data

    Returns
    -------
    DelegatedClass
    """

    if not isinstance(data, Series):
        raise TypeError(
            "cannot convert an object of type {0} to a datetimelike index".
            format(type(data)))

    index = data.index
    if issubclass(data.dtype.type, np.datetime64):
        return DatetimeProperties(DatetimeIndex(data, copy=copy), index)
    else:
        if isinstance(data, PeriodIndex):
            return PeriodProperties(PeriodIndex(data, copy=copy), index)

        data = com._values_from_object(data)
        inferred = lib.infer_dtype(data)
        if inferred == 'period':
            return PeriodProperties(PeriodIndex(data), index)

    raise TypeError(
        "cannot convert an object of type {0} to a datetimelike index".format(
            type(data)))

def __getitem__(self, key):
    try:
        return self.index.get_value(self, key)

    except InvalidIndexError:
        pass
    except KeyError:
        if isinstance(key, (int, np.integer)):
            return self._get_val_at(key)
        elif key is Ellipsis:
            return self
        raise Exception('Requested index not in this series!')

    except TypeError:
        # Could not hash item, must be array-like?
        pass

    key = _values_from_object(key)
    if self.index.nlevels > 1 and isinstance(key, tuple):
        # to handle MultiIndex labels
        key = self.index.get_loc(key)
    return self._constructor(self.values[key],
                             index=self.index[key]).__finalize__(self)

def nanvar(values, axis=None, skipna=True, ddof=1):

    values = _values_from_object(values)
    dtype = values.dtype
    mask = isna(values)
    if is_any_int_dtype(values):
        values = values.astype('f8')
        values[mask] = np.nan

    if is_float_dtype(values):
        count, d = _get_counts_nanvar(mask, axis, ddof, values.dtype)
    else:
        count, d = _get_counts_nanvar(mask, axis, ddof)

    if skipna:
        values = values.copy()
        np.putmask(values, mask, 0)

    # xref GH10242
    # Compute variance via two-pass algorithm, which is stable against
    # cancellation errors and relatively accurate for small numbers of
    # observations.
    #
    # See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
    avg = _ensure_numeric(values.sum(axis=axis, dtype=np.float64)) / count
    if axis is not None:
        avg = np.expand_dims(avg, axis)

    sqr = _ensure_numeric((avg - values) ** 2)
    np.putmask(sqr, mask, 0)
    result = sqr.sum(axis=axis, dtype=np.float64) / d

    # Return variance as np.float64 (the datatype used in the accumulator),
    # unless we were dealing with a float array, in which case use the same
    # precision as the original values array.
    if is_float_dtype(dtype):
        result = result.astype(dtype)
    return _wrap_results(result, values.dtype)

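# A self-contained numpy sketch of the two-pass variance idea the comment
# above describes (mean first, then squared deviations, with NaNs masked out
# of both passes). This mirrors the idea, not the exact pandas internals;
# two_pass_var is a hypothetical helper name.
import numpy as np

def two_pass_var(x, ddof=1):
    x = np.asarray(x, dtype=np.float64)
    mask = np.isnan(x)
    n = (~mask).sum()
    x = np.where(mask, 0.0, x)              # NaNs contribute 0 to the sums
    avg = x.sum() / n                        # first pass: mean over valid values
    sqr = np.where(mask, 0.0, (x - avg) ** 2)
    return sqr.sum() / (n - ddof)            # second pass: squared deviations

print(two_pass_var([1.0, 2.0, np.nan, 4.0]))  # ~2.333, same as np.nanvar(..., ddof=1)
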
def cartesian_product(X):
    '''
    Numpy version of itertools.product or pandas.compat.product.
    Sometimes faster (for large inputs)...

    Examples
    --------
    >>> cartesian_product([list('ABC'), [1, 2]])
    [array(['A', 'A', 'B', 'B', 'C', 'C'], dtype='|S1'),
    array([1, 2, 1, 2, 1, 2])]

    '''

    lenX = np.fromiter((len(x) for x in X), dtype=int)
    cumprodX = np.cumproduct(lenX)

    a = np.roll(cumprodX, 1)
    a[0] = 1

    b = cumprodX[-1] / cumprodX

    return [np.tile(np.repeat(np.asarray(com._values_from_object(x)), b[i]),
                    np.product(a[i]))
            for i, x in enumerate(X)]

def convert_value(self, v):
    """ convert the expression that is in the term to something that is
    accepted by pytables """

    def stringify(value):
        if self.encoding is not None:
            encoder = partial(pprint_thing_encoded,
                              encoding=self.encoding)
        else:
            encoder = pprint_thing
        return encoder(value)

    kind = _ensure_decoded(self.kind)
    meta = _ensure_decoded(self.meta)
    if kind == u('datetime64') or kind == u('datetime'):
        if isinstance(v, (int, float)):
            v = stringify(v)
        v = _ensure_decoded(v)
        v = pd.Timestamp(v)
        if v.tz is not None:
            v = v.tz_convert('UTC')
        return TermValue(v, v.value, kind)
    elif kind == u('timedelta64') or kind == u('timedelta'):
        v = _coerce_scalar_to_timedelta_type(v, unit='s').value
        return TermValue(int(v), v, kind)
    elif meta == u('category'):
        metadata = com._values_from_object(self.metadata)
        result = metadata.searchsorted(v, side='left')

        # result returns 0 if v is first element or if v is not in metadata
        # check that metadata contains v
        if not result and v not in metadata:
            result = -1

        return TermValue(result, result, u('integer'))
    elif kind == u('integer'):
        v = int(float(v))
        return TermValue(v, v, kind)
    elif kind == u('float'):
        v = float(v)
        return TermValue(v, v, kind)
    elif kind == u('bool'):
        if isinstance(v, string_types):
            v = not v.strip().lower() in [u('false'), u('f'), u('no'), u('n'),
                                          u('none'), u('0'), u('[]'), u('{}'),
                                          u('')]
        else:
            v = bool(v)
        return TermValue(v, v, kind)
    elif isinstance(v, string_types):
        # string quoting
        return TermValue(v, stringify(v), u('string'))
    else:
        raise TypeError(("Cannot compare {v} of type {typ}"
                         " to {kind} column").format(v=v, typ=type(v),
                                                     kind=kind))

def _where_standard(cond, a, b, raise_on_error=True):
    return np.where(_values_from_object(cond), _values_from_object(a),
                    _values_from_object(b))

def wrapper(self, other, axis=None):
    # Validate the axis parameter
    if axis is not None:
        self._get_axis_number(axis)

    if isinstance(other, ABCSeries):
        name = com._maybe_match_name(self, other)
        if not self._indexed_same(other):
            msg = 'Can only compare identically-labeled Series objects'
            raise ValueError(msg)
        return self._constructor(na_op(self.values, other.values),
                                 index=self.index, name=name)
    elif isinstance(other, ABCDataFrame):  # pragma: no cover
        return NotImplemented
    elif isinstance(other, (np.ndarray, pd.Index)):
        # do not check length of zerodim array
        # as it will broadcast
        if (not is_scalar(lib.item_from_zerodim(other)) and
                len(self) != len(other)):
            raise ValueError('Lengths must match to compare')

        if isinstance(other, ABCPeriodIndex):
            # temp workaround until fixing GH 13637
            # tested in test_nat_comparisons
            # (pandas.tests.series.test_operators.TestSeriesOperators)
            return self._constructor(na_op(self.values,
                                           other.astype(object).values),
                                     index=self.index)

        return self._constructor(na_op(self.values, np.asarray(other)),
                                 index=self.index).__finalize__(self)
    elif isinstance(other, pd.Categorical):
        if not is_categorical_dtype(self):
            msg = ("Cannot compare a Categorical for op {op} with Series "
                   "of dtype {typ}.\nIf you want to compare values, use "
                   "'series <op> np.asarray(other)'.")
            raise TypeError(msg.format(op=op, typ=self.dtype))

    if is_categorical_dtype(self):
        # cats are a special case as get_values() would return an ndarray,
        # which would then not take categories ordering into account
        # we can go directly to op, as the na_op would just test again and
        # dispatch to it.
        with np.errstate(all='ignore'):
            res = op(self.values, other)
    else:
        values = self.get_values()
        if isinstance(other, (list, np.ndarray)):
            other = np.asarray(other)

        with np.errstate(all='ignore'):
            res = na_op(values, other)
        if is_scalar(res):
            raise TypeError(
                'Could not compare {typ} type with Series'.format(
                    typ=type(other)))

        # always return a full value series here
        res = com._values_from_object(res)

    res = pd.Series(res, index=self.index, name=self.name, dtype='bool')
    return res

def get_value_maybe_box(self, series, key):
    if not isinstance(key, Timedelta):
        key = Timedelta(key)
    values = self._engine.get_value(_values_from_object(series), key)
    return _maybe_box(self, values, series, key)

def _where_standard(cond, a, b):
    return np.where(com._values_from_object(cond), com._values_from_object(a),
                    com._values_from_object(b))

def _possibly_cast_to_timedelta(value, coerce=True, dtype=None):
    """ try to cast to timedelta64, if already a timedeltalike, then make
        sure that we are [ns] (as numpy 1.6.2 is very buggy in this regard),
        don't force the conversion unless coerce is True

        if coerce='compat' force a compatibility coercion (to timedeltas)
        if needed

        if dtype is passed then this is the target dtype
    """

    # coercion compatibility
    if coerce == 'compat' and _np_version_under1p7:

        def convert(td, dtype):

            # we have an array with a non-object dtype
            if hasattr(td, 'item'):
                td = td.astype(np.int64).item()
                if td == tslib.iNaT:
                    return td
                if dtype == 'm8[us]':
                    td *= 1000
                return td

            if isnull(td) or td == tslib.compat_NaT or td == tslib.iNaT:
                return tslib.iNaT

            # convert td value to a nanosecond value
            d = td.days
            s = td.seconds
            us = td.microseconds

            if dtype == 'object' or dtype == 'm8[ns]':
                td = 1000 * us + (s + d * 24 * 3600) * 10 ** 9
            else:
                raise ValueError(
                    "invalid conversion of dtype in np < 1.7 [%s]" % dtype)
            return td

        # < 1.7 coercion
        if not is_list_like(value):
            value = np.array([value])

        dtype = value.dtype
        return np.array([convert(v, dtype) for v in value], dtype='m8[ns]')

    # deal with numpy not being able to handle certain timedelta operations
    if isinstance(value, (ABCSeries, np.ndarray)):

        # i8 conversions
        if value.dtype == 'int64' and np.dtype(dtype) == 'timedelta64[ns]':
            value = value.astype('timedelta64[ns]')
            return value
        elif value.dtype.kind == 'm':
            if value.dtype != 'timedelta64[ns]':
                value = value.astype('timedelta64[ns]')
            return value

    # we don't have a timedelta, but we want to try to convert to one (but
    # don't force it)
    if coerce:
        new_value = tslib.array_to_timedelta64(
            _values_from_object(value).astype(object), coerce=False)
        if new_value.dtype == 'i8':
            value = np.array(new_value, dtype='timedelta64[ns]')

    return value

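# A hedged sketch of the modern public-API route for the same coercion,
# assuming a recent pandas: pd.to_timedelta normalizes strings, integers and
# timedelta-likes to timedelta64[ns], the target dtype of the helper above.
import pandas as pd

td = pd.to_timedelta(['1 days', '2 hours', '30s'])
print(td.dtype)                       # timedelta64[ns]
print(pd.to_timedelta(5, unit='s'))   # Timedelta('0 days 00:00:05')
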
def get_median(x):
    mask = notnull(x)
    if not skipna and not mask.all():
        return np.nan
    return algos.median(_values_from_object(x[mask]))

def value_counts(values, sort=True, ascending=False, normalize=False,
                 bins=None):
    """
    Compute a histogram of the counts of non-null values

    Parameters
    ----------
    values : ndarray (1-d)
    sort : boolean, default True
        Sort by values
    ascending : boolean, default False
        Sort in ascending order
    normalize: boolean, default False
        If True then compute a relative histogram
    bins : integer, optional
        Rather than count values, group them into half-open bins,
        convenience for pd.cut, only works with numeric data

    Returns
    -------
    value_counts : Series
    """
    from pandas.core.series import Series
    from pandas.tools.tile import cut

    values = Series(values).values

    if bins is not None:
        try:
            cat, bins = cut(values, bins, retbins=True)
        except TypeError:
            raise TypeError("bins argument only works with numeric data.")
        values = cat.labels

    if com.is_integer_dtype(values.dtype):
        values = com._ensure_int64(values)
        keys, counts = htable.value_count_int64(values)

    elif issubclass(values.dtype.type, (np.datetime64, np.timedelta64)):
        dtype = values.dtype
        values = values.view(np.int64)
        keys, counts = htable.value_count_int64(values)

        # convert the keys back to the dtype we came in
        keys = Series(keys, dtype=dtype)

    else:
        mask = com.isnull(values)
        values = com._ensure_object(values)
        keys, counts = htable.value_count_object(values, mask)

    result = Series(counts, index=com._values_from_object(keys))

    if bins is not None:
        # TODO: This next line should be more efficient
        result = result.reindex(np.arange(len(cat.levels)), fill_value=0)
        result.index = bins[:-1]

    if sort:
        result.sort()
        if not ascending:
            result = result[::-1]

    if normalize:
        result = result / float(values.size)

    return result

def nankurt(values, axis=None, skipna=True):
    """ Compute the sample excess kurtosis.

    The statistic computed here is the adjusted Fisher-Pearson standardized
    moment coefficient G2, computed directly from the second and fourth
    central moment.

    """
    values = _values_from_object(values)
    mask = isna(values)
    if not is_float_dtype(values.dtype):
        values = values.astype('f8')
        count = _get_counts(mask, axis)
    else:
        count = _get_counts(mask, axis, dtype=values.dtype)

    if skipna:
        values = values.copy()
        np.putmask(values, mask, 0)

    mean = values.sum(axis, dtype=np.float64) / count
    if axis is not None:
        mean = np.expand_dims(mean, axis)

    adjusted = values - mean
    if skipna:
        np.putmask(adjusted, mask, 0)
    adjusted2 = adjusted ** 2
    adjusted4 = adjusted2 ** 2
    m2 = adjusted2.sum(axis, dtype=np.float64)
    m4 = adjusted4.sum(axis, dtype=np.float64)

    with np.errstate(invalid='ignore', divide='ignore'):
        adj = 3 * (count - 1) ** 2 / ((count - 2) * (count - 3))
        numer = count * (count + 1) * (count - 1) * m4
        denom = (count - 2) * (count - 3) * m2 ** 2
        result = numer / denom - adj

    # floating point error
    numer = _zero_out_fperr(numer)
    denom = _zero_out_fperr(denom)

    if not isinstance(denom, np.ndarray):
        # if ``denom`` is a scalar, check these corner cases first before
        # doing division
        if count < 4:
            return np.nan
        if denom == 0:
            return 0

    with np.errstate(invalid='ignore', divide='ignore'):
        result = numer / denom - adj

    dtype = values.dtype
    if is_float_dtype(dtype):
        result = result.astype(dtype)

    if isinstance(result, np.ndarray):
        result = np.where(denom == 0, 0, result)
        result[count < 4] = np.nan

    return result

def value_counts(values, sort=True, ascending=False, normalize=False,
                 bins=None, dropna=True):
    """
    Compute a histogram of the counts of non-null values.

    Parameters
    ----------
    values : ndarray (1-d)
    sort : boolean, default True
        Sort by values
    ascending : boolean, default False
        Sort in ascending order
    normalize: boolean, default False
        If True then compute a relative histogram
    bins : integer, optional
        Rather than count values, group them into half-open bins,
        convenience for pd.cut, only works with numeric data
    dropna : boolean, default True
        Don't include counts of NaN

    Returns
    -------
    value_counts : Series
    """
    from pandas.core.series import Series
    from pandas.tools.tile import cut
    from pandas.tseries.period import PeriodIndex

    is_period = com.is_period_arraylike(values)
    values = Series(values).values
    is_category = com.is_categorical_dtype(values.dtype)

    if bins is not None:
        try:
            cat, bins = cut(values, bins, retbins=True)
        except TypeError:
            raise TypeError("bins argument only works with numeric data.")
        values = cat.codes
    elif is_category:
        bins = values.categories
        cat = values
        values = cat.codes

    dtype = values.dtype

    if issubclass(values.dtype.type, (np.datetime64, np.timedelta64)) \
            or is_period:
        if is_period:
            values = PeriodIndex(values)

        values = values.view(np.int64)
        keys, counts = htable.value_count_int64(values)

        if dropna:
            from pandas.tslib import iNaT
            msk = keys != iNaT
            keys, counts = keys[msk], counts[msk]

        # convert the keys back to the dtype we came in
        keys = keys.astype(dtype)

    elif com.is_integer_dtype(dtype):
        values = com._ensure_int64(values)
        keys, counts = htable.value_count_int64(values)

    else:
        values = com._ensure_object(values)
        mask = com.isnull(values)
        keys, counts = htable.value_count_object(values, mask)
        if not dropna:
            keys = np.insert(keys, 0, np.NaN)
            counts = np.insert(counts, 0, mask.sum())

    result = Series(counts, index=com._values_from_object(keys))

    if bins is not None:
        # TODO: This next line should be more efficient
        result = result.reindex(np.arange(len(cat.categories)), fill_value=0)
        if not is_category:
            result.index = bins[:-1]
        else:
            result.index = cat.categories

    if sort:
        result.sort()
        if not ascending:
            result = result[::-1]

    if normalize:
        result = result / float(values.size)

    return result

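# A small usage sketch of the public Series.value_counts API that the
# internal function above implements, assuming a recent pandas; dropna and
# normalize behave as documented in the docstring above.
import numpy as np
import pandas as pd

s = pd.Series([1, 1, 2, np.nan])
print(s.value_counts())                 # counts of 1 and 2, NaN dropped
print(s.value_counts(dropna=False))     # NaN counted as its own key
print(s.value_counts(normalize=True))   # relative frequencies instead of counts
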
def wrapper(self, other, axis=None):
    # Validate the axis parameter
    if axis is not None:
        self._get_axis_number(axis)

    if isinstance(other, ABCDataFrame):  # pragma: no cover
        # Defer to DataFrame implementation; fail early
        return NotImplemented

    elif isinstance(other, ABCSeries):
        name = com._maybe_match_name(self, other)
        if not self._indexed_same(other):
            msg = 'Can only compare identically-labeled Series objects'
            raise ValueError(msg)

        res_values = na_op(self.values, other.values)
        return self._constructor(res_values, index=self.index, name=name)

    elif isinstance(other, (np.ndarray, pd.Index)):
        # do not check length of zerodim array
        # as it will broadcast
        if (not is_scalar(lib.item_from_zerodim(other)) and
                len(self) != len(other)):
            raise ValueError('Lengths must match to compare')

        res_values = na_op(self.values, np.asarray(other))
        return self._constructor(res_values,
                                 index=self.index).__finalize__(self)

    elif (isinstance(other, pd.Categorical) and
            not is_categorical_dtype(self)):
        raise TypeError("Cannot compare a Categorical for op {op} with "
                        "Series of dtype {typ}.\nIf you want to compare "
                        "values, use 'series <op> np.asarray(other)'."
                        .format(op=op, typ=self.dtype))

    elif is_scalar(other) and isna(other):
        # numpy does not like comparisons vs None
        if op is operator.ne:
            res_values = np.ones(len(self), dtype=bool)
        else:
            res_values = np.zeros(len(self), dtype=bool)
        return self._constructor(res_values, index=self.index,
                                 name=self.name, dtype='bool')

    if is_categorical_dtype(self):
        # cats are a special case as get_values() would return an ndarray,
        # which would then not take categories ordering into account
        # we can go directly to op, as the na_op would just test again and
        # dispatch to it.
        with np.errstate(all='ignore'):
            res = op(self.values, other)
    else:
        values = self.get_values()
        if isinstance(other, (list, np.ndarray)):
            other = np.asarray(other)

        with np.errstate(all='ignore'):
            res = na_op(values, other)
        if is_scalar(res):
            raise TypeError(
                'Could not compare {typ} type with Series'.format(
                    typ=type(other)))

        # always return a full value series here
        res = com._values_from_object(res)

    res = pd.Series(res, index=self.index, name=self.name, dtype='bool')
    return res

def na_op(x, y):

    # dispatch to the categorical if we have a categorical
    # in either operand
    if is_categorical_dtype(x):
        return op(x, y)
    elif is_categorical_dtype(y) and not isscalar(y):
        return op(y, x)

    if is_object_dtype(x.dtype):
        if isinstance(y, list):
            y = lib.list_to_object_array(y)

        if isinstance(y, (np.ndarray, pd.Series)):
            if not is_object_dtype(y.dtype):
                result = lib.vec_compare(x, y.astype(np.object_), op)
            else:
                result = lib.vec_compare(x, y, op)
        else:
            result = lib.scalar_compare(x, y, op)
    else:

        # we want to compare like types
        # we only want to convert to integer like if
        # we are not NotImplemented, otherwise
        # we would allow datetime64 (but viewed as i8) against
        # integer comparisons
        if is_datetimelike_v_numeric(x, y):
            raise TypeError("invalid type comparison")

        # numpy does not like comparisons vs None
        if isscalar(y) and isnull(y):
            y = np.nan

        # we have a datetime/timedelta and may need to convert
        mask = None
        if (needs_i8_conversion(x) or
                (not isscalar(y) and needs_i8_conversion(y))):

            if isscalar(y):
                y = _index.convert_scalar(x, _values_from_object(y))
            else:
                y = y.view('i8')

            if name == '__ne__':
                mask = notnull(x)
            else:
                mask = isnull(x)

            x = x.view('i8')

        try:
            result = getattr(x, name)(y)
            if result is NotImplemented:
                raise TypeError("invalid type comparison")
        except AttributeError:
            result = op(x, y)

        if mask is not None and mask.any():
            result[mask] = False

    return result
