def isin(comps, values):
    """
    Compute the isin boolean array

    Parameters
    ----------
    comps: array-like
    values: array-like

    Returns
    -------
    boolean array same length as comps

    Raises
    ------
    TypeError
        If `comps` or `values` is not list-like.
    """
    if not com.is_list_like(comps):
        raise TypeError(
            "only list-like objects are allowed to be passed"
            " to isin(), you passed a "
            "[{0}]".format(type(comps).__name__)
        )
    comps = np.asarray(comps)
    if not com.is_list_like(values):
        raise TypeError(
            "only list-like objects are allowed to be passed"
            " to isin(), you passed a "
            "[{0}]".format(type(values).__name__)
        )
    # Materialise anything that is not already an ndarray (e.g. a set) so
    # the datetime/timedelta conversions below receive a plain sequence.
    if not isinstance(values, np.ndarray):
        values = list(values)

    # GH11232
    # work-around for numpy < 1.8 and comparisons on py3
    # faster for larger cases to use np.in1d
    if (_np_version_under1p8 and compat.PY3) or len(comps) > 1000000:
        f = lambda x, y: np.in1d(x, np.asarray(list(y)))
    else:
        f = lambda x, y: lib.ismember_int64(x, set(y))

    # may need i8 conversion for proper membership testing
    if com.is_datetime64_dtype(comps):
        from pandas.tseries.tools import to_datetime
        values = to_datetime(values)._values.view("i8")
        comps = comps.view("i8")
    elif com.is_timedelta64_dtype(comps):
        from pandas.tseries.timedeltas import to_timedelta
        values = to_timedelta(values)._values.view("i8")
        comps = comps.view("i8")
    elif com.is_int64_dtype(comps):
        pass
    else:
        # Any other dtype always goes through the generic membership test,
        # regardless of which int64 strategy was selected above.
        f = lambda x, y: lib.ismember(x, set(y))

    return f(comps, values)
def _get_wom_rule(self):
    """
    Build a 'WOM-<week><weekday>' rule string for ``self.index``.

    Returns None when the differences between consecutive ``index.week``
    values are not all 4 or 5, or when more than one distinct weekday
    occurs in the index.
    """
    index = self.index

    week_steps = unique(np.diff(index.week))
    steps_are_monthly = lib.ismember(week_steps, set([4, 5])).all()
    if not steps_are_monthly:
        return None

    distinct_weekdays = unique(index.weekday)
    if len(distinct_weekdays) > 1:
        return None

    # which week of the month the first entry falls in (1-based)
    week_of_month = (index[0].day - 1) // 7 + 1
    weekday_alias = _weekday_rule_aliases[distinct_weekdays[0]]

    return "WOM-%d%s" % (week_of_month, weekday_alias)
def _get_wom_rule(self):
    """
    Return a week-of-month rule code ('WOM-<week><weekday>') or None.

    None is returned unless every step between consecutive
    ``self.index.week`` values is 4 or 5 and the index contains at most
    one distinct weekday.
    """
    allowed_steps = set([4, 5])
    observed_steps = unique(np.diff(self.index.week))
    if not lib.ismember(observed_steps, allowed_steps).all():
        return None

    days_of_week = unique(self.index.weekday)
    if len(days_of_week) <= 1:
        # 1-based week of the month, taken from the first index entry
        first_day = self.index[0].day
        nth_week = (first_day - 1) // 7 + 1
        alias = _weekday_rule_aliases[days_of_week[0]]
        return 'WOM-%d%s' % (nth_week, alias)

    return None
def isin(comps, values):
    """
    Compute the isin boolean array

    Parameters
    ----------
    comps: array-like
    values: array-like

    Returns
    -------
    boolean array same length as comps

    Raises
    ------
    TypeError
        If `comps` or `values` is not list-like.
    """
    if not is_list_like(comps):
        raise TypeError(
            ("only list-like objects are allowed to be passed"
             " to isin(), you passed a [{0}]").format(type(comps).__name__))
    comps = np.asarray(comps)

    if not is_list_like(values):
        raise TypeError(
            ("only list-like objects are allowed to be passed"
             " to isin(), you passed a [{0}]").format(type(values).__name__))
    if not isinstance(values, np.ndarray):
        values = list(values)

    # GH11232
    # work-around for numpy < 1.8 and comparisons on py3
    # faster for larger cases to use np.in1d
    prefer_in1d = ((_np_version_under1p8 and compat.PY3) or
                   len(comps) > 1000000)

    # may need i8 conversion for proper membership testing
    if is_datetime64_dtype(comps):
        from pandas.tseries.tools import to_datetime
        values = to_datetime(values)._values.view('i8')
        comps = comps.view('i8')
    elif is_timedelta64_dtype(comps):
        from pandas.tseries.timedeltas import to_timedelta
        values = to_timedelta(values)._values.view('i8')
        comps = comps.view('i8')
    elif not is_int64_dtype(comps):
        # everything that is not int64-representable uses the generic test
        return lib.ismember(comps, set(values))

    if prefer_in1d:
        return np.in1d(comps, np.asarray(list(values)))
    return lib.ismember_int64(comps, set(values))
def _attempt_YYYYMMDD(arg, errors):
    """
    try to parse the YYYYMMDD/%Y%m%d format, try to deal with NaT-like,
    arg is a passed in as an object dtype, but could really be ints/strings
    with nan-like/or floats (e.g. with nan)

    Three interpretations are attempted in order (integer-like values,
    floats containing NaN, strings containing NaT-like markers); the first
    one that does not raise supplies the result.

    Parameters
    ----------
    arg : passed value
    errors : 'raise','ignore','coerce'

    Returns
    -------
    The converted values from the first successful attempt, or None when
    every attempt fails.
    """

    def calc(carg):
        # calculate the actual result: split each number arithmetically
        # into its year / month / day parts
        carg = carg.astype(object)
        parsed = lib.try_parse_year_month_day(carg / 10000,
                                              carg / 100 % 100,
                                              carg % 100)
        return tslib.array_to_datetime(parsed, errors=errors)

    def calc_with_mask(carg, mask):
        # positions where mask is False become NaT, the rest are parsed
        result = np.empty(carg.shape, dtype='M8[ns]')
        iresult = result.view('i8')
        iresult[~mask] = tslib.iNaT
        converted = calc(carg[mask].astype(np.float64).astype(np.int64))
        result[mask] = converted.astype('M8[ns]')
        return result

    # Each attempt is best-effort.  ``except Exception`` (not a bare
    # ``except``) keeps KeyboardInterrupt / SystemExit propagating.

    # try intlike / strings that are ints
    try:
        return calc(arg.astype(np.int64))
    except Exception:
        pass

    # a float with actual np.nan
    try:
        carg = arg.astype(np.float64)
        return calc_with_mask(carg, notnull(carg))
    except Exception:
        pass

    # string with NaN-like
    try:
        mask = ~lib.ismember(arg, tslib._nat_strings)
        return calc_with_mask(arg, mask)
    except Exception:
        pass

    return None
def _attempt_YYYYMMDD(arg, errors):
    """
    try to parse the YYYYMMDD/%Y%m%d format, try to deal with NaT-like,
    arg is a passed in as an object dtype, but could really be ints/strings
    with nan-like/or floats (e.g. with nan)

    The input is tried as integer-like values, then as floats containing
    NaN, then as strings containing NaT-like markers.  The first attempt
    that does not raise determines the return value.

    Parameters
    ----------
    arg : passed value
    errors : 'raise','ignore','coerce'

    Returns
    -------
    The converted values from the first successful attempt, or None when
    every attempt fails.
    """

    def calc(carg):
        # calculate the actual result
        carg = carg.astype(object)
        years = carg / 10000
        months = carg / 100 % 100
        days = carg % 100
        parsed = lib.try_parse_year_month_day(years, months, days)
        return tslib.array_to_datetime(parsed, errors=errors)

    def calc_with_mask(carg, mask):
        # masked-out positions are filled with NaT; the others are parsed
        result = np.empty(carg.shape, dtype='M8[ns]')
        iresult = result.view('i8')
        iresult[~mask] = tslib.iNaT
        as_int = carg[mask].astype(np.float64).astype(np.int64)
        result[mask] = calc(as_int).astype('M8[ns]')
        return result

    # NOTE: the handlers below catch ``Exception`` instead of using a bare
    # ``except`` so that KeyboardInterrupt / SystemExit are not swallowed
    # while still treating each attempt as best-effort.

    # try intlike / strings that are ints
    try:
        return calc(arg.astype(np.int64))
    except Exception:
        pass

    # a float with actual np.nan
    try:
        carg = arg.astype(np.float64)
        return calc_with_mask(carg, notnull(carg))
    except Exception:
        pass

    # string with NaN-like
    try:
        mask = ~lib.ismember(arg, tslib._nat_strings)
        return calc_with_mask(arg, mask)
    except Exception:
        pass

    return None
def drop(self, labels, axis=0, level=None):
    """
    Return new object with labels in requested axis removed

    Parameters
    ----------
    labels : array-like
    axis : int
    level : int or name, default None
        For MultiIndex

    Returns
    -------
    dropped : type of caller

    Raises
    ------
    AssertionError
        If `level` is given but the selected axis is not a MultiIndex.
    """
    axis_name = self._get_axis_name(axis)
    # Resolve the positional number once: ``axes`` is indexed by position,
    # so the raw ``axis`` argument cannot be used when it is a name.
    axis_number = self._get_axis_number(axis_name)
    axis = self._get_axis(axis)

    if axis.is_unique:
        if level is not None:
            if not isinstance(axis, MultiIndex):
                raise AssertionError('axis must be a MultiIndex')
            new_axis = axis.drop(labels, level=level)
        else:
            new_axis = axis.drop(labels)
        dropped = self.reindex(**{axis_name: new_axis})
        try:
            # carry the original axis names over to the result
            dropped.axes[axis_number].names = axis.names
        except AttributeError:
            pass
        return dropped
    else:
        # Non-unique axis: build a boolean "keep" mask instead of
        # reindexing.  ``~`` is used for the inversion because unary ``-``
        # is not supported on boolean arrays by current NumPy.
        if level is not None:
            if not isinstance(axis, MultiIndex):
                raise AssertionError('axis must be a MultiIndex')
            indexer = ~lib.ismember(axis.get_level_values(level),
                                    set(labels))
        else:
            indexer = ~axis.isin(labels)

        slicer = [slice(None)] * self.ndim
        slicer[axis_number] = indexer

        return self.ix[tuple(slicer)]
def isin(self, values):
    """
    Compute boolean array of whether each index value is found in the
    passed set of values

    Parameters
    ----------
    values : set or sequence of values

    Returns
    -------
    is_contained : ndarray (boolean dtype)
    """
    own_type = type(self)

    if isinstance(values, own_type):
        candidates = values
    else:
        try:
            candidates = own_type(values)
        except ValueError:
            # values cannot be converted to this index type: compare on
            # the object representation instead
            return self.asobject.isin(values)

    # membership is tested on the integer (asi8) representation
    wanted = set(candidates.asi8)
    return lib.ismember(self.asi8, wanted)
def _convert_types(values, na_values):
    """
    Convert `values`, treating entries found in `na_values` as missing.

    Numeric and boolean arrays get NaN written at the matching positions
    (integer arrays are cast to float64 first; other arrays are modified
    in place by ``np.putmask``).  Any other array goes through
    ``lib.maybe_convert_numeric``, falling back to ``lib.sanitize_objects``
    on failure, and finally ``lib.maybe_convert_bool`` when the result is
    still object dtype.

    Returns
    -------
    (converted, na_count) tuple
    """
    is_numeric_or_bool = issubclass(values.dtype.type,
                                    (np.number, np.bool_))

    if not is_numeric_or_bool:
        missing = 0
        try:
            converted = lib.maybe_convert_numeric(values, na_values, False)
        except Exception:
            # numeric conversion failed: the sanitize call supplies the
            # NA count and the values are kept as they are
            missing = lib.sanitize_objects(values, na_values, False)
            converted = values

        if converted.dtype == np.object_:
            converted = lib.maybe_convert_bool(values)
        return converted, missing

    na_mask = lib.ismember(values, na_values)
    missing = na_mask.sum()
    if missing > 0:
        if com.is_integer_dtype(values):
            # integers cannot hold NaN
            values = values.astype(np.float64)
        np.putmask(values, na_mask, np.nan)
    return values, missing
def _convert_types(values, na_values):
    """
    Convert `values` and count the entries that match `na_values`.

    For numeric/boolean input the matching positions are overwritten with
    NaN (after casting integer input to float64) and the number of matches
    is returned.  For other input ``lib.maybe_convert_numeric`` is tried
    first; if it raises, ``lib.sanitize_objects`` provides the count.  A
    result that is still object dtype is passed through
    ``lib.maybe_convert_bool``.

    Returns
    -------
    (result, na_count) tuple
    """
    kind = values.dtype.type
    if issubclass(kind, (np.number, np.bool_)):
        hits = lib.ismember(values, na_values)
        n_missing = hits.sum()
        if n_missing > 0:
            # NaN needs a float container when the input is integer
            target = (values.astype(np.float64)
                      if com.is_integer_dtype(values) else values)
            np.putmask(target, hits, np.nan)
            values = target
        return values, n_missing

    n_missing = 0
    try:
        outcome = lib.maybe_convert_numeric(values, na_values)
    except Exception:
        n_missing = lib.sanitize_objects(values, na_values)
        outcome = values

    still_object = outcome.dtype == np.object_
    if still_object:
        outcome = lib.maybe_convert_bool(values)

    return outcome, n_missing
def _attempt_YYYYMMDD(arg, coerce):
    """
    try to parse the YYYYMMDD/%Y%m%d format, try to deal with NaT-like,
    arg is a passed in as an object dtype, but could really be ints/strings
    with nan-like/or floats (e.g. with nan)

    Three interpretations are attempted in order (integer-like values,
    floats containing NaN, strings containing NaT-like markers); the first
    one that does not raise supplies the result.

    Parameters
    ----------
    arg : passed value
    coerce : forwarded unchanged to ``tslib.array_to_datetime``

    Returns
    -------
    The converted values from the first successful attempt, or None when
    every attempt fails.
    """

    def calc(carg):
        # calculate the actual result
        carg = carg.astype(object)
        parsed = lib.try_parse_year_month_day(carg / 10000,
                                              carg / 100 % 100,
                                              carg % 100)
        return tslib.array_to_datetime(parsed, coerce=coerce)

    def calc_with_mask(carg, mask):
        # positions where mask is False become NaT, the rest are parsed
        result = np.empty(carg.shape, dtype="M8[ns]")
        iresult = result.view("i8")
        iresult[~mask] = tslib.iNaT
        converted = calc(carg[mask].astype(np.float64).astype(np.int64))
        result[mask] = converted.astype("M8[ns]")
        return result

    # Each attempt is best-effort.  ``except Exception`` (not a bare
    # ``except``) keeps KeyboardInterrupt / SystemExit propagating.

    # try intlike / strings that are ints
    try:
        return calc(arg.astype(np.int64))
    except Exception:
        pass

    # a float with actual np.nan
    try:
        carg = arg.astype(np.float64)
        return calc_with_mask(carg, com.notnull(carg))
    except Exception:
        pass

    # string with NaN-like
    try:
        mask = ~lib.ismember(arg, tslib._nat_strings)
        return calc_with_mask(arg, mask)
    except Exception:
        pass

    return None
def drop(self, labels, axis=0, level=None):
    """
    Return new object with labels in requested axis removed

    Parameters
    ----------
    labels : array-like
    axis : int
    level : int or name, default None
        For MultiIndex

    Returns
    -------
    dropped : type of caller

    Raises
    ------
    AssertionError
        If `level` is given but the selected axis is not a MultiIndex.
    """
    axis_name = self._get_axis_name(axis)
    axis = self._get_axis(axis)

    # Validate with an explicit raise: a bare ``assert`` is removed when
    # Python runs with -O.  The exception type is unchanged for callers.
    if level is not None and not isinstance(axis, MultiIndex):
        raise AssertionError('axis must be a MultiIndex')

    if axis.is_unique:
        if level is not None:
            new_axis = axis.drop(labels, level=level)
        else:
            new_axis = axis.drop(labels)

        return self.reindex(**{axis_name: new_axis})
    else:
        # Non-unique axis: select what to keep with a boolean mask.  ``~``
        # is used for the inversion because unary ``-`` is not supported
        # on boolean arrays by current NumPy.
        if level is not None:
            indexer = ~lib.ismember(axis.get_level_values(level),
                                    set(labels))
        else:
            indexer = ~axis.isin(labels)

        slicer = [slice(None)] * self.ndim
        slicer[self._get_axis_number(axis_name)] = indexer

        return self.ix[tuple(slicer)]
def _attempt_YYYYMMDD(arg):
    """
    try to parse the YYYYMMDD/%Y%m%d format, try to deal with NaT-like,
    arg is a passed in as an object dtype, but could really be ints/strings
    with nan-like/or floats (e.g. with nan)

    Three interpretations are attempted in order (integer-like values,
    floats containing NaN, strings containing NaT-like markers); the first
    one that does not raise supplies the result.

    Parameters
    ----------
    arg : passed value

    Returns
    -------
    The parsed values from the first successful attempt, or None when
    every attempt fails.
    """

    def calc(carg):
        # calculate the actual result
        carg = carg.astype(object)
        return lib.try_parse_year_month_day(carg / 10000,
                                            carg / 100 % 100,
                                            carg % 100)

    def calc_with_mask(carg, mask):
        result = np.empty(carg.shape, dtype='M8[ns]')
        iresult = result.view('i8')
        # ``~`` (not unary ``-``) inverts the boolean mask; unary minus on
        # boolean arrays is rejected by current NumPy and the resulting
        # error would be silently swallowed by the callers below.
        iresult[~mask] = tslib.iNaT
        parsed = calc(carg[mask].astype(np.float64).astype(np.int64))
        result[mask] = parsed.astype('M8[ns]')
        return result

    # Each attempt is best-effort.  ``except Exception`` (not a bare
    # ``except``) keeps KeyboardInterrupt / SystemExit propagating.

    # try intlike / strings that are ints
    try:
        return calc(arg.astype(np.int64))
    except Exception:
        pass

    # a float with actual np.nan
    try:
        carg = arg.astype(np.float64)
        return calc_with_mask(carg, com.notnull(carg))
    except Exception:
        pass

    # string with NaN-like
    try:
        mask = ~lib.ismember(arg, tslib._nat_strings)
        return calc_with_mask(arg, mask)
    except Exception:
        pass

    return None
def drop(self, labels, axis=0, level=None):
    """
    Return new object with labels in requested axis removed

    Parameters
    ----------
    labels : array-like
    axis : int
    level : int or name, default None
        For MultiIndex

    Returns
    -------
    dropped : type of caller

    Raises
    ------
    AssertionError
        If `level` is given but the selected axis is not a MultiIndex.
    """
    axis_name = self._get_axis_name(axis)
    axis = self._get_axis(axis)

    if axis.is_unique:
        if level is not None:
            # explicit raise instead of ``assert`` so the check also runs
            # under ``python -O``; the exception type is unchanged
            if not isinstance(axis, MultiIndex):
                raise AssertionError('axis must be a MultiIndex')
            new_axis = axis.drop(labels, level=level)
        else:
            new_axis = axis.drop(labels)

        return self.reindex(**{axis_name: new_axis})
    else:
        # Non-unique axis: keep the entries whose label is not in `labels`.
        # The mask is inverted with ``~`` because unary ``-`` is not
        # supported on boolean arrays by current NumPy.
        if level is not None:
            if not isinstance(axis, MultiIndex):
                raise AssertionError('axis must be a MultiIndex')
            indexer = ~lib.ismember(axis.get_level_values(level),
                                    set(labels))
        else:
            indexer = ~axis.isin(labels)

        slicer = [slice(None)] * self.ndim
        slicer[self._get_axis_number(axis_name)] = indexer

        return self.ix[tuple(slicer)]