def test_large(self): s = pd.date_range('20000101', periods=2000000, freq='s').values result = algos.isin(s, s[0:2]) expected = np.zeros(len(s), dtype=bool) expected[0] = True expected[1] = True tm.assert_numpy_array_equal(result, expected)
def test_basic(self): result = algos.isin([1, 2], [1]) expected = np.array([True, False]) tm.assert_numpy_array_equal(result, expected) result = algos.isin(np.array([1, 2]), [1]) expected = np.array([True, False]) tm.assert_numpy_array_equal(result, expected) result = algos.isin(pd.Series([1, 2]), [1]) expected = np.array([True, False]) tm.assert_numpy_array_equal(result, expected) result = algos.isin(pd.Series([1, 2]), pd.Series([1])) expected = np.array([True, False]) tm.assert_numpy_array_equal(result, expected) result = algos.isin(['a', 'b'], ['a']) expected = np.array([True, False]) tm.assert_numpy_array_equal(result, expected) result = algos.isin(pd.Series(['a', 'b']), pd.Series(['a'])) expected = np.array([True, False]) tm.assert_numpy_array_equal(result, expected) result = algos.isin(['a', 'b'], [1]) expected = np.array([False, False]) tm.assert_numpy_array_equal(result, expected) arr = pd.date_range('20130101', periods=3).values result = algos.isin(arr, [arr[0]]) expected = np.array([True, False, False]) tm.assert_numpy_array_equal(result, expected) result = algos.isin(arr, arr[0:2]) expected = np.array([True, True, False]) tm.assert_numpy_array_equal(result, expected) arr = pd.timedelta_range('1 day', periods=3).values result = algos.isin(arr, [arr[0]]) expected = np.array([True, False, False]) tm.assert_numpy_array_equal(result, expected)
def _attempt_YYYYMMDD(arg, errors): """ try to parse the YYYYMMDD/%Y%m%d format, try to deal with NaT-like, arg is a passed in as an object dtype, but could really be ints/strings with nan-like/or floats (e.g. with nan) Parameters ---------- arg : passed value errors : 'raise','ignore','coerce' """ def calc(carg): # calculate the actual result carg = carg.astype(object) parsed = parsing.try_parse_year_month_day(carg / 10000, carg / 100 % 100, carg % 100) return tslib.array_to_datetime(parsed, errors=errors)[0] def calc_with_mask(carg, mask): result = np.empty(carg.shape, dtype='M8[ns]') iresult = result.view('i8') iresult[~mask] = tslibs.iNaT masked_result = calc(carg[mask].astype(np.float64).astype(np.int64)) result[mask] = masked_result.astype('M8[ns]') return result # try intlike / strings that are ints try: return calc(arg.astype(np.int64)) except ValueError: pass # a float with actual np.nan try: carg = arg.astype(np.float64) return calc_with_mask(carg, notna(carg)) except ValueError: pass # string with NaN-like try: mask = ~algorithms.isin(arg, list(tslib.nat_strings)) return calc_with_mask(arg, mask) except ValueError: pass return None
def isin(self, values): """ Compute boolean array of whether each index value is found in the passed set of values Parameters ---------- values : set or sequence of values Returns ------- is_contained : ndarray (boolean dtype) """ if not isinstance(values, type(self)): try: values = type(self)(values) except ValueError: return self.astype(object).isin(values) return algorithms.isin(self.asi8, values.asi8)
def isin(self, values): """ Compute boolean array of whether each index value is found in the passed set of values. Parameters ---------- values : set or sequence of values Returns ------- is_contained : ndarray (boolean dtype) """ if not isinstance(values, type(self)): try: values = type(self)(values) except ValueError: return self.astype(object).isin(values) return algorithms.isin(self.asi8, values.asi8)
def isin(self, values) -> BooleanArray: # type: ignore[override] from pandas.core.arrays import BooleanArray # algorithms.isin will eventually convert values to an ndarray, so no extra # cost to doing it here first values_arr = np.asarray(values) result = isin(self._data, values_arr) if self._hasna: values_have_NA = is_object_dtype(values_arr.dtype) and any( val is self.dtype.na_value for val in values_arr ) # For now, NA does not propagate so set result according to presence of NA, # see https://github.com/pandas-dev/pandas/pull/38379 for some discussion result[self._mask] = values_have_NA mask = np.zeros(self._data.shape, dtype=bool) return BooleanArray(result, mask, copy=False)
def test_float_complex_int_are_equal_as_objects(): values = ["a", 5, 5.0, 5.0 + 0j] comps = list(range(129)) result = isin(values, comps) expected = np.array([False, True, True, True], dtype=np.bool_) tm.assert_numpy_array_equal(result, expected)
def test_invalid(self): self.assertRaises(TypeError, lambda: algos.isin(1, 1)) self.assertRaises(TypeError, lambda: algos.isin(1, [1])) self.assertRaises(TypeError, lambda: algos.isin([1], 1))
def isin(self, values, level=None): if level is not None: self._validate_index_level(level) return algorithms.isin(np.array(self), values)
def _convert_to_ndarrays( self, dct: Mapping, na_values, na_fvalues, verbose: bool = False, converters=None, dtypes=None, ): result = {} for c, values in dct.items(): conv_f = None if converters is None else converters.get(c, None) if isinstance(dtypes, dict): cast_type = dtypes.get(c, None) else: # single dtype or None cast_type = dtypes if self.na_filter: col_na_values, col_na_fvalues = _get_na_values( c, na_values, na_fvalues, self.keep_default_na) else: col_na_values, col_na_fvalues = set(), set() if c in self._parse_date_cols: # GH#26203 Do not convert columns which get converted to dates # but replace nans to ensure to_datetime works mask = algorithms.isin(values, set(col_na_values) | col_na_fvalues) np.putmask(values, mask, np.nan) result[c] = values continue if conv_f is not None: # conv_f applied to data before inference if cast_type is not None: warnings.warn( ("Both a converter and dtype were specified " f"for column {c} - only the converter will be used."), ParserWarning, stacklevel=find_stack_level(), ) try: values = lib.map_infer(values, conv_f) except ValueError: # error: Argument 2 to "isin" has incompatible type "List[Any]"; # expected "Union[Union[ExtensionArray, ndarray], Index, Series]" mask = algorithms.isin( values, list(na_values) # type: ignore[arg-type] ).view(np.uint8) values = lib.map_infer_mask(values, conv_f, mask) cvals, na_count = self._infer_types(values, set(col_na_values) | col_na_fvalues, try_num_bool=False) else: is_ea = is_extension_array_dtype(cast_type) is_str_or_ea_dtype = is_ea or is_string_dtype(cast_type) # skip inference if specified dtype is object # or casting to an EA try_num_bool = not (cast_type and is_str_or_ea_dtype) # general type inference and conversion cvals, na_count = self._infer_types( values, set(col_na_values) | col_na_fvalues, try_num_bool) # type specified in dtype param or cast_type is an EA if cast_type and (not is_dtype_equal(cvals, cast_type) or is_extension_array_dtype(cast_type)): if not is_ea and na_count > 0: try: if is_bool_dtype(cast_type): raise ValueError( f"Bool column has NA values in column {c}") except (AttributeError, TypeError): # invalid input to is_bool_dtype pass cast_type = pandas_dtype(cast_type) cvals = self._cast_types(cvals, cast_type, c) result[c] = cvals if verbose and na_count: print(f"Filled {na_count} NA values in column {c!s}") return result
def test_basic(self): result = algos.isin([1, 2], [1]) expected = np.array([True, False]) tm.assert_numpy_array_equal(result, expected) result = algos.isin(np.array([1, 2]), [1]) expected = np.array([True, False]) tm.assert_numpy_array_equal(result, expected) result = algos.isin(pd.Series([1, 2]), [1]) expected = np.array([True, False]) tm.assert_numpy_array_equal(result, expected) result = algos.isin(pd.Series([1, 2]), pd.Series([1])) expected = np.array([True, False]) tm.assert_numpy_array_equal(result, expected) result = algos.isin(pd.Series([1, 2]), set([1])) expected = np.array([True, False]) tm.assert_numpy_array_equal(result, expected) result = algos.isin(['a', 'b'], ['a']) expected = np.array([True, False]) tm.assert_numpy_array_equal(result, expected) result = algos.isin(pd.Series(['a', 'b']), pd.Series(['a'])) expected = np.array([True, False]) tm.assert_numpy_array_equal(result, expected) result = algos.isin(pd.Series(['a', 'b']), set(['a'])) expected = np.array([True, False]) tm.assert_numpy_array_equal(result, expected) result = algos.isin(['a', 'b'], [1]) expected = np.array([False, False]) tm.assert_numpy_array_equal(result, expected) arr = pd.date_range('20130101', periods=3).values result = algos.isin(arr, [arr[0]]) expected = np.array([True, False, False]) tm.assert_numpy_array_equal(result, expected) result = algos.isin(arr, arr[0:2]) expected = np.array([True, True, False]) tm.assert_numpy_array_equal(result, expected) result = algos.isin(arr, set(arr[0:2])) expected = np.array([True, True, False]) tm.assert_numpy_array_equal(result, expected) arr = pd.timedelta_range('1 day', periods=3).values result = algos.isin(arr, [arr[0]]) expected = np.array([True, False, False]) tm.assert_numpy_array_equal(result, expected) result = algos.isin(arr, arr[0:2]) expected = np.array([True, True, False]) tm.assert_numpy_array_equal(result, expected) result = algos.isin(arr, set(arr[0:2])) expected = np.array([True, True, False]) tm.assert_numpy_array_equal(result, expected)
def _isin_addresses(self, other): """Check whether elements of self are present in other.""" from pandas.core.algorithms import isin # TODO(factorize): replace this return isin(self, other)
def _convert_to_ndarrays(self, dct, na_values, na_fvalues, verbose=False, converters=None, dtypes=None): result = {} for c, values in dct.items(): conv_f = None if converters is None else converters.get(c, None) if isinstance(dtypes, dict): cast_type = dtypes.get(c, None) else: # single dtype or None cast_type = dtypes if self.na_filter: col_na_values, col_na_fvalues = _get_na_values( c, na_values, na_fvalues, self.keep_default_na) else: col_na_values, col_na_fvalues = set(), set() if conv_f is not None: # conv_f applied to data before inference if cast_type is not None: warnings.warn( ("Both a converter and dtype were specified " f"for column {c} - only the converter will be used"), ParserWarning, stacklevel=7, ) try: values = lib.map_infer(values, conv_f) except ValueError: mask = algorithms.isin(values, list(na_values)).view(np.uint8) values = lib.map_infer_mask(values, conv_f, mask) cvals, na_count = self._infer_types(values, set(col_na_values) | col_na_fvalues, try_num_bool=False) else: is_ea = is_extension_array_dtype(cast_type) is_str_or_ea_dtype = is_ea or is_string_dtype(cast_type) # skip inference if specified dtype is object # or casting to an EA try_num_bool = not (cast_type and is_str_or_ea_dtype) # general type inference and conversion cvals, na_count = self._infer_types( values, set(col_na_values) | col_na_fvalues, try_num_bool) # type specified in dtype param or cast_type is an EA if cast_type and (not is_dtype_equal(cvals, cast_type) or is_extension_array_dtype(cast_type)): if not is_ea and na_count > 0: try: if is_bool_dtype(cast_type): raise ValueError( f"Bool column has NA values in column {c}") except (AttributeError, TypeError): # invalid input to is_bool_dtype pass cast_type = pandas_dtype(cast_type) cvals = self._cast_types(cvals, cast_type, c) result[c] = cvals if verbose and na_count: print(f"Filled {na_count} NA values in column {c!s}") return result