def hash_array(
    vals: ArrayLike,
    encoding: str = "utf8",
    hash_key: str = _default_hash_key,
    categorize: bool = True,
) -> np.ndarray:
    """
    Given a 1d array, return an array of deterministic integers.

    Parameters
    ----------
    vals : ndarray or ExtensionArray
    encoding : str, default 'utf8'
        Encoding for data & key when strings.
    hash_key : str, default _default_hash_key
        Hash_key for string key to encode.
    categorize : bool, default True
        Whether to first categorize object arrays before hashing. This is more
        efficient when the array contains duplicate values.

    Returns
    -------
    ndarray[np.uint64, ndim=1]
        Hashed values, same length as the vals.
    """
    # Anything without a dtype is not array-like enough to hash.
    if not hasattr(vals, "dtype"):
        raise TypeError("must pass a ndarray-like")

    # Categoricals are hashed through their categories, remapped via the
    # codes.  Do this before any further dtype inspection, since asking
    # numpy dtype questions about a categorical can misbehave.
    if is_categorical_dtype(vals.dtype):
        return _hash_categorical(cast("Categorical", vals), encoding, hash_key)

    if isinstance(vals, ABCExtensionArray):
        # Hash the EA's factorization values (a plain ndarray) instead.
        vals, _ = vals._values_for_factorize()
    elif not isinstance(vals, np.ndarray):
        # GH#42003
        raise TypeError(
            "hash_array requires np.ndarray or ExtensionArray, not "
            f"{type(vals).__name__}. Use hash_pandas_object instead."
        )

    return _hash_ndarray(vals, encoding, hash_key, categorize)
def hash_array(
    vals: ArrayLike,
    encoding: str = "utf8",
    hash_key: str = _default_hash_key,
    categorize: bool = True,
) -> np.ndarray:
    """
    Given a 1d array, return an array of deterministic integers.

    Parameters
    ----------
    vals : ndarray or ExtensionArray
    encoding : str, default 'utf8'
        Encoding for data & key when strings.
    hash_key : str, default _default_hash_key
        Hash_key for string key to encode.
    categorize : bool, default True
        Whether to first categorize object arrays before hashing. This is more
        efficient when the array contains duplicate values.

    Returns
    -------
    ndarray[np.uint64, ndim=1]
        Hashed values, same length as the vals.
    """
    if not hasattr(vals, "dtype"):
        raise TypeError("must pass a ndarray-like")
    dtype = vals.dtype

    # For categoricals, we hash the categories, then remap the codes to the
    # hash values.  (This check comes before any other dtype inspection so
    # that we don't ask numpy dtype questions about a categorical, as it
    # will choke.)
    if is_categorical_dtype(dtype):
        vals = cast("Categorical", vals)
        return _hash_categorical(vals, encoding, hash_key)
    elif isinstance(vals, ABCExtensionArray):
        # Hash the factorization values (a plain ndarray) of the EA.
        vals, _ = vals._values_for_factorize()
    elif not isinstance(vals, np.ndarray):
        # GH#42003: previously any dtype-carrying object fell through to
        # _hash_ndarray and failed obscurely; raise a clear error instead.
        raise TypeError(
            "hash_array requires np.ndarray or ExtensionArray, not "
            f"{type(vals).__name__}. Use hash_pandas_object instead."
        )

    return _hash_ndarray(vals, encoding, hash_key, categorize)