def test_hash_tuples(self):
    """Check hash_tuples against hash_pandas_object on a MultiIndex."""
    pairs = [(1, 'one'), (1, 'two'), (2, 'one')]

    # Hashing the list of tuples must give the same array as hashing the
    # MultiIndex built from those tuples.
    list_hashes = hash_tuples(pairs)
    mi_hashes = hash_pandas_object(MultiIndex.from_tuples(pairs)).values
    tm.assert_numpy_array_equal(list_hashes, mi_hashes)

    # A lone tuple must hash to the value of its row in the list result.
    assert hash_tuples(pairs[0]) == mi_hashes[0]
def test_hash_tuples():
    """Compare hash_tuples with hashing the equivalent MultiIndex."""
    data = [(1, "one"), (1, "two"), (2, "one")]

    from_list = hash_tuples(data)
    from_index = hash_pandas_object(MultiIndex.from_tuples(data)).values

    # Both routes must produce the same array of hashes.
    tm.assert_numpy_array_equal(from_list, from_index)

    # Hashing only the first tuple must reproduce the first array entry.
    single = hash_tuples(data[0])
    assert single == from_index[0]
def test_hash_tuples():
    """Check list-of-tuples hashing and rejection of a single tuple."""
    data = [(1, "one"), (1, "two"), (2, "one")]

    from_list = hash_tuples(data)
    from_index = hash_pandas_object(MultiIndex.from_tuples(data)).values
    tm.assert_numpy_array_equal(from_list, from_index)

    # We only need to support MultiIndex and list-of-tuples, so a bare
    # tuple is expected to fail with either of these TypeError messages.
    accepted_messages = (
        "object is not iterable",
        "zip argument #1 must support iteration",
    )
    pattern = "|".join(accepted_messages)
    with pytest.raises(TypeError, match=pattern):
        hash_tuples(data[0])
def test_hash_tuple(self):
    """Check that hash_tuple agrees with hash_tuples on one-item lists."""
    cases = [
        (1, 'one'),
        (1, np.nan),
        (1.0, pd.NaT, 'A'),
        ('A', pd.Timestamp("2012-01-01")),
    ]
    for case in cases:
        # The scalar hash of a tuple must equal the sole entry produced by
        # hashing a list that contains only that tuple.
        assert hash_tuple(case) == hash_tuples([case])[0]
def _hash_categories(categories, ordered=True):
    """Combine the hashes of ``categories`` into a single value.

    Parameters
    ----------
    categories : sequence with ``len`` and positional indexing
        The non-tuple path additionally reads ``categories.dtype``.
    ordered : bool, default True
        When true, each category's position is mixed into the hash.

    Returns
    -------
    The builtin ``hash`` of ``(tuple(categories), ordered)`` for object
    dtype categories holding other than exactly one element type, ``0``
    when the combined hash array is empty, otherwise the XOR-reduction of
    the combined hash array.
    """
    from pandas.core.util.hashing import (
        _combine_hash_arrays,
        hash_array,
        hash_tuples,
    )

    if len(categories) and isinstance(categories[0], tuple):
        # Assumes that if the first category is a tuple then all of them
        # are; mixing tuple and non-tuple categories is not supported.
        # The list() conversion is required: passing an np.array of
        # categories breaks hash_tuples.
        value_hashes = hash_tuples(list(categories))
    else:
        if (categories.dtype == 'O'
                and len({type(item) for item in categories}) != 1):
            # TODO: hash_array doesn't handle mixed types. It casts
            # everything to a str first, which means we treat
            # {'1', '2'} the same as {'1', 2}
            # find a better solution
            return hash((tuple(categories), ordered))
        value_hashes = hash_array(np.asarray(categories), categorize=False)

    if ordered:
        # Pair every category hash with its position.
        positions = np.arange(len(value_hashes), dtype=value_hashes.dtype)
        rows = np.vstack([value_hashes, positions])
    else:
        rows = [value_hashes]

    combined = _combine_hash_arrays(iter(rows), num_items=len(rows))
    if len(combined) == 0:
        # bug in Numpy<1.12 for length 0 arrays. Just return the correct
        # value of 0
        return 0
    return np.bitwise_xor.reduce(combined)
def test_hash_tuple(tup):
    """Check that hash_tuple and hash_tuples agree for ``tup``."""
    scalar_hash = hash_tuple(tup)

    # Hashing a one-element list must yield the same value in slot 0.
    (list_hash,) = hash_tuples([tup])[:1]
    assert scalar_hash == list_hash
def _hash_categories(categories, ordered=True):
    """Combine the hashes of ``categories`` into a single value.

    Parameters
    ----------
    categories : sequence with ``len`` and positional indexing
        The non-tuple path additionally reads ``categories.dtype``.
    ordered : bool, default True
        When true, each category's position is mixed into the hash.

    Returns
    -------
    The builtin ``hash`` of ``(tuple(categories), ordered)`` for object
    dtype categories holding other than exactly one element type,
    otherwise the XOR-reduction of the combined hash array.
    """
    from pandas.core.util.hashing import (
        _combine_hash_arrays,
        hash_array,
        hash_tuples,
    )
    from pandas.core.dtypes.common import _NS_DTYPE, is_datetime64tz_dtype

    if len(categories) and isinstance(categories[0], tuple):
        # Assumes that if the first category is a tuple then all of them
        # are; mixing tuple and non-tuple categories is not supported.
        # The list() conversion is required: passing an np.array of
        # categories breaks hash_tuples.
        value_hashes = hash_tuples(list(categories))
    else:
        is_object = categories.dtype == 'O'
        if is_object and len({type(item) for item in categories}) != 1:
            # TODO: hash_array doesn't handle mixed types. It casts
            # everything to a str first, which means we treat
            # {'1', '2'} the same as {'1', 2}
            # find a better solution
            return hash((tuple(categories), ordered))

        if is_datetime64tz_dtype(categories.dtype):
            # Avoid future warning.
            categories = categories.astype(_NS_DTYPE)

        value_hashes = hash_array(np.asarray(categories), categorize=False)

    if ordered:
        # Pair every category hash with its position.
        positions = np.arange(len(value_hashes), dtype=value_hashes.dtype)
        rows = np.vstack([value_hashes, positions])
    else:
        rows = [value_hashes]

    combined = _combine_hash_arrays(iter(rows), num_items=len(rows))
    return np.bitwise_xor.reduce(combined)
def _hash_categories(categories, ordered=True):
    """Combine the hashes of ``categories`` into a single value.

    Parameters
    ----------
    categories : sequence with ``len`` and positional indexing
        The non-tuple path additionally reads ``categories.dtype``.
    ordered : bool, default True
        When true, each category's position is mixed into the hash.

    Returns
    -------
    The builtin ``hash`` of ``(tuple(categories), ordered)`` for object
    dtype categories holding other than exactly one element type,
    otherwise the XOR-reduction of the combined hash array.
    """
    from pandas.core.util.hashing import (
        _combine_hash_arrays,
        hash_array,
        hash_tuples,
    )

    if len(categories) and isinstance(categories[0], tuple):
        # Assumes that if the first category is a tuple then all of them
        # are; mixing tuple and non-tuple categories is not supported.
        # The list() conversion is required: passing an np.array of
        # categories breaks hash_tuples.
        value_hashes = hash_tuples(list(categories))
    else:
        mixed_objects = (
            categories.dtype == 'O'
            and len({type(item) for item in categories}) != 1
        )
        if mixed_objects:
            # TODO: hash_array doesn't handle mixed types. It casts
            # everything to a str first, which means we treat
            # {'1', '2'} the same as {'1', 2}
            # find a better solution
            return hash((tuple(categories), ordered))
        value_hashes = hash_array(np.asarray(categories), categorize=False)

    if ordered:
        # Pair every category hash with its position.
        positions = np.arange(len(value_hashes), dtype=value_hashes.dtype)
        rows = np.vstack([value_hashes, positions])
    else:
        rows = [value_hashes]

    combined = _combine_hash_arrays(iter(rows), num_items=len(rows))
    return np.bitwise_xor.reduce(combined)
def _hash_categories(self) -> int:
    """Combine the hashes of ``self.categories`` into a single value.

    Reads ``self.categories`` and ``self.ordered``. When ``self.ordered``
    is true, each category's position is mixed into the hash.

    Returns
    -------
    The builtin ``hash`` of ``(tuple(categories), ordered)`` for object
    dtype categories holding other than exactly one element type,
    otherwise the XOR-reduction of the combined hash array.
    """
    from pandas.core.util.hashing import (
        combine_hash_arrays,
        hash_array,
        hash_tuples,
    )

    cats = self.categories
    is_ordered = self.ordered

    if len(cats) and isinstance(cats[0], tuple):
        # Assumes that if the first category is a tuple then all of them
        # are; mixing tuple and non-tuple categories is not supported.
        # The list() conversion is required: passing an np.array of
        # categories breaks hash_tuples.
        value_hashes = hash_tuples(list(cats))
    else:
        if cats.dtype == "O" and len({type(item) for item in cats}) != 1:
            # TODO: hash_array doesn't handle mixed types. It casts
            # everything to a str first, which means we treat
            # {'1', '2'} the same as {'1', 2}
            # find a better solution
            return hash((tuple(cats), is_ordered))

        if DatetimeTZDtype.is_dtype(cats.dtype):
            # Avoid future warning.
            cats = cats.view("datetime64[ns]")

        value_hashes = hash_array(np.asarray(cats), categorize=False)

    if is_ordered:
        # Pair every category hash with its position.
        positions = np.arange(len(value_hashes), dtype=value_hashes.dtype)
        rows = np.vstack([value_hashes, positions])
    else:
        # A one-element list so that iteration yields the hash array once.
        rows = [value_hashes]

    combined = combine_hash_arrays(iter(rows), num_items=len(rows))
    return np.bitwise_xor.reduce(combined)
def test_hash_tuples_err(val):
    """Check that hash_tuples raises TypeError for ``val``."""
    # The error text must contain this fragment.
    expected_fragment = "must be convertible to a list-of-tuples"
    with pytest.raises(TypeError, match=expected_fragment):
        hash_tuples(val)
def test_hash_tuples_err(self, val):
    """Check that hash_tuples raises TypeError for ``val``."""
    # The error text must contain this fragment.
    expected_fragment = 'must be convertible to a list-of-tuples'
    with pytest.raises(TypeError, match=expected_fragment):
        hash_tuples(val)
def test_hash_tuple(self, tup):
    """Check that hash_tuple and hash_tuples agree for ``tup``."""
    # The scalar hash of the tuple must equal the first (and only) entry
    # produced by hashing a list that contains just that tuple.
    assert hash_tuple(tup) == hash_tuples([tup])[0]
def test_hash_tuples_err(self, val):
    """Check that hash_tuples raises TypeError for ``val``.

    The raised message must match 'must be convertible to a
    list-of-tuples'.
    """
    msg = 'must be convertible to a list-of-tuples'
    # tm.assert_raises_regex is deprecated in pandas in favour of
    # pytest.raises(..., match=...); both regex-search the error message,
    # and this is the form the other error tests for hash_tuples use.
    with pytest.raises(TypeError, match=msg):
        hash_tuples(val)