def unique1d(values):
    """
    Compute the unique elements of ``values`` with a dtype-specific hash table.

    The table is chosen from ``values.dtype``:

    * floating            -> ``Float64HashTable``; result wrapped in a float64
      ndarray
    * datetime64          -> ``Int64HashTable``; result viewed as ``'M8[ns]'``
    * timedelta64         -> ``Int64HashTable``; result viewed as ``'m8[ns]'``
    * signed integer      -> ``Int64HashTable``
    * unsigned integer    -> ``UInt64HashTable``
    * anything else       -> ``StringHashTable`` when ``lib.infer_dtype``
      reports ``'string'``, otherwise ``PyObjectHashTable``

    Parameters
    ----------
    values : object exposing ``dtype`` and ``len()``
        Presumably a 1-d ndarray -- confirm against callers.

    Returns
    -------
    Whatever the selected table's ``unique`` method yields, after the
    per-dtype post-processing listed above.
    """
    kind = values.dtype

    if np.issubdtype(kind, np.floating):
        float_table = htable.Float64HashTable(len(values))
        distinct = float_table.unique(_ensure_float64(values))
        return np.array(distinct, dtype=np.float64)

    # Both datetime-like dtypes are hashed through their int64 representation
    # and then re-viewed.  These checks must stay ahead of the signed-integer
    # test below so timedelta64 input is caught here first.
    datetime_likes = (
        (np.datetime64, 'M8[ns]'),
        (np.timedelta64, 'm8[ns]'),
    )
    for scalar_type, view_code in datetime_likes:
        if np.issubdtype(kind, scalar_type):
            int_table = htable.Int64HashTable(len(values))
            distinct = int_table.unique(_ensure_int64(values))
            return distinct.view(view_code)

    if np.issubdtype(kind, np.signedinteger):
        int_table = htable.Int64HashTable(len(values))
        return int_table.unique(_ensure_int64(values))

    if np.issubdtype(kind, np.unsignedinteger):
        uint_table = htable.UInt64HashTable(len(values))
        return uint_table.unique(_ensure_uint64(values))

    # It is cheaper to use a string hash table than a generic object one.
    if lib.infer_dtype(values) == 'string':
        fallback_table = htable.StringHashTable(len(values))
    else:
        fallback_table = htable.PyObjectHashTable(len(values))
    return fallback_table.unique(_ensure_object(values))
def unique1d(values):
    """
    Hash table-based unique.

    The hash table is selected from ``values.dtype``:

    * floating     -> ``Float64HashTable``; result wrapped in a float64 ndarray
    * datetime64   -> ``Int64HashTable``; result viewed as ``'M8[ns]'``
    * timedelta64  -> ``Int64HashTable``; result viewed as ``'m8[ns]'``
    * integer      -> ``Int64HashTable``
    * anything else -> ``PyObjectHashTable``

    Parameters
    ----------
    values : object exposing ``dtype`` and ``len()``
        Presumably a 1-d ndarray -- confirm against callers.

    Returns
    -------
    The result of the selected table's ``unique`` method, after the per-dtype
    post-processing listed above.
    """
    dtype = values.dtype

    if np.issubdtype(dtype, np.floating):
        table = _hash.Float64HashTable(len(values))
        uniques = np.array(table.unique(com._ensure_float64(values)),
                           dtype=np.float64)
    elif np.issubdtype(dtype, np.datetime64):
        table = _hash.Int64HashTable(len(values))
        uniques = table.unique(com._ensure_int64(values))
        uniques = uniques.view('M8[ns]')
    elif np.issubdtype(dtype, np.timedelta64):
        # np.timedelta64 is a subclass of np.signedinteger, so without this
        # branch timedelta input would match the integer test below and be
        # returned as plain int64, losing its dtype.  It must be checked
        # before np.integer.
        table = _hash.Int64HashTable(len(values))
        uniques = table.unique(com._ensure_int64(values))
        uniques = uniques.view('m8[ns]')
    elif np.issubdtype(dtype, np.integer):
        # NOTE(review): unsigned dtypes also match np.integer and are pushed
        # through com._ensure_int64 -- confirm values above the int64 maximum
        # are not expected here.
        table = _hash.Int64HashTable(len(values))
        uniques = table.unique(com._ensure_int64(values))
    else:
        table = _hash.PyObjectHashTable(len(values))
        uniques = table.unique(com._ensure_object(values))
    return uniques
def test_lookup_nan(self):
    """Check that looking up mapped float keys, including NaN, returns their positions."""
    keys = np.array([2.718, 3.14, np.nan, -7, 5, 2, 3])

    table = hashtable.Float64HashTable()
    table.map_locations(keys)

    # Every key, the NaN entry included, is expected to resolve to the index
    # at which it was mapped.
    found = table.lookup(keys)
    expected = np.arange(len(keys), dtype=np.int64)
    self.assert_numpy_array_equal(found, expected)