def test_unique_masked(self):
    """Regression test for issue 8664.

    The unique values of a masked array (both their data and their mask)
    must be the same whether or not the index and counts outputs are
    requested alongside them.
    """
    raw = np.array(
        [64, 0, 1, 2, 3, 63, 63, 0, 0, 0, 1, 2, 0, 63, 0], dtype='uint8')
    masked = np.ma.masked_equal(raw, 0)

    plain = np.unique(masked)
    # Only the values are compared; the index and counts are requested
    # solely to exercise the code path taken when extra outputs are asked for.
    with_extras, _index, _counts = np.unique(
        masked, return_index=True, return_counts=True)

    msg = 'Unique returned different results when asked for index'
    for attr in ('data', 'mask'):
        assert_array_equal(
            getattr(plain, attr), getattr(with_extras, attr), msg)
def test_unique_sort_order_with_axis(self):
    """Check the ordering of `np.unique(..., axis=0)` for signed integers.

    These tests fail if sorting along axis is done by treating subarrays
    as unsigned byte strings. See gh-10495.
    """
    template = "sort order incorrect for integer type '%s'"
    for typecode in 'bhilq':
        # The rows are already unique and ascending, so the result must be
        # identical to the input.
        rows = np.array([[-1], [0]], typecode)
        deduplicated = np.unique(rows, axis=0)
        assert_array_equal(rows, deduplicated, template % typecode)
def in1d(ar1, ar2, assume_unique=False, invert=False):
    """
    Test whether each element of a 1-D array is also present in a second
    array.

    The result is a boolean array with the same length as `ar1`; an entry
    is True where the corresponding element of `ar1` occurs in `ar2` and
    False otherwise.

    We recommend using :func:`isin` instead of `in1d` for new code.

    Parameters
    ----------
    ar1 : (M,) array_like
        Input array.
    ar2 : array_like
        The values against which to test each value of `ar1`.
    assume_unique : bool, optional
        If True, the input arrays are both assumed to be unique, which
        can speed up the calculation.  Default is False.
    invert : bool, optional
        If True, the values in the returned array are inverted (that is,
        False where an element of `ar1` is in `ar2` and True otherwise).
        Default is False. ``np.in1d(a, b, invert=True)`` is equivalent
        to (but is faster than) ``np.invert(in1d(a, b))``.

        .. versionadded:: 1.8.0

    Returns
    -------
    in1d : (M,) ndarray, bool
        The values `ar1[in1d]` are in `ar2`.

    See Also
    --------
    isin                  : Version of this function that preserves the
                            shape of ar1.
    numpy.lib.arraysetops : Module with a number of other functions for
                            performing set operations on arrays.

    Notes
    -----
    `in1d` can be considered as an element-wise function version of the
    python keyword `in`, for 1-D sequences. ``in1d(a, b)`` is roughly
    equivalent to ``np.array([item in b for item in a])``.
    However, this idea fails if `ar2` is a set, or similar (non-sequence)
    container:  As ``ar2`` is converted to an array, in those cases
    ``asarray(ar2)`` is an object array rather than the expected array of
    contained values.

    .. versionadded:: 1.4.0

    Examples
    --------
    >>> test = np.array([0, 1, 2, 5, 0])
    >>> states = [0, 2]
    >>> mask = np.in1d(test, states)
    >>> mask
    array([ True, False,  True, False,  True])
    >>> test[mask]
    array([0, 2, 0])
    >>> mask = np.in1d(test, states, invert=True)
    >>> mask
    array([False,  True, False,  True, False])
    >>> test[mask]
    array([1, 5])
    """
    # Flatten both inputs; behavior for the first array could be different.
    ar1 = np.asarray(ar1).ravel()
    ar2 = np.asarray(ar2).ravel()

    # Either array may hold arbitrary Python objects.
    has_objects = ar1.dtype.hasobject or ar2.dtype.hasobject

    # Element-by-element comparison is used when
    # a) `ar2` is short relative to `ar1`, which makes this path
    #    significantly faster than sorting, or
    # b) arbitrary objects may be present, since then sorting is not
    #    guaranteed to work.
    if has_objects or len(ar2) < 10 * len(ar1) ** 0.145:
        if invert:
            mask = np.ones(len(ar1), dtype=bool)
            for value in ar2:
                mask &= (ar1 != value)
        else:
            mask = np.zeros(len(ar1), dtype=bool)
            for value in ar2:
                mask |= (ar1 == value)
        return mask

    # Otherwise fall back to the sort-based algorithm.
    if not assume_unique:
        ar1, rev_idx = np.unique(ar1, return_inverse=True)
        ar2 = np.unique(ar2)

    combined = np.concatenate((ar1, ar2))
    # The sort has to be stable, so always use 'mergesort' here: values
    # coming from the first array must stay ahead of equal values coming
    # from the second array.
    order = combined.argsort(kind='mergesort')
    ordered = combined[order]

    # Compare every sorted element with its right-hand neighbour.
    neighbours = (
        (ordered[1:] != ordered[:-1]) if invert
        else (ordered[1:] == ordered[:-1])
    )
    # The last sorted element has no right-hand neighbour; it gets `invert`.
    flag = np.concatenate((neighbours, [invert]))

    # Scatter the flags back from sorted order to concatenation order.
    ret = np.empty(combined.shape, dtype=bool)
    ret[order] = flag

    if assume_unique:
        return ret[:len(ar1)]
    return ret[rev_idx]
def test_unique_1d(self):
    """Exercise 1-D `unique` over many dtypes, every combination of the
    optional outputs, and a handful of regression tickets."""

    def check_all(a, b, i1, i2, c, dt):
        # `a` is the input, `b` the expected unique values, and `i1`, `i2`,
        # `c` the expected index, inverse and counts outputs.  The three
        # positional flags passed to `unique` select, in order,
        # return_index, return_inverse and return_counts.
        base_msg = 'check {0} failed for type {1}'

        msg = base_msg.format('values', dt)
        assert_array_equal(unique(a), b, msg)

        scenarios = (
            ('return_index', (1, 0, 0), (i1,)),
            ('return_inverse', (0, 1, 0), (i2,)),
            ('return_counts', (0, 0, 1), (c,)),
            ('return_index and return_inverse', (1, 1, 0), (i1, i2)),
            ('return_index and return_counts', (1, 0, 1), (i1, c)),
            ('return_inverse and return_counts', (0, 1, 1), (i2, c)),
            ('return_index, return_inverse and return_counts',
             (1, 1, 1), (i1, i2, c)),
        )
        for label, flags, expected in scenarios:
            msg = base_msg.format(label, dt)
            outputs = unique(a, *flags)
            # One values array plus exactly one array per requested flag.
            assert_equal(len(outputs), 1 + len(expected), msg)
            assert_array_equal(outputs[0], b, msg)
            for got, want in zip(outputs[1:], expected):
                assert_array_equal(got, want, msg)

    samples = [5, 7, 1, 2, 1, 5, 7] * 10
    uniques = [1, 2, 5, 7]
    first_idx = [2, 3, 0, 1]
    inverse_idx = [2, 3, 0, 1, 0, 2, 3] * 10
    counts = np.multiply([2, 1, 2, 2], 10)

    # test for numeric arrays
    numeric_types = (
        list(np.typecodes['AllInteger'])
        + list(np.typecodes['AllFloat'])
        + ['datetime64[D]', 'timedelta64[D]']
    )
    for dt in numeric_types:
        check_all(np.array(samples, dt), np.array(uniques, dt),
                  first_idx, inverse_idx, counts, dt)

    # test for object arrays
    dt = 'O'
    obj_samples = np.empty(len(samples), dt)
    obj_samples[:] = samples
    obj_uniques = np.empty(len(uniques), dt)
    obj_uniques[:] = uniques
    check_all(obj_samples, obj_uniques, first_idx, inverse_idx, counts, dt)

    # test for structured arrays
    dt = [('', 'i'), ('', 'i')]
    rec_samples = np.array(list(zip(samples, samples)), dt)
    rec_uniques = np.array(list(zip(uniques, uniques)), dt)
    check_all(rec_samples, rec_uniques, first_idx, inverse_idx, counts, dt)

    # test for ticket #2799
    mixed = [1. + 0.j, 1 - 1.j, 1]
    assert_array_equal(np.unique(mixed), [1. - 1.j, 1. + 0.j])

    # test for ticket #4785
    pairs = [(1, 2), (1, 2), (2, 3)]
    expected_values = [1, 2, 3]
    expected_inverse = [0, 1, 0, 1, 1, 2]
    assert_array_equal(unique(pairs), expected_values)
    pair_values, pair_inverse = unique(pairs, return_inverse=True)
    assert_array_equal(pair_values, expected_values)
    assert_array_equal(pair_inverse, expected_inverse)

    # test for chararrays with return_inverse (gh-5099)
    chars = np.chararray(5)
    chars[...] = ''
    _char_values, char_inverse = np.unique(chars, return_inverse=True)
    assert_array_equal(char_inverse, np.zeros(5))

    # test for ticket #9137
    empty = []
    idx_only = np.unique(empty, return_index=True)[1]
    inv_only = np.unique(empty, return_inverse=True)[1]
    both_idx, both_inv = np.unique(
        empty, return_index=True, return_inverse=True)[1:]
    for indices in (idx_only, inv_only, both_idx, both_inv):
        assert_equal(indices.dtype, np.intp)
def test_unique_zero_sized(self):
    """Ticket #205: `np.unique` of an empty array compares equal to ``[]``."""
    empty = np.array([])
    result = np.unique(empty)
    assert_array_equal([], result)