def test_mode_array():
    """Check ``pc.mode`` on a plain array, including the empty case."""
    # ARROW-9917
    arr = pa.array([1, 1, 3, 4, 3, 5], type='int64')
    expected = {"mode": 1, "count": 2}
    # Compare against ``expected`` rather than repeating the literal, so the
    # local is actually used.
    assert pc.mode(arr).as_py() == expected

    # With no values, both struct fields are expected to be null.
    arr = pa.array([], type='int64')
    expected = {"mode": None, "count": None}
    assert pc.mode(arr).as_py() == expected
def test_mode_chunked_array():
    """Check ``pc.mode`` on chunked input: one chunk, then zero chunks."""
    # ARROW-9917
    single_chunk = pa.chunked_array(
        [pa.array([1, 1, 3, 4, 3, 5], type='int64')]
    )
    assert pc.mode(single_chunk).as_py() == {"mode": 1, "count": 2}

    # A chunked array built from an empty tuple has no chunks at all.
    no_chunks = pa.chunked_array((), type='int64')
    assert no_chunks.num_chunks == 0
    assert pc.mode(no_chunks).as_py() == {"mode": None, "count": None}
def test_mode_array():
    """Check ``pc.mode`` on a plain array for the top one and top two modes."""
    # ARROW-9917
    values = pa.array([1, 1, 3, 4, 3, 5], type='int64')

    # Default call: a single result entry.
    top_one = pc.mode(values)
    assert len(top_one) == 1
    assert top_one[0].as_py() == {"mode": 1, "count": 2}

    # Asking for two modes: 1 and 3 both occur twice, in that order.
    top_two = pc.mode(values, 2)
    assert len(top_two) == 2
    wanted_entries = (
        {"mode": 1, "count": 2},
        {"mode": 3, "count": 2},
    )
    for position, wanted in enumerate(wanted_entries):
        assert top_two[position].as_py() == wanted

    # No input values means no result entries.
    empty = pa.array([], type='int64')
    assert len(pc.mode(empty)) == 0
def test_mode_chunked_array():
    """Check ``pc.mode`` on chunked input for the top one and top two modes."""
    # ARROW-9917
    chunked = pa.chunked_array(
        [pa.array([1, 1, 3, 4, 3, 5], type='int64')]
    )

    # Default call: a single result entry.
    top_one = pc.mode(chunked)
    assert len(top_one) == 1
    assert top_one[0].as_py() == {"mode": 1, "count": 2}

    # Asking for two modes: 1 and 3 both occur twice, in that order.
    top_two = pc.mode(chunked, 2)
    assert len(top_two) == 2
    wanted_entries = (
        {"mode": 1, "count": 2},
        {"mode": 3, "count": 2},
    )
    for position, wanted in enumerate(wanted_entries):
        assert top_two[position].as_py() == wanted

    # A chunked array built from an empty tuple has no chunks and no modes.
    no_chunks = pa.chunked_array((), type='int64')
    assert no_chunks.num_chunks == 0
    assert len(pc.mode(no_chunks)) == 0
def _mode(self: ArrowExtensionArrayT, dropna: bool = True) -> ArrowExtensionArrayT:
    """
    Returns the mode(s) of the ExtensionArray.

    Always returns `ExtensionArray` even if only one value.

    Parameters
    ----------
    dropna : bool, default True
        Don't consider counts of NA values.
        Not implemented by pyarrow; the argument is accepted but is not
        consulted by this method.

    Returns
    -------
    same type as self
        Sorted, if possible. Empty when the data contains no distinct
        values to count (e.g. an empty array).

    Raises
    ------
    NotImplementedError
        If the installed pyarrow is older than 6.0.
    """
    if pa_version_under6p0:
        raise NotImplementedError("mode only supported for pyarrow version >= 6.0")

    n_distinct = pc.count_distinct(self._data).as_py()
    if n_distinct == 0:
        # Nothing to rank: the ``counts[0]`` lookup below would have no
        # element to index, so return an empty array of the same Arrow type.
        return type(self)(self._data.slice(0, 0))

    # Request one entry per distinct value so that every value tied for the
    # highest count is present in the result.
    modes = pc.mode(self._data, n_distinct)
    values = modes.field(0)
    counts = modes.field(1)

    # counts sorted descending i.e counts[0] = max
    mask = pc.equal(counts, counts[0])
    most_common = values.filter(mask)
    return type(self)(most_common)