def argsort(self, ascending=True, kind="quicksort", *args, **kwargs): """ Return the indices that would sort this array. Parameters ---------- ascending : bool, default True Whether the indices should result in an ascending or descending sort. kind : {'quicksort', 'mergesort', 'heapsort'}, optional Sorting algorithm. *args, **kwargs: passed through to :func:`numpy.argsort`. Returns ------- index_array : ndarray Array of indices that sort ``self``. If NaN values are contained, NaN values are placed at the end. See Also -------- numpy.argsort : Sorting implementation used internally. """ # Implementor note: You have two places to override the behavior of # argsort. # 1. _values_for_argsort : construct the values passed to np.argsort # 2. argsort : total control over sorting. ascending = nv.validate_argsort_with_ascending(ascending, args, kwargs) result = nargsort(self, kind=kind, ascending=ascending, na_position="last") return result
def test_nargsort(self): # np.argsort(items) places NaNs last items = [np.nan] * 5 + list(range(100)) + [np.nan] * 5 # np.argsort(items2) may not place NaNs first items2 = np.array(items, dtype="O") # mergesort is the most difficult to get right because we want it to be # stable. # According to numpy/core/tests/test_multiarray, """The number of # sorted items must be greater than ~50 to check the actual algorithm # because quick and merge sort fall over to insertion sort for small # arrays.""" # mergesort, ascending=True, na_position='last' result = nargsort(items, kind="mergesort", ascending=True, na_position="last") exp = list(range(5, 105)) + list(range(5)) + list(range(105, 110)) tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) # mergesort, ascending=True, na_position='first' result = nargsort(items, kind="mergesort", ascending=True, na_position="first") exp = list(range(5)) + list(range(105, 110)) + list(range(5, 105)) tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) # mergesort, ascending=False, na_position='last' result = nargsort(items, kind="mergesort", ascending=False, na_position="last") exp = list(range(104, 4, -1)) + list(range(5)) + list(range(105, 110)) tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) # mergesort, ascending=False, na_position='first' result = nargsort(items, kind="mergesort", ascending=False, na_position="first") exp = list(range(5)) + list(range(105, 110)) + list(range(104, 4, -1)) tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) # mergesort, ascending=True, na_position='last' result = nargsort(items2, kind="mergesort", ascending=True, na_position="last") exp = list(range(5, 105)) + list(range(5)) + list(range(105, 110)) tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) # mergesort, ascending=True, na_position='first' result = nargsort(items2, kind="mergesort", ascending=True, na_position="first") exp = list(range(5)) + list(range(105, 110)) + list(range(5, 105)) tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) # mergesort, ascending=False, na_position='last' result = nargsort(items2, kind="mergesort", ascending=False, na_position="last") exp = list(range(104, 4, -1)) + list(range(5)) + list(range(105, 110)) tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) # mergesort, ascending=False, na_position='first' result = nargsort( items2, kind="mergesort", ascending=False, na_position="first" ) exp = list(range(5)) + list(range(105, 110)) + list(range(104, 4, -1)) tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False)
def _stable_series_sort(ser, ascending): """ Stable sort for pandas series Temporary Solution until https://github.com/pandas-dev/pandas/issues/28697 https://github.com/pandas-dev/pandas/pull/28698 are resolved """ from pandas.core.sorting import nargsort values = ser._values indexer = nargsort( values, kind='mergesort', ascending=ascending, na_position='last') return pd.Series(values[indexer], index=ser.index[indexer])
def test_nargsort(self, ascending, na_position, exp, box): # list places NaNs last, np.array(..., dtype="O") may not place NaNs first items = box([np.nan] * 5 + list(range(100)) + [np.nan] * 5) # mergesort is the most difficult to get right because we want it to be # stable. # According to numpy/core/tests/test_multiarray, """The number of # sorted items must be greater than ~50 to check the actual algorithm # because quick and merge sort fall over to insertion sort for small # arrays.""" result = nargsort(items, kind="mergesort", ascending=ascending, na_position=na_position) tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False)
def test_nargsort(self, data_missing_for_sorting, na_position, expected): # GH 25439 result = nargsort(data_missing_for_sorting, na_position=na_position) tm.assert_numpy_array_equal(result, expected)
def test_nargsort_datetimearray_warning(self): # https://github.com/pandas-dev/pandas/issues/25439 # can be removed once the FutureWarning for np.array(DTA) is removed data = to_datetime([0, 2, 0, 1]).tz_localize('Europe/Brussels') with tm.assert_produces_warning(None): nargsort(data)
def test_nargsort(self): # np.argsort(items) places NaNs last items = [nan] * 5 + list(range(100)) + [nan] * 5 # np.argsort(items2) may not place NaNs first items2 = np.array(items, dtype='O') try: # GH 2785; due to a regression in NumPy1.6.2 np.argsort(np.array([[1, 2], [1, 3], [1, 2]], dtype='i')) np.argsort(items2, kind='mergesort') except TypeError: pytest.skip('requested sort not available for type') # mergesort is the most difficult to get right because we want it to be # stable. # According to numpy/core/tests/test_multiarray, """The number of # sorted items must be greater than ~50 to check the actual algorithm # because quick and merge sort fall over to insertion sort for small # arrays.""" # mergesort, ascending=True, na_position='last' result = nargsort(items, kind='mergesort', ascending=True, na_position='last') exp = list(range(5, 105)) + list(range(5)) + list(range(105, 110)) tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) # mergesort, ascending=True, na_position='first' result = nargsort(items, kind='mergesort', ascending=True, na_position='first') exp = list(range(5)) + list(range(105, 110)) + list(range(5, 105)) tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) # mergesort, ascending=False, na_position='last' result = nargsort(items, kind='mergesort', ascending=False, na_position='last') exp = list(range(104, 4, -1)) + list(range(5)) + list(range(105, 110)) tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) # mergesort, ascending=False, na_position='first' result = nargsort(items, kind='mergesort', ascending=False, na_position='first') exp = list(range(5)) + list(range(105, 110)) + list(range(104, 4, -1)) tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) # mergesort, ascending=True, na_position='last' result = nargsort(items2, kind='mergesort', ascending=True, na_position='last') exp = list(range(5, 105)) + list(range(5)) + list(range(105, 110)) tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) # mergesort, ascending=True, na_position='first' result = nargsort(items2, kind='mergesort', ascending=True, na_position='first') exp = list(range(5)) + list(range(105, 110)) + list(range(5, 105)) tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) # mergesort, ascending=False, na_position='last' result = nargsort(items2, kind='mergesort', ascending=False, na_position='last') exp = list(range(104, 4, -1)) + list(range(5)) + list(range(105, 110)) tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) # mergesort, ascending=False, na_position='first' result = nargsort(items2, kind='mergesort', ascending=False, na_position='first') exp = list(range(5)) + list(range(105, 110)) + list(range(104, 4, -1)) tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False)
def test_nargsort(self): # np.argsort(items) places NaNs last items = [nan] * 5 + list(range(100)) + [nan] * 5 # np.argsort(items2) may not place NaNs first items2 = np.array(items, dtype='O') # mergesort is the most difficult to get right because we want it to be # stable. # According to numpy/core/tests/test_multiarray, """The number of # sorted items must be greater than ~50 to check the actual algorithm # because quick and merge sort fall over to insertion sort for small # arrays.""" # mergesort, ascending=True, na_position='last' result = nargsort(items, kind='mergesort', ascending=True, na_position='last') exp = list(range(5, 105)) + list(range(5)) + list(range(105, 110)) tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) # mergesort, ascending=True, na_position='first' result = nargsort(items, kind='mergesort', ascending=True, na_position='first') exp = list(range(5)) + list(range(105, 110)) + list(range(5, 105)) tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) # mergesort, ascending=False, na_position='last' result = nargsort(items, kind='mergesort', ascending=False, na_position='last') exp = list(range(104, 4, -1)) + list(range(5)) + list(range(105, 110)) tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) # mergesort, ascending=False, na_position='first' result = nargsort(items, kind='mergesort', ascending=False, na_position='first') exp = list(range(5)) + list(range(105, 110)) + list(range(104, 4, -1)) tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) # mergesort, ascending=True, na_position='last' result = nargsort(items2, kind='mergesort', ascending=True, na_position='last') exp = list(range(5, 105)) + list(range(5)) + list(range(105, 110)) tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) # mergesort, ascending=True, na_position='first' result = nargsort(items2, kind='mergesort', ascending=True, na_position='first') exp = list(range(5)) + list(range(105, 110)) + list(range(5, 105)) tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) # mergesort, ascending=False, na_position='last' result = nargsort(items2, kind='mergesort', ascending=False, na_position='last') exp = list(range(104, 4, -1)) + list(range(5)) + list(range(105, 110)) tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) # mergesort, ascending=False, na_position='first' result = nargsort(items2, kind='mergesort', ascending=False, na_position='first') exp = list(range(5)) + list(range(105, 110)) + list(range(104, 4, -1)) tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False)