def test_unique(index_or_series_obj):
    """
    Check ``unique()`` against an order-preserving de-duplication of the values.

    The fixture object is expanded with ``np.repeat`` so that the element at
    position ``i`` occurs ``i + 1`` times; ``unique()`` on that expanded object
    is then compared with its distinct values in first-seen order.
    """
    repeated = np.repeat(index_or_series_obj, range(1, len(index_or_series_obj) + 1))

    # The warning condition is based on the dtype of the *original* fixture.
    warning_condition = (
        pa_version_under2p0 and str(index_or_series_obj.dtype) == "string[pyarrow]"
    )
    with tm.maybe_produces_warning(PerformanceWarning, warning_condition):
        result = repeated.unique()

    # dict.fromkeys preserves the order
    distinct = list(dict.fromkeys(repeated.values))

    if not isinstance(repeated, pd.Index):
        # Anything that is not an Index is compared as a plain ndarray.
        tm.assert_numpy_array_equal(result, np.array(distinct))
        return

    if isinstance(repeated, pd.MultiIndex):
        expected = pd.MultiIndex.from_tuples(distinct)
        expected.names = repeated.names
    elif repeated._is_backward_compat_public_numeric_index:
        expected = NumericIndex(distinct, dtype=repeated.dtype)
    else:
        expected = pd.Index(distinct, dtype=repeated.dtype)
        if is_datetime64tz_dtype(repeated.dtype):
            expected = expected.normalize()

    # Every Index flavour is checked with the same strict comparison.
    tm.assert_index_equal(result, expected, exact=True)
def test_insert_na(self, nulls_fixture, simple_index):
    """
    Check the result of inserting a null value at position 1 of the index.

    For ``pd.NaT`` the expectation is an object-dtype ``Index``; for every
    other null it is a ``Float64Index`` holding ``np.nan``, re-wrapped as a
    ``NumericIndex`` when the index reports
    ``_is_backward_compat_public_numeric_index``.
    """
    # GH 18295 (test missing)
    idx = simple_index
    null = nulls_fixture

    first = idx[0]
    rest = list(idx[1:])

    if null is pd.NaT:
        expected = Index([first, pd.NaT] + rest, dtype=object)
    else:
        expected = Float64Index([first, np.nan] + rest)

        if idx._is_backward_compat_public_numeric_index:
            # GH#43921 we preserve NumericIndex
            # Only float indexes pass their dtype on explicitly.
            dtype_kwargs = {"dtype": idx.dtype} if idx.dtype.kind == "f" else {}
            expected = NumericIndex(expected, **dtype_kwargs)

    result = idx.insert(1, null)
    tm.assert_index_equal(result, expected, exact=True)
def test_unique_null(null_obj, index_or_series_obj):
    """
    Check ``unique()`` on an object whose first two values were set to a null.

    The test is skipped when ``allow_na_ops`` rejects the object, when the
    object is empty, and for ``MultiIndex``.  Otherwise the first two values
    are overwritten, every value at position ``i`` is repeated ``i + 1`` times,
    and ``unique()`` is compared with ``null_obj`` followed by the remaining
    non-null values in first-seen order.
    """
    source = index_or_series_obj

    # pytest.skip raises, so independent guards behave like an if/elif chain.
    if not allow_na_ops(source):
        pytest.skip("type doesn't allow for NA operations")
    if len(source) < 1:
        pytest.skip("Test doesn't make sense on empty data")
    if isinstance(source, pd.MultiIndex):
        pytest.skip(f"MultiIndex can't hold '{null_obj}'")

    values = source.values
    # dtypes flagged by needs_i8_conversion receive iNaT instead of null_obj
    values[0:2] = iNaT if needs_i8_conversion(source.dtype) else null_obj

    repeat_counts = range(1, len(values) + 1)
    rebuilt = type(source)(np.repeat(values, repeat_counts), dtype=source.dtype)
    result = rebuilt.unique()

    # because np.nan == np.nan is False, but None == None is True
    # np.nan would be duplicated, whereas None wouldn't
    non_null = [val for val in dict.fromkeys(rebuilt.values) if not pd.isnull(val)]
    unique_values = [null_obj, *non_null]

    if not isinstance(rebuilt, pd.Index):
        # Anything that is not an Index is compared as a plain ndarray.
        expected = np.array(unique_values, dtype=rebuilt.dtype)
        tm.assert_numpy_array_equal(result, expected)
        return

    if rebuilt._is_backward_compat_public_numeric_index:
        expected = NumericIndex(unique_values, dtype=rebuilt.dtype)
    else:
        expected = pd.Index(unique_values, dtype=rebuilt.dtype)
        if is_datetime64tz_dtype(rebuilt.dtype):
            # Both sides are normalized before comparing tz-aware indexes.
            result = result.normalize()
            expected = expected.normalize()

    tm.assert_index_equal(result, expected, exact=True)
def makeNumericIndex(k=10, name=None, *, dtype):
    """
    Build a ``NumericIndex`` of length ``k`` with the requested numpy dtype.

    Parameters
    ----------
    k : int, default 10
        Number of values to generate.
    name : object, default None
        Passed through as the ``name`` of the resulting index.
    dtype : keyword-only
        Anything ``pandas_dtype`` accepts; it must resolve to a ``np.dtype``
        (enforced with an ``assert``).

    Returns
    -------
    NumericIndex
        Integer dtypes: ``np.arange(k)``, shifted by ``2**(nbits - 1)`` for
        unsigned dtypes.  Float dtypes: sorted random values multiplied by a
        random power of ten.

    Raises
    ------
    NotImplementedError
        If the dtype is neither an integer nor a float dtype.
    """
    dtype = pandas_dtype(dtype)
    assert isinstance(dtype, np.dtype)

    wants_integers = is_integer_dtype(dtype)
    if not wants_integers and not is_float_dtype(dtype):
        raise NotImplementedError(f"wrong dtype {dtype}")

    if wants_integers:
        values = np.arange(k, dtype=dtype)
        if is_unsigned_integer_dtype(dtype):
            # Shift by the value of the highest bit of the dtype.
            top_bit = dtype.itemsize * 8 - 1
            values += 2**top_bit
    else:
        # Keep the order of the random draws: k samples, one offset, then scale.
        samples = np.random.random_sample(k)
        offset = np.random.random_sample(1)
        values = samples - offset
        values.sort()
        exponent = np.random.randint(0, 9)
        values = values * (10**exponent)

    return NumericIndex(values, dtype=dtype, name=name)