Exemple #1
0
def test_unique(index_or_series_obj):
    obj = index_or_series_obj
    obj = np.repeat(obj, range(1, len(obj) + 1))
    with tm.maybe_produces_warning(
        PerformanceWarning,
        pa_version_under2p0 and str(index_or_series_obj.dtype) == "string[pyarrow]",
    ):
        result = obj.unique()

    # dict.fromkeys preserves the order
    unique_values = list(dict.fromkeys(obj.values))
    if isinstance(obj, pd.MultiIndex):
        expected = pd.MultiIndex.from_tuples(unique_values)
        expected.names = obj.names
        tm.assert_index_equal(result, expected, exact=True)
    elif isinstance(obj, pd.Index) and obj._is_backward_compat_public_numeric_index:
        expected = NumericIndex(unique_values, dtype=obj.dtype)
        tm.assert_index_equal(result, expected, exact=True)
    elif isinstance(obj, pd.Index):
        expected = pd.Index(unique_values, dtype=obj.dtype)
        if is_datetime64tz_dtype(obj.dtype):
            expected = expected.normalize()
        tm.assert_index_equal(result, expected, exact=True)
    else:
        expected = np.array(unique_values)
        tm.assert_numpy_array_equal(result, expected)
Exemple #2
0
    def test_insert_na(self, nulls_fixture, simple_index):
        # GH 18295 (test missing)
        index = simple_index
        na_val = nulls_fixture

        if na_val is pd.NaT:
            expected = Index([index[0], pd.NaT] + list(index[1:]), dtype=object)
        else:
            expected = Float64Index([index[0], np.nan] + list(index[1:]))

            if index._is_backward_compat_public_numeric_index:
                # GH#43921 we preserve NumericIndex
                if index.dtype.kind == "f":
                    expected = NumericIndex(expected, dtype=index.dtype)
                else:
                    expected = NumericIndex(expected)

        result = index.insert(1, na_val)
        tm.assert_index_equal(result, expected, exact=True)
Exemple #3
0
def test_unique_null(null_obj, index_or_series_obj):
    obj = index_or_series_obj

    if not allow_na_ops(obj):
        pytest.skip("type doesn't allow for NA operations")
    elif len(obj) < 1:
        pytest.skip("Test doesn't make sense on empty data")
    elif isinstance(obj, pd.MultiIndex):
        pytest.skip(f"MultiIndex can't hold '{null_obj}'")

    values = obj.values
    if needs_i8_conversion(obj.dtype):
        values[0:2] = iNaT
    else:
        values[0:2] = null_obj

    klass = type(obj)
    repeated_values = np.repeat(values, range(1, len(values) + 1))
    obj = klass(repeated_values, dtype=obj.dtype)
    result = obj.unique()

    unique_values_raw = dict.fromkeys(obj.values)
    # because np.nan == np.nan is False, but None == None is True
    # np.nan would be duplicated, whereas None wouldn't
    unique_values_not_null = [
        val for val in unique_values_raw if not pd.isnull(val)
    ]
    unique_values = [null_obj] + unique_values_not_null

    if isinstance(obj,
                  pd.Index) and obj._is_backward_compat_public_numeric_index:
        expected = NumericIndex(unique_values, dtype=obj.dtype)
        tm.assert_index_equal(result, expected, exact=True)
    elif isinstance(obj, pd.Index):
        expected = pd.Index(unique_values, dtype=obj.dtype)
        if is_datetime64tz_dtype(obj.dtype):
            result = result.normalize()
            expected = expected.normalize()
        tm.assert_index_equal(result, expected, exact=True)
    else:
        expected = np.array(unique_values, dtype=obj.dtype)
        tm.assert_numpy_array_equal(result, expected)
Exemple #4
0
def makeNumericIndex(k=10, name=None, *, dtype):
    dtype = pandas_dtype(dtype)
    assert isinstance(dtype, np.dtype)

    if is_integer_dtype(dtype):
        values = np.arange(k, dtype=dtype)
        if is_unsigned_integer_dtype(dtype):
            values += 2**(dtype.itemsize * 8 - 1)
    elif is_float_dtype(dtype):
        values = np.random.random_sample(k) - np.random.random_sample(1)
        values.sort()
        values = values * (10**np.random.randint(0, 9))
    else:
        raise NotImplementedError(f"wrong dtype {dtype}")

    return NumericIndex(values, dtype=dtype, name=name)