def test_value_counts_null(null_obj, index_or_series_obj):
    """Check ``value_counts`` on data that contains missing values.

    The first two values of the object under test are overwritten with a
    missing-value marker, every value is repeated ``position + 1`` times,
    and the counts reported by ``value_counts`` are compared against a
    ``collections.Counter`` reference -- first with the default ``dropna``
    and then with ``dropna=False``.

    Parameters
    ----------
    null_obj
        Missing-value marker written into the data. When the dtype needs
        i8 conversion, ``iNaT`` is written instead.
    index_or_series_obj
        Index or Series under test; the test works on ``.copy()`` of it.
    """
    orig = index_or_series_obj
    obj = orig.copy()

    if not allow_na_ops(obj):
        pytest.skip("type doesn't allow for NA operations")
    elif len(obj) < 1:
        pytest.skip("Test doesn't make sense on empty data")
    elif isinstance(orig, pd.MultiIndex):
        pytest.skip(f"MultiIndex can't hold '{null_obj}'")

    values = obj.values
    if needs_i8_conversion(obj.dtype):
        values[0:2] = iNaT
    else:
        values[0:2] = null_obj

    # Repeat the value at position i exactly i + 1 times, so the two nulls
    # at positions 0 and 1 contribute 1 + 2 = 3 missing entries in total.
    klass = type(obj)
    repeated_values = np.repeat(values, range(1, len(values) + 1))
    obj = klass(repeated_values, dtype=obj.dtype)

    # TODO:
    #  Order of entries with the same count is inconsistent on CI (gh-32449),
    #  so both sides are sorted by index before comparing when the data
    #  contains duplicates. ``obj`` is not modified below, so the check is
    #  evaluated once and reused for both comparisons.
    sort_before_compare = obj.duplicated().any()

    # because np.nan == np.nan is False, but None == None is True
    # np.nan would be duplicated, whereas None wouldn't
    counter = collections.Counter(obj.dropna())
    expected = Series(dict(counter.most_common()), dtype=np.int64)
    expected.index = expected.index.astype(obj.dtype)

    result = obj.value_counts()
    if sort_before_compare:
        expected = expected.sort_index()
        result = result.sort_index()
    tm.assert_series_equal(result, expected)

    # can't use expected[null_obj] = 3 as
    # IntervalIndex doesn't allow assignment
    new_entry = Series({np.nan: 3}, dtype=np.int64)
    # ``Series.append`` is deprecated (pandas 1.4) and removed in pandas 2.0;
    # ``pd.concat`` is the documented replacement.
    expected = pd.concat([expected, new_entry])

    result = obj.value_counts(dropna=False)
    if sort_before_compare:
        expected = expected.sort_index()
        result = result.sort_index()
    tm.assert_series_equal(result, expected)
def test_unique_null(null_obj, index_or_series_obj):
    """Check ``unique`` on an Index or Series whose data contains nulls.

    The first two values are replaced by a missing-value marker, each value
    is repeated ``position + 1`` times, and ``unique()`` is compared with an
    expectation made of ``null_obj`` followed by the distinct non-null
    values in first-seen order.

    Parameters
    ----------
    null_obj
        Missing-value marker written into the data. When the dtype needs
        i8 conversion, ``iNaT`` is written instead.
    index_or_series_obj
        Index or Series under test.
    """
    obj = index_or_series_obj

    # Each skip raises, so the checks can be written as independent guards.
    if not allow_na_ops(obj):
        pytest.skip("type doesn't allow for NA operations")
    if len(obj) < 1:
        pytest.skip("Test doesn't make sense on empty data")
    if isinstance(obj, pd.MultiIndex):
        pytest.skip(f"MultiIndex can't hold '{null_obj}'")

    data = obj.values
    data[0:2] = iNaT if needs_i8_conversion(obj.dtype) else null_obj

    # Rebuild an object of the same class and dtype from the repeated data.
    repeat_counts = range(1, len(data) + 1)
    obj = type(obj)(np.repeat(data, repeat_counts), dtype=obj.dtype)
    result = obj.unique()

    # np.nan != np.nan while None == None, so NaN may show up more than once
    # among the de-duplicated keys whereas None would not. Nulls are therefore
    # filtered out and a single ``null_obj`` is put back at the front.
    first_seen = dict.fromkeys(obj.values)
    non_null = [item for item in first_seen if not pd.isnull(item)]
    unique_values = [null_obj, *non_null]

    if not isinstance(obj, pd.Index):
        # Non-Index objects are compared as plain NumPy arrays.
        expected = np.array(unique_values, dtype=obj.dtype)
        tm.assert_numpy_array_equal(result, expected)
        return

    if obj._is_backward_compat_public_numeric_index:
        expected = NumericIndex(unique_values, dtype=obj.dtype)
    else:
        expected = pd.Index(unique_values, dtype=obj.dtype)
        if is_datetime64tz_dtype(obj.dtype):
            # tz-aware data: both sides are normalized before comparing.
            result = result.normalize()
            expected = expected.normalize()
    tm.assert_index_equal(result, expected, exact=True)
def test_nunique_null(null_obj, index_or_series_obj):
    """Check ``nunique`` with and without ``dropna`` on data containing nulls.

    The first two values are replaced by ``null_obj`` and each value is
    repeated ``position + 1`` times before the counts are asserted.

    Parameters
    ----------
    null_obj
        Missing-value marker written into the first two positions.
    index_or_series_obj
        Index or Series under test.
    """
    obj = index_or_series_obj

    # Each skip raises, so the checks can be written as independent guards.
    if not allow_na_ops(obj):
        pytest.skip("type doesn't allow for NA operations")
    if isinstance(obj, pd.MultiIndex):
        pytest.skip(f"MultiIndex can't hold '{null_obj}'")

    data = obj._values
    data[0:2] = null_obj

    # Rebuild an object of the same class and dtype from the repeated data.
    obj = type(obj)(np.repeat(data, range(1, len(data) + 1)), dtype=obj.dtype)

    if isinstance(obj, pd.CategoricalIndex):
        # Expected: every category counts once, and the null adds one more
        # entry only when ``dropna=False``.
        n_categories = len(obj.categories)
        assert obj.nunique() == n_categories
        assert obj.nunique(dropna=False) == n_categories + 1
        return

    # Expected: ``unique()`` has one more entry than ``nunique()`` with the
    # default ``dropna``; ``max`` keeps the expectation from going negative.
    n_unique = len(obj.unique())
    assert obj.nunique() == max(0, n_unique - 1)
    assert obj.nunique(dropna=False) == max(0, n_unique)