def test_unique_util_with_all_missing_values():
    """Exercise `_unique` on an object array mixing NaN, None and strings.

    The input holds ``np.nan``, ``float("nan")`` and ``None`` (twice) next to
    ordinary strings. The test asserts that the uniques are ``"a"``, ``"c"``,
    ``None`` followed by a single trailing NaN, and that the inverse indices
    send both NaN entries to that last slot.
    """
    # Every flavour of missing value, stored with object dtype.
    raw = np.array(
        [np.nan, "a", "c", "c", None, float("nan"), None],
        dtype=object,
    )

    found = _unique(raw)

    # NaN never compares equal to itself, so everything before the final
    # entry is compared as a block and the final entry is checked separately.
    assert_array_equal(found[:-1], ["a", "c", None])
    assert np.isnan(found[-1])

    inverse = _unique(raw, return_inverse=True)[1]
    assert_array_equal(inverse, [3, 0, 1, 1, 2, 3, 2])
def test_unique_util_missing_values_numeric():
    """Check `_unique` and `_encode` on a float array that contains NaN.

    The assertions require the uniques to be ``[1, 3, 5, nan]`` and both the
    ``return_inverse`` output and the `_encode` result to equal
    ``[1, 0, 3, 2, 1, 3]``.
    """
    data = np.array([3, 1, np.nan, 5, 3, np.nan], dtype=float)
    want_uniques = np.array([1, 3, 5, np.nan], dtype=float)
    want_codes = np.array([1, 0, 3, 2, 1, 3])

    # Plain call: only the unique values are returned.
    assert_array_equal(_unique(data), want_uniques)

    # Same uniques when the inverse indices are requested as well.
    got_uniques, got_inverse = _unique(data, return_inverse=True)
    assert_array_equal(got_uniques, want_uniques)
    assert_array_equal(got_inverse, want_codes)

    # Encoding against those uniques must reproduce the inverse indices.
    assert_array_equal(_encode(data, uniques=got_uniques), want_codes)
def test_encode_util(values, expected):
    """Check `_unique` under each return-flag combination, plus `_encode`.

    For the given ``values`` the uniques must equal ``expected``, the inverse
    indices and the `_encode` output must equal ``[1, 0, 2, 0, 2]``, and the
    counts must equal ``[2, 1, 2]``.

    ``values`` and ``expected`` are presumably supplied by a parametrize
    decorator that is not part of this block -- TODO confirm.
    """
    # NOTE(review): another `test_encode_util` is defined later in this file.
    # The later definition rebinds the name at import time, so this version
    # would not be collected by pytest -- confirm and rename or drop one.
    expected_codes = np.array([1, 0, 2, 0, 2])
    expected_counts = np.array([2, 1, 2])

    # No flags: only the unique values come back.
    uniques = _unique(values)
    assert_array_equal(uniques, expected)

    # Inverse indices only.
    found, codes = _unique(values, return_inverse=True)
    assert_array_equal(found, expected)
    assert_array_equal(codes, expected_codes)

    # Encoding against the uniques from the plain call gives the same codes.
    assert_array_equal(_encode(values, uniques=uniques), expected_codes)

    # Counts only.
    found, tallies = _unique(values, return_counts=True)
    assert_array_equal(found, expected)
    assert_array_equal(tallies, expected_counts)

    # Inverse indices and counts together.
    found, codes, tallies = _unique(
        values, return_inverse=True, return_counts=True
    )
    assert_array_equal(found, expected)
    assert_array_equal(codes, expected_codes)
    assert_array_equal(tallies, expected_counts)
def test_unique_util_missing_values_objects(missing_value, pickle_uniques):
    """Check `_unique` and `_encode` on object arrays with one missing value.

    ``missing_value`` is placed in the input and is expected to come last in
    the uniques. When ``pickle_uniques`` is truthy the uniques go through a
    pickle round trip before being passed to `_encode`. In every case the
    encoded result must be ``[0, 2, 2, 3, 1]``.

    Both arguments are presumably supplied by parametrization outside this
    block -- TODO confirm.
    """
    data = np.array(['a', 'c', 'c', missing_value, 'b'], dtype=object)
    want = np.array(['a', 'b', 'c', missing_value], dtype=object)

    found = _unique(data)

    if missing_value is not None:
        # NaN-like missing value: compare the leading entries as a block and
        # check the trailing entry with isnan.
        assert_array_equal(found[:-1], want[:-1])
        assert np.isnan(found[-1])
    else:
        # None compares equal to itself, so the arrays can be compared whole.
        assert_array_equal(found, want)

    if pickle_uniques:
        # Round-trip the uniques that this test itself just produced.
        found = pickle.loads(pickle.dumps(found))

    codes = _encode(data, uniques=found)
    assert_array_equal(codes, np.array([0, 2, 2, 3, 1]))
def test_encode_util(values, expected):
    """Check that `_unique` yields ``expected`` and `_encode` maps to codes.

    The uniques of ``values`` must equal ``expected``, and encoding ``values``
    against those uniques must give ``[1, 0, 2, 0, 2]``.

    ``values`` and ``expected`` are presumably supplied by a parametrize
    decorator that is not part of this block -- TODO confirm.
    """
    # NOTE(review): an earlier `test_encode_util` exists in this file. This
    # later definition rebinds the name, so only this one would be collected
    # by pytest -- confirm and rename or drop one.
    found = _unique(values)
    assert_array_equal(found, expected)

    codes = _encode(values, uniques=found)
    assert_array_equal(codes, np.array([1, 0, 2, 0, 2]))