def test_unique_util_missing_values_numeric():
    """Exercise ``_unique`` and ``_encode`` on a float array containing NaN.

    The input holds two NaN entries. The expected uniques list NaN once,
    after the sorted finite values, and both NaN inputs are expected to map
    to that single trailing position (code 3).
    """
    data = np.array([3, 1, np.nan, 5, 3, np.nan], dtype=float)
    want_uniques = np.array([1, 3, 5, np.nan], dtype=float)
    want_codes = np.array([1, 0, 3, 2, 1, 3])

    # Plain call: only the unique values are returned.
    assert_array_equal(_unique(data), want_uniques)

    # With return_inverse=True the codes that rebuild ``data`` come back too.
    found, codes = _unique(data, return_inverse=True)
    assert_array_equal(found, want_uniques)
    assert_array_equal(codes, want_codes)

    # Encoding against the computed uniques must reproduce the same codes.
    assert_array_equal(_encode(data, uniques=found), want_codes)
def test_unique_util_missing_values_objects(missing_value, pickle_uniques):
    """Exercise ``_unique`` and ``_encode`` on object arrays with a missing marker.

    Parameters
    ----------
    missing_value : object
        The missing-value marker placed in the input. The branches below
        handle ``None`` and, per the original inline note, ``np.nan``.
    pickle_uniques : bool
        When truthy, the uniques are round-tripped through ``pickle`` before
        being handed to ``_encode``.
    """
    data = np.array(["a", "c", "c", missing_value, "b"], dtype=object)
    want_uniques = np.array(["a", "b", "c", missing_value], dtype=object)

    found = _unique(data)

    if missing_value is not None:
        # Original note: this branch is the ``missing_value == np.nan`` case.
        # The leading entries are compared directly and the trailing one is
        # checked with ``np.isnan`` instead of by equality.
        assert_array_equal(found[:-1], want_uniques[:-1])
        assert np.isnan(found[-1])
    else:
        assert_array_equal(found, want_uniques)

    if pickle_uniques:
        # Round-trip locally produced data only; nothing untrusted is loaded.
        found = pickle.loads(pickle.dumps(found))

    codes = _encode(data, uniques=found)
    assert_array_equal(codes, np.array([0, 2, 2, 3, 1]))
def test_encode_util(values, expected):
    """Check ``_unique`` under each flag combination, plus ``_encode``.

    Parameters
    ----------
    values : array-like
        Input to encode; supplied from outside this block.
    expected : array-like
        The unique values ``_unique(values)`` is expected to return.

    Every supplied ``values`` must yield the inverse codes ``[1, 0, 2, 0, 2]``
    and the counts ``[2, 1, 2]``, since those are asserted unconditionally.
    """
    want_codes = np.array([1, 0, 2, 0, 2])
    want_counts = np.array([2, 1, 2])

    # Uniques only.
    uniques = _unique(values)
    assert_array_equal(uniques, expected)

    # Uniques + inverse.
    found, codes = _unique(values, return_inverse=True)
    assert_array_equal(found, expected)
    assert_array_equal(codes, want_codes)

    # Encoding with the precomputed uniques gives the same codes.
    codes = _encode(values, uniques=uniques)
    assert_array_equal(codes, want_codes)

    # Uniques + counts.
    found, tallies = _unique(values, return_counts=True)
    assert_array_equal(found, expected)
    assert_array_equal(tallies, want_counts)

    # Uniques + inverse + counts, returned in that order.
    found, codes, tallies = _unique(
        values, return_inverse=True, return_counts=True
    )
    assert_array_equal(found, expected)
    assert_array_equal(codes, want_codes)
    assert_array_equal(tallies, want_counts)
def test_encode_with_check_unknown():
    """Check how ``_encode`` reacts to unseen labels under ``check_unknown``.

    For numeric input, ``check_unknown=True`` must raise ``ValueError`` and
    ``check_unknown=False`` must not. For object-dtype input the error is
    expected even with ``check_unknown=False``.
    """
    unseen_msg = "y contains previously unseen labels"

    known = np.array([1, 2, 3])
    data = np.array([1, 2, 3, 4])  # 4 is not among the known labels

    # check_unknown=True (noted in the original as the default): must raise.
    with pytest.raises(ValueError, match=unseen_msg):
        _encode(data, uniques=known, check_unknown=True)

    # check_unknown=False: the call must complete without raising.
    _encode(data, uniques=known, check_unknown=False)

    # Object dtype: the parameter is ignored, so the error is still raised.
    known = np.array(["a", "b", "c"], dtype=object)
    data = np.array(["a", "b", "c", "d"], dtype=object)
    with pytest.raises(ValueError, match=unseen_msg):
        _encode(data, uniques=known, check_unknown=False)
def test_encode_util(values, expected):
    """Check that ``_unique`` and ``_encode`` agree on a small input.

    Parameters
    ----------
    values : array-like
        Input to encode; supplied from outside this block.
    expected : array-like
        The unique values ``_unique(values)`` is expected to return.
    """
    # NOTE(review): another function named ``test_encode_util`` appears in
    # this file. If both live in the same module, the later definition
    # shadows the earlier one and only one of them runs - confirm and
    # rename or merge.
    found = _unique(values)
    assert_array_equal(found, expected)

    want_codes = np.array([1, 0, 2, 0, 2])
    assert_array_equal(_encode(values, uniques=found), want_codes)