Exemple #1
0
def test_unique_util_missing_values_numeric():
    """Check ``_unique`` and ``_encode`` on a float array containing NaN.

    The expected arrays below place a single NaN entry after the sorted
    finite values, and both NaN inputs are expected to share that last code.
    """
    data = np.array([3, 1, np.nan, 5, 3, np.nan], dtype=float)
    want_uniques = np.array([1, 3, 5, np.nan], dtype=float)
    want_codes = np.array([1, 0, 3, 2, 1, 3])

    # Plain call: the result is compared directly against the uniques.
    assert_array_equal(_unique(data), want_uniques)

    # With return_inverse=True the per-element codes come back as well.
    found_uniques, found_codes = _unique(data, return_inverse=True)
    assert_array_equal(found_uniques, want_uniques)
    assert_array_equal(found_codes, want_codes)

    # Encoding against those uniques must reproduce the same codes.
    assert_array_equal(_encode(data, uniques=found_uniques), want_codes)
Exemple #2
0
def test_unique_util_missing_values_objects(missing_value, pickle_uniques):
    """Check ``_unique``/``_encode`` on object arrays with a missing value.

    ``missing_value`` is the marker inserted into the data; anything other
    than ``None`` is handled below as a NaN. ``pickle_uniques`` toggles a
    pickle round-trip of the uniques before they are passed to ``_encode``.
    """
    data = np.array(['a', 'c', 'c', missing_value, 'b'], dtype=object)
    want_uniques = np.array(['a', 'b', 'c', missing_value], dtype=object)

    found = _unique(data)

    if missing_value is not None:
        # NaN marker: compare the leading labels by equality, then check
        # the trailing entry with np.isnan instead.
        assert_array_equal(found[:-1], want_uniques[:-1])
        assert np.isnan(found[-1])
    else:
        assert_array_equal(found, want_uniques)

    # Optionally serialise and restore the uniques before encoding.
    if pickle_uniques:
        found = pickle.loads(pickle.dumps(found))

    codes = _encode(data, uniques=found)
    assert_array_equal(codes, np.array([0, 2, 2, 3, 1]))
def test_encode_util(values, expected):
    """Check ``_unique`` under each return-flag combination, plus ``_encode``.

    ``values`` is the input array and ``expected`` the uniques it should
    produce. The codes and counts are hard-coded below, so any supplied
    ``values`` must be consistent with them.
    """
    want_codes = np.array([1, 0, 2, 0, 2])
    want_counts = np.array([2, 1, 2])

    # Uniques only.
    uniques = _unique(values)
    assert_array_equal(uniques, expected)

    # Uniques plus inverse codes.
    labels, codes = _unique(values, return_inverse=True)
    assert_array_equal(labels, expected)
    assert_array_equal(codes, want_codes)

    # Encoding against the uniques from the first call gives the same codes.
    assert_array_equal(_encode(values, uniques=uniques), want_codes)

    # Uniques plus counts.
    labels, tallies = _unique(values, return_counts=True)
    assert_array_equal(labels, expected)
    assert_array_equal(tallies, want_counts)

    # All three outputs at once.
    labels, codes, tallies = _unique(
        values, return_inverse=True, return_counts=True
    )
    assert_array_equal(labels, expected)
    assert_array_equal(codes, want_codes)
    assert_array_equal(tallies, want_counts)
Exemple #4
0
def test_encode_with_check_unknown():
    """Check how ``_encode`` reacts to ``check_unknown`` with unseen labels."""
    unseen_msg = "y contains previously unseen labels"

    # Numeric case: the value 4 is absent from the known uniques.
    int_uniques = np.array([1, 2, 3])
    int_values = np.array([1, 2, 3, 4])

    # With check_unknown=True the unseen value must be rejected.
    with pytest.raises(ValueError, match=unseen_msg):
        _encode(int_values, uniques=int_uniques, check_unknown=True)

    # With check_unknown=False the same call must not raise.
    _encode(int_values, uniques=int_uniques, check_unknown=False)

    # Object dtype: the flag is ignored, so the unseen "d" still raises.
    obj_uniques = np.array(["a", "b", "c"], dtype=object)
    obj_values = np.array(["a", "b", "c", "d"], dtype=object)
    with pytest.raises(ValueError, match=unseen_msg):
        _encode(obj_values, uniques=obj_uniques, check_unknown=False)
Exemple #5
0
def test_encode_util(values, expected):
    """Check that ``_unique`` yields ``expected`` and ``_encode`` the codes.

    The expected codes are hard-coded, so any supplied ``values`` must be
    consistent with them.
    """
    found = _unique(values)
    assert_array_equal(found, expected)

    # Encoding against the computed uniques must give the fixed codes.
    want_codes = np.array([1, 0, 2, 0, 2])
    assert_array_equal(_encode(values, uniques=found), want_codes)