Exemple #1
0
def test_random_choice_csc(n_samples=10000, random_state=24):
    """Check the per-column class frequencies drawn by ``_random_choice_csc``.

    Four scenarios are exercised: explicit class probabilities, omitted
    probabilities (the expected frequencies are then uniform over the
    listed classes), degenerate probabilities of exactly 1.0 / 0.0, and
    targets with a single class. In every scenario the result must be a
    scipy sparse matrix whose k-th column has empirical label frequencies
    matching the k-th expected probability vector to one decimal place.

    Parameters
    ----------
    n_samples : int, default=10000
        Number of rows requested from the sampler; also the denominator
        used to turn label counts into frequencies.
    random_state : int, default=24
        Seed forwarded to ``_random_choice_csc``.
    """

    def check_frequencies(got, expected_probabilities):
        # Shared verification for every scenario below.
        assert sp.issparse(got)
        for k, expected in enumerate(expected_probabilities):
            labels = got.getcol(k).toarray().ravel()
            # minlength pads the counts for trailing labels that were never
            # drawn, so the comparison is always made on vectors of equal
            # length instead of failing on a shape mismatch.
            counts = np.bincount(labels, minlength=len(expected))
            frequencies = counts / float(n_samples)
            assert_array_almost_equal(expected, frequencies, decimal=1)

    # Explicit class probabilities
    classes = [np.array([0, 1]), np.array([0, 1, 2])]
    class_probabilities = [np.array([0.5, 0.5]), np.array([0.6, 0.1, 0.3])]

    got = _random_choice_csc(n_samples, classes, class_probabilities,
                             random_state)
    check_frequencies(got, class_probabilities)

    # Implicit class probabilities
    classes = [[0, 1], [1, 2]]  # test for array-like support
    # Label 0 is absent from the second column, hence the leading 0.
    class_probabilities = [np.array([0.5, 0.5]), np.array([0, 1 / 2, 1 / 2])]

    got = _random_choice_csc(n_samples=n_samples,
                             classes=classes,
                             random_state=random_state)
    check_frequencies(got, class_probabilities)

    # Edge case probabilities 1.0 and 0.0
    classes = [np.array([0, 1]), np.array([0, 1, 2])]
    class_probabilities = [np.array([1.0, 0.0]), np.array([0.0, 1.0, 0.0])]

    got = _random_choice_csc(n_samples, classes, class_probabilities,
                             random_state)
    check_frequencies(got, class_probabilities)

    # One class target data
    classes = [[1], [0]]  # test for array-like support
    class_probabilities = [np.array([0.0, 1.0]), np.array([1.0])]

    got = _random_choice_csc(n_samples=n_samples,
                             classes=classes,
                             random_state=random_state)
    check_frequencies(got, class_probabilities)
Exemple #2
0
def test_random_choice_csc_errors():
    """Check that ``_random_choice_csc`` rejects invalid inputs.

    Each entry of the table below pairs a ``classes`` list with a
    ``class_probabilities`` list that together form an invalid request;
    every one of them must make the sampler raise ``ValueError``.
    """
    invalid_inputs = [
        # the length of an array in classes and class_probabilities is
        # mismatched (four classes against three probabilities)
        (
            [np.array([0, 1]), np.array([0, 1, 2, 3])],
            [np.array([0.5, 0.5]), np.array([0.6, 0.1, 0.3])],
        ),
        # the class dtype is not supported (string labels)
        (
            [np.array(["a", "1"]), np.array(["z", "1", "2"])],
            [np.array([0.5, 0.5]), np.array([0.6, 0.1, 0.3])],
        ),
        # the class dtype is not supported (floating-point labels)
        (
            [np.array([4.2, 0.1]), np.array([0.1, 0.2, 9.4])],
            [np.array([0.5, 0.5]), np.array([0.6, 0.1, 0.3])],
        ),
        # Given probabilities don't sum to 1 (first column sums to 1.1)
        (
            [np.array([0, 1]), np.array([0, 1, 2])],
            [np.array([0.5, 0.6]), np.array([0.6, 0.1, 0.3])],
        ),
    ]

    for bad_classes, bad_probabilities in invalid_inputs:
        with pytest.raises(ValueError):
            _random_choice_csc(4, bad_classes, bad_probabilities, 1)