def make_fixture(binary=False, balanced=False, split=False):
    """
    Make a dataset for testing ClassBalance based on the specified params.
    """
    kwargs = {
        "n_samples": 100,
        "n_features": 20,
        "n_informative": 8,
        "n_redundant": 2,
        "n_clusters_per_class": 1,
        "random_state": 89092,
    }

    if binary:
        kwargs["n_classes"] = 2
        kwargs["weights"] = None if balanced else [0.3, 0.7]
    else:
        kwargs["n_classes"] = 5
        kwargs["weights"] = None if balanced else [0.1, 0.2, 0.4, 0.2, 0.01]

    X, y = make_classification(**kwargs)

    if split:
        X_train, X_test, y_train, y_test = tts(X, y, test_size=0.2, random_state=101)
        return Dataset(Split(X_train, X_test), Split(y_train, y_test))

    return Dataset(X, y)
Beispiel #2
0
def digits(request):
    """
    Creates a fixture of train and test splits for the sklearn digits dataset
    For ease of use returns a Dataset named tuple composed of two Split tuples.
    """
    data = load_digits()
    X_train, X_test, y_train, y_test = tts(
        data.data, data.target, test_size=0.2, random_state=11
    )

    # Set a class attribute for digits
    request.cls.digits = Dataset(Split(X_train, X_test), Split(y_train, y_test))
Beispiel #3
0
def continuous(request):
    """
    Creates a random continuous regression dataset fixture
    """
    X, y = make_regression(
        n_samples=500,
        n_features=22,
        n_informative=8,
        random_state=42,
        noise=0.2,
        bias=0.2,
    )

    X_train, X_test, y_train, y_test = tts(X, y, test_size=0.2, random_state=11)

    # Set a class attribute for regression
    request.cls.continuous = Dataset(Split(X_train, X_test), Split(y_train, y_test))
Beispiel #4
0
def multiclass(request):
    """
    Creates a random multiclass classification dataset fixture
    """
    X, y = make_classification(
        n_samples=500,
        n_features=20,
        n_informative=8,
        n_redundant=2,
        n_classes=6,
        n_clusters_per_class=3,
        random_state=87,
    )

    X_train, X_test, y_train, y_test = tts(X, y, test_size=0.2, random_state=93)

    dataset = Dataset(Split(X_train, X_test), Split(y_train, y_test))
    request.cls.multiclass = dataset
def data(request):
    """
    Creates a fixture of train and test splits for the sklearn digits dataset
    For ease of use returns a Dataset named tuple composed of two Split tuples.
    """
    X, y = make_regression(
        n_samples=500,
        n_features=22,
        n_informative=8,
        random_state=42,
        noise=0.2,
        bias=0.2,
    )

    X_train, X_test, y_train, y_test = tts(X,
                                           y,
                                           test_size=0.2,
                                           random_state=11)

    # Set a class attribute for digits
    request.cls.data = Dataset(Split(X_train, X_test), Split(y_train, y_test))