コード例 #1
0
def make_fixture(binary=False, balanced=False, split=False):
    """
    Build a synthetic classification dataset for the ClassBalance tests.

    Parameters
    ----------
    binary : bool, default False
        Generate 2 classes when True, otherwise 5.
    balanced : bool, default False
        When True, ``weights=None`` is passed to ``make_classification``;
        otherwise a fixed, uneven list of per-class weights is used.
    split : bool, default False
        When True, the generated data is divided with ``tts``
        (``test_size=0.2``) and wrapped in ``Split`` tuples.

    Returns
    -------
    Dataset
        ``Dataset(X, y)``, or
        ``Dataset(Split(X_train, X_test), Split(y_train, y_test))`` when
        ``split`` is True.
    """
    if binary:
        n_classes, skewed = 2, [0.3, 0.7]
    else:
        # NOTE(review): these weights sum to 0.91 rather than 1.0 -- the
        # trailing .01 may have been intended as 0.1. Confirm before
        # changing, since any edit alters the generated data.
        n_classes, skewed = 5, [0.1, 0.2, 0.4, 0.2, .01]

    X, y = make_classification(
        n_samples=100,
        n_features=20,
        n_informative=8,
        n_redundant=2,
        n_clusters_per_class=1,
        random_state=89092,
        n_classes=n_classes,
        weights=None if balanced else skewed,
    )

    # Unsplit case: hand back the raw arrays.
    if not split:
        return Dataset(X, y)

    # Fixed random_state keeps the partition reproducible across runs.
    X_train, X_test, y_train, y_test = tts(
        X, y, test_size=0.2, random_state=101
    )
    return Dataset(Split(X_train, X_test), Split(y_train, y_test))
コード例 #2
0
def digits(request):
    """
    Attach train and test splits of the sklearn digits dataset to the
    requesting test class.

    The result is stored on ``request.cls.digits`` as a ``Dataset`` made of
    two ``Split`` tuples (features first, then targets). Nothing is returned.
    """
    bunch = load_digits()

    # Fixed random_state keeps the partition reproducible across runs.
    parts = tts(bunch.data, bunch.target, test_size=0.2, random_state=11)
    X_train, X_test, y_train, y_test = parts

    features = Split(X_train, X_test)
    targets = Split(y_train, y_test)

    # Expose the dataset as a class attribute of the test class
    request.cls.digits = Dataset(features, targets)
コード例 #3
0
ファイル: test_residuals.py プロジェクト: minhto2802/mlviz
def data(request):
    """
    Attach train and test splits of a synthetic regression dataset to the
    requesting test class.

    The data comes from ``make_regression`` (500 samples, 22 features, 8 of
    them informative). The result is stored on ``request.cls.data`` as a
    ``Dataset`` made of two ``Split`` tuples (features first, then targets).
    Nothing is returned.
    """
    X, y = make_regression(
        n_samples=500, n_features=22, n_informative=8, random_state=42
    )

    # Fixed random_state keeps the partition reproducible across runs.
    parts = tts(X, y, test_size=0.2, random_state=11)
    X_train, X_test, y_train, y_test = parts

    features = Split(X_train, X_test)
    targets = Split(y_train, y_test)

    # Expose the dataset as a class attribute of the test class
    request.cls.data = Dataset(features, targets)
コード例 #4
0
ファイル: conftest.py プロジェクト: minhto2802/mlviz
def multiclass(request):
    """
    Attach train and test splits of a synthetic multiclass classification
    dataset to the requesting test class.

    The data comes from ``make_classification`` (500 samples, 20 features,
    6 classes with 3 clusters each). The result is stored on
    ``request.cls.multiclass`` as a ``Dataset`` made of two ``Split`` tuples
    (features first, then targets). Nothing is returned.
    """
    generator_params = dict(
        n_samples=500,
        n_features=20,
        n_informative=8,
        n_redundant=2,
        n_classes=6,
        n_clusters_per_class=3,
        random_state=87,
    )
    X, y = make_classification(**generator_params)

    # Fixed random_state keeps the partition reproducible across runs.
    parts = tts(X, y, test_size=0.2, random_state=93)
    X_train, X_test, y_train, y_test = parts

    request.cls.multiclass = Dataset(
        Split(X_train, X_test),
        Split(y_train, y_test),
    )