def make_fixture(binary=False, balanced=False, split=False):
    """
    Build a synthetic classification dataset for testing ClassBalance.

    Parameters
    ----------
    binary : bool, default: False
        When True the data has 2 classes, otherwise 5.

    balanced : bool, default: False
        When True ``weights=None`` is passed to ``make_classification``;
        otherwise a fixed list of unequal per-class weights is passed.

    split : bool, default: False
        When True the data is divided 80/20 into train and test portions.

    Returns
    -------
    Dataset
        ``Dataset(X, y)`` when ``split`` is False, otherwise
        ``Dataset(Split(X_train, X_test), Split(y_train, y_test))``.
    """
    # Pick the class count and the skewed weighting used for the
    # imbalanced variant of that class count.
    if binary:
        n_classes, skewed_weights = 2, [0.3, 0.7]
    else:
        n_classes, skewed_weights = 5, [0.1, 0.2, 0.4, 0.2, 0.01]

    X, y = make_classification(
        n_samples=100,
        n_features=20,
        n_informative=8,
        n_redundant=2,
        n_clusters_per_class=1,
        random_state=89092,
        n_classes=n_classes,
        weights=None if balanced else skewed_weights,
    )

    if not split:
        return Dataset(X, y)

    # Fixed seed keeps the train/test partition reproducible across runs.
    X_train, X_test, y_train, y_test = tts(X, y, test_size=0.2, random_state=101)
    return Dataset(Split(X_train, X_test), Split(y_train, y_test))
def digits(request):
    """
    Attach train and test splits of the sklearn digits dataset to the
    requesting test class.

    The splits are stored on ``request.cls.digits`` as a Dataset composed
    of two Split tuples: one holding (X_train, X_test) and one holding
    (y_train, y_test). Nothing is returned.

    Parameters
    ----------
    request : object
        Object exposing the target test class as ``request.cls``
        (presumably the pytest ``request`` fixture).
    """
    bunch = load_digits()

    # 80/20 split with a fixed seed so the fixture is reproducible.
    partitions = tts(bunch.data, bunch.target, test_size=0.2, random_state=11)
    X_train, X_test, y_train, y_test = partitions

    # Expose the splits as a class attribute for the tests to consume.
    request.cls.digits = Dataset(Split(X_train, X_test), Split(y_train, y_test))
def continuous(request):
    """
    Attach a random continuous regression dataset to the requesting
    test class.

    The data comes from ``make_regression`` (500 samples, 22 features,
    8 of them informative) and is split 80/20 into train and test
    portions. The result is stored on ``request.cls.continuous`` as
    ``Dataset(Split(X_train, X_test), Split(y_train, y_test))``; nothing
    is returned.

    Parameters
    ----------
    request : object
        Object exposing the target test class as ``request.cls``
        (presumably the pytest ``request`` fixture).
    """
    features, target = make_regression(
        n_samples=500,
        n_features=22,
        n_informative=8,
        random_state=42,
        noise=0.2,
        bias=0.2,
    )

    # Fixed seed keeps the train/test partition reproducible.
    partitions = tts(features, target, test_size=0.2, random_state=11)
    X_train, X_test, y_train, y_test = partitions

    # Expose the regression splits as a class attribute.
    request.cls.continuous = Dataset(
        Split(X_train, X_test), Split(y_train, y_test)
    )
def multiclass(request):
    """
    Attach a random multiclass classification dataset to the requesting
    test class.

    The data comes from ``make_classification`` (500 samples, 20 features,
    6 classes with 3 clusters per class) and is split 80/20 into train
    and test portions. The result is stored on ``request.cls.multiclass``
    as ``Dataset(Split(X_train, X_test), Split(y_train, y_test))``;
    nothing is returned.

    Parameters
    ----------
    request : object
        Object exposing the target test class as ``request.cls``
        (presumably the pytest ``request`` fixture).
    """
    features, labels = make_classification(
        n_samples=500,
        n_features=20,
        n_informative=8,
        n_redundant=2,
        n_classes=6,
        n_clusters_per_class=3,
        random_state=87,
    )

    # Fixed seed keeps the train/test partition reproducible.
    partitions = tts(features, labels, test_size=0.2, random_state=93)
    X_train, X_test, y_train, y_test = partitions

    # Expose the classification splits as a class attribute.
    request.cls.multiclass = Dataset(
        Split(X_train, X_test), Split(y_train, y_test)
    )
def data(request):
    """
    Attach train and test splits of a random regression dataset to the
    requesting test class.

    The data comes from ``make_regression`` (500 samples, 22 features,
    8 of them informative) and is split 80/20 into train and test
    portions. The result is stored on ``request.cls.data`` as a Dataset
    composed of two Split tuples: one holding (X_train, X_test) and one
    holding (y_train, y_test). Nothing is returned.

    Parameters
    ----------
    request : object
        Object exposing the target test class as ``request.cls``
        (presumably the pytest ``request`` fixture).
    """
    features, target = make_regression(
        n_samples=500,
        n_features=22,
        n_informative=8,
        random_state=42,
        noise=0.2,
        bias=0.2,
    )

    # Fixed seed keeps the train/test partition reproducible.
    partitions = tts(features, target, test_size=0.2, random_state=11)
    X_train, X_test, y_train, y_test = partitions

    # Expose the regression splits as a class attribute.
    request.cls.data = Dataset(Split(X_train, X_test), Split(y_train, y_test))