def _get_data(format='numpy', n_classes=2):
    """Create a small synthetic classification dataset split into train/test parts.

    The data comes from ``make_classification`` (100 samples, 10 features,
    5 of them informative) and is split by ``train_test_split`` using its
    default proportions. Both calls receive the module-level ``seed`` as
    ``random_state``.

    :param format: when equal to ``'h2o'``, every part of the split is wrapped
        in an ``h2o.H2OFrame``; any other value leaves the parts exactly as
        ``train_test_split`` returned them.
    :param n_classes: number of target classes forwarded to ``make_classification``.
    :return: an ``ns`` object exposing ``X_train``, ``X_test``, ``y_train``
        and ``y_test``.
    """
    features, target = make_classification(
        n_samples=100,
        n_features=10,
        n_informative=5,
        n_classes=n_classes,
        random_state=seed,
    )
    train_x, test_x, train_y, test_y = train_test_split(features, target, random_state=seed)
    data = ns(X_train=train_x, X_test=test_x, y_train=train_y, y_test=test_y)

    if format != 'h2o':
        return data

    # Only existing attributes are reassigned, so the attribute dict keeps its
    # size while it is being walked.
    for attr_name, part in vars(data).items():
        setattr(data, attr_name, h2o.H2OFrame(part))
    return data
def _get_data(format='numpy', n_classes=2):
    """Create a small synthetic dataset split into train/test parts.

    A classification problem is generated with ``make_classification`` when
    ``n_classes`` is positive; otherwise a regression problem is generated
    with ``make_regression``. In both cases the data has 100 samples and
    5 features, and 75% of the rows go to the training part. The module-level
    ``seed`` is used as ``random_state`` for generation and for the split.

    :param format: when equal to ``'h2o'``, every part of the split is wrapped
        in an ``h2o.H2OFrame``; any other value leaves the parts exactly as
        ``train_test_split`` returned them.
    :param n_classes: number of target classes for classification; ``0`` (or
        any non-positive value) requests regression data instead.
    :return: an ``ns`` object exposing ``X_train``, ``X_test``, ``y_train``
        and ``y_test``.
    """
    is_classification = n_classes > 0
    generator = make_classification if is_classification else make_regression

    params = dict(
        n_samples=100,
        n_features=5,
        # Classification uses one informative feature per class; regression
        # uses a fixed 2 (also for negative n_classes, which would otherwise
        # leak through as a negative feature count).
        n_informative=n_classes if is_classification else 2,
        random_state=seed,
    )
    if is_classification:
        # `n_classes` and `n_repeated` are accepted by make_classification only;
        # forwarding `n_repeated` to make_regression raises TypeError.
        params.update(n_classes=n_classes, n_repeated=0)

    X, y = generator(**params)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=seed, train_size=0.75)
    data = ns(X_train=X_train, X_test=X_test, y_train=y_train, y_test=y_test)

    if format == 'h2o':
        # Only existing attributes are reassigned, so the attribute dict keeps
        # its size while it is being walked.
        for k, v in vars(data).items():
            setattr(data, k, h2o.H2OFrame(v))
    return data