def test_weighted_elm():
    """Check class weight impact on AUC.

    Re-weighting classes is expected to improve model performance for
    imbalanced classification problems. The comparison is repeated once per
    entry of ``ACTIVATION_TYPES``, with that activation passed to both
    classifiers.
    """
    rng = np.random.RandomState(random_state)
    n_samples_1 = 500
    n_samples_2 = 10
    n_features = 20

    # Imbalanced two-class data: 500 samples labelled 0 and only 10 samples
    # labelled 1, the latter shifted by +2 along every feature.
    X = np.r_[1.5 * rng.randn(n_samples_1, n_features),
              1.2 * rng.randn(n_samples_2, n_features) + [2] * n_features]
    y = [0] * n_samples_1 + [1] * n_samples_2

    X_train, X_test, y_train, y_test = cross_validation.train_test_split(
        X, y, test_size=0.8, random_state=random_state)

    n_hidden = 20
    for activation in ACTIVATION_TYPES:
        # The activation is forwarded explicitly; without it every iteration
        # would build identical classifiers and the loop would be redundant.
        elm_weightless = ELMClassifier(n_hidden=n_hidden,
                                       activation=activation,
                                       class_weight=None,
                                       random_state=random_state)
        elm_weightless.fit(X_train, y_train)

        elm_weight_auto = ELMClassifier(n_hidden=n_hidden,
                                        activation=activation,
                                        class_weight='auto',
                                        random_state=random_state)
        elm_weight_auto.fit(X_train, y_train)

        # AUC is computed from the predicted probability of class 1.
        y_pred_weightless = elm_weightless.predict_proba(X_test)[:, 1]
        score_weightless = roc_auc_score(y_test, y_pred_weightless)

        y_pred_weighted = elm_weight_auto.predict_proba(X_test)[:, 1]
        score_weighted = roc_auc_score(y_test, y_pred_weighted)

        assert_greater(score_weighted, score_weightless)
def test_partial_fit_classification():
    """Check that ``partial_fit`` agrees with ``fit`` for classification.

    For every dataset in ``classification_datasets`` a classifier trained
    with ``fit`` (using ``batch_size``) and one trained incrementally through
    ``partial_fit`` must give identical predictions, and both must score
    above 0.95 on the training data.
    """
    batch_size = 100
    min_score = 0.95

    for data, target in classification_datasets.values():
        n_samples = data.shape[0]
        # The label set is the same for every batch, so compute it once.
        all_classes = np.unique(target)

        fitted = ELMClassifier(random_state=random_state,
                               batch_size=batch_size)
        incremental = ELMClassifier(random_state=random_state)

        fitted.fit(data, target)
        for chunk in gen_batches(n_samples, batch_size):
            incremental.partial_fit(data[chunk], target[chunk],
                                    classes=all_classes)

        assert_array_equal(fitted.predict(data), incremental.predict(data))

        assert_greater(fitted.score(data, target), min_score)
        assert_greater(incremental.score(data, target), min_score)
def test_sparse_matrices():
    """Test that sparse and dense input matrices yield equal output.

    A standard ELM trained on the dense array serves as the reference. The
    standard, batch-based and partially fitted ELMs trained on the CSR
    version of the same data must produce (almost) the same decision scores.
    """
    X_dense = Xdigits_binary[:50]
    y = ydigits_binary[:50]
    X_sparse = csr_matrix(X_dense)

    n_hidden = 15
    batch_size = 10

    # Dense reference: without it the sparse results are only compared with
    # each other and the dense code path is never exercised.
    elm_dense = ELMClassifier(random_state=1, n_hidden=n_hidden)

    # Standard ELM
    elm = ELMClassifier(random_state=1, n_hidden=n_hidden)

    # Batch based
    elm_batch_based = ELMClassifier(random_state=1, n_hidden=n_hidden,
                                    batch_size=batch_size)

    # ELM for partial fitting
    elm_partial = ELMClassifier(random_state=1, n_hidden=n_hidden)

    # Train classifiers
    elm_dense.fit(X_dense, y)
    elm.fit(X_sparse, y)
    elm_batch_based.fit(X_sparse, y)

    for batch_slice in gen_batches(X_sparse.shape[0], batch_size):
        elm_partial.partial_fit(X_sparse[batch_slice], y[batch_slice])

    # Get decision scores
    y_pred_dense = elm_dense.decision_function(X_dense)
    y_pred = elm.decision_function(X_sparse)
    y_pred_batch_based = elm_batch_based.decision_function(X_sparse)
    y_pred_partial = elm_partial.decision_function(X_sparse)

    # The prediction values should be the same
    assert_almost_equal(y_pred_dense, y_pred)
    assert_almost_equal(y_pred, y_pred_batch_based)
    assert_almost_equal(y_pred_batch_based, y_pred_partial)
def test_multilabel_classification():
    """Test that multi-label classification works as expected.

    Fits an ELM on a small generated multi-label problem and requires a
    training score above 0.95.
    """
    X, y = make_multilabel_classification(n_samples=50, random_state=0,
                                          return_indicator=True)

    # Seed the estimator like the other tests in this module so the asserted
    # score is reproducible from run to run.
    elm = ELMClassifier(weight_scale=100, random_state=random_state)
    elm.fit(X, y)

    assert_greater(elm.score(X, y), 0.95)
def test_predict_proba_multi():
    """Check the shape of ``predict_proba`` output for multi-class data.

    The probability matrix must have one row per sample and one column per
    distinct label present in the training targets.
    """
    n_used = 10
    features = Xdigits_multi[:n_used]
    labels = ydigits_multi[:n_used]

    model = ELMClassifier(n_hidden=5)
    model.fit(features, labels)
    probabilities = model.predict_proba(features)

    expected_shape = (labels.shape[0], np.unique(labels).size)
    assert_equal(probabilities.shape, expected_shape)
def test_predict_proba_binary():
    """Test whether predict_proba works as expected for binary class.

    The probability matrix must have shape ``(n_samples, 2)`` and the
    second column must reach a training ROC AUC above 0.95.
    """
    X = Xdigits_binary[:50]
    y = ydigits_binary[:50]

    # Seed the estimator like the other tests in this module so the asserted
    # AUC is reproducible from run to run.
    clf = ELMClassifier(n_hidden=10, random_state=random_state)
    clf.fit(X, y)
    y_proba = clf.predict_proba(X)

    n_samples, n_classes = y.shape[0], 2
    assert_equal(y_proba.shape, (n_samples, n_classes))

    assert_greater(roc_auc_score(y, y_proba[:, 1]), 0.95)
def test_overfitting():
    """Check that more hidden neurons give a higher training score.

    For each activation in ``ACTIVATION_TYPES`` an ELM with 15 hidden
    neurons must score strictly higher on the 'multi-class' training data
    than one with 5 hidden neurons.
    """
    X, y = classification_datasets['multi-class']

    def _training_score(n_hidden, activation):
        # Fit a fresh, identically seeded classifier and score it on the
        # same data it was trained on.
        model = ELMClassifier(n_hidden=n_hidden, activation=activation,
                              random_state=random_state)
        model.fit(X, y)
        return model.score(X, y)

    for activation in ACTIVATION_TYPES:
        small_score = _training_score(5, activation)
        large_score = _training_score(15, activation)
        assert_greater(large_score, small_score)
def test_recursive_and_standard():
    """Check that recursive lsqr gives the same result as standard lsqr.

    For every dataset in ``classification_datasets`` and for both
    ``class_weight=None`` and ``class_weight='auto'``, a classifier built
    with ``batch_size`` must predict exactly what the one built without it
    predicts, and the standard classifier must score above 0.95.
    """
    batch_size = 50

    for X, y in classification_datasets.values():
        for class_weight in (None, 'auto'):
            shared_params = dict(class_weight=class_weight,
                                 random_state=random_state)
            standard = ELMClassifier(**shared_params)
            recursive = ELMClassifier(batch_size=batch_size, **shared_params)

            standard.fit(X, y)
            recursive.fit(X, y)

            standard_pred = standard.predict(X)
            recursive_pred = recursive.predict(X)
            assert_array_equal(standard_pred, recursive_pred)

            assert_greater(standard.score(X, y), 0.95)