def test_weighted_elm():
    """Check class weight impact on AUC.

    Re-weighting classes is expected to improve model performance
    for imbalanced classification problems.
    """
    rng = np.random.RandomState(random_state)
    n_samples_1 = 500
    n_samples_2 = 10
    X = np.r_[1.5 * rng.randn(n_samples_1, 20),
              1.2 * rng.randn(n_samples_2, 20) + [2] * 20]
    y = [0] * n_samples_1 + [1] * n_samples_2

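    # Hold out 80% of the data, so only a small training set with few
    # positive samples is seen during fitting.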
    X_train, X_test, y_train, y_test = cross_validation.train_test_split(
        X, y, test_size=0.8, random_state=random_state)

    n_hidden = 20
    for activation in ACTIVATION_TYPES:
        elm_weightless = ELMClassifier(n_hidden=n_hidden,
                                       activation=activation,
                                       class_weight=None,
                                       random_state=random_state)
        elm_weightless.fit(X_train, y_train)

        elm_weight_auto = ELMClassifier(n_hidden=n_hidden,
                                        activation=activation,
                                        class_weight='auto',
                                        random_state=random_state)
        elm_weight_auto.fit(X_train, y_train)

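        # Compare ROC AUC of the positive-class scores with and without
        # automatic class re-weighting.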
        y_pred_weightless = elm_weightless.predict_proba(X_test)[:, 1]
        score_weightless = roc_auc_score(y_test, y_pred_weightless)

        y_pred_weighted = elm_weight_auto.predict_proba(X_test)[:, 1]
        score_weighted = roc_auc_score(y_test, y_pred_weighted)

        assert_greater(score_weighted, score_weightless)


def test_partial_fit_classification():
    """Test partial_fit for classification.

    It should output the same results as 'fit' for binary and
    multi-class classification.
    """
    for X, y in classification_datasets.values():
        batch_size = 100
        n_samples = X.shape[0]

        elm_fit = ELMClassifier(random_state=random_state,
                                batch_size=batch_size)
        elm_partial_fit = ELMClassifier(random_state=random_state)

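        # Fit once on the full data, then train a second model incrementally;
        # `classes` is passed so partial_fit knows the full label set before
        # seeing the first batch.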
        elm_fit.fit(X, y)
        for batch_slice in gen_batches(n_samples, batch_size):
            elm_partial_fit.partial_fit(X[batch_slice], y[batch_slice],
                                        classes=np.unique(y))

        pred1 = elm_fit.predict(X)
        pred2 = elm_partial_fit.predict(X)

        assert_array_equal(pred1, pred2)
        assert_greater(elm_fit.score(X, y), 0.95)
        assert_greater(elm_partial_fit.score(X, y), 0.95)


def test_sparse_matrices():
    """Test that sparse and dense input matrices yield equal output."""
    X = Xdigits_binary[:50]
    y = ydigits_binary[:50]
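    # Convert the inputs to CSR format; every fitting path below receives
    # the same sparse matrix.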
    X = csr_matrix(X)
    n_hidden = 15
    batch_size = 10

    # Standard ELM
    elm = ELMClassifier(random_state=1, n_hidden=n_hidden)
    # Batch based
    elm_batch_based = ELMClassifier(random_state=1, n_hidden=n_hidden,
                                    batch_size=batch_size)
    # ELM for partial fitting
    elm_partial = ELMClassifier(random_state=1, n_hidden=n_hidden)
    # Train classifiers
    elm.fit(X, y)
    elm_batch_based.fit(X, y)

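    # Incrementally fit the third classifier on successive sparse mini-batches.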
    for batch_slice in gen_batches(X.shape[0], batch_size):
        elm_partial.partial_fit(X[batch_slice], y[batch_slice])

    # Get decision scores
    y_pred = elm.decision_function(X)
    y_pred_batch_based = elm_batch_based.decision_function(X)
    y_pred_partial = elm_partial.decision_function(X)

    # The prediction values should be the same
    assert_almost_equal(y_pred, y_pred_batch_based)
    assert_almost_equal(y_pred_batch_based, y_pred_partial)


def test_multilabel_classification():
    """Test that multi-label classification works as expected."""
    # test fit method
    X, y = make_multilabel_classification(n_samples=50, random_state=0,
                                          return_indicator=True)
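    # With return_indicator=True, y is a binary indicator matrix, so the
    # classifier is fit directly on multi-label targets.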
    elm = ELMClassifier(weight_scale=100)
    elm.fit(X, y)
    assert_greater(elm.score(X, y), 0.95)


def test_predict_proba_multi():
    """Test whether predict_proba works as expected for multi-class input."""
    X = Xdigits_multi[:10]
    y = ydigits_multi[:10]

    clf = ELMClassifier(n_hidden=5)
    clf.fit(X, y)
    y_proba = clf.predict_proba(X)

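    # The returned probabilities should have one column per class.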
    (n_samples, n_classes) = y.shape[0], np.unique(y).size

    assert_equal(y_proba.shape, (n_samples, n_classes))


def test_predict_proba_binary():
    """Test whether predict_proba works as expected for binary input."""
    X = Xdigits_binary[:50]
    y = ydigits_binary[:50]

    clf = ELMClassifier(n_hidden=10)
    clf.fit(X, y)
    y_proba = clf.predict_proba(X)

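    # Even for a binary problem, predict_proba should return two columns,
    # one per class.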
    (n_samples, n_classes) = y.shape[0], 2

    assert_equal(y_proba.shape, (n_samples, n_classes))
    assert_greater(roc_auc_score(y, y_proba[:, 1]), 0.95)


def test_overfitting():
    """More hidden neurons should increase the training score."""
    X, y = classification_datasets['multi-class']

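    # With identical activation and random seed, a network with more hidden
    # neurons has more capacity and should fit the training set better.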
    for activation in ACTIVATION_TYPES:
        elm = ELMClassifier(n_hidden=5, activation=activation,
                            random_state=random_state)
        elm.fit(X, y)
        score_5_n_hidden = elm.score(X, y)

        elm = ELMClassifier(n_hidden=15, activation=activation,
                            random_state=random_state)
        elm.fit(X, y)
        score_15_n_hidden = elm.score(X, y)

        assert_greater(score_15_n_hidden, score_5_n_hidden)


def test_recursive_and_standard():
    """Test that recursive lsqr returns the same results as standard lsqr."""
    batch_size = 50
    for dataset, class_weight in product(classification_datasets.values(),
                                         [None, 'auto']):
        X, y = dataset
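        # Passing batch_size switches the fit to the recursive lsqr solver;
        # it should produce exactly the same predictions as the standard
        # lsqr fit.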
        elm_standard = ELMClassifier(class_weight=class_weight,
                                     random_state=random_state)
        elm_recursive = ELMClassifier(class_weight=class_weight,
                                      random_state=random_state,
                                      batch_size=batch_size)
        elm_standard.fit(X, y)
        elm_recursive.fit(X, y)

        pred1 = elm_standard.predict(X)
        pred2 = elm_recursive.predict(X)

        assert_array_equal(pred1, pred2)
        assert_greater(elm_standard.score(X, y), 0.95)