Example #1
0
def test_multi_output_learner_regressor():
    """Check MultiOutputLearner wrapping an SGDRegressor on a 2-target stream.

    The learner is pre-trained on 150 samples and then trained one sample at
    a time for 5000 further samples. Every 100th sample (skipping the very
    first) is predicted *before* it is used for training, and the mean
    absolute error over those predictions is compared with a recorded
    reference value.
    """
    # NOTE(review): n_informative (20) exceeds n_features (10). Left as-is
    # because the reference value below was recorded with these arguments.
    stream = RegressionGenerator(n_samples=5500,
                                 n_features=10,
                                 n_informative=20,
                                 n_targets=2,
                                 random_state=1)
    stream.prepare_for_use()

    estimator = SGDRegressor(random_state=112,
                             tol=1e-3,
                             max_iter=10,
                             loss='squared_loss')
    learner = MultiOutputLearner(base_estimator=estimator)

    X, y = stream.next_sample(150)
    learner.partial_fit(X, y)

    max_samples = 5000
    wait_samples = 100
    predictions = []
    true_targets = []

    for cnt in range(max_samples):
        X, y = stream.next_sample()
        # Test every n samples, predicting before the sample is learned
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(learner.predict(X)[0])
            true_targets.append(y[0])

        learner.partial_fit(X, y)

    expected_performance = 2.444365309339395
    performance = mean_absolute_error(true_targets, predictions)
    assert np.isclose(performance, expected_performance)

    assert learner._estimator_type == "regressor"
    assert isinstance(learner.predict(X), np.ndarray)

    # predict_proba must raise AttributeError when wrapping a regressor
    with pytest.raises(AttributeError):
        learner.predict_proba(X)
def demo(output_file=None, instances=40000):
    """ Prequential evaluation demo for a multi-output (MOL) classifier.

    This demo shows the evaluation process of a MOL classifier, initialized
    with sklearn's SGDClassifier, on a synthetic multi-label stream.

    Parameters
    ----------
    output_file: string
        The name of the csv output file

    instances: int
        The number of samples produced by the generator. The evaluator's
        ``max_samples`` is set to ``instances - 10000``.

    """
    # Setup the stream
    # Alternatives: FileStream("../data/datasets/music.csv", 0, 6),
    # WaveformGenerator()
    stream = MultilabelGenerator(n_samples=instances)
    stream.prepare_for_use()

    # Setup the classifier
    # ``max_iter`` replaces ``n_iter``, which was deprecated in
    # scikit-learn 0.19 and removed in 0.21.
    # Alternatives: SGDClassifier(), PassiveAggressiveClassifier(),
    # SGDRegressor(), PerceptronMask()
    classifier = MultiOutputLearner(SGDClassifier(max_iter=100))

    # Setup the pipeline
    pipe = Pipeline([('Classifier', classifier)])

    # Setup the evaluator
    evaluator = EvaluatePrequential(
        pretrain_size=5000,
        max_samples=instances - 10000,
        batch_size=1,
        n_wait=200,
        max_time=1000,
        output_file=output_file,
        show_plot=True,
        metrics=['hamming_score', 'j_index', 'exact_match'])

    # Evaluate
    evaluator.evaluate(stream=stream, model=pipe)
Example #3
0
def test_multi_output_learner_classifier():
    """Check MultiOutputLearner wrapping an SGDClassifier on a 3-target stream.

    The classifier is pre-trained on 150 samples and then trained one sample
    at a time for 5000 further samples. Every 100th sample (skipping the
    very first) is predicted *before* it is used for training. Predictions,
    the exact-match count, the Hamming score and the model description are
    compared with reference values recorded per scikit-learn version range.
    """
    stream = MultilabelGenerator(n_samples=5150,
                                 n_features=15,
                                 n_targets=3,
                                 n_labels=4,
                                 random_state=112)

    estimator = SGDClassifier(random_state=112, max_iter=10, loss='log')
    classifier = MultiOutputLearner(base_estimator=estimator)

    X, y = get_next_n_samples(stream, 150)
    classifier.partial_fit(X, y)

    max_samples = 5000
    wait_samples = 100
    predictions = []
    true_labels = []
    correct_predictions = 0

    for cnt in range(max_samples):
        X, y = stream.next_sample()
        # Test every n samples, predicting before the sample is learned
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(classifier.predict(X)[0])
            true_labels.append(y[0])
            if np.array_equal(y[0], predictions[-1]):
                correct_predictions += 1

        classifier.partial_fit(X, y)

    # Reference values depend on the installed scikit-learn version; the
    # branch only selects them, the assertions below are shared.
    if LooseVersion(sklearn_version) < LooseVersion("0.21"):
        expected_predictions = [[1.0, 1.0, 1.0], [1.0, 0.0, 1.0],
                                [1.0, 1.0, 1.0], [1.0, 1.0, 1.0],
                                [1.0, 1.0, 1.0], [0.0, 1.0, 1.0],
                                [1.0, 1.0, 1.0], [0.0, 0.0, 1.0],
                                [1.0, 1.0, 1.0], [0.0, 0.0, 1.0],
                                [0.0, 1.0, 0.0], [1.0, 1.0, 1.0],
                                [0.0, 1.0, 1.0], [0.0, 1.0, 1.0],
                                [1.0, 1.0, 1.0], [1.0, 0.0, 0.0],
                                [0.0, 1.0, 1.0], [1.0, 0.0, 0.0],
                                [1.0, 0.0, 1.0], [0.0, 1.0, 0.0],
                                [0.0, 0.0, 1.0], [1.0, 0.0, 1.0],
                                [1.0, 1.0, 1.0], [1.0, 1.0, 1.0],
                                [0.0, 1.0, 1.0], [1.0, 1.0, 1.0],
                                [1.0, 1.0, 1.0], [1.0, 1.0, 1.0],
                                [0.0, 1.0, 1.0], [1.0, 1.0, 1.0],
                                [0.0, 1.0, 0.0], [0.0, 1.0, 1.0],
                                [1.0, 1.0, 1.0], [0.0, 1.0, 1.0],
                                [1.0, 0.0, 1.0], [0.0, 0.0, 1.0],
                                [0.0, 1.0, 1.0], [0.0, 1.0, 1.0],
                                [1.0, 0.0, 0.0], [0.0, 1.0, 1.0],
                                [0.0, 0.0, 1.0], [1.0, 1.0, 0.0],
                                [1.0, 0.0, 1.0], [1.0, 0.0, 1.0],
                                [0.0, 0.0, 1.0], [1.0, 1.0, 1.0],
                                [0.0, 1.0, 0.0], [1.0, 1.0, 1.0],
                                [0.0, 1.0, 1.0]]
        expected_correct_predictions = 26
        expected_performance = 0.7755102040816326
        expected_info = "MultiOutputLearner(base_estimator=SGDClassifier(loss='log', " \
                        "random_state=112))"
    else:
        expected_predictions = [[1.0, 1.0, 1.0], [1.0, 0.0, 1.0],
                                [1.0, 1.0, 1.0], [1.0, 1.0, 1.0],
                                [1.0, 1.0, 1.0], [0.0, 1.0, 1.0],
                                [1.0, 1.0, 1.0], [1.0, 0.0, 1.0],
                                [1.0, 0.0, 1.0], [1.0, 0.0, 1.0],
                                [0.0, 1.0, 0.0], [1.0, 1.0, 1.0],
                                [0.0, 1.0, 1.0], [0.0, 1.0, 1.0],
                                [1.0, 1.0, 1.0], [1.0, 1.0, 0.0],
                                [1.0, 1.0, 1.0], [1.0, 0.0, 0.0],
                                [1.0, 0.0, 1.0], [1.0, 1.0, 1.0],
                                [1.0, 0.0, 1.0], [1.0, 1.0, 1.0],
                                [1.0, 1.0, 1.0], [1.0, 1.0, 1.0],
                                [0.0, 1.0, 1.0], [1.0, 1.0, 1.0],
                                [0.0, 1.0, 0.0], [1.0, 0.0, 1.0],
                                [0.0, 1.0, 1.0], [1.0, 1.0, 0.0],
                                [1.0, 1.0, 1.0], [0.0, 1.0, 1.0],
                                [1.0, 1.0, 1.0], [0.0, 1.0, 1.0],
                                [1.0, 1.0, 1.0], [1.0, 0.0, 1.0],
                                [0.0, 0.0, 1.0], [0.0, 0.0, 1.0],
                                [1.0, 0.0, 0.0], [0.0, 1.0, 1.0],
                                [0.0, 0.0, 1.0], [1.0, 1.0, 0.0],
                                [1.0, 0.0, 1.0], [0.0, 0.0, 1.0],
                                [0.0, 0.0, 1.0], [1.0, 1.0, 1.0],
                                [1.0, 0.0, 0.0], [1.0, 1.0, 1.0],
                                [0.0, 1.0, 1.0]]
        expected_correct_predictions = 23
        expected_performance = 0.7482993197278911
        expected_info = "MultiOutputLearner(base_estimator=SGDClassifier(loss='log', " \
                        "max_iter=10, random_state=112))"

    # Asserted for both version ranges so the recorded predictions are
    # actually enforced. np.array_equal already returns a single bool.
    assert np.array_equal(predictions, expected_predictions)

    assert correct_predictions == expected_correct_predictions

    performance = hamming_score(true_labels, predictions)
    assert np.isclose(performance, expected_performance)

    # Collapse all whitespace runs (including newlines) to single spaces
    info = " ".join(classifier.get_info().split())
    assert info == expected_info

    assert isinstance(classifier.predict(X), np.ndarray)
    assert isinstance(classifier.predict_proba(X), np.ndarray)
Example #4
0
def test_multi_output_learner_classifier():
    """Check MultiOutputLearner wrapping an SGDClassifier on a 3-target stream.

    The classifier is pre-trained on 150 samples and then trained one sample
    at a time for 5000 further samples. Every 100th sample (skipping the
    very first) is predicted *before* it is used for training. Predictions,
    the exact-match count, the Hamming score and the model description are
    compared with reference values recorded per scikit-learn version range.
    """
    stream = MultilabelGenerator(n_samples=5150,
                                 n_features=15,
                                 n_targets=3,
                                 n_labels=4,
                                 random_state=112)
    stream.prepare_for_use()

    estimator = SGDClassifier(random_state=112,
                              tol=1e-3,
                              max_iter=10,
                              loss='log')
    classifier = MultiOutputLearner(base_estimator=estimator)

    X, y = stream.next_sample(150)
    classifier.partial_fit(X, y)

    max_samples = 5000
    wait_samples = 100
    predictions = []
    true_labels = []
    correct_predictions = 0

    for cnt in range(max_samples):
        X, y = stream.next_sample()
        # Test every n samples, predicting before the sample is learned
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(classifier.predict(X)[0])
            true_labels.append(y[0])
            if np.array_equal(y[0], predictions[-1]):
                correct_predictions += 1

        classifier.partial_fit(X, y)

    # Reference values depend on the installed scikit-learn version; the
    # branch only selects them, the assertions below are shared.
    if StrictVersion(sklearn_version) < StrictVersion("0.21"):
        expected_predictions = [[1.0, 1.0, 1.0], [1.0, 0.0, 1.0],
                                [1.0, 1.0, 1.0], [1.0, 1.0, 1.0],
                                [1.0, 1.0, 1.0], [0.0, 1.0, 1.0],
                                [1.0, 1.0, 1.0], [0.0, 0.0, 1.0],
                                [1.0, 1.0, 1.0], [0.0, 0.0, 1.0],
                                [0.0, 1.0, 0.0], [1.0, 1.0, 1.0],
                                [0.0, 1.0, 1.0], [0.0, 1.0, 1.0],
                                [1.0, 1.0, 1.0], [1.0, 0.0, 0.0],
                                [0.0, 1.0, 1.0], [1.0, 0.0, 0.0],
                                [1.0, 0.0, 1.0], [0.0, 1.0, 0.0],
                                [0.0, 0.0, 1.0], [1.0, 0.0, 1.0],
                                [1.0, 1.0, 1.0], [1.0, 1.0, 1.0],
                                [0.0, 1.0, 1.0], [1.0, 1.0, 1.0],
                                [1.0, 1.0, 1.0], [1.0, 1.0, 1.0],
                                [0.0, 1.0, 1.0], [1.0, 1.0, 1.0],
                                [0.0, 1.0, 0.0], [0.0, 1.0, 1.0],
                                [1.0, 1.0, 1.0], [0.0, 1.0, 1.0],
                                [1.0, 0.0, 1.0], [0.0, 0.0, 1.0],
                                [0.0, 1.0, 1.0], [0.0, 1.0, 1.0],
                                [1.0, 0.0, 0.0], [0.0, 1.0, 1.0],
                                [0.0, 0.0, 1.0], [1.0, 1.0, 0.0],
                                [1.0, 0.0, 1.0], [1.0, 0.0, 1.0],
                                [0.0, 0.0, 1.0], [1.0, 1.0, 1.0],
                                [0.0, 1.0, 0.0], [1.0, 1.0, 1.0],
                                [0.0, 1.0, 1.0]]
        expected_correct_predictions = 26
        expected_performance = 0.7755102040816326
        expected_info = "MultiOutputLearner(base_estimator=SGDClassifier(alpha=0.0001, average=False, " \
                        "class_weight=None,\n" \
                        "       early_stopping=False, epsilon=0.1, eta0=0.0, fit_intercept=True,\n" \
                        "       l1_ratio=0.15, learning_rate='optimal', loss='log', max_iter=10,\n" \
                        "       n_iter=None, n_iter_no_change=5, n_jobs=None, penalty='l2',\n" \
                        "       power_t=0.5, random_state=112, shuffle=True, tol=0.001,\n" \
                        "       validation_fraction=0.1, verbose=0, warm_start=False))"
    else:
        expected_predictions = [[1.0, 1.0, 1.0], [1.0, 0.0, 1.0],
                                [1.0, 1.0, 1.0], [1.0, 1.0, 1.0],
                                [1.0, 1.0, 1.0], [0.0, 1.0, 1.0],
                                [1.0, 1.0, 1.0], [1.0, 0.0, 1.0],
                                [1.0, 0.0, 1.0], [1.0, 0.0, 1.0],
                                [0.0, 1.0, 0.0], [1.0, 1.0, 1.0],
                                [0.0, 1.0, 1.0], [0.0, 1.0, 1.0],
                                [1.0, 1.0, 1.0], [1.0, 1.0, 0.0],
                                [1.0, 1.0, 1.0], [1.0, 0.0, 0.0],
                                [1.0, 0.0, 1.0], [1.0, 1.0, 1.0],
                                [1.0, 0.0, 1.0], [1.0, 1.0, 1.0],
                                [1.0, 1.0, 1.0], [1.0, 1.0, 1.0],
                                [0.0, 1.0, 1.0], [1.0, 1.0, 1.0],
                                [0.0, 1.0, 0.0], [1.0, 0.0, 1.0],
                                [0.0, 1.0, 1.0], [1.0, 1.0, 0.0],
                                [1.0, 1.0, 1.0], [0.0, 1.0, 1.0],
                                [1.0, 1.0, 1.0], [0.0, 1.0, 1.0],
                                [1.0, 1.0, 1.0], [1.0, 0.0, 1.0],
                                [0.0, 0.0, 1.0], [0.0, 0.0, 1.0],
                                [1.0, 0.0, 0.0], [0.0, 1.0, 1.0],
                                [0.0, 0.0, 1.0], [1.0, 1.0, 0.0],
                                [1.0, 0.0, 1.0], [0.0, 0.0, 1.0],
                                [0.0, 0.0, 1.0], [1.0, 1.0, 1.0],
                                [1.0, 0.0, 0.0], [1.0, 1.0, 1.0],
                                [0.0, 1.0, 1.0]]
        expected_correct_predictions = 23
        expected_performance = 0.7482993197278911
        expected_info = "MultiOutputLearner(base_estimator=SGDClassifier(alpha=0.0001, average=False, " \
                        "class_weight=None,\n" \
                        "              early_stopping=False, epsilon=0.1, eta0=0.0, fit_intercept=True,\n" \
                        "              l1_ratio=0.15, learning_rate='optimal', loss='log', max_iter=10,\n" \
                        "              n_iter_no_change=5, n_jobs=None, penalty='l2', power_t=0.5,\n" \
                        "              random_state=112, shuffle=True, tol=0.001,\n" \
                        "              validation_fraction=0.1, verbose=0, warm_start=False))"

    # Asserted for both version ranges so the recorded predictions are
    # actually enforced. np.array_equal already returns a single bool.
    assert np.array_equal(predictions, expected_predictions)

    assert correct_predictions == expected_correct_predictions

    performance = hamming_score(true_labels, predictions)
    assert np.isclose(performance, expected_performance)

    assert classifier.get_info() == expected_info

    assert isinstance(classifier.predict(X), np.ndarray)
    assert isinstance(classifier.predict_proba(X), np.ndarray)
Example #5
0
def test_multi_output_learner():
    """Check MultiOutputLearner wrapping a HoeffdingTree on a 3-target stream.

    The classifier is pre-trained on 150 samples and then trained one sample
    at a time for 5000 further samples. Every 100th sample (skipping the
    very first) is predicted *before* it is used for training. Predictions,
    the exact-match count and the Hamming score are compared with recorded
    reference values.
    """
    stream = MultilabelGenerator(n_samples=5150,
                                 n_features=15,
                                 n_targets=3,
                                 n_labels=4,
                                 random_state=112)
    stream.prepare_for_use()

    classifier = MultiOutputLearner(base_estimator=HoeffdingTree())

    X, y = stream.next_sample(150)
    classifier.partial_fit(X, y)

    max_samples = 5000
    wait_samples = 100
    predictions = []
    true_labels = []
    correct_predictions = 0

    for cnt in range(max_samples):
        X, y = stream.next_sample()
        # Test every n samples, predicting before the sample is learned
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(classifier.predict(X)[0])
            true_labels.append(y[0])
            if np.array_equal(y[0], predictions[-1]):
                correct_predictions += 1

        classifier.partial_fit(X, y)

    perf = hamming_score(true_labels, predictions)
    expected_predictions = [[1., 1., 1.], [1., 1., 1.], [1., 1., 1.],
                            [1., 1., 1.], [1., 1., 1.], [0., 1., 1.],
                            [1., 0., 1.], [1., 0., 1.], [1., 1., 1.],
                            [0., 0., 1.], [0., 1., 1.], [0., 1., 1.],
                            [1., 1., 1.], [0., 1., 1.], [1., 1., 0.],
                            [1., 1., 1.], [0., 1., 1.], [1., 0., 0.],
                            [1., 0., 1.], [1., 1., 1.], [1., 0., 1.],
                            [1., 1., 1.], [1., 1., 1.], [1., 1., 1.],
                            [1., 1., 1.], [1., 1., 1.], [1., 1., 1.],
                            [1., 0., 0.], [0., 1., 1.], [1., 1., 0.],
                            [1., 1., 1.], [0., 1., 1.], [1., 1., 1.],
                            [0., 1., 1.], [1., 0., 1.], [1., 0., 1.],
                            [0., 0., 1.], [0., 1., 1.], [1., 1., 0.],
                            [0., 1., 1.], [1., 1., 1.], [1., 1., 1.],
                            [1., 0., 1.], [1., 1., 1.], [1., 1., 1.],
                            [1., 0., 1.], [1., 1., 1.], [1., 1., 1.],
                            [0., 1., 1.]]
    expected_correct_predictions = 32

    expected_performance = 0.8503401360544217

    # np.array_equal already returns a single bool, so no np.alltrue wrapper
    # (np.alltrue was removed in NumPy 2.0).
    assert np.array_equal(predictions, expected_predictions)
    assert np.isclose(expected_performance, perf)
    assert correct_predictions == expected_correct_predictions

    assert isinstance(classifier.predict(X), np.ndarray)
    assert isinstance(classifier.predict_proba(X), np.ndarray)
Example #6
0
from skmultiflow.data import MultilabelGenerator
from skmultiflow.meta.multi_output_learner import MultiOutputLearner
from skmultiflow.trees import HoeffdingTreeClassifier
from skmultiflow.data.file_stream import FileStream
from sklearn.linear_model import Perceptron
from skmultiflow.metrics import hamming_score
# Synthetic multi-label stream: 200 samples, 10 features, 5 targets
stream = MultilabelGenerator(n_samples=200, n_features=10, n_targets=5,
                             random_state=1)
# Hoeffding tree classifier wrapped in a MultiOutputLearner
ht = HoeffdingTreeClassifier()
br = MultiOutputLearner(ht)
# Warm up the learner on the first 150 samples of the stream
X, y = stream.next_sample(150)
br.partial_fit(X, y, classes=stream.target_values)
# Bookkeeping for the Hamming score: number of evaluated samples plus the
# accumulated true labels and predictions
true_labels, predicts = [], []
count = 0
# Consume the rest of the stream one sample at a time, predicting each
# sample before the learner is updated with it
while stream.has_more_samples():
    X, y = stream.next_sample()
    p = br.predict(X)
    br.partial_fit(X, y)
    count += 1
    true_labels.extend(y)
    predicts.extend(p)

perf = hamming_score(true_labels, predicts)
print('Total samples analyzed: ' + str(count))
print("The classifier's static Hamming score    : " + str(perf))
from skmultiflow.trees import LabelCombinationHoeffdingTreeClassifier,\
    iSOUPTreeRegressor, \
    HoeffdingTreeClassifier

from common.helpers import (load_custom_dataset, load_moa_stream,
                            evaluar, repeatInstances)
from common.evaluation_metrics import evaluation_metrics


# Timestamp pattern written with strftime-style directives (YYYYMMDD_HHMMSS).
# NOTE(review): presumably used to tag output files or runs -- confirm at the
# call sites, which are not visible here.
TIME_STR = "%Y%m%d_%H%M%S"

SUPPORTED_MODELS = {
    "br": {
        "name": "Binary Relevance - Perceptron",
        "model": lambda data_stream: MultiOutputLearner(
            Perceptron(),
            n_targets=data_stream.n_targets
        ),
        "ensemble": False
    },
    "br_ht": {
        "name": "Binary Relevance - Hoeffding Tree",
        "model": lambda data_stream: MultiOutputLearner(
            HoeffdingTreeClassifier(),
            n_targets=data_stream.n_targets
        ),
        "ensemble": False
    },
    "br_nb": {
        "name": "Binary Relevance - Naive Bayes",
        "model": lambda data_stream: MultiOutputLearner(
            NaiveBayes(),
Example #8
0
            batch_size=60,
            max_samples=max_samples,
            metrics=[
                'true_vs_predicted', 'mean_square_error',
                'mean_absolute_error', 'running_time', 'model_size'
            ])
    evaluator.evaluate(stream=stream, model=model, model_names=[model_name])
else:
    # For Multi-AP approach
    if args.chained:
        print("Using Regressor Chain for Multi-label")
        multiOutputModel = RegressorChain(model, random_state=1)
        mode = "rc"
    else:
        print("Using Binary Relevance for Multi-label")
        multiOutputModel = MultiOutputLearner(base_estimator=model)
        mode = "br"

    if args.holdout:
        evaluator = EvaluateHoldout(
            output_file=model_name + "_eval_one_label_v2_holdout_" + mode +
            ".txt",
            show_plot=args.show_plot,
            n_wait=60,
            test_size=60,
            batch_size=60,
            max_samples=max_samples,
            metrics=[
                'average_mean_square_error', 'average_mean_absolute_error',
                'running_time'
            ])