Example #1
def test_evaluate_delayed_multi_target_classification_coverage(tmpdir):
    # A simple coverage test. Tests for metrics are placed in the corresponding test module.
    import os

    from skmultiflow.data import MultilabelGenerator, TemporalDataStream
    from skmultiflow.evaluation import EvaluatePrequentialDelayed
    from skmultiflow.meta import MultiOutputLearner
    # generate_random_dates is a module-level helper in the original test
    # file (see the stand-in sketch after this example).

    max_samples = 1000

    # Stream
    data = MultilabelGenerator(n_samples=max_samples, random_state=1)
    # Get X and y
    X, y = data.next_sample(max_samples)
    time = generate_random_dates(seed=1, samples=max_samples)

    # Setup temporal stream
    stream = TemporalDataStream(X, y, time, ordered=True)

    # Learner
    mol = MultiOutputLearner()

    output_file = os.path.join(str(tmpdir), "prequential_delayed_summary.csv")
    metrics = ['hamming_score', 'hamming_loss', 'exact_match', 'j_index']
    evaluator = EvaluatePrequentialDelayed(max_samples=max_samples,
                                           metrics=metrics,
                                           output_file=output_file)

    evaluator.evaluate(stream=stream, model=[mol], model_names=['MOL1'])
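The generate_random_dates helper used above is a module-level utility in the
original test file and is not shown in this example. A minimal stand-in,
assuming only that it must return one timestamp per sample for
TemporalDataStream, could look like:

import numpy as np
import pandas as pd

def generate_random_dates(seed, samples):
    # Hypothetical stand-in: one sorted timestamp per sample.
    rng = np.random.default_rng(seed)
    start = pd.Timestamp('2020-01-01').value  # nanoseconds since epoch
    end = pd.Timestamp('2020-12-31').value
    return pd.to_datetime(np.sort(rng.integers(start, end, size=samples)))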
Example #2
import numpy as np

from skmultiflow.data import MultilabelGenerator
from skmultiflow.trees import LabelCombinationHoeffdingTreeClassifier

# Pre-0.5 releases exposed the same tree under the shorter name LCHT,
# which the second test below still uses.
LCHT = LabelCombinationHoeffdingTreeClassifier


def test_label_combination_hoeffding_tree_nb(test_path):
    stream = MultilabelGenerator(n_samples=10000,
                                 n_features=15,
                                 n_targets=3,
                                 n_labels=4,
                                 random_state=112)

    stream.prepare_for_use()

    learner = LabelCombinationHoeffdingTreeClassifier(n_labels=3,
                                                      leaf_prediction='nb')

    cnt = 0
    max_samples = 5000
    predictions = []
    proba_predictions = []
    wait_samples = 100

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test every n samples
        learner.partial_fit(X, y)
        if cnt % wait_samples == 0 and (cnt != 0):
            predictions.append(learner.predict(X)[0].tolist())
            proba_predictions.append(learner.predict_proba(X)[0])
        cnt += 1

    print(predictions)
    expected_predictions = [[0, 0, 1], [1, 1, 1], [0, 1, 1], [0, 1, 1],
                            [1, 1, 1], [0, 1, 1], [1, 1, 0], [1, 1, 1],
                            [1, 1, 1], [1, 1, 1], [0, 0, 0], [0, 1, 0],
                            [1, 0, 0], [1, 0, 1], [1, 1, 1], [1, 1, 1],
                            [0, 1, 1], [1, 1, 1], [0, 0, 0], [1, 1, 0],
                            [1, 0, 0], [1, 0, 1], [1, 1, 1], [0, 0, 1],
                            [1, 0, 1], [1, 1, 1], [1, 0, 0], [1, 1, 1],
                            [1, 1, 1], [0, 0, 1], [1, 1, 1], [0, 0, 0],
                            [0, 1, 0], [1, 1, 1], [0, 1, 1], [1, 1, 0],
                            [0, 0, 1], [0, 0, 0], [1, 1, 1], [1, 1, 1],
                            [1, 0, 1], [0, 1, 1], [1, 1, 1], [1, 1, 1],
                            [0, 1, 0], [0, 1, 0], [1, 1, 1], [1, 1, 1],
                            [1, 1, 1]]

    assert np.alltrue(predictions == expected_predictions)
    assert type(learner.predict(X)) == np.ndarray
    assert type(learner.predict_proba(X)) == np.ndarray

    expected_info = "LabelCombinationHoeffdingTreeClassifier(binary_split=False, grace_period=200, " \
                    "leaf_prediction='nb', max_byte_size=33554432, memory_estimate_period=1000000, n_labels=3, " \
                    "nb_threshold=0, no_preprune=False, nominal_attributes=None, remove_poor_atts=False, " \
                    "split_confidence=1e-07, split_criterion='info_gain', stop_mem_management=False, " \
                    "tie_threshold=0.05)"
    info = " ".join([line.strip() for line in learner.get_info().split()])
    assert info == expected_info
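LabelCombinationHoeffdingTreeClassifier follows the label-powerset idea: each
distinct combination of binary labels is treated as one class of a single
multi-class problem. A minimal sketch of that encoding (reading the label
vector as a binary number, which is one standard way to realize the mapping):

import numpy as np

def labels_to_class(y_row):
    # e.g. [1, 0, 1] -> '101' -> 5
    return int(''.join(str(int(v)) for v in y_row), 2)

def class_to_labels(c, n_labels):
    # e.g. (5, 3) -> [1, 0, 1]
    return np.array([int(b) for b in format(c, '0{}b'.format(n_labels))])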
def test_lc_hoeffding_tree(test_path):
    stream = MultilabelGenerator(n_samples=10000,
                                 n_features=15,
                                 n_targets=3,
                                 n_labels=4,
                                 random_state=112)

    stream.prepare_for_use()

    learner = LCHT(n_labels=3)

    cnt = 0
    max_samples = 5000
    predictions = []
    proba_predictions = []
    wait_samples = 100

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test every n samples
        learner.partial_fit(X, y)
        if cnt % wait_samples == 0 and (cnt != 0):
            predictions.append(learner.predict(X)[0].tolist())
            proba_predictions.append(learner.predict_proba(X)[0])
        cnt += 1

    print(predictions)
    expected_predictions = [[0, 0, 1], [1, 1, 1], [0, 1, 1], [0, 1, 1],
                            [1, 1, 1], [0, 1, 1], [1, 1, 0], [1, 1, 1],
                            [1, 1, 1], [1, 1, 1], [0, 0, 0], [0, 1, 0],
                            [1, 0, 0], [1, 0, 1], [1, 1, 1], [1, 1, 1],
                            [0, 1, 1], [1, 1, 1], [0, 0, 0], [1, 1, 0],
                            [1, 0, 0], [1, 0, 1], [1, 1, 1], [0, 0, 1],
                            [1, 0, 1], [1, 1, 1], [1, 0, 0], [1, 1, 1],
                            [1, 1, 1], [0, 0, 1], [1, 1, 1], [0, 0, 0],
                            [0, 1, 0], [1, 1, 1], [0, 1, 1], [1, 1, 0],
                            [0, 0, 1], [0, 0, 0], [1, 1, 1], [1, 1, 1],
                            [1, 0, 1], [0, 1, 1], [1, 1, 1], [1, 1, 1],
                            [0, 1, 0], [0, 1, 0], [1, 1, 1], [1, 1, 1],
                            [1, 1, 1]]

    assert np.alltrue(predictions == expected_predictions)
    assert type(learner.predict(X)) == np.ndarray
    assert type(learner.predict_proba(X)) == np.ndarray
Example #4
def test_evaluate_multi_target_classification_coverage(tmpdir):
    # A simple coverage test. Tests for metrics are placed in the corresponding test module.
    import os

    from skmultiflow.data import MultilabelGenerator
    from skmultiflow.evaluation import EvaluatePrequential
    from skmultiflow.meta import MultiOutputLearner

    max_samples = 1000

    # Stream
    stream = MultilabelGenerator(n_samples=max_samples, random_state=1)
    stream.prepare_for_use()

    # Learner
    mol = MultiOutputLearner()

    output_file = os.path.join(str(tmpdir), "prequential_summary.csv")
    metrics = ['hamming_score', 'hamming_loss', 'exact_match', 'j_index']
    evaluator = EvaluatePrequential(max_samples=max_samples,
                                    metrics=metrics,
                                    output_file=output_file)

    evaluator.evaluate(stream=stream, model=[mol], model_names=['MOL'])
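The evaluator writes its running results to output_file. Assuming pandas is
available, and relying on skmultiflow prefixing its metadata lines with '#',
the summary can be inspected afterwards:

import pandas as pd

results = pd.read_csv(output_file, comment='#')  # skip '#' header lines
print(results.columns.tolist())
print(results.tail())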
Example #5
from skmultiflow.data import MultilabelGenerator
from skmultiflow.trees import LabelCombinationHoeffdingTreeClassifier
from skmultiflow.utils import calculate_object_size


def test_label_combination_hoeffding_tree_coverage():
    # Cover memory management
    max_samples = 10000
    max_size_kb = 50
    stream = MultilabelGenerator(n_samples=10000,
                                 n_features=15,
                                 n_targets=3,
                                 n_labels=4,
                                 random_state=112)

    # Unconstrained model has over 62 kB
    learner = LabelCombinationHoeffdingTreeClassifier(
        n_labels=3,
        leaf_prediction='mc',
        memory_estimate_period=200,
        max_byte_size=max_size_kb * 2**10)

    X, y = stream.next_sample(max_samples)
    learner.partial_fit(X, y)

    assert calculate_object_size(learner, 'kB') <= max_size_kb

import numpy as np
from sklearn.linear_model import SGDClassifier

from skmultiflow.meta import ClassifierChain


def test_classifier_chains():

    stream = MultilabelGenerator(random_state=112, n_targets=3, n_samples=5150)
    stream.prepare_for_use()
    estimator = SGDClassifier(random_state=112, max_iter=10)
    learner = ClassifierChain(base_estimator=estimator, random_state=112)

    X, y = stream.next_sample(150)
    learner.partial_fit(X, y)

    cnt = 0
    max_samples = 5000
    predictions = []
    true_labels = []
    wait_samples = 100
    correct_predictions = 0

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(learner.predict(X)[0])
            true_labels.append(y[0])
            if np.array_equal(y[0], predictions[-1]):
                correct_predictions += 1

        learner.partial_fit(X, y)
        cnt += 1
    expected_predictions = [[0.0, 0.0, 1.0], [1.0, 0.0, 0.0], [1.0, 0.0, 1.0],
                            [1.0, 1.0, 1.0], [1.0, 0.0, 1.0], [1.0, 0.0, 0.0],
                            [1.0, 0.0, 1.0], [1.0, 0.0, 1.0], [0.0, 0.0, 1.0],
                            [0.0, 0.0, 0.0], [1.0, 0.0, 0.0], [0.0, 1.0, 1.0],
                            [0.0, 0.0, 1.0], [0.0, 0.0, 1.0], [0.0, 0.0, 1.0],
                            [0.0, 0.0, 1.0], [1.0, 1.0, 1.0], [1.0, 0.0, 1.0],
                            [1.0, 1.0, 1.0], [0.0, 0.0, 0.0], [0.0, 0.0, 1.0],
                            [0.0, 0.0, 1.0], [0.0, 0.0, 1.0], [0.0, 1.0, 1.0],
                            [0.0, 1.0, 1.0], [1.0, 1.0, 1.0], [0.0, 0.0, 0.0],
                            [0.0, 1.0, 0.0], [1.0, 1.0, 1.0], [1.0, 1.0, 0.0],
                            [0.0, 1.0, 0.0], [1.0, 0.0, 1.0], [1.0, 1.0, 1.0],
                            [0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [1.0, 0.0, 0.0],
                            [1.0, 1.0, 1.0], [0.0, 1.0, 1.0], [0.0, 0.0, 0.0],
                            [1.0, 0.0, 1.0], [0.0, 0.0, 1.0], [0.0, 0.0, 0.0],
                            [1.0, 0.0, 1.0], [0.0, 0.0, 1.0], [0.0, 1.0, 0.0],
                            [0.0, 0.0, 0.0], [1.0, 1.0, 1.0], [0.0, 0.0, 0.0],
                            [1.0, 1.0, 1.0]]

    expected_correct_predictions = 21

    assert np.alltrue(np.array_equal(predictions, expected_predictions))
    assert correct_predictions == expected_correct_predictions

    assert type(learner.predict(X)) == np.ndarray
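ClassifierChain couples the labels: the model for label k is trained on the
original features plus the values of labels 0..k-1, and at prediction time it
is fed the chain's own earlier predictions. A minimal batch sketch of the idea
(not skmultiflow's implementation, and assuming every label column contains
both classes so the base estimator can fit):

import numpy as np

class TinyClassifierChain:

    def __init__(self, models):
        # One pre-constructed base estimator per label, in chain order.
        self.models = models

    def fit(self, X, Y):
        for k, model in enumerate(self.models):
            # Training uses the true values of the earlier labels.
            model.fit(np.hstack([X, Y[:, :k]]), Y[:, k])
        return self

    def predict(self, X):
        preds = np.zeros((X.shape[0], len(self.models)))
        for k, model in enumerate(self.models):
            # Prediction feeds the chain its own earlier outputs.
            preds[:, k] = model.predict(np.hstack([X, preds[:, :k]]))
        return preds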
Example #7
import numpy as np
from distutils.version import StrictVersion

from sklearn import __version__ as sklearn_version
from sklearn.linear_model import SGDClassifier

from skmultiflow.data import MultilabelGenerator
from skmultiflow.meta import MultiOutputLearner
from skmultiflow.metrics import hamming_score


def test_multi_output_learner_classifier():

    stream = MultilabelGenerator(n_samples=5150,
                                 n_features=15,
                                 n_targets=3,
                                 n_labels=4,
                                 random_state=112)
    stream.prepare_for_use()

    estimator = SGDClassifier(random_state=112,
                              tol=1e-3,
                              max_iter=10,
                              loss='log')
    classifier = MultiOutputLearner(base_estimator=estimator)

    X, y = stream.next_sample(150)
    classifier.partial_fit(X, y)

    cnt = 0
    max_samples = 5000
    predictions = []
    true_labels = []
    wait_samples = 100
    correct_predictions = 0

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(classifier.predict(X)[0])
            true_labels.append(y[0])
            if np.array_equal(y[0], predictions[-1]):
                correct_predictions += 1

        classifier.partial_fit(X, y)
        cnt += 1

    if StrictVersion(sklearn_version) < StrictVersion("0.21"):
        expected_predictions = [[1.0, 1.0, 1.0], [1.0, 0.0, 1.0],
                                [1.0, 1.0, 1.0], [1.0, 1.0, 1.0],
                                [1.0, 1.0, 1.0], [0.0, 1.0, 1.0],
                                [1.0, 1.0, 1.0], [0.0, 0.0, 1.0],
                                [1.0, 1.0, 1.0], [0.0, 0.0, 1.0],
                                [0.0, 1.0, 0.0], [1.0, 1.0, 1.0],
                                [0.0, 1.0, 1.0], [0.0, 1.0, 1.0],
                                [1.0, 1.0, 1.0], [1.0, 0.0, 0.0],
                                [0.0, 1.0, 1.0], [1.0, 0.0, 0.0],
                                [1.0, 0.0, 1.0], [0.0, 1.0, 0.0],
                                [0.0, 0.0, 1.0], [1.0, 0.0, 1.0],
                                [1.0, 1.0, 1.0], [1.0, 1.0, 1.0],
                                [0.0, 1.0, 1.0], [1.0, 1.0, 1.0],
                                [1.0, 1.0, 1.0], [1.0, 1.0, 1.0],
                                [0.0, 1.0, 1.0], [1.0, 1.0, 1.0],
                                [0.0, 1.0, 0.0], [0.0, 1.0, 1.0],
                                [1.0, 1.0, 1.0], [0.0, 1.0, 1.0],
                                [1.0, 0.0, 1.0], [0.0, 0.0, 1.0],
                                [0.0, 1.0, 1.0], [0.0, 1.0, 1.0],
                                [1.0, 0.0, 0.0], [0.0, 1.0, 1.0],
                                [0.0, 0.0, 1.0], [1.0, 1.0, 0.0],
                                [1.0, 0.0, 1.0], [1.0, 0.0, 1.0],
                                [0.0, 0.0, 1.0], [1.0, 1.0, 1.0],
                                [0.0, 1.0, 0.0], [1.0, 1.0, 1.0],
                                [0.0, 1.0, 1.0]]
        assert np.alltrue(np.array_equal(predictions, expected_predictions))

        expected_correct_predictions = 26
        assert correct_predictions == expected_correct_predictions

        expected_performance = 0.7755102040816326
        performance = hamming_score(true_labels, predictions)
        assert np.isclose(performance, expected_performance)

        expected_info = "MultiOutputLearner(base_estimator=SGDClassifier(alpha=0.0001, average=False, " \
                        "class_weight=None,\n" \
                        "       early_stopping=False, epsilon=0.1, eta0=0.0, fit_intercept=True,\n" \
                        "       l1_ratio=0.15, learning_rate='optimal', loss='log', max_iter=10,\n" \
                        "       n_iter=None, n_iter_no_change=5, n_jobs=None, penalty='l2',\n" \
                        "       power_t=0.5, random_state=112, shuffle=True, tol=0.001,\n" \
                        "       validation_fraction=0.1, verbose=0, warm_start=False))"
        assert classifier.get_info() == expected_info

    else:
        expected_predictions = [[1.0, 1.0, 1.0], [1.0, 0.0, 1.0],
                                [1.0, 1.0, 1.0], [1.0, 1.0, 1.0],
                                [1.0, 1.0, 1.0], [0.0, 1.0, 1.0],
                                [1.0, 1.0, 1.0], [1.0, 0.0, 1.0],
                                [1.0, 0.0, 1.0], [1.0, 0.0, 1.0],
                                [0.0, 1.0, 0.0], [1.0, 1.0, 1.0],
                                [0.0, 1.0, 1.0], [0.0, 1.0, 1.0],
                                [1.0, 1.0, 1.0], [1.0, 1.0, 0.0],
                                [1.0, 1.0, 1.0], [1.0, 0.0, 0.0],
                                [1.0, 0.0, 1.0], [1.0, 1.0, 1.0],
                                [1.0, 0.0, 1.0], [1.0, 1.0, 1.0],
                                [1.0, 1.0, 1.0], [1.0, 1.0, 1.0],
                                [0.0, 1.0, 1.0], [1.0, 1.0, 1.0],
                                [0.0, 1.0, 0.0], [1.0, 0.0, 1.0],
                                [0.0, 1.0, 1.0], [1.0, 1.0, 0.0],
                                [1.0, 1.0, 1.0], [0.0, 1.0, 1.0],
                                [1.0, 1.0, 1.0], [0.0, 1.0, 1.0],
                                [1.0, 1.0, 1.0], [1.0, 0.0, 1.0],
                                [0.0, 0.0, 1.0], [0.0, 0.0, 1.0],
                                [1.0, 0.0, 0.0], [0.0, 1.0, 1.0],
                                [0.0, 0.0, 1.0], [1.0, 1.0, 0.0],
                                [1.0, 0.0, 1.0], [0.0, 0.0, 1.0],
                                [0.0, 0.0, 1.0], [1.0, 1.0, 1.0],
                                [1.0, 0.0, 0.0], [1.0, 1.0, 1.0],
                                [0.0, 1.0, 1.0]]
        assert np.alltrue(np.array_equal(predictions, expected_predictions))

        expected_correct_predictions = 23
        assert correct_predictions == expected_correct_predictions

        expected_performance = 0.7482993197278911
        performance = hamming_score(true_labels, predictions)
        assert np.isclose(performance, expected_performance)

        expected_info = "MultiOutputLearner(base_estimator=SGDClassifier(alpha=0.0001, average=False, " \
                        "class_weight=None,\n" \
                        "              early_stopping=False, epsilon=0.1, eta0=0.0, fit_intercept=True,\n" \
                        "              l1_ratio=0.15, learning_rate='optimal', loss='log', max_iter=10,\n" \
                        "              n_iter_no_change=5, n_jobs=None, penalty='l2', power_t=0.5,\n" \
                        "              random_state=112, shuffle=True, tol=0.001,\n" \
                        "              validation_fraction=0.1, verbose=0, warm_start=False))"

        assert classifier.get_info() == expected_info

    assert type(classifier.predict(X)) == np.ndarray
    assert type(classifier.predict_proba(X)) == np.ndarray
Example #8
import numpy as np

from sklearn import __version__ as sklearn_version
from sklearn.linear_model import SGDClassifier

from skmultiflow.data import MultilabelGenerator
from skmultiflow.meta import ClassifierChain, MultiOutputLearner
from skmultiflow.metrics import hamming_score
from skmultiflow.trees import HoeffdingTree


def test_multi_output_learner():

    stream = MultilabelGenerator(n_samples=5150,
                                 n_features=15,
                                 n_targets=3,
                                 n_labels=4,
                                 random_state=112)
    stream.prepare_for_use()

    classifier = MultiOutputLearner(base_estimator=HoeffdingTree())

    X, y = stream.next_sample(150)
    classifier.partial_fit(X, y)

    cnt = 0
    max_samples = 5000
    predictions = []
    true_labels = []
    wait_samples = 100
    correct_predictions = 0

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(classifier.predict(X)[0])
            true_labels.append(y[0])
            if np.array_equal(y[0], predictions[-1]):
                correct_predictions += 1

        classifier.partial_fit(X, y)
        cnt += 1

    perf = hamming_score(true_labels, predictions)
    expected_predictions = [[1., 1., 1.], [1., 1., 1.], [1., 1., 1.],
                            [1., 1., 1.], [1., 1., 1.], [0., 1., 1.],
                            [1., 0., 1.], [1., 0., 1.], [1., 1., 1.],
                            [0., 0., 1.], [0., 1., 1.], [0., 1., 1.],
                            [1., 1., 1.], [0., 1., 1.], [1., 1., 0.],
                            [1., 1., 1.], [0., 1., 1.], [1., 0., 0.],
                            [1., 0., 1.], [1., 1., 1.], [1., 0., 1.],
                            [1., 1., 1.], [1., 1., 1.], [1., 1., 1.],
                            [1., 1., 1.], [1., 1., 1.], [1., 1., 1.],
                            [1., 0., 0.], [0., 1., 1.], [1., 1., 0.],
                            [1., 1., 1.], [0., 1., 1.], [1., 1., 1.],
                            [0., 1., 1.], [1., 0., 1.], [1., 0., 1.],
                            [0., 0., 1.], [0., 1., 1.], [1., 1., 0.],
                            [0., 1., 1.], [1., 1., 1.], [1., 1., 1.],
                            [1., 0., 1.], [1., 1., 1.], [1., 1., 1.],
                            [1., 0., 1.], [1., 1., 1.], [1., 1., 1.],
                            [0., 1., 1.]]
    expected_correct_predictions = 32

    expected_performance = 0.8503401360544217

    assert np.alltrue(np.array_equal(predictions, expected_predictions))
    assert np.isclose(expected_performance, perf)
    assert correct_predictions == expected_correct_predictions

    assert type(classifier.predict(X)) == np.ndarray
    assert type(classifier.predict_proba(X)) == np.ndarray
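MultiOutputLearner, by contrast with the chain, is plain binary relevance: one
independent copy of the base estimator per label, with no coupling between
labels. The idea in a few lines (a sketch, not the library code):

import copy
import numpy as np

def binary_relevance_fit(base_estimator, X, Y):
    # One independent clone per label column.
    return [copy.deepcopy(base_estimator).fit(X, Y[:, k])
            for k in range(Y.shape[1])]

def binary_relevance_predict(models, X):
    return np.column_stack([m.predict(X) for m in models])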
def test_classifier_chains():
    seed = 112
    stream = MultilabelGenerator(random_state=seed, n_targets=3, n_samples=5150)
    stream.prepare_for_use()
    estimator = SGDClassifier(random_state=seed, tol=1e-3, max_iter=10)
    learner = ClassifierChain(base_estimator=estimator, random_state=seed)
    X, y = stream.next_sample(150)
    learner.partial_fit(X, y)

    cnt = 0
    max_samples = 5000
    predictions = []
    true_labels = []
    wait_samples = 100
    correct_predictions = 0

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(learner.predict(X)[0])
            true_labels.append(y[0])
            if np.array_equal(y[0], predictions[-1]):
                correct_predictions += 1

        learner.partial_fit(X, y)
        cnt += 1

    if not sklearn_version.startswith("0.21"):
        expected_predictions = [[0.0, 0.0, 1.0], [1.0, 0.0, 0.0], [1.0, 0.0, 1.0], [1.0, 1.0, 1.0], [1.0, 0.0, 1.0],
                                [1.0, 0.0, 0.0], [1.0, 0.0, 1.0], [1.0, 0.0, 1.0], [0.0, 0.0, 1.0], [0.0, 0.0, 0.0],
                                [1.0, 0.0, 0.0], [0.0, 1.0, 1.0], [0.0, 0.0, 1.0], [0.0, 0.0, 1.0], [0.0, 0.0, 1.0],
                                [0.0, 0.0, 1.0], [1.0, 1.0, 1.0], [1.0, 0.0, 1.0], [1.0, 1.0, 1.0], [0.0, 0.0, 0.0],
                                [0.0, 0.0, 1.0], [0.0, 0.0, 1.0], [0.0, 0.0, 1.0], [0.0, 1.0, 1.0], [0.0, 1.0, 1.0],
                                [1.0, 1.0, 1.0], [0.0, 0.0, 0.0], [0.0, 1.0, 0.0], [1.0, 1.0, 1.0], [1.0, 1.0, 0.0],
                                [0.0, 1.0, 0.0], [1.0, 0.0, 1.0], [1.0, 1.0, 1.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0],
                                [1.0, 0.0, 0.0], [1.0, 1.0, 1.0], [0.0, 1.0, 1.0], [0.0, 0.0, 0.0], [1.0, 0.0, 1.0],
                                [0.0, 0.0, 1.0], [0.0, 0.0, 0.0], [1.0, 0.0, 1.0], [0.0, 0.0, 1.0], [0.0, 1.0, 0.0],
                                [0.0, 0.0, 0.0], [1.0, 1.0, 1.0], [0.0, 0.0, 0.0], [1.0, 1.0, 1.0]]
        assert np.alltrue(np.array_equal(predictions, expected_predictions))

        expected_correct_predictions = 21
        assert correct_predictions == expected_correct_predictions

        expected_info = "ClassifierChain(base_estimator=SGDClassifier(alpha=0.0001, average=False, class_weight=None,\n" \
                        "       early_stopping=False, epsilon=0.1, eta0=0.0, fit_intercept=True,\n" \
                        "       l1_ratio=0.15, learning_rate='optimal', loss='hinge', max_iter=10,\n" \
                        "       n_iter=None, n_iter_no_change=5, n_jobs=None, penalty='l2',\n" \
                        "       power_t=0.5, random_state=112, shuffle=True, tol=0.001,\n" \
                        "       validation_fraction=0.1, verbose=0, warm_start=False),\n" \
                        "                order=None, random_state=112)"
        assert learner.get_info() == expected_info

    else:
        expected_predictions = [[0.0, 0.0, 1.0], [0.0, 0.0, 0.0], [1.0, 0.0, 1.0], [1.0, 0.0, 1.0], [0.0, 0.0, 1.0],
                                [1.0, 0.0, 0.0], [1.0, 0.0, 1.0], [1.0, 0.0, 1.0], [0.0, 0.0, 1.0], [0.0, 0.0, 0.0],
                                [1.0, 0.0, 1.0], [0.0, 0.0, 1.0], [0.0, 0.0, 1.0], [0.0, 0.0, 1.0], [0.0, 0.0, 1.0],
                                [0.0, 0.0, 1.0], [1.0, 0.0, 1.0], [0.0, 0.0, 0.0], [1.0, 0.0, 1.0], [0.0, 0.0, 0.0],
                                [0.0, 1.0, 1.0], [0.0, 1.0, 1.0], [0.0, 0.0, 1.0], [0.0, 1.0, 1.0], [0.0, 1.0, 1.0],
                                [0.0, 1.0, 1.0], [0.0, 1.0, 0.0], [0.0, 1.0, 0.0], [1.0, 1.0, 1.0], [0.0, 1.0, 0.0],
                                [0.0, 1.0, 1.0], [1.0, 0.0, 1.0], [0.0, 1.0, 1.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0],
                                [1.0, 0.0, 0.0], [1.0, 1.0, 1.0], [0.0, 1.0, 1.0], [0.0, 0.0, 0.0], [1.0, 0.0, 1.0],
                                [0.0, 0.0, 1.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 1.0], [0.0, 1.0, 0.0],
                                [0.0, 0.0, 0.0], [1.0, 1.0, 1.0], [0.0, 0.0, 0.0], [1.0, 1.0, 1.0]]
        assert np.alltrue(np.array_equal(predictions, expected_predictions))

        expected_correct_predictions = 26
        assert correct_predictions == expected_correct_predictions

        expected_info = "ClassifierChain(base_estimator=SGDClassifier(alpha=0.0001, average=False, class_weight=None,\n" \
                        "              early_stopping=False, epsilon=0.1, eta0=0.0, fit_intercept=True,\n" \
                        "              l1_ratio=0.15, learning_rate='optimal', loss='hinge', max_iter=10,\n" \
                        "              n_iter_no_change=5, n_jobs=None, penalty='l2', power_t=0.5,\n" \
                        "              random_state=112, shuffle=True, tol=0.001,\n" \
                        "              validation_fraction=0.1, verbose=0, warm_start=False),\n" \
                        "                order=None, random_state=112)"
        assert learner.get_info() == expected_info

    assert type(learner.predict(X)) == np.ndarray
Example #10
from skmultiflow.data import MultilabelGenerator
# The source module of the next import was truncated in the original snippet;
# MultiOutputLearner and OzaBaggingMLClassifier live in skmultiflow.meta,
# while DynamicWeightedMajorityMultiLabel is not part of upstream
# scikit-multiflow, so its module path is assumed here.
from skmultiflow.meta import (MultiOutputLearner,
                              DynamicWeightedMajorityMultiLabel,
                              OzaBaggingMLClassifier)
from skmultiflow.bayes import NaiveBayes
from skmultiflow.trees import HoeffdingTreeClassifier
from sklearn.linear_model import Perceptron
from skmultiflow.metrics import hamming_score
# from sklearn.linear_model import SGDClassifier

# Setup a data stream
n_features = 10
n_labels = 5
pretrain_size = 150
max_samples = 10000 + pretrain_size

stream = MultilabelGenerator(n_samples=max_samples,
                             random_state=1,
                             n_features=n_features,
                             n_targets=n_labels)

# Binary Relevance Model
br = MultiOutputLearner(Perceptron())
X, y = stream.next_sample(pretrain_size)
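# Passing classes=stream.target_values declares every possible target value
# up front, so the first partial_fit call can configure the model correctly.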
br.partial_fit(X, y, classes=stream.target_values)

stream.restart()

# Dynamic weighted classifier model
X, y = stream.next_sample(pretrain_size)
dwm_ml_base_estimator = MultiOutputLearner(Perceptron())
dwm_ml_base_estimator.partial_fit(X, y, classes=stream.target_values)
dwm_ml = DynamicWeightedMajorityMultiLabel(
    labels=n_labels, base_estimator=dwm_ml_base_estimator)
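The snippet ends after the models are set up. An illustrative continuation
(not part of the original source, assuming the usual skmultiflow
partial_fit/predict API) would run a prequential, test-then-train loop over
the remaining samples:

import numpy as np

correct = 0
n_seen = 0
while n_seen < max_samples - pretrain_size and stream.has_more_samples():
    X, y = stream.next_sample()
    y_pred = dwm_ml.predict(X)     # test first ...
    dwm_ml.partial_fit(X, y)       # ... then train
    if np.array_equal(y_pred[0], y[0]):
        correct += 1
    n_seen += 1
print('Exact-match ratio: {:.3f}'.format(correct / n_seen))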
# Imports
from skmultiflow.data import MultilabelGenerator
from skmultiflow.meta import MultiOutputLearner
from skmultiflow.trees import LabelCombinationHoeffdingTreeClassifier, HoeffdingTreeClassifier
from skmultiflow.metrics import hamming_score

# Setting up a data stream
stream = MultilabelGenerator(random_state=1, n_samples=200,
                             n_targets=5, n_features=10)

# Setup Label Combination Hoeffding Tree classifier
lc_ht = LabelCombinationHoeffdingTreeClassifier(n_labels=stream.n_targets)

# Setup variables to control loop and track performance
n_samples = 0
max_samples = 200
true_labels = []
predicts = []

# Train the estimator with the samples provided by the data stream
while n_samples < max_samples and stream.has_more_samples():
    X, y = stream.next_sample()
    y_pred = lc_ht.predict(X)
    lc_ht.partial_fit(X, y, classes=stream.target_values)
    predicts.extend(y_pred)
    true_labels.extend(y)
    n_samples += 1

# Display results
perf = hamming_score(true_labels, predicts)
print('{} samples analyzed.'.format(n_samples))
print('Label Combination Hoeffding Tree Hamming score: ' + str(perf))
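For reference, hamming_score as used here is the label-wise accuracy: the
fraction of individual label entries predicted correctly, i.e. the complement
of the Hamming loss. An equivalent computation, assuming that standard
definition:

import numpy as np

def hamming_score_sketch(y_true, y_pred):
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    # Fraction of label slots that match, over all samples and labels.
    return float(np.mean(y_true == y_pred))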
import numpy as np

from sklearn import __version__ as sklearn_version
from sklearn.linear_model import SGDClassifier

from skmultiflow.meta import ClassifierChain


def test_classifier_chains():
    seed = 112
    stream = MultilabelGenerator(random_state=seed,
                                 n_targets=3,
                                 n_samples=5150)

    estimator = SGDClassifier(random_state=seed, max_iter=10)
    learner = ClassifierChain(base_estimator=estimator, random_state=seed)
    X, y = stream.next_sample(150)
    learner.partial_fit(X, y)

    cnt = 0
    max_samples = 5000
    predictions = []
    true_labels = []
    wait_samples = 100
    correct_predictions = 0

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(learner.predict(X)[0])
            true_labels.append(y[0])
            if np.array_equal(y[0], predictions[-1]):
                correct_predictions += 1

        learner.partial_fit(X, y)
        cnt += 1

    if not sklearn_version.startswith("0.21"):
        expected_predictions = [[0., 0., 1.], [0., 0., 0.], [1., 0., 1.],
                                [1., 0., 1.], [0., 0., 1.], [1., 0., 0.],
                                [1., 0., 1.], [1., 0., 1.], [0., 0., 1.],
                                [0., 0., 0.], [1., 0., 1.], [0., 0., 1.],
                                [0., 0., 1.], [0., 0., 1.], [0., 0., 1.],
                                [0., 0., 1.], [1., 0., 1.], [0., 0., 0.],
                                [1., 0., 1.], [0., 0., 0.], [0., 1., 1.],
                                [0., 1., 1.], [0., 0., 1.], [0., 1., 1.],
                                [0., 1., 1.], [0., 1., 1.], [0., 1., 0.],
                                [0., 1., 0.], [1., 1., 1.], [0., 1., 0.],
                                [0., 1., 1.], [1., 0., 1.], [0., 1., 1.],
                                [0., 0., 0.], [0., 0., 0.], [1., 0., 0.],
                                [1., 1., 1.], [0., 1., 1.], [0., 0., 0.],
                                [1., 0., 1.], [0., 0., 1.], [0., 0., 0.],
                                [0., 0., 0.], [0., 0., 1.], [0., 1., 0.],
                                [0., 0., 0.], [1., 1., 1.], [0., 0., 0.],
                                [1., 1., 1.]]
        assert np.alltrue(np.array_equal(predictions, expected_predictions))

        expected_correct_predictions = 26
        assert correct_predictions == expected_correct_predictions

        expected_info = "ClassifierChain(base_estimator=SGDClassifier(max_iter=10, " \
                        "random_state=112), order=None, random_state=112)"
        info = " ".join([line.strip() for line in learner.get_info().split()])
        assert info == expected_info

    else:
        expected_predictions = [[0.0, 0.0, 1.0], [0.0, 0.0, 0.0],
                                [1.0, 0.0, 1.0], [1.0, 0.0, 1.0],
                                [0.0, 0.0, 1.0], [1.0, 0.0, 0.0],
                                [1.0, 0.0, 1.0], [1.0, 0.0, 1.0],
                                [0.0, 0.0, 1.0], [0.0, 0.0, 0.0],
                                [1.0, 0.0, 1.0], [0.0, 0.0, 1.0],
                                [0.0, 0.0, 1.0], [0.0, 0.0, 1.0],
                                [0.0, 0.0, 1.0], [0.0, 0.0, 1.0],
                                [1.0, 0.0, 1.0], [0.0, 0.0, 0.0],
                                [1.0, 0.0, 1.0], [0.0, 0.0, 0.0],
                                [0.0, 1.0, 1.0], [0.0, 1.0, 1.0],
                                [0.0, 0.0, 1.0], [0.0, 1.0, 1.0],
                                [0.0, 1.0, 1.0], [0.0, 1.0, 1.0],
                                [0.0, 1.0, 0.0], [0.0, 1.0, 0.0],
                                [1.0, 1.0, 1.0], [0.0, 1.0, 0.0],
                                [0.0, 1.0, 1.0], [1.0, 0.0, 1.0],
                                [0.0, 1.0, 1.0], [0.0, 0.0, 0.0],
                                [0.0, 0.0, 0.0], [1.0, 0.0, 0.0],
                                [1.0, 1.0, 1.0], [0.0, 1.0, 1.0],
                                [0.0, 0.0, 0.0], [1.0, 0.0, 1.0],
                                [0.0, 0.0, 1.0], [0.0, 0.0, 0.0],
                                [0.0, 0.0, 0.0], [0.0, 0.0, 1.0],
                                [0.0, 1.0, 0.0], [0.0, 0.0, 0.0],
                                [1.0, 1.0, 1.0], [0.0, 0.0, 0.0],
                                [1.0, 1.0, 1.0]]
        assert np.alltrue(np.array_equal(predictions, expected_predictions))

        expected_correct_predictions = 26
        assert correct_predictions == expected_correct_predictions

        expected_info = "ClassifierChain(base_estimator=SGDClassifier(max_iter=10, " \
                        "random_state=112), order=None, random_state=112)"
        info = " ".join([line.strip() for line in learner.get_info().split()])
        assert info == expected_info

    assert type(learner.predict(X)) == np.ndarray
Example #13
import numpy as np
from distutils.version import LooseVersion

from sklearn import __version__ as sklearn_version
from sklearn.linear_model import SGDClassifier

from skmultiflow.data import MultilabelGenerator
from skmultiflow.meta import MultiOutputLearner
from skmultiflow.metrics import hamming_score


def test_multi_output_learner_classifier():

    stream = MultilabelGenerator(n_samples=5150,
                                 n_features=15,
                                 n_targets=3,
                                 n_labels=4,
                                 random_state=112)

    estimator = SGDClassifier(random_state=112, max_iter=10, loss='log')
    classifier = MultiOutputLearner(base_estimator=estimator)

    X, y = stream.next_sample(150)
    classifier.partial_fit(X, y)

    cnt = 0
    max_samples = 5000
    predictions = []
    true_labels = []
    wait_samples = 100
    correct_predictions = 0

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(classifier.predict(X)[0])
            true_labels.append(y[0])
            if np.array_equal(y[0], predictions[-1]):
                correct_predictions += 1

        classifier.partial_fit(X, y)
        cnt += 1

    if LooseVersion(sklearn_version) < LooseVersion("0.21"):
        expected_predictions = [[1.0, 1.0, 1.0], [1.0, 0.0, 1.0],
                                [1.0, 1.0, 1.0], [1.0, 1.0, 1.0],
                                [1.0, 1.0, 1.0], [0.0, 1.0, 1.0],
                                [1.0, 1.0, 1.0], [0.0, 0.0, 1.0],
                                [1.0, 1.0, 1.0], [0.0, 0.0, 1.0],
                                [0.0, 1.0, 0.0], [1.0, 1.0, 1.0],
                                [0.0, 1.0, 1.0], [0.0, 1.0, 1.0],
                                [1.0, 1.0, 1.0], [1.0, 0.0, 0.0],
                                [0.0, 1.0, 1.0], [1.0, 0.0, 0.0],
                                [1.0, 0.0, 1.0], [0.0, 1.0, 0.0],
                                [0.0, 0.0, 1.0], [1.0, 0.0, 1.0],
                                [1.0, 1.0, 1.0], [1.0, 1.0, 1.0],
                                [0.0, 1.0, 1.0], [1.0, 1.0, 1.0],
                                [1.0, 1.0, 1.0], [1.0, 1.0, 1.0],
                                [0.0, 1.0, 1.0], [1.0, 1.0, 1.0],
                                [0.0, 1.0, 0.0], [0.0, 1.0, 1.0],
                                [1.0, 1.0, 1.0], [0.0, 1.0, 1.0],
                                [1.0, 0.0, 1.0], [0.0, 0.0, 1.0],
                                [0.0, 1.0, 1.0], [0.0, 1.0, 1.0],
                                [1.0, 0.0, 0.0], [0.0, 1.0, 1.0],
                                [0.0, 0.0, 1.0], [1.0, 1.0, 0.0],
                                [1.0, 0.0, 1.0], [1.0, 0.0, 1.0],
                                [0.0, 0.0, 1.0], [1.0, 1.0, 1.0],
                                [0.0, 1.0, 0.0], [1.0, 1.0, 1.0],
                                [0.0, 1.0, 1.0]]
        assert np.alltrue(np.array_equal(predictions, expected_predictions))

        expected_correct_predictions = 26
        assert correct_predictions == expected_correct_predictions

        expected_performance = 0.7755102040816326
        performance = hamming_score(true_labels, predictions)
        assert np.isclose(performance, expected_performance)

        expected_info = "MultiOutputLearner(base_estimator=SGDClassifier(loss='log', " \
                        "random_state=112))"
        info = " ".join(
            [line.strip() for line in classifier.get_info().split()])
        assert info == expected_info

    else:
        expected_predictions = [[1.0, 1.0, 1.0], [1.0, 0.0, 1.0],
                                [1.0, 1.0, 1.0], [1.0, 1.0, 1.0],
                                [1.0, 1.0, 1.0], [0.0, 1.0, 1.0],
                                [1.0, 1.0, 1.0], [1.0, 0.0, 1.0],
                                [1.0, 0.0, 1.0], [1.0, 0.0, 1.0],
                                [0.0, 1.0, 0.0], [1.0, 1.0, 1.0],
                                [0.0, 1.0, 1.0], [0.0, 1.0, 1.0],
                                [1.0, 1.0, 1.0], [1.0, 1.0, 0.0],
                                [1.0, 1.0, 1.0], [1.0, 0.0, 0.0],
                                [1.0, 0.0, 1.0], [1.0, 1.0, 1.0],
                                [1.0, 0.0, 1.0], [1.0, 1.0, 1.0],
                                [1.0, 1.0, 1.0], [1.0, 1.0, 1.0],
                                [0.0, 1.0, 1.0], [1.0, 1.0, 1.0],
                                [0.0, 1.0, 0.0], [1.0, 0.0, 1.0],
                                [0.0, 1.0, 1.0], [1.0, 1.0, 0.0],
                                [1.0, 1.0, 1.0], [0.0, 1.0, 1.0],
                                [1.0, 1.0, 1.0], [0.0, 1.0, 1.0],
                                [1.0, 1.0, 1.0], [1.0, 0.0, 1.0],
                                [0.0, 0.0, 1.0], [0.0, 0.0, 1.0],
                                [1.0, 0.0, 0.0], [0.0, 1.0, 1.0],
                                [0.0, 0.0, 1.0], [1.0, 1.0, 0.0],
                                [1.0, 0.0, 1.0], [0.0, 0.0, 1.0],
                                [0.0, 0.0, 1.0], [1.0, 1.0, 1.0],
                                [1.0, 0.0, 0.0], [1.0, 1.0, 1.0],
                                [0.0, 1.0, 1.0]]
        assert np.alltrue(np.array_equal(predictions, expected_predictions))

        expected_correct_predictions = 23
        assert correct_predictions == expected_correct_predictions

        expected_performance = 0.7482993197278911
        performance = hamming_score(true_labels, predictions)
        assert np.isclose(performance, expected_performance)

        expected_info = "MultiOutputLearner(base_estimator=SGDClassifier(loss='log', " \
                        "max_iter=10, random_state=112))"

        info = " ".join(
            [line.strip() for line in classifier.get_info().split()])
        assert info == expected_info

    assert type(classifier.predict(X)) == np.ndarray
    assert type(classifier.predict_proba(X)) == np.ndarray