Beispiel #1
0
def test_label_combination_hoeffding_tree_nb(test_path):
    """Regression-test LabelCombinationHoeffdingTreeClassifier with 'nb' leaves.

    The learner is trained one sample at a time on a seeded
    MultilabelGenerator stream. Every ``wait_samples`` samples (except the
    very first one) the prediction for the sample that was just learned is
    recorded, and the recorded sequence is compared against a fixed
    reference. The test also checks the return types of ``predict`` and
    ``predict_proba`` and the text returned by ``get_info``.

    Parameters
    ----------
    test_path
        Not used by this test; kept so the signature stays unchanged
        (presumably a pytest fixture -- confirm against conftest).
    """
    stream = MultilabelGenerator(n_samples=10000,
                                 n_features=15,
                                 n_targets=3,
                                 n_labels=4,
                                 random_state=112)

    stream.prepare_for_use()

    learner = LabelCombinationHoeffdingTreeClassifier(n_labels=3,
                                                      leaf_prediction='nb')

    max_samples = 5000
    wait_samples = 100
    predictions = []
    # Collected only so that predict_proba is exercised during training;
    # the probability values themselves are not asserted below.
    proba_predictions = []

    for cnt in range(max_samples):
        X, y = stream.next_sample()
        learner.partial_fit(X, y)
        # Record a prediction every `wait_samples` samples, skipping cnt == 0
        if cnt % wait_samples == 0 and cnt != 0:
            predictions.append(learner.predict(X)[0].tolist())
            proba_predictions.append(learner.predict_proba(X)[0])

    expected_predictions = [[0, 0, 1], [1, 1, 1], [0, 1, 1], [0, 1, 1],
                            [1, 1, 1], [0, 1, 1], [1, 1, 0], [1, 1, 1],
                            [1, 1, 1], [1, 1, 1], [0, 0, 0], [0, 1, 0],
                            [1, 0, 0], [1, 0, 1], [1, 1, 1], [1, 1, 1],
                            [0, 1, 1], [1, 1, 1], [0, 0, 0], [1, 1, 0],
                            [1, 0, 0], [1, 0, 1], [1, 1, 1], [0, 0, 1],
                            [1, 0, 1], [1, 1, 1], [1, 0, 0], [1, 1, 1],
                            [1, 1, 1], [0, 0, 1], [1, 1, 1], [0, 0, 0],
                            [0, 1, 0], [1, 1, 1], [0, 1, 1], [1, 1, 0],
                            [0, 0, 1], [0, 0, 0], [1, 1, 1], [1, 1, 1],
                            [1, 0, 1], [0, 1, 1], [1, 1, 1], [1, 1, 1],
                            [0, 1, 0], [0, 1, 0], [1, 1, 1], [1, 1, 1],
                            [1, 1, 1]]

    # Both operands are plain nested lists, so `==` already yields a single
    # bool; the former np.alltrue wrapper (removed in NumPy 2.0) added nothing.
    assert predictions == expected_predictions
    assert isinstance(learner.predict(X), np.ndarray)
    assert isinstance(learner.predict_proba(X), np.ndarray)

    expected_info = "LabelCombinationHoeffdingTreeClassifier(binary_split=False, grace_period=200, " \
                    "leaf_prediction='nb', max_byte_size=33554432, memory_estimate_period=1000000, n_labels=3, " \
                    "nb_threshold=0, no_preprune=False, nominal_attributes=None, remove_poor_atts=False, " \
                    "split_confidence=1e-07, split_criterion='info_gain', stop_mem_management=False, " \
                    "tie_threshold=0.05)"
    # split() with no argument already discards all surrounding whitespace,
    # so joining the tokens with single spaces normalises the layout.
    info = " ".join(learner.get_info().split())
    assert info == expected_info
Beispiel #2
0
def test_label_combination_hoeffding_tree_coverage():
    """Cover the memory-management code path of the label combination tree.

    A learner with a small ``max_byte_size`` and a short
    ``memory_estimate_period`` is fitted on a single batch drawn from a
    seeded MultilabelGenerator stream; afterwards the measured size of the
    learner must not exceed the configured limit.
    """
    # Cover memory management
    n_instances = 10000
    size_limit_kb = 50

    generator_settings = {
        'n_samples': 10000,
        'n_features': 15,
        'n_targets': 3,
        'n_labels': 4,
        'random_state': 112,
    }
    stream = MultilabelGenerator(**generator_settings)

    # Unconstrained model has over 62 kB
    size_limit_bytes = size_limit_kb * 2**10
    learner = LabelCombinationHoeffdingTreeClassifier(
        n_labels=3,
        leaf_prediction='mc',
        memory_estimate_period=200,
        max_byte_size=size_limit_bytes,
    )

    # Draw the whole batch at once and fit on it in a single call
    features, targets = stream.next_sample(n_instances)
    learner.partial_fit(features, targets)

    measured_kb = calculate_object_size(learner, 'kB')
    assert measured_kb <= size_limit_kb
# Imports
from skmultiflow.data import MultilabelGenerator
from skmultiflow.meta import MultiOutputLearner
from skmultiflow.trees import LabelCombinationHoeffdingTreeClassifier, HoeffdingTreeClassifier
from skmultiflow.metrics import hamming_score

# Example: test-then-train evaluation of a Label Combination Hoeffding Tree
# on a synthetic multi-label stream, scored with the Hamming score.

# Setting up a data stream
stream = MultilabelGenerator(random_state=1, n_samples=200,
                             n_targets=5, n_features=10)

# Setup Label Combination Hoeffding Tree classifier
lc_ht = LabelCombinationHoeffdingTreeClassifier(n_labels=stream.n_targets)

# Setup variables to control loop and track performance
n_samples = 0
max_samples = 200
true_labels = []
predicts = []

# Test-then-train: each sample is predicted first and only afterwards used
# to update the estimator, so every prediction is made on unseen data.
while n_samples < max_samples and stream.has_more_samples():
    X, y = stream.next_sample()
    y_pred = lc_ht.predict(X)
    lc_ht.partial_fit(X, y, classes=stream.target_values)
    predicts.extend(y_pred)
    true_labels.extend(y)
    n_samples += 1

# Display results
perf = hamming_score(true_labels, predicts)
print('{} samples analyzed.'.format(n_samples))
# Report the computed score as well; previously it was calculated but
# never shown.
print('Label Combination Hoeffding Tree Hamming score: {}'.format(perf))
 },
 "mcc": {
     "name": "Monte Carlo Sampling Classifier Chains",
     "model": lambda _: MonteCarloClassifierChain(),
     "ensemble": False
 },
 "pcc": {
     "name": "Probabilistic Sampling Classifier Chains",
     "model": lambda _: ProbabilisticClassifierChain(SGDClassifier(max_iter=100, loss='log', random_state=1)),
     "ensemble": False
 },
 "lcht": {
     "name": "Label Combination Hoeffding Tree",
     "model": lambda data_stream: LabelCombinationHoeffdingTreeClassifier(
         n_labels=data_stream.n_targets,
         stop_mem_management=True,
         memory_estimate_period=100,
         remove_poor_atts=False  # There is a bug when True
     ),
     "ensemble": False
 },
 "awec": {
     "name": "Accuracy Weighted Ensemble Classifier",
     "model": lambda data_stream: MultiOutputLearner(
         NaiveBayes(),
         n_targets=data_stream.n_targets
     ),
     "ensemble": lambda model, _: AccuracyWeightedEnsemble(
         base_estimator=model
     )
 },
 "me": {