def test_evaluate_delayed_multi_target_classification_coverage(tmpdir):
    # A simple coverage test. Tests for metrics are placed in the corresponding test module.
    from skmultiflow.data import MultilabelGenerator
    from skmultiflow.meta import MultiOutputLearner

    max_samples = 1000

    # Stream
    data = MultilabelGenerator(n_samples=max_samples, random_state=1)
    # Get X and y
    X, y = data.next_sample(max_samples)
    time = generate_random_dates(seed=1, samples=max_samples)

    # Setup temporal stream
    stream = TemporalDataStream(X, y, time, ordered=True)

    # Learner
    mol = MultiOutputLearner()

    output_file = os.path.join(str(tmpdir), "prequential_delayed_summary.csv")
    metrics = ['hamming_score', 'hamming_loss', 'exact_match', 'j_index']
    evaluator = EvaluatePrequentialDelayed(max_samples=max_samples,
                                           metrics=metrics,
                                           output_file=output_file)

    evaluator.evaluate(stream=stream, model=[mol], model_names=['MOL1'])
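# `generate_random_dates` is a module-level helper that is not reproduced in this
# excerpt. A minimal sketch of what such a helper might look like, assuming the
# TemporalDataStream above accepts a numpy datetime64 array with one timestamp per
# sample; the base date and offset range below are illustrative assumptions only.
def generate_random_dates(seed, samples):
    rng = np.random.default_rng(seed)
    base = np.datetime64('2020-01-01')
    # Random offsets (in seconds) within a single day, added to the base date
    offsets = rng.integers(0, 60 * 60 * 24, size=samples)
    return base + offsets.astype('timedelta64[s]')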
def test_label_combination_hoeffding_tree_nb(test_path):
    stream = MultilabelGenerator(n_samples=10000, n_features=15, n_targets=3, n_labels=4,
                                 random_state=112)
    stream.prepare_for_use()

    learner = LabelCombinationHoeffdingTreeClassifier(n_labels=3, leaf_prediction='nb')

    cnt = 0
    max_samples = 5000
    predictions = []
    proba_predictions = []
    wait_samples = 100

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test every n samples
        learner.partial_fit(X, y)
        if cnt % wait_samples == 0 and (cnt != 0):
            predictions.append(learner.predict(X)[0].tolist())
            proba_predictions.append(learner.predict_proba(X)[0])
        cnt += 1

    print(predictions)
    expected_predictions = [
        [0, 0, 1], [1, 1, 1], [0, 1, 1], [0, 1, 1], [1, 1, 1], [0, 1, 1], [1, 1, 0],
        [1, 1, 1], [1, 1, 1], [1, 1, 1], [0, 0, 0], [0, 1, 0], [1, 0, 0], [1, 0, 1],
        [1, 1, 1], [1, 1, 1], [0, 1, 1], [1, 1, 1], [0, 0, 0], [1, 1, 0], [1, 0, 0],
        [1, 0, 1], [1, 1, 1], [0, 0, 1], [1, 0, 1], [1, 1, 1], [1, 0, 0], [1, 1, 1],
        [1, 1, 1], [0, 0, 1], [1, 1, 1], [0, 0, 0], [0, 1, 0], [1, 1, 1], [0, 1, 1],
        [1, 1, 0], [0, 0, 1], [0, 0, 0], [1, 1, 1], [1, 1, 1], [1, 0, 1], [0, 1, 1],
        [1, 1, 1], [1, 1, 1], [0, 1, 0], [0, 1, 0], [1, 1, 1], [1, 1, 1], [1, 1, 1]]

    assert np.alltrue(predictions == expected_predictions)
    assert type(learner.predict(X)) == np.ndarray
    assert type(learner.predict_proba(X)) == np.ndarray

    expected_info = "LabelCombinationHoeffdingTreeClassifier(binary_split=False, grace_period=200, " \
                    "leaf_prediction='nb', max_byte_size=33554432, memory_estimate_period=1000000, n_labels=3, " \
                    "nb_threshold=0, no_preprune=False, nominal_attributes=None, remove_poor_atts=False, " \
                    "split_confidence=1e-07, split_criterion='info_gain', stop_mem_management=False, " \
                    "tie_threshold=0.05)"
    info = " ".join([line.strip() for line in learner.get_info().split()])
    assert info == expected_info
def test_lc_hoeffding_tree(test_path):
    stream = MultilabelGenerator(n_samples=10000, n_features=15, n_targets=3, n_labels=4,
                                 random_state=112)
    stream.prepare_for_use()

    learner = LCHT(n_labels=3)

    cnt = 0
    max_samples = 5000
    predictions = []
    proba_predictions = []
    wait_samples = 100

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test every n samples
        learner.partial_fit(X, y)
        if cnt % wait_samples == 0 and (cnt != 0):
            predictions.append(learner.predict(X)[0].tolist())
            proba_predictions.append(learner.predict_proba(X)[0])
        cnt += 1

    print(predictions)
    expected_predictions = [
        [0, 0, 1], [1, 1, 1], [0, 1, 1], [0, 1, 1], [1, 1, 1], [0, 1, 1], [1, 1, 0],
        [1, 1, 1], [1, 1, 1], [1, 1, 1], [0, 0, 0], [0, 1, 0], [1, 0, 0], [1, 0, 1],
        [1, 1, 1], [1, 1, 1], [0, 1, 1], [1, 1, 1], [0, 0, 0], [1, 1, 0], [1, 0, 0],
        [1, 0, 1], [1, 1, 1], [0, 0, 1], [1, 0, 1], [1, 1, 1], [1, 0, 0], [1, 1, 1],
        [1, 1, 1], [0, 0, 1], [1, 1, 1], [0, 0, 0], [0, 1, 0], [1, 1, 1], [0, 1, 1],
        [1, 1, 0], [0, 0, 1], [0, 0, 0], [1, 1, 1], [1, 1, 1], [1, 0, 1], [0, 1, 1],
        [1, 1, 1], [1, 1, 1], [0, 1, 0], [0, 1, 0], [1, 1, 1], [1, 1, 1], [1, 1, 1]]

    assert np.alltrue(predictions == expected_predictions)
    assert type(learner.predict(X)) == np.ndarray
    assert type(learner.predict_proba(X)) == np.ndarray
def test_evaluate_multi_target_classification_coverage(tmpdir):
    # A simple coverage test. Tests for metrics are placed in the corresponding test module.
    from skmultiflow.data import MultilabelGenerator
    from skmultiflow.meta import MultiOutputLearner

    max_samples = 1000

    # Stream
    stream = MultilabelGenerator(n_samples=max_samples, random_state=1)
    stream.prepare_for_use()

    # Learner
    mol = MultiOutputLearner()

    output_file = os.path.join(str(tmpdir), "prequential_summary.csv")
    metrics = ['hamming_score', 'hamming_loss', 'exact_match', 'j_index']
    evaluator = EvaluatePrequential(max_samples=max_samples,
                                    metrics=metrics,
                                    output_file=output_file)

    evaluator.evaluate(stream=stream, model=[mol], model_names=['MOL'])
def test_label_combination_hoeffding_tree_coverage():
    # Cover memory management
    max_samples = 10000
    max_size_kb = 50
    stream = MultilabelGenerator(n_samples=10000, n_features=15, n_targets=3, n_labels=4,
                                 random_state=112)

    # Unconstrained model has over 62 kB
    learner = LabelCombinationHoeffdingTreeClassifier(n_labels=3,
                                                      leaf_prediction='mc',
                                                      memory_estimate_period=200,
                                                      max_byte_size=max_size_kb * 2**10)

    X, y = stream.next_sample(max_samples)
    learner.partial_fit(X, y)

    assert calculate_object_size(learner, 'kB') <= max_size_kb
def test_classifier_chains():
    stream = MultilabelGenerator(random_state=112, n_targets=3, n_samples=5150)
    stream.prepare_for_use()
    estimator = SGDClassifier(random_state=112, max_iter=10)
    learner = ClassifierChain(base_estimator=estimator, random_state=112)

    X, y = stream.next_sample(150)
    learner.partial_fit(X, y)

    cnt = 0
    max_samples = 5000
    predictions = []
    true_labels = []
    wait_samples = 100
    correct_predictions = 0

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(learner.predict(X)[0])
            true_labels.append(y[0])
            if np.array_equal(y[0], predictions[-1]):
                correct_predictions += 1
        learner.partial_fit(X, y)
        cnt += 1

    expected_predictions = [
        [0.0, 0.0, 1.0], [1.0, 0.0, 0.0], [1.0, 0.0, 1.0], [1.0, 1.0, 1.0], [1.0, 0.0, 1.0],
        [1.0, 0.0, 0.0], [1.0, 0.0, 1.0], [1.0, 0.0, 1.0], [0.0, 0.0, 1.0], [0.0, 0.0, 0.0],
        [1.0, 0.0, 0.0], [0.0, 1.0, 1.0], [0.0, 0.0, 1.0], [0.0, 0.0, 1.0], [0.0, 0.0, 1.0],
        [0.0, 0.0, 1.0], [1.0, 1.0, 1.0], [1.0, 0.0, 1.0], [1.0, 1.0, 1.0], [0.0, 0.0, 0.0],
        [0.0, 0.0, 1.0], [0.0, 0.0, 1.0], [0.0, 0.0, 1.0], [0.0, 1.0, 1.0], [0.0, 1.0, 1.0],
        [1.0, 1.0, 1.0], [0.0, 0.0, 0.0], [0.0, 1.0, 0.0], [1.0, 1.0, 1.0], [1.0, 1.0, 0.0],
        [0.0, 1.0, 0.0], [1.0, 0.0, 1.0], [1.0, 1.0, 1.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0],
        [1.0, 0.0, 0.0], [1.0, 1.0, 1.0], [0.0, 1.0, 1.0], [0.0, 0.0, 0.0], [1.0, 0.0, 1.0],
        [0.0, 0.0, 1.0], [0.0, 0.0, 0.0], [1.0, 0.0, 1.0], [0.0, 0.0, 1.0], [0.0, 1.0, 0.0],
        [0.0, 0.0, 0.0], [1.0, 1.0, 1.0], [0.0, 0.0, 0.0], [1.0, 1.0, 1.0]]
    expected_correct_predictions = 21

    assert np.alltrue(np.array_equal(predictions, expected_predictions))
    assert correct_predictions == expected_correct_predictions
    assert type(learner.predict(X)) == np.ndarray
def test_multi_output_learner_classifier():
    stream = MultilabelGenerator(n_samples=5150, n_features=15, n_targets=3, n_labels=4,
                                 random_state=112)
    stream.prepare_for_use()
    estimator = SGDClassifier(random_state=112, tol=1e-3, max_iter=10, loss='log')
    classifier = MultiOutputLearner(base_estimator=estimator)

    X, y = stream.next_sample(150)
    classifier.partial_fit(X, y)

    cnt = 0
    max_samples = 5000
    predictions = []
    true_labels = []
    wait_samples = 100
    correct_predictions = 0

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(classifier.predict(X)[0])
            true_labels.append(y[0])
            if np.array_equal(y[0], predictions[-1]):
                correct_predictions += 1
        classifier.partial_fit(X, y)
        cnt += 1

    if StrictVersion(sklearn_version) < StrictVersion("0.21"):
        expected_predictions = [
            [1.0, 1.0, 1.0], [1.0, 0.0, 1.0], [1.0, 1.0, 1.0], [1.0, 1.0, 1.0], [1.0, 1.0, 1.0],
            [0.0, 1.0, 1.0], [1.0, 1.0, 1.0], [0.0, 0.0, 1.0], [1.0, 1.0, 1.0], [0.0, 0.0, 1.0],
            [0.0, 1.0, 0.0], [1.0, 1.0, 1.0], [0.0, 1.0, 1.0], [0.0, 1.0, 1.0], [1.0, 1.0, 1.0],
            [1.0, 0.0, 0.0], [0.0, 1.0, 1.0], [1.0, 0.0, 0.0], [1.0, 0.0, 1.0], [0.0, 1.0, 0.0],
            [0.0, 0.0, 1.0], [1.0, 0.0, 1.0], [1.0, 1.0, 1.0], [1.0, 1.0, 1.0], [0.0, 1.0, 1.0],
            [1.0, 1.0, 1.0], [1.0, 1.0, 1.0], [1.0, 1.0, 1.0], [0.0, 1.0, 1.0], [1.0, 1.0, 1.0],
            [0.0, 1.0, 0.0], [0.0, 1.0, 1.0], [1.0, 1.0, 1.0], [0.0, 1.0, 1.0], [1.0, 0.0, 1.0],
            [0.0, 0.0, 1.0], [0.0, 1.0, 1.0], [0.0, 1.0, 1.0], [1.0, 0.0, 0.0], [0.0, 1.0, 1.0],
            [0.0, 0.0, 1.0], [1.0, 1.0, 0.0], [1.0, 0.0, 1.0], [1.0, 0.0, 1.0], [0.0, 0.0, 1.0],
            [1.0, 1.0, 1.0], [0.0, 1.0, 0.0], [1.0, 1.0, 1.0], [0.0, 1.0, 1.0]]
        assert np.alltrue(np.array_equal(predictions, expected_predictions))

        expected_correct_predictions = 26
        assert correct_predictions == expected_correct_predictions

        expected_performance = 0.7755102040816326
        performance = hamming_score(true_labels, predictions)
        assert np.isclose(performance, expected_performance)

        expected_info = "MultiOutputLearner(base_estimator=SGDClassifier(alpha=0.0001, average=False, " \
                        "class_weight=None,\n" \
                        " early_stopping=False, epsilon=0.1, eta0=0.0, fit_intercept=True,\n" \
                        " l1_ratio=0.15, learning_rate='optimal', loss='log', max_iter=10,\n" \
                        " n_iter=None, n_iter_no_change=5, n_jobs=None, penalty='l2',\n" \
                        " power_t=0.5, random_state=112, shuffle=True, tol=0.001,\n" \
                        " validation_fraction=0.1, verbose=0, warm_start=False))"
        assert classifier.get_info() == expected_info
    else:
        expected_predictions = [
            [1.0, 1.0, 1.0], [1.0, 0.0, 1.0], [1.0, 1.0, 1.0], [1.0, 1.0, 1.0], [1.0, 1.0, 1.0],
            [0.0, 1.0, 1.0], [1.0, 1.0, 1.0], [1.0, 0.0, 1.0], [1.0, 0.0, 1.0], [1.0, 0.0, 1.0],
            [0.0, 1.0, 0.0], [1.0, 1.0, 1.0], [0.0, 1.0, 1.0], [0.0, 1.0, 1.0], [1.0, 1.0, 1.0],
            [1.0, 1.0, 0.0], [1.0, 1.0, 1.0], [1.0, 0.0, 0.0], [1.0, 0.0, 1.0], [1.0, 1.0, 1.0],
            [1.0, 0.0, 1.0], [1.0, 1.0, 1.0], [1.0, 1.0, 1.0], [1.0, 1.0, 1.0], [0.0, 1.0, 1.0],
            [1.0, 1.0, 1.0], [0.0, 1.0, 0.0], [1.0, 0.0, 1.0], [0.0, 1.0, 1.0], [1.0, 1.0, 0.0],
            [1.0, 1.0, 1.0], [0.0, 1.0, 1.0], [1.0, 1.0, 1.0], [0.0, 1.0, 1.0], [1.0, 1.0, 1.0],
            [1.0, 0.0, 1.0], [0.0, 0.0, 1.0], [0.0, 0.0, 1.0], [1.0, 0.0, 0.0], [0.0, 1.0, 1.0],
            [0.0, 0.0, 1.0], [1.0, 1.0, 0.0], [1.0, 0.0, 1.0], [0.0, 0.0, 1.0], [0.0, 0.0, 1.0],
            [1.0, 1.0, 1.0], [1.0, 0.0, 0.0], [1.0, 1.0, 1.0], [0.0, 1.0, 1.0]]
        assert np.alltrue(np.array_equal(predictions, expected_predictions))

        expected_correct_predictions = 23
        assert correct_predictions == expected_correct_predictions

        expected_performance = 0.7482993197278911
        performance = hamming_score(true_labels, predictions)
        assert np.isclose(performance, expected_performance)

        expected_info = "MultiOutputLearner(base_estimator=SGDClassifier(alpha=0.0001, average=False, " \
                        "class_weight=None,\n" \
                        " early_stopping=False, epsilon=0.1, eta0=0.0, fit_intercept=True,\n" \
                        " l1_ratio=0.15, learning_rate='optimal', loss='log', max_iter=10,\n" \
                        " n_iter_no_change=5, n_jobs=None, penalty='l2', power_t=0.5,\n" \
                        " random_state=112, shuffle=True, tol=0.001,\n" \
                        " validation_fraction=0.1, verbose=0, warm_start=False))"
        assert classifier.get_info() == expected_info

    assert type(classifier.predict(X)) == np.ndarray
    assert type(classifier.predict_proba(X)) == np.ndarray
def test_multi_output_learner():
    stream = MultilabelGenerator(n_samples=5150, n_features=15, n_targets=3, n_labels=4,
                                 random_state=112)
    stream.prepare_for_use()
    classifier = MultiOutputLearner(base_estimator=HoeffdingTree())

    X, y = stream.next_sample(150)
    classifier.partial_fit(X, y)

    cnt = 0
    max_samples = 5000
    predictions = []
    true_labels = []
    wait_samples = 100
    correct_predictions = 0

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(classifier.predict(X)[0])
            true_labels.append(y[0])
            if np.array_equal(y[0], predictions[-1]):
                correct_predictions += 1
        classifier.partial_fit(X, y)
        cnt += 1

    perf = hamming_score(true_labels, predictions)

    expected_predictions = [
        [1., 1., 1.], [1., 1., 1.], [1., 1., 1.], [1., 1., 1.], [1., 1., 1.], [0., 1., 1.],
        [1., 0., 1.], [1., 0., 1.], [1., 1., 1.], [0., 0., 1.], [0., 1., 1.], [0., 1., 1.],
        [1., 1., 1.], [0., 1., 1.], [1., 1., 0.], [1., 1., 1.], [0., 1., 1.], [1., 0., 0.],
        [1., 0., 1.], [1., 1., 1.], [1., 0., 1.], [1., 1., 1.], [1., 1., 1.], [1., 1., 1.],
        [1., 1., 1.], [1., 1., 1.], [1., 1., 1.], [1., 0., 0.], [0., 1., 1.], [1., 1., 0.],
        [1., 1., 1.], [0., 1., 1.], [1., 1., 1.], [0., 1., 1.], [1., 0., 1.], [1., 0., 1.],
        [0., 0., 1.], [0., 1., 1.], [1., 1., 0.], [0., 1., 1.], [1., 1., 1.], [1., 1., 1.],
        [1., 0., 1.], [1., 1., 1.], [1., 1., 1.], [1., 0., 1.], [1., 1., 1.], [1., 1., 1.],
        [0., 1., 1.]]
    expected_correct_predictions = 32
    expected_performance = 0.8503401360544217

    assert np.alltrue(np.array_equal(predictions, expected_predictions))
    assert np.isclose(expected_performance, perf)
    assert correct_predictions == expected_correct_predictions

    assert type(classifier.predict(X)) == np.ndarray
    assert type(classifier.predict_proba(X)) == np.ndarray
def test_classifier_chains():
    seed = 112
    stream = MultilabelGenerator(random_state=seed, n_targets=3, n_samples=5150)
    stream.prepare_for_use()
    estimator = SGDClassifier(random_state=seed, tol=1e-3, max_iter=10)
    learner = ClassifierChain(base_estimator=estimator, random_state=seed)

    X, y = stream.next_sample(150)
    learner.partial_fit(X, y)

    cnt = 0
    max_samples = 5000
    predictions = []
    true_labels = []
    wait_samples = 100
    correct_predictions = 0

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(learner.predict(X)[0])
            true_labels.append(y[0])
            if np.array_equal(y[0], predictions[-1]):
                correct_predictions += 1
        learner.partial_fit(X, y)
        cnt += 1

    if not sklearn_version.startswith("0.21"):
        expected_predictions = [
            [0.0, 0.0, 1.0], [1.0, 0.0, 0.0], [1.0, 0.0, 1.0], [1.0, 1.0, 1.0], [1.0, 0.0, 1.0],
            [1.0, 0.0, 0.0], [1.0, 0.0, 1.0], [1.0, 0.0, 1.0], [0.0, 0.0, 1.0], [0.0, 0.0, 0.0],
            [1.0, 0.0, 0.0], [0.0, 1.0, 1.0], [0.0, 0.0, 1.0], [0.0, 0.0, 1.0], [0.0, 0.0, 1.0],
            [0.0, 0.0, 1.0], [1.0, 1.0, 1.0], [1.0, 0.0, 1.0], [1.0, 1.0, 1.0], [0.0, 0.0, 0.0],
            [0.0, 0.0, 1.0], [0.0, 0.0, 1.0], [0.0, 0.0, 1.0], [0.0, 1.0, 1.0], [0.0, 1.0, 1.0],
            [1.0, 1.0, 1.0], [0.0, 0.0, 0.0], [0.0, 1.0, 0.0], [1.0, 1.0, 1.0], [1.0, 1.0, 0.0],
            [0.0, 1.0, 0.0], [1.0, 0.0, 1.0], [1.0, 1.0, 1.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0],
            [1.0, 0.0, 0.0], [1.0, 1.0, 1.0], [0.0, 1.0, 1.0], [0.0, 0.0, 0.0], [1.0, 0.0, 1.0],
            [0.0, 0.0, 1.0], [0.0, 0.0, 0.0], [1.0, 0.0, 1.0], [0.0, 0.0, 1.0], [0.0, 1.0, 0.0],
            [0.0, 0.0, 0.0], [1.0, 1.0, 1.0], [0.0, 0.0, 0.0], [1.0, 1.0, 1.0]]
        assert np.alltrue(np.array_equal(predictions, expected_predictions))

        expected_correct_predictions = 21
        assert correct_predictions == expected_correct_predictions

        expected_info = "ClassifierChain(base_estimator=SGDClassifier(alpha=0.0001, average=False, class_weight=None,\n" \
                        " early_stopping=False, epsilon=0.1, eta0=0.0, fit_intercept=True,\n" \
                        " l1_ratio=0.15, learning_rate='optimal', loss='hinge', max_iter=10,\n" \
                        " n_iter=None, n_iter_no_change=5, n_jobs=None, penalty='l2',\n" \
                        " power_t=0.5, random_state=112, shuffle=True, tol=0.001,\n" \
                        " validation_fraction=0.1, verbose=0, warm_start=False),\n" \
                        " order=None, random_state=112)"
        assert learner.get_info() == expected_info
    else:
        expected_predictions = [
            [0.0, 0.0, 1.0], [0.0, 0.0, 0.0], [1.0, 0.0, 1.0], [1.0, 0.0, 1.0], [0.0, 0.0, 1.0],
            [1.0, 0.0, 0.0], [1.0, 0.0, 1.0], [1.0, 0.0, 1.0], [0.0, 0.0, 1.0], [0.0, 0.0, 0.0],
            [1.0, 0.0, 1.0], [0.0, 0.0, 1.0], [0.0, 0.0, 1.0], [0.0, 0.0, 1.0], [0.0, 0.0, 1.0],
            [0.0, 0.0, 1.0], [1.0, 0.0, 1.0], [0.0, 0.0, 0.0], [1.0, 0.0, 1.0], [0.0, 0.0, 0.0],
            [0.0, 1.0, 1.0], [0.0, 1.0, 1.0], [0.0, 0.0, 1.0], [0.0, 1.0, 1.0], [0.0, 1.0, 1.0],
            [0.0, 1.0, 1.0], [0.0, 1.0, 0.0], [0.0, 1.0, 0.0], [1.0, 1.0, 1.0], [0.0, 1.0, 0.0],
            [0.0, 1.0, 1.0], [1.0, 0.0, 1.0], [0.0, 1.0, 1.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0],
            [1.0, 0.0, 0.0], [1.0, 1.0, 1.0], [0.0, 1.0, 1.0], [0.0, 0.0, 0.0], [1.0, 0.0, 1.0],
            [0.0, 0.0, 1.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 1.0], [0.0, 1.0, 0.0],
            [0.0, 0.0, 0.0], [1.0, 1.0, 1.0], [0.0, 0.0, 0.0], [1.0, 1.0, 1.0]]
        assert np.alltrue(np.array_equal(predictions, expected_predictions))

        expected_correct_predictions = 26
        assert correct_predictions == expected_correct_predictions

        expected_info = "ClassifierChain(base_estimator=SGDClassifier(alpha=0.0001, average=False, class_weight=None,\n" \
                        " early_stopping=False, epsilon=0.1, eta0=0.0, fit_intercept=True,\n" \
                        " l1_ratio=0.15, learning_rate='optimal', loss='hinge', max_iter=10,\n" \
                        " n_iter_no_change=5, n_jobs=None, penalty='l2', power_t=0.5,\n" \
                        " random_state=112, shuffle=True, tol=0.001,\n" \
                        " validation_fraction=0.1, verbose=0, warm_start=False),\n" \
                        " order=None, random_state=112)"
        assert learner.get_info() == expected_info

    assert type(learner.predict(X)) == np.ndarray
                                 DynamicWeightedMajorityMultiLabel, OzaBaggingMLClassifier)
from skmultiflow.bayes import NaiveBayes
from skmultiflow.trees import HoeffdingTreeClassifier
from sklearn.linear_model import Perceptron
from skmultiflow.metrics import hamming_score
# from sklearn.linear_model import SGDClassifier

# Setup a data stream
n_features = 10
n_labels = 5
pretrain_size = 150
max_samples = 10000 + pretrain_size
stream = MultilabelGenerator(n_samples=max_samples, random_state=1,
                             n_features=n_features, n_targets=n_labels)

# Binary Relevance Model
br = MultiOutputLearner(Perceptron())
X, y = stream.next_sample(pretrain_size)
br.partial_fit(X, y, classes=stream.target_values)
stream.restart()

# Dynamic weighted classifier model
X, y = stream.next_sample(pretrain_size)
dwm_ml_base_estimator = MultiOutputLearner(Perceptron())
dwm_ml_base_estimator.partial_fit(X, y, classes=stream.target_values)
dwm_ml = DynamicWeightedMajorityMultiLabel(labels=n_labels,
                                           base_estimator=dwm_ml_base_estimator)
# Imports
from skmultiflow.data import MultilabelGenerator
from skmultiflow.meta import MultiOutputLearner
from skmultiflow.trees import LabelCombinationHoeffdingTreeClassifier, HoeffdingTreeClassifier
from skmultiflow.metrics import hamming_score

# Setting up a data stream
stream = MultilabelGenerator(random_state=1, n_samples=200, n_targets=5, n_features=10)

# Setup Label Combination Hoeffding Tree classifier
lc_ht = LabelCombinationHoeffdingTreeClassifier(n_labels=stream.n_targets)

# Setup variables to control loop and track performance
n_samples = 0
max_samples = 200
true_labels = []
predicts = []

# Train the estimator with the samples provided by the data stream
while n_samples < max_samples and stream.has_more_samples():
    X, y = stream.next_sample()
    y_pred = lc_ht.predict(X)
    lc_ht.partial_fit(X, y, classes=stream.target_values)
    predicts.extend(y_pred)
    true_labels.extend(y)
    n_samples += 1

# Display results
perf = hamming_score(true_labels, predicts)
print('{} samples analyzed.'.format(n_samples))
print('Label Combination Hoeffding Tree Hamming score: ' + str(perf))
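# hamming_score (used for `perf` above) is the label-based accuracy: the fraction of
# individual labels predicted correctly, i.e. the complement of the Hamming loss.
# A minimal, hand-checkable sketch; the arrays below are illustrative values, not
# taken from the stream above.
import numpy as np
from skmultiflow.metrics import hamming_score

y_true = np.array([[1, 0, 1],
                   [0, 1, 1]])
y_pred = np.array([[1, 1, 1],   # 2 of 3 labels correct
                   [0, 1, 0]])  # 2 of 3 labels correct
print(hamming_score(y_true, y_pred))  # 4 correct labels out of 6 -> ~0.667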
def test_classifier_chains():
    seed = 112
    stream = MultilabelGenerator(random_state=seed, n_targets=3, n_samples=5150)

    estimator = SGDClassifier(random_state=seed, max_iter=10)
    learner = ClassifierChain(base_estimator=estimator, random_state=seed)

    X, y = stream.next_sample(150)
    learner.partial_fit(X, y)

    cnt = 0
    max_samples = 5000
    predictions = []
    true_labels = []
    wait_samples = 100
    correct_predictions = 0

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(learner.predict(X)[0])
            true_labels.append(y[0])
            if np.array_equal(y[0], predictions[-1]):
                correct_predictions += 1
        learner.partial_fit(X, y)
        cnt += 1

    if not sklearn_version.startswith("0.21"):
        expected_predictions = [
            [0., 0., 1.], [0., 0., 0.], [1., 0., 1.], [1., 0., 1.], [0., 0., 1.], [1., 0., 0.],
            [1., 0., 1.], [1., 0., 1.], [0., 0., 1.], [0., 0., 0.], [1., 0., 1.], [0., 0., 1.],
            [0., 0., 1.], [0., 0., 1.], [0., 0., 1.], [0., 0., 1.], [1., 0., 1.], [0., 0., 0.],
            [1., 0., 1.], [0., 0., 0.], [0., 1., 1.], [0., 1., 1.], [0., 0., 1.], [0., 1., 1.],
            [0., 1., 1.], [0., 1., 1.], [0., 1., 0.], [0., 1., 0.], [1., 1., 1.], [0., 1., 0.],
            [0., 1., 1.], [1., 0., 1.], [0., 1., 1.], [0., 0., 0.], [0., 0., 0.], [1., 0., 0.],
            [1., 1., 1.], [0., 1., 1.], [0., 0., 0.], [1., 0., 1.], [0., 0., 1.], [0., 0., 0.],
            [0., 0., 0.], [0., 0., 1.], [0., 1., 0.], [0., 0., 0.], [1., 1., 1.], [0., 0., 0.],
            [1., 1., 1.]]
        assert np.alltrue(np.array_equal(predictions, expected_predictions))

        expected_correct_predictions = 26
        assert correct_predictions == expected_correct_predictions

        expected_info = "ClassifierChain(base_estimator=SGDClassifier(max_iter=10, " \
                        "random_state=112), order=None, random_state=112)"
        info = " ".join([line.strip() for line in learner.get_info().split()])
        assert info == expected_info
    else:
        expected_predictions = [
            [0.0, 0.0, 1.0], [0.0, 0.0, 0.0], [1.0, 0.0, 1.0], [1.0, 0.0, 1.0], [0.0, 0.0, 1.0],
            [1.0, 0.0, 0.0], [1.0, 0.0, 1.0], [1.0, 0.0, 1.0], [0.0, 0.0, 1.0], [0.0, 0.0, 0.0],
            [1.0, 0.0, 1.0], [0.0, 0.0, 1.0], [0.0, 0.0, 1.0], [0.0, 0.0, 1.0], [0.0, 0.0, 1.0],
            [0.0, 0.0, 1.0], [1.0, 0.0, 1.0], [0.0, 0.0, 0.0], [1.0, 0.0, 1.0], [0.0, 0.0, 0.0],
            [0.0, 1.0, 1.0], [0.0, 1.0, 1.0], [0.0, 0.0, 1.0], [0.0, 1.0, 1.0], [0.0, 1.0, 1.0],
            [0.0, 1.0, 1.0], [0.0, 1.0, 0.0], [0.0, 1.0, 0.0], [1.0, 1.0, 1.0], [0.0, 1.0, 0.0],
            [0.0, 1.0, 1.0], [1.0, 0.0, 1.0], [0.0, 1.0, 1.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0],
            [1.0, 0.0, 0.0], [1.0, 1.0, 1.0], [0.0, 1.0, 1.0], [0.0, 0.0, 0.0], [1.0, 0.0, 1.0],
            [0.0, 0.0, 1.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 1.0], [0.0, 1.0, 0.0],
            [0.0, 0.0, 0.0], [1.0, 1.0, 1.0], [0.0, 0.0, 0.0], [1.0, 1.0, 1.0]]
        assert np.alltrue(np.array_equal(predictions, expected_predictions))

        expected_correct_predictions = 26
        assert correct_predictions == expected_correct_predictions

        expected_info = "ClassifierChain(base_estimator=SGDClassifier(max_iter=10, " \
                        "random_state=112), order=None, random_state=112)"
        info = " ".join([line.strip() for line in learner.get_info().split()])
        assert info == expected_info

    assert type(learner.predict(X)) == np.ndarray
def test_multi_output_learner_classifier():
    stream = MultilabelGenerator(n_samples=5150, n_features=15, n_targets=3, n_labels=4,
                                 random_state=112)

    estimator = SGDClassifier(random_state=112, max_iter=10, loss='log')
    classifier = MultiOutputLearner(base_estimator=estimator)

    X, y = stream.next_sample(150)
    classifier.partial_fit(X, y)

    cnt = 0
    max_samples = 5000
    predictions = []
    true_labels = []
    wait_samples = 100
    correct_predictions = 0

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(classifier.predict(X)[0])
            true_labels.append(y[0])
            if np.array_equal(y[0], predictions[-1]):
                correct_predictions += 1
        classifier.partial_fit(X, y)
        cnt += 1

    if LooseVersion(sklearn_version) < LooseVersion("0.21"):
        expected_predictions = [
            [1.0, 1.0, 1.0], [1.0, 0.0, 1.0], [1.0, 1.0, 1.0], [1.0, 1.0, 1.0], [1.0, 1.0, 1.0],
            [0.0, 1.0, 1.0], [1.0, 1.0, 1.0], [0.0, 0.0, 1.0], [1.0, 1.0, 1.0], [0.0, 0.0, 1.0],
            [0.0, 1.0, 0.0], [1.0, 1.0, 1.0], [0.0, 1.0, 1.0], [0.0, 1.0, 1.0], [1.0, 1.0, 1.0],
            [1.0, 0.0, 0.0], [0.0, 1.0, 1.0], [1.0, 0.0, 0.0], [1.0, 0.0, 1.0], [0.0, 1.0, 0.0],
            [0.0, 0.0, 1.0], [1.0, 0.0, 1.0], [1.0, 1.0, 1.0], [1.0, 1.0, 1.0], [0.0, 1.0, 1.0],
            [1.0, 1.0, 1.0], [1.0, 1.0, 1.0], [1.0, 1.0, 1.0], [0.0, 1.0, 1.0], [1.0, 1.0, 1.0],
            [0.0, 1.0, 0.0], [0.0, 1.0, 1.0], [1.0, 1.0, 1.0], [0.0, 1.0, 1.0], [1.0, 0.0, 1.0],
            [0.0, 0.0, 1.0], [0.0, 1.0, 1.0], [0.0, 1.0, 1.0], [1.0, 0.0, 0.0], [0.0, 1.0, 1.0],
            [0.0, 0.0, 1.0], [1.0, 1.0, 0.0], [1.0, 0.0, 1.0], [1.0, 0.0, 1.0], [0.0, 0.0, 1.0],
            [1.0, 1.0, 1.0], [0.0, 1.0, 0.0], [1.0, 1.0, 1.0], [0.0, 1.0, 1.0]]
        assert np.alltrue(np.array_equal(predictions, expected_predictions))

        expected_correct_predictions = 26
        assert correct_predictions == expected_correct_predictions

        expected_performance = 0.7755102040816326
        performance = hamming_score(true_labels, predictions)
        assert np.isclose(performance, expected_performance)

        expected_info = "MultiOutputLearner(base_estimator=SGDClassifier(loss='log', " \
                        "random_state=112))"
        info = " ".join([line.strip() for line in classifier.get_info().split()])
        assert info == expected_info
    else:
        expected_predictions = [
            [1.0, 1.0, 1.0], [1.0, 0.0, 1.0], [1.0, 1.0, 1.0], [1.0, 1.0, 1.0], [1.0, 1.0, 1.0],
            [0.0, 1.0, 1.0], [1.0, 1.0, 1.0], [1.0, 0.0, 1.0], [1.0, 0.0, 1.0], [1.0, 0.0, 1.0],
            [0.0, 1.0, 0.0], [1.0, 1.0, 1.0], [0.0, 1.0, 1.0], [0.0, 1.0, 1.0], [1.0, 1.0, 1.0],
            [1.0, 1.0, 0.0], [1.0, 1.0, 1.0], [1.0, 0.0, 0.0], [1.0, 0.0, 1.0], [1.0, 1.0, 1.0],
            [1.0, 0.0, 1.0], [1.0, 1.0, 1.0], [1.0, 1.0, 1.0], [1.0, 1.0, 1.0], [0.0, 1.0, 1.0],
            [1.0, 1.0, 1.0], [0.0, 1.0, 0.0], [1.0, 0.0, 1.0], [0.0, 1.0, 1.0], [1.0, 1.0, 0.0],
            [1.0, 1.0, 1.0], [0.0, 1.0, 1.0], [1.0, 1.0, 1.0], [0.0, 1.0, 1.0], [1.0, 1.0, 1.0],
            [1.0, 0.0, 1.0], [0.0, 0.0, 1.0], [0.0, 0.0, 1.0], [1.0, 0.0, 0.0], [0.0, 1.0, 1.0],
            [0.0, 0.0, 1.0], [1.0, 1.0, 0.0], [1.0, 0.0, 1.0], [0.0, 0.0, 1.0], [0.0, 0.0, 1.0],
            [1.0, 1.0, 1.0], [1.0, 0.0, 0.0], [1.0, 1.0, 1.0], [0.0, 1.0, 1.0]]
        assert np.alltrue(np.array_equal(predictions, expected_predictions))

        expected_correct_predictions = 23
        assert correct_predictions == expected_correct_predictions

        expected_performance = 0.7482993197278911
        performance = hamming_score(true_labels, predictions)
        assert np.isclose(performance, expected_performance)

        expected_info = "MultiOutputLearner(base_estimator=SGDClassifier(loss='log', " \
                        "max_iter=10, random_state=112))"
        info = " ".join([line.strip() for line in classifier.get_info().split()])
        assert info == expected_info

    assert type(classifier.predict(X)) == np.ndarray
    assert type(classifier.predict_proba(X)) == np.ndarray