def test_multi_output_learner_regressor():
    """Check MultiOutputLearner wrapping an SGDRegressor on a 2-target stream.

    The learner is pre-trained on 150 samples and then trained one sample at
    a time for 5000 more. Every 100th sample (except the first) is predicted
    before it is learned, and the mean absolute error over those predictions
    is compared against a pinned reference value.
    """
    stream = RegressionGenerator(n_samples=5500, n_features=10,
                                 n_informative=20, n_targets=2,
                                 random_state=1)
    stream.prepare_for_use()

    estimator = SGDRegressor(random_state=112, tol=1e-3, max_iter=10,
                             loss='squared_loss')
    learner = MultiOutputLearner(base_estimator=estimator)

    # Warm-up batch so the wrapped estimators exist before the first predict.
    X, y = stream.next_sample(150)
    learner.partial_fit(X, y)

    cnt = 0
    max_samples = 5000
    wait_samples = 100
    predictions = []
    true_targets = []

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test-then-train: evaluate every `wait_samples` samples, skipping 0.
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(learner.predict(X)[0])
            true_targets.append(y[0])
        learner.partial_fit(X, y)
        cnt += 1

    expected_performance = 2.444365309339395
    performance = mean_absolute_error(true_targets, predictions)
    assert np.isclose(performance, expected_performance)

    assert learner._estimator_type == "regressor"
    assert isinstance(learner.predict(X), np.ndarray)

    # A regressor-backed learner is expected to expose no predict_proba.
    with pytest.raises(AttributeError):
        learner.predict_proba(X)
def demo(output_file=None, instances=40000):
    """ _test_prequential_mol

    This demo shows the evaluation process of a MOL classifier, initialized
    with sklearn's SGDClassifier.

    Parameters
    ----------
    output_file: string
        The name of the csv output file

    instances: int
        Number of samples the multi-label generator is created with. The
        evaluator pre-trains on 5000 samples and is capped at
        ``instances - 10000`` evaluated samples.
        NOTE(review): values below 10000 produce a negative ``max_samples``;
        confirm how EvaluatePrequential treats that.

    """
    # Setup the stream
    stream = MultilabelGenerator(n_samples=instances)
    stream.prepare_for_use()

    # Setup the classifier.
    # `n_iter` was dropped from SGDClassifier (scikit-learn 0.21); `max_iter`
    # is the supported keyword and is what the rest of this code base uses.
    classifier = MultiOutputLearner(SGDClassifier(max_iter=100))

    # Setup the pipeline
    pipe = Pipeline([('Classifier', classifier)])

    # Setup the evaluator
    evaluator = EvaluatePrequential(
        pretrain_size=5000,
        max_samples=instances - 10000,
        batch_size=1,
        n_wait=200,
        max_time=1000,
        output_file=output_file,
        show_plot=True,
        metrics=['hamming_score', 'j_index', 'exact_match'],
    )

    # Evaluate
    evaluator.evaluate(stream=stream, model=pipe)
def test_multi_output_learner_classifier():
    """Check MultiOutputLearner wrapping an SGDClassifier on a 3-target stream.

    The classifier is pre-trained on 150 samples and then trained one sample
    at a time for 5000 more. Every 100th sample (except the first) is
    predicted before it is learned. Predictions, exact-match count, Hamming
    score and the ``get_info()`` text are compared with reference values
    that depend on the installed scikit-learn version.
    """
    stream = MultilabelGenerator(n_samples=5150, n_features=15, n_targets=3,
                                 n_labels=4, random_state=112)

    estimator = SGDClassifier(random_state=112, max_iter=10, loss='log')
    classifier = MultiOutputLearner(base_estimator=estimator)

    X, y = get_next_n_samples(stream, 150)
    classifier.partial_fit(X, y)

    cnt = 0
    max_samples = 5000
    wait_samples = 100
    predictions = []
    true_labels = []
    correct_predictions = 0

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test-then-train: evaluate every `wait_samples` samples, skipping 0.
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(classifier.predict(X)[0])
            true_labels.append(y[0])
            if np.array_equal(y[0], predictions[-1]):
                correct_predictions += 1
        classifier.partial_fit(X, y)
        cnt += 1

    # Each branch only selects reference values; the assertions live once
    # below so that neither branch can silently skip a check.
    if LooseVersion(sklearn_version) < LooseVersion("0.21"):
        expected_predictions = [
            [1.0, 1.0, 1.0], [1.0, 0.0, 1.0], [1.0, 1.0, 1.0], [1.0, 1.0, 1.0],
            [1.0, 1.0, 1.0], [0.0, 1.0, 1.0], [1.0, 1.0, 1.0], [0.0, 0.0, 1.0],
            [1.0, 1.0, 1.0], [0.0, 0.0, 1.0], [0.0, 1.0, 0.0], [1.0, 1.0, 1.0],
            [0.0, 1.0, 1.0], [0.0, 1.0, 1.0], [1.0, 1.0, 1.0], [1.0, 0.0, 0.0],
            [0.0, 1.0, 1.0], [1.0, 0.0, 0.0], [1.0, 0.0, 1.0], [0.0, 1.0, 0.0],
            [0.0, 0.0, 1.0], [1.0, 0.0, 1.0], [1.0, 1.0, 1.0], [1.0, 1.0, 1.0],
            [0.0, 1.0, 1.0], [1.0, 1.0, 1.0], [1.0, 1.0, 1.0], [1.0, 1.0, 1.0],
            [0.0, 1.0, 1.0], [1.0, 1.0, 1.0], [0.0, 1.0, 0.0], [0.0, 1.0, 1.0],
            [1.0, 1.0, 1.0], [0.0, 1.0, 1.0], [1.0, 0.0, 1.0], [0.0, 0.0, 1.0],
            [0.0, 1.0, 1.0], [0.0, 1.0, 1.0], [1.0, 0.0, 0.0], [0.0, 1.0, 1.0],
            [0.0, 0.0, 1.0], [1.0, 1.0, 0.0], [1.0, 0.0, 1.0], [1.0, 0.0, 1.0],
            [0.0, 0.0, 1.0], [1.0, 1.0, 1.0], [0.0, 1.0, 0.0], [1.0, 1.0, 1.0],
            [0.0, 1.0, 1.0],
        ]
        expected_correct_predictions = 26
        expected_performance = 0.7755102040816326
        expected_info = (
            "MultiOutputLearner(base_estimator=SGDClassifier(loss='log', "
            "random_state=112))"
        )
    else:
        expected_predictions = [
            [1.0, 1.0, 1.0], [1.0, 0.0, 1.0], [1.0, 1.0, 1.0], [1.0, 1.0, 1.0],
            [1.0, 1.0, 1.0], [0.0, 1.0, 1.0], [1.0, 1.0, 1.0], [1.0, 0.0, 1.0],
            [1.0, 0.0, 1.0], [1.0, 0.0, 1.0], [0.0, 1.0, 0.0], [1.0, 1.0, 1.0],
            [0.0, 1.0, 1.0], [0.0, 1.0, 1.0], [1.0, 1.0, 1.0], [1.0, 1.0, 0.0],
            [1.0, 1.0, 1.0], [1.0, 0.0, 0.0], [1.0, 0.0, 1.0], [1.0, 1.0, 1.0],
            [1.0, 0.0, 1.0], [1.0, 1.0, 1.0], [1.0, 1.0, 1.0], [1.0, 1.0, 1.0],
            [0.0, 1.0, 1.0], [1.0, 1.0, 1.0], [0.0, 1.0, 0.0], [1.0, 0.0, 1.0],
            [0.0, 1.0, 1.0], [1.0, 1.0, 0.0], [1.0, 1.0, 1.0], [0.0, 1.0, 1.0],
            [1.0, 1.0, 1.0], [0.0, 1.0, 1.0], [1.0, 1.0, 1.0], [1.0, 0.0, 1.0],
            [0.0, 0.0, 1.0], [0.0, 0.0, 1.0], [1.0, 0.0, 0.0], [0.0, 1.0, 1.0],
            [0.0, 0.0, 1.0], [1.0, 1.0, 0.0], [1.0, 0.0, 1.0], [0.0, 0.0, 1.0],
            [0.0, 0.0, 1.0], [1.0, 1.0, 1.0], [1.0, 0.0, 0.0], [1.0, 1.0, 1.0],
            [0.0, 1.0, 1.0],
        ]
        expected_correct_predictions = 23
        expected_performance = 0.7482993197278911
        expected_info = (
            "MultiOutputLearner(base_estimator=SGDClassifier(loss='log', "
            "max_iter=10, random_state=112))"
        )

    # np.array_equal already returns a single bool; the former np.alltrue
    # wrapper was redundant (and np.alltrue no longer exists in NumPy 2.0).
    assert np.array_equal(predictions, expected_predictions)
    assert correct_predictions == expected_correct_predictions

    performance = hamming_score(true_labels, predictions)
    assert np.isclose(performance, expected_performance)

    # Collapse every run of whitespace so line wrapping in get_info() does
    # not affect the comparison.
    info = " ".join(classifier.get_info().split())
    assert info == expected_info

    assert isinstance(classifier.predict(X), np.ndarray)
    assert isinstance(classifier.predict_proba(X), np.ndarray)
def test_multi_output_learner_classifier():
    """Check MultiOutputLearner wrapping an SGDClassifier on a 3-target stream.

    The classifier is pre-trained on 150 samples and then trained one sample
    at a time for 5000 more. Every 100th sample (except the first) is
    predicted before it is learned. Predictions, exact-match count, Hamming
    score and the ``get_info()`` text are compared with reference values
    that depend on the installed scikit-learn version.
    """
    stream = MultilabelGenerator(n_samples=5150, n_features=15, n_targets=3,
                                 n_labels=4, random_state=112)
    stream.prepare_for_use()

    estimator = SGDClassifier(random_state=112, tol=1e-3, max_iter=10,
                              loss='log')
    classifier = MultiOutputLearner(base_estimator=estimator)

    X, y = stream.next_sample(150)
    classifier.partial_fit(X, y)

    cnt = 0
    max_samples = 5000
    wait_samples = 100
    predictions = []
    true_labels = []
    correct_predictions = 0

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test-then-train: evaluate every `wait_samples` samples, skipping 0.
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(classifier.predict(X)[0])
            true_labels.append(y[0])
            if np.array_equal(y[0], predictions[-1]):
                correct_predictions += 1
        classifier.partial_fit(X, y)
        cnt += 1

    # Each branch only selects reference values; the assertions live once
    # below so that neither branch can silently skip a check.
    # NOTE(review): the leading whitespace inside the multi-line
    # `expected_info` literals is reproduced as found; confirm it against the
    # real get_info() output.
    if StrictVersion(sklearn_version) < StrictVersion("0.21"):
        expected_predictions = [
            [1.0, 1.0, 1.0], [1.0, 0.0, 1.0], [1.0, 1.0, 1.0], [1.0, 1.0, 1.0],
            [1.0, 1.0, 1.0], [0.0, 1.0, 1.0], [1.0, 1.0, 1.0], [0.0, 0.0, 1.0],
            [1.0, 1.0, 1.0], [0.0, 0.0, 1.0], [0.0, 1.0, 0.0], [1.0, 1.0, 1.0],
            [0.0, 1.0, 1.0], [0.0, 1.0, 1.0], [1.0, 1.0, 1.0], [1.0, 0.0, 0.0],
            [0.0, 1.0, 1.0], [1.0, 0.0, 0.0], [1.0, 0.0, 1.0], [0.0, 1.0, 0.0],
            [0.0, 0.0, 1.0], [1.0, 0.0, 1.0], [1.0, 1.0, 1.0], [1.0, 1.0, 1.0],
            [0.0, 1.0, 1.0], [1.0, 1.0, 1.0], [1.0, 1.0, 1.0], [1.0, 1.0, 1.0],
            [0.0, 1.0, 1.0], [1.0, 1.0, 1.0], [0.0, 1.0, 0.0], [0.0, 1.0, 1.0],
            [1.0, 1.0, 1.0], [0.0, 1.0, 1.0], [1.0, 0.0, 1.0], [0.0, 0.0, 1.0],
            [0.0, 1.0, 1.0], [0.0, 1.0, 1.0], [1.0, 0.0, 0.0], [0.0, 1.0, 1.0],
            [0.0, 0.0, 1.0], [1.0, 1.0, 0.0], [1.0, 0.0, 1.0], [1.0, 0.0, 1.0],
            [0.0, 0.0, 1.0], [1.0, 1.0, 1.0], [0.0, 1.0, 0.0], [1.0, 1.0, 1.0],
            [0.0, 1.0, 1.0],
        ]
        expected_correct_predictions = 26
        expected_performance = 0.7755102040816326
        expected_info = (
            "MultiOutputLearner(base_estimator=SGDClassifier(alpha=0.0001, average=False, "
            "class_weight=None,\n"
            " early_stopping=False, epsilon=0.1, eta0=0.0, fit_intercept=True,\n"
            " l1_ratio=0.15, learning_rate='optimal', loss='log', max_iter=10,\n"
            " n_iter=None, n_iter_no_change=5, n_jobs=None, penalty='l2',\n"
            " power_t=0.5, random_state=112, shuffle=True, tol=0.001,\n"
            " validation_fraction=0.1, verbose=0, warm_start=False))"
        )
    else:
        expected_predictions = [
            [1.0, 1.0, 1.0], [1.0, 0.0, 1.0], [1.0, 1.0, 1.0], [1.0, 1.0, 1.0],
            [1.0, 1.0, 1.0], [0.0, 1.0, 1.0], [1.0, 1.0, 1.0], [1.0, 0.0, 1.0],
            [1.0, 0.0, 1.0], [1.0, 0.0, 1.0], [0.0, 1.0, 0.0], [1.0, 1.0, 1.0],
            [0.0, 1.0, 1.0], [0.0, 1.0, 1.0], [1.0, 1.0, 1.0], [1.0, 1.0, 0.0],
            [1.0, 1.0, 1.0], [1.0, 0.0, 0.0], [1.0, 0.0, 1.0], [1.0, 1.0, 1.0],
            [1.0, 0.0, 1.0], [1.0, 1.0, 1.0], [1.0, 1.0, 1.0], [1.0, 1.0, 1.0],
            [0.0, 1.0, 1.0], [1.0, 1.0, 1.0], [0.0, 1.0, 0.0], [1.0, 0.0, 1.0],
            [0.0, 1.0, 1.0], [1.0, 1.0, 0.0], [1.0, 1.0, 1.0], [0.0, 1.0, 1.0],
            [1.0, 1.0, 1.0], [0.0, 1.0, 1.0], [1.0, 1.0, 1.0], [1.0, 0.0, 1.0],
            [0.0, 0.0, 1.0], [0.0, 0.0, 1.0], [1.0, 0.0, 0.0], [0.0, 1.0, 1.0],
            [0.0, 0.0, 1.0], [1.0, 1.0, 0.0], [1.0, 0.0, 1.0], [0.0, 0.0, 1.0],
            [0.0, 0.0, 1.0], [1.0, 1.0, 1.0], [1.0, 0.0, 0.0], [1.0, 1.0, 1.0],
            [0.0, 1.0, 1.0],
        ]
        expected_correct_predictions = 23
        expected_performance = 0.7482993197278911
        expected_info = (
            "MultiOutputLearner(base_estimator=SGDClassifier(alpha=0.0001, average=False, "
            "class_weight=None,\n"
            " early_stopping=False, epsilon=0.1, eta0=0.0, fit_intercept=True,\n"
            " l1_ratio=0.15, learning_rate='optimal', loss='log', max_iter=10,\n"
            " n_iter_no_change=5, n_jobs=None, penalty='l2', power_t=0.5,\n"
            " random_state=112, shuffle=True, tol=0.001,\n"
            " validation_fraction=0.1, verbose=0, warm_start=False))"
        )

    # np.array_equal already returns a single bool; the former np.alltrue
    # wrapper was redundant (and np.alltrue no longer exists in NumPy 2.0).
    assert np.array_equal(predictions, expected_predictions)
    assert correct_predictions == expected_correct_predictions

    performance = hamming_score(true_labels, predictions)
    assert np.isclose(performance, expected_performance)

    assert classifier.get_info() == expected_info

    assert isinstance(classifier.predict(X), np.ndarray)
    assert isinstance(classifier.predict_proba(X), np.ndarray)
def test_multi_output_learner():
    """Check MultiOutputLearner wrapping a HoeffdingTree on a 3-target stream.

    The classifier is pre-trained on 150 samples and then trained one sample
    at a time for 5000 more. Every 100th sample (except the first) is
    predicted before it is learned. Predictions, exact-match count and
    Hamming score are compared against pinned reference values.
    """
    stream = MultilabelGenerator(n_samples=5150, n_features=15, n_targets=3,
                                 n_labels=4, random_state=112)
    stream.prepare_for_use()

    classifier = MultiOutputLearner(base_estimator=HoeffdingTree())

    X, y = stream.next_sample(150)
    classifier.partial_fit(X, y)

    cnt = 0
    max_samples = 5000
    wait_samples = 100
    predictions = []
    true_labels = []
    correct_predictions = 0

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test-then-train: evaluate every `wait_samples` samples, skipping 0.
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(classifier.predict(X)[0])
            true_labels.append(y[0])
            if np.array_equal(y[0], predictions[-1]):
                correct_predictions += 1
        classifier.partial_fit(X, y)
        cnt += 1

    perf = hamming_score(true_labels, predictions)

    expected_predictions = [
        [1., 1., 1.], [1., 1., 1.], [1., 1., 1.], [1., 1., 1.],
        [1., 1., 1.], [0., 1., 1.], [1., 0., 1.], [1., 0., 1.],
        [1., 1., 1.], [0., 0., 1.], [0., 1., 1.], [0., 1., 1.],
        [1., 1., 1.], [0., 1., 1.], [1., 1., 0.], [1., 1., 1.],
        [0., 1., 1.], [1., 0., 0.], [1., 0., 1.], [1., 1., 1.],
        [1., 0., 1.], [1., 1., 1.], [1., 1., 1.], [1., 1., 1.],
        [1., 1., 1.], [1., 1., 1.], [1., 1., 1.], [1., 0., 0.],
        [0., 1., 1.], [1., 1., 0.], [1., 1., 1.], [0., 1., 1.],
        [1., 1., 1.], [0., 1., 1.], [1., 0., 1.], [1., 0., 1.],
        [0., 0., 1.], [0., 1., 1.], [1., 1., 0.], [0., 1., 1.],
        [1., 1., 1.], [1., 1., 1.], [1., 0., 1.], [1., 1., 1.],
        [1., 1., 1.], [1., 0., 1.], [1., 1., 1.], [1., 1., 1.],
        [0., 1., 1.],
    ]
    expected_correct_predictions = 32
    expected_performance = 0.8503401360544217

    # np.array_equal already returns a single bool; the former np.alltrue
    # wrapper was redundant (and np.alltrue no longer exists in NumPy 2.0).
    assert np.array_equal(predictions, expected_predictions)
    assert np.isclose(expected_performance, perf)
    assert correct_predictions == expected_correct_predictions

    assert isinstance(classifier.predict(X), np.ndarray)
    assert isinstance(classifier.predict_proba(X), np.ndarray)
from skmultiflow.data import MultilabelGenerator
from skmultiflow.meta.multi_output_learner import MultiOutputLearner
from skmultiflow.trees import HoeffdingTreeClassifier
from skmultiflow.data.file_stream import FileStream
from sklearn.linear_model import Perceptron
from skmultiflow.metrics import hamming_score

# Example: test-then-train evaluation of a MultiOutputLearner on a generated
# multi-label stream, reporting the Hamming score at the end.

# Generated multi-label stream (200 samples, 10 features, 5 targets)
stream = MultilabelGenerator(
    random_state=1,
    n_samples=200,
    n_targets=5,
    n_features=10,
)

# Wrap a Hoeffding tree classifier in the multi-output learner
ht = HoeffdingTreeClassifier()
br = MultiOutputLearner(ht)

# Warm-up: fit on the first 150 samples, declaring the target values up front
X, y = stream.next_sample(150)
br.partial_fit(X, y, classes=stream.target_values)

# Bookkeeping used to compute the Hamming score once the stream is exhausted
true_labels = []
predicts = []
count = 0

while stream.has_more_samples():
    X, y = stream.next_sample()
    # Predict first, then learn from the same sample
    p = br.predict(X)
    br.partial_fit(X, y)
    true_labels.extend(y)
    predicts.extend(p)
    count += 1

perf = hamming_score(true_labels, predicts)

print('Total samples analyzed: ' + str(count))
print("The classifier's static Hamming score    : " + str(perf))
from skmultiflow.trees import LabelCombinationHoeffdingTreeClassifier,\ iSOUPTreeRegressor, \ HoeffdingTreeClassifier from common.helpers import (load_custom_dataset, load_moa_stream, evaluar, repeatInstances) from common.evaluation_metrics import evaluation_metrics TIME_STR = "%Y%m%d_%H%M%S" SUPPORTED_MODELS = { "br": { "name": "Binary Relevance - Perceptron", "model": lambda data_stream: MultiOutputLearner( Perceptron(), n_targets=data_stream.n_targets ), "ensemble": False }, "br_ht": { "name": "Binary Relevance - Hoeffding Tree", "model": lambda data_stream: MultiOutputLearner( HoeffdingTreeClassifier(), n_targets=data_stream.n_targets ), "ensemble": False }, "br_nb": { "name": "Binary Relevance - Naive Bayes", "model": lambda data_stream: MultiOutputLearner( NaiveBayes(),
batch_size=60, max_samples=max_samples, metrics=[ 'true_vs_predicted', 'mean_square_error', 'mean_absolute_error', 'running_time', 'model_size' ]) evaluator.evaluate(stream=stream, model=model, model_names=[model_name]) else: # For Multi-AP approach if args.chained: print("Using Regressor Chain for Multi-label") multiOutputModel = RegressorChain(model, random_state=1) mode = "rc" else: print("Using Binary Relevance for Multi-label") multiOutputModel = MultiOutputLearner(base_estimator=model) mode = "br" if args.holdout: evaluator = EvaluateHoldout( output_file=model_name + "_eval_one_label_v2_holdout_" + mode + ".txt", show_plot=args.show_plot, n_wait=60, test_size=60, batch_size=60, max_samples=max_samples, metrics=[ 'average_mean_square_error', 'average_mean_absolute_error', 'running_time' ])