def demo():
    """ _test_mol

    Exercise the MOL (multi-output) learner on a file stream backed by the
    music.csv dataset.

    A first batch of samples is used to pre-train the model through
    ``partial_fit``. Every remaining batch returned by the stream is then
    only predicted (the model is not updated any further), and the Hamming
    score of all collected predictions is logged together with the elapsed
    evaluation time and the number of batches processed.

    """
    # Plain-message logging at INFO level
    logging.basicConfig(format='%(message)s', level=logging.INFO)

    # File stream over the music dataset
    stream = FileStream(FileOption("FILE", "OPT_NAME", "../datasets/music.csv", "CSV", False), 0, 6)
    stream.prepare_for_use()

    # Multi-output learner built on a Perceptron, wrapped in a one-step pipeline.
    # Other base estimators that can be plugged in here: the learner's default
    # (Logistic Regression) or an SGDClassifier.
    pipe = Pipeline([('classifier', MultiOutputLearner(h=Perceptron()))])

    # Pre-training phase
    pretrain_size = 150
    logging.info('Pre training on %s samples', str(pretrain_size))
    X, y = stream.next_instance(pretrain_size)
    pipe.partial_fit(X, y, classes=stream.get_classes())

    # Prediction-only phase over the rest of the stream
    true_labels, predicts = [], []
    count = 0
    init_time = timer()
    logging.info('Evaluating...')
    while stream.has_more_instances():
        X, y = stream.next_instance()
        predicts.extend(pipe.predict(X))
        true_labels.extend(y)
        count += 1

    # Report
    perf = hamming_score(true_labels, predicts)
    logging.info('Evaluation time: %s s', str(timer() - init_time))
    logging.info('Total samples analyzed: %s', str(count))
    logging.info('The classifier\'s static Hamming score : %0.3f' % perf)
def demo(input_file, output_file=None):
    """ _test_mtr_regression

    Prequential evaluation of a multi-target regressor.

    The samples come from a RegressionGenerator configured with 7 targets,
    and the model is a MultiTargetRegressionHoeffdingTree using the
    'adaptive' leaf prediction, wrapped in a single-step Pipeline.

    Parameters
    ----------
    input_file: string
        A string describing the path for the input dataset.
        NOTE: this argument is not read by the function body; the stream is
        produced by the generator.

    output_file: string
        The name of the csv output file

    """
    # Synthetic multi-target regression stream
    stream = RegressionGenerator(
        n_samples=5000,
        n_features=20,
        n_informative=15,
        random_state=1,
        n_targets=7,
    )
    stream.prepare_for_use()

    # Model under evaluation, wrapped in a pipeline
    regressor = MultiTargetRegressionHoeffdingTree(leaf_prediction='adaptive')
    pipe = Pipeline([('Classifier', regressor)])

    # Evaluator settings
    reported_metrics = [
        'average_mean_square_error',
        'average_mean_absolute_error',
        'average_root_mean_square_error',
    ]
    evaluator = EvaluatePrequential(
        pretrain_size=1,
        batch_size=1,
        n_wait=200,
        max_time=1000,
        output_file=output_file,
        show_plot=False,
        metrics=reported_metrics,
    )

    # Run the evaluation
    evaluator.evaluate(stream=stream, model=pipe)
def demo(instances=2000):
    """ _test_comparison_prequential

    Prequential evaluation with more than one learner, i.e. a comparison
    task. An SGDClassifier is compared against a pipeline made of a
    OneHotToCategorical transform followed by a KNNAdwin classifier, both
    fed from a file stream over covtype.csv.

    Parameters
    ----------
    instances: int
        The evaluation's maximum number of instances.

    """
    # Stream setup (a SEAGenerator could be used here instead)
    stream = FileStream("../datasets/covtype.csv", -1, 1)
    stream.prepare_for_use()

    # First learner: plain SGD classifier
    clf = SGDClassifier()

    # Second learner: KNN with ADWIN behind a one-hot -> categorical transform.
    # The transform receives two groups of consecutive indices: 10..13 and 14..53.
    clf_one = KNNAdwin(k=8, max_window_size=1000, leaf_size=30)
    t_one = OneHotToCategorical([list(range(10, 14)), list(range(14, 54))])
    pipe_one = Pipeline([('one_hot_to_categorical', t_one), ('KNN', clf_one)])

    # Learners to compare
    classifier = [clf, pipe_one]

    # Evaluator settings
    evaluator = EvaluatePrequential(
        pretrain_size=2000,
        output_file='teste.csv',
        max_samples=instances,
        batch_size=1,
        n_wait=200,
        max_time=1000,
        show_plot=True,
        metrics=['performance', 'kappa_t'],
    )

    # Run the comparison
    evaluator.evaluate(stream=stream, model=classifier)
def demo(output_file=None, instances=40000):
    """ _test_prequential

    Produce a prequential evaluation.

    A file stream reads its samples from the sea_big.csv file inside the
    datasets folder. The learner is an instance of sklearn's
    PassiveAggressiveClassifier, placed in a single-step Pipeline, and the
    pipeline is handed to an EvaluatePrequential object which runs the
    evaluation.

    Parameters
    ----------
    output_file: string
        The name of the csv output file

    instances: int
        The evaluation's max number of instances

    """
    # File stream over sea_big.csv (covtype.csv or a WaveformGenerator are
    # possible alternatives)
    stream = FileStream(
        FileOption("FILE", "OPT_NAME", "../datasets/sea_big.csv", "CSV", False), -1, 1
    )
    stream.prepare_for_use()

    # Learner wrapped in a pipeline
    pipe = Pipeline([('Classifier', PassiveAggressiveClassifier())])

    # Evaluator settings
    evaluator = EvaluatePrequential(
        pretrain_size=200,
        max_instances=instances,
        batch_size=1,
        n_wait=100,
        max_time=1000,
        output_file=output_file,
        task_type='classification',
        show_plot=True,
        plot_options=['kappa', 'kappa_t', 'performance'],
    )

    # Run the evaluation
    evaluator.eval(stream=stream, classifier=pipe)
def demo(output_file=None, instances=40000):
    """ _test_prequential

    This demo shows how to produce a prequential evaluation.

    The first thing needed is a stream. For this case we use a file stream
    which gets its samples from the sea_big.csv file. Then we need to setup
    a classifier, which in this case is an instance of sklearn's
    PassiveAggressiveClassifier. Then, optionally we create a pipeline
    structure, initialized on that classifier. The evaluation is then run.

    Parameters
    ----------
    output_file: string
        The name of the csv output file

    instances: int
        The evaluation's max number of instances

    """
    # Imported locally so that the name is guaranteed to be in scope for the
    # stream construction below.
    from skmultiflow.data import FileStream

    # Setup the File Stream.
    # The stream must actually be created: with every assignment commented
    # out, the final `evaluate` call fails with a NameError on `stream`.
    stream = FileStream("https://raw.githubusercontent.com/scikit-multiflow/streaming-datasets/"
                        "master/sea_big.csv")
    # stream = WaveformGenerator()

    # Setup the classifier
    # Alternatives: SGDClassifier(), KNNADWINClassifier(...),
    # OzaBaggingADWINClassifier(base_estimator=KNNClassifier(...)),
    # SGDRegressor(), PerceptronMask()
    classifier = PassiveAggressiveClassifier()

    # Setup the pipeline
    pipe = Pipeline([('Classifier', classifier)])

    # Setup the evaluator
    evaluator = EvaluatePrequential(
        pretrain_size=200,
        max_samples=instances,
        batch_size=1,
        n_wait=100,
        max_time=1000,
        output_file=output_file,
        show_plot=True,
        metrics=['kappa', 'kappa_t', 'performance'],
    )

    # Evaluate
    evaluator.evaluate(stream=stream, model=pipe)
def test_batch_incremental():
    """Regression test for BatchIncremental wrapped in a Pipeline.

    The learner is pre-trained on 150 samples of a seeded
    RandomTreeGenerator stream. Then 5000 samples are drawn one at a time:
    every 100th sample (except the very first) is predicted before being
    learned, and every sample is passed to ``partial_fit``. The collected
    predictions, the number of correct ones and the resulting accuracy are
    compared with the stored reference values.
    """
    stream = RandomTreeGenerator(tree_random_state=112, sample_random_state=112)
    stream.prepare_for_use()

    estimator = DecisionTreeClassifier(random_state=112)
    classifier = BatchIncremental(base_estimator=estimator, n_estimators=10)
    learner = Pipeline([('classifier', classifier)])

    # Pre-train
    X, y = stream.next_sample(150)
    learner.partial_fit(X, y)

    cnt = 0
    max_samples = 5000
    predictions = []
    true_labels = []
    wait_samples = 100
    correct_predictions = 0

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test every n samples (test-then-train: predict before learning)
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(learner.predict(X)[0])
            true_labels.append(y[0])
            if np.array_equal(y[0], predictions[-1]):
                correct_predictions += 1
        learner.partial_fit(X, y)
        cnt += 1

    performance = correct_predictions / len(predictions)

    expected_predictions = [1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0,
                            0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0,
                            0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0,
                            0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0,
                            0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0]
    expected_correct_predictions = 31
    expected_performance = 0.6326530612244898

    # np.all replaces np.alltrue, which is no longer available in NumPy 2.x
    assert np.all(predictions == expected_predictions)
    assert np.isclose(expected_performance, performance)
    assert correct_predictions == expected_correct_predictions

    # The pipeline must hand back a NumPy array from predict()
    assert isinstance(learner.predict(X), np.ndarray)
def demo(output_file=None, instances=40000):
    """ _test_holdout

    Run a holdout evaluation task with one learner.

    The stream is a WaveformGenerator and the learner is an SGDClassifier
    inserted into a Pipeline structure. Both can be swapped for another
    stream or estimator by editing the corresponding lines below.

    Parameters
    ----------
    output_file: string
        The name of the csv output file

    instances: int
        The evaluation's max number of instances

    """
    # Stream (a FileStream over covtype.csv is a possible alternative)
    stream = WaveformGenerator()
    stream.prepare_for_use()

    # Learner wrapped in a pipeline (PassiveAggressiveClassifier,
    # SGDRegressor or PerceptronMask are possible alternatives)
    pipe = Pipeline([('Classifier', SGDClassifier())])

    # Evaluator settings
    evaluator = EvaluateHoldout(
        pretrain_size=10000,
        test_size=2000,
        dynamic_test_set=True,
        max_instances=instances,
        batch_size=1,
        n_wait=15000,
        max_time=1000,
        output_file=output_file,
        task_type='classification',
        show_plot=True,
        plot_options=['kappa', 'kappa_t', 'performance'],
    )

    # Run the evaluation
    evaluator.eval(stream=stream, classifier=pipe)
def demo(output_file=None, instances=40000):
    """ _test_prequential_mol

    Evaluate a MOL classifier, initialized with sklearn's SGDClassifier,
    with a prequential evaluator on a MultilabelGenerator stream.

    Parameters
    ----------
    output_file: string
        The name of the csv output file

    instances: int
        The evaluation's max number of instances

    """
    # Multi-label stream sized by `instances` (a FileStream over music.csv
    # or a WaveformGenerator are possible alternatives)
    stream = MultilabelGenerator(n_samples=instances)
    stream.prepare_for_use()

    # Multi-output learner on top of an SGD classifier, wrapped in a pipeline.
    # NOTE(review): `n_iter` was deprecated in scikit-learn 0.19 and removed
    # in 0.21 - confirm the scikit-learn version this demo targets.
    base_estimator = SGDClassifier(n_iter=100)
    pipe = Pipeline([('Classifier', MultiOutputLearner(base_estimator))])

    # Evaluator settings; the sample budget is `instances` minus 10000
    evaluator = EvaluatePrequential(
        pretrain_size=5000,
        max_instances=instances - 10000,
        batch_size=1,
        n_wait=200,
        max_time=1000,
        output_file=output_file,
        task_type='multi_output',
        show_plot=True,
        plot_options=['hamming_score', 'j_index', 'exact_match'],
    )

    # Run the evaluation
    evaluator.eval(stream=stream, classifier=pipe)
def demo(output_file=None, instances=40000):
    """ _test_prequential_bagging

    Evaluate a LeverageBagging classifier, initialized with a KNN base
    learner, with a prequential evaluator on a SEAGenerator stream.

    Parameters
    ----------
    output_file: string
        The name of the csv output file

    instances: int
        The evaluation's max number of instances

    """
    # SEA stream (a FileStream over sea_big.csv is a possible alternative)
    stream = SEAGenerator(
        classification_function=2,
        instance_seed=755437,
        noise_percentage=0.0,
    )
    stream.prepare_for_use()

    # Leverage bagging over a single KNN member (OzaBaggingAdwin is a
    # possible alternative), wrapped in a pipeline
    base_learner = KNN(k=8, max_window_size=2000, leaf_size=30)
    ensemble = LeverageBagging(h=base_learner, ensemble_length=1)
    pipe = Pipeline([('Classifier', ensemble)])

    # Evaluator settings
    evaluator = EvaluatePrequential(
        pretrain_size=2000,
        max_instances=instances,
        batch_size=1,
        n_wait=200,
        max_time=1000,
        output_file=output_file,
        task_type='classification',
        show_plot=True,
        plot_options=['kappa', 'kappa_t', 'performance'],
    )

    # Run the evaluation
    evaluator.eval(stream=stream, classifier=pipe)
def demo(output_file=None, instances=40000):
    """ _test_regression

    Evaluate a regressor with a prequential evaluator.

    The data stream is an instance of the RegressionGenerator, which feeds
    an instance of sklearn's SGDRegressor wrapped in a Pipeline.

    Parameters
    ----------
    output_file: string
        The name of the csv output file

    instances: int
        The evaluation's max number of instances

    """
    # Regression stream with a fixed size of 40000 samples (independent of
    # the `instances` argument)
    stream = RegressionGenerator(n_samples=40000)

    # Regressor wrapped in a pipeline
    pipe = Pipeline([('Classifier', SGDRegressor())])

    # Evaluator settings
    evaluator = EvaluatePrequential(
        pretrain_size=1,
        max_instances=instances,
        batch_size=1,
        n_wait=1,
        max_time=1000,
        output_file=output_file,
        task_type='regression',
        show_plot=True,
        plot_options=['true_vs_predicts'],
    )

    # Run the evaluation
    evaluator.eval(stream=stream, classifier=pipe)
def demo(output_file=None, instances=40000):
    """ _test_prequential_mol

    This demo shows the evaluation process of a MOL classifier, initialized
    with sklearn's SGDClassifier.

    Parameters
    ----------
    output_file: string
        The name of the csv output file

    instances: int
        The evaluation's max number of instances

    """
    # Setup the File Stream
    stream = MultilabelGenerator(n_samples=instances)
    # stream = WaveformGenerator()

    # Setup the classifier.
    # `max_iter` is used instead of `n_iter`: scikit-learn deprecated the
    # `n_iter` keyword in 0.19 and removed it in 0.21, where passing it
    # raises a TypeError.
    classifier = MultiOutputLearner(SGDClassifier(max_iter=100))
    # Alternatives: SGDClassifier(), PassiveAggressiveClassifier(),
    # SGDRegressor(), PerceptronMask()

    # Setup the pipeline
    pipe = Pipeline([('Classifier', classifier)])

    # Setup the evaluator; the sample budget is `instances` minus 10000
    evaluator = EvaluatePrequential(
        pretrain_size=5000,
        max_samples=instances - 10000,
        batch_size=1,
        n_wait=200,
        max_time=1000,
        output_file=output_file,
        show_plot=True,
        metrics=['hamming_score', 'j_index', 'exact_match'],
    )

    # Evaluate
    evaluator.evaluate(stream=stream, model=pipe)
def demo(output_file=None, instances=40000):
    """ _test_regression

    Evaluate a regressor with a prequential evaluator.

    The data stream is an instance of the RegressionGenerator, which feeds
    a RegressionHoeffdingTree wrapped in a Pipeline. The mean square error
    is the tracked metric and no plot is shown.

    Parameters
    ----------
    output_file: string
        The name of the csv output file

    instances: int
        The evaluation's max number of instances

    """
    # Regression stream with a fixed size of 40000 samples (independent of
    # the `instances` argument)
    stream = RegressionGenerator(n_samples=40000)

    # Regressor wrapped in a pipeline
    pipe = Pipeline([('Classifier', RegressionHoeffdingTree())])

    # Evaluator settings
    evaluator = EvaluatePrequential(
        pretrain_size=1,
        max_samples=instances,
        batch_size=1,
        n_wait=200,
        max_time=1000,
        output_file=output_file,
        show_plot=False,
        metrics=['mean_square_error'],
    )

    # Run the evaluation
    evaluator.evaluate(stream=stream, model=pipe)
def demo():
    """ _test_pipeline

    Show a Pipeline structure being used as a learner: a pipeline holding
    a HoeffdingTree is passed as the classifier to an EvaluatePrequential
    object, which evaluates it on a WaveformGenerator stream.

    """
    # Stream: WaveformGenerator. A FileStream over covtype.csv or a
    # RandomTreeGenerator could be used instead; for Hoeffding Trees the
    # indices of nominal attributes may then have to be passed.
    stream = WaveformGenerator()
    stream.prepare_for_use()

    # Hoeffding tree wrapped in a pipeline (PerceptronMask, NaiveBayes or
    # PassiveAggressiveClassifier are possible alternatives)
    pipe = Pipeline([('Hoeffding Tree', HoeffdingTree())])

    # Evaluator settings
    evaluator = EvaluatePrequential(show_plot=True, pretrain_size=1000, max_instances=100000)

    # Run the evaluation
    evaluator.eval(stream=stream, classifier=pipe)
# Setup the File Stream #opt = FileOption("FILE", "OPT_NAME", "../datasets/covtype.csv", "CSV", False) #opt = FileOption("FILE", "OPT_NAME", "../datasets/movingSquares.csv", "CSV", False) opt = FileOption("FILE", "OPT_NAME", "../datasets/sea_stream.csv", "CSV", False) stream = FileStream(opt, -1, 1) stream.prepare_for_use() # Setup the classifiers clf_one = BernoulliNB() clf_two = AdaptiveRandomForest() # Setup the pipeline for clf_one pipe = Pipeline([('classifier', clf_one)]) # Create the list to hold both classifiers classifier = [pipe, clf_two] # Setup the evaluator eval = EvaluatePrequential(pretrain_size=200, max_instances=100000, batch_size=1, max_time=1000, output_file='comparison_Bernoulli_ADFH_Preq.csv', task_type='classification', show_plot=True, plot_options=['kappa', 'kappa_t', 'performance']) # Evaluate