def demo(output_file=None, instances=50000):
    """ _test_sam_knn_prequential

    Run a prequential evaluation of a SAMKNN classifier.

    Samples come from a file stream built on the movingSquares.csv file
    of the datasets folder. The classifier is handed to the evaluator
    directly; no pipeline is built around it.

    Parameters
    ----------
    output_file: string
        The name of the csv output file

    instances: int
        The evaluation's max number of instances

    """
    # Stream: movingSquares.csv read through a FileStream.
    # (covtype.csv and WaveformGenerator are the alternatives that were
    # tried here before.)
    file_option = FileOption("FILE", "OPT_NAME", "../datasets/movingSquares.csv", "CSV", False)
    data_stream = FileStream(file_option, -1, 1)
    data_stream.prepare_for_use()

    # Learner: SAMKNN. Other learners that were tried in this slot:
    # SGDClassifier, KNNAdwin, OzaBaggingAdwin, SGDRegressor, PerceptronMask.
    sam_knn_settings = {
        'n_neighbors': 5,
        'knnWeights': 'distance',
        'maxSize': 1000,
        'STMSizeAdaption': 'maxACCApprox',
        'useLTM': False,
    }
    learner = SAMKNN(**sam_knn_settings)

    # Evaluator configuration, collected in one place for readability.
    evaluator_settings = {
        'pretrain_size': 0,
        'max_instances': instances,
        'batch_size': 1,
        'n_wait': 100,
        'max_time': 1000,
        'output_file': output_file,
        'task_type': 'classification',
        'show_plot': True,
        'plot_options': ['performance'],
    }
    evaluator = EvaluatePrequential(**evaluator_settings)

    # Run the evaluation
    evaluator.eval(stream=data_stream, classifier=learner)
def demo_parameterized(h, dset="sea_stream.csv", show_plot=True):
    """ Run a prequential evaluation of a given learner on a csv dataset.

    Parameters
    ----------
    h:
        The learner handed to the evaluator as its classifier.

    dset: string
        File name of the dataset, appended to the "../datasets/" folder.

    show_plot: bool
        Forwarded to the evaluator's show_plot option.

    """
    # Stream setup
    csv_path = "../datasets/" + dset
    file_option = FileOption("FILE", "OPT_NAME", csv_path, "CSV", False)
    data_stream = FileStream(file_option, -1, 1)
    data_stream.prepare_for_use()

    # Evaluator setup; the first `pretrain_count` value is the pretrain size
    pretrain_count = 100
    evaluator = EvaluatePrequential(
        pretrain_size=pretrain_count,
        output_file='output.csv',
        max_instances=10000,
        batch_size=1,
        n_wait=1000,
        task_type='classification',
        show_plot=show_plot,
        plot_options=['performance'],
    )

    # Run the evaluation
    evaluator.eval(stream=data_stream, classifier=h)
def demo(output_file=None, instances=40000):
    """ _test_prequential_mol

    Evaluate a MultiOutputLearner built on sklearn's SGDClassifier.

    The data comes from a MultilabelGenerator producing `instances`
    samples; the learner is wrapped in a single-step Pipeline before
    being passed to the prequential evaluator.

    Parameters
    ----------
    output_file: string
        The name of the csv output file

    instances: int
        The evaluation's max number of instances

    """
    # Stream: synthetic multi-label data.
    # (a FileStream on music.csv and WaveformGenerator were the
    # alternatives tried here before.)
    data_stream = MultilabelGenerator(n_samples=instances)
    data_stream.prepare_for_use()

    # Learner: one SGDClassifier wrapped for multi-output learning.
    # NOTE(review): `n_iter` was deprecated in later scikit-learn releases
    # in favour of `max_iter` -- confirm against the pinned version.
    base_estimator = SGDClassifier(n_iter=100)
    learner = MultiOutputLearner(base_estimator)

    # Pipeline holding the learner as its only step
    pipeline = Pipeline([('Classifier', learner)])

    # Evaluator: the instance budget is 10000 below `instances`
    evaluator_settings = {
        'pretrain_size': 5000,
        'max_instances': instances - 10000,
        'batch_size': 1,
        'n_wait': 200,
        'max_time': 1000,
        'output_file': output_file,
        'task_type': 'multi_output',
        'show_plot': True,
        'plot_options': ['hamming_score', 'j_index', 'exact_match'],
    }
    evaluator = EvaluatePrequential(**evaluator_settings)

    # Run the evaluation
    evaluator.eval(stream=data_stream, classifier=pipeline)
def demo(output_file=None, instances=40000):
    """ _test_prequential_bagging

    Evaluate a LeverageBagging classifier whose base learner is KNN.

    The stream is a SEAGenerator; the ensemble is wrapped in a
    single-step Pipeline and evaluated prequentially.

    Parameters
    ----------
    output_file: string
        The name of the csv output file

    instances: int
        The evaluation's max number of instances

    """
    # Stream: SEA generator (a FileStream on sea_big.csv was the
    # alternative tried here before).
    data_stream = SEAGenerator(
        classification_function=2,
        instance_seed=755437,
        noise_percentage=0.0,
    )
    data_stream.prepare_for_use()

    # Learner: LeverageBagging over a KNN base learner
    # (OzaBaggingAdwin over KNN was the alternative tried here before).
    base_learner = KNN(k=8, max_window_size=2000, leaf_size=30)
    ensemble = LeverageBagging(h=base_learner, ensemble_length=1)

    # Pipeline holding the ensemble as its only step
    pipeline = Pipeline([('Classifier', ensemble)])

    # Evaluator
    evaluator = EvaluatePrequential(
        pretrain_size=2000,
        max_instances=instances,
        batch_size=1,
        n_wait=200,
        max_time=1000,
        output_file=output_file,
        task_type='classification',
        show_plot=True,
        plot_options=['kappa', 'kappa_t', 'performance'],
    )

    # Run the evaluation
    evaluator.eval(stream=data_stream, classifier=pipeline)
def demo(output_file=None, instances=40000):
    """ _test_regression

    Evaluate a regressor prequentially.

    A RegressionGenerator supplies the data and sklearn's SGDRegressor,
    wrapped in a single-step Pipeline, is the learner.

    Parameters
    ----------
    output_file: string
        The name of the csv output file

    instances: int
        The evaluation's max number of instances

    """
    # Stream: synthetic regression data.
    # NOTE(review): n_samples is fixed at 40000 and does not follow
    # `instances`, and prepare_for_use() is not called on this stream
    # (the call only existed for the disabled FileStream /
    # WaveformGenerator alternatives) -- confirm both are intended.
    data_stream = RegressionGenerator(n_samples=40000)

    # Learner (SGDClassifier, PassiveAggressiveClassifier and
    # PerceptronMask were the alternatives tried here before).
    regressor = SGDRegressor()

    # Pipeline holding the regressor as its only step
    pipeline = Pipeline([('Classifier', regressor)])

    # Evaluator
    evaluator_settings = {
        'pretrain_size': 1,
        'max_instances': instances,
        'batch_size': 1,
        'n_wait': 1,
        'max_time': 1000,
        'output_file': output_file,
        'task_type': 'regression',
        'show_plot': True,
        'plot_options': ['true_vs_predicts'],
    }
    evaluator = EvaluatePrequential(**evaluator_settings)

    # Run the evaluation
    evaluator.eval(stream=data_stream, classifier=pipeline)
def demo():
    """ Run a prequential evaluation of a HoeffdingTree on sea_stream.csv.

    Results are written to 'output.csv' and the performance plot is shown.
    """
    # Learner (other options: SAMKNN, LeverageBagging, SGD)
    learner = HoeffdingTree()

    # Stream setup
    file_option = FileOption("FILE", "OPT_NAME", "../datasets/sea_stream.csv", "CSV", False)
    data_stream = FileStream(file_option, -1, 1)
    data_stream.prepare_for_use()

    # Evaluator setup
    pretrain_count = 100
    evaluator_settings = {
        'pretrain_size': pretrain_count,
        'output_file': 'output.csv',
        'max_instances': 10000,
        'batch_size': 1,
        'n_wait': 1000,
        'task_type': 'classification',
        'show_plot': True,
        'plot_options': ['performance'],
    }
    evaluator = EvaluatePrequential(**evaluator_settings)

    # Run the evaluation
    evaluator.eval(stream=data_stream, classifier=learner)
def demo():
    """ _test_pipeline

    Show a Pipeline standing in for a learner: a Pipeline with a single
    HoeffdingTree step is passed as the classifier to an
    EvaluatePrequential object.

    """
    # Stream: WaveformGenerator.
    # Alternatives tried here before: a FileStream on covtype.csv, and a
    # RandomTreeGenerator(n_classes=2, n_numerical_attributes=5). The
    # original note for the file-based option says that, when used with
    # Hoeffding Trees, indices for nominal attributes need to be passed.
    data_stream = WaveformGenerator()
    data_stream.prepare_for_use()

    # Learner (PerceptronMask, NaiveBayes and PassiveAggressiveClassifier
    # were the alternatives tried here before).
    tree = HoeffdingTree()

    # Pipeline holding the tree as its only step
    pipeline = Pipeline([('Hoeffding Tree', tree)])

    # Evaluator
    evaluator = EvaluatePrequential(
        show_plot=True,
        pretrain_size=1000,
        max_instances=100000,
    )

    # Run the evaluation
    evaluator.eval(stream=data_stream, classifier=pipeline)
from skmultiflow.options.file_option import FileOption
from my_classifier import BatchClassifier

# Name of the dataset; used for both the input csv and the result file.
dataset = "elec"

# 1. Create a stream on ./data/<dataset>.csv
opt = FileOption("FILE", "OPT_NAME", "".join(("./data/", dataset, ".csv")), "CSV", False)
stream = FileStream(opt, -1, 1)

# 2. Prepare the stream for use
stream.prepare_for_use()

# 3. Instantiate the learners to compare: KNN, HoeffdingTree and the
#    project's BatchClassifier
h = [
    KNN(k=10, max_window_size=100, leaf_size=30),
    HoeffdingTree(),
    BatchClassifier(window_size=100, max_models=10),
]

# 4. Setup the evaluator; results go to result_<dataset>.csv
eval = EvaluatePrequential(
    pretrain_size=1000,
    output_file="".join(('result_', dataset, '.csv')),
    max_instances=10000,
    batch_size=1,
    n_wait=500,
    max_time=10 ** 9,
    task_type='classification',
    show_plot=True,
    plot_options=['performance'],
)

# 5. Run the evaluation on all learners
eval.eval(stream=stream, classifier=h)
from skmultiflow.evaluation.evaluate_prequential import EvaluatePrequential # 1. Create a stream stream = WaveformGenerator() stream.prepare_for_use() # 2. Instantiate the classifier adf = AdaptiveRandomForest() # 3. Setup the evaluator eval = EvaluatePrequential(show_plot=True, pretrain_size=100, max_instances=10000) # 4. Run evaluation eval.eval(stream=stream, classifier=adf) # # # Eval Prequential with datasets.csv for ARF # # In[47]: from skmultiflow.options.file_option import FileOption from skmultiflow.data.file_stream import FileStream from skmultiflow.evaluation.evaluate_prequential import EvaluatePrequential # 1. Create a stream #options = FileOption(option_value="../datasets/covtype.csv", file_extension="CSV") #options = FileOption(option_value="../datasets/movingSquares.csv", file_extension="CSV") options = FileOption(option_value="../datasets/sea_stream.csv",
def demo(instances=2000):
    """ _test_comparison_prequential

    Run a prequential evaluation with more than one learner, which makes
    it a comparison task.

    An SGDClassifier is compared against a Pipeline that applies a
    OneHotToCategorical transform before a KNNAdwin classifier, both fed
    from a file stream on covtype.csv.

    Parameters
    ----------
    instances: int
        The evaluation's maximum number of instances.

    """
    # Stream: covtype.csv through a FileStream (sea_big.csv and a
    # SEAGenerator were the alternatives tried here before).
    file_option = FileOption("FILE", "OPT_NAME", "../datasets/covtype.csv", "CSV", False)
    data_stream = FileStream(file_option, -1, 1)
    data_stream.prepare_for_use()

    # First learner: plain SGDClassifier
    sgd = SGDClassifier()

    # Second learner: KNNAdwin behind a one-hot-to-categorical transform.
    # The transform receives two groups of column indices: 10..13 and
    # 14..53 (both inclusive).
    knn_adwin = KNNAdwin(k=8, max_window_size=1000, leaf_size=30)
    first_group = list(range(10, 14))
    second_group = list(range(14, 54))
    to_categorical = OneHotToCategorical([first_group, second_group])
    knn_pipeline = Pipeline([
        ('one_hot_to_categorical', to_categorical),
        ('KNN', knn_adwin),
    ])

    # Learners under comparison, in evaluation order
    learners = [sgd, knn_pipeline]

    # Evaluator
    evaluator_settings = {
        'pretrain_size': 2000,
        'output_file': 'teste.csv',
        'max_instances': instances,
        'batch_size': 1,
        'n_wait': 200,
        'max_time': 1000,
        'task_type': 'classification',
        'show_plot': True,
        'plot_options': ['performance', 'kappa_t'],
    }
    evaluator = EvaluatePrequential(**evaluator_settings)

    # Run the evaluation
    evaluator.eval(stream=data_stream, classifier=learners)