def demo(output_file=None, instances=40000):
    """ _test_holdout

    Run a holdout evaluation task with a single learner.

    The default stream is a WaveformGenerator. The default learner is a
    SGDClassifier, which is inserted into a Pipeline structure. All the
    default values can be changed by uncommenting/commenting the code below.

    Parameters
    ----------
    output_file: string
        The name of the csv output file

    instances: int
        The evaluation's max number of instances

    """
    # Setup the File Stream
    # opt = FileOption("FILE", "OPT_NAME", "../datasets/covtype.csv", "CSV", False)
    # stream = FileStream(opt, -1, 1)
    stream = WaveformGenerator()
    stream.prepare_for_use()

    # Setup the classifier
    classifier = SGDClassifier()
    # classifier = PassiveAggressiveClassifier()
    # classifier = SGDRegressor()
    # classifier = PerceptronMask()

    # Setup the pipeline
    pipe = Pipeline([('Classifier', classifier)])

    # Setup the evaluator. The local is called `evaluator` (not `eval`) so the
    # builtin eval() is not shadowed inside this function.
    evaluator = EvaluateHoldout(
        pretrain_size=10000,
        test_size=2000,
        dynamic_test_set=True,
        max_instances=instances,
        batch_size=1,
        n_wait=15000,
        max_time=1000,
        output_file=output_file,
        task_type='classification',
        show_plot=True,
        plot_options=['kappa', 'kappa_t', 'performance'],
    )

    # Evaluate
    evaluator.eval(stream=stream, classifier=pipe)
def demo(output_file=None, instances=40000):
    """ _test_comparison_holdout

    Holdout evaluation of several learners at once (a comparison task).

    A SGDClassifier and a KNNAdwin are evaluated side by side on a
    WaveformGenerator stream.

    Parameters
    ----------
    output_file: string, optional
        If passed this parameter indicates the output file name. If left
        blank, no output file will be generated.

    instances: int (Default: 40000)
        The evaluation's maximum number of instances.

    """
    # Data source. A file-based stream can be swapped in instead:
    # opt = FileOption("FILE", "OPT_NAME", "../datasets/covtype.csv", "CSV", False)
    # wave_stream = FileStream(opt, -1, 1)
    wave_stream = WaveformGenerator()
    wave_stream.prepare_for_use()

    # Learners under comparison, passed to the evaluator as a list.
    # Other candidates: PassiveAggressiveClassifier(), SGDRegressor(),
    # PerceptronMask()
    learners = [
        SGDClassifier(),
        KNNAdwin(k=8, max_window_size=2000),
    ]

    # Evaluator configuration, gathered in one place for readability
    holdout_options = dict(
        pretrain_size=2000,
        test_size=2000,
        dynamic_test_set=True,
        max_instances=instances,
        batch_size=1,
        n_wait=5000,
        max_time=1000,
        output_file=output_file,
        task_type='classification',
        show_plot=True,
        plot_options=['kappa'],
    )
    comparison = EvaluateHoldout(**holdout_options)

    # Run the comparison
    comparison.eval(stream=wave_stream, classifier=learners)
def demo(output_file=None, instances=40000):
    """ _test_prequential_bagging

    Prequential evaluation of a LeverageBagging classifier.

    As written, the ensemble is built with ``h=HoeffdingTree()`` and
    ``ensemble_length=2`` and is fed by a WaveformGenerator stream. KNN-based
    variants are kept below as commented-out alternatives.

    Parameters
    ----------
    output_file: string
        The name of the csv output file

    instances: int
        The evaluation's max number of instances

    """
    # Stream setup. Alternatives:
    # wave_stream = FileStream("../datasets/sea_big.csv", -1, 1)
    # wave_stream = SEAGenerator(classification_function=2, noise_percentage=0.0)
    wave_stream = WaveformGenerator()
    wave_stream.prepare_for_use()

    # Learner setup. Alternatives:
    # ensemble = OzaBaggingAdwin(h=KNN(k=8, max_window_size=2000, leaf_size=30, categorical_list=None))
    # ensemble = LeverageBagging(h=KNN(k=8, max_window_size=2000, leaf_size=30), ensemble_length=1)
    # ensemble = Pipeline([('Classifier', classifier)])
    base_learner = HoeffdingTree()
    ensemble = LeverageBagging(h=base_learner, ensemble_length=2)

    # Evaluator setup (plotting is turned off for this demo)
    prequential = EvaluatePrequential(
        pretrain_size=2000,
        max_samples=instances,
        output_file=output_file,
        show_plot=False,
    )

    # Run the evaluation
    prequential.evaluate(stream=wave_stream, model=ensemble)
def demo():
    """ _test_pipeline

    This demo demonstrates the Pipeline structure seemingly working as a
    learner, while being passed as parameter to an EvaluatePrequential
    object.

    A HoeffdingTree is wrapped in a single-step Pipeline and evaluated on a
    WaveformGenerator stream.

    """
    # # Setup the stream
    # opt = FileOption("FILE", "OPT_NAME", "../datasets/covtype.csv", "CSV", False)
    # stream = FileStream(opt, -1, 1)
    # stream.prepare_for_use()
    # # If used for Hoeffding Trees then need to pass indices for Nominal attributes

    # Test with RandomTreeGenerator
    # stream = RandomTreeGenerator(n_classes=2, n_numerical_attributes=5)
    # stream.prepare_for_use()

    # Test with WaveformGenerator
    stream = WaveformGenerator()
    stream.prepare_for_use()

    # Setup the classifier
    # classifier = PerceptronMask()
    # classifier = NaiveBayes()
    # classifier = PassiveAggressiveClassifier()
    classifier = HoeffdingTree()

    # Setup the pipeline
    pipe = Pipeline([('Hoeffding Tree', classifier)])

    # Setup the evaluator. The local is called `evaluator` (not `eval`) so the
    # builtin eval() is not shadowed inside this function.
    evaluator = EvaluatePrequential(show_plot=True, pretrain_size=1000, max_instances=100000)

    # Evaluate
    evaluator.eval(stream=stream, classifier=pipe)
# # -Accuracy: # # -Kappa statistic: k=1 the classifier is always correct. # k=0 the predictions coincide with the correct ones as often as those of the chance classifier # # # In[43]: from skmultiflow.data.generators.waveform_generator import WaveformGenerator from skmultiflow.classification.trees.hoeffding_tree import HoeffdingTree from skmultiflow.evaluation.evaluate_prequential import EvaluatePrequential # 1. Create a stream stream = WaveformGenerator() stream.prepare_for_use() # 2. Instantiate the classifier adf = AdaptiveRandomForest() # 3. Setup the evaluator eval = EvaluatePrequential(show_plot=True, pretrain_size=100, max_instances=10000) # 4. Run evaluation eval.eval(stream=stream, classifier=adf) # # # Eval Prequential with datasets.csv for ARF
def _check_waveform_stream(stream, n_attributes, expected_data_file):
    """Run the shared WaveformGenerator assertions on a prepared stream.

    Parameters
    ----------
    stream: WaveformGenerator
        A stream on which ``prepare_for_use()`` has already been called.

    n_attributes: int
        Number of attributes the stream is expected to report. The expected
        header is ``att_num_0`` .. ``att_num_<n_attributes - 1>``.

    expected_data_file: string
        Path to the ``.npz`` file holding the reference ``X`` and ``y``
        arrays (the first 10 instances of the stream).

    """
    assert stream.estimated_remaining_instances() == -1

    expected_header = ['att_num_{}'.format(idx) for idx in range(n_attributes)]
    assert stream.get_attributes_header() == expected_header

    expected_classes = [0, 1, 2]
    assert stream.get_classes() == expected_classes

    assert stream.get_classes_header() == ['class']

    assert stream.get_num_attributes() == n_attributes

    assert stream.get_num_nominal_attributes() == 0

    assert stream.get_num_numerical_attributes() == n_attributes

    assert stream.get_num_targets() == 3

    assert stream.get_num_values_per_nominal_attribute() == 0

    assert stream.get_plot_name() == 'Waveform Generator - 3 class labels'

    assert stream.has_more_instances() is True

    assert stream.is_restartable() is True

    # Load test data corresponding to first 10 instances
    data = np.load(expected_data_file)
    X_expected = data['X']
    y_expected = data['y']

    # np.all is used instead of np.alltrue: the latter is a deprecated alias
    # that no longer exists in NumPy 2.0.
    X, y = stream.next_instance()
    assert np.all(X[0] == X_expected[0])
    assert np.all(y[0] == y_expected[0])

    # The last instance must be the one that was just drawn
    X, y = stream.get_last_instance()
    assert np.all(X[0] == X_expected[0])
    assert np.all(y[0] == y_expected[0])

    # After a restart the stream must reproduce the same 10 instances
    stream.restart()
    X, y = stream.next_instance(10)
    assert np.all(X == X_expected)
    assert np.all(y == y_expected)


def test_waveform_generator(test_path):
    """Check WaveformGenerator metadata and output, without and with noise.

    Parameters
    ----------
    test_path: string
        Directory containing the reference files ``waveform_stream.npz`` and
        ``waveform_noise_stream.npz``.

    """
    # Noise-free stream: 21 attributes
    stream = WaveformGenerator(seed=23, add_noise=False)
    stream.prepare_for_use()
    _check_waveform_stream(stream, 21, os.path.join(test_path, 'waveform_stream.npz'))

    # Noise test: 40 attributes
    stream = WaveformGenerator(seed=23, add_noise=True)
    stream.prepare_for_use()
    _check_waveform_stream(stream, 40, os.path.join(test_path, 'waveform_noise_stream.npz'))
def test_waveform_generator_noise(test_path):
    """Check WaveformGenerator metadata and output when noise is enabled.

    Parameters
    ----------
    test_path: string
        Directory containing the reference file ``waveform_noise_stream.npz``.

    """
    # Noise test
    stream = WaveformGenerator(random_state=23, has_noise=True)
    stream.prepare_for_use()

    assert stream.n_remaining_samples() == -1

    # att_num_0 .. att_num_39
    expected_names = ['att_num_{}'.format(idx) for idx in range(40)]
    assert stream.feature_names == expected_names

    expected_targets = [0, 1, 2]
    assert stream.target_values == expected_targets

    assert stream.target_names == ['target_0']

    assert stream.n_features == 40

    assert stream.n_cat_features == 0

    assert stream.n_num_features == 40

    assert stream.n_targets == 1

    assert stream.get_data_info() == 'Waveform Generator - 1 targets, 3 classes, 40 features'

    assert stream.has_more_samples() is True

    assert stream.is_restartable() is True

    # Load test data corresponding to first 10 instances
    test_file = os.path.join(test_path, 'waveform_noise_stream.npz')
    data = np.load(test_file)
    X_expected = data['X']
    y_expected = data['y']

    # np.all is used instead of np.alltrue: the latter is a deprecated alias
    # that no longer exists in NumPy 2.0.
    X, y = stream.next_sample()
    assert np.all(X[0] == X_expected[0])
    assert np.all(y[0] == y_expected[0])

    # The last sample must be the one that was just drawn
    X, y = stream.last_sample()
    assert np.all(X[0] == X_expected[0])
    assert np.all(y[0] == y_expected[0])

    # After a restart the stream must reproduce the same 10 samples
    stream.restart()
    X, y = stream.next_sample(10)
    assert np.all(X == X_expected)
    assert np.all(y == y_expected)

    # Reported metadata must agree with the shape of the generated batch
    assert stream.n_targets == np.array(y).ndim
    assert stream.n_features == X.shape[1]