def demo():
    """Run three parameterized demos over the same line-up of classifiers.

    Each demo receives its own freshly constructed list of models, so no
    model instance is passed to more than one ``demo_parameterized`` call.
    """
    def build_models():
        # One instance of every classifier under comparison, listed in the
        # same order as ``labels`` below.
        return [
            HoeffdingTreeClassifier(),
            SAMKNNClassifier(),
            LeveragingBaggingClassifier(random_state=1),
            SGDClassifier(),
        ]

    # All three line-ups are created up front, before any demo runs.
    first_models, second_models, third_models = (build_models() for _ in range(3))
    labels = ['HT', 'SAMKNNClassifier', 'LBkNN', 'SGDC']

    # Demo 1 -- plot should not fail
    demo_parameterized(first_models, model_names=labels)

    # Demo 2 -- csv output should look nice
    demo_parameterized(second_models, "sea_stream.csv", False, labels)

    # Demo 3 -- should not give "'NoneType' object is not iterable" error
    demo_parameterized(third_models, "covtype.csv", False, labels)
def test_leverage_bagging():
    """Check LeveragingBaggingClassifier (KNN base) against stored results.

    Draws 5000 samples from a seeded SEA stream. Every 100th sample
    (excluding the very first) is predicted before being used for training;
    every sample is then passed to ``partial_fit``. The collected
    predictions, the resulting accuracy, the return types of ``predict`` /
    ``predict_proba`` and the ``get_info`` string are compared with the
    expected values.
    """
    stream = SEAGenerator(classification_function=1, noise_percentage=0.067, random_state=112)
    knn = KNNClassifier(n_neighbors=8, leaf_size=40, max_window_size=2000)
    learner = LeveragingBaggingClassifier(base_estimator=knn, n_estimators=3, random_state=112)

    max_samples = 5000
    wait_samples = 100
    predictions = []
    correct_predictions = 0

    for cnt in range(max_samples):
        X, y = stream.next_sample()
        # Test every n samples
        if cnt != 0 and cnt % wait_samples == 0:
            predictions.append(learner.predict(X)[0])
            if y[0] == predictions[-1]:
                correct_predictions += 1
        if cnt == 0:
            # The known classes are only passed on the first call.
            learner.partial_fit(X, y, classes=[0, 1])
        else:
            learner.partial_fit(X, y)

    performance = correct_predictions / len(predictions)
    expected_predictions = [1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0,
                            0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0,
                            0, 1, 1]
    # np.array_equal replaces np.alltrue, which was removed in NumPy 2.0.
    assert np.array_equal(predictions, expected_predictions)

    expected_performance = 0.8571428571428571
    assert np.isclose(expected_performance, performance)

    # X still holds the last sample drawn in the loop above.
    assert isinstance(learner.predict(X), np.ndarray)
    assert isinstance(learner.predict_proba(X), np.ndarray)

    expected_info = (
        "LeveragingBaggingClassifier(base_estimator=KNNClassifier(leaf_size=40, "
        "max_window_size=2000, metric='euclidean', n_neighbors=8), "
        "delta=0.002, enable_code_matrix=False, leverage_algorithm='leveraging_bag',"
        " n_estimators=3, random_state=112, w=6)"
    )
    # Collapse every run of whitespace (including newlines) to a single space.
    info = " ".join(learner.get_info().split())
    assert info == expected_info
def test_leverage_bagging_me():
    """Check the 'leveraging_bag_me' variant with a KNN base estimator.

    Runs a prequential evaluation over a seeded SEA stream through
    ``run_prequential_supervised`` and compares against ``y_expected``.
    """
    stream = SEAGenerator(classification_function=1, noise_percentage=0.067, random_state=112)
    knn = KNNClassifier(n_neighbors=8, leaf_size=40, max_window_size=2000)

    # leveraging_bag_me
    learner = LeveragingBaggingClassifier(
        base_estimator=knn,
        n_estimators=3,
        random_state=112,
        leverage_algorithm='leveraging_bag_me',
    )

    # Builtin ``int`` is used because the ``np.int`` alias was removed in
    # NumPy 1.24; it resolves to the same dtype.
    y_expected = np.asarray([
        0, 0, 0, 0, 1, 0, 1, 0, 1, 0,
        1, 0, 0, 0, 1, 0, 1, 1, 1, 1,
        1, 1, 1, 1, 0, 1, 0, 1, 1, 0,
        0, 0, 1, 1, 1, 0, 1, 1, 0, 0,
        1, 0, 0, 1, 0, 0, 0, 1, 1, 0,
    ], dtype=int)

    run_prequential_supervised(stream, learner, max_samples=2000, n_wait=40, y_expected=y_expected)
def test_leverage_bagging_me():
    """Check the 'leveraging_bag_me' variant with a NaiveBayes base estimator.

    Runs a prequential evaluation over a seeded ConceptDriftStreamGenerator
    through ``run_prequential_supervised`` and compares against
    ``y_expected``.
    """
    nb = NaiveBayes()

    # leveraging_bag_me
    learner = LeveragingBaggingClassifier(
        base_estimator=nb,
        n_estimators=5,
        random_state=112,
        leverage_algorithm='leveraging_bag_me',
    )

    # Builtin ``int`` is used because the ``np.int`` alias was removed in
    # NumPy 1.24; it resolves to the same dtype.
    y_expected = np.asarray([
        0, 0, 0, 1, 0, 1, 0, 0, 1, 0,
        0, 0, 0, 1, 0, 0, 1, 1, 0, 0,
        1, 0, 0, 0, 1, 1, 0, 1, 0, 1,
        0, 0, 0, 1, 1, 0, 1, 1, 1, 0,
        1, 0, 1, 0, 0, 1, 1, 0, 1, 0,
    ], dtype=int)

    stream = ConceptDriftStreamGenerator(position=500, width=100, random_state=112)
    run_prequential_supervised(stream, learner, max_samples=2000, n_wait=40,
                               target_values=[0, 1], y_expected=y_expected)
def test_leverage_bagging_code_matrix():
    """Check LeveragingBaggingClassifier with ``enable_code_matrix=True``.

    Runs a prequential evaluation over a seeded five-class
    RandomTreeGenerator stream through ``run_prequential_supervised`` and
    compares against ``y_expected``.
    """
    nb = NaiveBayes()

    # enable the output detection code matrix
    learner = LeveragingBaggingClassifier(
        base_estimator=nb,
        n_estimators=5,
        random_state=12,
        enable_code_matrix=True,
    )

    # Builtin ``int`` is used because the ``np.int`` alias was removed in
    # NumPy 1.24; it resolves to the same dtype.
    y_expected = np.asarray([
        0, 0, 3, 2, 3, 1, 4, 1, 3, 4,
        2, 4, 2, 2, 0, 0, 2, 4, 2, 4,
        0, 4, 2, 4, 2, 4, 0, 4, 1, 3,
        2, 1, 2, 4, 2, 4, 1, 3, 0, 4,
        2, 0, 0, 4, 3, 2, 4, 4, 2, 4,
    ], dtype=int)

    stream = RandomTreeGenerator(tree_random_state=1, sample_random_state=12, n_classes=5)
    run_prequential_supervised(stream, learner, max_samples=2000, n_wait=40,
                               target_values=[0, 1, 2, 3, 4], y_expected=y_expected)
def demo(output_file=None, instances=40000):
    """ _test_prequential_bagging

    Prequentially evaluate a LeveragingBaggingClassifier on a
    WaveformGenerator stream. The ensemble is built with a
    HoeffdingTreeClassifier base estimator and ``n_estimators=2``.

    Parameters
    ----------
    output_file: string
        The name of the csv output file

    instances: int
        The evaluation's max number of instances

    """
    # Data source
    data_stream = WaveformGenerator()

    # Ensemble under evaluation
    base_tree = HoeffdingTreeClassifier()
    ensemble = LeveragingBaggingClassifier(base_estimator=base_tree, n_estimators=2)

    # Evaluator: 2000 pre-training samples, no live plot
    evaluator = EvaluatePrequential(
        pretrain_size=2000,
        max_samples=instances,
        output_file=output_file,
        show_plot=False,
    )

    # Run the evaluation
    evaluator.evaluate(stream=data_stream, model=ensemble)
# Applying Leveraging Bagging Classifier on a synthetic data stream from skmultiflow.meta import LeveragingBaggingClassifier from skmultiflow.lazy import KNNADWINClassifier from skmultiflow.evaluation import EvaluatePrequential from skmultiflow.data.sea_generator import SEAGenerator # Simulate the data stream dstream = SEAGenerator(classification_function=2, balance_classes=True, noise_percentage=0.3, random_state=333) # Instantiate the Leveraging Bagging classifier method with KNN ADWIN classifier as the base model leverage_class = LeveragingBaggingClassifier(base_estimator=KNNADWINClassifier( n_neighbors=10, max_window_size=1000), n_estimators=6, random_state=333) # Prequential Evaluation evaluate1 = EvaluatePrequential(show_plot=False, pretrain_size=1000, max_samples=10000, metrics=['accuracy']) # Run the evaluation evaluate1.evaluate(stream=dstream, model=leverage_class) ################################################### # Applying Online Boosting Classifier on a synthetic data stream from skmultiflow.meta import OnlineBoostingClassifier from skmultiflow.evaluation import EvaluatePrequential
def test_leverage_bagging_coverage():
    """Exercise the error paths and the reset of LeveragingBaggingClassifier."""
    # Constructor must reject an unknown leverage_algorithm
    with pytest.raises(ValueError):
        LeveragingBaggingClassifier(leverage_algorithm='invalid')

    learner = LeveragingBaggingClassifier(random_state=4321)
    sea_stream = SEAGenerator(random_state=4321)
    X, y = sea_stream.next_sample()

    # partial_fit called with classes=None
    with pytest.raises(ValueError):
        learner.partial_fit(X, y, classes=None)

    # Valid call that registers classes 0 and 1
    learner.partial_fit(X, y, classes=[0, 1])

    # Class list that differs from the one already observed
    with pytest.raises(ValueError):
        learner.partial_fit(X, y, classes=[0, 1, -1])

    # leverage_algorithm set to an invalid value after initialization
    with pytest.raises(RuntimeError):
        learner.leverage_algorithm = 'invalid'
        learner.partial_fit(X, y, classes=[0, 1])

    # After a reset the ensemble no longer has any classes
    learner.reset()
    assert learner.classes is None
if cm.sum_col[i] != 0 else 'Ill-defined' print("Class {}: {}".format(i, recall)) ''' #------------------------------------------------Experiment 3--------------------------------------------------------------- from skmultiflow.meta import AdaptiveRandomForestClassifier from skmultiflow.meta import LeveragingBaggingClassifier # Read in stream stream = FileStream(r"C:\Users\luyj0\OneDrive\Desktop\COMPX523-Data Stream Mining\covtype_numeric.csv") # Set up different classifiers knn = MyKNNClassifier() ht = HoeffdingTreeClassifier() nb = NaiveBayes() wv_knn = MyKNNClassifier(weighted_vote=True) s_knn = MyKNNClassifier(standardize=True) arf = AdaptiveRandomForestClassifier() lb = LeveragingBaggingClassifier() # Set up two ensemble algorithms metrics = ['accuracy', 'kappa', 'kappa_m','kappa_t', 'running_time', 'model_size'] # use a test-then-train evaluation approach evaluator = EvaluatePrequential(max_samples=30000, n_wait=100, show_plot=False, metrics=metrics) model_list = [knn,ht,nb,wv_knn,s_knn,arf,lb] name_list = ['KNN','HoeffdingTree','NaiveBayes','KNN+WeightedVote','KNN+Standardize','AdaptiveRandomForest','Leverage Bagging'] # Execute each evaluation in the list until it reaches the end for index in range(len(model_list)): evaluator.evaluate(stream=stream,model=[model_list[index]],model_names=[name_list[index]]) cm = evaluator.get_mean_measurements(0).confusion_matrix print("Recall per class")