import numpy as np

from skmultiflow.data import SEAGenerator
from skmultiflow.lazy import KNNClassifier
from skmultiflow.meta import OzaBaggingClassifier


def test_oza_bagging():
    stream = SEAGenerator(1, noise_percentage=0.067, random_state=112)
    knn = KNNClassifier(n_neighbors=8, leaf_size=40, max_window_size=2000)
    learner = OzaBaggingClassifier(base_estimator=knn, n_estimators=3, random_state=112)

    first = True
    cnt = 0
    max_samples = 5000
    predictions = []
    wait_samples = 100
    correct_predictions = 0

    while cnt < max_samples:
        X, y = stream.next_sample()

        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(learner.predict(X)[0])
            if y[0] == predictions[-1]:
                correct_predictions += 1

        if first:
            learner.partial_fit(X, y, classes=stream.target_values)
            first = False
        else:
            learner.partial_fit(X, y)
        cnt += 1

    performance = correct_predictions / len(predictions)
    expected_predictions = [1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0,
                            1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1]
    assert np.alltrue(predictions == expected_predictions)

    expected_performance = 0.8979591836734694
    assert np.isclose(expected_performance, performance)

    expected_correct_predictions = 44
    assert correct_predictions == expected_correct_predictions

    assert type(learner.predict(X)) == np.ndarray
    assert type(learner.predict_proba(X)) == np.ndarray

    expected_info = "OzaBaggingClassifier(base_estimator=KNNClassifier(leaf_size=40, " \
                    "max_window_size=2000, metric='euclidean', n_neighbors=8), " \
                    "n_estimators=3, random_state=112)"
    info = " ".join([line.strip() for line in learner.get_info().split()])
    assert info == expected_info
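# For context, a minimal standalone sketch (not part of the original test) of what the two
# type assertions above refer to: predict() returns an ndarray of labels and
# predict_proba() an ndarray of per-class probabilities, one row per sample. The parameter
# values and printed values below are illustrative assumptions, not expected output.
def _show_prediction_outputs():
    stream = SEAGenerator(1, noise_percentage=0.067, random_state=112)
    learner = OzaBaggingClassifier(base_estimator=KNNClassifier(n_neighbors=8),
                                   n_estimators=3, random_state=112)
    # Warm up the ensemble on a small batch before querying it
    X, y = stream.next_sample(200)
    learner.partial_fit(X, y, classes=stream.target_values)
    X, y = stream.next_sample()
    print(learner.predict(X))        # ndarray of predicted labels, e.g. [1]
    print(learner.predict_proba(X))  # ndarray of class probabilities, e.g. [[0.125 0.875]]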
import logging
import warnings

from skmultiflow.data import SEAGenerator
from skmultiflow.lazy import KNNADWINClassifier
from skmultiflow.meta import OzaBaggingClassifier


def demo():
    """ _test_oza_bagging

    This demo tests the OzaBaggingClassifier, using KNNADWINClassifier as its base
    estimator, on samples given by a SEAGenerator.

    The demo computes the performance of the OzaBaggingClassifier over max_samples
    (5000 by default) instances, processed in a test-then-train fashion.

    """
    logging.basicConfig(format='%(message)s', level=logging.INFO)
    warnings.filterwarnings("ignore", ".*Passing 1d.*")

    stream = SEAGenerator(1, noise_percentage=.067, random_state=1)
    clf = OzaBaggingClassifier(base_estimator=KNNADWINClassifier(n_neighbors=8,
                                                                 max_window_size=2000,
                                                                 leaf_size=30),
                               n_estimators=2, random_state=1)

    sample_count = 0
    correctly_classified = 0
    max_samples = 5000
    train_size = 8
    first = True

    if train_size > 0:
        # Pre-train on the first train_size samples
        X, y = stream.next_sample(train_size)
        clf.partial_fit(X, y, classes=stream.target_values)
        first = False

    while sample_count < max_samples:
        if sample_count % (max_samples / 20) == 0:
            logging.info('%s%%', str((sample_count // (max_samples / 20) * 5)))

        X, y = stream.next_sample()
        my_pred = clf.predict(X)

        if first:
            clf.partial_fit(X, y, classes=stream.target_values)
            first = False
        else:
            clf.partial_fit(X, y)

        if my_pred is not None:
            if y[0] == my_pred[0]:
                correctly_classified += 1

        sample_count += 1

    print(str(sample_count) + ' samples analyzed.')
    print('My performance: ' + str(correctly_classified / sample_count))
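# A minimal way to run the demo above as a script (an assumed entry point, not shown in the
# original snippet):
if __name__ == '__main__':
    demo()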
# Applying Oza Bagging Classifier on a synthetic data stream
from skmultiflow.meta import OzaBaggingClassifier
from skmultiflow.lazy import KNNADWINClassifier
from skmultiflow.evaluation import EvaluatePrequential
from skmultiflow.data.sea_generator import SEAGenerator

# Simulate the data stream
dstream = SEAGenerator(classification_function=2, balance_classes=True,
                       noise_percentage=0.3, random_state=333)

# Instantiate the Oza Bagging classifier with KNN ADWIN classifier as the base model
oza_class = OzaBaggingClassifier(base_estimator=KNNADWINClassifier(n_neighbors=10,
                                                                   max_window_size=1000),
                                 n_estimators=6, random_state=333)

# Prequential evaluation
evaluate1 = EvaluatePrequential(show_plot=False, pretrain_size=1000,
                                max_samples=10000, metrics=['accuracy'])

# Run the evaluation
evaluate1.evaluate(stream=dstream, model=oza_class)

###################################################

# Applying Leveraging Bagging Classifier on a synthetic data stream
from skmultiflow.meta import LeveragingBaggingClassifier
from skmultiflow.lazy import KNNADWINClassifier
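# The Leveraging Bagging example above stops after its imports. A minimal sketch of how it
# might continue, mirroring the Oza Bagging workflow; the parameter values here are
# illustrative assumptions, not taken from the original text.
dstream2 = SEAGenerator(classification_function=2, balance_classes=True,
                        noise_percentage=0.3, random_state=333)

lev_class = LeveragingBaggingClassifier(base_estimator=KNNADWINClassifier(n_neighbors=10,
                                                                           max_window_size=1000),
                                        n_estimators=6, random_state=333)

evaluate2 = EvaluatePrequential(show_plot=False, pretrain_size=1000,
                                max_samples=10000, metrics=['accuracy'])

evaluate2.evaluate(stream=dstream2, model=lev_class)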
# 1. Data loading (uncomment one of the datasets below)
#data, X, y = read_data_csv('./data/streaming-datasets-master/elec.csv')
#data, X, y = read_data_csv('./data/streaming-datasets-master/airlines.csv')
#data, X, y = read_data_csv('./data/streaming-datasets-master/agr_a.csv')
#data, X, y = read_data_csv('./data/streaming-datasets-master/covtype.csv')

stream = DataStream(X, y)
stream.prepare_for_use()

# 2a. Models initialization
nb = NaiveBayes()
ht = HoeffdingTreeClassifier()
aw = AccuracyWeightedEnsembleClassifier()
dw = DynamicWeightedMajorityClassifier()
ob = OnlineBoostingClassifier()
oz = OzaBaggingClassifier()

# 2b. Initialization of the DDCW model for comparison tests
dwc = DiversifiedDynamicClassWeightedClassifier(period=100,
                                                base_estimators=[NaiveBayes(), HoeffdingTreeClassifier()],
                                                min_estimators=5, max_estimators=20,
                                                alpha=0.2, beta=3, theta=0.2)  # 0.5

# 2c. Initialization of DDCW models for parameter testing
#ht1_p1 = DiversifiedDynamicClassWeightedClassifier(period=500)
#ht1_p5 = DiversifiedDynamicClassWeightedClassifier(period=500)
#ht1_p10 = DiversifiedDynamicClassWeightedClassifier(period=1000)
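# A hedged sketch of how the initialized models above might be compared on the stream.
# EvaluatePrequential accepts a list of models plus model_names; the pretrain size, sample
# budget, metrics, and names below are assumptions for illustration, not the project's
# actual evaluation setup.
from skmultiflow.evaluation import EvaluatePrequential

evaluator = EvaluatePrequential(pretrain_size=500, max_samples=20000,
                                metrics=['accuracy', 'kappa'], show_plot=False)
evaluator.evaluate(stream=stream,
                   model=[nb, ht, aw, dw, ob, oz, dwc],
                   model_names=['NB', 'HT', 'AWE', 'DWM', 'OB', 'OzaBag', 'DDCW'])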