from timeit import default_timer as timer

from skmultiflow.data import FileStream
from skmultiflow.lazy import KNN
# Imports for the commented-out sklearn comparison kept below:
# from sklearn.neighbors import KNeighborsClassifier
# from skmultiflow.core import Pipeline
# from skmultiflow.transform import OneHotToCategorical


def demo():
    """ _test_knn

    This demo tests the KNN classifier on a file stream whose instances
    come from a SEA generator.

    The test computes the performance of the KNN classifier as well as
    the time needed to build the structure and classify max_samples
    (5000 by default) instances.

    """
    stream = FileStream('../data/datasets/sea_big.csv', -1, 1)
    stream.prepare_for_use()

    # Pre-train on the first 200 samples
    train = 200
    X, y = stream.next_sample(train)
    # t = OneHotToCategorical([[10, 11, 12, 13],
    #                          [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
    #                           34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53]])
    # t2 = OneHotToCategorical([[10, 11, 12, 13],
    #                           [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
    #                            34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53]])

    start = timer()
    knn = KNN(n_neighbors=8, max_window_size=2000, leaf_size=40)
    # pipe = Pipeline([('one_hot_to_categorical', t), ('KNN', knn)])
    # compare = KNeighborsClassifier(n_neighbors=8, algorithm='kd_tree', leaf_size=40, metric='euclidean')
    # pipe2 = Pipeline([('one_hot_to_categorical', t2), ('KNN', compare)])
    # pipe.fit(X, y)
    # pipe2.fit(X, y)
    knn.partial_fit(X, y)
    # compare.fit(X, y)

    # Test phase: predict one sample at a time and count the correct
    # predictions (the model is not updated during this phase)
    n_samples = 0
    max_samples = 5000
    my_corrects = 0
    # compare_corrects = 0
    while n_samples < max_samples:
        X, y = stream.next_sample()
        # my_pred = pipe.predict(X)
        my_pred = knn.predict(X)
        # compare_pred = pipe2.predict(X)
        # compare_pred = compare.predict(X)
        if y[0] == my_pred[0]:
            my_corrects += 1
        # if y[0] == compare_pred[0]:
        #     compare_corrects += 1
        n_samples += 1
    end = timer()

    print('Evaluation time: ' + str(end - start))
    print(str(n_samples) + ' samples analyzed.')
    print('My performance: ' + str(my_corrects / n_samples))
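# A hedged convenience sketch (not part of the original demo): the demo above
# reads '../data/datasets/sea_big.csv'; if that file is missing, a comparable
# SEA-style CSV can be regenerated from SEAGenerator. The path, sample count,
# and header names below are illustrative assumptions.
def _generate_sea_csv(path='sea_sample.csv', n_samples=10000):
    import numpy as np
    from skmultiflow.data import SEAGenerator

    gen = SEAGenerator(random_state=1)
    gen.prepare_for_use()
    X, y = gen.next_sample(n_samples)
    # Write attributes plus the class label as the last column
    np.savetxt(path, np.hstack([X, y.reshape(-1, 1)]),
               delimiter=',', header='att1,att2,att3,target', comments='')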
def __init__(self,
             base_estimator=KNN(),
             n_estimators=10,
             w=6,
             delta=0.002,
             enable_code_matrix=False,
             leverage_algorithm='leveraging_bag',
             random_state=None):
    super().__init__()
    # Default values
    self.ensemble = None
    self.adwin_ensemble = None
    self.n_detected_changes = None
    self.matrix_codes = None
    self.classes = None
    self.init_matrix_codes = None
    self._random_state = None   # This is the actual random_state object used internally
    self.base_estimator = base_estimator
    self.n_estimators = n_estimators
    self.enable_code_matrix = enable_code_matrix
    self.w = w          # Parameter of the Poisson distribution used to weight samples
    self.delta = delta  # Confidence parameter of the ADWIN change detectors
    if leverage_algorithm not in self.LEVERAGE_ALGORITHMS:
        raise ValueError("Leverage algorithm not supported.")
    self.leverage_algorithm = leverage_algorithm
    self.random_state = random_state
    self.__configure()
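# A minimal construction sketch for the class above. The values are
# illustrative assumptions, not recommendations: w is the Poisson sampling
# parameter and delta the ADWIN confidence, per the leveraging bagging
# literature; 'leveraging_bag' is the default strategy.
from skmultiflow.lazy import KNN

lb = LeverageBagging(base_estimator=KNN(n_neighbors=8),
                     n_estimators=5,
                     w=6,
                     delta=0.002,
                     leverage_algorithm='leveraging_bag',
                     random_state=42)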
import logging
import warnings

from skmultiflow.data import SEAGenerator
from skmultiflow.lazy import KNN
from skmultiflow.meta import LeverageBagging


def demo():
    """ _test_leverage_bagging

    This demo tests the LeverageBagging classifier on a stream of instances
    produced by a SEA generator.

    The test computes the performance of the LeverageBagging classifier over
    max_samples (2000 by default) iterations.

    """
    logging.basicConfig(format='%(message)s', level=logging.INFO)
    warnings.filterwarnings("ignore", ".*Passing 1d.*")

    stream = SEAGenerator(1, noise_percentage=0.067, random_state=1)
    stream.prepare_for_use()
    clf = LeverageBagging(base_estimator=KNN(n_neighbors=8, max_window_size=2000, leaf_size=30),
                          n_estimators=1, random_state=1)
    sample_count = 0
    correctly_classified = 0
    max_samples = 2000
    train_size = 200
    first = True

    if train_size > 0:
        # Pre-train on the first train_size samples
        X, y = stream.next_sample(train_size)
        clf.partial_fit(X, y, classes=stream.target_values)
        first = False

    logging.info('%s%%', 0.0)
    while sample_count < max_samples:
        if (sample_count + 1) % (max_samples / 20) == 0:
            # Log progress every 5%
            logging.info('%s%%', str((sample_count // (max_samples / 20) + 1) * 5))
        # Two samples are fetched per iteration; only the first is scored,
        # but both are used to update the model (test-then-train)
        X, y = stream.next_sample(2)
        my_pred = clf.predict(X)
        if first:
            clf.partial_fit(X, y, classes=stream.target_values)
            first = False
        else:
            clf.partial_fit(X, y)
        if my_pred is not None:
            if y[0] == my_pred[0]:
                correctly_classified += 1
        sample_count += 1

    print(str(sample_count) + ' samples analyzed.')
    print('My performance: ' + str(correctly_classified / sample_count))
    print(clf.get_info())
import numpy as np

from skmultiflow.data import SEAGenerator
from skmultiflow.lazy import KNN
from skmultiflow.meta import OzaBaggingAdwin


def test_oza_bagging_adwin():
    stream = SEAGenerator(1, noise_percentage=0.067, random_state=112)
    stream.prepare_for_use()
    knn = KNN(n_neighbors=8, leaf_size=40, max_window_size=2000)
    learner = OzaBaggingAdwin(base_estimator=knn, n_estimators=3, random_state=112)
    first = True
    cnt = 0
    max_samples = 5000
    predictions = []
    wait_samples = 100
    correct_predictions = 0

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(learner.predict(X)[0])
            if y[0] == predictions[-1]:
                correct_predictions += 1
        if first:
            learner.partial_fit(X, y, classes=stream.target_values)
            first = False
        else:
            learner.partial_fit(X, y)
        cnt += 1

    performance = correct_predictions / len(predictions)
    expected_predictions = [1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1,
                            0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1,
                            0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1]
    assert np.alltrue(predictions == expected_predictions)

    expected_performance = 0.8979591836734694
    assert np.isclose(expected_performance, performance)

    expected_correct_predictions = 44
    assert correct_predictions == expected_correct_predictions

    assert type(learner.predict(X)) == np.ndarray
    assert type(learner.predict_proba(X)) == np.ndarray

    expected_info = "OzaBaggingAdwin(base_estimator=KNN(leaf_size=40, max_window_size=2000,\n" \
                    "                                   n_neighbors=8, nominal_attributes=None),\n" \
                    "                n_estimators=3, random_state=112)"
    assert learner.get_info() == expected_info
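# Usage note (assumption: the suite is run with pytest, as is standard for
# scikit-multiflow tests). This test can be executed in isolation with:
#   pytest -q -k test_oza_bagging_adwin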
def init_classifiers():
    n_prototypes_per_class = 4
    sigma = 4
    rslvq = RSLVQ(prototypes_per_class=n_prototypes_per_class, sigma=sigma)
    arslvq = ARSLVQ(prototypes_per_class=n_prototypes_per_class, sigma=sigma,
                    confidence=0.0001, window_size=300)
    oza = OzaBaggingAdwin(base_estimator=KNN())
    adf = AdaptiveRandomForest()
    samknn = SAMKNN()
    hat = HAT()

    # Only SAM-kNN is active; the other classifiers are instantiated so they
    # can be swapped in quickly (see the commented alternatives below)
    clfs = [samknn]
    names = ["SamKnn"]
    # clfs = [rslvq]
    # names = ["rslvq"]
    return clfs, names
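# A sketch of how init_classifiers() would typically be consumed in a
# prequential experiment. Hedged: the stream and evaluator settings are
# illustrative; SAMKNN and EvaluatePrequential are scikit-multiflow classes,
# while RSLVQ, ARSLVQ, and ReoccuringDriftStream come from a separate
# research package.
from skmultiflow.data import SEAGenerator
from skmultiflow.evaluation import EvaluatePrequential

clfs, names = init_classifiers()
stream = SEAGenerator(random_state=1)
stream.prepare_for_use()
evaluator = EvaluatePrequential(max_samples=5000, metrics=['accuracy'])
evaluator.evaluate(stream=stream, model=clfs, model_names=names)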
import numpy as np

from skmultiflow.data import SEAGenerator
from skmultiflow.lazy import KNN
from skmultiflow.meta import LeverageBagging


def test_leverage_bagging():
    stream = SEAGenerator(1, noise_percentage=0.067, random_state=112)
    stream.prepare_for_use()
    knn = KNN(n_neighbors=8, leaf_size=40, max_window_size=2000)
    learner = LeverageBagging(base_estimator=knn, n_estimators=3, random_state=112)
    first = True
    cnt = 0
    max_samples = 5000
    predictions = []
    wait_samples = 100
    correct_predictions = 0

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(learner.predict(X)[0])
            if y[0] == predictions[-1]:
                correct_predictions += 1
        if first:
            learner.partial_fit(X, y, classes=stream.target_values)
            first = False
        else:
            learner.partial_fit(X, y)
        cnt += 1

    performance = correct_predictions / len(predictions)
    expected_predictions = [1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1,
                            0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1,
                            0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1]
    expected_correct_predictions = 42
    expected_performance = 0.8571428571428571

    assert np.alltrue(predictions == expected_predictions)
    assert np.isclose(expected_performance, performance)
    assert correct_predictions == expected_correct_predictions

    assert type(learner.predict(X)) == np.ndarray
    assert type(learner.predict_proba(X)) == np.ndarray
if __name__ == "__main__":
    s1 = MIXEDGenerator(classification_function=1, random_state=112, balance_classes=False)
    s2 = MIXEDGenerator(classification_function=0, random_state=112, balance_classes=False)

    """1. Create stream: concept drifts from s1 to s2 and reoccurs"""
    stream = ReoccuringDriftStream(stream=s1,
                                   drift_stream=s2,
                                   random_state=None,
                                   alpha=90.0,  # angle of change, 0 - 90 degrees
                                   position=2000,
                                   width=500)
    stream.prepare_for_use()

    """2. Setup classifier"""
    oza = OzaBaggingAdwin(base_estimator=KNN())

    """3. Setup evaluator"""
    evaluator = EvaluatePrequential(show_plot=True,
                                    batch_size=10,
                                    max_samples=5000,
                                    metrics=['accuracy', 'kappa_t', 'kappa_m', 'kappa'],
                                    output_file=None)

    """4. Run evaluator"""
    evaluator.evaluate(stream=stream, model=oza)
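    # Variant sketch (commented out, following this script's convention for
    # alternatives): the run above only plots, since output_file=None. To
    # persist the prequential results, EvaluatePrequential accepts an
    # output_file path; the file name here is an illustrative assumption.
    # evaluator = EvaluatePrequential(show_plot=False,
    #                                 batch_size=10,
    #                                 max_samples=5000,
    #                                 metrics=['accuracy', 'kappa_t', 'kappa_m', 'kappa'],
    #                                 output_file='oza_knn_results.csv')
    # evaluator.evaluate(stream=stream, model=oza)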