def test_KNN_adwin(test_path, package_path):
    """Regression test for KNNAdwin on the ``sea_big.csv`` file stream.

    Processes 5000 samples one at a time. On every 100th sample (except the
    very first) the learner predicts the label *before* being trained on
    that sample. The collected predictions, the number of correct ones and
    the resulting accuracy are compared against recorded reference values.

    Parameters
    ----------
    test_path
        Not used by this test; kept so the signature stays unchanged.
    package_path
        Root directory under which
        ``src/skmultiflow/data/datasets/sea_big.csv`` is looked up.
    """
    test_file = os.path.join(package_path, 'src/skmultiflow/data/datasets/sea_big.csv')
    stream = FileStream(test_file, -1, 1)
    stream.prepare_for_use()
    learner = KNNAdwin(n_neighbors=8, leaf_size=40, max_window_size=2000)

    cnt = 0
    max_samples = 5000
    predictions = []
    correct_predictions = 0
    wait_samples = 100

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test every n samples (predict first, then train on the same sample)
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(learner.predict(X)[0])
            if y[0] == predictions[-1]:
                correct_predictions += 1
        learner.partial_fit(X, y)
        cnt += 1

    performance = correct_predictions / len(predictions)

    expected_predictions = [
        1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1,
        1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1,
        1
    ]
    expected_correct_predictions = 40
    expected_performance = 0.8163265306122449

    # np.alltrue was removed in NumPy 2.0; np.array_equal compares the two
    # sequences element-wise and also yields False when their lengths differ.
    assert np.array_equal(predictions, expected_predictions)
    assert np.isclose(expected_performance, performance)
    assert correct_predictions == expected_correct_predictions
def test_knn_adwin():
    """Regression test for KNNAdwin on a SEA stream with a concept drift.

    The test has three parts:

    1. Prequential run over 1000 samples of a ``ConceptDriftStream`` built
       from two ``SEAGenerator`` instances (drift configured with
       ``position=250, width=10``). On every 20th sample (except the first)
       the learner predicts before being trained; predictions and the number
       of hits are compared against recorded reference values.
    2. ``reset()`` must leave ``learner.window.n_samples`` at 0 and
       ``get_info()`` must return the expected description string.
    3. Batch ``fit`` / ``predict`` on a restarted stream, plus a check that
       ``predict`` and ``predict_proba`` return NumPy arrays.
    """
    stream = ConceptDriftStream(stream=SEAGenerator(random_state=1),
                                drift_stream=SEAGenerator(random_state=2,
                                                          classification_function=2),
                                random_state=1, position=250, width=10)
    stream.prepare_for_use()
    learner = KNNAdwin(n_neighbors=8, leaf_size=40, max_window_size=200)

    cnt = 0
    max_samples = 1000
    predictions = array('i')
    correct_predictions = 0
    wait_samples = 20

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test every n samples (predict first, then train on the same sample)
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(learner.predict(X)[0])
            if y[0] == predictions[-1]:
                correct_predictions += 1
        learner.partial_fit(X, y)
        cnt += 1

    expected_predictions = array('i', [
        1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0,
        1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1,
        1
    ])
    # np.alltrue was removed in NumPy 2.0; np.array_equal compares the two
    # sequences element-wise and also yields False when their lengths differ.
    assert np.array_equal(predictions, expected_predictions)

    expected_correct_predictions = 46
    assert correct_predictions == expected_correct_predictions

    learner.reset()
    assert learner.window.n_samples == 0

    expected_info = 'KNNAdwin(leaf_size=40, max_window_size=200, n_neighbors=8,\n' \
                    ' nominal_attributes=None)'
    assert learner.get_info() == expected_info

    stream.restart()
    X, y = stream.next_sample(max_samples)

    # NOTE(review): the sample at index 950 is neither used for fitting nor
    # for evaluation. The reference count below was recorded with this exact
    # split, so it is intentionally left as is.
    learner.fit(X[:950], y[:950])
    predictions = learner.predict(X[951:])

    correct_predictions = sum(np.array(predictions) == y[951:])
    expected_correct_predictions = 47
    assert correct_predictions == expected_correct_predictions

    assert isinstance(learner.predict(X), np.ndarray)
    assert isinstance(learner.predict_proba(X), np.ndarray)
def demo():
    """Compare KNNAdwin against a static scikit-learn k-NN on a file stream.

    The stream is read from ``../data/datasets/sea_big.csv`` (instances
    coming from a SEA generator). Both classifiers are first trained on an
    initial batch of 200 samples. Afterwards 10000 samples are processed one
    at a time: each sample is predicted by both models, and then only the
    KNNAdwin model is updated with it; the scikit-learn model stays as
    fitted on the initial batch.

    Progress is logged every 5% of the run. At the end the elapsed time, the
    number of processed samples and the accuracy of both models are printed.
    """
    start = timer()
    logging.basicConfig(format='%(message)s', level=logging.INFO)

    stream = FileStream('../data/datasets/sea_big.csv', -1, 1)
    stream.prepare_for_use()

    knn = KNNAdwin(n_neighbors=8, leaf_size=40, max_window_size=2000)
    compare = KNeighborsClassifier(n_neighbors=8, algorithm='kd_tree',
                                   leaf_size=40, metric='euclidean')

    # `first` tracks whether knn still needs the class labels passed to
    # partial_fit; this is only the case on its very first call.
    first = True
    train = 200
    if train > 0:
        X, y = stream.next_sample(train)
        knn.partial_fit(X, y, classes=stream.target_values)
        compare.fit(X, y)
        first = False

    n_samples = 0
    max_samples = 10000
    my_corrects = 0
    compare_corrects = 0
    # Number of samples between two progress messages (5% of the run).
    progress_step = max_samples / 20

    while n_samples < max_samples:
        if n_samples % progress_step == 0:
            logging.info('%s%%', n_samples // progress_step * 5)

        X, y = stream.next_sample()

        # Predict before training so the sample is unseen when evaluated.
        my_pred = knn.predict(X)
        if first:
            knn.partial_fit(X, y, classes=stream.target_values)
            first = False
        else:
            knn.partial_fit(X, y)
        compare_pred = compare.predict(X)

        if y[0] == my_pred[0]:
            my_corrects += 1
        if y[0] == compare_pred[0]:
            compare_corrects += 1
        n_samples += 1

    end = timer()

    print('Evaluation time: ' + str(end - start))
    print(str(n_samples) + ' samples analyzed.')
    print('My performance: ' + str(my_corrects / n_samples))
    print('Compare performance: ' + str(compare_corrects / n_samples))