def demo():
    """Run the multi-output learner (MOL) demo on the music dataset.

    A ``FileStream`` over ``../datasets/music.csv`` feeds a
    ``MultiOutputLearner`` (wrapping a ``Perceptron``) placed inside a
    single-step ``Pipeline``. The pipeline is pre-trained on the first 150
    samples, after which every remaining sample of the stream is predicted
    one at a time.

    The function takes no arguments and returns nothing; it logs the
    evaluation time, the number of evaluated samples and the Hamming score
    computed over all collected predictions.
    """
    # Plain message-only log lines at INFO level.
    logging.basicConfig(format='%(message)s', level=logging.INFO)

    # NOTE(review): the positional arguments 0 and 6 presumably describe the
    # target columns of the file -- confirm against FileStream's signature.
    music_stream = FileStream("../datasets/music.csv", 0, 6)
    music_stream.prepare_for_use()

    # A Perceptron is used as the base estimator here. According to the
    # original note, MultiOutputLearner() without arguments uses Logistic
    # Regression instead.
    learner = MultiOutputLearner(h=Perceptron())
    pipeline = Pipeline([('classifier', learner)])

    # Warm-up phase: fit on an initial batch before any prediction is made.
    n_pretrain = 150
    logging.info('Pre training on %s samples', n_pretrain)
    warmup_X, warmup_y = music_stream.next_sample(n_pretrain)
    pipeline.partial_fit(warmup_X, warmup_y, classes=music_stream.get_targets())

    predicted = []
    expected = []
    n_evaluated = 0

    # The clock covers only the prediction loop and the score computation.
    eval_start = timer()
    logging.info('Evaluating...')
    while music_stream.has_more_samples():
        sample_X, sample_y = music_stream.next_sample()
        predicted.extend(pipeline.predict(sample_X))
        expected.extend(sample_y)
        n_evaluated += 1

    score = hamming_score(expected, predicted)
    logging.info('Evaluation time: %s s', timer() - eval_start)
    logging.info('Total samples analyzed: %s', n_evaluated)
    logging.info('The classifier\'s static Hamming score : %0.3f' % score)
def demo():
    """Compare KNNAdwin with a static scikit-learn k-NN on the SEA file stream.

    Both classifiers are first trained on a batch of 200 samples read from
    ``../datasets/sea_big.csv``. Afterwards up to ``max_samples`` (10000)
    further samples are processed one at a time: each sample is predicted by
    both models, and then used to incrementally update ``KNNAdwin`` only
    (the ``KNeighborsClassifier`` reference is never refitted).

    The loop stops early if the stream runs out of samples, so a short file
    no longer causes a read past the end of the stream.

    The function takes no arguments and returns nothing. It logs progress in
    5% steps and prints the total elapsed time (setup and pre-training
    included, since the clock starts at the very beginning), the number of
    evaluated samples and the accuracy of both classifiers.
    """
    start = timer()
    logging.basicConfig(format='%(message)s', level=logging.INFO)

    # NOTE(review): the positional arguments -1 and 1 presumably select the
    # last column as the single target -- confirm against FileStream.
    stream = FileStream('../datasets/sea_big.csv', -1, 1)
    stream.prepare_for_use()

    knn = KNNAdwin(k=8, leaf_size=40, max_window_size=2000)
    compare = KNeighborsClassifier(n_neighbors=8, algorithm='kd_tree',
                                   leaf_size=40, metric='euclidean')

    # Pre-train both models on the same initial batch. The class list is
    # handed to the incremental learner on this first partial_fit call.
    pretrain_size = 200
    X, y = stream.next_sample(pretrain_size)
    knn.partial_fit(X, y, classes=stream.get_targets())
    compare.fit(X, y)

    n_samples = 0
    max_samples = 10000
    my_corrects = 0
    compare_corrects = 0
    # Number of samples between two progress messages (5% of the run).
    progress_step = max_samples / 20

    # Stop at max_samples or as soon as the stream is exhausted.
    while n_samples < max_samples and stream.has_more_samples():
        if n_samples % progress_step == 0:
            logging.info('%s%%', str(n_samples // progress_step * 5))

        X, y = stream.next_sample()

        # Test-then-train: predict first, then let KNNAdwin learn the sample.
        my_pred = knn.predict(X)
        knn.partial_fit(X, y)
        compare_pred = compare.predict(X)

        if y[0] == my_pred[0]:
            my_corrects += 1
        if y[0] == compare_pred[0]:
            compare_corrects += 1
        n_samples += 1

    end = timer()
    print('Evaluation time: ' + str(end - start))
    print(str(n_samples) + ' samples analyzed.')
    if n_samples == 0:
        # Pre-training consumed the whole stream; accuracy is undefined.
        print('No samples were evaluated; performance is undefined.')
        return
    print('My performance: ' + str(my_corrects / n_samples))
    print('Compare performance: ' + str(compare_corrects / n_samples))