Ejemplo n.º 1
0
def demo():
    """ _test_mol

    This demo tests the MOL learner on a file stream, which reads from 
    the music.csv file.

    The test computes the performance of the MOL learner as well as 
    the time to create the structure and classify all the samples in 
    the file.

    """
    # Setup logging
    logging.basicConfig(format='%(message)s', level=logging.INFO)

    # Setup the file stream
    stream = FileStream("../datasets/music.csv", 0, 6)
    stream.prepare_for_use()

    # Setup the classifier, by default it uses Logistic Regression
    # classifier = MultiOutputLearner()
    # classifier = MultiOutputLearner(h=SGDClassifier(n_iter=100))
    classifier = MultiOutputLearner(h=Perceptron())

    # Setup the pipeline
    pipe = Pipeline([('classifier', classifier)])

    pretrain_size = 150
    logging.info('Pre training on %s samples', str(pretrain_size))
    X, y = stream.next_sample(pretrain_size)
    # classifier.fit(X, y)
    pipe.partial_fit(X, y, classes=stream.get_targets())
    count = 0
    true_labels = []
    predicts = []
    init_time = timer()
    logging.info('Evaluating...')
    while stream.has_more_samples():
        X, y = stream.next_sample()
        # p = classifier.predict(X)
        p = pipe.predict(X)
        predicts.extend(p)
        true_labels.extend(y)
        count += 1
    perf = hamming_score(true_labels, predicts)
    logging.info('Evaluation time: %s s', str(timer() - init_time))
    logging.info('Total samples analyzed: %s', str(count))
    logging.info('The classifier\'s static Hamming score    : %0.3f' % perf)
Ejemplo n.º 2
0
def demo():
    """ _test_knn_adwin

    This demo tests the KNNAdwin classifier on a file stream, which gives 
    instances coming from a SEA generator. 
    
    The test computes the performance of the KNNAdwin classifier as well as 
    the time to create the structure and classify max_samples (10000 by 
    default) instances.
    
    """
    start = timer()
    logging.basicConfig(format='%(message)s', level=logging.INFO)
    #warnings.filterwarnings("ignore", ".*Passing 1d.*")
    stream = FileStream('../datasets/sea_big.csv', -1, 1)
    # stream = RandomRBFGeneratorDrift(change_speed=41.00, n_centroids=50, model_random_state=32523423,
    #                                  sample_seed=5435, n_classes=2, num_att=10, num_drift_centroids=50)
    stream.prepare_for_use()
    t = OneHotToCategorical([[10, 11, 12, 13],
                             [
                                 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
                                 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
                                 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46,
                                 47, 48, 49, 50, 51, 52, 53
                             ]])
    t2 = OneHotToCategorical([[10, 11, 12, 13],
                              [
                                  14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
                                  25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
                                  36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46,
                                  47, 48, 49, 50, 51, 52, 53
                              ]])

    # knn = KNN(k=8, max_window_size=2000, leaf_size=40)
    knn = KNNAdwin(k=8, leaf_size=40, max_window_size=2000)
    # pipe = Pipeline([('one_hot_to_categorical', t), ('KNN', knn)])

    compare = KNeighborsClassifier(n_neighbors=8,
                                   algorithm='kd_tree',
                                   leaf_size=40,
                                   metric='euclidean')
    # pipe2 = Pipeline([('one_hot_to_categorical', t2), ('KNN', compare)])
    first = True
    train = 200
    if train > 0:
        X, y = stream.next_sample(train)
        # pipe.partial_fit(X, y, target_values=stream.get_targets())
        # pipe.partial_fit(X, y, target_values=stream.get_targets())
        # pipe2.fit(X, y)

        knn.partial_fit(X, y, classes=stream.get_targets())
        compare.fit(X, y)
        first = False
    n_samples = 0
    max_samples = 10000
    my_corrects = 0
    compare_corrects = 0

    while n_samples < max_samples:
        if n_samples % (max_samples / 20) == 0:
            logging.info('%s%%', str((n_samples // (max_samples / 20) * 5)))
        X, y = stream.next_sample()
        # my_pred = pipe.predict(X)
        my_pred = knn.predict(X)
        # my_pred = [1]
        if first:
            # pipe.partial_fit(X, y, target_values=stream.get_targets())
            # pipe.partial_fit(X, y, target_values=stream.get_targets())
            knn.partial_fit(X, y, classes=stream.get_targets())
            first = False
        else:
            # pipe.partial_fit(X, y)
            knn.partial_fit(X, y)
        # compare_pred = pipe2.predict(X)
        compare_pred = compare.predict(X)
        if y[0] == my_pred[0]:
            my_corrects += 1
        if y[0] == compare_pred[0]:
            compare_corrects += 1
        n_samples += 1

    end = timer()

    print('Evaluation time: ' + str(end - start))
    print(str(n_samples) + ' samples analyzed.')
    print('My performance: ' + str(my_corrects / n_samples))
    print('Compare performance: ' + str(compare_corrects / n_samples))