Example #1
0
def demo(output_file=None, instances=40000):
    """ _test_prequential_bagging

    This demo runs a prequential evaluation of a LeverageBagging classifier,
    whose base estimator is a KNN classifier, on samples drawn from a
    SEAGenerator. The classifier is wrapped in a single-step Pipeline before
    being handed to the evaluator.

    Parameters
    ----------
    output_file: string
        The name of the csv output file

    instances: int
        The evaluation's max number of instances

    """
    # Setup the File Stream
    # opt = FileOption("FILE", "OPT_NAME", "../datasets/sea_big.csv", "CSV", False)
    # stream = FileStream(opt, -1, 1)
    stream = SEAGenerator(classification_function=2,
                          instance_seed=755437,
                          noise_percentage=0.0)
    stream.prepare_for_use()

    # Setup the classifier
    #classifier = OzaBaggingAdwin(h=KNN(k=8, max_window_size=2000, leaf_size=30, categorical_list=None))
    base_estimator = KNN(k=8, max_window_size=2000, leaf_size=30)
    classifier = LeverageBagging(h=base_estimator, ensemble_length=1)

    # Setup the pipeline
    pipe = Pipeline([('Classifier', classifier)])

    # Setup the evaluator. The local is deliberately not called `eval`, so
    # the builtin of the same name is not shadowed inside this function.
    evaluator = EvaluatePrequential(
        pretrain_size=2000,
        max_instances=instances,
        batch_size=1,
        n_wait=200,
        max_time=1000,
        output_file=output_file,
        task_type='classification',
        show_plot=True,
        plot_options=['kappa', 'kappa_t', 'performance'])

    # Evaluate
    evaluator.eval(stream=stream, classifier=pipe)
def demo():
    """ _test_streams

    This demo tests if the streams are correctly generating samples.

    A FileStream over '../datasets/covtype.csv', a RandomRBFGeneratorDrift
    and a SEAGenerator are created and prepared. One instance, and then a
    batch of 10 instances, are drawn from the file stream and from the SEA
    generator and printed to stdout.

    """
    opt = FileOption('FILE', 'OPT_NAME', '../datasets/covtype.csv', 'csv',
                     False)
    stream = FileStream(opt, -1, 1)
    stream.prepare_for_use()

    # NOTE(review): rbf_drift is configured and prepared but never sampled
    # below -- confirm whether it should be printed as well.
    rbf_drift = RandomRBFGeneratorDrift(change_speed=41.00,
                                        num_centroids=50,
                                        model_seed=32523423,
                                        instance_seed=5435,
                                        num_classes=2,
                                        num_att=10,
                                        num_drift_centroids=50)
    rbf_drift.prepare_for_use()

    sea = SEAGenerator()
    # Prepare the generator before sampling, as is done for every other
    # stream in this demo.
    sea.prepare_for_use()

    # Same order as before: file stream first, then the SEA generator.
    sources = (stream, sea)

    print('1 instance:\n')
    for source in sources:
        X, y = source.next_instance()
        print(X)
        print(y)

    print('\n\n10 instances:\n')
    for source in sources:
        X, y = source.next_instance(10)
        print(X)
        print(y)
def demo():
    """ _test_leverage_bagging

    This demo exercises a LeverageBagging classifier (one KNN base
    estimator) on instances drawn from a SEAGenerator.

    The classifier is first fitted on an initial batch of train_size (200)
    instances and then goes through max_samples (2000) predict-then-fit
    iterations. Progress is logged in 5% steps; at the end the number of
    iterations, the fraction of correct predictions and the classifier info
    are printed.

    """
    logging.basicConfig(format='%(message)s', level=logging.INFO)
    warnings.filterwarnings("ignore", ".*Passing 1d.*")

    stream = SEAGenerator(1, noise_percentage=6.7)
    stream.prepare_for_use()
    clf = LeverageBagging(h=KNN(k=8, max_window_size=2000, leaf_size=30),
                          ensemble_length=1)

    max_samples = 2000
    train_size = 200
    progress_step = max_samples / 20  # iterations per 5% of progress
    hits = 0

    # `classes` is passed only on the first partial_fit call, which is the
    # pre-training call whenever train_size is positive.
    needs_classes = not train_size > 0
    if not needs_classes:
        X, y = stream.next_instance(train_size)
        clf.partial_fit(X, y, classes=stream.get_classes())

    logging.info('%s%%', 0.0)
    for step in range(max_samples):
        if (step + 1) % progress_step == 0:
            logging.info('%s%%', str((step // progress_step + 1) * 5))

        # NOTE(review): two instances are drawn per iteration, but only the
        # first prediction is scored and the counter advances by one --
        # confirm this is intended.
        X, y = stream.next_instance(2)
        my_pred = clf.predict(X)

        fit_kwargs = {'classes': stream.get_classes()} if needs_classes else {}
        clf.partial_fit(X, y, **fit_kwargs)
        needs_classes = False

        if my_pred is not None and y[0] == my_pred[0]:
            hits += 1

    print('{} samples analyzed.'.format(max_samples))
    print('My performance: {}'.format(hits / max_samples))
    print(clf.get_info())
Example #4
0
def demo():
    """ _test_oza_bagging

    This demo exercises an OzaBagging classifier built on KNNAdwin base
    estimators, using samples drawn from a SEAGenerator.

    The classifier is first fitted on an initial batch of train_size (8)
    samples and then goes through max_samples (5000) predict-then-fit
    iterations, one sample at a time. Progress is logged in 5% steps; at
    the end the number of samples and the fraction of correct predictions
    are printed.

    """
    logging.basicConfig(format='%(message)s', level=logging.INFO)
    warnings.filterwarnings("ignore", ".*Passing 1d.*")

    stream = SEAGenerator(1, noise_percentage=6.7)
    stream.prepare_for_use()
    clf = OzaBagging(h=KNNAdwin(k=8, max_window_size=2000, leaf_size=30),
                     ensemble_length=2)

    max_samples = 5000
    train_size = 8
    progress_step = max_samples / 20  # iterations per 5% of progress
    hits = 0

    # `classes` is passed only on the first partial_fit call, which is the
    # pre-training call whenever train_size is positive.
    needs_classes = not train_size > 0
    if not needs_classes:
        X, y = stream.next_sample(train_size)
        clf.partial_fit(X, y, classes=stream.get_targets())

    for step in range(max_samples):
        if step % progress_step == 0:
            logging.info('%s%%', str(step // progress_step * 5))

        X, y = stream.next_sample()
        my_pred = clf.predict(X)

        fit_kwargs = {'classes': stream.get_targets()} if needs_classes else {}
        clf.partial_fit(X, y, **fit_kwargs)
        needs_classes = False

        if my_pred is not None and y[0] == my_pred[0]:
            hits += 1

    print('{} samples analyzed.'.format(max_samples))
    print('My performance: {}'.format(hits / max_samples))
Example #5
0
def test_sea_generator(test_path):
    """Check SEAGenerator metadata and its first generated instances.

    The stream is built with a fixed seed, its descriptive getters are
    compared against hard-coded expectations, and the first instances are
    compared against the reference arrays stored in 'sea_stream.npz'.

    Parameters
    ----------
    test_path: string
        Directory that contains the 'sea_stream.npz' reference file.

    """
    stream = SEAGenerator(classification_function=2, instance_seed=112, balance_classes=False, noise_percentage=0.28)
    stream.prepare_for_use()

    # Stream metadata
    assert stream.estimated_remaining_instances() == -1

    expected_header = ['att_num_0', 'att_num_1', 'att_num_2']
    assert stream.get_attributes_header() == expected_header

    expected_classes = [0, 1]
    assert stream.get_classes() == expected_classes

    assert stream.get_classes_header() == ['class']

    assert stream.get_num_attributes() == 3

    assert stream.get_num_nominal_attributes() == 0

    assert stream.get_num_numerical_attributes() == 3

    assert stream.get_num_targets() == 2

    assert stream.get_num_values_per_nominal_attribute() == 0

    assert stream.get_plot_name() == 'SEA Generator - 2 class labels'

    assert stream.has_more_instances() is True

    assert stream.is_restartable() is True

    # Load test data corresponding to first 10 instances. The archive is
    # opened as a context manager so its file handle is closed right after
    # the two arrays have been read.
    test_file = os.path.join(test_path, 'sea_stream.npz')
    with np.load(test_file) as data:
        X_expected = data['X']
        y_expected = data['y']

    # np.all is used instead of np.alltrue, which NumPy 2.0 removed.
    X, y = stream.next_instance()
    assert np.all(X[0] == X_expected[0])
    assert np.all(y[0] == y_expected[0])

    X, y = stream.get_last_instance()
    assert np.all(X[0] == X_expected[0])
    assert np.all(y[0] == y_expected[0])

    stream.restart()
    X, y = stream.next_instance(10)
    assert np.all(X == X_expected)
    assert np.all(y == y_expected)
def test_sea_generator(test_path):
    """Check SEAGenerator metadata and its first generated samples.

    The stream is built with a fixed random_state, its descriptive
    attributes are compared against hard-coded expectations, and the first
    samples are compared against the reference arrays stored in
    'sea_stream.npz'. Finally the declared number of targets and features is
    checked against the shape of the generated data.

    Parameters
    ----------
    test_path: string
        Directory that contains the 'sea_stream.npz' reference file.

    """
    stream = SEAGenerator(classification_function=2,
                          random_state=112,
                          balance_classes=False,
                          noise_percentage=0.28)
    stream.prepare_for_use()

    # Stream metadata
    assert stream.n_remaining_samples() == -1

    expected_names = ['att_num_0', 'att_num_1', 'att_num_2']
    assert stream.feature_names == expected_names

    expected_targets = [0, 1]
    assert stream.target_values == expected_targets

    assert stream.target_names == ['target_0']

    assert stream.n_features == 3

    assert stream.n_cat_features == 0

    assert stream.n_num_features == 3

    assert stream.n_targets == 1

    assert stream.get_data_info(
    ) == 'SEA Generator - 1 targets, 2 classes, 3 features'

    assert stream.has_more_samples() is True

    assert stream.is_restartable() is True

    # Load test data corresponding to first 10 instances. The archive is
    # opened as a context manager so its file handle is closed right after
    # the two arrays have been read.
    test_file = os.path.join(test_path, 'sea_stream.npz')
    with np.load(test_file) as data:
        X_expected = data['X']
        y_expected = data['y']

    # np.all is used instead of np.alltrue, which NumPy 2.0 removed.
    X, y = stream.next_sample()
    assert np.all(X[0] == X_expected[0])
    assert np.all(y[0] == y_expected[0])

    X, y = stream.last_sample()
    assert np.all(X[0] == X_expected[0])
    assert np.all(y[0] == y_expected[0])

    stream.restart()
    X, y = stream.next_sample(10)
    assert np.all(X == X_expected)
    assert np.all(y == y_expected)

    # Declared dimensions must agree with the generated arrays.
    assert stream.n_targets == np.array(y).ndim

    assert stream.n_features == X.shape[1]