# Example #1
# 0
def test_oza_bagging():
    """Regression test for OzaBaggingClassifier with a KNNClassifier base.

    Streams 5000 samples from a seeded SEAGenerator through a
    test-then-train loop. Every ``wait_samples`` samples (except the very
    first one, which is seen before any call to ``partial_fit``) the current
    sample is predicted before it is used for training. The recorded
    predictions, the resulting accuracy, the number of hits, the return types
    of ``predict``/``predict_proba`` and the ``get_info()`` string are
    compared against stored reference values.
    """
    stream = SEAGenerator(1, noise_percentage=0.067, random_state=112)
    knn = KNNClassifier(n_neighbors=8, leaf_size=40, max_window_size=2000)
    learner = OzaBaggingClassifier(base_estimator=knn,
                                   n_estimators=3,
                                   random_state=112)

    max_samples = 5000
    wait_samples = 100
    predictions = []
    correct_predictions = 0

    for cnt in range(max_samples):
        X, y = stream.next_sample()
        # Test every `wait_samples` samples, skipping sample 0 because
        # partial_fit has not been called yet at that point.
        if cnt != 0 and cnt % wait_samples == 0:
            predictions.append(learner.predict(X)[0])
            if y[0] == predictions[-1]:
                correct_predictions += 1
        if cnt == 0:
            # The set of classes is passed only on the first training call.
            learner.partial_fit(X, y, classes=stream.target_values)
        else:
            learner.partial_fit(X, y)

    performance = correct_predictions / len(predictions)
    expected_predictions = [
        1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0,
        1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
        1
    ]
    # np.alltrue was removed in NumPy 2.0; array_equal checks both the shape
    # and every element.
    assert np.array_equal(predictions, expected_predictions)

    expected_performance = 0.8979591836734694
    assert np.isclose(expected_performance, performance)

    expected_correct_predictions = 44
    assert correct_predictions == expected_correct_predictions

    # X still holds the last sample drawn in the loop above.
    assert isinstance(learner.predict(X), np.ndarray)
    assert isinstance(learner.predict_proba(X), np.ndarray)

    expected_info = "OzaBaggingClassifier(base_estimator=KNNClassifier(leaf_size=40, " \
                    "max_window_size=2000, metric='euclidean', n_neighbors=8), " \
                    "n_estimators=3, random_state=112)"
    # Collapse every run of whitespace (including newlines) to one space.
    info = " ".join(learner.get_info().split())
    assert info == expected_info
def demo():
    """ _test_oza_bagging

    This demo runs the OzaBaggingClassifier, using KNNADWINClassifier as base
    estimator, on samples given by a SEAGenerator.

    The classifier is first trained on ``train_size`` samples. Then, for
    ``max_samples`` (5000) samples, each sample is predicted and afterwards
    used for training. Progress is logged in 5% steps and the fraction of
    correctly classified samples is printed at the end.

    """
    logging.basicConfig(format='%(message)s', level=logging.INFO)
    warnings.filterwarnings("ignore", ".*Passing 1d.*")
    stream = SEAGenerator(1, noise_percentage=.067, random_state=1)

    clf = OzaBaggingClassifier(base_estimator=KNNADWINClassifier(
        n_neighbors=8, max_window_size=2000, leaf_size=30),
                               n_estimators=2,
                               random_state=1)
    sample_count = 0
    correctly_classified = 0
    max_samples = 5000
    train_size = 8
    # Integer step (5% of the run) so progress is logged as "5%", not "5.0%".
    progress_step = max_samples // 20

    # `first` tracks whether the classes still have to be passed to
    # partial_fit; the pre-training call below already does so.
    first = True
    if train_size > 0:
        X, y = stream.next_sample(train_size)
        clf.partial_fit(X, y, classes=stream.target_values)
        first = False

    while sample_count < max_samples:
        if sample_count % progress_step == 0:
            logging.info('%s%%', sample_count // progress_step * 5)
        X, y = stream.next_sample()
        # Predict before training on the sample (test-then-train).
        my_pred = clf.predict(X)

        if first:
            clf.partial_fit(X, y, classes=stream.target_values)
            first = False
        else:
            clf.partial_fit(X, y)

        if my_pred is not None and y[0] == my_pred[0]:
            correctly_classified += 1

        sample_count += 1

    print(f'{sample_count} samples analyzed.')
    print(f'My performance: {correctly_classified / sample_count}')
# Applying Oza Bagging Classifier on a synthetic data stream
from skmultiflow.meta import OzaBaggingClassifier
from skmultiflow.lazy import KNNADWINClassifier
from skmultiflow.evaluation import EvaluatePrequential
from skmultiflow.data.sea_generator import SEAGenerator

# Synthetic SEA stream used as the data source.
dstream = SEAGenerator(
    classification_function=2,
    noise_percentage=0.3,
    balance_classes=True,
    random_state=333,
)

# Oza Bagging ensemble of six estimators built from a KNN-ADWIN base model.
oza_class = OzaBaggingClassifier(
    base_estimator=KNNADWINClassifier(n_neighbors=10, max_window_size=1000),
    n_estimators=6,
    random_state=333,
)

# Prequential evaluator reporting accuracy, with plotting turned off.
evaluate1 = EvaluatePrequential(
    pretrain_size=1000,
    max_samples=10000,
    metrics=['accuracy'],
    show_plot=False,
)

# Evaluate the ensemble on the stream.
evaluate1.evaluate(stream=dstream, model=oza_class)

###################################################

# Applying Leveraging Bagging Classifier on a synthetic data stream
from skmultiflow.meta import LeveragingBaggingClassifier
from skmultiflow.lazy import KNNADWINClassifier
# Example #4
# 0
# Alternative datasets (disabled). Each line would load one CSV file through
# read_data_csv.
# NOTE(review): X and y used below are not assigned in this fragment —
# presumably they come from a read_data_csv call earlier in the script; confirm.
#data, X, y = read_data_csv('./data/streaming-datasets-master/elec.csv')
#data, X, y = read_data_csv('./data/streaming-datasets-master/airlines.csv')
#data, X, y = read_data_csv('./data/streaming-datasets-master/agr_a.csv')
#data, X, y = read_data_csv('./data/streaming-datasets-master/covtype.csv')

# Wrap the loaded features and targets in a stream object.
stream = DataStream(X, y)

stream.prepare_for_use()

# 2a. Initialization of the models, each with its default parameters
nb = NaiveBayes()
ht = HoeffdingTreeClassifier()
aw = AccuracyWeightedEnsembleClassifier()
dw = DynamicWeightedMajorityClassifier()
ob = OnlineBoostingClassifier()
oz = OzaBaggingClassifier()

# 2b. Initialization of the DDCW model for comparison tests
# NOTE(review): the trailing "0.5" after theta looks like an alternative
# value that was tried — confirm.
dwc = DiversifiedDynamicClassWeightedClassifier(
    period=100,
    base_estimators=[NaiveBayes(), HoeffdingTreeClassifier()],
    min_estimators=5,
    max_estimators=20,
    alpha=0.2,
    beta=3,
    theta=0.2)  #0.5

# 2c. Initialization of DDCW models for parameter testing (disabled)
#ht1_p1 = DiversifiedDynamicClassWeightedClassifier(period=500)
#ht1_p5 = DiversifiedDynamicClassWeightedClassifier(period=500)
#ht1_p10 = DiversifiedDynamicClassWeightedClassifier(period=1000)