Example #1
# Imports assumed for an older scikit-multiflow release (<= 0.4) that still
# exposes HAT and Stream.prepare_for_use()
import os
from array import array

import numpy as np
from skmultiflow.data import HyperplaneGenerator
from skmultiflow.trees import HAT


def test_hat_nba(test_path):
    stream = HyperplaneGenerator(mag_change=0.001,
                                 noise_percentage=0.1,
                                 random_state=2)

    stream.prepare_for_use()

    learner = HAT(leaf_prediction='nba')

    cnt = 0
    max_samples = 5000
    y_pred = array('i')
    y_proba = []
    wait_samples = 100

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            y_pred.append(learner.predict(X)[0])
            y_proba.append(learner.predict_proba(X)[0])
        learner.partial_fit(X, y)
        cnt += 1

    expected_predictions = array('i', [
        1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1,
        0
    ])

    assert np.alltrue(y_pred == expected_predictions)

    test_file = os.path.join(test_path, 'test_hoeffding_adaptive_tree_nba.npy')
    data = np.load(test_file)
    assert np.allclose(y_proba, data)

    expected_info = "HAT(binary_split=False, bootstrap_sampling=True, grace_period=200,\n" \
                    "    leaf_prediction='nba', max_byte_size=33554432,\n" \
                    "    memory_estimate_period=1000000, nb_threshold=0, no_preprune=False,\n" \
                    "    nominal_attributes=None, remove_poor_atts=False, split_confidence=1e-07,\n" \
                    "    split_criterion='info_gain', stop_mem_management=False, tie_threshold=0.05)"

    assert learner.get_info() == expected_info
    assert type(learner.predict(X)) == np.ndarray
    assert type(learner.predict_proba(X)) == np.ndarray

def test_hat_nba(test_path):
    stream = HyperplaneGenerator(mag_change=0.001,
                                 noise_percentage=0.1,
                                 random_state=2)

    stream.prepare_for_use()

    learner = HAT(leaf_prediction='nba')

    cnt = 0
    max_samples = 5000
    y_pred = array('i')
    y_proba = []
    wait_samples = 100

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            y_pred.append(learner.predict(X)[0])
            y_proba.append(learner.predict_proba(X)[0])
        learner.partial_fit(X, y)
        cnt += 1

    expected_predictions = array('i', [
        0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1,
        0
    ])
    assert np.alltrue(y_pred == expected_predictions)

    test_file = os.path.join(test_path, 'test_hoeffding_adaptive_tree_nba.npy')
    data = np.load(test_file)
    assert np.allclose(y_proba, data)

    expected_info = 'HAT: max_byte_size: 33554432 - memory_estimate_period: 1000000 - grace_period: 200' \
                    ' - split_criterion: info_gain - split_confidence: 1e-07 - tie_threshold: 0.05' \
                    ' - binary_split: False - stop_mem_management: False - remove_poor_atts: False' \
                    ' - no_pre_prune: False - leaf_prediction: nba - nb_threshold: 0' \
                    ' - nominal_attributes: [] - '

    assert learner.get_info() == expected_info
    assert type(learner.predict(X)) == np.ndarray
    assert type(learner.predict_proba(X)) == np.ndarray
"""
@author: Dr. Sayan Putatunda
"""
""" Chapter 3 Codes """

import os

os.getcwd()  # show the current working directory
os.chdir('./Python codes')

### Creating a synthetic dataset
from skmultiflow.data import HyperplaneGenerator
import pandas as pd
import numpy as np

create = HyperplaneGenerator(random_state=888,
                             n_features=10,
                             noise_percentage=0)
create.prepare_for_use()
X, Y = create.next_sample(10000)
data = pd.DataFrame(np.hstack((X, np.array([Y]).T)))
# Cast the last column to int
data = data.astype({10: int})

data.shape
# output: (10000, 11)

# Store the stream in a CSV file
data.to_csv('data_stream.csv', index=False)

# Applying Hoeffding Tree on the synthetic data stream
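# A minimal sketch (not from the original chapter code) of how this step might
# look, assuming an older scikit-multiflow release (<= 0.4) that still provides
# HoeffdingTree, FileStream.prepare_for_use() and EvaluatePrequential:
from skmultiflow.data import FileStream
from skmultiflow.trees import HoeffdingTree
from skmultiflow.evaluation import EvaluatePrequential

# Read the stored stream back and run a prequential (test-then-train) evaluation
stream = FileStream('data_stream.csv')
stream.prepare_for_use()

ht = HoeffdingTree()
evaluator = EvaluatePrequential(pretrain_size=200,
                                max_samples=10000,
                                metrics=['accuracy'],
                                show_plot=False)
evaluator.evaluate(stream=stream, model=ht, model_names=['HT'])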
Example #4
import logging
import random

from GooweMSS import GooweMS
from skmultiflow.data import HyperplaneGenerator

logger = logging.getLogger()
logger.setLevel(logging.INFO)

# Prepare the data stream
streams = []
N_STREAMS = 10
instances_num = 10000

for i in range(N_STREAMS):
    stream = HyperplaneGenerator(random_state=None,
                                 n_features=10,
                                 n_drift_features=2,
                                 mag_change=0.1,
                                 noise_percentage=0.0,
                                 sigma_percentage=0.1)
    streams.append(stream)
    stream.prepare_for_use()

stream_t = HyperplaneGenerator(random_state=None,
                               n_features=10,
                               n_drift_features=2,
                               mag_change=0.1,
                               noise_percentage=0.0,
                               sigma_percentage=0.1)
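# Note: the reassignment below discards the generator created just above and
# reuses the first training stream as the test stream.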
stream_t = streams[0]
stream_t.prepare_for_use()

instances_counter = 0
    run(agrawal, 'agrawal_gen', DATASE_SIZE)

    sea = ConceptDriftStream(SEAGenerator(classification_function=1,
                                          noise_percentage=0.13),
                             SEAGenerator(classification_function=2,
                                          noise_percentage=0.13),
                             position=DATASE_SIZE / 2)
    run(sea, 'sea_gen', DATASE_SIZE)

    led = LEDGeneratorDrift(has_noise=True,
                            noise_percentage=0.28,
                            n_drift_features=4)
    run(led, 'led_gen', DATASE_SIZE)

    stagger = ConceptDriftStream(STAGGERGenerator(classification_function=1,
                                                  balance_classes=False),
                                 STAGGERGenerator(classification_function=2,
                                                  balance_classes=False),
                                 position=DATASE_SIZE / 2)
    run(stagger, 'stagger_gen', DATASE_SIZE)

    hyperplane = HyperplaneGenerator(noise_percentage=0.28,
                                     n_features=10,
                                     mag_change=0.25,
                                     sigma_percentage=0.3,
                                     n_drift_features=5)
    run(hyperplane, 'hyperplane_gen', DATASE_SIZE)

    rbf = RandomRBFGeneratorDrift(change_speed=0.4)
    run(rbf, 'rbf_gen', DATASE_SIZE)
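# The excerpt above never shows the run() helper it calls. A hypothetical sketch
# (the real implementation is not in the source) of what such a helper might do:
# draw a fixed number of samples from the generator and dump them to a CSV file
# named after it.
import numpy as np
import pandas as pd


def run(stream, name, n_samples):
    stream.prepare_for_use()              # old-API stream initialisation
    X, y = stream.next_sample(n_samples)  # draw the whole batch at once
    frame = pd.DataFrame(np.hstack((X, y.reshape(-1, 1))))
    frame.to_csv(f'{name}.csv', index=False)  # e.g. 'agrawal_gen.csv'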