def test_hat_nba(test_path):
    """Regression-test HAT (Hoeffding Adaptive Tree) with 'nba' leaves.

    Streams 5000 samples from a seeded HyperplaneGenerator into the
    learner; every 100 samples the current prediction and class
    probabilities are recorded and later compared against pre-computed
    reference values. Also pins the model's ``get_info()`` string and
    the return types of ``predict``/``predict_proba``.

    Parameters
    ----------
    test_path : str
        Directory containing the reference ``.npy`` probability file
        (pytest fixture).
    """
    stream = HyperplaneGenerator(mag_change=0.001, noise_percentage=0.1, random_state=2)
    stream.prepare_for_use()
    learner = HAT(leaf_prediction='nba')

    cnt = 0
    max_samples = 5000
    y_pred = array('i')
    y_proba = []
    wait_samples = 100

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test every n samples (skip the very first sample: model untrained).
        if (cnt % wait_samples == 0) and (cnt != 0):
            y_pred.append(learner.predict(X)[0])
            y_proba.append(learner.predict_proba(X)[0])
        learner.partial_fit(X, y)
        cnt += 1

    expected_predictions = array('i', [
        1, 0, 1, 0, 1, 1, 0, 1, 1, 1,
        0, 1, 0, 1, 0, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
        1, 0, 1, 1, 0, 0, 0, 0, 1, 1,
        1, 0, 0, 1, 0, 1, 1, 1, 0
    ])
    # np.alltrue was deprecated in NumPy 1.25 and removed in 2.0;
    # np.all is the supported equivalent.
    assert np.all(y_pred == expected_predictions)

    test_file = os.path.join(test_path, 'test_hoeffding_adaptive_tree_nba.npy')
    data = np.load(test_file)
    assert np.allclose(y_proba, data)

    expected_info = "HAT(binary_split=False, bootstrap_sampling=True, grace_period=200,\n" \
                    " leaf_prediction='nba', max_byte_size=33554432,\n" \
                    " memory_estimate_period=1000000, nb_threshold=0, no_preprune=False,\n" \
                    " nominal_attributes=None, remove_poor_atts=False, split_confidence=1e-07,\n" \
                    " split_criterion='info_gain', stop_mem_management=False, tie_threshold=0.05)"
    assert learner.get_info() == expected_info

    # isinstance is the idiomatic type check (type(...) == ... rejects subclasses).
    assert isinstance(learner.predict(X), np.ndarray)
    assert isinstance(learner.predict_proba(X), np.ndarray)
def test_hat_nba(test_path):
    """Regression-test HAT (Hoeffding Adaptive Tree) with 'nba' leaves.

    Streams 5000 samples from a seeded HyperplaneGenerator into the
    learner; every 100 samples the current prediction and class
    probabilities are recorded and later compared against pre-computed
    reference values. Also pins the model's ``get_info()`` string and
    the return types of ``predict``/``predict_proba``.

    Parameters
    ----------
    test_path : str
        Directory containing the reference ``.npy`` probability file
        (pytest fixture).
    """
    stream = HyperplaneGenerator(mag_change=0.001, noise_percentage=0.1, random_state=2)
    stream.prepare_for_use()
    learner = HAT(leaf_prediction='nba')

    cnt = 0
    max_samples = 5000
    y_pred = array('i')
    y_proba = []
    wait_samples = 100

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test every n samples (skip the very first sample: model untrained).
        if (cnt % wait_samples == 0) and (cnt != 0):
            y_pred.append(learner.predict(X)[0])
            y_proba.append(learner.predict_proba(X)[0])
        learner.partial_fit(X, y)
        cnt += 1

    expected_predictions = array('i', [
        0, 0, 1, 0, 1, 1, 0, 1, 1, 1,
        0, 1, 0, 1, 0, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
        1, 0, 1, 1, 0, 0, 0, 0, 1, 1,
        1, 0, 1, 1, 0, 1, 1, 1, 0
    ])
    # np.alltrue was deprecated in NumPy 1.25 and removed in 2.0;
    # np.all is the supported equivalent.
    assert np.all(y_pred == expected_predictions)

    test_file = os.path.join(test_path, 'test_hoeffding_adaptive_tree_nba.npy')
    data = np.load(test_file)
    assert np.allclose(y_proba, data)

    expected_info = 'HAT: max_byte_size: 33554432 - memory_estimate_period: 1000000 - grace_period: 200' \
                    ' - split_criterion: info_gain - split_confidence: 1e-07 - tie_threshold: 0.05' \
                    ' - binary_split: False - stop_mem_management: False - remove_poor_atts: False' \
                    ' - no_pre_prune: False - leaf_prediction: nba - nb_threshold: 0' \
                    ' - nominal_attributes: [] - '
    assert learner.get_info() == expected_info

    # isinstance is the idiomatic type check (type(...) == ... rejects subclasses).
    assert isinstance(learner.predict(X), np.ndarray)
    assert isinstance(learner.predict_proba(X), np.ndarray)
@author: Dr. Sayan Putatunda """ """ Chapter 3 Codes """ import os os.getcwd() # to see the current path of working directory os.chdir('./Python codes') ### Creating a synthetic dataset from skmultiflow.data import HyperplaneGenerator import pandas as pd import numpy as np create = HyperplaneGenerator(random_state=888, n_features=10, noise_percentage=0) create.prepare_for_use() X, Y = create.next_sample(10000) data = pd.DataFrame(np.hstack((X, np.array([Y]).T))) # Cast the last column to int data = data.astype({10: int}) data.shape # output- (10000, 11) # Store it in csv data.to_csv('data_stream.csv', index=False) # Applying Hoeffding Tree on the synthetic data stream
import logging
from GooweMSS import GooweMS
import random

# Root logger at INFO so the experiment's progress messages are visible.
logger = logging.getLogger()
logger.setLevel(logging.INFO)

# Prepare the data streams: N_STREAMS independent hyperplane generators,
# each with 2 drifting features (unseeded -> a different stream each run).
streams = []
N_STREAMS = 10
instances_num = 10000

for i in range(N_STREAMS):
    stream = HyperplaneGenerator(random_state=None, n_features=10,
                                 n_drift_features=2, mag_change=0.1,
                                 noise_percentage=0.0, sigma_percentage=0.1)
    streams.append(stream)
    stream.prepare_for_use()

# Use the first prepared stream for testing. The original code built a
# throwaway HyperplaneGenerator here and immediately overwrote it with
# streams[0]; that dead construction has been removed.
stream_t = streams[0]
stream_t.prepare_for_use()

instances_counter = 0
# Run each benchmark generator through the experiment driver.
# Streams are constructed immediately before their run() call so the
# (unseeded) RNG draw order matches the original script exactly.
run(agrawal, 'agrawal_gen', DATASE_SIZE)

# Abrupt drift between two SEA concepts at the midpoint of the stream.
sea = ConceptDriftStream(
    SEAGenerator(classification_function=1, noise_percentage=0.13),
    SEAGenerator(classification_function=2, noise_percentage=0.13),
    position=DATASE_SIZE / 2,
)
run(sea, 'sea_gen', DATASE_SIZE)

# Noisy LED digits with 4 drifting attributes.
led = LEDGeneratorDrift(has_noise=True, noise_percentage=0.28, n_drift_features=4)
run(led, 'led_gen', DATASE_SIZE)

# Abrupt drift between two STAGGER concepts at the midpoint.
stagger = ConceptDriftStream(
    STAGGERGenerator(classification_function=1, balance_classes=False),
    STAGGERGenerator(classification_function=2, balance_classes=False),
    position=DATASE_SIZE / 2,
)
run(stagger, 'stagger_gen', DATASE_SIZE)

# Gradually rotating hyperplane with 5 drifting features.
hyperplane = HyperplaneGenerator(
    noise_percentage=0.28,
    n_features=10,
    mag_change=0.25,
    sigma_percentage=0.3,
    n_drift_features=5,
)
run(hyperplane, 'hyperplane_gen', DATASE_SIZE)

# Drifting radial-basis-function generator.
rbf = RandomRBFGeneratorDrift(change_speed=0.4)
run(rbf, 'rbf_gen', DATASE_SIZE)