def test_hat_mc(test_path):
    """Regression test for ``HAT`` with majority-class (``'mc'``) leaves.

    The learner is trained one sample at a time on a SEA stream wrapped in a
    ``ConceptDriftStream`` configured with ``position=250`` and ``width=10``.
    Predictions recorded every 20 samples are compared with stored reference
    values, then the info string and the model description are checked.

    Parameters
    ----------
    test_path : str
        Directory that contains ``test_hoeffding_adaptive_tree_mc.npy``.
    """
    stream = ConceptDriftStream(
        stream=SEAGenerator(random_state=1, noise_percentage=0.05),
        drift_stream=SEAGenerator(random_state=2,
                                  classification_function=2,
                                  noise_percentage=0.05),
        random_state=1,
        position=250,
        width=10)
    stream.prepare_for_use()

    learner = HAT(leaf_prediction='mc')

    max_samples = 1000
    wait_samples = 20
    y_pred = array('i')
    y_proba = []

    for cnt in range(max_samples):
        X, y = stream.next_sample()
        # Test-then-train: every `wait_samples` samples (except the first)
        # record the prediction before the learner sees the label.
        if cnt and cnt % wait_samples == 0:
            y_pred.append(learner.predict(X)[0])
            y_proba.append(learner.predict_proba(X)[0])
        learner.partial_fit(X, y)

    expected_predictions = array('i', [
        1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0,
        1
    ])
    # np.all replaces np.alltrue, which no longer exists in NumPy >= 2.0.
    assert np.all(y_pred == expected_predictions)

    test_file = os.path.join(test_path, 'test_hoeffding_adaptive_tree_mc.npy')
    data = np.load(test_file)
    assert np.allclose(y_proba, data)

    expected_info = 'HAT: max_byte_size: 33554432 - memory_estimate_period: 1000000 - grace_period: 200' \
                    ' - split_criterion: info_gain - split_confidence: 1e-07 - tie_threshold: 0.05' \
                    ' - binary_split: False - stop_mem_management: False - remove_poor_atts: False' \
                    ' - no_pre_prune: False - leaf_prediction: mc - nb_threshold: 0' \
                    ' - nominal_attributes: [] - '

    assert learner.get_info() == expected_info

    # Three textual variants are accepted: they differ in float formatting
    # and in the order of the class entries.
    expected_models = (
        'Leaf = Class 1.0 | {0.0: 0.005295278636481529, 1.0: 1.9947047213635185}\n',
        'Leaf = Class 1.0 | {0.0: 0.0052952786364815294, 1.0: 1.9947047213635185}\n',
        'Leaf = Class 1.0 | {1.0: 1.9947047213635185, 0.0: 0.0052952786364815294}\n',
    )
    assert learner.get_model_description() in expected_models

    stream.restart()
    X, y = stream.next_sample(5000)

    # Fit a fresh learner with a very small byte budget on a large batch;
    # the test only requires that this call completes.
    learner = HAT(max_byte_size=30, leaf_prediction='mc', grace_period=10)
    learner.partial_fit(X, y)
def test_hat_mc(test_path):
    """Regression test for ``HAT`` with majority-class (``'mc'``) leaves.

    Trains one sample at a time on a SEA stream wrapped in a
    ``ConceptDriftStream`` (``position=250``, ``width=10``), compares the
    predictions recorded every 20 samples with stored reference values and
    checks the info string, the model description and the return types of
    ``predict`` / ``predict_proba``.

    Parameters
    ----------
    test_path : str
        Directory that contains ``test_hoeffding_adaptive_tree_mc.npy``.
    """
    stream = ConceptDriftStream(
        stream=SEAGenerator(random_state=1, noise_percentage=0.05),
        drift_stream=SEAGenerator(random_state=2,
                                  classification_function=2,
                                  noise_percentage=0.05),
        random_state=1,
        position=250,
        width=10)
    stream.prepare_for_use()

    learner = HAT(leaf_prediction='mc')

    max_samples = 1000
    wait_samples = 20
    y_pred = array('i')
    y_proba = []

    for cnt in range(max_samples):
        X, y = stream.next_sample()
        # Test-then-train: every `wait_samples` samples (except the first)
        # record the prediction before the learner sees the label.
        if cnt and cnt % wait_samples == 0:
            y_pred.append(learner.predict(X)[0])
            y_proba.append(learner.predict_proba(X)[0])
        learner.partial_fit(X, y)

    expected_predictions = array('i', [
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1
    ])
    # np.all replaces np.alltrue, which no longer exists in NumPy >= 2.0.
    assert np.all(y_pred == expected_predictions)

    test_file = os.path.join(test_path, 'test_hoeffding_adaptive_tree_mc.npy')
    data = np.load(test_file)
    assert np.allclose(y_proba, data)

    expected_info = "HAT(binary_split=False, grace_period=200, leaf_prediction='mc',\n" \
                    "    max_byte_size=33554432, memory_estimate_period=1000000, nb_threshold=0,\n" \
                    "    no_preprune=False, nominal_attributes=None, remove_poor_atts=False,\n" \
                    "    split_confidence=1e-07, split_criterion='info_gain',\n" \
                    "    stop_mem_management=False, tie_threshold=0.05)"

    assert learner.get_info() == expected_info

    expected_model = 'Leaf = Class 1.0 | {0.0: 398.0, 1.0: 1000.0}\n'
    assert learner.get_model_description() == expected_model

    # Exact-type check (no subclasses), expressed with `is` rather than `==`.
    assert type(learner.predict(X)) is np.ndarray
    assert type(learner.predict_proba(X)) is np.ndarray

    stream.restart()
    X, y = stream.next_sample(5000)

    # Fit a fresh learner with a very small byte budget on a large batch;
    # the test only requires that this call completes.
    learner = HAT(max_byte_size=30, leaf_prediction='mc', grace_period=10)
    learner.partial_fit(X, y)
def test_hat_nb(test_path):
    """Regression test for ``HAT`` with naive-Bayes (``'nb'``) leaves.

    Trains one sample at a time on a SEA stream wrapped in a
    ``ConceptDriftStream`` (``position=250``, ``width=10``), compares the
    predictions recorded every 20 samples with stored reference values and
    checks the info string and the return types of ``predict`` /
    ``predict_proba``.

    Parameters
    ----------
    test_path : str
        Directory that contains ``test_hoeffding_adaptive_tree_nb.npy``.
    """
    stream = ConceptDriftStream(
        stream=SEAGenerator(random_state=1, noise_percentage=0.05),
        drift_stream=SEAGenerator(random_state=2,
                                  classification_function=2,
                                  noise_percentage=0.05),
        random_state=1,
        position=250,
        width=10)
    stream.prepare_for_use()

    learner = HAT(leaf_prediction='nb')

    max_samples = 1000
    wait_samples = 20
    y_pred = array('i')
    y_proba = []

    for cnt in range(max_samples):
        X, y = stream.next_sample()
        # Test-then-train: every `wait_samples` samples (except the first)
        # record the prediction before the learner sees the label.
        if cnt and cnt % wait_samples == 0:
            y_pred.append(learner.predict(X)[0])
            y_proba.append(learner.predict_proba(X)[0])
        learner.partial_fit(X, y)

    expected_predictions = array('i', [
        1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0,
        1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1,
        1
    ])
    # np.all replaces np.alltrue, which no longer exists in NumPy >= 2.0.
    assert np.all(y_pred == expected_predictions)

    test_file = os.path.join(test_path, 'test_hoeffding_adaptive_tree_nb.npy')
    data = np.load(test_file)
    assert np.allclose(y_proba, data)

    expected_info = 'HAT: max_byte_size: 33554432 - memory_estimate_period: 1000000 - grace_period: 200' \
                    ' - split_criterion: info_gain - split_confidence: 1e-07 - tie_threshold: 0.05' \
                    ' - binary_split: False - stop_mem_management: False - remove_poor_atts: False' \
                    ' - no_pre_prune: False - leaf_prediction: nb - nb_threshold: 0' \
                    ' - nominal_attributes: [] - '

    assert learner.get_info() == expected_info

    # Exact-type check (no subclasses), expressed with `is` rather than `==`.
    assert type(learner.predict(X)) is np.ndarray
    assert type(learner.predict_proba(X)) is np.ndarray
Esempio n. 4
0
def test_hoeffding_adaptive_tree_nb(test_path):
    """Regression test for ``HoeffdingAdaptiveTreeClassifier`` with ``'nb'`` leaves.

    Trains one sample at a time on a SEA stream wrapped in a
    ``ConceptDriftStream`` (``position=250``, ``width=10``), compares the
    predictions recorded every 20 samples with stored reference values and
    checks the whitespace-normalised info string and the return types of
    ``predict`` / ``predict_proba``.

    Parameters
    ----------
    test_path : str
        Directory that contains ``test_hoeffding_adaptive_tree_nb.npy``.
    """
    stream = ConceptDriftStream(
        stream=SEAGenerator(random_state=1, noise_percentage=0.05),
        drift_stream=SEAGenerator(random_state=2,
                                  classification_function=2,
                                  noise_percentage=0.05),
        random_state=1,
        position=250,
        width=10)

    learner = HoeffdingAdaptiveTreeClassifier(leaf_prediction='nb',
                                              random_state=1)

    max_samples = 1000
    wait_samples = 20
    y_pred = array('i')
    y_proba = []

    for cnt in range(max_samples):
        X, y = stream.next_sample()
        # Test-then-train: every `wait_samples` samples (except the first)
        # record the prediction before the learner sees the label.
        if cnt and cnt % wait_samples == 0:
            y_pred.append(learner.predict(X)[0])
            y_proba.append(learner.predict_proba(X)[0])
        learner.partial_fit(X, y)

    expected_predictions = array('i', [
        1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0,
        1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1,
        1
    ])
    # np.all replaces np.alltrue, which no longer exists in NumPy >= 2.0.
    assert np.all(y_pred == expected_predictions)

    test_file = os.path.join(test_path, 'test_hoeffding_adaptive_tree_nb.npy')
    data = np.load(test_file)
    assert np.allclose(y_proba, data)

    expected_info = "HoeffdingAdaptiveTreeClassifier(binary_split=False, bootstrap_sampling=True, grace_period=200, " \
                    "leaf_prediction='nb', max_byte_size=33554432, memory_estimate_period=1000000, nb_threshold=0, " \
                    "no_preprune=False, nominal_attributes=None, random_state=1, remove_poor_atts=False, " \
                    "split_confidence=1e-07, split_criterion='info_gain', stop_mem_management=False, tie_threshold=0.05)"
    # Collapse every whitespace run (including newlines) to a single space;
    # str.split() already yields stripped tokens, so no extra strip is needed.
    info = " ".join(learner.get_info().split())
    assert info == expected_info

    # Exact-type check (no subclasses), expressed with `is` rather than `==`.
    assert type(learner.predict(X)) is np.ndarray
    assert type(learner.predict_proba(X)) is np.ndarray
Esempio n. 5
0
    def next_sample(self, batch_size=1):
        """Return the next sample and advance the gradual-drift state.

        Only ``batch_size == 1`` is handled; for larger values a message is
        printed and ``None`` is returned.

        While ``self.in_drift`` is set the sample is drawn from
        ``self.transition_stream``, otherwise from
        ``self.concepts[self.current_concept]``. Afterwards
        ``self.example_count`` is advanced and ``drifted``, ``in_drift``,
        ``drift_switch``, ``current_concept`` and ``transition_stream`` are
        updated according to the switch points stored as keys of
        ``self.concept_chain``.

        Parameters
        ----------
        batch_size : int, optional (default=1)
            Number of samples requested.

        Returns
        -------
        The value returned by the underlying stream's ``next_sample``, or
        ``None`` when ``batch_size > 1``.
        """
        if batch_size > 1:
            print("Only batch size of 1 for now")
            return None

        # The sample is drawn BEFORE the state is updated, so a state change
        # made below only affects the following call.
        if not self.in_drift:
            samples = self.concepts[self.current_concept].next_sample(
                batch_size)
        else:
            samples = self.transition_stream.next_sample(batch_size)

        # Defaults used when no key of concept_chain lies on the respective
        # side of the current example count.
        last_switch_point = 0 - self.window_size // 2
        next_switch_point = self.num_samples + self.window_size
        self.example_count += batch_size
        # Find the latest switch point <= example_count and the earliest
        # switch point >= example_count.
        for concept_switch_index in sorted(self.concept_chain.keys()):
            if (concept_switch_index <= self.example_count):
                last_switch_point = concept_switch_index
            if concept_switch_index >= self.example_count:
                next_switch_point = concept_switch_index
                break

        self.drifted = False
        if not self.in_drift:
            # Not blending yet: start once the count is within half a window
            # of the next switch point.
            if self.example_count >= next_switch_point - self.window_size // 2:
                # Build a stream that blends the concept at the last switch
                # point into the concept at the next one.
                # NOTE(review): both lookups assume the switch points are
                # keys of concept_chain; the defaults above are not
                # guaranteed to be - confirm against the parent class.
                self.in_drift = True
                self.drift_switch = True
                self.transition_stream = ConceptDriftStream(
                    stream=self.concepts[self.concept_chain[last_switch_point]]
                    .get_datastream(),
                    drift_stream=self.concepts[self.concept_chain[
                        next_switch_point]].get_datastream(),
                    position=self.window_size // 2,
                    width=self.window_size)
                self.transition_stream.prepare_for_use()
        else:
            if self.example_count == next_switch_point:
                # Exactly on the switch point: the new concept becomes current.
                self.current_concept = self.concept_chain[next_switch_point]
                self.drifted = True
                self.drift_switch = False
            if self.example_count >= (last_switch_point + self.window_size //
                                      2) and not self.drift_switch:
                # Half a window past the last switch point, and the switch
                # has already happened: stop using the transition stream.
                self.in_drift = False

        return samples
Esempio n. 6
0
    def get_conceptdrift_data_generated(self,
                                        classification_function=0,
                                        noise_percentage=0.1,
                                        random_state=112,
                                        drift_classification_function=3,
                                        drift_random_state=112,
                                        drift_noise_percentage=0.0,
                                        drift_start_position=5000,
                                        drift_width=1000,
                                        n_num_features=2,
                                        n_cat_features=0):
        """Build a ``ConceptDriftStream`` that joins two AGRAWAL generators.

        Parameters
        ----------
        classification_function, noise_percentage, random_state
            Passed to the first ``AGRAWALGenerator`` as
            ``classification_function``, ``perturbation`` and
            ``random_state``.
        drift_classification_function, drift_noise_percentage, drift_random_state
            Same three settings for the second (drift) generator.
        drift_start_position
            Forwarded as ``position`` of the ``ConceptDriftStream``.
        drift_width
            Forwarded as ``width`` of the ``ConceptDriftStream``.
        n_num_features, n_cat_features
            Accepted but not used by this method.

        Returns
        -------
        ConceptDriftStream
            Stream combining the two generators.
        """
        from skmultiflow.data import AGRAWALGenerator, ConceptDriftStream

        initial_concept = AGRAWALGenerator(
            classification_function=classification_function,
            perturbation=noise_percentage,
            random_state=random_state,
        )
        drifted_concept = AGRAWALGenerator(
            classification_function=drift_classification_function,
            perturbation=drift_noise_percentage,
            random_state=drift_random_state,
        )

        return ConceptDriftStream(
            stream=initial_concept,
            drift_stream=drifted_concept,
            position=drift_start_position,
            width=drift_width,
        )
Esempio n. 7
0
def test_knn_adwin():
    """Regression test for ``KNNADWINClassifier`` on a drifting SEA stream.

    First part: test-then-train one sample at a time for 1000 samples,
    comparing the predictions recorded every 20 samples and the number of
    correct ones with reference values. Second part: after ``reset`` the
    learner is fitted on a batch and evaluated on its tail.
    """
    stream = ConceptDriftStream(
        stream=SEAGenerator(random_state=1),
        drift_stream=SEAGenerator(random_state=2, classification_function=2),
        random_state=1,
        position=250,
        width=10)
    stream.prepare_for_use()
    learner = KNNADWINClassifier(n_neighbors=8,
                                 leaf_size=40,
                                 max_window_size=200)

    max_samples = 1000
    wait_samples = 20
    predictions = array('i')
    correct_predictions = 0

    for cnt in range(max_samples):
        X, y = stream.next_sample()
        # Test-then-train: every `wait_samples` samples (except the first)
        # record the prediction before the learner sees the label.
        if cnt and cnt % wait_samples == 0:
            predictions.append(learner.predict(X)[0])
            if y[0] == predictions[-1]:
                correct_predictions += 1
        learner.partial_fit(X, y)

    expected_predictions = array('i', [
        1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0,
        1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1,
        1
    ])
    # np.all replaces np.alltrue, which no longer exists in NumPy >= 2.0.
    assert np.all(predictions == expected_predictions)

    expected_correct_predictions = 46
    assert correct_predictions == expected_correct_predictions

    learner.reset()
    assert learner.window.n_samples == 0

    expected_info = 'KNNADWINClassifier(leaf_size=40, max_window_size=200, n_neighbors=8, nominal_attributes=None)'
    # Collapse every whitespace run (including newlines) to a single space;
    # str.split() already yields stripped tokens, so no extra strip is needed.
    info = " ".join(learner.get_info().split())
    assert info == expected_info

    stream.restart()

    X, y = stream.next_sample(max_samples)
    # Index 950 is in neither slice; the expected count below was recorded
    # with exactly these slices, so they are kept as they are.
    learner.fit(X[:950], y[:950])
    predictions = learner.predict(X[951:])

    correct_predictions = sum(np.array(predictions) == y[951:])
    expected_correct_predictions = 47
    assert correct_predictions == expected_correct_predictions

    # Exact-type check (no subclasses), expressed with `is` rather than `==`.
    assert type(learner.predict(X)) is np.ndarray
    assert type(learner.predict_proba(X)) is np.ndarray
def test_concept_drift_stream_with_alpha(test_path):
    """Check ``ConceptDriftStream`` when it is configured through ``alpha``.

    Verifies the info string produced for ``alpha=0.01`` and that passing
    ``alpha=0`` emits a ``FutureWarning`` with the expected message.

    Parameters
    ----------
    test_path
        Accepted but not used by this test.
    """
    expected_info = (
        "ConceptDriftStream(alpha=0.01,\n"
        "                   drift_stream=AGRAWALGenerator(balance_classes=False,\n"
        "                                                 classification_function=2,\n"
        "                                                 perturbation=0.0,\n"
        "                                                 random_state=112),\n"
        "                   position=20, random_state=1,\n"
        "                   stream=AGRAWALGenerator(balance_classes=False,\n"
        "                                           classification_function=0,\n"
        "                                           perturbation=0.0, random_state=112),\n"
        "                   width=5729)"
    )
    drifting = ConceptDriftStream(alpha=0.01, random_state=1, position=20)
    assert drifting.get_info() == expected_info

    expected_message = ("Default value for 'alpha' has changed from 0 "
                        "to None. 'alpha=0' will throw an error from v0.7.0")
    with pytest.warns(FutureWarning) as caught:
        ConceptDriftStream(alpha=0, random_state=1, position=20)

    first_warning_text = caught[0].message.args[0]
    assert first_warning_text == expected_message
def test_leverage_bagging_me():
    """Regression test for ``LeveragingBaggingClassifier`` with 'leveraging_bag_me'.

    Runs ``run_prequential_supervised`` on a default ``ConceptDriftStream``
    (``position=500``, ``width=100``) with a naive-Bayes base estimator and
    passes the reference predictions as ``y_expected``.
    """
    stream = ConceptDriftStream(position=500, width=100, random_state=112)
    nb = NaiveBayes()

    # leveraging_bag_me
    learner = LeveragingBaggingClassifier(
        base_estimator=nb,
        n_estimators=5,
        random_state=112,
        leverage_algorithm='leveraging_bag_me')

    # The builtin `int` replaces the `np.int` alias, which was removed from
    # NumPy in 1.24; both name the same dtype.
    y_expected = np.asarray([
        0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0,
        1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0,
        1, 0
    ], dtype=int)

    run_prequential_supervised(stream,
                               learner,
                               max_samples=2000,
                               n_wait=40,
                               y_expected=y_expected)
Esempio n. 10
0
def test_concept_drift_stream(test_path):
    """Check metadata and generated samples of a default ``ConceptDriftStream``.

    The stream is created with ``random_state=1``, ``position=20`` and
    ``width=5``. Its descriptive attributes and info string are compared
    with fixed values and the generated samples with stored reference data.

    Parameters
    ----------
    test_path : str
        Directory that contains ``concept_drift_stream.npz``.
    """
    stream = ConceptDriftStream(random_state=1, position=20, width=5)
    stream.prepare_for_use()

    assert stream.n_remaining_samples() == -1

    expected_names = [
        "salary", "commission", "age", "elevel", "car", "zipcode", "hvalue",
        "hyears", "loan"
    ]
    assert stream.feature_names == expected_names

    expected_targets = [0, 1]
    assert stream.target_values == expected_targets

    assert stream.target_names == ['target']

    assert stream.n_features == 9

    assert stream.n_cat_features == 3

    assert stream.n_num_features == 6

    assert stream.n_targets == 1

    assert stream.get_info() == 'ConceptDriftStream: ' \
                                'First Stream: AGRAWALGenerator - ' \
                                'Drift Stream: AGRAWALGenerator - ' \
                                'alpha: 0.0 - position: 20 - width: 5'

    assert stream.has_more_samples() is True

    assert stream.is_restartable() is True

    # Load the stored reference samples; they are compared below with the
    # first 30 instances produced by the stream.
    test_file = os.path.join(test_path, 'concept_drift_stream.npz')
    data = np.load(test_file)
    X_expected = data['X']
    y_expected = data['y']

    # np.all replaces np.alltrue, which no longer exists in NumPy >= 2.0.
    X, y = stream.next_sample()
    assert np.all(X[0] == X_expected[0])
    assert np.all(y[0] == y_expected[0])

    # last_sample() must return the sample that was just drawn.
    X, y = stream.last_sample()
    assert np.all(X[0] == X_expected[0])
    assert np.all(y[0] == y_expected[0])

    stream.restart()
    X, y = stream.next_sample(30)
    assert np.all(X == X_expected)
    assert np.all(y == y_expected)

    assert stream.n_targets == np.array(y).ndim

    assert stream.n_features == X.shape[1]

    assert 'stream' == stream.get_class_type()
Esempio n. 11
0
from dyn2sel.apply_dcs import DYNSEMethod
from dyn2sel.dcs_techniques import KNORAU
from skmultiflow.data import RandomTreeGenerator, ConceptDriftStream
from skmultiflow.evaluation import EvaluatePrequential
from skmultiflow.bayes import NaiveBayes
from skmultiflow.meta import OzaBagging

# Stream that moves from one RandomTreeGenerator concept to another, with the
# drift configured at position=2500 and width=1.
generator = ConceptDriftStream(
    stream=RandomTreeGenerator(sample_random_state=42, tree_random_state=42),
    drift_stream=RandomTreeGenerator(sample_random_state=43,
                                     tree_random_state=43),
    position=2500,
    width=1,
)
# Two models built on naive-Bayes base learners, evaluated side by side.
dynse = DYNSEMethod(NaiveBayes(), 200, KNORAU(), max_ensemble_size=10)
ozabag = OzaBagging(NaiveBayes(), n_estimators=10)

# Prequential evaluation of both models on the same stream.
evaluator = EvaluatePrequential(max_samples=5000,
                                n_wait=200,
                                batch_size=200,
                                pretrain_size=0)
evaluator.evaluate(generator, [dynse, ozabag], ["Dynse", "Ozabag"])
# Second prequential run, reporting accuracy only and without plotting.
evaluate1 = EvaluatePrequential(show_plot=False,
                                pretrain_size=400,
                                max_samples=10000,
                                metrics=['accuracy'])
# NOTE(review): `dstream` and `ht_class` are not defined in the visible part
# of this file - confirm they are created before this line runs.
evaluate1.evaluate(stream=dstream, model=ht_class)

###################################################

# Hoeffding Adaptive tree
from skmultiflow.trees import HoeffdingAdaptiveTreeClassifier
from skmultiflow.data import ConceptDriftStream
from skmultiflow.evaluation import EvaluatePrequential
from skmultiflow.evaluation import EvaluateHoldout

# Simulate a sample data stream: a ConceptDriftStream with its default
# generators, seeded with 777 and with the drift positioned at sample 30000.
ds = ConceptDriftStream(random_state=777, position=30000)
# Bare expression: it shows the repr below only in an interactive session or
# notebook and has no effect when the file runs as a script.
ds
# Output:
#ConceptDriftStream(alpha=0.0,
#                   drift_stream=AGRAWALGenerator(balance_classes=False,
#                                                 classification_function=2,
#                                                 perturbation=0.0,
#                                                 random_state=112),
#                   position=30000, random_state=777,
#                   stream=AGRAWALGenerator(balance_classes=False,
#                                           classification_function=0,
#                                           perturbation=0.0, random_state=112),
#                   width=1000)

# Instantiate the model object (all constructor defaults)
model_hat = HoeffdingAdaptiveTreeClassifier()
Esempio n. 13
0
#    stream = ConceptDriftStream(STAGGERGenerator(classification_function=0),
#                            STAGGERGenerator(classification_function=2),
#                            position=n_samples/2,
#                            width=n_samples/5)
#    streams.append(stream)

    """Abrupt STAGGER"""
#    stream = ConceptDriftStream(STAGGERGenerator(classification_function=0),
#                            STAGGERGenerator(classification_function=2),
#                            position=n_samples/2,
#                            alpha=90.0)
#    streams.append(stream)
    
    # """Gradual SEA"""
    stream = ConceptDriftStream(SEAGenerator(classification_function=0),
                                SEAGenerator(classification_function=2),
                                position=N_SAMPLES/2,
                                width=N_SAMPLES/5)
    stream.name = 'SEA GRADUAL'
    STREAMS.append(stream)

    """Abrupt SEA"""
    stream = ConceptDriftStream(SEAGenerator(classification_function=0),
                                SEAGenerator(classification_function=1),
                                alpha=90.0, position=N_SAMPLES / 2)
    stream.name = 'SEA ABRUPBT'
    STREAMS.append(stream)

    """GRADUAL LED"""
    stream = ConceptDriftStream(LEDGeneratorDrift(has_noise=False, noise_percentage=0.0, n_drift_features=3),
                                drift_stream=LEDGeneratorDrift(
                                    has_noise=False, noise_percentage=0.0, n_drift_features=7),
Esempio n. 14
0
class RecurringConceptGradualStream(RecurringConceptStream):
    """Recurring-concept stream whose concept changes are blended gradually.

    Around each switch point of ``concept_chain`` the samples come from a
    ``ConceptDriftStream`` built from the outgoing and the incoming concept
    instead of from a single concept.
    """

    def __init__(self,
                 rctype,
                 num_samples,
                 noise,
                 concept_chain,
                 window_size=1000,
                 seed=None,
                 desc=None,
                 boost_first_occurance=True):
        """Store the drift bookkeeping, then delegate to the parent class.

        ``window_size`` is kept on the instance; every other argument is
        forwarded unchanged to ``RecurringConceptStream.__init__``.
        """
        # The drift state is set before the parent initialiser runs.
        self.transition_stream = None
        self.window_size = window_size
        self.drift_switch = False
        self.in_drift = False
        super().__init__(rctype, num_samples, noise, concept_chain,
                         seed=seed,
                         desc=desc,
                         boost_first_occurance=boost_first_occurance)

    def next_sample(self, batch_size=1):
        """Return the next sample and advance the gradual-drift state.

        Only ``batch_size == 1`` is handled; for larger values a message is
        printed and ``None`` is returned. Otherwise the value returned by the
        active stream's ``next_sample`` is passed through.
        """
        if batch_size > 1:
            print("Only batch size of 1 for now")
            return None

        # Draw first; the state update below only affects the next call.
        if self.in_drift:
            source = self.transition_stream
        else:
            source = self.concepts[self.current_concept]
        samples = source.next_sample(batch_size)

        half_window = self.window_size // 2
        previous_switch = -half_window
        upcoming_switch = self.num_samples + self.window_size
        self.example_count += batch_size
        seen = self.example_count

        # Latest switch point <= seen and earliest switch point >= seen.
        for switch_index in sorted(self.concept_chain.keys()):
            if switch_index <= seen:
                previous_switch = switch_index
            if switch_index >= seen:
                upcoming_switch = switch_index
                break

        self.drifted = False
        if self.in_drift:
            if seen == upcoming_switch:
                # Exactly on the switch point: adopt the new concept.
                self.current_concept = self.concept_chain[upcoming_switch]
                self.drifted = True
                self.drift_switch = False
            if not self.drift_switch and seen >= previous_switch + half_window:
                # Switch done and half a window has passed: stop blending.
                self.in_drift = False
        elif seen >= upcoming_switch - half_window:
            # Within half a window of the next switch: start blending.
            self.in_drift = True
            self.drift_switch = True
            outgoing_stream = self.concepts[
                self.concept_chain[previous_switch]].get_datastream()
            incoming_stream = self.concepts[
                self.concept_chain[upcoming_switch]].get_datastream()
            self.transition_stream = ConceptDriftStream(
                stream=outgoing_stream,
                drift_stream=incoming_stream,
                position=half_window,
                width=self.window_size)
            self.transition_stream.prepare_for_use()

        return samples
Esempio n. 15
0
# Settings shared by both Adaptive XGBoost models below.
# NOTE(review): `n_estimators` and `learning_rate` are used further down but
# are not defined in the visible part of this file - confirm they are set
# before this point.
max_depth = 6  # Max depth for each tree in the ensemble
max_window_size = 1000  # Max window size
min_window_size = 1  # set to activate the dynamic window strategy
detect_drift = False  # Enable/disable drift detection

# Two classifiers that differ only in their update strategy.
AXGBp = AdaptiveXGBoostClassifier(update_strategy='push',
                                  n_estimators=n_estimators,
                                  learning_rate=learning_rate,
                                  max_depth=max_depth,
                                  max_window_size=max_window_size,
                                  min_window_size=min_window_size,
                                  detect_drift=detect_drift)
AXGBr = AdaptiveXGBoostClassifier(update_strategy='replace',
                                  n_estimators=n_estimators,
                                  learning_rate=learning_rate,
                                  max_depth=max_depth,
                                  max_window_size=max_window_size,
                                  min_window_size=min_window_size,
                                  detect_drift=detect_drift)

# Default concept-drift stream, seeded, with the drift positioned at 5000.
stream = ConceptDriftStream(random_state=1000, position=5000)
# stream.prepare_for_use()   # Required for skmultiflow v0.4.1

# Prequential evaluation over 20000 samples without pre-training, with
# plotting enabled.
evaluator = EvaluatePrequential(pretrain_size=0,
                                max_samples=20000,
                                show_plot=True)

evaluator.evaluate(stream=stream,
                   model=[AXGBp, AXGBr],
                   model_names=['AXGBp', 'AXGBr'])
def test_concept_drift_stream(test_path):
    """Check metadata and generated samples of a default ``ConceptDriftStream``.

    The stream is created with ``random_state=1``, ``position=20`` and
    ``width=5``. Its descriptive attributes are compared with fixed values,
    the generated samples with stored reference data, and finally the info
    string with the expected representation.

    Parameters
    ----------
    test_path : str
        Directory that contains ``concept_drift_stream.npz``.
    """
    stream = ConceptDriftStream(random_state=1, position=20, width=5)

    assert stream.n_remaining_samples() == -1

    expected_names = ["salary", "commission", "age", "elevel", "car", "zipcode", "hvalue", "hyears", "loan"]
    assert stream.feature_names == expected_names

    expected_targets = [0, 1]
    assert stream.target_values == expected_targets

    assert stream.target_names == ['target']

    assert stream.n_features == 9

    assert stream.n_cat_features == 3

    assert stream.n_num_features == 6

    assert stream.n_targets == 1

    assert stream.has_more_samples() is True

    assert stream.is_restartable() is True

    # Load the stored reference samples; they are compared below with the
    # first 30 instances produced by the stream.
    test_file = os.path.join(test_path, 'concept_drift_stream.npz')
    data = np.load(test_file)
    X_expected = data['X']
    y_expected = data['y']

    # np.all replaces np.alltrue, which no longer exists in NumPy >= 2.0.
    X, y = stream.next_sample()
    assert np.all(X[0] == X_expected[0])
    assert np.all(y[0] == y_expected[0])

    # last_sample() must return the sample that was just drawn.
    X, y = stream.last_sample()
    assert np.all(X[0] == X_expected[0])
    assert np.all(y[0] == y_expected[0])

    stream.restart()
    X, y = stream.next_sample(30)
    assert np.all(X == X_expected)
    assert np.all(y == y_expected)

    assert stream.n_targets == np.array(y).ndim

    assert stream.n_features == X.shape[1]

    assert 'stream' == stream._estimator_type

    expected_info = "ConceptDriftStream(alpha=0.0,\n" \
                    "                   drift_stream=AGRAWALGenerator(balance_classes=False,\n" \
                    "                                                 classification_function=2,\n" \
                    "                                                 perturbation=0.0,\n" \
                    "                                                 random_state=112),\n" \
                    "                   position=20, random_state=1,\n" \
                    "                   stream=AGRAWALGenerator(balance_classes=False,\n" \
                    "                                           classification_function=0,\n" \
                    "                                           perturbation=0.0, random_state=112),\n" \
                    "                   width=5)"
    assert stream.get_info() == expected_info
# Number of samples drawn for each generated dataset; half of it is also used
# as the drift position of the streams built in the __main__ section.
DATASE_SIZE = 25000


def run(generator, filename='generated', n=10000):
    """Draw ``n`` samples from ``generator`` and save them as a CSV file.

    The file is written to ``_datasets/<filename>.csv`` without header and
    without index column; the feature columns come first, followed by the
    target cast to ``int32``. The ``_datasets`` directory is created when it
    does not exist yet.

    Parameters
    ----------
    generator
        Object providing ``prepare_for_use()`` and ``next_sample(n)``, the
        latter returning an ``(X, y)`` pair.
    filename : str, optional (default='generated')
        Base name of the CSV file (without extension).
    n : int, optional (default=10000)
        Number of samples to draw.
    """
    # Local import so that the block does not rely on a module-level `os`.
    import os

    # Plain string: the original f-string had no placeholder.
    print('Generating dataset from generator ', filename)
    generator.prepare_for_use()
    X, y = generator.next_sample(n)
    features = pd.DataFrame(X)
    target = pd.DataFrame(y, dtype='int32')
    dataset = pd.concat([features, target], axis=1)
    # to_csv does not create missing directories, so make sure it exists.
    os.makedirs('_datasets', exist_ok=True)
    dataset.to_csv(f'_datasets/{filename}.csv', header=False, index=False)


if __name__ == "__main__":

    agrawal = ConceptDriftStream(alpha=45.0, position=DATASE_SIZE / 2)
    run(agrawal, 'agrawal_gen', DATASE_SIZE)

    sea = ConceptDriftStream(SEAGenerator(classification_function=1,
                                          noise_percentage=0.13),
                             SEAGenerator(classification_function=2,
                                          noise_percentage=0.13),
                             position=DATASE_SIZE / 2)
    run(sea, 'sea_gen', DATASE_SIZE)

    led = LEDGeneratorDrift(has_noise=True,
                            noise_percentage=0.28,
                            n_drift_features=4)
    run(led, 'led_gen', DATASE_SIZE)

    stagger = ConceptDriftStream(STAGGERGenerator(classification_function=1,
from skmultiflow.data import ConceptDriftStream
from skmultiflow.data import AGRAWALGenerator
import logging
from GooweMS import GooweMS
import random

# Configure the root logger so that INFO-level messages are not filtered out.
logger = logging.getLogger()
logger.setLevel(logging.INFO)
# Prepare the data stream: AGRAWAL classification function 1 drifting to
# function 2, with the drift configured at position=3000 and width=1000.
stream_1 = ConceptDriftStream(
    stream=AGRAWALGenerator(balance_classes=False,
                            classification_function=1,
                            perturbation=0.0,
                            random_state=112),
    drift_stream=AGRAWALGenerator(balance_classes=False,
                                  classification_function=2,
                                  perturbation=0.0,
                                  random_state=112),
    position=3000,
    width=1000,
    random_state=None,
    alpha=0.0)
stream_2 = ConceptDriftStream(
    stream=AGRAWALGenerator(balance_classes=False,
                            classification_function=3,
                            perturbation=0.0,
                            random_state=21),
    drift_stream=AGRAWALGenerator(balance_classes=False,
                                  classification_function=1,
                                  perturbation=0.0,
                                  random_state=22),
class RecurringConceptGradualStream(RecurringConceptStream):
    """Recurring-concept stream whose concept changes happen gradually.

    Rather than switching abruptly, every transition blends the outgoing and
    the incoming concept over a window of observations by means of the
    scikit-multiflow ``ConceptDriftStream``.

    Parameters
    ----------

    rctype: RCStreamType
        Enum value describing the type of stream.

    num_samples: int
        Number of samples in the stream.

    noise: float
        The probability that noise occurs during generation. For each new
        sample generated, the sample will be perturbed by the amount of
        perturbation.
        Values range from 0.0 to 1.0.

    concept_chain: list<int> or dict
        Either a dict whose keys are observation numbers and whose values
        are the concept beginning at that observation,
        or
        a list of concept ids. For a list, a dict is generated with each
        concept lasting the length given in ``desc``, or a uniform length.

    window_size: int
        Number of observations over which each gradual drift is spread.

    seed: int
        Random seed.

    desc: dict<int><conceptOccurence>
        Map from concept ID to its options.

    boost_first_occurance: bool
        If true, double the observations drawn from the first occurrence
        of a concept. Allows a better model to be built and stored.

    Examples
    --------

    >>> # An example stream using the STAGGER Generator.
    >>> # Starts using generating function 0, then at
    >>> # observation 5000 transitions to generating function
    >>> # 1 then at 10000 transitions back to 0.
    >>> from skika.data.synthetic.reccurring_concept_stream import RCStreamType, RecurringConceptGradualStream, conceptOccurence
    >>> concept_chain = {0: 0, 5000: 1, 10000: 0}
    >>> num_samples = 15000
    >>> # init concept
    >>> concept_0 = conceptOccurence(id = 0, difficulty = 2, noise = 0,
    ...                     appearences = 2, examples_per_appearence = 5000)
    >>> concept_1 = conceptOccurence(id = 1, difficulty = 3, noise = 0,
    ...                     appearences = 1, examples_per_appearence = 5000)
    >>> desc = {0: concept_0, 1: concept_1}
    >>> datastream = RecurringConceptGradualStream(
    ...                     rctype = RCStreamType.STAGGER,
    ...                     num_samples =num_samples,
    ...                     noise = 0,
    ...                     concept_chain = concept_chain,
    ...                     window_size = 1000,
    ...                     seed = 42,
    ...                     desc = desc,
    ...                     boost_first_occurance = False)
    >>> datastream.has_more_samples()
    True
    >>> datastream.get_drift_info()
    {0: 0, 5000: 1, 10000: 0}
    >>> datastream.n_remaining_samples()
    15000
    >>> datastream.get_stream_info()
    {0: 0, 5000: 1, 10000: 0}
    0 - 5000: STAGGERGenerator(balance_classes=False, classification_function=0,
                    random_state=42)
    5000 - 10000: STAGGERGenerator(balance_classes=False, classification_function=1,
                    random_state=43)
    10000 - 15000: STAGGERGenerator(balance_classes=False, classification_function=0,
                    random_state=42)
    >>> datastream.get_moa_stream_info()
    {0: 0, 5000: 1, 10000: 0}
    '(ConceptDriftStream -s (generators.STAGGERGenerator -f 1 -i 42) -d (ConceptDriftStream -s (generators.STAGGERGenerator -f 2 -i 43) -d (generators.STAGGERGenerator -f 1 -i 42) -p 5000 -w 1) -p 5000 -w 1)'
    >>> datastream.get_supplementary_info()
    >>> datastream.next_sample()
    (array([[2., 0., 2.]]), array([0]))
    >>> datastream.n_remaining_samples()
    14999
    >>> datastream.next_sample()
    (array([[2., 0., 0.]]), array([0]))
    >>> datastream.n_remaining_samples()
    14998
    """

    def __init__(self, rctype, num_samples, noise, concept_chain,
                 window_size=1000, seed=None, desc=None,
                 boost_first_occurance=True):
        """Set up the gradual-drift bookkeeping, then defer to the base class.

        See the class docstring for the meaning of each parameter.
        """
        # Gradual-drift state is initialised before the base initialiser
        # runs, exactly as in the original ordering.
        self.window_size = window_size
        self.transition_stream = None
        self.in_drift = False
        self.drift_switch = False
        super().__init__(
            rctype,
            num_samples,
            noise,
            concept_chain,
            seed=seed,
            desc=desc,
            boost_first_occurance=boost_first_occurance,
        )

    def next_sample(self, batch_size=1):
        """Return the next sample and advance the gradual-drift state.

        Parameters
        ----------
        batch_size: int
            Number of samples requested. Only a batch size of 1 is
            supported; for anything larger a message is printed and
            ``None`` is returned.

        Returns
        -------
        The value returned by ``next_sample`` of the active concept, or of
        the transition stream while a drift is in progress.
        """
        if batch_size > 1:
            print("Only batch size of 1 for now")
            return None

        # While a drift is underway samples come from the blended transition
        # stream; otherwise they come straight from the current concept.
        if self.in_drift:
            source = self.transition_stream
        else:
            source = self.concepts[self.current_concept]
        samples = source.next_sample(batch_size)

        half_window = self.window_size // 2

        # Defaults used when no switch point lies before / after the
        # current position.
        previous_switch = -half_window
        upcoming_switch = self.num_samples + self.window_size

        self.example_count += batch_size

        # Locate the switch points surrounding the updated example count.
        for switch_at in sorted(self.concept_chain.keys()):
            if switch_at <= self.example_count:
                previous_switch = switch_at
            if switch_at >= self.example_count:
                upcoming_switch = switch_at
                break

        self.drifted = False
        if self.in_drift:
            # Mid-transition: flip the active concept exactly at the switch
            # point.
            if self.example_count == upcoming_switch:
                self.current_concept = self.concept_chain[upcoming_switch]
                self.drifted = True
                self.drift_switch = False
            # The transition ends half a window after the switch point, and
            # only once the concept has actually been flipped.
            window_passed = (
                self.example_count >= previous_switch + half_window)
            if window_passed and not self.drift_switch:
                self.in_drift = False
        elif self.example_count >= upcoming_switch - half_window:
            # Half a window before the next switch: start blending the
            # outgoing concept into the incoming one.
            self.in_drift = True
            self.drift_switch = True
            outgoing = self.concepts[
                self.concept_chain[previous_switch]].get_datastream()
            incoming = self.concepts[
                self.concept_chain[upcoming_switch]].get_datastream()
            self.transition_stream = ConceptDriftStream(
                stream=outgoing,
                drift_stream=incoming,
                position=half_window,
                width=self.window_size)
            self.transition_stream.prepare_for_use()

        return samples
Esempio n. 20
0
def calculateStatistic(array):
    """Return ``(average, standard deviation)`` of ``array``.

    Both values are computed with NumPy (``np.average`` and ``np.std``)
    using their default arguments.
    """
    return np.average(array), np.std(array)


# Seeds 1..24 (np.arange excludes the upper bound); one drift stream per seed.
randomStates = np.arange(1, 25)

# Drift geometry. DRIFT_BORDER evaluates to 7500.0 (8000 - 1000 / 2).
DRIFT_CENTRAL = 8000
DRIFT_WIDTH = 1000
DRIFT_BORDER = np.round(DRIFT_CENTRAL - DRIFT_WIDTH / 2)

# One ConceptDriftStream per seed, all sharing the same drift width and a
# drift position of DRIFT_BORDER + 2000 (= 9500.0).
# NOTE(review): the position differs from DRIFT_CENTRAL (8000) -- confirm
# that the 2000-sample offset is intended.
driftStreams = [
    ConceptDriftStream(width=DRIFT_WIDTH,
                       position=DRIFT_BORDER + 2000,
                       random_state=i) for i in randomStates
]

## EXPERIMENT 1
# Candidate parameter values, three per setting. Presumably these configure
# the ADWIN, DDM, KSWIN and Page-Hinkley drift detectors -- confirm against
# the code that consumes them.
adwin_param = [0.002, 0.005, 0.01]
ddm_param = [3, 5, 7]
ks_param1 = [100, 150, 200]
ks_param2 = [30, 50, 100]
ph_param1 = [25, 50, 75]
ph_param2 = [0.005, 0.01, 0.02]

# Classifier instance created with default settings.
knn = KNNClassifier()

# Start with the first stream (random_state=1).
stream = driftStreams[0]
Esempio n. 21
0
# Imports
from skmultiflow.data.sine_generator import SineGenerator
import matplotlib.pyplot as plt
from skmultiflow.data import ConceptDriftStream

import numpy as np
# Setting up the stream: a ConceptDriftStream with its default generators,
# a fixed seed, and the drift positioned at sample 25000.
# stream = SineGenerator(classification_function = 2, random_state = 112,
#                        balance_classes = False, has_noise = False)
stream = ConceptDriftStream(random_state=123456, position=25000)
# Retrieving a batch of 100 samples (not a single one).
# stream.generate_drift()
a = stream.next_sample(100)
# Flatten the first returned array into a flat list of Python scalars.
# ravel().tolist() visits the elements in the same flat order as the former
# item(i)-by-index loop, without the manual append.
# NOTE(review): assumes a[0] is a plain NumPy ndarray (the original relied on
# its .size/.item API) -- confirm if the stream type changes.
b = a[0].ravel().tolist()

# Plot every flattened value as one series.
plt.plot(b)
# fig= plt.gcf()
# fig.set_size_inches(20, 5.5)
# plt.ylabel('value')
# plt.xlabel('Time')
# plt.show()

# b=[]
# for i in range(a[1].size):
#     b.append(a[1].item(i))
#
# plt.plot(b,color='r', linestyle='--',linewidth=0.3)
# Grab the current figure (the one plt.plot drew into) and resize it to
# 20 x 5.5 inches.
fig = plt.gcf()
fig.set_size_inches(20, 5.5)