Python AGRAWALGenerator Examples, skmultiflow.data.AGRAWALGenerator Python Examples

Example #1

0

Show file

File: concept_drift_stream.py Project: Darkmyter/scikit-multiflow

    def __init__(self, stream=AGRAWALGenerator(random_state=112),
                 drift_stream=AGRAWALGenerator(random_state=112, classification_function=2),
                 position=5000,
                 width=1000,
                 random_state=None,
                 alpha=0.0):
        """Set up a drifting stream from a base stream and a drift stream.

        Descriptive attributes (sample/feature/target counts, names and
        values) are copied from ``stream``; ``drift_stream`` is only stored.

        Parameters
        ----------
        stream
            Stream whose metadata is mirrored; kept as ``_input_stream``.
        drift_stream
            Kept as ``_drift_stream``.
        position
            Stored unchanged as ``self.position``.
        width
            Becomes ``self.width`` only when ``alpha`` equals ``0.0``.
        random_state
            Kept as ``_original_random_state``; ``self.random_state`` is set
            to ``None`` here.
        alpha
            Angle in degrees. When non-zero it must satisfy
            ``0 < alpha <= 90`` and the width is derived from it as
            ``int(1 / tan(alpha))``, never less than 1.

        Raises
        ------
        ValueError
            If ``alpha`` is non-zero and outside ``(0, 90]``.
        """
        # NOTE(review): the default generators above are built once, when the
        # function is defined, and are therefore shared by every instance
        # created with the defaults.
        super(ConceptDriftStream, self).__init__()

        # Mirror the descriptive attributes of the base stream.
        for attr in ('n_samples', 'n_targets', 'n_features', 'n_num_features',
                     'n_cat_features', 'n_classes', 'cat_features_idx',
                     'feature_names', 'target_names', 'target_values'):
            setattr(self, attr, getattr(stream, attr))
        self.name = 'Drifting' + stream.name

        self._original_random_state = random_state
        self.random_state = None

        self.alpha = alpha
        if self.alpha == 0.0:
            # No angle given: take the explicit width.
            self.width = width
        elif 0 < self.alpha <= 90.0:
            slope_width = int(1 / np.tan(self.alpha * np.pi / 180))
            self.width = max(slope_width, 1)
        else:
            raise ValueError('Invalid alpha value: {}'.format(alpha))

        self.position = position
        self._input_stream = stream
        self._drift_stream = drift_stream

Example #2

0

Show file

    def get_conceptdrift_data_generated(self,
                                        classification_function=0,
                                        noise_percentage=0.1,
                                        random_state=112,
                                        drift_classification_function=3,
                                        drift_random_state=112,
                                        drift_noise_percentage=0.0,
                                        drift_start_position=5000,
                                        drift_width=1000,
                                        n_num_features=2,
                                        n_cat_features=0):
        """Return a ``ConceptDriftStream`` built from two AGRAWAL generators.

        The base generator is configured with ``classification_function``,
        ``noise_percentage`` (passed as ``perturbation``) and
        ``random_state``; the drift generator with the ``drift_*``
        counterparts. ``drift_start_position`` and ``drift_width`` are passed
        as ``position`` and ``width`` of the resulting stream.

        ``n_num_features`` and ``n_cat_features`` are accepted but not used.
        """
        from skmultiflow.data import AGRAWALGenerator, ConceptDriftStream

        def make_generator(function, noise, seed):
            # Both generators differ only in these three settings.
            return AGRAWALGenerator(classification_function=function,
                                    perturbation=noise,
                                    random_state=seed)

        base = make_generator(classification_function,
                              noise_percentage,
                              random_state)
        drifted = make_generator(drift_classification_function,
                                 drift_noise_percentage,
                                 drift_random_state)

        return ConceptDriftStream(stream=base,
                                  drift_stream=drifted,
                                  position=drift_start_position,
                                  width=drift_width)

Example #3

0

Show file

File: test_vfdr.py Project: Darkmyter/scikit-multiflow

def test_vfdr():
    """Regression test for VFDR with ordered rules and the info-gain criterion.

    Trains on 5000 samples from ``AGRAWALGenerator(random_state=11)``,
    predicts on every 100th sample before training on it, and compares the
    predictions, the info string, the model description and the model
    measurements against recorded values.
    """
    learner = VFDR(ordered_rules=True,
                   rule_prediction='first_hit',
                   nominal_attributes=[3, 4, 5],
                   expand_criterion='info_gain',
                   remove_poor_atts=True,
                   min_weight=100,
                   nb_prediction=False)
    stream = AGRAWALGenerator(random_state=11)
    stream.prepare_for_use()

    max_samples = 5000
    wait_samples = 100
    predictions = array('i')
    proba_predictions = []

    for cnt in range(max_samples):
        X, y = stream.next_sample()
        # Test every n samples (never on the very first one).
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(learner.predict(X)[0])
            # Collected but not asserted on; keeps predict_proba exercised.
            proba_predictions.append(learner.predict_proba(X)[0])
        learner.partial_fit(X, y)

    expected_predictions = array('i', [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0,
                                       0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0,
                                       0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0])

    # np.all replaces np.alltrue, which was removed in NumPy 2.0.
    assert np.all(predictions == expected_predictions)

    expected_info = ('VFDR: ordered_rules: True - grace_period: 200 - split_confidence: 1e-07 '
                     '- tie_threshold: 0.05 - remove_poor_atts: True - rule_prediction: first_hit '
                     '- nb_threshold: 0 - nominal_attributes: [3, 4, 5] - drift_detector: NoneType '
                     '- Predict using Naive Bayes: False')
    assert learner.get_info() == expected_info

    # Two accepted renderings: they differ only in the number of digits
    # printed for one weight in Rule 2.
    expected_model_description = ('Rule 0 :Att (2) <= 39.550| class :0  {0: 1365.7101742993455}\n'
                                  'Rule 1 :Att (2) <= 58.180| class :1  {1: 1269.7307449971418}\n'
                                  'Rule 2 :Att (2) <= 60.910| class :0  {0: 66.24158839706533, 1: 54.0}\n'
                                  'Default Rule :| class :0  {0: 1316.7584116029348}')

    expected_model_description_ = ('Rule 0 :Att (2) <= 39.550| class :0  {0: 1365.7101742993455}\n'
                                   'Rule 1 :Att (2) <= 58.180| class :1  {1: 1269.7307449971418}\n'
                                   'Rule 2 :Att (2) <= 60.910| class :0  {0: 66.241588397065328, 1: 54.0}\n'
                                   'Default Rule :| class :0  {0: 1316.7584116029348}')

    model_description = learner.get_model_description()
    assert model_description in (expected_model_description, expected_model_description_)

    expected_model_measurements = {'Number of rules: ': 3, 'model_size in bytes': 62295}
    expected_model_measurements_ = {'Number of rules: ': 3, 'model_size in bytes': 73167}

    # The measurement check is skipped when the interpreter's minor version
    # is 6 (presumably object sizes differ there -- TODO confirm).
    if sys.version_info.minor != 6:
        model_measurements = learner.get_model_measurements()
        assert model_measurements in (expected_model_measurements,
                                      expected_model_measurements_)

Example #4

0

Show file

def test_vfdr_info_gain():
    """Regression test for VeryFastDecisionRulesClassifier with info gain.

    Trains on 5000 samples from ``AGRAWALGenerator(random_state=11)``,
    predicts on every 100th sample before training on it, and compares the
    predictions, the info string, the model description and the number of
    rules against recorded values.
    """
    learner = VeryFastDecisionRulesClassifier(ordered_rules=True,
                                              rule_prediction='first_hit',
                                              nominal_attributes=[3, 4, 5],
                                              expand_criterion='info_gain',
                                              remove_poor_atts=True,
                                              min_weight=100,
                                              nb_prediction=False)
    stream = AGRAWALGenerator(random_state=11)

    max_samples = 5000
    wait_samples = 100
    predictions = array('i')
    proba_predictions = []

    for cnt in range(max_samples):
        X, y = stream.next_sample()
        # Test every n samples (never on the very first one).
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(learner.predict(X)[0])
            # Collected but not asserted on; keeps predict_proba exercised.
            proba_predictions.append(learner.predict_proba(X)[0])
        learner.partial_fit(X, y)

    expected_predictions = array('i', [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0,
                                       0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0,
                                       0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0])

    # np.all replaces np.alltrue, which was removed in NumPy 2.0.
    assert np.all(predictions == expected_predictions)

    expected_info = "VeryFastDecisionRulesClassifier(drift_detector=None, expand_confidence=1e-07, " \
                    "expand_criterion='info_gain', grace_period=200, max_rules=1000, min_weight=100, " \
                    "nb_prediction=False, nb_threshold=0, nominal_attributes=[3, 4, 5], ordered_rules=True, " \
                    "remove_poor_atts=True, rule_prediction='first_hit', tie_threshold=0.05)"
    # Collapse every run of whitespace (including line breaks) to one space;
    # str.split() already yields whitespace-free tokens.
    info = " ".join(learner.get_info().split())
    assert info == expected_info

    # Two accepted renderings: they differ only in the number of digits
    # printed for one weight in Rule 2.
    expected_model_description_1 = ('Rule 0 :Att (2) <= 39.550| class :0  {0: 1365.7101742993455}\n'
                                    'Rule 1 :Att (2) <= 58.180| class :1  {1: 1269.7307449971418}\n'
                                    'Rule 2 :Att (2) <= 60.910| class :0  {0: 66.24158839706533, 1: 54.0}\n'
                                    'Default Rule :| class :0  {0: 1316.7584116029348}')

    expected_model_description_2 = ('Rule 0 :Att (2) <= 39.550| class :0  {0: 1365.7101742993455}\n'
                                    'Rule 1 :Att (2) <= 58.180| class :1  {1: 1269.7307449971418}\n'
                                    'Rule 2 :Att (2) <= 60.910| class :0  {0: 66.241588397065328, 1: 54.0}\n'
                                    'Default Rule :| class :0  {0: 1316.7584116029348}')

    model_description = learner.get_model_description()
    assert model_description in (expected_model_description_1, expected_model_description_2)

    # Following test only covers 'Number of rules' since 'model_size in bytes' is calculated using
    # the 'calculate_object_size' utility function which is validated in its own test
    expected_number_of_rules = 3
    assert learner.get_model_measurements()['Number of rules: '] == expected_number_of_rules

Example #5

0

Show file

    def __init__(self,
                 stream=AGRAWALGenerator(random_state=112),
                 drift_stream=AGRAWALGenerator(random_state=112,
                                               classification_function=2),
                 pause=1000,
                 random_state=None,
                 alpha=0.0,
                 position=0,
                 width=1):
        """Set up a drifting stream with a pause between two streams.

        Descriptive attributes (sample/feature/class counts, feature names,
        target values) are copied from ``stream``. Target names are generated
        here: ``['target']`` for a single target, otherwise
        ``'target_0'``, ``'target_1'``, ...

        Parameters
        ----------
        stream
            Stream whose metadata is mirrored; kept as ``_input_stream``.
        drift_stream
            Kept as ``_drift_stream``.
        pause
            Stored as ``self.pause``; must not be negative.
        random_state
            Kept as ``_original_random_state``; ``self.random_state`` is set
            to ``None`` here.
        alpha
            Angle in degrees. When non-zero it must satisfy
            ``0 < alpha <= 90`` and the width is derived from it as
            ``int(1 / tan(alpha))``, never less than 1.
        position
            Stored unchanged as ``self.position``.
        width
            Becomes ``self.width`` only when ``alpha`` equals ``0.0``; must
            not be negative.

        Raises
        ------
        ValueError
            If ``alpha`` is non-zero and outside ``(0, 90]``, or if the
            resulting width or ``pause`` is negative.
        """
        # NOTE(review): the default generators above are built once, when the
        # function is defined, and are therefore shared by every instance
        # created with the defaults.
        self.n_samples = stream.n_samples
        self.n_targets = stream.n_targets
        self.n_features = stream.n_features
        self.n_num_features = stream.n_num_features
        self.n_cat_features = stream.n_cat_features
        self.n_classes = stream.n_classes
        self.cat_features_idx = stream.cat_features_idx
        self.feature_names = stream.feature_names
        if self.n_targets == 1:
            self.target_names = ['target']
        else:
            # str(i) is required: concatenating the int index directly to the
            # prefix raises TypeError.
            self.target_names = [
                'target_' + str(i) for i in range(self.n_targets)
            ]
        self.target_values = stream.target_values
        self.name = stream.name + "_" + drift_stream.name + "_" + str(
            pause) + "_" + str(width)
        self.probability_function = "sigmoid_prob"
        self.pause = pause
        self.counter = -1
        self._original_random_state = random_state
        self.random_state = None
        self.alpha = alpha
        if self.alpha != 0.0:
            if 0 < self.alpha <= 90.0:
                w = int(1 / np.tan(self.alpha * np.pi / 180))
                self.width = w if w > 0 else 1
            else:
                raise ValueError('Invalid alpha value: {}'.format(alpha))
        else:
            self.width = width

        # NOTE(review): both checks let 0 through although the messages say
        # "greater than 0" -- confirm which bound is intended.
        if self.width < 0:
            raise ValueError("Width must be greater than 0")
        if self.pause < 0:
            raise ValueError("Pause must be greater than 0")

        self.position = position
        self._input_stream = stream
        self._drift_stream = drift_stream
        self._prepare_for_use()

Example #6

0

Show file

def test_vfdr_hellinger():
    """Regression test for VFDR with unordered rules and the Hellinger criterion.

    Trains on 5000 samples from ``AGRAWALGenerator(random_state=11)``,
    predicts on every 100th sample before training on it, and compares the
    predictions and the model description against recorded values.
    """
    learner = VFDR(ordered_rules=False,
                   rule_prediction='weighted_sum',
                   nominal_attributes=[3, 4, 5],
                   expand_criterion='hellinger',
                   remove_poor_atts=True,
                   min_weight=100,
                   nb_prediction=True)
    stream = AGRAWALGenerator(random_state=11)
    stream.prepare_for_use()

    max_samples = 5000
    wait_samples = 100
    predictions = array('i')
    proba_predictions = []

    for cnt in range(max_samples):
        X, y = stream.next_sample()
        # Test every n samples (never on the very first one).
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(learner.predict(X)[0])
            # Collected but not asserted on; keeps predict_proba exercised.
            proba_predictions.append(learner.predict_proba(X)[0])
        learner.partial_fit(X, y)

    expected_predictions = array('i', [
        0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1,
        0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0,
        0
    ])

    # np.all replaces np.alltrue, which was removed in NumPy 2.0.
    assert np.all(predictions == expected_predictions)

    # Two accepted renderings: they differ only in the number of digits
    # printed for one weight in the default rule.
    expected_model_description = ('Rule 0 :Att (2) > 58.180 and Att (5) = 4.000| class :0  {0: 202.0, 1: 3.0}\n'
                                  'Rule 1 :Att (2) <= 41.820| class :0  {0: 1387.1186637804824, 1: 151.83928023717402}\n'
                                  'Default Rule :| class :1  {0: 512.8813362195176, 1: 1356.160719762826}')

    expected_model_description_ = ('Rule 0 :Att (2) > 58.180 and Att (5) = 4.000| class :0  {0: 202.0, 1: 3.0}\n'
                                   'Rule 1 :Att (2) <= 41.820| class :0  {0: 1387.1186637804824, 1: 151.83928023717402}\n'
                                   'Default Rule :| class :1  {0: 512.8813362195176, 1: 1356.1607197628259}')

    # The description check is skipped when the interpreter's minor version
    # is 6 (reason not visible here -- TODO confirm).
    if sys.version_info.minor != 6:
        model_description = learner.get_model_description()
        assert model_description in (expected_model_description,
                                     expected_model_description_)

Example #7

0

Show file

File: concept_drift_stream.py Project: zeyefkey/scikit-multiflow

    def __init__(self,
                 stream=AGRAWALGenerator(random_state=112),
                 drift_stream=AGRAWALGenerator(random_state=112,
                                               classification_function=2),
                 position=5000,
                 width=1000,
                 random_state=None,
                 alpha=None):
        """Set up a drifting stream from a base stream and a drift stream.

        Descriptive attributes (sample/feature/target counts, names and
        values) are copied from ``stream``; both streams are stored as
        public attributes and ``_prepare_for_use()`` is called at the end.

        Parameters
        ----------
        stream
            Stream whose metadata is mirrored; kept as ``self.stream``.
        drift_stream
            Kept as ``self.drift_stream``.
        position
            Stored unchanged as ``self.position``.
        width
            Becomes ``self.width`` only when no ``alpha`` is in effect.
        random_state
            Stored as given; the internal ``_random_state`` starts as
            ``None``.
        alpha
            Angle in degrees, or ``None``. A value equal to 0 triggers a
            ``FutureWarning`` and is treated as ``None``. Otherwise it must
            satisfy ``0 < alpha <= 90`` and the width is derived from it as
            ``int(1 / tan(alpha))``, never less than 1.

        Raises
        ------
        ValueError
            If ``alpha`` is set and outside ``(0, 90]``.
        """
        # NOTE(review): the default generators above are built once, when the
        # function is defined, and are therefore shared by every instance
        # created with the defaults.
        super(ConceptDriftStream, self).__init__()

        # Mirror the descriptive attributes of the base stream.
        for attr in ('n_samples', 'n_targets', 'n_features', 'n_num_features',
                     'n_cat_features', 'n_classes', 'cat_features_idx',
                     'feature_names', 'target_names', 'target_values'):
            setattr(self, attr, getattr(stream, attr))
        self.name = 'Drifting' + stream.name

        self.random_state = random_state
        # This is the actual random_state object used internally
        self._random_state = None

        self.alpha = alpha
        if self.alpha == 0:
            # Legacy spelling of "no angle": warn and normalise to None.
            warnings.warn(
                "Default value for 'alpha' has changed from 0 to None. 'alpha=0' will "
                "throw an error from v0.7.0",
                category=FutureWarning)
            self.alpha = None

        if self.alpha is None:
            self.width = width
        elif 0 < self.alpha <= 90.0:
            slope_width = int(1 / np.tan(self.alpha * np.pi / 180))
            self.width = max(slope_width, 1)
        else:
            raise ValueError('Invalid alpha value: {}'.format(alpha))

        self.position = position
        self.stream = stream
        self.drift_stream = drift_stream

        self._prepare_for_use()

Example #8

0

Show file

def test_hoeffding_adaptive_tree_alternate_tree():
    """Check the tree description of HoeffdingAdaptiveTreeClassifier across drifts.

    Feeds 5000 samples from ``AGRAWALGenerator(random_state=7)`` one at a
    time. ``generate_drift()`` is called once right after each of three
    change points is passed; the model description is compared with a
    recorded value at the first two change points and after the last sample.
    """
    stream = AGRAWALGenerator(random_state=7)

    learner = HoeffdingAdaptiveTreeClassifier(random_state=1)

    description_at_first_change = (
        "if Attribute 2 <= 63.63636363636363:\n"
        "  if Attribute 2 <= 39.54545454545455:\n"
        "    Leaf = Class 0 | {0: 397.5023676194098}\n"
        "  if Attribute 2 > 39.54545454545455:\n"
        "    if Attribute 2 <= 58.81818181818181:\n"
        "      Leaf = Class 1 | {1: 299.8923824199619}\n"
        "    if Attribute 2 > 58.81818181818181:\n"
        "      Leaf = Class 0 | {0: 54.0, 1: 20.107617580038095}\n"
        "if Attribute 2 > 63.63636363636363:\n"
        "  Leaf = Class 0 | {0: 512.5755895049351}\n"
    )
    description_at_second_change = (
        "if Attribute 8 <= 268547.7178694747:\n"
        "  Leaf = Class 0 | {0: 446.18690518790413, 1: 80.6180778406834}\n"
        "if Attribute 8 > 268547.7178694747:\n"
        "  Leaf = Class 1 | {0: 36.8130948120959, 1: 356.38192215931656}\n"
    )

    change_point1 = 1500
    change_point2 = 2500
    change_point3 = 4000
    max_samples = 5000

    # cnt is the number of samples learned so far (1-based).
    for cnt in range(1, max_samples + 1):
        X, y = stream.next_sample()
        learner.partial_fit(X, y)

        if cnt > change_point1:
            stream.generate_drift()
            # Setting the change point to infinity makes this fire only once.
            change_point1 = float('Inf')
            expected_description = description_at_first_change
            assert expected_description == learner.get_model_description()

        if cnt > change_point2:
            stream.generate_drift()
            change_point2 = float('Inf')
            expected_description = description_at_second_change
            assert expected_description == learner.get_model_description()

        if cnt > change_point3:
            stream.generate_drift()
            change_point3 = float('Inf')

    expected_description = "Leaf = Class 0 | {0: 1083.0, 1: 2.0}\n"
    assert expected_description == learner.get_model_description()

Example #9

0

Show file

def test_vfdr_foil():
    """Regression test for VFDR with unordered rules and the FOIL-gain criterion.

    Trains on 5000 samples from ``AGRAWALGenerator(random_state=11)``,
    predicts on every 100th sample before training on it, and compares the
    predictions and the model description against recorded values.
    """
    learner = VFDR(ordered_rules=False,
                   rule_prediction='weighted_sum',
                   nominal_attributes=[3, 4, 5],
                   expand_criterion='foil_gain',
                   remove_poor_atts=True,
                   min_weight=100,
                   nb_prediction=True)
    stream = AGRAWALGenerator(random_state=11)
    stream.prepare_for_use()

    max_samples = 5000
    wait_samples = 100
    predictions = array('i')
    proba_predictions = []

    for cnt in range(max_samples):
        X, y = stream.next_sample()
        # Test every n samples (never on the very first one).
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(learner.predict(X)[0])
            # Collected but not asserted on; keeps predict_proba exercised.
            proba_predictions.append(learner.predict_proba(X)[0])
        learner.partial_fit(X, y)

    expected_predictions = array('i', [
        0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
        0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
        0
    ])

    # np.all replaces np.alltrue, which was removed in NumPy 2.0.
    assert np.all(predictions == expected_predictions)

    expected_model_description = ('Rule 0 :Att (2) <= 25.450 | class: 1| class :0  {0: 464.44730579120136}\n'
                                  'Rule 1 :Att (4) = 3.000 | class: 0| class :0  {0: 95.0, 1: 45.0}\n'
                                  'Rule 2 :Att (2) <= 30.910 | class: 1| class :0  {0: 330.68821225514125}\n'
                                  'Default Rule :| class :0  {0: 573.0, 1: 336.0}')

    assert learner.get_model_description() == expected_model_description

Example #10

0

Show file

    def prepare_for_use(self):
        """Build the per-concept streams and copy metadata from the first one.

        Steps, in order:

        1. For the generator kinds listed below, set ``self.concepts`` to a
           fixed list of concept ids (other kinds leave it untouched).
        2. If ``self.concept_shift_step > 0``, create one AGRAWAL stream per
           entry of ``self.all_concepts``; otherwise create one stream of the
           configured kind per entry of ``self.concepts``. An unknown kind
           prints a message and calls ``exit()``.
        3. Use the first two streams as current and drift stream, mirror the
           first stream's descriptive attributes, and compute
           ``self.concept_probs``.
        """
        kind = self.generator

        if kind in ('sea', 'sine'):
            self.concepts = list(range(4))
        elif kind in ('stagger',):
            self.concepts = list(range(3))
        elif kind in ('mixed',):
            self.concepts = list(range(2))
        elif kind in ('led',):
            self.concepts = list(range(7))
        elif kind in ('tree',):
            self.concepts = list(range(2, 11))

        if self.concept_shift_step > 0:
            for concept in self.all_concepts:
                shifted = AGRAWALGenerator(classification_function=concept,
                                           random_state=self.random_state,
                                           balance_classes=False,
                                           perturbation=0.05)
                shifted.prepare_for_use()
                self.streams.append(shifted)
        else:
            for concept in self.concepts:
                # Settings shared by most generator kinds.
                seeded = dict(random_state=self.random_state,
                              balance_classes=False)

                if kind == 'agrawal':
                    built = AGRAWALGenerator(classification_function=concept,
                                             perturbation=0.05,
                                             **seeded)
                elif kind == 'sea':
                    built = SEAGenerator(classification_function=concept,
                                         noise_percentage=0.05,
                                         **seeded)
                elif kind == 'sine':
                    built = SineGenerator(classification_function=concept,
                                          has_noise=False,
                                          **seeded)
                elif kind == 'stagger':
                    built = STAGGERGenerator(classification_function=concept,
                                             **seeded)
                elif kind == 'mixed':
                    built = MIXEDGenerator(classification_function=concept,
                                           **seeded)
                elif kind == 'led':
                    built = LEDGeneratorDrift(random_state=self.random_state,
                                              has_noise=True,
                                              n_drift_features=concept)
                elif kind == 'tree':
                    # The concept id seeds the tree and also sets its depths.
                    built = RandomTreeGenerator(tree_random_state=concept,
                                                sample_random_state=concept,
                                                max_tree_depth=concept+2,
                                                min_leaf_depth=concept,
                                                n_classes=2)
                else:
                    print(f"unknown stream generator {self.generator}")
                    exit()

                built.prepare_for_use()
                self.streams.append(built)

        self.cur_stream = self.streams[0]
        self.drift_stream = self.streams[1]

        # Mirror the descriptive attributes of the current stream.
        first = self.cur_stream
        for attr in ('n_samples', 'n_targets', 'n_features', 'n_num_features',
                     'n_cat_features', 'n_classes', 'cat_features_idx',
                     'feature_names', 'target_names', 'target_values'):
            setattr(self, attr, getattr(first, attr))
        self.name = 'drifting' + first.name

        print(f"len: {len(self.concepts)}")
        self.concept_probs = \
                self.__get_poisson_probs(len(self.concepts), self.lam)

Example #11

0

Show file

File: Chapter1_codes.py Project: sayanddude/practical-ml-streaming-data-python

#2  0.091587  0.977452  0.411501  0.458305  ...  0.181444  0.303406  0.174454  0.0
#3  0.635272  0.496203  0.014126  0.627222  ...  0.517752  0.570683  0.546333  1.0
#4  0.450078  0.876507  0.537356  0.495684  ...  0.606895  0.217841  0.912944  1.0
#
#[5 rows x 11 columns]

# Store the previously built DataFrame as CSV, without the index column
data.to_csv('data_stream_hyperplane.csv', index=False)
#####################################################################################

### AGRAWAL generator using scikit-multiflow
from skmultiflow.data import AGRAWALGenerator
import pandas as pd
import numpy as np

# Seeded generator; 10000 samples are drawn in one call below.
create = AGRAWALGenerator(random_state=333)
create.prepare_for_use()
X, Y = create.next_sample(10000)
# Append the labels as a trailing column of the feature matrix
# (assumes Y is 1-D so that np.array([Y]).T is a single column -- TODO confirm).
# Note: this rebinds `data`, replacing the DataFrame saved above.
data = pd.DataFrame(np.hstack((X, np.array([Y]).T)))

# Bare expression: only shows a value in an interactive session.
data.shape
# expected output- (10000, 10), since 10000 samples were requested above
# (the original note said 1000)
print(data.head())
# Output:
#               0             1     2  ...     7              8    9
#0   90627.841313      0.000000  33.0  ...  20.0   24151.832875  0.0
#1   33588.924462  17307.813671  72.0  ...  29.0  315025.363876  0.0
#2   24375.065287  12426.917711  39.0  ...   4.0  363158.576720  0.0
#3   82949.727691      0.000000  68.0  ...   2.0   35758.528073  0.0
#4  149423.790417      0.000000  52.0  ...  29.0   98440.362484  1.0
#

Example #12

0

Show file

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1uHKbJ3KLUITTHJRxegzbTvA_-6M7eO5v
"""

!pip install -U scikit-multiflow

from skmultiflow.data import AGRAWALGenerator
from skmultiflow.trees import HoeffdingTree
from skmultiflow.evaluation import EvaluatePrequential
import numpy as np

# 1. Create a stream (generator built with its default arguments)
stream = AGRAWALGenerator()
stream.prepare_for_use()

# 2. Instantiate the HoeffdingTree classifier (default arguments)
ht = HoeffdingTree()

# Steps 3 and 4 below are disabled: no evaluation is run by this script.
# # 3. Setup the evaluator
# evaluator = EvaluatePrequential(show_plot=False,
#                                 pretrain_size=500,
#                                 max_samples=500)

# # 4. Run evaluation
# evaluator.evaluate(stream=stream, model=ht)

def base_classifier(e, U, I, L, D, wd, ws):
    """Placeholder: print a marker line and return ``None``.

    None of the arguments is used.
    """
    print("I am here")
    return None

Example #13

0

Show file

File: ms_cd_experiment.py Project: PouyaGhahramanian/goowe-python

from skmultiflow.data.file_stream import FileStream
import numpy as np
from Goowe import Goowe
from skmultiflow.data import ConceptDriftStream
from skmultiflow.data import AGRAWALGenerator
import logging
from GooweMS import GooweMS
import random

# Root logger (getLogger() without a name), set to report INFO and above.
logger = logging.getLogger()
logger.setLevel(logging.INFO)
# Prepare the data stream
# stream_1: base stream uses AGRAWAL classification function 1, drift stream
# uses function 2; both without perturbation, unbalanced, and seeded with 112.
# Drift settings passed on: position=3000, width=1000.
stream_1 = ConceptDriftStream(
    stream=AGRAWALGenerator(balance_classes=False,
                            classification_function=1,
                            perturbation=0.0,
                            random_state=112),
    drift_stream=AGRAWALGenerator(balance_classes=False,
                                  classification_function=2,
                                  perturbation=0.0,
                                  random_state=112),
    position=3000,
    width=1000,
    random_state=None,
    alpha=0.0)
stream_2 = ConceptDriftStream(
    stream=AGRAWALGenerator(balance_classes=False,
                            classification_function=3,
                            perturbation=0.0,
                            random_state=21),
    drift_stream=AGRAWALGenerator(balance_classes=False,