Exemple #1
0
    def __iter__(self):
        """Yield an endless sequence of ``(x, y)`` samples.

        Each time iteration starts, the centroids are regenerated and a
        sampling random state is built from ``self.seed_sample``; every
        sample is then produced by ``self._generate_sample``.
        """
        self._generate_centroids()
        sampler = check_random_state(self.seed_sample)

        while True:
            features, label = self._generate_sample(sampler)
            yield features, label
Exemple #2
0
    def __iter__(self):
        """Yield an endless sequence of ``(x, y)`` samples.

        ``x`` maps ``"size"``, ``"color"`` and ``"shape"`` to integers drawn
        with ``randint(3)``; ``y`` is the output of the classification
        function selected by ``self.classification_function``.

        When ``self.balance_classes`` is truthy, candidates are redrawn until
        the label matches the class expected next, which alternates between
        1 and 0 (starting with 1).
        """
        self._rng = check_random_state(self.seed)
        self.next_class_should_be_zero = False

        while True:
            # Rejection loop: keep drawing until an acceptable label shows up.
            while True:
                size = self._rng.randint(3)
                color = self._rng.randint(3)
                shape = self._rng.randint(3)

                classify = self._functions[self.classification_function]
                y = classify(size, color, shape)

                if not self.balance_classes:
                    break

                wanted = 0 if self.next_class_should_be_zero else 1
                if y == wanted:
                    # Flip the expected class for the following sample.
                    self.next_class_should_be_zero = (
                        not self.next_class_should_be_zero)
                    break

            yield {"size": size, "color": color, "shape": shape}, y
Exemple #3
0
    def __init__(
        self,
        max_features: int = 2,
        grace_period: int = 200,
        max_depth: int = None,
        split_criterion: str = "info_gain",
        split_confidence: float = 1e-7,
        tie_threshold: float = 0.05,
        leaf_prediction: str = "nba",
        nb_threshold: int = 0,
        nominal_attributes: list = None,
        splitter: Splitter = None,
        seed=None,
        **kwargs,
    ):
        """Initialize the tree and its random state.

        Every tree hyper-parameter, plus any extra ``kwargs``, is forwarded
        unchanged to the parent initializer. ``max_features`` and ``seed``
        are stored on the instance, and ``seed`` is turned into a random
        state through ``check_random_state``.
        """
        # Generic tree hyper-parameters are handled by the parent class.
        super().__init__(
            grace_period=grace_period, max_depth=max_depth,
            split_criterion=split_criterion,
            split_confidence=split_confidence,
            tie_threshold=tie_threshold,
            leaf_prediction=leaf_prediction, nb_threshold=nb_threshold,
            nominal_attributes=nominal_attributes, splitter=splitter,
            **kwargs,
        )

        # Parameters handled by this class only.
        self.seed = seed
        self.max_features = max_features
        self._rng = check_random_state(seed)
Exemple #4
0
    def __iter__(self):
        """Yield an endless sequence of ``(x, y)`` samples.

        ``x`` maps the keys 0..3 to two booleans (``rand() >= 0.5``) followed
        by two raw ``rand()`` draws; ``y`` is the output of the
        classification function selected by ``self.classification_function``.

        When ``self.balance_classes`` is truthy, candidates are redrawn until
        the label matches the class expected next, which alternates between
        1 and 0 (starting with 1).
        """
        self._rng = check_random_state(self.seed)
        self.next_class_should_be_zero = False

        while True:
            # Rejection loop: keep drawing until an acceptable label shows up.
            while True:
                att_0 = self._rng.rand() >= 0.5
                att_1 = self._rng.rand() >= 0.5
                att_2 = self._rng.rand()
                att_3 = self._rng.rand()

                classify = self._functions[self.classification_function]
                y = classify(att_0, att_1, att_2, att_3)

                if not self.balance_classes:
                    break

                wanted = 0 if self.next_class_should_be_zero else 1
                if y == wanted:
                    # Flip the expected class for the following sample.
                    self.next_class_should_be_zero = not self.next_class_should_be_zero
                    break

            yield {0: att_0, 1: att_1, 2: att_2, 3: att_3}, y
    def __init__(self,
                 model=HoeffdingTreeClassifier(grace_period=50,
                                               split_confidence=0.01),
                 n_models: int = 100,
                 subspace_size: typing.Union[int, float, str] = .6,
                 training_method: str = "patches",
                 lam: float = 6.0,
                 drift_detector: typing.Union[base.DriftDetector,
                                              None] = ADWIN(delta=1e-5),
                 warning_detector: base.DriftDetector = ADWIN(delta=1e-4),
                 disable_weighted_vote: bool = False,
                 nominal_attributes=None,
                 seed=None,
                 metric: MultiClassMetric = Accuracy()):
        """Store the ensemble configuration and reset its internal state.

        The parent initializer receives a one-element placeholder list; the
        actual list of models starts empty and is filled in later.

        NOTE(review): the default ``model``, ``drift_detector``,
        ``warning_detector`` and ``metric`` objects are created once, when
        the function is defined, so every instance relying on the defaults
        receives the same objects. Confirm they are copied before being
        mutated.
        """
        # Placeholder only: the list of models is properly initialized later.
        super().__init__([None])
        self.models = []

        # User-facing configuration.
        self.model = model  # Not restricted to a specific base estimator.
        self.n_models = n_models
        self.subspace_size = subspace_size
        self.training_method = training_method
        self.lam = lam
        self.drift_detector = drift_detector
        self.warning_detector = warning_detector
        self.disable_weighted_vote = disable_weighted_vote
        self.metric = metric
        # Any falsy value (including None) becomes a fresh empty list.
        self.nominal_attributes = nominal_attributes or []
        self.seed = seed
        self._rng = check_random_state(seed)

        # Internal bookkeeping.
        self._n_samples_seen = 0
        self._subspaces = None
        self._base_learner_class = StreamingRandomPatchesBaseLearner
Exemple #6
0
    def __init__(
        self,
        max_features: int = 2,
        grace_period: int = 200,
        max_depth: int = None,
        split_confidence: float = 1e-7,
        tie_threshold: float = 0.05,
        leaf_prediction: str = "model",
        leaf_model: base.Regressor = None,
        model_selector_decay: float = 0.95,
        nominal_attributes: list = None,
        splitter: Splitter = None,
        min_samples_split: int = 5,
        seed=None,
        **kwargs,
    ):
        """Initialize the tree and its random state.

        Every tree hyper-parameter, plus any extra ``kwargs``, is forwarded
        unchanged to the parent initializer. ``max_features`` and ``seed``
        are stored on the instance, and ``seed`` is turned into a random
        state through ``check_random_state``.
        """
        # Generic tree hyper-parameters are handled by the parent class.
        super().__init__(
            grace_period=grace_period, max_depth=max_depth,
            split_confidence=split_confidence,
            tie_threshold=tie_threshold,
            leaf_prediction=leaf_prediction, leaf_model=leaf_model,
            model_selector_decay=model_selector_decay,
            nominal_attributes=nominal_attributes, splitter=splitter,
            min_samples_split=min_samples_split,
            **kwargs,
        )

        # Parameters handled by this class only.
        self.seed = seed
        self.max_features = max_features
        self._rng = check_random_state(seed)
Exemple #7
0
    def __init__(
        self,
        n_models: int,
        max_features: typing.Union[bool, str, int],
        lambda_value: int,
        drift_detector: typing.Union[base.DriftDetector, None],
        warning_detector: typing.Union[base.DriftDetector, None],
        metric: typing.Union[metrics.MultiClassMetric, metrics.RegressionMetric],
        disable_weighted_vote,
        seed,
    ):
        """Store the ensemble configuration and reset its internal state.

        The parent initializer receives a one-element placeholder list; the
        actual list of models starts empty and is filled in later.
        """
        # Placeholder only: the list of models is properly initialized later.
        super().__init__([None])
        self.models = []

        # User-facing configuration.
        self.n_models = n_models
        self.max_features = max_features
        self.lambda_value = lambda_value
        self.drift_detector = drift_detector
        self.warning_detector = warning_detector
        self.metric = metric
        self.disable_weighted_vote = disable_weighted_vote
        self.seed = seed
        # Actual random number generator, derived from the seed.
        self._rng = check_random_state(seed)

        # Internal parameters
        self._base_member_class = None
        self._n_samples_seen = 0
    def __init__(
        self,
        grace_period: int = 200,
        max_depth: int = None,
        split_criterion: str = 'info_gain',
        split_confidence: float = 1e-7,
        tie_threshold: float = 0.05,
        leaf_prediction: str = 'nba',
        nb_threshold: int = 0,
        nominal_attributes: list = None,
        attr_obs: str = 'gaussian',
        attr_obs_params: dict = None,
        max_features: int = 2,
        seed=None,
        **kwargs
    ):
        """Initialize the tree and its random state.

        Every tree hyper-parameter, plus any extra ``kwargs``, is forwarded
        unchanged to the parent initializer. ``max_features`` and ``seed``
        are stored on the instance, and ``seed`` is turned into a random
        state through ``check_random_state``.
        """
        # Generic tree hyper-parameters are handled by the parent class.
        super().__init__(
            grace_period=grace_period,
            max_depth=max_depth,
            split_criterion=split_criterion,
            split_confidence=split_confidence,
            tie_threshold=tie_threshold,
            leaf_prediction=leaf_prediction,
            nb_threshold=nb_threshold,
            nominal_attributes=nominal_attributes,
            attr_obs=attr_obs,
            attr_obs_params=attr_obs_params,
            **kwargs
        )

        # Parameters handled by this class only.
        self.seed = seed
        self.max_features = max_features
        self._rng = check_random_state(seed)
Exemple #9
0
 def __init__(self, stats, depth, attr_obs, attr_obs_params, adwin_delta,
              seed):
     """Initialize the node with an ADWIN detector and a random state.

     ``stats``, ``depth``, ``attr_obs`` and ``attr_obs_params`` are passed
     to the parent initializer. A new ``ADWIN`` instance is created with
     ``adwin_delta`` as its ``delta``, and ``seed`` is turned into a random
     state through ``check_random_state``.
     """
     super().__init__(stats, depth, attr_obs, attr_obs_params)
     self._rng = check_random_state(seed)
     self.error_change = False
     # Drift detection set-up.
     self.adwin_delta = adwin_delta
     self._adwin = ADWIN(delta=adwin_delta)
Exemple #10
0
    def __iter__(self):
        """Yield an endless sequence of ``(x, y)`` samples.

        ``y`` is drawn with ``randint(self.n_classes)``. The relevant
        attributes are read from ``self._ORIGINAL_INSTANCES[y]`` and each one
        is replaced by ``int(value == 0)`` whenever
        ``0.01 + rand() <= self.noise_percentage``. If
        ``self.irrelevant_features`` is truthy, the remaining attributes are
        filled with ``randint(2)`` draws.

        Before sampling, and only when irrelevant features are enabled and
        ``self.n_drift_features > 0``, positions in ``self._attr_idx`` are
        exchanged between the first 7 slots and the irrelevant ones.
        """
        self._rng = check_random_state(self.seed)
        self._attr_idx = np.arange(self._N_FEATURES_INCLUDING_NOISE)

        # Change attributes
        if self.irrelevant_features and self.n_drift_features > 0:
            random_int = self._rng.randint(7)
            offset = self._rng.randint(self._N_IRRELEVANT_ATTRIBUTES)
            for i in range(self.n_drift_features):
                relevant_pos = (i + random_int) % 7
                irrelevant_pos = 7 + (i + offset) % self._N_IRRELEVANT_ATTRIBUTES
                self._attr_idx[relevant_pos] = irrelevant_pos
                self._attr_idx[irrelevant_pos] = relevant_pos

        while True:
            # Initialize to keep order in dictionary
            x = dict.fromkeys(range(self.n_features), -1)
            y = self._rng.randint(self.n_classes)

            for i in range(self._N_RELEVANT_FEATURES):
                flip = (0.01 + self._rng.rand()) <= self.noise_percentage
                value = self._ORIGINAL_INSTANCES[y, i]
                x[self._attr_idx[i]] = int(value == 0) if flip else value

            if self.irrelevant_features:
                first_noise = self._N_RELEVANT_FEATURES
                for i in range(first_noise, self._N_FEATURES_INCLUDING_NOISE):
                    x[self._attr_idx[i]] = self._rng.randint(2)

            yield x, y
Exemple #11
0
    def _generate_data(self):
        """Build ``self.X`` and ``self.y`` for the anomaly data set.

        ``self.X`` has two columns: a noisy sine and a noisy cosine of
        ``arange(n_samples) / 4``. ``self.y`` starts as all zeros and is set
        to 1 at the indices chosen as anomalies. When ``self.contextual`` is
        truthy, some cosine values are first overwritten with sine values
        taken ``self.shift`` positions earlier (wrapping around the end).
        """
        self._random_state = check_random_state(self.seed)
        rng = self._random_state
        n_samples = self.n_samples

        self.y = np.zeros(n_samples)
        # The sine noise is drawn before the cosine noise.
        noisy_sine = np.sin(np.arange(n_samples) / 4.0) + rng.randn(n_samples) * self.noise
        noisy_cosine = np.cos(np.arange(n_samples) / 4.0) + rng.randn(n_samples) * self.noise
        self.X = np.column_stack([noisy_sine, noisy_cosine])

        if self.contextual:
            # Indices whose sine value is copied elsewhere.
            source_idx = rng.choice(n_samples - self.shift,
                                    self.n_contextual,
                                    replace=self.replace)
            # Destination indices, wrapped back into range when needed.
            target_idx = source_idx + self.shift
            target_idx[target_idx >= n_samples] -= n_samples
            self.X[target_idx, 1] = self.X[source_idx, 0]

        # Anomaly indices
        anomalies_idx = rng.choice(n_samples, self.n_anomalies,
                                   replace=self.replace)
        # NOTE(review): ``choice`` is called without a size here, so a single
        # value is drawn and every anomaly shares the same sine term.
        shared_sine = np.sin(rng.choice(self.n_anomalies, replace=self.replace))
        jitter = rng.randn(self.n_anomalies) * self.noise
        self.X[anomalies_idx, 1] = shared_sine + jitter + 2.0
        # Mark sample as anomalous
        self.y[anomalies_idx] = 1
Exemple #12
0
    def __iter__(self):
        """Yield an endless sequence of ``(x, y)`` samples.

        ``x[0]`` and ``x[1]`` are ``rand()`` draws and ``y`` is the output of
        the classification function selected by
        ``self.classification_function`` applied to them. When
        ``self.has_noise`` is truthy, two extra ``rand()`` draws are stored
        under keys 2 and 3.

        When ``self.balance_classes`` is truthy, candidates are redrawn until
        the label matches the class expected next, which alternates between
        1 and 0 (starting with 1).
        """
        self._rng = check_random_state(self.seed)
        self.next_class_should_be_zero = False

        while True:
            # Rejection loop: keep drawing until an acceptable label shows up.
            while True:
                x = {0: self._rng.rand(), 1: self._rng.rand()}
                classify = self._functions[self.classification_function]
                y = classify(x[0], x[1])

                if not self.balance_classes:
                    break

                wanted = 0 if self.next_class_should_be_zero else 1
                if y == wanted:
                    # Flip the expected class for the following sample.
                    self.next_class_should_be_zero = not self.next_class_should_be_zero
                    break

            if self.has_noise:
                for key in (2, 3):
                    x[key] = self._rng.rand()

            yield x, y
Exemple #13
0
    def _generate_centroids(self):
        """Generate the centroids and their drift speeds.

        The parent implementation creates the centroids. Afterwards one
        speed vector is drawn for each of the ``n_drift_centroids``
        centroids: ``n_features`` values from ``rand()``, divided by their
        Euclidean norm. The vectors are stored in ``self.centroid_speed``.
        """
        super()._generate_centroids()
        rng_model = check_random_state(self.seed_model)
        self.centroid_speed = []

        for _ in range(self.n_drift_centroids):
            speed = np.array(
                [rng_model.rand() for _ in range(self.n_features)],
                dtype=float,
            )

            # Sum the squares one component at a time, in order.
            squared_norm = 0.0
            for component in speed:
                squared_norm += component * component

            speed /= np.sqrt(squared_norm)
            self.centroid_speed.append(speed)
    def __init__(self, split_test, stats, depth, adwin_delta, seed):
        """Initialize the node with an ADWIN detector and a random state.

        ``split_test``, ``stats`` and ``depth`` are passed to the parent
        initializer. A new ``ADWIN`` instance is created with ``adwin_delta``
        as its ``delta``; the alternate tree starts as ``None``.
        """
        super().__init__(split_test, stats, depth)
        self._rng = check_random_state(seed)

        # Drift detection set-up.
        self.adwin_delta = adwin_delta
        self._adwin = ADWIN(delta=adwin_delta)
        self._error_change = False
        self._alternate_tree = None
 def __init__(
     self, stats, depth, attr_obs, attr_obs_params, max_features, seed, **kwargs
 ):
     """Initialize the node with its random state and an empty feature list.

     ``stats``, ``depth``, ``attr_obs``, ``attr_obs_params`` and any extra
     ``kwargs`` are passed to the parent initializer. ``max_features`` and
     ``seed`` are stored, and ``seed`` is turned into a random state through
     ``check_random_state``.
     """
     super().__init__(stats, depth, attr_obs, attr_obs_params, **kwargs)  # noqa
     self.feature_indices = []
     self.max_features = max_features
     self.seed = seed
     self._rng = check_random_state(seed)
Exemple #16
0
    def __init__(self, stats, *children, adwin_delta, seed, **attributes):
        """Initialize the node with an ADWIN detector and a random state.

        ``stats``, the positional ``children`` and the extra ``attributes``
        are passed to the parent initializer. A new ``ADWIN`` instance is
        created with ``adwin_delta`` as its ``delta``; the alternate tree
        starts as ``None``.
        """
        super().__init__(stats, *children, **attributes)
        self._rng = check_random_state(seed)

        # Drift detection set-up.
        self.adwin_delta = adwin_delta
        self._adwin = ADWIN(delta=adwin_delta)
        self._error_change = False
        self._alternate_tree = None
Exemple #17
0
    def __init__(self, stats, depth, splitter, adwin_delta, seed, **kwargs):
        """Initialize the node with an ADWIN detector and a random state.

        ``stats``, ``depth``, ``splitter`` and any extra ``kwargs`` are
        passed to the parent initializer. A new ``ADWIN`` instance is created
        with ``adwin_delta`` as its ``delta``.
        """
        super().__init__(stats, depth, splitter, **kwargs)

        self._rng = check_random_state(seed)
        self._error_change = False

        # Drift detection set-up.
        self.adwin_delta = adwin_delta
        self._adwin = ADWIN(delta=adwin_delta)

        # Normalization of info monitored by drift detectors (using Welford's algorithm)
        self._error_normalizer = Var(ddof=1)
Exemple #18
0
    def __iter__(self):
        """Yield ``(x, y)`` pairs built from the generated logical data.

        ``X`` and ``Y`` come from ``self._make_logical``, called with a
        random state derived from ``self.seed``. Each row of ``X`` is turned
        into a dict keyed by ``self.feature_names`` and each row of ``Y``
        into a dict keyed by ``self.target_names``.

        If ``Y`` is not iterable, or runs out before ``X`` does, the target
        is yielded as ``None`` instead of raising ``TypeError`` from
        ``zip(self.target_names, None)``.
        """
        rng = check_random_state(self.seed)
        X, Y = self._make_logical(n_tiles=self.n_tiles,
                                  shuffle=self.shuffle,
                                  random_state=rng)

        # A non-iterable ``Y`` is treated as "no targets at all".
        targets = Y if hasattr(Y, "__iter__") else []

        # ``zip_longest`` pads the missing targets with ``None``.
        for xi, yi in itertools.zip_longest(X, targets):
            x = dict(zip(self.feature_names, xi))
            y = None if yi is None else dict(zip(self.target_names, yi))
            yield x, y
Exemple #19
0
    def __init__(self, stats, depth, attr_obs, attr_obs_params, leaf_model, adwin_delta, seed):
        """Initialize the node with an ADWIN detector and a random state.

        ``stats``, ``depth``, ``attr_obs``, ``attr_obs_params`` and
        ``leaf_model`` are passed to the parent initializer. A new ``ADWIN``
        instance is created with ``adwin_delta`` as its ``delta``.
        """
        super().__init__(stats, depth, attr_obs, attr_obs_params, leaf_model)

        self._rng = check_random_state(seed)
        self.error_change = False

        # Drift detection set-up.
        self.adwin_delta = adwin_delta
        self._adwin = ADWIN(delta=adwin_delta)

        # Normalization of info monitored by drift detectors (using Welford's algorithm)
        self._error_normalizer = Var(ddof=1)
Exemple #20
0
    def __init__(self, model, order=None, seed=None):
        """Store the configuration and, when possible, build the models.

        ``model``, ``order`` and ``seed`` are kept on the instance and
        ``seed`` is turned into a random state through
        ``check_random_state``. ``self._init_models()`` is called right away
        only when ``order`` is a list.
        """
        super().__init__()
        self.model = model
        self.order = order
        self.seed = seed
        self._rng = check_random_state(seed)

        # Without an explicit order, the models are created later, in the
        # first call to learn_one.
        if not isinstance(order, list):
            return

        # The order is known, so a model can be instantiated for each label now.
        self._init_models()
    def __init__(self, split_test, stats, depth, adwin_delta, seed):
        """Initialize the node with an ADWIN detector and a random state.

        A falsy ``stats`` is replaced by a new ``Var()`` before
        ``split_test``, ``stats`` and ``depth`` are passed to the parent
        initializer. A new ``ADWIN`` instance is created with ``adwin_delta``
        as its ``delta``; the alternate tree starts as ``None``.
        """
        if not stats:
            stats = Var()
        super().__init__(split_test, stats, depth)

        self._rng = check_random_state(seed)

        # Drift detection set-up.
        self.adwin_delta = adwin_delta
        self._adwin = ADWIN(delta=adwin_delta)
        self._error_change = False
        self._alternate_tree = None

        # Normalization of info monitored by drift detectors (using Welford's algorithm)
        self._error_normalizer = Var(ddof=1)
Exemple #22
0
    def __iter__(self):
        """Yield an endless sequence of ``(x, y)`` samples.

        A random tree is generated first. Each sample then draws ``rand()``
        for every name in ``self.features_num`` and
        ``randint(self.n_categories_per_feature)`` for every name in
        ``self.features_cat``. The label comes from
        ``self._classify_instance`` applied to the tree root.
        """
        rng_sample = check_random_state(self.seed_sample)
        # The tree model used to classify the generated instances.
        self._generate_random_tree()

        while True:
            # Numeric features are drawn first, then the categorical ones.
            x = {name: rng_sample.rand() for name in self.features_num}
            x.update(
                (name, rng_sample.randint(self.n_categories_per_feature))
                for name in self.features_cat
            )
            yield x, self._classify_instance(self.tree_root, x)
Exemple #23
0
    def __iter__(self):
        """Yield an endless sequence of ``(x, y)`` samples.

        Nine attributes (salary, commission, age, elevel, car, zipcode,
        hvalue, hyears, loan) are drawn from ``self._rng`` and labelled by
        the function selected by ``self.classification_function``.

        When ``self.balance_classes`` is truthy, candidates are redrawn until
        the label matches the class expected next, which alternates between
        1 and 0 (starting with 1). When ``self.perturbation > 0``, the
        numeric attributes are passed through ``self._perturb_value`` after
        the label has been computed.

        ``x`` holds one entry per name in ``self.feature_names``, looked up
        in an explicit mapping instead of being resolved with ``eval``.
        """
        self._rng = check_random_state(self.seed)
        self._next_class_should_be_zero = False

        while True:
            y = 0
            desired_class_found = False
            while not desired_class_found:
                salary = 20000 + 130000 * self._rng.rand()
                # No commission is drawn for salaries of 75000 and above.
                commission = (0 if (salary >= 75000) else
                              (10000 + 75000 * self._rng.rand()))
                age = 20 + self._rng.randint(61)
                elevel = self._rng.randint(5)
                car = self._rng.randint(20)
                zipcode = self._rng.randint(9)
                hvalue = (9 - zipcode) * 100000 * (0.5 + self._rng.rand())
                hyears = 1 + self._rng.randint(30)
                loan = self._rng.rand() * 500000
                y = self._classification_functions[
                    self.classification_function](salary, commission, age,
                                                  elevel, car, zipcode, hvalue,
                                                  hyears, loan)
                if not self.balance_classes:
                    desired_class_found = True
                else:
                    if (self._next_class_should_be_zero and
                        (y == 0)) or ((not self._next_class_should_be_zero) and
                                      (y == 1)):
                        desired_class_found = True
                        self._next_class_should_be_zero = (
                            not self._next_class_should_be_zero)

            if self.perturbation > 0.0:
                salary = self._perturb_value(salary, 20000, 150000)
                if commission > 0:
                    commission = self._perturb_value(commission, 10000, 75000)
                age = np.round(self._perturb_value(age, 20, 80))
                hvalue = self._perturb_value(hvalue, (9 - zipcode) * 100000, 0,
                                             135000)
                hyears = np.round(self._perturb_value(hyears, 1, 30))
                loan = self._perturb_value(loan, 0, 500000)

            # Explicit name -> value table; avoids evaluating feature names
            # as Python expressions.
            # NOTE(review): assumes every entry of self.feature_names is one
            # of the nine attribute names below -- confirm against the class.
            generated = {
                "salary": salary,
                "commission": commission,
                "age": age,
                "elevel": elevel,
                "car": car,
                "zipcode": zipcode,
                "hvalue": hvalue,
                "hyears": hyears,
                "loan": loan,
            }
            x = {feature: generated[feature] for feature in self.feature_names}

            yield x, y
Exemple #24
0
    def _generate_random_tree(self):
        """
        Generates the random tree and stores its root in ``self.tree_root``.

        The work is delegated to ``_generate_random_tree_node``, which
        receives 0 as its first argument, the indices of all numeric and
        categorical features as candidates, per-numeric-feature lower bounds
        of 0 and upper bounds of 1, and a random state built from
        ``self.seed_tree``.
        """
        rng_tree = check_random_state(self.seed_tree)

        n_candidates = self.n_num_features + self.n_cat_features
        candidate_features = np.arange(n_candidates)
        lower_bounds = np.zeros(self.n_num_features)
        upper_bounds = np.ones(self.n_num_features)

        root = self._generate_random_tree_node(
            0, candidate_features, lower_bounds, upper_bounds, rng_tree)
        self.tree_root = root
Exemple #25
0
    def __iter__(self):
        """Yield an endless sequence of ``(x, y)`` samples.

        ``y`` is drawn with ``randint(self.n_classes)``. The relevant
        attributes are read from ``self._ORIGINAL_INSTANCES[y]`` and each one
        is replaced by ``int(value == 0)`` whenever
        ``0.01 + rand() <= self.noise_percentage``. If
        ``self.irrelevant_features`` is truthy, the remaining attributes are
        filled with ``randint(2)`` draws.
        """
        self._rng = check_random_state(self.seed)

        while True:
            x = {}
            y = self._rng.randint(self.n_classes)

            for i in range(self._N_RELEVANT_FEATURES):
                flip = (0.01 + self._rng.rand()) <= self.noise_percentage
                value = self._ORIGINAL_INSTANCES[y, i]
                x[i] = int(value == 0) if flip else value

            if self.irrelevant_features:
                first_noise = self._N_RELEVANT_FEATURES
                for i in range(first_noise, self._N_FEATURES_INCLUDING_NOISE):
                    x[i] = self._rng.randint(2)

            yield x, y
    def __iter__(self):
        """Yield samples taken from ``self.stream`` or ``self.drift_stream``.

        For the ``k``-th sample (counting from 1), the probability of
        reading from the drift stream is
        ``1 / (1 + exp(-4 * (k - position) / width))``. Iteration ends as
        soon as the selected stream is exhausted.
        """
        rng = check_random_state(self.seed)
        base_samples = iter(self.stream)
        drift_samples = iter(self.drift_stream)
        sample_idx = 0

        while True:
            sample_idx += 1
            exponent = -4.0 * float(sample_idx - self.position) / float(self.width)
            probability_drift = 1.0 / (1.0 + np.exp(exponent))

            # One uniform draw decides which stream supplies this sample.
            if rng.rand() > probability_drift:
                source = base_samples
            else:
                source = drift_samples

            try:
                x, y = next(source)
            except StopIteration:
                return
            yield x, y
Exemple #27
0
    def __init__(
        self,
        max_features: int = 2,
        grace_period: int = 200,
        max_depth: int = None,
        split_confidence: float = 1e-7,
        tie_threshold: float = 0.05,
        leaf_prediction: str = "model",
        leaf_model: base.Regressor = None,
        model_selector_decay: float = 0.95,
        nominal_attributes: list = None,
        splitter: Splitter = None,
        min_samples_split: int = 5,
        binary_split: bool = False,
        max_size: int = 100,
        memory_estimate_period: int = 1000000,
        stop_mem_management: bool = False,
        remove_poor_attrs: bool = False,
        merit_preprune: bool = True,
        seed=None,
    ):
        """Initialize the tree and its random state.

        Every tree hyper-parameter is forwarded unchanged to the parent
        initializer. ``max_features`` and ``seed`` are stored on the
        instance, and ``seed`` is turned into a random state through
        ``check_random_state``.
        """
        # Generic tree hyper-parameters are handled by the parent class.
        super().__init__(
            grace_period=grace_period, max_depth=max_depth,
            split_confidence=split_confidence,
            tie_threshold=tie_threshold,
            leaf_prediction=leaf_prediction, leaf_model=leaf_model,
            model_selector_decay=model_selector_decay,
            nominal_attributes=nominal_attributes, splitter=splitter,
            min_samples_split=min_samples_split,
            binary_split=binary_split, max_size=max_size,
            memory_estimate_period=memory_estimate_period,
            stop_mem_management=stop_mem_management,
            remove_poor_attrs=remove_poor_attrs,
            merit_preprune=merit_preprune,
        )

        # Parameters handled by this class only.
        self.seed = seed
        self.max_features = max_features
        self._rng = check_random_state(seed)
Exemple #28
0
    def __iter__(self):
        """Yield an endless sequence of ``(x, y)`` samples with moving centroids.

        Before each sample, every coordinate of the first
        ``n_drift_centroids`` centroids is shifted by its speed times
        ``self.change_speed``. A coordinate that leaves ``[0, 1]`` is clamped
        to the nearest bound and its speed is negated.
        """
        self._generate_centroids()
        rng_sample = check_random_state(self.seed_sample)

        while True:
            # Move centroids
            for i in range(self.n_drift_centroids):
                centre = self.centroids[i].centre
                speed = self.centroid_speed[i]

                for j in range(self.n_features):
                    centre[j] += speed[j] * self.change_speed

                    if centre[j] > 1:
                        centre[j] = 1
                    elif centre[j] < 0:
                        centre[j] = 0
                    else:
                        # Still inside the bounds: nothing to reverse.
                        continue
                    speed[j] = -speed[j]

            sample = self._generate_sample(rng_sample)
            x, y = sample
            yield x, y
Exemple #29
0
    def _generate_centroids(self):
        """ Generates centroids

        Builds ``self.n_centroids`` centroids one after another. For each
        one, ``n_num_features`` centre coordinates, a class label
        (``randint(self.n_classes)``), a standard deviation and a weight are
        drawn, in that order, from a random state built from
        ``self.seed_model``. Centroids go to ``self.centroids`` and weights
        to ``self.centroid_weights``.

        """
        rng_model = check_random_state(self.seed_model)
        self.centroids = []
        self.centroid_weights = []

        for _ in range(self.n_centroids):
            centroid = Centroid()
            self.centroids.append(centroid)

            centroid.centre = [
                rng_model.rand() for _ in range(self.n_num_features)
            ]
            centroid.class_label = rng_model.randint(self.n_classes)
            centroid.std_dev = rng_model.rand()

            weight = rng_model.rand()
            self.centroid_weights.append(weight)
Exemple #30
0
    def __init__(
        self,
        max_features: int = 2,
        grace_period: int = 200,
        max_depth: int = None,
        split_criterion: str = "info_gain",
        split_confidence: float = 1e-7,
        tie_threshold: float = 0.05,
        leaf_prediction: str = "nba",
        nb_threshold: int = 0,
        nominal_attributes: list = None,
        splitter: Splitter = None,
        binary_split: bool = False,
        max_size: int = 100,
        memory_estimate_period: int = 1000000,
        stop_mem_management: bool = False,
        remove_poor_attrs: bool = False,
        merit_preprune: bool = True,
        seed=None,
    ):
        """Initialize the tree and its random state.

        Every tree hyper-parameter is forwarded unchanged to the parent
        initializer. ``max_features`` and ``seed`` are stored on the
        instance, and ``seed`` is turned into a random state through
        ``check_random_state``.
        """
        # Generic tree hyper-parameters are handled by the parent class.
        super().__init__(
            grace_period=grace_period, max_depth=max_depth,
            split_criterion=split_criterion,
            split_confidence=split_confidence,
            tie_threshold=tie_threshold,
            leaf_prediction=leaf_prediction, nb_threshold=nb_threshold,
            nominal_attributes=nominal_attributes, splitter=splitter,
            binary_split=binary_split, max_size=max_size,
            memory_estimate_period=memory_estimate_period,
            stop_mem_management=stop_mem_management,
            remove_poor_attrs=remove_poor_attrs,
            merit_preprune=merit_preprune,
        )

        # Parameters handled by this class only.
        self.seed = seed
        self.max_features = max_features
        self._rng = check_random_state(seed)