def __iter__(self):
    """Yield an endless sequence of ``(x, y)`` samples.

    The centroids are generated first; afterwards a random state derived
    from ``self.seed_sample`` is passed to ``_generate_sample`` for every
    sample that is produced.
    """
    self._generate_centroids()
    sampler = check_random_state(self.seed_sample)

    while True:
        features, label = self._generate_sample(sampler)
        yield features, label
def __iter__(self):
    """Yield an endless sequence of ``(x, y)`` samples.

    Each sample draws ``size``, ``color`` and ``shape`` uniformly from
    ``{0, 1, 2}`` and labels them with the currently selected entry of
    ``self._functions``. When ``self.balance_classes`` is set, candidates
    are redrawn until the label alternates between 1 and 0 (starting
    with 1).
    """
    self._rng = check_random_state(self.seed)
    self.next_class_should_be_zero = False

    while True:
        # Rejection sampling: keep drawing until an acceptable label shows up.
        while True:
            size = self._rng.randint(3)
            color = self._rng.randint(3)
            shape = self._rng.randint(3)
            y = self._functions[self.classification_function](size, color, shape)

            if not self.balance_classes:
                break

            if y == (0 if self.next_class_should_be_zero else 1):
                # Accepted: the next sample must carry the other label.
                self.next_class_should_be_zero = not self.next_class_should_be_zero
                break

        yield {"size": size, "color": color, "shape": shape}, y
def __init__(
    self,
    max_features: int = 2,
    grace_period: int = 200,
    max_depth: int = None,
    split_criterion: str = "info_gain",
    split_confidence: float = 1e-7,
    tie_threshold: float = 0.05,
    leaf_prediction: str = "nba",
    nb_threshold: int = 0,
    nominal_attributes: list = None,
    splitter: Splitter = None,
    seed=None,
    **kwargs,
):
    """Set up the tree and its random state.

    Every argument except ``max_features`` and ``seed`` is forwarded
    unchanged to the parent constructor (extra ``kwargs`` included).
    ``max_features`` and ``seed`` are stored on the instance, and
    ``_rng`` is built from ``seed`` through ``check_random_state``.
    """
    parent_params = dict(
        grace_period=grace_period,
        max_depth=max_depth,
        split_criterion=split_criterion,
        split_confidence=split_confidence,
        tie_threshold=tie_threshold,
        leaf_prediction=leaf_prediction,
        nb_threshold=nb_threshold,
        nominal_attributes=nominal_attributes,
        splitter=splitter,
    )
    super().__init__(**parent_params, **kwargs)

    self.max_features = max_features
    self.seed = seed
    self._rng = check_random_state(self.seed)
def __iter__(self):
    """Yield an endless sequence of ``(x, y)`` samples.

    Attributes 0 and 1 are booleans (a uniform draw compared against
    0.5), attributes 2 and 3 are uniform draws from ``rand()``. The label
    comes from the currently selected entry of ``self._functions``. When
    ``self.balance_classes`` is set, candidates are redrawn until the
    label alternates between 1 and 0 (starting with 1).
    """
    self._rng = check_random_state(self.seed)
    self.next_class_should_be_zero = False

    while True:
        # Rejection sampling: keep drawing until an acceptable label shows up.
        while True:
            att_0 = self._rng.rand() >= 0.5
            att_1 = self._rng.rand() >= 0.5
            att_2 = self._rng.rand()
            att_3 = self._rng.rand()
            y = self._functions[self.classification_function](att_0, att_1, att_2, att_3)

            if not self.balance_classes:
                break

            if y == (0 if self.next_class_should_be_zero else 1):
                # Accepted: the next sample must carry the other label.
                self.next_class_should_be_zero = not self.next_class_should_be_zero
                break

        yield {0: att_0, 1: att_1, 2: att_2, 3: att_3}, y
def __init__(
    self,
    model=HoeffdingTreeClassifier(grace_period=50, split_confidence=0.01),
    n_models: int = 100,
    subspace_size: typing.Union[int, float, str] = .6,
    training_method: str = "patches",
    lam: float = 6.0,
    drift_detector: typing.Union[base.DriftDetector, None] = ADWIN(delta=1e-5),
    warning_detector: base.DriftDetector = ADWIN(delta=1e-4),
    disable_weighted_vote: bool = False,
    nominal_attributes=None,
    seed=None,
    metric: MultiClassMetric = Accuracy(),
):
    """Store the ensemble configuration and reset its internal state.

    All arguments are kept on the instance under the same name, except
    that a falsy ``nominal_attributes`` is replaced by a new empty list.
    ``_rng`` is built from ``seed`` through ``check_random_state``.

    NOTE(review): the default ``model``, ``drift_detector``,
    ``warning_detector`` and ``metric`` objects are created once, when
    this function is defined, so every instance relying on the defaults
    receives the same objects. Confirm they are copied before being
    mutated.
    """
    # Placeholder only; the actual list of models is built later.
    super().__init__([None])
    self.models = []

    # Hyperparameters, stored as given.
    self.model = model  # any base estimator is accepted
    self.n_models = n_models
    self.subspace_size = subspace_size
    self.training_method = training_method
    self.lam = lam
    self.drift_detector = drift_detector
    self.warning_detector = warning_detector
    self.disable_weighted_vote = disable_weighted_vote
    self.metric = metric
    self.nominal_attributes = nominal_attributes or []

    # Randomness.
    self.seed = seed
    self._rng = check_random_state(self.seed)

    # Internal bookkeeping.
    self._n_samples_seen = 0
    self._subspaces = None
    self._base_learner_class = StreamingRandomPatchesBaseLearner
def __init__(
    self,
    max_features: int = 2,
    grace_period: int = 200,
    max_depth: int = None,
    split_confidence: float = 1e-7,
    tie_threshold: float = 0.05,
    leaf_prediction: str = "model",
    leaf_model: base.Regressor = None,
    model_selector_decay: float = 0.95,
    nominal_attributes: list = None,
    splitter: Splitter = None,
    min_samples_split: int = 5,
    seed=None,
    **kwargs,
):
    """Set up the regression tree and its random state.

    Every argument except ``max_features`` and ``seed`` is forwarded
    unchanged to the parent constructor (extra ``kwargs`` included).
    ``max_features`` and ``seed`` are stored on the instance, and
    ``_rng`` is built from ``seed`` through ``check_random_state``.
    """
    parent_params = dict(
        grace_period=grace_period,
        max_depth=max_depth,
        split_confidence=split_confidence,
        tie_threshold=tie_threshold,
        leaf_prediction=leaf_prediction,
        leaf_model=leaf_model,
        model_selector_decay=model_selector_decay,
        nominal_attributes=nominal_attributes,
        splitter=splitter,
        min_samples_split=min_samples_split,
    )
    super().__init__(**parent_params, **kwargs)

    self.max_features = max_features
    self.seed = seed
    self._rng = check_random_state(self.seed)
def __init__(
    self,
    n_models: int,
    max_features: typing.Union[bool, str, int],
    lambda_value: int,
    drift_detector: typing.Union[base.DriftDetector, None],
    warning_detector: typing.Union[base.DriftDetector, None],
    metric: typing.Union[metrics.MultiClassMetric, metrics.RegressionMetric],
    disable_weighted_vote,
    seed,
):
    """Store the forest configuration and reset its internal state.

    Every argument is kept on the instance under the same name. ``_rng``
    is the random state built from ``seed`` through
    ``check_random_state``.
    """
    # Placeholder only; the actual list of models is built later.
    super().__init__([None])
    self.models = []

    # Configuration, stored as given.
    self.n_models = n_models
    self.max_features = max_features
    self.lambda_value = lambda_value
    self.metric = metric
    self.disable_weighted_vote = disable_weighted_vote
    self.drift_detector = drift_detector
    self.warning_detector = warning_detector

    # ``seed`` is the user-facing value; ``_rng`` is the generator actually used.
    self.seed = seed
    self._rng = check_random_state(self.seed)

    # Internal parameters.
    self._n_samples_seen = 0
    self._base_member_class = None
def __init__(
    self,
    grace_period: int = 200,
    max_depth: int = None,
    split_criterion: str = 'info_gain',
    split_confidence: float = 1e-7,
    tie_threshold: float = 0.05,
    leaf_prediction: str = 'nba',
    nb_threshold: int = 0,
    nominal_attributes: list = None,
    attr_obs: str = 'gaussian',
    attr_obs_params: dict = None,
    max_features: int = 2,
    seed=None,
    **kwargs,
):
    """Set up the tree and its random state.

    Every argument except ``max_features`` and ``seed`` is forwarded
    unchanged to the parent constructor (extra ``kwargs`` included).
    ``max_features`` and ``seed`` are stored on the instance, and
    ``_rng`` is built from ``seed`` through ``check_random_state``.
    """
    parent_params = dict(
        grace_period=grace_period,
        max_depth=max_depth,
        split_criterion=split_criterion,
        split_confidence=split_confidence,
        tie_threshold=tie_threshold,
        leaf_prediction=leaf_prediction,
        nb_threshold=nb_threshold,
        nominal_attributes=nominal_attributes,
        attr_obs=attr_obs,
        attr_obs_params=attr_obs_params,
    )
    super().__init__(**parent_params, **kwargs)

    self.max_features = max_features
    self.seed = seed
    self._rng = check_random_state(self.seed)
def __init__(self, stats, depth, attr_obs, attr_obs_params, adwin_delta, seed):
    """Create the node with its own ADWIN instance and random state.

    ``stats``, ``depth``, ``attr_obs`` and ``attr_obs_params`` go to the
    parent constructor. ``adwin_delta`` is stored and used as the
    ``delta`` of a new ``ADWIN`` instance; ``seed`` is turned into
    ``_rng`` through ``check_random_state``.
    """
    super().__init__(stats, depth, attr_obs, attr_obs_params)

    self.adwin_delta = adwin_delta
    self._adwin = ADWIN(delta=self.adwin_delta)
    self.error_change = False

    self._rng = check_random_state(seed)
def __iter__(self):
    """Yield an endless sequence of ``(x, y)`` samples with swapped attributes.

    ``self._attr_idx`` starts as the identity mapping over all feature
    positions. If irrelevant features are enabled and
    ``self.n_drift_features`` is positive, that many pairs of positions
    (one among the first 7, one from position 7 onwards) are exchanged
    in the mapping. Samples are then produced forever, with every value
    written at the mapped position.
    """
    self._rng = check_random_state(self.seed)
    self._attr_idx = np.arange(self._N_FEATURES_INCLUDING_NOISE)

    # Change attributes
    if self.irrelevant_features and self.n_drift_features > 0:
        relevant_shift = self._rng.randint(7)
        irrelevant_shift = self._rng.randint(self._N_IRRELEVANT_ATTRIBUTES)
        for k in range(self.n_drift_features):
            relevant_pos = (k + relevant_shift) % 7
            irrelevant_pos = 7 + (k + irrelevant_shift) % self._N_IRRELEVANT_ATTRIBUTES
            self._attr_idx[relevant_pos] = irrelevant_pos
            self._attr_idx[irrelevant_pos] = relevant_pos

    while True:
        # Pre-fill every key so the dictionary order is fixed.
        x = dict.fromkeys(range(self.n_features), -1)
        y = self._rng.randint(self.n_classes)

        for i in range(self._N_RELEVANT_FEATURES):
            noisy = (0.01 + self._rng.rand()) <= self.noise_percentage
            value = self._ORIGINAL_INSTANCES[y, i]
            # A noisy draw writes 1 when the original value equals 0, else 0.
            x[self._attr_idx[i]] = int(value == 0) if noisy else value

        if self.irrelevant_features:
            first_noise = self._N_RELEVANT_FEATURES
            for i in range(first_noise, self._N_FEATURES_INCLUDING_NOISE):
                x[self._attr_idx[i]] = self._rng.randint(2)

        yield x, y
def _generate_data(self):
    """Build the ``X`` and ``y`` arrays, anomalies included.

    ``X`` has two columns: a noisy sine and a noisy cosine of
    ``arange(n_samples) / 4``. With ``self.contextual`` set, some entries
    of the second column are overwritten with first-column values taken
    ``self.shift`` positions earlier (wrapping around the end). Finally
    ``self.n_anomalies`` entries of the second column are replaced and
    their labels in ``y`` set to 1.
    """
    self._random_state = check_random_state(self.seed)
    rs = self._random_state

    n = self.n_samples
    phase = np.arange(n) / 4.0

    self.y = np.zeros(n)
    # The draw order (sine noise first, then cosine noise) is significant.
    sine_col = np.sin(phase) + rs.randn(n) * self.noise
    cosine_col = np.cos(phase) + rs.randn(n) * self.noise
    self.X = np.column_stack([sine_col, cosine_col])

    if self.contextual:
        # Source positions of the contextual anomalies.
        source_idx = rs.choice(n - self.shift, self.n_contextual, replace=self.replace)
        # Destination positions, wrapped around the end of the array.
        target_idx = source_idx + self.shift
        target_idx[target_idx >= n] -= n
        self.X[target_idx, 1] = self.X[source_idx, 0]

    # Positions of the anomalies.
    anomalies_idx = rs.choice(n, self.n_anomalies, replace=self.replace)
    # ``choice`` without a size yields a single value, shared by all anomalies.
    base_level = np.sin(rs.choice(self.n_anomalies, replace=self.replace))
    jitter = rs.randn(self.n_anomalies) * self.noise
    self.X[anomalies_idx, 1] = base_level + jitter + 2.0

    # Mark sample as anomalous
    self.y[anomalies_idx] = 1
def __iter__(self):
    """Yield an endless sequence of ``(x, y)`` samples.

    Features 0 and 1 are uniform draws from ``rand()`` and are labelled
    by the currently selected entry of ``self._functions``. When
    ``self.balance_classes`` is set, candidates are redrawn until the
    label alternates between 1 and 0 (starting with 1). With
    ``self.has_noise`` set, features 2 and 3 are added as two further
    uniform draws.
    """
    self._rng = check_random_state(self.seed)
    self.next_class_should_be_zero = False

    while True:
        x = {}

        # Rejection sampling: keep drawing until an acceptable label shows up.
        while True:
            x[0] = self._rng.rand()
            x[1] = self._rng.rand()
            y = self._functions[self.classification_function](x[0], x[1])

            if not self.balance_classes:
                break

            if y == (0 if self.next_class_should_be_zero else 1):
                # Accepted: the next sample must carry the other label.
                self.next_class_should_be_zero = not self.next_class_should_be_zero
                break

        if self.has_noise:
            x[2] = self._rng.rand()
            x[3] = self._rng.rand()

        yield x, y
def _generate_centroids(self):
    """Generate the centroids and the drift speed of the moving ones.

    The parent implementation creates the centroids. Afterwards, a speed
    vector with ``n_features`` uniform random components is drawn for
    each of the first ``n_drift_centroids`` centroids and scaled to unit
    Euclidean norm. The vectors are stored in ``self.centroid_speed``.
    """
    super()._generate_centroids()
    rng_model = check_random_state(self.seed_model)

    self.centroid_speed = []
    for _ in range(self.n_drift_centroids):
        speed = np.zeros(self.n_features)
        squared_norm = 0.0
        for j in range(self.n_features):
            component = rng_model.rand()
            speed[j] = component
            squared_norm += component * component

        # Normalise so that every centroid moves at the same pace.
        speed /= np.sqrt(squared_norm)
        self.centroid_speed.append(speed)
def __init__(self, split_test, stats, depth, adwin_delta, seed):
    """Create the split node with its own ADWIN instance and random state.

    ``split_test``, ``stats`` and ``depth`` go to the parent constructor.
    ``adwin_delta`` is stored and used as the ``delta`` of a new
    ``ADWIN`` instance; ``seed`` is turned into ``_rng`` through
    ``check_random_state``. The node starts without an alternate tree.
    """
    super().__init__(split_test, stats, depth)

    self.adwin_delta = adwin_delta
    self._adwin = ADWIN(delta=self.adwin_delta)

    self._alternate_tree = None
    self._error_change = False

    self._rng = check_random_state(seed)
def __init__(
    self, stats, depth, attr_obs, attr_obs_params, max_features, seed, **kwargs
):
    """Create the node and prepare its feature-subset state.

    ``stats``, ``depth``, ``attr_obs``, ``attr_obs_params`` and any extra
    ``kwargs`` go to the parent constructor. ``max_features`` and
    ``seed`` are stored, ``_rng`` is built from ``seed`` through
    ``check_random_state`` and ``feature_indices`` starts empty.
    """
    super().__init__(stats, depth, attr_obs, attr_obs_params, **kwargs)  # noqa

    self.seed = seed
    self._rng = check_random_state(self.seed)

    self.max_features = max_features
    self.feature_indices = []
def __init__(self, stats, *children, adwin_delta, seed, **attributes):
    """Create the branch with its own ADWIN instance and random state.

    ``stats``, the positional ``children`` and the extra ``attributes``
    go to the parent constructor. ``adwin_delta`` (keyword-only) is
    stored and used as the ``delta`` of a new ``ADWIN`` instance;
    ``seed`` (keyword-only) is turned into ``_rng`` through
    ``check_random_state``. The branch starts without an alternate tree.
    """
    super().__init__(stats, *children, **attributes)

    self.adwin_delta = adwin_delta
    self._adwin = ADWIN(delta=self.adwin_delta)

    self._alternate_tree = None
    self._error_change = False

    self._rng = check_random_state(seed)
def __init__(self, stats, depth, splitter, adwin_delta, seed, **kwargs):
    """Create the leaf with its ADWIN instance, random state and normaliser.

    ``stats``, ``depth``, ``splitter`` and any extra ``kwargs`` go to the
    parent constructor. ``adwin_delta`` is stored and used as the
    ``delta`` of a new ``ADWIN`` instance; ``seed`` is turned into
    ``_rng`` through ``check_random_state``.
    """
    super().__init__(stats, depth, splitter, **kwargs)

    self.adwin_delta = adwin_delta
    self._adwin = ADWIN(delta=self.adwin_delta)
    self._error_change = False

    self._rng = check_random_state(seed)

    # Running variance (Welford's algorithm) used to normalise the values
    # monitored by the drift detectors.
    self._error_normalizer = Var(ddof=1)
def __iter__(self):
    """Yield ``(features, targets)`` dictionaries, one pair per row.

    The rows come from ``_make_logical``. Each row of ``X`` is zipped
    with ``self.feature_names`` and each row of ``Y`` with
    ``self.target_names``. If ``Y`` is not iterable, an empty sequence is
    used in its place.
    """
    rng = check_random_state(self.seed)
    X, Y = self._make_logical(
        n_tiles=self.n_tiles, shuffle=self.shuffle, random_state=rng
    )

    targets = Y if hasattr(Y, "__iter__") else []
    for xi, yi in itertools.zip_longest(X, targets):
        features = dict(zip(self.feature_names, xi))
        labels = dict(zip(self.target_names, yi))
        yield features, labels
def __init__(self, stats, depth, attr_obs, attr_obs_params, leaf_model, adwin_delta, seed):
    """Create the leaf with its ADWIN instance, random state and normaliser.

    ``stats``, ``depth``, ``attr_obs``, ``attr_obs_params`` and
    ``leaf_model`` go to the parent constructor. ``adwin_delta`` is
    stored and used as the ``delta`` of a new ``ADWIN`` instance;
    ``seed`` is turned into ``_rng`` through ``check_random_state``.
    """
    super().__init__(stats, depth, attr_obs, attr_obs_params, leaf_model)

    self.adwin_delta = adwin_delta
    self._adwin = ADWIN(delta=self.adwin_delta)
    self.error_change = False

    self._rng = check_random_state(seed)

    # Running variance (Welford's algorithm) used to normalise the values
    # monitored by the drift detectors.
    self._error_normalizer = Var(ddof=1)
def __init__(self, model, order=None, seed=None):
    """Store the configuration and, when possible, build the models.

    ``model``, ``order`` and ``seed`` are kept on the instance and
    ``_rng`` is built from ``seed`` through ``check_random_state``.
    ``_init_models`` is called immediately only if ``order`` is a
    ``list``.
    """
    super().__init__()

    self.model = model
    self.order = order
    self.seed = seed
    self._rng = check_random_state(self.seed)

    # With an explicit order the per-label models can be created right
    # away; otherwise this is deferred to the first call to learn_one.
    order_is_known = isinstance(order, list)
    if order_is_known:
        self._init_models()
def __init__(self, split_test, stats, depth, adwin_delta, seed):
    """Create the split node with its ADWIN instance, random state and normaliser.

    A falsy ``stats`` is replaced by a new ``Var()`` before
    ``split_test``, ``stats`` and ``depth`` go to the parent constructor.
    ``adwin_delta`` is stored and used as the ``delta`` of a new
    ``ADWIN`` instance; ``seed`` is turned into ``_rng`` through
    ``check_random_state``. The node starts without an alternate tree.
    """
    if not stats:
        stats = Var()
    super().__init__(split_test, stats, depth)

    self.adwin_delta = adwin_delta
    self._adwin = ADWIN(delta=self.adwin_delta)

    self._alternate_tree = None
    self._error_change = False

    self._rng = check_random_state(seed)

    # Running variance (Welford's algorithm) used to normalise the values
    # monitored by the drift detectors.
    self._error_normalizer = Var(ddof=1)
def __iter__(self):
    """Yield an endless sequence of ``(x, y)`` samples.

    A random tree is generated first. Each sample then draws a uniform
    value for every feature in ``self.features_num`` and an integer below
    ``self.n_categories_per_feature`` for every feature in
    ``self.features_cat``; the label is obtained by passing the sample
    through the tree with ``_classify_instance``.
    """
    rng_sample = check_random_state(self.seed_sample)

    # The tree acts as the ground-truth labelling model.
    self._generate_random_tree()

    while True:
        # Numerical features are drawn first, then categorical ones.
        x = {name: rng_sample.rand() for name in self.features_num}
        x.update(
            (name, rng_sample.randint(self.n_categories_per_feature))
            for name in self.features_cat
        )

        y = self._classify_instance(self.tree_root, x)
        yield x, y
def __iter__(self):
    """Yield an endless sequence of ``(x, y)`` samples.

    Nine attributes are drawn per candidate and labelled by the currently
    selected entry of ``self._classification_functions``. When
    ``self.balance_classes`` is set, candidates are redrawn until the
    label alternates between 1 and 0 (starting with 1). If
    ``self.perturbation`` is positive, the numerical attributes are then
    passed through ``_perturb_value``.

    NOTE: the feature dictionary is filled with ``eval(feature)``, which
    resolves each entry of ``self.feature_names`` against the local
    variables of this function. The local names below and the explicit
    ``for`` loop must therefore be kept.
    """
    self._rng = check_random_state(self.seed)
    self._next_class_should_be_zero = False

    while True:
        # Rejection sampling: keep drawing until an acceptable label shows up.
        while True:
            salary = 20000 + 130000 * self._rng.rand()
            if salary >= 75000:
                commission = 0
            else:
                commission = 10000 + 75000 * self._rng.rand()
            age = 20 + self._rng.randint(61)
            elevel = self._rng.randint(5)
            car = self._rng.randint(20)
            zipcode = self._rng.randint(9)
            hvalue = (9 - zipcode) * 100000 * (0.5 + self._rng.rand())
            hyears = 1 + self._rng.randint(30)
            loan = self._rng.rand() * 500000

            y = self._classification_functions[self.classification_function](
                salary, commission, age, elevel, car, zipcode, hvalue, hyears, loan
            )

            if not self.balance_classes:
                break

            if y == (0 if self._next_class_should_be_zero else 1):
                # Accepted: the next sample must carry the other label.
                self._next_class_should_be_zero = not self._next_class_should_be_zero
                break

        if self.perturbation > 0.0:
            salary = self._perturb_value(salary, 20000, 150000)
            if commission > 0:
                commission = self._perturb_value(commission, 10000, 75000)
            age = np.round(self._perturb_value(age, 20, 80))
            hvalue = self._perturb_value(hvalue, (9 - zipcode) * 100000, 0, 135000)
            hyears = np.round(self._perturb_value(hyears, 1, 30))
            loan = self._perturb_value(loan, 0, 500000)

        x = dict()
        for feature in self.feature_names:
            # Looks up the local variable named by ``feature``.
            x[feature] = eval(feature)

        yield x, y
def _generate_random_tree(self):
    """Build the random tree and store its root in ``self.tree_root``.

    A random state derived from ``self.seed_tree`` is passed to
    ``_generate_random_tree_node``, together with the indices of all
    numerical and categorical features as candidates and ``[0, 1]``
    bounds for every numerical feature. The first argument ``0`` is
    presumably the depth of the root node.
    """
    rng_tree = check_random_state(self.seed_tree)

    n_numeric = self.n_num_features
    candidate_features = np.arange(n_numeric + self.n_cat_features)
    lower_bounds = np.zeros(n_numeric)
    upper_bounds = np.ones(n_numeric)

    self.tree_root = self._generate_random_tree_node(
        0, candidate_features, lower_bounds, upper_bounds, rng_tree
    )
def __iter__(self):
    """Yield an endless sequence of ``(x, y)`` samples.

    The label is a random integer below ``self.n_classes``. Each relevant
    feature takes the matching entry of ``self._ORIGINAL_INSTANCES``,
    unless a noise draw replaces it. With ``self.irrelevant_features``
    set, the remaining positions receive random bits.
    """
    self._rng = check_random_state(self.seed)

    while True:
        x = {}
        y = self._rng.randint(self.n_classes)

        for i in range(self._N_RELEVANT_FEATURES):
            noisy = (0.01 + self._rng.rand()) <= self.noise_percentage
            value = self._ORIGINAL_INSTANCES[y, i]
            # A noisy draw writes 1 when the original value equals 0, else 0.
            x[i] = int(value == 0) if noisy else value

        if self.irrelevant_features:
            first_noise = self._N_RELEVANT_FEATURES
            for i in range(first_noise, self._N_FEATURES_INCLUDING_NOISE):
                x[i] = self._rng.randint(2)

        yield x, y
def __iter__(self):
    """Yield samples from two streams, switching gradually between them.

    For the ``i``-th sample (counting from 1) the probability of reading
    from ``self.drift_stream`` is
    ``1 / (1 + exp(-4 * (i - position) / width))``; otherwise the sample
    is read from ``self.stream``. Iteration ends as soon as the selected
    stream is exhausted.
    """
    rng = check_random_state(self.seed)
    base_iter = iter(self.stream)
    drift_iter = iter(self.drift_stream)

    sample_idx = 0
    while True:
        sample_idx += 1

        # Sigmoid centred on ``position``.
        exponent = -4.0 * float(sample_idx - self.position) / float(self.width)
        probability_drift = 1.0 / (1.0 + np.exp(exponent))

        try:
            source = base_iter if rng.rand() > probability_drift else drift_iter
            x, y = next(source)
        except StopIteration:
            return

        yield x, y
def __init__(
    self,
    max_features: int = 2,
    grace_period: int = 200,
    max_depth: int = None,
    split_confidence: float = 1e-7,
    tie_threshold: float = 0.05,
    leaf_prediction: str = "model",
    leaf_model: base.Regressor = None,
    model_selector_decay: float = 0.95,
    nominal_attributes: list = None,
    splitter: Splitter = None,
    min_samples_split: int = 5,
    binary_split: bool = False,
    max_size: int = 100,
    memory_estimate_period: int = 1000000,
    stop_mem_management: bool = False,
    remove_poor_attrs: bool = False,
    merit_preprune: bool = True,
    seed=None,
):
    """Set up the regression tree and its random state.

    Every argument except ``max_features`` and ``seed`` is forwarded
    unchanged to the parent constructor. ``max_features`` and ``seed``
    are stored on the instance, and ``_rng`` is built from ``seed``
    through ``check_random_state``.
    """
    parent_params = dict(
        grace_period=grace_period,
        max_depth=max_depth,
        split_confidence=split_confidence,
        tie_threshold=tie_threshold,
        leaf_prediction=leaf_prediction,
        leaf_model=leaf_model,
        model_selector_decay=model_selector_decay,
        nominal_attributes=nominal_attributes,
        splitter=splitter,
        min_samples_split=min_samples_split,
        binary_split=binary_split,
        max_size=max_size,
        memory_estimate_period=memory_estimate_period,
        stop_mem_management=stop_mem_management,
        remove_poor_attrs=remove_poor_attrs,
        merit_preprune=merit_preprune,
    )
    super().__init__(**parent_params)

    self.max_features = max_features
    self.seed = seed
    self._rng = check_random_state(self.seed)
def __iter__(self):
    """Yield an endless sequence of ``(x, y)`` samples from moving centroids.

    Before every sample, each of the first ``n_drift_centroids``
    centroids is shifted by its speed vector times ``self.change_speed``.
    A coordinate that leaves ``[0, 1]`` is clamped to the boundary and
    the matching speed component is reversed.
    """
    self._generate_centroids()
    rng_sample = check_random_state(self.seed_sample)

    while True:
        # Move centroids
        for i in range(self.n_drift_centroids):
            centroid = self.centroids[i]
            speed = self.centroid_speed[i]
            for j in range(self.n_features):
                centroid.centre[j] += speed[j] * self.change_speed

                # Bounce off the walls of the unit hypercube.
                if centroid.centre[j] > 1:
                    centroid.centre[j] = 1
                    speed[j] = -speed[j]
                elif centroid.centre[j] < 0:
                    centroid.centre[j] = 0
                    speed[j] = -speed[j]

        features, label = self._generate_sample(rng_sample)
        yield features, label
def _generate_centroids(self):
    """Create the centroids and their weights.

    For each of the ``n_centroids`` centroids, the following are drawn in
    order from a random state derived from ``self.seed_model``: a centre
    with ``n_num_features`` uniform coordinates, a class label below
    ``n_classes``, a standard deviation and a weight. The centroids go to
    ``self.centroids`` and the weights to ``self.centroid_weights``.
    """
    rng_model = check_random_state(self.seed_model)

    self.centroids = []
    self.centroid_weights = []

    for _ in range(self.n_centroids):
        centroid = Centroid()
        self.centroids.append(centroid)

        # The draw order below is part of the generator's reproducibility.
        centroid.centre = [rng_model.rand() for _ in range(self.n_num_features)]
        centroid.class_label = rng_model.randint(self.n_classes)
        centroid.std_dev = rng_model.rand()
        self.centroid_weights.append(rng_model.rand())
def __init__(
    self,
    max_features: int = 2,
    grace_period: int = 200,
    max_depth: int = None,
    split_criterion: str = "info_gain",
    split_confidence: float = 1e-7,
    tie_threshold: float = 0.05,
    leaf_prediction: str = "nba",
    nb_threshold: int = 0,
    nominal_attributes: list = None,
    splitter: Splitter = None,
    binary_split: bool = False,
    max_size: int = 100,
    memory_estimate_period: int = 1000000,
    stop_mem_management: bool = False,
    remove_poor_attrs: bool = False,
    merit_preprune: bool = True,
    seed=None,
):
    """Set up the classification tree and its random state.

    Every argument except ``max_features`` and ``seed`` is forwarded
    unchanged to the parent constructor. ``max_features`` and ``seed``
    are stored on the instance, and ``_rng`` is built from ``seed``
    through ``check_random_state``.
    """
    parent_params = dict(
        grace_period=grace_period,
        max_depth=max_depth,
        split_criterion=split_criterion,
        split_confidence=split_confidence,
        tie_threshold=tie_threshold,
        leaf_prediction=leaf_prediction,
        nb_threshold=nb_threshold,
        nominal_attributes=nominal_attributes,
        splitter=splitter,
        binary_split=binary_split,
        max_size=max_size,
        memory_estimate_period=memory_estimate_period,
        stop_mem_management=stop_mem_management,
        remove_poor_attrs=remove_poor_attrs,
        merit_preprune=merit_preprune,
    )
    super().__init__(**parent_params)

    self.max_features = max_features
    self.seed = seed
    self._rng = check_random_state(self.seed)