def __init__(self, idx_original, base_estimator, performance_evaluator, created_on,
             disable_background_learner, disable_drift_detector, drift_detection_method,
             warning_detection_method, drift_detection_criteria, is_background_learner,
             feature_indexes=None, nominal_attributes=None, random_state=None):
    super().__init__(idx_original=idx_original,
                     base_estimator=base_estimator,
                     performance_evaluator=performance_evaluator,
                     created_on=created_on,
                     disable_background_learner=disable_background_learner,
                     disable_drift_detector=disable_drift_detector,
                     drift_detection_method=drift_detection_method,
                     warning_detection_method=warning_detection_method,
                     drift_detection_criteria=drift_detection_criteria,
                     is_background_learner=is_background_learner,
                     feature_indexes=feature_indexes,
                     nominal_attributes=nominal_attributes)

    # Background learner
    self._background_learner: StreamingRandomPatchesRegressorBaseLearner = None
    self._background_learner_class = StreamingRandomPatchesRegressorBaseLearner

    # The rest only applies when using periodic (pseudo) drift detectors

    # Use the same random_state object as the meta learner
    self.random_state = random_state
    self._random_state = check_random_state(self.random_state)

    # Drift detection
    self.drift_detection_criteria = drift_detection_criteria

    # If the drift detection method is periodic-fixed, shift the trigger
    # based on the learner's original index in the ensemble
    if isinstance(self.drift_detection_method, PeriodicTrigger):
        if self.drift_detection_method.trigger_method == PeriodicTrigger._FIXED_TRIGGER:
            self.drift_detection_method.set_params(w=self.idx_original)
        if self.drift_detection_method.trigger_method == PeriodicTrigger._RANDOM_TRIGGER:
            self.drift_detection_method.set_params(
                random_state=check_random_state(self.random_state))
    if isinstance(self.warning_detection_method, PeriodicTrigger):
        if self.warning_detection_method.trigger_method == PeriodicTrigger._FIXED_TRIGGER:
            self.warning_detection_method.set_params(w=self.idx_original)
        if self.warning_detection_method.trigger_method == PeriodicTrigger._RANDOM_TRIGGER:
            self.warning_detection_method.set_params(
                random_state=check_random_state(self.random_state))

    # Only used when paired with periodic drift detectors
    self.disable_warning_detector = False
import numpy as np
import pytest

from skmultiflow.utils import check_random_state


def test_check_random_state():
    # None falls back to a default RandomState
    rand = None
    rand = check_random_state(rand)
    assert isinstance(rand, np.random.mtrand.RandomState)

    # An existing RandomState is passed through unchanged
    rand = check_random_state(rand)
    assert isinstance(rand, np.random.mtrand.RandomState)

    # An int seed produces a new RandomState
    rand = check_random_state(int(1))
    assert isinstance(rand, np.random.mtrand.RandomState)

    # Anything else (e.g. a float) is rejected
    with pytest.raises(ValueError):
        check_random_state(2.0)
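# For reference, a minimal sketch of the semantics the test above exercises,
# mirroring scikit-learn's utility of the same name. This is illustrative
# only; the real implementation may differ in details such as which integer
# types it accepts.
import numpy as np

def _check_random_state_sketch(seed):
    """Turn `seed` into a np.random.RandomState instance (sketch)."""
    if seed is None:
        # Fall back to numpy's global RandomState singleton
        return np.random.mtrand._rand
    if isinstance(seed, (int, np.integer)):
        # Seed a fresh generator
        return np.random.RandomState(seed)
    if isinstance(seed, np.random.RandomState):
        # Already a generator: pass it through unchanged
        return seed
    raise ValueError(f"{seed!r} cannot be used to seed a "
                     "numpy.random.RandomState instance")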
def prepare_for_use(self):
    """ Prepares the stream for use.

    Notes
    -----
    This function should always be called after the stream initialization.

    """
    self._random_state = check_random_state(self.random_state)
    self._next_class_should_be_zero = False
    self.sample_idx = 0

    for i in range(self.n_features):
        self._weights[i] = self._random_state.rand()
        self._sigma[i] = 1 if (i < self.n_drift_features) else 0

    self.n_redund_features = math.floor(self.n_features * self.perc_redund_features)
    self.n_not_redund_features = self.n_features - self.n_redund_features

    # Initialize the redundancy variables: which original features are
    # duplicated and the coefficients applied to them
    self.index_redund = [
        self._sample_random_state.randint(0, (self.n_features - self.n_redund_features - 1))
        for ind in range(self.n_redund_features)
    ]
    self.coef_redund = [
        self._random_state.rand() + 0.1
        for ind in range(self.n_redund_features)
    ]
def __init__(self, split_test, stats=None, random_state=None):
    super().__init__(split_test, stats)
    self._adwin = ADWIN()
    self._alternate_tree = None
    self.error_change = False
    self._random_state = check_random_state(random_state)
def partial_fit(self, X, y, classes=None, sample_weight=None):
    """ Partially (incrementally) fit the model.

    Parameters
    ----------
    X : numpy.ndarray of shape (n_samples, n_features)
        The features to train the model.

    y: numpy.ndarray of shape (n_samples)
        An array-like with the class labels of all samples in X.

    classes: None
        Not used by this method.

    sample_weight: None
        Not used by this method.

    Returns
    -------
    self

    """
    row_cnt, _ = X.shape

    if self.samples_seen == 0:
        self._random_state = check_random_state(self.random_state)
        self.build_trees()

    for i in range(row_cnt):
        self._partial_fit(X[i], y[i])

    return self
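# For context, a minimal usage sketch of the incremental loop this method is
# designed for. SEAGenerator and HoeffdingTreeClassifier are stand-ins for
# whatever stream/estimator pair is actually used, and depending on the
# library version stream.prepare_for_use() may be required before sampling.
from skmultiflow.data import SEAGenerator
from skmultiflow.trees import HoeffdingTreeClassifier

stream = SEAGenerator(random_state=1)
model = HoeffdingTreeClassifier()

for _ in range(1000):
    X, y = stream.next_sample()
    model.partial_fit(X, y, classes=stream.target_values)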
def reset(self):
    """Reset ARF."""
    self.ensemble = None
    self.max_features = 0
    self.instances_seen = 0
    self._train_weight_seen_by_model = 0.0
    self.random_state = check_random_state(self._init_random_state)
def generate_random_tree(self):
    """ generate_random_tree

    Generates the random tree, starting from the root node and following
    the constraints passed as parameters to the initializer. The tree is
    recursively generated, node by node, until it reaches the maximum
    tree depth.

    """
    # Starting random generators and parameter arrays
    tree_random_state = check_random_state(self.tree_random_state)
    nominal_att_candidates = array('i')
    min_numeric_value = array('d')
    max_numeric_value = array('d')

    for i in range(self.n_num_features):
        min_numeric_value.append(0.0)
        max_numeric_value.append(1.0)

    for i in range(self.n_num_features + self.n_cat_features):
        nominal_att_candidates.append(i)

    self.tree_root = self.generate_random_tree_node(0, nominal_att_candidates,
                                                    min_numeric_value, max_numeric_value,
                                                    tree_random_state)
def _generate_centroids(self):
    """ Generates centroids

    The centroids are generated just as in the parent class; the
    difference is the extra step taken to set up the drift, if there is
    any. To configure the drift, random offset speeds are chosen for
    ``self.num_drift_centroids`` centroids. Finally, the speed is
    normalized.

    """
    super()._generate_centroids()
    model_random_state = check_random_state(self.model_random_state)
    num_drift_centroids = self.num_drift_centroids
    self.centroid_speed = []
    if num_drift_centroids > self.n_centroids:
        num_drift_centroids = self.n_centroids

    for i in range(num_drift_centroids):
        rand_speed = []
        norm_speed = 0.0

        for j in range(self.n_num_features):
            rand_speed.append(model_random_state.rand())
            norm_speed += rand_speed[j] * rand_speed[j]

        norm_speed = np.sqrt(norm_speed)

        for j in range(self.n_num_features):
            rand_speed[j] /= norm_speed

        self.centroid_speed.append(rand_speed)
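# The loops above build a unit-length direction vector per drifting
# centroid. A minimal numpy sketch of the same normalization; the names
# below are illustrative, not part of the generator's API:
import numpy as np

rng = np.random.RandomState(42)
rand_speed = rng.rand(5)                  # one random offset per feature
rand_speed /= np.linalg.norm(rand_speed)  # scale to unit L2 norm
assert np.isclose(np.sum(rand_speed ** 2), 1.0)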
def __init__(self, max_byte_size=33554432, memory_estimate_period=2000000, grace_period=50,
             split_criterion='info_gain', split_confidence=0.01, tie_threshold=0.05,
             binary_split=False, stop_mem_management=False, remove_poor_atts=False,
             no_preprune=False, leaf_prediction='nba', nb_threshold=0,
             nominal_attributes=None, max_features=2, random_state=None):
    """ARFHoeffdingTreeClassifier class constructor."""
    super().__init__(max_byte_size=max_byte_size,
                     memory_estimate_period=memory_estimate_period,
                     grace_period=grace_period,
                     split_criterion=split_criterion,
                     split_confidence=split_confidence,
                     tie_threshold=tie_threshold,
                     binary_split=binary_split,
                     stop_mem_management=stop_mem_management,
                     remove_poor_atts=remove_poor_atts,
                     no_preprune=no_preprune,
                     leaf_prediction=leaf_prediction,
                     nb_threshold=nb_threshold,
                     nominal_attributes=nominal_attributes)
    self.max_features = max_features
    self.remove_poor_attributes = False
    self.random_state = random_state
    self._random_state = check_random_state(self.random_state)
def __init__(self, initial_stats=None, max_features=2, random_state=None):
    """ LearningNodeNB class constructor. """
    super().__init__(initial_stats)
    self.max_features = max_features
    self.feature_indices = np.array([])
    self.random_state = random_state
    self._random_state = check_random_state(self.random_state)
def __init__(self, split_test, class_observations):
    super().__init__(split_test, class_observations)
    self._estimation_error_weight = ADWIN()
    self._alternate_tree = None
    self.error_change = False
    self._random_seed = 1
    self._classifier_random = check_random_state(self._random_seed)
def __init__(self, initial_class_observations, max_features, random_state=None):
    """ ActiveLearningNodeForRegression class constructor. """
    super().__init__(initial_class_observations)
    self.max_features = max_features
    self.list_attributes = np.array([])
    self.random_state = check_random_state(random_state)
def prepare_for_use(self):
    """ Should be called before generating the samples. """
    self.random_state = check_random_state(self._original_random_state)
    self.sample_idx = 0
def __init__(self, max_byte_size=33554432, memory_estimate_period=2000000, grace_period=50,
             split_confidence=0.01, tie_threshold=0.05, binary_split=False,
             stop_mem_management=False, remove_poor_atts=False, leaf_prediction="perceptron",
             no_preprune=False, nominal_attributes=None, learning_ratio_perceptron=0.02,
             learning_ratio_decay=0.001, learning_ratio_const=True, max_features=2,
             random_state=None):
    super().__init__(max_byte_size=max_byte_size,
                     memory_estimate_period=memory_estimate_period,
                     grace_period=grace_period,
                     split_confidence=split_confidence,
                     tie_threshold=tie_threshold,
                     binary_split=binary_split,
                     stop_mem_management=stop_mem_management,
                     remove_poor_atts=remove_poor_atts,
                     leaf_prediction=leaf_prediction,
                     no_preprune=no_preprune,
                     nominal_attributes=nominal_attributes,
                     learning_ratio_perceptron=learning_ratio_perceptron,
                     learning_ratio_decay=learning_ratio_decay,
                     learning_ratio_const=learning_ratio_const)
    self.max_features = max_features
    self.random_state = random_state
    self._random_state = check_random_state(self.random_state)
def partial_fit(self, X, y=None, classes=None, sample_weight=None):
    """ Partially (incrementally) fit the model.

    Parameters
    ----------
    X : numpy.ndarray of shape (n_samples, n_features)
        The features to train the model.

    y: Not used
        Kept in the signature for compatibility with the parent class.

    classes: None
        Not used by this method.

    sample_weight: None
        Not used by this method.

    Returns
    -------
    self

    """
    row_cnt, _ = X.shape

    if self.samples_seen == 0:
        self._random_state = check_random_state(self.random_state)
        self.n_features = get_dimensions(X)[1]
        self.build_trees()

    for i in range(row_cnt):
        self._partial_fit(X[i])

    return self
def _prepare_for_use(self):
    self._random_state = check_random_state(self.random_state)
    self.y = np.zeros(self.n_samples)
    # Two noisy sinusoidal signals, 90 degrees out of phase
    self.X = np.column_stack(
        [np.sin(np.arange(self.n_samples) / 4.)
         + self._random_state.randn(self.n_samples) * self.noise,
         np.cos(np.arange(self.n_samples) / 4.)
         + self._random_state.randn(self.n_samples) * self.noise]
    )

    if self.contextual:
        # Contextual anomaly indices
        contextual_anomalies = self._random_state.choice(self.n_samples - self.shift,
                                                         self.n_contextual,
                                                         replace=self.replace)
        # Set contextual anomalies: shift the indices, fold any that run past
        # the end of the series back into range, then replace one signal
        # with the other
        contextual_idx = contextual_anomalies + self.shift
        contextual_idx[contextual_idx >= self.n_samples] -= self.n_samples
        self.X[contextual_idx, 1] = self.X[contextual_anomalies, 0]

    # Anomaly indices
    anomalies_idx = self._random_state.choice(self.n_samples, self.n_anomalies,
                                              replace=self.replace)
    self.X[anomalies_idx, 1] = np.sin(self._random_state.choice(self.n_anomalies,
                                                                replace=self.replace)) \
        + self._random_state.randn(self.n_anomalies) * self.noise + 2.

    # Mark samples as anomalous
    self.y[anomalies_idx] = 1
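# A small standalone illustration of the index folding used for the
# contextual anomalies above. The numbers are made up purely to show the
# wrap-around step; they are not drawn the way the generator draws them.
import numpy as np

n_samples, shift = 10, 6
anomalies = np.array([1, 3, 5])     # original anomaly positions
idx = anomalies + shift             # -> [ 7,  9, 11]
idx[idx >= n_samples] -= n_samples  # -> [ 7,  9,  1], folded back into range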
def __init__(self, generator='agrawal', stable_period=3000, position=3000,
             concepts=[4, 0, 8], width=1, lam=1.0, has_noise=False,
             all_concepts=[4, 0, 8, 6, 2, 1, 3, 5, 7, 9], concept_shift_step=-1,
             concept_shift_sample_intervals=[200000, 250000, 300000],
             stable_period_lam=-1, stable_period_start=1000, stable_period_base=200,
             stable_period_logger=None, drift_interval_distr="poisson", random_state=0):
    super().__init__()
    self.streams = []
    self.cur_stream = None
    self.stream_idx = 0
    self.drift_stream_idx = 0
    self.sample_idx = 0
    self.generator = generator
    self.stable_period = stable_period
    self.position = position
    self.concepts = concepts
    self.random_state = random_state
    self._random_state = check_random_state(self.random_state)
    self.width = width
    self.lam = lam
    self.concepts_probs = []

    self.has_noise = has_noise
    self.noises = [0.1, 0.2, 0.3, 0.4]
    self.noise_probs = self.__get_poisson_probs(4, self.lam)

    self.stable_period_lam = stable_period_lam
    self.stable_period_start = stable_period_start
    self.stable_period_base = stable_period_base
    if drift_interval_distr == "poisson":
        self.stable_period_probs = self.__get_poisson_probs(20, self.stable_period_lam)
    elif drift_interval_distr == "uniform":
        self.stable_period_probs = self.__get_uniform_probs(20)
    else:
        raise ValueError(f"Invalid drift_interval_distr: {drift_interval_distr}")
    self.stable_period_logger = stable_period_logger
    print(f"stable_period_probs: {self.stable_period_probs}")

    self.concept_shift_step = concept_shift_step
    self.concept_shift_sample_intervals = concept_shift_sample_intervals
    self.all_concepts = all_concepts
    self.total_sample_idx = 0
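# A sketch of how the normalized Poisson probabilities used above could be
# computed. __get_poisson_probs is not shown in this excerpt, so the support
# range (here k = 1..n) and normalization are assumptions about its
# behavior, not its actual implementation:
import math

def poisson_probs_sketch(n, lam):
    # pmf(k) = lam^k * exp(-lam) / k!, truncated to k = 1..n and
    # renormalized so the probabilities sum to 1
    probs = [lam ** k * math.exp(-lam) / math.factorial(k) for k in range(1, n + 1)]
    total = sum(probs)
    return [p / total for p in probs]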
def __init__(self, initial_class_observations, perceptron_weight=None, random_state=None):
    """ActiveLearningNodePerceptronMultiTarget class constructor."""
    super().__init__(initial_class_observations)
    self.perceptron_weight = perceptron_weight
    self.random_state = check_random_state(random_state)
def __configure(self, base_estimator):
    base_estimator.reset()
    self.base_estimator = base_estimator
    self.n_estimators = self._init_n_estimators
    self.ensemble = [cp.deepcopy(base_estimator) for _ in range(self.n_estimators)]
    self.random_state = check_random_state(self._init_random_state)
def __init__(self, sample_size, n_trees, random_state):
    self.sample_size = sample_size
    self.n_trees = n_trees
    # Height limit follows the isolation-forest heuristic of ~log2(sample_size)
    self.depth = np.log2(sample_size)
    self.trees = []
    self.random_state = random_state
    self._random_state = check_random_state(self.random_state)
    self.is_learning_phase_on = True
def __configure(self):
    self.random_state = check_random_state(self._original_random_state)
    self.n_cat_features = self._TOTAL_ATTRIBUTES_INCLUDING_NOISE if self.has_noise \
        else self._NUM_BASE_ATTRIBUTES
    self.n_features = self.n_cat_features
    self.feature_names = ["att_num_" + str(i) for i in range(self.n_cat_features)]
    self.target_values = [i for i in range(self.n_classes)]
def prepare_for_use(self):
    """ Should be called before generating the samples. """
    self.random_state = check_random_state(self._original_random_state)
    self._next_class_should_be_zero = False
    self.sample_idx = 0
def __init__(self, initial_class_observations, perceptron_weight=None, random_state=None):
    super().__init__(initial_class_observations)
    self.perceptron_weight = perceptron_weight
    self.random_state = check_random_state(random_state)
def __configure(self):
    if hasattr(self.base_estimator, "reset"):
        self.base_estimator.reset()
    self.actual_n_estimators = self.n_estimators
    self.ensemble = [cp.deepcopy(self.base_estimator)
                     for _ in range(self.actual_n_estimators)]
    self._random_state = check_random_state(self.random_state)
def _prepare_for_use(self):
    self._random_state = check_random_state(self.random_state)
    self.X, self.y = make_regression(n_samples=self.n_samples,
                                     n_features=self.n_features,
                                     n_informative=self.n_informative,
                                     n_targets=self.n_targets,
                                     random_state=self._random_state)
    self.y = np.resize(self.y, (self.y.size, self.n_targets))
    self.target_values = [float] * self.n_targets
def __init__(self, initial_class_observations, perceptron_weight=None, random_state=None):
    """ InactiveLearningNodeForRegression class constructor."""
    super().__init__(initial_class_observations)
    self.perceptron_weight = perceptron_weight
    self.random_state = check_random_state(random_state)
def reset(self):
    """Reset ARFR."""
    # TODO: check whether this is enough
    self.ensemble = None
    self.max_features = 0
    self.instances_seen = 0
    self._random_state = check_random_state(self.random_state)
def __configure(self, base_estimator):
    self.n_estimators = self._init_n_estimators
    self.adwin_ensemble = [ADWIN() for _ in range(self.n_estimators)]
    base_estimator.reset()
    self.base_estimator = base_estimator
    self.ensemble = [cp.deepcopy(base_estimator) for _ in range(self.n_estimators)]
    self.random_state = check_random_state(self._init_random_state)
def __configure(self):
    if hasattr(self.base_estimator, "reset"):
        self.base_estimator.reset()
    self.actual_n_estimators = self.n_estimators
    self.ensemble = [cp.deepcopy(self.base_estimator)
                     for _ in range(self.actual_n_estimators)]
    self.adwin_ensemble = [ADWIN(self.delta) for _ in range(self.actual_n_estimators)]
    self._random_state = check_random_state(self.random_state)
    self.n_detected_changes = 0
    self.classes = None
    self.init_matrix_codes = True
def __init__(self, initial_class_observations, perceptron_weight, random_state=None):
    super().__init__(initial_class_observations, perceptron_weight, random_state)
    self._estimation_error_weight = ADWIN()
    self._error_change = False
    self._random_seed = 1
    self._classifier_random = check_random_state(self._random_seed)