def test_sim_crop(input_stream, file_name, crop_size=0):
    adwin = ADWIN()
    change_point = []
    for i in range(len(input_stream)):
        adwin.add_element(input_stream[i])
        if adwin.detected_change():
            # plt.axvline(i, color='r', linestyle='dashed')
            change_point.append(i)
    end_point_crop = change_point[0] + crop_size
    start_point_crop = change_point[0] - 100
    for i in change_point:
        if i <= end_point_crop:
            plt.axvline(i, color='r', linestyle='dashed')
    crop_stream = input_stream[start_point_crop:end_point_crop]
    zoom_xi = list(range(start_point_crop, end_point_crop))
    plt.plot(zoom_xi, crop_stream)
    plt.ylabel('value')
    plt.xlabel('Time')
    fig = plt.gcf()
    fig.set_size_inches(10, 5.5)
    plt.savefig(os.path.join('image', file_name + "_result_zoom.png"),
                aspect='auto', bbox_inches='tight', dpi=200)
    plt.show()
    return change_point
def learn_from_instance(self, X, y, weight, rhat, parent, parent_branch):
    super().learn_from_instance(X, y, weight, rhat)
    true_target = y
    target_prediction = rhat.predict([X])[0]
    normalized_error = rhat.get_normalized_error(target_prediction, true_target)

    if self._estimation_error_weight is None:
        self._estimation_error_weight = ADWIN()

    old_error = self.get_error_estimation()

    # Add element to ADWIN
    self._estimation_error_weight.add_element(normalized_error)
    # Detect change with ADWIN
    self._error_change = self._estimation_error_weight.detected_change()

    if self._error_change is True and old_error > self.get_error_estimation():
        self._error_change = False

    # Call ActiveLearningNode
    weight_seen = self.get_weight_seen()
    if weight_seen - self.get_weight_seen_at_last_split_evaluation() >= rhat.grace_period:
        rhat._attempt_to_split(self, parent, parent_branch)
        self.set_weight_seen_at_last_split_evaluation(weight_seen)
def learn_one(self, X, y, weight, tree, parent, parent_branch):
    y_pred = self.predict_one(X, tree=tree)
    normalized_error = get_normalized_error(y, y_pred, self)

    if tree.bootstrap_sampling:
        # Perform bootstrap-sampling
        k = self._random_state.poisson(1.0)
        if k > 0:
            weight = weight * k

    if self._adwin is None:
        self._adwin = ADWIN()

    old_error = self.error_estimation

    # Add element to ADWIN
    self._adwin.add_element(normalized_error)
    # Detect change with ADWIN
    self._error_change = self._adwin.detected_change()

    if self._error_change and old_error > self.error_estimation:
        self._error_change = False

    # Update statistics
    super().learn_one(X, y, weight=weight, tree=tree)

    weight_seen = self.total_weight
    if weight_seen - self.last_split_attempt_at >= tree.grace_period:
        tree._attempt_to_split(self, parent, parent_branch)
        self.last_split_attempt_at = weight_seen
def __init__(self, split_test, class_observations):
    super().__init__(split_test, class_observations)
    self._estimation_error_weight = ADWIN()
    self._alternate_tree = None
    self.error_change = False
    self._random_seed = 1
    self._classifier_random = check_random_state(self._random_seed)
def __init__(self,
             nb_ensemble=10,
             max_features='auto',
             disable_weighted_vote=False,
             lambda_value=6,
             performance_metric='acc',
             drift_detection_method: BaseDriftDetector = ADWIN(0.001),
             warning_detection_method: BaseDriftDetector = ADWIN(0.01),
             max_byte_size=33554432,
             memory_estimate_period=2000000,
             grace_period=50,
             split_criterion='info_gain',
             split_confidence=0.01,
             tie_threshold=0.05,
             binary_split=False,
             stop_mem_management=False,
             remove_poor_atts=False,
             no_preprune=False,
             leaf_prediction='nba',
             nb_threshold=0,
             nominal_attributes=None,
             random_state=None):
    """AdaptiveRandomForest class constructor."""
    super().__init__()
    self.nb_ensemble = nb_ensemble
    self.max_features = max_features
    self.disable_weighted_vote = disable_weighted_vote
    self.lambda_value = lambda_value
    if isinstance(drift_detection_method, BaseDriftDetector):
        self.drift_detection_method = drift_detection_method
    else:
        self.drift_detection_method = None
    if isinstance(warning_detection_method, BaseDriftDetector):
        self.warning_detection_method = warning_detection_method
    else:
        self.warning_detection_method = None
    self.instances_seen = 0
    self._train_weight_seen_by_model = 0.0
    self.ensemble = None
    self.random_state = check_random_state(random_state)
    if performance_metric in ['acc', 'kappa']:
        self.performance_metric = performance_metric
    else:
        raise ValueError('Invalid performance metric: {}'.format(performance_metric))
    # ARF Hoeffding Tree configuration
    self.max_byte_size = max_byte_size
    self.memory_estimate_period = memory_estimate_period
    self.grace_period = grace_period
    self.split_criterion = split_criterion
    self.split_confidence = split_confidence
    self.tie_threshold = tie_threshold
    self.binary_split = binary_split
    self.stop_mem_management = stop_mem_management
    self.remove_poor_atts = remove_poor_atts
    self.no_preprune = no_preprune
    self.leaf_prediction = leaf_prediction
    self.nb_threshold = nb_threshold
    self.nominal_attributes = nominal_attributes
class AdaptiveTree(object):
    def __init__(self, tree, kappa_window, warning_delta, drift_delta, tree_pool_id=-1):
        self.tree_pool_id = tree_pool_id
        self.tree = tree
        self.bg_adaptive_tree = None
        self.is_candidate = False
        self.warning_detector = ADWIN(warning_delta)
        self.drift_detector = ADWIN(drift_delta)
        self.predicted_labels = deque(maxlen=kappa_window)
        self.kappa = -sys.maxsize
        self.kappa_window = kappa_window

    def update_kappa(self, actual_labels):
        if len(self.predicted_labels) < self.kappa_window:
            self.kappa = -sys.maxsize
        else:
            self.kappa = cohen_kappa_score(actual_labels, self.predicted_labels)
        return self.kappa

    def reset(self):
        self.bg_adaptive_tree = None
        self.is_candidate = False
        self.warning_detector.reset()
        self.drift_detector.reset()
        self.predicted_labels.clear()
        self.kappa = -sys.maxsize
def perform_drift_detection(predict_dataframe, dataframe, feature_names, detector,
                            drift_notification, token="") -> str:
    log("[INFO] Calling perform_drift_detection", token)
    log("[INFO] Selected data drift detection method: " + detector)
    baseline_data = dataframe.values.tolist()
    predict_data = predict_dataframe.values.tolist()
    overall_data = list()
    for a in baseline_data:
        overall_data.append(a)
    for b in predict_data:
        overall_data.append(b)
    overall_dataframe = pd.DataFrame(overall_data, columns=feature_names)
    drifts = dict()
    window = len(baseline_data)
    for feature in feature_names:
        detected_drifts_indices = list()
        # HDDM
        if detector == "HDDM":
            hddm_w = HDDM_W()
            for i in range(len(overall_dataframe[feature])):
                hddm_w.add_element(float(overall_dataframe[feature][i]))
                if hddm_w.detected_change() and i >= window:
                    detected_drifts_indices.append(i - window)
        # Page Hinkley
        if detector == "Page Hinkley":
            ph = PageHinkley()
            for i in range(len(overall_dataframe[feature])):
                ph.add_element(float(overall_dataframe[feature][i]))
                if ph.detected_change() and i >= window:
                    detected_drifts_indices.append(i - window)
        # ADWIN
        if detector == "ADWIN":
            adwin = ADWIN()
            for i in range(len(overall_dataframe[feature])):
                adwin.add_element(float(overall_dataframe[feature][i]))
                if adwin.detected_change() and i >= window:
                    detected_drifts_indices.append(i - window)
        # Check for detected drifts
        if len(detected_drifts_indices) != 0:
            log("[INFO] Data drift detected in feature: " + feature)
            log("[INFO] The drifted rows are: " + str(detected_drifts_indices))
            drifts[feature] = detected_drifts_indices
            if drift_notification:
                log("[INFO] Sending a web notification", token)
                message = "MaaS data drift detected from " + get_token_user(token) + " (" + token + ")"
                if submit_web_notification(message, token):
                    log("[INFO] Web notification sent!")
                else:
                    log("[ERROR] Error occurred while sending a web notification")
    return json.dumps(drifts, cls=NpEncoder)
def __init__(self, split_test, class_observations, random_state=None):
    super().__init__(split_test, class_observations)
    self._estimation_error_weight = ADWIN()
    self._alternate_tree = None
    self.error_change = False
    self.random_state = check_random_state(random_state)
    # To normalize the observed errors in the [0, 1] range
    self._min_error = float('Inf')
    self._max_error = float('-Inf')
def __init__(self, initial_stats=None, parent_node=None, random_state=None):
    super().__init__(initial_stats, parent_node, random_state)
    self._adwin = ADWIN()
    self._error_change = False
    # Normalization of info monitored by drift detectors (using Welford's algorithm)
    self._n = 0
def __init__(self, initial_class_observations, perceptron_weight, random_state=None):
    super().__init__(initial_class_observations, perceptron_weight, random_state)
    self._estimation_error_weight = ADWIN()
    self._error_change = False
    self._randomSeed = 1
    self._classifier_random = check_random_state(self._randomSeed)
def __init__(self, k=5, max_window_size=sys.maxsize, leaf_size=30, categorical_list=[]):
    super().__init__(k=k,
                     max_window_size=max_window_size,
                     leaf_size=leaf_size,
                     categorical_list=categorical_list)
    self.adwin = ADWIN()
    self.window = None
def sim_adwin(input_stream, start_point=0):
    adwin = ADWIN(delta=.3)
    change_point = []
    for i in range(len(input_stream)):
        adwin.add_element(input_stream[i])
        if adwin.detected_change():
            # plt.axvline(i, color='r', linestyle='dashed')
            change_point.append(i + start_point)
            # print('Change detected in data: ' + str(input_stream[i]) + ' - at index: ' + str(i) + '\n\n')
    return change_point
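A minimal driver for sim_adwin, assuming a NumPy-generated synthetic stream with an abrupt mean shift halfway through; the stream length, noise level, and seed are illustrative assumptions and not part of the original code.

import numpy as np

# Hypothetical usage sketch: 500 stable samples followed by 500 shifted samples.
rng = np.random.RandomState(42)
stream = np.concatenate([rng.normal(0.0, 0.1, 500),
                         rng.normal(1.0, 0.1, 500)])
# sim_adwin returns the indices (offset by start_point) where ADWIN flagged a change.
print(sim_adwin(stream, start_point=0))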
def __init__(self, initial_class_observations, parent_node, random_state=None):
    super().__init__(initial_class_observations, parent_node, random_state)
    self._estimation_error_weight = ADWIN()
    self._error_change = False
    # To normalize the observed errors in the [0, 1] range
    self._min_error = float('Inf')
    self._max_error = float('-Inf')
def __init__(self, window_size=100, n_estimators=25, anomaly_threshold=0.5,
             drift_threshold=0.5, random_state=None, version="AnomalyRate",
             # Parameters for partial model update
             n_estimators_updated=0.5, updated_randomly=True,
             # Parameters for NDKSWIN
             alpha=0.01, data=None, n_dimensions=1, n_tested_samples=0.1,
             fixed_checked_dimension=False, fixed_checked_sample=False):
    super().__init__()
    self.n_estimators = n_estimators
    self.ensemble = None
    self.random_state = random_state
    self.window_size = window_size
    self.samples_seen = 0
    self.anomaly_rate = 0.20
    self.anomaly_threshold = anomaly_threshold
    self.drift_threshold = drift_threshold
    self.window = None
    self.prec_window = None
    self.cpt = 0
    self.version = version
    # Track, per window, whether the model was updated (0 = not updated, 1 = updated)
    self.model_update = []
    self.model_update_windows = []
    self.model_update.append(version)  # Initialisation: record the IForestASD version in use
    self.model_update_windows.append("samples_seen_" + version)  # Initialisation: record the number of samples seen per window
    # Number of new trees to (re)compute when updating on a new window
    self.n_estimators_updated = int(self.n_estimators * n_estimators_updated)
    if n_estimators_updated <= 0.0 or n_estimators_updated > 1.0:
        raise ValueError("n_estimators_updated must be > 0 and <= 1")
    # If True, choose the trees to replace at random; if False, keep the first
    # (n_estimators - int(n_estimators * n_estimators_updated)) trees
    self.updated_randomly = updated_randomly
    self.alpha = alpha
    self.n_dimensions = n_dimensions
    self.n_tested_samples = n_tested_samples
    self.fixed_checked_dimension = fixed_checked_dimension
    self.fixed_checked_sample = fixed_checked_sample
    self.first_time_fit = True
    # TODO Maurras 27112020: Find a way to optimize the use of ADWIN()
    self.adwin = ADWIN()
def cp_detection_ADWIN(points):
    from skmultiflow.drift_detection.adwin import ADWIN
    adwin = ADWIN()
    detections = []
    # Add stream elements to ADWIN and check whether drift occurred
    for i in range(len(points)):
        adwin.add_element(points[i])
        if adwin.detected_change():
            detections.append(i)
            print('Change detected in data: ' + str(points[i]) + ' - at index: ' + str(i))
    rpt.show.display(points, detections, figsize=(10, 6))
    plt.title('Change Point Detection: ADWIN')
    plt.show()
def reset(self):
    """ reset

    Resets the ADWIN change detector as well as the base model kept by the KNN base class.

    Returns
    -------
    KNNAdwin
        self
    """
    self.adwin = ADWIN()
    return super().reset()
def __init__(self, tree, kappa_window, warning_delta, drift_delta, tree_pool_id=-1):
    self.tree_pool_id = tree_pool_id
    self.tree = tree
    self.bg_adaptive_tree = None
    self.is_candidate = False
    self.warning_detector = ADWIN(warning_delta)
    self.drift_detector = ADWIN(drift_delta)
    self.predicted_labels = deque(maxlen=kappa_window)
    self.kappa = -sys.maxsize
    self.kappa_window = kappa_window
def concept_drift_detection(self, X, Y): if self.init_drift_detection: if self.drift_detector == "KSWIN": self.cdd = [ KSWIN(w_size=100, stat_size=30, alpha=self.confidence) for elem in X.T ] if self.drift_detector == "ADWIN": self.cdd = [ADWIN() for elem in X.T] if self.drift_detector == "DDM": self.cdd = [DDM() for elem in X.T] if self.drift_detector == "EDDM": self.cdd = [EDDM() for elem in X.T] self.init_drift_detection = False self.drift_detected = False if not self.init_drift_detection: for elem, detector in zip(X.T, self.cdd): for e in elem: detector.add_element(e) if detector.detected_change(): self.drift_detected = True self.n_detections = self.n_detections + 1 return self.drift_detected # if name=="__main__": # from skmultiflow import
def concept_drift_detection(self, X, Y): if self.init_drift_detection: if self.drift_detector == "KSWIN": self.cdd = [KSWIN(w_size = 100, stat_size = 30, alpha=self.confidence) for elem in X.T] if self.drift_detector == "ADWIN": self.cdd = [ADWIN() for elem in X.T] if self.drift_detector == "DDM": self.cdd = [DDM() for elem in X.T] if self.drift_detector == "EDDM": self.cdd = [EDDM() for elem in X.T] if self.drift_detector == "KSVEC": self.cdd = KSVEC(vec_size=X.shape[1]) self.init_drift_detection = False self.drift_detected = False if not self.init_drift_detection: if self.drift_detector == "KSVEC": self.cdd.add_element(X) if self.cdd.detected_change(): self.drift_detected = True else: for elem, detector in zip(X.T, self.cdd): for e in elem: detector.add_element(e) if detector.detected_change(): self.drift_detected = True self.n_detections = self.n_detections +1 return self.drift_detected
def __init__(self, delta=0.002):
    """Initialization of the ADWIN algorithm wrapper class."""
    self.name = 'ADWIN'
    self.delta = delta
    self.model = ADWIN(delta)
    self.change_indexes = []
    self.warning_zones_indexes = []
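A hypothetical companion method for the wrapper above (not shown in the original snippet), sketching how self.model and self.change_indexes might be used together; the method name detect and the stream argument are assumptions.

# Hypothetical helper, assuming the wrapper exposes a detect(stream) method on the same class:
def detect(self, stream):
    for i, value in enumerate(stream):
        self.model.add_element(float(value))
        if self.model.detected_change():
            self.change_indexes.append(i)
    return self.change_indexes

warning_zones_indexes could be filled analogously via detected_warning_zone() for detectors such as DDM that report a warning level; ADWIN itself only reports changes.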
def concept_drift_detection(self, X, Y): if self.init_drift_detection: if self.drift_detector == "KS": self.cdd = [KSWIN(alpha=self.confidence, w_size=self.window_size) for elem in X.T] if self.drift_detector == "ADWIN": self.cdd = [ADWIN(delta=self.confidence) for elem in X.T] if self.drift_detector == "DIST": self.cdd = [KSWIN(self.confidence, w_size=self.window_size) for c in self.classes_] self.init_drift_detection = False self.drift_detected = False if self.drift_detector == "DIST": try: class_prototypes = [self.w_[self.c_w_ == elem] for elem in self.classes_] new_distances = dict( [(c, self.calcDistances(pts, X[Y == c])) for c, pts in zip(self.classes_, class_prototypes)]) for (c, d_new), detector in zip(new_distances.items(), self.cdd): detector.add_element(d_new) if detector.detected_change(): self.drift_detected = True except Exception: print("Warning: Current Batch does not contain all labels!") # ValueError('zero-size array to reduction operation maximum which has no identity',) # In this batch not every label is present else: if not self.init_drift_detection: for elem, detector in zip(X.T, self.cdd): for e in elem: detector.add_element(e) if detector.detected_change(): self.drift_detected = True return self.drift_detected
def __adjust_ensemble_size(self):
    if len(self.classes) != len(self.ensemble):
        if len(self.classes) > len(self.ensemble):
            for i in range(len(self.ensemble), len(self.classes)):
                self.ensemble.append(cp.deepcopy(self.h))
                self.adwin_ensemble.append(ADWIN())
                self.ensemble_length += 1
def __adjust_ensemble_size(self):
    if len(self.classes) != len(self.ensemble):
        if len(self.classes) > len(self.ensemble):
            for i in range(len(self.ensemble), len(self.classes)):
                self.ensemble.append(cp.deepcopy(self.base_estimator))
                self.adwin_ensemble.append(ADWIN(self.delta))
                self.n_estimators += 1
def __init__(self, h=KNN(), ensemble_length=2, w=6, delta=0.002,
             enable_code_matrix=False, leverage_algorithm='leveraging_bag'):
    super().__init__()
    # default values
    self.h = h.reset()
    self.ensemble_length = None
    self.ensemble = None
    self.adwin_ensemble = None
    self.n_detected_changes = None
    self.matrix_codes = None
    self.enable_matrix_codes = None
    self.w = None
    self.delta = None
    self.classes = None
    self.leveraging_algorithm = None
    self.__configure(h, ensemble_length, w, delta, enable_code_matrix, leverage_algorithm)
    self.init_matrix_codes = True
    self.adwin_ensemble = []
    for i in range(ensemble_length):
        self.adwin_ensemble.append(ADWIN(self.delta))
def make_detector(warn=False, s=1e-5, drift_detector='adwin'):
    sensitivity = s * 10 if warn else s
    if drift_detector == 'adwin':
        return ADWIN(delta=sensitivity)
    if drift_detector == 'EDDM':
        return EDDM()
    if drift_detector == 'DDM':
        return DDM()
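A short usage sketch for make_detector, assuming the common warning/drift pairing: the warn flag widens ADWIN's delta by a factor of 10 so the warning detector reacts sooner. The binary error stream below is invented for illustration.

warning_detector = make_detector(warn=True, s=1e-5)
drift_detector = make_detector(warn=False, s=1e-5)

# Illustrative 0/1 prediction-error stream: accurate at first, then degrading.
error_stream = [0] * 300 + [1] * 300
for value in error_stream:
    warning_detector.add_element(value)
    drift_detector.add_element(value)
    if warning_detector.detected_change():
        pass  # e.g. start training a background learner
    if drift_detector.detected_change():
        pass  # e.g. replace the current learner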
def test_adwin(test_path):
    """
    ADWIN drift detection test.

    The first half of the stream contains a sequence corresponding to a normal
    distribution of integers from 0 to 1. From index 999 to 1999 the sequence is
    a normal distribution of integers from 0 to 7.
    """
    adwin = ADWIN()
    test_file = os.path.join(test_path, 'drift_stream.npy')
    data_stream = np.load(test_file)
    expected_indices = [1023, 1055, 1087, 1151]
    detected_indices = []

    for i in range(data_stream.size):
        adwin.add_element(data_stream[i])
        if adwin.detected_change():
            detected_indices.append(i)

    assert detected_indices == expected_indices
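The test reads a stored fixture; below is a hedged sketch of how a stream matching the docstring's description could be regenerated with NumPy. The exact generator, seed, and dtype used for drift_stream.npy are not shown in the source, so detections on this sketch would not necessarily match expected_indices.

import numpy as np

# Assumed generator: 1000 integers drawn uniformly from [0, 1], then 1000 from [0, 7].
rng = np.random.RandomState(1)
stream = np.concatenate([rng.randint(2, size=1000),
                         rng.randint(8, size=1000)]).astype(float)
np.save('drift_stream.npy', stream)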
def learn_from_instance(self, X, y, weight, hat, parent, parent_branch):
    true_class = y
    k = self._classifier_random.poisson(1.0)
    # if k > 0:
    #     weight = weight * k
    tmp = self.get_class_votes(X, hat)
    class_prediction = get_max_value_key(tmp)
    bl_correct = (true_class == class_prediction)

    if self.estimationErrorWeight is None:
        self.estimationErrorWeight = ADWIN()

    old_error = self.get_error_estimation()

    # Add element to ADWIN
    add = 0.0 if (bl_correct is True) else 1.0
    self.estimationErrorWeight.add_element(add)
    # Detect change with ADWIN
    self.ErrorChange = self.estimationErrorWeight.detected_change()

    if self.ErrorChange is True and old_error > self.get_error_estimation():
        self.ErrorChange = False

    # Update statistics: call LearningNodeNBAdaptive
    super().learn_from_instance(X, y, weight, hat)  # CHECK changed self to super

    # Call ActiveLearningNode
    weight_seen = self.get_weight_seen()
    if weight_seen - self.get_weight_seen_at_last_split_evaluation() >= hat.grace_period:
        hat._attempt_to_split(self, parent, parent_branch)
        self.set_weight_seen_at_last_split_evaluation(weight_seen)
def __init__(self, h=KNNAdwin(), ensemble_length=2):
    super().__init__()
    # default values
    self.ensemble = None
    self.ensemble_length = None
    self.classes = None
    self.h = h.reset()
    self.__configure(h, ensemble_length)
    self.adwin_ensemble = []
    for i in range(ensemble_length):
        self.adwin_ensemble.append(ADWIN())
def __configure(self):
    self.base_estimator.reset()
    self.n_estimators = self._init_n_estimators
    self.ensemble = [cp.deepcopy(self.base_estimator) for _ in range(self.n_estimators)]
    self.adwin_ensemble = []
    for i in range(self.n_estimators):
        self.adwin_ensemble.append(ADWIN(self.delta))
    self.random_state = check_random_state(self._init_random_state)
    self.n_detected_changes = 0
    self.classes = None
    self.init_matrix_codes = True
def get_ARF_HAT():
    max_features = 3
    disable_weighted_vote = False
    lambda_value = 6
    performance_metric = 'acc'
    drift_detection_method = ADWIN(0.001)
    warning_detection_method = ADWIN(0.01)
    max_byte_size = 33554432
    memory_estimate_period = 2000000
    grace_period = 50
    split_criterion = 'info_gain'
    split_confidence = 0.01
    tie_threshold = 0.05
    binary_split = False
    stop_mem_management = False
    remove_poor_atts = False
    no_preprune = False
    leaf_prediction = 'nba'
    nb_threshold = 0
    nominal_attributes = None
    random_state = None
    classifier = TS_ARFHoeffdingTree(
        max_byte_size=max_byte_size,
        memory_estimate_period=memory_estimate_period,
        grace_period=grace_period,
        split_criterion=split_criterion,
        split_confidence=split_confidence,
        tie_threshold=tie_threshold,
        binary_split=binary_split,
        stop_mem_management=stop_mem_management,
        remove_poor_atts=remove_poor_atts,
        no_preprune=no_preprune,
        leaf_prediction=leaf_prediction,
        nb_threshold=nb_threshold,
        nominal_attributes=nominal_attributes,
        max_features=max_features,
        random_state=random_state)
    return classifier