from skmultiflow.drift_detection import ADWIN


def adwin_windows(data, delta, index_start=0):
    """Run ADWIN over ``data`` and return the detected stationary windows
    as (start, end) index pairs, offset by ``index_start``."""
    adwin = ADWIN(delta)
    windows = []
    last_i = index_start
    for i, d in enumerate(data):
        adwin.add_element(d)
        if adwin.detected_change():
            windows.append((last_i, i + index_start))
            last_i = i + index_start
    return windows
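# A minimal usage sketch for adwin_windows (assumes scikit-multiflow is
# installed; the stream below is illustrative, not from the original source).
import numpy as np

np.random.seed(42)
stream = np.concatenate([np.random.binomial(1, 0.2, 1000),
                         np.random.binomial(1, 0.8, 1000)])
# Expect roughly one boundary near index 1000, where the stream mean shifts.
print(adwin_windows(stream, delta=0.002))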
import numpy as np

from skmultiflow.drift_detection import ADWIN


def demo():
    """ _test_adwin

    This demo inserts data into an ADWIN object and displays the indexes at
    which change was detected.

    The data stream is simulated as a sequence of randomly generated 0's and
    1's. Then the data from index 999 onwards is replaced by integers drawn
    uniformly at random from 0 to 7.
    """
    adwin = ADWIN()
    size = 2000
    change_start = 999
    np.random.seed(1)
    data_stream = np.random.randint(2, size=size)
    data_stream[change_start:] = np.random.randint(8, size=size - change_start)

    for i in range(size):
        adwin.add_element(data_stream[i])
        if adwin.detected_change():
            print('Change has been detected in data: ' + str(data_stream[i])
                  + ' - of index: ' + str(i))
class AdaSplitNode(SplitNode, NewNode):

    def __init__(self, split_test, class_observations):
        super().__init__(split_test, class_observations)
        self._estimation_error_weight = ADWIN()
        self._alternate_tree = None
        self.error_change = False
        self._random_seed = 1
        self._classifier_random = check_random_state(self._random_seed)

    # Override NewNode
    def number_leaves(self):
        num_of_leaves = 0
        for child in self._children:
            if child is not None:
                num_of_leaves += child.number_leaves()
        return num_of_leaves

    # Override NewNode
    def get_error_estimation(self):
        return self._estimation_error_weight.estimation

    # Override NewNode
    def get_error_width(self):
        w = 0.0
        if self.is_null_error() is False:
            w = self._estimation_error_weight.width
        return w

    # Override NewNode
    def is_null_error(self):
        return self._estimation_error_weight is None

    # Override NewNode
    def learn_from_instance(self, X, y, weight, hat, parent, parent_branch):
        true_class = y
        class_prediction = 0

        leaf = self.filter_instance_to_leaf(X, parent, parent_branch)
        if leaf.node is not None:
            class_prediction = get_max_value_key(leaf.node.get_class_votes(X, hat))

        bl_correct = (true_class == class_prediction)

        if self._estimation_error_weight is None:
            self._estimation_error_weight = ADWIN()

        old_error = self.get_error_estimation()

        # Add element to ADWIN
        add = 0.0 if bl_correct else 1.0
        self._estimation_error_weight.add_element(add)
        # Detect change with ADWIN
        self.error_change = self._estimation_error_weight.detected_change()

        if self.error_change and old_error > self.get_error_estimation():
            self.error_change = False

        # Check condition to build a new alternate tree
        if self.error_change:
            self._alternate_tree = hat._new_learning_node()
            hat.alternate_trees_cnt += 1

        # Condition to replace alternate tree
        elif self._alternate_tree is not None and self._alternate_tree.is_null_error() is False:
            if self.get_error_width() > error_width_threshold \
                    and self._alternate_tree.get_error_width() > error_width_threshold:
                old_error_rate = self.get_error_estimation()
                alt_error_rate = self._alternate_tree.get_error_estimation()
                fDelta = .05
                fN = 1.0 / self._alternate_tree.get_error_width() + 1.0 / self.get_error_width()

                bound = math.sqrt(2.0 * old_error_rate * (1.0 - old_error_rate)
                                  * math.log(2.0 / fDelta) * fN)
                # To check: the bound is never less than (old_error_rate - alt_error_rate)
                if bound < (old_error_rate - alt_error_rate):
                    hat._active_leaf_node_cnt -= self.number_leaves()
                    hat._active_leaf_node_cnt += self._alternate_tree.number_leaves()
                    self.kill_tree_children(hat)

                    if parent is not None:
                        parent.set_child(parent_branch, self._alternate_tree)
                    else:
                        # Switch tree root
                        hat._tree_root = hat._tree_root.alternateTree
                    hat.switch_alternate_trees_cnt += 1
                elif bound < alt_error_rate - old_error_rate:
                    if isinstance(self._alternate_tree, TS_HAT.ActiveLearningNode):
                        self._alternate_tree = None
                    elif isinstance(self._alternate_tree, TS_HAT.InactiveLearningNode):
                        self._alternate_tree = None
                    else:
                        self._alternate_tree.kill_tree_children(hat)
                    hat.pruned_alternate_trees_cnt += 1  # hat.pruned_alternate_trees_cnt to check

        # Learn from instance: alternate tree and child nodes
        if self._alternate_tree is not None:
            self._alternate_tree.learn_from_instance(X, y, weight, hat, parent, parent_branch)

        child_branch = self.instance_child_index(X)
        child = self.get_child(child_branch)
        if child is not None:
            # NOTE: the newer AdaNode-based version of this class passes
            # (self, child_branch) here instead of (parent, parent_branch).
            child.learn_from_instance(X, y, weight, hat, parent, parent_branch)

    # Override NewNode
    def kill_tree_children(self, hat):
        for child in self._children:
            if child is not None:
                # Delete alternate tree if it exists
                if isinstance(child, TS_HAT.AdaSplitNode) and child._alternate_tree is not None:
                    child._alternate_tree.kill_tree_children(hat)
                    self._pruned_alternate_trees += 1
                # Recursive delete of SplitNodes
                if isinstance(child, TS_HAT.AdaSplitNode):
                    child.kill_tree_children(hat)

                if isinstance(child, TS_HAT.ActiveLearningNode):
                    child = None
                    hat._active_leaf_node_cnt -= 1
                elif isinstance(child, TS_HAT.InactiveLearningNode):
                    child = None
                    hat._inactive_leaf_node_cnt -= 1

    # Override NewNode
    def filter_instance_to_leaves(self, X, y, weight, parent, parent_branch,
                                  update_splitter_counts=False, found_nodes=None):
        if found_nodes is None:
            found_nodes = []
        if update_splitter_counts:
            try:
                self._observed_class_distribution[y] += weight  # Dictionary (class_value, weight)
            except KeyError:
                self._observed_class_distribution[y] = weight
        child_index = self.instance_child_index(X)
        if child_index >= 0:
            child = self.get_child(child_index)
            if child is not None:
                child.filter_instance_to_leaves(X, y, weight, parent, parent_branch,
                                                update_splitter_counts, found_nodes)
            else:
                found_nodes.append(HoeffdingTree.FoundNode(None, self, child_index))
        if self._alternate_tree is not None:
            self._alternate_tree.filter_instance_to_leaves(X, y, weight, self, -999,
                                                           update_splitter_counts, found_nodes)
class AdaSplitNode(SplitNode, AdaNode):
    """ Node that splits the data in a Hoeffding Adaptive Tree.

    Parameters
    ----------
    split_test: skmultiflow.split_test.InstanceConditionalTest
        Split test.
    class_observations: dict (class_value, weight) or None
        Class observations
    """

    def __init__(self, split_test, class_observations):
        super().__init__(split_test, class_observations)
        self._estimation_error_weight = ADWIN()
        self._alternate_tree = None
        self.error_change = False
        self._random_seed = 1
        self._classifier_random = check_random_state(self._random_seed)

    # Override AdaNode
    def number_leaves(self):
        num_of_leaves = 0
        for child in self._children:
            if child is not None:
                num_of_leaves += child.number_leaves()
        return num_of_leaves

    # Override AdaNode
    def get_error_estimation(self):
        return self._estimation_error_weight.estimation

    # Override AdaNode
    def get_error_width(self):
        w = 0.0
        if self.is_null_error() is False:
            w = self._estimation_error_weight.width
        return w

    # Override AdaNode
    def is_null_error(self):
        return self._estimation_error_weight is None

    # Override AdaNode
    def learn_from_instance(self, X, y, weight, hat, parent, parent_branch):
        true_class = y
        class_prediction = 0

        leaf = self.filter_instance_to_leaf(X, parent, parent_branch)
        if leaf.node is not None:
            class_prediction = get_max_value_key(leaf.node.get_class_votes(X, hat))

        bl_correct = (true_class == class_prediction)

        if self._estimation_error_weight is None:
            self._estimation_error_weight = ADWIN()

        old_error = self.get_error_estimation()

        # Add element to ADWIN
        add = 0.0 if bl_correct else 1.0
        self._estimation_error_weight.add_element(add)
        # Detect change with ADWIN
        self.error_change = self._estimation_error_weight.detected_change()

        if self.error_change and old_error > self.get_error_estimation():
            self.error_change = False

        # Check condition to build a new alternate tree
        if self.error_change:
            self._alternate_tree = hat._new_learning_node()
            hat.alternate_trees_cnt += 1

        # Condition to replace alternate tree
        elif self._alternate_tree is not None and not self._alternate_tree.is_null_error():
            if self.get_error_width() > ERROR_WIDTH_THRESHOLD \
                    and self._alternate_tree.get_error_width() > ERROR_WIDTH_THRESHOLD:
                old_error_rate = self.get_error_estimation()
                alt_error_rate = self._alternate_tree.get_error_estimation()
                fDelta = .05
                fN = 1.0 / self._alternate_tree.get_error_width() + 1.0 / self.get_error_width()

                bound = math.sqrt(2.0 * old_error_rate * (1.0 - old_error_rate)
                                  * math.log(2.0 / fDelta) * fN)
                # To check: the bound is never less than (old_error_rate - alt_error_rate)
                if bound < (old_error_rate - alt_error_rate):
                    hat._active_leaf_node_cnt -= self.number_leaves()
                    hat._active_leaf_node_cnt += self._alternate_tree.number_leaves()
                    self.kill_tree_children(hat)

                    if parent is not None:
                        parent.set_child(parent_branch, self._alternate_tree)
                    else:
                        # Switch tree root
                        hat._tree_root = hat._tree_root.alternateTree
                    hat.switch_alternate_trees_cnt += 1
                elif bound < alt_error_rate - old_error_rate:
                    if isinstance(self._alternate_tree, ActiveLearningNode):
                        self._alternate_tree = None
                    elif isinstance(self._alternate_tree, InactiveLearningNode):
                        self._alternate_tree = None
                    else:
                        self._alternate_tree.kill_tree_children(hat)
                    hat.pruned_alternate_trees_cnt += 1  # hat.pruned_alternate_trees_cnt to check

        # Learn from instance: alternate tree and child nodes
        if self._alternate_tree is not None:
            self._alternate_tree.learn_from_instance(X, y, weight, hat, parent, parent_branch)

        child_branch = self.instance_child_index(X)
        child = self.get_child(child_branch)
        if child is not None:
            child.learn_from_instance(X, y, weight, hat, self, child_branch)
        # Instance contains a categorical value previously unseen by the split node
        elif isinstance(self.get_split_test(), NominalAttributeMultiwayTest) and \
                self.get_split_test().branch_for_instance(X) < 0:
            # Create a new learning node to encompass the newly observed feature value
            leaf_node = hat._new_learning_node()
            branch_id = self.get_split_test().add_new_branch(
                X[self.get_split_test().get_atts_test_depends_on()[0]])
            self.set_child(branch_id, leaf_node)
            hat._active_leaf_node_cnt += 1
            leaf_node.learn_from_instance(X, y, weight, hat, parent, parent_branch)

    # Override AdaNode
    def kill_tree_children(self, hat):
        for child in self._children:
            if child is not None:
                # Delete alternate tree if it exists
                if isinstance(child, SplitNode) and child._alternate_tree is not None:
                    child._alternate_tree.kill_tree_children(hat)
                    self._pruned_alternate_trees += 1
                # Recursive delete of SplitNodes
                if isinstance(child, SplitNode):
                    child.kill_tree_children(hat)

                if isinstance(child, ActiveLearningNode):
                    child = None
                    hat._active_leaf_node_cnt -= 1
                elif isinstance(child, InactiveLearningNode):
                    child = None
                    hat._inactive_leaf_node_cnt -= 1

    # Override AdaNode
    def filter_instance_to_leaves(self, X, y, weight, parent, parent_branch,
                                  update_splitter_counts=False, found_nodes=None):
        if found_nodes is None:
            found_nodes = []
        if update_splitter_counts:
            try:
                self._observed_class_distribution[y] += weight  # Dictionary (class_value, weight)
            except KeyError:
                self._observed_class_distribution[y] = weight
        child_index = self.instance_child_index(X)
        if child_index >= 0:
            child = self.get_child(child_index)
            if child is not None:
                child.filter_instance_to_leaves(X, y, weight, parent, parent_branch,
                                                update_splitter_counts, found_nodes)
            else:
                found_nodes.append(FoundNode(None, self, child_index))
        if self._alternate_tree is not None:
            self._alternate_tree.filter_instance_to_leaves(X, y, weight, self, -999,
                                                           update_splitter_counts, found_nodes)
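# The node classes above are internals of a Hoeffding Adaptive Tree. A hedged
# end-to-end sketch of the public estimator they support, assuming
# scikit-multiflow's HoeffdingAdaptiveTreeClassifier and SEAGenerator:
from skmultiflow.data import SEAGenerator
from skmultiflow.trees import HoeffdingAdaptiveTreeClassifier

stream = SEAGenerator(random_state=1)
hat = HoeffdingAdaptiveTreeClassifier()
X, y = stream.next_sample()
hat.partial_fit(X, y, classes=stream.target_values)  # prime the model
correct, n = 0, 0
while n < 2000:  # prequential (test-then-train) evaluation
    X, y = stream.next_sample()
    if hat.predict(X)[0] == y[0]:
        correct += 1
    hat.partial_fit(X, y)
    n += 1
print('HAT prequential accuracy: {:.3f}'.format(correct / n))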
# (fragment: continues a preceding cold-start evaluation loop)
        corrects += 1
    knn = knn.partial_fit(X, y)
    n_samples += 1
    coldstartData.append(corrects / n_samples)
print(corrects, n_samples)

while n_samples < 20000:
    driftDataX, driftDataY = stream.next_sample()
    my_pred = knn.predict(driftDataX)
    correct = driftDataY[0] == my_pred[0]
    if correct:
        corrects += 1
    n_samples += 1

    adwin.add_element(0 if correct else 1)
    if adwin.detected_change():
        # print('ADWIN', n_samples)
        adwin_results.append(n_samples)

    ddm.add_element(0 if correct else 1)
    if ddm.detected_change():
        # print('DDM', n_samples)
        ddm_results.append(n_samples)

    ph1.add_element(0 if correct else 1)
    if ph1.detected_change():
        # print('PH', n_samples)
        ph1_results.append(n_samples)

    ph2.add_element(0 if correct else 1)
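# The fragment above assumes a setup roughly like the following (illustrative
# only; the stream choice and detector parameters are assumptions, not from
# the original source):
from skmultiflow.data import SEAGenerator
from skmultiflow.drift_detection import ADWIN, DDM, PageHinkley
from skmultiflow.lazy import KNNClassifier

stream = SEAGenerator(random_state=1)
knn = KNNClassifier(n_neighbors=8, max_window_size=1000)
adwin, ddm = ADWIN(), DDM()
ph1, ph2 = PageHinkley(), PageHinkley(delta=0.01)  # two PH configurations
adwin_results, ddm_results, ph1_results, ph2_results = [], [], [], []
coldstartData = []
corrects, n_samples = 0, 0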
class ADWINChangeDetector(BaseDriftDetector):
    """ Drift detection method based on ADWIN.

    Parameters
    ----------
    delta : float (default=0.002)
        The delta parameter for the ADWIN algorithm.

    Notes
    -----
    ADWIN [1]_ (ADaptive WINdowing) is an adaptive sliding window algorithm
    for detecting change, and keeping updated statistics about a data stream.
    ADWIN allows algorithms not adapted for drifting data to be resistant to
    this phenomenon.

    The general idea is to keep statistics from a window of variable size
    while detecting concept drift.

    The algorithm decides the size of the window by cutting the statistics'
    window at different points and analysing the average of some statistic
    over these two windows. If the absolute value of the difference between
    the two averages surpasses a pre-defined threshold, change is detected at
    that point and all data before that time is discarded.

    References
    ----------
    .. [1] Bifet, Albert, and Ricard Gavalda. "Learning from time-changing
       data with adaptive windowing." In Proceedings of the 2007 SIAM
       international conference on data mining, pp. 443-448. Society for
       Industrial and Applied Mathematics, 2007.

    Examples
    --------
    >>> # Imports
    >>> import numpy as np
    >>> from skmultiflow.drift_detection import ADWINChangeDetector
    >>> adwin_change_detector = ADWINChangeDetector()
    >>> # Simulating a data stream as uniformly random 1's and 0's
    >>> data_stream = np.random.randint(2, size=2000)
    >>> # Changing the data concept from index 999 to 2000
    >>> for i in range(999, 2000):
    ...     data_stream[i] = np.random.randint(4, high=8)
    >>> # Adding stream elements to ADWIN and verifying if drift occurred
    >>> for i in range(2000):
    ...     adwin_change_detector.add_element(data_stream[i])
    ...     if adwin_change_detector.detected_change():
    ...         print('Change detected in data: ' + str(data_stream[i]) + ' - at index: ' + str(i))
    """

    def __init__(self, delta=.002):
        super().__init__()
        self.adwin = ADWIN(delta=delta)
        super().reset()

    def add_element(self, input_value):
        err_estim = self.adwin.estimation
        self.adwin.add_element(input_value)
        res_input = self.adwin.detected_change()

        self.in_concept_change = False
        self.in_warning_zone = False

        if self.adwin.detected_warning_zone():
            self.in_warning_zone = True

        if res_input:
            if self.adwin.estimation > err_estim:
                self.in_concept_change = True
                self.in_warning_zone = False

        self.estimation = self.adwin.estimation
class KNNADWINClassifier(KNNClassifier):
    """ K-Nearest Neighbors classifier with ADWIN change detector.

    This classifier is an improvement over the regular KNNClassifier, as it
    is resistant to concept drift. It utilises the ADWIN change detector to
    decide which samples to keep and which ones to forget, and by doing so
    it regulates the sample window size.

    To know more about the ADWIN change detector, please see
    :class:`skmultiflow.drift_detection.ADWIN`

    It uses the regular KNNClassifier as a base class, with the major
    difference that this class keeps a variable size window, instead of a
    fixed size one, and it also updates the ADWIN algorithm at each
    partial_fit call.

    Parameters
    ----------
    n_neighbors: int (default=5)
        The number of nearest neighbors to search for.

    max_window_size: int (default=1000)
        The maximum size of the window storing the last viewed samples.

    leaf_size: int (default=30)
        The maximum number of samples that can be stored in one leaf node,
        which determines from which point the algorithm will switch to a
        brute-force approach. The bigger this number the faster the tree
        construction time, but the slower the query time will be.

    metric: string or sklearn.DistanceMetric object
        sklearn.KDTree parameter. The distance metric to use for the KDTree.
        Default='euclidean'. KNNClassifier.valid_metrics() gives a list of
        the metrics which are valid for KDTree.

    Notes
    -----
    This estimator is not optimal for a mixture of categorical and numerical
    features. This implementation treats all features from a given stream as
    numerical.

    Examples
    --------
    >>> # Imports
    >>> from skmultiflow.lazy import KNNADWINClassifier
    >>> from skmultiflow.data import ConceptDriftStream
    >>> # Setting up the stream
    >>> stream = ConceptDriftStream(position=2500, width=100, random_state=1)
    >>> # Setting up the KNNAdwin classifier
    >>> knn_adwin = KNNADWINClassifier(n_neighbors=8, leaf_size=40, max_window_size=1000)
    >>> # Keep track of sample count and correct prediction count
    >>> n_samples = 0
    >>> corrects = 0
    >>> while n_samples < 5000:
    ...     X, y = stream.next_sample()
    ...     pred = knn_adwin.predict(X)
    ...     if y[0] == pred[0]:
    ...         corrects += 1
    ...     knn_adwin = knn_adwin.partial_fit(X, y)
    ...     n_samples += 1
    >>>
    >>> # Displaying the results
    >>> print('KNNADWINClassifier usage example')
    >>> print(str(n_samples) + ' samples analyzed.')
    5000 samples analyzed.
    >>> print("KNNADWINClassifier's performance: " + str(corrects/n_samples))
    KNNADWINClassifier's performance: 0.5714
    """

    def __init__(self, n_neighbors=5, max_window_size=1000, leaf_size=30, metric='euclidean'):
        super().__init__(n_neighbors=n_neighbors,
                         max_window_size=max_window_size,
                         leaf_size=leaf_size,
                         metric=metric)
        self.adwin = ADWIN()

    def reset(self):
        """ Reset the estimator.

        Resets the ADWIN drift detector as well as the KNN model.

        Returns
        -------
        KNNADWINClassifier
            self
        """
        self.adwin = ADWIN()
        return super().reset()

    def partial_fit(self, X, y, classes=None, sample_weight=None):
        """ Partially (incrementally) fit the model.

        Parameters
        ----------
        X: Numpy.ndarray of shape (n_samples, n_features)
            The data upon which the algorithm will create its model.

        y: Array-like
            An array-like containing the classification targets for all
            samples in X.

        classes: numpy.ndarray, optional (default=None)
            Array with all possible/known classes.

        sample_weight: Not used.

        Returns
        -------
        KNNADWINClassifier
            self

        Notes
        -----
        Partially fits the model by updating the window with new samples
        while also updating the ADWIN algorithm. If ADWIN detects a change,
        the window is split in such a way that samples from the previous
        concept are dropped.
        """
        r, c = get_dimensions(X)

        if classes is not None:
            self.classes = list(set().union(self.classes, classes))

        for i in range(r):
            self.data_window.add_sample(X[i], y[i])
            if self.data_window.size >= self.n_neighbors:
                correctly_classifies = 1 if self.predict(np.asarray([X[i]])) == y[i] else 0
                self.adwin.add_element(correctly_classifies)
            else:
                self.adwin.add_element(0)

        if self.data_window.size >= self.n_neighbors:
            if self.adwin.detected_change():
                if self.adwin.width < self.data_window.size:
                    for i in range(self.data_window.size, self.adwin.width, -1):
                        self.data_window.delete_oldest_sample()
        return self
class DeepNNPytorch(BaseSKMObject, ClassifierMixin):

    def __init__(self,
                 class_labels=['0', '1'],  # e.g. {'up': 0, 'down': 1}
                 use_cpu=True,
                 process_as_a_batch=False,
                 use_threads=False,
                 background_training_after=4):
        # configuration variables (which have the same names as init parameters)
        self.class_labels = class_labels
        self.use_threads = use_threads
        self.background_training_after = background_training_after

        super().__init__()

        # status variables
        self.class_to_label = {}
        self.foreground_nets = []  # type: List[ANN]
        self.background_nets = []  # type: List[ANN]
        self.drift_detection_method = None
        self.warning_detection_method = None
        self.detected_warnings = 0
        self.samples_seen = 0
        self.last_detected_drift_around = 0
        self.background_learner_threads = []
        self.background_train_results = None
        self.foreground_train_results = None

        self.init_status_values()

    def init_status_values(self):
        # init status variables
        self.class_to_label = {}
        for i in range(len(self.class_labels)):
            self.class_to_label.update({i: self.class_labels[i]})
        for i in range(len(foreground_net_config)):
            self.foreground_nets.append(
                ANN(learning_rate=foreground_net_config[i]['l_rate'],
                    optimizer_type=foreground_net_config[i]['optimizer_type'],
                    class_labels=self.class_labels))
        for i in range(len(background_net_config)):
            self.background_nets.append(
                ANN(learning_rate=background_net_config[i]['l_rate'],
                    optimizer_type=background_net_config[i]['optimizer_type'],
                    class_labels=self.class_labels))
        self.drift_detection_method = ADWIN(delta=1e-3, direction=ADWIN.DETECT_DOWN)
        self.warning_detection_method = ADWIN(delta=1e-8, direction=ADWIN.DETECT_DOWN)
        self.detected_warnings = 0
        self.samples_seen = 0
        self.last_detected_drift_around = 0
        self.background_learner_threads = []
        self.background_train_results = None
        self.foreground_train_results = None

        print(self)

    def partial_fit(self, X, y, classes=None, sample_weight=None):
        r, c = get_dimensions(X)
        self.samples_seen += r

        # if self.samples_seen % 2 == 0:
        if len(self.background_learner_threads) == 0:
            if self.samples_seen % self.background_training_after == 0:
                self.background_train_results = {
                    'probas': [None] * len(self.background_nets),
                    'y_hats': [None] * len(self.background_nets),
                    'avg_loss_since_last_detected_drift_by_parent': [0] * len(self.background_nets)}
                for i in range(len(self.background_nets)):
                    self.background_learner_threads.append(threading.Thread(
                        target=net_train,
                        args=(self.background_nets[i], X, r, c, y,
                              self.background_train_results, i,
                              self.last_detected_drift_around,)))
                for i in range(len(self.background_nets)):
                    self.background_learner_threads[i].start()
        else:  # there are live background learner threads
            # wait for self.background_training_after instances, then join them
            if self.samples_seen % self.background_training_after == self.background_training_after - 1:
                # NOTE: CPython's global interpreter lock prevents true thread-level
                # parallelism: https://docs.python.org/3/library/threading.html
                # Multiprocessing is an alternative:
                # https://docs.python.org/3/library/multiprocessing.html#module-multiprocessing
                for i in range(len(self.background_nets)):
                    self.background_learner_threads[i].join()
                self.background_learner_threads = []

                if self.foreground_train_results is not None:
                    min_back = np.argmin(self.background_train_results[
                        'avg_loss_since_last_detected_drift_by_parent'], axis=0)
                    max_fore = np.argmax(self.foreground_train_results[
                        'avg_loss_since_last_detected_drift_by_parent'], axis=0)
                    # swap if the best background learner beats the worst foreground learner
                    if self.background_train_results['avg_loss_since_last_detected_drift_by_parent'][min_back] \
                            < self.foreground_train_results['avg_loss_since_last_detected_drift_by_parent'][max_fore]:
                        tmp_net = self.foreground_nets[max_fore]
                        self.foreground_nets[max_fore] = self.background_nets[min_back]
                        self.background_nets[min_back] = tmp_net

        self.foreground_train_results = {
            'probas': [None] * len(self.foreground_nets),
            'y_hats': [None] * len(self.foreground_nets),
            'avg_loss_since_last_detected_drift_by_parent': [0] * len(self.foreground_nets)}

        if self.use_threads:
            t = []
            for i in range(len(self.foreground_nets)):
                t.append(threading.Thread(
                    target=net_train,
                    args=(self.foreground_nets[i], X, r, c, y,
                          self.foreground_train_results, i,
                          self.last_detected_drift_around,)))
            for i in range(len(self.foreground_nets)):
                t[i].start()
            for i in range(len(self.foreground_nets)):
                t[i].join()
        else:
            for i in range(len(self.foreground_nets)):
                net_train(self.foreground_nets[i], X, r, c, y,
                          self.foreground_train_results, i,
                          self.last_detected_drift_around)

        if self.drift_detection_method is not None:
            # get the predicted class and compare it with the actual class label
            predicted_label = vectorized_map_class_to_label(
                np.argmax(np.sum(self.foreground_train_results['probas'], axis=0)
                          / len(self.foreground_nets), axis=1),
                class_to_label_map=self.class_to_label)
            # TODO: we may have to have a special case for batch processing
            predicted_matches_actual = predicted_label == y
            self.drift_detection_method.add_element(1 if predicted_matches_actual else 0)
            if self.warning_detection_method is not None:
                self.warning_detection_method.add_element(1 if predicted_matches_actual else 0)
            # pass the difference to the detector
            # predicted_matches_actual = torch.abs(y - output).detach().numpy()[0]
            # self.drift_detection_method.add_element(predicted_matches_actual)

            # Check if there was a warning
            if self.warning_detection_method is not None:
                if self.warning_detection_method.detected_change():
                    self.detected_warnings += 1
            else:
                # warning detector is None, hence the drift detector has
                # warning detection capability.
                if self.drift_detection_method.detected_warning_zone():
                    self.detected_warnings += 1

            # Check if there was a change (3 is the warning threshold level)
            if self.detected_warnings > 3 and self.drift_detection_method.detected_change():
                print('Drift detected by {} around {} th sample.'.format(
                    self.drift_detection_method, self.samples_seen))
                self.detected_warnings = 0
                self.last_detected_drift_around = self.samples_seen
                # Find the worst learner in the foreground and replace it
                # with the best background learner

        return self

    def predict(self, X):
        y_proba = self.predict_proba(X)
        pred_sum_per_class = np.sum(y_proba, axis=0)
        pred_avgsum_per_class = np.divide(pred_sum_per_class, len(self.foreground_nets))
        y_pred = np.argmax(pred_avgsum_per_class, axis=0)
        return vectorized_map_class_to_label(np.asarray([y_pred]),
                                             class_to_label_map=self.class_to_label)

    def predict_proba(self, X):
        r, c = get_dimensions(X)
        probas = np.zeros([len(self.foreground_nets), len(self.class_labels)])
        # A threaded variant (kept for reference):
        # if self.use_threads:
        #     t = []
        #     for i in range(len(self.nets)):
        #         t.append(threading.Thread(target=net_predict_proba,
        #                                   args=(self.nets[i], X, r, c, probas, i,)))
        #     for i in range(len(self.nets)):
        #         t[i].start()
        #     for i in range(len(self.nets)):
        #         t[i].join()
        # else:
        for i in range(len(self.foreground_nets)):
            net_predict_proba(self.foreground_nets[i], X, r, c, probas, i)
        return np.asarray(probas)

    def reset(self):
        # configuration variables (which have the same names as init
        # parameters) should be copied by the caller function
        for i in range(len(self.foreground_nets)):
            self.foreground_nets[i].reset()
        return self

    def __str__(self):
        return str(self.__class__) + ": " + str(self.__dict__)

    def stream_ended(self):
        print('\nNetwork configuration:\n'
              '{}\n'
              '=======================================\n'
              'Foreground Nets\n'.format(self))
        print('optimizer_type,learning_rate,accumulated_loss,'
              'accumulated_loss_since_last_detected_drift_by_parent')
        for i in range(len(self.foreground_nets)):
            print('{},{},{},{}'.format(
                self.foreground_nets[i].optimizer_type,
                self.foreground_nets[i].learning_rate,
                self.foreground_nets[i].accumulated_loss / self.foreground_nets[i].samples_seen,
                self.foreground_nets[i].accumulated_loss_since_last_detected_drift_by_parent
                / self.foreground_nets[i].samples_seen_after_last_detected_drift_by_parent))
        print('\nBackground Nets\n')
        for i in range(len(self.background_nets)):
            print('{},{},{},{}'.format(
                self.background_nets[i].optimizer_type,
                self.background_nets[i].learning_rate,
                self.background_nets[i].accumulated_loss / self.background_nets[i].samples_seen,
                self.background_nets[i].accumulated_loss_since_last_detected_drift_by_parent
                / self.background_nets[i].samples_seen_after_last_detected_drift_by_parent))
        print('\n')
        # (fragment: the first lines close a preceding detector loop)
        plt.axvline(i, color='r', linestyle='--', linewidth=0.7)
        # print('Change has been detected in data: ' + str(data_stream[i]) + ' - of index: ' + str(i))
plt.show()

plt.plot(data_stream)
fig = plt.gcf()
fig.set_size_inches(10, 5.5)
plt.ylabel('value')
plt.xlabel('Time')
for i in range(200):
    eddm.add_element(data_stream[i])
    if eddm.detected_warning_zone():
        plt.axvline(i, color='g', linestyle='--', linewidth=0.7)
        # print('Warning zone has been detected in data: ' + str(data_stream[i]) + ' - of index: ' + str(i))
    if eddm.detected_change():
        plt.axvline(i, color='r', linestyle='--', linewidth=0.7)
        # print('Change has been detected in data: ' + str(data_stream[i]) + ' - of index: ' + str(i))
plt.show()

plt.plot(data_stream)
fig = plt.gcf()
fig.set_size_inches(10, 5.5)
plt.ylabel('value')
plt.xlabel('Time')
for i in range(200):
    adwin.add_element(data_stream[i])
    if adwin.detected_change():
        plt.axvline(i, color='r', linestyle='--', linewidth=0.7)
        # print('Change has been detected in data: ' + str(data_stream[i]) + ' - of index: ' + str(i))
plt.show()
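# The plotting fragment above assumes a setup roughly like this (the stream
# and detector choices are illustrative assumptions, not from the source):
import matplotlib.pyplot as plt
import numpy as np
from skmultiflow.drift_detection import ADWIN, EDDM

np.random.seed(7)
data_stream = np.concatenate([np.random.binomial(1, 0.1, 100),
                              np.random.binomial(1, 0.6, 100)])
eddm = EDDM()
adwin = ADWIN()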
class AdaptiveXGBoostClassifier(BaseSKMObject, ClassifierMixin):
    _PUSH_STRATEGY = 'push'
    _REPLACE_STRATEGY = 'replace'
    _UPDATE_STRATEGIES = [_PUSH_STRATEGY, _REPLACE_STRATEGY]

    def __init__(self,
                 n_estimators=30,
                 learning_rate=0.3,
                 max_depth=6,
                 max_window_size=1000,
                 min_window_size=None,
                 detect_drift=False,
                 update_strategy='replace'):
        """ Adaptive XGBoost classifier.

        Parameters
        ----------
        n_estimators: int (default=30)
            The number of estimators in the ensemble.

        learning_rate:
            Learning rate, a.k.a eta.

        max_depth: int (default=6)
            Max tree depth.

        max_window_size: int (default=1000)
            Max window size.

        min_window_size: int (default=None)
            Min window size. If this parameter is not set, then a fixed-size
            window of size ``max_window_size`` will be used.

        detect_drift: bool (default=False)
            If set, a drift detector (ADWIN) will be used.

        update_strategy: str (default='replace')
            | The update strategy to use:
            | 'push' - the ensemble resembles a queue
            | 'replace' - oldest ensemble members are replaced by newer ones

        Notes
        -----
        The Adaptive XGBoost [1]_ (AXGB) classifier is an adaptation of the
        XGBoost algorithm for evolving data streams. AXGB creates new members
        of the ensemble from mini-batches of data as new data becomes
        available. The maximum ensemble size is fixed, but learning does not
        stop once this size is reached; the ensemble is updated on new data
        to ensure consistency with the current data distribution.

        References
        ----------
        .. [1] Montiel, Jacob, Mitchell, Rory, Frank, Eibe, Pfahringer,
           Bernhard, Abdessalem, Talel, and Bifet, Albert. "Adaptive XGBoost
           for Evolving Data Streams". In: IJCNN'20. International Joint
           Conference on Neural Networks. 2020. Forthcoming.
        """
        super().__init__()
        self.learning_rate = learning_rate
        self.n_estimators = n_estimators
        self.max_depth = max_depth
        self.max_window_size = max_window_size
        self.min_window_size = min_window_size
        self._first_run = True
        self._ensemble = None
        self.detect_drift = detect_drift
        self._drift_detector = None
        self._X_buffer = np.array([])
        self._y_buffer = np.array([])
        self._samples_seen = 0
        self._model_idx = 0
        if update_strategy not in self._UPDATE_STRATEGIES:
            raise AttributeError("Invalid update_strategy: {}\n"
                                 "Valid options: {}".format(update_strategy,
                                                            self._UPDATE_STRATEGIES))
        self.update_strategy = update_strategy
        self._configure()

    def _configure(self):
        if self.update_strategy == self._PUSH_STRATEGY:
            self._ensemble = []
        elif self.update_strategy == self._REPLACE_STRATEGY:
            self._ensemble = [None] * self.n_estimators
        self._reset_window_size()
        self._init_margin = 0.0
        self._boosting_params = {"silent": True,
                                 "objective": "binary:logistic",
                                 "eta": self.learning_rate,
                                 "max_depth": self.max_depth}
        if self.detect_drift:
            self._drift_detector = ADWIN()

    def reset(self):
        """ Reset the estimator. """
        self._first_run = True
        self._configure()

    def partial_fit(self, X, y, classes=None, sample_weight=None):
        """ Partially (incrementally) fit the model.

        Parameters
        ----------
        X: numpy.ndarray
            An array of shape (n_samples, n_features) with the data upon
            which the algorithm will create its model.

        y: Array-like
            An array of shape (, n_samples) containing the classification
            targets for all samples in X. Only binary data is supported.

        classes: Not used.

        sample_weight: Not used.

        Returns
        -------
        AdaptiveXGBoostClassifier
            self
        """
        for i in range(X.shape[0]):
            self._partial_fit(np.array([X[i, :]]), np.array([y[i]]))
        return self

    def _partial_fit(self, X, y):
        if self._first_run:
            self._X_buffer = np.array([]).reshape(0, get_dimensions(X)[1])
            self._y_buffer = np.array([])
            self._first_run = False
        self._X_buffer = np.concatenate((self._X_buffer, X))
        self._y_buffer = np.concatenate((self._y_buffer, y))
        while self._X_buffer.shape[0] >= self.window_size:
            self._train_on_mini_batch(X=self._X_buffer[0:self.window_size, :],
                                      y=self._y_buffer[0:self.window_size])
            delete_idx = [i for i in range(self.window_size)]
            self._X_buffer = np.delete(self._X_buffer, delete_idx, axis=0)
            self._y_buffer = np.delete(self._y_buffer, delete_idx, axis=0)

            # Check window size and adjust it if necessary
            self._adjust_window_size()

        # Support for concept drift
        if self.detect_drift:
            correctly_classifies = self.predict(X) == y
            # Check for warning
            self._drift_detector.add_element(int(not correctly_classifies))
            # Check if there was a change
            if self._drift_detector.detected_change():
                # Reset window size
                self._reset_window_size()
                if self.update_strategy == self._REPLACE_STRATEGY:
                    self._model_idx = 0

    def _adjust_window_size(self):
        if self._dynamic_window_size < self.max_window_size:
            self._dynamic_window_size *= 2
            if self._dynamic_window_size > self.max_window_size:
                self.window_size = self.max_window_size
            else:
                self.window_size = self._dynamic_window_size

    def _reset_window_size(self):
        if self.min_window_size:
            self._dynamic_window_size = self.min_window_size
        else:
            self._dynamic_window_size = self.max_window_size
        self.window_size = self._dynamic_window_size

    def _train_on_mini_batch(self, X, y):
        if self.update_strategy == self._REPLACE_STRATEGY:
            booster = self._train_booster(X, y, self._model_idx)
            # Update ensemble
            self._ensemble[self._model_idx] = booster
            self._samples_seen += X.shape[0]
            self._update_model_idx()
        else:   # self.update_strategy == self._PUSH_STRATEGY
            booster = self._train_booster(X, y, len(self._ensemble))
            # Update ensemble
            if len(self._ensemble) == self.n_estimators:
                self._ensemble.pop(0)
            self._ensemble.append(booster)
            self._samples_seen += X.shape[0]

    def _train_booster(self, X: np.ndarray, y: np.ndarray, last_model_idx: int):
        d_mini_batch_train = xgb.DMatrix(X, y.astype(int))
        # Get margins from trees in the ensemble
        margins = np.asarray([self._init_margin] * d_mini_batch_train.num_row())
        for j in range(last_model_idx):
            margins = np.add(margins,
                             self._ensemble[j].predict(d_mini_batch_train, output_margin=True))
        d_mini_batch_train.set_base_margin(margin=margins)
        booster = xgb.train(params=self._boosting_params,
                            dtrain=d_mini_batch_train,
                            num_boost_round=1,
                            verbose_eval=False)
        return booster

    def _update_model_idx(self):
        self._model_idx += 1
        if self._model_idx == self.n_estimators:
            self._model_idx = 0

    def predict(self, X):
        """ Predict the class label for sample X.

        Parameters
        ----------
        X: numpy.ndarray
            An array of shape (n_samples, n_features) with the samples to
            predict the class label for.

        Returns
        -------
        numpy.ndarray
            A 1D array of shape (, n_samples) containing the predicted class
            labels for all instances in X.
        """
        if self._ensemble:
            if self.update_strategy == self._REPLACE_STRATEGY:
                trees_in_ensemble = sum(i is not None for i in self._ensemble)
            else:   # self.update_strategy == self._PUSH_STRATEGY
                trees_in_ensemble = len(self._ensemble)
            if trees_in_ensemble > 0:
                d_test = xgb.DMatrix(X)
                for i in range(trees_in_ensemble - 1):
                    margins = self._ensemble[i].predict(d_test, output_margin=True)
                    d_test.set_base_margin(margin=margins)
                predicted = self._ensemble[trees_in_ensemble - 1].predict(d_test)
                return np.array(predicted > 0.5).astype(int)
        # Ensemble is empty, return default values (0)
        return np.zeros(get_dimensions(X)[0])

    def predict_proba(self, X):
        """ Not implemented for this method. """
        raise NotImplementedError("predict_proba is not implemented for this method.")
class ANN:

    def __init__(self,
                 learning_rate=0.03,
                 hidden_layers=default_hidden_layers,
                 class_labels: tuple = None,
                 # e.g. ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
                 use_cpu=True,
                 process_as_a_batch=False,
                 optimizer_type=OP_TYPE_SGD,
                 loss_f=nn.CrossEntropyLoss(),
                 adwin_delta=1e-3,
                 back_prop_skip_loss_threshold=0.6):
        # configuration variables (which have the same names as init parameters)
        self.model_name = None
        self.learning_rate = learning_rate
        self.hidden_layers = copy.deepcopy(hidden_layers)
        self.class_labels = ('0', '1') if class_labels is None else class_labels
        self.use_cpu = use_cpu
        self.process_as_a_batch = process_as_a_batch
        self.optimizer_type = optimizer_type
        self.loss_f = loss_f
        self.adwin_delta = adwin_delta
        self.back_prop_skip_loss_threshold = back_prop_skip_loss_threshold

        # status variables
        self.net = None
        self.optimizer = None
        self.criterion = None
        self.loss = None
        self.device = None
        self.samples_seen = 0
        self.trained_count = 0
        self.chosen_counts = 0
        self.estimator: BaseDriftDetector = None
        self.class_label_index_map = {}
        self.accumulated_loss = 0

        self.init_values()

    def init_values(self):
        # init status variables
        self.net = None
        self.optimizer = None
        self.criterion = None
        self.loss = None
        self.device = None
        self.samples_seen = 0
        self.estimator = ADWIN(delta=self.adwin_delta)
        self.class_label_index_map = {}

        for i, class_label in enumerate(self.class_labels):
            self.class_label_index_map[class_label] = i

        if isinstance(self.hidden_layers, nn.Module):
            # assumes the input dimension is set properly in the network structure
            self.net = copy.deepcopy(self.hidden_layers)
            self.initialize_net_para()
        elif isinstance(self.hidden_layers, list):
            if self.hidden_layers[0]['neurons'] is None or self.hidden_layers[0]['nonlinearity'] is None:
                print('Unknown hidden layer format is passed in: {}'.format(self.hidden_layers))
                print('Expected format: {}'.format(default_hidden_layers))
                exit(1)
            self.model_name = 'L1_L1n{}_{}_{:05f}_{}'.format(
                math.log(self.hidden_layers[0]['neurons'], 2) // 1
                if self.hidden_layers[0]['neurons'] else 'NA',
                self.optimizer_type,
                self.learning_rate,
                self.adwin_delta)

        if self.use_cpu:
            self.device = torch.device("cpu")
        else:
            self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    def init_optimizer(self):
        if self.optimizer_type == OP_TYPE_ADAGRAD or self.optimizer_type == OP_TYPE_ADAGRAD_NC:
            self.optimizer = optim.Adagrad(self.net.parameters(), lr=self.learning_rate,
                                           lr_decay=0, weight_decay=0,
                                           initial_accumulator_value=0, eps=1e-10)
        elif self.optimizer_type == OP_TYPE_ADADELTA or self.optimizer_type == OP_TYPE_ADADELTA_NC:
            self.optimizer = optim.Adadelta(self.net.parameters(), lr=self.learning_rate, eps=1e-10)
        elif self.optimizer_type == OP_TYPE_RMSPROP or self.optimizer_type == OP_TYPE_RMSPROP_NC:
            self.optimizer = optim.RMSprop(self.net.parameters(), lr=self.learning_rate,
                                           alpha=0.99, weight_decay=0, eps=1e-10)
        elif self.optimizer_type == OP_TYPE_SGD or self.optimizer_type == OP_TYPE_SGD_NC:
            self.optimizer = optim.SGD(self.net.parameters(), lr=self.learning_rate)
        elif self.optimizer_type == OP_TYPE_ADAM or self.optimizer_type == OP_TYPE_ADAM_NC:
            self.optimizer = optim.Adam(self.net.parameters(), lr=self.learning_rate,
                                        betas=(0.9, 0.999), eps=1e-8,
                                        weight_decay=0, amsgrad=False)
        elif self.optimizer_type == OP_TYPE_ADAM_AMSG or self.optimizer_type == OP_TYPE_ADAM_AMSG_NC:
            self.optimizer = optim.Adam(self.net.parameters(), lr=self.learning_rate,
                                        betas=(0.9, 0.999), eps=1e-8,
                                        weight_decay=0, amsgrad=True)
        else:
            print('Invalid optimizer type = {}'.format(self.optimizer_type))

    def initialize_net_para(self):
        self.init_optimizer()
        print('Network configuration:\n'
              '{}\n'
              '======================================='.format(self))

    def initialize_network(self, input_dimentions=None):
        self.net = PyNet(hidden_layers=self.hidden_layers,
                         classes=self.class_labels,
                         input_dimentions=input_dimentions)
        self.initialize_net_para()

    def train_net(self, x, y):
        # move tensors to the GPU when one is available and configured
        if torch.cuda.is_available() and self.device.type != 'cpu':
            x = x.to(self.device)
            y = y.to(self.device)

        self.optimizer.zero_grad()  # zero the gradient buffers
        # forward propagation
        outputs = self.net(x)

        # backward propagation
        # print(self.net.linear[0].weight.data)
        class_index = self.class_label_index_map[y.reshape((-1,)).item()]
        # self.loss = self.criterion(outputs, y.reshape((-1,)).long())
        self.loss = self.loss_f(outputs, torch.tensor([class_index]))
        if self.loss.item() > self.back_prop_skip_loss_threshold:
            self.loss.backward()
            self.optimizer.step()  # does the update
            self.trained_count += 1
        self.estimator.add_element(self.loss.item())
        self.accumulated_loss += self.loss.item()
        # if self.estimator.detected_change():
        #     print('drift detected by {}'.format(self.model_name))

    def partial_fit(self, X, r, c, y):
        if self.net is None:
            self.initialize_network(input_dimentions=c)
        if self.process_as_a_batch:
            self.samples_seen += r
            self.train_net(x=torch.from_numpy(X).float(),
                           y=torch.from_numpy(np.array(y)).view(-1, 1).float())
        else:  # per instance processing (default behaviour)
            for i in range(r):
                x = torch.from_numpy(X[i])
                yy = torch.from_numpy(np.array(y[i]))
                x = x.view(1, -1).float()
                yy = yy.view(1, -1).float()
                x.unsqueeze(0)
                yy.unsqueeze(0)
                self.samples_seen += 1
                self.train_net(x=x, y=yy)

    def predict_proba(self, X, r, c):
        if self.net is None:
            self.initialize_network(input_dimentions=c)
        if self.process_as_a_batch:
            return self.net(torch.from_numpy(X).float()).detach().numpy()
        else:  # per instance processing (default behaviour)
            probas = None
            for i in range(r):
                x = torch.from_numpy(X[i])
                x = x.view(1, -1).float()
                x.unsqueeze(0)
                instance_class_probas = self.net(x).detach()
                if r == 1:
                    return instance_class_probas.reshape((1, -1))
                elif i == 0:
                    probas = instance_class_probas
                else:
                    # torch.cat expects a sequence of tensors
                    probas = torch.cat((probas, instance_class_probas)).detach()
            return probas

    def reset(self):
        # configuration variables (which have the same names as init
        # parameters) should be copied by the caller function
        self.init_values()
        return self

    def get_loss_estimation(self):
        return self.estimator.estimation
        # return self.accumulated_loss / self.samples_seen if self.samples_seen != 0 else 0.0

    def __str__(self):
        return str(self.__class__) + ": " + str(self.__dict__)
class OnlineDeepLearner(BaseSKMObject, ClassifierMixin, MetaEstimatorMixin):

    def __init__(self, seed=0):
        """ OnlineDeepLearner constructor """
        self.seed = seed
        self.classes = None
        self.n_classes = None
        self.n_features = None
        self.adwin = ADWIN(delta=0.0001)
        self.learner = None

    def init_learner(self):
        """ Initialize the ONN learner """
        if self.n_classes is None or self.n_features is None:
            raise ValueError("Cannot initialize classifier with n_classes=None or n_features=None")
        self.learner = ONN(n_features=self.n_features,
                           n_classes=self.n_classes,
                           n_hidden_units=10,
                           beta=0.99,
                           learning_rate=1,
                           s=0.2,
                           n_layers=0)

    def partial_fit(self, X_numpy, y_numpy, classes=None, sample_weight=None):
        if self.classes is None:
            if classes is None:
                raise ValueError("The first partial_fit call should pass all the classes.")
            else:
                self.classes = classes
                self.n_classes = len(classes)
                self.n_features = X_numpy.shape[1]
                self.init_learner()
        if self.classes is not None and classes is not None:
            if set(self.classes) == set(classes):
                pass
            else:
                raise ValueError("The classes passed to the partial_fit function differ "
                                 "from those passed earlier.")

        X = tf.constant(X_numpy, shape=[X_numpy.shape[0], X_numpy.shape[1]])
        y = tf.constant(y_numpy, shape=[1])

        y_preds = self.predict(X)
        for i in range(y_preds.shape[0]):
            old = self.adwin.estimation
            self.adwin.add_element(0.0 if y_preds[i] == y[i] else 1.0)
            if self.adwin.detected_change():
                if self.adwin.estimation > old:
                    print('Change detected')
                    self.init_learner()

        self.learner.partial_fit(X, y)

    def predict(self, X):
        if self.classes is None:
            return np.zeros(X.shape[0])
        prob = self.predict_proba(X)
        return np.argmax(prob, axis=1)

    def predict_proba(self, X_):
        if isinstance(X_, np.ndarray):
            X = tf.constant(X_, shape=[X_.shape[0], X_.shape[1]])
        else:
            X = X_
        prob = self.learner.predict_proba(X)
        return prob
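# A hedged usage sketch for OnlineDeepLearner. The ONN class (and its
# partial_fit/predict_proba API) is assumed to be importable from the
# surrounding module; the synthetic one-instance-at-a-time stream below is
# illustrative only.
import numpy as np

np.random.seed(0)
odl = OnlineDeepLearner(seed=0)
for t in range(100):
    X = np.random.rand(1, 4)                      # one instance, 4 features
    y = np.array([int(X[0, 0] + X[0, 1] > 1.0)])  # simple synthetic concept
    y_hat = odl.predict(X)                        # predict-then-train
    odl.partial_fit(X, y, classes=[0, 1])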
class KNNAdwin(KNN):
    """ K-Nearest Neighbors classifier with ADWIN change detector.

    This classifier is an improvement over the regular KNN classifier, as it
    is resistant to concept drift. It utilises the ADWIN change detector to
    decide which samples to keep and which ones to forget, and by doing so
    it regulates the sample window size.

    To know more about the ADWIN change detector, please visit
    skmultiflow.classification.core.drift_detection.adwin

    It uses the regular KNN classifier as a base class, with the major
    difference that this class keeps a variable size window, instead of a
    fixed size one, and it also updates the ADWIN algorithm at each
    partial_fit call.

    Parameters
    ----------
    n_neighbors: int
        The number of nearest neighbors to search for.

    max_window_size: int
        The maximum size of the window storing the last viewed samples.

    leaf_size: int
        The maximum number of samples that can be stored in one leaf node,
        which determines from which point the algorithm will switch to a
        brute-force approach. The bigger this number the faster the tree
        construction time, but the slower the query time will be.

    categorical_list: array-like
        Each entry is the index of a categorical feature. May be used for
        further filtering.

    Raises
    ------
    NotImplementedError: A few of the functions described here are not
    implemented since they have no application in this context.

    ValueError: A ValueError is raised if the predict function is called
    before at least k samples have been analyzed by the algorithm.

    Examples
    --------
    >>> # Imports
    >>> from skmultiflow.lazy.knn_adwin import KNNAdwin
    >>> from skmultiflow.data.file_stream import FileStream
    >>> # Setting up the stream
    >>> stream = FileStream('skmultiflow/data/datasets/covtype.csv')
    >>> stream.prepare_for_use()
    >>> # Setting up the KNNAdwin classifier
    >>> knn_adwin = KNNAdwin(n_neighbors=8, leaf_size=40, max_window_size=2000)
    >>> # Pre training the classifier with 200 samples
    >>> X, y = stream.next_sample(200)
    >>> knn_adwin = knn_adwin.partial_fit(X, y)
    >>> # Keeping track of sample count and correct prediction count
    >>> n_samples = 0
    >>> corrects = 0
    >>> while n_samples < 5000:
    ...     X, y = stream.next_sample()
    ...     pred = knn_adwin.predict(X)
    ...     if y[0] == pred[0]:
    ...         corrects += 1
    ...     knn_adwin = knn_adwin.partial_fit(X, y)
    ...     n_samples += 1
    >>>
    >>> # Displaying the results
    >>> print('KNNAdwin usage example')
    >>> print(str(n_samples) + ' samples analyzed.')
    5000 samples analyzed.
    >>> print("KNNAdwin's performance: " + str(corrects/n_samples))
    KNNAdwin's performance: 0.7798
    """

    def __init__(self, n_neighbors=5, max_window_size=sys.maxsize, leaf_size=30, categorical_list=None):
        super().__init__(n_neighbors=n_neighbors,
                         max_window_size=max_window_size,
                         leaf_size=leaf_size,
                         categorical_list=categorical_list)
        self.adwin = ADWIN()
        self.window = None

    def reset(self):
        """ reset

        Resets the ADWIN algorithm as well as the base model kept by the
        KNN base class.

        Returns
        -------
        KNNAdwin
            self
        """
        self.adwin = ADWIN()
        return super().reset()

    def fit(self, X, y, classes=None, weights=None):
        self.partial_fit(X, y, classes, weights)
        return self

    def partial_fit(self, X, y, classes=None, weight=None):
        """ partial_fit

        Partially fits the model. This is done by updating the window with
        new samples while also updating the ADWIN algorithm. Then we verify
        if a change was detected, and if so, the window is correctly split
        at the drift moment.

        Parameters
        ----------
        X: Numpy.ndarray of shape (n_samples, n_features)
            The data upon which the algorithm will create its model.

        y: Array-like
            An array-like containing the classification targets for all
            samples in X.

        classes: Not used.

        weight: Not used.

        Returns
        -------
        KNNAdwin
            self
        """
        r, c = get_dimensions(X)
        if self.window is None:
            self.window = InstanceWindow(max_size=self.max_window_size)

        for i in range(r):
            self.window.add_element(np.asarray([X[i]]), np.asarray([[y[i]]]))
            if self.window.n_samples >= self.n_neighbors:
                add = 1 if self.predict(np.asarray([X[i]])) == y[i] else 0
                self.adwin.add_element(add)
            else:
                self.adwin.add_element(0)

        if self.window.n_samples >= self.n_neighbors:
            changed = self.adwin.detected_change()
            if changed:
                if self.adwin.width < self.window.n_samples:
                    for i in range(self.window.n_samples, self.adwin.width, -1):
                        self.window.delete_element()
        return self

    def get_info(self):
        info = '{}:'.format(type(self).__name__)
        info += ' - n_neighbors: {}'.format(self.n_neighbors)
        info += ' - max_window_size: {}'.format(self.max_window_size)
        info += ' - leaf_size: {}'.format(self.leaf_size)
        return info
class ANN:

    def __init__(self,
                 learning_rate=0.03,
                 network_layers=default_network_layers,
                 class_labels: tuple = None,
                 # e.g. ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
                 use_cpu=True,
                 process_as_a_batch=False,
                 optimizer_type=OP_TYPE_SGD,
                 warning_detection_method: BaseDriftDetector = ADWIN(delta=1e-8, direction=ADWIN.DETECT_DOWN),
                 drift_detection_method: BaseDriftDetector = ADWIN(delta=1e-3, direction=ADWIN.DETECT_DOWN),
                 loss_f_type=LOSS_F_TYPE_NLLLoss,
                 softmax_f=nn.LogSoftmax(dim=1),
                 loss_transform_type=LOSS_TRANSFORM_TYPE_TANH_SIGMOID):
        # configuration variables (which have the same names as init parameters)
        self.learning_rate = learning_rate
        self.network_layers = copy.deepcopy(network_layers)
        self.class_labels = ('0', '1') if class_labels is None else class_labels
        self.use_cpu = use_cpu
        self.process_as_a_batch = process_as_a_batch
        self.optimizer_type = optimizer_type
        if self.optimizer_type == OP_TYPE_SGD_NC \
                or self.optimizer_type == OP_TYPE_ADAGRAD_NC \
                or self.optimizer_type == OP_TYPE_RMSPROP_NC \
                or self.optimizer_type == OP_TYPE_ADADELTA_NC \
                or self.optimizer_type == OP_TYPE_ADAM_NC \
                or self.optimizer_type == OP_TYPE_ADAM_AMSG_NC:
            self.drift_detection_method = None
            self.warning_detection_method = None
        else:
            self.drift_detection_method = drift_detection_method
            if self.drift_detection_method.__class__.__name__ == 'HDDM_A' \
                    or self.drift_detection_method.__class__.__name__ == 'HDDM_W':
                if warning_detection_method is not None:
                    print('Parameter warning_detection_method should be None for drift_detection_methods'
                          ' HDDM_A and HDDM_W as they have built-in warning detection.'
                          ' Hence setting it to None.')
                    self.warning_detection_method = None
                else:
                    self.warning_detection_method = None
            else:
                self.warning_detection_method = warning_detection_method
        self.loss_f_type = loss_f_type
        self.softMax_f = softmax_f
        self.loss_transform_type = loss_transform_type

        # status variables
        self.net = None
        self.optimizer = None
        self.criterion = None
        self.loss = None
        self.device = None
        self.samples_seen = 0
        self.detected_warnings = 0
        self.loss_f = None
        self.estimator: BaseDriftDetector = None
        # To normalize the observed errors into the [0, 1] range
        self.min_error = float('Inf')
        self.max_error = float('-Inf')
        self.class_label_index_map = {}

        self.init_values()

    def init_values(self):
        # init status variables
        self.net = None
        self.optimizer = None
        self.criterion = None
        self.loss = None
        self.device = None
        self.samples_seen = 0
        self.detected_warnings = 0
        self.loss_f = None
        self.estimator = ADWIN(delta=1e-3)
        # To normalize the observed errors into the [0, 1] range
        self.min_error = float('Inf')
        self.max_error = float('-Inf')
        self.class_label_index_map = {}
        initialize_network = False

        for i, class_label in enumerate(self.class_labels):
            self.class_label_index_map[class_label] = i

        if isinstance(self.network_layers, nn.Module):
            self.net = self.network_layers
            self.initialize_net_para()
        elif isinstance(self.network_layers, list):
            if self.network_layers[0]['input_d'] is None or self.network_layers[0]['input_d'] == 0:
                # wait till we receive the first instance to get the input
                # dimensions needed to initialize the passed-in network
                self.network_layers[0]['input_d'] = 0
            else:
                initialize_network = True
        else:
            self.network_layers = copy.deepcopy(default_network_layers)
            print('Unknown network type passed in, set the network type to default: {}'.format(
                self.network_layers))

        if initialize_network:
            self.initialize_network()

        if self.use_cpu:
            self.device = torch.device("cpu")
        else:
            self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    def init_optimizer(self):
        if self.optimizer_type == OP_TYPE_ADAGRAD or self.optimizer_type == OP_TYPE_ADAGRAD_NC:
            self.optimizer = optim.Adagrad(self.net.parameters(), lr=self.learning_rate,
                                           lr_decay=0, weight_decay=0,
                                           initial_accumulator_value=0, eps=1e-10)
        elif self.optimizer_type == OP_TYPE_ADADELTA or self.optimizer_type == OP_TYPE_ADADELTA_NC:
            self.optimizer = optim.Adadelta(self.net.parameters(), lr=self.learning_rate, eps=1e-10)
        elif self.optimizer_type == OP_TYPE_RMSPROP or self.optimizer_type == OP_TYPE_RMSPROP_NC:
            self.optimizer = optim.RMSprop(self.net.parameters(), lr=self.learning_rate,
                                           alpha=0.99, weight_decay=0, eps=1e-10)
        elif self.optimizer_type == OP_TYPE_SGD or self.optimizer_type == OP_TYPE_SGD_NC:
            self.optimizer = optim.SGD(self.net.parameters(), lr=self.learning_rate)
        elif self.optimizer_type == OP_TYPE_ADAM or self.optimizer_type == OP_TYPE_ADAM_NC:
            self.optimizer = optim.Adam(self.net.parameters(), lr=self.learning_rate,
                                        betas=(0.9, 0.999), eps=1e-10,
                                        weight_decay=0, amsgrad=False)
        elif self.optimizer_type == OP_TYPE_ADAM_AMSG or self.optimizer_type == OP_TYPE_ADAM_AMSG_NC:
            self.optimizer = optim.Adam(self.net.parameters(), lr=self.learning_rate,
                                        betas=(0.9, 0.999), eps=1e-10,
                                        weight_decay=0, amsgrad=True)
        else:
            print('Invalid optimizer type = {}'.format(self.optimizer_type))

    def initialize_net_para(self):
        self.init_optimizer()
        # for multi-class classification:
        #   criterion = nn.CrossEntropyLoss()
        # for binary classification (combines a Sigmoid layer):
        #   self.criterion = nn.BCEWithLogitsLoss()
        #   self.criterion = nn.BCELoss()
        #   self.criterion = nn.CrossEntropyLoss()
        if self.loss_f_type == LOSS_F_TYPE_NLLLoss:
            self.loss_f = nn.NLLLoss()
        elif self.loss_f_type == LOSS_F_TYPE_MultiMarginLoss:
            self.loss_f = nn.MultiMarginLoss()
        print('Network configuration:\n'
              '{}\n'
              '======================================='.format(self))

    def initialize_network(self):
        self.net = PyNet(self.network_layers, self.class_labels)
        self.initialize_net_para()

    def train_net(self, x, y):
        # move tensors to the GPU when one is available and configured
        if torch.cuda.is_available() and self.device.type != 'cpu':
            x = x.to(self.device)
            y = y.to(self.device)

        self.optimizer.zero_grad()  # zero the gradient buffers
        # forward propagation
        outputs = self.net(x)

        # backward propagation
        # print(self.learning_rate)
        # print(self.net.linear[0].weight.data)
        if self.learning_rate > 0.0:
            # self.loss = self.criterion(outputs, y.reshape((-1,)).long())
            class_index = self.class_label_index_map[y.reshape((-1,)).item()]
            self.loss = self.loss_f(self.softMax_f(outputs), torch.tensor([class_index]))
            self.loss.backward()
            self.optimizer.step()  # does the update

            if self.loss_transform_type == LOSS_TRANSFORM_TYPE_MIN_MAX_NORMALIZE:
                # Incremental maintenance of the normalization ranges
                normalized_loss = 0.0
                if self.loss.item() < self.min_error:
                    self.min_error = self.loss.item()
                if self.loss.item() > self.max_error:
                    self.max_error = self.loss.item()
                if self.min_error != self.max_error:
                    normalized_loss = (self.loss.item() - self.min_error) / (self.max_error - self.min_error)
            elif self.loss_transform_type == LOSS_TRANSFORM_TYPE_TANH_SIGMOID:
                normalized_loss = 1 / (1 + np.exp(-np.tanh(self.loss.item())))
            elif self.loss_transform_type == LOSS_TRANSFORM_TYPE_NONE:
                normalized_loss = self.loss.item()
            self.estimator.add_element(normalized_loss)
            # print(normalized_loss, self.estimator.estimation)

        outputs = outputs.detach()
        _, predicted_idxs = torch.max(outputs, 1)
        predicted_labels = self.class_labels[predicted_idxs]

        if self.drift_detection_method is not None:
            # get the predicted class and compare it with the actual class label
            predicted_matches_actual = predicted_labels == y
            self.drift_detection_method.add_element(1 if predicted_matches_actual else 0)
            if self.warning_detection_method is not None:
                self.warning_detection_method.add_element(1 if predicted_matches_actual else 0)
            # pass the difference to the detector
            # predicted_matches_actual = torch.abs(y - outputs).detach().numpy()[0]
            # self.drift_detection_method.add_element(predicted_matches_actual)

            # Check if there was a warning
            if self.warning_detection_method is not None:
                if self.warning_detection_method.detected_change():
                    self.detected_warnings += 1
            else:
                # warning detector is None, hence the drift detector has
                # warning detection capability.
                if self.drift_detection_method.detected_warning_zone():
                    self.detected_warnings += 1

            # Check if there was a change (3 is the warning threshold level)
            if self.detected_warnings > 3 and self.drift_detection_method.detected_change():
                print('Drift detected by {} around {} th sample. Hence resetting optimizer'.format(
                    self.drift_detection_method, self.samples_seen))
                self.detected_warnings = 0
                self.init_optimizer()

    def partial_fit(self, X, r, c, y):
        if self.net is None:
            self.network_layers[0]['input_d'] = c
            self.initialize_network()
        if self.process_as_a_batch:
            self.samples_seen += r
            self.train_net(x=torch.from_numpy(X).float(),
                           y=torch.from_numpy(np.array(y)).view(-1, 1).float())
        else:  # per instance processing (default behaviour)
            for i in range(r):
                x = torch.from_numpy(X[i])
                yy = torch.from_numpy(np.array(y[i]))
                x = x.view(1, -1).float()
                yy = yy.view(1, -1).float()
                x.unsqueeze(0)
                yy.unsqueeze(0)
                self.samples_seen += 1
                self.train_net(x=x, y=yy)

    def predict_proba(self, X, r, c):
        if self.net is None:
            self.network_layers[0]['input_d'] = c
            self.initialize_network()
        if self.process_as_a_batch:
            return self.net(torch.from_numpy(X).float()).detach().numpy()
        else:  # per instance processing (default behaviour)
            proba = None
            for i in range(r):
                x = torch.from_numpy(X[i])
                x = x.view(1, -1).float()
                x.unsqueeze(0)
                if r == 1:
                    return self.net(x).detach().reshape((1, -1))
                elif i == 0:
                    proba = self.net(x).detach()
                else:
                    # torch.cat expects a sequence of tensors
                    proba = torch.cat((proba, self.net(x).detach())).detach()
            return proba

    def reset(self):
        # configuration variables (which have the same names as init
        # parameters) should be copied by the caller function
        self.init_values()
        return self

    def __str__(self):
        return str(self.__class__) + ": " + str(self.__dict__)
class AdaLearningNode(LearningNodeNBAdaptive, NewNode):
    def __init__(self, initial_class_observations):
        super().__init__(initial_class_observations)
        self._estimation_error_weight = ADWIN()
        self.error_change = False
        self._random_seed = 1
        self._classifier_random = check_random_state(self._random_seed)

    # Override NewNode
    def number_leaves(self):
        return 1

    # Override NewNode
    def get_error_estimation(self):
        return self._estimation_error_weight.estimation

    # Override NewNode
    def get_error_width(self):
        return self._estimation_error_weight.width

    # Override NewNode
    def is_null_error(self):
        return self._estimation_error_weight is None

    def kill_tree_children(self, hat):
        pass

    # Override NewNode
    def learn_from_instance(self, X, y, weight, hat, parent, parent_branch):
        true_class = y
        # Bootstrap-sampling (disabled in this version):
        # k = self._classifier_random.poisson(1.0)
        # if k > 0:
        #     weight = weight * k
        class_prediction = get_max_value_key(self.get_class_votes(X, hat))
        bl_correct = (true_class == class_prediction)

        if self._estimation_error_weight is None:
            self._estimation_error_weight = ADWIN()
        old_error = self.get_error_estimation()

        # Add element to ADWIN
        add = 0.0 if bl_correct else 1.0
        self._estimation_error_weight.add_element(add)
        # Detect change with ADWIN
        self.error_change = self._estimation_error_weight.detected_change()

        if self.error_change and old_error > self.get_error_estimation():
            self.error_change = False

        # Update statistics
        super().learn_from_instance(X, y, weight, hat)  # call ActiveLearningNode

        weight_seen = self.get_weight_seen()
        if weight_seen - self.get_weight_seen_at_last_split_evaluation() >= hat.grace_period:
            hat._attempt_to_split(self, parent, parent_branch)
            self.set_weight_seen_at_last_split_evaluation(weight_seen)

    # Override LearningNodeNBAdaptive
    def get_class_votes(self, X, ht):
        prediction_option = ht.leaf_prediction
        if prediction_option == MAJORITY_CLASS:  # MC
            dist = self.get_observed_class_distribution()
        elif prediction_option == NAIVE_BAYES:  # NB
            dist = do_naive_bayes_prediction(X, self._observed_class_distribution,
                                             self._attribute_observers)
        else:  # NBAdaptive
            if self._mc_correct_weight > self._nb_correct_weight:
                dist = self.get_observed_class_distribution()
            else:
                dist = do_naive_bayes_prediction(X, self._observed_class_distribution,
                                                 self._attribute_observers)

        dist_sum = sum(dist.values())  # sum all values in dictionary
        normalization_factor = dist_sum * self.get_error_estimation() * self.get_error_estimation()
        if normalization_factor > 0.0:
            normalize_values_in_dict(dist, normalization_factor)
        return dist

    # Override NewNode, New for option votes
    def filter_instance_to_leaves(self, X, y, weight, parent, parent_branch,
                                  update_splitter_counts, found_nodes=None):
        if found_nodes is None:
            found_nodes = []
        found_nodes.append(HoeffdingTree.FoundNode(self, parent, parent_branch))
class AdaLearningNode(ActiveLearningNodeNBA, AdaNode):
    """ Learning node for Hoeffding Adaptive Tree.

    Uses Adaptive Naive Bayes models.

    Parameters
    ----------
    initial_stats: dict (class_value, weight) or None
        Initial class observations
    """
    def __init__(self, initial_stats=None, random_state=None):
        super().__init__(initial_stats)
        self._adwin = ADWIN()
        self.error_change = False
        self._random_state = check_random_state(random_state)

    @property
    def n_leaves(self):
        return 1

    @property
    def error_estimation(self):
        return self._adwin.estimation

    @property
    def error_width(self):
        return self._adwin.width

    def error_is_null(self):
        return self._adwin is None

    def kill_tree_children(self, hat):
        pass

    def learn_one(self, X, y, weight, tree, parent, parent_branch):
        true_class = y

        if tree.bootstrap_sampling:
            # Perform bootstrap-sampling
            k = self._random_state.poisson(1.0)
            if k > 0:
                weight = weight * k

        class_prediction = get_max_value_key(self.predict_one(X, tree=tree))
        is_correct = (true_class == class_prediction)

        if self._adwin is None:
            self._adwin = ADWIN()
        old_error = self.error_estimation

        # Add element to ADWIN
        self._adwin.add_element(0.0 if is_correct else 1.0)
        # Detect change with ADWIN
        self.error_change = self._adwin.detected_change()

        if self.error_change and old_error > self.error_estimation:
            self.error_change = False

        # Update statistics
        super().learn_one(X, y, weight=weight, tree=tree)

        weight_seen = self.total_weight
        if weight_seen - self.last_split_attempt_at >= tree.grace_period:
            tree._attempt_to_split(self, parent, parent_branch)
            self.last_split_attempt_at = weight_seen

    # Override LearningNodeNBAdaptive
    def predict_one(self, X, *, tree=None):
        prediction_option = tree.leaf_prediction
        if prediction_option == tree._MAJORITY_CLASS:  # MC
            dist = self.stats
        elif prediction_option == tree._NAIVE_BAYES:  # NB
            dist = do_naive_bayes_prediction(X, self.stats, self.attribute_observers)
        else:  # NBAdaptive (default)
            dist = super().predict_one(X, tree=tree)

        dist_sum = sum(dist.values())  # sum all values in dictionary
        normalization_factor = dist_sum * self.error_estimation * self.error_estimation
        if normalization_factor > 0.0:
            dist = normalize_values_in_dict(dist, normalization_factor, inplace=False)
        return dist

    # Override AdaNode, New for option votes
    def filter_instance_to_leaves(self, X, y, weight, parent, parent_branch,
                                  update_splitter_counts, found_nodes=None):
        if found_nodes is None:
            found_nodes = []
        found_nodes.append(FoundNode(self, parent, parent_branch))
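# ---------------------------------------------------------------------------
# Illustration only: every AdaLearningNode variant in this file applies the
# same guard -- ADWIN also fires when the error *drops* (e.g. after the
# leaf's model improved), and those changes must not be treated as concept
# drift. A minimal sketch of that guard, assuming skmultiflow's ADWIN API:
# ---------------------------------------------------------------------------
from skmultiflow.drift_detection import ADWIN

def error_increased(adwin, is_correct):
    """Feed one 0/1 outcome into ADWIN; report a change only if the error grew."""
    old_error = adwin.estimation
    adwin.add_element(0.0 if is_correct else 1.0)
    error_change = adwin.detected_change()
    if error_change and old_error > adwin.estimation:
        # The window mean moved, but downwards: suppress the drift signal.
        error_change = False
    return error_change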
class AdaLearningNode(LearningNodeNBAdaptive, AdaNode):
    """ Learning node for Hoeffding Adaptive Tree that uses Adaptive Naive
    Bayes models.

    Parameters
    ----------
    initial_class_observations: dict (class_value, weight) or None
        Initial class observations
    """
    def __init__(self, initial_class_observations, random_state=None):
        super().__init__(initial_class_observations)
        self._estimation_error_weight = ADWIN()
        self.error_change = False
        self._random_state = check_random_state(random_state)

    # Override AdaNode
    def number_leaves(self):
        return 1

    # Override AdaNode
    def get_error_estimation(self):
        return self._estimation_error_weight.estimation

    # Override AdaNode
    def get_error_width(self):
        return self._estimation_error_weight.width

    # Override AdaNode
    def is_null_error(self):
        return self._estimation_error_weight is None

    def kill_tree_children(self, hat):
        pass

    # Override AdaNode
    def learn_from_instance(self, X, y, weight, hat, parent, parent_branch):
        true_class = y

        if hat.bootstrap_sampling:
            # Perform bootstrap-sampling
            k = self._random_state.poisson(1.0)
            if k > 0:
                weight = weight * k

        class_prediction = get_max_value_key(self.get_class_votes(X, hat))
        bl_correct = (true_class == class_prediction)

        if self._estimation_error_weight is None:
            self._estimation_error_weight = ADWIN()
        old_error = self.get_error_estimation()

        # Add element to ADWIN
        add = 0.0 if bl_correct else 1.0
        self._estimation_error_weight.add_element(add)
        # Detect change with ADWIN
        self.error_change = self._estimation_error_weight.detected_change()

        if self.error_change and old_error > self.get_error_estimation():
            self.error_change = False

        # Update statistics
        super().learn_from_instance(X, y, weight, hat)  # call ActiveLearningNode

        weight_seen = self.get_weight_seen()
        if weight_seen - self.get_weight_seen_at_last_split_evaluation() >= hat.grace_period:
            hat._attempt_to_split(self, parent, parent_branch)
            self.set_weight_seen_at_last_split_evaluation(weight_seen)

    # Override LearningNodeNBAdaptive
    def get_class_votes(self, X, ht):
        prediction_option = ht.leaf_prediction
        if prediction_option == ht._MAJORITY_CLASS:  # MC
            dist = self.get_observed_class_distribution()
        elif prediction_option == ht._NAIVE_BAYES:  # NB
            dist = do_naive_bayes_prediction(X, self._observed_class_distribution,
                                             self._attribute_observers)
        else:  # NBAdaptive (default)
            if self._mc_correct_weight > self._nb_correct_weight:
                dist = self.get_observed_class_distribution()
            else:
                dist = do_naive_bayes_prediction(X, self._observed_class_distribution,
                                                 self._attribute_observers)

        dist_sum = sum(dist.values())  # sum all values in dictionary
        normalization_factor = dist_sum * self.get_error_estimation() * self.get_error_estimation()
        if normalization_factor > 0.0:
            dist = normalize_values_in_dict(dist, normalization_factor, inplace=False)
        return dist

    # Override AdaNode, New for option votes
    def filter_instance_to_leaves(self, X, y, weight, parent, parent_branch,
                                  update_splitter_counts, found_nodes=None):
        if found_nodes is None:
            found_nodes = []
        found_nodes.append(FoundNode(self, parent, parent_branch))
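# ---------------------------------------------------------------------------
# Illustration only: the vote scaling at the end of get_class_votes divides
# each class count by dist_sum * error^2, so leaves with a low estimated
# error cast proportionally larger votes when option-tree predictions are
# combined. A minimal sketch, assuming normalize_values_in_dict divides
# every value by the given factor (the stand-in below mimics that behaviour):
# ---------------------------------------------------------------------------
def _normalize_values_in_dict(dist, factor, inplace=True):
    out = dist if inplace else dict(dist)
    for key in out:
        out[key] = out[key] / factor
    return out

votes = {0: 6.0, 1: 4.0}   # raw class observations at a leaf
error_estimation = 0.2     # ADWIN's current error estimate for this leaf

factor = sum(votes.values()) * error_estimation * error_estimation
print(_normalize_values_in_dict(votes, factor, inplace=False))
# {0: 15.0, 1: 10.0} -- the lower the error estimate, the larger the votes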
                            time=stime, ram_hours=-1, classified_instances=-1,
                            accuracy=(cnt / (i + 1)), a=-1, b=-1, c=-1, d=-1, e=-1))
            print(model.alphas)

        targets = tf.constant(y[i], shape=[1])

        # Monitor the 0/1 error with ADWIN and rebuild the model only when
        # the estimated error has increased after a detected change.
        old = adwin.estimation
        adwin.add_element(0.0 if pred == y[i] else 1.0)
        if adwin.detected_change():
            if adwin.estimation > old:
                print('Change detected')
                del model
                model = get_model()

        model.partial_fit(inputs, targets)
        print('#{}'.format(i + 1))

    curtime = time.time() - time_mark
    print('time for {}: '.format(freq), curtime)
    stime += curtime
    f.write(
        "{instances},{time},{ram_hours},{classified_instances},{accuracy},{a},{b},{c},{d},{e}\n"