def learn_one(self, X, y, *, weight=1.0, tree=None):
    """Update the node with the provided instance.

    Parameters
    ----------
    X: numpy.ndarray of length equal to the number of features.
        Instance attributes for updating the node.
    y: numpy.ndarray of length equal to the number of targets.
        Instance targets.
    weight: float
        Instance weight.
    tree: HoeffdingTreeRegressor
        Regression Hoeffding Tree to update.
    """
    self.update_stats(y, weight)
    self.update_attribute_observers(X, y, weight, tree)

    if self.perceptron_weights is None:
        # Create the matrix of perceptron random weights
        _, rows = get_dimensions(y)
        _, cols = get_dimensions(X)
        self.perceptron_weights = self._random_state.uniform(
            -1.0, 1.0, (rows, cols + 1))
        self._normalize_perceptron_weights()

    if tree.learning_ratio_const:
        learning_ratio = tree.learning_ratio_perceptron
    else:
        learning_ratio = tree.learning_ratio_perceptron / (
            1 + self.stats[0] * tree.learning_ratio_decay)

    for i in range(int(weight)):
        self._update_weights(X, y, learning_ratio, tree)
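# Illustrative sketch (not part of the original API): how the decayed
# learning ratio computed above shrinks as the node accumulates weight.
# `weight_seen` stands in for ``self.stats[0]``; the default values of
# `base` and `decay` are assumptions for demonstration purposes only.
def _demo_decayed_learning_ratio(weight_seen, base=0.02, decay=0.001):
    return base / (1 + weight_seen * decay)

# _demo_decayed_learning_ratio(0)     -> 0.02
# _demo_decayed_learning_ratio(1000)  -> 0.01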
def learn_from_instance(self, X, y, weight, rht):
    """Update the node with the provided instance.

    Parameters
    ----------
    X: numpy.ndarray of length equal to the number of features.
        Instance attributes for updating the node.
    y: numpy.ndarray of length equal to the number of targets.
        Instance targets.
    weight: float
        Instance weight.
    rht: HoeffdingTreeRegressor
        Regression Hoeffding Tree to update.
    """
    if self.perceptron_weight is None:
        self.perceptron_weight = {}
        # Create the matrix of perceptron random weights
        _, rows = get_dimensions(y)
        _, cols = get_dimensions(X)

        self.perceptron_weight[0] = \
            self.random_state.uniform(-1.0, 1.0, (rows, cols + 1))
        # Cascade Stacking
        self.perceptron_weight[1] = \
            self.random_state.uniform(-1.0, 1.0, (rows, rows + 1))
        self.normalize_perceptron_weights()

    try:
        self._observed_class_distribution[0] += weight
    except KeyError:
        self._observed_class_distribution[0] = weight

    if rht.learning_ratio_const:
        learning_ratio = rht.learning_ratio_perceptron
    else:
        learning_ratio = rht.learning_ratio_perceptron / \
            (1 + self._observed_class_distribution[0]
             * rht.learning_ratio_decay)

    try:
        self._observed_class_distribution[1] += weight * y
        self._observed_class_distribution[2] += weight * y * y
    except KeyError:
        self._observed_class_distribution[1] = weight * y
        self._observed_class_distribution[2] = weight * y * y

    for i in range(int(weight)):
        self.update_weights(X, y, learning_ratio, rht)

    for i, x in enumerate(X):
        try:
            obs = self._attribute_observers[i]
        except KeyError:
            # Create target observers, if not already defined
            if rht.nominal_attributes is not None and i in rht.nominal_attributes:
                obs = NominalAttributeRegressionObserver()
            else:
                obs = NumericAttributeRegressionObserverMultiTarget()
            self._attribute_observers[i] = obs
        obs.observe_attribute_class(x, y, weight)
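# Illustrative sketch (hypothetical names): the shapes of the two weight
# matrices created above for cascade stacking. The base predictor maps the
# bias-augmented features to the targets; the meta predictor maps the
# bias-augmented base predictions to the targets.
import numpy as np

def _demo_cascade_shapes(n_features=4, n_targets=2, seed=0):
    rng = np.random.RandomState(seed)
    base = rng.uniform(-1.0, 1.0, (n_targets, n_features + 1))
    meta = rng.uniform(-1.0, 1.0, (n_targets, n_targets + 1))
    return base.shape, meta.shape  # ((2, 5), (2, 3))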
def update_buffer_content(self, X, y, timestamp, uid):
    self.members['uids'].append(uid)
    if self.members['X'] is None:
        # Lazily allocate the buffers once the feature/target sizes are known
        self.members['X'] = np.zeros((0, get_dimensions(X)[1]))
        self.members['y'] = np.zeros((0, get_dimensions(y)[1]))
        self.earliest_timestamp = timestamp

    self.members['X'] = np.vstack((self.members['X'], X))
    self.members['y'] = np.vstack((self.members['y'], y))
    self.members['timestamps'].append(timestamp)
    self.size += 1
    self.latest_timestamp = timestamp
def update_weights(self, X, y, learning_ratio, rht):
    """Update the perceptron weights.

    Parameters
    ----------
    X: numpy.ndarray of length equal to the number of features.
        Instance attributes for updating the node.
    y: numpy.ndarray of length equal to the number of targets.
        Target values.
    learning_ratio: float
        Perceptron learning ratio.
    rht: HoeffdingTreeRegressor
        Regression Hoeffding Tree to update.
    """
    normalized_sample = rht.normalize_sample(X)
    normalized_base_pred = self._predict_base(normalized_sample)

    normalized_target_value = rht.normalize_target_value(y)

    self.perceptron_weight[0] += learning_ratio * \
        (normalized_target_value - normalized_base_pred)[:, None] @ \
        normalized_sample[None, :]

    # Add bias term
    normalized_base_pred = np.append(normalized_base_pred, 1.0)
    normalized_meta_pred = self._predict_meta(normalized_base_pred)

    self.perceptron_weight[1] += learning_ratio * \
        (normalized_target_value - normalized_meta_pred)[:, None] @ \
        normalized_base_pred[None, :]

    self.normalize_perceptron_weights()

    # Update the faded errors of the predictors. The considered errors are
    # normalized, since they are based on mean-centered and sd-scaled values.
    self.fMAE_M = 0.95 * self.fMAE_M + np.absolute(
        normalized_target_value - rht.normalize_target_value(
            self._observed_class_distribution[1]
            / self._observed_class_distribution[0]))

    # Ignore the added bias term in the comparison
    self.fMAE_P = 0.95 * self.fMAE_P + np.absolute(
        normalized_target_value - normalized_base_pred[:-1])

    self.fMAE_SP = 0.95 * self.fMAE_SP + np.absolute(
        normalized_target_value - normalized_meta_pred)
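# Illustrative sketch (hypothetical helper, not part of the original API):
# the delta-rule update applied twice above, in isolation. `weights` has
# shape (n_targets, n_inputs + 1) and `x_with_bias` carries a trailing 1.0.
import numpy as np

def _demo_delta_rule(weights, x_with_bias, target, learning_ratio):
    prediction = weights @ x_with_bias    # shape: (n_targets,)
    error = target - prediction           # shape: (n_targets,)
    # The outer product distributes each target's error over all inputs
    return weights + learning_ratio * error[:, None] @ x_with_bias[None, :]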
def add_sample(self, X, y, arrival_time):
    if not self._is_initialized:
        self._n_features = get_dimensions(X)[1]
        self._n_targets = get_dimensions(y)[1]
        self.configure()

    if self._n_features != get_dimensions(X)[1]:
        raise ValueError(
            "Inconsistent number of features in X: {}, previously observed {}.".format(
                get_dimensions(X)[1], self._n_features))

    if not self.overlap_windows:
        self._add_sample_no_overlap(X, y, arrival_time)
    else:
        self._add_sample_overlap(X, y, arrival_time)
def partial_fit(self, X, y, sample_weight=None):
    """Incrementally train the model.

    Train samples (instances) are composed of X attributes and their
    corresponding targets y.

    Tasks performed before training:

    * Verify instance weight. If not provided, uniform weights (1.0) are
      assumed.
    * If more than one instance is passed, loop through X and pass instances
      one at a time.
    * Update weight seen by model.

    Training tasks:

    * If the tree is empty, create a leaf node as the root.
    * If the tree is already initialized, find the corresponding leaf for the
      instance and update the leaf node statistics.
    * If growth is allowed and the number of instances that the leaf has
      observed between split attempts exceeds the grace period, then attempt
      to split.

    Parameters
    ----------
    X: numpy.ndarray of shape (n_samples, n_features)
        Instance attributes.
    y: numpy.ndarray of shape (n_samples, n_targets)
        Target values.
    sample_weight: float or array-like
        Samples weight. If not provided, uniform weights are assumed.
    """
    if y is not None:
        # Set the number of targets once
        if not self._n_targets_set:
            _, self._n_targets = get_dimensions(y)
            self._n_targets_set = True

        row_cnt, _ = get_dimensions(X)
        if sample_weight is None:
            sample_weight = np.ones(row_cnt)
        if row_cnt != len(sample_weight):
            raise ValueError(
                'Inconsistent number of instances ({}) and weights ({}).'.format(
                    row_cnt, len(sample_weight)))
        for i in range(row_cnt):
            if sample_weight[i] != 0.0:
                self._train_weight_seen_by_model += sample_weight[i]
                self._partial_fit(X[i], y[i], sample_weight[i])
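# Illustrative usage sketch (assumption: ``model`` is any estimator that
# implements the partial_fit contract documented above; the synthetic data
# below is for demonstration only).
import numpy as np

def _demo_stream_training(model, n_batches=10, n_features=5, n_targets=2):
    rng = np.random.default_rng(42)
    for _ in range(n_batches):
        X = rng.normal(size=(1, n_features))
        y = X[:, :n_targets] + rng.normal(scale=0.1, size=(1, n_targets))
        model.partial_fit(X, y)    # one instance at a time, as on a stream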
def predict(self, X):
    """Predict classes for the passed data.

    Parameters
    ----------
    X : numpy.ndarray of shape (n_samples, n_features)
        The set of data samples to predict the class labels for.

    Returns
    -------
    A numpy.ndarray with all the predictions for the samples in X.

    Notes
    -----
    The predict function will average the predictions from all its learners
    to find the most likely prediction for the sample matrix X.
    """
    r, c = get_dimensions(X)
    proba = self.predict_proba(X)
    predictions = []
    if proba is None:
        return None
    for i in range(r):
        predictions.append(np.argmax(proba[i]))
    return np.asarray(predictions)
def predict_proba(self, X):
    """Estimate the probability of X belonging to each class-label.

    Parameters
    ----------
    X : numpy.ndarray of shape (n_samples, n_features)
        Samples one wants to predict the class probabilities for.

    Returns
    -------
    A numpy.ndarray of shape (n_samples, n_labels), in which each outer
    entry is associated with the X entry of the same index. The list in
    index [i] contains len(self.target_values) elements, each of which
    represents the probability that the i-th sample of X belongs to a
    certain class-label.
    """
    n_samples, n_features = get_dimensions(X)
    y_proba = []

    if self.ensemble is None:
        self._init_ensemble(n_features=n_features)
        # The ensemble was only just initialized: default to zeros
        return np.zeros(n_samples)

    for i in range(n_samples):
        y_proba.append(self._predict_proba(np.asarray([X[i]])))
    return np.asarray(y_proba)
def partial_fit(self, X, y, classes=None, sample_weight=None):
    """Partially (incrementally) fit the model.

    Parameters
    ----------
    X : numpy.ndarray of shape (n_samples, n_features)
        The features to train the model.
    y: numpy.ndarray of shape (n_samples)
        An array-like with the class labels of all samples in X.
    classes: numpy.ndarray, list, optional (default=None)
        Array with all possible/known class labels. This is an optional
        parameter, except for the first partial_fit call where it is
        compulsory.
    sample_weight: numpy.ndarray of shape (n_samples), optional (default=None)
        Samples weight. If not provided, uniform weights are assumed.
    """
    if self.classes is None and classes is not None:
        self.classes = classes

    if sample_weight is None:
        weight = 1.0
    else:
        weight = sample_weight

    if y is not None:
        row_cnt, _ = get_dimensions(X)
        weight = check_weights(weight, expand_length=row_cnt)
        for iterator in range(row_cnt):
            if weight[iterator] != 0.0:
                self._partial_fit(X[iterator], y[iterator],
                                  self.classes, weight[iterator])

    return self
def predict_proba(self, X):
    """Predict the probabilities of all labels for the instance(s).

    Parameters
    ----------
    X: numpy.ndarray of shape (n_samples, n_features)
        Samples for which we want to predict the labels.

    Returns
    -------
    numpy.array
        Predicted probabilities of all the labels for all instances in X.
    """
    r, _ = get_dimensions(X)
    predictions = []
    for i in range(r):
        votes = copy.deepcopy(self.get_votes_for_instance(X[i]))
        if votes == {}:
            # Tree is empty, all classes equal, default to zero
            predictions.append([0])
        else:
            if sum(votes.values()) != 0:
                votes = normalize_values_in_dict(votes, inplace=False)
            if self.classes is not None:
                y_proba = np.zeros(int(max(self.classes)) + 1)
            else:
                y_proba = np.zeros(int(max(votes.keys())) + 1)
            for key, value in votes.items():
                y_proba[int(key)] = value
            predictions.append(y_proba)
    return np.array(predictions)
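# Illustrative sketch (hypothetical helper): the votes-to-probability
# conversion used above, shown on a toy dict. Keys are class labels,
# values are the (already normalized) vote mass per class.
import numpy as np

def _demo_votes_to_proba(votes, n_classes):
    proba = np.zeros(n_classes)
    total = sum(votes.values())
    for label, vote in votes.items():
        proba[int(label)] = vote / total if total != 0 else 0.0
    return proba

# _demo_votes_to_proba({0: 3.0, 2: 1.0}, n_classes=3) -> array([0.75, 0., 0.25])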
def _init_ensemble(self, X):
    self._set_max_features(get_dimensions(X)[1])

    self.ensemble = [
        ARFBaseLearner(
            index_original=i,
            classifier=ARFHoeffdingTreeClassifier(
                max_byte_size=self.max_byte_size,
                memory_estimate_period=self.memory_estimate_period,
                grace_period=self.grace_period,
                split_criterion=self.split_criterion,
                split_confidence=self.split_confidence,
                tie_threshold=self.tie_threshold,
                binary_split=self.binary_split,
                stop_mem_management=self.stop_mem_management,
                remove_poor_atts=self.remove_poor_atts,
                no_preprune=self.no_preprune,
                leaf_prediction=self.leaf_prediction,
                nb_threshold=self.nb_threshold,
                nominal_attributes=self.nominal_attributes,
                max_features=self.max_features,
                random_state=self.random_state),
            instances_seen=self.instances_seen,
            drift_detection_method=self.drift_detection_method,
            warning_detection_method=self.warning_detection_method,
            is_background_learner=False)
        for i in range(self.n_estimators)
    ]
def predict_proba(self, X, max_score):
    """Predict the probabilities of all labels for the X instance(s).

    Parameters
    ----------
    X: numpy.ndarray of shape (n_samples, n_features)
        Samples for which we want to predict the labels.
    max_score: float
        The maximum score an instance could have in the tree.

    Returns
    -------
    numpy.array
        Predicted probabilities of all the labels for all instances in X.
    """
    r, _ = get_dimensions(X)
    predictions = []
    for i in range(r):
        votes = copy.deepcopy(self.get_votes_for_instance(X[i], max_score))
        y_proba = np.zeros(int(max(votes.keys())) + 1)
        for key, value in votes.items():
            y_proba[int(key)] = value
        predictions.append(y_proba)
    return np.array(predictions)
def normalize_sample(self, X):
    """Normalize the features, so that they all have the same influence
    during training.

    Parameters
    ----------
    X: np.array
        Instance features.

    Returns
    -------
    np.array
        The normalized sample, augmented with a trailing bias input.
    """
    if self.examples_seen <= 1:
        _, c = get_dimensions(X)
        return np.zeros((c + 1), dtype=np.float64)

    mean = self.sum_of_attribute_values / self.examples_seen
    variance = (self.sum_of_attribute_squares
                - (self.sum_of_attribute_values ** 2) / self.examples_seen) / (
                    self.examples_seen - 1)

    sd = np.sqrt(variance, out=np.zeros_like(variance),
                 where=variance >= 0.0)

    normalized_sample = np.zeros(X.shape[0] + 1, dtype=np.float64)
    np.divide(X - mean, sd, where=sd != 0, out=normalized_sample[:-1])
    # Augment the sample with the bias input signal (or y intercept for
    # each target)
    normalized_sample[-1] = 1.0

    return normalized_sample
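# Illustrative sketch of the same normalization computed from running sums
# (names are hypothetical). With `sum_x` and `sum_x2` accumulated over `n`
# samples, the unbiased variance is (sum_x2 - sum_x**2 / n) / (n - 1).
import numpy as np

def _demo_normalize(x, sum_x, sum_x2, n):
    mean = sum_x / n
    variance = (sum_x2 - sum_x ** 2 / n) / (n - 1)
    sd = np.sqrt(np.maximum(variance, 0.0))
    out = np.zeros(x.shape[0] + 1)
    np.divide(x - mean, sd, where=sd != 0, out=out[:-1])
    out[-1] = 1.0    # trailing bias input
    return out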
def partial_fit(self, X, y, classes=None, sample_weight=None):
    """Partially (incrementally) fit the model.

    Parameters
    ----------
    X : numpy.ndarray of shape (n_samples, n_features)
        The features to train the model.
    y: numpy.ndarray of shape (n_samples)
        An array-like with the labels of all samples in X.
    classes: numpy.ndarray, optional (default=None)
        Array with all possible/known classes. Usage varies depending on
        the learning method.
    sample_weight: numpy.ndarray of shape (n_samples), optional (default=None)
        Samples weight. If not provided, uniform weights are assumed.
        Usage varies depending on the learning method.

    Returns
    -------
    self
    """
    r, c = get_dimensions(X)
    if self._STMSamples is None:
        self._STMSamples = np.empty(shape=(0, c))
        self._LTMSamples = np.empty(shape=(0, c))

    for i in range(r):
        self._partial_fit(X[i, :], y[i])

    return self
def partial_fit(self, X, y, sample_weight=None):
    """Partially (incrementally) fit the model.

    Parameters
    ----------
    X: numpy.ndarray of shape (n_samples, n_features)
        The data upon which the algorithm will create its model.
    y: numpy.ndarray of shape (n_samples)
        An array-like containing the target values for all samples in X.
    sample_weight: Not used.

    Returns
    -------
    KNNRegressor
        self

    Notes
    -----
    For the K-Nearest Neighbors regressor, fitting the model is the
    equivalent of inserting the newer samples in the observed window, and
    if the size_limit is reached, removing older results.
    """
    r, c = get_dimensions(X)

    for i in range(r):
        self.data_window.add_sample(X=X[i], y=y[i])

    return self
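# Illustrative usage sketch (assumption: ``knn`` follows the KNNRegressor
# interface documented above; the synthetic stream is for demonstration).
import numpy as np

def _demo_knn_regressor(knn, n_samples=200, n_features=3):
    rng = np.random.default_rng(7)
    X = rng.normal(size=(n_samples, n_features))
    y = X.sum(axis=1)
    knn.partial_fit(X[:100], y[:100])    # fill the sliding window first
    return knn.predict(X[100:110])       # then predict on unseen samples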
def predict(self, X):
    """Predict the class label for sample X.

    Parameters
    ----------
    X: numpy.ndarray
        An array of shape (n_samples, n_features) with the samples to
        predict the class label for.

    Returns
    -------
    numpy.ndarray
        A 1D array of shape (n_samples,), containing the predicted class
        labels for all instances in X.
    """
    if self._ensemble:
        if self.update_strategy == self._REPLACE_STRATEGY:
            trees_in_ensemble = sum(i is not None for i in self._ensemble)
        else:   # self.update_strategy == self._PUSH_STRATEGY
            trees_in_ensemble = len(self._ensemble)
        if trees_in_ensemble > 0:
            d_test = xgb.DMatrix(X)
            for i in range(trees_in_ensemble - 1):
                margins = self._ensemble[i].predict(d_test, output_margin=True)
                d_test.set_base_margin(margin=margins)
            predicted = self._ensemble[trees_in_ensemble - 1].predict(d_test)
            return np.array(predicted > 0.5).astype(int)
    # Ensemble is empty, return default values (0)
    return np.zeros(get_dimensions(X)[0])
def _partial_fit(self, X, y):
    if self._first_run:
        self._X_buffer = np.array([]).reshape(0, get_dimensions(X)[1])
        self._y_buffer = np.array([])
        self._first_run = False

    self._X_buffer = np.concatenate((self._X_buffer, X))
    self._y_buffer = np.concatenate((self._y_buffer, y))
    while self._X_buffer.shape[0] >= self.window_size:
        self._train_on_mini_batch(X=self._X_buffer[0:self.window_size, :],
                                  y=self._y_buffer[0:self.window_size])
        delete_idx = [i for i in range(self.window_size)]
        self._X_buffer = np.delete(self._X_buffer, delete_idx, axis=0)
        self._y_buffer = np.delete(self._y_buffer, delete_idx, axis=0)

        # Check window size and adjust it if necessary
        self._adjust_window_size()

    # Support for concept drift
    if self.detect_drift:
        correctly_classifies = self.predict(X) == y
        # Check for warning
        self._drift_detector.add_element(int(not correctly_classifies))
        # Check if there was a change
        if self._drift_detector.detected_change():
            # Reset window size
            self._reset_window_size()
            if self.update_strategy == self._REPLACE_STRATEGY:
                self._model_idx = 0
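# Illustrative sketch (hypothetical helper): the buffer-and-flush pattern
# used above, isolated. Rows accumulate until a full window is available,
# then the window is consumed for one mini-batch training step.
import numpy as np

def _demo_buffer_flush(X_buffer, window_size, train_fn):
    while X_buffer.shape[0] >= window_size:
        train_fn(X_buffer[:window_size])
        X_buffer = X_buffer[window_size:]
    return X_buffer    # leftover rows wait for the next call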
def predict(self, X):
    """Predict the target value using the mean of the leaf statistics or
    the perceptron.

    Parameters
    ----------
    X: numpy.ndarray of shape (n_samples, n_features)
        Samples for which we want to predict the labels.

    Returns
    -------
    numpy.ndarray
        Predicted target values.
    """
    r, _ = get_dimensions(X)
    predictions = []
    if self.samples_seen > 0 and self._tree_root is not None:
        for i in range(r):
            node = self._tree_root.filter_instance_to_leaf(X[i], None, -1).node
            if node.is_leaf():
                predictions.append(node.predict_one(X[i], tree=self))
            else:
                # The instance sorting ended up in a Split Node, since no
                # branch was found for some of the instance's features. Use
                # the mean prediction in this case
                predictions.append(node.stats[1] / node.stats[0])
    else:
        # Model is empty: return one default prediction per sample
        predictions = [0.0] * r
    return np.asarray(predictions)
def learn_from_instance(self, X, y, weight, ht):
    """Update the node with the provided instance.

    Parameters
    ----------
    X: numpy.ndarray of length equal to the number of features.
        Instance attributes for updating the node.
    y: int
        Instance class.
    weight: float
        Instance weight.
    ht: HoeffdingTreeClassifier
        Hoeffding Tree to update.
    """
    try:
        self._observed_class_distribution[y] += weight
    except KeyError:
        self._observed_class_distribution[y] = weight
        self._observed_class_distribution = dict(
            sorted(self._observed_class_distribution.items()))

    if self.list_attributes.size == 0:
        self.list_attributes = self._sample_features(get_dimensions(X)[1])

    for i in self.list_attributes:
        try:
            obs = self._attribute_observers[i]
        except KeyError:
            if ht.nominal_attributes is not None and i in ht.nominal_attributes:
                obs = NominalAttributeClassObserver()
            else:
                obs = NumericAttributeClassObserverGaussian()
            self._attribute_observers[i] = obs
        obs.observe_attribute_class(X[i], int(y), weight)
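# Illustrative sketch (hypothetical helper): random feature-subspace
# selection akin to the ``_sample_features`` call above, which makes each
# tree in the forest consider only a subset of the attributes.
import numpy as np

def _demo_sample_features(n_features, max_features, seed=0):
    rng = np.random.RandomState(seed)
    return rng.choice(n_features, size=max_features, replace=False)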
def predict_proba(self, X):
    """Estimate the probability of each sample in X belonging to each of
    the class-labels.

    Parameters
    ----------
    X : numpy.ndarray of shape (n_samples, n_features)
        The matrix of samples one wants to predict the class probabilities
        for.

    Returns
    -------
    A numpy.ndarray of shape (n_samples, n_labels), in which each outer
    entry is associated with the X entry of the same index. The list in
    index [i] contains len(self.target_values) elements, each of which
    represents the probability that the i-th sample of X belongs to a
    certain class-label.
    """
    predictions = deque()
    r, _ = get_dimensions(X)
    if self._observed_class_distribution == {}:
        # Model is empty, all classes equal, default to zero
        return np.zeros((r, 1))
    else:
        for i in range(r):
            votes = do_naive_bayes_prediction(
                X[i], self._observed_class_distribution,
                self._attribute_observers)
            sum_values = sum(votes.values())
            if self._classes is not None:
                y_proba = np.zeros(int(max(self._classes)) + 1)
            else:
                y_proba = np.zeros(int(max(votes.keys())) + 1)
            for key, value in votes.items():
                y_proba[int(key)] = value / sum_values if sum_values != 0 else 0.0
            predictions.append(y_proba)
    return np.array(predictions)
def partial_fit(self, X, y, classes=None, sample_weight=None):
    """Partially (incrementally) fit the model.

    Parameters
    ----------
    X : numpy.ndarray of shape (n_samples, n_features)
        The features to train the model.
    y: numpy.ndarray of shape (n_samples)
        An array-like with the class labels of all samples in X.
    classes: None
        Not used by this method.
    sample_weight: None
        Not used by this method.

    Returns
    -------
    self
    """
    row_cnt, _ = X.shape

    if self.samples_seen == 0:
        self._random_state = check_random_state(self.random_state)
        self.n_features = get_dimensions(X)[1]
        self.build_trees()

    for i in range(row_cnt):
        self._partial_fit(X[i], y[i])

    return self
def partial_fit(self, X, y, classes=None, sample_weight=None):
    """Partially (incrementally) fit the model.

    Parameters
    ----------
    X : numpy.ndarray of shape (n_samples, n_features)
        The features to train the model.
    y: numpy.ndarray of shape (n_samples)
        An array-like with the class labels of all samples in X.
    classes: numpy.ndarray, optional (default=None)
        Not used.
    sample_weight: numpy.ndarray of shape (n_samples), optional (default=None)
        Samples weight. If not provided, uniform weights are assumed.
        Usage varies depending on the learning method.

    Returns
    -------
    self
    """
    n_rows, n_cols = get_dimensions(X)
    if sample_weight is None:
        sample_weight = np.ones(n_rows)

    for i in range(n_rows):
        self._partial_fit(np.asarray([X[i]]), np.asarray([y[i]]),
                          classes=classes,
                          sample_weight=np.asarray([sample_weight[i]]))
    return self
def predict(self, X):
    """Predict classes for the passed data.

    Parameters
    ----------
    X : numpy.ndarray of shape (n_samples, n_features)
        The set of data samples to predict the class labels for.

    Returns
    -------
    A numpy.ndarray with all the predictions for the samples in X.
    """
    r, _ = get_dimensions(X)
    predictions = []
    # Compute the probabilities once for the whole batch
    y_proba = self.predict_proba(X)
    for i in range(r):
        if y_proba is None:
            # Ensemble is empty, all classes equal, default to zero
            predictions.append(0)
        else:
            # If the predicted probability of this instance being an anomaly
            # is greater than the defined threshold, then the instance is
            # classified as an anomaly.
            if y_proba[i][1] > self.anomaly_threshold:
                predictions.append(1)
            else:
                predictions.append(0)
    return np.asarray(predictions)
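# Illustrative sketch: the thresholding rule used above, isolated.
# `scores` is assumed to have shape (n_samples, 2), with column 1 holding
# the estimated probability of being an anomaly.
import numpy as np

def _demo_anomaly_decision(scores, threshold=0.5):
    return (scores[:, 1] > threshold).astype(int)

# _demo_anomaly_decision(np.array([[0.9, 0.1], [0.2, 0.8]])) -> array([0, 1])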
def predict(self, X):
    """Predict the class labels for the passed data.

    The predict function averages the predictions from all its learners
    to find the most likely prediction for the sample matrix X.

    Parameters
    ----------
    X: numpy.ndarray of shape (n_samples, n_features)
        A matrix of the samples we want to predict.

    Returns
    -------
    numpy.ndarray
        A numpy.ndarray with the label prediction for all the samples in X.
    """
    r, c = get_dimensions(X)
    proba = self.predict_proba(X)
    predictions = []
    if proba is None:
        return None
    for i in range(r):
        predictions.append(np.argmax(proba[i]))
    return np.asarray(predictions)
def _init_ensemble(self, X):
    self._set_max_features(get_dimensions(X)[1])

    # Generate a different random seed per tree
    random_states = self._random_state.randint(0, 4294967295,
                                               size=self.n_estimators,
                                               dtype='u8')
    self.ensemble = [
        ARFRegBaseLearner(
            index_original=i,
            estimator=ARFHoeffdingTreeRegressor(
                max_byte_size=self.max_byte_size,
                memory_estimate_period=self.memory_estimate_period,
                grace_period=self.grace_period,
                split_confidence=self.split_confidence,
                tie_threshold=self.tie_threshold,
                binary_split=self.binary_split,
                stop_mem_management=self.stop_mem_management,
                remove_poor_atts=self.remove_poor_atts,
                no_preprune=self.no_preprune,
                leaf_prediction=self.leaf_prediction,
                nominal_attributes=self.nominal_attributes,
                learning_ratio_perceptron=self.learning_ratio_perceptron,
                learning_ratio_decay=self.learning_ratio_decay,
                learning_ratio_const=self.learning_ratio_const,
                max_features=self.max_features,
                random_state=random_states[i]),
            instances_seen=self.instances_seen,
            drift_detection_method=self.drift_detection_method,
            warning_detection_method=self.warning_detection_method,
            performance_metric=self.weighted_vote_strategy,
            drift_detection_criteria=self.drift_detection_criteria,
            is_background_learner=False)
        for i in range(self.n_estimators)
    ]
def predict(self, X):
    """Predict the target values using the mean of the leaf statistics or
    the perceptron.

    Parameters
    ----------
    X: numpy.ndarray of shape (n_samples, n_features)
        Samples for which we want to predict the labels.

    Returns
    -------
    numpy.ndarray
        Predicted target values.
    """
    r, _ = get_dimensions(X)

    try:
        predictions = np.zeros((r, self._n_targets), dtype=np.float64)
    except AttributeError:
        warnings.warn("Calling predict without previously fitting the model "
                      "at least once.\nPredictions will default to a column "
                      "array filled with zeros.")
        return np.zeros((r, 1))

    for i in range(r):
        node = self._tree_root.filter_instance_to_leaf(X[i], None, -1).node
        if isinstance(node, SplitNode):
            # If the instance did not reach a leaf, use the mean as response
            predictions[i, :] = node.stats[1] / node.stats[0] \
                if len(node.stats) > 0 else 0.0
            continue
        predictions[i, :] = node.predict_one(X[i], tree=self)

    return predictions
def partial_fit(self, X, y, classes=None, sample_weight=None):
    """Fit an array of observations.

    Splits the input into individual observations and passes each one to
    the helper function _partial_fit. Randomly weights observations
    depending on the configuration.
    """
    if self.classes is None and classes is not None:
        self.classes = classes

    if y is not None:
        row_cnt, _ = get_dimensions(X)
        if sample_weight is None:
            sample_weight = np.ones(row_cnt)
        if row_cnt != len(sample_weight):
            raise ValueError(
                'Inconsistent number of instances ({}) and weights ({}).'.format(
                    row_cnt, len(sample_weight)))
        for i in range(row_cnt):
            if sample_weight[i] != 0.0:
                self._train_weight_seen_by_model += sample_weight[i]
                self.ex += 1
                if self.rand_weights and self.poisson >= 1:
                    # Use weights similar to ARF: this reuses a similar
                    # average grace period, etc., without having to
                    # recalculate those parameters.
                    k = self.poisson
                    sample_weight[i] = k
                self._partial_fit(X[i], y[i], sample_weight[i])
def predict(self, X):
    predictions = deque()
    r, _ = get_dimensions(X)
    # Repeat the last true target observed on the stream for every sample
    y_pred = self.stream.current_sample_y
    for i in range(r):
        predictions.append(y_pred)
    return np.array(predictions)
def _partial_fit(self, X, y, classes=None, sample_weight=None):
    self._n_samples_seen += 1
    _, n_features = get_dimensions(X)

    if not self.ensemble:
        self._init_ensemble(n_features)

    for i in range(len(self.ensemble)):
        # Get prediction for instance
        y_pred = np.asarray([np.argmax(self.ensemble[i].predict_proba(X))])
        # Update performance evaluator
        self.ensemble[i].performance_evaluator.add_result(
            y[0], y_pred[0], sample_weight[0])

        if self.training_method == self._TRAIN_RANDOM_SUBSPACES:
            # Train using random subspaces without resampling,
            # i.e. all instances are used for training.
            self.ensemble[i].partial_fit(
                X=X, y=y, classes=classes,
                sample_weight=np.asarray([1.]),
                n_samples_seen=self._n_samples_seen,
                random_state=self._random_state)
        else:
            # Train using random patches or resampling, thus we simulate
            # online bagging with Poisson(lambda=...)
            k = self._random_state.poisson(lam=self.lam)
            if k > 0:
                self.ensemble[i].partial_fit(
                    X=X, y=y, classes=classes,
                    sample_weight=np.asarray([k]),
                    n_samples_seen=self._n_samples_seen,
                    random_state=self._random_state)
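# Illustrative sketch of the Poisson(lambda) resampling used above to
# simulate online bagging: each ensemble member trains on the instance
# k times, and skips it entirely when k == 0. `lam=6.0` mirrors a common
# default, but it is an assumption here.
import numpy as np

def _demo_poisson_weights(n_learners=10, lam=6.0, seed=1):
    rng = np.random.RandomState(seed)
    return rng.poisson(lam=lam, size=n_learners)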
def predict(self, X):
    predictions = deque()
    r, _ = get_dimensions(X)
    y_proba = np.zeros((r, len(Data.classes)))
    for i in range(r):
        session_vector = Data.session_vector[-self.max_session_size:]
        for pos, y_o_idx in enumerate(session_vector):
            weight = self.w_mc if y_o_idx == session_vector[-1] else 1
            y_proba_current = self.ht.predict_proba(np.array([[y_o_idx]]))
            y_proba_current *= weight / (len(session_vector) - pos)
            # Accumulate the weighted votes for the current sample only
            y_proba[i] += y_proba_current[0]
        # Never recommend the item the user is currently interacting with
        y_proba[i][Data.session_vector[-1]] = 0.0
        nonzero = np.flatnonzero(y_proba[i])
        if len(nonzero) > 0:
            sorted_desc = np.argsort(y_proba[i][nonzero])[::-1]
            sorted_ids = nonzero[sorted_desc]
            if not Data.allow_reminders:
                sorted_ids = sorted_ids[~np.isin(sorted_ids,
                                                 Data.session_vector)]
            if not Data.allow_repeated:
                session = X[i, Data.sid]
                sorted_ids = sorted_ids[
                    ~np.isin(sorted_ids, self._rec_tracker[session])]
                self._rec_tracker[session].extend(sorted_ids[:Data.rec_size])
            y_pred = Data.classes[sorted_ids[:Data.rec_size]]
        else:
            y_pred = np.array([])
        predictions.append(y_pred)
    return np.array(predictions)