def get_class_votes(self, X, ht):
    """Get the votes per class for a given instance.

    Selects the leaf predictor according to ``ht.leaf_prediction``
    (majority class, naive Bayes, or — by default — whichever of the
    two has been more accurate at this node so far), then scales the
    votes by ``sum(votes) * error_estimation ** 2`` when that factor
    is positive.

    Parameters
    ----------
    X: numpy.ndarray of length equal to the number of features.
        Instance attributes.
    ht: HoeffdingTree
        Hoeffding Tree the node belongs to.

    Returns
    -------
    dict (class_value, weight)
        Class votes for the given instance.
    """
    option = ht.leaf_prediction
    if option == ht._MAJORITY_CLASS:
        # MC: vote with the raw class distribution observed at this leaf.
        votes = self.get_observed_class_distribution()
    elif option == ht._NAIVE_BAYES:
        # NB
        votes = do_naive_bayes_prediction(
            X, self._observed_class_distribution, self._attribute_observers)
    elif self._mc_correct_weight > self._nb_correct_weight:
        # NBAdaptive (default): majority class has been more accurate so far.
        votes = self.get_observed_class_distribution()
    else:
        # NBAdaptive (default): naive Bayes has been at least as accurate.
        votes = do_naive_bayes_prediction(
            X, self._observed_class_distribution, self._attribute_observers)

    total = sum(votes.values())
    # Normalize by total * error^2; skipped when the factor is not positive
    # (e.g. empty votes or a zero error estimate).
    factor = total * self.get_error_estimation() * self.get_error_estimation()
    if factor > 0.0:
        votes = normalize_values_in_dict(votes, factor, inplace=False)
    return votes
def learn_from_instance(self, X, y, weight, ht):
    """Update the node with the provided instance.

    Tracks how much weight the majority-class and naive Bayes predictors
    would each have classified correctly, then delegates the actual
    statistics update to the parent class.

    Parameters
    ----------
    X: numpy.ndarray of length equal to the number of features.
        Instance attributes for updating the node.
    y: int
        Instance class.
    weight: float
        The instance's weight.
    ht: HoeffdingTreeClassifier
        The Hoeffding Tree to update.
    """
    # Majority-class bookkeeping. With no observations yet, all classes
    # are considered equal and the prediction defaults to class 0.
    if not self._observed_class_distribution:
        if y == 0:
            self._mc_correct_weight += weight
    else:
        majority = max(self._observed_class_distribution,
                       key=self._observed_class_distribution.get)
        if majority == y:
            self._mc_correct_weight += weight
    # Naive Bayes bookkeeping.
    nb_votes = do_naive_bayes_prediction(
        X, self._observed_class_distribution, self._attribute_observers)
    if max(nb_votes, key=nb_votes.get) == y:
        self._nb_correct_weight += weight
    super().learn_from_instance(X, y, weight, ht)
def predict_proba(self, X):
    """Estimate the probability of each sample in X belonging to each
    class-label.

    Parameters
    ----------
    X : Numpy.ndarray of shape (n_samples, n_features)
        The matrix of samples one wants to predict the class
        probabilities for.

    Returns
    -------
    A numpy.ndarray of shape (n_samples, n_labels), in which each outer
    entry is associated with the X entry of the same index. The list at
    index [i] contains len(self.target_values) elements, each of which
    represents the probability that the i-th sample of X belongs to a
    certain class-label.
    """
    n_samples, _ = get_dimensions(X)
    if not self._observed_class_distribution:
        # Model is empty: all classes equal, default to zero.
        return np.zeros((n_samples, 1))

    predictions = deque()
    for i in range(n_samples):
        votes = do_naive_bayes_prediction(
            X[i], self._observed_class_distribution, self._attribute_observers)
        total = sum(votes.values())
        # Size the probability vector from the known classes when
        # available, otherwise from the largest voted label.
        if self._classes is not None:
            n_labels = int(max(self._classes)) + 1
        else:
            n_labels = int(max(votes.keys())) + 1
        y_proba = np.zeros(n_labels)
        if total != 0:
            for label, vote in votes.items():
                y_proba[int(label)] = vote / total
        predictions.append(y_proba)
    return np.array(predictions)
def get_class_votes(self, X, hot):
    """Get the votes per class for a given instance.

    Uses naive Bayes once the node has accumulated enough weight,
    otherwise falls back to the parent's (majority class) votes.

    Parameters
    ----------
    X: numpy.ndarray of length equal to the number of features.
        Instance attributes.
    hot: tree the node belongs to; expected to expose ``nb_threshold``.

    Returns
    -------
    dict (class_value, weight)
        Class votes for the given instance.
    """
    # Fix: the previous condition `get_weight_seen() >= 0` was always
    # true (accumulated weight is non-negative), making the fallback
    # branch unreachable. Gate naive Bayes on the tree's nb_threshold,
    # consistent with the other leaf-node implementations in this file.
    if self.get_weight_seen() >= hot.nb_threshold:
        return do_naive_bayes_prediction(
            X, self._observed_class_distribution, self._attribute_observers)
    else:
        return super().get_class_votes(X, hot)
def learn_from_instance(self, X, y, weight, hot):
    """Update the node with the provided instance, tracking the running
    accuracy of the majority-class and naive Bayes predictors before
    delegating the statistics update to the parent class.

    Parameters
    ----------
    X: numpy.ndarray of length equal to the number of features.
        Instance attributes for updating the node.
    y: int
        Instance class.
    weight: float
        The instance's weight.
    hot: tree the node belongs to.
    """
    # With no observations yet all classes are equal; the majority-class
    # prediction defaults to class 0.
    if not self._observed_class_distribution:
        if y == 0:
            self._mc_correct_weight += weight
    else:
        majority = max(self._observed_class_distribution,
                       key=self._observed_class_distribution.get)
        if majority == y:
            self._mc_correct_weight += weight
    # Credit naive Bayes when its top-voted class matches the label.
    nb_votes = do_naive_bayes_prediction(
        X, self._observed_class_distribution, self._attribute_observers)
    if max(nb_votes, key=nb_votes.get) == y:
        self._nb_correct_weight += weight
    super().learn_from_instance(X, y, weight, hot)
def get_class_votes(self, X, ht):
    """Get the votes per class for a given instance.

    Adaptively answers with the observed class distribution when the
    majority-class predictor has been more accurate at this node than
    naive Bayes, and with the naive Bayes prediction otherwise.

    Parameters
    ----------
    X: numpy.ndarray of length equal to the number of features.
        Instance attributes.
    ht: HoeffdingTree
        Hoeffding Tree.

    Returns
    -------
    dict (class_value, weight)
        Class votes for the given instance.
    """
    majority_wins = self._mc_correct_weight > self._nb_correct_weight
    if majority_wins:
        return self._observed_class_distribution
    return do_naive_bayes_prediction(
        X, self._observed_class_distribution, self._attribute_observers)
def get_class_votes(self, X, vfdr):
    """Get the votes per class for a given instance.

    Answers with naive Bayes only when the rule set enables it and the
    node has seen at least ``vfdr.nb_threshold`` weight; otherwise falls
    back to the observed class distribution.

    Parameters
    ----------
    X: numpy.ndarray of length equal to the number of features.
        Instance attributes.
    vfdr: AVFDR
        Very Fast Decision Rules.

    Returns
    -------
    dict (class_value, weight)
        Class votes for the given instance.
    """
    # De Morgan of the original gate; operand evaluation order preserved.
    if self.get_weight_seen() < vfdr.nb_threshold or not vfdr.nb_prediction:
        return self.observed_class_distribution
    return do_naive_bayes_prediction(
        X, self.observed_class_distribution, self._attribute_observers)
def get_class_votes(self, X, ht):
    """Get the votes per class for a given instance.

    Uses naive Bayes once the node has accumulated at least
    ``ht.nb_threshold`` weight; before that, defers to the parent's
    votes.

    Parameters
    ----------
    X: numpy.ndarray of length equal to the number of features.
        Instance attributes.
    ht: HoeffdingTreeClassifier
        Hoeffding Tree.

    Returns
    -------
    dict (class_value, weight)
        Class votes for the given instance.
    """
    enough_weight = self.get_weight_seen() >= ht.nb_threshold
    if not enough_weight:
        # Too little data for a reliable naive Bayes estimate yet.
        return super().get_class_votes(X, ht)
    return do_naive_bayes_prediction(
        X, self._observed_class_distribution, self._attribute_observers)
def predict_one(self, X, *, tree=None):
    """Compute the class votes for a single instance.

    Dispatches on ``tree.leaf_prediction`` — majority class, naive
    Bayes, or (by default) the parent's adaptive prediction — and then
    scales the votes by ``sum(votes) * error_estimation ** 2`` whenever
    that factor is positive.

    Parameters
    ----------
    X: instance attributes.
    tree: the tree the node belongs to (keyword-only).

    Returns
    -------
    dict (class_value, weight)
        Class votes for the given instance.
    """
    option = tree.leaf_prediction
    if option == tree._MAJORITY_CLASS:
        # MC: raw class statistics at this leaf.
        votes = self.stats
    elif option == tree._NAIVE_BAYES:
        # NB
        votes = do_naive_bayes_prediction(X, self.stats, self.attribute_observers)
    else:
        # NBAdaptive (default): let the parent pick MC vs NB.
        votes = super().predict_one(X, tree=tree)

    total = sum(votes.values())
    # Normalize by total * error^2; skipped when the factor is not positive.
    factor = total * self.error_estimation * self.error_estimation
    if factor > 0.0:
        votes = normalize_values_in_dict(votes, factor, inplace=False)
    return votes
def get_class_votes(self, X, hot):
    """Get the votes per class for a given instance.

    Answers with naive Bayes when it has been at least as accurate at
    this node as the majority-class predictor; otherwise returns the
    observed class distribution.

    Parameters
    ----------
    X: numpy.ndarray of length equal to the number of features.
        Instance attributes.
    hot: tree the node belongs to.

    Returns
    -------
    dict (class_value, weight)
        Class votes for the given instance.
    """
    if self._nb_correct_weight >= self._mc_correct_weight:
        return do_naive_bayes_prediction(
            X, self._observed_class_distribution, self._attribute_observers)
    return self._observed_class_distribution