def get_class_votes(self, X, ht):
    """Get the votes per class for a given instance.

    Parameters
    ----------
    X: numpy.ndarray of length equal to the number of features.
        Instance attributes.
    ht: HoeffdingTree
        Hoeffding Tree.

    Returns
    -------
    dict (class_value, weight)
        Class votes for the given instance, scaled by the node's
        error estimation.

    """
    prediction_option = ht.leaf_prediction
    if prediction_option == MAJORITY_CLASS:
        # Majority class
        dist = self.get_observed_class_distribution()
    elif prediction_option == NAIVE_BAYES:
        # Naive Bayes
        dist = do_naive_bayes_prediction(X, self._observed_class_distribution,
                                         self._attribute_observers)
    else:
        # Naive Bayes Adaptive: use whichever predictor has been more
        # accurate on this node so far
        if self._mc_correct_weight > self._nb_correct_weight:
            dist = self.get_observed_class_distribution()
        else:
            dist = do_naive_bayes_prediction(X, self._observed_class_distribution,
                                             self._attribute_observers)

    # Scale the votes by the squared error estimation of the node, so that
    # leaves with a higher estimated error cast weaker votes
    dist_sum = sum(dist.values())
    normalization_factor = dist_sum * self.get_error_estimation() * self.get_error_estimation()
    if normalization_factor > 0.0:
        normalize_values_in_dict(dist, normalization_factor)
    return dist
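
# The helper used above, normalize_values_in_dict, comes from
# skmultiflow.utils. The sketch below (hypothetical name, illustrative body,
# not the library implementation) shows the behavior the method relies on,
# assuming in-place division of every vote by the given factor.
def _normalize_values_in_dict_sketch(dictionary, factor):
    # Divide each vote by `factor` in place: a larger normalization factor
    # (i.e. a higher error estimation) yields weaker votes.
    for key in dictionary:
        dictionary[key] /= factor
    return dictionary
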
def learn_from_instance(self, X, y, weight, ht):
    """Update the node with the provided instance.

    Parameters
    ----------
    X: numpy.ndarray of length equal to the number of features.
        Instance attributes for updating the node.
    y: list or numpy.ndarray
        Binary vector of instance labels, combined into a single
        class label before the update.
    weight: float
        The instance's weight.
    ht: HoeffdingTree
        The Hoeffding Tree to update.

    """
    # Combine the binary label vector into a single class label
    y = ''.join(str(e) for e in y)
    y = int(y, 2)
    # Track how often the majority-class predictor would have been correct
    if self._observed_class_distribution == {}:
        # All target_values equal, default to class 0
        if 0 == y:
            self._mc_correct_weight += weight
    elif max(self._observed_class_distribution,
             key=self._observed_class_distribution.get) == y:
        self._mc_correct_weight += weight
    # Track how often the Naive Bayes predictor would have been correct
    nb_prediction = do_naive_bayes_prediction(X, self._observed_class_distribution,
                                              self._attribute_observers)
    if max(nb_prediction, key=nb_prediction.get) == y:
        self._nb_correct_weight += weight
    super().learn_from_instance(X, y, weight, ht)
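
# Worked example (hypothetical helper, not part of the library) of the
# label-combination transform applied in the first two statements above: the
# binary label vector is joined into a bit string and read as a base-2
# integer, so each distinct label set maps to one single-label class.
def _demo_label_combination():
    y = [1, 0, 1]                        # labels 0 and 2 are active
    bits = ''.join(str(e) for e in y)    # '101'
    combined = int(bits, 2)              # 5: label set {0, 2} becomes class 5
    return combined
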
def get_class_votes(self, X, ht):
    """Get the votes per class for a given instance.

    Parameters
    ----------
    X: numpy.ndarray of length equal to the number of features.
        Instance attributes.
    ht: HoeffdingTree
        Hoeffding Tree.

    Returns
    -------
    dict (class_value, weight)
        Class votes for the given instance.

    """
    if self._mc_correct_weight > self._nb_correct_weight:
        return self._observed_class_distribution
    return do_naive_bayes_prediction(X, self._observed_class_distribution,
                                     self._attribute_observers)
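
# do_naive_bayes_prediction (from skmultiflow.bayes) computes, for each
# class, the class prior times the product of per-attribute likelihoods. The
# sketch below (hypothetical name, illustrative body) assumes each attribute
# observer exposes probability_of_attribute_value_given_class(att_val,
# class_val), as the scikit-multiflow observers do, and that
# attribute_observers is a dict keyed by attribute index.
def _do_naive_bayes_prediction_sketch(X, observed_class_distribution, attribute_observers):
    # votes[c] = P(c) * prod_i P(x_i | c)
    total_weight = sum(observed_class_distribution.values())
    votes = {}
    for class_value, class_weight in observed_class_distribution.items():
        votes[class_value] = class_weight / total_weight if total_weight > 0 else 0.0
        for i in range(len(X)):
            observer = attribute_observers.get(i)
            if observer is not None:
                votes[class_value] *= observer.probability_of_attribute_value_given_class(
                    X[i], class_value)
    return votes
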
def predict_proba(self, X):
    """Predict the probability of each sample belonging to each of the known classes.

    Parameters
    ----------
    X: numpy.ndarray, shape (n_samples, n_features)
        A matrix of the samples we want to predict.

    Returns
    -------
    numpy.ndarray
        An array of shape (n_samples, n_classes), in which each row is
        associated with the X entry of the same index and contains the
        probability that the i-th sample of X belongs to each of the
        known class labels.

    """
    r, _ = get_dimensions(X)
    if self._observed_class_distribution == {}:
        # Model is empty, all classes equal, default to zero
        return np.zeros(r)
    predictions = deque()
    for i in range(r):
        votes = do_naive_bayes_prediction(X[i], self._observed_class_distribution,
                                          self._attribute_observers)
        if self._classes is not None:
            y_proba = np.zeros(int(max(self._classes)) + 1)
        else:
            y_proba = np.zeros(int(max(votes.keys())) + 1)
        for key, value in votes.items():
            y_proba[int(key)] = value
        predictions.append(y_proba)
    return np.array(predictions)
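
# Hedged usage sketch (hypothetical helper) of this prediction path, assuming
# the surrounding class is scikit-multiflow's incremental NaiveBayes estimator
# with the usual partial_fit stream API; the import path and class name are
# assumptions, adjust if this method lives elsewhere.
def _demo_predict_proba():
    import numpy as np
    from skmultiflow.bayes import NaiveBayes

    X = np.array([[0.5, 1.2], [1.5, 0.3], [0.4, 1.1]])
    y = np.array([0, 1, 0])
    nb = NaiveBayes()
    nb.partial_fit(X, y, classes=[0, 1])
    proba = nb.predict_proba(np.array([[0.6, 1.0]]))
    # One row per sample; row length is max(class index) + 1, so class labels
    # are assumed to be non-negative integers, and non-contiguous labels
    # leave zero-filled columns.
    return proba
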