def get_class_votes(self, X, ht):
            # dist = {}
            prediction_option = ht.leaf_prediction
            # MC
            if prediction_option == MAJORITY_CLASS:
                dist = self.get_observed_class_distribution()
            # NB
            elif prediction_option == NAIVE_BAYES:
                dist = do_naive_bayes_prediction(
                    X, self._observed_class_distribution,
                    self._attribute_observers)
            # NBAdaptive
            else:
                if self._mc_correct_weight > self._nb_correct_weight:
                    dist = self.get_observed_class_distribution()
                else:
                    dist = do_naive_bayes_prediction(
                        X, self._observed_class_distribution,
                        self._attribute_observers)

            dist_sum = sum(dist.values())  # sum all values in dictionary
            normalization_factor = dist_sum * self.get_error_estimation(
            ) * self.get_error_estimation()

            if normalization_factor > 0.0:
                normalize_values_in_dict(dist, normalization_factor)

            return dist
Example #2
0
        def learn_from_instance(self, X, y, weight, ht):
            """Update the node with the provided instance.

            Parameters
            ----------
            X: numpy.ndarray of length equal to the number of features.
                Instance attributes for updating the node.
            y: int
                Instance class.
            weight: float
                The instance's weight.
            ht: HoeffdingTree
                The Hoeffding Tree to update.

            """

            y = ''.join(str(e) for e in y)
            y = int(y, 2)

            if self._observed_class_distribution == {}:
                # All target_values equal, default to class 0
                if 0 == y:
                    self._mc_correct_weight += weight
            elif max(self._observed_class_distribution,
                     key=self._observed_class_distribution.get) == y:
                self._mc_correct_weight += weight
            nb_prediction = do_naive_bayes_prediction(
                X, self._observed_class_distribution,
                self._attribute_observers)
            if max(nb_prediction, key=nb_prediction.get) == y:
                self._nb_correct_weight += weight
            super().learn_from_instance(X, y, weight, ht)
Example #3
0
        def get_class_votes(self, X, ht):
            """Get the votes per class for a given instance.

            Parameters
            ----------
            X: numpy.ndarray of length equal to the number of features.
                Instance attributes.
            ht: HoeffdingTree
                Hoeffding Tree.

            Returns
            -------
            dict (class_value, weight)
                Class votes for the given instance.

            """
            if self._mc_correct_weight > self._nb_correct_weight:
                return self._observed_class_distribution
            return do_naive_bayes_prediction(X,
                                             self._observed_class_distribution,
                                             self._attribute_observers)
Example #4
0
    def predict_proba(self, X):
        """ Predicts the probability of each sample belonging to each one of the
        known classes.

        Parameters
        ----------
        X: Numpy.ndarray, shape (n_samples, n_features)
            A matrix of the samples we want to predict.

        Returns
        -------
        numpy.ndarray
            An array of shape (n_samples, n_features), in which each outer entry is
            associated with the X entry of the same index. And where the list in
            index [i] contains len(self.target_values) elements, each of which represents
            the probability that the i-th sample of X belongs to a certain label.

        """
        predictions = deque()
        if self._observed_class_distribution == {}:
            # Model is empty, all classes equal, default to zero
            r, _ = get_dimensions(X)
            return np.zeros(r)
        else:
            r, _ = get_dimensions(X)
            for i in range(r):
                votes = do_naive_bayes_prediction(
                    X[i], self._observed_class_distribution,
                    self._attribute_observers)
                if self._classes is not None:
                    y_proba = np.zeros(int(max(self._classes)) + 1)
                else:
                    y_proba = np.zeros(int(max(votes.keys())) + 1)
                for key, value in votes.items():
                    y_proba[int(key)] = value
                predictions.append(y_proba)
        return np.array(predictions)