예제 #1
0
    def normalize_sample(self, X):
        """
        Normalize the features in order to have the same influence during
        training.

        Every numeric feature is centred on its running mean and divided by
        three times its running standard deviation. Nominal features and
        features whose standard deviation is not positive are mapped to 0.0.
        When ``_nominal_attributes`` is None, no feature is treated as
        nominal. One extra trailing entry is appended: the value that gets
        multiplied with the constant factor.

        Parameters
        ----------
        X: list or array or numpy.ndarray
            Feature values of a single sample.

        Returns
        -------
        numpy.ndarray
            Array of length ``len(X) + 1``. While at most one sample has been
            seen it is all zeros; otherwise it holds the normalized features
            followed by 1.0.
        """
        if not self.samples_seen > 1:
            # Too few samples for a mean / standard deviation: every feature
            # and the constant-factor input are zero.
            return np.zeros(len(X) + 1)

        # A missing nominal-attribute list means "all features are numeric";
        # it must not silently zero out every feature.
        nominal_attributes = self._nominal_attributes
        if nominal_attributes is None:
            nominal_attributes = ()

        normalized_sample = []
        for i, value in enumerate(X):
            if i in nominal_attributes:
                normalized_sample.append(0.0)
                continue
            mean = self.sum_of_attribute_values[i] / self.samples_seen
            sd = compute_sd(self.sum_of_attribute_squares[i],
                            self.sum_of_attribute_values[i],
                            self.samples_seen)
            if sd > 0:
                normalized_sample.append(float(value - mean) / (3 * sd))
            else:
                # Constant feature so far: no spread to scale by.
                normalized_sample.append(0.0)
        # Value to be multiplied with the constant factor
        normalized_sample.append(1.0)
        return np.asarray(normalized_sample)
예제 #2
0
    def predict(self, X):
        """Predicts the target value using mean class or the perceptron.

        With the target-mean leaf predictor, each prediction is the ratio of
        the sum of target values to the number of samples taken from the
        votes returned for that instance. With the perceptron leaf predictor,
        the normalized sample is multiplied with the instance's perceptron
        weights and the result is de-normalized with the running target mean
        and standard deviation.

        Parameters
        ----------
        X: numpy.ndarray of shape (n_samples, n_features)
            Samples for which we want to predict the labels.

        Returns
        -------
        numpy.ndarray
            Predicted target values, one per sample. While the model has not
            seen any sample, every prediction is 0.0.

        """
        r, _ = get_dimensions(X)
        if not self.samples_seen > 0:
            # Model is empty: still answer once per requested sample instead
            # of returning a single value regardless of the batch size.
            return np.zeros(r)

        predictions = []
        if self.leaf_prediction == _TARGET_MEAN:
            for i in range(r):
                # Gets observed data statistics
                votes = self.get_votes_for_instance(X[i])
                if votes == {}:
                    # Tree is empty, all target_values equal, default to zero
                    predictions.append(0.0)
                else:
                    number_of_samples_seen = votes[0]
                    sum_of_values = votes[1]
                    predictions.append(sum_of_values /
                                       number_of_samples_seen)
        elif self.leaf_prediction == _PERCEPTRON:
            if self.samples_seen > 1:
                # The de-normalization terms depend only on the running
                # target statistics, so compute them once for the batch.
                mean = self.sum_of_values / self.samples_seen
                sd = compute_sd(self.sum_of_squares,
                                self.sum_of_values, self.samples_seen)
                for i in range(r):
                    perceptron_weights = self.get_weights_for_instance(X[i])
                    if perceptron_weights is None:
                        predictions.append(0.0)
                        continue
                    normalized_sample = self.normalize_sample(X[i])
                    normalized_prediction = np.dot(perceptron_weights,
                                                   normalized_sample)
                    # De-normalize prediction (inverse of the 3 * sd scaling
                    # applied to the target during training)
                    predictions.append(normalized_prediction * sd * 3 +
                                       mean)
            else:
                # A single sample gives no spread to normalize with.
                predictions.extend([0.0] * r)
        return np.asarray(predictions)
    def normalize_target_value(self, y):
        """
        Scale a target value with the running target statistics so that it
        has the same influence during the process of training.

        The value is centred on the running mean of the targets and divided
        by three times their running standard deviation.

        Parameters
        ----------
        y: float
            target value

        Returns
        -------
        float:
            normalized target value; 0.0 while at most one sample has been
            seen or when the standard deviation is not positive
        """
        # Guard: statistics are only used once more than one sample was seen.
        if not self.samples_seen > 1:
            return 0.0

        target_mean = self.sum_of_values / self.samples_seen
        target_sd = compute_sd(self.sum_of_squares,
                               self.sum_of_values,
                               self.samples_seen)
        # Guard: without a positive spread there is nothing to scale by.
        if not target_sd > 0:
            return 0.0

        centred = float(y - target_mean)
        return centred / (3 * target_sd)