Example #1
        def learn_from_instance(self, X, y, weight, rht):

            if self.perceptron_weight is None:
                # Creates matrix of perceptron random weights
                _, rows = get_dimensions(y)
                _, cols = get_dimensions(X)

                self.perceptron_weight = self.random_state.uniform(
                    -1, 1, (rows, cols + 1))
                self.normalize_perceptron_weights()

            try:
                self._observed_class_distribution[0] += weight
            except KeyError:
                self._observed_class_distribution[0] = weight

            if rht.learning_ratio_const:
                learning_ratio = rht.learning_ratio_perceptron
            else:
                learning_ratio = rht.learning_ratio_perceptron / \
                                (1 + self._observed_class_distribution[0] *
                                 rht.learning_ratio_decay)

            try:
                self._observed_class_distribution[1] += weight * y
                self._observed_class_distribution[2] += weight * y * y
            except KeyError:
                self._observed_class_distribution[1] = weight * y
                self._observed_class_distribution[2] = weight * y * y

            for i in range(int(weight)):
                self.update_weights(X, y, learning_ratio, rht)
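
The three running statistics kept above (weight sum, weighted target sum, weighted sum of squared targets) are enough to recover the targets' mean and variance incrementally. A standalone sketch with invented numbers (the variable names are not from the source):

import numpy as np

weight_sum = 3.0                    # plays the role of _observed_class_distribution[0]
y_sum = np.array([6.0, 9.0])        # _observed_class_distribution[1]
y_sq_sum = np.array([14.0, 29.0])   # _observed_class_distribution[2]

mean = y_sum / weight_sum
variance = (y_sq_sum - y_sum ** 2 / weight_sum) / (weight_sum - 1)
print(mean, variance)               # [2. 3.] [1. 1.]
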
Example #2
        def learn_from_instance(self, X, y, weight, rht):
            """Update the node with the provided instance.

            Parameters
            ----------
            X: numpy.ndarray of length equal to the number of features.
                Instance attributes for updating the node.
            y: numpy.ndarray of length equal to the number of targets.
                Instance targets.
            weight: float
                Instance weight.
            rht: RegressionHoeffdingTree
                Regression Hoeffding Tree to update.
            """
            if self.perceptron_weight is None:
                self.perceptron_weight = {}
                # Creates matrix of perceptron random weights
                _, rows = get_dimensions(y)
                _, cols = get_dimensions(X)

                self.perceptron_weight[0] = \
                    self.random_state.uniform(-1.0, 1.0, (rows, cols + 1))
                # Cascade Stacking
                self.perceptron_weight[1] = \
                    self.random_state.uniform(-1.0, 1.0, (rows, rows + 1))
                self.normalize_perceptron_weights()

            try:
                self._observed_class_distribution[0] += weight
            except KeyError:
                self._observed_class_distribution[0] = weight

            if rht.learning_ratio_const:
                learning_ratio = rht.learning_ratio_perceptron
            else:
                learning_ratio = rht.learning_ratio_perceptron / \
                                 (1 + self._observed_class_distribution[0] *
                                  rht.learning_ratio_decay)

            try:
                self._observed_class_distribution[1] += weight * y
                self._observed_class_distribution[2] += weight * y * y
            except KeyError:
                self._observed_class_distribution[1] = weight * y
                self._observed_class_distribution[2] = weight * y * y

            for i in range(int(weight)):
                self.update_weights(X, y, learning_ratio, rht)

            for i, x in enumerate(X):
                try:
                    obs = self._attribute_observers[i]
                except KeyError:
                    # Create the attribute observer, if not already defined
                    if rht.nominal_attributes is not None and i in rht.nominal_attributes:
                        obs = NominalAttributeRegressionObserver()
                    else:
                        obs = NumericAttributeRegressionObserverMultiTarget()
                    self._attribute_observers[i] = obs
                obs.observe_attribute_class(x, y, weight)
Example #3
        def update_weights(self, X, y, learning_ratio, rht):
            """Update the perceptron weights

            Parameters
            ----------
            X: numpy.ndarray of length equal to the number of features.
                Instance attributes for updating the node.
            y: numpy.ndarray of length equal to the number of targets.
                Targets values.
            learning_ratio: float
                Perceptron learning ratio.
            rht: RegressionHoeffdingTree
                Regression Hoeffding Tree to update.
            """
            normalized_sample = rht.normalize_sample(X)
            normalized_base_pred = self._predict_base(normalized_sample)

            normalized_target_value = rht.normalized_target_value(y)

            self.perceptron_weight[0] += learning_ratio * \
                (normalized_target_value - normalized_base_pred)[:, None] @ \
                normalized_sample[None, :]

            # Add bias term
            normalized_base_pred = np.append(normalized_base_pred, 1.0)
            normalized_meta_pred = self._predict_meta(normalized_base_pred)

            self.perceptron_weight[1] += learning_ratio * \
                (normalized_target_value - normalized_meta_pred)[:, None] @ \
                normalized_base_pred[None, :]

            self.normalize_perceptron_weights()

            # Update faded errors for the predictors
            # The considered errors are normalized, since they are based on
            # mean centered and sd scaled values
            self.fMAE_M = 0.95 * self.fMAE_M + np.absolute(
                normalized_target_value - rht.normalized_target_value(
                    self._observed_class_distribution[1] /
                    self._observed_class_distribution[0]))

            # Ignore added bias term in the comparison
            self.fMAE_P = 0.95 * self.fMAE_P + np.absolute(
                normalized_target_value - normalized_base_pred[:-1])

            self.fMAE_SP = 0.95 * self.fMAE_SP + np.absolute(
                normalized_target_value - normalized_meta_pred)
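
`_predict_base` and `_predict_meta` are not shown here; given the weight shapes initialized in Example #2, they are presumably plain linear maps over inputs that carry a trailing bias input of 1.0. A minimal sketch under that assumption:

import numpy as np

def predict_base(w0, normalized_sample):
    # w0 has shape (n_targets, n_features + 1); sample ends with bias 1.0
    return w0 @ normalized_sample

def predict_meta(w1, normalized_base_pred):
    # w1 has shape (n_targets, n_targets + 1); base prediction ends with bias 1.0
    return w1 @ normalized_base_pred
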
Example #4
    def normalize_sample(self, X):
        """Normalize the features in order to have the same influence during the
        process of training.

        Parameters
        ----------
        X: np.array
            features.
        Returns
        -------
        np.array:
            normalized samples
        """
        if self.examples_seen <= 1:
            _, c = get_dimensions(X)
            return np.zeros((c + 1), dtype=np.float64)

        mean = self.sum_of_attribute_values / self.examples_seen
        variance = (self.sum_of_attribute_squares -
                    (self.sum_of_attribute_values**2) / self.examples_seen) / (
                        self.examples_seen - 1)

        sd = np.sqrt(variance,
                     out=np.zeros_like(variance),
                     where=variance >= 0.0)

        normalized_sample = np.zeros(X.shape[0] + 1, dtype=np.float64)
        np.divide(X - mean, sd, where=sd != 0, out=normalized_sample[:-1])
        # Augments sample with the bias input signal (or y intercept for
        # each target)
        normalized_sample[-1] = 1.0

        return normalized_sample
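
The same z-score-plus-bias scheme, as a self-contained sketch with invented running sums:

import numpy as np

sum_vals = np.array([10.0, 40.0])   # running sum of each feature
sum_sqs = np.array([30.0, 420.0])   # running sum of squares
seen = 4                            # examples seen so far

mean = sum_vals / seen
sd = np.sqrt((sum_sqs - sum_vals ** 2 / seen) / (seen - 1))

x = np.array([4.0, 12.0])
normalized = np.append((x - mean) / sd, 1.0)  # trailing 1.0 is the bias input
print(normalized)
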
    def transform(self, X):
        r, c = get_dimensions(X)

        # View each 64-bit value as 8 raw bytes, then unpack every byte
        # into its 8 bits: each original value becomes 64 binary features
        custom_dtype_b = np.dtype([('bytes', np.uint8, 8)])
        X_bytes = X.view(custom_dtype_b)
        X_bits = np.unpackbits(X_bytes['bytes'], axis=1)
        return X_bits.reshape(r, c * 8 * 8)
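
A quick check of what this transform does (usage assumed): each float64 value views as 8 bytes, and unpackbits turns every byte into 8 bits, so each original value yields 64 binary features.

import numpy as np

X = np.array([[1.0, 2.0]])                      # shape (1, 2), float64
as_bytes = X.view(np.dtype([('bytes', np.uint8, 8)]))
bits = np.unpackbits(as_bytes['bytes'], axis=1)
print(bits.reshape(1, 2 * 64).shape)            # (1, 128)
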
    def init_ensemble(self, X):
        self.ensemble = [None] * self.n_estimators

        self._set_max_features(get_dimensions(X)[1])

        for i in range(self.n_estimators):
            self.ensemble[i] = ARFBaseLearner(i,
                                              ARFHoeffdingTree(max_byte_size=self.max_byte_size,
                                                               memory_estimate_period=self.memory_estimate_period,
                                                               grace_period=self.grace_period,
                                                               split_criterion=self.split_criterion,
                                                               split_confidence=self.split_confidence,
                                                               tie_threshold=self.tie_threshold,
                                                               binary_split=self.binary_split,
                                                               stop_mem_management=self.stop_mem_management,
                                                               remove_poor_atts=self.remove_poor_atts,
                                                               no_preprune=self.no_preprune,
                                                               leaf_prediction=self.leaf_prediction,
                                                               nb_threshold=self.nb_threshold,
                                                               nominal_attributes=self.nominal_attributes,
                                                               max_features=self.max_features,
                                                               random_state=self._init_random_state),
                                              self.instances_seen,
                                              self.drift_detection_method,
                                              self.warning_detection_method,
                                              False)
        def learn_from_instance(self, X, y, weight, ht):
            """Update the node with the provided instance.

            Parameters
            ----------
            X: numpy.ndarray of length equal to the number of features.
                Instance attributes for updating the node.
            y: int
                Instance class.
            weight: float
                Instance weight.
            ht: HoeffdingTree
                Hoeffding Tree to update.

            """
            try:
                self._observed_class_distribution[y] += weight
            except KeyError:
                self._observed_class_distribution[y] = weight
            if self.list_attributes.size == 0:
                self.list_attributes = self._sample_features(
                    get_dimensions(X)[1])

            for i in self.list_attributes:
                try:
                    obs = self._attribute_observers[i]
                except KeyError:
                    if ht.nominal_attributes is not None and i in ht.nominal_attributes:
                        obs = NominalAttributeClassObserver()
                    else:
                        obs = NumericAttributeClassObserverGaussian()
                    self._attribute_observers[i] = obs
                obs.observe_attribute_class(X[i], int(y), weight)
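
`_sample_features` is not shown; for random-forest-style Hoeffding trees it would plausibly draw a random subset of attribute indices per leaf. A hypothetical sketch:

import numpy as np

def sample_features_sketch(n_features, max_features, random_state):
    # Pick max_features attribute indices at random, without replacement
    return random_state.choice(n_features, size=max_features, replace=False)

print(sample_features_sketch(10, 3, np.random.RandomState(42)))
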
Example #8
 def _partial_fit(self, X):
     row_cnt, _ = get_dimensions(X)
     # No per-sample weights are passed in, so assume uniform weights
     sample_weight = np.ones(row_cnt)
     for i in range(row_cnt):
         if sample_weight[i] != 0.0:
             self.samples_seen += sample_weight[i]
             try:
                 self.sum_of_attribute_values = np.add(self.sum_of_attribute_values,
                                                       np.multiply(sample_weight[i], X[i]))
                 self.sum_of_attribute_squares = np.add(
                     self.sum_of_attribute_squares, np.multiply(sample_weight[i], np.power(X[i], 2))
                 )
             except ValueError:
                 # Running sums do not have a compatible shape yet:
                 # start them from the current sample
                 self.sum_of_attribute_values = np.multiply(sample_weight[i], X[i])
                 self.sum_of_attribute_squares = np.multiply(sample_weight[i], np.power(X[i], 2))
    def predict(self, X):
        """ predict

        The predict function will average the predictions from all its learners
        to find the most likely prediction for the sample matrix X.

        Parameters
        ----------
        X: Numpy.ndarray of shape (n_samples, n_features)
            A matrix of the samples we want to predict.

        Returns
        -------
        numpy.ndarray
            A numpy.ndarray with the label prediction for all the samples in X.

        """
        r, c = get_dimensions(X)
        proba = self.predict_proba(X)
        predictions = []
        if proba is None:
            return None
        for i in range(r):
            predictions.append(np.argmax(proba[i]))
        return np.asarray(predictions)
def run(X, y, hyperParams):
    """ run

    Test function for SAMKNN, not integrated with evaluation modules.

    Parameters
    ----------
    X: numpy.ndarray of shape (n_samples, n_features)
        The feature matrix, with values coded as 64-bit floats.

    y: numpy.array of size n_samples
        The labels for all the samples in X, coded as 8-bit integers.

    hyperParams: dict
        A dictionary containing the __init__ params for the SAMKNN.

    """
    r, c = get_dimensions(X)
    classifier = SAMKNN(n_neighbors=hyperParams['nNeighbours'],
                        max_window_size=hyperParams['maxSize'],
                        weighting=hyperParams['knnWeights'],
                        stm_size_option=hyperParams['STMSizeAdaption'],
                        use_ltm=hyperParams['useLTM'])
    logging.info('applying model on dataset')
    predicted_labels = []
    true_labels = []
    for i in range(r):
        pred = classifier.predict(np.asarray([X[i]]))
        predicted_labels.append(pred[0])
        true_labels.append(y[i])
        classifier = classifier.partial_fit(np.asarray([X[i]]), np.asarray([y[i]]), None)
        if r >= 20 and i % (r // 20) == 0:
            logging.info(str(int(i / (r / 20)) * 5) + "%")
    accuracy = accuracy_score(true_labels, predicted_labels)
    logging.info('error rate %.2f%%' % (100 - 100 * accuracy))
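
A hypothetical invocation of run (the dataset and the hyper-parameter values below are illustrative only):

import logging
import numpy as np

logging.basicConfig(level=logging.INFO)
X = np.random.rand(200, 4)
y = np.random.randint(0, 2, size=200).astype(np.int8)
run(X, y, {'nNeighbours': 5, 'maxSize': 100, 'knnWeights': 'distance',
           'STMSizeAdaption': 'maxACCApprox', 'useLTM': True})
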
Example #11
    def predict_proba(self, X):
        """ Estimate the probability of X belonging to each class-labels.

        Parameters
        ----------
        X: Numpy.ndarray of shape (n_samples, n_features)

        Returns
        -------
        numpy.ndarray
            A 2D array of shape (n_samples, n_classes), where the i-th row
            contains one entry per known class label, representing the
            probability that the i-th sample of X belongs to that class
            label.

        """
        r, c = get_dimensions(X)
        if self.data_window is None or self.data_window.size < self.n_neighbors:
            # The model is empty, defaulting to zero
            return np.zeros(shape=(r, 1))
        proba = []

        self.classes = list(set().union(
            self.classes,
            np.unique(self.data_window.targets_buffer.astype(int))))

        new_dist, new_ind = self._get_neighbors(X)
        for i in range(r):
            votes = [0.0 for _ in range(int(max(self.classes) + 1))]
            for index in new_ind[i]:
                votes[int(self.data_window.targets_buffer[index])] += 1. / len(
                    new_ind[i])
            proba.append(votes)

        return np.asarray(proba)
Example #12
    def transform(self, X):
        """ transform
        
        Transform one hot features in the X matrix into int coded 
        categorical features.
        
        Parameters
        ----------
        X: numpy.ndarray of shape (n_samples, n_features)
            The sample or set of samples that should be transformed.
         
        Returns
        -------
        numpy.ndarray
            The transformed data.
        
        """
        r, c = get_dimensions(X)

        new_width = c
        for i in range(len(self.categorical_list)):
            new_width -= len(self.categorical_list[i]) - 1

        ret = np.zeros((0, new_width), dtype=X.dtype)
        for i in range(r):
            ret = np.concatenate((ret, self._transform(X[i, :], new_width)),
                                 axis=0)

        return ret
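
`_transform` is not shown; reversing one-hot coding for a single row presumably collapses each categorical group to the index of its active column. A toy sketch under that assumption:

import numpy as np

row = np.array([0.0, 0.0, 1.0, 0.0, 5.5])  # one one-hot group of 4 columns plus 1 numeric column
categorical_group = [0, 1, 2, 3]           # column indices of the group
code = int(np.argmax(row[categorical_group]))
print(np.array([code, row[4]]))            # [2.  5.5]
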
Example #13
    def partial_fit(self, X, y, sample_weight=None):
        """Incrementally trains the model. Train samples (instances) are
        composed of X attributes and their corresponding targets y.

        Tasks performed before training:

        * Verify instance weight. If not provided, uniform weights (1.0) are
          assumed.
        * If more than one instance is passed, loop through X and pass
          instances one at a time.
        * Update weight seen by model.

        Training tasks:

        * If the tree is empty, create a leaf node as the root.
        * If the tree is already initialized, find the corresponding leaf for
          the instance and update the leaf node statistics.
        * If growth is allowed and the number of instances that the leaf has
          observed between split attempts exceed the grace period then attempt
          to split.

        Parameters
        ----------
        X: numpy.ndarray of shape (n_samples, n_features)
            Instance attributes.
        y: numpy.ndarray of shape (n_samples, n_targets)
            Target values.
        sample_weight: float or array-like
            Samples weight. If not provided, uniform weights are assumed.
        """
        if y is not None:
            # Set the number of targets once
            if not self._n_targets_set:
                _, self._n_targets = get_dimensions(y)
                self._n_targets_set = True

            row_cnt, _ = get_dimensions(X)
            if sample_weight is None:
                sample_weight = np.ones(row_cnt)
            if row_cnt != len(sample_weight):
                raise ValueError(
                    'Inconsistent number of instances ({}) and weights ({}).'.
                    format(row_cnt, len(sample_weight)))
            for i in range(row_cnt):
                if sample_weight[i] != 0.0:
                    self._train_weight_seen_by_model += sample_weight[i]
                    self._partial_fit(X[i], y[i], sample_weight[i])
Example #14
    def partial_fit(self, X, y, classes=None, sample_weight=None):
        """ Incrementally trains the model. Train samples (instances) are composed of X attributes and their
        corresponding targets y.

        Parameters
        ----------
        X: numpy.ndarray of shape (n_samples, n_features)
            Instance attributes.
        y: array_like
            Classes (targets) for all samples in X.
        classes: list or numpy.array
            Contains the class values in the stream. If defined, will be used to define the length of the arrays
            returned by `predict_proba`
        sample_weight: float or array-like
            Samples weight. If not provided, uniform weights are assumed.

        Notes
        -----
        Tasks performed before training:

        * Verify instance weight. If not provided, uniform weights (1.0) are assumed.
        * If more than one instance is passed, loop through X and pass instances one at a time.
        * Update weight seen by model.

        Training tasks:

        * If the tree is empty, create a leaf node as the root.
        * If the tree is already initialized, find the path from the root to
          the corresponding leaf for the instance and sort the instance down
          the tree.

          * Reevaluate the best split for each internal node.
          * Attempt to split the leaf.
        """
        if self.classes is None and classes is not None:
            self.classes = classes
        if y is not None:
            if sample_weight is None:
                sample_weight = np.array([1.0])
            row_cnt, _ = get_dimensions(X)
            wrow_cnt, _ = get_dimensions(sample_weight)
            if row_cnt != wrow_cnt:
                sample_weight = [sample_weight[0]] * row_cnt
            for i in range(row_cnt):
                if sample_weight[i] != 0.0:
                    self._train_weight_seen_by_model += sample_weight[i]
                    self._partial_fit(X[i], y[i], sample_weight[i])
 def predict(self, X):
     r, _ = get_dimensions(X)
     predictions = []
     y_proba = self.predict_proba(X)
     for i in range(r):
         index = np.argmax(y_proba[i])
         predictions.append(index)
     return np.array(predictions)
    def predict_proba(self, X):
        """ Estimate the probability of X belonging to each class-label.

        Parameters
        ----------
        X : numpy.ndarray of shape (n_samples, n_features)
            The matrix of samples to predict the class probabilities for.

        Raises
        ------
        ValueError: A ValueError is raised if the number of classes in the
        base learner exceeds that of the ensemble learner.

        Returns
        -------
        A numpy.ndarray of shape (n_samples, n_labels), in which each outer
        entry is associated with the X entry of the same index, and where
        the list at index [i] contains len(self.target_values) elements,
        each representing the probability that the i-th sample of X belongs
        to a certain class-label.

        Notes
        -----
        Calculates the probability of each sample in X belonging to each
        of the labels, based on the base estimator. This is done by predicting
        the class probability for each one of the ensemble's classifier, and
        then taking the absolute probability from the ensemble itself.

        """
        if self.enable_code_matrix:
            return self.predict_binary_proba(X)
        proba = []
        r, c = get_dimensions(X)
        try:
            for i in range(self.actual_n_estimators):
                partial_proba = self.ensemble[i].predict_proba(X)
                if len(partial_proba[0]) > max(self.classes) + 1:
                    raise ValueError("The number of classes in the base learner is larger than in"
                                     " the ensemble.")

                if len(proba) < 1:
                    for row_idx in range(r):
                        proba.append([0.0] * len(partial_proba[row_idx]))

                for row_idx in range(r):
                    for class_idx in range(len(partial_proba[row_idx])):
                        try:
                            proba[row_idx][class_idx] += partial_proba[row_idx][class_idx]
                        except IndexError:
                            proba[row_idx].append(partial_proba[row_idx][class_idx])
        except (ValueError, TypeError):
            return np.zeros((r, 1))

        return self._normalize_probabilities(rows=r, y_proba=proba)
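
`_normalize_probabilities` is not shown either; a plausible sketch (an assumption, not the library's code) rescales each row of summed votes to sum to one:

import numpy as np

def normalize_probabilities_sketch(rows, y_proba):
    out = np.asarray(y_proba, dtype=float)[:rows]  # assumes rectangular rows
    row_sums = out.sum(axis=1, keepdims=True)
    row_sums[row_sums == 0.0] = 1.0                # leave all-zero rows as they are
    return out / row_sums
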
Example #17
 def predict(self, X):
     r, _ = get_dimensions(X)
     predictions = []
     y_proba = self.predict_proba(X)
     for i in range(r):
         index = np.argmax(y_proba[i])
         predictions.append(index)
     return np.array(predictions)
    def partial_fit(self, X, y, classes=None, weight=1.0):
        if self.classes is None and classes is not None:
            self.classes = classes

        if y is not None:
            row_cnt, _ = get_dimensions(X)
            weight = check_weights(weight, expand_length=row_cnt)
            for i in range(row_cnt):
                if weight[i] != 0.0:
                    self._train_weight_seen_by_model += weight[i]
                    self._partial_fit(X[i], y[i], self.classes, weight[i])
Example #19
 def transform(self, X):
     self._partial_fit(X)
     normalized_samples = []
     if self.samples_seen > 0:
         r, c = get_dimensions(X)
         for i in range(r):
             normalized_samples.append(self.normalize_sample(X[i]))
     return np.array(normalized_samples)
Example #20
    def predict(self, X):
        r, c = get_dimensions(X)
        predictedLabel = []
        if self._STMSamples is None:
            self._STMSamples = np.empty(shape=(0, c))
            self._LTMSamples = np.empty(shape=(0, c))

        for i in range(r):
            distancesSTM = SAMKNN.get_distances(X[i], self._STMSamples)
            predictedLabel.append(self.predictFct(X[i], None, distancesSTM))
        return predictedLabel
Example #21
    def partial_fit(self, X, y, classes=None, weight=None):
        """Processes a new sample."""
        r, c = get_dimensions(X)
        if self._STMSamples is None:
            self._STMSamples = np.empty(shape=(0, c))
            self._LTMSamples = np.empty(shape=(0, c))

        for i in range(r):
            self._partial_fit(X[i, :], y[i])

        return self
def test_get_dimensions():
    rows_expected = 5
    cols_expected = 5

    a_list = [None] * cols_expected
    rows, cols = get_dimensions(a_list)
    assert rows == 1
    assert cols == cols_expected

    a_list_of_lists = [a_list] * rows_expected
    rows, cols = get_dimensions(a_list_of_lists)
    assert rows == rows_expected
    assert cols == cols_expected

    a_ndarray = np.ndarray(cols_expected)
    rows, cols = get_dimensions(a_ndarray)
    assert rows == 1
    assert cols == cols_expected

    a_ndarray = np.ndarray((rows_expected, cols_expected))
    rows, cols = get_dimensions(a_ndarray)
    assert rows == rows_expected
    assert cols == cols_expected
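
A sketch of get_dimensions consistent with the assertions above (the real utility lives in skmultiflow.utils; this reimplementation is only illustrative):

import numpy as np

def get_dimensions_sketch(X):
    # 1D inputs count as a single row; 2D inputs report (rows, cols)
    arr = np.asarray(X)
    if arr.ndim == 1:
        return 1, arr.shape[0]
    return arr.shape[0], arr.shape[1]
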
Example #23
    def partial_fit(self, X, y, classes=None, sample_weight=None):
        """ Partially (incrementally) fit the model.

        Parameters
        ----------
        X: Numpy.ndarray of shape (n_samples, n_features)
            The data upon which the algorithm will create its model.

        y: Array-like
            An array-like containing the classification targets for all
            samples in X.

        classes: numpy.ndarray, optional (default=None)
            Array with all possible/known classes.

        sample_weight: Not used.

        Returns
        -------
        KNNADWINClassifier
            self

        Notes
        -----
        Partially fits the model by updating the window with new samples
        while also updating the ADWIN algorithm. If ADWIN detects a change,
        the window is split in such a way that samples from the previous
        concept are dropped.

        """
        r, c = get_dimensions(X)
        if classes is not None:
            self.classes = list(set().union(self.classes, classes))

        for i in range(r):
            self.data_window.add_sample(X[i], y[i])
            if self.data_window.size >= self.n_neighbors:
                correctly_classifies = 1 if self.predict(np.asarray(
                    [X[i]])) == y[i] else 0
                self.adwin.add_element(correctly_classifies)
            else:
                self.adwin.add_element(0)

        if self.data_window.size >= self.n_neighbors:
            if self.adwin.detected_change():
                if self.adwin.width < self.data_window.size:
                    for i in range(self.data_window.size, self.adwin.width,
                                   -1):
                        self.data_window.delete_oldest_sample()
        return self
Example #24
        def update_weights(self, X, y, learning_ratio, rht):
            """Update the perceptron weights

            Parameters
            ----------
            X: numpy.ndarray of length equal to the number of features.
                Instance attributes for updating the node.
            y: numpy.ndarray of length equal to the number of targets.
                Targets values.
            learning_ratio: float
                Perceptron learning ratio.
            rht: RegressionHoeffdingTree
                Regression Hoeffding Tree to update.
            """
            normalized_sample = rht.normalize_sample(X)
            normalized_base_pred = self._predict_base(normalized_sample)

            normalized_target_value = rht.normalized_target_value(y)

            self.perceptron_weight[0] += learning_ratio * \
                (normalized_target_value - normalized_base_pred)[:, None] @ \
                normalized_sample[None, :]

            # Add bias term
            normalized_base_pred = np.append(normalized_base_pred, 1.0)

            normalized_meta_pred = self._predict_meta(normalized_base_pred)

            self.perceptron_weight[1] += learning_ratio * \
                (normalized_target_value - normalized_meta_pred)[:, None] @ \
                normalized_base_pred[None, :]

            self.normalize_perceptron_weights()
    def partial_fit(self, X, y, classes=None, sample_weight=None):
        if self.classes is None and classes is not None:
            self.classes = classes
        if y is not None:
            row_cnt, _ = get_dimensions(X)
            if sample_weight is None:
                sample_weight = np.ones(row_cnt)
            if row_cnt != len(sample_weight):
                raise ValueError(
                    'Inconsistent number of instances ({}) and weights ({}).'.
                    format(row_cnt, len(sample_weight)))
            for i in range(row_cnt):
                if sample_weight[i] != 0.0:
                    self._train_weight_seen_by_model += sample_weight[i]
                    self._partial_fit(X[i], y[i], sample_weight[i])

        return self
    def partial_fit(self, X, y, classes=None, sample_weight=None):
        """ Partially (incrementally) fit the model.

        Parameters
        ----------
        X : numpy.ndarray of shape (n_samples, n_features)
            The features to train the model.

        y: numpy.ndarray of shape (n_samples)
            An array-like with the class labels of all samples in X.

        classes: numpy.ndarray, optional (default=None)
            Array with all possible/known class labels.

        sample_weight: not used (default=None)

        Raises
        ------
        ValueError: A ValueError is raised if the 'classes' parameter is not
        passed in the first partial_fit call, or if it is passed in further
        calls but differs from the initially provided list of classes.

        Returns
        -------
        LeveragingBaggingClassifier
            self

        """
        if classes is None and self.classes is None:
            raise ValueError("The first partial_fit call should pass all the classes.")
        if classes is not None and self.classes is None:
            self.classes = classes
        elif classes is not None and self.classes is not None:
            if set(self.classes) != set(classes):
                raise ValueError(
                    "The classes passed to the partial_fit function differ from those passed in "
                    "an earlier call.")

        r, c = get_dimensions(X)
        for i in range(r):
            self.__partial_fit(X[i], y[i])

        return self
Example #27
    def transform(self, X):
        """ transform
        
        Does the transformation process in the samples in X.
        
        Parameters
        ----------
        X: numpy.ndarray of shape (n_samples, n_features)
            The sample or set of samples that should be transformed.
        
        """
        r, c = get_dimensions(X)
        for i in range(r):
            for j in range(c):
                if X[i][j] in self.missing_value:
                    X[i][j] = self._get_substitute(j)

        return X
    def transform(self, X):
        """ transform
        
        Does the transformation process in the samples in X.
        
        Parameters
        ----------
        X: numpy.ndarray of shape (n_samples, n_features)
            The sample or set of samples that should be transformed.
        
        """
        r, c = get_dimensions(X)
        for i in range(r):
            if self.strategy in ['mean', 'median', 'mode']:
                self.window.add_element([X[i][:]])
            for j in range(c):
                if X[i][j] in self.missing_value or np.isnan(X[i][j]):
                    X[i][j] = self._get_substitute(j)

        return X
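
A toy version of the substitution step (assumed behaviour, using column means as substitutes):

import numpy as np

X = np.array([[1.0, np.nan], [3.0, 4.0]])
col_means = np.nanmean(X, axis=0)        # per-column mean, ignoring NaNs
rows, cols = X.shape
for i in range(rows):
    for j in range(cols):
        if np.isnan(X[i][j]):
            X[i][j] = col_means[j]       # substitute in place
print(X)                                 # [[1. 4.] [3. 4.]]
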
 def predict_proba(self, X):
     r, _ = get_dimensions(X)
     predictions = []
     for i in range(r):
         votes = self.get_votes_for_instance(X[i]).copy()
         if votes == {}:
             # Tree is empty, all classes equal, default to zero
             predictions.append([0])
         else:
             new_votes = dict((key, d[key]) for d in votes for key in d)
             if sum(new_votes.values()) != 0:
                 normalize_values_in_dict(new_votes)
             if self.classes is not None:
                 y_proba = np.zeros(int(max(self.classes)) + 1)
             else:
                 y_proba = np.zeros(int(max(new_votes.keys())) + 1)
             for key, value in new_votes.items():
                 y_proba[int(key)] = value
             predictions.append(y_proba)
     return np.array(predictions)
 def predict(self, X):
     """Predicts the label of the X instance(s)
     Parameters
     ----------
     X: numpy.ndarray of shape (n_samples, n_features)
         Samples for which we want to predict the labels.
     Returns
     -------
     numpy.ndarray
         Predicted labels for all instances in X.
     """
     r, _ = get_dimensions(X)
     predictions = []
     for i in range(r):
         votes = self.get_votes_for_instance(X[i])
         if votes == {}:
             # Ensemble is empty, all classes equal, default to zero
             predictions.append(0)
         else:
             predictions.append(max(votes, key=votes.get))
     return np.asarray(predictions)
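
The vote-counting step above, in isolation:

votes = {0: 2.0, 1: 5.0, 2: 1.0}
print(max(votes, key=votes.get))  # 1: the label with the largest vote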