Example #1
    def learn_one(self, X, y, *, weight=1.0, tree=None):
        """Update the node with the provided instance.

        Parameters
        ----------
        X: numpy.ndarray of length equal to the number of features.
            Instance attributes for updating the node.
        y: numpy.ndarray of length equal to the number of targets.
            Instance targets.
        weight: float
            Instance weight.
        tree: HoeffdingTreeRegressor
            Regression Hoeffding Tree to update.
        """
        self.update_stats(y, weight)
        self.update_attribute_observers(X, y, weight, tree)

        if self.perceptron_weights is None:
            # Creates matrix of perceptron random weights
            _, rows = get_dimensions(y)
            _, cols = get_dimensions(X)

            self.perceptron_weights = self._random_state.uniform(
                -1.0, 1.0, (rows, cols + 1))
            self._normalize_perceptron_weights()

        if tree.learning_ratio_const:
            learning_ratio = tree.learning_ratio_perceptron
        else:
            learning_ratio = tree.learning_ratio_perceptron / (
                1 + self.stats[0] * tree.learning_ratio_decay)

        for i in range(int(weight)):
            self._update_weights(X, y, learning_ratio, tree)
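For context, the decayed learning ratio used above can be reproduced in isolation. A minimal sketch, assuming only a base ratio, a decay factor, and the weight seen so far (the function below is illustrative, not part of the tree class):

def decayed_learning_ratio(base_ratio, weight_seen, decay, constant=False):
    # Mirrors the branch above: either a constant ratio, or one that
    # shrinks as the node accumulates weight (self.stats[0])
    if constant:
        return base_ratio
    return base_ratio / (1.0 + weight_seen * decay)

print(decayed_learning_ratio(0.02, 100, 0.001))  # ~0.01818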
Example #2
    def learn_from_instance(self, X, y, weight, rht):
        """Update the node with the provided instance.

        Parameters
        ----------
        X: numpy.ndarray of length equal to the number of features.
            Instance attributes for updating the node.
        y: numpy.ndarray of length equal to the number of targets.
            Instance targets.
        weight: float
            Instance weight.
        rht: HoeffdingTreeRegressor
            Regression Hoeffding Tree to update.
        """
        if self.perceptron_weight is None:
            self.perceptron_weight = {}
            # Creates matrix of perceptron random weights
            _, rows = get_dimensions(y)
            _, cols = get_dimensions(X)

            self.perceptron_weight[0] = \
                self.random_state.uniform(-1.0, 1.0, (rows, cols + 1))
            # Cascade Stacking
            self.perceptron_weight[1] = \
                self.random_state.uniform(-1.0, 1.0, (rows, rows + 1))
            self.normalize_perceptron_weights()

        try:
            self._observed_class_distribution[0] += weight
        except KeyError:
            self._observed_class_distribution[0] = weight

        if rht.learning_ratio_const:
            learning_ratio = rht.learning_ratio_perceptron
        else:
            learning_ratio = rht.learning_ratio_perceptron / \
                             (1 + self._observed_class_distribution[0] *
                              rht.learning_ratio_decay)

        try:
            self._observed_class_distribution[1] += weight * y
            self._observed_class_distribution[2] += weight * y * y
        except KeyError:
            self._observed_class_distribution[1] = weight * y
            self._observed_class_distribution[2] = weight * y * y

        for i in range(int(weight)):
            self.update_weights(X, y, learning_ratio, rht)

        for i, x in enumerate(X):
            try:
                obs = self._attribute_observers[i]
            except KeyError:
                # Creates targets observers, if not already defined
                if rht.nominal_attributes is not None and i in rht.nominal_attributes:
                    obs = NominalAttributeRegressionObserver()
                else:
                    obs = NumericAttributeRegressionObserverMultiTarget()
                self._attribute_observers[i] = obs
            obs.observe_attribute_class(x, y, weight)
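The cascade-stacking initialization above creates two weight matrices: a base layer mapping the n_features inputs (plus a bias) to the n_targets outputs, and a meta layer mapping the base predictions (plus a bias) back onto the targets. A standalone sketch of those shapes, assuming NumPy's RandomState API:

import numpy as np

n_targets, n_features = 3, 5
rs = np.random.RandomState(42)

base = rs.uniform(-1.0, 1.0, (n_targets, n_features + 1))  # +1 bias column
meta = rs.uniform(-1.0, 1.0, (n_targets, n_targets + 1))   # stacked layer

print(base.shape, meta.shape)  # (3, 6) (3, 4)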
Example #3
    def update_buffer_content(self, X, y, timestamp, uid):
        self.members['uids'].append(uid)
        if self.members['X'] is None:
            self.members['X'] = np.zeros((0, get_dimensions(X)[1]))
            self.members['y'] = np.zeros((0, get_dimensions(y)[1]))
            self.earliest_timestamp = timestamp
        self.members['X'] = np.vstack((self.members['X'], X))
        self.members['y'] = np.vstack((self.members['y'], y))
        self.members['timestamps'].append(timestamp)
        self.size += 1
        self.latest_timestamp = timestamp
    def update_weights(self, X, y, learning_ratio, rht):
        """Update the perceptron weights

        Parameters
        ----------
        X: numpy.ndarray of length equal to the number of features.
            Instance attributes for updating the node.
        y: numpy.ndarray of length equal to the number of targets.
            Targets values.
        learning_ratio: float
            Perceptron learning ratio.
        rht: HoeffdingTreeRegressor
            Regression Hoeffding Tree to update.
        """
        normalized_sample = rht.normalize_sample(X)
        normalized_base_pred = self._predict_base(normalized_sample)

        _, n_features = get_dimensions(X)
        _, n_targets = get_dimensions(y)

        normalized_target_value = rht.normalize_target_value(y)

        self.perceptron_weight[0] += learning_ratio * \
            (normalized_target_value - normalized_base_pred)[:, None] @ \
            normalized_sample[None, :]

        # Add bias term
        normalized_base_pred = np.append(normalized_base_pred, 1.0)
        normalized_meta_pred = self._predict_meta(normalized_base_pred)

        self.perceptron_weight[1] += learning_ratio * \
            (normalized_target_value - normalized_meta_pred)[:, None] @ \
            normalized_base_pred[None, :]

        self.normalize_perceptron_weights()

        # Update faded errors for the predictors
        # The considered errors are normalized, since they are based on
        # mean centered and sd scaled values
        self.fMAE_M = 0.95 * self.fMAE_M + np.absolute(
            normalized_target_value -
            rht.normalize_target_value(self._observed_class_distribution[1] /
                                       self._observed_class_distribution[0]))

        # Ignore added bias term in the comparison
        self.fMAE_P = 0.95 * self.fMAE_P + np.absolute(
            normalized_target_value - normalized_base_pred[:-1])

        self.fMAE_SP = 0.95 * self.fMAE_SP + np.absolute(
            normalized_target_value - normalized_meta_pred)
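The fMAE_* updates at the end of update_weights are exponentially faded mean absolute errors: each step discounts the accumulated error by 0.95 and adds the current absolute deviation. A minimal standalone sketch of that recurrence:

import numpy as np

def faded_mae(prev, y_true, y_pred, fade=0.95):
    # Older errors decay geometrically, so recent performance dominates
    return fade * prev + np.absolute(y_true - y_pred)

fmae = np.zeros(2)
for _ in range(3):
    fmae = faded_mae(fmae, np.array([1.0, 2.0]), np.array([0.8, 2.5]))
print(fmae)  # grows toward the steady-state error level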
Example #5
    def add_sample(self, X, y, arrival_time):
        if not self._is_initialized:
            self._n_features = get_dimensions(X)[1]
            self._n_targets = get_dimensions(y)[1]
            self.configure()

        if self._n_features != get_dimensions(X)[1]:
            raise ValueError("Inconsistent number of features in X: {}, previously observed {}.".
                             format(get_dimensions(X)[1], self._n_features))

        if not self.overlap_windows:
            self._add_sample_no_overlap(X, y, arrival_time)
        else:
            self._add_sample_overlap(X, y, arrival_time)
Example #6
    def partial_fit(self, X, y, sample_weight=None):
        """Incrementally trains the model. Train samples (instances) are
        composed of X attributes and their corresponding targets y.

        Tasks performed before training:

        * Verify instance weight. If not provided, uniform weights (1.0) are
          assumed (a standalone sketch of this check follows the method).
        * If more than one instance is passed, loop through X and pass
          instances one at a time.
        * Update weight seen by model.

        Training tasks:

        * If the tree is empty, create a leaf node as the root.
        * If the tree is already initialized, find the corresponding leaf for
          the instance and update the leaf node statistics.
        * If growth is allowed and the number of instances that the leaf has
          observed between split attempts exceeds the grace period, then
          attempt to split.

        Parameters
        ----------
        X: numpy.ndarray of shape (n_samples, n_features)
            Instance attributes.
        y: numpy.ndarray of shape (n_samples, n_targets)
            Target values.
        sample_weight: float or array-like
            Samples weight. If not provided, uniform weights are assumed.
        """
        if y is not None:
            # Set the number of targets once
            if not self._n_targets_set:
                _, self._n_targets = get_dimensions(y)
                self._n_targets_set = True

            row_cnt, _ = get_dimensions(X)
            if sample_weight is None:
                sample_weight = np.ones(row_cnt)
            if row_cnt != len(sample_weight):
                raise ValueError(
                    'Inconsistent number of instances ({}) and weights ({}).'.format(
                        row_cnt, len(sample_weight)
                    )
                )
            for i in range(row_cnt):
                if sample_weight[i] != 0.0:
                    self._train_weight_seen_by_model += sample_weight[i]
                    self._partial_fit(X[i], y[i], sample_weight[i])
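The weight-verification step described in the docstring can be isolated into a small helper. A sketch mirroring the validation above (the helper name is illustrative):

import numpy as np

def check_sample_weight(sample_weight, row_cnt):
    # Default to uniform weights; reject mismatched weight vectors
    if sample_weight is None:
        sample_weight = np.ones(row_cnt)
    if row_cnt != len(sample_weight):
        raise ValueError(
            'Inconsistent number of instances ({}) and weights ({}).'.format(
                row_cnt, len(sample_weight)))
    return sample_weight

print(check_sample_weight(None, 3))  # [1. 1. 1.]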
Example #7
    def predict(self, X):
        """ Predict classes for the passed data.

        Parameters
        ----------
        X : numpy.ndarray of shape (n_samples, n_features)
            The set of data samples to predict the class labels for.

        Returns
        -------
        A numpy.ndarray with all the predictions for the samples in X.

        Notes
        -----
        The predict function will average the predictions from all its learners
        to find the most likely prediction for the sample matrix X.

        """
        r, c = get_dimensions(X)
        proba = self.predict_proba(X)
        predictions = []
        if proba is None:
            return None
        for i in range(r):
            predictions.append(np.argmax(proba[i]))
        return np.asarray(predictions)
Example #8
    def predict_proba(self, X):
        """ Estimate the probability of X belonging to each class-labels.

        Parameters
        ----------
        X : numpy.ndarray of shape (n_samples, n_features)
            Samples one wants to predict the class probabilities for.

        Returns
        -------
        A numpy.ndarray of shape (n_samples, n_labels), in which each outer
        entry is associated with the X entry of the same index. The entry at
        index [i] contains len(self.target_values) elements, each of which
        represents the probability that the i-th sample of X belongs to a
        certain class label.

        """
        n_samples, n_features = get_dimensions(X)
        y_proba = []

        if self.ensemble is None:
            self._init_ensemble(n_features=n_features)
            return np.zeros(n_samples)

        for i in range(n_samples):
            y_proba.append(self._predict_proba(np.asarray([X[i]])))
        return np.asarray(y_proba)
    def partial_fit(self, X, y, classes=None, sample_weight=None):
        """ Partially (incrementally) fit the model.
        Parameters
        ----------
        X : numpy.ndarray of shape (n_samples, n_features)
            The features to train the model.
        y: numpy.ndarray of shape (n_samples)
            An array-like with the class labels of all samples in X.
        classes: numpy.ndarray, list, optional (default=None)
            Array with all possible/known class labels. This is an optional parameter, except
            for the first partial_fit call where it is compulsory.
        sample_weight: numpy.ndarray of shape (n_samples), optional (default=None)
            Samples weight. If not provided, uniform weights are assumed.
        """
        if self.classes is None and classes is not None:
            self.classes = classes

        if sample_weight is None:
            weight = 1.0
        else:
            weight = sample_weight

        if y is not None:
            row_cnt, _ = get_dimensions(X)
            weight = check_weights(weight, expand_length=row_cnt)
            for iterator in range(row_cnt):
                if weight[iterator] != 0.0:
                    self._partial_fit(X[iterator], y[iterator], self.classes,
                                      weight[iterator])
        return self
    def predict_proba(self, X):
        """Predicts probabilities of all label of the instance(s).

        Parameters
        ----------
        X: numpy.ndarray of shape (n_samples, n_features)
            Samples for which we want to predict the labels.

        Returns
        -------
        numpy.ndarray
            The predicted probabilities of all labels for all instances in X.

        """
        r, _ = get_dimensions(X)
        predictions = []
        for i in range(r):
            votes = copy.deepcopy(self.get_votes_for_instance(X[i]))
            if votes == {}:
                # Tree is empty, all classes equal, default to zero
                predictions.append([0])
            else:
                if sum(votes.values()) != 0:
                    votes = normalize_values_in_dict(votes, inplace=False)
                if self.classes is not None:
                    y_proba = np.zeros(int(max(self.classes)) + 1)
                else:
                    y_proba = np.zeros(int(max(votes.keys())) + 1)
                for key, value in votes.items():
                    y_proba[int(key)] = value
                predictions.append(y_proba)
        return np.array(predictions)
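The vote-handling loop above can be distilled: votes arrive as a dict mapping class label to vote mass, are normalized to sum to one, and are then scattered into a dense row indexed by label. A standalone sketch:

import numpy as np

def votes_to_proba(votes, n_classes):
    # Normalize vote mass, then scatter into a dense probability row
    y_proba = np.zeros(n_classes)
    total = sum(votes.values())
    for label, mass in votes.items():
        y_proba[int(label)] = mass / total if total != 0 else 0.0
    return y_proba

print(votes_to_proba({0: 3.0, 2: 1.0}, n_classes=3))  # [0.75 0.   0.25]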
Example #11
    def _init_ensemble(self, X):
        self._set_max_features(get_dimensions(X)[1])

        self.ensemble = [
            ARFBaseLearner(
                index_original=i,
                classifier=ARFHoeffdingTreeClassifier(
                    max_byte_size=self.max_byte_size,
                    memory_estimate_period=self.memory_estimate_period,
                    grace_period=self.grace_period,
                    split_criterion=self.split_criterion,
                    split_confidence=self.split_confidence,
                    tie_threshold=self.tie_threshold,
                    binary_split=self.binary_split,
                    stop_mem_management=self.stop_mem_management,
                    remove_poor_atts=self.remove_poor_atts,
                    no_preprune=self.no_preprune,
                    leaf_prediction=self.leaf_prediction,
                    nb_threshold=self.nb_threshold,
                    nominal_attributes=self.nominal_attributes,
                    max_features=self.max_features,
                    random_state=self.random_state),
                instances_seen=self.instances_seen,
                drift_detection_method=self.drift_detection_method,
                warning_detection_method=self.warning_detection_method,
                is_background_learner=False) for i in range(self.n_estimators)
        ]
    def predict_proba(self, X, max_score):
        """ Predicts probabilities of all label of the X instance(s)

        Parameters
        ----------
        X: numpy.ndarray of shape (n_samples, n_features)
            Samples for which we want to predict the labels.

        max_score: float
            The maximum score an instance could have in the tree.

        Returns
        -------
        numpy.ndarray
            The predicted probabilities of all labels for all instances in X.

        """
        r, _ = get_dimensions(X)
        predictions = []
        for i in range(r):
            votes = copy.deepcopy(self.get_votes_for_instance(X[i], max_score))
            y_proba = np.zeros(int(max(votes.keys())) + 1)
            for key, value in votes.items():
                y_proba[int(key)] = value
            predictions.append(y_proba)
        return np.array(predictions)
Example #13
    def normalize_sample(self, X):
        """Normalize the features in order to have the same influence during the
        process of training.

        Parameters
        ----------
        X: np.array
            features.
        Returns
        -------
        np.array:
            normalized samples
        """
        if self.examples_seen <= 1:
            _, c = get_dimensions(X)
            return np.zeros((c + 1), dtype=np.float64)

        mean = self.sum_of_attribute_values / self.examples_seen
        variance = (self.sum_of_attribute_squares -
                    (self.sum_of_attribute_values**2) / self.examples_seen) / (
                        self.examples_seen - 1)

        sd = np.sqrt(variance,
                     out=np.zeros_like(variance),
                     where=variance >= 0.0)

        normalized_sample = np.zeros(X.shape[0] + 1, dtype=np.float64)
        np.divide(X - mean, sd, where=sd != 0, out=normalized_sample[:-1])
        # Augments sample with the bias input signal (or y intercept for
        # each target)
        normalized_sample[-1] = 1.0

        return normalized_sample
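The running statistics above follow the usual sufficient-statistics identities: with S1 the sum of attribute values, S2 the sum of their squares, and n the examples seen, mean = S1 / n and variance = (S2 - S1**2 / n) / (n - 1). A worked sketch of the same normalization:

import numpy as np

X_seen = np.array([[1.0, 10.0], [2.0, 20.0], [3.0, 30.0]])
n = len(X_seen)
s1 = X_seen.sum(axis=0)         # sum_of_attribute_values
s2 = (X_seen ** 2).sum(axis=0)  # sum_of_attribute_squares

mean = s1 / n
variance = (s2 - s1 ** 2 / n) / (n - 1)
sd = np.sqrt(variance)

x_new = np.array([2.0, 25.0])
normalized = np.append((x_new - mean) / sd, 1.0)  # bias input appended
print(normalized)  # [0.  0.5 1. ]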
Example #14
    def partial_fit(self, X, y, classes=None, sample_weight=None):
        """ Partially (incrementally) fit the model.

            Parameters
            ----------
            X : numpy.ndarray of shape (n_samples, n_features)
                The features to train the model.

            y: numpy.ndarray of shape (n_samples)
                An array-like with the labels of all samples in X.

            classes: numpy.ndarray, optional (default=None)
                Array with all possible/known classes. Usage varies depending on
                the learning method.

            sample_weight: numpy.ndarray of shape (n_samples), optional (default=None)
                Samples weight. If not provided, uniform weights are assumed.
                Usage varies depending on the learning method.

            Returns
            -------
            self
        """
        r, c = get_dimensions(X)
        if self._STMSamples is None:
            self._STMSamples = np.empty(shape=(0, c))
            self._LTMSamples = np.empty(shape=(0, c))

        for i in range(r):
            self._partial_fit(X[i, :], y[i])

        return self
    def partial_fit(self, X, y, sample_weight=None):
        """ Partially (incrementally) fit the model.

        Parameters
        ----------
        X: numpy.ndarray of shape (n_samples, n_features)
            The data upon which the algorithm will create its model.

        y: numpy.ndarray of shape (n_samples)
            An array-like containing the target values for all
            samples in X.

        sample_weight: Not used.

        Returns
        -------
        KNNRegressor
            self

        Notes
        -----
        For the K-Nearest Neighbors regressor, fitting the model is the
        equivalent of inserting the newer samples into the observed window
        and, once the size_limit is reached, removing the oldest samples.

        """
        r, c = get_dimensions(X)

        for i in range(r):
            self.data_window.add_sample(X=X[i], y=y[i])
        return self
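As the Notes say, fitting a streaming KNN regressor is pure window maintenance. A minimal sketch of that idea with a deque-backed window (this window class is illustrative, not the library's actual data_window):

from collections import deque
import numpy as np

class SlidingWindow:
    def __init__(self, size_limit):
        # deque with maxlen drops the oldest sample automatically
        self.samples = deque(maxlen=size_limit)

    def add_sample(self, X, y):
        self.samples.append((np.asarray(X), y))

window = SlidingWindow(size_limit=3)
for i in range(5):
    window.add_sample([i, i + 1], y=float(i))
print(len(window.samples))  # 3 -> only the newest samples remain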
    def predict(self, X):
        """
        Predict the class labels for the samples in X

        Parameters
        ----------
        X: numpy.ndarray
            An array of shape (n_samples, n_features) with the samples to
            predict the class label for.

        Returns
        -------
        numpy.ndarray
            A 1D array of shape (n_samples,), containing the predicted class
            labels for all instances in X.

        """
        if self._ensemble:
            if self.update_strategy == self._REPLACE_STRATEGY:
                trees_in_ensemble = sum(i is not None for i in self._ensemble)
            else:  # self.update_strategy == self._PUSH_STRATEGY
                trees_in_ensemble = len(self._ensemble)
            if trees_in_ensemble > 0:
                d_test = xgb.DMatrix(X)
                for i in range(trees_in_ensemble - 1):
                    margins = self._ensemble[i].predict(d_test,
                                                        output_margin=True)
                    d_test.set_base_margin(margin=margins)
                predicted = self._ensemble[trees_in_ensemble -
                                           1].predict(d_test)
                return np.array(predicted > 0.5).astype(int)
        # Ensemble is empty, return default values (0)
        return np.zeros(get_dimensions(X)[0])
    def _partial_fit(self, X, y):
        if self._first_run:
            self._X_buffer = np.array([]).reshape(0, get_dimensions(X)[1])
            self._y_buffer = np.array([])
            self._first_run = False
        self._X_buffer = np.concatenate((self._X_buffer, X))
        self._y_buffer = np.concatenate((self._y_buffer, y))
        while self._X_buffer.shape[0] >= self.window_size:
            self._train_on_mini_batch(X=self._X_buffer[0:self.window_size, :],
                                      y=self._y_buffer[0:self.window_size])
            delete_idx = [i for i in range(self.window_size)]
            self._X_buffer = np.delete(self._X_buffer, delete_idx, axis=0)
            self._y_buffer = np.delete(self._y_buffer, delete_idx, axis=0)

            # Check window size and adjust it if necessary
            self._adjust_window_size()

        # Support for concept drift
        if self.detect_drift:
            correctly_classifies = self.predict(X) == y
            # Check for warning
            self._drift_detector.add_element(int(not correctly_classifies))
            # Check if there was a change
            if self._drift_detector.detected_change():
                # Reset window size
                self._reset_window_size()
                if self.update_strategy == self._REPLACE_STRATEGY:
                    self._model_idx = 0
    def predict(self, X):
        """Predicts the target value using mean class or the perceptron.

        Parameters
        ----------
        X: numpy.ndarray of shape (n_samples, n_features)
            Samples for which we want to predict the labels.

        Returns
        -------
        numpy.ndarray
            Predicted target values.

        """
        predictions = []
        if self.samples_seen > 0 and self._tree_root is not None:
            r, _ = get_dimensions(X)
            for i in range(r):
                node = self._tree_root.filter_instance_to_leaf(X[i], None,
                                                               -1).node
                if node.is_leaf():
                    predictions.append(node.predict_one(X[i], tree=self))
                else:
                    # The instance sorting ended up in a Split Node, since no branch was found
                    # for some of the instance's features. Use the mean prediction in this case
                    predictions.append(node.stats[1] / node.stats[0])
        else:
            # Model is empty
            predictions.append(0.0)
        return np.asarray(predictions)
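The fallback above predicts a node's running mean when sorting stops at a split node: stats[0] holds the total weight seen and stats[1] the weighted sum of targets. A sketch of that statistic (the dict below stands in for the node's stats):

import numpy as np

stats = {0: 4.0, 1: np.array([10.0, 2.0])}  # total weight, weighted target sums
mean_prediction = stats[1] / stats[0]
print(mean_prediction)  # [2.5 0.5]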
    def learn_from_instance(self, X, y, weight, ht):
        """Update the node with the provided instance.

        Parameters
        ----------
        X: numpy.ndarray of length equal to the number of features.
            Instance attributes for updating the node.
        y: int
            Instance class.
        weight: float
            Instance weight.
        ht: HoeffdingTreeClassifier
            Hoeffding Tree to update.
        """
        try:
            self._observed_class_distribution[y] += weight
        except KeyError:
            self._observed_class_distribution[y] = weight
            self._observed_class_distribution = dict(
                sorted(self._observed_class_distribution.items()))

        if self.list_attributes.size == 0:
            self.list_attributes = self._sample_features(get_dimensions(X)[1])

        for i in self.list_attributes:
            try:
                obs = self._attribute_observers[i]
            except KeyError:
                if ht.nominal_attributes is not None and i in ht.nominal_attributes:
                    obs = NominalAttributeClassObserver()
                else:
                    obs = NumericAttributeClassObserverGaussian()
                self._attribute_observers[i] = obs
            obs.observe_attribute_class(X[i], int(y), weight)
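The list_attributes bookkeeping above gives each leaf its own random feature subspace, in the style of Adaptive Random Forest. One way such a subset might be drawn (the helper below is illustrative, not the library's _sample_features):

import numpy as np

def sample_features(n_features, max_features, rs):
    # Random subset of feature indices, drawn without replacement
    return rs.choice(n_features, size=max_features, replace=False)

rs = np.random.RandomState(7)
print(sample_features(n_features=10, max_features=3, rs=rs))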
Example #20
    def predict_proba(self, X):
        """ Estimates the probability of each sample in X belonging to each of the class-labels.

        Parameters
        ----------
        X : Numpy.ndarray of shape (n_samples, n_features)
            The matrix of samples one wants to predict the class probabilities for.

        Returns
        -------
        A numpy.ndarray of shape (n_samples, n_labels), in which each outer entry is associated
        with the X entry of the same index. The entry at index [i] contains
        len(self.target_values) elements, each of which represents the probability that
        the i-th sample of X belongs to a certain class label.

        """
        predictions = deque()
        r, _ = get_dimensions(X)
        if self._observed_class_distribution == {}:
            # Model is empty, all classes equal, default to zero
            return np.zeros((r, 1))
        else:
            for i in range(r):
                votes = do_naive_bayes_prediction(X[i], self._observed_class_distribution,
                                                  self._attribute_observers)
                sum_values = sum(votes.values())
                if self._classes is not None:
                    y_proba = np.zeros(int(max(self._classes)) + 1)
                else:
                    y_proba = np.zeros(int(max(votes.keys())) + 1)
                for key, value in votes.items():
                    y_proba[int(key)] = value / sum_values if sum_values != 0 else 0.0
                predictions.append(y_proba)
        return np.array(predictions)
    def partial_fit(self, X, y, classes=None, sample_weight=None):
        """ Partially (incrementally) fit the model.

        Parameters
        ----------
        X : numpy.ndarray of shape (n_samples, n_features)
            The features to train the model.

        y: numpy.ndarray of shape (n_samples)
            An array-like with the class labels of all samples in X.

        classes: None
            Not used by this method.

        sample_weight: None
            Not used by this method.

        Returns
        -------
        self

        """

        row_cnt, _ = X.shape

        if self.samples_seen == 0:
            self._random_state = check_random_state(self.random_state)
            self.n_features = get_dimensions(X)[1]
            self.build_trees()

        for i in range(row_cnt):
            self._partial_fit(X[i], y[i])

        return self
    def partial_fit(self, X, y, classes=None, sample_weight=None):
        """ Partially (incrementally) fit the model.

        Parameters
        ----------
        X : numpy.ndarray of shape (n_samples, n_features)
            The features to train the model.

        y: numpy.ndarray of shape (n_samples)
            An array-like with the class labels of all samples in X.

        classes: numpy.ndarray, optional (default=None)
            Not used.

        sample_weight: numpy.ndarray of shape (n_samples), optional \
            (default=None)
            Samples weight. If not provided, uniform weights are assumed.
            Usage varies depending on the learning method.

        Returns
        -------
        self

        """
        n_rows, n_cols = get_dimensions(X)

        if sample_weight is None:
            sample_weight = np.ones(n_rows)

        for i in range(n_rows):
            self._partial_fit(np.asarray([X[i]]), np.asarray([y[i]]),
                              classes=classes, sample_weight=np.asarray([sample_weight[i]]))

        return self
    def predict(self, X):
        """ Predict classes for the passed data.

        Parameters
        ----------
        X : numpy.ndarray of shape (n_samples, n_features)
            The set of data samples to predict the class labels for.

        Returns
        -------
        A numpy.ndarray with all the predictions for the samples in X.

        """
        r, _ = get_dimensions(X)
        predictions = []
        y_proba = self.predict_proba(X)
        for i in range(r):
            if y_proba is None:
                # Ensemble is empty, all classes equal, default to zero
                predictions.append(0)
            else:
                # If the predicted probability of this instance being an
                # anomaly exceeds the defined threshold, classify it as an
                # anomaly
                if y_proba[i][1] > self.anomaly_threshold:
                    predictions.append(1)
                else:
                    predictions.append(0)
        return np.asarray(predictions)
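The anomaly decision above is a plain threshold on the positive-class probability. A distilled sketch:

import numpy as np

def flag_anomalies(y_proba, threshold):
    # y_proba rows hold [p(normal), p(anomaly)]; flag rows whose anomaly
    # probability exceeds the threshold
    return (np.asarray(y_proba)[:, 1] > threshold).astype(int)

print(flag_anomalies([[0.9, 0.1], [0.2, 0.8]], threshold=0.5))  # [0 1]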
Example #24
    def predict(self, X):
        """ predict

        The predict function will average the predictions from all its learners
        to find the most likely prediction for the sample matrix X.

        Parameters
        ----------
        X: Numpy.ndarray of shape (n_samples, n_features)
            A matrix of the samples we want to predict.

        Returns
        -------
        numpy.ndarray
            A numpy.ndarray with the label prediction for all the samples in X.

        """
        r, c = get_dimensions(X)
        proba = self.predict_proba(X)
        predictions = []
        if proba is None:
            return None
        for i in range(r):
            predictions.append(np.argmax(proba[i]))
        return np.asarray(predictions)
    def _init_ensemble(self, X):
        self._set_max_features(get_dimensions(X)[1])
        # Generate a different random seed per tree
        random_states = self._random_state.randint(0,
                                                   4294967295,
                                                   size=self.n_estimators,
                                                   dtype='u8')
        self.ensemble = [
            ARFRegBaseLearner(
                index_original=i,
                estimator=ARFHoeffdingTreeRegressor(
                    max_byte_size=self.max_byte_size,
                    memory_estimate_period=self.memory_estimate_period,
                    grace_period=self.grace_period,
                    split_confidence=self.split_confidence,
                    tie_threshold=self.tie_threshold,
                    binary_split=self.binary_split,
                    stop_mem_management=self.stop_mem_management,
                    remove_poor_atts=self.remove_poor_atts,
                    no_preprune=self.no_preprune,
                    leaf_prediction=self.leaf_prediction,
                    nominal_attributes=self.nominal_attributes,
                    learning_ratio_perceptron=self.learning_ratio_perceptron,
                    learning_ratio_decay=self.learning_ratio_decay,
                    learning_ratio_const=self.learning_ratio_const,
                    max_features=self.max_features,
                    random_state=random_states[i]),
                instances_seen=self.instances_seen,
                drift_detection_method=self.drift_detection_method,
                warning_detection_method=self.warning_detection_method,
                performance_metric=self.weighted_vote_strategy,
                drift_detection_criteria=self.drift_detection_criteria,
                is_background_learner=False) for i in range(self.n_estimators)
        ]
Example #26
    def predict(self, X):
        """Predicts the target value using mean class or the perceptron.

        Parameters
        ----------
        X: numpy.ndarray of shape (n_samples, n_features)
            Samples for which we want to predict the labels.

        Returns
        -------
        list
            Predicted target values.
        """

        r, _ = get_dimensions(X)
        try:
            predictions = np.zeros((r, self._n_targets), dtype=np.float64)
        except AttributeError:
            warnings.warn("Calling predict without previously fitting the model at least once.\n"
                          "Predictions will default to a column array filled with zeros.")
            return np.zeros((r, 1))
        for i in range(r):
            node = self._tree_root.filter_instance_to_leaf(X[i], None, -1).node

            if isinstance(node, SplitNode):
                # If not leaf, use mean as response
                predictions[i, :] = node.stats[1] / node.stats[0] if len(node.stats) > 0 else 0.0
                continue
            predictions[i, :] = node.predict_one(X[i], tree=self)

        return predictions
    def partial_fit(self, X, y, classes=None, sample_weight=None):
        """
        Fit an array of observations. Splits the input into individual
        observations and passes each to the helper function _partial_fit.
        Randomly weights observations depending on the configuration.
        """
        if self.classes is None and classes is not None:
            self.classes = classes
        if y is not None:
            row_cnt, _ = get_dimensions(X)
            if sample_weight is None:
                sample_weight = np.ones(row_cnt)
            if row_cnt != len(sample_weight):
                raise ValueError(
                    'Inconsistent number of instances ({}) and weights ({}).'.
                    format(row_cnt, len(sample_weight)))
            for i in range(row_cnt):
                if sample_weight[i] != 0.0:
                    self._train_weight_seen_by_model += sample_weight[i]
                    self.ex += 1
                    if self.rand_weights and self.poisson >= 1:
                        # Use weights similar to ARF.
                        # This just uses a similar avg grace period etc.
                        # without having to calculate those parameters.
                        k = self.poisson
                        sample_weight[i] = k
                    self._partial_fit(X[i], y[i], sample_weight[i])
Example #28
    def predict(self, X):
        predictions = deque()
        r, _ = get_dimensions(X)
        y_pred = self.stream.current_sample_y
        for i in range(r):
            predictions.append(y_pred)
        return np.array(predictions)
    def _partial_fit(self, X, y, classes=None, sample_weight=None):
        self._n_samples_seen += 1
        _, n_features = get_dimensions(X)

        if not self.ensemble:
            self._init_ensemble(n_features)

        for i in range(len(self.ensemble)):
            # Get prediction for instance
            y_pred = np.asarray([np.argmax(self.ensemble[i].predict_proba(X))])

            # Update performance evaluator
            self.ensemble[i].performance_evaluator.add_result(y[0], y_pred[0], sample_weight[0])

            # Train using random subspaces without resampling,
            # i.e. all instances are used for training.
            if self.training_method == self._TRAIN_RANDOM_SUBSPACES:
                self.ensemble[i].partial_fit(X=X, y=y, classes=classes,
                                             sample_weight=np.asarray([1.]),
                                             n_samples_seen=self._n_samples_seen,
                                             random_state=self._random_state)
            # Train using random patches or resampling,
            # thus we simulate online bagging with Poisson(lambda=...)
            else:
                k = self._random_state.poisson(lam=self.lam)
                if k > 0:
                    self.ensemble[i].partial_fit(X=X, y=y, classes=classes,
                                                 sample_weight=np.asarray([k]),
                                                 n_samples_seen=self._n_samples_seen,
                                                 random_state=self._random_state)
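The else-branch above simulates online bagging: instead of an explicit bootstrap, each ensemble member trains on the instance with a Poisson(lambda)-distributed weight, skipping it when the draw is zero. A standalone sketch of that resampling step:

import numpy as np

rs = np.random.RandomState(1)
lam = 6.0
for member in range(3):
    k = rs.poisson(lam=lam)  # per-member replication weight
    if k > 0:
        print('member', member, 'trains with weight', k)
    else:
        print('member', member, 'skips this instance')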
Example #30
    def predict(self, X):
        predictions = deque()
        r, _ = get_dimensions(X)
        y_proba = np.zeros((r, len(Data.classes)))
        for i in range(r):
            session_vector = Data.session_vector[-self.max_session_size:]
            for pos, y_o_idx in enumerate(session_vector):
                weight = self.w_mc if y_o_idx == session_vector[-1] else 1
                y_proba_current = self.ht.predict_proba(np.array([[y_o_idx]]))
                y_proba_current *= weight / (len(session_vector) - pos)
                y_proba += y_proba_current
            y_proba[i][Data.session_vector[-1]] = 0.0
            nonzero = np.flatnonzero(y_proba[i])
            if len(nonzero) > 0:
                sorted_desc = np.argsort(y_proba[i][nonzero])[::-1]
                sorted_ids = nonzero[sorted_desc]
                if not Data.allow_reminders:
                    sorted_ids = sorted_ids[~np.isin(sorted_ids,
                                                     Data.session_vector)]
                if not Data.allow_repeated:
                    session = X[i, Data.sid]
                    sorted_ids = sorted_ids[
                        ~np.isin(sorted_ids, self._rec_tracker[session])]
                    self._rec_tracker[session].extend(
                        sorted_ids[:Data.rec_size])
                y_pred = Data.classes[sorted_ids[:Data.rec_size]]
            else:
                y_pred = np.array([])
            predictions.append(y_pred)
        return np.array(predictions)