Example #1
0
    def _set_data(self, features: np.ndarray, times: np.array,
                  censoring: np.array):
        """Check, convert and store the data, then build the model object.

        Parameters
        ----------
        features : `np.ndarray`
            Two-dimensional array; its shape is unpacked as
            ``(n_samples, n_features)``.

        times : `np.array`
            Must have ``n_samples`` entries along its first axis.

        censoring : `np.array`
            Must have ``n_samples`` entries along its first axis. Converted
            with ``safe_array`` using ``np.ushort``.

        Raises
        ------
        ValueError
            If no dtype was set yet and ``features`` and ``times`` have
            different dtypes, or if the sample counts disagree.
        """
        # When no dtype has been chosen yet, the features decide it; the
        # dtype consistency check only runs in that case.
        if self.dtype is None:
            self.dtype = features.dtype
            if self.dtype != times.dtype:
                raise ValueError("Features and labels differ in data types")

        n_samples, n_features = features.shape

        n_times = times.shape[0]
        if n_times != n_samples:
            message = ("Features has %i samples while times "
                       "have %i" % (n_samples, n_times))
            raise ValueError(message)

        n_censoring = censoring.shape[0]
        if n_censoring != n_samples:
            message = ("Features has %i samples while censoring "
                       "have %i" % (n_samples, n_censoring))
            raise ValueError(message)

        stored = (
            ("features", safe_array(features, dtype=self.dtype)),
            ("times", safe_array(times, dtype=self.dtype)),
            ("censoring", safe_array(censoring, np.ushort)),
            ("n_samples", n_samples),
            ("n_features", n_features),
        )
        for attribute, value in stored:
            self._set(attribute, value)

        # The model class is looked up by dtype and receives the attributes
        # that were just stored.
        model_class = dtype_class_mapper[self.dtype]
        model = model_class(self.features, self.times, self.censoring)
        self._set("_model", model)
Example #2
0
 def _all_safe(self, features: np.ndarray, times: np.array,
               censoring: np.array):
     """Validate ``censoring`` and ``times``, then convert all three arrays.

     Returns
     -------
     output : `tuple`
         ``(features, times, censoring)`` after each went through
         ``safe_array`` (``censoring`` with ``np.ushort``).

     Raises
     ------
     ValueError
         If ``censoring`` holds a value other than 0 or 1, or if some entry
         of ``times`` does not satisfy ``>= 0``.
     """
     censoring_values = set(np.unique(censoring))
     if not censoring_values <= {0, 1}:
         raise ValueError('``censoring`` must only have values in {0, 1}')

     # Zero is accepted here: every entry only has to satisfy ``>= 0``.
     all_non_negative = np.all(times >= 0)
     if not all_non_negative:
         raise ValueError('``times`` array must contain only non-negative '
                          'entries')

     return (safe_array(features), safe_array(times),
             safe_array(censoring, np.ushort))
Example #3
0
    def _set_data(self, features, labels):
        """Check that features and labels agree in size, then store them.

        ``features.shape`` is unpacked as ``(n_samples, n_features)`` and
        ``labels`` must have ``n_samples`` entries along its first axis.
        Both arrays go through ``safe_array`` before being stored.

        Raises
        ------
        ValueError
            If the number of labels differs from the number of samples.
        """
        n_samples, n_features = features.shape
        n_labels = labels.shape[0]
        if n_labels != n_samples:
            message = ("Features has %i samples while labels "
                       "have %i" % (n_samples, n_labels))
            raise ValueError(message)

        stored = (
            ("features", safe_array(features)),
            ("labels", safe_array(labels)),
            ("n_features", n_features),
            ("n_samples", n_samples),
        )
        for attribute, value in stored:
            self._set(attribute, value)
Example #4
0
 def predict(self, X):
     """Return, for each sample, the index of its highest score.

     ``X`` is converted with ``safe_array`` to ``float32`` and handed to
     ``predict_proba``; the result is the ``argmax`` over axis 1 of the
     returned scores.

     Raises
     ------
     RuntimeError
         If the object has not been fitted yet.
     """
     if not self._fitted:
         raise RuntimeError("You must call ``fit`` before")

     features = safe_array(X, dtype='float32')
     return self.predict_proba(features).argmax(axis=1)
Example #5
0
 def n_lags(self, value):
     """Store the lags and the feature offsets derived from them.

     The offsets start at 0 and each following entry adds ``lag + 1`` to
     the previous one, so there is one more offset than there are lags.
     ``value`` is stored after conversion with ``safe_array`` to
     ``np.uint64``, and the preprocessor object is rebuilt afterwards.

     Raises
     ------
     ValueError
         If any element of ``value`` is negative.
     """
     running_offset = 0
     offsets = [running_offset]
     for lag in value:
         if lag < 0:
             raise ValueError('n_lags elements should be greater than or '
                              'equal to 0.')
         running_offset = running_offset + lag + 1
         offsets.append(running_offset)

     lags = safe_array(value, dtype=np.uint64)
     self._set('_n_lags', lags)
     self._set('_features_offset', offsets)
     self._construct_preprocessor_obj()
Example #6
0
    def _set_data(self, features: np.ndarray, times: np.array,
                  censoring: np.array):
        """Check, convert and store the data, then build the model object.

        ``features.shape`` is unpacked as ``(n_samples, n_features)``;
        ``times`` and ``censoring`` must both have ``n_samples`` entries
        along their first axis. All three arrays go through ``safe_array``
        (``censoring`` with ``np.ushort``) before being stored.

        Raises
        ------
        ValueError
            If ``times`` or ``censoring`` does not have ``n_samples``
            entries.
        """
        n_samples, n_features = features.shape

        n_times = times.shape[0]
        if n_times != n_samples:
            message = ("Features has %i samples while times "
                       "have %i" % (n_samples, n_times))
            raise ValueError(message)

        n_censoring = censoring.shape[0]
        if n_censoring != n_samples:
            message = ("Features has %i samples while censoring "
                       "have %i" % (n_samples, n_censoring))
            raise ValueError(message)

        stored = (
            ("features", safe_array(features)),
            ("times", safe_array(times)),
            ("censoring", safe_array(censoring, np.ushort)),
            ("n_samples", n_samples),
            ("n_features", n_features),
        )
        for attribute, value in stored:
            self._set(attribute, value)

        # The model is built from the attributes that were just stored.
        model = _ModelCoxRegPartialLik(self.features, self.times,
                                       self.censoring)
        self._set("_model", model)
Example #7
0
    def partial_fit(self, X, y, classes=None):
        """Fit the forest on a batch of samples, creating it on first use.

        Parameters
        ----------
        X : array-like
            Features. Converted with ``safe_array`` to ``float32``; the
            converted shape is unpacked as ``(n_samples, n_features)``.

        y : array-like
            Labels. Converted with ``safe_array`` to ``float32``; must have
            ``n_samples`` entries along its first axis.

        classes : optional
            Not used by this method.

        Returns
        -------
        output : same type as ``self``
            The current instance.

        Raises
        ------
        ValueError
            If ``X`` and ``y`` do not have the same number of samples.
        """
        X = safe_array(X, dtype='float32')
        y = safe_array(y, dtype='float32')
        n_samples, n_features = X.shape
        # Reject mismatched inputs here rather than handing them to the
        # underlying forest.
        if n_samples != y.shape[0]:
            raise ValueError(("Features has %i samples while labels "
                              "have %i" % (n_samples, y.shape[0])))
        if self._forest is None:
            # First call: the number of features seen now sizes the forest.
            self.n_features = n_features
            # NOTE(review): looks like ``self.memory`` is a budget in MiB
            # and each node is assumed to cost 8 bytes per feature -
            # confirm against the forest implementation.
            max_nodes_with_memory_in_tree \
                = int(1024 ** 2 * self.memory / (8 * self.n_trees * n_features))

            _forest = _OnlineForestClassifier(
                n_features, self.n_classes, self.n_trees, self.step,
                self._criterion, self._feature_importances_type,
                self.use_aggregation, self.dirichlet, self.split_pure,
                self.max_nodes, self.min_extension_size,
                self.min_samples_split, self.max_features, self.n_threads,
                self.seed, self.verbose, self.print_every,
                max_nodes_with_memory_in_tree
            )
            if self._feature_importances_type == FeatureImportanceType_given:
                _forest.set_given_feature_importances(
                    self._given_feature_importances)
            self._set('_forest', _forest)
        self._set("_fitted", True)
        self._forest.fit(X, y)
        return self
Example #8
0
    def predict(self, X, use_aggregation: bool = True):
        """Predict values for given samples

        Parameters
        ----------
        X : `np.ndarray` or `scipy.sparse.csr_matrix`, shape=(n_samples, n_features)
            Features matrix to predict for.

        use_aggregation : `bool`, default=True
            Forwarded as the third argument of the underlying forest's
            ``predict`` call.

        Returns
        -------
        output : `np.array`, shape=(n_samples,)
            Returns predicted values.

        Raises
        ------
        ValueError
            If the object has not been fitted yet.
        """
        import numpy as np
        # Check the fitted state before touching ``X`` so that an unfitted
        # object always reports the same error, whatever the input is.
        if not self._fitted:
            raise ValueError("You must call ``fit`` before")
        X = safe_array(X)
        # The output buffer is sized from the array actually given to the
        # forest, which fills it in place.
        y_pred = np.empty(X.shape[0])
        self._forest.predict(X, y_pred, use_aggregation)
        return y_pred
Example #9
0
    def predict_proba(self, X):
        """Compute per-class scores for given samples

        Parameters
        ----------
        X : `np.ndarray` or `scipy.sparse.csr_matrix`, shape=(n_samples, n_features)
            Features matrix to predict for.

        Returns
        -------
        output : `np.ndarray`, shape=(n_samples, n_classes)
            ``float32`` array filled in by the underlying forest.

        Raises
        ------
        RuntimeError
            If the object has not been fitted yet.
        """
        import numpy as np
        # Check the fitted state before touching ``X`` or ``n_classes`` so
        # that an unfitted object always reports the same error.
        if not self._fitted:
            raise RuntimeError("You must call ``fit`` before")
        X = safe_array(X, dtype='float32')
        # The output buffer is sized from the array actually given to the
        # forest, which fills it in place.
        scores = np.empty((X.shape[0], self.n_classes), dtype='float32')
        self._forest.predict(X, scores)
        return scores
Example #10
0
 def _safe_array(X, dtype=np.float64):
     """Pass ``X`` to ``safe_array``, using ``np.float64`` by default."""
     converted = safe_array(X, dtype)
     return converted
Example #11
0
 def fit(self, X, y):
     """Fit the underlying forest on ``X`` and ``y`` and return ``self``.

     Both inputs go through ``safe_array`` first. The object is flagged as
     fitted before the forest's own ``fit`` is called.
     """
     features, labels = safe_array(X), safe_array(y)
     self._set("_fitted", True)
     self._forest.fit(features, labels)
     return self
Example #12
0
 def set_data(self, X, y):
     """Convert ``X`` and ``y`` with ``safe_array`` and give them to the forest."""
     features, labels = safe_array(X), safe_array(y)
     self._forest.set_data(features, labels)
Example #13
0
 def _safe_array(X, dtype="float64"):
     """Pass ``X`` to ``safe_array``, using ``"float64"`` by default."""
     converted = safe_array(X, dtype)
     return converted
Example #14
0
 def set_data(self, X, y):
     """Convert ``X`` and ``y`` to ``float32`` and give them to the forest."""
     features = safe_array(X, dtype='float32')
     labels = safe_array(y, dtype='float32')
     self._forest.set_data(features, labels)