Beispiel #1
0
    def _predict_proba_for_estimator(
        self, X, X_p, X_d, classifier, intervals, dims, atts
    ):
        c22 = Catch22(outlier_norm=True)
        if isinstance(self._base_estimator, ContinuousIntervalTree):
            return classifier._predict_proba_drcif(
                X, X_p, X_d, c22, self._n_intervals, intervals, dims, atts
            )
        else:
            T = [X, X_p, X_d]

            transformed_x = np.empty(
                shape=(self._att_subsample_size * self.total_intervals, X.shape[0]),
                dtype=np.float32,
            )

            p = 0
            j = 0
            for r in range(0, len(T)):
                for _ in range(0, self._n_intervals[r]):
                    for a in range(0, self._att_subsample_size):
                        transformed_x[p] = _drcif_feature(
                            T[r], intervals[j], dims[j], atts[a], c22
                        )
                        p += 1
                    j += 1

            transformed_x = transformed_x.T
            transformed_x.round(8)
            np.nan_to_num(transformed_x, False, 0, 0, 0)

            return classifier.predict_proba(transformed_x)
Beispiel #2
0
    def _predict_proba_for_estimator(self, X, classifier, intervals, dims,
                                     atts):
        c22 = Catch22(outlier_norm=True)
        if isinstance(self._base_estimator, ContinuousIntervalTree):
            return classifier._predict_proba_cif(X, c22, intervals, dims, atts)
        else:
            transformed_x = np.empty(
                shape=(self._att_subsample_size * self._n_intervals,
                       X.shape[0]),
                dtype=np.float32,
            )

            for j in range(0, self._n_intervals):
                for a in range(0, self._att_subsample_size):
                    transformed_x[self._att_subsample_size * j +
                                  a] = _drcif_feature(X,
                                                      intervals[j],
                                                      dims[j],
                                                      atts[a],
                                                      c22,
                                                      case_id=j)

            transformed_x = transformed_x.T
            transformed_x.round(8)
            np.nan_to_num(transformed_x, False, 0, 0, 0)

            return classifier.predict_proba(transformed_x)
Beispiel #3
0
    def _fit(self, X, y):
        """Fit an estimator using transformed data from the Catch22 transformer.

        Parameters
        ----------
        X : nested pandas DataFrame of shape [n_instances, n_dims]
            Nested dataframe with univariate time-series in cells.
        y : array-like, shape = [n_instances] The class labels.

        Returns
        -------
        self : object
        """
        self.classes_ = class_distribution(np.asarray(y).reshape(-1, 1))[0][0]
        self.n_classes = np.unique(y).shape[0]

        self._transformer = Catch22(outlier_norm=self.outlier_norm)
        self._estimator = _clone_estimator(
            RandomForestClassifier(n_estimators=200)
            if self.estimator is None else self.estimator,
            self.random_state,
        )

        m = getattr(self._estimator, "n_jobs", None)
        if m is not None:
            self._estimator.n_jobs = self.n_jobs

        X_t = self._transformer.fit_transform(X, y)
        X_t = np.nan_to_num(X_t, False, 0, 0, 0)
        self._estimator.fit(X_t, y)

        return self
    def fit(self, X, y):
        """Fit a random catch22 feature forest classifier.

        Parameters
        ----------
        X : nested pandas DataFrame of shape [n_instances, 1]
            Nested dataframe with univariate time-series in cells.
        y : array-like, shape = [n_instances] The class labels.

        Returns
        -------
        self : object
        """
        X = check_X(X, enforce_univariate=False, coerce_to_numpy=True)
        self.classes_ = class_distribution(np.asarray(y).reshape(-1, 1))[0][0]

        c22 = Catch22(outlier_norm=self.outlier_norm)
        c22_list = c22.fit_transform(X)

        self.classifier = RandomForestClassifier(
            n_jobs=self.n_jobs,
            n_estimators=self.n_estimators,
            random_state=self.random_state,
        )

        X_c22 = np.nan_to_num(np.array(c22_list, dtype=np.float32), False, 0,
                              0, 0)
        self.classifier.fit(X_c22, y)

        self._is_fitted = True
        return self
Beispiel #5
0
    def _fit_estimator(self, X, y, idx):
        c22 = Catch22(outlier_norm=True)
        rs = 255 if self.random_state == 0 else self.random_state
        rs = None if self.random_state is None else rs * 37 * (idx + 1)
        rng = check_random_state(rs)

        transformed_x = np.empty(
            shape=(self._att_subsample_size * self._n_intervals, self.n_instances),
            dtype=np.float32,
        )

        atts = rng.choice(25, self._att_subsample_size, replace=False)
        dims = rng.choice(self.n_dims, self._n_intervals, replace=True)
        intervals = np.zeros((self._n_intervals, 2), dtype=int)

        # Find the random intervals for classifier i and concatenate
        # features
        for j in range(0, self._n_intervals):
            if rng.random() < 0.5:
                intervals[j][0] = rng.randint(
                    0, self.series_length - self._min_interval
                )
                len_range = min(
                    self.series_length - intervals[j][0],
                    self._max_interval,
                )
                length = (
                    rng.randint(0, len_range - self._min_interval) + self._min_interval
                )
                intervals[j][1] = intervals[j][0] + length
            else:
                intervals[j][1] = (
                    rng.randint(0, self.series_length - self._min_interval)
                    + self._min_interval
                )
                len_range = min(intervals[j][1], self._max_interval)
                length = (
                    rng.randint(0, len_range - self._min_interval) + self._min_interval
                    if len_range - self._min_interval > 0
                    else self._min_interval
                )
                intervals[j][0] = intervals[j][1] - length

            for a in range(0, self._att_subsample_size):
                transformed_x[self._att_subsample_size * j + a] = _cif_feature(
                    X, intervals[j], dims[j], atts[a], c22
                )

        tree = _clone_estimator(self._base_estimator, random_state=rs)
        transformed_x = transformed_x.T
        transformed_x = transformed_x.round(8)
        transformed_x = np.nan_to_num(transformed_x, False, 0, 0, 0)
        tree.fit(transformed_x, y)

        return [tree, intervals, dims, atts]
Beispiel #6
0
def test_catch22_on_gunpoint():
    # load gunpoint data
    X_train, y_train = load_gunpoint(split="train", return_X_y=True)
    indices = np.random.RandomState(0).permutation(10)

    # fit catch22
    c22 = Catch22()
    c22.fit(X_train.iloc[indices], y_train[indices])

    # assert transformed data is the same
    data = c22.transform(X_train.iloc[indices])
    testing.assert_array_almost_equal(data, catch22_gunpoint_data)
Beispiel #7
0
def test_catch22_on_unit_test():
    """Test of Catch22 on unit test data."""
    # load unit test data
    X_train, y_train = load_unit_test(split="train")
    indices = np.random.RandomState(0).choice(len(y_train), 5, replace=False)

    # fit catch22
    c22 = Catch22(outlier_norm=True)
    c22.fit(X_train.iloc[indices], y_train[indices])

    # assert transformed data is the same
    data = np.nan_to_num(c22.transform(X_train.iloc[indices]), False, 0, 0, 0)
    testing.assert_array_almost_equal(data, catch22_unit_test_data)
Beispiel #8
0
def test_catch22_on_basic_motions():
    """Test of Catch22 on basic motions data."""
    # load basic motions data
    X_train, y_train = load_basic_motions(split="train")
    indices = np.random.RandomState(4).choice(len(y_train), 5, replace=False)

    # fit catch22
    c22 = Catch22()
    c22.fit(X_train.iloc[indices], y_train[indices])

    # assert transformed data is the same
    data = np.nan_to_num(c22.transform(X_train.iloc[indices]), False, 0, 0, 0)
    testing.assert_array_almost_equal(data, catch22_basic_motions_data)
Beispiel #9
0
def test_catch22_single_feature_on_gunpoint():
    # load gunpoint data
    X_train, y_train = load_gunpoint(split="train", return_X_y=True)
    indices = np.random.RandomState(0).permutation(10)

    # fit catch22
    c22 = Catch22()
    c22.fit(X_train.iloc[indices], y_train[indices])

    # assert transformed data is the same
    data = []
    for i in range(22):
        data.append(c22._transform_single_feature(X_train.iloc[indices], i))
    testing.assert_array_almost_equal(data, catch22_single_feature_gunpoint_data)
Beispiel #10
0
def test_catch22_single_feature_on_unit_test():
    """Test of Catch22 on unit test data."""
    # load unit test data
    X_train, y_train = load_unit_test(split="train")
    indices = np.random.RandomState(0).choice(len(y_train), 2, replace=False)

    # fit catch22
    c22 = Catch22(outlier_norm=True)
    c22.fit(X_train.iloc[indices], y_train[indices])

    # assert transformed data is the same
    results = catch22_unit_test_data.transpose()
    for i in range(22):
        data = np.nan_to_num(
            c22.transform_single_feature(X_train.iloc[indices], i), False, 0, 0, 0
        )
        testing.assert_array_almost_equal(data, results[i][:2])
Beispiel #11
0
    def _fit(self, X, y):
        """Fit a pipeline on cases (X,y), where y is the target variable.

        Parameters
        ----------
        X : 3D np.array of shape = [n_instances, n_dimensions, series_length]
            The training data.
        y : array-like, shape = [n_instances]
            The class labels.

        Returns
        -------
        self :
            Reference to self.

        Notes
        -----
        Changes state by creating a fitted model that updates attributes
        ending in "_" and sets is_fitted flag to True.
        """
        interval_transformers = (Catch22(outlier_norm=True, replace_nans=True)
                                 if self.interval_transformers is None else
                                 self.interval_transformers)

        self._transformer = RandomIntervals(
            n_intervals=self.n_intervals,
            transformers=interval_transformers,
            random_state=self.random_state,
            n_jobs=self._threads_to_use,
        )

        self._estimator = _clone_estimator(
            RotationForest() if self.estimator is None else self.estimator,
            self.random_state,
        )

        m = getattr(self._estimator, "n_jobs", None)
        if m is not None:
            self._estimator.n_jobs = self._threads_to_use

        X_t = self._transformer.fit_transform(X, y)
        self._estimator.fit(X_t, y)

        return self
    def predict(self, X):
        """Make predictions for all cases in X.

        Parameters
        ----------
        X : The testing input samples of shape [n_instances,1].

        Returns
        -------
        output : numpy array of shape = [n_instances]
        """
        self.check_is_fitted()
        X = check_X(X, enforce_univariate=False, coerce_to_numpy=True)

        c22 = Catch22(outlier_norm=self.outlier_norm)
        c22_list = c22.fit_transform(X)

        X_c22 = np.nan_to_num(np.array(c22_list, dtype=np.float32), False, 0,
                              0, 0)
        return self.classifier.predict(X_c22)
    def predict_proba(self, X):
        """Make class probability estimates on each case in X.

        Parameters
        ----------
        X - pandas dataframe of testing data of shape [n_instances,1].

        Returns
        -------
        output : numpy array of shape =
                [n_instances, num_classes] of probabilities
        """
        self.check_is_fitted()
        X = check_X(X, enforce_univariate=False, coerce_to_numpy=True)

        c22 = Catch22(outlier_norm=self.outlier_norm)
        c22_list = c22.fit_transform(X)

        X_c22 = np.nan_to_num(np.array(c22_list, dtype=np.float32), False, 0,
                              0, 0)
        return self.classifier.predict_proba(X_c22)
Beispiel #14
0
    def _fit(self, X, y):
        """Fit a pipeline on cases (X,y), where y is the target variable.

        Parameters
        ----------
        X : 3D np.array of shape = [n_instances, n_dimensions, series_length]
            The training data.
        y : array-like, shape = [n_instances]
            The class labels.

        Returns
        -------
        self :
            Reference to self.

        Notes
        -----
        Changes state by creating a fitted model that updates attributes
        ending in "_" and sets is_fitted flag to True.
        """
        self._transformer = Catch22(outlier_norm=self.outlier_norm)

        self._estimator = _clone_estimator(
            RandomForestClassifier(n_estimators=200)
            if self.estimator is None else self.estimator,
            self.random_state,
        )

        m = getattr(self._estimator, "n_jobs", None)
        if m is not None:
            self._estimator.n_jobs = self._threads_to_use

        X_t = self._transformer.fit_transform(X, y)
        X_t = np.nan_to_num(X_t, False, 0, 0, 0)
        self._estimator.fit(X_t, y)

        return self
Beispiel #15
0
    def _fit_estimator(self, X, X_p, X_d, y, idx):
        c22 = Catch22(outlier_norm=True)
        T = [X, X_p, X_d]
        rs = 255 if self.random_state == 0 else self.random_state
        rs = None if self.random_state is None else rs * 37 * (idx + 1)
        rng = check_random_state(rs)

        transformed_x = np.empty(
            shape=(self._att_subsample_size * self.total_intervals, self.n_instances),
            dtype=np.float32,
        )

        atts = rng.choice(29, self._att_subsample_size, replace=False)
        dims = rng.choice(self.n_dims, self.total_intervals, replace=True)
        intervals = np.zeros((self.total_intervals, 2), dtype=int)

        p = 0
        j = 0
        for r in range(0, len(T)):
            transform_length = T[r].shape[2]

            # Find the random intervals for classifier i, transformation r
            # and concatenate features
            for _ in range(0, self._n_intervals[r]):
                if rng.random() < 0.5:
                    intervals[j][0] = rng.randint(
                        0, transform_length - self._min_interval[r]
                    )
                    len_range = min(
                        transform_length - intervals[j][0],
                        self._max_interval[r],
                    )
                    length = (
                        rng.randint(0, len_range - self._min_interval[r])
                        + self._min_interval[r]
                    )
                    intervals[j][1] = intervals[j][0] + length
                else:
                    intervals[j][1] = (
                        rng.randint(0, transform_length - self._min_interval[r])
                        + self._min_interval[r]
                    )
                    len_range = min(intervals[j][1], self._max_interval[r])
                    length = (
                        rng.randint(0, len_range - self._min_interval[r])
                        + self._min_interval[r]
                        if len_range - self._min_interval[r] > 0
                        else self._min_interval[r]
                    )
                    intervals[j][0] = intervals[j][1] - length

                for a in range(0, self._att_subsample_size):
                    transformed_x[p] = _drcif_feature(
                        T[r], intervals[j], dims[j], atts[a], c22
                    )
                    p += 1

                j += 1

        tree = _clone_estimator(self._base_estimator, random_state=rs)
        transformed_x = transformed_x.T
        transformed_x = transformed_x.round(8)
        transformed_x = np.nan_to_num(transformed_x, False, 0, 0, 0)
        tree.fit(transformed_x, y)

        return [
            tree,
            intervals,
            dims,
            atts,
            transformed_x if self.save_transformed_data else None,
        ]
Beispiel #16
0
    _print_array(
        "ShapeletTransformClassifier - BasicMotions",
        _reproduce_classification_basic_motions(
            ShapeletTransformClassifier(
                estimator=RotationForest(n_estimators=3),
                max_shapelets=20,
                n_shapelet_samples=500,
                batch_size=100,
                random_state=0,
            )
        ),
    )

    _print_array(
        "Catch22 - UnitTest",
        _reproduce_transform_unit_test(Catch22(outlier_norm=True)),
    )
    _print_array(
        "Catch22 - BasicMotions",
        _reproduce_transform_basic_motions(Catch22()),
    )
    _print_array(
        "RandomIntervals - UnitTest",
        _reproduce_transform_unit_test(RandomIntervals(random_state=0, n_intervals=3)),
    )
    _print_array(
        "RandomIntervals - BasicMotions",
        _reproduce_transform_basic_motions(
            RandomIntervals(random_state=0, n_intervals=3)
        ),
    )