Esempio n. 1
0
def test_check_X_bad_input_args(X):
    """Verify that check_X and check_X_y reject bad input with ValueError."""
    # NOTE(review): ``y`` is not defined inside this function; presumably it
    # is provided at module level -- confirm.
    # Lambdas keep argument evaluation inside the ``pytest.raises`` context.
    failing_calls = (
        lambda: check_X(X),
        lambda: check_X_y(X, y),
    )
    for call in failing_calls:
        with pytest.raises(ValueError):
            call()
Esempio n. 2
0
    def distance_to_exemplars(self, X):
        """Compute the distance from every instance in X to every exemplar.

        Parameters
        ----------
        X: the dataset containing a list of instances; rows are accessed
            positionally through ``X.iloc``.

        Returns
        -------
        2d numpy array of distances from each instance to each
        exemplar (instance by exemplar)
        """
        # Validation only: the value returned by check_X is not used.
        check_X(X)

        run_in_parallel = self.n_jobs > 1 or self.n_jobs < 0
        if run_in_parallel:
            pool = Parallel(self.n_jobs)
            per_instance = pool(
                delayed(self._distance_to_exemplars_inst)(
                    self.X_exemplar, X.iloc[row, :], self.distance_measure)
                for row in range(X.shape[0]))
        else:
            per_instance = []
            for row in range(X.shape[0]):
                per_instance.append(
                    self._distance_to_exemplars_inst(
                        self.X_exemplar, X.iloc[row, :],
                        self.distance_measure))

        # Stack the per-instance results row-wise into a single array.
        return np.vstack(np.array(per_instance))
Esempio n. 3
0
def test_check_X_enforce_univariate():
    """Two-column data must be rejected when univariate input is enforced."""
    X, y = make_classification_problem(n_columns=2)
    expected = r"univariate"
    failing_calls = (
        lambda: check_X(X, enforce_univariate=True),
        lambda: check_X_y(X, y, enforce_univariate=True),
    )
    for call in failing_calls:
        with pytest.raises(ValueError, match=expected):
            call()
Esempio n. 4
0
def test_check_X_enforce_min_columns():
    """Two-column data must be rejected when three columns are required."""
    X, y = make_classification_problem(n_columns=2)
    expected = r"columns"
    failing_calls = (
        lambda: check_X(X, enforce_min_columns=3),
        lambda: check_X_y(X, y, enforce_min_columns=3),
    )
    for call in failing_calls:
        with pytest.raises(ValueError, match=expected):
            call()
Esempio n. 5
0
def test_check_enforce_min_instances():
    """Three instances must be rejected when at least four are required."""
    X, y = make_classification_problem(n_instances=3)
    expected = r"instance"
    failing_calls = (
        lambda: check_X(X, enforce_min_instances=4),
        lambda: check_X_y(X, y, enforce_min_instances=4),
        lambda: check_y(y, enforce_min_instances=4),
    )
    for call in failing_calls:
        with pytest.raises(ValueError, match=expected):
            call()
Esempio n. 6
0
    def _set_oob_score(self, X, y):
        """Compute out-of-bag score.

        Accumulates, for every fitted estimator, the class probabilities it
        predicts on the rows returned by ``_generate_unsampled_indices``, then
        normalises the accumulated values per row.

        Parameters
        ----------
        X : input data; rows are selected positionally through ``X.iloc``.
        y : target array indexed as ``y[:, k]`` for each output ``k``.

        Sets
        ----
        oob_decision_function_ : the normalised accumulated probabilities
            (a single array when ``n_outputs_ == 1``, otherwise a list with
            one array per output).
        oob_score_ : mean, over outputs, of the fraction of rows where
            ``y[:, k]`` equals the argmax of the accumulated probabilities.
        """
        # Validation only: the return values of both checks are discarded.
        check_X_y(X, y)
        check_X(X, enforce_univariate=True)

        n_classes_ = self.n_classes_
        n_samples = y.shape[0]

        oob_decision_function = []
        oob_score = 0.0
        # One (n_samples, n_classes) accumulator per output.
        predictions = [
            np.zeros((n_samples, n_classes_[k]))
            for k in range(self.n_outputs_)
        ]

        n_samples_bootstrap = _get_n_samples_bootstrap(n_samples,
                                                       self.max_samples)

        for estimator in self.estimators_:
            # Each estimator exposes ``steps``; the object in the last step
            # supplies the random state used to regenerate the indices.
            final_estimator = estimator.steps[-1][1]
            unsampled_indices = _generate_unsampled_indices(
                final_estimator.random_state, n_samples, n_samples_bootstrap)
            p_estimator = estimator.predict_proba(X.iloc[unsampled_indices, :])

            # Wrap the single-output result so it can be indexed by output.
            if self.n_outputs_ == 1:
                p_estimator = [p_estimator]

            for k in range(self.n_outputs_):
                predictions[k][unsampled_indices, :] += p_estimator[k]

        for k in range(self.n_outputs_):
            # A zero row sum means no probabilities were accumulated for
            # that row.
            if (predictions[k].sum(axis=1) == 0).any():
                warn("Some inputs do not have OOB scores. "
                     "This probably means too few trees were used "
                     "to compute any reliable oob estimates.")

            # Normalise each row by its sum (rows with a zero sum are divided
            # by zero here; only the warning above flags them).
            decision = predictions[k] / predictions[k].sum(axis=1)[:,
                                                                   np.newaxis]
            oob_decision_function.append(decision)
            oob_score += np.mean(y[:, k] == np.argmax(predictions[k], axis=1),
                                 axis=0)

        if self.n_outputs_ == 1:
            self.oob_decision_function_ = oob_decision_function[0]
        else:
            self.oob_decision_function_ = oob_decision_function

        # Average the per-output scores.
        self.oob_score_ = oob_score / self.n_outputs_
Esempio n. 7
0
    def transform(self, X, y=None):
        """Apply PAA to every column of X and reassemble the result.

        Parameters
        ----------
        X : nested pandas DataFrame of shape [n_instances, n_dims]
            Nested dataframe with multivariate time-series in cells.
        y : ignored.

        Returns
        -------
        dims: Pandas data frame with first dimension in column zero,
              second in column one etc.
        """
        self.check_is_fitted()
        X = check_X(X, enforce_univariate=False, coerce_to_pandas=True)

        # The length is read from the very first cell of the dataframe.
        series_length = len(X.iloc[0, 0])
        columns = X.columns

        # Validate the parameters against that length.
        self._check_parameters(series_length)

        # Transform each dimension independently.
        per_dimension = [
            self._perform_paa_along_dim(pd.DataFrame(X[name]))
            for name in columns
        ]

        # Stitch the dimensions back together under their original names.
        transformed = pd.concat(per_dimension, axis=1, sort=False)
        transformed.columns = columns
        return transformed
Esempio n. 8
0
    def predict(self, X):
        """Predict by averaging the predictions of all estimators.

        Parameters
        ----------
        X : pd.DataFrame or np.ndarray
            Panel data

        Returns
        -------
        np.ndarray
            Predictions.

        Raises
        ------
        TypeError
            If the series length of X differs from ``self.series_length``.
        """
        self.check_is_fitted()
        X = check_X(X, enforce_univariate=True, coerce_to_numpy=True)
        # Drop the singleton axis 1 so X is 2d.
        X = X.squeeze(1)

        _n_instances, n_timepoints = X.shape
        if n_timepoints != self.series_length:
            raise TypeError(
                "The number of time points in the training data does not match "
                "that in the test data.")

        pool = Parallel(n_jobs=self.n_jobs)
        per_estimator = pool(
            delayed(_predict)(X, self.estimators_[idx], self.intervals_[idx])
            for idx in range(self.n_estimators))
        return np.mean(per_estimator, axis=0)
    def _predict_proba_drcif(self, X, X_p, X_d, c22, n_intervals, intervals,
                             dims, atts):
        """Embedded predict proba for the DrCIF classifier.

        Each instance is passed to ``self.root.predict_proba_drcif`` as a list
        of three single-instance arrays taken from ``X``, ``X_p`` and ``X_d``.

        Returns
        -------
        Array of shape (n_instances, self.n_classes), one row per instance.

        Raises
        ------
        NotFittedError
            If ``self._is_fitted`` is falsy.
        """
        if not self._is_fitted:
            raise NotFittedError(
                f"This instance of {self.__class__.__name__} has not "
                f"been fitted yet; please call `fit` first.")

        X = check_X(X, coerce_to_numpy=True)
        n_instances, n_dims, series_length = X.shape
        probas = np.zeros((n_instances, self.n_classes))

        for row in range(n_instances):
            # Reshape every representation to a batch holding one instance.
            single_case = [
                X[row].reshape((1, n_dims, series_length)),
                X_p[row].reshape((1, n_dims, X_p.shape[2])),
                X_d[row].reshape((1, n_dims, X_d.shape[2])),
            ]
            probas[row] = self.root.predict_proba_drcif(
                single_case,
                c22,
                n_intervals,
                intervals,
                dims,
                atts,
                self.n_classes,
            )

        return probas
Esempio n. 10
0
    def _transform_words(self, X):
        """Merge the SFA bags of all dimensions and window sizes per instance.

        Returns
        -------
        list of dict, one per instance in X, mapping a shifted word (see
        ``MUSE.shift_left``) to the value stored for it in the SFA bag.
        """
        self.check_is_fitted()
        X = check_X(X, enforce_univariate=False, coerce_to_pandas=True)

        if self.use_first_order_differences:
            X = self.add_first_order_differences(X)

        merged_bags = [{} for _ in range(len(X))]

        # Run SFA separately on every dimension.
        for dim_idx, col in enumerate(self.col_names):
            dim_array = from_nested_to_3d_numpy(X[[col]])

            for win_idx, window_size in enumerate(self.window_sizes[dim_idx]):
                transformer = self.SFA_transformers[dim_idx][win_idx]
                window_bags = transformer.transform(dim_array)[0]

                # Words from different window sizes are merged into a single
                # bag; a prefix is added to each word so that they remain
                # distinguishable.
                prefix_bits = np.int32(self.highest_bits[dim_idx])
                for case_idx in range(len(window_bags)):
                    for key, value in window_bags[case_idx].items():
                        shifted = MUSE.shift_left(key, prefix_bits, dim_idx,
                                                  self.highest_dim_bit,
                                                  window_size)
                        merged_bags[case_idx][shifted] = value

        return merged_bags
    def fit(self, X, y):
        """Fit a random catch22 feature forest classifier.

        Parameters
        ----------
        X : nested pandas DataFrame or array accepted by ``check_X``
            Time series data; it is coerced to numpy and univariate input is
            not enforced.
        y : array-like, shape = [n_instances] The class labels.

        Returns
        -------
        self : object
        """
        X = check_X(X, enforce_univariate=False, coerce_to_numpy=True)

        y_column = np.asarray(y).reshape(-1, 1)
        self.classes_ = class_distribution(y_column)[0][0]

        feature_extractor = Catch22(outlier_norm=self.outlier_norm)
        raw_features = feature_extractor.fit_transform(X)

        self.classifier = RandomForestClassifier(
            n_jobs=self.n_jobs,
            n_estimators=self.n_estimators,
            random_state=self.random_state,
        )

        # Replace NaN and +/-inf with 0 before handing the features over.
        features = np.nan_to_num(
            np.array(raw_features, dtype=np.float32),
            copy=False,
            nan=0,
            posinf=0,
            neginf=0,
        )
        self.classifier.fit(features, y)

        self._is_fitted = True
        return self
Esempio n. 12
0
        def wrapper(self, data, labels=None, **kwargs):
            # Remember whether the caller passed a DataFrame (and its index)
            # so the output can be converted back at the end.
            is_pandas = isinstance(data, pd.DataFrame)
            pd_idx = data.index if is_pandas else None

            # Optional fitted-state check.
            if check_fitted:
                self.check_is_fitted()

            # Bring everything into the same pandas format first.
            if labels is not None:
                data, labels = check_X_y(data, labels, coerce_to_pandas=True)
            else:
                data = check_X(data, coerce_to_pandas=True)

            # sktime uses [N, C, L] whereas the signature code uses [N, L, C]
            # (C being channels), hence the transpose of the last two axes.
            as_array = from_nested_to_3d_numpy(data)
            data = np.transpose(as_array, [0, 2, 1])

            # Call the wrapped function on the transposed array.
            if labels is not None:
                output = func(self, data, labels, **kwargs)
            else:
                output = func(self, data, **kwargs)

            # Convert back to a DataFrame only when the input was one, the
            # output is an array and numpy output is not forced.
            convert_back = (is_pandas and isinstance(output, np.ndarray)
                            and not force_numpy)
            if convert_back:
                output = pd.DataFrame(index=pd_idx, data=output)

            return output
Esempio n. 13
0
    def predict_proba(self, X):
        """Predict class probabilities for n_instances in X.

        If the wrapped estimator has no callable ``predict_proba``, its hard
        predictions are turned into one-hot rows instead.

        Parameters
        ----------
        X : pd.DataFrame of shape (n_instances, n_dims)

        Returns
        -------
        predicted_probs : array of shape (n_instances, n_classes)
            Predicted probability of each class.
        """
        self.check_is_fitted()
        X = check_X(X)

        features = self._transformer.transform(X)
        # Replace NaN and +/-inf with 0.
        features = np.nan_to_num(
            features, copy=False, nan=0, posinf=0, neginf=0)

        proba_method = getattr(self._estimator, "predict_proba", None)
        if callable(proba_method):
            return self._estimator.predict_proba(features)

        # Fallback: put a 1 in the column of the predicted class.
        one_hot = np.zeros((X.shape[0], self.n_classes))
        labels = self._estimator.predict(features)
        for row in range(0, X.shape[0]):
            one_hot[row, np.where(self.classes_ == labels[row])] = 1
        return one_hot
Esempio n. 14
0
    def predict_proba(self, X):
        """Find probability estimates for each class for all cases in X.

        The per-tree results of ``_predict_proba_tree`` are summed over all
        trees and each row is then l1-normalised.

        Parameters
        ----------
        X : array-like or sparse matrix of shape = [n_instances, n_columns]
            The input samples. Univariate input is enforced and X is coerced
            to pandas; an exception is raised for multivariate input since
            this classifier does not yet have multivariate capability.

        Returns
        -------
        output : array of shape = [n_instances, n_classes] of probabilities
        """
        X = check_X(X, enforce_univariate=True, coerce_to_pandas=True)
        X = dataset_properties.negative_dataframe_indices(X)

        run_in_parallel = self.n_jobs > 1 or self.n_jobs < 0
        if run_in_parallel:
            pool = Parallel(self.n_jobs)
            per_tree = pool(
                delayed(self._predict_proba_tree)(X, tree)
                for tree in self.trees)
        else:
            per_tree = []
            for tree in self.trees:
                per_tree.append(self._predict_proba_tree(X, tree))

        summed = np.sum(np.array(per_tree), axis=0)
        # In-place row normalisation.
        normalize(summed, copy=False, norm="l1")
        return summed
Esempio n. 15
0
    def predict(self, X):
        """Predict a class for every case in X.

        Each case is turned into a bag of words and passed to
        ``self._test_nn``. With more than one dimension, the bags of the
        selected dimensions are merged, tagging each word with its dimension.

        Returns
        -------
        np.ndarray holding the result of ``_test_nn`` for every case.
        """
        self.check_is_fitted()
        X = check_X(X, coerce_to_numpy=True)
        n_cases = X.shape[0]

        if self.n_dims > 1:
            test_bags = [defaultdict(int) for _ in range(n_cases)]

            for pos, dim in enumerate(self.dims):
                X_dim = X[:, dim, :].reshape(n_cases, 1, self.series_length)
                dim_bags = self.transformers[pos].transform(X_dim)[0]

                for case in range(n_cases):
                    merged = test_bags[case]
                    for word, count in dim_bags[case].items():
                        # Shift the word and OR in the dimension index.
                        merged[word << self.highest_dim_bit | dim] = count
        else:
            test_bags = self.transformers[0].transform(X)[0]

        pool = Parallel(n_jobs=self.n_jobs)
        classes = pool(delayed(self._test_nn)(bag) for bag in test_bags)
        return np.array(classes)
Esempio n. 16
0
    def predict_proba(self, X):
        """Predict class probabilities for X.

        The result is the sum of ``predict_proba`` over all estimators in
        ``self.estimators_`` divided by the number of estimators, i.e. their
        mean.

        Parameters
        ----------
        X : array-like or sparse matrix of shape = [n_samples, n_features]
            The input samples; univariate input is enforced.

        Returns
        -------
        p : array of shape = [n_samples, n_classes], or a list of n_outputs
            such arrays if n_outputs > 1.
            The class probabilities of the input samples. The order of the
            classes corresponds to that in the attribute `classes_`.
        """
        self.check_is_fitted()
        X = check_X(X, enforce_univariate=True)
        X = self._validate_X_predict(X)

        # Only the job count of the partitioning is needed here.
        n_jobs, _, _ = _partition_estimators(self.n_estimators, self.n_jobs)

        pool = Parallel(n_jobs=n_jobs, verbose=self.verbose)
        per_estimator = pool(
            delayed(estimator.predict_proba)(X)
            for estimator in self.estimators_)

        return np.sum(per_estimator, axis=0) / len(self.estimators_)
Esempio n. 17
0
    def transform(self, X, y=None):
        """Transform univariate time series with the fitted ``self.pca``.

        Parameters
        ----------
        X : nested pandas DataFrame of shape [n_samples, 1]
            Nested dataframe with univariate time-series in cells.
        y : ignored.

        Returns
        -------
        Xt : pandas DataFrame
          Nested dataframe built by ``from_2d_array_to_nested`` from the
          PCA-transformed data (potentially reduced in length).
        """
        self.check_is_fitted()
        X = check_X(X, enforce_univariate=True, coerce_to_numpy=True)

        # Drop the singleton axis 1 to obtain the 2d array handed to the PCA.
        flat = X.squeeze(1)
        components = pd.DataFrame(data=self.pca.transform(flat))

        # Convert back into the nested time series format.
        return from_2d_array_to_nested(components)
Esempio n. 18
0
    def transform(self, X, y=None):
        """Find runs of ``self.value`` in every series of X.

        For each series, the start index and the length of every run of
        ``self.value`` that is at least ``self.min_length`` long are
        collected. The results are also stored on ``self._starts`` and
        ``self._lengths``.

        Parameters
        ----------
        X : nested pandas DataFrame of shape [n_samples, n_columns]
            Nested dataframe with time-series in cells; univariate input is
            enforced.
        y : ignored.

        Returns
        -------
        Xt : pandas DataFrame
          Two columns, ``<column>_<value>_starts`` and
          ``<column>_<value>_lengths``.
        """
        self.check_is_fitted()
        X = check_X(X, enforce_univariate=True, coerce_to_pandas=True)

        column_name = X.columns[0]
        self._starts = []
        self._lengths = []

        for series in X.iloc[:, 0]:
            values = np.asarray(series)

            # Indicator of the positions that match the searched value; NaN
            # and inf need dedicated tests.
            if np.isnan(self.value):
                hits = np.where(np.isnan(values), 1, 0)
            elif np.isinf(self.value):
                hits = np.where(np.isinf(values), 1, 0)
            else:
                hits = np.where(values == self.value, 1, 0)

            # Pad with zeros on both sides so runs touching the borders also
            # produce a +1 / -1 transition.
            edges = np.diff(np.hstack([0, hits, 0]))
            run_starts = np.where(edges == 1)[0]
            run_ends = np.where(edges == -1)[0]
            run_lengths = run_ends - run_starts

            # Keep only runs of at least min_length.
            long_enough = run_lengths >= self.min_length
            self._starts.append(run_starts[long_enough])
            self._lengths.append(run_lengths[long_enough])

        # Assemble the output dataframe.
        Xt = pd.DataFrame()
        value_label = "nan" if np.isnan(self.value) else str(self.value)
        column_prefix = "%s_%s" % (column_name, value_label)
        Xt["%s_starts" % column_prefix] = pd.Series(self._starts)
        Xt["%s_lengths" % column_prefix] = pd.Series(self._lengths)
        return Xt
Esempio n. 19
0
    def predict_proba(self, X):
        """Find probability estimates for each class for all cases in X.

        Every estimator votes for its predicted class with its entry in
        ``self.weights``; the votes are divided by ``self.weight_sum`` and
        rounded to 8 decimals.

        Parameters
        ----------
        X : The input samples. array-like or sparse matrix of shape
        = [n_test_instances, series_length], or a pandas data frame
            (sktime format); X is coerced to numpy by ``check_X``.

        Returns
        -------
        output : array of shape = [n_test_instances, num_classes] of
        probabilities
        """
        self.check_is_fitted()
        X = check_X(X, coerce_to_numpy=True)

        votes = np.zeros((X.shape[0], self.n_classes))

        for est_idx, clf in enumerate(self.estimators_):
            labels = clf.predict(X)
            for row in range(0, X.shape[0]):
                class_col = self.class_dictionary[labels[row]]
                votes[row, class_col] += self.weights[est_idx]

        normaliser = np.ones(self.n_classes) * self.weight_sum
        return np.around(votes / normaliser, 8)
Esempio n. 20
0
    def predict_proba(self, X) -> np.array:
        """Predicts labels probabilities for sequences in X.

        Validates and coerces X according to the estimator tags, then
        delegates to ``self._predict_proba``.

        Parameters
        ----------
        X : 2D np.array (univariate, equal length series) of shape = [n_instances,
        series_length]
            or 3D np.array (any number of dimensions, equal length series) of shape =
            [n_instances,n_dimensions,series_length]
            or pd.DataFrame with each column a dimension, each cell a pd.Series (any
            number of dimensions, equal or unequal length series)

        Returns
        -------
        y : 2D array of shape =  [n_instances, n_classes] - estimated class
        probabilities
        """
        self.check_is_fitted()

        # The tags decide the coercion target and whether multivariate input
        # is rejected.
        check_kwargs = {
            "coerce_to_numpy": self.get_tag("coerce-X-to-numpy"),
            "coerce_to_pandas": self.get_tag("coerce-X-to-pandas"),
            "enforce_univariate": not self.get_tag("capability:multivariate"),
        }
        X = check_X(X, **check_kwargs)

        return self._predict_proba(X)
    def fit(self, X, y=None):
        """Fit dilations and biases to input time series.

        Parameters are fitted twice: on the (optionally normalised and
        padded) series and on their first-order differences.

        Parameters
        ----------
        X : pandas DataFrame, input time series (sktime format)
        y : array_like, target values (optional, ignored as irrelevant)

        Returns
        -------
        self
        """
        series = check_X(X, coerce_to_numpy=True)

        if self.normalise:
            # Standardise along the last axis; the epsilon avoids a division
            # by zero.
            centred = series - series.mean(axis=-1, keepdims=True)
            series = centred / (series.std(axis=-1, keepdims=True) + 1e-8)

        if series.shape[2] < 10:
            # handling very short series (like PensDigit from the MTSC
            # archive): series have to be at least a length of 10 (including
            # differencing), so shorter ones are zero-padded at the end
            padded = np.zeros(
                (series.shape[0], series.shape[1], 10), dtype=series.dtype)
            padded[:, :, : series.shape[2]] = series
            series = padded

        self.parameter = self._get_parameter(series)

        differenced = np.diff(series, 1)
        self.parameter1 = self._get_parameter(differenced)

        self._is_fitted = True
        return self
Esempio n. 22
0
    def transform(self, X, y=None):
        """
        Transform X by applying ``self._create_pad`` to every series.

        Parameters
        ----------
        X : nested pandas DataFrame of shape [n_instances, n_columns]
            Nested dataframe with time-series in cells.
        y : ignored.

        Returns
        -------
        Xt : pandas DataFrame

        Raises
        ------
        ValueError
            If the maximum series length found in X exceeds
            ``self.pad_length_``.
        """
        self.check_is_fitted()
        X = check_X(X, coerce_to_pandas=True)

        n_instances = X.shape[0]
        arr = [X.iloc[i, :].values for i in range(n_instances)]

        max_length = _get_max_length(arr)

        if max_length > self.pad_length_:
            # Implicit string concatenation instead of a backslash inside the
            # literal, which embedded the source indentation in the message.
            raise ValueError(
                "Error: max_length of series "
                "is greater than the one found when fit or set."
            )

        # One pd.Series of padded series per instance.
        pad = [
            pd.Series([self._create_pad(series) for series in out])
            for out in arr
        ]

        return pd.DataFrame(pad)
Esempio n. 23
0
    def fit(self, X, y=None):
        """
        Fit transformer by determining ``self.lower_``.

        When ``self.lower`` is None, ``lower_`` is computed by
        ``self.get_min_length`` from the rows of X; otherwise ``self.lower``
        is used as given.

        Parameters
        ----------
        X : pandas DataFrame of shape [n_samples, n_features]
            Input data
        y : pandas Series, shape (n_samples, ...), optional
            Targets for supervised learning.

        Returns
        -------
        self : an instance of self.
        """
        X = check_X(X, coerce_to_pandas=True)

        if self.lower is not None:
            self.lower_ = self.lower
        else:
            n_instances, _ = X.shape
            rows = [X.iloc[idx, :].values for idx in range(n_instances)]
            self.lower_ = self.get_min_length(rows)

        self._is_fitted = True
        return self
Esempio n. 24
0
    def predict_proba(self, X):
        """Combine the weighted probabilities of the four components.

        The ``predict_proba`` output of ``stc``, ``tsf``, ``rise`` and
        ``cboss`` is scaled by the matching ``*_weight`` attribute, summed,
        and every row is divided by its sum.

        Returns
        -------
        Array of shape (n_instances, self.n_classes).
        """
        self.check_is_fitted()
        X = check_X(X, enforce_univariate=True)

        dists = np.zeros((X.shape[0], self.n_classes))

        for name in ("stc", "tsf", "rise", "cboss"):
            component = getattr(self, name)
            proba = component.predict_proba(X)
            scale = np.ones(self.n_classes) * getattr(self, name + "_weight")
            dists = np.add(dists, proba * scale)

        # Normalise so that each row sums to one.
        row_totals = dists.sum(axis=1, keepdims=True)
        return dists / row_totals
Esempio n. 25
0
    def predict_proba(self, X):
        """
        Find probability estimates for each class for all cases in X.

        The per-estimator probabilities are summed and divided by
        ``self.n_estimators``.

        Parameters
        ----------
        X : The input samples. array-like or sparse matrix of shape
        = [n_test_instances, series_length]
            If a Pandas data frame is passed (sktime format) a check is
            performed that it only has one column.
            If not, an exception is thrown, since this classifier does not
            yet have multivariate capability.

        Returns
        -------
        output : nd.array of shape = (n_instances, n_classes)
            Predicted probabilities

        Raises
        ------
        TypeError
            If the series length of X differs from ``self.series_length``.
        """
        self.check_is_fitted()
        X = check_X(X, enforce_univariate=True, coerce_to_numpy=True)
        # Drop the singleton axis 1 so X is 2d.
        X = X.squeeze(1)

        _n_instances, n_timepoints = X.shape
        if n_timepoints != self.series_length:
            raise TypeError(
                " ERROR number of attributes in the train does not match "
                "that in the test data")

        pool = Parallel(n_jobs=self.n_jobs)
        y_probas = pool(
            delayed(_predict_proba_for_estimator)(
                X, self.estimators_[idx], self.intervals_[idx])
            for idx in range(self.n_estimators))

        summed = np.sum(y_probas, axis=0)
        divisor = np.ones(self.n_classes) * self.n_estimators
        return summed / divisor
Esempio n. 26
0
    def fit(self, X, y=None):
        """Fits dilations and biases to input time series.

        Parameters
        ----------
        X : pandas DataFrame, input time series (sktime format)
        y : array_like, target values (optional, ignored as irrelevant)

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If the series have fewer than 9 time points.
        """
        X = check_X(X, enforce_univariate=True, coerce_to_numpy=True)

        # Only integer random states are forwarded; anything else becomes
        # None.
        if isinstance(self.random_state, int):
            seed = np.int32(self.random_state)
        else:
            seed = None

        # Keep the first (only) dimension as a 2d float32 array.
        X = X[:, 0, :].astype(np.float32)
        _, n_timepoints = X.shape
        if n_timepoints < 9:
            raise ValueError(
                (f"n_timepoints must be >= 9, but found {n_timepoints};"
                 " zero pad shorter series so that n_timepoints == 9"))

        self.parameters = _fit(
            X, self.num_kernels, self.max_dilations_per_kernel, seed)
        self._is_fitted = True
        return self
Esempio n. 27
0
    def predict_proba(self, X):
        """
        Find probability estimates for each class for all cases in X.

        Each distance ``d`` to an exemplar is converted to ``1 / (d + 1)``
        and every row is then l1-normalised.

        Parameters
        ----------
        X : array-like or sparse matrix of shape = [n_instances, n_columns]
            The input samples. Univariate input is enforced and X is coerced
            to pandas; an exception is raised for multivariate input since
            this classifier does not yet have multivariate capability.

        Returns
        -------
        output : array of shape = [n_instances, n_classes] of probabilities
        """
        X = check_X(X, enforce_univariate=True, coerce_to_pandas=True)
        X = dataset_properties.negative_dataframe_indices(X)

        distances = self.distance_to_exemplars(X)
        unit = np.ones(distances.shape)

        # Adding one keeps the reciprocal finite for zero distances.
        similarity = np.divide(unit, np.add(distances, unit))
        # In-place row normalisation.
        normalize(similarity, copy=False, norm="l1")
        return similarity
Esempio n. 28
0
    def transform(self, X, y=None):
        """Transform input time series.

        Parameters
        ----------
        X : pandas DataFrame, input time series (sktime format)
        y : array_like, target values (optional, ignored as irrelevant)

        Returns
        -------
        pandas DataFrame, transformed features
        """
        self.check_is_fitted()
        X = check_X(X, enforce_univariate=True, coerce_to_numpy=True)
        # Keep the first (only) dimension as a 2d float32 array.
        X = X[:, 0, :].astype(np.float32)

        # Use all cores when n_jobs is below 1 or exceeds the number of
        # available cores.
        prev_threads = get_num_threads()
        n_cpus = multiprocessing.cpu_count()
        if self.n_jobs < 1 or self.n_jobs > n_cpus:
            n_jobs = n_cpus
        else:
            n_jobs = self.n_jobs

        set_num_threads(n_jobs)
        try:
            X_ = _transform(X, self.parameters)
        finally:
            # Always restore the previous thread count, even if the
            # transform raises.
            set_num_threads(prev_threads)
        return pd.DataFrame(X_)
Esempio n. 29
0
    def transform(self, X, y=None):
        """Transforms input time series using random convolutional kernels.

        Parameters
        ----------
        X : pandas DataFrame, input time series (sktime format)
        y : array_like, target values (optional, ignored as irrelevant)

        Returns
        -------
        pandas DataFrame, transformed features
        """
        self.check_is_fitted()
        _X = check_X(X, coerce_to_numpy=True)
        if self.normalise:
            # Standardise along the last axis; the epsilon avoids a division
            # by zero.
            _X = (_X - _X.mean(axis=-1, keepdims=True)) / (
                _X.std(axis=-1, keepdims=True) + 1e-8)

        # Use all cores when n_jobs is below 1 or exceeds the number of
        # available cores.
        prev_threads = get_num_threads()
        n_cpus = multiprocessing.cpu_count()
        if self.n_jobs < 1 or self.n_jobs > n_cpus:
            n_jobs = n_cpus
        else:
            n_jobs = self.n_jobs

        set_num_threads(n_jobs)
        try:
            t = pd.DataFrame(_apply_kernels(_X, self.kernels))
        finally:
            # Always restore the previous thread count, even if applying the
            # kernels raises.
            set_num_threads(prev_threads)
        return t
Esempio n. 30
0
    def transform(self, X, y=None):
        """Transform data into the catch22 features.

        Every case is handed to ``self._transform_case``; when
        ``self.replace_nans`` is set, NaN and +/-inf values in the result are
        replaced with 0.

        Parameters
        ----------
        X : pandas DataFrame or 3d numpy array, input time series.
        y : array_like, target values (optional, ignored).

        Returns
        -------
        Pandas dataframe containing 22 features for each input series.
        """
        self.check_is_fitted()
        X = check_X(X, enforce_univariate=False, coerce_to_numpy=True)

        pool = Parallel(n_jobs=self.n_jobs)
        features = pool(
            delayed(self._transform_case)(X[idx])
            for idx in range(X.shape[0]))

        if self.replace_nans:
            features = np.nan_to_num(
                features, copy=False, nan=0, posinf=0, neginf=0)

        return pd.DataFrame(features)