Exemple #1
0
def set_classifier(cls, resampleId):
    """
    Build the classifier identified by a short string name.

    Basic way of determining the classifier to build. To differentiate
    settings, just add another elif. So, for example, if you wanted tuned
    TSF, you just pass TuneTSF and set up the tuning mechanism in the elif.
    This may well get superseded, it is just how we have always done it.

    :param cls: String indicating which classifier you want. Matching is
        case-insensitive.
    :param resampleId: Value passed as ``random_state`` to the classifiers
        that are seeded here. The "st", "dtwcv", "ee"/"elasticensemble",
        "tsfcomposite" and "risecomposite" branches do not receive it.
    :return: A newly constructed classifier (for "rocket", the pipeline
        returned by ``make_pipeline``).
    :raises ValueError: If ``cls`` does not match any known classifier name.
    """
    # Normalise once instead of lower-casing in every branch.
    name = cls.lower()

    if name == "pf":
        return pf.ProximityForest(random_state=resampleId)
    elif name == "pt":
        return pf.ProximityTree(random_state=resampleId)
    elif name == "ps":
        return pf.ProximityStump(random_state=resampleId)
    elif name == "rise":
        return fb.RandomIntervalSpectralForest(random_state=resampleId)
    elif name == "tsf":
        return ib.TimeSeriesForest(random_state=resampleId)
    elif name == "cif":
        return CanonicalIntervalForest(random_state=resampleId)
    elif name == "boss":
        return BOSSEnsemble(random_state=resampleId)
    elif name == "cboss":
        return ContractableBOSS(random_state=resampleId)
    elif name == "tde":
        return TemporalDictionaryEnsemble(random_state=resampleId)
    elif name == "st":
        return st.ShapeletTransformClassifier(time_contract_in_mins=1500)
    elif name == "dtwcv":
        return nn.KNeighborsTimeSeriesClassifier(metric="dtwcv")
    elif name in ("ee", "elasticensemble"):
        return dist.ElasticEnsemble()
    elif name == "tsfcomposite":
        # It defaults to TSF
        return ensemble.TimeSeriesForestClassifier()
    elif name == "risecomposite":
        # Per-interval feature union: autocorrelation coefficients and power
        # spectrum, each applied row-wise, then flattened for the tree.
        steps = [
            ("segment", RandomIntervalSegmenter(n_intervals=1, min_length=5)),
            (
                "transform",
                FeatureUnion([
                    (
                        "acf",
                        make_row_transformer(
                            FunctionTransformer(func=acf_coefs,
                                                validate=False)),
                    ),
                    (
                        "ps",
                        make_row_transformer(
                            FunctionTransformer(func=powerspectrum,
                                                validate=False)),
                    ),
                ]),
            ),
            ("tabularise", Tabularizer()),
            ("clf", DecisionTreeClassifier()),
        ]
        base_estimator = Pipeline(steps)
        return ensemble.TimeSeriesForestClassifier(estimator=base_estimator,
                                                   n_estimators=100)
    elif name == "rocket":
        # NOTE(review): `normalize` was removed from RidgeClassifierCV in
        # scikit-learn 1.2 -- confirm the scikit-learn version in use.
        rocket_pipeline = make_pipeline(
            Rocket(random_state=resampleId),
            RidgeClassifierCV(alphas=np.logspace(-3, 3, 10), normalize=True),
        )
        return rocket_pipeline
    else:
        # ValueError is a subclass of Exception, so callers catching the
        # previous bare Exception keep working; the name is included so the
        # failure is diagnosable.
        raise ValueError(f"UNKNOWN CLASSIFIER: {cls!r}")
Exemple #2
0
class RandomIntervalFeatureExtractor(_PanelToTabularTransformer):
    """
    Transformer that segments time-series into random intervals
    and subsequently extracts series-to-primitives features from each interval.

    Parameters
    ----------
    n_intervals : str{'sqrt', 'log', 'random'}, int or float, optional (
    default='sqrt')
        Number of random intervals to generate, where m is length of time
        series:
        - If "log", log of m is used.
        - If "sqrt", sqrt of m is used.
        - If "random", random number of intervals is generated.
        - If int, n_intervals intervals are generated.
        - If float, int(n_intervals * m) is used with n_intervals giving the
        fraction of intervals of the
        time series length.

        For all arguments relative to the length of the time series,
        the generated number of intervals is
        always at least 1.

    min_length : optional (default=None)
        Passed unchanged to the internal RandomIntervalSegmenter.
        NOTE(review): presumably the minimum interval length -- confirm
        against RandomIntervalSegmenter.

    max_length : optional (default=None)
        Passed unchanged to the internal RandomIntervalSegmenter.
        NOTE(review): presumably the maximum interval length -- confirm
        against RandomIntervalSegmenter.

    features : list of functions, optional (default=None)
        Applies each function to random intervals to extract features.
        Each function is first called with ``axis=-1``; if it does not
        accept an ``axis`` keyword it is applied row by row instead. Each
        function must expose ``__name__``, which is used in the output
        column names.
        If None, the mean is extracted.

    random_state : int, RandomState instance, optional (default=None)
        - If int, random_state is the seed used by the random number generator;
        - If RandomState instance, random_state is the random number generator;
        - If None, the random number generator is the RandomState instance used
        by `np.random`.

    Attributes
    ----------
    intervals_ : copied from the fitted RandomIntervalSegmenter; iterated in
        `transform` as (start, end) pairs.
    input_shape_ : copied from the fitted RandomIntervalSegmenter; its second
        entry is compared with the number of columns seen in `transform`.
    """

    _tags = {"univariate-only": True}

    def __init__(
        self,
        n_intervals="sqrt",
        min_length=None,
        max_length=None,
        features=None,
        random_state=None,
    ):
        self.n_intervals = n_intervals
        self.min_length = min_length
        self.max_length = max_length
        self.random_state = random_state
        self.features = features
        super().__init__()

    def fit(self, X, y=None):
        """
        Fit transformer, generating random interval indices.

        Parameters
        ----------
        X : pandas DataFrame of shape [n_samples, n_features]
            Input data
        y : pandas Series, shape (n_samples, ...), optional
            Targets for supervised learning.

        Returns
        -------
        self : RandomIntervalFeatureExtractor
            This estimator
        """
        # We use composition rather than inheritance here, because this
        # transformer has a different transform type (returns tabular)
        # compared to the RandomIntervalSegmenter (returns panel).
        self._interval_segmenter = RandomIntervalSegmenter(
            self.n_intervals, self.min_length, self.max_length,
            self.random_state)
        self._interval_segmenter.fit(X, y)

        # Mirror the fitted state of the segmenter on this estimator.
        self.intervals_ = self._interval_segmenter.intervals_
        self.input_shape_ = self._interval_segmenter.input_shape_
        self._time_index = self._interval_segmenter._time_index
        self._is_fitted = True
        return self

    def transform(self, X, y=None):
        """
        Transform X, segments time-series in each column into random
        intervals using interval indices generated
        during `fit` and extracts features from each interval.

        Parameters
        ----------
        X : nested pandas.DataFrame of shape [n_samples, n_features]
            Nested dataframe with time-series in cells.
        y : ignored
            Not used by this method.

        Returns
        -------
        Xt : pandas.DataFrame
          Transformed pandas DataFrame with same number of rows and one
          column for each (feature function, interval) pair, named
          ``"{start}_{end}_{function name}"``. Columns are ordered by
          function first, then by interval.

        Raises
        ------
        ValueError
            If the number of columns of X differs from what was seen in
            `fit`.
        """
        # Check if fit had been called
        self.check_is_fitted()

        # Check input of feature calculators, i.e list of functions to be
        # applied to time-series
        features = _check_features(self.features)
        X = check_X(X, enforce_univariate=True, coerce_to_numpy=True)

        # Check that the input is of the same shape as the one passed
        # during fit.
        if X.shape[1] != self.input_shape_[1]:
            raise ValueError(
                "Number of columns of input is different from what was seen "
                "in `fit`")
        # NOTE: the time index of X is not compared against the one stored
        # during `fit`.

        n_instances, _, _ = X.shape
        intervals = self.intervals_

        # Allocate output array for transformed data: one column per
        # (function, interval) pair.
        Xt = np.zeros((n_instances, len(features) * len(intervals)))
        columns = []

        i = 0
        for func in features:
            # TODO generalise to series-to-series functions and function kwargs
            for start, end in intervals:
                interval = X[:, :, start:end]

                # Try to use optimised computations over axis if possible,
                # otherwise iterate over rows.
                try:
                    Xt[:, i] = func(interval, axis=-1).squeeze()
                except TypeError as e:
                    # Match on the stable part of the message only: the
                    # prefix is the function's qualified name from Python
                    # 3.10 on, so comparing the whole message against
                    # ``func.__name__`` misses nested functions and methods.
                    if ("got an unexpected keyword argument 'axis'"
                            not in str(e)):
                        raise
                    Xt[:, i] = np.apply_along_axis(func,
                                                   axis=2,
                                                   arr=interval).squeeze()
                i += 1
                columns.append(f"{start}_{end}_{func.__name__}")

        return pd.DataFrame(Xt, columns=columns)
Exemple #3
0
def test_bad_input_args(bad_interval):
    """Check that an invalid ``n_intervals`` value leads to a ValueError.

    ``bad_interval`` is supplied by pytest (its fixture or parametrisation
    is not visible in this excerpt).
    """
    constant_series = np.ones(10)
    X = _make_nested_from_array(constant_series, n_instances=10, n_columns=2)
    # Both construction and fitting stay inside the context manager, so a
    # ValueError from either step satisfies the test.
    with pytest.raises(ValueError):
        segmenter = RandomIntervalSegmenter(n_intervals=bad_interval)
        segmenter.fit(X)