def test_output_format_dim(n_instances, n_timepoints, n_intervals, features):
    X = _make_nested_from_array(np.ones(n_timepoints),
                                n_instances=n_instances,
                                n_columns=1)
    n_rows, n_cols = X.shape
    trans = RandomIntervalFeatureExtractor(n_intervals=n_intervals,
                                           features=features)
    Xt = trans.fit_transform(X)
    assert isinstance(Xt, pd.DataFrame)
    assert Xt.shape[0] == n_rows
    assert np.array_equal(Xt.values, np.ones(Xt.shape))
Beispiel #2
0
def test_TimeSeriesForest_predictions(n_estimators, n_intervals):
    random_state = 1234
    X_train, y_train = load_gunpoint(split="train", return_X_y=True)
    X_test, y_test = load_gunpoint(split="test", return_X_y=True)

    features = [np.mean, np.std, _slope]
    steps = [
        (
            "transform",
            RandomIntervalFeatureExtractor(
                random_state=random_state, features=features
            ),
        ),
        ("clf", DecisionTreeClassifier()),
    ]
    estimator = Pipeline(steps)

    clf1 = ComposableTimeSeriesForestClassifier(
        estimator=estimator, random_state=random_state, n_estimators=n_estimators
    )
    clf1.fit(X_train, y_train)
    a = clf1.predict_proba(X_test)

    # default, semi-modular implementation using
    # RandomIntervalFeatureExtractor internally
    clf2 = ComposableTimeSeriesForestClassifier(
        random_state=random_state, n_estimators=n_estimators
    )
    clf2.fit(X_train, y_train)
    b = clf2.predict_proba(X_test)

    np.testing.assert_array_equal(a, b)
Beispiel #3
0
    def _validate_estimator(self):

        if not isinstance(self.n_estimators, numbers.Integral):
            raise ValueError("n_estimators must be an integer, "
                             "got {0}.".format(type(self.n_estimators)))

        if self.n_estimators <= 0:
            raise ValueError("n_estimators must be greater than zero, "
                             "got {0}.".format(self.n_estimators))

        # Set base estimator
        if self.estimator is None:
            # Set default time series forest
            features = [np.mean, np.std, _slope]
            steps = [
                (
                    "transform",
                    RandomIntervalFeatureExtractor(
                        n_intervals="sqrt",
                        features=features,
                        random_state=self.random_state,
                    ),
                ),
                ("clf", DecisionTreeRegressor(random_state=self.random_state)),
            ]
            self.estimator_ = Pipeline(steps)

        else:
            # else check given estimator is a pipeline with prior
            # transformations and final decision tree
            if not isinstance(self.estimator, Pipeline):
                raise ValueError(
                    "`estimator` must be pipeline with transforms.")
            if not isinstance(self.estimator.steps[-1][1],
                              DecisionTreeRegressor):
                raise ValueError(
                    "Last step in `estimator` must be DecisionTreeRegressor.")
            self.estimator_ = self.estimator

        # Set parameters according to naming in pipeline
        estimator_params = {
            "criterion": self.criterion,
            "max_depth": self.max_depth,
            "min_samples_split": self.min_samples_split,
            "min_samples_leaf": self.min_samples_leaf,
            "min_weight_fraction_leaf": self.min_weight_fraction_leaf,
            "max_features": self.max_features,
            "max_leaf_nodes": self.max_leaf_nodes,
            "min_impurity_decrease": self.min_impurity_decrease,
            "min_impurity_split": self.min_impurity_split,
        }
        final_estimator = self.estimator_.steps[-1][0]
        self.estimator_params = {
            f"{final_estimator}__{pname}": pval
            for pname, pval in estimator_params.items()
        }

        # Set renamed estimator parameters
        for pname, pval in self.estimator_params.items():
            self.__setattr__(pname, pval)
def test_different_implementations():
    random_state = 1233
    X_train, y_train = make_classification_problem()

    # Compare with chained transformations.
    tran1 = RandomIntervalSegmenter(n_intervals=1, random_state=random_state)
    tran2 = SeriesToPrimitivesRowTransformer(FunctionTransformer(
        func=np.mean, validate=False),
                                             check_transformer=False)
    A = tran2.fit_transform(tran1.fit_transform(X_train))

    tran = RandomIntervalFeatureExtractor(n_intervals=1,
                                          features=[np.mean],
                                          random_state=random_state)
    B = tran.fit_transform(X_train)

    np.testing.assert_array_almost_equal(A, B)
def test_different_pipelines():
    """Compare with transformer pipeline using TSFeatureUnion."""
    random_state = 1233
    X_train, y_train = make_classification_problem()
    steps = [
        (
            "segment",
            RandomIntervalSegmenter(n_intervals=1, random_state=random_state),
        ),
        (
            "transform",
            FeatureUnion([
                (
                    "mean",
                    SeriesToPrimitivesRowTransformer(
                        FunctionTransformer(func=np.mean, validate=False),
                        check_transformer=False,
                    ),
                ),
                (
                    "std",
                    SeriesToPrimitivesRowTransformer(
                        FunctionTransformer(func=np.std, validate=False),
                        check_transformer=False,
                    ),
                ),
                (
                    "slope",
                    SeriesToPrimitivesRowTransformer(
                        FunctionTransformer(func=_slope, validate=False),
                        check_transformer=False,
                    ),
                ),
            ]),
        ),
    ]
    pipe = Pipeline(steps)
    a = pipe.fit_transform(X_train)
    tran = RandomIntervalFeatureExtractor(
        n_intervals=1,
        features=[np.mean, np.std, _slope],
        random_state=random_state,
    )
    b = tran.fit_transform(X_train)
    np.testing.assert_array_equal(a, b)
    np.testing.assert_array_equal(pipe.steps[0][1].intervals_, tran.intervals_)
def test_results(n_instances, n_timepoints, n_intervals):
    X, _ = make_classification_problem(n_instances=n_instances,
                                       n_timepoints=n_timepoints,
                                       return_numpy=True)
    transformer = RandomIntervalFeatureExtractor(n_intervals=n_intervals,
                                                 features=[np.mean, np.std])
    Xt = transformer.fit_transform(X)
    Xt = Xt.loc[:, ~Xt.columns.duplicated()]
    # Check results
    intervals = transformer.intervals_
    for start, end in intervals:
        expected_mean = np.mean(X[:, 0, start:end], axis=-1)
        expected_std = np.std(X[:, 0, start:end], axis=-1)

        actual_means = Xt.loc[:, f"{start}_{end}_mean"].to_numpy().ravel()
        actual_stds = Xt.loc[:, f"{start}_{end}_std"].to_numpy().ravel()

        np.testing.assert_array_equal(actual_means, expected_mean)
        np.testing.assert_array_equal(actual_stds, expected_std)
Beispiel #7
0
def test_equivalent_model_specifications(n_intervals, n_estimators):
    """Test composable TSF vs an equivalent model."""
    random_state = 1234
    X_train, y_train = load_unit_test(split="train")
    X_test, y_test = load_unit_test(split="test")

    # Due to tie-breaking/floating point rounding in the final decision tree
    # classifier, the results depend on the
    # exact column order of the input data

    # Compare pipeline predictions outside of ensemble.
    steps = [
        (
            "segment",
            RandomIntervalSegmenter(n_intervals=n_intervals,
                                    random_state=random_state),
        ),
        (
            "transform",
            FeatureUnion([("mean", mean_transformer),
                          ("std", std_transformer)]),
        ),
        ("clf", DecisionTreeClassifier(random_state=random_state)),
    ]
    clf1 = Pipeline(steps)
    clf1.fit(X_train, y_train)
    a = clf1.predict(X_test)

    steps = [
        (
            "transform",
            RandomIntervalFeatureExtractor(
                n_intervals=n_intervals,
                features=[np.mean, np.std],
                random_state=random_state,
            ),
        ),
        ("clf", DecisionTreeClassifier(random_state=random_state)),
    ]
    clf2 = Pipeline(steps)
    clf2.fit(X_train, y_train)
    b = clf2.predict(X_test)
    np.array_equal(a, b)
def test_bad_features(bad_features):
    X, y = make_classification_problem()
    with pytest.raises(ValueError):
        RandomIntervalFeatureExtractor(n_intervals=bad_features).fit(X)
def test_bad_n_intervals(bad_n_intervals):
    """Check that exception is raised for bad input args."""
    X, y = make_classification_problem()
    with pytest.raises(ValueError):
        RandomIntervalFeatureExtractor(n_intervals=bad_n_intervals).fit(X)