예제 #1
0
def test_row_transformer_function_transformer_series_to_primitives():
    """Row transformer with np.mean reduces each cell's series to a scalar."""
    X, y = load_gunpoint(return_X_y=True)
    mean_ft = FunctionTransformer(func=np.mean, validate=False)
    row_trafo = SeriesToPrimitivesRowTransformer(mean_ft,
                                                 check_transformer=False)
    Xt = row_trafo.fit_transform(X, y)
    # Frame shape is preserved, but cells now hold primitives, not series.
    assert Xt.shape == X.shape
    assert isinstance(Xt.iloc[0, 0], float)
예제 #2
0
def test_different_implementations():
    """Chained segment+mean matches RandomIntervalFeatureExtractor."""
    random_state = 1233
    X_train, y_train = make_classification_problem()

    # Route 1: segment into random intervals, then take row-wise means.
    segmenter = RandomIntervalSegmenter(n_intervals=1,
                                        random_state=random_state)
    mean_rows = SeriesToPrimitivesRowTransformer(
        FunctionTransformer(func=np.mean, validate=False),
        check_transformer=False,
    )
    A = mean_rows.fit_transform(segmenter.fit_transform(X_train))

    # Route 2: one-shot extractor seeded identically.
    extractor = RandomIntervalFeatureExtractor(n_intervals=1,
                                               features=[np.mean],
                                               random_state=random_state)
    B = extractor.fit_transform(X_train)

    np.testing.assert_array_almost_equal(A, B)
예제 #3
0
def test_different_pipelines():
    """FeatureUnion pipeline agrees with RandomIntervalFeatureExtractor."""
    random_state = 1233
    X_train, y_train = make_classification_problem()

    def _row_reducer(func):
        # Wrap a plain reduction function as a row-wise transformer step.
        return SeriesToPrimitivesRowTransformer(
            FunctionTransformer(func=func, validate=False),
            check_transformer=False,
        )

    pipe = Pipeline([
        (
            "segment",
            RandomIntervalSegmenter(n_intervals=1,
                                    random_state=random_state),
        ),
        (
            "transform",
            FeatureUnion([
                ("mean", _row_reducer(np.mean)),
                ("std", _row_reducer(np.std)),
                ("slope", _row_reducer(time_series_slope)),
            ]),
        ),
    ])
    a = pipe.fit_transform(X_train)

    extractor = RandomIntervalFeatureExtractor(
        n_intervals=1,
        features=[np.mean, np.std, time_series_slope],
        random_state=random_state,
    )
    b = extractor.fit_transform(X_train)

    np.testing.assert_array_equal(a, b)
    # Both routes must also have drawn identical intervals.
    np.testing.assert_array_equal(pipe.steps[0][1].intervals_,
                                  extractor.intervals_)
예제 #4
0
def test_feature_importances_single_feature_interval_and_estimator():
    """Importances match a manually rebuilt single-interval pipeline."""
    random_state = 1234

    # Fit a one-tree forest with a single interval and the mean feature.
    base_estimator = Pipeline([
        (
            "transform",
            RandomIntervalFeatureExtractor(n_intervals=1,
                                           features=[np.mean],
                                           random_state=random_state),
        ),
        ("clf", DecisionTreeClassifier()),
    ])
    clf1 = TimeSeriesForestClassifier(estimator=base_estimator,
                                      random_state=random_state,
                                      n_estimators=1)
    clf1.fit(X_train, y_train)

    # Rebuild the same computation from the fitted interval and tree.
    fitted_intervals = clf1.estimators_[0].steps[0][1].intervals_
    clf2 = Pipeline([
        ("segment", IntervalSegmenter(fitted_intervals)),
        (
            "transform",
            FeatureUnion([(
                "mean",
                SeriesToPrimitivesRowTransformer(
                    FunctionTransformer(func=np.mean, validate=False),
                    check_transformer=False,
                ),
            )]),
        ),
        ("clf", clone(clf1.estimators_[0].steps[-1][1])),
    ])
    clf2.fit(X_train, y_train)

    # Feature importances from both routes must coincide.
    fi_expected = clf1.estimators_[0].steps[-1][1].feature_importances_
    fi_actual = clf2.steps[-1][1].feature_importances_
    np.testing.assert_array_equal(fi_actual, fi_expected)
예제 #5
0
def test_RowTransformer_pipeline():
    """sktime row transformers reproduce a pure-sklearn column pipeline."""
    X_train, y_train = load_basic_motions(split="train", return_X_y=True)
    X_test, y_test = load_basic_motions(split="test", return_X_y=True)

    # Pure-sklearn reference: hand-written column-wise reductions.
    def row_mean(X):
        if isinstance(X, pd.Series):
            X = pd.DataFrame(X)
        columns = [pd.Series(col.apply(np.mean)) for _, col in X.items()]
        return pd.concat(columns, axis=1)

    def row_first(X):
        if isinstance(X, pd.Series):
            X = pd.DataFrame(X)
        columns = [
            pd.Series(from_nested_to_2d_array(col).iloc[:, 0])
            for _, col in X.items()
        ]
        return pd.concat(columns, axis=1)

    # specify column as a list, otherwise pandas Series are selected and
    # passed on to the transformers
    sklearn_extract = ColumnTransformer([
        ("mean", FunctionTransformer(func=row_mean,
                                     validate=False), ["dim_0"]),
        ("first", FunctionTransformer(func=row_first,
                                      validate=False), ["dim_1"]),
    ])
    reference_model = Pipeline(steps=[
        ("extract", sklearn_extract),
        ("classify", RandomForestClassifier(n_estimators=2, random_state=1)),
    ])
    reference_model.fit(X_train, y_train)
    expected = reference_model.predict(X_test)

    # Same pipeline, but sktime row transformers do the reductions.
    sktime_extract = ColumnTransformer([
        (
            "mean",
            SeriesToPrimitivesRowTransformer(
                FunctionTransformer(func=np.mean, validate=False),
                check_transformer=False,
            ),
            ["dim_0"],
        ),
        (
            "first",
            SeriesToPrimitivesRowTransformer(
                FunctionTransformer(func=lambda x: x[0], validate=False),
                check_transformer=False,
            ),
            ["dim_1"],
        ),
    ])
    sktime_model = Pipeline(steps=[
        ("extract", sktime_extract),
        ("classify", RandomForestClassifier(n_estimators=2, random_state=1)),
    ])
    sktime_model.fit(X_train, y_train)
    actual = sktime_model.predict(X_test)
    np.testing.assert_array_equal(expected, actual)
from sktime.datasets import load_gunpoint
from sktime.transformers.panel.compose import (
    SeriesToPrimitivesRowTransformer,
)
from sktime.transformers.panel.segment import RandomIntervalSegmenter
from sktime.transformers.panel.summarize import (
    RandomIntervalFeatureExtractor,
)
from sktime.utils._testing.panel import make_classification_problem
from sktime.utils.time_series import time_series_slope

# Shared fixtures: a small classification problem and its class count.
X, y = make_classification_problem()
n_classes = len(np.unique(y))

# Row transformers reducing each series to its mean / standard deviation.
# kw_args forwards axis=0 to np.mean / np.std.
mean_transformer = SeriesToPrimitivesRowTransformer(
    FunctionTransformer(func=np.mean, validate=False, kw_args={"axis": 0}),
    check_transformer=False,
)
std_transformer = SeriesToPrimitivesRowTransformer(
    FunctionTransformer(func=np.std, validate=False, kw_args={"axis": 0}),
    check_transformer=False,
)


# Check simple cases.
def test_predict_proba():
    """predict_proba yields one row per sample that sums to one."""
    forest = TimeSeriesForestClassifier(n_estimators=2)
    forest.fit(X, y)
    proba = forest.predict_proba(X)

    assert proba.shape == (X.shape[0], n_classes)
    # Each row must be a probability distribution over the classes.
    np.testing.assert_array_equal(np.sum(proba, axis=1), np.ones(X.shape[0]))
예제 #7
0
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.pipeline import FeatureUnion, Pipeline
from sklearn.preprocessing import FunctionTransformer
from sklearn.tree import DecisionTreeClassifier

from sktime.datasets import load_gunpoint
from sktime.transformers.panel.compose import (
    SeriesToPrimitivesRowTransformer, )
from sktime.transformers.panel.segment import RandomIntervalSegmenter
from sktime.utils._testing import make_classification_problem

# load data
X, y = make_classification_problem()
X_train, X_test, y_train, y_test = train_test_split(X, y)

# Row transformers reducing each series to its mean / standard deviation.
mean_transformer = SeriesToPrimitivesRowTransformer(FunctionTransformer(
    func=np.mean, validate=False),
                                                    check_transformer=False)
std_transformer = SeriesToPrimitivesRowTransformer(FunctionTransformer(
    func=np.std, validate=False),
                                                   check_transformer=False)


def test_FeatureUnion_pipeline():
    # pipeline with segmentation plus multiple feature extraction

    steps = [
        ("segment", RandomIntervalSegmenter(n_intervals=1)),
        (
            "transform",
            FeatureUnion([("mean", mean_transformer),
                          ("std", std_transformer)]),
예제 #8
0
def test_feature_importances_multi_intervals_estimators(
        n_intervals, n_estimators):
    """Check per-tree and aggregated feature importances.

    Fits a TimeSeriesForestClassifier, then rebuilds each fitted tree's
    pipeline explicitly (segment -> mean/std feature union -> cloned tree)
    and verifies that both the per-estimator feature importances and the
    normalised, time-index-aggregated importances agree.

    Fix: the local `fis_expacted` is renamed `fis_expected`, consistent
    with `fi_expected` used earlier in the same function.
    """
    random_state = 1234
    n_features = 2  # np.mean and np.std

    # Compute feature importances using the default method
    features = [np.mean, np.std]
    steps = [
        (
            "transform",
            RandomIntervalFeatureExtractor(n_intervals=n_intervals,
                                           features=features,
                                           random_state=random_state),
        ),
        ("clf", DecisionTreeClassifier()),
    ]
    base_estimator = Pipeline(steps)
    clf1 = TimeSeriesForestClassifier(estimator=base_estimator,
                                      random_state=random_state,
                                      n_estimators=n_estimators)
    clf1.fit(X_train, y_train)

    fi_expected = np.zeros([n_estimators, n_intervals * n_features])
    fi_actual = np.zeros([n_estimators, n_intervals * n_features])

    # Obtain intervals and decision trees from the fitted classifier and
    # rebuild each estimator's pipeline explicitly.
    for i in range(n_estimators):
        intervals = clf1.estimators_[i].steps[0][1].intervals_
        steps = [
            ("segment", IntervalSegmenter(intervals)),
            (
                "transform",
                FeatureUnion([
                    (
                        "mean",
                        SeriesToPrimitivesRowTransformer(
                            FunctionTransformer(func=np.mean, validate=False),
                            check_transformer=False,
                        ),
                    ),
                    (
                        "std",
                        SeriesToPrimitivesRowTransformer(
                            FunctionTransformer(func=np.std, validate=False),
                            check_transformer=False,
                        ),
                    ),
                ]),
            ),
            ("clf", clone(clf1.estimators_[i].steps[-1][1])),
        ]
        clf2 = Pipeline(steps)
        clf2.fit(X_train, y_train)

        # Compute and check individual feature importances per estimator.
        fi_expected[
            i, :] = clf1.estimators_[i].steps[-1][1].feature_importances_
        fi_actual[i, :] = clf2.steps[-1][1].feature_importances_
        np.testing.assert_array_equal(fi_actual[i, :], fi_expected[i, :])

    # Compute normalised feature values of the time series using the
    # default property
    fis_expected = clf1.feature_importances_

    # Compute normalised feature values of the time series from the pipeline
    # implementation
    n_timepoints = len(clf1.estimators_[0].steps[0][1]._time_index)
    fis_actual = np.zeros((n_timepoints, n_features))

    for i in range(n_estimators):
        intervals = clf1.estimators_[i].steps[0][1].intervals_
        for j in range(n_features):
            for k in range(n_intervals):
                start, end = intervals[k]
                # Spread each interval's importance over its time span.
                fis_actual[start:end, j] += fi_actual[i, (j * n_intervals) + k]
    fis_actual = fis_actual / n_estimators / n_intervals
    np.testing.assert_array_equal(fis_actual, fis_expected)