Ejemplo n.º 1
0
def test_different_implementations():
    """Check that RandomIntervalFeatureExtractor matches its modular equivalents.

    Two comparisons are made on the GunPoint training data, both seeded
    with the same random state:

    1. ``RandomIntervalSegmenter`` chained with a row-wise mean versus
       ``RandomIntervalFeatureExtractor(features=[np.mean])``.
    2. A ``TSPipeline`` of segmenter + ``TSFeatureUnion`` (mean, std) versus
       ``RandomIntervalFeatureExtractor(features=[np.mean, np.std])``,
       compared after both outputs are put into the same column order.
    """
    random_seed = 1233
    X_train, y_train = load_gunpoint(return_X_y=True)

    # --- Comparison 1: explicit chaining of segmenter and row-wise mean ---
    segmenter = RandomIntervalSegmenter(n_intervals='sqrt', random_state=random_seed)
    rowwise_mean = RowwiseTransformer(FunctionTransformer(func=np.mean, validate=False))
    segments = segmenter.fit_transform(X_train)
    chained = rowwise_mean.fit_transform(segments)

    extractor = RandomIntervalFeatureExtractor(
        n_intervals='sqrt',
        features=[np.mean],
        random_state=random_seed,
    )
    direct = extractor.fit_transform(X_train)

    np.testing.assert_array_equal(chained, direct)

    # --- Comparison 2: transformer pipeline built around TSFeatureUnion ---
    union = TSFeatureUnion([
        ('mean', RowwiseTransformer(FunctionTransformer(func=np.mean, validate=False))),
        ('std', RowwiseTransformer(FunctionTransformer(func=np.std, validate=False))),
    ])
    pipeline = TSPipeline(
        [
            ('segment', RandomIntervalSegmenter(n_intervals='sqrt', check_input=False)),
            ('transform', union),
        ],
        random_state=random_seed,
    )
    piped = pipeline.fit_transform(X_train)

    # The union output holds the mean columns followed by the std columns;
    # suffix each half so the columns can be sorted and compared by name.
    n_ints = piped.shape[1] // 2
    mean_names = list(piped.columns[:n_ints] + '_mean')
    std_names = list(piped.columns[n_ints:n_ints * 2] + '_std')
    piped.columns = mean_names + std_names
    piped = piped.reindex(np.sort(piped.columns), axis=1)

    extractor = RandomIntervalFeatureExtractor(
        n_intervals='sqrt',
        features=[np.mean, np.std],
        random_state=random_seed,
    )
    extracted = extractor.fit_transform(X_train)
    extracted = extracted.reindex(np.sort(extracted.columns), axis=1)

    np.testing.assert_array_equal(piped, extracted)
    def _test_TimeSeriesForest_predictions(n_estimators=None, n_intervals=None, random_state=None):
        """Assert that a fully modular forest and the default forest agree.

        Builds a ``TimeSeriesForestClassifier`` around an explicit
        ``TSPipeline`` (segmenter, mean/std/slope feature union, decision
        tree) and compares its ``predict_proba`` output against a
        ``TimeSeriesForestClassifier`` that uses its default base estimator
        with ``n_intervals`` set through ``set_params``.

        ``X_train``, ``y_train`` and ``X_test`` are not defined here; they
        are taken from the surrounding scope.

        Parameters
        ----------
        n_estimators : passed to both ``TimeSeriesForestClassifier`` instances.
        n_intervals : passed to the segmenter of the modular pipeline and to
            the ``transform`` step of the default base estimator.
        random_state : passed to both ``TimeSeriesForestClassifier`` instances.
        """
        # Fully modular implementation: pipeline with an explicit feature union.
        feature_union = TSFeatureUnion([
            ('mean', RowwiseTransformer(FunctionTransformer(func=np.mean, validate=False))),
            ('std', RowwiseTransformer(FunctionTransformer(func=np.std, validate=False))),
            ('slope', RowwiseTransformer(FunctionTransformer(func=time_series_slope, validate=False))),
        ])
        modular_pipeline = TSPipeline([
            ('segment', RandomIntervalSegmenter(n_intervals=n_intervals, check_input=False)),
            ('transform', feature_union),
            ('clf', DecisionTreeClassifier()),
        ])
        modular_forest = TimeSeriesForestClassifier(
            base_estimator=modular_pipeline,
            random_state=random_state,
            n_estimators=n_estimators,
        )
        modular_forest.fit(X_train, y_train)
        modular_proba = modular_forest.predict_proba(X_test)

        # Default, semi-modular implementation using
        # RandomIntervalFeatureExtractor internally.
        default_forest = TimeSeriesForestClassifier(
            random_state=random_state,
            n_estimators=n_estimators,
        )
        default_forest.set_params(base_estimator__transform__n_intervals=n_intervals)
        default_forest.fit(X_train, y_train)
        default_proba = default_forest.predict_proba(X_test)

        np.testing.assert_array_equal(modular_proba, default_proba)
Ejemplo n.º 3
0
    def _test_pipeline_predictions(n_intervals=None, random_state=None):
        """Assert that modular and composite feature pipelines predict identically.

        Fits two pipelines with the same ``random_state`` and compares their
        predictions on ``X_test``:

        1. segmenter -> feature union of row-wise mean and std -> decision tree
        2. ``RandomIntervalFeatureExtractor`` with ``[np.mean, np.std]`` ->
           decision tree

        ``X_train``, ``y_train`` and ``X_test`` are not defined here; they
        are taken from the surrounding scope.

        Parameters
        ----------
        n_intervals : passed to the segmenter and to the feature extractor.
        random_state : passed to both ``Pipeline`` instances.

        Raises
        ------
        AssertionError
            If the two pipelines produce different predictions.
        """
        # Modular variant: explicit segmentation followed by a feature union.
        steps = [('segment',
                  RandomIntervalSegmenter(n_intervals=n_intervals,
                                          check_input=False)),
                 ('transform',
                  FeatureUnion([('mean',
                                 RowwiseTransformer(
                                     FunctionTransformer(func=np.mean,
                                                         validate=False))),
                                ('std',
                                 RowwiseTransformer(
                                     FunctionTransformer(func=np.std,
                                                         validate=False)))])),
                 ('clf', DecisionTreeClassifier())]
        clf1 = Pipeline(steps, random_state=random_state)
        clf1.fit(X_train, y_train)
        a = clf1.predict(X_test)

        # Composite variant: one transformer does segmentation and extraction.
        steps = [('transform',
                  RandomIntervalFeatureExtractor(n_intervals=n_intervals,
                                                 features=[np.mean, np.std])),
                 ('clf', DecisionTreeClassifier())]
        clf2 = Pipeline(steps, random_state=random_state)
        clf2.fit(X_train, y_train)
        b = clf2.predict(X_test)

        # np.array_equal only returns a bool; use the asserting variant so a
        # mismatch actually fails the test.
        np.testing.assert_array_equal(a, b)
Ejemplo n.º 4
0
def test_FeatureUnion_pipeline():
    """Smoke-test a pipeline of segmentation plus multiple feature extraction.

    Fits a segmenter -> (mean, std) feature union -> decision tree pipeline
    and checks that the predictions have one entry per test label and cover
    the same set of classes as ``y_test``.

    ``X_train``, ``y_train``, ``X_test`` and ``y_test`` are not defined
    here; they are taken from the surrounding scope.
    """
    mean_step = ('mean', RowwiseTransformer(FunctionTransformer(func=np.mean, validate=False)))
    std_step = ('std', RowwiseTransformer(FunctionTransformer(func=np.std, validate=False)))

    clf = Pipeline([
        ('segment', RandomIntervalSegmenter(n_intervals=3, check_input=False)),
        ('transform', FeatureUnion([mean_step, std_step])),
        ('clf', DecisionTreeClassifier()),
    ])
    clf.fit(X_train, y_train)
    predictions = clf.predict(X_test)

    # One prediction per test instance.
    assert predictions.shape[0] == y_test.shape[0]
    # Predicted classes are exactly the classes present in the test labels.
    predicted_classes = np.unique(predictions)
    expected_classes = np.unique(y_test)
    np.testing.assert_array_equal(predicted_classes, expected_classes)
Ejemplo n.º 5
0
def test_Pipeline_random_state():
    """Check how ``Pipeline`` handles ``random_state``.

    Covers three things:

    * a freshly built pipeline and all of its steps report
      ``random_state`` as ``None``;
    * setting ``random_state`` on the pipeline sets it on the pipeline
      itself and on every step;
    * repeatedly fitting a pipeline built with the same ``random_state``
      on GunPoint yields identical predictions.
    """
    pipe = Pipeline([
        ('transform', RandomIntervalFeatureExtractor(features=[np.mean])),
        ('clf', DecisionTreeClassifier()),
    ])

    # Pipeline starts out without a random_state ...
    assert pipe.random_state is None
    assert pipe.get_params()['random_state'] is None

    # ... and so does every component.
    for _, estimator in pipe.steps:
        assert estimator.random_state is None
        assert estimator.get_params()['random_state'] is None

    # Setting the random state must reach the pipeline and all its
    # random components.
    rs = 1234
    pipe.set_params(random_state=rs)

    assert pipe.random_state == rs
    assert pipe.get_params()['random_state'] == rs

    for _, estimator in pipe.steps:
        assert estimator.random_state == rs
        assert estimator.get_params()['random_state'] == rs

    # Check specific results: same seed, same predictions.
    X_train, y_train = load_gunpoint(return_X_y=True)
    X_test, _ = load_gunpoint("TEST", return_X_y=True)

    # The same step objects are deliberately shared by every pipeline
    # built below.
    steps = [
        ('segment', RandomIntervalSegmenter(n_intervals='sqrt', check_input=False)),
        ('extract', RowwiseTransformer(FunctionTransformer(func=np.mean, validate=False))),
        ('clf', DecisionTreeClassifier()),
    ]

    def _fit_and_predict():
        # Build a new seeded pipeline over the shared steps, fit, and predict.
        seeded = Pipeline(steps, random_state=rs)
        seeded.fit(X_train, y_train)
        return seeded.predict(X_test)

    reference = _fit_and_predict()
    n_repeats = 10
    for _ in range(n_repeats):
        np.testing.assert_array_equal(reference, _fit_and_predict())