예제 #1
0
def test_Pipeline_random_state():
    """Check that ``random_state`` propagates through a Pipeline and makes it reproducible.

    The test has two parts:

    1. A pipeline built without a seed exposes ``random_state is None`` on
       itself and on every step; after ``set_params(random_state=...)`` the
       pipeline and every step report the new seed, both as an attribute and
       through ``get_params()``.
    2. Pipelines repeatedly built from the same steps with the same seed and
       fitted on the same data produce identical predictions.
    """
    pipe = Pipeline([
        ('transform', RandomIntervalFeatureExtractor(features=[np.mean])),
        ('clf', DecisionTreeClassifier()),
    ])

    # A freshly constructed pipeline carries no seed ...
    assert pipe.random_state is None
    assert pipe.get_params()['random_state'] is None

    # ... and neither does any of its steps.
    for _, estimator in pipe.steps:
        assert estimator.random_state is None
        assert estimator.get_params()['random_state'] is None

    # Seeding the pipeline must seed the pipeline itself and each of its steps.
    rs = 1234
    pipe.set_params(random_state=rs)

    assert pipe.random_state == rs
    assert pipe.get_params()['random_state'] == rs

    for _, estimator in pipe.steps:
        assert estimator.random_state == rs
        assert estimator.get_params()['random_state'] == rs

    # Reproducibility check on actual predictions.
    X_train, y_train = load_gunpoint(return_X_y=True)
    X_test, _ = load_gunpoint("TEST", return_X_y=True)

    steps = [
        ('segment', RandomIntervalSegmenter(n_intervals=3)),
        ('extract',
         RowwiseTransformer(FunctionTransformer(func=np.mean,
                                                validate=False))),
        ('clf', DecisionTreeClassifier()),
    ]

    def fit_predict():
        # Build a new seeded pipeline around the shared step objects, fit it
        # and return its predictions on the held-out data.
        seeded = Pipeline(steps, random_state=rs)
        seeded.fit(X_train, y_train)
        return seeded.predict(X_test)

    y_pred_first = fit_predict()

    # Every further run with the same seed has to match the first one exactly.
    n_repeats = 10
    for _ in range(n_repeats):
        np.testing.assert_array_equal(y_pred_first, fit_predict())
예제 #2
0
def test_Pipeline_check_input():
    """Check that ``check_input`` defaults to True and propagates to the pipeline steps.

    A pipeline built without an explicit ``check_input`` must report ``True``
    on itself and on every step. After ``set_params(check_input=False)`` the
    pipeline and every step must report the new value, both as an attribute
    and through ``get_params()``.
    """
    pipe = Pipeline([
        ('transform', RandomIntervalFeatureExtractor(features=[np.mean])),
    ])

    # By default the pipeline is created with check_input set to True ...
    assert pipe.check_input is True
    assert pipe.get_params()['check_input'] is True

    # ... and so is every one of its steps.
    for _, estimator in pipe.steps:
        assert estimator.check_input is True
        assert estimator.get_params()['check_input'] is True

    # Changing check_input on the pipeline must change it on the pipeline
    # itself and on all of its steps.
    ci = False
    pipe.set_params(check_input=ci)

    assert pipe.check_input == ci
    assert pipe.get_params()['check_input'] == ci

    for _, estimator in pipe.steps:
        assert estimator.check_input == ci
        assert estimator.get_params()['check_input'] == ci
예제 #3
0
    def __init__(self,
                 base_estimator=None,
                 n_estimators=500,
                 criterion='mse',
                 max_depth=None,
                 min_samples_split=2,
                 min_samples_leaf=1,
                 min_weight_fraction_leaf=0.,
                 max_features=None,
                 max_leaf_nodes=None,
                 min_impurity_decrease=0.,
                 min_impurity_split=None,
                 bootstrap=False,
                 oob_score=False,
                 n_jobs=None,
                 random_state=None,
                 verbose=0,
                 warm_start=False,
                 check_input=True):
        """Configure the forest around a pipeline ending in a DecisionTreeRegressor.

        If ``base_estimator`` is None, a default pipeline is created: a
        ``RandomIntervalFeatureExtractor`` (``n_intervals='sqrt'``, features
        mean, standard deviation and slope) followed by a
        ``DecisionTreeRegressor``.

        The tree hyper-parameters (``criterion`` through
        ``min_impurity_split``) are stored on the instance twice: under their
        plain names, and under ``<last step name>__<param>``. The prefixed
        names are what is handed to the parent constructor as
        ``estimator_params``.

        Note that ``base_estimator`` is modified in place: its
        ``random_state`` is set to the given ``random_state`` and its
        ``check_input`` is set to False, regardless of the ``check_input``
        argument, which is only stored on this instance.

        Raises
        ------
        ValueError
            If ``base_estimator`` is given but is not a ``Pipeline``, or if
            the last step of the pipeline is not a ``DecisionTreeRegressor``.
        """
        if base_estimator is None:
            # Default: random-interval feature extraction feeding a tree.
            default_steps = [
                ('transform',
                 RandomIntervalFeatureExtractor(
                     n_intervals='sqrt',
                     features=[np.mean, np.std, time_series_slope])),
                ('clf', DecisionTreeRegressor()),
            ]
            base_estimator = Pipeline(default_steps)
        else:
            # A user-supplied estimator has to be a pipeline whose final step
            # is a regression tree.
            if not isinstance(base_estimator, Pipeline):
                raise ValueError(
                    'Base estimator must be pipeline with transforms.')
            if not isinstance(base_estimator.steps[-1][1],
                              DecisionTreeRegressor):
                raise ValueError(
                    'Last step in base estimator pipeline must be DecisionTreeRegressor.'
                )

        tree_params = {
            "criterion": criterion,
            "max_depth": max_depth,
            "min_samples_split": min_samples_split,
            "min_samples_leaf": min_samples_leaf,
            "min_weight_fraction_leaf": min_weight_fraction_leaf,
            "max_features": max_features,
            "max_leaf_nodes": max_leaf_nodes,
            "min_impurity_decrease": min_impurity_decrease,
            "min_impurity_split": min_impurity_split,
        }

        # Keep the values under their plain names as well; they are also
        # forwarded to the base estimator below, but have to be present here
        # for cloning.
        for name, value in tree_params.items():
            setattr(self, name, value)

        # Prefix each tree parameter with the name of the final pipeline step
        # so that it addresses that step ("<step>__<param>").
        final_step_name = base_estimator.steps[-1][0]
        prefixed_params = {
            f'{final_step_name}__{name}': value
            for name, value in tree_params.items()
        }

        # Hand everything over to the parent class.
        super(TimeSeriesForestRegressor, self).__init__(
            base_estimator=base_estimator,
            n_estimators=n_estimators,
            estimator_params=tuple(prefixed_params),
            bootstrap=bootstrap,
            oob_score=oob_score,
            n_jobs=n_jobs,
            random_state=random_state,
            verbose=verbose,
            warm_start=warm_start,
        )

        # Seed the pipeline and switch off its input checks.
        base_estimator.set_params(random_state=random_state,
                                  check_input=False)

        # Expose the prefixed parameters as attributes of this instance.
        for name, value in prefixed_params.items():
            setattr(self, name, value)
        self.check_input = check_input