def test_Pipeline_random_state():
    """Pipeline propagates ``random_state`` to itself and to every step,
    and a fixed seed yields reproducible predictions."""
    steps = [
        ('transform', RandomIntervalFeatureExtractor(features=[np.mean])),
        ('clf', DecisionTreeClassifier()),
    ]
    pipe = Pipeline(steps)

    # A freshly constructed pipeline carries no random_state anywhere.
    assert pipe.random_state is None
    assert pipe.get_params()['random_state'] is None
    for _, estimator in pipe.steps:
        assert estimator.random_state is None
        assert estimator.get_params()['random_state'] is None

    # Setting random_state on the pipeline propagates it to the pipeline
    # itself and to every component.
    rs = 1234
    pipe.set_params(**{'random_state': rs})
    assert pipe.random_state == rs
    assert pipe.get_params()['random_state'] == rs
    for _, estimator in pipe.steps:
        assert estimator.random_state == rs
        assert estimator.get_params()['random_state'] == rs

    # With a fixed seed, repeated fit/predict cycles on fresh pipelines
    # must produce identical predictions.
    X_train, y_train = load_gunpoint(return_X_y=True)
    X_test, y_test = load_gunpoint("TEST", return_X_y=True)
    steps = [
        ('segment', RandomIntervalSegmenter(n_intervals=3)),
        ('extract', RowwiseTransformer(
            FunctionTransformer(func=np.mean, validate=False))),
        ('clf', DecisionTreeClassifier()),
    ]
    pipe = Pipeline(steps, random_state=rs)
    pipe.fit(X_train, y_train)
    y_pred_first = pipe.predict(X_test)

    N_ITER = 10
    for _ in range(N_ITER):
        pipe = Pipeline(steps, random_state=rs)
        pipe.fit(X_train, y_train)
        y_pred = pipe.predict(X_test)
        np.testing.assert_array_equal(y_pred_first, y_pred)
def test_Pipeline_check_input():
    """Pipeline propagates ``check_input`` to itself and to every step."""
    steps = [('transform', RandomIntervalFeatureExtractor(features=[np.mean]))]
    pipe = Pipeline(steps)

    # By default, check_input is True on the pipeline and on each component.
    assert pipe.check_input is True
    assert pipe.get_params()['check_input'] is True
    for _, estimator in pipe.steps:
        assert estimator.check_input is True
        assert estimator.get_params()['check_input'] is True

    # Setting check_input on the pipeline propagates it to the pipeline
    # itself and to every component.
    ci = False
    pipe.set_params(**{'check_input': ci})
    assert pipe.check_input == ci
    assert pipe.get_params()['check_input'] == ci
    for _, estimator in pipe.steps:
        assert estimator.check_input == ci
        assert estimator.get_params()['check_input'] == ci
def __init__(self, base_estimator=None, n_estimators=500, criterion='mse',
             max_depth=None, min_samples_split=2, min_samples_leaf=1,
             min_weight_fraction_leaf=0., max_features=None,
             max_leaf_nodes=None, min_impurity_decrease=0.,
             min_impurity_split=None, bootstrap=False, oob_score=False,
             n_jobs=None, random_state=None, verbose=0, warm_start=False,
             check_input=True):
    """Construct a time-series forest regressor.

    Parameters
    ----------
    base_estimator : Pipeline, optional (default=None)
        Pipeline whose last step must be a ``DecisionTreeRegressor``.
        If None, a default pipeline of a
        ``RandomIntervalFeatureExtractor`` (mean, std, slope features over
        'sqrt' intervals) followed by a ``DecisionTreeRegressor`` is built.
    n_estimators : int (default=500)
        Number of trees in the forest; forwarded to the parent ensemble.
    criterion, max_depth, min_samples_split, min_samples_leaf,
    min_weight_fraction_leaf, max_features, max_leaf_nodes,
    min_impurity_decrease, min_impurity_split
        Tree hyper-parameters; stored on ``self`` and also forwarded to the
        final pipeline step under renamed keys (see below).
    bootstrap, oob_score, n_jobs, random_state, verbose, warm_start
        Forwarded unchanged to the parent ensemble's ``__init__``.
    check_input : bool (default=True)
        Stored on ``self``; input checking on the base pipeline itself is
        forced off (see below).

    Raises
    ------
    ValueError
        If ``base_estimator`` is not a ``Pipeline``, or its last step is
        not a ``DecisionTreeRegressor``.
    """
    if base_estimator is None:
        # Default pipeline: interval feature extraction followed by a
        # plain regression tree.
        features = [np.mean, np.std, time_series_slope]
        steps = [('transform',
                  RandomIntervalFeatureExtractor(n_intervals='sqrt',
                                                 features=features)),
                 ('clf', DecisionTreeRegressor())]
        base_estimator = Pipeline(steps)
    elif not isinstance(base_estimator, Pipeline):
        raise ValueError(
            'Base estimator must be pipeline with transforms.')
    elif not isinstance(base_estimator.steps[-1][1], DecisionTreeRegressor):
        raise ValueError(
            'Last step in base estimator pipeline must be DecisionTreeRegressor.'
        )

    # Assign values, even though passed on to base estimator below,
    # necessary here for cloning (sklearn's clone/get_params reads
    # constructor arguments back off ``self``).
    self.criterion = criterion
    self.max_depth = max_depth
    self.min_samples_split = min_samples_split
    self.min_samples_leaf = min_samples_leaf
    self.min_weight_fraction_leaf = min_weight_fraction_leaf
    self.max_features = max_features
    self.max_leaf_nodes = max_leaf_nodes
    self.min_impurity_decrease = min_impurity_decrease
    self.min_impurity_split = min_impurity_split

    # Rename estimator params according to name in pipeline, so the
    # ensemble can push them onto the tree step via set_params
    # (e.g. 'clf__max_depth').
    estimator = base_estimator.steps[-1][0]  # name of the final step
    estimator_params = {
        "criterion": criterion,
        "max_depth": max_depth,
        "min_samples_split": min_samples_split,
        "min_samples_leaf": min_samples_leaf,
        "min_weight_fraction_leaf": min_weight_fraction_leaf,
        "max_features": max_features,
        "max_leaf_nodes": max_leaf_nodes,
        "min_impurity_decrease": min_impurity_decrease,
        "min_impurity_split": min_impurity_split,
    }
    estimator_params = {
        f'{estimator}__{pname}': pval
        for pname, pval in estimator_params.items()
    }

    # Pass on params.  estimator_params tells the parent ensemble which
    # attributes of ``self`` to copy onto each cloned base estimator.
    super(TimeSeriesForestRegressor, self).__init__(
        base_estimator=base_estimator,
        n_estimators=n_estimators,
        estimator_params=tuple(estimator_params.keys()),
        bootstrap=bootstrap,
        oob_score=oob_score,
        n_jobs=n_jobs,
        random_state=random_state,
        verbose=verbose,
        warm_start=warm_start,
    )

    # Assign random state to pipeline.  check_input is disabled on the
    # pipeline itself — presumably validation is handled once at the
    # ensemble level instead of per-tree (NOTE(review): confirm; this
    # also mutates a caller-supplied base_estimator in place).
    base_estimator.set_params(**{
        'random_state': random_state,
        'check_input': False
    })

    # Store renamed estimator params (e.g. self.clf__max_depth) so the
    # parent ensemble can read them back when building each tree.
    for pname, pval in estimator_params.items():
        self.__setattr__(pname, pval)

    self.check_input = check_input