def train_test_evaluate(pipeline: Pipeline, X: pd.DataFrame, y: pd.Series, metric: Metric) -> float:
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, shuffle=shuffle, random_state=random_state)
    pipeline.fit(X_train, y_train)
    test_predictions = pipeline.predict(X_test)
    score = metric(y_test, test_predictions)
    return score
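# `test_size`, `shuffle`, and `random_state` above are free variables, which
# suggests `train_test_evaluate` is the nested function returned by a factory
# like the `make_train_test_evaluator` called in the tests below. A minimal
# sketch of such a factory, assuming it binds those values via closure (the
# signature and defaults here are illustrative assumptions, not the package's
# actual API; it reuses the names already imported in this module):
def make_train_test_evaluator(
    test_size: float = 0.25, shuffle: bool = True, random_state: int = 0
) -> t.Callable:
    def train_test_evaluate(
        pipeline: Pipeline, X: pd.DataFrame, y: pd.Series, metric: Metric
    ) -> float:
        # The split parameters are captured from the enclosing scope.
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=test_size, shuffle=shuffle, random_state=random_state)
        pipeline.fit(X_train, y_train)
        return metric(y_test, pipeline.predict(X_test))

    return train_test_evaluate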
def test_can_run_basic(self):
    """
    The flexga tuner should be able to complete without erroring.
    """
    pipeline = Pipeline()
    pipeline.add_step(classifiers["DecisionTreeClassifierPrimitive"])
    evaluate = make_train_test_evaluator()
    logger.info(
        f"baseline score: {evaluate(pipeline, self.X, self.y, f1macro)}")
    ga_tune(pipeline, self.X, self.y, evaluate, f1macro, iters=2)
def test_returns_correct_number_evals(self):
    pipeline = Pipeline()
    pipeline.add_step(classifiers["DecisionTreeClassifierPrimitive"])
    evaluate = make_train_test_evaluator()
    n_expected_evals = 13  # 6 * 2 (+ 1 for the default params)
    result = ga_tune(
        pipeline, self.X, self.y, evaluate, f1macro, iters=2, population_size=6)
    self.assertEqual(result.n_evals, n_expected_evals)
def kfold_evaluate(pipeline: Pipeline, X: pd.DataFrame, y: pd.Series, metric: Metric) -> float:
    cv = KFold(n_splits=k, shuffle=shuffle, random_state=random_state)

    # Perform cross validation, calculating the average performance
    # over the folds as this pipeline's performance.
    scores = []
    for train_index, test_index in cv.split(X):
        X_train, X_test = X.iloc[train_index], X.iloc[test_index]
        y_train, y_test = y.iloc[train_index], y.iloc[test_index]
        pipeline.fit(X_train, y_train)
        test_predictions = pipeline.predict(X_test)
        fold_score = metric(y_test, test_predictions)
        scores.append(fold_score)

    test_score = mean(scores)
    return test_score
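# As with the train/test evaluator, `k`, `shuffle`, and `random_state` are free
# variables here, presumably bound by an enclosing factory. A compact sketch of
# what such a factory might look like (the name `make_kfold_evaluator` and its
# defaults are assumptions for illustration only):
def make_kfold_evaluator(k: int = 5, shuffle: bool = True, random_state: int = 0) -> t.Callable:
    def kfold_evaluate(
        pipeline: Pipeline, X: pd.DataFrame, y: pd.Series, metric: Metric
    ) -> float:
        cv = KFold(n_splits=k, shuffle=shuffle, random_state=random_state)
        scores = []
        for train_i, test_i in cv.split(X):
            # Fit on each training fold, score on the held-out fold.
            pipeline.fit(X.iloc[train_i], y.iloc[train_i])
            scores.append(metric(y.iloc[test_i], pipeline.predict(X.iloc[test_i])))
        return mean(scores)

    return kfold_evaluate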
def _get_flexga_metas(pipeline: Pipeline, X: pd.DataFrame) -> t.Dict[str, ArgMeta]:
    """
    Converts meta information about the hyperparameters of a pipeline's
    primitive steps to the format the `flexga` package uses to know the
    bounds and characteristics of those hyperparameters (the things
    `flexga` is optimizing).
    """
    param_metas = pipeline.param_metas_with_data(X)
    kwargsmeta = {}

    for i, step_pmetas in param_metas.items():
        for key, pmeta in step_pmetas.items():
            flexga_key = f"{i},{key}"
            if isinstance(pmeta, IntParamMeta):
                flexga_arg_meta = IntArgMeta(
                    (pmeta.lbound, pmeta.ubound),
                    _range_rule(pmeta.lbound, pmeta.ubound),
                )
            elif isinstance(pmeta, FloatParamMeta):
                flexga_arg_meta = FloatArgMeta(
                    (pmeta.lbound, pmeta.ubound),
                    _range_rule(pmeta.lbound, pmeta.ubound),
                )
            elif isinstance(pmeta, BoolParamMeta):
                flexga_arg_meta = BoolArgMeta()
            elif isinstance(pmeta, CategoricalParamMeta):
                flexga_arg_meta = CategoricalArgMeta(pmeta.options)
            else:
                raise ValueError(
                    f"unsupported ParamMeta type {type(pmeta)} for {key} param"
                )
            kwargsmeta[flexga_key] = flexga_arg_meta

    return kwargsmeta
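# `_range_rule` is referenced above but not shown in this excerpt. The second
# value passed to `IntArgMeta`/`FloatArgMeta` presumably controls how far a
# mutation can move a parameter within its bounds. A plausible sketch, assuming
# the statistical "range rule of thumb" (spread estimated as a quarter of the
# range); this is a guess at the helper's intent, not its actual implementation:
def _range_rule(lbound: float, ubound: float) -> float:
    # Estimate a reasonable mutation spread from the parameter's bounds.
    return (ubound - lbound) / 4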
def down_sample_evaluate(pipeline: Pipeline, X: pd.DataFrame, y: pd.Series, metric: Metric) -> float:
    # First down-sample the data by using `train_test_split` in a
    # perhaps unintended way.
    X_smaller, _, y_smaller, _ = train_test_split(
        X, y, train_size=sample_ratio, shuffle=shuffle, random_state=random_state)

    # Now make the train/test split.
    X_train, X_test, y_train, y_test = train_test_split(
        X_smaller,
        y_smaller,
        test_size=test_size,
        shuffle=shuffle,
        random_state=random_state,
    )

    # Finally, fit and evaluate the model's performance.
    pipeline.fit(X_train, y_train)
    test_predictions = pipeline.predict(X_test)
    score = metric(y_test, test_predictions)
    return score
def sample_pipeline(
    self,
    problem_type: ProblemType,
    models: t.List[t.Type[Primitive]],
    transformers: t.List[t.Type[Primitive]],
) -> Pipeline:
    pipeline = Pipeline()
    for _ in range(self.preprocessors):
        pipeline.add_step(random.choice(transformers))
    pipeline.add_step(random.choice(models))
    return pipeline
def test_can_tune_multiple_primitives(self):
    """
    The flexga tuner should be able to tune the hyperparameters of all
    primitives in a pipeline at once.
    """
    pipeline = Pipeline()
    pipeline.add_step(transformers["PCAPrimitive"])
    pipeline.add_step(classifiers["DecisionTreeClassifierPrimitive"])
    evaluate = make_train_test_evaluator()
    logger.info(
        f"baseline score: {evaluate(pipeline, self.X, self.y, f1macro)}")
    ga_tune(pipeline, self.X, self.y, evaluate, f1macro, iters=2)
def sample_pipeline(
    self,
    problem_type: ProblemType,
    models: t.List[t.Type[Primitive]],
    transformers: t.List[t.Type[Primitive]],
) -> Pipeline:
    all_primitives = models + transformers
    pipeline = Pipeline()
    stack_input = pipeline.curr_step_i
    stack_outputs = []

    for _ in range(self.width):
        primitive = random.choice(all_primitives)
        pipeline.add_step(primitive, [stack_input])
        stack_outputs.append(pipeline.curr_step_i)

    pipeline.add_step(random.choice(models), stack_outputs)
    return pipeline
def ga_tune(
    pipeline: Pipeline,
    X: pd.DataFrame,
    y: pd.Series,
    evaluator: t.Callable,
    metric: Metric,
    exit_on_pipeline_error: bool = True,
    **flexgakwargs,
) -> TuneResult:
    """
    Performs genetic algorithm hyperparameter tuning on `pipeline`, returning
    the best score it could find and the number of evaluations it completed.
    Essentially performs a `.fit` operation on the pipeline, where the pipeline
    is fit with the best performing hyperparameter configuration it could find.

    Returns
    -------
    result : TuneResult
        A named tuple containing data about how the tuning process went.
    """
    # See what score the model gets without any tuning.
    starting_params = pipeline.get_params()
    starting_score = evaluator(pipeline, X, y, metric)

    # Keep track of how many evaluations were completed.
    n_evals = 1  # we already completed one

    def objective(*args, **flexga_params) -> float:
        """
        The objective function the genetic algorithm will try to maximize.
        """
        params = _get_params_from_flexga(flexga_params)
        nonlocal n_evals

        try:
            pipeline.set_params(params)
            score = evaluator(pipeline, X, y, metric)
        except PipelineRunError as e:
            logger.exception(e)
            if exit_on_pipeline_error:
                raise e
            # Pipelines that raise errors are bad.
            # TODO: make this `None` or `np.nan` instead.
            score = metric.worst_value

        n_evals += 1
        # The genetic algorithm maximizes the objective, so negate the score
        # for metrics that are better when smaller.
        return -score if metric.opt_dir == OptimizationDirection.MINIMIZE else score

    # Use flexga to find the best hyperparameter configuration it can.
    optimal_score, _, optimal_flexga_params = flexga(
        objective, kwargsmeta=_get_flexga_metas(pipeline, X), **flexgakwargs)

    if metric.is_better_than(optimal_score, starting_score):
        optimal_params = _get_params_from_flexga(optimal_flexga_params)
        did_improve = True
    else:
        # The tuner couldn't find anything better than the params the
        # pipeline started with under the conditions given.
        optimal_score = starting_score
        optimal_params = starting_params
        did_improve = False

    pipeline.set_params(optimal_params)
    pipeline.fit(X, y)

    logger.info("tuning complete.")
    logger.info(f"found best pipeline configuration: {pipeline}")
    logger.info(f"found best validation score of {optimal_score}")
    return TuneResult(optimal_score, n_evals, did_improve)
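# Example usage, mirroring the tests above. Judging from the constructor call,
# `TuneResult` appears to be a named tuple of (best score, number of
# evaluations, whether tuning improved on the starting params); only `n_evals`
# is relied on here. `load_iris` is just an illustrative data source, and this
# snippet is a sketch, not part of the package.
from sklearn.datasets import load_iris

X, y = load_iris(return_X_y=True, as_frame=True)

pipeline = Pipeline()
pipeline.add_step(transformers["PCAPrimitive"])
pipeline.add_step(classifiers["DecisionTreeClassifierPrimitive"])
evaluate = make_train_test_evaluator()

# Run a small GA search; `iters` and `population_size` bound the number of
# evaluations, as exercised in `test_returns_correct_number_evals`.
result = ga_tune(pipeline, X, y, evaluate, f1macro, iters=2, population_size=6)
logger.info(f"completed {result.n_evals} evaluations")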