Example 1
def test_evaluate_pipeline(BernoulliNBStandardScaler):
    x, y = load_iris(return_X_y=True)
    x, y = pd.DataFrame(x), pd.Series(y)

    prediction, scores, estimators, errors = evaluate_pipeline(
        BernoulliNBStandardScaler.pipeline, x, y, timeout=60, metrics=scoring_to_metric('accuracy')
    )
    assert 1 == len(scores)
    assert errors is None
    assert 5 == len(estimators)
    assert prediction.shape == (150,)
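The `assert 1 == len(scores)` relies on `scoring_to_metric` turning a single scorer name into a one-element tuple of `Metric` objects, so `evaluate_pipeline` reports one score per requested metric. A minimal sketch of that conversion, assuming the `gama.utilities.metrics` import path:

from gama.utilities.metrics import Metric, scoring_to_metric  # assumed import path

metrics = scoring_to_metric('accuracy')  # a single string becomes a 1-element tuple
assert isinstance(metrics, tuple) and len(metrics) == 1
assert all(isinstance(m, Metric) for m in metrics)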
Example 2
def test_evaluate_invalid_pipeline(InvalidLinearSVC):
    x, y = load_iris(return_X_y=True)
    x, y = pd.DataFrame(x), pd.Series(y)

    prediction, scores, estimators, error = evaluate_pipeline(
        InvalidLinearSVC.pipeline, x, y, timeout=60, metrics=scoring_to_metric('accuracy')
    )
    assert (float('-inf'),) == scores
    assert str(error).startswith("Unsupported set of arguments:")
    assert str(error).endswith("penalty='l1', loss='squared_hinge', dual=True")
    assert estimators is None
    assert prediction is None
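The error asserted here comes from scikit-learn itself: `LinearSVC` rejects the combination `penalty='l1'`, `loss='squared_hinge'`, `dual=True` at fit time, which is what makes the fixture pipeline invalid. A standalone sketch of the same failure, outside GAMA:

from sklearn.datasets import load_iris
from sklearn.svm import LinearSVC

x, y = load_iris(return_X_y=True)
try:
    # this parameter combination is not supported by LinearSVC
    LinearSVC(penalty='l1', loss='squared_hinge', dual=True).fit(x, y)
except ValueError as e:
    print(e)  # message starts with "Unsupported set of arguments:"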
Example 3
def test_cross_val_predict_score():
    estimator = DecisionTreeClassifier()
    x, y = load_iris(return_X_y=True)
    y_ohe = OneHotEncoder().fit_transform(y.reshape(-1, 1))
    x, y = pd.DataFrame(x), pd.Series(y)

    metrics = scoring_to_metric(['accuracy', 'log_loss'])
    predictions, scores, estimators = cross_val_predict_score(estimator, x, y, metrics=metrics)
    accuracy, logloss = scores

    assert accuracy_score(y_ohe, predictions) == pytest.approx(accuracy)
    assert -1 * log_loss(y_ohe, predictions) == pytest.approx(logloss)
    assert len(set(estimators)) == len(estimators)
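The `-1 *` in the last assertion reflects the maximization convention used throughout these examples: log loss is a "lower is better" quantity, so the score reported for it is the negated loss. A toy illustration using scikit-learn only (the probabilities below are made up for the sketch):

import numpy as np
from sklearn.metrics import log_loss

y_true = np.array([0, 1, 1])
y_proba = np.array([[0.9, 0.1], [0.2, 0.8], [0.4, 0.6]])
loss = log_loss(y_true, y_proba)  # positive, smaller is better
maximized_score = -loss           # the sign convention used in the assertion above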
Example 4
    def __init__(self, config=None, scoring='neg_log_loss', *args, **kwargs):
        if not config:
            # Do this to avoid the whole dictionary being included in the documentation.
            config = clf_config

        self._metrics = scoring_to_metric(scoring)
        if any(metric.requires_probabilities for metric in self._metrics):
            # we don't want classifiers that do not have `predict_proba`, because then we have to
            # start doing one hot encodings of predictions etc.
            config = {
                alg: hp for (alg, hp) in config.items()
                if not (inspect.isclass(alg)
                        and issubclass(alg, ClassifierMixin)
                        and not hasattr(alg(), 'predict_proba'))
            }

        self._label_encoder = None
        super().__init__(*args, **kwargs, config=config, scoring=scoring)
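The dictionary comprehension above removes classifiers that cannot produce class probabilities whenever a probability-based metric such as 'neg_log_loss' is requested. A standalone sketch of the same filtering idea (the toy config below is illustrative, not GAMA's real search space):

import inspect

from sklearn.base import ClassifierMixin
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import LinearSVC

config = {GaussianNB: {}, LinearSVC: {}}  # toy search space, not GAMA's clf_config
needs_probabilities = True                # e.g. because 'neg_log_loss' was requested
if needs_probabilities:
    config = {
        alg: hp for alg, hp in config.items()
        if not (inspect.isclass(alg)
                and issubclass(alg, ClassifierMixin)
                and not hasattr(alg(), 'predict_proba'))
    }
# LinearSVC() has no predict_proba, so it is dropped; GaussianNB remains.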
Example 5
def test_evaluate_pipeline(SS_BNB):
    x, y = load_iris(return_X_y=True)
    x, y = pd.DataFrame(x), pd.Series(y)

    prediction, scores, estimators, errors = evaluate_pipeline(
        SS_BNB.pipeline,
        x,
        y,
        timeout=60,
        metrics=scoring_to_metric("accuracy"),
    )
    assert 1 == len(scores)
    assert errors is None
    assert 5 == len(estimators)
    assert prediction.shape == (150, )
Example 6
    def __init__(
        self,
        scoring: Union[str, Metric, Iterable[str],
                       Iterable[Metric]] = "filled_in_by_child_class",
        regularize_length: bool = True,
        max_pipeline_length: Optional[int] = None,
        config: Dict = None,
        random_state: Optional[int] = None,
        max_total_time: int = 3600,
        max_eval_time: Optional[int] = None,
        n_jobs: Optional[int] = None,
        max_memory_mb: Optional[int] = None,
        verbosity: int = logging.WARNING,
        search: BaseSearch = AsyncEA(),
        post_processing: BasePostProcessing = BestFitPostProcessing(),
        output_directory: Optional[str] = None,
        store: str = "logs",
    ):
        """

        Parameters
        ----------
        scoring: str, Metric or Tuple
            Specifies the metric(s) to optimize towards.
            A string will be converted to Metric.
            A tuple must specify each metric with the same type (e.g. all str).
            See :ref:`Metrics` for built-in metrics.

        regularize_length: bool (default=True)
            If True, add pipeline length as an optimization metric.
            Short pipelines should then be preferred over long ones.

        max_pipeline_length: int, optional (default=None)
            If set, limit the maximum number of steps in any evaluated pipeline.
            Encoding and imputation are excluded.

        config: Dict
            Specifies available components and their valid hyperparameter settings.
            For more information, see :ref:`search_space_configuration`.

        random_state:  int, optional (default=None)
            Seed for the random number generators used in the process.
            However, with `n_jobs > 1`,
            there will be randomization introduced by multi-processing.
            For reproducible results, set this and use `n_jobs=1`.

        max_total_time: positive int (default=3600)
            Time in seconds that can be used for the `fit` call.

        max_eval_time: positive int, optional (default=None)
            Time in seconds that can be used to evaluate any one single individual.
            If None, set to 0.1 * max_total_time.

        n_jobs: int, optional (default=None)
            The number of parallel processes that may be created to speed up `fit`.
            Accepted values are positive integers, -1 or None.
            If -1 is specified, multiprocessing.cpu_count() processes are created.
            If None is specified, multiprocessing.cpu_count() / 2 processes are created.

        max_memory_mb: int, optional (default=None)
            Sets the total amount of memory GAMA is allowed to use (in megabytes).
            If not set, GAMA will use as much as it needs.
            GAMA is not guaranteed to respect this limit at all times,
            but it should never violate it for too long.

        verbosity: int (default=logging.WARNING)
            Sets the level of log messages to be automatically output to terminal.

        search: BaseSearch (default=AsyncEA())
            Search method to use to find good pipelines. Should be instantiated.

        post_processing: BasePostProcessing (default=BestFitPostProcessing())
            Post-processing method to create a model after the search phase.
            Should be an instantiated subclass of BasePostProcessing.

        output_directory: str, optional (default=None)
            Directory to use to save GAMA output. This includes both intermediate
            results during search and logs.
            If set to None, generate a unique name ("gama_HEXCODE").

        store: str (default='logs')
            Determines which data is stored after each run:
             - 'nothing': keep nothing from this run
             - 'models': keep only cache with models and predictions
             - 'logs': keep only the logs
             - 'all': keep logs and cache with models and predictions
        """
        if not output_directory:
            output_directory = f"gama_{str(uuid.uuid4())}"
        self.output_directory = os.path.abspath(
            os.path.expanduser(output_directory))
        if not os.path.exists(self.output_directory):
            os.mkdir(self.output_directory)

        register_stream_log(verbosity)
        if store in ["logs", "all"]:
            log_file = os.path.join(self.output_directory, "gama.log")
            log_handler = logging.FileHandler(log_file)
            log_handler.setLevel(logging.DEBUG)
            log_format = logging.Formatter(
                "[%(asctime)s - %(name)s] %(message)s")
            log_handler.setFormatter(log_format)
            logging.getLogger("gama").addHandler(log_handler)

        arguments = ",".join([
            f"{k}={v}" for (k, v) in locals().items() if k not in
            ["self", "config", "log_file", "log_handler", "log_format"]
        ])
        log.info(f"Using GAMA version {__version__}.")
        log.info(f"INIT:{self.__class__.__name__}({arguments})")

        if n_jobs is None:
            n_jobs = multiprocessing.cpu_count() // 2
            log.debug("n_jobs defaulted to %d.", n_jobs)
        elif n_jobs == -1:
            n_jobs = multiprocessing.cpu_count()
            log.debug("n_jobs set to use all %d cores.", n_jobs)

        err = ""
        if max_total_time is None or max_total_time <= 0:
            err = f"Expect positive int for max_total_time, got {max_total_time}."
        if max_eval_time is not None and max_eval_time <= 0:
            err = f"Expect None or positive int for max_eval_time, got {max_eval_time}."
        if n_jobs < -1 or n_jobs == 0:
            err = f"n_jobs should be -1 or positive int but is {n_jobs}."
        if err:
            self.cleanup("all")
            raise ValueError(err)

        setattr(
            AsyncEvaluator,
            "__init__",
            partialmethod(
                AsyncEvaluator.__init__,
                n_workers=n_jobs,
                memory_limit_mb=max_memory_mb,
                logfile=os.path.join(self.output_directory, "memory.log"),
            ),
        )

        if max_eval_time is None:
            max_eval_time = round(0.1 * max_total_time)
        if max_eval_time > max_total_time:
            log.warning(
                f"max_eval_time ({max_eval_time}) > max_total_time ({max_total_time}) "
                f"is not allowed. max_eval_time set to {max_total_time}.")
            max_eval_time = max_total_time

        self._max_eval_time = max_eval_time
        self._time_manager = TimeKeeper(max_total_time)
        self._metrics: Tuple[Metric, ...] = scoring_to_metric(scoring)
        self._regularize_length = regularize_length
        self._search_method: BaseSearch = search
        self._post_processing = post_processing
        self._store = store

        if random_state is not None:
            random.seed(random_state)
            np.random.seed(random_state)

        self._x: Optional[pd.DataFrame] = None
        self._y: Optional[pd.DataFrame] = None
        self._basic_encoding_pipeline: Optional[Pipeline] = None
        self._fixed_pipeline_extension: List[Tuple[str, TransformerMixin]] = []
        self._inferred_dtypes: List[Type] = []
        self.model: object = None
        self._final_pop: List[Individual] = []

        self._subscribers: Dict[str, List[Callable]] = defaultdict(list)
        cache_directory = os.path.join(self.output_directory, "cache")
        if isinstance(post_processing, EnsemblePostProcessing):
            self._evaluation_library = EvaluationLibrary(
                m=post_processing.hyperparameters["max_models"],
                n=post_processing.hyperparameters["hillclimb_size"],
                cache=cache_directory,
            )
        else:
            # Don't keep memory-heavy evaluation meta-data (predictions, estimators)
            self._evaluation_library = EvaluationLibrary(m=0,
                                                         cache=cache_directory)
        self.evaluation_completed(self._evaluation_library.save_evaluation)
        e = search.logger(
            os.path.join(self.output_directory, "evaluations.log"))
        self.evaluation_completed(e.log_evaluation)

        self._pset, parameter_checks = pset_from_config(config)

        if DATA_TERMINAL not in self._pset:
            if max_pipeline_length is None:
                log.info(
                    "Setting `max_pipeline_length` to 1 "
                    "because there are no preprocessing steps in the search space."
                )
                max_pipeline_length = 1
            elif max_pipeline_length > 1:
                raise ValueError(
                    f"`max_pipeline_length` can't be {max_pipeline_length} "
                    "because there are no preprocessing steps in the search space."
                )
        max_start_length = 3 if max_pipeline_length is None else max_pipeline_length
        self._operator_set = OperatorSet(
            mutate=partial(
                random_valid_mutation_in_place,
                primitive_set=self._pset,
                max_length=max_pipeline_length,
            ),
            mate=partial(random_crossover, max_length=max_pipeline_length),
            create_from_population=partial(create_from_population,
                                           cxpb=0.2,
                                           mutpb=0.8),
            create_new=partial(
                create_random_expression,
                primitive_set=self._pset,
                max_length=max_start_length,
            ),
            compile_=compile_individual,
            eliminate=eliminate_from_pareto,
            evaluate_callback=self._on_evaluation_completed,
            completed_evaluations=self._evaluation_library.lookup,
        )
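The parameters documented in this constructor map directly onto an AutoML run. A hedged usage sketch, assuming the `GamaClassifier` subclass from Example 4 forwards these keyword arguments to this base class (the values are illustrative):

from gama import GamaClassifier

automl = GamaClassifier(
    scoring='neg_log_loss',  # converted internally via scoring_to_metric
    max_total_time=300,      # seconds allowed for the whole fit() call
    max_eval_time=30,        # seconds allowed per evaluated pipeline
    n_jobs=1,                # single process; combine with random_state for reproducibility
    random_state=0,
    store='logs',            # keep only the log files from this run
)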
Example 7
def test_scoring_to_metric_mixed():
    metrics = list(all_metrics)
    mixed_metrics = [Metric.from_string(metric)
                     for metric in metrics[:2]] + metrics[2:]
    scoring_to_metric(mixed_metrics)
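This test exercises the fact that `scoring_to_metric` accepts a list that mixes `Metric` instances with plain strings. A minimal sketch of the same conversion, again assuming the `gama.utilities.metrics` import path:

from gama.utilities.metrics import Metric, scoring_to_metric  # assumed import path

mixed = [Metric.from_string('accuracy'), 'neg_log_loss']
metrics = scoring_to_metric(mixed)  # mixed input types are normalized to Metric
assert all(isinstance(m, Metric) for m in metrics)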
Example 8
    def __init__(
        self,
        scoring: Union[str, Metric, Iterable[str],
                       Iterable[Metric]] = "filled_in_by_child_class",
        regularize_length: bool = True,
        max_pipeline_length: Optional[int] = None,
        config: Dict = None,
        random_state: Optional[int] = None,
        max_total_time: int = 3600,
        max_eval_time: Optional[int] = None,
        n_jobs: Optional[int] = None,
        verbosity: int = logging.WARNING,
        keep_analysis_log: Optional[str] = "gama.log",
        search_method: BaseSearch = AsyncEA(),
        post_processing_method: BasePostProcessing = BestFitPostProcessing(),
        cache: Optional[str] = None,
    ):
        """

        Parameters
        ----------
        scoring: str, Metric or Tuple
            Specifies the metric(s) to optimize towards.
            A string will be converted to Metric.
            A tuple must specify each metric with the same type (e.g. all str).
            See :ref:`Metrics` for built-in metrics.

        regularize_length: bool (default=True)
            If True, add pipeline length as an optimization metric.
            Short pipelines should then be preferred over long ones.

        max_pipeline_length: int, optional (default=None)
            If set, limit the maximum number of steps in any evaluated pipeline.
            Encoding and imputation are excluded.

        config: Dict
            Specifies available components and their valid hyperparameter settings.
            For more information, see :ref:`search_space_configuration`.

        random_state:  int, optional (default=None)
            Seed for the random number generators used in the process.
            However, with `n_jobs > 1`,
            there will be randomization introduced by multi-processing.
            For reproducible results, set this and use `n_jobs=1`.

        max_total_time: positive int (default=3600)
            Time in seconds that can be used for the `fit` call.

        max_eval_time: positive int, optional (default=None)
            Time in seconds that can be used to evaluate any one single individual.
            If None, set to 0.1 * max_total_time.

        n_jobs: int, optional (default=None)
            The number of parallel processes that may be created to speed up `fit`.
            Accepted values are positive integers, -1 or None.
            If -1 is specified, multiprocessing.cpu_count() processes are created.
            If None is specified, multiprocessing.cpu_count() / 2 processes are created.

        verbosity: int (default=logging.WARNING)
            Sets the level of log messages to be automatically output to terminal.

        keep_analysis_log: str, optional (default='gama.log')
            If non-empty str, specify filepath where the log should be stored.
            If `None`, no log is stored.

        search_method: BaseSearch (default=AsyncEA())
            Search method to use to find good pipelines. Should be instantiated.

        post_processing_method: BasePostProcessing (default=BestFitPostProcessing())
            Post-processing method to create a model after the search phase.
            Should be an instantiated subclass of BasePostProcessing.

        cache: str, optional (default=None)
            Directory to use to save intermediate results during search.
            If set to None, generate a unique cache name.
        """
        register_stream_log(verbosity)
        if keep_analysis_log is not None:
            register_file_log(keep_analysis_log)

        if keep_analysis_log is not None and not os.path.isabs(
                keep_analysis_log):
            keep_analysis_log = os.path.abspath(keep_analysis_log)

        arguments = ",".join([
            f"{k}={v}" for (k, v) in locals().items()
            if k not in ["self", "config"]
        ])
        log.info(f"Using GAMA version {__version__}.")
        log.info(f"{self.__class__.__name__}({arguments})")
        log_event(log, TOKENS.INIT, arguments)

        if n_jobs is None:
            n_jobs = multiprocessing.cpu_count() // 2
            log.debug("n_jobs defaulted to %d", n_jobs)

        if max_total_time is None or max_total_time <= 0:
            raise ValueError(
                f"Expect positive int for max_total_time, got {max_total_time}."
            )
        if max_eval_time is not None and max_eval_time <= 0:
            raise ValueError(
                f"Expect None or positive int for max_eval_time, got {max_eval_time}."
            )
        if n_jobs < -1 or n_jobs == 0:
            raise ValueError(
                f"n_jobs should be -1 or positive int but is {n_jobs}.")
        AsyncEvaluator.n_jobs = n_jobs

        if max_eval_time is None:
            max_eval_time = round(0.1 * max_total_time)
        if max_eval_time > max_total_time:
            log.warning(
                f"max_eval_time ({max_eval_time}) > max_total_time ({max_total_time}) "
                f"is not allowed. max_eval_time set to {max_total_time}.")
            max_eval_time = max_total_time

        self._max_eval_time = max_eval_time
        self._time_manager = TimeKeeper(max_total_time)
        self._metrics: Tuple[Metric, ...] = scoring_to_metric(scoring)
        self._regularize_length = regularize_length
        self._search_method: BaseSearch = search_method
        self._post_processing = post_processing_method

        if random_state is not None:
            random.seed(random_state)
            np.random.seed(random_state)

        self._x: Optional[pd.DataFrame] = None
        self._y: Optional[pd.DataFrame] = None
        self._basic_encoding_pipeline: Optional[Pipeline] = None
        self._fixed_pipeline_extension: List[Tuple[str, TransformerMixin]] = []
        self._inferred_dtypes: List[Type] = []
        self.model: object = None
        self._final_pop: List[Individual] = []

        self._subscribers: Dict[str, List[Callable]] = defaultdict(list)
        if not cache:
            cache = f"cache_{str(uuid.uuid4())}"
        if isinstance(post_processing_method, EnsemblePostProcessing):
            self._evaluation_library = EvaluationLibrary(
                m=post_processing_method.hyperparameters["max_models"],
                n=post_processing_method.hyperparameters["hillclimb_size"],
                cache_directory=cache,
            )
        else:
            # Don't keep memory-heavy evaluation meta-data (predictions, estimators)
            self._evaluation_library = EvaluationLibrary(m=0,
                                                         cache_directory=cache)
        self.evaluation_completed(self._evaluation_library.save_evaluation)

        self._pset, parameter_checks = pset_from_config(config)

        max_start_length = 3 if max_pipeline_length is None else max_pipeline_length
        self._operator_set = OperatorSet(
            mutate=partial(
                random_valid_mutation_in_place,
                primitive_set=self._pset,
                max_length=max_pipeline_length,
            ),
            mate=partial(random_crossover, max_length=max_pipeline_length),
            create_from_population=partial(create_from_population,
                                           cxpb=0.2,
                                           mutpb=0.8),
            create_new=partial(
                create_random_expression,
                primitive_set=self._pset,
                max_length=max_start_length,
            ),
            compile_=compile_individual,
            eliminate=eliminate_from_pareto,
            evaluate_callback=self._on_evaluation_completed,
            completed_evaluations=self._evaluation_library.lookup,
        )
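Compared with Example 6, this older variant exposes `keep_analysis_log`, `search_method`, `post_processing_method`, and `cache` instead of `store` and `output_directory`. A brief instantiation sketch under that older API (values illustrative, and assuming `GamaClassifier` forwards the keyword arguments):

from gama import GamaClassifier

automl = GamaClassifier(
    scoring='neg_log_loss',
    max_total_time=300,
    n_jobs=1,
    keep_analysis_log='gama.log',  # set to None to disable the analysis log
    cache='gama_cache',            # directory for intermediate search results
)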
Example 9
    def __init__(
        self,
        scoring: Union[str, Metric, Tuple[Union[str, Metric],
                                          ...]] = 'filled_in_by_child_class',
        regularize_length: bool = True,
        max_pipeline_length: Optional[int] = None,
        config: Dict = None,
        random_state: Optional[int] = None,
        max_total_time: int = 3600,
        max_eval_time: Optional[int] = None,
        n_jobs: Optional[int] = None,
        verbosity: int = logging.WARNING,
        keep_analysis_log: Optional[str] = 'gama.log',
        search_method: BaseSearch = AsyncEA(),
        post_processing_method: BasePostProcessing = BestFitPostProcessing()):
        """

        Parameters
        ----------
        scoring: str, Metric or Tuple
            Specifies the metric(s) to optimize towards. A string will be converted to Metric. A tuple must
            specify each metric with the same type (i.e. all str or all Metric). See :ref:`Metrics` for built-in
            metrics.

        regularize_length: bool
            If True, add pipeline length as an optimization metric (preferring short over long).

        max_pipeline_length: int, optional (default=None)
            If set, limit the maximum number of steps in any evaluated pipeline. Encoding and imputation are excluded.

        config: a dictionary which specifies available components and their valid hyperparameter settings
            For more information, see :ref:`search_space_configuration`.

        random_state:  int, optional (default=None)
            If an integer is passed, this will be the seed for the random number generators used in the process.
            However, with `n_jobs > 1`, there will be randomization introduced by multi-processing.
            For reproducible results, set this and use `n_jobs=1`.

        max_total_time: positive int (default=3600)
            Time in seconds that can be used for the `fit` call.

        max_eval_time: positive int, optional (default=None)
            Time in seconds that can be used to evaluate any one single individual.
            If None, set to 0.1 * max_total_time.

        n_jobs: int, optional (default=None)
            The number of parallel processes that may be created to speed up `fit`.
            Accepted values are positive integers, -1 or None.
            If -1 is specified, multiprocessing.cpu_count() processes are created.
            If None is specified, multiprocessing.cpu_count() / 2 processes are created.

        verbosity: int (default=logging.WARNING)
            Sets the level of log messages to be automatically output to terminal.

        keep_analysis_log: str, optional (default='gama.log')
            If non-empty str, specifies the path (and name) where the log should be stored, e.g. /output/gama.log.
            If `None`, no log is stored.

        search_method: BaseSearch (default=AsyncEA())
            Search method to use to find good pipelines. Should be instantiated.

        post_processing_method: BasePostProcessing (default=BestFitPostProcessing())
            Post-processing method to create a model after the search phase. Should be instantiated.

        """
        register_stream_log(verbosity)
        if keep_analysis_log is not None:
            register_file_log(keep_analysis_log)

        if keep_analysis_log is not None and not os.path.isabs(
                keep_analysis_log):
            keep_analysis_log = os.path.abspath(keep_analysis_log)

        arguments = ','.join([
            '{}={}'.format(k, v) for (k, v) in locals().items() if k not in [
                'self', 'config', 'gamalog', 'file_handler',
                'stdout_streamhandler'
            ]
        ])
        log.info('Using GAMA version {}.'.format(__version__))
        log.info('{}({})'.format(self.__class__.__name__, arguments))
        log_event(log, TOKENS.INIT, arguments)

        if n_jobs is None:
            n_jobs = multiprocessing.cpu_count() // 2
            log.debug('n_jobs defaulted to %d', n_jobs)

        if max_total_time is None or max_total_time <= 0:
            raise ValueError(
                f"max_total_time should be integer greater than zero but is {max_total_time}."
            )
        if max_eval_time is not None and max_eval_time <= 0:
            raise ValueError(
                f"max_eval_time should be None or integer greater than zero but is {max_eval_time}."
            )
        if n_jobs < -1 or n_jobs == 0:
            raise ValueError(
                f"n_jobs should be -1 or positive integer but is {n_jobs}.")
        elif n_jobs != -1:
            # AsyncExecutor defaults to using multiprocessing.cpu_count(), i.e. n_jobs=-1
            AsyncEvaluator.n_jobs = n_jobs

        if max_eval_time is None:
            max_eval_time = 0.1 * max_total_time
        if max_eval_time > max_total_time:
            log.warning(
                f"max_eval_time ({max_eval_time}) > max_total_time ({max_total_time}) is not allowed. "
                f"max_eval_time set to {max_total_time}.")
            max_eval_time = max_total_time

        self._max_eval_time = max_eval_time
        self._time_manager = TimeKeeper(max_total_time)
        self._metrics: Tuple[Metric] = scoring_to_metric(scoring)
        self._regularize_length = regularize_length
        self._search_method: BaseSearch = search_method
        self._post_processing = post_processing_method

        if random_state is not None:
            random.seed(random_state)
            np.random.seed(random_state)

        self._X: Optional[pd.DataFrame] = None
        self._y: Optional[pd.DataFrame] = None
        self._basic_encoding_pipeline: Optional[Pipeline] = None
        self._inferred_dtypes: List[Type] = []
        self.model: object = None
        self._final_pop = None

        self._subscribers = defaultdict(list)
        if isinstance(post_processing_method, EnsemblePostProcessing):
            self._evaluation_library = EvaluationLibrary(
                m=post_processing_method.hyperparameters['max_models'],
                n=post_processing_method.hyperparameters['hillclimb_size'],
            )
        else:
            # Don't keep memory-heavy evaluation meta-data (predictions, estimators)
            self._evaluation_library = EvaluationLibrary(m=0)
        self.evaluation_completed(self._evaluation_library.save_evaluation)

        self._pset, parameter_checks = pset_from_config(config)

        max_start_length = 3 if max_pipeline_length is None else max_pipeline_length
        self._operator_set = OperatorSet(
            mutate=partial(random_valid_mutation_in_place,
                           primitive_set=self._pset,
                           max_length=max_pipeline_length),
            mate=partial(random_crossover, max_length=max_pipeline_length),
            create_from_population=partial(create_from_population,
                                           cxpb=0.2,
                                           mutpb=0.8),
            create_new=partial(create_random_expression,
                               primitive_set=self._pset,
                               max_length=max_start_length),
            compile_=compile_individual,
            eliminate=eliminate_from_pareto,
            evaluate_callback=self._on_evaluation_completed)