예제 #1
0
 def __init__(
         self,
         steps: NamedTupleList,
         batch_size=None,
         cache_folder=None
 ):
     """
     Initialize the pipeline, then run the three barrier set-up helpers with ``batch_size``.

     :param steps: steps handed to ``Pipeline.__init__``
     :param batch_size: value passed, unchanged, to each barrier validation / patching helper
     :param cache_folder: cache folder handed to ``Pipeline.__init__``
     """
     Pipeline.__init__(self, steps=steps, cache_folder=cache_folder)
     ForceHandleMixin.__init__(self)

     # The helpers run in this exact order: validate, patch a missing barrier,
     # then patch the barriers' batch size.
     barrier_setup = (
         self.__validate_barriers_batch_size,
         self.__patch_missing_barrier,
         self.__patch_barriers_batch_size,
     )
     for configure in barrier_setup:
         configure(batch_size)
예제 #2
0
    def __init__(
            self,
            wrapped: BaseTransformer,
            then_unflatten: bool = True
    ):
        """
        Wrap a step and set up the instance's bookkeeping attributes.

        :param wrapped: step handed to ``MetaStep.__init__``
        :param then_unflatten: flag stored as ``self.then_unflatten``
            (presumably tells the step to unflatten its outputs afterwards; confirm against the transform code)
        """
        MetaStep.__init__(self, wrapped)
        ResumableStepMixin.__init__(self)
        ForceHandleMixin.__init__(self)

        # Two separate empty lists (not aliases of one another).
        self.len_di, self.len_eo = [], []
        self.then_unflatten = then_unflatten
예제 #3
0
    def __init__(
            self,
            pipeline: Union[BaseStep, NamedTupleList],
            validation_size: float = None,
            batch_size: int = None,
            batch_metrics: Dict[str, Callable] = None,
            shuffle_in_each_epoch_at_train: bool = True,
            seed: int = None,
            n_epochs: int = 1,
            epochs_metrics: Dict[str, Callable] = None,
            scoring_function: Callable = None,
            cache_folder: str = None,
            print_epoch_metrics=False,
            print_batch_metrics=False
    ):
        """
        Wrap ``pipeline`` in, successively, a mini batch pipeline, an optional ``TrainShuffled`` step,
        a validation split and an epoch repeater, and use the result as this meta step's wrapped step.

        :param pipeline: pipeline to wrap with an epoch repeater, a validation split wrapper, and a mini batch sequential pipeline
        :param validation_size: ratio for validation size between 0 and 1
        :param batch_size: batch size for the mini batch sequential pipeline
        :param batch_metrics: metrics to calculate for each processed mini batch (defaults to an empty dict)
        :param shuffle_in_each_epoch_at_train: when True, the mini batch pipeline is wrapped in a ``TrainShuffled`` step built with ``seed``
        :param seed: random seed for the data shuffling that can be done at each epoch when the param shuffle_in_each_epoch_at_train is True
        :param n_epochs: number of epochs
        :param epochs_metrics: metrics to calculate for each epoch (defaults to an empty dict)
        :param scoring_function: scoring function with two arguments (y_true, y_pred); stored as ``self.final_scoring_metric``
        :param cache_folder: cache folder, passed to ``ForceHandleMixin.__init__``
        :param print_epoch_metrics: whether or not to print epoch metrics
        :param print_batch_metrics: whether or not to print batch metrics
        """
        # Replace the None defaults with fresh dicts so instances never share one.
        if epochs_metrics is None:
            epochs_metrics = {}
        if batch_metrics is None:
            batch_metrics = {}

        # These attributes are assigned before the _create_* helpers below are called;
        # presumably those helpers read them from self - confirm before reordering.
        self.final_scoring_metric = scoring_function
        self.epochs_metrics = epochs_metrics
        self.n_epochs = n_epochs
        self.shuffle_in_each_epoch_at_train = shuffle_in_each_epoch_at_train
        self.batch_size = batch_size
        self.batch_metrics = batch_metrics
        self.validation_size = validation_size
        self.print_batch_metrics = print_batch_metrics
        self.print_epoch_metrics = print_epoch_metrics

        # Build the wrapping layers from the inside out.
        wrapped = pipeline
        wrapped = self._create_mini_batch_pipeline(wrapped)

        if shuffle_in_each_epoch_at_train:
            wrapped = TrainShuffled(wrapped=wrapped, seed=seed)

        wrapped = self._create_validation_split(wrapped)
        wrapped = self._create_epoch_repeater(wrapped)

        # The base initializers run last, once the fully wrapped step is available.
        BaseStep.__init__(self)
        MetaStepMixin.__init__(self, wrapped)
        EvaluableStepMixin.__init__(self)
        ForceHandleMixin.__init__(self, cache_folder)
예제 #4
0
    def __init__(
            self,
            pipeline: BaseStep,
            validation_splitter: 'BaseValidationSplitter',
            refit_trial: bool,
            scoring_callback: ScoringCallback,
            hyperparams_optimizer: BaseHyperparameterSelectionStrategy = None,
            hyperparams_repository: HyperparamsRepository = None,
            n_trials: int = 10,
            epochs: int = 1,
            callbacks: List[BaseCallback] = None,
            refit_scoring_function: Callable = None,
            print_func: Callable = None,
            cache_folder_when_no_handle=None,
            continue_loop_on_error=True
    ):
        """
        Store the AutoML configuration and build the ``Trainer`` used for the trials.

        :param pipeline: step stored as ``self.pipeline``
        :param validation_splitter: splitter stored on the instance and passed to the ``Trainer``
        :param refit_trial: flag stored as ``self.refit_trial``
            (presumably whether to refit once the trials are done; confirm against the loop code)
        :param scoring_callback: scoring callback passed to the ``Trainer``
        :param hyperparams_optimizer: hyperparameter selection strategy; defaults to a new
            ``RandomSearchHyperparameterSelectionStrategy``
        :param hyperparams_repository: repository to use; when None, a ``HyperparamsJSONRepository`` is built from
            the optimizer and ``cache_folder_when_no_handle``, otherwise ``set_strategy`` is called on it with the optimizer
        :param n_trials: stored as ``self.n_trial``
        :param epochs: number of epochs passed to the ``Trainer``
        :param callbacks: callbacks passed to the ``Trainer``
        :param refit_scoring_function: stored as ``self.refit_scoring_function``
        :param print_func: printing callable stored on the instance and passed to the ``Trainer``; defaults to ``print``
        :param cache_folder_when_no_handle: cache folder passed to ``ForceHandleMixin.__init__`` and to the default repository
        :param continue_loop_on_error: when True, ``self.error_types_to_raise`` only lists
            ``SystemError``, ``SystemExit``, ``EOFError`` and ``KeyboardInterrupt``; when False it is ``(Exception,)``
        """
        BaseStep.__init__(self)
        ForceHandleMixin.__init__(self, cache_folder=cache_folder_when_no_handle)

        self.validation_splitter: BaseValidationSplitter = validation_splitter

        if print_func is None:
            print_func = print

        if hyperparams_optimizer is None:
            hyperparams_optimizer = RandomSearchHyperparameterSelectionStrategy()
        self.hyperparameter_optimizer: BaseHyperparameterSelectionStrategy = hyperparams_optimizer

        if hyperparams_repository is None:
            hyperparams_repository = HyperparamsJSONRepository(hyperparams_optimizer, cache_folder_when_no_handle)
        else:
            hyperparams_repository.set_strategy(hyperparams_optimizer)

        # Assigned exactly once and annotated with the base type: a caller-supplied
        # repository is not necessarily a HyperparamsJSONRepository.
        self.hyperparams_repository: HyperparamsRepository = hyperparams_repository

        self.pipeline: BaseStep = pipeline
        self.print_func: Callable = print_func

        self.n_trial: int = n_trials

        self.refit_scoring_function: Callable = refit_scoring_function

        self.refit_trial: bool = refit_trial

        self.error_types_to_raise = (SystemError, SystemExit, EOFError, KeyboardInterrupt) if continue_loop_on_error \
            else (Exception,)

        self.trainer = Trainer(
            epochs=epochs,
            scoring_callback=scoring_callback,
            callbacks=callbacks,
            print_func=self.print_func,
            validation_splitter=validation_splitter,
            hyperparams_repository=hyperparams_repository
        )
예제 #5
0
 def __init__(
         self,
         steps: NamedTupleList,
         batch_size=None,
         include_incomplete_batch: bool = None,
         default_value_data_inputs=None,
         default_value_expected_outputs=None,
         cache_folder=None
 ):
     """
     Initialize the pipeline, remember the default values, then run the barrier set-up helpers.

     :param steps: steps handed to ``Pipeline.__init__``
     :param batch_size: batch size given to the barrier validation / patching helpers
     :param include_incomplete_batch: forwarded to the missing-barrier patching helper
     :param default_value_data_inputs: stored on the instance and forwarded to the missing-barrier patching helper
     :param default_value_expected_outputs: stored on the instance and forwarded to the missing-barrier patching helper
     :param cache_folder: cache folder handed to ``Pipeline.__init__``
     """
     Pipeline.__init__(self, steps=steps, cache_folder=cache_folder)
     ForceHandleMixin.__init__(self)

     self.default_value_data_inputs = default_value_data_inputs
     self.default_value_expected_outputs = default_value_expected_outputs

     self.__validate_barriers_batch_size(batch_size=batch_size)

     # Options for the barrier that gets patched in when one is missing.
     missing_barrier_options = {
         'batch_size': batch_size,
         'include_incomplete_batch': include_incomplete_batch,
         'default_value_data_inputs': default_value_data_inputs,
         'default_value_expected_outputs': default_value_expected_outputs,
     }
     self.__patch_missing_barrier(**missing_barrier_options)

     self.__patch_barriers_batch_size(batch_size)
예제 #6
0
 def __init__(self,
              steps: NamedTupleList,
              batch_size=None,
              keep_incomplete_batch: bool = None,
              default_value_data_inputs=AbsentValuesNullObject(),
              default_value_expected_outputs=None,
              cache_folder=None,
              mute_joiner_batch_size_warning: bool = True):
     """
     Initialize the pipeline, remember the default values, then run the barrier set-up helpers.

     NOTE(review): the ``AbsentValuesNullObject()`` default is created once, when the function
     is defined, so every call relying on the default shares that same instance.

     :param steps: steps handed to ``Pipeline.__init__``
     :param batch_size: batch size given to the barrier validation / patching helpers
     :param keep_incomplete_batch: forwarded to the missing-barrier patching helper
     :param default_value_data_inputs: stored on the instance and forwarded to the missing-barrier patching helper
     :param default_value_expected_outputs: stored on the instance and forwarded to the missing-barrier patching helper
     :param cache_folder: cache folder handed to ``Pipeline.__init__``
     :param mute_joiner_batch_size_warning: flag stored as ``self.mute_joiner_batch_size_warning``
     """
     Pipeline.__init__(self, steps=steps, cache_folder=cache_folder)
     ForceHandleMixin.__init__(self)
     self.default_value_data_inputs = default_value_data_inputs
     self.default_value_expected_outputs = default_value_expected_outputs
     self.__validate_barriers_batch_size(batch_size=batch_size)
     self.__patch_missing_barrier(
         batch_size=batch_size,
         keep_incomplete_batch=keep_incomplete_batch,
         default_value_data_inputs=default_value_data_inputs,
         default_value_expected_outputs=default_value_expected_outputs)
     # Set before the batch size patching below; presumably that helper reads this flag - confirm before reordering.
     self.mute_joiner_batch_size_warning = mute_joiner_batch_size_warning
     self.__patch_barriers_batch_size(batch_size)
예제 #7
0
 def __init__(self):
     """
     Run the ``BaseStep`` and ``ForceHandleMixin`` initializers on this instance, in that order.
     """
     BaseStep.__init__(self)
     ForceHandleMixin.__init__(self)
예제 #8
0
 def __init__(self):
     """
     Run the ``BaseTransformer`` and ``ForceHandleMixin`` initializers on this instance, in that order.
     """
     BaseTransformer.__init__(self)
     ForceHandleMixin.__init__(self)
 def __init__(self):
     """
     Run the ``Identity`` and ``ForceHandleMixin`` initializers on this instance, in that order.
     """
     # NOTE(review): this definition directly follows another __init__ with no separator;
     # if both live in the same class body, this one replaces the earlier one - confirm.
     Identity.__init__(self)
     ForceHandleMixin.__init__(self)
예제 #10
0
    def __init__(
            self,
            pipeline: BaseStep,
            validation_splitter: 'BaseValidationSplitter',
            refit_trial: bool,
            scoring_callback: ScoringCallback,
            hyperparams_optimizer: BaseHyperparameterSelectionStrategy = None,
            hyperparams_repository: HyperparamsRepository = None,
            n_trials: int = 10,
            epochs: int = 1,
            callbacks: List[BaseCallback] = None,
            refit_scoring_function: Callable = None,
            cache_folder_when_no_handle=None,
            n_jobs=-1,
            continue_loop_on_error=True
    ):
        """
        Store the AutoML configuration and build the ``Trainer`` used for the trials.

        Notes on multiprocessing:
              Using a multiprocess-safe hyperparams repository is recommended, although most of the time it is not necessary.
              Beware of the behaviour of the HyperparamsRepository's observers/subscribers.
              Context instances are not shared between trials but copied. So are the AutoML loop and the DataContainers.

        :param pipeline: The pipeline, or BaseStep, which will be used by the AutoML loop.
        :param validation_splitter: A :class:`BaseValidationSplitter` instance to split data between training and validation set.
        :param refit_trial: A boolean stored as ``self.refit_trial``
            (presumably whether to perform a refit call once the trials are done; confirm against the loop code).
        :param scoring_callback: The scoring callback to use during training.
        :param hyperparams_optimizer: a :class:`BaseHyperparameterSelectionStrategy` instance that can be queried for new sets of hyperparameters.
            Defaults to a new :class:`RandomSearchHyperparameterSelectionStrategy`.
        :param hyperparams_repository: a :class:`HyperparamsRepository` instance to store experiment status and results.
            Defaults to a :class:`HyperparamsJSONRepository` built from the optimizer and ``cache_folder_when_no_handle``.
        :param n_trials: The number of different hyperparameters to try.
        :param epochs: The number of epochs to perform for each trial.
        :param callbacks: A list of callbacks to perform after each epoch.
        :param refit_scoring_function: A scoring function to use on a refit call.
        :param cache_folder_when_no_handle: default cache folder used if the AutoML loop isn't called through handler functions.
        :param n_jobs: If n_jobs is in (-1, None, 1), then AutoML is executed in a single thread. If n_jobs > 1, then n_jobs threads are launched;
            if n_jobs < -1, then (n_cpus + 1 + n_jobs) threads are launched.
        :param continue_loop_on_error: when True, ``self.error_types_to_raise`` only lists ``SystemError``, ``SystemExit``,
            ``EOFError`` and ``KeyboardInterrupt``; when False it is ``(Exception,)``.
        """
        BaseStep.__init__(self)
        ForceHandleMixin.__init__(self, cache_folder=cache_folder_when_no_handle)

        # Resolve the optimizer first: the repository below needs it either way.
        optimizer = hyperparams_optimizer
        if optimizer is None:
            optimizer = RandomSearchHyperparameterSelectionStrategy()

        repository = hyperparams_repository
        if repository is not None:
            repository.set_strategy(optimizer)
        else:
            repository = HyperparamsJSONRepository(optimizer, cache_folder_when_no_handle)

        self.validation_splitter: BaseValidationSplitter = validation_splitter
        self.hyperparameter_optimizer: BaseHyperparameterSelectionStrategy = optimizer
        self.pipeline: BaseStep = pipeline
        self.n_trial: int = n_trials
        self.hyperparams_repository: HyperparamsRepository = repository
        self.refit_scoring_function: Callable = refit_scoring_function
        self.refit_trial: bool = refit_trial
        self.n_jobs = n_jobs

        if continue_loop_on_error:
            self.error_types_to_raise = (SystemError, SystemExit, EOFError, KeyboardInterrupt)
        else:
            self.error_types_to_raise = (Exception,)

        self.trainer = Trainer(
            epochs=epochs,
            scoring_callback=scoring_callback,
            callbacks=callbacks,
            validation_splitter=validation_splitter
        )