def __init__(self, wrapped, epochs, fit_only=True, repeat_in_test_mode=False):
    BaseStep.__init__(self)
    MetaStepMixin.__init__(self, wrapped)
    self.repeat_in_test_mode = repeat_in_test_mode
    self.fit_only = fit_only
    self.epochs = epochs
def __init__(self, wrapped: BaseStep, then_unflatten: bool = True):
    BaseStep.__init__(self)
    MetaStepMixin.__init__(self, wrapped)
    ResumableStepMixin.__init__(self)
    ForceHandleMixin.__init__(self)
    self.then_unflatten = then_unflatten
    self.len_di = []
    self.len_eo = []
def __init__(self, pipeline: Union[BaseStep, NamedTupleList], validation_size: float = None,
             batch_size: int = None, batch_metrics: Dict[str, Callable] = None,
             shuffle_in_each_epoch_at_train: bool = True, seed: int = None, n_epochs: int = 1,
             epochs_metrics: Dict[str, Callable] = None, scoring_function: Callable = None,
             cache_folder: str = None, print_epoch_metrics=False, print_batch_metrics=False):
    """
    :param pipeline: pipeline to wrap with an epoch repeater, a validation split wrapper, and a mini batch sequential pipeline
    :param validation_size: ratio for validation size between 0 and 1
    :param batch_size: batch size for the mini batch sequential pipeline
    :param batch_metrics: metrics to calculate for each processed mini batch
    :param shuffle_in_each_epoch_at_train: whether to shuffle the training data at the beginning of each epoch
    :param seed: random seed for the data shuffling that can be done at each epoch when the param shuffle_in_each_epoch_at_train is True
    :param n_epochs: number of epochs
    :param epochs_metrics: metrics to calculate for each epoch
    :param scoring_function: scoring function with two arguments (y_true, y_pred)
    :param cache_folder: cache folder to be used inside the pipeline
    :param print_epoch_metrics: whether or not to print epoch metrics
    :param print_batch_metrics: whether or not to print batch metrics
    """
    if epochs_metrics is None:
        epochs_metrics = {}
    if batch_metrics is None:
        batch_metrics = {}

    self.final_scoring_metric = scoring_function
    self.epochs_metrics = epochs_metrics
    self.n_epochs = n_epochs
    self.shuffle_in_each_epoch_at_train = shuffle_in_each_epoch_at_train
    self.batch_size = batch_size
    self.batch_metrics = batch_metrics
    self.validation_size = validation_size
    self.print_batch_metrics = print_batch_metrics
    self.print_epoch_metrics = print_epoch_metrics

    wrapped = pipeline
    wrapped = self._create_mini_batch_pipeline(wrapped)
    if shuffle_in_each_epoch_at_train:
        wrapped = TrainShuffled(wrapped=wrapped, seed=seed)
    wrapped = self._create_validation_split(wrapped)
    wrapped = self._create_epoch_repeater(wrapped)

    BaseStep.__init__(self)
    MetaStepMixin.__init__(self, wrapped)
    EvaluableStepMixin.__init__(self)
    ForceHandleMixin.__init__(self, cache_folder)
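# Minimal usage sketch for the constructor above. The class name
# `DeepLearningPipeline` is an assumption inferred from the parameter names
# (the snippet does not show the class); adapt it to the actual class.
from sklearn.metrics import mean_squared_error

from neuraxle.pipeline import Pipeline
from neuraxle.steps.numpy import MultiplyByN

p = DeepLearningPipeline(  # hypothetical class name for this constructor
    Pipeline([MultiplyByN(2)]),
    validation_size=0.15,
    batch_size=32,
    batch_metrics={'mse': mean_squared_error},
    n_epochs=10,
    epochs_metrics={'mse': mean_squared_error},
    scoring_function=mean_squared_error,
)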
def __init__(
        self,
        pipeline: BaseStep,
        validation_splitter: 'BaseValidationSplitter',
        refit_trial: bool,
        scoring_callback: ScoringCallback,
        hyperparams_optimizer: BaseHyperparameterSelectionStrategy = None,
        hyperparams_repository: HyperparamsRepository = None,
        n_trials: int = 10,
        epochs: int = 1,
        callbacks: List[BaseCallback] = None,
        refit_scoring_function: Callable = None,
        print_func: Callable = None,
        cache_folder_when_no_handle=None
):
    BaseStep.__init__(self)
    ForceHandleOnlyMixin.__init__(self, cache_folder=cache_folder_when_no_handle)

    self.validation_split_function: BaseValidationSplitter = validation_splitter

    if print_func is None:
        print_func = print

    if hyperparams_optimizer is None:
        hyperparams_optimizer = RandomSearchHyperparameterSelectionStrategy()
    self.hyperparameter_optimizer: BaseHyperparameterSelectionStrategy = hyperparams_optimizer

    if hyperparams_repository is None:
        hyperparams_repository = HyperparamsJSONRepository(hyperparams_optimizer, cache_folder_when_no_handle)
    else:
        hyperparams_repository.set_strategy(hyperparams_optimizer)
    self.hyperparams_repository: HyperparamsRepository = hyperparams_repository

    self.pipeline: BaseStep = pipeline
    self.print_func: Callable = print_func
    self.n_trial: int = n_trials
    self.refit_scoring_function: Callable = refit_scoring_function

    if callbacks is None:
        callbacks = []
    callbacks: List[BaseCallback] = [scoring_callback] + callbacks
    self.refit_trial: bool = refit_trial

    self.trainer = Trainer(callbacks=callbacks, epochs=epochs, print_func=self.print_func)
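# Usage sketch for the AutoML-style constructor above, assuming it belongs to
# Neuraxle's `AutoML` loop; treat the exact import paths as assumptions tied
# to the library version. `my_pipeline`, `data_inputs` and `expected_outputs`
# are placeholders.
from sklearn.metrics import mean_squared_error

from neuraxle.metaopt.auto_ml import AutoML, ValidationSplitter
from neuraxle.metaopt.callbacks import ScoringCallback

auto_ml = AutoML(
    pipeline=my_pipeline,  # any BaseStep with a hyperparams space (placeholder)
    validation_splitter=ValidationSplitter(test_size=0.2),
    refit_trial=True,
    scoring_callback=ScoringCallback(mean_squared_error, higher_score_is_better=False),
    n_trials=20,
    epochs=5,
)
auto_ml = auto_ml.fit(data_inputs, expected_outputs)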
def __init__(self, transform_callback_function, fit_callback_function, more_arguments: List = tuple(),
             transform_function=None, hyperparams=None):
    BaseStep.__init__(self, hyperparams)
    self.transform_function = transform_function
    self.more_arguments = more_arguments
    self.fit_callback_function = fit_callback_function
    self.transform_callback_function = transform_callback_function
def __init__(
        self,
        handle_fit_callback,
        handle_transform_callback,
        handle_fit_transform_callback
):
    ForceMustHandleMixin.__init__(self)
    BaseStep.__init__(self)
    self.handle_fit_callback = handle_fit_callback
    self.handle_fit_transform_callback = handle_fit_transform_callback
    self.handle_transform_callback = handle_transform_callback
def __init__(self, wrapped=None, scoring_function: Callable = r2_score):
    """
    Base class for validation wrappers. It has a scoring function to calculate
    the score for the validation split.

    :param scoring_function: scoring function with two arguments (y_true, y_pred)
    :type scoring_function: Callable
    """
    BaseStep.__init__(self)
    MetaStepMixin.__init__(self, wrapped)
    self.scoring_function = scoring_function
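# A scoring function only needs the (y_true, y_pred) signature, so any
# scikit-learn metric fits, as does a custom callable like this sketch:
import numpy as np

def negative_mse(y_true, y_pred) -> float:
    # Negated so that a higher score is better.
    return -float(np.mean((np.asarray(y_true) - np.asarray(y_pred)) ** 2))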
def __init__(self, wrapped): """ Wrap a scikit-learn MetaEstimatorMixin for usage in Neuraxle. This class is similar to the SKLearnWrapper class of Neuraxle that can wrap a scikit-learn BaseEstimator. :param wrapped: a scikit-learn object of type "MetaEstimatorMixin". """ BaseStep.__init__(self) MetaStepMixin.__init__(self) self.wrapped_sklearn_metaestimator = wrapped # TODO: use self.set_step of the MetaStepMixin instead?
def __init__(self, wrapped: BaseStep, copy_op=copy.deepcopy, cache_folder_when_no_handle=None):
    BaseStep.__init__(self)
    MetaStepMixin.__init__(self, wrapped)
    ForceHandleOnlyMixin.__init__(self, cache_folder_when_no_handle)
    self.set_step(wrapped)
    self.steps: List[BaseStep] = []
    self.copy_op = copy_op
def __init__(self, wrapped, epochs, repeat_in_test_mode=False, cache_folder_when_no_handle=None):
    BaseStep.__init__(self)
    MetaStep.__init__(self, wrapped)
    ForceHandleOnlyMixin.__init__(self, cache_folder=cache_folder_when_no_handle)
    self.repeat_in_test_mode = repeat_in_test_mode
    self.epochs = epochs
def __init__(self, hyperparameter_optimizer: BaseHyperparameterOptimizer,
             validation_technique: BaseCrossValidationWrapper = None, higher_score_is_better=True):
    BaseStep.__init__(self)
    MetaStepMixin.__init__(self, None)

    if validation_technique is None:
        validation_technique = KFoldCrossValidationWrapper()
    self.validation_technique = validation_technique
    self.higher_score_is_better = higher_score_is_better
    self.hyperparameter_optimizer = hyperparameter_optimizer
def save_best_model(self, step: BaseStep):
    """
    Save the best model inside the best retrained model folder.

    :param step: step to save
    :return: saved step
    """
    hyperparams = step.get_hyperparams().to_flat_as_dict_primitive()
    trial_hash = self._get_trial_hash(hyperparams)
    step.set_name(trial_hash).save(ExecutionContext(self.best_retrained_model_folder), full_dump=True)
    return step
def __init__(self, wrapped: BaseStep, copy_op=copy.deepcopy, cache_folder_when_no_handle=None):
    BaseStep.__init__(self)
    MetaStepMixin.__init__(self, wrapped)
    ForceHandleOnlyMixin.__init__(self, cache_folder_when_no_handle)
    self.savers.append(TruncableJoblibStepSaver())
    self.set_step(wrapped)
    self.steps_as_tuple: List[NamedTupleList] = []
    self.copy_op = copy_op
def __init__(self, callback_function, more_arguments: List = tuple(), hyperparams=None,
             fit_callback_function=None, transform_function=None):
    """
    Create the callback step with a function and extra arguments to send to the function.

    :param callback_function: The function that will be called on events.
    :param more_arguments: Extra arguments that will be sent to the callback after the processed data (optional).
    """
    BaseStep.__init__(self, hyperparams=hyperparams)
    self.transform_function = transform_function
    self.callback_function = callback_function
    self.fit_callback_function = fit_callback_function
    self.more_arguments = more_arguments
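# Usage sketch for the callback-step constructor above. `TapeCallbackFunction`
# records every call it receives so tests can assert what data passed through;
# the concrete step class name (`TransformCallbackStep` here) is an assumption,
# since the snippet does not show it.
from neuraxle.steps.misc import TapeCallbackFunction

tape = TapeCallbackFunction()
step = TransformCallbackStep(tape, more_arguments=["some_extra_arg"])  # assumed class name
step.transform([1, 2, 3])
print(tape.data)  # the recorded calls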
def __init__(self, wrapped, transform_callback_function, fit_callback_function,
             inverse_transform_callback_function=None, more_arguments: List = tuple(), hyperparams=None):
    MetaStepMixin.__init__(self, wrapped)
    BaseStep.__init__(self, hyperparams)
    self.inverse_transform_callback_function = inverse_transform_callback_function
    self.more_arguments = more_arguments
    self.fit_callback_function = fit_callback_function
    self.transform_callback_function = transform_callback_function
def __init__(
        self,
        wrapped_sklearn_predictor,
        hyperparams_space: HyperparameterSpace = None,
        return_all_sklearn_default_params_on_get=False
):
    if not isinstance(wrapped_sklearn_predictor, BaseEstimator):
        raise ValueError("The wrapped_sklearn_predictor must be an instance of scikit-learn's BaseEstimator.")
    self.wrapped_sklearn_predictor = wrapped_sklearn_predictor

    params: HyperparameterSamples = HyperparameterSamples(wrapped_sklearn_predictor.get_params())
    BaseStep.__init__(self, hyperparams=params, hyperparams_space=hyperparams_space)
    self.return_all_sklearn_default_params_on_get = return_all_sklearn_default_params_on_get

    self.name += "_" + wrapped_sklearn_predictor.__class__.__name__
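# Usage sketch: this looks like the `SKLearnWrapper` constructor, so wrapping
# a Ridge regressor with a hyperparameter space might look like the following
# (treat the exact import paths as assumptions tied to the Neuraxle version).
from sklearn.linear_model import Ridge

from neuraxle.hyperparams.distributions import LogUniform
from neuraxle.hyperparams.space import HyperparameterSpace
from neuraxle.steps.sklearn import SKLearnWrapper

step = SKLearnWrapper(
    Ridge(),
    HyperparameterSpace({'alpha': LogUniform(0.01, 10.0)}),
)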
def __init__(self, wrapped: BaseStep, enabled: bool = True, nullified_return_value=None):
    BaseStep.__init__(
        self,
        hyperparams=HyperparameterSamples({
            OPTIONAL_ENABLED_HYPERPARAM: enabled
        })
    )
    MetaStepMixin.__init__(self, wrapped)
    ForceMustHandleMixin.__init__(self)

    if nullified_return_value is None:
        nullified_return_value = []
    self.nullified_return_value = nullified_return_value
def _set_hyperparams(self, hyperparams: HyperparameterSamples) -> HyperparameterSamples:
    """
    Set hyperparams for the base step, and for the wrapped sklearn predictor.

    :param hyperparams: hyperparams to set
    :return: the step's hyperparams
    """
    # Set the step hyperparams, and set the wrapped sklearn predictor params.
    BaseStep._set_hyperparams(self, hyperparams)
    self.wrapped_sklearn_predictor.set_params(
        **hyperparams.with_separator(RecursiveDict.DEFAULT_SEPARATOR).to_flat_dict())
    return self.hyperparams
def __init__(self, wrapped: BaseStep, enabled: bool = True, nullified_return_value=None,
             cache_folder_when_no_handle=None):
    BaseStep.__init__(self, hyperparams=HyperparameterSamples({
        OPTIONAL_ENABLED_HYPERPARAM: enabled
    }))
    MetaStepMixin.__init__(self, wrapped)
    ForceHandleOnlyMixin.__init__(self, cache_folder_when_no_handle)

    if nullified_return_value is None:
        nullified_return_value = []
    self.nullified_return_value = nullified_return_value
def __init__(
        self,
        wrapped: BaseStep,
        cache_folder: str = DEFAULT_CACHE_FOLDER,
        value_hasher: 'BaseValueHasher' = None,
):
    BaseStep.__init__(self)
    MetaStepMixin.__init__(self, wrapped)
    self.value_hasher = value_hasher

    if self.value_hasher is None:
        self.value_hasher = Md5Hasher()

    self.value_caching_folder = cache_folder
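# Sketch of the value-caching idea above, assuming the default `Md5Hasher`
# hashes each data input value so the digest can key a per-value cache and
# already-transformed values can be skipped. Simplified, not the real API.
import hashlib

def value_cache_key(value) -> str:
    return hashlib.md5(str(value).encode('utf-8')).hexdigest()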
def _update_hyperparams(self, hyperparams: HyperparameterSamples) -> HyperparameterSamples:
    """
    Update hyperparams for the base step, and for the wrapped sklearn predictor.

    :param hyperparams: hyperparams to update
    :return: the step's updated hyperparams, flattened
    """
    # Flatten the step hyperparams, and update the wrapped sklearn predictor params.
    hyperparams = HyperparameterSamples(hyperparams)
    BaseStep._update_hyperparams(self, hyperparams.to_flat())
    self.wrapped_sklearn_predictor.set_params(
        **self.hyperparams.with_separator(RecursiveDict.DEFAULT_SEPARATOR).to_flat_as_dict_primitive()
    )
    return self.hyperparams.to_flat()
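# Sketch of the behavior the two methods above share, assuming Neuraxle's
# HyperparameterSamples: hyperparam keys are flattened with a separator
# (double underscore by default) so scikit-learn's `set_params` accepts them.
# Illustrative values only.
from sklearn.linear_model import Ridge

from neuraxle.hyperparams.space import HyperparameterSamples

predictor = Ridge()
flat = HyperparameterSamples({'alpha': 0.5, 'fit_intercept': True}).to_flat_dict()
predictor.set_params(**flat)  # the same forwarding the wrapper does internally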
def __init__(
        self,
        wrapped=None,
        n_iter: int = 10,
        higher_score_is_better: bool = True,
        validation_technique: BaseCrossValidation = None,
        refit=True,
):
    if wrapped is not None:
        MetaStepMixin.__init__(self, wrapped)
    BaseStep.__init__(self)

    # Avoid a shared mutable default argument: build the default here instead.
    if validation_technique is None:
        validation_technique = KFoldCrossValidation()

    self.n_iter = n_iter
    self.higher_score_is_better = higher_score_is_better
    self.validation_technique: BaseCrossValidation = validation_technique
    self.refit = refit
def __init__(self, wrapped: BaseStep, test_size: float, scoring_function=r2_score,
             run_validation_split_in_test_mode=True):
    """
    :param wrapped: wrapped step
    :param test_size: ratio for test size between 0 and 1
    :param scoring_function: scoring function with two arguments (y_true, y_pred)
    """
    MetaStepMixin.__init__(self, wrapped)
    BaseStep.__init__(self)
    self.run_validation_split_in_test_mode = run_validation_split_in_test_mode
    self.test_size = test_size
    self.scoring_function = scoring_function
def __init__(self, wrapped: BaseStep, metrics: Dict, name: str = None, print_metrics=False, print_fun=print):
    BaseStep.__init__(self, name=name)
    MetaStepMixin.__init__(self, wrapped)
    self.metrics: Dict = metrics
    self._initialize_metrics(metrics)
    self.print_metrics = print_metrics
    self.print_fun = print_fun
    self.enabled = True
def _get_hyperparams(self):
    if self.return_all_sklearn_default_params_on_get:
        hp = self.wrapped_sklearn_predictor.get_params()
        self._delete_base_estimator_from_dict(hp)
        return HyperparameterSamples(hp)
    else:
        return BaseStep._get_hyperparams(self)
def _will_process(
        self, data_container: DataContainer, context: ExecutionContext) -> (DataContainer, ExecutionContext):
    """
    Flatten the data container before any processing is done on the wrapped step.

    :param data_container: data container to flatten
    :param context: execution context
    :return: (data container, execution context)
    :rtype: (DataContainer, ExecutionContext)
    """
    data_container, context = BaseStep._will_process(self, data_container, context)

    if data_container.expected_outputs is None:
        expected_outputs = np.empty_like(np.array(data_container.data_inputs))
        expected_outputs.fill(np.nan)
        data_container.set_expected_outputs(expected_outputs)

    di, self.len_di = self._flatten_list(data_container.data_inputs)
    eo, self.len_eo = self._flatten_list(data_container.expected_outputs)

    flattened_data_container = DataContainer(
        summary_id=data_container.summary_id,
        data_inputs=di,
        expected_outputs=eo,
        sub_data_containers=data_container.sub_data_containers
    )

    return flattened_data_container, context
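# Hedged sketch of what a `_flatten_list` helper like the one used above might
# do (the real implementation is not shown here): flatten one nesting level
# and remember each sublist's length so the output can be unflattened later.
def _flatten_list_sketch(list_of_lists):
    lengths = [len(sub) for sub in list_of_lists]
    flat = [item for sub in list_of_lists for item in sub]
    return flat, lengths

flat, lengths = _flatten_list_sketch([[1, 2], [3, 4, 5]])
# flat == [1, 2, 3, 4, 5]; lengths == [2, 3]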
def train(self, pipeline: BaseStep, data_inputs, expected_outputs=None) -> Trial:
    """
    Train the pipeline using the validation splitter.
    Track training and validation metrics for each epoch.

    Note: this method is just a shortcut to using the `execute_trial` method with less
    boilerplate code needed. Refer to `execute_trial` for full flexibility.

    :param pipeline: pipeline to train on
    :param data_inputs: data inputs
    :param expected_outputs: expected outputs to fit on
    :return: executed trial
    """
    validation_splits: List[Tuple[DataContainer, DataContainer]] = \
        self.validation_split_function.split_data_container(
            DataContainer(data_inputs=data_inputs, expected_outputs=expected_outputs)
        )

    repo_trial: Trial = Trial(
        pipeline=pipeline,
        hyperparams=pipeline.get_hyperparams(),
        main_metric_name=self.get_main_metric_name()
    )

    self.execute_trial(
        pipeline=pipeline,
        trial_number=1,
        repo_trial=repo_trial,
        context=ExecutionContext(),
        validation_splits=validation_splits,
        n_trial=1,
        delete_pipeline_on_completion=False
    )

    return repo_trial
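# Usage sketch, assuming the method above belongs to Neuraxle's `Trainer`:
# train once on the configured validation split and inspect the trial.
# `trainer` and `my_pipeline` are placeholders for objects built elsewhere.
import numpy as np

data_inputs = np.array([0, 1, 2, 3])
expected_outputs = data_inputs * 2
trial = trainer.train(my_pipeline, data_inputs, expected_outputs)
print(trial)  # the executed Trial, with per-epoch train/validation metrics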
def __init__(self, transform_callback_function=None, fit_callback_function=None,
             more_arguments: List = tuple(), transform_function=None, hyperparams=None):
    BaseStep.__init__(self, hyperparams)

    if transform_callback_function is None:
        transform_callback_function = TapeCallbackFunction()
    if fit_callback_function is None:
        fit_callback_function = TapeCallbackFunction()

    self.transform_function = transform_function
    self.more_arguments = more_arguments
    self.fit_callback_function = fit_callback_function
    self.transform_callback_function = transform_callback_function
def __init__(self, wrapped: BaseStep, auto_ml_algorithm: AutoMLAlgorithm,
             hyperparams_repository: HyperparamsRepository = None, n_iters: int = 100, refit=True):
    BaseStep.__init__(self)
    self.refit = refit
    auto_ml_algorithm = auto_ml_algorithm.set_step(wrapped)
    MetaStepMixin.__init__(self, auto_ml_algorithm)
    NonTransformableMixin.__init__(self)

    if hyperparams_repository is None:
        hyperparams_repository = InMemoryHyperparamsRepository()
    self.hyperparams_repository = hyperparams_repository
    self.n_iters = n_iters
def __init__(
        self,
        pipeline: Union[BaseStep, NamedTupleList],
        validation_size: float = 0.0,
        batch_size: int = None,
        batch_metrics: Dict[str, Callable] = None,
        shuffle_in_each_epoch_at_train: bool = True,
        seed: int = None,
        n_epochs: int = 1,
        epochs_metrics: Dict[str, Callable] = None,
        scoring_function: Callable = None,
        metrics_plotting_step: BaseStep = None,
        cache_folder: str = None,
        print_epoch_metrics=False,
        print_batch_metrics=False
):
    if epochs_metrics is None:
        epochs_metrics = {}
    if batch_metrics is None:
        batch_metrics = {}

    self.final_scoring_metric = scoring_function
    self.epochs_metrics = epochs_metrics
    self.n_epochs = n_epochs
    self.shuffle_in_each_epoch_at_train = shuffle_in_each_epoch_at_train
    self.batch_size = batch_size
    self.batch_metrics = batch_metrics
    self.validation_size = validation_size
    self.metrics_plotting_step = metrics_plotting_step
    self.print_batch_metrics = print_batch_metrics
    self.print_epoch_metrics = print_epoch_metrics

    wrapped = pipeline
    wrapped = self._create_mini_batch_pipeline(wrapped)
    if shuffle_in_each_epoch_at_train:
        wrapped = TrainShuffled(wrapped=wrapped, seed=seed)
    wrapped = self._create_validation_split(wrapped)
    wrapped = self._create_epoch_repeater(wrapped)

    BaseStep.__init__(self)
    Pipeline.__init__(self, [wrapped], cache_folder=cache_folder)