def fit(self, x=None, y=None, validation_split=0, validation_data=None, **kwargs):
    """Search for the best model and hyperparameters for the AutoModel.

    It will search for the best model based on the performances on
    validation data.

    # Arguments
        x: numpy.ndarray or tensorflow.Dataset. Training data x.
        y: numpy.ndarray or tensorflow.Dataset. Training data y.
        validation_split: Float between 0 and 1. Fraction of the training
            data to be held out as validation data. The model will set
            apart this fraction of the training data, will not train on
            it, and will evaluate the loss and any model metrics on this
            data at the end of each epoch. The validation data is taken
            from the last samples in the `x` and `y` data provided,
            before shuffling. Not supported when `x` is a dataset.
        validation_data: Data on which to evaluate the loss and any model
            metrics at the end of each epoch. The model will not be
            trained on this data. `validation_data` overrides
            `validation_split`. `validation_data` could be:
            - tuple `(x_val, y_val)` of Numpy arrays or tensors
            - tuple `(x_val, y_val, val_sample_weights)` of Numpy arrays
            - dataset or a dataset iterator
            For the first two cases, `batch_size` must be provided.
            For the last case, `validation_steps` must be provided.
        **kwargs: Any arguments supported by keras.Model.fit.
    """
    dataset, validation_data = self.prepare_data(
        x=x,
        y=y,
        validation_data=validation_data,
        validation_split=validation_split)
    # Infer the rest of the graph from the training data.
    self._meta_build(dataset)
    self.hypermodel.set_io_shapes(dataset)
    # Build the hypermodel once so preprocessing can run before search.
    hyperparameters = kerastuner.HyperParameters()
    self.hypermodel.hyper_build(hyperparameters)
    self.hypermodel.preprocess(
        hp=kerastuner.HyperParameters(),
        dataset=dataset,
        validation_data=validation_data,
        fit=True)
    self.tuner = tuner.RandomSearch(
        hypermodel=self.hypermodel,
        objective='val_loss',
        max_trials=self.max_trials,
        directory=self.directory,
        seed=self.seed,
        project_name=self.name)
    self.hypermodel.clear_preprocessors()

    # TODO: allow early stop if epochs is not specified.
    self.tuner.search(x=dataset,
                      validation_data=validation_data,
                      **kwargs)
def test_add_early_stopping(tmp_dir):
    """_inject_callbacks should add an EarlyStopping callback when none is supplied."""
    search = tuner_module.RandomSearch(
        hyper_graph=mock.Mock(),
        hypermodel=mock.Mock(),
        objective='val_loss',
        max_trials=1,
        directory=tmp_dir,
        seed=common.SEED)
    injected = search._inject_callbacks([], mock.Mock())
    assert any(isinstance(cb, tf.keras.callbacks.EarlyStopping)
               for cb in injected)
def test_search(_, _1, _2, tmp_dir):
    """search() should run end-to-end with mocked graph, oracle and training."""
    graph = mock.Mock()
    graph.build_graphs.return_value = (mock.Mock(), mock.Mock())
    search_tuner = tuner_module.RandomSearch(
        hyper_graph=graph,
        hypermodel=mock.Mock(),
        objective='val_loss',
        max_trials=1,
        directory=tmp_dir,
        seed=common.SEED)
    fake_oracle = mock.Mock()
    # Three distinct mock trials, as if three runs had completed.
    fake_oracle.get_best_trials.return_value = [mock.Mock() for _unused in range(3)]
    search_tuner.oracle = fake_oracle
    search_tuner.preprocess_graph = mock.Mock()
    search_tuner.need_fully_train = True
    search_tuner.search(x=mock.Mock(), y=mock.Mock(), epochs=5)
def fit(self,
        x=None,
        y=None,
        epochs=None,
        callbacks=None,
        validation_split=0,
        validation_data=None,
        **kwargs):
    """Search for the best model and hyperparameters for the AutoModel.

    It will search for the best model based on the performances on
    validation data.

    # Arguments
        x: numpy.ndarray or tensorflow.Dataset. Training data x.
        y: numpy.ndarray or tensorflow.Dataset. Training data y.
        epochs: Int. The number of epochs to train each model during the
            search. If unspecified, by default we train for a maximum of
            1000 epochs, but we stop training if the validation loss
            stops improving for 10 epochs (unless you specified an
            EarlyStopping callback as part of the callbacks argument, in
            which case the EarlyStopping callback you specified will
            determine early stopping).
        callbacks: List of Keras callbacks to apply during training and
            validation.
        validation_split: Float between 0 and 1. Fraction of the training
            data to be used as validation data. The model will set apart
            this fraction of the training data, will not train on it, and
            will evaluate the loss and any model metrics on this data at
            the end of each epoch. The validation data is selected from
            the last samples in the `x` and `y` data provided, before
            shuffling. This argument is not supported when `x` is a
            dataset.
        validation_data: Data on which to evaluate the loss and any model
            metrics at the end of each epoch. The model will not be
            trained on this data. `validation_data` will override
            `validation_split`. `validation_data` could be:
            - tuple `(x_val, y_val)` of Numpy arrays or tensors
            - tuple `(x_val, y_val, val_sample_weights)` of Numpy arrays
            - dataset or a dataset iterator
            For the first two cases, `batch_size` must be provided.
            For the last case, `validation_steps` must be provided.
        **kwargs: Any arguments supported by keras.Model.fit.
    """
    dataset, validation_data = self._prepare_data(
        x=x,
        y=y,
        validation_data=validation_data,
        validation_split=validation_split)

    # Initialize the hypermodel.
    self._meta_build(dataset)
    self.hypermodel.set_io_shapes(dataset)

    # Build the hypermodel in tuner init.
    hp = kerastuner.HyperParameters()
    self.hypermodel.hyper_build(hp)
    # Run the preprocessors once with fresh hyperparameters so the tuner
    # can train on already-processed data.
    self.hypermodel.preprocess(
        hp=kerastuner.HyperParameters(),
        dataset=dataset,
        validation_data=validation_data,
        fit=True)
    self.tuner = tuner.RandomSearch(
        hypermodel=self.hypermodel,
        objective='val_loss',
        max_trials=self.max_trials,
        directory=self.directory,
        seed=self.seed,
        project_name=self.name)
    self.hypermodel.clear_preprocessors()

    # Process the args. Copy the callbacks list so the caller's list is
    # never mutated.
    callbacks = [] if callbacks is None else list(callbacks)
    if epochs is None:
        epochs = 1000
    # Inject a default EarlyStopping unless the user supplied their own
    # (generator form avoids materializing a throwaway list).
    if not any(isinstance(callback, tf.keras.callbacks.EarlyStopping)
               for callback in callbacks):
        callbacks.append(tf.keras.callbacks.EarlyStopping(patience=10))

    self.tuner.search(x=dataset,
                      epochs=epochs,
                      callbacks=callbacks,
                      validation_data=validation_data,
                      **kwargs)
def fit(self,
        x=None,
        y=None,
        epochs=None,
        callbacks=None,
        validation_split=0,
        validation_data=None,
        objective='val_loss',
        **kwargs):
    """Search for the best model and hyperparameters for the AutoModel.

    It will search for the best model based on the performances on
    validation data.

    # Arguments
        x: numpy.ndarray or tensorflow.Dataset. Training data x.
        y: numpy.ndarray or tensorflow.Dataset. Training data y.
        epochs: Int. The number of epochs to train each model during the
            search. If unspecified, by default we train for a maximum of
            1000 epochs, but we stop training if the validation loss
            stops improving for 10 epochs (unless you specified an
            EarlyStopping callback as part of the callbacks argument, in
            which case the EarlyStopping callback you specified will
            determine early stopping).
        callbacks: List of Keras callbacks to apply during training and
            validation.
        validation_split: Float between 0 and 1. Fraction of the training
            data to be used as validation data. The model will set apart
            this fraction of the training data, will not train on it, and
            will evaluate the loss and any model metrics on this data at
            the end of each epoch. The validation data is selected from
            the last samples in the `x` and `y` data provided, before
            shuffling. This argument is not supported when `x` is a
            dataset. The best model found would be fit on the entire
            dataset including the validation data.
        validation_data: Data on which to evaluate the loss and any model
            metrics at the end of each epoch. The model will not be
            trained on this data. `validation_data` will override
            `validation_split`. The type of the validation data should be
            the same as the training data. The best model found would be
            fit on the training dataset without the validation data.
        objective: String. Name of model metric to minimize or maximize,
            e.g. 'val_accuracy'. Defaults to 'val_loss'.
        **kwargs: Any arguments supported by keras.Model.fit.
    """
    dataset, validation_data = self._prepare_data(
        x=x,
        y=y,
        validation_data=validation_data,
        validation_split=validation_split)

    # Initialize the hyper_graph.
    self._meta_build(dataset)

    # Build the hypermodel in tuner init.
    hp = kerastuner.HyperParameters()
    preprocess_graph, keras_graph = self.hyper_graph.build_graphs(hp)
    # Fit the preprocessors once so the tuner can train on processed data.
    preprocess_graph.preprocess(
        dataset=dataset,
        validation_data=validation_data,
        fit=True)
    self.tuner = tuner.RandomSearch(
        hyper_graph=self.hyper_graph,
        # Only refit on validation data when we split it off ourselves.
        fit_on_val_data=self._split_dataset,
        hypermodel=keras_graph,
        objective=objective,
        max_trials=self.max_trials,
        directory=self.directory,
        seed=self.seed,
        project_name=self.name)

    # Process the args. Copy the callbacks list so the caller's list is
    # never mutated.
    callbacks = [] if callbacks is None else list(callbacks)
    if epochs is None:
        epochs = 1000
    # Inject a default EarlyStopping unless the user supplied their own
    # (generator form avoids materializing a throwaway list).
    if not any(isinstance(callback, tf.keras.callbacks.EarlyStopping)
               for callback in callbacks):
        callbacks.append(tf.keras.callbacks.EarlyStopping(patience=10))

    self.tuner.search(x=dataset,
                      epochs=epochs,
                      callbacks=callbacks,
                      validation_data=validation_data,
                      **kwargs)