Example no. 1
0
    def fit(self,
            x=None,
            y=None,
            validation_split=0,
            validation_data=None,
            **kwargs):
        """Search for the best model and hyperparameters for the AutoModel.

        The search is driven by model performance on the validation data.

        # Arguments
            x: numpy.ndarray or tensorflow.Dataset. Training data x.
            y: numpy.ndarray or tensorflow.Dataset. Training data y.
            validation_split: Float between 0 and 1. Fraction of the training
                data reserved for validation. The model does not train on this
                fraction; the loss and any model metrics are evaluated on it
                at the end of every epoch. The split is taken from the last
                samples of the provided `x` and `y`, before shuffling. Not
                supported when `x` is a dataset.
            validation_data: Data used to evaluate the loss and any model
                metrics at the end of each epoch; the model is never trained
                on it. Overrides `validation_split`. Accepted forms:
                  - tuple `(x_val, y_val)` of Numpy arrays or tensors
                  - tuple `(x_val, y_val, val_sample_weights)` of Numpy arrays
                  - dataset or a dataset iterator
                The tuple forms require `batch_size`; the dataset form
                requires `validation_steps`.
            **kwargs: Any arguments supported by keras.Model.fit.
        """
        train_set, validation_data = self.prepare_data(
            x=x,
            y=y,
            validation_data=validation_data,
            validation_split=validation_split)

        # Shape the hypermodel around the prepared data.
        self._meta_build(train_set)
        self.hypermodel.set_io_shapes(train_set)

        # Build once with a scratch HyperParameters, then run the
        # preprocessors over the training and validation sets.
        build_hp = kerastuner.HyperParameters()
        self.hypermodel.hyper_build(build_hp)
        self.hypermodel.preprocess(
            hp=kerastuner.HyperParameters(),
            dataset=train_set,
            validation_data=validation_data,
            fit=True)

        self.tuner = tuner.RandomSearch(
            hypermodel=self.hypermodel,
            objective='val_loss',
            max_trials=self.max_trials,
            directory=self.directory,
            seed=self.seed,
            project_name=self.name)
        self.hypermodel.clear_preprocessors()

        # TODO: allow early stop if epochs is not specified.
        self.tuner.search(x=train_set, validation_data=validation_data, **kwargs)
Example no. 2
0
def test_add_early_stopping(tmp_dir):
    """_inject_callbacks should add an EarlyStopping callback when none is supplied."""
    tuner = tuner_module.RandomSearch(hyper_graph=mock.Mock(),
                                      hypermodel=mock.Mock(),
                                      objective='val_loss',
                                      max_trials=1,
                                      directory=tmp_dir,
                                      seed=common.SEED)

    callbacks = tuner._inject_callbacks([], mock.Mock())

    # Generator expression instead of a list literal: any() can short-circuit
    # without materializing a throwaway list.
    assert any(isinstance(callback, tf.keras.callbacks.EarlyStopping)
               for callback in callbacks)
Example no. 3
0
def test_search(_, _1, _2, tmp_dir):
    """Smoke-test RandomSearch.search with a fully mocked hyper graph and oracle."""
    graph = mock.Mock()
    graph.build_graphs.return_value = (mock.Mock(), mock.Mock())
    search_tuner = tuner_module.RandomSearch(hyper_graph=graph,
                                             hypermodel=mock.Mock(),
                                             objective='val_loss',
                                             max_trials=1,
                                             directory=tmp_dir,
                                             seed=common.SEED)

    # Stub the oracle so search() sees three "best" trials to retrain.
    fake_oracle = mock.Mock()
    fake_oracle.get_best_trials.return_value = [
        mock.Mock(),
        mock.Mock(),
        mock.Mock(),
    ]
    search_tuner.oracle = fake_oracle
    search_tuner.preprocess_graph = mock.Mock()
    search_tuner.need_fully_train = True

    search_tuner.search(x=mock.Mock(), y=mock.Mock(), epochs=5)
Example no. 4
0
    def fit(self,
            x=None,
            y=None,
            epochs=None,
            callbacks=None,
            validation_split=0,
            validation_data=None,
            **kwargs):
        """Search for the best model and hyperparameters for the AutoModel.

        It will search for the best model based on the performances on
        validation data.

        # Arguments
            x: numpy.ndarray or tensorflow.Dataset. Training data x.
            y: numpy.ndarray or tensorflow.Dataset. Training data y.
            epochs: Int. The number of epochs to train each model during the search.
                If unspecified, by default we train for a maximum of 1000 epochs,
                but we stop training if the validation loss stops improving for 10
                epochs (unless you specified an EarlyStopping callback as part of
                the callbacks argument, in which case the EarlyStopping callback you
                specified will determine early stopping).
            callbacks: List (or any iterable) of Keras callbacks to apply during
                training and validation. The caller's collection is never mutated.
            validation_split: Float between 0 and 1.
                Fraction of the training data to be used as validation data.
                The model will set apart this fraction of the training data,
                will not train on it, and will evaluate
                the loss and any model metrics
                on this data at the end of each epoch.
                The validation data is selected from the last samples
                in the `x` and `y` data provided, before shuffling. This argument is
                not supported when `x` is a dataset.
            validation_data: Data on which to evaluate
                the loss and any model metrics at the end of each epoch.
                The model will not be trained on this data.
                `validation_data` will override `validation_split`.
                `validation_data` could be:
                  - tuple `(x_val, y_val)` of Numpy arrays or tensors
                  - tuple `(x_val, y_val, val_sample_weights)` of Numpy arrays
                  - dataset or a dataset iterator
                For the first two cases, `batch_size` must be provided.
                For the last case, `validation_steps` must be provided.
            **kwargs: Any arguments supported by keras.Model.fit.
        """
        dataset, validation_data = self._prepare_data(
            x=x,
            y=y,
            validation_data=validation_data,
            validation_split=validation_split)

        # Initialize the hypermodel.
        self._meta_build(dataset)
        self.hypermodel.set_io_shapes(dataset)

        # Build the hypermodel in tuner init.
        hp = kerastuner.HyperParameters()
        self.hypermodel.hyper_build(hp)
        self.hypermodel.preprocess(hp=kerastuner.HyperParameters(),
                                   dataset=dataset,
                                   validation_data=validation_data,
                                   fit=True)
        self.tuner = tuner.RandomSearch(hypermodel=self.hypermodel,
                                        objective='val_loss',
                                        max_trials=self.max_trials,
                                        directory=self.directory,
                                        seed=self.seed,
                                        project_name=self.name)
        self.hypermodel.clear_preprocessors()

        # Process the args. Copy the callbacks so the caller's list is never
        # mutated; list() also accepts tuples or other iterables, which the
        # previous `callbacks + [...]` concatenation would reject.
        callbacks = list(callbacks) if callbacks is not None else []
        if epochs is None:
            epochs = 1000
            # Inject a default EarlyStopping only if the caller has not
            # supplied one. A generator expression lets any() short-circuit
            # without building an intermediate list.
            if not any(isinstance(callback, tf.keras.callbacks.EarlyStopping)
                       for callback in callbacks):
                callbacks.append(tf.keras.callbacks.EarlyStopping(patience=10))

        self.tuner.search(x=dataset,
                          epochs=epochs,
                          callbacks=callbacks,
                          validation_data=validation_data,
                          **kwargs)
Example no. 5
0
    def fit(self,
            x=None,
            y=None,
            epochs=None,
            callbacks=None,
            validation_split=0,
            validation_data=None,
            objective='val_loss',
            **kwargs):
        """Search for the best model and hyperparameters for the AutoModel.

        It will search for the best model based on the performances on
        validation data.

        # Arguments
            x: numpy.ndarray or tensorflow.Dataset. Training data x.
            y: numpy.ndarray or tensorflow.Dataset. Training data y.
            epochs: Int. The number of epochs to train each model during the search.
                If unspecified, by default we train for a maximum of 1000 epochs,
                but we stop training if the validation loss stops improving for 10
                epochs (unless you specified an EarlyStopping callback as part of
                the callbacks argument, in which case the EarlyStopping callback you
                specified will determine early stopping).
            callbacks: List (or any iterable) of Keras callbacks to apply during
                training and validation. The caller's collection is never mutated.
            validation_split: Float between 0 and 1.
                Fraction of the training data to be used as validation data.
                The model will set apart this fraction of the training data,
                will not train on it, and will evaluate
                the loss and any model metrics
                on this data at the end of each epoch.
                The validation data is selected from the last samples
                in the `x` and `y` data provided, before shuffling. This argument is
                not supported when `x` is a dataset.
                The best model found would be fit on the entire dataset including the
                validation data.
            validation_data: Data on which to evaluate the loss and any model metrics
                at the end of each epoch. The model will not be trained on this data.
                `validation_data` will override `validation_split`. The type of the
                validation data should be the same as the training data.
                The best model found would be fit on the training dataset without the
                validation data.
            objective: String. Name of model metric to minimize
                or maximize, e.g. 'val_accuracy'. Defaults to 'val_loss'.
            **kwargs: Any arguments supported by keras.Model.fit.
        """
        dataset, validation_data = self._prepare_data(
            x=x,
            y=y,
            validation_data=validation_data,
            validation_split=validation_split)

        # Initialize the hyper_graph.
        self._meta_build(dataset)

        # Build the hypermodel in tuner init.
        hp = kerastuner.HyperParameters()
        preprocess_graph, keras_graph = self.hyper_graph.build_graphs(hp)
        preprocess_graph.preprocess(dataset=dataset,
                                    validation_data=validation_data,
                                    fit=True)
        self.tuner = tuner.RandomSearch(hyper_graph=self.hyper_graph,
                                        fit_on_val_data=self._split_dataset,
                                        hypermodel=keras_graph,
                                        objective=objective,
                                        max_trials=self.max_trials,
                                        directory=self.directory,
                                        seed=self.seed,
                                        project_name=self.name)

        # Process the args. Copy the callbacks so the caller's list is never
        # mutated; list() also accepts tuples or other iterables, which the
        # previous `callbacks + [...]` concatenation would reject.
        callbacks = list(callbacks) if callbacks is not None else []
        if epochs is None:
            epochs = 1000
            # Inject a default EarlyStopping only if the caller has not
            # supplied one. A generator expression lets any() short-circuit
            # without building an intermediate list.
            if not any(isinstance(callback, tf.keras.callbacks.EarlyStopping)
                       for callback in callbacks):
                callbacks.append(tf.keras.callbacks.EarlyStopping(patience=10))

        self.tuner.search(x=dataset,
                          epochs=epochs,
                          callbacks=callbacks,
                          validation_data=validation_data,
                          **kwargs)