Example #1
def test_recursive_dict_get_item():
    dict_values = {'hp': 1, 'stepa__hp': 2, 'stepa__stepb__hp': 3}
    r = HyperparameterSamples(**dict_values)

    assert r[None].to_flat_as_dict_primitive() == {'hp': 1}
    assert r['stepa'].to_flat_as_dict_primitive() == {'hp': 2, 'stepb__hp': 3}
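
The double-underscore keys encode step nesting. As a minimal sketch of the inverse view (to_nested_dict() is also used in Example #18 below; the expected output shown here is an assumption based on the '__' separator):

samples = HyperparameterSamples({'hp': 1, 'stepa__hp': 2, 'stepa__stepb__hp': 3})
nested = samples.to_nested_dict()
# nested should look like: {'hp': 1, 'stepa': {'hp': 2, 'stepb': {'hp': 3}}}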
Example #2
def __init__(self, steps):
    FeatureUnion.__init__(self, steps_as_tuple=steps, joiner=NumpyConcatenateOnCustomAxisIfNotEmpty(axis=-1))
    self.set_hyperparams(HyperparameterSamples({}))
    self._make_all_steps_optional()
Example #3
def __init__(self, steps, joiner: NonFittableMixin = None):
    if joiner is None:
        joiner = NumpyConcatenateOnCustomAxisIfNotEmpty(axis=-1)
    FeatureUnion.__init__(self, steps_as_tuple=steps, joiner=joiner)
    self.set_hyperparams(HyperparameterSamples({}))
    self._make_all_steps_optional()
Example #4
class Tensorflow2ModelStep(BaseTensorflowModelStep):
    """
    Base class for TensorFlow 2 steps.
    It uses :class:`TensorflowV2StepSaver` for saving the model.

    .. seealso::
        `Using the checkpoint model format <https://www.tensorflow.org/guide/checkpoint>`_,
        :class:`~neuraxle.base.BaseStep`
    """
    HYPERPARAMS = HyperparameterSamples({})
    HYPERPARAMS_SPACE = HyperparameterSpace({})

    def __init__(self,
                 create_model,
                 create_loss,
                 create_optimizer,
                 create_inputs=None,
                 data_inputs_dtype=None,
                 expected_outputs_dtype=None,
                 tf_model_checkpoint_folder=None,
                 print_loss=False,
                 print_func=None,
                 device_name=None):
        BaseTensorflowModelStep.__init__(
            self,
            create_model=create_model,
            create_loss=create_loss,
            create_optimizer=create_optimizer,
            create_inputs=create_inputs,
            data_inputs_dtype=data_inputs_dtype,
            expected_outputs_dtype=expected_outputs_dtype,
            step_saver=TensorflowV2StepSaver(),
            print_loss=print_loss,
            print_func=print_func)

        if device_name is None:
            device_name = '/CPU:0'
        self.device_name = device_name

        if tf_model_checkpoint_folder is None:
            tf_model_checkpoint_folder = 'tensorflow_ckpts'
        self.tf_model_checkpoint_folder = tf_model_checkpoint_folder

    def setup(self, context: ExecutionContext) -> BaseStep:
        """
        Set up the optimizer, the model, and the checkpoints for saving.

        :return: step
        :rtype: BaseStep
        """
        if self.is_initialized:
            return self

        with tf.device(self.device_name):
            self.optimizer = self.create_optimizer(self, context)
            self.model = self.create_model(self, context)

            self.checkpoint = tf.train.Checkpoint(step=tf.Variable(1),
                                                  optimizer=self.optimizer,
                                                  net=self.model)
            self.checkpoint_manager = tf.train.CheckpointManager(
                self.checkpoint,
                self.tf_model_checkpoint_folder,
                max_to_keep=3)

        self.is_initialized = True

        return self

    def strip(self):
        """
        Strip the TensorFlow 2 properties from the step to make it serializable.
        """
        self.optimizer = None
        self.model = None
        self.checkpoint = None
        self.checkpoint_manager = None

    def fit(self, data_inputs, expected_outputs=None) -> 'BaseStep':
        with tf.device(self.device_name):
            self._fit_model(data_inputs, expected_outputs)

        return self

    def _fit_model(self, data_inputs, expected_outputs):
        inputs = self._create_inputs(data_inputs, expected_outputs)
        with tf.GradientTape() as tape:
            output = self.model(inputs, training=True)
            loss = self.create_loss(self,
                                    expected_outputs=tf.convert_to_tensor(
                                        expected_outputs,
                                        dtype=self.expected_outputs_dtype),
                                    predicted_outputs=output)
            self.add_new_loss(loss)
            self.model.losses.append(loss)

        self.optimizer.apply_gradients(
            zip(tape.gradient(loss, self.model.trainable_variables),
                self.model.trainable_variables))

    def _transform_data_container(self, data_container: DataContainer,
                                  context: ExecutionContext) -> DataContainer:
        data_inputs = data_container.data_inputs
        expected_outputs = data_container.expected_outputs

        with tf.device(self.device_name):
            output = self._transform_model(data_inputs, expected_outputs)

        data_container.set_data_inputs(output.numpy())
        return data_container

    def _transform_model(self, data_inputs, expected_outputs):
        output = self.model(self._create_inputs(data_inputs), training=False)

        if expected_outputs is not None:
            loss = self.create_loss(self,
                                    expected_outputs=tf.convert_to_tensor(
                                        expected_outputs,
                                        dtype=self.expected_outputs_dtype),
                                    predicted_outputs=output)
            self.add_new_loss(loss, test_only=True)
        return output

    def transform(self, data_inputs):
        with tf.device(self.device_name):
            output = self.model(self._create_inputs(data_inputs),
                                training=False)
        return output.numpy()

    def _create_inputs(self, data_inputs, expected_outputs=None):
        if self.create_inputs is not None:
            inputs = self.create_inputs(self, data_inputs, expected_outputs)
        else:
            inputs = tf.convert_to_tensor(data_inputs, self.data_inputs_dtype)
        return inputs
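
A minimal construction sketch for the class above (hedged: the Keras layers and hyperparameter values are illustrative, not from the source). The create_* callback signatures follow the calls made in setup() and _fit_model() above:

def create_model(step, context):
    # Any Keras model can be returned here; dims could be read from step.hyperparams.
    return tf.keras.Sequential([tf.keras.layers.Dense(1)])

def create_loss(step, expected_outputs, predicted_outputs):
    return tf.reduce_mean(tf.keras.losses.mse(expected_outputs, predicted_outputs))

def create_optimizer(step, context):
    return tf.keras.optimizers.Adam(learning_rate=0.01)

model_step = Tensorflow2ModelStep(
    create_model=create_model,
    create_loss=create_loss,
    create_optimizer=create_optimizer,
    data_inputs_dtype=tf.dtypes.float32,
    expected_outputs_dtype=tf.dtypes.float32,
    print_loss=True)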
Example #5
def __init__(self, hyperparams: HyperparameterSamples, score: float, status: TRIAL_STATUS):
    self.hyperparams = HyperparameterSamples(hyperparams)
    self.score = score
    self.status = status
Example #6
File: numpy.py, Project: yushu-liu/Neuraxle
def __init__(self, add=1):
    NonFittableMixin.__init__(self)
    BaseStep.__init__(self, hyperparams=HyperparameterSamples({'add': add}))
Example #7
def rvs(step) -> RecursiveDict:
    return HyperparameterSamples(step.hyperparams_space.rvs())
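
For context, a small hedged sketch of what feeds such an rvs() call: a HyperparameterSpace maps names to distributions (see Examples #16 and #22 below), and rvs() draws one value per distribution:

space = HyperparameterSpace({'use_xavier_init': Boolean(), 'num_lstm_layers': RandInt(1, 2)})
samples = HyperparameterSamples(space.rvs())  # e.g. {'use_xavier_init': True, 'num_lstm_layers': 2}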
Example #8
def __init__(self, multiply_by=1):
    super().__init__(
        hyperparams=HyperparameterSamples({'multiply_by': multiply_by}))
Example #9
def main(chosen_device):
    exercice_number = 1
    print('exercice {}\n=================='.format(exercice_number))

    data_inputs, expected_outputs = generate_data(
        # See: https://github.com/guillaume-chevalier/seq2seq-signal-prediction/blob/master/datasets.py
        exercice_number=exercice_number,
        n_samples=None,
        window_size_past=None,
        window_size_future=None)

    print('data_inputs shape: {} => (n_samples, window_size_past, input_dim)'.format(data_inputs.shape))
    print('expected_outputs shape: {} => (n_samples, window_size_future, output_dim)'.format(expected_outputs.shape))

    sequence_length = data_inputs.shape[1]
    input_dim = data_inputs.shape[2]
    output_dim = expected_outputs.shape[2]

    batch_size = 100
    epochs = 3
    validation_size = 0.15
    max_plotted_validation_predictions = 10

    seq2seq_pipeline_hyperparams = HyperparameterSamples({
        'hidden_dim': 100,
        'layers_stacked_count': 2,
        'lambda_loss_amount': 0.0003,
        'learning_rate': 0.006,
        'window_size_future': sequence_length,
        'output_dim': output_dim,
        'input_dim': input_dim
    })
    feature_0_metric = metric_3d_to_2d_wrapper(mean_squared_error)
    metrics = {'mse': feature_0_metric}

    signal_prediction_pipeline = Pipeline([
        ForEachDataInput(MeanStdNormalizer()),
        ToNumpy(),
        PlotPredictionsWrapper(
            Tensorflow2ModelStep(
                # See: https://github.com/Neuraxio/Neuraxle-TensorFlow
                create_model=create_model,
                create_loss=create_loss,
                create_optimizer=create_optimizer,
                expected_outputs_dtype=tf.dtypes.float32,
                data_inputs_dtype=tf.dtypes.float32,
                print_loss=True).set_hyperparams(seq2seq_pipeline_hyperparams))
    ]).set_name('SignalPrediction')

    pipeline = Pipeline([
        EpochRepeater(
            ValidationSplitWrapper(
                MetricsWrapper(
                    Pipeline([
                        TrainOnlyWrapper(DataShuffler()),
                        MiniBatchSequentialPipeline([
                            MetricsWrapper(signal_prediction_pipeline,
                                           metrics=metrics,
                                           name='batch_metrics')
                        ], batch_size=batch_size)
                    ]),
                    metrics=metrics,
                    name='epoch_metrics',
                    print_metrics=True),
                test_size=validation_size,
                scoring_function=feature_0_metric),
            epochs=epochs)
    ])

    pipeline, outputs = pipeline.fit_transform(data_inputs, expected_outputs)

    plot_metrics(pipeline=pipeline, exercice_number=exercice_number)
    plot_predictions(data_inputs, expected_outputs, pipeline,
                     max_plotted_validation_predictions)
Example #10
def _rvs(self):
    return HyperparameterSamples(self.hyperparams_space.rvs())
Example #11
def set_hyperparams(self, hyperparams: HyperparameterSamples) -> 'BaseStep':
    self.hyperparams = HyperparameterSamples(hyperparams)
    return self
Example #12
def get_params(self, deep=True):
    neuraxle_params = HyperparameterSamples(self.p.get_hyperparams()).to_flat_as_dict_primitive()
    return neuraxle_params
Example #13
def test_hyperparams_copy_constructor():
    dict_values = {'hp': 1, 'stepa__hp': 2, 'stepa__stepb__hp': 3}
    r = HyperparameterSamples(HyperparameterSamples(**dict_values))

    assert r == HyperparameterSamples(**dict_values)
Example #14
def __init__(self, add=1):
    super().__init__(hyperparams=HyperparameterSamples({'add': add}))
Example #15

def test_fft_peak_bin_with_values():
    data_inputs = np.random.random((4, 5, 2))
    step = FFTPeakBinWithValue()

    outputs = step.transform(data_inputs)

    assert outputs.shape == (4, 4)


@pytest.mark.parametrize("hyperparams, expected_feature_count", [
    (HyperparameterSamples({
        'FFT__enabled': True,
        'NumpyMean__enabled': True,
        'NumpyMedian__enabled': True,
        'NumpyMin__enabled': True,
        'NumpyMax__enabled': True
    }), 18),
    (HyperparameterSamples({
        'FFT__enabled': False,
        'NumpyMean__enabled': True,
        'NumpyMedian__enabled': True,
        'NumpyMin__enabled': True,
        'NumpyMax__enabled': True
    }), 8),
    (HyperparameterSamples({
        'FFT__enabled': True,
        'NumpyMean__enabled': False,
        'NumpyMedian__enabled': True,
        'NumpyMin__enabled': True,
Example #16
    'num_lstm_layers': RandInt(1, 2),
    'use_xavier_init': Boolean(),
    'use_max_pool_else_avg_pool': Boolean(),
    'dropout_drop_proba': LogUniform(0.3, 0.7)
})

HYPERPARAMETERS = HyperparameterSamples({
    'learning_rate': 0.1,
    'l2_weight_reg': 0.001,
    'hidden_size': 32,
    'num_layers': 3,
    'num_lstm_layers': 1,
    'use_xavier_init': True,
    'use_max_pool_else_avg_pool': True,
    'dropout_drop_proba': 0.5,
    'momentum': 0.1
})

AN_INPUT = "I am an input"
AN_EXPECTED_OUTPUT = "I am an expected output"


class SomeStep(NonFittableMixin, BaseStep):
    def __init__(self,
                 hyperparams_space: HyperparameterSpace = None,
                 output=AN_EXPECTED_OUTPUT):
        BaseStep.__init__(self, hyperparams_space=hyperparams_space)
        self.output = output
Example #17
class TensorflowV1ModelStep(BaseTensorflowModelStep):
    """
    Base class for TensorFlow 1 steps.
    It uses :class:`TensorflowV1StepSaver` for saving the model.

    .. seealso::
        `Using the checkpoint model format <https://www.tensorflow.org/guide/checkpoint>`_,
        :class:`~neuraxle.base.BaseStep`
    """
    HYPERPARAMS = HyperparameterSamples({})
    HYPERPARAMS_SPACE = HyperparameterSpace({})

    def __init__(self,
                 create_graph,
                 create_loss,
                 create_optimizer,
                 create_feed_dict=None,
                 data_inputs_dtype=None,
                 expected_outputs_dtype=None,
                 variable_scope=None,
                 has_expected_outputs=True,
                 print_loss=False,
                 print_func=None):
        BaseTensorflowModelStep.__init__(
            self,
            create_model=create_graph,
            create_loss=create_loss,
            create_optimizer=create_optimizer,
            create_inputs=create_feed_dict,
            data_inputs_dtype=data_inputs_dtype,
            expected_outputs_dtype=expected_outputs_dtype,
            step_saver=TensorflowV1StepSaver(),
            print_loss=print_loss,
            print_func=print_func)

        if variable_scope is None:
            variable_scope = self.name
        self.variable_scope = variable_scope
        self.has_expected_outputs = has_expected_outputs
        self.create_feed_dict = create_feed_dict

    def setup(self) -> BaseStep:
        """
        Set up the TensorFlow 1 graph and session using a variable scope.

        :return: self
        :rtype: BaseStep
        """
        if self.is_initialized:
            return self

        self.graph = tf.Graph()
        with self.graph.as_default():
            with tf.variable_scope(self.variable_scope, reuse=tf.AUTO_REUSE):
                self.session = tf.Session(
                    config=tf.ConfigProto(log_device_placement=True),
                    graph=self.graph)

                model = self.create_model(self)
                if not isinstance(model, tuple):
                    tf.identity(model, name='output')
                else:
                    tf.identity(model[0], name='output')
                    tf.identity(model[1], name='inference_output')

                tf.identity(self.create_loss(self), name='loss')
                self.create_optimizer(self).minimize(self['loss'],
                                                     name='optimizer')

                init = tf.global_variables_initializer()
                self.session.run(init)
                self.is_initialized = True

        return self

    def teardown(self) -> BaseStep:
        """
        Close session on teardown.

        :return: self
        """
        if self.session is not None:
            self.session.close()
        self.is_initialized = False

        return self

    def strip(self):
        """
        Strip the TensorFlow 1 properties from the step to make the step serializable.

        :return: stripped step
        :rtype: BaseStep
        """
        self.graph = None
        self.session = None

        return self

    def fit(self, data_inputs, expected_outputs=None) -> 'BaseStep':
        with tf.variable_scope(self.variable_scope, reuse=tf.AUTO_REUSE):
            return self.fit_model(data_inputs, expected_outputs)

    def fit_model(self, data_inputs, expected_outputs=None) -> BaseStep:
        """
        Fit tensorflow model using the variable scope.

        :param data_inputs: data inputs
        :param expected_outputs: expected outputs to fit on
        :return: fitted self
        :rtype: BaseStep
        """
        feed_dict = {self['data_inputs']: data_inputs}

        if self.has_expected_outputs:
            feed_dict.update({self['expected_outputs']: expected_outputs})

        if self.create_inputs is not None:
            additional_feed_dict_arguments = self.create_inputs(
                self, data_inputs, expected_outputs)
            feed_dict.update(additional_feed_dict_arguments)

        results = self.session.run([self['optimizer'], self['loss']],
                                   feed_dict=feed_dict)

        loss = results[1]
        self.add_new_loss(loss)

        return self

    def transform(self, data_inputs, expected_outputs=None):
        with tf.variable_scope(self.variable_scope, reuse=tf.AUTO_REUSE):
            return self.transform_model(data_inputs)

    def transform_model(self, data_inputs):
        """
        Transform tensorflow model using the variable scope.

        :param data_inputs: data inputs
        :return: inference output
        """
        inference_output_name = self._get_inference_output_name()

        feed_dict = {self['data_inputs']: data_inputs}

        results = self.session.run([self[inference_output_name], self['loss']],
                                   feed_dict=feed_dict)
        self.add_new_loss(results[1], test_only=True)

        return results[0]

    def _get_inference_output_name(self):
        """
        Return the output tensor name for inference (transform).
        In create_graph, the user can return a tuple of two elements: the output tensor for training, and the output tensor for inference.

        :return:
        """
        inference_output_name = 'output'
        if len(self['inference_output'].get_shape().as_list()) > 0:
            inference_output_name = 'inference_output'

        return inference_output_name

    def __getitem__(self, item):
        """
        Get a graph tensor by name using get item.

        :param item: tensor name
        :type item: str

        :return: tensor
        :rtype: tf.Tensor
        """
        if ":" in item:
            split = item.split(":")
            tensor_name = split[0]
            device = split[1]
        else:
            tensor_name = item
            device = "0"

        try:
            result = self.graph.get_tensor_by_name("{0}/{1}:{2}".format(
                self.variable_scope, tensor_name, device))
        except KeyError:
            result = None

        if result is None:
            try:
                result = self.graph.get_operation_by_name("{0}/{1}".format(
                    self.variable_scope, tensor_name))
            except KeyError:
                result = tf.get_variable(tensor_name, [])

        return result
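
A short usage sketch for the __getitem__ lookup above. Names resolve to tensors registered in setup() under the step's variable scope; the optional ':N' suffix picks the tensor index (called "device" in the code above), defaulting to 0:

loss_tensor = step['loss']        # resolves '<variable_scope>/loss:0'
output_tensor = step['output:0']  # explicit index after the colon
optimizer_op = step['optimizer']  # falls back to get_operation_by_name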
Example #18
def create_new_trial(self, hyperparams: HyperparameterSamples):
    if self.print_new_trial:
        self.print_func('new trial:\n{}'.format(json.dumps(hyperparams.to_nested_dict(), sort_keys=True, indent=4)))
Example #19
def test_sklearn_wrapper_set_hyperparams():
    p = SKLearnWrapper(PCA())
    p.set_hyperparams(HyperparameterSamples({'n_components': 2}))

    assert p.wrapped_sklearn_predictor.n_components == 2
Example #20
File: numpy.py, Project: yushu-liu/Neuraxle
def __init__(self, multiply_by=1):
    NonFittableMixin.__init__(self)
    BaseStep.__init__(self, hyperparams=HyperparameterSamples({'multiply_by': multiply_by}))
Example #21
def set_hyperparams(self, flat_hyperparams: HyperparameterSamples) -> BaseStep:
    BaseStep.set_hyperparams(self, flat_hyperparams)
    self.wrapped_sklearn_predictor.set_params(**HyperparameterSamples(flat_hyperparams).to_flat_as_dict_primitive())
    return self
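
A sketch of why the flattening matters here: sklearn's set_params expects a flat keyword dict, and to_flat_as_dict_primitive() (see Example #1) produces exactly that, so nested Neuraxle keys become plain sklearn parameters. With the PCA wrapper from Example #19:

flat = HyperparameterSamples({'n_components': 2}).to_flat_as_dict_primitive()
# flat == {'n_components': 2}, passed on as wrapped_sklearn_predictor.set_params(**flat)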
Example #22
import numpy as np

from neuraxle.hyperparams.distributions import Boolean
from neuraxle.hyperparams.space import HyperparameterSpace, HyperparameterSamples
from neuraxle.pipeline import Pipeline
from neuraxle.steps.loop import StepClonerForEachDataInput
from neuraxle.steps.misc import FitCallbackStep, TapeCallbackFunction
from neuraxle.steps.numpy import MultiplyByN

HYPE_SPACE = HyperparameterSpace({"a__test": Boolean()})

HYPE_SAMPLE = HyperparameterSamples({"a__test": True})


def test_step_cloner_should_transform():
    tape = TapeCallbackFunction()
    p = StepClonerForEachDataInput(
        Pipeline([FitCallbackStep(tape), MultiplyByN(2)]))
    data_inputs = _create_data((2, 2))

    processed_outputs = p.transform(data_inputs)

    assert isinstance(p.steps[0], Pipeline)
    assert isinstance(p.steps[1], Pipeline)
    assert np.array_equal(processed_outputs, data_inputs * 2)


def test_step_cloner_should_fit_transform():
    # Given
    tape = TapeCallbackFunction()
    p = StepClonerForEachDataInput(
Example #23
def get_hyperparams(self):
    if self.return_all_sklearn_default_params_on_get:
        return HyperparameterSamples(self.wrapped_sklearn_predictor.get_params()).to_flat()
    else:
        return BaseStep.get_hyperparams(self)
Example #24
def main():
    def accuracy(data_inputs, expected_outputs):
        return np.mean(
            np.argmax(np.array(data_inputs), axis=1) == np.argmax(
                np.array(expected_outputs), axis=1))

    # load the dataset
    df = read_csv('data/winequality-white.csv', sep=';')
    data_inputs = df.values
    data_inputs[:, -1] = data_inputs[:, -1] - 1
    n_features = data_inputs.shape[1] - 1
    n_classes = 10

    p = Pipeline([
        TrainOnlyWrapper(DataShuffler()),
        ColumnTransformerInputOutput(
            input_columns=[(
                [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10], ToNumpy(np.float32)
            )],
            output_columns=[(11, Identity())]
        ),
        OutputTransformerWrapper(PlotDistribution(column=-1)),
        MiniBatchSequentialPipeline([
            Tensorflow2ModelStep(
                create_model=create_model,
                create_loss=create_loss,
                create_optimizer=create_optimizer
            ).set_hyperparams(HyperparameterSamples({
                'n_dense_layers': 2,
                'input_dim': n_features,
                'optimizer': 'adam',
                'activation': 'relu',
                'kernel_initializer': 'he_uniform',
                'learning_rate': 0.01,
                'hidden_dim': 20,
                'n_classes': 3
            })).set_hyperparams_space(HyperparameterSpace({
                'n_dense_layers': RandInt(2, 4),
                'hidden_dim_layer_multiplier': Uniform(0.30, 1),
                'input_dim': FixedHyperparameter(n_features),
                'optimizer': Choice([
                    OPTIMIZERS.ADAM.value,
                    OPTIMIZERS.SGD.value,
                    OPTIMIZERS.ADAGRAD.value
                ]),
                'activation': Choice([
                    ACTIVATIONS.RELU.value,
                    ACTIVATIONS.TANH.value,
                    ACTIVATIONS.SIGMOID.value,
                    ACTIVATIONS.ELU.value,
                ]),
                'kernel_initializer': Choice([
                    KERNEL_INITIALIZERS.GLOROT_NORMAL.value,
                    KERNEL_INITIALIZERS.GLOROT_UNIFORM.value,
                    KERNEL_INITIALIZERS.HE_UNIFORM.value
                ]),
                'learning_rate': LogUniform(0.005, 0.01),
                'hidden_dim': RandInt(3, 80),
                'n_classes': FixedHyperparameter(n_classes)
            }))
        ], batch_size=33),
        OutputTransformerWrapper(Pipeline([
            ExpandDim(),
            OneHotEncoder(nb_columns=n_classes, name='classes')
        ]))
    ])

    auto_ml = AutoML(
        pipeline=p,
        hyperparams_repository=InMemoryHyperparamsRepository(
            cache_folder='trials'),
        hyperparams_optimizer=RandomSearchHyperparameterSelectionStrategy(),
        validation_splitter=ValidationSplitter(test_size=0.30),
        scoring_callback=ScoringCallback(accuracy,
                                         higher_score_is_better=True),
        callbacks=[
            MetricCallback(
                name='classification_report_imbalanced_metric',
                metric_function=classificaiton_report_imbalanced_metric,
                higher_score_is_better=True),
            MetricCallback(name='f1',
                           metric_function=f1_score_weighted,
                           higher_score_is_better=True),
            MetricCallback(name='recall',
                           metric_function=recall_score_weighted,
                           higher_score_is_better=True),
            MetricCallback(name='precision',
                           metric_function=precision_score_weighted,
                           higher_score_is_better=True),
            EarlyStoppingCallback(max_epochs_without_improvement=3)
        ],
        n_trials=200,
        refit_trial=True,
        epochs=75)

    auto_ml = auto_ml.fit(data_inputs=data_inputs)
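
Conceptually, each AutoML trial above samples fresh hyperparams from the declared space and applies them before fitting, in the spirit of Examples #7 and #11 (a hedged sketch, not the AutoML internals):

trial_hyperparams = p.get_hyperparams_space().rvs()
p = p.set_hyperparams(trial_hyperparams)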
Example #25
def __init__(self, steps):
    FeatureUnion.__init__(self, steps)
    self.set_hyperparams(HyperparameterSamples({}))
    self._make_all_steps_optional()
Example #26
    def find_next_best_hyperparams(
            self, auto_ml_container: AutoMLContainer) -> HyperparameterSamples:
        """
        Find the next best hyperparams using previous trials.

        :param auto_ml_container: trials data container
        :type auto_ml_container: Trials
        :return: next best hyperparams
        :rtype: HyperparameterSamples
        """
        # Flatten the hyperparameter space.
        flat_hyperparameter_space: HyperparameterSpace = auto_ml_container.hyperparameter_space.to_flat()

        if auto_ml_container.trial_number < self.number_of_initial_random_step:
            # Perform random search
            return self.initial_auto_ml_algo.find_next_best_hyperparams(
                auto_ml_container)

        # Keep only success trials
        success_trials: Trials = auto_ml_container.trials.filter(
            TRIAL_STATUS.SUCCESS)

        # Split trials into good and bad using quantile threshold.
        good_trials, bad_trials = success_trials.split_good_and_bad_trials(
            quantile_threshold=self.quantile_threshold,
            number_of_good_trials_max_cap=self.number_good_trials_max_cap)

        # Create a Gaussian mixture of the good trials, and another of the bad trials.
        good_posteriors = self._create_posterior(flat_hyperparameter_space,
                                                 good_trials)
        bad_posteriors = self._create_posterior(flat_hyperparameter_space,
                                                bad_trials)

        best_hyperparams = []
        for (hyperparam_key, good_posterior) in good_posteriors.items():
            best_new_hyperparam_value = None
            best_ratio = None
            for _ in range(self.number_possible_hyperparams_candidates):
                # Sample a possible new hyperparam value from the good posterior.
                possible_new_hyperparam = good_posterior.rvs()

                # Decide whether to use the ratio directly, or the log-likelihood of b_post
                # under both distributions as hyperopt does. In hyperopt:
                # # calculate the log likelihood of b_post under both distributions
                # below_llik = fn_lpdf(*([b_post] + b_post.pos_args), **b_kwargs)
                # above_llik = fn_lpdf(*([b_post] + a_post.pos_args), **a_kwargs)
                # new_node = scope.broadcast_best(b_post, below_llik, above_llik)

                # Compare the good pdf to the bad pdf for each candidate value, as described
                # in the TPE article: the ratio (gamma + g(x) / l(x) * (1 - gamma))^-1 must be
                # maximized, which amounts to maximizing l(x) / g(x). Only the candidate with
                # the best ratio is kept as the new best hyperparam value. Hyperopt works with
                # log-pdfs, hence its subtraction instead of a division.
                # TODO: maybe sum the likelihood over all hyperparameters so that the maximum
                # is taken over a joint distribution of all hyperparameters.
                # TODO: verify whether, for quantized distributions, we should use
                # cdf(upper) - cdf(lower) instead of the pdf.
                ratio = good_posterior.pdf(possible_new_hyperparam) \
                    / bad_posteriors[hyperparam_key].pdf(possible_new_hyperparam)

                if best_new_hyperparam_value is None or ratio > best_ratio:
                    best_new_hyperparam_value = possible_new_hyperparam
                    best_ratio = ratio

            best_hyperparams.append((hyperparam_key, best_new_hyperparam_value))
        return HyperparameterSamples(best_hyperparams)
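
As a standalone sketch, the inner loop above reduces to the following selection rule, where good_pdf and bad_pdf are hypothetical callables standing in for the posterior .pdf() calls (not Neuraxle API):

def pick_candidate(candidates, good_pdf, bad_pdf):
    # TPE-style rule: keep the sample that maximizes l(x) / g(x), i.e. the one
    # most likely under the good trials and least likely under the bad ones.
    return max(candidates, key=lambda x: good_pdf(x) / bad_pdf(x))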
Example #27
def test_hyperparams_copy_constructor(dict_values):
    r = HyperparameterSamples(HyperparameterSamples(**dict_values))
    assert r == HyperparameterSamples(**dict_values)