def _expected_improvement(x, fun_prediction, fun_prediction_args, x_bounds,
                          x_types, samples_y_aggregation,
                          minimize_constraints_fun):
    """
    Objective function for minimize(...): the negated expected improvement
    at ``x``.

    Parameters
    ----------
    x : candidate point; it is first passed through
        ``lib_data.match_val_type(x, x_bounds, x_types)``.
    fun_prediction : callable invoked as
        ``fun_prediction(x, *fun_prediction_args)``; its result is unpacked
        as ``(mu, sigma)``.
    fun_prediction_args : extra positional arguments for ``fun_prediction``.
    x_bounds, x_types : forwarded to ``lib_data.match_val_type``.
    samples_y_aggregation : observed losses; their minimum is used as the
        current optimum.
    minimize_constraints_fun : optional callable; the point is only
        evaluated when it is ``None`` or returns exactly ``True`` for ``x``.

    Returns
    -------
    ``sys.maxsize`` when the constraint is not satisfied, otherwise
    ``-1 * expected_improvement`` (so that minimising this value maximises
    the expected improvement).
    """
    # This is only for step-wise optimization
    x = lib_data.match_val_type(x, x_bounds, x_types)

    # Points that violate the constraint get the worst possible score.
    if (minimize_constraints_fun is not None) and \
            (minimize_constraints_fun(x) is not True):
        return sys.maxsize

    mu, sigma = fun_prediction(x, *fun_prediction_args)

    if sigma == 0.0:
        # No predictive uncertainty: define the improvement as zero up
        # front instead of dividing by zero and patching the result
        # afterwards (0/0 is an "invalid" operation, not a "divide" one, and
        # plain Python floats would raise ZeroDivisionError).
        expected_improvement = 0.0
    else:
        loss_optimum = min(samples_y_aggregation)
        # Improvement over the current optimum (positive when mu is lower).
        improvement = -1 * (mu - loss_optimum)
        Z = improvement / sigma
        expected_improvement = improvement * norm.cdf(Z) + \
            sigma * norm.pdf(Z)

    # We want expected_improvement to be as large as possible
    # (i.e., as small as possible for minimize(...))
    return -1 * expected_improvement
Esempio n. 2
0
def next_hyperparameter_lowest_mu(fun_prediction,
                                  fun_prediction_args,
                                  x_bounds, x_types,
                                  minimize_starting_points,
                                  minimize_constraints_fun=None):
    '''
    "Lowest Mu" acquisition function

    Runs an L-BFGS-B minimisation of ``_lowest_mu`` from every starting
    point and keeps the best result that satisfies the constraint.

    Parameters
    ----------
    fun_prediction : callable invoked as
        ``fun_prediction(x, *fun_prediction_args)``; its result is unpacked
        as ``(mu, sigma)``.
    fun_prediction_args : extra positional arguments for ``fun_prediction``.
    x_bounds : per-dimension bounds; the first and last entry of each item
        are used as the (min, max) box for the optimiser.
    x_types : forwarded to ``lib_data.match_val_type``.
    minimize_starting_points : iterable of starting points.
    minimize_constraints_fun : optional callable; a result is accepted only
        when this is ``None`` or returns exactly ``True`` for it.

    Returns
    -------
    ``None`` when no acceptable point was found, otherwise a dict with the
    keys ``'hyperparameter'``, ``'expected_mu'``, ``'expected_sigma'`` and
    ``'acquisition_func'`` (always ``"lm"``).
    '''
    best_x = None
    best_acquisition_value = None
    x_bounds_minmax = numpy.array([[i[0], i[-1]] for i in x_bounds])

    for starting_point in numpy.array(minimize_starting_points):
        # minimize() documents x0 as a 1-D array of shape (n,); flatten the
        # starting point instead of reshaping it to (1, n).
        res = minimize(fun=_lowest_mu,
                       x0=starting_point.ravel(),
                       bounds=x_bounds_minmax,
                       method="L-BFGS-B",
                       args=(fun_prediction, fun_prediction_args,
                             x_bounds, x_types, minimize_constraints_fun))

        if (best_acquisition_value is None) or (res.fun < best_acquisition_value):
            # Convert the optimiser output to the declared value types
            # before checking the constraint on it.
            candidate_x = lib_data.match_val_type(res.x.tolist(),
                                                  x_bounds, x_types)
            if (minimize_constraints_fun is None) or \
                    (minimize_constraints_fun(candidate_x) is True):
                best_acquisition_value = res.fun
                best_x = candidate_x

    outputs = None
    if best_x is not None:
        mu, sigma = fun_prediction(best_x, *fun_prediction_args)
        outputs = {'hyperparameter': best_x, 'expected_mu': mu,
                   'expected_sigma': sigma, 'acquisition_func': "lm"}
    return outputs
def _lowest_mu(x, fun_prediction, fun_prediction_args, x_bounds, x_types,
               minimize_constraints_fun):
    '''
    Objective for minimize(...): the predicted mu at ``x``.

    ``x`` is first converted with ``lib_data.match_val_type``. When a
    constraint function is given and does not return exactly ``True`` for
    the converted point, ``sys.maxsize`` is returned; otherwise the first
    element of ``fun_prediction(x, *fun_prediction_args)`` is returned.
    '''
    # This is only for step-wise optimization
    typed_x = lib_data.match_val_type(x, x_bounds, x_types)

    has_constraint = minimize_constraints_fun is not None
    if has_constraint and minimize_constraints_fun(typed_x) is not True:
        # Constraint violated: report the worst possible value.
        return sys.maxsize

    predicted_mu, _ = fun_prediction(typed_x, *fun_prediction_args)
    return predicted_mu
Esempio n. 4
0
def _lowest_confidence(x, fun_prediction, fun_prediction_args,
                       x_bounds, x_types, minimize_constraints_fun):
    """
    Objective for minimize(...): the negated value of
    ``(sigma * 1.96 * 2) / mu`` at ``x``.

    ``x`` is first converted with ``lib_data.match_val_type``. When a
    constraint function is given and does not return exactly ``True`` for
    the converted point, ``sys.maxsize`` is returned instead.

    NOTE(review): the ratio is presumably the width of a 95% confidence
    interval relative to mu -- confirm. It is divided by ``mu`` without any
    check for zero or negative values.
    """
    # This is only for step-wise optimization
    typed_x = lib_data.match_val_type(x, x_bounds, x_types)

    if minimize_constraints_fun is not None \
            and minimize_constraints_fun(typed_x) is not True:
        # Constraint violated: report the worst possible value.
        return sys.maxsize

    mu, sigma = fun_prediction(typed_x, *fun_prediction_args)
    relative_width = (sigma * 1.96 * 2) / mu

    # We want the ratio to be as large as possible (lowest confidence),
    # which means as small as possible for minimize(...).
    return -1 * relative_width
Esempio n. 5
0
    def _selection(self,
                   samples_x,
                   samples_y_aggregation,
                   samples_y,
                   x_bounds,
                   x_types,
                   max_resampling_per_x=3,
                   threshold_samplessize_exploitation=12,
                   threshold_samplessize_resampling=50,
                   no_candidates=False,
                   minimize_starting_points=None,
                   minimize_constraints_fun=None):
        """
        Choose the next configuration to evaluate.

        Steps (as numbered in the body):
          1. Fit a GP model and query the "lm" selection for the current
             optimum; return ``None`` if that yields nothing.
          2. Query the "lc" selection for an exploration candidate.
          3. Once ``samples_size_all`` reaches
             ``threshold_samplessize_exploitation``, query the GMM selection
             for an exploitation candidate.
          4. Once the number of unique samples reaches
             ``threshold_samplessize_resampling``, add detected outliers
             that were sampled fewer than ``max_resampling_per_x`` times.
          5. Evaluate every candidate with ``_calculate_lowest_mu_threaded``
             (4 worker threads) and keep the one whose expected lowest mu
             undercuts the current optimum by the largest margin.
          6. With no candidates at all, fall back to a random point (or the
             first starting point when starting points were supplied).
          7. Return the current optimum unless it is already in
             ``self.total_data`` or a uniform random draw is
             ``<= self.exploration_probability``; in that case return the
             chosen candidate (or a fresh random point if there is none).

        Steps 2-6 are skipped entirely when ``no_candidates`` is not
        ``False``.

        Parameters
        ----------
        samples_x : previously evaluated configurations.
        samples_y_aggregation : aggregated results, passed to the models.
        samples_y : per-configuration result lists; their lengths give the
            total number of samples.
        x_bounds, x_types : search-space description forwarded to helpers.
        max_resampling_per_x : upper bound on how often one configuration
            may have been sampled to still qualify for resampling.
        threshold_samplessize_exploitation : minimum total sample count
            before the GMM step runs.
        threshold_samplessize_resampling : minimum unique sample count
            before outlier detection runs; ``None`` disables it.
        no_candidates : when not ``False``, skip candidate generation.
        minimize_starting_points : optional starting points for the
            optimisers.
        minimize_constraints_fun : optional constraint callable forwarded
            to the selection helpers.

        Returns
        -------
        ``None`` if no current optimum could be computed, otherwise the
        value of ``self._pack_output(...)`` for the selected
        configuration. The returned value is also appended to
        ``self.total_data``.
        """
        next_candidate = None
        candidates = []
        samples_size_all = sum(len(i) for i in samples_y)
        samples_size_unique = len(samples_y)

        # ===== STEP 1: Compute the current optimum =====
        gp_model = gp_create_model.create_model(samples_x,
                                                samples_y_aggregation)
        lm_current = gp_selection.selection(
            "lm",
            samples_y_aggregation,
            x_bounds,
            x_types,
            gp_model['model'],
            minimize_starting_points,
            minimize_constraints_fun=minimize_constraints_fun)
        if not lm_current:
            return None

        if no_candidates is False:
            candidates.append({
                'hyperparameter': lm_current['hyperparameter'],
                'expected_mu': lm_current['expected_mu'],
                'expected_sigma': lm_current['expected_sigma'],
                'reason': "exploitation_gp"
            })

            # ===== STEP 2: Get recommended configurations for exploration =====
            results_exploration = gp_selection.selection(
                "lc",
                samples_y_aggregation,
                x_bounds,
                x_types,
                gp_model['model'],
                minimize_starting_points,
                minimize_constraints_fun=minimize_constraints_fun)

            if results_exploration is not None:
                # Only keep configurations that have never been sampled.
                if _num_past_samples(results_exploration['hyperparameter'],
                                     samples_x, samples_y) == 0:
                    candidates.append({
                        'hyperparameter':
                        results_exploration['hyperparameter'],
                        'expected_mu':
                        results_exploration['expected_mu'],
                        'expected_sigma':
                        results_exploration['expected_sigma'],
                        'reason':
                        "exploration"
                    })
                    logger.info("DEBUG: 1 exploration candidate selected\n")
            else:
                logger.info(
                    "DEBUG: No suitable exploration candidates were found\n")

            # ===== STEP 3: Get recommended configurations for exploitation =====
            if samples_size_all >= threshold_samplessize_exploitation:
                logger.info("Getting candidates for exploitation...\n")
                try:
                    gmm = gmm_create_model.create_model(
                        samples_x, samples_y_aggregation)
                    results_exploitation = gmm_selection.selection(
                        x_bounds,
                        x_types,
                        gmm['clusteringmodel_good'],
                        gmm['clusteringmodel_bad'],
                        minimize_starting_points,
                        minimize_constraints_fun=minimize_constraints_fun)

                    if results_exploitation is not None:
                        # Only keep configurations that have never been
                        # sampled.
                        if _num_past_samples(
                                results_exploitation['hyperparameter'],
                                samples_x, samples_y) == 0:
                            candidates.append({
                                'hyperparameter':
                                results_exploitation['hyperparameter'],
                                'expected_mu':
                                results_exploitation['expected_mu'],
                                'expected_sigma':
                                results_exploitation['expected_sigma'],
                                'reason':
                                "exploitation_gmm"
                            })
                            logger.info(
                                "DEBUG: 1 exploitation_gmm candidate selected\n"
                            )
                    else:
                        logger.info(
                            "DEBUG: No suitable exploitation_gmm candidates were found\n"
                        )

                except ValueError as exception:
                    # The exception: ValueError: Fitting the mixture model failed
                    # because some components have ill-defined empirical covariance
                    # (for instance caused by singleton or collapsed samples).
                    # Try to decrease the number of components, or increase reg_covar.
                    logger.info(
                        "DEBUG: No suitable exploitation_gmm candidates were found due to exception."
                    )
                    logger.info(exception)

            # ===== STEP 4: Get a list of outliers =====
            if (threshold_samplessize_resampling is not None) and \
                        (samples_size_unique >= threshold_samplessize_resampling):
                logger.info("Getting candidates for re-sampling...\n")
                results_outliers = gp_outlier_detection.outlierDetection_threaded(
                    samples_x, samples_y_aggregation)

                if results_outliers is not None:
                    num_resampling = 0
                    for results_outlier in results_outliers:
                        outlier_x = samples_x[results_outlier['samples_idx']]
                        if _num_past_samples(outlier_x, samples_x,
                                             samples_y) < max_resampling_per_x:
                            candidates.append({
                                'hyperparameter': outlier_x,
                                'expected_mu': results_outlier['expected_mu'],
                                'expected_sigma':
                                results_outlier['expected_sigma'],
                                'reason': "resampling"
                            })
                            num_resampling += 1
                    # The message has a %d placeholder, so the count must be
                    # supplied or the literal "%d" ends up in the log.
                    logger.info("DEBUG: %d re-sampling candidates selected\n",
                                num_resampling)
                else:
                    logger.info(
                        "DEBUG: No suitable resampling candidates were found\n"
                    )

            if candidates:
                # ===== STEP 5: Compute the information gain of each candidate towards the optimum =====
                logger.info(
                    "Evaluating information gain of %d candidates...\n",
                    len(candidates))
                next_improvement = 0

                threads_inputs = [[
                    candidate, samples_x, samples_y, x_bounds, x_types,
                    minimize_constraints_fun, minimize_starting_points
                ] for candidate in candidates]
                threads_pool = ThreadPool(4)
                try:
                    # Evaluate what would happen if we actually sample each candidate
                    threads_results = threads_pool.map(
                        _calculate_lowest_mu_threaded, threads_inputs)
                finally:
                    # Release the worker threads even if a worker raised.
                    threads_pool.close()
                    threads_pool.join()

                for threads_result in threads_results:
                    if threads_result['expected_lowest_mu'] < lm_current[
                            'expected_mu']:
                        # Information gain
                        temp_improvement = threads_result[
                            'expected_lowest_mu'] - lm_current['expected_mu']

                        # Both values are negative here; the smaller one is
                        # the larger improvement.
                        if next_improvement > temp_improvement:
                            next_improvement = temp_improvement
                            next_candidate = threads_result['candidate']
            else:
                # ===== STEP 6: If we have no candidates, randomly pick one =====
                logger.info(
                    "DEBUG: No candidates from exploration, exploitation,\
                                 and resampling. We will random a candidate for next_candidate\n"
                )

                next_candidate = _rand_with_constraints(x_bounds, x_types) \
                                    if minimize_starting_points is None else minimize_starting_points[0]
                next_candidate = lib_data.match_val_type(
                    next_candidate, x_bounds, x_types)
                expected_mu, expected_sigma = gp_prediction.predict(
                    next_candidate, gp_model['model'])
                next_candidate = {
                    'hyperparameter': next_candidate,
                    'reason': "random",
                    'expected_mu': expected_mu,
                    'expected_sigma': expected_sigma
                }

        # ===== STEP 7: If current optimal hyperparameter occurs in the history or exploration probability is less than the threshold, take next config as exploration step  =====
        outputs = self._pack_output(lm_current['hyperparameter'])
        ap = random.uniform(0, 1)
        if outputs in self.total_data or ap <= self.exploration_probability:
            if next_candidate is not None:
                outputs = self._pack_output(next_candidate['hyperparameter'])
            else:
                random_parameter = _rand_init(x_bounds, x_types, 1)[0]
                outputs = self._pack_output(random_parameter)
        self.total_data.append(outputs)
        return outputs
Esempio n. 6
0
                        # Information gain
                        temp_improvement = threads_result['expected_lowest_mu'] - lm_current['expected_mu']

                        if next_improvement > temp_improvement:
                            next_improvement = temp_improvement
                            next_candidate = threads_result['candidate']
            else:
                # ===== STEP 6: If we have no candidates, randomly pick one =====
                logger.info(
                    "DEBUG: No candidates from exploration, exploitation,\
                                 and resampling. We will random a candidate for next_candidate\n"
                )

                next_candidate = _rand_with_constraints(x_bounds, x_types) \
                                    if minimize_starting_points is None else minimize_starting_points[0]
                next_candidate = lib_data.match_val_type(next_candidate, x_bounds, x_types)
                expected_mu, expected_sigma = gp_prediction.predict(next_candidate, gp_model['model'])
                next_candidate = {'hyperparameter': next_candidate, 'reason': "random",
                                  'expected_mu': expected_mu, 'expected_sigma': expected_sigma}

        # ===== STEP 7: If current optimal hyperparameter occurs in the history or exploration probability is less than the threshold, take next config as exploration step  =====
        outputs = self._pack_output(lm_current['hyperparameter'])
        ap = random.uniform(0, 1)
        if outputs in self.history_parameters or ap<=self.exploration_probability:
            if next_candidate is not None:
                outputs = self._pack_output(next_candidate['hyperparameter'])
            else:
                random_parameter = _rand_init(x_bounds, x_types, 1)[0]
                outputs = self._pack_output(random_parameter)
        self.history_parameters.append(outputs)
        return outputs