コード例 #1
0
    def _get_incumbent_value(self):
        """Return the cost value currently attributed to the incumbent.

        When ``self.predict_incumbent`` is set, the value is the smallest
        model prediction (marginalized over instances) among all
        configurations stored in the runhistory. Otherwise it is the cost the
        runhistory reports for ``self.incumbent``, passed through ``np.log``
        when ``self.rh2EPM`` is a ``RunHistory2EPM4LogCost``.

        Returns
        -------
        float
            The incumbent value.

        Raises
        ------
        ValueError
            If the observed cost is requested but the runhistory is empty.
        """
        if not self.predict_incumbent:
            if self.runhistory.empty():
                raise ValueError("Runhistory is empty and the cost value of "
                                 "the incumbent is unknown.")
            observed_cost = self.runhistory.get_cost(self.incumbent)
            if isinstance(self.rh2EPM, RunHistory2EPM4LogCost):
                return np.log(observed_cost)
            return observed_cost

        config_array = convert_configurations_to_array(
            self.runhistory.get_all_configs())
        predictions = [
            self.model.predict_marginalized_over_instances(
                row.reshape((1, -1)))[0][0][0]
            for row in config_array
        ]
        # The prediction is used as-is: log(y) is not needed if the EPM was
        # already trained on log(y).
        return np.min(predictions)
コード例 #2
0
ファイル: main.py プロジェクト: cczy1910/Hydrogen
def getPrediction(config):
    """Predict a value for ``config`` and rescale it with the rh2EPM bounds.

    The model's prediction is exponentiated and then mapped through
    ``pred * (max_y - min_y) + min_y``, where ``min_y`` is an adjusted lower
    bound derived from ``rh2EPM.min_y`` and ``rh2EPM.perc``.
    NOTE(review): this looks like the inverse of a scaled-log cost
    transformation -- confirm against the rh2EPM implementation in use.

    :param config: the configuration to predict for
    :return: the rescaled prediction
    """
    chooser = smac.solver.epm_chooser
    encoded_config = convert_configurations_to_array([config])
    raw_prediction = chooser.model.predict(encoded_config)[0][0][0]

    rh2epm = chooser.rh2EPM
    # Subtract the difference between the percentile and the minimum.
    lower_bound = rh2epm.min_y - (rh2epm.perc - rh2epm.min_y)
    # Minimal value to avoid numerical issues in the log scaling.
    lower_bound = lower_bound - constants.VERY_SMALL_NUMBER
    if lower_bound == rh2epm.max_y:
        # Keep the bounds distinct so the scaling range is never zero.
        lower_bound = lower_bound * (1 - 10**-10)

    value_range = rh2epm.max_y - lower_bound
    return np.exp(raw_prediction) * value_range + lower_bound
コード例 #3
0
    def _get_x_best(self, predict: bool,
                    X: np.ndarray) -> typing.Tuple[np.ndarray, float]:
        """Get the array representation and the value of the "best" configuration.

        The definition of best varies depending on the argument ``predict``. If set to ``True``,
        this function returns the row of ``X`` with the lowest cost as predicted by the model
        (marginalized over instances). Otherwise it uses the observed cost of ``self.incumbent``,
        transformed with ``self.rh2EPM.transform_response_values``.

        Parameters
        ----------
        predict : bool
            Whether to use the predicted or observed best.
        X : np.ndarray
            Array of configurations, one per row. Only read when ``predict`` is ``True``.

        Returns
        -------
        np.ndarray
            With ``predict=True``: the row of ``X`` with the lowest predicted cost.
            With ``predict=False``: the array representation of *all* configurations
            returned by the runhistory for the currently considered budgets
            (NOTE(review): not only the incumbent -- confirm this is what callers expect).
        float
            The predicted (or transformed observed) cost of the best configuration.
        """
        if predict:
            predicted = [
                (
                    self.model.predict_marginalized_over_instances(
                        x.reshape((1, -1)))[0][0][0],
                    x,
                )
                for x in X
            ]
            # Sort on the cost only; the sort is stable, so the first row wins on ties.
            predicted = sorted(predicted, key=lambda pair: pair[0])
            best_observation, x_best_array = predicted[0]
            # won't need log(y) if EPM was already trained on log(y)
        else:
            all_configs = self.runhistory.get_all_configs_per_budget(
                budget_subset=self.currently_considered_budgets)
            x_best = self.incumbent
            x_best_array = convert_configurations_to_array(all_configs)
            best_observation = self.runhistory.get_cost(x_best)
            # transform_response_values is fed a (1, 1) array here.
            best_observation_as_array = np.array(best_observation).reshape(
                (1, 1))
            # It's unclear how to do this for inv scaling and potential future scaling.
            # This line should be changed if necessary
            best_observation = self.rh2EPM.transform_response_values(
                best_observation_as_array)
            best_observation = best_observation[0][0]

        return x_best_array, best_observation
コード例 #4
0
    def __call__(self, configurations: List[Configuration]):
        """Computes the acquisition value for a given list of configurations.

        The configurations are converted to their array representation and
        passed to ``self._compute``. Every row of the result that contains a
        NaN is overwritten with the most negative finite float.

        Parameters
        ----------
        configurations : list
            The configurations where the acquisition function should be evaluated.

        Returns
        -------
        np.ndarray(N, 1)
            acquisition values for X
        """
        X = convert_configurations_to_array(configurations)
        if len(X.shape) == 1:
            # A single flat vector is promoted to a one-row matrix.
            X = X[np.newaxis, :]

        acq = self._compute(X)
        nan_mask = np.isnan(acq)
        if np.any(nan_mask):
            nan_rows = np.where(nan_mask)[0]
            # ``np.float`` was removed in NumPy 1.24; the builtin ``float``
            # is the type it used to alias.
            acq[nan_rows, :] = -np.finfo(float).max
        return acq
コード例 #5
0
ファイル: abstract_ps.py プロジェクト: jajajag/SMAC3
    def to_str(self) -> str:
        """Serialize the configuration and its run history into one string.

        Returns
        -------
        return : str
            Space-separated fields: the array-encoded configuration values,
            the number of run-history entries, then one group of items per
            entry.
            For example, "0.8(config) 1(#runhistory) 0.6 1.2 1234"
        """
        # Fetch the run history recorded for this configuration.
        history_entries = self.runhistory.get_history_for_config(self.config)

        # Encode the configuration as an array and stringify each value.
        encoded_config = convert_configurations_to_array([self.config])[0]
        fields = [str(value) for value in encoded_config]

        # One string per run-history entry, items joined by spaces
        # (described by the original author as "$cost $time $seed").
        entry_strings = [" ".join(map(str, entry))
                         for entry in history_entries]

        # Final layout: config values, entry count, then the entries.
        fields.append(str(len(entry_strings)))
        fields.extend(entry_strings)
        return " ".join(fields)
コード例 #6
0
 def _preprocess(self, runhistory):
     """
     Marginalize over instances so that fANOVA can determine the parameter importance without
     having to deal with instance features. Stores the resulting training data in ``self.X``
     and ``self.y``.
     :param runhistory: RunHistory that knows all configurations that were run. For all these configurations
                        the instance features are marginalized away using the model's predictions.
     """
     logger = self.logger
     logger.info(
         'PREPROCESSING PREPROCESSING PREPROCESSING PREPROCESSING PREPROCESSING PREPROCESSING'
     )
     logger.info('Marginalizing away all instances!')

     # Array representation of every configuration in the runhistory.
     config_array = np.array(
         convert_configurations_to_array(runhistory.get_all_configs()))
     # First element of the model's output is used as the training target.
     marginal_prediction = self.model.predict_marginalized_over_instances(
         config_array)
     targets = np.array(marginal_prediction[0])

     self.X = config_array
     self.y = targets
     logger.info('Size of training X after preprocessing: %s' %
                 (self.X.shape,))
     logger.info('Size of training y after preprocessing: %s' %
                 (self.y.shape,))
     logger.info('Finished Preprocessing')
コード例 #7
0
    def _get_incumbent_value(self):
        """Return the incumbent value, either predicted or observed.

        With ``self.predict_incumbent`` set, this is the lowest model
        prediction (marginalized over instances) over all configurations in
        the runhistory. Otherwise it is the runhistory cost of
        ``self.incumbent`` after passing through
        ``self.rh2EPM.transform_response_values``.

        Return
        ------
        float

        Raises
        ------
        ValueError
            If the observed cost is requested but the runhistory is empty.
        """
        if self.predict_incumbent:
            all_configs = self.runhistory.get_all_configs()
            predicted_costs = [
                self.model.predict_marginalized_over_instances(
                    vector.reshape((1, -1)))[0][0][0]
                for vector in convert_configurations_to_array(all_configs)
            ]
            # won't need log(y) if EPM was already trained on log(y)
            return np.min(predicted_costs)

        if self.runhistory.empty():
            raise ValueError("Runhistory is empty and the cost value of "
                             "the incumbent is unknown.")
        raw_cost = self.runhistory.get_cost(self.incumbent)
        # It's unclear how to do this for inv scaling and potential future
        # scaling. This line should be changed if necessary.
        cost_matrix = np.array(raw_cost).reshape((1, 1))
        transformed = self.rh2EPM.transform_response_values(cost_matrix)
        return transformed[0][0]
コード例 #8
0
def _pack_quadrants(parts):
    """Pack per-quadrant lists into one array without ragged-array inference."""
    arrays = [np.array(part) for part in parts]
    if len({arr.shape for arr in arrays}) == 1:
        # Equal shapes stack into a regular array, exactly as before.
        return np.array(arrays)
    # Differently sized quadrants: NumPy >= 1.24 raises ValueError on implicit
    # ragged arrays, so build the 1-D object array explicitly.
    packed = np.empty(len(arrays), dtype=object)
    for slot, arr in enumerate(arrays):
        packed[slot] = arr
    return packed


def build_matrix(feature_pd:pd.DataFrame, perf_pd:pd.DataFrame,
                 configs:list, cs:ConfigurationSpace,
                 n_insts:int=None):
    """Build (config, instance-feature) design matrices split into four quadrants.

    Each row of an ``X`` matrix is the one-hot encoded configuration vector
    concatenated with the feature vector of one instance; the matching ``y``
    entry is read from ``perf_pd`` at (instance, configuration index).

    Half of the configurations and half of the instances are sampled as
    "train"; the quadrants are:

    * I   -- train instances x train configurations. Further split into four
      sub-blocks using a "valid" half of each train set; sub-block index is
      ``2 * config_is_valid + inst_is_valid``.
    * II  -- non-train instances x train configurations.
    * III -- train instances x non-train configurations.
    * IV  -- non-train instances x non-train configurations.

    :param feature_pd: instance features, indexed by instance
    :param perf_pd: performance values, indexed by instance; values are read
        positionally by configuration index
    :param configs: list of configurations
    :param cs: configuration space; used to locate categorical parameters
    :param n_insts: if given and smaller than the number of instances, only a
        random sample of this many instances is used
    :return: ``X_I, X_II, X_III, X_IV, y_I, y_II, y_III, y_IV``; ``X_I`` and
        ``y_I`` hold the four sub-blocks (an object array when the sub-blocks
        differ in shape)
    """
    insts = list(feature_pd.index)

    if n_insts is not None and n_insts < len(insts):
        insts = random.sample(insts, n_insts)

    config_matrix = convert_configurations_to_array(configs)

    # one hot encode categorical parameters
    parameters = cs.get_hyperparameters()
    mask_array = np.array(
        [isinstance(param, CategoricalHyperparameter) for param in parameters])
    n_values = np.array(
        [len(param.choices) for param in parameters
         if isinstance(param, CategoricalHyperparameter)])

    # NOTE(review): ``n_values`` and ``categorical_features`` were removed from
    # OneHotEncoder in scikit-learn 0.22; this call requires an older release.
    ohe = OneHotEncoder(n_values=n_values, categorical_features=mask_array, sparse=False)
    config_matrix = ohe.fit_transform(config_matrix)

    # The order of the sampling calls is kept so seeded runs stay reproducible.
    train_config_indices = random.sample(range(len(configs)), len(configs) // 2)
    valid_config_indices = random.sample(train_config_indices, len(train_config_indices) // 2)

    train_inst_indices = random.sample(range(len(insts)), len(insts) // 2)
    valid_inst_indices = random.sample(train_inst_indices, len(train_inst_indices) // 2)

    # Sets give O(1) membership tests inside the nested loop below.
    train_configs, valid_configs = set(train_config_indices), set(valid_config_indices)
    train_insts, valid_insts = set(train_inst_indices), set(valid_inst_indices)

    # convert in X matrix and y vector
    X_I, X_II, X_III, X_IV = [[], [], [], []], [], [], []
    y_I, y_II, y_III, y_IV = [[], [], [], []], [], [], []
    for i_idx, inst in enumerate(insts):
        feat_vector = feature_pd.loc[inst].values
        perf_vector = perf_pd.loc[inst].values
        inst_is_train = i_idx in train_insts
        inst_is_valid = i_idx in valid_insts
        for c_idx in range(len(configs)):
            row = np.concatenate((config_matrix[c_idx, :], feat_vector))
            perf = perf_vector[c_idx]
            config_is_train = c_idx in train_configs

            if inst_is_train and config_is_train:
                # 3: both valid, 2: only config valid, 1: only inst valid, 0: neither
                sub_block = 2 * (c_idx in valid_configs) + inst_is_valid
                X_I[sub_block].append(row)
                y_I[sub_block].append(perf)
            elif config_is_train:
                X_II.append(row)
                y_II.append(perf)
            elif inst_is_train:
                X_III.append(row)
                y_III.append(perf)
            else:
                X_IV.append(row)
                y_IV.append(perf)

    X_II, X_III, X_IV = np.array(X_II), np.array(X_III), np.array(X_IV)
    y_II, y_III, y_IV = np.array(y_II), np.array(y_III), np.array(y_IV)
    X_I = _pack_quadrants(X_I)
    y_I = _pack_quadrants(y_I)

    print(X_I.shape, X_II.shape, X_III.shape, X_IV.shape)
    print(y_I.shape, y_II.shape, y_III.shape, y_IV.shape)

    return X_I, X_II, X_III, X_IV, y_I, y_II, y_III, y_IV