예제 #1
0
 def __init__(self, scenario, cs, model, to_evaluate: int,
              runhist: RunHistory, **kwargs):
     """Set up the fANOVA evaluator.

     Forwards ``scenario``, ``cs``, ``model``, ``to_evaluate`` and any extra
     keyword arguments to the parent constructor, optionally preprocesses
     the run history, and builds the underlying ``fanova_pyrfr`` object.

     :param scenario: passed through to the parent constructor.
     :param cs: configuration space; also handed to ``fanova_pyrfr``.
     :param model: passed through to the parent constructor; afterwards
         ``self.model.instance_features`` is inspected.
     :param to_evaluate: passed through to the parent constructor.
     :param runhist: run history given to ``self._preprocess`` when the
         model carries instance features.
     :param kwargs: forwarded unchanged to the parent constructor.
     """
     super().__init__(scenario, cs, model, to_evaluate, **kwargs)
     self.name = 'fANOVA'
     # NOTE(review): a plain string is assigned here but ``.debug`` is called
     # on ``self.logger`` below -- presumably the base class exposes
     # ``logger`` as a property that turns the name into a logger; confirm.
     self.logger = self.name

     has_instance_features = self.model.instance_features is not None
     if has_instance_features:
         # This way the instance features in X are ignored and a new forest
         # is constructed.
         self._preprocess(runhist)
     else:
         self.logger.debug('No preprocessing necessary')

     # ``self.X`` / ``self.y`` are not assigned in this method; presumably
     # they come from the parent constructor or ``_preprocess`` -- confirm.
     features = self.X
     targets = self.y.flatten()
     self.evaluator = fanova_pyrfr(
         X=features,
         Y=targets,
         config_space=cs,
         config_on_hypercube=True,
     )
예제 #2
0
    def execute(save_folder, runhistory_location, configspace_location, manual_logtransform, use_percentiles, interaction_effect, n_trees, run_limit=None, draw_plots=True):
        """Run fANOVA on a stored run history and write the importances to disk.

        The run history is a JSON file with a ``'data'`` list and a
        ``'configs'`` mapping. For every entry of ``'data'``, ``item[0][0]``
        is used (as a string) to look up the configuration in ``'configs'``
        and ``item[1][0]`` is used as the target value.

        :param save_folder: output directory; created when it does not exist.
        :param runhistory_location: path of the run history JSON file.
        :param configspace_location: path of the configuration space file,
            parsed with ``read``.
        :param manual_logtransform: when truthy, values of hyperparameters
            whose ``log`` attribute is truthy are replaced by ``np.log(value)``
            and the configuration space is converted with
            ``openmlpimp.utils.scale_configspace_to_log``.
        :param use_percentiles: when truthy, the 75th and 100th percentile of
            the targets are used as fANOVA cutoffs and as plot y-range.
        :param interaction_effect: when truthy, pairwise and three-way
            interaction effects are computed and stored as well.
        :param n_trees: forwarded to ``fanova_pyrfr``.
        :param run_limit: maximum number of valid configurations to use;
            ``None`` means no limit.
        :param draw_plots: when truthy, plots are written below
            ``save_folder + '/fanova'``.
        :return: ``save_folder + "/" +`` the name of the last JSON file
            written (the interaction file when ``interaction_effect`` is
            truthy, the single-parameter file otherwise).
        :raises ValueError: if the collected configurations do not form a 2-D
            array, if an interaction score is negative, or if all scores sum
            to more than 1.
        """
        with open(runhistory_location) as runhistory_file:
            runhistory = json.load(runhistory_file)
        with open(configspace_location) as configspace_file:
            configspace = read(configspace_file)
        os.makedirs(save_folder, exist_ok=True)

        X = []
        y = []

        # Loop-invariant: the configuration space is not modified until after
        # all runs have been collected.
        hyperparameters = configspace.get_hyperparameters()

        for item in runhistory['data']:
            # ``>=`` so that at most ``run_limit`` configurations are kept
            # (``>`` admitted one more than requested).
            if run_limit is not None and len(X) >= run_limit:
                break

            valid = True
            current = []
            setup_id = str(item[0][0])
            configuration = runhistory['configs'][setup_id]
            for param in hyperparameters:
                value = configuration[param.name]
                # A value whose Python type does not match the numeric
                # hyperparameter type marks the whole configuration illegal.
                if isinstance(param, ConfigSpace.hyperparameters.UniformFloatHyperparameter) and not isinstance(value, float):
                    valid = False
                elif isinstance(param, ConfigSpace.hyperparameters.UniformIntegerHyperparameter) and not isinstance(value, int):
                    valid = False

                if isinstance(param, ConfigSpace.hyperparameters.CategoricalHyperparameter):
                    # Categorical values are encoded by their choice index.
                    value = param.choices.index(value)
                elif param.log and manual_logtransform:
                    value = np.log(value)

                current.append(value)
            if valid:
                X.append(current)
                y.append(item[1][0])
            else:
                print('Illegal configuration', current)
        X = np.array(X)
        y = np.array(y)

        if X.ndim != 2:
            raise ValueError('Wrong shape')

        if manual_logtransform:
            configspace = openmlpimp.utils.scale_configspace_to_log(configspace)

        cutoffs = (-np.inf, np.inf)
        if use_percentiles:
            p75 = np.percentile(y, 75.0)
            p100 = np.percentile(y, 100.0)
            cutoffs = (p75, p100)

        # start the evaluator
        evaluator = fanova_pyrfr(X=X, Y=y, config_space=configspace, config_on_hypercube=False, cutoffs=cutoffs, n_trees=n_trees)
        # obtain the results (re-read: the configspace may have been replaced
        # by its log-scaled version above)
        params = configspace.get_hyperparameters()
        result = {}

        for idx, param in enumerate(params):
            importance = evaluator.quantify_importance([idx])[(idx,)]['total importance']
            result[param.name] = importance

        # store main results to disk
        filename = 'pimp_values_fanova.json'
        individual_path = os.path.join(save_folder, filename)
        with open(individual_path, 'w') as out_file:
            json.dump(result, out_file, sort_keys=True, indent=4, separators=(',', ': '))
            print('Saved individuals to %s' % individual_path)

        # call plotting fn
        yrange = (0, 1)
        if use_percentiles:
            yrange = (p75, p100)
        if draw_plots:
            FanovaBackend._plot_result(evaluator, configspace, save_folder + '/fanova', yrange)

        if interaction_effect:
            result_interaction = {}

            # Pairwise effects: total importance of the pair minus both
            # single-parameter importances.
            for idx, param in enumerate(params):
                for idx2, param2 in enumerate(params):
                    if param.name >= param2.name:  # string comparison cause stable
                        continue
                    print('interaction effects between', param.name, param2.name)
                    interaction = evaluator.quantify_importance([idx, idx2])[(idx, idx2)]['total importance']
                    interaction -= result[param.name]
                    interaction -= result[param2.name]
                    combined_name = param.name + '__' + param2.name
                    if interaction < 0.0:
                        # %f: the score is a float; %d would truncate it to 0.
                        raise ValueError('interaction score too low. Params: %s score %f' % (combined_name, interaction))
                    result_interaction[combined_name] = interaction

            # Three-way effects: total importance of the triple minus the
            # three single importances and the three pairwise effects. The
            # name ordering guarantees every pairwise key was stored above.
            for idx, param in enumerate(params):
                for idx2, param2 in enumerate(params):
                    if param.name >= param2.name:  # string comparison cause stable
                        continue
                    for idx3, param3 in enumerate(params):
                        if param2.name >= param3.name:  # string comparison cause stable
                            continue

                        print('interaction effects between', param.name, param2.name, param3.name)
                        interaction = evaluator.quantify_importance([idx, idx2, idx3])[(idx, idx2, idx3)]['total importance']
                        interaction -= result[param.name]
                        interaction -= result[param2.name]
                        interaction -= result[param3.name]
                        combined_name = param.name + '__' + param2.name + '__' + param3.name

                        interaction -= result_interaction[param.name + '__' + param2.name]
                        interaction -= result_interaction[param2.name + '__' + param3.name]
                        interaction -= result_interaction[param.name + '__' + param3.name]

                        if interaction < 0.0:
                            # %f: the score is a float; %d would truncate it to 0.
                            raise ValueError('interaction score too low. Params: %s score %f' % (combined_name, interaction))
                        result_interaction[combined_name] = interaction

            # store interaction effects to disk

            if sum(result_interaction.values()) + sum(result.values()) > 1:
                raise ValueError('Sum of results too high')

            filename = 'pimp_values_fanova_interaction.json'
            interaction_path = os.path.join(save_folder, filename)
            with open(interaction_path, 'w') as out_file:
                json.dump(result_interaction, out_file, sort_keys=True, indent=4, separators=(',', ': '))
                print('Saved interactions to %s' % interaction_path)
            if draw_plots:
                vis = Visualizer(evaluator, configspace, save_folder + '/fanova', y_label='Predictive Accuracy')
                vis.create_most_important_pairwise_marginal_plots()

        return save_folder + "/" + filename