Esempio n. 1
0
    def _init_pimp_and_validator(
        self,
        alternative_output_dir=None,
    ):
        """Create the ParameterImportance object and reuse its trained model for validation
        and further predictions.

        The combined (original + validated) runhistory is handed to PIMP, so that the
        returned model is based on as much information as possible. The model is then
        attached to a newly created ``Validator`` as its ``epm``.

        Parameters
        ----------
        alternative_output_dir: str
            e.g. for budgets we want pimp to use an alternative output-dir (subfolders per budget).
            When ``None``, ``self.output_dir`` is used instead.
        """
        # Resolve the folder exactly once, so the debug message always names the
        # folder that is really passed to PIMP (previously the log used a
        # truthiness test while save_folder used an `is not None` test).
        if alternative_output_dir is not None:
            pimp_output_dir = alternative_output_dir
        else:
            pimp_output_dir = self.output_dir
        self.logger.debug("Using '%s' as output for pimp", pimp_output_dir)

        fanova_options = self.options['fANOVA']
        self.pimp = Importance(
            scenario=copy.deepcopy(self.scenario),
            runhistory=self.combined_runhistory,
            # Fall back to the default configuration when no incumbent is set.
            incumbent=self.incumbent if self.incumbent else self.default,
            save_folder=pimp_output_dir,
            seed=self.rng.randint(1, 100000),
            max_sample_size=fanova_options.getint("pimp_max_samples"),
            fANOVA_pairwise=fanova_options.getboolean("fanova_pairwise"),
            preprocess=False,
            verbose=1,  # disable progressbars
        )
        # Validator (initialize without trajectory)
        self.validator = Validator(self.scenario, None, None)
        self.validator.epm = self.pimp.model
Esempio n. 2
0
    def _init_pimp_and_validator(self, rh, alternative_output_dir=None):
        """Build the ParameterImportance object and reuse its trained model as EPM.

        The given runhistory is passed to PIMP; the resulting model is stored on
        ``self.model`` and attached to a newly created ``Validator``.

        Parameters
        ----------
        rh: RunHistory
            runhistory used to build EPM
        alternative_output_dir: str
            e.g. for budgets we want pimp to use an alternative output-dir (subfolders per budget);
            ``self.output_dir`` is used when this is empty or ``None``
        """
        self.logger.debug("Using '%s' as output for pimp",
                          alternative_output_dir or self.output_dir)

        pimp_settings = dict(
            scenario=copy.deepcopy(self.scenario),
            runhistory=rh,
            incumbent=self.default,  # Inject correct incumbent later
            parameters_to_evaluate=4,
            save_folder=alternative_output_dir or self.output_dir,
            seed=self.rng.randint(1, 100000),
            max_sample_size=self.pimp_max_samples,
            fANOVA_pairwise=self.fanova_pairwise,
            preprocess=False,
        )
        self.pimp = Importance(**pimp_settings)
        self.model = self.pimp.model

        # The validator is set up without a trajectory and only receives the model.
        self.validator = Validator(self.scenario, None, None)
        self.validator.epm = self.model
Esempio n. 3
0
    def execute(save_folder,
                runhistory_location,
                configspace_location,
                modus='ablation',
                seed=1):
        """Run one PIMP analysis on a runhistory file and store values and plots.

        A single-line trajectory is derived from the runhistory, written to a
        temporary file and handed to ``Importance`` together with a minimal
        scenario. The evaluation is attempted up to 5 times, retrying whenever a
        ``ZeroDivisionError`` is raised.

        Parameters
        ----------
        save_folder: str
            folder that receives ``pimp_values_<modus>.json`` and the plots
            (created if missing)
        runhistory_location: str
            path to the runhistory JSON file
        configspace_location: str
            path used as ``paramfile`` of the scenario
        modus: str
            evaluation method passed to ``Importance.evaluate_scenario``
        seed: int
            seed passed to ``Importance``

        Returns
        -------
        str
            ``save_folder + "/" + "pimp_values_<modus>.json"``

        Raises
        ------
        ValueError
            if the derived trajectory does not consist of exactly one line
        ZeroDivisionError
            the error of the last attempt, if all 5 attempts failed with it
        """
        with open(runhistory_location, 'r') as runhistory_filep:
            runhistory = json.load(runhistory_filep)

        # create scenario file
        scenario_dict = {
            'run_obj': 'quality',
            'deterministic': 1,
            'paramfile': configspace_location
        }

        trajectory_lines = openmlpimp.utils.runhistory_to_trajectory(
            runhistory, maximize=True)
        if len(trajectory_lines) != 1:
            raise ValueError('trajectory file should contain exactly one line.')

        # delete=False: the file has to outlive the `with` block because it is
        # passed on by name; it is removed in the `finally` clause below.
        with tempfile.NamedTemporaryFile('w', delete=False) as traj_file:
            for line in trajectory_lines:
                json.dump(line, traj_file)
                traj_file.write("\n")

        try:
            num_params = len(trajectory_lines[0]['incumbent'])
            importance = Importance(scenario_dict,
                                    runhistory_file=runhistory_location,
                                    parameters_to_evaluate=num_params,
                                    traj_file=traj_file.name,
                                    seed=seed,
                                    save_folder=save_folder)

            os.makedirs(save_folder, exist_ok=True)

            filename = 'pimp_values_%s.json' % modus
            # Python 3 unbinds the `except ... as` name when the clause ends,
            # so the error must be kept in a separate variable to re-raise it
            # after the loop.
            last_error = None
            for _ in range(5):
                try:
                    result = importance.evaluate_scenario(modus)
                    with open(os.path.join(save_folder, filename),
                              'w') as out_file:
                        json.dump(result,
                                  out_file,
                                  sort_keys=True,
                                  indent=4,
                                  separators=(',', ': '))
                    importance.plot_results(
                        name=os.path.join(save_folder, modus), show=False)
                    return save_folder + "/" + filename
                except ZeroDivisionError as err:
                    last_error = err
            raise last_error
        finally:
            # Best-effort cleanup of the temporary trajectory file.
            try:
                os.remove(traj_file.name)
            except OSError:
                pass
Esempio n. 4
0
    def parameter_importance(self, modus, incumbent, output,
                             num_params=4, num_pairs=0):
        """Evaluate one parameter-importance method with the PIMP package.

        The ``Importance`` object is created lazily on the first call and reused
        afterwards; ``incumbent`` and ``num_params`` are therefore only
        consulted when ``self.pimp`` is not set yet. The evaluator produced by
        the run is appended to ``self.evaluators``.

        Parameters
        ----------
        modus: str
            modus for parameter importance, from [forward-selection, ablation,
            fanova]
        incumbent:
            incumbent handed to ``Importance`` when it is created
        output: str
            folder used as ``save_folder`` for PIMP and for this evaluation
        num_params: int
            number of parameters to evaluate, handed to ``Importance`` when it
            is created
        num_pairs: int
            not used by this method

        Returns
        -------
        importance: pimp.Importance
            importance object with evaluated data
        """
        self.logger.info("... parameter importance {}".format(modus))

        if not self.pimp:
            pimp_settings = dict(
                scenario=copy.deepcopy(self.scenario),
                runhistory=self.original_rh,
                incumbent=incumbent,
                parameters_to_evaluate=num_params,
                save_folder=output,
                seed=12345,
                max_sample_size=self.max_pimp_samples,
                fANOVA_pairwise=self.fanova_pairwise,
                preprocess=False,
            )
            self.pimp = Importance(**pimp_settings)

        # Only the side effects of the evaluation are needed here; the caller
        # gets the whole importance object back.
        self.pimp.evaluate_scenario([modus], output)
        self.evaluators.append(self.pimp.evaluator)
        return self.pimp
Esempio n. 5
0
    def __init__(self,
                 scenario: Scenario,
                 smac: Union[SMAC, None] = None,
                 mode: str = 'all',
                 X: Union[None, List[list], np.ndarray] = None,
                 y: Union[None, List[list], np.ndarray] = None,
                 numParams: int = -1,
                 impute: bool = False,
                 seed: int = 12345,
                 run: bool = False,
                 max_sample_size: int = -1,
                 fanova_cut_at_default: bool = False,
                 fANOVA_pairwise: bool = True,
                 forwardsel_feat_imp: bool = False,
                 incn_quant_var: bool = True,
                 marginalize_away_instances: bool = False,
                 save_folder: str = 'PIMP'):
        """
        Interface to be used with SMAC or with X and y matrices.
        :param scenario: The scenario object, that knows the configuration space.
        :param smac: The smac object that keeps all the run-data
        :param mode: The mode with which to run PIMP [ablation, fanova, all, forward-selection]
        :param X: Numpy Array that contains parameter arrays. Columns beyond the number of hyperparameters
                  of the configuration space are treated as instance features.
        :param y: Numpy array that contains the corresponding performance values
        :param numParams: The number of parameters to evaluate
        :param impute: Flag to decide if censored data gets imputed or not
        :param seed: The random seed
        :param run: Flag to immediately compute the importance values after this setup or not.
        :param max_sample_size: Passed to Importance as max_sample_size
        :param fanova_cut_at_default: Passed to Importance as fANOVA_cut_at_default
        :param fANOVA_pairwise: Passed to Importance as fANOVA_pairwise
        :param forwardsel_feat_imp: Passed to Importance as forwardsel_feat_imp
        :param incn_quant_var: Passed to Importance as incn_quant_var
        :param marginalize_away_instances: Passed to Importance as preprocess
        :param save_folder: Folder for the results; it is created if it does not exist yet
        :raises Exception: If X and y differ in their number of samples, or if neither a SMAC object nor
                           both X and y are given
        """
        self.scenario = scenario
        self.imp = None
        self.mode = mode
        self.save_folder = save_folder
        # makedirs also copes with nested folders and with an already existing one.
        os.makedirs(self.save_folder, exist_ok=True)
        if smac is not None:
            self.imp = Importance(scenario=scenario,
                                  runhistory=smac.runhistory,
                                  incumbent=smac.solver.incumbent,
                                  seed=seed,
                                  parameters_to_evaluate=numParams,
                                  # honour the requested folder instead of a hard-coded 'PIMP'
                                  save_folder=self.save_folder,
                                  impute_censored=impute,
                                  max_sample_size=max_sample_size,
                                  fANOVA_cut_at_default=fanova_cut_at_default,
                                  fANOVA_pairwise=fANOVA_pairwise,
                                  forwardsel_feat_imp=forwardsel_feat_imp,
                                  incn_quant_var=incn_quant_var,
                                  preprocess=marginalize_away_instances)
        elif X is not None and y is not None:
            # Work on a float copy: normalising inside an integer array would
            # silently truncate the scaled values.
            X = np.array(X, dtype=float)
            y = np.array(y)
            runHist = RunHistory(average_cost)
            if X.shape[0] != y.shape[0]:
                raise Exception('Number of samples in X and y dont match!')
            n_params = len(scenario.cs.get_hyperparameters())
            feats = None
            if X.shape[1] > n_params:
                feats = X[:, n_params:]
                assert feats.shape[1] == scenario.feature_array.shape[1]
                X = X[:, :n_params]

            for p in range(X.shape[1]):  # Normalize the data to fit into [0, 1]
                col_min, col_max = np.min(X[:, p]), np.max(X[:, p])
                if col_min < 0. or 1 < col_max:  # if it is not already normalized
                    value_range = col_max - col_min
                    if value_range == 0:
                        # constant column: avoid dividing by zero
                        X[:, p] = 0.
                    else:
                        X[:, p] = (X[:, p] - col_min) / value_range

            # Add everything to a runhistory such that PIMP can work with it
            # NOTE(review): without feature columns the configuration vector itself is compared against the
            # instance features below - confirm that this fallback is intended.
            for x, feat, y_val in zip(X, feats if feats is not None else X, y):
                instance_id = None
                for inst in scenario.feature_dict:  # determine on which instance a configuration was run
                    if np.all(scenario.feature_dict[inst] == feat):
                        instance_id = inst
                        break
                runHist.add(Configuration(scenario.cs, vector=x), y_val, 0, StatusType.SUCCESS, instance_id)
            self.X = X
            self.y = y

            best_ = None  # Determine incumbent according to the best mean cost in the runhistory
            for config in runHist.config_ids:
                inst_seed_pairs = runHist.get_runs_for_config(config)
                all_ = []
                # The loop variable must not be called `seed`: that would overwrite
                # the constructor argument that is passed to Importance below.
                for inst, run_seed in inst_seed_pairs:
                    rk = RunKey(runHist.config_ids[config], inst, run_seed)
                    all_.append(runHist.data[rk].cost)
                mean = np.mean(all_)
                if best_ is None or best_[0] > mean:
                    best_ = (mean, config)
            incumbent = best_[1]
            self.imp = Importance(scenario=scenario,
                                  runhistory=runHist,
                                  seed=seed,
                                  parameters_to_evaluate=numParams,
                                  save_folder=self.save_folder,
                                  impute_censored=impute,
                                  incumbent=incumbent,
                                  max_sample_size=max_sample_size,
                                  fANOVA_cut_at_default=fanova_cut_at_default,
                                  fANOVA_pairwise=fANOVA_pairwise,
                                  forwardsel_feat_imp=forwardsel_feat_imp,
                                  incn_quant_var=incn_quant_var,
                                  preprocess=marginalize_away_instances
                                  )
        else:
            raise Exception('Neither X and y matrices nor a SMAC object were specified to compute the importance '
                            'values from!')

        if run:
            self.compute_importances()
Esempio n. 6
0
def cmd_line_call():
    """
    Main Parameter importance script.

    Reads the command line arguments, makes all given paths absolute, switches
    to ``args.wdir`` for the duration of the run, determines the output folder,
    evaluates the requested importance methods and prints or writes the
    comparison table. The original working directory is restored afterwards,
    also when an error occurs.

    :raises ImportError: if fanova (directly or via 'all') is requested but the
                         fanova package cannot be imported
    """
    cmd_reader = CMDs()
    args, misc_ = cmd_reader.read_cmd()  # read cmd args
    cwd = os.path.abspath(os.getcwd())
    # Resolve relative paths against the original working directory before
    # changing into args.wdir.
    if args.out_folder and not os.path.isabs(args.out_folder):
        args.out_folder = os.path.abspath(args.out_folder)
    if args.trajectory and not os.path.isabs(args.trajectory):
        args.trajectory = os.path.abspath(args.trajectory)
    if not os.path.isabs(args.scenario_file):
        args.scenario_file = os.path.abspath(args.scenario_file)
    if not os.path.isabs(args.history):
        args.history = os.path.abspath(args.history)
    os.chdir(args.wdir)
    try:
        logging.basicConfig(level=args.verbose_level)
        ts = time.time()
        ts = datetime.datetime.fromtimestamp(ts).strftime('%Y_%m_%d_%H:%M:%S')
        fanova_ready = True

        try:
            import fanova  # noqa: F401 -- only imported to check availability
        except ImportError:
            warnings.simplefilter('always', ImportWarning)
            # Pass the category the filter above was set up for.
            warnings.warn('fANOVA is not installed in your environment. To install it please run '
                          '"git+http://github.com/automl/fanova.git@master"', ImportWarning)
            fanova_ready = False

        if 'influence-model' in args.modus:
            logging.warning('influence-model not fully supported yet!')
        if 'incneighbor' in args.modus:
            warnings.simplefilter('always', DeprecationWarning)
            warnings.warn('incneighbor will be deprecated in version 1.0.0 as it was the development name of'
                          ' lpi. Use lpi instead.', DeprecationWarning, stacklevel=2)
        if 'lpi' in args.modus:  # LPI will replace incneighbor in the future
            args.modus[args.modus.index('lpi')] = 'incneighbor'
        if 'fanova' in args.modus and not fanova_ready:
            raise ImportError('fANOVA is not installed! To install it please run '
                              '"git+http://github.com/automl/fanova.git@master"')
        if 'all' in args.modus:
            choices = ['ablation',
                       'forward-selection',
                       'fanova',
                       'incneighbor']
            if not fanova_ready:
                raise ImportError('fANOVA is not installed! To install it please run '
                                  '"git+http://github.com/automl/fanova.git@master"')
            args.modus.remove('all')
            if len(args.modus) != len(choices):
                args.modus = choices

        # Tag that is appended to the output folder name: 'all' for several
        # methods, 'lpi' as public name of 'incneighbor', else the method name.
        if len(args.modus) > 1:
            tag = 'all'
        elif 'incneighbor' in args.modus:
            tag = 'lpi'
        else:
            tag = '_'.join(args.modus)

        # An already existing folder is never reused; a timestamp is appended instead.
        if not args.out_folder:
            save_folder = os.path.join(cwd, 'PIMP_%s' % tag)
            if os.path.exists(os.path.abspath(save_folder)):
                save_folder = os.path.join(cwd, 'PIMP_%s_%s' % (tag, ts))
        else:
            tagged_folder = args.out_folder + '_%s' % tag
            if os.path.exists(os.path.abspath(args.out_folder)) or os.path.exists(
                    os.path.abspath(tagged_folder)):
                save_folder = os.path.join(cwd, args.out_folder + '_%s_%s' % (tag, ts))
            else:
                save_folder = os.path.join(cwd, tagged_folder)

        importance = Importance(scenario_file=args.scenario_file,
                                runhistory_file=args.history,
                                parameters_to_evaluate=args.num_params,
                                traj_file=args.trajectory, seed=args.seed,
                                save_folder=save_folder,
                                impute_censored=args.impute,
                                max_sample_size=args.max_sample_size,
                                fANOVA_cut_at_default=args.fanova_cut_at_default,
                                fANOVA_pairwise=args.fanova_pairwise,
                                forwardsel_feat_imp=args.forwardsel_feat_imp,
                                incn_quant_var=args.incn_quant_var,
                                preprocess=args.marg_inst,
                                forwardsel_cv=args.forwardsel_cv)  # create importance object
        # NOTE(review): save_folder is not created in this function; presumably
        # Importance creates it - confirm.
        with open(os.path.join(save_folder, 'pimp_args.json'), 'w') as out_file:
            json.dump(args.__dict__, out_file, sort_keys=True, indent=4, separators=(',', ': '))
        result = importance.evaluate_scenario(args.modus, save_folder=save_folder)
        if args.table:
            # NOTE(review): args.modus is a list here, so the file name contains
            # its list representation - confirm whether that is intended.
            importance.table_for_comparison(evaluators=result[1], name=os.path.join(
                save_folder, 'pimp_table_%s.tex' % args.modus), style='latex')
        else:
            importance.table_for_comparison(evaluators=result[1], style='cmd')
    finally:
        # Always go back to where we started, even if the run failed.
        os.chdir(cwd)
Esempio n. 7
0
if __name__ == '__main__':
    """
    Main Parameter importance script.
    """
    cmd_reader = CMDs()
    args, misc_ = cmd_reader.read_cmd()  # read cmd args
    logging.basicConfig(level=args.verbose_level)
    ts = time.time()
    ts = datetime.datetime.fromtimestamp(ts).strftime('%Y_%m_%d_%H:%M:%S')
    save_folder = 'PIMP_%s_%s' % (args.modus, ts)

    importance = Importance(
        scenario_file=args.scenario_file,
        runhistory_file=args.history,
        parameters_to_evaluate=args.num_params,
        traj_file=args.trajectory,
        seed=args.seed,
        save_folder=save_folder,
        impute_censored=args.impute)  # create importance object
    save_folder += '_run1'
    os.makedirs(save_folder, exist_ok=True)
    with open(os.path.join(save_folder, 'pimp_args.json'), 'w') as out_file:
        json.dump(args.__dict__,
                  out_file,
                  sort_keys=True,
                  indent=4,
                  separators=(',', ': '))
    result = importance.evaluate_scenario(args.modus, save_folder=save_folder)

    if args.modus == 'all':
        with open(
Esempio n. 8
0
def cmd_line_call():
    """
    Command-line entry point of the parameter importance analysis.

    Reads the command line arguments, picks an output folder name (a timestamp
    is appended when no folder was given or when the target already exists),
    runs the requested evaluation and stores the importance values as JSON
    next to the plots. For modus 'all' a comparison table is additionally
    written as LaTeX file or printed, depending on ``args.table``.
    """

    def _dump_pretty(payload, handle):
        # One place for the JSON layout shared by all files written here.
        json.dump(payload,
                  handle,
                  sort_keys=True,
                  indent=4,
                  separators=(',', ': '))

    args, misc_ = CMDs().read_cmd()  # read cmd args
    logging.basicConfig(level=args.verbose_level)
    stamp = datetime.datetime.fromtimestamp(
        time.time()).strftime('%Y_%m_%d_%H:%M:%S')

    if args.out_folder:
        tagged_folder = args.out_folder + '_%s' % args.modus
        if os.path.exists(os.path.abspath(args.out_folder)) or os.path.exists(
                os.path.abspath(tagged_folder)):
            save_folder = args.out_folder + '_%s_%s' % (args.modus, stamp)
        else:
            save_folder = tagged_folder
    else:
        save_folder = 'PIMP_%s_%s' % (args.modus, stamp)

    importance = Importance(
        scenario_file=args.scenario_file,
        runhistory_file=args.history,
        parameters_to_evaluate=args.num_params,
        traj_file=args.trajectory,
        seed=args.seed,
        save_folder=save_folder,
        impute_censored=args.impute,
        max_sample_size=args.max_sample_size)  # create importance object
    with open(os.path.join(save_folder, 'pimp_args.json'), 'w') as args_file:
        _dump_pretty(args.__dict__, args_file)
    result = importance.evaluate_scenario(args.modus, sort_by=args.order)

    run_all = args.modus == 'all'
    values_path = os.path.join(save_folder,
                               'pimp_values_%s.json' % args.modus)
    with open(values_path, 'w') as values_file:
        # For 'all' the result is indexed: [0] holds the values, [1] the evaluators.
        _dump_pretty(result[0] if run_all else result, values_file)

    if not run_all:
        importance.plot_results(name=os.path.join(save_folder, args.modus),
                                show=False)
        return

    plot_names = [os.path.join(save_folder, evaluator.name.lower())
                  for evaluator in result[1]]
    importance.plot_results(plot_names, result[1], show=False)
    if args.table:
        table_path = os.path.join(save_folder,
                                  'pimp_table_%s.tex' % args.modus)
        importance.table_for_comparison(evaluators=result[1],
                                        name=table_path,
                                        style='latex')
    else:
        importance.table_for_comparison(evaluators=result[1], style='cmd')
Esempio n. 9
0
from pimp.importance.importance import Importance
from pimp.utils.io.cmd_reader import CMDs

__author__ = "Andre Biedenkapp"
__copyright__ = "Copyright 2016, ML4AAD"
__license__ = "3-clause BSD"
__maintainer__ = "Andre Biedenkapp"
__email__ = "*****@*****.**"

if __name__ == '__main__':
    # Main parameter importance script: read the command line arguments,
    # evaluate the requested modus, dump the importance values to a
    # timestamped JSON file in the current directory and plot the results.
    args, misc_ = CMDs().read_cmd()  # read cmd args
    logging.basicConfig(level=args.verbose_level)

    importance = Importance(
        args.scenario_file,
        args.history,
        parameters_to_evaluate=args.num_params,
        traj_file=args.trajectory,
        seed=args.seed,
    )  # create importance object
    importance_value_dict = importance.evaluate_scenario(args.modus)

    # The timestamp is taken after the evaluation has finished.
    stamp = datetime.datetime.fromtimestamp(
        time.time()).strftime('%Y_%m_%d_%H:%M:%S')
    values_file_name = 'pimp_values_%s_%s.json' % (args.modus, stamp)
    with open(values_file_name, 'w') as out_file:
        json.dump(importance_value_dict, out_file)

    importance.plot_results(name=args.modus)