def execute(save_folder, runhistory_location, configspace_location, modus='ablation', seed=1): with open(runhistory_location, 'r') as runhistory_filep: runhistory = json.load(runhistory_filep) # create scenario file scenario_dict = { 'run_obj': 'quality', 'deterministic': 1, 'paramfile': configspace_location } trajectory_lines = openmlpimp.utils.runhistory_to_trajectory( runhistory, maximize=True) if len(trajectory_lines) != 1: raise ValueError('trajectory file should containexactly one line.') traj_file = tempfile.NamedTemporaryFile('w', delete=False) for line in trajectory_lines: json.dump(line, traj_file) traj_file.write("\n") traj_file.close() num_params = len(trajectory_lines[0]['incumbent']) importance = Importance(scenario_dict, runhistory_file=runhistory_location, parameters_to_evaluate=num_params, traj_file=traj_file.name, seed=seed, save_folder=save_folder) try: os.makedirs(save_folder) except FileExistsError: pass for i in range(5): try: result = importance.evaluate_scenario(modus) filename = 'pimp_values_%s.json' % modus with open(os.path.join(save_folder, filename), 'w') as out_file: json.dump(result, out_file, sort_keys=True, indent=4, separators=(',', ': ')) importance.plot_results(name=os.path.join(save_folder, modus), show=False) return save_folder + "/" + filename except ZeroDivisionError as e: pass raise e
class PIMP: def __init__(self, scenario: Scenario, smac: Union[SMAC, None] = None, mode: str = 'all', X: Union[None, List[list], np.ndarray] = None, y: Union[None, List[list], np.ndarray] = None, numParams: int = -1, impute: bool = False, seed: int = 12345, run: bool = False, max_sample_size: int = -1, fanova_cut_at_default: bool = False, fANOVA_pairwise: bool = True, forwardsel_feat_imp: bool = False, incn_quant_var: bool = True, marginalize_away_instances: bool = False, save_folder: str = 'PIMP'): """ Interface to be used with SMAC or with X and y matrices. :param scenario: The scenario object, that knows the configuration space. :param smac: The smac object that keeps all the run-data :param mode: The mode with which to run PIMP [ablation, fanova, all, forward-selection] :param X: Numpy Array that contains parameter arrays :param y: Numpy array that contains the corresponding performance values :param numParams: The number of parameters to evaluate :param impute: Flag to decide if censored data gets imputed or not :param seed: The random seed :param run: Flag to immediately compute the importance values after this setup or not. """ self.scenario = scenario self.imp = None self.mode = mode self.save_folder = save_folder if not os.path.exists(self.save_folder): os.mkdir(self.save_folder) if smac is not None: self.imp = Importance(scenario=scenario, runhistory=smac.runhistory, incumbent=smac.solver.incumbent, seed=seed, parameters_to_evaluate=numParams, save_folder='PIMP', impute_censored=impute, max_sample_size=max_sample_size, fANOVA_cut_at_default=fanova_cut_at_default, fANOVA_pairwise=fANOVA_pairwise, forwardsel_feat_imp=forwardsel_feat_imp, incn_quant_var=incn_quant_var, preprocess=marginalize_away_instances) elif X is not None and y is not None: X = np.array(X) y = np.array(y) runHist = RunHistory(average_cost) if X.shape[0] != y.shape[0]: raise Exception('Number of samples in X and y dont match!') n_params = len(scenario.cs.get_hyperparameters()) feats = None if X.shape[1] > n_params: feats = X[:, n_params:] assert feats.shape[1] == scenario.feature_array.shape[1] X = X[:, :n_params] for p in range(X.shape[1]): # Normalize the data to fit into [0, 1] _min, _max = np.min(X[:, p]), np.max(X[:, p]) if _min < 0. or 1 < _max: # if it is not already normalized for id, v in enumerate(X[:, p]): X[id, p] = (v - _min) / (_max - _min) # Add everything to a runhistory such that PIMP can work with it for x, feat, y_val in zip(X, feats if feats is not None else X, y): id = None for inst in scenario.feature_dict: # determine on which instance a configuration was run if np.all(scenario.feature_dict[inst] == feat): id = inst break runHist.add(Configuration(scenario.cs, vector=x), y_val, 0, StatusType.SUCCESS, id) self.X = X self.y = y best_ = None # Determine incumbent according to the best mean cost in the runhistory for config in runHist.config_ids: inst_seed_pairs = runHist.get_runs_for_config(config) all_ = [] for inst, seed in inst_seed_pairs: rk = RunKey(runHist.config_ids[config], inst, seed) all_.append(runHist.data[rk].cost) mean = np.mean(all_) if best_ is None or best_[0] > mean: best_ = (mean, config) incumbent = best_[1] self.imp = Importance(scenario=scenario, runhistory=runHist, seed=seed, parameters_to_evaluate=numParams, save_folder=self.save_folder, impute_censored=impute, incumbent=incumbent, fANOVA_cut_at_default=fanova_cut_at_default, fANOVA_pairwise=fANOVA_pairwise, forwardsel_feat_imp=forwardsel_feat_imp, incn_quant_var=incn_quant_var, preprocess=marginalize_away_instances ) else: raise Exception('Neither X and y matrices nor a SMAC object were specified to compute the importance ' 'values from!') if run: self.compute_importances() def compute_importances(self): if self.mode == 'all': self.mode = ['ablation', 'forward-selection', 'fanova', 'incneighbor'] elif not isinstance(self.mode, list): self.mode = [self.mode] result = self.imp.evaluate_scenario(self.mode, save_folder=self.save_folder) return result def plot_results(self, result: Union[List[Dict[str, float]], Dict[str, float]], save_table: bool = True, show=False): save_folder = self.save_folder if self.mode == 'all': with open(os.path.join(save_folder, 'pimp_values_%s.json' % self.mode), 'w') as out_file: json.dump(result[0], out_file, sort_keys=True, indent=4, separators=(',', ': ')) self.imp.plot_results(list(map(lambda x: os.path.join(save_folder, x.name.lower()), result[1])), result[1], show=show) if save_table: self.imp.table_for_comparison(evaluators=result[1], name=os.path.join( save_folder, 'pimp_table_%s.tex' % self.mode), style='latex') else: self.imp.table_for_comparison(evaluators=result[1], style='cmd') else: with open(os.path.join(save_folder, 'pimp_values_%s.json' % self.mode), 'w') as out_file: json.dump(result[0], out_file, sort_keys=True, indent=4, separators=(',', ': ')) if isinstance(self.mode, list): self.imp.plot_results(name=os.path.join(save_folder, 'all'), show=show) else: self.imp.plot_results(name=os.path.join(save_folder, self.mode), show=show)
indent=4, separators=(',', ': ')) result = importance.evaluate_scenario(args.modus, save_folder=save_folder) if args.modus == 'all': with open( os.path.join(save_folder, 'pimp_values_%s.json' % args.modus), 'w') as out_file: json.dump(result[0], out_file, sort_keys=True, indent=4, separators=(',', ': ')) importance.plot_results(list( map(lambda x: os.path.join(save_folder, x.name.lower()), result[1])), result[1], show=False) if args.table: importance.table_for_comparison( evaluators=result[1], name=os.path.join(save_folder, 'pimp_table_%s.tex' % args.modus), style='latex') else: importance.table_for_comparison(evaluators=result[1], style='cmd') else: with open( os.path.join(save_folder, 'pimp_values_%s.json' % args.modus[0]), 'w') as out_file:
def cmd_line_call(): """ Main Parameter importance script. """ cmd_reader = CMDs() args, misc_ = cmd_reader.read_cmd() # read cmd args logging.basicConfig(level=args.verbose_level) ts = time.time() ts = datetime.datetime.fromtimestamp(ts).strftime('%Y_%m_%d_%H:%M:%S') if not args.out_folder: save_folder = 'PIMP_%s_%s' % (args.modus, ts) else: if os.path.exists(os.path.abspath(args.out_folder)) or os.path.exists( os.path.abspath(args.out_folder + '_%s' % args.modus)): save_folder = args.out_folder + '_%s_%s' % (args.modus, ts) else: save_folder = args.out_folder + '_%s' % args.modus importance = Importance( scenario_file=args.scenario_file, runhistory_file=args.history, parameters_to_evaluate=args.num_params, traj_file=args.trajectory, seed=args.seed, save_folder=save_folder, impute_censored=args.impute, max_sample_size=args.max_sample_size) # create importance object with open(os.path.join(save_folder, 'pimp_args.json'), 'w') as out_file: json.dump(args.__dict__, out_file, sort_keys=True, indent=4, separators=(',', ': ')) result = importance.evaluate_scenario(args.modus, sort_by=args.order) if args.modus == 'all': with open( os.path.join(save_folder, 'pimp_values_%s.json' % args.modus), 'w') as out_file: json.dump(result[0], out_file, sort_keys=True, indent=4, separators=(',', ': ')) importance.plot_results(list( map(lambda x: os.path.join(save_folder, x.name.lower()), result[1])), result[1], show=False) if args.table: importance.table_for_comparison( evaluators=result[1], name=os.path.join(save_folder, 'pimp_table_%s.tex' % args.modus), style='latex') else: importance.table_for_comparison(evaluators=result[1], style='cmd') else: with open( os.path.join(save_folder, 'pimp_values_%s.json' % args.modus), 'w') as out_file: json.dump(result, out_file, sort_keys=True, indent=4, separators=(',', ': ')) importance.plot_results(name=os.path.join(save_folder, args.modus), show=False)
from pimp.importance.importance import Importance from pimp.utils.io.cmd_reader import CMDs __author__ = "Andre Biedenkapp" __copyright__ = "Copyright 2016, ML4AAD" __license__ = "3-clause BSD" __maintainer__ = "Andre Biedenkapp" __email__ = "*****@*****.**" if __name__ == '__main__': """ Main Parameter importance script. """ cmd_reader = CMDs() args, misc_ = cmd_reader.read_cmd() # read cmd args logging.basicConfig(level=args.verbose_level) importance = Importance(args.scenario_file, args.history, parameters_to_evaluate=args.num_params, traj_file=args.trajectory, seed=args.seed) # create importance object importance_value_dict = importance.evaluate_scenario(args.modus) ts = time.time() ts = datetime.datetime.fromtimestamp(ts).strftime('%Y_%m_%d_%H:%M:%S') with open('pimp_values_%s_%s.json' % (args.modus, ts), 'w') as out_file: json.dump(importance_value_dict, out_file) importance.plot_results(name=args.modus)