def read_pcs_file(fn: str, logger=None):
    """Build a configuration space object from a file on disk.

    The parser is chosen from the file itself: a name ending in ``.json`` is
    handed to the json reader; anything else is first tried with the old pcs
    reader and, if that raises ``NotImplementedError``, with the new pcs
    reader.

    Parameters
    ----------
    fn: string
        File name of pcs file
    logger: optional
        Object with a ``debug`` method; when truthy it receives a message
        describing which format was used / retried.

    Returns
    -------
    ConfigSpace:
        Whatever the selected reader returns for the file content.
    """
    with open(fn) as handle:
        # json is the preferred format and is recognised purely by extension.
        if fn.endswith('.json'):
            space = pcs_json.read(handle.read())
            if logger:
                logger.debug("Loading pcs as json from: %s", fn)
            return space

        # Both pcs readers consume the file as a list of lines.
        lines = handle.readlines()
        try:
            return pcs.read(lines)
        except NotImplementedError:
            if logger:
                logger.debug(
                    "Could not parse pcs file with old format; trying new format ..."
                )
            return pcs_new.read(lines)
def smac_to_fanova(state_run_directory, destination_dir):
    '''
    Takes the state-run files, merges them and prepares the configuration
    space for fANOVA.

    outputs: fANOVA object

    state_run_directory: str
                        path to the directory of the
                        pysmac_output/out/scenario file
    destination_dir: str
                    path to the directory in which the merged states
                    should be stored

    raises: FileNotFoundError if, after merging, destination_dir contains no
            entry starting with 'runs_and_results' or 'paramstrings'.
    '''
    # Collect every entry of the scenario directory named "state-run*".
    state_run_prefix = state_run_directory + "/state-run"
    state_run_list = [
        path for path in glob(state_run_directory + "/*")
        if path.startswith(state_run_prefix)
    ]
    state_merge.state_merge(state_run_list, destination_dir)

    # Locate the merged result files; if several entries match, the last one
    # returned by glob wins.
    response_file = None
    paramstrings = None
    for path in glob(destination_dir + '/*'):
        if path.startswith(destination_dir + '/runs_and_results'):
            response_file = path
        if path.startswith(destination_dir + '/paramstrings'):
            paramstrings = path
    if response_file is None or paramstrings is None:
        # Fail with a descriptive error instead of an unbound local variable.
        raise FileNotFoundError(
            "Merged state in %r is missing a 'runs_and_results' or "
            "'paramstrings' file" % (destination_dir,))

    # presumably an iterable with one parameter mapping per configuration
    # -- verify against output_reader
    param_dict = output_reader.read_paramstrings_file(paramstrings)

    # get configspace
    with open(destination_dir + '/param.pcs') as fh:
        cs = pcs_new.read(fh.readlines(), debug=True)

    hps = cs.get_hyperparameters()
    X = []
    for p in param_dict:
        c = CS.Configuration(cs, fix_types(p, cs),
                             allow_inactive_with_values=True)
        row = []
        for hp in hps:
            value = c[hp.name]
            if hasattr(hp, 'choices'):
                # Hyperparameters with choices are encoded by choice index.
                value = hp.choices.index(value)
            row.append(value)
        X.append(row)

    X = np.array(X)
    Y = data_extractor(response_file, X.shape[0])

    return fanova.fANOVA(X=X, Y=Y, config_space=cs)
def __call__(self, parser: ArgumentParser, namespace: Namespace, values: list, option_string: str=None):
    """Parse the pcs file given on the command line and store the result.

    ``values`` is used as the file name. If it is truthy and names an
    existing file, the file is parsed (old pcs format first, new pcs format
    as fallback), the resulting configuration space is stored in
    ``parsed_scen_args["cs"]`` and seeded with 42. If it is truthy but no
    such file exists, ``parser.exit`` is called with status 1. In every case
    that returns, ``values`` is stored on ``namespace`` under ``self.dest``.
    """
    fn = values
    if fn:
        if os.path.isfile(fn):
            with open(fn) as fp:
                pcs_str = fp.readlines()
                try:
                    parsed_scen_args["cs"] = pcs.read(pcs_str)
                except Exception:
                    # Only real parse failures trigger the fallback;
                    # KeyboardInterrupt/SystemExit must propagate.
                    logger.debug("Could not parse pcs file with old format; trying new format ...")
                    parsed_scen_args["cs"] = pcs_new.read(pcs_str)
                parsed_scen_args["cs"].seed(42)
        else:
            parser.exit(1, "Could not find pcs file: {}".format(fn))
    setattr(namespace, self.dest, values)
def __call__(self, parser: ArgumentParser, namespace: Namespace, values: list, option_string: str=None):
    """Parse the pcs file given on the command line and store the result.

    ``values`` is used as the file name. If it is truthy and names an
    existing file, the file is parsed and the resulting configuration space
    is stored in ``parsed_scen_args["cs"]`` and seeded with 42. A name
    ending in ``.json`` is read with the json reader; anything else is tried
    with the old pcs reader first and the new pcs reader as fallback. If the
    name is truthy but no such file exists, ``parser.exit`` is called with
    status 1. In every case that returns, ``values`` is stored on
    ``namespace`` under ``self.dest``.
    """
    fn = values
    if fn:
        if os.path.isfile(fn):
            # Three possible formats: json, pcs and pcs_new. We prefer json.
            with open(fn) as fp:
                if fn.endswith('.json'):
                    parsed_scen_args['cs'] = pcs_json.read(fp.read())
                    logger.debug("Loading pcs as json from: %s", fn)
                else:
                    pcs_str = fp.readlines()
                    try:
                        parsed_scen_args["cs"] = pcs.read(pcs_str)
                    except Exception:
                        # Only real parse failures trigger the fallback;
                        # KeyboardInterrupt/SystemExit must propagate.
                        logger.debug("Could not parse pcs file with old format; trying new format ...")
                        parsed_scen_args["cs"] = pcs_new.read(pcs_str)
                parsed_scen_args["cs"].seed(42)
        else:
            parser.exit(1, "Could not find pcs file: {}".format(fn))
    setattr(namespace, self.dest, values)
def _transform_arguments(self):
    """Derive run-time attributes from the raw scenario arguments.

    In order, this method:

    * initialises ``n_features`` from ``feature_dict`` and resets
      ``feature_array``;
    * parses ``par_factor`` from ``overall_obj`` (e.g. ``"par10"`` or
      ``"mean5"``), falling back to 1 when no factor is given;
    * reads the training / test instance files, if file names are set;
    * splits every instance entry into its name and the instance-specific
      remainder, which is stored in ``instance_specific``;
    * reads the feature file and builds ``feature_array`` for the training
      instances;
    * reads the pcs file (old format first, new format as fallback) into
      ``cs`` and seeds it with 42;
    * maps an empty ``output_dir`` to ``None`` and defaults
      ``input_psmac_dirs`` to ``[output_dir]`` for shared-model runs.

    The process is terminated with ``sys.exit(1)`` if a configured instance
    file or pcs file does not exist.
    """
    self.n_features = len(self.feature_dict)
    self.feature_array = None

    # Default to "no factor" so objectives that start with neither "par" nor
    # "mean" no longer fail on an unbound local variable.
    par_str = ""
    if self.overall_obj[:3] in ["PAR", "par"]:
        par_str = self.overall_obj[3:]
    elif self.overall_obj[:4] in ["mean", "MEAN"]:
        par_str = self.overall_obj[4:]
    # Check for par-value as in "par10"/ "mean5"
    if par_str:
        self.par_factor = int(par_str)
    else:
        self.logger.debug("No par-factor detected. Using 1 by default.")
        self.par_factor = 1

    # read instance files
    if self.train_inst_fn:
        if os.path.isfile(self.train_inst_fn):
            self.train_insts = self.in_reader.read_instance_file(
                self.train_inst_fn)
        else:
            self.logger.error(
                "Have not found instance file: %s" % (self.train_inst_fn))
            sys.exit(1)
    if self.test_inst_fn:
        if os.path.isfile(self.test_inst_fn):
            self.test_insts = self.in_reader.read_instance_file(
                self.test_inst_fn)
        else:
            self.logger.error(
                "Have not found test instance file: %s" % (self.test_inst_fn))
            sys.exit(1)

    self.instance_specific = {}

    def extract_instance_specific(instance_list):
        # Each entry is a sequence: element 0 is the instance name, any
        # further elements are joined into its instance-specific string.
        insts = []
        for inst in instance_list:
            if len(inst) > 1:
                self.instance_specific[inst[0]] = " ".join(inst[1:])
            insts.append(inst[0])
        return insts

    self.train_insts = extract_instance_specific(self.train_insts)
    if self.test_insts:
        self.test_insts = extract_instance_specific(self.test_insts)

    self.train_insts = self._to_str_and_warn(l=self.train_insts)
    self.test_insts = self._to_str_and_warn(l=self.test_insts)

    # read feature file
    if self.feature_fn:
        if os.path.isfile(self.feature_fn):
            self.feature_dict = self.in_reader.read_instance_features_file(
                self.feature_fn)[1]

    if self.feature_dict:
        # One row per training instance, in training-instance order.
        self.feature_array = numpy.array(
            [self.feature_dict[inst_] for inst_ in self.train_insts])
        self.n_features = self.feature_array.shape[1]

    # read pcs file
    if self.pcs_fn and os.path.isfile(self.pcs_fn):
        with open(self.pcs_fn) as fp:
            pcs_str = fp.readlines()
            try:
                self.cs = pcs.read(pcs_str)
            except Exception:
                # Only real parse failures trigger the fallback;
                # KeyboardInterrupt/SystemExit must propagate.
                self.logger.debug(
                    "Could not parse pcs file with old format; trying new format next")
                self.cs = pcs_new.read(pcs_str)
            self.cs.seed(42)
    elif self.pcs_fn:
        self.logger.error("Have not found pcs file: %s" % (self.pcs_fn))
        sys.exit(1)

    # you cannot set output dir to None directly
    # because None is replaced by default always
    if self.output_dir == "":
        self.output_dir = None
        self.logger.debug("Deactivate output directory.")
    else:
        self.logger.info("Output to %s" % (self.output_dir))

    if self.shared_model and self.input_psmac_dirs is None:
        # per default, we assume that
        # all psmac runs write to the same directory
        self.input_psmac_dirs = [self.output_dir]
def smac_to_fanova(state_run_directory, destination_dir):
    '''
    Takes the state-run files, merges them and prepares the configuration
    space for fANOVA.

    outputs: fANOVA object

    state_run_directory: str
                        path to the directory of the
                        pysmac_output/out/scenario file
    destination_dir: str
                    path to the directory in which the merged states
                    should be stored

    raises: FileNotFoundError if, after merging, destination_dir contains no
            entry starting with 'runs_and_results' or 'paramstrings'.
    '''
    # Collect every entry of the scenario directory named "state-run*".
    state_run_prefix = state_run_directory + "/state-run"
    state_run_list = [
        entry for entry in glob(state_run_directory + "/*")
        if entry.startswith(state_run_prefix)
    ]
    state_merge.state_merge(state_run_list, destination_dir)

    # Locate the merged result files; if several entries match, the last one
    # returned by glob wins.
    response_file = None
    paramstrings = None
    for entry in glob(destination_dir + '/*'):
        if entry.startswith(destination_dir + '/runs_and_results'):
            response_file = entry
        if entry.startswith(destination_dir + '/paramstrings'):
            paramstrings = entry
    if response_file is None or paramstrings is None:
        # Fail with a descriptive error instead of an unbound local variable.
        raise FileNotFoundError(
            "Merged state in %r is missing a 'runs_and_results' or "
            "'paramstrings' file" % (destination_dir,))

    # presumably an iterable with one parameter mapping per configuration
    # -- verify against output_reader
    param_dict = output_reader.read_paramstrings_file(paramstrings)

    # get configspace
    with open(destination_dir + '/param.pcs') as fh:
        cs = pcs_new.read(fh.readlines(), debug=True)

    hps = cs.get_hyperparameters()
    X = []
    for p in param_dict:
        c = CS.Configuration(cs, fix_types(p, cs),
                             allow_inactive_with_values=True)
        row = []
        for hp in hps:
            value = c[hp.name]
            if hasattr(hp, 'choices'):
                # Hyperparameters with choices are encoded by choice index.
                value = hp.choices.index(value)
            row.append(value)
        X.append(row)

    X = np.array(X)
    Y = data_extractor(response_file, X.shape[0])

    return fanova.fANOVA(X=X, Y=Y, config_space=cs)
def smac_to_fanova(state_run_directory, destination_dir):
    '''
    Takes the state-run files, merges them and prepares the configuration
    space for fANOVA.

    outputs: fANOVA object

    state_run_directory: str
                        path to the directory of the
                        pysmac_output/out/scenario file
    destination_dir: str
                    path to the directory in which the merged states
                    should be stored

    raises: FileNotFoundError if, after merging, destination_dir contains no
            entry starting with 'runs_and_results' or 'paramstrings'.
    '''
    # Collect every entry of the scenario directory named "state-run*".
    state_run_prefix = state_run_directory + "/state-run"
    state_run_list = [
        path for path in glob(state_run_directory + "/*")
        if path.startswith(state_run_prefix)
    ]
    state_merge.state_merge(state_run_list, destination_dir)

    # Locate the merged result files; if several entries match, the last one
    # returned by glob wins.
    response_file = None
    paramstrings = None
    for path in glob(destination_dir + '/*'):
        if path.startswith(destination_dir + '/runs_and_results'):
            response_file = path
        if path.startswith(destination_dir + '/paramstrings'):
            paramstrings = path
    if response_file is None or paramstrings is None:
        # Fail with a descriptive error instead of an unbound local variable.
        raise FileNotFoundError(
            "Merged state in %r is missing a 'runs_and_results' or "
            "'paramstrings' file" % (destination_dir,))

    # presumably a sequence with one parameter mapping per configuration
    # -- verify against output_reader
    param_dict = output_reader.read_paramstrings_file(paramstrings)

    # messy way to get the parameter names wrt order: the first row is
    # rendered as text and every whitespace-separated token containing ':'
    # is taken as a parameter name.
    num_line = str(param_dict[0]).replace("'", "").replace("}", "")
    f_params = []
    for token in num_line.split(" "):
        token = token.replace(",", "").replace('{', '')
        if ':' in token:
            f_params.append(token.replace(':', ''))

    # getting features: pull every numeric token out of each row's text form
    all_nums = []
    for dict_row in param_dict:
        row_text = str(dict_row).replace("'", "").replace("}", "")
        nums = []
        for token in row_text.split(" "):
            token = token.replace(",", "")
            # np.int / np.float were plain aliases of the builtins and no
            # longer exist in current NumPy, so the builtins are used.
            if token.isdigit():
                nums.append(int(token))
            elif token.replace(".", "", 1).isdigit():
                nums.append(float(token))
            elif '-' in token:
                unsigned = token.replace("-", "")
                if unsigned.isdigit():
                    nums.append(int(token))
                elif unsigned.replace(".", "", 1).isdigit():
                    nums.append(float(token))
        all_nums.append(nums)

    x = np.array(all_nums)
    Y = data_extractor(response_file, len(x))

    # Context manager so the pcs file handle is closed again.
    with open(destination_dir + '/param.pcs') as fh:
        orig_pcs = fh.readlines()
    cs = pcs_new.read(orig_pcs, debug=True)

    # Reorder the columns from paramstrings order into the index order the
    # configuration space assigns to each hyperparameter name.
    X = np.zeros(x.shape)
    for i in range(x.shape[1]):
        idx = cs.get_idx_by_hyperparameter_name(f_params[i])
        X[:, idx] = x[:, i]

    # create an instance of fanova with data for the random forest and the
    # configSpace
    return fanova.fANOVA(X=X, Y=Y, config_space=cs)