예제 #1
0
    def read_pcs_file(fn: str, logger=None):
        """Build a configuration space object from the file ``fn``.

        The parser is picked from the file name and content: a name ending in
        ``.json`` is handed to ``pcs_json``; any other file is first tried
        with ``pcs`` and, if that raises ``NotImplementedError``, parsed with
        ``pcs_new`` instead.

        Parameters
        ----------
            fn: string
                 File name of pcs file
            logger: optional
                 When truthy, its ``debug`` method is used to report which
                 parser was used / that the fallback was taken.

        Returns
        -------
            ConfigSpace: ConfigSpace
        """
        with open(fn) as handle:
            # Non-json files: old pcs format first, new format as fallback.
            if not fn.endswith('.json'):
                lines = handle.readlines()
                try:
                    return pcs.read(lines)
                except NotImplementedError:
                    if logger:
                        logger.debug(
                            "Could not parse pcs file with old format; trying new format ..."
                        )
                    return pcs_new.read(lines)

            # json is the preferred format and is parsed from the whole text.
            config_space = pcs_json.read(handle.read())
            if logger:
                logger.debug("Loading pcs as json from: %s", fn)
            return config_space
예제 #2
0
def smac_to_fanova(state_run_directory, destination_dir):
    '''
    Takes the state-run files, merges them and prepares the configuration space for fANOVA.

    outputs: fANOVA object

    state_run_directory: str
                        path to the directory of the pysmac_output/out/scenario file
    destination_dir: str
                    path to the directory in which the merged states should be stored

    raises: FileNotFoundError if, after merging, destination_dir contains no
            entry starting with 'runs_and_results' or 'paramstrings';
            ValueError if the paramstrings file yields no configurations.
    '''
    # Collect every entry of the scenario directory that is a state-run.
    state_run_prefix = state_run_directory + "/state-run"
    state_run_list = [
        path for path in glob(state_run_directory + "/*")
        if path.startswith(state_run_prefix)
    ]
    state_merge.state_merge(state_run_list, destination_dir)

    # Locate the merged output files. Both names start as None so that a
    # missing file is reported explicitly instead of as an UnboundLocalError.
    response_file = None
    paramstrings = None
    for path in glob(destination_dir + '/*'):
        if path.startswith(destination_dir + '/runs_and_results'):
            response_file = path
        if path.startswith(destination_dir + '/paramstrings'):
            paramstrings = path
    if response_file is None or paramstrings is None:
        raise FileNotFoundError(
            "Merged state in %r is incomplete: expected files starting with "
            "'runs_and_results' and 'paramstrings'" % (destination_dir,)
        )

    param_dict = output_reader.read_paramstrings_file(paramstrings)
    if len(param_dict) == 0:
        raise ValueError("No configurations found in %r" % (paramstrings,))

    # get configspace
    with open(destination_dir + '/param.pcs') as fh:
        cs = pcs_new.read(fh.readlines(), debug=True)

    hps = cs.get_hyperparameters()

    # One row per configuration, one column per hyperparameter (in the order
    # given by the configuration space).
    X = []
    for p in param_dict:
        c = CS.Configuration(cs, fix_types(p, cs), allow_inactive_with_values=True)
        row = []
        for hp in hps:
            if hasattr(hp, 'choices'):
                # hyperparameters with choices are encoded by choice index
                row.append(hp.choices.index(c[hp.name]))
            else:
                row.append(c[hp.name])
        X.append(row)

    X = np.array(X)
    Y = data_extractor(response_file, X.shape[0])

    return fanova.fANOVA(X=X, Y=Y, config_space=cs)
예제 #3
0
 def __call__(self, parser: ArgumentParser, namespace: Namespace, values: list, option_string: str=None):
     """Parse the pcs file named by ``values`` and store the option value.

     If ``values`` is truthy it is treated as a file name. An existing file
     is parsed (old pcs format first, new format as fallback), the result
     is stored in ``parsed_scen_args["cs"]`` and seeded with 42. A missing
     file terminates via ``parser.exit`` with status 1. In every
     non-exiting case ``values`` is set on ``namespace`` under ``self.dest``.
     """
     fn = values
     if fn:
         if os.path.isfile(fn):
             with open(fn) as fp:
                 pcs_str = fp.readlines()
                 try:
                     parsed_scen_args["cs"] = pcs.read(pcs_str)
                 except Exception:
                     # Any parser error triggers the fallback, but
                     # KeyboardInterrupt / SystemExit are no longer
                     # swallowed as they were by the former bare ``except:``.
                     logger.debug("Could not parse pcs file with old format; trying new format ...")
                     parsed_scen_args["cs"] = pcs_new.read(pcs_str)
                 parsed_scen_args["cs"].seed(42)
         else:
             parser.exit(1, "Could not find pcs file: {}".format(fn))
     setattr(namespace, self.dest, values)
예제 #4
0
 def __call__(self, parser: ArgumentParser, namespace: Namespace, values: list, option_string: str=None):
     """Parse the configuration-space file named by ``values``.

     If ``values`` is truthy it is treated as a file name. An existing file
     is parsed as json when its name ends in ``.json``; otherwise the old
     pcs format is tried first and the new format is used as fallback. The
     result is stored in ``parsed_scen_args["cs"]`` and seeded with 42. A
     missing file terminates via ``parser.exit`` with status 1. In every
     non-exiting case ``values`` is set on ``namespace`` under ``self.dest``.
     """
     fn = values
     if fn:
         if os.path.isfile(fn):
             # Three possible formats: json, pcs and pcs_new. We prefer json.
             with open(fn) as fp:
                 if fn.endswith('.json'):
                     parsed_scen_args['cs'] = pcs_json.read(fp.read())
                     logger.debug("Loading pcs as json from: %s", fn)
                 else:
                     pcs_str = fp.readlines()
                     try:
                         parsed_scen_args["cs"] = pcs.read(pcs_str)
                     except Exception:
                         # Any parser error triggers the fallback, but
                         # KeyboardInterrupt / SystemExit are no longer
                         # swallowed as they were by the former bare
                         # ``except:``.
                         logger.debug("Could not parse pcs file with old format; trying new format ...")
                         parsed_scen_args["cs"] = pcs_new.read(pcs_str)
                 parsed_scen_args["cs"].seed(42)
         else:
             parser.exit(1, "Could not find pcs file: {}".format(fn))
     setattr(namespace, self.dest, values)
예제 #5
0
    def _transform_arguments(self):
        """Post-process the raw scenario arguments stored on ``self``.

        In order, this method:

        * derives ``par_factor`` from the numeric suffix of ``overall_obj``
          (e.g. "par10" / "mean5"), defaulting to 1 when there is none;
        * reads the train / test instance files, if file names are set;
        * moves any extra per-instance fields into ``instance_specific`` and
          reduces the instance lists to their first field;
        * reads the feature file and builds ``feature_array`` (one row per
          training instance) and ``n_features``;
        * parses the pcs file into ``cs`` (old format first, new format as
          fallback) and seeds it with 42;
        * maps an empty ``output_dir`` to ``None`` and, for shared models,
          defaults ``input_psmac_dirs`` to ``[output_dir]``.

        Calls ``sys.exit(1)`` when a configured instance file or pcs file
        does not exist.
        """
        self.n_features = len(self.feature_dict)
        self.feature_array = None

        # Start from an empty suffix so that an objective without a
        # "par"/"mean" prefix falls back to par_factor 1 below instead of
        # failing with UnboundLocalError.
        par_str = ""
        if self.overall_obj[:3] in ["PAR", "par"]:
            par_str = self.overall_obj[3:]
        elif self.overall_obj[:4] in ["mean", "MEAN"]:
            par_str = self.overall_obj[4:]
        # Check for par-value as in "par10"/ "mean5"
        if len(par_str) > 0:
            self.par_factor = int(par_str)
        else:
            self.logger.debug("No par-factor detected. Using 1 by default.")
            self.par_factor = 1

        # read instance files
        if self.train_inst_fn:
            if os.path.isfile(self.train_inst_fn):
                self.train_insts = self.in_reader.read_instance_file(
                    self.train_inst_fn)
            else:
                self.logger.error("Have not found instance file: %s" %
                                  (self.train_inst_fn))
                sys.exit(1)
        if self.test_inst_fn:
            if os.path.isfile(self.test_inst_fn):
                self.test_insts = self.in_reader.read_instance_file(
                    self.test_inst_fn)
            else:
                self.logger.error("Have not found test instance file: %s" %
                                  (self.test_inst_fn))
                sys.exit(1)

        self.instance_specific = {}

        def extract_instance_specific(instance_list):
            # Keep only the first field of each entry as the instance name;
            # any remaining fields are joined with spaces and recorded as
            # that instance's specific information.
            insts = []
            for inst in instance_list:
                if len(inst) > 1:
                    self.instance_specific[inst[0]] = " ".join(inst[1:])
                insts.append(inst[0])
            return insts

        self.train_insts = extract_instance_specific(self.train_insts)
        if self.test_insts:
            self.test_insts = extract_instance_specific(self.test_insts)

        self.train_insts = self._to_str_and_warn(l=self.train_insts)
        self.test_insts = self._to_str_and_warn(l=self.test_insts)

        # read feature file
        # NOTE(review): a configured but non-existent feature file is
        # silently ignored here, unlike the instance and pcs files.
        if self.feature_fn:
            if os.path.isfile(self.feature_fn):
                self.feature_dict = self.in_reader.read_instance_features_file(
                    self.feature_fn)[1]

        if self.feature_dict:
            # One feature row per training instance, in train_insts order.
            self.feature_array = []
            for inst_ in self.train_insts:
                self.feature_array.append(self.feature_dict[inst_])
            self.feature_array = numpy.array(self.feature_array)
            self.n_features = self.feature_array.shape[1]

        # read pcs file
        if self.pcs_fn and os.path.isfile(self.pcs_fn):
            with open(self.pcs_fn) as fp:
                pcs_str = fp.readlines()
                try:
                    self.cs = pcs.read(pcs_str)
                except Exception:
                    # Any parser error triggers the fallback, but
                    # KeyboardInterrupt / SystemExit are no longer swallowed
                    # as they were by the former bare ``except:``.
                    self.logger.debug(
                        "Could not parse pcs file with old format; trying new format next"
                    )
                    self.cs = pcs_new.read(pcs_str)
                self.cs.seed(42)
        elif self.pcs_fn:
            self.logger.error("Have not found pcs file: %s" % (self.pcs_fn))
            sys.exit(1)

        # you cannot set output dir to None directly
        # because None is replaced by default always
        if self.output_dir == "":
            self.output_dir = None
            self.logger.debug("Deactivate output directory.")
        else:
            self.logger.info("Output to %s" % (self.output_dir))

        if self.shared_model and self.input_psmac_dirs is None:
            # per default, we assume that
            # all psmac runs write to the same directory
            self.input_psmac_dirs = [self.output_dir]
예제 #6
0
def smac_to_fanova(state_run_directory, destination_dir):
    '''
    Takes the state-run files, merges them and prepares the configuration space for fANOVA.

    outputs: fANOVA object

    state_run_directory: str
                        path to the directory of the pysmac_output/out/scenario file
    destination_dir: str
                    path to the directory in which the merged states should be stored

    raises: FileNotFoundError if the merged 'runs_and_results' or
            'paramstrings' file cannot be found in destination_dir;
            ValueError if the paramstrings file contains no configurations.
    '''

    state_run_list = []
    for candidate in glob(state_run_directory + "/*"):
        if candidate.startswith(state_run_directory + "/state-run"):
            state_run_list.append(candidate)
    state_merge.state_merge(state_run_list, destination_dir)

    # Pre-bind both results so that a missing merged file leads to a clear
    # error below rather than an UnboundLocalError further down.
    response_file = paramstrings = None
    for candidate in glob(destination_dir + '/*'):
        if candidate.startswith(destination_dir + '/runs_and_results'):
            response_file = candidate
        if candidate.startswith(destination_dir + '/paramstrings'):
            paramstrings = candidate

    if paramstrings is None:
        raise FileNotFoundError(
            "No file starting with 'paramstrings' found in %r" %
            (destination_dir,))
    if response_file is None:
        raise FileNotFoundError(
            "No file starting with 'runs_and_results' found in %r" %
            (destination_dir,))

    param_dict = output_reader.read_paramstrings_file(paramstrings)
    if len(param_dict) == 0:
        raise ValueError("No configurations found in %r" % (paramstrings,))

    # get configspace
    with open(destination_dir + '/param.pcs') as fh:
        cs = pcs_new.read(fh.readlines(), debug=True)

    hps = cs.get_hyperparameters()

    def encode(config, hp):
        # Hyperparameters that expose ``choices`` are represented by the
        # index of the chosen value; all others by the value itself.
        if hasattr(hp, 'choices'):
            return hp.choices.index(config[hp.name])
        return config[hp.name]

    X = []
    for p in param_dict:
        c = CS.Configuration(cs,
                             fix_types(p, cs),
                             allow_inactive_with_values=True)
        X.append([encode(c, hp) for hp in hps])

    X = np.array(X)
    Y = data_extractor(response_file, X.shape[0])

    return fanova.fANOVA(X=X, Y=Y, config_space=cs)
예제 #7
0
def smac_to_fanova(state_run_directory, destination_dir):
    '''
    Takes the state-run files, merges them and prepares the configuration space for fANOVA.

    outputs: fANOVA object

    state_run_directory: str
                        path to the directory of the pysmac_output/out/scenario file
    destination_dir: str
                    path to the directory in which the merged states should be stored

    raises: FileNotFoundError if the merged 'runs_and_results' or
            'paramstrings' file cannot be found in destination_dir.
    '''
    state_run_list = []
    for path in glob(state_run_directory + "/*"):
        if path.startswith(state_run_directory + "/state-run"):
            state_run_list.append(path)
    state_merge.state_merge(state_run_list, destination_dir)

    # Pre-bind both results so that a missing merged file is reported
    # explicitly instead of as an UnboundLocalError.
    response_file = None
    paramstrings = None
    for path in glob(destination_dir + '/*'):
        if path.startswith(destination_dir + '/runs_and_results'):
            response_file = path
        if path.startswith(destination_dir + '/paramstrings'):
            paramstrings = path
    if response_file is None or paramstrings is None:
        raise FileNotFoundError(
            "Merged state in %r is incomplete: expected files starting with "
            "'runs_and_results' and 'paramstrings'" % (destination_dir,)
        )
    param_dict = output_reader.read_paramstrings_file(paramstrings)

    # messy way to get the parameter names wrt order:
    # take the textual form of the first entry and keep every
    # whitespace-separated token that contains a ':' (i.e. the keys).
    first_row = str(param_dict[0]).replace("'", "").replace("}", "")
    f_params = []
    for token in first_row.split(" "):
        token = token.replace(",", "").replace('{', '')
        if ':' in token:
            f_params.append(token.replace(':', ''))

    # getting features: collect, per entry, every token that looks like an
    # (optionally negative) integer or decimal number.
    all_nums = []
    for dict_row in param_dict:
        num_line = str(dict_row).replace("'", "").replace("}", "")
        nums = []
        for token in num_line.split(" "):
            token = token.replace(",", "")
            # Builtin int/float replace np.int/np.float, which were plain
            # aliases of the builtins and no longer exist in current NumPy.
            if token.isdigit():
                nums.append(int(token))
            elif token.replace(".", "", 1).isdigit():
                nums.append(float(token))
            elif '-' in token:
                unsigned = token.replace("-", "")
                if unsigned.isdigit():
                    nums.append(int(token))
                elif unsigned.replace(".", "", 1).isdigit():
                    nums.append(float(token))
        all_nums.append(nums)

    x = np.array(all_nums)
    length = len(x)
    Y = data_extractor(response_file, length)

    # Read the pcs file inside a context manager so the handle is closed.
    with open(destination_dir + '/param.pcs') as fh:
        orig_pcs = fh.readlines()
    cs = pcs_new.read(orig_pcs, debug=True)

    # Reorder the columns so that each parameter ends up at the index the
    # configuration space assigns to it.
    X = np.zeros((x.shape))
    for i in range(x.shape[1]):
        idx = cs.get_idx_by_hyperparameter_name(f_params[i])
        X[:, idx] = x[:, i]
    # create an instance of fanova with data for the random forest and the configSpace
    return fanova.fANOVA(X=X, Y=Y, config_space=cs)