def get_fidelity_space(self,
                       seed: Union[int, None] = None) -> CS.ConfigurationSpace:
        """
        Fetch the benchmark's fidelity space and rebuild it as a ConfigurationSpace.

        The seed is JSON-encoded (using ``BenchmarkEncoder``) and handed to
        ``self.benchmark.get_fidelity_space``; the JSON string that comes back
        is parsed with ``csjson.read``.

        Parameters
        ----------
        seed : int, None
            Seed forwarded to the benchmark. When it is not None it is also
            applied to the parsed fidelity space; when it is None the parsed
            space is returned without being seeded here.

        Returns
        -------
            CS.ConfigurationSpace
        """
        payload = json.dumps({'seed': seed}, indent=None, cls=BenchmarkEncoder)
        logger.debug(f'Client: seed_dict {payload}')
        fidelity_space = csjson.read(self.benchmark.get_fidelity_space(payload))

        # Nothing to seed locally when the caller did not ask for a seed.
        if seed is None:
            return fidelity_space

        fidelity_space.seed(seed)
        return fidelity_space
Ejemplo n.º 2
0
    def from_json(json_config):
        """Create an AbstractBenchmark from a JSON-encoded configuration.

        The JSON document is wrapped in an ``objdict``. If it carries a
        ``config_space`` entry, that entry is replaced by the object returned
        from ConfigSpace's JSON reader before the benchmark is constructed.

        :param json_config: JSON string describing the benchmark config
        :return: AbstractBenchmark built from the decoded config
        """
        parsed = objdict(json.loads(json_config))
        if "config_space" not in parsed.keys():
            return AbstractBenchmark(config=parsed)

        # Imported lazily: only needed when a config space is present.
        from ConfigSpace.read_and_write import json as cs_json

        parsed.config_space = cs_json.read(parsed.config_space)
        return AbstractBenchmark(config=parsed)
Ejemplo n.º 3
0
    def get_scenario(self):
        """Create a Scenario from ``scenario.txt`` inside ``self.folder``.

        If ``configspace.json`` exists in the same folder, it is parsed and
        stored under ``'cs'`` while the scenario's ``pcs_fn`` entry is dropped.
        The Scenario is instantiated with ``self.ta_exec_dir`` as working
        directory. A ``run_1`` directory that was not present before the call
        but exists afterwards is deleted again.
        """
        had_run_dir = os.path.exists('run_1')

        # Create Scenario (disable output_dir to avoid cluttering)
        scenario_path = os.path.join(self.folder, 'scenario.txt')
        scen_dict = InputReader().read_scenario_file(scenario_path)
        scen_dict['output_dir'] = ""

        # We always prefer the less error-prone json-format if available:
        json_space_path = os.path.join(self.folder, 'configspace.json')
        if os.path.exists(json_space_path):
            self.logger.debug("Detected '%s'", json_space_path)
            with open(json_space_path, 'r') as handle:
                raw_space = handle.read()
            ignored_pcs = scen_dict.pop('pcs_fn', 'no pcs_fn in scenario')
            self.logger.debug("Ignoring %s", ignored_pcs)
            scen_dict['cs'] = pcs_json.read(raw_space)

        with changedir(self.ta_exec_dir):
            self.logger.debug('Creating scenario from "%s"', self.ta_exec_dir)
            scenario = Scenario(scen_dict)

        # Clean up a 'run_1' folder that only appeared during this call.
        if not had_run_dir:
            if os.path.exists('run_1'):
                shutil.rmtree('run_1')
        return scenario
Ejemplo n.º 4
0
    def test_round_trip(self):
        """Check that every PCS search space survives a JSON write/read cycle.

        Each file in ``../test_searchspaces`` (relative to this test file)
        whose name contains ``.pcs`` is parsed, serialised with ``write``,
        parsed again with ``read`` and compared against the original.
        """
        this_file = os.path.abspath(__file__)
        this_directory = os.path.dirname(this_file)
        configuration_space_path = os.path.join(this_directory,
                                                "..", "test_searchspaces")
        configuration_space_path = os.path.abspath(configuration_space_path)
        pcs_files = os.listdir(configuration_space_path)

        for pcs_file in sorted(pcs_files):
            if '.pcs' not in pcs_file:
                continue

            full_path = os.path.join(configuration_space_path, pcs_file)
            with open(full_path) as fh:
                cs_string = fh.read().split('\n')

            try:
                cs = read_pcs(cs_string)
            except Exception:
                # Retry with the second reader. Catching Exception instead of
                # using a bare except keeps Ctrl-C / SystemExit from being
                # swallowed by the fallback.
                cs = read_pcs_new(cs_string)

            cs.name = pcs_file

            json_string = write(cs)
            new_cs = read(json_string)

            self.assertEqual(new_cs, cs)
Ejemplo n.º 5
0
 def from_dict(raw: Dict) -> 'CandidateStructure':
     """Rebuild a CandidateStructure from its dictionary representation.

     Reads the keys ``configspace``, ``cfg_keys``, ``budget``, ``cid`` and
     ``pipeline`` from ``raw``. The ``cfg_keys`` attribute is overwritten
     after construction with ``ConfigKey`` objects built from the raw entries.
     """
     # noinspection PyTypeChecker
     structure = CandidateStructure(
         config_json.read(raw['configspace']),
         None,
         raw['cfg_keys'],
         raw['budget'],
     )
     structure.cid = CandidateId.parse(raw['cid'])
     structure.pipeline = comp_util.deserialize(**raw['pipeline'])
     structure.cfg_keys = [ConfigKey(*entry) for entry in raw['cfg_keys']]
     return structure
Ejemplo n.º 6
0
    def get_scenario(self):
        """Create a Scenario from the scenario file belonging to ``self.folder``.

        ``scenario.txt`` is looked up directly in ``self.folder`` and, if it is
        not a file there, via ``self.get_glob_file``. Inside ``self.ta_exec_dir``
        a ``configspace.json`` next to the scenario's ``pcs_fn`` takes
        precedence over a non-json ``pcs_fn``. A ``run_1`` directory that only
        appeared during this call is removed before returning.
        """
        had_run_dir = os.path.exists('run_1')

        # Create Scenario (disable output_dir to avoid cluttering)
        scenario_path = os.path.join(self.folder, 'scenario.txt')
        if not os.path.isfile(scenario_path):
            scenario_path = self.get_glob_file(self.folder, 'scenario.txt')
        scen_dict = InputReader().read_scenario_file(scenario_path)
        scen_dict['output_dir'] = ""

        with changedir(self.ta_exec_dir):
            # We always prefer the less error-prone json-format if available:
            pcs_path = scen_dict.get('pcs_fn', 'no_pcs_fn')
            json_path = os.path.join(os.path.dirname(pcs_path), 'configspace.json')
            use_json = not pcs_path.endswith('.json') and os.path.exists(json_path)
            if use_json:
                self.logger.debug("Detected, '%s' ignoring '%s'", json_path, pcs_path)
                with open(json_path, 'r') as handle:
                    scen_dict['cs'] = pcs_json.read(handle.read())
                    scen_dict['pcs_fn'] = json_path

            self.logger.debug("Creating scenario from '%s'", self.ta_exec_dir)
            scenario = Scenario(scen_dict)

        # Clean up a 'run_1' folder that only appeared during this call.
        if not had_run_dir:
            if os.path.exists('run_1'):
                shutil.rmtree('run_1')
        return scenario
Ejemplo n.º 7
0
    def load_config_space(path):
        """
        Load a ConfigSpace object from a JSON file and widen four hyperparameters.

        As certain hyperparameters are not denoted as optimizable but overridden
        later, they are manually overridden here too: any existing entry with
        the same name is removed and a uniform hyperparameter with upper bound
        10000 is added in its place.
        :param path: path to the JSON-encoded configuration space
        :return: the modified configuration space
        """
        with open(os.path.join(path), 'r') as fh:
            config_space = config_space_json_r_w.read(fh.read())

        # (name, hyperparameter class, lower bound) for every overridden entry;
        # the order matches the order in which they are added back.
        overrides = (
            ('NetworkSelectorDatasetInfo:darts:layers',
             CSH.UniformIntegerHyperparameter, 1),
            ('SimpleLearningrateSchedulerSelector:cosine_annealing:T_max',
             CSH.UniformIntegerHyperparameter, 1),
            ('NetworkSelectorDatasetInfo:darts:init_channels',
             CSH.UniformIntegerHyperparameter, 1),
            ('SimpleLearningrateSchedulerSelector:cosine_annealing:eta_min',
             CSH.UniformFloatHyperparameter, 0),
        )

        replacements = []
        for hp_name, hp_class, lower_bound in overrides:
            config_space._hyperparameters.pop(hp_name, None)
            replacements.append(
                hp_class(name=hp_name, lower=lower_bound, upper=10000))

        config_space.add_hyperparameters(replacements)
        return config_space
Ejemplo n.º 8
0
    def test_round_trip(self):
        """Verify that PCS search spaces are unchanged by a JSON round trip.

        Every file in ``../test_searchspaces`` (relative to this test file)
        whose name contains ``.pcs`` is parsed, written to JSON, read back and
        compared with the parsed original.
        """
        here = os.path.dirname(os.path.abspath(__file__))
        spaces_dir = os.path.abspath(
            os.path.join(here, "..", "test_searchspaces"))

        for file_name in sorted(os.listdir(spaces_dir)):
            if '.pcs' not in file_name:
                continue

            with open(os.path.join(spaces_dir, file_name)) as fh:
                pcs_lines = fh.read().split('\n')

            try:
                original = read_pcs(pcs_lines)
            except Exception:
                # Files the first reader rejects are retried with the second.
                original = read_pcs_new(pcs_lines)

            original.name = file_name

            restored = read(write(original))
            self.assertEqual(restored, original)
Ejemplo n.º 9
0
    def get_configuration_space(self,
                                seed: Union[int, None] = None
                                ) -> CS.ConfigurationSpace:
        """
        Fetch the benchmark's configuration space and rebuild it locally.

        Parameters
        ----------
        seed : int, None
            Seed sent to the benchmark inside a JSON object. When it is None an
            empty JSON object is sent and the returned space is not seeded
            here; otherwise the parsed space is seeded with this value.

        Returns
        -------
            CS.ConfigurationSpace
        """
        request = {} if seed is None else {'seed': seed}
        payload = json.dumps(request, indent=None)
        logger.debug(f'Client: seed_dict {payload}')

        space = csjson.read(self.benchmark.get_configuration_space(payload))
        if seed is None:
            return space

        space.seed(seed)
        return space
Ejemplo n.º 10
0
 def get_configuration_space(self,
                             seed: Union[int, None] = None
                             ) -> CS.ConfigurationSpace:
     """Return the space stored under ``'x_discrete'``, seeded with ``seed``.

     The JSON in ``self.config_spaces['x_discrete']`` is parsed on every call
     and ``seed`` is passed to the parsed space's ``seed`` method unchanged,
     including when it is None.
     """
     discrete_space = json_cs.read(self.config_spaces['x_discrete'])
     discrete_space.seed(seed)
     return discrete_space
Ejemplo n.º 11
0
 def test_configspace_with_probabilities(self):
     """Categorical weights must survive a write/read round trip."""
     original = ConfigurationSpace()
     weighted = CategoricalHyperparameter(
         'a', [0, 1, 2], weights=[0.2, 0.2, 0.6])
     original.add_hyperparameter(weighted)

     restored = read(write(original))

     restored_probabilities = restored.get_hyperparameter('a').probabilities
     self.assertEqual(restored_probabilities, (0.2, 0.2, 0.6))
Ejemplo n.º 12
0
    def load_configspace(self, folder):
        """Load the configspace found in ``folder``.

        ``configspace.json`` is preferred; in that case the second return value
        is an empty list. For ``configspace.pcs`` the second return value
        contains one ConfigurationSpace per combination of interpretations of
        the categorical choices (booleans, integers, or the choices as read).
        If this issue will be fixed, we can drop this procedure.

        Raises ValueError when neither file exists.
        """
        cs_fn_json = os.path.join(folder, 'configspace.json')
        cs_fn_pcs = os.path.join(folder, 'configspace.pcs')

        if os.path.exists(cs_fn_json):
            with open(cs_fn_json, 'r') as fh:
                cs = pcs_json.read(fh.read())
            self.logger.debug(
                'Detected and loaded "%s". No backup-cs necessary',
                cs_fn_json)
            return cs, []

        if not os.path.exists(cs_fn_pcs):
            raise ValueError("Missing pcs-file at '%s.[pcs|json]'!" %
                             os.path.join(folder, 'configspace'))

        with open(cs_fn_pcs, 'r') as fh:
            cs = pcs_new.read(fh.readlines())

        # Split the hyperparameters: only categoricals get reinterpreted.
        all_hps = cs.get_hyperparameters()
        categoricals = [
            hp for hp in all_hps if isinstance(hp, CategoricalHyperparameter)
        ]
        non_categoricals = [
            hp for hp in all_hps
            if not isinstance(hp, CategoricalHyperparameter)
        ]

        def _alternatives(choices):
            # Candidate readings of one categorical's choices; the choices as
            # read always come last.
            found = []
            if set(choices) == {"True", "False"}:
                found.append([True, False])
            if all([c.isdigit() for c in choices]):
                found.append([int(c) for c in choices])
            found.append(choices)
            return found

        choices_per_cat = [_alternatives(hp.choices) for hp in categoricals]
        combinations = itertools.product(*choices_per_cat)
        self.logger.debug(combinations)

        cat_names = [hp.name for hp in categoricals]
        backup_cs = []
        for combi in combinations:
            variant = ConfigurationSpace()
            for hp in non_categoricals:
                variant.add_hyperparameter(hp)
            for name, choices in zip(cat_names, combi):
                variant.add_hyperparameter(
                    CategoricalHyperparameter(name, choices))
            variant.add_conditions(cs.get_conditions())
            backup_cs.append(variant)

        self.logger.debug('Sampled %d interpretations of "%s"',
                          len(backup_cs), cs_fn_pcs)
        self.logger.debug(choices_per_cat)
        return cs, backup_cs
Ejemplo n.º 13
0
def build_estimator(comp, params, X, y):
    """Instantiate the estimator described by ``comp``.

    With explicit ``params`` the pipeline is compiled from the component class
    and those params. Without params, an SVC gets its ``kernel`` taken from the
    ``value`` of the ``kernel`` hyperparameter in the component's config space
    and is then compiled; any other class is instantiated with no arguments.
    """
    if params is None:
        if get_class(comp["class"]) != sklearn.svm.SVC:
            return get_class(comp["class"])()

        kernel_hp = config_json.read(comp["params"]).get_hyperparameter("kernel")
        params = {"kernel": kernel_hp.value}

    return compile_pipeline_by_class_and_params(
        get_class(comp["class"]), params, X, y)
Ejemplo n.º 14
0
    def get_configspace():
        """Read the search space's ConfigSpace object from its JSON encoding.

        The file is read from the relative path
        ``nes/optimizers/space_encoding/configspace.json``.

        Args:
            None

        Returns:
            ConfigSpace.ConfigurationSpace: a ConfigSpace object
        """
        space_path = os.path.join(
            'nes/optimizers/space_encoding/configspace.json')
        with open(space_path, 'r') as fh:
            return config_space_json_r_w.read(fh.read())
Ejemplo n.º 15
0
    def __init__(self,
                 model: str,
                 task_id: int,
                 data_dir: Union[Path, str, None] = None,
                 rng: Union[int, np.random.RandomState, None] = None,
                 **kwargs):
        """Load the tabular data for ``model``/``task_id`` and set up the benchmark.

        ``model`` must be one of 'lr', 'svm', 'xgb', 'rf', 'nn' (checked with an
        assertion). The table and metadata come from a ``TabularDataManager``;
        the original configuration and fidelity spaces are parsed from the
        metadata entries ``config_spaces['x']`` and ``config_spaces['z']``.
        ``rng`` and any extra keyword arguments go to the parent constructor.
        """
        supported_models = ['lr', 'svm', 'xgb', 'rf', 'nn']
        assert model in supported_models, \
            f'Parameter `model` has to be one of {supported_models} but was {model}'

        self.task_id = task_id
        self.model = model

        self.dm = TabularDataManager(model, task_id, data_dir)
        loaded = self.dm.load()
        self.table, self.metadata = loaded

        metadata = self.metadata
        self.exp_args = metadata["exp_args"]
        self.config_spaces = metadata["config_spaces"]
        self.global_minimums = metadata["global_min"]

        self.original_cs = json_cs.read(self.config_spaces['x'])
        self.original_fs = json_cs.read(self.config_spaces['z'])

        super(TabularBenchmark, self).__init__(rng=rng, **kwargs)
Ejemplo n.º 16
0
    def get_configspace(path_to_configspace_obj=os.path.join(
        get_project_root(), "search_spaces/darts/configspace.json")):
        """
        Read the search space's ConfigSpace object from a JSON file.

        Args:
            path_to_configspace_obj: path to ConfigSpace json encoding; the
                default points at ``search_spaces/darts/configspace.json``
                below the project root.

        Returns:
            ConfigSpace.ConfigurationSpace: a ConfigSpace object
        """
        with open(path_to_configspace_obj, 'r') as fh:
            return config_space_json_r_w.read(fh.read())
Ejemplo n.º 17
0
def build_estimator(comp, params, X, y):
    """Instantiate the estimator described by ``comp``.

    With explicit ``params`` the pipeline is compiled from the component class
    and those params. Without params, an SVC gets its ``kernel`` taken from the
    ``value`` of the ``kernel`` hyperparameter in the component's config space
    (the choice is printed) and is then compiled; any other class is
    instantiated with no arguments.
    """
    if params is None:
        if get_class(comp["class"]) != sklearn.svm.SVC:
            return get_class(comp["class"])()

        kernel_hp = config_json.read(comp["params"]).get_hyperparameter("kernel")
        params = {"kernel": kernel_hp.value}
        print("SVC invoked without params. Setting kernel explicitly to " +
              params["kernel"])

    component_class = get_class(comp["class"])
    return compile_pipeline_by_class_and_params(component_class, params, X, y)
Ejemplo n.º 18
0
    def __init__(self, search_space_file, X, y, seed, dp_proba=0, fp_proba=0):
        """Load the search space description and prepare per-component spaces.

        :param search_space_file: path to a JSON file containing a list of
            steps; each step has a ``components`` list whose entries provide
            ``class`` and ``params`` (a JSON-encoded config space)
        :param X: stored as ``self.X``
        :param y: stored as ``self.y``
        :param seed: seed for the internal RandomState; if None, a random
            integer below 10**9 is drawn
        :param dp_proba: stored as ``self.dp_proba``
        :param fp_proba: stored as ``self.fp_proba``
        """
        self.X = X
        self.y = y
        self.dp_proba = dp_proba
        self.fp_proba = fp_proba
        self.search_space = []
        # Context manager so the description file is closed deterministically
        # instead of being left to the garbage collector.
        with open(search_space_file) as description_file:
            self.search_space_description = json.load(description_file)
        self.seed = np.random.randint(10**9) if seed is None else seed
        self.rs = np.random.RandomState(self.seed)

        def get_factor_of_parameter_space(params):
            """Return the product of a per-hyperparameter size factor.

            Categoricals contribute their number of choices, uniform floats 10,
            uniform integers min(10, upper - lower), other numerical
            hyperparameters log10(upper - lower); anything else contributes 1.
            """
            factor_global = 1
            for hp in params.get_hyperparameters():
                hp_type = type(hp)
                factor_local = 1
                if hp_type == ConfigSpace.hyperparameters.CategoricalHyperparameter:
                    factor_local = len(hp.choices)
                elif hp_type == ConfigSpace.hyperparameters.UniformFloatHyperparameter:
                    factor_local = 10
                elif hp_type == ConfigSpace.hyperparameters.UniformIntegerHyperparameter:
                    factor_local = min(10, hp.upper - hp.lower)
                elif issubclass(
                        hp.__class__,
                        ConfigSpace.hyperparameters.NumericalHyperparameter):
                    orders = np.log(hp.upper - hp.lower) / np.log(10)
                    factor_local = orders
                factor_global *= factor_local
            return factor_global

        for step in self.search_space_description:
            comp_descriptions = {}
            for comp in step["components"]:
                params = config_json.read(comp["params"])
                # Share one RandomState across all component spaces.
                params.random = self.rs
                comp_descriptions[comp["class"]] = {
                    "params": params,
                    "weight": get_factor_of_parameter_space(params)
                }
            self.search_space.append(comp_descriptions)
Ejemplo n.º 19
0
    def __init__(self,
                 step_name,
                 comp,
                 X,
                 y,
                 scoring,
                 side_scores,
                 execution_timeout,
                 other_step_component_instances,
                 index_in_steps,
                 max_time_without_imp,
                 max_its_without_imp,
                 min_its=10,
                 logger_name=None,
                 allow_exhaustive_search=True):
        """Set up the optimisation state for one component of one step.

        The component's config space is parsed from ``comp["params"]`` and its
        size is computed with ``get_hyperparameter_space_size``; the instance
        is ``active`` only if that size is at least 1. An ``EvaluationPool`` is
        created from ``X``, ``y``, ``scoring`` and ``side_scores``. The logger
        is ``'naiveautoml.hpo'`` unless ``logger_name`` is given. All remaining
        arguments are stored on the instance under the same names.
        """
        # Identity of the optimised component and its context.
        self.step_name = step_name
        self.index_in_steps = index_in_steps
        self.comp = comp
        self.X = X
        self.y = y
        self.other_step_component_instances = other_step_component_instances
        self.execution_timeout = execution_timeout

        # Search space of the component.
        self.config_space = config_json.read(comp["params"])
        self.space_size = get_hyperparameter_space_size(self.config_space)
        self.active = self.space_size >= 1

        # Progress bookkeeping.
        self.eval_runtimes = []
        self.evaled_configs = set()
        self.configs_since_last_imp = 0
        self.time_since_last_imp = 0
        self.its = 0
        self.best_score = -np.inf
        self.best_params = None

        # Stopping criteria and evaluation setup.
        self.max_time_without_imp = max_time_without_imp
        self.max_its_without_imp = max_its_without_imp
        self.min_its = min_its
        self.scoring = scoring
        self.pool = EvaluationPool(X, y, scoring, side_scores)
        self.allow_exhaustive_search = allow_exhaustive_search

        # init logger
        effective_logger_name = logger_name
        if logger_name is None:
            effective_logger_name = 'naiveautoml.hpo'
        self.logger = logging.getLogger(effective_logger_name)
        self.logger.info(
            f"Search space size for {comp['class']} is {self.space_size}")
Ejemplo n.º 20
0
def get_model_configspace(model):
    """
    Retrieve the configspace of a model from its JSON file.

    Candidate files are searched under the relative path
    ``surrogate_models/configs/model_configs/*/*``; exactly one of them has to
    match ``.*<model>_configspace.json``. The matched paths are printed.

    :param model: Name of the model for which you want the config space
    :return: the object returned by the ConfigSpace JSON reader
    :raises AssertionError: if zero or more than one file matches
    """
    # Find matching config for the model name
    # NOTE(review): ``model`` and the '.' before 'json' are used as regex
    # syntax, not literals — confirm that no model name relies on this.
    model_config_regex = re.compile(".*{}_configspace.json".format(model))
    matched_model_config_paths = list(
        filter(model_config_regex.match, glob.glob('surrogate_models/configs/model_configs/*/*')))

    print(matched_model_config_paths)
    # Make sure we only matched exactly one config
    assert len(matched_model_config_paths) == 1, 'Multiple or no configs matched with the requested model.'
    model_config_path = matched_model_config_paths[0]

    # Load the configspace object; the context manager closes the file handle
    # that was previously left open.
    with open(model_config_path, 'r') as config_file:
        model_configspace = config_space_json_r_w.read(config_file.read())
    return model_configspace
Ejemplo n.º 21
0
def load_configspace(path_to_cs_file):
    """
    Read a configuration space definition from disk.

    The reader is chosen by file extension: ``.pcs`` files are passed to
    ``pcs_new.read`` as an open file, ``.json`` files are read fully and
    passed to ``json.read`` as a string.

    Args:
        path_to_cs_file: Path to the file, in which the configuration space is
            defined. Must be in format pcs or json

    Returns:
        ConfigSpace.configuration_space

    Raises:
        ImportError: if the path ends in neither ``.pcs`` nor ``.json``
    """
    # Reject unsupported extensions before touching the file system.
    if not path_to_cs_file.endswith(('.pcs', '.json')):
        raise ImportError('Configuration space definition not understood. File'
                          ' must be in format pcs or json.')

    with open(path_to_cs_file, 'r') as handle:
        if path_to_cs_file.endswith('.pcs'):
            return pcs_new.read(handle)
        return json.read(handle.read())
Ejemplo n.º 22
0
    def __init__(self, domain_description: dict):
        """Build the search space from the file named in the domain description.

        :param domain_description: dict whose ``"DataFile"`` entry is the path
            of a JSON-encoded ConfigurationSpace

        If the default configuration cannot be derived, the error is logged and
        the default configuration is set to None.
        """
        self.logger = logging.getLogger(__name__)

        # load ConfigurationSpace object from Domain Description file.
        with open(domain_description["DataFile"], 'r') as f:
            raw_space = f.read()
            self._config_space = CSjson.read(raw_space)

        # initialize search space from ConfigurationSpace object
        self.search_space_size = None
        self.__hyperparameter_names = self._config_space.get_hyperparameter_names()

        # initialize default configuration, if it is provided correctly
        try:
            defaults = self._config_space.get_default_configuration()
            parameters = [
                defaults.get(name) for name in self.__hyperparameter_names
            ]
            self.__default_configuration = Configuration(
                parameters, Configuration.Type.DEFAULT)
        except Exception as e:
            self.logger.error("Unable to load default Configuration: %s" % e)
            self.__default_configuration = None
Ejemplo n.º 23
0
    def __init__(self, file_str, seed=None):
        """Parse a JSON configuration space and collect conversion flags.

        ``file_str`` is parsed once with the ConfigSpace JSON reader (the result
        is seeded with ``seed``) and once as plain JSON. For every entry under
        ``"hyperparameters"`` the optional ``"converts"`` list is turned into a
        ``Conversion`` mask stored in ``self.parameter_conversion`` by name.
        """
        self.configuration = configspace_json.read(file_str)
        self.configuration.seed(seed=seed)

        # Tokens recognised in a hyperparameter's "converts" list, in the
        # order in which their flags are OR-ed into the mask.
        known_conversions = (
            ("None", Conversion.NONE),
            ("import", Conversion.IMPORT),
            ("choice", Conversion.CHOICE),
        )

        self.parameter_conversion = {}
        for entry in json.loads(file_str)["hyperparameters"]:
            name = entry["name"]
            requested = set(entry.get("converts", []))

            mask = Conversion.NULL
            for token, flag in known_conversions:
                if token in requested:
                    mask |= flag

            self.parameter_conversion[name] = mask
Ejemplo n.º 24
0
def fanova_analysis(budgets, res, runs_by_budget, id2conf, bohb_logs_dir):
    """
    fANOVA analysis function.
    This plots the single marginal and pair marginal importance of the parameters in the configspace.

    For every budget (largest index first) the fANOVA data is fetched from
    ``res``, rows with NaN and then with infinite targets are dropped, and the
    plots are written to ``<bohb_logs_dir>/fanova/budget_<b>/``.

    :param budgets: iterable of budgets; processed in reversed order
    :param res: object providing ``get_fANOVA_data(config_space, budgets=[b])``
    :param runs_by_budget: mapping budget -> runs exposing ``loss``,
        ``config_id`` and ``budget``
    :param id2conf: mapping config id -> dict with a ``'config'`` entry; that
        entry gets a ``'budget'`` key added for the best run of each budget
    :param bohb_logs_dir: directory containing ``configspace.json``; plots are
        written below it
    :return: None
    """
    with open(os.path.join(bohb_logs_dir, 'configspace.json'), 'r') as fh:
        config_space = json.read(fh.read())

    # The hyperparameter list does not change between budgets.
    hyperparameters = config_space.get_hyperparameters()

    for b in reversed(budgets):
        X, y, new_cs = res.get_fANOVA_data(config_space, budgets=[b])

        # Remove nan values
        nan_index = np.argwhere(np.isnan(y))
        print('budget', b, 'number nan elements', len(nan_index))
        X = np.delete(X, nan_index, axis=0)
        y = np.delete(y, nan_index)

        # Remove infinite values
        inf_index = np.argwhere(np.isinf(y))
        print('budget', b, 'number inf elements', len(inf_index))
        X = np.delete(X, inf_index, axis=0)
        y = np.delete(y, inf_index)

        analysis = fanova.fANOVA(X, y, new_cs)
        # Cut off the unusable configs
        analysis.set_cutoffs(cutoffs=(0.0, 1.0))

        out_dir = os.path.join(bohb_logs_dir, 'fanova', 'budget_{}'.format(b))
        os.makedirs(out_dir, exist_ok=True)

        vis = fanova.visualizer.Visualizer(analysis,
                                           new_cs,
                                           out_dir,
                                           y_label='Validation Error')

        print(b)

        # Incumbent of this budget: the run with the smallest loss.
        best_run_idx = np.argsort([r.loss for r in runs_by_budget[b]])[0]
        best_run = runs_by_budget[b][best_run_idx]

        inc_conf = id2conf[best_run.config_id]['config']
        inc_conf['budget'] = best_run.budget
        inc_line_style = {
            'linewidth': 3,
            'color': 'lightgray',
            'linestyle': 'dashed'
        }

        for i, hp in enumerate(hyperparameters):
            # Compute the importance once and reuse it for print and title.
            importance = analysis.quantify_importance([hp.name])
            print(importance)
            fig = vis.plot_marginal(i, show=False,
                                    log_scale=True)  # hp.name instead of i
            fig.axvline(x=inc_conf[hp.name], **inc_line_style)
            fig.xscale('log')
            fig.title('importance %3.1f%%' %
                      (importance[(hp.name, )]['individual importance'] * 100))
            fig.tight_layout()
            fig.savefig(os.path.join(out_dir, '{}.png'.format(hp.name)))
            fig.close()

        for hp1, hp2 in itertools.combinations(hyperparameters, 2):
            n1, n2 = hp1.name, hp2.name
            fig = vis.plot_pairwise_marginal([n1, n2],
                                             show=False,
                                             three_d=False)
            fig.axvline(x=inc_conf[n1], **inc_line_style)
            fig.axhline(y=inc_conf[n2], **inc_line_style)
            # Remember the limits so the incumbent marker cannot rescale axes.
            xlims = fig.xlim()
            ylims = fig.ylim()

            fig.scatter([inc_conf[n1]], [inc_conf[n2]],
                        color='lightgray',
                        s=800,
                        marker='x',
                        linewidth=5)
            fig.xlim(xlims)
            fig.ylim(ylims)

            importance = analysis.quantify_importance(
                [n1, n2])[(n1, n2)]['total importance']
            # NOTE(review): two consecutive title() calls — if fig is the
            # pyplot module the second one replaces the first; confirm which
            # title is intended.
            fig.title("importance %3.1f%%" % (importance * 100))
            fig.title("Budget: %d epochs" % b)
            fig.tight_layout()
            # Use the hyperparameter names (not the hyperparameter objects) in
            # the file name, like the marginal plots above.
            fig.savefig(
                os.path.join(out_dir,
                             'parameter_comp_{}_{}.png'.format(n1, n2)))
            # Close the figure as the marginal loop does, so figures do not
            # pile up across pairs and budgets.
            fig.close()
Ejemplo n.º 25
0
#                           Dataset Config
#
##############################################################################

data_config_path = '../configs/data_configs/ENAS_configspace.json'
# Get data-specific configs; the context manager closes the file after parsing.
with open(data_config_path, 'r') as data_config_file:
    data_config = json.load(data_config_file)

##############################################################################
#
#                           Model Config
#
##############################################################################
# Get model configs: parse the config space and take its default configuration.
model_config_path = '../configs/model_configs/svge_configspace.json'
with open(model_config_path, 'r') as model_config_file:
    model_configspace = config_space_json_r_w.read(model_config_file.read())
model_config = model_configspace.get_default_configuration().get_dictionary()

data_type = args.data_type
model_name = args.model
ndim = model_config['node_embedding_dim']
gdim = model_config['graph_embedding_dim']
# Load graph_args (third element of the pickled tuple).
# NOTE(review): pickle.load executes arbitrary code from the file — only use
# with trusted data files.
with open(data_dir + data_name + '.pkl', 'rb') as f:
    _, _, graph_args = pickle.load(f)
START_TYPE, END_TYPE = graph_args.START_TYPE, graph_args.END_TYPE
max_n = graph_args.max_n
nvt = graph_args.num_vertex_type
# BO settings
BO_rounds = args.BO_rounds
batch_size = args.BO_batch_size
Ejemplo n.º 26
0
    def load_configspace(self, folder):
        """Load the configspace found in ``folder`` as a list of candidates.

        For ``configspace.pcs`` the list holds one ConfigurationSpace per
        combination of interpretations of the categorical choices. If this
        issue will be fixed, we can drop this procedure.

        Parameters
        ----------
        folder: str
            path to folder in which to look for configspace

        Returns
        -------
        cs_options: list[ConfigurationSpace]
            list with possible interpretations for config-space-file. Only contains multiple items if file-format is pcs.

        Raises
        ------
        ValueError
            if neither ``configspace.json`` nor ``configspace.pcs`` exists
        """
        cs_fn_json = os.path.join(folder, 'configspace.json')
        cs_fn_pcs = os.path.join(folder, 'configspace.pcs')

        if os.path.exists(cs_fn_json):
            with open(cs_fn_json, 'r') as fh:
                parsed = pcs_json.read(fh.read())
            self.logger.debug(
                'Detected and loaded "%s". No alternative interpretations necessary',
                cs_fn_json)
            return [parsed]

        if not os.path.exists(cs_fn_pcs):
            raise ValueError("Missing pcs-file at '%s.[pcs|json]'!" %
                             os.path.join(folder, 'configspace'))

        with open(cs_fn_pcs, 'r') as fh:
            cs = pcs_new.read(fh.readlines())

        # Split the hyperparameters: only categoricals get reinterpreted.
        all_hps = cs.get_hyperparameters()
        categoricals = [
            hp for hp in all_hps if isinstance(hp, CategoricalHyperparameter)
        ]
        non_categoricals = [
            hp for hp in all_hps
            if not isinstance(hp, CategoricalHyperparameter)
        ]

        def _alternatives(choices):
            """ Candidate readings for categorical choices that the pcs-format
            does not support seamlessly; the choices as read come last."""
            found = []
            if set(choices) == {"True", "False"}:
                found.append([True, False])
            if all([c.isdigit() for c in choices]):
                found.append([int(c) for c in choices])
            found.append(choices)
            return found

        choices_per_cat = [_alternatives(hp.choices) for hp in categoricals]
        combinations = itertools.product(*choices_per_cat)
        self.logger.debug(combinations)

        cat_names = [hp.name for hp in categoricals]
        cs_options = []
        for combi in combinations:
            variant = ConfigurationSpace()
            for hp in non_categoricals:
                variant.add_hyperparameter(hp)
            for name, choices in zip(cat_names, combi):
                variant.add_hyperparameter(
                    CategoricalHyperparameter(name, choices))
            variant.add_conditions(cs.get_conditions())
            cs_options.append(variant)

        self.logger.debug('Sampled %d interpretations of "%s"',
                          len(cs_options), cs_fn_pcs)
        return cs_options
Ejemplo n.º 27
0
    def __init__(self,
                 configspace,
                 min_points_in_model=None,
                 top_n_percent=15,
                 num_samples=64,
                 random_fraction=1 / 3,
                 bandwidth_factor=3,
                 min_bandwidth=1e-3,
                 **kwargs):
        """
        Fits for each given budget a kernel density estimator on the best N
        percent of the evaluated configurations on this budget.

        Besides storing its arguments, the constructor writes the configspace
        to ``configspace.json`` in the current working directory, reads it
        back and prints whether the reloaded space equals the original.

        Parameters:
        -----------
        configspace: ConfigSpace
            Configuration space object
        top_n_percent: int
            Determines the percentile of configurations that will be used as
            training data for the kernel density estimator, e.g if set to 10
            the 10% best configurations will be considered for training.
        min_points_in_model: int
            minimum number of datapoints needed to fit a model; None or a
            value below (number of hyperparameters + 1) is replaced by that
            number (the latter case with a warning)
        num_samples: int
            number of samples drawn to optimize EI via sampling
        random_fraction: float
            fraction of random configurations returned
        bandwidth_factor: float
            widens the bandwidth for continuous parameters for proposed points
            to optimize EI
        min_bandwidth: float
            to keep diversity, even when all (good) samples have the same
            value for one of the parameters, a minimum bandwidth
            (Default: 1e-3) is used instead of zero.

        Raises:
        -------
        RuntimeError
            if a hyperparameter has a ``sequence`` attribute
        """
        super().__init__(**kwargs)
        self.top_n_percent = top_n_percent
        self.configspace = configspace
        self.bw_factor = bandwidth_factor
        self.min_bandwidth = min_bandwidth

        # A model needs at least one point more than there are hyperparameters.
        required_points = len(self.configspace.get_hyperparameters()) + 1
        if min_points_in_model is None:
            min_points_in_model = required_points
        elif min_points_in_model < required_points:
            self.logger.warning(
                'Invalid min_points_in_model value. Setting it to %i' %
                required_points)
            min_points_in_model = required_points
        self.min_points_in_model = min_points_in_model

        self.num_samples = num_samples
        self.random_fraction = random_fraction

        hps = self.configspace.get_hyperparameters()

        # Round-trip the configspace through a JSON file in the working
        # directory and report whether it compares equal afterwards.
        from ConfigSpace.read_and_write import json
        with open('configspace.json', 'w') as fh:
            fh.write(json.write(self.configspace))
        with open('configspace.json') as fh:
            reloaded = json.read(str(fh.read()))
            print(self.configspace == reloaded)

        # One KDE type code and one cardinality per hyperparameter:
        # 'u' + number of choices if it has choices, otherwise 'c' + 0.
        kde_codes = []
        cardinalities = []
        for hp in hps:
            if hasattr(hp, 'sequence'):
                raise RuntimeError(
                    'This version on BOHB does not support ordinal hyperparameters. Please encode %s as an integer parameter!'
                    % (hp.name))

            has_choices = hasattr(hp, 'choices')
            kde_codes.append('u' if has_choices else 'c')
            cardinalities.append(len(hp.choices) if has_choices else 0)

        self.kde_vartypes = "".join(kde_codes)
        self.vartypes = np.array(cardinalities, dtype=int)

        # store precomputed probs for the categorical parameters
        self.cat_probs = []

        self.configs = dict()
        self.losses = dict()
        self.good_config_rankings = dict()
        self.kde_models = dict()
Ejemplo n.º 28
0
def main(
    configurations_file: str,
    configurationspace_file: str,
    working_directory: str,
    memory_limit: int,
    time_limit: int,
    per_run_time_limit: int,
    host: str,
    port: int,
):
    """
    Worker loop: evaluate server-assigned configurations on each metadata task.

    For every task id in ``automl_metadata_task_ids`` (visited in random
    order) the worker repeatedly asks the server at ``host:port`` which job
    to run next, evaluates the corresponding configuration and writes the
    result as JSON to
    ``<working_directory>/<task_id>/<run_args[0]>_<config_id>.json``.
    A ``counter`` of ``-1`` in the server response ends the work on the
    current task.

    Parameters
    ----------
    configurations_file : str
        Path to a JSON file mapping configuration ids to value dictionaries.
    configurationspace_file : str
        Path to the serialized configuration space, parsed with ``read``.
    working_directory : str
        Root directory for the result files; created if missing.
    memory_limit : int
        Forwarded unchanged to ``run_configuration``.
    time_limit : int
        Overall budget. The process exits with status 0 once
        ``time_limit - per_run_time_limit - 30`` has elapsed since start.
    per_run_time_limit : int
        Forwarded to ``run_configuration`` and subtracted from the budget.
    host : str
        Host name of the job server.
    port : int
        Port of the job server.

    Raises
    ------
    SystemExit
        With code 0 when the time budget is exhausted.
    Exception
        Any error raised while running a configuration is printed and
        re-raised after a partially written result file has been removed.
    """
    start_time = time.time()

    os.makedirs(working_directory, exist_ok=True)

    with open(configurationspace_file) as fh:
        configspace = read(fh.read())
    with open(configurations_file) as fh:
        configuration_dictionaries = json.load(fh)
    configurations = {
        config_id: Configuration(configuration_space=configspace,
                                 values=entry)
        for config_id, entry in configuration_dictionaries.items()
    }

    # Both values are loop invariants, so compute them once. The sorted order
    # defines the job numbering that the server's counter refers to.
    ordered_configurations = sorted(configurations.items())
    deadline = time_limit - per_run_time_limit - 30

    for task_id in np.random.permutation(automl_metadata_task_ids):
        print('Evaluating task', task_id)

        tmpdir = tempfile.mkdtemp()
        # The backend (and the loaded data) is created lazily and then reused
        # for all jobs of this task.
        backend = None
        task_id = int(task_id)

        while True:

            elapsed = time.time() - start_time
            if elapsed > deadline:
                print('Reached time limit! (%f > %f)' % (elapsed, deadline))
                raise SystemExit(0)

            # Connect to server, retry for some time
            for attempt in range(1, 11):
                try:
                    rval = requests.request(
                        'GET', 'http://%s:%d/?task_id=%d' % (
                            host,
                            port,
                            task_id,
                        ))
                    break
                except Exception:
                    # Linear back-off; give up after the tenth failure.
                    if attempt < 10:
                        time.sleep(attempt)
                    else:
                        raise

            response_string = rval.content.decode('utf8')
            try:
                response = json.loads(response_string)
            except ValueError:
                # Show the raw response object to ease debugging.
                print(rval)
                raise

            counter = response['counter']
            # resampling_strategy, iterative_fit, early_stopping, N_FOLDS, searchspace
            run_args = response['run_args']
            evaluation = run_args[0]
            print("Going to run count:", counter)
            if counter == -1:
                break

            for job_number, (config_id, configuration) in enumerate(
                    ordered_configurations):

                if job_number != counter:
                    continue

                strategy_label = (run_args[0] if run_args[0] != "cv" else
                                  "%s%d" % (evaluation, run_args[3]))
                iterative_label = "nif" if not run_args[1] else "if"
                early_stopping_label = "nes" if not run_args[2] else "es"
                print('Evaluating task %d, %s_%s_%s_%s, config %s (%d/%d)' %
                      (task_id, run_args[4], strategy_label, iterative_label,
                       early_stopping_label, config_id, job_number + 1,
                       len(configurations)))

                output_dir = os.path.join(working_directory, str(task_id))
                output_path = os.path.join(
                    output_dir,
                    '%s_%s.json' % (run_args[0], str(config_id)),
                )
                lock_path = output_path + '.lock'

                # First check if it's necessary to do something more
                # complicated!
                # This should actually be done with a timeout...
                try:
                    with open(output_path) as fh:
                        json.load(fh)
                    print('Exists')
                    continue
                except (OSError, ValueError):
                    # Missing or unparsable result: (re-)run the job.
                    pass

                os.makedirs(output_dir, exist_ok=True)

                try:
                    # Creating the symlink acts as an atomic lock; it raises
                    # FileExistsError when the lock is already present.
                    os.symlink(output_path, lock_path)

                    if backend is None:
                        print('Loading', task_id, 'running', config_id)
                        X_train, y_train, X_test, y_test, cat = load_task(
                            task_id)

                        dm = xy_data_manager.XYDataManager(
                            X=X_train,
                            y=y_train,
                            X_test=X_test,
                            y_test=y_test,
                            task="binary.classification",
                            feat_type=cat,
                            dataset_name=str(task_id),
                        )

                        backend = autosklearn.util.backend.create(
                            temporary_directory=os.path.join(
                                tmpdir, '%d_%d_%s' %
                                (task_id, job_number, evaluation)),
                            output_directory=None,
                            delete_tmp_folder_after_terminate=False,
                            delete_output_folder_after_terminate=True,
                        )

                        backend.save_datamanager(datamanager=dm)
                        del dm
                    else:
                        print('Re-using loaded', task_id, 'running', config_id)

                    status, cost, runtime, additional_run_info = (
                        run_configuration(
                            backend,
                            config_id,
                            task_id,
                            configuration,
                            run_args,
                            memory_limit,
                            per_run_time_limit,
                        ))

                    with open(output_path, 'w') as fh:
                        json.dump(
                            {
                                'task_id': task_id,
                                'configuration_id': config_id,
                                'status': status.value,
                                'loss': cost,
                                'runtime': runtime,
                                'additional_run_info': additional_run_info,
                            },
                            fh,
                            indent=4)
                except FileExistsError:
                    pass
                except Exception:
                    traceback.print_exc()
                    # Drop a partially written result, but never let a missing
                    # file mask the exception that actually caused the failure.
                    try:
                        os.remove(output_path)
                    except FileNotFoundError:
                        pass
                    raise
                finally:
                    # NOTE(review): this also runs after FileExistsError, i.e.
                    # it removes a lock that was not created by this worker.
                    # That clears stale locks but can also release a lock held
                    # by a live worker -- confirm this is intended.
                    delete_iter = 0
                    print(os.path.islink(lock_path), lock_path)
                    while os.path.islink(lock_path):
                        delete_iter += 1
                        try:
                            os.remove(lock_path)
                        except Exception as e:
                            print(e)
                            time.sleep(1)
                        if delete_iter > 10:
                            break
Ejemplo n.º 29
0
def convert_json_to_cs(json_obj):
    """
    Convert an already-parsed JSON object into a configuration space.

    The object is serialized back to a JSON string because ``read`` is
    called with the textual representation.

    Parameters
    ----------
    json_obj
        JSON-serializable object describing the configuration space.

    Returns
    -------
        Whatever ``read`` returns for the serialized object.
    """
    return read(json.dumps(json_obj))
Ejemplo n.º 30
0
 def get_fidelity_space(self,
                        seed: Union[int, None] = None) -> CS.ConfigurationSpace:
     """
     Build the fidelity space from the stored 'z_discrete' JSON definition.

     Parameters
     ----------
     seed : int, None
         Value handed to the ``seed`` method of the parsed space.

     Returns
     -------
         CS.ConfigurationSpace
     """
     fidelity_space = json_cs.read(self.config_spaces['z_discrete'])
     fidelity_space.seed(seed=seed)
     return fidelity_space
Ejemplo n.º 31
0
def main(args):
    """
    Score a dataset with a trained model and log the five best entries.

    The function loads the data and model configuration, instantiates the
    model named by ``args.model``, restores the matching weights from a
    checkpoint, scores the data set with ``evaluate`` and appends one line
    per top-5 entry to ``results.txt`` inside ``log_dir`` (``log_dir`` is not
    defined in this function; presumably a module-level name).

    Parameters
    ----------
    args
        Object providing the attributes ``data_search_space`` ('ENAS' or
        'NB101'), ``model``, ``device``, ``path_state_dict`` and
        ``checkpoint``.

    Raises
    ------
    TypeError
        If ``args.data_search_space`` is neither 'ENAS' nor 'NB101'.
    """

    ##############################################################################
    #
    #                           Data Config
    #
    ##############################################################################
    if args.data_search_space == 'ENAS':
        data_config_path = 'configs/data_configs/ENAS_configspace.json'
    elif args.data_search_space == 'NB101':
        data_config_path = 'configs/data_configs/NB101_configspace.json'
    else:
        raise TypeError("Unknow Seach Space : {:}".format(
            args.data_search_space))
    # Get data specific configs; the context manager closes the file handle.
    with open(data_config_path, 'r') as fh:
        data_config = json.load(fh)

    ##############################################################################
    #
    #                           Model Config
    #
    ##############################################################################
    # Get model configs: the default configuration of the stored space.
    model_config_path = 'configs/model_configs/svge_configspace.json'
    with open(model_config_path, 'r') as fh:
        model_configspace = config_space_json_r_w.read(fh.read())
    model_config = model_configspace.get_default_configuration(
    ).get_dictionary()

    ##############################################################################
    #
    #                              Model
    #
    ##############################################################################
    # NOTE(review): eval() executes an arbitrary expression taken from
    # args.model. This is only safe while that value is trusted; consider an
    # explicit name -> class mapping instead.
    model = eval(args.model)(model_config=model_config,
                             data_config=data_config).to(args.device)

    path_state_dict = args.path_state_dict
    checkpoint = args.checkpoint

    model_dict = model.state_dict()

    m = torch.load(os.path.join(path_state_dict,
                                f"model_checkpoint{checkpoint}.obj"),
                   map_location=args.device)
    # Keep only checkpoint entries whose keys exist in the current model.
    m = {k: v for k, v in m.items() if k in model_dict}

    model_dict.update(m)
    model.load_state_dict(model_dict)
    ##############################################################################
    #
    #                        Load Data
    #
    ##############################################################################
    # Any other search space has already been rejected above.
    if args.data_search_space == 'ENAS':
        data_loader = torch.load('datasets/ENAS/final_structures12.pth')
    elif args.data_search_space == 'NB101':
        data_loader = torch.load('datasets/nasbench101/graphs_8.pth')

    # 128 is presumably the batch size used by evaluate -- TODO confirm.
    acc = evaluate(model, data_loader, 128, args.device)
    b = torch.topk(acc, 5)
    print(b)
    for ind in b.indices.tolist():
        adj = generate_adj(data_loader[ind])
        node_atts = data_loader[ind].node_atts.cpu().numpy()
        config_dict = {
            'index': ind,
            'node_atts': node_atts,
            'adj': adj,
        }
        if args.data_search_space == 'ENAS':
            config_dict['enas_str'] = decode_NASdata_to_ENAS(data_loader[ind])
        with open(os.path.join(log_dir, 'results.txt'), 'a') as file:
            # The dict is written as the JSON encoding of its str()
            # representation, one entry per line.
            json.dump(str(config_dict), file)
            file.write('\n')