Esempio n. 1
0
    def __init__(self,
                 config_space: ConfigurationSpace,
                 runhistory,
                 db_type="sqlite",
                 db_params=frozendict(),
                 db_table_name="runhistory"):
        """Create the database object and the run-history model.

        Parameters
        ----------
        config_space : ConfigurationSpace
            Stored on the instance as ``config_space``.
        runhistory
            Stored on the instance as ``runhistory``.
        db_type : str
            Passed to ``get_db_class_by_db_type`` to obtain the database
            class; also selects which ``JSONField`` implementation is used.
        db_params : mapping
            Unpacked as keyword arguments into the database class.
        db_table_name : str
            Stored on the instance as ``db_table_name``.
        """
        self.runhistory = runhistory
        self.db_table_name = db_table_name
        self.db_type = db_type
        self.db_params = db_params

        database_cls = get_db_class_by_db_type(db_type)
        self.Datebase = database_cls
        self.db: pw.Database = database_cls(**db_params)
        self.logger = PickableLoggerAdapter(__name__)

        # Choose the JSONField implementation of the selected backend. For any
        # other db_type the JSONField attribute is deliberately left unset.
        if db_type == "sqlite":
            from playhouse.sqlite_ext import JSONField as json_field
        elif db_type == "postgresql":
            from playhouse.postgres_ext import JSONField as json_field
        elif db_type == "mysql":
            from playhouse.mysql_ext import JSONField as json_field
        else:
            json_field = None
        if json_field is not None:
            self.JSONField = json_field

        # The model is built before config_space is stored (same order as
        # before, in case get_model() depends on it).
        self.Model: pw.Model = self.get_model()
        self.config_space: ConfigurationSpace = config_space
Esempio n. 2
0
    def __init__(
            self,
            aggregate_func: typing.Callable,
            overwrite_existing_runs: bool = False,
            file_system=LocalFS(),
            db_type="sqlite",
            db_args=None,
            db_kwargs=None,
            config_space=None
    ) -> None:
        """Create an empty run history attached to a run-history database.

        Parameters
        ----------
        aggregate_func: callable
            function to aggregate perf across instances
        overwrite_existing_runs: bool
            allows overwriting old results if pairs of
            algorithm-instance-seed were measured
            multiple times
        file_system
            stored as ``self.file_system``
        db_type: str
            database backend; only ``"sqlite"`` is accepted
        db_args, db_kwargs
            passed through positionally to ``RunHistoryDB``
        config_space
            passed through to ``RunHistoryDB``

        Raises
        ------
        NotImplementedError
            if ``db_type`` is not ``"sqlite"``
        """
        if db_type != "sqlite":
            raise NotImplementedError()
        self.db: RunHistoryDB = RunHistoryDB(config_space, self, db_args, db_kwargs)

        self.file_system = file_system
        logger_name = self.__module__ + "." + self.__class__.__name__
        self.logger = PickableLoggerAdapter(logger_name)

        self.aggregate_func = aggregate_func
        self.overwrite_existing_runs = overwrite_existing_runs

        # Insertion-ordered so that serialized data keeps the order in which
        # runs were added (useful for deterministic tests).
        self.data: typing.Dict[RunKey, RunValue] = collections.OrderedDict()

        # Unordered lookup structure: all instance-seed pairs of a
        # configuration, for fast access.
        self._configid_to_inst_seed: typing.Dict[int, InstSeedKey] = {}

        # Two-way mapping between configurations and their IDs.
        self.config_ids: typing.Dict[Configuration, str] = {}
        self.ids_config: typing.Dict[str, Configuration] = {}

        # Cost for each configuration ID.
        self.cost_per_config: typing.Dict[str, float] = {}
        # Number of runs for each configuration ID; needed for computing the
        # moving average.
        self.runs_per_config: typing.Dict[str, int] = {}

        # Whether a datapoint is "external", i.e. it was read from a JSON
        # file. Can be chosen to not be written to disk.
        self.external: typing.Dict[RunKey, DataOrigin] = {}
Esempio n. 3
0
    def __init__(self, config_space: ConfigurationSpace, runhistory, db_type="sqlite",
                 db_params=frozendict(), db_table_name="runhistory"):
        """Create the database object and the run-history model.

        Parameters
        ----------
        config_space : ConfigurationSpace
            Stored on the instance as ``config_space``.
        runhistory
            Stored on the instance as ``runhistory``.
        db_type : str
            Passed to ``get_db_class_by_db_type`` and ``get_JSONField``.
        db_params : mapping
            Unpacked as keyword arguments into the database class.
        db_table_name : str
            Stored on the instance as ``db_table_name``.
        """
        self.runhistory = runhistory
        self.db_table_name = db_table_name
        self.db_type = db_type
        self.db_params = db_params

        database_cls = get_db_class_by_db_type(db_type)
        self.Datebase = database_cls
        self.db: pw.Database = database_cls(**db_params)
        self.logger = PickableLoggerAdapter(__name__)

        # JSON column type that matches the selected backend
        self.JSONField = get_JSONField(db_type)

        # The model is built before config_space is stored (same order as
        # before, in case get_model() depends on it).
        self.Model: pw.Model = self.get_model()
        self.config_space: ConfigurationSpace = config_space
Esempio n. 4
0
    def __init__(self, model: AbstractEPM):
        """Store the model and set up a logger named after the concrete class.

        Parameters
        ----------
        model : AbstractEPM
            Models the objective function.
        """
        logger_name = ".".join((self.__module__, self.__class__.__name__))
        self.logger = PickableLoggerAdapter(logger_name)
        self.model = model
Esempio n. 5
0
    def __init__(
        self,
        configspace: ConfigurationSpace,
        types: np.ndarray,
        bounds: typing.List[typing.Tuple[float, float]],
        seed: int,
        instance_features: np.ndarray = None,
        pca_components: float = None,
    ):
        """Store the model settings and prepare optional PCA preprocessing.

        Parameters
        ----------
        configspace : ConfigurationSpace
            Configuration space to tune for.
        types : np.ndarray (D)
            Specifies the number of categorical values of an input dimension,
            where the i-th entry corresponds to the i-th input dimension. For
            two dimensions, the first with 3 categorical choices and the
            second continuous, pass np.array([3, 0]). Note that we count
            starting from 0.
        bounds : list
            Bounds of input dimensions: (lower, upper) for continuous dims;
            (n_cat, np.nan) for categorical dims.
        seed : int
            The seed that is passed to the model library.
        instance_features : np.ndarray (I, K)
            Contains the K dimensional instance features
            of the I different instances.
        pca_components : float
            Number of components to keep when using PCA to reduce the
            dimensionality of instance features. PCA is only set up when the
            number of instance features exceeds this value.
        """
        logger_name = ".".join((self.__module__, self.__class__.__name__))
        self.logger = PickableLoggerAdapter(logger_name)

        self.configspace = configspace
        self.seed = seed
        self.instance_features = instance_features
        self.pca_components = pca_components

        # Number of instance features (second axis); 0 without features
        self.n_feats = 0 if instance_features is None else instance_features.shape[1]

        self.n_params = None  # will be updated on train()

        # PCA and scaler exist only if there are more features than requested
        # components; otherwise both stay None.
        if pca_components and self.n_feats > pca_components:
            self.pca = PCA(n_components=pca_components)
            self.scaler = MinMaxScaler()
        else:
            self.pca = None
            self.scaler = None

        # Never use a lower variance than this
        self.var_threshold = VERY_SMALL_NUMBER

        self.bounds = bounds
        self.types = types
        # Pristine copy used to reset the type array at every call to train()
        self._initial_types = types.copy()
Esempio n. 6
0
class RunHistory(object):
    """Container for target algorithm run information.

    **Note:** Guaranteed to be picklable.

    Attributes
    ----------
    data : collections.OrderedDict()
        Maps RunKey -> RunValue, in the order the runs were added
    config_ids : dict
        Maps config -> id
    ids_config : dict
        Maps id -> config
    cost_per_config : dict
        Maps config_id -> cost
    runs_per_config : dict
        Maps config_id -> number of runs
    external : dict
        Maps RunKey -> DataOrigin of that run

    aggregate_func
    overwrite_existing_runs
    """
    def __init__(
        self,
        aggregate_func: typing.Callable,
        overwrite_existing_runs: bool = False,
        file_system=LocalFS(),
        config_space=None,
        db_type="sqlite",
        db_params=None,
    ) -> None:
        """Constructor

        Parameters
        ----------
        aggregate_func: callable
            function to aggregate perf across instances
        overwrite_existing_runs: bool
            allows overwriting old results if pairs of
            algorithm-instance-seed were measured
            multiple times
        file_system
            object used for reading/writing the JSON representation
            (``read_txt`` / ``write_txt``)
        config_space, db_type, db_params
            passed through unchanged to ``RunHistoryDB``
        """
        self.db: RunHistoryDB = RunHistoryDB(config_space, self, db_type,
                                             db_params)
        self.file_system = file_system
        self.logger = PickableLoggerAdapter(self.__module__ + "." +
                                            self.__class__.__name__)

        # By having the data in a deterministic order we can do useful tests
        # when we serialize the data and can assume it's still in the same
        # order as it was added.
        self.data = collections.OrderedDict(
        )  # type: typing.Dict[RunKey, RunValue]

        # for fast access, we have also an unordered data structure
        # to get all instance seed pairs of a configuration
        self._configid_to_inst_seed = {}  # type: typing.Dict[str, typing.List[InstSeedKey]]

        self.config_ids = {}  # type: typing.Dict[Configuration, str]
        self.ids_config = {}  # type: typing.Dict[str, Configuration]

        # Stores cost for each configuration ID
        self.cost_per_config = {}  # type: typing.Dict[str, float]
        # runs_per_config maps the configuration ID to the number of runs for that configuration
        # and is necessary for computing the moving average
        self.runs_per_config = {}  # type: typing.Dict[str, int]

        # Store whether a datapoint is "external", which means it was read from
        # a JSON file. Can be chosen to not be written to disk
        self.external = {}  # type: typing.Dict[RunKey, DataOrigin]

        self.aggregate_func = aggregate_func
        self.overwrite_existing_runs = overwrite_existing_runs

    def add(self,
            config: Configuration,
            cost: float,
            time: float,
            status: StatusType,
            instance_id: str = None,
            seed: int = None,
            additional_info: dict = None,
            origin: DataOrigin = DataOrigin.INTERNAL):
        """Adds a data of a new target algorithm (TA) run;
        it will update data if the same key values are used
        (config, instance_id, seed)

        Parameters
        ----------
            config : dict (or other type -- depending on config space module)
                Parameter configuration
            cost: float
                Cost of TA run (will be minimized)
            time: float
                Runtime of TA run
            status: StatusType
                Status in {SUCCESS, TIMEOUT, CRASHED, ABORT, MEMOUT}
            instance_id: str
                String representing an instance (default: None);
                any falsy value is normalised to None
            seed: int
                Random seed used by TA (default: None)
            additional_info: dict
                Additional run infos (could include further returned
                information from TA or fields such as start time and host_id)
            origin: DataOrigin
                Defines how data will be used.
        """
        if not instance_id:
            instance_id = None

        config_id = self.config_ids.get(config)
        if config_id is None:  # it's a new config
            config_id = get_id_of_config(config)
            self.config_ids[config] = config_id
            self.ids_config[config_id] = config

        k = RunKey(config_id, instance_id, seed)
        v = RunValue(cost, time, status, additional_info)
        existing = self.data.get(k)

        # Each runkey is supposed to be used only once. Repeated tries to add
        # the same runkey will be ignored silently if not capped.
        if self.overwrite_existing_runs or existing is None:
            self._add(k, v, status, origin)
        elif status != StatusType.CAPPED and existing.status == StatusType.CAPPED:
            # overwrite capped runs with uncapped runs
            self._add(k, v, status, origin)
        elif (status == StatusType.CAPPED
              and existing.status == StatusType.CAPPED
              and cost > existing.cost):
            # overwrite if censored with a larger cutoff
            self._add(k, v, status, origin)

    def _add(self, k: RunKey, v: RunValue, status: StatusType,
             origin: DataOrigin):
        """Actual function to add new entry to data structures

        Always stores the run in ``self.data`` and its origin in
        ``self.external``. Runs whose origin is INTERNAL or
        EXTERNAL_SAME_INSTANCES and whose status is not CAPPED are additionally
        registered in ``self._configid_to_inst_seed`` and trigger a cost
        update for the configuration.

        Parameters
        ----------
        k : RunKey
            key of the run (config_id, instance_id, seed)
        v : RunValue
            value of the run (cost, time, status, additional_info)
        status : StatusType
            status of the run
        origin : DataOrigin
            origin of the run
        """
        self.data[k] = v
        self.external[k] = origin

        if origin in (DataOrigin.INTERNAL, DataOrigin.EXTERNAL_SAME_INSTANCES) \
                and status != StatusType.CAPPED:
            # also add to fast data structure
            is_k = InstSeedKey(k.instance_id, k.seed)
            inst_seeds = self._configid_to_inst_seed.setdefault(k.config_id, [])
            if is_k not in inst_seeds:
                inst_seeds.append(is_k)

            if not self.overwrite_existing_runs:
                # assumes an average across runs as cost function aggregation
                self.incremental_update_cost(self.ids_config[k.config_id],
                                             v.cost)
            else:
                self.update_cost(config=self.ids_config[k.config_id])

    def update_cost(self, config: Configuration):
        """Store the performance of a configuration across the instances in
        self.cost_per_config and also updates self.runs_per_config;
        uses self.aggregate_func

        Parameters
        ----------
        config: Configuration
            configuration to update cost based on all runs in runhistory
        """
        inst_seeds = set(self.get_runs_for_config(config))
        perf = self.aggregate_func(config, self, inst_seeds)
        config_id = self.config_ids[config]
        self.cost_per_config[config_id] = perf
        self.runs_per_config[config_id] = len(inst_seeds)

    def compute_all_costs(self, instances: typing.List[str] = None):
        """Computes the cost of all configurations from scratch and overwrites
        self.cost_per_config and self.runs_per_config accordingly;

        Parameters
        ----------
        instances: typing.List[str]
            list of instances; if given, cost is only computed wrt to this instance set
        """

        self.cost_per_config = {}
        self.runs_per_config = {}
        for config, config_id in self.config_ids.items():
            inst_seeds = set(self.get_runs_for_config(config))
            if instances is not None:
                inst_seeds = [
                    inst_seed for inst_seed in inst_seeds
                    if inst_seed.instance in instances
                ]

            if inst_seeds:  # can be empty if never saw any runs on <instances>
                perf = self.aggregate_func(config, self, inst_seeds)
                self.cost_per_config[config_id] = perf
                self.runs_per_config[config_id] = len(inst_seeds)

    def incremental_update_cost(self, config: Configuration, cost: float):
        """Incrementally updates the performance of a configuration by using a
        moving average;

        Parameters
        ----------
        config: Configuration
            configuration to update cost based on all runs in runhistory
        cost: float
            cost of new run of config
        """

        config_id = self.config_ids[config]
        n_runs = self.runs_per_config.get(config_id, 0)
        old_cost = self.cost_per_config.get(config_id, 0.)
        self.cost_per_config[config_id] = (
            (old_cost * n_runs) + cost) / (n_runs + 1)
        self.runs_per_config[config_id] = n_runs + 1

    def get_cost(self, config: Configuration):
        """Returns empirical cost for a configuration; uses  self.cost_per_config

        Parameters
        ----------
        config: Configuration

        Returns
        -------
        cost: float
            Computed cost for configuration; ``np.nan`` if the configuration
            is known but has no cost stored

        Raises
        ------
        KeyError
            if the configuration was never added to this runhistory
        """
        config_id = self.config_ids[config]
        return self.cost_per_config.get(config_id, np.nan)

    def get_runs_for_config(self, config: Configuration):
        """Return all runs (instance seed pairs) for a configuration.

        Parameters
        ----------
        config : Configuration from ConfigSpace
            Parameter configuration

        Returns
        -------
        instance_seed_pairs : list<tuples of instance, seed>
            empty list if no runs are registered for the configuration
        """
        config_id = self.config_ids.get(config)
        return self._configid_to_inst_seed.get(config_id, [])

    def get_instance_costs_for_config(self, config: Configuration):
        """
            Returns the average cost per instance (across seeds)
            for a configuration
            Parameters
            ----------
            config : Configuration from ConfigSpace
                Parameter configuration

            Returns
            -------
            cost_per_inst: dict<instance name<str>, cost<float>>
        """
        config_id = self.config_ids.get(config)
        runs_ = self._configid_to_inst_seed.get(config_id, [])
        costs_by_inst = {}
        for inst, seed in runs_:
            run_value = self.data[RunKey(config_id, inst, seed)]
            costs_by_inst.setdefault(inst, []).append(run_value.cost)
        return {inst: np.mean(costs) for inst, costs in costs_by_inst.items()}

    def get_all_configs(self):
        """Return all configurations in this RunHistory object

        Returns
        -------
            parameter configurations: list
        """
        return list(self.config_ids.keys())

    def empty(self):
        """Check whether or not the RunHistory is empty.

        Returns
        -------
        emptiness: bool
            True if no runs have been added to the RunHistory,
            False otherwise
        """
        return not self.data

    def save_json(self,
                  fn: str = "runhistory.json",
                  save_external: bool = False):
        """
        saves runhistory on disk

        Parameters
        ----------
        fn : str
            file name
        save_external : bool
            Whether to save external data in the runhistory file.
        """
        # NOTE: a seed of None is serialized as 0
        data = [([k.config_id,
                  str(k.instance_id) if k.instance_id is not None else None,
                  int(k.seed) if k.seed is not None else 0], list(v))
                for k, v in self.data.items()
                if save_external or self.external[k] == DataOrigin.INTERNAL]
        # only configurations that actually appear in the saved runs are written
        config_ids_to_serialize = {entry[0][0] for entry in data}
        configs = {
            id_: conf.get_dictionary()
            for id_, conf in self.ids_config.items()
            if id_ in config_ids_to_serialize
        }
        config_origins = {
            id_: conf.origin
            for id_, conf in self.ids_config.items()
            if (id_ in config_ids_to_serialize and conf.origin is not None)
        }
        txt = json.dumps(
            {
                "data": data,
                "config_origins": config_origins,
                "configs": configs
            },
            cls=EnumEncoder,
            indent=2)
        self.file_system.write_txt(fn, txt)

    def load_json(self, fn: str, cs: ConfigurationSpace):
        """Load a runhistory in json representation from disk.

        Replaces the config <-> id mappings of this runhistory with the ones
        from the file and then adds the stored runs via :meth:`add`. Entries
        already present in ``self.data`` are not removed by this method.
        If the file cannot be read or parsed, a warning is logged and nothing
        is changed.

        Parameters
        ----------
        fn : str
            file name to load from
        cs : ConfigSpace
            instance of configuration space
        """
        try:
            txt = self.file_system.read_txt(fn)
            all_data = json.loads(txt, object_hook=StatusType.enum_hook)
        except Exception as e:
            # deliberate best-effort: an unreadable file must not abort the caller
            self.logger.warning(
                'Encountered exception %s while reading runhistory from %s. '
                'Not adding any runs!',
                e,
                fn,
            )
            return

        config_origins = all_data.get("config_origins", {})

        self.ids_config = {
            id_: Configuration(cs,
                               values=values,
                               origin=config_origins.get(id_, None))
            for id_, values in all_data["configs"].items()
        }
        self.config_ids = {
            config: id_
            for id_, config in self.ids_config.items()
        }

        self._n_id = len(self.config_ids)
        # important to use add method to use all data structure correctly
        for k, v in all_data["data"]:
            id_ = k[0]
            if id_ in self.ids_config:
                self.add(config=self.ids_config[id_],
                         cost=float(v[0]),
                         time=float(v[1]),
                         status=StatusType(v[2]),
                         instance_id=k[1],
                         seed=int(k[2]),
                         additional_info=v[3])

    def update_from_json(
        self,
        fn: str,
        cs: ConfigurationSpace,
        origin: DataOrigin = DataOrigin.EXTERNAL_SAME_INSTANCES,
        id_set: set = set(),
        file_system=LocalFS()):
        """Update the current runhistory by adding new runs from a json file.

        Parameters
        ----------
        fn : str
            File name to load from.
        cs : ConfigSpace
            Instance of configuration space.
        origin : DataOrigin
            What to store as data origin.
        id_set : set
            Not used by this method; kept for interface compatibility.
        file_system
            File system object handed to the temporary RunHistory that
            reads ``fn``.

        Returns
        -------
        The return value of :meth:`load_json` on the temporary runhistory,
        which is currently always None.
        """
        new_runhistory = RunHistory(self.aggregate_func,
                                    file_system=file_system)
        load_result = new_runhistory.load_json(fn, cs)
        self.update(runhistory=new_runhistory, origin=origin)
        return load_result

    def update(self,
               runhistory: 'RunHistory',
               origin: DataOrigin = DataOrigin.EXTERNAL_SAME_INSTANCES):
        """Update the current runhistory by adding new runs from a RunHistory.

        Parameters
        ----------
        runhistory: RunHistory
            Runhistory with additional data to be added to self
        origin: DataOrigin
            If set to ``INTERNAL`` or ``EXTERNAL_SAME_INSTANCES`` the data
            will be added to the internal data structure
            self._configid_to_inst_seed and be available through
            :meth:`get_runs_for_config`.
        """

        # Configurations might be already known, but by a different ID. This
        # does not matter here because the add() method handles this
        # correctly by assigning an ID to unknown configurations and re-using
        #  the ID
        for key, value in runhistory.data.items():
            config_id, instance_id, seed = key
            cost, time, status, additional_info = value
            self.add(config=runhistory.ids_config[config_id],
                     cost=cost,
                     time=time,
                     status=status,
                     instance_id=instance_id,
                     seed=seed,
                     additional_info=additional_info,
                     origin=origin)
Esempio n. 7
0
class OutputWriter(object):
    """Writing scenario to file."""

    def __init__(self, file_system=LocalFS()):
        """Store the file system abstraction used for all writes and set up
        a logger named after the concrete class."""
        self.file_system = file_system

        self.logger = PickableLoggerAdapter(name=self.__module__ + "." + self.__class__.__name__)

    def write_scenario_file(self, scenario):
        """Write scenario to a file (format is compatible with input_reader).
        Will overwrite if file exists. If you have arguments that need special
        parsing when saving, specify so in the _parse_argument-function.
        Creates output-dir if necessary.

        Parameters
        ----------
            scenario: Scenario
                Scenario to be written to file

        Returns
        -------
            status: False or None
                False indicates that writing process failed

        Raises
        ------
            OSError
                If the output directory does not exist and cannot be created
        """
        if scenario.output_dir_for_this_run is None or scenario.output_dir_for_this_run == "":
            scenario.logger.info("No output directory for scenario logging "
                                 "specified -- scenario will not be logged.")
            return False
        # Create output-dir if necessary
        if not self.file_system.isdir(scenario.output_dir_for_this_run):
            scenario.logger.debug("Output directory does not exist! Will be "
                                  "created.")
            try:
                self.file_system.mkdir(scenario.output_dir_for_this_run)
            except OSError as err:
                scenario.logger.debug("Could not make output directory.", exc_info=1)
                raise OSError("Could not make output directory: "
                              "{}.".format(scenario.output_dir_for_this_run)) from err

        # options_dest2name maps scenario._arguments from dest -> name
        options_dest2name = {(scenario._arguments[v]['dest'] if
                              scenario._arguments[v]['dest'] else v): v.lstrip('-').replace('-', '_') for v in
                             scenario._arguments}

        # Write all options into "output_dir/scenario.txt"
        path = self.file_system.join(scenario.output_dir_for_this_run, "scenario.txt")
        scenario.logger.debug("Writing scenario-file to {}.".format(path))
        # Collect every option line first and write the file in one call:
        # calling write_txt once per option on the same path would leave only
        # the last option if write_txt replaces the file content.
        lines = []
        for dest, name in options_dest2name.items():
            # attribute name on the scenario object; the option name is taken
            # from the mapping directly, so normalising cannot cause a KeyError
            key = dest.lstrip('-').replace('-', '_')
            new_value = self._parse_argument(scenario, key, getattr(scenario, key))
            if new_value is not None:
                lines.append("{} = {}\n".format(name, new_value))
        self.file_system.write_txt(path, "".join(lines))

    def _parse_argument(self, scenario, key: str, value):
        """Some values of the scenario-file need to be changed upon writing,
        such as the 'ta' (target algorithm), due to its callback. Also,
        the configspace, features, train_inst- and test-inst-lists are saved
        to output_dir, if they exist.

        Parameters:
        -----------
            scenario: Scenario
                Scenario-file to be written
            key: string
                Name of the attribute in scenario-file
            value: Any
                Corresponding attribute

        Returns:
        --------
            new value: string
                The altered value, to be written to file; None if the
                attribute should not be written

        Sideeffects:
        ------------
          - copies files pcs_fn, train_inst_fn, test_inst_fn and feature_fn to
            output if possible, creates the files from attributes otherwise
        """
        if key in ['pcs_fn', 'train_inst_fn', 'test_inst_fn', 'feature_fn']:
            output_dir = scenario.output_dir_for_this_run
            # Copy if file exists, else write to new file
            if value is not None and self.file_system.isfile(value):
                try:
                    new_path = shutil.copy(value, output_dir)
                except shutil.SameFileError:
                    new_path = value  # File is already in output_dir
                # For .pcs-file, also save with the same basename as json and use json-path!
                if key == 'pcs_fn' and scenario.cs is not None and value.endswith('.pcs'):
                    file_name = self.file_system.splitext(self.file_system.basename(value))[0]
                    new_path = self.file_system.join(output_dir, file_name + '.json')
                    self.save_configspace(scenario.cs, new_path, 'json')
                    scenario.logger.debug("Setting the pcs_fn-attr of written scenario from %s to %s", value, new_path)
            elif key == 'pcs_fn' and scenario.cs is not None:
                # The pcs export is best-effort; the json export below is the
                # path that ends up in the scenario file.
                try:
                    pcs_path = self.file_system.join(output_dir, 'configspace.pcs')
                    self.save_configspace(scenario.cs, pcs_path, 'pcs_new')
                except TypeError:
                    self.logger.error("Could not write pcs file to disk."
                                      " ConfigSpace not compatible with (new) pcs format.")
                new_path = self.file_system.join(output_dir, 'configspace.json')
                self.save_configspace(scenario.cs, new_path, 'json')
            elif key == 'train_inst_fn' and scenario.train_insts != [None]:
                new_path = self.file_system.join(output_dir, 'train_insts.txt')
                self.write_inst_file(scenario.train_insts, new_path)
            elif key == 'test_inst_fn' and scenario.test_insts != [None]:
                new_path = self.file_system.join(output_dir, 'test_insts.txt')
                self.write_inst_file(scenario.test_insts, new_path)
            elif key == 'feature_fn' and scenario.feature_dict != {}:
                new_path = self.file_system.join(output_dir, 'features.txt')
                self.write_inst_features_file(scenario.n_features,
                                              scenario.feature_dict, new_path)
            else:
                return None
            # New value -> new path
            return new_path
        elif key == 'ta' and value is not None:
            # Reversing the callback on 'ta' (shlex.split)
            return " ".join(value)
        elif key in ['train_insts', 'test_insts', 'cs', 'feature_dict']:
            # No need to log, recreated from files
            return None
        else:
            return value

    def write_inst_file(self, insts: typing.List[str], fn: str):
        """Writes instance-list to file, one instance per line.

        Parameters
        ----------
            insts: list<string>
                 Instance list to be written
            fn: string
                 Output path
        """
        self.file_system.write_txt(fn, "\n".join(insts))

    def write_inst_features_file(self, n_features: int, feat_dict, fn: str):
        """Writes features to file.

        The file starts with a header line ("Instance, feature0, ...")
        followed by one comma-separated line per instance.

        Parameters
        ----------
            n_features: int
                 Number of features
            feat_dict: dict
                 Features to be written
            fn: string
                 File name of instance feature file
        """
        header = "Instance, " + ", ".join(
            ["feature" + str(i) for i in range(n_features)]) + "\n"
        body = [", ".join([inst] + [str(f) for f in feat_dict[inst]]) + "\n"
                for inst in feat_dict]
        txt = header + "".join(body)
        self.file_system.write_txt(fn, txt)

    def save_configspace(self, cs: ConfigurationSpace, fn: str, output_format: str):
        """Writing ConfigSpace to file.

        Parameters
        ----------
            cs : ConfigurationSpace
                Config-space to be written
            fn : str
                Output-file-path
            output_format : str
                Output format of the configuration space file. Currently,
                ``json`` and ``pcs_new`` are supported.

        Raises
        ------
            ValueError
                If ``output_format`` is not one of the supported formats
        """
        writers = {
            'pcs_new': pcs_new.write,
            'json': json.write
        }
        writer = writers.get(output_format)
        if writer:
            txt = writer(cs)
            self.file_system.write_txt(fn, txt)
        else:
            # Both placeholders need a value; passing only the set would raise
            # a TypeError from the %-formatting instead of this ValueError.
            raise ValueError(
                "Configuration space output format %s not supported. "
                "Please choose one of %s" % (output_format, set(writers.keys()))
            )
Esempio n. 8
0
    def __init__(self, file_system=LocalFS()):
        """Keep the file system object and create a logger named after the
        concrete class."""
        logger_name = "{}.{}".format(self.__module__, self.__class__.__name__)
        self.logger = PickableLoggerAdapter(name=logger_name)
        self.file_system = file_system
Esempio n. 9
0
class RunHistoryDB():
    def __init__(self,
                 config_space: ConfigurationSpace,
                 runhistory,
                 db_type="sqlite",
                 db_params=frozendict(),
                 db_table_name="runhistory"):
        """Set up the database object, the JSON field type and the table model.

        Parameters
        ----------
            config_space: ConfigurationSpace
                Stored as ``self.config_space``
            runhistory:
                Stored as ``self.runhistory``
            db_type: str
                Passed to ``get_db_class_by_db_type`` to obtain the database
                class; also selects which ``playhouse`` extension provides
                ``JSONField`` ("sqlite", "postgresql" or "mysql")
            db_params:
                Keyword arguments used to instantiate the database class
            db_table_name: str
                Stored as ``self.db_table_name``
        """
        self.db_table_name = db_table_name
        self.runhistory = runhistory
        self.db_type = db_type
        self.db_params = db_params
        self.Datebase = get_db_class_by_db_type(db_type)
        self.db: pw.Database = self.Datebase(**db_params)
        self.logger = PickableLoggerAdapter(__name__)

        # Import the JSON column type that belongs to the chosen backend.
        # For any other db_type no ``JSONField`` attribute is set.
        json_field_cls = None
        if db_type == "sqlite":
            from playhouse.sqlite_ext import JSONField as json_field_cls
        elif db_type == "postgresql":
            from playhouse.postgres_ext import JSONField as json_field_cls
        elif db_type == "mysql":
            from playhouse.mysql_ext import JSONField as json_field_cls
        if json_field_cls is not None:
            self.JSONField = json_field_cls

        self.Model: pw.Model = self.get_model()
        self.config_space: ConfigurationSpace = config_space

    def get_model(self) -> pw.Model:
        """Define the peewee model of the run-history table and create the table.

        The model is bound to ``self.db`` and uses ``self.db_table_name`` as
        its table name.

        Returns
        -------
            pw.Model
                The model class itself (not an instance)
        """
        class Run_History(pw.Model):
            run_id = pw.CharField(primary_key=True)
            config_id = pw.CharField(default="")
            # A callable default gives every model instance its own dict
            # instead of sharing one mutable ``{}`` object between them.
            config = self.JSONField(default=dict)
            # NOTE(review): insert_runhistory passes ``pickle.dumps(config)``
            # (bytes) for this column while BitField is an integer-based
            # type; a BlobField looks intended -- confirm before changing,
            # since that would alter the table schema.
            config_bin = pw.BitField(default=0)
            config_origin = pw.TextField(default="")
            cost = pw.FloatField(default=65535)
            time = pw.FloatField(default=0.0)
            instance_id = pw.CharField(default="")
            seed = pw.IntegerField(default=0)
            status = pw.IntegerField(default=0)
            additional_info = pw.CharField(default="")
            origin = pw.IntegerField(default=0)
            weight = pw.FloatField(default=0.0)
            # Callables: evaluated for each new row, not once at class creation.
            pid = pw.IntegerField(default=os.getpid)
            timestamp = pw.DateTimeField(default=datetime.datetime.now)

            class Meta:
                database = self.db
                table_name = self.db_table_name

        self.db.create_tables([Run_History])
        return Run_History

    def get_run_id(self, instance_id, config_id):
        """Build the primary key of a run from its instance and config ids.

        Parameters
        ----------
            instance_id: str or None
                Instance identifier; ``None`` is treated as the empty string,
                which is also what ``insert_runhistory`` stores for a missing
                instance
            config_id: str
                Configuration identifier

        Returns
        -------
            str
                ``"<instance_id>-<config_id>"``
        """
        if instance_id is None:
            # Concatenating None would raise TypeError.
            instance_id = ""
        return instance_id + "-" + config_id

    def appointment_config(self, config, instance_id) -> bool:
        """Try to reserve a (configuration, instance) pair in the database.

        A placeholder row with ``origin=-1`` is inserted under the run id
        derived from ``instance_id`` and the id of ``config``.

        Parameters
        ----------
            config:
                Configuration whose id is obtained via ``get_id_of_config``
            instance_id:
                Instance identifier used to build the run id

        Returns
        -------
            bool
                True if the placeholder row was inserted; False if a row with
                this run id already exists or the insert failed.
        """
        config_id = get_id_of_config(config)
        run_id = self.get_run_id(instance_id, config_id)
        if self.Model.select().where(self.Model.run_id == run_id).exists():
            return False
        try:
            self.Model.create(run_id=run_id, origin=-1)
        except pw.IntegrityError:
            # A row with the same primary key appeared between the existence
            # check and the insert: the pair is already taken.
            return False
        except Exception as e:
            # Keep the best-effort contract (no exception escapes), but leave
            # a trace instead of hiding unexpected database failures.
            self.logger.error(f"{e}\nFailed to reserve run_id {run_id}.")
            return False
        return True

    def insert_runhistory(self,
                          config: Configuration,
                          cost: float,
                          time: float,
                          status: StatusType,
                          instance_id: str = "",
                          seed: int = 0,
                          additional_info: dict = frozendict(),
                          origin: DataOrigin = DataOrigin.INTERNAL):
        """Write one run result to the database, replacing an existing row.

        Parameters
        ----------
            config: Configuration
                Evaluated configuration; stored as dictionary, as pickle and
                by its id
            cost: float
                Cost of the run
            time: float
                Runtime of the run
            status: StatusType
                Run status; its ``value`` is stored
            instance_id: str
                Instance identifier; ``None`` is stored as ``""``
            seed: int
                Seed of the run
            additional_info: dict
                Extra information, stored as a JSON string
            origin: DataOrigin
                Origin of the record; its ``value`` is stored
        """
        # Normalize first: the run id is built by string concatenation, which
        # fails on None.
        if instance_id is None:
            instance_id = ""
        config_id = get_id_of_config(config)
        run_id = self.get_run_id(instance_id, config_id)
        record = dict(
            run_id=run_id,
            config_id=config_id,
            config=config.get_dictionary(),
            config_origin=config.origin,
            config_bin=pickle.dumps(config),
            cost=cost,
            time=time,
            instance_id=instance_id,
            seed=seed,
            status=status.value,
            # The column is text and fetch_new_runhistory reads it back with
            # json.loads, so store real JSON rather than a dict. ``default=str``
            # is deliberate: values JSON cannot encode are stringified so the
            # insert stays best-effort instead of raising.
            additional_info=json.dumps(dict(additional_info), default=str),
            origin=origin.value,
        )
        try:
            self.Model.create(**record)
        except pw.IntegrityError:
            # A row with this run_id already exists (for example a placeholder
            # written by appointment_config): overwrite it. With the primary
            # key set, save() updates the existing row.
            self.Model(**record).save()
        self.timestamp = datetime.datetime.now()

    def fetch_new_runhistory(self, is_init=False):
        """Load run records from the database into ``self.runhistory``.

        Parameters
        ----------
            is_init: bool
                If True, rows with a negative ``origin`` are deleted first and
                every remaining row is loaded. If False, only rows with a
                non-negative ``origin`` whose ``pid`` differs from the current
                process id are loaded.
        """
        if is_init:
            n_del = self.Model.delete().where(self.Model.origin < 0).execute()
            if n_del > 0:
                self.logger.info(
                    f"Delete {n_del} invalid records in run_history database.")
            query = self.Model.select().where(self.Model.origin >= 0)
        else:
            query = self.Model.select().where(
                self.Model.pid != os.getpid()).where(self.Model.origin >= 0)
        for row in query:
            try:
                # NOTE(review): pickle.loads can execute arbitrary code; this
                # is only safe while the database content is trusted.
                config = pickle.loads(row.config_bin)
            except Exception as e:
                self.logger.error(
                    f"{e}\nUsing config json instead to build Configuration.")
                # Fall back to rebuilding the configuration from its stored
                # dictionary form.
                config = Configuration(self.config_space,
                                       values=row.config,
                                       origin=row.config_origin)
            try:
                additional_info = json.loads(row.additional_info)
            except Exception as e:
                self.logger.error(f"{e}\nSet default to additional_info.")
                additional_info = {}
            self.runhistory.add(config, row.cost, row.time,
                                StatusType(row.status), row.instance_id,
                                row.seed, additional_info,
                                DataOrigin(row.origin))
        self.timestamp = datetime.datetime.now()