Beispiel #1
0
    def __init__(self, scenario: Scenario, file_system=LocalFS()):
        """Create a statistics object with every counter reset to zero.

        Parameters
        ----------
        scenario : Scenario
            Scenario object, kept in a private attribute.
        file_system : optional
            Object stored as ``self.file_system``. The default ``LocalFS()``
            is built once, when this method is defined, and is therefore
            shared by all instances that do not pass their own.
        """
        self.__scenario = scenario
        self.file_system = file_system

        # Public counters; all of them start at zero.
        self.ta_runs = self.n_configs = 0
        self.wallclock_time_used = self.ta_time_used = 0
        self.inc_changed = 0

        # Debug statistics.
        self._n_configs_per_intensify = 0
        self._n_calls_of_intensify = 0
        # Exponential moving average and its smoothing factor.
        self._EMA_ALPHA = 0.2
        self._ema_n_configs_per_intensifiy = 0

        # No start time has been recorded yet.
        self._start_time = None

        logger_name = f"{self.__module__}.{self.__class__.__name__}"
        self._logger = logging.getLogger(logger_name)
Beispiel #2
0
    def __init__(
            self,
            aggregate_func: typing.Callable,
            overwrite_existing_runs: bool = False,
            file_system=LocalFS(),
            db_type="sqlite",
            db_args=None,
            db_kwargs=None,
            config_space=None
    ) -> None:
        """Create an empty run history backed by a database object.

        Parameters
        ----------
        aggregate_func: callable
            function to aggregate perf across instances
        overwrite_existing_runs: bool
            allows to overwrite old results if pairs of
            algorithm-instance-seed were measured
            multiple times
        file_system
            Object stored as ``self.file_system``.
        db_type: str
            Only ``"sqlite"`` is accepted.
        db_args, db_kwargs, config_space
            Passed on unchanged to ``RunHistoryDB``.

        Raises
        ------
        NotImplementedError
            If ``db_type`` is anything other than ``"sqlite"``.
        """
        if db_type != "sqlite":
            raise NotImplementedError()
        # The database object is created before any other attribute is set
        # and receives this (still partially initialised) instance.
        self.db: RunHistoryDB = RunHistoryDB(config_space, self, db_args, db_kwargs)

        self.file_system = file_system
        logger_name = f"{self.__module__}.{self.__class__.__name__}"
        self.logger = PickableLoggerAdapter(logger_name)

        # Insertion-ordered on purpose: serialised data can then be compared
        # in tests under the assumption that the order of addition is kept.
        self.data = collections.OrderedDict()  # type: typing.Dict[RunKey, RunValue]

        # Unordered companion structure for quick lookup of all
        # instance-seed pairs that belong to a configuration.
        self._configid_to_inst_seed = {}  # type: typing.Dict[int, InstSeedKey]

        # Two-way mapping between configurations and their IDs.
        self.config_ids = {}  # type: typing.Dict[Configuration, str]
        self.ids_config = {}  # type: typing.Dict[str, Configuration]

        # Cost per configuration ID.
        self.cost_per_config = {}  # type: typing.Dict[str, float]
        # Number of runs per configuration ID; needed for the moving average.
        self.runs_per_config = {}  # type: typing.Dict[str, int]

        # Marks datapoints as "external" (read from a JSON file); such
        # entries can be chosen to not be written to disk.
        self.external = {}  # type: typing.Dict[RunKey, DataOrigin]

        self.overwrite_existing_runs = overwrite_existing_runs
        self.aggregate_func = aggregate_func
Beispiel #3
0
 def init_data(self):
     """Fill in defaults for attributes that were left unset.

     * ``file_system`` falls back to a new ``LocalFS`` when falsy.
     * ``ml_task`` is copied from ``self.data_manager``.
     * ``meta_learner`` falls back to a logistic regression when the main
       task is ``"classification"`` and to ``Lasso`` otherwise.
     * a single string in ``dataset_paths`` is wrapped into a list.
     """
     if not self.file_system:
         self.file_system = LocalFS()

     self.ml_task = self.data_manager.ml_task

     if not self.meta_learner:
         self.meta_learner = (
             LogisticRegression(penalty='l2',
                                solver="lbfgs",
                                multi_class="auto",
                                random_state=10)
             if self.ml_task.mainTask == "classification"
             else Lasso()
         )

     paths = self.dataset_paths
     if isinstance(paths, str):
         # Normalise a lone path so the attribute is always list-like here.
         self.dataset_paths = [paths]
Beispiel #4
0
    def __init__(self, output_dir, stats, file_system=LocalFS()):
        """Prepare trajectory logging and, if needed, the output directory.

        Parameters
        ----------
        output_dir: str
            directory for logging (``None`` or ``""`` disables logging)
        stats: Stats()
            Stats object
        file_system
            Object used for all directory and file operations; it is also
            stored on the class as ``TrajLogger.static_file_system``.

        Raises
        ------
        OSError
            If the output directory does not exist and cannot be created.
        """
        self.file_system = file_system
        TrajLogger.static_file_system = file_system
        self.stats = stats
        logger_name = f"{self.__module__}.{self.__class__.__name__}"
        self.logger = logging.getLogger(logger_name)
        self.trajectory = []

        if output_dir is None or output_dir == "":
            # Logging disabled: no file names are set up in this case.
            self.output_dir = None
            self.logger.info("No output directory for trajectory logging "
                             "specified -- trajectory will not be logged.")
            return

        self.output_dir = output_dir
        fs = self.file_system
        if not fs.isdir(output_dir):
            try:
                fs.mkdir(output_dir)
            except OSError:
                self.logger.debug("Could not make output directory.",
                                  exc_info=True)
                raise OSError(
                    f"Could not make output directory: {output_dir}.")

        # SMAC2-like csv file; the header is written only when the file
        # does not exist yet.
        self.old_traj_fn = fs.join(output_dir, "traj_old.csv")
        if not fs.isfile(self.old_traj_fn):
            header = ('"CPU Time Used",'
                      '"Estimated Training Performance",'
                      '"Wallclock Time",'
                      '"Incumbent ID",'
                      '"Automatic Configurator (CPU) Time",'
                      '"Configuration..."\n')
            fs.write_txt(self.old_traj_fn, header)

        self.aclib_traj_fn = fs.join(output_dir, "traj_aclib2.json")
Beispiel #5
0
 def __init__(self,
              project_path=None,
              file_system=None,
              max_persistent_model=50,
              persistent_mode="fs",
              db_type="sqlite"):
     """Store the persistence settings and create the project directory.

     Parameters
     ----------
     project_path : str, optional
         Directory to create. When falsy, a path of the form
         ``<cwd>/auto-pipeline-<timestamp>`` is generated.
     file_system : optional
         Object used to create the directory; a new ``LocalFS`` is used
         when this is falsy.
     max_persistent_model : int
         Stored as ``self.max_persistent_model``.
     persistent_mode : str
         Must be ``"fs"`` or ``"db"`` (checked with ``assert``).
     db_type : str
         Stored as ``self.db_type``.
     """
     self.persistent_mode = persistent_mode
     assert self.persistent_mode in ("fs", "db")
     self.max_persistent_model = max_persistent_model
     self.db_type = db_type

     self.file_system = file_system if file_system else LocalFS()

     if not project_path:
         # Default location: timestamped folder below the working directory.
         cwd = os.getcwd()
         stamp = time.strftime("%Y-%m-%d-%H:%M:%S", time.localtime())
         project_path = f"{cwd}/auto-pipeline-{stamp}"
     self.project_path = project_path
     self.file_system.mkdir(project_path)
Beispiel #6
0
    def update_from_json(self, fn: str, cs: ConfigurationSpace,
                         origin: DataOrigin = DataOrigin.EXTERNAL_SAME_INSTANCES, id_set: set = set(),
                         file_system=LocalFS()):
        """Merge the runs stored in a json file into this runhistory.

        The file is first loaded into a temporary ``RunHistory`` that shares
        this object's aggregation function; that temporary history is then
        merged into ``self``.

        Parameters
        ----------
        fn : str
            File name to load from.
        cs : ConfigSpace
            Instance of configuration space.
        origin : DataOrigin
            What to store as data origin.
        id_set : set
            Not used by this method.
        file_system
            File-system object handed to the temporary ``RunHistory``.

        Returns
        -------
        Whatever ``load_json`` of the temporary run history returns.
        """
        loaded = RunHistory(self.aggregate_func, file_system=file_system)
        loaded_ids = loaded.load_json(fn, cs)
        self.update(runhistory=loaded, origin=origin)
        return loaded_ids
Beispiel #7
0
    def __init__(self,
                 scenario=None,
                 cmd_options: dict = None,
                 runtime='local',
                 runtime_config=None,
                 initial_runs=20,
                 filter_callback: typing.Optional[typing.Callable] = None,
                 after_run_callback: typing.Optional[typing.Callable] = None,
                 db_type="sqlite",
                 db_params=None,
                 anneal_func=None):
        """ Creates a scenario-object. The output_dir will be
        "output_dir/run_id/" and if that exists already, the old folder and its
        content will be moved (without any checks on whether it's still used by
        another process) to "output_dir/run_id.OLD". If that exists, ".OLD"s
        will be appended until possible.

        Parameters
        ----------
        scenario : str or dict or None
            If str, it will be interpreted as a path to a scenario file
            If dict, it will be used directly to get all scenario related
            information
            If None, only cmd_options will be used
        cmd_options : dict
            Options from parsed command line arguments
        runtime : str
            ``'hdfs'`` selects an ``HDFS`` file system; any other value
            selects ``LocalFS``.
        runtime_config : dict or None
            Only read for ``runtime == 'hdfs'``, where the key ``'hdfs_url'``
            is looked up (default ``'http://0.0.0.0:50070'``).
        initial_runs, filter_callback, after_run_callback, db_type, db_params
            Stored unchanged as attributes of the same name.
        anneal_func : callable or str or None
            A string is evaluated with ``eval``; if that fails the error is
            logged and ``None`` is stored instead.

        Raises
        ------
        TypeError
            If ``scenario`` is neither str, dict nor None.
        """
        self.logger = logging.getLogger(self.__module__ + '.' +
                                        self.__class__.__name__)
        if isinstance(anneal_func, str):
            # SECURITY NOTE(review): eval() executes arbitrary code. This is
            # only acceptable if the string comes from a trusted source --
            # confirm where anneal_func strings originate.
            try:
                anneal_func = eval(anneal_func)
            except Exception as e:
                self.logger.error("Can not eval anneal_func\n" + str(e))
                anneal_func = None

        self.anneal_func = anneal_func
        self.db_params = db_params
        self.db_type = db_type
        self.after_run_callback = after_run_callback
        self.filter_callback = filter_callback
        self.initial_runs = initial_runs
        self.runtime_config: dict = runtime_config
        self.runtime = runtime
        if self.runtime == 'hdfs':
            # runtime_config defaults to None; fall back to an empty mapping
            # so the default URL is used instead of raising AttributeError.
            hdfs_settings = self.runtime_config or {}
            self.file_system = HDFS(
                hdfs_settings.get('hdfs_url', 'http://0.0.0.0:50070'))
        else:
            self.file_system = LocalFS()

        self.PCA_DIM = 7

        self.in_reader = InputReader()
        self.out_writer = OutputWriter(self.file_system)

        self.output_dir_for_this_run = None

        self._arguments = {}
        self._arguments.update(CMDReader().scen_cmd_actions)

        if scenario is None:
            scenario = {}
        if isinstance(scenario, str):
            # A string is a scenario file name; parsing starts from an empty
            # option dict.
            scenario_fn = scenario
            options = {}
        elif isinstance(scenario, dict):
            # Work on a shallow copy so the caller's dict is not modified.
            scenario_fn = None
            options = copy.copy(scenario)
        else:
            raise TypeError(
                "Wrong type of scenario (str or dict are supported)")

        if cmd_options:
            options.update(cmd_options)
        cmd_reader = CMDReader()
        if scenario_fn is not None:
            self.logger.info("Reading scenario file: %s", scenario_fn)
            smac_args_, scen_args_ = cmd_reader.read_smac_scenario_dict_cmd(
                options, scenario_fn)
        else:
            smac_args_, scen_args_ = cmd_reader.read_smac_scenario_dict_cmd(
                options)

        # Every parsed option becomes an attribute of this object; scenario
        # arguments win over SMAC arguments on a name clash.
        parsed = {}
        parsed.update(vars(smac_args_))
        parsed.update(vars(scen_args_))
        for arg_name, arg_value in parsed.items():
            setattr(self, arg_name, arg_value)

        self._transform_arguments()

        self.logger.debug("SMAC and Scenario Options:")
        if cmd_options:
            for arg_name, arg_value in cmd_options.items():
                if isinstance(arg_value, (int, str, float)):
                    # Lazy %-style arguments: formatted only if DEBUG is on.
                    self.logger.debug("%s = %s", arg_name, arg_value)
Beispiel #8
0
    def __init__(self, file_system=LocalFS()):
        """Store the file-system object and create a class-specific logger.

        Parameters
        ----------
        file_system
            Object stored as ``self.file_system``. The default ``LocalFS()``
            is created once, when this method is defined.
        """
        logger_name = f"{self.__module__}.{self.__class__.__name__}"
        self.logger = PickableLoggerAdapter(name=logger_name)
        self.file_system = file_system
Beispiel #9
0
class TrajLogger(object):
    """Writes trajectory log files and creates the output directory if it
    does not exist already.

    Attributes
    ----------
    stats
    logger
    output_dir
    aclib_traj_fn
    old_traj_fn
    trajectory
    """
    # File system used by the static reader below; replaced by every
    # constructor call with the file system passed to that call.
    static_file_system = LocalFS()

    def __init__(self, output_dir, stats, file_system=LocalFS()):
        """Constructor

        Parameters
        ----------
        output_dir: str
            directory for logging (None or "" to disable logging)
        stats: Stats()
            Stats object
        file_system
            Object used for all directory and file operations. It is also
            stored on the class as ``static_file_system``.

        Raises
        ------
        OSError
            If the output directory does not exist and cannot be created.
        """
        self.file_system = file_system
        TrajLogger.static_file_system = file_system
        self.stats = stats
        self.logger = logging.getLogger(self.__module__ + "." +
                                        self.__class__.__name__)

        self.output_dir = output_dir
        if output_dir is None or output_dir == "":
            self.output_dir = None
            self.logger.info("No output directory for trajectory logging "
                             "specified -- trajectory will not be logged.")

        else:
            if not self.file_system.isdir(output_dir):
                try:
                    self.file_system.mkdir(output_dir)
                except OSError as err:
                    self.logger.debug("Could not make output directory.",
                                      exc_info=True)
                    raise OSError("Could not make output directory: "
                                  "{}.".format(output_dir)) from err

            self.old_traj_fn = self.file_system.join(output_dir,
                                                     "traj_old.csv")
            # Write the csv header only when the file does not exist yet.
            if not self.file_system.isfile(self.old_traj_fn):
                txt = ('"CPU Time Used","Estimated Training Performance",'
                       '"Wallclock Time","Incumbent ID",'
                       '"Automatic Configurator (CPU) Time",'
                       '"Configuration..."\n')
                self.file_system.write_txt(self.old_traj_fn, txt)

            self.aclib_traj_fn = self.file_system.join(output_dir,
                                                       "traj_aclib2.json")

        self.trajectory = []

    def add_entry(self, train_perf: float, incumbent_id: int,
                  incumbent: Configuration):
        """Adds entries to trajectory files (several formats) using the
        same timestamps for each entry

        The entry is always appended to ``self.trajectory``; the files are
        only written when an output directory is configured.

        Parameters
        ----------
        train_perf: float
            estimated performance on training (sub)set
        incumbent_id: int
            id of incumbent
        incumbent: Configuration()
            current incumbent configuration
        """
        ta_runs = self.stats.ta_runs
        ta_time_used = self.stats.ta_time_used
        wallclock_time = self.stats.get_used_wallclock_time()
        self.trajectory.append(
            TrajEntry(train_perf, incumbent_id, incumbent, ta_runs,
                      ta_time_used, wallclock_time))
        if self.output_dir is not None:
            self._add_in_old_format(train_perf, incumbent_id, incumbent,
                                    ta_time_used, wallclock_time)
            self._add_in_aclib_format(train_perf, incumbent_id, incumbent,
                                      ta_time_used, wallclock_time)

    @staticmethod
    def _format_incumbent(incumbent: Configuration) -> typing.List[str]:
        """Render every parameter of ``incumbent`` whose value is not None
        as a ``name='repr(value)'`` string."""
        return ["%s='%s'" % (p, repr(incumbent[p]))
                for p in incumbent
                if incumbent.get(p) is not None]

    def _add_in_old_format(self, train_perf: float, incumbent_id: int,
                           incumbent: Configuration, ta_time_used: float,
                           wallclock_time: float):
        """Adds entries to old SMAC2-like trajectory file

        Parameters
        ----------
        train_perf: float
            Estimated performance on training (sub)set
        incumbent_id: int
            Id of incumbent
        incumbent: Configuration()
            Current incumbent configuration
        ta_time_used: float
            CPU time used by the target algorithm
        wallclock_time: float
            Wallclock time used so far
        """
        conf = self._format_incumbent(incumbent)

        txt = ("%f, %f, %f, %d, %f, %s\n" %
               (ta_time_used, train_perf, wallclock_time, incumbent_id,
                wallclock_time - ta_time_used, ", ".join(conf)))
        self.file_system.write_txt(self.old_traj_fn, txt, append=True)

    def _add_in_aclib_format(self, train_perf: float, incumbent_id: int,
                             incumbent: Configuration, ta_time_used: float,
                             wallclock_time: float):
        """Adds entries to AClib2-like trajectory file

        Each entry is written as one JSON document per line.

        Parameters
        ----------
        train_perf: float
            Estimated performance on training (sub)set
        incumbent_id: int
            Id of incumbent
        incumbent: Configuration()
            Current incumbent configuration
        ta_time_used: float
            CPU time used by the target algorithm
        wallclock_time: float
            Wallclock time used so far
        """
        traj_entry = {
            "cpu_time": ta_time_used,
            "total_cpu_time": None,  # TODO: fix this
            "wallclock_time": wallclock_time,
            "evaluations": self.stats.ta_runs,
            "cost": train_perf,
            "incumbent": self._format_incumbent(incumbent)
        }
        try:
            traj_entry["origin"] = incumbent.origin
        except AttributeError:
            traj_entry["origin"] = "UNKNOWN"
        txt = json.dumps(traj_entry) + "\n"
        # Third positional argument is presumably the append flag (the old
        # format writer passes append=True by keyword) -- TODO confirm.
        self.file_system.write_txt(self.aclib_traj_fn, txt, True)

    @staticmethod
    def read_traj_aclib_format(fn: str, cs: ConfigurationSpace):
        """Reads trajectory from file

        The file is read through ``TrajLogger.static_file_system``. Blank
        lines are skipped.

        Parameters
        ----------
        fn: str
            Filename with saved runhistory in self._add_in_aclib_format format
        cs: ConfigurationSpace
            Configuration Space to translate dict object into Configuration object

        Returns
        -------
        trajectory: list
            Each entry in the list is a dictionary of the form
            {
            "cpu_time": float,
            "total_cpu_time": None, # TODO
            "wallclock_time": float,
            "evaluations": int
            "cost": float,
            "incumbent": Configuration
            }
        """
        trajectory = []
        lines = TrajLogger.static_file_system.read_txt(fn).splitlines()
        for line in lines:
            if not line.strip():
                # An empty line carries no entry and would break json.loads.
                continue
            entry = json.loads(line)
            entry["incumbent"] = TrajLogger._convert_dict_to_config(
                entry["incumbent"], cs=cs)
            trajectory.append(entry)

        return trajectory

    @staticmethod
    def _convert_dict_to_config(config_list: typing.List[str],
                                cs: ConfigurationSpace):
        # CAN BE DONE IN CONFIGSPACE
        """Since we save a configuration as a list of "name='value'" strings
        we have to try to figure out the type (int, float, str) of each
        parameter value

        Parameters
        ----------
        config_list: typing.List[str]
            Configuration as a list of "str='str'"
        cs: ConfigurationSpace
            Configuration Space to translate dict object into Configuration object

        Returns
        -------
        Configuration
            Configuration built from the parsed values, with ``origin`` set
            to "External Trajectory".
        """
        config_dict = {}
        for param in config_list:
            # Split only at the first "=" so that values which themselves
            # contain "=" do not break the unpacking.
            k, v = param.split("=", 1)
            v = v.strip("'")
            hp = cs.get_hyperparameter(k)
            if isinstance(hp, FloatHyperparameter):
                v = float(v)
            elif isinstance(hp, IntegerHyperparameter):
                v = int(v)
            config_dict[k] = v

        config = Configuration(configuration_space=cs, values=config_dict)
        config.origin = "External Trajectory"

        return config