Example #1
0
    def _call_ta(
        self,
        obj: typing.Callable,
        config: Configuration,
        obj_kwargs: typing.Dict[str, typing.Union[int, str, float, None]],
    ) -> typing.Union[float, typing.Tuple[float, typing.Dict]]:

        x = np.array([val for _, val in sorted(config.get_dictionary().items())],
                     dtype=np.float)
        return obj(x, **obj_kwargs)
Example #2
0
    def add(
        self,
        config: Configuration,
        cost: Union[int, float, list, np.ndarray],
        time: float,
        status: StatusType,
        instance_id: Optional[str] = None,
        seed: Optional[int] = None,
        budget: float = 0.0,
        starttime: float = 0.0,
        endtime: float = 0.0,
        additional_info: Optional[Dict] = None,
        origin: DataOrigin = DataOrigin.INTERNAL,
        force_update: bool = False,
    ) -> None:
        """Adds a data of a new target algorithm (TA) run;
        it will update data if the same key values are used
        (config, instance_id, seed)

        Parameters
        ----------
            config : dict (or other type -- depending on config space module)
                Parameter configuration
            cost: Union[int, float, list, np.ndarray]
                Cost of TA run (will be minimized)
            time: float
                Runtime of TA run
            status: str
                Status in {SUCCESS, TIMEOUT, CRASHED, ABORT, MEMOUT}
            instance_id: str
                String representing an instance (default: None)
            seed: int
                Random seed used by TA (default: None)
            budget: float
                budget (cutoff) used in intensifier to limit TA (default: 0)
            starttime: float
                starting timestamp of TA evaluation
            endtime: float
                ending timestamp of TA evaluation
            additional_info: dict
                Additional run infos (could include further returned
                information from TA or fields such as start time and host_id)
            origin: DataOrigin
                Defines how data will be used.
            force_update: bool (default: False)
                Forces the addition of a config to the history

        Raises
        ------
            TypeError
                If ``config`` is None or not of type ``Configuration``.
            ValueError
                If the size of ``cost`` does not match the number of
                objectives recorded so far (``self.num_obj``).
        """

        if config is None:
            raise TypeError(
                "Configuration to add to the runhistory must not be None")
        elif not isinstance(config, Configuration):
            raise TypeError(
                "Configuration to add to the runhistory is not of type Configuration, but %s"
                % type(config))

        # Squeeze is important to reduce arrays with one element
        # to scalars.
        cost = np.asarray(cost).squeeze()

        # Get the config id; an unseen configuration is assigned the next
        # integer id and registered in both directions
        # (config -> id and id -> config).
        config_id_tmp = self.config_ids.get(config)
        if config_id_tmp is None:
            self._n_id += 1
            self.config_ids[config] = self._n_id
            config_id = cast(int, self.config_ids.get(config))
            self.ids_config[self._n_id] = config
        else:
            config_id = cast(int, config_id_tmp)

        # The first cost that is added fixes the number of objectives
        # (num_obj starts out as -1); every later cost must have the same
        # size.
        if self.num_obj == -1:
            self.num_obj = np.size(cost)
        else:
            if np.size(cost) != self.num_obj:
                raise ValueError(
                    f"Cost is not of the same length ({np.size(cost)}) as the number "
                    f"of objectives ({self.num_obj})")

        k = RunKey(config_id, instance_id, seed, budget)
        v = RunValue(cost.tolist(), time, status, starttime, endtime,
                     additional_info)

        # Construct keys and values for the data dictionary; every stored
        # field must be JSON-serializable so the history can be dumped.
        for key, value in (
            ("config", config.get_dictionary()),
            ("config_id", config_id),
            ("instance_id", instance_id),
            ("seed", seed),
            ("budget", budget),
            ("cost", cost.tolist()),
            ("time", time),
            ("status", status),
            ("starttime", starttime),
            ("endtime", endtime),
            ("additional_info", additional_info),
            ("origin", config.origin),
        ):
            self._check_json_serializable(key, value, EnumEncoder, k, v)

        # Each runkey is supposed to be used only once. Repeated tries to add
        # the same runkey will be ignored silently if not capped.
        if self.overwrite_existing_runs or force_update or self.data.get(
                k) is None:
            self._add(k, v, status, origin)
        elif status != StatusType.CAPPED and self.data[
                k].status == StatusType.CAPPED:
            # overwrite capped runs with uncapped runs
            self._add(k, v, status, origin)
        elif (status == StatusType.CAPPED
              and self.data[k].status == StatusType.CAPPED
              and cost > self.data[k].cost):
            # overwrite if censored with a larger cutoff
            # NOTE(review): ``cost`` is an ndarray here while
            # ``self.data[k].cost`` was stored as a list, so for
            # multi-objective costs this ``>`` compares elementwise and its
            # truth value would raise — confirm capped runs are
            # single-objective only.
            self._add(k, v, status, origin)
class TrajLoggerTest(unittest.TestCase):
    """Tests for TrajLogger: output-directory creation, error propagation,
    and the three trajectory file formats (old csv, aclib2 json, alljson)."""

    def mocked_get_used_wallclock_time(self):
        # Stand-in for Stats.get_used_wallclock_time: returns 1, 2, 3, ...
        # on successive calls so each trajectory entry gets a fresh timestamp.
        self.value += 1
        return self.value

    def setUp(self):
        """Build a small configuration space and a default test configuration."""
        logging.basicConfig()
        self.logger = logging.getLogger(self.__module__ + "." + self.__class__.__name__)
        self.logger.setLevel(logging.DEBUG)
        self.value = 0
        self.cs = ConfigurationSpace()
        self.cs.add_hyperparameters([
            UniformFloatHyperparameter('param_a', -0.2, 1.77, 1.1),
            UniformIntegerHyperparameter('param_b', -3, 10, 1),
            Constant('param_c', 'value'),
            CategoricalHyperparameter('ambigous_categorical', choices=['True', True, 5]),  # True is ambigous here
        ])
        self.test_config = Configuration(self.cs, {'param_a': 0.5,
                                                   'param_b': 1,
                                                   'param_c': 'value',
                                                   'ambigous_categorical': 5})

    def test_init(self):
        """Constructing a TrajLogger creates its output directory."""
        scen = Scenario(scenario={'run_obj': 'quality', 'cs': self.cs, 'output_dir': ''})
        stats = Stats(scen)
        with tempfile.TemporaryDirectory() as tmpdir:
            path = os.path.join(tmpdir, 'tmp_test_folder')
            TrajLogger(output_dir=path, stats=stats)
            self.assertTrue(os.path.exists(path))

    def test_oserror(self):
        """An OSError raised by os.makedirs propagates out of the constructor."""
        scen = Scenario(scenario={'run_obj': 'quality', 'cs': self.cs, 'output_dir': ''})
        stats = Stats(scen)
        # test OSError
        with patch('os.makedirs') as osMock:
            osMock.side_effect = OSError()
            self.assertRaises(OSError, TrajLogger, output_dir='random_directory', stats=stats)

    @patch('smac.stats.stats.Stats')
    def test_add_entries(self, mock_stats):
        """add_entry fills the in-memory trajectory and writes all three
        trajectory files with the expected per-entry values."""
        # Mock stats
        mock_stats.ta_time_used = .5
        mock_stats.get_used_wallclock_time = self.mocked_get_used_wallclock_time
        mock_stats.finished_ta_runs = 1

        with tempfile.TemporaryDirectory() as tmpdir:
            tl = TrajLogger(output_dir=tmpdir, stats=mock_stats)

            # Add some entries
            tl.add_entry(0.9, 1, self.test_config, 0)
            mock_stats.ta_runs = 2
            mock_stats.ta_time_used = 0
            tl.add_entry(1.3, 1, self.test_config, 10)
            mock_stats.ta_time_used = 0
            tl.add_entry(0.7, 2, Configuration(self.cs, dict(self.test_config.get_dictionary(), **{'param_a': 0.})), 10)

            # Test the list that's added to the trajectory class
            self.assertEqual(tl.trajectory[0], TrajEntry(0.9, 1, self.test_config, 1, 0.5, 1, 0))
            # Test named-tuple-access:
            self.assertEqual(tl.trajectory[0].train_perf, 0.9)
            self.assertEqual(tl.trajectory[0].incumbent_id, 1)
            self.assertEqual(tl.trajectory[0].ta_runs, 1)
            self.assertEqual(tl.trajectory[0].ta_time_used, 0.5)
            self.assertEqual(tl.trajectory[0].wallclock_time, 1)
            self.assertEqual(tl.trajectory[0].budget, 0)
            self.assertEqual(len(tl.trajectory), 3)

            # Check if the trajectories are generated
            for fn in ['traj_old.csv', 'traj_aclib2.json', 'traj.json']:
                self.assertTrue(os.path.exists(os.path.join(tmpdir, fn)))

            # Load trajectories (read them before the tempdir is removed)
            with open(os.path.join(tmpdir, 'traj_old.csv')) as to:
                data = to.read().split('\n')
            with open(os.path.join(tmpdir, 'traj_aclib2.json')) as js_aclib:
                json_dicts_aclib2 = [json.loads(line) for line in js_aclib.read().splitlines()]
            with open(os.path.join(tmpdir, 'traj.json')) as js:
                json_dicts_alljson = [json.loads(line) for line in js.read().splitlines()]

        # Check old format: quoted header row, then comma-separated rows
        header = data[0].split(',')
        self.assertEqual(header[0], '"CPU Time Used"')
        self.assertEqual(header[-1], '"Configuration..."')

        data = list(map(lambda x: x.split(', '), data[1:]))
        frmt_str = '%1.6f'
        self.assertEqual(frmt_str % 0.5, data[0][0])
        self.assertEqual(frmt_str % 0.9, data[0][1])
        self.assertEqual(frmt_str % 0.5, data[0][4])

        self.assertEqual(frmt_str % 0, data[1][0])
        self.assertEqual(frmt_str % 1.3, data[1][1])
        self.assertEqual(frmt_str % 2, data[1][4])

        self.assertEqual(frmt_str % 0, data[2][0])
        self.assertEqual(frmt_str % .7, data[2][1])
        self.assertEqual(frmt_str % 3, data[2][4])

        # Check aclib2-format: incumbent is a list of 'name=value' strings
        self.assertEqual(json_dicts_aclib2[0]['cpu_time'], .5)
        self.assertEqual(json_dicts_aclib2[0]['cost'], 0.9)
        self.assertEqual(len(json_dicts_aclib2[0]['incumbent']), 4)
        self.assertTrue("param_a='0.5'" in json_dicts_aclib2[0]['incumbent'])
        self.assertTrue("param_a='0.0'" in json_dicts_aclib2[2]['incumbent'])

        # Check alljson-format: incumbent is a typed dict, budget is kept
        self.assertEqual(json_dicts_alljson[0]['cpu_time'], .5)
        self.assertEqual(json_dicts_alljson[0]['cost'], 0.9)
        self.assertEqual(len(json_dicts_alljson[0]['incumbent']), 4)
        self.assertTrue(json_dicts_alljson[0]["incumbent"]["param_a"] == 0.5)
        self.assertTrue(json_dicts_alljson[2]["incumbent"]["param_a"] == 0.0)
        self.assertEqual(json_dicts_alljson[0]['budget'], 0)
        self.assertEqual(json_dicts_alljson[2]['budget'], 10)

    @patch('smac.stats.stats.Stats')
    def test_ambigious_categoricals(self, mock_stats):
        """Categorical values that collide as strings ('True' vs True) only
        survive a round-trip through the alljson format, not aclib2."""
        mock_stats.ta_time_used = 0.5
        mock_stats.get_used_wallclock_time = self.mocked_get_used_wallclock_time
        mock_stats.finished_ta_runs = 1

        with tempfile.TemporaryDirectory() as tmpdir:
            tl = TrajLogger(output_dir=tmpdir, stats=mock_stats)

            problem_config = Configuration(self.cs, {'param_a': 0.0, 'param_b': 2, 'param_c': 'value',
                                                     'ambigous_categorical': True})  # not recoverable without json
            tl.add_entry(0.9, 1, problem_config)

            from_aclib2 = tl.read_traj_aclib_format(os.path.join(tmpdir, 'traj_aclib2.json'), self.cs)
            from_alljson = tl.read_traj_alljson_format(os.path.join(tmpdir, 'traj.json'), self.cs)

        # Wrong! but passes: aclib2 stringifies the value, so True comes back as 'True'
        self.assertIsInstance(from_aclib2[0]['incumbent']['ambigous_categorical'], str)
        # Works good for alljson:
        self.assertIsInstance(from_alljson[0]['incumbent']['ambigous_categorical'], bool)
Example #4
0
    def add(
        self,
        config: Configuration,
        cost: float,
        time: float,
        status: StatusType,
        instance_id: typing.Optional[str] = None,
        seed: typing.Optional[int] = None,
        budget: float = 0.0,
        starttime: float = 0.0,
        endtime: float = 0.0,
        additional_info: typing.Optional[typing.Dict] = None,
        origin: DataOrigin = DataOrigin.INTERNAL,
        force_update: bool = False,
    ) -> None:
        """Record one target algorithm (TA) run in the history.

        An entry with the same key (config, instance_id, seed, budget) is
        updated when ``force_update`` or ``self.overwrite_existing_runs``
        is set, or when the stored run was capped.

        Parameters
        ----------
            config : Configuration
                Parameter configuration that was evaluated
            cost: float
                Cost of TA run (will be minimized)
            time: float
                Runtime of TA run
            status: StatusType
                Status in {SUCCESS, TIMEOUT, CRASHED, ABORT, MEMOUT}
            instance_id: str
                String representing an instance (default: None)
            seed: int
                Random seed used by TA (default: None)
            budget: float
                budget (cutoff) used in intensifier to limit TA (default: 0)
            starttime: float
                starting timestamp of TA evaluation
            endtime: float
                ending timestamp of TA evaluation
            additional_info: dict
                Additional run infos (could include further returned
                information from TA or fields such as start time and host_id)
            origin: DataOrigin
                Defines how data will be used.
            force_update: bool (default: False)
                Forces the addition of a config to the history

        Raises
        ------
            TypeError
                If ``config`` is None or not a ``Configuration`` instance.
        """
        if config is None:
            raise TypeError(
                'Configuration to add to the runhistory must not be None')
        if not isinstance(config, Configuration):
            raise TypeError(
                'Configuration to add to the runhistory is not of type Configuration, but %s'
                % type(config))

        # Resolve the integer id for this configuration; a fresh one is
        # allocated and registered in both mappings the first time we see it.
        known_id = self.config_ids.get(config)
        if known_id is None:
            self._n_id += 1
            self.config_ids[config] = self._n_id
            config_id = typing.cast(int, self.config_ids.get(config))
            self.ids_config[self._n_id] = config
        else:
            config_id = typing.cast(int, known_id)

        run_key = RunKey(config_id, instance_id, seed, budget)
        run_value = RunValue(cost, time, status, starttime, endtime,
                             additional_info)

        # Every stored field has to survive a round-trip through JSON.
        serializable_fields = {
            'config': config.get_dictionary(),
            'config_id': config_id,
            'instance_id': instance_id,
            'seed': seed,
            'budget': budget,
            'cost': cost,
            'time': time,
            'status': status,
            'starttime': starttime,
            'endtime': endtime,
            'additional_info': additional_info,
            'origin': config.origin,
        }
        for field_name, field_value in serializable_fields.items():
            self._check_json_serializable(field_name, field_value, EnumEncoder,
                                          run_key, run_value)

        # A run key is meant to be written only once; repeats are ignored
        # silently unless the stored run was capped or an overwrite is
        # explicitly requested.
        if (self.overwrite_existing_runs or force_update
                or self.data.get(run_key) is None):
            self._add(run_key, run_value, status, origin)
        elif self.data[run_key].status == StatusType.CAPPED:
            if status != StatusType.CAPPED:
                # an uncapped run always replaces a capped one
                self._add(run_key, run_value, status, origin)
            elif cost > self.data[run_key].cost:
                # censored again, but with a larger cutoff: keep the new run
                self._add(run_key, run_value, status, origin)