Example #1
    def test_objective_runtime(self):
        ''' test if everything is ok with objective runtime (imputing!) '''
        scen = Scenario(self.scen_fn,
                        cmd_options={'run_obj': 'runtime', 'cutoff_time': 5})
        validator = Validator(scen, self.trajectory, self.rng)
        old_configs = [entry["incumbent"] for entry in self.trajectory]
        old_rh = RunHistory()
        for config in old_configs[:int(len(old_configs) / 2)]:
            old_rh.add(config, 1, 1, StatusType.SUCCESS, instance_id='0')
        validator.validate_epm('all', 'train', 1, old_rh)
Example #2
    def test_choose_next(self):
        seed = 42
        config = self.scenario.cs.sample_configuration()
        rh = RunHistory()
        rh.add(config, 10, 10, StatusType.SUCCESS)

        smbo = SMAC4AC(self.scenario, rng=seed, runhistory=rh).solver

        x = next(smbo.epm_chooser.choose_next()).get_array()
        self.assertEqual(x.shape, (2, ))
Example #3
    def test_get_config_runs(self):
        '''
            get some config runs from runhistory
        '''

        rh = RunHistory()
        cs = get_config_space()
        config1 = Configuration(cs,
                                values={'a': 1, 'b': 2})
        config2 = Configuration(cs,
                                values={'a': 1, 'b': 3})
        rh.add(config=config1, cost=10, time=20,
               status=StatusType.SUCCESS, instance_id=1,
               seed=1)

        rh.add(config=config2, cost=10, time=20,
               status=StatusType.SUCCESS, instance_id=1,
               seed=1)

        rh.add(config=config1, cost=10, time=20,
               status=StatusType.SUCCESS, instance_id=2,
               seed=2)

        ist = rh.get_runs_for_config(config=config1)
        #print(ist)
        #print(ist[0])
        #print(ist[1])
        self.assertEqual(len(ist), 2)
        self.assertEqual(ist[0].instance, 1)
        self.assertEqual(ist[1].instance, 2)
Example #4
    def __init__(self, folder: str, ta_exec_dir: Union[str, None] = None):
        """Initialize scenario, runhistory and incumbent from folder, execute
        the init-method of the SMAC facade (so SMAC-instances can simply be used instead).

        Parameters
        ----------
        folder: string
            output-dir of this run
        ta_exec_dir: string
            If the execution directory of the SMAC run differs from the cwd,
            there might be problems loading instance-, feature- or PCS-files
            in the scenario-object. Since instance- and PCS-files are necessary,
            specify the path to the execution-dir of SMAC here.
        """
        run_1_existed = os.path.exists('run_1')
        self.logger = logging.getLogger("cave.SMACrun.{}".format(folder))
        in_reader = InputReader()

        self.folder = folder
        self.logger.debug("Loading from %s", folder)

        split_folder = os.path.split(folder)
        self.logger.info(split_folder)
        if ta_exec_dir is None:
            ta_exec_dir = '.'

        self.scen_fn = os.path.join(folder, 'scenario.txt')
        self.rh_fn = os.path.join(folder, 'runhistory.json')
        self.traj_fn = os.path.join(folder, 'traj_aclib2.json')
        self.traj_old_fn = os.path.join(folder, 'traj_old.csv')

        # Create Scenario (disable output_dir to avoid cluttering)
        scen_dict = in_reader.read_scenario_file(self.scen_fn)
        scen_dict['output_dir'] = ""
        with changedir(ta_exec_dir):
            self.scen = Scenario(scen_dict)

        # Load runhistory and trajectory
        self.runhistory = RunHistory(average_cost)
        self.runhistory.update_from_json(self.rh_fn, self.scen.cs)
        self.traj = TrajLogger.read_traj_aclib_format(fn=self.traj_fn,
                                                      cs=self.scen.cs)

        incumbent = self.traj[-1]['incumbent']
        self.train_inst = self.scen.train_insts
        self.test_inst = self.scen.test_insts

        # Initialize SMAC-object
        super().__init__(scenario=self.scen, runhistory=self.runhistory)
        #restore_incumbent=incumbent)
        # TODO use restore, delete next line
        self.solver.incumbent = incumbent

        if (not run_1_existed) and os.path.exists('run_1'):
            shutil.rmtree('run_1')
Example #5
    def test_multiple_budgets(self):

        rh = RunHistory()
        cs = get_config_space()
        config1 = Configuration(cs, values={'a': 1, 'b': 2})

        rh.add(config=config1,
               cost=10,
               time=20,
               status=StatusType.SUCCESS,
               instance_id=1,
               seed=1,
               budget=1)

        self.assertEqual(rh.get_cost(config1), 10)

        # only the higher budget gets included in the config cost
        rh.add(config=config1,
               cost=20,
               time=20,
               status=StatusType.SUCCESS,
               instance_id=1,
               seed=1,
               budget=2)

        self.assertEqual(rh.get_cost(config1), 20)
        self.assertEqual(rh.get_min_cost(config1), 10)
Example #6
    def test_get_configs_per_budget(self):

        rh = RunHistory()
        cs = get_config_space()

        config1 = Configuration(cs, values={'a': 1, 'b': 1})
        rh.add(config=config1,
               cost=10,
               time=10,
               status=StatusType.SUCCESS,
               instance_id=1,
               seed=1,
               budget=1)

        config2 = Configuration(cs, values={'a': 2, 'b': 2})
        rh.add(config=config2,
               cost=20,
               time=20,
               status=StatusType.SUCCESS,
               instance_id=1,
               seed=1,
               budget=1)

        config3 = Configuration(cs, values={'a': 3, 'b': 3})
        rh.add(config=config3,
               cost=30,
               time=30,
               status=StatusType.SUCCESS,
               instance_id=1,
               seed=1,
               budget=3)

        self.assertListEqual(rh.get_all_configs_per_budget([1]),
                             [config1, config2])
Example #7
    def _get_instances_to_run(
            self, challenger: Configuration, incumbent: Configuration,
            run_history: RunHistory,
            N: int) -> typing.Tuple[typing.List[InstSeedBudgetKey], float]:
        """
        Returns the minimum list of <instance, seed> pairs to run the challenger on
        before comparing it with the incumbent

        Parameters
        ----------
        incumbent: Configuration
            incumbent configuration
        challenger: Configuration
            promising configuration that is presently being evaluated
        run_history: smac.runhistory.runhistory.RunHistory
            Stores all runs we ran so far
        N: int
            number of <instance, seed> pairs to select

        Returns
        -------
        typing.List[InstSeedBudgetKey]
            list of <instance, seed, budget> tuples to run
        float
            total (runtime) cost of running the incumbent on the instances (used for adaptive capping while racing)
        """
        # get next instances left for the challenger
        # Line 8
        inc_inst_seeds = set(
            run_history.get_runs_for_config(incumbent,
                                            only_max_observed_budget=True))
        chall_inst_seeds = set(
            run_history.get_runs_for_config(challenger,
                                            only_max_observed_budget=True))
        # Line 10
        missing_runs = list(inc_inst_seeds - chall_inst_seeds)

        # Line 11
        self.rs.shuffle(missing_runs)
        if N < 0:
            raise ValueError(
                'Argument N must not be smaller than zero, but is %s' % str(N))
        to_run = missing_runs[:min(N, len(missing_runs))]
        missing_runs = missing_runs[min(N, len(missing_runs)):]

        # for adaptive capping
        # (computed here for efficiency)
        inst_seed_pairs = list(inc_inst_seeds - set(missing_runs))
        # cost used by incumbent for going over all runs in inst_seed_pairs
        inc_sum_cost = run_history.sum_cost(
            config=incumbent,
            instance_seed_budget_keys=inst_seed_pairs,
        )

        return to_run, inc_sum_cost
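
The selection step in _get_instances_to_run above reduces to a set difference followed by a bounded slice: the challenger only receives <instance, seed> pairs the incumbent has already seen but the challenger has not, and at most N of them per call. A minimal, self-contained sketch of that core step in plain Python, using made-up instance/seed pairs (not part of the original code):

import random

# hypothetical <instance, seed> pairs already evaluated
inc_inst_seeds = {('inst1', 1), ('inst2', 1), ('inst3', 2), ('inst4', 3)}  # incumbent
chall_inst_seeds = {('inst1', 1)}                                          # challenger

N = 2
missing_runs = list(inc_inst_seeds - chall_inst_seeds)   # runs the challenger still lacks
random.shuffle(missing_runs)

to_run = missing_runs[:min(N, len(missing_runs))]        # at most N new runs this round
remaining = missing_runs[min(N, len(missing_runs)):]     # postponed to later calls

# pairs over which the incumbent's cost is summed for adaptive capping
inst_seed_pairs = list(inc_inst_seeds - set(remaining))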
Example #8
    def test_choose_next_higher_budget(self):
        seed = 42
        config = self.scenario.cs.sample_configuration
        rh = RunHistory()
        rh.add(
            config=config(),
            cost=1,
            time=10,
            instance_id=None,
            seed=1,
            budget=1,
            additional_info=None,
            status=StatusType.SUCCESS,
        )
        rh.add(
            config=config(),
            cost=2,
            time=10,
            instance_id=None,
            seed=1,
            budget=2,
            additional_info=None,
            status=StatusType.SUCCESS,
        )
        rh.add(
            config=config(),
            cost=3,
            time=10,
            instance_id=None,
            seed=1,
            budget=2,
            additional_info=None,
            status=StatusType.SUCCESS,
        )
        rh.add(
            config=config(),
            cost=4,
            time=10,
            instance_id=None,
            seed=1,
            budget=3,
            additional_info=None,
            status=StatusType.SUCCESS,
        )

        smbo = SMAC4AC(self.scenario, rng=seed, runhistory=rh).solver
        smbo.epm_chooser.min_samples_model = 2

        # Return two configurations evaluated with budget==2
        X, Y, X_configurations = smbo.epm_chooser._collect_data_to_train_model()
        self.assertListEqual(list(Y.flatten()), [2, 3])
        self.assertEqual(X.shape[0], 2)
        self.assertEqual(X_configurations.shape[0], 2)
Example #9
def merge_foreign_data(
    scenario: Scenario, runhistory: RunHistory,
    in_scenario_list: typing.List[Scenario],
    in_runhistory_list: typing.List[RunHistory]
) -> typing.Tuple[Scenario, RunHistory]:
    """Extend <scenario> and <runhistory> with runhistory data from another
    <in_scenario> assuming the same pcs, feature space, but different instances

    Parameters
    ----------
    scenario: Scenario
        original scenario -- feature dictionary will be extended
    runhistory: RunHistory
        original runhistory -- will be extended by further data points
    in_scenario_list: typing.List[Scenario]
        input scenario
    in_runhistory_list: typing.List[RunHistory]
        list of runhistories wrt <in_scenario>

    Returns
    -------
    scenario: Scenario
    runhistory: RunHistory
    """
    # add further instance features
    for in_scenario in in_scenario_list:
        if scenario.n_features != in_scenario.n_features:
            raise ValueError(
                "Feature Space has to be the same for both scenarios (%d vs %d)."
                % (scenario.n_features, in_scenario.n_features))

        if scenario.cs != in_scenario.cs:  # type: ignore[attr-defined] # noqa F821
            raise ValueError("PCS of both scenarios have to be identical.")

        if scenario.cutoff != in_scenario.cutoff:  # type: ignore[attr-defined] # noqa F821
            raise ValueError("Cutoffs of both scenarios have to be identical.")

        scenario.feature_dict.update(in_scenario.feature_dict)

    # extend runhistory
    for rh in in_runhistory_list:
        runhistory.update(rh, origin=DataOrigin.EXTERNAL_DIFFERENT_INSTANCES)

    for date in runhistory.data:
        if scenario.feature_dict.get(date.instance_id) is None:
            raise ValueError(
                "Instance feature for \"%s\" was not found in scenario data." %
                (date.instance_id))

    runhistory.compute_all_costs(instances=scenario.train_insts)

    return scenario, runhistory
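
A hedged usage sketch for merge_foreign_data as defined above: the file paths are illustrative, both scenarios are assumed to declare instance features for all of their instances (otherwise the final feature check raises), and the no-argument RunHistory constructor matches the SMAC version used in this example.

# illustrative paths -- replace with your own SMAC output directories
scenario = Scenario('scenario.txt', cmd_options={'output_dir': ''})
runhistory = RunHistory()
runhistory.load_json('runhistory.json', scenario.cs)

other_scenario = Scenario('other_run/scenario.txt', cmd_options={'output_dir': ''})
other_rh = RunHistory()
other_rh.load_json('other_run/runhistory.json', scenario.cs)

scenario, runhistory = merge_foreign_data(
    scenario, runhistory,
    in_scenario_list=[other_scenario],
    in_runhistory_list=[other_rh])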
Example #10
    def test_no_feature_dict(self):
        scen = Scenario(self.scen_fn, cmd_options={'run_obj': 'quality'})
        scen.feature_array = None
        validator = Validator(scen, self.trajectory)
        old_rh = RunHistory()
        for config in [e["incumbent"] for e in self.trajectory]:
            old_rh.add(config,
                       1,
                       1,
                       StatusType.SUCCESS,
                       instance_id='0',
                       seed=127)
        validator.validate_epm(runhistory=old_rh)
Example #11
def merge_foreign_data(scenario: Scenario, runhistory: RunHistory,
                       in_scenario_list: typing.List[Scenario],
                       in_runhistory_list: typing.List[RunHistory]):
    '''
        extend <scenario> and <runhistory> with runhistory data from another <in_scenario> 
        assuming the same pcs, feature space, but different instances

        Arguments
        ---------
        scenario: Scenario
            original scenario -- feature dictionary will be extended
        runhistory: RunHistory
            original runhistory -- will be extended by further data points
        in_scenario_list: typing.List[Scenario]
            input scenario 
        in_runhistory_list: typing.List[RunHistory]
            list of runhistories wrt <in_scenario>

        Returns
        -------
            scenario, runhistory
    '''

    # add further instance features
    for in_scenario in in_scenario_list:
        if scenario.n_features != in_scenario.n_features:
            raise ValueError(
                "Feature Space has to be the same for both scenarios (%d vs %d)."
                % (scenario.n_features, in_scenario.n_features))

        if scenario.cs != in_scenario.cs:
            raise ValueError("PCS of both scenarios have to be identical.")

        if scenario.cutoff != in_scenario.cutoff:
            raise ValueError("Cutoffs of both scenarios have to be identical.")

        scenario.feature_dict.update(in_scenario.feature_dict)

    # extend runhistory
    for rh in in_runhistory_list:
        runhistory.update(rh, external_data=True)

    for date in runhistory.data:
        if scenario.feature_dict.get(date.instance_id) is None:
            raise ValueError(
                "Instance feature for \"%s\" was not found in scenario data." %
                (date.instance_id))

    runhistory.compute_all_costs(instances=scenario.train_insts)

    return scenario, runhistory
Example #12
    def _aggregate(self, runs):
        # path_to_folder is the concatenation of all the paths of the individual runs
        path_to_folder = '-'.join(
            sorted(list(set([r.path_to_folder for r in runs]))))
        # budgets are the union of the individual budgets. If they are not the same for
        # all runs (no use case atm), an additional entry with the hash over the string
        # of the combination is added to avoid false-positives.
        budgets = [r.reduced_to_budgets for r in runs]
        budget_hash = (['budgetmix-%d' % hash(str(budgets))]
                       if len(set([frozenset(b) for b in budgets])) != 1 else [])
        budgets = ([a for b in [x for x in budgets if x is not None] for a in b]
                   + budget_hash)

        if ConfiguratorRun.identify(path_to_folder, budgets) in self.cache:
            return self.cache[ConfiguratorRun.identify(path_to_folder,
                                                       budgets)]

        orig_rh, vali_rh = RunHistory(), RunHistory()
        for run in runs:
            orig_rh.update(run.original_runhistory, origin=DataOrigin.INTERNAL)
            vali_rh.update(run.original_runhistory, origin=DataOrigin.INTERNAL)
            if run.validated_runhistory:
                vali_rh.update(run.validated_runhistory,
                               origin=DataOrigin.EXTERNAL_SAME_INSTANCES)

        for rh_name, rh in [
            ("original", orig_rh),
            ("validated", vali_rh),
        ]:
            self.logger.debug(
                'Combined number of %s RunHistory data points: %d '
                '# Configurations: %d. # Configurator runs: %d', rh_name,
                len(rh.data), len(rh.get_all_configs()), len(runs))

        traj = combine_trajectories([run.trajectory for run in runs],
                                    self.logger)

        new_cr = ConfiguratorRun(
            runs[0].scenario,
            orig_rh,
            vali_rh,
            traj,
            self.analyzing_options,
            output_dir=self.output_dir,
            path_to_folder=path_to_folder,
            reduced_to_budgets=budgets,
        )

        self._cache(new_cr)
        return new_cr
Example #13
    def test_choose_next_2(self):
        # Test with a single configuration in the runhistory!
        def side_effect(X):
            return np.mean(X, axis=1).reshape((-1, 1))

        def side_effect_predict(X):
            m, v = np.ones((X.shape[0], 1)), None
            return m, v

        seed = 42
        incumbent = self.scenario.cs.get_default_configuration()
        rh = RunHistory()
        rh.add(incumbent, 10, 10, StatusType.SUCCESS)
        epm_chooser = SMAC4AC(self.scenario, rng=seed,
                              runhistory=rh).solver.epm_chooser

        epm_chooser.model = mock.Mock(spec=RandomForestWithInstances)
        epm_chooser.model.predict_marginalized_over_instances.side_effect = side_effect_predict
        epm_chooser.acquisition_func._compute = mock.Mock(
            spec=RandomForestWithInstances)
        epm_chooser.acquisition_func._compute.side_effect = side_effect
        epm_chooser.incumbent = incumbent

        challengers = epm_chooser.choose_next()
        # Convert challenger list (a generator) to a real list
        challengers = [c for c in challengers]

        self.assertEqual(epm_chooser.model.train.call_count, 1)

        # For each configuration it is randomly sampled whether to take it from the list of challengers or to sample it
        # completely at random. Therefore, it is not guaranteed to obtain twice the number of configurations selected
        # by EI.
        self.assertEqual(len(challengers), 9968)
        num_random_search_sorted = 0
        num_random_search = 0
        num_local_search = 0
        for c in challengers:
            self.assertIsInstance(c, Configuration)
            if 'Random Search (sorted)' == c.origin:
                num_random_search_sorted += 1
            elif 'Random Search' == c.origin:
                num_random_search += 1
            elif 'Local Search' == c.origin:
                num_local_search += 1
            else:
                raise ValueError((c.origin, 'Local Search' == c.origin,
                                  type('Local Search'), type(c.origin)))

        self.assertEqual(num_local_search, 11)
        self.assertEqual(num_random_search_sorted, 5000)
        self.assertEqual(num_random_search, 4957)
Example #14
def read(run_history: RunHistory,
         output_dirs: typing.Union[str, typing.List[str]],
         configuration_space: ConfigurationSpace,
         logger: logging.Logger) -> None:
    """Update runhistory with run results from concurrent runs of pSMAC.

    Parameters
    ----------
    run_history : smac.runhistory.RunHistory
        RunHistory object to be updated with run information from runhistory
        objects stored in the output directory.
    output_dirs : typing.Union[str,typing.List[str]]
        List of SMAC output directories
        or a glob path expression (str) which will be cast into a list with
        glob.glob(). This function will search the output directories
        for files matching the runhistory regular expression.
    configuration_space : ConfigSpace.ConfigurationSpace
        A ConfigurationSpace object to check if loaded configurations are valid.
    logger : logging.Logger
    """
    numruns_in_runhistory = len(run_history.data)
    initial_numruns_in_runhistory = numruns_in_runhistory

    if isinstance(output_dirs, str):
        parsed_output_dirs = glob.glob(output_dirs)
        if glob.glob(os.path.join(output_dirs, "run_*")):
            parsed_output_dirs += glob.glob(os.path.join(output_dirs, "run_*"))
    else:
        parsed_output_dirs = output_dirs

    for output_directory in parsed_output_dirs:
        for file_in_output_directory in os.listdir(output_directory):
            match = re.match(RUNHISTORY_RE, file_in_output_directory)
            valid_match = re.match(VALIDATEDRUNHISTORY_RE,
                                   file_in_output_directory)
            if match or valid_match:
                runhistory_file = os.path.join(output_directory,
                                               file_in_output_directory)
                run_history.update_from_json(runhistory_file,
                                             configuration_space)

                new_numruns_in_runhistory = len(run_history.data)
                difference = new_numruns_in_runhistory - numruns_in_runhistory
                logger.debug('Shared model mode: Loaded %d new runs from %s' %
                             (difference, runhistory_file))
                numruns_in_runhistory = new_numruns_in_runhistory

    difference = numruns_in_runhistory - initial_numruns_in_runhistory
    logger.info(
        'Shared model mode: Finished loading new runs, found %d new runs.' %
        difference)
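
A brief usage sketch for read(): it assumes a pSMAC-style shared-model setup in which several runs wrote runhistory files below a common output directory; the glob pattern and the hyperparameter are illustrative and must be replaced by the actual output location and configuration space of the runs.

import logging

from ConfigSpace import ConfigurationSpace
from ConfigSpace.hyperparameters import UniformFloatHyperparameter

logger = logging.getLogger("psmac.reader")

# must match the configuration space the runs were produced with
cs = ConfigurationSpace()
cs.add_hyperparameter(UniformFloatHyperparameter("x", 0.0, 1.0))

run_history = RunHistory()
read(run_history=run_history,
     output_dirs='smac3-output/run_*',   # illustrative glob over parallel run dirs
     configuration_space=cs,
     logger=logger)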
Example #15
def main():
    parser = argparse.ArgumentParser(description='test',
                                     fromfile_prefix_chars="@")
    parser.add_argument('-s',
                        '--scenario_file',
                        dest='scenario',
                        required=True)
    parser.add_argument('-rh',
                        '--runhistory_file',
                        dest='runhistory',
                        required=True)
    parser.add_argument('-o', '--output_file', dest='output', required=True)
    args = parser.parse_args()

    scenario = Scenario(args.scenario)
    # We load the runhistory, ...
    rh_path = os.path.join(args.runhistory)
    runhistory = RunHistory(aggregate_func=None)
    runhistory.load_json(rh_path, scenario.cs)

    cost_default = []
    cost_incumbent = []
    # iterate over data because it is an OrderedDict
    for entry, values in runhistory.data.items():
        config_id = entry.config_id  # look up config id
        config = runhistory.ids_config[config_id]  # look up config
        z_ = values.cost  # get cost
        if z_ > 100:
            z_ = 150
        if config_id == 1:
            cost_default.append(z_)
        else:
            cost_incumbent.append(z_)
    fig1 = plt.figure()
    ax1 = fig1.add_subplot(111)
    ax1.plot(cost_incumbent,
             cost_default,
             linestyle='None',
             marker='o',
             color="black")
    ax1.plot([0, 100], [0, 100], 'r-')
    ax1.plot([0, 100], [100, 100], linestyle='dashed', color="black")
    ax1.plot([100, 100], [100, 0], linestyle='dashed', color="black")
    plt.ylabel("Default Configuration")
    plt.xlabel("Incumbent")
    ax1.set_xlim([0, 175])
    ax1.set_ylim([0, 175])
    plt.gca().set_aspect('equal', adjustable='box')
    plt.title("Performance of Incumbent compared to Default Configuration")
    plt.savefig(args.output)
Example #16
    def test_multiple_budgets(self):

        rh = RunHistory()
        cs = get_config_space()
        config1 = Configuration(cs, values={"a": 1, "b": 2})

        rh.add(
            config=config1,
            cost=[10, 50],
            time=20,
            status=StatusType.SUCCESS,
            instance_id=1,
            seed=1,
            budget=1,
        )

        self.assertEqual(rh.get_cost(config1), 1.0)

        # Only the higher budget gets included in the config cost
        # However, we expect that the bounds are changed
        rh.add(
            config=config1,
            cost=[20, 25],
            time=25,
            status=StatusType.SUCCESS,
            instance_id=1,
            seed=1,
            budget=5,
        )

        self.assertEqual(rh.get_cost(config1), 0.5)
Example #17
    def test_add_and_pickle(self):
        '''
            simply adding some rundata to runhistory, then pickle it
        '''
        rh = RunHistory()
        cs = get_config_space()
        config = Configuration(cs, values={'a': 1, 'b': 2})

        self.assertTrue(rh.empty())

        rh.add(config=config, cost=10, time=20,
               status=StatusType.SUCCESS, instance_id=None,
               seed=None,
               additional_info=None)

        rh.add(config=config, cost=10, time=20,
               status=StatusType.SUCCESS, instance_id=1,
               seed=12354,
               additional_info={"start_time": 10})

        self.assertFalse(rh.empty())

        tmpfile = tempfile.NamedTemporaryFile(mode='wb', delete=False)
        pickle.dump(rh, tmpfile, -1)
        name = tmpfile.name
        tmpfile.close()

        with open(name, 'rb') as fh:
            loaded_rh = pickle.load(fh)
        self.assertEqual(loaded_rh.data, rh.data)
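
Pickling works, but for sharing runhistories between processes SMAC's own JSON round-trip is usually more convenient. A short sketch reusing the same test helpers as above, and assuming the save_json/load_json methods available on this RunHistory version:

rh = RunHistory()
cs = get_config_space()
config = Configuration(cs, values={'a': 1, 'b': 2})
rh.add(config=config, cost=10, time=20,
       status=StatusType.SUCCESS, instance_id=1, seed=1)

rh.save_json('runhistory.json')            # serialize runs and configurations

restored = RunHistory()
restored.load_json('runhistory.json', cs)  # needs the matching ConfigurationSpace
assert len(restored.data) == len(rh.data)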
Example #18
    def test_incremental_update(self):

        rh = RunHistory()
        cs = get_config_space()
        config1 = Configuration(cs, values={"a": 1, "b": 2})

        rh.add(
            config=config1,
            cost=10,
            time=20,
            status=StatusType.SUCCESS,
            instance_id=1,
            seed=1,
        )

        self.assertEqual(rh.get_cost(config1), 10)

        rh.add(
            config=config1,
            cost=20,
            time=20,
            status=StatusType.SUCCESS,
            instance_id=2,
            seed=1,
        )

        self.assertEqual(rh.get_cost(config1), 15)
Example #19
    def test_add_multiple_times(self):
        rh = RunHistory()
        cs = get_config_space()
        config = Configuration(cs, values={'a': 1, 'b': 2})

        for i in range(5):
            rh.add(config=config, cost=i + 1, time=i + 1,
                   status=StatusType.SUCCESS, instance_id=None,
                   seed=12345, additional_info=None)

        self.assertEqual(len(rh.data), 1)
        self.assertEqual(len(rh.get_runs_for_config(config, only_max_observed_budget=True)), 1)
        self.assertEqual(len(rh._configid_to_inst_seed_budget[1]), 1)
        self.assertEqual(list(rh.data.values())[0].cost, 1)
Example #20
def merge_foreign_data_from_file(
        scenario: Scenario,
        runhistory: RunHistory,
        in_scenario_fn_list: typing.List[str],
        in_runhistory_fn_list: typing.List[str],
        cs: ConfigurationSpace,
        aggregate_func: typing.Callable = average_cost):
    """Extend <scenario> and <runhistory> with runhistory data from another
    <in_scenario> assuming the same pcs, feature space, but different instances

    Parameters
    ----------
    scenario: Scenario
        original scenario -- feature dictionary will be extended
    runhistory: RunHistory
        original runhistory -- will be extended by further data points
    in_scenario_fn_list: typing.List[str]
        input scenario file names
    in_runhistory_fn_list: typing.List[str]
        list filenames of runhistory dumps
    cs: ConfigurationSpace
        parameter configuration space to read runhistory from file
    aggregate_func: typing.Callable
        function to aggregate performance of a configuration across instances

    Returns
    -------
    scenario: Scenario
    runhistory: RunHistory
    """

    if not in_scenario_fn_list:
        raise ValueError(
            "To read warmstart data from previous runhistories, the corresponding scenarios are required. Use option --warmstart_scenario"
        )
    scens = [
        Scenario(scenario=scen_fn, cmd_args={"output_dir": ""})
        for scen_fn in in_scenario_fn_list
    ]
    rhs = []
    for rh_fn in in_runhistory_fn_list:
        rh = RunHistory(aggregate_func)
        rh.load_json(rh_fn, cs)
        rhs.append(rh)

    return merge_foreign_data(scenario,
                              runhistory,
                              in_scenario_list=scens,
                              in_runhistory_list=rhs)
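
A hedged usage sketch for merge_foreign_data_from_file, mirroring the older aggregate_func-based API this helper targets; the scenario/runhistory paths are illustrative placeholders.

scenario = Scenario(scenario='scenario.txt', cmd_args={'output_dir': ''})
runhistory = RunHistory(aggregate_func=average_cost)
runhistory.load_json('runhistory.json', scenario.cs)

scenario, runhistory = merge_foreign_data_from_file(
    scenario=scenario,
    runhistory=runhistory,
    in_scenario_fn_list=['warmstart/scenario.txt'],
    in_runhistory_fn_list=['warmstart/runhistory.json'],
    cs=scenario.cs,
    aggregate_func=average_cost)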
Example #21
def main():
    parser = argparse.ArgumentParser(description='test',
                                     fromfile_prefix_chars="@")
    parser.add_argument('-s',
                        '--scenario_file',
                        dest='scenario',
                        required=True)
    parser.add_argument('-rh',
                        '--runhistory_file',
                        dest='runhistory',
                        required=True)
    parser.add_argument('-o', '--output_file', dest='output', required=True)
    args = parser.parse_args()

    scenario = Scenario(args.scenario)
    # We load the runhistory, ...
    rh_path = os.path.join(args.runhistory)
    runhistory = RunHistory(aggregate_func=None)
    runhistory.load_json(rh_path, scenario.cs)

    cost_default = 1
    cost_incumbent = []
    # iterate over data because it is an OrderedDict
    for entry, values in runhistory.data.items():
        config_id = entry.config_id  # look up config id
        config = runhistory.ids_config[config_id]  # look up config
        z_ = values.cost  # get cost
        if config_id == 1:  #default configuration
            cost_default = z_
        else:
            cost_incumbent.append(z_)
    fig1 = plt.figure()
    ax1 = fig1.add_subplot(111)
    x = range(len(cost_incumbent))
    ax1.plot(x,
             cost_incumbent,
             linestyle='None',
             marker='+',
             color="black",
             label='SMAC')
    ax1.plot([0, len(x)], [cost_default, cost_default],
             'r-',
             label='Default Configuration')
    plt.ylabel("Loss")
    plt.xlabel("SMAC")
    plt.title("Performance of Incumbents compared to Default Configuration")
    plt.legend()
    plt.savefig(args.output)
Example #22
    def test_validate_epm(self):
        ''' test using epm to validate '''
        scen = Scenario(self.scen_fn,
                        cmd_args={'run_obj': 'quality',
                                  'instances': self.train_insts,
                                  'test_instances': self.test_insts,
                                  'features': self.feature_dict})
        scen.instance_specific = self.inst_specs
        validator = Validator(scen, self.trajectory, self.rng)
        # Add a few runs and check, if they are correctly processed
        old_configs = [entry["incumbent"] for entry in self.trajectory]
        old_rh = RunHistory(average_cost)
        for config in old_configs[:int(len(old_configs)/2)]:
            old_rh.add(config, 1, 1, StatusType.SUCCESS, instance_id='0',
                       seed=127)
        validator.validate_epm('all', 'train', 1, old_rh)
Example #23
    def _get_mean_costs(self, incs: typing.List[Configuration],
                        new_rh: RunHistory):
        """
        Compute mean cost per instance

        Parameters
        ----------
        incs : typing.List[Configuration]
            incumbents determined by all parallel SMAC runs
        new_rh : RunHistory
            runhistory to determine mean performance

        Returns
        -------
        List[float] means
        Dict(Config -> Dict(inst_id(str) -> float))

        """
        config_cost_per_inst = {}
        results = []
        for incumbent in incs:
            cost_per_inst = new_rh.get_instance_costs_for_config(
                config=incumbent)
            config_cost_per_inst[incumbent] = cost_per_inst
            values = list(cost_per_inst.values())
            if values:
                results.append(np.mean(values))
            else:
                results.append(np.nan)
        return results, config_cost_per_inst
Example #24
def get_cost_dict_for_config(rh: RunHistory,
                             conf: Configuration,
                             par: int = 1,
                             cutoff: typing.Union[float, None] = None):
    """
    Aggregates loss for configuration on evaluated instances over seeds.

    Parameters
    ----------
    rh: RunHistory
        runhistory with data
    conf: Configuration
        configuration to evaluate
    par: int
        par-factor with which to multiply timeouts
    cutoff: float
        cutoff of scenario - used to penalize costs if par != 1

    Returns
    -------
    cost: dict(instance->cost)
        cost per instance (aggregated or as list per seed)
    """
    # Check if config is in runhistory
    conf_id = rh.config_ids[conf]

    # Map instances to seeds in dict
    runs = rh.get_runs_for_config(conf)
    instance_to_seeds = dict()
    for run in runs:
        inst, seed = run
        if inst in instance_to_seeds:
            instance_to_seeds[inst].append(seed)
        else:
            instance_to_seeds[inst] = [seed]

    # Get loss per instance
    instance_costs = {
        i: [rh.data[RunKey(conf_id, i, s)].cost for s in instance_to_seeds[i]]
        for i in instance_to_seeds
    }

    # Aggregate:
    instance_costs = {i: np.mean(instance_costs[i]) for i in instance_costs}

    # TODO: uncomment next line and delete all above after next SMAC dev->master
    # instance_costs = rh.get_instance_costs_for_config(conf)

    if par != 1:
        if cutoff:
            instance_costs = {
                k: v if v < cutoff else v * par
                for k, v in instance_costs.items()
            }
        else:
            raise ValueError(
                "To apply penalization of costs, a cutoff needs to be provided."
            )

    return instance_costs
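
A short, hedged example of calling get_cost_dict_for_config on a hand-built runhistory, reusing the get_config_space test helper from the examples above (depending on the SMAC version, RunHistory may or may not take an aggregate_func). Costs are averaged over seeds per instance, and the PAR multiplier is only applied when a cutoff is supplied.

rh = RunHistory(average_cost)            # older API; plain RunHistory() in newer SMAC
cs = get_config_space()
conf = Configuration(cs, values={'a': 1, 'b': 2})
rh.add(config=conf, cost=3.0, time=3.0, status=StatusType.SUCCESS,
       instance_id='inst1', seed=1)
rh.add(config=conf, cost=5.0, time=5.0, status=StatusType.SUCCESS,
       instance_id='inst1', seed=2)

costs = get_cost_dict_for_config(rh, conf)                      # mean over seeds per instance
par10 = get_cost_dict_for_config(rh, conf, par=10, cutoff=2.5)  # penalizes costs at/above cutoff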
Example #25
    def test_epm_reuse_rf(self):
        """ if no runhistory is passed to epm, but there was a model trained
        before, that model should be reused! (if reuse_epm flag is set) """
        scen = Scenario(self.scen_fn, cmd_args={'run_obj': 'quality'})
        scen.feature_array = None
        validator = Validator(scen, self.trajectory)
        old_rh = RunHistory(average_cost)
        for config in [e["incumbent"] for e in self.trajectory]:
            old_rh.add(config, 1, 1, StatusType.SUCCESS, instance_id='0',
                       seed=127)
        self.assertTrue(isinstance(validator.validate_epm(runhistory=old_rh),
                                   RunHistory))
        self.assertTrue(isinstance(
            validator.validate_epm(output_fn="test/test_files/validation/"),
            RunHistory))
        self.assertRaises(ValueError, validator.validate_epm, reuse_epm=False)
Example #26
    def test_choose_next_2(self):
        def side_effect(X, derivative):
            return np.mean(X, axis=1).reshape((-1, 1))

        smbo = SMAC(self.scenario, rng=1).solver
        smbo.incumbent = self.scenario.cs.sample_configuration()
        smbo.runhistory = RunHistory(aggregate_func=average_cost)
        smbo.model = mock.Mock(spec=RandomForestWithInstances)
        smbo.acquisition_func._compute = mock.Mock(
            spec=RandomForestWithInstances)
        smbo.acquisition_func._compute.side_effect = side_effect

        X = smbo.rng.rand(10, 2)
        Y = smbo.rng.rand(10, 1)

        x = smbo.choose_next(X, Y)

        self.assertEqual(smbo.model.train.call_count, 1)
        self.assertEqual(len(x), 2020)
        num_random_search = 0
        num_local_search = 0
        for i in range(0, 2020, 2):
            #print(x[i].origin)
            self.assertIsInstance(x[i], Configuration)
            if 'Random Search (sorted)' in x[i].origin:
                num_random_search += 1
            elif 'Local Search' in x[i].origin:
                num_local_search += 1
        # the number of local search configs has to be at least 10,
        # since x can have duplicates
        # which can be associated with the local search
        self.assertGreaterEqual(num_local_search, 10)
        for i in range(1, 2020, 2):
            self.assertIsInstance(x[i], Configuration)
            self.assertEqual(x[i].origin, 'Random Search')
Example #27
def _cost(config: Configuration, run_history: RunHistory,
          instance_seed_pairs=None):
    """Return array of all costs for the given config for further calculations.

    Parameters
    ----------
    config : Configuration
        Configuration to calculate objective for
    run_history : RunHistory
        RunHistory object from which the objective value is computed.
    instance_seed_pairs : list, optional (default=None)
        List of tuples of instance-seeds pairs. If None, the run_history is
        queried for all runs of the given configuration.

    Returns
    -------
    Costs: list
        Array of all costs
    """
    try:
        id_ = run_history.config_ids[config]
    except KeyError:  # challenger was not running so far
        return []

    if instance_seed_pairs is None:
        instance_seed_pairs = run_history.get_runs_for_config(config)

    costs = []
    for i, r in instance_seed_pairs:
        k = RunKey(id_, i, r)
        costs.append(run_history.data[k].cost)
    return costs
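
A hedged sketch of how the module-internal _cost helper above might be exercised, again reusing the test-style helpers from the earlier examples; the instance ids and seeds are illustrative.

rh = RunHistory()                        # older versions expect an aggregate_func here
cs = get_config_space()
conf = Configuration(cs, values={'a': 1, 'b': 2})
rh.add(config=conf, cost=10, time=1, status=StatusType.SUCCESS,
       instance_id='i1', seed=1)
rh.add(config=conf, cost=20, time=1, status=StatusType.SUCCESS,
       instance_id='i2', seed=1)

all_costs = _cost(conf, rh)                                # costs of every recorded run
subset = _cost(conf, rh, instance_seed_pairs=[('i1', 1)])  # restrict to explicit pairs
never_run = Configuration(cs, values={'a': 2, 'b': 3})
assert _cost(never_run, rh) == []                          # unknown config -> empty list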
Example #28
    def __init__(self,
                 scenario: typing.Type[Scenario],
                 rng: typing.Optional[typing.Union[np.random.RandomState, int]] = None,
                 run_id: int = 1,
                 tae: typing.Type[ExecuteTARun] = ExecuteTARunOld,
                 tae_kwargs: typing.Union[dict, None] = None,
                 shared_model: bool = True,
                 validate: bool = True,
                 n_optimizers: int = 2,
                 val_set: typing.Union[typing.List[str], None] = None,
                 n_incs: int = 1,
                 **kwargs):
        """
        Constructor

        Parameters
        ----------
        scenario : ~smac.scenario.scenario.Scenario
            Scenario object
        n_optimizers: int
            Number of optimizers to run in parallel per round
        rng: int/np.random.RandomState
            The RandomState/seed to pass to each SMAC run
        run_id: int
            run_id for this hydra run
        tae: ExecuteTARun
            Target Algorithm Runner (supports old and aclib format as well as AbstractTAFunc)
        tae_kwargs: Optional[dict]
            arguments passed to constructor of '~tae'
        shared_model: bool
            Flag to indicate whether information is shared between SMAC runs or not
        validate: bool / None
            Flag to indicate whether to validate the found configurations or to use the SMAC estimates
            None => neither and return the full portfolio
        n_incs: int
            Number of incumbents to return (n_incs <= 0 ==> all found configurations)
        val_set: typing.List[str]
            List of instance-ids to validate on

        """
        self.logger = logging.getLogger(
            self.__module__ + "." + self.__class__.__name__)

        self.scenario = scenario
        self.run_id, self.rng = get_rng(rng, run_id, logger=self.logger)
        self.kwargs = kwargs
        self.output_dir = None
        self.rh = RunHistory()
        self._tae = tae
        self._tae_kwargs = tae_kwargs
        if n_optimizers <= 1:
            self.logger.warning('Invalid value in %s: %d. Setting to 2', 'n_optimizers', n_optimizers)
        self.n_optimizers = max(n_optimizers, 2)
        self.validate = validate
        self.shared_model = shared_model
        self.n_incs = min(max(1, n_incs), self.n_optimizers)
        if val_set is None:
            self.val_set = scenario.train_insts
        else:
            self.val_set = val_set
Example #29
    def test_choose_next_2(self):
        def side_effect(X, derivative):
            return np.mean(X, axis=1).reshape((-1, 1))

        smbo = SMBO(self.scenario, 1)
        smbo.runhistory = RunHistory()
        smbo.model = mock.MagicMock()
        smbo.acquisition_func._compute = mock.MagicMock()
        smbo.acquisition_func._compute.side_effect = side_effect
        # local search would call the underlying local search maximizer,
        # which would have to be mocked out. Replacing the method by random
        # search is way easier!
        smbo._get_next_by_local_search = smbo._get_next_by_random_search

        X = smbo.rng.rand(10, 2)
        Y = smbo.rng.rand(10, 1)

        x = smbo.choose_next(X, Y)

        self.assertEqual(smbo.model.train.call_count, 1)
        self.assertEqual(smbo.acquisition_func._compute.call_count, 1)
        self.assertEqual(len(x), 2020)
        num_random_search = 0
        for i in range(0, 2020, 2):
            self.assertIsInstance(x[i], Configuration)
            if x[i].origin == 'Random Search':
                num_random_search += 1
        # Since we replace local search with random search, we have to count
        # the occurrences of random search instead
        self.assertEqual(num_random_search, 10)
        for i in range(1, 2020, 2):
            self.assertIsInstance(x[i], Configuration)
            self.assertEqual(x[i].origin, 'Random Search')
Example #30
    def __init__(self,
                 scenario: typing.Type[Scenario],
                 rng: typing.Optional[typing.Union[np.random.RandomState,
                                                   int]] = None,
                 run_id: int = 1,
                 tae: typing.Type[BaseRunner] = ExecuteTARunOld,
                 tae_kwargs: typing.Union[dict, None] = None,
                 shared_model: bool = True,
                 validate: bool = True,
                 n_optimizers: int = 2,
                 val_set: typing.Union[typing.List[str], None] = None,
                 n_incs: int = 1,
                 **kwargs):
        self.logger = logging.getLogger(self.__module__ + "." +
                                        self.__class__.__name__)

        self.scenario = scenario
        self.run_id, self.rng = get_rng(rng, run_id, logger=self.logger)
        self.kwargs = kwargs
        self.output_dir = None
        self.rh = RunHistory()
        self._tae = tae
        self._tae_kwargs = tae_kwargs
        if n_optimizers <= 1:
            self.logger.warning('Invalid value in %s: %d. Setting to 2',
                                'n_optimizers', n_optimizers)
        self.n_optimizers = max(n_optimizers, 2)
        self.validate = validate
        self.shared_model = shared_model
        self.n_incs = min(max(1, n_incs), self.n_optimizers)
        if val_set is None:
            self.val_set = scenario.train_insts
        else:
            self.val_set = val_set
Example #31
    def run_smbo(self, max_iters=1000):
        global evaluator

        # == first things first: load the datamanager
        self.reset_data_manager()
        
        # == Initialize SMBO stuff
        # first create a scenario
        seed = self.seed # TODO
        num_params = len(self.config_space.get_hyperparameters())
        # allocate a run history
        run_history = RunHistory()
        meta_runhistory = RunHistory()
        meta_runs_dataset_indices = {}
        num_run = self.start_num_run
        instance_id = self.dataset_name + SENTINEL

        # == Train on subset
        #    before doing anything, let us run the default_cfg
        #    on a subset of the available data to ensure that
        #    we at least have some models
        #    we will try three different ratios of decreasing magnitude
        #    in the hope that at least on the last one we will be able
        #    to get a model
        n_data = self.datamanager.data['X_train'].shape[0]
        subset_ratio = 10000. / n_data
        if subset_ratio >= 0.5:
            subset_ratio = 0.33
            subset_ratios = [subset_ratio, subset_ratio * 0.10]
        else:
            subset_ratios = [subset_ratio, 500. / n_data]
        self.logger.info("Training default configurations on a subset of "
                         "%d/%d data points." %
                         (int(n_data * subset_ratio), n_data))

        # the time limit for these function evaluations is rigorously
        # set to only 1/2 of a full function evaluation
        subset_time_limit = max(5, int(self.func_eval_time_limit / 2))
        # the configs we want to run on the data subset are:
        # 1) the default configs
        # 2) a set of configs we selected for training on a subset
        subset_configs = [self.config_space.get_default_configuration()] \
                          + self.collect_additional_subset_defaults()
        subset_config_succesful = [False] * len(subset_configs)
        for subset_config_id, next_config in enumerate(subset_configs):
            for i, ratio in enumerate(subset_ratios):
                self.reset_data_manager()
                n_data_subsample = int(n_data * ratio)

                # run the config, but throw away the result afterwards
                # since this cfg was evaluated only on a subset
                # and we don't want  to confuse SMAC
                self.logger.info("Starting to evaluate %d on SUBSET "
                                 "with size %d and time limit %ds.",
                                 num_run, n_data_subsample,
                                 subset_time_limit)
                self.logger.info(next_config)
                _info = eval_with_limits(
                    self.datamanager, self.tmp_dir, next_config,
                    seed, num_run,
                    self.resampling_strategy,
                    self.resampling_strategy_args,
                    self.memory_limit,
                    subset_time_limit, n_data_subsample)
                (duration, result, _, additional_run_info, status) = _info
                self.logger.info("Finished evaluating %d. configuration on SUBSET. "
                                 "Duration %f; loss %f; status %s; additional run "
                                 "info: %s ", num_run, duration, result,
                                 str(status), additional_run_info)

                num_run += 1
                if i < len(subset_ratios) - 1:
                    if status != StatusType.SUCCESS:
                        # Do not increase num_run here, because we will try
                        # the same configuration with less data
                        self.logger.info("A CONFIG did not finish "
                                         " for subset ratio %f -> going smaller",
                                         ratio)
                        continue
                    else:
                        self.logger.info("Finished SUBSET training successfully "
                                         "with ratio %f", ratio)
                        subset_config_succesful[subset_config_id] = True
                        break
                else:
                    if status != StatusType.SUCCESS:
                        self.logger.info("A CONFIG did not finish "
                                         " for subset ratio %f.",
                                         ratio)
                        continue
                    else:
                        self.logger.info("Finished SUBSET training successfully "
                                         "with ratio %f", ratio)
                        subset_config_succesful[subset_config_id] = True
                        break

        # Use the first non-failing configuration from the subsets as the new
        #  default configuration -> this guards us against the random forest
        # failing on large, sparse datasets
        default_cfg = None
        for subset_config_id, next_config in enumerate(subset_configs):
            if subset_config_succesful[subset_config_id]:
                default_cfg = next_config
                break
        if default_cfg is None:
            default_cfg = self.config_space.get_default_configuration()

        # == METALEARNING suggestions
        # we start by evaluating the defaults on the full dataset again
        # and add the suggestions from metalearning behind it

        if self.metadata_directory is None:
            metalearning_directory = os.path.dirname(
                autosklearn.metalearning.__file__)
            # There is no multilabel data in OpenML
            if self.task == MULTILABEL_CLASSIFICATION:
                meta_task = BINARY_CLASSIFICATION
            else:
                meta_task = self.task
            metadata_directory = os.path.join(
                metalearning_directory, 'files',
                '%s_%s_%s' % (METRIC_TO_STRING[self.metric],
                              TASK_TYPES_TO_STRING[meta_task],
                              'sparse' if self.datamanager.info['is_sparse']
                              else 'dense'))
            self.metadata_directory = metadata_directory

        self.logger.info('Metadata directory: %s', self.metadata_directory)
        meta_base = MetaBase(self.config_space, self.metadata_directory)

        metafeature_calculation_time_limit = int(
            self.total_walltime_limit / 4)
        metafeature_calculation_start_time = time.time()
        meta_features = self._calculate_metafeatures_with_limits(
            metafeature_calculation_time_limit)
        metafeature_calculation_end_time = time.time()
        metafeature_calculation_time_limit = (
            metafeature_calculation_time_limit -
            (metafeature_calculation_end_time -
             metafeature_calculation_start_time))

        if metafeature_calculation_time_limit < 1:
            self.logger.warning('Time limit for metafeature calculation less '
                                'than 1 seconds (%f). Skipping calculation '
                                'of metafeatures for encoded dataset.',
                                metafeature_calculation_time_limit)
            meta_features_encoded = None
        else:
            self.datamanager.perform1HotEncoding()
            meta_features_encoded = \
                self._calculate_metafeatures_encoded_with_limits(
                    metafeature_calculation_time_limit)

        # In case there is a problem calculating the encoded meta-features
        if meta_features is None:
            if meta_features_encoded is not None:
                meta_features = meta_features_encoded
        else:
            if meta_features_encoded is not None:
                meta_features.metafeature_values.update(
                    meta_features_encoded.metafeature_values)

        if meta_features is not None:
            meta_base.add_dataset(instance_id, meta_features)
            # Do mean imputation of the meta-features - should be done specifically
            # for each prediction model!
            all_metafeatures = meta_base.get_metafeatures(
                features=list(meta_features.keys()))
            all_metafeatures.fillna(all_metafeatures.mean(), inplace=True)

            metalearning_configurations = self.collect_metalearning_suggestions(
                meta_base)
            if metalearning_configurations is None:
                metalearning_configurations = []
            self.reset_data_manager()

            self.logger.info('%s', meta_features)

            # Convert meta-features into a dictionary because the scenario
            # expects a dictionary
            meta_features_dict = {}
            for dataset, series in all_metafeatures.iterrows():
                meta_features_dict[dataset] = series.values
            meta_features_list = []
            for meta_feature_name in all_metafeatures.columns:
                meta_features_list.append(meta_features[meta_feature_name].value)
            meta_features_list = np.array(meta_features_list).reshape((1, -1))
            self.logger.info(list(meta_features_dict.keys()))

            meta_runs = meta_base.get_all_runs(METRIC_TO_STRING[self.metric])
            meta_runs_index = 0
            try:
                meta_durations = meta_base.get_all_runs('runtime')
                read_runtime_data = True
            except KeyError:
                read_runtime_data = False
                self.logger.critical('Cannot read runtime data.')
                if self.acquisition_function == 'EIPS':
                    self.logger.critical('Reverting to acquisition function EI!')
                    self.acquisition_function = 'EI'

            for meta_dataset in meta_runs.index:
                meta_dataset_start_index = meta_runs_index
                for meta_configuration in meta_runs.columns:
                    if np.isfinite(meta_runs.loc[meta_dataset, meta_configuration]):
                        try:
                            config = meta_base.get_configuration_from_algorithm_index(
                                meta_configuration)
                            cost = meta_runs.loc[meta_dataset, meta_configuration]
                            if read_runtime_data:
                                runtime = meta_durations.loc[meta_dataset,
                                                             meta_configuration]
                            else:
                                runtime = 1
                            # TODO read out other status types!
                            meta_runhistory.add(config, cost, runtime,
                                                StatusType.SUCCESS,
                                                instance_id=meta_dataset)
                            meta_runs_index += 1
                        except:
                            # TODO maybe add warning
                            pass

                meta_runs_dataset_indices[meta_dataset] = (
                    meta_dataset_start_index, meta_runs_index)
        else:
            if self.acquisition_function == 'EIPS':
                self.logger.critical('Reverting to acquisition function EI!')
                self.acquisition_function = 'EI'
            meta_features_list = []
            meta_features_dict = {}
            metalearning_configurations = []

        self.scenario = AutoMLScenario(self.config_space,
                                       self.total_walltime_limit,
                                       self.func_eval_time_limit,
                                       meta_features_dict,
                                       self.tmp_dir,
                                       self.shared_mode)

        types = get_types(self.config_space, self.scenario.feature_array)
        if self.acquisition_function == 'EI':
            rh2EPM = RunHistory2EPM4Cost(num_params=num_params,
                                         scenario=self.scenario,
                                         success_states=None,
                                         impute_censored_data=False,
                                         impute_state=None)
            model = RandomForestWithInstances(types,
                                              instance_features=meta_features_list,
                                              seed=1, num_trees=10)
            smac = SMBO(self.scenario, model=model,
                        rng=seed)
        elif self.acquisition_function == 'EIPS':
            rh2EPM = RunHistory2EPM4EIPS(num_params=num_params,
                                         scenario=self.scenario,
                                         success_states=None,
                                         impute_censored_data=False,
                                         impute_state=None)
            model = UncorrelatedMultiObjectiveRandomForestWithInstances(
                ['cost', 'runtime'], types, num_trees = 10,
                instance_features=meta_features_list, seed=1)
            acquisition_function = EIPS(model)
            smac = SMBO(self.scenario,
                        acquisition_function=acquisition_function,
                        model=model, runhistory2epm=rh2EPM, rng=seed)
        else:
            raise ValueError('Unknown acquisition function value %s!' %
                             self.acquisition_function)

        # Build a runtime model
        # runtime_rf = RandomForestWithInstances(types,
        #                                        instance_features=meta_features_list,
        #                                        seed=1, num_trees=10)
        # runtime_rh2EPM = RunHistory2EPM4EIPS(num_params=num_params,
        #                                      scenario=self.scenario,
        #                                      success_states=None,
        #                                      impute_censored_data=False,
        #                                      impute_state=None)
        # X_runtime, y_runtime = runtime_rh2EPM.transform(meta_runhistory)
        # runtime_rf.train(X_runtime, y_runtime[:, 1].flatten())
        X_meta, Y_meta = rh2EPM.transform(meta_runhistory)
        # Normalize Y_meta on a per-dataset basis: clip the losses at 2.0,
        # rescale them so that the best loss observed on each meta-dataset
        # maps to 0, and clip the rescaled values at 2 again.
        for meta_dataset in meta_runs_dataset_indices:
            start_index, end_index = meta_runs_dataset_indices[meta_dataset]
            end_index += 1  # Python indexing
            Y_meta[start_index:end_index, 0]\
                [Y_meta[start_index:end_index, 0] > 2.0] = 2.0
            dataset_minimum = np.min(Y_meta[start_index:end_index, 0])
            Y_meta[start_index:end_index, 0] = 1 - (
                (1. - Y_meta[start_index:end_index, 0]) /
                (1. - dataset_minimum))
            Y_meta[start_index:end_index, 0]\
                [Y_meta[start_index:end_index, 0] > 2] = 2

        # == First, evaluate the default and all metalearning configurations
        for i, next_config in enumerate([default_cfg] +
                                        metalearning_configurations):
            # Do not evaluate the default configuration more than once
            if i > 0 and next_config == default_cfg:
                continue

            config_name = 'default' if i == 0 else 'meta-learning'

            self.logger.info("Starting to evaluate %d. configuration "
                             "(%s configuration) with time limit %ds.",
                             num_run, config_name, self.func_eval_time_limit)
            self.logger.info(next_config)
            self.reset_data_manager()
            info = eval_with_limits(self.datamanager, self.tmp_dir, next_config,
                                    seed, num_run,
                                    self.resampling_strategy,
                                    self.resampling_strategy_args,
                                    self.memory_limit,
                                    self.func_eval_time_limit)
            (duration, result, _, additional_run_info, status) = info
            run_history.add(config=next_config, cost=result,
                            time=duration, status=status,
                            instance_id=instance_id, seed=seed)
            run_history.update_cost(next_config, result)
            self.logger.info("Finished evaluating %d. configuration. "
                             "Duration %f; loss %f; status %s; additional run "
                             "info: %s ", num_run, duration, result,
                             str(status), additional_run_info)
            num_run += 1
            if smac.incumbent is None:
                smac.incumbent = next_config
            elif result < run_history.get_cost(smac.incumbent):
                smac.incumbent = next_config

            if self.scenario.shared_model:
                pSMAC.write(run_history=run_history,
                            output_directory=self.scenario.output_dir,
                            num_run=self.seed)

        # == after metalearning run SMAC loop
        smac.runhistory = run_history
        smac_iter = 0
        finished = False
        while not finished:
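            # In shared-model mode, merge the run histories written by other
            # SMAC workers before choosing new configurations.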
            if self.scenario.shared_model:
                pSMAC.read(run_history=run_history,
                           output_directory=self.scenario.output_dir,
                           configuration_space=self.config_space,
                           logger=self.logger)

            next_configs = []
            time_for_choose_next = -1
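            # Transform the current run history into training data, normalize
            # the losses in the same way as the meta-data above, and ask SMAC
            # for a ranked list of candidate configurations; fall back to a
            # random configuration if this fails.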
            try:
                X_cfg, Y_cfg = rh2EPM.transform(run_history)

                if not run_history.empty():
                    # Update costs by normalization
                    dataset_minimum = np.min(Y_cfg[:, 0])
                    Y_cfg[:, 0] = 1 - ((1. - Y_cfg[:, 0]) /
                                       (1. - dataset_minimum))
                    Y_cfg[:, 0][Y_cfg[:, 0] > 2] = 2

                if len(X_meta) > 0 and len(X_cfg) > 0:
                    # Combining the meta-data with the observations on the
                    # current dataset is disabled for now.
                    pass
                    # X_cfg = np.concatenate((X_meta, X_cfg))
                    # Y_cfg = np.concatenate((Y_meta, Y_cfg))
                elif len(X_meta) > 0:
                    X_cfg = X_meta.copy()
                    Y_cfg = Y_meta.copy()
                elif len(X_cfg) > 0:
                    X_cfg = X_cfg.copy()
                    Y_cfg = Y_cfg.copy()
                else:
                    raise ValueError('No training data for SMAC random forest!')

                self.logger.info('Using %d training points for SMAC.' %
                                 X_cfg.shape[0])
                choose_next_start_time = time.time()
                next_configs_tmp = smac.choose_next(X_cfg, Y_cfg,
                                                    num_interleaved_random=110,
                                                    num_configurations_by_local_search=10,
                                                    num_configurations_by_random_search_sorted=100)
                time_for_choose_next = time.time() - choose_next_start_time
                self.logger.info('Used %g seconds to find next '
                                 'configurations' % (time_for_choose_next))
                next_configs.extend(next_configs_tmp)
            # TODO: catch a more specific exception type here
            except Exception as e:
                self.logger.error(e)
                self.logger.error("Error in getting next configurations "
                                  "with SMAC. Using random configuration!")
                next_config = self.config_space.sample_configuration()
                next_configs.append(next_config)

            models_fitted_this_iteration = 0
            start_time_this_iteration = time.time()
            for next_config in next_configs:
                # Array representation with inactive values imputed; only
                # needed for the (currently disabled) runtime prediction below.
                x_runtime = impute_inactive_values(next_config).get_array()
                # predicted_runtime = runtime_rf.predict_marginalized_over_instances(
                #     x_runtime.reshape((1, -1)))
                # predicted_runtime = np.exp(predicted_runtime[0][0][0]) - 1

                self.logger.info("Starting to evaluate %d. configuration (from "
                                 "SMAC) with time limit %ds.", num_run,
                                 self.func_eval_time_limit)
                self.logger.info(next_config)
                self.reset_data_manager()
                info = eval_with_limits(self.datamanager, self.tmp_dir, next_config,
                                        seed, num_run,
                                        self.resampling_strategy,
                                        self.resampling_strategy_args,
                                        self.memory_limit,
                                        self.func_eval_time_limit)
                (duration, result, _, additional_run_info, status) = info
                run_history.add(config=next_config, cost=result,
                                time=duration, status=status,
                                instance_id=instance_id, seed=seed)
                run_history.update_cost(next_config, result)

                #self.logger.info('Predicted runtime %g, true runtime %g',
                #                 predicted_runtime, duration)

                # TODO add unittest to make sure everything works fine and
                # this does not get outdated!
                if smac.incumbent is None:
                    smac.incumbent = next_config
                elif result < run_history.get_cost(smac.incumbent):
                    smac.incumbent = next_config

                self.logger.info("Finished evaluating %d. configuration. "
                                 "Duration: %f; loss: %f; status %s; additional "
                                 "run info: %s ", num_run, duration, result,
                                 str(status), additional_run_info)
                smac_iter += 1
                num_run += 1

                models_fitted_this_iteration += 1
                time_used_this_iteration = time.time() - start_time_this_iteration
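                # Heuristic for balancing evaluation against model refitting:
                # stop this batch after at least two evaluations once more
                # time has been spent evaluating than choosing, after a single
                # evaluation if choose_next failed, or after at most 50
                # evaluations.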
                if models_fitted_this_iteration >= 2 and \
                        time_for_choose_next > 0 and \
                        time_used_this_iteration > time_for_choose_next:
                    break
                elif time_for_choose_next <= 0 and \
                        models_fitted_this_iteration >= 1:
                    break
                elif models_fitted_this_iteration >= 50:
                    break

                if max_iters is not None:
                    finished = (smac_iter >= max_iters)

            if self.scenario.shared_model:
                pSMAC.write(run_history=run_history,
                            output_directory=self.scenario.output_dir,
                            num_run=self.seed)