Example 1
class BaseRunner(ABC):
    """Interface class to handle the execution of SMAC' configurations.

    This interface defines how to interact with the SMBO loop.
    The complexity of running a configuration, as well as handling the
    results, is abstracted away from the SMBO via a BaseRunner.

    From the SMBO perspective, launching a configuration follows a
    submit/collect scheme:
    1- A run is launched via submit_run().
    1.1- submit_run() internally calls run_wrapper(), a method that
         contains processing common to the different runners,
         for example handling capping and stats checking.
    1.2- A class that implements BaseRunner defines run(), which is
         the actual algorithm that translates a RunInfo into a RunValue,
         i.e. a configuration into an actual result.
    2- Completed runs are collected via get_finished_runs(), which
       returns all finished runs, if any.
    3- This interface also offers the method wait() as a mechanism to make
       sure we have enough data in the next iteration to make a decision. For
       example, the intensifier might not be able to select the next challenger
       until more results are available.
    A minimal usage sketch is shown after this class definition.


    Attributes
    ----------

    results
    ta
    stats
    run_obj
    par_factor
    cost_for_crash
    abort_on_first_run_crash

    Parameters
    ----------
    ta : typing.Union[typing.List[str], typing.Callable]
        target algorithm
    stats : Stats
        stats object to collect statistics about runtime/additional info
    run_obj : str
        run objective of SMAC
    par_factor : int
        penalization factor
    cost_for_crash : float
        cost that is used in case of crashed runs (including runs
        that returned NaN or inf)
    abort_on_first_run_crash : bool
        if True and the first run crashes, raise FirstRunCrashedException
    """
    def __init__(
        self,
        ta: typing.Union[typing.List[str], typing.Callable],
        stats: Stats,
        run_obj: str = "runtime",
        par_factor: int = 1,
        cost_for_crash: float = float(MAXINT),
        abort_on_first_run_crash: bool = True,
    ):

        # The results attribute is a FIFO structure, implemented via a list
        # (because the Queue lock is not picklable). Finished runs are
        # put in this list and collected via get_finished_runs()
        self.results = []  # type: typing.List[typing.Tuple[RunInfo, RunValue]]

        # Attributes needed by a Runner implementation in order to
        # execute a target algorithm (ta)
        self.ta = ta
        self.stats = stats
        self.run_obj = run_obj
        self.par_factor = par_factor
        self.cost_for_crash = cost_for_crash
        self.abort_on_first_run_crash = abort_on_first_run_crash
        self.logger = PickableLoggerAdapter(self.__module__ + '.' +
                                            self.__class__.__name__)
        self._supports_memory_limit = False

        super().__init__()

    @abstractmethod
    def submit_run(self, run_info: RunInfo) -> None:
        """This function submits a configuration
        embedded in a RunInfo object, and uses one of the workers
        to produce a result (such result will eventually be available
        on the self.results FIFO).

        This interface method will be called by SMBO, with the expectation
        that a function will be executed by a worker.

        What will be executed is dictated by run_info, and how it will be
        executed is decided by the child class that implements run().

        Because config submission can be a serial or parallel endeavor,
        this method is expected to be implemented by a child class.

        Parameters
        ----------
        run_info: RunInfo
            An object containing the configuration and the necessary data to run it

        """
        pass

    @abstractmethod
    def run(
        self,
        config: Configuration,
        instance: str,
        cutoff: typing.Optional[float] = None,
        seed: int = 12345,
        budget: typing.Optional[float] = None,
        instance_specific: str = "0",
    ) -> typing.Tuple[StatusType, float, float, typing.Dict]:
        """Runs target algorithm <self.ta> with configuration <config> on
        instance <instance> with instance specifics <specifics> for at most
        <cutoff> seconds and random seed <seed>

        This method exemplifies how to defined the run() method

        Parameters
        ----------
            config : Configuration
                dictionary param -> value
            instance : string
                problem instance
            cutoff : float, optional
                Wallclock time limit of the target algorithm. If no value is
                provided no limit will be enforced.
            seed : int
                random seed
            budget : float, optional
                A positive, real-valued number representing an arbitrary limit to the target
                algorithm. Handled by the target algorithm internally
            instance_specific: str
                instance specific information (e.g., domain file or solution)

        Returns
        -------
            status: enum of StatusType (int)
                {SUCCESS, TIMEOUT, CRASHED, ABORT}
            cost: float
                cost/regret/quality (float) (None, if not returned by TA)
            runtime: float
                runtime (None if not returned by TA)
            additional_info: dict
                all further additional run information
        """
        pass

    def run_wrapper(
        self,
        run_info: RunInfo,
    ) -> typing.Tuple[RunInfo, RunValue]:
        """Wrapper around run() to exec and check the execution of a given config file

        This function encapsulates common handling/processing, so that run() implementation
        is simplified.

        Parameters
        ----------
            run_info : RunInfo
                Object that contains enough information to execute a configuration run in
                isolation.

        Returns
        -------
            RunInfo:
                an object containing the launched configuration
            RunValue:
                an object containing information about the status/performance of the config
        """
        start = time.time()

        if run_info.cutoff is None and self.run_obj == "runtime":
            if self.logger:
                self.logger.critical(
                    "For scenarios optimizing running time "
                    "(run objective), a cutoff time is required, "
                    "but not given to this call.")
            raise ValueError("For scenarios optimizing running time "
                             "(run objective), a cutoff time is required, "
                             "but not given to this call.")
        cutoff = None
        if run_info.cutoff is not None:
            cutoff = int(math.ceil(run_info.cutoff))

        try:
            status, cost, runtime, additional_info = self.run(
                config=run_info.config,
                instance=run_info.instance,
                cutoff=cutoff,
                seed=run_info.seed,
                budget=run_info.budget,
                instance_specific=run_info.instance_specific)
        except Exception as e:
            status = StatusType.CRASHED
            cost = self.cost_for_crash
            runtime = time.time() - start

            # Add context information to the error message
            exception_traceback = traceback.format_exc()
            error_message = repr(e)
            additional_info = {
                'traceback': exception_traceback,
                'error': error_message
            }

        end = time.time()

        if run_info.budget == 0 and status == StatusType.DONOTADVANCE:
            raise ValueError(
                "Cannot handle DONOTADVANCE state when using intensify or SH/HB on "
                "instances.")

        # Catch NaN or inf.
        if ((self.run_obj == 'runtime' and not np.isfinite(runtime))
                or (self.run_obj == 'quality' and not np.isfinite(cost))):
            if self.logger:
                self.logger.warning(
                    "Target Algorithm returned NaN or inf as {}. "
                    "Algorithm run is treated as CRASHED, cost "
                    "is set to {} for quality scenarios. "
                    "(Change value through \"cost_for_crash\""
                    "-option.)".format(self.run_obj, self.cost_for_crash))
            status = StatusType.CRASHED

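        # Worked example of the penalization below: with run_obj="runtime",
        # cutoff=10s and par_factor=10 (PAR10), a crashed or timed-out run is
        # charged cost = 10 * 10 = 100, and a reported runtime larger than
        # 100s is clamped to 100s and re-labelled as TIMEOUT.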
        if self.run_obj == "runtime":
            # The following line pleases mypy - we already check above, prior to
            # calling run(), that cutoff is not None. However, mypy assumes that
            # the data type of cutoff is still Optional[int]
            assert cutoff is not None
            if runtime > self.par_factor * cutoff:
                self.logger.warning("Returned running time is larger "
                                    "than {0} times the passed cutoff time. "
                                    "Clamping to {0} x cutoff.".format(
                                        self.par_factor))
                runtime = cutoff * self.par_factor
                status = StatusType.TIMEOUT
            if status == StatusType.SUCCESS:
                cost = runtime
            else:
                cost = cutoff * self.par_factor
            if status == StatusType.TIMEOUT and run_info.capped:
                status = StatusType.CAPPED
        else:
            if status == StatusType.CRASHED:
                cost = self.cost_for_crash

        return run_info, RunValue(status=status,
                                  cost=cost,
                                  time=runtime,
                                  additional_info=additional_info,
                                  starttime=start,
                                  endtime=end)

    @abstractmethod
    def get_finished_runs(
            self) -> typing.List[typing.Tuple[RunInfo, RunValue]]:
        """This method returns any finished configuration, and returns a list with
        the results of exercising the configurations. This class keeps populating results
        to self.results until a call to get_finished runs is done. In this case, the
        self.results list is emptied and all RunValues produced by running run() are
        returned.

        Returns
        -------
            List[RunInfo, RunValue]: A list of pais RunInfo/RunValues
            a submitted configuration
        """
        raise NotImplementedError()

    @abstractmethod
    def wait(self) -> None:
        """SMBO/intensifier might need to wait for runs to finish before making a decision.
        This method waits until at least one run completes.
        """
        pass

    @abstractmethod
    def pending_runs(self) -> bool:
        """
        Whether or not there are configs still running. Generally, if the runner is serial,
        launching a run instantly returns its result. On parallel runners, there might
        be pending configurations to complete.
        """
        pass

    @abstractmethod
    def num_workers(self) -> int:
        """
        Return the number of active workers that will execute target algorithm (TAE) runs.
        """
        pass
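To make the submit/collect scheme described in the BaseRunner docstring concrete, here is a minimal, purely illustrative serial runner. DummyRunner, its dummy run() result, and the commented usage are assumptions for this sketch (reusing the imports of the example above); they are not part of SMAC.

# Illustrative sketch only: a minimal serial runner following the
# submit/collect scheme of BaseRunner. DummyRunner is hypothetical.
class DummyRunner(BaseRunner):

    def submit_run(self, run_info: RunInfo) -> None:
        # Serial runner: run_wrapper() executes the configuration right away
        # and the result sits in the FIFO until get_finished_runs() drains it.
        self.results.append(self.run_wrapper(run_info))

    def run(
        self,
        config: Configuration,
        instance: str,
        cutoff: typing.Optional[float] = None,
        seed: int = 12345,
        budget: typing.Optional[float] = None,
        instance_specific: str = "0",
    ) -> typing.Tuple[StatusType, float, float, typing.Dict]:
        # Dummy result: success with cost 1.0 and a runtime of 0.5 seconds
        return StatusType.SUCCESS, 1.0, 0.5, {}

    def get_finished_runs(self) -> typing.List[typing.Tuple[RunInfo, RunValue]]:
        results, self.results = self.results, []
        return results

    def wait(self) -> None:
        # Nothing to wait for: results are available as soon as they are submitted
        pass

    def pending_runs(self) -> bool:
        return len(self.results) > 0

    def num_workers(self) -> int:
        return 1

# Hypothetical usage (run_obj="quality", so no cutoff is required):
# runner = DummyRunner(ta=my_function, stats=stats, run_obj="quality")
# runner.submit_run(run_info)
# for finished_info, run_value in runner.get_finished_runs():
#     ...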
Example 2
class EnsembleNN(AbstractEPM):
    def __init__(self,
                 configspace: ConfigurationSpace,
                 types: typing.List[int],
                 bounds: typing.List[typing.Tuple[float, float]],
                 seed: int,
                 hidden_dims: typing.List[int] = [50, 50, 50],
                 lr: float = 1e-3,
                 momentum: float = 0.999,
                 weight_decay: float = 1e-4,
                 iterations: int = 5000,
                 batch_size: int = 16,
                 number_of_networks: int = 5,
                 var: bool = True,
                 train_with_lognormal_llh=False,
                 compute_mean_in_logspace=False,
                 max_cat: int = np.inf,
                 ignore_cens: bool = False,
                 learned_weight_init: bool = False,
                 optimization_algorithm: str = 'sgd',
                 **kwargs):
        super().__init__(configspace, types, bounds, seed, **kwargs)
        #self.types[self.types == 0] = -1
        self.types = [int(f) for f in self.types]
        assert not (train_with_lognormal_llh and compute_mean_in_logspace)

        if type(self.seed) != int:
            self.seed = self.seed[0]

        self.device = torch.device(
            'cuda' if torch.cuda.is_available() else 'cpu')
        self.log_loss = 1000
        self.log_error = 5000

        self.var = var
        self.hidden_dims = hidden_dims
        self.lr = lr
        self.momentum = momentum
        self.iterations = iterations
        self.weight_decay = weight_decay
        self.batch_size = batch_size
        self.number_of_networks = number_of_networks
        self.train_with_lognormal = train_with_lognormal_llh
        self.compute_mean_in_logspace = compute_mean_in_logspace
        self.max_cat = max_cat
        self.ignore_cens = ignore_cens
        self.learned_weight_init = learned_weight_init
        self.optimization_algorithm = optimization_algorithm

        self._my = None
        self._sy = None

        # Quick check, should not take too long
        a = np.random.normal(42, 23, 1000)
        m1, v1 = (np.mean(a), np.var(a))
        a = self._preprocess_y(a)
        m2, v2 = self._postprocess_mv(np.mean(a), np.var(a))
        assert np.abs(m1 - m2) < 1e-3, (m1, m2)
        assert np.abs(v1 - v2) < 1e-3, (v1, v2)
        self._my = None
        self._sy = None

        self.nns = None
        self.logger = PickableLoggerAdapter(self.__module__ + "." +
                                            self.__class__.__name__)

    def _preprocess_y(self, y: np.ndarray, redo=False):
        if self._my is None or redo:
            self._my = np.mean(y)
            self._sy = np.std(y)
            if self._sy == 0:
                # all y's are the same
                self._sy = 1

        if not self.train_with_lognormal:
            y -= self._my
            y /= self._sy

        return y

    def _postprocess_mv(self, m: np.ndarray, v: np.ndarray):
        # zero mean scaling
        m = m * self._sy + self._my
        v = v * self._sy**2
        return m, v

    def _preprocess_x(self, x: np.ndarray, redo: bool = False):
        # Replace nans with 0, should be fine for both cats and conts
        # TODO: Maybe refine this and replace cont with mean
        x = np.nan_to_num(x)
        return x

    def _train(self, X: np.ndarray, Y: np.ndarray, C: np.ndarray = None):
        self.logger.critical("Not using C as this is not a Tobit model")
        Y = self._preprocess_y(Y, redo=True)
        X = self._preprocess_x(X, redo=True)
        self.train_data = (X, Y)
        self.nns = []
        self.logger.debug("Start Training %d networks" %
                          self.number_of_networks)
        for i in range(self.number_of_networks):
            nn = SimpleNetworkEmbedding(
                hidden_dims=self.hidden_dims,
                feat_types=self.types,
                lr=self.lr,
                seed=self.seed + i,
                momentum=self.momentum,
                weight_decay=self.weight_decay,
                iterations=self.iterations,
                batch_size=self.batch_size,
                var=self.var,
                lognormal_nllh=self.train_with_lognormal,
                var_bias_init=np.std(Y),
                max_cat=self.max_cat,
                learned_weight_init=self.learned_weight_init,
                optimization_algorithm=self.optimization_algorithm,
            )
            nn.reset()
            nn.train(X, Y)
            self.nns.append(nn)

    def _predict_individual(
            self, X: np.ndarray) -> typing.Tuple[np.ndarray, np.ndarray]:
        X = self._preprocess_x(X, redo=True)
        ms = np.zeros([X.shape[0], self.number_of_networks])
        vs = np.zeros([X.shape[0], self.number_of_networks])
        for i_nn, nn in enumerate(self.nns):
            pred = nn.predict(X)
            m = pred[:, 0]
            v = pred[:, 1]

            if not self.train_with_lognormal:
                m, v = self._postprocess_mv(m, v)

            ms[:, i_nn] = m
            vs[:, i_nn] = v

        return ms, vs

    def _predict(self, X: np.ndarray) -> typing.Tuple[np.ndarray, np.ndarray]:
        ms, _ = self._predict_individual(X)
        m = ms.mean(axis=1)
        v = ms.var(axis=1)
        return m.reshape((-1, 1)), v.reshape((-1, 1))

    def predict_marginalized_over_instances(self, X: np.ndarray):
        """Predict mean and variance marginalized over all instances.

        Returns the predictive mean and variance marginalised over all
        instances for a set of configurations.

        Note
        ----
        This method overrides the same method of ~smac.epm.base_epm.AbstractEPM;
        it follows the SMAC2 implementation and requires no distribution
        assumption to marginalize the uncertainty estimates.

        Parameters
        ----------
        X : np.ndarray
            [n_samples, n_features (config)]

        Returns
        -------
        means : np.ndarray of shape = [n_samples, 1]
            Predictive mean
        vars : np.ndarray  of shape = [n_samples, 1]
            Predictive variance
        """

        if self.instance_features is None or \
                len(self.instance_features) == 0:
            mean_, var = self.predict(X)
            var[var < self.var_threshold] = self.var_threshold
            var[np.isnan(var)] = self.var_threshold
            return mean_, var

        if len(X.shape) != 2:
            raise ValueError('Expected 2d array, got %dd array!' %
                             len(X.shape))
        if X.shape[1] != len(self.bounds):
            raise ValueError('Rows in X should have %d entries but have %d!' %
                             (len(self.bounds), X.shape[1]))

        mean_ = np.zeros((X.shape[0], 1))
        var = np.zeros(X.shape[0])

        for i, x in enumerate(X):

            # marginalize over instance
            # 1. Get predictions for all networks

            # Not very efficient
            # preds_nns1 = np.zeros([len(self.instance_features), self.number_of_networks])
            #for i_f, feat in enumerate(self.instance_features):
            #    x_ = np.concatenate([x, feat]).reshape([1, -1])
            #    print(i_f, x_)
            #    m, _ = self._predict_individual(x_)
            #    preds_nns1[i_f, :] = m

            input = np.concatenate((np.tile(
                x, (len(self.instance_features), 1)), self.instance_features),
                                   axis=1)
            preds_nns, _ = self._predict_individual(input)

            # 2. Average each network's predictions over all instances
            pred_per_nn = []
            for nn_id in range(self.number_of_networks):
                if self.compute_mean_in_logspace:
                    pred_per_nn.append(
                        np.log(np.mean(np.exp(preds_nns[:, nn_id]))))
                else:
                    pred_per_nn.append(np.mean(preds_nns[:, nn_id]))

            # 3. Compute statistics across the networks
            mean_x = np.mean(pred_per_nn)
            var_x = np.var(pred_per_nn)
            if var_x < self.var_threshold:
                var_x = self.var_threshold

            var[i] = var_x
            mean_[i] = mean_x

        if len(mean_.shape) == 1:
            mean_ = mean_.reshape((-1, 1))
        if len(var.shape) == 1:
            var = var.reshape((-1, 1))

        return mean_, var
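EnsembleNN standardizes the targets to zero mean and unit variance before training (_preprocess_y) and inverts that scaling on the predicted mean and variance (_postprocess_mv). The following self-contained numpy snippet mirrors the quick check in __init__ and is only an illustration of that round trip:

import numpy as np

# Standardize targets, then invert the transform on mean and variance,
# as EnsembleNN does in _preprocess_y / _postprocess_mv.
y = np.random.normal(42, 23, 1000)
mu, sigma = np.mean(y), np.std(y)

y_scaled = (y - mu) / sigma                  # _preprocess_y (no lognormal llh)
m_back = np.mean(y_scaled) * sigma + mu      # _postprocess_mv on the mean
v_back = np.var(y_scaled) * sigma ** 2       # _postprocess_mv on the variance

assert np.isclose(m_back, np.mean(y))
assert np.isclose(v_back, np.var(y))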
Example 3
class NeuralNet(nn.Module):
    def __init__(self,
                 hidden_dims,
                 input_size,
                 feat_type=None,
                 var: bool = True,
                 max_cat: int = np.inf):
        super(NeuralNet, self).__init__()
        self.logger = PickableLoggerAdapter(self.__module__ + "." +
                                            self.__class__.__name__)

        self.feat_type = feat_type
        self.input_size = input_size
        self.num_neurons = hidden_dims
        self.activation = nn.Tanh
        self.num_layer = len(hidden_dims)
        self.max_cat = max_cat
        if var:
            self.n_output = 2
        else:
            self.n_output = 1

        if np.sum(self.feat_type) == 0:
            self.feat_type = None

        if self.feat_type is not None:
            self.logger.info("Use cat embedding")
            assert len(self.feat_type) == self.input_size
            emb = nn.ModuleList()
            sz = int(0)
            for f in self.feat_type:
                if f == 0:
                    # In SMAC, 0 encodes a numerical (non-categorical) feature
                    emb.append(None)
                    sz += 1
                else:
                    es = min(self.max_cat, int(f))
                    emb.append(nn.Embedding(int(f), es))
                    sz += es
            assert int(sz) == sz
            sz = int(sz)
            num_neurons = [sz] + self.num_neurons
            self.embedding = emb
        else:
            num_neurons = [self.input_size] + self.num_neurons

        self.weights = nn.ModuleList()
        self.acts = nn.ModuleList()

        print(num_neurons)
        for i in range(self.num_layer):
            self.weights.append(nn.Linear(num_neurons[i], num_neurons[i + 1]))
            self.acts.append(self.activation())

        self.outlayer = nn.Linear(num_neurons[-1], self.n_output)

    def initialize_weights(self, var_bias_init: float = 1):
        # Use Xavier normal initialization, slightly modified from "Understanding the difficulty of ..."
        for i in range(len(self.weights)):
            torch.nn.init.xavier_normal_(self.weights[i].weight)
            self.weights[i].bias.data.fill_(0)
        torch.nn.init.xavier_normal_(self.outlayer.weight)
        # TODO Second bias should be initialised to np.log(np.exp(x) - 1), s.t. softplus = x
        self.outlayer.bias.data[0].fill_(0)
        if var_bias_init == 0:
            self.logger.critical(
                "Can't properly initialize bias unit, initialize wih zero")
            self.outlayer.bias.data[0].fill_(0)
        else:
            self.outlayer.bias.data[1].fill_(np.log(np.exp(var_bias_init) - 1))

    def learn_initial_weights(self, X):
        """Learn initial weights such that the mean over the data is on average zero per neuron"""
        output = torch.tensor(X, dtype=torch.float32)
        for i in range(len(self.weights)):
            torch.nn.init.xavier_normal_(self.weights[i].weight,
                                         torch.nn.init.calculate_gain('tanh'))
            self.weights[i].bias.data.fill_(0)
            output2 = self.weights[i].forward(output)
            mean = output2.mean(axis=0)
            self.weights[i].bias.data = -mean
            output = self.weights[i].forward(output)
            output = self.acts[i](output)
            # print(output.mean(axis=0), output.mean(axis=0).shape)
        torch.nn.init.xavier_normal_(self.outlayer.weight,
                                     torch.nn.init.calculate_gain('tanh'))
        self.outlayer.bias.data.fill_(0)
        # self.outlayer.bias.data[1].fill_(np.log(np.exp(1) - 1))
        # Noise can be tuned here...
        self.outlayer.bias.data[1] = -5

    def forward(self, x):
        out = []
        if self.feat_type is not None:
            for idx, (emb,
                      typ) in enumerate(zip(self.embedding, self.feat_type)):
                if typ == 0:
                    # a numerical
                    out.append(x[:, idx].view(-1, 1))
                else:
                    # a categorical
                    out.append(
                        emb(x[:, idx].long().view(-1, 1)).view(
                            [-1, min(self.max_cat, typ)]))
            out = torch.cat(out, 1)
        else:
            out = x

        for i in range(self.num_layer):
            out = self.weights[i](out)
            out = self.acts[i](out)
        out = self.outlayer(out)
        if self.n_output == 2:
            # Passing second output through softplus function (see Lakshminarayanan (2017))
            out[:, 1] = torch.log(1 + torch.exp(out[:, 1])) + 10e-6
        return out
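The last step of NeuralNet.forward() maps the second output column through a softplus plus a small constant so that the predicted variance is strictly positive (following Lakshminarayanan (2017)). A tiny illustrative snippet of that transform, assuming only torch:

import torch

# Raw network output: column 0 is the mean, column 1 is the unconstrained
# value that forward() turns into a variance via softplus.
raw = torch.tensor([[0.3, -2.0],
                    [1.2,  0.5]])

variance = torch.log(1 + torch.exp(raw[:, 1])) + 10e-6
print(variance)  # strictly positive, even for negative raw values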
Example 4
class DNGO(BaseModel):
    def __init__(self,
                 configspace: ConfigurationSpace,
                 types: np.ndarray,
                 bounds: typing.List[typing.Tuple[float, float]],
                 seed: int,
                 hidden_dims: typing.List[int] = [50, 50, 50],
                 lr: float = 1e-3,
                 momentum: float = 0.999,
                 weight_decay: float = 1e-4,
                 iterations: int = 10000,
                 batch_size: int = 8,
                 var: bool = True,
                 **kwargs):
        super().__init__(configspace, types, bounds, seed, **kwargs)
        print("USE DNGO")
        self.device = torch.device(
            'cuda' if torch.cuda.is_available() else 'cpu')
        self.log_loss = 100
        self.log_error = 1000

        self.var = var
        self.hidden_dims = hidden_dims
        self.lr = lr
        self.momentum = momentum
        self.iterations = iterations
        self.weight_decay = weight_decay
        self.batch_size = batch_size

        self.nn = None
        self.blr = None

        self.logger = PickableLoggerAdapter(self.__module__ + "." +
                                            self.__class__.__name__)

    def _train(self, X: np.ndarray, y: np.ndarray):
        self.nn = SimpleNetworkEmbedding(
            hidden_dims=self.hidden_dims,
            lr=self.lr,
            seed=self.seed,
            momentum=self.momentum,
            weight_decay=self.weight_decay,
            iterations=self.iterations,
            batch_size=self.batch_size,
            var=self.var,
        )
        self.blr = BayesianLinearRegressionLayer()

        self._my = np.mean(y)
        self._sy = np.std(y)

        y -= self._my
        y /= self._sy

        #print(X, y)
        #import matplotlib.pyplot as plt

        self.nn.train(X, y)
        #plt.scatter(X, y)

        #x_dense = np.linspace(-0.1, 1.1, 100)
        #pred = self._predict_nn(x_dense.reshape([-1, 1]))
        #m = pred[:, 0].flatten()
        #v = pred[:, 1].flatten()
        #plt.plot(x_dense, m, label="nn")
        #plt.fill_between(x_dense, m - v, m + v, alpha=0.5)
        self.blr.optimize_alpha_beta(self.nn.model, X, y)

        #m, v = self.blr.predict(self.model, x_dense.reshape([-1, 1]))
        #m = m.data.numpy().flatten()
        #v = v.data.numpy().flatten()
        #plt.scatter(X, y)
        #plt.plot(x_dense, m, label="blr")
        #plt.fill_between(x_dense, m-v, m+v, alpha=0.5)
        #plt.legend()
        #plt.ylim([-10, 10])
        #plt.show()

    def _predict(self, X: np.ndarray) -> typing.Tuple[np.ndarray, np.ndarray]:
        means, vars = self.blr.predict(self.nn.model, X)
        means = means.data.numpy().flatten()
        vars = vars.data.numpy().flatten()

        means = np.array(means * self._sy + self._my).reshape([-1, 1])
        vars = np.array(vars * self._sy**2).reshape([-1, 1])

        if not np.isfinite(means).any():
            self.logger.critical(
                "All DNGO predictions are NaN. Fall back to random predictions"
            )
            return np.random.randn(means.shape[0],
                                   means.shape[1]), np.zeros_like(vars)
        else:
            return means, vars
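DNGO trains the network as a feature extractor and then places a Bayesian linear regression on top of the last-layer features (BayesianLinearRegressionLayer.optimize_alpha_beta in the code above). As background, here is a minimal numpy sketch of that final regression step with the precisions alpha and beta held fixed; the real layer optimizes them, so this is an illustrative assumption rather than the actual implementation:

import numpy as np

# Phi: last-layer features of the training points, y: standardized targets
rng = np.random.RandomState(0)
Phi = rng.randn(20, 5)
y = rng.randn(20)

alpha, beta = 1.0, 100.0                      # prior precision, noise precision (fixed here)
A = alpha * np.eye(Phi.shape[1]) + beta * Phi.T @ Phi
A_inv = np.linalg.inv(A)
m = beta * A_inv @ Phi.T @ y                  # posterior mean of the linear weights

phi_star = rng.randn(1, 5)                    # features of a test point
pred_mean = phi_star @ m
pred_var = 1.0 / beta + phi_star @ A_inv @ phi_star.T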