Example #1
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from modAL.models import ActiveLearner
from modAL.uncertainty import uncertainty_sampling


class LearnerD:
    def __init__(self):

        self.is_ready_to_predict = False

        self.learner = ActiveLearner(
            estimator=RandomForestClassifier(n_estimators=100),
            query_strategy=uncertainty_sampling,
        )

    def predict_prob(self, point):
        if not self.is_ready_to_predict:
            print('predict prob abort, learner is not ready to predict')
            return False, 0  # signal failure rather than a valid probability

        # predict_proba returns [[P(negative), P(positive)]] for a single row
        positive_prob = self.learner.predict_proba(point)[0][1]
        return True, positive_prob

    def update(self, positive_points, negative_points):
        if len(positive_points) == 0 or len(negative_points) == 0:
            print('update abort, not enough data to update')
            self.is_ready_to_predict = False
            return False

        # Stack the single-row point arrays into one design matrix,
        # labeled 1 for positive points and 0 for negative points
        X = np.vstack([*positive_points, *negative_points])
        y = np.concatenate((np.ones(len(positive_points)),
                            np.zeros(len(negative_points))))

        self.learner.fit(X, y)
        self.is_ready_to_predict = True
        return True
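
For context, a minimal usage sketch of LearnerD. The toy feature rows below are invented for illustration; each point must be a single-row 2-D array, because update() stacks points row-wise and predict_prob() indexes the first row:

import numpy as np

# Hypothetical toy data: two positive and two negative single-row points.
positives = [np.array([[1.0, 2.0]]), np.array([[1.5, 2.5]])]
negatives = [np.array([[-1.0, -2.0]]), np.array([[-1.5, -2.5]])]

learner = LearnerD()
learner.update(positives, negatives)             # fits the forest on the labeled rows
ok, prob = learner.predict_prob(np.array([[1.2, 2.1]]))
print(ok, prob)                                  # ok is True; prob is P(positive)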
Example #2
# Defer annotation evaluation: ParameterSpace is project-specific and its
# import is not shown in this snippet.
from __future__ import annotations

import logging
from typing import Callable

import numpy as np
import pandas as pd
from sklearn.base import BaseEstimator
from sklearn.metrics import mean_absolute_error
from modAL.models import ActiveLearner

log = logging.getLogger(__name__)


class SurrogateModel:
    def __init__(self,
                 estimator: BaseEstimator,
                 query_strategy: Callable,
                 parameter_space: ParameterSpace,
                 seed_size: int = 20,
                 n_exploration_iterations: int = 10,
                 n_runs_per_iter: int = 10,
                 exploration_multiplier: int = 10,
                 exploitation_eval_size: int = 10,
                 exploitation_multiplier: int = 10,
                 verbose: int = 0):
        """
        Constructs new optimizer backend
        :param estimator: Base extimator for surrogate model, could be
        RandomForestRegressor or GaussianProcessRegressor
        :param query_strategy: function, which take regressor and unlabeled
        set of parameters and selects most informative
        :param parameter_space: instance of ParameterSpace
        :param seed_size: number of parameters to be scored for seeding stage
        :param n_exploration_iterations: number of iterations for exploration stage,
        i.e training surrogate model
        :param n_runs_per_iter: number of parameter evaluations per iteration
        :param exploration_multiplier: multiplier for sampling parameters for exploration
        :param exploitation_eval_size: number of parameter evaluations for exploitation stage
        :param exploitation_multiplier: multiplier for parameter sampling for exploitation
        """
        self.surrogate = ActiveLearner(estimator, query_strategy)
        self.parameter_space = parameter_space

        self.seed_size = seed_size
        self.n_exploration_iterations = n_exploration_iterations
        self.n_runs_per_iter = n_runs_per_iter
        self.exploration_multiplier = exploration_multiplier
        self.exploitation_eval_size = exploitation_eval_size
        self.exploitation_multiplier = exploitation_multiplier

        self.__history_parameters = []
        self.__history_scores = []

        self.verbose = verbose
        log.setLevel(verbose)

    def __sample_batch(self, batch_size: int = 10):
        return pd.concat(self.parameter_space.sample(batch_size))

    def __update_history(self, params: pd.DataFrame, scores: np.ndarray):
        self.__history_parameters.append(params)
        self.__history_scores += list(scores)

    def objective(self, func):
        """
        Wraps generic argumented function into a function that consequently evaluates multiple parameter sets
        :param func: func that accepts params and returns score, that must be minimized
        :return: wrapper
        """
        def wrapper(params):
            scores = []
            for param_set in params:
                scores.append(func(**param_set))

            return scores

        return wrapper

    def seeding(self, objective_evaluator):
        """
        Runs seed stage of optimizer
        :param objective_evaluator: function which takes sequence of parameter dicts
         and returns array of scores
        :return: NoReturn
        """
        params = self.__sample_batch(self.seed_size)
        scores = objective_evaluator(params.to_dict(orient='records'))

        self.__update_history(params, scores)

        self.surrogate.fit(X=params.values, y=scores)

        if self.verbose > 0:
            mae = self.score_on_history()
            log.info(f"Seed stage MAE = {mae:.4f}")

    def exploration(self, objective_evaluator):
        """
        Runs exploration stage of optimizer - training optimizer to pred
        :param objective_evaluator: function which takes sequence of parameter dicts
         and returns array of scores
        :return: NoReturn
        """
        for i in range(self.n_exploration_iterations):
            space = self.__sample_batch(self.n_runs_per_iter *
                                        self.exploration_multiplier)
            idxs, _ = self.surrogate.query(space.values,
                                           batch_size=self.n_runs_per_iter)
            params = space.iloc[idxs]

            scores = objective_evaluator(params.to_dict(orient='records'))

            self.__update_history(params, scores)
            self.surrogate.teach(params.values, scores)

            if self.verbose:
                mae = self.score_on_history()
                log.info(f"Exploration iter {i} MAE = {mae:.4f}")

    def score_on_history(self):
        """
        Scores model on existing hostorical evaluations
        :return: mean absolute error score
        """
        X = pd.concat(self.__history_parameters)
        y_true = np.array(self.__history_scores)
        y_pred = self.surrogate.predict(X.values)
        mae = mean_absolute_error(y_true, y_pred)
        return mae

    def exploitation(self, objective_evaluator):
        """
        Runs exploitatio stage
        :param objective_evaluator: function which takes sequence of parameter dicts
         and returns array of scores
        :return: final mode mae on best params
        """
        space = self.__sample_batch(self.exploitation_eval_size *
                                    self.exploitation_multiplier)
        scores_pred = self.surrogate.predict(space.values)
        idxs = np.argsort(scores_pred)[::-1]
        idxs = idxs[:self.exploitation_eval_size]
        params = space.iloc[idxs]
        scores_pred = scores_pred[idxs]

        scores_true = objective_evaluator(params.to_dict(orient='records'))
        mae = mean_absolute_error(scores_true, scores_pred)
        self.__update_history(params, scores_true)
        return mae

    @property
    def best_params(self):
        """
        Best params so far
        :return: dictionary with parameters
        """
        params = pd.concat(self.__history_parameters)
        scores = np.array(self.__history_scores)

        idx = np.argmax(scores)
        return params.to_dict(orient='records')[idx]
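
For context, a minimal end-to-end sketch. UniformSpace and max_std_query are illustrative stand-ins invented here: the real ParameterSpace is not shown in this snippet, and the query strategy has to accept the batch_size keyword that exploration() forwards through query(). modAL passes extra predict keyword arguments through to the estimator, which the strategy uses via return_std:

import numpy as np
import pandas as pd
from sklearn.gaussian_process import GaussianProcessRegressor

# Hypothetical stand-in for ParameterSpace: sample() yields single-row DataFrames.
class UniformSpace:
    def sample(self, n):
        for _ in range(n):
            yield pd.DataFrame({'x': [np.random.uniform(-5, 5)]})

# Query strategy accepting the batch_size kwarg used in exploration();
# it picks the points where the GP is least certain.
def max_std_query(regressor, X, batch_size=1):
    _, std = regressor.predict(X, return_std=True)
    idx = np.argsort(std)[-batch_size:]
    return idx, X[idx]

model = SurrogateModel(estimator=GaussianProcessRegressor(),
                       query_strategy=max_std_query,
                       parameter_space=UniformSpace())

@model.objective
def evaluator(x):
    return -(x - 1.0) ** 2   # score to maximize; optimum at x = 1

model.seeding(evaluator)
model.exploration(evaluator)
final_mae = model.exploitation(evaluator)
print(final_mae, model.best_params)   # best_params should be close to {'x': 1.0}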