def set_evaluation_feedback(self, feedbacks):
    """Report the outcome of a rollout with the current weights.

    The feedbacks are validated and summed by ``check_feedback``, passed
    through ``self.value_transform`` and then handed to
    ``self.bayes_opt.update`` together with ``self.parameters``.

    Parameters
    ----------
    feedbacks : feedbacks of the rollout, forwarded to ``check_feedback``
    """
    summed_feedback = check_feedback(feedbacks, compute_sum=True)
    # Transform reward (e.g. to a log-scale) before it reaches the model.
    transformed_return = self.value_transform(summed_feedback)
    self.bayes_opt.update(self.parameters, transformed_return)
def set_evaluation_feedback(self, feedbacks):
    """Report the outcome of a rollout with the current weights.

    The feedbacks are validated and summed by ``check_feedback`` and passed
    through ``self.value_transform``. The optimizer is then updated at the
    query point formed by stacking ``self.context`` in front of
    ``self.parameters``.

    Parameters
    ----------
    feedbacks : feedbacks of the rollout, forwarded to ``check_feedback``
    """
    summed_feedback = check_feedback(feedbacks, compute_sum=True)
    # Transform reward (e.g., to a log-scale) before it reaches the model.
    transformed_return = self.value_transform(summed_feedback)

    # The model is queried over the joint (context, parameters) vector.
    query_point = np.hstack((self.context, self.parameters))
    self.bayes_opt.update(query_point, transformed_return)

    if self.policy is None:
        return
    # A policy derived from the internal model is no longer valid because
    # the underlying data has just changed.
    self.policy_fitted = False
def set_evaluation_feedback(self, feedbacks):
    """Report the outcome of a rollout with the current weights.

    Stores the summed feedback in ``self.reward``, appends the current
    parameters and reward to the histories and increments the iteration
    counter. Every ``self.train_freq`` iterations ``self.policy_`` is refit
    on the complete history. The reward is logged and the best parameters
    seen so far are tracked on every call.

    Parameters
    ----------
    feedbacks : feedbacks of the rollout, forwarded to ``check_feedback``
    """
    self.reward = check_feedback(feedbacks, compute_sum=True)

    # Extend the sample history with the rollout that was just evaluated.
    self.history_theta.append(self.params)
    self.history_R.append(self.reward)
    self.it += 1

    refit_due = self.it % self.train_freq == 0
    if refit_due:
        sampled_params = np.asarray(self.history_theta)
        sampled_returns = np.asarray(self.history_R)
        # Only the first element of the solver result is used; it is
        # passed to the policy fit (presumably per-sample weights --
        # confirm against solve_dual_reps).
        dual_solution = solve_dual_reps(
            sampled_returns, self.epsilon, self.min_eta)
        sample_weights = dual_solution[0]
        self.policy_.fit(None, sampled_params, sample_weights)

    self.logger.info("Reward %.6f" % self.reward)

    # Remember the best-performing parameters observed so far.
    if self.reward > self.max_return:
        self.max_return = self.reward
        self.best_params = self.params
def set_evaluation_feedback(self, feedback):
    """Set feedbacks for the parameter vector.

    The summed feedback is stored in the fitness slot of the current sample
    (negated when ``self.maximize`` is set), the best sample seen so far is
    tracked, and ``self._update`` is triggered once a full batch of
    ``self.n_samples_per_update`` samples has been evaluated.

    Parameters
    ----------
    feedback : list of float
        feedbacks for each step or for the episode, depends on the problem
    """
    slot = self.it % self.n_samples_per_update

    self.fitness[slot] = check_feedback(feedback, compute_sum=True)
    if self.maximize:
        # Fitness is compared with "lower is better" below, so returns to
        # be maximized are stored with flipped sign.
        self.fitness[slot] = self.fitness[slot] * -1

    improved = self.fitness[slot] <= self.best_fitness
    if improved:
        self.best_fitness = self.fitness[slot]
        self.best_fitness_it = self.it
        # Copy in place so existing references to best_params stay valid.
        self.best_params[:] = self.samples[slot]

    self.it += 1

    evaluated = self.it - self.initial_it
    if evaluated % self.n_samples_per_update == 0:
        self._update(self.samples, self.fitness)
def test_check_feedback_inf():
    """Infinite feedback passes with ``check_inf=False`` and raises by default."""
    feedbacks = [0, 1, np.inf]

    # With the inf check disabled the infinite value propagates to the sum.
    summed = check_feedback(feedbacks, compute_sum=True, check_inf=False)
    assert_true(np.isinf(summed))

    # With default arguments the infinite entry is rejected.
    assert_raises(ValueError, check_feedback, feedbacks)
def test_check_feedback_nan():
    """NaN feedback passes with ``check_nan=False`` and raises by default."""
    feedbacks = [0, 1, np.nan]

    # With the NaN check disabled the NaN propagates to the sum.
    summed = check_feedback(feedbacks, compute_sum=True, check_nan=False)
    assert_true(np.isnan(summed))

    # With default arguments the NaN entry is rejected.
    assert_raises(ValueError, check_feedback, feedbacks)